%matplotlib inline
import torch
import torch.optim as optim
import numpy as np
from matplotlib import pyplot as plt

torch.set_printoptions(edgeitems=2)
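The loop below calls model, loss_fn, and grad_fn, which are defined in cells not included in this excerpt, and X and Y are assumed to be 1-D tensors prepared earlier. A minimal sketch of what those helpers presumably look like for a linear model with an MSE loss and a hand-derived gradient (names and shapes are inferred from the call sites, not taken from the original):

def model(X, w, b):
    return w * X + b                          # linear model: Y_hat = w*X + b

def loss_fn(Y_hat, Y):
    return ((Y_hat - Y) ** 2).mean()          # mean squared error

def grad_fn(X, Y, Y_hat, w, b):
    dloss_dYhat = 2 * (Y_hat - Y) / Y_hat.numel()   # d(MSE)/d(Y_hat)
    dloss_dw = (dloss_dYhat * X).sum()        # chain rule through w*X
    dloss_db = dloss_dYhat.sum()              # chain rule through b
    return torch.stack([dloss_dw, dloss_db])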
def training_loop(n_epochs, learning_rate, params, X, Y, print_params=True):
    for epoch in range(1, n_epochs + 1):
        w, b = params
        Y_hat = model(X, w, b)  # <1>
        loss = loss_fn(Y_hat, Y)
        grad = grad_fn(X, Y, Y_hat, w, b)  # <2>
        params = params - learning_rate * grad  # manual gradient-descent step
        if epoch % 30000 == 0:  # <3>
            print('Epoch %d, Loss %f' % (epoch, float(loss)))
            if print_params:
                print('    Params:', params)
                print('    Grad: ', grad)
        if not torch.isfinite(loss).all():
            break  # <3>
    return params
params = training_loop(
    n_epochs = 330000,
    learning_rate = 1e-4,
    params = torch.tensor([1.00, 0.01]),
    X = X,
    Y = Y,
    print_params = False)
params
Epoch 30000, Loss 12.095908
Epoch 60000, Loss 6.133891
Epoch 90000, Loss 4.048903
Epoch 120000, Loss 3.319792
Epoch 150000, Loss 3.064586
Epoch 180000, Loss 2.975681
Epoch 210000, Loss 2.944574
Epoch 240000, Loss 2.933552
Epoch 270000, Loss 2.929731
Epoch 300000, Loss 2.928472
Epoch 330000, Loss 2.927906
tensor([ 0.5358, -17.2503])
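The cell that produced the next output block is not in this excerpt; the identical loss trace and the requires_grad=True on the returned tensor suggest it is the autograd variant, where loss.backward() replaces the hand-written grad_fn. A minimal sketch under that assumption:

def training_loop(n_epochs, learning_rate, params, X, Y):
    for epoch in range(1, n_epochs + 1):
        if params.grad is not None:
            params.grad.zero_()               # clear gradients from the previous step
        Y_hat = model(X, *params)
        loss = loss_fn(Y_hat, Y)
        loss.backward()                       # autograd fills in params.grad
        with torch.no_grad():                 # the update must stay outside the graph
            params -= learning_rate * params.grad
        if epoch % 30000 == 0:
            print('Epoch %d, Loss %f' % (epoch, float(loss)))
    return params

params = training_loop(
    n_epochs = 330000,
    learning_rate = 1e-4,
    params = torch.tensor([1.00, 0.01], requires_grad=True),
    X = X,
    Y = Y)
params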
Epoch 30000, Loss 12.095908
Epoch 60000, Loss 6.133891
Epoch 90000, Loss 4.048903
Epoch 120000, Loss 3.319792
Epoch 150000, Loss 3.064586
Epoch 180000, Loss 2.975681
Epoch 210000, Loss 2.944574
Epoch 240000, Loss 2.933552
Epoch 270000, Loss 2.929731
Epoch 300000, Loss 2.928472
Epoch 330000, Loss 2.927906
tensor([ 0.5358, -17.2503], requires_grad=True)
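The call below hands the loop a prebuilt optimizer, but the cell constructing it and the optimizer-based loop itself are missing from this excerpt. A minimal sketch, assuming plain SGD at the same 1e-4 learning rate (the loss trace restarting at 12.09 indicates params was re-initialized to [1.00, 0.01]); the same tensor must be handed to both the optimizer and the loop, which is what callout <1> in the call marks:

params = torch.tensor([1.00, 0.01], requires_grad=True)
optimizer = optim.SGD([params], lr=1e-4)      # assumption: plain SGD

def training_loop(n_epochs, optimizer, params, X, Y):
    for epoch in range(1, n_epochs + 1):
        Y_hat = model(X, *params)
        loss = loss_fn(Y_hat, Y)
        optimizer.zero_grad()                 # clear gradients from the previous step
        loss.backward()
        optimizer.step()                      # optimizer updates params in place
        if epoch % 30000 == 0:
            print('Epoch %d, Loss %f' % (epoch, float(loss)))
    return params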
training_loop(
    n_epochs = 330000,
    optimizer = optimizer,
    params = params,  # <1>
    X = X,
    Y = Y)
Epoch 30000, Loss 12.095908
Epoch 60000, Loss 6.133891
Epoch 90000, Loss 4.048905
Epoch 120000, Loss 3.319792
Epoch 150000, Loss 3.064586
Epoch 180000, Loss 2.975681
Epoch 210000, Loss 2.944574
Epoch 240000, Loss 2.933552
Epoch 270000, Loss 2.929731
Epoch 300000, Loss 2.928472
Epoch 330000, Loss 2.927906
tensor([ 0.5358, -17.2503], requires_grad=True)
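From here the notebook repeats the same experiment in TensorFlow. The import cell and the TF versions of model, loss_fn, and grad_fn are not included in this excerpt; a minimal sketch of the assumed equivalents:

import tensorflow as tf

def model(X, w, b):
    return w * X + b                          # same linear model

def loss_fn(Y_hat, Y):
    return tf.reduce_mean(tf.square(Y_hat - Y))   # mean squared error

def grad_fn(X, Y, Y_hat, w, b):
    n = tf.cast(tf.size(Y_hat), tf.float32)
    dloss_dYhat = 2.0 * (Y_hat - Y) / n       # d(MSE)/d(Y_hat)
    dloss_dw = tf.reduce_sum(dloss_dYhat * X)
    dloss_db = tf.reduce_sum(dloss_dYhat)
    return tf.stack([dloss_dw, dloss_db])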
def training_loop(n_epochs, learning_rate, params, X, Y, print_params=True):
    for epoch in range(1, n_epochs + 1):
        w, b = params
        Y_hat = model(X, w, b)  # <1>
        loss = loss_fn(Y_hat, Y)
        grad = grad_fn(X, Y, Y_hat, w, b)  # <2>
        params = params - learning_rate * grad  # manual gradient-descent step
        if epoch % 30000 == 0:  # <3>
            print('Epoch %d, Loss %f' % (epoch, float(loss)))
            if print_params:
                print('    Params:', params)
                print('    Grad: ', grad)
        if tf.math.is_inf(loss):  # stop if the loss diverges
            break  # <3>
    return params
params = training_loop(
    n_epochs = 330000,
    learning_rate = 1e-4,
    params = tf.constant([1.00, 0.01]),
    X = X,
    Y = Y,
    print_params = False)
params
Epoch 30000, Loss 12.095908
Epoch 60000, Loss 6.133891
Epoch 90000, Loss 4.048903
Epoch 120000, Loss 3.319792
Epoch 150000, Loss 3.064586
Epoch 180000, Loss 2.975681
Epoch 210000, Loss 2.944574
Epoch 240000, Loss 2.933552
Epoch 270000, Loss 2.929731
Epoch 300000, Loss 2.928472
Epoch 330000, Loss 2.927906
<tf.Tensor: shape=(2,), dtype=float32, numpy=array([ 0.5358124, -17.250313 ], dtype=float32)>
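The next two loops use loss_object rather than loss_fn, and its defining cell is not shown. A hedged guess consistent with the identical loss values (MSE is symmetric in its two arguments, so the y_true/y_pred order at the call sites does not matter here):

# Assumption: loss_object is a Keras MSE instance; the original cell is not shown.
loss_object = tf.keras.losses.MeanSquaredError()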
def training_loop(n_epochs, learning_rate, params, X, Y):
    for epoch in range(1, n_epochs + 1):
        with tf.GradientTape() as tape:
            tape.watch(params)                # params is a plain tensor, so the tape must be told to track it
            Y_hat = model(X, *params)
            loss = loss_object(Y_hat, Y)
        gradients = tape.gradient(loss, params)       # automatic differentiation
        params = params - learning_rate * gradients   # the update itself is still manual
        if epoch % 30000 == 0:
            print('Epoch %d, Loss %f' % (epoch, float(loss)))
    return params
training_loop(
    n_epochs = 330000,
    learning_rate = 1e-4,
    params = tf.constant([1.00, 0.01]),
    X = X,
    Y = Y)
Epoch 30000, Loss 12.095908
Epoch 60000, Loss 6.133891
Epoch 90000, Loss 4.048903
Epoch 120000, Loss 3.319792
Epoch 150000, Loss 3.064586
Epoch 180000, Loss 2.975681
Epoch 210000, Loss 2.944574
Epoch 240000, Loss 2.933552
Epoch 270000, Loss 2.929731
Epoch 300000, Loss 2.928472
Epoch 330000, Loss 2.927906
<tf.Tensor: shape=(2,), dtype=float32, numpy=array([ 0.5358124, -17.250313 ], dtype=float32)>
Automatic differentiation, automatic gradient descent
def model(X, params):
    return params[0] * X + params[1]
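The loop below hands its gradients to optimizer.apply_gradients, but the optimizer is never constructed in this excerpt. A sketch assuming Keras SGD at the same 1e-4 learning rate, which would reproduce the identical loss trace:

# Assumption: a Keras SGD optimizer matching the manual learning rate above.
optimizer = tf.keras.optimizers.SGD(learning_rate=1e-4)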
def training_loop(n_epochs, params, X, Y):
    for epoch in range(1, n_epochs + 1):
        with tf.GradientTape() as tape:
            tape.watch(params)                # redundant for a tf.Variable, but harmless
            Y_hat = model(X, params)          # this model takes params as a single tensor
            loss = loss_object(Y_hat, Y)
        gradients = tape.gradient(loss, params)
        optimizer.apply_gradients(zip([gradients], [params]))   # optimizer updates the Variable in place
        if epoch % 30000 == 0:
            print('Epoch %d, Loss %f' % (epoch, float(loss)))
    return params
training_loop(
    n_epochs = 330000,
    params = tf.Variable([1.00, 0.01]),
    X = X,
    Y = Y)
Epoch 30000, Loss 12.095908
Epoch 60000, Loss 6.133891
Epoch 90000, Loss 4.048903
Epoch 120000, Loss 3.319792
Epoch 150000, Loss 3.064586
Epoch 180000, Loss 2.975681
Epoch 210000, Loss 2.944574
Epoch 240000, Loss 2.933552
Epoch 270000, Loss 2.929731
Epoch 300000, Loss 2.928472
Epoch 330000, Loss 2.927906
<tf.Variable 'Variable:0' shape=(2,) dtype=float32, numpy=array([ 0.5358124, -17.250313 ], dtype=float32)>