PyTorch
Adapted from the original TensorFlow version of this tutorial.
1 - Exploring the PyTorch Library
import math
import numpy as np
import h5py
import matplotlib.pyplot as plt
import torch
from torch import nn
from torch.nn import functional as F
from tf_utils import load_dataset, random_mini_batches, convert_to_one_hot, predict
y_hat = torch.tensor(36)
y = torch.tensor(39)
loss = (y - y_hat) ** 2
print(loss.item())
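Unlike the original TensorFlow version, there is no graph or session to run: tensors evaluate eagerly, and gradients come from autograd. A minimal illustrative sketch (the scalar values are arbitrary):
w = torch.tensor(36., requires_grad=True)  # mark w as trainable
loss = (torch.tensor(39.) - w) ** 2        # the loss is built eagerly
loss.backward()                            # populate w.grad via autograd
print(w.grad.item())                       # d(loss)/dw = -2 * (39 - 36) = -6.0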
1.1 - Linear function
def linear_function():
    """
    Implements a linear function:
    Initializes W to be a random tensor of shape (4,3)
    Initializes X to be a random tensor of shape (3,1)
    Initializes b to be a random tensor of shape (4,1)
    Returns:
    result -- the value of Y = WX + b as a numpy array
    """
    np.random.seed(1)
    X = torch.tensor(np.random.randn(3, 1))
    W = torch.tensor(np.random.randn(4, 3))
    b = torch.tensor(np.random.randn(4, 1))
    Y = W @ X + b  # matrix product plus bias
    result = Y.numpy()
    return result
print("result = " + str(linear_function()))
1.2 - Computing the sigmoid
def sigmoid(z):
    """
    Computes the sigmoid of z
    Arguments:
    z -- input value, scalar or vector
    Returns:
    result -- the sigmoid of z
    """
    x = torch.tensor(z, dtype=torch.float)
    a = torch.sigmoid(x)
    result = a.numpy()
    return result
print("sigmoid(0) = " + str(sigmoid(0)))
print("sigmoid(12) = " + str(sigmoid(12)))
1.3 - Computing the Cost
def cost(logits, labels):
    """
    Computes the cost using the sigmoid cross entropy
    Arguments:
    logits -- vector containing z, output of the last linear unit (before the final sigmoid activation)
    labels -- vector of labels y (1 or 0)
    Returns:
    cost -- the mean sigmoid cross-entropy cost
    """
    z = torch.tensor(logits, dtype=torch.float)
    y = torch.tensor(labels, dtype=torch.float)
    # binary_cross_entropy expects probabilities in [0, 1], not raw logits
    cost = F.binary_cross_entropy(input=z, target=y, reduction='mean').numpy()
    return cost
logits = sigmoid(np.array([0.2, 0.4, 0.7, 0.9]))
cost_value = cost(logits, np.array([0, 0, 1, 1]))
print("cost = " + str(cost_value))
The result differs from the original for two reasons. First, tf.nn.sigmoid_cross_entropy_with_logits still needs a tf.reduce_mean step afterwards, whereas in PyTorch the averaging is handled by the reduction argument. Second, the original applies the sigmoid to the logits twice: tf.nn.sigmoid_cross_entropy_with_logits itself first applies a sigmoid before computing the cross entropy.
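To reproduce the original element-wise output, F.binary_cross_entropy_with_logits is the direct counterpart of the TensorFlow op, since it also applies the sigmoid internally; a minimal sketch:
# Matching tf.nn.sigmoid_cross_entropy_with_logits element by element:
# binary_cross_entropy_with_logits applies the sigmoid internally, and
# reduction='none' keeps the per-element costs instead of averaging them.
z = torch.tensor(sigmoid(np.array([0.2, 0.4, 0.7, 0.9])), dtype=torch.float)
y = torch.tensor([0., 0., 1., 1.])
per_element = F.binary_cross_entropy_with_logits(z, y, reduction='none')
print(per_element.numpy())  # same double-sigmoid values as the original tutorial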
1.4 - Using One Hot encodings
def one_hot_matrix(labels, C):
    """
    Creates a matrix where the i-th row corresponds to the ith class number and the jth column
    corresponds to the jth training example. So if example j has label i, then entry (i,j)
    will be 1.
    Arguments:
    labels -- vector containing the labels
    C -- number of classes, the depth of the one hot dimension
    Returns:
    one_hot -- one hot matrix
    """
    labels = torch.tensor(labels, dtype=torch.long)
    # F.one_hot returns shape (m, C); transpose to (C, m) to match the original tutorial
    one_hot = F.one_hot(labels, num_classes=C).numpy().T
    return one_hot
labels = np.array([1, 2, 3, 0, 2, 1])
one_hot = one_hot_matrix(labels, C=4)
print("one_hot = " + str(one_hot))
1.5 - Initialize with zeros and ones
def ones(shape):
    """
    Creates an array of ones of dimension shape
    Arguments:
    shape -- shape of the array you want to create
    Returns:
    ones -- array containing only ones
    """
    ones = torch.ones(shape).numpy()
    return ones
print("ones = " + str(ones([3])))
2 - Building your first neural network in PyTorch
import math
import numpy as np
import h5py
import matplotlib.pyplot as plt
import torch
from torch import nn
from torch.nn import functional as F
from tf_utils import load_dataset, random_mini_batches, convert_to_one_hot, predict
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        # LINEAR -> RELU -> LINEAR -> RELU -> LINEAR, matching the original
        # tutorial. No activation after the last layer: nn.CrossEntropyLoss
        # expects raw logits and applies log-softmax internally.
        self.net = nn.Sequential(
            nn.Linear(12288, 25),
            nn.ReLU(),
            nn.Linear(25, 12),
            nn.ReLU(),
            nn.Linear(12, 6),
        )
        self._init_parameters()

    def forward(self, x):
        return self.net(x)

    def _init_parameters(self):
        # Xavier initialization for weights, zeros for biases
        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.xavier_uniform_(m.weight.data)
                nn.init.zeros_(m.bias.data)
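A quick interactive shape check confirms the architecture (illustrative; the random batch stands in for two flattened images):
net = Net()
print(net(torch.randn(2, 12288)).shape)  # torch.Size([2, 6]): 6 class logits per example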
def model(X_train, Y_train, X_test, Y_test, learning_rate=0.0001,
          num_epochs=1500, minibatch_size=32, print_cost=True):
    net = Net()
    # CrossEntropyLoss plays the role of tf.nn.softmax_cross_entropy_with_logits
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate)
    seed = 3
    (n_x, m) = X_train.shape
    n_y = Y_train.shape[0]
    costs = []
    net.train()
    for epoch in range(num_epochs):
        epoch_cost = 0.
        num_minibatches = int(m / minibatch_size)
        seed = seed + 1
        minibatches = random_mini_batches(X_train, Y_train, minibatch_size, seed)
        for minibatch in minibatches:
            (minibatch_X, minibatch_Y) = minibatch
            # transpose from the (features, batch) layout to PyTorch's (batch, features)
            minibatch_X = torch.tensor(minibatch_X).float().t()
            minibatch_Y = torch.tensor(minibatch_Y).long().t()
            # CrossEntropyLoss wants class indices, not one-hot vectors
            minibatch_Y = torch.argmax(minibatch_Y, dim=1)
            optimizer.zero_grad()
            output = net(minibatch_X)
            minibatch_cost = criterion(output, minibatch_Y)
            minibatch_cost.backward()
            optimizer.step()
            epoch_cost += minibatch_cost.item() / num_minibatches
        if print_cost and epoch % 100 == 0:
            print("Cost after epoch %i: %f" % (epoch, epoch_cost))
        if print_cost and epoch % 5 == 0:
            costs.append(epoch_cost)
    plt.plot(np.squeeze(costs))
    plt.ylabel('cost')
    plt.xlabel('iterations (per tens)')
    plt.title("Learning rate =" + str(learning_rate))
    plt.show()
    net.eval()
    with torch.no_grad():
        X = torch.tensor(X_train).float().t()
        Y = torch.tensor(Y_train).long().t()
        Y = torch.argmax(Y, dim=1)
        output = net(X)
        output = torch.argmax(output, dim=1)
        correct_prediction = output == Y
        accuracy = torch.sum(correct_prediction).float() / X_train.shape[1]
        print("Train Accuracy:", accuracy.item())
        X = torch.tensor(X_test).float().t()
        Y = torch.tensor(Y_test).long().t()
        Y = torch.argmax(Y, dim=1)
        output = net(X)
        output = torch.argmax(output, dim=1)
        correct_prediction = output == Y
        accuracy = torch.sum(correct_prediction.float()) / X_test.shape[1]
        print("Test Accuracy:", accuracy.item())
    return net
if __name__ == '__main__':
    np.random.seed(1)
    torch.manual_seed(1)
    X_train_orig, Y_train_orig, X_test_orig, Y_test_orig, classes = load_dataset()
    # flatten each 64x64x3 image into a column vector of 12288 values
    X_train_flatten = X_train_orig.reshape(X_train_orig.shape[0], -1).T
    X_test_flatten = X_test_orig.reshape(X_test_orig.shape[0], -1).T
    # normalize pixel values to [0, 1]
    X_train = X_train_flatten / 255.
    X_test = X_test_flatten / 255.
    Y_train = convert_to_one_hot(Y_train_orig, 6)
    Y_test = convert_to_one_hot(Y_test_orig, 6)
    print("number of training examples = " + str(X_train.shape[1]))
    print("number of test examples = " + str(X_test.shape[1]))
    print("X_train shape: " + str(X_train.shape))
    print("Y_train shape: " + str(Y_train.shape))
    print("X_test shape: " + str(X_test.shape))
    print("Y_test shape: " + str(Y_test.shape))
    net = model(X_train, Y_train, X_test, Y_test)
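Since model now returns the trained network, the main block can end with a quick single-image sanity check; an illustrative sketch:
    # illustrative: classify the first test image with the trained net
    net.eval()
    with torch.no_grad():
        x = torch.tensor(X_test[:, 0]).float().unsqueeze(0)  # shape (1, 12288)
        print("predicted class:", torch.argmax(net(x), dim=1).item())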