1.构建自定义数据集
# Load the raw distance measurements and preview the first five rows.
df = pd.read_csv(r'data\distance.csv')
print(df.head(5))
out:
num A0 A1 A2 A3 x y z label
0 0 1016.931217 4782.857143 4552.962963 6298.994709 50 50 88 1
1 1 1338.424658 4920.136986 4109.178082 5943.561644 50 100 88 1
2 2 1783.790850 5033.464052 3605.882353 5619.150327 50 150 88 1
3 3 2253.617021 5218.563830 3085.159574 5322.819149 50 200 88 1
4 4 2727.142857 5370.285714 2600.142857 5033.238095 50 250 88 1
# Print per-column summary statistics (count, mean, std, min, quartiles, max).
print(df.describe())
out:
num A0 A1 ... y z label
count 648.000000 648.000000 648.000000 ... 648.000000 648.000000 648.000000
mean 323.500000 3822.361982 3831.485685 ... 250.000000 147.000000 0.500000
std 187.205769 1303.943989 1302.120626 ... 129.199174 42.187042 0.500386
min 0.000000 637.966102 578.874172 ... 50.000000 88.000000 0.000000
25% 161.750000 2890.911486 2868.041425 ... 150.000000 119.500000 0.000000
50% 323.500000 3948.425892 3974.006815 ... 250.000000 150.000000 0.500000
75% 485.250000 4732.334546 4783.172269 ... 350.000000 177.500000 1.000000
max 647.000000 6707.976654 6657.439614 ... 450.000000 200.000000 1.000000
注:mean 为均值,std 为标准差,50% 为中位数,25% 和 75% 分别为下四分位数和上四分位数。
class TorchDataset(Dataset):
    """Tabular dataset read from a CSV file; subclasses ``torch.utils.data.Dataset``.

    Every column except the first and the last is treated as a feature and is
    min-max scaled; the last column is used as the label.

    Attributes:
        data: Tensor of scaled features, one row per sample.
        label: float32 tensor of labels, one entry per sample.
    """

    def __init__(self, path):
        """Read the CSV at ``path`` and build the feature and label tensors."""
        df = pd.read_csv(path)
        # Skip the first column and hold the last column back as the label.
        features = df.iloc[:, 1:-1].to_numpy(dtype=np.float32)
        self.data = torch.tensor(self.normalization(features))
        self.label = torch.tensor(df.iloc[:, -1].to_numpy(dtype=np.float32))

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair stored at ``index``."""
        return self.data[index], self.label[index]

    def __len__(self):
        """Return the number of samples."""
        return len(self.data)

    @staticmethod
    def normalization(X: np.ndarray) -> np.ndarray:
        """Fit a ``MinMaxScaler`` on ``X`` and return the scaled array."""
        return MinMaxScaler().fit_transform(X)
# Build the dataset from the CSV file and wrap it in a DataLoader.
data = TorchDataset(r'data\distance.csv')
torch_loader = DataLoader(dataset=data, batch_size=2, shuffle=False, num_workers=0, drop_last=True)

# Show the first two batches (indices 0 and 1), then stop.
for batch, (X, y) in enumerate(torch_loader):
    print("batch:", batch)
    print(f'X.shape:{X.shape}, y.shape:{y.shape}')
    Data, Label = X, y
    print("data:", Data)
    print("Label:", Label)
    if batch == 1:
        break
out:
batch: 0
X.shape:torch.Size([14, 7]), y.shape:torch.Size([14])
data: tensor([[0.2451, 0.6446, 0.4407, 0.7390, 0.1250, 0.3750, 0.0000],
[0.9156, 0.5625, 0.6628, 0.1021, 1.0000, 0.8750, 1.0000],
[0.6445, 0.4457, 0.5700, 0.3105, 0.7500, 0.6250, 0.3750],
[0.8122, 0.3893, 0.7183, 0.2404, 1.0000, 0.6250, 0.7321],
[0.6144, 0.5965, 0.3801, 0.3971, 0.5000, 0.7500, 1.0000],
[0.4900, 0.7703, 0.1859, 0.6201, 0.1250, 0.7500, 0.7321],
[0.7527, 0.7540, 0.3163, 0.3293, 0.5000, 1.0000, 0.7321],
[0.5562, 0.1616, 0.8498, 0.6476, 0.8750, 0.1250, 0.3750],
[0.0704, 0.6935, 0.6534, 1.0000, 0.0000, 0.0000, 1.0000],
[0.2204, 0.3961, 0.7277, 0.8184, 0.3750, 0.0000, 0.3750],
[0.0669, 0.6940, 0.6085, 0.9276, 0.0000, 0.1250, 0.7321],
[0.5954, 0.7183, 0.3396, 0.4607, 0.3750, 0.7500, 0.0000],
[0.6543, 0.2643, 0.7265, 0.4357, 0.8750, 0.3750, 1.0000],
[0.7997, 0.7694, 0.4346, 0.2439, 0.6250, 1.0000, 0.7321]])
Label: tensor([0., 0., 0., 0., 0., 0., 0., 1., 1., 0., 1., 1., 0., 1.])
注:DataLoader 中 batch_size=2 表示批大小为 2,即每次从数据集中取出 2 个样本;batch_size 可根据情况设定。(注意:上面示例输出中的 X.shape 为 [14, 7],与 batch_size=2 不一致,看起来是 batch_size=14 时的运行结果。)
2.划分数据集
# 80/20 split: the training part gets the rounded-down share, the test part the rest.
train_size = int(len(data) * 0.8)
test_size = len(data) - train_size
train_dataset, test_dataset = random_split(data, [train_size, test_size])

batch_size = 14
# Both loaders use identical settings; only the wrapped dataset differs.
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=0,
    drop_last=True,
)
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=0,
    drop_last=True,
)

# Print only the first training batch as a sanity check.
for batch, (X, y) in enumerate(train_loader):
    print("batch:", batch)
    print(f'X.shape:{X.shape}, y.shape:{y.shape}')
    Data, Label = X, y
    print("data:", Data)
    print("Label:", Label)
    break
out:
batch: 0
X.shape:torch.Size([14, 7]), y.shape:torch.Size([14])
data: tensor([[0.2451, 0.6446, 0.4407, 0.7390, 0.1250, 0.3750, 0.0000],
[0.9156, 0.5625, 0.6628, 0.1021, 1.0000, 0.8750, 1.0000],
[0.6445, 0.4457, 0.5700, 0.3105, 0.7500, 0.6250, 0.3750],
[0.8122, 0.3893, 0.7183, 0.2404, 1.0000, 0.6250, 0.7321],
[0.6144, 0.5965, 0.3801, 0.3971, 0.5000, 0.7500, 1.0000],
[0.4900, 0.7703, 0.1859, 0.6201, 0.1250, 0.7500, 0.7321],
[0.7527, 0.7540, 0.3163, 0.3293, 0.5000, 1.0000, 0.7321],
[0.5562, 0.1616, 0.8498, 0.6476, 0.8750, 0.1250, 0.3750],
[0.0704, 0.6935, 0.6534, 1.0000, 0.0000, 0.0000, 1.0000],
[0.2204, 0.3961, 0.7277, 0.8184, 0.3750, 0.0000, 0.3750],
[0.0669, 0.6940, 0.6085, 0.9276, 0.0000, 0.1250, 0.7321],
[0.5954, 0.7183, 0.3396, 0.4607, 0.3750, 0.7500, 0.0000],
[0.6543, 0.2643, 0.7265, 0.4357, 0.8750, 0.3750, 1.0000],
[0.7997, 0.7694, 0.4346, 0.2439, 0.6250, 1.0000, 0.7321]])
Label: tensor([0., 0., 0., 0., 0., 0., 0., 1., 1., 0., 1., 1., 0., 1.])
3.定义模型
class NeuralNetwork(nn.Module):
    """Fully connected classifier: two hidden ReLU layers and a linear output layer.

    Args:
        in_features: Number of input features per sample (default 7).
        hidden_features: Width of both hidden layers (default 512).
        num_classes: Number of output logits (default 2).
    """

    def __init__(self, in_features=7, hidden_features=512, num_classes=2):
        super().__init__()
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(in_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, num_classes),
        )

    def forward(self, x):
        """Return the raw logits (no softmax applied) for the batch ``x``."""
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits
4.机器是否可使用 GPU 训练
# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using {device} device")
out:
Using cuda device
5.训练方法
def train(dataloader, model, loss_fn, optimizer):
    """Train ``model`` for one pass over ``dataloader``, logging the loss every 10 batches.

    Args:
        dataloader: DataLoader yielding ``(X, y)`` batches.
        model: Instance of ``NeuralNetwork``.
        loss_fn: Loss function, called with the model output and the labels
            cast to integer class indices.
        optimizer: Optimizer that updates the model parameters by gradient descent.
    """
    size = len(dataloader.dataset)
    model.train()
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)

        # Forward pass; labels are cast with .long(), the same way test() does.
        pred = model(X)
        loss = loss_fn(pred, y.long())

        # Backward pass and parameter update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch % 10 == 0:
            # Use a separate name so the loss tensor is not rebound to a float.
            loss_value, current = loss.item(), batch * len(X)
            print(f"loss: {loss_value:>7f} [{current:>5d}/{size:>5d}]")
6.测试方法
def test(dataloader, model, loss_fn):
    """Evaluate ``model`` on ``dataloader`` and print accuracy and average loss.

    Accuracy is computed over the samples actually yielded by ``dataloader``,
    so samples skipped by ``drop_last=True`` are not counted as wrong.

    Args:
        dataloader: DataLoader yielding ``(X, y)`` batches.
        model: Model to evaluate.
        loss_fn: Loss function, called with the model output and the labels
            cast to integer class indices.
    """
    model.eval()
    test_loss, correct = 0.0, 0
    num_batches, num_samples = 0, 0
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y.long()).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
            num_batches += 1
            num_samples += len(y)

    if num_samples == 0:
        # Nothing was evaluated; avoid dividing by zero below.
        print("Test Error: \n No samples to evaluate \n")
        return

    test_loss /= num_batches
    correct /= num_samples
    print(f"Test Error: \n Accuracy: {(100 * correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")
7.实例化模型
# Create the network, move its parameters to the selected device, and show its layers.
model = NeuralNetwork()
model = model.to(device)
print(model)
out:
NeuralNetwork(
(flatten): Flatten(start_dim=1, end_dim=-1)
(linear_relu_stack): Sequential(
(0): Linear(in_features=7, out_features=512, bias=True)
(1): ReLU()
(2): Linear(in_features=512, out_features=512, bias=True)
(3): ReLU()
(4): Linear(in_features=512, out_features=2, bias=True)
)
)
8.损失与优化器
# Cross-entropy loss over the two output logits, optimized with plain SGD.
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(params=model.parameters(), lr=0.001)
9. 开始训练
# Run one training pass followed by one evaluation pass per epoch.
epochs = 100
for t in range(epochs):
    print(f"Epoch {t + 1}\n-------------------------------")
    train(train_loader, model, loss_fn, optimizer)
    test(test_loader, model, loss_fn)
print("Done!")
out(示例输出,仅包含 5 个 epoch):
Epoch 1
-------------------------------
loss: 0.691605 [ 0/ 518]
loss: 0.687310 [ 140/ 518]
loss: 0.695601 [ 280/ 518]
loss: 0.695044 [ 420/ 518]
Test Error:
Accuracy: 46.9%, Avg loss: 0.693291
Epoch 2
-------------------------------
loss: 0.691656 [ 0/ 518]
loss: 0.687376 [ 140/ 518]
loss: 0.695556 [ 280/ 518]
loss: 0.694968 [ 420/ 518]
Test Error:
Accuracy: 46.9%, Avg loss: 0.693299
Epoch 3
-------------------------------
loss: 0.691695 [ 0/ 518]
loss: 0.687428 [ 140/ 518]
loss: 0.695517 [ 280/ 518]
loss: 0.694901 [ 420/ 518]
Test Error:
Accuracy: 47.7%, Avg loss: 0.693306
Epoch 4
-------------------------------
loss: 0.691731 [ 0/ 518]
loss: 0.687467 [ 140/ 518]
loss: 0.695482 [ 280/ 518]
loss: 0.694830 [ 420/ 518]
Test Error:
Accuracy: 47.7%, Avg loss: 0.693311
Epoch 5
-------------------------------
loss: 0.691761 [ 0/ 518]
loss: 0.687483 [ 140/ 518]
loss: 0.695449 [ 280/ 518]
loss: 0.694756 [ 420/ 518]
Test Error:
Accuracy: 47.7%, Avg loss: 0.693317
Done!
|