数据操作
首先,我们导入 torch。
import torch
张量表示一个由数值组成的数组,这个数组可以有多个维度。
# Build a 1-D tensor holding the integers 0 through 11.
x = torch.arange(12)
# A bare expression at the end of a notebook cell displays its value.
x
tensor([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11])
我们可以通过张量的 shape 属性来访问张量的形状,并通过 numel() 方法获取张量中元素的总数。
# shape reports the length of the tensor along each axis.
x.shape
torch.Size([12])
# numel() returns the total number of elements stored in the tensor.
x.numel()
12
# Rearrange the 12 elements of x into 3 rows and 4 columns; the values and
# their order are unchanged.
X = x.reshape(3,4)
X
tensor([[ 0, 1, 2, 3],
[ 4, 5, 6, 7],
[ 8, 9, 10, 11]])
# Create a tensor of shape (2, 3, 4) in which every element is 0.
torch.zeros((2,3,4))
tensor([[[0., 0., 0., 0.],
[0., 0., 0., 0.],
[0., 0., 0., 0.]],
[[0., 0., 0., 0.],
[0., 0., 0., 0.],
[0., 0., 0., 0.]]])
# Create a tensor of shape (2, 3, 4) in which every element is 1.
torch.ones((2,3,4))
tensor([[[1., 1., 1., 1.],
[1., 1., 1., 1.],
[1., 1., 1., 1.]],
[[1., 1., 1., 1.],
[1., 1., 1., 1.],
[1., 1., 1., 1.]]])
print('2.张量的运算')

# Two operands of equal length; x holds floats and y holds integers, so
# every result below is a float tensor.
x = torch.tensor([1.0, 2, 4, 8])
y = torch.tensor([2, 2, 2, 2])

# Element-wise sum, difference, product, quotient and power, followed by
# the element-wise exponential of x. Each result is printed on its own line.
for outcome in (x + y, x - y, x * y, x / y, x ** y, torch.exp(x)):
    print(outcome)
2.张量的运算
tensor([ 3., 4., 6., 10.])
tensor([-1., 0., 2., 6.])
tensor([ 2., 4., 8., 16.])
tensor([0.5000, 1.0000, 2.0000, 4.0000])
tensor([ 1., 4., 16., 64.])
tensor([2.7183e+00, 7.3891e+00, 5.4598e+01, 2.9810e+03])
print('3.广播机制')

# a is a column of 3 rows, b is a row of 2 columns; -1 lets reshape infer
# the remaining dimension from the element count.
a = torch.arange(3).reshape(-1, 1)
b = torch.arange(2).reshape(1, -1)
print('a:', a)
print('b:', b)

# The shapes differ, so both operands are broadcast to 3x2 before the
# element-wise addition.
print('a + b:', a + b)
3.广播机制
a: tensor([[0],
[1],
[2]])
b: tensor([[0, 1]])
a + b: tensor([[0, 1],
[1, 2],
[2, 3]])
print('4.索引和切片')

# A 3x4 float32 matrix filled with 0..11 row by row.
X = torch.arange(12, dtype=torch.float32).reshape(3, -1)

# Each label is paired with the view it describes:
#   X[-1]  -> the last row (negative indices count from the end)
#   X[1:3] -> rows 1 and 2 (the end of a slice is exclusive)
for label, view in (('X:', X), ('X[-1]:', X[-1]), ('X[1:3]:', X[1:3])):
    print(label, view)
4.索引和切片
X: tensor([[ 0., 1., 2., 3.],
[ 4., 5., 6., 7.],
[ 8., 9., 10., 11.]])
X[-1]: tensor([ 8., 9., 10., 11.])
X[1:3]: tensor([[ 4., 5., 6., 7.],
[ 8., 9., 10., 11.]])
print('6.转换为其他 Python对象')

# Round trip: torch tensor -> NumPy array -> torch tensor.
Y = torch.tensor([[2.0, 1, 4, 3], [1, 2, 3, 4], [4, 3, 2, 1]])
A = Y.numpy()
B = torch.tensor(A)

# Show the type and the contents of each converted object, array first.
for converted in (A, B):
    print(type(converted))
    print(converted)
6.转换为其他 Python对象
<class 'numpy.ndarray'>
[[2. 1. 4. 3.]
[1. 2. 3. 4.]
[4. 3. 2. 1.]]
<class 'torch.Tensor'>
tensor([[2., 1., 4., 3.],
[1., 2., 3., 4.],
[4., 3., 2., 1.]])
数据预处理
import os

import pandas as pd

# Make sure the ../data directory exists before writing into it.
os.makedirs(os.path.join('..', 'data'), exist_ok=True)
data_file = os.path.join('..', 'data', 'house_tiny.csv')

# Write a four-row toy dataset. 'NA' marks a missing entry.
# The body of the with-statement must be indented; otherwise the cell does
# not parse.
with open(data_file, 'w') as f:
    f.write('NumRooms,Alley,Price\n')  # header row: column names
    f.write('NA,Pave,127500\n')  # each following row is one sample
    f.write('2,NA,106000\n')
    f.write('4,NA,178100\n')
    f.write('NA,NA,140000\n')

# Load the file back; read_csv turns the 'NA' entries into NaN.
data = pd.read_csv(data_file)
data
| NumRooms | Alley | Price |
---|
0 | NaN | Pave | 127500 |
---|
1 | 2.0 | NaN | 106000 |
---|
2 | 4.0 | NaN | 178100 |
---|
3 | NaN | NaN | 140000 |
---|
import os
import numpy as np
import pandas as pd
import torch
from numpy import nan as NaN
# ---------------------------------------------------------------------------
# Part 1-3: write a toy CSV, load it, impute missing values, one-hot encode
# the categorical column and convert the result to tensors.
# ---------------------------------------------------------------------------
os.makedirs(os.path.join('..', 'data'), exist_ok=True)
datafile = os.path.join('..', 'data', 'house_tiny.csv')
# The with-body must be indented, otherwise the script does not parse.
with open(datafile, 'w') as f:
    f.write('NumRooms,Alley,Price\n')  # header row: column names
    f.write('NA,Pave,127500\n')  # each following row is one sample
    f.write('2,NA,106000\n')
    f.write('4,NA,178100\n')
    f.write('NA,NA,140000\n')

data = pd.read_csv(datafile)
print('1.原始数据:\n', data)

# The first two columns are the features, the third column is the target.
inputs, outputs = data.iloc[:, 0:2], data.iloc[:, 2]
# Replace missing numeric entries with the column mean. numeric_only=True
# keeps the string column 'Alley' out of the reduction; without it older
# pandas emits a FutureWarning and pandas >= 2.0 raises TypeError.
inputs = inputs.fillna(inputs.mean(numeric_only=True))
print(inputs)
print(outputs)

# One-hot encode 'Alley'; dummy_na=True adds an indicator column for NaN.
# The dtype is pinned to uint8 (the historical default) because newer pandas
# returns bool columns, which would turn inputs.values into an object array
# that torch.tensor cannot convert.
inputs = pd.get_dummies(inputs, dummy_na=True, dtype=np.uint8)
print('2.利用pandas中的get_dummies函数处理:\n', inputs)

# All columns are numeric now, so the frames can be converted to tensors.
x, y = torch.tensor(inputs.values), torch.tensor(outputs.values)
print('3.转换为张量:')
print(x)
print(y)

# ---------------------------------------------------------------------------
# Part 4: different ways of filling missing values.
# ---------------------------------------------------------------------------
df1 = pd.DataFrame([[1, 2, 3], [NaN, NaN, 2], [NaN, NaN, NaN], [8, 8, NaN]])
print('4.函数fillna的用法:')
print(df1)
print(df1.fillna(100))  # one constant for every missing entry
print(df1.fillna({0: 10, 1: 20, 2: 30}))  # one constant per column label
# Forward fill: propagate the last valid value downwards.
# ffill()/bfill() replace the deprecated fillna(method=...) spelling.
print(df1.ffill())

df2 = pd.DataFrame(np.random.randint(0, 10, (5, 5)))
# Columns 3 and 4 receive NaN below. Cast them to float explicitly instead of
# relying on silent int -> float upcasting during assignment, which newer
# pandas versions deprecate.
df2 = df2.astype({3: float, 4: float})
df2.iloc[1:4, 3] = NaN
df2.iloc[2:4, 4] = NaN
print(df2)
# Backward fill along rows, filling at most 2 consecutive NaNs per gap.
print(df2.bfill(limit=2))
# Forward fill along columns (left to right), at most 1 NaN per gap.
print(df2.ffill(limit=1, axis=1))
1.原始数据:
NumRooms Alley Price
0 NaN Pave 127500
1 2.0 NaN 106000
2 4.0 NaN 178100
3 NaN NaN 140000
NumRooms Alley
0 3.0 Pave
1 2.0 NaN
2 4.0 NaN
3 3.0 NaN
0 127500
1 106000
2 178100
3 140000
Name: Price, dtype: int64
2.利用pandas中的get_dummies函数处理:
NumRooms Alley_Pave Alley_nan
0 3.0 1 0
1 2.0 0 1
2 4.0 0 1
3 3.0 0 1
3.转换为张量:
tensor([[3., 1., 0.],
[2., 0., 1.],
[4., 0., 1.],
[3., 0., 1.]], dtype=torch.float64)
tensor([127500, 106000, 178100, 140000])
4.函数fillna的用法:
0 1 2
0 1.0 2.0 3.0
1 NaN NaN 2.0
2 NaN NaN NaN
3 8.0 8.0 NaN
0 1 2
0 1.0 2.0 3.0
1 100.0 100.0 2.0
2 100.0 100.0 100.0
3 8.0 8.0 100.0
0 1 2
0 1.0 2.0 3.0
1 10.0 20.0 2.0
2 10.0 20.0 30.0
3 8.0 8.0 30.0
0 1 2
0 1.0 2.0 3.0
1 1.0 2.0 2.0
2 1.0 2.0 2.0
3 8.0 8.0 2.0
0 1 2 3 4
0 4 1 8 2.0 3.0
1 4 6 9 NaN 4.0
2 7 3 6 NaN NaN
3 7 0 2 NaN NaN
4 1 1 2 6.0 7.0
0 1 2 3 4
0 4 1 8 2.0 3.0
1 4 6 9 NaN 4.0
2 7 3 6 6.0 7.0
3 7 0 2 6.0 7.0
4 1 1 2 6.0 7.0
0 1 2 3 4
0 4.0 1.0 8.0 2.0 3.0
1 4.0 6.0 9.0 9.0 4.0
2 7.0 3.0 6.0 6.0 NaN
3 7.0 0.0 2.0 2.0 NaN
4 1.0 1.0 2.0 6.0 7.0
C:\Users\czr\AppData\Local\Temp/ipykernel_32116/1226956557.py:21: FutureWarning: Dropping of nuisance columns in DataFrame reductions (with 'numeric_only=None') is deprecated; in a future version this will raise TypeError. Select only valid columns before calling the reduction.
inputs = inputs.fillna(inputs.mean()) # 用均值填充NaN
|