基于 PyTorch 的线性回归模型(Python)
立即下载
资源介绍:
基于 PyTorch 的线性回归模型的训练与测试,包括:CSV 文件数据的读取与数据集构建;训练后各特征系数的输出(以 3 个特征为例);训练迭代过程中 loss 曲线的显示;测试集预测结果与实际值的对比可视化。
import torch
from torch import nn
from torch.utils.data import TensorDataset, DataLoader
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Load the sample data from the CSV file and shuffle its rows so that the
# train/test split below is random.
df = pd.read_csv('example.csv')
new_index = np.random.permutation(df.index)
df = df.reindex(new_index).reset_index(drop=True)

# Fraction of the rows that goes into the training set; the remainder is
# used as the test set.
train_dataset_rate = 0.8
num_train = int(len(df) * train_dataset_rate)

# The last column is the dependent variable; every other column is a feature.
train_rows = df.iloc[:num_train]
test_rows = df.iloc[num_train:]
X_train, y_train = train_rows.iloc[:, :-1], train_rows.iloc[:, -1]  # training features / labels
X_test, y_test = test_rows.iloc[:, :-1], test_rows.iloc[:, -1]  # test features / labels

# Convert everything to float32 tensors. The targets are reshaped into
# column vectors of shape (n, 1).
X_train_tensor = torch.tensor(X_train.to_numpy(), dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.to_numpy(), dtype=torch.float32).reshape((-1, 1))
X_test_tensor = torch.tensor(X_test.to_numpy(), dtype=torch.float32)
y_test_tensor = torch.tensor(y_test.to_numpy(), dtype=torch.float32).reshape((-1, 1))
# Wrap the training tensors in a dataset and serve them as shuffled
# mini-batches of 4 samples.
dataset_train = TensorDataset(X_train_tensor, y_train_tensor)
loader_train = DataLoader(dataset=dataset_train, batch_size=4, shuffle=True)

# Number of passes over the training set.
epochs = 200

# Linear regression network: a single linear layer that maps every input
# feature column to one output value.
net = nn.Sequential(
    nn.Linear(in_features=X_train_tensor.size(1), out_features=1)
)

# Loss function: mean squared error.
loss = nn.MSELoss()

# Optimizer: stochastic gradient descent over the network's parameters.
optimizer = torch.optim.SGD(params=net.parameters(), lr=0.3)

# Collects one loss value per epoch for the loss curve.
loss_list = list()
# Training: run mini-batch gradient descent for `epochs` passes over the
# training loader, recording the full-training-set loss after every pass.
for epoch in range(epochs):
    for X, y in loader_train:
        optimizer.zero_grad()  # clear gradients left over from the previous step
        batch_loss = loss(net(X), y)
        batch_loss.backward()
        optimizer.step()

    # Evaluate the loss on the whole training set once per epoch. No gradients
    # are needed for this measurement, and the value is kept as a plain float
    # so it can be stored, printed and plotted directly.
    with torch.no_grad():
        epoch_loss = loss(net(X_train_tensor), y_train_tensor).item()
    loss_list.append(epoch_loss)
    print(f'第{epoch+1}次的loss值为{epoch_loss}')

print("训练完成")
# Report the learned parameters: one coefficient per feature column, then
# the bias term of the single linear layer.
print("每个特征值的系数分别为")
print(net[0].weight.detach()[0].numpy())
print("偏置b为")
print(net[0].bias.detach()[0].numpy())
# Loss curve: draw the recorded per-epoch loss values on the current axes
# and display the figure.
loss_axes = plt.gca()
loss_axes.plot(loss_list)
loss_axes.set_title('Loss Over Epochs')
loss_axes.set_xlabel('Epoch')
loss_axes.set_ylabel('Loss')
loss_axes.grid(True)
plt.show()
# Predict on the test set. This is inference only, so the forward pass runs
# with gradient tracking disabled and the result can be converted to NumPy
# directly.
with torch.no_grad():
    y_pred = net(X_test_tensor)

# Compare actual values (line) with predicted values (red dots), using the
# sample position in the test set as the x coordinate.
x = np.arange(len(y_pred))
plt.plot(x, y_test_tensor.numpy(), label='Actual Values')
plt.scatter(x, y_pred.numpy(), c='red', label='Predicted Values')
plt.legend()
plt.title('Comparison of Actual and Predicted Values')
plt.xlabel('Index')
plt.ylabel('Value')
plt.grid(True)
plt.show()