Reproducing the 2022 MCM Problem C

Patched together over one evening while going back and forth with ChatGPT — the result is a bit of a mess.

  • LSTM
  • Prediction lag

How it went

The reference papers used LSTM-style time-series prediction, so I asked GPT for an LSTM boilerplate. The loss looked quite small, but since it is computed on min-max-scaled values, the MAPE was probably not small at all.
After a lot of tuning I finally got something presentable, and only then noticed that the predictions lag behind the actual values...
What to do about it? No idea, for now.
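
(Looking back, a quick way to diagnose this kind of lag is to compare the model against a naive persistence baseline: if the model's error is no better than "predict that tomorrow's price equals today's", it has effectively learned a shifted copy of the input. A minimal sketch, assuming `prices` is a 1-D array of raw prices — this check is not part of the original workflow:)

```python
import numpy as np

def persistence_mape(prices):
    # Baseline: predict each day's price as the previous day's price.
    actual = np.asarray(prices[1:], dtype=float)
    predicted = np.asarray(prices[:-1], dtype=float)
    return float(np.mean(np.abs(predicted - actual) / actual))

# If the LSTM's MAPE is close to this, it is mostly echoing yesterday's value.
```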

Implementation

Data preprocessing
```python
import csv
import json  # only needed for the commented-out JSON dump below

dic = {}   # raw date -> [bitcoin price, (gold price)]
dic2 = {}  # same data, keyed by ISO dates (YYYY-MM-DD)

# Bitcoin prices: one row per calendar day.
with open('./BCHAIN-MKPRU.csv', newline='', encoding='utf-8') as csvfile:
    csvreader = csv.reader(csvfile)
    next(csvreader)  # skip header
    for row in csvreader:
        dic[row[0]] = [row[1]]

# Gold prices: trading days only, and some rows have a date but no value.
# Carry the last available quote forward over the empty rows.
with open('./LBMA-GOLD.csv', newline='', encoding='utf-8') as csvfile:
    csvreader = csv.reader(csvfile)
    next(csvreader)  # skip header
    last = ''
    for row in csvreader:
        if row[0] in dic:
            if row[1] != '':
                dic[row[0]].append(row[1])
                last = row[1]
            else:
                dic[row[0]].append(last)

# Reformat M/D/YY dates into zero-padded YYYY-MM-DD keys.
for i in dic:
    slices = i.split("/")
    for s in range(len(slices)):
        if len(slices[s]) < 2:
            slices[s] = '0' + slices[s]
    dic2["20" + slices[2] + "-" + slices[0] + "-" + slices[1]] = dic[i]

# Days on which the gold market was closed get the previous day's gold price.
last = ''
for i in dic2:
    if len(dic2[i]) < 2:
        dic2[i].append(dic2[last][1])
    else:
        last = i

# fp = open("./data.json", "w+", encoding='utf-8')
# fp.write(json.dumps(dic2))
# fp.close()

f = open("./bitcoin.csv", "w+")
f.write("date,value\n")
for i in dic2:
    f.write(i + "," + dic2[i][0] + "\n")
f.close()

f = open("./gold.csv", "w+")
f.write("date,value\n")
last = ''
for i in dic2:
    if len(dic2[i]) == 2:
        f.write(i + "," + dic2[i][1] + "\n")
        last = i
    else:
        f.write(i + "," + dic2[last][1] + "\n")
f.close()
```

The raw data has gaps — rows with a date but no value — which the code fills by carrying the last available quote forward. Strictly speaking that's wrong data, but close enough here.
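
(For what it's worth, the same alignment and forward-fill could be done with pandas. A rough sketch, assuming `BCHAIN-MKPRU.csv` and `LBMA-GOLD.csv` both have a header row, M/D/YY dates in the first column, and the value in the second — this is an alternative, not what I actually ran:)

```python
import pandas as pd

def load_series(path):
    df = pd.read_csv(path)
    df.columns = ['date', 'value']
    df['date'] = pd.to_datetime(df['date'], format='%m/%d/%y')
    return df.set_index('date')

btc = load_series('./BCHAIN-MKPRU.csv')
gold = load_series('./LBMA-GOLD.csv')

# Align gold to bitcoin's daily index and carry the last quote forward
# over weekends, holidays, and empty rows.
gold = gold.reindex(btc.index).ffill()

btc.to_csv('./bitcoin.csv')
gold.to_csv('./gold.csv')
```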

Training & model saving
```python
import time
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

torch.manual_seed(42)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Load CSV files
bitcoin = pd.read_csv("bitcoin.csv")  # Adjust file name/path as needed
bitcoin['date'] = pd.to_datetime(bitcoin['date'])
bitcoin.set_index('date', inplace=True)

gold = pd.read_csv("gold.csv")  # Adjust file name/path as needed
gold['date'] = pd.to_datetime(gold['date'])
gold.set_index('date', inplace=True)

# One scaler per series, so each can be inverse-transformed independently later.
scaler_bitcoin = MinMaxScaler(feature_range=(0, 1))
scaler_gold = MinMaxScaler(feature_range=(0, 1))
scaled_bitcoin = scaler_bitcoin.fit_transform(bitcoin['value'].values.reshape(-1, 1))
scaled_gold = scaler_gold.fit_transform(gold['value'].values.reshape(-1, 1))


def create_sequences(data, seq_length):
    sequences = []
    labels = []
    for i in range(len(data) - seq_length):
        seq = data[i:i + seq_length]
        label = data[i + seq_length]
        sequences.append(seq)
        labels.append(label)
    return np.array(sequences), np.array(labels)


SEQ_LENGTH = 5  # You can experiment with this value
X, y = create_sequences(scaled_bitcoin, SEQ_LENGTH)
X2, y2 = create_sequences(scaled_gold, SEQ_LENGTH)

# Split into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=42)
X2_train, X2_test, y2_train, y2_test = train_test_split(X2, y2, test_size=0.5, random_state=42)

# Convert to PyTorch tensors
X_train = torch.tensor(X_train, dtype=torch.float32).to(device)
X2_train = torch.tensor(X2_train, dtype=torch.float32).to(device)
X_test = torch.tensor(X_test, dtype=torch.float32).to(device)
X2_test = torch.tensor(X2_test, dtype=torch.float32).to(device)
y_train = torch.tensor(y_train, dtype=torch.float32).to(device)
y2_train = torch.tensor(y2_train, dtype=torch.float32).to(device)
y_test = torch.tensor(y_test, dtype=torch.float32).to(device)
y2_test = torch.tensor(y2_test, dtype=torch.float32).to(device)


class LSTM(nn.Module):
    def __init__(self, input_size=1, hidden_size=100, num_layers=3, output_size=1):
        super(LSTM, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        out, _ = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :])  # Take output from the last time step
        return out


model = LSTM().to(device)
criterion = nn.SmoothL1Loss().to(device)
optimizer = torch.optim.RMSprop(model.parameters(), lr=0.0001)

num_epochs = 600
print("training started")
st = time.time()
train_losses = []
for epoch in range(num_epochs):
    model.train()
    outputs1, outputs2 = model(X_train), model(X2_train)
    loss = criterion(outputs1, y_train) + criterion(outputs2, y2_train)  # joint loss over both series
    # outputs = model(X_train)
    # loss = criterion(outputs, y_train)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    train_losses.append(loss.item() / len(X_train))  # per-sample scaling; only affects the plot's y-axis
    if (epoch + 1) % (num_epochs // 10) == 0:
        print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item()}')
print("training finished, spent:", time.time() - st, "seconds")
# Set the model to evaluation mode
model.eval()

with torch.no_grad():
    test_pred = model(torch.tensor(X, dtype=torch.float32).to(device))
    test2_pred = model(torch.tensor(X2, dtype=torch.float32).to(device))

plt.figure(figsize=(14, 6))
plt.plot(y, label="Actual Price")
plt.plot(test_pred.cpu().numpy(), label="Predicted Price")
plt.legend()
plt.title("Bitcoin Price Prediction")
plt.show()

plt.figure(figsize=(14, 6))
plt.plot(y2, label="Actual Price")
plt.plot(test2_pred.cpu().numpy(), label="Predicted Price")
plt.legend()
plt.title("Gold Price Prediction")
plt.show()

# Plotting the loss-epoch graph
plt.figure(figsize=(10, 6))
plt.plot(range(1, num_epochs + 1), train_losses, marker='o', label="Training Loss")
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.title("Training Loss vs Epochs")
plt.legend()
plt.grid(True)
plt.show()

# Specify the file name
model_save_path = "./lstm_model.pth"

# Save the model's state dictionary
torch.save(model.state_dict(), model_save_path)

print(f"Model saved to {model_save_path}")
```
output
```
training started
Epoch [60/600], Loss: 0.014554675668478012
Epoch [120/600], Loss: 0.001723836176097393
Epoch [180/600], Loss: 0.0013864549109712243
Epoch [240/600], Loss: 0.0009691579034551978
Epoch [300/600], Loss: 0.0007963826647028327
Epoch [360/600], Loss: 0.0007319417782127857
Epoch [420/600], Loss: 0.0006162524223327637
Epoch [480/600], Loss: 0.0006129220128059387
Epoch [540/600], Loss: 0.0006104655331000686
Epoch [600/600], Loss: 0.0005984051385894418
training finished, spent: 10.779478311538696 seconds
Model saved to ./lstm_model.pth
```

I have to say, this really does run much faster on the GPU than on the CPU.
On the CPU:

output
```
training started
Epoch [60/600], Loss: 0.014558046124875546
Epoch [120/600], Loss: 0.0015929858200252056
Epoch [180/600], Loss: 0.0012274347245693207
Epoch [240/600], Loss: 0.0009665131801739335
Epoch [300/600], Loss: 0.0007424689829349518
Epoch [360/600], Loss: 0.000635020318441093
Epoch [420/600], Loss: 0.0007177351508289576
Epoch [480/600], Loss: 0.0005937849637120962
Epoch [540/600], Loss: 0.0005924825090914965
Epoch [600/600], Loss: 0.0006223751697689295
training finished, spent: 39.401917457580566 seconds
Model saved to ./lstm_model.pth
```

That's roughly a 4× cut in wall-clock time (39.4 s down to 10.8 s).

(Figures: Bitcoin predicted vs. actual; gold predicted vs. actual; training loss curve.)
Model loading
```python
import torch
import torch.nn as nn

torch.manual_seed(42)

# device was missing in the original snippet; define it before use
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")


class LSTM(nn.Module):
    def __init__(self, input_size=1, hidden_size=100, num_layers=3, output_size=1):
        super(LSTM, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        out, _ = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :])  # Take output from the last time step
        return out


model = LSTM().to(device)

# Load the state dictionary
model.load_state_dict(torch.load("lstm_model.pth", weights_only=True))

# Set the model to evaluation mode
model.eval()
print("Model loaded successfully!")
```

Update

I tweaked the data preprocessing and the LSTM hyperparameters, and now the prediction lag is gone. Kind of strange...

Training & plotting & saving
```python
import time
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt

# Check for GPU
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device = torch.device("cpu")  # WTF — CPU is actually faster here, see the note below

print(f"Using device: {device}")

torch.manual_seed(42)  # Set random seed

# Load the dataset
data = pd.read_csv("./bitcoin.csv")
data['date'] = pd.to_datetime(data['date'])
data.set_index('date', inplace=True)

gold = pd.read_csv("./gold.csv")
gold['date'] = pd.to_datetime(gold['date'])
gold.set_index('date', inplace=True)

# Normalize the data (one scaler per series so each can be inverted correctly later)
scaler_bitcoin = MinMaxScaler(feature_range=(0, 1))
scaler_gold = MinMaxScaler(feature_range=(0, 1))
scaled_bitcoin = scaler_bitcoin.fit_transform(data['value'].values.reshape(-1, 1))
scaled_gold = scaler_gold.fit_transform(gold['value'].values.reshape(-1, 1))


# Define a custom Dataset
class LSTMDataset(Dataset):
    def __init__(self, data, seq_length):
        self.data = data
        self.seq_length = seq_length

    def __len__(self):
        return len(self.data) - self.seq_length

    def __getitem__(self, index):
        x = self.data[index:index + self.seq_length]
        y = self.data[index + self.seq_length]
        return (torch.tensor(x, dtype=torch.float32).to(device),
                torch.tensor(y, dtype=torch.float32).to(device))


# Hyperparameters
SEQ_LENGTH = 5
BATCH_SIZE = 512
LEARNING_RATE = 0.001
NUM_EPOCHS = 2000

# Create Dataset and DataLoaders
dataset_bitcoin = LSTMDataset(scaled_bitcoin, SEQ_LENGTH)
dataset_gold = LSTMDataset(scaled_gold, SEQ_LENGTH)

train_size = int(0.5 * len(dataset_bitcoin))
test_size = len(dataset_bitcoin) - train_size

# Bitcoin
train_dataset_bitcoin, test_dataset_bitcoin = (
    torch.utils.data.random_split(dataset_bitcoin, [train_size, test_size]))
train_loader_bitcoin = DataLoader(train_dataset_bitcoin, batch_size=BATCH_SIZE, shuffle=True)
test_loader_bitcoin = DataLoader(test_dataset_bitcoin, batch_size=BATCH_SIZE, shuffle=False)

# Gold
train_dataset_gold, test_dataset_gold = torch.utils.data.random_split(dataset_gold, [train_size, test_size])
train_loader_gold = DataLoader(train_dataset_gold, batch_size=BATCH_SIZE, shuffle=True)
test_loader_gold = DataLoader(test_dataset_gold, batch_size=BATCH_SIZE, shuffle=False)


# Define the LSTM model
class LSTM(nn.Module):
    def __init__(self, input_size=1, hidden_size=5, num_layers=1, output_size=1):
        super(LSTM, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        out, _ = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :])  # Use the output from the last time step
        return out


# Initialize the model, loss function, and optimizer
model = LSTM().to(device)
criterion = nn.SmoothL1Loss().to(device)
optimizer = torch.optim.RMSprop(model.parameters(), lr=LEARNING_RATE)

# # Load model
# model.load_state_dict(torch.load("./models/price_predict_model-1736582077.355592.pth", weights_only=True))

st = time.time()
print("Training started at:", time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(st)))
train_losses = []
# Training loop
for epoch in range(NUM_EPOCHS):
    model.train()
    total_loss = 0.0
    for ((inputs, targets), (inputs2, targets2)) in zip(train_loader_bitcoin, train_loader_gold):
        outputs = model(inputs)
        outputs2 = model(inputs2)
        loss = criterion(outputs, targets) + criterion(outputs2, targets2)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    aveLoss = total_loss / len(train_loader_bitcoin)
    train_losses.append(aveLoss)
    if (epoch + 1) % (NUM_EPOCHS // 10) == 0:
        print(f"Epoch [{epoch + 1}/{NUM_EPOCHS}], Loss: {aveLoss}")
et = time.time()
print("Training finished at:", time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(et)) +
      ", duration:", str(et - st), "seconds")

# Subsample the loss curve to 50 points for a cleaner plot
loss_samples = []
epochId = []
for i in range(NUM_EPOCHS):
    if (i + 1) % (NUM_EPOCHS // 50) == 0:
        loss_samples.append(train_losses[i])
        epochId.append(i + 1)

# Plotting the loss-epoch graph
plt.figure(figsize=(10, 6))
plt.plot(epochId, loss_samples, marker='o', label="Training Loss")
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.title("Training Loss vs Epochs")
plt.legend()
plt.grid(True)
plt.show()


# Evaluation
def create_sequences(data, seq_length):
    sequences = []
    labels = []
    for i in range(len(data) - seq_length):
        seq = data[i:i + seq_length]
        label = data[i + seq_length]
        sequences.append(seq)
        labels.append(label)
    return (torch.tensor(np.array(sequences), dtype=torch.float32).to(device),
            torch.tensor(np.array(labels), dtype=torch.float32).to(device))


X, y = create_sequences(scaled_bitcoin, SEQ_LENGTH)
X2, y2 = create_sequences(scaled_gold, SEQ_LENGTH)

# Load model
# model.load_state_dict(torch.load("models/price_predict_model-1736582397.6991305.pth", weights_only=True))

model.eval()
with torch.no_grad():
    test_pred = model(X)
    test2_pred = model(X2)

# Map predictions back to price scale with the matching scaler
test_pred = scaler_bitcoin.inverse_transform(test_pred.cpu().numpy())
test2_pred = scaler_gold.inverse_transform(test2_pred.cpu().numpy())

y = scaler_bitcoin.inverse_transform(y.cpu().numpy())
y2 = scaler_gold.inverse_transform(y2.cpu().numpy())

plt.figure(figsize=(14, 6))
plt.plot(y, label="Actual Price")
plt.plot(test_pred, label="Predicted Price")
plt.legend()
plt.title("Bitcoin Price Prediction")
plt.show()

plt.figure(figsize=(14, 6))
plt.plot(y2, label="Actual Price")
plt.plot(test2_pred, label="Predicted Price")
plt.legend()
plt.title("Gold Price Prediction")
plt.show()

# Calculate Mean Absolute Percentage Error (divide by the actual value, not the prediction)
mape_bitcoin = 0.0
mape_gold = 0.0
for i, j in zip(y, test_pred):
    mape_bitcoin += abs(j - i) / i
mape_bitcoin /= len(y)
for i, j in zip(y2, test2_pred):
    mape_gold += abs(j - i) / i
mape_gold /= len(y2)
mape_bitcoin = mape_bitcoin[0]
mape_gold = mape_gold[0]
print("Bitcoin's MAPE:", mape_bitcoin)
print("Gold's MAPE:", mape_gold)

# Save the model
torch.save(model.state_dict(),
           "./models/price_predict_model-" + str(et) + ".pth")
print("Model saved as " + "./models/price_predict_model-" + str(et) + ".pth")
```

For some reason this version runs faster on the CPU than on the GPU — probably because iterating the DataLoader item by item carries a lot of Python overhead, and the dataset is small enough that the GPU never pays off.
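
(If the per-item overhead really is the culprit — my guess, not something I profiled in detail — one fix is to materialize all windows as two big tensors once and slice shuffled mini-batches out of them, skipping `Dataset.__getitem__` entirely. A sketch reusing the names from the script above:)

```python
def make_tensors(data, seq_length):
    # Build all (window, label) pairs up front as two contiguous tensors.
    xs = torch.stack([torch.as_tensor(data[i:i + seq_length], dtype=torch.float32)
                      for i in range(len(data) - seq_length)])
    ys = torch.as_tensor(data[seq_length:], dtype=torch.float32)
    return xs.to(device), ys.to(device)

xs, ys = make_tensors(scaled_bitcoin, SEQ_LENGTH)
perm = torch.randperm(len(xs))  # reshuffle once per epoch in the real loop
for start in range(0, len(xs), BATCH_SIZE):
    idx = perm[start:start + BATCH_SIZE]
    batch_x, batch_y = xs[idx], ys[idx]
    # ...forward/backward pass as in the training loop above...
```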

As the figures show, both MAPE values are quite small now.

(Figures: training output; loss curve; Bitcoin predicted vs. actual; gold predicted vs. actual.)
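
(For reference, the hand-rolled MAPE loop in the script above matches sklearn's built-in metric, which could replace it — assuming the inverse-transformed arrays `y`, `test_pred`, `y2`, `test2_pred` from the script:)

```python
from sklearn.metrics import mean_absolute_percentage_error

print("Bitcoin's MAPE:", mean_absolute_percentage_error(y, test_pred))
print("Gold's MAPE:", mean_absolute_percentage_error(y2, test2_pred))
```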

I later trained for another 1000 epochs with a learning rate almost two orders of magnitude smaller, and the loss still wouldn't drop any further — so this is probably about as good as it gets.

loss