A Hands-On Implementation of an LSTM Module in Python with PyTorch
An LSTM (Long Short-Term Memory) network is a widely used type of recurrent neural network for processing sequence data. In this article, we show how to work with PyTorch's LSTM module and walk through two worked examples.
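Both examples rely on PyTorch's built-in nn.LSTM rather than a from-scratch cell. As a quick orientation, here is a minimal, self-contained sketch of the tensor shapes nn.LSTM expects and returns (using its default layout, where the sequence dimension comes first; all sizes below are arbitrary illustration values):

import torch
import torch.nn as nn

# nn.LSTM default layout: (seq_len, batch, input_size)
lstm = nn.LSTM(input_size=8, hidden_size=16)  # single layer
x = torch.randn(5, 3, 8)                      # a batch of 3 sequences of length 5
h0 = torch.zeros(1, 3, 16)                    # (num_layers, batch, hidden_size)
c0 = torch.zeros(1, 3, 16)
out, (hn, cn) = lstm(x, (h0, c0))
print(out.shape)  # torch.Size([5, 3, 16]) - the hidden state at every time step
print(hn.shape)   # torch.Size([1, 3, 16]) - the final hidden state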
Example 1: A Character-Level Language Model with the LSTM Module
The following example code uses the LSTM module to implement a character-level language model:
import torch
import torch.nn as nn
import numpy as np

# Define the character-level language model
class CharLSTM(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(CharLSTM, self).__init__()
        self.hidden_size = hidden_size
        self.lstm = nn.LSTM(input_size, hidden_size)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, hidden):
        out, hidden = self.lstm(x, hidden)
        out = self.fc(out)
        return out, hidden

    def init_hidden(self, batch_size):
        # The LSTM state is an (h_0, c_0) tuple, each of shape
        # (num_layers, batch_size, hidden_size)
        return (torch.zeros(1, batch_size, self.hidden_size),
                torch.zeros(1, batch_size, self.hidden_size))
# Load data
with open('data.txt', 'r') as f:
    data = f.read()

# Create character-to-index mappings
chars = list(set(data))
char_to_idx = {ch: i for i, ch in enumerate(chars)}
idx_to_char = {i: ch for i, ch in enumerate(chars)}

# Convert data to a tensor of character indices
data_idx = [char_to_idx[ch] for ch in data]
data_tensor = torch.from_numpy(np.array(data_idx)).long()
# Define hyperparameters
input_size = len(chars)
hidden_size = 128
output_size = len(chars)
seq_length = 100
num_epochs = 100
batch_size = 1  # the loop below feeds one sequence at a time, so the batch size must be 1
learning_rate = 0.01
# Define model, loss function, and optimizer
model = CharLSTM(input_size, hidden_size, output_size)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
# Train model
for epoch in range(num_epochs):
    hidden = model.init_hidden(batch_size)
    for i in range(0, data_tensor.size(0) - seq_length, seq_length):
        # Inputs are one-hot vectors of shape (seq_length, batch, input_size);
        # targets are the same character sequence shifted by one position
        inputs = data_tensor[i:i+seq_length].view(seq_length, batch_size)
        targets = data_tensor[i+1:i+seq_length+1].view(-1)
        inputs = torch.nn.functional.one_hot(inputs, num_classes=input_size).float()
        # Detach the carried-over state so backprop stays within the current chunk
        hidden = tuple(h.detach() for h in hidden)
        optimizer.zero_grad()
        outputs, hidden = model(inputs, hidden)
        loss = criterion(outputs.view(-1, output_size), targets)
        loss.backward()
        optimizer.step()
    if (epoch+1) % 10 == 0:
        print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, loss.item()))
# Generate text
with torch.no_grad():
    hidden = model.init_hidden(1)
    # Start from a random character ("inp" avoids shadowing the built-in input())
    inp = torch.nn.functional.one_hot(torch.randint(input_size, (1, 1)), num_classes=input_size).float()
    text = idx_to_char[inp.argmax().item()]
    for i in range(1000):
        output, hidden = model(inp.view(1, 1, -1), hidden)
        output = torch.softmax(output.view(-1), dim=0)
        # Greedy decoding: always take the most probable next character
        inp = torch.nn.functional.one_hot(output.argmax().unsqueeze(0), num_classes=input_size).float()
        text += idx_to_char[inp.argmax().item()]
    print(text)
In this example, we first load the data and map each character to an index. We then define a CharLSTM class that implements the character-level language model. During training we use the cross-entropy loss and the Adam optimizer; once training is done, we use the trained model to generate some text.
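One caveat: the generation loop above decodes greedily, always taking the argmax, which often collapses into repeating the same fragment. A common variation is to sample the next character from the softmax distribution instead. The sketch below is a drop-in replacement for the loop body, where temperature is an assumed tuning knob that is not part of the original code:

# Inside the generation loop: sample the next character instead of taking the argmax
temperature = 0.8  # assumed knob: values < 1 sharpen the distribution, values > 1 flatten it
output, hidden = model(inp.view(1, 1, -1), hidden)
probs = torch.softmax(output.view(-1) / temperature, dim=0)
idx = torch.multinomial(probs, num_samples=1).item()  # draw one character index
inp = torch.nn.functional.one_hot(torch.tensor([idx]), num_classes=input_size).float()
text += idx_to_char[idx]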
Example 2: Time Series Prediction with the LSTM Module
The following example code uses the LSTM module to implement time series prediction:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt
# Generate data
np.random.seed(42)
t = np.arange(0, 100, 0.1)
x = np.sin(t) + np.random.randn(len(t)) * 0.1
# Convert data to tensors
x = torch.from_numpy(x).float()
# Define LSTM model
class LSTMPredictor(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(LSTMPredictor, self).__init__()
        self.hidden_size = hidden_size
        self.lstm = nn.LSTM(input_size, hidden_size)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, hidden):
        out, hidden = self.lstm(x, hidden)
        out = self.fc(out)
        return out, hidden

    def init_hidden(self):
        return (torch.zeros(1, 1, self.hidden_size),
                torch.zeros(1, 1, self.hidden_size))
# Define hyperparameters
input_size = 1
hidden_size = 32
output_size = 1
seq_length = 10
num_epochs = 100
learning_rate = 0.01
# Define model, loss function, and optimizer
model = LSTMPredictor(input_size, hidden_size, output_size)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
# Train model
for epoch in range(num_epochs):
    hidden = model.init_hidden()
    for i in range(0, x.size(0) - seq_length, seq_length):
        # Each window predicts the same window shifted one step ahead
        inputs = x[i:i+seq_length].view(seq_length, 1, input_size)
        targets = x[i+1:i+seq_length+1].view(seq_length, 1, output_size)
        # Detach the carried-over state so backprop stays within the current chunk
        hidden = tuple(h.detach() for h in hidden)
        optimizer.zero_grad()
        outputs, hidden = model(inputs, hidden)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
    if (epoch+1) % 10 == 0:
        print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, loss.item()))
# Test model
with torch.no_grad():
    hidden = model.init_hidden()
    y_pred = []
    for i in range(0, x.size(0) - seq_length, seq_length):
        inputs = x[i:i+seq_length].view(seq_length, 1, input_size)
        outputs, hidden = model(inputs, hidden)
        # Keep only the one-step-ahead prediction after each window
        y_pred.append(outputs[-1].item())
# Plot results
plt.plot(t, x.numpy(), label='True')
plt.plot(t[seq_length::seq_length], y_pred, label='Predicted')
plt.legend()
plt.show()
In this example, we first generate some noisy sine-wave data and convert it to a PyTorch tensor. We then define an LSTMPredictor class that implements the time series prediction model. During training we use the mean squared error loss and the Adam optimizer; at test time we run the trained model over the data and plot the predictions against the ground truth.
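Note that the test loop above always feeds ground-truth windows into the model, so it only ever predicts one step ahead. To forecast beyond the observed data, a common follow-up is to feed each prediction back in as the next input. Here is a minimal sketch reusing the trained model from this example; the 100-step horizon is an arbitrary choice:

# Autoregressive forecasting: each prediction becomes the next input
with torch.no_grad():
    hidden = model.init_hidden()
    # Warm up the hidden state on the last observed window
    last_window = x[-seq_length:].view(seq_length, 1, input_size)
    out, hidden = model(last_window, hidden)
    step = out[-1].view(1, 1, input_size)  # first forecasted value
    forecast = [step.item()]
    for _ in range(99):  # 100 forecast steps in total
        out, hidden = model(step, hidden)
        step = out[-1].view(1, 1, input_size)
        forecast.append(step.item())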
Summary
In this article, we showed how to work with PyTorch's LSTM module and walked through two worked examples. These techniques are very useful for handling sequence data in deep learning.