model.py
#################################################################
# The following code is from https://github.com/mcleonard/NLG_Autoencoder/blob/master/model.py
# with some minor modifications.
# The code was originally licensed by Mat Leonard under the MIT License.
#################################################################
import torch
from torch import nn
import torch.nn.functional as F


class Encoder(nn.Module):
    """ Sequence to sequence bidirectional LSTM encoder network """
    def __init__(self, vocab_size, embedding_size=300, hidden_size=256,
                 num_layers=2, drop_p=0.5):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.embedding = nn.Embedding(vocab_size, embedding_size)
        self.lstm = nn.LSTM(embedding_size, hidden_size, num_layers=num_layers,
                            dropout=drop_p, bidirectional=True)

    def forward(self, input, hidden):
        embedded = self.embedding(input)
        output, hidden = self.lstm(embedded, hidden)
        return output, hidden

    def init_hidden(self, device='cpu'):
        """ Create two tensors with shape (num_layers * num_directions, batch, hidden_size)
            for the hidden state and cell state
        """
        # A single zeros tensor of shape (2, 2*num_layers, 1, hidden_size)
        # unpacks along its first dimension into h_0 and c_0.
        h_0, c_0 = torch.zeros(2, 2*self.num_layers, 1, self.hidden_size, device=device)
        return h_0, c_0
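
# A minimal usage sketch for the Encoder (not part of the original file). The
# vocab size and sequence length below are made-up values; the shapes follow
# from the defaults above (hidden_size=256, num_layers=2, bidirectional):
#
#     encoder = Encoder(vocab_size=1000)
#     hidden = encoder.init_hidden()             # (h_0, c_0), each (4, 1, 256)
#     tokens = torch.randint(0, 1000, (7, 1))    # (seq_len, batch=1) token ids
#     outputs, hidden = encoder(tokens, hidden)  # outputs: (7, 1, 2*hidden_size)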


class Decoder(nn.Module):
    """ Sequence to sequence bidirectional LSTM decoder network with attention """
    def __init__(self, vocab_size, embedding_size=300, hidden_size=256,
                 num_layers=2, drop_p=0.1, max_length=50):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.max_length = max_length
        self.embedding = nn.Embedding(vocab_size, embedding_size)
        self.attn = nn.Linear(self.hidden_size + embedding_size, self.max_length)
        self.attn_combine = nn.Linear(self.hidden_size * 2 + embedding_size, self.hidden_size)
        self.dropout = nn.Dropout(drop_p)
        self.lstm = nn.LSTM(hidden_size, hidden_size, num_layers=num_layers,
                            dropout=drop_p, bidirectional=True)
        self.out = nn.Linear(2 * hidden_size, vocab_size)
        self.log_softmax = nn.LogSoftmax(dim=1)

    def forward(self, input, hidden, encoder_outputs):
        embedded = self.embedding(input)
        embedded = self.dropout(embedded)

        # Learns the attention vector (a probability distribution) here for weighting
        # encoder outputs based on the decoder input and encoder hidden vector
        attn_weights = F.softmax(self.attn(torch.cat((embedded[0], hidden[0][0]), 1)), dim=1)

        # Applies the attention vector (again, a probability distribution) to the
        # encoder outputs, which weights the encoder_outputs
        attn_applied = torch.bmm(attn_weights.unsqueeze(0),
                                 encoder_outputs.unsqueeze(0))

        # Now the decoder input is combined with the weighted encoder_outputs and
        # passed through a linear transformation as input to the LSTM layer
        output = torch.cat((embedded[0], attn_applied[0]), 1)
        output = self.attn_combine(output).unsqueeze(0)
        output = F.relu(output)

        output, hidden = self.lstm(output, hidden)
        output = self.out(output).view(1, -1)
        output = self.log_softmax(output)
        return output, hidden, attn_weights

    def init_hidden(self, device='cpu'):
        """ Create two tensors with shape (num_layers * num_directions, batch, hidden_size)
            for the hidden state and cell state
        """
        # A single zeros tensor of shape (2, 2*num_layers, 1, hidden_size)
        # unpacks along its first dimension into h_0 and c_0.
        h_0, c_0 = torch.zeros(2, 2*self.num_layers, 1, self.hidden_size, device=device)
        return h_0, c_0
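

# The block below is a quick smoke test / usage sketch added for illustration;
# it is not part of the original repository. The vocab size, toy token ids,
# and start-of-sequence id are assumptions. It also assumes, as the bmm in
# Decoder.forward expects, that the encoder outputs are collected into a
# zero-padded (max_length, 2*hidden_size) matrix before attention is applied.
if __name__ == '__main__':
    vocab_size = 1000
    max_length = 50

    encoder = Encoder(vocab_size)
    decoder = Decoder(vocab_size, max_length=max_length)

    # Encode a toy sequence of 7 token ids with batch size 1.
    tokens = torch.randint(0, vocab_size, (7, 1))
    enc_hidden = encoder.init_hidden()
    enc_out, enc_hidden = encoder(tokens, enc_hidden)      # (7, 1, 2*hidden_size)

    # Pad the per-token encoder outputs up to max_length so the decoder's
    # attention can attend over a fixed-size matrix.
    encoder_outputs = torch.zeros(max_length, 2 * encoder.hidden_size)
    encoder_outputs[:enc_out.size(0)] = enc_out.squeeze(1)

    # One decoding step from a hypothetical start-of-sequence token (id 0).
    dec_input = torch.tensor([[0]])
    dec_hidden = decoder.init_hidden()
    log_probs, dec_hidden, attn = decoder(dec_input, dec_hidden, encoder_outputs)
    print(log_probs.shape)   # torch.Size([1, 1000])
    print(attn.shape)        # torch.Size([1, 50])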