Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pie/models/attention.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,7 @@ def forward(self, dec_out, enc_outs, lengths):

# apply source length mask
mask = torch_utils.make_length_mask(lengths)

# (batch x src_seq_len) => (trg_seq_len x batch x src_seq_len)
mask = mask.unsqueeze(0).expand_as(weights)
# weights = weights * mask.float()
Expand Down
111 changes: 90 additions & 21 deletions pie/models/decoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -315,8 +315,8 @@ def loss(self, logits, targets):
return loss

def predict_max(self, enc_outs, lengths,
max_seq_len=20, bos=None, eos=None,
context=None):
max_seq_len=20, bos=None, eos=None,
context=None):
"""
Decoding routine for inference with step-wise argmax procedure

Expand All @@ -328,37 +328,106 @@ def predict_max(self, enc_outs, lengths,
eos = eos or self.label_encoder.get_eos()
bos = bos or self.label_encoder.get_bos()
hidden, batch, device = None, enc_outs.size(1), enc_outs.device
mask = torch.ones(batch, dtype=torch.int64, device=device)
inp = torch.zeros(batch, dtype=torch.int64, device=device) + bos
hyps, scores = [], 0
hyps = []
final_scores = torch.tensor([0 for _ in range(batch)], dtype=torch.float64, device="cpu")

# As we go, we'll reduce the tensor size by popping finished prediction
# To keep adding new characters to the right words, we
# store and keep updated a Tensor where Tensor Index -> Batch Original ID
# where Batch Original ID is the Word ID (batch_size = number of words)
tensor_to_original_batch_indexes = torch.tensor(
list(range(batch)),
dtype=torch.int64,
device=device
) # Tensor(batch_size)

for _ in range(max_seq_len):
if mask.sum().item() == 0:
break

# prepare input
emb = self.embs(inp)
# Prepare input
# Context is NEVER changed after the method has been called
emb = self.embs(inp) # Tensor(batch_size x emb_size)
if context is not None:
emb = torch.cat([emb, context], dim=1)
# run rnn
emb = emb.unsqueeze(0)
emb = torch.cat([emb, context], dim=1) # Tensor(batch_size x (emb_size+context_size))

# Run rnn
emb = emb.unsqueeze(0) # Tensor(1 x batch_size x emb size(+context))

# hidden is gonna be reused by the next iteration
# outs is specific to the current run
outs, hidden = self.rnn(emb, hidden)
# Hidden : Tensor(1 x batch_size x emb_size)

outs, _ = self.attn(outs, enc_outs, lengths)
outs = self.proj(outs).squeeze(0)
# get logits

# Get logits
probs = F.log_softmax(outs, dim=1)
# sample and accumulate
score, inp = probs.max(1)
hyps.append(inp.tolist())
mask = mask * (inp != eos).long()
score = score.cpu()
score[mask == 0] = 0
scores += score

# Sample and accumulate
# Score are the probabilities
# Inp are the new characters (as int) we are adding to our predictions
score, inp = probs.max(1) # (Tensor(batch_size, dtype=float), Tensor(batch_size, dtype=int))

# We create a mask of value that are not ending the string
non_eos = (inp != eos) # Tensor(batch_size, dtype=bool)

# Using this mask, we retrieve the Indexes of items that are not EOS
# nonzero() returns a 2D Tensor where each row is an index
            # not equal to 0. It can be used as a (mask) selector for other tensors (see below)
keep = torch.nonzero(non_eos).squeeze(1) # Tensor(dtype=int)

# We prepare a sequence output made of EOS which we'll fill with predictions
# torch.full() takes size as tuple for first argument, filling value as second
seq_output = torch.full((batch, ), eos, device=device, dtype=torch.int64)

# We replace the value at indexes *tensor_to_original_batch_indexes* by the prediction
# of current sequence output
seq_output[tensor_to_original_batch_indexes] = inp

# We set the score where we have EOS predictions as 0
score[inp == eos] = 0
            # So that we can add the score to the final scores
final_scores[tensor_to_original_batch_indexes] += score.cpu()

# We add this new output to the final hypothesis
hyps.append(seq_output.tolist())

            # If there is nothing left but EOS, this is the end of prediction time
if non_eos.sum() == 0:
break

            # Otherwise, we update tensor_to_original_batch_indexes by transferring
            # the currently associated indexes to the new (filtered) indexes
tensor_to_original_batch_indexes = tensor_to_original_batch_indexes[keep]

# We use the Tensor of indexes that are not EOS to filter out
# Elements of the batch that are EOS.
# inp, context, lengths are all Tensor(batch_size x ....)
# so we filter them at the first dimension
inp = inp[keep]
context = context[keep]
lengths = lengths[keep]

            # However, hidden is 3D (Tensor(1 x batch_size x _))
# So we filter at the second dimension directly
if isinstance(hidden, tuple): # LSTM
hidden = tuple([hid[:, keep, :] for hid in hidden])
else: # GRU
hidden = hidden[:, keep, :]

# enc_outs is Tensor(max_seq_len x batch x hidden_size)
# Seq_len is supposed to be equal to max(lengths),
# but if the maximum length is popped, it is not in sync anymore.
            # In order to keep it working, we remove the extra dimension if lengths.max() has changed.
# We then update the first (max_seq_len) and second (batch_size) dimensions accordingly.
max_seq_len = lengths.max()
enc_outs = enc_outs[:max_seq_len, keep, :]

hyps = [self.label_encoder.stringify(hyp) for hyp in zip(*hyps)]
scores = [s/(len(hyp) + TINY) for s, hyp in zip(scores.tolist(), hyps)]
final_scores = [s / (len(hyp) + TINY) for s, hyp in zip(final_scores, hyps)]

return hyps, scores
return hyps, final_scores

def predict_beam(self, enc_outs, lengths,
max_seq_len=50, width=12, eos=None, bos=None,
Expand Down
8 changes: 4 additions & 4 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,12 @@
here = os.path.abspath(os.path.dirname(__file__))

# Package meta-data.
NAME = 'nlp-pie'
DESCRIPTION = 'A Framework for Joint Learning of Sequence Labeling Tasks'
URL = 'https://github.com/emanjavacas/pie'
NAME = 'PaPie'
DESCRIPTION = 'A Framework for Joint Learning of Sequence Labeling Tasks, forked from Pie'
URL = 'https://github.com/lascivaroma/PaPie'
AUTHOR = 'Enrique Manjavacas; Mike Kestemont; Thibault Clerice'
REQUIRES_PYTHON = '>=3.6.0'
VERSION = "0.3.7c"
VERSION = "0.3.8"

# What packages are required for this module to be executed?

Expand Down