[aisingapore#61] Changed preprocessor output to a dict that is unpacked as the model input
ktyap · Nov 10, 2022 · 2cb0530 · 2cb0530
1 parent 295c066
commit 2cb0530
Show file tree

Hide file tree

Showing 6 changed files with 74 additions and 37 deletions.
diff --git a/sgnlp/models/dialogue_rnn/eval.py b/sgnlp/models/dialogue_rnn/eval.py
@@ -16,7 +16,6 @@
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 
-
 def eval_model(model, dataloader, no_cuda=True):
 
     losses, preds, labels, masks = [], [], [], []
@@ -28,7 +27,7 @@ def eval_model(model, dataloader, no_cuda=True):
 
     for conversations, label, loss_mask, speaker_mask in tqdm(dataloader, leave=False):
 
-        features, lengths, umask, qmask = preprocessor(conversations, speaker_mask)
+        tensor_dict = preprocessor(conversations, speaker_mask)
 
         # create labels and mask
         if no_cuda:
@@ -47,7 +46,11 @@ def eval_model(model, dataloader, no_cuda=True):
         labels_ = label.view(-1) 
 
         # obtain log probabilities
-        output = model(features, lengths, umask, qmask, None, None, None, no_cuda)
+        tensor_dict['loss_function'] = None
+        tensor_dict['loss_mask'] = None
+        tensor_dict['label'] = None
+        tensor_dict['no_cuda'] = no_cuda
+        output = model(**tensor_dict)
         pred_ = output.prediction
 
         preds.append(pred_.data.cpu().numpy())

diff --git a/sgnlp/models/dialogue_rnn/model_usage.py b/sgnlp/models/dialogue_rnn/model_usage.py
@@ -16,13 +16,13 @@
 
 # preprocessor = DialogueRNNPreprocessor(model.transformer_model_family, model.model, model.tokenizer)
 # To force the use of CPU instead of GPU
-preprocessor = DialogueRNNPreprocessor(model.transformer_model_family, model.model, model.tokenizer, False)
+preprocessor = DialogueRNNPreprocessor(model.transformer_model_family, model.model, model.tokenizer, True)
 
 postprocessor = DialogueRNNPostprocessor()
 
 # conversations, speaker_mask
-input_batch = (
-    [
+input_batch = {
+    'conversations': [
         ["Hello, how is your day?",
             "It's not been great.",
             "What happened?",
@@ -36,7 +36,7 @@
             "Oh no. But don't worry, I am sure you will land a great offer soon!",
         ]
     ],
-    [
+    'speaker_mask' : [
         [1,
         0,
         1,
@@ -50,10 +50,14 @@
         1
         ]
     ]
-)
+}
 
-features, lengths, umask, qmask = preprocessor(input_batch[0], input_batch[1])
+tensor_dict = preprocessor(**input_batch)
+
+# output = model(**tensor_dict)
+# To force the use of CPU instead of GPU
+tensor_dict['no_cuda'] = True
+output = model(**tensor_dict)
 
-output = model(features, lengths, umask, qmask, None, None, None, False)
 predictions = postprocessor(output)
 print(predictions)
diff --git a/sgnlp/models/dialogue_rnn/modeling.py b/sgnlp/models/dialogue_rnn/modeling.py
@@ -178,16 +178,28 @@ def forward(
         loss_function=None,
         loss_mask=None,
         label=None,
-        no_cuda=True
+        no_cuda=None
     ):
+        if no_cuda is None:
+            if torch.cuda.is_available():
+                no_cuda = False
+            else:
+                no_cuda = True
 
-        start = torch.cumsum(torch.cat((lengths.data.new(1).zero_(), lengths[:-1])), 0)
-
         if no_cuda:
-            features = torch.stack([self.pad(features.narrow(0, s, l), max(lengths))
-                                for s, l in zip(start.data.tolist(), lengths.data.tolist())], 0).transpose(0, 1)
+            self.model.to('cpu')
         else:
-            features = torch.stack([self.pad(features.narrow(0, s, l).cuda(), max(lengths))
+            self.model.to('cuda')
+
+        start = torch.cumsum(torch.cat((lengths.data.new(1).zero_(), lengths[:-1])), 0)
+
+        # if no_cuda:
+        #     features = torch.stack([self.pad(features.narrow(0, s, l), max(lengths))
+        #                         for s, l in zip(start.data.tolist(), lengths.data.tolist())], 0).transpose(0, 1)
+        # else:
+        #     features = torch.stack([self.pad(features.narrow(0, s, l).cuda(), max(lengths))
+        #                         for s, l in zip(start.data.tolist(), lengths.data.tolist())], 0).transpose(0, 1)
+        features = torch.stack([self.pad(features.narrow(0, s, l), max(lengths))
                                 for s, l in zip(start.data.tolist(), lengths.data.tolist())], 0).transpose(0, 1)
 
         if no_cuda:
@@ -235,12 +247,25 @@ def forward(
             hidden = self.dropout_rec(hidden)
 
             if self.residual:
-                if features.is_cuda:
+                # features = self.fc(features)
+                # features = hidden + features
+                if not no_cuda:
                     self.fc.cuda()
-                features = self.fc(features)
-                features = hidden + features   
+                    features = self.fc(features)
+                    features = hidden + features
+                    features.cuda()
+                else:
+                    self.fc.cpu()
+                    features = self.fc(features)
+                    features = hidden + features
+                    features.cpu()
             else:
-                features = hidden  
+                features = hidden
+            # if not no_cuda:
+            #     features = features * mask
+            #     features.cuda()
+            # else:
+            #     features = features * mask
             features = features * mask
 
             if self.attention:

diff --git a/sgnlp/models/dialogue_rnn/preprocess.py b/sgnlp/models/dialogue_rnn/preprocess.py
@@ -56,5 +56,8 @@ def __call__(self, conversations, speaker_mask):
             if self.transformer_model_family == 'roberta':
                 features = features[:, 0, :]
 
-        return features, lengths, umask, qmask
+        return {'features': features,
+                'lengths': lengths,
+                'umask': umask,
+                'qmask': qmask}
 
diff --git a/sgnlp/models/dialogue_rnn/train.py b/sgnlp/models/dialogue_rnn/train.py
@@ -63,7 +63,7 @@ def train_model(model, dataloader, loss_function, optimizer=None, train=False, n
         if train:
             optimizer.zero_grad()
 
-        features, lengths, umask, qmask = preprocessor(conversations, speaker_mask)
+        tensor_dict = preprocessor(conversations, speaker_mask)
 
         # create labels and mask
         if no_cuda:
@@ -83,7 +83,11 @@ def train_model(model, dataloader, loss_function, optimizer=None, train=False, n
 
 
         # obtain log probabilities
-        output = model(features, lengths, umask, qmask, loss_function, loss_mask, labels_, no_cuda)
+        tensor_dict['loss_function'] = loss_function
+        tensor_dict['loss_mask'] = loss_mask
+        tensor_dict['label'] = labels_
+        tensor_dict['no_cuda'] = no_cuda
+        output = model(**tensor_dict)
         loss, pred_ = output.loss, output.prediction
 
 

diff --git a/sgnlp/models/dialogue_rnn/usage.py b/sgnlp/models/dialogue_rnn/usage.py
@@ -5,24 +5,18 @@
 from sgnlp.models.dialogue_rnn.postprocess import DialogueRNNPostprocessor
 
 
-# model_path = pathlib.Path(__file__).resolve().parents[0].joinpath("bak")
-# model_path = pathlib.PurePath(model_path, "model")
-# print(model_path)
-
-#config = DialogueRNNConfig.from_pretrained(pathlib.Path(model_path).joinpath("config.json"))
-#model = DialogueRNNModel.from_pretrained(pathlib.Path(model_path).joinpath("pytorch_model.bin"), config=config)
 config = DialogueRNNConfig.from_pretrained("https://storage.googleapis.com/sgnlp/models/dialogue_rnn/config.json")
 model = DialogueRNNModel.from_pretrained("https://storage.googleapis.com/sgnlp/models/dialogue_rnn/pytorch_model.bin", config=config)
 
 # preprocessor = DialogueRNNPreprocessor(model.transformer_model_family, model.model, model.tokenizer)
 # To force the use of CPU instead of GPU
-preprocessor = DialogueRNNPreprocessor(model.transformer_model_family, model.model, model.tokenizer, False)
+preprocessor = DialogueRNNPreprocessor(model.transformer_model_family, model.model, model.tokenizer, True)
 
 postprocessor = DialogueRNNPostprocessor()
 
 # conversations, speaker_mask
-input_batch = (
-    [
+input_batch = {
+    'conversations' : [
         ["Hello, how is your day?",
             "It's not been great.",
             "What happened?",
@@ -33,10 +27,10 @@
             "Hope that you will find it soon.",
             "Was the appointment important?",
             "It was a job interview and I didn't get the job.",
-            "Oh no. But don't worry, I am sure you will land a great offer soon!",
+            "Oh no. But don't worry, I am sure you will come across another great opportunity soon!",
         ]
     ],
-    [
+    'speaker_mask' : [
         [1,
         0,
         1,
@@ -50,10 +44,14 @@
         1
         ]
     ]
-)
+}
+
+tensor_dict = preprocessor(**input_batch)
 
-features, lengths, umask, qmask = preprocessor(input_batch[0], input_batch[1])
+# output = model(**tensor_dict)
+# To force the use of CPU instead of GPU
+tensor_dict['no_cuda'] = True
+output = model(**tensor_dict)
 
-output = model(features, lengths, umask, qmask, None, None, None, False)
 predictions = postprocessor(output)
 print(predictions)