Skip to content

Commit

Permalink
[aisingapore#61] Added some docstrings and code cleanup
Browse files Browse the repository at this point in the history
  • Loading branch information
ktyap committed Nov 9, 2022
1 parent b3bd460 commit 022a71e
Show file tree
Hide file tree
Showing 5 changed files with 44 additions and 189 deletions.
87 changes: 9 additions & 78 deletions sgnlp/models/dialogue_rnn/eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,109 +143,40 @@ def eval(cfg):
model = DialogueRNNModel.from_pretrained(pretrained_model_name_or_path=pathlib.Path(model_path).joinpath(cfg.eval_args["model_name"]), config=config)

_, _, test_loader = configure_dataloaders(dataset_path, dataset, classify, batch_size)

# if not pathlib.Path(pathlib.PurePath(output_path, 'logs')).exists():
# pathlib.Path(pathlib.PurePath(output_path, 'logs')).mkdir(parents=False, exist_ok=True)
# else:
# pass

if cfg.save_eval_res:
lf = open(pathlib.PurePath(output_path, (dataset + '_' + transformer_model + '_mode_' + transformer_mode
+ '_' + classification_model + '_' + classify + '_test.txt')), 'a')

# if not pathlib.Path(pathlib.PurePath(output_path, 'results')).exists():
# pathlib.Path(pathlib.PurePath(output_path, 'results')).mkdir(parents=False, exist_ok=True)
# else:
# pass
# rf = open(pathlib.PurePath(output_path, 'results', (dataset + '_' + transformer_model + '_mode_' + transformer_mode
# + '_' + classification_model + '_' + classify + '.txt')), 'a')

# valid_losses, valid_fscores = [], []
# test_fscores = []
# best_loss, best_label, best_pred, best_mask = None, None, None, None

start_time = time.time()

test_loss, test_acc, test_fscore, test_label, test_pred, test_mask = eval_model(model, test_loader)

# valid_losses.append(valid_loss)
# valid_fscores.append(valid_fscore)
# test_fscores.append(test_fscore)

# if best_loss == None or best_loss > test_loss:
# best_loss, best_label, best_pred, best_mask =\
# test_loss, test_label, test_pred, test_mask

x = 'test_loss {} test_acc {} test_fscore {} time {}'.\
format(test_loss, test_acc, test_fscore, round(time.time()-start_time, 2))

logger.info(x)

if cfg.save_eval_res:
lf.write(x + '\n')

# valid_fscores = np.array(valid_fscores).transpose()
# test_fscores = np.array(test_fscores).transpose()

# print('Test performance.')
# if dataset == 'dailydialog' and classify =='emotion':
# score1 = test_fscores[0][np.argmin(valid_losses)]
# score2 = test_fscores[0][np.argmax(valid_fscores[0])]
# score3 = test_fscores[1][np.argmin(valid_losses)]
# score4 = test_fscores[1][np.argmax(valid_fscores[1])]
# score5 = test_fscores[2][np.argmin(valid_losses)]
# score6 = test_fscores[2][np.argmax(valid_fscores[2])]
# score7 = test_fscores[3][np.argmin(valid_losses)]
# score8 = test_fscores[3][np.argmax(valid_fscores[3])]
# score9 = test_fscores[4][np.argmin(valid_losses)]
# score10 = test_fscores[4][np.argmax(valid_fscores[4])]
# score11 = test_fscores[5][np.argmin(valid_losses)]
# score12 = test_fscores[5][np.argmax(valid_fscores[5])]

# scores = [score1, score2, score3, score4, score5, score6,
# score7, score8, score9, score10, score11, score12]
# scores_val_loss = [score1, score3, score5, score7, score9, score11]
# scores_val_f1 = [score2, score4, score6, score8, score10, score12]

# print ('Scores: Weighted, Weighted w/o Neutral, Micro, Micro w/o Neutral, Macro, Macro w/o Neutral')
# print('F1@Best Valid Loss: {}'.format(scores_val_loss))
# print('F1@Best Valid F1: {}'.format(scores_val_f1))

# elif (dataset=='dailydialog' and classify=='act') or (dataset=='persuasion'):
# score1 = test_fscores[0][np.argmin(valid_losses)]
# score2 = test_fscores[0][np.argmax(valid_fscores[0])]
# score3 = test_fscores[1][np.argmin(valid_losses)]
# score4 = test_fscores[1][np.argmax(valid_fscores[1])]
# score5 = test_fscores[2][np.argmin(valid_losses)]
# score6 = test_fscores[2][np.argmax(valid_fscores[2])]

# scores = [score1, score2, score3, score4, score5, score6]
# scores_val_loss = [score1, score3, score5]
# scores_val_f1 = [score2, score4, score6]

# print ('Scores: Weighted, Micro, Macro')
# print('F1@Best Valid Loss: {}'.format(scores_val_loss))
# print('F1@Best Valid F1: {}'.format(scores_val_f1))

# else:
# score1 = test_fscores[0][np.argmin(valid_losses)]
# score2 = test_fscores[0][np.argmax(valid_fscores[0])]
# scores = [score1, score2]
# print('F1@Best Valid Loss: {}; F1@Best Valid F1: {}'.format(score1, score2))

# scores = [str(item) for item in scores]

# rf.write('\t'.join(scores) + '\t' + str(cfg) + '\n')

if cfg.save_eval_res:
lf.write(str(cfg) + '\n')
lf.write('Test F1: {}'.format(test_fscore))
lf.write('\n' + str(classification_report(test_label, test_pred, sample_weight=test_mask, digits=4)) + '\n')
lf.write(str(confusion_matrix(test_label, test_pred, sample_weight=test_mask)) + '\n')
lf.write('-'*50 + '\n\n')
#rf.close()
lf.close()

if __name__ == "__main__":
    # Script entry point: calls the eval method with a pretrained model using
    # the test set.
    #
    # Usage:
    #   To run with default parameters:
    #       python -m eval
    #   To run with a custom config:
    #       python -m eval --config config/dialogueRNN_config.json
    #
    # NOTE: these notes are comments because a bare string literal under this
    # guard is an unused expression, not a docstring. The earlier example also
    # pointed at the ``train`` module by mistake; this script is ``eval``.
    cfg = parse_args_and_load_config()
    eval(cfg)

16 changes: 5 additions & 11 deletions sgnlp/models/dialogue_rnn/modeling.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,30 +34,24 @@ class DialogueRNNPreTrainedModel(PreTrainedModel):
config_class = DialogueRNNConfig
base_model_prefix = "dialogueRNN"

def _init_weights(self, module: nn.Module) -> None:
"""
Initialize the weights
"""
pass

class DialogueRNNModel(DialogueRNNPreTrainedModel):
"""TODO The Latent Structure Refinement Model performs relation classification on all pairs of entity clusters.
"""The Dialogue RNN Model performs emotion recognition in conversations.
This model is also a PyTorch `torch.nn.Module <https://pytorch.org/docs/stable/nn.html#torch.nn.Module>`__
subclass. Use it as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to
general usage and behavior.
Args:
config (:class:`~sgnlp.models.DialogueRNN.config.DialogueRNNConfig`):
config (:class:`~sgnlp.models.dialogue_rnn.config.DialogueRNNConfig`):
Model configuration class with all the parameters of the model. Initializing with a config file does not
load the weights associated with the model, only the configuration.
Use the :obj:`.from_pretrained` method to load the model weights.
Example::
from sgnlp.models.bieru import DialogueRNNModel, DialogueRNNConfig
from sgnlp.models.dialogue_rnn import DialogueRNNModel, DialogueRNNConfig
# Method 1: Loading a default model
config = DialogueRNNConfig()
model = DialogueRNNModel(config)
# Method 2: Loading from pretrained
TODO config = DialogueRNNConfig.from_pretrained('https://storage.googleapis.com/sgnlp/models/lsr/config.json')
TODO model = DialogueRNNModel.from_pretrained('https://storage.googleapis.com/sgnlp/models/lsr/pytorch_model.bin',
config = DialogueRNNConfig.from_pretrained('https://storage.googleapis.com/sgnlp/models/dialogue_rnn/config.json')
model = DialogueRNNModel.from_pretrained('https://storage.googleapis.com/sgnlp/models/dialogue_rnn/pytorch_model.bin',
config=config)
"""
def __init__(self, config: DialogueRNNConfig) -> None:
Expand Down
23 changes: 11 additions & 12 deletions sgnlp/models/dialogue_rnn/postprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,22 +2,21 @@


class DialogueRNNPostprocessor:
"""This class processes :class:`~sgnlp.models.lsr.modeling.LsrModelOutput` to a readable format.
"""This class processes :class:`~sgnlp.models.dialogue_rnn.modeling.DialogueRNNModelOutput` to a readable format.
"""
def __init__(self, dataset="iemocap", classify="emotion") -> None:
if dataset == "iemocap":
if classify == "emotion":
self.label_index_map = {
0: "Happy",
1: "Sad",
2: "Neutral",
3: "Angry",
4: "Excited",
5: "Frustrated"
}
if dataset == "iemocap" and classify == "emotion":
self.label_index_map = {
0: "Happy",
1: "Sad",
2: "Neutral",
3: "Angry",
4: "Excited",
5: "Frustrated"
}
else:
raise ValueError("'dataset' and 'classify' must be defined")
raise ValueError("'dataset' and 'classify' must be 'iemocap' and 'emotion' respectively")

def __call__(self, preds) -> Any:
raw_preds = preds.prediction.detach().numpy()
Expand Down
10 changes: 1 addition & 9 deletions sgnlp/models/dialogue_rnn/preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,21 +4,13 @@
class DialogueRNNPreprocessor:
"""Class to initialise the Preprocessor for DialogueRNNModel.
Preprocesses inputs and tokenises them so they can be used with DialogueRNNModel.
Args:
Returns:
features:
lengths:
umask:
qmask:
"""
def __init__(self, transformer_model_family, transformer_model, tokenizer=None):
    """Store the transformer settings and the tokenizer on the preprocessor.

    Args:
        transformer_model_family: Kept as-is in ``self.transformer_model_family``.
        transformer_model: Kept as-is in ``self.model``.
        tokenizer: Optional tokenizer kept in ``self.tokenizer``. Defaults to None.
    """
    self.transformer_model_family = transformer_model_family
    # NOTE: the attribute is named ``model`` although the argument is ``transformer_model``.
    self.model = transformer_model
    self.tokenizer = tokenizer

def __call__(self, conversations, speaker_mask): #loss_mask, speaker_mask):
def __call__(self, conversations, speaker_mask):
# create umask and qmasks
lengths = [len(item) for item in conversations]

Expand Down
97 changes: 18 additions & 79 deletions sgnlp/models/dialogue_rnn/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,18 +34,15 @@ def configure_optimizers(model, weight_decay, learning_rate, adam_epsilon):
return optimizer


def train_or_eval_model(model, dataloader, loss_function=None, optimizer=None, train=False):
def train_model(model, dataloader, loss_function, optimizer=None, train=False):
"""Run training and evaluation using training and validation sets.
Args:
model (_type_): _description_
dataloader (_type_): _description_
loss_function (_type_, optional): _description_. Defaults to None.
optimizer (_type_, optional): _description_. Defaults to None.
train (bool, optional): _description_. Defaults to False.
Returns:
_type_: _description_
model (DialogueRNNModel): DialogueRNNModel
dataloader (DataLoader): Dataloader for IEMOCAP dataset
loss_function (MaskedNLLLoss): MaskedNLLLoss
optimizer (AdamW, optional): Optimizer
train (bool, optional): Train if True. Defaults to False.
"""
losses, preds, labels, masks = [], [], [], []
assert not train or optimizer!=None
Expand Down Expand Up @@ -195,36 +192,22 @@ def train(cfg):
optimizer = configure_optimizers(model, cfg.train_args["weight_decay"], cfg.train_args["lr"], cfg.train_args["adam_epsilon"])
train_loader, valid_loader, test_loader = configure_dataloaders(dataset_path, dataset, classify, batch_size)

# if not pathlib.Path(pathlib.PurePath(output_path, 'logs')).exists():
# pathlib.Path(pathlib.PurePath(output_path, 'logs')).mkdir(parents=False, exist_ok=True)
# else:
# pass
if cfg.save_train_res:
lf = open(pathlib.PurePath(output_path, (dataset + '_' + transformer_model + '_mode_' + transformer_mode
+ '_' + classification_model + '_' + classify + '_train.txt')), 'a')

# if not pathlib.Path(pathlib.PurePath(output_path, 'results')).exists():
# pathlib.Path(pathlib.PurePath(output_path, 'results')).mkdir(parents=False, exist_ok=True)
# else:
# pass
# rf = open(pathlib.PurePath(output_path, 'results', (dataset + '_' + transformer_model + '_mode_' + transformer_mode
# + '_' + classification_model + '_' + classify + '.txt')), 'a')

valid_losses, valid_fscores = [], []
# test_fscores = []
best_loss, best_label, best_pred, best_mask, best_fscore = None, None, None, None, None

for e in range(n_epochs):
start_time = time.time()
train_loss, train_acc, train_fscore, _, _, _ = train_or_eval_model(model, train_loader, loss_function, optimizer, True)
train_loss, train_acc, train_fscore, _, _, _ = train_model(model, train_loader, loss_function, optimizer, True)

valid_loss, valid_acc, valid_fscore, valid_label, valid_pred, valid_mask = train_or_eval_model(model, valid_loader, loss_function)

# test_loss, test_acc, test_fscore, test_label, test_pred, test_mask = train_or_eval_model(model, test_loader, loss_function)
valid_loss, valid_acc, valid_fscore, valid_label, valid_pred, valid_mask = train_model(model, valid_loader, loss_function)

valid_losses.append(valid_loss)
valid_fscores.append(valid_fscore)
# test_fscores.append(test_fscore)

# Save model based on best fscore for validation set
if best_fscore == None or best_fscore > valid_fscore:
Expand All @@ -247,69 +230,25 @@ def train(cfg):

if cfg.save_train_res:
lf.write(x + '\n')

# valid_fscores = np.array(valid_fscores).transpose()
# test_fscores = np.array(test_fscores).transpose()

# print('Test performance.')
# if dataset == 'dailydialog' and classify =='emotion':
# score1 = test_fscores[0][np.argmin(valid_losses)]
# score2 = test_fscores[0][np.argmax(valid_fscores[0])]
# score3 = test_fscores[1][np.argmin(valid_losses)]
# score4 = test_fscores[1][np.argmax(valid_fscores[1])]
# score5 = test_fscores[2][np.argmin(valid_losses)]
# score6 = test_fscores[2][np.argmax(valid_fscores[2])]
# score7 = test_fscores[3][np.argmin(valid_losses)]
# score8 = test_fscores[3][np.argmax(valid_fscores[3])]
# score9 = test_fscores[4][np.argmin(valid_losses)]
# score10 = test_fscores[4][np.argmax(valid_fscores[4])]
# score11 = test_fscores[5][np.argmin(valid_losses)]
# score12 = test_fscores[5][np.argmax(valid_fscores[5])]

# scores = [score1, score2, score3, score4, score5, score6,
# score7, score8, score9, score10, score11, score12]
# scores_val_loss = [score1, score3, score5, score7, score9, score11]
# scores_val_f1 = [score2, score4, score6, score8, score10, score12]

# print ('Scores: Weighted, Weighted w/o Neutral, Micro, Micro w/o Neutral, Macro, Macro w/o Neutral')
# print('F1@Best Valid Loss: {}'.format(scores_val_loss))
# print('F1@Best Valid F1: {}'.format(scores_val_f1))

# elif (dataset=='dailydialog' and classify=='act') or (dataset=='persuasion'):
# score1 = test_fscores[0][np.argmin(valid_losses)]
# score2 = test_fscores[0][np.argmax(valid_fscores[0])]
# score3 = test_fscores[1][np.argmin(valid_losses)]
# score4 = test_fscores[1][np.argmax(valid_fscores[1])]
# score5 = test_fscores[2][np.argmin(valid_losses)]
# score6 = test_fscores[2][np.argmax(valid_fscores[2])]

# scores = [score1, score2, score3, score4, score5, score6]
# scores_val_loss = [score1, score3, score5]
# scores_val_f1 = [score2, score4, score6]

# print ('Scores: Weighted, Micro, Macro')
# print('F1@Best Valid Loss: {}'.format(scores_val_loss))
# print('F1@Best Valid F1: {}'.format(scores_val_f1))

# else:
# score1 = test_fscores[0][np.argmin(valid_losses)]
# score2 = test_fscores[0][np.argmax(valid_fscores[0])]
# scores = [score1, score2]
# print('F1@Best Valid Loss: {}; F1@Best Valid F1: {}'.format(score1, score2))

# scores = [str(item) for item in scores]

# rf.write('\t'.join(scores) + '\t' + str(cfg) + '\n')

if cfg.save_train_res:
lf.write(str(cfg) + '\n')
lf.write('Best Valid F1: {}'.format(best_fscore))
lf.write('\n' + str(classification_report(best_label, best_pred, sample_weight=best_mask, digits=4)) + '\n')
lf.write('\n' + 'Confusion matrix (valid)' + '\n')
lf.write(str(confusion_matrix(best_label, best_pred, sample_weight=best_mask)) + '\n')
lf.write('-'*50 + '\n\n')
#rf.close()
lf.close()

if __name__ == "__main__":
    # Script entry point: calls the train method using the training and
    # validation sets.
    #
    # Usage:
    #   default parameters:
    #       python -m train
    #   custom training config:
    #       python -m train --config config/dialogueRNN_config.json
    cfg = parse_args_and_load_config()
    train(cfg)

0 comments on commit 022a71e

Please sign in to comment.