From 0b6b0ee9ba9a965d720cb871dd3d1586bf1d47c1 Mon Sep 17 00:00:00 2001
From: lohithreddyk
Date: Thu, 10 Nov 2022 16:58:56 -0600
Subject: [PATCH 1/3] Updated pseudo-ref function as a metric

---
 env.py         | 10 +++-------
 pseudo_func.py | 44 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 47 insertions(+), 7 deletions(-)
 create mode 100644 pseudo_func.py

diff --git a/env.py b/env.py
index c18dd3b..df9bc67 100644
--- a/env.py
+++ b/env.py
@@ -1,11 +1,6 @@
 import functools
 import evaluate
-
-# fix: GPU OOM (TF exhausts GPU memory, crashing PyTorch)
-import tensorflow as tf
-gpus = tf.config.experimental.list_physical_devices('GPU')
-for gpu in gpus:
-    tf.config.experimental.set_memory_growth(gpu, True)
+import pseudo_func
 
 datasets = {
     "newsroom": {
@@ -14,7 +9,7 @@
         "document_column": "ArticleText",
         "system_summary_column": "SystemSummary",
         "reference_summary_column": "ReferenceSummary",
-        "approaches": ["trad", "new"],
+        "approaches": ["new"],
         "human_eval_only_path": "dataloader/newsroom-human-eval.csv", # you need to get this file. See ReadMe.
         "refs_path": "dataloader/test.jsonl", # you need to get this file. See ReadMe.
         "human_eval_w_refs_path": "dataloader/newsroom_human_eval_with_refs.csv"
@@ -57,6 +52,7 @@
     "bleurt": evaluate.load('bleurt', config_name='BLEURT-20', module_type='metric').compute,
     "rouge": functools.partial(evaluate.load("rouge").compute, use_aggregator=False),
     "bertscore": functools.partial(evaluate.load("bertscore").compute, lang='en', use_fast_tokenizer=True),
+    "pseudo_metric":functools.partial(pseudo_func.pseudo_func)
 }
 
 
diff --git a/pseudo_func.py b/pseudo_func.py
new file mode 100644
index 0000000..c1ec1c4
--- /dev/null
+++ b/pseudo_func.py
@@ -0,0 +1,44 @@
+# -*- coding: utf-8 -*-
+"""PegasusDemo.ipynb
+
+Automatically generated by Colaboratory.
+
+Original file is located at
+    https://colab.research.google.com/drive/1j9LyFcJLiv37zZloRudYq7Wk9YrgPbfv
+"""
+
+
+"""# New Section"""
+
+
+from transformers import PegasusForConditionalGeneration, PegasusTokenizer
+import torch
+import evaluate
+
+
+"""## pseudo_metric function"""
+
+import pandas as pd
+import csv
+model_name = "google/pegasus-xsum"
+device = "cuda" if torch.cuda.is_available() else "cpu"
+tokenizer = PegasusTokenizer.from_pretrained(model_name)
+model = PegasusForConditionalGeneration.from_pretrained(model_name).to(device)
+pseudo_ref_sum = []
+
+def pseudo_func(predictions,references):
+
+    for system_sum, src_text in zip (predictions, references) :
+        batch = tokenizer(src_text, truncation=True, padding="longest", return_tensors="pt").to(device)
+        translated = model.generate(**batch)
+        tgt_text = tokenizer.batch_decode(translated, skip_special_tokens=True)
+        pseudo_ref_sum.append(tgt_text[0])
+        print(system_sum,tgt_text[0])
+    rouge = evaluate.load('rouge')
+    results =(rouge.compute(predictions= pseudo_ref_sum,
+                  references= predictions,
+                  use_aggregator=False))
+
+
+    print(results)
+    return results
\ No newline at end of file

From 7078b37d9d4fd19ee7ebfc7a8544187242c96568 Mon Sep 17 00:00:00 2001
From: Lohith reddy kalluru <46594020+lohithreddyk@users.noreply.github.com>
Date: Sat, 12 Nov 2022 19:00:19 -0600
Subject: [PATCH 2/3] Created an abstract function for model_name and ref_based_metric_name

---
 pseudo_func.py | 29 +++++++++++++++++------------
 1 file changed, 17 insertions(+), 12 deletions(-)

diff --git a/pseudo_func.py b/pseudo_func.py
index c1ec1c4..2caf8b3 100644
--- a/pseudo_func.py
+++ b/pseudo_func.py
@@ -1,8 +1,6 @@
 # -*- coding: utf-8 -*-
 """PegasusDemo.ipynb
-
 Automatically generated by Colaboratory.
-
 Original file is located at
     https://colab.research.google.com/drive/1j9LyFcJLiv37zZloRudYq7Wk9YrgPbfv
 """
@@ -20,13 +18,14 @@
 
 import pandas as pd
 import csv
-model_name = "google/pegasus-xsum"
-device = "cuda" if torch.cuda.is_available() else "cpu"
-tokenizer = PegasusTokenizer.from_pretrained(model_name)
-model = PegasusForConditionalGeneration.from_pretrained(model_name).to(device)
-pseudo_ref_sum = []
 
-def pseudo_func(predictions,references):
+
+
+def pseudo_func(predictions,references, model_name, ref_based_metric_name):
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+    tokenizer = PegasusTokenizer.from_pretrained(model_name)
+    model = PegasusForConditionalGeneration.from_pretrained(model_name).to(device)
+    pseudo_ref_sum = []
 
     for system_sum, src_text in zip (predictions, references) :
         batch = tokenizer(src_text, truncation=True, padding="longest", return_tensors="pt").to(device)
@@ -34,11 +33,17 @@
         tgt_text = tokenizer.batch_decode(translated, skip_special_tokens=True)
         pseudo_ref_sum.append(tgt_text[0])
         print(system_sum,tgt_text[0])
-    rouge = evaluate.load('rouge')
-    results =(rouge.compute(predictions= pseudo_ref_sum,
+
+    metricfn = evaluate.load(ref_based_metric_name)
+    if ref_based_metric_name == 'rouge' :
+        results =(metricfn.compute(predictions= pseudo_ref_sum,
                   references= predictions,
                   use_aggregator=False))
-
+
+    else :
+        results =(metricfn.compute(predictions= pseudo_ref_sum,
+                  references= predictions))
+
 
     print(results)
-    return results
\ No newline at end of file
+    return results

From bbe5bf43059a8f010645d5be521e929e6337d583 Mon Sep 17 00:00:00 2001
From: Lohith reddy kalluru <46594020+lohithreddyk@users.noreply.github.com>
Date: Sat, 12 Nov 2022 19:04:11 -0600
Subject: [PATCH 3/3] Updated
 env.py to create an abstract function with model_name and ref_based_metric_name for pseudo_func

---
 env.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/env.py b/env.py
index df9bc67..9630c27 100644
--- a/env.py
+++ b/env.py
@@ -52,7 +52,8 @@
     "bleurt": evaluate.load('bleurt', config_name='BLEURT-20', module_type='metric').compute,
     "rouge": functools.partial(evaluate.load("rouge").compute, use_aggregator=False),
     "bertscore": functools.partial(evaluate.load("bertscore").compute, lang='en', use_fast_tokenizer=True),
-    "pseudo_metric":functools.partial(pseudo_func.pseudo_func)
+    "pseudo_metric":functools.partial(pseudo_func.pseudo_func, model_name = 'google/pegasus-xsum',ref_based_metric_name='rouge')
+
 }
 
 
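A minimal usage sketch of the series' end state: calling pseudo_func directly with the same keyword arguments that PATCH 3/3 binds through functools.partial in env.py. The file name, variable names, and sample texts below are invented for illustration, and the sketch assumes transformers, torch, and evaluate are installed and that the google/pegasus-xsum checkpoint and the rouge metric can be downloaded.

# usage_sketch.py -- illustrative only, not part of the patches above
import pseudo_func

# Invented sample data: one system summary and the source document it summarizes.
system_summaries = ["A cat sat on a mat in the living room."]
source_documents = [
    "Witnesses reported that a small cat spent most of the afternoon "
    "sitting on a mat in the living room before wandering off."
]

# pseudo_func generates a pseudo-reference summary for each source document
# with the given model, then compares the system summaries with those
# pseudo-references using the named reference-based metric (ROUGE here),
# mirroring the keyword arguments bound in env.py's "pseudo_metric" entry.
scores = pseudo_func.pseudo_func(
    predictions=system_summaries,
    references=source_documents,
    model_name="google/pegasus-xsum",
    ref_based_metric_name="rouge",
)
print(scores)  # e.g. a dict of per-example ROUGE scores as returned by evaluate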