Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 47 additions & 0 deletions utilization/dataset/goldenswag.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
import re
from functools import cached_property

from .multiple_choice_dataset import MultipleChoiceDataset


class Goldenswag(MultipleChoiceDataset):
    """The GoldenSwag dataset. http://arxiv.org/abs/2504.07825

    Instances use the HellaSwag record layout (see the example below).

    HellaSwag: Can a Machine Really Finish Your Sentence? (Zellers et al., 2019)
    Hellaswag is a new dataset for commonsense NLI. The paper was published at ACL2019.

    Example:
        'activity_label': 'Roof shingle removal',
        'ctx_a': 'A man is sitting on a roof.',
        'ctx_b': 'he',
        'ctx': 'A man is sitting on a roof. he',
        'endings': ['is using wrap to wrap a pair of skis.',
                    'is ripping level tiles off.',
                    "is holding a rubik's cube.",
                    'starts pulling up roofing on a roof.'],
        'label': '3'
    """

    # Jinja2 prompt template: the cleaned "<activity_label>: <ctx_a> <Ctx_b>"
    # context, followed by the options and an "Answer:" cue when options are set.
    instruction = "{{ preprocess(activity_label + ': ' + ctx_a + ' ' + ctx_b.capitalize()) }}{{'\n' + options + '\nAnswer:' if options}}"
    # Split names / loader arguments; presumably consumed by the base class
    # (not visible in this file) -- TODO confirm.
    evaluation_set = "validation"
    example_set = None
    load_args = ("PleIAs/GoldenSwag",)

    def init_arguments(self):
        """Expose :meth:`preprocess` to the Jinja2 environment.

        The ``instruction`` template calls ``preprocess(...)``, so the function
        has to be registered as a template global.
        """
        self.jinja2_env.globals["preprocess"] = self.preprocess

    @staticmethod
    def preprocess(text):
        """Clean WikiHow-style markup out of a context or ending string.

        Args:
            text: Raw text, possibly containing bracketed tags such as
                ``[title]`` or ``[step]``.

        Returns:
            The text with ``" [title]"`` turned into a sentence break, every
            remaining ``[...]`` tag removed, double spaces collapsed to a
            single space, and surrounding whitespace stripped.
        """
        text = text.strip()
        text = text.replace(" [title]", ". ")
        # Non-greedy match so that each bracketed tag is removed separately.
        text = re.sub(r"\[.*?\]", "", text)
        # The two substitutions above leave double spaces behind
        # (e.g. "Foo [title] Bar" -> "Foo.  Bar"); collapse them. Replacing a
        # single space with a single space, as before, had no effect.
        text = text.replace("  ", " ")
        return text.strip()

    def format_instance(self, instance):
        """Attach the cleaned candidate endings to ``instance`` as ``options``.

        Args:
            instance: A dataset record with an ``"endings"`` sequence of
                strings.

        Returns:
            The same ``instance`` object, mutated in place with an
            ``"options"`` list holding one preprocessed string per ending.
        """
        # Iterate over the endings themselves rather than a hard-coded count.
        instance["options"] = [self.preprocess(ending) for ending in instance["endings"]]
        return instance

    @cached_property
    def references(self):
        """Gold answer indices: the integer ``label`` of every evaluation instance."""
        return [int(instance["label"]) for instance in self.evaluation_data]
Loading