-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathgsm8k.py
More file actions
23 lines (21 loc) · 846 Bytes
/
gsm8k.py
File metadata and controls
23 lines (21 loc) · 846 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
@LOAD_DATASET.register_module()
class NewGSM8KDataset(BaseDataset):
    """Loader for a GSM8K-style dataset exposing ``train`` and ``test`` splits."""

    @staticmethod
    def load(path):
        """Load the dataset referenced by ``path``.

        ``path`` is first passed through ``get_data_path``. If the
        ``DATASET_SOURCE`` environment variable equals ``'ModelScope'``, the
        dataset is obtained via ``MsDataset.load``. Otherwise it is read from
        local JSON Lines files:

        * when the resolved path is a directory, ``train.jsonl`` and
          ``test.jsonl`` inside that directory are read, one per split;
        * otherwise the resolved path is opened as a single JSON Lines file
          and its records are used for both the ``train`` and ``test``
          splits.

        Blank lines in a JSON Lines file are ignored.

        Args:
            path: Dataset identifier or filesystem location, as accepted by
                ``get_data_path``.

        Returns:
            The object returned by ``MsDataset.load`` in the ModelScope case,
            otherwise a ``DatasetDict`` with ``train`` and ``test`` entries.

        Raises:
            OSError: If a JSON Lines file cannot be opened.
            json.JSONDecodeError: If a non-blank line is not valid JSON.
        """
        path = get_data_path(path)
        if environ.get('DATASET_SOURCE') == 'ModelScope':
            # Imported lazily so modelscope is only required when it is used.
            from modelscope import MsDataset
            return MsDataset.load(dataset_name=path)

        # A directory holds one file per split; anything else is treated as a
        # single file shared by both splits (the previous behaviour, which
        # could not handle directories at all).
        has_split_files = os.path.isdir(path)

        datasets = {}
        for split in ['train', 'test']:
            if has_split_files:
                split_path = os.path.join(path, split + '.jsonl')
            else:
                split_path = path
            with open(split_path, 'r', encoding='utf-8') as f:
                # Strip first so whitespace-only lines are skipped rather
                # than handed to json.loads, which would reject them.
                records = [
                    json.loads(text) for text in map(str.strip, f) if text
                ]
            datasets[split] = Dataset.from_list(records)
        return DatasetDict(datasets)