-
Notifications
You must be signed in to change notification settings - Fork 16
/
Copy pathdataloader.py
52 lines (37 loc) · 1.44 KB
/
dataloader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
import pandas as pd
class Dataloader():
    '''Bike Sharing Dataset dataloader.

    Reads the CSV file once at construction time, shuffles its rows with a
    fixed seed and exposes the shuffled frame either as a whole or as a
    60:20:20 train/validation/test split.
    '''

    def __init__(self, csv_path):
        ''' Initialize Bike Sharing Dataset dataloader.

        Arguments:
            csv_path {str} -- Path to the Bike Sharing Dataset CSV file.
        '''
        self.csv_path = csv_path
        self.data = pd.read_csv(self.csv_path)

        # Shuffle. DataFrame.sample returns a NEW frame, so the result has to
        # be assigned back, otherwise the rows stay in file order. Sampling is
        # done without replacement so that every row appears exactly once and
        # no row can end up in more than one split. The fixed random_state
        # keeps the order reproducible between runs, and the index is reset so
        # that it is a plain 0..n-1 range again.
        shuffled = self.data.sample(frac=1.0, random_state=1)
        self.data = shuffled.reset_index(drop=True)

    def getHeader(self):
        ''' Get the column names of the Bike Sharing CSV file.

        Returns:
            [list of str] -- the column names of the csv file
        '''
        return list(self.data.columns.values)

    def getData(self):
        ''' Get the pandas frames for the training, validation and test split

        The splits are consecutive, non-overlapping row ranges of the
        shuffled data and together cover every row.

        Returns:
            [pandas frames] -- the pandas frames for the different splits,
                               in the order (train, val, test)
        '''
        # Split data into train, validation and test set with 60:20:20 ratio.
        # Integer arithmetic avoids floating point rounding of the boundaries.
        n_rows = len(self.data)
        split_train = n_rows * 60 // 100
        split_val = n_rows * 80 // 100

        # .iloc makes it explicit that the slices are positional.
        train = self.data.iloc[:split_train]
        val = self.data.iloc[split_train:split_val]
        test = self.data.iloc[split_val:]
        return train, val, test

    def getFullData(self):
        ''' Get all the data in one single pandas frame.

        Returns:
            [pandas frame] -- the complete (shuffled) Bike Sharing Dataset data
        '''
        return self.data