From a9f878db9c8a272ed614b99d5451ea7e54165876 Mon Sep 17 00:00:00 2001 From: Ben Date: Wed, 14 Apr 2021 20:29:04 -0400 Subject: [PATCH 1/2] Added an adapter class that cleans and transforms the data from github --- src/covidify/sources/data_adapter.py | 146 +++++++++++++++ .../sources/data_sources_interface.py | 10 ++ src/covidify/sources/github.py | 169 +++--------------- 3 files changed, 180 insertions(+), 145 deletions(-) create mode 100644 src/covidify/sources/data_adapter.py create mode 100644 src/covidify/sources/data_sources_interface.py diff --git a/src/covidify/sources/data_adapter.py b/src/covidify/sources/data_adapter.py new file mode 100644 index 0000000..bb36a01 --- /dev/null +++ b/src/covidify/sources/data_adapter.py @@ -0,0 +1,146 @@ +#!/usr/bin/env python3 +from __future__ import print_function +import pandas as pd +import re +import os +import sys +import git +import numpy as np +from tqdm import tqdm +from time import strftime +from dateutil.parser import parse +from datetime import datetime, date, time +from covidify.config import REPO, TMP_FOLDER, TMP_GIT, DATA, KEEP_COLS, NUMERIC_COLS +from data_sources_interface import DataInterface +from github import Github + +class DataAdapter(DataInterface): + + def __init__(self): + pass + + def clean_sheet_names(new_ranges): + # Remove all sheets that dont have a numeric header + return [x for x in new_ranges if re.search(r'\d', x)] + + def clone_repo(TMP_FOLDER, REPO): + print('Cloning Data Repo...') + git.Git(TMP_FOLDER).clone(REPO) + + def get_date(last_update): + return parse(str(last_update).split(' ')[0]).strftime("%Y-%m-%d") + + def get_csv_date(f): + return get_date(f.split('.')[0] + ' ') + + + def fix_country_names(tmp_df): + ''' + Cleaning up after JHU's bullshit data management + ''' + # Asian Countries + tmp_df['country'] = np.where((tmp_df['country'] == 'Mainland China'),'China', tmp_df['country']) + tmp_df['country'] = np.where((tmp_df['country'] == 'Korea, South'),'South Korea', 
tmp_df['country']) + tmp_df['country'] = np.where((tmp_df['country'] == 'Republic of Korea'),'South Korea', tmp_df['country']) + tmp_df['country'] = np.where((tmp_df['country'] == 'Hong Kong SAR'),'Hong Kong', tmp_df['country']) + tmp_df['country'] = np.where((tmp_df['country'] == 'Taipei and environs'),'Taiwan', tmp_df['country']) + tmp_df['country'] = np.where((tmp_df['country'] == 'Taiwan*'),'Taiwan', tmp_df['country']) + tmp_df['country'] = np.where((tmp_df['country'] == 'Macao SAR'),'Macau', tmp_df['country']) + tmp_df['country'] = np.where((tmp_df['country'] == 'Iran (Islamic Republic of)'),'Iran', tmp_df['country']) + tmp_df['country'] = np.where((tmp_df['country'] == 'Viet Nam'),'Vietnam', tmp_df['country']) + + #European Countries + tmp_df['country'] = np.where((tmp_df['country'] == 'UK'),'United Kingdom', tmp_df['country']) + tmp_df['country'] = np.where((tmp_df['country'] == ' Azerbaijan'),'Azerbaijan', tmp_df['country']) + tmp_df['country'] = np.where((tmp_df['country'] == 'Bosnia and Herzegovina'),'Bosnia', tmp_df['country']) + tmp_df['country'] = np.where((tmp_df['country'] == 'Czech Republic'),'Czechia', tmp_df['country']) + tmp_df['country'] = np.where((tmp_df['country'] == 'Republic of Ireland'),'Ireland', tmp_df['country']) + tmp_df['country'] = np.where((tmp_df['country'] == 'North Ireland'),'Ireland', tmp_df['country']) + tmp_df['country'] = np.where((tmp_df['country'] == 'Republic of Moldova'),'Moldova', tmp_df['country']) + tmp_df['country'] = np.where((tmp_df['country'] == 'Russian Federation'),'Russia', tmp_df['country']) + + #African Countries + tmp_df['country'] = np.where((tmp_df['country'] == 'Congo (Brazzaville)'),'Congo', tmp_df['country']) + tmp_df['country'] = np.where((tmp_df['country'] == 'Congo (Kinshasa)'),'Congo', tmp_df['country']) + tmp_df['country'] = np.where((tmp_df['country'] == 'Republic of the Congo'),'Congo', tmp_df['country']) + tmp_df['country'] = np.where((tmp_df['country'] == 'Gambia, The'),'Gambia', 
tmp_df['country']) + tmp_df['country'] = np.where((tmp_df['country'] == 'The Gambia'),'Gambia', tmp_df['country']) + + # Western Countries + tmp_df['country'] = np.where((tmp_df['country'] == 'USA'),'America', tmp_df['country']) + tmp_df['country'] = np.where((tmp_df['country'] == 'US'),'America', tmp_df['country']) + tmp_df['country'] = np.where((tmp_df['country'] == 'Bahamas, The'),'The Bahamas', tmp_df['country']) + tmp_df['country'] = np.where((tmp_df['country'] == 'Bahamas'),'The Bahamas', tmp_df['country']) + tmp_df['country'] = np.where((tmp_df['country'] == 'st. Martin'),'Saint Martin', tmp_df['country']) + tmp_df['country'] = np.where((tmp_df['country'] == 'St. Martin'),'Saint Martin', tmp_df['country']) + + + # Others + tmp_df['country'] = np.where((tmp_df['country'] == 'Cruise Ship'),'Others', tmp_df['country']) + + return tmp_df + + # Now that we have all the data we now need to clean it + # - Fill null values + # - remore suspected values + # - change column names + def clean_data(df): + tmp_df = df.copy() + + if 'Demised' in tmp_df.columns: + tmp_df.rename(columns={'Demised':'deaths'}, inplace=True) + + if 'Country/Region' in tmp_df.columns: + tmp_df.rename(columns={'Country/Region':'country'}, inplace=True) + + if 'Country_Region' in tmp_df.columns: + tmp_df.rename(columns={'Country_Region':'country'}, inplace=True) + + if 'Province/State' in tmp_df.columns: + tmp_df.rename(columns={'Province/State':'province'}, inplace=True) + + if 'Province_State' in tmp_df.columns: + tmp_df.rename(columns={'Province_State':'province'}, inplace=True) + + if 'Last Update' in tmp_df.columns: + tmp_df.rename(columns={'Last Update':'datetime'}, inplace=True) + + if 'Last_Update' in tmp_df.columns: + tmp_df.rename(columns={'Last_Update':'datetime'}, inplace=True) + + #Lower case all col names + tmp_df.columns = map(str.lower, tmp_df.columns) + + for col in tmp_df[NUMERIC_COLS]: + tmp_df[col] = tmp_df[col].fillna(0) + tmp_df[col] = tmp_df[col].astype(int) + + return 
tmp_df + + def get_data(cleaned_sheets): + all_csv = [] + # Import all CSV's + for f in tqdm(sorted(cleaned_sheets), desc='... loading data: '): + if 'csv' in f: + try: + tmp_df = pd.read_csv(os.path.join(DATA, f), index_col=None,header=0, parse_dates=['Last Update']) + except: + # Temporary fix for JHU's bullshit data management + tmp_df = pd.read_csv(os.path.join(DATA, f), index_col=None,header=0, parse_dates=['Last_Update']) + + tmp_df = clean_data(tmp_df) + tmp_df['date'] = tmp_df['datetime'].apply(get_date) # remove time to get date + tmp_df['file_date'] = get_csv_date(f) #Get date of csv from file name + tmp_df = tmp_df[KEEP_COLS] + tmp_df['province'].fillna(tmp_df['country'], inplace=True) #If no region given, fill it with country + all_csv.append(tmp_df) + + df_raw = pd.concat(all_csv, axis=0, ignore_index=True, sort=True) # concatenate all csv's into one df + df_raw = fix_country_names(df_raw) # Fix mispelled country names + df_raw = df_raw.sort_values(by=['datetime']) + return df_raw + + def get(self): + github = Github() + cleaned_sheets = clean_sheet_names(github.get()) + df = get_data(cleaned_sheets) diff --git a/src/covidify/sources/data_sources_interface.py b/src/covidify/sources/data_sources_interface.py new file mode 100644 index 0000000..5f14c47 --- /dev/null +++ b/src/covidify/sources/data_sources_interface.py @@ -0,0 +1,10 @@ +import abc + +class DataInterface(object, metaclass=abc.ABCMeta): + + def __init__(self): + pass + + @abc.abstractmethod + def get(): + raise NotImplementedError('User must define get()') diff --git a/src/covidify/sources/github.py b/src/covidify/sources/github.py index 38c7a71..03f0bb8 100644 --- a/src/covidify/sources/github.py +++ b/src/covidify/sources/github.py @@ -9,161 +9,40 @@ from tqdm import tqdm from time import strftime from dateutil.parser import parse -from datetime import datetime, date, time +from datetime import datetime, date, time from covidify.config import REPO, TMP_FOLDER, TMP_GIT, DATA, KEEP_COLS, 
NUMERIC_COLS +from data_sources_interface import DataInterface -def clean_sheet_names(new_ranges): - # Remove all sheets that dont have a numeric header - return [x for x in new_ranges if re.search(r'\d', x)] +class Github(DataInterface): -def clone_repo(TMP_FOLDER, REPO): - print('Cloning Data Repo...') - git.Git(TMP_FOLDER).clone(REPO) + data_sheet = None -def get_date(last_update): - return parse(str(last_update).split(' ')[0]).strftime("%Y-%m-%d") + def __init__(self): + # Create Tmp Folder + if not os.path.isdir(TMP_FOLDER): + print('Creating folder...') + print('...', TMP_FOLDER) + os.mkdir(TMP_FOLDER) -def get_csv_date(f): - return get_date(f.split('.')[0] + ' ') - - -def fix_country_names(tmp_df): - ''' - Cleaning up after JHU's bullshit data management - ''' - # Asian Countries - tmp_df['country'] = np.where((tmp_df['country'] == 'Mainland China'),'China', tmp_df['country']) - tmp_df['country'] = np.where((tmp_df['country'] == 'Korea, South'),'South Korea', tmp_df['country']) - tmp_df['country'] = np.where((tmp_df['country'] == 'Republic of Korea'),'South Korea', tmp_df['country']) - tmp_df['country'] = np.where((tmp_df['country'] == 'Hong Kong SAR'),'Hong Kong', tmp_df['country']) - tmp_df['country'] = np.where((tmp_df['country'] == 'Taipei and environs'),'Taiwan', tmp_df['country']) - tmp_df['country'] = np.where((tmp_df['country'] == 'Taiwan*'),'Taiwan', tmp_df['country']) - tmp_df['country'] = np.where((tmp_df['country'] == 'Macao SAR'),'Macau', tmp_df['country']) - tmp_df['country'] = np.where((tmp_df['country'] == 'Iran (Islamic Republic of)'),'Iran', tmp_df['country']) - tmp_df['country'] = np.where((tmp_df['country'] == 'Viet Nam'),'Vietnam', tmp_df['country']) - - #European Countries - tmp_df['country'] = np.where((tmp_df['country'] == 'UK'),'United Kingdom', tmp_df['country']) - tmp_df['country'] = np.where((tmp_df['country'] == ' Azerbaijan'),'Azerbaijan', tmp_df['country']) - tmp_df['country'] = np.where((tmp_df['country'] == 'Bosnia and 
Herzegovina'),'Bosnia', tmp_df['country']) - tmp_df['country'] = np.where((tmp_df['country'] == 'Czech Republic'),'Czechia', tmp_df['country']) - tmp_df['country'] = np.where((tmp_df['country'] == 'Republic of Ireland'),'Ireland', tmp_df['country']) - tmp_df['country'] = np.where((tmp_df['country'] == 'North Ireland'),'Ireland', tmp_df['country']) - tmp_df['country'] = np.where((tmp_df['country'] == 'Republic of Moldova'),'Moldova', tmp_df['country']) - tmp_df['country'] = np.where((tmp_df['country'] == 'Russian Federation'),'Russia', tmp_df['country']) - - #African Countries - tmp_df['country'] = np.where((tmp_df['country'] == 'Congo (Brazzaville)'),'Congo', tmp_df['country']) - tmp_df['country'] = np.where((tmp_df['country'] == 'Congo (Kinshasa)'),'Congo', tmp_df['country']) - tmp_df['country'] = np.where((tmp_df['country'] == 'Republic of the Congo'),'Congo', tmp_df['country']) - tmp_df['country'] = np.where((tmp_df['country'] == 'Gambia, The'),'Gambia', tmp_df['country']) - tmp_df['country'] = np.where((tmp_df['country'] == 'The Gambia'),'Gambia', tmp_df['country']) - - # Western Countries - tmp_df['country'] = np.where((tmp_df['country'] == 'USA'),'America', tmp_df['country']) - tmp_df['country'] = np.where((tmp_df['country'] == 'US'),'America', tmp_df['country']) - tmp_df['country'] = np.where((tmp_df['country'] == 'Bahamas, The'),'The Bahamas', tmp_df['country']) - tmp_df['country'] = np.where((tmp_df['country'] == 'Bahamas'),'The Bahamas', tmp_df['country']) - tmp_df['country'] = np.where((tmp_df['country'] == 'st. Martin'),'Saint Martin', tmp_df['country']) - tmp_df['country'] = np.where((tmp_df['country'] == 'St. 
Martin'),'Saint Martin', tmp_df['country']) - - - # Others - tmp_df['country'] = np.where((tmp_df['country'] == 'Cruise Ship'),'Others', tmp_df['country']) - - return tmp_df - -# Now that we have all the data we now need to clean it -# - Fill null values -# - remore suspected values -# - change column names -def clean_data(df): - tmp_df = df.copy() - - if 'Demised' in tmp_df.columns: - tmp_df.rename(columns={'Demised':'deaths'}, inplace=True) - - if 'Country/Region' in tmp_df.columns: - tmp_df.rename(columns={'Country/Region':'country'}, inplace=True) - - if 'Country_Region' in tmp_df.columns: - tmp_df.rename(columns={'Country_Region':'country'}, inplace=True) - - if 'Province/State' in tmp_df.columns: - tmp_df.rename(columns={'Province/State':'province'}, inplace=True) - - if 'Province_State' in tmp_df.columns: - tmp_df.rename(columns={'Province_State':'province'}, inplace=True) - - if 'Last Update' in tmp_df.columns: - tmp_df.rename(columns={'Last Update':'datetime'}, inplace=True) - - if 'Last_Update' in tmp_df.columns: - tmp_df.rename(columns={'Last_Update':'datetime'}, inplace=True) - - #Lower case all col names - tmp_df.columns = map(str.lower, tmp_df.columns) - - for col in tmp_df[NUMERIC_COLS]: - tmp_df[col] = tmp_df[col].fillna(0) - tmp_df[col] = tmp_df[col].astype(int) - - return tmp_df - -def get_data(cleaned_sheets): - all_csv = [] - # Import all CSV's - for f in tqdm(sorted(cleaned_sheets), desc='... 
loading data: '): - if 'csv' in f: + #Check if repo exists + #git pull if it does + if not os.path.isdir(TMP_GIT): + clone_repo(TMP_FOLDER, REPO) + else: try: - tmp_df = pd.read_csv(os.path.join(DATA, f), index_col=None,header=0, parse_dates=['Last Update']) + print('git pull from', REPO) + rep = git.Repo(TMP_GIT) + rep.remotes.origin.pull() except: - # Temporary fix for JHU's bullshit data management - tmp_df = pd.read_csv(os.path.join(DATA, f), index_col=None,header=0, parse_dates=['Last_Update']) - - tmp_df = clean_data(tmp_df) - tmp_df['date'] = tmp_df['datetime'].apply(get_date) # remove time to get date - tmp_df['file_date'] = get_csv_date(f) #Get date of csv from file name - tmp_df = tmp_df[KEEP_COLS] - tmp_df['province'].fillna(tmp_df['country'], inplace=True) #If no region given, fill it with country - all_csv.append(tmp_df) + print('Could not pull from', REPO) + sys.exit(1) - df_raw = pd.concat(all_csv, axis=0, ignore_index=True, sort=True) # concatenate all csv's into one df - df_raw = fix_country_names(df_raw) # Fix mispelled country names - df_raw = df_raw.sort_values(by=['datetime']) - return df_raw + data_sheet = os.listdir(DATA) -# use this function to fetch the data -def get(): - - # Create Tmp Folder - if not os.path.isdir(TMP_FOLDER): - print('Creating folder...') - print('...', TMP_FOLDER) - os.mkdir(TMP_FOLDER) - #Check if repo exists - #git pull if it does - if not os.path.isdir(TMP_GIT): - clone_repo(TMP_FOLDER, REPO) - else: - try: - print('git pull from', REPO) - rep = git.Repo(TMP_GIT) - rep.remotes.origin.pull() - except: - print('Could not pull from', REPO) - sys.exit(1) - sheets = os.listdir(DATA) - - # Clean the result to the sheet tabs we want - print('Getting sheets...') - cleaned_sheets = clean_sheet_names(sheets) + # use this function to fetch the data + def get(self): - # Aggregate all the data from sheets - df = get_data(cleaned_sheets) - - #Clean the column names - return df \ No newline at end of file + return self.data_sheet 
def columnize(countries, width=30):
    """Format names into rows of three left-justified columns.

    Uses itertools.zip_longest so the final one or two names are still shown
    when len(countries) is not a multiple of three -- the original zip()
    silently truncated them. `width` generalizes the hard-coded column width
    (default 30 preserves the original layout).
    """
    from itertools import zip_longest
    row_fmt = '{{:<{w}}}{{:<{w}}}{{:<}}'.format(w=width)
    triples = zip_longest(countries[::3], countries[1::3], countries[2::3],
                          fillvalue='')
    return [row_fmt.format(a, b, c) for a, b, c in triples]


def get_countries():
    """Print every country/area with at least one confirmed case, three per
    row, followed by the total count."""
    print('Getting available countries...')
    # Initialize an instance of the adapter and pull the cleaned dataframe.
    github_data = PandasDataAdapter()
    df = github_data.get()
    df = df[df.confirmed > 0]

    countries = sorted(set(df.country.values))
    for line in columnize(countries):
        print(line)

    print('\n\033[1;31mNUMBER OF COUNTRIES/AREAS INFECTED:\033[0;0m', len(countries))