From 3e7bfef59904cd26bfa2eb95c923d5d82c533ef0 Mon Sep 17 00:00:00 2001
From: Troy Sincomb
Date: Thu, 16 Jul 2020 16:40:48 -0700
Subject: [PATCH 1/5] alt version of what could

---
Review note: the per-row branch in ontologyIDPopulation tested the
accumulator `iris` where the per-row match list `_iris` was meant, and
compared its length to 0 instead of 1; both conditions are corrected in
this patch. The blob ids on the `index` lines predate that correction.

 sparcur/cli.py | 31 +++++++++++++++++++++++++++++--
 1 file changed, 29 insertions(+), 2 deletions(-)

diff --git a/sparcur/cli.py b/sparcur/cli.py
index ec54a4fa..d19ef335 100755
--- a/sparcur/cli.py
+++ b/sparcur/cli.py
@@ -1673,8 +1673,8 @@ def protocols(self):
         inst = [i for i in wat]
         res = Async(rate=5)(deferred(i.dereference)(idlib.Auto) for i in inst)
         pis = [i for i in res if isinstance(i, idlib.Pio)]
-        #dat = Async(rate=5)(deferred(lambda p: p.data)(i) for i in pis)
-        #dois = [d['protocol']['doi'] for d in dat if d]
+        # dat = Async(rate=5)(deferred(lambda p: p.data)(i) for i in pis)
+        # dois = [d['protocol']['doi'] for d in dat if d]
         dois = [p.doi for p in pis]
         breakpoint()
 
@@ -1690,6 +1690,33 @@ def integration(self):
         #apj = [pj for c in intr.anchor.children for pj in c.protocol_jsons]
         breakpoint()
 
+    def ontologyIDPopulation(self):
+        import gspread  # https://gspread.readthedocs.io/en/latest/oauth2.html#oauth-client-id
+        # with google drive api enabled you can access each google sheet by name!
+        import pandas as pd
+        from pyontutils.scigraph import Graph, Vocabulary
+        sgv = Vocabulary(cache=True, verbose=False)
+
+        gc = gspread.oauth()
+        sparc_proctur = gc.open('sparc protcur annotation tags')
+        worksheet = sparc_proctur.worksheet('working-ilxtr:technique')
+        df = pd.DataFrame(worksheet.get_all_values())
+        df.columns = df.iloc[0]  # first row is actually the header
+        df.drop(df.index[0], inplace=True)  # drop first row that was the header
+
+        iris = []
+        for row in df.itertuples():
+            matches = sgv.findByTerm(term=row.exact)
+            _iris = [match['iri'] for match in matches]
+            if not _iris:  # no matches for this row's term
+                iris.append(None)
+            elif len(_iris) == 1:  # exactly one match: store the bare iri
+                iris.append(_iris[0])
+            else:
+                iris.append(', '.join(_iris))
+        df['ontology id'] = iris
+        worksheet.update([df.columns.values.tolist()] + df.values.tolist())
+
 
 class Fix(Shell):
 

From 5baaafc60bd6534fd13bef7fdcd36283a5d22c5d Mon Sep 17 00:00:00 2001
From: Troy Sincomb
Date: Thu, 16 Jul 2020 16:44:13 -0700
Subject: [PATCH 2/5] cleaned up

---
 sparcur/cli.py | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/sparcur/cli.py b/sparcur/cli.py
index d19ef335..9c6a3f7b 100755
--- a/sparcur/cli.py
+++ b/sparcur/cli.py
@@ -1691,13 +1691,18 @@ def integration(self):
         breakpoint()
 
     def ontologyIDPopulation(self):
-        import gspread  # https://gspread.readthedocs.io/en/latest/oauth2.html#oauth-client-id
-        # with google drive api enabled you can access each google sheet by name!
+        """ Update ontology id col based on exact column label
+
+        # https://gspread.readthedocs.io/en/latest/oauth2.html#oauth-client-id
+        # with google drive api enabled you can access each google sheet by name!
+        """
+        import gspread
         import pandas as pd
         from pyontutils.scigraph import Graph, Vocabulary
-        sgv = Vocabulary(cache=True, verbose=False)
 
-        gc = gspread.oauth()
+        sgv = Vocabulary(cache=True, verbose=False)  # direct import seemed simple
+
+        gc = gspread.oauth()  # uses ~/.config/gspread/credentials.json; will prop if missing something
         sparc_proctur = gc.open('sparc protcur annotation tags')
         worksheet = sparc_proctur.worksheet('working-ilxtr:technique')
         df = pd.DataFrame(worksheet.get_all_values())
@@ -1715,6 +1720,7 @@ def ontologyIDPopulation(self):
             else:
                 iris.append(', '.join(_iris))
         df['ontology id'] = iris
+
         worksheet.update([df.columns.values.tolist()] + df.values.tolist())
 
 

From e4a8dc42e00332b6dd95eadb13ba7e87b5fd8c6c Mon Sep 17 00:00:00 2001
From: Troy Sincomb
Date: Thu, 16 Jul 2020 16:55:45 -0700
Subject: [PATCH 3/5] -

---
 sparcur/cli.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/sparcur/cli.py b/sparcur/cli.py
index 9c6a3f7b..c2a9c1d5 100755
--- a/sparcur/cli.py
+++ b/sparcur/cli.py
@@ -1693,8 +1693,8 @@ def integration(self):
     def ontologyIDPopulation(self):
         """ Update ontology id col based on exact column label
 
-        # https://gspread.readthedocs.io/en/latest/oauth2.html#oauth-client-id
-        # with google drive api enabled you can access each google sheet by name!
+            # https://gspread.readthedocs.io/en/latest/oauth2.html#oauth-client-id
+            # with google drive api enabled you can access each google sheet by name!
         """
         import gspread
         import pandas as pd
@@ -1702,7 +1702,7 @@ def ontologyIDPopulation(self):
 
         sgv = Vocabulary(cache=True, verbose=False)  # direct import seemed simple
 
-        gc = gspread.oauth()  # uses ~/.config/gspread/credentials.json; will prop if missing something
+        gc = gspread.oauth()  # add OAuth2 as ~/.config/gspread/credentials.json; will prop if missing something
         sparc_proctur = gc.open('sparc protcur annotation tags')
         worksheet = sparc_proctur.worksheet('working-ilxtr:technique')
         df = pd.DataFrame(worksheet.get_all_values())

From cae97be1b6cbb10af19cba7dd1acfb488a9e44b3 Mon Sep 17 00:00:00 2001
From: Troy Sincomb
Date: Thu, 16 Jul 2020 17:39:00 -0700
Subject: [PATCH 4/5] -

---
 sparcur/cli.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/sparcur/cli.py b/sparcur/cli.py
index c2a9c1d5..82405445 100755
--- a/sparcur/cli.py
+++ b/sparcur/cli.py
@@ -1705,9 +1705,8 @@ def ontologyIDPopulation(self):
         gc = gspread.oauth()  # add OAuth2 as ~/.config/gspread/credentials.json; will prop if missing something
         sparc_proctur = gc.open('sparc protcur annotation tags')
         worksheet = sparc_proctur.worksheet('working-ilxtr:technique')
-        df = pd.DataFrame(worksheet.get_all_values())
-        df.columns = df.iloc[0]  # first row is actually the header
-        df.drop(df.index[0], inplace=True)  # drop first row that was the header
+        row_list = worksheet.get_all_values()
+        df = pd.DataFrame(row_list[1:], columns=row_list[0])
 
         iris = []
         for row in df.itertuples():

From 724d72d42ab63ef9c3d8e50a0129eabe89a16e73 Mon Sep 17 00:00:00 2001
From: Troy Sincomb
Date: Thu, 16 Jul 2020 17:40:57 -0700
Subject: [PATCH 5/5] -

---
 sparcur/cli.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sparcur/cli.py b/sparcur/cli.py
index 82405445..46463be4 100755
--- a/sparcur/cli.py
+++ b/sparcur/cli.py
@@ -1705,8 +1705,8 @@ def ontologyIDPopulation(self):
         gc = gspread.oauth()  # add OAuth2 as ~/.config/gspread/credentials.json; will prop if missing something
         sparc_proctur = gc.open('sparc protcur annotation tags')
         worksheet = sparc_proctur.worksheet('working-ilxtr:technique')
-        row_list = worksheet.get_all_values()
-        df = pd.DataFrame(row_list[1:], columns=row_list[0])
+        header, *body = worksheet.get_all_values()
+        df = pd.DataFrame(body, columns=header)
 
         iris = []
         for row in df.itertuples():