Skip to content

Commit

Permalink
Merge pull request #39 from linkml/additional-formats
Browse files Browse the repository at this point in the history
Additional formats
  • Loading branch information
cmungall authored Mar 6, 2025
2 parents e43d18f + 9ccdf50 commit e4aa4ba
Show file tree
Hide file tree
Showing 16 changed files with 3,206 additions and 791 deletions.
812 changes: 812 additions & 0 deletions docs/how-to/Calculate-Enrichment.ipynb

Large diffs are not rendered by default.

2,231 changes: 1,461 additions & 770 deletions poetry.lock

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ pystow = "^0.5.4"
black = { version=">=24.0.0", optional = true }
ruff = { version=">=0.6.2", optional = true }
llm = { version="*", optional = true }
lightrdf = { version="*", optional = true }
tiktoken = { version="*", optional = true }
pymongo = "^4.11"
neo4j = { version="*", optional = true }
Expand Down Expand Up @@ -91,6 +92,7 @@ renderer = ["linkml_renderer"]
fastapi = ["fastapi", "uvicorn"]
frictionless = ["frictionless"]
scipy = ["scipy", "scikit-learn"]
rdf = ["lightrdf"]
#ibis = ["ibis-framework", "multipledispatch", "gcsfs"]
bigquery = ["google-cloud-bigquery"]
all = ["llm", "mongodb", "neo4j", "validation", "map", "renderer", "bigquery"]
Expand Down
56 changes: 53 additions & 3 deletions src/linkml_store/api/collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -454,7 +454,12 @@ def get_one(self, id: IDENTIFIER, **kwargs) -> Optional[OBJECT]:
return qr.rows[0]
return None

def find(self, where: Optional[Any] = None, **kwargs) -> QueryResult:
def find(
self,
where: Optional[Any] = None,
select_cols: Optional[List[str] ] = None,
**kwargs,
) -> QueryResult:
"""
Find objects in the collection using a where query.
Expand Down Expand Up @@ -484,10 +489,14 @@ def find(self, where: Optional[Any] = None, **kwargs) -> QueryResult:
:param where:
:param select_cols:
:param kwargs:
:return:
"""
query = self._create_query(where_clause=where)
query = self._create_query(
where_clause=where,
select_cols=select_cols,
)
self._pre_query_hook(query)
return self.query(query, **kwargs)

Expand Down Expand Up @@ -608,6 +617,47 @@ def row2array(row):
new_qr.rows = [r[1] for r in results]
return new_qr

def group_by(
    self,
    group_by_fields: List[str],
    inlined_field="objects",
    agg_map: Optional[Dict[str, List[str]]] = None,
    where: Optional[Dict] = None,
    **kwargs,
) -> QueryResult:
    """
    Group objects in the collection by one or more fields.

    All rows matching ``where`` are fetched and bucketed by the values of
    the key fields. Each bucket yields one result row holding the key
    field values plus, under ``inlined_field``, a list of dicts with the
    remaining ("list") fields of every member row.

    :param group_by_fields: field name(s) to group on; a single string is
        also accepted and treated as a one-element list
    :param inlined_field: key in each result row under which the grouped
        member objects are stored
    :param agg_map: optional mapping with two recognized keys: ``"first"``
        (extra fields that become part of the grouping key and are
        reported once per group) and ``"list"`` (fields copied into each
        member object). When ``"list"`` is missing or empty, every column
        that is not a key field is used
    :param where: optional where clause passed to :meth:`find`
    :param kwargs: accepted for signature compatibility; not used here
    :return: a QueryResult with one row per distinct key
    """
    if isinstance(group_by_fields, str):
        group_by_fields = [group_by_fields]
    # agg_map is optional: fall back to an empty mapping so the lookups
    # below do not fail with AttributeError on None.
    if agg_map is None:
        agg_map = {}
    # NOTE(review): limit=-1 presumably requests all rows -- confirm against query().
    df = self.find(where=where, limit=-1).rows_dataframe
    pk_fields = list(agg_map.get("first", [])) + list(group_by_fields)
    list_fields = agg_map.get("list", [])
    if not list_fields:
        list_fields = [col for col in df.columns if col not in pk_fields]

    # Bucket member objects by their key tuple; dict insertion order keeps
    # groups in order of first appearance.
    grouped_objs = defaultdict(list)
    for _, row in df.iterrows():
        pk = tuple(row[pk_fields])
        grouped_objs[pk].append({k: row[k] for k in list_fields})

    results = []
    for pk, objs in grouped_objs.items():
        top_obj = dict(zip(pk_fields, pk))
        top_obj[inlined_field] = objs
        results.append(top_obj)
    return QueryResult(num_rows=len(results), rows=results)



@property
def is_internal(self) -> bool:
"""
Expand Down Expand Up @@ -1062,7 +1112,7 @@ def induce_class_definition_from_objects(
multivalued = any(multivalueds)
inlined = any(inlineds)
if multivalued and False in multivalueds:
raise ValueError(f"Mixed list non list: {vs} // inferred= {multivalueds}")
logger.info(f"Mixed list non list: {vs} // inferred= {multivalueds}")
# if not rngs:
# raise AssertionError(f"Empty rngs for {k} = {vs}")
rng = rngs[0] if rngs else None
Expand Down
6 changes: 4 additions & 2 deletions src/linkml_store/api/stores/duckdb/duckdb_collection.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import logging
from typing import Any, Dict, List, Optional, Union
from typing import Any, Dict, List, Optional, Union, Tuple

import sqlalchemy as sqla
from linkml_runtime.linkml_model import ClassDefinition, SlotDefinition
Expand Down Expand Up @@ -94,7 +94,9 @@ def delete_where(self, where: Optional[Dict[str, Any]] = None, missing_ok=True,

def query_facets(
self, where: Dict = None, facet_columns: List[str] = None, facet_limit=DEFAULT_FACET_LIMIT, **kwargs
) -> Dict[str, Dict[str, int]]:
) -> Dict[Union[str, Tuple[str, ...]], List[Tuple[Any, int]]]:
if facet_limit is None:
facet_limit = DEFAULT_FACET_LIMIT
results = {}
cd = self.class_definition()
with self.parent.engine.connect() as conn:
Expand Down
2 changes: 2 additions & 0 deletions src/linkml_store/api/stores/mongodb/mongodb_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,8 @@ def query_facets(
facet_limit=DEFAULT_FACET_LIMIT,
**kwargs,
) -> Dict[Union[str, Tuple[str, ...]], List[Tuple[Any, int]]]:
if facet_limit is None:
facet_limit = DEFAULT_FACET_LIMIT
results = {}
if not facet_columns:
facet_columns = list(self.class_definition().attributes.keys())
Expand Down
1 change: 0 additions & 1 deletion src/linkml_store/api/stores/mongodb/mongodb_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@ def _db_name(self) -> str:
if self.handle:
parsed_url = urlparse(self.handle)
path_parts = parsed_url.path.lstrip("/").split("?")[0].split("/")
print(path_parts)
db_name = path_parts[0] if path_parts else "default"
else:
db_name = "default"
Expand Down
8 changes: 7 additions & 1 deletion src/linkml_store/api/stores/solr/solr_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,12 +62,18 @@ def query(self, query: Query, **kwargs) -> QueryResult:
return QueryResult(query=query, num_rows=num_rows, rows=rows)

def query_facets(
self, where: Optional[Dict] = None, facet_columns: List[str] = None, facet_limit=DEFAULT_FACET_LIMIT, **kwargs
self,
where: Optional[Dict] = None,
facet_columns: List[str] = None,
facet_limit=DEFAULT_FACET_LIMIT,
facet_min_count: int = 1,
**kwargs
) -> Dict[str, Dict[str, int]]:
solr_query = self._build_solr_query(where)
solr_query["facet"] = "true"
solr_query["facet.field"] = facet_columns
solr_query["facet.limit"] = facet_limit
solr_query["facet.mincount"] = facet_min_count

logger.info(f"Querying Solr collection {self.alias} for facets with query: {solr_query}")

Expand Down
Loading

0 comments on commit e4aa4ba

Please sign in to comment.