Merge pull request #21 from linkml/web-api
web api
cmungall authored Jul 26, 2024
2 parents a5ce8d0 + 01addac commit 48c6a08
Showing 17 changed files with 1,051 additions and 178 deletions.
115 changes: 76 additions & 39 deletions docs/how-to/Index-caDSR.ipynb

Large diffs are not rendered by default.

39 changes: 39 additions & 0 deletions docs/manual/data-model.ipynb
@@ -0,0 +1,39 @@
{
"cells": [
{
"cell_type": "markdown",
"source": [
"# Data Model\n",
"\n",
"The LinkML-Store data model is based around a three-level structure:\n",
"\n",
" * A `Client` "
],
"metadata": {
"collapsed": false
},
"id": "d3371bb475f6fe4a"
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
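The markdown cell above stops after its first bullet; the other two levels, as exercised throughout this PR (`client.get_database(...)`, `database.get_collection(...)`, `collection.find(...)`), are `Database` and `Collection`. A minimal sketch of walking the three-level hierarchy — the attach handle, collection name, and sample objects are illustrative assumptions:

from linkml_store import Client

# Level 1: the Client is the top-level entry point
client = Client()

# Level 2: a Database, here an in-memory DuckDB instance (assumed handle)
db = client.attach_database("duckdb", alias="demo")

# Level 3: a Collection of objects of one type
collection = db.create_collection("Person", alias="persons")
collection.insert([{"id": "P1", "name": "Alice"}, {"id": "P2", "name": "Bob"}])

result = collection.find({"name": "Alice"})
print(result.num_rows, result.rows)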
45 changes: 33 additions & 12 deletions src/linkml_data_browser/app.py
@@ -1,18 +1,28 @@
import logging
import os
from typing import Any, Dict

import numpy as np
import pandas as pd
import streamlit as st
import yaml
from linkml_runtime.linkml_model import ClassDefinition, SlotDefinition
from linkml_store import Client
from linkml_store.api import Collection
from linkml_store.api.stores.duckdb.duckdb_database import DuckDBDatabase
from linkml_store.api.queries import QueryResult

logger = logging.getLogger(__name__)

# Set page config to make layout "wide" by default
st.set_page_config(layout="wide")

config = None
if os.environ.get("LINKML_STORE_CONFIG"):
with open(os.environ["LINKML_STORE_CONFIG"], "r") as f:
config = yaml.safe_load(f)

# Initialize client
client = Client().from_config(config) if config else Client()

DEFAULT_LIMIT = 25

@@ -33,14 +43,16 @@ def init_reset_filters(cd: ClassDefinition, reset=False):
st.session_state[key] = "" # Assuming text input, adjust for other types


def apply_filters(collection: Collection, filters: Dict[str, Any], offset: int, limit: int, **kwargs):
print(f"FILTERS={filters}")
return collection.find(filters, offset=offset, limit=limit, **kwargs)
def apply_filters(collection: Collection, filters: Dict[str, Any], offset: int, limit: int, **kwargs) -> QueryResult:
print(f"FILTERS={filters} // offset={offset}")
qr = collection.find(filters, offset=offset, limit=limit, **kwargs)
print(f"QR={qr.num_rows}")
return qr


def render_filter_widget(collection: Collection, attribute: SlotDefinition):
"""Render appropriate Streamlit widget based on column type."""
logger.info("Rendering filter widget")
logger.info(f"Rendering filter widget: {attribute.name}")
# print(f"{attribute.name} // RANGE={attribute.range}")
# col_type = attribute.range
col_name = attribute.name
@@ -72,24 +84,30 @@ def render_filter_widget(collection: Collection, attribute: SlotDefinition):
# Main function to render the app
def main():
st.title("LinkML Table Browser")
selected_db = st.selectbox("Select a Database", list(DBS.keys()), key="db_selector")
db_names = list(client.databases.keys())
selected_db = st.selectbox("Select a Database", db_names, key="db_selector")
print(f"DB SELECTED={selected_db}")
# con = duckdb.connect(DB_PATH.format(db=selected_db))
db_name = DB_PATH.format(db=selected_db)
database = DuckDBDatabase(f"duckdb:///{db_name}")
# db_name = DB_PATH.format(db=selected_db)
# database = DuckDBDatabase(f"duckdb:///{db_name}")
database = client.get_database(selected_db)
st.write(f"Connected to {selected_db}")
candidate_tables = DBS.get(selected_db)
candidate_tables = database.list_collection_names()
print(f"COLLECtiONS={candidate_tables}")
if len(candidate_tables) > 1:
curr_table = st.selectbox("Select a Table", candidate_tables, key="table_selector")
else:
curr_table = DBS.get(selected_db)[0]
curr_table = candidate_tables[0]
collection = database.get_collection(curr_table)
print(f"CURR={collection.alias} // {collection.target_class_name}")
cd = collection.class_definition()
print(f"CD={cd.name} // {len(cd.attributes)}")
filters = {}

# Pagination setup
session_state = st.session_state
if "current_page" not in session_state:
print(f"RESETTING CP// {session_state}")
session_state.current_page = 0 # Start with page 0
rows_per_page = DEFAULT_LIMIT

@@ -105,8 +123,8 @@ def main():
if filter_widget is not None and filter_widget != "":
filters[att_name] = filter_widget
new_value = filters.get(att_name)
if prev_value != new_value:
# print(f"CHANGE FOR {att_name}: {prev_value} -> {new_value}")
if prev_value != new_value and not (not prev_value and not new_value):
print(f"CHANGE FOR {att_name}: {prev_value} -> {new_value}")
filter_changed = True
# st.session_state[key] = new_value
facet_key = f"facet_view_{att_name}"
@@ -116,13 +134,15 @@
st.sidebar.write(facet_df)
# If any filter has changed, reset pagination
if filter_changed:
print(f"FILTER CHANGED={filter_changed}")
st.session_state.current_page = 0 # Reset offset
result = apply_filters(collection, filters, session_state.current_page * rows_per_page, rows_per_page)
# if filter_changed:
# facet_results = collection.query_facets(filters, facet_columns=["evidence_type"])
# print(f"FACET={facet_results}")
st.write(f"Number of rows: {result.num_rows}")
st.write(f"Page: {session_state.current_page + 1}")
print(f"SESSION STATE: {session_state}")
filtered_data = pd.DataFrame(result.rows)

# Pagination buttons
@@ -133,6 +153,7 @@
if session_state.current_page > 0:
session_state.current_page -= 1
if next_button.button("Next"):
print(f"NEXT: CP={session_state.current_page} RPP={rows_per_page} NR={result.num_rows}")
# Assuming result.num_rows gives the total number of rows after filtering, not just this page's rows
if (session_state.current_page + 1) * rows_per_page < result.num_rows:
session_state.current_page += 1
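With this change the browser is driven entirely by a linkml-store configuration: the database list and collection names now come from the `Client` rather than the old hard-coded `DBS`/`DB_PATH` maps. A sketch of the same startup logic outside Streamlit — the config file name and its contents are assumptions:

import yaml
from linkml_store import Client

# Mirrors the app's startup: LINKML_STORE_CONFIG names a YAML config file.
with open("linkml-store-config.yaml") as f:  # illustrative file name
    config = yaml.safe_load(f)

client = Client().from_config(config)
print(list(client.databases.keys()))  # the choices offered in the "Select a Database" box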
22 changes: 17 additions & 5 deletions src/linkml_store/api/collection.py
Original file line number Diff line number Diff line change
@@ -346,7 +346,10 @@ def get(self, ids: Optional[List[IDENTIFIER]], **kwargs) -> QueryResult:
id_field = self.identifier_attribute_name
if not id_field:
raise ValueError(f"No identifier for {self.name}")
return self.find({id_field: ids})
if len(ids) == 1:
return self.find({id_field: ids[0]})
else:
return self.find({id_field: {"$in": ids}})

def get_one(self, id: IDENTIFIER, **kwargs) -> Optional[OBJECT]:
"""
@@ -518,7 +521,7 @@ def exists(self) -> Optional[bool]:
:return:
"""
cd = self.class_definition()
return cd is not None
return cd is not None and bool(cd.attributes)

def load_from_source(self, load_if_exists=False):
"""
@@ -535,11 +538,19 @@
kwargs = source.arguments or {}
if source.local_path:
objects = load_objects(
metadata.source.local_path, format=source.format, expected_type=source.expected_type, **kwargs
metadata.source.local_path,
format=source.format,
expected_type=source.expected_type,
compression=source.compression,
**kwargs,
)
elif metadata.source.url:
objects = load_objects_from_url(
metadata.source.url, format=source.format, expected_type=source.expected_type, **kwargs
metadata.source.url,
format=source.format,
expected_type=source.expected_type,
compression=source.compression,
**kwargs,
)
self.insert(objects)

@@ -746,6 +757,7 @@ def class_definition(self) -> Optional[ClassDefinition]:
sv: SchemaView = self.parent.schema_view
if sv:
cls = sv.get_class(self.target_class_name)
# cls = sv.schema.classes[self.target_class_name]
if cls and not cls.attributes:
if not sv.class_induced_slots(cls.name):
for att in self._induce_attributes():
@@ -868,7 +880,7 @@ def induce_class_definition_from_objects(
exact_dimensions_list.append(v.shape)
break
if isinstance(v, list):
v = v[0]
v = v[0] if v else None
multivalueds.append(True)
elif isinstance(v, dict):
v = list(v.values())[0]
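The reworked `get` dispatches on the number of identifiers: a single id becomes a plain equality filter, several become a MongoDB-style `$in` filter. A sketch of both call shapes, assuming a collection whose identifier attribute is `id`:

# One id: translated to find({"id": "P1"})
qr = collection.get(["P1"])

# Several ids: translated to find({"id": {"$in": ["P1", "P2"]}})
qr = collection.get(["P1", "P2"])
print(qr.num_rows)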
3 changes: 2 additions & 1 deletion src/linkml_store/api/config.py
@@ -33,6 +33,7 @@ class CollectionSource(ConfiguredBaseModel):
refresh_interval_days: Optional[float] = None
expected_type: Optional[str] = None
format: Optional[str] = None
compression: Optional[str] = None
arguments: Optional[Dict[str, Any]] = None


@@ -73,11 +74,11 @@ class CollectionConfig(ConfiguredBaseModel):
default=None,
description="Metadata about the source",
)
# TODO: derived_from
derived_from: Optional[List[DerivationConfiguration]] = Field(
default=None,
description="LinkML-Map derivations",
)
page_size: Optional[int] = Field(default=None, description="Suggested page size (items per page) in apps and APIs")


class DatabaseConfig(ConfiguredBaseModel):
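The new `compression` field is threaded through `load_from_source` (see collection.py above) into `load_objects`/`load_objects_from_url`, and `page_size` gives apps a paging hint. A sketch of building the config models directly — the URL and the `gzip` token are illustrative assumptions:

from linkml_store.api.config import CollectionConfig, CollectionSource

config = CollectionConfig(
    source=CollectionSource(
        url="https://example.org/persons.json.gz",  # illustrative URL
        format="json",
        compression="gzip",  # assumed compression token
    ),
    page_size=25,  # suggested items per page for apps and APIs
)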
35 changes: 32 additions & 3 deletions src/linkml_store/api/database.py
@@ -19,7 +19,7 @@
)

from linkml_store.api.types import CollectionType
from linkml_store.utils.format_utils import load_objects, render_output
from linkml_store.utils.format_utils import Format, load_objects, render_output
from linkml_store.utils.patch_utils import PatchDict

try:
@@ -705,19 +705,35 @@ def drop(self, **kwargs):
"""
raise NotImplementedError()

def import_database(self, location: str, source_format: Optional[str] = None, **kwargs):
def import_database(self, location: str, source_format: Optional[Union[str, Format]] = None, **kwargs):
"""
Import a database from a file or location.
:param location: location of the file
:param source_format: source format
:param kwargs: additional arguments
"""
if isinstance(source_format, str):
source_format = Format(source_format)
if isinstance(source_format, Format):
if source_format.is_dump_format() and source_format in [Format.SQLDUMP_DUCKDB, Format.DUMP_MONGODB]:
# import into a test instance
tmp_handle = source_format.value
client = self.parent
tmp_db = client.attach_database(tmp_handle, alias="tmp")
# TODO: check for infinite recursion
tmp_db.import_database(location, source_format=source_format)
obj = {}
for coll in tmp_db.list_collections():
qr = coll.find({}, limit=-1)
obj[coll.alias] = qr.rows
self.store(obj)
return
objects = load_objects(location, format=source_format)
for obj in objects:
self.store(obj)

def export_database(self, location: str, target_format: Optional[str] = None, **kwargs):
def export_database(self, location: str, target_format: Optional[Union[str, Format]] = None, **kwargs):
"""
Export a database to a file or location.
@@ -726,10 +742,23 @@ def export_database(self, location: str, target_format: Optional[str] = None, **kwargs):
:param kwargs: additional arguments
"""
obj = {}
if isinstance(target_format, str):
target_format = Format(target_format)
for coll in self.list_collections():
qr = coll.find({}, limit=-1)
obj[coll.alias] = qr.rows
logger.info(f"Exporting object with {len(obj)} collections to {location} in {target_format} format")
if isinstance(target_format, Format):
if target_format.is_dump_format() and target_format in [Format.SQLDUMP_DUCKDB, Format.DUMP_MONGODB]:
tmp_handle = target_format.value
client = self.parent
tmp_db = client.attach_database(tmp_handle, alias="tmp")
tmp_db.store(obj)
# TODO: check for infinite recursion
tmp_db.export_database(location, target_format=target_format)
return
if Path(location).is_dir():
raise ValueError(f"{location} is a directory; cannot write {target_format} to a dir")
with open(location, "w", encoding="utf-8") as stream:
stream.write(render_output(obj, format=target_format))

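Both methods now special-case native dump formats (`Format.SQLDUMP_DUCKDB`, `Format.DUMP_MONGODB`) by bouncing the data through a scratch database attached under the alias `tmp`; all other formats still go through `render_output`/`load_objects`. A sketch of a round trip — the handles, paths, and the `yaml` format token are assumptions:

from linkml_store import Client

client = Client()
db = client.attach_database("duckdb:///my.db", alias="main")

# Generic path: every collection rendered into one serialized file
db.export_database("backup.yaml", target_format="yaml")

# Native dump path: delegated to the DuckDB adapter's EXPORT DATABASE
db.export_database("backup_dir", target_format="duckdb")

# Restore the dump into a fresh database
restored = client.attach_database("duckdb:///restored.db", alias="restored")
restored.import_database("backup_dir", source_format="duckdb")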
34 changes: 31 additions & 3 deletions src/linkml_store/api/stores/duckdb/duckdb_database.py
@@ -1,11 +1,10 @@
import json
import logging
from pathlib import Path
from typing import Optional
from typing import Optional, Union

import pandas as pd
import sqlalchemy
from duckdb import DuckDBPyConnection
from linkml_runtime import SchemaView
from linkml_runtime.linkml_model import ClassDefinition, SlotDefinition
from linkml_runtime.utils.schema_builder import SchemaBuilder
@@ -14,6 +13,7 @@
from linkml_store.api import Database
from linkml_store.api.queries import Query, QueryResult
from linkml_store.api.stores.duckdb.duckdb_collection import DuckDBCollection
from linkml_store.utils.format_utils import Format
from linkml_store.utils.sql_utils import introspect_schema, query_to_sql

TYPE_MAP = {
@@ -45,7 +45,7 @@ class DuckDBDatabase(Database):
types are used for nested inlined objects.
"""

_connection: DuckDBPyConnection = None
# _connection: DuckDBPyConnection = None
_engine: sqlalchemy.Engine = None
collection_class = DuckDBCollection

@@ -202,3 +202,31 @@ def induce_schema_view(self) -> SchemaView:
cls = ClassDefinition(name=collection_metadata.type, attributes=collection_metadata.attributes)
schema.classes[cls.name] = cls
return SchemaView(schema)

def export_database(self, location: str, target_format: Optional[Union[str, Format]] = None, **kwargs):
if target_format == "duckdb" or target_format == Format.SQLDUMP_DUCKDB:
path = Path(location)
if path.exists():
if path.is_file():
path.unlink()
with self.engine.connect() as conn:
sql = text(f"EXPORT DATABASE '{location}'")
conn.execute(sql)
else:
super().export_database(location, target_format=target_format, **kwargs)

def import_database(self, location: str, source_format: Optional[str] = None, **kwargs):
"""
Import a database from a file or location.
:param location: location of the file
:param source_format: source format
:param kwargs: additional arguments
"""
if source_format == Format.SQLDUMP_DUCKDB.value or source_format == Format.SQLDUMP_DUCKDB:
with self.engine.connect() as conn:
sql = text(f"IMPORT DATABASE '{location}'")
conn.execute(sql)
conn.commit()
else:
super().import_database(location, source_format=source_format, **kwargs)
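Under the hood these branches run DuckDB's native `EXPORT DATABASE` / `IMPORT DATABASE` statements through the SQLAlchemy engine. A standalone sketch of that SQL — the engine URLs and directory are illustrative, and the `duckdb-engine` dialect is assumed to be installed:

from sqlalchemy import create_engine, text

engine = create_engine("duckdb:///my.db")  # illustrative handle
with engine.connect() as conn:
    conn.execute(text("EXPORT DATABASE 'backup_dir'"))  # writes schema and data files to the directory

fresh = create_engine("duckdb:///restored.db")
with fresh.connect() as conn:
    conn.execute(text("IMPORT DATABASE 'backup_dir'"))
    conn.commit()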
