WIP: Resource-based interface

acroz · acroz · commit 77bcd396f844 · 2019-05-10T15:55:38.000+01:00
diff --git a/faculty/experiments.py b/faculty/experiments.py
@@ -0,0 +1,71 @@
+from attr import attrs, attrib
+import pandas
+
+import faculty  # TODO: Avoid possible circular imports
+
+
+class QueryResult(object):
+    """Thin iterable wrapper around the results of a query.
+
+    Iterating over an instance iterates over the wrapped ``iterable``.
+
+    NOTE: ``__iter__`` calls ``iter()`` on the wrapped object every time, so
+    when a one-shot iterator (e.g. a generator) is wrapped, the results can
+    only be consumed once.
+
+    Parameters
+    ----------
+    iterable
+        Any object accepted by the built-in ``iter()``. It is stored as-is
+        on the ``iterable`` attribute.
+    """
+
+    def __init__(self, iterable):
+        self.iterable = iterable
+
+    def __iter__(self):
+        """Return an iterator over the wrapped results."""
+        wrapped = self.iterable
+        return iter(wrapped)
+
+
+class ExperimentRunQueryResult(QueryResult):
+    """Query result whose items are experiment runs."""
+
+    def as_dataframe(self):
+        """Build a ``pandas.DataFrame`` with one row per run.
+
+        Each row holds the fixed columns "Experiment ID", "Run ID", "Status"
+        and "Started At", plus one column per metric of the run, named after
+        the metric's ``key`` and holding the metric's ``value``. A metric
+        whose key equals one of the fixed column names overwrites that
+        column for the run.
+
+        This iterates over the result, so if the result wraps a one-shot
+        iterator (such as a generator) it is consumed by this call.
+
+        Returns
+        -------
+        pandas.DataFrame
+            One row per run, in iteration order.
+        """
+        records = []
+        for run in self:
+            row = {
+                "Experiment ID": run.experiment_id,
+                "Run ID": run.id,
+                # presumably ``status`` is an enum member; only its plain
+                # ``value`` is stored in the frame
+                "Status": run.status.value,
+                "Started At": run.started_at,
+            }
+            for metric in run.metrics:
+                # Take the value from the metric itself; the row dict has
+                # no entry keyed by the metric's value.
+                row[metric.key] = metric.value
+            records.append(row)
+        return pandas.DataFrame(records)
+
+
+@attrs
+class ExperimentRun(object):
+    """An experiment run exposed through a resource-style interface.
+
+    All attributes are declared with ``attrib()`` and none has a default,
+    so every one of them must be supplied on construction.
+    ``_from_client_model`` fills them from the ``_asdict()`` of a client
+    object, so the field names below must match the keys of that mapping.
+    """
+
+    # Declaration order determines the positional order of the generated
+    # ``__init__`` arguments.
+    id = attrib()
+    run_number = attrib()
+    experiment_id = attrib()
+    name = attrib()
+    parent_run_id = attrib()
+    artifact_location = attrib()
+    status = attrib()
+    started_at = attrib()
+    ended_at = attrib()
+    deleted_at = attrib()
+    tags = attrib()
+    params = attrib()
+    metrics = attrib()
+
+    @classmethod
+    def _from_client_model(cls, client_object):
+        """Create an instance from a client-side run object.
+
+        ``client_object`` must provide ``_asdict()`` (presumably it is a
+        namedtuple -- confirm against the client's models); the resulting
+        mapping is passed to the constructor as keyword arguments.
+        """
+        field_values = client_object._asdict()
+        return cls(**field_values)
+
+    @classmethod
+    def query(cls, project_id, experiment_ids=None):
+        """Query the runs of a project.
+
+        Parameters
+        ----------
+        project_id
+            Passed unchanged as the first argument to the experiment
+            client's ``list_runs``.
+        experiment_ids : optional
+            Passed unchanged as the second argument to ``list_runs``.
+            Defaults to ``None``.
+
+        Returns
+        -------
+        ExperimentRunQueryResult
+            Wraps a generator of ``ExperimentRun`` objects. Nothing is
+            requested until the result is iterated; further pages are then
+            fetched on demand for as long as the response reports a next
+            page.
+        """
+
+        def iter_all_pages():
+            experiment_client = faculty.client("experiment")
+            page = experiment_client.list_runs(project_id, experiment_ids)
+
+            while True:
+                for client_run in page.runs:
+                    yield cls._from_client_model(client_run)
+
+                following = page.pagination.next
+                if following is None:
+                    # The last page has been emitted.
+                    return
+
+                page = experiment_client.list_runs(
+                    project_id,
+                    experiment_ids,
+                    start=following.start,
+                    limit=following.limit,
+                )
+
+        # Open question:
+        # Should we evaluate the entire set of runs before returning the
+        # result, or is it OK to have them lazily evaluated?
+        return ExperimentRunQueryResult(iter_all_pages())
diff --git a/setup.py b/setup.py
@@ -33,6 +33,8 @@
         "enum34; python_version<'3.4'",
         # Install marshmallow with 'reco' (recommended) extras to ensure a
         # compatible version of python-dateutil is available
+        "attr",
+        "pandas",
         "marshmallow[reco]>=3.0.0rc3",
         "marshmallow_enum",
         "boto3",