Skip to content

Commit 0aff61b

Browse files
committed
Greatly simplify the template saving logic
1 parent b6de0d3 commit 0aff61b

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

52 files changed

+771
-1219
lines changed

CHANGELOG.md

+1
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1818
- Use databackend to perform metadata duties
1919
- Add `db.create` and `db.insert` instead of `auto_schema`
2020
- Merkel-tree implementation replacing random `.uuid` with deterministic implementation
21+
- Simplify the `Template` class
2122

2223
#### New Features & Functionality
2324

superduper/__init__.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
from .base.schema import Schema
2222
from .components.application import Application
2323
from .components.component import Component
24+
from .components.cron_job import CronJob, FunctionCronJob
2425
from .components.dataset import Dataset
2526
from .components.listener import Listener
2627
from .components.metric import Metric
@@ -35,7 +36,7 @@
3536
from .components.plugin import Plugin
3637
from .components.streamlit import Streamlit
3738
from .components.table import Table
38-
from .components.template import QueryTemplate, Template
39+
from .components.template import Template
3940
from .components.vector_index import VectorIndex
4041

4142
REQUIRES = [
@@ -54,6 +55,8 @@
5455
'QueryModel',
5556
'Validation',
5657
'Model',
58+
'CronJob',
59+
'FunctionCronJob',
5760
'Trainer',
5861
'model',
5962
'Listener',
@@ -65,7 +68,6 @@
6568
'Table',
6669
'Application',
6770
'Template',
68-
'QueryTemplate',
6971
'Application',
7072
'Component',
7173
'pickle_serializer',

superduper/backends/base/backends.py

+92-19
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,99 @@
11
import typing as t
22
from abc import ABC, abstractmethod
3+
from collections import defaultdict
34

45
if t.TYPE_CHECKING:
56
from superduper.base.datalayer import Datalayer
67
from superduper.components.component import Component
78

89

10+
class Bookkeeping(ABC):
11+
"""Mixin class for tracking components and associated tools."""
12+
13+
def __init__(self):
14+
self.component_uuid_mapping = defaultdict(set)
15+
self.uuid_component_mapping = {}
16+
self.tool_uuid_mapping = defaultdict(set)
17+
self.uuid_tool_mapping = {}
18+
self.tools = {}
19+
20+
def build_tool(self, component: 'Component'):
21+
"""Build a tool from a component.
22+
23+
:param component: Component to build tool from.
24+
"""
25+
pass
26+
27+
def get_tool(self, uuid: str):
28+
"""Get the tool from a uuid.
29+
30+
:param uuid: UUID of the tool.
31+
"""
32+
tool_id = self.uuid_tool_mapping[uuid]
33+
return self.tools[tool_id]
34+
35+
def put_component(self, component: 'Component', **kwargs):
36+
"""Put a component to the backend.
37+
38+
:param component: Component to put.
39+
:param kwargs: kwargs dictionary.
40+
"""
41+
tool = self.build_tool(component)
42+
tool.db = self.db
43+
self.component_uuid_mapping[(component.component, component.identifier)].add(
44+
component.uuid
45+
)
46+
self.uuid_component_mapping[component.uuid] = (
47+
component.component,
48+
component.identifier,
49+
)
50+
self.uuid_tool_mapping[component.uuid] = tool.identifier
51+
self.tool_uuid_mapping[tool.identifier].add(component.uuid)
52+
self.tools[tool.identifier] = tool
53+
tool.initialize(**kwargs)
54+
55+
def drop_component(self, component: str, identifier: str):
56+
"""Drop the component from backend.
57+
58+
:param component: Component name.
59+
:param identifier: Component identifier.
60+
"""
61+
uuids = self.component_uuid_mapping[(component, identifier)]
62+
tool_ids = []
63+
for uuid in uuids:
64+
del self.uuid_component_mapping[uuid]
65+
tool_id = self.uuid_tool_mapping[uuid]
66+
tool_ids.append(tool_id)
67+
del self.uuid_tool_mapping[uuid]
68+
self.tool_uuid_mapping[tool_id].remove(uuid)
69+
if not self.tool_uuid_mapping[tool_id]:
70+
self.tools[tool_id].drop()
71+
del self.tools[tool_id]
72+
del self.component_uuid_mapping[(component, identifier)]
73+
74+
def drop(self):
75+
"""Drop the backend."""
76+
for tool in self.tools.values():
77+
tool.drop()
78+
self.component_uuid_mapping = defaultdict(set)
79+
self.uuid_component_mapping = {}
80+
self.tool_uuid_mapping = defaultdict(set)
81+
self.uuid_tool_mapping = {}
82+
self.tools = {}
83+
84+
def list_components(self):
85+
"""List components, and identifiers deployed."""
86+
return list(self.component_uuid_mapping.keys())
87+
88+
def list_tools(self):
89+
"""List tools deployed."""
90+
return list(self.tools.keys())
91+
92+
def list_uuids(self):
93+
"""List uuids deployed."""
94+
return list(self.uuid_component_mapping.keys())
95+
96+
997
class BaseBackend(ABC):
1098
"""Base backend class for cluster client."""
1199

@@ -34,34 +122,19 @@ def initialize(self):
34122
"""To be called on program start."""
35123
pass
36124

37-
def put_component(self, component: 'Component', **kwargs):
125+
@abstractmethod
126+
def put_component(self, component: 'Component'):
38127
"""Add a component to the deployment.
39128
40129
:param component: ``Component`` to put.
41-
:param kwargs: kwargs dictionary.
42130
"""
43-
# This is to make sure that we only have 1 version
44-
# of each component implemented at any given time
45-
# TODO: get identifier in string component argument.
46-
identifier = ''
47-
if isinstance(component, str):
48-
uuid = component
49-
else:
50-
uuid = component.uuid
51-
identifier = component.identifier
52-
53-
if uuid in self.list_uuids():
54-
return
55-
if identifier in self.list_components():
56-
del self[component.identifier]
57-
58-
self._put(component, **kwargs)
59131

60132
@abstractmethod
61133
def drop_component(self, component: str, identifier: str):
62134
"""Drop the component from backend.
63135
64-
:param identifier: Component identifier
136+
:param component: Component name.
137+
:param identifier: Component identifier.
65138
"""
66139

67140
@property

superduper/backends/base/cluster.py

+5-10
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
# TODO rename queue + crontab to scheduler
22
import dataclasses as dc
33
import time
4-
from abc import ABC, abstractmethod
54
import typing as t
5+
from abc import ABC, abstractmethod
66

77
from superduper.backends.base.cache import Cache
88
from superduper.backends.base.cdc import CDCBackend
@@ -16,16 +16,15 @@
1616
class Cluster(ABC):
1717
"""Cluster object for managing the backend.
1818
19-
:param compute: The compute backend.
2019
:param cache: The cache backend.
21-
:param queue: The queue backend.
20+
:param scheduler: The scheduler backend.
2221
:param vector_search: The vector search backend.
2322
:param cdc: The change data capture backend.
2423
:param crontab: The crontab backend.
2524
"""
2625

27-
cache: Cache
28-
scheduler: BaseScheduler
26+
cache: Cache
27+
scheduler: BaseScheduler
2928
vector_search: VectorSearchBackend
3029
cdc: CDCBackend
3130
crontab: CrontabBackend
@@ -87,11 +86,7 @@ def load_custom_plugins(self):
8786
plugin = self.db.load('Plugin', plugin)
8887

8988
def initialize(self):
90-
"""Initialize the cluster.
91-
92-
:param with_compute: Boolean to init
93-
compute.
94-
"""
89+
"""Initialize the cluster."""
9590
from superduper import logging
9691

9792
start = time.time()

superduper/backends/base/compute.py

+9-10
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,9 @@
44
from superduper.backends.base.backends import BaseBackend
55

66
if t.TYPE_CHECKING:
7-
from superduper.components.component import Component
87
from superduper.base.datalayer import Datalayer
98
from superduper.base.event import Job
9+
from superduper.components.component import Component
1010

1111

1212
class ComputeBackend(BaseBackend):
@@ -23,14 +23,10 @@ class ComputeBackend(BaseBackend):
2323
def release_futures(self, context: str):
2424
"""Release futures from backend.
2525
26-
:param context: Context of futures to release.
26+
:param context: Futures context to release.
2727
"""
2828
pass
2929

30-
def get_local_client(self):
31-
"""Returns a local version of self."""
32-
pass
33-
3430
@abstractmethod
3531
def submit(self, job: 'Job') -> t.Any:
3632
"""
@@ -45,14 +41,15 @@ def disconnect(self) -> None:
4541
"""Disconnect the client."""
4642
pass
4743

44+
@abstractmethod
4845
def initialize(self):
4946
"""Connect to address."""
5047

48+
@abstractmethod
5149
def put_component(self, component: 'Component'):
5250
"""Create handler on component declare.
5351
54-
:param args: *args for `create_handler`
55-
:param kwargs: *kwargs for `create_handler`
52+
:param component: Component to put.
5653
"""
5754

5855
@property
@@ -68,8 +65,10 @@ def db(self, value: 'Datalayer'):
6865
"""
6966
self._db = value
7067

71-
def drop_component(self, uuid: str):
68+
@abstractmethod
69+
def drop_component(self, component: str, identifier: str):
7270
"""Drop the component from compute.
7371
74-
:param uuid: Component uuid.
72+
:param component: Component name.
73+
:param identifier: Component identifier.
7574
"""

superduper/backends/base/crontab.py

-1
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,3 @@
33

44
class CrontabBackend(BaseBackend):
55
"""Base class for crontab backends."""
6-

0 commit comments

Comments
 (0)