diff --git a/examples/dynamic_service/pipeline/README.md b/examples/dynamic_service/pipeline/README.md
new file mode 100644
index 00000000000..2d7f3f27054
--- /dev/null
+++ b/examples/dynamic_service/pipeline/README.md
@@ -0,0 +1,44 @@
+# BentoML Dynamic Service Example: document classification pipeline
+
+0. Install dependencies:
+
+```bash
+pip install -r ./requirements.txt
+```
+
+1. Train the document classification pipeline models (`train.py` saves the same pipeline under two model names):
+
+```bash
+python ./train.py
+```
+
+2. Run the service:
+
+```bash
+bentoml serve service:DynamicService
+```
+
+3. Send test requests
+
+Test the `/predict_model_0` endpoint:
+```bash
+curl -X POST -H "Content-Type: application/json" --data '{"input_doc": "hello world"}' http://127.0.0.1:3000/predict_model_0
+```
+
+Test the `/predict_proba_model_0` endpoint:
+```bash
+curl -X POST -H "Content-Type: application/json" --data '{"input_doc": "hello world"}' http://127.0.0.1:3000/predict_proba_model_0
+```
+
+4. Build the Bento:
+
+```bash
+bentoml build
+```
+
+5. Build a docker image:
+
+```bash
+bentoml containerize doc_classifier:latest
+```
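+
+6. (Optional) Run the containerized service. A minimal sketch, assuming the image
+tag produced by the previous step and the default port:
+
+```bash
+docker run --rm -p 3000:3000 doc_classifier:latest
+```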
""" + + +def wrap_service_methods( + model: bentoml.Model, + targets: Any, + predict_route: str, + predict_name: str, + predict_proba_route: str, + predict_proba_name: str, +): + """Wrap models in service methods and annotate as api.""" + + @bentoml.api(route=predict_route, name=predict_name) + async def predict(input_doc: str): + predictions = await model.predict.async_run([input_doc]) + return {"result": targets[predictions[0]]} + + @bentoml.api(route=predict_proba_route, name=predict_proba_name) + async def predict_proba(input_doc: str): + predictions = await model.predict_proba.async_run([input_doc]) + return predictions[0] + + return predict, predict_proba + + +class_attrs = {} # Empty dict for storing methods +# Manually add api methods to local scope as via locals() method (current scope). +distinct_models = set() +for model in bentoml.models.list(): + distinct_models.add(model.tag.name) +for idx, available_model in enumerate(distinct_models): + if "twenty_news_group" in available_model: + bento_model = bentoml.sklearn.get(f"{available_model}:latest") + target_names = bento_model.custom_objects["target_names"] + path_predict = f"predict_model_{idx}" + path_predict_proba = f"predict_proba_model_{idx}" + + ( + class_attrs[path_predict], + class_attrs[path_predict_proba], + ) = wrap_service_methods( + bento_model, + target_names, + predict_route="/" + path_predict, + predict_name="/" + path_predict, + predict_proba_route=path_predict_proba, + predict_proba_name=path_predict_proba, + ) + +# Create class with type and add generated methods +DynamicServiceClass = type( + "DynamicService", + (object,), + class_attrs, +) + +# Create Endpoint Service defined in bentofile.yaml +DynamicService = bentoml.service(workers=1, resources={"cpu": "1"})(DynamicServiceClass) diff --git a/examples/dynamic_service/pipeline/train.py b/examples/dynamic_service/pipeline/train.py new file mode 100644 index 00000000000..71e05e48973 --- /dev/null +++ b/examples/dynamic_service/pipeline/train.py @@ -0,0 +1,118 @@ +import logging +from pprint import pprint +from time import time + +from sklearn.datasets import fetch_20newsgroups +from sklearn.feature_extraction.text import CountVectorizer +from sklearn.feature_extraction.text import TfidfTransformer +from sklearn.linear_model import SGDClassifier +from sklearn.model_selection import GridSearchCV +from sklearn.pipeline import Pipeline + +import bentoml + +# Display progress logs on stdout +logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s") + +# Load some categories from the training set +categories = [ + "alt.atheism", + "talk.religion.misc", +] + +# Uncomment the following to do the analysis on all the categories +# categories = None + +print("Loading 20 newsgroups dataset for categories:") +print(categories) + +data = fetch_20newsgroups(subset="train", categories=categories) +print("%d documents" % len(data.filenames)) +print("%d categories" % len(data.target_names)) +print() + +# Define a pipeline combining a text feature extractor with a simple classifier +pipeline = Pipeline( + [ + ("vect", CountVectorizer()), + ("tfidf", TfidfTransformer()), + ("clf", SGDClassifier(loss="log_loss")), + ] +) + +# Parameters to use for grid search. 
diff --git a/examples/dynamic_service/pipeline/train.py b/examples/dynamic_service/pipeline/train.py
new file mode 100644
index 00000000000..71e05e48973
--- /dev/null
+++ b/examples/dynamic_service/pipeline/train.py
@@ -0,0 +1,118 @@
+import logging
+from pprint import pprint
+from time import time
+
+from sklearn.datasets import fetch_20newsgroups
+from sklearn.feature_extraction.text import CountVectorizer
+from sklearn.feature_extraction.text import TfidfTransformer
+from sklearn.linear_model import SGDClassifier
+from sklearn.model_selection import GridSearchCV
+from sklearn.pipeline import Pipeline
+
+import bentoml
+
+# Display progress logs on stdout
+logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
+
+# Load some categories from the training set
+categories = [
+    "alt.atheism",
+    "talk.religion.misc",
+]
+
+# Uncomment the following to do the analysis on all the categories
+# categories = None
+
+print("Loading 20 newsgroups dataset for categories:")
+print(categories)
+
+data = fetch_20newsgroups(subset="train", categories=categories)
+print("%d documents" % len(data.filenames))
+print("%d categories" % len(data.target_names))
+print()
+
+# Define a pipeline combining a text feature extractor with a simple classifier
+pipeline = Pipeline(
+    [
+        ("vect", CountVectorizer()),
+        ("tfidf", TfidfTransformer()),
+        ("clf", SGDClassifier(loss="log_loss")),
+    ]
+)
+
+# Parameters to use for grid search. Uncommenting more parameters will give
+# better exploring power but will increase processing time in a combinatorial
+# way.
+parameters = {
+    "vect__max_df": (0.5, 0.75, 1.0),
+    # 'vect__max_features': (None, 5000, 10000, 50000),
+    "vect__ngram_range": ((1, 1), (1, 2)),  # unigrams or bigrams
+    # 'tfidf__use_idf': (True, False),
+    # 'tfidf__norm': ('l1', 'l2'),
+    "clf__max_iter": (20,),
+    "clf__alpha": (0.00001, 0.000001),
+    "clf__penalty": ("l2", "elasticnet"),
+    # 'clf__max_iter': (10, 50, 80),
+}
+
+# Find the best parameters for both the feature extraction and the classifier
+grid_search = GridSearchCV(pipeline, parameters, n_jobs=-1, verbose=1)
+
+print("Performing grid search...")
+print("pipeline:", [name for name, _ in pipeline.steps])
+print("parameters:")
+pprint(parameters)
+t0 = time()
+grid_search.fit(data.data, data.target)
+print("done in %0.3fs" % (time() - t0))
+print()
+
+print("Best score: %0.3f" % grid_search.best_score_)
+best_parameters = grid_search.best_estimator_.get_params()
+best_parameters = {
+    param_name: best_parameters[param_name] for param_name in sorted(parameters.keys())
+}
+print(f"Best parameters set: {best_parameters}")
+
+bento_model = bentoml.sklearn.save_model(
+    "twenty_news_group",
+    grid_search.best_estimator_,
+    signatures={
+        "predict": {"batchable": True, "batch_dim": 0},
+        "predict_proba": {"batchable": True, "batch_dim": 0},
+    },
+    custom_objects={
+        "target_names": data.target_names,
+    },
+    metadata=best_parameters,
+)
+print(f"Model saved: {bento_model}")
+
+# Test running inference with BentoML runner
+test_runner = bentoml.sklearn.get("twenty_news_group:latest").to_runner()
+test_runner.init_local()
+assert test_runner.predict.run(["hello"]) == grid_search.best_estimator_.predict(
+    ["hello"]
+)
+
+# Save the same estimator a second time so the dynamic service has several
+# models to discover
+bento_model = bentoml.sklearn.save_model(
+    "twenty_news_group_second",
+    grid_search.best_estimator_,
+    signatures={
+        "predict": {"batchable": True, "batch_dim": 0},
+        "predict_proba": {"batchable": True, "batch_dim": 0},
+    },
+    custom_objects={
+        "target_names": data.target_names,
+    },
+    metadata=best_parameters,
+)
+print(f"Model saved: {bento_model}")
+
+# Test running inference with BentoML runner
+test_runner = bentoml.sklearn.get("twenty_news_group_second:latest").to_runner()
+test_runner.init_local()
+assert test_runner.predict.run(["hello"]) == grid_search.best_estimator_.predict(
+    ["hello"]
+)
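+
+# The dynamic service discovers these models at import time via
+# bentoml.models.list(); a quick check (a sketch) that both are now present:
+#
+#   import bentoml
+#   names = {m.tag.name for m in bentoml.models.list()}
+#   assert {"twenty_news_group", "twenty_news_group_second"} <= names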
print("Loading 20 newsgroups dataset for categories:") + print(categories) -data = fetch_20newsgroups(subset="train", categories=categories) -print("%d documents" % len(data.filenames)) -print("%d categories" % len(data.target_names)) -print() + data = fetch_20newsgroups(subset="train", categories=categories) + print("%d documents" % len(data.filenames)) + print("%d categories" % len(data.target_names)) + print() -# Define a pipeline combining a text feature extractor with a simple classifier -pipeline = Pipeline( - [ - ("vect", CountVectorizer()), - ("tfidf", TfidfTransformer()), - ("clf", SGDClassifier(loss="log_loss")), - ] -) + # Define a pipeline combining a text feature extractor with a simple classifier + pipeline = Pipeline( + [ + ("vect", CountVectorizer()), + ("tfidf", TfidfTransformer()), + ("clf", SGDClassifier(loss="log_loss")), + ] + ) -# Parameters to use for grid search. Uncommenting more parameters will give -# better exploring power but will increase processing time in a combinatorial -# way -parameters = { - "vect__max_df": (0.5, 0.75, 1.0), - # 'vect__max_features': (None, 5000, 10000, 50000), - "vect__ngram_range": ((1, 1), (1, 2)), # unigrams or bigrams - # 'tfidf__use_idf': (True, False), - # 'tfidf__norm': ('l1', 'l2'), - "clf__max_iter": (20,), - "clf__alpha": (0.00001, 0.000001), - "clf__penalty": ("l2", "elasticnet"), - # 'clf__max_iter': (10, 50, 80), -} + # Parameters to use for grid search. Uncommenting more parameters will give + # better exploring power but will increase processing time in a combinatorial + # way + parameters = { + "vect__max_df": (0.5, 0.75, 1.0), + # 'vect__max_features': (None, 5000, 10000, 50000), + "vect__ngram_range": ((1, 1), (1, 2)), # unigrams or bigrams + # 'tfidf__use_idf': (True, False), + # 'tfidf__norm': ('l1', 'l2'), + "clf__max_iter": (20,), + "clf__alpha": (0.00001, 0.000001), + "clf__penalty": ("l2", "elasticnet"), + # 'clf__max_iter': (10, 50, 80), + } -# Find the best parameters for both the feature extraction and the -# classifier -grid_search = GridSearchCV(pipeline, parameters, n_jobs=-1, verbose=1) + # Find the best parameters for both the feature extraction and the + # classifier + grid_search = GridSearchCV(pipeline, parameters, n_jobs=-1, verbose=1) -print("Performing grid search...") -print("pipeline:", [name for name, _ in pipeline.steps]) -print("parameters:") -pprint(parameters) -t0 = time() -grid_search.fit(data.data, data.target) -print("done in %0.3fs" % (time() - t0)) -print() + print("Performing grid search...") + print("pipeline:", [name for name, _ in pipeline.steps]) + print("parameters:") + pprint(parameters) + t0 = time() + grid_search.fit(data.data, data.target) + print("done in %0.3fs" % (time() - t0)) + print() -print("Best score: %0.3f" % grid_search.best_score_) -best_parameters = grid_search.best_estimator_.get_params() -best_parameters = { - param_name: best_parameters[param_name] for param_name in sorted(parameters.keys()) -} -print(f"Best parameters set: {best_parameters}") + print("Best score: %0.3f" % grid_search.best_score_) + best_parameters = grid_search.best_estimator_.get_params() + best_parameters = { + param_name: best_parameters[param_name] + for param_name in sorted(parameters.keys()) + } + print(f"Best parameters set: {best_parameters}") -bento_model = bentoml.sklearn.save_model( - "twenty_news_group", - grid_search.best_estimator_, - signatures={ - "predict": {"batchable": True, "batch_dim": 0}, - "predict_proba": {"batchable": True, "batch_dim": 0}, - }, - custom_objects={ - 
"target_names": data.target_names, - }, - metadata=best_parameters, -) -print(f"Model saved: {bento_model}") + bento_model = bentoml.sklearn.save_model( + "twenty_news_group", + grid_search.best_estimator_, + signatures={ + "predict": {"batchable": True, "batch_dim": 0}, + "predict_proba": {"batchable": True, "batch_dim": 0}, + }, + custom_objects={ + "target_names": data.target_names, + }, + metadata=best_parameters, + ) + print(f"Model saved: {bento_model}") -# Test running inference with BentoML runner -test_runner = bentoml.sklearn.get("twenty_news_group:latest").to_runner() -test_runner.init_local() -assert test_runner.predict.run(["hello"]) == grid_search.best_estimator_.predict( - ["hello"] -) + # Test running inference with BentoML runner + test_runner = bentoml.sklearn.get("twenty_news_group:latest").to_runner() + test_runner.init_local() + assert test_runner.predict.run(["hello"]) == grid_search.best_estimator_.predict( + ["hello"] + ) diff --git a/grpc-client/bentoml b/grpc-client/bentoml index 542ee9833b2..f6754da92e0 120000 --- a/grpc-client/bentoml +++ b/grpc-client/bentoml @@ -1 +1 @@ -../src/bentoml \ No newline at end of file +../src/bentoml diff --git a/grpc-client/java/src/main/proto/v1 b/grpc-client/java/src/main/proto/v1 index cbeb74766ad..c252109319f 120000 --- a/grpc-client/java/src/main/proto/v1 +++ b/grpc-client/java/src/main/proto/v1 @@ -1 +1 @@ -../../../../bentoml/grpc/v1 \ No newline at end of file +../../../../bentoml/grpc/v1 diff --git a/grpc-client/java/src/main/proto/v1alpha1 b/grpc-client/java/src/main/proto/v1alpha1 index cfa00eec479..f8068df30e8 120000 --- a/grpc-client/java/src/main/proto/v1alpha1 +++ b/grpc-client/java/src/main/proto/v1alpha1 @@ -1 +1 @@ -../../../../bentoml/grpc/v1alpha1 \ No newline at end of file +../../../../bentoml/grpc/v1alpha1 diff --git a/grpc-client/kotlin/src/main/proto/v1 b/grpc-client/kotlin/src/main/proto/v1 index cbeb74766ad..c252109319f 120000 --- a/grpc-client/kotlin/src/main/proto/v1 +++ b/grpc-client/kotlin/src/main/proto/v1 @@ -1 +1 @@ -../../../../bentoml/grpc/v1 \ No newline at end of file +../../../../bentoml/grpc/v1 diff --git a/grpc-client/kotlin/src/main/proto/v1alpha1 b/grpc-client/kotlin/src/main/proto/v1alpha1 index cfa00eec479..f8068df30e8 120000 --- a/grpc-client/kotlin/src/main/proto/v1alpha1 +++ b/grpc-client/kotlin/src/main/proto/v1alpha1 @@ -1 +1 @@ -../../../../bentoml/grpc/v1alpha1 \ No newline at end of file +../../../../bentoml/grpc/v1alpha1 diff --git a/src/bentoml/_internal/service/service.py b/src/bentoml/_internal/service/service.py index c41dfed797b..c7cb0f7e2b5 100644 --- a/src/bentoml/_internal/service/service.py +++ b/src/bentoml/_internal/service/service.py @@ -286,6 +286,38 @@ def is_service_importable(self) -> bool: return True + # fmt: off + # case 1: function is not defined, but input and output are + @t.overload + def add_api(self, input: IODescriptor[IOType], output: IODescriptor[IOType], user_defined_callback: t.Callable[..., t.Any],) -> None: ... + # case 2: the decorator itself with custom routes + @t.overload + def add_api(self, input: IODescriptor[IOType], output: IODescriptor[IOType], user_defined_callback: t.Callable[..., t.Any], *, route: str = ...) -> None: ... 
+        _api = InferenceAPI[IOType](
+            name=first_not_none(name, default=user_defined_callback.__name__),
+            user_defined_callback=user_defined_callback,
+            input_descriptor=input,
+            output_descriptor=output,
+            doc=doc,
+            route=route,
+        )
+        if _api.name in self.apis:
+            raise BentoMLException(
+                f"API {_api.name} is already defined in Service {self.name}"
+            )
+        self.apis[_api.name] = _api
+
     # fmt: off
     # case 1: function is not defined, but input and output are
     @t.overload