From 8b167aab8958eba81b76f37c47f94c2edae49884 Mon Sep 17 00:00:00 2001
From: Christopher Holzweber
Date: Tue, 30 Jan 2024 20:06:00 +0100
Subject: [PATCH 01/12] Added add_api functionality with example | draft

---
 examples/dynamic_service/pipeline/README.md   |  44 ++++++
 .../dynamic_service/pipeline/bentofile.yaml   |   5 +
 .../pipeline/requirements.txt                 |   0
 examples/dynamic_service/pipeline/service.py  |  67 ++++++++
 examples/dynamic_service/pipeline/train.py    |  97 ++++++++++++
 examples/sklearn/pipeline/build_bento.py      |   0
 examples/sklearn/pipeline/train.py            | 148 +++++++++---------
 src/bentoml/_internal/service/service.py      |  31 ++++
 8 files changed, 318 insertions(+), 74 deletions(-)
 create mode 100644 examples/dynamic_service/pipeline/README.md
 create mode 100644 examples/dynamic_service/pipeline/bentofile.yaml
 rename examples/{sklearn => dynamic_service}/pipeline/requirements.txt (100%)
 create mode 100644 examples/dynamic_service/pipeline/service.py
 create mode 100644 examples/dynamic_service/pipeline/train.py
 create mode 100644 examples/sklearn/pipeline/build_bento.py

diff --git a/examples/dynamic_service/pipeline/README.md b/examples/dynamic_service/pipeline/README.md
new file mode 100644
index 00000000000..a41e30f8709
--- /dev/null
+++ b/examples/dynamic_service/pipeline/README.md
@@ -0,0 +1,44 @@
+# BentoML Sklearn Example: document classification pipeline
+
+0. Install dependencies:
+
+```bash
+pip install -r ./requirements.txt
+```
+
+1. Train a document classification pipeline model:
+
+```bash
+python ./train.py
+```
+
+2. Run the service:
+
+```bash
+bentoml serve service.py:svc
+```
+
+3. Send test requests:
+
+Test the `/predict` endpoint:
+```bash
+curl -X POST -H "content-type: application/text" --data "hello world" http://127.0.0.1:3000/predict
+```
+
+Test the `/predict_proba` endpoint:
+```bash
+curl -X POST -H "content-type: application/text" --data "hello world" http://127.0.0.1:3000/predict_proba
+```
+
+
+4. Build a Bento:
+
+```bash
+bentoml build
+```
+
+5. Build a Docker image:
+
+```bash
+bentoml containerize doc_classifier:latest
+```
diff --git a/examples/dynamic_service/pipeline/bentofile.yaml b/examples/dynamic_service/pipeline/bentofile.yaml
new file mode 100644
index 00000000000..44cad3591f8
--- /dev/null
+++ b/examples/dynamic_service/pipeline/bentofile.yaml
@@ -0,0 +1,5 @@
+service: "service.py:svc"
+include:
+  - "service.py"
+python:
+  requirements_txt: "./requirements.txt"
diff --git a/examples/sklearn/pipeline/requirements.txt b/examples/dynamic_service/pipeline/requirements.txt
similarity index 100%
rename from examples/sklearn/pipeline/requirements.txt
rename to examples/dynamic_service/pipeline/requirements.txt
diff --git a/examples/dynamic_service/pipeline/service.py b/examples/dynamic_service/pipeline/service.py
new file mode 100644
index 00000000000..869d6e7f5fe
--- /dev/null
+++ b/examples/dynamic_service/pipeline/service.py
@@ -0,0 +1,67 @@
+import bentoml
+from bentoml.io import JSON
+from bentoml.io import Text
+
+"""The following example is based on the example sklearn/pipeline.
+
+The idea of dynamically building the service endpoints:
+
+Imaging you have n models ready for production. When building your bento, you do not actually know, which models should
+be served, so you create a endpoint for every model that is available for deployment.
+
+Scenario: You are training hundreds of models, while still are in the training pipeline, you already want to serve your
+first models in production.
+ +When building bentos, you need a predefined service.py file - but with an unknown number of endpoints when building. +You want to reuse a single file everytime when creating a new bento, without changing the service definitions each time. +Every model should have (for example) a route with a running index. +""" + + +def wrap_service_methods(runner: Runner, targets: Any): + """Pass Runner and target names, as they are needed in both methods. + + Note: Only passed arguments are available in the methods below, scope is not overwritten. + """ + + async def predict(input_doc: str): + predictions = await runner.predict.async_run([input_doc]) + return {"result": targets[predictions[0]]} + + async def predict_proba(input_doc: str): + predictions = await runner.predict_proba.async_run([input_doc]) + return predictions[0] + + return predict, predict_proba + + +bento_model = bentoml.sklearn.get("twenty_news_group:latest") + +target_names = bento_model.custom_objects["target_names"] + +# Imaging we have different models, which need the same predict and predict_proba implementations +model_runner_list: [Runner] = [bento_model.to_runner(), bento_model.to_runner()] + +svc = bentoml.Service("doc_classifier", runners=[model_runner]) + +for idx, model_runner in enumerate(model_runner_list): + path_predict = f"predict_model_{idx}" + path_predict_proba = f"predict_proba_model_{idx}" + fn_pred, fn_pred_proba = setMethod(runner=model_runner, targets=target_names) + + svc.add_api( + input=Text(), + output=JSON(), + user_defined_callback=fn_pred, + name=path_predict, + doc=None, + route=path_predict, + ) + svc.add_api( + input=Text(), + output=JSON(), + user_defined_callback=fn_pred_proba, + name=path_predict_proba, + doc=None, + route=path_predict_proba, + ) diff --git a/examples/dynamic_service/pipeline/train.py b/examples/dynamic_service/pipeline/train.py new file mode 100644 index 00000000000..aa9ee1a5ee4 --- /dev/null +++ b/examples/dynamic_service/pipeline/train.py @@ -0,0 +1,97 @@ +import logging +from pprint import pprint +from time import time + +from sklearn.datasets import fetch_20newsgroups +from sklearn.feature_extraction.text import CountVectorizer +from sklearn.feature_extraction.text import TfidfTransformer +from sklearn.linear_model import SGDClassifier +from sklearn.model_selection import GridSearchCV +from sklearn.pipeline import Pipeline + +import bentoml + +# Display progress logs on stdout +logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s") + +# Load some categories from the training set +categories = [ + "alt.atheism", + "talk.religion.misc", +] + +# Uncomment the following to do the analysis on all the categories +# categories = None + +print("Loading 20 newsgroups dataset for categories:") +print(categories) + +data = fetch_20newsgroups(subset="train", categories=categories) +print("%d documents" % len(data.filenames)) +print("%d categories" % len(data.target_names)) +print() + +# Define a pipeline combining a text feature extractor with a simple classifier +pipeline = Pipeline( + [ + ("vect", CountVectorizer()), + ("tfidf", TfidfTransformer()), + ("clf", SGDClassifier(loss="log_loss")), + ] +) + +# Parameters to use for grid search. 
Uncommenting more parameters will give +# better exploring power but will increase processing time in a combinatorial +# way +parameters = { + "vect__max_df": (0.5, 0.75, 1.0), + # 'vect__max_features': (None, 5000, 10000, 50000), + "vect__ngram_range": ((1, 1), (1, 2)), # unigrams or bigrams + # 'tfidf__use_idf': (True, False), + # 'tfidf__norm': ('l1', 'l2'), + "clf__max_iter": (20,), + "clf__alpha": (0.00001, 0.000001), + "clf__penalty": ("l2", "elasticnet"), + # 'clf__max_iter': (10, 50, 80), +} + +# Find the best parameters for both the feature extraction and the +# classifier +grid_search = GridSearchCV(pipeline, parameters, n_jobs=-1, verbose=1) + +print("Performing grid search...") +print("pipeline:", [name for name, _ in pipeline.steps]) +print("parameters:") +pprint(parameters) +t0 = time() +grid_search.fit(data.data, data.target) +print("done in %0.3fs" % (time() - t0)) +print() + +print("Best score: %0.3f" % grid_search.best_score_) +best_parameters = grid_search.best_estimator_.get_params() +best_parameters = { + param_name: best_parameters[param_name] for param_name in sorted(parameters.keys()) +} +print(f"Best parameters set: {best_parameters}") + +bento_model = bentoml.sklearn.save_model( + "twenty_news_group", + grid_search.best_estimator_, + signatures={ + "predict": {"batchable": True, "batch_dim": 0}, + "predict_proba": {"batchable": True, "batch_dim": 0}, + }, + custom_objects={ + "target_names": data.target_names, + }, + metadata=best_parameters, +) +print(f"Model saved: {bento_model}") + +# Test running inference with BentoML runner +test_runner = bentoml.sklearn.get("twenty_news_group:latest").to_runner() +test_runner.init_local() +assert test_runner.predict.run(["hello"]) == grid_search.best_estimator_.predict( + ["hello"] +) diff --git a/examples/sklearn/pipeline/build_bento.py b/examples/sklearn/pipeline/build_bento.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/examples/sklearn/pipeline/train.py b/examples/sklearn/pipeline/train.py index aa9ee1a5ee4..c648b2b6faa 100644 --- a/examples/sklearn/pipeline/train.py +++ b/examples/sklearn/pipeline/train.py @@ -10,88 +10,88 @@ from sklearn.pipeline import Pipeline import bentoml +if __name__ == "__main__": + # Display progress logs on stdout + logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s") -# Display progress logs on stdout -logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s") - -# Load some categories from the training set -categories = [ - "alt.atheism", - "talk.religion.misc", -] + # Load some categories from the training set + categories = [ + "alt.atheism", + "talk.religion.misc", + ] -# Uncomment the following to do the analysis on all the categories -# categories = None + # Uncomment the following to do the analysis on all the categories + # categories = None -print("Loading 20 newsgroups dataset for categories:") -print(categories) + print("Loading 20 newsgroups dataset for categories:") + print(categories) -data = fetch_20newsgroups(subset="train", categories=categories) -print("%d documents" % len(data.filenames)) -print("%d categories" % len(data.target_names)) -print() + data = fetch_20newsgroups(subset="train", categories=categories) + print("%d documents" % len(data.filenames)) + print("%d categories" % len(data.target_names)) + print() -# Define a pipeline combining a text feature extractor with a simple classifier -pipeline = Pipeline( - [ - ("vect", CountVectorizer()), - ("tfidf", TfidfTransformer()), - 
("clf", SGDClassifier(loss="log_loss")), - ] -) + # Define a pipeline combining a text feature extractor with a simple classifier + pipeline = Pipeline( + [ + ("vect", CountVectorizer()), + ("tfidf", TfidfTransformer()), + ("clf", SGDClassifier(loss="log_loss")), + ] + ) -# Parameters to use for grid search. Uncommenting more parameters will give -# better exploring power but will increase processing time in a combinatorial -# way -parameters = { - "vect__max_df": (0.5, 0.75, 1.0), - # 'vect__max_features': (None, 5000, 10000, 50000), - "vect__ngram_range": ((1, 1), (1, 2)), # unigrams or bigrams - # 'tfidf__use_idf': (True, False), - # 'tfidf__norm': ('l1', 'l2'), - "clf__max_iter": (20,), - "clf__alpha": (0.00001, 0.000001), - "clf__penalty": ("l2", "elasticnet"), - # 'clf__max_iter': (10, 50, 80), -} + # Parameters to use for grid search. Uncommenting more parameters will give + # better exploring power but will increase processing time in a combinatorial + # way + parameters = { + "vect__max_df": (0.5, 0.75, 1.0), + # 'vect__max_features': (None, 5000, 10000, 50000), + "vect__ngram_range": ((1, 1), (1, 2)), # unigrams or bigrams + # 'tfidf__use_idf': (True, False), + # 'tfidf__norm': ('l1', 'l2'), + "clf__max_iter": (20,), + "clf__alpha": (0.00001, 0.000001), + "clf__penalty": ("l2", "elasticnet"), + # 'clf__max_iter': (10, 50, 80), + } -# Find the best parameters for both the feature extraction and the -# classifier -grid_search = GridSearchCV(pipeline, parameters, n_jobs=-1, verbose=1) + # Find the best parameters for both the feature extraction and the + # classifier + grid_search = GridSearchCV(pipeline, parameters, n_jobs=-1, verbose=1) -print("Performing grid search...") -print("pipeline:", [name for name, _ in pipeline.steps]) -print("parameters:") -pprint(parameters) -t0 = time() -grid_search.fit(data.data, data.target) -print("done in %0.3fs" % (time() - t0)) -print() + print("Performing grid search...") + print("pipeline:", [name for name, _ in pipeline.steps]) + print("parameters:") + pprint(parameters) + t0 = time() + grid_search.fit(data.data, data.target) + print("done in %0.3fs" % (time() - t0)) + print() -print("Best score: %0.3f" % grid_search.best_score_) -best_parameters = grid_search.best_estimator_.get_params() -best_parameters = { - param_name: best_parameters[param_name] for param_name in sorted(parameters.keys()) -} -print(f"Best parameters set: {best_parameters}") + print("Best score: %0.3f" % grid_search.best_score_) + best_parameters = grid_search.best_estimator_.get_params() + best_parameters = { + param_name: best_parameters[param_name] for param_name in sorted(parameters.keys()) + } + print(f"Best parameters set: {best_parameters}") -bento_model = bentoml.sklearn.save_model( - "twenty_news_group", - grid_search.best_estimator_, - signatures={ - "predict": {"batchable": True, "batch_dim": 0}, - "predict_proba": {"batchable": True, "batch_dim": 0}, - }, - custom_objects={ - "target_names": data.target_names, - }, - metadata=best_parameters, -) -print(f"Model saved: {bento_model}") + bento_model = bentoml.sklearn.save_model( + "twenty_news_group", + grid_search.best_estimator_, + signatures={ + "predict": {"batchable": True, "batch_dim": 0}, + "predict_proba": {"batchable": True, "batch_dim": 0}, + }, + custom_objects={ + "target_names": data.target_names, + }, + metadata=best_parameters, + ) + print(f"Model saved: {bento_model}") -# Test running inference with BentoML runner -test_runner = bentoml.sklearn.get("twenty_news_group:latest").to_runner() 
-test_runner.init_local() -assert test_runner.predict.run(["hello"]) == grid_search.best_estimator_.predict( - ["hello"] -) + # Test running inference with BentoML runner + test_runner = bentoml.sklearn.get("twenty_news_group:latest").to_runner() + test_runner.init_local() + assert test_runner.predict.run(["hello"]) == grid_search.best_estimator_.predict( + ["hello"] + ) diff --git a/src/bentoml/_internal/service/service.py b/src/bentoml/_internal/service/service.py index 63bf433992c..8505c04134a 100644 --- a/src/bentoml/_internal/service/service.py +++ b/src/bentoml/_internal/service/service.py @@ -283,6 +283,37 @@ def is_service_importable(self) -> bool: return False return True + # fmt: off + # case 1: function is not defined, but input and output are + @t.overload + def add_api(self, input: IODescriptor[IOType], output: IODescriptor[IOType], user_defined_callback: t.Callable[..., t.Any],) -> None: ... + # case 2: the decorator itself with custom routes + @t.overload + def add_api(self, input: IODescriptor[IOType], output: IODescriptor[IOType], user_defined_callback: t.Callable[..., t.Any], *, route: str = ...) -> None: ... + # fmt: on + def add_api( + self, + input: IODescriptor[IOType], + output: IODescriptor[IOType], + user_defined_callback: t.Callable[..., t.Any], + *, + name: str | None = None, + doc: str | None = None, + route: str | None = None, + ) -> None: + _api = InferenceAPI[IOType]( + name=first_not_none(name, default=fn.__name__), + user_defined_callback=user_defined_callback, + input_descriptor=input, + output_descriptor=output, + doc=doc, + route=route, + ) + if _api.name in self.apis: + raise BentoMLException( + f"API {_api.name} is already defined in Service {self.name}" + ) + self.apis[_api.name] = _api # fmt: off # case 1: function is not defined, but input and output are From a28891deccdc935fff3b41efea7eeec84e5607cf Mon Sep 17 00:00:00 2001 From: Christopher Holzweber Date: Sat, 10 Feb 2024 11:59:32 +0100 Subject: [PATCH 02/12] working but unclean version of dynamic services --- examples/dynamic_service/pipeline/README.md | 4 +- .../dynamic_service/pipeline/bentofile.yaml | 1 + examples/dynamic_service/pipeline/service.py | 45 ++++++++++++------- examples/dynamic_service/pipeline/train.py | 21 +++++++++ src/bentoml/_internal/service/service.py | 2 +- 5 files changed, 53 insertions(+), 20 deletions(-) diff --git a/examples/dynamic_service/pipeline/README.md b/examples/dynamic_service/pipeline/README.md index a41e30f8709..2d7f3f27054 100644 --- a/examples/dynamic_service/pipeline/README.md +++ b/examples/dynamic_service/pipeline/README.md @@ -22,12 +22,12 @@ bentoml serve service.py:svc Test the `/predict` endpoint: ```bash -curl -X POST -H "content-type: application/text" --data "hello world" http://127.0.0.1:3000/predict +curl -X POST -H "content-type: application/text" --data "hello world" http://127.0.0.1:3000/predict_model_0 ``` Test the `/predict_proba` endpoint: ```bash -curl -X POST -H "content-type: application/text" --data "hello world" http://127.0.0.1:3000/predict_proba +curl -X POST -H "content-type: application/text" --data "hello world" http://127.0.0.1:3000/predict_proba_model_0 ``` diff --git a/examples/dynamic_service/pipeline/bentofile.yaml b/examples/dynamic_service/pipeline/bentofile.yaml index 44cad3591f8..568a5250135 100644 --- a/examples/dynamic_service/pipeline/bentofile.yaml +++ b/examples/dynamic_service/pipeline/bentofile.yaml @@ -1,5 +1,6 @@ service: "service.py:svc" include: - "service.py" + - "requirements.txt" python: requirements_txt: 
"./requirements.txt" diff --git a/examples/dynamic_service/pipeline/service.py b/examples/dynamic_service/pipeline/service.py index 869d6e7f5fe..dcee9cace50 100644 --- a/examples/dynamic_service/pipeline/service.py +++ b/examples/dynamic_service/pipeline/service.py @@ -1,21 +1,25 @@ +from typing import Any + import bentoml +from bentoml import Runner from bentoml.io import JSON from bentoml.io import Text -"""The following example is based on the example sklearn/pipeline. +"""The following example is based on the sklearn/pipeline example. -The idea of dynamically building the service endpoints: +The concept revolves around dynamically constructing service endpoints: -Imaging you have n models ready for production. When building your bento, you do not actually know, which models should -be served, so you create a endpoint for every model that is available for deployment. +Imagine you have n models ready for production. +When creating your Bento, you may not know in advance which models will be served. +Therefore, you create an endpoint for every available model that can be deployed. -Scenario: You are training hundreds of models, while still are in the training pipeline, you already want to serve your -first models in production. +Scenario: You trained hundreds of models. +While they are still in the training pipeline, you want to begin serving your first models already in production. -When building bentos, you need a predefined service.py file - but with an unknown number of endpoints when building. -You want to reuse a single file everytime when creating a new bento, without changing the service definitions each time. -Every model should have (for example) a route with a running index. -""" +When constructing Bentos, you require a predefined service.py file. However, the number of endpoints is unknown +during construction of this file. You aim to reuse the same file each time you create a new Bento, without the need +to alter the service definitions repeatedly. Each model should ideally have a route with a unique running index, +for instance. 
""" def wrap_service_methods(runner: Runner, targets: Any): @@ -35,19 +39,26 @@ async def predict_proba(input_doc: str): return predict, predict_proba -bento_model = bentoml.sklearn.get("twenty_news_group:latest") +available_model_set = set() +# Add all unique variations of twenty_news_group to the service +for available_model in bentoml.models.list(): + if "twenty_news_group" in available_model.tag.name: + available_model_set.add(available_model.tag.name) -target_names = bento_model.custom_objects["target_names"] +model_runner_list: [Runner] = [] +target_names: [] = [] -# Imaging we have different models, which need the same predict and predict_proba implementations -model_runner_list: [Runner] = [bento_model.to_runner(), bento_model.to_runner()] +for available_model in available_model_set: + bento_model = bentoml.sklearn.get(f"{available_model}:latest") + target_names.append(bento_model.custom_objects["target_names"]) + model_runner_list.append(bento_model.to_runner()) -svc = bentoml.Service("doc_classifier", runners=[model_runner]) +svc = bentoml.Service("doc_classifier", runners=model_runner_list) -for idx, model_runner in enumerate(model_runner_list): +for idx, (model_runner, target_name) in enumerate(zip(model_runner_list, target_names)): path_predict = f"predict_model_{idx}" path_predict_proba = f"predict_proba_model_{idx}" - fn_pred, fn_pred_proba = setMethod(runner=model_runner, targets=target_names) + fn_pred, fn_pred_proba = wrap_service_methods(runner=model_runner, targets=target_name) svc.add_api( input=Text(), diff --git a/examples/dynamic_service/pipeline/train.py b/examples/dynamic_service/pipeline/train.py index aa9ee1a5ee4..83797d04305 100644 --- a/examples/dynamic_service/pipeline/train.py +++ b/examples/dynamic_service/pipeline/train.py @@ -95,3 +95,24 @@ assert test_runner.predict.run(["hello"]) == grid_search.best_estimator_.predict( ["hello"] ) + +bento_model = bentoml.sklearn.save_model( + "twenty_news_group_second", + grid_search.best_estimator_, + signatures={ + "predict": {"batchable": True, "batch_dim": 0}, + "predict_proba": {"batchable": True, "batch_dim": 0}, + }, + custom_objects={ + "target_names": data.target_names, + }, + metadata=best_parameters, +) +print(f"Model saved: {bento_model}") + +# Test running inference with BentoML runner +test_runner = bentoml.sklearn.get("twenty_news_group_second:latest").to_runner() +test_runner.init_local() +assert test_runner.predict.run(["hello"]) == grid_search.best_estimator_.predict( + ["hello"] +) \ No newline at end of file diff --git a/src/bentoml/_internal/service/service.py b/src/bentoml/_internal/service/service.py index 8505c04134a..34a7c62c9c3 100644 --- a/src/bentoml/_internal/service/service.py +++ b/src/bentoml/_internal/service/service.py @@ -302,7 +302,7 @@ def add_api( route: str | None = None, ) -> None: _api = InferenceAPI[IOType]( - name=first_not_none(name, default=fn.__name__), + name=first_not_none(name, default=user_defined_callback.__name__), user_defined_callback=user_defined_callback, input_descriptor=input, output_descriptor=output, From 8299fd5c851bc26ea704e33c3a73b303a3f539c1 Mon Sep 17 00:00:00 2001 From: Christopher Holzweber Date: Sat, 10 Feb 2024 12:34:14 +0100 Subject: [PATCH 03/12] run pre-commit hook on changed files --- examples/dynamic_service/pipeline/service.py | 10 ++++++---- examples/dynamic_service/pipeline/train.py | 2 +- src/bentoml/_internal/service/service.py | 17 +++++++++-------- 3 files changed, 16 insertions(+), 13 deletions(-) diff --git 
a/examples/dynamic_service/pipeline/service.py b/examples/dynamic_service/pipeline/service.py index dcee9cace50..b7980c210ed 100644 --- a/examples/dynamic_service/pipeline/service.py +++ b/examples/dynamic_service/pipeline/service.py @@ -16,9 +16,9 @@ Scenario: You trained hundreds of models. While they are still in the training pipeline, you want to begin serving your first models already in production. -When constructing Bentos, you require a predefined service.py file. However, the number of endpoints is unknown -during construction of this file. You aim to reuse the same file each time you create a new Bento, without the need -to alter the service definitions repeatedly. Each model should ideally have a route with a unique running index, +When constructing Bentos, you require a predefined service.py file. However, the number of endpoints is unknown +during construction of this file. You aim to reuse the same file each time you create a new Bento, without the need +to alter the service definitions repeatedly. Each model should ideally have a route with a unique running index, for instance. """ @@ -58,7 +58,9 @@ async def predict_proba(input_doc: str): for idx, (model_runner, target_name) in enumerate(zip(model_runner_list, target_names)): path_predict = f"predict_model_{idx}" path_predict_proba = f"predict_proba_model_{idx}" - fn_pred, fn_pred_proba = wrap_service_methods(runner=model_runner, targets=target_name) + fn_pred, fn_pred_proba = wrap_service_methods( + runner=model_runner, targets=target_name + ) svc.add_api( input=Text(), diff --git a/examples/dynamic_service/pipeline/train.py b/examples/dynamic_service/pipeline/train.py index 83797d04305..71e05e48973 100644 --- a/examples/dynamic_service/pipeline/train.py +++ b/examples/dynamic_service/pipeline/train.py @@ -115,4 +115,4 @@ test_runner.init_local() assert test_runner.predict.run(["hello"]) == grid_search.best_estimator_.predict( ["hello"] -) \ No newline at end of file +) diff --git a/src/bentoml/_internal/service/service.py b/src/bentoml/_internal/service/service.py index 34a7c62c9c3..cf91e93c56c 100644 --- a/src/bentoml/_internal/service/service.py +++ b/src/bentoml/_internal/service/service.py @@ -283,6 +283,7 @@ def is_service_importable(self) -> bool: return False return True + # fmt: off # case 1: function is not defined, but input and output are @t.overload @@ -292,14 +293,14 @@ def add_api(self, input: IODescriptor[IOType], output: IODescriptor[IOType], use def add_api(self, input: IODescriptor[IOType], output: IODescriptor[IOType], user_defined_callback: t.Callable[..., t.Any], *, route: str = ...) -> None: ... 
# fmt: on def add_api( - self, - input: IODescriptor[IOType], - output: IODescriptor[IOType], - user_defined_callback: t.Callable[..., t.Any], - *, - name: str | None = None, - doc: str | None = None, - route: str | None = None, + self, + input: IODescriptor[IOType], + output: IODescriptor[IOType], + user_defined_callback: t.Callable[..., t.Any], + *, + name: str | None = None, + doc: str | None = None, + route: str | None = None, ) -> None: _api = InferenceAPI[IOType]( name=first_not_none(name, default=user_defined_callback.__name__), From fda3a5f743b4b2aa9c297d81d083fac51df7069a Mon Sep 17 00:00:00 2001 From: Christopher Holzweber Date: Sat, 10 Feb 2024 12:37:49 +0100 Subject: [PATCH 04/12] Missing checks --- examples/sklearn/pipeline/train.py | 8 ++++++-- grpc-client/bentoml | 2 +- grpc-client/java/src/main/proto/v1 | 2 +- grpc-client/java/src/main/proto/v1alpha1 | 2 +- grpc-client/kotlin/src/main/proto/v1 | 2 +- grpc-client/kotlin/src/main/proto/v1alpha1 | 2 +- 6 files changed, 11 insertions(+), 7 deletions(-) diff --git a/examples/sklearn/pipeline/train.py b/examples/sklearn/pipeline/train.py index c648b2b6faa..1230bd5615f 100644 --- a/examples/sklearn/pipeline/train.py +++ b/examples/sklearn/pipeline/train.py @@ -10,9 +10,12 @@ from sklearn.pipeline import Pipeline import bentoml + if __name__ == "__main__": # Display progress logs on stdout - logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s") + logging.basicConfig( + level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s" + ) # Load some categories from the training set categories = [ @@ -71,7 +74,8 @@ print("Best score: %0.3f" % grid_search.best_score_) best_parameters = grid_search.best_estimator_.get_params() best_parameters = { - param_name: best_parameters[param_name] for param_name in sorted(parameters.keys()) + param_name: best_parameters[param_name] + for param_name in sorted(parameters.keys()) } print(f"Best parameters set: {best_parameters}") diff --git a/grpc-client/bentoml b/grpc-client/bentoml index 542ee9833b2..f6754da92e0 120000 --- a/grpc-client/bentoml +++ b/grpc-client/bentoml @@ -1 +1 @@ -../src/bentoml \ No newline at end of file +../src/bentoml diff --git a/grpc-client/java/src/main/proto/v1 b/grpc-client/java/src/main/proto/v1 index cbeb74766ad..c252109319f 120000 --- a/grpc-client/java/src/main/proto/v1 +++ b/grpc-client/java/src/main/proto/v1 @@ -1 +1 @@ -../../../../bentoml/grpc/v1 \ No newline at end of file +../../../../bentoml/grpc/v1 diff --git a/grpc-client/java/src/main/proto/v1alpha1 b/grpc-client/java/src/main/proto/v1alpha1 index cfa00eec479..f8068df30e8 120000 --- a/grpc-client/java/src/main/proto/v1alpha1 +++ b/grpc-client/java/src/main/proto/v1alpha1 @@ -1 +1 @@ -../../../../bentoml/grpc/v1alpha1 \ No newline at end of file +../../../../bentoml/grpc/v1alpha1 diff --git a/grpc-client/kotlin/src/main/proto/v1 b/grpc-client/kotlin/src/main/proto/v1 index cbeb74766ad..c252109319f 120000 --- a/grpc-client/kotlin/src/main/proto/v1 +++ b/grpc-client/kotlin/src/main/proto/v1 @@ -1 +1 @@ -../../../../bentoml/grpc/v1 \ No newline at end of file +../../../../bentoml/grpc/v1 diff --git a/grpc-client/kotlin/src/main/proto/v1alpha1 b/grpc-client/kotlin/src/main/proto/v1alpha1 index cfa00eec479..f8068df30e8 120000 --- a/grpc-client/kotlin/src/main/proto/v1alpha1 +++ b/grpc-client/kotlin/src/main/proto/v1alpha1 @@ -1 +1 @@ -../../../../bentoml/grpc/v1alpha1 \ No newline at end of file +../../../../bentoml/grpc/v1alpha1 From b7169c707f12d6d7e9357955ad5ccf68dba25421 
From 0ed2ab5c7e1e0ac859c2394c725af26cf25d84af Mon Sep 17 00:00:00 2001
From: Christopher Holzweber
Date: Sun, 17 Mar 2024 10:41:05 +0100
Subject: [PATCH 09/12] added dynamic service

---
 .../dynamic_service/pipeline/bentofile.yaml  |  2 +-
 examples/dynamic_service/pipeline/service.py | 96 +++++++++----------
 2 files changed, 45 insertions(+), 53 deletions(-)

diff --git a/examples/dynamic_service/pipeline/bentofile.yaml b/examples/dynamic_service/pipeline/bentofile.yaml
index 568a5250135..fa5f43b01e1 100644
--- a/examples/dynamic_service/pipeline/bentofile.yaml
+++ b/examples/dynamic_service/pipeline/bentofile.yaml
@@ -1,4 +1,4 @@
-service: "service.py:svc"
+service: "service:DynamicService"
 include:
   - "service.py"
   - "requirements.txt"
diff --git a/examples/dynamic_service/pipeline/service.py b/examples/dynamic_service/pipeline/service.py
index b7980c210ed..e04a0b575a5 100644
--- a/examples/dynamic_service/pipeline/service.py
+++ b/examples/dynamic_service/pipeline/service.py
@@ -1,9 +1,6 @@
 from typing import Any
-
 import bentoml
-from bentoml import Runner
-from bentoml.io import JSON
-from bentoml.io import Text
+
 
 """The following example is based on the sklearn/pipeline example.
@@ -21,60 +18,55 @@ to alter the service definitions repeatedly.
 Each model should ideally have a route with a unique running index, for instance.
 """
 
-
-def wrap_service_methods(runner: Runner, targets: Any):
-    """Pass Runner and target names, as they are needed in both methods.
-
-    Note: Only passed arguments are available in the methods below, scope is not overwritten.
-    """
-
+def wrap_service_methods(model: bentoml.Model,
+    targets: Any,
+    predict_route: str,
+    predict_name: str,
+    predict_proba_route: str,
+    predict_proba_name: str,
+    ):
+    """Wrap the model's methods in service functions and annotate them as APIs."""
+    @bentoml.api(route=predict_route, name=predict_name)
     async def predict(input_doc: str):
-        predictions = await runner.predict.async_run([input_doc])
+        predictions = await model.predict.async_run([input_doc])
         return {"result": targets[predictions[0]]}
 
+    @bentoml.api(route=predict_proba_route, name=predict_proba_name)
     async def predict_proba(input_doc: str):
-        predictions = await runner.predict_proba.async_run([input_doc])
+        predictions = await model.predict_proba.async_run([input_doc])
         return predictions[0]
+
     return predict, predict_proba
 
-
-available_model_set = set()
-# Add all unique variations of twenty_news_group to the service
-for available_model in bentoml.models.list():
-    if "twenty_news_group" in available_model.tag.name:
-        available_model_set.add(available_model.tag.name)
-
-model_runner_list: [Runner] = []
-target_names: [] = []
-
-for available_model in available_model_set:
-    bento_model = bentoml.sklearn.get(f"{available_model}:latest")
-    target_names.append(bento_model.custom_objects["target_names"])
-    model_runner_list.append(bento_model.to_runner())
-
-svc = bentoml.Service("doc_classifier", runners=model_runner_list)
-
-for idx, (model_runner, target_name) in enumerate(zip(model_runner_list, target_names)):
-    path_predict = f"predict_model_{idx}"
-    path_predict_proba = f"predict_proba_model_{idx}"
-    fn_pred, fn_pred_proba = wrap_service_methods(
-        runner=model_runner, targets=target_name
-    )
-
-    svc.add_api(
-        input=Text(),
-        output=JSON(),
-        user_defined_callback=fn_pred,
-        name=path_predict,
-        doc=None,
-        route=path_predict,
-    )
-    svc.add_api(
-        input=Text(),
-        output=JSON(),
-        user_defined_callback=fn_pred_proba,
-        name=path_predict_proba,
-        doc=None,
-        route=path_predict_proba,
-    )
+@bentoml.service(
+workers=1, resources={"cpu": "1"}
+)
+class DynamicService:
+    """Dynamic Service class.
+
+    Note: The API methods must not be added in the __init__ function; otherwise the service APIs would not be visible in the OpenAPI docs.
+    """
+
+    # Manually add the API methods to the class scope via locals() (the current scope).
+    for idx, available_model in enumerate(bentoml.models.list()):
+        if "twenty_news_group" in available_model.tag.name:
+            print(f"Creating Endpoint {idx}")
+            bento_model = bentoml.sklearn.get(f"{available_model.tag.name}:latest")
+            target_names = bento_model.custom_objects["target_names"]
+            path_predict = f"predict_model_{idx}"
+            path_predict_proba = f"predict_proba_model_{idx}"
+
+            locals()[path_predict], locals()[path_predict_proba] = wrap_service_methods(bento_model,
+                target_names,
+                predict_route=path_predict,
+                predict_name=path_predict,
+                predict_proba_route=path_predict_proba,
+                predict_proba_name=path_predict_proba,
+                )
+
+    def __init__(self):
+        """Nothing to do here."""
+        ...
+

From 05fa5a1e40976335bcc44b4cf68e0cae15f87e9b Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sun, 17 Mar 2024 09:44:21 +0000
Subject: [PATCH 10/12] ci: auto fixes from pre-commit.ci

For more information, see https://pre-commit.ci
---
 examples/dynamic_service/pipeline/service.py | 40 ++++++++++----------
 1 file changed, 20 insertions(+), 20 deletions(-)

diff --git a/examples/dynamic_service/pipeline/service.py b/examples/dynamic_service/pipeline/service.py
index e04a0b575a5..20a86db81bb 100644
--- a/examples/dynamic_service/pipeline/service.py
+++ b/examples/dynamic_service/pipeline/service.py
@@ -1,6 +1,6 @@
 from typing import Any
 
-import bentoml
+import bentoml
 
 
 """The following example is based on the sklearn/pipeline example.
@@ -18,14 +18,17 @@ to alter the service definitions repeatedly.
 Each model should ideally have a route with a unique running index, for instance.
 """
 
-def wrap_service_methods(model: bentoml.Model,
-    targets: Any,
-    predict_route: str,
-    predict_name: str,
-    predict_proba_route: str,
-    predict_proba_name: str,
-    ):
+
+def wrap_service_methods(
+    model: bentoml.Model,
+    targets: Any,
+    predict_route: str,
+    predict_name: str,
+    predict_proba_route: str,
+    predict_proba_name: str,
+):
     """Wrap the model's methods in service functions and annotate them as APIs."""
+
     @bentoml.api(route=predict_route, name=predict_name)
     async def predict(input_doc: str):
         predictions = await model.predict.async_run([input_doc])
@@ -36,13 +39,10 @@ async def predict_proba(input_doc: str):
         predictions = await model.predict_proba.async_run([input_doc])
         return predictions[0]
-
     return predict, predict_proba
 
-
-@bentoml.service(
-workers=1, resources={"cpu": "1"}
-)
+@bentoml.service(workers=1, resources={"cpu": "1"})
 class DynamicService:
     """Dynamic Service class.
@@ -58,15 +58,15 @@ class DynamicService:
             path_predict = f"predict_model_{idx}"
             path_predict_proba = f"predict_proba_model_{idx}"
 
-            locals()[path_predict], locals()[path_predict_proba] = wrap_service_methods(bento_model,
-                target_names,
-                predict_route=path_predict,
-                predict_name=path_predict,
-                predict_proba_route=path_predict_proba,
-                predict_proba_name=path_predict_proba,
-                )
+            locals()[path_predict], locals()[path_predict_proba] = wrap_service_methods(
+                bento_model,
+                target_names,
+                predict_route=path_predict,
+                predict_name=path_predict,
+                predict_proba_route=path_predict_proba,
+                predict_proba_name=path_predict_proba,
+            )
 
     def __init__(self):
         """Nothing to do here."""
         ...
-
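The class-body trick that this pair of patches relies on is easy to miss: inside a class body, locals() returns the very namespace dict that later becomes the class's attribute table, so writing into it while the body executes registers new methods. A framework-free sketch of just that mechanism, assuming CPython semantics; ToyService and make_endpoint are illustrative names, not BentoML API:

    def make_endpoint(idx: int):
        # Bind idx through a closure so each generated method keeps its own index.
        def endpoint(self) -> str:
            return f"response from endpoint_{idx}"
        return endpoint


    class ToyService:
        # Assigning into locals() during class-body execution adds class
        # attributes, which is how DynamicService above grows one API per model.
        for _i in range(3):
            locals()[f"endpoint_{_i}"] = make_endpoint(_i)


    svc = ToyService()
    print(svc.endpoint_0())  # -> response from endpoint_0
    print(svc.endpoint_2())  # -> response from endpoint_2

This is also why the class docstring warns against registering anything in __init__: the methods must already exist on the class when the service decorator inspects it, rather than being created per instance afterwards.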
From 3bc355dc6f1177b2cd88d1105a654c391cefad15 Mon Sep 17 00:00:00 2001
From: Christopher Holzweber
Date: Mon, 18 Mar 2024 19:22:39 +0100
Subject: [PATCH 11/12] added services with type()

---
 examples/dynamic_service/pipeline/service.py | 54 +++++++++-----------
 1 file changed, 25 insertions(+), 29 deletions(-)

diff --git a/examples/dynamic_service/pipeline/service.py b/examples/dynamic_service/pipeline/service.py
index e04a0b575a5..e0ed50003f1 100644
--- a/examples/dynamic_service/pipeline/service.py
+++ b/examples/dynamic_service/pipeline/service.py
@@ -39,34 +39,30 @@ async def predict_proba(input_doc: str):
 
     return predict, predict_proba
 
-
-@bentoml.service(
-workers=1, resources={"cpu": "1"}
-)
+class_attrs = {} # Empty dict for storing methods
+# Collect the generated API methods in class_attrs; they become class attributes below.
+distinct_models = set()
+for model in bentoml.models.list():
+    distinct_models.add(model.tag.name)
+for idx, available_model in enumerate(distinct_models):
+    if "twenty_news_group" in available_model:
+        bento_model = bentoml.sklearn.get(f"{available_model}:latest")
+        target_names = bento_model.custom_objects["target_names"]
+        path_predict = f"predict_model_{idx}"
+        path_predict_proba = f"predict_proba_model_{idx}"
+
+        class_attrs[path_predict],class_attrs[path_predict_proba] = wrap_service_methods(bento_model,
+            target_names,
+            predict_route="/"+path_predict,
+            predict_name="/"+path_predict,
+            predict_proba_route=path_predict_proba,
+            predict_proba_name=path_predict_proba,
+            )
+
+# Create class via type() and attach the generated methods
+DynamicServiceClass = type(
+    "DynamicService", (object,), class_attrs,
+)
-class DynamicService:
-    """Dynamic Service class.
-
-    Note: The API methods must not be added in the __init__ function; otherwise the service APIs would not be visible in the OpenAPI docs.
-    """
-
-    # Manually add the API methods to the class scope via locals() (the current scope).
-    for idx, available_model in enumerate(bentoml.models.list()):
-        if "twenty_news_group" in available_model.tag.name:
-            print(f"Creating Endpoint {idx}")
-            bento_model = bentoml.sklearn.get(f"{available_model.tag.name}:latest")
-            target_names = bento_model.custom_objects["target_names"]
-            path_predict = f"predict_model_{idx}"
-            path_predict_proba = f"predict_proba_model_{idx}"
-
-            locals()[path_predict], locals()[path_predict_proba] = wrap_service_methods(bento_model,
-                target_names,
-                predict_route=path_predict,
-                predict_name=path_predict,
-                predict_proba_route=path_predict_proba,
-                predict_proba_name=path_predict_proba,
-                )
-
-    def __init__(self):
-        """Nothing to do here."""
-        ...
+# Create the endpoint service referenced in bentofile.yaml
+DynamicService = bentoml.service(workers=1, resources={"cpu": "1"})(DynamicServiceClass)
\ No newline at end of file

From 9a42ea0504a782950ed4d4d1c77a9317323076e7 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Mon, 18 Mar 2024 18:27:56 +0000
Subject: [PATCH 12/12] ci: auto fixes from pre-commit.ci

For more information, see https://pre-commit.ci
---
 examples/dynamic_service/pipeline/service.py | 31 ++++++++++++--------
 1 file changed, 19 insertions(+), 12 deletions(-)

diff --git a/examples/dynamic_service/pipeline/service.py b/examples/dynamic_service/pipeline/service.py
index 6296ce99231..13685070c5d 100644
--- a/examples/dynamic_service/pipeline/service.py
+++ b/examples/dynamic_service/pipeline/service.py
@@ -1,6 +1,6 @@
 from typing import Any
 
-import bentoml
+import bentoml
 
 
 """The following example is based on the sklearn/pipeline example.
@@ -28,6 +28,7 @@ def wrap_service_methods(
     predict_proba_name: str,
 ):
     """Wrap the model's methods in service functions and annotate them as APIs."""
+
     @bentoml.api(route=predict_route, name=predict_name)
     async def predict(input_doc: str):
         predictions = await model.predict.async_run([input_doc])
@@ -38,10 +39,10 @@ async def predict_proba(input_doc: str):
         predictions = await model.predict_proba.async_run([input_doc])
         return predictions[0]
-
     return predict, predict_proba
 
-class_attrs = {} # Empty dict for storing methods
+
+class_attrs = {}  # Empty dict for storing methods
 # Collect the generated API methods in class_attrs; they become class attributes below.
 distinct_models = set()
@@ -53,18 +54,24 @@ async def predict_proba(input_doc: str):
         path_predict = f"predict_model_{idx}"
         path_predict_proba = f"predict_proba_model_{idx}"
 
-        class_attrs[path_predict],class_attrs[path_predict_proba] = wrap_service_methods(bento_model,
-            target_names,
-            predict_route="/"+path_predict,
-            predict_name="/"+path_predict,
-            predict_proba_route=path_predict_proba,
-            predict_proba_name=path_predict_proba,
-            )
+        (
+            class_attrs[path_predict],
+            class_attrs[path_predict_proba],
+        ) = wrap_service_methods(
+            bento_model,
+            target_names,
+            predict_route="/" + path_predict,
+            predict_name="/" + path_predict,
+            predict_proba_route=path_predict_proba,
+            predict_proba_name=path_predict_proba,
+        )
 
 # Create class via type() and attach the generated methods
 DynamicServiceClass = type(
-    "DynamicService", (object,), class_attrs,
+    "DynamicService",
+    (object,),
+    class_attrs,
 )
 
 # Create the endpoint service referenced in bentofile.yaml
 DynamicService = bentoml.service(workers=1, resources={"cpu": "1"})(DynamicServiceClass)
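The final two patches trade the locals() trick for a more explicit construction: collect the generated functions in a plain dict, build the class with type(), and apply bentoml.service as an ordinary callable. Reduced to standard-library Python, the same three-step pattern looks as follows; toy_api and toy_service are hypothetical stand-ins for @bentoml.api and bentoml.service, not real BentoML names:

    from typing import Any, Callable, Dict


    def toy_api(route: str, name: str) -> Callable[[Callable], Callable]:
        # Stand-in for @bentoml.api: tags a function with routing metadata.
        def decorator(fn: Callable) -> Callable:
            fn.route = route
            fn.endpoint_name = name
            return fn
        return decorator


    def make_predict(model_id: int, route: str) -> Callable:
        @toy_api(route=route, name=route.lstrip("/"))
        def predict(self, input_doc: str) -> Dict[str, Any]:
            return {"model": model_id, "input": input_doc}
        return predict


    def toy_service(cls):
        # Stand-in for bentoml.service applied as a plain callable.
        cls.is_service = True
        return cls


    # Step 1: collect the generated endpoint functions, keyed by attribute name.
    class_attrs: Dict[str, Callable] = {}
    for idx in range(3):  # pretend three models were found in the model store
        class_attrs[f"predict_model_{idx}"] = make_predict(idx, f"/predict_model_{idx}")

    # Steps 2 and 3: assemble the class at runtime, then decorate it.
    DynamicToyService = toy_service(type("DynamicToyService", (object,), class_attrs))

    svc = DynamicToyService()
    print(svc.predict_model_1("hello"))             # {'model': 1, 'input': 'hello'}
    print(DynamicToyService.predict_model_2.route)  # /predict_model_2

Compared with mutating locals() in a class body, the type() construction keeps the generated attribute names inspectable in a single dict and does not depend on CPython's class-namespace behavior, which is arguably why the series settles on it.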