bentoml · holzweber · Jan 30, 2024 · Feb 10, 2024 · Feb 10, 2024 · Feb 10, 2024
@@ -0,0 +1,44 @@
+# BentoML Sklearn Example: document classification pipeline
+
+0. Install dependencies:
+
+```bash
+pip install -r ./requirements.txt
+```
+
+1. Train a document classification pipeline model
+
+```bash
+python ./train.py
+```
+
+2. Run the service:
+
+```bash
+bentoml serve service.py:DynamicService
+```
+
+3. Send test request
+
+Test the `/predict_model_0` endpoint:
+```bash
+curl -X POST -H "content-type: application/text" --data "hello world" http://127.0.0.1:3000/predict_model_0
+```
+
+Test the `/predict_proba_model_0` endpoint:
+```bash
+curl -X POST -H "content-type: application/text" --data "hello world" http://127.0.0.1:3000/predict_proba_model_0
+```
+
+
+4. Build Bento
+
+```bash
+bentoml build
+```
+
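+5. Build docker image (if `bentoml build` printed a different Bento tag, use that tag instead of `doc_classifier:latest`)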
+
+```bash
+bentoml containerize doc_classifier:latest
+```
@@ -0,0 +1,6 @@
+# Service entry point in "<module>:<attribute>" form; service.py assigns the
+# decorated service to the module-level name `DynamicService`.
+service: "service:DynamicService"
+# Files listed for inclusion in the Bento.
+include:
+  - "service.py"
+  - "requirements.txt"
+# Python dependencies are read from the requirements file next to this config.
+python:
+  requirements_txt: "./requirements.txt"
@@ -0,0 +1,77 @@
+from typing import Any
+
+import bentoml
+
+"""The following example is based on the sklearn/pipeline example.
+
+The concept revolves around dynamically constructing service endpoints:
+
+Imagine you have n models ready for production.
+When creating your Bento, you may not know in advance which models will be served.
+Therefore, you create an endpoint for every available model that can be deployed.
+
+Scenario: You trained hundreds of models.
+While they are still in the training pipeline, you want to begin serving your first models already in production.
+
+When constructing Bentos, you require a predefined service.py file. However, the number of endpoints is unknown
+during construction of this file. You aim to reuse the same file each time you create a new Bento, without the need
+to alter the service definitions repeatedly. Ideally, each model gets its own route, distinguished for instance
+by a unique running index. """
+
+
+def wrap_service_methods(
+    model: bentoml.Model,
+    targets: Any,
+    predict_route: str,
+    predict_name: str,
+    predict_proba_route: str,
+    predict_proba_name: str,
+):
+    """Wrap models in service methods and annotate as api."""
+
+    @bentoml.api(route=predict_route, name=predict_name)
+    async def predict(input_doc: str):
+        predictions = await model.predict.async_run([input_doc])
+        return {"result": targets[predictions[0]]}
+
+    @bentoml.api(route=predict_proba_route, name=predict_proba_name)
+    async def predict_proba(input_doc: str):
+        predictions = await model.predict_proba.async_run([input_doc])
+        return predictions[0]
+
+    return predict, predict_proba
+
+
+class_attrs = {}  # Empty dict for storing methods
+# Manually add api methods to local scope as via locals() method (current scope).
+distinct_models = set()
+for model in bentoml.models.list():
+    distinct_models.add(model.tag.name)
+for idx, available_model in enumerate(distinct_models):
+    if "twenty_news_group" in available_model:
+        bento_model = bentoml.sklearn.get(f"{available_model}:latest")
+        target_names = bento_model.custom_objects["target_names"]
+        path_predict = f"predict_model_{idx}"
+        path_predict_proba = f"predict_proba_model_{idx}"
+
+        (
+            class_attrs[path_predict],
+            class_attrs[path_predict_proba],
+        ) = wrap_service_methods(
+            bento_model,
+            target_names,
+            predict_route="/" + path_predict,
+            predict_name="/" + path_predict,
+            predict_proba_route=path_predict_proba,
+            predict_proba_name=path_predict_proba,
+        )
+
+#  Create class with type and add generated methods
+DynamicServiceClass = type(
+    "DynamicService",
+    (object,),
+    class_attrs,
+)
+
+#  Create Endpoint Service defined in bentofile.yaml
+DynamicService = bentoml.service(workers=1, resources={"cpu": "1"})(DynamicServiceClass)
@@ -0,0 +1,118 @@
+import logging
+from pprint import pprint
+from time import time
+
+from sklearn.datasets import fetch_20newsgroups
+from sklearn.feature_extraction.text import CountVectorizer
+from sklearn.feature_extraction.text import TfidfTransformer
+from sklearn.linear_model import SGDClassifier
+from sklearn.model_selection import GridSearchCV
+from sklearn.pipeline import Pipeline
+
+import bentoml
+
+# Display progress logs on stdout
+logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
+
+# Load some categories from the training set
+categories = [
+    "alt.atheism",
+    "talk.religion.misc",
+]
+
+# Uncomment the following to do the analysis on all the categories
+# categories = None
+
+print("Loading 20 newsgroups dataset for categories:")
+print(categories)
+
+data = fetch_20newsgroups(subset="train", categories=categories)
+print("%d documents" % len(data.filenames))
+print("%d categories" % len(data.target_names))
+print()
+
+# Define a pipeline combining a text feature extractor with a simple classifier
+pipeline = Pipeline(
+    [
+        ("vect", CountVectorizer()),
+        ("tfidf", TfidfTransformer()),
+        ("clf", SGDClassifier(loss="log_loss")),
+    ]
+)
+
+# Parameters to use for grid search. Uncommenting more parameters will give
+# better exploring power but will increase processing time in a combinatorial
+# way
+parameters = {
+    "vect__max_df": (0.5, 0.75, 1.0),
+    # 'vect__max_features': (None, 5000, 10000, 50000),
+    "vect__ngram_range": ((1, 1), (1, 2)),  # unigrams or bigrams
+    # 'tfidf__use_idf': (True, False),
+    # 'tfidf__norm': ('l1', 'l2'),
+    "clf__max_iter": (20,),
+    "clf__alpha": (0.00001, 0.000001),
+    "clf__penalty": ("l2", "elasticnet"),
+    # 'clf__max_iter': (10, 50, 80),
+}
+
+# Find the best parameters for both the feature extraction and the
+# classifier
+grid_search = GridSearchCV(pipeline, parameters, n_jobs=-1, verbose=1)
+
+print("Performing grid search...")
+print("pipeline:", [name for name, _ in pipeline.steps])
+print("parameters:")
+pprint(parameters)
+t0 = time()
+grid_search.fit(data.data, data.target)
+print("done in %0.3fs" % (time() - t0))
+print()
+
+print("Best score: %0.3f" % grid_search.best_score_)
+best_parameters = grid_search.best_estimator_.get_params()
+best_parameters = {
+    param_name: best_parameters[param_name] for param_name in sorted(parameters.keys())
+}
+print(f"Best parameters set: {best_parameters}")
+
+bento_model = bentoml.sklearn.save_model(
+    "twenty_news_group",
+    grid_search.best_estimator_,
+    signatures={
+        "predict": {"batchable": True, "batch_dim": 0},
+        "predict_proba": {"batchable": True, "batch_dim": 0},
+    },
+    custom_objects={
+        "target_names": data.target_names,
+    },
+    metadata=best_parameters,
+)
+print(f"Model saved: {bento_model}")
+
+# Test running inference with BentoML runner
+test_runner = bentoml.sklearn.get("twenty_news_group:latest").to_runner()
+test_runner.init_local()
+assert test_runner.predict.run(["hello"]) == grid_search.best_estimator_.predict(
+    ["hello"]
+)
+
+bento_model = bentoml.sklearn.save_model(
+    "twenty_news_group_second",
+    grid_search.best_estimator_,
+    signatures={
+        "predict": {"batchable": True, "batch_dim": 0},
+        "predict_proba": {"batchable": True, "batch_dim": 0},
+    },
+    custom_objects={
+        "target_names": data.target_names,
+    },
+    metadata=best_parameters,
+)
+print(f"Model saved: {bento_model}")
+
+# Test running inference with BentoML runner
+test_runner = bentoml.sklearn.get("twenty_news_group_second:latest").to_runner()
+test_runner.init_local()
+assert test_runner.predict.run(["hello"]) == grid_search.best_estimator_.predict(
+    ["hello"]
+)