Support V2 GRPC for KServe Model Server (kserve#2415)
* Implemented gRPC in kserve

Signed-off-by: Suresh-Nakkeran <[email protected]>

* Fix gRPC custom model example and test

Signed-off-by: Dan Sun <[email protected]>

* Add python v2 grpc type interface file

Signed-off-by: Dan Sun <[email protected]>

* Fix transformer gRPC example

Signed-off-by: Dan Sun <[email protected]>

* Fix custom grpc docker file

Signed-off-by: Suresh-Nakkeran <[email protected]>

* Fixed issue in master merge
Added kserve version in custom_model_grpc Dockerfile
Added ServerMetadata function in grpc servicer
Truncated points in grpc test case response

Signed-off-by: Suresh-Nakkeran <[email protected]>

* Removed dependency of asyncio in server start

Signed-off-by: Suresh-Nakkeran <[email protected]>

* Fix asyncio.run for examples

Signed-off-by: Dan Sun <[email protected]>

* Add processing/asyncio threading arguments

Signed-off-by: Dan Sun <[email protected]>

* Gracefully terminate grpc server

Signed-off-by: Dan Sun <[email protected]>

* Added support to read headers

Signed-off-by: Suresh-Nakkeran <[email protected]>

* Added gRPC response conversion

Signed-off-by: Suresh-Nakkeran <[email protected]>

* Added logging interceptor in gRPC

Signed-off-by: Suresh-Nakkeran <[email protected]>

* Added test cases for grpc model with transformer

Signed-off-by: Suresh-Nakkeran <[email protected]>

Signed-off-by: Suresh-Nakkeran <[email protected]>
Signed-off-by: Dan Sun <[email protected]>
Co-authored-by: Dan Sun <[email protected]>
Suresh-Nakkeran and yuzisun authored Nov 5, 2022
1 parent 6a49354 commit 16df93f
Showing 48 changed files with 2,327 additions and 90 deletions.
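Several commits above ("Removed dependency of asyncio in server start", "Fix asyncio.run for examples") change the startup pattern so callers invoke `kserve.ModelServer().start([model])` directly instead of wrapping it in `asyncio.run`. A minimal sketch of that pattern with toy stand-ins (not KServe's actual `ModelServer` implementation): the server owns the event loop internally and exposes a blocking `start()`.

```python
import asyncio
from typing import List


class Model:
    """Toy stand-in for a kserve.Model with an async predict handler."""

    def __init__(self, name: str):
        self.name = name
        self.ready = False

    def load(self):
        self.ready = True

    async def predict(self, payload):
        return {"model": self.name, "predictions": payload}


class ModelServer:
    """Sketch: start() calls asyncio.run internally, so examples no
    longer need to wrap server startup in asyncio.run themselves."""

    def start(self, models: List[Model]):
        return asyncio.run(self._serve(models))

    async def _serve(self, models):
        # A real server would serve HTTP/gRPC here; we just run one call.
        return [await m.predict([1, 2]) for m in models if m.ready]


if __name__ == "__main__":
    model = Model("custom-model")
    model.load()
    print(ModelServer().start([model]))
```

With this shape, the examples in the diff below simply drop the `import asyncio` and the `asyncio.run(...)` wrapper.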
75 changes: 75 additions & 0 deletions .github/workflows/custom-model-grpc-publish.yml
@@ -0,0 +1,75 @@
name: Custom model gRPC Docker Publisher

on:
push:
# Publish `master` as Docker `latest` image.
branches:
- master

# Run tests for any PRs.
pull_request:

env:
IMAGE_NAME: custom-model-grpc

jobs:
# Run tests.
# See also https://docs.docker.com/docker-hub/builds/automated-testing/
test:
runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v2

- name: Run tests
run: |
if [ -f docker-compose.test.yml ]; then
docker-compose --file docker-compose.test.yml build
docker-compose --file docker-compose.test.yml run sut
else
cd python
docker build . --file custom_model_grpc.Dockerfile
fi
# Push image to GitHub Packages.
# See also https://docs.docker.com/docker-hub/builds/
push:
# Ensure test job passes before pushing image.
needs: test

runs-on: ubuntu-latest
if: github.event_name == 'push'

steps:
- uses: actions/checkout@v2

- name: Build image
run: |
cd python
docker build . --file custom_model_grpc.Dockerfile --tag $IMAGE_NAME
- name: Log into registry
run: docker login -u ${{ secrets.DOCKER_USER }} -p ${{ secrets.DOCKER_PASSWORD }}

- name: Push image
run: |
IMAGE_ID=kserve/$IMAGE_NAME
# Change all uppercase to lowercase
IMAGE_ID=$(echo $IMAGE_ID | tr '[A-Z]' '[a-z]')
# Strip git ref prefix from version
VERSION=$(echo "${{ github.ref }}" | sed -e 's,.*/\(.*\),\1,')
# Strip "v" prefix from tag name
# [[ "${{ github.ref }}" == "refs/tags/"* ]] && VERSION=$(echo $VERSION | sed -e 's/^v//')
# Use Docker `latest` tag convention
[ "$VERSION" == "master" ] && VERSION=latest
echo IMAGE_ID=$IMAGE_ID
echo VERSION=$VERSION
docker tag $IMAGE_NAME $IMAGE_ID:$VERSION
docker push $IMAGE_ID:$VERSION
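The push step above derives the Docker tag from the git ref. Extracted on its own, with a hard-coded ref standing in for `${{ github.ref }}`, the logic can be exercised like this:

```shell
#!/bin/sh
# Simulate the workflow's tag derivation for a push to master.
GITHUB_REF="refs/heads/master"   # stand-in for ${{ github.ref }}

IMAGE_ID=kserve/custom-model-grpc
# Change all uppercase to lowercase
IMAGE_ID=$(echo "$IMAGE_ID" | tr '[A-Z]' '[a-z]')
# Strip git ref prefix from version: refs/heads/master -> master
VERSION=$(echo "$GITHUB_REF" | sed -e 's,.*/\(.*\),\1,')
# Use Docker `latest` tag convention for the master branch
[ "$VERSION" = "master" ] && VERSION=latest
echo "$IMAGE_ID:$VERSION"   # kserve/custom-model-grpc:latest
```

Because `pull_request` events are not `push`, the `if: github.event_name == 'push'` guard keeps PR builds from reaching this step at all.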
75 changes: 75 additions & 0 deletions .github/workflows/custom-transformer-grpc-publish.yml
@@ -0,0 +1,75 @@
name: Custom image transformer gRPC Docker Publisher

on:
push:
# Publish `master` as Docker `latest` image.
branches:
- master

# Run tests for any PRs.
pull_request:

env:
IMAGE_NAME: custom-image-transformer-grpc

jobs:
# Run tests.
# See also https://docs.docker.com/docker-hub/builds/automated-testing/
test:
runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v2

- name: Run tests
run: |
if [ -f docker-compose.test.yml ]; then
docker-compose --file docker-compose.test.yml build
docker-compose --file docker-compose.test.yml run sut
else
cd python
docker build . --file custom_transformer_grpc.Dockerfile
fi
# Push image to GitHub Packages.
# See also https://docs.docker.com/docker-hub/builds/
push:
# Ensure test job passes before pushing image.
needs: test

runs-on: ubuntu-latest
if: github.event_name == 'push'

steps:
- uses: actions/checkout@v2

- name: Build image
run: |
cd python
docker build . --file custom_transformer_grpc.Dockerfile --tag $IMAGE_NAME
- name: Log into registry
run: docker login -u ${{ secrets.DOCKER_USER }} -p ${{ secrets.DOCKER_PASSWORD }}

- name: Push image
run: |
IMAGE_ID=kserve/$IMAGE_NAME
# Change all uppercase to lowercase
IMAGE_ID=$(echo $IMAGE_ID | tr '[A-Z]' '[a-z]')
# Strip git ref prefix from version
VERSION=$(echo "${{ github.ref }}" | sed -e 's,.*/\(.*\),\1,')
# Strip "v" prefix from tag name
# [[ "${{ github.ref }}" == "refs/tags/"* ]] && VERSION=$(echo $VERSION | sed -e 's/^v//')
# Use Docker `latest` tag convention
[ "$VERSION" == "master" ] && VERSION=latest
echo IMAGE_ID=$IMAGE_ID
echo VERSION=$VERSION
docker tag $IMAGE_NAME $IMAGE_ID:$VERSION
docker push $IMAGE_ID:$VERSION
47 changes: 46 additions & 1 deletion .github/workflows/e2e-test.yml
@@ -77,7 +77,17 @@ jobs:
uses: ishworkh/docker-image-artifact-upload@v1
with:
image: "kserve/image-transformer:${{ github.sha }}"

- name: Upload custom model grpc image
uses: ishworkh/docker-image-artifact-upload@v1
with:
image: "kserve/custom-model-grpc:${{ github.sha }}"

- name: Upload custom model transformer grpc image
uses: ishworkh/docker-image-artifact-upload@v1
with:
image: "kserve/custom-image-transformer-grpc:${{ github.sha }}"

explainer-runtime-build:
runs-on: ubuntu-latest
steps:
@@ -322,7 +332,6 @@ jobs:
- name: Install KServe
run: |
./test/scripts/gh-actions/setup-kserve.sh
kubectl get pods -n kserve
kubectl describe pods -n kserve
- name: Patch qpext image
@@ -338,3 +347,39 @@ jobs:
if: always()
run: |
./test/scripts/gh-actions/status-check.sh
test-grpc:
runs-on: ubuntu-latest
needs: [kserve-image-build, predictor-runtime-build, explainer-runtime-build]
steps:
- uses: actions/checkout@v2
- uses: actions/setup-go@v2
with:
go-version: '1.17.3'
- uses: ./.github/actions/minikube-setup
- uses: ./.github/actions/base-download

- name: Download custom model grpc image
uses: ishworkh/docker-image-artifact-download@v1
with:
image: "kserve/custom-model-grpc:${{ github.sha }}"

- name: Download custom transformer grpc image
uses: ishworkh/docker-image-artifact-download@v1
with:
image: "kserve/custom-image-transformer-grpc:${{ github.sha }}"

- name: Install KServe
run: |
./test/scripts/gh-actions/setup-kserve.sh
kubectl get pods -n kserve
kubectl describe pods -n kserve
- name: Run E2E tests
timeout-minutes: 40
run: |
./test/scripts/gh-actions/run-e2e-tests.sh "grpc"
kubectl get pods -n kserve
- name: Check system status
if: always()
run: |
./test/scripts/gh-actions/status-check.sh
2 changes: 1 addition & 1 deletion .github/workflows/python-test.yml
@@ -23,7 +23,7 @@ jobs:
uses: py-actions/flake8@v1
with:
max-line-length: "120"
exclude: "docs/samples/v1beta1/onnx/assets/*.py,python/kserve/test/test_v1*.py,python/kserve/kserve/__init__.py,python/kserve/test/test_knative*.py"
exclude: "docs/samples/v1beta1/onnx/assets/*.py,python/kserve/test/test_v1*.py,python/kserve/kserve/__init__.py,python/kserve/test/test_knative*.py,python/kserve/kserve/grpc/grpc_predict_v2*.py"
build:
runs-on: ubuntu-latest
strategy:
4 changes: 2 additions & 2 deletions docs/samples/multimodelserving/sklearn/trainedmodels.yaml
@@ -5,7 +5,7 @@ metadata:
spec:
inferenceService: "sklearn-iris-example"
model:
storageUri: "gs://kfserving-examples/models/sklearn/v1/model"
storageUri: "gs://kfserving-examples/models/sklearn/1.0/model"
framework: "sklearn"
memory: "256Mi"
---
@@ -16,6 +16,6 @@ metadata:
spec:
inferenceService: "sklearn-iris-example"
model:
storageUri: "gs://kfserving-examples/models/sklearn/v1/model"
storageUri: "gs://kfserving-examples/models/sklearn/1.0/model"
framework: "sklearn"
memory: "256Mi"
4 changes: 2 additions & 2 deletions docs/samples/v1beta1/custom/custom_model/README.md
@@ -110,7 +110,7 @@ kubectl apply -f custom.yaml
Expected Output

```
$ inferenceservice.serving.kubeflow.org/custom-model created
$ inferenceservice.serving.kserve.io/custom-model created
```

### Arguments and Environment Variables
@@ -119,7 +119,7 @@ You can supply additional command arguments on the container spec to configure t
you need to make sure the model object is fork-friendly for multi-processing to work. Alternatively you can deploy your model server
with replicas; in this case each model server is created as a Python worker independent of the server.
- `--http_port`: the http port model server is listening on, the default port is 8080
- `--max_buffer_size`: Max socker buffer size for tornado http client, the default limit is 10Mi.
- `--max_buffer_size`: Max socket buffer size, the default gRPC limit is 4Mi.
- `--max_asyncio_workers`: Max number of workers to spawn for python async io loop, by default it is `min(32,cpu.limit + 4)`
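The `--max_asyncio_workers` default documented above follows the `min(32, cpu.limit + 4)` formula. A small sketch of that computation (assuming `cpu.limit` maps to the CPU count visible to the process, which is a reading of the README rather than KServe's exact code):

```python
import os


def default_max_asyncio_workers(cpu_limit: int = None) -> int:
    """Default worker count min(32, cpu_limit + 4), per the README's
    documented formula for --max_asyncio_workers."""
    if cpu_limit is None:
        cpu_limit = os.cpu_count() or 1
    return min(32, cpu_limit + 4)


print(default_max_asyncio_workers(4))    # 8
print(default_max_asyncio_workers(64))   # 32
```

The cap at 32 prevents spawning an excessive worker pool on large machines, matching the stdlib `ThreadPoolExecutor` heuristic.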

### Run a prediction
2 changes: 1 addition & 1 deletion python/VERSION
@@ -1 +1 @@
0.9.0
0.10.0rc0
3 changes: 1 addition & 2 deletions python/aiffairness/aifserver/__main__.py
@@ -13,7 +13,6 @@
# limitations under the License.

import argparse
import asyncio

import kserve
import json
@@ -71,4 +70,4 @@
unprivileged_groups=args.unprivileged_groups
)
model.load()
asyncio.run(kserve.ModelServer().start([model]))
kserve.ModelServer().start([model])
3 changes: 1 addition & 2 deletions python/aixexplainer/aixserver/__main__.py
@@ -13,7 +13,6 @@
# limitations under the License.

import argparse
import asyncio

import kserve
from .model import AIXModel
@@ -52,4 +51,4 @@
top_labels=args.top_labels, min_weight=args.min_weight,
positive_only=args.positive_only, explainer_type=args.explainer_type)
model.load()
asyncio.run(kserve.ModelServer().start([model]))
kserve.ModelServer().start([model])
4 changes: 2 additions & 2 deletions python/alibiexplainer/alibiexplainer/__main__.py
@@ -11,7 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import asyncio

import logging
import os
import sys
@@ -49,7 +49,7 @@ def main():
alibi_model,
)
explainer.load()
asyncio.run(kserve.ModelServer().start(models=[explainer]))
kserve.ModelServer().start(models=[explainer])


if __name__ == "__main__":
3 changes: 1 addition & 2 deletions python/artexplainer/artserver/__main__.py
@@ -13,7 +13,6 @@
# limitations under the License.

import argparse
import asyncio

from artserver import ARTModel

@@ -42,4 +41,4 @@
model = ARTModel(args.model_name, args.predictor_host, adversary_type=args.adversary_type,
nb_classes=args.nb_classes, max_iter=args.max_iter)
model.load()
asyncio.run(kserve.ModelServer().start([model]))
kserve.ModelServer().start([model])
13 changes: 6 additions & 7 deletions python/custom_model/model.py
@@ -11,8 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import asyncio

import kserve
from torchvision import models, transforms
from typing import Dict
@@ -27,11 +25,12 @@ def __init__(self, name: str):
super().__init__(name)
self.name = name
self.load()
self.model = None
self.ready = False

def load(self):
model = models.alexnet(pretrained=True)
model.eval()
self.model = model
self.model = models.alexnet(pretrained=True)
self.model.eval()
self.ready = True

def predict(self, payload: Dict, headers: Dict[str, str] = None) -> Dict:
@@ -57,7 +56,7 @@ def predict(self, payload: Dict, headers: Dict[str, str] = None) -> Dict:

output = self.model(input_batch)

torch.nn.functional.softmax(output, dim=1)[0]
torch.nn.functional.softmax(output, dim=1)

values, top_5 = torch.topk(output, 5)

@@ -67,4 +66,4 @@ def predict(self, payload: Dict, headers: Dict[str, str] = None) -> Dict:
if __name__ == "__main__":
model = AlexNetModel("custom-model")
model.load()
asyncio.run(kserve.ModelServer(workers=1).start([model]))
kserve.ModelServer(workers=1).start([model])
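The `predict` hunk above keeps the raw top-5 logits via `torch.topk(output, 5)`. For illustration only, the same top-k selection can be sketched in plain Python with toy logits standing in for the model output (no torch dependency; this is not KServe or torch code):

```python
from typing import List, Tuple


def topk(scores: List[float], k: int) -> Tuple[List[float], List[int]]:
    """Return the k largest values and their indices, mirroring the
    shape of torch.topk(output, k) for a 1-D score vector."""
    order = sorted(range(len(scores)), key=lambda i: scores[i], reverse=True)[:k]
    return [scores[i] for i in order], order


# Toy logits standing in for the model output on one image
logits = [0.1, 2.5, -0.3, 1.7, 0.9, 3.2]
values, top_5 = topk(logits, 5)
print(top_5)   # [5, 1, 3, 4, 0]
```

As in the diff, note that the softmax result is discarded: ranking by raw logits and ranking by softmax probabilities give the same top-k indices, since softmax is monotonic.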