diff --git a/CMakeLists.txt b/CMakeLists.txt
index c1e0f81..d9157f5 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,5 +1,7 @@
 cmake_minimum_required(VERSION 3.18.0)
 
+set(CMAKE_EXPORT_COMPILE_COMMANDS ON CACHE BOOL "")
+
 project(OneFlowServing)
 
 if(NOT CMAKE_BUILD_TYPE)
diff --git a/examples_embedding/embedding/client.py b/examples_embedding/embedding/client.py
new file mode 100644
index 0000000..1853d62
--- /dev/null
+++ b/examples_embedding/embedding/client.py
@@ -0,0 +1,36 @@
+"""
+Copyright 2020 The OneFlow Authors. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import time
+import numpy as np
+import tritonclient.http as httpclient
+
+
+if __name__ == '__main__':
+    triton_client = httpclient.InferenceServerClient(url='127.0.0.1:8000')
+
+    data = np.ones((10000,39)).astype(np.int64)
+
+    inputs = []
+    inputs.append(httpclient.InferInput('INPUT_0', data.shape, "INT64"))
+    inputs[0].set_data_from_numpy(data, binary_data=True)
+    outputs = []
+    outputs.append(httpclient.InferRequestedOutput('OUTPUT_0', binary_data=True, class_count=1))
+    now = time.time()
+    results = triton_client.infer("embedding", inputs=inputs, outputs=outputs)
+    print(time.time() - now)
+    output_data0 = results.as_numpy('OUTPUT_0')
+    print(output_data0.shape)
diff --git a/examples_embedding/embedding/config.pbtxt b/examples_embedding/embedding/config.pbtxt
new file mode 100644
index 0000000..6666203
--- /dev/null
+++ b/examples_embedding/embedding/config.pbtxt
@@ -0,0 +1,27 @@
+name: "embedding"
+backend: "oneflow"
+max_batch_size: 10000
+
+input [
+  {
+    name: "INPUT_0"
+    data_type: TYPE_INT64
+    dims: [ 39 ]
+  }
+]
+
+output [
+  {
+    name: "OUTPUT_0"
+    data_type: TYPE_FP32
+    dims: [ 1 ]
+  }
+]
+
+instance_group [
+  {
+    count: 1
+    kind: KIND_GPU
+    gpus: [ 0 ]
+  }
+]
diff --git a/src/triton/model_state.cpp b/src/triton/model_state.cpp
index 9647a02..733fae7 100644
--- a/src/triton/model_state.cpp
+++ b/src/triton/model_state.cpp
@@ -338,6 +338,7 @@ ModelState::LoadModel(
 
   graph->reset(
       new oneflow_api::Graph(oneflow_api::Graph::Load(model_path, device)));
+  if (MaxBatchSize() > 0) { (*graph)->set_batch_size(MaxBatchSize()); }
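
Note (commentary, not part of the patch): one caveat in client.py above is that passing class_count=1 to InferRequestedOutput asks Triton to apply its classification extension, so OUTPUT_0 comes back as top-1 "value:index" strings rather than raw FP32 values; omit class_count to receive the tensor unchanged. For reference, a minimal sketch of the same request over Triton's gRPC endpoint — the port (8001) and the tritonclient[grpc] dependency are assumptions this patch does not set up:

    import numpy as np
    import tritonclient.grpc as grpcclient

    # Triton's default gRPC port; the HTTP client above talks to 8000 instead.
    client = grpcclient.InferenceServerClient(url='127.0.0.1:8001')

    # Same dummy batch as client.py: 10000 rows of 39 int64 features,
    # matching INPUT_0 (TYPE_INT64, dims [ 39 ]) in config.pbtxt.
    data = np.ones((10000, 39)).astype(np.int64)
    inputs = [grpcclient.InferInput('INPUT_0', data.shape, "INT64")]
    inputs[0].set_data_from_numpy(data)

    # Request the raw FP32 tensor; no class_count, so no classification
    # formatting is applied to the output.
    outputs = [grpcclient.InferRequestedOutput('OUTPUT_0')]

    results = client.infer("embedding", inputs=inputs, outputs=outputs)
    print(results.as_numpy('OUTPUT_0').shape)  # expected (10000, 1) given dims [ 1 ]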