model = server.model("stable_diffusion")
+responses = model.infer(inputs={"prompt":[["butterfly in new york, realistic, 4k, photograph"]]})
+
+for response in responses:
+    generated_image = numpy.from_dlpack(response.outputs["generated_image"])
+    generated_image = generated_image.squeeze().astype(numpy.uint8)
+    image_ = Image.fromarray(generated_image)
+    image_.save("sample_generated_image.jpg")
\ No newline at end of file
diff --git a/docs/client_guide/build/html/objects.inv b/docs/client_guide/build/html/objects.inv
new file mode 100644
index 0000000000..a0b5f4cef6
Binary files /dev/null and b/docs/client_guide/build/html/objects.inv differ
diff --git a/docs/client_guide/build/html/search.html b/docs/client_guide/build/html/search.html
new file mode 100644
index 0000000000..6686ab6896
--- /dev/null
+++ b/docs/client_guide/build/html/search.html
@@ -0,0 +1,117 @@
+ Search — Triton Inference Server Python API 24.01 documentation
+
+ Search
\ No newline at end of file
diff --git a/docs/client_guide/build/html/searchindex.js b/docs/client_guide/build/html/searchindex.js
new file mode 100644
index 0000000000..4f9003cd82
--- /dev/null
+++ b/docs/client_guide/build/html/searchindex.js
@@ -0,0 +1 @@
+Search.setIndex({"alltitles":{"Hello World":[[0,"hello-world"]],"In-Process Server Example":[[0,"in-process-server-example"]],"Installation":[[0,"installation"]],"Introduction":[[0,"introduction"]],"Requirements":[[0,"requirements"]],"Send Inference Request":[[0,"send-inference-request"]],"Stable Diffusion Example":[[0,"stable-diffusion-example"]],"Triton Inference Server Python API":[[0,null]]},"docnames":["index"],"envversion":{"sphinx":65,"sphinx.domains.c":3,"sphinx.domains.changeset":1,"sphinx.domains.citation":1,"sphinx.domains.cpp":9,"sphinx.domains.index":1,"sphinx.domains.javascript":3,"sphinx.domains.math":2,"sphinx.domains.python":4,"sphinx.domains.rst":2,"sphinx.domains.std":2},"filenames":["index.rst"],"indexentries":{},"objects":{},"objnames":{},"objtypes":{},"terms":{"13":0,"3":0,"4k":0,"In":[],"The":0,"add":[],"an":0,"astyp":0,"build":0,"butterfli":0,"cd":0,"clone":0,"cloud":0,"com":0,"compat":0,"contain":0,"content":[],"cuda":0,"detail":[],"docker":0,"document":0,"driver":0,"edg":0,"enter":0,"exampl":[],"focus":0,"framework":0,"from":0,"from_dlpack":0,"fromarrai":0,"generated_imag":0,"git":0,"github":0,"http":0,"ident":0,"imag":0,"image_":0,"import":0,"inferenc":0,"input":0,"jpg":0,"linux":0,"model":0,"model_repositori":0,"new":0,"numpi":0,"optim":0,"output":0,"photograph":0,"pil":0,"print":0,"process":[],"prompt":0,"provid":0,"python3":0,"realist":0,"request":[],"respons":0,"restructuredtext":[],"run":0,"sample_generated_imag":0,"save":0,"see":[],"send":[],"sh":0,"shell":0,"solut":0,"squeez":0,"stable_diffus":0,"start":0,"string_input":0,"string_output":0,"syntax":[],"system":0,"thi":0,"to_string_arrai":0,"triton_inference_server_python_api":0,"tritonserv":0,"tutori":0,"uint8":0,"us":[],"workspac":0,"york":0,"your":[]},"titles":["Triton Inference Server Python API"],"titleterms":{"In":0,"api":0,"diffus":0,"document":[],"exampl":0,"hello":0,"infer":0,"instal":0,"introduct":0,"process":0,"python":0,"request":0,"requir":0,"send":0,"server":0,"stabl":0,"triton":0,"world":0}})
\ No newline at end of file
diff --git a/docs/client_guide/make.bat b/docs/client_guide/make.bat
new file mode 100644
index 0000000000..dc1312ab09
--- /dev/null
+++ b/docs/client_guide/make.bat
@@ -0,0 +1,35 @@
+@ECHO OFF
+
+pushd %~dp0
+
+REM Command file for Sphinx documentation
+
+if "%SPHINXBUILD%" == "" (
+ set SPHINXBUILD=sphinx-build
+)
+set SOURCEDIR=source
+set BUILDDIR=build
+
+%SPHINXBUILD% >NUL 2>NUL
+if errorlevel 9009 (
+ echo.
+ echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
+ echo.installed, then set the SPHINXBUILD environment variable to point
+ echo.to the full path of the 'sphinx-build' executable. Alternatively you
+ echo.may add the Sphinx directory to PATH.
+ echo.
+ echo.If you don't have Sphinx installed, grab it from
+ echo.https://www.sphinx-doc.org/
+ exit /b 1
+)
+
+if "%1" == "" goto help
+
+%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+goto end
+
+:help
+%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+
+:end
+popd
diff --git a/docs/client_guide/python_readme.rst b/docs/client_guide/python_readme.rst
index 51951fad51..28d146360c 100644
--- a/docs/client_guide/python_readme.rst
+++ b/docs/client_guide/python_readme.rst
@@ -1,4 +1,4 @@
-..
+..
.. Copyright 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
..
.. Redistribution and use in source and binary forms, with or without
@@ -25,106 +25,106 @@
.. (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
.. OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-.. raw:: html
+Triton Inference Server In-Process Python API
+=============================================
+Starting with release 24.01, Triton Inference Server includes a Python package
+that allows developers to embed Triton Inference Server instances in their
+Python applications. The in-process Python API matches the functionality of
+the in-process C API while providing a higher-level abstraction.
-Triton Inference Server In-Process Python API [BETA]
-====================================================
+.. note::
+ As the API is in BETA, please expect some changes. All feedback is welcome.
-Starting with release 24.01 Triton Inference Server will include a
-Python package enabling developers to embed Triton Inference Server
-instances in their Python applications. The in-process Python API is
-designed to match the functionality of the in-process C API while
-providing a higher level abstraction. At its core the API relies on a
-1:1 python binding of the C API and provides all the flexibility and
-power of the C API with a simpler to use interface.
+Contents
+--------
- [!Note] As the API is in BETA please expect some changes as we test
- out different features and get feedback. All feedback is weclome and
- we look forward to hearing from you!
+- `Requirements <#requirements>`__
+- `Installation <#installation>`__
+- `Hello World <#hello-world>`__
+- `gRPC Python Client <#grpc-python-client>`__
+- `Stable Diffusion Example <#stable-diffusion-example>`__
+- `Ray Serve Deployment <../tutorials/Triton_Inference_Server_Python_API/examples/rayserve>`__
-| `Requirements <#requirements>`__ \| `Installation <#installation>`__
- \| `Hello World <#hello-world>`__ \| `Stable
- Diffusion <#stable-diffusion>`__ \| `Ray Serve
- Deployment <../tutorials/Triton_Inference_Server_Python_API/examples/rayserve>`__ \|
Requirements
------------
-The following instructions require a linux system with Docker installed.
-For CUDA support, make sure your CUDA driver meets the requirements in
-“NVIDIA Driver” section of Deep Learning Framework support matrix:
-https://docs.nvidia.com/deeplearning/frameworks/support-matrix/index.html
+- Linux system with Docker installed.
+- CUDA driver meeting the requirements in the
+  `NVIDIA Deep Learning Framework support matrix <https://docs.nvidia.com/deeplearning/frameworks/support-matrix/index.html>`__.
Installation
------------
-The tutorial and Python API package are designed to be installed and run
-within the ``nvcr.io/nvidia/tritonserver:24.01-py3`` docker image.
+The tutorial and Python API package are designed to run within the
+``nvcr.io/nvidia/tritonserver:24.01-py3`` Docker image.
-A set of convenience scripts are provided to create a docker image based
-on the ``nvcr.io/nvidia/tritonserver:24.01-py3`` image with the Python
-API installed plus additional dependencies required for the examples.
+Convenience scripts are provided to create a Docker image with the Python API
+and example dependencies.
Triton Inference Server 24.01 + Python API
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+------------------------------------------
Clone Repository
^^^^^^^^^^^^^^^^
-.. code:: bash
+.. code-block:: bash
+
git clone https://github.com/triton-inference-server/tutorials.git
cd tutorials/Triton_Inference_Server_Python_API
+
Build ``triton-python-api:r24.01`` Image
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-.. code:: bash
+.. code-block:: bash
+
./build.sh
+
Supported Backends
^^^^^^^^^^^^^^^^^^
-The built image includes all the backends shipped by default in the
-tritonserver ``nvcr.io/nvidia/tritonserver:24.01-py3`` container.
+The built image includes all backends shipped by default in the
+``nvcr.io/nvidia/tritonserver:24.01-py3`` container:
::
dali fil identity onnxruntime openvino python pytorch repeat square tensorrt
Included Models
-^^^^^^^^^^^^^^^
+^^^^^^^^^^^^^^^
+
+The ``default`` build includes an ``identity`` model for testing input/output operations:
-The ``default`` build includes an ``identity`` model that can be used
-for exercising basic operations including sending input tensors of
-different data types. The ``identity`` model copies provided inputs of
-``shape [-1, -1]`` to outputs of shape ``[-1, -1]``. Inputs are named
-``data_type_input`` and outputs are named ``data_type_output``
-(e.g. ``string_input``, ``string_output``, ``fp16_input``,
-``fp16_output``).
+- Inputs: ``data_type_input`` (string, fp16, etc.)
+- Outputs: ``data_type_output``
Hello World
-----------
-Start ``triton-python-api:r24.01`` Container
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Start Container
+^^^^^^^^^^^^^^^
-The following command starts a container and volume mounts the current
-directory as ``workspace``.
+.. code-block:: bash
-.. code:: bash
./run.sh
+
Enter Python Shell
-~~~~~~~~~~~~~~~~~~
+^^^^^^^^^^^^^^^^^^
+
+.. code-block:: bash
-.. code:: bash
python3
-Create and Start a Server Instance
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-.. code:: python
+In-Process Server Example
+^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. code-block:: python
+
import tritonserver
server = tritonserver.Server(model_repository="/workspace/identity-models")
server.start()
+
List Models
-~~~~~~~~~~~
+^^^^^^^^^^^
::
@@ -133,131 +133,97 @@ List Models
Example Output
^^^^^^^^^^^^^^
-``server.models()`` returns a dictionary of the available models with
-their current state.
+.. code-block:: python
-.. code:: python
{('identity', 1): {'name': 'identity', 'version': 1, 'state': 'READY'}}
+
Send an Inference Request
-~~~~~~~~~~~~~~~~~~~~~~~~~
+^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. code-block:: python
-.. code:: python
model = server.model("identity")
responses = model.infer(inputs={"string_input":[["hello world!"]]})
-Iterate through Responses
-~~~~~~~~~~~~~~~~~~~~~~~~~
-``model.infer()`` returns an iterator that can be used to process the
-results of an inference request.
+Iterate Responses
+^^^^^^^^^^^^^^^^^
+
+.. code-block:: python
-.. code:: python
for response in responses:
print(response.outputs["string_output"].to_string_array())
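+
+The ``identity`` model also exposes the other data-type pairs listed under
+Included Models; a minimal sketch using the ``fp16`` variant (assuming it is
+present in your ``identity-models`` build):
+
+.. code-block:: python
+
+   import numpy
+
+   responses = model.infer(inputs={"fp16_input": numpy.array([[1.5, 2.5]], dtype=numpy.float16)})
+   for response in responses:
+       print(numpy.from_dlpack(response.outputs["fp16_output"]))
+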
-.. _example-output-1:
-
-Example Output
-^^^^^^^^^^^^^^
-
-.. code:: python
- [['hello world!']]
-Stable Diffusion
-----------------
-This example is based on the
-`Popular_Models_Guide/StableDiffusion <../tutorials/Popular_Models_Guide/StableDiffusion/README.html>`__
-tutorial.
+gRPC Python Client
+------------------
+
+A running Triton server can also be queried over the network with the
+``tritonclient`` gRPC client (installable with ``pip install tritonclient[grpc]``):
+
-Build ``triton-python-api:r24.01-diffusion`` Image and Stable Diffusion Models
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+.. code-block:: python
-Please note the following command will take many minutes depending on
-your hardware configuration and network connection.
+
+   import tritonclient.grpc as grpcclient
+   import numpy as np
-.. code:: bash
- ./build.sh --framework diffusion --build-models
-.. _supported-backends-1:
+ # Connect to server
+ url = "localhost:8001"
+ client = grpcclient.InferenceServerClient(url)
-Supported Backends
-^^^^^^^^^^^^^^^^^^
+   # Prepare an input tensor; the tutorial's identity model exposes
+   # string_input / string_output tensors of shape [-1, -1]
+   input_data = np.array([["hello world!"]], dtype=np.object_)
+   inputs = [grpcclient.InferInput("string_input", [1, 1], "BYTES")]
+   inputs[0].set_data_from_numpy(input_data)
-The built image includes all the backends shipped by default in the
-tritonserver ``nvcr.io/nvidia/tritonserver:24.01-py3`` container.
+ # Send request
+   response = client.infer(model_name="identity", inputs=inputs)
-::
+   # Read the output tensor back as a numpy array
+   print(response.as_numpy("string_output"))
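+
+.. note::
+   The gRPC client talks to a server over the network. Start ``tritonserver``
+   (for example the ``nvcr.io/nvidia/tritonserver:24.01-py3`` container) with
+   its default gRPC port 8001 reachable; the in-process examples above do not
+   open network endpoints on their own.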
- dali fil identity onnxruntime openvino python pytorch repeat square tensorrt
+Stable Diffusion Example
+------------------------
-.. _included-models-1:
+Build Diffusion Image
+^^^^^^^^^^^^^^^^^^^^^
-Included Models
-^^^^^^^^^^^^^^^
+.. code-block:: bash
-The ``diffusion`` build includes a ``stable_diffustion`` pipeline that
-takes a text prompt and returns a generated image. For more details on
-the models and pipeline please see the
-`Popular_Models_Guide/StableDiffusion <../tutorials/Popular_Models_Guide/StableDiffusion/README.html>`__
-tutorial.
+ ./build.sh --framework diffusion --build-models
Start Container
-~~~~~~~~~~~~~~~
+^^^^^^^^^^^^^^^
-The following command starts a container and volume mounts the current
-directory as ``workspace``.
+.. code-block:: bash
-.. code:: bash
./run.sh --framework diffusion
-.. _enter-python-shell-1:
-Enter Python Shell
-~~~~~~~~~~~~~~~~~~
-
-.. code:: bash
- python3
-.. _create-and-start-a-server-instance-1:
+Python In-Process Server
+^^^^^^^^^^^^^^^^^^^^^^^^
-Create and Start a Server Instance
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.. code-block:: python
-.. code:: python
import tritonserver
import numpy
from PIL import Image
+
server = tritonserver.Server(model_repository="/workspace/diffusion-models")
server.start()
-.. _list-models-1:
List Models
-~~~~~~~~~~~
+^^^^^^^^^^^
::
server.models()
-.. _example-output-2:
+Send Request and Save Image
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
-Example Output
-^^^^^^^^^^^^^^
-
-.. code:: python
- {('stable_diffusion', 1): {'name': 'stable_diffusion', 'version': 1, 'state': 'READY'}, ('text_encoder', 1): {'name': 'text_encoder', 'version': 1, 'state': 'READY'}, ('vae', 1): {'name': 'vae', 'version': 1, 'state': 'READY'}}
-.. _send-an-inference-request-1:
-
-Send an Inference Request
-~~~~~~~~~~~~~~~~~~~~~~~~~
+.. code-block:: python
-.. code:: python
model = server.model("stable_diffusion")
responses = model.infer(inputs={"prompt":[["butterfly in new york, realistic, 4k, photograph"]]})
-Iterate through Responses and save image
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-.. code:: python
for response in responses:
generated_image = numpy.from_dlpack(response.outputs["generated_image"])
generated_image = generated_image.squeeze().astype(numpy.uint8)
image_ = Image.fromarray(generated_image)
image_.save("sample_generated_image.jpg")
-.. _example-output-3:
Example Output
^^^^^^^^^^^^^^
@@ -265,4 +231,4 @@ Example Output
.. figure:: ../tutorials/Triton_Inference_Server_Python_API/docs/sample_generated_image.jpg
:alt: sample_generated_image
- sample_generated_image
\ No newline at end of file
+ sample_generated_image
diff --git a/docs/client_guide/source/conf.py b/docs/client_guide/source/conf.py
new file mode 100644
index 0000000000..70ea716413
--- /dev/null
+++ b/docs/client_guide/source/conf.py
@@ -0,0 +1,28 @@
+# Configuration file for the Sphinx documentation builder.
+#
+# For the full list of built-in configuration values, see the documentation:
+# https://www.sphinx-doc.org/en/master/usage/configuration.html
+
+# -- Project information -----------------------------------------------------
+# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information
+
+project = 'Triton Inference Server Python API'
+copyright = '2025, NVIDIA'
+author = 'NVIDIA'
+release = '24.01'
+
+# -- General configuration ---------------------------------------------------
+# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
+
+extensions = []
+
+templates_path = ['_templates']
+exclude_patterns = []
+
+
+
+# -- Options for HTML output -------------------------------------------------
+# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output
+
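+# Note: the theme below is a separate package; install with `pip install sphinx-rtd-theme`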
+html_theme = 'sphinx_rtd_theme'
+html_static_path = ['_static']
diff --git a/docs/client_guide/source/index.rst b/docs/client_guide/source/index.rst
new file mode 100644
index 0000000000..e8437e1167
--- /dev/null
+++ b/docs/client_guide/source/index.rst
@@ -0,0 +1,111 @@
+Triton Inference Server Python API
+==================================
+
+.. toctree::
+ :maxdepth: 2
+ :caption: Contents:
+
+- `Introduction <#introduction>`__
+- `Requirements <#requirements>`__
+- `Installation <#installation>`__
+- `Hello World <#hello-world>`__
+- `In-Process Server Example <#in-process-server-example>`__
+- `Send Inference Request <#send-inference-request>`__
+- `Stable Diffusion Example <#stable-diffusion-example>`__
+
+Introduction
+------------
+
+The Triton Inference Server provides an optimized cloud and edge inferencing solution.
+This documentation focuses on the in-process Python API.
+
+Requirements
+------------
+
+- Linux system with Docker installed.
+- CUDA driver meeting the requirements in the `NVIDIA Deep Learning Framework support matrix <https://docs.nvidia.com/deeplearning/frameworks/support-matrix/index.html>`__.
+- Python 3 (provided inside the ``nvcr.io/nvidia/tritonserver:24.01-py3`` image).
+
+Installation
+------------
+
+.. code-block:: bash
+
+ git clone https://github.com/triton-inference-server/tutorials.git
+ cd tutorials/Triton_Inference_Server_Python_API
+ ./build.sh
+
+Hello World
+-----------
+
+Start the container:
+
+.. code-block:: bash
+
+ ./run.sh
+
+Enter Python shell:
+
+.. code-block:: bash
+
+ python3
+
+In-Process Server Example
+-------------------------
+
+.. code-block:: python
+
+ import tritonserver
+ server = tritonserver.Server(model_repository="/workspace/identity-models")
+ server.start()
+
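+You can confirm the model loaded before sending requests; ``server.models()``
+returns a dictionary of the available models with their current state:
+
+.. code-block:: python
+
+   server.models()
+   # e.g. {('identity', 1): {'name': 'identity', 'version': 1, 'state': 'READY'}}
+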
+Send Inference Request
+----------------------
+
+.. code-block:: python
+
+ model = server.model("identity")
+ responses = model.infer(inputs={"string_input":[["hello world!"]]})
+
+ for response in responses:
+ print(response.outputs["string_output"].to_string_array())
+
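+The ``identity`` model echoes its input, so this prints ``[['hello world!']]``.
+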
+Stable Diffusion Example
+-------------------------
+
+Build the diffusion image and models:
+
+.. code-block:: bash
+
+ ./build.sh --framework diffusion --build-models
+
+Start container:
+
+.. code-block:: bash
+
+ ./run.sh --framework diffusion
+
+Python In-Process Server:
+
+.. code-block:: python
+
+ import tritonserver
+ import numpy
+ from PIL import Image
+
+ server = tritonserver.Server(model_repository="/workspace/diffusion-models")
+ server.start()
+
+Send request and save image:
+
+.. code-block:: python
+
+ model = server.model("stable_diffusion")
+ responses = model.infer(inputs={"prompt":[["butterfly in new york, realistic, 4k, photograph"]]})
+
+ for response in responses:
+ generated_image = numpy.from_dlpack(response.outputs["generated_image"])
+ generated_image = generated_image.squeeze().astype(numpy.uint8)
+ image_ = Image.fromarray(generated_image)
+ image_.save("sample_generated_image.jpg")
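+
+When you are finished with the server, it can be shut down in-process; a minimal
+sketch (``stop()`` is part of the in-process API, but check
+``help(tritonserver.Server)`` if your package version differs):
+
+.. code-block:: python
+
+   server.stop()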