diff --git a/build.py b/build.py index 1145a8db9a..d6ce601575 100755 --- a/build.py +++ b/build.py @@ -92,6 +92,67 @@ THIS_SCRIPT_DIR = os.path.dirname(os.path.abspath(getsourcefile(lambda: 0))) +ELEMENTS = { + "backend": ["tag", "org", "cmake"], + "repoagent": ["tag", "org", "cmake"], + "cache": ["tag", "org", "cmake"], + "filesystem": ["strict"], + "endpoint": ["strict"], + "feature": ["strict"], + "component": ["tag", "strict", "required"], +} + +ELEMENTS_LISTS = { + "backend": [ + "ensemble", + "identity", + "square", + "repeat", + "onnxruntime", + "python", + "dali", + "pytorch", + "openvino", + "fil", + "tensorrt", + ], + "repoagent": [ + "checksum", + ], + "cache": [ + "local", + "redis", + ], + "filesystem": [ + "gcs", + "s3", + "azure_storage", + ], + "endpoint": [ + "http", + "grpc", + "sagemaker", + "vertex-ai", + ], + "feature": [ + "logging", + "stats", + "metrics", + "gpu_metrics", + "cpu_metrics", + "tracing", + "nvtx", + "gpu", + "mali_gpu", + ], + "component": [ + "common", + "core", + "backend", + "thirdparty", + ], +} + def log(msg, force=False): if force or not FLAGS.quiet: @@ -343,10 +404,7 @@ def cmake_core_arg(name, type, value): # command-line specified value if one is given. if name in OVERRIDE_CORE_CMAKE_FLAGS: value = OVERRIDE_CORE_CMAKE_FLAGS[name] - if type is None: - type = "" - else: - type = ":{}".format(type) + type = ":{}".format(type) if type else "" return '"-D{}{}={}"'.format(name, type, value) @@ -354,11 +412,7 @@ def cmake_core_enable(name, flag): # Return cmake -D setting to set name=flag?ON:OFF for core # build. Use command-line specified value for 'flag' if one is # given. 
- if name in OVERRIDE_CORE_CMAKE_FLAGS: - value = OVERRIDE_CORE_CMAKE_FLAGS[name] - else: - value = "ON" if flag else "OFF" - return '"-D{}:BOOL={}"'.format(name, value) + return cmake_core_arg(name, "BOOL", "ON" if flag else "OFF") def cmake_core_extra_args(): @@ -368,116 +422,114 @@ def cmake_core_extra_args(): return args -def cmake_backend_arg(backend, name, type, value): - # Return cmake -D setting to set name=value for backend build. Use +def cmake_element_arg(element, element_val, name, type, value): + # Return cmake -D setting to set name=value for build. Use # command-line specified value if one is given. - if backend in OVERRIDE_BACKEND_CMAKE_FLAGS: - if name in OVERRIDE_BACKEND_CMAKE_FLAGS[backend]: - value = OVERRIDE_BACKEND_CMAKE_FLAGS[backend][name] - if type is None: - type = "" - else: - type = ":{}".format(type) + element_flags = getattr(FLAGS, element) + if "cmake_override" in element_flags[element_val]: + value = element_flags[element_val]["cmake_override"].get(name, value) + type = ":{}".format(type) if type else "" return '"-D{}{}={}"'.format(name, type, value) -def cmake_backend_enable(backend, name, flag): - # Return cmake -D setting to set name=flag?ON:OFF for backend +def cmake_backend_arg(*args, **kwargs): + return cmake_element_arg("backend", *args, **kwargs) + + +def cmake_element_enable(element, element_val, name, flag): + # Return cmake -D setting to set name=flag?ON:OFF for # build. Use command-line specified value for 'flag' if one is # given. 
- value = None - if backend in OVERRIDE_BACKEND_CMAKE_FLAGS: - if name in OVERRIDE_BACKEND_CMAKE_FLAGS[backend]: - value = OVERRIDE_BACKEND_CMAKE_FLAGS[backend][name] - if value is None: - value = "ON" if flag else "OFF" - return '"-D{}:BOOL={}"'.format(name, value) + return cmake_element_arg( + element, element_val, name, "BOOL", "ON" if flag else "OFF" + ) -def cmake_backend_extra_args(backend): +def cmake_backend_enable(*args, **kwargs): + return cmake_element_enable("backend", *args, **kwargs) + + +def cmake_element_extra_args(element, element_val): args = [] - if backend in EXTRA_BACKEND_CMAKE_FLAGS: - for k, v in EXTRA_BACKEND_CMAKE_FLAGS[backend].items(): + element_flags = getattr(FLAGS, element) + if "cmake_extra" in element_flags[element_val]: + for k, v in element_flags[element_val]["cmake_extra"].items(): args.append('"-D{}={}"'.format(k, v)) return args -def cmake_repoagent_arg(name, type, value): - # For now there is no override for repo-agents - if type is None: - type = "" - else: - type = ":{}".format(type) - return '"-D{}{}={}"'.format(name, type, value) +def cmake_backend_extra_args(backend): + return cmake_element_extra_args("backend", backend) -def cmake_repoagent_enable(name, flag): - # For now there is no override for repo-agents - value = "ON" if flag else "OFF" - return '"-D{}:BOOL={}"'.format(name, value) +def cmake_repoagent_arg(*args, **kwargs): + return cmake_element_arg("repoagent", *args, **kwargs) -def cmake_repoagent_extra_args(): - # For now there is no extra args for repo-agents - args = [] - return args +def cmake_repoagent_enable(*args, **kwargs): + return cmake_element_enable("repoagent", *args, **kwargs) -def cmake_cache_arg(name, type, value): - # For now there is no override for caches - if type is None: - type = "" - else: - type = ":{}".format(type) - return '"-D{}{}={}"'.format(name, type, value) +def cmake_repoagent_extra_args(repoagent): + return cmake_element_extra_args("repoagent", repoagent) -def 
cmake_cache_enable(name, flag): - # For now there is no override for caches - value = "ON" if flag else "OFF" - return '"-D{}:BOOL={}"'.format(name, value) +def cmake_cache_arg(*args, **kwargs): + return cmake_element_arg("cache", *args, **kwargs) -def cmake_cache_extra_args(): - # For now there is no extra args for caches - args = [] - return args +def cmake_cache_enable(*args, **kwargs): + return cmake_element_enable("cache", *args, **kwargs) + +def cmake_cache_extra_args(cache): + return cmake_element_extra_args("cache", cache) -def core_cmake_args(components, backends, cmake_dir, install_dir): + +def core_cmake_args(cmake_dir, install_dir): cargs = [ cmake_core_arg("CMAKE_BUILD_TYPE", None, FLAGS.build_type), cmake_core_arg("CMAKE_INSTALL_PREFIX", "PATH", install_dir), cmake_core_arg("TRITON_VERSION", "STRING", FLAGS.version), cmake_core_arg("TRITON_REPO_ORGANIZATION", "STRING", FLAGS.github_organization), - cmake_core_arg("TRITON_COMMON_REPO_TAG", "STRING", components["common"]), - cmake_core_arg("TRITON_CORE_REPO_TAG", "STRING", components["core"]), - cmake_core_arg("TRITON_BACKEND_REPO_TAG", "STRING", components["backend"]), cmake_core_arg( - "TRITON_THIRD_PARTY_REPO_TAG", "STRING", components["thirdparty"] + "TRITON_COMMON_REPO_TAG", "STRING", FLAGS.component["common"]["tag"] + ), + cmake_core_arg( + "TRITON_CORE_REPO_TAG", "STRING", FLAGS.component["core"]["tag"] + ), + cmake_core_arg( + "TRITON_BACKEND_REPO_TAG", "STRING", FLAGS.component["backend"]["tag"] + ), + cmake_core_arg( + "TRITON_THIRD_PARTY_REPO_TAG", + "STRING", + FLAGS.component["thirdparty"]["tag"], ), ] - cargs.append(cmake_core_enable("TRITON_ENABLE_LOGGING", FLAGS.enable_logging)) - cargs.append(cmake_core_enable("TRITON_ENABLE_STATS", FLAGS.enable_stats)) - cargs.append(cmake_core_enable("TRITON_ENABLE_METRICS", FLAGS.enable_metrics)) + cargs.append(cmake_core_enable("TRITON_ENABLE_LOGGING", "logging" in FLAGS.feature)) + cargs.append(cmake_core_enable("TRITON_ENABLE_STATS", "stats" in 
FLAGS.feature)) + cargs.append(cmake_core_enable("TRITON_ENABLE_METRICS", "metrics" in FLAGS.feature)) cargs.append( - cmake_core_enable("TRITON_ENABLE_METRICS_GPU", FLAGS.enable_gpu_metrics) + cmake_core_enable("TRITON_ENABLE_METRICS_GPU", "gpu_metrics" in FLAGS.feature) ) cargs.append( - cmake_core_enable("TRITON_ENABLE_METRICS_CPU", FLAGS.enable_cpu_metrics) + cmake_core_enable("TRITON_ENABLE_METRICS_CPU", "cpu_metrics" in FLAGS.feature) ) - cargs.append(cmake_core_enable("TRITON_ENABLE_TRACING", FLAGS.enable_tracing)) - cargs.append(cmake_core_enable("TRITON_ENABLE_NVTX", FLAGS.enable_nvtx)) + cargs.append(cmake_core_enable("TRITON_ENABLE_TRACING", "tracing" in FLAGS.feature)) + cargs.append(cmake_core_enable("TRITON_ENABLE_NVTX", "nvtx" in FLAGS.feature)) - cargs.append(cmake_core_enable("TRITON_ENABLE_GPU", FLAGS.enable_gpu)) + cargs.append(cmake_core_enable("TRITON_ENABLE_GPU", "gpu" in FLAGS.feature)) cargs.append( cmake_core_arg( "TRITON_MIN_COMPUTE_CAPABILITY", None, FLAGS.min_compute_capability ) ) - cargs.append(cmake_core_enable("TRITON_ENABLE_MALI_GPU", FLAGS.enable_mali_gpu)) + cargs.append( + cmake_core_enable("TRITON_ENABLE_MALI_GPU", "mali_gpu" in FLAGS.feature) + ) cargs.append(cmake_core_enable("TRITON_ENABLE_GRPC", "grpc" in FLAGS.endpoint)) cargs.append(cmake_core_enable("TRITON_ENABLE_HTTP", "http" in FLAGS.endpoint)) @@ -496,8 +548,12 @@ def core_cmake_args(components, backends, cmake_dir, install_dir): ) ) - cargs.append(cmake_core_enable("TRITON_ENABLE_ENSEMBLE", "ensemble" in backends)) - cargs.append(cmake_core_enable("TRITON_ENABLE_TENSORRT", "tensorrt" in backends)) + cargs.append( + cmake_core_enable("TRITON_ENABLE_ENSEMBLE", "ensemble" in FLAGS.backend) + ) + cargs.append( + cmake_core_enable("TRITON_ENABLE_TENSORRT", "tensorrt" in FLAGS.backend) + ) cargs += cmake_core_extra_args() cargs.append(cmake_dir) @@ -508,21 +564,27 @@ def repoagent_repo(ra): return "{}_repository_agent".format(ra) -def repoagent_cmake_args(images, 
components, ra, install_dir): +def repoagent_cmake_args(images, ra, install_dir): args = [] cargs = args + [ - cmake_repoagent_arg("CMAKE_BUILD_TYPE", None, FLAGS.build_type), - cmake_repoagent_arg("CMAKE_INSTALL_PREFIX", "PATH", install_dir), + cmake_repoagent_arg(ra, "CMAKE_BUILD_TYPE", None, FLAGS.build_type), + cmake_repoagent_arg(ra, "CMAKE_INSTALL_PREFIX", "PATH", install_dir), cmake_repoagent_arg( - "TRITON_REPO_ORGANIZATION", "STRING", FLAGS.github_organization + ra, "TRITON_REPO_ORGANIZATION", "STRING", FLAGS.github_organization + ), + cmake_repoagent_arg( + ra, "TRITON_COMMON_REPO_TAG", "STRING", FLAGS.component["common"]["tag"] + ), + cmake_repoagent_arg( + ra, "TRITON_CORE_REPO_TAG", "STRING", FLAGS.component["core"]["tag"] ), - cmake_repoagent_arg("TRITON_COMMON_REPO_TAG", "STRING", components["common"]), - cmake_repoagent_arg("TRITON_CORE_REPO_TAG", "STRING", components["core"]), ] - cargs.append(cmake_repoagent_enable("TRITON_ENABLE_GPU", FLAGS.enable_gpu)) - cargs += cmake_repoagent_extra_args() + cargs.append( + cmake_repoagent_enable(ra, "TRITON_ENABLE_GPU", "gpu" in FLAGS.feature) + ) + cargs += cmake_repoagent_extra_args(ra) cargs.append("..") return cargs @@ -532,21 +594,25 @@ def cache_repo(cache): return "{}_cache".format(cache) -def cache_cmake_args(images, components, cache, install_dir): +def cache_cmake_args(images, cache, install_dir): args = [] cargs = args + [ - cmake_cache_arg("CMAKE_BUILD_TYPE", None, FLAGS.build_type), - cmake_cache_arg("CMAKE_INSTALL_PREFIX", "PATH", install_dir), + cmake_cache_arg(cache, "CMAKE_BUILD_TYPE", None, FLAGS.build_type), + cmake_cache_arg(cache, "CMAKE_INSTALL_PREFIX", "PATH", install_dir), + cmake_cache_arg( + cache, "TRITON_REPO_ORGANIZATION", "STRING", FLAGS.github_organization + ), + cmake_cache_arg( + cache, "TRITON_COMMON_REPO_TAG", "STRING", FLAGS.component["common"]["tag"] + ), cmake_cache_arg( - "TRITON_REPO_ORGANIZATION", "STRING", FLAGS.github_organization + cache, "TRITON_CORE_REPO_TAG", 
"STRING", FLAGS.component["core"]["tag"] ), - cmake_cache_arg("TRITON_COMMON_REPO_TAG", "STRING", components["common"]), - cmake_cache_arg("TRITON_CORE_REPO_TAG", "STRING", components["core"]), ] - cargs.append(cmake_cache_enable("TRITON_ENABLE_GPU", FLAGS.enable_gpu)) - cargs += cmake_cache_extra_args() + cargs.append(cmake_cache_enable(cache, "TRITON_ENABLE_GPU", "gpu" in FLAGS.feature)) + cargs += cmake_cache_extra_args(cache) cargs.append("..") return cargs @@ -555,11 +621,11 @@ def backend_repo(be): return "{}_backend".format(be) -def backend_cmake_args(images, components, be, install_dir, library_paths): +def backend_cmake_args(images, be, install_dir): cmake_build_type = FLAGS.build_type if be == "onnxruntime": - args = onnxruntime_cmake_args(images, library_paths) + args = onnxruntime_cmake_args(images) elif be == "openvino": args = openvino_cmake_args() elif be == "python": @@ -589,20 +655,26 @@ def backend_cmake_args(images, components, be, install_dir, library_paths): cmake_backend_arg( be, "TRITON_REPO_ORGANIZATION", "STRING", FLAGS.github_organization ), - cmake_backend_arg(be, "TRITON_COMMON_REPO_TAG", "STRING", components["common"]), - cmake_backend_arg(be, "TRITON_CORE_REPO_TAG", "STRING", components["core"]), cmake_backend_arg( - be, "TRITON_BACKEND_REPO_TAG", "STRING", components["backend"] + be, "TRITON_COMMON_REPO_TAG", "STRING", FLAGS.component["common"]["tag"] + ), + cmake_backend_arg( + be, "TRITON_CORE_REPO_TAG", "STRING", FLAGS.component["core"]["tag"] + ), + cmake_backend_arg( + be, "TRITON_BACKEND_REPO_TAG", "STRING", FLAGS.component["backend"]["tag"] ), ] - cargs.append(cmake_backend_enable(be, "TRITON_ENABLE_GPU", FLAGS.enable_gpu)) + cargs.append(cmake_backend_enable(be, "TRITON_ENABLE_GPU", "gpu" in FLAGS.feature)) cargs.append( - cmake_backend_enable(be, "TRITON_ENABLE_MALI_GPU", FLAGS.enable_mali_gpu) + cmake_backend_enable(be, "TRITON_ENABLE_MALI_GPU", "mali_gpu" in FLAGS.feature) ) - cargs.append(cmake_backend_enable(be, 
"TRITON_ENABLE_STATS", FLAGS.enable_stats)) cargs.append( - cmake_backend_enable(be, "TRITON_ENABLE_METRICS", FLAGS.enable_metrics) + cmake_backend_enable(be, "TRITON_ENABLE_STATS", "stats" in FLAGS.feature) + ) + cargs.append( + cmake_backend_enable(be, "TRITON_ENABLE_METRICS", "metrics" in FLAGS.feature) ) # [DLIS-4950] always enable below once Windows image is updated with CUPTI @@ -617,7 +689,7 @@ def backend_cmake_args(images, components, be, install_dir, library_paths): "Warning: Detected iGPU build, backend utility 'device memory tracker' will be disabled as iGPU doesn't contain required version of the library." ) cargs.append(cmake_backend_enable(be, "TRITON_ENABLE_MEMORY_TRACKER", False)) - elif FLAGS.enable_gpu: + elif "gpu" in FLAGS.feature: cargs.append(cmake_backend_enable(be, "TRITON_ENABLE_MEMORY_TRACKER", True)) cargs += cmake_backend_extra_args(be) @@ -653,17 +725,19 @@ def pytorch_cmake_args(images): # TODO: TPRD-372 TorchTRT extension is not currently supported by our manylinux build # TODO: TPRD-373 NVTX extension is not currently supported by our manylinux build if target_platform() != "rhel": - if FLAGS.enable_gpu: + if "gpu" in FLAGS.feature: cargs.append( cmake_backend_enable("pytorch", "TRITON_PYTORCH_ENABLE_TORCHTRT", True) ) cargs.append( - cmake_backend_enable("pytorch", "TRITON_ENABLE_NVTX", FLAGS.enable_nvtx) + cmake_backend_enable( + "pytorch", "TRITON_ENABLE_NVTX", "nvtx" in FLAGS.feature + ) ) return cargs -def onnxruntime_cmake_args(images, library_paths): +def onnxruntime_cmake_args(images): cargs = [ cmake_backend_arg( "onnxruntime", @@ -676,7 +750,7 @@ def onnxruntime_cmake_args(images, library_paths): ] # TRITON_ENABLE_GPU is already set for all backends in backend_cmake_args() - if FLAGS.enable_gpu: + if "gpu" in FLAGS.feature: # TODO: TPRD-712 TensorRT is not currently supported by our RHEL build for SBSA. 
if target_platform() != "rhel" or ( target_platform() == "rhel" and target_machine() == "x86_64" @@ -688,17 +762,17 @@ def onnxruntime_cmake_args(images, library_paths): ) if target_platform() == "windows": - if "base" in images: + if "buildbase" in images: cargs.append( cmake_backend_arg( - "onnxruntime", "TRITON_BUILD_CONTAINER", None, images["base"] + "onnxruntime", "TRITON_BUILD_CONTAINER", None, images["buildbase"] ) ) else: - if "base" in images: + if "buildbase" in images: cargs.append( cmake_backend_arg( - "onnxruntime", "TRITON_BUILD_CONTAINER", None, images["base"] + "onnxruntime", "TRITON_BUILD_CONTAINER", None, images["buildbase"] ) ) else: @@ -754,17 +828,17 @@ def openvino_cmake_args(): ) ] if target_platform() == "windows": - if "base" in images: + if "buildbase" in images: cargs.append( cmake_backend_arg( - "openvino", "TRITON_BUILD_CONTAINER", None, images["base"] + "openvino", "TRITON_BUILD_CONTAINER", None, images["buildbase"] ) ) else: - if "base" in images: + if "buildbase" in images: cargs.append( cmake_backend_arg( - "openvino", "TRITON_BUILD_CONTAINER", None, images["base"] + "openvino", "TRITON_BUILD_CONTAINER", None, images["buildbase"] ) ) else: @@ -781,7 +855,7 @@ def openvino_cmake_args(): def tensorrt_cmake_args(): cargs = [ - cmake_backend_enable("tensorrt", "TRITON_ENABLE_NVTX", FLAGS.enable_nvtx), + cmake_backend_enable("tensorrt", "TRITON_ENABLE_NVTX", "nvtx" in FLAGS.feature), ] if target_platform() == "windows": cargs.append( @@ -801,9 +875,11 @@ def dali_cmake_args(): def fil_cmake_args(images): cargs = [cmake_backend_enable("fil", "TRITON_FIL_DOCKER_BUILD", True)] - if "base" in images: + if "buildbase" in images: cargs.append( - cmake_backend_arg("fil", "TRITON_BUILD_CONTAINER", None, images["base"]) + cmake_backend_arg( + "fil", "TRITON_BUILD_CONTAINER", None, images["buildbase"] + ) ) else: cargs.append( @@ -1006,7 +1082,7 @@ def create_dockerfile_buildbase_rhel(ddir, dockerfile_name, argmap): && (cd /tmp && tar xzf 
boost.tar.gz) \\ && mv /tmp/boost_1_80_0/boost /usr/include/boost """ - if FLAGS.enable_gpu: + if "gpu" in FLAGS.feature: df += install_dcgm_libraries(argmap["DCGM_VERSION"], target_machine()) df += """ ENV TRITON_SERVER_VERSION ${TRITON_VERSION} @@ -1119,7 +1195,7 @@ def create_dockerfile_buildbase(ddir, dockerfile_name, argmap): """ - if FLAGS.enable_gpu: + if "gpu" in FLAGS.feature: df += install_dcgm_libraries(argmap["DCGM_VERSION"], target_machine()) df += """ @@ -1192,9 +1268,7 @@ def create_dockerfile_cibase(ddir, dockerfile_name, argmap): dfile.write(df) -def create_dockerfile_linux( - ddir, dockerfile_name, argmap, backends, repoagents, caches, endpoints -): +def create_dockerfile_linux(ddir, dockerfile_name, argmap): df = """ ARG TRITON_VERSION={} ARG TRITON_CONTAINER_VERSION={} @@ -1209,7 +1283,7 @@ def create_dockerfile_linux( # PyTorch backends need extra CUDA and other # dependencies during runtime that are missing in the CPU-only base container. # These dependencies must be copied from the Triton Min image. 
- if not FLAGS.enable_gpu and ("pytorch" in backends): + if not "gpu" in FLAGS.feature and ("pytorch" in FLAGS.backend): df += """ ############################################################################ ## Triton Min image @@ -1230,7 +1304,7 @@ def create_dockerfile_linux( """ df += dockerfile_prepare_container_linux( - argmap, backends, FLAGS.enable_gpu, target_machine() + argmap, "gpu" in FLAGS.feature, target_machine() ) df += """ @@ -1249,7 +1323,7 @@ def create_dockerfile_linux( """ if not FLAGS.no_core_build: # Add feature labels for SageMaker endpoint - if "sagemaker" in endpoints: + if "sagemaker" in FLAGS.endpoint: df += """ LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true LABEL com.amazonaws.sagemaker.capabilities.multi-models=true @@ -1257,11 +1331,11 @@ def create_dockerfile_linux( """ # This is required since libcublasLt.so is not present during the build # stage of the PyTorch backend - if not FLAGS.enable_gpu and ("pytorch" in backends): + if not "gpu" in FLAGS.feature and ("pytorch" in FLAGS.backend): df += """ RUN patchelf --add-needed /usr/local/cuda/lib64/stubs/libcublasLt.so.13 backends/pytorch/libtorch_cuda.so """ - if "tensorrtllm" in backends: + if "tensorrtllm" in FLAGS.backend: df += """ # Install required packages for TRT-LLM models # Remove contents that are not needed in runtime @@ -1288,7 +1362,7 @@ def create_dockerfile_linux( dfile.write(df) -def dockerfile_prepare_container_linux(argmap, backends, enable_gpu, target_machine): +def dockerfile_prepare_container_linux(argmap, enable_gpu, target_machine): gpu_enabled = 1 if enable_gpu else 0 # Common steps to produce docker images shared by build.py and compose.py. 
# Sets environment variables, installs dependencies and adds entrypoint @@ -1307,21 +1381,21 @@ def dockerfile_prepare_container_linux(argmap, backends, enable_gpu, target_mach """ # Necessary for libtorch.so to find correct HPCX libraries - if "pytorch" in backends: + if "pytorch" in FLAGS.backend: df += """ ENV LD_LIBRARY_PATH /opt/hpcx/ucc/lib/:/opt/hpcx/ucx/lib/:${LD_LIBRARY_PATH} """ backend_dependencies = "" # libgomp1 is needed by both onnxruntime and pytorch backends - if ("onnxruntime" in backends) or ("pytorch" in backends): + if ("onnxruntime" in FLAGS.backend) or ("pytorch" in FLAGS.backend): backend_dependencies = "libgomp1" # libgfortran5 is needed by pytorch backend on ARM - if ("pytorch" in backends) and (target_machine == "aarch64"): + if ("pytorch" in FLAGS.backend) and (target_machine == "aarch64"): backend_dependencies += " libgfortran5" # openssh-server is needed for fastertransformer - if "fastertransformer" in backends: + if "fastertransformer" in FLAGS.backend: backend_dependencies += " openssh-server" df += """ @@ -1398,10 +1472,10 @@ def dockerfile_prepare_container_linux(argmap, backends, enable_gpu, target_mach ENV TCMALLOC_RELEASE_RATE 200 """ - if "fastertransformer" in backends: + if "fastertransformer" in FLAGS.backend: be = "fastertransformer" url = "https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/{}/docker/create_dockerfile_and_build.py".format( - backends[be] + FLAGS.backend[be]["tag"] ) response = requests.get(url) spec = importlib.util.spec_from_loader( @@ -1411,7 +1485,7 @@ def dockerfile_prepare_container_linux(argmap, backends, enable_gpu, target_mach exec(response.content, fastertransformer_buildscript.__dict__) df += fastertransformer_buildscript.create_postbuild(is_multistage_build=False) - if enable_gpu: + if "gpu" in FLAGS.feature: df += install_dcgm_libraries(argmap["DCGM_VERSION"], target_machine) # This segment will break the RHEL SBSA build. 
Need to determine whether # this is necessary to incorporate. @@ -1424,10 +1498,10 @@ def dockerfile_prepare_container_linux(argmap, backends, enable_gpu, target_mach && rm -f ${_CUDA_COMPAT_PATH}/lib """ else: - df += add_cpu_libs_to_linux_dockerfile(backends, target_machine) + df += add_cpu_libs_to_linux_dockerfile(target_machine) # Add dependencies needed for python backend - if "python" in backends: + if "python" in FLAGS.backend: if target_platform() == "rhel": df += """ # python3, python3-pip and some pip installs required for the python backend @@ -1462,7 +1536,7 @@ def dockerfile_prepare_container_linux(argmap, backends, enable_gpu, target_mach virtualenv \\ && rm -rf /var/lib/apt/lists/* """ - if "tensorrtllm" in backends: + if "tensorrtllm" in FLAGS.backend: df += """ # Updating the openssh-client to fix for the CVE-2024-6387. This can be removed when trtllm uses a later CUDA container(12.5 or later) RUN apt-get update \\ @@ -1471,7 +1545,7 @@ def dockerfile_prepare_container_linux(argmap, backends, enable_gpu, target_mach && rm -rf /var/lib/apt/lists/* """ - if "vllm" in backends: + if "vllm" in FLAGS.backend: df += f""" # Install required packages for vLLM models ARG BUILD_PUBLIC_VLLM="true" @@ -1509,7 +1583,7 @@ def dockerfile_prepare_container_linux(argmap, backends, enable_gpu, target_mach ENV LD_LIBRARY_PATH /usr/local/lib:/usr/local/lib/python${{PYVER}}/dist-packages/torch/lib:${{LD_LIBRARY_PATH}} """ - if "dali" in backends: + if "dali" in FLAGS.backend: df += """ # Update Python path to include DALI ENV PYTHONPATH=/opt/tritonserver/backends/dali/wheel/dali:$PYTHONPATH @@ -1525,7 +1599,7 @@ def dockerfile_prepare_container_linux(argmap, backends, enable_gpu, target_mach # The CPU-only build uses ubuntu as the base image, and so the # entrypoint files are not available in /opt/nvidia in the base # image, so we must provide them ourselves. 
- if not enable_gpu: + if not "gpu" in FLAGS.feature: df += """ COPY docker/cpu_only/ /opt/nvidia/ ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"] @@ -1541,10 +1615,10 @@ def dockerfile_prepare_container_linux(argmap, backends, enable_gpu, target_mach return df -def add_cpu_libs_to_linux_dockerfile(backends, target_machine): +def add_cpu_libs_to_linux_dockerfile(target_machine): df = "" libs_arch = "aarch64" if target_machine == "aarch64" else "x86_64" - if "pytorch" in backends: + if "pytorch" in FLAGS.backend: # Add extra dependencies for pytorch backend. # Note: Even though the build is CPU-only, the version of pytorch # we are using depend upon libraries like cuda and cudnn. Since @@ -1587,7 +1661,7 @@ def add_cpu_libs_to_linux_dockerfile(backends, target_machine): cuda_arch=cuda_arch, libs_arch=libs_arch ) - if "pytorch" in backends: + if "pytorch" in FLAGS.backend: # Add NCCL dependency for pytorch backend. # Note: Even though the build is CPU-only, the version of # pytorch we are using depends upon the NCCL library. 
@@ -1626,9 +1700,7 @@ def change_default_python_version_rhel(version): return df -def create_dockerfile_windows( - ddir, dockerfile_name, argmap, backends, repoagents, caches -): +def create_dockerfile_windows(ddir, dockerfile_name, argmap): df = """ ARG TRITON_VERSION={} ARG TRITON_CONTAINER_VERSION={} @@ -1675,9 +1747,7 @@ def create_dockerfile_windows( dfile.write(df) -def create_build_dockerfiles( - container_build_dir, images, backends, repoagents, caches, endpoints -): +def create_build_dockerfiles(container_build_dir, images): if "base" in images: base_image = images["base"] if target_platform() == "rhel": @@ -1688,7 +1758,7 @@ def create_build_dockerfiles( base_image = "mcr.microsoft.com/dotnet/framework/sdk:4.8" elif target_platform() == "rhel": raise KeyError("A base image must be specified when targeting RHEL") - elif FLAGS.enable_gpu: + elif "gpu" in FLAGS.feature: base_image = "nvcr.io/nvidia/tritonserver:{}-py3-min".format( FLAGS.upstream_container_version ) @@ -1707,8 +1777,8 @@ def create_build_dockerfiles( # For CPU-only image we need to copy some cuda libraries and dependencies # since we are using PyTorch containers that are not CPU-only. if ( - not FLAGS.enable_gpu - and ("pytorch" in backends) + not "gpu" in FLAGS.feature + and ("pytorch" in FLAGS.backend) and (target_platform() != "windows") ): if "gpu-base" in images: @@ -1733,19 +1803,12 @@ def create_build_dockerfiles( FLAGS.build_dir, "Dockerfile", dockerfileargmap, - backends, - repoagents, - caches, ) else: create_dockerfile_linux( FLAGS.build_dir, "Dockerfile", dockerfileargmap, - backends, - repoagents, - caches, - endpoints, ) # Dockerfile used for the creating the CI base image. @@ -1788,6 +1851,11 @@ def create_docker_build_script(script_name, container_install_dir, container_ci_ "--pull", ] + if FLAGS.no_container_cache: + baseargs += [ + "--no-cache", + ] + # Windows docker runs in a VM and memory needs to be specified # explicitly (at least for some configurations of docker). 
if target_platform() == "windows": @@ -1931,9 +1999,7 @@ def create_docker_build_script(script_name, container_install_dir, container_ci_ docker_script.cmd(cibaseargs, check_exitcode=True) -def core_build( - cmake_script, repo_dir, cmake_dir, build_dir, install_dir, components, backends -): +def core_build(cmake_script, repo_dir, cmake_dir, build_dir, install_dir): repo_build_dir = os.path.join(build_dir, "tritonserver", "build") repo_install_dir = os.path.join(build_dir, "tritonserver", "install") @@ -1942,9 +2008,7 @@ def core_build( cmake_script.comment() cmake_script.mkdir(repo_build_dir) cmake_script.cwd(repo_build_dir) - cmake_script.cmake( - core_cmake_args(components, backends, cmake_dir, repo_install_dir) - ) + cmake_script.cmake(core_cmake_args(cmake_dir, repo_install_dir)) cmake_script.makeinstall() if target_platform() == "windows": @@ -2058,14 +2122,11 @@ def tensorrtllm_postbuild(cmake_script, repo_install_dir, tensorrtllm_be_dir): def backend_build( be, cmake_script, - tag, build_dir, install_dir, - github_organization, images, - components, - library_paths, ): + tag, github_organization = FLAGS.backend[be]["tag"], FLAGS.backend[be]["org"] repo_build_dir = os.path.join(build_dir, be, "build") repo_install_dir = os.path.join(build_dir, be, "install") @@ -2075,25 +2136,17 @@ def backend_build( cmake_script.comment() cmake_script.mkdir(build_dir) cmake_script.cwd(build_dir) + repository_name = backend_repo(be) if be == "tensorrtllm": - github_organization = ( - "https://github.com/NVIDIA" - if "triton-inference-server" in FLAGS.github_organization - else FLAGS.github_organization - ) repository_name = "TensorRT-LLM" - cmake_script.gitclone(repository_name, tag, be, github_organization) - else: - cmake_script.gitclone(backend_repo(be), tag, be, github_organization) + cmake_script.gitclone(repository_name, tag, be, github_organization) if be == "tensorrtllm": tensorrtllm_prebuild(cmake_script) cmake_script.mkdir(repo_build_dir) 
cmake_script.cwd(repo_build_dir) - cmake_script.cmake( - backend_cmake_args(images, components, be, repo_install_dir, library_paths) - ) + cmake_script.cmake(backend_cmake_args(images, be, repo_install_dir)) cmake_script.makeinstall() if be == "tensorrtllm": @@ -2128,11 +2181,10 @@ def backend_build( def backend_clone( be, clone_script, - tag, build_dir, install_dir, - github_organization, ): + tag, github_organization = FLAGS.backend[be]["tag"], FLAGS.backend[be]["org"] clone_script.commentln(8) clone_script.comment(f"'{be}' backend") clone_script.comment("Delete this section to remove backend from build") @@ -2162,9 +2214,7 @@ def backend_clone( clone_script.blankln() -def repo_agent_build( - ra, cmake_script, build_dir, install_dir, repoagent_repo, repoagents -): +def repo_agent_build(ra, cmake_script, build_dir, install_dir): repo_build_dir = os.path.join(build_dir, ra, "build") repo_install_dir = os.path.join(build_dir, ra, "install") @@ -2175,12 +2225,12 @@ def repo_agent_build( cmake_script.mkdir(build_dir) cmake_script.cwd(build_dir) cmake_script.gitclone( - repoagent_repo(ra), repoagents[ra], ra, FLAGS.github_organization + repoagent_repo(ra), FLAGS.repoagent[ra]["tag"], ra, FLAGS.repoagent[ra]["org"] ) cmake_script.mkdir(repo_build_dir) cmake_script.cwd(repo_build_dir) - cmake_script.cmake(repoagent_cmake_args(images, components, ra, repo_install_dir)) + cmake_script.cmake(repoagent_cmake_args(images, ra, repo_install_dir)) cmake_script.makeinstall() cmake_script.mkdir(os.path.join(install_dir, "repoagents")) @@ -2195,7 +2245,7 @@ def repo_agent_build( cmake_script.blankln() -def cache_build(cache, cmake_script, build_dir, install_dir, cache_repo, caches): +def cache_build(cache, cmake_script, build_dir, install_dir): repo_build_dir = os.path.join(build_dir, cache, "build") repo_install_dir = os.path.join(build_dir, cache, "install") @@ -2206,12 +2256,12 @@ def cache_build(cache, cmake_script, build_dir, install_dir, cache_repo, caches) 
cmake_script.mkdir(build_dir) cmake_script.cwd(build_dir) cmake_script.gitclone( - cache_repo(cache), caches[cache], cache, FLAGS.github_organization + cache_repo(cache), FLAGS.cache[cache]["tag"], cache, FLAGS.cache[cache]["org"] ) cmake_script.mkdir(repo_build_dir) cmake_script.cwd(repo_build_dir) - cmake_script.cmake(cache_cmake_args(images, components, cache, repo_install_dir)) + cmake_script.cmake(cache_cmake_args(images, cache, repo_install_dir)) cmake_script.makeinstall() cmake_script.mkdir(os.path.join(install_dir, "caches")) @@ -2226,9 +2276,7 @@ def cache_build(cache, cmake_script, build_dir, install_dir, cache_repo, caches) cmake_script.blankln() -def cibase_build( - cmake_script, repo_dir, cmake_dir, build_dir, install_dir, ci_dir, backends -): +def cibase_build(cmake_script, repo_dir, cmake_dir, build_dir, install_dir, ci_dir): repo_install_dir = os.path.join(build_dir, "tritonserver", "install") cmake_script.commentln(8) @@ -2300,7 +2348,7 @@ def cibase_build( # The onnxruntime_backend build produces some artifacts that # are needed for CI testing. - if "onnxruntime" in backends: + if "onnxruntime" in FLAGS.backend: ort_install_dir = os.path.join(build_dir, "onnxruntime", "install") cmake_script.mkdir(os.path.join(ci_dir, "qa", "L0_custom_ops")) if target_platform() != "igpu": @@ -2320,7 +2368,7 @@ def cibase_build( # rebuilt with specific options. 
cmake_script.mkdir(os.path.join(ci_dir, "tritonbuild")) for be in ("identity", "python"): - if be in backends: + if be in FLAGS.backend: cmake_script.rmdir(os.path.join(build_dir, be, "build")) cmake_script.rmdir(os.path.join(build_dir, be, "install")) cmake_script.cpdir( @@ -2338,9 +2386,9 @@ def finalize_build(cmake_script, install_dir, ci_dir): cmake_script.cmd(f"chmod -R a+rw {ci_dir}") -def enable_all(): +def enable_all(default): if target_platform() != "windows": - all_backends = [ + FLAGS.backend = [ "ensemble", "identity", "square", @@ -2353,21 +2401,22 @@ def enable_all(): "fil", "tensorrt", ] - all_repoagents = ["checksum"] - all_caches = ["local", "redis"] - all_filesystems = ["gcs", "s3", "azure_storage"] - all_endpoints = ["http", "grpc", "sagemaker", "vertex-ai"] - - FLAGS.enable_logging = True - FLAGS.enable_stats = True - FLAGS.enable_metrics = True - FLAGS.enable_gpu_metrics = True - FLAGS.enable_cpu_metrics = True - FLAGS.enable_tracing = True - FLAGS.enable_nvtx = True - FLAGS.enable_gpu = True + FLAGS.repoagent = ["checksum"] + FLAGS.cache = ["local", "redis"] + FLAGS.filesystem = ["gcs", "s3", "azure_storage"] + FLAGS.endpoint = ["http", "grpc", "sagemaker", "vertex-ai"] + FLAGS.feature = [ + "logging", + "stats", + "metrics", + "gpu_metrics", + "cpu_metrics", + "tracing", + "nvtx", + "gpu", + ] else: - all_backends = [ + FLAGS.backend = [ "ensemble", "identity", "square", @@ -2376,51 +2425,24 @@ def enable_all(): "openvino", "tensorrt", ] - all_repoagents = ["checksum"] - all_caches = ["local", "redis"] - all_filesystems = [] - all_endpoints = ["http", "grpc"] - - FLAGS.enable_logging = True - FLAGS.enable_stats = True - FLAGS.enable_tracing = True - FLAGS.enable_gpu = True - - requested_backends = [] - for be in FLAGS.backend: - parts = be.split(":") - requested_backends += [parts[0]] - for be in all_backends: - if be not in requested_backends: - FLAGS.backend += [be] - - requested_repoagents = [] - for ra in FLAGS.repoagent: - parts = 
ra.split(":") - requested_repoagents += [parts[0]] - for ra in all_repoagents: - if ra not in requested_repoagents: - FLAGS.repoagent += [ra] - - requested_caches = [] - for cache in FLAGS.cache: - parts = cache.split(":") - requested_caches += [parts[0]] - for cache in all_caches: - if cache not in requested_caches: - FLAGS.cache += [cache] - - for fs in all_filesystems: - if fs not in FLAGS.filesystem: - FLAGS.filesystem += [fs] - - for ep in all_endpoints: - if ep not in FLAGS.endpoint: - FLAGS.endpoint += [ep] + FLAGS.repoagent = ["checksum"] + FLAGS.cache = ["local", "redis"] + FLAGS.filesystem = [] + FLAGS.endpoint = ["http", "grpc"] + FLAGS.feature = ["logging", "stats", "tracing", "gpu"] + + FLAGS.component = ["common", "core", "backend", "thirdparty"] + + # populate default values as independent objects + for element in ELEMENTS: + attr = getattr(FLAGS, element) + setattr(FLAGS, element, {key: default(element) for key in attr}) if __name__ == "__main__": - parser = argparse.ArgumentParser() + parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) group_qv = parser.add_mutually_exclusive_group() group_qv.add_argument( @@ -2468,6 +2490,12 @@ def enable_all(): required=False, help="Do not use Docker --pull argument when building container.", ) + parser.add_argument( + "--no-container-cache", + action="store_true", + required=False, + help="Use Docker --no-cache argument when building container.", + ) parser.add_argument( "--container-memory", default=None, @@ -2478,7 +2506,8 @@ def enable_all(): "--target-platform", required=False, default=None, - help='Target platform for build, can be "linux", "rhel", "windows" or "igpu". If not specified, build targets the current platform.', + choices=["linux", "rhel", "windows", "igpu"], + help="Target platform for build. 
If not specified, build targets the current platform.", ) parser.add_argument( "--target-machine", @@ -2520,20 +2549,14 @@ def enable_all(): type=str, required=False, default="/tmp", - help="Temporary directory used for building inside docker. Default is /tmp.", - ) - parser.add_argument( - "--library-paths", - action="append", - required=False, - default=None, - help="Specify library paths for respective backends in build as [:].", + help="Temporary directory used for building inside docker.", ) parser.add_argument( "--build-type", required=False, default="Release", - help='Build type, one of "Release", "Debug", "RelWithDebInfo" or "MinSizeRel". Default is "Release".', + choices=["Release", "Debug", "RelWithDebInfo", "MinSizeRel"], + help="Build type.", ) parser.add_argument( "-j", @@ -2549,7 +2572,7 @@ def enable_all(): type=str, required=False, default="https://github.com/triton-inference-server", - help='The GitHub organization containing the repos used for the build. Defaults to "https://github.com/triton-inference-server".', + help="The GitHub organization containing the repos used for the build.", ) parser.add_argument( "--version", @@ -2578,58 +2601,116 @@ def enable_all(): parser.add_argument( "--image", action="append", + metavar=("", ""), + nargs=2, required=False, - help='Use specified Docker image in build as ,. can be "base", "gpu-base", or "pytorch".', - ) - - parser.add_argument( - "--enable-all", - action="store_true", - required=False, - help="Enable all standard released Triton features, backends, repository agents, caches, endpoints and file systems.", - ) - parser.add_argument( - "--enable-logging", action="store_true", required=False, help="Enable logging." 
- ) - parser.add_argument( - "--enable-stats", - action="store_true", - required=False, - help="Enable statistics collection.", - ) - parser.add_argument( - "--enable-metrics", - action="store_true", - required=False, - help="Enable metrics reporting.", - ) - parser.add_argument( - "--enable-gpu-metrics", - action="store_true", - required=False, - help="Include GPU metrics in reported metrics.", + default=[], + help='Use specified Docker image in build. can be "base", "gpu-base", or "pytorch".', ) parser.add_argument( - "--enable-cpu-metrics", + "--use-buildbase", + default=False, action="store_true", - required=False, - help="Include CPU metrics in reported metrics.", - ) - parser.add_argument( - "--enable-tracing", action="store_true", required=False, help="Enable tracing." - ) - parser.add_argument( - "--enable-nvtx", action="store_true", required=False, help="Enable NVTX." - ) - parser.add_argument( - "--enable-gpu", action="store_true", required=False, help="Enable GPU support." + help='Use local temporary "buildbase" Docker image as "base" image to build backends', ) + parser.add_argument( - "--enable-mali-gpu", + "--enable-all", action="store_true", required=False, - help="Enable ARM MALI GPU support.", + help="Enable all standard released Triton features, backends, repository agents, caches, endpoints, and file systems.", ) + + def add_cmake_args(element, parser): + dependent_kwargs = dict( + nargs=3, + metavar=(f"<{element}>", "", ""), + ) + if element == "core": + # "core" is a special case: it is already a specific component, so no need to select a specific instance of its element type + dependent_kwargs = dict( + nargs=2, + metavar=("", ""), + ) + parser.add_argument( + f"--extra-{element}-cmake-arg", + action="append", + required=False, + default=[], + help=f"Extra CMake argument for {element} build. 
The argument is passed to CMake as -D= and is included after all CMake arguments added by build.py.", + **dependent_kwargs, + ) + parser.add_argument( + f"--override-{element}-cmake-arg", + action="append", + required=False, + default=[], + help=f"Override specified backend CMake argument in the {element} build. The argument is passed to CMake as -D=. This flag only impacts CMake arguments that are used by build.py. To unconditionally add a CMake argument to the {element} build use --extra-{element}-cmake-arg.", + **dependent_kwargs, + ) + + element_groups = {} + for element, properties in ELEMENTS.items(): + if "strict" in properties: + kwargs = {"choices": ELEMENTS_LISTS[element]} + help_list = ", ".join(ELEMENTS_LISTS[element]) + else: + kwargs = {} + help_list = ", ".join(ELEMENTS_LISTS[element] + ["..."]) + + group = parser.add_argument_group( + element, + f"Options to configure {element}s, including: {help_list}", + ) + element_groups[element] = group + + if not "required" in properties: + group.add_argument( + f"--enable-{element}", + metavar=f"<{element}> [<{element}> ...]", + nargs="*", + action="extend", + required=False, + default=[], + help=f"Enable requested {element}(s)", + **kwargs, + ) + group.add_argument( + f"--disable-{element}", + metavar=f"<{element}> [<{element}> ...]", + nargs="*", + action="extend", + required=False, + default=[], + help=f"Disable requested {element}(s) (remove from --enable-all standard list)", + **kwargs, + ) + if "tag" in properties: + group.add_argument( + f"--{element}-tag", + action="append", + metavar=(f"<{element}>", ""), + nargs=2, + required=False, + default=[], + help=f'Select for specified <{element}>. If starts with "pull/" then it refers to a pull-request reference, otherwise indicates the git tag/branch to use for the build. If the version is non-development then the default is the release branch matching the container version (e.g. version YY.MM -> branch rYY.MM); otherwise the default is "main" (e.g. 
version YY.MMdev -> branch main).', + ) + if "org" in properties: + group.add_argument( + f"--{element}-org", + action="append", + metavar=(f"<{element}>", ""), + nargs=2, + required=False, + default=[], + help=f"Select for specified <{element}>, to use the fork of the corresponding repository from instead of the default --github-organization value.", + ) + if "cmake" in properties: + add_cmake_args(element, group) + + # special case + add_cmake_args("core", element_groups["component"]) + parser.add_argument( "--min-compute-capability", type=str, @@ -2638,78 +2719,18 @@ def enable_all(): help="Minimum CUDA compute capability supported by server.", ) - parser.add_argument( - "--endpoint", - action="append", - required=False, - help='Include specified endpoint in build. Allowed values are "grpc", "http", "vertex-ai" and "sagemaker".', - ) - parser.add_argument( - "--filesystem", - action="append", - required=False, - help='Include specified filesystem in build. Allowed values are "gcs", "azure_storage" and "s3".', - ) parser.add_argument( "--no-core-build", action="store_true", required=False, help="Do not build Triton core shared library or executable.", ) - parser.add_argument( - "--backend", - action="append", - required=False, - help='Include specified backend in build as [:]. If starts with "pull/" then it refers to a pull-request reference, otherwise indicates the git tag/branch to use for the build. If the version is non-development then the default is the release branch matching the container version (e.g. version YY.MM -> branch rYY.MM); otherwise the default is "main" (e.g. version YY.MMdev -> branch main).', - ) - parser.add_argument( - "--repo-tag", - action="append", - required=False, - help='The version of a component to use in the build as :. can be "common", "core", "backend" or "thirdparty". indicates the git tag/branch to use for the build. Currently does not support pull-request reference. 
If the version is non-development then the default is the release branch matching the container version (e.g. version YY.MM -> branch rYY.MM); otherwise the default is "main" (e.g. version YY.MMdev -> branch main).', - ) - parser.add_argument( - "--repoagent", - action="append", - required=False, - help='Include specified repo agent in build as [:]. If starts with "pull/" then it refers to a pull-request reference, otherwise indicates the git tag/branch to use for the build. If the version is non-development then the default is the release branch matching the container version (e.g. version YY.MM -> branch rYY.MM); otherwise the default is "main" (e.g. version YY.MMdev -> branch main).', - ) - parser.add_argument( - "--cache", - action="append", - required=False, - help='Include specified cache in build as [:]. If starts with "pull/" then it refers to a pull-request reference, otherwise indicates the git tag/branch to use for the build. If the version is non-development then the default is the release branch matching the container version (e.g. version YY.MM -> branch rYY.MM); otherwise the default is "main" (e.g. version YY.MMdev -> branch main).', - ) parser.add_argument( "--no-force-clone", action="store_true", default=False, help="Do not create fresh clones of repos that have already been cloned.", ) - parser.add_argument( - "--extra-core-cmake-arg", - action="append", - required=False, - help="Extra CMake argument as =. The argument is passed to CMake as -D= and is included after all CMake arguments added by build.py for the core builds.", - ) - parser.add_argument( - "--override-core-cmake-arg", - action="append", - required=False, - help="Override specified CMake argument in the build as =. The argument is passed to CMake as -D=. This flag only impacts CMake arguments that are used by build.py. 
To unconditionally add a CMake argument to the core build use --extra-core-cmake-arg.", - ) - parser.add_argument( - "--extra-backend-cmake-arg", - action="append", - required=False, - help="Extra CMake argument for a backend build as :=. The argument is passed to CMake as -D= and is included after all CMake arguments added by build.py for the backend.", - ) - parser.add_argument( - "--override-backend-cmake-arg", - action="append", - required=False, - help="Override specified backend CMake argument in the build as :=. The argument is passed to CMake as -D=. This flag only impacts CMake arguments that are used by build.py. To unconditionally add a CMake argument to the backend build use --extra-backend-cmake-arg.", - ) parser.add_argument( "--release-version", required=False, @@ -2728,6 +2749,12 @@ def enable_all(): default=DEFAULT_TRITON_VERSION_MAP["upstream_container_version"], help="This flag sets the upstream container version for Triton Inference Server to be built. Default: the latest released version.", ) + parser.add_argument( + "--default-repo-tag", + required=False, + default=None, + help="Override the calculated default-repo-tag value", + ) parser.add_argument( "--ort-version", required=False, @@ -2770,6 +2797,7 @@ def enable_all(): required=False, nargs=2, metavar=("key", "value"), + default=[], help="Add build secrets in the form of . These secrets are used during the build process for vllm. The secrets are passed to the Docker build step as `--secret id=`. 
The following keys are expected and their purposes are described below:\n\n" " - 'req': A file containing a list of dependencies for pip (e.g., requirements.txt).\n" " - 'build_public_vllm': A flag (default is 'true') indicating whether to build the public VLLM version.\n\n" @@ -2777,38 +2805,6 @@ def enable_all(): ) FLAGS = parser.parse_args() - if FLAGS.image is None: - FLAGS.image = [] - if FLAGS.repo_tag is None: - FLAGS.repo_tag = [] - if FLAGS.backend is None: - FLAGS.backend = [] - if FLAGS.endpoint is None: - FLAGS.endpoint = [] - if FLAGS.filesystem is None: - FLAGS.filesystem = [] - if FLAGS.repoagent is None: - FLAGS.repoagent = [] - if FLAGS.cache is None: - FLAGS.cache = [] - if FLAGS.library_paths is None: - FLAGS.library_paths = [] - if FLAGS.extra_core_cmake_arg is None: - FLAGS.extra_core_cmake_arg = [] - if FLAGS.override_core_cmake_arg is None: - FLAGS.override_core_cmake_arg = [] - if FLAGS.override_backend_cmake_arg is None: - FLAGS.override_backend_cmake_arg = [] - if FLAGS.extra_backend_cmake_arg is None: - FLAGS.extra_backend_cmake_arg = [] - if FLAGS.build_secret is None: - FLAGS.build_secret = [] - - # if --enable-all is specified, then update FLAGS to enable all - # settings, backends, repo-agents, caches, file systems, endpoints, etc. - if FLAGS.enable_all: - enable_all() - # When doing a docker build, --build-dir, --install-dir and # --cmake-dir must not be set. 
We will use the build/ subdir # within the server/ repo that contains this build.py script for @@ -2856,6 +2852,8 @@ def enable_all(): cver = FLAGS.triton_container_version if not cver.endswith("dev"): default_repo_tag = "r" + cver + if FLAGS.default_repo_tag: + default_repo_tag = FLAGS.default_repo_tag log("default repo-tag: {}".format(default_repo_tag)) # For other versions use the TRITON_VERSION_MAP unless explicitly @@ -2867,153 +2865,169 @@ def enable_all(): log("container version {}".format(FLAGS.container_version)) log("upstream container version {}".format(FLAGS.upstream_container_version)) - for ep in FLAGS.endpoint: - log(f'endpoint "{ep}"') - for fs in FLAGS.filesystem: - log(f'filesystem "{fs}"') - - # Initialize map of backends to build and repo-tag for each. - backends = {} - for be in FLAGS.backend: - parts = be.split(":") - if len(parts) == 1: - parts.append(default_repo_tag) - log('backend "{}" at tag/branch "{}"'.format(parts[0], parts[1])) - backends[parts[0]] = parts[1] - - if "vllm" in backends: - if "python" not in backends: - log( - "vLLM backend requires Python backend, adding Python backend with tag {}".format( - backends["vllm"] - ) - ) - backends["python"] = backends["vllm"] - secrets = dict(getattr(FLAGS, "build_secret", [])) if secrets: requirements = secrets.get("req", "") build_public_vllm = secrets.get("build_public_vllm", "true") log('Build Arg for BUILD_PUBLIC_VLLM: "{}"'.format(build_public_vllm)) - # Initialize map of repo agents to build and repo-tag for each. - repoagents = {} - for be in FLAGS.repoagent: - parts = be.split(":") - if len(parts) == 1: - parts.append(default_repo_tag) - log('repoagent "{}" at tag/branch "{}"'.format(parts[0], parts[1])) - repoagents[parts[0]] = parts[1] - - # Initialize map of caches to build and repo-tag for each. 
- caches = {} - for be in FLAGS.cache: - parts = be.split(":") - if len(parts) == 1: - parts.append(default_repo_tag) - log('cache "{}" at tag/branch "{}"'.format(parts[0], parts[1])) - caches[parts[0]] = parts[1] + # initialize element maps + def default_element_dict(element): + default = {} + if "tag" in ELEMENTS[element]: + default["tag"] = default_repo_tag + if "org" in ELEMENTS[element]: + default["org"] = FLAGS.github_organization + if "cmake" in ELEMENTS[element]: + default["cmake_extra"] = {} + default["cmake_override"] = {} + return default + + for element, properties in ELEMENTS.items(): + setattr(FLAGS, f"{element}", {}) + attr = getattr(FLAGS, f"{element}") + # populate the required ones + if "required" in properties: + for value in ELEMENTS_LISTS[element]: + attr[value] = default_element_dict(element) - # Initialize map of docker images. - images = {} - for img in FLAGS.image: - parts = img.split(",") - fail_if( - len(parts) != 2, "--image must specify ," - ) - fail_if( - parts[0] not in ["base", "gpu-base", "pytorch"], - "unsupported value for --image", - ) - log('image "{}": "{}"'.format(parts[0], parts[1])) - images[parts[0]] = parts[1] + # if --enable-all is specified, then update FLAGS to enable all + # settings, backends, repo-agents, caches, file systems, endpoints, etc. + if FLAGS.enable_all: + enable_all(default_element_dict) - # Initialize map of library paths for each backend. 
- library_paths = {} - for lpath in FLAGS.library_paths: - parts = lpath.split(":") - if len(parts) == 2: - log('backend "{}" library path "{}"'.format(parts[0], parts[1])) - library_paths[parts[0]] = parts[1] + # Process per-element information + def do_enable(element, map, key): + if key not in map: + map[key] = default_element_dict(element) - # Parse any explicitly specified cmake arguments - for cf in FLAGS.extra_core_cmake_arg: - parts = cf.split("=") - fail_if(len(parts) != 2, "--extra-core-cmake-arg must specify =") - log('CMake core extra "-D{}={}"'.format(parts[0], parts[1])) - EXTRA_CORE_CMAKE_FLAGS[parts[0]] = parts[1] - - for cf in FLAGS.override_core_cmake_arg: - parts = cf.split("=") - fail_if( - len(parts) != 2, "--override-core-cmake-arg must specify =" - ) - log('CMake core override "-D{}={}"'.format(parts[0], parts[1])) - OVERRIDE_CORE_CMAKE_FLAGS[parts[0]] = parts[1] + def do_disable(element, map, key): + if key in map: + map.pop(key) - for cf in FLAGS.extra_backend_cmake_arg: - parts = cf.split(":", 1) - fail_if( - len(parts) != 2, - "--extra-backend-cmake-arg must specify :=", - ) - be = parts[0] - parts = parts[1].split("=", 1) - fail_if( - len(parts) != 2, - "--extra-backend-cmake-arg must specify :=", - ) - fail_if( - be not in backends, - '--extra-backend-cmake-arg specifies backend "{}" which is not included in build'.format( - be - ), - ) - log('backend "{}" CMake extra "-D{}={}"'.format(be, parts[0], parts[1])) - if be not in EXTRA_BACKEND_CMAKE_FLAGS: - EXTRA_BACKEND_CMAKE_FLAGS[be] = {} - EXTRA_BACKEND_CMAKE_FLAGS[be][parts[0]] = parts[1] + def do_tag(element, map, key, value): + map[key]["tag"] = value - for cf in FLAGS.override_backend_cmake_arg: - parts = cf.split(":", 1) - fail_if( - len(parts) != 2, - "--override-backend-cmake-arg must specify :=", - ) - be = parts[0] - parts = parts[1].split("=", 1) - fail_if( - len(parts) != 2, - "--override-backend-cmake-arg must specify :=", - ) - fail_if( - be not in backends, - 
'--override-backend-cmake-arg specifies backend "{}" which is not included in build'.format( - be - ), - ) - log('backend "{}" CMake override "-D{}={}"'.format(be, parts[0], parts[1])) - if be not in OVERRIDE_BACKEND_CMAKE_FLAGS: - OVERRIDE_BACKEND_CMAKE_FLAGS[be] = {} - OVERRIDE_BACKEND_CMAKE_FLAGS[be][parts[0]] = parts[1] - - # Initialize map of common components and repo-tag for each. - components = { - "common": default_repo_tag, - "core": default_repo_tag, - "backend": default_repo_tag, - "thirdparty": default_repo_tag, + def do_org(element, map, key, value): + map[key]["org"] = value + + def do_cmake_extra(element, map, key, name, value): + map[key]["cmake_extra"][name] = value + + def do_cmake_override(element, map, key, name, value): + map[key]["cmake_override"][name] = value + + attr_fns = { + "enable": do_enable, + "disable": do_disable, + "tag": do_tag, + "org": do_org, + "extracmakearg": do_cmake_extra, + "overridecmakearg": do_cmake_override, } - for be in FLAGS.repo_tag: - parts = be.split(":") - fail_if(len(parts) != 2, "--repo-tag must specify :") + for element in ELEMENTS: + map = getattr(FLAGS, element) + attr_names = [ + f"enable_{element}", + f"disable_{element}", + f"{element}_tag", + f"{element}_org", + f"extra_{element}_cmake_arg", + f"override_{element}_cmake_arg", + ] + for attr_name in attr_names: + attr = getattr(FLAGS, attr_name, None) + if not attr: + continue + attr_fn = attr_fns[attr_name.replace(element, "").replace("_", "")] + for item in attr: + if not isinstance(item, list): + item = [item] + attr_fn(element, map, *item) + + # Handle special cases + if "vllm" in FLAGS.backend: + if "python" not in FLAGS.backend: + log( + "vLLM backend requires Python backend, adding Python backend with tag {}, org {}".format( + FLAGS.backend["vllm"]["tag"], FLAGS.backend["vllm"]["org"] + ) + ) + FLAGS.backend["python"] = FLAGS.backend["vllm"] + + # If armnn_tflite backend, source from external repo for git clone + if "armnn_tflite" in 
FLAGS.backend: + if FLAGS.backend["armnn_tflite"]["org"] == FLAGS.github_organization: + FLAGS.backend["armnn_tflite"][ + "org" + ] = "https://gitlab.com/arm-research/smarter/" + + if "tensorrtllm" in FLAGS.backend: + if FLAGS.backend["tensorrtllm"]["org"] == FLAGS.github_organization: + FLAGS.backend["tensorrtllm"]["org"] = "https://github.com/NVIDIA" + + # Print final element info + def format_cmake_args(args): + return ", ".join(['"-D{}={}"'.format(n, v) for n, v in args.items()]) + + for element in ELEMENTS: + map = getattr(FLAGS, element) + for key, info in map.items(): + log( + " ".join( + filter( + None, + [ + f'{element} "{key}"', + 'at tag/branch "{}"'.format(info["tag"]) + if "tag" in info + else "", + 'from org "{}"'.format(info["org"]) + if "org" in info + else "", + ], + ) + ) + ) + if any(["cmake" in prop for prop in info]): + cmake_extra_str = ( + format_cmake_args(info["cmake_extra"]) + if "cmake_extra" in info + else "" + ) + if cmake_extra_str: + log(" CMake extra: " + cmake_extra_str) + cmake_override_str = ( + format_cmake_args(info["cmake_override"]) + if "cmake_override" in info + else "" + ) + if cmake_override_str: + log(" CMake override: " + cmake_override_str) + + # Parse any explicitly specified cmake arguments + for key, val in FLAGS.extra_core_cmake_arg: + log('core: CMake extra "-D{}={}"'.format(key, val)) + EXTRA_CORE_CMAKE_FLAGS[key] = val + + for key, val in FLAGS.override_core_cmake_arg: + log('core: CMake override "-D{}={}"'.format(key, val)) + OVERRIDE_CORE_CMAKE_FLAGS[key] = val + + # Initialize map of docker images. 
+ images = {} + for key, val in FLAGS.image: fail_if( - parts[0] not in components, - '--repo-tag must be "common", "core", "backend", or "thirdparty"', + key not in ["base", "gpu-base", "pytorch"], + "unsupported value for --image", ) - components[parts[0]] = parts[1] - for c in components: - log('component "{}" at tag/branch "{}"'.format(c, components[c])) + log('image "{}": "{}"'.format(key, val)) + images[key] = val + if FLAGS.use_buildbase: + images["buildbase"] = "tritonserver_buildbase" + else: + if "base" in images: + images["buildbase"] = images["base"] # Set the build, install, and cmake directories to use for the # generated build scripts and Dockerfiles. If building without @@ -3062,64 +3076,46 @@ def enable_all(): script_cmake_dir, script_build_dir, script_install_dir, - components, - backends, ) # Commands to build each backend... - for be in backends: + for be in FLAGS.backend: # Core backends are not built separately from core so skip... if be in CORE_BACKENDS: continue - # If armnn_tflite backend, source from external repo for git clone - if be == "armnn_tflite": - github_organization = "https://gitlab.com/arm-research/smarter/" - else: - github_organization = FLAGS.github_organization - if be == "vllm": backend_clone( be, cmake_script, - backends[be], script_build_dir, script_install_dir, - github_organization, ) else: backend_build( be, cmake_script, - backends[be], script_build_dir, script_install_dir, - github_organization, images, - components, - library_paths, ) # Commands to build each repo agent... - for ra in repoagents: + for ra in FLAGS.repoagent: repo_agent_build( ra, cmake_script, script_build_dir, script_install_dir, - repoagent_repo, - repoagents, ) # Commands to build each cache... - for cache in caches: + for cache in FLAGS.cache: cache_build( cache, cmake_script, script_build_dir, script_install_dir, - cache_repo, - caches, ) # Commands needed only when building with Docker... 
@@ -3133,7 +3129,6 @@ def enable_all(): script_build_dir, script_install_dir, script_ci_dir, - backends, ) # When building with Docker the install and ci artifacts @@ -3153,9 +3148,7 @@ def enable_all(): if target_platform() == "windows": script_name += ".ps1" - create_build_dockerfiles( - script_build_dir, images, backends, repoagents, caches, FLAGS.endpoint - ) + create_build_dockerfiles(script_build_dir, images) create_docker_build_script(script_name, script_install_dir, script_ci_dir) # In not dry-run, execute the script to perform the build... If a diff --git a/docs/customization_guide/build.md b/docs/customization_guide/build.md index 9724b25971..0c709affdc 100644 --- a/docs/customization_guide/build.md +++ b/docs/customization_guide/build.md @@ -96,7 +96,7 @@ building with Docker. * In the *build* subdirectory of the server repo, generate the docker_build script, the cmake_build script and the Dockerfiles - needed to build Triton. If you use the --dryrun flag, build.py will + needed to build Triton. If you use the `--dryrun` flag, build.py will stop here so that you can examine these files. * Run the docker_build script to perform the Docker-based build. The @@ -105,7 +105,7 @@ building with Docker. * Build the *tritonserver_buildbase* Docker image that collects all the build dependencies needed to build Triton. The *tritonserver_buildbase* image is based on a minimal/base - image. When building with GPU support (--enable-gpu), the *min* + image. When building with GPU support (`--enable-feature gpu`), the *min* image is the [\-py3-min](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/tritonserver) image pulled from [NGC](https://ngc.nvidia.com) that contains the @@ -113,6 +113,8 @@ building with Docker. build Triton. When building without GPU support, the *min* image is the standard ubuntu:22.04 image. + * The flag `--use-buildbase` can be specified to automate the use of the *tritonserver_buildbase* image to build backends that require a base image. 
+ * Run the cmake_build script within the *tritonserver_buildbase* image to actually build Triton. The cmake_build script performs the following steps. @@ -149,17 +151,18 @@ building with Docker. By default, build.py does not enable any of Triton's optional features but you can enable all features, backends, and repository agents with -the --enable-all flag. The -v flag turns on verbose output. +the `--enable-all` flag. The `-v` flag turns on verbose output. ```bash $ ./build.py -v --enable-all ``` -If you want to enable only certain Triton features, backends and -repository agents, do not specify --enable-all. Instead you must -specify the individual flags as documented by --help. +If you want to enable only certain Triton features, backends, and +repository agents, there are two options: +a. do not specify `--enable-all`, and instead specify the individual flags as documented by `--help`. +b. specify `--enable-all` and then disable selected features that you wish to omit using the `--disable-...` arguments, also documented by `--help`. -#### Building With Specific GitHub Branches +#### Building With Specific GitHub Branches and Organization As described above, the build is performed in the server repo, but source from several other repos is fetched during the build @@ -168,7 +171,7 @@ other repos, but if you want to control which branch is used in these other repos you can as shown in the following example. ```bash -$ ./build.py ... --repo-tag=common:<container tag> --repo-tag=core:<container tag> --repo-tag=backend:<container tag> --repo-tag=thirdparty:<container tag> ... --backend=tensorrt:<container tag> ... --repoagent=checksum:<container tag> ... +$ ./build.py ... --component-tag common <container tag> --component-tag core <container tag> --component-tag backend <container tag> --component-tag thirdparty <container tag> ... --backend-tag tensorrt <container tag> ... --repoagent-tag checksum <container tag> ... ``` If you are building on a release branch then `<container tag>` will @@ -182,13 +185,18 @@ instead use the corresponding branch/tag in the build.
For example, if you have a branch called "mybranch" in the [onnxruntime_backend](https://github.com/triton-inference-server/onnxruntime_backend) repo that you want to use in the build, you would specify ---backend=onnxruntime:mybranch. +`--backend-tag onnxruntime mybranch`. + +If you want to build a backend from an alternative organization or user `<org>`, you can include a similar argument: +```bash +$ ./build.py ... --backend-org onnxruntime https://github.com/<org> +``` #### CPU-Only Build If you want to build without GPU support you must specify individual -feature flags and not include the `--enable-gpu` and -`--enable-gpu-metrics` flags. Only the following backends are +feature flags and not include the `--enable-feature gpu` and +`--enable-feature gpu_metrics` flags. Only the following backends are available for a non-GPU / CPU-only build: `identity`, `repeat`, `ensemble`, `square`, `pytorch`, `onnxruntime`, `openvino`, `python` and `fil`. @@ -196,7 +204,7 @@ available for a non-GPU / CPU-only build: `identity`, `repeat`, `ensemble`, CPU-only builds of the PyTorch backends require some CUDA stubs and runtime dependencies that are not present in the CPU-only base container. These are retrieved from a GPU base container, which can be changed with the -`--image=gpu-base,nvcr.io/nvidia/tritonserver:<xx.yy>-py3-min` flag. +`--image gpu-base nvcr.io/nvidia/tritonserver:<xx.yy>-py3-min` flag. ### Building Without Docker @@ -209,7 +217,7 @@ repo branch for the release you are interested in building (or the *main* branch to build from the development branch). To determine what dependencies are required by the build, run build.py -with the --dryrun flag, and then looking in the build subdirectory at +with the `--dryrun` flag, and then look in the build subdirectory at Dockerfile.buildbase. ```bash @@ -217,8 +225,8 @@ $ ./build.py -v --enable-all ``` From Dockerfile.buildbase you can see what dependencies you need to -install on your host system.
Note that when building with --enable-gpu -(or --enable-all), Dockerfile.buildbase depends on the +install on your host system. Note that when building with `--enable-feature gpu` +(or `--enable-all`), Dockerfile.buildbase depends on the [\-py3-min](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/tritonserver) image pulled from [NGC](https://ngc.nvidia.com). Unfortunately, a Dockerfile is not currently available for the @@ -228,7 +236,7 @@ cuDNN](#cuda-cublas-cudnn) and [TensorRT](#tensorrt) dependencies as described below. Once you have installed these dependencies on your build system you -can then use build.py with the --no-container-build flag to build +can then use build.py with the `--no-container-build` flag to build Triton. ```bash @@ -271,8 +279,8 @@ difference is that the minimal/base image used as the base of Dockerfile.buildbase image can be built from the provided [Dockerfile.win10.min](https://github.com/triton-inference-server/server/blob/main/Dockerfile.win10.min) file as described in [Windows 10 "Min" Image](#windows-10-min-image). When running build.py -use the --image flag to specify the tag that you assigned to this -image. For example, --image=base,win10-py3-min. +use the `--image` flag to specify the tag that you assigned to this +image. For example, `--image base win10-py3-min`. ### Windows and Docker @@ -320,7 +328,7 @@ and so you must enable them explicitly. The following build.py invocation builds all features and backends available on windows. 
```bash -python build.py --cmake-dir=/build --build-dir=/tmp/citritonbuild --no-container-pull --image=base,win10-py3-min --enable-logging --enable-stats --enable-tracing --enable-gpu --endpoint=grpc --endpoint=http --repo-tag=common:<container tag> --repo-tag=core:<container tag> --repo-tag=backend:<container tag> --repo-tag=thirdparty:<container tag> --backend=ensemble --backend=tensorrt:<container tag> --backend=onnxruntime:<container tag> --backend=openvino:<container tag> --backend=python:<container tag> +python build.py --cmake-dir /build --build-dir /tmp/citritonbuild --no-container-pull --image base win10-py3-min --enable-feature logging stats tracing gpu --enable-endpoint grpc http --component-tag common <container tag> --component-tag core <container tag> --component-tag backend <container tag> --component-tag thirdparty <container tag> --enable-backend ensemble tensorrt onnxruntime openvino python --backend-tag tensorrt <container tag> --backend-tag onnxruntime <container tag> --backend-tag openvino <container tag> --backend-tag python <container tag> ``` If you are building on *main* branch then `<container tag>` will @@ -334,7 +342,12 @@ branch/tag in the build. For example, if you have a branch called "mybranch" in the [onnxruntime_backend](https://github.com/triton-inference-server/onnxruntime_backend) repo that you want to use in the build, you would specify ---backend=onnxruntime:mybranch. +`--backend-tag onnxruntime mybranch`. + +If you want to build a backend from an alternative organization or user `<org>`, you can include a similar argument: +```bash +python build.py ... --backend-org onnxruntime https://github.com/<org> +``` ### Extract Build Artifacts @@ -394,7 +407,7 @@ and cmake_build or the equivalent commands to perform a build. depends on that package. For example, Triton supports the S3 filesystem by building the aws-sdk-cpp package. If aws-sdk-cpp doesn't build for your platform then you can remove the need for - that package by not specifying --filesystem=s3 when you run + that package by not specifying `--enable-filesystem s3` when you run build.py. In general, you should start by running build.py with the minimal required feature set. @@ -496,7 +509,7 @@ re-running `make` (or `msbuild.exe`).
### Building with Debug Symbols -To build with Debug symbols, use the --build-type=Debug argument while +To build with Debug symbols, use the `--build-type Debug` argument while launching build.py. If building directly with CMake use --DCMAKE_BUILD_TYPE=Debug. You can then launch the built server with +`-DCMAKE_BUILD_TYPE=Debug`. You can then launch the built server with gdb and see the debug symbols/information in the gdb trace. diff --git a/docs/user_guide/debugging_guide.md b/docs/user_guide/debugging_guide.md index e5b0263d30..69b7b16ea4 100644 --- a/docs/user_guide/debugging_guide.md +++ b/docs/user_guide/debugging_guide.md @@ -129,7 +129,7 @@ The easiest step to start with is running perf_analyzer to get a breakdown of th The next step would be to use a performance profiler. One profiler we recommend is [Nsight Systems](https://developer.nvidia.com/nsight-systems) (nsys), optionally including NVIDIA Tools Extension (NVTX) markers to profile Triton. -The Triton server container already has nsys installed. However, Triton does not build with the NVTX markers by default. If you want to use NVTX markers, you should build Triton with build.py, using the “--enable-nvtx” flag. This will provide details around some phases of processing a request, such as queueing, running inference, and handling outputs. +The Triton server container already has nsys installed. However, Triton does not build with the NVTX markers by default. If you want to use NVTX markers, you should build Triton with build.py, using the “--enable-feature nvtx” flag. This will provide details around some phases of processing a request, such as queueing, running inference, and handling outputs. You can profile Triton by running `nsys profile tritonserver --model-repository …`. The [nsys documentation](https://docs.nvidia.com/nsight-systems/UserGuide/index.html) provides more options and details for getting a thorough overview of what is going on.