From 8d872cf315924e9bce556d922ff0744da2172f8b Mon Sep 17 00:00:00 2001 From: QI JUN <22017000+QiJune@users.noreply.github.com> Date: Thu, 12 Jun 2025 11:51:49 +0800 Subject: [PATCH 1/8] switch pybind11 to nanobind Signed-off-by: QI JUN <22017000+QiJune@users.noreply.github.com> --- .gitmodules | 3 + 3rdparty/nanobind | 1 + cpp/CMakeLists.txt | 4 +- cpp/tensorrt_llm/pybind/CMakeLists.txt | 7 +- .../pybind/batch_manager/bindings.cpp | 13 ++- .../pybind/batch_manager/bindings.h | 4 +- cpp/tensorrt_llm/pybind/bindings.cpp | 12 +-- cpp/tensorrt_llm/pybind/executor/bindings.cpp | 15 ++- cpp/tensorrt_llm/pybind/executor/bindings.h | 4 +- cpp/tensorrt_llm/pybind/runtime/bindings.cpp | 99 ++++++++++--------- cpp/tensorrt_llm/pybind/runtime/bindings.h | 4 +- .../pybind/userbuffers/bindings.cpp | 4 +- .../pybind/userbuffers/bindings.h | 4 +- 13 files changed, 88 insertions(+), 86 deletions(-) create mode 160000 3rdparty/nanobind diff --git a/.gitmodules b/.gitmodules index 555349aa253..31970ad4054 100644 --- a/.gitmodules +++ b/.gitmodules @@ -20,3 +20,6 @@ [submodule "3rdparty/xgrammar"] path = 3rdparty/xgrammar url = https://github.com/mlc-ai/xgrammar.git +[submodule "3rdparty/nanobind"] + path = 3rdparty/nanobind + url = https://github.com/wjakob/nanobind diff --git a/3rdparty/nanobind b/3rdparty/nanobind new file mode 160000 index 00000000000..3d577d099a0 --- /dev/null +++ b/3rdparty/nanobind @@ -0,0 +1 @@ +Subproject commit 3d577d099a05f71a7860d8c6d80d2dd1fb92d9e1 diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index d82bebb73c4..cdde865a564 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -170,6 +170,7 @@ get_filename_component(TRT_LLM_ROOT_DIR ${CMAKE_CURRENT_SOURCE_DIR} PATH) set(3RDPARTY_DIR ${TRT_LLM_ROOT_DIR}/3rdparty) add_subdirectory(${3RDPARTY_DIR}/pybind11 ${CMAKE_CURRENT_BINARY_DIR}/pybind11) +add_subdirectory(${3RDPARTY_DIR}/nanobind) # include as system to suppress warnings include_directories( @@ -181,7 +182,8 @@ include_directories( ${3RDPARTY_DIR}/cutlass/tools/util/include ${3RDPARTY_DIR}/NVTX/include ${3RDPARTY_DIR}/json/include - ${3RDPARTY_DIR}/pybind11/include) + ${3RDPARTY_DIR}/pybind11/include + ${3RDPARTY_DIR}/nanobind/include) if(${CUDAToolkit_VERSION} VERSION_GREATER_EQUAL "11") add_definitions("-DENABLE_BF16") diff --git a/cpp/tensorrt_llm/pybind/CMakeLists.txt b/cpp/tensorrt_llm/pybind/CMakeLists.txt index f02599e6089..834d7235743 100755 --- a/cpp/tensorrt_llm/pybind/CMakeLists.txt +++ b/cpp/tensorrt_llm/pybind/CMakeLists.txt @@ -23,7 +23,7 @@ set(SRCS include_directories(${PROJECT_SOURCE_DIR}/include) -pybind11_add_module(${TRTLLM_PYBIND_MODULE} ${SRCS}) +nanobind_add_module(${TRTLLM_PYBIND_MODULE} ${SRCS}) set_property(TARGET ${TRTLLM_PYBIND_MODULE} PROPERTY POSITION_INDEPENDENT_CODE ON) @@ -34,9 +34,8 @@ target_link_libraries( ${TRTLLM_PYBIND_MODULE} PUBLIC ${SHARED_TARGET} ${UNDEFINED_FLAG} ${NO_AS_NEEDED_FLAG} ${Python3_LIBRARIES} ${TORCH_LIBRARIES} torch_python) -target_compile_definitions( - ${TRTLLM_PYBIND_MODULE} PUBLIC TRTLLM_PYBIND_MODULE=${TRTLLM_PYBIND_MODULE} - PYBIND11_DETAILED_ERROR_MESSAGES=1) +target_compile_definitions(${TRTLLM_PYBIND_MODULE} + PUBLIC TRTLLM_PYBIND_MODULE=${TRTLLM_PYBIND_MODULE}) if(NOT WIN32) set_target_properties( diff --git a/cpp/tensorrt_llm/pybind/batch_manager/bindings.cpp b/cpp/tensorrt_llm/pybind/batch_manager/bindings.cpp index 4274bbe62dc..f3a90fa68e5 100644 --- a/cpp/tensorrt_llm/pybind/batch_manager/bindings.cpp +++ b/cpp/tensorrt_llm/pybind/batch_manager/bindings.cpp @@ -30,14 +30,13 @@ #include "tensorrt_llm/runtime/torchView.h" #include -#include -#include -#include -#include -#include + +#include +#include +#include #include -namespace py = pybind11; +namespace py = nanobind; namespace tb = tensorrt_llm::batch_manager; namespace tle = tensorrt_llm::executor; namespace tr = tensorrt_llm::runtime; @@ -47,7 +46,7 @@ using namespace tensorrt_llm::runtime; namespace tensorrt_llm::pybind::batch_manager { -void initBindings(pybind11::module_& m) +void initBindings(py::module_& m) { using GenLlmReq = tb::GenericLlmRequest; diff --git a/cpp/tensorrt_llm/pybind/batch_manager/bindings.h b/cpp/tensorrt_llm/pybind/batch_manager/bindings.h index 4c36ea3f78c..d57694e72cc 100644 --- a/cpp/tensorrt_llm/pybind/batch_manager/bindings.h +++ b/cpp/tensorrt_llm/pybind/batch_manager/bindings.h @@ -18,11 +18,11 @@ #pragma once #include "tensorrt_llm/pybind/common/customCasters.h" -#include +#include namespace tensorrt_llm::pybind::batch_manager { -void initBindings(pybind11::module_& m); +void initBindings(nanobind::module_& m); } diff --git a/cpp/tensorrt_llm/pybind/bindings.cpp b/cpp/tensorrt_llm/pybind/bindings.cpp index ebda5773abb..5ca99d6a5e1 100644 --- a/cpp/tensorrt_llm/pybind/bindings.cpp +++ b/cpp/tensorrt_llm/pybind/bindings.cpp @@ -15,11 +15,9 @@ * limitations under the License. */ -#include -#include -#include -#include -#include +#include +#include +#include #include #include @@ -45,7 +43,7 @@ #include "tensorrt_llm/runtime/samplingConfig.h" #include "tensorrt_llm/runtime/utils/mpiUtils.h" -namespace py = pybind11; +namespace py = nanobind; namespace tb = tensorrt_llm::batch_manager; namespace tbk = tensorrt_llm::batch_manager::kv_cache_manager; namespace tpb = tensorrt_llm::pybind::batch_manager; @@ -69,7 +67,7 @@ tr::SamplingConfig makeSamplingConfig(std::vector const& con } } // namespace -PYBIND11_MODULE(TRTLLM_PYBIND_MODULE, m) +NB_MODULE(TRTLLM_PYBIND_MODULE, m) { m.doc() = "TensorRT-LLM Python bindings for C++ runtime"; diff --git a/cpp/tensorrt_llm/pybind/executor/bindings.cpp b/cpp/tensorrt_llm/pybind/executor/bindings.cpp index 502ab705374..8f3cd0b0b43 100644 --- a/cpp/tensorrt_llm/pybind/executor/bindings.cpp +++ b/cpp/tensorrt_llm/pybind/executor/bindings.cpp @@ -22,16 +22,13 @@ #include "tensorrt_llm/executor/executor.h" #include "tensorrt_llm/executor/types.h" -#include -#include -#include -#include -#include -#include +#include +#include +#include #include -namespace py = pybind11; +namespace py = nanobind; namespace tle = tensorrt_llm::executor; using SizeType32 = tle::SizeType32; @@ -39,14 +36,14 @@ namespace tensorrt_llm::pybind::executor { template -void instantiateEventDiff(pybind11::module& m, std::string const& name) +void instantiateEventDiff(py::module& m, std::string const& name) { py::class_>(m, ("KVCacheEventDiff" + name).c_str()) .def_readonly("old_value", &tle::KVCacheEventDiff::oldValue) .def_readonly("new_value", &tle::KVCacheEventDiff::newValue); } -void initBindings(pybind11::module_& m) +void initBindings(py::module_& m) { m.attr("__version__") = tle::version(); py::enum_(m, "ModelType") diff --git a/cpp/tensorrt_llm/pybind/executor/bindings.h b/cpp/tensorrt_llm/pybind/executor/bindings.h index ea9946d46d0..3ad76c17838 100644 --- a/cpp/tensorrt_llm/pybind/executor/bindings.h +++ b/cpp/tensorrt_llm/pybind/executor/bindings.h @@ -18,12 +18,12 @@ #pragma once #include "tensorrt_llm/pybind/common/customCasters.h" -#include +#include namespace tensorrt_llm::pybind::executor { // Register bindings for executor API. -void initBindings(pybind11::module_& m); +void initBindings(nanobind::module_& m); } // namespace tensorrt_llm::pybind::executor diff --git a/cpp/tensorrt_llm/pybind/runtime/bindings.cpp b/cpp/tensorrt_llm/pybind/runtime/bindings.cpp index 6a9a2e0dcd2..07d6d348852 100644 --- a/cpp/tensorrt_llm/pybind/runtime/bindings.cpp +++ b/cpp/tensorrt_llm/pybind/runtime/bindings.cpp @@ -40,8 +40,11 @@ #include "tensorrt_llm/runtime/torchView.h" #include #include -#include -#include + +#include +#include +#include + #include namespace tr = tensorrt_llm::runtime; @@ -54,73 +57,73 @@ class PyITensor : public tensorrt_llm::runtime::ITensor [[nodiscard]] void* data() override { - PYBIND11_OVERRIDE_PURE(void*, /* Return type */ - ITensor, /* Parent class */ - data /* Name of function in C++ (must match Python name) */ - /* Argument(s) */ + NB_OVERRIDE_PURE(void*, /* Return type */ + ITensor, /* Parent class */ + data /* Name of function in C++ (must match Python name) */ + /* Argument(s) */ ); } [[nodiscard]] void const* data() const override { - PYBIND11_OVERRIDE_PURE(void const*, /* Return type */ - ITensor, /* Parent class */ - data /* Name of function in C++ (must match Python name) */ - /* Argument(s) */ + NB_OVERRIDE_PURE(void const*, /* Return type */ + ITensor, /* Parent class */ + data /* Name of function in C++ (must match Python name) */ + /* Argument(s) */ ); } [[nodiscard]] std::size_t getSize() const override { - PYBIND11_OVERRIDE_PURE(std::size_t, /* Return type */ - ITensor, /* Parent class */ - getSize /* Name of function in C++ (must match Python name) */ - /* Argument(s) */ + NB_OVERRIDE_PURE(std::size_t, /* Return type */ + ITensor, /* Parent class */ + getSize /* Name of function in C++ (must match Python name) */ + /* Argument(s) */ ); } [[nodiscard]] std::size_t getCapacity() const override { - PYBIND11_OVERRIDE_PURE(std::size_t, /* Return type */ - ITensor, /* Parent class */ - getCapacity /* Name of function in C++ (must match Python name) */ - /* Argument(s) */ + NB_OVERRIDE_PURE(std::size_t, /* Return type */ + ITensor, /* Parent class */ + getCapacity /* Name of function in C++ (must match Python name) */ + /* Argument(s) */ ); } [[nodiscard]] DataType getDataType() const override { - PYBIND11_OVERRIDE_PURE(DataType, /* Return type */ - ITensor, /* Parent class */ - getDataType /* Name of function in C++ (must match Python name) */ - /* Argument(s) */ + NB_OVERRIDE_PURE(DataType, /* Return type */ + ITensor, /* Parent class */ + getDataType /* Name of function in C++ (must match Python name) */ + /* Argument(s) */ ); } [[nodiscard]] tr::MemoryType getMemoryType() const override { - PYBIND11_OVERRIDE_PURE(tr::MemoryType, /* Return type */ - ITensor, /* Parent class */ - getMemoryType /* Name of function in C++ (must match Python name) */ - /* Argument(s) */ + NB_OVERRIDE_PURE(tr::MemoryType, /* Return type */ + ITensor, /* Parent class */ + getMemoryType /* Name of function in C++ (must match Python name) */ + /* Argument(s) */ ); } [[nodiscard]] char const* getMemoryTypeName() const override { - PYBIND11_OVERRIDE_PURE(char const*, /* Return type */ - ITensor, /* Parent class */ - getMemoryTypeName /* Name of function in C++ (must match Python name) */ - /* Argument(s) */ + NB_OVERRIDE_PURE(char const*, /* Return type */ + ITensor, /* Parent class */ + getMemoryTypeName /* Name of function in C++ (must match Python name) */ + /* Argument(s) */ ); } virtual void resize(std::size_t newSize) override { - PYBIND11_OVERRIDE_PURE(void, /* Return type */ - ITensor, /* Parent class */ - resize /* Name of function in C++ (must match Python name) */ - /* Argument(s) */ + NB_OVERRIDE_PURE(void, /* Return type */ + ITensor, /* Parent class */ + resize /* Name of function in C++ (must match Python name) */ + /* Argument(s) */ ); } @@ -135,19 +138,19 @@ class PyITensor : public tensorrt_llm::runtime::ITensor [[nodiscard]] Shape const& getShape() const override { - PYBIND11_OVERRIDE_PURE(Shape const&, /* Return type */ - ITensor, /* Parent class */ - getShape /* Name of function in C++ (must match Python name) */ - /* Argument(s) */ + NB_OVERRIDE_PURE(Shape const&, /* Return type */ + ITensor, /* Parent class */ + getShape /* Name of function in C++ (must match Python name) */ + /* Argument(s) */ ); } void reshape(Shape const& dims) override { - PYBIND11_OVERRIDE_PURE(void, /* Return type */ - ITensor, /* Parent class */ - reshape, /* Name of function in C++ (must match Python name) */ - dims /* Argument(s) */ + NB_OVERRIDE_PURE(void, /* Return type */ + ITensor, /* Parent class */ + reshape, /* Name of function in C++ (must match Python name) */ + dims /* Argument(s) */ ); } }; @@ -162,35 +165,35 @@ class PyIGptDecoder : public tr::IGptDecoder std::optional const& output = std::nullopt, std::optional const> const& requests = std::nullopt) override { - PYBIND11_OVERRIDE_PURE(void, IGptDecoder, setup, samplingConfig, batchSize, batchSlots, output, requests); + NB_OVERRIDE_PURE(void, IGptDecoder, setup, samplingConfig, batchSize, batchSlots, output, requests); } void forwardAsync(tr::DecodingOutput& output, tr::DecodingInput const& input) override { - PYBIND11_OVERRIDE_PURE(void, IGptDecoder, forwardAsync, output, input); + NB_OVERRIDE_PURE(void, IGptDecoder, forwardAsync, output, input); } void forwardSync(tr::DecodingOutput& output, tr::DecodingInput const& input) override { - PYBIND11_OVERRIDE_PURE(void, IGptDecoder, forwardSync, output, input); + NB_OVERRIDE_PURE(void, IGptDecoder, forwardSync, output, input); } tr::SamplingConfig const& getSamplingConfig() override { - PYBIND11_OVERRIDE_PURE(tr::SamplingConfig const&, IGptDecoder, getSamplingConfig); + NB_OVERRIDE_PURE(tr::SamplingConfig const&, IGptDecoder, getSamplingConfig); } void disableLookahead(std::optional const& samplingConfig, tr::SizeType32 batchSize, tr::DecodingInput::TensorConstPtr batchSlots) override { - PYBIND11_OVERRIDE_PURE(void, IGptDecoder, disableLookahead, samplingConfig, batchSize, batchSlots); + NB_OVERRIDE_PURE(void, IGptDecoder, disableLookahead, samplingConfig, batchSize, batchSlots); } }; namespace tensorrt_llm::pybind::runtime { -void initBindings(pybind11::module_& m) +void initBindings(py::module_& m) { py::classh(m, "ITensor").def(py::init()); py::class_(m, "TaskLayerModuleConfig") diff --git a/cpp/tensorrt_llm/pybind/runtime/bindings.h b/cpp/tensorrt_llm/pybind/runtime/bindings.h index b8e1ab66574..7acac226212 100644 --- a/cpp/tensorrt_llm/pybind/runtime/bindings.h +++ b/cpp/tensorrt_llm/pybind/runtime/bindings.h @@ -18,9 +18,9 @@ #pragma once #include "tensorrt_llm/pybind/common/customCasters.h" -#include +#include -namespace py = pybind11; +namespace py = nanobind; namespace tensorrt_llm::pybind::runtime { diff --git a/cpp/tensorrt_llm/pybind/userbuffers/bindings.cpp b/cpp/tensorrt_llm/pybind/userbuffers/bindings.cpp index c8c32e5589b..50ce8a01317 100644 --- a/cpp/tensorrt_llm/pybind/userbuffers/bindings.cpp +++ b/cpp/tensorrt_llm/pybind/userbuffers/bindings.cpp @@ -19,13 +19,13 @@ #include "tensorrt_llm/kernels/userbuffers/ub_interface.h" #include "tensorrt_llm/kernels/userbuffers/userbuffersManager.h" -namespace py = pybind11; +namespace py = nanobind; namespace tub = tensorrt_llm::runtime::ub; namespace tensorrt_llm::kernels::userbuffers { -void UserBufferBindings::initBindings(pybind11::module_& m) +void UserBufferBindings::initBindings(py::module_& m) { py::class_(m, "UBBuffer") .def_readonly("size", &tub::UBBuffer::size) diff --git a/cpp/tensorrt_llm/pybind/userbuffers/bindings.h b/cpp/tensorrt_llm/pybind/userbuffers/bindings.h index 3a8fba2cc6f..e8913a5d846 100644 --- a/cpp/tensorrt_llm/pybind/userbuffers/bindings.h +++ b/cpp/tensorrt_llm/pybind/userbuffers/bindings.h @@ -18,13 +18,13 @@ #pragma once #include "tensorrt_llm/pybind/common/customCasters.h" -#include +#include namespace tensorrt_llm::kernels::userbuffers { class UserBufferBindings { public: - static void initBindings(pybind11::module_& m); + static void initBindings(nanobind::module_& m); }; } // namespace tensorrt_llm::kernels::userbuffers From fe06017ec0a9270ce3f36a25dd2f4cc2a6e232d6 Mon Sep 17 00:00:00 2001 From: QI JUN <22017000+QiJune@users.noreply.github.com> Date: Thu, 12 Jun 2025 12:44:43 +0800 Subject: [PATCH 2/8] fix Signed-off-by: QI JUN <22017000+QiJune@users.noreply.github.com> --- cpp/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index cdde865a564..1699d0dc640 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -170,7 +170,7 @@ get_filename_component(TRT_LLM_ROOT_DIR ${CMAKE_CURRENT_SOURCE_DIR} PATH) set(3RDPARTY_DIR ${TRT_LLM_ROOT_DIR}/3rdparty) add_subdirectory(${3RDPARTY_DIR}/pybind11 ${CMAKE_CURRENT_BINARY_DIR}/pybind11) -add_subdirectory(${3RDPARTY_DIR}/nanobind) +add_subdirectory(${3RDPARTY_DIR}/nanobind ${CMAKE_CURRENT_BINARY_DIR}/nanobind) # include as system to suppress warnings include_directories( From b059ebb1de69c5c4068ca35a71901e56996f54a6 Mon Sep 17 00:00:00 2001 From: QI JUN <22017000+QiJune@users.noreply.github.com> Date: Thu, 12 Jun 2025 13:37:20 +0800 Subject: [PATCH 3/8] fix Signed-off-by: QI JUN <22017000+QiJune@users.noreply.github.com> --- cpp/tensorrt_llm/pybind/runtime/bindings.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/tensorrt_llm/pybind/runtime/bindings.cpp b/cpp/tensorrt_llm/pybind/runtime/bindings.cpp index 07d6d348852..45b621c2e51 100644 --- a/cpp/tensorrt_llm/pybind/runtime/bindings.cpp +++ b/cpp/tensorrt_llm/pybind/runtime/bindings.cpp @@ -44,6 +44,7 @@ #include #include #include +#include #include From b80a28ab25b43b8994f7aa488ca09a9a5a5e4a6e Mon Sep 17 00:00:00 2001 From: QI JUN <22017000+QiJune@users.noreply.github.com> Date: Thu, 12 Jun 2025 13:57:53 +0800 Subject: [PATCH 4/8] fix Signed-off-by: QI JUN <22017000+QiJune@users.noreply.github.com> --- .../pybind/batch_manager/cacheTransceiver.h | 4 +- cpp/tensorrt_llm/pybind/common/bindTypes.h | 4 +- .../pybind/common/customCasters.h | 54 +++++++++---------- 3 files changed, 29 insertions(+), 33 deletions(-) diff --git a/cpp/tensorrt_llm/pybind/batch_manager/cacheTransceiver.h b/cpp/tensorrt_llm/pybind/batch_manager/cacheTransceiver.h index 71221d8a7cb..49f65619e60 100644 --- a/cpp/tensorrt_llm/pybind/batch_manager/cacheTransceiver.h +++ b/cpp/tensorrt_llm/pybind/batch_manager/cacheTransceiver.h @@ -18,13 +18,13 @@ #pragma once #include "tensorrt_llm/pybind/common/customCasters.h" -#include +#include namespace tensorrt_llm::batch_manager { class CacheTransceiverBindings { public: - static void initBindings(pybind11::module_& m); + static void initBindings(nanobind::module_& m); }; } // namespace tensorrt_llm::batch_manager diff --git a/cpp/tensorrt_llm/pybind/common/bindTypes.h b/cpp/tensorrt_llm/pybind/common/bindTypes.h index 5959bc8f70c..9ce6da21e9f 100644 --- a/cpp/tensorrt_llm/pybind/common/bindTypes.h +++ b/cpp/tensorrt_llm/pybind/common/bindTypes.h @@ -18,12 +18,12 @@ #pragma once #include "tensorrt_llm/pybind/common/customCasters.h" -#include +#include namespace PybindUtils { -namespace py = pybind11; +namespace py = nanobind; template void bindList(py::module& m, std::string const& name) diff --git a/cpp/tensorrt_llm/pybind/common/customCasters.h b/cpp/tensorrt_llm/pybind/common/customCasters.h index 3d1eea7e3f3..a629c1ad0a5 100644 --- a/cpp/tensorrt_llm/pybind/common/customCasters.h +++ b/cpp/tensorrt_llm/pybind/common/customCasters.h @@ -17,12 +17,6 @@ #pragma once -#include "pybind11/cast.h" -#include "pybind11/detail/common.h" -#include "pybind11/detail/descr.h" -#include "pybind11/pybind11.h" -#include "pybind11/pytypes.h" - #include "tensorrt_llm/batch_manager/common.h" #include "tensorrt_llm/batch_manager/decoderBuffers.h" #include "tensorrt_llm/common/optionalRef.h" @@ -33,19 +27,20 @@ #include "tensorrt_llm/runtime/torchView.h" #include -#include #include +#include + // Pybind requires to have a central include in order for type casters to work. // Opaque bindings add a type caster, so they have the same requirement. // See the warning in https://pybind11.readthedocs.io/en/stable/advanced/cast/custom.html // Opaque bindings -PYBIND11_MAKE_OPAQUE(tensorrt_llm::batch_manager::ReqIdsSet) -PYBIND11_MAKE_OPAQUE(std::vector) +NB_MAKE_OPAQUE(tensorrt_llm::batch_manager::ReqIdsSet) +NB_MAKE_OPAQUE(std::vector) // Custom casters -namespace PYBIND11_NAMESPACE +namespace NB_NAMESPACE { namespace detail @@ -56,9 +51,9 @@ struct type_caster> { using value_conv = make_caster; - PYBIND11_TYPE_CASTER(tensorrt_llm::common::OptionalRef, value_conv::name); + nb::cast(tensorrt_llm::common::OptionalRef, value_conv::name); - bool load(handle src, bool convert) + bool from_python(handle src, bool convert) { if (src.is_none()) { @@ -68,7 +63,7 @@ struct type_caster> } value_conv conv; - if (!conv.load(src, convert)) + if (!conv.from_python(src, convert)) return false; // Create an OptionalRef with a reference to the converted value @@ -101,7 +96,7 @@ struct PathCaster } public: - static handle cast(T const& path, return_value_policy, handle) + static handle from_cpp(T const& path, return_value_policy, handle) { if (auto py_str = unicode_from_fs_native(path.native())) { @@ -110,7 +105,7 @@ struct PathCaster return nullptr; } - bool load(handle handle, bool) + bool from_python(handle handle, bool) { PyObject* native = nullptr; if constexpr (std::is_same_v) @@ -146,7 +141,7 @@ struct PathCaster return true; } - PYBIND11_TYPE_CASTER(T, const_name("os.PathLike")); + nb::cast(T, const_name("os.PathLike")); }; template <> @@ -158,9 +153,9 @@ template <> class type_caster { public: - PYBIND11_TYPE_CASTER(tensorrt_llm::executor::StreamPtr, _("int")); + nb::cast(tensorrt_llm::executor::StreamPtr, _("int")); - bool load([[maybe_unused]] handle src, bool) + bool from_python([[maybe_unused]] handle src, bool) { auto stream_ptr = src.cast(); value = std::make_shared(reinterpret_cast(stream_ptr)); @@ -168,7 +163,7 @@ class type_caster return true; } - static handle cast( + static handle from_cpp( tensorrt_llm::executor::StreamPtr const& src, return_value_policy /* policy */, handle /* parent */) { // Return cudaStream_t as integer. @@ -180,10 +175,10 @@ template <> struct type_caster { public: - PYBIND11_TYPE_CASTER(tensorrt_llm::executor::Tensor, _("torch.Tensor")); + nb::cast(tensorrt_llm::executor::Tensor, _("torch.Tensor")); // Convert PyObject(torch.Tensor) -> tensorrt_llm::executor::Tensor - bool load(handle src, bool) + bool from_python(handle src, bool) { PyObject* obj = src.ptr(); if (THPVariable_Check(obj)) @@ -196,7 +191,8 @@ struct type_caster } // Convert tensorrt_llm::executor::Tensor -> PyObject(torch.Tensor) - static handle cast(tensorrt_llm::executor::Tensor const& src, return_value_policy /* policy */, handle /* parent */) + static handle from_cpp( + tensorrt_llm::executor::Tensor const& src, return_value_policy /* policy */, handle /* parent */) { return THPVariable_Wrap(tensorrt_llm::runtime::Torch::tensor(tensorrt_llm::executor::detail::toITensor(src))); } @@ -206,10 +202,10 @@ template <> struct type_caster { public: - PYBIND11_TYPE_CASTER(tensorrt_llm::runtime::ITensor::SharedPtr, _("torch.Tensor")); + nb::cast(tensorrt_llm::runtime::ITensor::SharedPtr, _("torch.Tensor")); // Convert PyObject(torch.Tensor) -> tensorrt_llm::runtime::ITensor::SharedPtr - bool load(handle src, bool) + bool from_python(handle src, bool) { PyObject* obj = src.ptr(); if (THPVariable_Check(obj)) @@ -222,7 +218,7 @@ struct type_caster } // Convert tensorrt_llm::runtime::ITensor::SharedPtr -> PyObject(torch.Tensor) - static handle cast( + static handle from_cpp( tensorrt_llm::runtime::ITensor::SharedPtr const& src, return_value_policy /* policy */, handle /* parent */) { if (src == nullptr) @@ -237,10 +233,10 @@ template <> struct type_caster { public: - PYBIND11_TYPE_CASTER(tensorrt_llm::runtime::ITensor::SharedConstPtr, _("torch.Tensor")); + nb::cast(tensorrt_llm::runtime::ITensor::SharedConstPtr, _("torch.Tensor")); // Convert PyObject(torch.Tensor) -> tensorrt_llm::runtime::ITensor::SharedConstPtr - bool load(handle src, bool) + bool from_python(handle src, bool) { PyObject* obj = src.ptr(); if (THPVariable_Check(obj)) @@ -253,7 +249,7 @@ struct type_caster } // Convert tensorrt_llm::runtime::ITensor::SharedConstPtr -> PyObject(torch.Tensor) - static handle cast(tensorrt_llm::runtime::ITensor::SharedConstPtr const& src, return_value_policy /* policy */, + static handle from_cpp(tensorrt_llm::runtime::ITensor::SharedConstPtr const& src, return_value_policy /* policy */, handle /* parent */) { if (src == nullptr) @@ -266,4 +262,4 @@ struct type_caster }; } // namespace detail -} // namespace PYBIND11_NAMESPACE +} // namespace NB_NAMESPACE From 43da2fc35744f03c41f7c34a02688011bec1d04b Mon Sep 17 00:00:00 2001 From: QI JUN <22017000+QiJune@users.noreply.github.com> Date: Thu, 12 Jun 2025 14:11:41 +0800 Subject: [PATCH 5/8] fix Signed-off-by: QI JUN <22017000+QiJune@users.noreply.github.com> --- cpp/tensorrt_llm/pybind/batch_manager/buffers.cpp | 8 ++------ cpp/tensorrt_llm/pybind/batch_manager/buffers.h | 4 ++-- cpp/tensorrt_llm/pybind/batch_manager/kvCacheManager.cpp | 6 +----- cpp/tensorrt_llm/pybind/batch_manager/kvCacheManager.h | 6 +++--- cpp/tensorrt_llm/pybind/bindings.cpp | 1 - 5 files changed, 8 insertions(+), 17 deletions(-) diff --git a/cpp/tensorrt_llm/pybind/batch_manager/buffers.cpp b/cpp/tensorrt_llm/pybind/batch_manager/buffers.cpp index 721b12f6872..1f438ec027f 100644 --- a/cpp/tensorrt_llm/pybind/batch_manager/buffers.cpp +++ b/cpp/tensorrt_llm/pybind/batch_manager/buffers.cpp @@ -23,13 +23,9 @@ #include "tensorrt_llm/batch_manager/transformerBuffers.h" #include -#include -#include -#include -#include #include -namespace py = pybind11; +namespace py = nanobind; namespace tb = tensorrt_llm::batch_manager; namespace tr = tensorrt_llm::runtime; @@ -38,7 +34,7 @@ using tr::SizeType32; namespace tensorrt_llm::pybind::batch_manager { -void Buffers::initBindings(pybind11::module_& m) +void Buffers::initBindings(py::module_& m) { py::class_(m, "TransformerBuffers") .def(py::init const&, SizeType32, SizeType32, diff --git a/cpp/tensorrt_llm/pybind/batch_manager/buffers.h b/cpp/tensorrt_llm/pybind/batch_manager/buffers.h index bfe06c0e8e8..29cba8fdfc5 100644 --- a/cpp/tensorrt_llm/pybind/batch_manager/buffers.h +++ b/cpp/tensorrt_llm/pybind/batch_manager/buffers.h @@ -18,13 +18,13 @@ #pragma once #include "tensorrt_llm/pybind/common/customCasters.h" -#include +#include namespace tensorrt_llm::pybind::batch_manager { class Buffers { public: - static void initBindings(pybind11::module_& m); + static void initBindings(nanobind::module_& m); }; } // namespace tensorrt_llm::pybind::batch_manager diff --git a/cpp/tensorrt_llm/pybind/batch_manager/kvCacheManager.cpp b/cpp/tensorrt_llm/pybind/batch_manager/kvCacheManager.cpp index 5be47790c9a..ee7032851e1 100644 --- a/cpp/tensorrt_llm/pybind/batch_manager/kvCacheManager.cpp +++ b/cpp/tensorrt_llm/pybind/batch_manager/kvCacheManager.cpp @@ -23,16 +23,12 @@ #include "tensorrt_llm/runtime/torchView.h" #include -#include -#include -#include -#include #include namespace tb = tensorrt_llm::batch_manager; namespace tbk = tensorrt_llm::batch_manager::kv_cache_manager; namespace tr = tensorrt_llm::runtime; -namespace py = pybind11; +namespace py = nanobind; using BlockKey = tbk::BlockKey; using VecUniqueTokens = tensorrt_llm::runtime::VecUniqueTokens; using SizeType32 = tensorrt_llm::runtime::SizeType32; diff --git a/cpp/tensorrt_llm/pybind/batch_manager/kvCacheManager.h b/cpp/tensorrt_llm/pybind/batch_manager/kvCacheManager.h index 67d8b13ca71..96c9235c85d 100644 --- a/cpp/tensorrt_llm/pybind/batch_manager/kvCacheManager.h +++ b/cpp/tensorrt_llm/pybind/batch_manager/kvCacheManager.h @@ -18,14 +18,14 @@ #pragma once #include "tensorrt_llm/pybind/common/customCasters.h" -#include +#include namespace tensorrt_llm::batch_manager::kv_cache_manager { class KVCacheManagerBindings { public: - static void initBindings(pybind11::module_& m); + static void initBindings(nanobind::module_& m); }; } // namespace tensorrt_llm::batch_manager::kv_cache_manager @@ -34,6 +34,6 @@ namespace tensorrt_llm::batch_manager class BasePeftCacheManagerBindings { public: - static void initBindings(pybind11::module_& m); + static void initBindings(nanobind::module_& m); }; } // namespace tensorrt_llm::batch_manager diff --git a/cpp/tensorrt_llm/pybind/bindings.cpp b/cpp/tensorrt_llm/pybind/bindings.cpp index 5ca99d6a5e1..178b7fcb957 100644 --- a/cpp/tensorrt_llm/pybind/bindings.cpp +++ b/cpp/tensorrt_llm/pybind/bindings.cpp @@ -16,7 +16,6 @@ */ #include -#include #include #include #include From ade7e1dea792332bb7069b330b03f58eb6c0153e Mon Sep 17 00:00:00 2001 From: QI JUN <22017000+QiJune@users.noreply.github.com> Date: Thu, 12 Jun 2025 14:17:47 +0800 Subject: [PATCH 6/8] fix Signed-off-by: QI JUN <22017000+QiJune@users.noreply.github.com> --- cpp/tensorrt_llm/pybind/batch_manager/bindings.cpp | 1 - cpp/tensorrt_llm/pybind/common/bindTypes.h | 4 ++-- cpp/tensorrt_llm/pybind/executor/bindings.cpp | 1 - cpp/tensorrt_llm/pybind/runtime/bindings.cpp | 1 - 4 files changed, 2 insertions(+), 5 deletions(-) diff --git a/cpp/tensorrt_llm/pybind/batch_manager/bindings.cpp b/cpp/tensorrt_llm/pybind/batch_manager/bindings.cpp index f3a90fa68e5..bc8d327a19e 100644 --- a/cpp/tensorrt_llm/pybind/batch_manager/bindings.cpp +++ b/cpp/tensorrt_llm/pybind/batch_manager/bindings.cpp @@ -32,7 +32,6 @@ #include #include -#include #include #include diff --git a/cpp/tensorrt_llm/pybind/common/bindTypes.h b/cpp/tensorrt_llm/pybind/common/bindTypes.h index 9ce6da21e9f..8d0b55756ed 100644 --- a/cpp/tensorrt_llm/pybind/common/bindTypes.h +++ b/cpp/tensorrt_llm/pybind/common/bindTypes.h @@ -26,7 +26,7 @@ namespace PybindUtils namespace py = nanobind; template -void bindList(py::module& m, std::string const& name) +void bindList(py::module_& m, std::string const& name) { py::class_(m, name.c_str()) .def(py::init()) @@ -58,7 +58,7 @@ void bindList(py::module& m, std::string const& name) } template -void bindSet(py::module& m, std::string const& name) +void bindSet(py::module_& m, std::string const& name) { py::class_(m, name.c_str()) .def(py::init()) diff --git a/cpp/tensorrt_llm/pybind/executor/bindings.cpp b/cpp/tensorrt_llm/pybind/executor/bindings.cpp index 8f3cd0b0b43..5403031716d 100644 --- a/cpp/tensorrt_llm/pybind/executor/bindings.cpp +++ b/cpp/tensorrt_llm/pybind/executor/bindings.cpp @@ -23,7 +23,6 @@ #include "tensorrt_llm/executor/types.h" #include -#include #include #include diff --git a/cpp/tensorrt_llm/pybind/runtime/bindings.cpp b/cpp/tensorrt_llm/pybind/runtime/bindings.cpp index 45b621c2e51..0775bf47088 100644 --- a/cpp/tensorrt_llm/pybind/runtime/bindings.cpp +++ b/cpp/tensorrt_llm/pybind/runtime/bindings.cpp @@ -42,7 +42,6 @@ #include #include -#include #include #include From 32b5d362df689e760185152466683b2a8e67fdfe Mon Sep 17 00:00:00 2001 From: QI JUN <22017000+QiJune@users.noreply.github.com> Date: Thu, 12 Jun 2025 14:23:02 +0800 Subject: [PATCH 7/8] fix Signed-off-by: QI JUN <22017000+QiJune@users.noreply.github.com> --- cpp/tensorrt_llm/pybind/common/customCasters.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/cpp/tensorrt_llm/pybind/common/customCasters.h b/cpp/tensorrt_llm/pybind/common/customCasters.h index a629c1ad0a5..29c8ee0df45 100644 --- a/cpp/tensorrt_llm/pybind/common/customCasters.h +++ b/cpp/tensorrt_llm/pybind/common/customCasters.h @@ -51,7 +51,7 @@ struct type_caster> { using value_conv = make_caster; - nb::cast(tensorrt_llm::common::OptionalRef, value_conv::name); + nanobind::cast(tensorrt_llm::common::OptionalRef, value_conv::name); bool from_python(handle src, bool convert) { @@ -141,7 +141,7 @@ struct PathCaster return true; } - nb::cast(T, const_name("os.PathLike")); + nanobind::cast(T, const_name("os.PathLike")); }; template <> @@ -153,7 +153,7 @@ template <> class type_caster { public: - nb::cast(tensorrt_llm::executor::StreamPtr, _("int")); + nanobind::cast(tensorrt_llm::executor::StreamPtr, _("int")); bool from_python([[maybe_unused]] handle src, bool) { @@ -175,7 +175,7 @@ template <> struct type_caster { public: - nb::cast(tensorrt_llm::executor::Tensor, _("torch.Tensor")); + nanobind::cast(tensorrt_llm::executor::Tensor, _("torch.Tensor")); // Convert PyObject(torch.Tensor) -> tensorrt_llm::executor::Tensor bool from_python(handle src, bool) @@ -202,7 +202,7 @@ template <> struct type_caster { public: - nb::cast(tensorrt_llm::runtime::ITensor::SharedPtr, _("torch.Tensor")); + nanobind::cast(tensorrt_llm::runtime::ITensor::SharedPtr, _("torch.Tensor")); // Convert PyObject(torch.Tensor) -> tensorrt_llm::runtime::ITensor::SharedPtr bool from_python(handle src, bool) @@ -233,7 +233,7 @@ template <> struct type_caster { public: - nb::cast(tensorrt_llm::runtime::ITensor::SharedConstPtr, _("torch.Tensor")); + nanobind::cast(tensorrt_llm::runtime::ITensor::SharedConstPtr, _("torch.Tensor")); // Convert PyObject(torch.Tensor) -> tensorrt_llm::runtime::ITensor::SharedConstPtr bool from_python(handle src, bool) From 7209b6cd384c0c3144dcbc22d378d3e9b9341b21 Mon Sep 17 00:00:00 2001 From: QI JUN <22017000+QiJune@users.noreply.github.com> Date: Thu, 12 Jun 2025 17:28:44 +0800 Subject: [PATCH 8/8] clean Signed-off-by: QI JUN <22017000+QiJune@users.noreply.github.com> --- cpp/tensorrt_llm/pybind/common/bindTypes.h | 8 +- .../pybind/common/customCasters.h | 54 +++++----- cpp/tensorrt_llm/pybind/runtime/bindings.cpp | 99 +++++++++---------- cpp/tensorrt_llm/pybind/runtime/bindings.h | 4 +- 4 files changed, 83 insertions(+), 82 deletions(-) diff --git a/cpp/tensorrt_llm/pybind/common/bindTypes.h b/cpp/tensorrt_llm/pybind/common/bindTypes.h index 8d0b55756ed..5959bc8f70c 100644 --- a/cpp/tensorrt_llm/pybind/common/bindTypes.h +++ b/cpp/tensorrt_llm/pybind/common/bindTypes.h @@ -18,15 +18,15 @@ #pragma once #include "tensorrt_llm/pybind/common/customCasters.h" -#include +#include namespace PybindUtils { -namespace py = nanobind; +namespace py = pybind11; template -void bindList(py::module_& m, std::string const& name) +void bindList(py::module& m, std::string const& name) { py::class_(m, name.c_str()) .def(py::init()) @@ -58,7 +58,7 @@ void bindList(py::module_& m, std::string const& name) } template -void bindSet(py::module_& m, std::string const& name) +void bindSet(py::module& m, std::string const& name) { py::class_(m, name.c_str()) .def(py::init()) diff --git a/cpp/tensorrt_llm/pybind/common/customCasters.h b/cpp/tensorrt_llm/pybind/common/customCasters.h index 29c8ee0df45..3d1eea7e3f3 100644 --- a/cpp/tensorrt_llm/pybind/common/customCasters.h +++ b/cpp/tensorrt_llm/pybind/common/customCasters.h @@ -17,6 +17,12 @@ #pragma once +#include "pybind11/cast.h" +#include "pybind11/detail/common.h" +#include "pybind11/detail/descr.h" +#include "pybind11/pybind11.h" +#include "pybind11/pytypes.h" + #include "tensorrt_llm/batch_manager/common.h" #include "tensorrt_llm/batch_manager/decoderBuffers.h" #include "tensorrt_llm/common/optionalRef.h" @@ -27,20 +33,19 @@ #include "tensorrt_llm/runtime/torchView.h" #include +#include #include -#include - // Pybind requires to have a central include in order for type casters to work. // Opaque bindings add a type caster, so they have the same requirement. // See the warning in https://pybind11.readthedocs.io/en/stable/advanced/cast/custom.html // Opaque bindings -NB_MAKE_OPAQUE(tensorrt_llm::batch_manager::ReqIdsSet) -NB_MAKE_OPAQUE(std::vector) +PYBIND11_MAKE_OPAQUE(tensorrt_llm::batch_manager::ReqIdsSet) +PYBIND11_MAKE_OPAQUE(std::vector) // Custom casters -namespace NB_NAMESPACE +namespace PYBIND11_NAMESPACE { namespace detail @@ -51,9 +56,9 @@ struct type_caster> { using value_conv = make_caster; - nanobind::cast(tensorrt_llm::common::OptionalRef, value_conv::name); + PYBIND11_TYPE_CASTER(tensorrt_llm::common::OptionalRef, value_conv::name); - bool from_python(handle src, bool convert) + bool load(handle src, bool convert) { if (src.is_none()) { @@ -63,7 +68,7 @@ struct type_caster> } value_conv conv; - if (!conv.from_python(src, convert)) + if (!conv.load(src, convert)) return false; // Create an OptionalRef with a reference to the converted value @@ -96,7 +101,7 @@ struct PathCaster } public: - static handle from_cpp(T const& path, return_value_policy, handle) + static handle cast(T const& path, return_value_policy, handle) { if (auto py_str = unicode_from_fs_native(path.native())) { @@ -105,7 +110,7 @@ struct PathCaster return nullptr; } - bool from_python(handle handle, bool) + bool load(handle handle, bool) { PyObject* native = nullptr; if constexpr (std::is_same_v) @@ -141,7 +146,7 @@ struct PathCaster return true; } - nanobind::cast(T, const_name("os.PathLike")); + PYBIND11_TYPE_CASTER(T, const_name("os.PathLike")); }; template <> @@ -153,9 +158,9 @@ template <> class type_caster { public: - nanobind::cast(tensorrt_llm::executor::StreamPtr, _("int")); + PYBIND11_TYPE_CASTER(tensorrt_llm::executor::StreamPtr, _("int")); - bool from_python([[maybe_unused]] handle src, bool) + bool load([[maybe_unused]] handle src, bool) { auto stream_ptr = src.cast(); value = std::make_shared(reinterpret_cast(stream_ptr)); @@ -163,7 +168,7 @@ class type_caster return true; } - static handle from_cpp( + static handle cast( tensorrt_llm::executor::StreamPtr const& src, return_value_policy /* policy */, handle /* parent */) { // Return cudaStream_t as integer. @@ -175,10 +180,10 @@ template <> struct type_caster { public: - nanobind::cast(tensorrt_llm::executor::Tensor, _("torch.Tensor")); + PYBIND11_TYPE_CASTER(tensorrt_llm::executor::Tensor, _("torch.Tensor")); // Convert PyObject(torch.Tensor) -> tensorrt_llm::executor::Tensor - bool from_python(handle src, bool) + bool load(handle src, bool) { PyObject* obj = src.ptr(); if (THPVariable_Check(obj)) @@ -191,8 +196,7 @@ struct type_caster } // Convert tensorrt_llm::executor::Tensor -> PyObject(torch.Tensor) - static handle from_cpp( - tensorrt_llm::executor::Tensor const& src, return_value_policy /* policy */, handle /* parent */) + static handle cast(tensorrt_llm::executor::Tensor const& src, return_value_policy /* policy */, handle /* parent */) { return THPVariable_Wrap(tensorrt_llm::runtime::Torch::tensor(tensorrt_llm::executor::detail::toITensor(src))); } @@ -202,10 +206,10 @@ template <> struct type_caster { public: - nanobind::cast(tensorrt_llm::runtime::ITensor::SharedPtr, _("torch.Tensor")); + PYBIND11_TYPE_CASTER(tensorrt_llm::runtime::ITensor::SharedPtr, _("torch.Tensor")); // Convert PyObject(torch.Tensor) -> tensorrt_llm::runtime::ITensor::SharedPtr - bool from_python(handle src, bool) + bool load(handle src, bool) { PyObject* obj = src.ptr(); if (THPVariable_Check(obj)) @@ -218,7 +222,7 @@ struct type_caster } // Convert tensorrt_llm::runtime::ITensor::SharedPtr -> PyObject(torch.Tensor) - static handle from_cpp( + static handle cast( tensorrt_llm::runtime::ITensor::SharedPtr const& src, return_value_policy /* policy */, handle /* parent */) { if (src == nullptr) @@ -233,10 +237,10 @@ template <> struct type_caster { public: - nanobind::cast(tensorrt_llm::runtime::ITensor::SharedConstPtr, _("torch.Tensor")); + PYBIND11_TYPE_CASTER(tensorrt_llm::runtime::ITensor::SharedConstPtr, _("torch.Tensor")); // Convert PyObject(torch.Tensor) -> tensorrt_llm::runtime::ITensor::SharedConstPtr - bool from_python(handle src, bool) + bool load(handle src, bool) { PyObject* obj = src.ptr(); if (THPVariable_Check(obj)) @@ -249,7 +253,7 @@ struct type_caster } // Convert tensorrt_llm::runtime::ITensor::SharedConstPtr -> PyObject(torch.Tensor) - static handle from_cpp(tensorrt_llm::runtime::ITensor::SharedConstPtr const& src, return_value_policy /* policy */, + static handle cast(tensorrt_llm::runtime::ITensor::SharedConstPtr const& src, return_value_policy /* policy */, handle /* parent */) { if (src == nullptr) @@ -262,4 +266,4 @@ struct type_caster }; } // namespace detail -} // namespace NB_NAMESPACE +} // namespace PYBIND11_NAMESPACE diff --git a/cpp/tensorrt_llm/pybind/runtime/bindings.cpp b/cpp/tensorrt_llm/pybind/runtime/bindings.cpp index 0775bf47088..6a9a2e0dcd2 100644 --- a/cpp/tensorrt_llm/pybind/runtime/bindings.cpp +++ b/cpp/tensorrt_llm/pybind/runtime/bindings.cpp @@ -40,11 +40,8 @@ #include "tensorrt_llm/runtime/torchView.h" #include #include - -#include -#include -#include - +#include +#include #include namespace tr = tensorrt_llm::runtime; @@ -57,73 +54,73 @@ class PyITensor : public tensorrt_llm::runtime::ITensor [[nodiscard]] void* data() override { - NB_OVERRIDE_PURE(void*, /* Return type */ - ITensor, /* Parent class */ - data /* Name of function in C++ (must match Python name) */ - /* Argument(s) */ + PYBIND11_OVERRIDE_PURE(void*, /* Return type */ + ITensor, /* Parent class */ + data /* Name of function in C++ (must match Python name) */ + /* Argument(s) */ ); } [[nodiscard]] void const* data() const override { - NB_OVERRIDE_PURE(void const*, /* Return type */ - ITensor, /* Parent class */ - data /* Name of function in C++ (must match Python name) */ - /* Argument(s) */ + PYBIND11_OVERRIDE_PURE(void const*, /* Return type */ + ITensor, /* Parent class */ + data /* Name of function in C++ (must match Python name) */ + /* Argument(s) */ ); } [[nodiscard]] std::size_t getSize() const override { - NB_OVERRIDE_PURE(std::size_t, /* Return type */ - ITensor, /* Parent class */ - getSize /* Name of function in C++ (must match Python name) */ - /* Argument(s) */ + PYBIND11_OVERRIDE_PURE(std::size_t, /* Return type */ + ITensor, /* Parent class */ + getSize /* Name of function in C++ (must match Python name) */ + /* Argument(s) */ ); } [[nodiscard]] std::size_t getCapacity() const override { - NB_OVERRIDE_PURE(std::size_t, /* Return type */ - ITensor, /* Parent class */ - getCapacity /* Name of function in C++ (must match Python name) */ - /* Argument(s) */ + PYBIND11_OVERRIDE_PURE(std::size_t, /* Return type */ + ITensor, /* Parent class */ + getCapacity /* Name of function in C++ (must match Python name) */ + /* Argument(s) */ ); } [[nodiscard]] DataType getDataType() const override { - NB_OVERRIDE_PURE(DataType, /* Return type */ - ITensor, /* Parent class */ - getDataType /* Name of function in C++ (must match Python name) */ - /* Argument(s) */ + PYBIND11_OVERRIDE_PURE(DataType, /* Return type */ + ITensor, /* Parent class */ + getDataType /* Name of function in C++ (must match Python name) */ + /* Argument(s) */ ); } [[nodiscard]] tr::MemoryType getMemoryType() const override { - NB_OVERRIDE_PURE(tr::MemoryType, /* Return type */ - ITensor, /* Parent class */ - getMemoryType /* Name of function in C++ (must match Python name) */ - /* Argument(s) */ + PYBIND11_OVERRIDE_PURE(tr::MemoryType, /* Return type */ + ITensor, /* Parent class */ + getMemoryType /* Name of function in C++ (must match Python name) */ + /* Argument(s) */ ); } [[nodiscard]] char const* getMemoryTypeName() const override { - NB_OVERRIDE_PURE(char const*, /* Return type */ - ITensor, /* Parent class */ - getMemoryTypeName /* Name of function in C++ (must match Python name) */ - /* Argument(s) */ + PYBIND11_OVERRIDE_PURE(char const*, /* Return type */ + ITensor, /* Parent class */ + getMemoryTypeName /* Name of function in C++ (must match Python name) */ + /* Argument(s) */ ); } virtual void resize(std::size_t newSize) override { - NB_OVERRIDE_PURE(void, /* Return type */ - ITensor, /* Parent class */ - resize /* Name of function in C++ (must match Python name) */ - /* Argument(s) */ + PYBIND11_OVERRIDE_PURE(void, /* Return type */ + ITensor, /* Parent class */ + resize /* Name of function in C++ (must match Python name) */ + /* Argument(s) */ ); } @@ -138,19 +135,19 @@ class PyITensor : public tensorrt_llm::runtime::ITensor [[nodiscard]] Shape const& getShape() const override { - NB_OVERRIDE_PURE(Shape const&, /* Return type */ - ITensor, /* Parent class */ - getShape /* Name of function in C++ (must match Python name) */ - /* Argument(s) */ + PYBIND11_OVERRIDE_PURE(Shape const&, /* Return type */ + ITensor, /* Parent class */ + getShape /* Name of function in C++ (must match Python name) */ + /* Argument(s) */ ); } void reshape(Shape const& dims) override { - NB_OVERRIDE_PURE(void, /* Return type */ - ITensor, /* Parent class */ - reshape, /* Name of function in C++ (must match Python name) */ - dims /* Argument(s) */ + PYBIND11_OVERRIDE_PURE(void, /* Return type */ + ITensor, /* Parent class */ + reshape, /* Name of function in C++ (must match Python name) */ + dims /* Argument(s) */ ); } }; @@ -165,35 +162,35 @@ class PyIGptDecoder : public tr::IGptDecoder std::optional const& output = std::nullopt, std::optional const> const& requests = std::nullopt) override { - NB_OVERRIDE_PURE(void, IGptDecoder, setup, samplingConfig, batchSize, batchSlots, output, requests); + PYBIND11_OVERRIDE_PURE(void, IGptDecoder, setup, samplingConfig, batchSize, batchSlots, output, requests); } void forwardAsync(tr::DecodingOutput& output, tr::DecodingInput const& input) override { - NB_OVERRIDE_PURE(void, IGptDecoder, forwardAsync, output, input); + PYBIND11_OVERRIDE_PURE(void, IGptDecoder, forwardAsync, output, input); } void forwardSync(tr::DecodingOutput& output, tr::DecodingInput const& input) override { - NB_OVERRIDE_PURE(void, IGptDecoder, forwardSync, output, input); + PYBIND11_OVERRIDE_PURE(void, IGptDecoder, forwardSync, output, input); } tr::SamplingConfig const& getSamplingConfig() override { - NB_OVERRIDE_PURE(tr::SamplingConfig const&, IGptDecoder, getSamplingConfig); + PYBIND11_OVERRIDE_PURE(tr::SamplingConfig const&, IGptDecoder, getSamplingConfig); } void disableLookahead(std::optional const& samplingConfig, tr::SizeType32 batchSize, tr::DecodingInput::TensorConstPtr batchSlots) override { - NB_OVERRIDE_PURE(void, IGptDecoder, disableLookahead, samplingConfig, batchSize, batchSlots); + PYBIND11_OVERRIDE_PURE(void, IGptDecoder, disableLookahead, samplingConfig, batchSize, batchSlots); } }; namespace tensorrt_llm::pybind::runtime { -void initBindings(py::module_& m) +void initBindings(pybind11::module_& m) { py::classh(m, "ITensor").def(py::init()); py::class_(m, "TaskLayerModuleConfig") diff --git a/cpp/tensorrt_llm/pybind/runtime/bindings.h b/cpp/tensorrt_llm/pybind/runtime/bindings.h index 7acac226212..b8e1ab66574 100644 --- a/cpp/tensorrt_llm/pybind/runtime/bindings.h +++ b/cpp/tensorrt_llm/pybind/runtime/bindings.h @@ -18,9 +18,9 @@ #pragma once #include "tensorrt_llm/pybind/common/customCasters.h" -#include +#include -namespace py = nanobind; +namespace py = pybind11; namespace tensorrt_llm::pybind::runtime {