3 changes: 3 additions & 0 deletions .gitmodules
@@ -20,3 +20,6 @@
[submodule "3rdparty/xgrammar"]
path = 3rdparty/xgrammar
url = https://github.com/mlc-ai/xgrammar.git
[submodule "3rdparty/nanobind"]
path = 3rdparty/nanobind
url = https://github.com/wjakob/nanobind
1 change: 1 addition & 0 deletions 3rdparty/nanobind
Submodule nanobind added at 3d577d
4 changes: 3 additions & 1 deletion cpp/CMakeLists.txt
@@ -170,6 +170,7 @@ get_filename_component(TRT_LLM_ROOT_DIR ${CMAKE_CURRENT_SOURCE_DIR} PATH)

set(3RDPARTY_DIR ${TRT_LLM_ROOT_DIR}/3rdparty)
add_subdirectory(${3RDPARTY_DIR}/pybind11 ${CMAKE_CURRENT_BINARY_DIR}/pybind11)
add_subdirectory(${3RDPARTY_DIR}/nanobind ${CMAKE_CURRENT_BINARY_DIR}/nanobind)

# include as system to suppress warnings
include_directories(
@@ -181,7 +182,8 @@ include_directories(
${3RDPARTY_DIR}/cutlass/tools/util/include
${3RDPARTY_DIR}/NVTX/include
${3RDPARTY_DIR}/json/include
${3RDPARTY_DIR}/pybind11/include)
${3RDPARTY_DIR}/pybind11/include
${3RDPARTY_DIR}/nanobind/include)

if(${CUDAToolkit_VERSION} VERSION_GREATER_EQUAL "11")
add_definitions("-DENABLE_BF16")
7 changes: 3 additions & 4 deletions cpp/tensorrt_llm/pybind/CMakeLists.txt
@@ -23,7 +23,7 @@ set(SRCS

include_directories(${PROJECT_SOURCE_DIR}/include)

pybind11_add_module(${TRTLLM_PYBIND_MODULE} ${SRCS})
nanobind_add_module(${TRTLLM_PYBIND_MODULE} ${SRCS})

set_property(TARGET ${TRTLLM_PYBIND_MODULE} PROPERTY POSITION_INDEPENDENT_CODE
ON)
@@ -34,9 +34,8 @@ target_link_libraries(
${TRTLLM_PYBIND_MODULE}
PUBLIC ${SHARED_TARGET} ${UNDEFINED_FLAG} ${NO_AS_NEEDED_FLAG}
${Python3_LIBRARIES} ${TORCH_LIBRARIES} torch_python)
target_compile_definitions(
${TRTLLM_PYBIND_MODULE} PUBLIC TRTLLM_PYBIND_MODULE=${TRTLLM_PYBIND_MODULE}
PYBIND11_DETAILED_ERROR_MESSAGES=1)
target_compile_definitions(${TRTLLM_PYBIND_MODULE}
PUBLIC TRTLLM_PYBIND_MODULE=${TRTLLM_PYBIND_MODULE})

if(NOT WIN32)
set_target_properties(
12 changes: 5 additions & 7 deletions cpp/tensorrt_llm/pybind/batch_manager/bindings.cpp
@@ -30,14 +30,12 @@
#include "tensorrt_llm/runtime/torchView.h"

#include <ATen/ATen.h>
#include <pybind11/chrono.h>
#include <pybind11/functional.h>
#include <pybind11/operators.h>
#include <pybind11/stl.h>
#include <pybind11/stl_bind.h>

#include <nanobind/nanobind.h>
#include <nanobind/operators.h>
#include <torch/extension.h>

namespace py = pybind11;
namespace py = nanobind;
namespace tb = tensorrt_llm::batch_manager;
namespace tle = tensorrt_llm::executor;
namespace tr = tensorrt_llm::runtime;
@@ -47,7 +45,7 @@ using namespace tensorrt_llm::runtime;
namespace tensorrt_llm::pybind::batch_manager
{

void initBindings(pybind11::module_& m)
void initBindings(py::module_& m)
{
using GenLlmReq = tb::GenericLlmRequest<runtime::ITensor::SharedPtr>;

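Note: the hunk above removes pybind11's blanket caster headers (`<pybind11/stl.h>`, `<pybind11/chrono.h>`, `<pybind11/functional.h>`, `<pybind11/stl_bind.h>`). nanobind ships the equivalent casters as fine-grained headers under `nanobind/stl/`, so each translation unit has to include exactly the casters its bound signatures use. A minimal sketch of that pattern, using a hypothetical `summarize` binding rather than anything from this file:

```cpp
// Minimal sketch: nanobind splits pybind11's catch-all caster headers into
// per-type headers; include only what the bound signatures actually use.
// The bound function below is hypothetical, not part of TensorRT-LLM.
#include <nanobind/nanobind.h>
#include <nanobind/stl/string.h>    // std::string <-> str
#include <nanobind/stl/vector.h>    // std::vector <-> list
#include <nanobind/stl/optional.h>  // std::optional <-> None / value
#include <nanobind/stl/chrono.h>    // std::chrono durations <-> datetime.timedelta

#include <chrono>
#include <optional>
#include <string>
#include <vector>

namespace nb = nanobind;

void initExampleBindings(nb::module_& m)
{
    // Without the stl/*.h includes above, nanobind cannot convert these
    // STL arguments, and the binding fails instead of silently working.
    m.def(
        "summarize",
        [](std::vector<std::string> const& names, std::optional<std::chrono::milliseconds> timeout)
        { return names.size() + (timeout ? 1 : 0); },
        nb::arg("names"), nb::arg("timeout"));
}
```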
4 changes: 2 additions & 2 deletions cpp/tensorrt_llm/pybind/batch_manager/bindings.h
@@ -18,11 +18,11 @@
#pragma once

#include "tensorrt_llm/pybind/common/customCasters.h"
#include <pybind11/pybind11.h>
#include <nanobind/nanobind.h>

namespace tensorrt_llm::pybind::batch_manager
{

void initBindings(pybind11::module_& m);
void initBindings(nanobind::module_& m);

}
8 changes: 2 additions & 6 deletions cpp/tensorrt_llm/pybind/batch_manager/buffers.cpp
@@ -23,13 +23,9 @@
#include "tensorrt_llm/batch_manager/transformerBuffers.h"

#include <ATen/ATen.h>
#include <pybind11/functional.h>
#include <pybind11/operators.h>
#include <pybind11/stl.h>
#include <pybind11/stl_bind.h>
#include <torch/extension.h>

namespace py = pybind11;
namespace py = nanobind;
namespace tb = tensorrt_llm::batch_manager;
namespace tr = tensorrt_llm::runtime;

@@ -38,7 +34,7 @@ using tr::SizeType32;
namespace tensorrt_llm::pybind::batch_manager
{

void Buffers::initBindings(pybind11::module_& m)
void Buffers::initBindings(py::module_& m)
{
py::class_<tb::TransformerBuffers>(m, "TransformerBuffers")
.def(py::init<SizeType32, SizeType32, std::vector<SizeType32> const&, SizeType32, SizeType32,
4 changes: 2 additions & 2 deletions cpp/tensorrt_llm/pybind/batch_manager/buffers.h
@@ -18,13 +18,13 @@
#pragma once

#include "tensorrt_llm/pybind/common/customCasters.h"
#include <pybind11/pybind11.h>
#include <nanobind/nanobind.h>

namespace tensorrt_llm::pybind::batch_manager
{
class Buffers
{
public:
static void initBindings(pybind11::module_& m);
static void initBindings(nanobind::module_& m);
};
} // namespace tensorrt_llm::pybind::batch_manager
4 changes: 2 additions & 2 deletions cpp/tensorrt_llm/pybind/batch_manager/cacheTransceiver.h
@@ -18,13 +18,13 @@
#pragma once

#include "tensorrt_llm/pybind/common/customCasters.h"
#include <pybind11/pybind11.h>
#include <nanobind/nanobind.h>

namespace tensorrt_llm::batch_manager
{
class CacheTransceiverBindings
{
public:
static void initBindings(pybind11::module_& m);
static void initBindings(nanobind::module_& m);
};
} // namespace tensorrt_llm::batch_manager
6 changes: 1 addition & 5 deletions cpp/tensorrt_llm/pybind/batch_manager/kvCacheManager.cpp
@@ -23,16 +23,12 @@
#include "tensorrt_llm/runtime/torchView.h"

#include <ATen/ATen.h>
#include <pybind11/functional.h>
#include <pybind11/operators.h>
#include <pybind11/stl.h>
#include <pybind11/stl_bind.h>
#include <torch/extension.h>

namespace tb = tensorrt_llm::batch_manager;
namespace tbk = tensorrt_llm::batch_manager::kv_cache_manager;
namespace tr = tensorrt_llm::runtime;
namespace py = pybind11;
namespace py = nanobind;
using BlockKey = tbk::BlockKey;
using VecUniqueTokens = tensorrt_llm::runtime::VecUniqueTokens;
using SizeType32 = tensorrt_llm::runtime::SizeType32;
6 changes: 3 additions & 3 deletions cpp/tensorrt_llm/pybind/batch_manager/kvCacheManager.h
@@ -18,14 +18,14 @@
#pragma once

#include "tensorrt_llm/pybind/common/customCasters.h"
#include <pybind11/pybind11.h>
#include <nanobind/nanobind.h>

namespace tensorrt_llm::batch_manager::kv_cache_manager
{
class KVCacheManagerBindings
{
public:
static void initBindings(pybind11::module_& m);
static void initBindings(nanobind::module_& m);
};
} // namespace tensorrt_llm::batch_manager::kv_cache_manager

@@ -34,6 +34,6 @@ namespace tensorrt_llm::batch_manager
class BasePeftCacheManagerBindings
{
public:
static void initBindings(pybind11::module_& m);
static void initBindings(nanobind::module_& m);
};
} // namespace tensorrt_llm::batch_manager
11 changes: 4 additions & 7 deletions cpp/tensorrt_llm/pybind/bindings.cpp
@@ -15,11 +15,8 @@
* limitations under the License.
*/

#include <pybind11/cast.h>
#include <pybind11/functional.h>
#include <pybind11/operators.h>
#include <pybind11/pybind11.h>
#include <pybind11/stl.h>
#include <nanobind/nanobind.h>
#include <nanobind/operators.h>
#include <torch/extension.h>
#include <vector>

@@ -45,7 +42,7 @@
#include "tensorrt_llm/runtime/samplingConfig.h"
#include "tensorrt_llm/runtime/utils/mpiUtils.h"

namespace py = pybind11;
namespace py = nanobind;
namespace tb = tensorrt_llm::batch_manager;
namespace tbk = tensorrt_llm::batch_manager::kv_cache_manager;
namespace tpb = tensorrt_llm::pybind::batch_manager;
@@ -69,7 +66,7 @@ tr::SamplingConfig makeSamplingConfig(std::vector<tr::SamplingConfig> const& con
}
} // namespace

PYBIND11_MODULE(TRTLLM_PYBIND_MODULE, m)
NB_MODULE(TRTLLM_PYBIND_MODULE, m)
{
m.doc() = "TensorRT-LLM Python bindings for C++ runtime";

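Note: this is where the module entry point changes from `PYBIND11_MODULE` to nanobind's `NB_MODULE`; the macro arguments and the `m.doc()` / submodule plumbing carry over unchanged. A minimal sketch of the resulting layout, with a hypothetical module name standing in for the `TRTLLM_PYBIND_MODULE` macro:

```cpp
// Minimal sketch of the nanobind entry point. "example_bindings" is a
// hypothetical module name; the real one is supplied via TRTLLM_PYBIND_MODULE.
#include <nanobind/nanobind.h>

namespace nb = nanobind;

namespace example
{
void initBindings(nb::module_& m)
{
    m.def("add", [](int a, int b) { return a + b; });
}
} // namespace example

NB_MODULE(example_bindings, m)
{
    m.doc() = "Example nanobind module mirroring the PYBIND11_MODULE -> NB_MODULE swap";

    // Nested submodules work the same way as in pybind11.
    nb::module_ internal = m.def_submodule("internal", "Nested bindings");
    example::initBindings(internal);
}
```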
14 changes: 5 additions & 9 deletions cpp/tensorrt_llm/pybind/executor/bindings.cpp
@@ -22,31 +22,27 @@
#include "tensorrt_llm/executor/executor.h"
#include "tensorrt_llm/executor/types.h"

#include <pybind11/cast.h>
#include <pybind11/chrono.h>
#include <pybind11/functional.h>
#include <pybind11/operators.h>
#include <pybind11/pybind11.h>
#include <pybind11/stl.h>
#include <nanobind/nanobind.h>
#include <nanobind/operators.h>

#include <optional>

namespace py = pybind11;
namespace py = nanobind;
namespace tle = tensorrt_llm::executor;
using SizeType32 = tle::SizeType32;

namespace tensorrt_llm::pybind::executor
{

template <typename T>
void instantiateEventDiff(pybind11::module& m, std::string const& name)
void instantiateEventDiff(py::module& m, std::string const& name)
{
py::class_<tle::KVCacheEventDiff<T>>(m, ("KVCacheEventDiff" + name).c_str())
.def_readonly("old_value", &tle::KVCacheEventDiff<T>::oldValue)
.def_readonly("new_value", &tle::KVCacheEventDiff<T>::newValue);
}

void initBindings(pybind11::module_& m)
void initBindings(py::module_& m)
{
m.attr("__version__") = tle::version();
py::enum_<tle::ModelType>(m, "ModelType")
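Note: `py::class_`, `py::enum_`, and `py::init` keep the same spelling under the `namespace py = nanobind;` alias, but nanobind renames the member-binding helpers: pybind11's `def_readonly`/`def_readwrite` are `def_ro`/`def_rw` in nanobind, so context lines such as the `def_readonly` calls above would presumably need the same rename as this migration proceeds. A minimal sketch with a hypothetical `EventDiff` struct standing in for `tle::KVCacheEventDiff`:

```cpp
// Minimal sketch of member and enum bindings under nanobind. EventDiff and
// ModelKind are hypothetical stand-ins, not TensorRT-LLM types.
#include <nanobind/nanobind.h>

namespace nb = nanobind;

struct EventDiff
{
    int oldValue;
    int newValue;
};

enum class ModelKind
{
    kDecoderOnly,
    kEncoderDecoder
};

NB_MODULE(executor_example, m)
{
    // pybind11's def_readonly/def_readwrite are spelled def_ro/def_rw here.
    nb::class_<EventDiff>(m, "EventDiff")
        .def_ro("old_value", &EventDiff::oldValue)
        .def_ro("new_value", &EventDiff::newValue);

    // nb::enum_ keeps the pybind11-style value() interface.
    nb::enum_<ModelKind>(m, "ModelKind")
        .value("DECODER_ONLY", ModelKind::kDecoderOnly)
        .value("ENCODER_DECODER", ModelKind::kEncoderDecoder);
}
```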
4 changes: 2 additions & 2 deletions cpp/tensorrt_llm/pybind/executor/bindings.h
@@ -18,12 +18,12 @@
#pragma once

#include "tensorrt_llm/pybind/common/customCasters.h"
#include <pybind11/pybind11.h>
#include <nanobind/nanobind.h>

namespace tensorrt_llm::pybind::executor
{

// Register bindings for executor API.
void initBindings(pybind11::module_& m);
void initBindings(nanobind::module_& m);

} // namespace tensorrt_llm::pybind::executor
4 changes: 2 additions & 2 deletions cpp/tensorrt_llm/pybind/userbuffers/bindings.cpp
@@ -19,13 +19,13 @@
#include "tensorrt_llm/kernels/userbuffers/ub_interface.h"
#include "tensorrt_llm/kernels/userbuffers/userbuffersManager.h"

namespace py = pybind11;
namespace py = nanobind;
namespace tub = tensorrt_llm::runtime::ub;

namespace tensorrt_llm::kernels::userbuffers
{

void UserBufferBindings::initBindings(pybind11::module_& m)
void UserBufferBindings::initBindings(py::module_& m)
{
py::class_<tub::UBBuffer>(m, "UBBuffer")
.def_readonly("size", &tub::UBBuffer::size)
4 changes: 2 additions & 2 deletions cpp/tensorrt_llm/pybind/userbuffers/bindings.h
@@ -18,13 +18,13 @@
#pragma once

#include "tensorrt_llm/pybind/common/customCasters.h"
#include <pybind11/pybind11.h>
#include <nanobind/nanobind.h>

namespace tensorrt_llm::kernels::userbuffers
{
class UserBufferBindings
{
public:
static void initBindings(pybind11::module_& m);
static void initBindings(nanobind::module_& m);
};
} // namespace tensorrt_llm::kernels::userbuffers