diff --git a/docs/envvars.rst b/docs/envvars.rst index bd62ccac46..3ead99bbd2 100644 --- a/docs/envvars.rst +++ b/docs/envvars.rst @@ -119,6 +119,25 @@ Runtime Environment Variables These environment variables control the behavior of Transformer Engine during execution. +General +^^^^^^^ + +.. envvar:: NVTE_TENSOR_HANDLE_POOL_SIZE_MB + + :Type: ``int`` (positive integer) + :Default: ``20`` + :Description: Size in MiB of the internal ``NVTETensor`` handle pool. Increase this + value if an application legitimately creates more tensor handles than + the default pool can hold. + +.. envvar:: NVTE_GROUPED_TENSOR_HANDLE_POOL_SIZE_MB + + :Type: ``int`` (positive integer) + :Default: ``20`` + :Description: Size in MiB of the internal ``NVTEGroupedTensor`` handle pool. Increase + this value if an application legitimately creates more grouped tensor + handles than the default pool can hold. + Attention Backend Selection ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/transformer_engine/common/transformer_engine.cpp b/transformer_engine/common/transformer_engine.cpp index b3179d38fd..066ca1dc8c 100644 --- a/transformer_engine/common/transformer_engine.cpp +++ b/transformer_engine/common/transformer_engine.cpp @@ -10,8 +10,10 @@ #include #include #include +#include #include #include +#include #include #include #include @@ -393,6 +395,49 @@ void CheckOutputGroupedTensor(const GroupedTensor &t, std::string_view name, boo CheckGroupedTensorShapeArrays(t, name); } +namespace { + +constexpr size_t kDefaultTensorHandlePoolSizeMB = 20; +constexpr size_t kBytesPerMB = 1024 * 1024; + +size_t GetTensorHandlePoolSizeMB(const char *env_var) { + const char *env_value = std::getenv(env_var); + if (env_value == nullptr || env_value[0] == '\0') { + return kDefaultTensorHandlePoolSizeMB; + } + + const std::string value(env_value); + constexpr const char *kWhitespace = " \t\n\r\f\v"; + const size_t first = value.find_first_not_of(kWhitespace); + const size_t last = 
value.find_last_not_of(kWhitespace); + NVTE_CHECK(first != std::string::npos, env_var, " must be a positive integer."); + + size_t pool_size_mb = 0; + for (size_t i = first; i <= last; ++i) { + NVTE_CHECK(value[i] >= '0' && value[i] <= '9', env_var, " must be a positive integer, got \"", + value, "\"."); + const size_t digit = static_cast<size_t>(value[i] - '0'); + NVTE_CHECK(pool_size_mb <= (std::numeric_limits<size_t>::max() - digit) / 10, env_var, + " is too large."); + pool_size_mb = pool_size_mb * 10 + digit; + } + + NVTE_CHECK(pool_size_mb > 0, env_var, " must be a positive integer."); + NVTE_CHECK(pool_size_mb <= std::numeric_limits<size_t>::max() / kBytesPerMB, env_var, + " is too large."); + return pool_size_mb; +} + +size_t GetTensorHandlePoolCapacity(size_t pool_size_mb, size_t handle_size, const char *handle_name, + const char *env_var) { + const size_t pool_size_bytes = pool_size_mb * kBytesPerMB; + NVTE_CHECK(pool_size_bytes >= handle_size, env_var, "=", pool_size_mb, + " MiB is too small for one ", handle_name, " handle of size ", handle_size, " bytes."); + return pool_size_bytes / handle_size; +} + +} // namespace + class TensorAllocator { public: static TensorAllocator &instance() { @@ -406,8 +451,10 @@ class TensorAllocator { std::lock_guard lock(mutex); const size_t available = free_list.size() + (memory.capacity() - memory.size()); NVTE_CHECK(available >= N, "Cannot allocate ", N, - " new NVTETensors. Maximum number of tensors reached: ", MAX_TENSOR_NUM, - ". There is probably a memory leak in your application."); + " new NVTETensors. Maximum number of tensors reached: ", MAX_TENSOR_NUM, " (", + TENSOR_HANDLE_POOL_SIZE_MB, + " MiB handle pool). 
If your application legitimately needs more tensor handles, " + "increase NVTE_TENSOR_HANDLE_POOL_SIZE_MB."); for (size_t i = 0; i < N; ++i) { uintptr_t index; if (!free_list.empty()) { @@ -479,9 +526,11 @@ class TensorAllocator { std::mutex mutex; std::atomic<size_t> size; - // Allocate at most 20 MB for tensors // Should be replaced by virtual memory allocation - const size_t MAX_TENSOR_NUM = 20 * 1024 * 1024 / sizeof(Tensor); + const size_t TENSOR_HANDLE_POOL_SIZE_MB = + GetTensorHandlePoolSizeMB("NVTE_TENSOR_HANDLE_POOL_SIZE_MB"); + const size_t MAX_TENSOR_NUM = GetTensorHandlePoolCapacity( + TENSOR_HANDLE_POOL_SIZE_MB, sizeof(Tensor), "NVTETensor", "NVTE_TENSOR_HANDLE_POOL_SIZE_MB"); std::vector<uintptr_t> free_list; std::vector<Tensor> memory; bool debug = false; @@ -532,7 +581,9 @@ class GroupedTensorAllocator { } NVTE_ERROR( "Cannot allocate a new NVTEGroupedTensor. Maximum number of grouped tensors reached: ", - MAX_GROUPED_TENSOR_NUM, ". There is probably a memory leak in your application."); + MAX_GROUPED_TENSOR_NUM, " (", GROUPED_TENSOR_HANDLE_POOL_SIZE_MB, + " MiB handle pool). If your application legitimately needs more grouped tensor handles, " + "increase NVTE_GROUPED_TENSOR_HANDLE_POOL_SIZE_MB."); } void Free(NVTEGroupedTensor t) { @@ -564,8 +615,11 @@ class GroupedTensorAllocator { std::mutex mutex; std::atomic<size_t> size; - // Allocate at most 20 MB for grouped tensors - const size_t MAX_GROUPED_TENSOR_NUM = 20 * 1024 * 1024 / sizeof(GroupedTensor); + const size_t GROUPED_TENSOR_HANDLE_POOL_SIZE_MB = + GetTensorHandlePoolSizeMB("NVTE_GROUPED_TENSOR_HANDLE_POOL_SIZE_MB"); + const size_t MAX_GROUPED_TENSOR_NUM = + GetTensorHandlePoolCapacity(GROUPED_TENSOR_HANDLE_POOL_SIZE_MB, sizeof(GroupedTensor), + "NVTEGroupedTensor", "NVTE_GROUPED_TENSOR_HANDLE_POOL_SIZE_MB"); std::vector<uintptr_t> free_list; std::vector<GroupedTensor> memory; };