From 744c96d432c7462f71b156177bb1e841083d076b Mon Sep 17 00:00:00 2001
From: -LAN-
Date: Tue, 31 Dec 2024 02:17:37 +0800
Subject: [PATCH 1/2] feat: implement asynchronous token counting in GPT2Tokenizer

Signed-off-by: -LAN-
---
 .../model_providers/__base/tokenizers/gpt2_tokenzier.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/api/core/model_runtime/model_providers/__base/tokenizers/gpt2_tokenzier.py b/api/core/model_runtime/model_providers/__base/tokenizers/gpt2_tokenzier.py
index 6dab0aaf2d41e7..40f0f50755eeba 100644
--- a/api/core/model_runtime/model_providers/__base/tokenizers/gpt2_tokenzier.py
+++ b/api/core/model_runtime/model_providers/__base/tokenizers/gpt2_tokenzier.py
@@ -1,9 +1,12 @@
+from concurrent.futures import ProcessPoolExecutor
 from os.path import abspath, dirname, join
 from threading import Lock
 from typing import Any
 
 from transformers import GPT2Tokenizer as TransformerGPT2Tokenizer  # type: ignore
 
+_executor = ProcessPoolExecutor(max_workers=1)
+
 _tokenizer: Any = None
 _lock = Lock()
 
@@ -20,7 +23,8 @@ def _get_num_tokens_by_gpt2(text: str) -> int:
 
     @staticmethod
     def get_num_tokens(text: str) -> int:
-        return GPT2Tokenizer._get_num_tokens_by_gpt2(text)
+        future = _executor.submit(GPT2Tokenizer._get_num_tokens_by_gpt2, text)
+        return future.result()
 
     @staticmethod
     def get_encoder() -> Any:

From e3763ca7011684c3010cf876b3f544631d5e4760 Mon Sep 17 00:00:00 2001
From: -LAN-
Date: Tue, 31 Dec 2024 15:16:52 +0800
Subject: [PATCH 2/2] feat: replace ProcessPoolExecutor with gevent threadpool for token counting

Signed-off-by: -LAN-
---
 .../__base/tokenizers/gpt2_tokenzier.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/api/core/model_runtime/model_providers/__base/tokenizers/gpt2_tokenzier.py b/api/core/model_runtime/model_providers/__base/tokenizers/gpt2_tokenzier.py
index 40f0f50755eeba..ab45a9580338b6 100644
--- a/api/core/model_runtime/model_providers/__base/tokenizers/gpt2_tokenzier.py
+++ b/api/core/model_runtime/model_providers/__base/tokenizers/gpt2_tokenzier.py
@@ -1,14 +1,13 @@
-from concurrent.futures import ProcessPoolExecutor
 from os.path import abspath, dirname, join
 from threading import Lock
-from typing import Any
+from typing import Any, cast
 
+import gevent.threadpool  # type: ignore
 from transformers import GPT2Tokenizer as TransformerGPT2Tokenizer  # type: ignore
 
-_executor = ProcessPoolExecutor(max_workers=1)
-
 _tokenizer: Any = None
 _lock = Lock()
+_pool = gevent.threadpool.ThreadPool(1)
 
 
 class GPT2Tokenizer:
@@ -23,8 +22,9 @@ def _get_num_tokens_by_gpt2(text: str) -> int:
 
     @staticmethod
     def get_num_tokens(text: str) -> int:
-        future = _executor.submit(GPT2Tokenizer._get_num_tokens_by_gpt2, text)
-        return future.result()
+        future = _pool.spawn(GPT2Tokenizer._get_num_tokens_by_gpt2, text)
+        result = future.get(block=True)
+        return cast(int, result)
 
     @staticmethod
     def get_encoder() -> Any: