55import numpy as np
66
77from lmdeploy .pytorch .messages import SchedulerSequence
8+ from lmdeploy .utils import get_logger , logging_timer
89
910from ..config import CacheConfig
1011from .block_manager import BaseBlockManager
1112
1213
14+ logger = get_logger ('lmdeploy' )
15+
16+
1317class Node :
1418 """node of block trie."""
1519
@@ -69,6 +73,7 @@ def get_root(self, adapter_name: str):
6973 self ._roots [adapter_name ] = Node (- 1 , - 1 , None )
7074 return self ._roots [adapter_name ]
7175
76+ @logging_timer ('BlockTrie_Match' , logger )
7277 def match (self , seq : SchedulerSequence ) -> Dict [int , int ]:
7378 """match sequence and cache."""
7479 copy_map = {}
@@ -79,6 +84,7 @@ def match(self, seq: SchedulerSequence) -> Dict[int, int]:
7984 copy_map = self ._match_multimodals (seq )
8085 return copy_map
8186
87+ @logging_timer ('BlockTrie_Allocate' , logger )
8288 def allocate (self , seq : SchedulerSequence ) -> Dict [int , int ]:
8389 """allocate."""
8490 copy_map = {}
@@ -214,20 +220,17 @@ def _match_multimodals(self, seq: SchedulerSequence) -> Dict[int, int]:
214220 last_max_num_matched = curr .num_matched
215221
216222 num_matched = curr .num_matched
217- print (f'>>> match seq { seq .seq_id } { num_matched } { seq .num_all_ids } ' )
223+ logger . debug (f'Matching seq- { seq .seq_id } { num_matched } / { seq .num_all_ids } ' )
218224
219225 def __match_success (node : Node ):
220226 nonlocal curr , num_matched , copy_map
221- print (
222- f'matched success: seq_id { seq .seq_id } range=({ num_matched } , { node .num_matched } ), block={ node .block } is_full={ node .is_full } '
223- )
227+ logger .debug (f'Matched token range=({ num_matched } , { node .num_matched } ), block={ node .block } is_full={ node .is_full } ' )
224228
225229 if not node .is_full :
226230 # when match an unfull block, need to copy to reuse the kv cache in that block
227231 block = self .allocator .allocate (1 , device = 'gpu' ).item ()
228- print (
229- f'matched create new copy block { node .block } -> { block } now free blocks={ self .block_manager .get_num_free_gpu_blocks ()} '
230- )
232+ logger .debug (
233+ f'Create new copy block { node .block } -> { block } now free blocks={ self .block_manager .get_num_free_gpu_blocks ()} ' )
231234 copy_map [node .block ] = block
232235 else :
233236 block = node .block
@@ -256,7 +259,7 @@ def __match_success(node: Node):
256259 curr_tokens = seq .history_cache [num_matched :cur_matched_end ]
257260 is_full = len (curr_tokens ) == block_size
258261 if (not is_full ) and self .block_manager .get_num_free_gpu_blocks () < 1 :
259- print (f'>>> no free gpu blocks seq { seq .seq_id } ' )
262+ logger . debug (f'No free gpu blocks for seq { seq .seq_id } ' )
260263 continue
261264 hash_data = tuple (curr_tokens )
262265 if cur_mm_hash_values :
@@ -326,7 +329,7 @@ def _allocate_multimodals(self, seq: SchedulerSequence) -> Dict[int, int]:
326329 num_matched = node .num_matched
327330 num_all_ids = seq .num_all_ids
328331
329- print (f'>>> allocate seq { seq .seq_id } { num_matched } { seq .num_all_ids } ' )
332+ logger . debug (f'Allocate seq- { seq .seq_id } { num_matched } / { seq .num_all_ids } ' )
330333
331334 if len (node .children ) == 0 and node .parent is not None :
332335 self .leaves .remove (node )
@@ -355,16 +358,15 @@ def __add_unfull_nodes(parent: Node, multi_segments: List[Tuple[Tuple[str], int]
355358 child .tokens ) and block != child .block :
356359 copy_map [child .block ] = block
357360 unfull_nodes .append (child )
358- print (
359- f'allocate seq { seq .seq_id } num_matched={ num_matched } reuse a unfull node block={ block } ' )
361+ logger .debug (f'allocate num_matched={ num_matched } reuse a [unfull] node block={ block } ' )
360362 else :
361363 child = Node (hash_key = hash_key ,
362364 block = block ,
363365 tokens = curr_tokens ,
364366 num_matched = cur_matched_end ,
365367 is_full = is_full ,
366368 mm_hashes = cur_mm_hash_values )
367- print (f'allocate seq { seq . seq_id } num_matched={ num_matched } add a unfull node block={ block } ' )
369+ logger . debug (f'allocate num_matched={ num_matched } add a [ unfull] node block={ block } ' )
368370 child .parent = parent
369371 blocks .append (child .block )
370372 unfull_nodes .append (child )
@@ -392,15 +394,15 @@ def __add_full_node(node, mm_hash_values):
392394 node = child
393395 free_blocks .append (block )
394396 logical_blocks [block_id ] = node .block
395- print (f'allocate seq { seq . seq_id } num_matched={ num_matched } reuse [full] node block={ node .block } ' )
397+ logger . debug (f'allocate num_matched={ num_matched } reuse [full] node block={ node .block } ' )
396398 else :
397399 node = Node (hash_key = hash_key ,
398400 block = block ,
399401 tokens = curr_tokens ,
400402 num_matched = num_matched + block_size ,
401403 is_full = is_full ,
402404 mm_hashes = mm_hash_values )
403- print (f'allocate seq { seq . seq_id } num_matched={ num_matched } add [full] node block={ block } ' )
405+ logger . debug (f'allocate num_matched={ num_matched } add [full] node block={ block } ' )
404406 node .parent = parent
405407 blocks .append (node .block )
406408 return node , True
@@ -450,17 +452,20 @@ def __add_full_node(node, mm_hash_values):
450452
451453 return copy_map
452454
455+ @logging_timer ('BlockTrie_Evict' , logger )
453456 def evict (self , max_num_blocks : int ):
454457 """evict."""
455458 if not self .enable :
456459 return 0
460+ logger .debug (f'Need to evict max_num_blocks={ max_num_blocks } ' )
457461
458462 def __remove_leaf (leaves , evicted_blocks ):
459463 _ , leaf = heapq .heappop (leaves )
460464 evicted_blocks .append (leaf .block )
461465 parent = leaf .parent
462466 leaf .parent = None
463467 self .leaves .remove (leaf )
468+ logger .debug (f'Evict block={ leaf .block } node.mm_hashes={ leaf .mm_hashes } ' )
464469 return parent , leaf
465470
466471 def __add_leaf (leaves , parent ):
@@ -497,12 +502,13 @@ def __add_leaf(leaves, parent):
497502 parent .children ) == 0 and self .allocator .get_ref_count (parent .block ) == 1 :
498503 tmp_parent = parent .parent
499504 evicted_blocks .append (parent .block )
505+ logger .debug (f'Evict block={ parent .block } node.mm_hashes={ parent .mm_hashes } ' )
500506 parent .parent = None
501507 parent = tmp_parent
502508
503509 if len (parent .children ) == 0 :
504510 __add_leaf (leaves , parent )
505511
506512 self .allocator .free (np .array (evicted_blocks ))
507-
513+ logger . debug ( f'Evict final blocks= { evicted_blocks } refs= { self . allocator . get_ref_count ( np . array ( evicted_blocks )) } ' )
508514 return len (evicted_blocks )
0 commit comments