Skip to content

Commit f50c956

Browse files
committed
add debug info
1 parent fd98e6c commit f50c956

File tree

1 file changed

+21
-15
lines changed

1 file changed

+21
-15
lines changed

lmdeploy/pytorch/paging/block_trie.py

Lines changed: 21 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,15 @@
55
import numpy as np
66

77
from lmdeploy.pytorch.messages import SchedulerSequence
8+
from lmdeploy.utils import get_logger, logging_timer
89

910
from ..config import CacheConfig
1011
from .block_manager import BaseBlockManager
1112

1213

14+
logger = get_logger('lmdeploy')
15+
16+
1317
class Node:
1418
"""node of block trie."""
1519

@@ -69,6 +73,7 @@ def get_root(self, adapter_name: str):
6973
self._roots[adapter_name] = Node(-1, -1, None)
7074
return self._roots[adapter_name]
7175

76+
@logging_timer('BlockTrie_Match', logger)
7277
def match(self, seq: SchedulerSequence) -> Dict[int, int]:
7378
"""match sequence and cache."""
7479
copy_map = {}
@@ -79,6 +84,7 @@ def match(self, seq: SchedulerSequence) -> Dict[int, int]:
7984
copy_map = self._match_multimodals(seq)
8085
return copy_map
8186

87+
@logging_timer('BlockTrie_Allocate', logger)
8288
def allocate(self, seq: SchedulerSequence) -> Dict[int, int]:
8389
"""allocate."""
8490
copy_map = {}
@@ -214,20 +220,17 @@ def _match_multimodals(self, seq: SchedulerSequence) -> Dict[int, int]:
214220
last_max_num_matched = curr.num_matched
215221

216222
num_matched = curr.num_matched
217-
print(f'>>> match seq {seq.seq_id} {num_matched} {seq.num_all_ids}')
223+
logger.debug(f'Matching seq-{seq.seq_id} {num_matched}/{seq.num_all_ids}')
218224

219225
def __match_success(node: Node):
220226
nonlocal curr, num_matched, copy_map
221-
print(
222-
f'matched success: seq_id {seq.seq_id} range=({num_matched}, {node.num_matched}), block={node.block} is_full={node.is_full}'
223-
)
227+
logger.debug(f'Matched token range=({num_matched}, {node.num_matched}), block={node.block} is_full={node.is_full}')
224228

225229
if not node.is_full:
226230
# when match an unfull block, need to copy to reuse the kv cache in that block
227231
block = self.allocator.allocate(1, device='gpu').item()
228-
print(
229-
f'matched create new copy block {node.block} -> {block} now free blocks={self.block_manager.get_num_free_gpu_blocks()}'
230-
)
232+
logger.debug(
233+
f'Create new copy block {node.block} -> {block} now free blocks={self.block_manager.get_num_free_gpu_blocks()}')
231234
copy_map[node.block] = block
232235
else:
233236
block = node.block
@@ -256,7 +259,7 @@ def __match_success(node: Node):
256259
curr_tokens = seq.history_cache[num_matched:cur_matched_end]
257260
is_full = len(curr_tokens) == block_size
258261
if (not is_full) and self.block_manager.get_num_free_gpu_blocks() < 1:
259-
print(f'>>> no free gpu blocks seq {seq.seq_id}')
262+
logger.debug(f'No free gpu blocks for seq {seq.seq_id}')
260263
continue
261264
hash_data = tuple(curr_tokens)
262265
if cur_mm_hash_values:
@@ -326,7 +329,7 @@ def _allocate_multimodals(self, seq: SchedulerSequence) -> Dict[int, int]:
326329
num_matched = node.num_matched
327330
num_all_ids = seq.num_all_ids
328331

329-
print(f'>>> allocate seq {seq.seq_id} {num_matched} {seq.num_all_ids}')
332+
logger.debug(f'Allocate seq-{seq.seq_id} {num_matched}/{seq.num_all_ids}')
330333

331334
if len(node.children) == 0 and node.parent is not None:
332335
self.leaves.remove(node)
@@ -355,16 +358,15 @@ def __add_unfull_nodes(parent: Node, multi_segments: List[Tuple[Tuple[str], int]
355358
child.tokens) and block != child.block:
356359
copy_map[child.block] = block
357360
unfull_nodes.append(child)
358-
print(
359-
f'allocate seq {seq.seq_id } num_matched={num_matched} reuse a unfull node block={block} ')
361+
logger.debug(f'allocate num_matched={num_matched} reuse a [unfull] node block={block}')
360362
else:
361363
child = Node(hash_key=hash_key,
362364
block=block,
363365
tokens=curr_tokens,
364366
num_matched=cur_matched_end,
365367
is_full=is_full,
366368
mm_hashes=cur_mm_hash_values)
367-
print(f'allocate seq {seq.seq_id } num_matched={num_matched} add a unfull node block={block} ')
369+
logger.debug(f'allocate num_matched={num_matched} add a [unfull] node block={block} ')
368370
child.parent = parent
369371
blocks.append(child.block)
370372
unfull_nodes.append(child)
@@ -392,15 +394,15 @@ def __add_full_node(node, mm_hash_values):
392394
node = child
393395
free_blocks.append(block)
394396
logical_blocks[block_id] = node.block
395-
print(f'allocate seq {seq.seq_id } num_matched={num_matched} reuse [full] node block={node.block}')
397+
logger.debug(f'allocate num_matched={num_matched} reuse [full] node block={node.block}')
396398
else:
397399
node = Node(hash_key=hash_key,
398400
block=block,
399401
tokens=curr_tokens,
400402
num_matched=num_matched + block_size,
401403
is_full=is_full,
402404
mm_hashes=mm_hash_values)
403-
print(f'allocate seq {seq.seq_id } num_matched={num_matched} add [full] node block={block}')
405+
logger.debug(f'allocate num_matched={num_matched} add [full] node block={block}')
404406
node.parent = parent
405407
blocks.append(node.block)
406408
return node, True
@@ -450,17 +452,20 @@ def __add_full_node(node, mm_hash_values):
450452

451453
return copy_map
452454

455+
@logging_timer('BlockTrie_Evict', logger)
453456
def evict(self, max_num_blocks: int):
454457
"""evict."""
455458
if not self.enable:
456459
return 0
460+
logger.debug(f'Need to evict max_num_blocks={max_num_blocks}')
457461

458462
def __remove_leaf(leaves, evicted_blocks):
459463
_, leaf = heapq.heappop(leaves)
460464
evicted_blocks.append(leaf.block)
461465
parent = leaf.parent
462466
leaf.parent = None
463467
self.leaves.remove(leaf)
468+
logger.debug(f'Evict block={leaf.block} node.mm_hashes={leaf.mm_hashes}')
464469
return parent, leaf
465470

466471
def __add_leaf(leaves, parent):
@@ -497,12 +502,13 @@ def __add_leaf(leaves, parent):
497502
parent.children) == 0 and self.allocator.get_ref_count(parent.block) == 1:
498503
tmp_parent = parent.parent
499504
evicted_blocks.append(parent.block)
505+
logger.debug(f'Evict block={parent.block} node.mm_hashes={parent.mm_hashes}')
500506
parent.parent = None
501507
parent = tmp_parent
502508

503509
if len(parent.children) == 0:
504510
__add_leaf(leaves, parent)
505511

506512
self.allocator.free(np.array(evicted_blocks))
507-
513+
logger.debug(f'Evict final blocks={evicted_blocks} refs={self.allocator.get_ref_count(np.array(evicted_blocks))}')
508514
return len(evicted_blocks)

0 commit comments

Comments
 (0)