You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
2025-04-11T13:41:22.164186Z ERROR warmup{max_input_length=None max_prefill_tokens=4096 max_total_tokens=None max_batch_size=None}:warmup: text_generation_router_v3::client: backends/v3/src/client/mod.rs:45: Server error: BatchPrefillWithPagedKVCacheWrapper.plan() got an unexpected keyword argument 'head_dim'
2025-04-11T13:41:22.164259Z ERROR text_generation_launcher: Method Warmup encountered an error.
Traceback (most recent call last):
File "/app/venv/bin/text-generation-server", line 10, in <module>
sys.exit(app())
File "/app/venv/lib/python3.12/site-packages/typer/main.py", line 323, in __call__
return get_command(self)(*args, **kwargs)
File "/app/venv/lib/python3.12/site-packages/click/core.py", line 1161, in __call__
return self.main(*args, **kwargs)
File "/app/venv/lib/python3.12/site-packages/typer/core.py", line 743, in main
return _main(
File "/app/venv/lib/python3.12/site-packages/typer/core.py", line 198, in _main
rv = self.invoke(ctx)
File "/app/venv/lib/python3.12/site-packages/click/core.py", line 1697, in invoke
return _process_result(sub_ctx.command.invoke(sub_ctx))
File "/app/venv/lib/python3.12/site-packages/click/core.py", line 1443, in invoke
return ctx.invoke(self.callback, **ctx.params)
File "/app/venv/lib/python3.12/site-packages/click/core.py", line 788, in invoke
return __callback(*args, **kwargs)
File "/app/venv/lib/python3.12/site-packages/typer/main.py", line 698, in wrapper
return callback(**use_params)
File "/app/venv/lib/python3.12/site-packages/text_generation_server/cli.py", line 119, in serve
server.serve(
File "/app/venv/lib/python3.12/site-packages/text_generation_server/server.py", line 315, in serve
asyncio.run(
File "/usr/lib/python3.12/asyncio/runners.py", line 194, in run
return runner.run(main)
File "/usr/lib/python3.12/asyncio/runners.py", line 118, in run
return self._loop.run_until_complete(task)
File "/usr/lib/python3.12/asyncio/base_events.py", line 674, in run_until_complete
self.run_forever()
File "/usr/lib/python3.12/asyncio/base_events.py", line 641, in run_forever
self._run_once()
File "/usr/lib/python3.12/asyncio/base_events.py", line 1987, in _run_once
handle._run()
File "/usr/lib/python3.12/asyncio/events.py", line 88, in _run
self._context.run(self._callback, *self._args)
File "/app/venv/lib/python3.12/site-packages/grpc_interceptor/server.py", line 165, in invoke_intercept_method
return await self.intercept(
> File "/app/venv/lib/python3.12/site-packages/text_generation_server/interceptor.py", line 24, in intercept
return await response
File "/app/venv/lib/python3.12/site-packages/opentelemetry/instrumentation/grpc/_aio_server.py", line 120, in _unary_interceptor
raise error
File "/app/venv/lib/python3.12/site-packages/opentelemetry/instrumentation/grpc/_aio_server.py", line 111, in _unary_interceptor
return await behavior(request_or_iterator, context)
File "/app/venv/lib/python3.12/site-packages/text_generation_server/server.py", line 144, in Warmup
self.model.warmup(batch, max_input_tokens, max_total_tokens)
File "/app/venv/lib/python3.12/site-packages/text_generation_server/models/flash_causal_lm.py", line 1548, in warmup
_, _batch, _ = self.generate_token(batch)
File "/usr/lib/python3.12/contextlib.py", line 81, in inner
return func(*args, **kwds)
File "/app/venv/lib/python3.12/site-packages/text_generation_server/models/flash_causal_lm.py", line 1928, in generate_token
out, speculative_logits = self.forward(batch, adapter_data)
File "/app/venv/lib/python3.12/site-packages/text_generation_server/models/flash_causal_lm.py", line 1810, in forward
with self._forward_context(
File "/usr/lib/python3.12/contextlib.py", line 137, in __enter__
return next(self.gen)
File "/app/venv/lib/python3.12/site-packages/text_generation_server/layers/attention/flashinfer.py", line 86, in use_prefill_with_paged_kv_state
state.plan(
TypeError: BatchPrefillWithPagedKVCacheWrapper.plan() got an unexpected keyword argument 'head_dim'
The text was updated successfully, but these errors were encountered:
Given this code in text-generation-inference — the `state.plan(...)` call inside `use_prefill_with_paged_kv_state`, which passes a `head_dim` keyword:
text-generation-inference/server/text_generation_server/layers/attention/flashinfer.py
Lines 86 to 97 in 9a8d046
and this `BatchPrefillWithPagedKVCacheWrapper.plan()` signature from flashinfer, which no longer accepts a `head_dim` keyword argument:
https://github.com/flashinfer-ai/flashinfer/blob/55576c626421b5ee7e7ebe74afd26465c8ae863f/flashinfer/prefill.py#L1164-L1188
I'm getting the `TypeError: BatchPrefillWithPagedKVCacheWrapper.plan() got an unexpected keyword argument 'head_dim'` shown in the traceback above during warmup. This looks like a version mismatch between text-generation-inference and the installed flashinfer: newer flashinfer releases appear to have replaced the single `head_dim` parameter of `plan()` (with `head_dim_qk`/`head_dim_vo`), so TGI's call no longer matches — please confirm which flashinfer version TGI pins and whether the `plan()` call needs updating.
The text was updated successfully, but these errors were encountered: