From 23fc8efeb02f14699052dad338452a464c24da9d Mon Sep 17 00:00:00 2001
From: kevinWangSheng
Date: Thu, 9 Apr 2026 00:27:41 -0700
Subject: [PATCH] fix(hooks): extract text from multimodal input for
 UserPromptSubmit prompt field

When user_input is a list of ContentPart (multimodal input with images,
videos, etc.), the hook payload's prompt field was always set to an
empty string. Now we join the text from all TextPart items in the list
so hook scripts receive the actual user text.

Closes #1779
---
 src/kimi_cli/soul/kimisoul.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/kimi_cli/soul/kimisoul.py b/src/kimi_cli/soul/kimisoul.py
index 9365c11b9..a60e38f8c 100644
--- a/src/kimi_cli/soul/kimisoul.py
+++ b/src/kimi_cli/soul/kimisoul.py
@@ -480,7 +480,12 @@ async def run(self, user_input: str | list[ContentPart]):
         set_session_id(self._runtime.session.id)
 
         # --- UserPromptSubmit hook ---
-        text_input_for_hook = user_input if isinstance(user_input, str) else ""
+        if isinstance(user_input, str):
+            text_input_for_hook = user_input
+        else:
+            text_input_for_hook = "\n".join(
+                part.text for part in user_input if hasattr(part, "text")
+            )
         from kimi_cli.hooks import events
 
         hook_results = await self._hook_engine.trigger(