-
Notifications
You must be signed in to change notification settings - Fork 1k
fix: Use LLM to generate unique scene prompts for video extensions #318
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2,6 +2,7 @@ | |
| Grok video generation service. | ||
| """ | ||
|
|
||
| import aiohttp | ||
| import asyncio | ||
| import math | ||
| import re | ||
|
|
@@ -40,6 +41,56 @@ | |
| _POST_ID_URL_PATTERN = r"/generated/([0-9a-fA-F-]{32,36})/" | ||
|
|
||
|
|
||
| async def _generate_scene_prompts_llm(original_prompt: str, num_scenes: int) -> List[str]: | ||
| """Use local Grok API to generate unique scene prompts for each video round.""" | ||
|
|
||
| num_scenes = min(num_scenes, 6) | ||
|
|
||
| system_msg = f"""Break this video concept into {num_scenes} sequential scenes that flow naturally. | ||
|
|
||
| Original concept: "{original_prompt}" | ||
|
|
||
| CRITICAL RULES: | ||
| - Each scene MUST be DIFFERENT (no repetition) | ||
| - Scenes progress naturally like a story | ||
| - Smooth transitions between scenes | ||
| - Each scene continues from the previous one | ||
| - Output ONLY a JSON array: ["scene 1 description", "scene 2 description", ...] | ||
|
|
||
| Generate exactly {num_scenes} unique scenes:""" | ||
|
|
||
| try: | ||
| async with aiohttp.ClientSession() as session: | ||
| async with session.post( | ||
| "http://localhost:8000/v1/chat/completions", | ||
| headers={"Content-Type": "application/json"}, | ||
| json={ | ||
| "model": "grok-4.1-fast", | ||
| "messages": [{"role": "user", "content": system_msg}], | ||
| "temperature": 0.8, | ||
| "max_tokens": 2000 | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. add |
||
| }, | ||
| timeout=aiohttp.ClientTimeout(total=30) | ||
| ) as resp: | ||
| if resp.status == 200: | ||
| data = await resp.json() | ||
| content = data.get("choices", [{}])[0].get("message", {}).get("content", "") | ||
| match = re.search(r'\[[\s\S]*?\]', content) | ||
| if match: | ||
| scenes = orjson.loads(match.group(0)) | ||
| if isinstance(scenes, list) and len(scenes) >= num_scenes: | ||
| logger.info(f"Generated {len(scenes)} unique scene prompts via LLM") | ||
| return [str(s) for s in scenes[:num_scenes]] | ||
| else: | ||
| logger.warning(f"LLM API error {resp.status}") | ||
| except Exception as e: | ||
| logger.warning(f"LLM scene generation failed: {e}") | ||
|
|
||
| # Fallback to original prompt with scene numbers | ||
| logger.info(f"Using fallback scene prompts") | ||
| return [f"{original_prompt} (scene {i+1}/{num_scenes})" for i in range(num_scenes)] | ||
|
|
||
|
|
||
| @dataclass(frozen=True) | ||
| class VideoRoundPlan: | ||
| round_index: int | ||
|
|
@@ -811,6 +862,10 @@ async def completions( | |
| round_plan = _build_round_plan(target_length, is_super=is_super_pool) | ||
| total_rounds = len(round_plan) | ||
|
|
||
| # Generate unique scene prompts for each round using LLM | ||
| scene_prompts = await _generate_scene_prompts_llm(prompt, total_rounds) | ||
| logger.info(f"Using {len(scene_prompts)} scene prompts for {total_rounds} rounds") | ||
|
|
||
| service = VideoService() | ||
| message = _build_message(prompt, preset) | ||
|
|
||
|
|
@@ -849,18 +904,22 @@ async def _run_round_collect( | |
| original_id: Optional[str], | ||
| source: str, | ||
| ) -> VideoRoundResult: | ||
| # Use unique scene prompt for this round | ||
| round_prompt = scene_prompts[plan.round_index - 1] if plan.round_index <= len(scene_prompts) else prompt | ||
| round_message = _build_message(round_prompt, preset) | ||
|
|
||
| config_override = _build_round_config( | ||
| plan, | ||
| seed_post_id=seed_id, | ||
| last_post_id=last_id, | ||
| original_post_id=original_id, | ||
| prompt=prompt, | ||
| prompt=round_prompt, | ||
| aspect_ratio=aspect_ratio, | ||
| resolution_name=generation_resolution, | ||
| ) | ||
| response = await _request_round_stream( | ||
| token=token, | ||
| message=message, | ||
| message=round_message, | ||
| model_config_override=config_override, | ||
| ) | ||
| return await _collect_round_result(response, model=model, source=source) | ||
|
|
@@ -874,18 +933,22 @@ async def _stream_chain() -> AsyncGenerator[str, None]: | |
|
|
||
| try: | ||
| for plan in round_plan: | ||
| # Use unique scene prompt for this round | ||
| round_prompt = scene_prompts[plan.round_index - 1] if plan.round_index <= len(scene_prompts) else prompt | ||
| round_message = _build_message(round_prompt, preset) | ||
|
|
||
| config_override = _build_round_config( | ||
| plan, | ||
| seed_post_id=seed_id, | ||
| last_post_id=last_id, | ||
| original_post_id=original_id, | ||
| prompt=prompt, | ||
| prompt=round_prompt, | ||
| aspect_ratio=aspect_ratio, | ||
| resolution_name=generation_resolution, | ||
| ) | ||
| response = await _request_round_stream( | ||
| token=token, | ||
| message=message, | ||
| message=round_message, | ||
| model_config_override=config_override, | ||
| ) | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2,10 +2,13 @@ | |
| Direct video extension service (app-chat based). | ||
| """ | ||
|
|
||
| import aiohttp | ||
| import re | ||
| import time | ||
| import uuid | ||
| from typing import Any, Dict, Optional | ||
| from typing import Any, Dict, List, Optional | ||
|
|
||
| import orjson | ||
|
|
||
| from app.core.exceptions import AppException, ErrorType, UpstreamException, ValidationException | ||
| from app.core.logger import logger | ||
|
|
@@ -18,6 +21,51 @@ | |
|
|
||
| VIDEO_MODEL_ID = "grok-imagine-1.0-video" | ||
|
|
||
|
|
||
| async def _generate_scene_prompt_for_extend(original_prompt: str, current_scene: int, total_scenes: int) -> str: | ||
| """Use local Grok API to generate unique scene prompt for video extension.""" | ||
|
|
||
| system_msg = f"""Continue this video concept with scene {current_scene} of {total_scenes}. | ||
| Original concept: "{original_prompt}" | ||
| CRITICAL RULES: | ||
| - Scene MUST continue from previous scene | ||
| - Natural progression, NO repetition | ||
| - Different angle/action from previous | ||
| - Output ONLY the scene description (no JSON, no quotes) | ||
| Generate scene {current_scene}:""" | ||
|
|
||
| try: | ||
| async with aiohttp.ClientSession() as session: | ||
| async with session.post( | ||
| "http://localhost:8000/v1/chat/completions", | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. need api_key |
||
| headers={"Content-Type": "application/json"}, | ||
| json={ | ||
| "model": "grok-4.1-fast", | ||
| "messages": [{"role": "user", "content": system_msg}], | ||
| "temperature": 0.8, | ||
| "max_tokens": 300 | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. stream:False |
||
| }, | ||
| timeout=aiohttp.ClientTimeout(total=20) | ||
| ) as resp: | ||
| if resp.status == 200: | ||
| data = await resp.json() | ||
| content = data.get("choices", [{}])[0].get("message", {}).get("content", "").strip() | ||
| if content: | ||
| logger.info(f"Generated extend scene {current_scene}/{total_scenes} via LLM") | ||
| return content | ||
| else: | ||
| logger.warning(f"LLM API error {resp.status}") | ||
| except Exception as e: | ||
| logger.warning(f"LLM extend scene generation failed: {e}") | ||
|
|
||
| # Fallback | ||
| logger.info(f"Using fallback extend prompt") | ||
| return f"{original_prompt} (continuation {current_scene}/{total_scenes})" | ||
|
|
||
|
|
||
| _RATIO_MAP = { | ||
| "1280x720": "16:9", | ||
| "720x1280": "9:16", | ||
|
|
@@ -120,6 +168,14 @@ async def extend( | |
| ) | ||
| resolution_name = _normalize_resolution(resolution) | ||
|
|
||
| # LLM ile extend için sahne promptu oluştur | ||
| current_scene = int(start_time / 6) + 2 # +2 çünkü base video 1. sahne | ||
| total_scenes = 6 # Max 6 sahne (30 saniye) | ||
|
|
||
| logger.info(f"🎬 EXTEND: start_time={start_time}s → scene {current_scene}/{total_scenes}") | ||
| extend_prompt = await _generate_scene_prompt_for_extend(prompt, current_scene, total_scenes) | ||
| logger.info(f"🎥 EXTEND scene {current_scene}: {extend_prompt[:150]}...") | ||
|
|
||
| token_mgr = await get_token_manager() | ||
| await token_mgr.reload_if_stale() | ||
|
|
||
|
|
@@ -147,7 +203,7 @@ async def extend( | |
| "videoExtensionStartTime": float(start_time), | ||
| "extendPostId": reference_id, | ||
| "stitchWithExtendPostId": True, | ||
| "originalPrompt": prompt, | ||
| "originalPrompt": extend_prompt, | ||
| "originalPostId": reference_id, | ||
| "originalRefType": "ORIGINAL_REF_TYPE_VIDEO_EXTENSION", | ||
| "mode": "custom", | ||
|
|
@@ -165,7 +221,7 @@ async def extend( | |
| response = await AppChatReverse.request( | ||
| session, | ||
| token, | ||
| message=f"{prompt} --mode=custom", | ||
| message=f"{extend_prompt} --mode=custom", | ||
| model="grok-3", | ||
| tool_overrides={"videoGen": True}, | ||
| model_config_override=model_config_override, | ||
|
|
@@ -200,7 +256,7 @@ async def extend( | |
| "created_at": now, | ||
| "completed_at": now, | ||
| "status": "completed", | ||
| "prompt": prompt, | ||
| "prompt": extend_prompt, | ||
| "reference_id": reference_id, | ||
| "start_time": float(start_time), | ||
| "ratio": aspect_ratio, | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This API needs an api_key.