Skip to content

Commit 19a4032

Browse files
authored
Merge pull request #835 from inclusionAI/aworld_audio
[diffusion]: video_creator -> diffusion_video [audio]: new agent audio_generator, support doubao_tts
2 parents 6875cdd + 34f499e commit 19a4032

File tree

25 files changed

+2327
-93
lines changed

25 files changed

+2327
-93
lines changed

aworld-cli/src/aworld_cli/console.py

Lines changed: 95 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@
1515
from rich.style import Style
1616
from rich.table import Table
1717
from rich.text import Text
18-
import os
1918

2019
from aworld.logs.util import logger
2120
from ._globals import console
@@ -30,6 +29,7 @@ class AWorldCLI:
3029
def __init__(self):
3130
self.console = console
3231
self.user_input = UserInputHandler(console)
32+
# self.team_handler = InteractiveTeamHandler(console)
3333

3434
def _get_gradient_text(self, text: str, start_color: str, end_color: str) -> Text:
3535
"""Create a Text object with a horizontal gradient."""
@@ -176,13 +176,13 @@ async def _edit_models_config(self, config, current_config: dict):
176176
else:
177177
default_cfg.pop('base_url', None)
178178

179-
# Diffusion (models.diffusion -> DIFFUSION_* for video_creator agent)
180-
self.console.print("\n[bold]Diffusion configuration[/bold] [dim](optional, for video_creator agent)[/dim]")
179+
# Diffusion (models.diffusion -> DIFFUSION_* for diffusion agent)
180+
self.console.print("\n[bold]Diffusion configuration[/bold] [dim](optional, for diffusion agent)[/dim]")
181181
self.console.print(" [dim]Leave empty to use Media LLM or default LLM config above[/dim]\n")
182182
if 'diffusion' not in current_config['models']:
183-
# Migrate from legacy models.video_creator
184-
current_config['models']['diffusion'] = current_config['models'].get('video_creator') or {}
185-
current_config['models'].pop('video_creator', None)
183+
# Migrate from legacy models.diffusion
184+
current_config['models']['diffusion'] = current_config['models'].get('diffusion') or {}
185+
current_config['models'].pop('diffusion', None)
186186
diff_cfg = current_config['models']['diffusion']
187187

188188
current_diff_api_key = diff_cfg.get('api_key', '')
@@ -230,6 +230,58 @@ async def _edit_models_config(self, config, current_config: dict):
230230
if not diff_cfg:
231231
current_config['models'].pop('diffusion', None)
232232

233+
# Audio (models.audio -> AUDIO_* for audio agent)
234+
self.console.print("\n[bold]Audio configuration[/bold] [dim](optional, for audio agent)[/dim]")
235+
self.console.print(" [dim]Leave empty to use Media LLM or default LLM config above[/dim]\n")
236+
if 'audio' not in current_config['models']:
237+
current_config['models']['audio'] = {}
238+
audio_cfg = current_config['models']['audio']
239+
240+
current_audio_api_key = audio_cfg.get('api_key', '')
241+
if current_audio_api_key:
242+
masked = current_audio_api_key[:8] + "..." if len(current_audio_api_key) > 8 else "***"
243+
self.console.print(f" [dim]Current AUDIO_API_KEY: {masked}[/dim]")
244+
audio_api_key = Prompt.ask(" AUDIO_API_KEY", default=current_audio_api_key, password=True)
245+
if audio_api_key:
246+
audio_cfg['api_key'] = audio_api_key
247+
else:
248+
audio_cfg.pop('api_key', None)
249+
250+
current_audio_model = audio_cfg.get('model', '')
251+
self.console.print(" [dim]e.g. claude-3-5-sonnet-20241022 · Enter to inherit from Media/default[/dim]")
252+
audio_model = Prompt.ask(" AUDIO_MODEL_NAME", default=current_audio_model)
253+
if audio_model:
254+
audio_cfg['model'] = audio_model
255+
else:
256+
audio_cfg.pop('model', None)
257+
258+
current_audio_base_url = audio_cfg.get('base_url', '')
259+
audio_base_url = Prompt.ask(" AUDIO_BASE_URL", default=current_audio_base_url)
260+
if audio_base_url:
261+
audio_cfg['base_url'] = audio_base_url
262+
else:
263+
audio_cfg.pop('base_url', None)
264+
265+
current_audio_provider = audio_cfg.get('provider', 'openai')
266+
audio_provider = Prompt.ask(" AUDIO_PROVIDER", default=current_audio_provider)
267+
if audio_provider:
268+
audio_cfg['provider'] = audio_provider
269+
else:
270+
audio_cfg.pop('provider', None)
271+
272+
current_audio_temp = audio_cfg.get('temperature', 0.1)
273+
audio_temp = Prompt.ask(" AUDIO_TEMPERATURE", default=str(current_audio_temp))
274+
if audio_temp:
275+
try:
276+
audio_cfg['temperature'] = float(audio_temp)
277+
except ValueError:
278+
audio_cfg.pop('temperature', None)
279+
else:
280+
audio_cfg.pop('temperature', None)
281+
282+
if not audio_cfg:
283+
current_config['models'].pop('audio', None)
284+
233285
config.save_config(current_config)
234286
self.console.print(f"\n[green]✅ Configuration saved to {config.get_config_path()}[/green]")
235287
table = Table(title="Default LLM Configuration", box=box.ROUNDED)
@@ -258,6 +310,19 @@ async def _edit_models_config(self, config, current_config: dict):
258310
self.console.print()
259311
self.console.print(diff_table)
260312

313+
if current_config['models'].get('audio'):
314+
audio_table = Table(title="Audio Configuration (AUDIO_*)", box=box.ROUNDED)
315+
audio_table.add_column("Setting", style="cyan")
316+
audio_table.add_column("Value", style="green")
317+
for key, value in current_config['models']['audio'].items():
318+
if key == 'api_key':
319+
masked_value = value[:8] + "..." if len(str(value)) > 8 else "***"
320+
audio_table.add_row(key, masked_value)
321+
else:
322+
audio_table.add_row(key, str(value))
323+
self.console.print()
324+
self.console.print(audio_table)
325+
261326
async def _edit_skills_config(self, config, current_config: dict):
262327
"""Edit skills section of config (global SKILLS_PATH and per-agent XXX_SKILLS_PATH)."""
263328
default_skills_path = str(Path.home() / ".aworld" / "skills")
@@ -905,6 +970,7 @@ async def run_chat_session(self, agent_name: str, executor: Callable[[str], Any]
905970
f"Type '/agents' to list all available agents.\n"
906971
f"Type '/cost' for current session, '/cost -all' for global history.\n"
907972
f"Type '/compact' to run context compression.\n"
973+
f"Type '/team' for agent team management.\n"
908974
f"Type '/memory' to edit project context, '/memory view' to view, '/memory status' for status.\n"
909975
f"Use @filename to include images or text files (e.g., @photo.jpg or @document.txt)."
910976
)
@@ -921,6 +987,7 @@ async def run_chat_session(self, agent_name: str, executor: Callable[[str], Any]
921987
slash_cmds = [
922988
"/agents", "/skills", "/new", "/restore", "/latest",
923989
"/exit", "/quit", "/switch", "/cost", "/cost -all", "/compact",
990+
"/team",
924991
"/memory", "/memory view", "/memory reload", "/memory status",
925992
]
926993
switch_with_agents = [f"/switch {n}" for n in agent_names] if agent_names else []
@@ -941,6 +1008,7 @@ async def run_chat_session(self, agent_name: str, executor: Callable[[str], Any]
9411008
"/memory view": "View current memory content",
9421009
"/memory reload": "Reload memory from file",
9431010
"/memory status": "Show memory system status",
1011+
"/team": "Agent team management commands",
9441012
"exit": "Exit chat",
9451013
"quit": "Exit chat",
9461014
}
@@ -1178,12 +1246,12 @@ async def run_chat_session(self, agent_name: str, executor: Callable[[str], Any]
11781246
try:
11791247
parts = user_input.split(maxsplit=1)
11801248
subcommand = parts[1] if len(parts) > 1 else ""
1181-
1249+
11821250
# Import required modules
11831251
import os
11841252
from pathlib import Path
11851253
import subprocess
1186-
1254+
11871255
# Find AWORLD.md file
11881256
def find_aworld_file():
11891257
"""Find AWORLD.md in standard locations"""
@@ -1197,11 +1265,11 @@ def find_aworld_file():
11971265
if path.exists():
11981266
return path
11991267
return None
1200-
1268+
12011269
def get_editor():
12021270
"""Get editor from environment variables"""
12031271
return os.environ.get('VISUAL') or os.environ.get('EDITOR') or 'nano'
1204-
1272+
12051273
if subcommand == "view":
12061274
# View current memory content
12071275
aworld_file = find_aworld_file()
@@ -1216,20 +1284,20 @@ def get_editor():
12161284
from rich.syntax import Syntax
12171285
syntax = Syntax(content, "markdown", theme="monokai", line_numbers=False)
12181286
self.console.print(Panel(syntax, title="AWORLD.md", border_style="cyan"))
1219-
1287+
12201288
elif subcommand == "reload":
12211289
# Reload memory from file
12221290
self.console.print("[dim]Memory reload functionality requires agent restart.[/dim]")
12231291
self.console.print("[dim]The AWORLD.md file will be automatically loaded on next agent start.[/dim]")
1224-
1292+
12251293
elif subcommand == "status":
12261294
# Show memory system status
12271295
aworld_file = find_aworld_file()
1228-
from rich.table import Table
1296+
# Use global Table import (line 16) instead of local import
12291297
table = Table(title="Memory System Status", box=box.ROUNDED)
12301298
table.add_column("Property", style="cyan")
12311299
table.add_column("Value", style="green")
1232-
1300+
12331301
if aworld_file:
12341302
table.add_row("AWORLD.md Location", str(aworld_file))
12351303
table.add_row("File Size", f"{aworld_file.stat().st_size} bytes")
@@ -1240,25 +1308,25 @@ def get_editor():
12401308
else:
12411309
table.add_row("AWORLD.md Location", "Not found")
12421310
table.add_row("Status", "❌ Not configured")
1243-
1311+
12441312
table.add_row("Feature", "AWORLDFileNeuron")
12451313
table.add_row("Auto-load", "Enabled")
12461314
self.console.print(table)
1247-
1315+
12481316
else:
12491317
# Edit AWORLD.md (default action)
12501318
aworld_file = find_aworld_file()
1251-
1319+
12521320
if not aworld_file:
12531321
# Create new file in user directory (DEFAULT)
12541322
default_location = Path.home() / '.aworld' / 'AWORLD.md'
12551323
self.console.print(f"[yellow]No AWORLD.md found. Creating new file at:[/yellow]")
12561324
self.console.print(f"[cyan]{default_location}[/cyan]")
12571325
self.console.print(f"[dim](Default: ~/.aworld/AWORLD.md)[/dim]\n")
1258-
1326+
12591327
# Create directory if needed
12601328
default_location.parent.mkdir(parents=True, exist_ok=True)
1261-
1329+
12621330
# Create template
12631331
template = """# Project Context
12641332
@@ -1283,11 +1351,11 @@ def get_editor():
12831351
"""
12841352
default_location.write_text(template, encoding='utf-8')
12851353
aworld_file = default_location
1286-
1354+
12871355
# Open in editor
12881356
editor = get_editor()
12891357
self.console.print(f"[dim]Opening {aworld_file} in {editor}...[/dim]")
1290-
1358+
12911359
try:
12921360
# Open editor and wait for it to close
12931361
result = subprocess.run([editor, str(aworld_file)])
@@ -1301,13 +1369,18 @@ def get_editor():
13011369
self.console.print("[dim]Set EDITOR or VISUAL environment variable to your preferred editor.[/dim]")
13021370
except Exception as e:
13031371
self.console.print(f"[red]Error opening editor: {e}[/red]")
1304-
1372+
13051373
except Exception as e:
13061374
self.console.print(f"[red]Error handling memory command: {e}[/red]")
13071375
import traceback
13081376
traceback.print_exc()
13091377
continue
13101378

1379+
# Handle team command
1380+
if user_input.lower().startswith("/team"):
1381+
# await self.team_handler.handle_command(user_input)
1382+
continue
1383+
13111384
# Handle agents command
13121385
if user_input.lower() in ("/agents", "agents"):
13131386
try:

aworld-cli/src/aworld_cli/core/config.py

Lines changed: 68 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -281,9 +281,9 @@ def _apply_filesystem_config(filesystem_cfg: Optional[Dict[str, Any]] = None) ->
281281

282282
def _apply_diffusion_models_config(models_config: Dict[str, Any]) -> None:
283283
"""
284-
Apply models.diffusion config to DIFFUSION_* env vars for video_creator agent.
284+
Apply models.diffusion config to DIFFUSION_* env vars for diffusion agent.
285285
Priority: models.diffusion config > existing DIFFUSION_* env vars > LLM_*.
286-
Supports models.video_creator for backwards compatibility.
286+
Supports models.diffusion for backwards compatibility.
287287
"""
288288
diff_cfg = models_config.get('diffusion')
289289
diff_cfg = diff_cfg if isinstance(diff_cfg, dict) else {}
@@ -345,6 +345,69 @@ def _apply_diffusion_models_config(models_config: Dict[str, Any]) -> None:
345345
os.environ['DIFFUSION_TEMPERATURE'] = str(float(temperature))
346346

347347

348+
def _apply_audio_models_config(models_config: Dict[str, Any]) -> None:
    """
    Apply models.audio config to AUDIO_* env vars for audio agent.

    Priority for every setting: models.audio config > existing AUDIO_* env
    var > LLM_* env var > vendor-specific env var (OPENAI_* / ANTHROPIC_* /
    GEMINI_*). The provider is inferred from the vendor API-key variable only
    when neither the config nor AUDIO_PROVIDER names one, and finally
    defaults to 'openai'.

    Args:
        models_config: The ``models`` section of the config. A missing or
            non-dict ``audio`` entry is treated as empty, so passing ``{}``
            simply derives AUDIO_* from the environment.

    Side effects:
        Writes AUDIO_PROVIDER unconditionally, and AUDIO_API_KEY,
        AUDIO_MODEL_NAME, AUDIO_BASE_URL and AUDIO_TEMPERATURE when a value
        could be resolved. Temperatures that cannot be parsed as a float are
        ignored instead of aborting config loading.
    """
    vendor_providers = {
        'OPENAI_API_KEY': 'openai',
        'ANTHROPIC_API_KEY': 'anthropic',
        'GEMINI_API_KEY': 'gemini',
    }

    def _clean(value: Any) -> str:
        # Config values may be non-string scalars (e.g. a numeric YAML value).
        return str(value).strip() if value else ''

    def _first_env(*names: str) -> tuple[str, str]:
        # Return (name, value) of the first non-blank env var, else ('', '').
        for name in names:
            value = (os.environ.get(name) or '').strip()
            if value:
                return name, value
        return '', ''

    def _to_float(value: Any) -> Optional[float]:
        # None, blank or malformed temperatures resolve to "not set".
        try:
            return float(value)
        except (TypeError, ValueError):
            return None

    raw_cfg = models_config.get('audio')
    audio_cfg = raw_cfg if isinstance(raw_cfg, dict) else {}

    api_key = _clean(audio_cfg.get('api_key'))
    model_name = _clean(audio_cfg.get('model'))
    base_url = _clean(audio_cfg.get('base_url'))
    provider = _clean(audio_cfg.get('provider'))
    temperature = _to_float(audio_cfg.get('temperature'))

    # Resolve an explicit AUDIO_PROVIDER before any inference from the API
    # key source, so an AUDIO_* env var keeps its documented precedence.
    if not provider:
        provider = (os.environ.get('AUDIO_PROVIDER') or '').strip()

    if not api_key:
        key_source, api_key = _first_env(
            'AUDIO_API_KEY',
            'LLM_API_KEY',
            'OPENAI_API_KEY',
            'ANTHROPIC_API_KEY',
            'GEMINI_API_KEY',
        )
        if not provider:
            provider = vendor_providers.get(key_source, '')

    if not model_name:
        model_name = _first_env('AUDIO_MODEL_NAME', 'LLM_MODEL_NAME')[1]

    if not base_url:
        base_url = _first_env(
            'AUDIO_BASE_URL',
            'LLM_BASE_URL',
            'OPENAI_BASE_URL',
            'ANTHROPIC_BASE_URL',
            'GEMINI_BASE_URL',
        )[1]

    if not provider:
        provider = 'openai'

    if temperature is None:
        temperature = _to_float((os.environ.get('AUDIO_TEMPERATURE') or '').strip())

    if api_key:
        os.environ['AUDIO_API_KEY'] = api_key
    if model_name:
        os.environ['AUDIO_MODEL_NAME'] = model_name
    if base_url:
        os.environ['AUDIO_BASE_URL'] = base_url
    os.environ['AUDIO_PROVIDER'] = provider
    if temperature is not None:
        os.environ['AUDIO_TEMPERATURE'] = str(temperature)
409+
410+
348411
def _apply_models_config_to_env(models_config: Dict[str, Any]) -> None:
349412
"""
350413
Apply models config (api_key, model, base_url) to os.environ.
@@ -381,6 +444,7 @@ def _apply_models_config_to_env(models_config: Dict[str, Any]) -> None:
381444
if base_url:
382445
os.environ['LLM_BASE_URL'] = base_url
383446
_apply_diffusion_models_config(models_config)
447+
_apply_audio_models_config(models_config)
384448
return
385449
# Legacy: nested models.default.{provider} or models.{provider}
386450
default_providers = {k: v for k, v in default_cfg.items()
@@ -422,6 +486,7 @@ def _apply_models_config_to_env(models_config: Dict[str, Any]) -> None:
422486
os.environ['LLM_BASE_URL'] = base_url
423487

424488
_apply_diffusion_models_config(models_config)
489+
_apply_audio_models_config(models_config)
425490

426491

427492
def _load_from_local_env(source_path: str) -> tuple[Dict[str, Any], str, str]:
@@ -439,6 +504,7 @@ def _load_from_local_env(source_path: str) -> tuple[Dict[str, Any], str, str]:
439504
})
440505
# Apply DIFFUSION_* from LLM_* when not set in .env
441506
_apply_diffusion_models_config({})
507+
_apply_audio_models_config({})
442508
logger.info(f"[config] load_dotenv loaded from: {source_path} {os.environ.get('LLM_MODEL_NAME')} {os.environ.get('LLM_BASE_URL')}")
443509
return _env_to_config(), "local", source_path
444510

aworld-cli/src/aworld_cli/core/context.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ def check_session_token_limit(
8282

8383
history = JSONLHistory(str(history_path))
8484
stats = history.get_token_stats(session_id=session_id)
85+
logger.info(f"check_session_token_limit|agent_name={agent_name}|session_id={session_id}|limit={limit}|stats={stats}")
8586

8687
# Use current agent's context_window_tokens (ctx) when agent_name provided
8788
if agent_name:
@@ -90,7 +91,7 @@ def check_session_token_limit(
9091
total = (
9192
agent_stats.get("context_window_tokens", 0)
9293
if agent_stats
93-
else stats.get("total_tokens", 0)
94+
else 0
9495
)
9596
else:
9697
total = stats.get("total_tokens", 0)

aworld-cli/src/aworld_cli/history.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -378,7 +378,7 @@ def get_token_stats(self, session_id: Optional[str] = None) -> Dict:
378378

379379
return stats
380380

381-
def format_cost_display(self, session_id: Optional[str] = None, limit: int = 10) -> str:
381+
def format_cost_display(self, session_id: Optional[str] = None, limit: int = 20) -> str:
382382
"""
383383
Format token cost statistics for display.
384384

aworld-cli/src/aworld_cli/inner_plugins/smllc/agents/video_creator/__init__.py renamed to aworld-cli/src/aworld_cli/inner_plugins/smllc/agents/audio/__init__.py

File renamed without changes.

0 commit comments

Comments
 (0)