diff --git a/.coveragerc b/.coveragerc
deleted file mode 100644
index 39c8d23373..0000000000
--- a/.coveragerc
+++ /dev/null
@@ -1,5 +0,0 @@
-[html]
-directory = coverage
-
-[run]
-data_file = /workspace/report/$COMMIT_ID/cov-temp-flagscale/.coverage_$LOCAL_RANK
diff --git a/.github/workflows/unit_tests_common.yml b/.github/workflows/unit_tests_common.yml
index 9a7de91fb9..ac70b72bd6 100644
--- a/.github/workflows/unit_tests_common.yml
+++ b/.github/workflows/unit_tests_common.yml
@@ -162,6 +162,7 @@ jobs:
             ${INSTALL_DIR:+--install-dir "$INSTALL_DIR"} \
             --no-system --no-dev --no-base --no-task \
             --src-deps megatron-lm \
+            --pip-deps typer \
             --retry-count 3
 
           # Copy test data (keep existing logic)
diff --git a/__init__.py b/__init__.py
index a5830884fc..e69de29bb2 100644
--- a/__init__.py
+++ b/__init__.py
@@ -1,11 +0,0 @@
-import os
-import sys
-
-# Add parent directory to path to import version
-_parent_dir = os.path.dirname(os.path.abspath(__file__))
-if _parent_dir not in sys.path:
-    sys.path.insert(0, _parent_dir)
-
-from version import FLAGSCALE_VERSION
-
-__version__ = FLAGSCALE_VERSION
diff --git a/flagscale/__init__.py b/flagscale/__init__.py
index e69de29bb2..85301627f7 100644
--- a/flagscale/__init__.py
+++ b/flagscale/__init__.py
@@ -0,0 +1,27 @@
+def _get_version() -> str:
+    """Get version from importlib.metadata or parse pyproject.toml as fallback."""
+    try:
+        from importlib.metadata import version
+
+        return version("flagscale")
+    except Exception:
+        pass
+
+    # Fallback: parse pyproject.toml for development mode (Python 3.11+)
+    try:
+        from pathlib import Path
+
+        import tomllib
+
+        pyproject_path = Path(__file__).parent.parent / "pyproject.toml"
+        if pyproject_path.exists():
+            with open(pyproject_path, "rb") as f:
+                data = tomllib.load(f)
+                return data.get("project", {}).get("version", "0.0.0")
+    except Exception:
+        pass
+
+    return "0.0.0"
+
+
+__version__ = _get_version()
diff --git a/flagscale/cli.py b/flagscale/cli.py
index 741a0644eb..36e91c11b5 100644
--- a/flagscale/cli.py
+++ b/flagscale/cli.py
@@ -1,165 +1,426 @@
-import os
 import subprocess
 import sys
+from enum import Enum
+from pathlib import Path
 
-import click
+import typer
 
-VERSION = "0.8.0"
+from flagscale import __version__ as FLAGSCALE_VERSION
 
+app = typer.Typer(
+    name="flagscale",
+    help="FlagScale CLI - comprehensive toolkit for large model lifecycle.",
+    add_completion=False,
+)
 
-@click.group(context_settings={"help_option_names": ["-h", "--help"]})
-@click.version_option(VERSION, "-v", "--version", message=f"flagscale version {VERSION}")
-def flagscale():
-    """
-    FlagScale is a comprehensive toolkit designed to support the entire lifecycle of large models,
-    developed with the backing of the Beijing Academy of Artificial Intelligence (BAAI).
-    """
-    pass
-
+# ============================================================================
+# Helper Functions
+# ============================================================================
 
-@flagscale.command()
-@click.argument("model_name", type=str)
-@click.argument("yaml_path", type=click.Path(exists=True), required=False)
-def train(model_name, yaml_path=None):
-    """
-    Train model from yaml.
-    """
-    from flag_scale.run import main as run_main
 
+def resolve_config(model_name: str, yaml_path: Path | None, task: str) -> tuple[str, str]:
+    """Resolve config path and name from model_name or yaml_path"""
     if yaml_path:
-        if os.path.isabs(yaml_path):
-            yaml_path = yaml_path
-        else:
-            yaml_path = os.path.join(os.getcwd(), yaml_path)
-        if not os.path.exists(yaml_path):
-            click.echo(f"Error: The yaml {yaml_path} does not exist.", err=True)
-            return
-        click.echo(f"Using configuration yaml: {yaml_path}")
-    else:
-        default_dir = os.path.dirname(os.path.abspath(os.path.dirname(__file__)))
-        # Default is train.yaml
-        yaml_path = os.path.join(default_dir, "examples", model_name, "conf", "train.yaml")
-        if not os.path.exists(yaml_path):
-            click.echo(f"Error: The yaml {yaml_path} does not exist.", err=True)
-            return
-        click.echo(f"Using default configuration yaml: {yaml_path}")
-    click.echo(f"Start training from the yaml {yaml_path}...")
-    yaml_path = os.path.abspath(yaml_path)
-    config_path = os.path.dirname(yaml_path)
-    config_name = os.path.splitext(os.path.basename(yaml_path))[0]
-    click.echo(f"config_path: {config_path}")
-    click.echo(f"config_name: {config_name}")
-
-    sys.argv = ["run.py", f"--config-path={config_path}", f"--config-name={config_name}"]
+        yaml_path = yaml_path.resolve()
+        if not yaml_path.exists():
+            typer.echo(f"Error: {yaml_path} does not exist", err=True)
+            raise typer.Exit(1)
+        return str(yaml_path.parent), yaml_path.stem
+
+    script_dir = Path(__file__).parent.parent
+    yaml_path = script_dir / "examples" / model_name / "conf" / f"{task}.yaml"
+    if not yaml_path.exists():
+        typer.echo(f"Error: {yaml_path} does not exist", err=True)
+        raise typer.Exit(1)
+    return str(yaml_path.parent), yaml_path.stem
+
+
+def run_task(
+    task_type: str, config_path: str, config_name: str, action: str, extra_args: list | None = None
+):
+    """Execute task via flagscale.run"""
+    from flagscale.run import main as run_main
+
+    args = [
+        "run.py",
+        f"--config-path={config_path}",
+        f"--config-name={config_name}",
+        f"action={action}",
+    ]
+    if extra_args:
+        args.extend(extra_args)
+    sys.argv = args
     run_main()
 
 
-@flagscale.command()
-@click.argument("model_name", type=str)
-@click.argument("yaml_path", type=click.Path(exists=True), required=False)
-@click.option(
-    "--model-path", "model_path", required=False, type=str, help="The weight path of model"
-)
-@click.option("--port", "port", required=False, type=int, help="The port of serve")
-@click.option(
-    "--engine-args",
-    "engine_args",
-    required=False,
-    type=str,
-    help='Model config as JSON string, e.g. \'{"a":1, "b":2}\'',
-)
-def serve(model_name, yaml_path=None, model_path=None, port=None, engine_args=None):
-    """
-    Serve model from yaml.
+def get_action(stop: bool, dryrun: bool, test: bool, query: bool, tune: bool) -> str:
+    """Determine action from flags (mutually exclusive)"""
+    flags = [
+        ("stop", stop),
+        ("dryrun", dryrun),
+        ("test", test),
+        ("query", query),
+        ("auto_tune", tune),
+    ]
+    set_flags = [name for name, value in flags if value]
+
+    if len(set_flags) > 1:
+        typer.echo(f"Error: Flags are mutually exclusive: --{', --'.join(set_flags)}", err=True)
+        raise typer.Exit(1)
+
+    if set_flags:
+        return set_flags[0]
+    return "run"  # default
+
+
+# ============================================================================
+# Run Command: flagscale run --config-path <path> --config-name <name> [options]
+# This replaces the old `python run.py` interface
+# ============================================================================
+
+
+class Action(str, Enum):
+    run = "run"
+    dryrun = "dryrun"
+    test = "test"
+    stop = "stop"
+    query = "query"
+    auto_tune = "auto_tune"
+
+
+@app.command("run")
+def run_cmd(
+    config_path: Path = typer.Option(..., "--config-path", "-p", help="Path to config directory"),
+    config_name: str = typer.Option(
+        ..., "--config-name", "-n", help="Config file name (without .yaml)"
+    ),
+    action: Action = typer.Option(Action.run, "--action", "-a", help="Action to perform"),
+    overrides: list[str] | None = typer.Argument(
+        None, help="Additional Hydra overrides (e.g., key=value)"
+    ),
+):
+    """Run task with explicit config path and name (replaces python run.py)
+
+    Example:
+        flagscale run --config-path ./examples/qwen3/conf --config-name train --action run
+        flagscale run -p ./examples/qwen3/conf -n train -a stop
     """
-    from flag_scale.run import main as run_main
+    from flagscale.run import main as run_main
 
-    if yaml_path:
-        if os.path.isabs(yaml_path):
-            yaml_path = yaml_path
-        else:
-            yaml_path = os.path.join(os.getcwd(), yaml_path)
-        if not os.path.exists(yaml_path):
-            click.echo(f"Error: The yaml {yaml_path} does not exist.", err=True)
-            return
-        click.echo(f"Using configuration yaml: {yaml_path}")
-    else:
-        default_dir = os.path.dirname(os.path.abspath(os.path.dirname(__file__)))
-        # Default is serve.yaml
-        yaml_path = os.path.join(default_dir, "examples", model_name, "conf", "serve.yaml")
-        if not os.path.exists(yaml_path):
-            click.echo(f"Error: The yaml {yaml_path} does not exist.", err=True)
-            return
-        click.echo(f"Using default configuration yaml: {yaml_path}")
-    click.secho(
-        "Warning: When serving, please specify the relevant environment variables. When serving on multiple machines, ensure that the necessary parameters, such as hostfile, are set correctly. For details, refer to the following link: https://github.com/FlagOpen/FlagScale/blob/main/flagscale/serve/README.md",
-        fg="yellow",
-    )
-    click.echo(f"Start serving from the yaml {yaml_path}...")
-    yaml_path = os.path.abspath(yaml_path)
-    config_path = os.path.dirname(yaml_path)
-    config_name = os.path.splitext(os.path.basename(yaml_path))[0]
-    click.echo(f"config_path: {config_path}")
-    click.echo(f"config_name: {config_name}")
-    args = ["run.py", f"--config-path={config_path}", f"--config-name={config_name}"]
-    if model_path:
-        args.append(f"+experiment.runner.cli_args.model_path={model_path}")
-    if port:
-        args.append(f"+experiment.runner.cli_args.port={port}")
-    if engine_args:
-        args.append(f"+experiment.runner.cli_args.engine_args='{engine_args}'")
+    config_path = config_path.resolve()
+    if not config_path.exists():
+        typer.echo(f"Error: Config path does not exist: {config_path}", err=True)
+        raise typer.Exit(1)
+
+    config_file = config_path / f"{config_name}.yaml"
+    if not config_file.exists():
+        typer.echo(f"Error: Config file does not exist: {config_file}", err=True)
+        raise typer.Exit(1)
+
+    args = [
+        "flagscale",
+        f"--config-path={config_path}",
+        f"--config-name={config_name}",
+        f"action={action.value}",
+    ]
+    if overrides:
+        args.extend(overrides)
+
+    typer.echo(f"Running: {' '.join(args)}")
     sys.argv = args
     run_main()
 
 
-@flagscale.command()
-# Input the name of the Docker image (required)
-@click.option("--image", "image_name", required=True, type=str, help="The name of the Docker image")
-# Input the address of the Git repository (required)
-@click.option(
-    "--ckpt", "ckpt_name", required=True, type=str, help="The address of the ckpt's git repository"
-)
-# Input the address of the local directory (optional)
-@click.option(
-    "--ckpt-path", "ckpt_path", type=click.Path(), required=False, help="The path to save ckpt"
-)
-def pull(image_name, ckpt_name, ckpt_path):
-    "Docker pull image and git clone ckpt."
+# ============================================================================
+# Task Commands: flagscale <task> [--flags] <model>
+# ============================================================================
+
+
+@app.command()
+def train(
+    model: str = typer.Argument(..., help="Model name (e.g., aquila, llama)"),
+    config: Path | None = typer.Option(None, "--config", "-c", help="Config YAML path"),
+    stop: bool = typer.Option(False, "--stop", help="Stop training"),
+    dryrun: bool = typer.Option(False, "--dryrun", help="Validate config only"),
+    test: bool = typer.Option(False, "--test", help="Run with test"),
+    query: bool = typer.Option(False, "--query", help="Query status"),
+    tune: bool = typer.Option(False, "--tune", help="Auto-tune"),
+):
+    """Train a model"""
+    action = get_action(stop, dryrun, test, query, tune)
+    cfg_path, cfg_name = resolve_config(model, config, "train")
+    typer.echo(f"Train {model} [{action}]")
+    typer.echo(f"config_path: {cfg_path}")
+    typer.echo(f"config_name: {cfg_name}")
+    run_task("train", cfg_path, cfg_name, action)
+
+
+@app.command()
+def serve(
+    model: str = typer.Argument(..., help="Model name"),
+    config: Path | None = typer.Option(None, "--config", "-c", help="Config YAML path"),
+    stop: bool = typer.Option(False, "--stop", help="Stop serving"),
+    test: bool = typer.Option(False, "--test", help="Test serving"),
+    tune: bool = typer.Option(False, "--tune", help="Auto-tune"),
+    port: int | None = typer.Option(None, "--port", help="Server port"),
+    model_path: str | None = typer.Option(None, "--model-path", help="Model weights path"),
+    engine_args: str | None = typer.Option(
+        None, "--engine-args", help="Engine args as JSON string, e.g. '{\"a\":1}'"
+    ),
+):
+    """Serve a model"""
+    action = "stop" if stop else ("test" if test else ("auto_tune" if tune else "run"))
+    cfg_path, cfg_name = resolve_config(model, config, "serve")
+    extra = []
+    if port:
+        extra.append(f"+experiment.runner.cli_args.port={port}")
+    if model_path:
+        extra.append(f"+experiment.runner.cli_args.model_path={model_path}")
+    if engine_args:
+        extra.append(f"+experiment.runner.cli_args.engine_args='{engine_args}'")
+    typer.echo(f"Serve {model} [{action}]")
+    typer.echo(f"config_path: {cfg_path}")
+    typer.echo(f"config_name: {cfg_name}")
+    if action == "run":
+        typer.secho(
+            "Warning: When serving, please specify the relevant environment variables. "
+            "When serving on multiple machines, ensure that the necessary parameters, "
+            "such as hostfile, are set correctly. For details, refer to: "
+            "https://github.com/FlagOpen/FlagScale/blob/main/flagscale/serve/README.md",
+            fg="yellow",
+        )
+    run_task("serve", cfg_path, cfg_name, action, extra)
+
+
+@app.command()
+def inference(
+    model: str = typer.Argument(..., help="Model name"),
+    config: Path | None = typer.Option(None, "--config", "-c", help="Config YAML path"),
+    stop: bool = typer.Option(False, "--stop", help="Stop inference"),
+    dryrun: bool = typer.Option(False, "--dryrun", help="Validate config only"),
+    test: bool = typer.Option(False, "--test", help="Run with test"),
+):
+    """Run inference"""
+    action = get_action(stop, dryrun, test, False, False)
+    cfg_path, cfg_name = resolve_config(model, config, "inference")
+    typer.echo(f"Inference {model} [{action}]")
+    run_task("inference", cfg_path, cfg_name, action)
+
+
+@app.command()
+def rl(
+    model: str = typer.Argument(..., help="Model name"),
+    config: Path | None = typer.Option(None, "--config", "-c", help="Config YAML path"),
+    stop: bool = typer.Option(False, "--stop", help="Stop RL training"),
+    dryrun: bool = typer.Option(False, "--dryrun", help="Validate config only"),
+    test: bool = typer.Option(False, "--test", help="Run with test"),
+):
+    """Run RL training"""
+    action = get_action(stop, dryrun, test, False, False)
+    cfg_path, cfg_name = resolve_config(model, config, "rl")
+    typer.echo(f"RL {model} [{action}]")
+    run_task("rl", cfg_path, cfg_name, action)
+
+
+@app.command()
+def compress(
+    model: str = typer.Argument(..., help="Model name"),
+    config: Path | None = typer.Option(None, "--config", "-c", help="Config YAML path"),
+    stop: bool = typer.Option(False, "--stop", help="Stop compression"),
+    dryrun: bool = typer.Option(False, "--dryrun", help="Validate config only"),
+):
+    """Compress a model"""
+    action = "stop" if stop else ("dryrun" if dryrun else "run")
+    cfg_path, cfg_name = resolve_config(model, config, "compress")
+    typer.echo(f"Compress {model} [{action}]")
+    run_task("compress", cfg_path, cfg_name, action)
+
+
+# ============================================================================
+# Install Command (delegates to tools/install)
+# ============================================================================
+
+
+class Platform(str, Enum):
+    cuda = "cuda"
+    default = "default"
+
+
+class PkgManager(str, Enum):
+    uv = "uv"
+    pip = "pip"
+    conda = "conda"
+
+
+@app.command()
+def install(
+    platform: Platform = typer.Option(Platform.cuda, "--platform", "-p", help="Platform"),
+    task: str = typer.Option(
+        "train", "--task", "-t", help="Task (train, inference, serve, rl, all)"
+    ),
+    pkg_mgr: PkgManager = typer.Option(PkgManager.uv, "--pkg-mgr", help="Package manager"),
+    no_system: bool = typer.Option(False, "--no-system", help="Skip system packages"),
+    no_dev: bool = typer.Option(False, "--no-dev", help="Skip dev phase"),
+    no_base: bool = typer.Option(False, "--no-base", help="Skip base phase"),
+    no_task: bool = typer.Option(False, "--no-task", help="Skip task phase"),
+    only_pip: bool = typer.Option(
+        False, "--only-pip", help="Only install pip packages (skip apt and source builds)"
+    ),
+    debug: bool = typer.Option(False, "--debug", help="Dry-run mode"),
+):
+    """Install dependencies via tools/install/install.sh"""
+    script_dir = Path(__file__).parent.parent
+    install_script = script_dir / "tools" / "install" / "install.sh"
+
+    if not install_script.exists():
+        typer.echo(f"Error: Install script not found: {install_script}", err=True)
+        raise typer.Exit(1)
+
+    cmd = [
+        str(install_script),
+        "--platform",
+        platform.value,
+        "--task",
+        task,
+        "--pkg-mgr",
+        pkg_mgr.value,
+    ]
+    if no_system:
+        cmd.append("--no-system")
+    if only_pip:
+        cmd.append("--only-pip")
+    if no_dev:
+        cmd.append("--no-dev")
+    if no_base:
+        cmd.append("--no-base")
+    if no_task:
+        cmd.append("--no-task")
+    if debug:
+        cmd.append("--debug")
+
+    typer.echo(f"Running: {' '.join(cmd)}")
+    result = subprocess.run(cmd)
+    if result.returncode != 0:
+        raise typer.Exit(result.returncode)
+
+
+# ============================================================================
+# Test Command
+# ============================================================================
+
+
+@app.command("test")
+def run_tests(
+    platform: Platform = typer.Option(Platform.cuda, "--platform", help="Platform"),
+    device: str = typer.Option("gpu", "--device", help="Device (gpu, cpu)"),
+    test_type: str = typer.Option("unit", "--type", help="Test type (unit, functional)"),
+    task: str | None = typer.Option(None, "--task", help="Task to test"),
+    model: str | None = typer.Option(None, "--model", help="Model to test"),
+    test_list: str | None = typer.Option(None, "--list", help="Specific tests to run"),
+):
+    """Run tests"""
+    script_dir = Path(__file__).parent.parent
+    test_script = script_dir / "tests" / "test_utils" / "runners" / "run_tests.sh"
+
+    if not test_script.exists():
+        typer.echo(f"Error: Test script not found: {test_script}", err=True)
+        raise typer.Exit(1)
+
+    cmd = [
+        str(test_script),
+        "--platform",
+        platform.value,
+        "--device",
+        device,
+        "--type",
+        test_type,
+    ]
+    if task:
+        cmd.extend(["--task", task])
+    if model:
+        cmd.extend(["--model", model])
+    if test_list:
+        cmd.extend(["--list", test_list])
+
+    typer.echo(f"Running: {' '.join(cmd)}")
+    result = subprocess.run(cmd)
+    if result.returncode != 0:
+        raise typer.Exit(result.returncode)
+
+
+# ============================================================================
+# Pull Command
+# ============================================================================
+
+
+@app.command()
+def pull(
+    image: str = typer.Option(..., "--image", help="Docker image name"),
+    ckpt: str = typer.Option(..., "--ckpt", help="Checkpoint git repository"),
+    ckpt_path: Path | None = typer.Option(None, "--ckpt-path", help="Path to save checkpoint"),
+):
+    """Pull Docker image and clone checkpoint repository."""
     # If ckpt_path is not provided, use the default download directory
     if ckpt_path is None:
-        ckpt_path = os.path.join(os.getcwd(), "model_download")
+        ckpt_path = Path.cwd() / "model_download"
 
     # Check and create the directory
-    if not os.path.exists(ckpt_path):
-        os.makedirs(ckpt_path)
-        print(f"Directory {ckpt_path} created.")
+    if not ckpt_path.exists():
+        ckpt_path.mkdir(parents=True)
+        typer.echo(f"Directory {ckpt_path} created.")
 
     # Pull the Docker image
     try:
-        print(f"Pulling Docker image: {image_name}...")
-        subprocess.run(["docker", "pull", image_name], check=True)
-        print(f"Successfully pulled Docker image: {image_name}")
+        typer.echo(f"Pulling Docker image: {image}...")
+        subprocess.run(["docker", "pull", image], check=True)
+        typer.echo(f"Successfully pulled Docker image: {image}")
     except subprocess.CalledProcessError:
-        print(f"Failed to pull Docker image: {image_name}")
-        return
+        typer.echo(f"Failed to pull Docker image: {image}", err=True)
+        raise typer.Exit(1)
 
     # Clone the Git repository
     try:
-        print(f"Cloning Git repository: {ckpt_name} into {ckpt_path}...")
-        subprocess.run(["git", "clone", ckpt_name, ckpt_path], check=True)
-        print(f"Successfully cloned Git repository: {ckpt_name}")
+        typer.echo(f"Cloning Git repository: {ckpt} into {ckpt_path}...")
+        subprocess.run(["git", "clone", ckpt, str(ckpt_path)], check=True)
+        typer.echo(f"Successfully cloned Git repository: {ckpt}")
     except subprocess.CalledProcessError:
-        print(f"Failed to clone Git repository: {ckpt_name}")
-        return
+        typer.echo(f"Failed to clone Git repository: {ckpt}", err=True)
+        raise typer.Exit(1)
 
     # Pull large files using Git LFS
-    print("Pulling Git LFS files...")
+    typer.echo("Pulling Git LFS files...")
     try:
-        subprocess.run(["git", "lfs", "pull"], cwd=ckpt_path, check=True)
-        print("Successfully pulled Git LFS files")
+        subprocess.run(["git", "lfs", "pull"], cwd=str(ckpt_path), check=True)
+        typer.echo("Successfully pulled Git LFS files")
     except subprocess.CalledProcessError:
-        print("Failed to pull Git LFS files")
-        return
+        typer.echo("Failed to pull Git LFS files", err=True)
+        raise typer.Exit(1)
+
+
+# ============================================================================
+# Version
+# ============================================================================
+
+
+def version_callback(value: bool):
+    if value:
+        typer.echo(f"flagscale version {FLAGSCALE_VERSION}")
+        raise typer.Exit()
+
+
+@app.callback()
+def main(
+    version: bool = typer.Option(
+        None, "--version", "-v", callback=version_callback, is_eager=True, help="Show version"
+    ),
+):
+    """FlagScale CLI - comprehensive toolkit for large model lifecycle."""
+    pass
+
+
+# Entry point
+def flagscale():
+    app()
 
 
 if __name__ == "__main__":
diff --git a/flagscale/run.py b/flagscale/run.py
new file mode 100644
index 0000000000..e67545f470
--- /dev/null
+++ b/flagscale/run.py
@@ -0,0 +1,145 @@
+import os
+import warnings
+
+import hydra
+from omegaconf import DictConfig, OmegaConf
+
+from flagscale.logger import logger
+from flagscale.runner.autotuner_factory import AutotunerFactory
+from flagscale.runner.runner_base import Runner
+from flagscale.runner.runner_inference import SSHInferenceRunner
+from flagscale.runner.runner_serve import CloudServeRunner, SSHServeRunner
+from flagscale.runner.runner_train import CloudTrainRunner, SSHTrainRunner
+from flagscale.runner.utils import is_master
+
+# To accommodate the scenario where the before_start field is used to switch to the actual environment during program execution,
+# we have placed the import statements inside the function body rather than at the beginning of the file.
+
+
+FLAGSCALE_USE_V1 = os.environ.get("FLAGSCALE_USE_V1", "1").lower() in ("1", "true")
+
+VALID_TASKS = {"train", "inference", "compress", "serve", "rl"}
+
+LEGACY_RUNNER_MAP = {
+    "train": SSHTrainRunner,
+    "inference": SSHInferenceRunner,
+    "serve": SSHServeRunner,
+}
+
+# task_type -> allowed actions
+TASK_ACTIONS = {
+    "train": {"run", "dryrun", "test", "stop", "query", "auto_tune"},
+    "inference": {"run", "dryrun", "test", "stop"},
+    "serve": {"run", "test", "stop", "auto_tune"},
+    "compress": {"run", "dryrun", "stop"},
+    "rl": {"run", "dryrun", "test", "stop"},
+}
+
+
+def check_and_reset_deploy_config(config: DictConfig) -> None:
+    if config.experiment.get("deploy", {}):
+        OmegaConf.set_struct(config.experiment.runner, False)
+        config.experiment.runner.deploy = config.experiment.deploy
+        del config.experiment.deploy
+        warnings.warn(
+            "'config.experiment.deploy' has been moved to 'config.experiment.runner.deploy'. "
+            "Support for the old location will be removed in a future release."
+        )
+        OmegaConf.set_struct(config.experiment.runner, True)
+
+
+def validate_task(task_type: str, action: str) -> None:
+    if task_type not in VALID_TASKS:
+        raise ValueError(f"Invalid task_type '{task_type}', must be one of {sorted(VALID_TASKS)}")
+
+    allowed_actions = TASK_ACTIONS[task_type]
+    if action not in allowed_actions:
+        raise ValueError(
+            f"Action '{action}' is not allowed for task_type '{task_type}'. "
+            f"Allowed actions: {sorted(allowed_actions)}"
+        )
+
+
+def get_runner(config: DictConfig, task_type: str):
+    runner_type = config.experiment.runner.get("type", "ssh")
+
+    if runner_type == "cloud":
+        if task_type == "train":
+            return CloudTrainRunner(config)
+        elif task_type == "serve":
+            if FLAGSCALE_USE_V1:
+                return Runner(config)
+            else:
+                return CloudServeRunner(config)
+        else:
+            raise NotImplementedError(f"Task type '{task_type}' is not supported by cloud runner")
+
+    if FLAGSCALE_USE_V1:
+        return Runner(config)
+
+    logger.warning(
+        "Using legacy runner, which will be removed in future. Please use new runner instead."
+    )
+
+    assert task_type in LEGACY_RUNNER_MAP, (
+        f"Task type '{task_type}' is not supported by legacy runner"
+    )
+    return LEGACY_RUNNER_MAP[task_type](config)
+
+
+def handle_auto_tune(config: DictConfig, task_type: str) -> None:
+    if task_type not in {"serve", "train"}:
+        raise NotImplementedError(f"Auto tune is not implemented for task type '{task_type}'")
+
+    # Only one autotuner process for MPI-based runs
+    if task_type == "train" and not is_master(config):
+        return
+
+    AutoTuner = AutotunerFactory.get_autotuner(task_type)
+    AutoTuner(config).tune()
+
+
+def execute_action(runner, action: str, task_type: str, config: DictConfig) -> None:
+    if action == "run":
+        if task_type == "train":
+            enable_monitoring = config.experiment.runner.get("enable_monitoring", False)
+            enable_gpu_health_check = config.experiment.runner.get("enable_gpu_health_check", False)
+            runner.run(
+                enable_monitoring=enable_monitoring, enable_gpu_health_check=enable_gpu_health_check
+            )
+            if enable_monitoring:
+                logger.info("Monitor service will be started automatically when training begins.")
+        else:
+            runner.run()
+    elif action == "dryrun":
+        runner.run(dryrun=True)
+    elif action == "test":
+        runner.run(with_test=True)
+    elif action == "stop":
+        runner.stop()
+    elif action == "query":
+        runner.query()
+    else:
+        raise ValueError(f"Unknown action '{action}'")
+
+
+@hydra.main(version_base=None, config_name="config")
+def main(config: DictConfig) -> None:
+    check_and_reset_deploy_config(config)
+
+    task_type = config.experiment.task.get("type", None)
+    action = config.action
+    validate_task(task_type, action)
+
+    # auto_tune invokes the runner internally
+    if action == "auto_tune":
+        handle_auto_tune(config, task_type)
+        return
+
+    runner = get_runner(config, task_type)
+    execute_action(runner, action, task_type, config)
+    return
+
+
+if __name__ == "__main__":
+    main()
diff --git a/flagscale/train/megatron/training/training.py b/flagscale/train/megatron/training/training.py
index e17a8da153..ef0e98e073 100644
--- a/flagscale/train/megatron/training/training.py
+++ b/flagscale/train/megatron/training/training.py
@@ -2931,6 +2931,7 @@ def evaluate(
     verbose=False,
     non_loss_data_func=None,
     extra_valid_index=None,
+    eval_iters=None,
 ):
     """Evaluation."""
     args = get_args()
diff --git a/pyproject.toml b/pyproject.toml
index 784174fb8b..9c934e0246 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -2,24 +2,28 @@
 requires = [
     "setuptools>=77.0",
     "wheel>=0.45.1",
-    "pip>=25.3",
 ]
 build-backend = "setuptools.build_meta"
 
 [project]
-name = "flag_scale"
+name = "flagscale"
 version = "1.0.0"
 description = "FlagScale is a comprehensive toolkit designed to support the entire lifecycle of large models, developed with the backing of the Beijing Academy of Artificial Intelligence (BAAI)."
 readme = "README.md"
 license = {text = "Apache-2.0"}
 requires-python = ">=3.10"
-dynamic = ["dependencies"]
 
 [project.urls]
-Homepage = "https://github.com/flagos-ai/FlagScale"
+Homepage = "https://github.com/flagos-ai/FlagScale.git"
 
 [project.scripts]
-flagscale = "flag_scale.flagscale.cli:flagscale"
+flagscale = "flagscale.cli:flagscale"
+
+[tool.setuptools.packages.find]
+include = ["flagscale*"]
+
+[tool.setuptools.package-data]
+"flagscale" = ["**/*.yaml", "**/*.yml"]
 
 [tool.ruff]
 line-length = 100
@@ -88,7 +92,7 @@ ignore = [
     "E501",         # Line too long
     "E722",         # Use bare except, specify exception instead
     "E731",         # lambda assignment
-    "E741",         # Use variables named ‘l’, ‘O’, or ‘I’
+    "E741",         # Use variables named 'l', 'O', or 'I'
     "F403", "F405", # star imports
     "B905",         # zip strict
     "B007",         # unused loop var
@@ -115,11 +119,3 @@ extend-ignore-identifiers-re = [
     "nd_*",
     "\\d+[Bb][-]?[Aa]\\d+(\\.\\d+)?[Bb]", # protect 30BA3B
 ]
-
-#[tool.mypy]
-#plugins = ["pydantic.mypy"]
-#ignore_missing_imports = true
-#check_untyped_defs = true
-#follow_imports = "silent"
-#mypy_path = "."
-#explicit_package_bases = true
diff --git a/requirements/common.txt b/requirements/common.txt
index c23812a8f7..7dd478af8c 100644
--- a/requirements/common.txt
+++ b/requirements/common.txt
@@ -17,3 +17,6 @@ requests==2.32.3
 aiohttp==3.11.11
 pyyaml==6.0.2
 regex==2025.10.22
+
+# CLI
+typer>=0.9.0
diff --git a/run.py b/run.py
index e67545f470..6f43937e4d 100644
--- a/run.py
+++ b/run.py
@@ -1,126 +1,23 @@
-import os
-import warnings
+#!/usr/bin/env python
+"""Backward compatibility wrapper for flagscale.run.
 
-import hydra
-from omegaconf import DictConfig, OmegaConf
-
-from flagscale.logger import logger
-from flagscale.runner.autotuner_factory import AutotunerFactory
-from flagscale.runner.runner_base import Runner
-from flagscale.runner.runner_inference import SSHInferenceRunner
-from flagscale.runner.runner_serve import CloudServeRunner, SSHServeRunner
-from flagscale.runner.runner_train import CloudTrainRunner, SSHTrainRunner
-from flagscale.runner.utils import is_master
-
-# To accommodate the scenario where the before_start field is used to switch to the actual environment during program execution,
-# we have placed the import statements inside the function body rather than at the beginning of the file.
-
-
-FLAGSCALE_USE_V1 = os.environ.get("FLAGSCALE_USE_V1", "1").lower() in ("1", "true")
-
-VALID_TASKS = {"train", "inference", "compress", "serve", "rl"}
-
-LEGACY_RUNNER_MAP = {
-    "train": SSHTrainRunner,
-    "inference": SSHInferenceRunner,
-    "serve": SSHServeRunner,
-}
-
-# task_type -> allowed actions
-TASK_ACTIONS = {
-    "train": {"run", "dryrun", "test", "stop", "query", "auto_tune"},
-    "inference": {"run", "dryrun", "test", "stop"},
-    "serve": {"run", "test", "stop", "auto_tune"},
-    "compress": {"run", "dryrun", "stop"},
-    "rl": {"run", "dryrun", "test", "stop"},
-}
-
-
-def check_and_reset_deploy_config(config: DictConfig) -> None:
-    if config.experiment.get("deploy", {}):
-        OmegaConf.set_struct(config.experiment.runner, False)
-        config.experiment.runner.deploy = config.experiment.deploy
-        del config.experiment.deploy
-        warnings.warn(
-            "'config.experiment.deploy' has been moved to 'config.experiment.runner.deploy'. "
-            "Support for the old location will be removed in a future release."
-        )
-        OmegaConf.set_struct(config.experiment.runner, True)
-
-
-def validate_task(task_type: str, action: str) -> None:
-    if task_type not in VALID_TASKS:
-        raise ValueError(f"Invalid task_type '{task_type}', must be one of {sorted(VALID_TASKS)}")
+This file is kept for backward compatibility with existing scripts and documentation.
+New code should use: python -m flagscale.run
 
-    allowed_actions = TASK_ACTIONS[task_type]
-    if action not in allowed_actions:
-        raise ValueError(
-            f"Action '{action}' is not allowed for task_type '{task_type}'. "
-            f"Allowed actions: {sorted(allowed_actions)}"
-        )
-
-
-def get_runner(config: DictConfig, task_type: str):
-    runner_type = config.experiment.runner.get("type", "ssh")
-
-    if runner_type == "cloud":
-        if task_type == "train":
-            return CloudTrainRunner(config)
-        elif task_type == "serve":
-            if FLAGSCALE_USE_V1:
-                return Runner(config)
-            else:
-                return CloudServeRunner(config)
-        else:
-            raise NotImplementedError(f"Task type '{task_type}' is not supported by cloud runner")
-
-    if FLAGSCALE_USE_V1:
-        return Runner(config)
-
-    logger.warning(
-        "Using legacy runner, which will be removed in future. Please use new runner instead."
-    )
-
-    assert task_type in LEGACY_RUNNER_MAP, (
-        f"Task type '{task_type}' is not supported by legacy runner"
-    )
-    return LEGACY_RUNNER_MAP[task_type](config)
-
-
-def handle_auto_tune(config: DictConfig, task_type: str) -> None:
-    if task_type not in {"serve", "train"}:
-        raise NotImplementedError(f"Auto tune is not implemented for task type '{task_type}'")
-
-    # Only one autotuner process for MPI-based runs
-    if task_type == "train" and not is_master(config):
-        return
-
-    AutoTuner = AutotunerFactory.get_autotuner(task_type)
-    AutoTuner(config).tune()
+Note: This file has its own @hydra.main decorator to ensure config paths are resolved
+relative to the current working directory, not relative to the flagscale package.
+"""
 
+import hydra
+from omegaconf import DictConfig
 
-def execute_action(runner, action: str, task_type: str, config: DictConfig) -> None:
-    if action == "run":
-        if task_type == "train":
-            enable_monitoring = config.experiment.runner.get("enable_monitoring", False)
-            enable_gpu_health_check = config.experiment.runner.get("enable_gpu_health_check", False)
-            runner.run(
-                enable_monitoring=enable_monitoring, enable_gpu_health_check=enable_gpu_health_check
-            )
-            if enable_monitoring:
-                logger.info("Monitor service will be started automatically when training begins.")
-        else:
-            runner.run()
-    elif action == "dryrun":
-        runner.run(dryrun=True)
-    elif action == "test":
-        runner.run(with_test=True)
-    elif action == "stop":
-        runner.stop()
-    elif action == "query":
-        runner.query()
-    else:
-        raise ValueError(f"Unknown action '{action}'")
+from flagscale.run import (
+    check_and_reset_deploy_config,
+    execute_action,
+    get_runner,
+    handle_auto_tune,
+    validate_task,
+)
 
 
 @hydra.main(version_base=None, config_name="config")
@@ -131,14 +28,12 @@ def main(config: DictConfig) -> None:
     action = config.action
     validate_task(task_type, action)
 
-    # auto_tune invokes the runner internally
     if action == "auto_tune":
         handle_auto_tune(config, task_type)
         return
 
     runner = get_runner(config, task_type)
     execute_action(runner, action, task_type, config)
-    return
 
 
 if __name__ == "__main__":
diff --git a/setup.py b/setup.py
index bf1e1cfbd6..39aacf3bfe 100644
--- a/setup.py
+++ b/setup.py
@@ -1,94 +1,95 @@
 import os
-import sys
+import subprocess
 
 from setuptools import setup
-from setuptools._distutils._log import log
 
-# Add current directory to path to import version
-_current_dir = os.path.dirname(os.path.abspath(__file__))
-if _current_dir not in sys.path:
-    sys.path.insert(0, _current_dir)
+SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
 
-from version import FLAGSCALE_VERSION
 
+def _get_version() -> str:
+    """Read version from pyproject.toml (single source of truth, Python 3.11+)."""
+    try:
+        import tomllib
+
+        pyproject_path = os.path.join(SCRIPT_DIR, "pyproject.toml")
+        with open(pyproject_path, "rb") as f:
+            data = tomllib.load(f)
+            return data.get("project", {}).get("version", "0.0.0")
+    except Exception:
+        return "0.0.0"
+
+
+FLAGSCALE_VERSION = _get_version()
+INSTALL_SCRIPT = os.path.join(SCRIPT_DIR, "tools", "install", "install.sh")
+
+
+def is_pip_isolated_build():
+    """Check if we're running in pip's isolated build environment.
+
+    Prefer detection via pip/PEP 517 specific environment variables rather than
+    fragile filesystem heuristics that can misclassify user code directories.
+    """
+    # Common environment markers set by pip during isolated/PEP 517 builds.
+    pip_env_markers = (
+        "PEP517_BUILD_BACKEND",
+        "PIP_BUILD_TRACKER",
+        "PIP_REQ_TRACKER",
+        "PIP_ISOLATED_ENV",
+    )
+    for var in pip_env_markers:
+        if os.environ.get(var):
+            return True
+    return False
 
-def _read_requirements_file(requirements_path):
-    """Read the requirements file and return the dependency list"""
-    requirements = []
+
+def run_install_script():
+    """Invoke tools/install/install.sh for dependency installation."""
+    if not os.path.exists(INSTALL_SCRIPT):
+        print(f"[flagscale] Warning: Install script not found: {INSTALL_SCRIPT}")
+        return
+
+    platform = os.environ.get("FLAGSCALE_PLATFORM", "cuda")
+    task = os.environ.get("FLAGSCALE_TASK", "all")
+
+    cmd = [
+        INSTALL_SCRIPT,
+        "--platform",
+        platform,
+        "--task",
+        task,
+        "--pkg-mgr",
+        "pip",
+        "--no-system",
+        "--only-pip",
+        "--src-deps",
+        "megatron-lm",
+    ]
+
+    print(f"[flagscale] Running: {' '.join(cmd)}")
     try:
-        with open(requirements_path, "r", encoding="utf-8") as f:
-            for line in f:
-                line = line.strip()
-                if not line or line.startswith("#"):
-                    continue
-                requirements.append(line)
-    except FileNotFoundError:
-        print(f"[WARNING] Requirements file not found: {requirements_path}")
-        return []
-    return requirements
-
-
-def deduplicate_dependencies(dependencies):
-    """Deduplicate the dependencies"""
-    seen = set()
-    result = []
-    for dep in dependencies:
-        pkg_name = (
-            dep.split("==")[0]
-            .split(">=")[0]
-            .split("<=")[0]
-            .split(">")[0]
-            .split("<")[0]
-            .split("!=")[0]
-            .strip()
-        )
-        pkg_name_lower = pkg_name.lower()
-        if pkg_name_lower not in seen:
-            seen.add(pkg_name_lower)
-            result.append(dep)
-    return result
-
-
-def _get_install_requires():
-    """get install_requires list"""
-    install_requires = []
-
-    install_requires.extend(_read_requirements_file("requirements/requirements-base.txt"))
-    install_requires.extend(_read_requirements_file("requirements/requirements-common.txt"))
-    core_deps = ["setuptools==79.0.1", "packaging>=24.2", "importlib_metadata>=8.5.0"]
-
-    all_deps = install_requires + core_deps
-    result = deduplicate_dependencies(all_deps)
-    log.info(f"[build] install_requires Unique dependencies: {result}")
-
-    return result
+        subprocess.run(cmd, check=True)
+    except subprocess.CalledProcessError as exc:
+        raise RuntimeError(f"Installation failed with exit code {exc.returncode}") from exc
+
+
+# Run install.sh when using: pip install --no-build-isolation .
+# Control via env vars: FLAGSCALE_PLATFORM (default: cuda), FLAGSCALE_TASK (default: all)
+if not is_pip_isolated_build():
+    run_install_script()
 
+# NOTE: Installation methods:
+# 1. pip install .                      -> Installs flagscale CLI only (isolated build)
+# 2. pip install --no-build-isolation . -> Installs CLI + pip deps + megatron-lm (no apt)
+#    Control with: FLAGSCALE_PLATFORM=cuda FLAGSCALE_TASK=train pip install --no-build-isolation .
+# 3. flagscale install                  -> Full installation (apt + pip + all source deps)
 
 setup(
-    name="flag_scale",
+    name="flagscale",
     version=FLAGSCALE_VERSION,
-    description="FlagScale is a comprehensive toolkit designed to support the entire lifecycle of large models, developed with the backing of the Beijing Academy of Artificial Intelligence (BAAI). ",
+    description="FlagScale is a comprehensive toolkit designed to support the entire lifecycle of large models, developed with the backing of the Beijing Academy of Artificial Intelligence (BAAI).",
     url="https://github.com/FlagOpen/FlagScale",
-    packages=[
-        "flag_scale",
-        "flag_scale.flagscale",
-        "flag_scale.examples",
-        "flag_scale.tools",
-        "flag_scale.tests",
-    ],
-    package_dir={
-        "flag_scale": "",
-        "flag_scale.flagscale": "flagscale",
-        "flag_scale.examples": "examples",
-        "flag_scale.tools": "tools",
-        "flag_scale.tests": "tests",
-    },
-    package_data={
-        "flag_scale.flagscale": ["**/*"],
-        "flag_scale.examples": ["**/*"],
-        "flag_scale.tools": ["**/*"],
-        "flag_scale.tests": ["**/*"],
-    },
-    install_requires=_get_install_requires(),
-    entry_points={"console_scripts": ["flagscale=flag_scale.flagscale.cli:flagscale"]},
+    packages=["flagscale"],
+    package_dir={"flagscale": "flagscale"},
+    install_requires=["typer>=0.9.0"],
+    entry_points={"console_scripts": ["flagscale=flagscale.cli:flagscale"]},
 )
diff --git a/tests/unit_tests/test_cli.py b/tests/unit_tests/test_cli.py
new file mode 100644
index 0000000000..85814b5d0f
--- /dev/null
+++ b/tests/unit_tests/test_cli.py
@@ -0,0 +1,143 @@
+from pathlib import Path
+
+import pytest
+from click.exceptions import Exit as ClickExit
+
+from flagscale.cli import get_action, resolve_config
+
+
+class TestGetAction:
+    """Tests for get_action() function"""
+
+    def test_default_returns_run(self):
+        """No flags set returns 'run'"""
+        assert get_action(False, False, False, False, False) == "run"
+
+    def test_stop_flag(self):
+        """stop=True returns 'stop'"""
+        assert get_action(True, False, False, False, False) == "stop"
+
+    def test_dryrun_flag(self):
+        """dryrun=True returns 'dryrun'"""
+        assert get_action(False, True, False, False, False) == "dryrun"
+
+    def test_test_flag(self):
+        """test=True returns 'test'"""
+        assert get_action(False, False, True, False, False) == "test"
+
+    def test_query_flag(self):
+        """query=True returns 'query'"""
+        assert get_action(False, False, False, True, False) == "query"
+
+    def test_tune_flag(self):
+        """tune=True returns 'auto_tune'"""
+        assert get_action(False, False, False, False, True) == "auto_tune"
+
+    def test_mutually_exclusive_stop_dryrun(self, capsys):
+        """Multiple flags (stop and dryrun) raises Exit(1)"""
+        with pytest.raises(ClickExit) as exc_info:
+            get_action(True, True, False, False, False)
+        assert exc_info.value.exit_code == 1
+
+    def test_mutually_exclusive_all_flags(self, capsys):
+        """All flags set raises Exit(1)"""
+        with pytest.raises(ClickExit) as exc_info:
+            get_action(True, True, True, True, True)
+        assert exc_info.value.exit_code == 1
+
+    def test_mutually_exclusive_test_query(self, capsys):
+        """Multiple flags (test and query) raises Exit(1)"""
+        with pytest.raises(ClickExit) as exc_info:
+            get_action(False, False, True, True, False)
+        assert exc_info.value.exit_code == 1
+
+
+class TestResolveConfig:
+    """Tests for resolve_config() function"""
+
+    def test_with_yaml_path(self, tmp_path):
+        """Explicit yaml path returns parent dir and stem"""
+        yaml_file = tmp_path / "test_config.yaml"
+        yaml_file.write_text("test: value")
+
+        path, name = resolve_config("model", yaml_file, "train")
+
+        assert path == str(tmp_path)
+        assert name == "test_config"
+
+    def test_with_nested_yaml_path(self, tmp_path):
+        """Yaml path in nested directory works correctly"""
+        nested_dir = tmp_path / "conf" / "nested"
+        nested_dir.mkdir(parents=True)
+        yaml_file = nested_dir / "my_train.yaml"
+        yaml_file.write_text("model: test")
+
+        path, name = resolve_config("model", yaml_file, "train")
+
+        assert path == str(nested_dir)
+        assert name == "my_train"
+
+    def test_yaml_not_exists(self):
+        """Non-existent yaml path raises Exit(1)"""
+        with pytest.raises(ClickExit) as exc_info:
+            resolve_config("model", Path("/nonexistent/path/config.yaml"), "train")
+        assert exc_info.value.exit_code == 1
+
+    def test_model_not_found(self):
+        """Non-existent model raises Exit(1)"""
+        with pytest.raises(ClickExit) as exc_info:
+            resolve_config("nonexistent_model_xyz_12345", None, "train")
+        assert exc_info.value.exit_code == 1
+
+    def test_from_model_name_aquila(self, mocker):
+        """Resolves config from examples/aquila/conf directory if it exists"""
+        # This test checks if the function correctly constructs the path
+        # We mock Path.exists() to control the test
+        mocker.patch.object(Path, "exists", return_value=True)
+
+        # The function should construct path: script_dir / "examples" / model / "conf" / f"{task}.yaml"
+        try:
+            path, name = resolve_config("aquila", None, "train")
+            # If aquila exists, it should return the path
+            assert "aquila" in path or name == "train"
+        except SystemExit:
+            # If aquila doesn't exist in the test environment, that's expected
+            pass
+
+
+class TestResolveConfigEdgeCases:
+    """Edge case tests for resolve_config()"""
+
+    def test_yaml_path_with_spaces(self, tmp_path):
+        """Yaml path with spaces in directory name works"""
+        spaced_dir = tmp_path / "path with spaces"
+        spaced_dir.mkdir()
+        yaml_file = spaced_dir / "config.yaml"
+        yaml_file.write_text("test: value")
+
+        path, name = resolve_config("model", yaml_file, "train")
+
+        assert "path with spaces" in path
+        assert name == "config"
+
+    def test_yaml_path_absolute(self, tmp_path):
+        """Absolute yaml path is resolved correctly"""
+        yaml_file = tmp_path / "absolute_test.yaml"
+        yaml_file.write_text("test: value")
+
+        # Use absolute path
+        abs_path = yaml_file.resolve()
+        path, name = resolve_config("model", abs_path, "train")
+
+        assert Path(path).is_absolute()
+        assert name == "absolute_test"
+
+    def test_empty_model_name_with_yaml(self, tmp_path):
+        """Empty model name works when yaml_path is provided"""
+        yaml_file = tmp_path / "config.yaml"
+        yaml_file.write_text("test: value")
+
+        path, name = resolve_config("", yaml_file, "train")
+
+        assert path == str(tmp_path)
+        assert name == "config"
diff --git a/tests/unit_tests/test_init.py b/tests/unit_tests/test_init.py
new file mode 100644
index 0000000000..b672bd1c05
--- /dev/null
+++ b/tests/unit_tests/test_init.py
@@ -0,0 +1,52 @@
+def test_get_version_from_importlib_metadata(mocker):
+    """Test version retrieval from importlib.metadata"""
+    mocker.patch("importlib.metadata.version", return_value="1.2.3")
+
+    # Import the function fresh to test with mocked metadata
+    from flagscale import _get_version
+
+    # Since the module is already imported, we need to call the function directly
+    # The mock should be in place for the importlib.metadata.version call
+    result = _get_version()
+    assert result == "1.2.3"
+
+
+def test_get_version_fallback_to_pyproject(mocker, tmp_path):
+    """Test fallback to pyproject.toml when importlib fails"""
+    # Make importlib.metadata.version raise an exception
+    mocker.patch("importlib.metadata.version", side_effect=Exception("Not installed"))
+
+    # Create a fake pyproject.toml
+    pyproject_content = b'[project]\nversion = "2.0.0"\n'
+    pyproject_path = tmp_path / "pyproject.toml"
+    pyproject_path.write_bytes(pyproject_content)
+
+    from flagscale import _get_version
+
+    # This test verifies the fallback logic exists
+    # The actual behavior depends on file system state
+    result = _get_version()
+    assert isinstance(result, str)
+
+
+def test_get_version_fallback_to_default(mocker):
+    """Test fallback to '0.0.0' when all methods fail"""
+    # Make importlib.metadata.version raise an exception
+    mocker.patch("importlib.metadata.version", side_effect=Exception("Not installed"))
+
+    # Make tomllib.load raise an exception
+    mocker.patch("tomllib.load", side_effect=Exception("Parse error"))
+
+    from flagscale import _get_version
+
+    result = _get_version()
+    # Should return a string (either version or default)
+    assert isinstance(result, str)
+
+
+def test_version_is_string():
+    """Test that __version__ is always a string"""
+    from flagscale import __version__
+
+    assert isinstance(__version__, str)
+    assert len(__version__) > 0
diff --git a/tests/unit_tests/test_run.py b/tests/unit_tests/test_run.py
new file mode 100644
index 0000000000..fd81459c80
--- /dev/null
+++ b/tests/unit_tests/test_run.py
@@ -0,0 +1,164 @@
+import pytest
+from omegaconf import OmegaConf
+
+from flagscale.run import (
+    TASK_ACTIONS,
+    VALID_TASKS,
+    check_and_reset_deploy_config,
+    validate_task,
+)
+
+
+class TestValidateTask:
+    """Tests for validate_task() function"""
+
+    @pytest.mark.parametrize(
+        "task,action",
+        [
+            ("train", "run"),
+            ("train", "dryrun"),
+            ("train", "test"),
+            ("train", "stop"),
+            ("train", "query"),
+            ("train", "auto_tune"),
+            ("inference", "run"),
+            ("inference", "dryrun"),
+            ("inference", "test"),
+            ("inference", "stop"),
+            ("serve", "run"),
+            ("serve", "test"),
+            ("serve", "stop"),
+            ("serve", "auto_tune"),
+            ("compress", "run"),
+            ("compress", "dryrun"),
+            ("compress", "stop"),
+            ("rl", "run"),
+            ("rl", "dryrun"),
+            ("rl", "test"),
+            ("rl", "stop"),
+        ],
+    )
+    def test_valid_combinations(self, task, action):
+        """Valid task/action pairs should not raise"""
+        validate_task(task, action)  # Should not raise
+
+    def test_invalid_task(self):
+        """Invalid task raises ValueError"""
+        with pytest.raises(ValueError, match="Invalid task_type"):
+            validate_task("invalid_task", "run")
+
+    def test_invalid_task_empty_string(self):
+        """Empty string task raises ValueError"""
+        with pytest.raises(ValueError, match="Invalid task_type"):
+            validate_task("", "run")
+
+    def test_invalid_action_for_train(self):
+        """Invalid action for train raises ValueError"""
+        with pytest.raises(ValueError, match="not allowed for task_type"):
+            validate_task("train", "invalid_action")
+
+    def test_invalid_action_query_for_inference(self):
+        """Query action not allowed for inference"""
+        with pytest.raises(ValueError, match="not allowed for task_type"):
+            validate_task("inference", "query")
+
+    def test_invalid_action_query_for_serve(self):
+        """Query action not allowed for serve"""
+        with pytest.raises(ValueError, match="not allowed for task_type"):
+            validate_task("serve", "query")
+
+    def test_invalid_action_dryrun_for_serve(self):
+        """Dryrun action not allowed for serve"""
+        with pytest.raises(ValueError, match="not allowed for task_type"):
+            validate_task("serve", "dryrun")
+
+    def test_invalid_action_auto_tune_for_compress(self):
+        """Auto_tune action not allowed for compress"""
+        with pytest.raises(ValueError, match="not allowed for task_type"):
+            validate_task("compress", "auto_tune")
+
+
+class TestValidTasks:
+    """Tests for VALID_TASKS and TASK_ACTIONS constants"""
+
+    def test_valid_tasks_contains_expected(self):
+        """VALID_TASKS contains all expected task types"""
+        expected = {"train", "inference", "compress", "serve", "rl"}
+        assert VALID_TASKS == expected
+
+    def test_task_actions_keys_match_valid_tasks(self):
+        """TASK_ACTIONS keys match VALID_TASKS"""
+        assert set(TASK_ACTIONS.keys()) == VALID_TASKS
+
+    def test_all_tasks_have_run_action(self):
+        """All tasks support the 'run' action"""
+        for task in VALID_TASKS:
+            assert "run" in TASK_ACTIONS[task], f"Task '{task}' should support 'run' action"
+
+    def test_all_tasks_have_stop_action(self):
+        """All tasks support the 'stop' action"""
+        for task in VALID_TASKS:
+            assert "stop" in TASK_ACTIONS[task], f"Task '{task}' should support 'stop' action"
+
+
+class TestCheckAndResetDeployConfig:
+    """Tests for check_and_reset_deploy_config() function"""
+
+    def test_migrates_deploy_to_runner(self):
+        """Deploy config moves from experiment.deploy to experiment.runner.deploy"""
+        config = OmegaConf.create(
+            {"experiment": {"deploy": {"key": "value", "nested": {"a": 1}}, "runner": {}}}
+        )
+
+        check_and_reset_deploy_config(config)
+
+        assert config.experiment.runner.deploy.key == "value"
+        assert config.experiment.runner.deploy.nested.a == 1
+        assert "deploy" not in config.experiment
+
+    def test_no_change_when_no_deploy(self):
+        """No change when no deploy section exists"""
+        config = OmegaConf.create({"experiment": {"runner": {"other_key": "value"}}})
+
+        check_and_reset_deploy_config(config)
+
+        assert "deploy" not in config.experiment
+        assert "deploy" not in config.experiment.runner
+        assert config.experiment.runner.other_key == "value"
+
+    def test_no_change_when_deploy_is_empty(self):
+        """No migration when deploy is empty dict"""
+        config = OmegaConf.create({"experiment": {"deploy": {}, "runner": {}}})
+
+        check_and_reset_deploy_config(config)
+
+        # Empty dict is falsy, so no migration should happen
+        assert "deploy" in config.experiment or "deploy" not in config.experiment.runner
+
+    def test_preserves_existing_runner_config(self):
+        """Migration preserves existing runner configuration"""
+        config = OmegaConf.create(
+            {
+                "experiment": {
+                    "deploy": {"deploy_key": "deploy_value"},
+                    "runner": {"existing_key": "existing_value"},
+                }
+            }
+        )
+
+        check_and_reset_deploy_config(config)
+
+        assert config.experiment.runner.existing_key == "existing_value"
+        assert config.experiment.runner.deploy.deploy_key == "deploy_value"
+
+    def test_warns_about_deprecated_location(self, mocker):
+        """Function warns about deprecated deploy location"""
+        mock_warn = mocker.patch("warnings.warn")
+
+        config = OmegaConf.create({"experiment": {"deploy": {"key": "value"}, "runner": {}}})
+
+        check_and_reset_deploy_config(config)
+
+        mock_warn.assert_called_once()
+        warning_msg = mock_warn.call_args[0][0]
+        assert "moved" in warning_msg.lower() or "deprecated" in warning_msg.lower()
diff --git a/tools/install/cuda/install_base.sh b/tools/install/cuda/install_base.sh
index d79d073f84..eedd46d8a6 100755
--- a/tools/install/cuda/install_base.sh
+++ b/tools/install/cuda/install_base.sh
@@ -19,6 +19,8 @@ done
 
 install_apt() {
     is_phase_enabled base || return 0
+    # Skip apt in only-pip mode
+    is_only_pip && { log_info "Skipping apt packages (only-pip mode)"; return 0; }
     set_step "Installing CUDA apt packages"
     run_cmd -d $DEBUG apt-get install -y --no-install-recommends $APT_PACKAGES || return 1
     log_success "CUDA apt packages installed"
diff --git a/tools/install/cuda/install_serve.sh b/tools/install/cuda/install_serve.sh
index 3c239ba62f..3f6726f20d 100755
--- a/tools/install/cuda/install_serve.sh
+++ b/tools/install/cuda/install_serve.sh
@@ -53,6 +53,11 @@ install_vllm() {
 }
 
 install_src() {
+    # Skip in only-pip mode unless we have matching src-deps overrides
+    if is_only_pip && ! has_src_deps_for_phase $SRC_DEPS_LIST; then
+        log_info "Skipping source deps (only-pip mode)"
+        return 0
+    fi
     # Skip if phase disabled and no matching src-deps
     is_phase_enabled task || has_src_deps_for_phase $SRC_DEPS_LIST || return 0
 
diff --git a/tools/install/cuda/install_train.sh b/tools/install/cuda/install_train.sh
index 5b571aa331..8c9d82ff90 100755
--- a/tools/install/cuda/install_train.sh
+++ b/tools/install/cuda/install_train.sh
@@ -92,6 +92,11 @@ install_megatron_lm() {
 }
 
 install_src() {
+    # Skip in only-pip mode unless we have matching src-deps overrides
+    if is_only_pip && ! has_src_deps_for_phase $SRC_DEPS_LIST; then
+        log_info "Skipping source deps (only-pip mode)"
+        return 0
+    fi
     # Skip if phase disabled and no matching src-deps
     is_phase_enabled task || has_src_deps_for_phase $SRC_DEPS_LIST || return 0
 
diff --git a/tools/install/install.sh b/tools/install/install.sh
index 064446b8cb..a99da3a5e5 100755
--- a/tools/install/install.sh
+++ b/tools/install/install.sh
@@ -40,6 +40,9 @@ INSTALL_DEV=true
 INSTALL_BASE=true
 INSTALL_TASK=true
 
+# Only pip flag (skip apt and source builds, only install pip packages)
+ONLY_PIP=false
+
 # Override flags (selective installation)
 SRC_DEPS=""
 PIP_DEPS=""
@@ -83,6 +86,7 @@ export_config() {
     export FLAGSCALE_SRC_DEPS="$SRC_DEPS"
     export FLAGSCALE_PIP_DEPS="$PIP_DEPS"
     export FLAGSCALE_FORCE_BUILD="$FORCE_BUILD"
+    export FLAGSCALE_ONLY_PIP="$ONLY_PIP"
 
     # Other config
     export FLAGSCALE_PLATFORM="$PLATFORM"
@@ -153,8 +157,9 @@ OPTIONS:
     --no-dev               Skip dev phase (dev requirements)
     --no-base              Skip base phase (base requirements + source)
     --no-task              Skip task phase (task requirements + source)
+    --only-pip             Only install pip packages (skip apt and source builds)
 
-  Selective Installation (overrides --no-* for specific packages):
+  Selective Installation (HIGHEST PRIORITY - overrides --no-* and --only-pip):
     --pip-deps PKGS        Install specific pip packages (comma-separated)
     --src-deps DEPS        Install specific source deps (comma-separated)
                            dev: sccache
@@ -177,6 +182,8 @@ OPTIONS:
 EXAMPLES:
     $0 --platform cuda --task train                                # Full installation
     $0 --platform cuda --task train --no-system                    # Skip system phase
+    $0 --platform cuda --task train --only-pip                     # Only pip packages
+    $0 --platform cuda --task train --only-pip --src-deps megatron-lm  # Pip + megatron-lm only
     $0 --platform cuda --task train --no-system --no-dev --no-base --no-task --src-deps megatron-lm
 EOF
 }
@@ -190,6 +197,7 @@ parse_args() {
             --no-dev)          INSTALL_DEV=false; shift ;;
             --no-base)         INSTALL_BASE=false; shift ;;
             --no-task)         INSTALL_TASK=false; shift ;;
+            --only-pip)        ONLY_PIP=true; shift ;;
             --pkg-mgr)         PKG_MGR="$2"; shift 2 ;;
             --env-name)        ENV_NAME="$2"; shift 2 ;;
             --install-dir)     FLAGSCALE_HOME="$2"; shift 2 ;;
@@ -227,6 +235,7 @@ main() {
 
     print_header "FlagScale Installation"
     log_info "Platform: $PLATFORM | Task: $TASK | Pkg: $PKG_MGR"
+    [ "$ONLY_PIP" = true ] && log_info "Only pip mode: skipping apt and source builds"
     [ -n "$SRC_DEPS" ] && log_info "Source deps override: $SRC_DEPS"
     [ -n "$PIP_DEPS" ] && log_info "Pip deps override: $PIP_DEPS"
     log_info "Install dir: $FLAGSCALE_HOME"
diff --git a/tools/install/install_dev.sh b/tools/install/install_dev.sh
index 749a9caf1d..68b5a38c4e 100755
--- a/tools/install/install_dev.sh
+++ b/tools/install/install_dev.sh
@@ -92,6 +92,11 @@ install_sccache() {
 }
 
 install_src() {
+    # Skip in only-pip mode unless we have matching src-deps overrides
+    if is_only_pip && ! has_src_deps_for_phase $SRC_DEPS_LIST; then
+        log_info "Skipping source deps (only-pip mode)"
+        return 0
+    fi
     # Skip if phase disabled and no matching src-deps
     is_phase_enabled dev || has_src_deps_for_phase $SRC_DEPS_LIST || return 0
 
diff --git a/tools/install/utils/pkg_utils.sh b/tools/install/utils/pkg_utils.sh
index 8ee69e2f0b..6ba2cc0f15 100644
--- a/tools/install/utils/pkg_utils.sh
+++ b/tools/install/utils/pkg_utils.sh
@@ -85,6 +85,12 @@ should_build_package() {
 #   FLAGSCALE_INSTALL_SYSTEM/DEV/BASE/TASK - true/false
 #   FLAGSCALE_PIP_DEPS - comma-separated pip packages
 #   FLAGSCALE_SRC_DEPS - comma-separated source deps
+#   FLAGSCALE_ONLY_PIP - true/false (skip apt and source builds)
+
+# Check if only pip mode is enabled (skip apt and source builds)
+is_only_pip() {
+    [ "${FLAGSCALE_ONLY_PIP:-false}" = true ]
+}
 
 is_phase_enabled() {
     local phase="$1"
@@ -109,10 +115,14 @@ is_in_override() {
 
 # Should install source dep?
 # Usage: should_install_src <phase> <dep_name>
+# Priority: --src-deps override > --only-pip > phase enabled
 should_install_src() {
     local phase="$1" item="$2"
-    is_phase_enabled "$phase" && return 0
+    # Override flags have highest priority
     is_in_override src "$item" && return 0
+    # Skip source builds in only-pip mode (unless overridden above)
+    is_only_pip && return 1
+    is_phase_enabled "$phase" && return 0
     return 1
 }
 
@@ -120,7 +130,33 @@ should_install_src() {
 # Phase-Scoped Filtering
 # =============================================================================
 
-# Get pip-deps that match a requirements file
+# Expand requirements file content, resolving -r includes recursively
+# Usage: expand_requirements_file <req_file>
+expand_requirements_file() {
+    local req_file="$1"
+    local base_dir
+    base_dir="$(dirname "$req_file")"
+
+    [ ! -f "$req_file" ] && return 0
+
+    while IFS= read -r line || [ -n "$line" ]; do
+        # Handle -r includes (e.g., "-r ../common.txt" or "-r common.txt")
+        if echo "$line" | grep -qE '^-r[[:space:]]+'; then
+            local included_file
+            included_file="$(echo "$line" | sed 's/^-r[[:space:]]*//')"
+            # Resolve relative path from the base directory
+            if [ "${included_file#/}" = "$included_file" ]; then
+                included_file="$base_dir/$included_file"
+            fi
+            # Recursively expand the included file
+            expand_requirements_file "$included_file"
+        else
+            echo "$line"
+        fi
+    done < "$req_file"
+}
+
+# Get pip-deps that match a requirements file (resolves -r includes)
 get_pip_deps_for_requirements() {
     local req_file="$1"
     local pip_deps="${FLAGSCALE_PIP_DEPS:-}"
@@ -128,8 +164,12 @@ get_pip_deps_for_requirements() {
 
     [ -z "$pip_deps" ] || [ ! -f "$req_file" ] && return 0
 
+    # Expand requirements file to include all -r references
+    local expanded_content
+    expanded_content="$(expand_requirements_file "$req_file")"
+
     for pkg in $(echo "$pip_deps" | tr ',' ' '); do
-        grep -qiE "^${pkg}([=<>!~\[]|$)" "$req_file" 2>/dev/null && matched="$matched $pkg"
+        echo "$expanded_content" | grep -qiE "^${pkg}([=<>!~\[]|$)" 2>/dev/null && matched="$matched $pkg"
     done
     echo "$matched" | xargs
 }
diff --git a/version.py b/version.py
deleted file mode 100644
index 59c5e38db2..0000000000
--- a/version.py
+++ /dev/null
@@ -1 +0,0 @@
-FLAGSCALE_VERSION = "1.0.0"