Features: #1
base: main
.dockerignore

```
@@ -1,8 +1,8 @@
# .dockerignore
venv/
assets/
qdrant_data/
redpanda_data/
__pycache__/
*.pyc
.git/
.env
qdrant_data/
redpanda_data/
*.db
```
.gitignore

```
@@ -1,28 +1,104 @@
# Python
__pycache__/
*.pyc
*.pyo
*.pyd
*.py[cod]
*$py.class
*.so
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# Virtual Environments
venv/
.venv/
env/
.env
ENV/
env.bak/
venv.bak/

# Environment variables
.env
.env.local
.env.*.local

# IDE
.vscode/
.idea/
*.swp
*.swo
*~
.DS_Store
.AppleDouble
.LSOverride

# OS
.DS_Store
.DS_Store?
._*
.Spotlight-V100
.Trashes
ehthumbs.db
Thumbs.db

.vscode/
.idea/

# Project-specific data directories
assets/videos/
*.mp4

qdrant_data/
redpanda_data/

# Logs
*.log
logs/
*.log.*

# Testing
.pytest_cache/
.coverage
htmlcov/
.tox/
.hypothesis/
.mypy_cache/
.dmypy.json
dmypy.json

# Jupyter Notebook
.ipynb_checkpoints

# pyenv
.python-version

# pip
pip-log.txt
pip-delete-this-directory.txt

# Model cache (HuggingFace, etc.)
.cache/
models/
*.pt
*.pth
*.ckpt

# Temporary files
*.tmp
*.temp
*.bak
*.swp
*~

# Docker
.dockerignore
```
Dockerfile

```
@@ -1,25 +1,32 @@
# Use a slim Python image (lightweight and fast)
# If you need GPU support later, change this to an nvidia/cuda image or pytorch/pytorch
FROM python:3.10-slim
# Use Python 3.12 to match your project requirements
FROM python:3.12-slim

# Set the working directory to match the volume mount in your compose file
WORKDIR /app

# Install system dependencies (often needed for AI/Inference libraries like OpenCV or numpy)
# 1. Install System Dependencies
RUN apt-get update && apt-get install -y \
    build-essential \
    libgl1 \
    libglib2.0-0 \
    ffmpeg \
    curl \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements first to leverage Docker caching
COPY requirements.txt .
# 2. Install uv
COPY --from=ghcr.io/astral-sh/uv:latest /uv /bin/uv

# 3. Set up the application
WORKDIR /app

# 4. Install Dependencies
# Only copy pyproject.toml (since we deleted uv.lock)
COPY pyproject.toml ./

# Run sync WITHOUT --frozen (this resolves dependencies fresh)
RUN uv sync
```
Review comment on `RUN uv sync` (P1): Builds are not reproducible without a lock file. Consider keeping `uv.lock` and installing with `uv sync --frozen`.
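A minimal sketch of that approach, assuming `uv.lock` is committed next to `pyproject.toml` (the layering shown here is illustrative, not the project's actual setup):

```dockerfile
# Copy the manifest AND the committed lock file before the rest of the code
# so this layer stays cached until dependencies actually change.
COPY pyproject.toml uv.lock ./

# --frozen refuses to re-resolve: the build fails if uv.lock is missing or
# out of sync with pyproject.toml, so every image gets the same versions.
RUN uv sync --frozen
```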
The remainder of the Dockerfile diff:

```
# Install Python dependencies
RUN pip install --no-cache-dir -r requirements.txt
# CRITICAL: Add the virtual environment to the PATH
ENV PATH="/app/.venv/bin:$PATH"

# Copy the rest of the code
# 5. Copy the Code
COPY . .

# This CMD is a fallback; your docker-compose 'command' overrides this.
CMD ["python3", "app/server.py"]
# 6. Default Command
CMD ["uvicorn", "app.server:app", "--host", "0.0.0.0", "--port", "8000"]
```
New file in this PR (an audio transcription module, +110 lines):

```python
import logging
import os
import uuid

import ffmpeg
import numpy as np
from fastembed import TextEmbedding
from faster_whisper import WhisperModel

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# --- LOAD MODELS ---
try:
    WHISPER = WhisperModel("base", device="cpu", compute_type="int8")
    EMBEDDER = TextEmbedding(model_name="BAAI/bge-small-en-v1.5")
except Exception as e:
    logger.critical(f"Failed to load Audio models: {e}")
```

Review comment on the model-loading `except` block (P1): If model loading fails, the error is only logged; `WHISPER` and `EMBEDDER` are never defined, so the first call to `transcribe_chunk` will still fail with a `NameError`.
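One way to address that comment is to fail fast instead of continuing with undefined globals. A minimal sketch (the re-raise is a suggestion, not part of this PR):

```python
import logging

from fastembed import TextEmbedding
from faster_whisper import WhisperModel

logger = logging.getLogger(__name__)

# Re-raise after logging so the service stops at startup rather than
# failing later with a NameError inside transcribe_chunk.
try:
    WHISPER = WhisperModel("base", device="cpu", compute_type="int8")
    EMBEDDER = TextEmbedding(model_name="BAAI/bge-small-en-v1.5")
except Exception as e:
    logger.critical(f"Failed to load Audio models: {e}")
    raise
```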
The rest of the new file:

```python
def transcribe_chunk(
    video_path: str, video_id: str, start_time: float, end_time: float
):
    """
    1. Pipes audio from FFmpeg directly to memory (No Disk I/O).
    2. Uses VAD to skip silence (Saves CPU).
    3. Merges short segments into semantic blocks (Increases Accuracy).
    """
    duration = end_time - start_time

    # 1. GHOST CHECK
    if not os.path.exists(video_path):
        logger.error(f"AUDIO ERROR: File not found at {video_path}")
        return []

    try:
        # 2. EXTRACT AUDIO TO MEMORY (Zero-Copy)
        # We request raw PCM data (s16le) at 16 kHz mono
        out, _ = (
            ffmpeg.input(video_path, ss=start_time, t=duration)
            .output("pipe:", format="s16le", acodec="pcm_s16le", ac=1, ar=16000)
            .run(capture_stdout=True, capture_stderr=True)
        )

        # Convert raw bytes to a float32 numpy array (required by Whisper),
        # normalized between -1 and 1
        audio_array = (
            np.frombuffer(out, np.int16).flatten().astype(np.float32) / 32768.0
        )

        # 3. TRANSCRIBE WITH VAD
        # vad_filter=True stops the model from hallucinating on silence
        segments, _ = WHISPER.transcribe(audio_array, vad_filter=True)

        # 4. SEMANTIC MERGING
        # Group segments until they form a coherent thought (~256 chars)
        points = []
        buffer_text = []
        buffer_start = None
        buffer_end = 0.0

        # Generator to list so we can iterate with an index
        segments = list(segments)

        for i, seg in enumerate(segments):
            if not buffer_text:
                buffer_start = seg.start

            buffer_text.append(seg.text.strip())
            buffer_end = seg.end

            # Current semantic block
            full_text = " ".join(buffer_text)

            # Heuristic: if the block is long enough OR it's the last segment
            if len(full_text) >= 256 or i == len(segments) - 1:
                # Embed the GROUPS, not the fragments
                vector_gen = EMBEDDER.embed([full_text])
                vector = list(vector_gen)[0].tolist()  # Unpack generator

                points.append(
                    {
                        "id": str(uuid.uuid4()),
                        "vector": vector,
                        "payload": {
                            "video_id": video_id,
                            "text": full_text,
                            # Map relative Whisper time back to absolute video time
                            "timestamp": start_time + buffer_start,
                            "end_timestamp": start_time + buffer_end,
                            "type": "audio_transcript",
                            "strategy": "semantic_merge_256",
                        },
                    }
                )

                # Reset buffer
                buffer_text = []
                buffer_start = None

        return points

    except ffmpeg.Error as e:
        error_msg = e.stderr.decode("utf8") if e.stderr else "Unknown FFmpeg error"
        logger.error(f"FFMPEG MEMORY FAIL: {error_msg}")
        return []

    except Exception as e:
        logger.error(f"General Audio Error: {e}")
        return []
```
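The returned points already have the shape Qdrant expects, so a hypothetical caller might look like the sketch below. The module path, collection name, file path, and chunk bounds are made-up values, and storing into Qdrant is an assumption suggested by the `qdrant_data/` volume, not something this file does itself:

```python
# Hypothetical usage sketch; names and values below are illustrative only.
from qdrant_client import QdrantClient
from qdrant_client.models import PointStruct

from app.audio import transcribe_chunk  # assumed module path, not confirmed by the diff

points = transcribe_chunk(
    video_path="assets/videos/demo.mp4",  # made-up path
    video_id="demo",
    start_time=0.0,
    end_time=30.0,
)

client = QdrantClient(url="http://localhost:6333")
client.upsert(
    collection_name="video_chunks",  # assumed collection name
    points=[
        PointStruct(id=p["id"], vector=p["vector"], payload=p["payload"])
        for p in points
    ],
)
```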
One additional file in this PR was deleted; its diff did not load.
Review comment (P2): Pin the `uv` image to a specific version instead of using `:latest`, for reproducible and secure builds.