Features: #1
base: main
.dockerignore

```
@@ -1,8 +1,8 @@
# .dockerignore
venv/
assets/
qdrant_data/
redpanda_data/
__pycache__/
*.pyc
.git/
.env
qdrant_data/
redpanda_data/
*.db
```
.gitignore

```
@@ -1,28 +1,104 @@
# Python
__pycache__/
*.pyc
*.pyo
*.pyd
*.py[cod]
*$py.class
*.so
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# Virtual Environments
venv/
.venv/
env/
.env
ENV/
env.bak/
venv.bak/

# Environment variables
.env
.env.local
.env.*.local

# IDE
.vscode/
.idea/
*.swp
*.swo
*~
.DS_Store
.AppleDouble
.LSOverride

# OS
.DS_Store
.DS_Store?
._*
.Spotlight-V100
.Trashes
ehthumbs.db
Thumbs.db

.vscode/
.idea/

# Project-specific data directories
assets/videos/
*.mp4

qdrant_data/
redpanda_data/

# Logs
*.log
logs/
*.log.*

# Testing
.pytest_cache/
.coverage
htmlcov/
.tox/
.hypothesis/
.mypy_cache/
.dmypy.json
dmypy.json

# Jupyter Notebook
.ipynb_checkpoints

# pyenv
.python-version

# pip
pip-log.txt
pip-delete-this-directory.txt

# Model cache (HuggingFace, etc.)
.cache/
models/
*.pt
*.pth
*.ckpt

# Temporary files
*.tmp
*.temp
*.bak
*.swp
*~

# Docker
.dockerignore
```
Dockerfile

```
@@ -1,25 +1,32 @@
# Use a slim Python image (lightweight and fast)
# If you need GPU support later, change this to an nvidia/cuda image or pytorch/pytorch
FROM python:3.10-slim
# Use Python 3.12 to match your project requirements
FROM python:3.12-slim

# Set the working directory to match the volume mount in your compose file
WORKDIR /app

# Install system dependencies (often needed for AI/Inference libraries like OpenCV or numpy)
# 1. Install System Dependencies
RUN apt-get update && apt-get install -y \
    build-essential \
    libgl1 \
    libglib2.0-0 \
    ffmpeg \
    curl \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements first to leverage Docker caching
COPY requirements.txt .
# 2. Install uv
COPY --from=ghcr.io/astral-sh/uv:latest /uv /bin/uv

# 3. Set up the application
WORKDIR /app

# 4. Install Dependencies
# Only copy pyproject.toml (since we deleted uv.lock)
COPY pyproject.toml ./

# Run sync WITHOUT --frozen (this resolves dependencies fresh)
RUN uv sync
```
Review comment on `RUN uv sync` (P1): Builds are not reproducible without a lock file. Consider keeping `uv.lock` and installing with `uv sync --frozen`.
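A minimal sketch of that approach, assuming `uv.lock` is committed next to `pyproject.toml` (the layering shown here is illustrative, not the project's actual setup):

```dockerfile
# Copy the manifest AND the committed lock file before the rest of the code
# so this layer stays cached until dependencies actually change.
COPY pyproject.toml uv.lock ./

# --frozen refuses to re-resolve: the build fails if uv.lock is missing or
# out of sync with pyproject.toml, so every image gets the same versions.
RUN uv sync --frozen
```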
The remainder of the Dockerfile diff:

```
# Install Python dependencies
RUN pip install --no-cache-dir -r requirements.txt
# CRITICAL: Add the virtual environment to the PATH
ENV PATH="/app/.venv/bin:$PATH"

# Copy the rest of the code
# 5. Copy the Code
COPY . .

# This CMD is a fallback; your docker-compose 'command' overrides this.
CMD ["python3", "app/server.py"]
# 6. Default Command
CMD ["uvicorn", "app.server:app", "--host", "0.0.0.0", "--port", "8000"]
```
New file in this PR (an audio transcription module, +110 lines):

```python
import logging
import os
import uuid

import ffmpeg
import numpy as np
from fastembed import TextEmbedding
from faster_whisper import WhisperModel

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# --- LOAD MODELS ---
try:
    WHISPER = WhisperModel("base", device="cpu", compute_type="int8")
    EMBEDDER = TextEmbedding(model_name="BAAI/bge-small-en-v1.5")
except Exception as e:
    logger.critical(f"Failed to load Audio models: {e}")
```

Review comment on the model-loading `except` block (P1): If model loading fails, the error is only logged; `WHISPER` and `EMBEDDER` are never defined, so the first call to `transcribe_chunk` will still fail with a `NameError`.
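One way to address that comment is to fail fast instead of continuing with undefined globals. A minimal sketch (the re-raise is a suggestion, not part of this PR):

```python
import logging

from fastembed import TextEmbedding
from faster_whisper import WhisperModel

logger = logging.getLogger(__name__)

# Re-raise after logging so the service stops at startup rather than
# failing later with a NameError inside transcribe_chunk.
try:
    WHISPER = WhisperModel("base", device="cpu", compute_type="int8")
    EMBEDDER = TextEmbedding(model_name="BAAI/bge-small-en-v1.5")
except Exception as e:
    logger.critical(f"Failed to load Audio models: {e}")
    raise
```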
The rest of the new file:

```python
def transcribe_chunk(
    video_path: str, video_id: str, start_time: float, end_time: float
):
    """
    1. Pipes audio from FFmpeg directly to memory (No Disk I/O).
    2. Uses VAD to skip silence (Saves CPU).
    3. Merges short segments into semantic blocks (Increases Accuracy).
    """
    duration = end_time - start_time

    # 1. GHOST CHECK
    if not os.path.exists(video_path):
        logger.error(f"AUDIO ERROR: File not found at {video_path}")
        return []

    try:
        # 2. EXTRACT AUDIO TO MEMORY (Zero-Copy)
        # We request raw PCM data (s16le) at 16 kHz mono
        out, _ = (
            ffmpeg.input(video_path, ss=start_time, t=duration)
            .output("pipe:", format="s16le", acodec="pcm_s16le", ac=1, ar=16000)
            .run(capture_stdout=True, capture_stderr=True)
        )

        # Convert raw bytes to a float32 numpy array (required by Whisper),
        # normalized between -1 and 1
        audio_array = (
            np.frombuffer(out, np.int16).flatten().astype(np.float32) / 32768.0
        )

        # 3. TRANSCRIBE WITH VAD
        # vad_filter=True stops the model from hallucinating on silence
        segments, _ = WHISPER.transcribe(audio_array, vad_filter=True)

        # 4. SEMANTIC MERGING
        # Group segments until they form a coherent thought (~256 chars)
        points = []
        buffer_text = []
        buffer_start = None
        buffer_end = 0.0

        # Generator to list so we can iterate with an index
        segments = list(segments)

        for i, seg in enumerate(segments):
            if not buffer_text:
                buffer_start = seg.start

            buffer_text.append(seg.text.strip())
            buffer_end = seg.end

            # Current semantic block
            full_text = " ".join(buffer_text)

            # Heuristic: if the block is long enough OR it's the last segment
            if len(full_text) >= 256 or i == len(segments) - 1:
                # Embed the GROUPS, not the fragments
                vector_gen = EMBEDDER.embed([full_text])
                vector = list(vector_gen)[0].tolist()  # Unpack generator

                points.append(
                    {
                        "id": str(uuid.uuid4()),
                        "vector": vector,
                        "payload": {
                            "video_id": video_id,
                            "text": full_text,
                            # Map relative Whisper time back to absolute video time
                            "timestamp": start_time + buffer_start,
                            "end_timestamp": start_time + buffer_end,
                            "type": "audio_transcript",
                            "strategy": "semantic_merge_256",
                        },
                    }
                )

                # Reset buffer
                buffer_text = []
                buffer_start = None

        return points

    except ffmpeg.Error as e:
        error_msg = e.stderr.decode("utf8") if e.stderr else "Unknown FFmpeg error"
        logger.error(f"FFMPEG MEMORY FAIL: {error_msg}")
        return []

    except Exception as e:
        logger.error(f"General Audio Error: {e}")
        return []
```
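The returned points already have the shape Qdrant expects, so a hypothetical caller might look like the sketch below. The module path, collection name, file path, and chunk bounds are made-up values, and storing into Qdrant is an assumption suggested by the `qdrant_data/` volume, not something this file does itself:

```python
# Hypothetical usage sketch; names and values below are illustrative only.
from qdrant_client import QdrantClient
from qdrant_client.models import PointStruct

from app.audio import transcribe_chunk  # assumed module path, not confirmed by the diff

points = transcribe_chunk(
    video_path="assets/videos/demo.mp4",  # made-up path
    video_id="demo",
    start_time=0.0,
    end_time=30.0,
)

client = QdrantClient(url="http://localhost:6333")
client.upsert(
    collection_name="video_chunks",  # assumed collection name
    points=[
        PointStruct(id=p["id"], vector=p["vector"], payload=p["payload"])
        for p in points
    ],
)
```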
One additional file in this PR was deleted; its diff did not load.
Review comment (P2): Pin the `uv` image to a specific version instead of using `:latest`, for reproducible and secure builds.