Skip to content

Commit 2a02c9b

Browse files
Initial commit: Enterprise Document Q&A System
Built a production-ready RAG system from scratch without LangChain. Features: - Custom RAG pipeline with direct Claude API integration - PDF document processing and chunking - ChromaDB vector store for semantic search - Source citations with relevance scores - Streamlit UI with chat history - Docker deployment ready - Comprehensive tests (13/13 passing) - GitHub Actions CI/CD Tech stack: Python, Claude AI, ChromaDB, PyPDF, Streamlit 🤖 Generated with Claude Code Co-Authored-By: Claude <noreply@anthropic.com>
0 parents  commit 2a02c9b

25 files changed

+1842
-0
lines changed

.dockerignore

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
# Python
2+
__pycache__/
3+
*.py[cod]
4+
*$py.class
5+
*.so
6+
.Python
7+
*.egg-info/
8+
dist/
9+
build/
10+
11+
# Virtual Environment
12+
venv/
13+
env/
14+
ENV/
15+
.venv
16+
17+
# Environment files
18+
.env
19+
20+
# IDE
21+
.vscode/
22+
.idea/
23+
*.swp
24+
*.swo
25+
26+
# OS
27+
.DS_Store
28+
Thumbs.db
29+
30+
# Git
31+
.git/
32+
.gitignore
33+
34+
# Data
35+
data/
36+
chroma_db/
37+
*.pdf
38+
*.txt
!requirements.txt
39+
40+
# Testing
41+
.pytest_cache/
42+
.coverage
43+
htmlcov/
44+
45+
# Documentation
46+
docs/
47+
README.md

.env.example

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# Required API Keys
2+
ANTHROPIC_API_KEY=your_anthropic_api_key_here
3+
4+
# Optional: Choose one for embeddings
5+
VOYAGE_API_KEY=your_voyage_api_key_here
6+
OPENAI_API_KEY=your_openai_api_key_here
7+
8+
# RAG Configuration
9+
CHUNK_SIZE=1000
10+
CHUNK_OVERLAP=200
11+
TOP_K_RESULTS=4
12+
13+
# Vector Store
14+
CHROMA_PERSIST_DIRECTORY=./chroma_db
15+
16+
# Model Configuration
17+
CLAUDE_MODEL=claude-3-5-sonnet-20241022
18+
# Use text-embedding-3-small instead when embedding with OpenAI
EMBEDDING_MODEL=voyage-2

.github/workflows/test.yml

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
name: Tests
2+
3+
on:
4+
push:
5+
branches: [ main ]
6+
pull_request:
7+
branches: [ main ]
8+
9+
jobs:
10+
test:
11+
runs-on: ubuntu-latest
12+
strategy:
13+
matrix:
14+
python-version: ['3.9', '3.10', '3.11']
15+
16+
steps:
17+
- uses: actions/checkout@v4
18+
19+
- name: Set up Python ${{ matrix.python-version }}
20+
uses: actions/setup-python@v5
21+
with:
22+
python-version: ${{ matrix.python-version }}
23+
24+
- name: Cache pip dependencies
25+
uses: actions/cache@v4
26+
with:
27+
path: ~/.cache/pip
28+
key: ${{ runner.os }}-pip-${{ hashFiles('requirements.txt') }}
29+
restore-keys: |
30+
${{ runner.os }}-pip-
31+
32+
- name: Install dependencies
33+
run: |
34+
python -m pip install --upgrade pip
35+
pip install -r requirements.txt
36+
37+
- name: Run tests
38+
run: |
39+
pytest -v --cov=src tests/
40+
41+
- name: Check code style
42+
run: |
43+
flake8 src/ tests/ --max-line-length=100 --exclude=venv

.gitignore

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
# Python
2+
__pycache__/
3+
*.py[cod]
4+
*$py.class
5+
*.so
6+
.Python
7+
build/
8+
develop-eggs/
9+
dist/
10+
downloads/
11+
eggs/
12+
.eggs/
13+
lib/
14+
lib64/
15+
parts/
16+
sdist/
17+
var/
18+
wheels/
19+
*.egg-info/
20+
.installed.cfg
21+
*.egg
22+
23+
# Virtual Environment
24+
venv/
25+
ENV/
26+
env/
27+
.venv
28+
29+
# Environment Variables
30+
.env
31+
32+
# IDEs
33+
.vscode/
34+
.idea/
35+
*.swp
36+
*.swo
37+
*~
38+
39+
# OS
40+
.DS_Store
41+
Thumbs.db
42+
43+
# Project Specific
44+
data/
45+
chroma_db/
46+
*.pdf
47+
*.txt
!requirements.txt
48+
*.docx
49+
50+
# Logs
51+
*.log
52+
53+
# Testing
54+
.pytest_cache/
55+
.coverage
56+
htmlcov/
57+
.tox/
58+
59+
# Jupyter
60+
.ipynb_checkpoints

Dockerfile

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
FROM python:3.11-slim
2+
3+
# Set working directory
4+
WORKDIR /app
5+
6+
# Install system dependencies
7+
RUN apt-get update && apt-get install -y \
8+
build-essential \
9+
curl \
10+
&& rm -rf /var/lib/apt/lists/*
11+
12+
# Copy requirements first for better caching
13+
COPY requirements.txt .
14+
15+
# Install Python dependencies
16+
RUN pip install --no-cache-dir -r requirements.txt
17+
18+
# Copy application code
19+
COPY . .
20+
21+
# Create directory for ChromaDB persistence
22+
RUN mkdir -p /app/chroma_db
23+
24+
# Expose Streamlit port
25+
EXPOSE 8501
26+
27+
# Health check
28+
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
29+
CMD curl --fail http://localhost:8501/_stcore/health || exit 1
30+
31+
# Run the application
32+
CMD ["streamlit", "run", "src/ui/app.py", "--server.port=8501", "--server.address=0.0.0.0"]

LICENSE

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
MIT License
2+
3+
Copyright (c) 2025 Sandeep Uppalapati
4+
5+
Permission is hereby granted, free of charge, to any person obtaining a copy
6+
of this software and associated documentation files (the "Software"), to deal
7+
in the Software without restriction, including without limitation the rights
8+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9+
copies of the Software, and to permit persons to whom the Software is
10+
furnished to do so, subject to the following conditions:
11+
12+
The above copyright notice and this permission notice shall be included in all
13+
copies or substantial portions of the Software.
14+
15+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21+
SOFTWARE.

0 commit comments

Comments
 (0)