Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Example environment file: copy it to .env and replace the values below.
# docker-compose.yml substitutes these variables into the service definitions.
DB_USER=admin
# Placeholder only; set a real password in your own .env.
DB_PASS=change_me_in_production
DB_NAME=search_engine
9 changes: 8 additions & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -127,8 +127,15 @@ jobs:
python-version: '3.9'
cache: 'pip'

- name: Install System Dependencies
run: |
sudo apt-get update
sudo apt-get install -y librocksdb-dev zlib1g-dev libbz2-dev liblz4-dev libsnappy-dev libzstd-dev

- name: Install Dependencies
run: pip install -r requirements.txt
run: |
pip install "Cython<3.0" setuptools wheel
pip install --no-build-isolation -r requirements.txt

- name: Syntax Check
run: python -m compileall .
Expand Down
2 changes: 1 addition & 1 deletion API/config/routes.rb
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,5 @@
get "/search", to: "search#index"

# Defines the root path route ("/")
# root "posts#index"
root "search#index"
end
23 changes: 23 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1 +1,24 @@
![CodeRabbit Pull Request Reviews](https://img.shields.io/coderabbit/prs/github/Digvijay-x1/Search-Engine?utm_source=oss&utm_medium=github&utm_campaign=Digvijay-x1%2FSearch-Engine&labelColor=171717&color=FF570A&link=https%3A%2F%2Fcoderabbit.ai&label=CodeRabbit+Reviews)

## Setup

### Environment Variables

This project uses environment variables for configuration, including database credentials.

1. Copy the example environment file:
```bash
cp .env.example .env
```
2. Edit `.env` and set your own secure passwords and configuration:
```ini
DB_USER=admin
DB_PASS=your_secure_password
DB_NAME=search_engine
```

### Running with Docker

```bash
docker-compose up --build
```
2 changes: 1 addition & 1 deletion cpp/indexer/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM ubuntu:latest
FROM ubuntu:22.04

RUN apt-get update && apt-get install -y \
build-essential \
Expand Down
17 changes: 11 additions & 6 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@ services:
postgres_service:
image: postgres:17.2-alpine3.21
environment:
POSTGRES_USER: admin
POSTGRES_PASSWORD: password123
POSTGRES_DB: search_engine
POSTGRES_USER: ${DB_USER}
POSTGRES_PASSWORD: ${DB_PASS}
POSTGRES_DB: ${DB_NAME}
volumes:
- ./data/init.sql:/docker-entrypoint-initdb.d/init.sql # Runs on first startup
- pg_data:/var/lib/postgresql/data # Persistence
Expand All @@ -29,8 +29,13 @@ services:
- "5000:5000"
volumes:
- ./python/ranker:/app # Hot-reloading for Python
- ./data/crawled_pages:/shared_data
environment:
- FLASK_ENV=development
- DB_HOST=postgres_service
- DB_NAME=${DB_NAME}
- DB_USER=${DB_USER}
- DB_PASS=${DB_PASS}
networks:
- search_net

Expand Down Expand Up @@ -65,9 +70,9 @@ services:
- ./data/crawled_pages:/shared_data
environment:
- DB_HOST=postgres_service
- DB_NAME=search_engine
- DB_USER=admin
- DB_PASS=password123
- DB_NAME=${DB_NAME}
- DB_USER=${DB_USER}
- DB_PASS=${DB_PASS}
depends_on:
- redis_service
- postgres_service
Expand Down
28 changes: 25 additions & 3 deletions python/ranker/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,6 +1,28 @@
# Ranker service image.
# Ubuntu 22.04 base; Python, the build toolchain and the RocksDB/compression
# development libraries are all installed from apt below.
FROM ubuntu:22.04

# System dependencies, installed and cleaned up in one layer so the apt lists
# never persist in the image.
#   python3 / python3-pip / python3-dev  interpreter, pip and the CPython headers
#   build-essential                      compiler toolchain for source builds
#   git                                  presumably for VCS-based requirements;
#                                        confirm against requirements.txt
#   librocksdb-dev + zlib/bz2/lz4/snappy/zstd dev packages
#   libpq-dev                            PostgreSQL client headers
# DEBIAN_FRONTEND is set inline so apt cannot stop on an interactive prompt and
# the variable does not leak into the runtime environment.
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        build-essential \
        git \
        libbz2-dev \
        liblz4-dev \
        libpq-dev \
        librocksdb-dev \
        libsnappy-dev \
        libzstd-dev \
        python3 \
        python3-dev \
        python3-pip \
        zlib1g-dev \
    && rm -rf /var/lib/apt/lists/*

# Provide a bare `python` command; -f keeps the step from failing when the
# link already exists.
RUN ln -sf /usr/bin/python3 /usr/bin/python

WORKDIR /app

# Copy only the manifest first so the dependency layer stays cached until
# requirements.txt itself changes.
COPY requirements.txt .

# Cython is pinned below 3 and must be present before the requirements are
# installed, so both steps run in this order inside a single layer.
# NOTE(review): the CI workflow installs with --no-build-isolation so that the
# pinned Cython is the one used for source builds; confirm whether this image
# needs the same flag.
RUN pip3 install --no-cache-dir "Cython<3" \
    && pip3 install --no-cache-dir -r requirements.txt

COPY . .

# Exec form so the Python process receives signals directly.
CMD ["python3", "app.py"]
41 changes: 33 additions & 8 deletions python/ranker/app.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,48 @@
from flask import Flask, jsonify, request
from engine import Ranker
import time
import atexit

app = Flask(__name__)

# In-memory stand-in for a search index: maps a lower-case query term to the
# list of {"id", "title"} hits returned for it.
MOCK_INDEX = {
    "computer": [
        {"id": 1, "title": "History of Computers"},
        {"id": 2, "title": "Computer Science 101"},
    ],
    "cats": [
        {"id": 3, "title": "Funny Cats"},
        {"id": 4, "title": "Cat Care"},
    ],
}
# Single module-wide Ranker instance. Construction is best-effort: on any
# failure the error is printed and `ranker` stays None so the app can still
# start.
ranker = None
try:
    ranker = Ranker()
    # Make sure the ranker is closed when the interpreter exits.
    atexit.register(ranker.close)
except Exception as e:
    print(f"Failed to initialize Ranker: {e}")

@app.route('/health')
def health():
    """Report service health as JSON.

    The status is "healthy" when the module-level ranker has been created and
    "degraded" while it is still unset, so a failed start-up is visible to
    whoever polls this endpoint.
    """
    status = "healthy" if ranker else "degraded"
    return jsonify({"status": status, "service": "ranker"})

@app.route('/search')
def search():
    """Run the ``q`` query parameter through the ranker and return JSON.

    The response carries ``query`` (the lower-cased query string), ``results``
    (whatever ``ranker.search`` returned) and ``meta`` with the result count
    and the time spent inside the ranker, in milliseconds.

    If no ranker exists yet and creating one fails, the response is HTTP 500
    with an ``error`` message instead.
    """
    global ranker
    if not ranker:
        # Start-up initialization failed or has not happened; retry lazily.
        try:
            ranker = Ranker()
        except Exception as e:
            return jsonify({"error": f"Ranker not initialized: {str(e)}"}), 500
        # Same clean-up hook as the start-up path, so a lazily created ranker
        # is closed at interpreter exit too.
        atexit.register(ranker.close)

    query = request.args.get('q', '').lower()
    print(f"Received query: {query}")

    # perf_counter is monotonic, so the latency cannot be skewed by wall-clock
    # adjustments while the search is running.
    start_time = time.perf_counter()
    results = ranker.search(query)
    duration_ms = (time.perf_counter() - start_time) * 1000

    return jsonify({
        "query": query,
        "results": results,
        "meta": {
            "count": len(results),
            "latency_ms": round(duration_ms, 2)
        }
    })

if __name__ == '__main__':
# host='0.0.0.0' is CRITICAL for Docker networking
Expand Down
Loading