Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .cursorignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
**/.env*
**/.example.env
**/.venv
**/logs/
25 changes: 25 additions & 0 deletions .cursorindexingignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
**/.env*
**/logs
**/__pycache__
**/__pycache__/*
**/logs/*
**/.venv
**/.venv/*
**/node_modules
**/node_modules/*
**/dist
**/dist/*
**/build
**/build/*
**/env
**/env/*
**/src/public
**/src/public/*
**/src/templates
**/src/templates/*
**/src/static
**/src/static/*
**/src/static/css
**/src/static/css/*
**/src/static/js
**/src/static/js/*
3 changes: 2 additions & 1 deletion .dockerignore
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,5 @@ __pycache__/
htmlcov/
scripts/
Docker*
docker-compose.yml
docker-compose.yml
.cursor*
73 changes: 61 additions & 12 deletions main.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
# mcp/main.py
import json
import os
from src.middleware.api_key import middleware
from typing import List, Optional
from src.middleware.api_key import get_headers, middleware
from src.utils.rag import DEFAULT_VECTOR_STORE_PATH
from pydantic import BaseModel
import uvicorn
Expand All @@ -12,6 +13,7 @@
from src.utils.scrape import retrieve_webpage

from src.utils.rag import VectorStore, Splitter
from src.utils.github import GitHubAPI

# Initialize FastMCP server instance
mcp = FastMCP(
Expand All @@ -35,19 +37,66 @@ class Split(BaseModel):
chunk_size: int = 1000
chunk_overlap: int = 0


@mcp.tool()
# NOTE(review): leading indentation was lost in this view, so the nesting below
# is not visible. In particular, whether the chunk-index loop runs only when
# `split.active` is true or for every call cannot be determined here — confirm
# against the original file.
async def scrape_to_knowledge_base(url: str, title: str, split: Split) -> str:
"""Scrape a web page and add it to the knowledge base"""
# Fetch the page and wrap it in a single Document tagged with its source URL
# and the caller-supplied title.
page = retrieve_webpage(url)
docs = [Document(page_content=page, metadata={"source": url, "title": title})]
# Splitting is opt-in via the `split.active` flag.
if split.active:
docs = splitter.split_docs(docs)
# Add chunk index to metadata for each document
for i, doc in enumerate(docs):
doc.metadata["chunk"] = i
# Store the resulting document(s); the count reported below is taken after any
# splitting has happened.
await vector_store.aadd_docs(docs)
return f"Scraped {url} resulting in {len(docs)} documents"
async def index_github_repository(
    owner: str,
    repo: str,
    extensions: List[str],
    path: str = "",
    ref: Optional[str] = None,
    chunk_size: int = 0,
    chunk_overlap: int = 0
) -> str:
    """
    Index the contents of a GitHub repository into the knowledge base.

    The GitHub credential is read from the current request's headers:
    ``authorization`` is preferred, with ``x-github-pat`` as the fallback.

    Args:
        owner: GitHub repository owner/organization
        repo: Repository name
        extensions: List of file extensions to index (e.g. [".py", ".md"])
        path: Optional path within the repository to start indexing from
        ref: Optional branch, tag, or commit SHA
        chunk_size: Size of text chunks for splitting documents
        chunk_overlap: Overlap between chunks

    Returns:
        str: A multi-line summary of the indexing operation on success, or an
        "Error indexing repository: ..." message if indexing raised.

    Raises:
        ValueError: If neither credential header is present on the request.
    """
    request_headers = get_headers(mcp.get_context().request_context)
    token = request_headers.get("authorization") or request_headers.get("x-github-pat")
    if not token:
        raise ValueError("Authorization header is required")

    client = GitHubAPI(pat_token=token)

    try:
        # Everything the client needs is forwarded unchanged from the tool arguments.
        indexed = await client.get_repository_contents_to_vectorstore(
            owner=owner,
            repo=repo,
            extensions=extensions,
            path=path,
            ref=ref,
            chunk_size=chunk_size,
            chunk_overlap=chunk_overlap,
        )

        summary = (
            f"Successfully indexed {len(indexed)} documents from {owner}/{repo}\n"
            f"Extensions processed: {', '.join(extensions)}\n"
            f"Path: {path or 'root'}\n"
            f"Reference: {ref or 'default branch'}"
        )
        return summary
    except Exception as e:
        # Failures are reported back to the caller as text rather than raised.
        return f"Error indexing repository: {str(e)}"

#####################################################################################
# RAG
#####################################################################################
@mcp.tool()
async def retrieve_documents(query: str, search_type: str = "mmr", search_kwargs: dict = {'k': 10}) -> list[dict]:
"""Rewrite the query to be more specific and retrieve documents from the knowledge base"""
Expand Down
12 changes: 11 additions & 1 deletion src/middleware/api_key.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,16 @@
from starlette.exceptions import HTTPException
from src.config import Config


def get_headers(req_ctx):
    """Return the request's HTTP headers as a ``{lowercased-name: value}`` dict.

    Args:
        req_ctx: Request context object exposing a ``scope`` attribute. The
            scope is expected to offer ``.get("headers", [])`` yielding
            ``(name_bytes, value_bytes)`` pairs, or to be ``None``.

    Returns:
        dict: Header names (lower-cased) mapped to their decoded values. Empty
        when the scope is ``None`` or carries no headers. If a header name
        occurs more than once, the last occurrence wins.
    """
    scope = req_ctx.scope
    if scope is None:
        return {}

    # errors="replace": a header containing bytes that are not valid UTF-8 must
    # not abort the whole request with UnicodeDecodeError; valid UTF-8 decodes
    # exactly as before, invalid bytes become U+FFFD.
    return {
        name.decode("utf-8", errors="replace").lower(): value.decode("utf-8", errors="replace")
        for name, value in scope.get("headers", [])
    }

# Middleware function to check API key authentication
def middleware(req_ctx):
scope = req_ctx.scope
Expand All @@ -10,7 +20,7 @@ def middleware(req_ctx):
for k, v in scope.get("headers", [])
} if scope is not None else {}

if headers.get("x-api-key") != Config.MCP_API_KEY.value:
if headers.get("x-mcp-key") != Config.MCP_API_KEY.value:
raise HTTPException(status_code=401, detail="Unauthorized")

return True
Loading