-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathapp.py
More file actions
93 lines (75 loc) · 3.08 KB
/
app.py
File metadata and controls
93 lines (75 loc) · 3.08 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
from flask import Flask, render_template, request, jsonify ,send_file
import os
import json
from vector_db import VectorDatabase
from doc_handle import handle_document
from langchain_core.documents import Document
from llm_response import get_response
# Constants: the folder that is scanned for PDFs, and the JSON file that
# records which file names have already been processed.
FOLDER_PATH = 'test'
PROCESSED_FILES_PATH = 'processed_files.json'

# Flask application. UPLOAD_FOLDER is FOLDER_PATH resolved against the
# working directory the process was started from.
app = Flask(__name__)
app.config.update(UPLOAD_FOLDER=os.path.join(os.getcwd(), FOLDER_PATH))

# Module-level vector store handle used by process_document() and query().
db = VectorDatabase()
# Load processed files
def load_processed_files():
    """Return the set of entries recorded as already processed.

    Reads the JSON array stored at ``PROCESSED_FILES_PATH``.

    Returns:
        set: The recorded entries, or an empty set when the registry file
        does not exist yet.
    """
    # Open directly rather than testing os.path.exists() first: the file can
    # disappear between the check and the open.
    try:
        with open(PROCESSED_FILES_PATH, 'r', encoding='utf-8') as f:
            return set(json.load(f))
    except FileNotFoundError:
        return set()
# Save processed files
def save_processed_files(processed_files):
    """Write the processed-file entries to ``PROCESSED_FILES_PATH`` as a JSON array.

    Args:
        processed_files: Iterable of entries to record (a set in the
            callers visible in this file).
    """
    # Serialise before touching the disk so a non-serialisable entry cannot
    # leave a half-written file behind. Sorting makes the output stable
    # between runs; key=str keeps sorting from failing on mixed types.
    payload = json.dumps(sorted(processed_files, key=str))

    # Write to a sibling temp file and swap it in, so an interrupted write
    # never leaves the registry truncated (which would break later loads).
    tmp_path = PROCESSED_FILES_PATH + '.tmp'
    with open(tmp_path, 'w', encoding='utf-8') as f:
        f.write(payload)
    os.replace(tmp_path, PROCESSED_FILES_PATH)
# Process document
def process_document(file):
    """Chunk one document from ``FOLDER_PATH``, index it, and record it as processed.

    Args:
        file: Bare file name (no directory components) inside ``FOLDER_PATH``.

    Returns:
        str: Confirmation message naming the processed file.

    Raises:
        ValueError: If ``file`` is not a plain, non-empty file name.
    """
    # The name is supplied by the /process_document request body, so refuse
    # anything that would let os.path.join() resolve outside FOLDER_PATH
    # (parent references, nested paths, absolute paths).
    if (
        not isinstance(file, str)
        or file in ('', '.', '..')
        or os.path.basename(file) != file
    ):
        raise ValueError(f"Invalid file name: {file!r}")

    file_path = os.path.join(FOLDER_PATH, file)

    # Handle document: each chunk is read via its 'page_content' and
    # 'metadata' keys and wrapped in a Document.
    chunks = handle_document(file_path)
    documents = [
        Document(page_content=chunk['page_content'], metadata=chunk['metadata'])
        for chunk in chunks
    ]

    # Add documents to vector store
    db.add_docs(documents)

    # Mark file as processed only after indexing has completed.
    processed_files = load_processed_files()
    processed_files.add(file)
    save_processed_files(processed_files)

    return f"{file} processed and added to the vector store."
@app.route('/')
def index():
    """Render the landing page.

    The template receives every ``.pdf`` name found in ``FOLDER_PATH`` as
    ``files`` and the ones not yet recorded as processed as ``new_files``.
    """
    entries = os.listdir(FOLDER_PATH)
    already_done = load_processed_files()

    pdf_names = []
    pending = []
    for entry in entries:
        if not entry.endswith('.pdf'):
            continue
        pdf_names.append(entry)
        if entry not in already_done:
            pending.append(entry)

    return render_template('index.html', files=pdf_names, new_files=pending)
@app.route('/process_document', methods=['POST'])
def process_doc():
    """Process the file named in the JSON request body.

    Expects a JSON object with a non-empty string ``file`` field.

    Returns:
        ``{'message': ...}`` on success, or ``{'error': ...}`` with HTTP 400
        when the body is not a JSON object or ``file`` is missing/invalid.
    """
    # silent=True yields None instead of raising on a missing or malformed
    # JSON body, so every bad request takes the same 400 path below.
    payload = request.get_json(silent=True)
    file = payload.get('file') if isinstance(payload, dict) else None
    if not isinstance(file, str) or not file:
        return jsonify({'error': "Request body must be JSON with a non-empty 'file' string."}), 400

    result = process_document(file)
    return jsonify({'message': result})
@app.route('/query', methods=['POST'])
def query():
    """Answer a question from the indexed documents.

    Expects a JSON object with a non-blank string ``query`` field.

    Returns:
        ``{'response': ...}`` on success, or ``{'error': ...}`` with HTTP 400
        when the body is not a JSON object or ``query`` is missing/blank.
    """
    # silent=True yields None instead of raising on a missing or malformed
    # JSON body, so every bad request takes the same 400 path below.
    payload = request.get_json(silent=True)
    query_text = payload.get('query') if isinstance(payload, dict) else None
    if not isinstance(query_text, str) or not query_text.strip():
        return jsonify({'error': "Request body must be JSON with a non-empty 'query' string."}), 400

    # Perform similarity search to get the top 5 chunks.
    search_results = db.perform_similarity_search(query_text, k=5)
    chunks = [
        {'chunk': res.page_content, 'metadata': res.metadata}
        for res in search_results
    ]

    response_text = get_response(query_text, chunks)
    return jsonify({'response': response_text})
@app.route('/get_pdf_content', methods=['POST'])
def get_pdf_content():
    """Placeholder endpoint: return a stub message naming the requested file.

    Reads ``file`` from the JSON request body. Actual PDF content
    extraction is not implemented here yet.
    """
    requested_name = request.json['file']
    # TODO: replace this stub with real PDF content extraction.
    placeholder = f"Content of {requested_name} would be displayed here."
    return jsonify({'content': placeholder})
@app.route('/get_new_files', methods=['GET'])
def get_new_files():
    """Return, as a JSON array, the ``.pdf`` names in ``FOLDER_PATH`` not yet recorded as processed."""
    pdf_names = [name for name in os.listdir(FOLDER_PATH) if name.endswith('.pdf')]
    seen = load_processed_files()

    unprocessed = []
    for name in pdf_names:
        if name in seen:
            continue
        unprocessed.append(name)

    return jsonify(unprocessed)
@app.route('/display_pdf/<filename>')
def display_pdf(filename):
    """Serve one file from the upload folder with a PDF content type.

    Args:
        filename: Bare file name taken from the URL.

    Returns:
        The file response, or ``("PDF file not found", 404)`` when the name
        is not a plain file name or does not refer to an existing file.
    """
    not_found = ("PDF file not found", 404)

    # The name comes straight from the URL: refuse parent references and
    # anything with a directory component so the joined path cannot leave
    # the upload folder.
    if filename in ('.', '..') or os.path.basename(filename) != filename:
        return not_found

    pdf_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)

    # A directory or other non-file entry is answered with 404 instead of
    # surfacing as an unhandled error from send_file().
    if not os.path.isfile(pdf_path):
        return not_found

    try:
        return send_file(pdf_path, mimetype='application/pdf')
    except FileNotFoundError:
        # The file was removed between the check above and the send.
        return not_found
if __name__ == '__main__':
    # Debug mode stays on by default, as before, but can now be switched off
    # without editing this file by setting FLASK_DEBUG to 0/false/no/off.
    # NOTE(review): the interactive debugger should not be reachable on a
    # publicly exposed host; disable it there.
    debug_enabled = os.environ.get('FLASK_DEBUG', '1').strip().lower() not in ('0', 'false', 'no', 'off')
    app.run(debug=debug_enabled)