-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtest_parallel_performance.py
More file actions
60 lines (48 loc) · 1.53 KB
/
test_parallel_performance.py
File metadata and controls
60 lines (48 loc) · 1.53 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
"""
Test script to measure performance improvement from parallel LLM calls.
"""
from dotenv import load_dotenv
load_dotenv()
import time
from src.neo4j_service import get_neo4j_service
from src.conversational_service import get_conversational_service
# Script body: time a single ``answer_question`` call end to end and print the
# elapsed time, how many context items came back, and a preview of the answer.
#
# NOTE(review): everything below executes at import time, and the file name
# matches pytest's default ``test_*.py`` discovery pattern -- consider a
# ``if __name__ == "__main__":`` guard if collection should not trigger it.

# Layout constants shared by every section header below.
BANNER = "=" * 60
RULE = "-" * 60
# Answers longer than this are cut and suffixed with "..." when printed.
MAX_ANSWER_CHARS = 500

# Initialize services; the conversational service is handed the Neo4j service.
neo4j = get_neo4j_service()
chat = get_conversational_service(neo4j)

# Test query
question = "Who are the best Python developers for a machine learning project?"

print(BANNER)
print("TESTING PARALLEL LLM PERFORMANCE")
print(BANNER)
print(f"\nQuestion: {question}\n")

# Run test. perf_counter() is monotonic and high-resolution, so the measured
# interval cannot be skewed by system clock adjustments the way time.time() can.
start_time = time.perf_counter()
result = chat.answer_question(
    question=question,
    max_context_items=5,
)
total_time = time.perf_counter() - start_time

# Display results
print("\n" + BANNER)
print("RESULTS")
print(BANNER)
print(f"\nTotal Processing Time: {total_time:.2f} seconds")
print("\nContext Retrieved:")
print(f" - Projects: {len(result['context_projects'])}")
print(f" - People: {len(result['context_people'])}")

print("\n" + RULE)
print("Answer:")
print(RULE)
answer = result['answer']
if len(answer) > MAX_ANSWER_CHARS:
    # Show only a preview of long answers.
    print(answer[:MAX_ANSWER_CHARS] + "...")
else:
    print(answer)

print("\n" + BANNER)
print("PERFORMANCE NOTES")
print(BANNER)
# Static reference figures for interpreting the measured time above; these
# numbers are hard-coded expectations, not values computed by this script.
print("""
With Parallel Implementation:
- Projects and People searches run simultaneously in parallel threads
- Expected time: ~5-8 seconds (2 LLM calls in parallel + 1 sequential)
Without Parallel (previous implementation):
- Projects search, then People search (sequential)
- Expected time: ~10-15 seconds (3 sequential LLM calls)
Time Saved: ~40-50% faster
""")