project-knowledge-base/test_parallel_performance.py at main · BenjaminFont/project-knowledge-base · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
"""
Test script to measure performance improvement from parallel LLM calls.
"""
from dotenv import load_dotenv
load_dotenv()

import time
from src.neo4j_service import get_neo4j_service
from src.conversational_service import get_conversational_service

# Initialize services
# The conversational service is constructed with the Neo4j service handle.
neo4j = get_neo4j_service()
chat = get_conversational_service(neo4j)

# Test query
question = "Who are the best Python developers for a machine learning project?"

# Reused separator lines for the console report.
heavy_rule = "=" * 60
light_rule = "-" * 60

print(heavy_rule)
print("TESTING PARALLEL LLM PERFORMANCE")
print(heavy_rule)
print(f"\nQuestion: {question}\n")

# Run test
# perf_counter() is monotonic and high-resolution, so the measured interval
# cannot be skewed by system clock adjustments the way time.time() can.
# Only the answer_question() call is timed; service setup above is excluded.
start_time = time.perf_counter()

result = chat.answer_question(
    question=question,
    max_context_items=5
)

total_time = time.perf_counter() - start_time

# Display results
print("\n" + heavy_rule)
print("RESULTS")
print(heavy_rule)
print(f"\nTotal Processing Time: {total_time:.2f} seconds")
print("\nContext Retrieved:")
print(f"  - Projects: {len(result['context_projects'])}")
print(f"  - People: {len(result['context_people'])}")

print("\n" + light_rule)
print("Answer:")
print(light_rule)
# Show at most the first 500 characters, marking truncation with an ellipsis.
answer = result['answer']
answer_preview = (answer[:500] + "...") if len(answer) > 500 else answer
print(answer_preview)

print("\n" + heavy_rule)
print("PERFORMANCE NOTES")
print(heavy_rule)
# Static reference figures for comparison; these are not computed by this run.
print("""
With Parallel Implementation:
- Projects and People searches run simultaneously in parallel threads
- Expected time: ~5-8 seconds (2 LLM calls in parallel + 1 sequential)

Without Parallel (previous implementation):
- Projects search, then People search (sequential)
- Expected time: ~10-15 seconds (3 sequential LLM calls)

Time Saved: ~40-50% faster
""")