Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 10 additions & 10 deletions data/projects.json
Original file line number Diff line number Diff line change
Expand Up @@ -416,7 +416,7 @@
"starter_code": "starter_code/survey_form/index.html"
},
{
"id": 10,
"id": 12,
"title": "API ETL Pipeline",
"skills": ["Python", "pandas", "requests"],
"level": "Intermediate",
Expand Down Expand Up @@ -503,7 +503,7 @@
"starter_code": "starter_code/ai_resume_analyzer.py"
},
{
"id": 11,
"id": 14,
"title": "Number Guessing Game",
"skills": [
"Python"
Expand Down Expand Up @@ -539,7 +539,7 @@
"starter_code": "starter_code/number_guessing.py"
},
{
"id": 12,
"id": 15,
"title": "Simple Email Automation",
"skills": [
"Python"
Expand Down Expand Up @@ -576,7 +576,7 @@
"starter_code": "starter_code/email_automation.py"
},
{
"id": 13,
"id": 16,
"title": "Quiz App",
"skills": [
"HTML",
Expand Down Expand Up @@ -616,7 +616,7 @@
"starter_code": "starter_code/quiz_app.html"
},
{
"id": 14,
"id": 17,
"title": "File Organiser Script",
"skills": ["Python"],
"level": "Beginner",
Expand Down Expand Up @@ -647,7 +647,7 @@
"starter_code": "starter_code/file_organiser.py"
},
{
"id": 15,
"id": 18,
"title": "Flashcard Study App",
"skills": ["HTML", "CSS", "JavaScript"],
"level": "Beginner",
Expand Down Expand Up @@ -678,7 +678,7 @@
"starter_code": "starter_code/flashcard_app.html"
},
{
"id": 16,
"id": 19,
"title": "Budget Tracker Web App",
"skills": ["HTML", "CSS", "JavaScript"],
"level": "Intermediate",
Expand Down Expand Up @@ -710,7 +710,7 @@
"starter_code": "starter_code/budget_tracker.html"
},
{
"id": 17,
"id": 20,
"title": "Network Port Scanner",
"skills": ["Python"],
"level": "Intermediate",
Expand Down Expand Up @@ -741,7 +741,7 @@
"starter_code": "starter_code/port_scanner.py"
},
{
"id": 18,
"id": 21,
"title": "Typing Speed Test",
"skills": ["HTML", "CSS", "JavaScript"],
"level": "Beginner",
Expand Down Expand Up @@ -773,7 +773,7 @@
"starter_code": "starter_code/typing_test.html"
},
{
"id": 19,
"id": 22,
"title": "Course Progress Tracker",
"skills": ["Python"],
"level": "Intermediate",
Expand Down
25 changes: 25 additions & 0 deletions tests/test_recommender_validation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import os
import sys


sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))

from utils.recommender import validate_recommendation_inputs


def test_validate_recommendation_inputs_accepts_supported_interests():
    """A supported interest combined with valid inputs yields no errors."""
    valid_inputs = ("Python", "Beginner", "Web", "Low")

    problems = validate_recommendation_inputs(*valid_inputs)

    assert problems == []


def test_validate_recommendation_inputs_accepts_interest_case_insensitively():
    """An all-lowercase spelling of a supported interest is still accepted."""
    lowercase_interest = "machine learning/ai"

    problems = validate_recommendation_inputs(
        "Python",
        "Beginner",
        lowercase_interest,
        "Low",
    )

    assert problems == []


def test_validate_recommendation_inputs_rejects_unknown_interest():
    """An unsupported interest produces the invalid-interest error message."""
    expected_message = "Please select a valid area of interest."

    problems = validate_recommendation_inputs(
        "Python",
        "Beginner",
        "Unknown",
        "Low",
    )

    assert expected_message in problems
66 changes: 44 additions & 22 deletions utils/recommender.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,12 @@
"time": 1,
}

# Legacy module-level names for the individual scoring weights. Older tests
# and contributor examples import these directly, so each one mirrors the
# matching SCORING_WEIGHTS entry as it stands at import time.
WEIGHT_SKILL, WEIGHT_LEVEL, WEIGHT_INTEREST, WEIGHT_TIME = (
    SCORING_WEIGHTS[weight_name]
    for weight_name in ("skill", "level", "interest", "time")
)

# Common aliases and abbreviations for skills.
# This improves recommendation accuracy by normalizing user input.
SKILL_ALIASES = {
Expand Down Expand Up @@ -55,6 +61,22 @@ def parse_skills(skills_string):
Example:
"JS, HTML5, CSS3" -> ["javascript", "html", "css"]
"""
if not isinstance(skills_string, str):
return []

stripped = skills_string.strip()
if stripped.startswith("["):
try:
parsed = json.loads(stripped)
if isinstance(parsed, list):
return [
SKILL_ALIASES.get(str(skill).strip().lower(), str(skill).strip().lower())
for skill in parsed
if str(skill).strip()
]
except json.JSONDecodeError:
pass

raw_skills = [
s.strip().lower()
for s in skills_string.split(",")
Expand Down Expand Up @@ -100,7 +122,9 @@ def score_single_project(project, user_skills, level, interest, time_availabilit
# Count how many user skills overlap with the
# skills required by the current project.
matched_skills = sum(1 for skill in user_skills if skill in project_skills)
score += matched_skills * SCORING_WEIGHTS["skill"]
if project_skills and matched_skills:
coverage = matched_skills / len(project_skills)
score += matched_skills * SCORING_WEIGHTS["skill"] * coverage

if project.get("level", "").lower() == level.lower():
score += SCORING_WEIGHTS["level"]
Expand Down Expand Up @@ -173,17 +197,7 @@ def _get_related(recommended_ids, all_projects, cluster_data):

def get_recommendations(skills_string, level, interest, time_availability):
"""
Return the top N recommended projects for the given user inputs,
along with related projects from the same cluster.

Return shape:
{
"recommendations": [ <project>, ... ], # up to MAX_RESULTS
"related": [ <project>, ... ], # up to MAX_RELATED
}

The "related" list is empty when clusters.json does not exist yet.
Run scripts/cluster_projects.py to generate it.
Return the top N recommended projects for the given user inputs.
"""
user_skills = parse_skills(skills_string)
all_projects = load_all_projects()
Expand All @@ -198,19 +212,27 @@ def get_recommendations(skills_string, level, interest, time_availability):

scored.sort(key=lambda item: item["score"], reverse=True)
top_projects = [item["project"] for item in scored[:MAX_RESULTS]]
top_ids = [p["id"] for p in top_projects]

cluster_data = _load_clusters()
related = _get_related(top_ids, all_projects, cluster_data) if cluster_data else []

return {
"recommendations": top_projects,
"related": related,
}
return top_projects


# Accepted values for the recommendation form inputs. Every entry is
# lowercase: validate_recommendation_inputs strips and lowercases the
# time-availability input before the membership test (presumably level and
# interest are normalized the same way -- confirm in the validator).
VALID_LEVELS = [
    "beginner",
    "intermediate",
    "advanced",
]

VALID_TIME_AVAILABILITY = [
    "low",
    "medium",
    "high",
]

# Kept in alphabetical order so additions are easy to review.
VALID_INTERESTS = [
    "automation", "backend", "business logic", "cloud computing",
    "cybersecurity", "data", "devops", "education",
    "games", "machine learning/ai", "mobile", "productivity",
    "tools", "web",
]


def validate_recommendation_inputs(skills, level, interest, time_availability):
Expand All @@ -234,4 +256,4 @@ def validate_recommendation_inputs(skills, level, interest, time_availability):
elif time_availability.strip().lower() not in VALID_TIME_AVAILABILITY:
errors.append("Invalid time availability. Choose Low, Medium, or High.")

return errors
return errors
Loading