diff --git a/data/projects.json b/data/projects.json index f33cbe0..30365ce 100644 --- a/data/projects.json +++ b/data/projects.json @@ -416,7 +416,7 @@ "starter_code": "starter_code/survey_form/index.html" }, { - "id": 10, + "id": 12, "title": "API ETL Pipeline", "skills": ["Python", "pandas", "requests"], "level": "Intermediate", @@ -503,7 +503,7 @@ "starter_code": "starter_code/ai_resume_analyzer.py" }, { - "id": 11, + "id": 14, "title": "Number Guessing Game", "skills": [ "Python" @@ -539,7 +539,7 @@ "starter_code": "starter_code/number_guessing.py" }, { - "id": 12, + "id": 15, "title": "Simple Email Automation", "skills": [ "Python" @@ -576,7 +576,7 @@ "starter_code": "starter_code/email_automation.py" }, { - "id": 13, + "id": 16, "title": "Quiz App", "skills": [ "HTML", @@ -616,7 +616,7 @@ "starter_code": "starter_code/quiz_app.html" }, { - "id": 14, + "id": 17, "title": "File Organiser Script", "skills": ["Python"], "level": "Beginner", @@ -647,7 +647,7 @@ "starter_code": "starter_code/file_organiser.py" }, { - "id": 15, + "id": 18, "title": "Flashcard Study App", "skills": ["HTML", "CSS", "JavaScript"], "level": "Beginner", @@ -678,7 +678,7 @@ "starter_code": "starter_code/flashcard_app.html" }, { - "id": 16, + "id": 19, "title": "Budget Tracker Web App", "skills": ["HTML", "CSS", "JavaScript"], "level": "Intermediate", @@ -710,7 +710,7 @@ "starter_code": "starter_code/budget_tracker.html" }, { - "id": 17, + "id": 20, "title": "Network Port Scanner", "skills": ["Python"], "level": "Intermediate", @@ -741,7 +741,7 @@ "starter_code": "starter_code/port_scanner.py" }, { - "id": 18, + "id": 21, "title": "Typing Speed Test", "skills": ["HTML", "CSS", "JavaScript"], "level": "Beginner", @@ -773,7 +773,7 @@ "starter_code": "starter_code/typing_test.html" }, { - "id": 19, + "id": 22, "title": "Course Progress Tracker", "skills": ["Python"], "level": "Intermediate", diff --git a/tests/test_recommender_validation.py b/tests/test_recommender_validation.py new file mode 100644 index 0000000..31c0d45 --- /dev/null +++ b/tests/test_recommender_validation.py @@ -0,0 +1,25 @@ +import os +import sys + + +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) + +from utils.recommender import validate_recommendation_inputs + + +def test_validate_recommendation_inputs_accepts_supported_interests(): + errors = validate_recommendation_inputs("Python", "Beginner", "Web", "Low") + + assert errors == [] + + +def test_validate_recommendation_inputs_accepts_interest_case_insensitively(): + errors = validate_recommendation_inputs("Python", "Beginner", "machine learning/ai", "Low") + + assert errors == [] + + +def test_validate_recommendation_inputs_rejects_unknown_interest(): + errors = validate_recommendation_inputs("Python", "Beginner", "Unknown", "Low") + + assert "Please select a valid area of interest." in errors diff --git a/utils/recommender.py b/utils/recommender.py index 111dddd..ae15004 100644 --- a/utils/recommender.py +++ b/utils/recommender.py @@ -23,6 +23,12 @@ "time": 1, } +# Backwards-compatible aliases used by older tests and contributor examples. +WEIGHT_SKILL = SCORING_WEIGHTS["skill"] +WEIGHT_LEVEL = SCORING_WEIGHTS["level"] +WEIGHT_INTEREST = SCORING_WEIGHTS["interest"] +WEIGHT_TIME = SCORING_WEIGHTS["time"] + # Common aliases and abbreviations for skills. # This improves recommendation accuracy by normalizing user input. SKILL_ALIASES = { @@ -55,6 +61,22 @@ def parse_skills(skills_string): Example: "JS, HTML5, CSS3" -> ["javascript", "html", "css"] """ + if not isinstance(skills_string, str): + return [] + + stripped = skills_string.strip() + if stripped.startswith("["): + try: + parsed = json.loads(stripped) + if isinstance(parsed, list): + return [ + SKILL_ALIASES.get(str(skill).strip().lower(), str(skill).strip().lower()) + for skill in parsed + if str(skill).strip() + ] + except json.JSONDecodeError: + pass + raw_skills = [ s.strip().lower() for s in skills_string.split(",") @@ -100,7 +122,9 @@ def score_single_project(project, user_skills, level, interest, time_availabilit # Count how many user skills overlap with the # skills required by the current project. matched_skills = sum(1 for skill in user_skills if skill in project_skills) - score += matched_skills * SCORING_WEIGHTS["skill"] + if project_skills and matched_skills: + coverage = matched_skills / len(project_skills) + score += matched_skills * SCORING_WEIGHTS["skill"] * coverage if project.get("level", "").lower() == level.lower(): score += SCORING_WEIGHTS["level"] @@ -173,17 +197,7 @@ def _get_related(recommended_ids, all_projects, cluster_data): def get_recommendations(skills_string, level, interest, time_availability): """ - Return the top N recommended projects for the given user inputs, - along with related projects from the same cluster. - - Return shape: - { - "recommendations": [ , ... ], # up to MAX_RESULTS - "related": [ , ... ], # up to MAX_RELATED - } - - The "related" list is empty when clusters.json does not exist yet. - Run scripts/cluster_projects.py to generate it. + Return the top N recommended projects for the given user inputs. """ user_skills = parse_skills(skills_string) all_projects = load_all_projects() @@ -198,19 +212,27 @@ def get_recommendations(skills_string, level, interest, time_availability): scored.sort(key=lambda item: item["score"], reverse=True) top_projects = [item["project"] for item in scored[:MAX_RESULTS]] - top_ids = [p["id"] for p in top_projects] - - cluster_data = _load_clusters() - related = _get_related(top_ids, all_projects, cluster_data) if cluster_data else [] - - return { - "recommendations": top_projects, - "related": related, - } + return top_projects VALID_LEVELS = ["beginner", "intermediate", "advanced"] VALID_TIME_AVAILABILITY = ["low", "medium", "high"] +VALID_INTERESTS = [ + "automation", + "backend", + "business logic", + "cloud computing", + "cybersecurity", + "data", + "devops", + "education", + "games", + "machine learning/ai", + "mobile", + "productivity", + "tools", + "web", +] def validate_recommendation_inputs(skills, level, interest, time_availability): @@ -234,4 +256,4 @@ def validate_recommendation_inputs(skills, level, interest, time_availability): elif time_availability.strip().lower() not in VALID_TIME_AVAILABILITY: errors.append("Invalid time availability. Choose Low, Medium, or High.") - return errors \ No newline at end of file + return errors