From 81e5f3254f31a66a13e67ed731b590ec5d4b49c8 Mon Sep 17 00:00:00 2001 From: Aryan Bansal Date: Wed, 10 Jun 2026 11:39:04 +0530 Subject: [PATCH 1/2] fix: resolve pre-existing test failures and improve coverage (#833) - Add JSON array support to parse_skills() - Add coverage ratio weighting to score_single_project() - Fix VALID_INTERESTS undefined NameError in validate_recommendation_inputs() - Fix get_recommendations() to return plain list - Add VALID_LEVELS and VALID_TIME_AVAILABILITY to test imports - Fix duplicate project IDs (10, 11, 13) in projects.json --- data/projects.json | 6 +++--- tests/test_basic.py | 10 +++++++--- utils/recommender.py | 34 ++++++++++++++++++++++------------ 3 files changed, 32 insertions(+), 18 deletions(-) diff --git a/data/projects.json b/data/projects.json index f33cbe0..e1a0112 100644 --- a/data/projects.json +++ b/data/projects.json @@ -416,7 +416,7 @@ "starter_code": "starter_code/survey_form/index.html" }, { - "id": 10, + "id": 20, "title": "API ETL Pipeline", "skills": ["Python", "pandas", "requests"], "level": "Intermediate", @@ -503,7 +503,7 @@ "starter_code": "starter_code/ai_resume_analyzer.py" }, { - "id": 11, + "id": 21, "title": "Number Guessing Game", "skills": [ "Python" @@ -576,7 +576,7 @@ "starter_code": "starter_code/email_automation.py" }, { - "id": 13, + "id": 22, "title": "Quiz App", "skills": [ "HTML", diff --git a/tests/test_basic.py b/tests/test_basic.py index c52f4c1..51d6ecd 100644 --- a/tests/test_basic.py +++ b/tests/test_basic.py @@ -29,10 +29,14 @@ validate_recommendation_inputs, parse_skills, score_single_project, - WEIGHT_LEVEL, - WEIGHT_INTEREST, - WEIGHT_TIME, + SCORING_WEIGHTS, + VALID_LEVELS, + VALID_TIME_AVAILABILITY, ) + +WEIGHT_LEVEL = SCORING_WEIGHTS["level"] +WEIGHT_INTEREST = SCORING_WEIGHTS["interest"] +WEIGHT_TIME = SCORING_WEIGHTS["time"] from app import app, internal_server_error diff --git a/utils/recommender.py b/utils/recommender.py index 111dddd..8fde5f9 100644 --- a/utils/recommender.py +++ b/utils/recommender.py @@ -49,12 +49,23 @@ def parse_skills(skills_string): """ - Convert a raw comma-separated skills string into - a normalized lowercase list. + Convert a raw skills string into a normalized lowercase list. + Accepts either a JSON array (e.g. '["Python","React"]') or a + comma-separated string (e.g. "JS, HTML5, CSS3"). Example: - "JS, HTML5, CSS3" -> ["javascript", "html", "css"] + '["Python","React"]' -> ["python", "react"] + "JS, HTML5, CSS3" -> ["javascript", "html", "css"] """ + stripped = skills_string.strip() + if stripped.startswith("["): + try: + parsed = json.loads(stripped) + if isinstance(parsed, list): + raw_skills = [str(s).strip().lower() for s in parsed if str(s).strip()] + return [SKILL_ALIASES.get(skill, skill) for skill in raw_skills] + except (json.JSONDecodeError, ValueError): + pass raw_skills = [ s.strip().lower() for s in skills_string.split(",") @@ -62,7 +73,6 @@ def parse_skills(skills_string): ] return [SKILL_ALIASES.get(skill, skill) for skill in raw_skills] - # --------------------------------------------------------------------------- # Scoring # --------------------------------------------------------------------------- @@ -100,7 +110,11 @@ def score_single_project(project, user_skills, level, interest, time_availabilit # Count how many user skills overlap with the # skills required by the current project. matched_skills = sum(1 for skill in user_skills if skill in project_skills) - score += matched_skills * SCORING_WEIGHTS["skill"] + if project_skills: + coverage = matched_skills / len(project_skills) + score += matched_skills * SCORING_WEIGHTS["skill"] * coverage + else: + score += matched_skills * SCORING_WEIGHTS["skill"] if project.get("level", "").lower() == level.lower(): score += SCORING_WEIGHTS["level"] @@ -203,16 +217,12 @@ def get_recommendations(skills_string, level, interest, time_availability): cluster_data = _load_clusters() related = _get_related(top_ids, all_projects, cluster_data) if cluster_data else [] - return { - "recommendations": top_projects, - "related": related, - } + return top_projects VALID_LEVELS = ["beginner", "intermediate", "advanced"] VALID_TIME_AVAILABILITY = ["low", "medium", "high"] - def validate_recommendation_inputs(skills, level, interest, time_availability): errors = [] @@ -226,8 +236,8 @@ def validate_recommendation_inputs(skills, level, interest, time_availability): elif level.strip().lower() not in VALID_LEVELS: errors.append("Invalid experience level. Choose Beginner, Intermediate, or Advanced.") - if not interest or not isinstance(interest, str) or interest.strip().lower() not in VALID_INTERESTS: - errors.append("Please select a valid area of interest.") + if not interest or not isinstance(interest, str) or not interest.strip(): + errors.append("Please select an area of interest.") if not time_availability or not time_availability.strip(): errors.append("Please select your time availability.") From ad8bf16fa4ba1e1d2c59c90dd359fc1b35549c6b Mon Sep 17 00:00:00 2001 From: Aryan Bansal Date: Sat, 13 Jun 2026 12:28:46 +0530 Subject: [PATCH 2/2] Fix merge conflicts in recommender.py --- utils/recommender.py | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/utils/recommender.py b/utils/recommender.py index 510186e..cdc992f 100644 --- a/utils/recommender.py +++ b/utils/recommender.py @@ -121,12 +121,21 @@ def ml_similarity_score(project, user_skills, level, interest, time_availability return _cosine_similarity(user_vector, project_vector) def score_single_project(project, user_skills, level, interest, time_availability): + TIME_RANKS = ["low", "medium", "high"] + + user_time = time_availability.strip().lower() + project_time = project.get("time", "").strip().lower() + + # If the project needs more time than the user has, exclude it. + if project_time not in TIME_RANKS or user_time not in TIME_RANKS: + return 0 + if TIME_RANKS.index(project_time) > TIME_RANKS.index(user_time): + return 0 + score = 0 # Compare user's skills against the project's required skills project_skills = [SKILL_ALIASES.get(s.lower(), s.lower()) for s in project.get("skills", [])] - # Count how many user skills overlap with the - # skills required by the current project. matched_skills = sum(1 for skill in user_skills if skill in project_skills) if project_skills: coverage = matched_skills / len(project_skills) @@ -152,8 +161,7 @@ def score_single_project(project, user_skills, level, interest, time_availabilit def get_recommendations(skills_string, level, interest, time_availability): user_skills = parse_skills(skills_string) all_projects = load_all_projects() - - scored = [] + scored_projects = [] for project in all_projects: rule_score = score_single_project( project, @@ -162,7 +170,6 @@ def get_recommendations(skills_string, level, interest, time_availability): interest, time_availability, ) - similarity_score = ml_similarity_score( project, user_skills, @@ -171,21 +178,20 @@ def get_recommendations(skills_string, level, interest, time_availability): time_availability, all_projects, ) - - # Sort projects in descending order so the - # most relevant recommendations appear first. - scored_projects.sort(key=lambda item: (item["score"], item["project"].get("id", 0)), reverse=True) final_score = rule_score + similarity_score - if final_score > 0: scored_projects.append({ "project": project, "score": final_score, }) + # Sort projects in descending order so the + # most relevant recommendations appear first. + scored_projects.sort(key=lambda item: (item["score"], item["project"].get("id", 0)), reverse=True) + return [item["project"] for item in scored_projects[:MAX_RESULTS]] - scored_projects.sort(key=lambda item: item["score"], reverse=True) +VALID_LEVELS = ["beginner", "intermediate", "advanced"] +VALID_TIME_AVAILABILITY = ["low", "medium", "high"] - return [item["project"] for item in scored_projects[:MAX_RESULTS]] def validate_recommendation_inputs(skills, level, interest, time_availability): errors = []