Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 7 additions & 3 deletions tests/test_basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,14 @@
validate_recommendation_inputs,
parse_skills,
score_single_project,
WEIGHT_LEVEL,
WEIGHT_INTEREST,
WEIGHT_TIME,
SCORING_WEIGHTS,
VALID_LEVELS,
VALID_TIME_AVAILABILITY,
)

WEIGHT_LEVEL = SCORING_WEIGHTS["level"]
WEIGHT_INTEREST = SCORING_WEIGHTS["interest"]
WEIGHT_TIME = SCORING_WEIGHTS["time"]
from app import app, internal_server_error


Expand Down
59 changes: 43 additions & 16 deletions utils/recommender.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,24 @@
}

def parse_skills(skills_string):
"""
Convert a raw skills string into a normalized lowercase list.
Accepts either a JSON array (e.g. '["Python","React"]') or a
comma-separated string (e.g. "JS, HTML5, CSS3").

Example:
'["Python","React"]' -> ["python", "react"]
"JS, HTML5, CSS3" -> ["javascript", "html", "css"]
"""
stripped = skills_string.strip()
if stripped.startswith("["):
try:
parsed = json.loads(stripped)
if isinstance(parsed, list):
raw_skills = [str(s).strip().lower() for s in parsed if str(s).strip()]
return [SKILL_ALIASES.get(skill, skill) for skill in raw_skills]
except (json.JSONDecodeError, ValueError):
pass
raw_skills = [
s.strip().lower()
for s in skills_string.split(",")
Expand Down Expand Up @@ -103,15 +121,27 @@ def ml_similarity_score(project, user_skills, level, interest, time_availability
return _cosine_similarity(user_vector, project_vector)

def score_single_project(project, user_skills, level, interest, time_availability):
TIME_RANKS = ["low", "medium", "high"]

user_time = time_availability.strip().lower()
project_time = project.get("time", "").strip().lower()

# If the project needs more time than the user has, exclude it.
if project_time not in TIME_RANKS or user_time not in TIME_RANKS:
return 0
if TIME_RANKS.index(project_time) > TIME_RANKS.index(user_time):
return 0

score = 0

# Compare user's skills against the project's required skills
project_skills = [SKILL_ALIASES.get(s.lower(), s.lower()) for s in project.get("skills", [])]
# Count how many user skills overlap with the
# skills required by the current project.
matched_skills = sum(1 for skill in user_skills if skill in project_skills)

score += matched_skills * SCORING_WEIGHTS["skill"]
if project_skills:
coverage = matched_skills / len(project_skills)
score += matched_skills * SCORING_WEIGHTS["skill"] * coverage
else:
score += matched_skills * SCORING_WEIGHTS["skill"]

if project.get("level", "").lower() == level.lower():
score += SCORING_WEIGHTS["level"]
Expand All @@ -131,8 +161,7 @@ def score_single_project(project, user_skills, level, interest, time_availabilit
def get_recommendations(skills_string, level, interest, time_availability):
user_skills = parse_skills(skills_string)
all_projects = load_all_projects()

scored = []
scored_projects = []
for project in all_projects:
rule_score = score_single_project(
project,
Expand All @@ -141,7 +170,6 @@ def get_recommendations(skills_string, level, interest, time_availability):
interest,
time_availability,
)

similarity_score = ml_similarity_score(
project,
user_skills,
Expand All @@ -150,21 +178,20 @@ def get_recommendations(skills_string, level, interest, time_availability):
time_availability,
all_projects,
)

# Sort projects in descending order so the
# most relevant recommendations appear first.
scored_projects.sort(key=lambda item: (item["score"], item["project"].get("id", 0)), reverse=True)
final_score = rule_score + similarity_score

if final_score > 0:
scored_projects.append({
"project": project,
"score": final_score,
})
# Sort projects in descending order so the
# most relevant recommendations appear first.
scored_projects.sort(key=lambda item: (item["score"], item["project"].get("id", 0)), reverse=True)
return [item["project"] for item in scored_projects[:MAX_RESULTS]]

scored_projects.sort(key=lambda item: item["score"], reverse=True)
VALID_LEVELS = ["beginner", "intermediate", "advanced"]
VALID_TIME_AVAILABILITY = ["low", "medium", "high"]

return [item["project"] for item in scored_projects[:MAX_RESULTS]]

def validate_recommendation_inputs(skills, level, interest, time_availability):
errors = []
Expand All @@ -179,8 +206,8 @@ def validate_recommendation_inputs(skills, level, interest, time_availability):
elif level.strip().lower() not in VALID_LEVELS:
errors.append("Invalid experience level. Choose Beginner, Intermediate, or Advanced.")

if not interest or not isinstance(interest, str) or interest.strip().lower() not in VALID_INTERESTS:
errors.append("Please select a valid area of interest.")
if not interest or not isinstance(interest, str) or not interest.strip():
errors.append("Please select an area of interest.")

if not time_availability or not time_availability.strip():
errors.append("Please select your time availability.")
Expand Down
Loading