19 changes: 0 additions & 19 deletions .github/workflows/main.yml
@@ -1,31 +1,12 @@
# This is a basic workflow to help you get started with Actions

name: CI

# Controls when the action will run.
on:
  # Triggers the workflow on push or pull request events but only for the main branch
  push:
    branches: [main]
  pull_request:
    branches: [main]

  # Allows you to run this workflow manually from the Actions tab
  workflow_dispatch:

# A workflow run is made up of one or more jobs that can run sequentially or in parallel
jobs:
  # This workflow contains a single job called "build"
  build:
    # The type of runner that the job will run on
    runs-on: ubuntu-latest

    # Steps represent a sequence of tasks that will be executed as part of the job
    steps:
      # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
      - uses: actions/checkout@v2
      - uses: akhileshns/heroku-deploy@v3.12.12 # This is the action
        with:
          heroku_api_key: ${{secrets.HEROKU_API_KEY}} # Located in GitHub secrets
          heroku_app_name: "web-eye-tracker-1204" # Must be unique in Heroku
          heroku_email: "karine.pistili@gmail.com"
62 changes: 62 additions & 0 deletions app/main.py
@@ -1,5 +1,7 @@
from flask import Flask, request, Response, jsonify
from flask_cors import CORS
import pandas as pd
from app.services.metrics import EyeTrackingBenchmark

# Local imports from app
from app.routes import session as session_route
@@ -77,3 +79,63 @@ def batch_predict():
    if request.method == 'POST':
        return session_route.batch_predict()
    return Response('Invalid request method for route', status=405, mimetype='application/json')

"""
POST /api/session/benchmark

Runs eye-tracking benchmark evaluation.

Expected JSON:
{
"screen_width_px": int,
"screen_width_cm": float,
"viewing_distance_cm": float,
"samples": [
{
"True X": float,
"True Y": float,
"Predicted X": float,
"Predicted Y": float
}
]
}
"""
"""
POST /api/session/benchmark

Evaluates eye-tracking accuracy and precision.
"""
@app.route('/api/session/benchmark', methods=['POST'])
def run_benchmark():
    try:
        # silent=True returns None instead of raising on a malformed body
        data = request.get_json(silent=True) or {}

        required_keys = {
            "samples",
            "screen_width_px",
            "screen_width_cm",
            "viewing_distance_cm"
        }

        if not required_keys.issubset(data.keys()):
            missing = required_keys - set(data.keys())
            return jsonify({"error": f"Missing fields: {missing}"}), 400

        df = pd.DataFrame(data["samples"])

        benchmark = EyeTrackingBenchmark(
            df=df,
            screen_width_px=data["screen_width_px"],
            screen_width_cm=data["screen_width_cm"],
            viewing_distance_cm=data["viewing_distance_cm"]
        )

        results = {
            "overall": benchmark.evaluate(),
            "per_target": benchmark.evaluate_per_target()
        }

        return jsonify(results), 200

    except Exception as e:
        return jsonify({"error": str(e)}), 500
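
For reference, a minimal client call against this endpoint might look like the following. This is only a sketch: it assumes a local dev server at http://localhost:5000, and the payload values are made up.

import requests

payload = {
    "screen_width_px": 1920,
    "screen_width_cm": 34.5,
    "viewing_distance_cm": 60.0,
    "samples": [
        {"True X": 960.0, "True Y": 540.0, "Predicted X": 948.0, "Predicted Y": 551.0},
        {"True X": 100.0, "True Y": 100.0, "Predicted X": 112.0, "Predicted Y": 95.0},
    ],
}

# POST the benchmark payload and print the computed metrics
resp = requests.post("http://localhost:5000/api/session/benchmark", json=payload)
print(resp.status_code, resp.json())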
3 changes: 2 additions & 1 deletion app/requirements.txt
@@ -17,4 +17,5 @@ threadpoolctl==3.6.0
tzdata==2025.2
Werkzeug==3.1.3
gunicorn==23.0.0
requests==2.31.0
reportlab
148 changes: 146 additions & 2 deletions app/routes/session.py
Expand Up @@ -22,6 +22,9 @@
# from app.services import database as db
from app.services import gaze_tracker

# Import utility for generating benchmark PDF reports
from app.services.benchmark.report_generator import generate_benchmark_report


# Constants
ALLOWED_EXTENSIONS = {"txt", "webm"}
@@ -56,7 +59,11 @@ def convert_nan_to_none(obj):
        return float(obj) if isinstance(obj, np.floating) else int(obj)
    return obj


# ------------------------------------------------------------------
# Calibration endpoint
# Generates training CSV files from calibration points and runs the
# gaze prediction model to compute calibration results.
# ------------------------------------------------------------------


def calib_results():
@@ -147,6 +154,12 @@ def calib_results():
    data = convert_nan_to_none(data)
    return Response(json.dumps(data), status=200, mimetype='application/json')


# ------------------------------------------------------------------
# Batch prediction endpoint
# Uses stored calibration data to predict gaze positions for new
# iris tracking samples sent from the client.
# ------------------------------------------------------------------
def batch_predict():
    try:
        data = request.get_json()
@@ -193,4 +206,135 @@ def batch_predict():
    except Exception as e:
        print("Error in batch_predict:", e)
        traceback.print_exc()
        return Response("Internal error", status=500)

# ------------------------------------------------------------------
# Benchmark evaluation endpoint
#
# Endpoint:
# POST /api/session/<session_id>/benchmark
#
# Computes benchmark metrics for eye-tracking predictions including:
# - Accuracy metrics
# - Precision metrics
# - Per-target analysis
#
# Also warns if the number of samples is small (<30) since metrics
# like p95 error may be statistically unreliable.
# ------------------------------------------------------------------
@app.route('/api/session/<session_id>/benchmark', methods=['POST'])
def run_benchmark(session_id):
    try:
        data = request.get_json()

        samples = data.get("samples")

        if not samples:
            return jsonify({"error": "Missing samples"}), 400

        # Convert to DataFrame
        df = pd.DataFrame(samples)

        # Minimum sample validation
        if len(df) < 30:
            warning = "Sample size is small; metrics may be statistically unreliable"
        else:
            warning = None

        # Get session metadata
        session = Session.get(session_id)

        if not session:
            return jsonify({"error": "Session not found"}), 404

        screen_width_px = session.get("screen_width_px")
        screen_width_cm = session.get("screen_width_cm")
        viewing_distance_cm = session.get("viewing_distance_cm")

        # Import benchmark module
        from app.services.calib_validation.metrics import EyeTrackingBenchmark

        benchmark = EyeTrackingBenchmark(
            df=df,
            screen_width_px=screen_width_px,
            screen_width_cm=screen_width_cm,
            viewing_distance_cm=viewing_distance_cm
        )

        results = {
            "overall": benchmark.evaluate(),
            "per_target": benchmark.evaluate_per_target()
        }

        if warning:
            results["warning"] = warning

        return jsonify(convert_nan_to_none(results)), 200

    except Exception as e:
        traceback.print_exc()
        return jsonify({"error": str(e)}), 500
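
The screen and distance parameters suggest that EyeTrackingBenchmark converts pixel errors into degrees of visual angle. The class itself is not shown in this diff, so the following is only a sketch of the standard conversion it presumably performs internally:

import math

def pixel_error_to_degrees(error_px, screen_width_px, screen_width_cm, viewing_distance_cm):
    # Convert a pixel distance into centimeters via the screen's physical width
    error_cm = error_px * (screen_width_cm / screen_width_px)
    # Visual angle subtended at the eye by that on-screen distance
    return math.degrees(2 * math.atan(error_cm / (2 * viewing_distance_cm)))

# Example: a 25 px error on a 1920 px / 34.5 cm screen viewed from 60 cm
# comes out to roughly 0.43 degrees of visual angle.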

# ------------------------------------------------------------------
# Benchmark report generation endpoint
#
# Endpoint:
# POST /api/session/<session_id>/benchmark/report
#
# Generates a downloadable PDF report summarizing benchmark results
# and visualizing true vs predicted gaze points.
# ------------------------------------------------------------------
@app.route('/api/session/<session_id>/benchmark/report', methods=['POST'])
def benchmark_report(session_id):
    try:
        data = request.get_json()
        samples = data.get("samples")

        if not samples:
            return jsonify({"error": "Missing samples"}), 400

        # Convert samples to DataFrame
        df = pd.DataFrame(samples)

        # Retrieve session metadata (screen + device parameters)
        session = Session.get(session_id)

        if not session:
            return jsonify({"error": "Session not found"}), 404

        screen_width_px = session.get("screen_width_px")
        screen_width_cm = session.get("screen_width_cm")
        viewing_distance_cm = session.get("viewing_distance_cm")

        # Import benchmark evaluator
        from app.services.calib_validation.metrics import EyeTrackingBenchmark

        # Run benchmark evaluation
        benchmark = EyeTrackingBenchmark(
            df=df,
            screen_width_px=screen_width_px,
            screen_width_cm=screen_width_cm,
            viewing_distance_cm=viewing_distance_cm
        )

        results = {
            "overall": benchmark.evaluate(),
            "per_target": benchmark.evaluate_per_target()
        }

        # Path where the report will be saved; ensure the directory exists
        from pathlib import Path
        Path("reports").mkdir(parents=True, exist_ok=True)
        report_path = f"reports/{session_id}_benchmark_report.pdf"

        # Generate the PDF report
        generate_benchmark_report(
            samples,
            results["overall"]["accuracy"],
            report_path
        )

        # Return generated file
        return send_file(report_path, as_attachment=True)

    except Exception as e:
        traceback.print_exc()
        return jsonify({"error": str(e)}), 500
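
A matching client call for the report endpoint, sketched under the same local-server assumption as above (the session id abc123 is hypothetical), would save the returned PDF to disk:

import requests

samples = [
    {"True X": 960.0, "True Y": 540.0, "Predicted X": 948.0, "Predicted Y": 551.0},
]

# POST the samples and write the PDF response to a local file
resp = requests.post(
    "http://localhost:5000/api/session/abc123/benchmark/report",
    json={"samples": samples},
)
if resp.ok:
    with open("benchmark_report.pdf", "wb") as f:
        f.write(resp.content)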
42 changes: 42 additions & 0 deletions app/services/benchmark/report_generator.py
@@ -0,0 +1,42 @@
import pandas as pd
import matplotlib
matplotlib.use("Agg")  # Non-interactive backend, safe for server-side rendering
import matplotlib.pyplot as plt
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Image
from reportlab.lib.styles import getSampleStyleSheet


def generate_benchmark_report(samples, metrics, output_path):

    df = pd.DataFrame(samples)

    # Create visualization of true vs. predicted gaze points
    plt.figure()
    plt.scatter(df["True X"], df["True Y"], label="True")
    plt.scatter(df["Predicted X"], df["Predicted Y"], label="Predicted")

    plt.legend()
    plt.title("Gaze Prediction Accuracy")

    plot_path = "benchmark_plot.png"
    plt.savefig(plot_path)
    plt.close()  # Release the figure to avoid leaking memory across requests

    styles = getSampleStyleSheet()

    doc = SimpleDocTemplate(output_path)
    elements = []

    elements.append(Paragraph("Eye Tracking Benchmark Report", styles["Title"]))
    elements.append(Spacer(1, 20))

    elements.append(
        Paragraph(
            f"Mean Accuracy Error (px): {metrics['mean_accuracy_error_px']}",
            styles["BodyText"],
        )
    )

    elements.append(Spacer(1, 20))
    elements.append(Image(plot_path))

    doc.build(elements)

    return output_path
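
For standalone testing, the generator can be driven directly with made-up data. The mean_accuracy_error_px key below matches what the function reads; the value itself is illustrative, since the real metrics dict comes from EyeTrackingBenchmark.evaluate():

from app.services.benchmark.report_generator import generate_benchmark_report

samples = [
    {"True X": 960.0, "True Y": 540.0, "Predicted X": 948.0, "Predicted Y": 551.0},
    {"True X": 100.0, "True Y": 100.0, "Predicted X": 112.0, "Predicted Y": 95.0},
]
metrics = {"mean_accuracy_error_px": 14.2}  # made-up value for illustration

path = generate_benchmark_report(samples, metrics, "example_report.pdf")
print(f"Report written to {path}")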