JSON format. The goal
+ +This makes it harder to build generalized components on the frontend, to store results consistently in Firestore, and to compare or combine results across modalities. Any new analysis service (e.g. speech prosody, physiological signals) would add yet another format to handle. + +## Motivation + +Standardizing the output format across all sentiment and emotion analysis services would: + +- Allow the frontend to render results from any modality using a shared set of components +- Simplify data storage by using a consistent document structure in Firestore +- Make it easier to aggregate or compare sentiment across modalities for the same usability test session +- Reduce integration effort when adding new analysis backends + +This schema is intentionally minimal. It captures the fields that are common to all current analysis types while leaving room for future extension. + +## Proposed Schema + +```json +{ + "schema_version": "1.0", + "analysis_type": "sentiment | emotion", + "modality": "text | facial | audio", + "source_model": "string", + "timestamp": "ISO 8601 datetime string", + "task_id": "string | null", + "input_summary": "string", + "results": [ + { + "label": "string (lowercase)", + "score": "float (0.0 to 1.0)", + "intensity": "string | null", + "segment": { + "start": "float (seconds)", + "end": "float (seconds)", + "text": "string | null" + } + } + ] +} +``` + +## Field Descriptions + +### Top-level fields + +| Field | Type | Required | Description | +|-------|------|----------|-------------| +| `schema_version` | string | yes | Version of this schema format. Currently `"1.0"`. | +| `analysis_type` | string | yes | Either `"sentiment"` or `"emotion"`. Sentiment covers polarity (positive/negative/neutral). Emotion covers categorical states (happy, sad, angry, etc.). | +| `modality` | string | yes | The input modality that was analyzed. One of `"text"`, `"facial"`, or `"audio"`. 
| +| `source_model` | string | yes | Identifier for the model or service that produced the result. For example `"bertweet-base-sentiment-analysis"` or `"emotiondetector-model1"`. | +| `timestamp` | string | yes | ISO 8601 formatted datetime of when the analysis was performed. | +| `task_id` | string or null | no | Optional identifier linking this result to a specific usability test task or session. | +| `input_summary` | string | yes | A short description of what was analyzed. For text input this would be the text itself (possibly truncated). For audio or video it could be the filename or a description. | +| `results` | array | yes | List of individual result entries. For simple text sentiment this will have one entry. For timestamped audio or multi-emotion facial analysis this will have multiple entries. | + +### Result entry fields + +| Field | Type | Required | Description | +|-------|------|----------|-------------| +| `label` | string | yes | The predicted label in lowercase. For sentiment: `"positive"`, `"negative"`, `"neutral"`. For emotion: `"happy"`, `"sad"`, `"angry"`, `"disgusted"`, `"fearful"`, `"surprised"`, `"neutral"`. | +| `score` | float | yes | Confidence or proportion score between 0.0 and 1.0. | +| `intensity` | string or null | no | Optional intensity qualifier. Could be `"low"`, `"medium"`, `"high"` or null if not applicable. Reserved for future use. | +| `segment` | object or null | no | Temporal or textual segment information. Null for results that apply to the entire input. | + +### Segment fields + +| Field | Type | Required | Description | +|-------|------|----------|-------------| +| `start` | float | yes | Start time in seconds from the beginning of the input. | +| `end` | float | yes | End time in seconds. | +| `text` | string or null | no | Transcript text for this segment, if available. | + +## Label Normalization + +A key part of standardization is normalizing labels from different models to a consistent vocabulary. 
To support a new model that uses a different label set, add a corresponding mapping for it in the normalizer module.
{"label": "surprised", "score": 0.12, "intensity": null, "segment": null}, + {"label": "sad", "score": 0.06, "intensity": null, "segment": null}, + {"label": "angry", "score": 0.04, "intensity": null, "segment": null}, + {"label": "fearful", "score": 0.02, "intensity": null, "segment": null}, + {"label": "disgusted", "score": 0.01, "intensity": null, "segment": null} + ] +} +``` + +## Compatibility Considerations + +### Existing API responses + +This schema is designed to coexist with the current API response format. The existing `/sentiment/analyze` and `/audio-transcript-sentiment/process` endpoints will continue to return their current format. The standardized format can be introduced as an optional wrapper or as an additional field in future versions of the API response. + +### RUXAILAB frontend + +The frontend currently expects sentiment results with `label` (POS/NEG/NEU) and `confidence` fields for audio sentiment, and emotion percentage objects for facial analysis. Adopting this schema would require updating the `AudioSentimentController` and `FacialSentimentPanel` components to read from the new structure. This can be done incrementally since the new format is a superset of the information in the current format. + +### Firestore storage + +The standardized schema maps directly to a Firestore document structure. The `results` array is compatible with Firestore's array type. The `task_id` field enables querying results by task or session. + +### Facial sentiment API + +The facial-sentiment-analysis-api currently returns emotion percentages as a flat dictionary. The normalizer module includes a converter that transforms this format into the standardized schema. No changes to the facial API itself are required. + +## Future Extensions + +- Additional modalities (e.g. 
`physiological`, `speech_prosody`) +- Per-segment emotion-sentiment fusion results +- Severity or intensity classification once models support it +- Batch analysis results with multiple inputs diff --git a/examples/audio_sentiment_standardized_example.json b/examples/audio_sentiment_standardized_example.json new file mode 100644 index 0000000..8a01d3f --- /dev/null +++ b/examples/audio_sentiment_standardized_example.json @@ -0,0 +1,41 @@ +{ + "schema_version": "1.0", + "analysis_type": "sentiment", + "modality": "audio", + "source_model": "bertweet-base-sentiment-analysis", + "timestamp": "2026-03-09T14:25:00Z", + "task_id": "usability-session-42", + "input_summary": "sample_audio.mp3 (0ms to 10000ms)", + "results": [ + { + "label": "positive", + "score": 0.9231, + "intensity": null, + "segment": { + "start": 0.0, + "end": 3.52, + "text": "I really liked the navigation on this page" + } + }, + { + "label": "negative", + "score": 0.7814, + "intensity": null, + "segment": { + "start": 3.52, + "end": 7.18, + "text": "but the search function was pretty confusing" + } + }, + { + "label": "neutral", + "score": 0.6102, + "intensity": null, + "segment": { + "start": 7.18, + "end": 10.0, + "text": "I guess the layout is okay" + } + } + ] +} diff --git a/normalization/__init__.py b/normalization/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/normalization/normalizer.py b/normalization/normalizer.py new file mode 100644 index 0000000..400fd9c --- /dev/null +++ b/normalization/normalizer.py @@ -0,0 +1,210 @@ +""" +Normalization utilities for converting raw model outputs into the +standardized sentiment/emotion schema. + +Each normalize_* function takes the native output format of a specific +model or pipeline and returns a StandardizedOutput instance. 
+""" + +from datetime import datetime, timezone +from typing import Optional + +from normalization.schema import ( + StandardizedOutput, + ResultEntry, + Segment, +) + +# --------------------------------------------------------------------------- # +# Label mappings +# --------------------------------------------------------------------------- # + +# BERTweet sentiment labels -> standardized lowercase labels +BERTWEET_LABEL_MAP = { + "POS": "positive", + "NEG": "negative", + "NEU": "neutral", +} + +# Facial emotion labels (as returned by the facial-sentiment-analysis-api) +FACIAL_EMOTION_LABEL_MAP = { + "Angry": "angry", + "Disgusted": "disgusted", + "Fearful": "fearful", + "Happy": "happy", + "Neutral": "neutral", + "Sad": "sad", + "Surprised": "surprised", +} + + +def _now_iso() -> str: + """Return the current UTC time as an ISO 8601 string.""" + return datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ") + + +# --------------------------------------------------------------------------- # +# Text sentiment (single prediction) +# --------------------------------------------------------------------------- # + +def normalize_text_sentiment( + label: str, + confidence: float, + text: str, + source_model: str = "bertweet-base-sentiment-analysis", + task_id: Optional[str] = None, + timestamp: Optional[str] = None, +) -> StandardizedOutput: + """Convert the output of the /sentiment/analyze endpoint. + + Parameters + ---------- + label : str + Raw label from the model (e.g. "POS", "NEG", "NEU"). + confidence : float + Confidence score returned by the model. + text : str + The input text that was analyzed. + source_model : str + Identifier for the model that produced the result. + task_id : str or None + Optional task or session identifier. + timestamp : str or None + ISO 8601 timestamp. If not provided the current UTC time is used. 
+ """ + normalized_label = BERTWEET_LABEL_MAP.get(label, label.lower()) + + return StandardizedOutput( + analysis_type="sentiment", + modality="text", + source_model=source_model, + timestamp=timestamp or _now_iso(), + task_id=task_id, + input_summary=text if len(text) <= 200 else text[:197] + "...", + results=[ + ResultEntry( + label=normalized_label, + score=round(confidence, 4), + ) + ], + ) + + +# --------------------------------------------------------------------------- # +# Audio transcript + sentiment pipeline +# --------------------------------------------------------------------------- # + +def normalize_pipeline_sentiment( + utterances: list, + audio_path: str, + start_time_ms: int, + end_time_ms: int, + source_model: str = "bertweet-base-sentiment-analysis", + task_id: Optional[str] = None, + timestamp: Optional[str] = None, +) -> StandardizedOutput: + """Convert the output of the /audio-transcript-sentiment/process endpoint. + + Parameters + ---------- + utterances : list[dict] + List of utterance dicts, each with keys: timestamp (list of two + floats), text (str), label (str), confidence (float). Utterances + that have an "error" key instead of label/confidence are skipped. + audio_path : str + Path to the audio file that was analyzed. + start_time_ms : int + Start time of the extracted segment in milliseconds. + end_time_ms : int + End time of the extracted segment in milliseconds. + source_model : str + Identifier for the model that produced the result. + task_id : str or None + Optional task or session identifier. + timestamp : str or None + ISO 8601 timestamp. 
+ """ + results = [] + for u in utterances: + if "error" in u: + continue + raw_label = u.get("label", "") + normalized_label = BERTWEET_LABEL_MAP.get(raw_label, raw_label.lower()) + ts = u.get("timestamp", [0.0, 0.0]) + results.append( + ResultEntry( + label=normalized_label, + score=round(u.get("confidence", 0.0), 4), + segment=Segment( + start=float(ts[0]), + end=float(ts[1]), + text=u.get("text"), + ), + ) + ) + + summary = f"{audio_path} ({start_time_ms}ms to {end_time_ms}ms)" + + return StandardizedOutput( + analysis_type="sentiment", + modality="audio", + source_model=source_model, + timestamp=timestamp or _now_iso(), + task_id=task_id, + input_summary=summary, + results=results, + ) + + +# --------------------------------------------------------------------------- # +# Facial emotion percentages +# --------------------------------------------------------------------------- # + +def normalize_facial_emotions( + emotion_percentages: dict, + video_path: str, + source_model: str = "emotiondetector-model1", + task_id: Optional[str] = None, + timestamp: Optional[str] = None, +) -> StandardizedOutput: + """Convert the output of the facial-sentiment-analysis-api. + + Parameters + ---------- + emotion_percentages : dict + Dictionary mapping emotion names to float percentages (0-100 or 0-1). + Keys should be title-case names like "Happy", "Angry", etc. + video_path : str + Path or identifier of the video that was analyzed. + source_model : str + Identifier for the model that produced the result. + task_id : str or None + Optional task or session identifier. + timestamp : str or None + ISO 8601 timestamp. 
+ """ + results = [] + for raw_label, score in emotion_percentages.items(): + normalized_label = FACIAL_EMOTION_LABEL_MAP.get(raw_label, raw_label.lower()) + # If scores are in 0-100 range, convert to 0-1 + if score > 1.0: + score = score / 100.0 + results.append( + ResultEntry( + label=normalized_label, + score=round(score, 4), + ) + ) + + # Sort by score descending so the dominant emotion comes first + results.sort(key=lambda r: r.score, reverse=True) + + return StandardizedOutput( + analysis_type="emotion", + modality="facial", + source_model=source_model, + timestamp=timestamp or _now_iso(), + task_id=task_id, + input_summary=video_path, + results=results, + ) diff --git a/normalization/schema.py b/normalization/schema.py new file mode 100644 index 0000000..79e1774 --- /dev/null +++ b/normalization/schema.py @@ -0,0 +1,56 @@ +""" +Standardized output schema for sentiment and emotion analysis results. + +Defines Pydantic models that represent the v1 schema documented in +docs/emotion_output_schema_v1.md. These models can be used to validate, +serialize, and deserialize analysis results across different modalities +(text, audio, facial) and analysis types (sentiment, emotion). 
+""" + +from typing import List, Optional, Literal +from pydantic import BaseModel, Field + + +class Segment(BaseModel): + """Temporal and textual segment associated with a result entry.""" + start: float = Field(..., description="Start time in seconds") + end: float = Field(..., description="End time in seconds") + text: Optional[str] = Field(None, description="Transcript text for this segment") + + +class ResultEntry(BaseModel): + """A single analysis result (one label with its score).""" + label: str = Field(..., description="Predicted label in lowercase") + score: float = Field(..., ge=0.0, le=1.0, description="Confidence or proportion score") + intensity: Optional[Literal["low", "medium", "high"]] = Field( + None, description="Optional intensity qualifier" + ) + segment: Optional[Segment] = Field( + None, description="Temporal/textual segment info, null if result covers the full input" + ) + + +class StandardizedOutput(BaseModel): + """Top-level standardized output for any sentiment or emotion analysis.""" + schema_version: str = Field("1.0", description="Schema format version") + analysis_type: Literal["sentiment", "emotion"] = Field( + ..., description="Type of analysis performed" + ) + modality: Literal["text", "facial", "audio"] = Field( + ..., description="Input modality that was analyzed" + ) + source_model: str = Field( + ..., description="Identifier of the model or service that produced the result" + ) + timestamp: str = Field( + ..., description="ISO 8601 datetime of when the analysis was performed" + ) + task_id: Optional[str] = Field( + None, description="Optional identifier for a usability test task or session" + ) + input_summary: str = Field( + ..., description="Short description of the analyzed input" + ) + results: List[ResultEntry] = Field( + ..., description="List of individual result entries" + ) diff --git a/tests/unit/test_normalization.py b/tests/unit/test_normalization.py new file mode 100644 index 0000000..a4826eb --- /dev/null +++ 
b/tests/unit/test_normalization.py @@ -0,0 +1,302 @@ +""" +Unit tests for the sentiment normalization module. +""" + +import json +import pytest + +from normalization.schema import StandardizedOutput, ResultEntry, Segment +from normalization.normalizer import ( + normalize_text_sentiment, + normalize_pipeline_sentiment, + normalize_facial_emotions, + BERTWEET_LABEL_MAP, + FACIAL_EMOTION_LABEL_MAP, +) + + +class TestSchema: + """Tests for the Pydantic schema models.""" + + def test_result_entry_minimal(self): + entry = ResultEntry(label="positive", score=0.95) + assert entry.label == "positive" + assert entry.score == 0.95 + assert entry.intensity is None + assert entry.segment is None + + def test_result_entry_with_segment(self): + seg = Segment(start=0.0, end=3.5, text="hello") + entry = ResultEntry(label="negative", score=0.8, segment=seg) + assert entry.segment.start == 0.0 + assert entry.segment.text == "hello" + + def test_result_entry_with_intensity(self): + entry = ResultEntry(label="angry", score=0.6, intensity="high") + assert entry.intensity == "high" + + def test_score_bounds(self): + with pytest.raises(Exception): + ResultEntry(label="positive", score=1.5) + with pytest.raises(Exception): + ResultEntry(label="positive", score=-0.1) + + def test_standardized_output_serialization(self): + output = StandardizedOutput( + analysis_type="sentiment", + modality="text", + source_model="test-model", + timestamp="2026-03-09T12:00:00Z", + input_summary="test input", + results=[ResultEntry(label="positive", score=0.9)], + ) + data = output.model_dump() + assert data["schema_version"] == "1.0" + assert data["analysis_type"] == "sentiment" + assert len(data["results"]) == 1 + + def test_standardized_output_to_json(self): + output = StandardizedOutput( + analysis_type="emotion", + modality="facial", + source_model="test-model", + timestamp="2026-03-09T12:00:00Z", + input_summary="video.mp4", + results=[ + ResultEntry(label="happy", score=0.5), + 
ResultEntry(label="neutral", score=0.3), + ], + ) + json_str = output.model_dump_json() + parsed = json.loads(json_str) + assert parsed["modality"] == "facial" + assert len(parsed["results"]) == 2 + + +class TestNormalizeTextSentiment: + """Tests for normalize_text_sentiment.""" + + def test_positive_label(self): + result = normalize_text_sentiment( + label="POS", + confidence=0.95, + text="I love this product!", + timestamp="2026-03-09T12:00:00Z", + ) + assert result.analysis_type == "sentiment" + assert result.modality == "text" + assert result.results[0].label == "positive" + assert result.results[0].score == 0.95 + + def test_negative_label(self): + result = normalize_text_sentiment( + label="NEG", + confidence=0.87, + text="This is terrible.", + timestamp="2026-03-09T12:00:00Z", + ) + assert result.results[0].label == "negative" + + def test_neutral_label(self): + result = normalize_text_sentiment( + label="NEU", + confidence=0.62, + text="It is okay.", + timestamp="2026-03-09T12:00:00Z", + ) + assert result.results[0].label == "neutral" + + def test_unknown_label_lowercased(self): + result = normalize_text_sentiment( + label="MIXED", + confidence=0.5, + text="Not sure about this.", + timestamp="2026-03-09T12:00:00Z", + ) + assert result.results[0].label == "mixed" + + def test_long_text_truncated(self): + long_text = "a" * 300 + result = normalize_text_sentiment( + label="POS", + confidence=0.8, + text=long_text, + timestamp="2026-03-09T12:00:00Z", + ) + assert len(result.input_summary) == 200 + + def test_task_id_included(self): + result = normalize_text_sentiment( + label="POS", + confidence=0.9, + text="Great!", + task_id="task-7", + timestamp="2026-03-09T12:00:00Z", + ) + assert result.task_id == "task-7" + + def test_auto_timestamp(self): + result = normalize_text_sentiment( + label="POS", confidence=0.9, text="Nice" + ) + assert result.timestamp is not None + assert "T" in result.timestamp + + +class TestNormalizePipelineSentiment: + """Tests for 
normalize_pipeline_sentiment.""" + + def setup_method(self): + self.utterances = [ + { + "timestamp": [0.0, 3.5], + "text": "I liked the interface", + "label": "POS", + "confidence": 0.92, + }, + { + "timestamp": [3.5, 7.0], + "text": "but search was confusing", + "label": "NEG", + "confidence": 0.78, + }, + ] + + def test_basic_conversion(self): + result = normalize_pipeline_sentiment( + utterances=self.utterances, + audio_path="audio.mp3", + start_time_ms=0, + end_time_ms=10000, + timestamp="2026-03-09T12:00:00Z", + ) + assert result.modality == "audio" + assert len(result.results) == 2 + assert result.results[0].label == "positive" + assert result.results[0].segment.start == 0.0 + assert result.results[0].segment.text == "I liked the interface" + assert result.results[1].label == "negative" + + def test_skips_error_utterances(self): + utterances = self.utterances + [{"error": "something failed"}] + result = normalize_pipeline_sentiment( + utterances=utterances, + audio_path="audio.mp3", + start_time_ms=0, + end_time_ms=10000, + timestamp="2026-03-09T12:00:00Z", + ) + assert len(result.results) == 2 + + def test_input_summary_format(self): + result = normalize_pipeline_sentiment( + utterances=self.utterances, + audio_path="segment.mp3", + start_time_ms=5000, + end_time_ms=15000, + timestamp="2026-03-09T12:00:00Z", + ) + assert result.input_summary == "segment.mp3 (5000ms to 15000ms)" + + def test_empty_utterances(self): + result = normalize_pipeline_sentiment( + utterances=[], + audio_path="empty.mp3", + start_time_ms=0, + end_time_ms=0, + timestamp="2026-03-09T12:00:00Z", + ) + assert len(result.results) == 0 + + +class TestNormalizeFacialEmotions: + """Tests for normalize_facial_emotions.""" + + def setup_method(self): + self.emotions = { + "Angry": 4.0, + "Disgusted": 1.0, + "Fearful": 2.0, + "Happy": 45.0, + "Neutral": 30.0, + "Sad": 6.0, + "Surprised": 12.0, + } + + def test_basic_conversion(self): + result = normalize_facial_emotions( + 
emotion_percentages=self.emotions, + video_path="recording.mp4", + timestamp="2026-03-09T12:00:00Z", + ) + assert result.analysis_type == "emotion" + assert result.modality == "facial" + assert len(result.results) == 7 + + def test_labels_lowercased(self): + result = normalize_facial_emotions( + emotion_percentages=self.emotions, + video_path="recording.mp4", + timestamp="2026-03-09T12:00:00Z", + ) + labels = [r.label for r in result.results] + for label in labels: + assert label == label.lower() + + def test_scores_normalized_to_0_1(self): + result = normalize_facial_emotions( + emotion_percentages=self.emotions, + video_path="recording.mp4", + timestamp="2026-03-09T12:00:00Z", + ) + for r in result.results: + assert 0.0 <= r.score <= 1.0 + + def test_sorted_by_score_descending(self): + result = normalize_facial_emotions( + emotion_percentages=self.emotions, + video_path="recording.mp4", + timestamp="2026-03-09T12:00:00Z", + ) + scores = [r.score for r in result.results] + assert scores == sorted(scores, reverse=True) + + def test_scores_already_in_0_1_range(self): + emotions_01 = { + "Happy": 0.45, + "Neutral": 0.30, + "Surprised": 0.12, + "Sad": 0.06, + "Angry": 0.04, + "Fearful": 0.02, + "Disgusted": 0.01, + } + result = normalize_facial_emotions( + emotion_percentages=emotions_01, + video_path="video.mp4", + timestamp="2026-03-09T12:00:00Z", + ) + labels_scores = {r.label: r.score for r in result.results} + assert labels_scores["happy"] == 0.45 + assert labels_scores["disgusted"] == 0.01 + + def test_task_id_passed(self): + result = normalize_facial_emotions( + emotion_percentages=self.emotions, + video_path="recording.mp4", + task_id="session-99", + timestamp="2026-03-09T12:00:00Z", + ) + assert result.task_id == "session-99" + + +class TestLabelMaps: + """Verify the label mapping dictionaries are complete.""" + + def test_bertweet_map_has_all_labels(self): + expected = {"POS", "NEG", "NEU"} + assert set(BERTWEET_LABEL_MAP.keys()) == expected + + def 
test_facial_map_has_all_labels(self): + expected = {"Angry", "Disgusted", "Fearful", "Happy", "Neutral", "Sad", "Surprised"} + assert set(FACIAL_EMOTION_LABEL_MAP.keys()) == expected