gabrielmouallem · gabrielmouallem · Jul 31, 2024
diff --git a/app/api/v1/search/controller.py b/app/api/v1/search/controller.py
@@ -36,8 +36,71 @@ def search_transcriptions_by_video(self, search: SearchDTO):
                     }
                 },
             ]
+
+            # Aggregation for expanded transcription within each transcription
+            expanded_transcription_stage = {
+                "$lookup": {
+                    "from": "videoTranscriptions",  # Assuming collection name is videoTranscriptions
+                    "let": {"videoId": "$video.videoId", "startTime": "$start"},
+                    "pipeline": [
+                        {
+                            "$match": {
+                                "$expr": {
+                                    "$and": [
+                                        {"$eq": ["$video.videoId", "$$videoId"]},
+                                        {
+                                            "$gt": [
+                                                "$start",
+                                                {"$subtract": ["$$startTime", 10]},
+                                            ]
+                                        },
+                                        {
+                                            "$lt": [
+                                                "$start",
+                                                {"$add": ["$$startTime", 10]},
+                                            ]
+                                        },
+                                    ]
+                                }
+                            }
+                        },
+                        {"$sort": {"start": 1}},
+                        {
+                            "$group": {
+                                "_id": "$video.videoId",
+                                "merged_start": {"$first": "$start"},
+                                "merged_end": {
+                                    "$last": {"$add": ["$start", "$duration"]}
+                                },
+                                "merged_text": {"$push": "$text"},
+                                "merged_duration": {"$sum": "$duration"},
+                            }
+                        },
+                        {
+                            "$project": {
+                                "_id": 0,
+                                "start": "$merged_start",
+                                "end": "$merged_end",
+                                "text": {
+                                    "$reduce": {
+                                        "input": "$merged_text",
+                                        "initialValue": "",
+                                        "in": {"$concat": ["$$value", " ", "$$this"]},
+                                    }
+                                },
+                                "duration": "$merged_duration",
+                            }
+                        },
+                    ],
+                    "as": "expanded_transcription",
+                }
+            }
+
             # Create the text search query
             results_pipeline = common_pipeline + [
+                {"$unwind": "$transcriptions"},
+                {"$addFields": {"start": "$transcriptions.start"}},
+                expanded_transcription_stage,
                 {"$sort": search.order_by},
                 {"$skip": skip_count},
                 {"$limit": search.per_page},
@@ -79,48 +142,3 @@ def search_transcriptions_by_video(self, search: SearchDTO):
                 status=500,
                 mimetype="application/json",
             )
-
-    def expanded_transcription(self, video_id, start_time):
-
-        # Aggregation pipeline to find 2 before and 2 after the target transcription
-        pipeline = [
-            {
-                "$match": {
-                    "video._id": video_id,
-                    "start": {"$gt": start_time - 10, "$lt": start_time + 10},
-                }
-            },
-            {"$sort": {"start": 1}},  # Ensure documents are sorted by start time
-            {
-                "$group": {
-                    "_id": "$video._id",  # Grouping by videoId, adjust if you need different granularity
-                    "merged_start": {"$first": "$start"},
-                    "merged_end": {"$last": {"$add": ["$start", "$duration"]}},
-                    "merged_text": {"$push": "$text"},
-                    "merged_duration": {"$sum": "$duration"},
-                }
-            },
-            {
-                "$project": {
-                    "_id": 0,
-                    "start": "$merged_start",
-                    "end": "$merged_end",
-                    "text": {
-                        "$reduce": {
-                            "input": "$merged_text",
-                            "initialValue": "",
-                            "in": {"$concat": ["$$value", " ", "$$this"]},
-                        }
-                    },
-                    "duration": "$merged_duration",
-                }
-            },
-        ]
-
-        transcriptions = self.search_service.aggregate_transcriptions(pipeline=pipeline)
-        if len(transcriptions) > 0:
-            return jsonify(transcriptions)
-        return (
-            jsonify({"error": "No transcriptions found or error in aggregation"}),
-            404,
-        )