Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Bot/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -16,5 +16,5 @@ COPY . .
EXPOSE 8001

# Run FastAPI app
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8001", "--workers", "4"]
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8001", "--workers", "1"]

43 changes: 42 additions & 1 deletion Bot/app/api/meetings.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
import json
from app.core import config
import os
import re
import html

# Now you can access the values like this:
CLIENT_ID = os.getenv("CLIENT_ID")
Expand Down Expand Up @@ -70,8 +72,47 @@ def get_meetings(body: ScheduleMeeting, token: str = Depends(OAUTH2_SCHEME)):
events = events_result.get('items', [])
scheduled_meetings = []
meetings_map = {}
# Matches absolute http(s) links; stops at whitespace, angle brackets and quotes.
URL_RE = re.compile(r'https?://[^\s<>"\']+', re.IGNORECASE)

# Punctuation that commonly trails a link in prose or markup ("... zoom.us/j/1).")
# and is not part of the URL itself.
TRAILING_PUNCTUATION = ').,;]}>'

# Scheme-less meeting hosts, tried in this order (e.g. "zoom.us/j/...").
# They use the same terminating character class as URL_RE so that quotes and
# angle brackets from HTML/markdown wrappers are not swallowed into the link.
SCHEMELESS_URL_RES = tuple(
    re.compile(host + r'/[^\s<>"\']+', re.IGNORECASE)
    for host in (
        r'(?:[\w.-]+\.)?zoom\.us',
        r'meet\.google\.com',
        r'teams\.microsoft\.com',
    )
)


def extract_meeting_url_from_text(text: str):
    """Return the first link found in *text*, or ``None`` if there is none.

    An explicit ``http://`` / ``https://`` link anywhere in the text wins.
    Otherwise the text is searched for scheme-less Zoom, Google Meet and
    Microsoft Teams hosts (in that order) and ``https://`` is prepended to
    the match. Trailing punctuation is removed from the result in both cases.

    Args:
        text: Free text such as an event location, description or summary.
            ``None`` and the empty string are accepted.

    Returns:
        The extracted URL string, or ``None`` when nothing matched.
    """
    if not text:
        return None

    # Look for normal http(s) links first.
    match = URL_RE.search(text)
    if match:
        return match.group(0).rstrip(TRAILING_PUNCTUATION)

    # Fallback: scheme-less hosts. These patterns cannot match a "scheme://"
    # prefix, so the scheme is always missing and must be added.
    for pattern in SCHEMELESS_URL_RES:
        match = pattern.search(text)
        if match:
            return 'https://' + match.group(0).rstrip(TRAILING_PUNCTUATION)
    return None

for event in events:
meeting_url = event.get('hangoutLink')
# Prefer structured conferenceData entryPoints (video) when available
meeting_url = None
conf = event.get('conferenceData')
if conf:
entry_points = conf.get('entryPoints', [])
for ep in entry_points:
if ep.get('entryPointType') == 'video' and ep.get('uri'):
meeting_url = ep.get('uri')
break
# fallbacks
if not meeting_url:
meeting_url = event.get('hangoutLink')
if not meeting_url:
# check location, description, summary for embedded links
for field in ('location', 'description', 'summary'):
val = event.get(field)
if val:
val = html.unescape(val)
meeting_url = extract_meeting_url_from_text(val)
if meeting_url:
break

# check if a bot request has already been sent for the meeting
json_str = redis_client.get(BOT_ADDED_IN_MEETING_KEY)
Expand Down
98 changes: 95 additions & 3 deletions attendee/bots/tasks/sync_calendar_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from zoneinfo import ZoneInfo

import dateutil.parser
import html
import requests
from celery import shared_task
from django.db import transaction
Expand All @@ -21,16 +22,34 @@

logger = logging.getLogger(__name__)

# Candidate http(s) links inside free text. Matching is case-insensitive so
# that schemes written as "HTTPS://" are found too; a match stops at
# whitespace, angle brackets and quotes.
URL_CANDIDATE = re.compile(r"https?://[^\s<>\"']+", re.IGNORECASE)


def extract_meeting_url_from_text(text: str) -> Optional[str]:
    """Return the first link in *text* that ``meeting_type_from_url`` accepts.

    Two passes are made over the text:

    1. Every ``URL_CANDIDATE`` match (explicit ``http(s)://`` links), in
       order of appearance.
    2. Scheme-less Zoom, Google Meet and Microsoft Teams hosts (e.g.
       ``zoom.us/j/12345``), with ``https://`` prepended.

    In both passes trailing punctuation is stripped before the candidate is
    handed to ``meeting_type_from_url``; the first candidate for which that
    call returns a truthy value is returned.

    Args:
        text: Free text to scan. ``None`` and the empty string are accepted.

    Returns:
        The accepted URL, or ``None`` when no candidate is accepted.
    """
    if not text:
        return None

    # Characters that commonly trail a link in prose/markdown and are not
    # part of the URL itself.
    trailing_punctuation = ").,;]}>"

    # First pass: explicit http(s):// links.
    for m in URL_CANDIDATE.finditer(text):
        url = m.group(0).rstrip(trailing_punctuation)
        if meeting_type_from_url(url):
            return url

    # Fallback: links written without a scheme. These patterns cannot match a
    # "scheme://" prefix, so "https://" is always prepended.
    scheme_less_patterns = (
        r"(?:[\w.-]+\.)?zoom\.us/[^\s<>\"']+",
        r"meet\.google\.com/[^\s<>\"']+",
        r"teams\.microsoft\.com/[^\s<>\"']+",
    )
    for pat in scheme_less_patterns:
        for m in re.finditer(pat, text, flags=re.IGNORECASE):
            # Strip trailing punctuation here as well, so both passes hand
            # the same kind of cleaned candidate to meeting_type_from_url.
            candidate = "https://" + m.group(0).rstrip(trailing_punctuation)
            if meeting_type_from_url(candidate):
                return candidate
    return None


Expand Down Expand Up @@ -457,13 +476,86 @@ def _remote_event_to_calendar_event_data(self, google_event: dict) -> dict:

# Extract meeting URL if present
meeting_url_from_conference_data = None
entry_points = []
if "conferenceData" in google_event:
entry_points = google_event["conferenceData"].get("entryPoints", [])
for entry_point in entry_points:
if entry_point.get("entryPointType") == "video":
meeting_url_from_conference_data = entry_point.get("uri")
break
meeting_url = extract_meeting_url_from_text(meeting_url_from_conference_data) or extract_meeting_url_from_text(google_event.get("hangoutLink")) or extract_meeting_url_from_text(google_event.get("location")) or extract_meeting_url_from_text(google_event.get("description")) or extract_meeting_url_from_text(google_event.get("summary"))

# Normalize/unescape free-text fields before extraction
hangout_link = google_event.get("hangoutLink")
location_text = google_event.get("location")
description_text = google_event.get("description")
summary_text = google_event.get("summary")

if description_text:
description_text = html.unescape(description_text)
if location_text:
location_text = html.unescape(location_text)
if summary_text:
summary_text = html.unescape(summary_text)

meeting_url = None
meeting_url_source = None

# Check in order and record the source field for logging
if meeting_url_from_conference_data:
meeting_url = extract_meeting_url_from_text(meeting_url_from_conference_data)
if meeting_url:
meeting_url_source = "conferenceData.entryPoints"

if not meeting_url and hangout_link:
meeting_url = extract_meeting_url_from_text(hangout_link)
if meeting_url:
meeting_url_source = "hangoutLink"

if not meeting_url and location_text:
meeting_url = extract_meeting_url_from_text(location_text)
if meeting_url:
meeting_url_source = "location"

if not meeting_url and description_text:
meeting_url = extract_meeting_url_from_text(description_text)
if meeting_url:
meeting_url_source = "description"

if not meeting_url and summary_text:
meeting_url = extract_meeting_url_from_text(summary_text)
if meeting_url:
meeting_url_source = "summary"

# Loose fallback: try to detect scheme-less Zoom/Meet/Teams links inside text
if not meeting_url:
# Try entry points liberally (some providers set non-standard URIs)
if entry_points:
for ep in entry_points:
uri = ep.get("uri")
if uri:
candidate = uri
if not candidate.lower().startswith("http"):
candidate = "https://" + candidate
if meeting_type_from_url(candidate):
meeting_url = extract_meeting_url_from_text(candidate) or candidate
meeting_url_source = "conferenceData.entryPoints.loose"
break

# Try scheme-less patterns in text fields
if not meeting_url:
loose_text = "\n".join(filter(None, [location_text, description_text, summary_text]))
if loose_text:
# common patterns
m = re.search(r"(?:[\w.-]+\.)?zoom\.us/\S+", loose_text, flags=re.IGNORECASE)
if m:
candidate = m.group(0)
if not candidate.lower().startswith("http"):
candidate = "https://" + candidate
if meeting_type_from_url(candidate):
meeting_url = candidate
meeting_url_source = "loose_text_zoom"

logger.debug("Event %s: extracted meeting_url=%s source=%s", google_event.get("id"), meeting_url, meeting_url_source)

# Extract attendees
attendees = []
Expand Down
Loading