Skip to content

Commit 9226ead

Browse files
authored
Add files via upload
1 parent d9af54c commit 9226ead

File tree

1 file changed

+175
-0
lines changed

1 file changed

+175
-0
lines changed

teams_scraper.py

+175
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,175 @@
1+
from selenium import webdriver
2+
from selenium.webdriver.common.by import By
3+
from selenium.webdriver.edge.service import Service
4+
from selenium.webdriver.edge.options import Options
5+
from selenium.webdriver.support.ui import WebDriverWait
6+
from selenium.webdriver.support import expected_conditions as EC
7+
from bs4 import BeautifulSoup
8+
import time
9+
10+
#######################################################################################
# Path to the Edge WebDriver executable, e.g. "C:/WebDriver/msedgedriver.exe".
# Leave it empty to use the msedgedriver.exe located in the current folder.
driver_path: str = ""
#######################################################################################

# Module-level list that collects the scraped homework assignments.
homework_list: list = []
20+
21+
def setup_driver():
    """Create and return an Edge WebDriver instance.

    The driver executable is taken from the module-level ``driver_path``;
    when that is empty, ``msedgedriver.exe`` is used instead.

    Returns:
        The ``webdriver.Edge`` instance built from the configured service
        and browser options.
    """
    executable = driver_path or 'msedgedriver.exe'

    # Browser options: no GPU, maximized window, and the 'enable-logging'
    # switch excluded.
    browser_options = Options()
    for flag in ("--disable-gpu", "--start-maximized"):
        browser_options.add_argument(flag)
    browser_options.add_experimental_option('excludeSwitches', ['enable-logging'])

    return webdriver.Edge(service=Service(executable), options=browser_options)
31+
32+
def parse_assignment(html_content):
    """Extract assignment records from assignment-list HTML.

    Args:
        html_content: HTML markup containing zero or more
            ``div[data-test="assignment-card"]`` elements.

    Returns:
        A list with one dict per card, holding the keys ``title``,
        ``due_date``, ``points`` and ``course``. A field whose element is
        not found in the card gets a bilingual placeholder string.
    """
    # (result key, tag name, attribute filter, placeholder when missing)
    field_specs = (
        ('title', 'span', {'data-test': 'assignment-card-title-all-up-view'},
         "No Title / 无标题"),
        ('due_date', 'div', {'class': 'row3Text__KMVk-'},
         "No Due Date / 无截止日期"),
        # Points might not exist on a card.
        ('points', 'div', {'data-testid': 'action-contentArea1'},
         "No Points / 无分数"),
        ('course', 'div', {'data-testid': 'card-classOrModuleName'},
         "No Course Name / 无课程名称"),
    )

    document = BeautifulSoup(html_content, 'html.parser')
    records = []

    for card in document.find_all('div', {'data-test': 'assignment-card'}):
        record = {}
        for key, tag_name, attrs, placeholder in field_specs:
            node = card.find(tag_name, attrs)
            record[key] = node.text if node else placeholder
        records.append(record)

    return records
67+
68+
def display_assignments(assignments):
    """Print a numbered, human-readable listing of assignments.

    Args:
        assignments: Iterable of dicts with the keys ``title``, ``course``,
            ``due_date`` and ``points``.
    """
    print("\nAssignments List:")
    print("=" * 30)

    # Label/key pairs for the detail lines printed under each title.
    detail_rows = ((" Course", 'course'), (" Due", 'due_date'), (" Points", 'points'))

    for number, item in enumerate(assignments, start=1):
        print(f"\n{number}. {item['title']}")
        for label, key in detail_rows:
            print(f"{label}: {item[key]}")
77+
78+
def _click_assignments_button(driver):
    """Wait for the Teams assignments button and click it via JavaScript.

    Returns:
        True when the button was found and clicked, False otherwise (the
        reason is printed).
    """
    # aria-label of the assignments button as matched by this script.
    selector = "button[aria-label='分配']"

    try:
        print(" 2.1 Waiting for button to be clickable")
        # The wait returns the element, so no second lookup is needed.
        assign_button = WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.CSS_SELECTOR, selector))
        )
        print(" 2.2 Getting button element")
    except Exception as e:
        print(f"Error in step 2: {str(e)}")
        return False

    print(" 2.3 Clicking button")
    try:
        # Click through JavaScript on the element the wait returned.
        driver.execute_script("arguments[0].click();", assign_button)
        print(" Button clicked successfully")

        # Fixed pause before reading the resulting URL and title.
        time.sleep(5)
        print(f" Current URL: {driver.current_url}")
        print(f" Current page title: {driver.title}")
    except Exception as e:
        print(f" Error during button click: {str(e)}")
        return False

    return True


def _enter_assignments_iframe(driver):
    """Locate the assignments iframe and switch the driver into it.

    On failure the reason is printed, followed by the id/name/src of every
    iframe that can be found, to help diagnose a changed page layout.

    Returns:
        True when the driver is now inside the iframe, False otherwise.
    """
    try:
        print(" 3.1 Waiting for iframe")
        assignment_iframe = WebDriverWait(driver, 20).until(
            EC.presence_of_element_located(
                (By.CSS_SELECTOR, "iframe[id^='cacheable-iframe']")
            )
        )
        print(" Found iframe")
        print(f" Iframe ID: {assignment_iframe.get_attribute('id')}")

        print(" 3.2 Switching to iframe")
        driver.switch_to.frame(assignment_iframe)

        print(" 3.3 Waiting for content")
        # Fixed pause so the iframe content can load before it is read.
        time.sleep(8)
        return True
    except Exception as e:
        # Report the cause instead of silently discarding it.
        print(f" Error finding iframe: {str(e)}")

    # Best-effort diagnostics; a failure here must not hide the error above.
    try:
        iframes = driver.find_elements(By.TAG_NAME, "iframe")
        print(f" Found {len(iframes)} iframes:")
        for idx, iframe in enumerate(iframes):
            print(f" Iframe {idx}:")
            print(f" ID: {iframe.get_attribute('id')}")
            print(f" Name: {iframe.get_attribute('name')}")
            print(f" Src: {iframe.get_attribute('src')}")
    except Exception as e:
        print(f" Could not list iframes: {str(e)}")
    return False


def _collect_assignments(driver):
    """Read the assignment list from the current frame, parse and store it.

    Non-empty results are appended to the module-level ``homework_list``
    and printed with ``display_assignments``.

    Returns:
        The list produced by ``parse_assignment`` (possibly empty), or None
        when the list could not be read or parsed.
    """
    try:
        print(" 4.1 Getting assignment list")
        content = driver.find_element(By.XPATH, "//div[@data-test='assignment-list']")
        html_content = content.get_attribute('outerHTML')

        print(" 4.2 Parsing assignments")
        assignments = parse_assignment(html_content)

        if assignments:
            print(" 4.3 Storing assignments")
            # In-place mutation of the module-level list; no `global` needed.
            homework_list.extend(assignments)
            display_assignments(assignments)

        return assignments
    except Exception as e:
        # Report the cause instead of silently discarding it.
        print(f" Failed to get assignments content: {str(e)}")
        return None


def get_teams_content():
    """Open Microsoft Teams in Edge and scrape the assignments list.

    Steps: open the Teams page, click the assignments button, switch into
    the assignments iframe, then parse the assignment cards. Progress and
    errors are printed. The browser is always closed before returning.

    Returns:
        The list of assignment dicts (possibly empty) on success, or None
        when any step failed.
    """
    driver = setup_driver()

    try:
        print("\n1. Opening Microsoft Teams page")
        try:
            driver.get("https://teams.microsoft.com/")
            # Fixed pause to let the page start loading.
            time.sleep(3)
        except Exception as e:
            print(f"Error in step 1: {str(e)}")
            return None

        print("\n2. Finding and clicking assignments button")
        if not _click_assignments_button(driver):
            return None

        print("\n3. Loading assignments page")
        if not _enter_assignments_iframe(driver):
            return None

        print("\n4. Processing assignments")
        return _collect_assignments(driver)

    finally:
        print("\n5. Closing browser")
        driver.quit()
167+
168+
if __name__ == "__main__":
    # Main execution block
    assignments = get_teams_content()
    # get_teams_content() returns None on failure and a list on success.
    # An empty list means the scrape worked but found no assignment cards,
    # so only None is reported as a failure.
    if assignments is None:
        print("Failed to get assignments")
    else:
        print("\nHomework list content:")
        print(homework_list)

0 commit comments

Comments
 (0)