1
- import requests
2
- from bs4 import BeautifulSoup
3
- import pymysql
4
- from dotenv import load_dotenv
5
- import os
6
- from os .path import join ,dirname
7
-
8
# Load environment variables from the .env file located next to this script.
dotenv_path = join(dirname(__file__), '.env')
load_dotenv(dotenv_path)

# MySQL connection settings read from the environment; a setting is None
# when its variable is not defined.
MYSQL_HOST, MYSQL_USER, MYSQL_PASSWORD, MYSQL_DATABASE = (
    os.getenv(key)
    for key in ("MYSQL_HOST", "MYSQL_USER", "MYSQL_PASSWORD", "MYSQL_DATABASE")
)
14
def make_connection():
    """Open a new PyMySQL connection using the module-level MYSQL_* settings.

    The connection uses the utf8mb4 charset and DictCursor as its cursor
    class, so fetched rows are accessed by column name.
    """
    settings = {
        "host": MYSQL_HOST,
        "user": MYSQL_USER,
        "password": MYSQL_PASSWORD,
        "db": MYSQL_DATABASE,
        "charset": 'utf8mb4',
        "cursorclass": pymysql.cursors.DictCursor,
    }
    return pymysql.connect(**settings)
21
-
22
# Seconds to wait for the contest site before giving up on a request.
REQUEST_TIMEOUT = 30


def _fetch_soup(url):
    """Download ``url`` and return its parsed HTML.

    Raises ``requests.HTTPError`` on a non-success status so an error page is
    never scraped as if it were contest data.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    # Name the parser explicitly: otherwise BeautifulSoup picks whichever
    # parser happens to be installed and results can differ between machines.
    return BeautifulSoup(response.content, "html.parser")


def _linked_names(soup, tag_name, label):
    """Return the stripped link texts of the last ``tag_name`` element whose
    first child contains ``label``; an empty list when nothing matches.
    """
    names = []
    for node in soup.find_all(tag_name):
        if node.contents and label in node.contents[0]:
            # A later match replaces an earlier one.
            names = [a.contents[0].strip() for a in node.find_all("a")]
    return names


base_sp = _fetch_soup("https://onlinemathcontest.com/contests/all")
conn = make_connection()
with conn.cursor() as cursor:
    cursor.execute("SELECT url FROM contest_info")
    # A set, because it is only used for membership tests below.
    contest_urls = {row["url"] for row in cursor}

    for card in base_sp.find_all("paper-card"):
        heading = card.find("h2")
        # Stop at the "Past Contests" card; only the cards listed before it
        # are processed.
        if heading and heading.contents[0] == " Past Contests ":
            break
        li = card.find_all("li")
        schedule = li[0].contents[0].strip()
        rated = li[1].contents[0].strip()
        url = card.find("a")["href"]
        sp = _fetch_soup(url)
        title = sp.find("h1").contents[0]
        tester = _linked_names(sp, "p", "tester")
        # Branch on whether the contest is already stored.
        if url in contest_urls:
            print("UPDATE", title)
            sql = "UPDATE contest_info SET title = %s, schedule = %s, rated = %s, tester = %s WHERE url = %s"
        else:
            print("INSERT", title)
            sql = "INSERT INTO contest_info (title, schedule, rated, tester, url) VALUES (%s, %s, %s, %s, %s)"
        # Both statements take their parameters in the same order.
        cursor.execute(sql, (title, schedule, rated, ",".join(tester), url))
# The changes are committed after the cursor is closed (conn.commit() below).
1
+ import requests
2
+ from bs4 import BeautifulSoup
3
+ import pymysql
4
+ from dotenv import load_dotenv
5
+ import os
6
+ from os .path import join ,dirname
7
+
8
# Load environment variables from the .env file located next to this script.
dotenv_path = join(dirname(__file__), '.env')
load_dotenv(dotenv_path)

# MySQL connection settings read from the environment; a setting is None
# when its variable is not defined.
MYSQL_HOST, MYSQL_USER, MYSQL_PASSWORD, MYSQL_DATABASE = (
    os.getenv(key)
    for key in ("MYSQL_HOST", "MYSQL_USER", "MYSQL_PASSWORD", "MYSQL_DATABASE")
)
14
def make_connection():
    """Open a new PyMySQL connection using the module-level MYSQL_* settings.

    The connection uses the utf8mb4 charset and DictCursor as its cursor
    class, so fetched rows are accessed by column name.
    """
    settings = {
        "host": MYSQL_HOST,
        "user": MYSQL_USER,
        "password": MYSQL_PASSWORD,
        "db": MYSQL_DATABASE,
        "charset": 'utf8mb4',
        "cursorclass": pymysql.cursors.DictCursor,
    }
    return pymysql.connect(**settings)
21
+
22
# Seconds to wait for the contest site before giving up on a request.
REQUEST_TIMEOUT = 30


def _fetch_soup(url):
    """Download ``url`` and return its parsed HTML.

    Raises ``requests.HTTPError`` on a non-success status so an error page is
    never scraped as if it were contest data.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    # Name the parser explicitly: otherwise BeautifulSoup picks whichever
    # parser happens to be installed and results can differ between machines.
    return BeautifulSoup(response.content, "html.parser")


def _linked_names(soup, tag_name, label):
    """Return the stripped link texts of the last ``tag_name`` element whose
    first child contains ``label``; an empty list when nothing matches.
    """
    names = []
    for node in soup.find_all(tag_name):
        if node.contents and label in node.contents[0]:
            # A later match replaces an earlier one.
            names = [a.contents[0].strip() for a in node.find_all("a")]
    return names


base_sp = _fetch_soup("https://onlinemathcontest.com/contests/all")
conn = make_connection()
try:
    with conn.cursor() as cursor:
        cursor.execute("SELECT url FROM contest_info")
        # A set, because it is only used for membership tests below.
        contest_urls = {row["url"] for row in cursor}

        for card in base_sp.find_all("paper-card"):
            heading = card.find("h2")
            # Stop at the "Past Contests" card; only the cards listed before
            # it are processed.
            if heading and heading.contents[0] == " Past Contests ":
                break
            li = card.find_all("li")
            schedule = li[0].contents[0].strip()
            rated = li[1].contents[0].strip()
            url = card.find("a")["href"]
            sp = _fetch_soup(url)
            title = sp.find("h1").contents[0]
            writer = _linked_names(sp, "div", "Writer")
            tester = _linked_names(sp, "p", "Tester")
            # Branch on whether the contest is already stored.
            if url in contest_urls:
                print("UPDATE", title)
                sql = "UPDATE contest_info SET title = %s, schedule = %s, rated = %s, writer = %s, tester = %s WHERE url = %s"
            else:
                print("INSERT", title)
                sql = "INSERT INTO contest_info (title, schedule, rated, writer, tester, url) VALUES (%s, %s, %s, %s, %s, %s)"
            # Both statements take their parameters in the same order.
            cursor.execute(
                sql,
                (title, schedule, rated, ",".join(writer), ",".join(tester), url),
            )
    # Commit only after every contest was processed without an error.
    conn.commit()
finally:
    # Release the database connection whether or not the run succeeded.
    conn.close()
0 commit comments