Skip to content

Commit 9df06a6

Browse files
committed
py
1 parent 1d19944 commit 9df06a6

File tree

1 file changed

+69
-0
lines changed

1 file changed

+69
-0
lines changed

pyprint.py

+69
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
# -*- coding: utf-8 -*-
2+
from selenium import webdriver
3+
from bs4 import BeautifulSoup as bs
4+
import os
5+
import time
6+
7+
# change to test folder
8+
# Output files are later opened with relative names, so they are created in
# this directory. os.chdir raises OSError if the path does not exist.
os.chdir('/Users/easonchan/test')
9+
10+
#website_with_logins = "http://service.moj.gov.tw/lawer/associList.asp?associName=%A5x%A5_%AB%DF%AEv%A4%BD%B7|"
11+
# Listing-page URLs and their filename labels, derived from a single table so
# the two lists cannot drift out of step with each other.
#
# Each row is (label, encoded_name):
#   label        -- prefix used for the output filenames; kept exactly as
#                   originally spelled because it ends up on disk.
#   encoded_name -- leading part of the ``associName`` query value, already
#                   percent-encoded.
# NOTE(review): the encoded bytes look like Big5 text for the association
# names -- confirm before re-encoding or "normalising" them.
_ASSOCI_LIST_URL = 'http://service.moj.gov.tw/lawer/associList.asp?associName='
# Every association name ends with the same encoded suffix.
_ASSOCI_NAME_SUFFIX = '%AB%DF%AEv%A4%BD%B7|'
_ASSOCIATIONS = [
    ('taichung', '%A5x%A4%A4'),
    ('taipei', '%A5x%A5_'),
    ('taidung', '%A5x%AAF'),
    ('tainan', '%A5x%ABn'),
    ('ilan', '%A9y%C4%F5'),
    ('hualien', '%AA%E1%BD%AC'),
    ('nantao', '%ABn%A7%EB'),
    ('pintung', '%AB%CC%AAF'),
    ('miaoli', '%AD]%AE%DF'),
    ('taoyuan', '%AE%E7%B6%E9'),
    ('kaohsiung', '%B0%AA%B6%AF'),
    ('keelung', '%B0%F2%B6%A9'),
    ('yunlin', '%B6%B3%AAL'),
    ('hsinchu', '%B7s%A6%CB'),
    ('chiayi', '%B9%C5%B8q'),
    ('chunghwa', '%B9%FC%A4%C6'),
]

# Public names used by the rest of the script; order matches _ASSOCIATIONS.
website = [
    _ASSOCI_LIST_URL + encoded_name + _ASSOCI_NAME_SUFFIX
    for _label, encoded_name in _ASSOCIATIONS
]
websitename = [label for label, _encoded_name in _ASSOCIATIONS]
29+
30+
# Scrape the detail page behind every entry on the selected association
# listing pages and save each one as "<label>_<entry>.txt" in the current
# working directory.
#
# Relies on names defined earlier in this file: ``webdriver``, ``bs``,
# ``time``, ``website`` and ``websitename``.

# selenium is already a dependency of this script; the exception type is
# imported here so the lookup below can catch exactly "link not found".
from selenium.common.exceptions import NoSuchElementException

# Only the last SCRAPE_LAST associations are processed in this run.
SCRAPE_LAST = 3
# Number of characters cut from the start / end of the flattened page text
# before splitting it into entries.
# NOTE(review): presumably fixed page header/footer boilerplate; these offsets
# depend on the site's layout and on how the HTML is parsed -- confirm.
HEADER_CHARS = 292
FOOTER_CHARS = 15


def _extract_names(page_text):
    """Return the non-blank comma-separated entries of a listing page.

    ``page_text`` is the text content of the listing page.  Newlines are
    turned into commas, runs of commas are collapsed (one pass each for
    triples and pairs), the fixed-size head and tail are trimmed, and the
    rest is split on commas.  Blank entries that survive the collapsing are
    dropped: they would produce a "<label>_.txt" file and an empty link-text
    lookup.
    """
    flat = page_text.replace('\n', ',')
    flat = flat.replace(',,,', ',')
    flat = flat.replace(',,', ',')
    flat = flat[HEADER_CHARS:-FOOTER_CHARS]
    return [entry for entry in flat.split(',') if entry.strip()]


driver = webdriver.Chrome()
try:
    # Slice both lists with the same start index so every page is saved
    # under its own label (the label index used to be set independently of
    # the URL slice and pointed at different associations).
    first = len(website) - SCRAPE_LAST
    for link, label in zip(website[first:], websitename[first:]):
        driver.get(link)
        # NOTE(review): no parser is named, so bs4 picks whichever is
        # installed; the trim offsets above may depend on that choice.
        listing_text = bs(driver.page_source).getText()

        for name in _extract_names(listing_text):
            try:
                entry_link = driver.find_element_by_partial_link_text(name)
            except NoSuchElementException:
                print('Failed at:' + name)
                continue

            entry_link.click()
            # Wait before reading the page so the detail view has had time
            # to load; the capture used to happen before this pause.
            time.sleep(1)
            detail_html = driver.page_source

            # Create the file only once there is something to write, and
            # let ``with`` close it even if the write fails.  Bytes are
            # written explicitly as UTF-8.
            with open(label + '_' + name + '.txt', 'wb') as out:
                out.write(detail_html.encode('utf8'))

            driver.back()
            time.sleep(1)

        print(label + 'finished')
finally:
    # Always shut the browser down, including after an unexpected error.
    driver.quit()
63+
64+
65+
66+
67+
68+
69+

0 commit comments

Comments
 (0)