diff --git a/ipt_connect/IPTdev/utils/dumper.py b/ipt_connect/IPTdev/utils/dumper.py
index 0a8c6678..65944b2f 100644
--- a/ipt_connect/IPTdev/utils/dumper.py
+++ b/ipt_connect/IPTdev/utils/dumper.py
@@ -2,12 +2,13 @@
 import git
 import urllib
 from urllib2 import urlopen
-from find_links import unique_url
+from find_links import construct_links_list
 
 dir_path_repo = str(os.getcwd())
 repo = git.Repo(dir_path_repo[:-24])
 last_commit = str(repo.head.commit)[:7]
 dir_path_dump = dir_path_repo[:-24] + 'dump/' + last_commit
+unique_url = construct_links_list()[2]
 
 if not os.path.exists(dir_path_dump):
 	os.makedirs(dir_path_dump)
diff --git a/ipt_connect/IPTdev/utils/find_links.py b/ipt_connect/IPTdev/utils/find_links.py
index e72dc771..ccf74d21 100644
--- a/ipt_connect/IPTdev/utils/find_links.py
+++ b/ipt_connect/IPTdev/utils/find_links.py
@@ -19,22 +19,21 @@
 	'//svg/@xmlns',
 	]
 
-links_error = []
-links_static = []
-links_other = []
-
-for ur in urls:
-	r = urlopen(head + dev + ur)
-	page = lxml.html.fromstring(r.read())
-	for tag in tags:
-		for link in page.xpath(tag):
-			if not link.startswith('http'):
-				if link.startswith('//'):
-					links_other.append(http + link)
-				elif link.startswith('/static/'):
-					links_static.append(link)
-				else:
-					links_other.append(head + link)
-			else:
-				links_other.append(link)
-unique_url = list(set(links_other)) # delete duplicate log lines
+def construct_links_list():
+	links_error, links_static, links_other = [], [], []
+	for ur in urls:
+		r = urlopen(head + dev + ur)
+		page = lxml.html.fromstring(r.read())
+		for tag in tags:
+			for link in page.xpath(tag):
+				if not link.startswith('http'):
+					if link.startswith('//'):
+						links_other.append(http + link)
+					elif link.startswith('/static/'):
+						links_static.append(link)
+					else:
+						links_other.append(head + link)
+				else:
+					links_other.append(link)
+	unique_url = list(set(links_other)) # delete duplicate log lines
+	return [links_error, links_static, unique_url]
diff --git a/ipt_connect/IPTdev/utils/link_parser.py b/ipt_connect/IPTdev/utils/link_parser.py
index 023008e5..7b829827 100644
--- a/ipt_connect/IPTdev/utils/link_parser.py
+++ b/ipt_connect/IPTdev/utils/link_parser.py
@@ -1,24 +1,6 @@
-import lxml.html
 import requests
-from urllib2 import urlopen
+from find_links import *
 
-head = 'http://127.0.0.1:8000'
-http = 'http:'
-dev = '/IPTdev/'
-urls = [
-	'problems',
-	'participants',
-	'jurys',
-	'tournament',
-	'teams',
-	'rounds',
-	]
-tags = [
-	'//a/@href',
-	'//link/@href',
-	'//img/@src',
-	'//svg/@xmlns',
-	]
 links_for_check_404_error = [
 	'roundss',
 	'rounds/-1/',
@@ -30,25 +12,7 @@
 	'jurys/-1/',
 	]
 
-links_error = []
-links_static = []
-links_all = []
-
-for ur in urls:
-	r = urlopen(head + dev + ur)
-	page = lxml.html.fromstring(r.read())
-	for tag in tags:
-		for link in page.xpath(tag):
-			if not link.startswith('http'):
-				if link.startswith('//'):
-					links_all.append(http + link)
-				elif link.startswith('/static/'):
-					links_static.append(link)
-				else:
-					links_all.append(head + link)
-			else:
-				links_all.append(link)
-unique_url = list(set(links_all)) # delete duplicate log lines
+links_error, links_static, unique_url = construct_links_list()
 
 print 'Link checking ...'
 