-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
149 lines (129 loc) · 5.5 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
#importing:
#regex, pygraphviz, requests, urllib, argv, and beautiful soup
import re
import pygraphviz as pgv
import requests
import urllib
from sys import argv
from bs4 import BeautifulSoup
class Course(object):
    """Template for a course scraped from the catalog page.

    Attributes:
        name: actual name of the course (the title text that follows the
            course id in the catalog heading).
        course_id: department code plus number, ex: "CSE 143".
        prereqs: None when the description lists no prerequisites,
            otherwise a tuple whose items are either a single course id
            string (required) or a tuple of course id strings (a set of
            options).
        off_w: "offered with" - list of course ids this course is offered
            jointly with, or None when there are none.
    """

    def __init__(self, name, course_id, prereqs, off_w):
        self.name = name
        self.course_id = course_id
        self.prereqs = prereqs
        self.off_w = off_w

    def __repr__(self):
        #readable form for debugging/printing lists of courses
        return "Course(name=%r, course_id=%r, prereqs=%r, off_w=%r)" % (
            self.name, self.course_id, self.prereqs, self.off_w)
#Script body: download the UW course catalog page for one department,
#pull every course and its prerequisites out of the HTML, then draw the
#prerequisite graph to degree_graph.png.

#course code as it appears in the catalog text, ex: "CSE 143"
#(compiled once here instead of inside the parsing loops)
COURSE_CODE_RE = re.compile(r"[A-Z].[A-Z]{1,3}\s\d\d\d")
#whole-word "or" marks a prereq fragment as a set of options; a plain
#substring test would also fire on words such as "instructor"
OR_WORD_RE = re.compile(r"\bor\b")
#text that introduces the prereq list inside a course description
PREREQ_MARKER = "Prerequisite: "
#edge colors for successive sets of prereq options
OPTION_COLORS = ["red", "green", "blue", "purple", "orange", "pink"]


def _parse_prereq_text(text):
    """Turn the text that follows "Prerequisite: " into (prereqs, off_w).

    prereqs is a tuple: fragments (split on ";") containing the word "or"
    contribute one tuple holding all their course codes (a set of options);
    other fragments contribute each course code as a plain string.
    off_w is the list of course codes found after "Offered", or None when
    the text does not split into exactly two parts on "Offered".
    """
    parts = text.split("Offered")
    if len(parts) == 2:
        #only some courses have joint classes
        off_w = COURSE_CODE_RE.findall(parts[1])
    else:
        off_w = None
    prereqs = []
    for fragment in parts[0].split(";"):
        codes = COURSE_CODE_RE.findall(fragment)
        if OR_WORD_RE.search(fragment):
            #append the ENTIRE tuple of options; skip fragments that
            #mention "or" but name no course, so no color slot is wasted
            if codes:
                prereqs.append(tuple(codes))
        else:
            #append EACH required course separately
            prereqs.extend(codes)
    #making prereqs immutable via tuple
    return tuple(prereqs), off_w


def _add_prereq_edge(graph, prereq_id, course_id, color, fontsize):
    """Add the edge prereq_id -> course_id and format it and both nodes."""
    graph.add_edge(prereq_id, course_id)
    graph.get_edge(prereq_id, course_id).attr["color"] = color
    for node in (prereq_id, course_id):
        graph.get_node(node).attr["fontsize"] = fontsize


if len(argv) < 2:
    #fail with a usage hint instead of a bare IndexError
    raise SystemExit("usage: %s DEPT  (ex: %s cse)" % (argv[0], argv[0]))
dept = str(argv[1])
#empty list for all Course objects
courses = []
url = "http://www.washington.edu/students/crscat/" + dept.lower() + ".html"
#fetching HTML; requests works on Python 2 and 3 (urllib.urlretrieve is
#Python 2 only) and raise_for_status stops us from parsing an error page
response = requests.get(url, timeout=30)
response.raise_for_status()
data = response.content
#keep a local copy of the page, as before
with open("test.html", "wb") as f:
    f.write(data)
soup = BeautifulSoup(data, "lxml")
#constructing regex pattern for the anchor names, ex: cse143
patt = re.escape(dept.lower()) + r"\d\d\d"
for tag in soup.find_all("a", attrs={"name": re.compile(patt)}):
    for child in tag.children:
        child_html = str(child)
        #identifying where prereqs will be in text
        prereq_start = child_html.find(PREREQ_MARKER)
        if prereq_start == -1:
            #no prereqs listed
            prereqs = None
            #off_w is offered with
            off_w = None
        else:
            final_part = child_html[prereq_start + len(PREREQ_MARKER):]
            prereq_end = final_part.find("<br/>")
            #only cut at <br/> when it is present; slicing with -1 would
            #silently drop the last character
            if prereq_end != -1:
                final_part = final_part[:prereq_end]
            prereqs, off_w = _parse_prereq_text(final_part)
        #text nodes have no .b attribute, and .string is None when the
        #<b> tag holds more than one child - skip both cases
        heading = getattr(child, "b", None)
        if heading is None or heading.string is None:
            continue
        #string with course info, ex: "CSE 143 Computer Programming II (5)"
        course_str = heading.string
        #the name ends one character before the "(" that follows it;
        #without a "(" take the rest of the string
        paren = course_str.find("(")
        course_str_end = paren - 1 if paren != -1 else len(course_str)
        #length of dept code plus space and 3-digit number
        #NOTE(review): assumes the catalog prints the dept code with the
        #same length as the command-line argument - confirm for
        #departments whose code contains a space
        id_end = len(dept.lower()) + 4
        #defining course id, ex: CSE 143
        course_id = course_str[0:id_end]
        #name starts after the id and one space (was a hard-coded 8,
        #which is only right for 3-letter departments)
        name = course_str[id_end + 1:course_str_end]
        #instantiate object
        courses.append(Course(name, course_id, prereqs, off_w))
#instantiating graph with pgv
G = pgv.AGraph(directed=True, overlap=False, splines="polyline",
               nodesep=2.0, sep=+0.25)
#connecting courses with prereqs
for course in courses:
    #checking for prereqs
    if not course.prereqs:
        continue
    #counter to change color of edge for each set of prereq options
    sets_option = 0
    for prereq in course.prereqs:
        #a tuple holds a set of prereq options
        if isinstance(prereq, tuple):
            #wrap around so more than six option sets cannot IndexError
            color = OPTION_COLORS[sets_option % len(OPTION_COLORS)]
            for act_prereq in prereq:
                _add_prereq_edge(G, act_prereq, course.course_id, color, 8.0)
            #incrementing color option
            sets_option += 1
        else:
            _add_prereq_edge(G, prereq, course.course_id, "black", 7.0)
G.layout(prog="neato")
G.draw("degree_graph.png")
#if __name__ == "__