 def parse_args():
     parser = argparse.ArgumentParser(description="")
-    parser.add_argument("semester")
-    parser.add_argument("course")
-    parser.add_argument("gradeable")
-    parser.add_argument("--window",type=int,default=10)
-    parser.add_argument("--hash_size",type=int,default=100000)
-    language = parser.add_mutually_exclusive_group(required=True)
-    language.add_argument("--plaintext", action='store_true')
-    language.add_argument("--python", action='store_true')
-    language.add_argument("--cpp", action='store_true')
-
+    parser.add_argument("config_path")
     args = parser.parse_args()
-
-    if (args.window < 1):
-        print("ERROR! window must be >= 1")
-        exit(1)
-
     return args
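
The command-line contract changes here: the three positionals plus --window, --hash_size, and the required language flag collapse into a single config_path argument, and the window check moves into hasher() under the new name sequence_length (--hash_size is dropped with no replacement visible in this diff). A minimal sketch of driving the new interface, assuming a JSON config with exactly the keys this diff reads; the file name and all values are made-up examples:

    import json

    # hypothetical config; the keys mirror the lookups added in this commit
    lichen_config = {
        "semester": "f20",
        "course": "sample_course",
        "gradeable": "hw1",
        "language": "python",      # one of "plaintext", "python", "cpp"
        "sequence_length": 10      # replaces the old --window flag
    }
    with open("lichen_config.json", "w") as f:
        json.dump(lichen_config, f)
    # the script would then be invoked with lichen_config.json as its
    # single positional argument
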
 def hasher(args,my_tokenized_file,my_hashes_file):
-    with open(my_tokenized_file,'r') as my_tf:
+    with open(args.config_path) as lichen_config:
+        lichen_config_data = json.load(lichen_config)
+        language = lichen_config_data["language"]
+        sequence_length = int(lichen_config_data["sequence_length"])
+
+    if (sequence_length < 1):
+        print("ERROR! sequence_length must be >= 1")
+        exit(1)
+
+    with open(my_tokenized_file,'r',encoding='ISO-8859-1') as my_tf:
         with open(my_hashes_file,'w') as my_hf:
             tokens = json.load(my_tf)
             num = len(tokens)
-            for i in range(0,num-args.window):
+            for i in range(0,num-sequence_length):
                 foo = ""
-                if args.plaintext:
-                    for j in range(0,args.window):
+                if language == "plaintext":
+                    for j in range(0,sequence_length):
                         foo += str(tokens[i+j].get("value"))

-                elif args.python:
-                    for j in range(0,args.window):
+                elif language == "python":
+                    for j in range(0,sequence_length):
                         foo += str(tokens[i+j].get("type"))

-                elif args.cpp:
-                    for j in range(0,args.window):
+                elif language == "cpp":
+                    for j in range(0,sequence_length):
                         foo += str(tokens[i+j].get("type"))

                 else:
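
The rewritten loop slides a window of sequence_length consecutive tokens across the file, concatenating "value" fields for plaintext and "type" fields for python/cpp; each concatenated string is what gets hashed downstream. A standalone sketch of that windowing, with made-up tokens (the tokens.json structure is inferred only from the .get() calls above):

    sequence_length = 2
    field = "type"  # "value" for plaintext; "type" for python and cpp

    # hypothetical token stream in the shape the .get() calls imply
    tokens = [
        {"type": "NAME", "value": "x"},
        {"type": "OP", "value": "="},
        {"type": "NUMBER", "value": "1"},
        {"type": "NAME", "value": "y"},
    ]

    for i in range(0, len(tokens) - sequence_length):
        foo = ""
        for j in range(0, sequence_length):
            foo += str(tokens[i + j].get(field))
        print(foo)  # prints "NAMEOP" then "OPNUMBER"

    # note: range(0, num - sequence_length) never emits the final window
    # (here "NUMBERNAME"); that bound is kept as-is from the diff
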
@@ -77,26 +72,32 @@ def hasher(args,my_tokenized_file,my_hashes_file):
 def main():
     args = parse_args()

+    with open(args.config_path) as lichen_config:
+        lichen_config_data = json.load(lichen_config)
+        semester = lichen_config_data["semester"]
+        course = lichen_config_data["course"]
+        gradeable = lichen_config_data["gradeable"]
+
     sys.stdout.write("HASH ALL...")
     sys.stdout.flush()

     # ===========================================================================
     # error checking
-    course_dir = os.path.join(SUBMITTY_DATA_DIR,"courses",args.semester,args.course)
+    course_dir = os.path.join(SUBMITTY_DATA_DIR,"courses",semester,course)
     if not os.path.isdir(course_dir):
         print("ERROR! ",course_dir," is not a valid course directory")
         exit(1)
-    tokenized_dir = os.path.join(course_dir,"lichen","tokenized",args.gradeable)
+    tokenized_dir = os.path.join(course_dir,"lichen","tokenized",gradeable)
     if not os.path.isdir(tokenized_dir):
         print("ERROR! ",tokenized_dir," is not a valid gradeable tokenized directory")
         exit(1)

-    hashes_dir = os.path.join(course_dir,"lichen","hashes",args.gradeable)
+    hashes_dir = os.path.join(course_dir,"lichen","hashes",gradeable)

     # ===========================================================================
     # walk the subdirectories
-    for user in os.listdir(tokenized_dir):
-        for version in os.listdir(os.path.join(tokenized_dir,user)):
+    for user in sorted(os.listdir(tokenized_dir)):
+        for version in sorted(os.listdir(os.path.join(tokenized_dir,user))):
             my_tokenized_file = os.path.join(tokenized_dir,user,version,"tokens.json")

     # ===========================================================================
@@ -108,7 +109,6 @@ def main():
             my_hashes_file = os.path.join(my_hashes_dir,"hashes.txt")
             hasher(args,my_tokenized_file,my_hashes_file)

-
     print("done")

 if __name__ == "__main__":
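
Two smaller points in the walk above: wrapping os.listdir() in sorted() makes the user/version traversal order deterministic, and the directory layout is fully determined by the os.path.join() calls. A sketch of that layout, reconstructed from this diff; the SUBMITTY_DATA_DIR value and all example names are assumptions, and the construction of my_hashes_dir sits in a hunk not shown here:

    import os

    SUBMITTY_DATA_DIR = "/var/local/submitty"  # assumed; defined elsewhere in the script

    def tokens_path(semester, course, gradeable, user, version):
        # input consumed by hasher():
        # courses/<semester>/<course>/lichen/tokenized/<gradeable>/<user>/<version>/tokens.json
        course_dir = os.path.join(SUBMITTY_DATA_DIR, "courses", semester, course)
        return os.path.join(course_dir, "lichen", "tokenized", gradeable,
                            user, version, "tokens.json")

    # output: hashes.txt under lichen/hashes/<gradeable>/..., presumably
    # mirroring the tokenized tree (exact subpath assumed, since
    # my_hashes_dir is elided from the visible hunks)
    print(tokens_path("f20", "sample_course", "hw1", "student1", "1"))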