import csv
import pickle
from time import time

import numpy as np
import keras
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2
from keras.datasets import imdb
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from scipy.sparse import csr_matrix, csc_matrix, lil_matrix
from PySparseCoalescedTsetlinMachineCUDA.tm import MultiClassTsetlinMachine

# Number of consecutive row slices the training set is cut into for each epoch.
batches = 100

# Passed positionally, in the order (clauses, T, s), to MultiClassTsetlinMachine.
s = 1.0
T = 10000
clauses = 10000

# Number of passes over the training slices; one progress line is printed per pass.
epochs = 100

# Hard-coded locations of the label-first CSV files read by main().
TRAIN_CSV = "/data/yahoo_answers_csv/train.csv"
TEST_CSV = "/data/yahoo_answers_csv/test.csv"


def load_documents(path):
    """Read a label-first CSV file into parallel lists of texts and labels.

    Each row is expected to hold an integer label in its first column; all
    remaining columns are joined with single spaces to form the document text.
    Completely blank rows are skipped.

    Args:
        path: Location of the CSV file (comma-delimited, double-quote quoted).

    Returns:
        A ``(documents, labels)`` tuple of equally long lists, in file order.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If the first column of a row is not an integer.
    """
    documents = []
    labels = []
    # newline="" lets the csv module do its own newline handling, so line
    # breaks embedded in quoted fields are parsed as part of the field.
    with open(path, "r", newline="") as handle:
        for row in csv.reader(handle, delimiter=",", quotechar='"'):
            if not row:
                continue
            labels.append(int(row[0]))
            documents.append(" ".join(row[1:]))
    return documents, labels


def batch_bounds(number_of_rows, number_of_batches):
    """Yield ``(start, stop)`` row ranges that together cover every row once.

    The ranges are consecutive and equally sized except possibly the last one,
    which is shorter when ``number_of_rows`` is not a multiple of
    ``number_of_batches``. When it is a multiple, exactly ``number_of_batches``
    ranges of ``number_of_rows // number_of_batches`` rows are produced. With
    a remainder, fewer than ``number_of_batches`` ranges may be produced.

    Args:
        number_of_rows: Total number of rows to cover.
        number_of_batches: Upper bound on the number of ranges.

    Raises:
        ValueError: If ``number_of_batches`` is smaller than 1.
    """
    if number_of_batches < 1:
        raise ValueError("number_of_batches must be at least 1")
    if number_of_rows <= 0:
        return
    # Ceiling division, so no trailing rows are left out of the final range.
    rows_per_batch = (number_of_rows + number_of_batches - 1) // number_of_batches
    for start in range(0, number_of_rows, rows_per_batch):
        yield start, min(start + rows_per_batch, number_of_rows)


def main():
    """Vectorize the CSV documents, then train and evaluate the Tsetlin machine.

    Reads the training and test CSV files, turns the documents into binary
    bag-of-words matrices limited to 10000 features, and fits the classifier
    incrementally on consecutive slices of the training matrix. After each
    epoch the test accuracy and the elapsed training/testing times are printed.
    """
    print("READ")

    training_documents, training_y = load_documents(TRAIN_CSV)
    testing_documents, testing_y = load_documents(TEST_CSV)

    print(len(training_documents))

    vectorizer_X = CountVectorizer(binary=True, max_features=10000)

    print("VECTORIZE")
    X_train = vectorizer_X.fit_transform(training_documents)
    Y_train = np.array(training_y)

    # The test set reuses the vocabulary fitted on the training documents.
    X_test = vectorizer_X.transform(testing_documents)
    Y_test = np.array(testing_y)

    print("DONE")

    tm = MultiClassTsetlinMachine(clauses, T, s, max_included_literals=32)
    # NOTE(review): the nesting of the evaluation step was reconstructed; it is
    # run once per epoch here because the report is numbered with the epoch
    # index. Confirm that per-batch evaluation was not intended.
    for i in range(epochs):
        # Sum the time of every fit call so the report covers the whole epoch.
        training_seconds = 0.0
        for start, stop in batch_bounds(Y_train.shape[0], batches):
            start_training = time()
            tm.fit(X_train[start:stop], Y_train[start:stop], epochs=1, incremental=True)
            training_seconds += time() - start_training

        start_testing = time()
        result_test = 100 * (tm.predict(X_test) == Y_test).mean()
        stop_testing = time()

        print("#%d Accuracy Test: %.2f%% Training: %.2fs Testing: %.2fs" % (i + 1, result_test, training_seconds, stop_testing - start_testing))


if __name__ == "__main__":
    main()