Skip to content

Commit ee0e12c

Browse files
committed
Update
1 parent 0154061 commit ee0e12c

File tree

1 file changed

+72
-0
lines changed

1 file changed

+72
-0
lines changed

YahooTextCategorizationDemoBatch.py

+72
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
import numpy as np
2+
import keras
3+
from sklearn.feature_selection import SelectKBest
4+
from sklearn.feature_selection import chi2
5+
from keras.datasets import imdb
6+
from time import time
7+
from sklearn.feature_extraction.text import CountVectorizer
8+
from sklearn.metrics.pairwise import cosine_similarity
9+
import pickle
10+
import csv
11+
12+
from scipy.sparse import csr_matrix, csc_matrix, lil_matrix
13+
14+
from PySparseCoalescedTsetlinMachineCUDA.tm import MultiClassTsetlinMachine
15+
16+
# Experiment configuration for the batched Yahoo Answers demo.

# Number of equally sized slices the training set is cut into; one slice is
# passed to each fit() call.
batches = 100

# Hyperparameters forwarded positionally as
# MultiClassTsetlinMachine(clauses, T, s, ...).
# NOTE(review): presumably `s` is the specificity and `T` the voting
# threshold -- confirm against the library documentation.
s = 1.0
T = 10000
clauses = 10000
21+
22+
print("READ")

# Load the training split. Each CSV row is read as `label, text, text, ...`:
# column 0 is parsed as the integer class label and all remaining columns are
# joined with single spaces into one document string.
training_documents = []
training_y = []
# `with` guarantees the handle is closed even if a row fails to parse;
# newline="" is what the csv module requires so that quoted fields containing
# line breaks are read back intact.
with open("/data/yahoo_answers_csv/train.csv", "r", newline="") as f:
    reader = csv.reader(f, delimiter=',', quotechar='"')
    for document in reader:
        if not document:
            # csv.reader yields [] for a blank line; there is no label to parse.
            continue
        training_documents.append(" ".join(document[1:]))
        training_y.append(int(document[0]))
32+
33+
# Load the test split with the same row layout as the training file:
# column 0 is the integer class label, the remaining columns are joined with
# single spaces into one document string.
testing_documents = []
testing_y = []
# `with` guarantees the handle is closed even if a row fails to parse;
# newline="" is what the csv module requires so that quoted fields containing
# line breaks are read back intact.
with open("/data/yahoo_answers_csv/test.csv", "r", newline="") as f:
    reader = csv.reader(f, delimiter=',', quotechar='"')
    for document in reader:
        if not document:
            # csv.reader yields [] for a blank line; there is no label to parse.
            continue
        testing_documents.append(" ".join(document[1:]))
        testing_y.append(int(document[0]))

# Report how many training documents were loaded.
print(len(training_documents))
43+
44+
# Bag-of-words encoder. Per the scikit-learn documentation, binary=True stores
# term presence (0/1) instead of counts and max_features=10000 caps the
# vocabulary size.
vectorizer_X = CountVectorizer(binary=True, max_features=10000)

print("VECTORIZE")
# The vocabulary is learned from the training documents only; the test
# documents are encoded below with that same fitted vocabulary.
X_train = vectorizer_X.fit_transform(training_documents)
feature_names = vectorizer_X.get_feature_names_out()
# Reuse the array fetched above instead of asking the vectorizer for it again.
number_of_features = feature_names.shape[0]
Y_train = np.array(training_y)

X_test = vectorizer_X.transform(testing_documents)
Y_test = np.array(testing_y)

print("DONE")
56+
57+
# Number of full passes over the batched training set.
epochs = 100

# Nominal number of training samples per batch (integer division).
batch_size_train = Y_train.shape[0] // batches

tm = MultiClassTsetlinMachine(clauses, T, s, max_included_literals=32)

# NOTE(review): the original indentation of this loop was lost. Evaluation and
# the progress line are placed inside the batch loop because the training
# timer is reset per batch -- confirm this nesting against the upstream file.
for i in range(epochs):
    for batch in range(batches):
        lo = batch * batch_size_train
        # The final batch runs to the end of the data so the trailing
        # `N % batches` samples are not silently dropped by the integer
        # division above.
        if batch == batches - 1:
            hi = Y_train.shape[0]
        else:
            hi = lo + batch_size_train

        # One fit() call per batch.
        # NOTE(review): incremental=True presumably continues from the
        # current model state instead of re-initialising -- confirm.
        start_training = time()
        tm.fit(X_train[lo:hi], Y_train[lo:hi], epochs=1, incremental=True)
        stop_training = time()

        # Accuracy on the full test set, as a percentage.
        start_testing = time()
        result_test = 100*(tm.predict(X_test) == Y_test).mean()
        stop_testing = time()

        # `#%d` is the 1-based epoch number; timings are wall-clock seconds.
        print("#%d Accuracy Test: %.2f%% Training: %.2fs Testing: %.2fs" % (i+1, result_test, stop_training-start_training, stop_testing-start_testing))

0 commit comments

Comments
 (0)