-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathtmp.py
94 lines (51 loc) · 3.29 KB
/
tmp.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
from nltk.stem.wordnet import WordNetLemmatizer
def del_stopwords(tweet):
    """Remove English stopwords and empty strings from a tokenized tweet.

    The list is filtered in place and the same list object is returned, so
    callers that rely on the argument being mutated keep working.

    Args:
        tweet: list of string terms.

    Returns:
        The same list, with every term found in NLTK's English stopword
        corpus (or equal to the empty string) removed.
    """
    # A set gives O(1) membership tests; '' is added so empty terms are dropped too.
    stopwords = set(nltk.corpus.stopwords.words('english'))
    stopwords.add('')
    # Rebuild the contents in a single pass instead of calling remove() while
    # iterating: removing during iteration shifts the remaining items and
    # makes the loop skip the element that follows each removed term.
    tweet[:] = [term for term in tweet if term not in stopwords]
    return tweet
def lemmatize(tweet):
    """Lemmatize every term of a tokenized tweet in place.

    Each element of ``tweet`` is replaced by the result of
    ``WordNetLemmatizer().lemmatize`` applied to it; the same list object
    is returned.
    """
    wordnet = WordNetLemmatizer()
    for position, term in enumerate(tweet):
        tweet[position] = wordnet.lemmatize(term)
    return tweet
import nltk
import collections
import mysql.connector
def myround(var):
    """Round ``var`` to two decimal places, passing ``None`` through.

    Args:
        var: a number, or None.

    Returns:
        ``round(var, 2)``, or None when ``var`` is None.
    """
    # Identity check: ``!= None`` would dispatch to a user-defined __ne__
    # and can misclassify objects that override comparison.
    if var is None:
        return None
    return round(var, 2)
def connectsql(user="root", password="", host="localhost", database="twitter"):
    """Open a MySQL connection through ``mysql.connector``.

    Called with no arguments it behaves as before (root user, empty
    password, localhost, ``twitter`` database); the keyword parameters let
    callers target another server or account without editing this function.

    Args:
        user: MySQL user name.
        password: password for ``user``.
        host: server host name.
        database: database selected on connect.

    Returns:
        The connection object returned by ``mysql.connector.connect``.
    """
    return mysql.connector.connect(
        user=user,
        password=password,
        host=host,
        database=database,
    )
# Evaluation script: cross-validate a Naive Bayes classifier over the stored
# tweets, holding out one contiguous slice of FOLD_SIZE rows per fold and
# training on the remaining rows.
FOLD_COUNT = 6
FOLD_SIZE = 400
LABELS = range(7)  # classification labels reported below (0..6)

connection = connectsql()
cursor = connection.cursor()
try:
    # Fetch the rows once. The query has no ORDER BY, so re-running it for
    # every fold could return rows in a different order and make the folds
    # overlap; slicing one result set keeps the folds disjoint.
    sql = "SELECT tweet_pre_process_result, tweet_classification FROM tweets LIMIT 0, 2400"
    cursor.execute(sql)
    records = cursor.fetchall()
finally:
    # Release the database resources even if the query fails.
    cursor.close()
    connection.close()

# The format strings below reproduce the spacing of the original
# comma-separated print statements; each print takes a single argument so
# the statement parses the same way on Python 2 and Python 3.
for i in range(FOLD_COUNT):
    start = FOLD_SIZE * i
    end = FOLD_SIZE * (i + 1)
    print("length of tweets :  %d" % len(records))
    print("range of testset :  %d %d" % (start, end))

    testsets = [({'preprocessed': preprocessed}, classification)
                for (preprocessed, classification) in records[start:end]]
    # Everything outside the held-out slice is training data.
    trainsets = [({'preprocessed': preprocessed}, classification)
                 for (preprocessed, classification) in records[:start] + records[end:]]
    classifier = nltk.NaiveBayesClassifier.train(trainsets)

    # rset: test-item indices grouped by reference label.
    # tset: test-item indices grouped by predicted label.
    rset = collections.defaultdict(set)
    tset = collections.defaultdict(set)
    for n, (feature, classification) in enumerate(testsets):
        rset[classification].add(n)
        tset[classifier.classify(feature)].add(n)

    print("accuracy of classifier :  %s \n"
          % myround(nltk.classify.accuracy(classifier, testsets)))
    for label in LABELS:
        reference = rset[label]
        predicted = tset[label]
        print("precision of %d :  %s \trecall of %d :  %s \tf_measure of %d :  %s" % (
            label, myround(nltk.metrics.precision(reference, predicted)),
            label, myround(nltk.metrics.recall(reference, predicted)),
            label, myround(nltk.metrics.f_measure(reference, predicted)),
        ))
    print("\n======================================================================\n")
    # show_most_informative_features() prints its table itself and returns
    # None, so wrapping it in print emitted a stray "None" line.
    # NOTE(review): shown once per fold; move this call below the loop if
    # only the final fold's features are wanted.
    classifier.show_most_informative_features(5)