chatbot.py
import json
import pickle
import random
import warnings

import numpy as np
import tensorflow as tf
import tflearn
import nltk
from nltk.stem.lancaster import LancasterStemmer

warnings.filterwarnings("ignore")
stemmer = LancasterStemmer()
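
# nltk.word_tokenize depends on the 'punkt' tokenizer data; this guard (a
# convenience addition, not part of the original script) downloads it once
try:
    nltk.data.find('tokenizers/punkt')
except LookupError:
    nltk.download('punkt')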
# read the intents file
with open('intents.json') as json_data:
    intents = json.load(json_data)

words = []
classifyingTags = []
documents = []
stringsToIgnore = ['?', '!']
# clean up the intent phrases: tokenize each pattern into individual words
# and stem them to their root form with nltk's Lancaster stemmer
for intent in intents['intents']:
    for pattern in intent['patterns']:
        w = nltk.word_tokenize(pattern)
        words.extend(w)
        documents.append((w, intent['tag']))
        if intent['tag'] not in classifyingTags:
            classifyingTags.append(intent['tag'])

words = [stemmer.stem(w.lower()) for w in words if w not in stringsToIgnore]
words = sorted(set(words))
classifyingTags = sorted(set(classifyingTags))
# build the bag-of-words training set: one input row per pattern, one
# one-hot output row per tag
training = []
output_empty = [0] * len(classifyingTags)
for doc in documents:
    patternWords = [stemmer.stem(word.lower()) for word in doc[0]]
    bag = [1 if w in patternWords else 0 for w in words]
    output_row = list(output_empty)
    output_row[classifyingTags.index(doc[1])] = 1
    training.append([bag, output_row])

random.shuffle(training)
trainX = [row[0] for row in training]
trainY = [row[1] for row in training]
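
# Illustration (example values assumed, not taken from intents.json): with a
# stemmed vocabulary ['hi', 'how', 'thank'] and tags ['greeting', 'thanks'],
# a pattern whose stemmed tokens are ['hi', 'how'] yields bag [1, 1, 0] and,
# for tag 'greeting', output_row [1, 0]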
# build the neural network: two fully connected hidden layers of 8 nodes
# each, plus a softmax output layer with one node per tag
tf.compat.v1.reset_default_graph()
net = tflearn.input_data(shape=[None, len(trainX[0])])
net = tflearn.fully_connected(net, 8)
net = tflearn.fully_connected(net, 8)
net = tflearn.fully_connected(net, len(trainY[0]), activation='softmax')
net = tflearn.regression(net)
model = tflearn.DNN(net, tensorboard_dir='tflearn_logs')
model.fit(trainX, trainY, n_epoch=1000, batch_size=8, show_metric=True)

# save the trained model and the data needed to rebuild inputs at inference time
model.save('model.tflearn')
with open('training_data', 'wb') as f:
    pickle.dump({'words': words, 'classifyingTags': classifyingTags,
                 'trainX': trainX, 'trainY': trainY}, f)
# restore the saved data structures and model, as a standalone inference
# script would do after training
with open('training_data', 'rb') as f:
    data = pickle.load(f)
words = data['words']
classifyingTags = data['classifyingTags']
trainX = data['trainX']
trainY = data['trainY']
model.load('./model.tflearn')
# tokenize and stem the words in the sentence from the user
def clean_up_sentence(sentence):
    sentenceWords = nltk.word_tokenize(sentence)
    sentenceWords = [stemmer.stem(word.lower()) for word in sentenceWords]
    return sentenceWords
# check the sentence for matching vocabulary words and build a bag-of-words vector
def loopThroughBag(sentence, words):
    sentenceWords = clean_up_sentence(sentence)
    bag = [0] * len(words)
    for s in sentenceWords:
        for i, word in enumerate(words):
            if word == s:
                bag[i] = 1
    return np.array(bag)
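
# The returned vector has length len(words), matching the network's input
# layer, so model.predict can consume it directly (used by classify below)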
# run the model on the processed input, drop tags below a 0.25 probability
# threshold, and return the remaining (tag, probability) pairs sorted by
# probability, highest first
def classify(sentence):
    results = model.predict([loopThroughBag(sentence, words)])[0]
    results = [[i, r] for i, r in enumerate(results) if r > 0.25]
    results.sort(key=lambda x: x[1], reverse=True)
    return_list = []
    for r in results:
        return_list.append((classifyingTags[r[0]], r[1]))
    return return_list
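
# e.g. classify("hi there") might return [('greeting', 0.93)]; the tag name
# and probability here are assumed for illustration and depend on intents.json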
# response method: print and return a random response from the intent whose
# tag the classifier ranked highest
def response(sentence, userID='123'):
    results = classify(sentence)
    if results:
        while results:
            for i in intents['intents']:
                if i['tag'] == results[0][0]:
                    temp = random.choice(i['responses'])
                    print("RETURNING " + temp)
                    return temp
            results.pop(0)
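
# Example usage: a minimal interactive loop (a sketch added for illustration;
# the prompt text and 'quit' sentinel are assumptions, not the original script)
if __name__ == '__main__':
    while True:
        userInput = input('You: ')
        if userInput.strip().lower() == 'quit':
            break
        response(userInput)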