Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@

# ChatBot

ChatBot is a machine-learning based conversational dialog engine built in
ChatBot is a machine-learning based conversational dialogue engine built in
Python which makes it possible to generate responses based on collections of
known conversations. The language independent design of ChatBot allows it
to be trained to speak any language.
Expand Down
15 changes: 14 additions & 1 deletion chatbot.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,19 @@
Python 2.7.13 (v2.7.13:a06454b1afa1, Dec 17 2016, 20:42:59) [MSC v.1500 32 bit (Intel)] on win32
Type "copyright", "credits" or "license()" for more information.
>>> import re

import sqlite3
from collections import Counter
from string import punctuation
from math import sqrt

# initialize the connection to the database

connection = sqlite3.connect('chatbot.sqlite')
cursor = connection.cursor()

# create the tables needed by the program

create_table_request_list = [
'CREATE TABLE words(word TEXT UNIQUE)',
'CREATE TABLE sentences(sentence TEXT UNIQUE, used INT NOT NULL DEFAULT 0)',
Expand All @@ -23,6 +26,7 @@
pass

def get_id(entityName, text):

"""Retrieve an entity's unique ID from the database, given its associated text.
If the row is not already present, it is inserted.
The entity can either be a sentence or a word."""
Expand All @@ -37,6 +41,7 @@ def get_id(entityName, text):
return cursor.lastrowid

def get_words(text):

"""Retrieve the words present in a given string of text.
The return value is a list of tuples where the first member is a lowercase word,
and the second member the number of time it is present in the text."""
Expand All @@ -46,15 +51,19 @@ def get_words(text):
return Counter(wordsList).items()


B = 'Hello!'
B = 'Hello!
'
while True:
# output bot's message

print('B: ' + B)
# ask for user input; if blank line, exit the loop

H = raw_input('H: ').strip()
if H == '':
break
# store the association between the bot's message words and the user's response

words = get_words(B)
words_length = sum([n * len(word) for word, n in words])
sentence_id = get_id('sentence', H)
Expand All @@ -64,20 +73,24 @@ def get_words(text):
cursor.execute('INSERT INTO associations VALUES (?, ?, ?)', (word_id, sentence_id, weight))
connection.commit()
# retrieve the most likely answer from the database

cursor.execute('CREATE TEMPORARY TABLE results(sentence_id INT, sentence TEXT, weight REAL)')
words = get_words(H)
words_length = sum([n * len(word) for word, n in words])
for word, n in words:
weight = sqrt(n / float(words_length))
cursor.execute('INSERT INTO results SELECT associations.sentence_id, sentences.sentence, ?*associations.weight/(4+sentences.used) FROM words INNER JOIN associations ON associations.word_id=words.rowid INNER JOIN sentences ON sentences.rowid=associations.sentence_id WHERE words.word=?', (weight, word,))
# if matches were found, give the best one

cursor.execute('SELECT sentence_id, sentence, SUM(weight) AS sum_weight FROM results GROUP BY sentence_id ORDER BY sum_weight DESC LIMIT 1')
row = cursor.fetchone()
cursor.execute('DROP TABLE results')
# otherwise, just randomly pick one of the least used sentences

if row is None:
cursor.execute('SELECT rowid, sentence FROM sentences WHERE used = (SELECT MIN(used) FROM sentences) ORDER BY RANDOM() LIMIT 1')
row = cursor.fetchone()
# tell the database the sentence has been used once more, and prepare the sentence

B = row[1]
cursor.execute('UPDATE sentences SET used=used+1 WHERE rowid=?', (row[0],))