diff --git a/README.md b/README.md index aff37fc..0db0e2c 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # ChatBot -ChatBot is a machine-learning based conversational dialog engine build in +ChatBot is a machine-learning based conversational dialogue engine built in Python which makes it possible to generate responses based on collections of known conversations. The language independent design of ChatBot allows it to be trained to speak any language. diff --git a/chatbot.py b/chatbot.py index f01c48a..5ffbbb3 100644 --- a/chatbot.py +++ b/chatbot.py @@ -1,16 +1,19 @@ Python 2.7.13 (v2.7.13:a06454b1afa1, Dec 17 2016, 20:42:59) [MSC v.1500 32 bit (Intel)] on win32 Type "copyright", "credits" or "license()" for more information. >>> import re + import sqlite3 from collections import Counter from string import punctuation from math import sqrt # initialize the connection to the database + connection = sqlite3.connect('chatbot.sqlite') cursor = connection.cursor() # create the tables needed by the program + create_table_request_list = [ 'CREATE TABLE words(word TEXT UNIQUE)', 'CREATE TABLE sentences(sentence TEXT UNIQUE, used INT NOT NULL DEFAULT 0)', @@ -23,6 +26,7 @@ pass def get_id(entityName, text): + """Retrieve an entity's unique ID from the database, given its associated text. If the row is not already present, it is inserted. The entity can either be a sentence or a word.""" @@ -37,6 +41,7 @@ def get_id(entityName, text): return cursor.lastrowid def get_words(text): + """Retrieve the words present in a given string of text. The return value is a list of tuples where the first member is a lowercase word, and the second member the number of time it is present in the text.""" @@ -46,15 +51,19 @@ def get_words(text): return Counter(wordsList).items() -B = 'Hello!' +B = 'Hello! 
+' while True: # output bot's message + print('B: ' + B) # ask for user input; if blank line, exit the loop + H = raw_input('H: ').strip() if H == '': break # store the association between the bot's message words and the user's response + words = get_words(B) words_length = sum([n * len(word) for word, n in words]) sentence_id = get_id('sentence', H) @@ -64,6 +73,7 @@ def get_words(text): cursor.execute('INSERT INTO associations VALUES (?, ?, ?)', (word_id, sentence_id, weight)) connection.commit() # retrieve the most likely answer from the database + cursor.execute('CREATE TEMPORARY TABLE results(sentence_id INT, sentence TEXT, weight REAL)') words = get_words(H) words_length = sum([n * len(word) for word, n in words]) @@ -71,13 +81,16 @@ def get_words(text): weight = sqrt(n / float(words_length)) cursor.execute('INSERT INTO results SELECT associations.sentence_id, sentences.sentence, ?*associations.weight/(4+sentences.used) FROM words INNER JOIN associations ON associations.word_id=words.rowid INNER JOIN sentences ON sentences.rowid=associations.sentence_id WHERE words.word=?', (weight, word,)) # if matches were found, give the best one + cursor.execute('SELECT sentence_id, sentence, SUM(weight) AS sum_weight FROM results GROUP BY sentence_id ORDER BY sum_weight DESC LIMIT 1') row = cursor.fetchone() cursor.execute('DROP TABLE results') # otherwise, just randomly pick one of the least used sentences + if row is None: cursor.execute('SELECT rowid, sentence FROM sentences WHERE used = (SELECT MIN(used) FROM sentences) ORDER BY RANDOM() LIMIT 1') row = cursor.fetchone() # tell the database the sentence has been used once more, and prepare the sentence + B = row[1] cursor.execute('UPDATE sentences SET used=used+1 WHERE rowid=?', (row[0],))