-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtruck_bot.py
528 lines (409 loc) · 21 KB
/
truck_bot.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
import os
import re
import sys
import time
import random
import difflib
import get_truck_brand_names
import pandas as pd
from datetime import datetime
from typing import Tuple, List, Callable
# ----------------------------------------------------------------------------------------------------------------------
# HELPER FUNCTIONS
# ----------------------------------------------------------------------------------------------------------------------
def ask(question: str, conv: List) -> Tuple[str, List]:
    """
    Prompt the user with a question and log both the question and the reply in the ongoing conversation list. If the
    reply turns out to be a quit keyword, the program is terminated without any further prompt.
    Args:
        :param question: question to ask user
        :param conv: ongoing conversation list
    Returns:
        :return: user's input, ongoing conversation list
    """
    reply = input(question)
    # Log both sides of the exchange, the bot's line first
    conv.extend(['Bot: ' + question, 'Customer: ' + reply])
    # Leave the program right away if the user asked to quit
    check_quit(reply, conv)
    return reply, conv
def say(input_msg: str, conv: List) -> List:
    """
    Show a message to the user on the terminal and keep a copy of it, tagged as a bot line, in the ongoing
    conversation list.
    Args:
        :param input_msg: message to the user
        :param conv: ongoing conversation list
    Returns:
        :return: ongoing conversation list
    """
    # Show the message first ...
    print(input_msg)
    # ... then remember it as something the bot said
    logged_line = 'Bot: ' + input_msg
    conv.append(logged_line)
    return conv
def check_quit(statement: str, conv: List):
    """
    Terminates the program when the user input is 'q' or 'quit' (case-insensitive). Before exiting, the conversation
    collected so far is saved and marked as having been quit by the customer.
    Args:
        :param statement: user input
        :param conv: ongoing conversation list
    """
    quit_keywords = ('q', 'quit')
    # Anything that is not a quit keyword leaves the program running
    if statement.lower() not in quit_keywords:
        return
    save_conv(conv, flag='quit')
    sys.exit(0)
def negative_answer(statement: str) -> bool:
    """
    Shorthand for deciding whether the user answered in the negative, i.e. typed 'n', 'no' or 'not' in any letter case.
    Args:
        :param statement: user input
    Returns:
        :return: boolean determining whether the user input is negative
    """
    negative_words = ('n', 'no', 'not')
    return statement.lower() in negative_words
def save_fleet(fleet: pd.DataFrame):
    """
    Writes the fleet table as a csv file to the location held in the module-level `fleet_path` variable.
    NOTE: assumes that the save folder already exists!
    Args:
        :param fleet: pandas DataFrame that holds the fleet information
    """
    # `fleet_path` is a module-level global that is only read here
    destination = fleet_path
    fleet.to_csv(destination)
def save_conv(conv: List, flag: str = ''):
    """
    Writes the conversation, one entry per line, as a txt file to the location held in the module-level `conv_path`
    variable. The file is opened in exclusive-creation mode ('x'), so an already existing file is never overwritten.
    NOTE: assumes that the save folder already exists!
    Args:
        :param conv: ongoing conversation list
        :param flag: flag to write additional information to the file; e.g. if the user quit the conversation
    """
    # `conv_path` is a module-level global that is only read here
    with open(conv_path, 'x') as conv_file:
        conv_file.writelines('{0}\n'.format(entry) for entry in conv)
        # Leave a trace in the file when the customer bailed out
        if flag == 'quit':
            conv_file.write('Customer has quit!\n')
def generate_random_user(fleet_name_suf: str, conv_name_suf: str) -> Tuple[str, str, str]:
    """
    Builds placeholder save paths from a random client username and truck fleet ID. Both paths are also stored in the
    module-level `fleet_path` and `conv_path` variables. The folder is taken from the module-level `data_path`.
    Args:
        :param fleet_name_suf: suffix for creating the fleet file name
        :param conv_name_suf: suffix for creating the conversation file name
    Returns:
        :return: paths to the fleet data file and conversation data file, and today's date
    """
    global fleet_path, conv_path
    # Today's date as a string
    date_now = str(datetime.now().date())
    # One random number is shared by the username and the fleet ID
    number = str(random.randint(1, 100000))
    username, fleet_id = 'user' + number, 'fleet' + number
    # Both file names share the same date/user/fleet prefix and differ only in the suffix
    name_parts = [date_now, username, fleet_id]
    fleet_path = os.path.join(data_path, '_-_'.join(name_parts + [fleet_name_suf]))
    conv_path = os.path.join(data_path, '_-_'.join(name_parts + [conv_name_suf]))
    return fleet_path, conv_path, date_now
# ----------------------------------------------------------------------------------------------------------------------
# FLEET FUNCTIONS
# ----------------------------------------------------------------------------------------------------------------------
def load_brands(data: str) -> Tuple[pd.DataFrame, List]:
    """
    Reads the csv table of truck brands (the one generated by `get_truck_brand_names.py`) and extracts the distinct
    values of its 'Brand' column.
    Args:
        :param data: path to the csv file
    Returns:
        :return: full DataFrame table, list of only brand names
    """
    brands_table = pd.read_csv(data)
    # Distinct brand names, in order of first appearance
    distinct_brands = brands_table['Brand'].unique()
    return brands_table, distinct_brands.tolist()
def get_basic_info(data_folder, fleet_name_suf: str, conv_name_suf: str, date_now: str, conv: List) -> \
        Tuple[str, str, List]:
    """
    Asks the user for their name and the fleet designation and, together with the current date, constructs the paths
    of the files that will be used for storing the fleet data and the conversation history.
    NOTE: both paths are also written to the module-level `fleet_path` and `conv_path` variables, because
    `save_fleet()` and `save_conv()` read them from there (this is what allows the conversation to be saved when the
    user quits in the middle of the dialogue).
    The file names follow the same layout as in `generate_random_user()`:
    `<date>_-_<username>_-_<fleet id>_-_<suffix>`.
    Args:
        :param data_folder: folder path where data is stored
        :param fleet_name_suf: suffix for creating the fleet file name
        :param conv_name_suf: suffix for creating the conversation file name
        :param date_now: today's date
        :param conv: ongoing conversation list
    Returns:
        :return: path to the fleet data file, path to the conversation file, and the ongoing conversation list
    """
    global fleet_path, conv_path
    # Start by getting the basic information: name, fleet designation
    username, conv = ask('Please tell me your name: ', conv)
    fleet_id, conv = ask('What is the designation of this fleet? ', conv)
    # The answers end up in a file name: drop whitespace, and also path separators so that the user input cannot
    # redirect the save files into another folder
    username = re.sub(r'[\s/\\]+', '', username)
    fleet_id = re.sub(r'[\s/\\]+', '', fleet_id)
    # Construct the parts shared by both file names
    name_parts = [date_now, username, fleet_id]
    # Construct the full fleet and conversation paths; the suffix is separated like every other part of the name
    fleet_path = os.path.join(data_folder, '_-_'.join(name_parts + [fleet_name_suf]))
    conv_path = os.path.join(data_folder, '_-_'.join(name_parts + [conv_name_suf]))
    # Output the data
    return fleet_path, conv_path, conv
def get_input(input_msg: str, criterion: Callable, err_msg: str, conv: List) -> Tuple[str, List]:
    """
    Prompt the user for the input and check it against the designated criterion. As long as the input does not pass,
    prompt the user again, now with the error message, until the input is valid (or the user quits, which is handled
    inside `ask()`).
    Args:
        :param input_msg: original message to the user, prompting for input
        :param criterion: criterion that user input needs to pass; called with the input, its result is used as a
            truth value
        :param err_msg: error message to the user if the input does not pass the criterion
        :param conv: ongoing conversation list
    Returns:
        :return: user input, verified against the criterion, and the ongoing conversation list
    """
    # Ask the user for input
    statement, conv = ask(input_msg, conv)
    # Validate with a plain condition instead of `assert`: assertions are stripped when Python runs with -O, which
    # would silently disable the validation. A loop (rather than recursion) also keeps the call stack flat no matter
    # how many attempts the user needs.
    while not criterion(statement):
        statement, conv = ask(err_msg, conv)
    # Return the user input and this piece of conversation
    return statement, conv
def check_brand_name(input_msg: str, criterion: Callable, err_msg: str, brand_list: List, conv: List) -> \
        Tuple[str, List]:
    """
    Brand-specific extension of `get_input()`. The validated user input is compared with the list of all truck
    manufacturers: an exact hit is accepted as is, a near miss leads to up to three suggested corrections, and an
    input without any close match can either be kept or entered again.
    Args:
        :param input_msg: original message to the user, prompting for input
        :param criterion: criterion that user input needs to pass
        :param err_msg: error message to the user if the input does not pass the criterion
        :param brand_list: list of all unique truck manufacturer names
        :param conv: ongoing conversation list
    Returns:
        :return: corrected brand name, ongoing conversation list
    """
    # Get user input in the standard way and look up its nearest known brands
    typed_brand, conv = get_input(input_msg, criterion, err_msg, conv)
    candidates = difflib.get_close_matches(typed_brand, brand_list, n=3, cutoff=0.4)

    # Case 1: nothing is similar enough for the chosen difflib cutoff
    if len(candidates) == 0:
        keep_or_retry = (f'{typed_brand} doesn\'t match any known truck brand. Would you like to (1) keep it or (2) '
                         f'try again? ')
        answer, conv = ask(keep_or_retry, conv)
        while answer not in ['1', '2']:
            answer, conv = ask('Please choose one of the following: 1/2', conv)
        if answer == '1':
            return typed_brand, conv
        # The user wants another go, so run the whole dialogue again
        return check_brand_name(input_msg, criterion, err_msg, brand_list, conv)

    # Case 2: the best match is exactly what the user typed, so there is nothing to correct
    if candidates[0] == typed_brand:
        return typed_brand, conv

    # Case 3: offer the close matches as numbered suggestions, e.g. "(1) A, (2) B, or (3) C (1/2/3/n)?"
    numbers = [str(position) for position in range(1, len(candidates) + 1)]
    options = '/'.join(numbers + ['n'])
    suggestions = f'You wrote {typed_brand}. Did you mean (1) {candidates[0]}'
    for position, candidate in enumerate(candidates[1:], start=2):
        separator = ', or ' if position == len(candidates) else ', '
        suggestions += f'{separator}({position}) {candidate}'
    answer, conv = ask(f'{suggestions} ({options})?', conv)

    # Insist on one of the offered numbers or a negative answer
    accepted_answers = numbers + ['n', 'no', 'not']
    while answer.lower() not in accepted_answers:
        answer, conv = ask('Please choose one of the following: ' + options, conv)

    # A negative answer keeps the original spelling, a number picks the corresponding suggestion
    if negative_answer(answer):
        return typed_brand, conv
    return candidates[int(answer) - 1], conv
def get_single_truck(truck_nr: int, truck_brands: List, conv: List) -> Tuple[pd.Series, List]:
    """
    Collects all the relevant information about a single truck in the fleet and returns it in pandas Series. The
    collected values are shown to the user for confirmation; on a negative answer the whole truck is collected again.
    All values are stored as the strings the user typed.
    Args:
        :param truck_nr: number of the truck in the fleet
        :param truck_brands: list of unique truck brands
        :param conv: ongoing conversation list
    Returns:
        :return: pandas Series with truck information, ongoing conversation list
    """
    def matches(pattern: str) -> Callable:
        # Build a validator that accepts exactly the inputs matching the given regular expression
        return lambda text: re.match(pattern, text) is not None

    decimal_number = r'^\d+(\.\d*)?$'  # whole or decimal number, shared by the weight and the maximal load

    # Keep collecting until the user confirms the data. This used to be a recursive call whose (truck, conv) result
    # was assigned to `truck` alone, so after a correction the caller received a tuple instead of a Series.
    while True:
        conv = say('\nPlease provide details for vehicle nr. {0}.'.format(truck_nr), conv)

        # Get truck name
        brand, conv = check_brand_name('Brand: ',
                                       str.isalpha,
                                       'Brand name should contain only letters; please try again: ',
                                       truck_brands,
                                       conv)

        # Get truck model; TODO: ask a domain expert what would be a more general model name pattern
        # The pattern is anchored on both ends so that it enforces what the error message describes
        model, conv = get_input('Model: ',
                                matches(r'^[a-zA-Z]{2} \d+$'),  # assume this pattern due to lack of domain expertise
                                'Model name should have the pattern of two letters followed by space followed by a '
                                'series of numbers, e.g. "SC 3200"; please try again: ',
                                conv)

        # Get truck engine size in cubic centimeters; TODO: would be nice to have unit awareness and conversion
        engine_size, conv = get_input('Engine size (in cubic centimeters): ',
                                      matches(r'^\d+$'),
                                      'Engine size should contain only numbers; please try again: ',
                                      conv)

        # Get number of truck axles
        axle_number, conv = get_input('Number of truck axles: ',
                                      matches(r'^\d{1,2}$'),
                                      'Please enter only a single- or double-digit whole number: ',
                                      conv)

        # Get truck weight in tonnes
        weight, conv = get_input('Truck weight in metric tonnes: ',
                                 matches(decimal_number),
                                 'Truck weight should contain only whole or decimal numbers; please try again: ',
                                 conv)

        # Get maximal load of the truck in tonnes
        max_load, conv = get_input('Truck maximal load in metric tonnes: ',
                                   matches(decimal_number),
                                   'Maximal load should contain only whole or decimal numbers; please try again: ',
                                   conv)

        # Put it all in the Series
        truck = pd.Series(data=[brand, model, engine_size, axle_number, weight, max_load],
                          index=['Brand', 'Model', 'Engine (cc)', 'Axle number', 'Weight (T)', 'Max load (T)'])

        # Check if the information is correct
        conv = say('Please check if the following information is correct (y/n): ', conv)
        # Print item by item in order to avoid the "dtype: object" line that pandas adds when printing a Series
        for key, value in truck.to_dict().items():
            conv = say('{0:>15} {1:<10}'.format(key, value), conv)
        statement, conv = ask('> ', conv)

        # Anything but a negative answer counts as confirmation
        if not negative_answer(statement):
            return truck, conv

        # The next round of the loop announces the vehicle again, so it is not repeated here
        conv = say('No problem, let\'s try again.', conv)
def check_fleet(fleet: pd.DataFrame, truck_brands: List, conv: List) -> Tuple[pd.DataFrame, List]:
    """
    Takes the collected fleet information and asks a user to verify it. If there is something wrong, it collects again
    the correct information for a particular truck/row and shows the table once more, until the user no longer
    answers in the negative.
    Args:
        :param fleet: pandas DataFrame with the information about all the trucks in the fleet; corrected rows are
            written into this DataFrame in place
        :param truck_brands: list of unique truck brands
        :param conv: ongoing conversation list
    Returns:
        :return: pandas DataFrame with the information about all the trucks in the fleet, and ongoing conversation list
    """
    def is_listed_truck(text: str) -> bool:
        # `str.isdecimal` (unlike `str.isnumeric`) only accepts characters that `int()` can convert. The number must
        # also be an existing row label, otherwise `.loc` below would silently append a new row.
        return text.isdecimal() and int(text) in fleet.index

    # Loop (instead of recursing) until the user is happy with the table
    while True:
        time.sleep(0.5)
        conv = say('\nFleet information collected. '
                   'Please take a look at the table and tell us if everything is correct (y/n).\n', conv)
        conv = say(fleet.to_string(), conv)
        conv = say('\n', conv)
        statement, conv = ask('> ', conv)

        # Anything but a negative answer counts as confirmation
        if not negative_answer(statement):
            return fleet, conv

        # Without any rows there is no valid truck number to ask for
        if fleet.empty:
            conv = say('The fleet contains no vehicles, so there is nothing to correct.', conv)
            return fleet, conv

        truck_nr, conv = get_input('What is the number of truck that contains incorrect information? ',
                                   is_listed_truck,
                                   'Please provide the number of a truck listed in the table: ',
                                   conv)
        truck_nr = int(truck_nr)  # guaranteed to succeed by the validator above

        # Collect the truck again and overwrite its row
        truck, conv = get_single_truck(truck_nr, truck_brands, conv)
        fleet.loc[truck_nr] = truck
def get_fleet(truck_brands: List, conv: List) -> Tuple[pd.DataFrame, List]:
    """
    This is the main function for obtaining the fleet info. It collects all the user information and data, verifies it,
    and constructs a pandas DataFrame of the fleet where each row represents a single truck, and each column one
    property of that truck. Rows are labelled from 1 upwards under the index name 'Truck nr.'.
    Args:
        :param truck_brands: list of unique truck brands
        :param conv: ongoing conversation list
    Returns:
        :return: fleet information as a pandas DataFrame, and ongoing conversation list
    """
    columns = ['Brand', 'Model', 'Engine (cc)', 'Axle number', 'Weight (T)', 'Max load (T)']

    # Get the number of trucks in the fleet. `str.isdecimal` (unlike `str.isnumeric`) only accepts characters that
    # `int()` can convert, so the conversion below cannot fail.
    total_trucks, conv = get_input('How many vehicles are there in this fleet? ',
                                   str.isdecimal,
                                   'Please provide whole numbers only: ',
                                   conv)
    total_trucks = int(total_trucks)

    # Collect the fleet data
    conv = say('\nWe will now collect your fleet information.', conv)
    time.sleep(0.5)
    trucks = []
    for truck_nr in range(1, total_trucks + 1):
        truck, conv = get_single_truck(truck_nr, truck_brands, conv)  # get the properties of this truck
        trucks.append(truck)

    # Build the table in one go; `DataFrame.append` was removed in pandas 2.0
    fleet = pd.DataFrame(trucks, columns=columns)

    # Number the trucks from 1, so that it's easier for customers to query the table
    fleet.index = pd.RangeIndex(start=1, stop=len(trucks) + 1, name='Truck nr.')

    # Check if the fleet information is correct; if not, re-do the offending rows
    fleet, conv = check_fleet(fleet, truck_brands, conv)
    return fleet, conv
# ----------------------------------------------------------------------------------------------------------------------
# MAIN function
# ----------------------------------------------------------------------------------------------------------------------
def main(data_folder: str, brands_file: str, fleet_name_suf: str, conv_name_suf: str, date_now: str):
    """
    Perform the conversation with the customer, collect the data, write it into a csv file, and save the entire
    dialogue in a txt file.
    Args:
        :param data_folder: path to the folder where the data (conversation, fleet) is saved; it is created if it does
            not exist yet
        :param brands_file: path to the csv file containing truck brand data; it is generated with
            `get_truck_brand_names.truck_brands_table()` if it does not exist yet
        :param fleet_name_suf: suffix for creating the fleet file name
        :param conv_name_suf: suffix for creating the conversation file name
        :param date_now: today's date
    """
    global fleet_path, conv_path
    conv = []  # main conversation list, where the entire dialogue will be stored

    # Make sure the save folder exists up front, so that the collected data is not lost at save time
    os.makedirs(data_folder, exist_ok=True)

    # Check if the list of truck brands exists in the main folder
    if not os.path.isfile(brands_file):
        get_truck_brand_names.truck_brands_table(brands_file)

    # Load truck brands into a list
    _, brands = load_brands(brands_file)  # we only need the list of brands for now

    # Start the conversation
    conv = say('Hello, I am here to help you organize your fleet.', conv)
    conv = say('If at any point you want to quit from the program, just type "q" or "quit".', conv)
    time.sleep(0.5)
    conv = say('Let us first collect basic information.', conv)
    time.sleep(0.5)

    # Get basic information and construct the paths of the files used for saving the data
    fleet_path, conv_path, conv = get_basic_info(data_folder, fleet_name_suf, conv_name_suf, date_now, conv)

    # Obtain the complete fleet information
    fleet, conv = get_fleet(brands, conv)

    # Say goodbye
    conv = say('Thank you, and have a nice day!', conv)

    # Save the data
    save_fleet(fleet)
    save_conv(conv)
# ----------------------------------------------------------------------------------------------------------------------
# SCRIPT ENTRY POINT
# ----------------------------------------------------------------------------------------------------------------------
if __name__ == "__main__":
    # Initialize the configuration of the script.
    # NOTE: these names become module-level globals; `data_path` in particular is read directly by
    # `generate_random_user()`, so it must be defined before that function is called.
    data_path = './data' # path to the data folder
    brands_path = './truck_brands.csv' # path to the csv file containing truck brand data
    fleet_suffix = 'fleetdata.csv' # suffix for creating the fleet file name
    conv_suffix = 'conversation.txt' # suffix for creating the conversation file name
    # Initialize random user, for bookkeeping purposes: this sets placeholder save paths, so that the conversation can
    # be saved even if the user quits before giving their name and fleet designation
    fleet_path, conv_path, today = generate_random_user(fleet_suffix, conv_suffix)
    # Run the conversation and save the collected data
    main(data_path, brands_path, fleet_suffix, conv_suffix, today)