-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpre_text.py
More file actions
124 lines (94 loc) · 3.8 KB
/
pre_text.py
File metadata and controls
124 lines (94 loc) · 3.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
# Third-party NLP dependencies (NLTK).
# NOTE(review): `stopwords` is not referenced in the visible part of this file.
import nltk
from nltk.corpus import words, stopwords
from nltk.stem import PorterStemmer
# Word collection taken from the NLTK `words` corpus. It is loaded once, at
# import time, and is_random() tests tokens for membership in it.
english_words = words.words()
import string
import re
import random
# Project-local data. Each object is imported under a short alias and then
# re-bound to the name that the functions below actually use.
from chat_tools.chat import chat as ch
# Looked up by token in chat_to_english() (chat abbreviation -> replacement).
chat = ch
# Iterated with .keys()/.items() by is_typical()/get_typical_response(); each
# key is split on ',' there.
from chat_tools.mappings import mappings as typicals
from chat_tools.responses import responses as rs
# Indexed with the key 'bot-definition' in noisy_input().
responses = rs
# Map chat abbreviations to plain English
def chat_to_english(text):
    """
    Description:
        Expand chat abbreviations in a piece of text. Every '.' is removed
        first (so that 'id.k...' becomes 'idk'), the text is lower-cased and
        tokenized with nltk.word_tokenize, and each token that is a key of the
        module-level 'chat' mapping is replaced by the mapped value.
    Parameters:
        text: A string representing the input text to be processed.
    Returns:
        A string made of the (possibly replaced) tokens joined by single
        spaces. Because the result is rebuilt from lower-cased tokens, the
        original casing and spacing are not preserved, and anything the
        tokenizer splits off as its own token comes back separated by a space.
    Example Usage:
        mapped_text = chat_to_english("omg ikr")
        # -> "oh my god i know", assuming 'chat' maps
        #    "omg" -> "oh my god" and "ikr" -> "i know"
    Note:
        The function relies on a mapping called 'chat' that must be defined
        and populated before this function is called. Dots are removed from
        the whole text, not only from abbreviations, so "3.5" becomes "35".
    """
    # Drop every dot up front so dotted spellings collapse to the plain
    # abbreviation before the lookup ('id.k...' -> 'idk').
    text = text.replace('.', '')
    tokens = nltk.word_tokenize(text.lower())
    # Tokens without an entry in 'chat' pass through unchanged.
    return ' '.join(
        chat[token] if token in chat else token
        for token in tokens
    )
def sanitize_input(user_input):
    """
    Description:
        Normalize raw user text before further processing: surrounding
        whitespace is trimmed, the text is lower-cased, and the result is
        passed through chat_to_english.
    Parameters:
        user_input: The raw string to normalize.
    Returns:
        The string returned by chat_to_english for the trimmed, lower-cased
        input.
    """
    trimmed_lowered = user_input.strip().lower()
    return chat_to_english(trimmed_lowered)
# Set view of the module-level 'english_words' collection, built on first use
# so that each membership test is a hash lookup rather than a scan.
_ENGLISH_WORD_SET = None


def _is_english_word(token):
    """Return True if token, as given or capitalized, is in english_words."""
    global _ENGLISH_WORD_SET
    if _ENGLISH_WORD_SET is None:
        _ENGLISH_WORD_SET = frozenset(english_words)
    return (
        token in _ENGLISH_WORD_SET
        or token.capitalize() in _ENGLISH_WORD_SET
    )


def _net_word_score(tokens):
    """Return (recognized token count) minus (unrecognized token count)."""
    return sum(1 if _is_english_word(token) else -1 for token in tokens)


def is_random(user_input):
    """
    Description:
        Decide whether the input looks like random text rather than English.
        Punctuation is removed and the remainder is trimmed. A single word is
        random when neither it nor its capitalized form is in 'english_words'.
        For several words, every token scores +1 when recognized and -1
        otherwise; the same is done for the Porter-stemmed tokens and the
        higher of the two totals is kept. The input is accepted when that
        total, as a rounded percentage of the token count, is at least 50
        (i.e. roughly three quarters of the tokens are recognized).
    Parameters:
        user_input: The string to classify.
    Returns:
        True if the input is considered random, False otherwise.
    """
    # Remove punctuation, then trim: stripping punctuation can leave stray
    # edge whitespace ("hello !" -> "hello ") that would make a valid single
    # word fail the lookup below.
    user_input = user_input.translate(str.maketrans('', '', string.punctuation))
    user_input = user_input.strip()

    # Multi-word means two letters separated by whitespace. '\s+' lets words
    # separated by more than one whitespace character count as multi-word.
    if not re.search(r"[a-z]\s+[a-z]", user_input, re.IGNORECASE):
        return not _is_english_word(user_input)

    tokens = nltk.word_tokenize(user_input)
    stemmer = PorterStemmer()
    stemmed_tokens = [stemmer.stem(token) for token in tokens]

    # Stemming may turn an inflected form into a listed word (or the reverse),
    # so keep whichever variant scores better.
    best_score = max(_net_word_score(tokens), _net_word_score(stemmed_tokens))
    return round((best_score / len(tokens)) * 100) < 50
def is_empty(user_input):
    """
    Description:
        Tell whether the input carries no visible content.
    Parameters:
        user_input: The string to check. None is accepted and treated as
            empty.
    Returns:
        True if user_input is None, the empty string, or consists only of
        whitespace characters; False otherwise.
    """
    # A missing value counts as empty instead of raising AttributeError.
    if user_input is None:
        return True
    return user_input == "" or user_input.isspace()
def noisy_input():
    """
    Description:
        Pick one entry at random from responses['bot-definition'].
    Returns:
        The randomly chosen entry.
    """
    candidates = responses['bot-definition']
    return random.choice(candidates)
def remove_punc(_string):
    """
    Description:
        Delete every character listed in string.punctuation from the input,
        then trim leading and trailing whitespace from what is left.
    Parameters:
        _string: The string to clean.
    Returns:
        The cleaned string. Whitespace inside the string is left as it is, so
        removing a punctuation mark between two spaces leaves both spaces.
    """
    punctuation_table = str.maketrans('', '', string.punctuation)
    without_punctuation = _string.translate(punctuation_table)
    return without_punctuation.strip()
def is_typical(user_input):
    """
    Description:
        Check whether the input, once cleaned by remove_punc, is one of the
        phrases listed in the keys of 'typicals'. Each key is split on ','
        and the cleaned input must equal one of the resulting pieces exactly.
    Parameters:
        user_input: The string to look up.
    Returns:
        True if a matching phrase exists, False otherwise.
    Note:
        The pieces are compared as-is: they are not trimmed or otherwise
        normalized, whereas the input has punctuation and edge whitespace
        removed.
    """
    cleaned = remove_punc(user_input)
    return any(cleaned in phrases.split(',') for phrases in typicals)
def get_typical_response(user_input):
    """
    Description:
        Look up the response stored in 'typicals' for the input. The input is
        cleaned by remove_punc, each key is split on ',', and the value of
        the first key (in iteration order) that lists the cleaned input
        exactly is returned.
    Parameters:
        user_input: The string to look up.
    Returns:
        The matching value from 'typicals', or None when no key lists the
        cleaned input.
    """
    cleaned = remove_punc(user_input)
    matching_responses = (
        response
        for question, response in typicals.items()
        if cleaned in question.split(',')
    )
    return next(matching_responses, None)