-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
94 lines (51 loc) · 2.61 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
# -*- coding: utf-8 -*-
"""
Created on Fri Jan 12 09:54:09 2024
@author: jonha
Main File
"""
import helpers as h
import pandas as pd
from datetime import datetime
## directories
# Alternative Windows paths (disabled).
#input_dir = 'C:\\TestCode\\csat\\sampleData\\'
#input_image_dir = 'C:\\TestCode\\csat\\images\\'
#output_dir = 'C:\\TestCode\\csat\\outputData\\'
# Active paths. Each ends with a separator because file names are appended
# to them with plain string concatenation.
input_dir = '/workspaces/text_mining/sampleData/'
input_image_dir = '/workspaces/text_mining/images/'
output_dir = '/workspaces/text_mining/outputData/'

# inputs
input_excel = 'sampleCustomerChatData.xlsx'  # workbook read from input_dir
response_column_name = 'chat_survey_response'  # column analysed by the helpers

# outputs (file names only; they are combined with output_dir at the call sites)
output_word_count_sentiment = 'word_counts_sentiment.xlsx'
output_biggram_count_sentiment = 'biggram_counts_sentiment.xlsx'
output_response_sentiment = 'response_sentiment.xlsx'
output_wordcloud = 'wordCloud.png'

# Dataframe
# Load the workbook, then drop every row that has no value in the response
# column. The filter uses response_column_name rather than a repeated string
# literal so that the column is configured in exactly one place.
chat_data = pd.read_excel(input_dir + input_excel)
chat_data = chat_data.dropna(subset=[response_column_name])
# Enable the analysis steps you need by uncommenting them; comment out the rest.
#####
# Word-level analysis: a count of all words together with their sentiment
# score. The result is kept in word_counts because the word cloud calls
# later in this file take it as their first argument.
word_counts = h.single_word_count(
    chat_data,
    response_column_name,
    output_dir + output_word_count_sentiment,
)
# Bigram text analysis. The numeric argument sets the desired length of the
# returned bigram.
#biggram_counts = h.biggrams(chat_data, response_column_name, 3, output_dir + output_biggram_count_sentiment)
# Complete response analysis that decides whether each response is positive
# or negative. WARNING: this one takes a LONG time (hours, depending on the
# size of the dataframe).
#response_rating = h.process_chat_data(chat_data, output_dir + output_response_sentiment)
# Word cloud section.
# Colormap options: https://matplotlib.org/stable/users/explain/colors/colormaps.html
# The word_counts function above must have been run before any call below.

# Word cloud variables - adjust as needed.
max_words = 1000
background_color = "#363838"  # dark grey background
color_map = ""  # leave blank to use the default
sentiment = ""  # blank = all words, "negative" = negative words, "positive" = positive words
output = "".join((output_dir, output_wordcloud))
mask_image = "".join((input_image_dir, 'cash_app.jpg'))

# Simple word cloud
#h.basic_word_cloud(word_counts, sentiment , output , background_color , color_map, max_words )

# Custom-shape word cloud
#h.custom_word_cloud_image(word_counts, sentiment , output , background_color , color_map, max_words, mask_image )

# Custom-shape word cloud coloured based off the image colour(s)
#h.custom_word_cloud_image_color(word_counts, sentiment , output , background_color , max_words, mask_image )