# Customer Segments Analysis.R
# Origin: commit eca17844584189740ee9a3eeeab8eabad84b263b
#   "Create Customer Segments Analysis.R" (Caiwen Li, 2023-12-13).
#
# Expects dplyr to be attached by the caller: the function uses `%>%`,
# filter(), select(), distinct(), group_by(), summarise(), mutate(),
# collect() and case_when() unqualified.

#' Segment customers by purchase frequency and average spend per transaction
#'
#' Restricts `transaction_summary` to the given stores and date window,
#' aggregates it to one row per customer, collects the result, drops
#' customers whose trimmed id is too short, and labels every remaining
#' customer with a segment in the `buckets` column.
#'
#' Segment rules (with the default thresholds):
#' * `ntran == 1`                          -> `"one_and_done"`
#' * `1 < ntran < 4`, `sp_tran_avg < 44`   -> `"repeat_customers"`
#' * `1 < ntran < 4`, `sp_tran_avg >= 44`  -> `"repeat_highvalue_customers"`
#' * `ntran >= 4`,    `sp_tran_avg < 44`   -> `"loyal_customers"`
#' * anything else                         -> `"loyal_highvalue_customers"`
#'
#' @param transaction_summary A data frame or lazy table with at least the
#'   columns `tranid`, `gtin`, `trandate`, `storeid`, `customerid`,
#'   `totdiscountamount` and `totitemsaleprice`.
#' @param gabe_list Vector of store ids to keep (`storeid %in% gabe_list`).
#' @param start_date Inclusive lower bound for `trandate`.
#' @param end_date Exclusive upper bound for `trandate`.
#' @param high_value_threshold Average spend per transaction at or above
#'   which a customer is tagged as high value. Defaults to 44.
#' @param loyal_min_tran Minimum number of distinct transactions for the
#'   "loyal" segments. Defaults to 4.
#' @param min_id_length Minimum number of characters (after trimming
#'   whitespace) a `customerid` must have to be kept. Defaults to 8.
#'
#' @return The collected per-customer table with columns `customerid`,
#'   `ntran`, `nunit`, `nstore`, `spend`, `discount`, `sp_tran_avg`,
#'   `sp_unit_avg`, `unit_avg`, `str_count` and `buckets`.
analyze_customer_segments <- function(transaction_summary,
                                      gabe_list,
                                      start_date,
                                      end_date,
                                      high_value_threshold = 44,
                                      loyal_min_tran = 4,
                                      min_id_length = 8) {
  stopifnot(
    is.numeric(high_value_threshold), length(high_value_threshold) == 1L,
    is.numeric(loyal_min_tran), length(loyal_min_tran) == 1L,
    is.numeric(min_id_length), length(min_id_length) == 1L
  )

  # Per-customer aggregation. Everything up to collect() uses only verbs and
  # functions the original pipeline used, so it can still run on a lazy
  # (database-backed) table.
  active_customers_query <- transaction_summary %>%
    filter(
      trandate >= start_date,
      trandate < end_date,
      storeid %in% gabe_list
    ) %>%
    select(
      tranid, gtin, trandate, storeid, customerid,
      totdiscountamount, totitemsaleprice
    ) %>%
    # Drop exact duplicate rows so they are not summed twice.
    distinct() %>%
    group_by(customerid) %>%
    summarise(
      ntran = n_distinct(tranid),
      # Number of distinct gtin values, not a quantity of items.
      nunit = n_distinct(gtin),
      nstore = n_distinct(storeid),
      # na.rm = TRUE: SQL SUM already skips NULLs; this makes a local data
      # frame behave the same way and silences dbplyr's na.rm warning.
      spend = sum(totitemsaleprice, na.rm = TRUE),
      discount = sum(totdiscountamount, na.rm = TRUE)
    ) %>%
    filter(customerid != "" & spend > 0) %>%
    mutate(
      sp_tran_avg = spend / ntran,
      sp_unit_avg = spend / nunit,
      unit_avg = nunit / ntran
    ) %>%
    ungroup()

  # Bring the aggregated rows into memory; the remaining steps run locally.
  active_customers <- active_customers_query %>%
    collect()

  # Drop customer ids that are too short to be valid. The helper column
  # `str_count` stays in the output because the original returned it.
  active_customers_f17 <- active_customers %>%
    mutate(str_count = nchar(trimws(customerid))) %>%
    filter(str_count >= min_id_length)

  # Classify customers. The high-value comparison is inclusive (>=) so an
  # average exactly on the threshold stays in its frequency tier instead of
  # falling through to the final catch-all branch.
  active_customers_f17 %>%
    mutate(
      buckets = case_when(
        ntran == 1 ~ "one_and_done",
        ntran > 1 & ntran < loyal_min_tran &
          sp_tran_avg < high_value_threshold ~ "repeat_customers",
        ntran > 1 & ntran < loyal_min_tran &
          sp_tran_avg >= high_value_threshold ~ "repeat_highvalue_customers",
        ntran >= loyal_min_tran &
          sp_tran_avg < high_value_threshold ~ "loyal_customers",
        TRUE ~ "loyal_highvalue_customers"
      )
    )
}