From 4978c1090ae1ae6c7049cab2b1a4a0922d276075 Mon Sep 17 00:00:00 2001
From: Oden
Date: Tue, 31 Oct 2023 15:07:23 -0700
Subject: [PATCH] Add article script from Matt

---
 ...graffiti-dashboard-build.yml => build.yml} |   2 +-
 .../{graffiti-dashboard-cron.yml => cron.yml} |   9 +-
 .../all_tickets/ticket_extract_transform.r    | 690 ------------------
 scripts/articles/articles.R                   |  50 ++
 4 files changed, 58 insertions(+), 693 deletions(-)
 rename .github/workflows/{graffiti-dashboard-build.yml => build.yml} (95%)
 rename .github/workflows/{graffiti-dashboard-cron.yml => cron.yml} (73%)
 delete mode 100644 scripts/all_tickets/ticket_extract_transform.r
 create mode 100644 scripts/articles/articles.R

diff --git a/.github/workflows/graffiti-dashboard-build.yml b/.github/workflows/build.yml
similarity index 95%
rename from .github/workflows/graffiti-dashboard-build.yml
rename to .github/workflows/build.yml
index cfa9763..851495b 100644
--- a/.github/workflows/graffiti-dashboard-build.yml
+++ b/.github/workflows/build.yml
@@ -1,4 +1,4 @@
-name: Graffiti Dashboard (build)
+name: Build
 
 on:
   push:
diff --git a/.github/workflows/graffiti-dashboard-cron.yml b/.github/workflows/cron.yml
similarity index 73%
rename from .github/workflows/graffiti-dashboard-cron.yml
rename to .github/workflows/cron.yml
index b5048c1..15a63fc 100644
--- a/.github/workflows/graffiti-dashboard-cron.yml
+++ b/.github/workflows/cron.yml
@@ -1,4 +1,4 @@
-name: Graffiti Dashboard (cron)
+name: Cron
 
 on:
   # Run daily at 8am
@@ -15,7 +15,12 @@ jobs:
     steps:
       - uses: actions/checkout@v3
 
-      - name: Run script
+      - name: Run articles
+        env:
+          ZENDESK_API_EMAIL: ${{ secrets.ZENDESK_API_EMAIL }}
+          ZENDESK_API_TOKEN: ${{ secrets.ZENDESK_API_TOKEN }}
+        run: Rscript /app/scripts/articles/articles.R
+      - name: Run graffiti-dashboard
         env:
           ZENDESK_API_EMAIL: ${{ secrets.ZENDESK_API_EMAIL }}
           ZENDESK_API_TOKEN: ${{ secrets.ZENDESK_API_TOKEN }}
diff --git a/scripts/all_tickets/ticket_extract_transform.r b/scripts/all_tickets/ticket_extract_transform.r
deleted file mode 100644
index 1a94ab4..0000000
--- a/scripts/all_tickets/ticket_extract_transform.r
+++ /dev/null
@@ -1,690 +0,0 @@
-#### LIBRARIES ####
-library(devtools)
-library(tidyverse)
-library(janitor)
-library(lubridate)
-library(readxl)
-library(dplyr)
-library(httr)
-library(jsonlite)
-library(data.table)
-
-#### API authentication and base ####
-token <- Sys.getenv("ZENDESK_API_TOKEN")
-email <- Sys.getenv("ZENDESK_API_EMAIL")
-
-domain <- "https://portlandoregon.zendesk.com/api/v2/"
-
-####===== Pull Zendesk Article Metadata =====#####
-#set request URL
-path <- "help_center/articles"
-request_url <- paste0(domain,path)
-
-# Call Zendesk Articles API to pull metadata on each article
-i <- 1
-apidata <- list()
-next_url <- ""
-
-while(!is.null(next_url)) {
-  call <- GET(paste0(request_url,"page=",i),
-              authenticate(email,token,type="basic"))
-  raw_content <- rawToChar(call$content)
-  api_response <- fromJSON(raw_content,flatten=TRUE)
-  next_url <- api_response$next_page
-  temp <- api_response$articles
-  apidata <- bind_rows(apidata,temp)
-  i <- i+1
-}
-
-#replace nested lists with character strings
-metadata <- apidata %>% mutate(across(where(is.list),as.character))
-
-#remove unused dataframes and fields. Clean unused data
-remove(
-  call
-  , api_response
-  , apidata
-  , temp
-  , i
-  , next_url
-  , temp
-  , raw_content
-)
-
-gc(reset = TRUE)
-
-
-
-#####===== Get Zendesk Groups data =====#####
-path <- "groups"
-request_url <- paste0(domain,path)
-
-#Initial API call
-call <- GET(URLencode(request_url),
-            authenticate(email,token,type="basic"))
-raw_content <- rawToChar(call$content)
-api_response <- fromJSON(raw_content,flatten=TRUE)
-groups <- api_response$groups
-
-#remove unused dataframes and fields. Clean unused data
-remove(
-  call
-  , api_response
-  , raw_content
-)
-
-gc(reset = TRUE)
-
-
-
-#####===== Get Zendesk Ticket Forms data =====#####
-path <- "ticket_forms"
-request_url <- paste0(domain,path)
-
-#Initial API call
-call <- GET(URLencode(request_url),
-            authenticate(email,token,type="basic"))
-raw_content <- rawToChar(call$content)
-api_response <- fromJSON(raw_content,flatten=TRUE)
-ticket_forms <- api_response$ticket_forms
-
-#remove unused dataframes and fields. Clean unused data
-remove(
-  call
-  , api_response
-  , raw_content
-)
-
-gc(reset = TRUE)
-
-
-####===== Get Zendesk Article Categories data =====#####
-#api authentication and base
-base_url <- "https://portlandoregon.zendesk.com/api/v2/help_center/categories.json"
-call <- GET(paste0(base_url),
-            authenticate(email,token,type="basic"))
-raw_content <- rawToChar(call$content)
-api_response <- fromJSON(raw_content,flatten=TRUE)
-categories <- api_response$categories
-
-#write to csv
-write.csv(categories, paste0(directory,"/article_categories.csv"))
-
-#remove unused dataframes and fields. Clean unused data
-remove(
-  call
-  , api_response
-  , raw_content
-  , base_url
-)
-
-gc(reset = TRUE)
-
-
-
-####===== Get Zendesk Article Sections data =====#####
-#api authentication and base
-base_url <- "https://portlandoregon.zendesk.com/api/v2/help_center/sections.json"
-call <- GET(paste0(base_url),
-            authenticate(email,token,type="basic"))
-raw_content <- rawToChar(call$content)
-api_response <- fromJSON(raw_content,flatten=TRUE)
-
-# Loop to pull all sections
-i <- 1
-apidata <- api_response$sections
-next_url <- api_response$next_page
-
-#While loop to move through API response pages and union all data together
-while(!is.null(next_url)) {
-  call <- GET(next_url,
-              authenticate(email,token,type="basic"))
-  raw_content <- rawToChar(call$content)
-  api_response <- fromJSON(raw_content,flatten=TRUE)
-  next_url <- api_response$next_page
-  temp <- api_response$sections
-  apidata <- bind_rows(apidata,temp)
-}
-
-#set up a dataframe for each layer in the article section hierarchy
-sec_all <- apidata %>%
-  select(parent_section_id,id,url,html_url,category_id,created_at,updated_at,name,description) %>%
-  rename("section_id"="id")
-
-#remove unused dataframes and fields. Clean unused data
-remove(
-  call
-  , api_response
-  , raw_content
-  , base_url
-  , apidata
-  , temp
-  , next_url
-  , i
-)
-
-gc(reset = TRUE)
-
-
-####===== Get ticket events =====####
-#May be able to use incremental metric events API instead
-#Note to self - do we still need to use the audits API if we do this?
-load(file = "next_url.Rdata") -load(file = "ticket_events.Rdata") - -# base_url <- "https://portlandoregon.zendesk.com/api/v2/incremental/ticket_events?start_time=1640995200" - -#start_time <- "1669881600" #2022-12-01 00:00:00 -#start_time <- "1640995200" #2022-01-01 00:00:00 - -#request_url <- paste0(base_url,start_time,".json") -#request_url <- next_url -request_url <- base_url - -#make initial call to the audits API -# call <- GET(request_url, -# authenticate(email,token,type="basic")) -# raw_content <- rawToChar(call$content) -# api_response <- fromJSON(raw_content,flatten=TRUE) -end_of_stream <- "" -apidata <- ticket_events -# next_url <- api_response$next_page -# apidata <- api_response$ticket_events - -while(!is.null(end_of_stream)) { - call <- GET(next_url, - authenticate(email,token,type="basic")) - raw_content <- rawToChar(call$content) - api_response <- fromJSON(raw_content,flatten=TRUE) - next_url <- api_response$next_page - temp <- api_response$ticket_events - apidata <- bind_rows(apidata,temp) - end_of_stream <- api_response$end_of_stream - Sys.sleep(10) -} - -incremental_error_check <- api_response - -save(apidata, file = "ticket_events.Rdata") -save(next_url, file = "next_url.Rdata") - -# unlist nested list with id -apidata.events <- rbindlist(apidata$child_events,use.names=T,fill=T,idcol=T) -#apidata.events <- subset(apidata.events, select = -c(1)) -#apidata.events <- filter(apidata.events,type=="KnowledgeLinked" | field_name=="status") - - -# create same id in remaining data frame -apidata$.id <- seq.int(nrow(apidata)) - -# join data frame with unlisted list -ticket_events <- left_join(apidata, apidata.events, by = c(".id"=".id")) -ticket_events <- filter(ticket_events,!is.na(status)) - - - -####===== Get knowledge base links data =====#### -#Call ticket audit API to pull data on when someone uses knowledge links -base_url <- "https://portlandoregon.zendesk.com/api/v2/ticket_audits.json" -#start_date <- "&[start_date]=2022-12-01" -#end_date <- "&[start_date]=2022-12-31" -#request_url <- paste0(base_url,start_date,end_date,".json") -request_url <- base_url - -#make initial call to the audits API -call <- GET(request_url, - authenticate(email,token,type="basic")) -raw_content <- rawToChar(call$content) -api_response <- fromJSON(raw_content,flatten=TRUE) -next_url <- api_response$before_url - -# initialize while loop fields -i <- 1 -apidata <- api_response$audits - -#While loop to move through API response pages and union all data together -#Would like to set this up to approximate incremental updates so it doesn't take so long -while(!is.null(next_url)) { - call <- GET(next_url, - authenticate(email,token,type="basic")) - raw_content <- rawToChar(call$content) - api_response <- fromJSON(raw_content,flatten=TRUE) - next_url <- api_response$before_url - temp <- api_response$audits - apidata <- bind_rows(apidata,temp) -} - -#Removed unneeded fields -apidata <- apidata %>% select( - -"metadata.suspension_type_id" - , -"metadata.flags",-"metadata.trusted" - , -"metadata.notifications_suppressed_for" - , -"metadata.system.location" - , -"metadata.system.latitude" - , -"metadata.system.longitude" - , -"metadata.system.client" - , -"metadata.system.ip_address" - , -"metadata.system.message_id" - , -"metadata.system.email_id" - , -"metadata.system.raw_email_identifier" - , -"metadata.system.json_email_identifier" - , -"metadata.system.eml_redacted" - , -"metadata.system.machine_generated" - , -"metadata.flags_options.25.trusted" - , -"metadata.flags_options.2.trusted" - , 
-"metadata.flags_options.15.trusted" - , -"metadata.flags_options.9.trusted" - , -"metadata.flags_options.4.trusted" - , -"metadata.flags_options.21.trusted" - , -"metadata.flags_options.11.trusted" - , -"metadata.flags_options.11.message.user" - , -"metadata.flags_options.11.message.user_id" -) - -# unlist nested list with id -apidata.events <- apidata %>% slice(1:10000) -apidata.events <- rbindlist(apidata.events$events,fill=T) -apidata.events <- filter(apidata.events,type=="KnowledgeLinked" | field_name=="status") -x <- 10001 -y <- 20000 - -#getting a stack overflow error when I try to bind all at once, so I'm doing it bit by bit -while(y<=nrow(apidata)+10000) { - temp <- apidata %>% slice(x:y) - temp <- rbindlist(temp$events,fill=T) - temp <- filter(temp,type=="KnowledgeLinked" | field_name=="status") - apidata.events <- bind_rows(apidata.events,temp) - x <- x+10000 - y <- y+10000 - } - -# apidata$id <- format(apidata$id, scientific = FALSE) -# apidata.events$id <- format(apidata.events$id, scientific = TRUE) -# apidata.events$audit_id <- format(apidata.events$audit_id, scientific = TRUE) -# apidata.events$audit_id - -apidata.events$audit_id <- as.character(apidata.events$audit_id) - - -# create same id in remaining data frame -#apidata$id1 <- seq.int(nrow(apidata)) - -# join data frame with unlisted list -article_links <- left_join(apidata, apidata.events, by = c("id" = "audit_id")) - -#remove unused dataframes and fields. Clean unused data -remove( - call - , api_response - , raw_content - , base_url - , apidata - , temp - , next_url - , i - , x - , y - , apidata.events -) - -gc(reset = TRUE) - -# remove all remaining lists in the data -article_links <- article_links %>% mutate(across(where(is.list),as.character)) -article_links <- article_links[, sapply(article_links, class) != "list"] - - - -####===== Get ticket field lookup table =====#### -base_url <- "https://portlandoregon.zendesk.com/api/v2/ticket_fields" - -#Initial API call -call <- GET(URLencode(base_url), - authenticate(email,token,type="basic")) -raw_content <- rawToChar(call$content) -api_response <- fromJSON(raw_content,flatten=TRUE) -ticket_fields <- api_response$ticket_fields - -#remove unused dataframes and fields. Clean unused data -remove( - call - , api_response - , raw_content - , base_url -) - - - -####===== Get tickets data =====#### -#NEED TO UNNEST THE 311 ACTIONS TAKEN TO ALLOW FOR BETTER HANDLING OF MULTIPLE ENTRIES -#ALSO NEED INCREMENTAL REFRESH OF THIS DATA TO SPEED THIS UP -base_url <- "https://portlandoregon.zendesk.com/api/v2/search/export.json?" 
-group <- "-group:4549352062487" -ery <- "query=" -type <- "&filter[type]=ticket" -request_url <- paste0(base_url,query,group,type) - -#Initial API call -call <- GET(URLencode(request_url), - authenticate(email,token,type="basic")) -raw_content <- rawToChar(call$content) -api_response <- fromJSON(raw_content,flatten=TRUE) -next_url <- api_response$links$`next` - -# Initialize fields for a while loop to request all ticket records from the Zendesk API -i <- 1 -apidata <- api_response$results -has_more <- TRUE - -# While loop to request all ticket records from the Zendesk API -while(has_more==TRUE) { - call <- GET(next_url, - authenticate(email,token,type="basic")) - raw_content <- rawToChar(call$content) - api_response <- fromJSON(raw_content,flatten=TRUE) - has_more <- api_response$meta$has_more - next_url <- api_response$links$`next` - temp <- api_response$results - apidata <- bind_rows(apidata,temp) -} - -# unlist nested list with id -apidata.fields <- rbindlist(apidata$custom_fields,fill=T,idcol="id1") -apidata.fields <- apidata.fields[!is.na(apidata.fields$value),] - -# create same id in remaining data frame -apidata$id1 <- seq.int(nrow(apidata)) - -# join data frame with unlisted list -tickets <- left_join(apidata, apidata.fields, by = "id1") - -#remove unused dataframes and fields. Clean unused data -remove( - call - , api_response - , raw_content - , base_url - , apidata - , temp - , next_url - , i - , has_more - , group - , request_url - , apidata.fields -) - -gc(reset = TRUE) - -#Add ticket field definitions to tickets data -ticket_fields <- ticket_fields %>% rename("field_id" = "id") -tickets <- tickets %>% rename("field_id" = "id.y") -tickets_joined <- left_join(tickets, ticket_fields, by = "field_id") - -gc(reset = TRUE) - -#pivot fields to make dataset wide - avoid having too many rows with repeated data to make the size a little more manageable. -ticket_fieldvals <- tickets_joined %>% - pivot_wider(id_cols = "id.x", names_from="title") - -tickets <- tickets_joined %>% - select( - "id.x" - , "status" - , "requester_id" - , "submitter_id" - , "assignee_id" - , "organization_id" - , "group_id" - , "via.channel" - , "ticket_form_id" - , "created_at.x" - ) %>% - rename( - "ticket_id" = "id.x" - ) - -tickets <- distinct(tickets) - -# join data frame with ticket fields. Definitely possible to do this more efficiently -tickets <- inner_join(tickets, ticket_fieldvals, by = c("ticket_id" = "id.x")) - -# convert lists into character strings. -# TO DO: MAKE SEPARATE COLUMNS FOR EACH RESOLUTION TYPE SO WE CAN HAVE MULTIPLE -tickets <- tickets %>% mutate(across(where(is.list),as.character)) - -#remove unused dataframes and fields. 
-remove(
-  tickets_joined
-  , ticket_fields
-  , ticket_fieldvals
-)
-
-gc(reset = TRUE)
-
-
-
-####===== Join categories and sections to ticket data =====####
-#Repeatedly join section dataframe to itself to build article hierarchy
-sec_joined <- left_join(sec_all, sec_all, by = c("parent_section_id" = "section_id"), suffix = c("",".1"))
-sec_joined <- left_join(sec_joined, sec_joined, by = c("parent_section_id.1" = "section_id"), suffix = c("",".2"))
-sec_joined <- left_join(sec_joined, sec_joined, by = c("parent_section_id.2" = "section_id"), suffix = c("",".3"))
-
-#Because the number of section layers varies by article, we want to set a single section to represent the top-level section
-sec_joined$sec_a.id <- ifelse(is.na(sec_joined$parent_section_id.2),sec_joined$parent_section_id.1,sec_joined$parent_section_id.2)
-sec_joined$sec_a.id <- ifelse(is.na(sec_joined$sec_a.id),sec_joined$parent_section_id,sec_joined$sec_a.id)
-sec_joined$sec_a.id <- ifelse(is.na(sec_joined$sec_a.id),sec_joined$section_id,sec_joined$sec_a.id)
-
-#Join to bring in section names
-sec_joined <- left_join(sec_joined, sec_all, by = c("sec_a.id" = "section_id"), suffix = c("",".sec_a"))
-
-
-#Repeat pattern for layer b - if top level parent id = Section A ID then we know that B uses the parent id at the next-lowest level, and so on
-sec_joined$sec_b.id <- ifelse(sec_joined$parent_section_id.2==sec_joined$sec_a.id,sec_joined$parent_section_id.1,NA)
-sec_joined$sec_b.id <- ifelse(sec_joined$parent_section_id.1==sec_joined$sec_a.id,sec_joined$parent_section_id,sec_joined$sec_b.id)
-sec_joined$sec_b.id <- ifelse(sec_joined$parent_section_id==sec_joined$sec_a.id,sec_joined$section_id,sec_joined$sec_b.id)
-
-sec_joined <- left_join(sec_joined, sec_all, by = c("sec_b.id" = "section_id"), suffix = c("",".sec_b"))
-
-
-#Repeat pattern for layer c
-sec_joined$sec_c.id <- ifelse(sec_joined$parent_section_id.1==sec_joined$sec_b.id,sec_joined$parent_section_id,NA)
-sec_joined$sec_c.id <- ifelse(sec_joined$parent_section_id==sec_joined$sec_b.id,sec_joined$section_id,sec_joined$sec_c.id)
-
-sec_joined <- left_join(sec_joined, sec_all, by = c("sec_c.id" = "section_id"), suffix = c("",".sec_c"))
-
-
-#Repeat pattern for layer d
-sec_joined$sec_d.id <- ifelse(sec_joined$parent_section_id==sec_joined$sec_c.id,sec_joined$section_id,NA)
-
-sec_joined <- left_join(sec_joined, sec_all, by = c("sec_d.id" = "section_id"), suffix = c("",".sec_d"))
-
-
-#Select only the relevant fields to complete section lookup table
-sec_lookup <- sec_joined %>% select(
-  "section_id"
-  , "category_id"
-  , "name"
-  , "description"
-  , "sec_a.id"
-  , "name.sec_a"
-  , "sec_b.id"
-  , "name.sec_b"
-  , "sec_c.id"
-  , "name.sec_c"
-  , "sec_d.id"
-  , "name.sec_d"
-)
-
-write.csv(sec_lookup, paste0(directory,"/section_lookup.csv"))
-
-####===== Create single dataframe with tickets, categories, sections, and articles =====#####
-#create single dataframe with all relevant category, section, and article information
-article_meta <- left_join(metadata, sec_lookup, by = c("section_id" = "section_id"))
-article_meta <- left_join(article_meta, categories, by = c("category_id" = "id"))
-article_meta <- select(article_meta,-"body")
-
-write.csv(article_meta, paste0(directory,"/article_metadata.csv"))
-
-#Break up article_links into knowledge capture and solved events
-knowledge_capture <- filter(article_links, type == "KnowledgeLinked")
-
-## TO DO: Why am I only getting created_at dates in January 2023
-#solve_date <- subset(article_links, (value == "solved" | value == "closed"))
"solved" | value == "closed")) -solve_date <- filter(ticket_events, (status == "solved")) -create_date <- filter(ticket_events, (event_type.y == "Create")) -open_date <- filter(ticket_events, (status == "open")) -pending_date <- filter(ticket_events, (status == "pending")) - -#unique(apidata.events$status) - -#collapse event dates to a single row -solve_date <- solve_date %>% group_by(ticket_id) %>% summarize(solve_date = min(timestamp)) -create_date <- create_date %>% group_by(ticket_id) %>% summarize(create_date = min(timestamp)) -open_date <- open_date %>% group_by(ticket_id) %>% summarize(open_date = min(timestamp)) -pending_date <- pending_date %>% group_by(ticket_id) %>% summarize(pending_date = min(timestamp)) - -knowledge_capture_small <- knowledge_capture %>% - select(-events - ,-author_id.x - ,-via.source.rel.x - ,-via.source.from.address - ,-via.source.from.name - ,-via.source.from.original_recipients - ,-via.source.from.deleted.x - ,-via.source.from.deleted.y - ,-via.source.from.title.x - ,-via.source.from.title.y - ,-via.source.from.id.x - ,-via.source.from.ticket_id - ,-via.source.from.subject - ,-via.source.from.channel - ,-body.html_url - ,-body.url - ) - -gc(reset=TRUE) - -tickets_all <- left_join(tickets, knowledge_capture_small, by = "ticket_id") - -#Join tickets with article_links data to get all knowledge capture events -tickets_all <- left_join(tickets_all, solve_date, by = "ticket_id") -tickets_all <- left_join(tickets_all, create_date, by = "ticket_id") -tickets_all <- left_join(tickets_all, open_date, by = "ticket_id") -tickets_all <- left_join(tickets_all, pending_date, by = "ticket_id") - -#Join with article metadata -tickets_all <- tickets_all %>% rename("article_id" = "body.id") -#metadata <- metadata %>% rename("article_id" = "id") -#tickets_full <- left_join(tickets_full, metadata, by = "article_id") - -#tickets_full <- left_join(tickets_full, sec_lookup, by = "section_id") -tickets_all <- full_join(tickets_all, article_meta, by = c("article_id" = "id")) -tickets_all <- full_join(tickets_all, groups, by = c("group_id" = "id")) -tickets_all <- full_join(tickets_all, ticket_forms, by = c("ticket_form_id" = "id")) - -length(unique(tickets_all$id.y.y)) - -#select only the fields that we need. Can expand this. 
-ticket_data <- tickets_all %>% - select( - "ticket_id" - , "status" - , "via.channel" - # , "Asset ID" - # , "Company" - # , "Repeat offender" - # , "Confidentiality Waiver" - # , "Waiver URL" - # , "Waiver Received Date" - , "PBOT Signs and Streetlights Asset" - # , "Hate speech or gang related" - , "Related ticket ID" - # , "Vehicle License Plate" - , "Webform ID" - , "Location Lon" - # , "Confidentiality Opt-In" - # , "Resolution URL" - # , "OK for public" - , "Refer to Parks" - # , "Graffiti abatement resolution" - # , "Letter Generation URL" - , "Location Lat" - , "Location Address" - , "Service Level Expectation" - # , "Reported Issue" - , "Contact Type" - , "If transferred/forwarded" - , "Graffiti Status" - # , "Mural" - # , "Vehicle Make and Model" - , "Refer to PBOT" - , "Graffiti contractor" - # , "Public Description" - # , "Waiver received" - # , "Vehicle Present" - # , "Location Notes" - # , "Attachments" - , "311 Action Taken" - , "Square footage" - , "Property Type" - # , "Above ground floor" - # , "Included for KB" - , "created_at.x" - , "created_at.x.x" - , "id.y" - , "article_id" - , "name.x" - #, "body.url" - , "solve_date" - , "description.x" - , "sec_a.id" - , "sec_b.id" - , "sec_c.id" - , "sec_d.id" - , "name.sec_a" - , "name.sec_b" - , "name.sec_c" - , "name.sec_d" - , "name.x.x" - , "name.y.y" - , "name" - , "end_user_visible" - , "create_date" - , "open_date" - , "solve_date" - , "pending_date" - -) %>% - rename("ticket_created_at" = "created_at.x.x" - , "event_created_at" = "created_at.x" - , "event_id" = "id.y" - , "description" = "description.x" - , "name.article" = "name.x" - , "name.category" = "name.x.x" - , "name.group" = "name.y.y" - , "name.form" = "name" - , "form_end_user_visible" = "end_user_visible" - ) - -remove(apidata.events) -remove(knowledge_capture) - -gc(reset = TRUE) - -#write to csv -# save(ticket_data, file = "ticket_data.Rdata") -# save(article_links, file = "article_links.Rdata") -# save(article_meta, file = "article_meta.Rdata") -# save(categories, file = "categories.Rdata") -# save(groups, file = "groups.Rdata") -# save(knowledge_capture_small, file = "knowledge_capture.Rdata") -# save(metadata, file = "metadata.Rdata") -# save(sec_lookup, file = "sec_lookup.Rdata") -# save(ticket_events, file = "ticket_events.Rdata") -# save(ticket_forms, file = "ticket_forms.Rdata") -# save(tickets, file = "tickets.Rdata") - - -ticket_data <- ticket_data %>% mutate(across(where(is.list),as.character)) -write.csv(ticket_data, paste0(directory,"/tickets.csv")) diff --git a/scripts/articles/articles.R b/scripts/articles/articles.R new file mode 100644 index 0000000..d4b5383 --- /dev/null +++ b/scripts/articles/articles.R @@ -0,0 +1,50 @@ +#### LIBRARIES #### +library(devtools) +library(tidyverse) +library(janitor) +library(lubridate) +library(readxl) +library(dplyr) +library(httr) +library(jsonlite) +library(data.table) + + +#### API authentication and base #### +token <- Sys.getenv("ZENDESK_API_TOKEN") +email <- Sys.getenv("ZENDESK_API_EMAIL") + +#### Get Raw Zendesk Article Metadata ##### +#set request URL +domain <- "https://portlandoregon.zendesk.com/api/v2/" +path <- "help_center/articles.json?" 
+request_url <- paste0(domain,path) + +# Call Zendesk Articles API to pull metadata on each article +i <- 1 +apidata <- list() +next_url <- "" + +while(!is.null(next_url)) { +call <- GET(paste0(request_url,"page=",i), + authenticate(email,token,type="basic")) +raw_content <- rawToChar(call$content) +api_response <- fromJSON(raw_content,flatten=TRUE) +next_url <- api_response$next_page +temp <- api_response$articles +apidata <- bind_rows(apidata,temp) +i <- i+1 +} + + + +#Replace nested lists with character strings +zendesk_articles <- apidata %>% mutate(across(where(is.list),as.character)) + +zendesk_articles <- zendesk_articles %>% + select(id,section_id,name) + +#### write data output to csv #### +output_dir <- file.path(getwd(), "data") +dir.create(output_dir) +write.csv(zendesk_articles, file.path(output_dir, "articles.csv"))