From 23b0b7d0084e6eb210eec7f97022a8b4ae20c88f Mon Sep 17 00:00:00 2001 From: "Sander W. van der Laan" Date: Tue, 23 Aug 2022 16:37:55 +0200 Subject: [PATCH] * v1.0.3 Updated baseline characteristics. --- AEDB.EM.baseline.Rmd | 11 +- AEDB.EM.baseline.nb.html | 1783 +++++++++-------- ...220823.EntropyMasker.AE.BaselineTable.xlsx | Bin 0 -> 7234 bytes ....EntropyMasker.AE.EM.56.BaselineTable.xlsx | Bin 0 -> 7011 bytes ....EntropyMasker.AE.EM.59.BaselineTable.xlsx | Bin 0 -> 6731 bytes 5 files changed, 932 insertions(+), 862 deletions(-) create mode 100644 ae_baseline/20220823.EntropyMasker.AE.BaselineTable.xlsx create mode 100644 ae_baseline/20220823.EntropyMasker.AE.EM.56.BaselineTable.xlsx create mode 100644 ae_baseline/20220823.EntropyMasker.AE.EM.59.BaselineTable.xlsx diff --git a/AEDB.EM.baseline.Rmd b/AEDB.EM.baseline.Rmd index 5cd1f53..b706ea8 100644 --- a/AEDB.EM.baseline.Rmd +++ b/AEDB.EM.baseline.Rmd @@ -52,7 +52,6 @@ _... and load those packages._ ```{r loading_packages, message=FALSE, warning=FALSE} source("scripts/pack01.packages.R") - ``` _We will create a datestamp and define the Utrecht Science Park Colour Scheme_. 
@@ -1011,7 +1010,7 @@ cat("CREATE BASELINE TABLE\n") # Baseline table variables basetable_vars = c("Hospital", "Artery_summary", - "Age", "Gender") + "Age", "Gender", # "ORyear", # "TC_finalCU", "LDL_finalCU", "HDL_finalCU", "TG_finalCU", # "TC_final", "LDL_final", "HDL_final", "TG_final", @@ -1033,7 +1032,8 @@ basetable_vars = c("Hospital", # "IPH.bin", "VesselDensity_rankNorm", # "Calc.bin", "Collagen.bin", # "Fat.bin_10", "Fat.bin_40", - # "OverallPlaquePhenotype", "Plaque_Vulnerability_Index") + "OverallPlaquePhenotype" ) + # , "Plaque_Vulnerability_Index") basetable_bin = c("Hospital", "Artery_summary", @@ -1183,8 +1183,8 @@ saveRDS(AEDB, file = paste0(OUT_loc, "/",Today,".",PROJECTNAME,".AEDB.raw.RDS")) ------------------------------------------------------------------------------------------------------------------------ - Version: v1.0.2 - Last update: 2022-07-12 + Version: v1.0.3 + Last update: 2022-08-23 Written by: Sander W. van der Laan (s.w.vanderlaan-2[at]umcutrecht.nl). Description: Script to get some Athero-Express Biobank Study baseline characteristics. Minimum requirements: R version 3.4.3 (2017-06-30) -- 'Single Candle', Mac OS X El Capitan @@ -1200,6 +1200,7 @@ saveRDS(AEDB, file = paste0(OUT_loc, "/",Today,".",PROJECTNAME,".AEDB.raw.RDS")) _W_ **Changes log** + * v1.0.3 Updated baseline characteristics. * v1.0.2 Simplified the initial script. It now outputs the relevant R-objects (as .RDS). * v1.0.1 Update to main AEDB (there is an error in the Age-variable in the new version). * v1.0.0 Initial version. Add 'plaque vulnerability index', Fixed baseline table, added codes, and results. Major update to WORCS system. diff --git a/AEDB.EM.baseline.nb.html b/AEDB.EM.baseline.nb.html index 6645fca..fc620b1 100644 --- a/AEDB.EM.baseline.nb.html +++ b/AEDB.EM.baseline.nb.html @@ -11,7 +11,7 @@ - + Baseline characteristics @@ -2998,7 +2998,7 @@

Baseline characteristics

Accompanying ‘EntropyMasker’

Sander W. van der Laan, PhD | @swvanderlaan |

-

2022-08-18

+

2022-08-23

@@ -3044,9 +3044,64 @@

General Setup

source("scripts/pack01.packages.R")
- +

-* General packages...
+* General packages... +These packages have more recent versions available. +It is recommended to update all of them. +Which would you like to update? + +1: All +2: CRAN packages only +3: None +4: rlang (1.0.2 -> 1.0.4) [CRAN] +5: pillar (1.7.0 -> 1.8.1) [CRAN] +6: viridisLite (0.4.0 -> 0.4.1) [CRAN] +7: farver (2.1.0 -> 2.1.1) [CRAN] +8: tibble (3.1.7 -> 3.1.8) [CRAN] +9: scales (1.2.0 -> 1.2.1) [CRAN] + + +
3
+ + +
  
+   checking for file ‘/private/var/folders/qr/ycksfpsx091f_xhvzv9r35x00000gq/T/RtmpxOqdin/remotes10e0f4fc624be/thomasp85-patchwork-c14c960/DESCRIPTION’ ...
+  
+✔  checking for file ‘/private/var/folders/qr/ycksfpsx091f_xhvzv9r35x00000gq/T/RtmpxOqdin/remotes10e0f4fc624be/thomasp85-patchwork-c14c960/DESCRIPTION’ (442ms)
+
+  
+─  preparing ‘patchwork’:
+   checking DESCRIPTION meta-information ...
+  
+✔  checking DESCRIPTION meta-information
+
+  
+─  checking for LF line-endings in source and make files and shell scripts
+
+  
+─  checking for empty or unneeded directories
+
+  
+─  building ‘patchwork_1.1.2.9000.tar.gz’
+
+  
+   
+ + +
* installing *source* package ‘patchwork’ ...
+** using staged installation
+** R
+** byte-compile and prepare package for lazy loading
+** help
+*** installing help indices
+*** copying figures
+** building package indices
+** installing vignettes
+** testing if installed package can be loaded from temporary location
+** testing if installed package can be loaded from final location
+** testing if installed package keeps a record of temporary installation path
+* DONE (patchwork)
@@ -3055,11 +3110,11 @@

General Setup

-

-Today = format(as.Date(as.POSIXlt(Sys.time())), "%Y%m%d")
-Today.Report = format(as.Date(as.POSIXlt(Sys.time())), "%A, %B %d, %Y")
-
-source("scripts/colors.R")
+

+Today = format(as.Date(as.POSIXlt(Sys.time())), "%Y%m%d")
+Today.Report = format(as.Date(as.POSIXlt(Sys.time())), "%A, %B %d, %Y")
+
+source("scripts/colors.R")
@@ -3085,7 +3140,7 @@

Load relevant samples

-
EM_samples <- fread(paste0(ANALYSIS_loc, "/dataverse/EntropyMasker_image_files_used.txt"))
+
EM_samples <- fread(paste0(ANALYSIS_loc, "/dataverse/EntropyMasker_image_files_used.txt"))
@@ -3096,42 +3151,42 @@

Load data

-
cat("* get Athero-Express Biobank Study Database...")
+
cat("* get Athero-Express Biobank Study Database...")
* get Athero-Express Biobank Study Database...
-
# METHOD 1: It seems this method gives loads of errors and warnings, which all are hard to comprehend
-#           or debug. We expect 3,527 samples, and 927 variables; we get 927 variables!!!
-# AEdata = as.data.table(read.spss(paste0(INP_loc,"/2017-1NEW_AtheroExpressDatabase_ScientificAE_20171306_v1.0.sav"),
-#                                  trim.factor.names = TRUE, trim_values = TRUE, # we trim spaces in values
-#                                  reencode = TRUE, # we re-encode to the local locale encoding
-#                                  add.undeclared.levels = "append", # we do *not* want to convert to R-factors
-#                                  use.value.labels = FALSE, # we do *not* convert variables with value labels into R factors
-#                                  use.missings = TRUE, sub = "NA", # we will set every missing variable to NA
-#                                  duplicated.value.labels = "condense", # we will condense duplicated value labels
-#                                  to.data.frame = TRUE))
-# AEdata.labels <- as.data.table(attr(AEdata, "variable.labels"))
-# names(AEdata.labels) <- "Variable"
-
-# METHOD 2: Using library("haven") importing seems flawless; best argument being:
-#           we expect 3,527 samples and 888 variables, which is what you'd get with this method
-#           So for now, METHOD 2 is prefered. 
-#            
-require(haven)
-
-# AEDB <- haven::read_sav(paste0(AEDB_loc, "/2022_1_NEW_AtheroExpressDatabase_ScientificAE_15-02-2022.sav")) # something wrong with Age-variable
-# AEDB <- haven::read_sav(paste0(AEDB_loc, "/2020_1_NEW_AtheroExpressDatabase_ScientificAE_30-09-2020.sav")) # duplicate studynumbers in it
-AEDB <- haven::read_sav(paste0(AEDB_loc, "/2020_1_NEW_AtheroExpressDatabase_ScientificAE_16-03-2020.sav"))
-
-# writing off the SPSS data to an Excel.
-# fwrite(AEdata, file = paste0(INP_loc,"/2017-1NEW_AtheroExpressDatabase_ScientificAE_20171306_v1.0.values.xlsx"), 
-#        sep = ";", na = "NA", dec = ".", col.names = TRUE, row.names = FALSE,
-#        dateTimeAs = "ISO", showProgress = TRUE, verbose = TRUE)
-# warnings()
-
-AEDB[1:10, 1:10]
+
# METHOD 1: It seems this method gives loads of errors and warnings, which all are hard to comprehend
+#           or debug. We expect 3,527 samples, and 927 variables; we get 927 variables!!!
+# AEdata = as.data.table(read.spss(paste0(INP_loc,"/2017-1NEW_AtheroExpressDatabase_ScientificAE_20171306_v1.0.sav"),
+#                                  trim.factor.names = TRUE, trim_values = TRUE, # we trim spaces in values
+#                                  reencode = TRUE, # we re-encode to the local locale encoding
+#                                  add.undeclared.levels = "append", # we do *not* want to convert to R-factors
+#                                  use.value.labels = FALSE, # we do *not* convert variables with value labels into R factors
+#                                  use.missings = TRUE, sub = "NA", # we will set every missing variable to NA
+#                                  duplicated.value.labels = "condense", # we will condense duplicated value labels
+#                                  to.data.frame = TRUE))
+# AEdata.labels <- as.data.table(attr(AEdata, "variable.labels"))
+# names(AEdata.labels) <- "Variable"
+
+# METHOD 2: Using library("haven") importing seems flawless; best argument being:
+#           we expect 3,527 samples and 888 variables, which is what you'd get with this method
+#           So for now, METHOD 2 is preferred. 
+#            
+require(haven)
+
+# AEDB <- haven::read_sav(paste0(AEDB_loc, "/2022_1_NEW_AtheroExpressDatabase_ScientificAE_15-02-2022.sav")) # something wrong with Age-variable
+# AEDB <- haven::read_sav(paste0(AEDB_loc, "/2020_1_NEW_AtheroExpressDatabase_ScientificAE_30-09-2020.sav")) # duplicate studynumbers in it
+AEDB <- haven::read_sav(paste0(AEDB_loc, "/2020_1_NEW_AtheroExpressDatabase_ScientificAE_16-03-2020.sav"))
+
+# writing off the SPSS data to an Excel.
+# fwrite(AEdata, file = paste0(INP_loc,"/2017-1NEW_AtheroExpressDatabase_ScientificAE_20171306_v1.0.values.xlsx"), 
+#        sep = ";", na = "NA", dec = ".", col.names = TRUE, row.names = FALSE,
+#        dateTimeAs = "ISO", showProgress = TRUE, verbose = TRUE)
+# warnings()
+
+AEDB[1:10, 1:10]
@@ -3141,7 +3196,7 @@

Load data

-
dim(AEDB)
+
dim(AEDB)
[1] 3791 1091
@@ -3212,41 +3267,41 @@

Symptoms

-

-# Fix symptoms
-
-attach(AEDB)
-AEDB[,"Symptoms.5G"] <- NA
-AEDB$Symptoms.5G[sympt == 0] <- "Asymptomatic"
-AEDB$Symptoms.5G[sympt == 1 | sympt == 7 | sympt == 13] <- "TIA"
-AEDB$Symptoms.5G[sympt == 2 | sympt == 3] <- "Stroke"
-AEDB$Symptoms.5G[sympt == 4 | sympt == 14 | sympt == 15 ] <- "Ocular"
-AEDB$Symptoms.5G[sympt == 8 | sympt == 11] <- "Retinal infarction"
-AEDB$Symptoms.5G[sympt == 5 | sympt == 9 | sympt == 10 | sympt == 12 | sympt == 16 | sympt == 17] <- "Other"
-
-
-# AsymptSympt
-AEDB[,"AsymptSympt"] <- NA
-AEDB$AsymptSympt[sympt == -999] <- NA
-AEDB$AsymptSympt[sympt == 0] <- "Asymptomatic"
-AEDB$AsymptSympt[sympt == 1 | sympt == 7 | sympt == 13 | sympt == 2 | sympt == 3] <- "Symptomatic"
-AEDB$AsymptSympt[sympt == 4 | sympt == 14 | sympt == 15 | sympt == 8 | sympt == 11 | sympt == 5 | sympt == 9 | sympt == 10 | sympt == 12 | sympt == 16 | sympt == 17] <- "Ocular and others"
-
-# AsymptSympt
-AEDB[,"AsymptSympt2G"] <- NA
-AEDB$AsymptSympt2G[sympt == -999] <- NA
-AEDB$AsymptSympt2G[sympt == 0] <- "Asymptomatic"
-AEDB$AsymptSympt2G[sympt == 1 | sympt == 7 | sympt == 13 | sympt == 2 | sympt == 3 | sympt == 4 | sympt == 14 | sympt == 15 | sympt == 8 | sympt == 11 | sympt == 5 | sympt == 9 | sympt == 10 | sympt == 12 | sympt == 16 | sympt == 17] <- "Symptomatic"
-
-detach(AEDB)
-
-# table(AEDB$sympt, useNA = "ifany")
-# table(AEDB$AsymptSympt2G, useNA = "ifany")
-# table(AEDB$Symptoms.5G, useNA = "ifany")
-# 
-# table(AEDB$AsymptSympt2G, AEDB$sympt, useNA = "ifany")
-# table(AEDB$Symptoms.5G, AEDB$sympt, useNA = "ifany")
-table(AEDB$AsymptSympt2G, AEDB$Symptoms.5G, useNA = "ifany")
+

+# Fix symptoms
+
+attach(AEDB)
+AEDB[,"Symptoms.5G"] <- NA
+AEDB$Symptoms.5G[sympt == 0] <- "Asymptomatic"
+AEDB$Symptoms.5G[sympt == 1 | sympt == 7 | sympt == 13] <- "TIA"
+AEDB$Symptoms.5G[sympt == 2 | sympt == 3] <- "Stroke"
+AEDB$Symptoms.5G[sympt == 4 | sympt == 14 | sympt == 15 ] <- "Ocular"
+AEDB$Symptoms.5G[sympt == 8 | sympt == 11] <- "Retinal infarction"
+AEDB$Symptoms.5G[sympt == 5 | sympt == 9 | sympt == 10 | sympt == 12 | sympt == 16 | sympt == 17] <- "Other"
+
+
+# AsymptSympt
+AEDB[,"AsymptSympt"] <- NA
+AEDB$AsymptSympt[sympt == -999] <- NA
+AEDB$AsymptSympt[sympt == 0] <- "Asymptomatic"
+AEDB$AsymptSympt[sympt == 1 | sympt == 7 | sympt == 13 | sympt == 2 | sympt == 3] <- "Symptomatic"
+AEDB$AsymptSympt[sympt == 4 | sympt == 14 | sympt == 15 | sympt == 8 | sympt == 11 | sympt == 5 | sympt == 9 | sympt == 10 | sympt == 12 | sympt == 16 | sympt == 17] <- "Ocular and others"
+
+# AsymptSympt2G
+AEDB[,"AsymptSympt2G"] <- NA
+AEDB$AsymptSympt2G[sympt == -999] <- NA
+AEDB$AsymptSympt2G[sympt == 0] <- "Asymptomatic"
+AEDB$AsymptSympt2G[sympt == 1 | sympt == 7 | sympt == 13 | sympt == 2 | sympt == 3 | sympt == 4 | sympt == 14 | sympt == 15 | sympt == 8 | sympt == 11 | sympt == 5 | sympt == 9 | sympt == 10 | sympt == 12 | sympt == 16 | sympt == 17] <- "Symptomatic"
+
+detach(AEDB)
+
+# table(AEDB$sympt, useNA = "ifany")
+# table(AEDB$AsymptSympt2G, useNA = "ifany")
+# table(AEDB$Symptoms.5G, useNA = "ifany")
+# 
+# table(AEDB$AsymptSympt2G, AEDB$sympt, useNA = "ifany")
+# table(AEDB$Symptoms.5G, AEDB$sympt, useNA = "ifany")
+table(AEDB$AsymptSympt2G, AEDB$Symptoms.5G, useNA = "ifany")
              
@@ -3256,17 +3311,17 @@ 

Symptoms

<NA> 0 0 0 0 0 0 1103
-
# AEDB.temp <- subset(AEDB,  select = c("STUDY_NUMBER", "UPID", "Age", "Gender", "Hospital", "Artery_summary", "sympt", "Symptoms.5G", "AsymptSympt"))
-# require(labelled)
-# AEDB.temp$Gender <- to_factor(AEDB.temp$Gender)
-# AEDB.temp$Hospital <- to_factor(AEDB.temp$Hospital)
-# AEDB.temp$Artery_summary <- to_factor(AEDB.temp$Artery_summary)
-# 
-# DT::datatable(AEDB.temp[1:10,], caption = "Excerpt of the whole AEDB.", rownames = FALSE)
-# 
-# table(AEDB.temp$Symptoms.5G, AEDB.temp$AsymptSympt)
-# 
-# rm(AEDB.temp)
+
# AEDB.temp <- subset(AEDB,  select = c("STUDY_NUMBER", "UPID", "Age", "Gender", "Hospital", "Artery_summary", "sympt", "Symptoms.5G", "AsymptSympt"))
+# require(labelled)
+# AEDB.temp$Gender <- to_factor(AEDB.temp$Gender)
+# AEDB.temp$Hospital <- to_factor(AEDB.temp$Hospital)
+# AEDB.temp$Artery_summary <- to_factor(AEDB.temp$Artery_summary)
+# 
+# DT::datatable(AEDB.temp[1:10,], caption = "Excerpt of the whole AEDB.", rownames = FALSE)
+# 
+# table(AEDB.temp$Symptoms.5G, AEDB.temp$AsymptSympt)
+# 
+# rm(AEDB.temp)
@@ -3279,8 +3334,8 @@

Re-assessed symptoms

-

-AEDB$indexsymptoms_worst
+

+AEDB$indexsymptoms_worst
<labelled<double>[3791]>: Latest cerebrovascular symptoms (indication CEA).
@@ -3317,7 +3372,7 @@ 

Re-assessed symptoms

-
AEDB$indexsymptoms_worst_4g
+
AEDB$indexsymptoms_worst_4g
<labelled<double>[3791]>:  symptoms strokefenotype en symptoms.4g .
@@ -3354,7 +3409,7 @@ 

Re-assessed symptoms

-
AEDB$indexsymptoms_latest
+
AEDB$indexsymptoms_latest
<labelled<double>[3791]>: Latest cerebrovascular symptoms (indication CEA).
@@ -3391,7 +3446,7 @@ 

Re-assessed symptoms

-
AEDB$indexsymptoms_latest_4g
+
AEDB$indexsymptoms_latest_4g
<labelled<double>[3791]>:  symptoms strokefenotype en symptoms.4g .
@@ -3433,13 +3488,13 @@ 

Re-assessed symptoms

-
cat("New 'worst' vs 'latest' symptom categories.\n")
+
cat("New 'worst' vs 'latest' symptom categories.\n")
New 'worst' vs 'latest' symptom categories.
-
table(as_factor(AEDB$indexsymptoms_worst_4g), as_factor(AEDB$indexsymptoms_latest_4g))
+
table(as_factor(AEDB$indexsymptoms_worst_4g), as_factor(AEDB$indexsymptoms_latest_4g))
         
@@ -3451,14 +3506,14 @@ 

Re-assessed symptoms

unclear 0 0 0 0 55
-
cat("\nNew 'worst' symptom categories.\n")
+
cat("\nNew 'worst' symptom categories.\n")

 New 'worst' symptom categories.
-
table((AEDB$indexsymptoms_worst_4g))
+
table((AEDB$indexsymptoms_worst_4g))

@@ -3466,14 +3521,14 @@ 

Re-assessed symptoms

1000 345 723 580 55
-
cat("\nNew 'latest' symptom categories.\n")
+
cat("\nNew 'latest' symptom categories.\n")

 New 'latest' symptom categories.
-
table(as_factor(AEDB$indexsymptoms_latest_4g))
+
table(as_factor(AEDB$indexsymptoms_latest_4g))

@@ -3486,13 +3541,13 @@ 

Re-assessed symptoms

-
cat("New 'latest' vs original symptom 2G categories.\n")
+
cat("New 'latest' vs original symptom 2G categories.\n")
New 'latest' vs original symptom 2G categories.
-
table((AEDB$indexsymptoms_latest_4g), AEDB$AsymptSympt2G)
+
table((AEDB$indexsymptoms_latest_4g), AEDB$AsymptSympt2G)
   
@@ -3504,14 +3559,14 @@ 

Re-assessed symptoms

9 7 74
-
cat("\nNew 'latest' vs original symptom 5G categories.\n")
+
cat("\nNew 'latest' vs original symptom 5G categories.\n")

 New 'latest' vs original symptom 5G categories.
-
table((AEDB$indexsymptoms_latest_4g), AEDB$Symptoms.5G)
+
table((AEDB$indexsymptoms_latest_4g), AEDB$Symptoms.5G)
   
@@ -3523,7 +3578,7 @@ 

Re-assessed symptoms

9 7 11 10 0 13 40
-
   
+
   
@@ -3550,25 +3605,25 @@

Re-assessed symptoms

-

-# Fix symptoms
-attach(AEDB)
-
-# Symptoms.Update2G
-AEDB[,"Symptoms.Update2G"] <- NA
-AEDB$Symptoms.Update2G[indexsymptoms_latest_4g == 0] <- "Asymptomatic"
-AEDB$Symptoms.Update2G[indexsymptoms_latest_4g == 1 | indexsymptoms_latest_4g == 2 | indexsymptoms_latest_4g == 3] <- "Symptomatic"
-AEDB$Symptoms.Update2G[indexsymptoms_latest_4g == 9 ] <- NA
-
-# Symptoms.Update3G
-AEDB[,"Symptoms.Update3G"] <- NA
-AEDB$Symptoms.Update3G[indexsymptoms_latest_4g == 0] <- "Asymptomatic"
-AEDB$Symptoms.Update3G[indexsymptoms_latest_4g == 1 | indexsymptoms_latest_4g == 2 | indexsymptoms_latest_4g == 3] <- "Symptomatic"
-AEDB$Symptoms.Update3G[indexsymptoms_latest_4g == 9 ] <- "Unclear"
-
-detach(AEDB)
-
-table(AEDB$Symptoms.Update2G, AEDB$Symptoms.5G, useNA = "ifany")
+

+# Fix symptoms
+attach(AEDB)
+
+# Symptoms.Update2G
+AEDB[,"Symptoms.Update2G"] <- NA
+AEDB$Symptoms.Update2G[indexsymptoms_latest_4g == 0] <- "Asymptomatic"
+AEDB$Symptoms.Update2G[indexsymptoms_latest_4g == 1 | indexsymptoms_latest_4g == 2 | indexsymptoms_latest_4g == 3] <- "Symptomatic"
+AEDB$Symptoms.Update2G[indexsymptoms_latest_4g == 9 ] <- NA
+
+# Symptoms.Update3G
+AEDB[,"Symptoms.Update3G"] <- NA
+AEDB$Symptoms.Update3G[indexsymptoms_latest_4g == 0] <- "Asymptomatic"
+AEDB$Symptoms.Update3G[indexsymptoms_latest_4g == 1 | indexsymptoms_latest_4g == 2 | indexsymptoms_latest_4g == 3] <- "Symptomatic"
+AEDB$Symptoms.Update3G[indexsymptoms_latest_4g == 9 ] <- "Unclear"
+
+detach(AEDB)
+
+table(AEDB$Symptoms.Update2G, AEDB$Symptoms.5G, useNA = "ifany")
              
@@ -3578,7 +3633,7 @@ 

Re-assessed symptoms

<NA> 7 11 10 0 13 40 1088
-
table(AEDB$Symptoms.Update3G, AEDB$Symptoms.5G, useNA = "ifany")
+
table(AEDB$Symptoms.Update3G, AEDB$Symptoms.5G, useNA = "ifany")
              
@@ -3597,16 +3652,16 @@ 

Other clinical characteristics

-

-# Fix diabetes
-attach(AEDB)
-AEDB[,"DiabetesStatus"] <- NA
-AEDB$DiabetesStatus[DM.composite == -999] <- NA
-AEDB$DiabetesStatus[DM.composite == 0] <- "Control (no Diabetes Dx/Med)"
-AEDB$DiabetesStatus[DM.composite == 1] <- "Diabetes"
-detach(AEDB)
-
-table(AEDB$DM.composite, AEDB$DiabetesStatus)
+

+# Fix diabetes
+attach(AEDB)
+AEDB[,"DiabetesStatus"] <- NA
+AEDB$DiabetesStatus[DM.composite == -999] <- NA
+AEDB$DiabetesStatus[DM.composite == 0] <- "Control (no Diabetes Dx/Med)"
+AEDB$DiabetesStatus[DM.composite == 1] <- "Diabetes"
+detach(AEDB)
+
+table(AEDB$DM.composite, AEDB$DiabetesStatus)
   
@@ -3615,16 +3670,16 @@ 

Other clinical characteristics

1 0 985
-
# AEDB.temp <- subset(AEDB,  select = c("STUDY_NUMBER", "UPID", "Age", "Gender", "Hospital", "Artery_summary", "DM.composite", "DiabetesStatus"))
-# require(labelled)
-# AEDB.temp$Gender <- to_factor(AEDB.temp$Gender)
-# AEDB.temp$Hospital <- to_factor(AEDB.temp$Hospital)
-# AEDB.temp$Artery_summary <- to_factor(AEDB.temp$Artery_summary)
-# AEDB.temp$DiabetesStatus <- to_factor(AEDB.temp$DiabetesStatus)
-# 
-# DT::datatable(AEDB.temp[1:10,], caption = "Excerpt of the whole AEDB.", rownames = FALSE)
-# 
-# rm(AEDB.temp)
+
# AEDB.temp <- subset(AEDB,  select = c("STUDY_NUMBER", "UPID", "Age", "Gender", "Hospital", "Artery_summary", "DM.composite", "DiabetesStatus"))
+# require(labelled)
+# AEDB.temp$Gender <- to_factor(AEDB.temp$Gender)
+# AEDB.temp$Hospital <- to_factor(AEDB.temp$Hospital)
+# AEDB.temp$Artery_summary <- to_factor(AEDB.temp$Artery_summary)
+# AEDB.temp$DiabetesStatus <- to_factor(AEDB.temp$DiabetesStatus)
+# 
+# DT::datatable(AEDB.temp[1:10,], caption = "Excerpt of the whole AEDB.", rownames = FALSE)
+# 
+# rm(AEDB.temp)
@@ -3644,51 +3699,51 @@

Other clinical characteristics

-
require(labelled)
-AEDB$diet801 <- to_factor(AEDB$diet801)
-AEDB$diet802 <- to_factor(AEDB$diet802)
-AEDB$diet805 <- to_factor(AEDB$diet805)
-AEDB$SmokingReported <- to_factor(AEDB$SmokingReported)
-AEDB$SmokerCurrent <- to_factor(AEDB$SmokerCurrent)
-AEDB$SmokingYearOR <- to_factor(AEDB$SmokingYearOR)
-
-# table(AEDB$diet801)
-# table(AEDB$diet802)
-# table(AEDB$SmokingReported)
-# table(AEDB$SmokerCurrent)
-# table(AEDB$SmokingYearOR)
-# table(AEDB$SmokingReported, AEDB$SmokerCurrent, useNA = "ifany", dnn = c("Reported smoking", "Current smoker"))
-# 
-# table(AEDB$diet801, AEDB$diet802, useNA = "ifany", dnn = c("Smoker", "Past smoker"))
-
-cat("\nFixing smoking status.\n")
+
require(labelled)
+AEDB$diet801 <- to_factor(AEDB$diet801)
+AEDB$diet802 <- to_factor(AEDB$diet802)
+AEDB$diet805 <- to_factor(AEDB$diet805)
+AEDB$SmokingReported <- to_factor(AEDB$SmokingReported)
+AEDB$SmokerCurrent <- to_factor(AEDB$SmokerCurrent)
+AEDB$SmokingYearOR <- to_factor(AEDB$SmokingYearOR)
+
+# table(AEDB$diet801)
+# table(AEDB$diet802)
+# table(AEDB$SmokingReported)
+# table(AEDB$SmokerCurrent)
+# table(AEDB$SmokingYearOR)
+# table(AEDB$SmokingReported, AEDB$SmokerCurrent, useNA = "ifany", dnn = c("Reported smoking", "Current smoker"))
+# 
+# table(AEDB$diet801, AEDB$diet802, useNA = "ifany", dnn = c("Smoker", "Past smoker"))
+
+cat("\nFixing smoking status.\n")

 Fixing smoking status.
-
attach(AEDB)
-AEDB[,"SmokerStatus"] <- NA
-AEDB$SmokerStatus[diet802 == "don't know"] <- "Never smoked"
-AEDB$SmokerStatus[diet802 == "I still smoke"] <- "Current smoker"
-AEDB$SmokerStatus[SmokerCurrent == "no" & diet802 == "no"] <- "Never smoked"
-AEDB$SmokerStatus[SmokerCurrent == "no" & diet802 == "yes"] <- "Ex-smoker"
-AEDB$SmokerStatus[SmokerCurrent == "yes"] <- "Current smoker"
-AEDB$SmokerStatus[SmokerCurrent == "no data available/missing"] <- NA
-# AEDB$SmokerStatus[is.na(SmokerCurrent)] <- "Never smoked"
-detach(AEDB)
-
-cat("\n* Current smoking status.\n")
+
attach(AEDB)
+AEDB[,"SmokerStatus"] <- NA
+AEDB$SmokerStatus[diet802 == "don't know"] <- "Never smoked"
+AEDB$SmokerStatus[diet802 == "I still smoke"] <- "Current smoker"
+AEDB$SmokerStatus[SmokerCurrent == "no" & diet802 == "no"] <- "Never smoked"
+AEDB$SmokerStatus[SmokerCurrent == "no" & diet802 == "yes"] <- "Ex-smoker"
+AEDB$SmokerStatus[SmokerCurrent == "yes"] <- "Current smoker"
+AEDB$SmokerStatus[SmokerCurrent == "no data available/missing"] <- NA
+# AEDB$SmokerStatus[is.na(SmokerCurrent)] <- "Never smoked"
+detach(AEDB)
+
+cat("\n* Current smoking status.\n")

 * Current smoking status.
-
table(AEDB$SmokerCurrent,
-      useNA = "ifany", 
-      dnn = c("Current smoker"))
+
table(AEDB$SmokerCurrent,
+      useNA = "ifany", 
+      dnn = c("Current smoker"))
Current smoker
@@ -3696,16 +3751,16 @@ 

Other clinical characteristics

0 2364 1308 119
-
cat("\n* Updated smoking status.\n")
+
cat("\n* Updated smoking status.\n")

 * Updated smoking status.
-
table(AEDB$SmokerStatus,
-      useNA = "ifany", 
-      dnn = c("Updated smoking status"))
+
table(AEDB$SmokerStatus,
+      useNA = "ifany", 
+      dnn = c("Updated smoking status"))
Updated smoking status
@@ -3713,16 +3768,16 @@ 

Other clinical characteristics

1308 1814 389 280
-
cat("\n* Comparing to 'SmokerCurrent'.\n")
+
cat("\n* Comparing to 'SmokerCurrent'.\n")

 * Comparing to 'SmokerCurrent'.
-
table(AEDB$SmokerStatus, AEDB$SmokerCurrent, 
-      useNA = "ifany", 
-      dnn = c("Updated smoking status", "Current smoker"))
+
table(AEDB$SmokerStatus, AEDB$SmokerCurrent, 
+      useNA = "ifany", 
+      dnn = c("Updated smoking status", "Current smoker"))
                      Current smoker
@@ -3733,16 +3788,16 @@ 

Other clinical characteristics

<NA> 0 161 0 119
-
# AEDB.temp <- subset(AEDB,  select = c("STUDY_NUMBER", "UPID", "Age", "Gender", "Hospital", "Artery_summary", "DM.composite", "DiabetesStatus"))
-# require(labelled)
-# AEDB.temp$Gender <- to_factor(AEDB.temp$Gender)
-# AEDB.temp$Hospital <- to_factor(AEDB.temp$Hospital)
-# AEDB.temp$Artery_summary <- to_factor(AEDB.temp$Artery_summary)
-# AEDB.temp$DiabetesStatus <- to_factor(AEDB.temp$DiabetesStatus)
-# 
-# DT::datatable(AEDB.temp[1:10,], caption = "Excerpt of the whole AEDB.", rownames = FALSE)
-# 
-# rm(AEDB.temp)
+
# AEDB.temp <- subset(AEDB,  select = c("STUDY_NUMBER", "UPID", "Age", "Gender", "Hospital", "Artery_summary", "DM.composite", "DiabetesStatus"))
+# require(labelled)
+# AEDB.temp$Gender <- to_factor(AEDB.temp$Gender)
+# AEDB.temp$Hospital <- to_factor(AEDB.temp$Hospital)
+# AEDB.temp$Artery_summary <- to_factor(AEDB.temp$Artery_summary)
+# AEDB.temp$DiabetesStatus <- to_factor(AEDB.temp$DiabetesStatus)
+# 
+# DT::datatable(AEDB.temp[1:10,], caption = "Excerpt of the whole AEDB.", rownames = FALSE)
+# 
+# rm(AEDB.temp)
@@ -3750,25 +3805,25 @@

Other clinical characteristics

-

-# Fix diabetes
-attach(AEDB)
-AEDB[,"AlcoholUse"] <- NA
-AEDB$AlcoholUse[diet810 == -999] <- NA
-AEDB$AlcoholUse[diet810 == 0] <- "No"
-AEDB$AlcoholUse[diet810 == 1] <- "Yes"
-detach(AEDB)
-
-# AEDB.temp <- subset(AEDB,  select = c("STUDY_NUMBER", "UPID", "Age", "Gender", "Hospital", "Artery_summary", "diet810", "AlcoholUse"))
-# require(labelled)
-# AEDB.temp$Gender <- to_factor(AEDB.temp$Gender)
-# AEDB.temp$Hospital <- to_factor(AEDB.temp$Hospital)
-# AEDB.temp$Artery_summary <- to_factor(AEDB.temp$Artery_summary)
-# AEDB.temp$AlcoholUse <- to_factor(AEDB.temp$AlcoholUse)
-# 
-# DT::datatable(AEDB.temp[1:10,], caption = "Excerpt of the whole AEDB.", rownames = FALSE)
-# 
-# rm(AEDB.temp)
+

+# Fix alcohol use
+attach(AEDB)
+AEDB[,"AlcoholUse"] <- NA
+AEDB$AlcoholUse[diet810 == -999] <- NA
+AEDB$AlcoholUse[diet810 == 0] <- "No"
+AEDB$AlcoholUse[diet810 == 1] <- "Yes"
+detach(AEDB)
+
+# AEDB.temp <- subset(AEDB,  select = c("STUDY_NUMBER", "UPID", "Age", "Gender", "Hospital", "Artery_summary", "diet810", "AlcoholUse"))
+# require(labelled)
+# AEDB.temp$Gender <- to_factor(AEDB.temp$Gender)
+# AEDB.temp$Hospital <- to_factor(AEDB.temp$Hospital)
+# AEDB.temp$Artery_summary <- to_factor(AEDB.temp$Artery_summary)
+# AEDB.temp$AlcoholUse <- to_factor(AEDB.temp$AlcoholUse)
+# 
+# DT::datatable(AEDB.temp[1:10,], caption = "Excerpt of the whole AEDB.", rownames = FALSE)
+# 
+# rm(AEDB.temp)
@@ -3778,15 +3833,15 @@

Other clinical characteristics

-

-# Fix diabetes
-attach(AEDB)
-AEDB[,"MedHx_CVD"] <- NA
-AEDB$MedHx_CVD[CAD_history == 0 | Stroke_history == 0 | Peripheral.interv == 0] <- "No"
-AEDB$MedHx_CVD[CAD_history == 1 | Stroke_history == 1 | Peripheral.interv == 1] <- "yes"
-detach(AEDB)
-
-table(AEDB$CAD_history)
+

+# Fix history of cardiovascular disease
+attach(AEDB)
+AEDB[,"MedHx_CVD"] <- NA
+AEDB$MedHx_CVD[CAD_history == 0 | Stroke_history == 0 | Peripheral.interv == 0] <- "No"
+AEDB$MedHx_CVD[CAD_history == 1 | Stroke_history == 1 | Peripheral.interv == 1] <- "yes"
+detach(AEDB)
+
+table(AEDB$CAD_history)

@@ -3794,7 +3849,7 @@ 

Other clinical characteristics

2430 1285
-
table(AEDB$Stroke_history)
+
table(AEDB$Stroke_history)

@@ -3802,7 +3857,7 @@ 

Other clinical characteristics

2763 947
-
table(AEDB$Peripheral.interv)
+
table(AEDB$Peripheral.interv)

@@ -3810,7 +3865,7 @@ 

Other clinical characteristics

2579 1099
-
table(AEDB$MedHx_CVD)
+
table(AEDB$MedHx_CVD)

@@ -3818,16 +3873,16 @@ 

Other clinical characteristics

1309 2475
-
# AEDB.temp <- subset(AEDB,  select = c("STUDY_NUMBER", "UPID", "Age", "Gender", "Hospital", "Artery_summary", "diet810", "AlcoholUse"))
-# require(labelled)
-# AEDB.temp$Gender <- to_factor(AEDB.temp$Gender)
-# AEDB.temp$Hospital <- to_factor(AEDB.temp$Hospital)
-# AEDB.temp$Artery_summary <- to_factor(AEDB.temp$Artery_summary)
-# AEDB.temp$AlcoholUse <- to_factor(AEDB.temp$AlcoholUse)
-# 
-# DT::datatable(AEDB.temp[1:10,], caption = "Excerpt of the whole AEDB.", rownames = FALSE)
-# 
-# rm(AEDB.temp)
+
# AEDB.temp <- subset(AEDB,  select = c("STUDY_NUMBER", "UPID", "Age", "Gender", "Hospital", "Artery_summary", "diet810", "AlcoholUse"))
+# require(labelled)
+# AEDB.temp$Gender <- to_factor(AEDB.temp$Gender)
+# AEDB.temp$Hospital <- to_factor(AEDB.temp$Hospital)
+# AEDB.temp$Artery_summary <- to_factor(AEDB.temp$Artery_summary)
+# AEDB.temp$AlcoholUse <- to_factor(AEDB.temp$AlcoholUse)
+# 
+# DT::datatable(AEDB.temp[1:10,], caption = "Excerpt of the whole AEDB.", rownames = FALSE)
+# 
+# rm(AEDB.temp)
@@ -3849,26 +3904,26 @@

Plaque phenotypes

-

-# Fix plaquephenotypes
-attach(AEDB)
-AEDB[,"OverallPlaquePhenotype"] <- NA
-AEDB$OverallPlaquePhenotype[plaquephenotype == -999] <- NA
-AEDB$OverallPlaquePhenotype[plaquephenotype == -999] <- NA
-AEDB$OverallPlaquePhenotype[plaquephenotype == 1] <- "fibrous"
-AEDB$OverallPlaquePhenotype[plaquephenotype == 2] <- "fibroatheromatous"
-AEDB$OverallPlaquePhenotype[plaquephenotype == 3] <- "atheromatous"
-detach(AEDB)
-
-# AEDB.temp <- subset(AEDB,  select = c("STUDY_NUMBER", "UPID", "Age", "Gender", "Hospital", "Artery_summary", "plaquephenotype", "OverallPlaquePhenotype"))
-# require(labelled)
-# AEDB.temp$Gender <- to_factor(AEDB.temp$Gender)
-# AEDB.temp$Hospital <- to_factor(AEDB.temp$Hospital)
-# AEDB.temp$Artery_summary <- to_factor(AEDB.temp$Artery_summary)
-# 
-# DT::datatable(AEDB.temp[1:10,], caption = "Excerpt of the whole AEDB.", rownames = FALSE)
-# 
-# rm(AEDB.temp)
+

+# Fix plaquephenotypes
+attach(AEDB)
+AEDB[,"OverallPlaquePhenotype"] <- NA
+AEDB$OverallPlaquePhenotype[plaquephenotype == -999] <- NA
+AEDB$OverallPlaquePhenotype[plaquephenotype == -999] <- NA
+AEDB$OverallPlaquePhenotype[plaquephenotype == 1] <- "fibrous"
+AEDB$OverallPlaquePhenotype[plaquephenotype == 2] <- "fibroatheromatous"
+AEDB$OverallPlaquePhenotype[plaquephenotype == 3] <- "atheromatous"
+detach(AEDB)
+
+# AEDB.temp <- subset(AEDB,  select = c("STUDY_NUMBER", "UPID", "Age", "Gender", "Hospital", "Artery_summary", "plaquephenotype", "OverallPlaquePhenotype"))
+# require(labelled)
+# AEDB.temp$Gender <- to_factor(AEDB.temp$Gender)
+# AEDB.temp$Hospital <- to_factor(AEDB.temp$Hospital)
+# AEDB.temp$Artery_summary <- to_factor(AEDB.temp$Artery_summary)
+# 
+# DT::datatable(AEDB.temp[1:10,], caption = "Excerpt of the whole AEDB.", rownames = FALSE)
+# 
+# rm(AEDB.temp)
@@ -3877,236 +3932,236 @@

Plaque phenotypes

-
AEDB$macmean0 <- as.numeric(AEDB$macmean0)
-AEDB$smcmean0 <- as.numeric(AEDB$smcmean0)
-AEDB$neutrophils <- as.numeric(AEDB$neutrophils)
-AEDB$Mast_cells_plaque <- as.numeric(AEDB$Mast_cells_plaque)
-AEDB$vessel_density_averaged <- as.numeric(AEDB$vessel_density_averaged)
-
-AEDB$MAC_rankNorm <- qnorm((rank(AEDB$macmean0, na.last = "keep") - 0.5) / sum(!is.na(AEDB$macmean0)))
-AEDB$SMC_rankNorm <- qnorm((rank(AEDB$smcmean0, na.last = "keep") - 0.5) / sum(!is.na(AEDB$smcmean0)))
-AEDB$Neutrophils_rankNorm <- qnorm((rank(AEDB$neutrophils, na.last = "keep") - 0.5) / sum(!is.na(AEDB$neutrophils)))
-AEDB$MastCells_rankNorm <- qnorm((rank(AEDB$Mast_cells_plaque, na.last = "keep") - 0.5) / sum(!is.na(AEDB$Mast_cells_plaque)))
-AEDB$VesselDensity_rankNorm <- qnorm((rank(AEDB$vessel_density_averaged, na.last = "keep") - 0.5) / sum(!is.na(AEDB$vessel_density_averaged)))
+
AEDB$macmean0 <- as.numeric(AEDB$macmean0)
+AEDB$smcmean0 <- as.numeric(AEDB$smcmean0)
+AEDB$neutrophils <- as.numeric(AEDB$neutrophils)
+AEDB$Mast_cells_plaque <- as.numeric(AEDB$Mast_cells_plaque)
+AEDB$vessel_density_averaged <- as.numeric(AEDB$vessel_density_averaged)
+
+AEDB$MAC_rankNorm <- qnorm((rank(AEDB$macmean0, na.last = "keep") - 0.5) / sum(!is.na(AEDB$macmean0)))
+AEDB$SMC_rankNorm <- qnorm((rank(AEDB$smcmean0, na.last = "keep") - 0.5) / sum(!is.na(AEDB$smcmean0)))
+AEDB$Neutrophils_rankNorm <- qnorm((rank(AEDB$neutrophils, na.last = "keep") - 0.5) / sum(!is.na(AEDB$neutrophils)))
+AEDB$MastCells_rankNorm <- qnorm((rank(AEDB$Mast_cells_plaque, na.last = "keep") - 0.5) / sum(!is.na(AEDB$Mast_cells_plaque)))
+AEDB$VesselDensity_rankNorm <- qnorm((rank(AEDB$vessel_density_averaged, na.last = "keep") - 0.5) / sum(!is.na(AEDB$vessel_density_averaged)))
-
library(labelled)
-AEDB$Gender <- to_factor(AEDB$Gender)
-library(patchwork)
-
-p1 <- ggpubr::gghistogram(AEDB, "macmean0", 
-                    # y = "..count..", 
-                    color = "white",
-                    fill = "Gender",
-                    palette = c("#1290D9", "#DB003F"), 
-                    add = "median", 
-                    #add_density = TRUE,
-                    rug = TRUE,
-                    #add.params =  list(color = "black", linetype = 2), 
-                    title = "% of macrophages (CD68)",
-                    xlab = "% per region of interest", 
-                    ggtheme = theme_minimal())
+
library(labelled)
+AEDB$Gender <- to_factor(AEDB$Gender)
+library(patchwork)
+
+p1 <- ggpubr::gghistogram(AEDB, "macmean0", 
+                    # y = "..count..", 
+                    color = "white",
+                    fill = "Gender",
+                    palette = c("#1290D9", "#DB003F"), 
+                    add = "median", 
+                    #add_density = TRUE,
+                    rug = TRUE,
+                    #add.params =  list(color = "black", linetype = 2), 
+                    title = "% of macrophages (CD68)",
+                    xlab = "% per region of interest", 
+                    ggtheme = theme_minimal())
Warning: Using `bins = 30` by default. Pick better value with the argument `bins`.
-
p2 <- ggpubr::gghistogram(AEDB, "MAC_rankNorm", 
-                    # y = "..count..", 
-                    color = "white",
-                    fill = "Gender",
-                    palette = c("#1290D9", "#DB003F"), 
-                    add = "median", 
-                    #add_density = TRUE,
-                    rug = TRUE,
-                    #add.params =  list(color = "black", linetype = 2), 
-                    title = "% of macrophages (CD68)",
-                   xlab = "% per region of interest\ninverse-rank normalized number", 
-                    ggtheme = theme_minimal())
+
p2 <- ggpubr::gghistogram(AEDB, "MAC_rankNorm", 
+                    # y = "..count..", 
+                    color = "white",
+                    fill = "Gender",
+                    palette = c("#1290D9", "#DB003F"), 
+                    add = "median", 
+                    #add_density = TRUE,
+                    rug = TRUE,
+                    #add.params =  list(color = "black", linetype = 2), 
+                    title = "% of macrophages (CD68)",
+                   xlab = "% per region of interest\ninverse-rank normalized number", 
+                    ggtheme = theme_minimal())
Warning: Using `bins = 30` by default. Pick better value with the argument `bins`.
-
p1 | p2 
+
p1 | p2 

-

-p1 <- ggpubr::gghistogram(AEDB, "smcmean0", 
-                    # y = "..count..", 
-                    color = "white",
-                    fill = "Gender",
-                    palette = c("#1290D9", "#DB003F"), 
-                    add = "median", 
-                    #add_density = TRUE,
-                    rug = TRUE,
-                    #add.params =  list(color = "black", linetype = 2), 
-                    title = "% of smooth muscle cells (SMA)",
-                    xlab = "% per region of interest", 
-                    ggtheme = theme_minimal())
+

+p1 <- ggpubr::gghistogram(AEDB, "smcmean0", 
+                    # y = "..count..", 
+                    color = "white",
+                    fill = "Gender",
+                    palette = c("#1290D9", "#DB003F"), 
+                    add = "median", 
+                    #add_density = TRUE,
+                    rug = TRUE,
+                    #add.params =  list(color = "black", linetype = 2), 
+                    title = "% of smooth muscle cells (SMA)",
+                    xlab = "% per region of interest", 
+                    ggtheme = theme_minimal())
Warning: Using `bins = 30` by default. Pick better value with the argument `bins`.
-
p2 <- ggpubr::gghistogram(AEDB, "SMC_rankNorm", 
-                    # y = "..count..", 
-                    color = "white",
-                    fill = "Gender",
-                    palette = c("#1290D9", "#DB003F"), 
-                    add = "median", 
-                    #add_density = TRUE,
-                    rug = TRUE,
-                    #add.params =  list(color = "black", linetype = 2), 
-                    title = "% of smooth muscle cells (SMA)",
-                   xlab = "% per region of interest\ninverse-rank normalized number", 
-                    ggtheme = theme_minimal())
+
p2 <- ggpubr::gghistogram(AEDB, "SMC_rankNorm", 
+                    # y = "..count..", 
+                    color = "white",
+                    fill = "Gender",
+                    palette = c("#1290D9", "#DB003F"), 
+                    add = "median", 
+                    #add_density = TRUE,
+                    rug = TRUE,
+                    #add.params =  list(color = "black", linetype = 2), 
+                    title = "% of smooth muscle cells (SMA)",
+                   xlab = "% per region of interest\ninverse-rank normalized number", 
+                    ggtheme = theme_minimal())
Warning: Using `bins = 30` by default. Pick better value with the argument `bins`.
-
p1 | p2 
+
p1 | p2 

-

-
-p1 <- ggpubr::gghistogram(AEDB, "neutrophils", 
-                    # y = "..count..", 
-                    color = "white",
-                    fill = "Gender",
-                    palette = c("#1290D9", "#DB003F"), 
-                    add = "median", 
-                    #add_density = TRUE,
-                    rug = TRUE,
-                    #add.params =  list(color = "black", linetype = 2), 
-                    title = "number of neutrophils (CD66b)",
-                    xlab = "counts per plaque", 
-                    ggtheme = theme_minimal())
+

+
+p1 <- ggpubr::gghistogram(AEDB, "neutrophils", 
+                    # y = "..count..", 
+                    color = "white",
+                    fill = "Gender",
+                    palette = c("#1290D9", "#DB003F"), 
+                    add = "median", 
+                    #add_density = TRUE,
+                    rug = TRUE,
+                    #add.params =  list(color = "black", linetype = 2), 
+                    title = "number of neutrophils (CD66b)",
+                    xlab = "counts per plaque", 
+                    ggtheme = theme_minimal())
Warning: Using `bins = 30` by default. Pick better value with the argument `bins`.
-
p2 <- ggpubr::gghistogram(AEDB, "Neutrophils_rankNorm", 
-                    # y = "..count..", 
-                    color = "white",
-                    fill = "Gender",
-                    palette = c("#1290D9", "#DB003F"), 
-                    add = "median", 
-                    #add_density = TRUE,
-                    rug = TRUE,
-                    #add.params =  list(color = "black", linetype = 2), 
-                    title = "number of neutrophils (CD66b)",
-                   xlab = "counts per plaque\ninverse-rank normalized number", 
-                    ggtheme = theme_minimal())
+
p2 <- ggpubr::gghistogram(AEDB, "Neutrophils_rankNorm", 
+                    # y = "..count..", 
+                    color = "white",
+                    fill = "Gender",
+                    palette = c("#1290D9", "#DB003F"), 
+                    add = "median", 
+                    #add_density = TRUE,
+                    rug = TRUE,
+                    #add.params =  list(color = "black", linetype = 2), 
+                    title = "number of neutrophils (CD66b)",
+                   xlab = "counts per plaque\ninverse-rank normalized number", 
+                    ggtheme = theme_minimal())
Warning: Using `bins = 30` by default. Pick better value with the argument `bins`.
-
p1 | p2 
+
p1 | p2 

-

-
-p1 <- ggpubr::gghistogram(AEDB, "Mast_cells_plaque", 
-                    # y = "..count..", 
-                    color = "white",
-                    fill = "Gender",
-                    palette = c("#1290D9", "#DB003F"), 
-                    add = "median", 
-                    #add_density = TRUE,
-                    rug = TRUE,
-                    #add.params =  list(color = "black", linetype = 2), 
-                    title = "number of mast cells",
-                    xlab = "counts per plaque", 
-                    ggtheme = theme_minimal())
+

+
+p1 <- ggpubr::gghistogram(AEDB, "Mast_cells_plaque", 
+                    # y = "..count..", 
+                    color = "white",
+                    fill = "Gender",
+                    palette = c("#1290D9", "#DB003F"), 
+                    add = "median", 
+                    #add_density = TRUE,
+                    rug = TRUE,
+                    #add.params =  list(color = "black", linetype = 2), 
+                    title = "number of mast cells",
+                    xlab = "counts per plaque", 
+                    ggtheme = theme_minimal())
Warning: Using `bins = 30` by default. Pick better value with the argument `bins`.
-
p2 <- ggpubr::gghistogram(AEDB, "MastCells_rankNorm", 
-                    # y = "..count..", 
-                    color = "white",
-                    fill = "Gender",
-                    palette = c("#1290D9", "#DB003F"), 
-                    add = "median", 
-                    #add_density = TRUE,
-                    rug = TRUE,
-                    #add.params =  list(color = "black", linetype = 2), 
-                    title = "number of mast cells",
-                   xlab = "counts per plaque\ninverse-rank normalized number", 
-                    ggtheme = theme_minimal())
+
p2 <- ggpubr::gghistogram(AEDB, "MastCells_rankNorm", 
+                    # y = "..count..", 
+                    color = "white",
+                    fill = "Gender",
+                    palette = c("#1290D9", "#DB003F"), 
+                    add = "median", 
+                    #add_density = TRUE,
+                    rug = TRUE,
+                    #add.params =  list(color = "black", linetype = 2), 
+                    title = "number of mast cells",
+                   xlab = "counts per plaque\ninverse-rank normalized number", 
+                    ggtheme = theme_minimal())
Warning: Using `bins = 30` by default. Pick better value with the argument `bins`.
-
p1 | p2 
+
p1 | p2 

-

-
-p1 <- ggpubr::gghistogram(AEDB, "vessel_density_averaged", 
-                    # y = "..count..", 
-                    color = "white",
-                    fill = "Gender",
-                    palette = c("#1290D9", "#DB003F"), 
-                    add = "median", 
-                    #add_density = TRUE,
-                    rug = TRUE,
-                    #add.params =  list(color = "black", linetype = 2), 
-                    title = "number of intraplaque neovessels",
-                    xlab = "counts per 3-4 hotspots", 
-                    ggtheme = theme_minimal())
+

+
+p1 <- ggpubr::gghistogram(AEDB, "vessel_density_averaged", 
+                    # y = "..count..", 
+                    color = "white",
+                    fill = "Gender",
+                    palette = c("#1290D9", "#DB003F"), 
+                    add = "median", 
+                    #add_density = TRUE,
+                    rug = TRUE,
+                    #add.params =  list(color = "black", linetype = 2), 
+                    title = "number of intraplaque neovessels",
+                    xlab = "counts per 3-4 hotspots", 
+                    ggtheme = theme_minimal())
Warning: Using `bins = 30` by default. Pick better value with the argument `bins`.
-
p2 <- ggpubr::gghistogram(AEDB, "VesselDensity_rankNorm", 
-                    # y = "..count..", 
-                    color = "white",
-                    fill = "Gender",
-                    palette = c("#1290D9", "#DB003F"), 
-                    add = "median", 
-                    #add_density = TRUE,
-                    rug = TRUE,
-                    #add.params =  list(color = "black", linetype = 2), 
-                    title = "number of intraplaque neovessels",
-                   xlab = "counts per 3-4 hotspots\ninverse-rank normalized number", 
-                    ggtheme = theme_minimal())
+
p2 <- ggpubr::gghistogram(AEDB, "VesselDensity_rankNorm", 
+                    # y = "..count..", 
+                    color = "white",
+                    fill = "Gender",
+                    palette = c("#1290D9", "#DB003F"), 
+                    add = "median", 
+                    #add_density = TRUE,
+                    rug = TRUE,
+                    #add.params =  list(color = "black", linetype = 2), 
+                    title = "number of intraplaque neovessels",
+                   xlab = "counts per 3-4 hotspots\ninverse-rank normalized number", 
+                    ggtheme = theme_minimal())
Warning: Using `bins = 30` by default. Pick better value with the argument `bins`.
-
p1 | p2 
+
p1 | p2 

-

-rm(p1, p2)
+

+rm(p1, p2)
@@ -4115,17 +4170,17 @@

Plaque phenotypes

-
# Plaque vulnerability
-require(labelled)
-AEDB$Macrophages.bin <- to_factor(AEDB$Macrophages.bin)
-AEDB$SMC.bin <- to_factor(AEDB$SMC.bin)
-AEDB$IPH.bin <- to_factor(AEDB$IPH.bin)
-AEDB$Calc.bin <- to_factor(AEDB$Calc.bin)
-AEDB$Collagen.bin <- to_factor(AEDB$Collagen.bin)
-AEDB$Fat.bin_10 <- to_factor(AEDB$Fat.bin_10)
-AEDB$Fat.bin_40 <- to_factor(AEDB$Fat.bin_40)
-
-table(AEDB$Macrophages.bin)
+
# Plaque vulnerability
+require(labelled)
+AEDB$Macrophages.bin <- to_factor(AEDB$Macrophages.bin)
+AEDB$SMC.bin <- to_factor(AEDB$SMC.bin)
+AEDB$IPH.bin <- to_factor(AEDB$IPH.bin)
+AEDB$Calc.bin <- to_factor(AEDB$Calc.bin)
+AEDB$Collagen.bin <- to_factor(AEDB$Collagen.bin)
+AEDB$Fat.bin_10 <- to_factor(AEDB$Fat.bin_10)
+AEDB$Fat.bin_40 <- to_factor(AEDB$Fat.bin_40)
+
+table(AEDB$Macrophages.bin)

@@ -4133,7 +4188,7 @@ 

Plaque phenotypes

1602 1215
-
table(AEDB$Fat.bin_10)
+
table(AEDB$Fat.bin_10)

@@ -4141,7 +4196,7 @@ 

Plaque phenotypes

1226 1628
-
table(AEDB$Collagen.bin)
+
table(AEDB$Collagen.bin)

@@ -4149,7 +4204,7 @@ 

Plaque phenotypes

540 2297
-
table(AEDB$SMC.bin)
+
table(AEDB$SMC.bin)

@@ -4157,7 +4212,7 @@ 

Plaque phenotypes

870 1962
-
table(AEDB$IPH.bin)
+
table(AEDB$IPH.bin)

@@ -4165,74 +4220,74 @@ 

Plaque phenotypes

1223 1628
-
# SPSS code
-
-# 
-# *** syntax- Plaque vulnerability**.
-# COMPUTE Macro_instab = -999.
-# IF macrophages.bin=2 Macro_instab=1.
-# IF macrophages.bin=1 Macro_instab=0.
-# EXECUTE.
-# 
-# COMPUTE Fat10_instab = -999.
-# IF Fat.bin_10=2 Fat10_instab=1.
-# IF Fat.bin_10=1 Fat10_instab=0.
-# EXECUTE.
-# 
-# COMPUTE coll_instab=-999.
-# IF Collagen.bin=2 coll_instab=0.
-# IF Collagen.bin=1 coll_instab=1.
-# EXECUTE.
-# 
-# 
-# COMPUTE SMC_instab=-999.
-# IF SMC.bin=2 SMC_instab=0.
-# IF SMC.bin=1 SMC_instab=1.
-# EXECUTE.
-# 
-# COMPUTE IPH_instab=-999.
-# IF IPH.bin=0 IPH_instab=0.
-# IF IPH.bin=1 IPH_instab=1.
-# EXECUTE.
-# 
-# COMPUTE Instability=Macro_instab + Fat10_instab +  coll_instab + SMC_instab + IPH_instab.
-# EXECUTE.
-
-# Fix plaquephenotypes
-attach(AEDB)
-# mac instability
-AEDB[,"MAC_Instability"] <- NA
-AEDB$MAC_Instability[Macrophages.bin == -999] <- NA
-AEDB$MAC_Instability[Macrophages.bin == "no/minor"] <- 0
-AEDB$MAC_Instability[Macrophages.bin == "moderate/heavy"] <- 1
-
-# fat instability
-AEDB[,"FAT10_Instability"] <- NA
-AEDB$FAT10_Instability[Fat.bin_10 == -999] <- NA
-AEDB$FAT10_Instability[Fat.bin_10 == " <10%"] <- 0
-AEDB$FAT10_Instability[Fat.bin_10 == " >10%"] <- 1
-
-# col instability 
-AEDB[,"COL_Instability"] <- NA
-AEDB$COL_Instability[Collagen.bin == -999] <- NA
-AEDB$COL_Instability[Collagen.bin == "no/minor"] <- 1
-AEDB$COL_Instability[Collagen.bin == "moderate/heavy"] <- 0
-
-# smc instability
-AEDB[,"SMC_Instability"] <- NA
-AEDB$SMC_Instability[SMC.bin == -999] <- NA
-AEDB$SMC_Instability[SMC.bin == "no/minor"] <- 1
-AEDB$SMC_Instability[SMC.bin == "moderate/heavy"] <- 0
-
-# iph instability
-AEDB[,"IPH_Instability"] <- NA
-AEDB$IPH_Instability[IPH.bin == -999] <- NA
-AEDB$IPH_Instability[IPH.bin == "no"] <- 0
-AEDB$IPH_Instability[IPH.bin == "yes"] <- 1
-
-detach(AEDB)
-
-table(AEDB$MAC_Instability, useNA = "ifany")
+
# SPSS code
+
+# 
+# *** syntax- Plaque vulnerability**.
+# COMPUTE Macro_instab = -999.
+# IF macrophages.bin=2 Macro_instab=1.
+# IF macrophages.bin=1 Macro_instab=0.
+# EXECUTE.
+# 
+# COMPUTE Fat10_instab = -999.
+# IF Fat.bin_10=2 Fat10_instab=1.
+# IF Fat.bin_10=1 Fat10_instab=0.
+# EXECUTE.
+# 
+# COMPUTE coll_instab=-999.
+# IF Collagen.bin=2 coll_instab=0.
+# IF Collagen.bin=1 coll_instab=1.
+# EXECUTE.
+# 
+# 
+# COMPUTE SMC_instab=-999.
+# IF SMC.bin=2 SMC_instab=0.
+# IF SMC.bin=1 SMC_instab=1.
+# EXECUTE.
+# 
+# COMPUTE IPH_instab=-999.
+# IF IPH.bin=0 IPH_instab=0.
+# IF IPH.bin=1 IPH_instab=1.
+# EXECUTE.
+# 
+# COMPUTE Instability=Macro_instab + Fat10_instab +  coll_instab + SMC_instab + IPH_instab.
+# EXECUTE.
+
+# Fix plaquephenotypes
+attach(AEDB)
+# mac instability
+AEDB[,"MAC_Instability"] <- NA
+AEDB$MAC_Instability[Macrophages.bin == -999] <- NA
+AEDB$MAC_Instability[Macrophages.bin == "no/minor"] <- 0
+AEDB$MAC_Instability[Macrophages.bin == "moderate/heavy"] <- 1
+
+# fat instability
+AEDB[,"FAT10_Instability"] <- NA
+AEDB$FAT10_Instability[Fat.bin_10 == -999] <- NA
+AEDB$FAT10_Instability[Fat.bin_10 == " <10%"] <- 0
+AEDB$FAT10_Instability[Fat.bin_10 == " >10%"] <- 1
+
+# col instability 
+AEDB[,"COL_Instability"] <- NA
+AEDB$COL_Instability[Collagen.bin == -999] <- NA
+AEDB$COL_Instability[Collagen.bin == "no/minor"] <- 1
+AEDB$COL_Instability[Collagen.bin == "moderate/heavy"] <- 0
+
+# smc instability
+AEDB[,"SMC_Instability"] <- NA
+AEDB$SMC_Instability[SMC.bin == -999] <- NA
+AEDB$SMC_Instability[SMC.bin == "no/minor"] <- 1
+AEDB$SMC_Instability[SMC.bin == "moderate/heavy"] <- 0
+
+# iph instability
+AEDB[,"IPH_Instability"] <- NA
+AEDB$IPH_Instability[IPH.bin == -999] <- NA
+AEDB$IPH_Instability[IPH.bin == "no"] <- 0
+AEDB$IPH_Instability[IPH.bin == "yes"] <- 1
+
+detach(AEDB)
+
+table(AEDB$MAC_Instability, useNA = "ifany")

@@ -4240,7 +4295,7 @@ 

Plaque phenotypes

1602 1215 974
-
table(AEDB$FAT10_Instability, useNA = "ifany")
+
table(AEDB$FAT10_Instability, useNA = "ifany")

@@ -4248,7 +4303,7 @@ 

Plaque phenotypes

1226 1628 937
-
table(AEDB$COL_Instability, useNA = "ifany")
+
table(AEDB$COL_Instability, useNA = "ifany")

@@ -4256,7 +4311,7 @@ 

Plaque phenotypes

2297 540 954
-
table(AEDB$SMC_Instability, useNA = "ifany")
+
table(AEDB$SMC_Instability, useNA = "ifany")

@@ -4264,7 +4319,7 @@ 

Plaque phenotypes

1962 870 959
-
table(AEDB$IPH_Instability, useNA = "ifany")
+
table(AEDB$IPH_Instability, useNA = "ifany")

@@ -4272,11 +4327,11 @@ 

Plaque phenotypes

1223 1628 940
-
# creating vulnerability index
-AEDB <- AEDB %>% mutate(Plaque_Vulnerability_Index = factor(rowSums(.[grep("_Instability", names(.))], na.rm = TRUE)),
-                                )
-
-table(AEDB$Plaque_Vulnerability_Index, useNA = "ifany")
+
# creating vulnerability index
+AEDB <- AEDB %>% mutate(Plaque_Vulnerability_Index = factor(rowSums(.[grep("_Instability", names(.))], na.rm = TRUE)),
+                                )
+
+table(AEDB$Plaque_Vulnerability_Index, useNA = "ifany")

@@ -4284,7 +4339,7 @@ 

Plaque phenotypes

1324 655 728 676 298 110
-
# str(AEDB$Plaque_Vulnerability_Index)
+
# str(AEDB$Plaque_Vulnerability_Index)
@@ -4319,86 +4374,86 @@

Prepare baseline summary

-
cat("====================================================================================================\n")
+
cat("====================================================================================================\n")
====================================================================================================
-
cat("SELECTION THE SHIZZLE\n")
+
cat("SELECTION THE SHIZZLE\n")
SELECTION THE SHIZZLE
-
### Artery levels
-# AEdata$Artery_summary: 
-#           value                                                                                   label
-# NOT USE - 0 No artery known (yet), no surgery (patient ill, died, exited study), re-numbered to AAA
-# USE - 1                                                                  carotid (left & right)
-# USE - 2                                               femoral/iliac (left, right or both sides)
-# NOT USE - 3                                               other carotid arteries (common, external)
-# NOT USE - 4                                   carotid bypass and injury (left, right or both sides)
-# NOT USE - 5                                                         aneurysmata (carotid & femoral)
-# NOT USE - 6                                                                                   aorta
-# NOT USE - 7                                            other arteries (renal, popliteal, vertebral)
-# NOT USE - 8                        femoral bypass, angioseal and injury (left, right or both sides)
-
-### AEdata$informedconsent
-#           value                                                                                           label
-# NOT USE - -999                                                                                         missing
-# NOT USE - 0                                                                                        no, died
-# USE - 1                                                                                             yes
-# USE - 2                                                             yes, health treatment when possible
-# USE - 3                                                                        yes, no health treatment
-# USE - 4                                                yes, no health treatment, no commercial business
-# NOT USE - 5                                                          yes, no tissue, no commerical business
-# NOT USE - 6                      yes, no tissue, no questionnaires, no medical info, no commercial business
-# USE - 7                             yes, no questionnaires, no health treatment, no commercial business
-# USE - 8                                          yes, no questionnaires, health treatment when possible
-# NOT USE - 9                  yes, no tissue, no questionnaires, no health treatment, no commerical business
-# USE - 10                               yes, no health treatment, no medical info, no commercial business
-# NOT USE - 11 yes, no tissue, no questionnaires, no health treatment, no medical info, no commercial business
-# USE - 12                                                     yes, no questionnaires, no health treatment
-# NOT USE - 13                                                             yes, no tissue, no health treatment
-# NOT USE - 14                                                               yes, no tissue, no questionnaires
-# NOT USE - 15                                                  yes, no tissue, health treatment when possible
-# NOT USE - 16                                                                                  yes, no tissue
-# USE - 17                                                                     yes, no commerical business
-# USE - 18                                     yes, health treatment when possible, no commercial business
-# USE - 19                                                    yes, no medical info, no commercial business
-# USE - 20                                                                          yes, no questionnaires
-# NOT USE - 21                         yes, no tissue, no questionnaires, no health treatment, no medical info
-# NOT USE - 22                  yes, no tissue, no questionnaires, no health treatment, no commercial business
-# USE - 23                                                                            yes, no medical info
-# USE - 24                                                  yes, no questionnaires, no commercial business
-# USE - 25                                    yes, no questionnaires, no health treatment, no medical info
-# USE - 26                  yes, no questionnaires, health treatment when possible, no commercial business
-# USE - 27                                                      yes,  no health treatment, no medical info
-# NOT USE - 28                                                                             no, doesn't want to
-# NOT USE - 29                                                                              no, unable to sign
-# NOT USE - 30                                                                                 no, no reaction
-# NOT USE - 31                                                                                        no, lost
-# NOT USE - 32                                                                                     no, too old
-# NOT USE - 34                                            yes, no medical info, health treatment when possible
-# NOT USE - 35                                             no (never asked for IC because there was no tissue)
-# USE - 36                    yes, no medical info, no commercial business, health treatment when possible
-# NOT USE - 37                                                                                    no, endpoint
-# USE - 38                                                         wil niets invullen, wel alles gebruiken
-# USE - 39                                           second informed concents: yes, no commercial business
-# NOT USE - 40                                                                              nooit geincludeerd
-
-cat("- sanity checking PRIOR to selection")
+
### Artery levels
+# AEdata$Artery_summary: 
+#           value                                                                                   label
+# NOT USE - 0 No artery known (yet), no surgery (patient ill, died, exited study), re-numbered to AAA
+# USE - 1                                                                  carotid (left & right)
+# USE - 2                                               femoral/iliac (left, right or both sides)
+# NOT USE - 3                                               other carotid arteries (common, external)
+# NOT USE - 4                                   carotid bypass and injury (left, right or both sides)
+# NOT USE - 5                                                         aneurysmata (carotid & femoral)
+# NOT USE - 6                                                                                   aorta
+# NOT USE - 7                                            other arteries (renal, popliteal, vertebral)
+# NOT USE - 8                        femoral bypass, angioseal and injury (left, right or both sides)
+
+### AEdata$informedconsent
+#           value                                                                                           label
+# NOT USE - -999                                                                                         missing
+# NOT USE - 0                                                                                        no, died
+# USE - 1                                                                                             yes
+# USE - 2                                                             yes, health treatment when possible
+# USE - 3                                                                        yes, no health treatment
+# USE - 4                                                yes, no health treatment, no commercial business
+# NOT USE - 5                                                          yes, no tissue, no commerical business
+# NOT USE - 6                      yes, no tissue, no questionnaires, no medical info, no commercial business
+# USE - 7                             yes, no questionnaires, no health treatment, no commercial business
+# USE - 8                                          yes, no questionnaires, health treatment when possible
+# NOT USE - 9                  yes, no tissue, no questionnaires, no health treatment, no commerical business
+# USE - 10                               yes, no health treatment, no medical info, no commercial business
+# NOT USE - 11 yes, no tissue, no questionnaires, no health treatment, no medical info, no commercial business
+# USE - 12                                                     yes, no questionnaires, no health treatment
+# NOT USE - 13                                                             yes, no tissue, no health treatment
+# NOT USE - 14                                                               yes, no tissue, no questionnaires
+# NOT USE - 15                                                  yes, no tissue, health treatment when possible
+# NOT USE - 16                                                                                  yes, no tissue
+# USE - 17                                                                     yes, no commerical business
+# USE - 18                                     yes, health treatment when possible, no commercial business
+# USE - 19                                                    yes, no medical info, no commercial business
+# USE - 20                                                                          yes, no questionnaires
+# NOT USE - 21                         yes, no tissue, no questionnaires, no health treatment, no medical info
+# NOT USE - 22                  yes, no tissue, no questionnaires, no health treatment, no commercial business
+# USE - 23                                                                            yes, no medical info
+# USE - 24                                                  yes, no questionnaires, no commercial business
+# USE - 25                                    yes, no questionnaires, no health treatment, no medical info
+# USE - 26                  yes, no questionnaires, health treatment when possible, no commercial business
+# USE - 27                                                      yes,  no health treatment, no medical info
+# NOT USE - 28                                                                             no, doesn't want to
+# NOT USE - 29                                                                              no, unable to sign
+# NOT USE - 30                                                                                 no, no reaction
+# NOT USE - 31                                                                                        no, lost
+# NOT USE - 32                                                                                     no, too old
+# NOT USE - 34                                            yes, no medical info, health treatment when possible
+# NOT USE - 35                                             no (never asked for IC because there was no tissue)
+# USE - 36                    yes, no medical info, no commercial business, health treatment when possible
+# NOT USE - 37                                                                                    no, endpoint
+# USE - 38                                                         wil niets invullen, wel alles gebruiken
+# USE - 39                                           second informed concents: yes, no commercial business
+# NOT USE - 40                                                                              nooit geincludeerd
+
+cat("- sanity checking PRIOR to selection")
- sanity checking PRIOR to selection
-
library(data.table)
-require(labelled)
-ae.gender <- to_factor(AEDB$Gender)
-ae.hospital <- to_factor(AEDB$Hospital)
-table(ae.gender, ae.hospital, dnn = c("Sex", "Hospital"))
+
library(data.table)
+require(labelled)
+ae.gender <- to_factor(AEDB$Gender)
+ae.hospital <- to_factor(AEDB$Hospital)
+table(ae.gender, ae.hospital, dnn = c("Sex", "Hospital"))
        Hospital
@@ -4407,8 +4462,8 @@ 

Prepare baseline summary

male 1211 1420
-
ae.artery <- to_factor(AEDB$Artery_summary)
-table(ae.artery, ae.gender, dnn = c("Sex", "Artery"))
+
ae.artery <- to_factor(AEDB$Artery_summary)
+# Sanity check: artery type (rows) by sex (columns). The dnn labels must follow the
+# argument order; otherwise the artery rows are titled "Sex" and the sex columns "Artery".
+table(ae.artery, ae.gender, dnn = c("Artery", "Sex"))
                                                                                         Artery
@@ -4424,109 +4479,109 @@ 

Prepare baseline summary

femoral bypass, angioseal and injury (left, right or both sides) 4 2
-
rm(ae.gender, ae.hospital, ae.artery)
-
-# I change numeric and factors manually because, well, I wouldn't know how to fix it otherwise
-# to have this 'tibble' work with 'tableone'... :-)
-
-AEDB$Age <- as.numeric(AEDB$Age)
-AEDB$diastoli <- as.numeric(AEDB$diastoli)
-AEDB$systolic <- as.numeric(AEDB$systolic)
-
-AEDB$TC_finalCU <- as.numeric(AEDB$TC_finalCU)
-AEDB$LDL_finalCU <- as.numeric(AEDB$LDL_finalCU)
-AEDB$HDL_finalCU <- as.numeric(AEDB$HDL_finalCU)
-AEDB$TG_finalCU <- as.numeric(AEDB$TG_finalCU)
-
-AEDB$TC_final <- as.numeric(AEDB$TC_final)
-AEDB$LDL_final <- as.numeric(AEDB$LDL_final)
-AEDB$HDL_final <- as.numeric(AEDB$HDL_final)
-AEDB$TG_final <- as.numeric(AEDB$TG_final)
-
-AEDB$Age <- as.numeric(AEDB$Age)
-AEDB$GFR_MDRD <- as.numeric(AEDB$GFR_MDRD)
-AEDB$BMI <- as.numeric(AEDB$BMI)
-AEDB$eCigarettes <- as.numeric(AEDB$eCigarettes)
-AEDB$ePackYearsSmoking <- as.numeric(AEDB$ePackYearsSmoking)
-AEDB$EP_composite_time <- as.numeric(AEDB$EP_composite_time)
-AEDB$EP_major_time <- as.numeric(AEDB$EP_major_time)
-
-require(labelled)
-AEDB$Artery_summary <- to_factor(AEDB$Artery_summary)
-AEDB$ORyear <- to_factor(AEDB$ORyear)
-AEDB$Gender <- to_factor(AEDB$Gender)
-AEDB$Hospital <- to_factor(AEDB$Hospital)
-AEDB$KDOQI <- to_factor(AEDB$KDOQI)
-AEDB$BMI_WHO <- to_factor(AEDB$BMI_WHO)
-AEDB$DiabetesStatus <- to_factor(AEDB$DiabetesStatus)
-AEDB$SmokerStatus <- to_factor(AEDB$SmokerStatus)
-AEDB$AlcoholUse <- to_factor(AEDB$AlcoholUse)
-
-AEDB$Hypertension.selfreport <- to_factor(AEDB$Hypertension1)
-AEDB$Hypertension.selfreportdrug <- to_factor(AEDB$Hypertension2)
-AEDB$Hypertension.composite <- to_factor(AEDB$Hypertension.composite)
-AEDB$Hypertension.drugs <- to_factor(AEDB$Hypertension.drugs)
-
-AEDB$Med.anticoagulants <- to_factor(AEDB$Med.anticoagulants)
-AEDB$Med.all.antiplatelet <- to_factor(AEDB$Med.all.antiplatelet)
-AEDB$Med.Statin.LLD <- to_factor(AEDB$Med.Statin.LLD)
-
-AEDB$Stroke_Dx <- to_factor(AEDB$Stroke_Dx)
-AEDB$CAD_history <- to_factor(AEDB$CAD_history)
-AEDB$PAOD <- to_factor(AEDB$PAOD)
-AEDB$Peripheral.interv <- to_factor(AEDB$Peripheral.interv)
-
-AEDB$sympt <- to_factor(AEDB$sympt)
-AEDB$Symptoms.3g <- to_factor(AEDB$Symptoms.3g)
-AEDB$Symptoms.4g <- to_factor(AEDB$Symptoms.4g)
-AEDB$Symptoms.5G <- to_factor(AEDB$Symptoms.5G)
-AEDB$AsymptSympt <- to_factor(AEDB$AsymptSympt)
-AEDB$AsymptSympt2G <- to_factor(AEDB$AsymptSympt2G)
-AEDB$Symptoms.Update2G <- to_factor(AEDB$Symptoms.Update2G)
-AEDB$Symptoms.Update3G <- to_factor(AEDB$Symptoms.Update3G)
-
-AEDB$restenos <- to_factor(AEDB$restenos)
-AEDB$stenose <- to_factor(AEDB$stenose)
-AEDB$EP_composite <- to_factor(AEDB$EP_composite)
-AEDB$EP_major <- to_factor(AEDB$EP_major)
-AEDB$Macrophages.bin <- to_factor(AEDB$Macrophages.bin)
-AEDB$SMC.bin <- to_factor(AEDB$SMC.bin)
-AEDB$IPH.bin <- to_factor(AEDB$IPH.bin)
-AEDB$Calc.bin <- to_factor(AEDB$Calc.bin)
-AEDB$Collagen.bin <- to_factor(AEDB$Collagen.bin)
-AEDB$Fat.bin_10 <- to_factor(AEDB$Fat.bin_10)
-AEDB$Fat.bin_40 <- to_factor(AEDB$Fat.bin_40)
-AEDB$OverallPlaquePhenotype <- to_factor(AEDB$OverallPlaquePhenotype)
-AEDB$Plaque_Vulnerability_Index <- to_factor(AEDB$Plaque_Vulnerability_Index)
-
-AEDB$Artery_summary <- to_factor(AEDB$Artery_summary)
-
-AEDB$informedconsent <- to_factor(AEDB$informedconsent)
-
-AEDB.full <- subset(AEDB,
-                    informedconsent != "missing" & # we are really strict in selecting based on 'informed consent'!
-                       informedconsent != "no, died" & 
-                       informedconsent != "yes, no tissue, no commerical business" &
-                       informedconsent != "yes, no tissue, no questionnaires, no medical info, no commercial business" &
-                       informedconsent != "yes, no tissue, no questionnaires, no health treatment, no commerical business" &
-                       informedconsent != "yes, no tissue, no questionnaires, no health treatment, no medical info, no commercial business" &
-                       informedconsent != "yes, no tissue, no health treatment" &
-                       informedconsent != "yes, no tissue, no questionnaires" &
-                       informedconsent != "yes, no tissue, health treatment when possible" &
-                       informedconsent != "yes, no tissue" &
-                       informedconsent != "yes, no tissue, no questionnaires, no health treatment, no medical info" &
-                       informedconsent != "yes, no tissue, no questionnaires, no health treatment, no commercial business" &
-                       informedconsent != "no, doesn't want to" &
-                       informedconsent != "no, unable to sign" & 
-                       informedconsent != "no, no reaction" & 
-                       informedconsent != "no, lost" & 
-                       informedconsent != "no, too old" & 
-                       informedconsent != "yes, no medical info, health treatment when possible" &
-                       informedconsent != "no (never asked for IC because there was no tissue)" &
-                       informedconsent != "no, endpoint" & 
-                       informedconsent != "nooit geincludeerd")
-# AEDB.CEA[1:10, 1:10]
-dim(AEDB.full)
+
rm(ae.gender, ae.hospital, ae.artery)
+
+# The columns are coerced to plain numeric / factor by hand so that this 'tibble'
+# works with 'tableone'. Each column is listed once per target type, which removes
+# the duplicated conversions (Age, Artery_summary) of the copy-pasted version.
+
+# Continuous variables -> numeric.
+ae_numeric_vars <- c(
+  "Age", "diastoli", "systolic",
+  "TC_finalCU", "LDL_finalCU", "HDL_finalCU", "TG_finalCU",
+  "TC_final", "LDL_final", "HDL_final", "TG_final",
+  "GFR_MDRD", "BMI",
+  "eCigarettes", "ePackYearsSmoking",
+  "EP_composite_time", "EP_major_time"
+)
+for (ae_var in ae_numeric_vars) {
+  AEDB[[ae_var]] <- as.numeric(AEDB[[ae_var]])
+}
+
+require(labelled)
+
+# Categorical variables -> factor, converted in place with labelled::to_factor().
+ae_factor_vars <- c(
+  "Artery_summary", "ORyear", "Gender", "Hospital",
+  "KDOQI", "BMI_WHO",
+  "DiabetesStatus", "SmokerStatus", "AlcoholUse",
+  "Hypertension.composite", "Hypertension.drugs",
+  "Med.anticoagulants", "Med.all.antiplatelet", "Med.Statin.LLD",
+  "Stroke_Dx", "CAD_history", "PAOD", "Peripheral.interv",
+  "sympt", "Symptoms.3g", "Symptoms.4g", "Symptoms.5G",
+  "AsymptSympt", "AsymptSympt2G",
+  "Symptoms.Update2G", "Symptoms.Update3G",
+  "restenos", "stenose",
+  "EP_composite", "EP_major",
+  "Macrophages.bin", "SMC.bin", "IPH.bin",
+  "Calc.bin", "Collagen.bin",
+  "Fat.bin_10", "Fat.bin_40",
+  "OverallPlaquePhenotype", "Plaque_Vulnerability_Index",
+  "informedconsent"
+)
+for (ae_var in ae_factor_vars) {
+  AEDB[[ae_var]] <- to_factor(AEDB[[ae_var]])
+}
+
+# These two factors are written to differently named columns than their sources.
+AEDB$Hypertension.selfreport <- to_factor(AEDB$Hypertension1)
+AEDB$Hypertension.selfreportdrug <- to_factor(AEDB$Hypertension2)
+
+# Remove the helper objects so the notebook environment stays as before.
+rm(ae_numeric_vars, ae_factor_vars, ae_var)
+
+# We are really strict in selecting based on 'informed consent'!
+# A row is kept only when its consent level is known and not one of the levels
+# listed below. Rows with a missing (NA) consent are dropped explicitly here; the
+# former chain of `!=` comparisons dropped them implicitly, because subset()
+# treats an NA condition as FALSE. The level strings are kept verbatim, including
+# the "commerical" spelling (presumably mirroring the labels in the database).
+AEDB.full <- subset(
+  AEDB,
+  !is.na(informedconsent) &
+    !(informedconsent %in% c(
+      "missing",
+      "no, died",
+      "yes, no tissue, no commerical business",
+      "yes, no tissue, no questionnaires, no medical info, no commercial business",
+      "yes, no tissue, no questionnaires, no health treatment, no commerical business",
+      "yes, no tissue, no questionnaires, no health treatment, no medical info, no commercial business",
+      "yes, no tissue, no health treatment",
+      "yes, no tissue, no questionnaires",
+      "yes, no tissue, health treatment when possible",
+      "yes, no tissue",
+      "yes, no tissue, no questionnaires, no health treatment, no medical info",
+      "yes, no tissue, no questionnaires, no health treatment, no commercial business",
+      "no, doesn't want to",
+      "no, unable to sign",
+      "no, no reaction",
+      "no, lost",
+      "no, too old",
+      "yes, no medical info, health treatment when possible",
+      "no (never asked for IC because there was no tissue)",
+      "no, endpoint",
+      "nooit geincludeerd"
+    ))
+)
+# AEDB.CEA[1:10, 1:10]
+dim(AEDB.full)
[1] 3458 1114
@@ -4536,52 +4591,53 @@

Prepare baseline summary

-
cat("===========================================================================================\n")
+
cat("===========================================================================================\n")
===========================================================================================
-
cat("CREATE BASELINE TABLE\n")
+
cat("CREATE BASELINE TABLE\n")
CREATE BASELINE TABLE
- -
# Baseline table variables
-basetable_vars = c("Hospital", 
-                   "Artery_summary",
-                   "Age", "Gender")
-                   # "ORyear", 
-                   # "TC_finalCU", "LDL_finalCU", "HDL_finalCU", "TG_finalCU", 
-                   # "TC_final", "LDL_final", "HDL_final", "TG_final", 
-                   # "hsCRP_plasma",
-                   # "systolic", "diastoli", "GFR_MDRD", "BMI", 
-                   # "KDOQI", "BMI_WHO",
-                   # "SmokerStatus", "AlcoholUse",
-                   # "DiabetesStatus", 
-                   # "Hypertension.selfreport", "Hypertension.selfreportdrug", "Hypertension.composite", "Hypertension.drugs", 
-                   # "Med.anticoagulants", "Med.all.antiplatelet", "Med.Statin.LLD", 
-                   # "Stroke_Dx", "sympt", "Symptoms.5G", "AsymptSympt", "AsymptSympt2G",
-                   # "Symptoms.Update2G", 
-                   # "Symptoms.Update3G",
-                   # "restenos", "stenose",
-                   # "CAD_history", "PAOD", "Peripheral.interv", 
-                   # "EP_composite", "EP_composite_time", "EP_major", "EP_major_time",
-                   # "MAC_rankNorm", "SMC_rankNorm", "Macrophages.bin", "SMC.bin",
-                   # "Neutrophils_rankNorm", "MastCells_rankNorm",
-                   # "IPH.bin", "VesselDensity_rankNorm",
-                   # "Calc.bin", "Collagen.bin", 
-                   # "Fat.bin_10", "Fat.bin_40", 
-                   # "OverallPlaquePhenotype", "Plaque_Vulnerability_Index")
-
-basetable_bin = c("Hospital", 
-                  "Artery_summary",
-                  "Gender")
-# basetable_bin
-
-basetable_con = basetable_vars[!basetable_vars %in% basetable_bin]
-# basetable_con
+ +
# Baseline table variables
+# Variables that are reported in the baseline table.
+basetable_vars <- c("Hospital",
+                    "Artery_summary",
+                    "Age", "Gender",
+                    "OverallPlaquePhenotype")
+
+# Candidate variables that are currently left out of the baseline table:
+# "ORyear",
+# "TC_finalCU", "LDL_finalCU", "HDL_finalCU", "TG_finalCU",
+# "TC_final", "LDL_final", "HDL_final", "TG_final",
+# "hsCRP_plasma",
+# "systolic", "diastoli", "GFR_MDRD", "BMI",
+# "KDOQI", "BMI_WHO",
+# "SmokerStatus", "AlcoholUse",
+# "DiabetesStatus",
+# "Hypertension.selfreport", "Hypertension.selfreportdrug", "Hypertension.composite", "Hypertension.drugs",
+# "Med.anticoagulants", "Med.all.antiplatelet", "Med.Statin.LLD",
+# "Stroke_Dx", "sympt", "Symptoms.5G", "AsymptSympt", "AsymptSympt2G",
+# "Symptoms.Update2G",
+# "Symptoms.Update3G",
+# "restenos", "stenose",
+# "CAD_history", "PAOD", "Peripheral.interv",
+# "EP_composite", "EP_composite_time", "EP_major", "EP_major_time",
+# "MAC_rankNorm", "SMC_rankNorm", "Macrophages.bin", "SMC.bin",
+# "Neutrophils_rankNorm", "MastCells_rankNorm",
+# "IPH.bin", "VesselDensity_rankNorm",
+# "Calc.bin", "Collagen.bin",
+# "Fat.bin_10", "Fat.bin_40",
+# "Plaque_Vulnerability_Index"
+
+# Categorical variables; this vector is what gets passed to CreateTableOne() as
+# factorVars. OverallPlaquePhenotype is categorical, so it is listed here as well;
+# without it the variable ended up in basetable_con among the continuous ones.
+# NOTE(review): the column is already a factor, so the table should be unchanged
+# apart from unused levels being dropped, as for the other factorVars -- confirm.
+basetable_bin <- c("Hospital",
+                   "Artery_summary",
+                   "Gender",
+                   "OverallPlaquePhenotype")
+# basetable_bin
+
+# Continuous variables: everything in basetable_vars that is not categorical.
+basetable_con <- basetable_vars[!basetable_vars %in% basetable_bin]
+# basetable_con
@@ -4593,34 +4649,38 @@

Athero-Express Biobank Study Baseline Characteristics

-
# Create baseline tables
-# http://rstudio-pubs-static.s3.amazonaws.com/13321_da314633db924dc78986a850813a50d5.html
-AEDB.tableOne = print(CreateTableOne(vars = basetable_vars, 
-                                         factorVars = basetable_bin,
-                                         # strata = "Symptoms.4g",
-                                         data = AEDB.full, includeNA = TRUE), 
-                          nonnormal = c(), missing = TRUE,
-                          quote = FALSE, noSpaces = FALSE, showAllLevels = TRUE, explain = TRUE, 
-                          format = "pf", 
-                          contDigits = 3)[,1:3]
+
# Baseline table for the consent-filtered cohort (AEDB.full).
+# Reference: http://rstudio-pubs-static.s3.amazonaws.com/13321_da314633db924dc78986a850813a50d5.html
+# The value returned by print() is indexed to keep only its first three columns.
+AEDB.tableOne <- print(
+  CreateTableOne(
+    data = AEDB.full,
+    vars = basetable_vars,
+    factorVars = basetable_bin,
+    # strata = "Symptoms.4g",
+    includeNA = TRUE
+  ),
+  format = "pf",
+  contDigits = 3,
+  showAllLevels = TRUE,
+  explain = TRUE,
+  missing = TRUE,
+  nonnormal = c(),
+  noSpaces = FALSE,
+  quote = FALSE
+)[, 1:3]
- -
                         
-                          level                                                            Overall        Missing
-  n                                                                                          3458                
-  Hospital % (freq)       St. Antonius, Nieuwegein                                           45.3 (1567)  0.0    
-                          UMC Utrecht                                                        54.7 (1891)         
-  Artery_summary % (freq) carotid (left & right)                                             69.0 (2387)  0.0    
-                          femoral/iliac (left, right or both sides)                          28.8 ( 995)         
-                          other carotid arteries (common, external)                           1.3 (  45)         
-                          carotid bypass and injury (left, right or both sides)               0.2 (   7)         
-                          aneurysmata (carotid & femoral)                                     0.0 (   1)         
-                          aorta                                                               0.2 (   6)         
-                          other arteries (renal, popliteal, vertebral)                        0.3 (  12)         
-                          femoral bypass, angioseal and injury (left, right or both sides)    0.1 (   5)         
-  Age (mean (SD))                                                                          68.733 (9.214) 0.0    
-  Gender % (freq)         female                                                             29.7 (1026)  0.0    
-                          male                                                               70.3 (2432)         
+ +
                                 
+                                  level                                                            Overall        Missing
+  n                                                                                                  3458                
+  Hospital % (freq)               St. Antonius, Nieuwegein                                           45.3 (1567)   0.0   
+                                  UMC Utrecht                                                        54.7 (1891)         
+  Artery_summary % (freq)         carotid (left & right)                                             69.0 (2387)   0.0   
+                                  femoral/iliac (left, right or both sides)                          28.8 ( 995)         
+                                  other carotid arteries (common, external)                           1.3 (  45)         
+                                  carotid bypass and injury (left, right or both sides)               0.2 (   7)         
+                                  aneurysmata (carotid & femoral)                                     0.0 (   1)         
+                                  aorta                                                               0.2 (   6)         
+                                  other arteries (renal, popliteal, vertebral)                        0.3 (  12)         
+                                  femoral bypass, angioseal and injury (left, right or both sides)    0.1 (   5)         
+  Age (mean (SD))                                                                                  68.733 (9.214)  0.0   
+  Gender % (freq)                 female                                                             29.7 (1026)   0.0   
+                                  male                                                               70.3 (2432)         
+  OverallPlaquePhenotype % (freq) atheromatous                                                       14.7 ( 507)  25.9   
+                                  fibroatheromatous                                                  22.2 ( 769)         
+                                  fibrous                                                            37.2 (1285)         
+                                  <NA>                                                               25.9 ( 897)         
@@ -4631,94 +4691,98 @@

Match samples used

-
# Remove duplicate rows of the dataframe
-library(dplyr)
-temp <- EM_samples %>% select(., "Studynumber") %>%
-  distinct(.)
-
-AEDB_EM <- merge(temp,
-                 AEDB,
-                 by.x = "Studynumber",
-                 by.y = "STUDY_NUMBER", 
-                 sort = FALSE)
-
-AEDB_EM.full <- subset(AEDB_EM,
-                    informedconsent != "missing" & # we are really strict in selecting based on 'informed consent'!
-                       informedconsent != "no, died" & 
-                       informedconsent != "yes, no tissue, no commerical business" &
-                       informedconsent != "yes, no tissue, no questionnaires, no medical info, no commercial business" &
-                       informedconsent != "yes, no tissue, no questionnaires, no health treatment, no commerical business" &
-                       informedconsent != "yes, no tissue, no questionnaires, no health treatment, no medical info, no commercial business" &
-                       informedconsent != "yes, no tissue, no health treatment" &
-                       informedconsent != "yes, no tissue, no questionnaires" &
-                       informedconsent != "yes, no tissue, health treatment when possible" &
-                       informedconsent != "yes, no tissue" &
-                       informedconsent != "yes, no tissue, no questionnaires, no health treatment, no medical info" &
-                       informedconsent != "yes, no tissue, no questionnaires, no health treatment, no commercial business" &
-                       informedconsent != "no, doesn't want to" &
-                       informedconsent != "no, unable to sign" & 
-                       informedconsent != "no, no reaction" & 
-                       informedconsent != "no, lost" & 
-                       informedconsent != "no, too old" & 
-                       informedconsent != "yes, no medical info, health treatment when possible" &
-                       informedconsent != "no (never asked for IC because there was no tissue)" &
-                       informedconsent != "no, endpoint" & 
-                       informedconsent != "nooit geincludeerd")
-# AEDB_EM.full[1:10, 1:10]
-
-rm(temp)
+
# Unique study numbers of the EM samples (duplicate rows removed).
+library(dplyr)
+temp <- distinct(select(EM_samples, "Studynumber"))
+
+# Keep the AEDB records that belong to those study numbers (merge() without `all`
+# keeps matching rows only); the result is left unsorted.
+AEDB_EM <- merge(x = temp,
+                 y = AEDB,
+                 by.x = "Studynumber",
+                 by.y = "STUDY_NUMBER",
+                 sort = FALSE)
+
+# We are really strict in selecting based on 'informed consent'!
+# Same selection as the `!=` chain it replaces: rows with a missing (NA) consent
+# or with one of the consent levels below are removed. Level strings are verbatim.
+AEDB_EM.full <- subset(
+  AEDB_EM,
+  !is.na(informedconsent) &
+    !(informedconsent %in% c(
+      "missing",
+      "no, died",
+      "yes, no tissue, no commerical business",
+      "yes, no tissue, no questionnaires, no medical info, no commercial business",
+      "yes, no tissue, no questionnaires, no health treatment, no commerical business",
+      "yes, no tissue, no questionnaires, no health treatment, no medical info, no commercial business",
+      "yes, no tissue, no health treatment",
+      "yes, no tissue, no questionnaires",
+      "yes, no tissue, health treatment when possible",
+      "yes, no tissue",
+      "yes, no tissue, no questionnaires, no health treatment, no medical info",
+      "yes, no tissue, no questionnaires, no health treatment, no commercial business",
+      "no, doesn't want to",
+      "no, unable to sign",
+      "no, no reaction",
+      "no, lost",
+      "no, too old",
+      "yes, no medical info, health treatment when possible",
+      "no (never asked for IC because there was no tissue)",
+      "no, endpoint",
+      "nooit geincludeerd"
+    ))
+)
+# AEDB_EM.full[1:10, 1:10]
+
+rm(temp)
-
# Create baseline tables
-# http://rstudio-pubs-static.s3.amazonaws.com/13321_da314633db924dc78986a850813a50d5.html
-AEDB_EM.full.tableOne = print(CreateTableOne(vars = basetable_vars, 
-                                         factorVars = basetable_bin,
-                                         # strata = "Symptoms.4g",
-                                         data = AEDB_EM.full, includeNA = TRUE), 
-                          nonnormal = c(), missing = TRUE,
-                          quote = FALSE, noSpaces = FALSE, showAllLevels = TRUE, explain = TRUE, 
-                          format = "pf", 
-                          contDigits = 3)[,1:3]
+
# Baseline table for the consent-filtered EM samples (AEDB_EM.full).
+# Reference: http://rstudio-pubs-static.s3.amazonaws.com/13321_da314633db924dc78986a850813a50d5.html
+# The value returned by print() is indexed to keep only its first three columns.
+AEDB_EM.full.tableOne <- print(
+  CreateTableOne(
+    data = AEDB_EM.full,
+    vars = basetable_vars,
+    factorVars = basetable_bin,
+    # strata = "Symptoms.4g",
+    includeNA = TRUE
+  ),
+  format = "pf",
+  contDigits = 3,
+  showAllLevels = TRUE,
+  explain = TRUE,
+  missing = TRUE,
+  nonnormal = c(),
+  noSpaces = FALSE,
+  quote = FALSE
+)[, 1:3]
- -
                         
-                          level                                        Overall        Missing
-  n                                                                        56                
-  Hospital % (freq)       St. Antonius, Nieuwegein                       21.4 (12)    0.0    
-                          UMC Utrecht                                    78.6 (44)           
-  Artery_summary % (freq) carotid (left & right)                         85.7 (48)    0.0    
-                          femoral/iliac (left, right or both sides)      12.5 ( 7)           
-                          other arteries (renal, popliteal, vertebral)    1.8 ( 1)           
-  Age (mean (SD))                                                      69.500 (9.260) 0.0    
-  Gender % (freq)         female                                         33.9 (19)    0.0    
-                          male                                           66.1 (37)           
+ +
                                 
+                                  level                                        Overall        Missing
+  n                                                                                56                
+  Hospital % (freq)               St. Antonius, Nieuwegein                       21.4 (12)     0.0   
+                                  UMC Utrecht                                    78.6 (44)           
+  Artery_summary % (freq)         carotid (left & right)                         85.7 (48)     0.0   
+                                  femoral/iliac (left, right or both sides)      12.5 ( 7)           
+                                  other arteries (renal, popliteal, vertebral)    1.8 ( 1)           
+  Age (mean (SD))                                                              69.500 (9.260)  0.0   
+  Gender % (freq)                 female                                         33.9 (19)     0.0   
+                                  male                                           66.1 (37)           
+  OverallPlaquePhenotype % (freq) atheromatous                                   21.4 (12)    17.9   
+                                  fibroatheromatous                              19.6 (11)           
+                                  fibrous                                        41.1 (23)           
+                                  <NA>                                           17.9 (10)           
-

-basetable_vars = c("Hospital", 
-                   "Artery_summary")
-
-basetable_bin = c("Hospital", 
-                  "Artery_summary")
-
-# Create baseline tables
-# http://rstudio-pubs-static.s3.amazonaws.com/13321_da314633db924dc78986a850813a50d5.html
-AEDB_EM.tableOne = print(CreateTableOne(vars = basetable_vars, 
-                                         factorVars = basetable_bin,
-                                         # strata = "Symptoms.4g",
-                                         data = AEDB_EM, includeNA = TRUE), 
-                          nonnormal = c(), missing = TRUE,
-                          quote = FALSE, noSpaces = FALSE, showAllLevels = TRUE, explain = TRUE, 
-                          format = "pf", 
-                          contDigits = 3)[,1:3]
+

+# For all matched EM samples (AEDB_EM, no consent filter) only hospital and artery
+# type are tabulated. Note that this overwrites basetable_vars and basetable_bin.
+basetable_bin <- c("Hospital", "Artery_summary")
+basetable_vars <- basetable_bin
+
+# Baseline table for all matched EM samples.
+# Reference: http://rstudio-pubs-static.s3.amazonaws.com/13321_da314633db924dc78986a850813a50d5.html
+AEDB_EM.tableOne <- print(
+  CreateTableOne(
+    data = AEDB_EM,
+    vars = basetable_vars,
+    factorVars = basetable_bin,
+    # strata = "Symptoms.4g",
+    includeNA = TRUE
+  ),
+  format = "pf",
+  contDigits = 3,
+  showAllLevels = TRUE,
+  explain = TRUE,
+  missing = TRUE,
+  nonnormal = c(),
+  noSpaces = FALSE,
+  quote = FALSE
+)[, 1:3]
                         
@@ -4739,28 +4803,32 @@ 

Baseline writing

Study.

- -
# Write basetable
-
-require(openxlsx)
-
-write.xlsx(as.data.frame(AEDB.tableOne), 
-           file = paste0(BASELINE_loc, "/",Today,".",PROJECTNAME,".AE.BaselineTable.xlsx"), 
-           rowNames = TRUE, 
-           colNames = TRUE, 
-           sheetName = "AE_Base", overwrite = TRUE)
-
-write.xlsx(as.data.frame(AEDB_EM.tableOne), 
-           file = paste0(BASELINE_loc, "/",Today,".",PROJECTNAME,".AE.EM.59.BaselineTable.xlsx"), 
-           rowNames = TRUE, 
-           colNames = TRUE, 
-           sheetName = "AE_Base_EM_59", overwrite = TRUE)
-
-write.xlsx(as.data.frame(AEDB_EM.full.tableOne), 
-           file = paste0(BASELINE_loc, "/",Today,".",PROJECTNAME,".AE.EM.56.BaselineTable.xlsx"), 
-           rowNames = TRUE, 
-           colNames = TRUE, 
-           sheetName = "AE_Base_EM_56", overwrite = TRUE)
+ +
# Write basetable
+
+require(openxlsx)
+ + +
Loading required package: openxlsx
+ + +
invisible(Map(
+  # Write one baseline table to its own date-stamped workbook in BASELINE_loc.
+  function(baseline_table, file_suffix, sheet) {
+    write.xlsx(as.data.frame(baseline_table),
+               file = paste0(BASELINE_loc, "/", Today, ".", PROJECTNAME, file_suffix),
+               rowNames = TRUE,
+               colNames = TRUE,
+               sheetName = sheet, overwrite = TRUE)
+  },
+  # Tables, file-name suffixes and sheet names, matched by position.
+  list(AEDB.tableOne, AEDB_EM.tableOne, AEDB_EM.full.tableOne),
+  c(".AE.BaselineTable.xlsx", ".AE.EM.59.BaselineTable.xlsx", ".AE.EM.56.BaselineTable.xlsx"),
+  c("AE_Base", "AE_Base_EM_59", "AE_Base_EM_56")
+))
@@ -4769,9 +4837,9 @@

Baseline writing

-
saveRDS(AEDB_EM.full, file = paste0(OUT_loc, "/",Today,".",PROJECTNAME,".AEDB.EM.FULL.RDS"))
-saveRDS(AEDB.full, file = paste0(OUT_loc, "/",Today,".",PROJECTNAME,".AEDB.FULL.RDS"))
-saveRDS(AEDB, file = paste0(OUT_loc, "/",Today,".",PROJECTNAME,".AEDB.raw.RDS"))
+
saveRDS(AEDB_EM.full, file = paste0(OUT_loc, "/",Today,".",PROJECTNAME,".AEDB.EM.FULL.RDS"))
+saveRDS(AEDB.full, file = paste0(OUT_loc, "/",Today,".",PROJECTNAME,".AEDB.FULL.RDS"))
+saveRDS(AEDB, file = paste0(OUT_loc, "/",Today,".",PROJECTNAME,".AEDB.raw.RDS"))
@@ -4780,8 +4848,8 @@

Baseline writing

Session information


-
Version:      v1.0.2
-Last update:  2022-07-12
+
Version:      v1.0.3
+Last update:  2022-08-23
 Written by:   Sander W. van der Laan (s.w.vanderlaan-2[at]umcutrecht.nl).
 Description:  Script to get some Athero-Express Biobank Study baseline characteristics.
 Minimum requirements: R version 3.4.3 (2017-06-30) -- 'Single Candle', Mac OS X El Capitan
@@ -4797,6 +4865,7 @@ 

Session information

_W_ **Changes log** +* v1.0.3 Updated baseline characteristics. * v1.0.2 Simplified the initial script. It now outputs the relevant R-objects (as .RDS). * v1.0.1 Update to main AEDB (there is an error in the Age-variable in the new version). * v1.0.0 Initial version. Add 'plaque vulnerability index', Fixed baseline table, added codes, and results. Major update to WORCS system.
@@ -4804,9 +4873,9 @@

Session information

-
sessionInfo()
+
sessionInfo()
- +
R version 4.2.1 (2022-06-23)
 Platform: x86_64-apple-darwin21.5.0 (64-bit)
 Running under: macOS Ventura 13.0
@@ -4821,32 +4890,32 @@ 

Session information

[1] grid tools stats graphics grDevices utils datasets methods base other attached packages: - [1] openxlsx_4.2.5 patchwork_1.1.0.9000 labelled_2.9.1 sjPlot_2.8.10 UpSetR_1.4.0 ggpubr_0.4.0 - [7] forestplot_2.0.1 checkmate_2.1.0 magrittr_2.0.3 pheatmap_1.0.12 devtools_2.4.3 usethis_2.1.6 -[13] BlandAltmanLeh_0.3.1 tableone_0.13.2 haven_2.5.0 eeptools_1.2.4 DT_0.23 knitr_1.39 -[19] forcats_0.5.1 stringr_1.4.0 purrr_0.3.4 tibble_3.1.7 ggplot2_3.3.6 tidyverse_1.3.1 -[25] data.table_1.14.2 naniar_0.6.1 tidyr_1.2.0 dplyr_1.0.9 optparse_1.7.1 readr_2.1.2 -[31] R.utils_2.11.0 R.oo_1.25.0 R.methodsS3_1.8.2 credentials_1.3.2 + [1] openxlsx_4.2.5 sjPlot_2.8.10 UpSetR_1.4.0 ggpubr_0.4.0 forestplot_2.0.1 checkmate_2.1.0 + [7] magrittr_2.0.3 pheatmap_1.0.12 devtools_2.4.3 usethis_2.1.6 BlandAltmanLeh_0.3.1 tableone_0.13.2 +[13] haven_2.5.0 eeptools_1.2.4 DT_0.23 knitr_1.39 forcats_0.5.1 stringr_1.4.0 +[19] purrr_0.3.4 tibble_3.1.7 ggplot2_3.3.6 tidyverse_1.3.1 data.table_1.14.2 naniar_0.6.1 +[25] tidyr_1.2.0 dplyr_1.0.9 optparse_1.7.1 readr_2.1.2 R.utils_2.11.0 R.oo_1.25.0 +[31] R.methodsS3_1.8.2 credentials_1.3.2 loaded via a namespace (and not attached): [1] readxl_1.4.0 backports_1.4.1 plyr_1.8.7 sp_1.5-0 splines_4.2.1 TH.data_1.1-1 digest_0.6.29 - [8] htmltools_0.5.2 fansi_1.0.3 memoise_2.0.1 remotes_2.4.2 tzdb_0.3.0 modelr_0.1.8 sandwich_3.0-2 + [8] htmltools_0.5.2 fansi_1.0.3 memoise_2.0.1 tzdb_0.3.0 remotes_2.4.2 modelr_0.1.8 sandwich_3.0-2 [15] askpass_1.1 prettyunits_1.1.1 colorspace_2.0-3 rvest_1.0.2 mitools_2.4 xfun_0.31 callr_3.7.0 [22] crayon_1.5.1 jsonlite_1.8.0 lme4_1.1-29 survival_3.3-1 zoo_1.8-10 glue_1.6.2 gtable_0.3.0 [29] emmeans_1.7.5 sjstats_0.18.1 sjmisc_2.8.9 car_3.1-0 pkgbuild_1.3.1 abind_1.4-5 scales_1.2.0 - [36] mvtnorm_1.1-3 DBI_1.1.3 rstatix_0.7.0 ggeffects_1.1.2 Rcpp_1.0.8.3 xtable_1.8-4 performance_0.9.1 + [36] mvtnorm_1.1-3 DBI_1.1.3 ggeffects_1.1.2 rstatix_0.7.0 Rcpp_1.0.8.3 performance_0.9.1 xtable_1.8-4 [43] proxy_0.4-27 foreign_0.8-82 
survey_4.1-1 vcd_1.4-10 datawizard_0.4.1 htmlwidgets_1.5.4 httr_1.4.3 - [50] getopt_1.20.3 RColorBrewer_1.1-3 ellipsis_0.3.2 farver_2.1.0 pkgconfig_2.0.3 sass_0.4.1 dbplyr_2.2.0 - [57] utf8_1.2.2 labeling_0.4.2 tidyselect_1.1.2 rlang_1.0.2 effectsize_0.7.0 munsell_0.5.0 cellranger_1.1.0 - [64] cachem_1.0.6 cli_3.3.0 generics_0.1.2 sjlabelled_1.2.0 broom_0.8.0 evaluate_0.15 fastmap_1.1.0 - [71] arm_1.12-2 yaml_2.3.5 sys_3.4 processx_3.6.1 fs_1.5.2 zip_2.2.0 pander_0.6.5 - [78] visdat_0.5.3 nlme_3.1-157 xml2_1.3.3 brio_1.1.3 compiler_4.2.1 rstudioapi_0.13 curl_4.3.2 - [85] e1071_1.7-11 testthat_3.1.4 ggsignif_0.6.3 reprex_2.0.1 bslib_0.3.1 stringi_1.7.6 ps_1.7.1 - [92] parameters_0.18.1 desc_1.4.1 lattice_0.20-45 Matrix_1.4-1 nloptr_2.0.3 vctrs_0.4.1 pillar_1.7.0 - [99] lifecycle_1.0.1 jquerylib_0.1.4 lmtest_0.9-40 estimability_1.3 maptools_1.1-4 insight_0.17.1 R6_2.5.1 -[106] gridExtra_2.3 sessioninfo_1.2.2 codetools_0.2-18 boot_1.3-28 MASS_7.3-57 assertthat_0.2.1 pkgload_1.2.4 -[113] openssl_2.0.2 rprojroot_2.0.3 withr_2.5.0 multcomp_1.4-19 bayestestR_0.12.1 hms_1.1.1 class_7.3-20 -[120] coda_0.19-4 minqa_1.2.4 rmarkdown_2.14 carData_3.0-5 lubridate_1.8.0
+ [50] getopt_1.20.3 RColorBrewer_1.1-3 ellipsis_0.3.2 pkgconfig_2.0.3 dbplyr_2.2.0 utf8_1.2.2 tidyselect_1.1.2 + [57] rlang_1.0.2 effectsize_0.7.0 munsell_0.5.0 cellranger_1.1.0 cachem_1.0.6 cli_3.3.0 generics_0.1.2 + [64] sjlabelled_1.2.0 broom_0.8.0 evaluate_0.15 fastmap_1.1.0 arm_1.12-2 yaml_2.3.5 sys_3.4 + [71] processx_3.6.1 fs_1.5.2 zip_2.2.0 pander_0.6.5 visdat_0.5.3 nlme_3.1-157 xml2_1.3.3 + [78] brio_1.1.3 compiler_4.2.1 rstudioapi_0.13 curl_4.3.2 e1071_1.7-11 testthat_3.1.4 ggsignif_0.6.3 + [85] reprex_2.0.1 stringi_1.7.6 ps_1.7.1 parameters_0.18.1 desc_1.4.1 lattice_0.20-45 Matrix_1.4-1 + [92] nloptr_2.0.3 vctrs_0.4.1 pillar_1.7.0 lifecycle_1.0.1 lmtest_0.9-40 estimability_1.3 maptools_1.1-4 + [99] insight_0.17.1 R6_2.5.1 gridExtra_2.3 codetools_0.2-18 sessioninfo_1.2.2 boot_1.3-28 MASS_7.3-57 +[106] assertthat_0.2.1 pkgload_1.2.4 openssl_2.0.2 rprojroot_2.0.3 withr_2.5.0 multcomp_1.4-19 bayestestR_0.12.1 +[113] hms_1.1.1 labelled_2.9.1 class_7.3-20 coda_0.19-4 minqa_1.2.4 rmarkdown_2.14 carData_3.0-5 +[120] lubridate_1.8.0
@@ -4856,7 +4925,7 @@

Saving environment

-
save.image(paste0(PROJECT_loc, "/",Today,".",PROJECTNAME,".AEDB.EM.baseline.RData"))
+
save.image(paste0(PROJECT_loc, "/",Today,".",PROJECTNAME,".AEDB.EM.baseline.RData"))
@@ -4874,7 +4943,7 @@

Saving environment

-
---
title: "Baseline characteristics"
author: "[Sander W. van der Laan, PhD](https://swvanderlaan.github.io) | @swvanderlaan | s.w.vanderlaan@gmail.com"
date: "`r Sys.Date()`"
output:
  html_notebook:
    cache: yes
    code_folding: hide
    collapse: yes
    df_print: paged
    fig.align: center
    fig_caption: yes
    fig_height: 6
    fig_retina: 2
    fig_width: 7
    highlight: tango
    theme: lumen
    toc: yes
    toc_float:
      collapsed: no
      smooth_scroll: yes
mainfont: Arial
subtitle: Accompanying 'EntropyMasker'
editor_options:
  chunk_output_type: inline
# bibliography: references.bib
# knit: worcs::cite_all
---

# General Setup
We will clean the environment, setup the locations, define colors, and create a datestamp.

_Clean the environment._
```{r echo = FALSE}
# Start from an empty global environment: removes every visible (non-dot)
# object. Attached packages and options are not affected by this call.
rm(list = ls())
```

_Set locations and working directories..._
```{r LocalSystem, echo = FALSE}
# Run the project-local setup script.
# NOTE(review): presumably this defines the *_loc path variables and
# PROJECTNAME used further down in this notebook — confirm.
source("scripts/local.system.R")

```

_... a package-installation function ..._
```{r Function: installations}
# Run the script holding the project's helper functions.
# NOTE(review): the surrounding text says this provides a package-installation
# function — confirm against scripts/functions.R.
source("scripts/functions.R")

```


_... and load those packages._
```{r loading_packages, message=FALSE, warning=FALSE}
# Run the package-loading script.
# NOTE(review): presumably attaches the packages used unqualified below
# (e.g. fread, to_factor, %>%, mutate) — confirm.
source("scripts/pack01.packages.R")


```

_We will create a datestamp and define the Utrecht Science Park Colour Scheme_.
```{r Setting: Colors}

# Datestamps. `Today` (YYYYMMDD) is used as a prefix for output file names;
# `Today.Report` is the long, human-readable form of the same date.
# Both are derived from a single clock reading so they cannot disagree when
# the notebook happens to run across midnight.
run_date <- Sys.Date()
Today <- format(run_date, "%Y%m%d")
Today.Report <- format(run_date, "%A, %B %d, %Y")

# NOTE(review): presumably defines the colour scheme announced above — confirm.
source("scripts/colors.R")

```

```{r global_options, include = FALSE}
# Further define some knitr options; these become the defaults for every
# chunk that follows.
knitr::opts_chunk$set(
  fig.width = 12, fig.height = 8, fig.path = 'Figures/',
  warning = TRUE, # show warnings (the option name is `warning`, not `wwarning`)
  message = TRUE, # show messages
  error = TRUE,   # do not interrupt rendering in case of errors,
                  # usually better for debugging
  echo = TRUE,    # show R code
  eval = TRUE     # evaluate the chunks
)
# Default ggplot2 theme for all figures.
ggplot2::theme_set(ggplot2::theme_minimal())
# Never split wide pander tables across several blocks.
pander::panderOptions("table.split.table", Inf)
```

# This notebook 

In this notebook we create a baseline table of the samples used in **`EntropyMasker`**. 

# Athero-Express Biobank Study

The [*Athero-Express Biobank Study (AE)*](https://doi.org/10.1007/s10564-004-2304-6) contains plaque material of patients who underwent endarterectomy at two Dutch tertiary referral centers. Details of the study design were described before. Briefly, blood and plaque material were obtained during endarterectomy and stored at -80 ℃. All patients provided informed consent and the study was approved by the medical ethics committee.

## Load relevant samples

```{r}
# Load the EntropyMasker sample list (going by the file name: the image files
# that were used) into `EM_samples`.
EM_samples <- data.table::fread(
  input = paste0(ANALYSIS_loc, "/dataverse/EntropyMasker_image_files_used.txt")
)
```

## Load data

Loading Athero-Express Biobank Study clinical and biobank data.

```{r LoadAEDB}
cat("* get Athero-Express Biobank Study Database...")
# METHOD 1: It seems this method gives loads of errors and warnings, which all are hard to comprehend
#           or debug. We expect 3,527 samples, and 927 variables; we get 927 variables!!!
# AEdata = as.data.table(read.spss(paste0(INP_loc,"/2017-1NEW_AtheroExpressDatabase_ScientificAE_20171306_v1.0.sav"),
#                                  trim.factor.names = TRUE, trim_values = TRUE, # we trim spaces in values
#                                  reencode = TRUE, # we re-encode to the local locale encoding
#                                  add.undeclared.levels = "append", # we do *not* want to convert to R-factors
#                                  use.value.labels = FALSE, # we do *not* convert variables with value labels into R factors
#                                  use.missings = TRUE, sub = "NA", # we will set every missing variable to NA
#                                  duplicated.value.labels = "condense", # we will condense duplicated value labels
#                                  to.data.frame = TRUE))
# AEdata.labels <- as.data.table(attr(AEdata, "variable.labels"))
# names(AEdata.labels) <- "Variable"

# METHOD 2: Using library("haven") importing seems flawless; best argument being:
#           we expect 3,527 samples and 888 variables, which is what you'd get with this method
#           So for now, METHOD 2 is preferred.
#
# library() instead of require(): a missing haven installation now stops the
# notebook right here with a clear error, rather than returning FALSE and
# failing somewhere further down.
library(haven)

# AEDB <- haven::read_sav(paste0(AEDB_loc, "/2022_1_NEW_AtheroExpressDatabase_ScientificAE_15-02-2022.sav")) # something wrong with Age-variable
# AEDB <- haven::read_sav(paste0(AEDB_loc, "/2020_1_NEW_AtheroExpressDatabase_ScientificAE_30-09-2020.sav")) # duplicate studynumbers in it
AEDB <- haven::read_sav(paste0(AEDB_loc, "/2020_1_NEW_AtheroExpressDatabase_ScientificAE_16-03-2020.sav"))

# writing off the SPSS data to an Excel.
# fwrite(AEdata, file = paste0(INP_loc,"/2017-1NEW_AtheroExpressDatabase_ScientificAE_20171306_v1.0.values.xlsx"), 
#        sep = ";", na = "NA", dec = ".", col.names = TRUE, row.names = FALSE,
#        dateTimeAs = "ISO", showProgress = TRUE, verbose = TRUE)
# warnings()

# Quick sanity check: top-left corner of the data and its dimensions.
AEDB[1:10, 1:10]
dim(AEDB)

```



<!-- ## Examine AEDB -->

<!-- We can examine the contents of the Athero-Express Biobank dataset to know what each variable is called, what class -->
<!-- (type) it has, and what the variable description is. -->

<!-- There is an excellent post on this: <https://www.r-bloggers.com/working-with-spss-labels-in-r/>. -->

<!-- ```{r AEDB: describe} -->
<!-- AEDB %>% sjPlot::view_df(show.type = TRUE, -->
<!--                          show.frq = TRUE, -->
<!--                          show.prc = TRUE, -->
<!--                          show.na = TRUE,  -->
<!--                          max.len = TRUE,  -->
<!--                          wrap.labels = 20, -->
<!--                          verbose = FALSE,  -->
<!--                          use.viewer = FALSE, -->
<!--                          file = paste0(OUT_loc, "/", Today, ".AEDB.dictionary.html"))  -->
<!-- ``` -->

## Fixing and creating variables

We have to fix certain clinical parameters:

-   symptoms
-   diabetes
-   alcohol use
-   smoking
-   plaque phenotypes

### Symptoms

We need to be very strict in defining *symptoms.* Therefore we will fix a new
variable that groups *symptoms* at inclusion.

Coding of *symptoms* is as follows:

-   missing -999
-   Asymptomatic 0
-   TIA 1
-   minor stroke 2
-   Major stroke 3
-   Amaurosis fugax 4
-   Four vessel disease 5
-   Vertebrobasilary TIA 7
-   Retinal infarction 8
-   Symptomatic, but aspecific symptoms 9
-   Contralateral symptomatic occlusion 10
-   retinal infarction 11
-   armclaudication due to occlusion subclavian artery, CEA needed for bypass
    12
-   retinal infarction + TIAs 13
-   Ocular ischemic syndrome 14
-   ischemisch glaucoom 15
-   subclavian steal syndrome 16
-   TGA 17

We will group as follows:

1.  Asymptomatic > 0
2.  TIA > 1, 7, 13
3.  Stroke > 2, 3
4.  Ocular > 4, 14, 15
5.  Retinal infarction > 8, 11
6.  Other > 5, 9, 10, 12, 16, 17

```{r FixSymptoms, message=FALSE, warning=FALSE}

# Fix symptoms
#
# Recode the numeric `sympt` codes into three grouped symptom variables.
# The codes are read straight from AEDB instead of via attach()/detach(), so
# nothing is left on the search path if a statement fails halfway.
# Every code that is not listed below (including -999, "missing") stays NA.
sympt_code <- as.numeric(AEDB$sympt)

asymptomatic_codes <- 0
tia_codes          <- c(1, 7, 13)
stroke_codes       <- c(2, 3)
ocular_codes       <- c(4, 14, 15)
retinal_codes      <- c(8, 11)
other_codes        <- c(5, 9, 10, 12, 16, 17)

# Symptoms.5G: detailed grouping
AEDB$Symptoms.5G <- NA_character_
AEDB$Symptoms.5G[sympt_code %in% asymptomatic_codes] <- "Asymptomatic"
AEDB$Symptoms.5G[sympt_code %in% tia_codes] <- "TIA"
AEDB$Symptoms.5G[sympt_code %in% stroke_codes] <- "Stroke"
AEDB$Symptoms.5G[sympt_code %in% ocular_codes] <- "Ocular"
AEDB$Symptoms.5G[sympt_code %in% retinal_codes] <- "Retinal infarction"
AEDB$Symptoms.5G[sympt_code %in% other_codes] <- "Other"

# AsymptSympt: asymptomatic vs. symptomatic (TIA/stroke) vs. ocular and others
AEDB$AsymptSympt <- NA_character_
AEDB$AsymptSympt[sympt_code %in% asymptomatic_codes] <- "Asymptomatic"
AEDB$AsymptSympt[sympt_code %in% c(tia_codes, stroke_codes)] <- "Symptomatic"
AEDB$AsymptSympt[sympt_code %in% c(ocular_codes, retinal_codes, other_codes)] <- "Ocular and others"

# AsymptSympt2G: asymptomatic vs. any symptom
AEDB$AsymptSympt2G <- NA_character_
AEDB$AsymptSympt2G[sympt_code %in% asymptomatic_codes] <- "Asymptomatic"
AEDB$AsymptSympt2G[sympt_code %in% c(tia_codes, stroke_codes, ocular_codes,
                                     retinal_codes, other_codes)] <- "Symptomatic"

# Remove the temporary helpers again.
rm(sympt_code, asymptomatic_codes, tia_codes, stroke_codes,
   ocular_codes, retinal_codes, other_codes)

# table(AEDB$sympt, useNA = "ifany")
# table(AEDB$AsymptSympt2G, useNA = "ifany")
# table(AEDB$Symptoms.5G, useNA = "ifany")
# 
# table(AEDB$AsymptSympt2G, AEDB$sympt, useNA = "ifany")
# table(AEDB$Symptoms.5G, AEDB$sympt, useNA = "ifany")
table(AEDB$AsymptSympt2G, AEDB$Symptoms.5G, useNA = "ifany")

# AEDB.temp <- subset(AEDB,  select = c("STUDY_NUMBER", "UPID", "Age", "Gender", "Hospital", "Artery_summary", "sympt", "Symptoms.5G", "AsymptSympt"))
# require(labelled)
# AEDB.temp$Gender <- to_factor(AEDB.temp$Gender)
# AEDB.temp$Hospital <- to_factor(AEDB.temp$Hospital)
# AEDB.temp$Artery_summary <- to_factor(AEDB.temp$Artery_summary)
# 
# DT::datatable(AEDB.temp[1:10,], caption = "Excerpt of the whole AEDB.", rownames = FALSE)
# 
# table(AEDB.temp$Symptoms.5G, AEDB.temp$AsymptSympt)
# 
# rm(AEDB.temp)
```

### Re-assessed symptoms

We re-assessed the categorization of symptoms. These are summarized and parsed
in this section.

Labeling of new symptom categories.

```{r message=FALSE, warning=FALSE, paged.print=TRUE}

# Print the four re-assessed symptom columns (values and their labels).
AEDB[["indexsymptoms_worst"]]
AEDB[["indexsymptoms_worst_4g"]]
AEDB[["indexsymptoms_latest"]]
AEDB[["indexsymptoms_latest_4g"]]

```

Getting counts for each of the most important categories.

```{r}
# Cross-tabulate the re-assessed 'worst' against the 'latest' categories.
cat("New 'worst' vs 'latest' symptom categories.\n")
table(
  haven::as_factor(AEDB$indexsymptoms_worst_4g),
  haven::as_factor(AEDB$indexsymptoms_latest_4g)
)

# Counts of the 'worst' categories (tabulated on the raw codes).
cat("\nNew 'worst' symptom categories.\n")
table(AEDB$indexsymptoms_worst_4g)

# Counts of the 'latest' categories (tabulated on the value labels).
cat("\nNew 'latest' symptom categories.\n")
table(haven::as_factor(AEDB$indexsymptoms_latest_4g))
```

Comparing with the original symptom categories.

```{r}
# Re-assessed 'latest' codes against the original two-group variable.
cat("New 'latest' vs original symptom 2G categories.\n")
table(AEDB$indexsymptoms_latest_4g, AEDB$AsymptSympt2G)

# Re-assessed 'latest' codes against the original detailed grouping.
cat("\nNew 'latest' vs original symptom 5G categories.\n")
table(AEDB$indexsymptoms_latest_4g, AEDB$Symptoms.5G)
   
```

We need to be very strict in defining *symptoms.* Therefore we will fix a new
variable that groups *symptoms* at inclusion.

Coding of *symptoms* is as follows:

-   asympt 0\
-   ocular 1\
-   TIA 2\
-   stroke 3\
-   unclear 9

We will group as follows:

1.  Asymptomatic > 0
2.  Symptomatic > 1, 2, 3
3.  NA > 9

```{r FixNewSymptoms}

# Fix symptoms
#
# Collapse the re-assessed `indexsymptoms_latest_4g` codes
# (0 = asympt; 1, 2, 3 = ocular, TIA, stroke; 9 = unclear) into a two-group and
# a three-group variable. The codes are read straight from AEDB instead of via
# attach()/detach(), so nothing is left on the search path on failure.
latest_code <- as.numeric(AEDB$indexsymptoms_latest_4g)
is_asymptomatic <- latest_code %in% 0
is_symptomatic  <- latest_code %in% c(1, 2, 3)
is_unclear      <- latest_code %in% 9

# Symptoms.Update2G: 'unclear' (9) is left as NA
AEDB$Symptoms.Update2G <- NA_character_
AEDB$Symptoms.Update2G[is_asymptomatic] <- "Asymptomatic"
AEDB$Symptoms.Update2G[is_symptomatic] <- "Symptomatic"

# Symptoms.Update3G: 'unclear' (9) gets its own category
AEDB$Symptoms.Update3G <- NA_character_
AEDB$Symptoms.Update3G[is_asymptomatic] <- "Asymptomatic"
AEDB$Symptoms.Update3G[is_symptomatic] <- "Symptomatic"
AEDB$Symptoms.Update3G[is_unclear] <- "Unclear"

# Remove the temporary helpers again.
rm(latest_code, is_asymptomatic, is_symptomatic, is_unclear)

table(AEDB$Symptoms.Update2G, AEDB$Symptoms.5G, useNA = "ifany")
table(AEDB$Symptoms.Update3G, AEDB$Symptoms.5G, useNA = "ifany")

```

### Other clinical characteristics

We will also fix the *diabetes* status variable.

```{r FixDiabetes, message=FALSE, warning=FALSE}

# Fix diabetes
#
# Translate the `DM.composite` codes into a labelled status; any other code
# (including -999) stays NA. The codes are read straight from AEDB instead of
# via attach()/detach().
dm_code <- as.numeric(AEDB$DM.composite)
AEDB$DiabetesStatus <- NA_character_
AEDB$DiabetesStatus[dm_code %in% 0] <- "Control (no Diabetes Dx/Med)"
AEDB$DiabetesStatus[dm_code %in% 1] <- "Diabetes"
rm(dm_code)

table(AEDB$DM.composite, AEDB$DiabetesStatus)
# AEDB.temp <- subset(AEDB,  select = c("STUDY_NUMBER", "UPID", "Age", "Gender", "Hospital", "Artery_summary", "DM.composite", "DiabetesStatus"))
# require(labelled)
# AEDB.temp$Gender <- to_factor(AEDB.temp$Gender)
# AEDB.temp$Hospital <- to_factor(AEDB.temp$Hospital)
# AEDB.temp$Artery_summary <- to_factor(AEDB.temp$Artery_summary)
# AEDB.temp$DiabetesStatus <- to_factor(AEDB.temp$DiabetesStatus)
# 
# DT::datatable(AEDB.temp[1:10,], caption = "Excerpt of the whole AEDB.", rownames = FALSE)
# 
# rm(AEDB.temp)

```

We will also fix the *smoking* status variable. We are interested in whether
someone never, ever or is currently (at the time of inclusion) smoking. This is
based on the questionnaire.

-   `diet801`: are you a smoker?
-   `diet802`: did you smoke in the past?

We already have some variables indicating smoking status:

-   `SmokingReported`: patient has reported to smoke.
-   `SmokingYearOR`: smoking in the year of surgery?
-   `SmokerCurrent`: currently smoking?

```{r FixSmoking, message=FALSE, warning=FALSE}
# library() instead of require(): fail immediately when labelled is missing.
library(labelled)

# Convert the labelled smoking variables to factors, so that they can be
# compared against their value labels below.
AEDB$diet801 <- to_factor(AEDB$diet801)
AEDB$diet802 <- to_factor(AEDB$diet802)
AEDB$diet805 <- to_factor(AEDB$diet805)
AEDB$SmokingReported <- to_factor(AEDB$SmokingReported)
AEDB$SmokerCurrent <- to_factor(AEDB$SmokerCurrent)
AEDB$SmokingYearOR <- to_factor(AEDB$SmokingYearOR)

# table(AEDB$diet801)
# table(AEDB$diet802)
# table(AEDB$SmokingReported)
# table(AEDB$SmokerCurrent)
# table(AEDB$SmokingYearOR)
# table(AEDB$SmokingReported, AEDB$SmokerCurrent, useNA = "ifany", dnn = c("Reported smoking", "Current smoker"))
# 
# table(AEDB$diet801, AEDB$diet802, useNA = "ifany", dnn = c("Smoker", "Past smoker"))

cat("\nFixing smoking status.\n")
# The columns are read straight from AEDB instead of via attach()/detach().
# The assignments are applied in order: later rules overwrite earlier ones.
smoked_in_past <- AEDB$diet802
smokes_now <- AEDB$SmokerCurrent

AEDB$SmokerStatus <- NA_character_
# NOTE(review): "don't know" is mapped to "Never smoked" — confirm intended.
AEDB$SmokerStatus[which(smoked_in_past == "don't know")] <- "Never smoked"
AEDB$SmokerStatus[which(smoked_in_past == "I still smoke")] <- "Current smoker"
AEDB$SmokerStatus[which(smokes_now == "no" & smoked_in_past == "no")] <- "Never smoked"
AEDB$SmokerStatus[which(smokes_now == "no" & smoked_in_past == "yes")] <- "Ex-smoker"
AEDB$SmokerStatus[which(smokes_now == "yes")] <- "Current smoker"
AEDB$SmokerStatus[which(smokes_now == "no data available/missing")] <- NA
# AEDB$SmokerStatus[is.na(SmokerCurrent)] <- "Never smoked"
rm(smoked_in_past, smokes_now)

cat("\n* Current smoking status.\n")
table(AEDB$SmokerCurrent,
      useNA = "ifany", 
      dnn = c("Current smoker"))

cat("\n* Updated smoking status.\n")
table(AEDB$SmokerStatus,
      useNA = "ifany", 
      dnn = c("Updated smoking status"))

cat("\n* Comparing to 'SmokerCurrent'.\n")
table(AEDB$SmokerStatus, AEDB$SmokerCurrent, 
      useNA = "ifany", 
      dnn = c("Updated smoking status", "Current smoker"))

# AEDB.temp <- subset(AEDB,  select = c("STUDY_NUMBER", "UPID", "Age", "Gender", "Hospital", "Artery_summary", "DM.composite", "DiabetesStatus"))
# require(labelled)
# AEDB.temp$Gender <- to_factor(AEDB.temp$Gender)
# AEDB.temp$Hospital <- to_factor(AEDB.temp$Hospital)
# AEDB.temp$Artery_summary <- to_factor(AEDB.temp$Artery_summary)
# AEDB.temp$DiabetesStatus <- to_factor(AEDB.temp$DiabetesStatus)
# 
# DT::datatable(AEDB.temp[1:10,], caption = "Excerpt of the whole AEDB.", rownames = FALSE)
# 
# rm(AEDB.temp)


```

We will also fix the *alcohol* status variable.

```{r FixAlcohol, message=FALSE, warning=FALSE}

# Fix alcohol use
#
# Translate the `diet810` codes into "No"/"Yes"; any other code (including
# -999) stays NA. The codes are read straight from AEDB instead of via
# attach()/detach().
alcohol_code <- as.numeric(AEDB$diet810)
AEDB$AlcoholUse <- NA_character_
AEDB$AlcoholUse[alcohol_code %in% 0] <- "No"
AEDB$AlcoholUse[alcohol_code %in% 1] <- "Yes"
rm(alcohol_code)

# AEDB.temp <- subset(AEDB,  select = c("STUDY_NUMBER", "UPID", "Age", "Gender", "Hospital", "Artery_summary", "diet810", "AlcoholUse"))
# require(labelled)
# AEDB.temp$Gender <- to_factor(AEDB.temp$Gender)
# AEDB.temp$Hospital <- to_factor(AEDB.temp$Hospital)
# AEDB.temp$Artery_summary <- to_factor(AEDB.temp$Artery_summary)
# AEDB.temp$AlcoholUse <- to_factor(AEDB.temp$AlcoholUse)
# 
# DT::datatable(AEDB.temp[1:10,], caption = "Excerpt of the whole AEDB.", rownames = FALSE)
# 
# rm(AEDB.temp)


```

We will also fix a history of CAD, stroke or peripheral intervention status variable. This will be based on `CAD_history`, `Stroke_history`, and `Peripheral.interv`.

```{r FixCAD_History, message=FALSE, warning=FALSE}

# Fix medical history of cardiovascular disease
#
# MedHx_CVD is "yes" when at least one of CAD_history, Stroke_history or
# Peripheral.interv equals 1; otherwise "No" when at least one of them equals
# 0; otherwise NA. The columns are read straight from AEDB instead of via
# attach()/detach().
cad_code <- as.numeric(AEDB$CAD_history)
stroke_code <- as.numeric(AEDB$Stroke_history)
peripheral_code <- as.numeric(AEDB$Peripheral.interv)

any_history_absent <- cad_code %in% 0 | stroke_code %in% 0 | peripheral_code %in% 0
any_history_present <- cad_code %in% 1 | stroke_code %in% 1 | peripheral_code %in% 1

AEDB$MedHx_CVD <- NA_character_
AEDB$MedHx_CVD[any_history_absent] <- "No"
# Applied last, so a single positive history overrides "No".
# NOTE(review): lower-case "yes" next to capitalised "No" — kept as is because
# code outside this chunk may match on the exact value; confirm and harmonise.
AEDB$MedHx_CVD[any_history_present] <- "yes"

rm(cad_code, stroke_code, peripheral_code, any_history_absent, any_history_present)

table(AEDB$CAD_history)
table(AEDB$Stroke_history)
table(AEDB$Peripheral.interv)
table(AEDB$MedHx_CVD)

# AEDB.temp <- subset(AEDB,  select = c("STUDY_NUMBER", "UPID", "Age", "Gender", "Hospital", "Artery_summary", "diet810", "AlcoholUse"))
# require(labelled)
# AEDB.temp$Gender <- to_factor(AEDB.temp$Gender)
# AEDB.temp$Hospital <- to_factor(AEDB.temp$Hospital)
# AEDB.temp$Artery_summary <- to_factor(AEDB.temp$Artery_summary)
# AEDB.temp$AlcoholUse <- to_factor(AEDB.temp$AlcoholUse)
# 
# DT::datatable(AEDB.temp[1:10,], caption = "Excerpt of the whole AEDB.", rownames = FALSE)
# 
# rm(AEDB.temp)


```

### Plaque phenotypes

We will also fix the *plaquephenotypes* variable.

Coding of plaque phenotypes is as follows:

-   missing -999\
-   not relevant -888
-   fibrous 1\
-   fibroatheromatous 2\
-   atheromatous 3

```{r FixPlaquePhenotypes, message=FALSE, warning=FALSE}

# Fix plaquephenotypes
#
# Map the `plaquephenotype` codes 1/2/3 onto their names. match() returns NA
# for every other code, so -999 (missing) and -888 (not relevant) become NA.
# The column is read straight from AEDB instead of via attach()/detach().
phenotype_code <- as.numeric(AEDB$plaquephenotype)
phenotype_names <- c("fibrous", "fibroatheromatous", "atheromatous")
AEDB$OverallPlaquePhenotype <- phenotype_names[match(phenotype_code, c(1, 2, 3))]
rm(phenotype_code, phenotype_names)

# AEDB.temp <- subset(AEDB,  select = c("STUDY_NUMBER", "UPID", "Age", "Gender", "Hospital", "Artery_summary", "plaquephenotype", "OverallPlaquePhenotype"))
# require(labelled)
# AEDB.temp$Gender <- to_factor(AEDB.temp$Gender)
# AEDB.temp$Hospital <- to_factor(AEDB.temp$Hospital)
# AEDB.temp$Artery_summary <- to_factor(AEDB.temp$Artery_summary)
# 
# DT::datatable(AEDB.temp[1:10,], caption = "Excerpt of the whole AEDB.", rownames = FALSE)
# 
# rm(AEDB.temp)

```

We will also fix and inverse-rank normal transform the continuous (manually)
scored plaque phenotypes.

```{r IRNT PlaquePhenotypes}
# Inverse-rank normal transformation: turn the ranks of `x` into quantiles
# ((rank - 0.5) / number of non-missing values) and map those onto the standard
# normal distribution. Missing values stay missing (na.last = "keep").
inverse_rank_normalise <- function(x) {
  qnorm((rank(x, na.last = "keep") - 0.5) / sum(!is.na(x)))
}

# Make sure the manually scored phenotypes are plain numeric vectors.
AEDB$macmean0 <- as.numeric(AEDB$macmean0)
AEDB$smcmean0 <- as.numeric(AEDB$smcmean0)
AEDB$neutrophils <- as.numeric(AEDB$neutrophils)
AEDB$Mast_cells_plaque <- as.numeric(AEDB$Mast_cells_plaque)
AEDB$vessel_density_averaged <- as.numeric(AEDB$vessel_density_averaged)

# Add the transformed counterpart of each phenotype.
AEDB$MAC_rankNorm <- inverse_rank_normalise(AEDB$macmean0)
AEDB$SMC_rankNorm <- inverse_rank_normalise(AEDB$smcmean0)
AEDB$Neutrophils_rankNorm <- inverse_rank_normalise(AEDB$neutrophils)
AEDB$MastCells_rankNorm <- inverse_rank_normalise(AEDB$Mast_cells_plaque)
AEDB$VesselDensity_rankNorm <- inverse_rank_normalise(AEDB$vessel_density_averaged)

```

```{r IRNT PlaquePhenotypes: Visualisation}
library(labelled)
library(patchwork) # provides `|` for placing two plots side by side

AEDB$Gender <- to_factor(AEDB$Gender)

# Histogram of one AEDB column, filled by Gender, with the median marked and
# a rug added. `variable` is the column name as a string.
gender_histogram <- function(variable, title, xlab) {
  ggpubr::gghistogram(AEDB, variable,
                      # y = "..count..",
                      color = "white",
                      fill = "Gender",
                      palette = c("#1290D9", "#DB003F"),
                      add = "median",
                      #add_density = TRUE,
                      rug = TRUE,
                      #add.params =  list(color = "black", linetype = 2),
                      title = title,
                      xlab = xlab,
                      ggtheme = theme_minimal())
}

# Raw phenotype (left) next to its inverse-rank normalised version (right).
# Both panels share the title; the right-hand x-axis label gets an extra line.
raw_vs_rank_normalised <- function(raw, rank_normalised, title, xlab) {
  gender_histogram(raw, title, xlab) |
    gender_histogram(rank_normalised, title,
                     paste0(xlab, "\ninverse-rank normalized number"))
}

raw_vs_rank_normalised("macmean0", "MAC_rankNorm",
                       title = "% of macrophages (CD68)",
                       xlab = "% per region of interest")

raw_vs_rank_normalised("smcmean0", "SMC_rankNorm",
                       title = "% of smooth muscle cells (SMA)",
                       xlab = "% per region of interest")

raw_vs_rank_normalised("neutrophils", "Neutrophils_rankNorm",
                       title = "number of neutrophils (CD66b)",
                       xlab = "counts per plaque")

raw_vs_rank_normalised("Mast_cells_plaque", "MastCells_rankNorm",
                       title = "number of mast cells",
                       xlab = "counts per plaque")

raw_vs_rank_normalised("vessel_density_averaged", "VesselDensity_rankNorm",
                       title = "number of intraplaque neovessels",
                       xlab = "counts per 3-4 hotspots")

# Remove the plotting helpers again.
rm(gender_histogram, raw_vs_rank_normalised)
```

Here we calculate the *plaque instability/vulnerability* index

```{r Plaque Vulnerability}
# Plaque vulnerability
#
# Score five binary plaque characteristics as stable (0) or unstable (1) and
# sum them per sample into `Plaque_Vulnerability_Index`.
# library() instead of require(): fail immediately when labelled is missing.
library(labelled)
AEDB$Macrophages.bin <- to_factor(AEDB$Macrophages.bin)
AEDB$SMC.bin <- to_factor(AEDB$SMC.bin)
AEDB$IPH.bin <- to_factor(AEDB$IPH.bin)
AEDB$Calc.bin <- to_factor(AEDB$Calc.bin)
AEDB$Collagen.bin <- to_factor(AEDB$Collagen.bin)
AEDB$Fat.bin_10 <- to_factor(AEDB$Fat.bin_10)
AEDB$Fat.bin_40 <- to_factor(AEDB$Fat.bin_40)

table(AEDB$Macrophages.bin)
table(AEDB$Fat.bin_10)
table(AEDB$Collagen.bin)
table(AEDB$SMC.bin)
table(AEDB$IPH.bin)

# SPSS code

# 
# *** syntax- Plaque vulnerability**.
# COMPUTE Macro_instab = -999.
# IF macrophages.bin=2 Macro_instab=1.
# IF macrophages.bin=1 Macro_instab=0.
# EXECUTE.
# 
# COMPUTE Fat10_instab = -999.
# IF Fat.bin_10=2 Fat10_instab=1.
# IF Fat.bin_10=1 Fat10_instab=0.
# EXECUTE.
# 
# COMPUTE coll_instab=-999.
# IF Collagen.bin=2 coll_instab=0.
# IF Collagen.bin=1 coll_instab=1.
# EXECUTE.
# 
# 
# COMPUTE SMC_instab=-999.
# IF SMC.bin=2 SMC_instab=0.
# IF SMC.bin=1 SMC_instab=1.
# EXECUTE.
# 
# COMPUTE IPH_instab=-999.
# IF IPH.bin=0 IPH_instab=0.
# IF IPH.bin=1 IPH_instab=1.
# EXECUTE.
# 
# COMPUTE Instability=Macro_instab + Fat10_instab +  coll_instab + SMC_instab + IPH_instab.
# EXECUTE.

# Fix plaquephenotypes
#
# Returns 0 where `x` equals `stable`, 1 where it equals `unstable`, and NA
# everywhere else (missing values and any other level).
score_instability <- function(x, stable, unstable) {
  score <- rep(NA_real_, length(x))
  score[x %in% stable] <- 0
  score[x %in% unstable] <- 1
  score
}

# The factor columns are read straight from AEDB instead of via
# attach()/detach(), so nothing is left on the search path on failure.
# mac instability: many macrophages = unstable
AEDB$MAC_Instability <- score_instability(AEDB$Macrophages.bin,
                                          stable = "no/minor", unstable = "moderate/heavy")

# fat instability: more than 10% fat = unstable (level names start with a space)
AEDB$FAT10_Instability <- score_instability(AEDB$Fat.bin_10,
                                            stable = " <10%", unstable = " >10%")

# col instability: little collagen = unstable
AEDB$COL_Instability <- score_instability(AEDB$Collagen.bin,
                                          stable = "moderate/heavy", unstable = "no/minor")

# smc instability: few smooth muscle cells = unstable
AEDB$SMC_Instability <- score_instability(AEDB$SMC.bin,
                                          stable = "moderate/heavy", unstable = "no/minor")

# iph instability: intraplaque haemorrhage present = unstable
AEDB$IPH_Instability <- score_instability(AEDB$IPH.bin,
                                          stable = "no", unstable = "yes")

rm(score_instability)

table(AEDB$MAC_Instability, useNA = "ifany")
table(AEDB$FAT10_Instability, useNA = "ifany")
table(AEDB$COL_Instability, useNA = "ifany")
table(AEDB$SMC_Instability, useNA = "ifany")
table(AEDB$IPH_Instability, useNA = "ifany")

# creating vulnerability index
# Sum over every column whose name contains "_Instability".
# NOTE(review): with na.rm = TRUE a sample with some or all components missing
# still gets an index (0 when all are missing) rather than NA — confirm intended.
AEDB <- AEDB %>%
  mutate(Plaque_Vulnerability_Index = factor(rowSums(.[grep("_Instability", names(.))], na.rm = TRUE)))

table(AEDB$Plaque_Vulnerability_Index, useNA = "ifany")

# str(AEDB$Plaque_Vulnerability_Index)

```

## Prepare baseline summary

We are interested in the following variables at baseline in the whole cohort.

-   Age (years)
-   Female sex (N, %)
-   Artery type (N, %)
-   Hospital (N, %)
<!-- -   Hypertension (N, %) -->
<!-- -   SBP (mmHg) -->
<!-- -   DBP (mmHg) -->
<!-- -   Diabetes mellitus (N, %) -->
<!-- -   Total cholesterol levels (mg/dL) -->
<!-- -   LDL cholesterol levels (mg/dL) -->
<!-- -   HDL cholesterol levels (mg/dL) -->
<!-- -   Triglyceride levels (mg/dL) -->
<!-- -   Use of statins (N, %) -->
<!-- -   Use of antiplatelet drugs (N, %) -->
<!-- -   BMI (kg/m²) -->
<!-- -   Smoking status (N, %) -->
<!--     -   Never smokers -->
<!--     -   Ex-smokers -->
<!--     -   Current smokers -->
<!-- -   History of CAD (N, %) -->
<!-- -   History of PAD (N, %) -->
<!-- -   Clinical manifestations -->
<!--     -   Asymptomatic -->
<!--     -   Amaurosis fugax -->
<!--     -   TIA -->
<!--     -   Stroke -->
<!-- -   eGFR (mL/min/1.73 m²) -->
<!-- -   stenosis -->
<!-- -   year of surgery -->
<!-- -   plaque characteristics -->

```{r Baseline AEDB: preparation}
# Print a banner to the chunk output marking the start of the sample selection.
cat("====================================================================================================\n")
cat("SELECTION THE SHIZZLE\n")

### Artery levels
# AEdata$Artery_summary: 
#           value                                                                                   label
# NOT USE - 0 No artery known (yet), no surgery (patient ill, died, exited study), re-numbered to AAA
# USE - 1                                                                  carotid (left & right)
# USE - 2                                               femoral/iliac (left, right or both sides)
# NOT USE - 3                                               other carotid arteries (common, external)
# NOT USE - 4                                   carotid bypass and injury (left, right or both sides)
# NOT USE - 5                                                         aneurysmata (carotid & femoral)
# NOT USE - 6                                                                                   aorta
# NOT USE - 7                                            other arteries (renal, popliteal, vertebral)
# NOT USE - 8                        femoral bypass, angioseal and injury (left, right or both sides)

### AEdata$informedconsent
#           value                                                                                           label
# NOT USE - -999                                                                                         missing
# NOT USE - 0                                                                                        no, died
# USE - 1                                                                                             yes
# USE - 2                                                             yes, health treatment when possible
# USE - 3                                                                        yes, no health treatment
# USE - 4                                                yes, no health treatment, no commercial business
# NOT USE - 5                                                          yes, no tissue, no commerical business
# NOT USE - 6                      yes, no tissue, no questionnaires, no medical info, no commercial business
# USE - 7                             yes, no questionnaires, no health treatment, no commercial business
# USE - 8                                          yes, no questionnaires, health treatment when possible
# NOT USE - 9                  yes, no tissue, no questionnaires, no health treatment, no commerical business
# USE - 10                               yes, no health treatment, no medical info, no commercial business
# NOT USE - 11 yes, no tissue, no questionnaires, no health treatment, no medical info, no commercial business
# USE - 12                                                     yes, no questionnaires, no health treatment
# NOT USE - 13                                                             yes, no tissue, no health treatment
# NOT USE - 14                                                               yes, no tissue, no questionnaires
# NOT USE - 15                                                  yes, no tissue, health treatment when possible
# NOT USE - 16                                                                                  yes, no tissue
# USE - 17                                                                     yes, no commerical business
# USE - 18                                     yes, health treatment when possible, no commercial business
# USE - 19                                                    yes, no medical info, no commercial business
# USE - 20                                                                          yes, no questionnaires
# NOT USE - 21                         yes, no tissue, no questionnaires, no health treatment, no medical info
# NOT USE - 22                  yes, no tissue, no questionnaires, no health treatment, no commercial business
# USE - 23                                                                            yes, no medical info
# USE - 24                                                  yes, no questionnaires, no commercial business
# USE - 25                                    yes, no questionnaires, no health treatment, no medical info
# USE - 26                  yes, no questionnaires, health treatment when possible, no commercial business
# USE - 27                                                      yes,  no health treatment, no medical info
# NOT USE - 28                                                                             no, doesn't want to
# NOT USE - 29                                                                              no, unable to sign
# NOT USE - 30                                                                                 no, no reaction
# NOT USE - 31                                                                                        no, lost
# NOT USE - 32                                                                                     no, too old
# NOT USE - 34                                            yes, no medical info, health treatment when possible
# NOT USE - 35                                             no (never asked for IC because there was no tissue)
# USE - 36                    yes, no medical info, no commercial business, health treatment when possible
# NOT USE - 37                                                                                    no, endpoint
# USE - 38                                                         wil niets invullen, wel alles gebruiken
# USE - 39                                           second informed concents: yes, no commercial business
# NOT USE - 40                                                                              nooit geincludeerd

# Sanity check before any selection: cross-tabulate sex by hospital and
# artery type by sex on the full, unfiltered AEDB.
cat("- sanity checking PRIOR to selection\n")
library(data.table)
library(labelled)

# Temporary factor copies so the tables show value labels instead of codes.
ae.gender <- to_factor(AEDB$Gender)
ae.hospital <- to_factor(AEDB$Hospital)
ae.artery <- to_factor(AEDB$Artery_summary)

table(ae.gender, ae.hospital, dnn = c("Sex", "Hospital"))
# dnn follows the argument order: rows are artery type, columns are sex.
table(ae.artery, ae.gender, dnn = c("Artery", "Sex"))

rm(ae.gender, ae.hospital, ae.artery)

# I change numeric and factors manually because, well, I wouldn't know how to fix it otherwise
# to have this 'tibble' work with 'tableone'... :-)
#
# Each listed column is overwritten in place: continuous variables with
# as.numeric(), categorical variables with labelled::to_factor().
library(labelled)

# Continuous variables.
numeric_vars <- c(
  "Age", "diastoli", "systolic",
  "TC_finalCU", "LDL_finalCU", "HDL_finalCU", "TG_finalCU",
  "TC_final", "LDL_final", "HDL_final", "TG_final",
  "GFR_MDRD", "BMI", "eCigarettes", "ePackYearsSmoking",
  "EP_composite_time", "EP_major_time"
)
AEDB[numeric_vars] <- lapply(AEDB[numeric_vars], as.numeric)

# Categorical variables that keep their own name.
factor_vars <- c(
  "Artery_summary", "ORyear", "Gender", "Hospital", "KDOQI", "BMI_WHO",
  "DiabetesStatus", "SmokerStatus", "AlcoholUse",
  "Hypertension.composite", "Hypertension.drugs",
  "Med.anticoagulants", "Med.all.antiplatelet", "Med.Statin.LLD",
  "Stroke_Dx", "CAD_history", "PAOD", "Peripheral.interv",
  "sympt", "Symptoms.3g", "Symptoms.4g", "Symptoms.5G",
  "AsymptSympt", "AsymptSympt2G", "Symptoms.Update2G", "Symptoms.Update3G",
  "restenos", "stenose", "EP_composite", "EP_major",
  "Macrophages.bin", "SMC.bin", "IPH.bin", "Calc.bin", "Collagen.bin",
  "Fat.bin_10", "Fat.bin_40",
  "OverallPlaquePhenotype", "Plaque_Vulnerability_Index",
  "informedconsent"
)
AEDB[factor_vars] <- lapply(AEDB[factor_vars], to_factor)

# These two get a descriptive new name; the source columns are left as is.
AEDB$Hypertension.selfreport <- to_factor(AEDB$Hypertension1)
AEDB$Hypertension.selfreportdrug <- to_factor(AEDB$Hypertension2)

# Remove the helper vectors so they do not end up in the saved workspace.
rm(numeric_vars, factor_vars)

# Select patients based on informed consent; we are really strict here.
# Rows whose consent label is in the exclusion list, or whose consent is NA,
# are dropped (a comparison with NA was already dropped by subset()).
# The labels are matched literally against the factor levels, so their
# spelling (including "commerical") must not be corrected here.
ic_excluded <- c(
  "missing",
  "no, died",
  "yes, no tissue, no commerical business",
  "yes, no tissue, no questionnaires, no medical info, no commercial business",
  "yes, no tissue, no questionnaires, no health treatment, no commerical business",
  "yes, no tissue, no questionnaires, no health treatment, no medical info, no commercial business",
  "yes, no tissue, no health treatment",
  "yes, no tissue, no questionnaires",
  "yes, no tissue, health treatment when possible",
  "yes, no tissue",
  "yes, no tissue, no questionnaires, no health treatment, no medical info",
  "yes, no tissue, no questionnaires, no health treatment, no commercial business",
  "no, doesn't want to",
  "no, unable to sign",
  "no, no reaction",
  "no, lost",
  "no, too old",
  "yes, no medical info, health treatment when possible",
  "no (never asked for IC because there was no tissue)",
  "no, endpoint",
  "nooit geincludeerd"
)

AEDB.full <- subset(AEDB,
                    !is.na(informedconsent) &
                      !(informedconsent %in% ic_excluded))
rm(ic_excluded)
# AEDB.CEA[1:10, 1:10]
dim(AEDB.full)

```

```{r Baseline AEDB: creation}
cat("===========================================================================================\n")
cat("CREATE BASELINE TABLE\n")

# Baseline table variables: the columns reported in the baseline tables.
basetable_vars <- c(
  "Hospital",
  "Artery_summary",
  "Age",
  "Gender"
)
# Candidate variables that are currently switched off:
                   # "ORyear", 
                   # "TC_finalCU", "LDL_finalCU", "HDL_finalCU", "TG_finalCU", 
                   # "TC_final", "LDL_final", "HDL_final", "TG_final", 
                   # "hsCRP_plasma",
                   # "systolic", "diastoli", "GFR_MDRD", "BMI", 
                   # "KDOQI", "BMI_WHO",
                   # "SmokerStatus", "AlcoholUse",
                   # "DiabetesStatus", 
                   # "Hypertension.selfreport", "Hypertension.selfreportdrug", "Hypertension.composite", "Hypertension.drugs", 
                   # "Med.anticoagulants", "Med.all.antiplatelet", "Med.Statin.LLD", 
                   # "Stroke_Dx", "sympt", "Symptoms.5G", "AsymptSympt", "AsymptSympt2G",
                   # "Symptoms.Update2G", 
                   # "Symptoms.Update3G",
                   # "restenos", "stenose",
                   # "CAD_history", "PAOD", "Peripheral.interv", 
                   # "EP_composite", "EP_composite_time", "EP_major", "EP_major_time",
                   # "MAC_rankNorm", "SMC_rankNorm", "Macrophages.bin", "SMC.bin",
                   # "Neutrophils_rankNorm", "MastCells_rankNorm",
                   # "IPH.bin", "VesselDensity_rankNorm",
                   # "Calc.bin", "Collagen.bin", 
                   # "Fat.bin_10", "Fat.bin_40", 
                   # "OverallPlaquePhenotype", "Plaque_Vulnerability_Index")

# The categorical ones among them (passed as factorVars to CreateTableOne).
basetable_bin <- c(
  "Hospital",
  "Artery_summary",
  "Gender"
)
# basetable_bin

# Everything that is not categorical is treated as continuous.
basetable_con <- setdiff(basetable_vars, basetable_bin)
# basetable_con
```

# Athero-Express Biobank Study Baseline Characteristics

Showing the baseline table of the whole Athero-Express Biobank.

```{r Baseline AEDB: Visualize AEDB}
# Baseline table of the consent-filtered cohort (AEDB.full); print() returns
# the formatted table, of which the first three columns are kept.
# http://rstudio-pubs-static.s3.amazonaws.com/13321_da314633db924dc78986a850813a50d5.html
AEDB.tableOne <- print(
  CreateTableOne(
    data = AEDB.full,
    vars = basetable_vars,
    factorVars = basetable_bin,
    # strata = "Symptoms.4g",
    includeNA = TRUE
  ),
  format = "pf",
  contDigits = 3,
  nonnormal = NULL,
  missing = TRUE,
  showAllLevels = TRUE,
  explain = TRUE,
  quote = FALSE,
  noSpaces = FALSE
)[, 1:3]
```


## Match samples used

Here we match the full database with the samples used in **`EntropyMasker`**. 

```{r}
# Match the EntropyMasker samples to the database.
library(dplyr)

# Inner join of the unique study numbers in EM_samples (duplicate rows
# removed) with AEDB; sort = FALSE leaves the rows unsorted on the key.
AEDB_EM <- merge(
  EM_samples %>% select("Studynumber") %>% distinct(),
  AEDB,
  by.x = "Studynumber",
  by.y = "STUDY_NUMBER",
  sort = FALSE
)

# Select the matched patients based on informed consent; we are really strict
# here. Rows whose consent label is in the exclusion list, or whose consent is
# NA, are dropped. The labels are matched literally against the factor levels,
# so their spelling (including "commerical") must not be corrected here.
ic_excluded <- c(
  "missing",
  "no, died",
  "yes, no tissue, no commerical business",
  "yes, no tissue, no questionnaires, no medical info, no commercial business",
  "yes, no tissue, no questionnaires, no health treatment, no commerical business",
  "yes, no tissue, no questionnaires, no health treatment, no medical info, no commercial business",
  "yes, no tissue, no health treatment",
  "yes, no tissue, no questionnaires",
  "yes, no tissue, health treatment when possible",
  "yes, no tissue",
  "yes, no tissue, no questionnaires, no health treatment, no medical info",
  "yes, no tissue, no questionnaires, no health treatment, no commercial business",
  "no, doesn't want to",
  "no, unable to sign",
  "no, no reaction",
  "no, lost",
  "no, too old",
  "yes, no medical info, health treatment when possible",
  "no (never asked for IC because there was no tissue)",
  "no, endpoint",
  "nooit geincludeerd"
)

AEDB_EM.full <- subset(AEDB_EM,
                       !is.na(informedconsent) &
                         !(informedconsent %in% ic_excluded))
rm(ic_excluded)
# AEDB_EM.full[1:10, 1:10]
```



```{r Baseline AEDB: Visualize AEDB EM FULL}
# Baseline table of the consent-filtered EntropyMasker samples (AEDB_EM.full);
# print() returns the formatted table, of which the first three columns are kept.
# http://rstudio-pubs-static.s3.amazonaws.com/13321_da314633db924dc78986a850813a50d5.html
AEDB_EM.full.tableOne <- print(
  CreateTableOne(
    data = AEDB_EM.full,
    vars = basetable_vars,
    factorVars = basetable_bin,
    # strata = "Symptoms.4g",
    includeNA = TRUE
  ),
  format = "pf",
  contDigits = 3,
  nonnormal = NULL,
  missing = TRUE,
  showAllLevels = TRUE,
  explain = TRUE,
  quote = FALSE,
  noSpaces = FALSE
)[, 1:3]
```

```{r Baseline AEDB: Visualize AEDB EM}

# For all matched samples (AEDB_EM, before the consent filter) only hospital
# and artery type are tabulated; this overwrites the earlier variable lists.
basetable_vars <- c("Hospital", "Artery_summary")

basetable_bin <- c("Hospital", "Artery_summary")

# Create baseline tables
# http://rstudio-pubs-static.s3.amazonaws.com/13321_da314633db924dc78986a850813a50d5.html
AEDB_EM.tableOne <- print(
  CreateTableOne(
    data = AEDB_EM,
    vars = basetable_vars,
    factorVars = basetable_bin,
    # strata = "Symptoms.4g",
    includeNA = TRUE
  ),
  format = "pf",
  contDigits = 3,
  nonnormal = NULL,
  missing = TRUE,
  showAllLevels = TRUE,
  explain = TRUE,
  quote = FALSE,
  noSpaces = FALSE
)[, 1:3]
```

## Baseline writing

Let's save the baseline characteristics of the Athero-Express Biobank Study.

```{r Baseline SampleSelection: write}
# Write basetable
#
# Each baseline table goes to its own date-stamped Excel workbook in
# BASELINE_loc; an existing file of the same name is overwritten.
# library() stops right here if openxlsx is missing, whereas require() would
# only warn and let write.xlsx() fail afterwards.
library(openxlsx)

# Whole (consent-filtered) Athero-Express cohort.
write.xlsx(as.data.frame(AEDB.tableOne),
           file = file.path(BASELINE_loc,
                            paste0(Today, ".", PROJECTNAME, ".AE.BaselineTable.xlsx")),
           rowNames = TRUE,
           colNames = TRUE,
           sheetName = "AE_Base", overwrite = TRUE)

# All matched EntropyMasker samples; the "59" in the file and sheet name is
# hard-coded (presumably the number of matched samples -- TODO confirm).
write.xlsx(as.data.frame(AEDB_EM.tableOne),
           file = file.path(BASELINE_loc,
                            paste0(Today, ".", PROJECTNAME, ".AE.EM.59.BaselineTable.xlsx")),
           rowNames = TRUE,
           colNames = TRUE,
           sheetName = "AE_Base_EM_59", overwrite = TRUE)

# Consent-filtered EntropyMasker samples; the "56" is hard-coded as well
# (presumably the number remaining after the consent filter -- TODO confirm).
write.xlsx(as.data.frame(AEDB_EM.full.tableOne),
           file = file.path(BASELINE_loc,
                            paste0(Today, ".", PROJECTNAME, ".AE.EM.56.BaselineTable.xlsx")),
           rowNames = TRUE,
           colNames = TRUE,
           sheetName = "AE_Base_EM_56", overwrite = TRUE)

```

We will also write the newly prepared AEDB selected for this study which we can use in downstream analyses. 

```{r}
# Store the three datasets as date-stamped .RDS files in OUT_loc:
# the consent-filtered EntropyMasker subset, the consent-filtered cohort,
# and the complete database as prepared in this notebook.
saveRDS(AEDB_EM.full,
        file = file.path(OUT_loc, paste0(Today, ".", PROJECTNAME, ".AEDB.EM.FULL.RDS")))
saveRDS(AEDB.full,
        file = file.path(OUT_loc, paste0(Today, ".", PROJECTNAME, ".AEDB.FULL.RDS")))
saveRDS(AEDB,
        file = file.path(OUT_loc, paste0(Today, ".", PROJECTNAME, ".AEDB.raw.RDS")))

```


# Session information

------------------------------------------------------------------------------------------------------------------------

    Version:      v1.0.2
    Last update:  2022-07-12
    Written by:   Sander W. van der Laan (s.w.vanderlaan-2[at]umcutrecht.nl).
    Description:  Script to get some Athero-Express Biobank Study baseline characteristics.
    Minimum requirements: R version 3.4.3 (2017-06-30) -- 'Single Candle', Mac OS X El Capitan

    **MoSCoW To-Do List**
    The things we Must, Should, Could, and Would have given the time we have.
    _M_

    _S_

    _C_

    _W_

    **Changes log**
    * v1.0.2 Simplified the initial script. It now outputs the relevant R-objects (as .RDS).
    * v1.0.1 Update to main AEDB (there is an error in the Age-variable in the new version).
    * v1.0.0 Initial version. Add 'plaque vulnerability index', Fixed baseline table, added codes, and results. Major update to WORCS system.

------------------------------------------------------------------------------------------------------------------------

```{r eval = TRUE}
# Print the R version, platform and attached/loaded packages for reproducibility.
sessionInfo()
```

# Saving environment

```{r Saving}
# Save the complete workspace to a date-stamped .RData file in PROJECT_loc.
save.image(paste0(PROJECT_loc, "/",Today,".",PROJECTNAME,".AEDB.EM.baseline.RData"))
```

+-----------------------------------------------------------------------------------------------------------------------------------------+
| <sup>© 1979-2022 Sander W. van der Laan | s.w.vanderlaan[at]gmail.com | [swvanderlaan.github.io](https://swvanderlaan.github.io).</sup> |
+-----------------------------------------------------------------------------------------------------------------------------------------+

+
---
title: "Baseline characteristics"
author: "[Sander W. van der Laan, PhD](https://swvanderlaan.github.io) | @swvanderlaan | s.w.vanderlaan@gmail.com"
date: "`r Sys.Date()`"
output:
  html_notebook:
    cache: yes
    code_folding: hide
    collapse: yes
    df_print: paged
    fig.align: center
    fig_caption: yes
    fig_height: 6
    fig_retina: 2
    fig_width: 7
    highlight: tango
    theme: lumen
    toc: yes
    toc_float:
      collapsed: no
      smooth_scroll: yes
mainfont: Arial
subtitle: Accompanying 'EntropyMasker'
editor_options:
  chunk_output_type: inline
# bibliography: references.bib
# knit: worcs::cite_all
---

# General Setup
We will clean the environment, set up the locations, define colors, and create a datestamp.

_Clean the environment._
```{r echo = FALSE}
# Remove all (non-hidden) objects from the current environment.
# NOTE(review): this does not detach packages or reset options, and it wipes
# the workspace of whoever runs the notebook interactively -- consider
# restarting R instead.
rm(list = ls())
```

_Set locations and working directories..._
```{r LocalSystem, echo = FALSE}
# Run the local system script; per the notebook text it sets the locations and
# working directories (presumably the *_loc variables used later -- verify).
source("scripts/local.system.R")

```

_... a package-installation function ..._
```{r Function: installations}
# Run the functions script; per the notebook text it provides a
# package-installation function.
source("scripts/functions.R")

```


_... and load those packages._
```{r loading_packages, message=FALSE, warning=FALSE}
# Run the package script; per the notebook text it loads the required packages.
source("scripts/pack01.packages.R")

```

_We will create a datestamp and define the Utrecht Science Park Colour Scheme_.
```{r Setting: Colors}

# Datestamps: a compact one (YYYYMMDD) used in file names and a long one
# ("Weekday, Month DD, YYYY"). Sys.Date() is defined as
# as.Date(as.POSIXlt(Sys.time())), so it yields the same date.
Today <- format(Sys.Date(), "%Y%m%d")
Today.Report <- format(Sys.Date(), "%A, %B %d, %Y")

# Colour definitions (the Utrecht Science Park colour scheme, per the notebook text).
source("scripts/colors.R")

```

```{r global_options, include = FALSE}
# further define some knitr-options.
# These are defaults for all following chunks; options set on an individual
# chunk take precedence.
knitr::opts_chunk$set(
  fig.width = 12, fig.height = 8, fig.path = "Figures/",
  warning = TRUE, # show warnings in the output (was misspelled 'wwarning',
                  # which is not a knitr option)
  message = TRUE, # show messages in the output
  error = TRUE,   # do not interrupt rendering in case of errors,
                  # usually better for debugging
  echo = TRUE,    # show R code
  eval = TRUE     # evaluate the chunks
)
# Default ggplot2 theme for all plots.
ggplot2::theme_set(ggplot2::theme_minimal())
# Inf disables splitting of wide pander tables.
pander::panderOptions("table.split.table", Inf)
```

# This notebook 

In this notebook we create a baseline table of the samples used in **`EntropyMasker`**. 

# Athero-Express Biobank Study

The [*Athero-Express Biobank Study (AE)*](https://doi.org/10.1007/s10564-004-2304-6) contains plaque material of patients who underwent endarterectomy at two Dutch tertiary referral centers. Details of the study design were described before. Briefly, blood and plaque material were obtained during endarterectomy and stored at -80 ℃. All patients provided informed consent and the study was approved by the medical ethics committee.

## Load relevant samples

```{r}
# Read the list of image files used by EntropyMasker from the dataverse folder.
EM_samples <- fread(
  file.path(ANALYSIS_loc, "dataverse", "EntropyMasker_image_files_used.txt")
)
```

## Load data

Loading Athero-Express Biobank Study clinical and biobank data.

```{r LoadAEDB}
cat("* get Athero-Express Biobank Study Database...\n")
# METHOD 1: It seems this method gives loads of errors and warnings, which all are hard to comprehend
#           or debug. We expect 3,527 samples, and 927 variables; we get 927 variables!!!
# AEdata = as.data.table(read.spss(paste0(INP_loc,"/2017-1NEW_AtheroExpressDatabase_ScientificAE_20171306_v1.0.sav"),
#                                  trim.factor.names = TRUE, trim_values = TRUE, # we trim spaces in values
#                                  reencode = TRUE, # we re-encode to the local locale encoding
#                                  add.undeclared.levels = "append", # we do *not* want to convert to R-factors
#                                  use.value.labels = FALSE, # we do *not* convert variables with value labels into R factors
#                                  use.missings = TRUE, sub = "NA", # we will set every missing variable to NA
#                                  duplicated.value.labels = "condense", # we will condense duplicated value labels
#                                  to.data.frame = TRUE))
# AEdata.labels <- as.data.table(attr(AEdata, "variable.labels"))
# names(AEdata.labels) <- "Variable"

# METHOD 2: Using library("haven") importing seems flawless; best argument being:
#           we expect 3,527 samples and 888 variables, which is what you'd get with this method
#           So for now, METHOD 2 is preferred.
#
# library() stops immediately if haven is not installed; require() would only
# warn and return FALSE.
library(haven)

# Newer exports are not used because of the problems noted per file:
# AEDB <- haven::read_sav(paste0(AEDB_loc, "/2022_1_NEW_AtheroExpressDatabase_ScientificAE_15-02-2022.sav")) # something wrong with Age-variable
# AEDB <- haven::read_sav(paste0(AEDB_loc, "/2020_1_NEW_AtheroExpressDatabase_ScientificAE_30-09-2020.sav")) # duplicate studynumbers in it
AEDB <- haven::read_sav(
  file.path(AEDB_loc, "2020_1_NEW_AtheroExpressDatabase_ScientificAE_16-03-2020.sav")
)

# writing off the SPSS data to an Excel.
# fwrite(AEdata, file = paste0(INP_loc,"/2017-1NEW_AtheroExpressDatabase_ScientificAE_20171306_v1.0.values.xlsx"), 
#        sep = ";", na = "NA", dec = ".", col.names = TRUE, row.names = FALSE,
#        dateTimeAs = "ISO", showProgress = TRUE, verbose = TRUE)
# warnings()

# Quick look at the top-left corner and the dimensions of the imported data.
AEDB[1:10, 1:10]
dim(AEDB)

```



<!-- ## Examine AEDB -->

<!-- We can examine the contents of the Athero-Express Biobank dataset to know what each variable is called, what class -->
<!-- (type) it has, and what the variable description is. -->

<!-- There is an excellent post on this: <https://www.r-bloggers.com/working-with-spss-labels-in-r/>. -->

<!-- ```{r AEDB: describe} -->
<!-- AEDB %>% sjPlot::view_df(show.type = TRUE, -->
<!--                          show.frq = TRUE, -->
<!--                          show.prc = TRUE, -->
<!--                          show.na = TRUE,  -->
<!--                          max.len = TRUE,  -->
<!--                          wrap.labels = 20, -->
<!--                          verbose = FALSE,  -->
<!--                          use.viewer = FALSE, -->
<!--                          file = paste0(OUT_loc, "/", Today, ".AEDB.dictionary.html"))  -->
<!-- ``` -->

## Fixing and creating variables

We have to fix certain clinical parameters:

-   symptoms
-   diabetes
-   alcohol use
-   smoking
-   plaque phenotypes

### Symptoms

We need to be very strict in defining *symptoms.* Therefore we will fix a new
variable that groups *symptoms* at inclusion.

Coding of *symptoms* is as follows:

-   missing -999
-   Asymptomatic 0
-   TIA 1
-   minor stroke 2
-   Major stroke 3
-   Amaurosis fugax 4
-   Four vessel disease 5
-   Vertebrobasilary TIA 7
-   Retinal infarction 8
-   Symptomatic, but aspecific symptoms 9
-   Contralateral symptomatic occlusion 10
-   retinal infarction 11
-   armclaudication due to occlusion subclavian artery, CEA needed for bypass
    12
-   retinal infarction + TIAs 13
-   Ocular ischemic syndrome 14
-   ischemisch glaucoom 15
-   subclavian steal syndrome 16
-   TGA 17

We will group as follows:

1.  Asymptomatic > 0
2.  TIA > 1, 7, 13
3.  Stroke > 2, 3
4.  Ocular > 4, 14, 15
5.  Retinal infarction > 8, 11
6.  Other > 5, 9, 10, 12, 16, 17

```{r FixSymptoms, message=FALSE, warning=FALSE}

# Fix symptoms
#
# Collapse the raw `sympt` codes into three new character columns of AEDB.
# The column is referenced explicitly as AEDB$sympt instead of via attach(),
# so a same-named global object cannot mask it and nothing stays attached if
# the chunk stops halfway. Codes that match none of the groups (including
# missing values) leave the new variable at NA.

# Symptoms.5G: the detailed grouping listed in the text above.
AEDB[, "Symptoms.5G"] <- NA
AEDB$Symptoms.5G[AEDB$sympt %in% 0] <- "Asymptomatic"
AEDB$Symptoms.5G[AEDB$sympt %in% c(1, 7, 13)] <- "TIA"
AEDB$Symptoms.5G[AEDB$sympt %in% c(2, 3)] <- "Stroke"
AEDB$Symptoms.5G[AEDB$sympt %in% c(4, 14, 15)] <- "Ocular"
AEDB$Symptoms.5G[AEDB$sympt %in% c(8, 11)] <- "Retinal infarction"
AEDB$Symptoms.5G[AEDB$sympt %in% c(5, 9, 10, 12, 16, 17)] <- "Other"


# AsymptSympt: asymptomatic / symptomatic (TIA and stroke) / ocular and others
AEDB[, "AsymptSympt"] <- NA
AEDB$AsymptSympt[AEDB$sympt %in% 0] <- "Asymptomatic"
AEDB$AsymptSympt[AEDB$sympt %in% c(1, 7, 13, 2, 3)] <- "Symptomatic"
AEDB$AsymptSympt[AEDB$sympt %in% c(4, 14, 15, 8, 11, 5, 9, 10, 12, 16, 17)] <- "Ocular and others"

# AsymptSympt2G: asymptomatic versus every other listed code
AEDB[, "AsymptSympt2G"] <- NA
AEDB$AsymptSympt2G[AEDB$sympt %in% 0] <- "Asymptomatic"
AEDB$AsymptSympt2G[AEDB$sympt %in% c(1, 7, 13, 2, 3, 4, 14, 15, 8, 11, 5, 9, 10, 12, 16, 17)] <- "Symptomatic"

# table(AEDB$sympt, useNA = "ifany")
# table(AEDB$AsymptSympt2G, useNA = "ifany")
# table(AEDB$Symptoms.5G, useNA = "ifany")
# 
# table(AEDB$AsymptSympt2G, AEDB$sympt, useNA = "ifany")
# table(AEDB$Symptoms.5G, AEDB$sympt, useNA = "ifany")
# Check: the two-group variable against the detailed grouping.
table(AEDB$AsymptSympt2G, AEDB$Symptoms.5G, useNA = "ifany")

# AEDB.temp <- subset(AEDB,  select = c("STUDY_NUMBER", "UPID", "Age", "Gender", "Hospital", "Artery_summary", "sympt", "Symptoms.5G", "AsymptSympt"))
# require(labelled)
# AEDB.temp$Gender <- to_factor(AEDB.temp$Gender)
# AEDB.temp$Hospital <- to_factor(AEDB.temp$Hospital)
# AEDB.temp$Artery_summary <- to_factor(AEDB.temp$Artery_summary)
# 
# DT::datatable(AEDB.temp[1:10,], caption = "Excerpt of the whole AEDB.", rownames = FALSE)
# 
# table(AEDB.temp$Symptoms.5G, AEDB.temp$AsymptSympt)
# 
# rm(AEDB.temp)
```

### Re-assessed symptoms

We re-assessed the categorization of symptoms. These are summarized and parsed
in this section.

Labeling of new symptom categories.

```{r message=FALSE, warning=FALSE, paged.print=TRUE}

# Print the four re-assessed symptom variables (full and 4-group versions of
# the 'worst' and 'latest' index symptoms) so their coding can be inspected.
# NOTE(review): presumably these are labelled vectors, so printing also shows
# the value labels -- confirm against the data import.
AEDB$indexsymptoms_worst
AEDB$indexsymptoms_worst_4g
AEDB$indexsymptoms_latest
AEDB$indexsymptoms_latest_4g

```

Getting counts for each of the most important categories.

```{r}
# Counts for the re-assessed 4-group symptom variables. All three tables go
# through as_factor() so that they consistently show the category labels
# rather than the raw numeric codes.
cat("New 'worst' vs 'latest' symptom categories.\n")
table(as_factor(AEDB$indexsymptoms_worst_4g), as_factor(AEDB$indexsymptoms_latest_4g))

cat("\nNew 'worst' symptom categories.\n")
table(as_factor(AEDB$indexsymptoms_worst_4g))

cat("\nNew 'latest' symptom categories.\n")
table(as_factor(AEDB$indexsymptoms_latest_4g))
```

Comparing with the original symptom categories.

```{r}
# Cross-tabulate the re-assessed 'latest' 4-group coding (rows) against the
# original derived groupings (columns): first the 2-group, then the 5G variable.
cat("New 'latest' vs original symptom 2G categories.\n")
table(AEDB[["indexsymptoms_latest_4g"]], AEDB[["AsymptSympt2G"]])

cat("\nNew 'latest' vs original symptom 5G categories.\n")
table(AEDB[["indexsymptoms_latest_4g"]], AEDB[["Symptoms.5G"]])
   
```

We need to be very strict in defining *symptoms.* Therefore we will fix a new
variable that groups *symptoms* at inclusion.

Coding of *symptoms* is as follows:

-   asympt 0\
-   ocular 1\
-   TIA 2\
-   stroke 3\
-   unclear 9

We will group as follows:

1.  Asymptomatic > 0
2.  Symptomatic > 1, 2, 3
3.  NA > 9

```{r FixNewSymptoms}

# Fix symptoms
#
# Collapse the re-assessed `indexsymptoms_latest_4g` code (0 = asymptomatic,
# 1 = ocular, 2 = TIA, 3 = stroke, 9 = unclear) into two new variables.
# The source column is referenced explicitly instead of via attach()/detach(),
# which would be masked by any global object of the same name.

# Symptoms.Update2G: "unclear" (9) is treated as missing, i.e. it keeps the
# initial NA.
AEDB[, "Symptoms.Update2G"] <- NA
AEDB$Symptoms.Update2G[AEDB$indexsymptoms_latest_4g == 0] <- "Asymptomatic"
AEDB$Symptoms.Update2G[AEDB$indexsymptoms_latest_4g %in% c(1, 2, 3)] <- "Symptomatic"

# Symptoms.Update3G: same grouping, but "unclear" (9) is kept as its own level.
AEDB[, "Symptoms.Update3G"] <- NA
AEDB$Symptoms.Update3G[AEDB$indexsymptoms_latest_4g == 0] <- "Asymptomatic"
AEDB$Symptoms.Update3G[AEDB$indexsymptoms_latest_4g %in% c(1, 2, 3)] <- "Symptomatic"
AEDB$Symptoms.Update3G[AEDB$indexsymptoms_latest_4g == 9] <- "Unclear"

# Sanity check against the original detailed grouping.
table(AEDB$Symptoms.Update2G, AEDB$Symptoms.5G, useNA = "ifany")
table(AEDB$Symptoms.Update3G, AEDB$Symptoms.5G, useNA = "ifany")

```

### Other clinical characteristics

We will also fix the *diabetes* status variable.

```{r FixDiabetes, message=FALSE, warning=FALSE}

# Fix diabetes
#
# Recode `DM.composite` (0/1) into the character variable `DiabetesStatus`.
# Any other value (e.g. -999) or a missing value keeps the initial NA.
# `DM.composite` is referenced explicitly instead of via attach()/detach(),
# which would be masked by any global object of the same name.
AEDB[, "DiabetesStatus"] <- NA
AEDB$DiabetesStatus[AEDB$DM.composite == 0] <- "Control (no Diabetes Dx/Med)"
AEDB$DiabetesStatus[AEDB$DM.composite == 1] <- "Diabetes"

# Sanity check: original code versus the new label.
table(AEDB$DM.composite, AEDB$DiabetesStatus)
# AEDB.temp <- subset(AEDB,  select = c("STUDY_NUMBER", "UPID", "Age", "Gender", "Hospital", "Artery_summary", "DM.composite", "DiabetesStatus"))
# require(labelled)
# AEDB.temp$Gender <- to_factor(AEDB.temp$Gender)
# AEDB.temp$Hospital <- to_factor(AEDB.temp$Hospital)
# AEDB.temp$Artery_summary <- to_factor(AEDB.temp$Artery_summary)
# AEDB.temp$DiabetesStatus <- to_factor(AEDB.temp$DiabetesStatus)
# 
# DT::datatable(AEDB.temp[1:10,], caption = "Excerpt of the whole AEDB.", rownames = FALSE)
# 
# rm(AEDB.temp)

```

We will also fix the *smoking* status variable. We are interested in whether
someone has never smoked, has smoked in the past, or is currently smoking (at
the time of inclusion). This is based on the questionnaire.

-   `diet801`: are you a smoker?
-   `diet802`: did you smoke in the past?

We already have some variables indicating smoking status:

-   `SmokingReported`: patient has reported to smoke.
-   `SmokingYearOR`: smoking in the year of surgery?
-   `SmokerCurrent`: currently smoking?

```{r FixSmoking, message=FALSE, warning=FALSE}
# Fix smoking status
#
# Build `SmokerStatus` ("Never smoked" / "Ex-smoker" / "Current smoker") from
# the questionnaire item `diet802` and the existing `SmokerCurrent` variable.
# library() is used instead of require() so a missing package fails right here.
library(labelled)
AEDB$diet801 <- to_factor(AEDB$diet801)
AEDB$diet802 <- to_factor(AEDB$diet802)
AEDB$diet805 <- to_factor(AEDB$diet805)
AEDB$SmokingReported <- to_factor(AEDB$SmokingReported)
AEDB$SmokerCurrent <- to_factor(AEDB$SmokerCurrent)
AEDB$SmokingYearOR <- to_factor(AEDB$SmokingYearOR)

# table(AEDB$diet801)
# table(AEDB$diet802)
# table(AEDB$SmokingReported)
# table(AEDB$SmokerCurrent)
# table(AEDB$SmokingYearOR)
# table(AEDB$SmokingReported, AEDB$SmokerCurrent, useNA = "ifany", dnn = c("Reported smoking", "Current smoker"))
# 
# table(AEDB$diet801, AEDB$diet802, useNA = "ifany", dnn = c("Smoker", "Past smoker"))

cat("\nFixing smoking status.\n")
# The rules below are applied in order and later rules overwrite earlier ones,
# so `SmokerCurrent` takes precedence over the questionnaire-only rules.
# Columns are referenced explicitly instead of via attach()/detach(), which
# would be masked by any global object of the same name.
AEDB[, "SmokerStatus"] <- NA
# NOTE(review): "don't know" on past smoking is coded as "Never smoked" --
# confirm that this is intended and should not be left missing.
AEDB$SmokerStatus[AEDB$diet802 == "don't know"] <- "Never smoked"
AEDB$SmokerStatus[AEDB$diet802 == "I still smoke"] <- "Current smoker"
AEDB$SmokerStatus[AEDB$SmokerCurrent == "no" & AEDB$diet802 == "no"] <- "Never smoked"
AEDB$SmokerStatus[AEDB$SmokerCurrent == "no" & AEDB$diet802 == "yes"] <- "Ex-smoker"
AEDB$SmokerStatus[AEDB$SmokerCurrent == "yes"] <- "Current smoker"
# An explicitly missing `SmokerCurrent` resets the status to NA.
AEDB$SmokerStatus[AEDB$SmokerCurrent == "no data available/missing"] <- NA
# AEDB$SmokerStatus[is.na(SmokerCurrent)] <- "Never smoked"

cat("\n* Current smoking status.\n")
table(AEDB$SmokerCurrent,
      useNA = "ifany", 
      dnn = c("Current smoker"))

cat("\n* Updated smoking status.\n")
table(AEDB$SmokerStatus,
      useNA = "ifany", 
      dnn = c("Updated smoking status"))

cat("\n* Comparing to 'SmokerCurrent'.\n")
table(AEDB$SmokerStatus, AEDB$SmokerCurrent, 
      useNA = "ifany", 
      dnn = c("Updated smoking status", "Current smoker"))

# AEDB.temp <- subset(AEDB,  select = c("STUDY_NUMBER", "UPID", "Age", "Gender", "Hospital", "Artery_summary", "DM.composite", "DiabetesStatus"))
# require(labelled)
# AEDB.temp$Gender <- to_factor(AEDB.temp$Gender)
# AEDB.temp$Hospital <- to_factor(AEDB.temp$Hospital)
# AEDB.temp$Artery_summary <- to_factor(AEDB.temp$Artery_summary)
# AEDB.temp$DiabetesStatus <- to_factor(AEDB.temp$DiabetesStatus)
# 
# DT::datatable(AEDB.temp[1:10,], caption = "Excerpt of the whole AEDB.", rownames = FALSE)
# 
# rm(AEDB.temp)


```

We will also fix the *alcohol* status variable.

```{r FixAlcohol, message=FALSE, warning=FALSE}

# Fix alcohol use
#
# Recode the questionnaire item `diet810` (0/1) into the character variable
# `AlcoholUse`. Any other value (e.g. -999) or a missing value keeps the
# initial NA. `diet810` is referenced explicitly instead of via
# attach()/detach(), which would be masked by any global object of the same name.
AEDB[, "AlcoholUse"] <- NA
AEDB$AlcoholUse[AEDB$diet810 == 0] <- "No"
AEDB$AlcoholUse[AEDB$diet810 == 1] <- "Yes"

# AEDB.temp <- subset(AEDB,  select = c("STUDY_NUMBER", "UPID", "Age", "Gender", "Hospital", "Artery_summary", "diet810", "AlcoholUse"))
# require(labelled)
# AEDB.temp$Gender <- to_factor(AEDB.temp$Gender)
# AEDB.temp$Hospital <- to_factor(AEDB.temp$Hospital)
# AEDB.temp$Artery_summary <- to_factor(AEDB.temp$Artery_summary)
# AEDB.temp$AlcoholUse <- to_factor(AEDB.temp$AlcoholUse)
# 
# DT::datatable(AEDB.temp[1:10,], caption = "Excerpt of the whole AEDB.", rownames = FALSE)
# 
# rm(AEDB.temp)


```

We will also create a variable that captures any history of cardiovascular disease (CAD, stroke, or peripheral intervention). It is based on `CAD_history`, `Stroke_history`, and `Peripheral.interv`.

```{r FixCAD_History, message=FALSE, warning=FALSE}

# Fix history of cardiovascular disease
#
# Combine `CAD_history`, `Stroke_history` and `Peripheral.interv` into
# `MedHx_CVD`. The two assignments are applied in order, so the effective rule is:
#   - "yes" if any of the three equals 1;
#   - otherwise "No" if any of the three equals 0;
#   - otherwise NA.
# Columns are referenced explicitly instead of via attach()/detach(), which
# would be masked by any global object of the same name.
# NOTE(review): the labels differ in capitalisation ("No" vs "yes"); they are
# left unchanged here because other code may match on these exact strings.
AEDB[, "MedHx_CVD"] <- NA
AEDB$MedHx_CVD[AEDB$CAD_history == 0 | AEDB$Stroke_history == 0 | AEDB$Peripheral.interv == 0] <- "No"
AEDB$MedHx_CVD[AEDB$CAD_history == 1 | AEDB$Stroke_history == 1 | AEDB$Peripheral.interv == 1] <- "yes"

# Sanity check: counts of the three inputs and of the combined variable.
table(AEDB$CAD_history)
table(AEDB$Stroke_history)
table(AEDB$Peripheral.interv)
table(AEDB$MedHx_CVD)

# AEDB.temp <- subset(AEDB,  select = c("STUDY_NUMBER", "UPID", "Age", "Gender", "Hospital", "Artery_summary", "diet810", "AlcoholUse"))
# require(labelled)
# AEDB.temp$Gender <- to_factor(AEDB.temp$Gender)
# AEDB.temp$Hospital <- to_factor(AEDB.temp$Hospital)
# AEDB.temp$Artery_summary <- to_factor(AEDB.temp$Artery_summary)
# AEDB.temp$AlcoholUse <- to_factor(AEDB.temp$AlcoholUse)
# 
# DT::datatable(AEDB.temp[1:10,], caption = "Excerpt of the whole AEDB.", rownames = FALSE)
# 
# rm(AEDB.temp)


```

### Plaque phenotypes

We will also fix the *plaquephenotypes* variable.

Coding of plaque phenotypes is as follows:

-   missing -999\
-   not relevant -888
-   fibrous 1\
-   fibroatheromatous 2\
-   atheromatous 3

```{r FixPlaquePhenotypes, message=FALSE, warning=FALSE}

# Fix plaquephenotypes
#
# Recode `plaquephenotype` (1 = fibrous, 2 = fibroatheromatous,
# 3 = atheromatous) into the character variable `OverallPlaquePhenotype`.
# `plaquephenotype` is referenced explicitly instead of via attach()/detach(),
# which would be masked by any global object of the same name.
AEDB[, "OverallPlaquePhenotype"] <- NA
# Both special codes are set to missing explicitly: -999 = missing,
# -888 = not relevant.
AEDB$OverallPlaquePhenotype[AEDB$plaquephenotype == -999] <- NA
AEDB$OverallPlaquePhenotype[AEDB$plaquephenotype == -888] <- NA
AEDB$OverallPlaquePhenotype[AEDB$plaquephenotype == 1] <- "fibrous"
AEDB$OverallPlaquePhenotype[AEDB$plaquephenotype == 2] <- "fibroatheromatous"
AEDB$OverallPlaquePhenotype[AEDB$plaquephenotype == 3] <- "atheromatous"

# AEDB.temp <- subset(AEDB,  select = c("STUDY_NUMBER", "UPID", "Age", "Gender", "Hospital", "Artery_summary", "plaquephenotype", "OverallPlaquePhenotype"))
# require(labelled)
# AEDB.temp$Gender <- to_factor(AEDB.temp$Gender)
# AEDB.temp$Hospital <- to_factor(AEDB.temp$Hospital)
# AEDB.temp$Artery_summary <- to_factor(AEDB.temp$Artery_summary)
# 
# DT::datatable(AEDB.temp[1:10,], caption = "Excerpt of the whole AEDB.", rownames = FALSE)
# 
# rm(AEDB.temp)

```

We will also fix and inverse-rank normal transform the continuous (manually)
scored plaque phenotypes.

```{r IRNT PlaquePhenotypes}
# Inverse-rank normal transformation of the continuous plaque phenotypes.
# For each pair (new column = source column) the source is coerced to numeric
# in place and then mapped to normal quantiles of (rank - 0.5) / n, where n is
# the number of non-missing values; missing values stay missing.
irnt_columns <- c(
  MAC_rankNorm = "macmean0",
  SMC_rankNorm = "smcmean0",
  Neutrophils_rankNorm = "neutrophils",
  MastCells_rankNorm = "Mast_cells_plaque",
  VesselDensity_rankNorm = "vessel_density_averaged"
)

for (irnt_target in names(irnt_columns)) {
  irnt_source <- irnt_columns[[irnt_target]]
  AEDB[[irnt_source]] <- as.numeric(AEDB[[irnt_source]])
  irnt_ranks <- rank(AEDB[[irnt_source]], na.last = "keep")
  AEDB[[irnt_target]] <- qnorm((irnt_ranks - 0.5) / sum(!is.na(AEDB[[irnt_source]])))
}

# Remove the loop helpers again so the workspace only gains the new columns.
rm(irnt_columns, irnt_target, irnt_source, irnt_ranks)

```

```{r IRNT PlaquePhenotypes: Visualisation}
# Side-by-side histograms (raw | inverse-rank normalized) for each continuous
# plaque phenotype, filled by sex, with the median marked and a rug added.
library(labelled)
library(patchwork)
AEDB$Gender <- to_factor(AEDB$Gender)

# Temporary helper: one histogram of `column` from AEDB with the shared
# styling; only the column, title and x-axis label vary between panels.
gender_histogram <- function(column, title, xlab) {
  ggpubr::gghistogram(AEDB, column,
                      color = "white",
                      fill = "Gender",
                      palette = c("#1290D9", "#DB003F"),
                      add = "median",
                      rug = TRUE,
                      title = title,
                      xlab = xlab,
                      ggtheme = theme_minimal())
}

# Macrophages
p1 <- gender_histogram("macmean0",
                       title = "% of macrophages (CD68)",
                       xlab = "% per region of interest")
p2 <- gender_histogram("MAC_rankNorm",
                       title = "% of macrophages (CD68)",
                       xlab = "% per region of interest\ninverse-rank normalized number")
p1 | p2

# Smooth muscle cells
p1 <- gender_histogram("smcmean0",
                       title = "% of smooth muscle cells (SMA)",
                       xlab = "% per region of interest")
p2 <- gender_histogram("SMC_rankNorm",
                       title = "% of smooth muscle cells (SMA)",
                       xlab = "% per region of interest\ninverse-rank normalized number")
p1 | p2

# Neutrophils
p1 <- gender_histogram("neutrophils",
                       title = "number of neutrophils (CD66b)",
                       xlab = "counts per plaque")
p2 <- gender_histogram("Neutrophils_rankNorm",
                       title = "number of neutrophils (CD66b)",
                       xlab = "counts per plaque\ninverse-rank normalized number")
p1 | p2

# Mast cells
p1 <- gender_histogram("Mast_cells_plaque",
                       title = "number of mast cells",
                       xlab = "counts per plaque")
p2 <- gender_histogram("MastCells_rankNorm",
                       title = "number of mast cells",
                       xlab = "counts per plaque\ninverse-rank normalized number")
p1 | p2

# Intraplaque vessel density
p1 <- gender_histogram("vessel_density_averaged",
                       title = "number of intraplaque neovessels",
                       xlab = "counts per 3-4 hotspots")
p2 <- gender_histogram("VesselDensity_rankNorm",
                       title = "number of intraplaque neovessels",
                       xlab = "counts per 3-4 hotspots\ninverse-rank normalized number")
p1 | p2

# Clean up the plot objects and the temporary helper.
rm(p1, p2, gender_histogram)
```

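Here we calculate the *plaque instability/vulnerability* index as the sum of five binary instability scores (macrophages, fat, collagen, smooth muscle cells, and intraplaque haemorrhage).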

```{r Plaque Vulnerability}
# Plaque vulnerability
#
# Convert the binary plaque characteristics to factors, score each one as
# 0 (stable) / 1 (unstable), and sum the five scores into
# `Plaque_Vulnerability_Index`.
# library() is used instead of require() so a missing package fails right here.
library(labelled)
AEDB$Macrophages.bin <- to_factor(AEDB$Macrophages.bin)
AEDB$SMC.bin <- to_factor(AEDB$SMC.bin)
AEDB$IPH.bin <- to_factor(AEDB$IPH.bin)
AEDB$Calc.bin <- to_factor(AEDB$Calc.bin)
AEDB$Collagen.bin <- to_factor(AEDB$Collagen.bin)
AEDB$Fat.bin_10 <- to_factor(AEDB$Fat.bin_10)
AEDB$Fat.bin_40 <- to_factor(AEDB$Fat.bin_40)

table(AEDB$Macrophages.bin)
table(AEDB$Fat.bin_10)
table(AEDB$Collagen.bin)
table(AEDB$SMC.bin)
table(AEDB$IPH.bin)

# SPSS code

# 
# *** syntax- Plaque vulnerability**.
# COMPUTE Macro_instab = -999.
# IF macrophages.bin=2 Macro_instab=1.
# IF macrophages.bin=1 Macro_instab=0.
# EXECUTE.
# 
# COMPUTE Fat10_instab = -999.
# IF Fat.bin_10=2 Fat10_instab=1.
# IF Fat.bin_10=1 Fat10_instab=0.
# EXECUTE.
# 
# COMPUTE coll_instab=-999.
# IF Collagen.bin=2 coll_instab=0.
# IF Collagen.bin=1 coll_instab=1.
# EXECUTE.
# 
# 
# COMPUTE SMC_instab=-999.
# IF SMC.bin=2 SMC_instab=0.
# IF SMC.bin=1 SMC_instab=1.
# EXECUTE.
# 
# COMPUTE IPH_instab=-999.
# IF IPH.bin=0 IPH_instab=0.
# IF IPH.bin=1 IPH_instab=1.
# EXECUTE.
# 
# COMPUTE Instability=Macro_instab + Fat10_instab +  coll_instab + SMC_instab + IPH_instab.
# EXECUTE.

# Score the individual instability components.
# Each score starts as NA and is only set for the two known factor levels, so
# every other level (and missing values) stays NA. Columns are referenced
# explicitly instead of via attach()/detach(), which would be masked by any
# global object of the same name.

# mac instability: moderate/heavy macrophage staining counts as unstable
AEDB[, "MAC_Instability"] <- NA
AEDB$MAC_Instability[AEDB$Macrophages.bin == "no/minor"] <- 0
AEDB$MAC_Instability[AEDB$Macrophages.bin == "moderate/heavy"] <- 1

# fat instability: more than 10% fat counts as unstable
# (the level labels carry a leading space)
AEDB[, "FAT10_Instability"] <- NA
AEDB$FAT10_Instability[AEDB$Fat.bin_10 == " <10%"] <- 0
AEDB$FAT10_Instability[AEDB$Fat.bin_10 == " >10%"] <- 1

# col instability: no/minor collagen counts as unstable
AEDB[, "COL_Instability"] <- NA
AEDB$COL_Instability[AEDB$Collagen.bin == "no/minor"] <- 1
AEDB$COL_Instability[AEDB$Collagen.bin == "moderate/heavy"] <- 0

# smc instability: no/minor smooth muscle cell staining counts as unstable
AEDB[, "SMC_Instability"] <- NA
AEDB$SMC_Instability[AEDB$SMC.bin == "no/minor"] <- 1
AEDB$SMC_Instability[AEDB$SMC.bin == "moderate/heavy"] <- 0

# iph instability: presence of intraplaque haemorrhage counts as unstable
AEDB[, "IPH_Instability"] <- NA
AEDB$IPH_Instability[AEDB$IPH.bin == "no"] <- 0
AEDB$IPH_Instability[AEDB$IPH.bin == "yes"] <- 1

table(AEDB$MAC_Instability, useNA = "ifany")
table(AEDB$FAT10_Instability, useNA = "ifany")
table(AEDB$COL_Instability, useNA = "ifany")
table(AEDB$SMC_Instability, useNA = "ifany")
table(AEDB$IPH_Instability, useNA = "ifany")

# creating vulnerability index: row-wise sum over every column whose name
# contains "_Instability", stored as a factor.
# NOTE(review): with na.rm = TRUE a missing component contributes 0, so a
# sample with all five components missing gets index 0 rather than NA --
# confirm that this is intended.
AEDB <- AEDB %>%
  mutate(Plaque_Vulnerability_Index = factor(rowSums(.[grep("_Instability", names(.))], na.rm = TRUE)))

table(AEDB$Plaque_Vulnerability_Index, useNA = "ifany")

# str(AEDB$Plaque_Vulnerability_Index)

```

## Prepare baseline summary

We are interested in the following variables at baseline in the whole cohort.

-   Age (years)
-   Female sex (N, %)
-   Artery type (N, %)
-   Hospital (N, %)
<!-- -   Hypertension (N, %) -->
<!-- -   SBP (mmHg) -->
<!-- -   DBP (mmHg) -->
<!-- -   Diabetes mellitus (N, %) -->
<!-- -   Total cholesterol levels (mg/dL) -->
<!-- -   LDL cholesterol levels (mg/dL) -->
<!-- -   HDL cholesterol levels (mg/dL) -->
<!-- -   Triglyceride levels (mg/dL) -->
<!-- -   Use of statins (N, %) -->
<!-- -   Use of antiplatelet drugs (N, %) -->
<!-- -   BMI (kg/m²) -->
<!-- -   Smoking status (N, %) -->
<!--     -   Never smokers -->
<!--     -   Ex-smokers -->
<!--     -   Current smokers -->
<!-- -   History of CAD (N, %) -->
<!-- -   History of PAD (N, %) -->
<!-- -   Clinical manifestations -->
<!--     -   Asymptomatic -->
<!--     -   Amaurosis fugax -->
<!--     -   TIA -->
<!--     -   Stroke -->
<!-- -   eGFR (mL/min/1.73 m²) -->
<!-- -   stenosis -->
<!-- -   year of surgery -->
<!-- -   plaque characteristics -->

```{r Baseline AEDB: preparation}
# Print a banner to the chunk output that marks the start of the
# sample-selection step.
cat("====================================================================================================\n")
cat("SELECTION THE SHIZZLE\n")

### Artery levels
# AEdata$Artery_summary: 
#           value                                                                                   label
# NOT USE - 0 No artery known (yet), no surgery (patient ill, died, exited study), re-numbered to AAA
# USE - 1                                                                  carotid (left & right)
# USE - 2                                               femoral/iliac (left, right or both sides)
# NOT USE - 3                                               other carotid arteries (common, external)
# NOT USE - 4                                   carotid bypass and injury (left, right or both sides)
# NOT USE - 5                                                         aneurysmata (carotid & femoral)
# NOT USE - 6                                                                                   aorta
# NOT USE - 7                                            other arteries (renal, popliteal, vertebral)
# NOT USE - 8                        femoral bypass, angioseal and injury (left, right or both sides)

### AEdata$informedconsent
#           value                                                                                           label
# NOT USE - -999                                                                                         missing
# NOT USE - 0                                                                                        no, died
# USE - 1                                                                                             yes
# USE - 2                                                             yes, health treatment when possible
# USE - 3                                                                        yes, no health treatment
# USE - 4                                                yes, no health treatment, no commercial business
# NOT USE - 5                                                          yes, no tissue, no commerical business
# NOT USE - 6                      yes, no tissue, no questionnaires, no medical info, no commercial business
# USE - 7                             yes, no questionnaires, no health treatment, no commercial business
# USE - 8                                          yes, no questionnaires, health treatment when possible
# NOT USE - 9                  yes, no tissue, no questionnaires, no health treatment, no commerical business
# USE - 10                               yes, no health treatment, no medical info, no commercial business
# NOT USE - 11 yes, no tissue, no questionnaires, no health treatment, no medical info, no commercial business
# USE - 12                                                     yes, no questionnaires, no health treatment
# NOT USE - 13                                                             yes, no tissue, no health treatment
# NOT USE - 14                                                               yes, no tissue, no questionnaires
# NOT USE - 15                                                  yes, no tissue, health treatment when possible
# NOT USE - 16                                                                                  yes, no tissue
# USE - 17                                                                     yes, no commerical business
# USE - 18                                     yes, health treatment when possible, no commercial business
# USE - 19                                                    yes, no medical info, no commercial business
# USE - 20                                                                          yes, no questionnaires
# NOT USE - 21                         yes, no tissue, no questionnaires, no health treatment, no medical info
# NOT USE - 22                  yes, no tissue, no questionnaires, no health treatment, no commercial business
# USE - 23                                                                            yes, no medical info
# USE - 24                                                  yes, no questionnaires, no commercial business
# USE - 25                                    yes, no questionnaires, no health treatment, no medical info
# USE - 26                  yes, no questionnaires, health treatment when possible, no commercial business
# USE - 27                                                      yes,  no health treatment, no medical info
# NOT USE - 28                                                                             no, doesn't want to
# NOT USE - 29                                                                              no, unable to sign
# NOT USE - 30                                                                                 no, no reaction
# NOT USE - 31                                                                                        no, lost
# NOT USE - 32                                                                                     no, too old
# NOT USE - 34                                            yes, no medical info, health treatment when possible
# NOT USE - 35                                             no (never asked for IC because there was no tissue)
# USE - 36                    yes, no medical info, no commercial business, health treatment when possible
# NOT USE - 37                                                                                    no, endpoint
# USE - 38                                                         wil niets invullen, wel alles gebruiken
# USE - 39                                           second informed concents: yes, no commercial business
# NOT USE - 40                                                                              nooit geincludeerd

# Sanity check before selection: cross-tabulate sex by hospital and artery
# type by sex on temporary factor copies, which are removed again afterwards.
cat("- sanity checking PRIOR to selection")
library(data.table)
library(labelled)
ae.gender <- to_factor(AEDB$Gender)
ae.hospital <- to_factor(AEDB$Hospital)
table(ae.gender, ae.hospital, dnn = c("Sex", "Hospital"))
ae.artery <- to_factor(AEDB$Artery_summary)
# Rows are artery type and columns are sex, so the dimension names must be
# given in that order.
table(ae.artery, ae.gender, dnn = c("Artery", "Sex"))

rm(ae.gender, ae.hospital, ae.artery)

# I change numeric and factors manually because, well, I wouldn't know how to fix it otherwise
# to have this 'tibble' work with 'tableone'... :-)

# Columns that are coerced to plain numeric vectors, in place.
baseline_numeric_columns <- c(
  "Age", "diastoli", "systolic",
  "TC_finalCU", "LDL_finalCU", "HDL_finalCU", "TG_finalCU",
  "TC_final", "LDL_final", "HDL_final", "TG_final",
  "GFR_MDRD", "BMI",
  "eCigarettes", "ePackYearsSmoking",
  "EP_composite_time", "EP_major_time"
)
for (baseline_column in baseline_numeric_columns) {
  AEDB[[baseline_column]] <- as.numeric(AEDB[[baseline_column]])
}

require(labelled)

# Columns that are converted to factors, in place.
baseline_factor_columns <- c(
  "Artery_summary", "ORyear", "Gender", "Hospital", "KDOQI", "BMI_WHO",
  "DiabetesStatus", "SmokerStatus", "AlcoholUse",
  "Hypertension.composite", "Hypertension.drugs",
  "Med.anticoagulants", "Med.all.antiplatelet", "Med.Statin.LLD",
  "Stroke_Dx", "CAD_history", "PAOD", "Peripheral.interv",
  "sympt", "Symptoms.3g", "Symptoms.4g", "Symptoms.5G",
  "AsymptSympt", "AsymptSympt2G",
  "Symptoms.Update2G", "Symptoms.Update3G",
  "restenos", "stenose", "EP_composite", "EP_major",
  "Macrophages.bin", "SMC.bin", "IPH.bin", "Calc.bin", "Collagen.bin",
  "Fat.bin_10", "Fat.bin_40",
  "OverallPlaquePhenotype", "Plaque_Vulnerability_Index",
  "informedconsent"
)
for (baseline_column in baseline_factor_columns) {
  AEDB[[baseline_column]] <- to_factor(AEDB[[baseline_column]])
}

# Two new factor columns derived from the self-reported hypertension items;
# the source columns `Hypertension1` and `Hypertension2` are left untouched.
AEDB[["Hypertension.selfreport"]] <- to_factor(AEDB[["Hypertension1"]])
AEDB[["Hypertension.selfreportdrug"]] <- to_factor(AEDB[["Hypertension2"]])

# Remove the loop helpers again.
rm(baseline_numeric_columns, baseline_factor_columns, baseline_column)

# Keep only samples with a usable informed consent -- we are really strict in
# selecting based on 'informed consent'! A row is dropped when its consent
# label is in the exclusion list below, or when the consent value is missing.
AEDB.full <- subset(
  AEDB,
  !is.na(informedconsent) &
    !(informedconsent %in% c(
      "missing",
      "no, died",
      "yes, no tissue, no commerical business",
      "yes, no tissue, no questionnaires, no medical info, no commercial business",
      "yes, no tissue, no questionnaires, no health treatment, no commerical business",
      "yes, no tissue, no questionnaires, no health treatment, no medical info, no commercial business",
      "yes, no tissue, no health treatment",
      "yes, no tissue, no questionnaires",
      "yes, no tissue, health treatment when possible",
      "yes, no tissue",
      "yes, no tissue, no questionnaires, no health treatment, no medical info",
      "yes, no tissue, no questionnaires, no health treatment, no commercial business",
      "no, doesn't want to",
      "no, unable to sign",
      "no, no reaction",
      "no, lost",
      "no, too old",
      "yes, no medical info, health treatment when possible",
      "no (never asked for IC because there was no tissue)",
      "no, endpoint",
      "nooit geincludeerd"
    ))
)
# AEDB.CEA[1:10, 1:10]
# Report the number of rows and columns that remain after selection.
dim(AEDB.full)

```

```{r Baseline AEDB: creation}
cat("===========================================================================================\n")
cat("CREATE BASELINE TABLE\n")

# Baseline table variables
# `basetable_vars` lists every variable shown in the baseline table; the
# commented-out entries are available but currently not reported.
basetable_vars <- c("Hospital", 
                    "Artery_summary",
                    "Age", "Gender",
                    # "ORyear", 
                    # "TC_finalCU", "LDL_finalCU", "HDL_finalCU", "TG_finalCU", 
                    # "TC_final", "LDL_final", "HDL_final", "TG_final", 
                    # "hsCRP_plasma",
                    # "systolic", "diastoli", "GFR_MDRD", "BMI", 
                    # "KDOQI", "BMI_WHO",
                    # "SmokerStatus", "AlcoholUse",
                    # "DiabetesStatus", 
                    # "Hypertension.selfreport", "Hypertension.selfreportdrug", "Hypertension.composite", "Hypertension.drugs", 
                    # "Med.anticoagulants", "Med.all.antiplatelet", "Med.Statin.LLD", 
                    # "Stroke_Dx", "sympt", "Symptoms.5G", "AsymptSympt", "AsymptSympt2G",
                    # "Symptoms.Update2G", 
                    # "Symptoms.Update3G",
                    # "restenos", "stenose",
                    # "CAD_history", "PAOD", "Peripheral.interv", 
                    # "EP_composite", "EP_composite_time", "EP_major", "EP_major_time",
                    # "MAC_rankNorm", "SMC_rankNorm", "Macrophages.bin", "SMC.bin",
                    # "Neutrophils_rankNorm", "MastCells_rankNorm",
                    # "IPH.bin", "VesselDensity_rankNorm",
                    # "Calc.bin", "Collagen.bin", 
                    # "Fat.bin_10", "Fat.bin_40", 
                    "OverallPlaquePhenotype")
                    # , "Plaque_Vulnerability_Index")

# Categorical variables of the baseline table. `OverallPlaquePhenotype` is a
# three-level category, so it belongs here and not among the continuous
# variables derived below.
basetable_bin <- c("Hospital", 
                   "Artery_summary",
                   "Gender",
                   "OverallPlaquePhenotype")
# basetable_bin

# Continuous variables: everything in `basetable_vars` that is not categorical.
basetable_con <- basetable_vars[!basetable_vars %in% basetable_bin]
# basetable_con
```

# Athero-Express Biobank Study Baseline Characteristics

Showing the baseline table of the whole Athero-Express Biobank.

```{r Baseline AEDB: Visualize AEDB}
# Baseline table for AEDB.full, built with the tableone package.
# Worked example of the tableone workflow:
# http://rstudio-pubs-static.s3.amazonaws.com/13321_da314633db924dc78986a850813a50d5.html
#
# local() keeps the intermediate objects out of the global environment, so
# only AEDB.tableOne is left behind for the export and save.image() steps.
AEDB.tableOne <- local({
  table_one <- CreateTableOne(
    vars = basetable_vars,
    factorVars = basetable_bin,
    # strata = "Symptoms.4g",
    data = AEDB.full,
    includeNA = TRUE
  )

  # print() renders the table in the notebook output and also hands back the
  # formatted table, which is what gets stored.
  printed <- print(
    table_one,
    nonnormal = NULL,
    missing = TRUE,
    quote = FALSE,
    noSpaces = FALSE,
    showAllLevels = TRUE,
    explain = TRUE,
    format = "pf",
    contDigits = 3
  )

  # Keep the first three columns only (presumably level, overall summary and
  # missingness -- confirm against the rendered table).
  printed[, 1:3]
})
```


## Match samples used

Here we match the full database with the samples used in **`EntropyMasker`**. 

```{r}
# Reduce the EntropyMasker sample list to one row per study number, so that
# the merge below cannot duplicate patients.
library(dplyr)
temp <- distinct(select(EM_samples, "Studynumber"))

# Keep only the AEDB records whose STUDY_NUMBER occurs in the sample list
# (merge() defaults to an inner join); sort = FALSE leaves the rows unsorted
# on the key.
AEDB_EM <- merge(
  x = temp,
  y = AEDB,
  by.x = "Studynumber",
  by.y = "STUDY_NUMBER",
  sort = FALSE
)

# we are really strict in selecting based on 'informed consent'!
# A record is dropped when its informed-consent status is missing (NA) or is
# one of the values listed below. The strings are matched literally against
# the data, so the spelling (including "commerical") must stay as it is.
AEDB_EM.full <- subset(
  AEDB_EM,
  !is.na(informedconsent) &
    !(informedconsent %in% c(
      "missing",
      "no, died",
      "yes, no tissue, no commerical business",
      "yes, no tissue, no questionnaires, no medical info, no commercial business",
      "yes, no tissue, no questionnaires, no health treatment, no commerical business",
      "yes, no tissue, no questionnaires, no health treatment, no medical info, no commercial business",
      "yes, no tissue, no health treatment",
      "yes, no tissue, no questionnaires",
      "yes, no tissue, health treatment when possible",
      "yes, no tissue",
      "yes, no tissue, no questionnaires, no health treatment, no medical info",
      "yes, no tissue, no questionnaires, no health treatment, no commercial business",
      "no, doesn't want to",
      "no, unable to sign",
      "no, no reaction",
      "no, lost",
      "no, too old",
      "yes, no medical info, health treatment when possible",
      "no (never asked for IC because there was no tissue)",
      "no, endpoint",
      "nooit geincludeerd"
    ))
)
# AEDB_EM.full[1:10, 1:10]

# The de-duplicated ID list is no longer needed.
rm(temp)
```



```{r Baseline AEDB: Visualize AEDB EM FULL}
# Baseline table for AEDB_EM.full, built with the tableone package.
# Worked example of the tableone workflow:
# http://rstudio-pubs-static.s3.amazonaws.com/13321_da314633db924dc78986a850813a50d5.html
#
# local() keeps the intermediate objects out of the global environment, so
# only AEDB_EM.full.tableOne is left behind for the export and save.image()
# steps.
AEDB_EM.full.tableOne <- local({
  table_one <- CreateTableOne(
    vars = basetable_vars,
    factorVars = basetable_bin,
    # strata = "Symptoms.4g",
    data = AEDB_EM.full,
    includeNA = TRUE
  )

  # print() renders the table in the notebook output and also hands back the
  # formatted table, which is what gets stored.
  printed <- print(
    table_one,
    nonnormal = NULL,
    missing = TRUE,
    quote = FALSE,
    noSpaces = FALSE,
    showAllLevels = TRUE,
    explain = TRUE,
    format = "pf",
    contDigits = 3
  )

  # Keep the first three columns only (presumably level, overall summary and
  # missingness -- confirm against the rendered table).
  printed[, 1:3]
})
```

```{r Baseline AEDB: Visualize AEDB EM}

# Baseline table for all matched EntropyMasker samples (AEDB_EM, i.e. before
# the informed-consent filter is applied). Only hospital and artery are
# reported for this set.
#
# The reduced variable set gets its own names instead of overwriting the
# notebook-wide basetable_vars / basetable_bin. Overwriting them made the
# earlier table chunks produce two-variable tables when re-run after this
# chunk, and left the reduced definitions in the saved workspace.
basetable_vars_em <- c("Hospital", "Artery_summary")
basetable_bin_em <- c("Hospital", "Artery_summary")

# Create baseline tables
# http://rstudio-pubs-static.s3.amazonaws.com/13321_da314633db924dc78986a850813a50d5.html
# print() renders the table in the notebook output and returns the formatted
# table; the first three columns of it are stored.
AEDB_EM.tableOne <- print(
  CreateTableOne(
    vars = basetable_vars_em,
    factorVars = basetable_bin_em,
    # strata = "Symptoms.4g",
    data = AEDB_EM,
    includeNA = TRUE
  ),
  nonnormal = c(),
  missing = TRUE,
  quote = FALSE,
  noSpaces = FALSE,
  showAllLevels = TRUE,
  explain = TRUE,
  format = "pf",
  contDigits = 3
)[, 1:3]
```

## Baseline writing

Let's save the baseline characteristics of the Athero-Express Biobank Study.

```{r Baseline SampleSelection: write}
# Write the three baseline tables to Excel, one workbook per table, into
# BASELINE_loc. File names are prefixed with the datestamp and project name.

# library() rather than require(): require() only warns and returns FALSE
# when the package is missing, so the failure would surface later as a
# "could not find function" error. library() stops right here instead.
library(openxlsx)

# NOTE(review): the "59" and "56" in the file and sheet names below are
# hard-coded (presumably the number of samples in AEDB_EM and AEDB_EM.full);
# confirm they still match the data whenever the sample list changes.

# Whole biobank.
write.xlsx(
  as.data.frame(AEDB.tableOne),
  file = paste0(BASELINE_loc, "/", Today, ".", PROJECTNAME, ".AE.BaselineTable.xlsx"),
  rowNames = TRUE,
  colNames = TRUE,
  sheetName = "AE_Base",
  overwrite = TRUE
)

# All matched EntropyMasker samples.
write.xlsx(
  as.data.frame(AEDB_EM.tableOne),
  file = paste0(BASELINE_loc, "/", Today, ".", PROJECTNAME, ".AE.EM.59.BaselineTable.xlsx"),
  rowNames = TRUE,
  colNames = TRUE,
  sheetName = "AE_Base_EM_59",
  overwrite = TRUE
)

# Matched EntropyMasker samples that pass the informed-consent filter.
write.xlsx(
  as.data.frame(AEDB_EM.full.tableOne),
  file = paste0(BASELINE_loc, "/", Today, ".", PROJECTNAME, ".AE.EM.56.BaselineTable.xlsx"),
  rowNames = TRUE,
  colNames = TRUE,
  sheetName = "AE_Base_EM_56",
  overwrite = TRUE
)

```

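We will also save the prepared datasets as `.RDS` files for use in downstream analyses: the raw AEDB, the AEDB filtered on informed consent, and the informed-consent-filtered subset of samples used in **`EntropyMasker`**. 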

```{r}
# Store each dataset as its own .RDS file in OUT_loc; file names carry the
# datestamp and project name as a prefix.
saveRDS(
  object = AEDB_EM.full,
  file = paste0(OUT_loc, "/", Today, ".", PROJECTNAME, ".AEDB.EM.FULL.RDS")
)
saveRDS(
  object = AEDB.full,
  file = paste0(OUT_loc, "/", Today, ".", PROJECTNAME, ".AEDB.FULL.RDS")
)
saveRDS(
  object = AEDB,
  file = paste0(OUT_loc, "/", Today, ".", PROJECTNAME, ".AEDB.raw.RDS")
)

```


# Session information

------------------------------------------------------------------------------------------------------------------------

    Version:      v1.0.3
    Last update:  2022-08-23
    Written by:   Sander W. van der Laan (s.w.vanderlaan-2[at]umcutrecht.nl).
    Description:  Script to get some Athero-Express Biobank Study baseline characteristics.
    Minimum requirements: R version 3.4.3 (2017-06-30) -- 'Single Candle', Mac OS X El Capitan

    **MoSCoW To-Do List**
    The things we Must, Should, Could, and Would have given the time we have.
    _M_

    _S_

    _C_

    _W_

    **Changes log**
    * v1.0.3 Updated baseline characteristics.
    * v1.0.2 Simplified the initial script. It now outputs the relevant R-objects (as .RDS).
    * v1.0.1 Update to main AEDB (there is an error in the Age-variable in the new version).
    * v1.0.0 Initial version. Add 'plaque vulnerability index', Fixed baseline table, added codes, and results. Major update to WORCS system.

------------------------------------------------------------------------------------------------------------------------

```{r eval = TRUE}
# Print the R version, platform and attached/loaded packages of this session,
# so the rendered notebook documents the environment it was run in.
sessionInfo()
```

# Saving environment

```{r Saving}
# Snapshot every object in the workspace to a datestamped .RData file in
# PROJECT_loc.
save.image(
  file = paste0(PROJECT_loc, "/", Today, ".", PROJECTNAME, ".AEDB.EM.baseline.RData")
)
```

+-----------------------------------------------------------------------------------------------------------------------------------------+
| <sup>© 1979-2022 Sander W. van der Laan | s.w.vanderlaan[at]gmail.com | [swvanderlaan.github.io](https://swvanderlaan.github.io).</sup> |
+-----------------------------------------------------------------------------------------------------------------------------------------+

diff --git a/ae_baseline/20220823.EntropyMasker.AE.BaselineTable.xlsx b/ae_baseline/20220823.EntropyMasker.AE.BaselineTable.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..0a975e53d4728e2f725248b8e1ccc2ca5b84499a GIT binary patch literal 7234 zcmaKR1z1$y_BJJw(hV|53eqWELw8B{0K?EBA|N?5Dh(<nfeDCh=$wXj!} z_A$y#bQIlJWBUOjnV>-uCP|Rzh{MT>Z!ieCe6F-WL4|LaAuwm^>h;X@jYxdHyaUr7 zC~GlRRd&my%prZ>mH)*LPOBe;RF|f1?iiBFrMax3sgJwpzB%M5l z@K7JN$#$KCinVDIdfgTIgIYL5t3ivwP?v2P+~9Raz;p}(^apy(kU z$ako??L}=!e2+@xiR1v6f7sU2PeI2P z1CAmAjhr^l()`({!$BVL!8=TAi^#B?#&0kQpo(E|OSoe3Um6&Sj(Uv2v!KYw2A zzI`jTBVxl!^>P6{I{ndP`+GnTYCBVa^ac4Nf>R3oFBVt(s{#eff{zn*C|VLLD;~;c zMHP|N(1?3Yohv|EJ6^F~PxYMy*(n@35nAMQ|94gr-JGh8s|V;;PR5(5cJNR_uT=U1 zUO$WFm7NRjFQdgWAl)@j@G+}UmnHsowyQ1%ztb`g@)-8@=v_kSE&}Nt%u4i?E18&y z*mFQ7uM)&3o1VzsTbEJZsBTY)-RP}dmNoHzH+VvKyzI1+h`~Tfmd#q-G_Mo7ucTOFjCyHW`ql-l1xR%-DDpDRlc*8 zV+X$zS4#u(^}d=I2xYh}o1KM2(rD_%#fqt<%j8i!nsQgZW&6D+&K%RIg(2e>k4!12 zf)9W4bmRbi`MJv4aIT7dHS%I*sn!%pYR?QZV-QVG6^q4(4n;n7`fA%l3`%Dlk3}W? 
zH)EKb%k6b`!9BsfuQjcBH>N* z6K<+a(up-!`2c|gN-Nn3$22MTs!d*;U;@o#Iy*Ak{j3&#TH3YFpJdEf=L$W!rLP?# z?Hb}7koVd3;^F``#L%y#>9h8SAdG>o^V?!WzG0>`%3ts_acDiylM-S(%yo(Got^7O zOhAVD1o<4RM)CWNj9Lc-hJz4l6_q$+{RV(3xla2^IkP&$~lO+1VJ5EvaetJ++#@)1o1O$Fj8_goLZ>!s|XQ= z5J7QlHOuKwahwa5aG6*FC>@`lty|IQLmkFYNnyl-p*OTSWiuv&yT&jlR!KR`e;PNc z#^ZkTn=sRT0~wJyK3fGlI`_W)#=|snWivBZ96?K|2=Sq`*X2UqGT5a!J8cm|bA`t3 zv4#9wK4lLpQtXY5gvuPZ?-?y^3-6dS92q4&P5{1SVvwCk0Hz_l^QEOm zIwVaci=ZjG+ z=VuXAZ2OI%!w;sZv(nCQLC^2prO0C2lajvp9QDhjWWF9iz~k-@edyKbEv{n$uP|d1 zMka0&@{RxBbU^w~EIb{oJV3U(UdWKxd;VgC6f@Ux9@6luer+x-Rj$M@aypKjKDSLoF2g-FZnx4JQh=V$IQ7N60u$ zxY#;vtb4vx%8VO9S{@#aJ%H%p8idH>==bbcar)QI+MitSf0X3v{U zVm@@t4@*r3OHA=kN1Es*ezd8R^88)FFIH3GOct8SHgyj2WRKr z0=Vu+`GIIR^SgPalL}NXp$zUvgt4zlEJg3K(tIwT;vY5=yO>OweX6;Mh ze0#EOuJx<_@oy8|udV=^2}MbyzFoV`L|KtT@#Kt(Z<5ZRk@d_!OqdnusXT*t%ZhGT zT>j!<#`adpC?usY|UcOGCUpbtgr0zNoAPm34Snr8BK~!|1zs9 z!=lyPR4NO14-7fq-!5OqY#8Lohx%LWkoZ}fnkp$>(iZt!0==tFuqBln?Ex=D1z*(G zh|Ofg1!D!eRpi`KahjYn_VwXS7Qmh9mHN_)&&~{T=4{@Vw%nj@M34=n+V;Q)T^1Wg z={A_yU24*+Oqs2U{Ew@sY2xKm#vjNP%hGt3<>neWK(;|ipC8u)w$7dJR>a)lHtT-y z>}vmR&_~d*8`(R&w7SEQ73Q{uz?8W;B{E)=qj#D%M~tf7l>0M?#%ce(=dZ)Wfu$s! 
zIN(9ZQ=GO5d_Nr~`g7H+GwHD%P2C3$8Fp&sOqYV3Rc*1%iH)mSqpG(B8=kgR5p@wZgCUV03B~UmPwS8FH?MyYv5L!p(PP< zLd0uq0!!t#Pm4Q_HRi81_p}uS3cmOZk{lB$Xu!PBM!ofe#}a#-S9}sorLm$GJ+iXirqo00O}3jHAD| z8X$~b6!*$a`EJk)T3eIoo`)B3x^@|`DdoeA^OgGbta;f&!u7d(3xpY~SvmqFg=u~b zg?JP-PCjcH0rjOuPXz}gy+kuK3O*S_Zb1RkJxzXYu0q?-Ifn4H+#AZDjqQtNMhV|P z^SfuivOgomFMno&<4Pd4jn^EL8GM3=6Sx?&-(}0kBY)1>Wl13Q(@khk?K~S6DWbY_ zf>Ehnc2XQ(ig7iDLx0cT5Cn|t$C_7ICSxZK0N~rylX!%uIrhj@g>9&-JR$@Ox70H?IvTv28RQWDmlPhDxR4Lt0!D#&LWJ3Q3u%V`MD)4xdH?cAN z@IaS@YUcd-M zr!&H9{nYz@1}mSJOt$}4I#-bnc?G?PP1+^dJHX7ra8$pgWrx}dgTLiN4;`Yd_E zw?aRShy|AdOZ*1y0n@UGQlhnn{_c9Rh)IRfoX@Ruy)8LtTqDn7L)nS`XP6`pA{*vjhdM9zV#&JSv444{qW(wp$#Img z`PGtbt4pv%!n>_Hk~2-BMmyX6_eA1Fg>EApc<*h|G*V|ZCcwJiCK1jV(U!^sEnBsX zQ{U+62bRWjoCq_IHCe>%4qwG)LPrB@K5ZF3{^%^xDL(re@WSEk;vpTew5W%{jK&h^ zO|DDwq~vESJ`Xg`j6B>AE~Imw^$2jBEco1oN;!WlH9J>U$O66)&$M2{D6u#Ya_E>@ zomw4QE3h(0|M&&pIQ$2)T&qdVZxUt*VRy}2Wi;BWX-4(x;LvgJBV9;* zI*GCvdupDZZ@08D5xf{6I9`+*^p+?I=wkRqF_{ReC0P2ch(#^btVxsAcQ(+Q`=ah> z%x!=2yz1debS5YQ#pL^C#gz!i!Z8Q1rY`)Ly-W4nY~*(Tx)GSt zcGO)-Z?g2j>utW$KzCzGUK5u6knGi%@j*ert7%^=nyS@<%yA)K&}Mvw*ywDZop=@c zMvd6QVb&}v?1z>@C3mw850%?15|Gc2hWZb00N(s+Jo;C(DYxq~O|w@!!c)_WJ@ddK zX=rbEuJ(>3FVcf|P0+5GR+NKJOqO?c!vpHf1~8iI1z|1k2+)+j&GS+fH`*wcoy)Pi0N z@iMX(aGVz-$ug%_DCaN=#&p|iZJ8b}cCcD6#k3!~lx1wyA-CVQ;?LBFJ}I=7-_M!w-T*6Pgz%4* zw{GFCs>MY@%e>|vYpKM(8cdz?Z76)Vp%AOQvah^PMk}S9g)OI86l<_>>rp3BZXl3; zl3Rpi`6Ic&rP>&oOS^F$t4Q`3ztPvs^d1WZ915P5SufJJOD`~PIvT+8_xFr3E>JG|U z&z1X5J4M0w6ZN`J2;&YB4n;Ag3+P{0V6D! 
z4>Dl0s0`Ikso3C0lmvvtb#V?EY`oAG37JT#j9PF6T}IZyY1QiLnGJ1cSVNc+X0?+v zpkV{u8#B{~K-dR>V{-5~G7R&(p=FD)Rzsl#Pr}C|Jq&evgqmt zIU&I6M}%u3s4GO&rK-L_y+4@pkc2M~d$9CQWry)Vs`Oi?x0`{$p=iV#1j3?h^{7Qu zu<7~aFPX(h(Qt1OiH7Z8dtA!@!0X>g<)=Dp=1G%6uW*){+V{rD$EO@aaq2(xoNd?@%~jK985kP8 zxY$~AS{AKY0(E=pN2mW}ay;>n{KOGp;R@xsV`?$Q?*mcuUh?&BB)e7qQJ+}_0LjO7 zu`CoeHv@mD>Hq?FZna7s;Bcv)6uRX^I20Zm7g!gKnK7t%l8)Y6$DE%wF`6uQd@&1X zQ9G2F|5`#nuza9oEJ1#Jgp=;5UsCf>Q-9wY9+(=Hn*H;^7(+Ks*{;td71kp+PmXle zL$AFJQQ%Yeru)fjCA6EU-A^{f9e?0{ma4`(Jd%2FSgk*nm1sqjv?e2jTat()SwvQ6 zEW=9vOvh;=8oD!|ZbwY6%kNQb_;H?N?djgu(z%q*_Li^Dw!qf2{D}bm`XM{6>VYbq zF0=mE);#px#|Gcp9MS_4^MkrA%WQsZYJ9mTh*rk)l+`k%$X4qgdOo7TGUcE8 zI?M4&zjtUVNM+t@EqdBI=b(*`A7NLkuwHN6uyj~t;HN0eKl~Q#={7ki;R&IX8oVev zL8&U>-k!6=tAY-p4W4?_)!~Q1gGWSt)$nGV zWbtMmFkm`$pV!gJ!$W%|UyE-*F!~bB0hGifqhH?=*4?!>iJC%{Ie15b^*l1-$@@^s z8Wx4`v(@BSW7Ytr5?;{@KsP+1qU9Y}5ujv(CNoE}GJcl@9uWnX?|&KxD4IatB2OH@ zn=mD3%@JQ|&ZvY_BmI)pG5yVVl$^|%b~%w>337b*F%VnTI9F~!%zbB5HSqeqGjr!gTF~QVV6`uA{wL&J3$;X_H(ucW z?fDqLE}7~_wDNvkR4)rlWAyAIEapq_b6e&Bb0tGTh9I*tJ~7l4bLRxRj_-V&qE*vn z^vp%}&Zq3HM2igzy_JtWD#9V>o=+a1)kDN-Y4quyTPDMkc#EIVIa@t?;Kd(OV9Eh9 zzrCP+qLF5{s@F_>rXd-#cCp09xzw?>*COc2ksr4<7DRe}S&H&yK&Ab%WcGH4Vt#zl zMNI3Hm%XEeij~@EhXZwch#LV8H~)^8dfp&p-X#6sE4Fz~2T77RUVM=MN$9zY2qYI=v|o zTx(jt4Hn#c`?#mJ)s&EI1M89}oX0Ec`k2<`U@tk837UvG^~I=${U5 z*5K=v(r<$W^AP`XaC6=C=ir+K>AFn+Hdyc;^3NcD{d(}l b4*y-%v{Y{)!-s-`hy1W0^9hsc`s)7yOx##a literal 0 HcmV?d00001 diff --git a/ae_baseline/20220823.EntropyMasker.AE.EM.56.BaselineTable.xlsx b/ae_baseline/20220823.EntropyMasker.AE.EM.56.BaselineTable.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..592e79a8eabf6bf778f9380f869d57bd9b7dbd31 GIT binary patch literal 7011 zcmaJ`1yqz<*QOgmkVbMq8l_u26r4P|7Mdk7d97zlcyB)SN<1RMTt<_>c5;N-Y@mL-g; zb#vha?up2Ly7=m%fRS6!OaocKXTdWkIF)x^W5S$h6iRBYi| zlqGGJNgOW$qtY3f;juNE1DOEmEN~?iab^LjbZIR1!JSR^6C1!nTj8M}HuGkz@?Um#-#*au}V#VSi} z8I{|m9k}wm_|9(e9iQxnG1v`d22Vp79m9MZR{IqWfHD#Sg2ul>LL 
zfx+)aFh1{5v17g8RPJ0A8rj!ud|aKS&_QHMAu!xK{)xw*7&F1$&|r^-ghJv#h*E_g zk!!^?qOmBlD<%=Bqrp1jhtSVc@S>LPSu78k&O?2kmoHiuU$M2&gA=v#X~RRjnP$4Q z4l6&;8c}Pn$sE>|IchX&&;qrYR_z--&+z~!AbYHdHsJgD1dGTVUOfaI7@JJDk{gZ; zC3e_l3R&l>*Mvz)tT2qcRHXx*Lr~JrMh*F~|56Ltq@98s%T!89qmp~M<}o6}tnG$w z(<~afG`S}^kbsU%@QK(khhON{$_1Z86DOd&tVc<9!YX?7*~07Z-+3wRz0Y!u?IUph+JVpJJtMK8jvUau7aCZfJa9V)DH-5?-e~1nDQ}{|d zL6u`kNF; zw)(}O7AtW?r&p-P)?Hu)h3`lrjWUWJszmvTDxfUZA|Pb(pvIc5pkQ8@*|v?`0k&zO z{9_S0I!%A3tAi#Gu?yfYaYd|;cSeHy$?W=Iowsn6@9`@wlGayMm5*eyql$@YDMUO! zT*^7Nb*DeL+3Gt{qO&siMkwLi{r9N6dwZ)^uI`|pF&S^7+|5NAcC9q%pYbgAq10T^ zP&p-r-n~831aFf{RVjj<^F3AJvOBHwAonpJ_kk6d_L3~Moyl8W#VP>9U6x$X+w=sH zndT?bbQ_Y2n>Aetv6};Rt5QaO@8)t&*qfLn5?Kc)eY=^wRwG3l00S&c=+3_TD|QqP z5-pM;yz0|@JCh+Oc%FH^Hl6W2FEBm4AosART3{|{3X+xGR7u{v%G~E`Cw9K4uI75g z8-ukmj-=t%Og3hAiIXW;S8K*%E;GmRNQ%7$=3R78oLOfPi-M=k^o>bB1RY&)b?3_Z z@Nks3V_lc{sO85>k$sjYtUK4sih(shl`jz)JrZ2#@zJz()GLKLoCt~f#a9TnQr4fN z+@LzW4XPl4BMSKAStp01`oxLzuZ-yJ>h5Ui>gsrd?BCf@(h=z)J{;auUxDVjM6Fm; zrA|l300{*fftY5+0hO7nQ#4r<$)4`4E?O*bA&w#UG&=O3!KIk{FFT{X+FtP4$N~3nJc?E-HVy}x|nVQ1kEvex#5yTsPxCT^3GHC;2f@cY|`q9US{(mF$5PB0XgsKHE|bidM_ zIJ++su#siVF&ed9#d(#S>zKTJiE!;e8V$6kr`4DA-zkKFfgW_ZH zc0XeV{Jg3%?y*(IM^*|hbzAs!8tN4MiX>~`Oi=EZclweIi0rkLulIBz`T-+d%bK;h z(y^_B(IOR9ruG>UYjP91g)Q`SDc9L+#>P7#4t5UY3_0SowV?Q2#r?Q} zWlUVGlz!(lB%lbC3B5N3NL*Je>{&ermQ!MmrTa)|Z(LT~%ILiWh} zm!8Os;^x213)Lak7m*u~9U~B{)M>sDhRVKBB}r=;PAZ~dS7zKRd$Bp*-UU4!%H|w) z%5RBj3VXjy+l4-Zm))^tMN*Mk`Zf$muoXRc4ge^7cPwgtv7NJ8_L9Ai5%WLmw zl^#9P8~KPo-|eKtBb(BGUoc-hX6I)d0SyIB(BS7e8RJiJg!^|KdHOhkeg;rMqN?jW z4Sx7F)@oL7T|$w;$FbZgrN-j~ov%bz`+0{JxZ6X!6?^6_W=aKeQJfvZJmWYx4bQSl zY2`5`eP<`!wKeE6_gf!JO9!;=?6zqV(0PjrEDnZ`aZha2-EreT^x^~oFu=LF7o`2{ zR+8HV>lSPBUncTIfK+an$k_A>ixKy%aos5Vt^{^#N7jIOqgS^bRMr{%B&2p?xtRd2UXD=pn-KJ9VpL zdgVUS11PL%)^9&9BUD7Tl&}ZOe#G%9R{MKgUo8OjrE>PU#MG|3Hk)0hjfyGY2OoQN zdo07NruFPeFX4{uBx~eJ{KgdfI`UL4Q3T-ei;=12c~~LHvMr_1uGt%rh(PDdsdxl(`IkXU3JM z)BU{|Mo~^afcQk$fUS0*`CWp9hzv|TbH(F%%l3m_LiPX{Yf1-_uPHj3_lbX9i0OVZw$^ik&JTEj;6 
zyi^hX#vI)ue&%|%7B68@s&8Wv4oR((_vcLihEjv4e8Xa%LYZoX3x-rI>?WYi=%? zOZHxKJn;)K|Gvs)4kS`gdG{2hO11p7B)k;mdJ2o0&JPHZeLjRSFSkm>Lf}t>Yt=yL z9-ivZFIgSBsj8%pPxBP%K#E(U&@xlXsxk{#2j(QFszYN?B0m-u8a6F4YM?@Nrba#Z zJd)O)p^8W-6lv;kovzPqn#t#U4(xD}CdK()$;D z(YV`5_iSGTa3qO%bk`zC>*boiNfvB%w-G?iVy<%izD^zc3)+TM2mUJ7P;SD+MdP`?+zbuR-=D{nZo z@Bkob?E}ras^nyCnJ}MZ0YB85Jn*Tq-AKX%B^FztwplyG)I9fdKhYHO#I?^^OQ@H- z>O;(>w;R4Eeid2ry5q<*%ViFQn)C}c8HQhC@M_Cg`_9T1!RM;FAiZ4JFxl9~s5^&Y z{jhy#3aGxNm&iRg)K392am{^yLGC4;S$!VXOftO=fC#JEt6wRB-Mn?S_qCa53oZF^ z5|DWM>a(;-86$O=L&V$cFM`Px*DKa-EL-Z?C;G19Q%dHdh>O-KuN)utQBfyfU5Zgr9Yy&M0DYlpdQEHW^U6umyZP?kw6P z@-c(vh25K_qx%FBLhgD{wH46oJeQ;yv1JQxcO>@AeC$q_dvhKQF#G3H_LnYXiUm_C zIeAk2W@U@#n=GG4d*FxR{B~>uT z#S>Q9+WPPlmR{vclL_>p4Fh{p>q$2Sote@@&o>1M!+lN3`OO%%qf*yXhKGgz>9al- z6xHj8S=0PJps(?n!jm5dZA7Y(H*19#kFq}^LcVLrRdKdhagl-32|?c98yh-b(|GZy zaOqxCCZpG98Rx8bho_{Kc;wp)rXs!Bz1}|-yGje%GeWustSJVa8m;c`h5Of=45Kwi zK%GT$*YjK3+j~~;eBnI61drAZ-R}*k1E`b)JkWU2OeA{!lZl8;#Y5GEkGA;~f6B;s ze`Vy`b4kG~XZ3Cxcz=hr(u!Q-=xJcf>oAW^NSJNP5o@Yng@n8O!QZ!HW-hJOz-9_P zE@bIyPq1+IdGP?6-^7@WPOKneri`7lCCjW1E9$0@49!&MM*M#0-|GurrK_AKbEPhf-50NQD)o8pq+?kZPj?rGMW^{o+zsR;;VDWJ` zUS>Z*MQ5Wz-_<<*rY;#zI#!%V*f-j?n)D`TE!ox}{YEEtoZ)Y)kB2VAE2qpKEX;Gd zfC783rY&Ppca4uI=^xj#tw!@pIteLV%H(&CdrSuTl5b6?`6J%+>hpo$xufBQwFUW4 zZlwRmjUIL&kf+Dr^XAXR&l>ypv^jpm#YLDg9A-j3BGT44rC{3Ylj7kM)IYb&WO^9Z zDk>Q%o>^xfaX&Nr@T_`${oIPOC$uqC0jjne!y>*FxjB2aIzkV|z#q3Tc&=@B7! 
z0Om;PovLob;S`BCfHz+QWJjZ6uVFB=^7Z3ZA-?9gnV(dLw>)rv366&KALmTc|H11Y zqzd$4E~{MlS4m8J&Mxz^MOa16NskL=s#0uU3HCS;$kzqcnYF({=%bn=@M3tmO?fd= z+3&IaWx>NhP?!agk?Gj_0OGZ%}_lx78!i4 zwcc^5)aO#JBYGf`_NfMNm^@p91X#-hU^Q8;4H{}B)V&(&!uAhXP72lC4YHtX0`Id~ zVVWFT8%-(QS3qHpL^|QNtre*heUtfJOY+;hsaI6etA~MhZ8_eZ`c*REK_0J+{X?c5 z_arMlnZELOesT0JvqLUP^tJ4>wP^ zjId6yV?212o`YWevGTKdW^tFg`w5@^6UG=ZMg9rvcwvu1=8*39$CQ%Q&{jNSytcZv zYp$l0Y7dzG*j4(`o5G{iK90m;){S)*T2*c&>T~Xm3(nwzC7^6B7t*Yzb!#4~>b)>B zB`0Q-)TeTRRy7gd5H9i6lz6Jx->bJ>D}7|t@0fCB@lXYr?5}bv>n_STKDqNIiHSP{ ztIb=6(2ZX;z0aRe_%Xw=H&4LV9U2Gsp3jWLP-s_=6opYb5b_Yu(h)J27@s`ES3%-? zTSHjXNjw4}MGHI46cU})K0YQ_9*G#z)Oa&sn>W+!TQ{ z>8B_Uzi@#W7Y=bz`z3rVuV()rG2nLwlS-nk> zzY7hNYvrWLNPY%|TyBwJ;$+vT;F1(SUuCJ)@DtoA-h+a`--bJQ>oN38p08@vY8tBiXRPwh>j4fL~V~K><%ApBv%ZGmxO{$qs4 J(}SO?{|A4D+qeJ# literal 0 HcmV?d00001 diff --git a/ae_baseline/20220823.EntropyMasker.AE.EM.59.BaselineTable.xlsx b/ae_baseline/20220823.EntropyMasker.AE.EM.59.BaselineTable.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..a14caee52f20e5ce6b429696ea4ad8e5f41d8be6 GIT binary patch literal 6731 zcmaJ_1yodP*QO<zrA$_d4(M?04^XKe`&2SU1pcadFX%qp6J0t{4I8*~$mx=F4~I z@?M@ctlh;=7``p3^y&M$hbnFXw3!()N5V~PO?IT>zRZO;+A6ge4uHDP*z|ROTOE1Y z&>ToRNfifAns{?ZU3U`$#OS1Zbv^`a^m@051gec>!l{EOFblH%Xox$Jm+WE*>7` zM8_!_;o^#Tla0*!R&LqvG*!4)N5}Uzn;+EVsLdp~Oq`F$HcjQ_;!p ziqmU~pz|+U#=%PBJCo7@hPpgM!DxMg(3CosP_Q74;awBK$0@DIC%0Qzz0&oIZpKCh za7}jV?^S)CHe)haR@|#Ech!aI-UJwMEjh#dPKntLLC*N2Zvh`>(`@4Ng^bY*5w{h) z)V&E6=?P-aGHE)N{6{Uyz!C`B*P89u?uf}bIcQ_v51DVF8FNxq<{r;1gQ@#e=pCT5 zPdly}HBIBtD$x4T0?1fs#2!cw-U*K0So|*R(!|GBQQobtG-{VP96I;n_*jVEIp8Ee zIi{J;NlBughiTPddo_S~#Lk#!S^T(rUcjyL{JB)E02P+UT!CBMkDMn=2NJ<+CC0-8 z@cG%VcAT}V@4n67JHx)D3Y|mV&IKI}?IHG`s3Jj8W$$UH>*ML=%V*=|b;(oC$Xx;y zPqB+_WYw-^QSlH8xjC$B{=@@Dm|GV8IpaTqSR=(H82Kb;9j6uMORw2KimuDm;weXy ziiw{#&C~flpwB}Y*Wn+nyFm)5qzf8KXV$ zcjady-c``!8sFHqObf86(vm0JI^EWiD8Jr13-Wm%=+nQ5Fql_ja0?VqL+FEUA8hEY0eo5Xe_{~iIA#W3xY&y@tSWp+2|5ChEBU?W=*ERQ` 
zokb@)7ugoMC?TB*;jOVKEMmXH9*2%p!4y1Sf5;7j@fL&!j;dT$7h^_%kcPnN@}X1E zk*BpW8bqaj(;&P<`OW_g6&>@bq!D_4IVTME39MDCdfCmjnfGR*-0O zeY!r_QoX~~HB463K{TmZtzUEU{0K+MLaw_jw=>8F`Q6&#?d(ww+_q5U0k)A|q^xJ8 zdq~lO=lS^_dL$s|Rr7*gM>y7C&)GGJ;lOBfI<-%PI{5UyXX$C+E=vPameVtXxDT%H z1%w4$YQ~7(0fBD^h2DoFG^?ud$KM$(~d;KK-<6|SfHmS_`I zMKR|bOncf{E-cSl!3ESa@W_0AEHLv7C5wikO<$+vpH z(I_L$XO~bZhXPBb;Km9HipD>J#rYRlUOvvK+UR2d^7C`{aP;N-?fvc@TW60g6L*g; zX2SC}11~J;A(clHquv*+gt=-3h?z8DeNKumUk1FvA7THq;ob|j?#9t2YHU8JOxZ#-+(@roT zZZZn*p=P4L(qKTvy{e~9oQs^9$f2w9)8zL}7q;N{9H>wM!I$ZT2S%fye*gaOT`rX@D31XYPp392P% z9&y!X&?sj4-ZX7q)){up*>y&$JgyxX+4F^jpC%*sWzW@l!uwtTlO~hZB^TvY7F?pJ z#7#%-_99ua;hiwJDt)>y|mi1nkt`zbp_6Y2FsD=w`!y7(n} zYAet4GSdtsJU>6lV8K{e1KZ$D3OVvE(We{tn*-H+l#0IitheCXh}-7!+-nouLI1Q9 zudVpw{UIHv-S%z0ugUR5Ecgpc&q=UIj|9h^K8oe!g>nD^29ee^H^(q=ZsJhxW^Mq#1+%BOs)4^8q;PC&iHB5_=BTB* z6UTH(bx|$%Z9m?$SrF7aJ~g+J_twMH+VHe=%=!&#=}?;Qj7o(Sfd_QsjV50S6KvUS^zZtqLghu~at z_`H}@ITFPo)*rhU(cc{U1-gNLk_S!K@|wN`o%xywS@r)z$$ECs1iJD&O z)g{||0N(a%l}k9VAs%Q-cAX{^DRn;H#rNP!p{xwH<(rQhP%qil+DfM*{@SNlb z+%T`I0&I1+$r;nYfV&w&1n_>DPyIyP>>zjEmR(uvHQFWw#bBm=U-^*7JZy|%jg8x* zF1yB@-KI47u!fc{Rq^S>ZG|#oDD+&UxY)9^#eHq*n!}UC$Gxb{WzJ%~W1T&{QzR}J~h$v=h z@-r_r#_$w+d$Yv8d*|g0opR3R)c11EmK#^|X5~vrR%cj{BsnX2`ak!zx3^cO`NYL~or2u{bX8 z!es;%PtEW>iDfnkTat1kjtKF?=96}N>;?E0&v<*RiDkZfiR@^eF%T(USM8{0X4g)nQPPCZ@lH+eWQ=Q6F?R}Kr$IZHlMH-2yGGL&A zZ__PMu4Qr9ke?bX1=d*0RBjVCd$Pi&UNd|bP9)mQAPK$=8E&qmah_=QCo?U7yuopR zv(RDNjYT#Ytv!9Kn6QQ`SMdY?$uot*n-IK7HZ$_0^m>gSwnOnp6a)}mTj&6va&Nnv z+_Rb(kIe6#wzJhl0N;-IsYx9H%2|0IH6~4dYbTh}-4$gwnJ!ZWw;D!UBvu$E1Ca3T zTz@+p7kZaM{vCGqolc zgcrkJ1r0eePs<}-lYWN1>upqeO#dz}54`S36&nViuH)3JuYUT}Krtrpsc0~EZK2nZ zhSN|Qr#iPIz`&{vVrgCQxsPHTbMzwcq$S$lN9!)${HxWV!{EwX6{8W%$%S&4Vm;P5 zhv%kWl8Ecdd3sOEkzS`-Mj+z?#30T4)nT7IrVWG6(V3uzmL3Y9g6LpX$moRt!<_PC z7ORFr{K==RhHMo0&HjDrX}s30(>*V&q*`v$9%KM0C(b`BSd?=x#kj=1%KIYrwDMxn z{;fxZRN9-32J%xK(k2J{-F8yR(h{#x9>R8e4DHO1+8>+^wk8qoIf>S4gRL9&O;cM; zOoNLP1#Tp{hdLaR4*So*xiMp5b#ohl`y=jB-I5=lGp9JcoZn|4lNI+dhHEc^UKDy{ 
zOiC}<2>4*|<`fZhc-)xrZA3UH%R8TW(5OMjGxH1OMXbt^scfrttWxuXk^3&WwVAb1 z^+L-tO!rd+R+Z}M?cx#DD}Hr_5%oH>f@aYGS8mUu6;|L*T?@Kj7mvR8zzyV+hm&aY z;1kPisKesghlu$Q;fd1B@Ry|NN*;huDjB3Py27uwN;x#6EShyV13!lO^PM*wjC<`) zp4HqtO3Vetp_zUAQT@=e>y8q}fI#Rc+)~zvl{?7*{wMGmLvJOG-SgM)!EEMiCzTLl z`RM!xD$larHW-)s0-73(T+J4Mi z)o}9lp5IHT>R@lv)1qcv$6@)4anrrxkZ03@Hgq*Bd$|)LfuQx&9Eq`y0}hfkm}_+s z$o;&J=#XPwoO@5;b`lr_#a?SHsx?(f4%Y2KR#j-G7 ze!bW^kUr0j*fztsU|Uv;I5J!Mx*Z$RU@?f(90zxoELbUOZENdZy8eZ47td?B?j1u< zSUsC&Ss15oN;8Gj#ZMIgoJEYShic44bbqSI#D7)ft1|)giMvi0Gf7_?{$eX;nX4br zQOISMfSf$f@($S2q#A>0VJal3a&jiS73eU2?NQYHIq}!x>EzOWoZ!*-4u)VccI%A6 zIt$Rj*5Tr2S{ey-7_@TR-tSt3a@n{6jlS`@t8pG!%l!MJ2j1R1x`|KXCN!< zv(O&-ExFGHSn^)2W5CD_`&X0By#Tsh{*NKUv}t3r&+IYq~B^Q{F;Q5znf45ii|u z<;BsR$Le&J03nc!PWJHhl7(_sk6Rgww_{z1YVDwBzEGvR7$*}?U#Q`Pftf3gm6Qp9 z*!gZE?o!*d^)>+bGswxPNB{y{m^+7jfFP3bW7Keq@=XPQhpCLd5^xZrpkKw`QV4by z_D4VxA7WPed^Dwgy4ACleiUX4?ND!mSlExPTt;0&C1HD9esB+ELs(( z)=icfwub4ClN&%R8o-Qiws${18s{6py46&A$r=`VD@0Yv5QhR&%D2g+tdUX0%wAJf z*x8PecUmBT8+ha*VVG15GdhvM{H$4Zh_g54O41zCsR6HYkLuF$^JFDsaxMt2-}bLa ztc59vFt3atJWD`5k>VaTjm28;BB=Mt1;X%#USF^7G9Ap6eaZH6Jxpmh5%B_nu&P)& zXcZT3el+=0X9!RM?98Lku>a$bNBv)T{eu+Lc*|pk|3(PK;Dv@NMnOKC5y85#^ONGL zl=)uo_(`@$6-2cmr*qM1;vNl!+86`VebPtAq40!C#qC1$`#pN@5q2t`o`8&!;Z!We0m8EB7`KCjt(y`_e36Z+^)V#j#}Om+!}xFHoPl#-*Y z&i%F#F@+B=?hel9?;XHg;A;ir5)4Q{~ZC!XVSajCvbfjKvO0Xq`Vite3&QKUAE;ku>N~~J#Z**I z;D3xI@SVjqB6-ok7Vf~S-jUqSfO8*%p*weF;UmJ`1Z_Utp2e|wJlG^|MBalS>h?sv zpr~dNlsjy2=tgJBff9BDLns&(R6=w5zlTOKO6fb`nB;q~lTYr}_ zfA_wsXhud|I7V~%5y1v{W3@dF{&N@yA1YwoU01Or6%;tAQ6%{ zKjZwS2>nlm==Y#kN9Id$2sFM~vY@&1bNpF#U~*Q-Z@ z|IceCDnI_mVD@`}=T~~s<;mffK_YIUh(ZOpdXD(r`D$UlY*)Vw60uA6ALlvZ{H&M|3{WX6F{HMC>YG9-2Lqj7(T^y)