.Rhistory

# Wilcoxon rank-sum test of LungCap across the Smoke groups; the test object
# is handed straight to report() for a text summary.
report(wilcox.test(LungCap ~ Smoke, data = data))
library(tidyverse)
library(ISLR)
library(sjPlot)
library(likert)
# Overview of the Wage data frame with frequencies, percentages and
# missing-value information switched on.
view_df(Wage, show.frq = TRUE, show.prc = TRUE, show.na = TRUE)

# Frequency plot of education for the whole sample.
Wage |>
  plot_frq(education)

# Frequency plot of education on the data grouped by race.
Wage |>
  group_by(race) |>
  plot_frq(education)

# NOTE(review): absolute, machine-specific path; this only works where the
# I: drive is mapped. Confirm whether a project-relative path should be used.
source("I:/General/SAR/scripts/sjplot/sjplots.R", echo = TRUE)

# Grouped frequency plots of education arranged into one grid.
Wage |>
  group_by(race) |>
  plot_frq(education) |>
  plot_grid()

# Education counts split by job class. plot_grpfrq() is the function that
# takes var.cnt / var.grp; plot_frq() expects a data frame as first argument.
plot_grpfrq(
  var.cnt = Wage$education,
  var.grp = Wage$jobclass
)

# Cross-tabulation of education by job class as stacked, flipped bars with
# row margins and the summary annotation shown.
plot_xtab(
  x = Wage$education,
  grp = Wage$jobclass,
  margin = "row",
  bar.pos = "stack",
  show.summary = TRUE,
  coord.flip = TRUE
)

# Wage histograms per job class, with mean line and normal curve overlaid,
# arranged into one grid.
Wage |>
  group_by(jobclass) |>
  plot_frq(
    wage,
    type = "histogram",
    show.mean = TRUE,
    normal.curve = TRUE
  ) |>
  plot_grid()
# Load the pisaitems example data set (presumably shipped with the likert
# package attached earlier in the session).
data("pisaitems")

# Keep only the items whose names start with "ST25Q".
d <- pisaitems |>
  select(starts_with("ST25Q"))
View(d)

# Overview of the selected items, then a Likert plot of them.
# TRUE is spelled out because T can be reassigned.
view_df(d, show.frq = TRUE, show.prc = TRUE)
plot_likert(d)
# Linear model of wage on education, fitted on the Wage data.
m <- lm(wage ~ education, data = Wage)

# Predicted values plot and the standard model summary.
plot_model(m, type = "pred")
summary(m)

# Coefficient plot with value labels; width = 0 is passed through unchanged.
plot_model(m, show.values = TRUE, width = 0) +
  ylab("Increase in salary as compared to education")

# Regression table with reference levels shown, the intercept hidden and
# p-values printed as numbers plus stars. Rendered once; the call was
# previously repeated verbatim.
tab_model(
  m,
  show.reflvl = TRUE,
  show.intercept = FALSE,
  p.style = "numeric_stars"
)
# load required packages
library(tidyverse)
library(easystats)
library(gtsummary)
library(gt)
library(readxl)
# Read the third sheet of the processed KAP workbook.
kap_data <- read_excel("data/KAP_Thals_Processed.xlsx", sheet = 3)

# Table 1. Demographic characteristics of the study participants
# Columns 1-6 are summarised; continuous variables are shown as mean (sd) and
# the thalassemia-awareness item is forced to a categorical display.
kap_data |>
  select(1:6) |>
  tbl_summary(
    statistic = list(
      all_continuous() ~ "{mean} ({sd})"
    ),
    type = `Do you know about thalassemia?` ~ "categorical"
  ) |>
  as_gt()

# Look up the factor() documentation.
help("factor")

# Same table, with the awareness item recoded as a factor whose levels are
# ordered Yes, No.
kap_data |>
  select(1:6) |>
  mutate(
    `Do you know about thalassemia?` = factor(
      `Do you know about thalassemia?`,
      levels = c("Yes", "No")
    )
  ) |>
  tbl_summary(
    statistic = list(
      all_continuous() ~ "{mean} ({sd})"
    ),
    type = `Do you know about thalassemia?` ~ "categorical"
  ) |>
  as_gt()
# Table 2. Distribution of knowledge
# Respondents who answered "Yes" to the awareness item (all columns).
kap_data |>
  filter(`Do you know about thalassemia?` == "Yes")

# Same respondents, restricted to columns 8-17.
# NOTE(review): presumably these are the knowledge items; confirm the
# positions against the sheet layout.
kap_data |>
  filter(`Do you know about thalassemia?` == "Yes") |>
  select(8:17)

# Default gtsummary table of those columns.
kap_data |>
  filter(`Do you know about thalassemia?` == "Yes") |>
  select(8:17) |>
  tbl_summary()

# Counts per question, one column per observed response value.
# The former rename(Yes = Yes, No = No, ...) step is dropped: it renamed every
# column to itself and could only fail when a response value was absent.
kap_summary <- kap_data |>
  filter(`Do you know about thalassemia?` == "Yes") |>
  select(8:17) |>
  pivot_longer(everything(), names_to = "Question", values_to = "Response") |>
  count(Question, Response) |>
  pivot_wider(names_from = Response, values_from = n, values_fill = 0)
View(kap_summary)

# Long format: one row per respondent and question.
kap_summary <- kap_data |>
  filter(`Do you know about thalassemia?` == "Yes") |>
  select(8:17) |>
  pivot_longer(everything(), names_to = "Question", values_to = "Response")
View(kap_summary)

# gtsummary tables of the long data: overall, split by response, and split by
# response with the Question row relabelled.
kap_summary |>
  tbl_summary()
kap_summary |>
  tbl_summary(by = "Response")
kap_summary |>
  tbl_summary(
    by = "Response",
    label = list(Question ~ "Characteristic")
  )
# Long format with Response as a factor whose levels are ordered
# Yes, No, Don't know (any other value becomes NA).
kap_summary <- kap_data |>
  filter(`Do you know about thalassemia?` == "Yes") |>
  select(8:17) |>
  pivot_longer(everything(), names_to = "Question", values_to = "Response") |>
  mutate(Response = factor(Response, levels = c("Yes", "No", "Don't know")))

# Question distribution split by response.
kap_summary |>
  tbl_summary(
    by = "Response"
  )

# Default gtsummary table of the untransformed columns, for comparison.
kap_data |>
  filter(`Do you know about thalassemia?` == "Yes") |>
  select(8:17) |>
  tbl_summary()

# Counts per question and response, printed to the console.
# Each pipeline must end on a complete call: a trailing `|>` followed by a new
# statement is a parse error.
kap_data |>
  filter(`Do you know about thalassemia?` == "Yes") |>
  select(8:17) |>
  pivot_longer(everything(), names_to = "Question", values_to = "Response") |>
  mutate(Response = factor(Response, levels = c("Yes", "No", "Don't know"))) |>
  count(Question, Response)

# Same counts, stored for further use.
kap_summary <- kap_data |>
  filter(`Do you know about thalassemia?` == "Yes") |>
  select(8:17) |>
  pivot_longer(everything(), names_to = "Question", values_to = "Response") |>
  mutate(Response = factor(Response, levels = c("Yes", "No", "Don't know"))) |>
  count(Question, Response)
View(kap_summary)

# NOTE(review): kap_summary now holds one row per question/response pair, so
# this table summarises rows of the count table rather than respondents.
# Confirm this is what is wanted.
kap_summary |>
  tbl_summary(
    by = "Response"
  )
# Wide count table: one row per question, one column per response level.
kap_summary <- kap_data |>
  filter(`Do you know about thalassemia?` == "Yes") |>
  select(8:17) |>
  pivot_longer(everything(), names_to = "Question", values_to = "Response") |>
  mutate(Response = factor(Response, levels = c("Yes", "No", "Don't know"))) |>
  count(Question, Response) |>
  pivot_wider(names_from = Response, values_from = n, values_fill = 0)
View(kap_summary)

# Wide, then long again: every question/response pair gets a row with its
# count, including pairs filled in with 0 by pivot_wider().
kap_summary <- kap_data |>
  filter(`Do you know about thalassemia?` == "Yes") |>
  select(8:17) |>
  pivot_longer(everything(), names_to = "Question", values_to = "Response") |>
  mutate(Response = factor(Response, levels = c("Yes", "No", "Don't know"))) |>
  count(Question, Response) |>
  pivot_wider(names_from = Response, values_from = n, values_fill = 0) |>
  pivot_longer(cols = -Question, names_to = "Response", values_to = "Count")
View(kap_summary)
kap_summary |>
  tbl_summary(
    by = "Response"
  )

# Back to the wide count table.
kap_summary <- kap_data |>
  filter(`Do you know about thalassemia?` == "Yes") |>
  select(8:17) |>
  pivot_longer(everything(), names_to = "Question", values_to = "Response") |>
  mutate(Response = factor(Response, levels = c("Yes", "No", "Don't know"))) |>
  count(Question, Response) |>
  pivot_wider(names_from = Response, values_from = n, values_fill = 0)
View(kap_summary)
kap_summary |>
  tbl_summary()

# Render the wide counts as a gt table. The input is a plain tibble, so the
# constructor is gt(); as_gt() only converts gtsummary objects, and at_gt()
# does not exist.
kap_data |>
  filter(`Do you know about thalassemia?` == "Yes") |>
  select(8:17) |>
  pivot_longer(everything(), names_to = "Question", values_to = "Response") |>
  mutate(Response = factor(Response, levels = c("Yes", "No", "Don't know"))) |>
  count(Question, Response) |>
  pivot_wider(names_from = Response, values_from = n, values_fill = 0) |>
  gt()
gt(kap_summary)
# Per-question response table whose cells read "count (percent%)".
kap_summary <- kap_data |>
  filter(`Do you know about thalassemia?` == "Yes") |>
  select(8:17) |>
  pivot_longer(everything(), names_to = "Question", values_to = "Response") |>
  mutate(Response = factor(Response, levels = c("Yes", "No", "Don't know"))) |>
  count(Question, Response) |>
  # Percentages are taken within each question; without the grouping, sum(n)
  # would be the total over all questions.
  group_by(Question) |>
  mutate(
    Percentage = n / sum(n) * 100,
    Count_Percentage = paste0(n, " (", round(Percentage, 1), "%)")
  ) |>
  ungroup() |>
  # Keep only the key and the display column: any extra column would act as an
  # id column in pivot_wider() and stop rows collapsing to one per question.
  select(Question, Response, Count_Percentage) |>
  pivot_wider(
    names_from = Response,
    values_from = Count_Percentage,
    values_fill = "0 (0%)"
  )
View(kap_summary)
# Wide count table (one row per question), stored in kap_summary.
kap_summary <- kap_data |>
  filter(`Do you know about thalassemia?` == "Yes") |>
  select(8:17) |>
  pivot_longer(everything(), names_to = "Question", values_to = "Response") |>
  mutate(Response = factor(Response, levels = c("Yes", "No", "Don't know"))) |>
  count(Question, Response) |>
  pivot_wider(
    names_from = Response,
    values_from = n,
    values_fill = 0
  )

# The same wide count table, printed instead of stored.
kap_data |>
  filter(`Do you know about thalassemia?` == "Yes") |>
  select(8:17) |>
  pivot_longer(everything(), names_to = "Question", values_to = "Response") |>
  mutate(Response = factor(Response, levels = c("Yes", "No", "Don't know"))) |>
  count(Question, Response) |>
  pivot_wider(
    names_from = Response,
    values_from = n,
    values_fill = 0
  )

# Printed variant whose cells read "count (percent%)", with the percentage
# taken within each question.
kap_data |>
  filter(`Do you know about thalassemia?` == "Yes") |>
  select(8:17) |>
  pivot_longer(everything(), names_to = "Question", values_to = "Response") |>
  mutate(Response = factor(Response, levels = c("Yes", "No", "Don't know"))) |>
  count(Question, Response) |>
  group_by(Question) |>
  mutate(
    Percent = n / sum(n) * 100,
    Display = paste0(n, " (", round(Percent, 1), "%)")
  ) |>
  # Only the key columns and the display string go into the wide table.
  select(-n, -Percent) |>
  pivot_wider(
    names_from = Response,
    values_from = Display,
    values_fill = "0 (0%)"
  )
# Per-question response table, cells formatted as "count (percent%)".
kap_summary <- kap_data |>
  filter(`Do you know about thalassemia?` == "Yes") |>
  select(8:17) |>
  pivot_longer(everything(), names_to = "Question", values_to = "Response") |>
  mutate(Response = factor(Response, levels = c("Yes", "No", "Don't know"))) |>
  count(Question, Response) |>
  group_by(Question) |>
  mutate(
    Percent = n / sum(n) * 100,
    Display = paste0(n, " (", round(Percent, 1), "%)")
  ) |>
  # Drop the grouping so later consumers get a plain tibble (gt(), for one,
  # turns grouping variables into row groups).
  ungroup() |>
  select(-n, -Percent) |>
  pivot_wider(
    names_from = Response,
    values_from = Display,
    values_fill = "0 (0%)"
  )
View(kap_summary)

# Variant without the percent sign inside the cell: "count (percent)".
# The parenthesis is closed, and the fill for missing combinations uses the
# same format as the computed cells.
kap_summary <- kap_data |>
  filter(`Do you know about thalassemia?` == "Yes") |>
  select(8:17) |>
  pivot_longer(everything(), names_to = "Question", values_to = "Response") |>
  mutate(Response = factor(Response, levels = c("Yes", "No", "Don't know"))) |>
  count(Question, Response) |>
  group_by(Question) |>
  mutate(
    Percent = n / sum(n) * 100,
    Display = paste0(n, " (", round(Percent, 1), ")")
  ) |>
  ungroup() |>
  select(-n, -Percent) |>
  pivot_wider(
    names_from = Response,
    values_from = Display,
    values_fill = "0 (0)"
  )
View(kap_summary)
# Manual check of three counts and three percentages.
# NOTE(review): presumably the Yes / No / Don't know figures of one question,
# with the percentages expected to total 100. Confirm.
441 + 34 + 135
72.3 + 5.6 + 22.1

# Plain gt table of the summary.
kap_summary |>
  gt()

# With response columns relabelled. gt() is applied exactly once: it takes a
# data frame, not an existing gt table.
kap_summary |>
  gt() |>
  cols_label(
    Yes = "Yes (%)",
    No = "No (%)",
    `Don't know` = "Don't Know (%)"
  )

# With title and subtitle added.
kap_summary |>
  gt() |>
  cols_label(
    Yes = "Yes (%)",
    No = "No (%)",
    `Don't know` = "Don't Know (%)"
  ) |>
  tab_header(
    title = "Knowledge, Attitude, and Practice (KAP) Summary",
    subtitle = "Responses to Thalassemia-Related Questions"
  )

# Final layout: markdown formatting on every column plus font sizes for the
# table body, title and subtitle.
kap_summary |>
  gt() |>
  cols_label(
    Yes = "Yes (%)",
    No = "No (%)",
    `Don't know` = "Don't Know (%)"
  ) |>
  tab_header(
    title = "Knowledge, Attitude, and Practice (KAP) Summary",
    subtitle = "Responses to Thalassemia-Related Questions"
  ) |>
  fmt_markdown(columns = everything()) |>
  tab_options(
    table.font.size = 12,
    heading.title.font.size = 16,
    heading.subtitle.font.size = 14
  )
# Build the per-question response table and export it as CSV.
# Cells read "count (percent)", with the percentage taken within each question.
kap_summary <- kap_data |>
  filter(`Do you know about thalassemia?` == "Yes") |>
  select(8:17) |>
  pivot_longer(everything(), names_to = "Question", values_to = "Response") |>
  mutate(Response = factor(Response, levels = c("Yes", "No", "Don't know"))) |>
  count(Question, Response) |>
  group_by(Question) |>
  mutate(
    Percent = n / sum(n) * 100,
    Display = paste0(n, " (", round(Percent, 1), ")")
  ) |>
  ungroup() |>
  select(-n, -Percent) |>
  # Missing question/response combinations use the same "count (percent)"
  # format as the computed cells, so the exported column is uniform.
  pivot_wider(
    names_from = Response,
    values_from = Display,
    values_fill = "0 (0)"
  )

# NOTE(review): assumes the tables/ directory already exists; write.csv() does
# not create it.
write.csv(kap_summary, "tables/KAP_Summary.csv", row.names = FALSE)
# load packages
library(tidyverse)
library(easystats)
# Step 1. Load the data
data <- readxl::read_excel("data/LungCapData.xls")

# Step 2. Define the research question
# RQ: Is there any difference in lung capacity between smokers and non-smokers?
# Mean LungCap per Smoke group gives a first descriptive look.
tapply(data$LungCap, data$Smoke, mean)

# Step 3. Bivariate or multivariate?
# Bivariate: two variables are involved, Smoke (categorical) and LungCap
# (continuous).

# Step 4. Difference or correlation?
# The goal is to test for a difference in LungCap between the two groups
# (smokers and non-smokers).

# Step 5. Independent or paired data?
# The data are independent because the smoker and non-smoker groups consist of
# different participants.
table(data$Smoke)

# Step 6. Type of outcome variable?
# Inspect column types, convert Smoke to a factor, then inspect again.
glimpse(data)
data$Smoke <- as.factor(data$Smoke)
glimpse(data)

# Step 7. Number of groups?
# There are two groups: smokers and non-smokers.
table(data$Smoke)

# Step 8. Check normality
# Visual check on the pooled LungCap values.
hist(data$LungCap)
qqnorm(data$LungCap)

# Shapiro-Wilk test within each group.
# NOTE(review): assumes the Smoke levels are spelled "yes" and "no"; confirm
# against the table(data$Smoke) output above.
shapiro.test(data$LungCap[data$Smoke == "yes"])  # smokers
shapiro.test(data$LungCap[data$Smoke == "no"])   # non-smokers

# Text report of the smokers' normality test.
norm_test <- shapiro.test(data$LungCap[data$Smoke == "yes"])
report(norm_test)
# If the data are normal: two-sample t-test of LungCap by Smoke with equal
# variances assumed (var.equal = TRUE), reported as text.
report(t.test(LungCap ~ Smoke, data = data, var.equal = TRUE))

# Same test, condensed to the short form of the report.
summary(report(t.test(LungCap ~ Smoke, data = data, var.equal = TRUE)))

# If the data are not normal: Wilcoxon rank-sum test, reported as text.
report(wilcox.test(LungCap ~ Smoke, data = data))

# Keep the t-test object for later use, then report it.
ttest_result <- t.test(LungCap ~ Smoke, data = data, var.equal = TRUE)
report(ttest_result)
# A 3 x 3 integer matrix holding 1..9, filled column by column.
mat <- matrix(seq_len(9), nrow = 3)

# apply() over rows (MARGIN = 1): one total per row.
apply(mat, MARGIN = 1, FUN = sum)
mat
apply(mat, MARGIN = 1, FUN = sum)

# apply() over columns (MARGIN = 2): one total per column.
apply(mat, MARGIN = 2, FUN = sum)

# A named list of three integer vectors.
my_list <- list(a = 1:5, b = 6:10, c = 11:15)
my_list

# lapply() always returns a list: here, the mean of each element.
lapply(my_list, FUN = mean)

# sapply() does the same work but simplifies the result to a vector or matrix
# when it can: here, a named numeric vector of means.
sapply(my_list, FUN = mean)
# Embed the data-types figure; three spellings of the file name are tried.
# NOTE(review): presumably only one of these files exists under img/; confirm
# and keep that one.
knitr::include_graphics(path = "img/data-types.png")
#| echo: false
knitr::include_graphics(path = "img/data-types2.png")
#| echo: false
knitr::include_graphics(path = "img/data-types-2.png")
library(tidyverse)
library(easystats)
library(tidyverse)
library(easystats)
library(readxl)
#| label: load-package
#| warning: false
#| message: false
library(tidyverse)
library(easystats)
library(readxl)
# Load the pulse data. The file is a CSV, so it is read with read.csv();
# read_excel() only reads .xls / .xlsx workbooks.
data <- read.csv("data/pulse_data.csv")
View(data)

# Simple linear regression of Height on BMI: printed fit, stored fit, then the
# full and the condensed text report. report() needs the model as argument.
lm(Height ~ BMI, data = data)
lm_model <- lm(Height ~ BMI, data = data)
report(lm_model)
report(lm_model) |>
  summary()

#| label: lm-model
uvlm_model <- lm(Height ~ BMI, data = data)
report(uvlm_model) |>
  summary()

#| label: uvlm-model
# Univariable model: BMI explained by Age.
uvlm_model <- lm(BMI ~ Age, data = data)
report(uvlm_model) |>
  summary()

#| label: mvlm-model
# Multivariable model: BMI explained by Age, Height, Weight and Gender.
# NOTE(review): if BMI in this file was computed from Height and Weight, both
# predictors largely determine the outcome. Confirm this is intended.
mvlm_model <- lm(BMI ~ Age + Height + Weight + Gender, data = data)
report(mvlm_model) |>
  summary()
# Scratch arithmetic typed at the console; each line prints its result.
2 + 2
2 + 2
# Initial comment of this line
2 + 2