## Define function that recodes to numeric, but watches out to coercion to not introduce NAs
colstonumeric <- function(df){
tryCatch({
df_num <- as.data.frame(
lapply(df,
function(x) { as.numeric(as.character(x))}))
},warning = function(stop_on_warning) {
message("Stoped the execution of numeric conversion: ", conditionMessage(stop_on_warning))
})
}
##
## Define function that reverse codes items
ReverseCode <- function(df, tonumeric = FALSE, min = NULL, max = NULL) {
if(tonumeric) df <- colstonumeric(df)
df <- (max + min) - df
}
##
## Define function that scores only rows with less than 10% NAs (returns NA if all or above threshold percentage of rows are NA); can reverse code if vector of column indexes and min, max are provided.
ScoreLikert <- function(df, napercent = .1, tonumeric = FALSE, reversecols = NULL, min = NULL, max = NULL, engine = "sum") {
reverse_list <- list(reversecols = reversecols, min = min, max = max)
reverse_check <- !sapply(reverse_list, is.null)
# Recode to numeric, but watch out to coercion to not introduce NAs
colstonumeric <- function(df){
tryCatch({
df_num <- as.data.frame(
lapply(df,
function(x) { as.numeric(as.character(x))}))
},warning = function(stop_on_warning) {
message("Stoped the execution of numeric conversion: ", conditionMessage(stop_on_warning))
})
}
if(tonumeric) df <- colstonumeric(df)
if(all(reverse_check)){
df[ ,reversecols] <- (max + min) - df[ ,reversecols]
}else if(any(reverse_check)){
stop("Insuficient info for reversing. Please provide: ", paste(names(reverse_list)[!reverse_check], collapse = ", "))
}
if(engine == "sum") {
return(
ifelse(rowSums(is.na(df)) > ncol(df) * napercent,
NA,
rowSums(df, na.rm = TRUE) * NA ^ (rowSums(!is.na(df)) == 0)
)
)
}
if(engine == "mean") {
return(
ifelse(rowMeans(is.na(df)) > ncol(df) * napercent,
NA,
rowMeans(df, na.rm = TRUE) * NA ^ (rowSums(!is.na(df)) == 0)
)
)
}
if(engine == "mean_na") {
df[is.na(df)] <- 0
rowMeans(df)
}
}
# helper for mixed type to long format
select_to_longer <- function(data, var_prefix = NULL) {
var_regex <- paste0("(", var_prefix, ")_(pre|post)")
data %>%
dplyr::select(ID, Conditia, dplyr::matches(var_regex)) %>%
tidyr::pivot_longer(cols = dplyr::matches(var_regex),
names_to = c("Variable", "PrePost"),
names_pattern = "(.*)_(pre|post)",
values_to = var_prefix
) %>%
dplyr::mutate(
ID = as.factor(ID),
Cond = factor(Conditia, levels = c("VR", "VR miros", "VR social")),
PrePost = factor(PrePost, levels = c("pre", "post"))
) %>%
dplyr::select(-c(Conditia, Variable))
} # select_to_longer(df, "varpref)
# # -------------------------------------------------------------------------
# # INTERACTIVE - Get the data from Google Drive ----------------------------
# library(googledrive)
#
# # Get the data from Google Drive
# sheet_url <- "https://docs.google.com/spreadsheets/d/1ATSP4zEGt2mo25Kzu3lYlwu1LBA2WfwW/edit?usp=sharing&ouid=109354048369365876984&rtpof=true&sd=true"
# file_name <- paste0("DATE SAM,2022_", Sys.Date(), ".xlsx")
#
# ###############################
# if(interactive()){
# want_dl <- readline("Want to re-download data? 1-YES 2-NO: ")
# if (want_dl == 1) {
# # Call Google Drive authentication forcing interactive login and save in cache
# googledrive::drive_auth(use_oob = TRUE, cache = TRUE)
#
# # Reuse token to Sheet authentification
# # googlesheets4::gs4_auth(token = googledrive::drive_token()) # the file is .xlsx not google sheet
#
# # Download the .xlsx from google drive
# googledrive::drive_download(sheet_url, path = file.path(here::here(), file_name), overwrite = TRUE)
# }
# }
# ##################################################################################################################
# # INTERACTIVE - Read the latest .xlsx file for scalp ----------------------
#
# # Read the latest .xlsx file for scalp SSM
# last_xlsx_file <-
# dir(here::here(), pattern = "DATE SAM.*xlsx", full.names = TRUE) %>%
# file.info() %>%
# dplyr::arrange(dplyr::desc(ctime)) %>%
# dplyr::slice(1) %>%
# row.names()
#
# # Check if it is the same that was downloaded this session
# check_file <- identical(file_name, basename(last_xlsx_file))
# check_file
#
# # If it is read it, if not decide what to do
# if(interactive() & check_file){
# df_arsq <- rio::import(file = last_xlsx_file, which = "ARSQ", skip = 1)
# df_memo <- rio::import(file = last_xlsx_file, which = "Scala Memorabilitate")
# df_pq <- rio::import(file = last_xlsx_file, which = "Chestionar prezenta")
# df_itq <- rio::import(file = last_xlsx_file, which = "ITQ")
# }
# # -------------------------------------------------------------------------
last_xlsx_file <-
dir(here::here(), pattern = "DATE SAM.*xlsx", full.names = TRUE) %>%
file.info() %>%
dplyr::arrange(dplyr::desc(ctime)) %>%
dplyr::slice(1) %>%
row.names()
df_arsq <- rio::import(file = last_xlsx_file, which = "ARSQ", skip = 1)
New names:
New names:
df_sam_ox <- df_sam_ox[, 2:5]
df_sam_ox <-
df_sam_ox %>%
janitor::clean_names() %>%
tidyr::separate_wider_delim(id_proba, delim = "/", names = c("id", "study", "prepost")) %>%
mutate(id = as.numeric(id))
# Wide data (to join to other dataframes)
df_sam_ox_wide <-
df_sam_ox %>%
dplyr::select(-study) %>%
pivot_wider(id_cols = id, names_from = prepost, values_from = starts_with("oxi")) %>%
dplyr::mutate(oxitocina_pg_ml_DIF = oxitocina_pg_ml_POST - oxitocina_pg_ml_PRE,
oxitocina_normalizata_DIF = oxitocina_normalizata_POST - oxitocina_normalizata_PRE)
# Join dataframes to add condition
df_sam_ox_long <- left_join(df_sam_ox, df_arsq[, 1:5], by = c("id" = "ID"))
df_arsq <-
df_arsq %>%
select_if(~ !all(is.na(.))) %>% # drop columns with only NAs
mutate(across(where(is.character), ~ na_if(.x, "na"))) %>% # mark NAs
mutate(across(i1:j55, ~ as.numeric(.x))) %>% # convert item cols to numeric
rename_with(~ paste0("ascq_pre_", 1:55), .cols = i1:i55) %>% # rename item cols
rename_with(~ paste0("ascq_post_", 1:55), .cols = j1:j55) %>%
mutate(
ID = as.numeric(ID),
Conditia = as.factor(Conditia),
Gen = case_when(
Gen == 1 ~ "f",
Gen == 2 ~ "m",
TRUE ~ NA_character_
),
Gen = as.factor(Gen)
)
df_pq <-
df_pq %>%
select_if(~ !all(is.na(.))) %>% # drop columns with only NAs
mutate(across(where(is.character), ~ na_if(.x, "na"))) %>% # mark NAs
mutate(across(i1:i12, ~ as.numeric(.x))) %>% # convert item cols to numeric
rename_with(~ paste0("pq_", 1:12), .cols = i1:i12) %>% # rename item cols
mutate(
ID = as.numeric(ID),
Conditia = as.factor(Conditia),
Gen = case_when(
Gen == 1 ~ "f",
Gen == 2 ~ "m",
TRUE ~ NA_character_
),
Gen = as.factor(Gen)
)
df_itq <-
df_itq %>%
select_if(~ !all(is.na(.))) %>% # drop columns with only NAs
mutate(across(where(is.character), ~ na_if(.x, "na"))) %>% # mark NAs
mutate(across(itq1:itq29, ~ as.numeric(.x))) %>% # convert item cols to numeric
mutate(
ID = as.numeric(ID),
Conditia = as.factor(Conditia),
Gen = case_when(
Gen == 1 ~ "f",
Gen == 2 ~ "m",
TRUE ~ NA_character_
),
Gen = as.factor(Gen)
)
df_memo <-
df_memo %>%
select_if(~ !all(is.na(.))) %>% # drop columns with only NAs
filter(!all(is.na(.))) %>% # drop rows with only NAs
mutate(across(starts_with("Varsta_amin_"), ~ as.numeric(.x))) %>% # convert item cols to numeric
mutate(across(starts_with("Val_"), ~ as.numeric(.x))) %>%
mutate(across(starts_with("Viv_"), ~ as.numeric(.x))) %>%
mutate(across(starts_with("Relv_"), ~ as.numeric(.x))) %>%
mutate(
ID = as.numeric(ID),
Conditia = as.factor(Conditia),
Gen = case_when(
Gen == 1 ~ "f",
Gen == 2 ~ "m",
TRUE ~ NA_character_
),
Gen = as.factor(Gen)
)
df_memo <- df_memo[1:80, ] # only the first 80 rows have data
df_memo <-
df_memo %>%
rowwise() %>%
mutate(
Valence = mean(c_across(starts_with("Val_")), na.rm = TRUE),
Vividness = mean(c_across(starts_with("Viv_")), na.rm = TRUE),
Relevance = mean(c_across(starts_with("Relv_")), na.rm = TRUE)
)
# identical(df_memo$Valence, rowMeans(df_memo[, paste0("Val_", 1:10)], na.rm = TRUE))
# identical(df_memo$Vividness, rowMeans(df_memo[, paste0("Viv_", 1:10)], na.rm = TRUE))
# identical(df_memo$Relevance, rowMeans(df_memo[, paste0("Relv_", 1:10)], na.rm = TRUE))
# full scoring in EEG_O.4_resting_state_behavioral
## ARSQ (Likert 1-5) -- no reverse scoring, in total 30 items to score + 25 additional single items
# discont: 1, 2, 3
# tom: 4, 5, 6
# self: 7, 8, 9
# planning: 10, 11, 12
# sleep: 13, 14, 15
# comfort: 16, 17, 18
# somatic: 19, 20, 21
# health: 22, 23, 24
# visual: 25, 26, 27
# verbal: 28, 29, 30
# + 25 individual items
df_arsq$discont_pre <- ScoreLikert(df_arsq[, paste0("ascq_pre_", c(1, 2, 3))], napercent = 1, engine = "mean")
df_arsq$tom_pre <- ScoreLikert(df_arsq[, paste0("ascq_pre_", c(4, 5, 6))], napercent = 1, engine = "mean")
df_arsq$self_pre <- ScoreLikert(df_arsq[, paste0("ascq_pre_", c(7, 8, 9))], napercent = 1, engine = "mean")
df_arsq$planning_pre <- ScoreLikert(df_arsq[, paste0("ascq_pre_", c(10, 11, 12))], napercent = 1, engine = "mean")
df_arsq$sleep_pre <- ScoreLikert(df_arsq[, paste0("ascq_pre_", c(13, 14, 15))], napercent = 1, engine = "mean")
df_arsq$comfort_pre <- ScoreLikert(df_arsq[, paste0("ascq_pre_", c(16, 17, 18))], napercent = 1, engine = "mean")
df_arsq$somatic_pre <- ScoreLikert(df_arsq[, paste0("ascq_pre_", c(19, 20, 21))], napercent = 1, engine = "mean")
df_arsq$health_pre <- ScoreLikert(df_arsq[, paste0("ascq_pre_", c(22, 23, 24))], napercent = 1, engine = "mean")
df_arsq$visual_pre <- ScoreLikert(df_arsq[, paste0("ascq_pre_", c(25, 26, 27))], napercent = 1, engine = "mean")
df_arsq$verbal_pre <- ScoreLikert(df_arsq[, paste0("ascq_pre_", c(28, 29, 30))], napercent = 1, engine = "mean")
df_arsq$discont_post <- ScoreLikert(df_arsq[, paste0("ascq_post_", c(1, 2, 3))], napercent = 1, engine = "mean")
df_arsq$tom_post <- ScoreLikert(df_arsq[, paste0("ascq_post_", c(4, 5, 6))], napercent = 1, engine = "mean")
df_arsq$self_post <- ScoreLikert(df_arsq[, paste0("ascq_post_", c(7, 8, 9))], napercent = 1, engine = "mean")
df_arsq$planning_post <- ScoreLikert(df_arsq[, paste0("ascq_post_", c(10, 11, 12))], napercent = 1, engine = "mean")
df_arsq$sleep_post <- ScoreLikert(df_arsq[, paste0("ascq_post_", c(13, 14, 15))], napercent = 1, engine = "mean")
df_arsq$comfort_post <- ScoreLikert(df_arsq[, paste0("ascq_post_", c(16, 17, 18))], napercent = 1, engine = "mean")
df_arsq$somatic_post <- ScoreLikert(df_arsq[, paste0("ascq_post_", c(19, 20, 21))], napercent = 1, engine = "mean")
df_arsq$health_post <- ScoreLikert(df_arsq[, paste0("ascq_post_", c(22, 23, 24))], napercent = 1, engine = "mean")
df_arsq$visual_post <- ScoreLikert(df_arsq[, paste0("ascq_post_", c(25, 26, 27))], napercent = 1, engine = "mean")
df_arsq$verbal_post <- ScoreLikert(df_arsq[, paste0("ascq_post_", c(28, 29, 30))], napercent = 1, engine = "mean")
Error in `left_join()`:
! Join columns in `y` must be present in the data.
✖ Problem with `id`.
Backtrace:
1. dplyr::left_join(df_memo_ox, df_dur_ms, by = join_by(ID == id))
2. dplyr:::left_join.data.frame(df_memo_ox, df_dur_ms, by = join_by(ID == id))
df_sam_ox_long %>%
mutate(prepost = factor(prepost, levels = c("PRE", "POST"))) %>%
group_by(id, Conditia) %>%
filter(n() > 1) %>% # exclude incomplete
ungroup() %>%
ggstatsplot::grouped_ggwithinstats(
x = prepost,
y = oxitocina_normalizata,
grouping.var = Conditia,
type = "p",
bf.message = FALSE,
annotation.args = list(title = "Pre & Post OX data", subtitle = "")
)
df_sam_ox_long %>%
mutate(prepost = factor(prepost, levels = c("PRE", "POST"))) %>%
filter(oxitocina_normalizata < 3) %>% # exclude outlier from "VR miros"
group_by(id, Conditia) %>%
filter(n() > 1) %>% # exclude incomplete
ungroup() %>%
ggstatsplot::grouped_ggwithinstats(
x = prepost,
y = oxitocina_normalizata,
grouping.var = Conditia,
type = "p",
bf.message = FALSE,
annotation.args = list(title = "Pre & Post OX data", subtitle = "Outlier excluded from VR miros")
)
df_sam_ox_long_anova <-
df_sam_ox_long %>%
mutate(prepost = factor(prepost, levels = c("PRE", "POST"))) %>%
filter(oxitocina_normalizata < 3)
# ANOVA
anova_test(
data = df_sam_ox_long_anova, wid = id,
dv = oxitocina_normalizata,
within = prepost, between = Conditia
)
ANOVA Table (type III tests)
Effect DFn DFd F p p<.05 ges
1 Conditia 2 60 0.979 0.382 0.025
2 prepost 1 60 6.580 0.013 * 0.025
3 Conditia:prepost 2 60 2.439 0.096 0.018
# comparisons for treatment variable
df_sam_ox_long_anova %>%
group_by(prepost) %>%
pairwise_t_test(
oxitocina_normalizata ~ Conditia, paired = FALSE,
p.adjust.method = "holm"
)
# comparisons for time variable
df_sam_ox_long_anova %>%
group_by(id, Conditia) %>%
filter(n() > 1) %>%
ungroup() %>%
group_by(Conditia) %>%
pairwise_t_test(
oxitocina_normalizata ~ prepost, paired = TRUE,
p.adjust.method = "holm"
)
df_dur_ms %>%
ggstatsplot::ggbetweenstats(
x = Conditia,
y = med_dur,
outlier.label = ID,
xlab = ""
)
df_dur_ms %>%
ggstatsplot::ggbetweenstats(
x = Conditia,
y = mean_dur,
outlier.label = ID,
xlab = ""
)
same_cols <- intersect(names(df_memo_ox), names(df_pq_ox))
dplyr::left_join(df_memo_ox, df_pq_ox, by = same_cols) %>%
dplyr::rename(Conditia = Conditia.x) %>%
dplyr::filter(Conditia == "VR") %>%
dplyr::select(
"oxitocina_pg_ml_DIF", "oxitocina_normalizata_DIF",
"Valence", "Vividness", "Relevance", "PQ", "med_dur", "mean_dur"
) %>%
PerformanceAnalytics::chart.Correlation()
title("VR")
dplyr::left_join(df_memo_ox, df_pq_ox, by = same_cols) %>%
dplyr::rename(Conditia = Conditia.x) %>%
dplyr::filter(Conditia == "VR miros") %>%
dplyr::select(
"oxitocina_pg_ml_DIF", "oxitocina_normalizata_DIF",
"Valence", "Vividness", "Relevance", "PQ", "med_dur", "mean_dur"
) %>%
PerformanceAnalytics::chart.Correlation()
title("VR miros")
dplyr::left_join(df_memo_ox, df_pq_ox, by = same_cols) %>%
dplyr::rename(Conditia = Conditia.x) %>%
dplyr::filter(Conditia == "VR social") %>%
dplyr::select(
"oxitocina_pg_ml_DIF", "oxitocina_normalizata_DIF",
"Valence", "Vividness", "Relevance", "PQ", "med_dur", "mean_dur"
) %>%
PerformanceAnalytics::chart.Correlation()
title("VR social")
df_arsq_ox %>%
dplyr::filter(Conditia == "VR") %>%
dplyr::select(
"oxitocina_pg_ml_DIF", "oxitocina_normalizata_DIF",
contains("_dif")
) %>%
PerformanceAnalytics::chart.Correlation()
title("VR")
df_arsq_ox %>%
dplyr::filter(Conditia == "VR miros") %>%
dplyr::select(
"oxitocina_pg_ml_DIF", "oxitocina_normalizata_DIF",
contains("_dif")
) %>%
PerformanceAnalytics::chart.Correlation()
title("VR miros")
df_arsq_ox %>%
dplyr::filter(Conditia == "VR social") %>%
dplyr::select(
"oxitocina_pg_ml_DIF", "oxitocina_normalizata_DIF",
contains("_dif")
) %>%
PerformanceAnalytics::chart.Correlation()
title("VR social")