1 Read, Clean, Recode, Merge

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Read, Clean, Recode, Unite
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

## Read files
folder_teatru <- "E:/Cinetic idei noi/A13/Date grup teatru"
folder_psiho <- "E:/Cinetic idei noi/A13/Date grup psiho"
file_teatru <- "A13 Tabel date COMPLET28ian.xlsx"
file_psiho <- "A13P Tabel centralizat.xlsx"

setwd(folder_teatru)
Data_teatru <- rio::import(file.path(folder_teatru, file_teatru),
                           skip = 1)
setwd(folder_psiho)
Data_psiho <- rio::import(file.path(folder_psiho, file_psiho),
                           skip = 1)


## Tidy up data
# Function coalesce rows: colapse when NA, unite with "_" when not NA
coalesce2 <- function(...) {
  Reduce(function(x, y) {
    i <- which(is.na(x))
    j <- which(!is.na(x) & !is.na(y))
    x[i] <- y[i]
    x[j] <- paste(x[j], y[j], sep = "_")
    x},
    list(...))
}

colnames(Data_teatru) <- coalesce2(Data_teatru[2,], Data_teatru[3,])
Data_teatru <- Data_teatru[-c(1:3),]

colnames(Data_psiho) <- coalesce2(Data_psiho[2,], Data_psiho[3,])
Data_psiho <- Data_psiho[-c(1:3),]


## Check if both data sets have exact same column names
all.equal(colnames(Data_teatru), colnames(Data_psiho))           # not the same
diff_colname <- setdiff(colnames(Data_teatru), colnames(Data_psiho))
ind <- which(colnames(Data_teatru) ==  diff_colname)

colnames(Data_psiho)[ind] <- diff_colname                       # replace with same colname from Data_teatru
all.equal(colnames(Data_teatru), colnames(Data_psiho))           # now they are the same
## Solve duplicate names due to excel double header
# Function to paste a string before column name if it doesnt already start with that string
paste_tocolnames <- function(vec_colnames, string_paste){
  ind <- grep(pattern = string_paste, vec_colnames)                   # ignore column that already has string patterm
  vec_colnames[-ind] <- paste0(string_paste, vec_colnames[-ind])      # paste pattern to the rest of them
  return(vec_colnames)
}

colnames(Data_teatru)[6:21] <- paste_tocolnames(colnames(Data_teatru)[6:21], "APS pre_")
colnames(Data_teatru)[22:35] <- paste_tocolnames(colnames(Data_teatru)[22:35], "PPS pre_")
colnames(Data_teatru)[36:55] <- paste_tocolnames(colnames(Data_teatru)[36:55], "PANAS pre_")
colnames(Data_teatru)[76:79] <- paste_tocolnames(colnames(Data_teatru)[76:79], "SRS post_")
colnames(Data_teatru)[80:99] <- paste_tocolnames(colnames(Data_teatru)[80:99], "PANAS post_")
colnames(Data_teatru)[100:113] <- paste_tocolnames(colnames(Data_teatru)[100:113], "PPS post_")
colnames(Data_teatru)[114:129] <- paste_tocolnames(colnames(Data_teatru)[114:129], "APS post_")

colnames(Data_psiho)[6:21] <- paste_tocolnames(colnames(Data_psiho)[6:21], "APS pre_")
colnames(Data_psiho)[22:35] <- paste_tocolnames(colnames(Data_psiho)[22:35], "PPS pre_")
colnames(Data_psiho)[36:55] <- paste_tocolnames(colnames(Data_psiho)[36:55], "PANAS pre_")
colnames(Data_psiho)[76:79] <- paste_tocolnames(colnames(Data_psiho)[76:79], "SRS post_")
colnames(Data_psiho)[80:99] <- paste_tocolnames(colnames(Data_psiho)[80:99], "PANAS post_")
colnames(Data_psiho)[100:113] <- paste_tocolnames(colnames(Data_psiho)[100:113], "PPS post_")
colnames(Data_psiho)[114:129] <- paste_tocolnames(colnames(Data_psiho)[114:129], "APS post_")
# as.data.frame(colnames(Data_psiho))
# as.data.frame(colnames(Data_teatru))

colnames(Data_teatru) <- enc2native(colnames(Data_teatru))      # fix encoding
colnames(Data_psiho) <- enc2native(colnames(Data_psiho))


## Recode known missing values
# str(Data_psiho, list.len = ncol(Data_psiho))
# str(Data_psiho, list.len = ncol(Data_psiho))
Data_teatru <-
  Data_teatru %>%
  replace(. == "/", NA) %>%                                     # missing values are coded "/"
  replace(. == "-", NA) %>%                                     # missing values are coded "-"
  replace(. == "NA", NA)                                        # missing values are coded "NA"

Data_psiho <-
  Data_psiho %>%
  replace(.=="/", NA) %>%                                       # missing values are coded "/"
  replace(.=="-", NA) %>%                                       # missing values are coded "-"
  replace(. == "NA", NA)                                        # missing values are coded "NA"

  
## Check for non-numeric elements in data sets
check_numeric1 <- as.data.frame(sapply(Data_teatru, varhandle::check.numeric)) 
check_numeric2 <- as.data.frame(sapply(Data_psiho, varhandle::check.numeric))
# sapply(check_numeric1, function(x) length(which(!x)))     # look at columns with non-numeric and count of non-numeric values
# sapply(check_numeric2, function(x) length(which(!x)))

nonnumeric1 <- sapply(check_numeric1, function(x) which(!x, arr.ind = TRUE))    # find row numbers for non-numeric values
nonnumeric2 <- sapply(check_numeric2, function(x) which(!x, arr.ind = TRUE)) 
nonnumeric1[lapply(nonnumeric1, length) > 0]                                   # return only columns and rown numbers were non-numeric
nonnumeric2[lapply(nonnumeric2, length) > 0]
## Recode to numeric
Data_teatru[, -c(1:5)] <- sapply(Data_teatru[, -c(1:5)], as.numeric)    
Data_psiho[, -c(1:5)] <- sapply(Data_psiho[, -c(1:5)], as.numeric)     # cant do this because of encoding:  mutate_at(-c(1:5), ~as.numeric(.))
  
## Correct typos in IDs
#unique(Data_teatru$`Indicativ subiect`)
#unique(Data_psiho$`Indicativ subiect`)
Data_teatru$`Indicativ subiect`[Data_teatru$`Indicativ subiect` == "18.A.1:3"] <- "18.A.1.3"
Data_teatru$`Indicativ subiect`[Data_teatru$`Indicativ subiect` == "26.A.1.3."] <- "26.A.1.3"
Data_teatru$`Indicativ subiect`[Data_teatru$`Indicativ subiect` == "27.A.1.3."] <- "27.A.1.3"

Data_psiho$`Indicativ subiect` <- gsub(".P", "", Data_psiho$`Indicativ subiect`)               # delete .P from IDs
Data_psiho$`Indicativ subiect`[Data_psiho$`Indicativ subiect` == "16.A.A.1.3"] <- "16.A.1.3"
Data_psiho$`Indicativ subiect`[Data_psiho$`Indicativ subiect` == "1.A.1.3"] <- "01.A.1.3"
Data_psiho$`Indicativ subiect`[Data_psiho$`Indicativ subiect` == "2.A.1.3"] <- "02.A.1.3"
Data_psiho$`Indicativ subiect`[Data_psiho$`Indicativ subiect` == "4.A.1.3"] <- "04.A.1.3"
Data_psiho$`Indicativ subiect`[Data_psiho$`Indicativ subiect` == "0.4.A.1.3"] <- "04.A.1.3"
Data_psiho$`Indicativ subiect`[Data_psiho$`Indicativ subiect` == "5.A.1.3"] <- "05.A.1.3"
Data_psiho$`Indicativ subiect`[Data_psiho$`Indicativ subiect` == "6.A.1.3"] <- "06.A.1.3"
Data_psiho$`Indicativ subiect`[Data_psiho$`Indicativ subiect` == "7.A.1.3"] <- "07.A.1.3"
Data_psiho$`Indicativ subiect`[Data_psiho$`Indicativ subiect` == "8.A.1.3"] <- "08.A.1.3"
Data_psiho$`Indicativ subiect`[Data_psiho$`Indicativ subiect` == "9.A.1.3"] <- "09.A.1.3"
Data_psiho$`Indicativ subiect` <- paste0(Data_psiho$`Indicativ subiect`, ".P")                # add .P to all IDs

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Scoring Questionnaire and Unite
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
## Define function that calculates RowSums but only for rows with less than 10% NAs; and return NA if all row values are NA 
SpecialRowSums <- function(df, napercent = .1) {
  ifelse(rowSums(is.na(df)) > ncol(df) * napercent,
         NA,
         rowSums(df, na.rm = TRUE) * NA ^ (rowSums(!is.na(df)) == 0)
  )
}

## APS: simple sum
Data_teatru$`APS pre_Total` <- SpecialRowSums(Data_teatru[ ,sprintf("APS pre_%d", 1:16)], napercent = .13)  # not more than 2 NAs for 16 items
Data_teatru$`APS post_Total` <- SpecialRowSums(Data_teatru[ ,sprintf("APS post_%d", 1:16)], napercent = .13)
Data_psiho$`APS pre_Total` <- SpecialRowSums(Data_psiho[ ,sprintf("APS pre_%d", 1:16)], napercent = .13)
Data_psiho$`APS post_Total` <- SpecialRowSums(Data_psiho[ ,sprintf("APS post_%d", 1:16)], napercent = .13)

## PSS-SF 14: Items 4, 5, 6, 7, 9, 10, and 13 are scored in reverse direction.
keys_PSS <- c(1,1,1,-1,-1,-1,-1,1,-1,-1,1,1,-1,1)

Data_teatru$`PPS pre_Total` <- 
  SpecialRowSums(
  psych::reverse.code(items = Data_teatru[ ,sprintf("PPS pre_%d", 1:14)], keys = keys_PSS,  mini = 0, maxi = 4),
  napercent = .1)  # not more than 1 NAs for 14 items 
Data_teatru$`PPS post_Total` <- 
  SpecialRowSums(
    psych::reverse.code(items = Data_teatru[ ,sprintf("PPS post_%d", 1:14)], keys = keys_PSS,  mini = 0, maxi = 4),
    napercent = .1)
Data_psiho$`PPS pre_Total` <- 
  SpecialRowSums(
    psych::reverse.code(items = Data_psiho[ ,sprintf("PPS pre_%d", 1:14)], keys = keys_PSS,  mini = 0, maxi = 4),
    napercent = .1)  # not more than 1 NAs for 14 items 
Data_psiho$`PPS post_Total` <- 
  SpecialRowSums(
    psych::reverse.code(items = Data_psiho[ ,sprintf("PPS post_%d", 1:14)], keys = keys_PSS,  mini = 0, maxi = 4),
    napercent = .1)

## PANAS: Positive Affect Score = sum items 1, 3, 5, 9, 10, 12, 14, 16, 17, 19. Negative Affect Score = sum items 2, 4, 6, 7, 8, 11, 13, 15, 18, 20.
Data_teatru$`PA pre_Total` <- SpecialRowSums(Data_teatru[ ,35 + c(1,3,5,9,10,12,14,16,17,19)], napercent = .11) # not more than 1 NAs for 10 items
Data_teatru$`NA pre_Total` <- SpecialRowSums(Data_teatru[ ,35 + c(2,4,6,7,8,11,13,15,18,20)], napercent = .11)
Data_psiho$`PA pre_Total` <- SpecialRowSums(Data_psiho[ ,35 + c(1,3,5,9,10,12,14,16,17,19)], napercent = .11) 
Data_psiho$`NA pre_Total` <- SpecialRowSums(Data_psiho[ ,35 + c(2,4,6,7,8,11,13,15,18,20)], napercent = .11)

Data_teatru$`PA post_Total` <- SpecialRowSums(Data_teatru[ ,79 + c(1,3,5,9,10,12,14,16,17,19)], napercent = .11) 
Data_teatru$`NA post_Total` <- SpecialRowSums(Data_teatru[ ,79 + c(2,4,6,7,8,11,13,15,18,20)], napercent = .11)
Data_psiho$`PA post_Total` <- SpecialRowSums(Data_psiho[ ,79 + c(1,3,5,9,10,12,14,16,17,19)], napercent = .11) 
Data_psiho$`NA post_Total` <- SpecialRowSums(Data_psiho[ ,79 + c(2,4,6,7,8,11,13,15,18,20)], napercent = .11)


# Define other grouping varibles
Data_teatru <- 
  Data_teatru %>%
    mutate(Etapa = case_when(`Etapă, zi` %in% c("I.1", "I.2") ~ "I",
                             `Etapă, zi` %in% c("II.1",  "II.2") ~ "II",
                             `Etapă, zi` %in% c("III.1", "III.2") ~ "III",
                             `Etapă, zi` %in% c("IV.1", "IV.2") ~ "IV",
                             TRUE ~ NA_character_),
           Zi = case_when(`Etapă, zi` == "I.1" ~ "zi1", 
                          `Etapă, zi` == "I.2" ~ "zi2",  
                          `Etapă, zi` == "II.1" ~ "zi3", 
                          `Etapă, zi` == "II.2" ~ "zi4",
                          `Etapă, zi` == "III.1" ~ "zi5", 
                          `Etapă, zi` == "III.2" ~ "zi6", 
                          `Etapă, zi` == "IV.1" ~ "zi7", 
                          `Etapă, zi` == "IV.2" ~ "zi8", 
                          TRUE ~ NA_character_)) %>%
  unite(col = "Etapa_Zi", Etapa, Zi, remove = FALSE)

Data_psiho <- 
  Data_psiho %>%
  mutate(Etapa = case_when(`Etapă, zi` %in% c("I.1", "I.2") ~ "I",
                           `Etapă, zi` %in% c("II.1",  "II.2") ~ "II",
                           `Etapă, zi` %in% c("III.1", "III.2") ~ "III",
                           `Etapă, zi` %in% c("IV.1", "IV.2") ~ "IV",
                           TRUE ~ NA_character_),
         Zi = case_when(`Etapă, zi` == "I.1" ~ "zi1", 
                        `Etapă, zi` == "I.2" ~ "zi2",  
                        `Etapă, zi` == "II.1" ~ "zi3", 
                        `Etapă, zi` == "II.2" ~ "zi4",
                        `Etapă, zi` == "III.1" ~ "zi5", 
                        `Etapă, zi` == "III.2" ~ "zi6", 
                        `Etapă, zi` == "IV.1" ~ "zi7", 
                        `Etapă, zi` == "IV.2" ~ "zi8", 
                        TRUE ~ NA_character_)) %>%
  unite(col = "Etapa_Zi", Etapa, Zi, remove = FALSE)

## Unite data sets
Data_teatru$Dataset <- rep("teatru", nrow(Data_teatru))
Data_psiho$Dataset <- rep("psiho", nrow(Data_psiho))

Data_United <- rbind(Data_teatru, Data_psiho)

2 Sample descriptives

## Number of subjects
## Number of subjects by Teatru/Psiho

3 Outcomes Pre-Post Intervention

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~   OUTCOMES PRE-POST INTERVENTION   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
## Function to run all for teatru, psiho and United
func_prepost_tot <- function(df){
  
  Data_melt <-
    df[, c("Indicativ subiect", "Grupa", "Nume Prenume", "Etapă, zi", 
           "APS pre_Total", "APS post_Total", 
           "PPS pre_Total", "PPS post_Total")] %>% 
    gather("APS pre_Total":"PPS post_Total", key = "variable", value = "value")  %>% 
    mutate_at(vars(c(1:5)), funs(as.factor)) %>% 
    mutate(variable = factor(variable, levels = c("APS pre_Total", "APS post_Total", 
                                                  "PPS pre_Total", "PPS post_Total")))
  
  # APS t test - paired
  aps_ttest <- 
    df %>%
    select(`Indicativ subiect`, `APS pre_Total`, `APS post_Total`) %>%
    group_by(`Indicativ subiect`) %>%
    summarise_all(funs(na.omit(.)[1])) %>%                        # squash rows with NAs per id
    # filter_all(all_vars(!is.na(.))) %>%                         # drop row if any column is NA -- dont use here
    do(broom::tidy(t.test(.$`APS pre_Total`,
                          .$`APS post_Total`,
                          mu = 0,
                          alt = "two.sided",
                          paired = TRUE,
                          conf.level = 0.95))) 
  aps_ttest_out <- knitr::kable(aps_ttest, caption = paste0(deparse(substitute(df)), " APS"), format = 'pandoc')
  
  # APS plot t test - unpaired
  aps_plot <-   
    Data_melt %>%
    filter(variable %in% c("APS pre_Total", "APS post_Total")) %>%
    #group_by(`Etapă, zi`) %>%
    ggplot(aes(x = variable, y = value)) +
    geom_boxplot() +
    stat_summary(fun.data = mean_se,  colour = "darkred") +
    xlab("") +
    ggtitle(deparse(substitute(df))) +
    #facet_wrap(~`Etapă, zi`) +
    ggpubr::stat_compare_means(method = "t.test", 
                               label = "p.signif",                                         # to avoid scientific notation of very small p-values
                               #paired = TRUE, 
                               comparisons = list(c("APS pre_Total", "APS post_Total")))  
  
  
  # PPS t test - paired
  pps_ttest <- 
    df %>%
    select(`Indicativ subiect`, `PPS pre_Total`, `PPS post_Total`) %>%
    group_by(`Indicativ subiect`) %>%
    summarise_all(funs(na.omit(.)[1])) %>%                        # squash rows with NAs per id
    # filter_all(all_vars(!is.na(.))) %>%                         # drop row if any column is NA -- dont use here
    do(broom::tidy(t.test(.$`PPS pre_Total`,
                          .$`PPS post_Total`,
                          mu = 0,
                          alt = "two.sided",
                          paired = TRUE,
                          conf.level = 0.95)))
  pps_ttest_out <- knitr::kable(pps_ttest, caption = paste0(deparse(substitute(df)), " PPS"), format = 'pandoc') 
  
  # PPS plot t test - unpaired
  pps_plot <-
    Data_melt %>%
    filter(variable %in% c("PPS pre_Total", "PPS post_Total")) %>%
    #group_by(`Etapă, zi`) %>%
    ggplot(aes(x = variable, y = value)) +
    geom_boxplot() +
    stat_summary(fun.data = mean_se,  colour = "darkred") +
    xlab("") +
    ggtitle(deparse(substitute(df))) +
    #facet_wrap(~`Etapă, zi`) +
    ggpubr::stat_compare_means(method = "t.test", 
                               label = "p.signif",                                         # to avoid scientific notation of very small p-values
                               #paired = TRUE, 
                               comparisons = list(c("PPS pre_Total", "PPS post_Total"))) 
  
  print(aps_ttest_out)
  print(aps_plot)
  print(pps_ttest_out)
  print(pps_plot)
}


cat("### Teatru")

3.0.1 Teatru

Data_teatru APS
estimate statistic p.value parameter conf.low conf.high method alternative
1.4 0.9058555 0.3803385 14 -1.914769 4.714769 Paired t-test two.sided
Data_teatru PPS
estimate statistic p.value parameter conf.low conf.high method alternative
-0.3 -0.1705189 0.8664048 19 -3.982333 3.382333 Paired t-test two.sided

3.0.2 Psiho

Data_psiho APS
estimate statistic p.value parameter conf.low conf.high method alternative
-1.666667 -1.011977 0.3257369 17 -5.141411 1.808077 Paired t-test two.sided
Data_psiho PPS
estimate statistic p.value parameter conf.low conf.high method alternative
3.3 1.759827 0.0945285 19 -0.6248056 7.224806 Paired t-test two.sided

3.0.3 United

Data_United APS
estimate statistic p.value parameter conf.low conf.high method alternative
-0.2727273 -0.236143 0.8148267 32 -2.625231 2.079776 Paired t-test two.sided
Data_United PPS
estimate statistic p.value parameter conf.low conf.high method alternative
1.5 1.152623 0.2560814 39 -1.132289 4.132289 Paired t-test two.sided

4 PANAS (PANAS for zi1, zi3, zi5, zi7 are Pre; zi2, zi4, zi6, zi8 are Post )

4.0.1 Teatru

4.0.2 Psiho

4.0.3 United

5 IOS (Pre on begining of Etapa, Post on end of Etapa)

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~   IOS - pre on begining of etapa, post on end  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
## Function plot pre post data faceted by zi
plot_prepost_zi <- function(df, pre_var, post_var){
  df %>%
    gather(pre_var, post_var, key = "variable", value = "value") %>%
    mutate(variable = factor(variable, levels = c(pre_var, post_var))) %>%
    ggplot(aes(y = value, x = variable)) +
    facet_wrap(~Zi, nrow = 1) + 
    ggtitle(deparse(substitute(df))) + 
    geom_boxplot() + stat_summary(fun.data = mean_se,  colour = "darkred")  +
    theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
    ggpubr::stat_compare_means(method = "t.test",
                               label = "p.signif",                                         # to avoid scientific notation of very small p-values
                               #paired = TRUE,
                               comparisons = list(c(pre_var, post_var)))
}


## Function plot pre post data adjusted comparisons of all 
plot_prepost_zi2 <- function(df, pre_var, post_var){
  df_modif <- 
    df %>%
    gather(pre_var, post_var, key = "variable", value = "value") %>%
    mutate(variable = factor(variable, levels = c(pre_var, post_var)),
           condition = interaction(variable, Zi),
           condition = as.factor(condition)) 
  
  stat.test <-
    df_modif %>%
    # group_by(`Indicativ subiect`) %>%
    rstatix::t_test(value ~ condition) %>%
    # rstatix::adjust_pvalue() %>%
    rstatix::add_significance("p") %>%
    filter(p.signif != "ns") 
  
  p <- 
    ggplot(df_modif, aes(y = value, x = condition)) +
      ggtitle(deparse(substitute(df))) + 
      geom_boxplot() + stat_summary(fun.data = mean_se,  colour = "darkred") +
      theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
      stat_pvalue_manual(stat.test, label = "p.signif", 
                         y.position = seq(max(df_modif$value, na.rm = TRUE)+2, max(df_modif$value, na.rm = TRUE)*2, 
                                          length.out = nrow(stat.test)))                                                  # very hacky
  
  print(p)
  return(stat.test) %>% print(n = Inf)
  
}


# cat("## By Zi")
cat("### Teatru")

5.0.1 Teatru

5.0.2 Psiho

5.0.3 United

5.1 ANOVA for Baseline each Etapa

5.2 By Etapa

6 VAS variables (multiple measurements on each zi)

6.2 VAS stres

6.2.1 Teatru

6.2.2 Psiho

6.2.3 United

6.3 VAS stare de bine

6.3.1 Teatru

6.3.2 Psiho

6.3.3 United

6.4 VAS corp

6.4.1 Teatru

6.4.2 Psiho

6.4.3 United

6.5 By Etapa - Only United

6.5.1 VAS Stres

6.5.2 VAS St bine

6.5.3 VAS St bine corp

7 Define function Moderation

find_mod <- function(df, dfp = NULL, num_only = TRUE, verbose = TRUE) {
  count = 0
  moderation_model_list <<- list()

  if(num_only == TRUE){
  numeric_cols <- unlist(lapply(df, is.numeric))                                      # get only numeric columns
  df <- df[, numeric_cols]
  }
  
  # restricted permutations for Moderation - Check: choose(len_names, 3)*3
  names <- colnames(df)
  len_names = length(names)
  
  if(is.null(dfp)){
    dfp <- lapply(1:len_names, function(i){
      tmp <- lapply(1:(len_names-1), function(j){
        tmp <- lapply((j+1):len_names, function(k){
          if(j != i & k != i) c(names[i], names[j], names[k])
        })
        do.call(rbind, tmp)
      })
      do.call(rbind, tmp)
    })
    dfp <- do.call(rbind.data.frame, dfp)
    names(dfp) <- paste("var", 1:3, sep = "_")
    dfp[, ] <- lapply(dfp[, ], as.character)    
  } else {
    dfp <- dfp
  }
  
  for (row in 1:nrow(dfp)) {                
    
    results <- medmod::mod(data = df,                                                   # mod does centering automatically
                          dep = dfp[row, 1], mod = dfp[row, 2], pred = dfp[row, 3], 
                          estMethod = "standard", test = TRUE, 
                          simpleSlopeEst = FALSE, simpleSlopePlot = FALSE)             # when testing use estMethod = 'bootstrap', bootstrap = 500 
    
    pmod <- as.data.frame(results$mod)[3,5]

    if(pmod < 0.05 && !is.na(pmod)) {
      count <- count + 1
        if(verbose == TRUE) {
        cat("Dependent Variable:", dfp[row, 1])
        print(results$mod) 
        }
      moderation_model_list[["Model"]][[paste("model", count, sep = "_")]] <<- as.data.frame(results$mod)   # return as list of dataframes
      moderation_model_list[["Syntax"]][[paste("model", count, sep = "_")]] <<- results$modelSyntax
      
    }
  }
  cat("\n","Report: ", count, "significant moderations out of", row, "total tries.")
}

## ex dfp
# dfp <- data.frame(
#   var1 = colnames(Data_med_melt)[grep("_post", colnames(Data_med_melt))],
#   var2 = rep("Condition", 13),
#   var3 = colnames(Data_med_melt)[grep("_pre", colnames(Data_med_melt))],
#   stringsAsFactors = FALSE
# )
# find_med(df = Data_med_melt, dfp = dfp, num_only = FALSE) 

8 Moderation on PrePost (APS, PSS)

term est se lower upper z p
PPS pre_Total 0.73 0.18 0.41 1.11 4.04 0.00
SRS post_Rela<U+021B>ia -0.07 0.14 -0.37 0.20 -0.50 0.62
PPS pre_Total:SRS post_Rela<U+021B>ia -0.04 0.02 -0.09 0.00 -1.73 0.08

term est se lower upper z p
Average 0.74 0.18 0.42 1.15 3.99 0.00
Low (-1SD) 1.11 0.31 0.61 1.86 3.65 0.00
High (+1SD) 0.36 0.26 -0.10 0.88 1.39 0.16

Scale for ‘colour’ is already present. Adding another scale for ‘colour’, which will replace the existing scale.


9 Session Info

R version 3.6.1 (2019-07-05)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 8.1 x64 (build 9600)

Matrix products: default

locale:
[1] LC_COLLATE=Romanian_Romania.1250  LC_CTYPE=Romanian_Romania.1250    LC_MONETARY=Romanian_Romania.1250 LC_NUMERIC=C                     
[5] LC_TIME=Romanian_Romania.1250    

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] rio_0.5.16                 scales_1.0.0               ggpubr_0.2.5               magrittr_1.5               tadaatoolbox_0.16.1       
 [6] summarytools_0.8.8         rstatix_0.2.0              broom_0.5.2                PerformanceAnalytics_1.5.2 xts_0.11-2                
[11] zoo_1.8-4                  psych_1.8.12               plyr_1.8.4                 forcats_0.4.0              stringr_1.4.0             
[16] dplyr_0.8.3                purrr_0.3.2                readr_1.3.1                tidyr_1.0.0                tibble_2.1.3              
[21] ggplot2_3.2.1              tidyverse_1.2.1            papaja_0.1.0.9842          pacman_0.5.1              

loaded via a namespace (and not attached):
 [1] nlme_3.1-140       bitops_1.0-6       matrixStats_0.54.0 lubridate_1.7.4    httr_1.4.0         tools_3.6.1        backports_1.1.4   
 [8] R6_2.4.0           nortest_1.0-4      lazyeval_0.2.2     colorspace_1.4-1   withr_2.1.2        tidyselect_0.2.5   gridExtra_2.3     
[15] mnormt_1.5-5       pixiedust_0.8.6    curl_3.2           compiler_3.6.1     cli_1.1.0          rvest_0.3.2        expm_0.999-3      
[22] xml2_1.2.0         labeling_0.3       mvtnorm_1.0-11     quadprog_1.5-5     pbivnorm_0.6.0     digest_0.6.21      foreign_0.8-71    
[29] rmarkdown_1.17     base64enc_0.1-3    pkgconfig_2.0.3    htmltools_0.3.6    highr_0.8          pwr_1.2-2          rlang_0.4.0       
[36] readxl_1.1.0       rstudioapi_0.8     pryr_0.1.4         jmvcore_1.0.8      generics_0.0.2     jsonlite_1.6       zip_1.0.0         
[43] car_3.0-2          RCurl_1.95-4.11    rapportools_1.0    Matrix_1.2-17      Rcpp_1.0.2         DescTools_0.99.29  munsell_0.5.0     
[50] abind_1.4-5        viridis_0.5.1      lifecycle_0.1.0    stringi_1.4.3      carData_3.0-2      MASS_7.3-51.4      lavaan_0.6-3      
[57] grid_3.6.1         parallel_3.6.1     crayon_1.3.4       lattice_0.20-38    haven_2.1.1        pander_0.6.3       hms_0.5.1         
[64] zeallot_0.1.0      knitr_1.25         pillar_1.4.2       varhandle_2.0.4    rjson_0.2.20       boot_1.3-22        ggsignif_0.4.0    
[71] stats4_3.6.1       codetools_0.2-16   glue_1.3.1         evaluate_0.14      data.table_1.11.8  modelr_0.1.5       vctrs_0.2.0       
[78] cellranger_1.1.0   gtable_0.3.0       assertthat_0.2.1   xfun_0.9           openxlsx_4.1.0     viridisLite_0.3.0  medmod_1.0.0      
[85] jmv_1.0.8          ellipsis_0.3.0    
 

A work by Claudiu Papasteri

 

---
title: "<br> Drama Exercises" 
subtitle: "Report for Theater Group and Psychology Group"
author: "<br> Claudiu Papasteri"
date: "`r format(Sys.time(), '%d %m %Y')`"
output: 
    html_notebook:
            code_folding: hide
            toc: true
            toc_depth: 2
            number_sections: true
            theme: spacelab
            highlight: tango
            font-family: Arial
            fig_width: 10
            fig_height: 9
    # pdf_document: 
            # toc: true
            #  toc_depth: 2
            #  number_sections: true
            # fontsize: 11pt
            # geometry: margin=1in
            # fig_width: 7
            # fig_height: 6
            # fig_caption: true
    # github_document: 
            # toc: true
            # toc_depth: 2
            # html_preview: false
            # fig_width: 5
            # fig_height: 5
            # dev: jpeg
---


<!-- Setup -->


```{r setup, include=FALSE}
# kintr options
knitr::opts_chunk$set(
  comment = "#",
  collapse = TRUE,
  echo = TRUE, warning = FALSE, message = TRUE, cache = TRUE       # echo = False for github_document, but will be folded in html_notebook
)

# General R options and info
set.seed(111)               # in case we use randomized procedures       
options(scipen = 999)       # positive values bias towards fixed and negative towards scientific notation

# Load packages
if (!require("pacman")) install.packages("pacman")
packages <- c(
  "papaja",
  "tidyverse", "plyr",      
  "psych", "PerformanceAnalytics",          
  "broom", "rstatix",
  "summarytools", "tadaatoolbox",           
  "ggplot2", "ggpubr", "scales",        
  "rio"
  # , ...
)
if (!require("pacman")) install.packages("pacman")
pacman::p_load(char = packages)

# Themes for ggplot2 ploting (here used APA style)
theme_set(theme_apa())
```





<!-- Report -->


# Read, Clean, Recode, Merge

```{r red_clean_recode_merge, results='hide'}
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Read, Clean, Recode, Unite
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

## Read files
folder_teatru <- "E:/Cinetic idei noi/A13/Date grup teatru"
folder_psiho <- "E:/Cinetic idei noi/A13/Date grup psiho"
file_teatru <- "A13 Tabel date COMPLET28ian.xlsx"
file_psiho <- "A13P Tabel centralizat.xlsx"

setwd(folder_teatru)
Data_teatru <- rio::import(file.path(folder_teatru, file_teatru),
                           skip = 1)
setwd(folder_psiho)
Data_psiho <- rio::import(file.path(folder_psiho, file_psiho),
                           skip = 1)


## Tidy up data
# Function coalesce rows: colapse when NA, unite with "_" when not NA
coalesce2 <- function(...) {
  Reduce(function(x, y) {
    i <- which(is.na(x))
    j <- which(!is.na(x) & !is.na(y))
    x[i] <- y[i]
    x[j] <- paste(x[j], y[j], sep = "_")
    x},
    list(...))
}

colnames(Data_teatru) <- coalesce2(Data_teatru[2,], Data_teatru[3,])
Data_teatru <- Data_teatru[-c(1:3),]

colnames(Data_psiho) <- coalesce2(Data_psiho[2,], Data_psiho[3,])
Data_psiho <- Data_psiho[-c(1:3),]


## Check if both data sets have exact same column names
all.equal(colnames(Data_teatru), colnames(Data_psiho))           # not the same
diff_colname <- setdiff(colnames(Data_teatru), colnames(Data_psiho))
ind <- which(colnames(Data_teatru) ==  diff_colname)

colnames(Data_psiho)[ind] <- diff_colname                       # replace with same colname from Data_teatru
all.equal(colnames(Data_teatru), colnames(Data_psiho))           # now they are the same


## Solve duplicate names due to excel double header
# Function to paste a string before column name if it doesnt already start with that string
paste_tocolnames <- function(vec_colnames, string_paste){
  ind <- grep(pattern = string_paste, vec_colnames)                   # ignore column that already has string patterm
  vec_colnames[-ind] <- paste0(string_paste, vec_colnames[-ind])      # paste pattern to the rest of them
  return(vec_colnames)
}

colnames(Data_teatru)[6:21] <- paste_tocolnames(colnames(Data_teatru)[6:21], "APS pre_")
colnames(Data_teatru)[22:35] <- paste_tocolnames(colnames(Data_teatru)[22:35], "PPS pre_")
colnames(Data_teatru)[36:55] <- paste_tocolnames(colnames(Data_teatru)[36:55], "PANAS pre_")
colnames(Data_teatru)[76:79] <- paste_tocolnames(colnames(Data_teatru)[76:79], "SRS post_")
colnames(Data_teatru)[80:99] <- paste_tocolnames(colnames(Data_teatru)[80:99], "PANAS post_")
colnames(Data_teatru)[100:113] <- paste_tocolnames(colnames(Data_teatru)[100:113], "PPS post_")
colnames(Data_teatru)[114:129] <- paste_tocolnames(colnames(Data_teatru)[114:129], "APS post_")

colnames(Data_psiho)[6:21] <- paste_tocolnames(colnames(Data_psiho)[6:21], "APS pre_")
colnames(Data_psiho)[22:35] <- paste_tocolnames(colnames(Data_psiho)[22:35], "PPS pre_")
colnames(Data_psiho)[36:55] <- paste_tocolnames(colnames(Data_psiho)[36:55], "PANAS pre_")
colnames(Data_psiho)[76:79] <- paste_tocolnames(colnames(Data_psiho)[76:79], "SRS post_")
colnames(Data_psiho)[80:99] <- paste_tocolnames(colnames(Data_psiho)[80:99], "PANAS post_")
colnames(Data_psiho)[100:113] <- paste_tocolnames(colnames(Data_psiho)[100:113], "PPS post_")
colnames(Data_psiho)[114:129] <- paste_tocolnames(colnames(Data_psiho)[114:129], "APS post_")
# as.data.frame(colnames(Data_psiho))
# as.data.frame(colnames(Data_teatru))

colnames(Data_teatru) <- enc2native(colnames(Data_teatru))      # fix encoding
colnames(Data_psiho) <- enc2native(colnames(Data_psiho))


## Recode known missing values
# str(Data_psiho, list.len = ncol(Data_psiho))
# str(Data_psiho, list.len = ncol(Data_psiho))
Data_teatru <-
  Data_teatru %>%
  replace(. == "/", NA) %>%                                     # missing values are coded "/"
  replace(. == "-", NA) %>%                                     # missing values are coded "-"
  replace(. == "NA", NA)                                        # missing values are coded "NA"

Data_psiho <-
  Data_psiho %>%
  replace(.=="/", NA) %>%                                       # missing values are coded "/"
  replace(.=="-", NA) %>%                                       # missing values are coded "-"
  replace(. == "NA", NA)                                        # missing values are coded "NA"

  
## Check for non-numeric elements in data sets
check_numeric1 <- as.data.frame(sapply(Data_teatru, varhandle::check.numeric)) 
check_numeric2 <- as.data.frame(sapply(Data_psiho, varhandle::check.numeric))
# sapply(check_numeric1, function(x) length(which(!x)))     # look at columns with non-numeric and count of non-numeric values
# sapply(check_numeric2, function(x) length(which(!x)))

nonnumeric1 <- sapply(check_numeric1, function(x) which(!x, arr.ind = TRUE))    # find row numbers for non-numeric values
nonnumeric2 <- sapply(check_numeric2, function(x) which(!x, arr.ind = TRUE)) 
nonnumeric1[lapply(nonnumeric1, length) > 0]                                   # return only columns and rown numbers were non-numeric
nonnumeric2[lapply(nonnumeric2, length) > 0]

## Recode to numeric
Data_teatru[, -c(1:5)] <- sapply(Data_teatru[, -c(1:5)], as.numeric)    
Data_psiho[, -c(1:5)] <- sapply(Data_psiho[, -c(1:5)], as.numeric)     # cant do this because of encoding:  mutate_at(-c(1:5), ~as.numeric(.))
  
## Correct typos in IDs
#unique(Data_teatru$`Indicativ subiect`)
#unique(Data_psiho$`Indicativ subiect`)
Data_teatru$`Indicativ subiect`[Data_teatru$`Indicativ subiect` == "18.A.1:3"] <- "18.A.1.3"
Data_teatru$`Indicativ subiect`[Data_teatru$`Indicativ subiect` == "26.A.1.3."] <- "26.A.1.3"
Data_teatru$`Indicativ subiect`[Data_teatru$`Indicativ subiect` == "27.A.1.3."] <- "27.A.1.3"

Data_psiho$`Indicativ subiect` <- gsub(".P", "", Data_psiho$`Indicativ subiect`)               # delete .P from IDs
Data_psiho$`Indicativ subiect`[Data_psiho$`Indicativ subiect` == "16.A.A.1.3"] <- "16.A.1.3"
Data_psiho$`Indicativ subiect`[Data_psiho$`Indicativ subiect` == "1.A.1.3"] <- "01.A.1.3"
Data_psiho$`Indicativ subiect`[Data_psiho$`Indicativ subiect` == "2.A.1.3"] <- "02.A.1.3"
Data_psiho$`Indicativ subiect`[Data_psiho$`Indicativ subiect` == "4.A.1.3"] <- "04.A.1.3"
Data_psiho$`Indicativ subiect`[Data_psiho$`Indicativ subiect` == "0.4.A.1.3"] <- "04.A.1.3"
Data_psiho$`Indicativ subiect`[Data_psiho$`Indicativ subiect` == "5.A.1.3"] <- "05.A.1.3"
Data_psiho$`Indicativ subiect`[Data_psiho$`Indicativ subiect` == "6.A.1.3"] <- "06.A.1.3"
Data_psiho$`Indicativ subiect`[Data_psiho$`Indicativ subiect` == "7.A.1.3"] <- "07.A.1.3"
Data_psiho$`Indicativ subiect`[Data_psiho$`Indicativ subiect` == "8.A.1.3"] <- "08.A.1.3"
Data_psiho$`Indicativ subiect`[Data_psiho$`Indicativ subiect` == "9.A.1.3"] <- "09.A.1.3"
Data_psiho$`Indicativ subiect` <- paste0(Data_psiho$`Indicativ subiect`, ".P")                # add .P to all IDs

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Scoring Questionnaire and Unite
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
## Define function that calculates RowSums but only for rows with less than 10% NAs; and return NA if all row values are NA 
SpecialRowSums <- function(df, napercent = .1) {
  ifelse(rowSums(is.na(df)) > ncol(df) * napercent,
         NA,
         rowSums(df, na.rm = TRUE) * NA ^ (rowSums(!is.na(df)) == 0)
  )
}

## APS: simple sum
Data_teatru$`APS pre_Total` <- SpecialRowSums(Data_teatru[ ,sprintf("APS pre_%d", 1:16)], napercent = .13)  # not more than 2 NAs for 16 items
Data_teatru$`APS post_Total` <- SpecialRowSums(Data_teatru[ ,sprintf("APS post_%d", 1:16)], napercent = .13)
Data_psiho$`APS pre_Total` <- SpecialRowSums(Data_psiho[ ,sprintf("APS pre_%d", 1:16)], napercent = .13)
Data_psiho$`APS post_Total` <- SpecialRowSums(Data_psiho[ ,sprintf("APS post_%d", 1:16)], napercent = .13)

## PSS-SF 14: Items 4, 5, 6, 7, 9, 10, and 13 are scored in reverse direction.
keys_PSS <- c(1,1,1,-1,-1,-1,-1,1,-1,-1,1,1,-1,1)

Data_teatru$`PPS pre_Total` <- 
  SpecialRowSums(
  psych::reverse.code(items = Data_teatru[ ,sprintf("PPS pre_%d", 1:14)], keys = keys_PSS,  mini = 0, maxi = 4),
  napercent = .1)  # not more than 1 NAs for 14 items 
Data_teatru$`PPS post_Total` <- 
  SpecialRowSums(
    psych::reverse.code(items = Data_teatru[ ,sprintf("PPS post_%d", 1:14)], keys = keys_PSS,  mini = 0, maxi = 4),
    napercent = .1)
Data_psiho$`PPS pre_Total` <- 
  SpecialRowSums(
    psych::reverse.code(items = Data_psiho[ ,sprintf("PPS pre_%d", 1:14)], keys = keys_PSS,  mini = 0, maxi = 4),
    napercent = .1)  # not more than 1 NAs for 14 items 
Data_psiho$`PPS post_Total` <- 
  SpecialRowSums(
    psych::reverse.code(items = Data_psiho[ ,sprintf("PPS post_%d", 1:14)], keys = keys_PSS,  mini = 0, maxi = 4),
    napercent = .1)

## PANAS: Positive Affect Score = sum items 1, 3, 5, 9, 10, 12, 14, 16, 17, 19. Negative Affect Score = sum items 2, 4, 6, 7, 8, 11, 13, 15, 18, 20.
Data_teatru$`PA pre_Total` <- SpecialRowSums(Data_teatru[ ,35 + c(1,3,5,9,10,12,14,16,17,19)], napercent = .11) # not more than 1 NAs for 10 items
Data_teatru$`NA pre_Total` <- SpecialRowSums(Data_teatru[ ,35 + c(2,4,6,7,8,11,13,15,18,20)], napercent = .11)
Data_psiho$`PA pre_Total` <- SpecialRowSums(Data_psiho[ ,35 + c(1,3,5,9,10,12,14,16,17,19)], napercent = .11) 
Data_psiho$`NA pre_Total` <- SpecialRowSums(Data_psiho[ ,35 + c(2,4,6,7,8,11,13,15,18,20)], napercent = .11)

Data_teatru$`PA post_Total` <- SpecialRowSums(Data_teatru[ ,79 + c(1,3,5,9,10,12,14,16,17,19)], napercent = .11) 
Data_teatru$`NA post_Total` <- SpecialRowSums(Data_teatru[ ,79 + c(2,4,6,7,8,11,13,15,18,20)], napercent = .11)
Data_psiho$`PA post_Total` <- SpecialRowSums(Data_psiho[ ,79 + c(1,3,5,9,10,12,14,16,17,19)], napercent = .11) 
Data_psiho$`NA post_Total` <- SpecialRowSums(Data_psiho[ ,79 + c(2,4,6,7,8,11,13,15,18,20)], napercent = .11)


# Define other grouping varibles
Data_teatru <- 
  Data_teatru %>%
    mutate(Etapa = case_when(`Etapă, zi` %in% c("I.1", "I.2") ~ "I",
                             `Etapă, zi` %in% c("II.1",  "II.2") ~ "II",
                             `Etapă, zi` %in% c("III.1", "III.2") ~ "III",
                             `Etapă, zi` %in% c("IV.1", "IV.2") ~ "IV",
                             TRUE ~ NA_character_),
           Zi = case_when(`Etapă, zi` == "I.1" ~ "zi1", 
                          `Etapă, zi` == "I.2" ~ "zi2",  
                          `Etapă, zi` == "II.1" ~ "zi3", 
                          `Etapă, zi` == "II.2" ~ "zi4",
                          `Etapă, zi` == "III.1" ~ "zi5", 
                          `Etapă, zi` == "III.2" ~ "zi6", 
                          `Etapă, zi` == "IV.1" ~ "zi7", 
                          `Etapă, zi` == "IV.2" ~ "zi8", 
                          TRUE ~ NA_character_)) %>%
  unite(col = "Etapa_Zi", Etapa, Zi, remove = FALSE)

Data_psiho <- 
  Data_psiho %>%
  mutate(Etapa = case_when(`Etapă, zi` %in% c("I.1", "I.2") ~ "I",
                           `Etapă, zi` %in% c("II.1",  "II.2") ~ "II",
                           `Etapă, zi` %in% c("III.1", "III.2") ~ "III",
                           `Etapă, zi` %in% c("IV.1", "IV.2") ~ "IV",
                           TRUE ~ NA_character_),
         Zi = case_when(`Etapă, zi` == "I.1" ~ "zi1", 
                        `Etapă, zi` == "I.2" ~ "zi2",  
                        `Etapă, zi` == "II.1" ~ "zi3", 
                        `Etapă, zi` == "II.2" ~ "zi4",
                        `Etapă, zi` == "III.1" ~ "zi5", 
                        `Etapă, zi` == "III.2" ~ "zi6", 
                        `Etapă, zi` == "IV.1" ~ "zi7", 
                        `Etapă, zi` == "IV.2" ~ "zi8", 
                        TRUE ~ NA_character_)) %>%
  unite(col = "Etapa_Zi", Etapa, Zi, remove = FALSE)

## Unite data sets
Data_teatru$Dataset <- rep("teatru", nrow(Data_teatru))
Data_psiho$Dataset <- rep("psiho", nrow(Data_psiho))

Data_United <- rbind(Data_teatru, Data_psiho)
```


# Sample descriptives

```{r sample_desc}
cat("## Number of subjects")
Data_United %>% 
 dplyr::rename(ID = `Indicativ subiect`) %>% 
 dplyr::summarise(count = dplyr::n_distinct(ID))

cat("## Number of subjects by Teatru/Psiho")
Data_United %>%
 dplyr::rename(ID = `Indicativ subiect`) %>% 
 group_by(Dataset) %>%
 dplyr::summarise(count = dplyr::n_distinct(ID))
```

# Outcomes Pre-Post Intervention

```{r outcomes_aps_pps, fig.width=6, fig.align='center', fig.height=5, results='asis'}
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~   OUTCOMES PRE-POST INTERVENTION   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
## Function to run all for teatru, psiho and United
func_prepost_tot <- function(df){
  
  Data_melt <-
    df[, c("Indicativ subiect", "Grupa", "Nume Prenume", "Etapă, zi", 
           "APS pre_Total", "APS post_Total", 
           "PPS pre_Total", "PPS post_Total")] %>% 
    gather("APS pre_Total":"PPS post_Total", key = "variable", value = "value")  %>% 
    mutate_at(vars(c(1:5)), funs(as.factor)) %>% 
    mutate(variable = factor(variable, levels = c("APS pre_Total", "APS post_Total", 
                                                  "PPS pre_Total", "PPS post_Total")))
  
  # APS t test - paired
  aps_ttest <- 
    df %>%
    select(`Indicativ subiect`, `APS pre_Total`, `APS post_Total`) %>%
    group_by(`Indicativ subiect`) %>%
    summarise_all(funs(na.omit(.)[1])) %>%                        # squash rows with NAs per id
    # filter_all(all_vars(!is.na(.))) %>%                         # drop row if any column is NA -- dont use here
    do(broom::tidy(t.test(.$`APS pre_Total`,
                          .$`APS post_Total`,
                          mu = 0,
                          alt = "two.sided",
                          paired = TRUE,
                          conf.level = 0.95))) 
  aps_ttest_out <- knitr::kable(aps_ttest, caption = paste0(deparse(substitute(df)), " APS"), format = 'pandoc')
  
  # APS plot t test - unpaired
  aps_plot <-   
    Data_melt %>%
    filter(variable %in% c("APS pre_Total", "APS post_Total")) %>%
    #group_by(`Etapă, zi`) %>%
    ggplot(aes(x = variable, y = value)) +
    geom_boxplot() +
    stat_summary(fun.data = mean_se,  colour = "darkred") +
    xlab("") +
    ggtitle(deparse(substitute(df))) +
    #facet_wrap(~`Etapă, zi`) +
    ggpubr::stat_compare_means(method = "t.test", 
                               label = "p.signif",                                         # to avoid scientific notation of very small p-values
                               #paired = TRUE, 
                               comparisons = list(c("APS pre_Total", "APS post_Total")))  
  
  
  # PPS t test - paired
  pps_ttest <- 
    df %>%
    select(`Indicativ subiect`, `PPS pre_Total`, `PPS post_Total`) %>%
    group_by(`Indicativ subiect`) %>%
    summarise_all(funs(na.omit(.)[1])) %>%                        # squash rows with NAs per id
    # filter_all(all_vars(!is.na(.))) %>%                         # drop row if any column is NA -- dont use here
    do(broom::tidy(t.test(.$`PPS pre_Total`,
                          .$`PPS post_Total`,
                          mu = 0,
                          alt = "two.sided",
                          paired = TRUE,
                          conf.level = 0.95)))
  pps_ttest_out <- knitr::kable(pps_ttest, caption = paste0(deparse(substitute(df)), " PPS"), format = 'pandoc') 
  
  # PPS plot t test - unpaired
  pps_plot <-
    Data_melt %>%
    filter(variable %in% c("PPS pre_Total", "PPS post_Total")) %>%
    #group_by(`Etapă, zi`) %>%
    ggplot(aes(x = variable, y = value)) +
    geom_boxplot() +
    stat_summary(fun.data = mean_se,  colour = "darkred") +
    xlab("") +
    ggtitle(deparse(substitute(df))) +
    #facet_wrap(~`Etapă, zi`) +
    ggpubr::stat_compare_means(method = "t.test", 
                               label = "p.signif",                                         # to avoid scientific notation of very small p-values
                               #paired = TRUE, 
                               comparisons = list(c("PPS pre_Total", "PPS post_Total"))) 
  
  print(aps_ttest_out)
  print(aps_plot)
  print(pps_ttest_out)
  print(pps_plot)
}


cat("### Teatru")
func_prepost_tot(Data_teatru)
cat("### Psiho")
func_prepost_tot(Data_psiho)
cat("### United")
func_prepost_tot(Data_United)
```


# PANAS (PANAS for zi1, zi3, zi5, zi7 are Pre; zi2, zi4, zi6, zi8 are Post )

```{r outcome_panas, fig.width=12, fig.height=9, results='asis'}
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~   OUTCOMES PRE-POST for PANAS    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# PANAS for zi1, zi3, zi5, zi7 are Pre; zi2, zi4, zi6, zi8 are Post 
## Function plot PANAS, compare by zi
plot_panas_zi <- function(df, pre_var, post_var){
  df_modif <-
    df %>%
    gather(pre_var, post_var, key = "variable", value = "value") 
  
  stat.test <-
    df_modif %>%
    select(value, variable, Zi) %>%
    tidyr::drop_na() %>%                          # need to remove NAs so factor level of condition can be droped as well         
    rstatix::t_test(value ~ Zi) %>%                # automatically does pairwise, but has problems were factor levels of grouping with NA
    # rstatix::adjust_pvalue() %>%                          
    rstatix::add_significance("p") %>%                                                       
    filter(p.signif != "ns") 
  
  p<-
    ggplot(df_modif, aes(y = value, x = Zi)) +
    ggtitle(paste0(deparse(substitute(df)), " : ", pre_var, " - " ,post_var)) + 
    geom_boxplot() + stat_summary(fun.data = mean_se,  colour = "darkred") +
    theme(axis.text.x = element_text(angle = 90, hjust = 1))
  
  if(nrow(stat.test) > 0){
    p <- 
      p + stat_pvalue_manual(stat.test, label = "p.signif", 
                             y.position = seq(max(df_modif$value, na.rm = TRUE)+2, max(df_modif$value, na.rm = TRUE)*1.4, 
                                          length.out = nrow(stat.test)))                             # very hacky
  }
  
  return(stat.test) %>% print(n = Inf)
  print(p)
}

cat("### Teatru")
plot_panas_zi(Data_teatru, "PA pre_Total", "PA post_Total")
plot_panas_zi(Data_teatru, "NA pre_Total", "NA post_Total")
cat("### Psiho")
plot_panas_zi(Data_psiho, "PA pre_Total", "PA post_Total")
plot_panas_zi(Data_psiho, "NA pre_Total", "NA post_Total")
cat("### United")
plot_panas_zi(Data_United, "PA pre_Total", "PA post_Total")
plot_panas_zi(Data_United, "NA pre_Total", "NA post_Total")
```


# IOS (Pre on begining of Etapa, Post on end of Etapa)

```{r ios, fig.width=13, fig.height=11, results='asis'}
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~   IOS - pre on begining of etapa, post on end  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
## Function plot pre post data faceted by zi
plot_prepost_zi <- function(df, pre_var, post_var){
  df %>%
    gather(pre_var, post_var, key = "variable", value = "value") %>%
    mutate(variable = factor(variable, levels = c(pre_var, post_var))) %>%
    ggplot(aes(y = value, x = variable)) +
    facet_wrap(~Zi, nrow = 1) + 
    ggtitle(deparse(substitute(df))) + 
    geom_boxplot() + stat_summary(fun.data = mean_se,  colour = "darkred")  +
    theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
    ggpubr::stat_compare_means(method = "t.test",
                               label = "p.signif",                                         # to avoid scientific notation of very small p-values
                               #paired = TRUE,
                               comparisons = list(c(pre_var, post_var)))
}


## Function plot pre post data adjusted comparisons of all 
plot_prepost_zi2 <- function(df, pre_var, post_var){
  df_modif <- 
    df %>%
    gather(pre_var, post_var, key = "variable", value = "value") %>%
    mutate(variable = factor(variable, levels = c(pre_var, post_var)),
           condition = interaction(variable, Zi),
           condition = as.factor(condition)) 
  
  stat.test <-
    df_modif %>%
    # group_by(`Indicativ subiect`) %>%
    rstatix::t_test(value ~ condition) %>%
    # rstatix::adjust_pvalue() %>%
    rstatix::add_significance("p") %>%
    filter(p.signif != "ns") 
  
  p <- 
    ggplot(df_modif, aes(y = value, x = condition)) +
      ggtitle(deparse(substitute(df))) + 
      geom_boxplot() + stat_summary(fun.data = mean_se,  colour = "darkred") +
      theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
      stat_pvalue_manual(stat.test, label = "p.signif", 
                         y.position = seq(max(df_modif$value, na.rm = TRUE)+2, max(df_modif$value, na.rm = TRUE)*2, 
                                          length.out = nrow(stat.test)))                                                  # very hacky
  
  print(p)
  return(stat.test) %>% print(n = Inf)
  
}


# cat("## By Zi")
cat("### Teatru")
plot_prepost_zi(Data_teatru, "IOS pre", "IOS post")
plot_prepost_zi2(Data_teatru, "IOS pre", "IOS post")
cat("### Psiho")
plot_prepost_zi(Data_psiho, "IOS pre", "IOS post")
plot_prepost_zi2(Data_psiho, "IOS pre", "IOS post")
cat("### United")
plot_prepost_zi(Data_United, "IOS pre", "IOS post")
plot_prepost_zi2(Data_United, "IOS pre", "IOS post")


cat("## ANOVA for Baseline each Etapa")
plot_anova_base <- function(df, pre_var, post_var){
  df %>%
    select(Etapa, Zi, pre_var, post_var) %>%
    gather(pre_var, post_var, key = "variable", value = "value") %>%
    mutate(variable = factor(variable, levels = c(pre_var, post_var))) %>%
    filter(variable == pre_var)  %>%
      
    jmv::ANOVA(formula = value ~ Zi,
               effectSize = list('eta', 'partEta')) -> anova_base
    
    print(tibble::as.tibble(anova_base$main))
}
plot_anova_base(Data_United, "IOS pre", "IOS post")


cat("## By Etapa")
plot_prepost_etapa <- function(df, pre_var, post_var){
  df_modif <-
    df %>%
      select(Etapa, Zi, pre_var, post_var) %>%
      gather(pre_var, post_var, key = "variable", value = "value") %>%
      mutate(variable = factor(variable, levels = c(pre_var, post_var))) %>%
      filter(variable == pre_var & Zi %in% sprintf("zi%d", seq(1, 8, by = 2))  |
             variable == post_var & Zi %in% sprintf("zi%d", seq(2, 8, by = 2)))  
  
  stat.test <- 
    df_modif %>%     
      group_by(Etapa) %>%
      rstatix::t_test(value ~ Zi) %>%
      # rstatix::adjust_pvalue() %>%
      rstatix::add_significance("p") %>%
      filter(p.signif != "ns")
  
  p <- 
    ggplot(data = df_modif, aes(y = value, x = variable)) +
    facet_wrap(~Etapa, nrow = 1) + 
    ggtitle(deparse(substitute(df))) + 
    geom_boxplot() + stat_summary(fun.data = mean_se,  colour = "darkred")  +
    theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
    ggpubr::stat_compare_means(method = "t.test",
                               label = "p.signif",                                         # to avoid scientific notation of very small p-values
                               #paired = TRUE,
                               comparisons = list(c(pre_var, post_var)))

  print(p)
  return(stat.test) %>% print(n = Inf)
}
plot_prepost_etapa(Data_teatru, "IOS pre", "IOS post")


```


# VAS variables (multiple measurements on each zi)

```{r vas_variables, fig.width=15, fig.height=11, results='asis'}
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~   Variables multiple measurements in zi    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
## Function plot pre-ex1-ex2-..-post data faceted by zi
plot_preexpost_zi <- function(df, pre_var, post_x1_var, post_x2_var, post_x3_var){
  df_modif <-
    df %>%
    gather(pre_var, post_x1_var, post_x2_var, post_x3_var, key = "variable", value = "value") %>%
    mutate(variable = factor(variable, levels = c(pre_var, post_x1_var, post_x2_var, post_x3_var)))
    
  stat.test <-
    df_modif %>%
    group_by(Zi) %>%
    tidyr::drop_na(value) %>%                                     # filter so grouping factor levels get droped and we can compare with unevel levels
    rstatix::t_test(value ~ variable) %>%                # pairwise
    rstatix::add_significance("p") %>%                                    
    filter(p.signif != "ns") 
  
  p<-
    ggplot(df_modif, aes(y = value, x = variable)) +
      facet_wrap(~Zi, nrow = 1) + 
      ggtitle(deparse(substitute(df))) + 
      geom_boxplot() + stat_summary(fun.data = mean_se,  colour = "darkred") +
      theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
      stat_pvalue_manual(stat.test, label = "p.signif", 
                         y.position = seq(max(df_modif$value, na.rm = TRUE)+2, max(df_modif$value, na.rm = TRUE)*2, 
                                          length.out = nrow(stat.test)))                                                  # very hacky
  
  return(stat.test) %>% print(n = Inf)
  p
}

cat("## By Zi")
cat("## VAS stres")
cat("### Teatru")
plot_preexpost_zi(Data_teatru, "VAS stres pre", "VAS stres post ex1", "VAS stres post ex2", "VAS stres post ex3")
cat("### Psiho")
plot_preexpost_zi(Data_psiho, "VAS stres pre", "VAS stres post ex1", "VAS stres post ex2", "VAS stres post ex3")
cat("### United")
plot_preexpost_zi(Data_United, "VAS stres pre", "VAS stres post ex1", "VAS stres post ex2", "VAS stres post ex3")


cat("## VAS stare de bine") 
cat("### Teatru")
plot_preexpost_zi(Data_teatru, "VAS stare de bine pre", "VAS stare de bine post ex1", "VAS stare de bine post ex2", "VAS stare de bine post ex3")
cat("### Psiho")
plot_preexpost_zi(Data_psiho, "VAS stare de bine pre", "VAS stare de bine post ex1", "VAS stare de bine post ex2", "VAS stare de bine post ex3")
cat("### United")
plot_preexpost_zi(Data_United, "VAS stare de bine pre", "VAS stare de bine post ex1", "VAS stare de bine post ex2", "VAS stare de bine post ex3")


cat("## VAS corp")
cat("### Teatru")
plot_preexpost_zi(Data_teatru, "VAS corp pre", "VAS corp post ex1", "VAS corp post ex2", "VAS corp post ex3")
cat("### Psiho")
plot_preexpost_zi(Data_psiho, "VAS corp pre", "VAS corp post ex1", "VAS corp post ex2", "VAS corp post ex3")
cat("### United")
plot_preexpost_zi(Data_United, "VAS corp pre", "VAS corp post ex1", "VAS corp post ex2", "VAS corp post ex3")




plot_preexpost_etapa <- function(df, pre_var, post_x1_var, post_x2_var, post_x3_var){
  df_modif <-
    df %>%
    gather(pre_var, post_x1_var, post_x2_var, post_x3_var, key = "variable", value = "value") %>%
    filter((variable == pre_var & Zi %in% sprintf("zi%d", seq(1, 8, by = 2)))  |                           # pre  
           (variable == post_x1_var & Zi %in% c("zi2", "zi8"))  |                                    # post variations
           (variable == post_x2_var & Zi == "zi4")  |
           (variable == post_x3_var & Zi == "zi6"))  %>%
    mutate(variable = case_when(stringr::str_detect(variable, "pre") ~ "pre",
                                stringr::str_detect(variable, "post") ~ "post")) %>%
    mutate(variable = factor(variable, levels = c("pre", "post")))

  stat.test <-
    df_modif %>%
    group_by(Etapa) %>%
    tidyr::drop_na(value) %>%                                     # filter so grouping factor levels get droped and we can compare with unevel levels
    rstatix::t_test(value ~ variable) %>%                # pairwise
    rstatix::add_significance("p") %>%
    filter(p.signif != "ns")

  p<-
    ggplot(df_modif, aes(y = value, x = variable)) +
      facet_wrap(~Etapa, nrow = 1) +
      ggtitle(deparse(substitute(df))) +
      geom_boxplot() + stat_summary(fun.data = mean_se,  colour = "darkred") +
      theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
      stat_pvalue_manual(stat.test, label = "p.signif",
                         y.position = seq(max(df_modif$value, na.rm = TRUE)+2, max(df_modif$value, na.rm = TRUE)*2,
                                          length.out = nrow(stat.test)))                                                  # very hacky

  print(p)
  return(stat.test) %>% print(n = Inf)
}

cat("## By Etapa - Only United")
cat("### VAS Stres")
plot_preexpost_etapa(Data_United, "VAS stres pre", "VAS stres post ex1", "VAS stres post ex2", "VAS stres post ex3")
cat("### VAS St bine")
plot_preexpost_etapa(Data_United, "VAS stare de bine pre", "VAS stare de bine post ex1", "VAS stare de bine post ex2", "VAS stare de bine post ex3")
cat("### VAS St bine corp")
plot_preexpost_etapa(Data_United, "VAS corp pre", "VAS corp post ex1", "VAS corp post ex2", "VAS corp post ex3")
```


# Define function Moderation

```{r def_fun_find_mod}
find_mod <- function(df, dfp = NULL, num_only = TRUE, verbose = TRUE) {
  count = 0
  moderation_model_list <<- list()

  if(num_only == TRUE){
  numeric_cols <- unlist(lapply(df, is.numeric))                                      # get only numeric columns
  df <- df[, numeric_cols]
  }
  
  # restricted permutations for Moderation - Check: choose(len_names, 3)*3
  names <- colnames(df)
  len_names = length(names)
  
  if(is.null(dfp)){
    dfp <- lapply(1:len_names, function(i){
      tmp <- lapply(1:(len_names-1), function(j){
        tmp <- lapply((j+1):len_names, function(k){
          if(j != i & k != i) c(names[i], names[j], names[k])
        })
        do.call(rbind, tmp)
      })
      do.call(rbind, tmp)
    })
    dfp <- do.call(rbind.data.frame, dfp)
    names(dfp) <- paste("var", 1:3, sep = "_")
    dfp[, ] <- lapply(dfp[, ], as.character)    
  } else {
    dfp <- dfp
  }
  
  for (row in 1:nrow(dfp)) {                
    
    results <- medmod::mod(data = df,                                                   # mod does centering automatically
                          dep = dfp[row, 1], mod = dfp[row, 2], pred = dfp[row, 3], 
                          estMethod = "standard", test = TRUE, 
                          simpleSlopeEst = FALSE, simpleSlopePlot = FALSE)             # when testing use estMethod = 'bootstrap', bootstrap = 500 
    
    pmod <- as.data.frame(results$mod)[3,5]

    if(pmod < 0.05 && !is.na(pmod)) {
      count <- count + 1
        if(verbose == TRUE) {
        cat("Dependent Variable:", dfp[row, 1])
        print(results$mod) 
        }
      moderation_model_list[["Model"]][[paste("model", count, sep = "_")]] <<- as.data.frame(results$mod)   # return as list of dataframes
      moderation_model_list[["Syntax"]][[paste("model", count, sep = "_")]] <<- results$modelSyntax
      
    }
  }
  cat("\n","Report: ", count, "significant moderations out of", row, "total tries.")
}

## ex dfp
# dfp <- data.frame(
#   var1 = colnames(Data_med_melt)[grep("_post", colnames(Data_med_melt))],
#   var2 = rep("Condition", 13),
#   var3 = colnames(Data_med_melt)[grep("_pre", colnames(Data_med_melt))],
#   stringsAsFactors = FALSE
# )
# find_med(df = Data_med_melt, dfp = dfp, num_only = FALSE) 

```

# Moderation on PrePost (APS, PSS)

```{r mod_find, results='asis', echo=FALSE}
# Make dataframe of vars and mods
mods <- c("IOS post", "SRS post_Rela<U+021B>ia", "SRS post_Global")  # define moderators
pres <- c("PPS pre_Total", "APS pre_Total")
posts <- c("PPS post_Total", "APS post_Total")   
dfp <- merge(mods, cbind(pres, posts), by = NULL)  # by = NULL causes merge to do simple combinatorial data replication
dfp <- dfp[, c("posts", "x", "pres")]     # reorder columns for find_mod
dfp <- data.frame(lapply(dfp, as.character), stringsAsFactors = FALSE)

# Data - Summarizing gives means for repeated moderators while prepost outcomes are still same values
mod_prepost <- 
  Data_United %>%
  select("Indicativ subiect", "Etapa", "Zi", mods,  pres, posts) %>%
  group_by(`Indicativ subiect`) %>%
  summarise_if(is.numeric, mean, na.rm = TRUE) %>%     # this creates NaNs were there were NAs
  mutate_all(~ifelse(is.nan(.), NA, .))    
  
# For PrePost Outcome (not measured for Etape)
find_mod(df = mod_prepost, dfp = dfp, num_only = FALSE)    # found 1 moderation


```


```{r mod1srs, results='asis'}
mod1 <- 
  medmod::mod(data = mod_prepost,
              dep = "PPS post_Total", mod = "SRS post_Rela<U+021B>ia", pred = "PPS pre_Total",
              estMethod = 'bootstrap', bootstrap = 500, 
              test = TRUE, ci = TRUE,
              simpleSlopeEst = TRUE, simpleSlopePlot = TRUE)

mod1$mod %>% 
  knitr::kable(digits = 2) 

mod1$simpleSlope$estimates %>% 
  knitr::kable(digits = 2)	

mod1$simpleSlope$plot
###############################################################################

## idea not used here
  # filter(Zi %in% c("zi1", "zi8")) %>% 
  # pivot_longer(cols = matches("pre|post"), names_to = "variable", values_to = "value")



# Data_United %>% 
#   filter(Etapa == "I") %>%
#   medmod::mod(data = ., 
#             dep = "PPS post_Total", mod = "IOS post", pred = "PPS pre_Total", 
#             estMethod = "standard", test = TRUE, 
#             simpleSlopeEst = FALSE, simpleSlopePlot = TRUE)

```





<!-- Session Info and License -->

<br>

# Session Info
```{r session_info, echo = FALSE, results = 'markup'}
sessionInfo()    
```

<!-- Footer -->
&nbsp;
<hr />
<p style="text-align: center;">A work by <a href="https://github.com/ClaudiuPapasteri/">Claudiu Papasteri</a></p>
<p style="text-align: center;"><span style="color: #808080;"><em>claudiu.papasteri@gmail.com</em></span></p>
&nbsp;
