1 Read, Clean, Recode, Merge

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Read, Clean, Recode, Unite
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

## Read files
# Folder and file name of the Excel workbook holding the raw data.
# NOTE(review): the path is hard-coded to one machine; the script will not run elsewhere without editing it.
folder <- "C:/Users/Mihai/Desktop/R Notebooks/notebooks/A.1.3. Drama Exercises Kids"
file <- "A13 Tabel date copii.xlsx"

# NOTE(review): setwd() changes the working directory for everything that runs after this line.
# The import below already uses the full path, so confirm whether later code still relies on it.
setwd(folder)
# Import the workbook into `Data`; `skip = 1` is forwarded to the underlying reader
# (presumably to skip the first sheet row -- TODO confirm against the workbook layout).
Data <- rio::import(file.path(folder, file),
                           skip = 1)


## Tidy up data
# Coalesce the arguments element-wise, from left to right:
#   - where the accumulated value is NA, take the value of the next argument;
#   - where both values are present, join them with "_";
#   - otherwise keep the accumulated value.
# Returns an object shaped like the first argument.
coalesce2 <- function(...) {
  merge_pair <- function(acc, nxt) {
    acc_missing <- is.na(acc)
    # Both index sets are computed before `acc` is modified
    fill_idx <- which(acc_missing)
    join_idx <- which(!(acc_missing | is.na(nxt)))
    acc[fill_idx] <- nxt[fill_idx]
    acc[join_idx] <- paste(acc[join_idx], nxt[join_idx], sep = "_")
    acc
  }
  Reduce(merge_pair, list(...))
}

# Build the column names from rows 2 and 3 of the imported sheet: coalesce2() takes
# whichever of the two cells is filled, or joins both with "_" when both are filled.
colnames(Data) <- coalesce2(Data[2,], Data[3,])
# Drop the first three rows now that the header information lives in the column names
Data <- Data[-c(1:3),]



## Solve duplicate names due to excel double header
# Prepend a string to every column name that does not already start with it.
#   vec_colnames: character vector of column names
#   string_paste: prefix to add; compared literally, not as a regular expression
# Returns `vec_colnames` with the prefix added where it was missing.
# NA names are treated as lacking the prefix.
paste_tocolnames <- function(vec_colnames, string_paste) {
  # A logical mask (rather than negative indices from grep) keeps this correct
  # when no name carries the prefix yet: x[-integer(0)] would select nothing.
  needs_prefix <- is.na(vec_colnames) |
    !startsWith(as.character(vec_colnames), string_paste)
  vec_colnames[needs_prefix] <- paste0(string_paste, vec_colnames[needs_prefix])
  vec_colnames
}

# PANAS pre 7:26, post 37:56
# Columns are addressed by position: 7:26 get the "PANAS pre_" prefix and 37:56 the
# "PANAS post_" prefix, so the two otherwise identically named item sets stay distinct.
colnames(Data)[7:26] <- paste_tocolnames(colnames(Data)[7:26], "PANAS pre_")
colnames(Data)[37:56] <- paste_tocolnames(colnames(Data)[37:56], "PANAS post_")

# Convert the column names to the session's native encoding
colnames(Data) <- enc2native(colnames(Data))      # fix encoding


## Recode known missing values
# Leftover inspection call (refers to an object not defined in this section):
# str(Data_psiho, list.len = ncol(Data_psiho))
# Each step compares every cell of `Data` with one placeholder string and
# replaces the matching cells with NA.
Data <-
  Data %>%
  replace(. == "/", NA) %>%                                     # missing values are coded "/"
  replace(. == "-", NA) %>%                                     # missing values are coded "-"
  replace(. == "NA", NA)                                        # missing values are coded "NA"


# Exclude some extra rows and columns & some IDs
# Drop columns 57 to 86 (selected by position).
Data <- Data[, -c(57:86)]
# Renumber the rows 1..n. Original note: there should be 83.
rownames(Data) <- seq_len(nrow(Data))
# Convert the running number to numeric, then drop the excluded IDs.
# The verbs are namespace-qualified because plyr is attached in this session too
# and also exports mutate().
# NOTE: the final filter also removes rows whose `Nr crt` is NA, and on its own
# already covers the 84:97 exclusion listed above.
Data <-
  Data %>%
  dplyr::mutate(`Nr crt` = as.numeric(`Nr crt`)) %>%
  dplyr::filter(!`Nr crt` %in% c(36, 38:39, 41:43, 50:51, 53:55, 75, 84:97)) %>%
  dplyr::filter(`Nr crt` < 84)

## Check for non-numeric elements in data sets
# One logical column per variable: TRUE where varhandle::check.numeric() accepts the value
check_numeric1 <- as.data.frame(sapply(Data, varhandle::check.numeric))
# sapply(check_numeric1, function(x) length(which(!x)))     # look at columns with non-numeric and count of non-numeric values

# Row numbers of the non-numeric values, per column. lapply() always returns a list,
# so the filter below keeps working whatever the number of hits per column
# (sapply() could simplify the result to a vector or matrix).
nonnumeric1 <- lapply(check_numeric1, function(x) which(!x))
nonnumeric1[lengths(nonnumeric1) > 0]                       # show only the columns that contain non-numeric values

## Recode to numeric
# Convert columns 7:56 column by column; each stays a plain numeric vector
Data[7:56] <- lapply(Data[7:56], as.numeric)     # mutate_at fails due to encoding

## Rename columns
# Give the subject identifier and the day column short names in a single call
Data <- dplyr::rename(
  Data,
  ID = `Indica tiv subiect`,
  Zi = `(Etapa III), zi`
)
# Replace every space in the column names with an underscore
names(Data) <- gsub(pattern = " ", replacement = "_", x = names(Data))


#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Scoring Questionnaire and Unite
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
## Row sums that tolerate a limited share of missing values
#   df:        data frame or matrix of numeric columns
#   napercent: largest proportion of NA values allowed in a row (default .1)
# Returns a numeric vector with one element per row: the sum of the non-missing
# values, or NA when the row has more than ncol(df) * napercent missing values
# or when every value in the row is missing.
SpecialRowSums <- function(df, napercent = .1) {
  n_missing <- rowSums(is.na(df))
  totals <- rowSums(df, na.rm = TRUE)
  too_sparse <- n_missing > ncol(df) * napercent
  all_missing <- n_missing == ncol(df)
  # NA_real_ keeps the result numeric even when every row is rejected
  totals[too_sparse | all_missing] <- NA_real_
  totals
}

## PANAS scoring
# Positive Affect = sum of items 1, 3, 5, 9, 10, 12, 14, 16, 17, 19.
# Negative Affect = sum of items 2, 4, 6, 7, 8, 11, 13, 15, 18, 20.
panas_pa_items <- c(1, 3, 5, 9, 10, 12, 14, 16, 17, 19)
panas_na_items <- c(2, 4, 6, 7, 8, 11, 13, 15, 18, 20)

# Item k sits at column offset + k: pre items follow column 6, post items follow column 36
panas_pre_offset <- 6
panas_post_offset <- 36

# napercent = .11 allows at most 1 missing answer out of the 10 items
Data[["PA_pre_Total"]] <- SpecialRowSums(
  Data[, panas_pre_offset + panas_pa_items],
  napercent = .11
)
Data[["NA_pre_Total"]] <- SpecialRowSums(
  Data[, panas_pre_offset + panas_na_items],
  napercent = .11
)

Data[["PA_post_Total"]] <- SpecialRowSums(
  Data[, panas_post_offset + panas_pa_items],
  napercent = .11
)
Data[["NA_post_Total"]] <- SpecialRowSums(
  Data[, panas_post_offset + panas_na_items],
  napercent = .11
)

2 Sample descriptives

## Number of subjects

3 Define Function

4 PANAS

4.0.1 Positive - Zi 1

4.0.1.1 PA_pre_Total PA_post_Total

4.0.2 Positive - Zi 2

4.0.2.1 PA_pre_Total PA_post_Total

4.0.3 Negative - Zi 1

4.0.3.1 NA_pre_Total NA_post_Total

4.0.4 Negative - Zi 2

4.0.4.1 NA_pre_Total NA_post_Total

5 VAS

5.0.1 Stress - Zi 1

5.0.1.1 VAS_stres_pre VAS_stres_post_ex1

5.0.2 Stress - Zi 2

5.0.2.1 VAS_stres_pre VAS_stres_post_ex1

5.0.3 Well being - Zi 1

5.0.3.1 VAS_stare_de_bine_pre VAS_stare_de_bine_post_ex1

5.0.4 Well being - Zi 2

5.0.4.1 VAS_stare_de_bine_pre VAS_stare_de_bine_post_ex1

6 IOS

6.0.1 IOS - Zi 1

6.0.1.1 IOS_pre IOS_post

6.0.2 IOS - Zi 2

6.0.2.1 IOS_pre IOS_post


7 Session Info

R version 3.6.1 (2019-07-05)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 8.1 x64 (build 9600)

Matrix products: default

locale:
[1] LC_COLLATE=Romanian_Romania.1250  LC_CTYPE=Romanian_Romania.1250    LC_MONETARY=Romanian_Romania.1250 LC_NUMERIC=C                     
[5] LC_TIME=Romanian_Romania.1250    

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] rio_0.5.16                 scales_1.0.0               ggpubr_0.2                 magrittr_1.5               tadaatoolbox_0.16.1       
 [6] summarytools_0.8.8         rstatix_0.2.0              broom_0.5.2                PerformanceAnalytics_1.5.2 xts_0.11-2                
[11] zoo_1.8-4                  psych_1.8.12               plyr_1.8.4                 forcats_0.4.0              stringr_1.4.0             
[16] dplyr_0.8.3                purrr_0.3.2                readr_1.3.1                tidyr_1.0.0                tibble_2.1.3              
[21] ggplot2_3.2.1              tidyverse_1.2.1            papaja_0.1.0.9842          pacman_0.5.1              

loaded via a namespace (and not attached):
 [1] nlme_3.1-140       bitops_1.0-6       matrixStats_0.54.0 lubridate_1.7.4    httr_1.4.0         tools_3.6.1        backports_1.1.4   
 [8] R6_2.4.0           nortest_1.0-4      lazyeval_0.2.2     colorspace_1.4-1   withr_2.1.2        tidyselect_0.2.5   gridExtra_2.3     
[15] mnormt_1.5-5       pixiedust_0.8.6    curl_3.2           compiler_3.6.1     cli_1.1.0          rvest_0.3.2        expm_0.999-3      
[22] xml2_1.2.0         labeling_0.3       mvtnorm_1.0-11     quadprog_1.5-5     digest_0.6.21      foreign_0.8-71     pkgconfig_2.0.3   
[29] htmltools_0.3.6    pwr_1.2-2          rlang_0.4.0        readxl_1.1.0       rstudioapi_0.8     pryr_0.1.4         generics_0.0.2    
[36] jsonlite_1.6       zip_1.0.0          car_3.0-2          RCurl_1.95-4.11    rapportools_1.0    Matrix_1.2-17      Rcpp_1.0.2        
[43] DescTools_0.99.29  munsell_0.5.0      abind_1.4-5        viridis_0.5.1      lifecycle_0.1.0    stringi_1.4.3      carData_3.0-2     
[50] MASS_7.3-51.4      grid_3.6.1         parallel_3.6.1     crayon_1.3.4       lattice_0.20-38    haven_2.1.1        pander_0.6.3      
[57] hms_0.5.1          zeallot_0.1.0      knitr_1.25         pillar_1.4.2       varhandle_2.0.4    boot_1.3-22        ggsignif_0.4.0    
[64] codetools_0.2-16   glue_1.3.1         data.table_1.11.8  modelr_0.1.5       vctrs_0.2.0        cellranger_1.1.0   gtable_0.3.0      
[71] assertthat_0.2.1   xfun_0.9           openxlsx_4.1.0     viridisLite_0.3.0  ellipsis_0.3.0    
 

A work by Claudiu Papasteri

 

