\(^1\)Department of General Psychology, University of Padova, Italy
\(^2\)Department of Philosophy, Sociology, Pedagogy and Applied Psychology, University of Padova, Italy
\(^3\)Department of Communication Sciences, Humanities and International Studies, University of Urbino Carlo Bo, Italy
The present document includes the analytical steps conducted to analyse the psychometric properties and the descriptive statistics of the different types of data collected over three days from a sample of 97 healthy adults that were included in our study. Prior to conduct these analyses, the data were pre-processed as described in the dedicated pre-processing report. The following steps are impemented below:
Preprocessed data reading
Evaluation of the psychometric properties of the mood and event ratings via multilevel confirmatory factor analysis and reliability analysis, and computation of the aggregated scores
Computation of the descriptive statistics for each variable of interest, including the inspection of univariate and bivariate distributions, and the correlation between continuous variables
A selection of the potential confounding variables to be used as covariates in the following regression analyses (see dedicated report)
Here, we remove all objects from the R global environment.
# removing all objets from the workspace
rm(list=ls())
The following R packages are used in this document (see References section):
# required packages
packages <- c("lavaan","lme4","ggplot2","gridExtra","reshape2","plyr","MVN","Rmisc","Hmisc")
# generate packages references
knitr::write_bib(c(.packages(), packages),"packagesProc.bib")
## tweaking Hmisc
# # run to install missing packages
# xfun::pkg_attach2(packages, message = FALSE); rm(list=ls())
First, we read the pre-processed data files (see details in the dedicated report).
ema
includes HRV, ESM, and PrelQS information with
one row for each measurement of either HRV (5
ambulatory + 1 lab) or ESM data (1 ‘baseline’ + 5 ambulatory), for a
total of 2,037 observations (97 participants x 3 days x 7
occasions).load("3data/ema_preProcessed.RData")
cat("ema:",nrow(ema),"observations from",nlevels(ema$ID),"participants")
## ema: 2037 observations from 97 participants
head(ema,3)
between
includes all collected variables, with one row
for each participant, for a total of 97 observations. HRV, ESM, GNG, and
BLOCK data are averaged by participant.load("3data/between_preProcessed.RData")
cat("between:",nrow(between),"observations from",nlevels(between$ID),"participants")
## between: 97 observations from 97 participants
head(between,3)
gng
includes all collected variables, with one row for
each trial of the Go/No-go task, for a total of 62,856 observations
(i.e., 97 participants x 3 days x 2 blocks x 108 trials). HRV data is
included only for the segments recorded in the lab (within.day = 6),
whereas ESM information is included only as the last ESM data entry
(within.day = 5`).load("3data/gng_preProcessed.RData")
cat("gng:",nrow(gng),"observations from",nlevels(gng$ID),"participants")
## gng: 62856 observations from 97 participants
head(gng,3)
block
includes all collected variables, with one row
for each block of the go/no-go task, for a total of 582 observations
(i.e., 97 participants x 3 days x 2 blocks). Again, HRV
data is included only for the segments recorded in the lab
(within.day
= 6), whereas ESM
information is
included only as the last ESM data entry
(within.day = 5
).load("3data/block_preProcessed.RData")
cat("block:",nrow(block),"observations from",nlevels(block$ID),"participants")
## block: 582 observations from 97 participants
head(block,3)
Second, we evaluate the psychometric properties of the considered mood and event ratings. The following packages and function are used to optimize psychometric analyses.
library(lavaan); library(lme4); library(ggplot2); library(dplyr); library(reshape2); library(plyr); library(MVN)
library(MuMIn); library(gridExtra); library(tcltk)
itemICC
itemsICC <- function(data,items,output="text",digits=3){ require(lme4)
# empty data.frame of results
res <- data.frame(item=NA,icc=NA)
# multilevel model and variance partitioning for each item
for(i in 1:length(items)){
m <- lmer(formula=gsub("d1",items[i],"d1~(1|ID)"),data=data) # mlv model with random intercept for participants' ID
out <- round(as.data.frame(VarCorr(m))[1,4]/ # computing ICC as VAR_between / (VAR_between + VAR_within)
(as.data.frame(VarCorr(m))[1,4]+as.data.frame(VarCorr(m))[2,4]),digits)
# textual output or data.frame
if(output=="text"){cat(items[i],"ICC =",out,"\n")
# data.frame output
} else { res <- rbind(res,cbind(item=items[i],icc=out)) }}
if(output!="text"){ return(res) }}
Compute the intraclass correlation coefficient (ICC) of a set of items by specifying a linear mixed-effects model with a random slope for individual differences.
corr.matrices
corr.matrices <- function(data,vars,matrix.type=c("pooled","between","within"),ID="ID"){
require(ggplot2); require(reshape2); require(plyr)
# setting ID name
colnames(data)[which(colnames(data)==ID)] <- "ID"
# function that computes and plots the correlations
cm <- function(data,vars){
c <- melt(cor(data[,which(colnames(data)%in%vars)],data[,which(colnames(data)%in%vars)],
use="complete.obs",method="pearson")) # listwise deletion
ggplot(c,aes(x=Var1,y=Var2,fill=value)) + geom_tile() + # plotting heat diagram
geom_text(aes(x=Var1,y=Var2,label=round(value,2)),color="black",size=5) + labs(x="",y="")+
scale_fill_gradient2(low="darkblue",high="#f03b20",mid="white",midpoint=0,limit=c(-1,1),space="Lab",
name="Pearson\nCorrelation",guide="legend",breaks=round(seq(1,-1,length.out = 11),2),
minor_breaks=round(seq(1,-1,length.out = 11),2)) + theme(text=element_text(face="bold",size=12)) }
# pooled correlation matrix (as the ratings were independent)
if(matrix.type=="pooled"){
cm(data,vars) + ggtitle(paste("Pooled correlation matrix, N =",nrow(na.omit(ema[,vars]))))
} else {
# computing individual means
between <- data[!duplicated(data$ID),]
for(Var in vars){
means <- aggregate(data[,Var]~data[,"ID"],FUN="mean",na.action=na.omit)
colnames(means) <- c("ID",paste(Var,".cm",sep=""))
between <- join(between,means,by="ID",type="left") }
# level-2 matrix (averaged scores by participant)
if(matrix.type=="between"){
cm(between,paste(vars,".cm",sep="")) + ggtitle(paste("Level-2 correlation matrix, N =",nrow(between)))
# level-1 matrix (mean-centered scores)
} else if(matrix.type=="within"){
# computing mean-centered scores
data <- join(data,between[,c("ID",paste(vars,".cm",sep=""))],by="ID",type="left") # merging data with individual means
for(Var in vars){ data[,paste(Var,".mc",sep="")] <- data[,Var] - data[,paste(Var,".cm",sep="")] } # person-mean-centering
# plotting
cm(data,paste(vars,".mc",sep="")) + ggtitle(paste("Level-1 correlation matrix, N =",
nrow(na.omit(data[,paste(Var,".mc",sep="")])))) }}}
Computes and plots the pooled, between-subjects, and within-subject correlation matrix of item ratings.
loadings()
#' @title Summarizing standardized loadings from a multilevel CFA model
#' @param model = multilevel CFA model.
#' @param st = Character indicating the standardization level of loadings: "st.all" or "st.lv".
loadings <- function(model=NA,st="st.all"){ require(lavaan)
if(st=="st.all"){ LOADs <- standardizedsolution(model)
} else if(st=="st.lv"){ LOADs <- standardizedsolution(model,type="st.lv")
} else{ LOADs <- parameterestimates(model) }
return(LOADs[LOADs$op=="=~",])}
Print the standardized factor loadings of an ordinary or a multilevel CFA model.
fit.ind()
fit.ind <- function(model=NA,from_summary=FALSE,type="multilevel",models.names=NA,infocrit=TRUE,digits=3,
fits=c("npar","chisq","df","rmsea","cfi","srmr_within","srmr_between")){ require(lavaan); require(MuMIn)
# removing level-specific fit indices when model is "monolevel"
if(type=="monolevel"){
fits <- gsub("srmr_within","srmr",fits)
fits <- fits[fits!="srmr_between"] }
if(from_summary==FALSE){
# returning data.frame of models fit indices when more than one model is considered
if(length(model)>1){
fit.indices <- fitmeasures(model[[1]])[fits]
for(i in 2:length(model)){
fit.indices <- rbind(fit.indices,fitmeasures(model[[i]])[fits]) }
if(infocrit==TRUE){
fit.indices <- cbind(fit.indices,
AICw=Weights(sapply(model,AIC)),BICw=Weights(sapply(model,BIC))) }
if(!is.na(models.names[1])){ row.names(fit.indices) <- models.names }
fit.indices <- round(as.data.frame(fit.indices),digits)
return(fit.indices[order(fit.indices$rmsea),])
} else { return(round(as.data.frame(fitmeasures(model)[fits]),2)) }
} else { # in some cases the fit indices are available only from the model's summary
quiet <- function(fit) { # this was written by Alicia FRANCO MARTÍNEZ on the lavaan Google group
sink(tempfile())
on.exit(sink())
invisible(summary(fit, standardized = TRUE, fit.measures=TRUE))
}
sum <- quiet(model)
fit.indices <- sum$FIT[fits]
return(round(as.data.frame(fit.indices[order(fit.indices$rmsea),]),digits))}}
Prints fit indices of one or more CFA models. Note: According to the criteria proposed by Hu and Bentler (1999), we consider RMSEA ≤ .06, CFI ≥ .95, and SRMR ≤ .08 as indicative of adequate fit.
mcfa.Huang()
mcfa.Huang<-function(gp,dat){
dat1<-dat[complete.cases(dat),]
g<-dat1[,gp] #grouping
freq<-data.frame(table(g))
gn<-grep(gp,names(dat1)) #which column number is the grouping var
dat2<-dat1[,-gn] #raw only
G<-length(table(g))
n<-nrow(dat2)
k<-ncol(dat2)
scaling<-(n^2-sum(freq$Freq^2)) / (n*(G-1))
varn<-names(dat1[,-gn])
ms<-matrix(0,n,k)
for (i in 1:k){
ms[,i]<-ave(dat2[,i],g)
}
cs<-dat2-ms #deviation matrix, centered scores
colnames(ms)<-colnames(cs)<-varn
b.cov<-(cov(ms) * (n - 1))/(G-1) #group level cov matrix
w.cov<-(cov(cs) * (n - 1))/(n-G) #individual level cov matrix
pb.cov<-(b.cov-w.cov)/scaling #estimate of pure/adjusted between cov matrix
w.cor<-cov2cor(w.cov) #individual level cor matrix
b.cor<-cov2cor(b.cov) #group level cor matrix
pb.cor<-cov2cor(pb.cov) #estimate of pure between cor matrix
icc<-round(diag(pb.cov)/(diag(w.cov)+diag(pb.cov)),3) #iccs
return(list(b.cov=b.cov,pw.cov=w.cov,ab.cov=pb.cov,pw.cor=w.cor,
b.cor=b.cor,ab.cor=pb.cor,
n=n,G=G,c.=scaling,sqc=sqrt(scaling),
icc=icc,dfw=n-G,dfb=G)) }
Generate the pooled within-subject covariance matrix. Note: function created by Francis L. Huang (University of Missouri), available from https://francish.netlify.app/
influential.analysis()
#' @title Influential analysis
#' @param data = data.frame used by the model.
#' @param cluster = Character. Variable name in the data frame defining the cluster in a two-level dataset.
#' @param parameter = Character. "var" for estimated variances, "load" for loadings.
#' @param st = Character indicating the standardization level of loadings: "st.all" or "st.lv".
#' @param n.items = Integer. Number of observed variables considered by the model.
#' @param item.labels = Character vector indicating the labels of the considered observed variables.
#' @param m = Character string specifying the model using the lavaan synthax.
influential.analysis <- function(data=NA,cluster="ID",parameter="var",st="st.all",
n.items=9,item.labels=c("v1", "v2", "v3", "t1", "t2", "t3", "f1", "f2","f3"),
m = 'level: 1
hedTone_W =~ v1 + v2 + v3
Calmness_W =~ t1 + t2 + t3
enArousal_W =~ f1 + f2 + f3
level: 2
hedTone_B =~ v1 + v2 + v3
Calmness_B =~ t1 + t2 + t3
enArousal_B =~ f1 + f2 + f3'){ require(lavaan); require(tcltk)
# storing results on the whole sample
m.res <- cfa(model=m,data=data,cluster=cluster,std.lv=TRUE)
if(parameter=="var"){
lv1 <- parameterestimates(m.res)[parameterestimates(m.res)$op=="~~" &
nchar(parameterestimates(m.res)$lhs)==2 &
parameterestimates(m.res)$level==1,"est"]
lv2 <- parameterestimates(m.res)[parameterestimates(m.res)$op=="~~" &
nchar(parameterestimates(m.res)$lhs)==2 &
parameterestimates(m.res)$level==2,"est"]
} else if(parameter=="load"){
if(st=="st.all"){
lv1 <- standardizedsolution(m.res)[standardizedsolution(m.res)$op=="=~",
"est.std"][1:n.items]
lv2 <- standardizedsolution(m.res)[standardizedsolution(m.res)$op=="=~",
"est.std"][(n.items+1):(n.items*2)]
} else if(st=="st.lv"){
lv1 <- standardizedsolution(m.res,type="st.lv")[standardizedsolution(m.res,type="st.lv")$op=="=~",
"est.std"][1:n.items]
lv2 <- standardizedsolution(m.res,type="st.lv")[standardizedsolution(m.res,type="st.lv")$op=="=~",
"est.std"][(n.items+1):(n.items*2)]
} else{
lv1 <- parameterestimates(m.res)[parameterestimates(m.res)$op=="=~",
"est.std"][1:n.items]
lv2 <- parameterestimates(m.res)[parameterestimates(m.res)$op=="=~",
"est.std"][(n.items+1):(n.items*2)]
}
}
# dataframe storing results
parameters <- data.frame(ID=rep("all",n.items),
lv1 = lv1,
lv2 = lv2,
neg.var=rep(1,n.items))
if(!any(diag(lavInspect(m.res,"est")[["within"]][["theta"]]) < 0) &
!any(diag(lavInspect(m.res,"est")[["ID"]][["theta"]]) < 0)){
parameters[1:n.items,"neg.var"] <- 0 }
# replicate parameters estimation by excluding any participant one-by-one
IDs <- levels(as.factor(as.character(data$ID)))
pb <- tkProgressBar("Modeling", "Data modeling",0, 100, 50) # progress bar
for(ID in IDs){ info <- sprintf("%d%% done", round(which(IDs==ID)/length(IDs)*100))
setTkProgressBar(pb, round(which(IDs==ID)/length(IDs)*100), sprintf("Data modeling", info), info)
m.res <- cfa(model=m,data=data[data$ID!=ID,],cluster=cluster,std.lv=TRUE)
if(parameter=="var"){
lv1 <- parameterestimates(m.res)[parameterestimates(m.res)$op=="~~" &
nchar(parameterestimates(m.res)$lhs)==2 &
parameterestimates(m.res)$level==1,"est"]
lv2 <- parameterestimates(m.res)[parameterestimates(m.res)$op=="~~" &
nchar(parameterestimates(m.res)$lhs)==2 &
parameterestimates(m.res)$level==2,"est"]
} else if(parameter=="load"){
if(st=="st.all"){
lv1 <- standardizedsolution(m.res)[standardizedsolution(m.res)$op=="=~",
"est.std"][1:n.items]
lv2 <- standardizedsolution(m.res)[standardizedsolution(m.res)$op=="=~",
"est.std"][(n.items+1):(n.items*2)]
} else if(st=="st.lv"){
lv1 <- standardizedsolution(m.res,type="st.lv")[standardizedsolution(m.res,type="st.lv")$op=="=~",
"est.std"][1:n.items]
lv2 <- standardizedsolution(m.res,type="st.lv")[standardizedsolution(m.res,type="st.lv")$op=="=~",
"est.std"][(n.items+1):(n.items*2)]
} else{
lv1 <- parameterestimates(m.res)[parameterestimates(m.res)$op=="=~",
"est.std"][1:n.items]
lv2 <- parameterestimates(m.res)[parameterestimates(m.res)$op=="=~",
"est.std"][(n.items+1):(n.items*2)] }}
# negative estimated variances
if(!any(diag(lavInspect(m.res,"est")[["within"]][["theta"]]) < 0) &
!any(diag(lavInspect(m.res,"est")[["ID"]][["theta"]]) < 0)){
neg.Var <- 0 } else { neg.Var <- 1 }
# storing information
parameters <- rbind(parameters,
data.frame(ID=rep(ID,n.items),
lv1=lv1,lv2=lv2,
neg.var = rep(neg.Var,n.items)))}
close(pb)
parameters$X <- rep(item.labels,length(IDs)+1)
parameters <- parameters[,c("ID","X","lv1","lv2","neg.var")]
if(parameter=="var"){
cat("Negative variance estimated on level 2 for the following items:",
levels(as.factor(as.character(parameters[parameters$lv2<0,"X"]))),
"\nNegative variance estimated on level 1 for the following items:",
levels(as.factor(as.character(parameters[parameters$lv1<0,"X"]))),sep=" ")}
return(parameters)}
fits a MCFA model on several subsamples obtained by removing each participant (cluster) one by one, and save the parameters (either item loadings or item residual variances) estimated in each subsample
plot.influential()
#' @title Plotting results of influential analysis
#' @param data = data.frame generated by the influential.analysis function.
#' @param parameter = Character. "var" for estimated variances, "load" for loadings.
#' @param variable = Character indicating the name of the observed variable to be plotted.
#' @param level = Integer indicating if focusing on level 1 or 2 (default).
#' @param threshold_lower = Numeric. Threeshold below which the participants'IDs are showed.
#' @param threshold_upper = Numeric. Threeshold above which the participants'IDs are showed.
plot.influential <- function(data,parameter="var",variable,level=2,threshold_lower=NA,threshold_upper=NA){ require(ggplot2)
if(parameter=="var"){ par <- "variance"} else { par = "loading" }
if(level==2){ colnames(data)[colnames(data)=="lv2"] <- "par"
} else { colnames(data)[colnames(data)=="lv1"] <- "par" }
p <- ggplot(data[data$X==variable,],aes(ID,par))+
geom_point()+ggtitle(paste(variable,par,"on level",level))+
geom_point(data=data[data$X==variable & data$ID=="all",],colour="blue",size=5)+
# geom_point(data=data[data$X==variable & data$par<0,],colour="red")+
geom_point(data=data[data$X==variable & data$neg.var==1,],colour="red")+
theme(axis.text.x = element_blank()) + xlab("")
if(!is.na(threshold_lower) & is.na(threshold_upper)){
p <- p + geom_text(data=data[data$X==variable &
data$par < threshold_lower,],
aes(label=substr(ID,5,6)),nudge_x=-2,size=3)
} else if(is.na(threshold_lower) & !is.na(threshold_upper)){
p <- p + geom_text(data=data[data$X==variable &
data$par > threshold_upper,],
aes(label=substr(ID,5,6)),nudge_x=-2,size=3)
} else if(!is.na(threshold_lower) & !is.na(threshold_upper)){
p <- p + geom_text(data=data[data$X==variable &
(data$par > threshold_upper |
data$par < threshold_lower),],
aes(label=substr(ID,5,6)),nudge_x=-2,size=3) }
return(p) }
plots the parameters obtained with the
influential.analysis()
function
check.influential()
#' @title Inspecting changes in parameters after the removal of influential cases
#' @param model = MCFA model fitted with lavaan.
#' @param data = data.frame generated by the influential.analysis function.
#' @param parameter = Character. Identification codes of participants to be excluded.
#' @param level = Numeric. 1 = within, 2 = between (default).
check.influential <- function(model,data,IDs,level=2){ require(lavaan); require(ggplot2)
# excluding participants
data2 <- data
for(ID in levels(as.factor(data$ID))){
if(any(IDs==ID)){ data2 <- data2[data2$ID!=ID,] }}
# updating model
model2 <- update(model,data=data2)
# standardized loadings
load1 <- standardizedsolution(model)[standardizedsolution(model)$op=="=~",]
load2 <- standardizedsolution(model2)[standardizedsolution(model2)$op=="=~",]
# data.frame of loadings
loads <- data.frame(inclusion=as.factor(c(rep("IN",nrow(load1)),rep("OUT",nrow(load1)))), # inclusion
load=as.factor(paste(load1[,"rhs"],c(rep("w",nrow(load1)/2),rep("b",nrow(load1)/2)),sep="_")),
loading=c(load1[,"est.std"],load2[,"est.std"]))
ggplot(loads,aes(x=inclusion,y=loading,color=load)) + geom_point() + geom_line(aes(group=load))
}
visualizes the changes in parameters estimated with and without one or more participants
plot.traj()
#' @title Plotting trajectories of Mood items scores of single participants
#' @param data = data.frame generated by the influential.analysis function.
#' @param ID = Character. Identification code for the selected participant.
plot.traj <- function(data,var="Mood",ID){ require(ggplot2); require(gridExtra)
# Negative Valence
p1 <- ggplot(data[data$ID==ID,],aes(x=within.day,y=v1))+
geom_line(aes(y=4),linetype=2) + geom_smooth(se=FALSE,span=0.7,color="lightblue")+
geom_smooth(aes(y=v2),se=FALSE,span=0.7,color="blue")+
geom_smooth(aes(y=v3),se=FALSE,span=0.7,color="darkblue")+
facet_wrap("day",strip.position="right")+
ggtitle(paste("Hedonic Tone items in participant",ID))+ylab("")+xlab("")+
scale_x_continuous(breaks=1:7)+scale_y_continuous(breaks=1:7)+
theme(axis.text = element_text(size=5),strip.background = element_rect(colour=factor(data$ID)))
# tense arousal
p2 <- ggplot(data[data$ID==ID,],aes(x=within.day,y=t1))+
geom_line(aes(y=4),linetype=2) + geom_smooth(se=FALSE,span=0.7,color="salmon")+
geom_smooth(aes(y=t2),se=FALSE,span=0.7,color="red")+
geom_smooth(aes(y=t3),se=FALSE,span=0.7,color="darkred")+
facet_wrap("day",strip.position="right")+
ggtitle(paste("Calmness items in participant",ID))+ylab("")+xlab("")+
scale_x_continuous(breaks=1:7)+scale_y_continuous(breaks=1:7)+
theme(axis.text = element_text(size=5),strip.background = element_rect(colour=factor(data$ID)))
# fatigue
p3 <- ggplot(data[data$ID==ID,],aes(x=within.day,y=f1))+
geom_line(aes(y=4),linetype=2) + geom_smooth(se=FALSE,span=0.7,color="yellow")+
geom_smooth(aes(y=f2),se=FALSE,span=0.7,color="salmon")+
geom_smooth(aes(y=f3),se=FALSE,span=0.7,color="orange")+
facet_wrap("day",strip.position="right")+
ggtitle(paste("Energetic Arousal items in participant",ID))+ylab("")+xlab("")+
scale_x_continuous(breaks=1:7)+scale_y_continuous(breaks=1:7)+
theme(axis.text = element_text(size=5),strip.background = element_rect(colour=factor(data$ID)))
if(var=="Mood"){ grid.arrange(p1,p2,p3,nrow=3)
} else if(var=="HT"){ p1 }else if(var=="CA"){ p2 } else{ p3 }}
plots the item scores’ temporal trajectories of a given participant
MCFArel
#' @title Computing composite reliability index from a MCFA model
#' @param fit = multileve CFA model.
#' @param level = level of interest (either 1 or 2)
#' @param items = numeric string indicating the items of interest (e.g., 1:3 for items 1, 2 and 3)
#' @param item.labels = character string indicating the names of the items of interest
MCFArel <- function(fit,level,items,item.labels){ require(lavaan)
if(level==1){
sl <- standardizedsolution(fit)[1:(nrow(standardizedSolution(fit))/2),] # pars within
} else if(level==2){
sl <- standardizedsolution(fit)[(nrow(standardizedSolution(fit))/2):nrow(standardizedsolution(fit)),] # pars between
} else { stop("Error: level can be either 1 or 2") }
sl <- sl$est.std[sl$op == "=~"][items] # standardized loadings of the selected items
names(sl) <- item.labels # item names
re <- 1 - sl^2 # residual variances of items
omega <- sum(sl)^2 / (sum(sl)^2 + sum(re)) # composite reliability index
return(round(omega,2))}
Compute level-specific indices of composite reliability from a MCFA model (see Geldhof et al., 2014).
Mood was measured using the Italian adaptation of the
Multidimensional Mood Questionnaire (MDMQ) (Wilhelm &
Schoebi, 2007; Menghini et al., 2022), using 9 bipolar items to
measure three hypothesized correlated dimension, namely Hedonic Tone,
Calmness, and Energetic Arousal. Item scores were recoded so that higher
scores indicate more positive ratings. Here, we prepare the data to be
used in the analysis by selecting the dataset with all cases with
complete responses to any mood item, which we call
Mood
.
# selecting mood item labels
mood <- c("v1","v2","v3", # hedonic tone
"t1","t2","t3", # calmness
"f1","f2","f3") # energetic arousal
# selecting dataset of complete responses (listwise deletion)
Mood <- na.omit(ema[,c("ID",mood)])
cat("Analysing",nrow(Mood),"complete responses from",nlevels(as.factor(as.character(Mood$ID))),"participants")
## Analysing 1600 complete responses from 97 participants
First, we consider descriptive statistics in raw item scores, in terms of score distribution, ICC, and correlations.
First, we inspect the score distribution of each mood item using the
MVN
package. We can note that mood items are quite
normally distributed, with a little skewness on the left
(especially for items t1
, t2
, and
t3
). Although Mood items show the typical “discrete”
patterns of ordinal scales, no drastic deviations from normality are
detected and we are confident using them for confirmatory factor
analysis (see below).
mvn(data=Mood[,mood],univariatePlot="histogram")[4] # histogram mood
## $<NA>
## NULL
mvn(data=Mood[,mood],univariatePlot="qqplot")[4] # qqnorm mood
## $<NA>
## NULL
Second, we evaluate the balance between inter- and intraindividual
variability in each item score by computing the intraclass correlation
coefficients (ICC) with the itemsICC
function showed above.
We can note that all ICCs are equal to or below .42, suggesting that
most variance is at level 1 (within) but there is still
substantial interindividual variability to consider multilevel modeling.
Items v1
and v2
show the highest variance
between participants, whereas f3
shows the highest variance
within participant.
itemsICC(data=Mood,items=mood)
## v1 ICC = 0.416
## v2 ICC = 0.396
## v3 ICC = 0.403
## t1 ICC = 0.384
## t2 ICC = 0.372
## t3 ICC = 0.384
## f1 ICC = 0.347
## f2 ICC = 0.314
## f3 ICC = 0.353
Third, we visualize the correlation matrices of mood item scores with
the corr.matrices
function showed above. Three matrices are
generated, all showing plausible values coherently with the hypothesized
measurement model, and proportional across matrices (suggesting that
cross-level invariance is plausible):
corr.matrices(data=ema,vars=mood,matrix.type="pooled")
corr.matrices(data=ema,vars=mood,matrix.type="between")
corr.matrices(data=ema,vars=mood,matrix.type="within")
Second, a multilevel confirmatory factor analysis (MCFA) is performed on mood item ratings to evaluate the validity of the hypothesized measurement model (i.e., assuming three correlated factors at both levels) and the cross-level isomorphism of our mood measure.
The following analytical pipeline is implemented:
The hypothesized and alternative multilevel models are specified, in addition to a set of preliminary benchmark models (following Hox, 2010, chapter 14) and models assuming cross-level invariance (following Jak & Jorgensen, 2017)
Potential problems of non-convergence or Heywood cases (improper solutions) are handled
A model comparison is performed between the specified models using fit indices and information criteria
Standardized and unstandardized factor loadings estimated by the selected model are inspected
Here, we conduct the preliminary steps suggested by Hox (2010, chapter 14). Similar to what recommended by Muthén (1994), the idea is to specify a set of models to separately assess the factor structure at level 1 (within) and level 2 (between), and to compare those models with the hypothesized multilevel model.
The first step evaluates the within-cluster factor
structure. Following Muthén (1994), we
conduct a conventional (one-level) CFA of the pooled
within-cluster covariance matrix poolCov
, obtained
with a function created by Huang (2018). We can note
that all standardized loadings estimated from the pooled within-cluster
covariance matrix are significant and equal to or higher than .60. Item
v2
shows the lowest loading (.60), whereas the highest
loading is showed by f1
(.82). The model shows roughly
acceptable fit indices, suggesting that the within-cluster
structure holds.
# poolend covariance matrix (using function by Huang)
poolCov <- mcfa.Huang(dat=Mood,gp="ID")$pw.cov
# one-level model on the pooled within-cluster matrix
m.withinPool_Mood <- cfa('hedTone =~ v1 + v2 + v3
Calmness =~ t1 + t2 + t3
enArousal =~ f1 + f2 + f3',sample.cov=poolCov,sample.nobs=nrow(Mood),std.lv=TRUE)
loadings(m.withinPool_Mood,st="st.all") # standardized loadings
fit.ind(m.withinPool_Mood,type="monolevel") # fit indices
The second step is to evaluate the between-cluster factor structure by specifying a set of benchmark models for the group level (Hox, 2010). This is done to test whether there is a between-cluster structure to be modeled.
a) Null model
First, we specify a null model with no specification on level
2. If this model holds, it will mean that there is no
cluster-level structure at all (all covariances in the
between-clusters matrix are the result of individual sampling
variation), and we can continue using a conventional (mono-level)
analysis. We can note that the null model converges with no problems,
although one participant HRV068
shows no intraindividual
variability for item v2
, v3
, and
t3
, and a further participant HRV084
shows no
intraindividual variability for item t3
.
# one-factor within model + null between model
m.null <- cfa('level: 1
hedTone_W =~ v1 + v2 + v3
Calmness_W =~ t1 + t2 + t3
enArousal_W =~ f1 + f2 + f3
level: 2
v1 ~~ 0*v1
v2 ~~ 0*v2
v3 ~~ 0*v3
t1 ~~ 0*t1
t2 ~~ 0*t2
t3 ~~ 0*t3
f1 ~~ 0*f1
f2 ~~ 0*f2
f3 ~~ 0*f3', data=Mood,cluster='ID',std.lv=TRUE)
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v2" has no variance within some clusters. The
## cluster ids with zero within variance are: HRV068
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v3" has no variance within some clusters. The
## cluster ids with zero within variance are: HRV068
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "t3" has no variance within some clusters. The
## cluster ids with zero within variance are: HRV068 HRV084
plot.traj(ema,var="HT","HRV068") # checking items v1-v3 HRV068
plot.traj(ema,var="CA","HRV068") # checking item t3 HRV068
b) Independence model
Second, we specify an independence model on level 2, by specifying only variances but not covariances on this level. If this model holds, it will mean that “there is cluster-level structure, but not substantively interesting structural model”, and we can focus our analyses on the pooled within-cluster matrix. Otherwise, it will mean that some kind of structure exists on Level 2.
# one-factor within model + independence between model
m.ind <- cfa('level: 1
hedTone_W =~ v1 + v2 + v3
Calmness_W =~ t1 + t2 + t3
enArousal_W =~ f1 + f2 + f3
level: 2
v1 ~~ v1
v2 ~~ v2
v3 ~~ v3
t1 ~~ t1
t2 ~~ t2
t3 ~~ t3
f1 ~~ f1
f2 ~~ f2
f3 ~~ f3',data=Mood,cluster='ID',std.lv=TRUE)
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v2" has no variance within some clusters. The
## cluster ids with zero within variance are: HRV068
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v3" has no variance within some clusters. The
## cluster ids with zero within variance are: HRV068
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "t3" has no variance within some clusters. The
## cluster ids with zero within variance are: HRV068 HRV084
c) Saturated model
Third, we specify a saturated model for the between-clusters part that fits a full covariance matrix to the family-level observations. This allows us “to examine the best possible fit given the individual-level [here, within-individual] model” (Hox, 2010). If this model holds, the factors at the within-level in this model correspond to ‘within-only’ constructs, meaning that the construct only ‘exists’ at the within level, whereas lv2 variation is just ‘spurious’ (Stapelton et al., 2016). The saturated model converges with no Heywood cases.
# one-factor within model + saturated between part
m.sat <- cfa('level: 1
hedTone_W =~ v1 + v2 + v3
Calmness_W =~ t1 + t2 + t3
enArousal_W =~ f1 + f2 + f3
level: 2
v1 ~~ v1 + v2 + v3 + t1 + t2 + t3 + f1 + f2 + f3
v2 ~~ v2 + v3 + t1 + t2 + t3 + f1 + f2 + f3
v3 ~~ v3 + t1 + t2 + t3 + f1 + f2 + f3
t1 ~~ t1 + t2 + t3 + f1 + f2 + f3
t2 ~~ t2 + t3 + f1 + f2 + f3
t3 ~~ t3 + f1 + f2 + f3
f1 ~~ f1 + f2 + f3
f2 ~~ f2 + f3
f3 ~~ f3', data=Mood,cluster='ID',std.lv=TRUE)
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v2" has no variance within some clusters. The
## cluster ids with zero within variance are: HRV068
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v3" has no variance within some clusters. The
## cluster ids with zero within variance are: HRV068
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "t3" has no variance within some clusters. The
## cluster ids with zero within variance are: HRV068 HRV084
Finally, we evaluate the fit of these benchmark models. If the structure on level 2 holds, all models (and especially the Null and the Independence model) should be rejected. Accordingly, we can note that both the null and the independence models are rejected, whereas roughly acceptable fit is shown by the saturated model. Consequently, the saturated model will be considered for evaluating the fit of the hypothesized models.
fit.ind(model=c(m.null,m.ind,m.sat),models.names=c("Lv2 Null","Lv2 Independence","Lv2 Saturated"))
Here, we specify the hypothesized MCFA models and a
set of alternative models with a different number of dimensions. Note
that each model is parametrized by standardizing the latent factors with
std.lv = TRUE
to avoid fixing to 1 the first indicator of
each dimension since the interest is on factor loadings. Following Jag & Jorgensen (2017), we also evaluate
cross-level isomorphism by specifying more restrictive
models with weak and strong invariance across levels, respectively.
Here, we specify the configural-invariance model m3x3
,
assuming 3 latent variables on both levels. We can note
that the model shows an improper solution (Heywood
case), which will be handled in the following section
2.1.2.3.
# hypothesized multilevel model (negative variance -> see HEYWOOD CASES)
m3x3 <- cfa('level: 1
hedTone_w =~ v1 + v2 + v3
Calmness_w =~ t1 + t2 + t3
enArousal_w =~ f1 + f2 + f3
level: 2
hedTone_b =~ v1 + v2 + v3
Calmness_b =~ t1 + t2 + t3
enArousal_b =~ f1 + f2 + f3',
data=Mood,cluster='ID',std.lv=TRUE)
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v2" has no variance within some clusters. The
## cluster ids with zero within variance are: HRV068
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v3" has no variance within some clusters. The
## cluster ids with zero within variance are: HRV068
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "t3" has no variance within some clusters. The
## cluster ids with zero within variance are: HRV068 HRV084
## Warning in lav_object_post_check(object): lavaan WARNING: some estimated ov
## variances are negative
Here, alternative models are specified with less latent factors on
level 1 and/or level 2. Specifically, models with two latent variables
(Positive Tone and Energetic Arousal) are justified by the strong
correlation between Hedonic Tone and Calmness items, especially on level
2 (see also Wilhelm & Schoebi, 2007; Menghini et al., 2022). We can note that models
m2x3
and m2x2
show improper solutions
(Heywood cases), which will be handled in the following
section 2.1.2.3.
# 3 latent on lv1, 2 latent on lv2
m3x2 <- cfa('level: 1
hedTone_w =~ v1 + v2 + v3
Calmness_w =~ t1 + t2 + t3
enArousal_w =~ f1 + f2 + f3
level: 2
PosTone_b =~ v1 + v2 + v3 + t1 + t2 + t3
enArousal_b =~ f1 + f2 + f3',
data=Mood,cluster='ID',std.lv=TRUE)
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v2" has no variance within some clusters. The
## cluster ids with zero within variance are: HRV068
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v3" has no variance within some clusters. The
## cluster ids with zero within variance are: HRV068
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "t3" has no variance within some clusters. The
## cluster ids with zero within variance are: HRV068 HRV084
# 2 latent on lv1, 3 latent on lv2
m2x3 <- cfa('level: 1
PosTone_w =~ v1 + v2 + v3 + t1 + t2 + t3
enArousal_w =~ f1 + f2 + f3
level: 2
hedTone_b =~ v1 + v2 + v3
Calmness_b =~ t1 + t2 + t3
enArousal_b =~ f1 + f2 + f3',
data=Mood,cluster='ID',std.lv=TRUE)
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v2" has no variance within some clusters. The
## cluster ids with zero within variance are: HRV068
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v3" has no variance within some clusters. The
## cluster ids with zero within variance are: HRV068
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "t3" has no variance within some clusters. The
## cluster ids with zero within variance are: HRV068 HRV084
## Warning in lav_object_post_check(object): lavaan WARNING: some estimated ov
## variances are negative
# 2 latent factors on both levels
m2x2 <- cfa('level: 1
PosTone_w =~ v1 + v2 + v3 + t1 + t2 + t3
enArousal_w =~ f1 + f2 + f3
level: 2
PosTone_b =~ v1 + v2 + v3 + t1 + t2 + t3
enArousal_b =~ f1 + f2 + f3',
data=Mood,cluster='ID',std.lv=TRUE)
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v2" has no variance within some clusters. The
## cluster ids with zero within variance are: HRV068
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v3" has no variance within some clusters. The
## cluster ids with zero within variance are: HRV068
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "t3" has no variance within some clusters. The
## cluster ids with zero within variance are: HRV068 HRV084
## Warning in lav_object_post_check(object): lavaan WARNING: some estimated ov
## variances are negative
Finally, we follow Jag & Jorgensen (2017) by
evaluating different levels of factor invariance: (1) configural
invariance, where the same factor structure holds but factor
loadings differ across levels as in model m3x3
, (2)
weak factorial invariance, where the factor loadings
are equal across levels, and (3) strong factorial
invariance, when the values of factor loadings and intercepts
are equal across levels, and the residual variance on level 2 is zero.
Here we specify the models assuming, respectively, weak and strong
invariance across levels. We can note that the former model shows an
improper solution (Heywood case), which will be handled
in the following section 2.1.2.3.
# weak invariance model (negative variance -> see HEYWOOD CASES)
m3x3.weakInv <- cfa('level: 1
hedTone_w =~ a*v1 + b*v2 + c*v3
Calmness_w =~ d*t1 + e*t2 + f*t3
enArousal_w =~ g*f1 + h*f2 + i*f3
level: 2
hedTone_b =~ a*v1 + b*v2 + c*v3
Calmness_b =~ d*t1 + e*t2 + f*t3
enArousal_b =~ g*f1 + h*f2 + i*f3',
data=Mood,cluster='ID',std.lv=TRUE)
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v2" has no variance within some clusters. The
## cluster ids with zero within variance are: HRV068
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v3" has no variance within some clusters. The
## cluster ids with zero within variance are: HRV068
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "t3" has no variance within some clusters. The
## cluster ids with zero within variance are: HRV068 HRV084
## Warning in lav_object_post_check(object): lavaan WARNING: some estimated ov
## variances are negative
# strong invariance
m3x3.strInv <- cfa('level: 1
hedTone_w =~ a*v1 + b*v2 + c*v3
Calmness_w =~ d*t1 + e*t2 + f*t3
enArousal_w =~ g*f1 + h*f2 + i*f3
level: 2
hedTone_b =~ a*v1 + b*v2 + c*v3
Calmness_b =~ d*t1 + e*t2 + f*t3
enArousal_b =~ g*f1 + h*f2 + i*f3
v1 ~~ 0*v1
v2 ~~ 0*v2
v3 ~~ 0*v3
t1 ~~ 0*t1
t2 ~~ 0*t2
t3 ~~ 0*t3
f1 ~~ 0*f1
f2 ~~ 0*f2
f3 ~~ 0*f3',
data=Mood,cluster='ID',std.lv=TRUE)
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v2" has no variance within some clusters. The
## cluster ids with zero within variance are: HRV068
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v3" has no variance within some clusters. The
## cluster ids with zero within variance are: HRV068
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "t3" has no variance within some clusters. The
## cluster ids with zero within variance are: HRV068 HRV084
All multilevel models but model m3x3
and the strong-invariance model m3x3.strInv
show a negative
variance estimate (i.e., improper solution, or Heywood case) at level 2.
Specifically, we can note that in most models the Heywood case is
related to item t2
(i.e., the item that showed the lowest
ICC in section 2.1.1), whereas in model m2x3
it involves
item t3
. In all cases, we can note that the upper CI of the
variance estimate is positive, implying that the estimated variances are
not significantly below zero and, thus, ruling out the
possibility of structural misspecification.
# m3x3
parameterestimates(m3x3)[parameterestimates(m3x3)$op=="~~" & parameterestimates(m3x3)$est<0,
c("lhs","level","est","se","ci.lower","ci.upper")]
# m2x3
parameterestimates(m2x3)[parameterestimates(m2x3)$op=="~~" & parameterestimates(m2x3)$est<0,
c("lhs","level","est","se","ci.lower","ci.upper")]
# m2x2
parameterestimates(m2x2)[parameterestimates(m2x2)$op=="~~" & parameterestimates(m2x2)$est<0,
c("lhs","level","est","se","ci.lower","ci.upper")]
# m3x3.weakInv
parameterestimates(m3x3.weakInv)[parameterestimates(m3x3.weakInv)$op=="~~" & parameterestimates(m3x3.weakInv)$est<0,
c("lhs","level","est","se","ci.lower","ci.upper")]
As a first strategy to solve the improper solutions, we refit the
models by fixating the residual variance of problematic
items to the 15% of the variance estimated for these items at level 2.
In other words, we make the assumption that these items have a
reliability around 85% (see Joreskog & Sobrom,
1996). We can see that the residual variance of both t2
and t3
should be fixated to solve the problem for models
m3x3
and m2x3
, and m3x3.weakInv
,
whereas fixating item t3
was enough to solve the problem
for model m2x2
.
# m3x3: fixating residual variance of item t2 ........................................................
m3x3.fix <- 'level: 1
hedTone_w =~ v1 + v2 + v3
Calmness_w =~ t1 + t2 + t3
enArousal_w =~ f1 + f2 + f3
level: 2
hedTone_b =~ v1 + v2 + v3
Calmness_b =~ t1 + t2 + t3
enArousal_b =~ f1 + f2 + f3
t2 ~~ rho2 * t2'
fit <- lmer(t2 ~ 1 + (1|ID), data=Mood) # null LMER model
t2varlv2 <- as.data.frame(VarCorr(fit))[1,4] # between-subjects variance of item t2
m3x3.fix <- cfa(gsub("rho2",t2varlv2*.15,m3x3.fix),data=Mood,cluster='ID',std.lv=TRUE) # fixating rho2
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v2" has no variance within some clusters. The
## cluster ids with zero within variance are: HRV068
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v3" has no variance within some clusters. The
## cluster ids with zero within variance are: HRV068
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "t3" has no variance within some clusters. The
## cluster ids with zero within variance are: HRV068 HRV084
## Warning in lav_object_post_check(object): lavaan WARNING: some estimated ov
## variances are negative
# same problem with item t3
parameterestimates(m3x3.fix)[parameterestimates(m3x3.fix)$op=="~~" & parameterestimates(m3x3.fix)$est<0,
c("lhs","level","est","se","ci.lower","ci.upper")]
# m3x3: fixating also residual variance of item t3
m3x3.fix2 <- 'level: 1
hedTone_w =~ v1 + v2 + v3
Calmness_w =~ t1 + t2 + t3
enArousal_w =~ f1 + f2 + f3
level: 2
hedTone_b =~ v1 + v2 + v3
Calmness_b =~ t1 + t2 + t3
enArousal_b =~ f1 + f2 + f3
t2 ~~ rho2 * t2
t3 ~~ rho2b * t3'
fit <- lmer(t3 ~ 1 + (1|ID), data=Mood) # null LMER model
t3varlv2 <- as.data.frame(VarCorr(fit))[1,4] # between-subjects variance of item t2
m3x3.fix2 <- cfa(gsub("rho2",t2varlv2*.15,
gsub("rho2b",t3varlv2*.15,m3x3.fix2)),data=Mood,cluster='ID',std.lv=TRUE) # fixating rho2b
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v2" has no variance within some clusters. The
## cluster ids with zero within variance are: HRV068
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v3" has no variance within some clusters. The
## cluster ids with zero within variance are: HRV068
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "t3" has no variance within some clusters. The
## cluster ids with zero within variance are: HRV068 HRV084
# m2x3: fixating residual variance of item t2 ........................................................
m2x3.fix <- 'level: 1
PosTone_w =~ v1 + v2 + v3 + t1 + t2 + t3
enArousal_w =~ f1 + f2 + f3
level: 2
hedTone_b =~ v1 + v2 + v3
Calmness_b =~ t1 + t2 + t3
enArousal_b =~ f1 + f2 + f3
t2 ~~ rho2 * t2'
m2x3.fix <- cfa(gsub("rho2",t2varlv2*.15,m2x3.fix),data=Mood,cluster='ID',std.lv=TRUE) # fixating rho2b
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v2" has no variance within some clusters. The
## cluster ids with zero within variance are: HRV068
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v3" has no variance within some clusters. The
## cluster ids with zero within variance are: HRV068
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "t3" has no variance within some clusters. The
## cluster ids with zero within variance are: HRV068 HRV084
## Warning in lav_object_post_check(object): lavaan WARNING: some estimated ov
## variances are negative
# same problem with item t3
parameterestimates(m2x3.fix)[parameterestimates(m2x3.fix)$op=="~~" & parameterestimates(m2x3.fix)$est<0,
c("lhs","level","est","se","ci.lower","ci.upper")]
# m2x3: fixating also residual variance of item t3
m2x3.fix2 <- 'level: 1
PosTone_w =~ v1 + v2 + v3 + t1 + t2 + t3
enArousal_w =~ f1 + f2 + f3
level: 2
hedTone_b =~ v1 + v2 + v3
Calmness_b =~ t1 + t2 + t3
enArousal_b =~ f1 + f2 + f3
t2 ~~ rho2 * t2
t3 ~~ rho2b * t3'
m2x3.fix2 <- cfa(gsub("rho2",t2varlv2*.15,
gsub("rho2b",t3varlv2*.15,m2x3.fix2)),data=Mood,cluster='ID',std.lv=TRUE) # fixating rho2b
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v2" has no variance within some clusters. The
## cluster ids with zero within variance are: HRV068
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v3" has no variance within some clusters. The
## cluster ids with zero within variance are: HRV068
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "t3" has no variance within some clusters. The
## cluster ids with zero within variance are: HRV068 HRV084
# m2x2: fixating residual variance of item t3 ........................................................
m2x2.fix <- 'level: 1
PosTone_w =~ v1 + v2 + v3 + t1 + t2 + t3
enArousal_w =~ f1 + f2 + f3
level: 2
PosTone_b =~ v1 + v2 + v3 + t1 + t2 + t3
enArousal_b =~ f1 + f2 + f3
t3 ~~ rho2 * t3'
m2x2.fix <- cfa(gsub("rho2",t3varlv2*.15,m2x2.fix),data=Mood,cluster='ID',std.lv=TRUE) # fixating rho2
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v2" has no variance within some clusters. The
## cluster ids with zero within variance are: HRV068
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v3" has no variance within some clusters. The
## cluster ids with zero within variance are: HRV068
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "t3" has no variance within some clusters. The
## cluster ids with zero within variance are: HRV068 HRV084
# m3x3.weakInv: fixating residual variance of item t2 ........................................................
m3x3.weakInv.fix <- 'level: 1
hedTone_w =~ a*v1 + b*v2 + c*v3
Calmness_w =~ d*t1 + e*t2 + f*t3
enArousal_w =~ g*f1 + h*f2 + i*f3
level: 2
hedTone_b =~ a*v1 + b*v2 + c*v3
Calmness_b =~ d*t1 + e*t2 + f*t3
enArousal_b =~ g*f1 + h*f2 + i*f3
t2 ~~ rho2 * t2'
m3x3.weakInv.fix <- cfa(gsub("rho2",t2varlv2*.15,m3x3.weakInv.fix),data=Mood,cluster='ID',std.lv=TRUE) # fixating rho2
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v2" has no variance within some clusters. The
## cluster ids with zero within variance are: HRV068
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v3" has no variance within some clusters. The
## cluster ids with zero within variance are: HRV068
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "t3" has no variance within some clusters. The
## cluster ids with zero within variance are: HRV068 HRV084
## Warning in lav_object_post_check(object): lavaan WARNING: some estimated ov
## variances are negative
# same problem with item t3
parameterestimates(m3x3.weakInv.fix)[parameterestimates(m3x3.weakInv.fix)$op=="~~" & parameterestimates(m3x3.weakInv.fix)$est<0,
c("lhs","level","est","se","ci.lower","ci.upper")]
# m3x3.weakInv: fixating also residual variance of item t3
m3x3.weakInv.fix2 <- 'level: 1
hedTone_w =~ a*v1 + b*v2 + c*v3
Calmness_w =~ d*t1 + e*t2 + f*t3
enArousal_w =~ g*f1 + h*f2 + i*f3
level: 2
hedTone_b =~ a*v1 + b*v2 + c*v3
Calmness_b =~ d*t1 + e*t2 + f*t3
enArousal_b =~ g*f1 + h*f2 + i*f3
t2 ~~ rho2 * t2
t3 ~~ rho2b * t3'
m3x3.weakInv.fix2 <- cfa(gsub("rho2",t2varlv2*.15,
gsub("rho2b",t3varlv2*.15,m3x3.weakInv.fix2)),data=Mood,cluster='ID',std.lv=TRUE) # fixating rho2
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v2" has no variance within some clusters. The
## cluster ids with zero within variance are: HRV068
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v3" has no variance within some clusters. The
## cluster ids with zero within variance are: HRV068
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "t3" has no variance within some clusters. The
## cluster ids with zero within variance are: HRV068 HRV084
As a second alternative strategy, we check if the removal of some participants can solve the improper solutions. Indeed, Heywood cases are likely due to population variances that are close to zero (but positive), such that that the negative estimates are just the result of random sampling fluctuations (Van Driel, 1978; Kolenikov & Bollen, 2012).
Specifically, due to our main focus on cross-level isomorphism, we
only conduct the following analysis on models m3x3
and
m3x3.weakInv
. Here, we re-specify both models by excluding
each participant one-by-one (i.e., influential
analysis). Then, we repeat the procedure by removing the
participants whose exclusion is associated with the highest estimated
variance, until the variance becomes positive in both models.
First, the parameters are estimated and plotted after the exclusion
of each participant. For both models, we can note that t2
variance remains negative in any case. The exclusion of participant
HRV036
seems to lead to the highest increase in
t2
variance considering both models.
# m3x3
write.csv(influential.analysis(data=Mood,
parameter="var"),"heywood cases/negVar.m3x3.0.csv",row.names=FALSE)
# m3x3.weakInv
write.csv(influential.analysis(data=Mood,
m='level: 1
hedTone_w =~ a*v1 + b*v2 + c*v3
Calmness_w =~ d*t1 + e*t2 + f*t3
enArousal_w =~ g*f1 + h*f2 + i*f3
level: 2
hedTone_b =~ a*v1 + b*v2 + c*v3
Calmness_b =~ d*t1 + e*t2 + f*t3
enArousal_b =~ g*f1 + h*f2 + i*f3',
parameter="var"),"heywood cases/negVar.m3x3.weakInv.0.csv",row.names=FALSE)
# t2 variance in m3x3 (above) and m3x3.weakInv (below)
grid.arrange(plot.influential(data=read.csv("heywood cases/negVar.m3x3.0.csv"),par="var",variable="t2",level=2,
threshold_upper=-.005),
plot.influential(data=read.csv("heywood cases/negVar.m3x3.weakInv.0.csv"),par="var",variable="t2",level=2,
threshold_upper=-.003))
# v3 variance in m3x3 (above) and m3x3.weakInv (below)
grid.arrange(plot.influential(data=read.csv("heywood cases/negVar.m3x3.0.csv"),par="var",variable="v3",level=2,
threshold_upper = .024),
plot.influential(data=read.csv("heywood cases/negVar.m3x3.weakInv.0.csv"),par="var",variable="v3",level=2,
threshold_upper =.017))
# participant HRV036
grid.arrange(plot.traj(ema,var="HT","HRV036"),plot.traj(ema,var="CA","HRV036"),nrow=2)
Second, we replicate the procedure by also excluding participant
HRV036
. For both models, we can note that t2
variance remains negative in any case. The exclusion of participant
HRV098
seems to lead to the highest increase in
t2
variance considering both models.
# m3x3
write.csv(influential.analysis(data=Mood[Mood$ID!="HRV036",],
parameter="var"),"heywood cases/negVar.m3x3.1.csv",row.names=FALSE)
# m3x3.weakInv
write.csv(influential.analysis(data=Mood[Mood$ID!="HRV036",],
m='level: 1
hedTone_w =~ a*v1 + b*v2 + c*v3
Calmness_w =~ d*t1 + e*t2 + f*t3
enArousal_w =~ g*f1 + h*f2 + i*f3
level: 2
hedTone_b =~ a*v1 + b*v2 + c*v3
Calmness_b =~ d*t1 + e*t2 + f*t3
enArousal_b =~ g*f1 + h*f2 + i*f3',
parameter="var"),"heywood cases/negVar.m3x3.weakInv.1.csv",row.names=FALSE)
# t2 variance in m3x3 (above) and m3x3.weakInv (below)
grid.arrange(plot.influential(data=read.csv("heywood cases/negVar.m3x3.1.csv"),par="var",variable="t2",level=2,
threshold_upper=-.0035),
plot.influential(data=read.csv("heywood cases/negVar.m3x3.weakInv.1.csv"),par="var",variable="t2",level=2,
threshold_upper=-.001))
# v3 variance in m3x3 (above) and m3x3.weakInv (below)
grid.arrange(plot.influential(data=read.csv("heywood cases/negVar.m3x3.1.csv"),par="var",variable="v3",level=2,
threshold_upper = .027,threshold_lower = 0),
plot.influential(data=read.csv("heywood cases/negVar.m3x3.weakInv.1.csv"),par="var",variable="v3",level=2,
threshold_upper =.023,threshold_lower = 0))
# participant HRV098
grid.arrange(plot.traj(ema,var="HT","HRV098"),plot.traj(ema,var="CA","HRV098"),nrow=2)
Third, we replicate the procedure by also excluding participant
HRV098
. We can note that t2
variance remains
negative in any case for model m3x3
, whereas the exclusion
of a number of participants solves the problem for model
m3x3.weakInv
. The exclusion of participant
HRV061
seems to lead to the highest increase in
t2
variance in both models.
# m3x3
write.csv(influential.analysis(data=Mood[!Mood$ID%in%c("HRV036","HRV098"),],
parameter="var"),"heywood cases/negVar.m3x3.2.csv",row.names=FALSE)
# m3x3.weakInv
write.csv(influential.analysis(data=Mood[!Mood$ID%in%c("HRV036","HRV098"),],
m='level: 1
hedTone_w =~ a*v1 + b*v2 + c*v3
Calmness_w =~ d*t1 + e*t2 + f*t3
enArousal_w =~ g*f1 + h*f2 + i*f3
level: 2
hedTone_b =~ a*v1 + b*v2 + c*v3
Calmness_b =~ d*t1 + e*t2 + f*t3
enArousal_b =~ g*f1 + h*f2 + i*f3',
parameter="var"),"heywood cases/negVar.m3x3.weakInv.2.csv",row.names=FALSE)
# t2 variance in m3x3 (above) and m3x3.weakInv (below)
grid.arrange(plot.influential(data=read.csv("heywood cases/negVar.m3x3.2.csv"),par="var",variable="t2",level=2,
threshold_upper=-.0015),
plot.influential(data=read.csv("heywood cases/negVar.m3x3.weakInv.2.csv"),par="var",variable="t2",level=2,
threshold_upper=0))
# participant HRV061
grid.arrange(plot.traj(ema,var="HT","HRV061"),plot.traj(ema,var="CA","HRV061"),nrow=2)
Fourth, we replicate the procedure by also excluding participant
HRV061
. We can note that the exclusion of a number of
participants solves the problem for both models, with participant
HRV012
leading to the highest decrease in t2
variance for both models.
# m3x3
write.csv(influential.analysis(data=Mood[!Mood$ID%in%c("HRV036","HRV098","HRV061"),],
parameter="var"),"heywood cases/negVar.m3x3.3.csv",row.names=FALSE)
# m3x3.weakInv
write.csv(influential.analysis(data=Mood[!Mood$ID%in%c("HRV036","HRV098","HRV061"),],
m='level: 1
hedTone_w =~ a*v1 + b*v2 + c*v3
Calmness_w =~ d*t1 + e*t2 + f*t3
enArousal_w =~ g*f1 + h*f2 + i*f3
level: 2
hedTone_b =~ a*v1 + b*v2 + c*v3
Calmness_b =~ d*t1 + e*t2 + f*t3
enArousal_b =~ g*f1 + h*f2 + i*f3',
parameter="var"),"heywood cases/negVar.m3x3.weakInv.3.csv",row.names=FALSE)
# t2 variance in m3x3 (above) and m3x3.weakInv (below)
grid.arrange(plot.influential(data=read.csv("heywood cases/negVar.m3x3.3.csv"),par="var",variable="t2",level=2,
threshold_upper=0),
plot.influential(data=read.csv("heywood cases/negVar.m3x3.weakInv.3.csv"),par="var",variable="t2",level=2,
threshold_upper=.003))
# participant HRV012
grid.arrange(plot.traj(ema,var="HT","HRV012"),plot.traj(ema,var="CA","HRV012"),nrow=2)
After the removal of 4 participants (4.12%), the
estimated variance for item t2
is positive in both models
m3x3
and m3x3.weakInv
. Both models specified
without participants HRV036
, HRV061
,
HRV098
and HRV012
do not show any other
Heywood case. However, Heywood cases are still present in all the
alternative models.
# excluding 4 participants associated with Heywood cases in models m3x3 and m3x3.weak
Mood_noInfl4 <- Mood[!Mood$ID%in%c("HRV036","HRV061","HRV098","HRV012"),]
# refitting m3x3
m3x3_noInfl4 <- cfa('level: 1
hedTone_w =~ v1 + v2 + v3
Calmness_w =~ t1 + t2 + t3
enArousal_w =~ f1 + f2 + f3
level: 2
hedTone_b =~ v1 + v2 + v3
Calmness_b =~ t1 + t2 + t3
enArousal_b =~ f1 + f2 + f3',
data=Mood_noInfl4,cluster='ID',std.lv=TRUE)
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v2" has no variance within some clusters. The
## cluster ids with zero within variance are: HRV068
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v3" has no variance within some clusters. The
## cluster ids with zero within variance are: HRV068
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "t3" has no variance within some clusters. The
## cluster ids with zero within variance are: HRV068 HRV084
# refitting m3x3.weakInv
m3x3.weakInv_noInfl4 <- cfa('level: 1
hedTone_w =~ a*v1 + b*v2 + c*v3
Calmness_w =~ d*t1 + e*t2 + f*t3
enArousal_w =~ g*f1 + h*f2 + i*f3
level: 2
hedTone_b =~ a*v1 + b*v2 + c*v3
Calmness_b =~ d*t1 + e*t2 + f*t3
enArousal_b =~ g*f1 + h*f2 + i*f3',
data=Mood_noInfl4,cluster='ID',std.lv=TRUE)
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v2" has no variance within some clusters. The
## cluster ids with zero within variance are: HRV068
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v3" has no variance within some clusters. The
## cluster ids with zero within variance are: HRV068
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "t3" has no variance within some clusters. The
## cluster ids with zero within variance are: HRV068 HRV084
# refitting m3x2
m3x2_noInfl4 <- cfa('level: 1
hedTone_w =~ v1 + v2 + v3
Calmness_w =~ t1 + t2 + t3
enArousal_w =~ f1 + f2 + f3
level: 2
PosTone_b =~ v1 + v2 + v3 + t1 + t2 + t3
enArousal_b =~ f1 + f2 + f3',
data=Mood_noInfl4,cluster='ID',std.lv=TRUE)
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v2" has no variance within some clusters. The
## cluster ids with zero within variance are: HRV068
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v3" has no variance within some clusters. The
## cluster ids with zero within variance are: HRV068
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "t3" has no variance within some clusters. The
## cluster ids with zero within variance are: HRV068 HRV084
## Warning in lav_object_post_check(object): lavaan WARNING: some estimated ov
## variances are negative
# refitting m2x3
m2x3_noInfl4 <- cfa('level: 1
PosTone_w =~ v1 + v2 + v3 + t1 + t2 + t3
enArousal_w =~ f1 + f2 + f3
level: 2
hedTone_b =~ v1 + v2 + v3
Calmness_b =~ t1 + t2 + t3
enArousal_b =~ f1 + f2 + f3',
data=Mood_noInfl4,cluster='ID',std.lv=TRUE)
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v2" has no variance within some clusters. The
## cluster ids with zero within variance are: HRV068
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v3" has no variance within some clusters. The
## cluster ids with zero within variance are: HRV068
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "t3" has no variance within some clusters. The
## cluster ids with zero within variance are: HRV068 HRV084
## Warning in lav_object_post_check(object): lavaan WARNING: some estimated ov
## variances are negative
# refitting m2x2
m2x2_noInfl4 <- cfa('level: 1
PosTone_w =~ v1 + v2 + v3 + t1 + t2 + t3
enArousal_w =~ f1 + f2 + f3
level: 2
PosTone_b =~ v1 + v2 + v3 + t1 + t2 + t3
enArousal_b =~ f1 + f2 + f3',
data=Mood_noInfl4,cluster='ID',std.lv=TRUE)
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v2" has no variance within some clusters. The
## cluster ids with zero within variance are: HRV068
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v3" has no variance within some clusters. The
## cluster ids with zero within variance are: HRV068
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "t3" has no variance within some clusters. The
## cluster ids with zero within variance are: HRV068 HRV084
## Warning in lav_object_post_check(object): lavaan WARNING: some estimated ov
## variances are negative
# strong invariance
m3x3.strInv_noInfl4 <- cfa('level: 1
hedTone_w =~ a*v1 + b*v2 + c*v3
Calmness_w =~ d*t1 + e*t2 + f*t3
enArousal_w =~ g*f1 + h*f2 + i*f3
level: 2
hedTone_b =~ a*v1 + b*v2 + c*v3
Calmness_b =~ d*t1 + e*t2 + f*t3
enArousal_b =~ g*f1 + h*f2 + i*f3
v1 ~~ 0*v1
v2 ~~ 0*v2
v3 ~~ 0*v3
t1 ~~ 0*t1
t2 ~~ 0*t2
t3 ~~ 0*t3
f1 ~~ 0*f1
f2 ~~ 0*f2
f3 ~~ 0*f3',
data=Mood_noInfl4,cluster='ID',std.lv=TRUE)
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v2" has no variance within some clusters. The
## cluster ids with zero within variance are: HRV068
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v3" has no variance within some clusters. The
## cluster ids with zero within variance are: HRV068
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "t3" has no variance within some clusters. The
## cluster ids with zero within variance are: HRV068 HRV084
# refitting one-factor within model + saturated between part
m.sat_noInfl4 <- cfa('level: 1
hedTone_W =~ v1 + v2 + v3
Calmness_W =~ t1 + t2 + t3
enArousal_W =~ f1 + f2 + f3
level: 2
v1 ~~ v1 + v2 + v3 + t1 + t2 + t3 + f1 + f2 + f3
v2 ~~ v2 + v3 + t1 + t2 + t3 + f1 + f2 + f3
v3 ~~ v3 + t1 + t2 + t3 + f1 + f2 + f3
t1 ~~ t1 + t2 + t3 + f1 + f2 + f3
t2 ~~ t2 + t3 + f1 + f2 + f3
t3 ~~ t3 + f1 + f2 + f3
f1 ~~ f1 + f2 + f3
f2 ~~ f2 + f3
f3 ~~ f3', data=Mood_noInfl4,cluster='ID',std.lv=TRUE)
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v2" has no variance within some clusters. The
## cluster ids with zero within variance are: HRV068
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v3" has no variance within some clusters. The
## cluster ids with zero within variance are: HRV068
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "t3" has no variance within some clusters. The
## cluster ids with zero within variance are: HRV068 HRV084
Here, we compare the multilevel models specified above by considering
both the fit indices and information criteria. Three
model comparisons are performed on different subsets of the sample, in
order to account for strategies used to handle the Heywood
cases highlighted above. Across all model comparisons, we can
note that both the configural model m3x3
and the
weak-invariance models m3x3.weakInv
show acceptable fit
indices, with a better fit than the saturated model
(except for srmr_between
), whereas all the
alternative models are rejected, including the
strong-invariance model m3x3.strInv
. Across all model
comparisons, the weak invariance model shows the best
fit in terms of rmsea
and the strongest evidence
in terms of BICw
. Thus, model m3x3.weakInv
is
selected as the best model.
# all participants (N = 97), with lv-2 Heywood cases in m3x3, m3x3.weakInv, m2x3, and m2x2
fit.ind(model=c(m.sat,m3x3,m3x2,m2x2,m2x3,m3x3.weakInv,m3x3.strInv),
models.names=c("m.sat","m3x3","m3x2","m2x2","m2x3","m3x3.weakInv","m3x3.strInv"))
# all participants (N = 97), with lv-2 residual variances of problematic items fixated to the 15% of their total lv-2 variance
fit.ind(model=c(m.sat,m3x3.fix2,m3x2,m2x2.fix,m2x3.fix2,m3x3.weakInv.fix2,m3x3.strInv),
models.names=c("m.sat","m3x3","m3x2","m2x2","m2x3","m3x3.weakInv","m3x3.strInv"))
# excluding 4 participants (N = 93), with Heywood cases in m2x3, m2x2, and m3x2 (but not in m3x3 and m3x3.weakInv)
fit.ind(model=c(m.sat_noInfl4,m3x3_noInfl4,m3x2_noInfl4,m2x2_noInfl4,m2x3_noInfl4,m3x3.weakInv_noInfl4,m3x3.strInv_noInfl4),
models.names=c("m.sat","m3x3","m3x2","m2x2","m2x3","m3x3.weakInv","m3x3.strInv"))
Here, we inspect the factor loadings from the selected model
m3x3.weakInv
both in the full sample (N = 97) with fixated
variances and in the subsample with N = 93 (i.e., excluding the four
participants leading to Heywood cases). Both the fully standardized and
the unstandardized solutions are shown. From the former, we can see that
all items shows significant loadings ranging from about .60 to
about .80 at level 1, and from about .85 to about .99
at level 2, respectively. Correlations between latent variables
range from about .21 to about .73 at level 1 and from about .61 to about
.79 at level 2, respectively. Similar parameters are estimated from the
two subsamples
# fully standardized solution (loadings)
standardizedsolution(m3x3.weakInv_noInfl4)[standardizedsolution(m3x3.weakInv_noInfl4)$op=="=~",]
# fully standardized solution (correlations)
standardizedsolution(m3x3.weakInv_noInfl4)[standardizedsolution(m3x3.weakInv_noInfl4)$op=="~~",][c(13:15,28:30),]
# unstandardized solution (loadings)
parameterestimates(m3x3.weakInv_noInfl4)[parameterestimates(m3x3.weakInv_noInfl4)$op=="=~",]
# unstandardized solution (covariances)
parameterestimates(m3x3.weakInv_noInfl4)[parameterestimates(m3x3.weakInv_noInfl4)$op=="~~",][c(13:15,28:30),]
# fully standardized solution (loadings)
standardizedsolution(m3x3.weakInv.fix2)[standardizedsolution(m3x3.weakInv.fix2)$op=="=~",]
# fully standardized solution (correlations)
standardizedsolution(m3x3.weakInv.fix2)[standardizedsolution(m3x3.weakInv.fix2)$op=="~~",][c(13:15,28:30),]
# unstandardized solution (loadings)
parameterestimates(m3x3.weakInv.fix2)[parameterestimates(m3x3.weakInv.fix2)$op=="=~",]
# unstandardized solution (covariances)
parameterestimates(m3x3.weakInv.fix2)[parameterestimates(m3x3.weakInv.fix2)$op=="~~",][c(13:15,28:30),]
Here, we use the MCFArel
function showed at the
beginning of section 2 to compute indices of level-specific
composite reliability for each subscale of the MDMQ, as
recommended by Geldhof et al. (2014). Reliability
indices are computed from the multilevel model m3x3.weakInv
specified above, considering both the full sample (N = 97) with fixated
variances and the subsample with N = 93 (i.e., excluding the four
participants leading to Heywood cases). We can note that all subscales
and the total score show satisfactory reliability at
both level 1 (omega ranging from .77 to .84) and level 2 (omega ranging
from .94 to . 98), with almost identical values from the two
subsamples.
rbind(data.frame(measure="Hedonic Tone",
omega_w=MCFArel(m3x3.weakInv_noInfl4,level=1,items=1:3,item.labels=mood[1:3]),
omega_b=MCFArel(m3x3.weakInv_noInfl4,level=2,items=1:3,item.labels=mood[1:3])),
data.frame(measure="Calmness",
omega_w=MCFArel(m3x3.weakInv_noInfl4,level=1,items=4:6,item.labels=mood[4:6]),
omega_b=MCFArel(m3x3.weakInv_noInfl4,level=2,items=4:6,item.labels=mood[4:6])),
data.frame(measure="Energetic arousal",
omega_w=MCFArel(m3x3.weakInv_noInfl4,level=1,items=7:9,item.labels=mood[8:9]),
omega_b=MCFArel(m3x3.weakInv_noInfl4,level=2,items=7:9,item.labels=mood[7:9])),
data.frame(measure="Total score",
omega_w=MCFArel(m3x3.weakInv_noInfl4,level=1,items=1:9,item.labels=mood[1:9]),
omega_b=MCFArel(m3x3.weakInv_noInfl4,level=2,items=1:9,item.labels=mood[1:9])))
rbind(data.frame(measure="Hedonic Tone",
omega_w=MCFArel(m3x3.weakInv.fix2,level=1,items=1:3,item.labels=mood[1:3]),
omega_b=MCFArel(m3x3.weakInv.fix2,level=2,items=1:3,item.labels=mood[1:3])),
data.frame(measure="Calmness",
omega_w=MCFArel(m3x3.weakInv.fix2,level=1,items=4:6,item.labels=mood[4:6]),
omega_b=MCFArel(m3x3.weakInv.fix2,level=2,items=4:6,item.labels=mood[4:6])),
data.frame(measure="Energetic arousal",
omega_w=MCFArel(m3x3.weakInv.fix2,level=1,items=7:9,item.labels=mood[8:9]),
omega_b=MCFArel(m3x3.weakInv.fix2,level=2,items=7:9,item.labels=mood[7:9])),
data.frame(measure="Total score",
omega_w=MCFArel(m3x3.weakInv.fix2,level=1,items=1:9,item.labels=mood[1:9]),
omega_b=MCFArel(m3x3.weakInv.fix2,level=2,items=1:9,item.labels=mood[1:9])))
Finally, we create the aggregated score of each mood dimension by
computing the mean of the corresponding items. In
addition to such observed variables, we also save the factor
scores estimated by model m3x3.weakInv.fix2
.
# aggregate scores (mean of observed item scores)
ema$HT <- apply(ema[,c("v1","v2","v3")],1,mean,na.rm=TRUE) # Hedonic Tone
ema$CA <- apply(ema[,c("t1","t2","t3")],1,mean,na.rm=TRUE) # Calmness
ema$EA <- apply(ema[,c("f1","f2","f3")],1,mean,na.rm=TRUE) # Energetic Arousal
# factor scores estimated by model m3x3.weakInv.fix2 at level 1
ema[!is.na(ema$v1),"HT.fs.w"] <- as.data.frame(lavPredict(m3x3.weakInv.fix2,level=1))[,1]
ema[!is.na(ema$v1),"CA.fs.w"] <- as.data.frame(lavPredict(m3x3.weakInv.fix2,level=1))[,2]
ema[!is.na(ema$v1),"EA.fs.w"] <- as.data.frame(lavPredict(m3x3.weakInv.fix2,level=1))[,3]
# factor scores estimated by model m3x3.weakInv.fix2 at level 2
between$HT.fs.b <- as.data.frame(lavPredict(m3x3.weakInv.fix2,level=2))[,1]
between$CA.fs.b <- as.data.frame(lavPredict(m3x3.weakInv.fix2,level=2))[,2]
between$EA.fs.b <- as.data.frame(lavPredict(m3x3.weakInv.fix2,level=2))[,3]
# computing sum of factor scores (cluster mean + within-cluster deviations)
ema <- join(ema,between[,c("ID","HT.fs.b","CA.fs.b","EA.fs.b")],by="ID",type="left")
ema$HT.fs <- ema$HT.fs.b + ema$HT.fs.w
ema$CA.fs <- ema$CA.fs.b + ema$CA.fs.w
ema$EA.fs <- ema$EA.fs.b + ema$EA.fs.w
# removing level-specific components
ema[,c("HT.fs.b","CA.fs.b","EA.fs.b","HT.fs.w","CA.fs.w","EA.fs.w")] <- NULL
Emotional events were measured by asking participants whether the
period since the last data entry was characterized by significant
positive and/or negative events by rating two items on a slider scale
from 1 (“not at all”) to 7 (“a lot”). If significant positive and/or
negative events occurred, they were also asked to rate their intensity
from 1 (“no events”) to 7 (“very intense”), otherwise they were
instructed to respond 1 (“not at all”/”no events”) to all event and
intensity items. Here, we prepare the data to be used in the analysis by
selecting the dataset with all cases with complete responses to any
event item, which we call Event
.
# selecting event item labels
event <- c("PE","PE.int","NE","NE.int")
# selecting dataset of complete responses (listwise deletion)
Event <- na.omit(ema[,c("ID",event)])
cat("Analysing",nrow(Event),"complete responses from",nlevels(as.factor(as.character(Event$ID))),"participants")
## Analysing 1578 complete responses from 97 participants
First, we consider descriptive statistics in raw item scores, in terms of score distribution, ICC, and correlations.
First, we inspect the score distribution of each event item using the
MVN
package. We can note that event items are
strongly skewed, with most ratings being 1.
mvn(data=Event[,event],univariatePlot="histogram")[4] # histogram events
## $<NA>
## NULL
mvn(data=Event[,event],univariatePlot="qqplot")[4] # qqnorm events
## $<NA>
## NULL
# frequency of responses
for(ev in event){ cat("\n",ev,"\n"); print(summary(as.factor(ema[,ev]))) }
##
## PE
## 1 2 3 4 5 6 7 NA's
## 908 290 169 110 101 20 1 438
##
## PE.int
## 1 2 3 4 5 6 7 NA's
## 939 291 179 100 67 20 1 440
##
## NE
## 1 2 3 4 5 6 7 NA's
## 1173 236 90 59 23 7 1 448
##
## NE.int
## 1 2 3 4 5 6 7 NA's
## 1181 195 92 60 33 15 5 456
Second, we evaluate the balance between inter- and intraindividual
variability in each item score by computing the intraclass correlation
coefficients (ICC) with the itemsICC
function showed above.
We can note that all ICCs are equal to or below .27, suggesting that
most variance is at level 1 (within) but there is still
substantial interindividual variability to consider multilevel
modeling.
itemsICC(data=Event,items=event)
## PE ICC = 0.264
## PE.int ICC = 0.197
## NE ICC = 0.197
## NE.int ICC = 0.159
Third, we visualize the correlation matrices of event item scores
with the corr.matrices
function showed above, also
considering the correlations between event and mood items. As done in
section 2.1.1.3, three matrices are generated:
HT
and CA
, but not
EA
), although weak positive correlations are estimated
between positive events and HT
.corr.matrices(data=ema,vars=c("HT","CA","EA",event),matrix.type="pooled")
HT
and CA
, but not EA
) are only shown by negative
events.corr.matrices(data=ema,vars=c("HT","CA","EA",event),matrix.type="between")
HT
, corroborating the idea that emotional
events are mainly intraindividual variables.corr.matrices(data=ema,vars=c("HT","CA","EA",event),matrix.type="within")
With event items, participants were instructed to rate both valence and intensity as 1 when no events occurred, whereas both items were presumably rated as > 1 when some events occurred. Here, we inspect the inconsistencies between event valence and intensity items for both positive and negative events. We can note a small number of inconsistencies from several participants.
# PE inconsistencies between valence and intensity
nrow(ema[!is.na(ema$PE)&!is.na(ema$PE.int) & ema$PE>1 & ema$PE.int==1,]) # 58 with PE > 1 but PE.int = 1
## [1] 58
nrow(ema[!is.na(ema$PE)&!is.na(ema$PE.int) & ema$PE==1 & ema$PE.int>1,]) # 27 with PE = 1 but PE.int > 1
## [1] 27
nlevels(as.factor(as.character(ema[!is.na(ema$PE)&!is.na(ema$PE.int)
& ((ema$PE>1 & ema$PE.int==1) | (ema$PE==1 & ema$PE.int>1)),"ID"]))) # 45 part. with 1+ inc.
## [1] 45
nrow(ema[!is.na(ema$NE)&!is.na(ema$NE.int) & ema$NE>1 & ema$NE.int==1,]) # 38 with NE > 1 but NE.int = 1
## [1] 38
nrow(ema[!is.na(ema$NE)&!is.na(ema$NE.int) & ema$NE==1 & ema$NE.int>1,]) # 23 with NE = 1 but PE.int > 1
## [1] 23
nlevels(as.factor(as.character(ema[!is.na(ema$NE)&!is.na(ema$NE.int)
& ((ema$NE>1 & ema$NE.int==1) | (ema$NE==1 & ema$NE.int>1)),"ID"]))) # 35 part. with 1+ inc.
## [1] 35
Due to the strongly skewed distributions of event item scores, their
variability being mainly at the intraindividual level and the
inconsistencies highlighted above, we do not compute MCFA-based and
reliability indices to support the aggregation of event items by
averaging item ratings. That is, we question the adequacy of our initial
idea of considering these variables as ordinal/continuous, but we
proceed by dychotomizing them. More specifically, we
create two dichotomic variables PosEv
and
NegEv
with possible scores 0 (both valence and intensity =
1) or 1 (either valence or intensity > 1).
# PosEv: 0 if both valence and intensity = 1, 1 otherwise
ema[(!is.na(ema$PE) & ema$PE==1) | (!is.na(ema$PE.int) & ema$PE.int==1),"PosEv"] <- 0
ema[(!is.na(ema$PE) & ema$PE>1) | (!is.na(ema$PE.int) & ema$PE.int>1),"PosEv"] <- 1
# NegEv: 0 if both valence and intensity = 1, 1 otherwise
ema[(!is.na(ema$NE) & ema$NE==1) | (!is.na(ema$NE.int) & ema$NE.int==1),"NegEv"] <- 0
ema[(!is.na(ema$NE) & ema$NE>1) | (!is.na(ema$NE.int) & ema$NE.int>1),"NegEv"] <- 1
# summary
ema[,c("PosEv","NegEv")] <- lapply(ema[,c("PosEv","NegEv")],as.factor)
summary(ema[,c("PE","PE.int","PosEv","NE","NE.int","NegEv")])
## PE PE.int PosEv NE NE.int
## Min. :1.000 Min. :1.000 0 :881 Min. :1.000 Min. :1.000
## 1st Qu.:1.000 1st Qu.:1.000 1 :718 1st Qu.:1.000 1st Qu.:1.000
## Median :1.000 Median :1.000 NA's:438 Median :1.000 Median :1.000
## Mean :1.918 Mean :1.828 Mean :1.457 Mean :1.504
## 3rd Qu.:3.000 3rd Qu.:2.000 3rd Qu.:2.000 3rd Qu.:2.000
## Max. :7.000 Max. :7.000 Max. :7.000 Max. :7.000
## NA's :438 NA's :440 NA's :448 NA's :456
## NegEv
## 0 :1151
## 1 : 440
## NA's: 446
##
##
##
##
# removing raw item scores
ema[,c("PE","PE.int","NE","NE.int")] <- NULL
ARRIVATO QUI MA BISOGNA SISTEMARE LE FUNZIONI
Here, descriptive statistics are computed on the selected subsample (N = 97) using the following functions:
multidesc
#' @title Printing descriptive statistics of multilevel variables
#' @param datasets = list of datasets to be used: occasion, block, between, gng
#' @param EMA = ESM and HRV data
#' @param GNG = GNG data
#' @param betw = between-only variables
multidesc <- function(datasets=NA,
EMA=c("RMSSD","PI","HT","CA","EA"),
GNG=c("RT","ICV","NoGo.ACC","Go.ACC"),
betw=NA){ require(Rmisc); require(lme4)
# HRV and ESM data
out <- data.frame(Measure=EMA[1],
N=summarySE(datasets[[1]],EMA[1],na.rm=TRUE)[,2],
Mean=paste(round(summarySE(datasets[[1]],EMA[1],na.rm=TRUE)[,3],2)," (",
round(summarySE(datasets[[1]],EMA[1],na.rm=TRUE)[,4],2),")",sep=""))
for(i in 2:length(EMA)){
out <- rbind(out,
data.frame(Measure=EMA[i],
N=summarySE(datasets[[1]],EMA[i],na.rm=TRUE)[,2],
Mean=paste(round(summarySE(datasets[[1]],EMA[i],na.rm=TRUE)[,3],2)," (",
round(summarySE(datasets[[1]],EMA[i],na.rm=TRUE)[,4],2),")",sep="")))}
# GNG data
for(i in 1:length(GNG)){
out <- rbind(out,
data.frame(Measure=GNG[i],
N=summarySE(datasets[[2]],GNG[i],na.rm=TRUE)[,2],
Mean=paste(round(summarySE(datasets[[2]],GNG[i],na.rm=TRUE)[,3],2)," (",
round(summarySE(datasets[[2]],GNG[i],na.rm=TRUE)[,4],2),")",sep="")))}
# number of considered trials
out[out$Measure=="NoGo.ACC","N"] <- nrow(datasets[[4]][datasets[[4]]$TrialType=="No-go",])
out[out$Measure=="Go.ACC"|out$Measure=="RT","N"] <- nrow(datasets[[4]][datasets[[4]]$TrialType=="Go",])
# PrelQS data
for(i in 1:length(betw)){
out <- rbind(out,
data.frame(Measure=betw[i],
N=summarySE(datasets[[3]],betw[i],na.rm=TRUE)[,2],
Mean=paste(round(summarySE(datasets[[3]],betw[i],na.rm=TRUE)[,3],2)," (",
round(summarySE(datasets[[3]],betw[i],na.rm=TRUE)[,4],2),")",sep="")))}
# ICC
out$ICC <- NA
for(i in 1:length(EMA)){
m <- lmer(formula=gsub("var",EMA[i],"var~(1|ID)"),data=datasets[[1]]) # VAR_between / (VAR_between + VAR_within)
out[out$Measure==EMA[i],"ICC"] <- round(as.data.frame(VarCorr(m))[1,4]/
(as.data.frame(VarCorr(m))[1,4]+as.data.frame(VarCorr(m))[2,4]),2)}
for(i in 1:length(GNG)){
m <- lmer(formula=gsub("var",GNG[i],"var~(1|ID)"),data=datasets[[2]]) # VAR_between / (VAR_between + VAR_within)
out[out$Measure==GNG[i],"ICC"] <- round(as.data.frame(VarCorr(m))[1,4]/
(as.data.frame(VarCorr(m))[1,4]+as.data.frame(VarCorr(m))[2,4]),2)}
rownames(out) <- gsub(".cm","",rownames(out))
return(out)}
Reports Mean (SD) and intraclass correlation coefficients (ICCs) of the included variables
multicorr
#' @title Compute inter and intraindividual correlations between multilevel variables
#' @param datasets = list of datasets to be used: [[1]] occasion-by occasion, [[2]] = block, [[3]] = between, [[4]] = gng
#' @param EMA = ESM and HRV data
#' @param GNG = GNG data
#' @param betw = between-only variables
multicorr <- function(datasets=list(ema,block,between,gng),
EMA=c("RMSSD","HR","HT","TA","EA"),
GNG=c("RT","ICV","NoGo.ACC","Go.ACC"),
betw=c("DERS.tot","BIS.tot")){ require(Hmisc); require(dplyr); require(Rmisc)
# between-subjects correlations (all variables)
out.b <- rcorr(as.matrix(datasets[[3]][,c(paste(EMA,".cm",sep=""),GNG,betw)]), type = "pearson")
rb <- round(out.b$r,2)
rb[lower.tri(rb)] <- NA
# within-participant correlations (HRV and ESM deviations from individual mean)
out.w <- rcorr(as.matrix(datasets[[1]][,paste(EMA,".dm",sep="")]), type = "pearson")
rw <- round(out.w$r,2)
rw[upper.tri(rw)] <- NA
# computing HRV and ESM deviations from daily mean
dd.esm <- datasets[[1]] %>%
group_by(ID,day) %>% # grouping by ID and day
mutate(PI.md=mean(na.omit(PI)), HR.md=mean(na.omit(HR)), RMSSD.md=mean(na.omit(RMSSD)), # daily means
RMSSDcv.md=mean(na.omit(RMSSDcv)),HT.md=mean(na.omit(HT)),EA.md=mean(na.omit(EA)),
TA.md=mean(na.omit(TA)),
PI.dd=PI.md-PI.cm,HR.dd=HR.md-HR.cm, RMSSD.dd=RMSSD.md-RMSSD.cm,RMSSDcv.dd=RMSSDcv.md-RMSSDcv.cm, # daily deviations
HT.dd=HT.md-HT.cm,EA.dd=EA.md-EA.cm,TA.dd=TA.md-TA.cm,)
GNGdata <- datasets[[4]]
dd.gng <- cbind(summarySE(GNGdata[GNGdata$TrialType=="Go",],measurevar="ACC",groupvars=c("ID","day"))[,c("ID","day","ACC")],
summarySE(GNGdata[GNGdata$TrialType=="No-go",],measurevar="ACC",groupvars=c("ID","day"))[,"ACC"],
summarySE(GNGdata[GNGdata$TrialType=="Go" & (GNGdata$ACC==1 | is.na(GNGdata$ACC)),],
measurevar="RT",groupvars=c("ID","day"))[,c("RT","sd")]) # keep RT.sd
colnames(dd.gng)[3:6] <- c("Go.ACC.md","NoGo.ACC.md","RT.md","RT.sd.md")
dd.gng$ICV.md <- dd.gng$RT.sd.md/dd.gng$RT.md # ICV as RT.sd / mean RT
dd.gng <- plyr::join(dd.gng,between[,c("ID",GNG)],by="ID") # attaching individual means
dd.gng$NoGo.ACC.dd <- dd.gng$NoGo.ACC.md - dd.gng$NoGo.ACC # computing daily deviations
dd.gng$ICV.dd <- dd.gng$ICV.md - dd.gng$ICV
dd.gng$RT.dd <- dd.gng$RT.md - dd.gng$RT
dd.gng$Go.ACC.dd <- dd.gng$Go.ACC.md - dd.gng$Go.ACC
# joining dd.ema and dd.gng and computing within-participant correlation (HRV, ESM and GNG daily deviations)
# nrow(dd.gng)==nrow(dd.esm[dd.esm$within.day==1,]) # sanity check
dd <- plyr::join(dd.gng,dd.esm[dd.esm$within.day==1,],by=c("ID","day"),type="full")
out.dd <- rcorr(as.matrix(dd[,c(paste(EMA,".dd",sep=""),paste(GNG,".dd",sep=""))]),type="pearson")
rdd <- round(out.dd$r,2)
rdd[upper.tri(rdd)] <- NA
rdd <- rdd[(nrow(rdd)-3):nrow(rdd),]
# filling rb empty cells
rb[1:length(EMA),1:length(EMA)][lower.tri(rb[1:length(EMA),1:length(EMA)])] <- rw[lower.tri(rw)]
rb[(length(EMA)+1):(length(EMA)+length(GNG)),1:length(EMA)] <- rdd[,1:length(EMA)]
rb[(length(EMA)+1):(length(EMA)+length(GNG)),
(length(EMA)+1):(length(EMA)+length(GNG))][lower.tri(rb[(length(EMA)+1):(length(EMA)+length(GNG)),
(length(EMA)+1):(length(EMA)+length(GNG))])] <-
rdd[,(length(EMA)+1):(length(EMA)+length(GNG))][lower.tri(rdd[,(length(EMA)+1):(length(EMA)+length(GNG))])]
out <- rb
rownames(out) <- gsub(".cm","",rownames(out))
return(t(out))}
Computes inter and intraindividual correlations between the included variables.
First, we inspect the sample size for each type of considered variables.
TO DO
Second, we inspect
# desc <- multidesc(datasets=list(ema,block,between,gng))
Hox, J. J. (2010). Multilevel Analysis: Techniques and Applications (2nd ed.). Routledge.
Jak, S., & Jorgensen, T. D. (2017). Relating Measurement Invariance, Cross-Level Invariance, and Multilevel Reliability. Frontiers in Psychology, 8(OCT), 1–9. https://doi.org/10.3389/fpsyg.2017.01640
Menghini, L., Pastore, M., & Balducci, C. (2022). Workplace stress in real time: Three parsimonious scales for the experience sampling measurement of stressors and strain at work. European Journal of Psychological Assessment. Advanced online publication. https://doi.org/10.1027/1015-5759/a000725 full text
Wilhelm, P., & Schoebi, D. (2007). Assessing mood in daily life: Structural validity, sensitivity to change, and reliability of a short-scale to measure three basic dimensions of mood. European Journal of Psychological Assessment, 23(4), 258–267. https://doi.org/10.1027/1015-5759.23.4.258