4. Main analyses
Here, we evaluate the following psychometric properties of the considered ESM scales:
Multilevel Confirmatory Factor Analysis (MCFA), evaluating the factor loadings at both levels, and the assumption of cross-level isomorphism
Reliability Analysis, evaluating the ability of each scale to measure true differences between and within participants in each measured variable
Convergent Validity, evaluating the relationship between individual aggregates of ESM scores, and retrospective measures of the same or similar constructs (i.e., measured with the preliminary questionnaire)
Sensitivity to contextual factors, evaluating differences across time and work task characteristics in each measured variable
4.1. Data preparation
First, we prepare three datasets to be used in the following analyses, for each subsample:
SUBSAMPLE s1
TD is the dataset including the four Task Demand Scale items and participants' IDs. Only occasions where SurveyType = "work" are considered.
(TD.s1 <- s1.w[s1.w$SurveyType=="work",c("ID","d1","d2","d3","d4")])[1:3,] # showing first 3 rows
TC is the dataset including the three Task Control Scale items and participants' IDs. Only occasions where SurveyType = "work" are considered.
(TC.s1 <- s1.w[s1.w$SurveyType=="work",c("ID","c1","c2","c3")])[1:3,] # showing first 3 rows
Finally, Mood is the dataset including the nine MDMQ items and participants' IDs. All occasions are considered.
(Mood.s1 <- s1.w[,c("ID",
colnames(s1.w)[which(colnames(s1.w)=="v1"):which(colnames(s1.w)=="f3")])])[1:3,] # showing first 3 rows
SUBSAMPLE s2
TD.s2 <- s2.w[s2.w$SurveyType=="work",c("ID","d1","d2","d3","d4")]
TC.s2 <- s2.w[s2.w$SurveyType=="work",c("ID","c1","c2","c3")]
Mood.s2 <- s2.w[,c("ID",colnames(s2.w)[which(colnames(s2.w)=="v1"):which(colnames(s2.w)=="f3")])]
SUBSAMPLE s3
TD.s3 <- s3.w[s3.w$SurveyType=="work",c("ID","d1","d2","d3","d4")]
TC.s3 <- s3.w[s3.w$SurveyType=="work",c("ID","c1","c2","c3")]
Mood.s3 <- s3.w[,c("ID",colnames(s3.w)[which(colnames(s3.w)=="v1"):which(colnames(s3.w)=="f3")])]
4.2. Item descriptives
Here, we consider descriptive statistics of raw item scores.
PIPELINE & FUNCTIONS
The following functions are used to summarize missing data, distributions and correlations:
item.desc()
item.desc <- function(data,items,output="text",digits=2){ require(lme4); library(MVN)
res <- data.frame(item=NA,icc=NA)
for(i in 1:length(items)){
# LMER with random effects only
m <- lmer(formula=gsub("d1",items[i],"d1~(1|ID)"),data=data) # VAR_between / (VAR_between + VAR_within)
out <- round(as.data.frame(VarCorr(m))[1,4]/(as.data.frame(VarCorr(m))[1,4]+as.data.frame(VarCorr(m))[2,4]),digits)
# textual output or data.frame
if(output=="text"){cat(items[i],"ICC =",out,"\n")
}else{ res <- rbind(res,cbind(item=items[i],icc=out)) }}
# plotting item scores distributions
mvn(data = data[,items], univariatePlot = "histogram")[4]
mvn(data = data[,items], univariatePlot = "qqplot")[4]
if(output!="text"){return(res) }}
computes item ICCs from a random-intercept model, and plots item score distributions (histograms and qqplots)
corr.matrices()
corr.matrices <- function(data=data,text=TRUE,
vars=c("v1","v2","v3","t1","t2","t3","f1","f2","f3","d1","d2","d3","d4","c1","c2","c3"),
IDs=c("ID","day","within.day")){ require(ggplot2); require(dplyr); require(reshape2)
data <- data[,c(IDs,vars)]
data <- data %>%
group_by(ID) %>% # grouping by ID
mutate(v1.cm = mean(na.omit(v1)), # computing individual mean for any given item
v2.cm = mean(na.omit(v2)),v3.cm = mean(na.omit(v3)),
t1.cm = mean(na.omit(t1)),t2.cm = mean(na.omit(t2)),t3.cm = mean(na.omit(t3)),
f1.cm = mean(na.omit(f1)),f2.cm = mean(na.omit(f2)),f3.cm = mean(na.omit(f3)),
d1.cm = mean(na.omit(d1)),d2.cm = mean(na.omit(d2)),d3.cm = mean(na.omit(d3)),
d4.cm = mean(na.omit(d4)),c1.cm = mean(na.omit(c1)),c2.cm = mean(na.omit(c2)),
c3.cm = mean(na.omit(c3)),
v1.dm = v1-v1.cm, # computing within-individual deviations (occasional score - mean score)
v2.dm = v2-v2.cm,v3.dm = v3-v3.cm,t1.dm = t1-t1.cm,t2.dm = t2-t2.cm,t3.dm = t3-t3.cm,
f1.dm = f1-f1.cm,f2.dm = f2-f2.cm,f3.dm = f3-f3.cm,
d1.dm = d1-d1.cm,d2.dm = d2-d2.cm,d3.dm = d3-d3.cm,d4.dm = d4-d4.cm,
c1.dm = c1-c1.cm,c2.dm = c2-c2.cm,c3.dm = c3-c3.cm)
# Matrix 1 (all scores as independent)
p1 <- ggplot(melt(cor(data[,4:19],data[,4:19],use="complete.obs",method="pearson")),
aes(x=Var1, y=Var2, fill=value)) +
geom_tile() +
ggtitle("Correlation Matrix 1 (all independent)")+labs(x="",y="")+
scale_fill_gradient2(low="darkblue",high="#f03b20",
mid="white",midpoint=0,
limit = c(-1,1), space = "Lab",
name="Pearson\nCorrelation",
guide="legend",
breaks=round(seq(1,-1,length.out = 11),2),
minor_breaks=round(seq(1,-1,length.out = 11),2))+
theme(text=element_text(face="bold",size=14))
if(text==TRUE){ p1 <- p1 + geom_text(aes(x = Var1, y = Var2, label = round(value,2)),color="black",size=3.5)}
# Matrix 2 (individual means)
DATA <- data[1,]
for(ID in levels(as.factor(as.character(data$ID)))[2:length(levels(as.factor(as.character(data$ID))))]){
DATA <- rbind(DATA,data[data$ID==ID,][1,])}
DATA <- data[data$day==1 & data$within.day==1,] # keeping one observation per participant (day 1, first beep); supersedes the loop above
p2 <- ggplot(melt(cor(DATA[,20:35],DATA[,20:35],use="complete.obs",method="pearson")),
aes(x=Var1, y=Var2, fill=value)) + geom_tile() +
ggtitle("Correlation Matrix 2 (individual means)")+labs(x="",y="")+
scale_fill_gradient2(low="darkblue",high="#f03b20",
mid="white",midpoint=0,
limit = c(-1,1), space = "Lab",
name="Pearson\nCorrelation",
guide="legend",
breaks=round(seq(1,-1,length.out = 11),2),
minor_breaks=round(seq(1,-1,length.out = 11),2))+
theme(text=element_text(face="bold",size=10))
if(text==TRUE){ p2 <- p2 + geom_text(aes(x = Var1, y = Var2, label = round(value,2)),color="black",size=3.5)}
# Matrix 3 (deviations from individual means)
p3 <- ggplot(data = melt(cor(data[,36:ncol(data)],data[,36:ncol(data)],use="complete.obs",method="pearson")),
aes(x=Var1, y=Var2, fill=value)) +
geom_tile() +
ggtitle("Correlation Matrix 3 (deviations from individual means)")+labs(x="",y="")+
scale_fill_gradient2(low="darkblue",high="#f03b20",
mid="white",midpoint=0,
limit = c(-1,1), space = "Lab",
name="Pearson\nCorrelation",
guide="legend",
breaks=round(seq(1,-1,length.out = 11),2),
minor_breaks=round(seq(1,-1,length.out = 11),2))+
theme(text=element_text(face="bold",size=10))
if(text==TRUE){ p3 <- p3 + geom_text(aes(x = Var1, y = Var2, label = round(value,2)),color="black",size=3.5)}
return(list(p1,p2,p3))}
visualizes the correlation matrices computed from all data points (treated as independent), from the individual mean scores (between-individuals), and from the mean-centered scores (within-individuals).
SUBSAMPLE s1
MISSINGNESS
First, we inspect missing data once again.
library(dplyr); library(tidyr)
# TD missingness
TD.s1 %>% select(d1:d4) %>% gather("Variable", "value") %>% group_by(Variable) %>%
summarise(Missing=length(which(is.na(value))),'% Missing'=round(100*length(which(is.na(value)))/n(),2))
# TC missingness
TC.s1 %>% select(c1:c3) %>% gather("Variable", "value") %>% group_by(Variable) %>%
summarise(Missing=length(which(is.na(value))),'% Missing'=round(100*length(which(is.na(value)))/n(),2))
# Mood missingness
Mood.s1 %>% select(v1:f3) %>% gather("Variable", "value") %>% group_by(Variable) %>%
summarise(Missing=length(which(is.na(value))),'% Missing'=round(100*length(which(is.na(value)))/n(),2))
# detaching packages
detach("package:dplyr", unload=TRUE);detach("package:tidyr", unload=TRUE)
Comments:
as noted above, missing data are mainly in the last items of ESM questionnaires
in all cases, the percentage of missing data can be considered very low
ICC & DISTRIBUTIONS
Then, we take a look at the item scores distribution and variance partitioning, as normal distributions with substantial within-individual variability are assumed by the following MCFA models. Within-subject variability is evaluated using the intraclass correlation coefficients (ICCs), whereas the raw scores and the residuals distributions are evaluated graphically.
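For reference, a minimal sketch of the ICC computation performed inside item.desc(), shown here for a single item (d1 from TD.s1 is used only as an example):
library(lme4)
m <- lmer(d1 ~ 1 + (1|ID), data = TD.s1)  # random-intercept model for item d1
vc <- as.data.frame(VarCorr(m))           # variance components: ID (between) and Residual (within)
vc[vc$grp=="ID","vcov"]/sum(vc$vcov)      # ICC = VAR_between / (VAR_between + VAR_within)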
TD
item.desc(TD.s1,items=c("d1","d2","d3","d4"),digits=3)
## d1 ICC = 0.329
## d2 ICC = 0.327
## d3 ICC = 0.336
## d4 ICC = 0.358
Comments:
all ICCs are below .4, suggesting that most variance is on level 1 (within), but variance on level 2 is still relevant. Thus, we are justified to use a multilevel approach
all items are quite normally distributed, with a little skewness on the left (especially d3). Items show the typical “discrete” patterns of ordinal scales
TC
item.desc(TC.s1,items=c("c1","c2","c3"),digits=3)
## c1 ICC = 0.392
## c2 ICC = 0.368
## c3 ICC = 0.35
Comments:
all ICCs are below .4, suggesting that most variance is on level 1 (within), but variance on level 2 is still relevant. Thus, we are justified to use a multilevel approach
items are quite normally distributed, with a little skewness on the left (especially c1). Items show the typical “discrete” patterns of ordinal scales
Mood
item.desc(Mood.s1,items=colnames(Mood.s1)[2:10],digits=3)
## v1 ICC = 0.319
## v2 ICC = 0.327
## v3 ICC = 0.375
## t1 ICC = 0.267
## t2 ICC = 0.393
## t3 ICC = 0.405
## f1 ICC = 0.278
## f2 ICC = 0.228
## f3 ICC = 0.273
Comments:
all ICCs are equal to or below .40, suggesting that most variance is on level 1 (within), but variance on level 2 is still relevant. Thus, we are justified to use a multilevel approach
t2 and t3 are the items showing the highest between-individual variance, whereas f1, f2, and f3 are the items showing the highest within-individual variance
items are quite normally distributed, with a little skewness on the left (especially v1, v3, t1, and t3). Items show the typical “discrete” patterns of ordinal scales
CORRELATIONS
Then, we visualize the correlation matrices with the corr.matrices function. Three correlation matrices are computed in the following ways:
- considering all item scores as independent, using all available raw scores.
corr.matrices(data=s1.w)[[1]]
- considering average scores, that is, one observation per participant (i.e., between-subjects matrix).
corr.matrices(data=s1.w)[[2]]
- considering mean-centered scores (i.e., within-subject matrix).
corr.matrices(data=s1.w)[[3]]
Comments:
we can see that correlations are in the expected directions
Task Demand and Task Control dimensions were more clearly distinguishable than Mood dimensions, with some correlations between item scores measuring Negative Valence and Tense Arousal being higher than those between items assumed to measure the same dimension (note that it is difficult to discriminate between the two dimensions)
the latter also showed negative correlations with Task Control item scores, whereas Fatigue item scores showed weaker correlations with Task Demand items
correlations between mean and mean-centered scores were in the same directions, but showed, respectively, higher and lower values than those computed from the raw scores
SUBSAMPLE s2
MISSINGNESS
First, we inspect missing data once again.
library(dplyr); library(tidyr)
# TD missingness
TD.s2 %>% select(d1:d4) %>% gather("Variable", "value") %>% group_by(Variable) %>%
summarise(Missing=length(which(is.na(value))),'% Missing'=round(100*length(which(is.na(value)))/n(),2))
# TC missingness
TC.s2 %>% select(c1:c3) %>% gather("Variable", "value") %>% group_by(Variable) %>%
summarise(Missing=length(which(is.na(value))),'% Missing'=round(100*length(which(is.na(value)))/n(),2))
# Mood missingness
Mood.s2 %>% select(v1:f3) %>% gather("Variable", "value") %>% group_by(Variable) %>%
summarise(Missing=length(which(is.na(value))),'% Missing'=round(100*length(which(is.na(value)))/n(),2))
# detaching packages
detach("package:dplyr", unload=TRUE);detach("package:tidyr", unload=TRUE)
Comments:
as noted above, missing data are mainly in the last items of ESM questionnaires
in all cases, the percentage of missing data can be considered very low
ICC & DISTRIBUTIONS
Then, we take a look at the item scores distribution and variance partitioning, as normal distributions with substantial within-individual variability are assumed by the following MCFA models. Within-subject variability is evaluated using the intraclass correlation coefficients (ICCs), whereas the raw scores and the residuals distributions are evaluated graphically.
TD
item.desc(TD.s2,items=c("d1","d2","d3","d4"),digits=3)
## d1 ICC = 0.322
## d2 ICC = 0.342
## d3 ICC = 0.338
## d4 ICC = 0.372
Comments:
all ICCs are below .4, suggesting that most variance is on level 1 (within), but variance on level 2 is still relevant. Thus, we are justified to use a multilevel approach
all items are quite normally distributed, with a little skewness on the left (especially d3). Items show the typical “discrete” patterns of ordinal scales
TC
item.desc(TC.s2,items=c("c1","c2","c3"),digits=3)
## c1 ICC = 0.375
## c2 ICC = 0.328
## c3 ICC = 0.329
Comments:
all ICCs are below .4, suggesting that most variance is on level 1 (within), but variance on level 2 is still relevant. Thus, we are justified to use a multilevel approach
items are quite normally distributed, with a little skewness on the left (especially c1). Items show the typical “discrete” patterns of ordinal scales
Mood
item.desc(Mood.s2,items=colnames(Mood.s2)[2:10],digits=3)
## v1 ICC = 0.336
## v2 ICC = 0.352
## v3 ICC = 0.414
## t1 ICC = 0.291
## t2 ICC = 0.433
## t3 ICC = 0.43
## f1 ICC = 0.283
## f2 ICC = 0.234
## f3 ICC = 0.288
Comments:
all ICCs are equal to or below .40, suggesting that most variance is on level 1 (within), but variance on level 2 is still relevant. Thus, we are justified to use a multilevel approach
t2 and t3 are the items showing the highest between-individual variance, whereas f1, f2, and f3 are the items showing the highest within-individual variance
items are quite normally distributed, with a little skewness on the left (especially v1, v3, t1, and t3). Items show the typical “discrete” patterns of ordinal scales
CORRELATIONS
corr.matrices(data=s2.w)
## [[1]]
##
## [[2]]
##
## [[3]]
Comments:
- results are highly consistent with those shown for subsample s1
SUBSAMPLE s3
MISSINGNESS
First, we inspect missing data once again.
library(dplyr); library(tidyr)
# TD missingness
TD.s3 %>% select(d1:d4) %>% gather("Variable", "value") %>% group_by(Variable) %>%
summarise(Missing=length(which(is.na(value))),'% Missing'=round(100*length(which(is.na(value)))/n(),2))
# TC missingness
TC.s3 %>% select(c1:c3) %>% gather("Variable", "value") %>% group_by(Variable) %>%
summarise(Missing=length(which(is.na(value))),'% Missing'=round(100*length(which(is.na(value)))/n(),2))
# Mood missingness
Mood.s3 %>% select(v1:f3) %>% gather("Variable", "value") %>% group_by(Variable) %>%
summarise(Missing=length(which(is.na(value))),'% Missing'=round(100*length(which(is.na(value)))/n(),2))
# detaching packages
detach("package:dplyr", unload=TRUE);detach("package:tidyr", unload=TRUE)
Comments:
as noted above, missing data are mainly in the last items of ESM questionnaires
in all cases, the percentage of missing data can be considered very low
ICC & DISTRIBUTIONS
Then, we take a look at the item scores distribution and variance partitioning, as normal distributions with substantial within-individual variability are assumed by the following MCFA models. Within-subject variability is evaluated using the intraclass correlation coefficients (ICCs), whereas the raw scores and the residuals distributions are evaluated graphically.
TD
item.desc(TD.s3,items=c("d1","d2","d3","d4"),digits=3)
## d1 ICC = 0.348
## d2 ICC = 0.345
## d3 ICC = 0.342
## d4 ICC = 0.372
Comments:
all ICCs are below .4, suggesting that most variance is on level 1 (within), but variance on level 2 is still relevant. Thus, we are justified to use a multilevel approach
all items are quite normally distributed, with a little skewness on the left (especially d3). Items show the typical “discrete” patterns of ordinal scales
TC
item.desc(TC.s3,items=c("c1","c2","c3"),digits=3)
## c1 ICC = 0.379
## c2 ICC = 0.368
## c3 ICC = 0.352
Comments:
all ICCs are below .4, suggesting that most variance is on level 1 (within), but variance on level 2 is still relevant. Thus, we are justified to use a multilevel approach
items are quite normally distributed, with a little skewness on the left (especially c1). Items show the typical “discrete” patterns of ordinal scales
Mood
item.desc(Mood.s3,items=colnames(Mood.s3)[2:10],digits=3)
## v1 ICC = 0.335
## v2 ICC = 0.335
## v3 ICC = 0.402
## t1 ICC = 0.278
## t2 ICC = 0.41
## t3 ICC = 0.42
## f1 ICC = 0.289
## f2 ICC = 0.257
## f3 ICC = 0.291
Comments:
all ICCs are equal to or below .40, suggesting that most variance is on level 1 (within), but variance on level 2 is still relevant. Thus, we are justified to use a multilevel approach
t2 and t3 are the items showing the highest between-individual variance, whereas f1, f2, and f3 are the items showing the highest within-individual variance
items are quite normally distributed, with a little skewness on the left (especially v1, v3, t1, and t3). Items show the typical “discrete” patterns of ordinal scales
CORRELATIONS
corr.matrices(data=s3.w)
## [[1]]
##
## [[2]]
##
## [[3]]
Comments:
- results are highly consistent with those shown for subsample s1
4.3. MCFA
Here, a multilevel confirmatory factor analysis (MCFA) is performed on TDS, TCS and MDMQ items.
PIPELINE & FUNCTIONS
The measurement model of each included scale is evaluated through the following steps:
The hypothesized multilevel models and alternative models are specified, in addition to a set of preliminary benchmark models (following Hox, 2010, chapter 14) and models assuming cross-level invariance (following Jak & Jorgensen, 2017)
Potential problems of non-convergence or Heywood cases (improper solutions) are handled
An influential analysis (i.e., iterative estimation of the parameters with and without each participant) is performed. Participants leading to changes higher than .1 in one or more standardized loadings while showing implausible patterns of responses (e.g., extreme opposite answers to items measuring the same dimension) are excluded from the analysis
A model comparison is performed between the specified models using fit indices and information criteria.
The following packages and functions are used.
library(lavaan); library(MuMIn); library(lme4); library(gridExtra); library(tcltk)
loadings()
#' @title Summarizing standardized loadings from a multilevel CFA model
#' @param model = multilevel CFA model.
#' @param st = Character indicating the standardization level of loadings: "st.all" or "st.lv".
loadings <- function(model=NA,st="st.all"){ require(lavaan)
if(st=="st.all"){ LOADs <- standardizedsolution(model)
} else if(st=="st.lv"){ LOADs <- standardizedsolution(model,type="st.lv")
} else{ LOADs <- parameterestimates(model) }
return(LOADs[LOADs$op=="=~",])}
Prints the standardized factor loadings of an ordinary or a multilevel CFA model.
fit.ind()
fit.ind <- function(model=NA,from_summary=FALSE,type="multilevel",models.names=NA,
fits=c("npar","chisq","df","pvalue","rmsea","cfi","srmr_within","srmr_between")){
require(lavaan); require(MuMIn)
# removing level-specific fit indices when model is "monolevel"
if(type=="monolevel"){
fits <- gsub("srmr_within","srmr",fits)
fits <- fits[fits!="srmr_between"] }
if(from_summary==FALSE){
# returning dataframe of models fit indices when more than one model is considered
if(length(model)>1){
fit.indices <- fitmeasures(model[[1]])[fits]
for(i in 2:length(model)){
fit.indices <- rbind(fit.indices,fitmeasures(model[[i]])[fits]) }
if(!is.na(models.names[1])){
row.names(fit.indices) <- models.names }
return(as.data.frame(fit.indices))
} else { return(fitmeasures(model)[fits]) }
} else { # in some cases the fit indices are available only from the model's summary
quiet <- function(fit) { # this was written by Alicia FRANCO MARTÍNEZ on the lavaan Google group
sink(tempfile())
on.exit(sink())
invisible(summary(fit, standardized = TRUE, fit.measures=TRUE)) }
sum <- quiet(model)
fit.indices <- sum$FIT[fits]
return(fit.indices)}}
Prints fit indices of one or more CFA models. Note: According to the criteria proposed by Hu and Bentler (1999), we consider RMSEA ≤ .06, CFI ≥ .95, and SRMR ≤ .08 as indicative of adequate fit.
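As a hedged usage sketch (fit denotes a hypothetical fitted monolevel lavaan model), the cutoffs above can be checked directly on the fit.ind() output:
fi <- fit.ind(fit, type="monolevel") # named vector of fit indices
fi["rmsea"] <= .06 & fi["cfi"] >= .95 & fi["srmr"] <= .08 # TRUE when all Hu & Bentler cutoffs are met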
mcfa.Huang()
mcfa.Huang<-function(gp,dat){
dat1<-dat[complete.cases(dat),]
g<-dat1[,gp] #grouping
freq<-data.frame(table(g))
gn<-grep(gp,names(dat1)) #which column number is the grouping var
dat2<-dat1[,-gn] #raw only
G<-length(table(g))
n<-nrow(dat2)
k<-ncol(dat2)
scaling<-(n^2-sum(freq$Freq^2)) / (n*(G-1))
varn<-names(dat1[,-gn])
ms<-matrix(0,n,k)
for (i in 1:k){
ms[,i]<-ave(dat2[,i],g)
}
cs<-dat2-ms #deviation matrix, centered scores
colnames(ms)<-colnames(cs)<-varn
b.cov<-(cov(ms) * (n - 1))/(G-1) #group level cov matrix
w.cov<-(cov(cs) * (n - 1))/(n-G) #individual level cov matrix
pb.cov<-(b.cov-w.cov)/scaling #estimate of pure/adjusted between cov matrix
w.cor<-cov2cor(w.cov) #individual level cor matrix
b.cor<-cov2cor(b.cov) #group level cor matrix
pb.cor<-cov2cor(pb.cov) #estimate of pure between cor matrix
icc<-round(diag(pb.cov)/(diag(w.cov)+diag(pb.cov)),3) #iccs
return(list(b.cov=b.cov,pw.cov=w.cov,ab.cov=pb.cov,pw.cor=w.cor,
b.cor=b.cor,ab.cor=pb.cor,
n=n,G=G,c.=scaling,sqc=sqrt(scaling),
icc=icc,dfw=n-G,dfb=G)) }
generates the pooled within-cluster covariance matrix. Note: function created by Francis L. Huang (University of Missouri), available from https://francish.netlify.app/
influential.analysis()
#' @title Influential analysis
#' @param data = data.frame used by the model.
#' @param cluster = Character. Variable name in the data frame defining the cluster in a two-level dataset.
#' @param parameter = Character. "var" for estimated variances, "load" for loadings.
#' @param st = Character indicating the standardization level of loadings: "st.all" or "st.lv".
#' @param n.items = Integer. Number of observed variables considered by the model.
#' @param item.labels = Character vector indicating the labels of the considered observed variables.
#' @param m = Character string specifying the model using the lavaan syntax.
influential.analysis <- function(data=NA,cluster="ID",parameter="var",st="st.all",
n.items=9,item.labels=c("v1", "v2", "v3", "t1", "t2", "t3", "f1", "f2","f3"),
m = 'level: 1
NV_w =~ v1 + v2 + v3
TA_w =~ t1 + t2 + t3
FA_w =~ f1 + f2 + f3
level: 2
NV_b =~ v1 + v2 + v3
TA_b =~ t1 + t2 + t3
FA_b =~ f1 + f2 + f3'){ require(lavaan); require(tcltk)
# storing results on the whole sample
m.res <- cfa(model=m,data=data,cluster=cluster,std.lv=TRUE)
if(parameter=="var"){
lv1 <- parameterestimates(m.res)[parameterestimates(m.res)$op=="~~" &
nchar(parameterestimates(m.res)$lhs)==2 &
parameterestimates(m.res)$level==1,"est"]
lv2 <- parameterestimates(m.res)[parameterestimates(m.res)$op=="~~" &
nchar(parameterestimates(m.res)$lhs)==2 &
parameterestimates(m.res)$level==2,"est"]
} else if(parameter=="load"){
if(st=="st.all"){
lv1 <- standardizedsolution(m.res)[standardizedsolution(m.res)$op=="=~",
"est.std"][1:n.items]
lv2 <- standardizedsolution(m.res)[standardizedsolution(m.res)$op=="=~",
"est.std"][(n.items+1):(n.items*2)]
} else if(st=="st.lv"){
lv1 <- standardizedsolution(m.res,type="st.lv")[standardizedsolution(m.res,type="st.lv")$op=="=~",
"est.std"][1:n.items]
lv2 <- standardizedsolution(m.res,type="st.lv")[standardizedsolution(m.res,type="st.lv")$op=="=~",
"est.std"][(n.items+1):(n.items*2)]
} else{
lv1 <- parameterestimates(m.res)[parameterestimates(m.res)$op=="=~",
"est.std"][1:n.items]
lv2 <- parameterestimates(m.res)[parameterestimates(m.res)$op=="=~",
"est.std"][(n.items+1):(n.items*2)]
}
}
# dataframe storing results
parameters <- data.frame(ID=rep("all",n.items),
lv1 = lv1,
lv2 = lv2,
neg.var=rep(1,n.items))
if(!any(diag(lavInspect(m.res,"est")[["within"]][["theta"]]) < 0) &
!any(diag(lavInspect(m.res,"est")[["ID"]][["theta"]]) < 0)){
parameters[1:n.items,"neg.var"] <- 0 }
# replicate parameters estimation by excluding any participant one-by-one
IDs <- levels(as.factor(as.character(data$ID)))
pb <- tkProgressBar("Modeling", "Data modeling",0, 100, 50) # progress bar
for(ID in IDs){ info <- sprintf("%d%% done", round(which(IDs==ID)/length(IDs)*100))
setTkProgressBar(pb, round(which(IDs==ID)/length(IDs)*100), sprintf("Data modeling", info), info)
m.res <- cfa(model=m,data=data[data$ID!=ID,],cluster=cluster,std.lv=TRUE)
if(parameter=="var"){
lv1 <- parameterestimates(m.res)[parameterestimates(m.res)$op=="~~" &
nchar(parameterestimates(m.res)$lhs)==2 &
parameterestimates(m.res)$level==1,"est"]
lv2 <- parameterestimates(m.res)[parameterestimates(m.res)$op=="~~" &
nchar(parameterestimates(m.res)$lhs)==2 &
parameterestimates(m.res)$level==2,"est"]
} else if(parameter=="load"){
if(st=="st.all"){
lv1 <- standardizedsolution(m.res)[standardizedsolution(m.res)$op=="=~",
"est.std"][1:n.items]
lv2 <- standardizedsolution(m.res)[standardizedsolution(m.res)$op=="=~",
"est.std"][(n.items+1):(n.items*2)]
} else if(st=="st.lv"){
lv1 <- standardizedsolution(m.res,type="st.lv")[standardizedsolution(m.res,type="st.lv")$op=="=~",
"est.std"][1:n.items]
lv2 <- standardizedsolution(m.res,type="st.lv")[standardizedsolution(m.res,type="st.lv")$op=="=~",
"est.std"][(n.items+1):(n.items*2)]
} else{
lv1 <- parameterestimates(m.res)[parameterestimates(m.res)$op=="=~",
"est.std"][1:n.items]
lv2 <- parameterestimates(m.res)[parameterestimates(m.res)$op=="=~",
"est.std"][(n.items+1):(n.items*2)] }}
# negative estimated variances
if(!any(diag(lavInspect(m.res,"est")[["within"]][["theta"]]) < 0) &
!any(diag(lavInspect(m.res,"est")[["ID"]][["theta"]]) < 0)){
neg.Var <- 0 } else { neg.Var <- 1 }
# storing information
parameters <- rbind(parameters,
data.frame(ID=rep(ID,n.items),
lv1=lv1,lv2=lv2,
neg.var = rep(neg.Var,n.items)))}
close(pb)
parameters$X <- rep(item.labels,length(IDs)+1)
parameters <- parameters[,c("ID","X","lv1","lv2","neg.var")]
if(parameter=="var"){
cat("Negative variance estimated on level 2 for the following items:",
levels(as.factor(as.character(parameters[parameters$lv2<0,"X"]))),
"\nNegative variance estimated on level 1 for the following items:",
levels(as.factor(as.character(parameters[parameters$lv1<0,"X"]))),sep=" ")}
return(parameters)}
fits an MCFA model on several subsamples obtained by removing each participant (cluster) one at a time, and saves the parameters (either item loadings or item residual variances) estimated in each subsample
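As a hedged sketch (not part of the original pipeline code), the .1 criterion described in the pipeline above could be applied to this output as follows; infl and ref are hypothetical object names:
infl <- influential.analysis(data=Mood.s1, parameter="load") # loadings re-estimated with each participant removed in turn
ref <- infl[infl$ID=="all", c("X","lv1","lv2")] # loadings estimated on the full sample
infl <- merge(infl[infl$ID!="all",], ref, by="X", suffixes=c("",".all"))
# participants whose removal changes at least one standardized loading by more than .1
unique(infl[abs(infl$lv1 - infl$lv1.all) > .1 | abs(infl$lv2 - infl$lv2.all) > .1, "ID"])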
plot.influential()
#' @title Plotting results of influential analysis
#' @param data = data.frame generated by the influential.analysis function.
#' @param parameter = Character. "var" for estimated variances, "load" for loadings.
#' @param variable = Character indicating the name of the observed variable to be plotted.
#' @param level = Integer indicating if focusing on level 1 or 2 (default).
#' @param threshold_lower = Numeric. Threshold below which the participants' IDs are shown.
#' @param threshold_upper = Numeric. Threshold above which the participants' IDs are shown.
plot.influential <- function(data,parameter="var",variable,level=2,threshold_lower=NA,threshold_upper=NA){ require(ggplot2)
if(parameter=="var"){ par <- "variance"} else { par = "loading" }
if(level==2){ colnames(data)[colnames(data)=="lv2"] <- "par"
} else { colnames(data)[colnames(data)=="lv1"] <- "par" }
p <- ggplot(data[data$X==variable,],aes(ID,par))+
geom_point()+ggtitle(paste(variable,par,"on level",level))+
geom_point(data=data[data$X==variable & data$ID=="all",],colour="blue",size=5)+
# geom_point(data=data[data$X==variable & data$par<0,],colour="red")+
geom_point(data=data[data$X==variable & data$neg.var==1,],colour="red")+
theme(axis.text.x = element_blank()) + xlab("")
if(!is.na(threshold_lower) & is.na(threshold_upper)){
p <- p + geom_text(data=data[data$X==variable &
data$par < threshold_lower,],
aes(label=ID),nudge_x=-5,size=3)
} else if(is.na(threshold_lower) & !is.na(threshold_upper)){
p <- p + geom_text(data=data[data$X==variable &
data$par > threshold_upper,],
aes(label=ID),nudge_x=-5,size=3)
} else if(!is.na(threshold_lower) & !is.na(threshold_upper)){
p <- p + geom_text(data=data[data$X==variable &
(data$par > threshold_upper |
data$par < threshold_lower),],
aes(label=ID),nudge_x=-5,size=3) }
return(p) }
plots the parameters obtained with the influential.analysis() function
check.influential()
#' @title Inspecting changes in parameters after the removal of influential cases
#' @param model = MCFA model fitted with lavaan.
#' @param data = data.frame used by the model.
#' @param IDs = Character vector. Identification codes of the participants to be excluded.
#' @param level = Numeric. 1 = within, 2 = between (default).
check.influential <- function(model,data,IDs,level=2){ require(lavaan); require(ggplot2)
# excluding participants
data2 <- data
for(ID in levels(as.factor(data$ID))){
if(any(IDs==ID)){ data2 <- data2[data2$ID!=ID,] }}
# updating model
model2 <- update(model,data=data2)
# standardized loadings
load1 <- standardizedsolution(model)[standardizedsolution(model)$op=="=~",]
load2 <- standardizedsolution(model2)[standardizedsolution(model2)$op=="=~",]
# data.frame of loadings
loads <- data.frame(inclusion=as.factor(c(rep("IN",nrow(load1)),rep("OUT",nrow(load1)))), # inclusion
load=as.factor(paste(load1[,"rhs"],c(rep("w",nrow(load1)/2),rep("b",nrow(load1)/2)),sep="_")),
loading=c(load1[,"est.std"],load2[,"est.std"]))
ggplot(loads,aes(x=inclusion,y=loading,color=load)) + geom_point() + geom_line(aes(group=load))
}
visualizes the changes in parameters estimated with and without one or more participants
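A hypothetical usage example (m3x3 is the Mood model fitted below in the MULTILEVEL MODELS subsection; the two IDs are chosen only for illustration):
check.influential(model=m3x3, data=Mood.s1, IDs=c("S047","S055"))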
plot.traj()
#' @title Plotting trajectories of item scores for a single participant
#' @param data = data.frame including the raw item scores (e.g., s1.w).
#' @param var = Character. Scale to plot: "Mood" (default), "TD", or "TC".
#' @param ID = Character. Identification code for the selected participant.
plot.traj <- function(data,var="Mood",ID){ require(ggplot2); require(gridExtra)
if(var=="Mood"){
# Negative Valence
p1 <- ggplot(data[data$ID==ID,],aes(x=within.day,y=v1))+
geom_line(aes(y=4),linetype=2) + geom_smooth(se=FALSE,span=0.7,color="lightblue")+
geom_smooth(aes(y=v2),se=FALSE,span=0.7,color="blue")+
geom_smooth(aes(y=v3),se=FALSE,span=0.7,color="darkblue")+
facet_wrap("day",strip.position="right")+
ggtitle(paste("Negative Variance items in participant",ID))+ylab("")+xlab("")+
scale_x_continuous(breaks=1:7)+scale_y_continuous(breaks=1:7)+
theme(axis.text = element_text(size=5),strip.background = element_rect(colour=factor(data$ID)))
# tense arousal
p2 <- ggplot(data[data$ID==ID,],aes(x=within.day,y=t1))+
geom_line(aes(y=4),linetype=2) + geom_smooth(se=FALSE,span=0.7,color="salmon")+
geom_smooth(aes(y=t2),se=FALSE,span=0.7,color="red")+
geom_smooth(aes(y=t3),se=FALSE,span=0.7,color="darkred")+
facet_wrap("day",strip.position="right")+
ggtitle(paste("Tense Arousal items in participant",ID))+ylab("")+xlab("")+
scale_x_continuous(breaks=1:7)+scale_y_continuous(breaks=1:7)+
theme(axis.text = element_text(size=5),strip.background = element_rect(colour=factor(data$ID)))
# fatigue
p3 <- ggplot(data[data$ID==ID,],aes(x=within.day,y=f1))+
geom_line(aes(y=4),linetype=2) + geom_smooth(se=FALSE,span=0.7,color="yellow")+
geom_smooth(aes(y=f2),se=FALSE,span=0.7,color="salmon")+
geom_smooth(aes(y=f3),se=FALSE,span=0.7,color="orange")+
facet_wrap("day",strip.position="right")+
ggtitle(paste("Fatigue items in participant",ID))+ylab("")+xlab("")+
scale_x_continuous(breaks=1:7)+scale_y_continuous(breaks=1:7)+
theme(axis.text = element_text(size=5),strip.background = element_rect(colour=factor(data$ID)))
grid.arrange(p1,p2,p3,nrow=3)
} else if(var=="TD"){
ggplot(data[data$ID==ID,],aes(x=within.day,y=d1))+
geom_line(aes(y=4),linetype=2) + geom_smooth(se=FALSE,span=0.7,color="salmon")+
geom_smooth(aes(y=d2),se=FALSE,span=0.7,color="red")+
geom_smooth(aes(y=d3),se=FALSE,span=0.7,color="pink")+
geom_smooth(aes(y=d4),se=FALSE,span=0.7,color="darkred")+
facet_wrap("day",strip.position="right")+
ggtitle(paste("Task Demand items in participant",ID))+ylab("")+xlab("")+
scale_x_continuous(breaks=1:7)+scale_y_continuous(breaks=1:7)+
theme(axis.text = element_text(size=5),strip.background = element_rect(colour=factor(data$ID)))
} else if(var=="TC"){
ggplot(data[data$ID==ID,],aes(x=within.day,y=c1))+
geom_line(aes(y=4),linetype=2) + geom_smooth(se=FALSE,span=0.7,color="blue")+
geom_smooth(aes(y=c2),se=FALSE,span=0.7,color="darkblue")+
geom_smooth(aes(y=c3),se=FALSE,span=0.7,color="lightblue")+
facet_wrap("day",strip.position="right")+
ggtitle(paste("Task Control items in participant",ID))+ylab("")+xlab("")+
scale_x_continuous(breaks=1:7)+scale_y_continuous(breaks=1:7)+
theme(axis.text = element_text(size=5),strip.background = element_rect(colour=factor(data$ID)))}}
plots the item scores’ temporal trajectories of a given participant
SUBSAMPLE s1
MODEL SPECIFICATION
lv1 STRUCTURE
Here, we conduct the preliminary steps suggested by Hox (2010, chapter 14) for MCFA full maximum likelihood estimation. Similar to what is recommended by Muthén (1994), the idea is to specify a set of models to separately assess the factor structure on Level 1 (within) and 2 (between), and to compare those models with the hypothesized multilevel model.
The first step evaluates the within-cluster factor structure. Following Muthén (1994), we conduct a conventional (one-level) CFA of the pooled within-cluster covariance matrix (Spw). The goodness of fit of the within-cluster structure is indicated by the fit indices of this model.
TD
# pooled covariance matrix (using function by Huang)
poolCov <- mcfa.Huang(dat=TD.s1,gp="ID")$pw.cov
# one-level model on the pooled within-cluster matrix
m.withinPool_TD <- cfa('TD =~ d1 + d2 + d3 + d4',sample.cov=poolCov,sample.nobs=nrow(TD.s1),std.lv=TRUE)
loadings(m.withinPool_TD,st="st.all") # standardized loadings
round(fit.ind(m.withinPool_TD,type="monolevel",from_summary=TRUE),3) # fit indices
## npar chisq df pvalue rmsea cfi srmr
## 8.000 11.972 2.000 0.003 0.057 0.996 0.013
Comments:
- the model shows satisfactory fit indices and standardized loadings higher than .60, suggesting that the within-cluster structure holds
TC
# pooled covariance matrix (using function by Huang)
poolCov <- mcfa.Huang(dat=TC.s1,gp="ID")$pw.cov
# one-level model on the pooled within-cluster matrix
m.withinPool_TC <- cfa('TC_W =~ c1 + c2 + c3',sample.cov=poolCov,sample.nobs=nrow(TC.s1),std.lv=TRUE)
loadings(m.withinPool_TC,st="st.all") # standardized loadings
round(fit.ind(m.withinPool_TC,type="monolevel",from_summary=TRUE),3) # fit indices (saturated)
## npar chisq df pvalue rmsea cfi srmr
## 6 0 0 NA 0 1 0
Comments:
the model shows standardized loadings higher than .60, suggesting that the within-cluster structure holds
fit indices cannot be evaluated since the model is saturated
MOOD
# pooled covariance matrix (using function by Huang)
poolCov <- mcfa.Huang(dat=Mood.s1,gp="ID")$pw.cov
# one-level model on the pooled within-cluster matrix
m.withinPool_Mood <- cfa('NV =~ v1 + v2 + v3
TA =~ t1 + t2 + t3
FA =~ f1 + f2 + f3',sample.cov=poolCov,sample.nobs=nrow(Mood.s1),std.lv=TRUE)
loadings(m.withinPool_Mood,st="st.all") # standardized loadings
round(fit.ind(m.withinPool_Mood,type="monolevel",from_summary=TRUE),3) # fit indices
## npar chisq df pvalue rmsea cfi srmr
## 21.000 231.926 24.000 0.000 0.070 0.964 0.033
Comments:
standardized loadings estimated from the pooled within-cluster covariance matrix are significant and higher than .55. Item v2 shows the lowest loading (.56), whereas all other loadings are higher than .60
the model shows satisfactory fit indices (apart from RMSEA), suggesting that the within-cluster structure holds
lv2 STRUCTURE
Here, we conduct the preliminary steps suggested by Hox (2010, chapter 14) for MCFA full maximum likelihood estimation. Similar to what is recommended by Muthén (1994), the idea is to specify a set of models to separately assess the factor structure on Level 1 (within) and 2 (between), and to compare those models with the hypothesized multilevel model.
The second step is to evaluate the between-cluster factor structure by specifying a set of benchmark models for the group level (Hox, 2010). This is done to test whether there is a between-cluster structure to be modeled.
TD
1. Null model
First, we specify a null model with no specification on level 2. If this model holds, it will mean that there is no cluster-level structure at all (all covariances in the between-clusters matrix are the result of individual sampling variation), and we can continue using a conventional (mono-level) analysis.
# one-factor within model + null between model
m.null_TD <- cfa('level: 1
TD_W =~ d1 + d2 + d3 + d4
level: 2
d1 ~~ 0*d1
d2 ~~ 0*d2
d3 ~~ 0*d3
d4 ~~ 0*d4',
data=TD.s1, cluster="ID", std.lv=TRUE)
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "d1" has no variance within some clusters. The
## cluster ids with zero within variance are: S003
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "d2" has no variance within some clusters. The
## cluster ids with zero within variance are: S055
2. Independence model
Second, we specify an independence model on level 2, by specifying only variances but not covariances on this level. If this model holds, it will mean that “there is cluster-level structure, but not substantively interesting structural model”, and we can focus our analyses on the pooled within-cluster matrix. Otherwise, it will mean that some kind of structure exists on Level 2 (Hox, 2010).
# one-factor within model + independence between model
m.ind_TD <- cfa('level: 1
TD_W =~ d1 + d2 + d3 + d4
level: 2
d1 ~~ d1
d2 ~~ d2
d3 ~~ d3
d4 ~~ d4',
data=TD.s1, cluster="ID", std.lv=TRUE)
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "d1" has no variance within some clusters. The
## cluster ids with zero within variance are: S003
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "d2" has no variance within some clusters. The
## cluster ids with zero within variance are: S055
3. Saturated model
Third, we specify a saturated model for the between-clusters part that fits a full covariance matrix to the family-level observations. This allows us “to examine the best possible fit given the individual-level model” (Hox, 2010). If this model holds, the factors at the within-level in this model correspond to ‘within-only’ constructs, meaning that the construct only ‘exists’ at the within level, whereas lv2 variation are just ‘spurious’ (Stapleton et al., 2016).
# one-factor within model + saturated between part
m.sat_TD <- cfa('level: 1
TD_W =~ d1 + d2 + d3 + d4
level: 2
d1 ~~ d1 + d2 + d3
d2 ~~ d2 + d3 + d4
d3 ~~ d3 + d4
d4 ~~ d4',
data = TD.s1, cluster = "ID", std.lv=TRUE)
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "d1" has no variance within some clusters. The
## cluster ids with zero within variance are: S003
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "d2" has no variance within some clusters. The
## cluster ids with zero within variance are: S055
Finally, we evaluate the fit of these benchmark models. If the structure on level 2 holds, all models (and especially the Null and the Independence) should be rejected.
round(fit.ind(model=c(m.null_TD,m.ind_TD,m.sat_TD),models.names=c("Lv2 Null","Lv2 Independence","Lv2 Saturated")),3)
Comments:
all benchmark models converged with no problems
none of the models showed satisfactory goodness of fit, and all are rejected
two participants (S003 and S055) showed no intra-individual variability for items (d1 and d2, respectively).
grid.arrange(plot.traj(s1.w,var="TD","S003"),plot.traj(s1.w,var="TD","S055"),nrow=2)
TC
1. Null model
First, we specify a null model with no specification on level 2. If this model holds, it will mean that there is no cluster-level structure at all (all covariances in the between-clusters matrix are the result of individual sampling variation), and we can continue using a conventional (mono-level) analysis.
# one-factor within model + null between model
m.null_TC <- cfa('level: 1
TC_W =~ c1 + c2 + c3
level: 2
c1 ~~ 0*c1
c2 ~~ 0*c2
c3 ~~ 0*c3',
data=TC.s1, cluster="ID", std.lv=TRUE)
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "c1" has no variance within some clusters. The
## cluster ids with zero within variance are: S055 S062 S082 S121
## S125
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "c2" has no variance within some clusters. The
## cluster ids with zero within variance are: S008 S035 S097
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "c3" has no variance within some clusters. The
## cluster ids with zero within variance are: S008 S097
2. Independence model
Second, we specify an independence model on level 2, by specifying only variances but not covariances on this level. If this model holds, it will mean that “there is cluster-level structure, but not substantively interesting structural model”, and we can focus our analyses on the pooled within-cluster matrix. Otherwise, it will mean that some kind of structure exists on Level 2.
# one-factor within model + independence between model
m.ind_TC <- cfa('level: 1
TC_W =~ c1 + c2 + c3
level: 2
c1 ~~ c1
c2 ~~ c2
c3 ~~ c3',
data=TC.s1, cluster="ID", std.lv=TRUE)
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "c1" has no variance within some clusters. The
## cluster ids with zero within variance are: S055 S062 S082 S121
## S125
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "c2" has no variance within some clusters. The
## cluster ids with zero within variance are: S008 S035 S097
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "c3" has no variance within some clusters. The
## cluster ids with zero within variance are: S008 S097
3. Saturated model
Third, we specify a saturated model for the between-clusters part that fits a full covariance matrix to the family-level observations. This allows us “to examine the best possible fit given the individual-level model” (Hox, 2010). If this model holds, the factors at the within-level in this model correspond to ‘within-only’ constructs, meaning that the construct only ‘exists’ at the within level, whereas lv2 variation are just ‘spurious’ (Stapleton et al., 2016).
# one-factor within model + null between model
m.sat_TC <- cfa('level: 1
TC_W =~ c1 + c2 + c3
level: 2
c1 ~~ c1 + c2 + c3
c2 ~~ c2 + c3
c3 ~~ c3',
data=TC.s1, cluster="ID", std.lv=TRUE)
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "c1" has no variance within some clusters. The
## cluster ids with zero within variance are: S055 S062 S082 S121
## S125
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "c2" has no variance within some clusters. The
## cluster ids with zero within variance are: S008 S035 S097
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "c3" has no variance within some clusters. The
## cluster ids with zero within variance are: S008 S097
Finally, we evaluate the fit of these benchmark models. If the structure on level 2 holds, all models (and especially the Null and the Independence) should be rejected. Note that here the Lv2-saturated model is saturated, and thus we cannot evaluate the model’s fit using chi-squared-derived fit indices.
fit.ind(model=c(m.null_TC,m.ind_TC,m.sat_TC),models.names=c("Lv2 Null","Lv2 Independence","Lv2 Saturated"))
Comments:
all benchmark models converged with no problems
lv2 Null and Independence models show unsatisfactory goodness of fit, and are rejected. Lv2 Saturated model is saturated, and we cannot use chi-squared-derived fit indices.
eight participants (S008, S035, S055, S062, S082, S097, S121, and S125) showed no intra-individual variability for some items
grid.arrange(plot.traj(s1.w,var="TC","S008"),plot.traj(s1.w,var="TC","S035"),plot.traj(s1.w,var="TC","S055"),nrow=3)
grid.arrange(plot.traj(s1.w,var="TC","S062"),plot.traj(s1.w,var="TC","S082"),plot.traj(s1.w,var="TC","S097"),nrow=3)
grid.arrange(plot.traj(s1.w,var="TC","S121"),plot.traj(s1.w,var="TC","S125"),nrow=2)
MOOD
1. Null model
First, we specify a null model with no specification on level 2. If this model holds, it will mean that “there is no cluster-level structure at all” (“all covariances in the between-clusters matrix are the result of individual sampling variation”), and we can continue using a conventional (mono-level) analysis.
# one-factor within model + null between model
m.null_Mood <- cfa('level: 1
NV_W =~ v1 + v2 + v3
TA_W =~ t1 + t2 + t3
FA_W =~ f1 + f2 + f3
level: 2
v1 ~~ 0*v1
v2 ~~ 0*v2
v3 ~~ 0*v3
t1 ~~ 0*t1
t2 ~~ 0*t2
t3 ~~ 0*t3
f1 ~~ 0*f1
f2 ~~ 0*f2
f3 ~~ 0*f3', data=Mood.s1,cluster='ID',std.lv=TRUE)
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v1" has no variance within some clusters. The
## cluster ids with zero within variance are: S047
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "t3" has no variance within some clusters. The
## cluster ids with zero within variance are: S097
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "f1" has no variance within some clusters. The
## cluster ids with zero within variance are: S047
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "f3" has no variance within some clusters. The
## cluster ids with zero within variance are: S055
2. Independence model
Second, we specify an independence model on level 2, by specifying only variances but not covariances on this level. If this model holds, it will mean that “there is cluster-level structure, but not substantively interesting structural model”, and we can focus our analyses on the pooled within-cluster matrix. Otherwise, it will mean that some kind of structure exists on Level 2.
# one-factor within model + independence between model
m.ind_Mood <- cfa('level: 1
NV_W =~ v1 + v2 + v3
TA_W =~ t1 + t2 + t3
FA_W =~ f1 + f2 + f3
level: 2
v1 ~~ v1
v2 ~~ v2
v3 ~~ v3
t1 ~~ t1
t2 ~~ t2
t3 ~~ t3
f1 ~~ f1
f2 ~~ f2
f3 ~~ f3', data=Mood.s1,cluster='ID',std.lv=TRUE)
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v1" has no variance within some clusters. The
## cluster ids with zero within variance are: S047
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "t3" has no variance within some clusters. The
## cluster ids with zero within variance are: S097
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "f1" has no variance within some clusters. The
## cluster ids with zero within variance are: S047
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "f3" has no variance within some clusters. The
## cluster ids with zero within variance are: S055
3. Saturated model
Third, we specify a saturated model for the between-clusters part that fits a full covariance matrix to the family-level observations. This allows us “to examine the best possible fit given the individual-level [here, within-individual] model” (Hox, 2010). If this model holds, the factors at the within-level in this model correspond to ‘within-only’ constructs, meaning that the construct only ‘exists’ at the within level, whereas lv2 variation is just ‘spurious’ (Stapleton et al., 2016).
# one-factor within model + saturated between part
m.sat_Mood <- cfa('level: 1
HT_W =~ v1 + v2 + v3
BV_W =~ t1 + t2 + t3
FA_W =~ f1 + f2 + f3
level: 2
v1 ~~ v1 + v2 + v3 + t1 + t2 + t3 + f1 + f2 + f3
v2 ~~ v2 + v3 + t1 + t2 + t3 + f1 + f2 + f3
v3 ~~ v3 + t1 + t2 + t3 + f1 + f2 + f3
t1 ~~ t1 + t2 + t3 + f1 + f2 + f3
t2 ~~ t2 + t3 + f1 + f2 + f3
t3 ~~ t3 + f1 + f2 + f3
f1 ~~ f1 + f2 + f3
f2 ~~ f2 + f3
f3 ~~ f3', data=Mood.s1,cluster='ID',std.lv=TRUE)
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v1" has no variance within some clusters. The
## cluster ids with zero within variance are: S047
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "t3" has no variance within some clusters. The
## cluster ids with zero within variance are: S097
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "f1" has no variance within some clusters. The
## cluster ids with zero within variance are: S047
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "f3" has no variance within some clusters. The
## cluster ids with zero within variance are: S055
Finally, we evaluate the fit of these benchmark models. If the structure on level 2 holds, all models (and especially the Null and the Independence model) should be rejected.
round(fit.ind(model=c(m.null_Mood,m.ind_Mood,m.sat_Mood),models.names=c("Lv2 Null","Lv2 Independence","Lv2 Saturated")),3)
Comments:
all benchmark models converged with no problems.
none of the models showed satisfactory goodness of fit, and all are rejected (although the saturated model is not that bad)
three participants (S047, S055, and S097) showed no intra-individual variability for some items (v1, t3, f1, and f3)
plot.traj(s1.w,var="Mood","S047")
plot.traj(s1.w,var="Mood","S055")
plot.traj(s1.w,var="Mood","S097")
MULTILEVEL MODELS
Here, we specify the hypothesized multilevel CFA models and, only for Mood items, alternative models with a different number of dimensions. Note that each model is parametrized by standardizing the latent factors (std.lv = TRUE) to avoid fixing to 1 the first indicator of each dimension.
TD
For TD, only one model with four items measuring a single dimension on both levels is specified.
# hypothesized multilevel model
td <- cfa('level: 1
TD_W =~ d1 + d2 + d3 + d4
level: 2
TD_B =~ d1 + d2 + d3 + d4',
data=TD.s1, cluster="ID", std.lv=TRUE)
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "d1" has no variance within some clusters. The
## cluster ids with zero within variance are: S003
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "d2" has no variance within some clusters. The
## cluster ids with zero within variance are: S055
Comments:
- the model converged with no problems
TC
For TC, only one model with three items measuring a single dimension on both levels is specified.
# hypothesized multilevel model
tc <- cfa('level: 1
TC_W =~ c1 + c2 + c3
level: 2
TC_B =~ c1 + c2 + c3',
data=TC.s1, cluster="ID", std.lv=TRUE)
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "c1" has no variance within some clusters. The
## cluster ids with zero within variance are: S055 S062 S082 S121
## S125
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "c2" has no variance within some clusters. The
## cluster ids with zero within variance are: S008 S035 S097
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "c3" has no variance within some clusters. The
## cluster ids with zero within variance are: S008 S097
## Warning in lav_object_post_check(object): lavaan WARNING: some estimated ov
## variances are negative
Comments:
the model converged with no problems
a negative variance (Heywood case) is estimated for item c3 on level 2 (see IMPROPER SOLUTIONS)
parameterestimates(tc)[parameterestimates(tc)$op=="~~" & parameterestimates(tc)$level==2,
c("lhs","level","est","se","ci.lower","ci.upper")][1:3,]
MOOD
For the MDMQ, we specify the hypothesized multilevel model m3x3 (with three factors on level 1 and three factors on level 2), and a set of alternative models with fewer dimensions (m3x2, m2x2, m2x3).
1) 3x3 model
Here, we specify the hypothesized model for Mood, using all the 9 items to measure 3 latent variables on both levels.
# hypothesized multilevel model
m3x3 <- cfa('level: 1
NV_w =~ v1 + v2 + v3
TA_w =~ t1 + t2 + t3
FA_w =~ f1 + f2 + f3
level: 2
NV_b =~ v1 + v2 + v3
TA_b =~ t1 + t2 + t3
FA_b =~ f1 + f2 + f3',
data=Mood.s1, cluster='ID', std.lv=TRUE)
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v1" has no variance within some clusters. The
## cluster ids with zero within variance are: S047
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "t3" has no variance within some clusters. The
## cluster ids with zero within variance are: S097
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "f1" has no variance within some clusters. The
## cluster ids with zero within variance are: S047
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "f3" has no variance within some clusters. The
## cluster ids with zero within variance are: S055
## Warning in lav_object_post_check(object): lavaan WARNING: some estimated ov
## variances are negative
2) Alternative models
Here, alternative models are specified with fewer latent factors on level 1 and/or level 2. Specifically, models with two latent variables (Negative Tone and Fatigue) are justified by the strong correlation between Negative Valence and Tense Arousal items, especially on level 2.
# 3 latent on lv1, 2 latent on lv2
m3x2 <- cfa('level: 1
NV_w =~ v1 + v2 + v3
TA_w =~ t1 + t2 + t3
FA_w =~ f1 + f2 + f3
level: 2
NegTone_b =~ v1 + v2 + v3 + t1 + t2 + t3
FA_b =~ f1 + f2 + f3',
data=Mood.s1, cluster='ID', std.lv=TRUE)
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v1" has no variance within some clusters. The
## cluster ids with zero within variance are: S047
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "t3" has no variance within some clusters. The
## cluster ids with zero within variance are: S097
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "f1" has no variance within some clusters. The
## cluster ids with zero within variance are: S047
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "f3" has no variance within some clusters. The
## cluster ids with zero within variance are: S055
# 2 latent factors on both levels
m2x2 <- cfa('level: 1
NegTone_w =~ v1 + v2 + v3 + t1 + t2 + t3
FA_w =~ f1 + f2 + f3
level: 2
NegTone_b =~ v1 + v2 + v3 + t1 + t2 + t3
FA_b =~ f1 + f2 + f3',
data=Mood.s1, cluster='ID', std.lv=TRUE)
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v1" has no variance within some clusters. The
## cluster ids with zero within variance are: S047
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "t3" has no variance within some clusters. The
## cluster ids with zero within variance are: S097
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "f1" has no variance within some clusters. The
## cluster ids with zero within variance are: S047
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "f3" has no variance within some clusters. The
## cluster ids with zero within variance are: S055
# 2 latent on lv1, 3 latent on lv2
m2x3 <- cfa('level: 1
NegTone_w =~ v1 + v2 + v3 + t1 + t2 + t3
FA_w =~ f1 + f2 + f3
level: 2
NV_b =~ v1 + v2 + v3
TA_b =~ t1 + t2 + t3
FA_b =~ f1 + f2 + f3',
data=Mood.s1, cluster='ID', std.lv=TRUE)
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v1" has no variance within some clusters. The
## cluster ids with zero within variance are: S047
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "t3" has no variance within some clusters. The
## cluster ids with zero within variance are: S097
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "f1" has no variance within some clusters. The
## cluster ids with zero within variance are: S047
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "f3" has no variance within some clusters. The
## cluster ids with zero within variance are: S055
## Warning in lav_object_post_check(object): lavaan WARNING: some estimated ov
## variances are negative
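Convergence and the presence of negative variance estimates can also be screened programmatically across the four models. The following is a minimal convenience sketch (hypothetical helper code, not part of the original pipeline), assuming the fitted objects m3x3, m3x2, m2x2, and m2x3:
mods <- list(m3x3=m3x3, m3x2=m3x2, m2x2=m2x2, m2x3=m2x3)
sapply(mods, lavInspect, what="converged") # TRUE if the optimizer converged
sapply(mods, function(m){ pe <- parameterestimates(m)
  any(pe$op=="~~" & pe$lhs==pe$rhs & pe$est<0) }) # TRUE if any variance estimate is negative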
Comments:
- all 4 models reached convergence
- negative variances (Heywood cases) are estimated by two models, m3x3 and m2x3, for item t3 on level 2 (see IMPROPER SOLUTIONS)
- by inspecting the confidence intervals of the estimated variances, we note that the problem is potentially generalized to all Mood models, concerning items t3, t2, v3, and f3 (see IMPROPER SOLUTIONS)
# m3x3
parameterestimates(m3x3)[parameterestimates(m3x3)$op=="~~" & parameterestimates(m3x3)$ci.lower<0,
c("lhs","level","est","se","ci.lower","ci.upper")]
# m3x2
parameterestimates(m3x2)[parameterestimates(m3x2)$op=="~~" & parameterestimates(m3x2)$ci.lower<0,
c("lhs","level","est","se","ci.lower","ci.upper")]
# m2x2
parameterestimates(m2x2)[parameterestimates(m2x2)$op=="~~" & parameterestimates(m2x2)$ci.lower<0,
c("lhs","level","est","se","ci.lower","ci.upper")]
# m2x3
parameterestimates(m2x3)[parameterestimates(m2x3)$op=="~~" & parameterestimates(m2x3)$ci.lower<0,
c("lhs","level","est","se","ci.lower","ci.upper")]
CROSS-lv INVARIANCE
Here, we specify the models assuming cross-level invariance (or cross-level isomorphism). According to Jak & Jorgensen (2017), different levels of factor invariance can be tested: (1) configural invariance, when the same factor structure holds at both levels but the factor loadings differ across levels; (2) weak factorial invariance, when the factor loadings are equal across levels; and (3) strong factorial invariance, when the factor loadings and intercepts are equal across levels and the residual variances on level 2 are zero.
Since each of our constructs (TD, TC and Mood) is conceptualized as a configural cluster construct (see Stapleton et al., 2016), our conceptualization should be supported by a better fit for the models assuming (at least) weak cross-level invariance. Note that each model is parameterized by standardizing the latent factors (std.lv = TRUE) rather than fixing the first indicator of each dimension to 1.
TD
# weak invariance model
td.weakInv <- cfa('level: 1
TD_W =~ a*d1 + b*d2 + c*d3 + d*d4
level: 2
TD_B =~ a*d1 + b*d2 + c*d3 + d*d4',
data = TD.s1, cluster = 'ID', std.lv=TRUE)
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "d1" has no variance within some clusters. The
## cluster ids with zero within variance are: S003
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "d2" has no variance within some clusters. The
## cluster ids with zero within variance are: S055
# strong invariance model
td.strInv <- cfa('level: 1
TD_W =~ a*d1 + b*d2 + c*d3 + d*d4
level: 2
TD_B =~ a*d1 + b*d2 + c*d3 + d*d4
d1 ~~ 0*d1
d2 ~~ 0*d2
d3 ~~ 0*d3
d4 ~~ 0*d4',
data = TD.s1, cluster = 'ID', std.lv=TRUE)
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "d1" has no variance within some clusters. The
## cluster ids with zero within variance are: S003
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "d2" has no variance within some clusters. The
## cluster ids with zero within variance are: S055
Comments:
- both models converged with no problems
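As a minimal sketch of how the relative fit of these nested models could be compared (assuming the configural model td fitted in the MODEL SPECIFICATION section above), fit indices and likelihood-ratio tests can be obtained as follows:
fits <- list(configural=td, weak=td.weakInv, strong=td.strInv) # nested TD models
round(sapply(fits, fitMeasures,
             fit.measures=c("cfi","rmsea","srmr_within","srmr_between","aic","bic")), 3)
anova(td, td.weakInv, td.strInv) # likelihood-ratio tests between nested models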
TC
# weak invariance model
tc.weakInv <- cfa('level: 1
TC_W =~ a*c1 + b*c2 + c*c3
level: 2
TC_B =~ a*c1 + b*c2 + c*c3',
data = TC.s1, cluster = 'ID', std.lv=TRUE)
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "c1" has no variance within some clusters. The
## cluster ids with zero within variance are: S055 S062 S082 S121
## S125
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "c2" has no variance within some clusters. The
## cluster ids with zero within variance are: S008 S035 S097
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "c3" has no variance within some clusters. The
## cluster ids with zero within variance are: S008 S097
# strong invariance model
tc.strInv <- cfa('level: 1
TC_W =~ a*c1 + b*c2 + c*c3
level: 2
TC_B =~ a*c1 + b*c2 + c*c3
c1 ~~ 0*c1
c2 ~~ 0*c2
c3 ~~ 0*c3',
data = TC.s1, cluster = 'ID', std.lv=TRUE)
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "c1" has no variance within some clusters. The
## cluster ids with zero within variance are: S055 S062 S082 S121
## S125
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "c2" has no variance within some clusters. The
## cluster ids with zero within variance are: S008 S035 S097
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "c3" has no variance within some clusters. The
## cluster ids with zero within variance are: S008 S097
Comments:
- both models converged with no problems
MOOD
# weak invariance model
m3x3.weakInv <- cfa('level: 1
NV_w =~ a*v1 + b*v2 + c*v3
TA_w =~ d*t1 + e*t2 + f*t3
FA_w =~ g*f1 + h*f2 + i*f3
level: 2
NV_b =~ a*v1 + b*v2 + c*v3
TA_b =~ d*t1 + e*t2 + f*t3
FA_b =~ g*f1 + h*f2 + i*f3',
data = Mood.s1, cluster = 'ID', std.lv = TRUE)
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v1" has no variance within some clusters. The
## cluster ids with zero within variance are: S047
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "t3" has no variance within some clusters. The
## cluster ids with zero within variance are: S097
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "f1" has no variance within some clusters. The
## cluster ids with zero within variance are: S047
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "f3" has no variance within some clusters. The
## cluster ids with zero within variance are: S055
## Warning in lav_object_post_check(object): lavaan WARNING: some estimated ov
## variances are negative
# strong invariance
m3x3.strInv <- cfa('level: 1
NV_w =~ a*v1 + b*v2 + c*v3
TA_w =~ d*t1 + e*t2 + f*t3
FA_w =~ g*f1 + h*f2 + i*f3
level: 2
NV_b =~ a*v1 + b*v2 + c*v3
TA_b =~ d*t1 + e*t2 + f*t3
FA_b =~ g*f1 + h*f2 + i*f3
v1 ~~ 0*v1
v2 ~~ 0*v2
v3 ~~ 0*v3
t1 ~~ 0*t1
t2 ~~ 0*t2
t3 ~~ 0*t3
f1 ~~ 0*f1
f2 ~~ 0*f2
f3 ~~ 0*f3',
data = Mood.s1, cluster = 'ID', std.lv = TRUE)
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v1" has no variance within some clusters. The
## cluster ids with zero within variance are: S047
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "t3" has no variance within some clusters. The
## cluster ids with zero within variance are: S097
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "f1" has no variance within some clusters. The
## cluster ids with zero within variance are: S047
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "f3" has no variance within some clusters. The
## cluster ids with zero within variance are: S055
Comments:
- both models converged with no problems
- a negative variance (Heywood case) is estimated again for item t3 on level 2 by the weak invariance model (see IMPROPER SOLUTIONS)
- by inspecting the confidence intervals of the estimated variances, we note that the problem is possibly generalized to items v3 and f1 (see IMPROPER SOLUTIONS)
# m3x3
parameterestimates(m3x3.weakInv)[parameterestimates(m3x3.weakInv)$op=="~~" &
parameterestimates(m3x3.weakInv)$ci.lower<0,
c("lhs","level","est","se","ci.lower","ci.upper")]
IMPROPER SOLUTIONS
Models tc, m3x3, m2x3, and m3x3.weakInv are not acceptable due to a negative estimated variance (i.e., improper solutions or Heywood cases). Here, we check the possible reasons.
SUMMARY
Among the possible reasons for Heywood cases (Kolenikov & Bollen, 2012), we evaluated:
- empirical underidentification (Rindskopf, 1984): implausible, since none of the models shows factor loadings close to 0, overfactoring, or correlations between factors close to 0 or to 1
- structural misspecification (Kolenikov & Bollen, 2012): plausible only for model m2x3, whereas the 95% CI around the variance includes zero in all other cases showing a negative variance
- other reasons, such as nonconvergence and missing data: not met by our models
Thus, the most likely reason for the observed Heywood cases is sampling fluctuations around a population parameter close to zero (Van Driel, 1978; Kolenikov & Bollen, 2012). Indeed, by removing participants one by one, we were able to solve the problem in each model:
- the exclusion of four participants (2.88%) solved the problem for model tc
- the exclusion of five participants (3.60%) solved the problem for m3x3 and m3x3.weakInv
- the exclusion of seven participants (5.04%) solved the problem for all MDMQ models
Importantly, the exclusion of such participants is not associated with substantial changes in the standardized loadings.
As an alternative strategy, we solved the improper solutions by fixing the residual variances to .15 x rho2_between (i.e., 15% of the item's estimated between-level variance), assuming a reliability of 85% for the problematic items (see Jöreskog & Sörbom, 1996).
EMP. UNDERID.
First, we check for cases of empirical underidentification. According to Rindskopf (1984), empirical underidentification can arise when one or more conditions for model identification (nonnormality, nonlinearity, nonadditivity) are not met, which can result in large standard errors and impossible estimates (e.g., negative variances) of some parameters. The conditions of empirical underidentification are the following:
- factor loadings close to zero
- overfactoring, that is, when a factor has (a) no large loadings, (b) only one large loading, or (c) only two large loadings and no correlations with other factors
We can check such conditions by inspecting the standardized loadings on level 2.
# tc
standardizedsolution(tc)[standardizedsolution(tc)$op=="=~" & standardizedsolution(tc)$lhs=="TC_B",1:5]
# m3x3
standardizedsolution(m3x3)[standardizedsolution(m3x3)$op=="=~" & substr(standardizedsolution(m3x3)$lhs,4,4)=="b",1:5]
# m2x3
standardizedsolution(m2x3)[standardizedsolution(m2x3)$op=="=~" & substr(standardizedsolution(m2x3)$lhs,4,4)=="b",1:5]
# m3x3.weakInv
standardizedsolution(m3x3.weakInv)[standardizedsolution(m3x3.weakInv)$op=="=~" &
substr(standardizedsolution(m3x3.weakInv)$lhs,4,4)=="b",1:5]
- correlations between factors close to zero or close to one
We can check such conditions by inspecting the estimated correlations between factors.
# m3x3
standardizedsolution(m3x3)[standardizedsolution(m3x3)$op=="~~" &
standardizedsolution(m3x3)$lhs!=standardizedsolution(m3x3)$rhs,"est.std"]
## [1] 0.8780454 0.6377684 0.4664311 0.9013741 0.8184395 0.7516927
# m2x3
standardizedsolution(m2x3)[standardizedsolution(m2x3)$op=="~~" &
standardizedsolution(m2x3)$lhs!=standardizedsolution(m2x3)$rhs,"est.std"]
## [1] 0.5765416 0.8924070 0.8245027 0.7374805
# m3x3.weakInv
standardizedsolution(m3x3.weakInv)[standardizedsolution(m3x3.weakInv)$op=="~~" &
standardizedsolution(m3x3.weakInv)$lhs!=standardizedsolution(m3x3.weakInv)$rhs,"est.std"]
## [1] 0.8728242 0.6330008 0.4638263 0.9059542 0.8381237 0.7428105
Comments:
- none of the factor loadings is close to zero, as the minimum loading across the four models is .5. We can also notice that the standardized loadings corresponding to negative-variance items are > 1
- none of the factors shows no large loadings or overfactoring, since none of them shows just one or two large loadings (but most loadings on level 2 are close to 1 in the Mood models). One possible exception is the tc model, in which a loading of .62 is estimated for c1, whereas the loading is higher than 1 for c3, but we evaluate this difference as not substantial
- none of the correlations between factors is close to zero or one (ranging from .58 to .90)
Conclusions:
We can exclude empirical underidentification as a possible cause of the Heywood cases.
STR. MISSP.
Second, we test whether such cases are a symptom of structural misspecification. According to Kolenikov & Bollen (2012), a 95% CI of the estimated variance that includes zero (i.e., a variance not significantly different from zero) can be interpreted as evidence that the population variance is near zero but positive, and the Heywood case can be seen as the result of sampling fluctuations.
# tc
parameterestimates(tc)[parameterestimates(tc)$op=="~~" & parameterestimates(tc)$ci.lower<0,
c("lhs","level","est","se","ci.lower","ci.upper")]
# m3x3
parameterestimates(m3x3)[parameterestimates(m3x3)$op=="~~" & parameterestimates(m3x3)$ci.lower<0,
c("lhs","level","ci.lower","ci.upper")]
# m2x3
parameterestimates(m2x3)[parameterestimates(m2x3)$op=="~~" & parameterestimates(m2x3)$ci.lower<0,
c("lhs","level","ci.lower","ci.upper")]
# m3x3.weakInv
parameterestimates(m3x3.weakInv)[parameterestimates(m3x3.weakInv)$op=="~~" & parameterestimates(m3x3.weakInv)$ci.lower<0,
c("lhs","level","ci.lower","ci.upper")]
Comments:
- in all models, the 95% CIs around the variances include zero, meaning that the estimated negative variances are not ‘significantly negative’
FIXING VARIANCES
A first strategy to solve the improper solutions is the one suggested by Jöreskog & Sörbom (1996): we assume a reliability of 85% for the problematic items, implying that their residual variance corresponds to 15% of the estimated variance on level 2. Thus, we fix the residual variance to .15 x rho2_between.
# tc
tc.fix <- 'level: 1
TC_W =~ c1 + c2 + c3
level: 2
TC_B =~ c1 + c2 + c3
c3 ~~ rho2 * c3'
fit <- lmer(c3 ~ 1 + (1|ID),data=TC.s1) # null LMER model
c3varlv2 <- as.data.frame(VarCorr(fit))[1,4] # between-subjects variance of item c3
tc.fix <- cfa(gsub("rho2",c3varlv2*.15,tc.fix),data=TC.s1,cluster='ID',std.lv=TRUE) # fixing rho2
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "c1" has no variance within some clusters. The
## cluster ids with zero within variance are: S055 S062 S082 S121
## S125
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "c2" has no variance within some clusters. The
## cluster ids with zero within variance are: S008 S035 S097
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "c3" has no variance within some clusters. The
## cluster ids with zero within variance are: S008 S097
# m3x3
m3x3.fix <- 'level: 1
NV_w =~ v1 + v2 + v3
TA_w =~ t1 + t2 + t3
FA_w =~ f1 + f2 + f3
level: 2
NV_b =~ v1 + v2 + v3
TA_b =~ t1 + t2 + t3
FA_b =~ f1 + f2 + f3
t3 ~~ rho2 * t3'
fit <- lmer(t3 ~ 1 + (1|ID), data=Mood.s1) # null LMER model
t3varlv2 <- as.data.frame(VarCorr(fit))[1,4] # between-subjects variance of item t3
m3x3.fix <- cfa(gsub("rho2",t3varlv2*.15,m3x3.fix),data=Mood.s1,cluster='ID',std.lv=TRUE) # fixing rho2
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v1" has no variance within some clusters. The
## cluster ids with zero within variance are: S047
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "t3" has no variance within some clusters. The
## cluster ids with zero within variance are: S097
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "f1" has no variance within some clusters. The
## cluster ids with zero within variance are: S047
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "f3" has no variance within some clusters. The
## cluster ids with zero within variance are: S055
# m2x3
m2x3.fix <- 'level: 1
NV_w =~ v1 + v2 + v3
TA_w =~ t1 + t2 + t3
FA_w =~ f1 + f2 + f3
level: 2
NegTone_b =~ v1 + v2 + v3 + t1 + t2 + t3
FA_b =~ f1 + f2 + f3
t3 ~~ rho2 * t3'
m2x3.fix <- cfa(gsub("rho2",t3varlv2*.15,m2x3.fix),data=Mood.s1,cluster='ID',std.lv=TRUE) # fixing rho2
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v1" has no variance within some clusters. The
## cluster ids with zero within variance are: S047
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "t3" has no variance within some clusters. The
## cluster ids with zero within variance are: S097
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "f1" has no variance within some clusters. The
## cluster ids with zero within variance are: S047
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "f3" has no variance within some clusters. The
## cluster ids with zero within variance are: S055
# m3x3.weakInv
m3x3.weakInv.fix <- 'level: 1
NV_w =~ a*v1 + b*v2 + c*v3
TA_w =~ d*t1 + e*t2 + f*t3
FA_w =~ g*f1 + h*f2 + i*f3
level: 2
NV_b =~ a*v1 + b*v2 + c*v3
TA_b =~ d*t1 + e*t2 + f*t3
FA_b =~ g*f1 + h*f2 + i*f3
t3 ~~ rho2 * t3'
m3x3.weakInv.fix <- cfa(gsub("rho2",t3varlv2*.15,m3x3.weakInv.fix),data=Mood.s1,cluster='ID',std.lv=TRUE) # fixing rho2
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v1" has no variance within some clusters. The
## cluster ids with zero within variance are: S047
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "t3" has no variance within some clusters. The
## cluster ids with zero within variance are: S097
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "f1" has no variance within some clusters. The
## cluster ids with zero within variance are: S047
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "f3" has no variance within some clusters. The
## cluster ids with zero within variance are: S055
# no more Heywoods
parameterestimates(tc.fix)[parameterestimates(tc.fix)$op=="~~" & parameterestimates(tc.fix)$est<0,]
parameterestimates(m3x3.fix)[parameterestimates(m3x3.fix)$op=="~~" & parameterestimates(m3x3.fix)$est<0,c("lhs","level","est")]
parameterestimates(m2x3.fix)[parameterestimates(m2x3.fix)$op=="~~" & parameterestimates(m2x3.fix)$est<0,c("lhs","level","est")]
parameterestimates(m3x3.weakInv.fix)[parameterestimates(m3x3.weakInv.fix)$op=="~~" &
parameterestimates(m3x3.weakInv.fix)$est<0,c("lhs","level","est")]
Comments:
- as we fixed the residual variance of item c3 at 15% of its estimated variance on level 2, we solved the improper solution for model tc
- similarly, by fixing the residual variance of item t3, we solved the problem for models m3x3, m2x3 and m3x3.weakInv
SAMPLE FLUCTUATIONS
As an alternative solution to fixing the residual variance, we check whether the removal of some participants can solve the improper solutions.
Indeed, since none of the conditions cited in the SUMMARY is clearly met in our data, we proceed with the hypothesis that the Heywood cases are due to population variances that are close to zero (but positive), and that the negative estimates are just the result of sampling fluctuations (Van Driel, 1978; Kolenikov & Bollen, 2012).
Here, we refit each problematic model excluding each participant one by one (i.e., influential analysis). Then, we repeat the procedure by removing the participant whose exclusion is associated with the highest estimated variance, until the variance becomes positive.
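The influential.analysis() and plot.influential() functions used below implement this procedure. As a minimal illustration of the underlying logic (a hypothetical sketch, not the actual implementation), a leave-one-out loop could look as follows:
loo.var <- function(model, data, item, cluster="ID"){ # hypothetical leave-one-out sketch
  ids <- unique(data[[cluster]])
  est <- sapply(ids, function(id){ # refit the model excluding one participant at a time
    fit <- cfa(model, data=data[data[[cluster]]!=id,], cluster=cluster, std.lv=TRUE)
    pe <- parameterestimates(fit)
    pe[pe$op=="~~" & pe$lhs==item & pe$rhs==item & pe$level==2, "est"] }) # level-2 variance of the item
  data.frame(excluded=ids, est=est)[order(est),] } # participants yielding the lowest estimates first
# e.g., loo.var(model='level: 1
#                       TC_W =~ c1 + c2 + c3
#                      level: 2
#                       TC_B =~ c1 + c2 + c3', data=TC.s1, item="c3")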
A) TC
We start by taking a look at the variance estimated after the exclusion of each participant in model tc. Here, we can focus exclusively on item c3.
First, the parameters are estimated and plotted after the exclusion of each participant.
# tc
write.csv(influential.analysis(data=TC.s1,
m='level: 1
TC_W =~ c1 + c2 + c3
level: 2
TC_B =~ c1 + c2 + c3',
parameter="var",n.items=3,item.labels=c("c1", "c2", "c3")),
"heywood cases/tc.0.csv",row.names=FALSE)
# c3 variance in tc
plot.influential(data=read.csv("heywood cases/tc.0.csv"),par="var",variable="c3",level=2,
threshold_upper=-.1,threshold_lower=-.125)
grid.arrange(plot.traj(s1.w,var="TC","S125"),
plot.traj(s1.w,var="TC","S062"),
plot.traj(s1.w,var="TC","S066"),nrow=3)
Comments:
- in any case, c3 variance remains negative
- participants S125, S062 and S066 are associated with the lowest variance estimated for c3 in both models, and the visual inspection of the time trajectories of their scores on the TCS items (light blue = c3) suggests some cases of flat/unusual responses
Thus, we start by excluding these three participants, and we repeat the procedure.
# tc
write.csv(influential.analysis(data=TC.s1[TC.s1$ID!="S125" & TC.s1$ID!="S062" & TC.s1$ID!="S066",],
m='level: 1
TC_W =~ c1 + c2 + c3
level: 2
TC_B =~ c1 + c2 + c3',
parameter="var",n.items=3,item.labels=c("c1", "c2", "c3")),
"heywood cases/tc.3.csv",row.names=FALSE)
# c3 variance in tc
plot.influential(data=read.csv("heywood cases/tc.3.csv"),par="var",variable="c3",level=2,
threshold_upper=0,threshold_lower=-.02)
plot.traj(s1.w,var="TC","S121")
Comments:
- after the removal of three participants, the variance estimate for item c3 is still negative
- however, now we can see that the further removal of some participants solves the improper solution. In particular, participant S121 is associated with the lowest variance estimate, and their exclusion leads to a positive variance estimate for item c3. The visual inspection of the time trajectories of S121's responses (light blue = c3) suggests some cases of flat/unusual responses
Thus, we repeat the procedure by excluding also this participant.
# tc
write.csv(influential.analysis(data=TC.s1[TC.s1$ID!="S125" & TC.s1$ID!="S062" & TC.s1$ID!="S066" & TC.s1$ID!="S121",],
m='level: 1
TC_W =~ c1 + c2 + c3
level: 2
TC_B =~ c1 + c2 + c3',
parameter="var",n.items=3,item.labels=c("c1", "c2", "c3")),
"heywood cases/tc.4.csv",row.names=FALSE)
# c3 variance in tc
plot.influential(data=read.csv("heywood cases/tc.4.csv"),par="var",variable="c3",level=2,threshold_lower=0)
Results:
After the removal of four participants (2.88%), the
estimated variance on level 2 for item c3
is positive in
model tc
, and no other Heywood cases are observed in any TC
model.
TC_noInfl4.s1 <- TC.s1[TC.s1$ID!="S125" & TC.s1$ID!="S062" & TC.s1$ID!="S066" & TC.s1$ID!="S121",]
# configural model
tc_noInfl4 <- cfa('level: 1
TC_W =~ c1 + c2 + c3
level: 2
TC_B =~ c1 + c2 + c3',
data=TC_noInfl4.s1, cluster="ID", std.lv=TRUE)
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "c1" has no variance within some clusters. The
## cluster ids with zero within variance are: S055 S082
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "c2" has no variance within some clusters. The
## cluster ids with zero within variance are: S008 S035 S097
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "c3" has no variance within some clusters. The
## cluster ids with zero within variance are: S008 S097
# weak invariance model
tc.weakInv_noInfl4 <- cfa('level: 1
TC_W =~ a*c1 + b*c2 + c*c3
level: 2
TC_B =~ a*c1 + b*c2 + c*c3',
data = TC_noInfl4.s1, cluster = 'ID',std.lv=TRUE)
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "c1" has no variance within some clusters. The
## cluster ids with zero within variance are: S055 S082
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "c2" has no variance within some clusters. The
## cluster ids with zero within variance are: S008 S035 S097
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "c3" has no variance within some clusters. The
## cluster ids with zero within variance are: S008 S097
# strong invariance model
tc.strInv_noInfl4 <- cfa('level: 1
TC_W =~ a*c1 + b*c2 + c*c3
level: 2
TC_B =~ a*c1 + b*c2 + c*c3
c1 ~~ 0*c1
c2 ~~ 0*c2
c3 ~~ 0*c3',
data = TC_noInfl4.s1, cluster = 'ID',std.lv=TRUE)
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "c1" has no variance within some clusters. The
## cluster ids with zero within variance are: S055 S082
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "c2" has no variance within some clusters. The
## cluster ids with zero within variance are: S008 S035 S097
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "c3" has no variance within some clusters. The
## cluster ids with zero within variance are: S008 S097
As a final check, we visually inspect the differences in the estimated parameters (standardized loadings) with and without the four excluded participants.
check.influential(tc,TC.s1,IDs=c("S125","S062","S066","S121"))
Conclusions:
- The exclusion of the four participants S125, S062, S066, and S121 does not substantially change any of the estimated loadings
- Thus, we can rely on the models estimated without these participants
B) MOOD
Here, we evaluate Mood models showing Heywood cases.
B1. m3x3 and m3x3.weakInv
We start by taking a look at the variance estimated after the exclusion of each participant in models m3x3 and m3x3.weakInv. We focus on item t3, but we also take a look at items v3 and t2 for model m3x3, and v3 and f1 for model m3x3.weakInv.
First, the parameters are estimated and plotted after the exclusion of each participant.
# m3x3
write.csv(influential.analysis(data=Mood.s1,
parameter="var"),"heywood cases/m3x3.0.csv",row.names=FALSE)
# m3x3.weakInv
write.csv(influential.analysis(data=Mood.s1,
m='level: 1
NV_w =~ a*v1 + b*v2 + c*v3
TA_w =~ d*t1 + e*t2 + f*t3
FA_w =~ g*f1 + h*f2 + i*f3
level: 2
NV_b =~ a*v1 + b*v2 + c*v3
TA_b =~ d*t1 + e*t2 + f*t3
FA_b =~ g*f1 + h*f2 + i*f3',
parameter="var"),"heywood cases/m3x3.weakInv.0.csv",row.names=FALSE)
# t3 variance in m3x3 (above) and m3x3.weakInv (below)
grid.arrange(plot.influential(data=read.csv("heywood cases/m3x3.0.csv"),par="var",variable="t3",level=2,
threshold_upper=-.004,threshold_lower=-.008),
plot.influential(data=read.csv("heywood cases/m3x3.weakInv.0.csv"),par="var",variable="t3",level=2,
threshold_upper=-.010,threshold_lower=-.014))
# v3 variance in m3x3 (above) and m3x3.weakInv (below)
grid.arrange(plot.influential(data=read.csv("heywood cases/m3x3.0.csv"),par="var",variable="v3",level=2,
threshold_lower=.0025),
plot.influential(data=read.csv("heywood cases/m3x3.weakInv.0.csv"),par="var",variable="v3",level=2,
threshold_lower=.01))
# t2 variance in m3x3 (above) and f1 variance in model m3x3.weakInv (below)
grid.arrange(plot.influential(data=read.csv("heywood cases/m3x3.0.csv"),par="var",variable="t2",level=2,
threshold_lower=.008),
plot.influential(data=read.csv("heywood cases/m3x3.weakInv.0.csv"),par="var",variable="f1",level=2,
threshold_lower=.028))
plot.traj(s1.w,var="Mood","S017")
plot.traj(s1.w,var="Mood","S035")
Comments:
- in any case, t3 variance remains negative in both models, whereas no negative variance is detected in the other potentially problematic items
- participants S017 and S035 are both associated with lower variance estimates for t3 in both models, and the visual inspection of the time trajectories of their scores on the Mood items (dark red = t3) suggests some cases of flat/unusual responses, especially in the former. Although the exclusion of the former leads to a lower variance estimate for item v3 in both models, the estimated variance is still positive
Thus, we start by excluding these two participants, and we repeat the procedure.
# m3x3
write.csv(influential.analysis(data=Mood.s1[Mood.s1$ID!="S017" & Mood.s1$ID!="S035",],
parameter="var"),"heywood cases/m3x3.2.csv",row.names=FALSE)
# m3x3.weakInv
write.csv(influential.analysis(data=Mood.s1[Mood.s1$ID!="S017" & Mood.s1$ID!="S035",],
m='level: 1
NV_w =~ a*v1 + b*v2 + c*v3
TA_w =~ d*t1 + e*t2 + f*t3
FA_w =~ g*f1 + h*f2 + i*f3
level: 2
NV_b =~ a*v1 + b*v2 + c*v3
TA_b =~ d*t1 + e*t2 + f*t3
FA_b =~ g*f1 + h*f2 + i*f3',
parameter="var"),"heywood cases/m3x3.weakInv.2.csv",row.names=FALSE)
# t3 variance in m3x3 (above) and m3x3.weakInv (below)
grid.arrange(plot.influential(data=read.csv("heywood cases/m3x3.2.csv"),par="var",variable="t3",level=2,
threshold_lower=0),
plot.influential(data=read.csv("heywood cases/m3x3.weakInv.2.csv"),par="var",variable="t3",level=2,
threshold_upper=-.003,threshold_lower=-.008))
# v3 variance in m3x3 (above) and m3x3.weakInv (below)
grid.arrange(plot.influential(data=read.csv("heywood cases/m3x3.2.csv"),par="var",variable="v3",level=2,
threshold_lower=0),
plot.influential(data=read.csv("heywood cases/m3x3.weakInv.2.csv"),par="var",variable="v3",level=2,
threshold_lower=0))
# t2 variance in m3x3 (above) and f1 variance in model m3x3.weakInv (below)
grid.arrange(plot.influential(data=read.csv("heywood cases/m3x3.2.csv"),par="var",variable="t2",level=2,
threshold_lower=.004),
plot.influential(data=read.csv("heywood cases/m3x3.weakInv.2.csv"),par="var",variable="f1",level=2,
threshold_lower=.03))
plot.traj(s1.w,var="Mood","S139")
plot.traj(s1.w,var="Mood","S008")
Comments:
- after the removal of two participants, t3 variance is positive in model m3x3
- in any case, t3 variance remains negative in model m3x3.weakInv
- in some cases, v3 variance becomes negative in both models
- specifically, the exclusion of participants S139 and S008 is associated with the highest variance for item t3 in model m3x3.weakInv, while it does not lead to negative variance estimates in other items
Thus, we repeat the procedure by excluding these participants.
# m3x3
write.csv(influential.analysis(data=Mood.s1[Mood.s1$ID!="S017" & Mood.s1$ID!="S035" &
Mood.s1$ID!="S139" & Mood.s1$ID!="S008",],
parameter="var"),"heywood cases/m3x3.4.csv",row.names=FALSE)
# m3x3.weakInv
write.csv(influential.analysis(data=Mood.s1[Mood.s1$ID!="S017" & Mood.s1$ID!="S035" &
Mood.s1$ID!="S139" & Mood.s1$ID!="S008",],
m='level: 1
NV_w =~ a*v1 + b*v2 + c*v3
TA_w =~ d*t1 + e*t2 + f*t3
FA_w =~ g*f1 + h*f2 + i*f3
level: 2
NV_b =~ a*v1 + b*v2 + c*v3
TA_b =~ d*t1 + e*t2 + f*t3
FA_b =~ g*f1 + h*f2 + i*f3',
parameter="var"),"heywood cases/m3x3.weakInv.4.csv",row.names=FALSE)
# t3 variance in m3x3 (above) and m3x3.weakInv (below)
grid.arrange(plot.influential(data=read.csv("heywood cases/m3x3.4.csv"),par="var",variable="t3",level=2,
threshold_lower=0),
plot.influential(data=read.csv("heywood cases/m3x3.weakInv.4.csv"),par="var",variable="t3",level=2,
threshold_upper=0,threshold_lower=-.004))
# v3 variance in m3x3 (above) and m3x3.weakInv (below)
grid.arrange(plot.influential(data=read.csv("heywood cases/m3x3.4.csv"),par="var",variable="v3",level=2,
threshold_lower=0),
plot.influential(data=read.csv("heywood cases/m3x3.weakInv.4.csv"),par="var",variable="v3",level=2,
threshold_lower=0))
# t2 variance in m3x3 (above) and f1 variance in model m3x3.weakInv (below)
grid.arrange(plot.influential(data=read.csv("heywood cases/m3x3.4.csv"),par="var",variable="t2",level=2,
threshold_lower=.004),
plot.influential(data=read.csv("heywood cases/m3x3.weakInv.4.csv"),par="var",variable="f1",level=2,
threshold_lower=.03))
plot.traj(s1.w,var="Mood","S106")
Comments:
- after the removal of four participants, the variance estimate for item t3 in model m3x3.weakInv is still negative
- however, now we can see that the further removal of some participants solves the improper solution. In particular, participant S106 is associated with the lowest variance estimate, and their exclusion leads to a positive variance estimate for item t3, while it does not lead to negative variance estimates in other items
Thus, we repeat the procedure by excluding this participant.
# m3x3
write.csv(influential.analysis(data=Mood.s1[Mood.s1$ID!="S017" & Mood.s1$ID!="S035" &
Mood.s1$ID!="S139" & Mood.s1$ID!="S008" &
Mood.s1$ID!="S106",],
parameter="var"),"heywood cases/m3x3.5.csv",row.names=FALSE)
# m3x3.weakInv
write.csv(influential.analysis(data=Mood.s1[Mood.s1$ID!="S017" & Mood.s1$ID!="S035" &
Mood.s1$ID!="S139" & Mood.s1$ID!="S008" &
Mood.s1$ID!="S106",],
m='level: 1
NV_w =~ a*v1 + b*v2 + c*v3
TA_w =~ d*t1 + e*t2 + f*t3
FA_w =~ g*f1 + h*f2 + i*f3
level: 2
NV_b =~ a*v1 + b*v2 + c*v3
TA_b =~ d*t1 + e*t2 + f*t3
FA_b =~ g*f1 + h*f2 + i*f3',
parameter="var"),"heywood cases/m3x3.weakInv.5.csv",row.names=FALSE)
# t3 variance in m3x3 (above) and m3x3.weakInv (below)
grid.arrange(plot.influential(data=read.csv("heywood cases/m3x3.5.csv"),par="var",variable="t3",level=2,
threshold_lower=0),
plot.influential(data=read.csv("heywood cases/m3x3.weakInv.5.csv"),par="var",variable="t3",level=2,
threshold_lower=0))
# v3 variance in m3x3 (above) and m3x3.weakInv (below)
grid.arrange(plot.influential(data=read.csv("heywood cases/m3x3.5.csv"),par="var",variable="v3",level=2,
threshold_lower=0),
plot.influential(data=read.csv("heywood cases/m3x3.weakInv.5.csv"),par="var",variable="v3",level=2,
threshold_lower=0))
# t2 variance in m3x3 (above) and f1 variance in model m3x3.weakInv (below)
grid.arrange(plot.influential(data=read.csv("heywood cases/m3x3.5.csv"),par="var",variable="t2",level=2,
threshold_lower=.004),
plot.influential(data=read.csv("heywood cases/m3x3.weakInv.5.csv"),par="var",variable="f1",level=2,
threshold_lower=.03))
Results:
After the removal of five participants (3.60%), the estimated variance on level 2 for item t3 is positive in both the m3x3 and m3x3.weakInv models, and no other Heywood cases are observed in these models.
# excluding influential participants
Mood_noInfl5.s1 <- Mood.s1[Mood.s1$ID!="S017" & Mood.s1$ID!="S035" &
Mood.s1$ID!="S139" & Mood.s1$ID!="S008" &
Mood.s1$ID!="S106",]
# hypothesized multilevel model
m3x3_noInfl5 <- cfa('level: 1
NV_w =~ v1 + v2 + v3
TA_w =~ t1 + t2 + t3
FA_w =~ f1 + f2 + f3
level: 2
NV_b =~ v1 + v2 + v3
TA_b =~ t1 + t2 + t3
FA_b =~ f1 + f2 + f3',
data=Mood_noInfl5.s1,cluster='ID',std.lv=TRUE)
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v1" has no variance within some clusters. The
## cluster ids with zero within variance are: S047
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "t3" has no variance within some clusters. The
## cluster ids with zero within variance are: S097
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "f1" has no variance within some clusters. The
## cluster ids with zero within variance are: S047
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "f3" has no variance within some clusters. The
## cluster ids with zero within variance are: S055
# weak invariance model
m3x3.weakInv_noInfl5 <- cfa('level: 1
NV_w =~ a*v1 + b*v2 + c*v3
TA_w =~ d*t1 + e*t2 + f*t3
FA_w =~ g*f1 + h*f2 + i*f3
level: 2
NV_b =~ a*v1 + b*v2 + c*v3
TA_b =~ d*t1 + e*t2 + f*t3
FA_b =~ g*f1 + h*f2 + i*f3',
data=Mood_noInfl5.s1,cluster='ID',std.lv=TRUE)
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v1" has no variance within some clusters. The
## cluster ids with zero within variance are: S047
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "t3" has no variance within some clusters. The
## cluster ids with zero within variance are: S097
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "f1" has no variance within some clusters. The
## cluster ids with zero within variance are: S047
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "f3" has no variance within some clusters. The
## cluster ids with zero within variance are: S055
As a final check, we visually inspect the differences in the estimated parameters (standardized loadings) with and without the five excluded participants.
check.influential(m3x3,Mood.s1,IDs=c("S017","S035","S139","S008","S106"))
check.influential(m3x3.weakInv,Mood.s1,IDs=c("S017","S035","S139","S008","S106"))
Conclusions:
- The exclusion of the five participants does not substantially change any of the estimated loadings
- Thus, we can rely on the models estimated without these participants
Then, we check whether the exclusion of these participants solves the improper solution in the other problematic model (m2x3) and/or leads to new improper solutions in the remaining Mood models (m3x2, m2x2, m3x3.strInv).
# 2 latent on lv1, 3 latent on lv2 (showed Heywood case for t3)
m2x3_noInfl5 <- cfa('level: 1
NegTone_w =~ v1 + v2 + v3 + t1 + t2 + t3
FA_w =~ f1 + f2 + f3
level: 2
NV_b =~ v1 + v2 + v3
TA_b =~ t1 + t2 + t3
FA_b =~ f1 + f2 + f3',
data=Mood_noInfl5.s1,cluster='ID',std.lv=TRUE)
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v1" has no variance within some clusters. The
## cluster ids with zero within variance are: S047
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "t3" has no variance within some clusters. The
## cluster ids with zero within variance are: S097
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "f1" has no variance within some clusters. The
## cluster ids with zero within variance are: S047
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "f3" has no variance within some clusters. The
## cluster ids with zero within variance are: S055
## Warning in lav_object_post_check(object): lavaan WARNING: some estimated ov
## variances are negative
# 3 latent on lv1, 2 latent on lv2
m3x2_noInfl5 <- cfa('level: 1
NV_w =~ v1 + v2 + v3
TA_w =~ t1 + t2 + t3
FA_w =~ f1 + f2 + f3
level: 2
NegTone_b =~ v1 + v2 + v3 + t1 + t2 + t3
FA_b =~ f1 + f2 + f3',
data=Mood_noInfl5.s1,cluster='ID',std.lv=TRUE)
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v1" has no variance within some clusters. The
## cluster ids with zero within variance are: S047
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "t3" has no variance within some clusters. The
## cluster ids with zero within variance are: S097
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "f1" has no variance within some clusters. The
## cluster ids with zero within variance are: S047
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "f3" has no variance within some clusters. The
## cluster ids with zero within variance are: S055
# 2 latent factors on both levels
m2x2_noInfl5 <- cfa('level: 1
NegTone_w =~ v1 + v2 + v3 + t1 + t2 + t3
FA_w =~ f1 + f2 + f3
level: 2
NegTone_b =~ v1 + v2 + v3 + t1 + t2 + t3
FA_b =~ f1 + f2 + f3',
data=Mood_noInfl5.s1,cluster='ID',std.lv=TRUE)
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v1" has no variance within some clusters. The
## cluster ids with zero within variance are: S047
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "t3" has no variance within some clusters. The
## cluster ids with zero within variance are: S097
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "f1" has no variance within some clusters. The
## cluster ids with zero within variance are: S047
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "f3" has no variance within some clusters. The
## cluster ids with zero within variance are: S055
# strong invariance
m3x3.strInv_noInfl5 <- cfa('level: 1
NV_w =~ a*v1 + b*v2 + c*v3
TA_w =~ d*t1 + e*t2 + f*t3
FA_w =~ g*f1 + h*f2 + i*f3
level: 2
NV_b =~ a*v1 + b*v2 + c*v3
TA_b =~ d*t1 + e*t2 + f*t3
FA_b =~ g*f1 + h*f2 + i*f3
v1 ~~ 0*v1
v2 ~~ 0*v2
v3 ~~ 0*v3
t1 ~~ 0*t1
t2 ~~ 0*t2
t3 ~~ 0*t3
f1 ~~ 0*f1
f2 ~~ 0*f2
f3 ~~ 0*f3',
data=Mood_noInfl5.s1,cluster='ID',std.lv=TRUE)
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v1" has no variance within some clusters. The
## cluster ids with zero within variance are: S047
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "t3" has no variance within some clusters. The
## cluster ids with zero within variance are: S097
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "f1" has no variance within some clusters. The
## cluster ids with zero within variance are: S047
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "f3" has no variance within some clusters. The
## cluster ids with zero within variance are: S055
Comments:
- the exclusion of five participants solved the improper solution for models m3x3 and m3x3.weakInv, but a negative variance remains for both item t3 and item v3 in model m2x3
# m2x3
parameterestimates(m2x3_noInfl5)[parameterestimates(m2x3_noInfl5)$op=="~~" & parameterestimates(m2x3_noInfl5)$ci.lower<0,
c("lhs","level","est","se","ci.lower","ci.upper")]
B2. m2x3
Here, we take a look at the variance estimated after the exclusion of each participant in model m2x3, considering also m3x3 and m3x3.weakInv. We focus on both items t3 and v3, but we also take a look at item t2.
First, the parameters are estimated and plotted after the exclusion of each participant.
# m2x3
write.csv(influential.analysis(data=Mood_noInfl5.s1,
m='level: 1
NegTone_w =~ v1 + v2 + v3 + t1 + t2 + t3
FA_w =~ f1 + f2 + f3
level: 2
NV_b =~ v1 + v2 + v3
TA_b =~ t1 + t2 + t3
FA_b =~ f1 + f2 + f3',
parameter="var"),"heywood cases/m2x3.5.csv",row.names=FALSE)
# t3 (above), v3 variance (middle) and t2 variance in m2x3
grid.arrange(plot.influential(data=read.csv("heywood cases/m2x3.5.csv"),par="var",variable="t3",level=2,
threshold_upper=0,threshold_lower=-.004),
plot.influential(data=read.csv("heywood cases/m2x3.5.csv"),par="var",variable="v3",level=2,
threshold_upper=0,threshold_lower=-.005),
plot.influential(data=read.csv("heywood cases/m2x3.5.csv"),par="var",variable="t2",level=2,
threshold_lower=0),nrow=3)
plot.traj(s1.w,var="Mood","S142")
plot.traj(s1.w,var="Mood","S067")
Comments:
- we can see that the exclusion of some participants leads to a positive variance estimate for items t3 and v3 in model m2x3
- in particular, participants S142 and S067 are associated with the highest variance estimates for items t3 and v3, respectively, whereas their exclusion does not lead to negative variance estimates in other items or in other models. The visual inspection of the time trajectories of these participants' responses suggests some cases of flat/unusual response patterns
Thus, we repeat the procedure by excluding these participants.
# m2x3
write.csv(influential.analysis(data=Mood_noInfl5.s1[Mood_noInfl5.s1$ID!="S142" & Mood_noInfl5.s1$ID!="S067",],
m='level: 1
NegTone_w =~ v1 + v2 + v3 + t1 + t2 + t3
FA_w =~ f1 + f2 + f3
level: 2
NV_b =~ v1 + v2 + v3
TA_b =~ t1 + t2 + t3
FA_b =~ f1 + f2 + f3',
parameter="var"),"heywood cases/m2x3.7.csv",row.names=FALSE)
# t3 (above), v3 variance (middle) and t2 variance in m2x3
grid.arrange(plot.influential(data=read.csv("heywood cases/m2x3.7.csv"),par="var",variable="t3",level=2,
threshold_upper=0),
plot.influential(data=read.csv("heywood cases/m2x3.7.csv"),par="var",variable="v3",level=2,
threshold_upper=0),
plot.influential(data=read.csv("heywood cases/m2x3.7.csv"),par="var",variable="t2",level=2,
threshold_upper=0),nrow=3)
Results:
After the removal of seven participants (5.04%), the estimated variance on level 2 for items t3 and v3 is positive in model m2x3.
# excluding influential participants
Mood_noInfl7.s1 <- Mood_noInfl5.s1[Mood_noInfl5.s1$ID!="S142" & Mood_noInfl5.s1$ID!="S067",]
# 2 latent on lv1, 3 latent on lv2 (showed Heywood case for t3)
m2x3_noInfl7 <- cfa('level: 1
NegTone_w =~ v1 + v2 + v3 + t1 + t2 + t3
FA_w =~ f1 + f2 + f3
level: 2
NV_b =~ v1 + v2 + v3
TA_b =~ t1 + t2 + t3
FA_b =~ f1 + f2 + f3',
data=Mood_noInfl7.s1, cluster='ID', std.lv=TRUE)
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v1" has no variance within some clusters. The
## cluster ids with zero within variance are: S047
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "t3" has no variance within some clusters. The
## cluster ids with zero within variance are: S097
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "f1" has no variance within some clusters. The
## cluster ids with zero within variance are: S047
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "f3" has no variance within some clusters. The
## cluster ids with zero within variance are: S055
Then we check if the exclusion of such participants leads to new
improper solutions in the remaining models of Mood (m3x3
,
m3x2
, m2x2
, m3x3.weakInv
,
m3x3.strInv
).
# hypothesized multilevel model
m3x3_noInfl7 <- cfa('level: 1
NV_w =~ v1 + v2 + v3
TA_w =~ t1 + t2 + t3
FA_w =~ f1 + f2 + f3
level: 2
NV_b =~ v1 + v2 + v3
TA_b =~ t1 + t2 + t3
FA_b =~ f1 + f2 + f3',
data=Mood_noInfl7.s1,cluster='ID',std.lv=TRUE)
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v1" has no variance within some clusters. The
## cluster ids with zero within variance are: S047
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "t3" has no variance within some clusters. The
## cluster ids with zero within variance are: S097
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "f1" has no variance within some clusters. The
## cluster ids with zero within variance are: S047
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "f3" has no variance within some clusters. The
## cluster ids with zero within variance are: S055
# weak invariance model
m3x3.weakInv_noInfl7 <- cfa('level: 1
NV_w =~ a*v1 + b*v2 + c*v3
TA_w =~ d*t1 + e*t2 + f*t3
FA_w =~ g*f1 + h*f2 + i*f3
level: 2
NV_b =~ a*v1 + b*v2 + c*v3
TA_b =~ d*t1 + e*t2 + f*t3
FA_b =~ g*f1 + h*f2 + i*f3',
data=Mood_noInfl7.s1,cluster='ID',std.lv=TRUE)
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v1" has no variance within some clusters. The
## cluster ids with zero within variance are: S047
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "t3" has no variance within some clusters. The
## cluster ids with zero within variance are: S097
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "f1" has no variance within some clusters. The
## cluster ids with zero within variance are: S047
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "f3" has no variance within some clusters. The
## cluster ids with zero within variance are: S055
# 3 latent on lv1, 2 latent on lv2
m3x2_noInfl7 <- cfa('level: 1
NV_w =~ v1 + v2 + v3
TA_w =~ t1 + t2 + t3
FA_w =~ f1 + f2 + f3
level: 2
NegTone_b =~ v1 + v2 + v3 + t1 + t2 + t3
FA_b =~ f1 + f2 + f3',
data=Mood_noInfl7.s1,cluster='ID',std.lv=TRUE)
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v1" has no variance within some clusters. The
## cluster ids with zero within variance are: S047
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "t3" has no variance within some clusters. The
## cluster ids with zero within variance are: S097
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "f1" has no variance within some clusters. The
## cluster ids with zero within variance are: S047
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "f3" has no variance within some clusters. The
## cluster ids with zero within variance are: S055
# 2 latent factors on both levels
m2x2_noInfl7 <- cfa('level: 1
NegTone_w =~ v1 + v2 + v3 + t1 + t2 + t3
FA_w =~ f1 + f2 + f3
level: 2
NegTone_b =~ v1 + v2 + v3 + t1 + t2 + t3
FA_b =~ f1 + f2 + f3',
data=Mood_noInfl7.s1,cluster='ID',std.lv=TRUE)
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v1" has no variance within some clusters. The
## cluster ids with zero within variance are: S047
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "t3" has no variance within some clusters. The
## cluster ids with zero within variance are: S097
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "f1" has no variance within some clusters. The
## cluster ids with zero within variance are: S047
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "f3" has no variance within some clusters. The
## cluster ids with zero within variance are: S055
# strong invariance
m3x3.strInv_noInfl7 <- cfa('level: 1
NV_w =~ a*v1 + b*v2 + c*v3
TA_w =~ d*t1 + e*t2 + f*t3
FA_w =~ g*f1 + h*f2 + i*f3
level: 2
NV_b =~ a*v1 + b*v2 + c*v3
TA_b =~ d*t1 + e*t2 + f*t3
FA_b =~ g*f1 + h*f2 + i*f3
v1 ~~ 0*v1
v2 ~~ 0*v2
v3 ~~ 0*v3
t1 ~~ 0*t1
t2 ~~ 0*t2
t3 ~~ 0*t3
f1 ~~ 0*f1
f2 ~~ 0*f2
f3 ~~ 0*f3',
data=Mood_noInfl7.s1,cluster='ID',std.lv=TRUE)
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v1" has no variance within some clusters. The
## cluster ids with zero within variance are: S047
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "t3" has no variance within some clusters. The
## cluster ids with zero within variance are: S097
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "f1" has no variance within some clusters. The
## cluster ids with zero within variance are: S047
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "f3" has no variance within some clusters. The
## cluster ids with zero within variance are: S055
Comments:
- none of the models specified for Mood shows negative variance estimates after the removal of seven participants
As a final check, we visually inspect the differences in the estimated parameters (standardized loadings) for the configural model with and without the seven excluded participants.
check.influential(m3x3,Mood.s1,IDs=c("S017","S035","S139","S008","S106","S142","S067"))
Conclusions:
- The exclusion of the seven participants does not substantially change any of the estimated loadings
- Thus, we can rely on the models estimated without these participants
INFLUENTIAL CASES
Here, we perform an analysis of influential cases in all specified models. Our aim is to evaluate the presence of participants that strongly affect the estimated loadings in any of the included variables.
We consider parameter changes higher than 0.10 as an arbitrary criterion for excluding a given participant, provided that the response pattern of that participant suggests inaccurate answering. If a participant is excluded, we repeat the analysis until no influential cases are detected.
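As a numeric counterpart to the graphical comparison produced by check.influential() below, the maximum absolute change in standardized loadings can be computed by refitting a model without the candidate participants. The following max.load.change() function is a hypothetical sketch (not the function actually used in the pipeline):
max.load.change <- function(model, data, ids, cluster="ID"){
  fit.all <- cfa(model, data=data, cluster=cluster, std.lv=TRUE) # fit with all participants
  fit.red <- cfa(model, data=data[!data[[cluster]] %in% ids,], cluster=cluster, std.lv=TRUE) # fit without the candidates
  sl.all <- standardizedsolution(fit.all)
  sl.red <- standardizedsolution(fit.red)
  max(abs(sl.all[sl.all$op=="=~","est.std"] - sl.red[sl.red$op=="=~","est.std"])) } # should be < .10
# e.g., max.load.change(model='level: 1
#                               TD_W =~ d1 + d2 + d3 + d4
#                              level: 2
#                               TD_B =~ d1 + d2 + d3 + d4', data=TD.s1, ids=c("S003","S055"))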
TD
For Task Demand, we focus on the configural and the weak invariance models.
td
We start by inspecting differences associated with the exclusion of
the two participants with no variance for items
d1
(S003) and d2
(S055).
check.influential(td,TD.s1,IDs=c("S003","S055"))
Comments:
- no substantial differences are shown in any of the parameters estimated with and without these participants. All changes are < 0.025
Then, we inspect the loadings associated with the exclusion of each participant. The blue dot represents the parameter estimated considering all participants. The red dots indicate cases associated with negative variance.
write.csv(influential.analysis(data=TD.s1,parameter="load",
n.items=4,item.labels=c("d1","d2","d3","d4"),
m='level: 1
TD_W =~ d1 + d2 + d3 + d4
level: 2
TD_B =~ d1 + d2 + d3 + d4'),
"influential/td.csv",row.names=FALSE)
# Level 1 loadings d1 and d2
gridExtra::grid.arrange(plot.influential(data=read.csv("influential/td.csv"),
parameter="load",variable="d1",level=1),
plot.influential(data=read.csv("influential/td.csv"),
parameter="load",variable="d2",level=1),nrow=2)
# Level 1 loadings d3 and d4
gridExtra::grid.arrange(plot.influential(data=read.csv("influential/td.csv"),
parameter="load",variable="d3",level=1),
plot.influential(data=read.csv("influential/td.csv"),
parameter="load",variable="d4",level=1),nrow=2)
# Level 2 loadings d1 and d2
gridExtra::grid.arrange(plot.influential(data=read.csv("influential/td.csv"),
parameter="load",variable="d1",level=2),
plot.influential(data=read.csv("influential/td.csv"),
parameter="load",variable="d2",level=2),nrow=2)
# Level 2 loadings d3 and d4
gridExtra::grid.arrange(plot.influential(data=read.csv("influential/td.csv"),
parameter="load",variable="d3",level=2),
plot.influential(data=read.csv("influential/td.csv"),
parameter="load",variable="d4",level=2),nrow=2)
Comments:
- none of the participants is associated with changes higher than .10 in the estimated standardized loadings
- the maximum change is 0.04 (for item d3 on level 2)
td.weakInv
We start by inspecting differences associated with the exclusion of
the two participants with no variance for items
d1
(S003) and d2
(S055).
check.influential(td.weakInv,TD.s1,IDs=c("S003","S055"))
Comments:
- no substantial differences are shown in any of the parameters estimated with and without these participants
- all changes are < 0.025
Then, we inspect the loadings associated with the exclusion of each participant. The blue dot represents the parameter estimated considering all participants. The red dots indicate cases associated with negative variance.
write.csv(influential.analysis(data=TD.s1,parameter="load",
n.items=4,item.labels=c("d1","d2","d3","d4"),
m='level: 1
TD_W =~ a*d1 + b*d2 + c*d3 + d*d4
level: 2
TD_B =~ a*d1 + b*d2 + c*d3 + d*d4'),
"influential/td.weakInv.csv",row.names=FALSE)
# Level 1 loadings d1 and d2
gridExtra::grid.arrange(plot.influential(data=read.csv("influential/td.weakInv.csv"),
parameter="load",variable="d1",level=1),
plot.influential(data=read.csv("influential/td.csv"),
parameter="load",variable="d2",level=1),nrow=2)
# Level 1 loadings d3 and d4
gridExtra::grid.arrange(plot.influential(data=read.csv("influential/td.weakInv.csv"),
parameter="load",variable="d3",level=1),
plot.influential(data=read.csv("influential/td.weakInv.csv"),
parameter="load",variable="d4",level=1),nrow=2)
# Level 2 loadings d1 and d2
gridExtra::grid.arrange(plot.influential(data=read.csv("influential/td.weakInv.csv"),
parameter="load",variable="d1",level=2),
plot.influential(data=read.csv("influential/td.weakInv.csv"),
parameter="load",variable="d2",level=2),nrow=2)
# Level 2 loadings d3 and d4
gridExtra::grid.arrange(plot.influential(data=read.csv("influential/td.weakInv.csv"),
parameter="load",variable="d3",level=2),
plot.influential(data=read.csv("influential/td.weakInv.csv"),
parameter="load",variable="d4",level=2),nrow=2)
Comments:
- none of the participants is associated with changes higher than .10 in the estimated standardized loadings
- the maximum change is 0.02 (for item d3 on level 2)
TC
For Task Control, we focus on the configural and the weak invariance models, fitted both on the whole subsample s1 and by excluding the four participants associated with the improper solution shown above (see MODEL SPECIFICATION - IMPROPER SOLUTIONS - SAMPLE FLUCTUATIONS).
tc
We start by inspecting differences associated with the exclusion of the seven participants with no variance for items c1, c2, or c3 (S008, S035, S055, S062, S082, S097, and S121).
check.influential(tc,TC.s1,IDs=c("S008","S035","S055","S062","S082","S097","S121"))
Comments:
- no substantial differences are shown in any of the parameters estimated with and without these participants
- all changes are < 0.05 (maximum for item c1_b)
Then, we inspect the loadings associated with the exclusion of each participant. The blue dot represents the parameter estimated considering all participants. The red dots indicate cases associated with negative variance.
write.csv(influential.analysis(data=TC.s1,parameter="load",
n.items=3,item.labels=c("c1","c2","c3"),
m='level: 1
TC_W =~ c1 + c2 + c3
level: 2
TC_B =~ c1 + c2 + c3'),
"influential/tc.csv",row.names=FALSE)
# Level 1 loadings
gridExtra::grid.arrange(plot.influential(data=read.csv("influential/tc.csv"),
parameter="load",variable="c1",level=1),
plot.influential(data=read.csv("influential/tc.csv"),
parameter="load",variable="c2",level=1),
plot.influential(data=read.csv("influential/tc.csv"),
parameter="load",variable="c3",level=1),nrow=2)
# Level 2 loadings
gridExtra::grid.arrange(plot.influential(data=read.csv("influential/tc.csv"),
parameter="load",variable="c1",level=2),
plot.influential(data=read.csv("influential/tc.csv"),
parameter="load",variable="c2",level=2),
plot.influential(data=read.csv("influential/tc.csv"),
parameter="load",variable="c3",level=2),nrow=2)
Comments:
- none of the participants is associated with changes higher than .10 in the standardized loadings
- the maximum change is 0.06 (for item c1 on level 2)
- as noted in sections MODEL SPECIFICATION and IMPROPER SOLUTIONS, all models are associated with a negative variance estimate for item c3 on level 2
tc.weakInv
We start by inspecting differences associated with the exclusion of the seven participants with no variance for items c1, c2, or c3 (S008, S035, S055, S062, S082, S097, and S121).
check.influential(tc.weakInv,TC.s1,IDs=c("S008","S035","S055","S062","S082","S097","S121"))
Comments:
- no substantial differences are shown in any of the parameters estimated with and without these participants
- all changes are < 0.05 (maximum for item c1_b)
Then, we inspect the loadings associated with the exclusion of each participant. The blue dot represents the parameter estimated considering all participants. The red dots indicate cases associated with negative variance.
write.csv(influential.analysis(data=TC.s1,parameter="load",
n.items=3,item.labels=c("c1","c2","c3"),
m='level: 1
TC_W =~ a*c1 + b*c2 + c*c3
level: 2
TC_B =~ a*c1 + b*c2 + c*c3'),
"influential/tc.weakInv.csv",row.names=FALSE)
# Level 1 loadings
gridExtra::grid.arrange(plot.influential(data=read.csv("influential/tc.weakInv.csv"),
parameter="load",variable="c1",level=1),
plot.influential(data=read.csv("influential/tc.weakInv.csv"),
parameter="load",variable="c2",level=1),
plot.influential(data=read.csv("influential/tc.weakInv.csv"),
parameter="load",variable="c3",level=1),nrow=2)
# Level 2 loadings
gridExtra::grid.arrange(plot.influential(data=read.csv("influential/tc.weakInv.csv"),
parameter="load",variable="c1",level=2),
plot.influential(data=read.csv("influential/tc.weakInv.csv"),
parameter="load",variable="c2",level=2),
plot.influential(data=read.csv("influential/tc.weakInv.csv"),
parameter="load",variable="c3",level=2),nrow=2)
Comments:
- none of the participants is associated with changes higher than .10 in the estimated standardized loadings
- the maximum change is 0.03 (for item c1 on level 2)
- the exclusion of three participants is associated with a negative variance estimate (probably for item c3 on level 2, as in model tc)
tc_noInfl4
We start by inspecting differences associated with the exclusion of the participants with no variance for items c1, c2, or c3.
check.influential(tc_noInfl4,TC_noInfl4.s1,IDs=c("S008","S035","S055","S062","S082","S097","S121"))
Comments:
- no substantial differences are shown in any of the parameters estimated with and without these participants
- all changes are < 0.05, apart from item c1_b, which increases by about 0.06
Then, we inspect the loadings associated with the exclusion of each participant. The blue dot represents the parameter estimated considering all participants. The red dots indicate cases associated with negative variance.
write.csv(influential.analysis(data=TC_noInfl4.s1,parameter="load",
n.items=3,item.labels=c("c1","c2","c3"),
m='level: 1
TC_W =~ c1 + c2 + c3
level: 2
TC_B =~ c1 + c2 + c3'),
"influential/tc_noInfl4.csv",row.names=FALSE)
# Level 1 loadings
gridExtra::grid.arrange(plot.influential(data=read.csv("influential/tc_noInfl4.csv"),
parameter="load",variable="c1",level=1),
plot.influential(data=read.csv("influential/tc_noInfl4.csv"),
parameter="load",variable="c2",level=1),
plot.influential(data=read.csv("influential/tc_noInfl4.csv"),
parameter="load",variable="c3",level=1),nrow=2)
# Level 2 loadings
gridExtra::grid.arrange(plot.influential(data=read.csv("influential/tc_noInfl4.csv"),
parameter="load",variable="c1",level=2),
plot.influential(data=read.csv("influential/tc_noInfl4.csv"),
parameter="load",variable="c2",level=2),
plot.influential(data=read.csv("influential/tc_noInfl4.csv"),
parameter="load",variable="c3",level=2),nrow=2)
Comments:
- none of the participants is associated with changes higher than .10 in the standardized loadings
- the maximum change is 0.06 (for item c1 on level 2)
- the exclusion of five participants is associated with a negative variance estimate (probably for item c3 on level 2, as in model tc)
tc.weakInv_noInfl4
We start by inspecting differences associated with the exclusion of the participants with no variance for items c1, c2, or c3.
check.influential(tc.weakInv_noInfl4,TC.s1,IDs=c("S008","S035","S055","S062","S082","S097","S121"))
Comments:
- no substantial differences are shown in any of the parameters estimated with and without these participants
- all changes are < 0.05 (maximum for item c1_b)
- the variance of item c3_b becomes negative (Heywood case) when these participants are removed
Then, we inspect the loadings associated with the exclusion of each participant. The blue dot represents the parameter estimated considering all participants. The red dots indicate cases associated with negative variance.
write.csv(influential.analysis(data=TC_noInfl4.s1,parameter="load",
n.items=3,item.labels=c("c1","c2","c3"),
m='level: 1
TC_W =~ a*c1 + b*c2 + c*c3
level: 2
TC_B =~ a*c1 + b*c2 + c*c3'),
"influential/tc.weakInv_noInfl4.csv",row.names=FALSE)
# Level 1 loadings
gridExtra::grid.arrange(plot.influential(data=read.csv("influential/tc.weakInv_noInfl4.csv"),
parameter="load",variable="c1",level=1),
plot.influential(data=read.csv("influential/tc.weakInv_noInfl4.csv"),
parameter="load",variable="c2",level=1),
plot.influential(data=read.csv("influential/tc.weakInv_noInfl4.csv"),
parameter="load",variable="c3",level=1),nrow=2)
# Level 2 loadings
gridExtra::grid.arrange(plot.influential(data=read.csv("influential/tc.weakInv_noInfl4.csv"),
parameter="load",variable="c1",level=2),
plot.influential(data=read.csv("influential/tc.weakInv_noInfl4.csv"),
parameter="load",variable="c2",level=2),
plot.influential(data=read.csv("influential/tc.weakInv_noInfl4.csv"),
parameter="load",variable="c3",level=2),nrow=2)
Comments:
- none of the participants is associated with changes higher than .10 in the standardized loadings
- the maximum change is 0.04 (for item c1 on level 2)
MOOD
For the MDMQ, we focus on the configural model fitted on the whole subsample s1, and on both the same model and the corresponding weak invariance model fitted by excluding the five participants associated with the improper solutions shown above (see MODEL SPECIFICATION - IMPROPER SOLUTIONS - SAMPLE FLUCTUATIONS).
m3x3
We start by inspecting differences associated with the exclusion of the three participants with no variance for items v1, f1 (S047), t3 (S097), or f3 (S055).
check.influential(m3x3,Mood.s1,IDs=c("S047","S097","S055"))
Comments:
- none of the participants is associated with changes higher than .10 in the standardized loadings
Then, we inspect the loadings associated with the exclusion of each participant.
write.csv(influential.analysis(data=Mood.s1,parameter="load"),"influential/m3x3.csv",row.names=FALSE)
# Level 1 loadings NV
gridExtra::grid.arrange(plot.influential(data=read.csv("influential/m3x3.csv"),
parameter="load",variable="v1",level=1),
plot.influential(data=read.csv("influential/m3x3.csv"),
parameter="load",variable="v2",level=1),
plot.influential(data=read.csv("influential/m3x3.csv"),
parameter="load",variable="v3",level=1),nrow=3)
# Level 1 loadings TA
gridExtra::grid.arrange(plot.influential(data=read.csv("influential/m3x3.csv"),
parameter="load",variable="t1",level=1),
plot.influential(data=read.csv("influential/m3x3.csv"),
parameter="load",variable="t2",level=1),
plot.influential(data=read.csv("influential/m3x3.csv"),
parameter="load",variable="t3",level=1),nrow=3)
# Level 1 loadings FA
gridExtra::grid.arrange(plot.influential(data=read.csv("influential/m3x3.csv"),
parameter="load",variable="f1",level=1),
plot.influential(data=read.csv("influential/m3x3.csv"),
parameter="load",variable="f2",level=1),
plot.influential(data=read.csv("influential/m3x3.csv"),
parameter="load",variable="f3",level=1),nrow=3)
# Level 2 loadings NV
gridExtra::grid.arrange(plot.influential(data=read.csv("influential/m3x3.csv"),
parameter="load",variable="v1",level=2),
plot.influential(data=read.csv("influential/m3x3.csv"),
parameter="load",variable="v2",level=2),
plot.influential(data=read.csv("influential/m3x3.csv"),
parameter="load",variable="v3",level=2),nrow=3)
# Level 2 loadings TA
gridExtra::grid.arrange(plot.influential(data=read.csv("influential/m3x3.csv"),
parameter="load",variable="t1",level=2),
plot.influential(data=read.csv("influential/m3x3.csv"),
parameter="load",variable="t2",level=2),
plot.influential(data=read.csv("influential/m3x3.csv"),
parameter="load",variable="t3",level=2),nrow=3)
# Level 2 loadings FA
gridExtra::grid.arrange(plot.influential(data=read.csv("influential/m3x3.csv"),
parameter="load",variable="f1",level=2),
plot.influential(data=read.csv("influential/m3x3.csv"),
parameter="load",variable="f2",level=2),
plot.influential(data=read.csv("influential/m3x3.csv"),
parameter="load",variable="f3",level=2),nrow=3)
Comments:
- none of the participants is associated with changes higher than .10 in the standardized loadings estimated with model m3x3
- the maximum change is 0.02 (for items t1 and f3)
- as noted in sections MODEL SPECIFICATION and IMPROPER SOLUTIONS, all models are associated with a negative variance estimate for item t3 on level 2
m3x3_noInfl5
We start by inspecting differences associated with the exclusion of the three participants with no variance for items v1, f1 (S047), t3 (S097), or f3 (S055).
check.influential(m3x3_noInfl5,Mood_noInfl5.s1,IDs=c("S047","S097","S055"))
Comments:
- none of the participants is associated with changes higher than .10 in the standardized loadings
Then, we inspect the loadings associated with the exclusion of each participant.
write.csv(influential.analysis(data=Mood_noInfl5.s1,parameter="load"),"influential/m3x3_noInfl5.csv",row.names=FALSE)
# Level 1 loadings NV
gridExtra::grid.arrange(plot.influential(data=read.csv("influential/m3x3_noInfl5.csv"),
parameter="load",variable="v1",level=1),
plot.influential(data=read.csv("influential/m3x3_noInfl5.csv"),
parameter="load",variable="v2",level=1),
plot.influential(data=read.csv("influential/m3x3_noInfl5.csv"),
parameter="load",variable="v3",level=1),nrow=3)
# Level 1 loadings TA
gridExtra::grid.arrange(plot.influential(data=read.csv("influential/m3x3_noInfl5.csv"),
parameter="load",variable="t1",level=1),
plot.influential(data=read.csv("influential/m3x3_noInfl5.csv"),
parameter="load",variable="t2",level=1),
plot.influential(data=read.csv("influential/m3x3_noInfl5.csv"),
parameter="load",variable="t3",level=1),nrow=3)
# Level 1 loadings FA
gridExtra::grid.arrange(plot.influential(data=read.csv("influential/m3x3_noInfl5.csv"),
parameter="load",variable="f1",level=1),
plot.influential(data=read.csv("influential/m3x3_noInfl5.csv"),
parameter="load",variable="f2",level=1),
plot.influential(data=read.csv("influential/m3x3_noInfl5.csv"),
parameter="load",variable="f3",level=1),nrow=3)
# Level 2 loadings NV
gridExtra::grid.arrange(plot.influential(data=read.csv("influential/m3x3_noInfl5.csv"),
parameter="load",variable="v1",level=2),
plot.influential(data=read.csv("influential/m3x3_noInfl5.csv"),
parameter="load",variable="v2",level=2),
plot.influential(data=read.csv("influential/m3x3_noInfl5.csv"),
parameter="load",variable="v3",level=2),nrow=3)
# Level 2 loadings TA
gridExtra::grid.arrange(plot.influential(data=read.csv("influential/m3x3_noInfl5.csv"),
parameter="load",variable="t1",level=2),
plot.influential(data=read.csv("influential/m3x3_noInfl5.csv"),
parameter="load",variable="t2",level=2),
plot.influential(data=read.csv("influential/m3x3_noInfl5.csv"),
parameter="load",variable="t3",level=2),nrow=3)
# Level 2 loadings FA
gridExtra::grid.arrange(plot.influential(data=read.csv("influential/m3x3_noInfl5.csv"),
parameter="load",variable="f1",level=2),
plot.influential(data=read.csv("influential/m3x3_noInfl5.csv"),
parameter="load",variable="f2",level=2),
plot.influential(data=read.csv("influential/m3x3_noInfl5.csv"),
parameter="load",variable="f3",level=2),nrow=3)
Comments:
- none of the participants is associated with changes higher than .10 in the standardized loadings estimated with model m3x3_noInfl5
- the maximum change is 0.02 (for item f3)
- as noted in sections MODEL SPECIFICATION and IMPROPER SOLUTIONS, some models are associated with a negative variance estimate for item t3 on level 2
m3x3.weakInv_noInfl5
We start by inspecting differences associated with the exclusion of the three participants with no variance for items v1, f1 (S047), t3 (S097), or f3 (S055).
check.influential(m3x3.weakInv_noInfl5,Mood_noInfl5.s1,IDs=c("S047","S097","S055"))
Comments:
- none of the participants is associated with changes higher than .10 in the standardized loadings
Then, we inspect the loadings associated with the exclusion of each participant.
write.csv(influential.analysis(data=Mood_noInfl5.s1,parameter="load",
m='level: 1
NV_w =~ a*v1 + b*v2 + c*v3
TA_w =~ d*t1 + e*t2 + f*t3
FA_w =~ g*f1 + h*f2 + i*f3
level: 2
NV_b =~ a*v1 + b*v2 + c*v3
TA_b =~ d*t1 + e*t2 + f*t3
FA_b =~ g*f1 + h*f2 + i*f3'),
"influential/m3x3.weakInv_noInfl5",row.names=FALSE)
# Level 1 loadings NV
gridExtra::grid.arrange(plot.influential(data=read.csv("influential/m3x3.weakInv_noInfl5"),
parameter="load",variable="v1",level=1),
plot.influential(data=read.csv("influential/m3x3.weakInv_noInfl5"),
parameter="load",variable="v2",level=1),
plot.influential(data=read.csv("influential/m3x3.weakInv_noInfl5"),
parameter="load",variable="v3",level=1),nrow=3)
# Level 1 loadings TA
gridExtra::grid.arrange(plot.influential(data=read.csv("influential/m3x3.weakInv_noInfl5"),
parameter="load",variable="t1",level=1),
plot.influential(data=read.csv("influential/m3x3.weakInv_noInfl5"),
parameter="load",variable="t2",level=1),
plot.influential(data=read.csv("influential/m3x3.weakInv_noInfl5"),
parameter="load",variable="t3",level=1),nrow=3)
# Level 1 loadings FA
gridExtra::grid.arrange(plot.influential(data=read.csv("influential/m3x3.weakInv_noInfl5"),
parameter="load",variable="f1",level=1),
plot.influential(data=read.csv("influential/m3x3.weakInv_noInfl5"),
parameter="load",variable="f2",level=1),
plot.influential(data=read.csv("influential/m3x3.weakInv_noInfl5"),
parameter="load",variable="f3",level=1),nrow=3)
# Level 2 loadings NV
gridExtra::grid.arrange(plot.influential(data=read.csv("influential/m3x3.weakInv_noInfl5"),
parameter="load",variable="v1",level=2),
plot.influential(data=read.csv("influential/m3x3.weakInv_noInfl5"),
parameter="load",variable="v2",level=2),
plot.influential(data=read.csv("influential/m3x3.weakInv_noInfl5"),
parameter="load",variable="v3",level=2),nrow=3)
# Level 2 loadings TA
gridExtra::grid.arrange(plot.influential(data=read.csv("influential/m3x3.weakInv_noInfl5"),
parameter="load",variable="t1",level=2),
plot.influential(data=read.csv("influential/m3x3.weakInv_noInfl5"),
parameter="load",variable="t2",level=2),
plot.influential(data=read.csv("influential/m3x3.weakInv_noInfl5"),
parameter="load",variable="t3",level=2),nrow=3)
# Level 2 loadings FA
gridExtra::grid.arrange(plot.influential(data=read.csv("influential/m3x3.weakInv_noInfl5"),
parameter="load",variable="f1",level=2),
plot.influential(data=read.csv("influential/m3x3.weakInv_noInfl5"),
parameter="load",variable="f2",level=2),
plot.influential(data=read.csv("influential/m3x3.weakInv_noInfl5"),
parameter="load",variable="f3",level=2),nrow=3)
Comments:
- none of the participants is associated with changes higher than .10 in the standardized loadings estimated with model m3x3.weakInv_noInfl5
- as noted in sections MODEL SPECIFICATION and IMPROPER SOLUTIONS, some models are associated with a negative variance estimate for item t3 on level 2
MODEL COMPARISON
Here, we compare the specified multilevel models by considering both the fit indices and the Akaike Information Criterion (AIC). Different subsamples are considered depending on the influential analysis and the presence of improper solutions.
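For reference, and assuming that the Weights() helper used below returns the usual Akaike weights, the AICw values are the normalized AIC differences across the K compared models:
$$ w_i = \frac{\exp(-\tfrac{1}{2}\Delta_i)}{\sum_{k=1}^{K}\exp(-\tfrac{1}{2}\Delta_k)}, \qquad \Delta_i = \mathrm{AIC}_i - \min_{k}\mathrm{AIC}_k $$
so that the weights sum to 1 and can be read as the relative evidence for each model within the compared set.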
TD
GOF <- cbind(round(fit.ind(model=c(td,td.weakInv,td.strInv),models.names=c("td","td.weakInv","td.strInv")),3),
AIC=AIC(td,td.weakInv,td.strInv)[,2],
BIC=BIC(td,td.weakInv,td.strInv)[,2],
AICw=round(Weights(AIC(td,td.weakInv,td.strInv)),5))
(gof <- GOF[order(GOF$rmsea),c(1,5:8,10:11,2:4)])
Comments:
- the hypothesized configural invariance model td shows satisfactory fit indices, with CFI and SRMR (within and between) showing the best values for this model
- the weak invariance model td.weakInv shows satisfactory fit indices and the best evidence, as indicated by the lowest BIC and AIC (highest AICw) and the lowest RMSEA
- the strong invariance model td.strInv is rejected due to unsatisfactory fit indices
TC
Here, we compare the specified models considering both the fit indices and the Akaike Information Criterion (AIC). Two model comparisons are performed on different subsets of the sample, in order to account for the highlighted Heywood cases (see IMPROPER SOLUTIONS - SAMPLE FLUCTUATIONS). The model comparison from the whole sample is also replicated considering models with fixed residual variances in the problematic items (see IMPROPER SOLUTIONS - FIXING VARIANCES).
# all participants (N = 139) with negative variance for item c3 in model tc
GOF <- cbind(round(fit.ind(model=c(tc,tc.weakInv,tc.strInv),
models.names=c("tc","tc.weakInv","tc.strInv")),3),
AIC=AIC(tc,tc.weakInv,tc.strInv)[,2],
BIC=BIC(tc,tc.weakInv,tc.strInv)[,2],
AICw=round(Weights(AIC(tc,tc.weakInv,tc.strInv)),5))
(GOF <- GOF[order(GOF$rmsea),c(1,5:8,10:11,2:4)])
gof <- rbind(gof,GOF)
# all participants (N = 139) fixing residual variance to 15% of the total variance
GOF <- cbind(round(fit.ind(model=c(tc.fix,tc.weakInv,tc.strInv),
models.names=c("tc.fix","tc.weakInv","tc.strInv")),3),
AIC=AIC(tc.fix,tc.weakInv,tc.strInv)[,2],
BIC=BIC(tc.fix,tc.weakInv,tc.strInv)[,2],
AICw=round(Weights(AIC(tc.fix,tc.weakInv,tc.strInv)),5))
(GOF <- GOF[order(GOF$rmsea),c(1,5:8,10:11,2:4)])
gof <- rbind(gof,GOF)
# excluding 4 participants associated with negative variance
GOF <- cbind(round(fit.ind(model=c(tc_noInfl4,tc.weakInv_noInfl4,tc.strInv_noInfl4),
models.names=c("tc_noInfl4","tc.weakInv_noInfl4","tc.strInv_noInfl4")),3),
AIC=AIC(tc_noInfl4,tc.weakInv_noInfl4,tc.strInv_noInfl4)[,2],
BIC=BIC(tc_noInfl4,tc.weakInv_noInfl4,tc.strInv_noInfl4)[,2],
AICw=round(Weights(AIC(tc_noInfl4,tc.weakInv_noInfl4,tc.strInv_noInfl4)),5))
(GOF <- GOF[order(GOF$rmsea),c(1,5:8,10:11,2:4)])
gof <- rbind(gof,GOF)
Comments:
- when all parameters are freely estimated, the hypothesized configural invariance model tc is saturated (i.e., three indicators for one latent variable), so we cannot rely on chi-squared-derived fit indices; when considering all participants, this model shows the highest AICw, whereas when the c3 residual variance is fixed, it shows an unsatisfactory RMSEA
- the weak invariance model tc.weakInv shows satisfactory fit indices and the best evidence, as indicated by the lowest BIC; when the improper solution is resolved, this model also shows the highest AICw
- the strong invariance model tc.strInv is rejected due to unsatisfactory fit indices across the three model comparisons
MOOD
Here, we compare the specified models considering both the fit indices and the Akaike Information Criterion (AIC). Four model comparisons are performed on different subsets of the sample, in order to account for the highlighted Heywood cases (see IMPROPER SOLUTIONS - SAMPLE FLUCTUATIONS). The model comparison from the whole sample is also replicated considering models with fixed residual variances in the problematic items (see IMPROPER SOLUTIONS - FIXING VARIANCES).
# all participants (N = 139), with negative variances in m3x3, m2x2 and m3x3.weakInv
# (see MODEL SPECIFICATION - MULTILEVEL MODELS)
GOF <- cbind(round(fit.ind(model=c(m3x3,m3x2,m2x2,m2x3,m3x3.weakInv,m3x3.strInv),
models.names=c("m3x3","m3x2","m2x2","m2x3","m3x3.weakInv","m3x3.strInv")),3),
AIC=AIC(m3x3,m3x2,m2x2,m2x3,m3x3.weakInv,m3x3.strInv)[,2],
BIC=BIC(m3x3,m3x2,m2x2,m2x3,m3x3.weakInv,m3x3.strInv)[,2],
AICw=round(Weights(AIC(m3x3,m3x2,m2x2,m2x3,m3x3.weakInv,m3x3.strInv)),5))
(GOF <- GOF[order(GOF$rmsea),c(1,5:8,10:11,2:4)])
gof <- rbind(gof,GOF)
# all participants (N = 139), fixing residual variance to 15% of the total variance
# (see IMPROPER SOLUTION - fixing RESIDUAL VARIANCE)
GOF <- cbind(round(fit.ind(model=c(m3x3.fix,m3x2,m2x2,m2x3.fix,m3x3.weakInv.fix,m3x3.strInv),
models.names=c("m3x3.fix","m3x2","m2x2","m2x3.fix","m3x3.weakInv.fix","m3x3.strInv")),3),
AIC=AIC(m3x3.fix,m3x2,m2x2,m2x3.fix,m3x3.weakInv.fix,m3x3.strInv)[,2],
BIC=BIC(m3x3.fix,m3x2,m2x2,m2x3.fix,m3x3.weakInv.fix,m3x3.strInv)[,2],
AICw=round(Weights(AIC(m3x3.fix,m3x2,m2x2,m2x3.fix,m3x3.weakInv.fix,m3x3.strInv)),5))
(GOF <- GOF[order(GOF$rmsea),c(1,5:8,10:11,2:4)])
gof <- rbind(gof,GOF)
# excluding 5 participants associated with negative variances in m3x3, and m3x3.weakInv (N = 134)
GOF <- cbind(round(fit.ind(model=c(m3x3_noInfl5,m3x2_noInfl5,m2x2_noInfl5,m3x3.weakInv_noInfl5,m3x3.strInv_noInfl5),
models.names=c("m3x3_noInfl5","m3x2_noInfl5","m2x2_noInfl5",
"m3x3.weakInv_noInfl5","m3x3.strInv_noInfl5")),3),
AIC=AIC(m3x3_noInfl5,m3x2_noInfl5,m2x2_noInfl5,m3x3.weakInv_noInfl5,m3x3.strInv_noInfl5)[,2],
BIC=BIC(m3x3_noInfl5,m3x2_noInfl5,m2x2_noInfl5,m3x3.weakInv_noInfl5,m3x3.strInv_noInfl5)[,2],
AICw=round(Weights(AIC(m3x3_noInfl5,m3x2_noInfl5,m2x2_noInfl5,m3x3.weakInv_noInfl5,m3x3.strInv_noInfl5)),5))
(GOF <- GOF[order(GOF$rmsea),c(1,5:8,10:11,2:4)])
gof <- rbind(gof,GOF)
# excluding 7 participants associated with negative variances in all models (N = 132)
GOF <- cbind(round(fit.ind(model=c(m3x3_noInfl7,m3x2_noInfl7,m2x2_noInfl7,m2x3_noInfl7,
m3x3.weakInv_noInfl7,m3x3.strInv_noInfl7),
models.names=c("m3x3_noInfl7","m3x2_noInfl7","m2x2_noInfl7","m2x3_noInfl7",
"m3x3.weakInv_noInfl7","m3x3.strInv_noInfl7")),3),
AIC=AIC(m3x3_noInfl7,m3x2_noInfl7,m2x2_noInfl7,m2x3_noInfl7,m3x3.weakInv_noInfl7,m3x3.strInv_noInfl7)[,2],
BIC=BIC(m3x3_noInfl7,m3x2_noInfl7,m2x2_noInfl7,m2x3_noInfl7,m3x3.weakInv_noInfl7,m3x3.strInv_noInfl7)[,2],
AICw=round(Weights(AIC(m3x3_noInfl7,m3x2_noInfl7,m2x2_noInfl7,m2x3_noInfl7,
m3x3.weakInv_noInfl7,m3x3.strInv_noInfl7)),5))
(GOF <- GOF[order(GOF$rmsea),c(1,5:8,10:11,2:4)])
gof <- rbind(gof,GOF)
write.csv(gof,"MCFA.modComp.csv") # saving results
Comments:
- the hypothesized configural invariance model m3x3 shows satisfactory fit indices and the highest AICw across the four model comparisons, with the only exception of the comparison obtained by fixing the residual variance of item t3 to 15% of the total item score variance, in which model m3x3 showed an excessively high RMSEA of .61
- the alternative models m3x2 and m2x2 show unsatisfactory fit indices and are both rejected; model m3x2 shows satisfactory fit indices only when the residual variance of item t3 is fixed to 15% of the total item score variance
- the weak invariance model (m3x3.weakInv) shows satisfactory fit indices and the best evidence, as indicated by the lowest BIC across the four model comparisons
- the strong invariance model (m3x3.strInv) is rejected due to unsatisfactory fit indices across the model comparisons
UNST. SOLUTION
Here, we evaluate the unstandardized loadings, the covariances, and the residual variances of the selected (i.e., weak-invariance) models. Note that to estimate the variances of, and the covariances between, latent variables, we need to re-specify the models by using a different parametrization in which the loading of the first item is fixed to 1.
TD
# respecifying model without standardizing the latent variables
td.unst <- cfa('level: 1
TD_W =~ a*d1 + b*d2 + c*d3 + d*d4
level: 2
TD_B =~ a*d1 + b*d2 + c*d3 + d*d4',
data = TD.s1, cluster = 'ID', std.lv=FALSE) # note that std.lv=FALSE (lat variables are not standardized)
parameterestimates(td.unst)[parameterestimates(td.unst)$op=="=~",c(1:3,5:12)] # unstandardized loadings
parameterestimates(td.unst)[parameterestimates(td.unst)$op=="~~",c(1:3,5:12)] # res. variances
Comments:
- all unstandardized loadings are significant and higher than .85 (the lowest loading is estimated for item d3)
TC
# respecifying model without standardizing the latent variables
tc.unst <- cfa('level: 1
TC_W =~ a*c1 + b*c2 + c*c3
level: 2
TC_B =~ a*c1 + b*c2 + c*c3',
data = TC.s1, cluster = 'ID', std.lv=FALSE) # note that std.lv=FALSE (latent variables are not standardized)
parameterestimates(tc.unst)[parameterestimates(tc.unst)$op=="=~",c(1:3,5:12)] # unstandardized loadings
parameterestimates(tc.unst)[parameterestimates(tc.unst)$op=="~~",c(1:3,5:12)] # res. variances
Comments:
- all unstandardized loadings are significant and higher than .80 (the lowest loading is estimated for item c3)
MOOD
# respecifying model without standardizing the latent variables
m3x3.unst <- cfa('level: 1
NV_w =~ a*v1 + b*v2 + c*v3
TA_w =~ d*t1 + e*t2 + f*t3
FA_w =~ g*f1 + h*f2 + i*f3
level: 2
NV_b =~ a*v1 + b*v2 + c*v3
TA_b =~ d*t1 + e*t2 + f*t3
FA_b =~ g*f1 + h*f2 + i*f3',
data = Mood_noInfl5.s1,cluster='ID',std.lv=FALSE) # note that std.lv=FALSE (lat var are not standardized)
parameterestimates(m3x3.unst)[parameterestimates(m3x3.unst)$op=="=~",c(1:3,5:12)] # unstandardized loadings
parameterestimates(m3x3.unst)[parameterestimates(m3x3.unst)$op=="~~",c(1:3,5:12)] # res. variances
Comments:
- all unstandardized loadings are significant and higher than .70 (the lowest loading is estimated for item v2)
ST. SOLUTION
Here, we evaluate the standardized loadings, the covariances, and the residual variances of the selected (i.e., weak-invariance) models. Note that we use a complete standardization (i.e., based on both latent and observed variables, and separately performed for each level).
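As a reminder (a standard result, which we assume matches what the loadings() helper reports), the completely standardized loading of indicator j at a given level rescales the unstandardized loading by the latent and indicator standard deviations at that level:
$$ \lambda^{std}_{j} = \lambda_{j}\,\frac{\sqrt{\psi}}{\sqrt{\hat{\sigma}^{2}_{y_j}}} $$
where $\psi$ is the latent factor variance and $\hat{\sigma}^{2}_{y_j}$ is the model-implied variance of indicator j, both taken at the level (within or between) being standardized.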
TD
loadings(td.weakInv) # standardized loadings
standardizedsolution(td.weakInv)[standardizedsolution(td.weakInv)$op=="~~",] # res. variances
Comments:
- all standardized loadings are significant and higher than .60 (the lowest loading is estimated for item d3 on level 1)
- as expected, standardized loadings on level 2 are higher than those on level 1, with the loadings estimated for d1 and d4 approaching 1
TC
loadings(tc.weakInv) # standardized loadings
standardizedsolution(tc.weakInv)[standardizedsolution(tc.weakInv)$op=="~~",] # correlations and res. variances
Comments:
- all standardized loadings are significant and higher than .64 (the lowest loading is estimated for item c1 on level 1)
- as expected, standardized loadings on level 2 are higher than those on level 1
MOOD
loadings(m3x3.weakInv_noInfl5) # standardized loadings
standardizedsolution(m3x3.weakInv_noInfl5)[standardizedsolution(m3x3.weakInv_noInfl5)$op=="~~",] # corr. and res. variances
Comments:
- all standardized loadings are significant and higher than .57 (the lowest loading is estimated for item v2 on level 1)
- as expected, standardized loadings on level 2 are higher than those on level 1, with the loadings estimated for v3, t2, t3, and f1 approaching 1
- the estimated correlations between latent factors are within the .45-.88 range, with the exception of the correlation between Tense Arousal and Fatigue on level 1, which is .46; the highest correlation is estimated between Negative Valence and Tense Arousal on level 2 (r = .91)
SUBSAMPLE s2
MODEL SPECIFICATION
lv1 STRUCTURE
Here, we conduct the preliminary steps suggested by Hox (2010, chapter 14) for MCFA full maximum likelihood estimation. Similar to what is recommended by Muthén (1994), the idea is to specify a set of models to separately assess the factor structure on Level 1 (within) and Level 2 (between), and to compare those models with the hypothesized multilevel model.
The first step evaluates the within-cluster factor structure. Following Muthén (1994), we conduct a conventional (one-level) CFA of the pooled within-cluster covariance matrix (Spw). The goodness of fit of the within-cluster structure is indicated by the fit indices of this model.
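For reference, the pooled within-cluster covariance matrix is the covariance of the cluster-mean-centered scores (Muthén, 1994), which is what the mcfa.Huang() helper is assumed to return as pw.cov:
$$ S_{PW} = \frac{1}{N-G}\sum_{g=1}^{G}\sum_{i=1}^{n_g}\left(\mathbf{y}_{ig}-\bar{\mathbf{y}}_{g}\right)\left(\mathbf{y}_{ig}-\bar{\mathbf{y}}_{g}\right)^{\top} $$
where N is the total number of observations, G the number of participants (clusters), and $\bar{\mathbf{y}}_{g}$ the vector of item means of participant g.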
TD
# pooled covariance matrix (using function by Huang)
poolCov <- mcfa.Huang(dat=TD.s2,gp="ID")$pw.cov
# one-level model on the pooled within-cluster matrix
m.withinPool_TD <- cfa('TD =~ d1 + d2 + d3 + d4',sample.cov=poolCov,sample.nobs=nrow(TD.s2),std.lv=TRUE)
loadings(m.withinPool_TD,st="st.all") # standardized loadings
round(fit.ind(m.withinPool_TD,type="monolevel",from_summary=TRUE),3) # fit indices
## npar chisq df pvalue rmsea cfi srmr
## 8.000 15.008 2.000 0.001 0.076 0.992 0.018
Conclusions:
- standardized loadings estimated from the pooled within-cluster covariance matrix are significant and higher than .55
- the model shows quite satisfactory fit indices (although worse than those found in subsample s1), suggesting that the within-cluster structure holds
TC
# pooled covariance matrix (using function by Huang)
poolCov <- mcfa.Huang(dat=TC.s2,gp="ID")$pw.cov
# one-level model on the pooled within-cluster matrix
m.withinPool_TC <- cfa('TC_W =~ c1 + c2 + c3',sample.cov=poolCov,sample.nobs=nrow(TC.s2),std.lv=TRUE)
loadings(m.withinPool_TC,st="st.all") # standardized loadings
Conclusions:
- standardized loadings estimated from the pooled within-cluster covariance matrix are significant and higher than .60
- as the model is saturated (three indicators imply 3(3+1)/2 = 6 unique variances and covariances, exactly matching the 6 free parameters, so df = 0), we cannot evaluate the goodness of fit using chi-squared-derived fit indices
MOOD
# pooled covariance matrix (using function by Huang)
poolCov <- mcfa.Huang(dat=Mood.s2,gp="ID")$pw.cov
# one-level model on the pooled within-cluster matrix
m.withinPool_Mood <- cfa('NV =~ v1 + v2 + v3
TA =~ t1 + t2 + t3
FA =~ f1 + f2 + f3',sample.cov=poolCov,sample.nobs=nrow(Mood.s2),std.lv=TRUE)
loadings(m.withinPool_Mood,st="st.all") # standardized loadings
round(fit.ind(m.withinPool_Mood,type="monolevel",from_summary=TRUE),3) # fit indices
## npar chisq df pvalue rmsea cfi srmr
## 21.000 162.090 24.000 0.000 0.067 0.966 0.032
Comments:
- standardized loadings estimated from the pooled within-cluster covariance matrix are significant and higher than .55, similar to subsample s1
- the model shows satisfactory fit indices (apart from RMSEA), suggesting that the within-cluster structure holds
lv2 STRUCTURE
Here, we conduct the preliminary steps suggested by Hox (2010, chapter 14) for MCFA full maximum likelihood estimation. Similar to what is recommended by Muthén (1994), the idea is to specify a set of models to separately assess the factor structure on Level 1 (within) and Level 2 (between), and to compare those models with the hypothesized multilevel model.
The second step is to evaluate the between-cluster factor structure by specifying a set of benchmark models for the group level (Hox, 2010). This is done to test whether there is a between-cluster structure to be modeled.
TD
1. Null model
First, we specify a null model with no specification on level 2. If this model holds, it will mean that there is no cluster-level structure at all (all covariances in the between-clusters matrix are the result of individual sampling variation), and we can continue using a conventional (mono-level) analysis.
# one-factor within model + null between model
m.null_TD <- cfa('level: 1
TD_W =~ d1 + d2 + d3 + d4
level: 2
d1 ~~ 0*d1
d2 ~~ 0*d2
d3 ~~ 0*d3
d4 ~~ 0*d4',
data=TD.s2, cluster="ID", std.lv=TRUE)
2. Independence model
Second, we specify an independence model on level 2, by specifying only variances but not covariances on this level. If this model holds, it will mean that “there is cluster-level structure, but not substantively interesting structural model”, and we can focus our analyses on the pooled within-cluster matrix. Otherwise, it will mean that some kind of structure exists on Level 2 (Hox, 2010).
# one-factor within model + independence between model
m.ind_TD <- cfa('level: 1
TD_W =~ d1 + d2 + d3 + d4
level: 2
d1 ~~ d1
d2 ~~ d2
d3 ~~ d3
d4 ~~ d4',
data=TD.s2, cluster="ID", std.lv=TRUE)
3. Saturated model
Third, we specify a saturated model for the between-clusters part that fits a full covariance matrix to the family-level observations. This allows us “to examine the best possible fit given the individual-level model” (Hox, 2010). If this model holds, the factors at the within-level in this model correspond to ‘within-only’ constructs, meaning that the construct only ‘exists’ at the within level, whereas lv2 variation are just ‘spurious’ (Stapleton et al., 2016).
# one-factor within model + saturated between part
m.sat_TD <- cfa('level: 1
TD_W =~ d1 + d2 + d3 + d4
level: 2
d1 ~~ d1 + d2 + d3 + d4
d2 ~~ d2 + d3 + d4
d3 ~~ d3 + d4
d4 ~~ d4',
data = TD.s2, cluster = "ID", std.lv=TRUE)
Finally, we evaluate the fit of these benchmark models. If the structure on level 2 holds, all models (and especially the Null and the Independence) should be rejected.
round(fit.ind(model=c(m.null_TD,m.ind_TD,m.sat_TD),
models.names=c("Lv2 Null","Lv2 Independence","Lv2 Saturated")),3)
Comments:
- all benchmark models converged with no problems
- none of the models shows satisfactory goodness of fit, so all are rejected
TC
1. Null model
First, we specify a null model with no specification on level 2. If this model holds, it will mean that there is no cluster-level structure at all (all covariances in the between-clusters matrix are the result of individual sampling variation), and we can continue using a conventional (mono-level) analysis.
# one-factor within model + null between model
m.null_TC <- cfa('level: 1
TC_W =~ c1 + c2 + c3
level: 2
c1 ~~ 0*c1
c2 ~~ 0*c2
c3 ~~ 0*c3',
data=TC.s2, cluster="ID", std.lv=TRUE)
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "c1" has no variance within some clusters. The
## cluster ids with zero within variance are: S062 S082 S121
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "c2" has no variance within some clusters. The
## cluster ids with zero within variance are: S035 S097
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "c3" has no variance within some clusters. The
## cluster ids with zero within variance are: S097
2. Independence model
Second, we specify an independence model on level 2, by specifying only variances but not covariances on this level. If this model holds, it will mean that “there is cluster-level structure, but not substantively interesting structural model”, and we can focus our analyses on the pooled within-cluster matrix. Otherwise, it will mean that some kind of structure exists on Level 2.
# one-factor within model + independence between model
m.ind_TC <- cfa('level: 1
TC_W =~ c1 + c2 + c3
level: 2
c1 ~~ c1
c2 ~~ c2
c3 ~~ c3',
data=TC.s2, cluster="ID", std.lv=TRUE)
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "c1" has no variance within some clusters. The
## cluster ids with zero within variance are: S062 S082 S121
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "c2" has no variance within some clusters. The
## cluster ids with zero within variance are: S035 S097
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "c3" has no variance within some clusters. The
## cluster ids with zero within variance are: S097
3. Saturated model
Third, we specify a saturated model for the between-clusters part that fits a full covariance matrix to the family-level observations. This allows us “to examine the best possible fit given the individual-level model” (Hox, 2010). If this model holds, the factors at the within-level in this model correspond to ‘within-only’ constructs, meaning that the construct only ‘exists’ at the within level, whereas lv2 variation are just ‘spurious’ (Stapleton et al., 2016).
# one-factor within model + saturated between part
m.sat_TC <- cfa('level: 1
TC_W =~ c1 + c2 + c3
level: 2
c1 ~~ c1 + c2 + c3
c2 ~~ c2 + c3
c3 ~~ c3',
data=TC.s2, cluster="ID", std.lv=TRUE)
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "c1" has no variance within some clusters. The
## cluster ids with zero within variance are: S062 S082 S121
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "c2" has no variance within some clusters. The
## cluster ids with zero within variance are: S035 S097
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "c3" has no variance within some clusters. The
## cluster ids with zero within variance are: S097
Finally, we evaluate the fit of these benchmark models. If the structure on level 2 holds, all models (and especially the Null and the Independence) should be rejected. Note that here the Lv2-saturated model is saturated, and thus we cannot evaluate the model’s fit using chi-squared-derived fit indices.
fit.ind(model=c(m.null_TC,m.ind_TC,m.sat_TC),models.names=c("Lv2 Null","Lv2 Independence","Lv2 Saturated"))
Comments:
- all benchmark models converged with no problems
- the lv2 Null and Independence models show unsatisfactory goodness of fit and are rejected
- the lv2 Saturated model is saturated, so we cannot use chi-squared-derived fit indices
MOOD
1. Null model
First, we specify a null model with no specification on level 2. If this model holds, it will mean that “there is no cluster-level structure at all” (“all covariances in the between-clusters matrix are the result of individual sampling variation”), and we can continue using a conventional (mono-level) analysis.
# one-factor within model + null between model
m.null_Mood <- cfa('level: 1
NV_W =~ v1 + v2 + v3
TA_W =~ t1 + t2 + t3
FA_W =~ f1 + f2 + f3
level: 2
v1 ~~ 0*v1
v2 ~~ 0*v2
v3 ~~ 0*v3
t1 ~~ 0*t1
t2 ~~ 0*t2
t3 ~~ 0*t3
f1 ~~ 0*f1
f2 ~~ 0*f2
f3 ~~ 0*f3', data=Mood.s2,cluster='ID',std.lv=TRUE)
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "t3" has no variance within some clusters. The
## cluster ids with zero within variance are: S097
2. Independence model
Second, we specify an independence model on level 2, by specifying only variances but not covariances on this level. If this model holds, it will mean that “there is cluster-level structure, but not substantively interesting structural model”, and we can focus our analyses on the pooled within-cluster matrix. Otherwise, it will mean that some kind of structure exists on Level 2.
# one-factor within model + independence between model
m.ind_Mood <- cfa('level: 1
NV_W =~ v1 + v2 + v3
TA_W =~ t1 + t2 + t3
FA_W =~ f1 + f2 + f3
level: 2
v1 ~~ v1
v2 ~~ v2
v3 ~~ v3
t1 ~~ t1
t2 ~~ t2
t3 ~~ t3
f1 ~~ f1
f2 ~~ f2
f3 ~~ f3', data=Mood.s2,cluster='ID',std.lv=TRUE)
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "t3" has no variance within some clusters. The
## cluster ids with zero within variance are: S097
3. Saturated model
Third, we specify a saturated model for the between-clusters part that fits a full covariance matrix to the family-level observations. This allows us “to examine the best possible fit given the individual-level [here, within-individual] model” (Hox, 2010). If this model holds, the factors at the within-level in this model correspond to ‘within-only’ constructs, meaning that the construct only ‘exists’ at the within level, whereas lv2 variation is just ‘spurious’ (Stapleton et al., 2016).
# one-factor within model + saturated between part
m.sat_Mood <- cfa('level: 1
NV_W =~ v1 + v2 + v3
TA_W =~ t1 + t2 + t3
FA_W =~ f1 + f2 + f3
level: 2
v1 ~~ v1 + v2 + v3 + t1 + t2 + t3 + f1 + f2 + f3
v2 ~~ v2 + v3 + t1 + t2 + t3 + f1 + f2 + f3
v3 ~~ v3 + t1 + t2 + t3 + f1 + f2 + f3
t1 ~~ t1 + t2 + t3 + f1 + f2 + f3
t2 ~~ t2 + t3 + f1 + f2 + f3
t3 ~~ t3 + f1 + f2 + f3
f1 ~~ f1 + f2 + f3
f2 ~~ f2 + f3
f3 ~~ f3', data=Mood.s2,cluster='ID',std.lv=TRUE)
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "t3" has no variance within some clusters. The
## cluster ids with zero within variance are: S097
Finally, we evaluate the fit of these benchmark models. If the structure on level 2 holds, all models (and especially the Null and the Independence model) should be rejected.
round(fit.ind(model=c(m.null_Mood,m.ind_Mood,m.sat_Mood),models.names=c("Lv2 Null","Lv2 Independence","Lv2 Saturated")),3)
Comments:
- all benchmark models converged with no problems
- the Null and Independence models show unsatisfactory goodness of fit and are rejected, whereas the Saturated model shows an acceptable fit
MULTILEVEL MODELS
Here, we specify the hypothesized multilevel CFA models and, only for Mood items, alternative models with a different number of dimensions. Note that each model is parametrized by standardizing the latent factors (std.lv = TRUE) to avoid fixing to 1 the first indicator of each dimension.
TD
For TD, only one model with four items measuring a single dimension on both levels is specified.
# hypothesized multilevel model
td <- cfa('level: 1
TD_W =~ d1 + d2 + d3 + d4
level: 2
TD_B =~ d1 + d2 + d3 + d4',
data=TD.s2, cluster="ID", std.lv=TRUE)
Comments:
- the model converged with no problems
TC
For TC, only one model with three items measuring a single dimension on both levels is specified.
# hypothesized multilevel model
tc <- cfa('level: 1
TC_W =~ c1 + c2 + c3
level: 2
TC_B =~ c1 + c2 + c3',
data=TC.s2, cluster="ID", std.lv=TRUE)
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "c1" has no variance within some clusters. The
## cluster ids with zero within variance are: S062 S082 S121
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "c2" has no variance within some clusters. The
## cluster ids with zero within variance are: S035 S097
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "c3" has no variance within some clusters. The
## cluster ids with zero within variance are: S097
## Warning in lav_object_post_check(object): lavaan WARNING: some estimated ov
## variances are negative
Comments:
- the model converged with no problems
- a negative variance (Heywood case) is estimated for item c3 on level 2, as in subsample s1 (see s1 - IMPROPER SOLUTIONS)
parameterestimates(tc)[parameterestimates(tc)$op=="~~" & parameterestimates(tc)$level==2,
c("lhs","level","est","se","ci.lower","ci.upper")][1:3,]
As done in subsample s1 (see S1 - IMPROPER SOLUTIONS), we try to solve the problem by fixing the residual variance to .15 x rho2_between (Jöreskog & Sörbom, 1996).
# tc
tc.fix <- 'level: 1
TC_W =~ c1 + c2 + c3
level: 2
TC_B =~ c1 + c2 + c3
c3 ~~ rho2 * c3'
fit <- lmer(c3 ~ 1 + (1|ID),data=TC.s2) # null LMER model
c3varlv2 <- as.data.frame(VarCorr(fit))[1,4] # between-subjects variance of item c3
tc.fix <- cfa(gsub("rho2",c3varlv2*.15,tc.fix),data=TC.s2,cluster='ID',std.lv=TRUE) # fixing rho2
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "c1" has no variance within some clusters. The
## cluster ids with zero within variance are: S062 S082 S121
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "c2" has no variance within some clusters. The
## cluster ids with zero within variance are: S035 S097
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "c3" has no variance within some clusters. The
## cluster ids with zero within variance are: S097
MOOD
For Mood, we specify the hypothesized multilevel model m3x3 (with three factors on level 1 and three factors on level 2), and a set of alternative models with fewer dimensions (m3x2, m2x2, m2x3).
1) 3x3 model
Here, we specify the hypothesized model for Mood, using all nine items to measure three latent variables on both levels.
# hypothesized multilevel model
m3x3 <- cfa('level: 1
NV_w =~ v1 + v2 + v3
TA_w =~ t1 + t2 + t3
FA_w =~ f1 + f2 + f3
level: 2
NV_b =~ v1 + v2 + v3
TA_b =~ t1 + t2 + t3
FA_b =~ f1 + f2 + f3',
data=Mood.s2, cluster='ID', std.lv=TRUE)
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "t3" has no variance within some clusters. The
## cluster ids with zero within variance are: S097
## Warning in lav_object_post_check(object): lavaan WARNING: some estimated ov
## variances are negative
2) Alternative models
Here, alternative models are specified with fewer latent factors on level 1 and/or level 2. Specifically, models with two latent variables (Negative Tone and Fatigue) are justified by the strong correlation between Negative Valence and Tense Arousal items, especially on level 2.
# 3 latent on lv1, 2 latent on lv2
m3x2 <- cfa('level: 1
NV_w =~ v1 + v2 + v3
TA_w =~ t1 + t2 + t3
FA_w =~ f1 + f2 + f3
level: 2
NegTone_b =~ v1 + v2 + v3 + t1 + t2 + t3
FA_b =~ f1 + f2 + f3',
data=Mood.s2, cluster='ID', std.lv=TRUE)
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "t3" has no variance within some clusters. The
## cluster ids with zero within variance are: S097
# 2 latent factors on both levels
m2x2 <- cfa('level: 1
NegTone_w =~ v1 + v2 + v3 + t1 + t2 + t3
FA_w =~ f1 + f2 + f3
level: 2
NegTone_b =~ v1 + v2 + v3 + t1 + t2 + t3
FA_b =~ f1 + f2 + f3',
data=Mood.s2, cluster='ID', std.lv=TRUE)
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "t3" has no variance within some clusters. The
## cluster ids with zero within variance are: S097
# 2 latent on lv1, 3 latent on lv2
m2x3 <- cfa('level: 1
NegTone_w =~ v1 + v2 + v3 + t1 + t2 + t3
FA_w =~ f1 + f2 + f3
level: 2
NV_b =~ v1 + v2 + v3
TA_b =~ t1 + t2 + t3
FA_b =~ f1 + f2 + f3',
data=Mood.s2, cluster='ID', std.lv=TRUE)
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "t3" has no variance within some clusters. The
## cluster ids with zero within variance are: S097
## Warning in lav_object_post_check(object): lavaan WARNING: some estimated ov
## variances are negative
Comments:
- all four models converged
- negative variances (Heywood cases) are estimated by two models (m3x3 and m2x3) for item t3 on level 2, and potentially generalize to all Mood models, as in subsample s1 (see subsample s1 - IMPROPER SOLUTIONS)
# m3x3
parameterestimates(m3x3)[parameterestimates(m3x3)$op=="~~" & parameterestimates(m3x3)$ci.lower<0,
c("lhs","level","est","se","ci.lower","ci.upper")]
# m3x2
parameterestimates(m3x2)[parameterestimates(m3x2)$op=="~~" & parameterestimates(m3x2)$ci.lower<0,
c("lhs","level","est","se","ci.lower","ci.upper")]
# m2x2
parameterestimates(m2x2)[parameterestimates(m2x2)$op=="~~" & parameterestimates(m2x2)$ci.lower<0,
c("lhs","level","est","se","ci.lower","ci.upper")]
# m2x3
parameterestimates(m2x3)[parameterestimates(m2x3)$op=="~~" & parameterestimates(m2x3)$ci.lower<0,
c("lhs","level","est","se","ci.lower","ci.upper")]
As done in subsample s1 (IMPROPER SOLUTIONS), we try to solve the problems by fixing the residual variance to .15 x rho2_between (Jöreskog & Sörbom, 1996).
# m3x3
m3x3.fix <- 'level: 1
NV_w =~ v1 + v2 + v3
TA_w =~ t1 + t2 + t3
FA_w =~ f1 + f2 + f3
level: 2
NV_b =~ v1 + v2 + v3
TA_b =~ t1 + t2 + t3
FA_b =~ f1 + f2 + f3
t3 ~~ rho2 * t3'
fit <- lmer(t3 ~ 1 + (1|ID), data=Mood.s2) # null LMER model
t3varlv2 <- as.data.frame(VarCorr(fit))[1,4] # between-subjects variance of item t3
m3x3.fix <- cfa(gsub("rho2",t3varlv2*.15,m3x3.fix),data=Mood.s2,cluster='ID',std.lv=TRUE) # fixing rho2
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "t3" has no variance within some clusters. The
## cluster ids with zero within variance are: S097
# m2x3
m2x3.fix <- 'level: 1
NegTone_w =~ v1 + v2 + v3 + t1 + t2 + t3
FA_w =~ f1 + f2 + f3
level: 2
NV_b =~ v1 + v2 + v3
TA_b =~ t1 + t2 + t3
FA_b =~ f1 + f2 + f3
t3 ~~ rho2 * t3'
m2x3.fix <- cfa(gsub("rho2",t3varlv2*.15,m2x3.fix),data=Mood.s2,cluster='ID',std.lv=TRUE) # fixing rho2
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "t3" has no variance within some clusters. The
## cluster ids with zero within variance are: S097
CROSS-lv INVARIANCE
Here, we specify the models assuming cross-level invariance (or cross-level isomorphism). According to Jak & Jorgensen (2017), different levels of factor invariance can be tested: (1) configural invariance, when the same factor structure holds but factor loadings differ across clusters, (2) weak factorial invariance, when the factor loadings are equal across clusters, and (3) strong factorial invariance, when the values of factor loadings and intercepts are equal across clusters, and the residual variance on level 2 is zero.
Since each of our constructs (TD, TC, and Mood) is assumed to be a configural cluster construct (see Stapleton et al., 2016), our conceptualization should be supported by a better fit for the models assuming (at least weak) cross-level invariance. Note that each model is parametrized by standardizing the latent factors (std.lv = TRUE) to avoid fixing to 1 the first indicator of each dimension.
TD
# weak invariance model
td.weakInv <- cfa('level: 1
TD_W =~ a*d1 + b*d2 + c*d3 + d*d4
level: 2
TD_B =~ a*d1 + b*d2 + c*d3 + d*d4',
data = TD.s2, cluster = 'ID', std.lv=TRUE)
# strong invariance model
td.strInv <- cfa('level: 1
TD_W =~ a*d1 + b*d2 + c*d3 + d*d4
level: 2
TD_B =~ a*d1 + b*d2 + c*d3 + d*d4
d1 ~~ 0*d1
d2 ~~ 0*d2
d3 ~~ 0*d3
d4 ~~ 0*d4',
data = TD.s2, cluster = 'ID', std.lv=TRUE)
Comments:
- both models converged with no problems
TC
# weak invariance model
tc.weakInv <- cfa('level: 1
TC_W =~ a*c1 + b*c2 + c*c3
level: 2
TC_B =~ a*c1 + b*c2 + c*c3',
data = TC.s2, cluster = 'ID', std.lv=TRUE)
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "c1" has no variance within some clusters. The
## cluster ids with zero within variance are: S062 S082 S121
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "c2" has no variance within some clusters. The
## cluster ids with zero within variance are: S035 S097
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "c3" has no variance within some clusters. The
## cluster ids with zero within variance are: S097
# strong invariance model
tc.strInv <- cfa('level: 1
TC_W =~ a*c1 + b*c2 + c*c3
level: 2
TC_B =~ a*c1 + b*c2 + c*c3
c1 ~~ 0*c1
c2 ~~ 0*c2
c3 ~~ 0*c3',
data = TC.s2, cluster = 'ID', std.lv=TRUE)
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "c1" has no variance within some clusters. The
## cluster ids with zero within variance are: S062 S082 S121
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "c2" has no variance within some clusters. The
## cluster ids with zero within variance are: S035 S097
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "c3" has no variance within some clusters. The
## cluster ids with zero within variance are: S097
Comments:
- both models converged with no problems
MOOD
# weak invariance model
m3x3.weakInv <- cfa('level: 1
NV_w =~ a*v1 + b*v2 + c*v3
TA_w =~ d*t1 + e*t2 + f*t3
FA_w =~ g*f1 + h*f2 + i*f3
level: 2
NV_b =~ a*v1 + b*v2 + c*v3
TA_b =~ d*t1 + e*t2 + f*t3
FA_b =~ g*f1 + h*f2 + i*f3',
data = Mood.s2, cluster = 'ID', std.lv = TRUE)
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "t3" has no variance within some clusters. The
## cluster ids with zero within variance are: S097
## Warning in lav_object_post_check(object): lavaan WARNING: some estimated ov
## variances are negative
# strong invariance
m3x3.strInv <- cfa('level: 1
NV_w =~ a*v1 + b*v2 + c*v3
TA_w =~ d*t1 + e*t2 + f*t3
FA_w =~ g*f1 + h*f2 + i*f3
level: 2
NV_b =~ a*v1 + b*v2 + c*v3
TA_b =~ d*t1 + e*t2 + f*t3
FA_b =~ g*f1 + h*f2 + i*f3
v1 ~~ 0*v1
v2 ~~ 0*v2
v3 ~~ 0*v3
t1 ~~ 0*t1
t2 ~~ 0*t2
t3 ~~ 0*t3
f1 ~~ 0*f1
f2 ~~ 0*f2
f3 ~~ 0*f3',
data = Mood.s2, cluster = 'ID', std.lv = TRUE)
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "t3" has no variance within some clusters. The
## cluster ids with zero within variance are: S097
Comments:
- both models converged with no problems
- a negative variance (Heywood case) is again estimated for item t3 on level 2 by the weak invariance model (see IMPROPER SOLUTIONS)
As done in subsample s1 (IMPROPER SOLUTIONS), we try to solve the problem by fixing the residual variance to .15 x rho2_between (Jöreskog & Sörbom, 1996).
# m3x3.weakInv
m3x3.weakInv.fix <- 'level: 1
NV_w =~ a*v1 + b*v2 + c*v3
TA_w =~ d*t1 + e*t2 + f*t3
FA_w =~ g*f1 + h*f2 + i*f3
level: 2
NV_b =~ a*v1 + b*v2 + c*v3
TA_b =~ d*t1 + e*t2 + f*t3
FA_b =~ g*f1 + h*f2 + i*f3
t3 ~~ rho2 * t3'
m3x3.weakInv.fix <- cfa(gsub("rho2",t3varlv2*.15,m3x3.weakInv.fix),data=Mood.s2,cluster='ID',std.lv=TRUE) # fixing rho2
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "t3" has no variance within some clusters. The
## cluster ids with zero within variance are: S097
MODEL COMPARISON & ST. SOLUTION
Here, we compare the specified multilevel models by considering both the fit indices and the Akaike Information Criterion (AIC). Different subsamples are considered depending on the influential analysis and the presence of improper solutions.
TD
Here, we compare the specified models considering both the fit indices and the Akaike Information Criterion (AIC). For TD, we did not specify any subset alternative to the whole TD sample.
GOF <- cbind(round(fit.ind(model=c(td,td.weakInv,td.strInv),
models.names=c("td","td.weakInv","td.strInv")),3),
AIC=AIC(td,td.weakInv,td.strInv)[,2],
BIC=BIC(td,td.weakInv,td.strInv)[,2],
AICw=round(Weights(AIC(td,td.weakInv,td.strInv)),5))
(gof <- GOF[order(GOF$rmsea),c(1,5:8,10:11,2:4)])
Comments:
the hypothesized configural invariance model td shows quite satisfactory fit indices, although RMSEA = .07; CFI and SRMR (within and between) show the best values for this model
the weak invariance model td.weakInv shows satisfactory fit indices and the best evidence, as indicated by the lowest BIC and AIC (highest AICw) and the lowest RMSEA
the strong invariance model td.strInv is rejected due to unsatisfactory fit indices
Finally, we evaluate the standardized loadings, and the residual variances of the selected model.
loadings(td.weakInv) # standardized loadings
standardizedsolution(td.weakInv)[standardizedsolution(td.weakInv)$op=="~~",] # correlations and res. variances
Comments:
all standardized loadings are significant and higher than .60 (the lowest loading is estimated for item d3 on level 1)
as expected, standardized loadings on level 2 are higher than those on level 1, with the loadings estimated for d1 and d4 approaching 1
Conclusions:
The results from subsample s2 replicate those from subsample s1 for the Task Demand Scale.
TC
Here, we compare the specified models considering both the fit
indices and the Akaike Information Criterion (AIC). A second model
comparison is performed considering models with fixed residual variances
in those items showing improper solutions (see
SUBSAMPLE S1 - IMPROPER SOLUTION - FIXING VARIANCES
).
# all participants (N = 139) with negative variance for item c3 in model tc
GOF <- cbind(round(fit.ind(model=c(tc,tc.weakInv,tc.strInv),
models.names=c("tc","tc.weakInv","tc.strInv")),3),
AIC=AIC(tc,tc.weakInv,tc.strInv)[,2],
BIC=BIC(tc,tc.weakInv,tc.strInv)[,2],
AICw=round(Weights(AIC(tc,tc.weakInv,tc.strInv)),5))
(GOF <- GOF[order(GOF$rmsea),c(1,5:8,10:11,2:4)])
# all participants (N = 139) fixing residual variance to 15% of the total variance
GOF <- cbind(round(fit.ind(model=c(tc.fix,tc.weakInv,tc.strInv),
models.names=c("tc.fix","tc.weakInv","tc.strInv")),3),
AIC=AIC(tc.fix,tc.weakInv,tc.strInv)[,2],
BIC=BIC(tc.fix,tc.weakInv,tc.strInv)[,2],
AICw=round(Weights(AIC(tc.fix,tc.weakInv,tc.strInv)),5))
(GOF <- GOF[order(GOF$rmsea),c(1,5:8,10:11,2:4)])
Comments:
when all parameters are freely estimated, the hypothesized configural invariance model tc is saturated (i.e., 3 indicators for one latent factor), and we cannot rely on chi-squared-derived fit indices; when considering all participants, this model shows the highest AICw, whereas when the c3 residual variance is fixed, it shows an unsatisfactory RMSEA
the weak invariance model tc.weakInv shows satisfactory fit indices and the best evidence, as indicated by the lowest BIC
the strong invariance model tc.strInv is rejected due to unsatisfactory fit indices across the model comparisons
Finally, we evaluate the standardized loadings and the residual variances of the selected model, considering the one fitted on the whole sample.
loadings(tc.weakInv) # standardized loadings
standardizedsolution(tc.weakInv)[standardizedsolution(tc.weakInv)$op=="~~",] # correlations and res. variances
Comments:
all standardized loadings are significant and higher than .65 (the lowest loading is estimated for item c1 on level 1)
as expected, standardized loadings on level 2 are higher than those on level 1
Conclusions:
The results obtained from subsample s2 replicate those obtained from subsample s1 for the Task Control Scale.
MOOD
Here, we compare the specified models considering both the fit indices and the Akaike Information Criterion (AIC). A second model comparison is performed considering models with fixed residual variances in those items showing improper solutions (see ‘SUBSAMPLE S1’ - ‘IMPROPER SOLUTION’ - ‘FIXING VARIANCES’).
# all participants (N = 139), with negative variances in m3x3, m2x2 and m3x3.weakInv
# (see MODEL SPECIFICATION - MULTILEVEL MODELS)
GOF <- cbind(round(fit.ind(model=c(m3x3,m3x2,m2x2,m2x3,m3x3.weakInv,m3x3.strInv),
models.names=c("m3x3","m3x2","m2x2","m2x3","m3x3.weakInv","m3x3.strInv")),3),
AIC=AIC(m3x3,m3x2,m2x2,m2x3,m3x3.weakInv,m3x3.strInv)[,2],
BIC=BIC(m3x3,m3x2,m2x2,m2x3,m3x3.weakInv,m3x3.strInv)[,2],
AICw=round(Weights(AIC(m3x3,m3x2,m2x2,m2x3,m3x3.weakInv,m3x3.strInv)),5))
(GOF <- GOF[order(GOF$rmsea),c(1,5:8,10:11,2:4)])
# all participants (N = 139), fixing residual variance to 15% of the total variance
# (see IMPROPER SOLUTION - fixing RESIDUAL VARIANCE)
GOF <- cbind(round(fit.ind(model=c(m3x3.fix,m3x2,m2x2,m2x3.fix,m3x3.weakInv.fix,m3x3.strInv),
models.names=c("m3x3.fix","m3x2","m2x2","m2x3.fix","m3x3.weakInv.fix","m3x3.strInv")),3),
AIC=AIC(m3x3.fix,m3x2,m2x2,m2x3.fix,m3x3.weakInv.fix,m3x3.strInv)[,2],
BIC=BIC(m3x3.fix,m3x2,m2x2,m2x3.fix,m3x3.weakInv.fix,m3x3.strInv)[,2],
AICw=round(Weights(AIC(m3x3.fix,m3x2,m2x2,m2x3.fix,m3x3.weakInv.fix,m3x3.strInv)),5))
(GOF <- GOF[order(GOF$rmsea),c(1,5:8,10:11,2:4)])
Comments:
- the model comparison shows results similar to those obtained from subsample s1, selecting the configural and the weak invariance models as the best models
Finally, we evaluate the standardized loadings, the correlations between latent factors and the residual variances of the selected model, considering models with fixed residual variances in those items showing improper solutions.
loadings(m3x3.weakInv.fix) # standardized loadings
standardizedsolution(m3x3.weakInv.fix)[standardizedsolution(m3x3.weakInv.fix)$op=="~~",] # corr. and res. variances
Comments:
all standardized loadings are significant and higher than .56 (the lowest loading is estimated for item v2 on level 1)
as expected, standardized loadings on level 2 are higher than those on level 1
estimated correlations between latent factors are within the .45-.98 range, with the exception of the correlation estimated between Tense Arousal and Fatigue on level 1, which is .19 (.13, .26). The highest correlation is estimated between Negative Valence and Tense Arousal on level 2, whereas the lowest correlation is estimated between Tense Arousal and Fatigue on level 2.
Conclusions:
The results obtained from subsample s2 replicate those obtained from subsample s1, although with a higher estimate of the correlation between Negative Valence and Tense Arousal.
SUBSAMPLE s3
MODEL SPECIFICATION
lv1 STRUCTURE
Here, we conduct the preliminary steps suggested by Hox (2010, chapter 14) for MCFA full maximum likelihood estimation. Similar to what is recommended by Muthén (1994), the idea is to specify a set of models to separately assess the factor structure on Level 1 (within) and Level 2 (between), and to compare those models with the hypothesized multilevel model.
The first step evaluates the within-cluster factor structure. Following Muthén (1994), we conduct a conventional (one-level) CFA of the pooled within-cluster covariance matrix (Spw). The goodness of fit of the within-cluster structure is indicated by the fit indices of this model.
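For reference, a minimal sketch of how the pooled within-cluster covariance matrix can be computed by hand (assumed to be equivalent to the pw.cov element returned by mcfa.Huang(), which is used below; the function name pooled_within_cov is ours):
# group-mean center the items and pool the within-cluster cross-products,
# dividing by N - G (total observations minus number of clusters)
require(dplyr)
pooled_within_cov <- function(data, id="ID"){
  items <- setdiff(colnames(data), id)
  d <- na.omit(data[,c(id,items)])
  centered <- d %>%
    group_by(.data[[id]]) %>%
    mutate(across(all_of(items), ~ .x - mean(.x))) %>% # within-cluster centering
    ungroup()
  Y <- as.matrix(centered[,items])
  crossprod(Y) / (nrow(Y) - length(unique(d[[id]])))   # pooled within-cluster covariance
}
# e.g., round(pooled_within_cov(TD.s3),2)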
TD
# pooled covariance matrix (using function by Huang)
poolCov <- mcfa.Huang(dat=TD.s3,gp="ID")$pw.cov
# one-level model on the pooled within-cluster matrix
m.withinPool_TD <- cfa('TD =~ d1 + d2 + d3 + d4',sample.cov=poolCov,sample.nobs=nrow(TD.s3),std.lv=TRUE)
loadings(m.withinPool_TD,st="st.all") # standardized loadings
round(fit.ind(m.withinPool_TD,type="monolevel",from_summary=TRUE),3) # fit indices
## npar chisq df pvalue rmsea cfi srmr
## 8.000 10.065 2.000 0.007 0.049 0.997 0.011
Conclusions:
standardized loadings estimated from the pooled within-cluster covariance matrix are significant and higher than .58.
The model shows quite satisfactory fit indices (even slightly better than those found in subsample s1), suggesting that the within-cluster structure holds
TC
# pooled covariance matrix (using function by Huang)
poolCov <- mcfa.Huang(dat=TC.s3,gp="ID")$pw.cov
# one-level model on the pooled within-cluster matrix
m.withinPool_TC <- cfa('TC_W =~ c1 + c2 + c3',sample.cov=poolCov,sample.nobs=nrow(TC.s3),std.lv=TRUE)
loadings(m.withinPool_TC,st="st.all") # standardized loadings
Conclusions:
standardized loadings estimated from the pooled within-cluster covariance matrix are significant and higher than .60.
As the model is saturated, we cannot evaluate the goodness of fit using chi-squared-derived fit indices
MOOD
# pooled covariance matrix (using function by Huang)
poolCov <- mcfa.Huang(dat=Mood.s3,gp="ID")$pw.cov
# one-level model on the pooled within-cluster matrix
m.withinPool_Mood <- cfa('NV =~ v1 + v2 + v3
TA =~ t1 + t2 + t3
FA =~ f1 + f2 + f3',sample.cov=poolCov,sample.nobs=nrow(Mood.s3),std.lv=TRUE)
loadings(m.withinPool_Mood,st="st.all") # standardized loadings
round(fit.ind(m.withinPool_Mood,type="monolevel",from_summary=TRUE),3) # fit indices
## npar chisq df pvalue rmsea cfi srmr
## 21.000 242.836 24.000 0.000 0.068 0.966 0.032
Comments:
standardized loadings estimated from the pooled within-cluster covariance matrix are significant and higher than .59, similar to subsample s1
the model shows satisfactory fit indices (apart from RMSEA), suggesting that the within-cluster structure holds
lv2 STRUCTURE
Here, we conduct the preliminary steps suggested by Hox (2010, chapter 14) for MCFA full maximum likelihood estimation. Similar to what is recommended by Muthén (1994), the idea is to specify a set of models to separately assess the factor structure on Level 1 (within) and Level 2 (between), and to compare those models with the hypothesized multilevel model.
The second step is to evaluate the between-cluster factor structure by specifying a set of benchmark models for the group level (Hox, 2010). This is done to test whether there is a between-cluster structure to be modeled.
TD
1. Null model
First, we specify a null model with no specification on level 2. If this model holds, it will mean that there is no cluster-level structure at all (all covariances in the between-clusters matrix are the result of individual sampling variation), and we can continue using a conventional (mono-level) analysis.
# one-factor within model + null between model
m.null_TD <- cfa('level: 1
TD_W =~ d1 + d2 + d3 + d4
level: 2
d1 ~~ 0*d1
d2 ~~ 0*d2
d3 ~~ 0*d3
d4 ~~ 0*d4',
data=TD.s3, cluster="ID", std.lv=TRUE)
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "d1" has no variance within some clusters. The
## cluster ids with zero within variance are: S003 S007 S045 S049
## S086 S123 S040
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "d2" has no variance within some clusters. The
## cluster ids with zero within variance are: S020 S049 S055 S058
## S064 S099 S123
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "d3" has no variance within some clusters. The
## cluster ids with zero within variance are: S060 S099
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "d4" has no variance within some clusters. The
## cluster ids with zero within variance are: S045 S049 S058 S086
## S099 S174
2. Independence model
Second, we specify an independence model on level 2, by specifying only variances but not covariances on this level. If this model holds, it will mean that “there is cluster-level structure, but no substantively interesting structural model”, and we can focus our analyses on the pooled within-cluster matrix. Otherwise, it will mean that some kind of structure exists on Level 2 (Hox, 2010).
# one-factor within model + independence between model
m.ind_TD <- cfa('level: 1
TD_W =~ d1 + d2 + d3 + d4
level: 2
d1 ~~ d1
d2 ~~ d2
d3 ~~ d3
d4 ~~ d4',
data=TD.s3, cluster="ID", std.lv=TRUE)
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "d1" has no variance within some clusters. The
## cluster ids with zero within variance are: S003 S007 S045 S049
## S086 S123 S040
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "d2" has no variance within some clusters. The
## cluster ids with zero within variance are: S020 S049 S055 S058
## S064 S099 S123
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "d3" has no variance within some clusters. The
## cluster ids with zero within variance are: S060 S099
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "d4" has no variance within some clusters. The
## cluster ids with zero within variance are: S045 S049 S058 S086
## S099 S174
3. Saturated model
Third, we specify a saturated model for the between-clusters part that fits a full covariance matrix to the family-level [here, participant-level] observations. This allows us “to examine the best possible fit given the individual-level model” (Hox, 2010). If this model holds, the factors at the within-level in this model correspond to ‘within-only’ constructs, meaning that the construct only ‘exists’ at the within level, whereas lv2 variation is just ‘spurious’ (Stapleton et al., 2016).
# one-factor within model + saturated between part
m.sat_TD <- cfa('level: 1
TD_W =~ d1 + d2 + d3 + d4
level: 2
d1 ~~ d1 + d2 + d3
d2 ~~ d2 + d3 + d4
d3 ~~ d3 + d4
d4 ~~ d4',
data = TD.s3, cluster = "ID", std.lv=TRUE)
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "d1" has no variance within some clusters. The
## cluster ids with zero within variance are: S003 S007 S045 S049
## S086 S123 S040
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "d2" has no variance within some clusters. The
## cluster ids with zero within variance are: S020 S049 S055 S058
## S064 S099 S123
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "d3" has no variance within some clusters. The
## cluster ids with zero within variance are: S060 S099
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "d4" has no variance within some clusters. The
## cluster ids with zero within variance are: S045 S049 S058 S086
## S099 S174
Finally, we evaluate the fit of these benchmark models. If the structure on level 2 holds, all models (and especially the Null and the Independence) should be rejected.
round(fit.ind(model=c(m.null_TD,m.ind_TD,m.sat_TD),
models.names=c("Lv2 Null","Lv2 Independence","Lv2 Saturated")),3)
Comments:
all benchmark models converged with no problems
none of the models showed satisfactory goodness of fit, and all are rejected
some participants showed no intra-individual variability for some items
TC
1. Null model
First, we specify a null model with no specification on level 2. If this model holds, it will mean that there is no cluster-level structure at all (all covariances in the between-clusters matrix are the result of individual sampling variation), and we can continue using a conventional (mono-level) analysis.
# one-factor within model + null between model
m.null_TC <- cfa('level: 1
TC_W =~ c1 + c2 + c3
level: 2
c1 ~~ 0*c1
c2 ~~ 0*c2
c3 ~~ 0*c3',
data=TC.s3, cluster="ID", std.lv=TRUE)
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "c1" has no variance within some clusters. The
## cluster ids with zero within variance are: S055 S062 S076 S082
## S121 S125
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "c2" has no variance within some clusters. The
## cluster ids with zero within variance are: S008 S035 S092 S097
## S174
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "c3" has no variance within some clusters. The
## cluster ids with zero within variance are: S008 S058 S097
2. Independence model
Second, we specify an independence model on level 2, by specifying only variances but not covariances on this level. If this model holds, it will mean that “there is cluster-level structure, but no substantively interesting structural model”, and we can focus our analyses on the pooled within-cluster matrix. Otherwise, it will mean that some kind of structure exists on Level 2.
# one-factor within model + independence between model
m.ind_TC <- cfa('level: 1
TC_W =~ c1 + c2 + c3
level: 2
c1 ~~ c1
c2 ~~ c2
c3 ~~ c3',
data=TC.s3, cluster="ID", std.lv=TRUE)
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "c1" has no variance within some clusters. The
## cluster ids with zero within variance are: S055 S062 S076 S082
## S121 S125
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "c2" has no variance within some clusters. The
## cluster ids with zero within variance are: S008 S035 S092 S097
## S174
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "c3" has no variance within some clusters. The
## cluster ids with zero within variance are: S008 S058 S097
3. Saturated model
Third, we specify a saturated model for the between-clusters part that fits a full covariance matrix to the family-level [here, participant-level] observations. This allows us “to examine the best possible fit given the individual-level model” (Hox, 2010). If this model holds, the factors at the within-level in this model correspond to ‘within-only’ constructs, meaning that the construct only ‘exists’ at the within level, whereas lv2 variation is just ‘spurious’ (Stapleton et al., 2016).
# one-factor within model + saturated between part
m.sat_TC <- cfa('level: 1
TC_W =~ c1 + c2 + c3
level: 2
c1 ~~ c1 + c2 + c3
c2 ~~ c2 + c3
c3 ~~ c3',
data=TC.s3, cluster="ID", std.lv=TRUE)
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "c1" has no variance within some clusters. The
## cluster ids with zero within variance are: S055 S062 S076 S082
## S121 S125
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "c2" has no variance within some clusters. The
## cluster ids with zero within variance are: S008 S035 S092 S097
## S174
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "c3" has no variance within some clusters. The
## cluster ids with zero within variance are: S008 S058 S097
Finally, we evaluate the fit of these benchmark models. If the structure on level 2 holds, all models (and especially the Null and the Independence) should be rejected. Note that here the Lv2-saturated model is saturated, and thus we cannot evaluate the model’s fit using chi-squared-derived fit indices.
fit.ind(model=c(m.null_TC,m.ind_TC,m.sat_TC),models.names=c("Lv2 Null","Lv2 Independence","Lv2 Saturated"))
Comments:
all benchmark models converged with no problems
lv2 Null and Independence models show unsatisfactory goodness of fit, and are rejected
lv2 Saturated model is saturated, and we cannot use chi-squared-derived fit indices
Many participants showed no intra-individual variability for some items
MOOD
1. Null model
First, we specify a null model with no specification on level 2. If this model holds, it will mean that “there is no cluster-level structure at all” (“all covariances in the between-clusters matrix are the result of individual sampling variation”), and we can continue using a conventional (mono-level) analysis.
# one-factor within model + null between model
m.null_Mood <- cfa('level: 1
NV_W =~ v1 + v2 + v3
TA_W =~ t1 + t2 + t3
FA_W =~ f1 + f2 + f3
level: 2
v1 ~~ 0*v1
v2 ~~ 0*v2
v3 ~~ 0*v3
t1 ~~ 0*t1
t2 ~~ 0*t2
t3 ~~ 0*t3
f1 ~~ 0*f1
f2 ~~ 0*f2
f3 ~~ 0*f3', data=Mood.s3,cluster='ID',std.lv=TRUE)
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v1" has no variance within some clusters. The
## cluster ids with zero within variance are: S045 S047 S049 S123
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v2" has no variance within some clusters. The
## cluster ids with zero within variance are: S056 S089 S040
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v3" has no variance within some clusters. The
## cluster ids with zero within variance are: S049 S056 S065 S089
## S123
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "t2" has no variance within some clusters. The
## cluster ids with zero within variance are: S045 S123
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "t3" has no variance within some clusters. The
## cluster ids with zero within variance are: S031 S049 S058 S097
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "f1" has no variance within some clusters. The
## cluster ids with zero within variance are: S031 S047 S089
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "f2" has no variance within some clusters. The
## cluster ids with zero within variance are: S015 S056
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "f3" has no variance within some clusters. The
## cluster ids with zero within variance are: S055 S123
2. Independence model
Second, we specify an independence model on level 2, by specifying only variances but not covariances on this level. If this model holds, it will mean that “there is cluster-level structure, but no substantively interesting structural model”, and we can focus our analyses on the pooled within-cluster matrix. Otherwise, it will mean that some kind of structure exists on Level 2.
# one-factor within model + independence between model
m.ind_Mood <- cfa('level: 1
NV_W =~ v1 + v2 + v3
TA_W =~ t1 + t2 + t3
FA_W =~ f1 + f2 + f3
level: 2
v1 ~~ v1
v2 ~~ v2
v3 ~~ v3
t1 ~~ t1
t2 ~~ t2
t3 ~~ t3
f1 ~~ f1
f2 ~~ f2
f3 ~~ f3', data=Mood.s3,cluster='ID',std.lv=TRUE)
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v1" has no variance within some clusters. The
## cluster ids with zero within variance are: S045 S047 S049 S123
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v2" has no variance within some clusters. The
## cluster ids with zero within variance are: S056 S089 S040
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v3" has no variance within some clusters. The
## cluster ids with zero within variance are: S049 S056 S065 S089
## S123
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "t2" has no variance within some clusters. The
## cluster ids with zero within variance are: S045 S123
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "t3" has no variance within some clusters. The
## cluster ids with zero within variance are: S031 S049 S058 S097
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "f1" has no variance within some clusters. The
## cluster ids with zero within variance are: S031 S047 S089
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "f2" has no variance within some clusters. The
## cluster ids with zero within variance are: S015 S056
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "f3" has no variance within some clusters. The
## cluster ids with zero within variance are: S055 S123
3. Saturated model
Third, we specify a saturated model for the between-clusters part that fits a full covariance matrix to the family-level observations. This allows us “to examine the best possible fit given the individual-level [here, within-individual] model” (Hox, 2010). If this model holds, the factors at the within-level in this model correspond to ‘within-only’ constructs, meaning that the construct only ‘exists’ at the within level, whereas lv2 variation is just ‘spurious’ (Stapleton et al., 2016).
# one-factor within model + saturated between part
m.sat_Mood <- cfa('level: 1
NV_W =~ v1 + v2 + v3
TA_W =~ t1 + t2 + t3
FA_W =~ f1 + f2 + f3
level: 2
v1 ~~ v1 + v2 + v3 + t1 + t2 + t3 + f1 + f2 + f3
v2 ~~ v2 + v3 + t1 + t2 + t3 + f1 + f2 + f3
v3 ~~ v3 + t1 + t2 + t3 + f1 + f2 + f3
t1 ~~ t1 + t2 + t3 + f1 + f2 + f3
t2 ~~ t2 + t3 + f1 + f2 + f3
t3 ~~ t3 + f1 + f2 + f3
f1 ~~ f1 + f2 + f3
f2 ~~ f2 + f3
f3 ~~ f3', data=Mood.s3,cluster='ID',std.lv=TRUE)
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v1" has no variance within some clusters. The
## cluster ids with zero within variance are: S045 S047 S049 S123
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v2" has no variance within some clusters. The
## cluster ids with zero within variance are: S056 S089 S040
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v3" has no variance within some clusters. The
## cluster ids with zero within variance are: S049 S056 S065 S089
## S123
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "t2" has no variance within some clusters. The
## cluster ids with zero within variance are: S045 S123
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "t3" has no variance within some clusters. The
## cluster ids with zero within variance are: S031 S049 S058 S097
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "f1" has no variance within some clusters. The
## cluster ids with zero within variance are: S031 S047 S089
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "f2" has no variance within some clusters. The
## cluster ids with zero within variance are: S015 S056
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "f3" has no variance within some clusters. The
## cluster ids with zero within variance are: S055 S123
Finally, we evaluate the fit of these benchmark models. If the structure on level 2 holds, all models (and especially the Null and the Independence model) should be rejected.
round(fit.ind(model=c(m.null_Mood,m.ind_Mood,m.sat_Mood),models.names=c("Lv2 Null","Lv2 Independence","Lv2 Saturated")),3)
Comments:
all benchmark models converged with no problems
none of the models showed fully satisfactory goodness of fit, and all are rejected, although the saturated model showed an acceptable fit
many participants showed no intra-individual variability for some items
MULTILEVEL MODELS
Here, we specify the hypothesized multilevel CFA models and, only for Mood items, alternative models with a different number of dimensions. Note that each model is parametrized by standardizing the latent factors (std.lv = TRUE) to avoid fixing to 1 the first indicator of each dimension.
TD
# hypothesized multilevel model
td <- cfa('level: 1
TD_W =~ d1 + d2 + d3 + d4
level: 2
TD_B =~ d1 + d2 + d3 + d4',
data=TD.s3, cluster="ID", std.lv=TRUE)
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "d1" has no variance within some clusters. The
## cluster ids with zero within variance are: S003 S007 S045 S049
## S086 S123 S040
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "d2" has no variance within some clusters. The
## cluster ids with zero within variance are: S020 S049 S055 S058
## S064 S099 S123
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "d3" has no variance within some clusters. The
## cluster ids with zero within variance are: S060 S099
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "d4" has no variance within some clusters. The
## cluster ids with zero within variance are: S045 S049 S058 S086
## S099 S174
Comments:
- the model converged with no problems
TC
# hypothesized multilevel model
tc <- cfa('level: 1
TC_W =~ c1 + c2 + c3
level: 2
TC_B =~ c1 + c2 + c3',
data=TC.s3, cluster="ID", std.lv=TRUE)
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "c1" has no variance within some clusters. The
## cluster ids with zero within variance are: S055 S062 S076 S082
## S121 S125
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "c2" has no variance within some clusters. The
## cluster ids with zero within variance are: S008 S035 S092 S097
## S174
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "c3" has no variance within some clusters. The
## cluster ids with zero within variance are: S008 S058 S097
## Warning in lav_object_post_check(object): lavaan WARNING: some estimated ov
## variances are negative
Comments:
the model converged with no problems
a negative variance (Heywood case) is estimated for item c3 on level 2, as in subsample s1 (see SUBSAMPLE s1 - IMPROPER SOLUTIONS)
parameterestimates(tc)[parameterestimates(tc)$op=="~~" & parameterestimates(tc)$level==2,
c("lhs","level","est","se","ci.lower","ci.upper")][1:3,]
As done in subsample s1 (IMPROPER SOLUTIONS), we try to solve the problem by fixing the residual variance to .15 x rho2_between (Jöreskog & Sörbom, 1996).
# tc
tc.fix <- 'level: 1
TC_W =~ c1 + c2 + c3
level: 2
TC_B =~ c1 + c2 + c3
c3 ~~ rho2 * c3'
fit <- lmer(c3 ~ 1 + (1|ID),data=TC.s3) # null LMER model
c3varlv2 <- as.data.frame(VarCorr(fit))[1,4] # between-subjects variance of item c3
tc.fix <- cfa(gsub("rho2",c3varlv2*.15,tc.fix),data=TC.s3,cluster='ID',std.lv=TRUE) # fixing rho2
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "c1" has no variance within some clusters. The
## cluster ids with zero within variance are: S055 S062 S076 S082
## S121 S125
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "c2" has no variance within some clusters. The
## cluster ids with zero within variance are: S008 S035 S092 S097
## S174
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "c3" has no variance within some clusters. The
## cluster ids with zero within variance are: S008 S058 S097
MOOD
For Mood, we specify the hypothesized multilevel configural model m3x3 (with three factors on level 1 and three factors on level 2), and a set of alternative models with fewer dimensions (m3x2, m2x2, m2x3).
1) 3x3 model
Here, we specify the hypothesized model for Mood, using all 9 items to measure 3 latent variables on both levels.
# hypothesized multilevel model
m3x3 <- cfa('level: 1
NV_w =~ v1 + v2 + v3
TA_w =~ t1 + t2 + t3
FA_w =~ f1 + f2 + f3
level: 2
NV_b =~ v1 + v2 + v3
TA_b =~ t1 + t2 + t3
FA_b =~ f1 + f2 + f3',
data=Mood.s3, cluster='ID', std.lv=TRUE)
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v1" has no variance within some clusters. The
## cluster ids with zero within variance are: S045 S047 S049 S123
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v2" has no variance within some clusters. The
## cluster ids with zero within variance are: S056 S089 S040
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v3" has no variance within some clusters. The
## cluster ids with zero within variance are: S049 S056 S065 S089
## S123
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "t2" has no variance within some clusters. The
## cluster ids with zero within variance are: S045 S123
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "t3" has no variance within some clusters. The
## cluster ids with zero within variance are: S031 S049 S058 S097
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "f1" has no variance within some clusters. The
## cluster ids with zero within variance are: S031 S047 S089
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "f2" has no variance within some clusters. The
## cluster ids with zero within variance are: S015 S056
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "f3" has no variance within some clusters. The
## cluster ids with zero within variance are: S055 S123
## Warning in lav_object_post_check(object): lavaan WARNING: some estimated ov
## variances are negative
2) Alternative models
Here, alternative models are specified with fewer latent factors on level 1 and/or level 2. Specifically, models with two latent variables (Negative Tone and Fatigue) are justified by the strong correlation between Negative Valence and Tense Arousal items, especially on level 2.
# 3 latent on lv1, 2 latent on lv2
m3x2 <- cfa('level: 1
NV_w =~ v1 + v2 + v3
TA_w =~ t1 + t2 + t3
FA_w =~ f1 + f2 + f3
level: 2
NegTone_b =~ v1 + v2 + v3 + t1 + t2 + t3
FA_b =~ f1 + f2 + f3',
data=Mood.s3, cluster='ID', std.lv=TRUE)
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v1" has no variance within some clusters. The
## cluster ids with zero within variance are: S045 S047 S049 S123
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v2" has no variance within some clusters. The
## cluster ids with zero within variance are: S056 S089 S040
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v3" has no variance within some clusters. The
## cluster ids with zero within variance are: S049 S056 S065 S089
## S123
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "t2" has no variance within some clusters. The
## cluster ids with zero within variance are: S045 S123
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "t3" has no variance within some clusters. The
## cluster ids with zero within variance are: S031 S049 S058 S097
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "f1" has no variance within some clusters. The
## cluster ids with zero within variance are: S031 S047 S089
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "f2" has no variance within some clusters. The
## cluster ids with zero within variance are: S015 S056
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "f3" has no variance within some clusters. The
## cluster ids with zero within variance are: S055 S123
# 2 latent factors on both levels
m2x2 <- cfa('level: 1
NegTone_w =~ v1 + v2 + v3 + t1 + t2 + t3
FA_w =~ f1 + f2 + f3
level: 2
NegTone_b =~ v1 + v2 + v3 + t1 + t2 + t3
FA_b =~ f1 + f2 + f3',
data=Mood.s3, cluster='ID', std.lv=TRUE)
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v1" has no variance within some clusters. The
## cluster ids with zero within variance are: S045 S047 S049 S123
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v2" has no variance within some clusters. The
## cluster ids with zero within variance are: S056 S089 S040
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v3" has no variance within some clusters. The
## cluster ids with zero within variance are: S049 S056 S065 S089
## S123
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "t2" has no variance within some clusters. The
## cluster ids with zero within variance are: S045 S123
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "t3" has no variance within some clusters. The
## cluster ids with zero within variance are: S031 S049 S058 S097
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "f1" has no variance within some clusters. The
## cluster ids with zero within variance are: S031 S047 S089
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "f2" has no variance within some clusters. The
## cluster ids with zero within variance are: S015 S056
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "f3" has no variance within some clusters. The
## cluster ids with zero within variance are: S055 S123
# 2 latent on lv1, 3 latent on lv2
m2x3 <- cfa('level: 1
NegTone_w =~ v1 + v2 + v3 + t1 + t2 + t3
FA_w =~ f1 + f2 + f3
level: 2
NV_b =~ v1 + v2 + v3
TA_b =~ t1 + t2 + t3
FA_b =~ f1 + f2 + f3',
data=Mood.s3, cluster='ID', std.lv=TRUE)
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v1" has no variance within some clusters. The
## cluster ids with zero within variance are: S045 S047 S049 S123
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v2" has no variance within some clusters. The
## cluster ids with zero within variance are: S056 S089 S040
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v3" has no variance within some clusters. The
## cluster ids with zero within variance are: S049 S056 S065 S089
## S123
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "t2" has no variance within some clusters. The
## cluster ids with zero within variance are: S045 S123
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "t3" has no variance within some clusters. The
## cluster ids with zero within variance are: S031 S049 S058 S097
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "f1" has no variance within some clusters. The
## cluster ids with zero within variance are: S031 S047 S089
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "f2" has no variance within some clusters. The
## cluster ids with zero within variance are: S015 S056
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "f3" has no variance within some clusters. The
## cluster ids with zero within variance are: S055 S123
## Warning in lav_object_post_check(object): lavaan WARNING: some estimated ov
## variances are negative
Comments:
all 4 models converged
negative variances (Heywood cases) are estimated by two models (m3x3 and m2x3) for item t3 on level 2, an issue that potentially generalizes to all Mood models, as in subsample s1 (see SUBSAMPLE s1 - IMPROPER SOLUTIONS)
# m3x3
parameterestimates(m3x3)[parameterestimates(m3x3)$op=="~~" & parameterestimates(m3x3)$ci.lower<0,
c("lhs","level","est","se","ci.lower","ci.upper")]
# m3x2
parameterestimates(m3x2)[parameterestimates(m3x2)$op=="~~" & parameterestimates(m3x2)$ci.lower<0,
c("lhs","level","est","se","ci.lower","ci.upper")]
# m2x2
parameterestimates(m2x2)[parameterestimates(m2x2)$op=="~~" & parameterestimates(m2x2)$ci.lower<0,
c("lhs","level","est","se","ci.lower","ci.upper")]
# m2x3
parameterestimates(m2x3)[parameterestimates(m2x3)$op=="~~" & parameterestimates(m2x3)$ci.lower<0,
c("lhs","level","est","se","ci.lower","ci.upper")]
As done in subsample s1 (IMPROPER SOLUTIONS), we try to solve the problems by fixing the residual variance to .15 x rho2_between (Jöreskog & Sörbom, 1996).
# m3x3
m3x3.fix <- 'level: 1
NV_w =~ v1 + v2 + v3
TA_w =~ t1 + t2 + t3
FA_w =~ f1 + f2 + f3
level: 2
NV_b =~ v1 + v2 + v3
TA_b =~ t1 + t2 + t3
FA_b =~ f1 + f2 + f3
t3 ~~ rho2 * t3'
fit <- lmer(t3 ~ 1 + (1|ID), data=Mood.s3) # null LMER model
t3varlv2 <- as.data.frame(VarCorr(fit))[1,4] # between-subjects variance of item t3
m3x3.fix <- cfa(gsub("rho2",t3varlv2*.15,m3x3.fix),data=Mood.s3,cluster='ID',std.lv=TRUE) # fixing rho2
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v1" has no variance within some clusters. The
## cluster ids with zero within variance are: S045 S047 S049 S123
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v2" has no variance within some clusters. The
## cluster ids with zero within variance are: S056 S089 S040
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v3" has no variance within some clusters. The
## cluster ids with zero within variance are: S049 S056 S065 S089
## S123
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "t2" has no variance within some clusters. The
## cluster ids with zero within variance are: S045 S123
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "t3" has no variance within some clusters. The
## cluster ids with zero within variance are: S031 S049 S058 S097
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "f1" has no variance within some clusters. The
## cluster ids with zero within variance are: S031 S047 S089
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "f2" has no variance within some clusters. The
## cluster ids with zero within variance are: S015 S056
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "f3" has no variance within some clusters. The
## cluster ids with zero within variance are: S055 S123
# m2x3
m2x3.fix <- 'level: 1
NegTone_w =~ v1 + v2 + v3 + t1 + t2 + t3
FA_w =~ f1 + f2 + f3
level: 2
NV_b =~ v1 + v2 + v3
TA_b =~ t1 + t2 + t3
FA_b =~ f1 + f2 + f3
t3 ~~ rho2 * t3'
m2x3.fix <- cfa(gsub("rho2",t3varlv2*.15,m2x3.fix),data=Mood.s3,cluster='ID',std.lv=TRUE) # fixing rho2
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v1" has no variance within some clusters. The
## cluster ids with zero within variance are: S045 S047 S049 S123
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v2" has no variance within some clusters. The
## cluster ids with zero within variance are: S056 S089 S040
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v3" has no variance within some clusters. The
## cluster ids with zero within variance are: S049 S056 S065 S089
## S123
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "t2" has no variance within some clusters. The
## cluster ids with zero within variance are: S045 S123
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "t3" has no variance within some clusters. The
## cluster ids with zero within variance are: S031 S049 S058 S097
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "f1" has no variance within some clusters. The
## cluster ids with zero within variance are: S031 S047 S089
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "f2" has no variance within some clusters. The
## cluster ids with zero within variance are: S015 S056
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "f3" has no variance within some clusters. The
## cluster ids with zero within variance are: S055 S123
CROSS-lv INVARIANCE
Here, we specify the models assuming cross-level invariance (or cross-level isomorphism). According to Jak & Jorgensen (2017), different levels of factor invariance can be tested: (1) configural invariance, when the same factor structure holds but factor loadings differ across clusters, (2) weak factorial invariance, when the factor loadings are equal across clusters, and (3) strong factorial invariance, when the values of factor loadings and intercepts are equal across clusters, and the residual variance on level 2 is zero.
Since each of our constructs (TD, TC and Mood) is assumed to be a configural cluster construct (see Stapleton et al., 2016), our conceptualization should be supported by a better fit of the models assuming (at least weak) cross-level invariance. Note that each model is parametrized by standardizing the latent factors (std.lv = TRUE) to avoid fixing to 1 the first indicator of each dimension.
TD
# weak invariance model
td.weakInv <- cfa('level: 1
TD_W =~ a*d1 + b*d2 + c*d3 + d*d4
level: 2
TD_B =~ a*d1 + b*d2 + c*d3 + d*d4',
data = TD.s3, cluster = 'ID', std.lv=TRUE)
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "d1" has no variance within some clusters. The
## cluster ids with zero within variance are: S003 S007 S045 S049
## S086 S123 S040
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "d2" has no variance within some clusters. The
## cluster ids with zero within variance are: S020 S049 S055 S058
## S064 S099 S123
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "d3" has no variance within some clusters. The
## cluster ids with zero within variance are: S060 S099
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "d4" has no variance within some clusters. The
## cluster ids with zero within variance are: S045 S049 S058 S086
## S099 S174
# strong invariance model
td.strInv <- cfa('level: 1
TD_W =~ a*d1 + b*d2 + c*d3 + d*d4
level: 2
TD_B =~ a*d1 + b*d2 + c*d3 + d*d4
d1 ~~ 0*d1
d2 ~~ 0*d2
d3 ~~ 0*d3
d4 ~~ 0*d4',
data = TD.s3, cluster = 'ID', std.lv=TRUE)
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "d1" has no variance within some clusters. The
## cluster ids with zero within variance are: S003 S007 S045 S049
## S086 S123 S040
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "d2" has no variance within some clusters. The
## cluster ids with zero within variance are: S020 S049 S055 S058
## S064 S099 S123
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "d3" has no variance within some clusters. The
## cluster ids with zero within variance are: S060 S099
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "d4" has no variance within some clusters. The
## cluster ids with zero within variance are: S045 S049 S058 S086
## S099 S174
Comments:
- both models converged with no problems
TC
# weak invariance model
tc.weakInv <- cfa('level: 1
TC_W =~ a*c1 + b*c2 + c*c3
level: 2
TC_B =~ a*c1 + b*c2 + c*c3',
data = TC.s3, cluster = 'ID', std.lv=TRUE)
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "c1" has no variance within some clusters. The
## cluster ids with zero within variance are: S055 S062 S076 S082
## S121 S125
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "c2" has no variance within some clusters. The
## cluster ids with zero within variance are: S008 S035 S092 S097
## S174
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "c3" has no variance within some clusters. The
## cluster ids with zero within variance are: S008 S058 S097
# strong invariance model
tc.strInv <- cfa('level: 1
TC_W =~ a*c1 + b*c2 + c*c3
level: 2
TC_B =~ a*c1 + b*c2 + c*c3
c1 ~~ 0*c1
c2 ~~ 0*c2
c3 ~~ 0*c3',
data = TC.s3, cluster = 'ID', std.lv=TRUE)
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "c1" has no variance within some clusters. The
## cluster ids with zero within variance are: S055 S062 S076 S082
## S121 S125
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "c2" has no variance within some clusters. The
## cluster ids with zero within variance are: S008 S035 S092 S097
## S174
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "c3" has no variance within some clusters. The
## cluster ids with zero within variance are: S008 S058 S097
Comments:
- both models converged with no problems
MOOD
# weak invariance model
m3x3.weakInv <- cfa('level: 1
NV_w =~ a*v1 + b*v2 + c*v3
TA_w =~ d*t1 + e*t2 + f*t3
FA_w =~ g*f1 + h*f2 + i*f3
level: 2
NV_b =~ a*v1 + b*v2 + c*v3
TA_b =~ d*t1 + e*t2 + f*t3
FA_b =~ g*f1 + h*f2 + i*f3',
data = Mood.s3, cluster = 'ID', std.lv = TRUE)
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v1" has no variance within some clusters. The
## cluster ids with zero within variance are: S045 S047 S049 S123
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v2" has no variance within some clusters. The
## cluster ids with zero within variance are: S056 S089 S040
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v3" has no variance within some clusters. The
## cluster ids with zero within variance are: S049 S056 S065 S089
## S123
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "t2" has no variance within some clusters. The
## cluster ids with zero within variance are: S045 S123
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "t3" has no variance within some clusters. The
## cluster ids with zero within variance are: S031 S049 S058 S097
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "f1" has no variance within some clusters. The
## cluster ids with zero within variance are: S031 S047 S089
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "f2" has no variance within some clusters. The
## cluster ids with zero within variance are: S015 S056
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "f3" has no variance within some clusters. The
## cluster ids with zero within variance are: S055 S123
## Warning in lav_object_post_check(object): lavaan WARNING: some estimated ov
## variances are negative
# strong invariance
m3x3.strInv <- cfa('level: 1
NV_w =~ a*v1 + b*v2 + c*v3
TA_w =~ d*t1 + e*t2 + f*t3
FA_w =~ g*f1 + h*f2 + i*f3
level: 2
NV_b =~ a*v1 + b*v2 + c*v3
TA_b =~ d*t1 + e*t2 + f*t3
FA_b =~ g*f1 + h*f2 + i*f3
v1 ~~ 0*v1
v2 ~~ 0*v2
v3 ~~ 0*v3
t1 ~~ 0*t1
t2 ~~ 0*t2
t3 ~~ 0*t3
f1 ~~ 0*f1
f2 ~~ 0*f2
f3 ~~ 0*f3',
data = Mood.s3, cluster = 'ID', std.lv = TRUE)
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v1" has no variance within some clusters. The
## cluster ids with zero within variance are: S045 S047 S049 S123
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v2" has no variance within some clusters. The
## cluster ids with zero within variance are: S056 S089 S040
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v3" has no variance within some clusters. The
## cluster ids with zero within variance are: S049 S056 S065 S089
## S123
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "t2" has no variance within some clusters. The
## cluster ids with zero within variance are: S045 S123
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "t3" has no variance within some clusters. The
## cluster ids with zero within variance are: S031 S049 S058 S097
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "f1" has no variance within some clusters. The
## cluster ids with zero within variance are: S031 S047 S089
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "f2" has no variance within some clusters. The
## cluster ids with zero within variance are: S015 S056
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "f3" has no variance within some clusters. The
## cluster ids with zero within variance are: S055 S123
Comments:
both models converged with no problems
a negative variance (Heywood case) is estimated again for item t3 on level 2 by the weak invariance model
As done in subsample s1 (IMPROPER SOLUTIONS), we try to solve the problem by fixing the residual variance to .15 x rho2_between (Jöreskog & Sörbom, 1996).
# m3x3.weakInv
m3x3.weakInv.fix <- 'level: 1
NV_w =~ a*v1 + b*v2 + c*v3
TA_w =~ d*t1 + e*t2 + f*t3
FA_w =~ g*f1 + h*f2 + i*f3
level: 2
NV_b =~ a*v1 + b*v2 + c*v3
TA_b =~ d*t1 + e*t2 + f*t3
FA_b =~ g*f1 + h*f2 + i*f3
t3 ~~ rho2 * t3'
m3x3.weakInv.fix <- cfa(gsub("rho2",t3varlv2*.15,m3x3.weakInv.fix),data=Mood.s3,cluster='ID',std.lv=TRUE) # fixing rho2
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v1" has no variance within some clusters. The
## cluster ids with zero within variance are: S045 S047 S049 S123
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v2" has no variance within some clusters. The
## cluster ids with zero within variance are: S056 S089 S040
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "v3" has no variance within some clusters. The
## cluster ids with zero within variance are: S049 S056 S065 S089
## S123
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "t2" has no variance within some clusters. The
## cluster ids with zero within variance are: S045 S123
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "t3" has no variance within some clusters. The
## cluster ids with zero within variance are: S031 S049 S058 S097
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "f1" has no variance within some clusters. The
## cluster ids with zero within variance are: S031 S047 S089
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "f2" has no variance within some clusters. The
## cluster ids with zero within variance are: S015 S056
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
## Level-1 variable "f3" has no variance within some clusters. The
## cluster ids with zero within variance are: S055 S123
MODEL COMPARISON & ST. SOLUTION
Here, we compare the specified multilevel models by considering both the fit indices and the Akaike Information Criterion (AIC). Different subsamples are considered depending on the influential analysis and the presence of improper solutions.
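For reference, the AICw column reported in the tables below corresponds to Akaike weights; a minimal sketch of how they are obtained from raw AIC values (the AIC values shown here are hypothetical, and the same computation is performed by MuMIn::Weights()):
# minimal sketch: Akaike weights from hypothetical AIC values (same computation as MuMIn::Weights())
aic <- c(configural = 25120, weakInv = 25112, strInv = 25180) # hypothetical AIC values
delta <- aic - min(aic) # AIC differences from the best-fitting model
round(exp(-0.5 * delta) / sum(exp(-0.5 * delta)), 5) # Akaike weights (sum to 1)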
TD
GOF <- cbind(round(fit.ind(model=c(td,td.weakInv,td.strInv),
models.names=c("td","td.weakInv","td.strInv")),3),
AIC=AIC(td,td.weakInv,td.strInv)[,2],
BIC=BIC(td,td.weakInv,td.strInv)[,2],
AICw=round(Weights(AIC(td,td.weakInv,td.strInv)),5))
(gof <- GOF[order(GOF$rmsea),c(1,5:8,10:11,2:4)])
Comments:
- the hypothesized configural invariance model td shows quite satisfactory fit indices; CFI and SRMR (within and between) show the best values for this model
- the weak invariance model td.weakInv shows satisfactory fit indices and the best evidence, as indicated by the lowest BIC and AIC (highest AICw) and the lowest RMSEA
- the strong invariance model td.strInv is rejected due to unsatisfactory fit indices
Finally, we evaluate the standardized loadings and the residual variances of the selected model, considering that fitted on Mood_noInfl3 (excluding 3 participants).
loadings(td.weakInv) # standardized loadings
standardizedsolution(td.weakInv)[standardizedsolution(td.weakInv)$op=="~~",] # correlations and res. variances
Comments:
- all standardized loadings are significant and higher than .55 (the lowest loading is estimated for item d3 on level 1)
- as expected, standardized loadings on level 2 are higher than those on level 1, with the loadings estimated for d1 and d4 approaching 1
Conclusions:
The results from subsample s3 replicate those from subsample s1 for the Task Demand Scale.
TC
Here, we compare the specified models considering both the fit
indices and the Akaike Information Criterion (AIC). A second model
comparison is performed considering models with fixed residual variances
in those items showing improper solutions (see
SUBSAMPLE s1 - IMPROPER SOLUTION - FIXING VARIANCES
).
# all participants (N = 139) with negative variance for item c3 in model tc
GOF <- cbind(round(fit.ind(model=c(tc,tc.weakInv,tc.strInv),
models.names=c("tc","tc.weakInv","tc.strInv")),3),
AIC=AIC(tc,tc.weakInv,tc.strInv)[,2],
BIC=BIC(tc,tc.weakInv,tc.strInv)[,2],
AICw=round(Weights(AIC(tc,tc.weakInv,tc.strInv)),5))
(GOF <- GOF[order(GOF$rmsea),c(1,5:8,10:11,2:4)])
# all participants (N = 139) fixing residual variance to 15% of the total variance
GOF <- cbind(round(fit.ind(model=c(tc.fix,tc.weakInv,tc.strInv),
models.names=c("tc.fix","tc.weakInv","tc.strInv")),3),
AIC=AIC(tc.fix,tc.weakInv,tc.strInv)[,2],
BIC=BIC(tc.fix,tc.weakInv,tc.strInv)[,2],
AICw=round(Weights(AIC(tc.fix,tc.weakInv,tc.strInv)),5))
(GOF <- GOF[order(GOF$rmsea),c(1,5:8,10:11,2:4)])
Comments:
- when all parameters are freely estimated, the hypothesized configural invariance model tc is saturated (i.e., 3 indicators for one latent), and we cannot rely on chi-squared-derived fit indices. When considering all participants, this model shows the highest AICw. When the c3 variance is fixed, this model shows an unsatisfactory RMSEA
- the weak invariance model tc.weakInv shows satisfactory fit indices and the best evidence, as indicated by the lowest BIC
- the strong invariance model tc.strInv is rejected due to unsatisfactory fit indices across the model comparisons
Finally, we evaluate the standardized loadings, and the residual variances of the selected model, considering that fitted on the whole sample.
loadings(tc.weakInv) # standardized loadings
standardizedsolution(tc.weakInv)[standardizedsolution(tc.weakInv)$op=="~~",] # correlations and res. variances
Comments:
- all standardized loadings are significant and higher than .65 (the lowest loading is estimated for item c1 on level 1)
- as expected, standardized loadings on level 2 are higher than those on level 1
Conclusions:
The results obtained from subsample s3 replicate those obtained from subsample s1 for the Task Control Scale.
MOOD
Here, we compare the specified models considering both the fit
indices and the Akaike Information Criterion (AIC). A second model
comparison is performed considering models with fixed residual variances
in those items showing improper solutions (see
SUBSAMPLE S1 - IMPROPER SOLUTION - FIXING VARIANCES
).
# all participants (N = 139), with negative variances in m3x3, m2x2 and m3x3.weakInv
# (see MODEL SPECIFICATION - MULTILEVEL MODELS)
GOF <- cbind(round(fit.ind(model=c(m3x3,m3x2,m2x2,m2x3,m3x3.weakInv,m3x3.strInv),
models.names=c("m3x3","m3x2","m2x2","m2x3","m3x3.weakInv","m3x3.strInv")),3),
AIC=AIC(m3x3,m3x2,m2x2,m2x3,m3x3.weakInv,m3x3.strInv)[,2],
BIC=BIC(m3x3,m3x2,m2x2,m2x3,m3x3.weakInv,m3x3.strInv)[,2],
AICw=round(Weights(AIC(m3x3,m3x2,m2x2,m2x3,m3x3.weakInv,m3x3.strInv)),5))
(GOF <- GOF[order(GOF$rmsea),c(1,5:8,10:11,2:4)])
# all participants (N = 139), fixing residual variance to 15% of the total variance
# (see IMPROPER SOLUTION - fixing RESIDUAL VARIANCE)
GOF <- cbind(round(fit.ind(model=c(m3x3.fix,m3x2,m2x2,m2x3.fix,m3x3.weakInv.fix,m3x3.strInv),
models.names=c("m3x3.fix","m3x2","m2x2","m2x3.fix","m3x3.weakInv.fix","m3x3.strInv")),3),
AIC=AIC(m3x3.fix,m3x2,m2x2,m2x3.fix,m3x3.weakInv.fix,m3x3.strInv)[,2],
BIC=BIC(m3x3.fix,m3x2,m2x2,m2x3.fix,m3x3.weakInv.fix,m3x3.strInv)[,2],
AICw=round(Weights(AIC(m3x3.fix,m3x2,m2x2,m2x3.fix,m3x3.weakInv.fix,m3x3.strInv)),5))
(GOF <- GOF[order(GOF$rmsea),c(1,5:8,10:11,2:4)])
Comments:
- the model comparison shows results similar to those obtained for subsample s1, selecting the configural and the weak invariance models as the best models
Finally, we evaluate the standardized loadings, the correlations between latent factors and the residual variances of the selected model, considering models with fixed residual variances in those items showing improper solutions.
loadings(m3x3.weakInv.fix) # standardized loadings
standardizedsolution(m3x3.weakInv.fix)[standardizedsolution(m3x3.weakInv.fix)$op=="~~",] # corr. and res. variances
Comments:
- all standardized loadings are significant and higher than .53 (the lowest loading is estimated for item v2 on level 1)
- as expected, standardized loadings on level 2 are higher than those on level 1
- estimated correlations between latent factors are within the .46-.97 range, with the exception of the correlation estimated between Tense Arousal and Fatigue on level 1, which is .19 (.13, .26)
- the highest correlation is estimated between Negative Valence and Tense Arousal on level 2, whereas the lowest correlation is estimated between Tense Arousal and Fatigue on level 2
Conclusions:
The results obtained from subsample s3 replicate those obtained from subsample s1, although with a higher estimate of the correlation between Negative Valence and Tense Arousal.
4.4. Reliability analysis
Here, we compute reliability coefficients for each ESM scale, both based on the MCFA results (following Geldhof et al., 2014) and on generalizability theory (following Cranford et al., 2006).
PIPELINE & FUNCTIONS
The following functions and packages are used:
library(lavaan); library(lme4); library(reshape2); library(ggplot2)
MCFArel()
#' @title Computing composite reliability index from a MCFA model
#' @param fit = multilevel CFA model.
#' @param level = level of interest (either 1 or 2)
#' @param items = numeric string indicating the items of interest (e.g., 1:3 for items 1, 2 and 3)
#' @param item.labels = character string indicating the names of the items of interest
MCFArel <- function(fit,level,items,item.labels){ require(lavaan)
if(level==1){
sl <- standardizedsolution(fit)[1:(nrow(standardizedSolution(fit))/2),] # pars within
} else if(level==2){
sl <- standardizedsolution(fit)[(nrow(standardizedSolution(fit))/2):nrow(standardizedsolution(fit)),] # pars between
} else { stop("Error: level can be either 1 or 2") }
sl <- sl$est.std[sl$op == "=~"][items] # standardized loadings of the selected items
names(sl) <- item.labels # item names
re <- 1 - sl^2 # residual variances of items
# composite reliability index
omega <- sum(sl)^2 / (sum(sl)^2 + sum(re))
return(round(omega,2))}
Computes level-specific indices of composite reliability from a MCFA model, i.e., McDonald omega using level-1 and level-2 standardized factor loadings, respectively (see Geldhof et al., 2014).
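As a quick illustration of the composite reliability formula implemented above, omega can be computed by hand from a vector of standardized loadings; the loadings below are hypothetical and not estimated from our data:
# minimal sketch: omega from hypothetical standardized loadings
sl <- c(d1 = .80, d2 = .75, d3 = .60, d4 = .78) # hypothetical level-specific loadings
re <- 1 - sl^2 # residual variances implied by the standardized loadings
round(sum(sl)^2 / (sum(sl)^2 + sum(re)), 2) # omega = squared sum of loadings over total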
GTHEORYrel()
#' @title Computing variance components and reliability indices of a given scale (following Bolger & Laurenceau, 2013)
#' @param data = dataset
#' @param items = numeric string indicating the items of interest (e.g., 1:3 for items 1, 2 and 3)
#' @param latent.lab = character string indicating the label of the measured construct (used to name the output columns)
#' @param what = character string indicating whether to return the variance components ("varComp") or the reliability indices ("rel")
GTHEORYrel <- function(data,items,latent.lab,what=c("varComp","rel")){ require(lme4)
# creating variable TIME (n. of surveys between 1 and 18)
data$within.day <- as.numeric(as.character(data$within.day)) + 1
data$TIME <- NA
for(i in 1:nrow(data)){
if(data[i,"day"]==1){ data[i,"TIME"] <- data[i,"within.day"]
}else if(data[i,"day"]==2){ data[i,"TIME"] <- data[i,"within.day"]+6
}else if(data[i,"day"]==3){ data[i,"TIME"] <- data[i,"within.day"]+12 }}
# preparing long dataset with one row per item
psymetr <- stack(data[,items])
psymetr$ID <- data$ID
psymetr$time <- as.factor(data$TIME)
psymetr <- psymetr[,c(3,4,2,1)]
colnames(psymetr) <- c("person","time","item","y")
psymetr <- psymetr[order(psymetr$person,psymetr$time,psymetr$item),]
psymetr$y <- as.numeric(as.character(psymetr$y))
# random-effects-only LMM specification (crossed random effects)
mod1 <- lmer(y ~ 1 + (1|person) + (1|time) + (1|item) +
(1|person:time) + (1|person:item) + (1|time:item),data=psymetr)
# variance decomposition
SIGMAp <- VarCorr(mod1)[["person"]][1,1]
SIGMAt <- VarCorr(mod1)[["time"]][1,1]
SIGMAi <- VarCorr(mod1)[["item"]][1,1]
SIGMAtp <- VarCorr(mod1)[["person:time"]][1,1]
SIGMApi <- VarCorr(mod1)[["person:item"]][1,1]
SIGMAti <- VarCorr(mod1)[["time:item"]][1,1]
SIGMAres <- sigma(mod1)^2
# printing variance components
vars <- data.frame(Component=c("SIGMAp","SIGMAt","SIGMAi","SIGMAtp","SIGMApi","SIGMAti","SIGMAres","Total"),
VAR=c(SIGMAp,SIGMAt,SIGMAi,SIGMAtp,SIGMApi,SIGMAti,SIGMAres,
sum(SIGMAp,SIGMAt,SIGMAi,SIGMAtp,SIGMApi,SIGMAti,SIGMAres)))
vars$VAR <- round(vars$VAR,2)
vars$perc <- round(100*vars$VAR/vars[nrow(vars),2],2)
colnames(vars)[2:3] <- c(latent.lab,paste(latent.lab,"%"))
if(what=="varComp"){ return(vars)
}else if(what=="rel"){ # reliability coeff. based on Cranford et al. (2006)
rel <- data.frame(measure=latent.lab,
# R1F = (varPERSON + varPERSON*ITEM/n.item) /
# (varPERSON + varPERSON*ITEM/n.item + varERROR/n.item)
R1F = round((vars[1,2] + vars[5,2]/length(items)) /
(vars[1,2] + vars[5,2]/length(items) + vars[7,2]/length(items)),2),
# RkF = (varPERSON + varPERSON*ITEM/n.item) /
# (varPERSON + varPERSON*ITEM/n.item + varERROR/(n.item*n.occasions))
RkF = round((vars[1,2] + vars[5,2]/length(items))/ (vars[1,2] + vars[5,2]/length(items) +
vars[7,2]/(length(items)*max(data$TIME))),2),
# Rc = varPERSON*TIME / (varPERSON*TIME + varERROR/n.items)
Rc = round(vars[4,2] / (vars[4,2] + vars[7,2]/length(items)),2))
return(rel)
}else { stop("Error: what argument can be either 'varComp' or 'rel'") }}
Computes indices of reliability based on generalizability theory, by fitting a model with crossed random intercepts that decomposes the variance of item scores into person, time, item, and interaction components (see Bolger & Laurenceau, 2013; Cranford et al., 2006).
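To make the three coefficients returned by GTHEORYrel() more concrete, here is a minimal sketch applying the Cranford et al. (2006) formulas to hypothetical variance components (the values below are illustrative, not estimated from our data):
# minimal sketch: G-theory reliability coefficients from hypothetical variance components
k <- 3; m <- 18 # number of items and occasions
varP <- .60; varPI <- .05; varPT <- .40; varE <- .50 # person, person x item, person x time, error
R1F <- (varP + varPI/k) / (varP + varPI/k + varE/k) # between-person reliability, single fixed occasion
RkF <- (varP + varPI/k) / (varP + varPI/k + varE/(k*m)) # between-person reliability, k items x m occasions
Rc <- varPT / (varPT + varE/k) # reliability of within-person change
round(c(R1F = R1F, RkF = RkF, Rc = Rc), 2)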
SUBSAMPLE s1
COMPOSITE RELIABILITY
Here, we compute indices of level-specific composite reliability for each ESM scale and MDMQ subscale, as recommended by Geldhof et al. (2014).
Selected models
Here, we re-specify the models selected for subsample s1 (see section 4.3 - MODEL SPECIFICATION).
# TD
td.weakInv <- cfa('level: 1
TD_W =~ a*d1 + b*d2 + c*d3 + d*d4
level: 2
TD_B =~ a*d1 + b*d2 + c*d3 + d*d4',
data = TD.s1, cluster = 'ID', std.lv=TRUE)
# TC
tc.weakInv <- cfa('level: 1
TC_W =~ a*c1 + b*c2 + c*c3
level: 2
TC_B =~ a*c1 + b*c2 + c*c3',
data = TC.s1, cluster = 'ID',std.lv=TRUE)
# Mood
m3x3.weakInv_noInfl5 <- cfa('level: 1
NV_w =~ a*v1 + b*v2 + c*v3
TA_w =~ d*t1 + e*t2 + f*t3
FA_w =~ g*f1 + h*f2 + i*f3
level: 2
NV_b =~ a*v1 + b*v2 + c*v3
TA_b =~ d*t1 + e*t2 + f*t3
FA_b =~ g*f1 + h*f2 + i*f3',
data=Mood_noInfl5.s1,cluster='ID',std.lv=TRUE)
TD
Level-specific composite reliability indices of TD are computed from the weak invariance model fitted on the whole dataset.
(omega <- data.frame(measure="Task Demand",
omega_w=MCFArel(fit=td.weakInv,level=1,items=1:4,item.labels=names(TD.s1[,2:5])),
omega_b=MCFArel(fit=td.weakInv,level=2,items=1:4,item.labels=names(TD.s1[,2:5]))))
Comments:
- the reliability estimated on both levels is quite satisfactory
TC
Level-specific composite reliability indices of TC are computed from the weak invariance model fitted on the whole dataset.
(OMEGA <- data.frame(measure="Task Control",
omega_w=MCFArel(fit=tc.weakInv,level=1,items=1:3,item.labels=names(TC.s1[,2:4])),
omega_b=MCFArel(fit=tc.weakInv,level=2,items=1:3,item.labels=names(TC.s1[,2:4]))))
omega <- rbind(omega,OMEGA)
Comments:
- the reliability estimated on both levels is quite satisfactory
Mood
Level-specific composite reliability indices of the MDMQ subscales are computed from the weak invariance model fitted on the subsample ‘noInfl5’ (i.e., excluding five participants associated with Heywood cases).
(OMEGA <- rbind(data.frame(measure="Negative Valence",
omega_w=MCFArel(m3x3.weakInv_noInfl5,level=1,items=1:3,item.labels=names(Mood.s1[,2:4])),
omega_b=MCFArel(m3x3.weakInv_noInfl5,level=2,items=1:3,item.labels=names(Mood.s1[,2:4]))),
data.frame(measure="Tense Arousal",
omega_w=MCFArel(m3x3.weakInv_noInfl5,level=1,items=4:6,item.labels=names(Mood.s1[,5:7])),
omega_b=MCFArel(m3x3.weakInv_noInfl5,level=2,items=4:6,item.labels=names(Mood.s1[,5:7]))),
data.frame(measure="Fatigue",
omega_w=MCFArel(m3x3.weakInv_noInfl5,level=1,items=7:9,item.labels=names(Mood.s1[,8:10])),
omega_b=MCFArel(m3x3.weakInv_noInfl5,level=2,items=7:9,item.labels=names(Mood.s1[,8:10]))),
data.frame(measure="TOT",
omega_w=MCFArel(m3x3.weakInv_noInfl5,level=1,items=1:9,item.labels=names(Mood.s1[,2:10])),
omega_b=MCFArel(m3x3.weakInv_noInfl5,level=2,items=1:9,item.labels=names(Mood.s1[,2:10])))))
omega <- rbind(omega,OMEGA)
Comments:
- the reliability estimated on both levels is quite satisfactory
G-THEORY
Here, we compute reliability indices based on the generalizability theory (see Bolger & Laurenceau, 2013).
TD
Here, we specified a random-effects model from which we derived the variance components of item scores:
(varComp<- GTHEORYrel(data=s1.w,items=which(colnames(s1.w)=="d1"):which(colnames(s1.w)=="d4"),
latent.lab="TD",what="varComp"))
ggplot(melt(varComp[,c(1,3)]),aes(x=Component,y=variable,fill=value))+geom_tile()+
geom_text(aes(label=round(value,2)),color="black",size=3)+
scale_fill_gradient2(low="white",high="#f03b20",limit = c(0,100),space="Lab",guide="legend")+
theme(text=element_text(face="bold",size=8))
## Using Component as id variables
Comments:
- assuming that item scores vary only between persons, occasions and items, the largest variance components are those reflecting time x person variability (SIGMAtp), inter-individual variability (SIGMAp) and residual variability (SIGMAres)
- a substantial part of the variance is also associated with person x item variability (SIGMApi)
Then, we use the variance components to estimate reliability coefficients.
(relInd <- GTHEORYrel(data=s1.w,items=which(colnames(s1.w)=="d1"):which(colnames(s1.w)=="d4"),
latent.lab="Task Demand",what="rel"))
Comments:
- reliability coefficients range between .79 and .99, indicating a good to excellent ability to discriminate Task Demand between individuals, considering both one fixed occasion and a relatively large set of occasions (18) (i.e., when it is possible to aggregate item scores over 18 or more occasions, the expected ranking of individuals should be very stable), and to measure systematic changes in individuals over time
TC
Here, we specified a random-effects model from which we derived the variance components of item scores:
(varComp<- GTHEORYrel(data=s1.w,items=which(colnames(s1.w)=="c1"):which(colnames(s1.w)=="c3"),
latent.lab="TC",what="varComp"))
ggplot(melt(varComp[,c(1,3)]),aes(x=Component,y=variable,fill=value))+geom_tile()+
geom_text(aes(label=round(value,2)),color="black",size=3)+
scale_fill_gradient2(low="white",high="#f03b20",limit = c(0,100),space="Lab",guide="legend")+
theme(text=element_text(face="bold",size=8))
## Using Component as id variables
Comments:
- assuming that item scores vary only between persons, occasions and items, the largest variance components are those reflecting time x person variability (SIGMAtp), inter-individual variability (SIGMAp) and residual variability (SIGMAres)
- a substantial part of the variance is also associated with inter-item (SIGMAi) and item x person variability (SIGMApi), suggesting a less homogeneous interpretation of TC items compared to TD items
Then, we use the variance components to estimate reliability coefficients.
(RELIND <- GTHEORYrel(data=s1.w,items=which(colnames(s1.w)=="c1"):which(colnames(s1.w)=="c3"),
latent.lab="Task Control",what="rel"))
relInd <- rbind(relInd,RELIND)
Comments:
- reliability coefficients range between .74 and .98, indicating a good to excellent ability to discriminate Task Control between individuals, considering both one fixed occasion and a relatively large set of occasions (18) (i.e., when it is possible to aggregate item scores over 18 or more occasions, the expected ranking of individuals should be very stable), and to measure systematic changes in individuals over time
MOOD
For each MDMQ subscale, we specified a random-effects model from which we derived the variance components of item scores:
(varComp<- cbind(GTHEORYrel(data=s1.w,items=which(colnames(s1.w)=="v1"):which(colnames(s1.w)=="v3"),
latent.lab="NV",what="varComp"),
GTHEORYrel(data=s1.w,items=which(colnames(s1.w)=="t1"):which(colnames(s1.w)=="t3"),
latent.lab="TA",what="varComp")[,2:3],
GTHEORYrel(data=s1.w,items=which(colnames(s1.w)=="f1"):which(colnames(s1.w)=="f3"),
latent.lab="F",what="varComp")[,2:3],
GTHEORYrel(data=s1.w,items=which(colnames(s1.w)=="v1"):which(colnames(s1.w)=="f3"),
latent.lab="TOT",what="varComp")[,2:3]))
ggplot(melt(varComp[,c(1,3,5,7,9)]),aes(x=Component,y=variable,fill=value))+geom_tile()+
geom_text(aes(label=round(value,2)),color="black",size=3)+
scale_fill_gradient2(low="white",high="#f03b20",limit = c(0,100),space="Lab",guide="legend")+
theme(text=element_text(face="bold",size=8))
## Using Component as id variables
Comments:
- assuming that item scores vary only between persons, occasions and items, the largest variance components in all dimensions are those reflecting inter-individual variability (SIGMAp), time x person variability (SIGMAtp) and residual variability (SIGMAres)
- compared to what was found for the TDS and TCS, residual variability (SIGMAres) is higher for the Mood subscales
- inter-individual variability (SIGMAp) (ability to reliably discriminate different individuals) is higher for Tense Arousal, and lower for Fatigue and for the total score
- inter-item variability (SIGMAi) is lower than 5% for all subscales
- intra-individual variability (SIGMAt) is relevant (higher than 5%) only for Fatigue, suggesting a higher (although small) influence of time on this variable compared to the others, when averaging over persons and items
- time x person variability (SIGMAtp) is relatively high (higher than 20%) in all scales, and particularly for Fatigue and TA items, suggesting a good ability to discriminate differences between individuals in temporal trajectories
- person x item variability (SIGMApi) is lower than 5% for all subscales
- time x item variability (SIGMAti) is zero in all subscales
(RELIND<- rbind(GTHEORYrel(data=s1.w,items=which(colnames(s1.w)=="v1"):which(colnames(s1.w)=="v3"),
latent.lab="Negative Valence",what="rel"),
GTHEORYrel(data=s1.w,items=which(colnames(s1.w)=="t1"):which(colnames(s1.w)=="t3"),
latent.lab="Tense Arousal",what="rel"),
GTHEORYrel(data=s1.w,items=which(colnames(s1.w)=="f1"):which(colnames(s1.w)=="f3"),
latent.lab="Fatigue",what="rel"),
GTHEORYrel(data=s1.w,items=which(colnames(s1.w)=="v1"):which(colnames(s1.w)=="f3"),
latent.lab="TOT",what="rel")))
relInd <- rbind(relInd,RELIND)
write.csv(cbind(omega,relInd[,2:4]),"relInd.csv",row.names=FALSE) # saving results for draft
Comments:
- R1F ranges between .65 and .84, indicating a good ability to discriminate Mood dimensions between individuals considering one fixed occasion (like Cronbach's alpha)
- RkF ranges between .97 and .98, indicating an excellent ability to discriminate Mood dimensions between individuals considering a relatively large set of occasions (18) (i.e., when it is possible to aggregate item scores over 18 or more occasions, the expected ranking of individuals should be very stable)
- Rc ranges between .52 and .80, indicating a modest-to-good ability to measure systematic changes in individuals over time
SUBSAMPLE s2
COMPOSITE RELIABILITY
Here, we compute indices of level-specific composite reliability for each ESM scale and MDMQ subscale, as recommended by Geldhof et al. (2014).
Selected models
Here, we re-specify the models selected for subsample s2 (see section 4.3 - MODEL SPECIFICATION).
# TD
td.weakInv <- cfa('level: 1
TD_W =~ a*d1 + b*d2 + c*d3 + d*d4
level: 2
TD_B =~ a*d1 + b*d2 + c*d3 + d*d4',
data = TD.s2, cluster = 'ID', std.lv=TRUE)
# TC
tc.weakInv <- cfa('level: 1
TC_W =~ a*c1 + b*c2 + c*c3
level: 2
TC_B =~ a*c1 + b*c2 + c*c3',
data = TC.s2, cluster = 'ID',std.lv=TRUE)
# Mood
m3x3.weakInv.fix <- 'level: 1
NV_w =~ a*v1 + b*v2 + c*v3
TA_w =~ d*t1 + e*t2 + f*t3
FA_w =~ g*f1 + h*f2 + i*f3
level: 2
NV_b =~ a*v1 + b*v2 + c*v3
TA_b =~ d*t1 + e*t2 + f*t3
FA_b =~ g*f1 + h*f2 + i*f3
t3 ~~ rho2 * t2'
m3x3.weakInv.fix <- cfa(gsub("rho2",t3varlv2*.15,m3x3.weakInv.fix),data=Mood.s2,cluster='ID',std.lv=TRUE) # fixing rho2
TD
Level-specific composite reliability indices of TD are computed from the weak invariance model fitted on the whole dataset.
(omega <- data.frame(measure="Task Demand",
omega_w=MCFArel(fit=td.weakInv,level=1,items=1:4,item.labels=names(TD.s2[,2:5])),
omega_b=MCFArel(fit=td.weakInv,level=2,items=1:4,item.labels=names(TD.s2[,2:5]))))
Comments:
- the reliability estimated on both levels is quite satisfactory
TC
Level-specific composite reliability indices of TC are computed from the weak invariance model fitted on the whole dataset.
(OMEGA <- data.frame(measure="Task Control",
omega_w=MCFArel(fit=tc.weakInv,level=1,items=1:3,item.labels=names(TC.s2[,2:4])),
omega_b=MCFArel(fit=tc.weakInv,level=2,items=1:3,item.labels=names(TC.s2[,2:4]))))
Comments:
- the reliability estimated on both levels is quite satisfactory
Mood
Level-specific composite reliability indices of the MDMQ subscales are computed from the weak invariance model fitted by fixing to .15*rho the residual variance of the items associated with improper solutions.
(OMEGA <- rbind(data.frame(measure="Negative Valence",
omega_w=MCFArel(m3x3.weakInv.fix,level=1,items=1:3,item.labels=names(Mood.s2[,2:4])),
omega_b=MCFArel(m3x3.weakInv.fix,level=2,items=1:3,item.labels=names(Mood.s2[,2:4]))),
data.frame(measure="Tense Arousal",
omega_w=MCFArel(m3x3.weakInv.fix,level=1,items=4:6,item.labels=names(Mood.s2[,5:7])),
omega_b=MCFArel(m3x3.weakInv.fix,level=2,items=4:6,item.labels=names(Mood.s2[,5:7]))),
data.frame(measure="Fatigue",
omega_w=MCFArel(m3x3.weakInv.fix,level=1,items=7:9,item.labels=names(Mood.s2[,8:10])),
omega_b=MCFArel(m3x3.weakInv.fix,level=2,items=7:9,item.labels=names(Mood.s2[,8:10]))),
data.frame(measure="TOT",
omega_w=MCFArel(m3x3.weakInv.fix,level=1,items=1:9,item.labels=names(Mood.s2[,2:10])),
omega_b=MCFArel(m3x3.weakInv.fix,level=2,items=1:9,item.labels=names(Mood.s2[,2:10])))))
Comments:
- the reliability estimated on both levels is quite satisfactory
G-THEORY
Here, we compute reliability indices based on the generalizability theory (see Bolger & Laurenceau, 2013).
TD
Here, we use the variance components to estimate reliability coefficients.
(relInd <- GTHEORYrel(data=s2.w,items=which(colnames(s1.w)=="d1"):which(colnames(s1.w)=="d4"),
latent.lab="Task Demand",what="rel"))
Comments:
- reliability coefficients range between .79 and .99, indicating a good to excellent ability to discriminate Task Demand between individuals, considering both one fixed occasion and a relatively large set of occasions (18) (i.e., when it is possible to aggregate item scores over 18 or more occasions, the expected ranking of individuals should be very stable), and to measure systematic changes in individuals over time
TC
Here, we use the variance components to estimate reliability coefficients.
(RELIND <- GTHEORYrel(data=s2.w,items=which(colnames(s1.w)=="c1"):which(colnames(s1.w)=="c3"),
latent.lab="Task Control",what="rel"))
## boundary (singular) fit: see help('isSingular')
Comments:
- reliability coefficients range between .74 and .98, indicating a good to excellent ability to discriminate Task Control between individuals, considering both one fixed occasion and a relatively large set of occasions (18) (i.e., when it is possible to aggregate item scores over 18 or more occasions, the expected ranking of individuals should be very stable), and to measure systematic changes in individuals over time
MOOD
For each MDMQ subscale, we estimate reliability coefficients.
(RELIND<- rbind(GTHEORYrel(data=s2.w,items=which(colnames(s1.w)=="v1"):which(colnames(s1.w)=="v3"),
latent.lab="Negative Valence",what="rel"),
GTHEORYrel(data=s2.w,items=which(colnames(s1.w)=="t1"):which(colnames(s1.w)=="t3"),
latent.lab="Tense Arousal",what="rel"),
GTHEORYrel(data=s2.w,items=which(colnames(s1.w)=="f1"):which(colnames(s1.w)=="f3"),
latent.lab="Fatigue",what="rel"),
GTHEORYrel(data=s2.w,items=which(colnames(s1.w)=="v1"):which(colnames(s1.w)=="f3"),
latent.lab="TOT",what="rel")))
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.00215911 (tol = 0.002, component 1)
Comments:
- R1F ranges between .61 and .85, indicating a good ability to discriminate Mood dimensions between individuals considering one fixed occasion (like Cronbach's alpha)
- RkF ranges between .97 and .98, indicating an excellent ability to discriminate Mood dimensions between individuals considering a relatively large set of occasions (18) (i.e., when it is possible to aggregate item scores over 18 or more occasions, the expected ranking of individuals should be very stable)
- Rc ranges between .53 and .68, indicating a modest-to-good ability to measure systematic changes in individuals over time
SUBSAMPLE s3
COMPOSITE RELIABILITY
Here, we compute indices of level-specific composite reliability for each ESM scale and MDMQ subscale, as recommended by Geldhof et al. (2014).
Selected models
Here, we re-specify the models selected for subsample s3 (see section 4.3 - MODEL SPECIFICATION).
# TD
td.weakInv <- cfa('level: 1
TD_W =~ a*d1 + b*d2 + c*d3 + d*d4
level: 2
TD_B =~ a*d1 + b*d2 + c*d3 + d*d4',
data = TD.s3, cluster = 'ID', std.lv=TRUE)
# TC
tc.weakInv <- cfa('level: 1
TC_W =~ a*c1 + b*c2 + c*c3
level: 2
TC_B =~ a*c1 + b*c2 + c*c3',
data = TC.s3, cluster = 'ID',std.lv=TRUE)
# Mood
m3x3.weakInv.fix <- 'level: 1
NV_w =~ a*v1 + b*v2 + c*v3
TA_w =~ d*t1 + e*t2 + f*t3
FA_w =~ g*f1 + h*f2 + i*f3
level: 2
NV_b =~ a*v1 + b*v2 + c*v3
TA_b =~ d*t1 + e*t2 + f*t3
FA_b =~ g*f1 + h*f2 + i*f3
t3 ~~ rho2 * t2'
m3x3.weakInv.fix <- cfa(gsub("rho2",t3varlv2*.15,m3x3.weakInv.fix),data=Mood.s3,cluster='ID',std.lv=TRUE) # fixing rho2
TD
Level-specific composite reliability indices of TD are computed from the weak invariance model fitted on the whole dataset.
(omega <- data.frame(measure="Task Demand",
omega_w=MCFArel(fit=td.weakInv,level=1,items=1:4,item.labels=names(TD.s3[,2:5])),
omega_b=MCFArel(fit=td.weakInv,level=2,items=1:4,item.labels=names(TD.s3[,2:5]))))
Comments:
- the reliability estimated on both levels is quite satisfactory
TC
Level-specific composite reliability indices of TC are computed from the weak invariance model fitted on the whole dataset.
(OMEGA <- data.frame(measure="Task Control",
omega_w=MCFArel(fit=tc.weakInv,level=1,items=1:3,item.labels=names(TC.s3[,2:4])),
omega_b=MCFArel(fit=tc.weakInv,level=2,items=1:3,item.labels=names(TC.s3[,2:4]))))
Comments:
- the reliability estimated on both levels is quite satisfactory
Mood
Level-specific composite reliability indices of the MDMQ subscales are computed from the weak invariance model fitted by fixing the residual variance to .15*rho for those items showing improper solutions.
(OMEGA <- rbind(data.frame(measure="Negative Valence",
omega_w=MCFArel(m3x3.weakInv.fix,level=1,items=1:3,item.labels=names(Mood.s3[,2:4])),
omega_b=MCFArel(m3x3.weakInv.fix,level=2,items=1:3,item.labels=names(Mood.s3[,2:4]))),
data.frame(measure="Tense Arousal",
omega_w=MCFArel(m3x3.weakInv.fix,level=1,items=4:6,item.labels=names(Mood.s3[,5:7])),
omega_b=MCFArel(m3x3.weakInv.fix,level=2,items=4:6,item.labels=names(Mood.s3[,5:7]))),
data.frame(measure="Fatigue",
omega_w=MCFArel(m3x3.weakInv.fix,level=1,items=7:9,item.labels=names(Mood.s3[,8:10])),
omega_b=MCFArel(m3x3.weakInv.fix,level=2,items=7:9,item.labels=names(Mood.s3[,8:10]))),
data.frame(measure="TOT",
omega_w=MCFArel(m3x3.weakInv.fix,level=1,items=1:9,item.labels=names(Mood.s3[,2:10])),
omega_b=MCFArel(m3x3.weakInv.fix,level=2,items=1:9,item.labels=names(Mood.s3[,2:10])))))
Comments:
- the reliability estimated on both levels is quite satisfactory
G-THEORY
Here, we compute reliability indices based on the generalizability theory (see Bolger & Laurenceau, 2013).
TD
Here, we use the variance components to estimate reliability coefficients.
(relInd <- GTHEORYrel(data=s3.w,items=which(colnames(s1.w)=="d1"):which(colnames(s1.w)=="d4"),
latent.lab="Task Demand",what="rel"))
Comments:
- reliability coefficients range between .79 and .99, indicating a good to excellent ability to discriminate Task Demand between individuals, considering both one fixed occasion and a relatively large set of occasions (18) (i.e., when it is possible to aggregate item scores over 18 or more occasions, the expected ranking of individuals should be very stable), and to measure systematic changes in individuals over time
TC
Here, we use the variance components to estimate reliability coefficients.
(RELIND <- GTHEORYrel(data=s3.w,items=which(colnames(s1.w)=="c1"):which(colnames(s1.w)=="c3"),
latent.lab="Task Control",what="rel"))
Comments:
- reliability coefficients range between .74 and .98, indicating a good to excellent ability to discriminate Task Control between individuals, considering both one fixed occasion and a relatively large set of occasions (18) (i.e., when it is possible to aggregate item scores over 18 or more occasions, the expected ranking of individuals should be very stable), and to measure systematic changes in individuals over time
MOOD
For each MDMQ subscale, we use the variance components to estimate reliability coefficients.
(RELIND<- rbind(GTHEORYrel(data=s3.w,items=which(colnames(s1.w)=="v1"):which(colnames(s1.w)=="v3"),
latent.lab="Negative Valence",what="rel"),
GTHEORYrel(data=s3.w,items=which(colnames(s1.w)=="t1"):which(colnames(s1.w)=="t3"),
latent.lab="Tense Arousal",what="rel"),
GTHEORYrel(data=s3.w,items=which(colnames(s1.w)=="f1"):which(colnames(s1.w)=="f3"),
latent.lab="Fatigue",what="rel"),
GTHEORYrel(data=s3.w,items=which(colnames(s1.w)=="v1"):which(colnames(s1.w)=="f3"),
latent.lab="TOT",what="rel")))
Comments:
- R1F ranges between .62 and .70, indicating a good ability to discriminate Mood dimensions between individuals considering one fixed occasion (like Cronbach's alpha)
- RkF ranges between .97 and .98, indicating an excellent ability to discriminate Mood dimensions between individuals considering a relatively large set of occasions (18) (i.e., when it is possible to aggregate item scores over 18 or more occasions, the expected ranking of individuals should be very stable)
- Rc ranges between .53 and .68, indicating a modest-to-good ability to measure systematic changes in individuals over time
4.5. Convergent validity
Here, we compute the correlations among the aggregate scores of our ESM indicators at the between level (individual mean values) and the within level (mean-centered values), as well as the correlations between individual averages and the retrospective measures collected with the preliminary questionnaire.
PIPELINE & FUNCTIONS
First, raw item scores are summarized into aggregate scores (averages) for each ESM scale.
Second, level-1 (i.e., mean-centered scores) and level-2 components (i.e., individual mean scores) are computed for each aggregate score.
Third, the following functions are used to compute descriptive statistics of, and correlations between, the aggregate scores of the ESM and retrospective scales:
multidesc()
#' @title Printing descriptive statistics of multilevel variables
#' @param long = long datasets (one row per observation)
#' @param wide = wide dataset (one row per participant)
#' @param ESM = ESM and HRV data
#' @param betw = between-only variables
multidesc <- function(long,wide,
ESM=c("TD","TC","NV","TA","FA"),
betw=c("JD","JC","JAWS.HPHA","JAWS.HPLA","JAWS.LPHA","JAWS.LPLA","CBI")){
require(Rmisc); require(lme4)
# ESM data
out <- data.frame(Measure=ESM[1],
N=summarySE(long,ESM[1],na.rm=TRUE)[,2],
Mean=paste(round(summarySE(long,ESM[1],na.rm=TRUE)[,3],2)," (",
round(summarySE(long,ESM[1],na.rm=TRUE)[,4],2),")",sep=""))
for(i in 2:length(ESM)){
out <- rbind(out,
data.frame(Measure=ESM[i],
N=summarySE(long,ESM[i],na.rm=TRUE)[,2],
Mean=paste(round(summarySE(long,ESM[i],na.rm=TRUE)[,3],2)," (",
round(summarySE(long,ESM[i],na.rm=TRUE)[,4],2),")",sep="")))}
# RETRO data
for(i in 1:length(betw)){
out <- rbind(out,
data.frame(Measure=betw[i],
N=summarySE(wide,betw[i],na.rm=TRUE)[,2],
Mean=paste(round(summarySE(wide,betw[i],na.rm=TRUE)[,3],2)," (",
round(summarySE(wide,betw[i],na.rm=TRUE)[,4],2),")",sep="")))}
# ICC
out$ICC <- NA
for(i in 1:length(ESM)){
m <- lmer(formula=gsub("var",ESM[i],"var~(1|ID)"),data=long) # VAR_between / (VAR_between + VAR_within)
out[out$Measure==ESM[i],"ICC"] <- round(as.data.frame(VarCorr(m))[1,4]/
(as.data.frame(VarCorr(m))[1,4]+as.data.frame(VarCorr(m))[2,4]),2)}
return(out)}
Computes descriptive statistics (mean, standard deviation, intraclass correlation coefficients) from a multilevel dataset.
multicorr()
#' @title Compute inter and intraindividual correlations between multilevel variables
#' @param long = long datasets (one row per observation)
#' @param wide = wide dataset (one row per participant)
#' @param ESM = ESM and HRV data
#' @param betw = between-only variables
multicorr <- function(long,wide,
ESM=c("TD","TC","NV","TA","FA"),
betw=c("JD","JC","JAWS.HPHA","JAWS.HPLA","JAWS.LPHA","JAWS.LPLA","CBI"),
corr.retro=FALSE,mu=0,twotailed=FALSE){
require(Hmisc); require(psych); require(dplyr); require(Rmisc)
# between-subjects correlations (all variables)
out.b <- rcorr(as.matrix(wide[,c(paste(ESM,".cm",sep=""),betw)]), type = "pearson")
for(i in 1:nrow(out.b$r)){ for(j in 1:ncol(out.b$r)){ # recomputing pvalues based on function arguments
if(!is.na(out.b$r[i,j]) & out.b$r[i,j]<1){
out.b$P[i,j] <- r.test(n=out.b$n[i,j],r12=abs(out.b$r[i,j]),r34=mu,twotailed=twotailed)$p }}}
rb <- round(out.b$r,2) # corr coeff.
rb.p <- out.b$P # pvalues
rb[lower.tri(rb)] <- rb.p[lower.tri(rb.p)] <- NA
rownames(rb) <- gsub(".cm","",rownames(rb))
colnames(rb) <- gsub(".cm","",colnames(rb))
# within-participant correlations (deviations from individual mean)
out.w <- rcorr(as.matrix(long[,paste(ESM,".dm",sep="")]), type = "pearson")
for(i in 1:nrow(out.w$r)){ for(j in 1:ncol(out.w$r)){ # recomputing pvalues based on function arguments
if(!is.na(out.w$r[i,j]) & out.w$r[i,j]<1){
out.w$P[i,j] <- r.test(n=out.w$n[i,j],r12=abs(out.w$r[i,j]),r34=mu,twotailed=twotailed)$p }}}
rw <- round(out.w$r,2) # corr coeff.
rw.p <- out.w$P # pvalues
rw[upper.tri(rw)] <- rw.p[upper.tri(rw.p)] <- NA
rownames(rw) <- gsub(".dm","",rownames(rw))
colnames(rw) <- gsub(".dm","",colnames(rw))
# filling rb empty cells
rb[1:length(ESM),1:length(ESM)][lower.tri(rb[1:length(ESM),1:length(ESM)])] <- rw[lower.tri(rw)]
rb.p[1:length(ESM),1:length(ESM)][lower.tri(rb.p[1:length(ESM),1:length(ESM)])] <- rw.p[lower.tri(rw.p)]
out <- t(rb)
out.p <- t(rb.p)
# showing correlations between retrospective measures?
if(corr.retro == FALSE){ out <- out[,1:length(ESM)]; out.p <- out.p[,1:length(ESM)] }
return(out)}
Computes between- and within-individual Pearson correlations from a multilevel dataset.
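The following minimal sketch illustrates the logic implemented by multicorr() on simulated data: between-person correlations are computed on individual means, within-person correlations on deviations from those means (the data and the variable names x and y are hypothetical):
# minimal sketch: between- vs. within-person correlations on simulated data
set.seed(1)
d <- data.frame(ID = rep(1:20, each = 10), x = rnorm(200), y = rnorm(200))
cm <- aggregate(cbind(x, y) ~ ID, data = d, FUN = mean) # individual means (between level)
d <- merge(d, setNames(cm, c("ID", "x.cm", "y.cm")), by = "ID")
cor(cm$x, cm$y) # between-person correlation (individual means)
cor(d$x - d$x.cm, d$y - d$y.cm) # within-person correlation (mean-centered scores)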
AGGREGATE SCORES
Here, we compute aggregate scores for the TDS, TCS and each subscale of the MDMQ.
SUBSAMPLE s1
# aggregate scores (mean)
s1.w$NV <- apply(s1.w[,c("v1","v2","v3")],1,mean,na.rm=TRUE) # Negative Valence = NV (positive-negative)
s1.w$TA <- apply(s1.w[,c("t1","t2","t3")],1,mean,na.rm=TRUE) # Tense Arousal = TA (positive-negative)
s1.w$FA <- apply(s1.w[,c("f1","f2","f3")],1,mean,na.rm=TRUE) # Fatigue = FA (positive-negative)
s1.w$TD <- apply(s1.w[,c("d1","d2","d3","d4")],1,mean,na.rm=TRUE) # Task Demand = TD (positive-negative)
s1.w$TC <- apply(s1.w[,c("c1","c2","c3")],1,mean,na.rm=TRUE) # Task Control = TC (positive-negative)
SUBSAMPLES s2 & s3
# s2 - aggregate scores (mean)
s2.w$NV <- apply(s2.w[,c("v1","v2","v3")],1,mean,na.rm=TRUE) # Negative Valence = NV (positive-negative)
s2.w$TA <- apply(s2.w[,c("t1","t2","t3")],1,mean,na.rm=TRUE) # Tense Arousal = TA (positive-negative)
s2.w$FA <- apply(s2.w[,c("f1","f2","f3")],1,mean,na.rm=TRUE) # Fatigue = FA (positive-negative)
s2.w$TD <- apply(s2.w[,c("d1","d2","d3","d4")],1,mean,na.rm=TRUE) # Task Demand = TD (positive-negative)
s2.w$TC <- apply(s2.w[,c("c1","c2","c3")],1,mean,na.rm=TRUE) # Task Control = TC (positive-negative)
# s3 - aggregate scores (mean)
s3.w$NV <- apply(s3.w[,c("v1","v2","v3")],1,mean,na.rm=TRUE) # Negative Valence = NV (positive-negative)
s3.w$TA <- apply(s3.w[,c("t1","t2","t3")],1,mean,na.rm=TRUE) # Tense Arousal = TA (positive-negative)
s3.w$FA <- apply(s3.w[,c("f1","f2","f3")],1,mean,na.rm=TRUE) # Fatigue = FA (positive-negative)
s3.w$TD <- apply(s3.w[,c("d1","d2","d3","d4")],1,mean,na.rm=TRUE) # Task Demand = TD (positive-negative)
s3.w$TC <- apply(s3.w[,c("c1","c2","c3")],1,mean,na.rm=TRUE) # Task Control = TC (positive-negative)
lv1 & lv2 COMPONENTS
Then, the aggregated scores are used to compute the level-2 (between, individual mean) and the level-1 component (within, mean-centered) of each ESM variable.
SUBSAMPLE s1
library(dplyr)
s1.w <- s1.w %>%
group_by(ID) %>% # grouping by ID
# between-individual component (cluster mean)
mutate(TD.cm=mean(TD,na.rm=T),TC.cm=mean(TC,na.rm=T), # stressors
NV.cm=mean(NV),TA.cm=mean(TA,na.rm=T),FA.cm=mean(FA,na.rm=T), # strain
# within-individual component (deviations from cluster means)
TD.dm=TD-TD.cm,TC.dm=TC-TC.cm,
NV.dm=NV-NV.cm,TA.dm=TA-TA.cm,FA.dm=FA-FA.cm)
(s1.w <- as.data.frame(s1.w))[1:3,c("ID","day","NV","NV.cm","NV.dm","TC","TC.cm","TC.dm")] # showing some examples
detach("package:dplyr", unload=TRUE)
Between-individual values are attached to the wide dataset, with only one row per participant.
# within.day as progressive number of received surveys (so that everyone has day = 1, within.day = 1)
long <- plyr::ddply(s1.w,c("ID","day"),transform,within.day=seq_along(day))
# merging wide dataset with individual averages of ESM measures
s1.b <- plyr::join(s1.b,long[long$day==1 & long$within.day==1,
c("ID","NV.cm","TA.cm","FA.cm","TD.cm","TC.cm")],
by="ID",type="full")
SUBSAMPLES s2 & s3
library(dplyr)
# s2
s2.w <- s2.w %>% group_by(ID) %>% mutate(TD.cm=mean(TD,na.rm=T),TC.cm=mean(TC,na.rm=T),
NV.cm=mean(NV),TA.cm=mean(TA,na.rm=T),FA.cm=mean(FA,na.rm=T),
TD.dm=TD-TD.cm,TC.dm=TC-TC.cm,
NV.dm=NV-NV.cm,TA.dm=TA-TA.cm,FA.dm=FA-FA.cm)
s2.w <- as.data.frame(s2.w)
# s3
s3.w <- s3.w %>% group_by(ID) %>% mutate(TD.cm=mean(TD,na.rm=T),TC.cm=mean(TC,na.rm=T),
NV.cm=mean(NV),TA.cm=mean(TA,na.rm=T),FA.cm=mean(FA,na.rm=T),
TD.dm=TD-TD.cm,TC.dm=TC-TC.cm,
NV.dm=NV-NV.cm,TA.dm=TA-TA.cm,FA.dm=FA-FA.cm)
s3.w <- as.data.frame(s3.w)
detach("package:dplyr", unload=TRUE)
# s2
long <- plyr::ddply(s2.w,c("ID","day"),transform,within.day=seq_along(day))
s2.b <- plyr::join(s2.b,long[long$day==1 & long$within.day==1,
c("ID","NV.cm","TA.cm","FA.cm","TD.cm","TC.cm")],by="ID",type="full")
# s3
long <- plyr::ddply(s3.w,c("ID","day"),transform,within.day=seq_along(day))
s3.b <- plyr::join(s3.b,long[long$day==1 & long$within.day==1,
c("ID","NV.cm","TA.cm","FA.cm","TD.cm","TC.cm")],by="ID",type="full")
DESCRIPTIVES
Here, we take a look at the distributions of the aggregated ESM scores (raw and mean-centered).
SUBSAMPLE s1
par(mfrow=c(2,5)); hist(s1.w$TD); hist(s1.w$TD.dm); hist(s1.w$TC); hist(s1.w$TC.dm)
hist(s1.w$NV); hist(s1.w$NV.dm); hist(s1.w$TA); hist(s1.w$TA.dm); hist(s1.w$FA); hist(s1.w$FA.dm)
Then, we use the multidesc function to compute descriptive statistics on the selected subsample (N = 139).
multidesc(long=s1.w,wide=s1.b)
SUBSAMPLE s2
# distributions
par(mfrow=c(2,5)); hist(s2.w$TD); hist(s2.w$TD.dm); hist(s2.w$TC); hist(s2.w$TC.dm)
hist(s2.w$NV); hist(s2.w$NV.dm); hist(s2.w$TA); hist(s2.w$TA.dm); hist(s2.w$FA); hist(s2.w$FA.dm)
# descriptives
multidesc(long=s2.w,wide=s2.b)
SUBSAMPLE s3
# distributions
par(mfrow=c(2,5)); hist(s3.w$TD); hist(s3.w$TD.dm); hist(s3.w$TC); hist(s3.w$TC.dm)
hist(s3.w$NV); hist(s3.w$NV.dm); hist(s3.w$TA); hist(s3.w$TA.dm); hist(s3.w$FA); hist(s3.w$FA.dm)
# descriptives
multidesc(long=s3.w,wide=s3.b)
CORRELATIONS
Here, we join descriptive statistics with between- and
within-participant correlations computed with the
multicorr()
function specified above. Correlations are
visualized both in a table format and using a heat plot.
SUBSAMPLE s1
TABLE
desc <- cbind(multidesc(long=s1.w,wide=s1.b),multicorr(long=s1.w,wide=s1.b))
rownames(desc) <- desc$Measure
desc$Measure <- NULL
desc[,4:ncol(desc)]
write.csv(desc,"correlations.csv") # saving result
PLOT
ggplot(melt(as.matrix(multicorr(long=s1.w,wide=s1.b))),aes(x=Var1, y=Var2, fill=value)) + geom_tile() +
geom_text(aes(label=value)) +
scale_fill_gradient2(low="darkblue",high="#f03b20",mid="white",midpoint=0,limit = c(-1,1), space = "Lab",
name="Pearson\nCorrelation",guide="legend",breaks=round(seq(1,-1,length.out = 11),2),
minor_breaks=round(seq(1,-1,length.out = 11),2))+labs(x="",y="")+
theme(text=element_text(size=9),legend.position = "none",axis.text.x=element_text(angle=45,vjust=0.5,hjust=1))
SUBSAMPLE s2
TABLE
desc <- cbind(multidesc(long=s2.w,wide=s2.b),multicorr(long=s2.w,wide=s2.b))
rownames(desc) <- desc$Measure
desc$Measure <- NULL
desc[,4:ncol(desc)]
PLOT
ggplot(melt(as.matrix(multicorr(long=s2.w,wide=s2.b))),aes(x=Var1, y=Var2, fill=value)) + geom_tile() +
geom_text(aes(label=value)) +
scale_fill_gradient2(low="darkblue",high="#f03b20",mid="white",midpoint=0,limit = c(-1,1), space = "Lab",
name="Pearson\nCorrelation",guide="legend",breaks=round(seq(1,-1,length.out = 11),2),
minor_breaks=round(seq(1,-1,length.out = 11),2))+labs(x="",y="")+
theme(text=element_text(size=9),legend.position = "none",axis.text.x=element_text(angle=45,vjust=0.5,hjust=1))
SUBSAMPLE s3
TABLE
desc <- cbind(multidesc(long=s3.w,wide=s3.b),multicorr(long=s3.w,wide=s3.b))
rownames(desc) <- desc$Measure
desc$Measure <- NULL
desc[,4:ncol(desc)]
PLOT
ggplot(melt(as.matrix(multicorr(long=s3.w,wide=s3.b))),aes(x=Var1, y=Var2, fill=value)) + geom_tile() +
geom_text(aes(label=value)) +
scale_fill_gradient2(low="darkblue",high="#f03b20",mid="white",midpoint=0,limit = c(-1,1), space = "Lab",
name="Pearson\nCorrelation",guide="legend",breaks=round(seq(1,-1,length.out = 11),2),
minor_breaks=round(seq(1,-1,length.out = 11),2))+labs(x="",y="")+
theme(text=element_text(size=9),legend.position = "none",axis.text.x=element_text(angle=45,vjust=0.5,hjust=1))
4.6. Contextual factors
As a final step, we evaluate the ESM measures’ sensitivity to contextual factors. That is, we evaluate whether and how much the ESM measures vary across days and occasions, and in relation to the information collected with the work sampling measures.
4.6.1. Days and occasions
First, we evaluate how the ESM measures vary across days of the week and occasions.
SUMMARY
In summary, the day of the week was not associated with substantial differences in any considered variable, whereas the day of assessment predicted differences in Negative Valence, Tense Arousal, and Task Control, and the survey number predicted differences only in Negative Valence.
Overall, the results obtained from subsamples s2 and s3 replicate those found in the main subsample, although the differences in Task Control between days of assessment were not substantial in subsample s2.
PIPELINE & FUNCTIONS
In this section, we distinguish three main categorical variables associated with time: the day of the week (Day.of.week, from Monday to Friday), the day of assessment (Day.of.assessment, from 1st to 3rd), and the temporal order of ESM questionnaires (surveyNumber, from 1st to 7th).
Then, we visualize the temporal trajectories of ESM scores over time, and we use linear mixed-effects regression (LMER) models to evaluate the differences in each ESM measure across the levels of each categorical variable described above.
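Before introducing the functions, here is a minimal sketch of the comparison logic used below (and inside plotLMER()): a random-intercept-only model is compared with a model adding the categorical time predictor via Akaike weights and a likelihood ratio test. The example uses the sleepstudy data shipped with lme4 rather than our ESM data:
# minimal sketch: null vs. predictor LMER comparison on lme4's sleepstudy data (not our ESM data)
library(lme4); library(MuMIn)
sleepstudy$Days.f <- as.factor(sleepstudy$Days) # categorical time predictor
null <- lmer(Reaction ~ (1|Subject), data = sleepstudy) # random-intercept-only model
fit <- lmer(Reaction ~ Days.f + (1|Subject), data = sleepstudy) # adding the time factor
Weights(AIC(null, fit)) # Akaike weights of the two models
anova(null, fit) # likelihood ratio test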
The following functions and packages are used to compare days and occasions:
library(lme4); library(MuMIn); library(Rmisc); library(ggplot2); library(gridExtra)
timeTraj()
timeTraj <- function(data,day="day.of.week",colored=TRUE,
lineSize=1.5,dotSize=4){ require(ggplot2); require(reshape2); require(Rmisc)
# creating TIME variable
data$TIME <- data$within.day
if(day=="day.of.week"){ days=c(3,5) }else if(day=="day"){ days=c(2,3) }
for(i in 1:nrow(data)){ if(data[i,day]==days[1]){ data[i,"TIME"] <- data[i,"within.day"] + 8
} else if(data[i,day]==days[2]){ data[i,"TIME"] <- data[i,"TIME"] + 16 }}
# mean scores in each time point
mean.scores <- cbind(summarySE(data, measurevar="NV", groupvars="TIME",na.rm=TRUE)[,c(1,3)],
data.frame(TA=summarySE(data, measurevar="TA", groupvars="TIME",na.rm=TRUE)[,3],
FA=summarySE(data, measurevar="FA", groupvars="TIME",na.rm=TRUE)[,3],
TD=summarySE(data, measurevar="TD", groupvars="TIME",na.rm=TRUE)[,3],
TC=summarySE(data, measurevar="TC", groupvars="TIME",na.rm=TRUE)[,3]))
mean.scores <- rbind(mean.scores[1:7,],data.frame(TIME=8,NV=NA,TA=NA,FA=NA,TD=NA,TC=NA),
mean.scores[8:14,],data.frame(TIME=16,NV=NA,TA=NA,FA=NA,TD=NA,TC=NA),
mean.scores[15:nrow(mean.scores),])
means <- melt(mean.scores,id.vars="TIME")
means$variable <- factor(means$variable,levels=c("TD","NV","TC","TA","FA")) # sorting variables
# plotting
if(day=="day.of.week"){ labelS <- c("Monday","Wednesday","Friday") } else { labelS <- c("Day 1","Day 2","Day 3")}
p <- ggplot(means,aes(x=TIME,y=value,color=variable,shape=variable)) +
# lines and points
geom_line(size=lineSize) + geom_point(size=dotSize,fill="black") +
# between-days borders
geom_line(aes(y=4),linetype=2) + geom_vline(xintercept=c(8,16),size=2)+
geom_label(aes(x=4,y=4.7,label=labelS[1]),colour="black",size=5,label.size=0,fill=rgb(1,1,1,alpha=0))+
geom_label(aes(x=12,y=4.7,label=labelS[2]),colour="black",size=5,label.size=0,fill=rgb(1,1,1,alpha=0))+
geom_label(aes(x=20,y=4.7,label=labelS[3]),colour="black",size=5,label.size=0,fill=rgb(1,1,1,alpha=0))+
# axes
xlab("Time (hh:mm)") +
scale_x_continuous(breaks=seq(1,23,2),
labels=rep(c("9:30","12:00","15:00","18:00"),3)) +
ylab("Mean scores")
# colors and shapes
labS <- c("NV"="Negative Valence","TA"="Tense Arousal","FA"="Fatigue","TD"="Task Demand","TC"="Task Control") # labels
if(colored==TRUE){
p <- p + scale_colour_manual(name="",values=c("NV"="red","TA"="#cc0066","FA"="salmon", # colors
"TD"="#5DBEFF","TC"="lightblue"),labels=labS) +
scale_shape_manual(name="",values=c("NV"=17,"TA"=17,"FA"=17,"TD"=19,"TC"=19),labels=labS) # shapes
} else {
p <- p + scale_colour_manual(name="",values=c("NV"="gray","TA"="darkgray","FA"="gray", # gray colors
"TD"="black","TC"="black"),labels=labS) +
scale_shape_manual(name="",values=c("NV"=19,"TA"=17,"FA"=15,"TD"=25,"TC"=23),labels=labS) }
# theme
p <- p + theme_bw()+
theme(text=element_text(size=12), # text
axis.text=element_text(size=10),axis.title=element_text(size=15),
axis.text.y.right=element_text(color="blue"),axis.title.y.right = element_text(color="blue"), # axes
legend.justification=c(0,1),legend.position="top",legend.key.width = unit(1.7,"cm"),legend.box="horizontal")
p}
Plots the temporal trajectories of the averaged scores.
plotLMER()
plotLMER <- function(dat,pred,resp,LMER=TRUE,LMER.results=TRUE,diagnostics=FALSE,forceLMER=FALSE,
Xlab=NA,Ylab=NA,text.size=16){ require(ggplot2)
# defining predictor and response variable
colnames(dat)[which(colnames(dat)==pred)] <- "pred"
if(class(dat$pred)!="factor"){ dat$pred <- as.factor(dat$pred) }
colnames(dat)[which(colnames(dat)==resp)] <- "resp"
# LMER
if(LMER==TRUE){ require(lme4); require(MuMIn)
fit <- lmer(resp~pred+(1|ID),data=dat[!is.na(dat$pred),])
null <- lmer(resp~(1|ID),data=dat[!is.na(dat$pred),])
cat("\n\n\n***\nFitting LMER model on",nrow(dat[!is.na(dat$resp),]),
"observations from",summary(fit)$ngrps,"subjects:\n",resp,"~",pred," + (1|ID)\n\n")
AICw <- Weights(AIC(null,fit))[2]
# LMER information
if(LMER.results==TRUE){ require(MuMIn)
if(AICw>0.5 | forceLMER==TRUE){ LRT <- anova(null,fit) # likelihood ratio and coeff only when AICw > 0.5
cat("\nAICw vs. null model =",round(AICw,3),
"\nLikelihood Ratio Test: Chisq =",round(LRT$Chisq[2],2),"(",LRT$Df[2],") p =",round(LRT$`Pr(>Chisq)`[2],3),"\n\n")
print(summary(fit)$coefficients)
ci <- as.data.frame(confint(fit,method="profile"))
cat("\n\nComputing profile confidence intervals ...\n")
print(ci)
ci <- cbind(data.frame(Estimate=as.data.frame(summary(fit)$coefficients)[,1]),ci[3:nrow(ci),])
ci[2:nrow(ci),] <- ci[2:nrow(ci),] + ci[1,1]
ci$pred <- levels(dat$pred)
colnames(ci) <- c("resp","lower","upper","pred")
} else { cat("\nAICw vs. null model =",round(AICw,3),"\n\n")
if(AICw>0.5){ print(summary(fit)$coefficients) }}}} else { AICw = 0 }
# plot
p <- ggplot(dat,aes(x=pred,y=resp)) +
geom_point(col="gray",position = position_jitter(width = .15)) + geom_violin(alpha=.4) +
theme_bw() + theme(text=element_text(size=text.size),axis.title=element_text(size=16),legend.position="none")
if((LMER.results==TRUE & AICw>0.5) | forceLMER == TRUE){
p <- p + geom_point(data=ci,size=2) + geom_errorbar(data=ci,aes(ymin=lower,ymax=upper),width=.4) }
if(!is.na(Xlab)){ p <- p + xlab(Xlab) } else { p <- p + xlab(pred) }
if(!is.na(Ylab)){ p <- p + ylab(Ylab) } else { p <- p + ylab(resp) }
# LMER diagnostics
if(diagnostics==TRUE){ require(fitdistrplus)
cat("\n\nPlotting model diagnostics:")
par(mfrow=c(2,2),mar=c(3,2,3,1))
qqnorm(resid(fit),main="qqnorm residuals")
qqline(resid(fit),col="red")
qqnorm(ranef(fit)$`ID`[,1],main="qqnorm random effects")
qqline(ranef(fit)$`ID`[,1],col="red")
dt <- dat[!is.na(dat$resp) & !is.na(dat$pred),]
boxplot(resid(fit)~dt$pred,main="Residuals across X levels")
boxplot(dt$resp~dt$pred,main="Response across X levels")}
p }
Plots a quantitative response variable over the levels of a categorical predictor and returns diagnostic information, parameter estimates and 95% CI (with profile method, i.e., computing likelihood profile and finding the appropriate cutoffs based on the likelihood ratio test) according to a specified LMER model.
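For reference, the profile confidence intervals mentioned above can be obtained directly from a fitted lmer model; a minimal sketch, again on the sleepstudy example data rather than our ESM data:
# minimal sketch: profile 95% CIs from a fitted lmer model (sleepstudy example data)
library(lme4)
fit <- lmer(Reaction ~ Days + (1|Subject), data = sleepstudy)
confint(fit, method = "profile") # likelihood-profile confidence intervals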
SUBSAMPLE s1
DESCRIPTIVES
Here, we compute the number and percentages of participants that started the protocol in each of the possible weekdays (Monday, Wednesday, Friday), and we create temporal variables.
# Day of week as factor
s1.w$Day.of.week <- "Monday"
s1.w[s1.w$day.of.week==3,"Day.of.week"] <- "Wednesday"
s1.w[s1.w$day.of.week==5,"Day.of.week"] <- "Friday"
s1.w$Day.of.week <- factor(s1.w$Day.of.week,levels=c("Monday","Wednesday","Friday"))
# Day of assessment as factor
s1.w$Day.of.assessment <- "Day 1"
s1.w[s1.w$day==2,"Day.of.assessment"] <- "Day 2"
s1.w[s1.w$day==3,"Day.of.assessment"] <- "Day 3"
s1.w$Day.of.assessment <- as.factor(s1.w$Day.of.assessment)
# Survey Number
s1.w <- plyr::ddply(s1.w,c("ID","day"),transform,surveyNumber=seq_along(day.of.week))
# number of participants by starting day
wide <- s1.w[s1.w$day==1 & s1.w$surveyNumber==1,]
summary(as.factor(wide$day.of.week)) # number
## 1 3 5
## 51 47 41
round(100*summary(as.factor(wide$day.of.week))/nrow(wide),1) # percentage
## 1 3 5
## 36.7 33.8 29.5
# computing timeLag variable (time in hours)
library(dplyr)
s1.w <- s1.w %>%
group_by(ID,Day.of.week) %>%
mutate(timeLag = as.numeric(difftime(RunTimestamp,as.POSIXct(paste(substr(RunTimestamp,1,10),"00:00:00"),
format="%Y-%m-%d %H:%M:%S"),units="hours")),
# creating time lag (hours)
timeLag = timeLag - min(timeLag,na.rm=TRUE),
# creating lagged variables
NV.LAG = dplyr::lag(NV,n=1,default=NA),TA.LAG=dplyr::lag(TA,n=1,default=NA),FA.LAG = dplyr::lag(FA,n=1,default=NA),
TD.LAG = dplyr::lag(TD,n=1,default=NA),TC.LAG = dplyr::lag(TC,n=1,default=NA))
s1.w <- as.data.frame(s1.w)
detach("package:dplyr", unload=TRUE)
Comments:
- the number of participants that started on each weekday is quite balanced
- the dataset now includes Day.of.week (as factor), Day.of.assessment (as factor) and surveyNumber (as integer)
TIME TRAJ.
Here, we visually inspect the temporal trajectories of mean scores in each ESM measure, considering both day of week and day of participation.
# average scores by day of week
timeTraj(s1.w)
# # saving Figure 2 for the article (black and white)
# timeTraj(s1.w,colored=FALSE,lineSize=1.3,dotSize=3)
# ggsave(filename="RESULTS/Figure2_rev2.tiff",dpi=300)
# average scores by day of participation
timeTraj(s1.w,day="day")
TD
Here, we evaluate systematic differences in Task Demand across temporal variables.
First, we use the plotLMER() function shown above to visualize the bivariate score distributions while highlighting cases associated with AICw > .50. The function also allows us to visualize the model diagnostics, in terms of the normal fit of residuals and random intercepts, and homoscedasticity.
grid.arrange(plotLMER(dat=s1.w,pred="Day.of.week",resp="TD",Xlab="Day of week",Ylab="Task Demand"),
plotLMER(dat=s1.w,pred="Day.of.assessment",resp="TD",Xlab="Day of assessment",Ylab="Task Demand"),
plotLMER(dat=s1.w[s1.w$surveyNumber<6,],
pred="surveyNumber",resp="TD",Xlab="Survey Number",Ylab="Task Demand"),
plotLMER(dat=s1.w[s1.w$day==1 & s1.w$surveyNumber<6,],pred="surveyNumber",resp="TD",LMER=FALSE,
Xlab="Survey Number (Day 1 only)",Ylab="Task Demand"),nrow=2)
##
##
##
## ***
## Fitting LMER model on 1523 observations from 139 subjects:
## TD ~ Day.of.week + (1|ID)
##
##
## AICw vs. null model = 0.22
##
##
##
##
## ***
## Fitting LMER model on 1523 observations from 139 subjects:
## TD ~ Day.of.assessment + (1|ID)
##
##
## AICw vs. null model = 0.049
##
##
##
##
## ***
## Fitting LMER model on 1410 observations from 139 subjects:
## TD ~ surveyNumber + (1|ID)
##
##
## AICw vs. null model = 0.319
p <- plotLMER(dat=s1.w,pred="Day.of.week",resp="TD",LMER.results=FALSE,diagnostics=TRUE)
##
##
##
## ***
## Fitting LMER model on 1523 observations from 139 subjects:
## TD ~ Day.of.week + (1|ID)
##
##
##
## Plotting model diagnostics:
Second, we specify a set of nested LMER models (i.e., hierarchical multiple regression) to evaluate the importance of each additional temporal factor while controlling for those factors associated with substantial differences in the previous step. A factor is retained in the following models only when its inclusion is associated with an AICw > .50.
# linear trend over time
TD.null <- lmer(TD~(1|ID),data=s1.w[!is.na(s1.w$TD.LAG),])
TD.autoreg <- lmer(TD~TD.LAG+(TD.LAG|ID),data=s1.w)
TD.time <- lmer(TD~TD.LAG+timeLag+(TD.LAG|ID),data=s1.w)
Weights(AIC(TD.null,TD.autoreg,TD.time)) # time AICw = .05
## model weights
## [1] 0.000 0.953 0.047
# Day of assessment controlling by Day of week
TD.null <- lmer(TD~(1|ID),data=s1.w)
TD.day.of.week <- lmer(TD~Day.of.week+(1|ID),data=s1.w)
TD.day.of.assessment <- lmer(TD~Day.of.week+Day.of.assessment+(1|ID),data=s1.w)
TD.surveyNumber <- lmer(TD~Day.of.week+Day.of.assessment+as.factor(surveyNumber)+(1|ID),data=s1.w)
Weights(AIC(TD.null,TD.day.of.week,TD.day.of.assessment,TD.surveyNumber)) # null AICw = .77
## model weights
## [1] 0.771 0.217 0.012 0.000
Comments:
no substantial differences are observed between weekdays, with the AICw being < .50
no substantial differences are observed between days of participation (AICw < .50), although a slightly lower TD is observed in Day 3 compared to Day 1
no substantial differences are observed between survey numbers, considering both the means across days (AICw is < .50) and only the first day, although a slightly higher TD is observed in Survey 5 compared to the previous surveys
TC
Here, we evaluate systematic differences in Task Control across temporal variables.
First, we use the plotLMER() function to visualize the bivariate score distributions while highlighting cases associated with AICw > .50. The function also allows us to visualize the model diagnostics, in terms of the normal fit of residuals and random intercepts, and of homoscedasticity.
grid.arrange(plotLMER(dat=s1.w,pred="Day.of.week",resp="TC",Xlab="Day of week",Ylab="Task Control"),
plotLMER(dat=s1.w,pred="Day.of.assessment",resp="TC",Xlab="Day of assessment",Ylab="Task Control"),
plotLMER(dat=s1.w[s1.w$surveyNumber<6,],pred="surveyNumber",resp="TC",
Xlab="Survey Number",Ylab="Task Control"),
plotLMER(dat=s1.w[s1.w$day==1 & s1.w$surveyNumber<6,],pred="surveyNumber",resp="TC",LMER=FALSE,
Xlab="Survey Number (Day 1 only)",Ylab="Task Control"),nrow=2)
##
##
##
## ***
## Fitting LMER model on 1491 observations from 139 subjects:
## TC ~ Day.of.week + (1|ID)
##
##
## AICw vs. null model = 0.006
##
##
##
##
## ***
## Fitting LMER model on 1491 observations from 139 subjects:
## TC ~ Day.of.assessment + (1|ID)
##
##
## AICw vs. null model = 0.838
## Likelihood Ratio Test: Chisq = 14.36 ( 2 ) p = 0.001
##
## Estimate Std. Error t value
## (Intercept) 4.3260744 0.09651693 44.821922
## predDay 2 -0.2587229 0.07254502 -3.566377
## predDay 3 -0.2082722 0.07289535 -2.857139
##
##
## Computing profile confidence intervals ...
## 2.5 % 97.5 %
## .sig01 0.8456496 1.10653924
## .sigma 1.0935791 1.17922907
## (Intercept) 4.1366367 4.51559229
## predDay 2 -0.4008795 -0.11651651
## predDay 3 -0.3510955 -0.06534996
##
##
##
## ***
## Fitting LMER model on 1381 observations from 139 subjects:
## TC ~ surveyNumber + (1|ID)
##
##
## AICw vs. null model = 0
p <- plotLMER(dat=s1.w,pred="Day.of.assessment",resp="TC",LMER.results=FALSE,diagnostics=TRUE)
##
##
##
## ***
## Fitting LMER model on 1491 observations from 139 subjects:
## TC ~ Day.of.assessment + (1|ID)
##
##
##
## Plotting model diagnostics:
Second, we specify a set of nested LMER models (i.e., hierarchical multiple regression) to evaluate the importance of each additional temporal factor while controlling for the factors associated with substantial differences in the previous step. An additional factor is carried over to the following models only when its inclusion is associated with an AICw > .50.
# linear trend over time
TC.null <- lmer(TC~(1|ID),data=s1.w[!is.na(s1.w$TC.LAG),])
TC.autoreg <- lmer(TC~TC.LAG+(TC.LAG|ID),data=s1.w)
TC.time <- lmer(TC~TC.LAG+timeLag+(TC.LAG|ID),data=s1.w)
Weights(AIC(TC.null,TC.autoreg,TC.time)) # time AICw = .03
## model weights
## [1] 0.00 0.97 0.03
# Day of assessment controlling by Day of week
TC.null <- lmer(TC~(1|ID),data=s1.w)
TC.day.of.week <- lmer(TC~Day.of.week+(1|ID),data=s1.w)
TC.day.of.assessment <- lmer(TC~Day.of.week+Day.of.assessment+(1|ID),data=s1.w)
Weights(AIC(TC.null,TC.day.of.week,TC.day.of.assessment)) # null AICw = .97
## model weights
## [1] 0.967 0.005 0.028
round(as.data.frame(summary(TC.day.of.assessment)$coefficients),3)
# SurveyNumber controlling by Day
TC.surveyNumber <- lmer(TC~Day.of.week+Day.of.assessment+as.factor(surveyNumber)+(1|ID),data=s1.w)
Weights(AIC(TC.null,TC.day.of.week,TC.day.of.assessment,TC.surveyNumber)) # null AICw = .97
## model weights
## [1] 0.967 0.005 0.028 0.000
Comments:
no substantial differences are observed between weekdays, with the AICw being < .50
substantial and significant differences are observed between days of participation (AICw > .50, significant LRT but only when compared with the null)
no substantial differences are observed between survey numbers, considering both the means across days (AICw is < .50) and only the first day
NV
Here, we evaluate systematic differences in Negative Valence across temporal variables.
First, we use the plotLMER() function to visualize the bivariate score distributions while highlighting cases associated with AICw > .50. The function also allows us to visualize the model diagnostics, in terms of the normal fit of residuals and random intercepts, and of homoscedasticity.
grid.arrange(plotLMER(dat=s1.w,pred="Day.of.week",resp="NV",Xlab="Day of week",Ylab="Negative Valence"),
plotLMER(dat=s1.w,pred="Day.of.assessment",resp="NV",Xlab="Day of assessment",Ylab="Negative Valence"),
plotLMER(dat=s1.w[s1.w$surveyNumber<6,],
pred="surveyNumber",resp="NV",Xlab="Survey Number",Ylab="Negative Valence"),
plotLMER(dat=s1.w[s1.w$day==1 & s1.w$surveyNumber<6,],pred="surveyNumber",resp="NV",LMER=FALSE,
Xlab="Survey Number (Day 1 only)",Ylab="Negative Valence"),nrow=2)
##
##
##
## ***
## Fitting LMER model on 1774 observations from 139 subjects:
## NV ~ Day.of.week + (1|ID)
##
##
## AICw vs. null model = 0.04
##
##
##
##
## ***
## Fitting LMER model on 1774 observations from 139 subjects:
## NV ~ Day.of.assessment + (1|ID)
##
##
## AICw vs. null model = 0.992
## Likelihood Ratio Test: Chisq = 22.23 ( 2 ) p = 0
##
## Estimate Std. Error t value
## (Intercept) 3.2378580 0.06924354 46.760432
## predDay 2 0.2053656 0.04805403 4.273639
## predDay 3 0.1859169 0.04856667 3.828075
##
##
## Computing profile confidence intervals ...
## 2.5 % 97.5 %
## .sig01 0.62601074 0.8131494
## .sigma 0.79716764 0.8537262
## (Intercept) 3.10187901 3.3738106
## predDay 2 0.11116515 0.2995301
## predDay 3 0.09066261 0.2810610
##
##
##
## ***
## Fitting LMER model on 1661 observations from 139 subjects:
## NV ~ surveyNumber + (1|ID)
##
##
## AICw vs. null model = 0.977
## Likelihood Ratio Test: Chisq = 30.92 ( 4 ) p = 0
##
## Estimate Std. Error t value
## (Intercept) 3.1936803 0.07335937 43.534730
## pred2 0.2112063 0.05805656 3.637940
## pred3 0.2073288 0.05938720 3.491136
## pred4 0.2255131 0.06408823 3.518791
## pred5 0.3881189 0.07679190 5.054164
##
##
## Computing profile confidence intervals ...
## 2.5 % 97.5 %
## .sig01 0.62953348 0.8188165
## .sigma 0.80008471 0.8589970
## (Intercept) 3.04976076 3.3376228
## pred2 0.09748594 0.3249090
## pred3 0.09099730 0.3236329
## pred4 0.09997046 0.3510204
## pred5 0.23769501 0.5385050
# plotting LMER diagnostics
p <- plotLMER(dat=s1.w,pred="Day.of.assessment",resp="NV",LMER.results=FALSE,
Xlab="Day of assessment",Ylab="Negative Valence",diagnostics=TRUE)
##
##
##
## ***
## Fitting LMER model on 1774 observations from 139 subjects:
## NV ~ Day.of.assessment + (1|ID)
##
##
##
## Plotting model diagnostics:
Second, we specify a set of nested LMER models (i.e., hierarchical multiple regression) to evaluate the importance of each additional temporal factor while controlling for the factors associated with substantial differences in the previous step. An additional factor is carried over to the following models only when its inclusion is associated with an AICw > .50.
# linear trend over time
NV.null <- lmer(NV~(1|ID),data=s1.w[!is.na(s1.w$NV.LAG),])
NV.autoreg <- lmer(NV~NV.LAG+(NV.LAG|ID),data=s1.w)
NV.time <- lmer(NV~NV.LAG+timeLag+(NV.LAG|ID),data=s1.w)
Weights(AIC(NV.null,NV.autoreg,NV.time)) # time AICw = .009
## model weights
## [1] 0.000 0.991 0.009
# Day of assessment controlling by Day of week
NV.null <- lmer(NV~(1|ID),data=s1.w)
NV.day.of.week <- lmer(NV~Day.of.week+(1|ID),data=s1.w)
NV.day.of.assessment <- lmer(NV~Day.of.week+Day.of.assessment+(1|ID),data=s1.w)
Weights(AIC(NV.null,NV.day.of.week,NV.day.of.assessment)) # day.of.assessment AICw = .90
## model weights
## [1] 0.099 0.004 0.897
round(as.data.frame(summary(NV.day.of.assessment)$coefficients),3)
# SurveyNumber controlling by Day
NV.surveyNumber <- lmer(NV~Day.of.week+Day.of.assessment+as.factor(surveyNumber)+(1|ID),data=s1.w)
Weights(AIC(NV.null,NV.day.of.week,NV.day.of.assessment,NV.surveyNumber)) # surveyNumber AICw = .66
## model weights
## [1] 0.033 0.001 0.301 0.665
round(as.data.frame(summary(NV.surveyNumber)$coefficients),3)
Comments:
no substantial differences are observed between weekdays (AICw < .50), although Friday shows lower NV in Day 1
substantial and significant differences are observed between days of participation (AICw > .50, significant LRT)
substantial and significant differences are observed between survey numbers (AICw > .50, significant LRT), considering the means across days. A slightly more marked trend is observed when considering only the first day
TA
Here, we evaluate systematic differences in Tense Arousal across temporal variables.
First, we use the plotLMER() function to visualize the bivariate score distributions while highlighting cases associated with AICw > .50. The function also allows us to visualize the model diagnostics, in terms of the normal fit of residuals and random intercepts, and of homoscedasticity.
grid.arrange(plotLMER(dat=s1.w,pred="Day.of.week",resp="TA",Xlab="Day of week",Ylab="Tense Arousal"),
plotLMER(dat=s1.w,pred="Day.of.assessment",resp="TA",Xlab="Day of assessment",Ylab="Tense Arousal"),
plotLMER(dat=s1.w[s1.w$surveyNumber<6,],
pred="surveyNumber",resp="TA",Xlab="Survey Number",Ylab="Tense Arousal"),
plotLMER(dat=s1.w[s1.w$day==1 & s1.w$surveyNumber<6,],pred="surveyNumber",resp="TA",LMER=FALSE,
Xlab="Survey Number (Day 1 only)",Ylab="Tense Arousal"),nrow=2)
##
##
##
## ***
## Fitting LMER model on 1774 observations from 139 subjects:
## TA ~ Day.of.week + (1|ID)
##
##
## AICw vs. null model = 0.032
##
##
##
##
## ***
## Fitting LMER model on 1774 observations from 139 subjects:
## TA ~ Day.of.assessment + (1|ID)
##
##
## AICw vs. null model = 0.998
## Likelihood Ratio Test: Chisq = 24.9 ( 2 ) p = 0
##
## Estimate Std. Error t value
## (Intercept) 3.2602317 0.07595752 42.921774
## predDay 2 0.2191554 0.05161913 4.245623
## predDay 3 0.2277889 0.05217144 4.366162
##
##
## Computing profile confidence intervals ...
## 2.5 % 97.5 %
## .sig01 0.6914925 0.8971437
## .sigma 0.8562557 0.9170111
## (Intercept) 3.1110955 3.4094114
## predDay 2 0.1179535 0.3202977
## predDay 3 0.1254958 0.3300080
##
##
##
## ***
## Fitting LMER model on 1661 observations from 139 subjects:
## TA ~ surveyNumber + (1|ID)
##
##
## AICw vs. null model = 0.005
# plotting model diagnostics
p <- plotLMER(dat=s1.w,pred="Day.of.assessment",resp="TA",LMER.results=FALSE,diagnostics=TRUE)
##
##
##
## ***
## Fitting LMER model on 1774 observations from 139 subjects:
## TA ~ Day.of.assessment + (1|ID)
##
##
##
## Plotting model diagnostics:
Second, we specify a set of nested LMER models (i.e., hierarchical multiple regression) to evaluate the importance of each additional temporal factor while controlling for the factors associated with substantial differences in the previous step. An additional factor is carried over to the following models only when its inclusion is associated with an AICw > .50.
# linear trend over time
TA.null <- lmer(TA~(1|ID),data=s1.w[!is.na(s1.w$TA.LAG),])
TA.autoreg <- lmer(TA~TA.LAG+(TA.LAG|ID),data=s1.w)
TA.time <- lmer(TA~TA.LAG+timeLag+(TA.LAG|ID),data=s1.w)
Weights(AIC(TA.null,TA.autoreg,TA.time)) # time AICw = .01
## model weights
## [1] 0.000 0.989 0.011
# Day of assessment controlling by Day of week
TA.null <- lmer(TA~(1|ID),data=s1.w)
TA.day.of.week <- lmer(TA~Day.of.week+(1|ID),data=s1.w)
TA.day.of.assessment <- lmer(TA~Day.of.week+Day.of.assessment+(1|ID),data=s1.w)
Weights(AIC(TA.null,TA.day.of.week,TA.day.of.assessment)) # day.of.assessment AICw = .97
## model weights
## [1] 0.032 0.001 0.967
round(as.data.frame(summary(TA.day.of.assessment)$coefficients),3)
# SurveyNumber controlling by Day
TA.surveyNumber <- lmer(TA~Day.of.week+Day.of.assessment+as.factor(surveyNumber)+(1|ID),data=s1.w)
Weights(AIC(TA.null,TA.day.of.week,TA.day.of.assessment,TA.surveyNumber)) # surveyNumber AICw = 0
## model weights
## [1] 0.032 0.001 0.967 0.000
Comments:
no substantial differences are observed between weekdays (AICw < .50), although Monday is associated with higher TA in day 1
substantial and significant differences are observed between days of participation (AICw > .50, significant LRT)
no substantial differences are observed between survey numbers, considering both the means across days (AICw is < .50) and only the first day
FA
Here, we evaluate systematic differences in Fatigue across temporal variables.
First, we use the plotLMER() function to visualize the bivariate score distributions while highlighting cases associated with AICw > .50. The function also allows us to visualize the model diagnostics, in terms of the normal fit of residuals and random intercepts, and of homoscedasticity.
grid.arrange(plotLMER(dat=s1.w,pred="Day.of.week",resp="FA",Xlab="Day of week",Ylab="Fatigue"),
plotLMER(dat=s1.w,pred="Day.of.assessment",resp="FA",Xlab="Day of assessment",Ylab="Fatigue"),
plotLMER(dat=s1.w[s1.w$day==1 & s1.w$surveyNumber<6,],pred="surveyNumber",resp="FA",
Xlab="Survey Number",Ylab="Fatigue"),
plotLMER(dat=s1.w[s1.w$surveyNumber<6,],pred="surveyNumber",resp="FA",LMER=FALSE,
Xlab="Survey Number (Day 1 only)",Ylab="Fatigue"),nrow=2)
##
##
##
## ***
## Fitting LMER model on 1774 observations from 139 subjects:
## FA ~ Day.of.week + (1|ID)
##
##
## AICw vs. null model = 0.004
##
##
##
##
## ***
## Fitting LMER model on 1774 observations from 139 subjects:
## FA ~ Day.of.assessment + (1|ID)
##
##
## AICw vs. null model = 0.016
##
##
##
##
## ***
## Fitting LMER model on 577 observations from 139 subjects:
## FA ~ surveyNumber + (1|ID)
##
##
## AICw vs. null model = 1
## Likelihood Ratio Test: Chisq = 52.89 ( 4 ) p = 0
##
## Estimate Std. Error t value
## (Intercept) 3.41486811 0.09773992 34.9383149
## pred2 -0.01257101 0.10736945 -0.1170818
## pred3 0.32471012 0.10980890 2.9570474
## pred4 0.50890641 0.11574000 4.3969797
## pred5 0.81715593 0.13669064 5.9781411
##
##
## Computing profile confidence intervals ...
## 2.5 % 97.5 %
## .sig01 0.6204532 0.8596555
## .sigma 0.8288937 0.9460230
## (Intercept) 3.2236010 3.6061352
## pred2 -0.2225532 0.1973499
## pred3 0.1098206 0.5393534
## pred4 0.2823563 0.7351421
## pred5 0.5495907 1.0843476
p <- plotLMER(dat=s1.w[s1.w$day==1 & s1.w$surveyNumber<6,],pred="surveyNumber",resp="FA",diagnostics=TRUE,LMER.results=FALSE)
##
##
##
## ***
## Fitting LMER model on 577 observations from 139 subjects:
## FA ~ surveyNumber + (1|ID)
##
##
##
## Plotting model diagnostics:
Second, we specify a set of nested LMER models (i.e., hierarchical multiple regression) to evaluate the importance of each additional temporal factor while controlling for the factors associated with substantial differences in the previous step. An additional factor is carried over to the following models only when its inclusion is associated with an AICw > .50.
# linear trend over time
FA.null <- lmer(FA~(1|ID),data=s1.w[!is.na(s1.w$FA.LAG),])
FA.autoreg <- lmer(FA~FA.LAG+(FA.LAG|ID),data=s1.w)
FA.time <- lmer(FA~FA.LAG+timeLag+(FA.LAG|ID),data=s1.w)
Weights(AIC(FA.null,FA.autoreg,FA.time)) # time AICw = 1 (!)
## model weights
## [1] 0 0 1
round(summary(FA.time)$coefficients,3)
## Estimate Std. Error t value
## (Intercept) 1.849 0.117 15.760
## FA.LAG 0.448 0.028 15.924
## timeLag 0.104 0.010 10.889
confint(FA.time,method="profile")[5:7,] # computing 95% CI
## 2.5 % 97.5 %
## (Intercept) 1.60263279 2.1003203
## FA.LAG 0.38637644 0.5099584
## timeLag 0.08470838 0.1228386
# survey number controlling for time
FA.time <- lmer(FA~FA.LAG+timeLag+(1|ID),data=s1.w)
FA.surveyNumber <- lmer(FA~FA.LAG+timeLag+as.factor(surveyNumber)+(1|ID),data=s1.w)
Weights(AIC(FA.null,FA.time,FA.surveyNumber)) # time AICw = 1
## model weights
## [1] 0.000 0.981 0.019
Comments:
no substantial differences are observed between weekdays, with the AICw being < .50
no substantial differences are observed between days of participation, with the AICw being < .50
substantial and significant differences are observed between survey numbers, considering the means across days (AICw > .50, significant LRT), and a very similar trend is observed when considering only the first day of participation. However, for Fatigue this trend is likely associated with the circadian Fatigue pattern (i.e., an increasing trend throughout the workday) rather than with an initial elevation bias: indeed, the effect of surveyNumber is no longer substantial once the linear trend of time is accounted for
EXAMPLE PLOTS
Here, we plot some examples of substantial differences.
p1 <- plotLMER(dat=s1.w,pred="Day.of.assessment",resp="TA",Xlab="Day of assessment",Ylab="Tense Arousal")
p2 <- plotLMER(dat=s1.w,pred="Day.of.assessment",resp="TC",Xlab="Day of assessment",Ylab="Task Control")
p3 <- plotLMER(dat=s1.w,pred="Day.of.assessment",resp="NV",Xlab="Day of assessment",Ylab="Negative Valence")
p4 <- plotLMER(dat=s1.w[s1.w$surveyNumber<6,],pred="surveyNumber",resp="NV",
Xlab="Survey Number",Ylab="Negative Valence")
p <- gridExtra::grid.arrange(p1,p2,p3,p4,nrow=2)
# ggsave("RESULTS/contextual_time.tiff",p,dpi=300) # saving figure for supplementary materials
SUBSAMPLE s2
DESCRIPTIVES
Here, we compute the number and percentage of participants who started the protocol on each of the possible weekdays (Monday, Wednesday, Friday), and we create the temporal variables.
# Day of week as factor
s2.w$Day.of.week <- "Monday"
s2.w[s2.w$day.of.week==3,"Day.of.week"] <- "Wednesday"
s2.w[s2.w$day.of.week==5,"Day.of.week"] <- "Friday"
s2.w$Day.of.week <- factor(s2.w$Day.of.week,levels=c("Monday","Wednesday","Friday"))
# Day of assessment as factor
s2.w$Day.of.assessment <- "Day 1"
s2.w[s2.w$day==2,"Day.of.assessment"] <- "Day 2"
s2.w[s2.w$day==3,"Day.of.assessment"] <- "Day 3"
s2.w$Day.of.assessment <- as.factor(s2.w$Day.of.assessment)
# Survey Number
s2.w <- plyr::ddply(s2.w,c("ID","day"),transform,surveyNumber=seq_along(day.of.week))
# number of participants by starting day
wide.s2 <- s2.w[s2.w$day==1 & s2.w$surveyNumber==1,]
summary(as.factor(wide.s2$day.of.week)) # number
## 1 3 5
## 34 27 29
round(100*summary(as.factor(wide.s2$day.of.week))/nrow(wide.s2),1) # percentage
## 1 3 5
## 37.8 30.0 32.2
# computing timeLag variable (time in hours)
library(dplyr)
s2.w <- s2.w %>%
group_by(ID,Day.of.week) %>%
mutate(timeLag = as.numeric(difftime(RunTimestamp,as.POSIXct(paste(substr(RunTimestamp,1,10),"00:00:00"),
format="%Y-%m-%d %H:%M:%S"),units="hours")),
# creating time lag (hours)
timeLag = timeLag - min(timeLag,na.rm=TRUE),
# creating lagged variables
NV.LAG = dplyr::lag(NV,n=1,default=NA),TA.LAG=dplyr::lag(TA,n=1,default=NA),FA.LAG = dplyr::lag(FA,n=1,default=NA),
TD.LAG = dplyr::lag(TD,n=1,default=NA),TC.LAG = dplyr::lag(TC,n=1,default=NA))
s2.w <- as.data.frame(s2.w)
detach("package:dplyr", unload=TRUE)
Comments:
the number of participants who started on each weekday is quite balanced
now the dataset includes Day.of.week (as factor), Day.of.assessment (as factor) and surveyNumber (as integer)
TIME TRAJ.
Here, we visually inspect the temporal trajectories of mean scores in each ESM measure, considering both day of week and day of participation.
timeTraj(s2.w) # by day of week
timeTraj(s2.w,day="day") # by day of participation
TD
Here, we evaluate systematic differences in Task Demand across temporal variables.
First, we use the plotLMER() function to visualize the bivariate score distributions while highlighting cases associated with AICw > .50. The function also allows us to visualize the model diagnostics, in terms of the normal fit of residuals and random intercepts, and of homoscedasticity.
grid.arrange(plotLMER(dat=s2.w,pred="Day.of.week",resp="TD",Xlab="Day of week",Ylab="Task Demand"),
plotLMER(dat=s2.w,pred="Day.of.assessment",resp="TD",Xlab="Day of assessment",Ylab="Task Demand"),
plotLMER(dat=s2.w[s2.w$surveyNumber<6,],
pred="surveyNumber",resp="TD",Xlab="Survey Number",Ylab="Task Demand"),
plotLMER(dat=s2.w[s2.w$day==1 & s2.w$surveyNumber<6,],pred="surveyNumber",resp="TD",LMER=FALSE,
Xlab="Survey Number (Day 1 only)",Ylab="Task Demand"),nrow=2)
##
##
##
## ***
## Fitting LMER model on 1109 observations from 90 subjects:
## TD ~ Day.of.week + (1|ID)
##
##
## AICw vs. null model = 0.161
##
##
##
##
## ***
## Fitting LMER model on 1109 observations from 90 subjects:
## TD ~ Day.of.assessment + (1|ID)
##
##
## AICw vs. null model = 0.035
##
##
##
##
## ***
## Fitting LMER model on 1017 observations from 90 subjects:
## TD ~ surveyNumber + (1|ID)
##
##
## AICw vs. null model = 0.004
p <- plotLMER(dat=s2.w,pred="Day.of.week",resp="TD",LMER.results=FALSE,diagnostics=TRUE)
##
##
##
## ***
## Fitting LMER model on 1109 observations from 90 subjects:
## TD ~ Day.of.week + (1|ID)
##
##
##
## Plotting model diagnostics:
Second, we specify a set of nested LMER models (i.e., hierarchical multiple regression) to evaluate the importance of each additional temporal factor while controlling for the factors associated with substantial differences in the previous step. An additional factor is carried over to the following models only when its inclusion is associated with an AICw > .50.
# linear trend over time
TD.null2 <- lmer(TD~(1|ID),data=s2.w[!is.na(s2.w$TD.LAG),])
TD.autoreg2 <- lmer(TD~TD.LAG+(TD.LAG|ID),data=s2.w)
TD.time2 <- lmer(TD~TD.LAG+timeLag+(TD.LAG|ID),data=s2.w)
Weights(AIC(TD.null2,TD.autoreg2,TD.time2)) # time AICw = .02
## model weights
## [1] 0.000 0.984 0.016
# Day of assessment controlling by Day of week
TD.null2 <- lmer(TD~(1|ID),data=s2.w)
TD.day.of.week2 <- lmer(TD~Day.of.week+(1|ID),data=s2.w)
TD.day.of.assessment2 <- lmer(TD~Day.of.week+Day.of.assessment+(1|ID),data=s2.w)
TD.surveyNumber2 <- lmer(TD~Day.of.week+Day.of.assessment+as.factor(surveyNumber)+(1|ID),data=s2.w)
Weights(AIC(TD.null2,TD.day.of.week2,TD.day.of.assessment2,TD.surveyNumber2)) # null AICw = .83
## model weights
## [1] 0.834 0.160 0.006 0.000
Comments:
- results are similar to those found for subsample s1, with TD not being substantially predicted by any temporal variable
TC
Here, we evaluate systematic differences in Task Control across temporal variables.
First, we use the plotLMER() function to visualize the bivariate score distributions while highlighting cases associated with AICw > .50. The function also allows us to visualize the model diagnostics, in terms of the normal fit of residuals and random intercepts, and of homoscedasticity.
grid.arrange(plotLMER(dat=s2.w,pred="Day.of.week",resp="TC",Xlab="Day of week",Ylab="Task Control"),
plotLMER(dat=s2.w,pred="Day.of.assessment",resp="TC",Xlab="Day of assessment",Ylab="Task Control"),
plotLMER(dat=s2.w[s2.w$surveyNumber<6,],pred="surveyNumber",resp="TC",
Xlab="Survey Number",Ylab="Task Control"),
plotLMER(dat=s2.w[s2.w$day==1 & s2.w$surveyNumber<6,],pred="surveyNumber",resp="TC",LMER=FALSE,
Xlab="Survey Number (Day 1 only)",Ylab="Task Control"),nrow=2)
##
##
##
## ***
## Fitting LMER model on 1084 observations from 90 subjects:
## TC ~ Day.of.week + (1|ID)
##
##
## AICw vs. null model = 0.104
##
##
##
##
## ***
## Fitting LMER model on 1084 observations from 90 subjects:
## TC ~ Day.of.assessment + (1|ID)
##
##
## AICw vs. null model = 0.412
##
##
##
##
## ***
## Fitting LMER model on 994 observations from 90 subjects:
## TC ~ surveyNumber + (1|ID)
##
##
## AICw vs. null model = 0
p <- plotLMER(dat=s2.w,pred="Day.of.assessment",resp="TC",LMER.results=FALSE,diagnostics=TRUE)
##
##
##
## ***
## Fitting LMER model on 1084 observations from 90 subjects:
## TC ~ Day.of.assessment + (1|ID)
##
##
##
## Plotting model diagnostics:
Second, we specify a set of nested LMER models (i.e., hierarchical multiple regression) to evaluate the importance of each additional temporal factor while controlling for the factors associated with substantial differences in the previous step. An additional factor is carried over to the following models only when its inclusion is associated with an AICw > .50.
# linear trend over time
TC.null2 <- lmer(TC~(1|ID),data=s2.w[!is.na(s2.w$TC.LAG),])
TC.autoreg2 <- lmer(TC~TC.LAG+(TC.LAG|ID),data=s2.w)
TC.time2 <- lmer(TC~TC.LAG+timeLag+(TC.LAG|ID),data=s2.w)
Weights(AIC(TC.null2,TC.autoreg2,TC.time2)) # time AICw = .02
## model weights
## [1] 0.000 0.979 0.021
# Day of assessment controlling by Day of week
TC.null2 <- lmer(TC~(1|ID),data=s2.w)
TC.day.of.week2 <- lmer(TC~Day.of.week+(1|ID),data=s2.w)
TC.day.of.assessment2 <- lmer(TC~Day.of.week+Day.of.assessment+(1|ID),data=s2.w)
Weights(AIC(TC.null2,TC.day.of.week2,TC.day.of.assessment2)) # null AICw = .84
## model weights
## [1] 0.848 0.098 0.054
round(as.data.frame(summary(TC.day.of.assessment2)$coefficients),3)
# SurveyNumber controlling by Day
TC.surveyNumber2 <- lmer(TC~Day.of.week+Day.of.assessment+as.factor(surveyNumber)+(1|ID),data=s2.w)
Weights(AIC(TC.null2,TC.day.of.week2,TC.day.of.assessment2,TC.surveyNumber2)) # null AICw = .84
## model weights
## [1] 0.848 0.098 0.054 0.000
Comments:
- in contrast to what was found in subsample s1, TC is not substantially predicted by any temporal variable in s2
NV
Here, we evaluate systematic differences in Negative Valence across temporal variables.
First, we use the plotLMER() function to visualize the bivariate score distributions while highlighting cases associated with AICw > .50. The function also allows us to visualize the model diagnostics, in terms of the normal fit of residuals and random intercepts, and of homoscedasticity.
grid.arrange(plotLMER(dat=s2.w,pred="Day.of.week",resp="NV",Xlab="Day of week",Ylab="Negative Valence"),
plotLMER(dat=s2.w,pred="Day.of.assessment",resp="NV",Xlab="Day of assessment",Ylab="Negative Valence"),
plotLMER(dat=s2.w[s2.w$surveyNumber<6,],
pred="surveyNumber",resp="NV",Xlab="Survey Number",Ylab="Negative Valence"),
plotLMER(dat=s2.w[s2.w$day==1 & s2.w$surveyNumber<6,],pred="surveyNumber",resp="NV",LMER=FALSE,
Xlab="Survey Number (Day 1 only)",Ylab="Negative Valence"),nrow=2)
##
##
##
## ***
## Fitting LMER model on 1268 observations from 90 subjects:
## NV ~ Day.of.week + (1|ID)
##
##
## AICw vs. null model = 0.008
##
##
##
##
## ***
## Fitting LMER model on 1268 observations from 90 subjects:
## NV ~ Day.of.assessment + (1|ID)
##
##
## AICw vs. null model = 0.596
## Likelihood Ratio Test: Chisq = 12.8 ( 2 ) p = 0.002
##
## Estimate Std. Error t value
## (Intercept) 3.2114878 0.08957766 35.851437
## predDay 2 0.1845880 0.05743232 3.214008
## predDay 3 0.1692819 0.05737655 2.950367
##
##
## Computing profile confidence intervals ...
## 2.5 % 97.5 %
## .sig01 0.64785250 0.8907631
## .sigma 0.79975759 0.8670267
## (Intercept) 3.03536781 3.3876142
## predDay 2 0.07201938 0.2971409
## predDay 3 0.05678815 0.2816983
##
##
##
## ***
## Fitting LMER model on 1176 observations from 90 subjects:
## NV ~ surveyNumber + (1|ID)
##
##
## AICw vs. null model = 0.503
## Likelihood Ratio Test: Chisq = 22.1 ( 4 ) p = 0
##
## Estimate Std. Error t value
## (Intercept) 3.1493827 0.09584442 32.859322
## pred2 0.1839506 0.07228065 2.544950
## pred3 0.2037037 0.07228065 2.818233
## pred4 0.2505317 0.07588399 3.301509
## pred5 0.3898099 0.08944318 4.358184
##
##
## Computing profile confidence intervals ...
## 2.5 % 97.5 %
## .sig01 0.65521901 0.9023403
## .sigma 0.80422631 0.8748037
## (Intercept) 2.96118908 3.3375763
## pred2 0.04241892 0.3254823
## pred3 0.06217200 0.3452354
## pred4 0.10192169 0.3990973
## pred5 0.21463642 0.5649105
# plotting LMER diagnostics
p <- plotLMER(dat=s2.w,pred="Day.of.assessment",resp="NV",LMER.results=FALSE,
Xlab="Day of assessment",Ylab="Negative Valence",diagnostics=TRUE)
##
##
##
## ***
## Fitting LMER model on 1268 observations from 90 subjects:
## NV ~ Day.of.assessment + (1|ID)
##
##
##
## Plotting model diagnostics:
Second, we specify a set of nested LMER models (i.e., hierarchical multiple regression) to evaluate the importance of each additional temporal factor while controlling for the factors associated with substantial differences in the previous step. An additional factor is carried over to the following models only when its inclusion is associated with an AICw > .50.
# linear trend over time
NV.null2 <- lmer(NV~(1|ID),data=s2.w[!is.na(s2.w$NV.LAG),])
NV.autoreg2 <- lmer(NV~NV.LAG+(NV.LAG|ID),data=s2.w)
NV.time2 <- lmer(NV~NV.LAG+timeLag+(NV.LAG|ID),data=s2.w)
Weights(AIC(NV.null2,NV.autoreg2,NV.time2)) # time AICw = .01
## model weights
## [1] 0.000 0.989 0.011
# Day of assessment controlling by Day of week
NV.day.of.week2 <- lmer(NV~Day.of.week+(1|ID),data=s2.w)
NV.day.of.assessment2 <- lmer(NV~Day.of.week+Day.of.assessment+(1|ID),data=s2.w)
Weights(AIC(NV.null2,NV.day.of.week2,NV.day.of.assessment2)) # null AICw = .97
## model weights
## [1] 1 0 0
round(as.data.frame(summary(NV.day.of.assessment2)$coefficients),3)
# SurveyNumber controlling by Day
NV.surveyNumber2 <- lmer(NV~Day.of.week+Day.of.assessment+as.factor(surveyNumber)+(1|ID),data=s2.w)
Weights(AIC(NV.null2,NV.day.of.week2,NV.day.of.assessment2,NV.surveyNumber2)) # null AICw = .97
## model weights
## [1] 1 0 0 0
round(as.data.frame(summary(NV.surveyNumber2)$coefficients),3)
Comments:
results are similar to those found for subsample s1, with NV being mainly predicted by day of assessment and survey number
the coefficients estimated by these models are similar to those found for s1, although the AICw suggests that none of these models shows stronger evidence than the null
TA
Here, we evaluate systematic differences in Tense Arousal across temporal variables.
First, we use the plotLMER() function to visualize the bivariate score distributions while highlighting cases associated with AICw > .50. The function also allows us to visualize the model diagnostics, in terms of the normal fit of residuals and random intercepts, and of homoscedasticity.
grid.arrange(plotLMER(dat=s2.w,pred="Day.of.week",resp="TA",Xlab="Day of week",Ylab="Tense Arousal"),
plotLMER(dat=s2.w,pred="Day.of.assessment",resp="TA",Xlab="Day of assessment",Ylab="Tense Arousal"),
plotLMER(dat=s2.w[s2.w$surveyNumber<6,],
pred="surveyNumber",resp="TA",Xlab="Survey Number",Ylab="Tense Arousal"),
plotLMER(dat=s2.w[s2.w$day==1 & s2.w$surveyNumber<6,],pred="surveyNumber",resp="TA",LMER=FALSE,
Xlab="Survey Number (Day 1 only)",Ylab="Tense Arousal"),nrow=2)
##
##
##
## ***
## Fitting LMER model on 1268 observations from 90 subjects:
## TA ~ Day.of.week + (1|ID)
##
##
## AICw vs. null model = 0.015
##
##
##
##
## ***
## Fitting LMER model on 1268 observations from 90 subjects:
## TA ~ Day.of.assessment + (1|ID)
##
##
## AICw vs. null model = 0.965
## Likelihood Ratio Test: Chisq = 18.44 ( 2 ) p = 0
##
## Estimate Std. Error t value
## (Intercept) 3.2143630 0.09728276 33.041446
## predDay 2 0.2114280 0.06082618 3.475937
## predDay 3 0.2378024 0.06076730 3.913328
##
##
## Computing profile confidence intervals ...
## 2.5 % 97.5 %
## .sig01 0.70856978 0.9726130
## .sigma 0.84699368 0.9182344
## (Intercept) 3.02310002 3.4056781
## predDay 2 0.09220809 0.3306328
## predDay 3 0.11866372 0.3568643
##
##
##
## ***
## Fitting LMER model on 1176 observations from 90 subjects:
## TA ~ surveyNumber + (1|ID)
##
##
## AICw vs. null model = 0.006
# plotting model diagnostics
p <- plotLMER(dat=s2.w,pred="Day.of.assessment",resp="TA",LMER.results=FALSE,diagnostics=TRUE)
##
##
##
## ***
## Fitting LMER model on 1268 observations from 90 subjects:
## TA ~ Day.of.assessment + (1|ID)
##
##
##
## Plotting model diagnostics:
Second, we specify a set of nested LMER models (i.e., hierarchical multiple regression) to evaluate the importance of each additional temporal factor while controlling for the factors associated with substantial differences in the previous step. An additional factor is carried over to the following models only when its inclusion is associated with an AICw > .50.
# linear trend over time
TA.null2 <- lmer(TA~(1|ID),data=s2.w[!is.na(s2.w$TA.LAG),])
TA.autoreg2 <- lmer(TA~TA.LAG+(1|ID),data=s2.w)
TA.time2 <- lmer(TA~TA.LAG+timeLag+(1|ID),data=s2.w)
Weights(AIC(TA.null2,TA.autoreg2,TA.time2)) # time AICw = .01
## model weights
## [1] 0.000 0.985 0.015
# Day of assessment controlling by Day of week
TA.null2 <- lmer(TA~(1|ID),data=s2.w)
TA.day.of.week2 <- lmer(TA~Day.of.week+(1|ID),data=s2.w)
TA.day.of.assessment2 <- lmer(TA~Day.of.week+Day.of.assessment+(1|ID),data=s2.w)
Weights(AIC(TA.null2,TA.day.of.week2,TA.day.of.assessment2)) # null AICw = .55
## model weights
## [1] 0.550 0.008 0.442
round(as.data.frame(summary(TA.day.of.assessment2)$coefficients),3)
# SurveyNumber controlling by Day
TA.surveyNumber2 <- lmer(TA~Day.of.week+Day.of.assessment+as.factor(surveyNumber)+(1|ID),data=s2.w)
Weights(AIC(TA.null2,TA.day.of.week2,TA.day.of.assessment2,TA.surveyNumber2)) # surveyNumber AICw = 0
## model weights
## [1] 0.549 0.008 0.442 0.000
Comments:
results are similar to those found for subsample s1, with TA being only predicted by day of assessment
the coefficients estimated by these models are similar to those found for s1, although the AICw suggests that none of these models shows stronger evidence than the null
FA
Here, we evaluate systematic differences in Fatigue across temporal variables.
First, we use the plotLMER() function to visualize the bivariate score distributions while highlighting cases associated with AICw > .50. The function also allows us to visualize the model diagnostics, in terms of the normal fit of residuals and random intercepts, and of homoscedasticity.
grid.arrange(plotLMER(dat=s2.w,pred="Day.of.week",resp="FA",Xlab="Day of week",Ylab="Fatigue"),
plotLMER(dat=s2.w,pred="Day.of.assessment",resp="FA",Xlab="Day of assessment",Ylab="Fatigue"),
plotLMER(dat=s2.w[s2.w$day==1 & s2.w$surveyNumber<6,],pred="surveyNumber",resp="FA",
Xlab="Survey Number",Ylab="Fatigue"),
plotLMER(dat=s2.w[s2.w$surveyNumber<6,],pred="surveyNumber",resp="FA",LMER=FALSE,
Xlab="Survey Number (Day 1 only)",Ylab="Fatigue"),nrow=2)
##
##
##
## ***
## Fitting LMER model on 1268 observations from 90 subjects:
## FA ~ Day.of.week + (1|ID)
##
##
## AICw vs. null model = 0.006
##
##
##
##
## ***
## Fitting LMER model on 1268 observations from 90 subjects:
## FA ~ Day.of.assessment + (1|ID)
##
##
## AICw vs. null model = 0.008
##
##
##
##
## ***
## Fitting LMER model on 402 observations from 90 subjects:
## FA ~ surveyNumber + (1|ID)
##
##
## AICw vs. null model = 1
## Likelihood Ratio Test: Chisq = 46.14 ( 4 ) p = 0
##
## Estimate Std. Error t value
## (Intercept) 3.38518519 0.1219285 27.7637017
## pred2 -0.05555556 0.1272990 -0.4364178
## pred3 0.25925926 0.1272990 2.0366165
## pred4 0.57063812 0.1316046 4.3360035
## pred5 0.79733596 0.1537347 5.1864396
##
##
## Computing profile confidence intervals ...
## 2.5 % 97.5 %
## .sig01 0.64252885 0.9379065
## .sigma 0.78598018 0.9197541
## (Intercept) 3.14664822 3.6237222
## pred2 -0.30422967 0.1931186
## pred3 0.01058515 0.5079334
## pred4 0.31343845 0.8276340
## pred5 0.49696006 1.0975602
p <- plotLMER(dat=s2.w[s2.w$day==1 & s2.w$surveyNumber<6,],pred="surveyNumber",resp="FA",diagnostics=TRUE,LMER.results=FALSE)
##
##
##
## ***
## Fitting LMER model on 402 observations from 90 subjects:
## FA ~ surveyNumber + (1|ID)
##
##
##
## Plotting model diagnostics:
Second, we specify a set of nested LMER models (i.e., hierarchical multiple regression) to evaluate the importance of each additional temporal factor while controlling for the factors associated with substantial differences in the previous step. An additional factor is carried over to the following models only when its inclusion is associated with an AICw > .50.
# linear trend over time
FA.null2 <- lmer(FA~(1|ID),data=s2.w[!is.na(s2.w$FA.LAG),])
FA.autoreg2 <- lmer(FA~FA.LAG+(1|ID),data=s2.w)
FA.time2 <- lmer(FA~FA.LAG+timeLag+(1|ID),data=s2.w)
Weights(AIC(FA.null2,FA.autoreg2,FA.time2)) # time AICw = 1 (!)
## model weights
## [1] 0 0 1
round(summary(FA.time2)$coefficients,3)
## Estimate Std. Error t value
## (Intercept) 1.686 0.091 18.481
## FA.LAG 0.485 0.023 21.302
## timeLag 0.100 0.010 10.377
# survey number controlling for time
FA.surveyNumber2 <- lmer(FA~FA.LAG+timeLag+as.factor(surveyNumber)+(1|ID),data=s2.w)
Weights(AIC(FA.null2,FA.time2,FA.surveyNumber2)) # time AICw = .99
## model weights
## [1] 0.000 0.998 0.002
Comments:
- results are similar to those found for subsample s1, with FA being only predicted by survey number
EXAMPLE PLOTS
Here, we plot some examples of substantial differences.
p1 <- plotLMER(dat=s2.w,pred="Day.of.assessment",resp="TA",Xlab="Day of assessment",Ylab="Tense Arousal")
p2 <- plotLMER(dat=s2.w,pred="Day.of.assessment",resp="TC",Xlab="Day of assessment",Ylab="Task Control")
p3 <- plotLMER(dat=s2.w,pred="Day.of.assessment",resp="NV",Xlab="Day of assessment",Ylab="Negative Valence")
p4 <- plotLMER(dat=s2.w[s2.w$surveyNumber<6,],pred="surveyNumber",resp="NV",
Xlab="Survey Number",Ylab="Negative Valence")
gridExtra::grid.arrange(p1,p2,p3,p4,nrow=2)
SUBSAMPLE s3
DESCRIPTIVES
Here, we compute the number and percentage of participants who started the protocol on each of the possible weekdays (Monday, Wednesday, Friday), and we create the temporal variables.
# Day of week as factor
s3.w$Day.of.week <- "Monday"
s3.w[s3.w$day.of.week==3,"Day.of.week"] <- "Wednesday"
s3.w[s3.w$day.of.week==5,"Day.of.week"] <- "Friday"
s3.w$Day.of.week <- factor(s3.w$Day.of.week,levels=c("Monday","Wednesday","Friday"))
# Day of assessment as factor
s3.w$Day.of.assessment <- "Day 1"
s3.w[s3.w$day==2,"Day.of.assessment"] <- "Day 2"
s3.w[s3.w$day==3,"Day.of.assessment"] <- "Day 3"
s3.w$Day.of.assessment <- as.factor(s3.w$Day.of.assessment)
# Survey Number
s3.w <- plyr::ddply(s3.w,c("ID","day"),transform,surveyNumber=seq_along(day.of.week))
# number of participants by starting day
wide.s3 <- s3.w[s3.w$day==1 & s3.w$surveyNumber==1,]
summary(as.factor(wide.s3$day.of.week)) # number
## 1 3 5
## 68 58 49
round(100*summary(as.factor(wide.s3$day.of.week))/nrow(wide.s3),1) # percentage
## 1 3 5
## 38.9 33.1 28.0
# computing timeLag variable (time in hours)
library(dplyr)
s3.w <- s3.w %>%
group_by(ID,Day.of.week) %>%
mutate(timeLag = as.numeric(difftime(RunTimestamp,as.POSIXct(paste(substr(RunTimestamp,1,10),"00:00:00"),
format="%Y-%m-%d %H:%M:%S"),units="hours")),
# creating time lag (hours)
timeLag = timeLag - min(timeLag,na.rm=TRUE),
# creating lagged variables
NV.LAG = dplyr::lag(NV,n=1,default=NA),TA.LAG=dplyr::lag(TA,n=1,default=NA),FA.LAG = dplyr::lag(FA,n=1,default=NA),
TD.LAG = dplyr::lag(TD,n=1,default=NA),TC.LAG = dplyr::lag(TC,n=1,default=NA))
s3.w <- as.data.frame(s3.w)
detach("package:dplyr", unload=TRUE)
Comments:
the number of participants who started on each weekday is quite balanced
now the dataset includes Day.of.week (as factor), Day.of.assessment (as factor) and surveyNumber (as integer)
TIME TRAJ.
Here, we visually inspect the temporal trajectories of mean scores in each ESM measure, considering both day of week and day of participation.
timeTraj(s3.w) # by day of week
timeTraj(s3.w,day="day") # by day of participation
TD
Here, we evaluate systematic differences in Task Demand across temporal variables.
First, we use the plotLMER() function to visualize the bivariate score distributions while highlighting cases associated with AICw > .50. The function also allows us to visualize the model diagnostics, in terms of the normal fit of residuals and random intercepts, and of homoscedasticity.
grid.arrange(plotLMER(dat=s3.w,pred="Day.of.week",resp="TD",Xlab="Day of week",Ylab="Task Demand"),
plotLMER(dat=s3.w,pred="Day.of.assessment",resp="TD",Xlab="Day of assessment",Ylab="Task Demand"),
plotLMER(dat=s3.w[s3.w$surveyNumber<6,],
pred="surveyNumber",resp="TD",Xlab="Survey Number",Ylab="Task Demand"),
plotLMER(dat=s3.w[s3.w$day==1 & s3.w$surveyNumber<6,],pred="surveyNumber",resp="TD",LMER=FALSE,
Xlab="Survey Number (Day 1 only)",Ylab="Task Demand"),nrow=2)
##
##
##
## ***
## Fitting LMER model on 1692 observations from 174 subjects:
## TD ~ Day.of.week + (1|ID)
##
##
## AICw vs. null model = 0.239
##
##
##
##
## ***
## Fitting LMER model on 1692 observations from 174 subjects:
## TD ~ Day.of.assessment + (1|ID)
##
##
## AICw vs. null model = 0.054
##
##
##
##
## ***
## Fitting LMER model on 1571 observations from 174 subjects:
## TD ~ surveyNumber + (1|ID)
##
##
## AICw vs. null model = 0.268
p <- plotLMER(dat=s3.w,pred="Day.of.week",resp="TD",LMER.results=FALSE,diagnostics=TRUE)
##
##
##
## ***
## Fitting LMER model on 1692 observations from 174 subjects:
## TD ~ Day.of.week + (1|ID)
##
##
##
## Plotting model diagnostics:
Second, we specify a set of nested LMER models (i.e., hierarchical multiple regression) to evaluate the importance of each additional temporal factor while controlling for the factors associated with substantial differences in the previous step. An additional factor is carried over to the following models only when its inclusion is associated with an AICw > .50.
# linear trend over time
TD.null2 <- lmer(TD~(1|ID),data=s3.w[!is.na(s3.w$TD.LAG),])
TD.autoreg2 <- lmer(TD~TD.LAG+(1|ID),data=s3.w)
TD.time2 <- lmer(TD~TD.LAG+timeLag+(1|ID),data=s3.w)
Weights(AIC(TD.null2,TD.autoreg2,TD.time2)) # time AICw = .03
## model weights
## [1] 0.00 0.97 0.03
# Day of assessment controlling by Day of week
TD.day.of.week2 <- lmer(TD~Day.of.week+(1|ID),data=s3.w)
TD.day.of.assessment2 <- lmer(TD~Day.of.week+Day.of.assessment+(1|ID),data=s3.w)
TD.surveyNumber2 <- lmer(TD~Day.of.week+Day.of.assessment+as.factor(surveyNumber)+(1|ID),data=s3.w)
Weights(AIC(TD.null2,TD.day.of.week2,TD.day.of.assessment2,TD.surveyNumber2)) # null AICw = .75
## model weights
## [1] 1 0 0 0
Comments:
- results are similar to those found for subsample s1, with TD not being substantially predicted by any temporal variable
TC
Here, we evaluate systematic differences in Task Control across temporal variables.
First, we use the plotLMER() function to visualize the bivariate score distributions while highlighting cases associated with AICw > .50. The function also allows us to visualize the model diagnostics, in terms of the normal fit of residuals and random intercepts, and of homoscedasticity.
grid.arrange(plotLMER(dat=s3.w,pred="Day.of.week",resp="TC",Xlab="Day of week",Ylab="Task Control"),
plotLMER(dat=s3.w,pred="Day.of.assessment",resp="TC",Xlab="Day of assessment",Ylab="Task Control"),
plotLMER(dat=s3.w[s3.w$surveyNumber<6,],pred="surveyNumber",resp="TC",
Xlab="Survey Number",Ylab="Task Control"),
plotLMER(dat=s3.w[s3.w$day==1 & s3.w$surveyNumber<6,],pred="surveyNumber",resp="TC",LMER=FALSE,
Xlab="Survey Number (Day 1 only)",Ylab="Task Control"),nrow=2)
##
##
##
## ***
## Fitting LMER model on 1655 observations from 174 subjects:
## TC ~ Day.of.week + (1|ID)
##
##
## AICw vs. null model = 0.005
##
##
##
##
## ***
## Fitting LMER model on 1655 observations from 174 subjects:
## TC ~ Day.of.assessment + (1|ID)
##
##
## AICw vs. null model = 0.945
## Likelihood Ratio Test: Chisq = 16.99 ( 2 ) p = 0
##
## Estimate Std. Error t value
## (Intercept) 4.3248093 0.08642125 50.043356
## predDay 2 -0.2758398 0.06822968 -4.042813
## predDay 3 -0.1840978 0.06933803 -2.655078
##
##
## Computing profile confidence intervals ...
## 2.5 % 97.5 %
## .sig01 0.8329307 1.06742408
## .sigma 1.0872517 1.16829414
## (Intercept) 4.1551840 4.49435932
## predDay 2 -0.4095749 -0.14213309
## predDay 3 -0.3199543 -0.04816023
##
##
##
## ***
## Fitting LMER model on 1537 observations from 174 subjects:
## TC ~ surveyNumber + (1|ID)
##
##
## AICw vs. null model = 0
p <- plotLMER(dat=s3.w,pred="Day.of.assessment",resp="TC",LMER.results=FALSE,diagnostics=TRUE)
##
##
##
## ***
## Fitting LMER model on 1655 observations from 174 subjects:
## TC ~ Day.of.assessment + (1|ID)
##
##
##
## Plotting model diagnostics:
Second, we specify a set of nested LMER models (i.e., hierarchical multiple regression) to evaluate the importance of each additional temporal factor while controlling for the factors associated with substantial differences in the previous step. An additional factor is carried over to the following models only when its inclusion is associated with an AICw > .50.
# linear trend over time
TC.null2 <- lmer(TC~(1|ID),data=s3.w[!is.na(s3.w$TC.LAG),])
TC.autoreg2 <- lmer(TC~TC.LAG+(1|ID),data=s3.w)
TC.time2 <- lmer(TC~TC.LAG+timeLag+(1|ID),data=s3.w)
Weights(AIC(TC.null2,TC.autoreg2,TC.time2)) # time AICw = .03
## model weights
## [1] 0.000 0.974 0.026
# Day of assessment controlling by Day of week
TC.null2 <- lmer(TC~(1|ID),data=s3.w)
TC.day.of.week2 <- lmer(TC~Day.of.week+(1|ID),data=s3.w)
TC.day.of.assessment2 <- lmer(TC~Day.of.week+Day.of.assessment+(1|ID),data=s3.w)
Weights(AIC(TC.null2,TC.day.of.week2,TC.day.of.assessment2)) # null AICw = .84
## model weights
## [1] 0.848 0.098 0.054
round(as.data.frame(summary(TC.day.of.assessment2)$coefficients),3)
# SurveyNumber controlling by Day
TC.surveyNumber2 <- lmer(TC~Day.of.week+Day.of.assessment+as.factor(surveyNumber)+(1|ID),data=s3.w)
Weights(AIC(TC.null2,TC.day.of.week2,TC.day.of.assessment2,TC.surveyNumber2)) # null AICw = .84
## model weights
## [1] 0.848 0.098 0.054 0.000
Comments:
results are similar to those found for subsample s1, with TC being only predicted by day of assessment
the coefficients estimated by these models are similar to those found for s1, although the AICw suggests that none of these models shows stronger evidence than the null
NV
Here, we evaluate systematic differences in Negative Valence across temporal variables.
First, we use the plotLMER() function to visualize the bivariate score distributions while highlighting cases associated with AICw > .50. The function also allows us to visualize the model diagnostics, in terms of the normal fit of residuals and random intercepts, and of homoscedasticity.
grid.arrange(plotLMER(dat=s3.w,pred="Day.of.week",resp="NV",Xlab="Day of week",Ylab="Negative Valence"),
plotLMER(dat=s3.w,pred="Day.of.assessment",resp="NV",Xlab="Day of assessment",Ylab="Negative Valence"),
plotLMER(dat=s3.w[s3.w$surveyNumber<6,],pred="surveyNumber",resp="NV",Xlab="Survey Number",Ylab="Negative Valence"),
plotLMER(dat=s3.w[s3.w$day==1 & s3.w$surveyNumber<6,],pred="surveyNumber",resp="NV",LMER=FALSE,
Xlab="Survey Number (Day 1 only)",Ylab="Negative Valence"),nrow=2)
##
##
##
## ***
## Fitting LMER model on 1979 observations from 175 subjects:
## NV ~ Day.of.week + (1|ID)
##
##
## AICw vs. null model = 0.063
##
##
##
##
## ***
## Fitting LMER model on 1979 observations from 175 subjects:
## NV ~ Day.of.assessment + (1|ID)
##
##
## AICw vs. null model = 0.883
## Likelihood Ratio Test: Chisq = 16.95 ( 2 ) p = 0
##
## Estimate Std. Error t value
## (Intercept) 3.2381318 0.06448257 50.217165
## predDay 2 0.1664550 0.04527493 3.676537
## predDay 3 0.1563381 0.04622644 3.382006
##
##
## Computing profile confidence intervals ...
## 2.5 % 97.5 %
## .sig01 0.65713865 0.8328048
## .sigma 0.79543031 0.8490100
## (Intercept) 3.11160304 3.3647486
## predDay 2 0.07772190 0.2551887
## predDay 3 0.06568375 0.2469033
##
##
##
## ***
## Fitting LMER model on 1858 observations from 175 subjects:
## NV ~ surveyNumber + (1|ID)
##
##
## AICw vs. null model = 0.809
## Likelihood Ratio Test: Chisq = 26.71 ( 4 ) p = 0
##
## Estimate Std. Error t value
## (Intercept) 3.2039586 0.06786364 47.211712
## pred2 0.1619829 0.05392581 3.003810
## pred3 0.1696806 0.05599222 3.030432
## pred4 0.2072142 0.06123796 3.383755
## pred5 0.3486661 0.07351260 4.742943
##
##
## Computing profile confidence intervals ...
## 2.5 % 97.5 %
## .sig01 0.66253769 0.8405078
## .sigma 0.79880925 0.8545845
## (Intercept) 3.07085425 3.3371104
## pred2 0.05633043 0.2675908
## pred3 0.05996838 0.2793291
## pred4 0.08720958 0.3271301
## pred5 0.20460889 0.4926182
# plotting LMER diagnostics
p <- plotLMER(dat=s3.w,pred="Day.of.assessment",resp="NV",LMER.results=FALSE,
Xlab="Day of assessment",Ylab="Negative Valence",diagnostics=TRUE)
##
##
##
## ***
## Fitting LMER model on 1979 observations from 175 subjects:
## NV ~ Day.of.assessment + (1|ID)
##
##
##
## Plotting model diagnostics:
Second, we specify a set of nested LMER models (i.e., hierarchical multiple regression) to evaluate the importance of each additional temporal factor while controlling for the factors associated with substantial differences in the previous step. An additional factor is carried over to the following models only when its inclusion is associated with an AICw > .50.
# linear trend over time
NV.null2 <- lmer(NV~(1|ID),data=s3.w)
NV.autoreg2 <- lmer(NV~NV.LAG+(NV.LAG|ID),data=s3.w)
NV.time2 <- lmer(NV~NV.LAG+timeLag+(NV.LAG|ID),data=s3.w)
Weights(AIC(NV.null2,NV.autoreg2,NV.time2)) # time AICw = .001
## model weights
## [1] 0.000 0.991 0.009
# Day of assessment controlling by Day of week
NV.null2 <- lmer(NV~(1|ID),data=s3.w)
NV.day.of.week2 <- lmer(NV~Day.of.week+(1|ID),data=s3.w)
NV.day.of.assessment2 <- lmer(NV~Day.of.week+Day.of.assessment+(1|ID),data=s3.w)
Weights(AIC(NV.null2,NV.day.of.week2,NV.day.of.assessment2)) # day.of.assessment AICw = .50
## model weights
## [1] 0.466 0.031 0.502
round(as.data.frame(summary(NV.day.of.assessment2)$coefficients),3)
# SurveyNumber controlling by Day
NV.surveyNumber2 <- lmer(NV~Day.of.week+Day.of.assessment+as.factor(surveyNumber)+(1|ID),data=s3.w)
Weights(AIC(NV.null2,NV.day.of.week2,NV.day.of.assessment2,NV.surveyNumber2)) # day.of.assessment AICw = .46
## model weights
## [1] 0.430 0.029 0.463 0.078
round(as.data.frame(summary(NV.surveyNumber2)$coefficients),3)
Comments:
results are similar to those found for subsample s1, with NV being mainly predicted by day of assessment and survey number.
the coefficients estimated by these models are similar to those found for s1, although the AICw suggests that the inclusion of survey number does not substantially add evidence to the previous model
TA
Here, we evaluate systematic differences in Tense Arousal across temporal variables.
First, we use the plotLMER() function to visualize the bivariate score distributions while highlighting cases associated with AICw > .50. The function also allows us to visualize the model diagnostics, in terms of the normal fit of residuals and random intercepts, and of homoscedasticity.
grid.arrange(plotLMER(dat=s3.w,pred="Day.of.week",resp="TA",Xlab="Day of week",Ylab="Tense Arousal"),
plotLMER(dat=s3.w,pred="Day.of.assessment",resp="TA",Xlab="Day of assessment",Ylab="Tense Arousal"),
plotLMER(dat=s3.w[s3.w$surveyNumber<6,],
pred="surveyNumber",resp="TA",Xlab="Survey Number",Ylab="Tense Arousal"),
plotLMER(dat=s3.w[s3.w$day==1 & s3.w$surveyNumber<6,],pred="surveyNumber",resp="TA",LMER=FALSE,
Xlab="Survey Number (Day 1 only)",Ylab="Tense Arousal"),nrow=2)
##
##
##
## ***
## Fitting LMER model on 1979 observations from 175 subjects:
## TA ~ Day.of.week + (1|ID)
##
##
## AICw vs. null model = 0.046
##
##
##
##
## ***
## Fitting LMER model on 1979 observations from 175 subjects:
## TA ~ Day.of.assessment + (1|ID)
##
##
## AICw vs. null model = 0.934
## Likelihood Ratio Test: Chisq = 17.92 ( 2 ) p = 0
##
## Estimate Std. Error t value
## (Intercept) 3.2859002 0.06969642 47.145896
## predDay 2 0.1718921 0.04872380 3.527888
## predDay 3 0.1860827 0.04974837 3.740478
##
##
## Computing profile confidence intervals ...
## 2.5 % 97.5 %
## .sig01 0.71143319 0.9012078
## .sigma 0.85599789 0.9136544
## (Intercept) 3.14917762 3.4228043
## predDay 2 0.07638854 0.2673752
## predDay 3 0.08853099 0.2835509
##
##
##
## ***
## Fitting LMER model on 1858 observations from 175 subjects:
## TA ~ surveyNumber + (1|ID)
##
##
## AICw vs. null model = 0.001
# plotting model diagnostics
p <- plotLMER(dat=s3.w,pred="Day.of.assessment",resp="TA",LMER.results=FALSE,diagnostics=TRUE)
##
##
##
## ***
## Fitting LMER model on 1979 observations from 175 subjects:
## TA ~ Day.of.assessment + (1|ID)
##
##
##
## Plotting model diagnostics:
Second, we specify a set of nested LMER models (i.e., hierarchical multiple regression) to evaluate the importance of each additional temporal factor while controlling for the factors associated with substantial differences in the previous step. An additional factor is carried over to the following models only when its inclusion is associated with an AICw > .50.
# linear trend over time
TA.null2 <- lmer(TA~(1|ID),data=s3.w)
TA.autoreg2 <- lmer(TA~TA.LAG+(TA.LAG|ID),data=s3.w)
TA.time2 <- lmer(TA~TA.LAG+timeLag+(TA.LAG|ID),data=s3.w)
Weights(AIC(TA.null2,TA.autoreg2,TA.time2)) # time AICw = .01
## model weights
## [1] 0.00 0.99 0.01
# Day of assessment controlling by Day of week
TA.null2 <- lmer(TA~(1|ID),data=s3.w)
TA.day.of.week2 <- lmer(TA~Day.of.week+(1|ID),data=s3.w)
TA.day.of.assessment2 <- lmer(TA~Day.of.week+Day.of.assessment+(1|ID),data=s3.w)
Weights(AIC(TA.null2,TA.day.of.week2,TA.day.of.assessment2)) # day.of.assessment AICw = .57
## model weights
## [1] 0.41 0.02 0.57
round(as.data.frame(summary(TA.day.of.assessment2)$coefficients),3)
# SurveyNumber controlling by Day
TA.surveyNumber2 <- lmer(TA~Day.of.week+Day.of.assessment+as.factor(surveyNumber)+(1|ID),data=s3.w)
Weights(AIC(TA.null2,TA.day.of.week2,TA.day.of.assessment2,TA.surveyNumber2)) # day.of.assessment AICw = .57
## model weights
## [1] 0.41 0.02 0.57 0.00
Comments:
- results are similar to those found for subsample s1, with TA being only predicted by day of assessment
FA
Here, we evaluate systematic differences in Fatigue across temporal variables.
First, we use the plotLMER() function to visualize the bivariate score distributions while highlighting cases associated with AICw > .50. The function also allows us to visualize the model diagnostics, in terms of the normal fit of residuals and random intercepts, and of homoscedasticity.
grid.arrange(plotLMER(dat=s3.w,pred="Day.of.week",resp="FA",Xlab="Day of week",Ylab="Fatigue"),
plotLMER(dat=s3.w,pred="Day.of.assessment",resp="FA",Xlab="Day of assessment",Ylab="Fatigue"),
plotLMER(dat=s3.w[s3.w$day==1 & s3.w$surveyNumber<6,],pred="surveyNumber",resp="FA",
Xlab="Survey Number",Ylab="Fatigue"),
plotLMER(dat=s3.w[s3.w$surveyNumber<6,],pred="surveyNumber",resp="FA",LMER=FALSE,
Xlab="Survey Number (Day 1 only)",Ylab="Fatigue"),nrow=2)
##
##
##
## ***
## Fitting LMER model on 1979 observations from 175 subjects:
## FA ~ Day.of.week + (1|ID)
##
##
## AICw vs. null model = 0.003
##
##
##
##
## ***
## Fitting LMER model on 1979 observations from 175 subjects:
## FA ~ Day.of.assessment + (1|ID)
##
##
## AICw vs. null model = 0.012
##
##
##
##
## ***
## Fitting LMER model on 667 observations from 175 subjects:
## FA ~ surveyNumber + (1|ID)
##
##
## AICw vs. null model = 1
## Likelihood Ratio Test: Chisq = 48.77 ( 4 ) p = 0
##
## Estimate Std. Error t value
## (Intercept) 3.440000000 0.09058948 37.97350417
## pred2 -0.006896111 0.09896027 -0.06968566
## pred3 0.322224006 0.10303316 3.12738144
## pred4 0.458485336 0.11028072 4.15743870
## pred5 0.763093691 0.13218657 5.77285353
##
##
## Computing profile confidence intervals ...
## 2.5 % 97.5 %
## .sig01 0.6833184 0.9142253
## .sigma 0.8415226 0.9531745
## (Intercept) 3.2626572 3.6173428
## pred2 -0.2006014 0.1866083
## pred3 0.1202720 0.5237549
## pred4 0.2422180 0.6742465
## pred5 0.5040304 1.0216587
p <- plotLMER(dat=s3.w[s3.w$day==1 & s3.w$surveyNumber<6,],pred="surveyNumber",resp="FA",diagnostics=TRUE,LMER.results=FALSE)
##
##
##
## ***
## Fitting LMER model on 667 observations from 175 subjects:
## FA ~ surveyNumber + (1|ID)
##
##
##
## Plotting model diagnostics:
Second, we specify a set of nested LMER models (i.e., hierarchical multiple regression) to evaluate the importance of each additional temporal factor while controlling for the factors associated with substantial differences in the previous step. An additional factor is carried over to the following models only when its inclusion is associated with an AICw > .50.
# linear trend over time
FA.null2 <- lmer(FA~(1|ID),data=s3.w[!is.na(s3.w$FA.LAG),])
FA.autoreg2 <- lmer(FA~FA.LAG+(FA.LAG|ID),data=s3.w)
FA.time2 <- lmer(FA~FA.LAG+timeLag+(FA.LAG|ID),data=s3.w)
Weights(AIC(FA.null2,FA.autoreg2,FA.time2)) # time AICw = 1 (!)
## model weights
## [1] 0 0 1
round(summary(FA.time2)$coefficients,3)
## Estimate Std. Error t value
## (Intercept) 1.686 0.091 18.481
## FA.LAG 0.485 0.023 21.302
## timeLag 0.100 0.010 10.377
# survey number controlling for time
FA.time2 <- lmer(FA~FA.LAG+timeLag+(1|ID),data=s3.w)
FA.surveyNumber2 <- lmer(FA~FA.LAG+timeLag+as.factor(surveyNumber)+(1|ID),data=s3.w)
Weights(AIC(FA.null2,FA.time2,FA.surveyNumber2)) # time AICw = .99
## model weights
## [1] 0.000 0.999 0.001
Comments:
- results are similar to those found for subsample s1, with FA being only predicted by survey number
EXAMPLE PLOTS
Here, we plot some examples of substantial differences.
p1 <- plotLMER(dat=s3.w,pred="Day.of.assessment",resp="TA",Xlab="Day of assessment",Ylab="Tense Arousal")
p2 <- plotLMER(dat=s3.w,pred="Day.of.assessment",resp="TC",Xlab="Day of assessment",Ylab="Task Control")
p3 <- plotLMER(dat=s3.w,pred="Day.of.assessment",resp="NV",Xlab="Day of assessment",Ylab="Negative Valence")
p4 <- plotLMER(dat=s3.w[s3.w$surveyNumber<6,],pred="surveyNumber",resp="NV",
Xlab="Survey Number",Ylab="Negative Valence")
gridExtra::grid.arrange(p1,p2,p3,p4,nrow=2)
4.6.2. Work Sampling variables
Finally, we apply the same procedures shown in section 4.6.1 by focusing on contextual factors collected with the work sampling items.
SUMMARY
In summary, the type of work activity was associated with substantial differences only in Negative Valence, Task Demand, and Task Control, whereas the means of work were substantially predictive of Task Demand and Task Control, and the involvement of other people was only associated with Task Control.
Results obtained from subsamples s2 and s3 mostly replicate those found in the main subsample (s1).
PIPELINE & FUNCTIONS
In this section, we distinguish three main categorical variables associated with work task characteristics: the type of work task (WHAT.first), the means of work (HOW.first), and the people involved in the task (WHOM.first). As highlighted by the variable labels, only the first selected response category was considered in those cases where more than one category was chosen.
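As a minimal illustration of this first-choice rule (a sketch only; the full implementation is the workSamplingRecode() function below, and the helper name first.choice is ours):
# minimal sketch of the first-choice extraction: drop "BREAK"/"OTHER" when reported
# together with other options, then keep only the first listed category
first.choice <- function(x){
  x <- gsub("BREAK,|OTHER,","",as.character(x))
  sapply(strsplit(x,","),`[`,1) }
# e.g., first.choice(c("ANALYSIS,NETWORKING","BREAK,ACQUISITION")) returns "ANALYSIS" "ACQUISITION"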
Then, we use linear mixed-effects regression (LMER) models to evaluate the differences in each ESM measure across the levels of each categorical variable described above.
The following functions and packages are used to compare ESM scores across work sampling categories:
library(lme4); library(MuMIn); library(ggplot2); library(gridExtra)
workSamplingRecode()
workSamplingRecode <- function(data){ require(ggplot2); require(gridExtra); require(reshape2); require(Rmisc)
# WHAT.first
data$WHAT.first <- NA
# delete "BREAK" when reported with other activities (bcs responses are referred to other activities)
data$WHAT <- gsub("BREAK,","",data$WHAT)
# delete "OTHER" when reported with other activities (hopefully, it is the same of BREAK)
data$WHAT <- gsub("OTHER,","",data$WHAT)
n <- 0 # for counting no. of entries with more than 1 option
for(i in 1:nrow(data)){ what <- strsplit(as.character(data[i,"WHAT"]),",")
if(length(what[[1]])>1){ n <- n + 1 }
data[i,"WHAT.first"] <- what[[1]][1] }
data$WHAT.first <- factor(data$WHAT.first,levels=c("ACQUISITION","ANALYSIS","AUTHORING","NETWORKING",
"DISSEMINATION","ADMINISTRATIVE","BREAK","OTHER"))
cat("\n\nWHAT:")
cat(paste("\n",levels(data$WHAT.first),"=",as.character(summary(data[!is.na(data$WHAT.first),"WHAT.first"])),", ",
as.character(round(100*summary(data[!is.na(data$WHAT.first),"WHAT.first"])/
nrow(data[!is.na(data$WHAT.first),]),2)),"%",sep=" "))
cat("\nNumber of entries with more than 1 option =",n,",",round(100*n/nrow(data[!is.na(data$WHAT.first),]),2),"%")
# HOW.first
data$HOW.first <- NA
data$HOW <- gsub("OTHER,","",data$HOW) # removing OTHER when reported with other categories
n <- 0 # for counting entries with more than one option
for(i in 1:nrow(data)){
what <- strsplit(as.character(data[i,"HOW"]),",")
if(length(what[[1]])>1){ n <- n + 1 }
data[i,"HOW.first"] <- what[[1]][1] }
data$HOW.first <- factor(data$HOW.first,
levels=c("PC","FACE2FACE","PHONE","SKYPE","SMARTPHONE","PAPER","OTHER"))
cat("\n\nHOW:")
cat(paste("\n",levels(data$HOW.first),"=",as.character(summary(data[!is.na(data$HOW.first),"HOW.first"])),", ",
as.character(round(100*summary(data[!is.na(data$HOW.first),"HOW.first"])/
nrow(data[!is.na(data$HOW.first),]),2)),"%",sep=" "))
cat("\nNumber of entries with more than 1 option =",n,",",round(100*n/nrow(data[!is.na(data$WHAT.first),]),2),"%")
# WHOM.first
data$WHOM.first <- NA
data$WHOM <- gsub("OTHER,","",data$WHOM) # removing OTHER when indicated with other responses
n <- 0 # for counting entries with more than one option
for(i in 1:nrow(data)){
what <- strsplit(as.character(data[i,"WHOM"]),",")
if(length(what[[1]])>1){ n <- n + 1 }
data[i,"WHOM.first"] <- what[[1]][1] }
data$WHOM.first <- factor(data$WHOM.first,levels=c("ALONE","COLL","OVER","UNDER","CUSTOMER","EXTERNAL","FAMILY","OTHER"))
cat("\n\nWHOM:")
cat(paste("\n",levels(data$WHOM.first),"=",as.character(summary(data[!is.na(data$WHOM.first),"WHOM.first"])),", ",
as.character(round(100*summary(data[!is.na(data$WHOM.first),"WHOM.first"])/
nrow(data[!is.na(data$WHOM.first),]),2)),"%",sep=" "))
cat("\nNumber of entries with more than 1 option =",n,",",round(100*n/nrow(data[!is.na(data$WHAT.first),]),2),"%")
# plotting
p1 <- ggplot(data[!is.na(data$WHAT.first),],
aes(day.of.week,fill=WHAT.first))+
geom_bar(stat="count",position=position_dodge())+
ggtitle("Number of activities by WHAT and day of week")+
scale_fill_manual(name="WHAT",
labels=levels(data$WHAT.first),
values=c("salmon","red","#D5655E","#53B400","#00C094","#00B6EB","#A58AFF","gray"))+
theme(legend.key.size=unit(.5,"cm"),legend.title = element_blank())
p2 <- ggplot(data[!is.na(data$HOW.first),],
aes(day.of.week,fill=HOW.first))+
geom_bar(stat="count",position=position_dodge())+
ggtitle("Number of activities by Means of work and day of week")+
scale_fill_manual(name="HOW",
labels=levels(data$HOW.first),
values=c("#00B6EB","#A58AFF","salmon","red","#D5655E","#53B400","gray"))+
theme(legend.key.size=unit(.5,"cm"),legend.title = element_blank())
p3 <- ggplot(data[!is.na(data$WHOM.first),],
aes(day.of.week,fill=WHOM.first))+
geom_bar(stat="count",position=position_dodge())+
ggtitle("Number of activities by WHOM and day of week")+
scale_fill_manual(name="WHOM",
labels=levels(data$WHOM.first),
values=c("#00B6EB","salmon","red","#D5655E","#53B400","#00C094","#00B6EB","gray"))+
theme(legend.key.size=unit(.5,"cm"),legend.title = element_blank())
grid.arrange(p1,p2,p3,nrow=3)
return(data) }
Recodes the work sampling variables and prints the frequency and percentage of each category.
plotLMER()
plotLMER <- function(dat,pred,resp,LMER=TRUE,LMER.results=TRUE,diagnostics=FALSE,forceLMER=FALSE,
Xlab=NA,Ylab=NA,text.size=16){ require(ggplot2)
# defining predictor and response variable
colnames(dat)[which(colnames(dat)==pred)] <- "pred"
if(class(dat$pred)!="factor"){ dat$pred <- as.factor(dat$pred) }
colnames(dat)[which(colnames(dat)==resp)] <- "resp"
# LMER
if(LMER==TRUE){ require(lme4); require(MuMIn)
fit <- lmer(resp~pred+(1|ID),data=dat[!is.na(dat$pred),])
null <- lmer(resp~(1|ID),data=dat[!is.na(dat$pred),])
cat("\n\n\nFitting LMER model on",nrow(dat[!is.na(dat$resp),]),"observations from",summary(fit)$ngrps,"subjects:\n",
resp,"~",pred," + (1|ID)\n\n")
AICw <- Weights(AIC(null,fit))[2]
# LMER information
if(LMER.results==TRUE){ require(MuMIn)
if(AICw>0.5 | forceLMER==TRUE){ LRT <- anova(null,fit) # likelihood ratio and coeff only when AICw > 0.5
cat("\nAICw vs. null model =",round(AICw,3),
"\nLikelihood Ratio Test: Chisq =",round(LRT$Chisq[2],2),"(",LRT$Df[2],
") p =",round(LRT$`Pr(>Chisq)`[2],3),"\n\n")
print(summary(fit)$coefficients)
ci <- as.data.frame(confint(fit,method="profile"))
cat("\n\nComputing profile confidence intervals ...\n")
print(ci)
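# the next lines convert the fixed-effect contrasts into predicted means per predictor level
# by adding the intercept estimate; note that the shifted CI bounds ignore the covariance
# with the intercept, so they should be read as approximate level-specific intervals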
ci <- cbind(data.frame(Estimate=as.data.frame(summary(fit)$coefficients)[,1]),ci[3:nrow(ci),])
ci[2:nrow(ci),] <- ci[2:nrow(ci),] + ci[1,1]
ci$pred <- levels(dat$pred)
colnames(ci) <- c("resp","lower","upper","pred")
} else { cat("\nAICw vs. null model =",round(AICw,3),"\n\n")
}}} else { AICw = 0 }
# plot
p <- ggplot(dat,aes(x=pred,y=resp)) +
geom_point(col="gray",position = position_jitter(width = .15)) + geom_violin(alpha=.4) +
theme_bw() + theme(text=element_text(size=text.size),axis.title=element_text(size=12),legend.position="none",
axis.text.x = element_text(size=10,angle = 90, vjust = 0.5, hjust=1))
if((LMER.results==TRUE & AICw>0.5) | forceLMER == TRUE){
p <- p + geom_point(data=ci,size=2) + geom_errorbar(data=ci,aes(ymin=lower,ymax=upper),width=.4) }
if(!is.na(Xlab)){ p <- p + xlab(Xlab) } else { p <- p + xlab(pred) }
if(!is.na(Ylab)){ p <- p + ylab(Ylab) } else { p <- p + ylab(resp) }
# LMER diagnostics
if(diagnostics==TRUE){ require(fitdistrplus)
cat("\n\nPlotting model diagnostics:")
par(mfrow=c(2,2),mar=c(3,2,3,1))
qqnorm(resid(fit),main="qqnorm residuals")
qqline(resid(fit),col="red")
qqnorm(ranef(fit)$`ID`[,1],main="qqnorm random effects")
qqline(ranef(fit)$`ID`[,1],col="red")
dt <- dat[!is.na(dat$resp) & !is.na(dat$pred),]
boxplot(resid(fit)~dt$pred,main="Residuals across X levels")
boxplot(dt$resp~dt$pred,main="Response across X levels")}
p }
Plots a quantitative response variable across the levels of a categorical predictor and, based on the specified LMER model, returns diagnostic information, parameter estimates, and 95% CIs (profile method, i.e., computed from the likelihood profile with cutoffs based on the likelihood ratio test).
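Note that plotLMER() also accepts a forceLMER argument (see the definition above), which overlays the model-based level means and profile CIs even when the AICw criterion is not met. A hypothetical call (usable only after the recoding steps below have created the WHOM.first2 variable) would be:
p <- plotLMER(dat=s1.w[!is.na(s1.w$WHOM.first2),],pred="WHOM.first2",resp="TD",forceLMER=TRUE,
              Xlab="People working with",Ylab="Task Demand")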
SUBSAMPLE s1
DESCRIPTIVES
First, we recode work sampling variables by isolating only the first choice indicated in each data entry. This is done with the workSamplingRecode() function.
s1.w <- workSamplingRecode(s1.w)
##
##
## WHAT:
## ACQUISITION = 439 , 28.75 %
## ANALYSIS = 209 , 13.69 %
## AUTHORING = 159 , 10.41 %
## NETWORKING = 167 , 10.94 %
## DISSEMINATION = 73 , 4.78 %
## ADMINISTRATIVE = 238 , 15.59 %
## BREAK = 49 , 3.21 %
## OTHER = 193 , 12.64 %
## Number of entries with more than 1 option = 413 , 27.05 %
##
## HOW:
## PC = 955 , 62.54 %
## FACE2FACE = 367 , 24.03 %
## PHONE = 54 , 3.54 %
## SKYPE = 11 , 0.72 %
## SMARTPHONE = 9 , 0.59 %
## PAPER = 46 , 3.01 %
## OTHER = 85 , 5.57 %
## Number of entries with more than 1 option = 395 , 25.87 %
##
## WHOM:
## ALONE = 843 , 55.35 %
## COLL = 503 , 33.03 %
## OVER = 57 , 3.74 %
## UNDER = 39 , 2.56 %
## CUSTOMER = 44 , 2.89 %
## EXTERNAL = 18 , 1.18 %
## FAMILY = 8 , 0.53 %
## OTHER = 11 , 0.72 %
## Number of entries with more than 1 option = 197 , 12.9 %
Comments:
- we note that the number of observations is highly unbalanced across types of activity
Thus, we try to recode them in an alternative way:
- The WHAT variable is recoded into (1) “information acquisition”, (2) “data analysis and authoring”, (3) “social”, and (4) “administrative” activities, plus a residual (5) “break” category.
s1.w[!is.na(s1.w$WHAT.first) & s1.w$WHAT.first=="ACQUISITION","WHAT.first2"] <- "Information \nacquisition"
s1.w[!is.na(s1.w$WHAT.first) & (s1.w$WHAT.first=="ANALYSIS"|s1.w$WHAT.first=="AUTHORING"),"WHAT.first2"] <-
"Data analysis \nand authoring"
s1.w[!is.na(s1.w$WHAT.first) & (s1.w$WHAT.first=="DISSEMINATION"|s1.w$WHAT.first=="NETWORKING"),"WHAT.first2"] <-
"Social \nactivities"
s1.w[!is.na(s1.w$WHAT.first) & s1.w$WHAT.first=="ADMINISTRATIVE","WHAT.first2"] <- "Administrative \nactivities"
s1.w[!is.na(s1.w$WHAT.first) & s1.w$WHAT.first=="BREAK","WHAT.first2"] <- "Break"
s1.w$WHAT.first2 <- factor(s1.w$WHAT.first2,
levels=c("Information \nacquisition","Data analysis \nand authoring",
"Administrative \nactivities","Social \nactivities","Break"))
round(rbind(summary(s1.w$WHAT.first2),summary(s1.w$WHAT.first2)/nrow(s1.w[!is.na(s1.w$WHAT.first2),])),2)
## Information \nacquisition Data analysis \nand authoring
## [1,] 439.00 368.00
## [2,] 0.33 0.28
## Administrative \nactivities Social \nactivities Break NA's
## [1,] 238.00 240.00 49.00 440.00
## [2,] 0.18 0.18 0.04 0.33
- The HOW variable is recoded into “PC”, “face to face”, and “others”.
s1.w[!is.na(s1.w$HOW.first) & s1.w$HOW.first=="PC","HOW.first2"] <- "Computer"
s1.w[!is.na(s1.w$HOW.first) & s1.w$HOW.first=="FACE2FACE","HOW.first2"] <- "Face-to-face"
s1.w[!is.na(s1.w$HOW.first) & s1.w$HOW.first!="PC" & s1.w$HOW.first!="FACE2FACE","HOW.first2"] <- "Others"
s1.w$HOW.first2 <- factor(s1.w$HOW.first2,levels=c("Computer","Face-to-face","Others"))
round(rbind(summary(s1.w$HOW.first2),summary(s1.w$HOW.first2)/nrow(s1.w[!is.na(s1.w$HOW.first2),])),2)
## Computer Face-to-face Others NA's
## [1,] 955.00 367.00 205.00 247.00
## [2,] 0.63 0.24 0.13 0.16
- The WHOM variable is recoded into “Alone” and “With others”.
s1.w[!is.na(s1.w$WHOM.first) & s1.w$WHOM.first=="ALONE","WHOM.first2"] <- "Alone"
s1.w[!is.na(s1.w$WHOM.first) & s1.w$WHOM.first!="ALONE","WHOM.first2"] <- "With others"
s1.w$WHOM.first2 <- as.factor(s1.w$WHOM.first2)
round(rbind(summary(s1.w$WHOM.first2),summary(s1.w$WHOM.first2)/nrow(s1.w[!is.na(s1.w$WHOM.first2),])),2)
## Alone With others NA's
## [1,] 843.00 680.00 251.00
## [2,] 0.55 0.45 0.16
TD
Here, we evaluate systematic differences in Task Demand across work sampling variables.
Specifically, we use the plotLMER() function to visualize the bivariate distributions of the scores across work sampling categories, highlighting cases associated with AICw > .50. The function also allows inspecting the model diagnostics, i.e., the normality of residuals and random intercepts, and homoscedasticity.
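For reference, the core comparison that plotLMER() performs for each panel can also be written out directly; the following is a minimal sketch for the TD ~ WHAT.first2 case (using the objects created above):
# minimal sketch of the comparison performed inside plotLMER() for one panel
library(lme4); library(MuMIn)
dat <- s1.w[!is.na(s1.w$WHAT.first2),]
fit <- lmer(TD~WHAT.first2+(1|ID),data=dat)   # model including the work sampling predictor
null <- lmer(TD~(1|ID),data=dat)              # random-intercept-only null model
Weights(AIC(null,fit))[2]                      # AICw of the predictor model vs. the null
confint(fit,method="profile")                  # profile 95% CIs, as printed by plotLMER()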
First, we explore differences across the original work sampling categories.
grid.arrange(plotLMER(dat=s1.w[!is.na(s1.w$WHAT.first),],pred="WHAT.first",resp="TD",LMER=FALSE,
Xlab="Type of work activity",Ylab="Task Demand"),
plotLMER(dat=s1.w[!is.na(s1.w$HOW.first),],pred="HOW.first",resp="TD",LMER=FALSE,
Xlab="Mean of work",Ylab="Task Demand"),
plotLMER(dat=s1.w[!is.na(s1.w$WHOM.first),],pred="WHOM.first",resp="TD",LMER=FALSE,
Xlab="People working with",Ylab="Task Demand"),nrow=2)
Then, we replicate the same procedure on the simplified variables WHAT.first2, HOW.first2, and WHOM.first2. Activities marked as “other” are not considered.
grid.arrange(plotLMER(dat=s1.w[!is.na(s1.w$WHAT.first2),],pred="WHAT.first2",resp="TD",
Xlab="Type of work activity",Ylab="Task Demand"),
plotLMER(dat=s1.w[!is.na(s1.w$HOW.first2),],pred="HOW.first2",resp="TD",
Xlab="Mean of work",Ylab="Task Demand"),
plotLMER(dat=s1.w[!is.na(s1.w$WHOM.first2),],pred="WHOM.first2",resp="TD",
Xlab="People working with",Ylab="Task Demand"),nrow=2)
##
##
##
## Fitting LMER model on 1332 observations from 136 subjects:
## TD ~ WHAT.first2 + (1|ID)
##
##
## AICw vs. null model = 1
## Likelihood Ratio Test: Chisq = 53.87 ( 4 ) p = 0
##
## Estimate Std. Error t value
## (Intercept) 4.22328056 0.08685919 48.6221484
## predData analysis \nand authoring 0.10309436 0.08310294 1.2405622
## predAdministrative \nactivities -0.03993287 0.10268471 -0.3888881
## predSocial \nactivities -0.28006927 0.08856955 -3.1621395
## predBreak -0.92740794 0.15796469 -5.8709826
##
##
## Computing profile confidence intervals ...
## 2.5 % 97.5 %
## .sig01 0.68578730 0.9074038
## .sigma 0.92150358 0.9984063
## (Intercept) 4.05301237 4.3935183
## predData analysis \nand authoring -0.05992159 0.2657429
## predAdministrative \nactivities -0.24097065 0.1610656
## predSocial \nactivities -0.45347666 -0.1066268
## predBreak -1.23678500 -0.6181295
##
##
##
## Fitting LMER model on 1523 observations from 139 subjects:
## TD ~ HOW.first2 + (1|ID)
##
##
## AICw vs. null model = 1
## Likelihood Ratio Test: Chisq = 39.69 ( 2 ) p = 0
##
## Estimate Std. Error t value
## (Intercept) 4.2310503 0.07679712 55.093869
## predFace-to-face -0.1897310 0.07262172 -2.612592
## predOthers -0.5513735 0.08849243 -6.230742
##
##
## Computing profile confidence intervals ...
## 2.5 % 97.5 %
## .sig01 0.7001937 0.91914941
## .sigma 0.9686560 1.04359631
## (Intercept) 4.0803504 4.38205659
## predFace-to-face -0.3319773 -0.04731671
## predOthers -0.7247544 -0.37792695
##
##
##
## Fitting LMER model on 1522 observations from 139 subjects:
## TD ~ WHOM.first2 + (1|ID)
##
##
## AICw vs. null model = 0.279
# plotting diagnostics
p <- plotLMER(dat=s1.w[!is.na(s1.w$WHAT.first2),],pred="WHAT.first2",resp="TD",LMER.results=FALSE,diagnostics=TRUE)
##
##
##
## Fitting LMER model on 1332 observations from 136 subjects:
## TD ~ WHAT.first2 + (1|ID)
##
##
##
## Plotting model diagnostics:
For Task Demand, we do not need to evaluate the contribution of work sampling predictors while accounting for temporal predictors, since the latter did not show substantial effects in the previous step (see section 4.6.1).
TC
Here, we evaluate systematic differences in Task Control across work sampling variables.
Specifically, we use the plotLMER() function to visualize the bivariate distributions of the scores across work sampling categories, highlighting cases associated with AICw > .50. The function also allows inspecting the model diagnostics, i.e., the normality of residuals and random intercepts, and homoscedasticity.
First, we explore differences across the original work sampling categories.
grid.arrange(plotLMER(dat=s1.w[!is.na(s1.w$WHAT.first),],pred="WHAT.first",resp="TC",LMER=FALSE,
Xlab="Type of work activity",Ylab="Task Control"),
plotLMER(dat=s1.w[!is.na(s1.w$HOW.first),],pred="HOW.first",resp="TC",LMER=FALSE,
Xlab="Mean of work",Ylab="Task Control"),
plotLMER(dat=s1.w[!is.na(s1.w$WHOM.first),],pred="WHOM.first",resp="TC",LMER=FALSE,
Xlab="People working with",Ylab="Task Control"),nrow=2)
Then, we replicate the same procedure on the simplified variables WHAT.first2, HOW.first2, and WHOM.first2. Activities marked as “other” are not considered.
grid.arrange(plotLMER(dat=s1.w[!is.na(s1.w$WHAT.first2),],pred="WHAT.first2",resp="TC",
Xlab="Type of work activity",Ylab="Task Control"),
plotLMER(dat=s1.w[!is.na(s1.w$HOW.first2),],pred="HOW.first2",resp="TC",
Xlab="Mean of work",Ylab="Task Control"),
plotLMER(dat=s1.w[!is.na(s1.w$WHOM.first2),],pred="WHOM.first2",resp="TC",
Xlab="People working with",Ylab="Task Control"),nrow=2)
##
##
##
## Fitting LMER model on 1303 observations from 136 subjects:
## TC ~ WHAT.first2 + (1|ID)
##
##
## AICw vs. null model = 1
## Likelihood Ratio Test: Chisq = 39.77 ( 4 ) p = 0
##
## Estimate Std. Error t value
## (Intercept) 4.36227967 0.10656057 40.9370886
## predData analysis \nand authoring 0.03239747 0.09638869 0.3361127
## predAdministrative \nactivities -0.10588449 0.12014371 -0.8813153
## predSocial \nactivities -0.55621001 0.10303583 -5.3982193
## predBreak -0.35628752 0.18283620 -1.9486705
##
##
## Computing profile confidence intervals ...
## 2.5 % 97.5 %
## .sig01 0.8703465 1.14362986
## .sigma 1.0475918 1.13613760
## (Intercept) 4.1533534 4.57118660
## predData analysis \nand authoring -0.1563126 0.22112333
## predAdministrative \nactivities -0.3412147 0.12925274
## predSocial \nactivities -0.7578605 -0.35416778
## predBreak -0.7141461 0.00203903
##
##
##
## Fitting LMER model on 1491 observations from 139 subjects:
## TC ~ HOW.first2 + (1|ID)
##
##
## AICw vs. null model = 1
## Likelihood Ratio Test: Chisq = 70 ( 2 ) p = 0
##
## Estimate Std. Error t value
## (Intercept) 4.3819694 0.09151234 47.883921
## predFace-to-face -0.6828247 0.08168659 -8.359080
## predOthers -0.3303318 0.09932134 -3.325889
##
##
## Computing profile confidence intervals ...
## 2.5 % 97.5 %
## .sig01 0.8492838 1.1100192
## .sigma 1.0712168 1.1551291
## (Intercept) 4.2023246 4.5618786
## predFace-to-face -0.8428187 -0.5225707
## predOthers -0.5248625 -0.1354144
##
##
##
## Fitting LMER model on 1491 observations from 139 subjects:
## TC ~ WHOM.first2 + (1|ID)
##
##
## AICw vs. null model = 1
## Likelihood Ratio Test: Chisq = 127.29 ( 1 ) p = 0
##
## Estimate Std. Error t value
## (Intercept) 4.5293403 0.09390194 48.23479
## predWith others -0.7871267 0.06803354 -11.56969
##
##
## Computing profile confidence intervals ...
## 2.5 % 97.5 %
## .sig01 0.8644283 1.1270951
## .sigma 1.0474727 1.1295233
## (Intercept) 4.3451154 4.7140790
## predWith others -0.9204517 -0.6532891
# plotting diagnostics
p <- plotLMER(dat=s1.w[!is.na(s1.w$WHAT.first2),],pred="HOW.first2",resp="TC",LMER.results=FALSE,diagnostics=TRUE)
##
##
##
## Fitting LMER model on 1303 observations from 136 subjects:
## TC ~ HOW.first2 + (1|ID)
##
##
##
## Plotting model diagnostics:
Second, we specify a set of nested LMER models (i.e., hierarchical multiple regression) to evaluate the importance of each additional work sampling variable while controlling for those variables associated with substantial differences in the previous step (i.e., temporal variables). An additional variable is retained in the following models only when its inclusion is associated with an AICw > .50.
# work sampling (WHAT) by controlling for temporal variables
TC.null <- lmer(TC~(1|ID),data=s1.w[!is.na(s1.w$WHAT.first2),])
TC.day.of.assessment <- lmer(TC~Day.of.assessment+(1|ID),data=s1.w[!is.na(s1.w$WHAT.first2),])
TC.WHAT <- lmer(TC~Day.of.assessment+WHAT.first2+(1|ID),data=s1.w[!is.na(s1.w$WHAT.first2),])
Weights(AIC(TC.null,TC.day.of.assessment,TC.WHAT)) # WHAT AICw = 1
## model weights
## [1] 0 0 1
round(summary(TC.WHAT)$coefficients,3)
## Estimate Std. Error t value
## (Intercept) 4.497 0.115 38.973
## Day.of.assessmentDay 2 -0.235 0.075 -3.130
## Day.of.assessmentDay 3 -0.162 0.075 -2.149
## WHAT.first2Data analysis \nand authoring 0.023 0.096 0.235
## WHAT.first2Administrative \nactivities -0.131 0.120 -1.087
## WHAT.first2Social \nactivities -0.557 0.103 -5.417
## WHAT.first2Break -0.376 0.182 -2.062
# work sampling (HOW) by controlling for temporal variables
TC.null <- lmer(TC~(1|ID),data=s1.w[!is.na(s1.w$HOW.first2),])
TC.day.of.assessment <- lmer(TC~Day.of.assessment+(1|ID),data=s1.w[!is.na(s1.w$HOW.first2),])
TC.HOW <- lmer(TC~Day.of.assessment+HOW.first2+(1|ID),data=s1.w[!is.na(s1.w$HOW.first2),])
Weights(AIC(TC.null,TC.day.of.assessment,TC.HOW)) # HOW AICw = 1
## model weights
## [1] 0 0 1
round(summary(TC.HOW)$coefficients,3)
## Estimate Std. Error t value
## (Intercept) 4.500 0.099 45.387
## Day.of.assessmentDay 2 -0.218 0.071 -3.065
## Day.of.assessmentDay 3 -0.161 0.072 -2.254
## HOW.first2Face-to-face -0.659 0.082 -8.057
## HOW.first2Others -0.334 0.099 -3.374
# work sampling (WHOM) by controlling for temporal variables
TC.null <- lmer(TC~(1|ID),data=s1.w[!is.na(s1.w$WHOM.first2),])
TC.day.of.assessment <- lmer(TC~Day.of.assessment+(1|ID),data=s1.w[!is.na(s1.w$WHOM.first2),])
TC.WHOM <- lmer(TC~Day.of.assessment+WHOM.first2+(1|ID),data=s1.w[!is.na(s1.w$WHOM.first2),])
Weights(AIC(TC.null,TC.day.of.assessment,TC.WHOM)) # WHOM AICw = 1
## model weights
## [1] 0 0 1
round(summary(TC.WHOM)$coefficients,3)
## Estimate Std. Error t value
## (Intercept) 4.656 0.101 46.091
## Day.of.assessmentDay 2 -0.221 0.069 -3.193
## Day.of.assessmentDay 3 -0.185 0.070 -2.661
## WHOM.first2With others -0.776 0.068 -11.438
Comments:
- substantial differences are observed for each work sampling variable, especially in terms of WHOM.first2 and HOW.first2, suggesting lower Task Control in activities involving other people and higher Task Control in activities performed on the computer
- although lower Task Control is also observed in social activities, those differences are not substantial when controlling for the means of work and the people involved
NV
Here, we evaluate systematic differences in Negative Valence across work sampling variables.
Specifically, we use the plotLMER() function to visualize the bivariate distributions of the scores across work sampling categories, highlighting cases associated with AICw > .50. The function also allows inspecting the model diagnostics, i.e., the normality of residuals and random intercepts, and homoscedasticity.
First, we explore differences across the original work sampling categories.
grid.arrange(plotLMER(dat=s1.w[!is.na(s1.w$WHAT.first),],pred="WHAT.first",resp="NV",LMER=FALSE,
Xlab="Type of work activity",Ylab="Negative Valence"),
plotLMER(dat=s1.w[!is.na(s1.w$HOW.first),],pred="HOW.first",resp="NV",LMER=FALSE,
Xlab="Mean of work",Ylab="Negative Valence"),
plotLMER(dat=s1.w[!is.na(s1.w$WHOM.first),],pred="WHOM.first",resp="NV",LMER=FALSE,
Xlab="People working with",Ylab="Negative Valence"),nrow=2)
Then, we replicate the same procedure on the simplified variables WHAT.first2, HOW.first2 and WHOM.first2. Activities marked as ‘other’ are not considered.
grid.arrange(plotLMER(dat=s1.w[!is.na(s1.w$WHAT.first2),],pred="WHAT.first2",resp="NV",
Xlab="Type of work activity",Ylab="Negative Valence"),
plotLMER(dat=s1.w[!is.na(s1.w$HOW.first2),],pred="HOW.first2",resp="NV",
Xlab="Mean of work",Ylab="Negative Valence"),
plotLMER(dat=s1.w[!is.na(s1.w$WHOM.first2),],pred="WHOM.first2",resp="NV",
Xlab="People working with",Ylab="Negative Valence"),nrow=2)
##
##
##
## Fitting LMER model on 1334 observations from 136 subjects:
## NV ~ WHAT.first2 + (1|ID)
##
##
## AICw vs. null model = 0.934
## Likelihood Ratio Test: Chisq = 25.93 ( 4 ) p = 0
##
## Estimate Std. Error t value
## (Intercept) 3.33571321 0.07511233 44.4096616
## predData analysis \nand authoring 0.24030878 0.06969470 3.4480209
## predAdministrative \nactivities 0.12649688 0.08641237 1.4638746
## predSocial \nactivities -0.01776749 0.07420185 -0.2394481
## predBreak -0.28893909 0.13249873 -2.1806933
##
##
## Computing profile confidence intervals ...
## 2.5 % 97.5 %
## .sig01 0.6048366 0.79771380
## .sigma 0.7720276 0.83641803
## (Intercept) 3.1882960 3.48283920
## predData analysis \nand authoring 0.1036599 0.37670948
## predAdministrative \nactivities -0.0426387 0.29570932
## predSocial \nactivities -0.1635232 0.12748955
## predBreak -0.5485033 -0.02955658
##
##
##
## Fitting LMER model on 1527 observations from 139 subjects:
## NV ~ HOW.first2 + (1|ID)
##
##
## AICw vs. null model = 0.014
##
##
##
##
## Fitting LMER model on 1523 observations from 139 subjects:
## NV ~ WHOM.first2 + (1|ID)
##
##
## AICw vs. null model = 0.062
# plotting diagnostics
p <- plotLMER(dat=s1.w[!is.na(s1.w$WHAT.first2),],pred="WHAT.first2",resp="NV",LMER.results=FALSE,diagnostics=TRUE)
##
##
##
## Fitting LMER model on 1334 observations from 136 subjects:
## NV ~ WHAT.first2 + (1|ID)
##
##
##
## Plotting model diagnostics:
Second, we specify a set of nested LMER models (i.e., hierarchical multiple regression) to evaluate the importance of each additional work sampling variable while controlling for those variables associated with substantial differences in the previous step (i.e., temporal variables). An additional variable is retained in the following models only when its inclusion is associated with an AICw > .50.
# WHAT by controlling for temporal variables
NV.null <- lmer(NV~(1|ID),data=s1.w[!is.na(s1.w$WHAT.first2),])
NV.day.of.week <- lmer(NV~Day.of.week+(1|ID),data=s1.w[!is.na(s1.w$WHAT.first2),])
NV.day.of.assessment <- lmer(NV~Day.of.week+Day.of.assessment+(1|ID),data=s1.w[!is.na(s1.w$WHAT.first2),])
NV.surveyNumber <- lmer(NV~Day.of.week+Day.of.assessment+as.factor(surveyNumber)+(1|ID),data=s1.w[!is.na(s1.w$WHAT.first2),])
NV.WHAT <- lmer(NV~Day.of.week+Day.of.assessment+as.factor(surveyNumber)+WHAT.first2+(1|ID),data=s1.w)
Weights(AIC(NV.null,NV.day.of.week,NV.day.of.assessment,NV.surveyNumber,NV.WHAT)) # null AICw = .82
## model weights
## [1] 0.824 0.038 0.137 0.000 0.000
round(summary(NV.WHAT)$coefficients,3)
## Estimate Std. Error t value
## (Intercept) 3.150 0.108 29.178
## Day.of.weekWednesday -0.117 0.055 -2.119
## Day.of.weekFriday -0.097 0.055 -1.769
## Day.of.assessmentDay 2 0.190 0.055 3.482
## Day.of.assessmentDay 3 0.203 0.055 3.699
## as.factor(surveyNumber)2 0.114 0.079 1.448
## as.factor(surveyNumber)3 0.124 0.081 1.530
## as.factor(surveyNumber)4 0.177 0.085 2.077
## as.factor(surveyNumber)5 0.327 0.098 3.332
## as.factor(surveyNumber)6 0.101 0.124 0.814
## as.factor(surveyNumber)7 0.138 0.201 0.690
## WHAT.first2Data analysis \nand authoring 0.224 0.070 3.215
## WHAT.first2Administrative \nactivities 0.145 0.086 1.690
## WHAT.first2Social \nactivities -0.017 0.074 -0.225
## WHAT.first2Break -0.299 0.132 -2.258
Comments:
- substantial differences are observed between types of work activity (WHAT.first2), also when controlling for temporal variables, with higher Negative Valence in Data analysis & authoring and lower Negative Valence during breaks compared to the other task categories, although the AICw suggests stronger evidence only when comparing the WHAT model with the null model
- no substantial differences are observed between means of work or people involved
TA
Here, we evaluate systematic differences in Tense Arousal across work sampling variables.
Specifically, we use the plotLMER() function to visualize the bivariate distributions of the scores across work sampling categories, highlighting cases associated with AICw > .50. The function also allows inspecting the model diagnostics, i.e., the normality of residuals and random intercepts, and homoscedasticity.
First, we explore differences across the original work sampling categories.
grid.arrange(plotLMER(dat=s1.w[!is.na(s1.w$WHAT.first),],pred="WHAT.first",resp="TA",LMER=FALSE,
Xlab="Type of work activity",Ylab="Tense Arousal"),
plotLMER(dat=s1.w[!is.na(s1.w$HOW.first),],pred="HOW.first",resp="TA",LMER=FALSE,
Xlab="Mean of work",Ylab="Tense Arousal"),
plotLMER(dat=s1.w[!is.na(s1.w$WHOM.first),],pred="WHOM.first",resp="TA",LMER=FALSE,
Xlab="People working with",Ylab="Tense Arousal"),nrow=2)
Then, we replicate the same procedure on the simplified variables WHAT.first2, HOW.first2, and WHOM.first2. Activities marked as “other” are not considered.
grid.arrange(plotLMER(dat=s1.w[!is.na(s1.w$WHAT.first2),],pred="WHAT.first2",resp="TA",
Xlab="Type of work activity",Ylab="Tense Arousal"),
plotLMER(dat=s1.w[!is.na(s1.w$HOW.first2),],pred="HOW.first2",resp="TA",
Xlab="Mean of work",Ylab="Tense Arousal"),
plotLMER(dat=s1.w[!is.na(s1.w$WHOM.first2),],pred="WHOM.first2",resp="TA",
Xlab="People working with",Ylab="Tense Arousal"),nrow=2)
##
##
##
## Fitting LMER model on 1334 observations from 136 subjects:
## TA ~ WHAT.first2 + (1|ID)
##
##
## AICw vs. null model = 0.322
##
##
##
##
## Fitting LMER model on 1527 observations from 139 subjects:
## TA ~ HOW.first2 + (1|ID)
##
##
## AICw vs. null model = 0.008
##
##
##
##
## Fitting LMER model on 1523 observations from 139 subjects:
## TA ~ WHOM.first2 + (1|ID)
##
##
## AICw vs. null model = 0.076
# plotting diagnostics
p <- plotLMER(dat=s1.w[!is.na(s1.w$WHAT.first2),],pred="WHAT.first2",resp="TA",LMER.results=FALSE,diagnostics=TRUE)
##
##
##
## Fitting LMER model on 1334 observations from 136 subjects:
## TA ~ WHAT.first2 + (1|ID)
##
##
##
## Plotting model diagnostics:
Comments:
- no substantial differences are observed across types of work tasks, means of work, or people involved
- consequently, we do not need to evaluate the contribution of work sampling predictors over the linear trend highlighted for Tense Arousal over the three days of participation
FA
Here, we evaluate systematic differences in Fatigue across work sampling variables.
Specifically, we use the plotLMER() function to visualize the bivariate distributions of the scores across work sampling categories, highlighting cases associated with AICw > .50. The function also allows inspecting the model diagnostics, i.e., the normality of residuals and random intercepts, and homoscedasticity.
First, we explore differences across the original work sampling categories.
grid.arrange(plotLMER(dat=s1.w[!is.na(s1.w$WHAT.first),],pred="WHAT.first",resp="FA",LMER=FALSE,
Xlab="Type of work activity",Ylab="Fatigue"),
plotLMER(dat=s1.w[!is.na(s1.w$HOW.first),],pred="HOW.first",resp="FA",LMER=FALSE,
Xlab="Mean of work",Ylab="Fatigue"),
plotLMER(dat=s1.w[!is.na(s1.w$WHOM.first),],pred="WHOM.first",resp="FA",LMER=FALSE,
Xlab="People working with",Ylab="Fatigue"),nrow=2)
Then, we replicate the same procedure on the simplified variables WHAT.first2, HOW.first2, and WHOM.first2. Activities marked as “other” are not considered.
grid.arrange(plotLMER(dat=s1.w[!is.na(s1.w$WHAT.first2),],pred="WHAT.first2",resp="FA",
Xlab="Type of work activity",Ylab="Fatigue"),
plotLMER(dat=s1.w[!is.na(s1.w$HOW.first2),],pred="HOW.first2",resp="FA",
Xlab="Mean of work",Ylab="Fatigue"),
plotLMER(dat=s1.w[!is.na(s1.w$WHOM.first2),],pred="WHOM.first2",resp="FA",
Xlab="People working with",Ylab="Fatigue"),nrow=2)
##
##
##
## Fitting LMER model on 1334 observations from 136 subjects:
## FA ~ WHAT.first2 + (1|ID)
##
##
## AICw vs. null model = 0.001
##
##
##
##
## Fitting LMER model on 1527 observations from 139 subjects:
## FA ~ HOW.first2 + (1|ID)
##
##
## AICw vs. null model = 0.306
##
##
##
##
## Fitting LMER model on 1523 observations from 139 subjects:
## FA ~ WHOM.first2 + (1|ID)
##
##
## AICw vs. null model = 0.426
# plotting diagnostics
p <- plotLMER(dat=s1.w[!is.na(s1.w$WHAT.first2),],pred="WHAT.first2",resp="FA",LMER.results=FALSE,diagnostics=TRUE)
##
##
##
## Fitting LMER model on 1334 observations from 136 subjects:
## FA ~ WHAT.first2 + (1|ID)
##
##
##
## Plotting model diagnostics:
Comments:
- no substantial differences are observed across types of work tasks, means of work, or people involved
- consequently, we do not need to evaluate the contribution of work sampling predictors over the linear trend highlighted for Fatigue over time
EXAMPLE PLOTS
Here, we plot some examples of substantial differences.
p1 <- plotLMER(dat=s1.w[!is.na(s1.w$WHAT.first2),],pred="WHAT.first2",resp="NV",
Xlab="Type of work activity",Ylab="Negative Valence",text.size=12)
p2 <- plotLMER(dat=s1.w[!is.na(s1.w$WHAT.first2),],pred="WHAT.first2",resp="TC",
Xlab="Type of work activity",Ylab="Task Control",text.size=12)
p3 <- plotLMER(dat=s1.w[!is.na(s1.w$HOW.first2),],pred="HOW.first2",resp="TD",
Xlab="Mean of work",Ylab="Task Demand",text.size=16)
p4 <- plotLMER(dat=s1.w[!is.na(s1.w$WHOM.first2),],pred="WHOM.first2",resp="TC",
Xlab="People working with",Ylab="Task Control",text.size=16)
p <- gridExtra::grid.arrange(p1,p2,p3,p4,nrow=2)
# ggsave("RESULTS/contextual_worksampling.tiff",p,dpi=300)
SUBSAMPLE s2
DESCRIPTIVES
First, we recode work sampling variables by isolating only the first choice indicated in each data entry. This is done with the workSamplingRecode() function.
s2.w <- workSamplingRecode(s2.w)
##
##
## WHAT:
## ACQUISITION = 321 , 28.87 %
## ANALYSIS = 150 , 13.49 %
## AUTHORING = 105 , 9.44 %
## NETWORKING = 115 , 10.34 %
## DISSEMINATION = 58 , 5.22 %
## ADMINISTRATIVE = 165 , 14.84 %
## BREAK = 37 , 3.33 %
## OTHER = 161 , 14.48 %
## Number of entries with more than 1 option = 290 , 26.08 %
##
## HOW:
## PC = 690 , 62.05 %
## FACE2FACE = 287 , 25.81 %
## PHONE = 30 , 2.7 %
## SKYPE = 7 , 0.63 %
## SMARTPHONE = 5 , 0.45 %
## PAPER = 28 , 2.52 %
## OTHER = 65 , 5.85 %
## Number of entries with more than 1 option = 263 , 23.65 %
##
## WHOM:
## ALONE = 602 , 54.33 %
## COLL = 373 , 33.66 %
## OVER = 49 , 4.42 %
## UNDER = 27 , 2.44 %
## CUSTOMER = 38 , 3.43 %
## EXTERNAL = 7 , 0.63 %
## FAMILY = 3 , 0.27 %
## OTHER = 9 , 0.81 %
## Number of entries with more than 1 option = 144 , 12.95 %
Comments:
- We note that the number of observations is highly unbalanced across types of activity
Thus, we try to recode them in an alternative way:
# WHAT
s2.w[!is.na(s2.w$WHAT.first) & s2.w$WHAT.first=="ACQUISITION","WHAT.first2"] <- "Information \nacquisition"
s2.w[!is.na(s2.w$WHAT.first) & (s2.w$WHAT.first=="ANALYSIS"|s2.w$WHAT.first=="AUTHORING"),"WHAT.first2"] <-
"Data analysis \nand authoring"
s2.w[!is.na(s2.w$WHAT.first) & (s2.w$WHAT.first=="DISSEMINATION"|s2.w$WHAT.first=="NETWORKING"),"WHAT.first2"] <-
"Social \nactivities"
s2.w[!is.na(s2.w$WHAT.first) & s2.w$WHAT.first=="ADMINISTRATIVE","WHAT.first2"] <- "Administrative \nactivities"
s2.w[!is.na(s2.w$WHAT.first) & s2.w$WHAT.first=="BREAK","WHAT.first2"] <- "Break"
s2.w$WHAT.first2 <- factor(s2.w$WHAT.first2,
levels=c("Information \nacquisition","Data analysis \nand authoring",
"Administrative \nactivities","Social \nactivities","Break"))
round(rbind(summary(s2.w$WHAT.first2),summary(s2.w$WHAT.first2)/nrow(s2.w[!is.na(s2.w$WHAT.first2),])),2)
## Information \nacquisition Data analysis \nand authoring
## [1,] 321.00 255.00
## [2,] 0.34 0.27
## Administrative \nactivities Social \nactivities Break NA's
## [1,] 165.00 173.00 37.00 317.00
## [2,] 0.17 0.18 0.04 0.33
# HOW
s2.w[!is.na(s2.w$HOW.first) & s2.w$HOW.first=="PC","HOW.first2"] <- "Computer"
s2.w[!is.na(s2.w$HOW.first) & s2.w$HOW.first=="FACE2FACE","HOW.first2"] <- "Face-to-face"
s2.w[!is.na(s2.w$HOW.first) & s2.w$HOW.first!="PC" & s2.w$HOW.first!="FACE2FACE","HOW.first2"] <- "Others"
s2.w$HOW.first2 <- factor(s2.w$HOW.first2,levels=c("Computer","Face-to-face","Others"))
round(rbind(summary(s2.w$HOW.first2),summary(s2.w$HOW.first2)/nrow(s2.w[!is.na(s2.w$HOW.first2),])),2)
## Computer Face-to-face Others NA's
## [1,] 690.00 287.00 135.00 156.00
## [2,] 0.62 0.26 0.12 0.14
# WHOM
s2.w[!is.na(s2.w$WHOM.first) & s2.w$WHOM.first=="ALONE","WHOM.first2"] <- "Alone"
s2.w[!is.na(s2.w$WHOM.first) & s2.w$WHOM.first!="ALONE","WHOM.first2"] <- "With others"
s2.w$WHOM.first2 <- as.factor(s2.w$WHOM.first2)
round(rbind(summary(s2.w$WHOM.first2),summary(s2.w$WHOM.first2)/nrow(s2.w[!is.na(s2.w$WHOM.first2),])),2)
## Alone With others NA's
## [1,] 602.00 506.00 160.00
## [2,] 0.54 0.46 0.14
Comments:
- the proportions of activities in each work sampling category are similar to those observed in subsample s1
TD
Here, we evaluate systematic differences in Task Demand across work sampling variables.
Specifically, we use the plotLMER() function to visualize the bivariate distributions of the scores across work sampling categories, highlighting cases associated with AICw > .50. The function also allows inspecting the model diagnostics, i.e., the normality of residuals and random intercepts, and homoscedasticity.
First, we visually inspect the distributions by considering the original task categories.
grid.arrange(plotLMER(dat=s2.w[!is.na(s2.w$WHAT.first),],pred="WHAT.first",resp="TD",LMER=FALSE,
Xlab="Type of work activity",Ylab="Task Demand"),
plotLMER(dat=s2.w[!is.na(s2.w$HOW.first),],pred="HOW.first",resp="TD",LMER=FALSE,
Xlab="Mean of work",Ylab="Task Demand"),
plotLMER(dat=s2.w[!is.na(s2.w$WHOM.first),],pred="WHOM.first",resp="TD",LMER=FALSE,
Xlab="People working with",Ylab="Task Demand"),nrow=2)
Then, we replicate the same procedure on the simplified variables WHAT.first2, HOW.first2, and WHOM.first2. Activities marked as “other” are not considered.
grid.arrange(plotLMER(dat=s2.w[!is.na(s2.w$WHAT.first2),],pred="WHAT.first2",resp="TD",
Xlab="Type of work activity",Ylab="Task Demand"),
plotLMER(dat=s2.w[!is.na(s2.w$HOW.first2),],pred="HOW.first2",resp="TD",
Xlab="Mean of work",Ylab="Task Demand"),
plotLMER(dat=s2.w[!is.na(s2.w$WHOM.first2),],pred="WHOM.first2",resp="TD",
Xlab="People working with",Ylab="Task Demand"),nrow=2)
##
##
##
## Fitting LMER model on 950 observations from 87 subjects:
## TD ~ WHAT.first2 + (1|ID)
##
##
## AICw vs. null model = 1
## Likelihood Ratio Test: Chisq = 47.79 ( 4 ) p = 0
##
## Estimate Std. Error t value
## (Intercept) 4.20551679 0.10607058 39.6482855
## predData analysis \nand authoring 0.13618213 0.09900044 1.3755710
## predAdministrative \nactivities 0.04562887 0.12290283 0.3712597
## predSocial \nactivities -0.36025032 0.10428058 -3.4546252
## predBreak -0.90894821 0.18307347 -4.9649368
##
##
## Computing profile confidence intervals ...
## 2.5 % 97.5 %
## .sig01 0.66369773 0.9364688
## .sigma 0.92238104 1.0136466
## (Intercept) 3.99735408 4.4133679
## predData analysis \nand authoring -0.05786355 0.3298072
## predAdministrative \nactivities -0.19500618 0.2860151
## predSocial \nactivities -0.56420301 -0.1557611
## predBreak -1.26715497 -0.5503839
##
##
##
## Fitting LMER model on 1109 observations from 90 subjects:
## TD ~ HOW.first2 + (1|ID)
##
##
## AICw vs. null model = 1
## Likelihood Ratio Test: Chisq = 36.14 ( 2 ) p = 0
##
## Estimate Std. Error t value
## (Intercept) 4.2065977 0.09417250 44.669067
## predFace-to-face -0.2161395 0.08361971 -2.584792
## predOthers -0.6476170 0.10922619 -5.929137
##
##
## Computing profile confidence intervals ...
## 2.5 % 97.5 %
## .sig01 0.6771043 0.94436923
## .sigma 0.9646288 1.05216071
## (Intercept) 4.0214820 4.39187843
## predFace-to-face -0.3798971 -0.05216994
## predOthers -0.8615848 -0.43352655
##
##
##
## Fitting LMER model on 1108 observations from 90 subjects:
## TD ~ WHOM.first2 + (1|ID)
##
##
## AICw vs. null model = 0.077
Comments:
- results are similar to those reported for subsample s1
- for Task Demand, we do not need to evaluate the contribution of work sampling predictors while accounting for temporal predictors, since the latter did not show substantial effects in the previous step (see section 4.6.1)
TC
Here, we evaluate systematic differences in Task Control across work sampling variables.
Specifically, we use the plotLMER() function to visualize the bivariate distributions of the scores across work sampling categories, highlighting cases associated with AICw > .50. The function also allows inspecting the model diagnostics, i.e., the normality of residuals and random intercepts, and homoscedasticity.
First, we visually inspect the distributions by considering the original task categories.
grid.arrange(plotLMER(dat=s2.w[!is.na(s2.w$WHAT.first),],pred="WHAT.first",resp="TC",LMER=FALSE,
Xlab="Type of work activity",Ylab="Task Control"),
plotLMER(dat=s2.w[!is.na(s2.w$HOW.first),],pred="HOW.first",resp="TC",LMER=FALSE,
Xlab="Mean of work",Ylab="Task Control"),
plotLMER(dat=s2.w[!is.na(s2.w$WHOM.first),],pred="WHOM.first",resp="TC",LMER=FALSE,
Xlab="People working with",Ylab="Task Control"),nrow=2)
Then, we replicate the same procedure on the simplified variables WHAT.first2, HOW.first2, and WHOM.first2. Activities marked as “other” are not considered. Note that in subsample s2, temporal variables were not substantially predictive of differences in Task Control.
grid.arrange(plotLMER(dat=s2.w[!is.na(s2.w$WHAT.first2),],pred="WHAT.first2",resp="TC",
Xlab="Type of work activity",Ylab="Task Control"),
plotLMER(dat=s2.w[!is.na(s2.w$HOW.first2),],pred="HOW.first2",resp="TC",
Xlab="Mean of work",Ylab="Task Control"),
plotLMER(dat=s2.w[!is.na(s2.w$WHOM.first2),],pred="WHOM.first2",resp="TC",
Xlab="People working with",Ylab="Task Control"),nrow=2)
##
##
##
## Fitting LMER model on 927 observations from 87 subjects:
## TC ~ WHAT.first2 + (1|ID)
##
##
## AICw vs. null model = 1
## Likelihood Ratio Test: Chisq = 34.89 ( 4 ) p = 0
##
## Estimate Std. Error t value
## (Intercept) 4.3533857 0.1302773 33.4163154
## predData analysis \nand authoring 0.0892765 0.1174245 0.7602887
## predAdministrative \nactivities -0.1516957 0.1466022 -1.0347437
## predSocial \nactivities -0.5835605 0.1241707 -4.6996636
## predBreak -0.4902681 0.2175724 -2.2533564
##
##
## Computing profile confidence intervals ...
## 2.5 % 97.5 %
## .sig01 0.8277659 1.16578710
## .sigma 1.0729931 1.18076373
## (Intercept) 4.0979275 4.60895612
## predData analysis \nand authoring -0.1404614 0.31916561
## predAdministrative \nactivities -0.4384785 0.13522966
## predSocial \nactivities -0.8264334 -0.33961291
## predBreak -0.9158618 -0.06389152
##
##
##
## Fitting LMER model on 1084 observations from 90 subjects:
## TC ~ HOW.first2 + (1|ID)
##
##
## AICw vs. null model = 1
## Likelihood Ratio Test: Chisq = 55.62 ( 2 ) p = 0
##
## Estimate Std. Error t value
## (Intercept) 4.3870392 0.10839764 40.471721
## predFace-to-face -0.7090532 0.09582255 -7.399649
## predOthers -0.3979566 0.12468311 -3.191744
##
##
## Computing profile confidence intervals ...
## 2.5 % 97.5 %
## .sig01 0.7801543 1.0881076
## .sigma 1.0878543 1.1878669
## (Intercept) 4.1739174 4.6002754
## predFace-to-face -0.8967162 -0.5211738
## predOthers -0.6421375 -0.1534408
##
##
##
## Fitting LMER model on 1084 observations from 90 subjects:
## TC ~ WHOM.first2 + (1|ID)
##
##
## AICw vs. null model = 1
## Likelihood Ratio Test: Chisq = 88.2 ( 1 ) p = 0
##
## Estimate Std. Error t value
## (Intercept) 4.5148652 0.11240721 40.165264
## predWith others -0.7834857 0.08145566 -9.618555
##
##
## Computing profile confidence intervals ...
## 2.5 % 97.5 %
## .sig01 0.8042388 1.1178374
## .sigma 1.0679268 1.1661098
## (Intercept) 4.2941264 4.7362792
## predWith others -0.9430613 -0.6231173
Second, we specify a set of nested LMER models (i.e., hierarchical multiple regression) to evaluate the importance of each additional work sampling variable while controlling for those variables associated with substantial differences in the previous step (i.e., temporal variables). An additional variable is retained in the following models only when its inclusion is associated with an AICw > .50.
# work sampling (WHAT) by controlling for temporal variables
TC.null <- lmer(TC~(1|ID),data=s2.w[!is.na(s2.w$WHAT.first2),])
TC.day.of.assessment <- lmer(TC~Day.of.assessment+(1|ID),data=s2.w[!is.na(s2.w$WHAT.first2),])
TC.WHAT <- lmer(TC~Day.of.assessment+WHAT.first2+(1|ID),data=s2.w[!is.na(s2.w$WHAT.first2),])
Weights(AIC(TC.null,TC.day.of.assessment,TC.WHAT)) # WHAT AICw = 1
## model weights
## [1] 0.000 0.000 0.999
round(summary(TC.WHAT)$coefficients,3)
## Estimate Std. Error t value
## (Intercept) 4.490 0.142 31.680
## Day.of.assessmentDay 2 -0.246 0.092 -2.675
## Day.of.assessmentDay 3 -0.143 0.091 -1.568
## WHAT.first2Data analysis \nand authoring 0.072 0.117 0.610
## WHAT.first2Administrative \nactivities -0.179 0.147 -1.219
## WHAT.first2Social \nactivities -0.591 0.124 -4.771
## WHAT.first2Break -0.523 0.217 -2.405
# work sampling (HOW) by controlling for temporal variables
TC.null <- lmer(TC~(1|ID),data=s2.w[!is.na(s2.w$HOW.first2),])
TC.day.of.assessment <- lmer(TC~Day.of.assessment+(1|ID),data=s2.w[!is.na(s2.w$HOW.first2),])
TC.HOW <- lmer(TC~Day.of.assessment+HOW.first2+(1|ID),data=s2.w[!is.na(s2.w$HOW.first2),])
Weights(AIC(TC.null,TC.day.of.assessment,TC.HOW)) # HOW AICw = 1
## model weights
## [1] 0 0 1
round(summary(TC.HOW)$coefficients,3)
## Estimate Std. Error t value
## (Intercept) 4.502 0.118 38.155
## Day.of.assessmentDay 2 -0.209 0.085 -2.460
## Day.of.assessmentDay 3 -0.156 0.085 -1.834
## HOW.first2Face-to-face -0.686 0.096 -7.141
## HOW.first2Others -0.402 0.124 -3.230
# work sampling (WHOM) by controlling for temporal variables
TC.null <- lmer(TC~(1|ID),data=s2.w[!is.na(s2.w$WHOM.first2),])
TC.day.of.assessment <- lmer(TC~Day.of.assessment+(1|ID),data=s2.w[!is.na(s2.w$WHOM.first2),])
TC.WHOM <- lmer(TC~Day.of.assessment+WHOM.first2+(1|ID),data=s2.w[!is.na(s2.w$WHOM.first2),])
Weights(AIC(TC.null,TC.day.of.assessment,TC.WHOM)) # WHOM AICw = 1
## model weights
## [1] 0 0 1
round(summary(TC.WHOM)$coefficients,3)
## Estimate Std. Error t value
## (Intercept) 4.645 0.121 38.235
## Day.of.assessmentDay 2 -0.219 0.083 -2.630
## Day.of.assessmentDay 3 -0.190 0.083 -2.288
## WHOM.first2With others -0.774 0.081 -9.524
Comments:
- results are similar to those reported for subsample s1
NV
Here, we evaluate systematic differences in Negative Valence across work sampling variables.
Specifically, we use the plotLMER() function to visualize the bivariate distributions of the scores across work sampling categories, highlighting cases associated with AICw > .50. The function also allows inspecting the model diagnostics, i.e., the normality of residuals and random intercepts, and homoscedasticity.
First, we visually inspect the distributions by considering the original task categories.
grid.arrange(plotLMER(dat=s2.w[!is.na(s2.w$WHAT.first),],pred="WHAT.first",resp="NV",LMER=FALSE,
Xlab="Type of work activity",Ylab="Negative Valence"),
plotLMER(dat=s2.w[!is.na(s2.w$HOW.first),],pred="HOW.first",resp="NV",LMER=FALSE,
Xlab="Mean of work",Ylab="Negative Valence"),
plotLMER(dat=s2.w[!is.na(s2.w$WHOM.first),],pred="WHOM.first",resp="NV",LMER=FALSE,
Xlab="People working with",Ylab="Negative Valence"),nrow=2)
Then, we replicate the same procedure on the simplified variables WHAT.first2, HOW.first2, and WHOM.first2. Activities marked as “other” are not considered.
grid.arrange(plotLMER(dat=s2.w[!is.na(s2.w$WHAT.first2),],pred="WHAT.first2",resp="NV",
Xlab="Type of work activity",Ylab="Negative Valence"),
plotLMER(dat=s2.w[!is.na(s2.w$HOW.first2),],pred="HOW.first2",resp="NV",
Xlab="Mean of work",Ylab="Negative Valence"),
plotLMER(dat=s2.w[!is.na(s2.w$WHOM.first2),],pred="WHOM.first2",resp="NV",
Xlab="People working with",Ylab="Negative Valence"),nrow=2)
##
##
##
## Fitting LMER model on 951 observations from 87 subjects:
## NV ~ WHAT.first2 + (1|ID)
##
##
## AICw vs. null model = 0.999
## Likelihood Ratio Test: Chisq = 33.5 ( 4 ) p = 0
##
## Estimate Std. Error t value
## (Intercept) 3.283491994 0.09639320 34.06352393
## predData analysis \nand authoring 0.340850601 0.08378227 4.06829053
## predAdministrative \nactivities 0.097496818 0.10443885 0.93353016
## predSocial \nactivities -0.007333337 0.08786532 -0.08346111
## predBreak -0.394266790 0.15449293 -2.55200541
##
##
## Computing profile confidence intervals ...
## 2.5 % 97.5 %
## .sig01 0.6278810 0.88011782
## .sigma 0.7763463 0.85318078
## (Intercept) 3.0939614 3.47231573
## predData analysis \nand authoring 0.1765408 0.50471627
## predAdministrative \nactivities -0.1068277 0.30191255
## predSocial \nactivities -0.1803808 0.16463962
## predBreak -0.6968931 -0.09199046
##
##
##
## Fitting LMER model on 1112 observations from 90 subjects:
## NV ~ HOW.first2 + (1|ID)
##
##
## AICw vs. null model = 0.012
##
##
##
##
## Fitting LMER model on 1108 observations from 90 subjects:
## NV ~ WHOM.first2 + (1|ID)
##
##
## AICw vs. null model = 0.112
Second, we specify a set of nested LMER models (i.e., hierarchical multiple regression) to evaluate the importance of each additional work sampling variable while controlling for those variables associated with substantial differences in the previous step (i.e., temporal variables). An additional variable is retained in the following models only when its inclusion is associated with an AICw > .50.
# WHAT by controlling for temporal variables
NV.null2 <- lmer(NV~(1|ID),data=s2.w[!is.na(s2.w$WHAT.first2),])
NV.day.of.week2 <- lmer(NV~Day.of.week+(1|ID),data=s2.w[!is.na(s2.w$WHAT.first2),])
NV.day.of.assessment2 <- lmer(NV~Day.of.week+Day.of.assessment+(1|ID),data=s2.w[!is.na(s2.w$WHAT.first2),])
NV.surveyNumber2 <- lmer(NV~Day.of.week+Day.of.assessment+as.factor(surveyNumber)+(1|ID),
data=s2.w[!is.na(s2.w$WHAT.first2),])
NV.WHAT2 <- lmer(NV~Day.of.week+Day.of.assessment+as.factor(surveyNumber)+WHAT.first2+(1|ID),data=s2.w)
Weights(AIC(NV.null2,NV.day.of.week2,NV.day.of.assessment2,NV.surveyNumber2,NV.WHAT2)) # null AICw = .97
## model weights
## [1] 0.970 0.011 0.012 0.000 0.007
round(summary(NV.WHAT2)$coefficients,3)
## Estimate Std. Error t value
## (Intercept) 3.150 0.108 29.178
## Day.of.weekWednesday -0.117 0.055 -2.119
## Day.of.weekFriday -0.097 0.055 -1.769
## Day.of.assessmentDay 2 0.190 0.055 3.482
## Day.of.assessmentDay 3 0.203 0.055 3.699
## as.factor(surveyNumber)2 0.114 0.079 1.448
## as.factor(surveyNumber)3 0.124 0.081 1.530
## as.factor(surveyNumber)4 0.177 0.085 2.077
## as.factor(surveyNumber)5 0.327 0.098 3.332
## as.factor(surveyNumber)6 0.101 0.124 0.814
## as.factor(surveyNumber)7 0.138 0.201 0.690
## WHAT.first2Data analysis \nand authoring 0.224 0.070 3.215
## WHAT.first2Administrative \nactivities 0.145 0.086 1.690
## WHAT.first2Social \nactivities -0.017 0.074 -0.225
## WHAT.first2Break -0.299 0.132 -2.258
Comments:
- results are similar to those reported for subsample s1
TA
Here, we evaluate systematic differences in Tense Arousal across work sampling variables.
Specifically, we use the plotLMER() function to visualize the bivariate distributions of the scores across work sampling categories, highlighting cases associated with AICw > .50. The function also allows inspecting the model diagnostics, i.e., the normality of residuals and random intercepts, and homoscedasticity.
First, we visually inspect the distributions by considering the original task categories.
grid.arrange(plotLMER(dat=s2.w[!is.na(s2.w$WHAT.first),],pred="WHAT.first",resp="TA",LMER=FALSE,
Xlab="Type of work activity",Ylab="Tense Arousal"),
plotLMER(dat=s2.w[!is.na(s2.w$HOW.first),],pred="HOW.first",resp="TA",LMER=FALSE,
Xlab="Mean of work",Ylab="Tense Arousal"),
plotLMER(dat=s2.w[!is.na(s2.w$WHOM.first),],pred="WHOM.first",resp="TA",LMER=FALSE,
Xlab="People working with",Ylab="Tense Arousal"),nrow=2)
Then, we replicate the same procedure on the simplified variables WHAT.first2, HOW.first2, and WHOM.first2. Activities marked as “other” are not considered.
grid.arrange(plotLMER(dat=s2.w[!is.na(s2.w$WHAT.first2),],pred="WHAT.first2",resp="TA",
Xlab="Type of work activity",Ylab="Tense Arousal"),
plotLMER(dat=s2.w[!is.na(s2.w$HOW.first2),],pred="HOW.first2",resp="TA",
Xlab="Mean of work",Ylab="Tense Arousal"),
plotLMER(dat=s2.w[!is.na(s2.w$WHOM.first2),],pred="WHOM.first2",resp="TA",
Xlab="People working with",Ylab="Tense Arousal"),nrow=2)
##
##
##
## Fitting LMER model on 951 observations from 87 subjects:
## TA ~ WHAT.first2 + (1|ID)
##
##
## AICw vs. null model = 0.527
## Likelihood Ratio Test: Chisq = 18.81 ( 4 ) p = 0.001
##
## Estimate Std. Error t value
## (Intercept) 3.3299214 0.10613568 31.3741936
## predData analysis \nand authoring 0.3101944 0.09055653 3.4254229
## predAdministrative \nactivities 0.0827901 0.11298283 0.7327671
## predSocial \nactivities -0.0106359 0.09492569 -0.1120445
## predBreak -0.1938140 0.16686758 -1.1614837
##
##
## Computing profile confidence intervals ...
## 2.5 % 97.5 %
## .sig01 0.6991843 0.9748640
## .sigma 0.8380417 0.9208978
## (Intercept) 3.1211711 3.5378164
## predData analysis \nand authoring 0.1322531 0.4873610
## predAdministrative \nactivities -0.1382234 0.3039964
## predSocial \nactivities -0.1975134 0.1751429
## predBreak -0.5210194 0.1325773
##
##
##
## Fitting LMER model on 1112 observations from 90 subjects:
## TA ~ HOW.first2 + (1|ID)
##
##
## AICw vs. null model = 0.009
##
##
##
##
## Fitting LMER model on 1108 observations from 90 subjects:
## TA ~ WHOM.first2 + (1|ID)
##
##
## AICw vs. null model = 0.055
Second, we specify a set of nested LMER models (i.e., hierarchical multiple regression) to evaluate the importance of each additional work sampling variable while controlling for those variables associated with substantial differences in the previous step, including temporal variables. An additional variable is retained in the following models only when its inclusion is associated with an AICw > .50.
# WHAT by controlling for temporal variables
TA.null2 <- lmer(TA~(1|ID),data=s2.w[!is.na(s2.w$WHAT.first2),])
TA.time2 <- lmer(TA~Day.of.week+Day.of.assessment+(1|ID),data=s2.w[!is.na(s2.w$WHAT.first2),])
TA.WHAT2 <- lmer(TA~Day.of.week+Day.of.assessment+WHAT.first2+(1|ID),data=s2.w[!is.na(s2.w$WHAT.first2),])
Weights(AIC(TA.null2,TA.time2,TA.WHAT2)) # time AICw = .51
## model weights
## [1] 0.097 0.514 0.389
Comments:
- contrary to what was found for subsample s1, Tense Arousal shows higher scores in “Data analysis & authoring” and lower scores in “Breaks” compared to “Information acquisition”
- however, when the day of participation is included in the model (see section 4.6.1), the effect of WHAT.first2 is no longer substantial, consistently with subsample s1
- no substantial differences are detected across the HOW and WHOM categories, consistently with subsample s1
FA
Here, we evaluate systematic differences in Fatigue across work sampling variables.
Specifically, we use the plotLMER() function to visualize the bivariate distributions of the scores across work sampling categories, highlighting cases associated with AICw > .50. The function also allows inspecting the model diagnostics, i.e., the normality of residuals and random intercepts, and homoscedasticity.
First, we visually inspect the distributions by considering the original task categories.
grid.arrange(plotLMER(dat=s2.w[!is.na(s2.w$WHAT.first),],pred="WHAT.first",resp="FA",LMER=FALSE,
Xlab="Type of work activity",Ylab="Fatigue"),
plotLMER(dat=s2.w[!is.na(s2.w$HOW.first),],pred="HOW.first",resp="FA",LMER=FALSE,
Xlab="Mean of work",Ylab="Fatigue"),
plotLMER(dat=s2.w[!is.na(s2.w$WHOM.first),],pred="WHOM.first",resp="FA",LMER=FALSE,
Xlab="People working with",Ylab="Fatigue"),nrow=2)
Then, we replicate the same procedure on the simplified variables WHAT.first2, HOW.first2, and WHOM.first2. Activities marked as “other” are not considered.
grid.arrange(plotLMER(dat=s2.w[!is.na(s2.w$WHAT.first2),],pred="WHAT.first2",resp="FA",
Xlab="Type of work activity",Ylab="Fatigue"),
plotLMER(dat=s2.w[!is.na(s2.w$HOW.first2),],pred="HOW.first2",resp="FA",
Xlab="Mean of work",Ylab="Fatigue"),
plotLMER(dat=s2.w[!is.na(s2.w$WHOM.first2),],pred="WHOM.first2",resp="FA",
Xlab="People working with",Ylab="Fatigue"),nrow=2)
##
##
##
## Fitting LMER model on 951 observations from 87 subjects:
## FA ~ WHAT.first2 + (1|ID)
##
##
## AICw vs. null model = 0.002
##
##
##
##
## Fitting LMER model on 1112 observations from 90 subjects:
## FA ~ HOW.first2 + (1|ID)
##
##
## AICw vs. null model = 0.046
##
##
##
##
## Fitting LMER model on 1108 observations from 90 subjects:
## FA ~ WHOM.first2 + (1|ID)
##
##
## AICw vs. null model = 0.316
Comments:
- results are similar to those reported for subsample s1
EXAMPLE PLOTS
Here, we plot some examples of substantial differences.
p1 <- plotLMER(dat=s2.w[!is.na(s2.w$WHAT.first2),],pred="WHAT.first2",resp="NV",
Xlab="Type of work activity",Ylab="Negative Valence",text.size=12)
p2 <- plotLMER(dat=s2.w[!is.na(s2.w$WHAT.first2),],pred="WHAT.first2",resp="TC",
Xlab="Type of work activity",Ylab="Task Control",text.size=12)
p3 <- plotLMER(dat=s2.w[!is.na(s2.w$HOW.first2),],pred="HOW.first2",resp="TD",
Xlab="Mean of work",Ylab="Task Demand",text.size=16)
p4 <- plotLMER(dat=s2.w[!is.na(s2.w$WHOM.first2),],pred="WHOM.first2",resp="TC",
Xlab="People working with",Ylab="Task Control",text.size=16)
gridExtra::grid.arrange(p1,p2,p3,p4,nrow=2)
SUBSAMPLE s3
DESCRIPTIVES
First, we recode work sampling variables by isolating only the first choice indicated in each data entry. This is done with the workSamplingRecode() function.
s3.w <- workSamplingRecode(s3.w)
##
##
## WHAT:
## ACQUISITION = 476 , 28.07 %
## ANALYSIS = 241 , 14.21 %
## AUTHORING = 177 , 10.44 %
## NETWORKING = 180 , 10.61 %
## DISSEMINATION = 86 , 5.07 %
## ADMINISTRATIVE = 253 , 14.92 %
## BREAK = 53 , 3.12 %
## OTHER = 230 , 13.56 %
## Number of entries with more than 1 option = 453 , 26.71 %
##
## HOW:
## PC = 1036 , 61.08 %
## FACE2FACE = 403 , 23.76 %
## PHONE = 62 , 3.66 %
## SKYPE = 11 , 0.65 %
## SMARTPHONE = 11 , 0.65 %
## PAPER = 50 , 2.95 %
## OTHER = 123 , 7.25 %
## Number of entries with more than 1 option = 417 , 24.59 %
##
## WHOM:
## ALONE = 924 , 54.61 %
## COLL = 560 , 33.1 %
## OVER = 63 , 3.72 %
## UNDER = 44 , 2.6 %
## CUSTOMER = 46 , 2.72 %
## EXTERNAL = 18 , 1.06 %
## FAMILY = 10 , 0.59 %
## OTHER = 27 , 1.6 %
## Number of entries with more than 1 option = 207 , 12.21 %
Comments:
- we note that the number of observations is highly unbalanced across the types of activity
Thus, we recode them in an alternative way:
# WHAT
s3.w[!is.na(s3.w$WHAT.first) & s3.w$WHAT.first=="ACQUISITION","WHAT.first2"] <- "Information \nacquisition"
s3.w[!is.na(s3.w$WHAT.first) & (s3.w$WHAT.first=="ANALYSIS"|s3.w$WHAT.first=="AUTHORING"),"WHAT.first2"] <- "Data analysis \nand authoring"
s3.w[!is.na(s3.w$WHAT.first) & (s3.w$WHAT.first=="DISSEMINATION"|s3.w$WHAT.first=="NETWORKING"),"WHAT.first2"] <- "Social \nactivities"
s3.w[!is.na(s3.w$WHAT.first) & s3.w$WHAT.first=="ADMINISTRATIVE","WHAT.first2"] <- "Administrative \nactivities"
s3.w[!is.na(s3.w$WHAT.first) & s3.w$WHAT.first=="BREAK","WHAT.first2"] <- "Break"
s3.w$WHAT.first2 <- factor(s3.w$WHAT.first2,
levels=c("Information \nacquisition","Data analysis \nand authoring","Administrative \nactivities",
"Social \nactivities","Break"))
round(rbind(summary(s3.w$WHAT.first2),summary(s3.w$WHAT.first2)/nrow(s3.w[!is.na(s3.w$WHAT.first2),])),2)
## Information \nacquisition Data analysis \nand authoring
## [1,] 476.00 418.00
## [2,] 0.32 0.29
## Administrative \nactivities Social \nactivities Break NA's
## [1,] 253.00 266.00 53.00 513.00
## [2,] 0.17 0.18 0.04 0.35
# HOW
s3.w[!is.na(s3.w$HOW.first) & s3.w$HOW.first=="PC","HOW.first2"] <- "Computer"
s3.w[!is.na(s3.w$HOW.first) & s3.w$HOW.first=="FACE2FACE","HOW.first2"] <- "Face-to-face"
s3.w[!is.na(s3.w$HOW.first) & s3.w$HOW.first!="PC" & s3.w$HOW.first!="FACE2FACE","HOW.first2"] <- "Others"
s3.w$HOW.first2 <- factor(s3.w$HOW.first2,levels=c("Computer","Face-to-face","Others"))
round(rbind(summary(s3.w$HOW.first2),summary(s3.w$HOW.first2)/nrow(s3.w[!is.na(s3.w$HOW.first2),])),2)
## Computer Face-to-face Others NA's
## [1,] 1036.00 403.00 257.00 283.00
## [2,] 0.61 0.24 0.15 0.17
# WHOM
s3.w[!is.na(s3.w$WHOM.first) & s3.w$WHOM.first=="ALONE","WHOM.first2"] <- "Alone"
s3.w[!is.na(s3.w$WHOM.first) & s3.w$WHOM.first!="ALONE","WHOM.first2"] <- "With others"
s3.w$WHOM.first2 <- as.factor(s3.w$WHOM.first2)
round(rbind(summary(s3.w$WHOM.first2),summary(s3.w$WHOM.first2)/nrow(s3.w[!is.na(s3.w$WHOM.first2),])),2)
## Alone With others NA's
## [1,] 924.00 768.00 287.00
## [2,] 0.55 0.45 0.17
Comments:
- the proportions of activities in each work sampling category are similar to those observed in subsample s1
TD
Here, we evaluate systematic differences in Task Demand across work sampling variables.
Specifically, we use the plotLMER() function to visualize the bivariate score distributions while highlighting cases associated with AICw > .50. The function also allows visualizing the model diagnostics, in terms of the normal fit of residuals and random intercepts, and of homoscedasticity.
grid.arrange(plotLMER(dat=s3.w[!is.na(s3.w$WHAT.first),],pred="WHAT.first",resp="TD",LMER=FALSE,
Xlab="Type of work activity",Ylab="Task Demand"),
plotLMER(dat=s3.w[!is.na(s3.w$HOW.first),],pred="HOW.first",resp="TD",LMER=FALSE,
Xlab="Mean of work",Ylab="Task Demand"),
plotLMER(dat=s3.w[!is.na(s3.w$WHOM.first),],pred="WHOM.first",resp="TD",LMER=FALSE,
Xlab="People working with",Ylab="Task Demand"),nrow=2)
Then, we replicate the same procedure on the simplified variables WHAT.first2, HOW.first2, and WHOM.first2. Activities marked as “other” are not considered.
grid.arrange(plotLMER(dat=s3.w[!is.na(s3.w$WHAT.first2),],pred="WHAT.first2",resp="TD",
Xlab="Type of work activity",Ylab="Task Demand"),
plotLMER(dat=s3.w[!is.na(s3.w$HOW.first2),],pred="HOW.first2",resp="TD",
Xlab="Mean of work",Ylab="Task Demand"),
plotLMER(dat=s3.w[!is.na(s3.w$WHOM.first2),],pred="WHOM.first2",resp="TD",
Xlab="People working with",Ylab="Task Demand"),nrow=2)
##
##
##
## Fitting LMER model on 1464 observations from 169 subjects:
## TD ~ WHAT.first2 + (1|ID)
##
##
## AICw vs. null model = 1
## Likelihood Ratio Test: Chisq = 59.16 ( 4 ) p = 0
##
## Estimate Std. Error t value
## (Intercept) 4.1902189 0.08143745 51.4532188
## predData analysis \nand authoring 0.1094689 0.07852391 1.3940835
## predAdministrative \nactivities -0.0481618 0.09878097 -0.4875616
## predSocial \nactivities -0.2750426 0.08436595 -3.2601136
## predBreak -0.9242770 0.15077875 -6.1300213
##
##
## Computing profile confidence intervals ...
## 2.5 % 97.5 %
## .sig01 0.70831319 0.9137742
## .sigma 0.91503766 0.9881162
## (Intercept) 4.03062052 4.3498107
## predData analysis \nand authoring -0.04443972 0.2631753
## predAdministrative \nactivities -0.24152736 0.1452942
## predSocial \nactivities -0.44022816 -0.1098129
## predBreak -1.21950017 -0.6289318
##
##
##
## Fitting LMER model on 1692 observations from 174 subjects:
## TD ~ HOW.first2 + (1|ID)
##
##
## AICw vs. null model = 1
## Likelihood Ratio Test: Chisq = 48.94 ( 2 ) p = 0
##
## Estimate Std. Error t value
## (Intercept) 4.2255385 0.07096335 59.545361
## predFace-to-face -0.2156873 0.06950668 -3.103116
## predOthers -0.5627658 0.08145087 -6.909267
##
##
## Computing profile confidence intervals ...
## 2.5 % 97.5 %
## .sig01 0.7149556 0.91493474
## .sigma 0.9596203 1.03018419
## (Intercept) 4.0863231 4.36492867
## predFace-to-face -0.3518848 -0.07948102
## predOthers -0.7224304 -0.40319382
##
##
##
## Fitting LMER model on 1691 observations from 174 subjects:
## TD ~ WHOM.first2 + (1|ID)
##
##
## AICw vs. null model = 0.186
Comments:
- results are similar to those reported for subsample s1
- for Task Demand, we do not need to evaluate the contribution of the work sampling predictors while accounting for temporal predictors, since the latter did not show substantial effects in the previous step (see section 4.6.1)
TC
Here, we evaluate systematic differences in Task Control across work sampling variables.
Specifically, we use the plotLMER() function to visualize the bivariate score distributions while highlighting cases associated with AICw > .50. The function also allows visualizing the model diagnostics, in terms of the normal fit of residuals and random intercepts, and of homoscedasticity.
grid.arrange(plotLMER(dat=s3.w[!is.na(s3.w$WHAT.first),],pred="WHAT.first",resp="TC",LMER=FALSE,
Xlab="Type of work activity",Ylab="Task Control"),
plotLMER(dat=s3.w[!is.na(s3.w$HOW.first),],pred="HOW.first",resp="TC",LMER=FALSE,
Xlab="Mean of work",Ylab="Task Control"),
plotLMER(dat=s3.w[!is.na(s3.w$WHOM.first),],pred="WHOM.first",resp="TC",LMER=FALSE,
Xlab="People working with",Ylab="Task Control"),nrow=2)
Then, we replicate the same procedure on the simplified variables WHAT.first2, HOW.first2, and WHOM.first2. Activities marked as “other” are not considered.
grid.arrange(plotLMER(dat=s3.w[!is.na(s3.w$WHAT.first2),],pred="WHAT.first2",resp="TC",
Xlab="Type of work activity",Ylab="Task Control"),
plotLMER(dat=s3.w[!is.na(s3.w$HOW.first2),],pred="HOW.first2",resp="TC",
Xlab="Mean of work",Ylab="Task Control"),
plotLMER(dat=s3.w[!is.na(s3.w$WHOM.first2),],pred="WHOM.first2",resp="TC",
Xlab="People working with",Ylab="Task Control"),nrow=2)
##
##
##
## Fitting LMER model on 1431 observations from 169 subjects:
## TC ~ WHAT.first2 + (1|ID)
##
##
## AICw vs. null model = 1
## Likelihood Ratio Test: Chisq = 37.34 ( 4 ) p = 0
##
## Estimate Std. Error t value
## (Intercept) 4.34933298 0.09715141 44.7686024
## predData analysis \nand authoring 0.01930173 0.09157441 0.2107764
## predAdministrative \nactivities -0.07255802 0.11610932 -0.6249112
## predSocial \nactivities -0.51116387 0.09876395 -5.1756118
## predBreak -0.38845275 0.17574929 -2.2102664
##
##
## Computing profile confidence intervals ...
## 2.5 % 97.5 %
## .sig01 0.8550104 1.10083526
## .sigma 1.0501838 1.13519423
## (Intercept) 4.1589336 4.53973661
## predData analysis \nand authoring -0.1599590 0.19868766
## predAdministrative \nactivities -0.2999870 0.15471941
## predSocial \nactivities -0.7045057 -0.31737717
## predBreak -0.7324759 -0.04393584
##
##
##
## Fitting LMER model on 1655 observations from 174 subjects:
## TC ~ HOW.first2 + (1|ID)
##
##
## AICw vs. null model = 1
## Likelihood Ratio Test: Chisq = 76.59 ( 2 ) p = 0
##
## Estimate Std. Error t value
## (Intercept) 4.3946648 0.08245482 53.297852
## predFace-to-face -0.6763203 0.07837000 -8.629837
## predOthers -0.3843251 0.09195291 -4.179586
##
##
## Computing profile confidence intervals ...
## 2.5 % 97.5 %
## .sig01 0.8380041 1.0726044
## .sigma 1.0654265 1.1448670
## (Intercept) 4.2328591 4.5566084
## predFace-to-face -0.8298315 -0.5225711
## predOthers -0.5644431 -0.2038768
##
##
##
## Fitting LMER model on 1655 observations from 174 subjects:
## TC ~ WHOM.first2 + (1|ID)
##
##
## AICw vs. null model = 1
## Likelihood Ratio Test: Chisq = 121.56 ( 1 ) p = 0
##
## Estimate Std. Error t value
## (Intercept) 4.5177599 0.08476414 53.29801
## predWith others -0.7334652 0.06507147 -11.27169
##
##
## Computing profile confidence intervals ...
## 2.5 % 97.5 %
## .sig01 0.8523682 1.0886918
## .sigma 1.0480357 1.1261862
## (Intercept) 4.3515469 4.6844277
## predWith others -0.8610304 -0.6054292
Second, we specify a set of nested LMER models (i.e., hierarchical multiple regression) to evaluate the importance of each additional work sampling variable while controlling for the variables associated with substantial differences in the previous step (i.e., the temporal variables). A variable is considered in the following models only when its inclusion is associated with an AICw > .50.
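The “Likelihood Ratio Test” lines in the plotLMER() output above compare each predictor model against the corresponding null model; the same test can be obtained for any pair of nested lmer models with anova(), which refits them with ML before comparing. A minimal sketch using the Task Control models of this subsample:
# Likelihood ratio test complementing the Akaike weights (illustrative sketch;
# anova() refits both lmer models with ML before comparing them)
m0 <- lmer(TC~(1|ID),data=s3.w[!is.na(s3.w$WHAT.first2),])
m1 <- lmer(TC~WHAT.first2+(1|ID),data=s3.w[!is.na(s3.w$WHAT.first2),])
anova(m0,m1) # Chisq, df, and p-value for adding WHAT.first2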
# work sampling (WHAT) by controlling for temporal variables
TC.null <- lmer(TC~(1|ID),data=s3.w[!is.na(s3.w$WHAT.first2),])
TC.day.of.assessment <- lmer(TC~Day.of.assessment+(1|ID),data=s3.w[!is.na(s3.w$WHAT.first2),])
TC.WHAT <- lmer(TC~Day.of.assessment+WHAT.first2+(1|ID),data=s3.w[!is.na(s3.w$WHAT.first2),])
Weights(AIC(TC.null,TC.day.of.assessment,TC.WHAT)) # WHAT AICw = 1
## model weights
## [1] 0 0 1
round(summary(TC.WHAT)$coefficients,3)
## Estimate Std. Error t value
## (Intercept) 4.483 0.105 42.694
## Day.of.assessmentDay 2 -0.258 0.072 -3.600
## Day.of.assessmentDay 3 -0.156 0.072 -2.157
## WHAT.first2Data analysis \nand authoring 0.014 0.091 0.149
## WHAT.first2Administrative \nactivities -0.094 0.116 -0.814
## WHAT.first2Social \nactivities -0.508 0.098 -5.161
## WHAT.first2Break -0.414 0.175 -2.364
# work sampling (HOW) by controlling for temporal variables
TC.null <- lmer(TC~(1|ID),data=s3.w[!is.na(s3.w$HOW.first2),])
TC.day.of.assessment <- lmer(TC~Day.of.assessment+(1|ID),data=s3.w[!is.na(s3.w$HOW.first2),])
TC.HOW <- lmer(TC~Day.of.assessment+HOW.first2+(1|ID),data=s3.w[!is.na(s3.w$HOW.first2),])
Weights(AIC(TC.null,TC.day.of.assessment,TC.HOW)) # HOW AICw = 1
## model weights
## [1] 0 0 1
round(summary(TC.HOW)$coefficients,3)
## Estimate Std. Error t value
## (Intercept) 4.507 0.089 50.525
## Day.of.assessmentDay 2 -0.237 0.067 -3.542
## Day.of.assessmentDay 3 -0.138 0.068 -2.027
## HOW.first2Face-to-face -0.653 0.079 -8.320
## HOW.first2Others -0.387 0.092 -4.216
# work sampling (WHOM) by controlling for temporal variables
TC.null <- lmer(TC~(1|ID),data=s3.w[!is.na(s3.w$WHOM.first2),])
TC.day.of.assessment <- lmer(TC~Day.of.assessment+(1|ID),data=s3.w[!is.na(s3.w$WHOM.first2),])
TC.WHOM <- lmer(TC~Day.of.assessment+WHOM.first2+(1|ID),data=s3.w[!is.na(s3.w$WHOM.first2),])
Weights(AIC(TC.null,TC.day.of.assessment,TC.WHOM)) # WHOM AICw = 1
## model weights
## [1] 0 0 1
round(summary(TC.WHOM)$coefficients,3)
## Estimate Std. Error t value
## (Intercept) 4.645 0.091 50.920
## Day.of.assessmentDay 2 -0.250 0.066 -3.822
## Day.of.assessmentDay 3 -0.169 0.067 -2.538
## WHOM.first2With others -0.725 0.065 -11.178
Comments:
- results are similar to those reported for subsample s1
NV
Here, we evaluate systematic differences in Negative Valence across work sampling variables.
Specifically, we use the plotLMER() function to visualize the bivariate score distributions while highlighting cases associated with AICw > .50. The function also allows visualizing the model diagnostics, in terms of the normal fit of residuals and random intercepts, and of homoscedasticity.
First, we visually inspect the distributions by considering the original task categories.
grid.arrange(plotLMER(dat=s3.w[!is.na(s3.w$WHAT.first),],pred="WHAT.first",resp="NV",LMER=FALSE,
Xlab="Type of work activity",Ylab="Negative Valence"),
plotLMER(dat=s3.w[!is.na(s3.w$HOW.first),],pred="HOW.first",resp="NV",LMER=FALSE,
Xlab="Mean of work",Ylab="Negative Valence"),
plotLMER(dat=s3.w[!is.na(s3.w$WHOM.first),],pred="WHOM.first",resp="NV",LMER=FALSE,
Xlab="People working with",Ylab="Negative Valence"),nrow=2)
Then, we replicate the same procedure on the simplified variables WHAT.first2, HOW.first2, and WHOM.first2. Activities marked as “other” are not considered.
grid.arrange(plotLMER(dat=s3.w[!is.na(s3.w$WHAT.first2),],pred="WHAT.first2",resp="NV",
Xlab="Type of work activity",Ylab="Negative Valence"),
plotLMER(dat=s3.w[!is.na(s3.w$HOW.first2),],pred="HOW.first2",resp="NV",
Xlab="Mean of work",Ylab="Negative Valence"),
plotLMER(dat=s3.w[!is.na(s3.w$WHOM.first2),],pred="WHOM.first2",resp="NV",
Xlab="People working with",Ylab="Negative Valence"),nrow=2)
##
##
##
## Fitting LMER model on 1466 observations from 169 subjects:
## NV ~ WHAT.first2 + (1|ID)
##
##
## AICw vs. null model = 0.705
## Likelihood Ratio Test: Chisq = 22.67 ( 4 ) p = 0
##
## Estimate Std. Error t value
## (Intercept) 3.32296484 0.07179179 46.2861381
## predData analysis \nand authoring 0.19107163 0.06665961 2.8663780
## predAdministrative \nactivities 0.10189881 0.08417425 1.2105698
## predSocial \nactivities -0.04538255 0.07152004 -0.6345431
## predBreak -0.29458846 0.12789086 -2.3034363
##
##
## Computing profile confidence intervals ...
## 2.5 % 97.5 %
## .sig01 0.64046634 0.82502280
## .sigma 0.77483366 0.83678015
## (Intercept) 3.18217275 3.46362085
## predData analysis \nand authoring 0.06050332 0.32157013
## predAdministrative \nactivities -0.06287058 0.26679339
## predSocial \nactivities -0.18571202 0.09462687
## predBreak -0.54503400 -0.04411764
##
##
##
## Fitting LMER model on 1696 observations from 174 subjects:
## NV ~ HOW.first2 + (1|ID)
##
##
## AICw vs. null model = 0.027
##
##
##
##
## Fitting LMER model on 1692 observations from 174 subjects:
## NV ~ WHOM.first2 + (1|ID)
##
##
## AICw vs. null model = 0.096
Second, we specify a set of nested LMER models (i.e., hierarchical multiple regression) to evaluate the importance of each additional work sampling variable while controlling for the variables associated with substantial differences in the previous steps, including the temporal variables. A variable is considered in the following models only when its inclusion is associated with an AICw > .50.
# WHAT by controlling for temporal variables
NV.null2 <- lmer(NV~(1|ID),data=s3.w[!is.na(s3.w$WHAT.first2),])
NV.day.of.week2 <- lmer(NV~Day.of.week+(1|ID),data=s3.w[!is.na(s3.w$WHAT.first2),])
NV.day.of.assessment2 <- lmer(NV~Day.of.week+Day.of.assessment+(1|ID),data=s3.w[!is.na(s3.w$WHAT.first2),])
NV.surveyNumber2 <- lmer(NV~Day.of.week+Day.of.assessment+as.factor(surveyNumber)+(1|ID),data=s3.w[!is.na(s3.w$WHAT.first2),])
NV.WHAT2 <- lmer(NV~Day.of.week+Day.of.assessment+as.factor(surveyNumber)+WHAT.first2+(1|ID),data=s3.w[!is.na(s3.w$WHAT.first2),])
Weights(AIC(NV.null2,NV.day.of.week2,NV.day.of.assessment2,NV.surveyNumber2,NV.WHAT2)) # null AICw = .89
## model weights
## [1] 0.893 0.072 0.035 0.000 0.000
round(summary(NV.WHAT2)$coefficients,3)
## Estimate Std. Error t value
## (Intercept) 3.150 0.108 29.178
## Day.of.weekWednesday -0.117 0.055 -2.119
## Day.of.weekFriday -0.097 0.055 -1.769
## Day.of.assessmentDay 2 0.190 0.055 3.482
## Day.of.assessmentDay 3 0.203 0.055 3.699
## as.factor(surveyNumber)2 0.114 0.079 1.448
## as.factor(surveyNumber)3 0.124 0.081 1.530
## as.factor(surveyNumber)4 0.177 0.085 2.077
## as.factor(surveyNumber)5 0.327 0.098 3.332
## as.factor(surveyNumber)6 0.101 0.124 0.814
## as.factor(surveyNumber)7 0.138 0.201 0.690
## WHAT.first2Data analysis \nand authoring 0.224 0.070 3.215
## WHAT.first2Administrative \nactivities 0.145 0.086 1.690
## WHAT.first2Social \nactivities -0.017 0.074 -0.225
## WHAT.first2Break -0.299 0.132 -2.258
Comments:
- results are similar to those reported for subsample s1
TA
Here, we evaluate systematic differences in Tense Arousal across work sampling variables.
Specifically, we use the plotLMER() function to visualize the bivariate score distributions while highlighting cases associated with AICw > .50. The function also allows visualizing the model diagnostics, in terms of the normal fit of residuals and random intercepts, and of homoscedasticity.
grid.arrange(plotLMER(dat=s3.w[!is.na(s3.w$WHAT.first),],pred="WHAT.first",resp="TA",LMER=FALSE,
Xlab="Type of work activity",Ylab="Tense Arousal"),
plotLMER(dat=s3.w[!is.na(s3.w$HOW.first),],pred="HOW.first",resp="TA",LMER=FALSE,
Xlab="Mean of work",Ylab="Tense Arousal"),
plotLMER(dat=s3.w[!is.na(s3.w$WHOM.first),],pred="WHOM.first",resp="TA",LMER=FALSE,
Xlab="People working with",Ylab="Tense Arousal"),nrow=2)
Then, we replicate the same procedure on the simplified variables WHAT.first2, HOW.first2, and WHOM.first2. Activities marked as “other” are not considered.
grid.arrange(plotLMER(dat=s3.w[!is.na(s3.w$WHAT.first2),],pred="WHAT.first2",resp="TA",
Xlab="Type of work activity",Ylab="Tense Arousal"),
plotLMER(dat=s3.w[!is.na(s3.w$HOW.first2),],pred="HOW.first2",resp="TA",
Xlab="Mean of work",Ylab="Tense Arousal"),
plotLMER(dat=s3.w[!is.na(s3.w$WHOM.first2),],pred="WHOM.first2",resp="TA",
Xlab="People working with",Ylab="Tense Arousal"),nrow=2)
##
##
##
## Fitting LMER model on 1466 observations from 169 subjects:
## TA ~ WHAT.first2 + (1|ID)
##
##
## AICw vs. null model = 0.112
##
##
##
##
## Fitting LMER model on 1696 observations from 174 subjects:
## TA ~ HOW.first2 + (1|ID)
##
##
## AICw vs. null model = 0.005
##
##
##
##
## Fitting LMER model on 1692 observations from 174 subjects:
## TA ~ WHOM.first2 + (1|ID)
##
##
## AICw vs. null model = 0.056
# plotting diagnostics
p <- plotLMER(dat=s3.w[!is.na(s3.w$WHAT.first2),],pred="WHAT.first2",resp="TA",LMER.results=FALSE,diagnostics=TRUE)
##
##
##
## Fitting LMER model on 1466 observations from 169 subjects:
## TA ~ WHAT.first2 + (1|ID)
##
##
##
## Plotting model diagnostics:
Comments:
- results are similar to those reported for subsample s1
FA
Here, we evaluate systematic differences in Fatigue across work sampling variables.
Specifically, we use the plotLMER() function to visualize the bivariate score distributions while highlighting cases associated with AICw > .50. The function also allows visualizing the model diagnostics, in terms of the normal fit of residuals and random intercepts, and of homoscedasticity.
grid.arrange(plotLMER(dat=s3.w[!is.na(s3.w$WHAT.first),],pred="WHAT.first",resp="FA",LMER=FALSE,
Xlab="Type of work activity",Ylab="Fatigue"),
plotLMER(dat=s3.w[!is.na(s3.w$HOW.first),],pred="HOW.first",resp="FA",LMER=FALSE,
Xlab="Mean of work",Ylab="Fatigue"),
plotLMER(dat=s3.w[!is.na(s3.w$WHOM.first),],pred="WHOM.first",resp="FA",LMER=FALSE,
Xlab="People working with",Ylab="Fatigue"),nrow=2)
Then, we replicate the same procedure on the simplified variables WHAT.first2, HOW.first2, and WHOM.first2. Activities marked as “other” are not considered.
grid.arrange(plotLMER(dat=s3.w[!is.na(s3.w$WHAT.first2),],pred="WHAT.first2",resp="FA",
Xlab="Type of work activity",Ylab="Fatigue"),
plotLMER(dat=s3.w[!is.na(s3.w$HOW.first2),],pred="HOW.first2",resp="FA",
Xlab="Mean of work",Ylab="Fatigue"),
plotLMER(dat=s3.w[!is.na(s3.w$WHOM.first2),],pred="WHOM.first2",resp="FA",
Xlab="People working with",Ylab="Fatigue"),nrow=2)
##
##
##
## Fitting LMER model on 1466 observations from 169 subjects:
## FA ~ WHAT.first2 + (1|ID)
##
##
## AICw vs. null model = 0.002
##
##
##
##
## Fitting LMER model on 1696 observations from 174 subjects:
## FA ~ HOW.first2 + (1|ID)
##
##
## AICw vs. null model = 0.419
##
##
##
##
## Fitting LMER model on 1692 observations from 174 subjects:
## FA ~ WHOM.first2 + (1|ID)
##
##
## AICw vs. null model = 0.655
## Likelihood Ratio Test: Chisq = 7.19 ( 1 ) p = 0.007
##
## Estimate Std. Error t value
## (Intercept) 3.8443259 0.06451873 59.584649
## predWith others -0.1538057 0.05727549 -2.685367
##
##
## Computing profile confidence intervals ...
## 2.5 % 97.5 %
## .sig01 0.6064709 0.78851219
## .sigma 0.9465637 1.01626492
## (Intercept) 3.7175488 3.97083626
## predWith others -0.2665692 -0.04148217
# plotting diagnostics
p <- plotLMER(dat=s3.w[!is.na(s3.w$WHAT.first2),],pred="WHAT.first2",resp="FA",LMER.results=FALSE,diagnostics=TRUE)
##
##
##
## Fitting LMER model on 1466 observations from 169 subjects:
## FA ~ WHAT.first2 + (1|ID)
##
##
##
## Plotting model diagnostics:
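For reference, the diagnostics plotted by plotLMER() can be approximated on any fitted model with base R; a minimal sketch (not the plotLMER() implementation), using the Fatigue model above:
# Rough base-R equivalent of the diagnostics above (illustrative sketch only):
# normality of residuals and random intercepts, plus fitted-vs-residuals for homoscedasticity
fit <- lmer(FA~WHAT.first2+(1|ID),data=s3.w[!is.na(s3.w$WHAT.first2),])
par(mfrow=c(1,3))
qqnorm(resid(fit),main="Residuals"); qqline(resid(fit))
qqnorm(ranef(fit)$ID[,1],main="Random intercepts"); qqline(ranef(fit)$ID[,1])
plot(fitted(fit),resid(fit),xlab="Fitted values",ylab="Residuals",main="Homoscedasticity"); abline(h=0,lty=2)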
Second, we specify a set of nested LMER models (i.e., hierarchical multiple regression) to evaluate the importance of each additional work sampling variable while controlling for the variables associated with substantial differences in the previous steps, including the temporal variables. A variable is considered in the following models only when its inclusion is associated with an AICw > .50.
# WHOM by controlling for temporal variables
FA.null2 <- lmer(FA~(1|ID),data=s3.w[!is.na(s3.w$WHOM.first2),])
FA.time2 <- lmer(FA~as.factor(surveyNumber)+(1|ID),data=s3.w[!is.na(s3.w$WHOM.first2),])
FA.WHOM2 <- lmer(FA~as.factor(surveyNumber)+WHOM.first2+(1|ID),data=s3.w[!is.na(s3.w$WHOM.first2),])
Weights(AIC(FA.null2,FA.time2,FA.WHOM2)) # time AICw = .85
## model weights
## [1] 0.000 0.847 0.153
Comments:
- in contrast to what was found in subsample s1, in s3 substantial differences in Fatigue emerge between tasks performed “alone” and those performed with “other people” (the latter showing lower Fatigue)
- however, those differences are not substantial when controlling for the role of time, implying results similar to those found in subsample s1
EXAMPLE PLOTS
Here, we plot some examples of substantial differences.
p1 <- plotLMER(dat=s3.w[!is.na(s3.w$WHAT.first2),],pred="WHAT.first2",resp="NV",
Xlab="Type of work activity",Ylab="Negative Valence",text.size=12)
p2 <- plotLMER(dat=s3.w[!is.na(s3.w$WHAT.first2),],pred="WHAT.first2",resp="TC",
Xlab="Type of work activity",Ylab="Task Control",text.size=12)
p3 <- plotLMER(dat=s3.w[!is.na(s3.w$HOW.first2),],pred="HOW.first2",resp="TD",
Xlab="Mean of work",Ylab="Task Demand",text.size=16)
p4 <- plotLMER(dat=s3.w[!is.na(s3.w$WHOM.first2),],pred="WHOM.first2",resp="TC",
Xlab="People working with",Ylab="Task Control",text.size=16)
gridExtra::grid.arrange(p1,p2,p3,p4,nrow=2)