#### Lab 5 Exercise ####


# 1. 
# Load lmSupport (dfReadDat, varDescribe, varScore, modelSummary, ...) and
# read the lab data set into `d`.
library(lmSupport)
d <- dfReadDat("Lab5Ex.dat")

# Inspect the data: descriptive statistics, column structure, and six
# randomly chosen rows.
varDescribe(d)
str(d)
# Draw the row indices from the actual number of rows instead of a hard-coded
# count, so this keeps working if the data file gains or loses observations.
d[sample(nrow(d), 6), ]


# 2. 
library(psych)
# Names of the four items that make up the concern scale.
concern <- c("concern1", "concern2", "concern3", "concern4")
# Scale reliability. The call is namespace-qualified because ggplot2 (used
# later in this script) also exports a function named alpha(); once ggplot2
# is attached, a bare alpha() could resolve to the wrong one on a re-run.
psych::alpha(d[, concern], check.keys = TRUE)
# Reliability is .83, which is good enough. It would go up if we got rid of item
# 4, but with such a marginal increase we'll just keep it in.

# Composite score: prorated total across the items, divided by the number of
# items so that it stays on the original 1-7 response scale.
d$concernM <- varScore(d, Forward = concern, Range = c(1, 7), MaxMiss = .3,
                       Prorate = TRUE) / length(concern)
varDescribe(d$concernM)
hist(d$concernM)
# In general, this sample is relatively concerned about discrimination, and many
# people said they were very concerned about it (max value).

# There are 693 observations of concernM but only 692 of concern3. That means one of the
# people who skipped concern3 answered all the other concern items, so their score on
# the third item was imputed when their composite score was calculated. This gives us
# just a little more statistical power.


# 3. 
# Model 1: concern composite predicted from experimental condition alone.
m1 <- lm(concernM ~ condition, data = d)
# Spell out FALSE: `F` is an ordinary variable that can be reassigned.
# With t = FALSE the summary is reported with F statistics (see below).
modelSummary(m1, t = FALSE)
modelEffectSizes(m1)
# Yes. People in the intervention condition had concern scores about .3 points higher
# than those in the control condition, F(1, 667) = 7.592, p < .01.

# It means we're missing condition information from some of the participants.


# 4. 
# Variables of interest for the bivariate look at the data.
vars <- c("concernM", "sex", "pol", "condition")
dVars <- d[vars]

# Correlation matrix, using every available pair of observations.
cor(dVars, use = "pairwise.complete.obs")

# Scatterplot matrix of the same variables.
library(car)
scatterplotMatrix(dVars)
# Concern is negatively related to pol; nothing else is especially strong.
# Concern and sex each show a slight positive relationship with condition.


# 5.
# Model 2: add sex as a covariate to the condition effect.
m2 <- lm(concernM ~ condition + sex, data = d)
# Spell out FALSE: `F` is an ordinary variable that can be reassigned.
modelSummary(m2, t = FALSE)
modelEffectSizes(m2)
# The effect of the intervention has eroded with the addition of sex to the model, to
# the point where it is now only marginally significant, F(1, 654) = 2.887, p = .09.
# Sex is a significant predictor of concern, F(1, 654) = 32.36, p < .001.
# This result makes sense because there are more women than men in the experimental condition,
# so some of the group difference could be explained by the sex proportions in each group.

# Cross-tabulate sex (rows) by condition (columns) to check that claim.
table(d$sex, d$condition)


# 6. 
# Model 3: condition effect controlling for both sex and political orientation.
m3 <- lm(concernM ~ condition + sex + pol, data = d)
# Spell out FALSE: `F` is an ordinary variable that can be reassigned.
modelSummary(m3, t = FALSE)
modelEffectSizes(m3)
# The effect of the intervention has fallen to non-significance, F(1, 648) = .834, p = .36.
# Both sex and politics are significant predictors of concern.

# Descriptives of political orientation within each condition.
varDescribeBy(d$pol, d$condition)
# People in the experimental condition are slightly more liberal, on average, than those
# in the control condition.


# 7. 
# He could argue that politics doesn't need to be included. He would have to make the case
# that the intervention itself influenced political orientation in addition to concern. Then
# he could leave politics out of the model and be left with a marginal condition effect.


# 8.
# ggplot() and the geom_*/scale_*/theme_* functions below come from ggplot2,
# which nothing earlier in this script attaches explicitly.
library(ggplot2)

# Values to predict at: both condition codes, with sex fixed at 0 and pol
# fixed at its sample mean.
pX <- data.frame(condition = c(-.5, .5), sex = 0,
                 pol = mean(d$pol, na.rm = TRUE))
pY1 <- modelPredictions(m3, pX)

# Build the plot one layer at a time, printing after each step.
plot1 <- ggplot(data = d, mapping = aes(x = condition, y = concernM))
plot1
# Bars show the model predictions; jittered grey points show the raw scores.
plot1 <- plot1 +
  geom_bar(aes(y = Predicted, fill = as.factor(condition)), data = pY1,
           stat = "identity") +
  geom_point(position = position_jitter(width = .1), color = "darkgrey")
plot1
# Error bars use the interval bounds returned with the predictions. y is
# overridden so this layer does not inherit y = concernM, which is not a
# column of the prediction data.
plot1 <- plot1 +
  geom_errorbar(aes(y = Predicted, ymin = CILo, ymax = CIHi), data = pY1,
                width = .5)
plot1
# Cosmetics: plain theme, no legend, labelled axes.
plot1 <- plot1 +
  theme_bw() +
  theme(legend.position = "none") +
  scale_x_continuous("Condition", breaks = seq(-.5, .5, by = 1)) +
  scale_y_continuous("Mean Concern", breaks = seq(1, 7, by = 1))
plot1

# Same figure, but with predictions from the condition-only model (m1).
# pX is reused from above.
pY2 <- modelPredictions(m1, pX)

# Empty canvas: condition on x, observed concern on y.
plot2 <- ggplot(data = d, mapping = aes(x = condition, y = concernM))
plot2

# Predicted bars plus jittered raw data.
barLayer2 <- geom_bar(aes(y = Predicted, fill = as.factor(condition)),
                      data = pY2, stat = "identity")
pointLayer2 <- geom_point(position = position_jitter(width = .1),
                          color = "darkgrey")
plot2 <- plot2 + barLayer2 + pointLayer2
plot2

# Error bars around the predictions.
errorLayer2 <- geom_errorbar(aes(y = Predicted, ymin = CILo, ymax = CIHi),
                             data = pY2, width = .5)
plot2 <- plot2 + errorLayer2
plot2

# Theme and axis labelling.
plot2 <- plot2 +
  theme_bw() +
  theme(legend.position = "none") +
  scale_x_continuous("Condition", breaks = seq(-.5, .5, by = 1)) +
  scale_y_continuous("Mean Concern", breaks = seq(1, 7, by = 1))
plot2

# Show the covariate-adjusted and unadjusted figures one after the other.
plot1
plot2


#### let's remake the graph with more informative x axis labels
# Recode the numeric condition codes into labelled factor levels.
d$conditionStr <- as.factor(
  varRecode(d$condition, c(-.5, .5), c("Control", "Intervention"))
)
# Keep only the rows that have a value for conditionStr.
d2 <- d[!is.na(d$conditionStr), ]

# Refit the condition-only model with the labelled factor as the predictor.
mPlot <- lm(concernM ~ conditionStr, data = d2)
modelSummary(mPlot)

# Predictions for each labelled condition.
pX <- data.frame(conditionStr = c("Control", "Intervention"))
pY3 <- modelPredictions(mPlot, pX)

# Build the figure layer by layer, printing after each step.
plot3 <- ggplot(data = d2, mapping = aes(x = conditionStr, y = concernM))
plot3

barLayer3 <- geom_bar(aes(y = Predicted, fill = as.factor(conditionStr)),
                      data = pY3, stat = "identity")
pointLayer3 <- geom_point(position = position_jitter(width = .1),
                          color = "darkgrey")
plot3 <- plot3 + barLayer3 + pointLayer3
plot3

errorLayer3 <- geom_errorbar(aes(y = Predicted, ymin = CILo, ymax = CIHi),
                             data = pY3, width = .5)
plot3 <- plot3 + errorLayer3
plot3

plot3 <- plot3 +
  theme_bw() +
  theme(legend.position = "none") +
  labs(x = "Condition") +
  scale_y_continuous("Mean Concern", breaks = seq(1, 7, by = 1))
plot3
# Done: the x axis now shows the condition names instead of -.5 / .5.