# Lab 4 Exercise Key

# 1. Which model comparison?
# Compact:   Wk4IAT = b0 + e
# Augmented: Wk4IAT = b0 + b1*Condition + e
# We care about the test of b1 (the condition effect).
# (An explicit anova() version of this comparison is sketched at the end of this file.)

# 2. lm
mCon = lm(Wk4IAT ~ 1 + Condition, data = d)

# 3.
modelSummary(mCon)

# 4.
# a. People who did not receive the diversity training have, on average, a non-zero IAT
#    score at week 4 (M = .541).
# b. People who received the diversity training have, on average, lower IAT scores at
#    week 4 (by .22 units).
# c. Both parameters are statistically significant (i.e., have less than a 5% probability
#    of occurring if the null hypothesis is true).
# d. Yep!

# 5.
modelEffectSizes(mCon)
# peta2 = .0721. Being in the diversity training explains about 7% of the variance in
# week 4 IAT scores.

# 6.
d$ConditionC <- varRecode(d$Condition, c(0, 1), c(-.5, .5))
d$ConditionC
mConC = lm(Wk4IAT ~ 1 + ConditionC, data = d)

# 7.
modelSummary(mConC)
# a. Only the estimate for the intercept parameter, because the representation of a score
#    of 0 on the condition variable has changed, but the group difference has not.
# b. The intercept term for the model with centered condition represents the predicted
#    score for a participant who is NEUTRAL with respect to condition. This is called an
#    unweighted mean. You would interpret this intercept term if you were curious about
#    week 4 IAT scores for participants independent of the condition manipulation.

# CODA ####
# Imagine you have a dataset with 75 people in the experimental condition
# and 60 people in the control condition (oversampling the experimental condition is
# fairly common).

# If we centered around the arithmetic mean, what would a score of 0 represent?
# It would represent the average person IN THAT SAMPLE.
# This is called the weighted mean. If the sample sizes differ between groups,
# the weighted and unweighted means will not be the same.

# If we center around 0, what does a score of 0 represent?
# It would represent someone who is NEUTRAL with respect to condition.
# This is called the unweighted mean. Unlike the weighted mean, this value represents the
# same person across samples, even if the groups are different sizes. In experimental
# psychology, we typically use unweighted means because the proportion of people in the
# groups is not meaningful (we recruited and/or assigned people to these groups).
# In areas where the proportion of participants in a group reflects something
# meaningful, you might see people report weighted means: it depends on your
# research question.
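# A minimal sketch of the weighted vs. unweighted mean distinction above, using the
# hypothetical group sizes from the CODA (75 vs. 60) and the predicted group means
# reported just below (.54 and .32); these particular numbers are only for illustration.
nExp = 75; nCtl = 60
mExp = .32 # predicted week 4 IAT, training group
mCtl = .54 # predicted week 4 IAT, control group
# Weighted (sample) mean: each PERSON counts equally, so the larger group pulls the
# average toward its own mean.
(nExp * mExp + nCtl * mCtl) / (nExp + nCtl)
# Unweighted mean: each GROUP counts equally, regardless of sample size.
(mExp + mCtl) / 2
# With -.5/.5 coding, the intercept of the regression model estimates this unweighted
# mean even when the groups differ in size.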
# c. .54
# d. .32
Cs = data.frame(ConditionC = c(-.5, .5))
modelPredictions(mConC, Cs)

##################
#### GRAPHING ####
##################

#################################
#### Plotting using barplots ####
#################################

# This is the same idea from last week, but now we need only 2 values to represent
# all possible values of the condition variable.
pY2 = data.frame(Condition = c(0, 1))

# We need to generate the mean of the predicted values for each condition,
# plus lower and upper standard error bounds for each condition.
# SE bars represent the standard error of the point estimates.
pY2 = modelPredictions(mCon, pY2)
pY2

# Note how the predicted values are just the means within each group
varDescribeBy(d$Wk4IAT, d$Condition)

# Starting plot
plotB = ggplot(pY2, aes(x = Condition, y = Predicted))
plotB # correct axes? Good ranges? Yup.

# Add bars
plotB = plotB + geom_bar(mapping = aes(fill = Condition), # add colors and labels based on condition
                         data = pY2,
                         stat = "identity", # stat = "identity" because bar heights represent particular values in the dataset
                         width = 0.5)
plotB

# Why is the legend a gradient? R thinks we could theoretically have values between 0
# and 1, but obviously we cannot. You can fix this by telling R to treat condition as
# a factor instead of a number:
plotB = ggplot(pY2, aes(x = Condition, y = Predicted)) + # re-initialize the plot
  geom_bar(mapping = aes(fill = as.factor(Condition)),
           data = pY2,
           stat = "identity",
           width = 0.5)
plotB

# Add the raw data points with jittering so we can see them
plotB = plotB + geom_point(data = d, aes(y = Wk4IAT, x = Condition),
                           colour = 'darkgrey',
                           position = position_jitter(w = 0.1, h = 0.1))
plotB

# Add error bars
plotB = plotB + geom_errorbar(data = pY2,
                              aes(y = Predicted, x = Condition, # set error bar aesthetics: x & y variables
                                  ymin = CILo, # define bottom (then top) of error bars
                                  ymax = CIHi),
                              stat = "identity",
                              width = 0.25) # width of the error bar caps
plotB

# Finally, add labels and a title, and remove the legend since it is unnecessary
plotB = plotB + labs(y = 'Week 4 IAT Score', x = 'Condition') + # set axis labels
  theme_bw(base_size = 14) + # remove the grey background
  theme(legend.position = "none") # remove the unnecessary legend
plotB

# Everything at once (combining previous code into a single plot)
plotB2 = ggplot(pY2, aes(x = Condition, y = Predicted)) + # use pY2: d has no Predicted column
  geom_bar(mapping = aes(fill = as.factor(Condition)),
           data = pY2,
           stat = "identity",
           width = 0.5) +
  geom_point(data = d, aes(y = Wk4IAT, x = Condition),
             colour = 'darkgrey',
             position = position_jitter(w = 0.1, h = 0.1)) +
  geom_errorbar(data = pY2,
                aes(y = Predicted, x = Condition, ymin = CILo, ymax = CIHi),
                stat = "identity",
                width = 0.25) +
  labs(y = 'Week 4 IAT Score', x = 'Condition') +
  theme_bw(base_size = 14) +
  theme(legend.position = "none")
plotB2

# The researchers hypothesized that diversity training lowers IAT scores. Diversity
# training did have a significant effect on IAT scores such that IAT scores were lower
# at week 4 for people who completed the training than for those who did not.
# Training condition explained 7% of the variance in week 4 IAT scores.
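# Back to question 1: a minimal sketch that makes the compact vs. augmented model
# comparison explicit with base R's anova(). This is not required by the exercise;
# for a single-df predictor the F-test here is equivalent to the t-test of b1
# reported by modelSummary(mCon) above (F = t^2).
mCompact   = lm(Wk4IAT ~ 1, data = d)             # compact: intercept only
mAugmented = lm(Wk4IAT ~ 1 + Condition, data = d) # augmented: adds the condition effect
anova(mCompact, mAugmented)                       # F-test for adding b1*Condition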