# Lab 4 Exercise Key

# 1. Which model comparison?
# Compact:   Wk4IAT = b0 + e
# Augmented: Wk4IAT = b0 + b1*Condition + e
# We care about the test of b1 (the condition effect).
# (An explicit anova() version of this comparison is sketched at the end of this file.)

# 2. lm
mCon = lm(Wk4IAT ~ 1 + Condition, data = d)

# 3.
modelSummary(mCon)

# 4.
# a. People who did not receive the diversity training have, on average, a non-zero IAT
#    score at week 4 (M = .541).
# b. People who received the diversity training have, on average, lower IAT scores at
#    week 4 (by .22 units).
# c. Both parameters are statistically significant (i.e., have less than a 5% probability
#    of occurring if the null hypothesis is true).
# d. Yep!

# 5.
modelEffectSizes(mCon)
# peta2 = .0721. Being in the diversity training explains about 7% of the variance in
# week 4 IAT scores.

# 6.
d$ConditionC <- varRecode(d$Condition, c(0, 1), c(-.5, .5))
d$ConditionC
mConC = lm(Wk4IAT ~ 1 + ConditionC, data = d)

# 7.
modelSummary(mConC)
# a. Only the estimate for the intercept parameter, because the representation of a score
#    of 0 on the condition variable has changed, but the group difference has not.
# b. The intercept term for the model with centered condition represents the predicted
#    score for a participant who is NEUTRAL with respect to condition. This is called an
#    unweighted mean. You would interpret this intercept term if you were curious about
#    week 4 IAT scores for participants independent of the condition manipulation.

# CODA ####
# Imagine you have a dataset with 75 people in the experimental condition
# and 60 people in the control condition (oversampling the experimental condition is
# fairly common).

# If we centered around the arithmetic mean, what would a score of 0 represent?
# It would represent the average person IN THAT SAMPLE.
# This is called the weighted mean. If the sample sizes differ between groups,
# the weighted and unweighted means will not be the same.

# If we center around 0, what does a score of 0 represent?
# It would represent someone who is NEUTRAL with respect to condition.
# This is called the unweighted mean. Unlike the weighted mean, this value represents the
# same person across samples, even if the groups are different sizes. In experimental
# psychology, we typically use unweighted means because the proportion of people in the
# groups is not meaningful (we recruited and/or assigned people to these groups).
# In areas where the proportion of participants in a group reflects something
# meaningful, you might see people report weighted means: it depends on your
# research question.
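# A minimal sketch of the weighted vs. unweighted mean distinction above, using the
# hypothetical group sizes from the CODA (75 vs. 60) and the predicted group means
# reported just below (.54 and .32); these particular numbers are only for illustration.
nExp = 75; nCtl = 60
mExp = .32 # predicted week 4 IAT, training group
mCtl = .54 # predicted week 4 IAT, control group
# Weighted (sample) mean: each PERSON counts equally, so the larger group pulls the
# average toward its own mean.
(nExp * mExp + nCtl * mCtl) / (nExp + nCtl)
# Unweighted mean: each GROUP counts equally, regardless of sample size.
(mExp + mCtl) / 2
# With -.5/.5 coding, the intercept of the regression model estimates this unweighted
# mean even when the groups differ in size.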
# c. .54
# d. .32
Cs = data.frame(ConditionC = c(-.5, .5))
modelPredictions(mConC, Cs)

##################
#### GRAPHING ####
##################

#################################
#### Plotting using barplots ####
#################################

# This is the same idea from last week, but now we need only 2 values to represent
# all possible values of the condition variable.
pY2 = data.frame(Condition = c(0, 1))

# We need to generate the mean of the predicted values for each condition,
# plus lower and upper standard error bounds for each condition.
# SE bars represent the standard error of the point estimates.
pY2 = modelPredictions(mCon, pY2)
pY2

# Note how the predicted values are just the means within each group
varDescribeBy(d$Wk4IAT, d$Condition)

# Starting plot
plotB = ggplot(pY2, aes(x = Condition, y = Predicted))
plotB # correct axes? Good ranges? Yup.

# Add bars
plotB = plotB + geom_bar(mapping = aes(fill = Condition), # add colors and labels based on condition
                         data = pY2,
                         stat = "identity", # stat = "identity" because bar heights represent particular values in the dataset
                         width = 0.5)
plotB

# Why is the legend a gradient? R thinks we could theoretically have values between 0
# and 1, but obviously we cannot. You can fix this by telling R to treat condition as
# a factor instead of a number:
plotB = ggplot(pY2, aes(x = Condition, y = Predicted)) + # re-initialize the plot
  geom_bar(mapping = aes(fill = as.factor(Condition)),
           data = pY2,
           stat = "identity",
           width = 0.5)
plotB

# Add the raw data points with jittering so we can see them
plotB = plotB + geom_point(data = d, aes(y = Wk4IAT, x = Condition),
                           colour = 'darkgrey',
                           position = position_jitter(w = 0.1, h = 0.1))
plotB

# Add error bars
plotB = plotB + geom_errorbar(data = pY2,
                              aes(y = Predicted, x = Condition, # set error bar aesthetics: x & y variables
                                  ymin = CILo, # define bottom (then top) of error bars
                                  ymax = CIHi),
                              stat = "identity",
                              width = 0.25) # width of the error bar caps
plotB

# Finally, add labels and a title, and remove the legend since it is unnecessary
plotB = plotB + labs(y = 'Week 4 IAT Score', x = 'Condition') + # set axis labels
  theme_bw(base_size = 14) + # remove the grey background
  theme(legend.position = "none") # remove the unnecessary legend
plotB

# Everything at once (combining previous code into a single plot)
plotB2 = ggplot(pY2, aes(x = Condition, y = Predicted)) + # use pY2: d has no Predicted column
  geom_bar(mapping = aes(fill = as.factor(Condition)),
           data = pY2,
           stat = "identity",
           width = 0.5) +
  geom_point(data = d, aes(y = Wk4IAT, x = Condition),
             colour = 'darkgrey',
             position = position_jitter(w = 0.1, h = 0.1)) +
  geom_errorbar(data = pY2,
                aes(y = Predicted, x = Condition, ymin = CILo, ymax = CIHi),
                stat = "identity",
                width = 0.25) +
  labs(y = 'Week 4 IAT Score', x = 'Condition') +
  theme_bw(base_size = 14) +
  theme(legend.position = "none")
plotB2

# The researchers hypothesized that diversity training lowers IAT scores. Diversity
# training did have a significant effect on IAT scores such that IAT scores were lower
# at week 4 for people who completed the training than for those who did not.
# Training condition explained 7% of the variance in week 4 IAT scores.
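# Back to question 1: a minimal sketch that makes the compact vs. augmented model
# comparison explicit with base R's anova(). This is not required by the exercise;
# for a single-df predictor the F-test here is equivalent to the t-test of b1
# reported by modelSummary(mCon) above (F = t^2).
mCompact   = lm(Wk4IAT ~ 1, data = d)             # compact: intercept only
mAugmented = lm(Wk4IAT ~ 1 + Condition, data = d) # augmented: adds the condition effect
anova(mCompact, mAugmented)                       # F-test for adding b1*Condition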