############################## #### General Linear Model #### #### Psychology 610/710 #### #### Lab 10 #### #### 10 November 2017 #### ############################## ########################## #### DATASET ############# ########################## # This dataset is loosely based on a study conducted in John Curtin's lab involving # stress reactivity of smokers. Smokers were either deprived of nicotine for 24 hours # or not deprived of nicotine (SmokeCon). The researchers measured the participants' # startle response (STL), and whether they smoked standard or e-cigarettes (CigType) # Startle response, as you may remember from lecture, is a physiological reaction to a startling stimulus # (e.g., eyeblink) and can be used to measure stress reactivity. We expect that participants in the # deprived condition will have greater startle response than those in the non-deprived condition, # however it is hypothesized that this manipulation may have a different effect on standard versus e-cig users. library(lmSupport) library(psych) library(car) library(ggplot2) # Read it in d <- dfReadDat("Lab10_DemoData.dat") # Do some data exploration names(d) varDescribe(d) head(d) # Univariate plots and bivariate relationships varPlot(d$STL) spm(d) corr.test(d) # We can see the number of observations in each condition table(d$SmokeCon) table(d$CigType) # Note: # Coding for SmokeCon: Deprived = -0.5, NonDeprived = 0.5 # Coding for CigType: Standard = -0.5, e-cigs = 0.5 # OK what do we need to do to center these variables? #Trick question! They are already centered! # but let's change the variable names to reflect that: d <- varRename(d, c("SmokeCon","CigType"), c("SmokeConC","CigTypeC")) # Descriptive stats for each of the four possible predictor variable groupings: varDescribeBy(d$STL,list(d$SmokeConC, d$CigTypeC)) # Any initial impressions? # First, let's fit an additive model in which we test the effect of nicotine # deprivation, controlling for cig type. mAdd <- lm(STL ~ SmokeConC + CigTypeC, data = d) #Quick outlier check res = modelCaseAnalysis(mAdd, "RESIDUALS") d[res$Rownames,] #Model assumptions checks modelAssumptions(mAdd, "NORMAL") # Not great but not terrible modelAssumptions(mAdd, "CONSTANT") # Not great but not terrible # One thing to note, though, is that sometimes violations of model assumptions are attributable to an # interaction effect that hasn't been measured. That is, the residuals might not be normally # distributed because of error, but because an unmeasured interaction influences the data. #Should we check the linear assummption? # Not needed for dichotomous variables! Cant be anything but linear modelAssumptions(mAdd, "LINEAR") #OK, let's look at the model results modelSummary(mAdd) modelEffectSizes(mAdd) confint(mAdd) # What does b1 represent? # The coefficient for condition: it represents the difference in startle response between # the nicotine deprived and the non-deprived smokers, controlling for ciggarette type. # What does b2 represent? # The coefficient for cig type: it represents the difference in startle response between standard # and e-cig users, controlling for deprivation condition. # What is the direction of these differences? (i.e. which condition/cig type has higher startle response?) # Non-deprived smokers and e-cig users have lower startle response. # What is the conclusion from this model? # Post-test startle response cannot be predicted by smoking condition or cig type when controlling # for the other. # Let's now test whether smoking deprivation affected standard and e cig users differently. # How do we do this? With an interaction! # How does computing an interaction with dichotomous variables differ from computing # an interaction with continuous variables? It doesn't! mInt <- lm(STL ~ SmokeConC*CigTypeC, data=d) #Quick outlier check res = modelCaseAnalysis(mInt, "RESIDUALS") d[res$Rownames,] #Model assumptions checks modelAssumptions(mInt, "NORMAL") # Not great but not terrible modelAssumptions(mInt, "CONSTANT") # Not great but not terrible modelAssumptions(mInt2, "LINEAR") #This gives an error # It seems that this function only handles models that are coded in r as additive (e.g., X1 + X2) # What equivalent notation did we learn last week that would allow us to have an interactive term # but still only type the model out as additive? d$SmokeConXCigType = d$SmokeConC*d$CigTypeC mInt2 <- lm(STL ~ SmokeConC + CigTypeC + SmokeConXCigType, data=d) modelSummary(mInt2) modelSummary(mInt) #Remember this is the same as the previous model modelAssumptions(mInt2, "LINEAR") # This actually looks OK now #Now let's look at the effects modelSummary(mInt) modelEffectSizes(mInt) confint(mInt) # Reminder: When we look at the significance test of the interaction term... # What is the augmented model? # Y = b0 + b1*SexC + b2*SmokeConC + b3*CigTypeC*SmokeConC # What is the compact model? #Y = b0 + b1*SexC + b2*SmokeConC # What does b1 represent? # The average smoking condition effect for someone who is neutral with respect to cig type #(average effect of smoking condition, unweighted across cig type). # What does b2 represent? # The average cig type effect for someone who is neutral with respect to smoking condition. #(average effect of cig type , unweighted across smoking condition). # What does b3 represent? # The degree to which the effect of smoking condition is different between standard and e cig smokers, # or, alternatively, the degree to which the cig typ effect is different for Deprived # and Non-Deprived conditions. #### SIMPLE EFFECTS #### # Simple effects: The effect of one variable at a particular value of its moderator. # How do we compute them? By recentering the variables! # What if we want to know the simple effect of condition for standard smokers? # Recenter the cig type variable so standard smokers have a score of 0. # Remmeber: # Coding for SmokeCon: Deprived = -0.5, NonDeprived = 0.5 # Coding for CigType: Standard = -0.5, e-cigs = 0.5 # Compute the simple effect. d$CigTypeStand <- varRecode(d$CigTypeC, c(-0.5, 0.5), c(0,1)) mC1 <- lm(STL~ SmokeConC*CigTypeStand, data = d) modelSummary(mC1) # What's the conclusion of this test? # The effect of smoking condition is significant (with those in the non-deprived condition # scoring 44 points lower on startle, on average) # Now, test the effect of deprivation for e-cig smokers. # How can we code this in a way that allows us to keep the direction of the effect consistent? d$CigTypeE <- varRecode(d$CigTypeC, c(-.5, .5), c(-1, 0)) mC2 <- lm(STL ~ SmokeConC * CigTypeE, data = d) modelSummary(mC2) # What's the conclusion of this test? # The effect of smoking condition is not significant (those in the non-deprived condition # scored 9 point higher on startle, on average) # What doesn't change between all of these analyses? # The interaction effect. ############################################### #### Bar plot: Two dichotomous variables #### ############################################### # Recode from numeric to actual names # Remmeber: # Coding for SmokeCon: Deprived = -0.5, NonDeprived = 0.5 # Coding for CigType: Standard = -0.5, e-cigs = 0.5 d$CigTypeStr <- varRecode(d$CigTypeC, c(-0.5, 0.5), c("Standard","e-cigs")) d$SmokeConStr <- varRecode(d$SmokeConC, c(-0.5, 0.5), c("Deprived","Nondeprived")) # Make the strings a factor d$CigTypeStr=as.factor(d$CigTypeStr) d$SmokeConStr=as.factor(d$SmokeConStr) # Re-run the model with IVs as factors and make sure that everything looks fine mIntPlot <- lm(STL ~ SmokeConStr*CigTypeStr, data=d) modelSummary(mIntPlot) # Are these the same numbers as before? Yup. # You have to make predictions for each condition separately. YhatDeprived <- data.frame(SmokeConStr = 'Deprived', CigTypeStr = c('Standard','e-cigs')) YhatDeprived <- modelPredictions(mIntPlot, YhatDeprived) # look at these predictions. Do they make sense? YhatDeprived YhatNonDeprived <- data.frame(SmokeConStr = 'Nondeprived', CigTypeStr = c('Standard','e-cigs')) YhatNonDeprived <- modelPredictions(mIntPlot, YhatNonDeprived) YhatNonDeprived Yhats <- rbind(YhatDeprived,YhatNonDeprived) # Combine Yhats # Graphing windows() #or quartz BarPlot <- ggplot(Yhats, aes(x = SmokeConStr, y = Predicted, fill = CigTypeStr))+ geom_bar(stat='identity', position=position_dodge(.9))+ geom_errorbar(aes(ymax=CIHi, ymin=CILo), position=position_dodge(.9), width=0.25)+ theme_bw()+ theme(legend.title=element_blank(), panel.grid.major=element_blank(), panel.grid.minor=element_blank(), panel.border=element_blank(), axis.line=element_line(), text=element_text())+ ylab('Startle Response') + # Y-axis title xlab('Nicotine Deprivation') # X-axis title # scale_fill_grey() # Grayscale automatic colors BarPlot ####OR A SHORTER WAY########### # You have to make predictions for each condition separately Yhats = expand.grid(SmokeConStr = c('Nondeprived','Deprived'), CigTypeStr = c('Standard','e-cigs')) Yhats = modelPredictions(mIntPlot, Yhats) # Graphing windows() #or quartz library(cowplot) BarPlot <- ggplot(Yhats, aes(x = CigTypeStr, y = Predicted, fill = SmokeConStr))+ geom_bar(stat='identity', position=position_dodge(.9))+ geom_errorbar(aes(ymax=CIHi, ymin=CILo), position=position_dodge(.9), width=0.25)+ ylab('Startle Response') + # Y-axis title xlab('Cigarette Type') + # X-axis title + scale_y_continuous(expand=c(0,0)) BarPlot # What would we do if we wanted to switch which IV was on the X axis? # We'd only have to change the first line and the name of the axis!