##############################
#### General Linear Model ####
####  Psychology 610/710  ####
####        Lab 10        ####
####   10 November 2017   ####
##############################

##########################
#### DATASET #############
##########################
# This dataset is loosely based on a study conducted in John Curtin's lab involving
# stress reactivity of smokers. Smokers were either deprived of nicotine for 24 hours
# or not deprived of nicotine (SmokeCon). The researchers measured the participants'
# startle response (STL), and whether they smoked standard or e-cigarettes (CigType) 
# Startle response, as you may remember from lecture, is a physiological reaction to a startling stimulus 
# (e.g., eyeblink) and can be used to measure stress reactivity. We expect that participants in the 
# deprived condition will have greater startle response than those in the non-deprived condition, 
# however it is hypothesized that this manipulation may have a different effect on standard versus e-cig users.

library(lmSupport)
library(psych)
library(car)
library(ggplot2)

# Read it in
d <- dfReadDat("Lab10_DemoData.dat")

# Do some data exploration
names(d)
varDescribe(d)
head(d)

# Univariate plots and bivariate relationships
varPlot(d$STL)
spm(d)
corr.test(d)


# We can see the number of observations in each condition
table(d$SmokeCon)
table(d$CigType)

# Note:
# Coding for SmokeCon: Deprived = -0.5, NonDeprived = 0.5
# Coding for CigType: Standard = -0.5, e-cigs = 0.5

# OK what do we need to do to center these variables? 

#Trick question! They are already centered! 
# but let's change the variable names to reflect that:
d <- varRename(d, c("SmokeCon","CigType"), c("SmokeConC","CigTypeC"))

# Descriptive stats for each of the four possible predictor variable groupings:
varDescribeBy(d$STL,list(d$SmokeConC, d$CigTypeC))
# Any initial impressions?

# First, let's fit an additive model in which we test the effect of nicotine
# deprivation, controlling for cig type.
mAdd <- lm(STL ~ SmokeConC + CigTypeC, data = d)

#Quick outlier check  

res = modelCaseAnalysis(mAdd, "RESIDUALS")
d[res$Rownames,]

#Model assumptions checks 
modelAssumptions(mAdd, "NORMAL") # Not great but not terrible 
modelAssumptions(mAdd, "CONSTANT") # Not great but not terrible 

# One thing to note, though, is that sometimes violations of model assumptions are attributable to an
# interaction effect that hasn't been measured. That is, the residuals might not be normally
# distributed because of error, but because an unmeasured interaction influences the data.

#Should we check the linear assummption?

# Not needed for dichotomous variables!  Cant be anything but linear

modelAssumptions(mAdd, "LINEAR")

#OK, let's look at the model results 

modelSummary(mAdd)
modelEffectSizes(mAdd)
confint(mAdd)

# What does b1 represent? 

# The coefficient for condition: it represents the difference in startle response between 
# the nicotine deprived and the non-deprived smokers, controlling for ciggarette type.

# What does b2 represent? 

# The coefficient for cig type: it represents the difference in startle response between standard  
# and e-cig users, controlling for deprivation condition.

# What is the direction of these differences? (i.e. which condition/cig type has higher startle response?)

# Non-deprived smokers and e-cig users have lower startle response.

# What is the conclusion from this model?
# Post-test startle response cannot be predicted by smoking condition or cig type when controlling
# for the other.

# Let's now test whether smoking deprivation affected standard and e cig users differently.
# How do we do this? With an interaction!

# How does computing an interaction with dichotomous variables differ from computing 
# an interaction with continuous variables? It doesn't!


mInt <- lm(STL ~ SmokeConC*CigTypeC, data=d)


#Quick outlier check 

res = modelCaseAnalysis(mInt, "RESIDUALS")
d[res$Rownames,]

#Model assumptions checks 
modelAssumptions(mInt, "NORMAL") # Not great but not terrible 
modelAssumptions(mInt, "CONSTANT") # Not great but not terrible 

modelAssumptions(mInt2, "LINEAR") #This gives an error

# It seems that this function only handles models that are coded in r as additive (e.g., X1 + X2)
# What equivalent notation did we learn last week that would allow us to have an interactive term 
# but still only type the model out as additive? 

d$SmokeConXCigType = d$SmokeConC*d$CigTypeC
mInt2 <- lm(STL ~ SmokeConC + CigTypeC + SmokeConXCigType, data=d)
modelSummary(mInt2)
modelSummary(mInt) #Remember this is the same as the previous model 

modelAssumptions(mInt2, "LINEAR") # This actually looks OK now 

#Now let's look at the effects 
modelSummary(mInt)
modelEffectSizes(mInt)
confint(mInt)

# Reminder: When we look at the significance test of the interaction term...

# What is the augmented model?

# Y = b0 + b1*SexC + b2*SmokeConC + b3*CigTypeC*SmokeConC

# What is the compact model?   

#Y = b0 + b1*SexC + b2*SmokeConC


# What does b1 represent?

# The average smoking condition effect for someone who is neutral with respect to cig type 
#(average effect of smoking condition, unweighted across cig type).

# What does b2 represent?

# The average cig type effect for someone who is neutral with respect to smoking condition.
#(average effect of cig type , unweighted across smoking condition).

# What does b3 represent?

# The degree to which the effect of smoking condition is different between standard and e cig smokers, 
# or, alternatively, the degree to which the cig typ effect is different for Deprived 
# and Non-Deprived conditions.

#### SIMPLE EFFECTS #### 
# Simple effects: The effect of one variable at a particular value of its moderator.
# How do we compute them? By recentering the variables!
# What if we want to know the simple effect of condition for standard smokers? 
# Recenter the cig type variable so standard smokers have a score of 0.

# Remmeber:
# Coding for SmokeCon: Deprived = -0.5, NonDeprived = 0.5
# Coding for CigType: Standard = -0.5, e-cigs = 0.5
# Compute the simple effect.

d$CigTypeStand <- varRecode(d$CigTypeC, c(-0.5, 0.5), c(0,1))
mC1 <- lm(STL~ SmokeConC*CigTypeStand, data = d)
modelSummary(mC1)

# What's the conclusion of this test?
# The effect of smoking condition is significant (with those in the non-deprived condition
# scoring 44 points lower on startle, on average)

# Now, test the effect of deprivation for e-cig smokers.
# How can we code this in a way that allows us to keep the direction of the effect consistent? 

d$CigTypeE <- varRecode(d$CigTypeC, c(-.5, .5), c(-1, 0))   
mC2 <- lm(STL ~ SmokeConC * CigTypeE, data = d)
modelSummary(mC2)

# What's the conclusion of this test?
# The effect of smoking condition is not significant (those in the non-deprived condition
# scored 9 point higher on startle, on average)

# What doesn't change between all of these analyses?
# The interaction effect.

###############################################
####  Bar plot: Two dichotomous variables  ####
###############################################

# Recode from numeric to actual names
# Remmeber:
# Coding for SmokeCon: Deprived = -0.5, NonDeprived = 0.5
# Coding for CigType: Standard = -0.5, e-cigs = 0.5

d$CigTypeStr <- varRecode(d$CigTypeC, c(-0.5, 0.5), c("Standard","e-cigs"))
d$SmokeConStr <- varRecode(d$SmokeConC, c(-0.5, 0.5), c("Deprived","Nondeprived"))

# Make the strings a factor
d$CigTypeStr=as.factor(d$CigTypeStr)
d$SmokeConStr=as.factor(d$SmokeConStr)

# Re-run the model with IVs as factors and make sure that everything looks fine
mIntPlot <- lm(STL ~ SmokeConStr*CigTypeStr, data=d)
modelSummary(mIntPlot)
# Are these the same numbers as before? Yup.

# You have to make predictions for each condition separately.
YhatDeprived <- data.frame(SmokeConStr = 'Deprived', CigTypeStr = c('Standard','e-cigs'))
YhatDeprived <- modelPredictions(mIntPlot, YhatDeprived)
# look at these predictions. Do they make sense?
YhatDeprived

YhatNonDeprived <- data.frame(SmokeConStr = 'Nondeprived', CigTypeStr = c('Standard','e-cigs'))
YhatNonDeprived <- modelPredictions(mIntPlot, YhatNonDeprived)
YhatNonDeprived

Yhats <- rbind(YhatDeprived,YhatNonDeprived) # Combine Yhats

# Graphing
windows() #or quartz 
BarPlot <- ggplot(Yhats, aes(x = SmokeConStr, y = Predicted, fill = CigTypeStr))+
  geom_bar(stat='identity', position=position_dodge(.9))+
  geom_errorbar(aes(ymax=CIHi, ymin=CILo), position=position_dodge(.9), width=0.25)+
  theme_bw()+
  theme(legend.title=element_blank(),
        panel.grid.major=element_blank(),
        panel.grid.minor=element_blank(),
        panel.border=element_blank(),
        axis.line=element_line(),
        text=element_text())+
  ylab('Startle Response') +  # Y-axis title
  xlab('Nicotine Deprivation')  # X-axis title
#  scale_fill_grey()    # Grayscale automatic colors

BarPlot


####OR A SHORTER WAY###########

# You have to make predictions for each condition separately
Yhats = expand.grid(SmokeConStr = c('Nondeprived','Deprived'), 
                    CigTypeStr = c('Standard','e-cigs'))
Yhats = modelPredictions(mIntPlot, Yhats)

# Graphing
windows() #or quartz 
library(cowplot)
BarPlot <- ggplot(Yhats, aes(x = CigTypeStr, y = Predicted, fill = SmokeConStr))+
  geom_bar(stat='identity', position=position_dodge(.9))+
  geom_errorbar(aes(ymax=CIHi, ymin=CILo), position=position_dodge(.9), width=0.25)+
  ylab('Startle Response') +  # Y-axis title
  xlab('Cigarette Type') + # X-axis title +
  scale_y_continuous(expand=c(0,0))


BarPlot

# What would we do if we wanted to switch which IV was on the X axis?
# We'd only have to change the first line and the name of the axis!