##############################
#### General Linear Model ####
####  Psychology 610/710  ####
####        Lab 10        ####
####   10 November 2017   ####
##############################

##########################
#### DATASET #############
##########################
# This dataset is loosely based on a study conducted in John Curtin's lab involving
# stress reactivity of smokers. Smokers were either deprived of nicotine for 24 hours
# or not deprived of nicotine (SmokeCon). The researchers measured the participants'
# startle response (STL), and wether they smoked standard or e-ciggaretts (CigType) 
# Startle response, as you may remember from lecture, is a physiological reaction to a startling stimulus 
# (e.g., eyeblink) and can be used to measure stress reactivity. We expect that participants in the 
# deprived condition will have greater startle response than those in the non-deprived condition, 
# however it is hypothesized that this manipulation may have a different effect on standard versus e-cig users.

library(lmSupport)
library(psych)
library(car)
library(ggplot2)

# Read it in


# Do some data exploration


# Univariate plots and bivariate relationships


# We can see the number of observations in each condition


# Note:
# Coding for SmokeCon: Deprived = -0.5, NonDeprived = 0.5
# Coding for CigType: Standard = -0.5, e-cigs = 0.5

# OK what do we need to do to center these variables? 



# Descriptive stats for each of the four possible predictor variable groupings:

# Any initial impressions?

# First, let's fit an additive model in which we test the effect of nicotine
# deprivation, controlling for cig type.


#Quick outlier check  

res = modelCaseAnalysis(mAdd, "RESIDUALS")
d[res$Rownames,]

#Model assumptions checks 


# One thing to note, though, is that sometimes violations of model assumptions are attributable to an
# interaction effect that hasn't been measured. That is, the residuals might not be normally
# distributed because of error, but because an unmeasured interaction influences the data.

#Should we check the linear assummption?



#OK, let's look at the model results 



# What does b1 represent? 


# What does b2 represent? 


# What is the direction of these differences? (i.e. which condition/cig type has higher startle response?)


# What is the conclusion from this model?

# Let's now test whether smoking deprivation affected standard and e cig users differently.
# How do we do this? 

# How does computing an interaction with dichotomous variables differ from computing 
# an interaction with continuous variables? 



#Quick outlier check 



#Model assumptions checks 


# It seems that this function only handles models that are coded in r as additive (e.g., X1 + X2)
# What equivalent notation did we learn last week that would allow us to have an interactive term 
# but still only type the model out as additive? 


#Now let's look at the effects 

# Reminder: When we look at the significance test of the interaction term...

# What is the augmented model?



# What is the compact model?   




# What does b1 represent?


# What does b2 represent?


# What does b3 represent?


#### SIMPLE EFFECTS #### 
# Simple effects: The effect of one variable at a particular value of its moderator.
# How do we compute them? 
# What if we want to know the simple effect of condition for for standard smokers? 

# Remmeber:
# Coding for SmokeCon: Deprived = -0.5, NonDeprived = 0.5
# Coding for CigType: Standard = -0.5, e-cigs = 0.5
# Compute the simple effect.


# What's the conclusion of this test?

# Now, test the effect of deprivation for e-cig smokers.
# How can we code this in a way that allows us to keep the direction of the effect consistent? 



# What's the conclusion of this test?

# What doesn't change between all of these analyses?


###############################################
####  Bar plot: Two dichotomous variables  ####
###############################################

# Recode from numeric to actual names
# Remmeber:
# Coding for SmokeCon: Deprived = -0.5, NonDeprived = 0.5
# Coding for CigType: Standard = -0.5, e-cigs = 0.5

d$CigTypeStr <- varRecode(d$CigTypeC, c(-0.5, 0.5), c("Standard","e-cigs"))
d$SmokeConStr <- varRecode(d$SmokeConC, c(-0.5, 0.5), c("Deprived","Nondeprived"))

# Make the strings a factor
d$CigTypeStr=as.factor(d$CigTypeStr)
d$SmokeConStr=as.factor(d$SmokeConStr)

# Re-run the model with IVs as factors and make sure that everything looks fine
mIntPlot <- lm(STL ~ SmokeConStr*CigTypeStr, data=d)
modelSummary(mIntPlot)
# Are these the same numbers as before? Yup.

# You have to make predictions for each condition separately.
YhatDeprived <- data.frame(SmokeConStr = 'Deprived', CigTypeStr = c('Standard','e-cigs'))
YhatDeprived <- modelPredictions(mIntPlot, YhatDeprived)
# look at these predictions. Do they make sense?
YhatDeprived

YhatNonDeprived <- data.frame(SmokeConStr = 'Nondeprived', CigTypeStr = c('Standard','e-cigs'))
YhatNonDeprived <- modelPredictions(mIntPlot, YhatNonDeprived)
YhatNonDeprived

Yhats <- rbind(YhatDeprived,YhatNonDeprived) # Combine Yhats

# Graphing
windows() #or quartz 
BarPlot <- ggplot(Yhats, aes(x = SmokeConStr, y = Predicted, fill = CigTypeStr))+
  geom_bar(stat='identity', position=position_dodge(.9))+
  geom_errorbar(aes(ymax=CIHi, ymin=CILo), position=position_dodge(.9), width=0.25)+
  theme_bw()+
  theme(legend.title=element_blank(),
        panel.grid.major=element_blank(),
        panel.grid.minor=element_blank(),
        panel.border=element_blank(),
        axis.line=element_line(),
        text=element_text())+
  ylab('Startle Response') +  # Y-axis title
  xlab('Nicotine Deprivation')  # X-axis title
#  scale_fill_grey()    # Grayscale automatic colors

BarPlot

# What would we do if we wanted to switch which IV was on the X axis?