# Homework Key for Week 4


# 1. Read in and inspect the data.

# Load the homework data set and take a first look at it.
# NOTE(review): dfReadDat(), varDescribe() and varDescribeBy() are not base R;
# presumably they come from a course package loaded before this script runs
# -- confirm.
d <- dfReadDat("HW4_Data.dat")

# Descriptives for the full sample, then split by beverage group (BG).
varDescribe(d)
varDescribeBy(d, d$BG)
summary(d)

# View() opens a data viewer window, which only makes sense in an
# interactive session; skip it when the script is run non-interactively.
if (interactive()) {
  View(d)
}

# 2. Notice that there are three "Annoying" variables that we do not need for these analyses. 
#Using bracket notation, remove those variables while also 
#sorting the order of the variables (columns) in an order that makes sense to you. 
# Column names before subsetting (still includes the "Annoying" variables).
names(d)

# Keep only the variables needed for the analyses, ordered as
# group -> baseline anxiety -> test anxiety.
ColumnNames <- c("BG", "AnxBase", "AnxTest")
d <- d[, ColumnNames]

# Confirm that only the selected columns remain, in the chosen order.
names(d)


#3. First you may want to test to see if the two beverage groups already differ on their anxiety 
#about receiving a shock before receiving their respective beverages (at baseline). 
#Fit a linear model to test this question. Report the corresponding F-statistic, df and p-value and describe
#the result of the model in words. 
# Regress baseline anxiety on beverage group; the BG coefficient tests
# whether the two groups already differ before receiving their beverages.
m1 <- lm(AnxBase ~ BG, data = d)
modelSummary(m1)
#F(1,94) = 0.00, or t = 0.00, df = 94,
# p = 1. #You will probably never see a t of 0 and a p of 1 again! 

#Participants' baseline anxiety scores did not differ by beverage group. 

#4. From the output of that same linear model that you just ran, interpret the "intercept" or b0 coefficient. 
#What does it mean in this sample? What does its corresponding p value mean? 

#Participants in the no alcohol beverage group, on average, had an anxiety score of 1.9. The p value indicates
#that this value is significantly different from 0, indicating that the shock manipulation did elicit anxiety. 

#5.	Using ggplot, create a bar graph that shows level of anxiety in each beverage group at baseline. 
#Make sure you label each axis. 

library(ggplot2)

# Prediction grid: one row per beverage group. Same idea as last week, but
# only 2 values are needed to represent all possible values of the condition
# variable.
pY1 <- data.frame(BG = c(0, 1))

# Add the model-predicted mean for each condition plus the lower/upper
# standard error bounds (CILo, CIHi) that the error bars are drawn from.
# SE bars represent the standard error of the point estimates.
pY1 <- modelPredictions(m1, pY1)
pY1

# Bars = predicted group means, grey points = raw baseline scores,
# error bars = CILo..CIHi from pY1.
plot <- ggplot(d, aes(x = BG, y = Predicted)) +
  geom_bar(
    data = pY1,
    mapping = aes(fill = as.factor(BG)),
    stat = "identity",
    width = 0.5
  ) +
  # Horizontal jitter keeps overlapping scores visible.
  # NOTE(review): height is left at its default, so points are presumably
  # also jittered slightly on the y axis -- set height = 0 if the exact
  # scores should be shown.
  geom_point(
    data = d,
    mapping = aes(x = BG, y = AnxBase),
    colour = "darkgrey",
    position = position_jitter(width = 0.1)
  ) +
  # `width` used to be passed twice (.25 and 0.75), which is ambiguous; a
  # single value is kept. 0.25 keeps the whiskers narrower than the
  # 0.5-wide bars. The unused `y` aesthetic is no longer set on this layer.
  geom_errorbar(
    data = pY1,
    mapping = aes(x = BG, ymin = CILo, ymax = CIHi),
    width = 0.25
  ) +
  labs(y = "Baseline Anxiety Score", x = "Beverage Group") +
  theme_bw(base_size = 14) +
  theme(legend.position = "none")
plot

#6.	Fit a linear model predicting anxiety from beverage group. 
#Test if beverage group significantly predicts anxiety (report F-statistic, df, and p-value) 
#and provide a 95% confidence interval for the parameter. 
#Describe the effect of beverage group on anxiety in a sentence in your script.

# Regress anxiety at test on beverage group, then print the model summary
# and the confidence intervals (confint() defaults to 95%) for the parameters.
m2 <- lm(AnxTest ~ BG, data = d)
modelSummary(m2)
confint(m2)
#F(1,94) = 13, or t = -3.605, df = 94,
# p < .001.
# confidence interval: -1.453832 -0.4211676

#Alcohol appeared to have a significant effect on anxiety about the upcoming shock. Participants' anxiety 
#in the alcohol group was approximately .9 lower than participants' anxiety in the no-alcohol group. 


#7.Report a variance-based indicator of effect size, along with its interpretation. 

# Print effect-size indicators for m2; the partial eta squared for BG is the
# variance-based effect size reported in the answer that follows.
# NOTE(review): modelEffectSizes() is not base R -- presumably provided by the
# same course package as modelSummary(); confirm it is loaded.
modelEffectSizes(m2)
# partial eta squared = 0.12. Beverage group explains 12% of the variance in anxiety ratings 

#8. 

# Prediction grid: one row per beverage group (BG takes the values 0 and 1).
pY2 <- data.frame(BG = c(0, 1))

# Add the model-predicted mean of test anxiety for each group, plus the
# lower/upper bounds (CILo, CIHi) used for the error bars.
pY2 <- modelPredictions(m2, pY2)
pY2

# Bars = predicted group means, grey points = raw test-anxiety scores,
# error bars = CILo..CIHi from pY2.
plot <- ggplot(d, aes(x = BG, y = Predicted)) +
  geom_bar(
    data = pY2,
    mapping = aes(fill = as.factor(BG)),
    stat = "identity",
    width = 0.5
  ) +
  # Horizontal jitter keeps overlapping scores visible.
  # NOTE(review): height is left at its default, so points are presumably
  # also jittered slightly on the y axis -- set height = 0 if the exact
  # scores should be shown.
  geom_point(
    data = d,
    mapping = aes(x = BG, y = AnxTest),
    colour = "darkgrey",
    position = position_jitter(width = 0.1)
  ) +
  # `width` used to be passed twice (.25 and 0.75), which is ambiguous; a
  # single value is kept. 0.25 keeps the whiskers narrower than the
  # 0.5-wide bars. The unused `y` aesthetic is no longer set on this layer.
  geom_errorbar(
    data = pY2,
    mapping = aes(x = BG, ymin = CILo, ymax = CIHi),
    width = 0.25
  ) +
  labs(y = "Test Anxiety Score", x = "Beverage Group") +
  theme_bw(base_size = 14) +
  theme(legend.position = "none")
plot


#10. time spent on homework