#### R key for take-home 1 ####

#### 1. ####
# (2 points) Check out the data
d <- dfReadDat('THE1_Data.dat')
varDescribe(d)
head(d)
summary(d)
d[sample(nrow(d), 6), ]   # spot-check 6 random rows

#### 2. ############
# (2 points) remove height and weight by keeping only the variables we want
ColumnNames <- c('BaseLoneli', 'AppCondition', 'AppHours', 'LaterLoneli')
d <- d[, ColumnNames]     # indexing into data frame
d                         # making sure data looks right

# another (moderately less nice) way
# NOTE(review): after the subset above these columns are already gone, so
# these lines are a no-op here; they are kept to show the alternative method.
d$Height <- NULL
d$Weight <- NULL

#### 3. ####
# (2 points) describe LaterLoneli by condition
varDescribeBy(d$LaterLoneli, d$AppCondition)
# 3.58 in the no app condition, 3.83 in the app condition

#### 4. ####
# (3 points) shift loneliness to have a lower bound of 0 (0-6 scale)
d$BaseLoneli06 <- d$BaseLoneli - 1
d$LaterLoneli06 <- d$LaterLoneli - 1
varDescribeBy(d, d$AppCondition)

#### 5. ####
# (2 points) histogram of baseline loneliness
# FIX: hist() plots counts by default, so the y axis shows frequency, not
# percent; the label is corrected to match what is actually plotted.
hist(d$BaseLoneli06, xlab = 'Baseline Loneliness', ylab = 'Frequency')
# positively skewed

#### 6. ####
# (2 points) values of app condition
table(d$AppCondition)
# Control = 0, LOOK@ME App = 1; this is called 'dummy coding'
# more people in control than app

#### 7. ####
# (4 points) 0-centered app condition
d$AppCondition0C <- varRecode(d$AppCondition, c(0, 1), c(-.5, .5))
d$AppCondition0C
# 0 represents someone who's theoretically neutral with regard to receiving the App.
# It represents someone in the hypothetical middle of the two groups.
# The interpretation of this score does not change when group sizes change.

#### 8. ####
# (8 points) baseline differences
m <- lm(BaseLoneli06 ~ AppCondition, data = d)
modelSummary(m)      # b = .14, t = 1.097, p = 0.274
modelEffectSizes(m)  # partial eta squared = .007
# The analysis showed that there were no significant differences between the two groups at baseline in
# terms of loneliness, b = .14, t(178) = 1.097, p = 0.274. The two groups only differed by .14 units.
# It is important to check for baseline differences to make sure that any evidence for an app effect
# at test is not due to existing group differences.

#### 9.
#### (3 points) interpret b0
# b0 represents average baseline loneliness for people in the control group (0 on AppCondition)

#### 10. ####
# (5 points) control for app condition
m <- lm(LaterLoneli06 ~ AppCondition0C, data = d)
modelSummary(m)
modelEffectSizes(m)
# Regardless of app condition, participants in the sample showed a significant amount of loneliness,
# b = 2.7, t(178) = 39.529, p < .001.
# This test shows that there was non-zero loneliness in the sample controlling for group. In other words,
# people in the sample were at least somewhat lonely at post-test.

#### 11. ######
# (5 points) condition effect?
m <- lm(LaterLoneli06 ~ AppCondition, data = d)
modelSummary(m)
modelEffectSizes(m)
# or, equivalently, with the centered predictor
# FIX: the centered variable created in #7 is AppCondition0C; the key
# originally referenced a nonexistent 'AppConditionZC', which would error.
m <- lm(LaterLoneli06 ~ AppCondition0C, data = d)
modelSummary(m)
modelEffectSizes(m)
# The app did not appear to reduce loneliness scores, b = 0.25, t(178) = 1.792, p = .0749.
# Condition only accounted for 1.77% of the variance in later loneliness scores.

##### 12. #####
# (3 points) does centering matter?
# No, because the interpretation of b1 does not change (only the interpretation of b0 changes)

### 13 figure ####
# (8 points) graph it
pY <- data.frame(AppCondition = c(0, 1))
pY <- modelPredictions(m, pY)   # predicted means + CI bounds per condition
library(ggplot2)
plot1 <- ggplot(pY, aes(x = AppCondition, y = Predicted))
plot1
plot1 <- plot1 + geom_bar(mapping = aes(fill = as.factor(AppCondition)), data = pY,
                          stat = "identity", width = 0.5)
plot1
plot1 <- plot1 + geom_point(data = d, aes(y = LaterLoneli06), colour = 'darkgrey',
                            position = position_jitter(w = 0.1, h = 0.1))
plot1
plot1 <- plot1 + geom_errorbar(width = .25, aes(ymin = CILo, ymax = CIHi), stat = "identity")
plot1
plot1 <- plot1 + labs(y = 'Later Loneliness Score', x = 'App Condition') +
  theme_bw(base_size = 14) +
  theme(legend.position = "none")
plot1
plot1 <- plot1 + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())
plot1
plot1 <- plot1 + scale_x_continuous(breaks = c(seq(0, 1, 1)))
plot1
# Going a "step above" with this graph would involve doing what we did in the lab 5 exercise: making a
# new string version of condition and graphing using that variable. Definitely not necessary for full
# credit here, but in the future we'll ask you to do that more often.

### 14 #####
# (5 points) app hours
# FIX: keep the outcome inside the formula (LaterLoneli06, not d$LaterLoneli06)
# so the model is fully resolved via data = d, consistent with sections 8-11.
m <- lm(LaterLoneli06 ~ AppHours, data = d)
modelSummary(m)
modelEffectSizes(m)
# Participants in the no-app condition (i.e., AppHours = 0) reported a significant level of loneliness,
# b = 2.5, t(178) = 28.34, p < 0.001. For every one additional hour participants in the app condition
# used the app, loneliness scores increased by .07, b = 0.07, t(178) = 2.68, p = .008. App hours
# accounted for 3.9% of the variance in loneliness scores, partial eta squared = .039.

### 15 #####
# (4 points) mean-center app hours
d$AppHoursMC <- d$AppHours - mean(d$AppHours)
mC <- lm(LaterLoneli06 ~ AppHoursMC, data = d)
modelSummary(mC)
modelEffectSizes(mC)
# Stays the same: All stats for b1.
# Changes: All stats for b0
# Mean centering does not change the slope of the regression line, thus b1 stays the same
# Mean centering changes the intercept of the regression line, thus b0 changes.

### 16 #####
# (2 points) effects plot
# FIX: keep the outcome inside the formula (resolved via data = d), consistent
# with the other models in this key.
m <- lm(LaterLoneli06 ~ AppHours, data = d)
library(effects)
plot(effect('AppHours', m))
# Note there are no observations of AppHours between 0 and ~3. This makes sense based on
# how condition worked, but we should be cautious making inferences about people who
# use the app for 1-2 hours.

### 17 #####
# (8 points) scatterplot
pY2 <- data.frame(AppHours = seq(min(d$AppHours), max(d$AppHours), length = 180))
# many of the possible values of the AppHours variable
pY2 <- modelPredictions(m, pY2)  # get means and upper/lower SE bounds for the conditions
plot2 <- ggplot(d, aes(x = AppHours, y = LaterLoneli06))  # set general parameters for the plot
plot2
plot2 <- plot2 + geom_point(position = position_jitter(w = 0, h = .1))
plot2
plot2 <- plot2 + geom_smooth(aes(ymin = CILo, ymax = CIHi, y = Predicted), data = pY2,
                             stat = "identity", color = "red")
# add regression line and confidence bands
plot2  # and view it again
plot2 <- plot2 + theme_bw(base_size = 14) +  # remove background grey and add labels
  labs(x = 'Hours Spent on App', y = 'Later Loneliness')
plot2  # and again
plot2 <- plot2 + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())
# remove grid
plot2  # and for the last time, we view! much better

#### 18 #####
# (4 points) compare dichotomous versus continuous
mC <- lm(LaterLoneli06 ~ AppHours, data = d)
modelSummary(mC)
modelEffectSizes(mC)
mD <- lm(LaterLoneli06 ~ AppCondition, data = d)
modelSummary(mD)
modelEffectSizes(mD)
# The parameter estimate for b1 is different (as should be expected). More importantly,
# b1 was not significant with the dichotomous predictor but is significant when using
# the continuous predictor. The continuous predictor has a lower standard error, accounts for more
# variance and is thus more powerful.
# FIX: this sentence was bare text with no comment marker, which is an R
# syntax error; restored as a comment (continuation of the section 18 answer).
# Using a dichotomous predictor often discards useful information.

### 19 ####
# (15 points)
# in word doc