#### R key for take-home 1 ####

#### 1. ####
# (2 points) Check out the data
d <- dfReadDat('THE1_Data.dat')
varDescribe(d)
head(d)
summary(d)
d[sample(nrow(d), 6), ]   # spot-check 6 random rows

#### 2. ############
# (2 points) remove height and weight by keeping only the variables we want
ColumnNames <- c('BaseLoneli', 'AppCondition', 'AppHours', 'LaterLoneli')
d <- d[, ColumnNames]     # indexing into data frame
d                         # making sure data looks right

# another (moderately less nice) way
# NOTE(review): after the subset above these columns are already gone, so
# these lines are a no-op here; they are kept to show the alternative method.
d$Height <- NULL
d$Weight <- NULL

#### 3. ####
# (2 points) describe LaterLoneli by condition
varDescribeBy(d$LaterLoneli, d$AppCondition)
# 3.58 in the no app condition, 3.83 in the app condition

#### 4. ####
# (3 points) shift loneliness to have a lower bound of 0 (0-6 scale)
d$BaseLoneli06 <- d$BaseLoneli - 1
d$LaterLoneli06 <- d$LaterLoneli - 1
varDescribeBy(d, d$AppCondition)

#### 5. ####
# (2 points) histogram of baseline loneliness
# FIX: hist() plots counts by default, so the y axis shows frequency, not
# percent; the label is corrected to match what is actually plotted.
hist(d$BaseLoneli06, xlab = 'Baseline Loneliness', ylab = 'Frequency')
# positively skewed

#### 6. ####
# (2 points) values of app condition
table(d$AppCondition)
# Control = 0, LOOK@ME App = 1; this is called 'dummy coding'
# more people in control than app

#### 7. ####
# (4 points) 0-centered app condition
d$AppCondition0C <- varRecode(d$AppCondition, c(0, 1), c(-.5, .5))
d$AppCondition0C
# 0 represents someone who's theoretically neutral with regard to receiving the App.
# It represents someone in the hypothetical middle of the two groups.
# The interpretation of this score does not change when group sizes change.

#### 8. ####
# (8 points) baseline differences
m <- lm(BaseLoneli06 ~ AppCondition, data = d)
modelSummary(m)      # b = .14, t = 1.097, p = 0.274
modelEffectSizes(m)  # partial eta squared = .007
# The analysis showed that there were no significant differences between the two groups at baseline in
# terms of loneliness, b = .14, t(178) = 1.097, p = 0.274. The two groups only differed by .14 units.
# It is important to check for baseline differences to make sure that any evidence for an app effect
# at test is not due to existing group differences.

#### 9.
#### (3 points) interpret b0
# b0 represents average baseline loneliness for people in the control group (0 on AppCondition)

#### 10. ####
# (5 points) control for app condition
m <- lm(LaterLoneli06 ~ AppCondition0C, data = d)
modelSummary(m)
modelEffectSizes(m)
# Regardless of app condition, participants in the sample showed a significant amount of loneliness,
# b = 2.7, t(178) = 39.529, p < .001.
# This test shows that there was non-zero loneliness in the sample controlling for group. In other words,
# people in the sample were at least somewhat lonely at post-test.

#### 11. ######
# (5 points) condition effect?
m <- lm(LaterLoneli06 ~ AppCondition, data = d)
modelSummary(m)
modelEffectSizes(m)
# or, equivalently, with the centered predictor
# FIX: the centered variable created in #7 is AppCondition0C; the key
# originally referenced a nonexistent 'AppConditionZC', which would error.
m <- lm(LaterLoneli06 ~ AppCondition0C, data = d)
modelSummary(m)
modelEffectSizes(m)
# The app did not appear to reduce loneliness scores, b = 0.25, t(178) = 1.792, p = .0749.
# Condition only accounted for 1.77% of the variance in later loneliness scores.

##### 12. #####
# (3 points) does centering matter?
# No, because the interpretation of b1 does not change (only the interpretation of b0 changes)

### 13 figure ####
# (8 points) graph it
pY <- data.frame(AppCondition = c(0, 1))
pY <- modelPredictions(m, pY)   # predicted means + CI bounds per condition
library(ggplot2)
plot1 <- ggplot(pY, aes(x = AppCondition, y = Predicted))
plot1
plot1 <- plot1 + geom_bar(mapping = aes(fill = as.factor(AppCondition)), data = pY,
                          stat = "identity", width = 0.5)
plot1
plot1 <- plot1 + geom_point(data = d, aes(y = LaterLoneli06), colour = 'darkgrey',
                            position = position_jitter(w = 0.1, h = 0.1))
plot1
plot1 <- plot1 + geom_errorbar(width = .25, aes(ymin = CILo, ymax = CIHi), stat = "identity")
plot1
plot1 <- plot1 + labs(y = 'Later Loneliness Score', x = 'App Condition') +
  theme_bw(base_size = 14) +
  theme(legend.position = "none")
plot1
plot1 <- plot1 + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())
plot1
plot1 <- plot1 + scale_x_continuous(breaks = c(seq(0, 1, 1)))
plot1
# Going a "step above" with this graph would involve doing what we did in the lab 5 exercise: making a
# new string version of condition and graphing using that variable. Definitely not necessary for full
# credit here, but in the future we'll ask you to do that more often.

### 14 #####
# (5 points) app hours
# FIX: keep the outcome inside the formula (LaterLoneli06, not d$LaterLoneli06)
# so the model is fully resolved via data = d, consistent with sections 8-11.
m <- lm(LaterLoneli06 ~ AppHours, data = d)
modelSummary(m)
modelEffectSizes(m)
# Participants in the no-app condition (i.e., AppHours = 0) reported a significant level of loneliness,
# b = 2.5, t(178) = 28.34, p < 0.001. For every one additional hour participants in the app condition
# used the app, loneliness scores increased by .07, b = 0.07, t(178) = 2.68, p = .008. App hours
# accounted for 3.9% of the variance in loneliness scores, partial eta squared = .039.

### 15 #####
# (4 points) mean-center app hours
d$AppHoursMC <- d$AppHours - mean(d$AppHours)
mC <- lm(LaterLoneli06 ~ AppHoursMC, data = d)
modelSummary(mC)
modelEffectSizes(mC)
# Stays the same: All stats for b1.
# Changes: All stats for b0
# Mean centering does not change the slope of the regression line, thus b1 stays the same
# Mean centering changes the intercept of the regression line, thus b0 changes.

### 16 #####
# (2 points) effects plot
# FIX: keep the outcome inside the formula (resolved via data = d), consistent
# with the other models in this key.
m <- lm(LaterLoneli06 ~ AppHours, data = d)
library(effects)
plot(effect('AppHours', m))
# Note there are no observations of AppHours between 0 and ~3. This makes sense based on
# how condition worked, but we should be cautious making inferences about people who
# use the app for 1-2 hours.

### 17 #####
# (8 points) scatterplot
pY2 <- data.frame(AppHours = seq(min(d$AppHours), max(d$AppHours), length = 180))
# many of the possible values of the AppHours variable
pY2 <- modelPredictions(m, pY2)  # get means and upper/lower SE bounds for the conditions
plot2 <- ggplot(d, aes(x = AppHours, y = LaterLoneli06))  # set general parameters for the plot
plot2
plot2 <- plot2 + geom_point(position = position_jitter(w = 0, h = .1))
plot2
plot2 <- plot2 + geom_smooth(aes(ymin = CILo, ymax = CIHi, y = Predicted), data = pY2,
                             stat = "identity", color = "red")
# add regression line and confidence bands
plot2  # and view it again
plot2 <- plot2 + theme_bw(base_size = 14) +  # remove background grey and add labels
  labs(x = 'Hours Spent on App', y = 'Later Loneliness')
plot2  # and again
plot2 <- plot2 + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())
# remove grid
plot2  # and for the last time, we view! much better

#### 18 #####
# (4 points) compare dichotomous versus continuous
mC <- lm(LaterLoneli06 ~ AppHours, data = d)
modelSummary(mC)
modelEffectSizes(mC)
mD <- lm(LaterLoneli06 ~ AppCondition, data = d)
modelSummary(mD)
modelEffectSizes(mD)
# The parameter estimate for b1 is different (as should be expected). More importantly,
# b1 was not significant with the dichotomous predictor but is significant when using
# the continuous predictor. The continuous predictor has a lower standard error, accounts for more
# variance and is thus more powerful.
# FIX: this sentence was bare text with no comment marker, which is an R
# syntax error; restored as a comment (continuation of the section 18 answer).
# Using a dichotomous predictor often discards useful information.

### 19 ####
# (15 points)
# in word doc