#### THE2 KEY ####

# 1.
d = dfReadDat("THE2_Ex1.dat")
varDescribe(d)
d[sample(d$PetName, 6), ]
varPlot(d$Score)
# Score seems to be roughly normal across the possible values. The means of Species and
# Condition indicate even numbers of cats and dogs and of the two conditions.

# 2.
varDescribeBy(d$Score, list(d$Species, d$Condition))
# It would appear so, yes.

# 3.
m1.1 = lm(Score ~ Condition + Species, data=d)
modelSummary(m1.1)
confint(m1.1)
modelEffectSizes(m1.1)
# Cats perform better than dogs when controlling for condition, b = 5.03, [2.21, 7.86],
# t(104) = 3.54, p < .001, peta2 = .10. Pets in the training condition perform better
# than controls, controlling for species, b = 3.82, [.99, 6.64], t(104) = 2.68, p < .01,
# peta2 = .06.

# 4.
# a.
# No. None of the pets can be extreme on the predictor variables since the predictor
# variables are both dichotomous.

# b.
res = modelCaseAnalysis(m1.1, "RESIDUALS")
# No extreme outliers.

# c.
inf = modelCaseAnalysis(m1.1, "COOKSD")
d[inf$Rownames, ]
# I identified 3: Dexter, Mortimer, and Spock.
modelCaseAnalysis(m1.1, 'DFBETAS')
# Dexter appears to be unduly influencing the Species parameter.
inf
# He also has the highest Cook's D value.

# d.
# Dexter. He is far away from the rest of the data.
# Dexter is very bad at the task for a cat. Maybe he fell asleep during testing.
# You could also make an argument for removing Mortimer, Spock, and Spud. This was
# unintentionally a bit of a trick question, because Spock and Spud have equal Cook's D
# values. If one is removed, the other should be as well. Maybe better to not remove either.

# e.
dFull = d
d = dfRemoveCases(d, which(d$PetName=='Dexter'))

# 5.
d$ConditionC = d$Condition - .5
d$SpeciesC = d$Species - .5
# OR do this using varRecode, e.g., d$ConditionC = varRecode(d$Condition, c(0,1), c(-.5,.5))
m1.2 = lm(Score ~ SpeciesC * ConditionC, data=d)
modelSummary(m1.2)
confint(m1.2)
modelEffectSizes(m1.2)

# a.
# The main effect of species is significant when controlling for condition and their
# interaction, indicating cats are better than dogs at the task overall, b = 5.41,
# [2.76, 8.07], t(102) = 4.05, p < .001, peta2 = .138.

# b.
library(effects)
plot(effect('SpeciesC:ConditionC', m1.2))
# There is a significant interaction indicating that the difference in scores across
# training conditions is greater for dogs than for cats, meaning the training was more
# useful for them, b = -7.84, [-13.15, -2.54], t(102) = -2.93, p < .01, peta2 = .078.

# 6.
d$ConditionT = d$ConditionC - .5   # Training = 0, Control = -1, so SpeciesC now tests the simple effect among trained pets
varDescribe(d$ConditionT)
m1.3 = lm(Score ~ SpeciesC * ConditionT, data=d)
modelSummary(m1.3)
confint(m1.3)
modelEffectSizes(m1.3)
# There is not a significant effect of species in the training condition, t(102) = 0.79,
# p = .43. This means that dogs who are trained perform just as well on the task as cats
# who are trained.

# 7.
d$ConditionS = as.factor(varRecode(d$Condition, c(0,1), c('Control','Training')))
d$SpeciesS = as.factor(varRecode(d$Species, c(0,1), c('Dog','Cat')))
mPlot1 = lm(Score ~ ConditionS*SpeciesS, data=d)
X = expand.grid(ConditionS = c('Control','Training'), SpeciesS = c('Dog','Cat'))
Y = modelPredictions(mPlot1, X)
library(cowplot)
plot1 = ggplot(d, aes(x = SpeciesS, y = Score, fill = ConditionS)) +
  geom_bar(stat='identity', position=position_dodge(.9), aes(y=Predicted), data=Y) +
  geom_errorbar(aes(ymin = CILo, ymax = CIHi, y = Predicted), data=Y,
                position=position_dodge(.9), width=.5) +
  scale_fill_brewer(palette = 'Accent') +
  scale_y_continuous(limits = c(0,30), expand=c(0,0)) +
  labs(x = 'Species', fill = 'Condition')
plot1

# 8 in word doc.
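# Optional sensitivity check (a sketch, not part of the original key): refit the final
# interaction model on the full data saved in 4e (i.e., with Dexter included) to see how
# much the Species estimate shifts when the influential case is retained. mCheck is a
# hypothetical name introduced here.
dFull$ConditionC = dFull$Condition - .5
dFull$SpeciesC = dFull$Species - .5
mCheck = lm(Score ~ SpeciesC * ConditionC, data=dFull)
modelSummary(mCheck)   # compare the SpeciesC coefficient to m1.2's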
############### EXAMPLE 2 ###############

# 1.
d = dfReadDat('THE2_Ex2.dat')
varDescribe(d)
d[sample(nrow(d), 6), ]   # view 6 random cases
varPlot(d$Fat)
varPlot(d$Spice)
varPlot(d$Pumpkin)
varPlot(d$SCM)
varPlot(d$SgyBtm)
# The ingredient variables are roughly normally distributed, with relatively fat tails.
# This makes sense based on what the researcher is trying to do. There are many more pies
# without soggy bottoms than with.

# 2.
# a.
cor(d$Pumpkin, d$SCM)
plot(d$Pumpkin, d$SCM)
d$Pumpkin + d$SCM   # 32 for all
# The variables are perfectly inversely correlated. In essence, the grad student kept the
# total filling amount constant, so as one quantity increased, the other necessarily
# decreased accordingly.

# b.
d$Filling = d$Pumpkin - d$SCM
varPlot(d$Filling)
# Min is -8: a pie that has 8 ounces more SCM than pumpkin pie filling.
# Max is 8: a pie that has 8 ounces more pumpkin pie filling than SCM.

# c.
d$Pumpkin = NULL
d$SCM = NULL

# 3.
# a.
library(psych)
corr.test(d[, c('Scrummy','Beauty','Dodgy')])
alpha(d[, c('Dodgy','Beauty','Scrummy')], keys='Dodgy')   # check.keys=TRUE is also ok
# Alpha is .77, which is pretty good, and won't go up if any item is removed.

# b.
d$Rating = varScore(d, Forward = c('Scrummy','Beauty'), Reverse='Dodgy', Range=c(1,7))/3
varPlot(d$Rating)
# Somewhat negatively skewed, but not too bad.

# 4.
m2.1 = lm(Rating ~ Filling + Spice, data=d)
modelSummary(m2.1)

# a.
modelAssumptions(m2.1, 'NORMAL')    # looks decent, though a bit wonky
modelAssumptions(m2.1, 'CONSTANT')  # looks pretty nice, though not super linear in the right pane
modelAssumptions(m2.1, 'LINEAR')    # not super linear

# b.
# I probably wouldn't transform. If I did, I'd transform the Rating variable. It's less
# normally distributed than the others and, unlike them, doesn't actually have any meaning
# in raw terms. Both the others are based on real, meaningful units.

# c.
modelBoxCox(m2.1)   # lambda = 1.47
# Since lambda is greater than 1, this transformation would spread out larger scores and
# compress smaller scores.

# 5.
d$FatM = d$Fat - mean(d$Fat)
d$SgyBtmM = d$SgyBtm - .5   # varRecode also ok
m2.2 = lm(Rating ~ FatM*SgyBtmM, data=d)
modelSummary(m2.2)
modelEffectSizes(m2.2)
confint(m2.2)

# a.
plot(effect('FatM:SgyBtmM', m2.2))
# There was a positive effect of fat for pies neutral with respect to their bottom,
# b = 0.13, [.07, .20], t(124) = 4.02, p < .001, peta2 = .12.
# There was a significant interaction indicating that the effect of the amount of fat
# used was more positive for pies that did not have a soggy bottom, b = -0.14,
# [-0.27, -0.02], t(124) = -2.21, p < .05, peta2 = .04.

# b.
d$SgyBtmY = d$SgyBtmM - .5   # Soggy = 0, Crisp = -1
m2.3 = lm(Rating ~ FatM*SgyBtmY, data=d)
modelSummary(m2.3)
# There is no effect of fat for pies that have a soggy bottom, t(124) = 1.11, p = .27.
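# Optional sanity check (a sketch, assuming m2.2 and m2.3 above): the simple slope of fat
# for soggy-bottom pies can also be recovered directly from m2.2's coefficients, since the
# slope of FatM at a given SgyBtmM value is b1 + b3*SgyBtmM. It should match the FatM
# coefficient in m2.3.
b = coef(m2.2)
b['FatM'] + b['FatM:SgyBtmM'] * .5   # fat slope when SgyBtmM = .5 (soggy)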
# 6.
d$SpiceM = d$Spice - mean(d$Spice)
d$FillingM = d$Filling - mean(d$Filling)
m2.4 = lm(Rating ~ FillingM*SpiceM, data=d)   # named m2.4 to avoid overwriting m2.3 from 5b
modelSummary(m2.4)
modelEffectSizes(m2.4)
confint(m2.4)

# a.
plot(effect('FillingM:SpiceM', m2.4))
# We did not observe a positive effect of spice for pies with an equal amount of pumpkin
# pie filling and SCM, t(124) = 1.9, p = .06. However, we did observe the predicted
# interaction: spice had a more positive effect for pies with less pumpkin in them,
# b = -0.05, [-0.08, -0.02], t(124) = -2.94, p < .01, peta2 = .065.

# b.
# b1: For pies with an average amount of spice, each additional ounce of pumpkin pie
#     filling versus SCM led to a .004 point increase in Mary's rating, n.s.
# b2: For pies with an equal amount of pumpkin pie filling and SCM, each additional
#     teaspoon of pumpkin pie spice increased Mary's rating by .16 points, p = .06.
# b3: The relationship between spice amount and rating became .05 units weaker (more
#     negative) for each additional ounce of pumpkin pie filling versus SCM.

# 7.
# Hypothesis 1:
d$SgyBtmS = as.factor(varRecode(d$SgyBtm, c(0,1), c('Crisp','Soggy')))
mPlot21 = lm(Rating ~ Fat * SgyBtmS, data=d)
X = expand.grid(Fat = seq(min(d$Fat), max(d$Fat), length=128),
                SgyBtmS = c('Crisp','Soggy'))
Y = modelPredictions(mPlot21, X)
library(cowplot)
plot2 = ggplot(d, aes(x=Fat, y=Rating, color=SgyBtmS)) +
  geom_point() +
  scale_color_brewer(palette='Dark2') +
  geom_smooth(aes(ymin = CILo, ymax = CIHi, y = Predicted), stat='identity', data=Y) +
  labs(x='Amount of fat used (oz)', color = 'Crust quality')
plot2

# Hypothesis 2:
mPlot22 = lm(Rating ~ Filling*Spice, data=d)
XL = expand.grid(Filling = seq(min(d$Filling), max(d$Filling), length=128),
                 Spice = mean(d$Spice) - sd(d$Spice))
XH = expand.grid(Filling = seq(min(d$Filling), max(d$Filling), length=128),
                 Spice = mean(d$Spice) + sd(d$Spice))
YL = modelPredictions(mPlot22, XL)
YH = modelPredictions(mPlot22, XH)
plot3 = ggplot(d, aes(x = Filling, y = Rating, color=Spice)) +
  geom_point() +
  geom_smooth(aes(x = Filling, ymin = CILo, ymax = CIHi, y = Predicted), data=YL, stat='identity') +
  geom_smooth(aes(x = Filling, ymin = CILo, ymax = CIHi, y = Predicted), data=YH, stat='identity') +
  scale_x_continuous(breaks=c(-8, -6, -4, -2, 0, 2, 4, 6, 8)) +
  labs(x = 'Balance of canned pumpkin to SCM (oz)', color = 'Spice (tsp)')
plot3
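# Optional follow-up (a sketch, assuming m2.4 from 6): the simple slopes of spice at +/- 1 SD
# of the filling balance can be computed from the coefficients, mirroring the two bands in
# plot3. The slope of SpiceM at a given FillingM value is b2 + b3*FillingM.
b = coef(m2.4)
b['SpiceM'] + b['FillingM:SpiceM'] * -sd(d$Filling)   # spice slope at -1 SD filling (more SCM)
b['SpiceM'] + b['FillingM:SpiceM'] *  sd(d$Filling)   # spice slope at +1 SD filling (more pumpkin)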