# Lab 2 Exercise Key
# 15 September 2017

library(lmSupport)   # dfReadDat, varDescribe, varScore, modelSummary
library(psych)       # alpha

# 1.
d = dfReadDat("ThreatData.dat")
str(d)
summary(d)
varDescribe(d)

# 2. composite scores

# a and b
d$perf1Z = (d$perf1 - mean(d$perf1)) / sd(d$perf1)
varDescribe(d$perf1Z)
d$perf2Z = (d$perf2 - mean(d$perf2)) / sd(d$perf2)
varDescribe(d$perf2Z)
d$perf3Z = (d$perf3 - mean(d$perf3)) / sd(d$perf3)
varDescribe(d$perf3Z)

# c.
alpha(d[,c('perf1Z','perf2Z','perf3Z')])  # .5
alpha(d[,c('perf1','perf2','perf3')])     # .44
# It does matter. Why? Because the three response scales have different ranges.
# Since this is an outcome (likely preregistered), we should probably still combine
# the items, though an alpha of .5 is clearly sub-optimal, and exploratory analyses
# could examine the outcomes separately.

# d and e
d$perfM <- varScore(d, Forward=c('perf1Z','perf2Z','perf3Z'), MaxMiss = .35, Prorate=T)
varDescribe(d$perfM)

# f
# The performance scores are measured on different scales, so they aren't comparable
# in their raw form. The scale that happens to go higher will exert a stronger
# influence on the composite if the raw scores are averaged.

# 3.
plot(d$income, d$perfM)
abline(lm(d$perfM ~ d$income))
# There appears to be a slight positive relationship.

# 4.
hist(d$perfM)
?hist
hist(d$perfM, breaks=12)

# 5.
# It wouldn't make sense. The mean of the composite performance score is 0, so we
# would be comparing two models that make identical predictions for every participant.

# 6.
mod1 = lm(perf2 ~ 1, data=d)
modelSummary(mod1, t=F)
# People performed significantly better than 0 on the Perf2 measure, F(1,23) = 58.2, p < .001.
# The intercept estimate is equal to the mean of the Perf2 measure.
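
# --- Supplementary sketch, not part of the original key ---
# The z-scores computed by hand in 2a/b can also be obtained with base R's scale(),
# which centers and scales a column in one call (same n-1 denominator as sd()).
# This assumes perf1 contains no missing values, as in the hand computation above;
# perf1Zalt is a hypothetical scratch variable used only for this check.
d$perf1Zalt = as.numeric(scale(d$perf1))
all.equal(d$perf1Zalt, d$perf1Z)   # should be TRUE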
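
# --- Supplementary sketch, not part of the original key ---
# Quick base-R check on the composite from 2d/e. varScore() (as I understand it)
# returns a prorated sum of the items, so with no missing data it equals 3 times
# the simple row mean of the z-scored items; either way the two are perfectly
# correlated. perfCheck is a hypothetical scratch variable, not part of the key.
perfCheck = rowMeans(d[, c('perf1Z', 'perf2Z', 'perf3Z')])
cor(perfCheck, d$perfM, use = 'complete.obs')   # should be ~1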
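
# --- Supplementary sketch, not part of the original key ---
# The "slight positive relationship" noted in 3 could be quantified with a simple
# correlation; the exact value depends on the data file, so none is reported here.
cor(d$income, d$perfM, use = 'complete.obs')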
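
# --- Supplementary sketch, not part of the original key ---
# Two quick checks on the claims in 5 and 6. For 6, the intercept of the mean-only
# model should reproduce the sample mean of perf2. For 5, fitting the same kind of
# model to the standardized composite shows its intercept is ~0, which is why
# comparing a mean-only model to a zero model for perfM would be uninformative.
coef(mod1)[1]                    # intercept estimate from lm(perf2 ~ 1)
mean(d$perf2)                    # should match the intercept
confint(mod1)                    # 95% CI for the mean of perf2
coef(lm(perfM ~ 1, data = d))    # intercept near 0 for the z-based composite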