### Lecture 7 2016
setwd('/Users/gdutilh/Dropbox/teaching/R/RHS2016/lecture_7')
# NOTE: the path above is machine specific; point it at your own lecture folder,
# because the relative path in load() below is resolved against it.
# =========================================================================================
# from last week's homework:
load('../lecture_5/materials_lecture_5/data_lecture_5.Rdata')
# writing a tapply as a loop
# the tapply did
tapply(d$RT, d$subj, sd)
# now as a loop
# Reserve one slot per subject up front instead of growing `out` with c() in
# every iteration (growing copies the whole vector each time round).
subjects = unique(d$subj)
out = numeric(length(subjects))
for(k in seq_along(subjects)){
  subjRT = d$RT[d$subj == subjects[k]]   # all RTs of the current subject
  out[k] = sd(subjRT)}
# Label every SD with its subject, like tapply does. The order here is the order
# in which the subjects first appear in d, which may differ from tapply's order.
names(out) = as.character(subjects)
# Titanic:
# and indexing using level names
# How many females who traveled first class survived?
# many people tried something like:
apply(Titanic, c('Sex', 'Class', 'Survived'), sum)
# indexing directly with the (quoted) level names gives the answer:
sum(Titanic['1st','Female', ,'Yes'])
# a level name can just as well come from a variable:
cur.sex = 'Female'
sum(Titanic['1st',cur.sex, ,'Yes'])
# The next line is kept as a comment on purpose: it is not valid R, and leaving it
# active would stop the whole script from being parsed/sourced.
# sum(Titanic[1st, Female, ,Yes]) # does not work, because without quotes "", R assumes
# that any character string you type is a variable name.
# And there are no variables 1st, Female, or Yes (1st is not even a legal name,
# since a name cannot start with a digit).
# Anything you pass in, to a function or to an index, may be stored in a variable first.
# Three such variables: row numbers, a column name, and a logical switch.
first.three = seq_len(3)
the.variable.I.want = 'RT'
lets.remove.nas = TRUE
# rows picked via a variable
d$RT[first.three]
# rows and column both picked via variables
d[first.three, the.variable.I.want]
# a function argument supplied via a variable
mean(x = d$RT, na.rm = lets.remove.nas)
# In short: a variable can stand in for any argument, as long as it holds a value of
# the type that argument expects (numeric, logical, character, etc.)
# =========================================================================================
### Now: graphing
# R's built-in demos give an impression of what its graphics can do
# (the topic may be given as a bare name or as a quoted string):
demo('graphics')
demo('persp')
demo('image')
# =========================================================================================
## simplest: plot()
# Suppose your understanding of R, scored from zero to ten,
# develops like this from lecture to lecture:
R = c(0, 1.3, 1.1, 1.8, 3, 2.7, 5.9, 5.2, 8, 8.2)
# a single vector is plotted against its index ...
plot(x = R)
# ... which shows the same points as
plot(x = 1:10, y = R)
# ... and the same as asking for points explicitly
plot(x = R, type = 'p')
# other types: a line only, or both points and line
plot(x = R, type = 'l')
plot(x = R, type = 'b')
# now build up the figure one argument at a time: x-range,
plot(x = R, xlim = c(-2, 12), type = 'b')
# y-range,
plot(x = R, xlim = c(-2, 12), ylim = c(-2, 11), type = 'b')
# axis labels,
plot(x = R, xlim = c(-2, 12), ylim = c(-2, 11), type = 'b',
     ylab = 'R skills', xlab = 'number of lectures')
# and a title
plot(x = R, xlim = c(-2, 12), ylim = c(-2, 11), type = 'b',
     ylab = 'R skills', xlab = 'number of lectures',
     main = 'My improvement of understanding R')
# =========================================================================================
# So far we used the main arguments of plot;
# a lot more can be set, see ?par
# a few that you will need often:
## color!
# by name ...
plot(R, col = 'red', type = 'b')
# ... or, equivalently, by number
plot(R, col = 2, type = 'b')
# one color per point
plot(R, col = c(2, 1, 3, 4, 2, 3, 1, 2, 3, 2), type = 'p')
plot(R, col = c(2, 1, 3, 4, 2, 3, 1, 2, 3, 2), type = 'b')
# a shorter color vector is recycled until every point has a color
plot(R, col = 1:2, type = 'b')
plot(R, col = 1:5, type = 'b')
## linewidth
plot(R, lwd = 3.5, col = 1:5, type = 'b')
## plotting "character"
plot(R, pch = 1:10, type = 'b')
plot(R, pch = 19, type = 'b')
# an overview of what the numbers for col and pch stand for:
plot(R, pch = 1:10, col = 1:10, type = 'b')
# A plotting character may also be an actual character, here one letter per point:
plot(R, pch = strsplit('CHARACTERS', '')[[1]], type = 'b')
## size: cex
plot(R, cex = 3, pch = 2, type = 'b')
plot(R, cex = 10, pch = 19, type = 'b')
# =========================================================================================
# plot() is a high-level function: every call starts a new figure.
# Low-level plotting functions, in contrast, draw on top of the figure that is there.
# Suppose a second person has these R-skill scores:
R2 = c(.8, 1.7, 1.8, 2.9, 2, 5.2, 6.3, 7.1, 9.7, 13.7)
plot(x = R, type = 'b')
lines(x = R2, col = 2, type = 'b') # drawn into the existing plot; the axes were set
                                   # by R alone, so the highest R2 values fall outside
# the same overlay with the x-values spelled out, now with room on the y-axis
plot(x = R, ylim = c(0, 14), type = 'b')
lines(x = 1:10, y = R2, col = 2, type = 'b')
# once more, with the x-values left implicit
plot(x = R, ylim = c(0, 14), type = 'b')
lines(x = R2, col = 2, type = 'b')
# let the data determine the upper y-limit
plot(x = R, ylim = c(0, max(R, R2)), type = 'b')
lines(x = R2, col = 2, type = 'b')
# lines() can draw just points ...
plot(x = R, ylim = c(0, max(R, R2)), type = 'b')
lines(x = R2, col = 2, type = 'p')
# ... which is what points() does by default
plot(x = R, ylim = c(0, max(R, R2)), type = 'b')
points(x = R2, col = 2) # points and lines are near twins; only their default type differs.
# time to play (everything below draws into the plot that is currently open)
# a closed path of five corner points makes a square
lines(x = c(2, 6, 6, 2, 2), y = c(2, 2, 6, 6, 2),
      lwd = 3, col = 'blue')
# one big green letter
points(x = 7, y = 12, col = 'green',
       cex = 4, pch = 'H')
# twenty letters at random positions and in random sizes
xs = sample(x = 1:10, size = 20, replace = TRUE)
ys = sample(x = 0:14, size = 20, replace = TRUE)
size = runif(n = 20, min = 1, max = 6)
points(x = xs, y = ys, cex = size, col = 6, pch = "H")
# And then there is abline():
abline(a = 2, b = .5, lwd = 2, col = 5) # a line with intercept a and slope b
abline(v = 6.5, lwd = 2, col = 3) # a vertical line
abline(h = 1:10, lwd = 4, col = 'darkblue') # multiple horizontal lines in one call
# =========================================================================================
# Let's plot the mean RT for correct and error responses.
# first calculate these means:
RT.means.cor = tapply(d$RT, d$correct, mean)
# then plot them:
plot(RT.means.cor, type = 'b') # hmm, not very exciting
# does this look similar for all participants?
# (rows: levels of d$correct, columns: subjects)
RT.means.cor.subj = tapply(d$RT, list(d$correct, d$subj), mean)
# first by hand, one call per subject and with hand-picked y-limits:
plot(RT.means.cor.subj[, 1], type = 'b', pch = 1,
     ylim = c(600, 1000))
lines(RT.means.cor.subj[, 2], type = 'b', pch = 2)
lines(RT.means.cor.subj[, 3], type = 'b', pch = 3)
lines(RT.means.cor.subj[, 4], type = 'b', pch = 4)
lines(RT.means.cor.subj[, 5], type = 'b', pch = 5)
lines(RT.means.cor.subj[, 6], type = 'b', pch = 6)
lines(RT.means.cor.subj[, 7], type = 'b', pch = 7)
# make y-limits generic and put it in a loop:
# na.rm = TRUE keeps the limits finite when a cell has no mean
# (e.g. a subject without a single response of one type).
plot(RT.means.cor.subj[, 1], type = 'b', pch = 1,
     ylim = range(RT.means.cor.subj, na.rm = TRUE))
# Loop over the remaining columns, however many subjects there are;
# seq_len(...)[-1] is simply empty when there is only one subject.
for(i in seq_len(ncol(RT.means.cor.subj))[-1]){
  lines(RT.means.cor.subj[, i], type = 'b', pch = i, col = i)}
# what a hassle. Luckily, R has a short-cut function for plotting matrices
# (like RT.means.cor.subj)
matplot(RT.means.cor.subj)
matplot(RT.means.cor.subj, type = 'l') # matplot plots the columns of a matrix
# sadly, it does so with very ugly defaults.
# let's improve that a bit:
# one color and one plotting symbol per column (= subject), derived from the matrix
# itself so that nothing has to change when the number of subjects changes
n.subj = ncol(RT.means.cor.subj)
subj.col = rainbow(n.subj)
subj.pch = seq_len(n.subj)
matplot(RT.means.cor.subj, type = 'b')
matplot(RT.means.cor.subj, type = 'b', lty = 1)
matplot(RT.means.cor.subj, type = 'b', lty = 1, col = subj.col)
matplot(RT.means.cor.subj, type = 'b', lty = 1, col = subj.col, pch = subj.pch)
matplot(RT.means.cor.subj, type = 'b', lty = 1, col = subj.col, pch = subj.pch,
        xlim = c(.5, 2.5), xlab = 'response type', las = 1)
# The legend shows the same symbols and colors as the plot, and takes its labels
# from the column names of the plotted matrix so labels and lines cannot get out of step.
legend('right', lty = 1, col = subj.col, pch = subj.pch,
       legend = colnames(RT.means.cor.subj))
title('Difference in RT between correct and error\nfor each participant')
# =========================================================================================
# histograms (are high level plots!)
hist(d$RT)
hist(d$RT, breaks = 50)
# bin edges every 100 units from 0 to 3000; stored once because the overlaid
# histograms further down must share exactly the same bins
RT.breaks = seq(0, 3000, 100)
hist(d$RT, breaks = RT.breaks, xlim = c(300, 1500))
# let's add Quantile lines
# (the argument is called probs; spelled out in full instead of relying on
# partial argument matching)
RT.quantiles = quantile(d$RT, probs = c(.1, .3, .5, .7, .9))
RT.quantiles
abline(v = RT.quantiles, col = 'blue', lwd = 3)
hist(d$RT[d$correct == 1], breaks = RT.breaks, col = 'green')
hist(d$RT[d$correct == 0], breaks = RT.breaks, col = 'red', add = TRUE) # "add = TRUE"
# turns hist into low level plotting
# (TRUE/FALSE rather than T/F: T and F are ordinary variables that can be overwritten)
legend('top', fill = c('green', 'red'), legend = c('correct', 'error'), bty = 'n')
# to show some other settings of hist:
# freq = FALSE puts densities instead of counts on the y-axis, so that a density
# curve can be drawn on the same scale
hist(d$RT, freq = FALSE, breaks = 30)
lines(density(d$RT), lwd = 2)
# =========================================================================================
## Boxplots are getting popular again, for their quality of indicating data distribution.
boxplot(rnorm(100))
# many plotting functions also "eat" the formula input you know from t.test;
# with data = ... the column names can be used directly in the formula.
boxplot(len ~ supp, data = ToothGrowth)
# one box per combination of dose and supp
boxplot(len ~ dose * supp, data = ToothGrowth)
# There are 3 doses x 2 supps = 6 boxes, with dose varying fastest, so the three
# greys have to be repeated twice (once per supp) to color the boxes by dose.
boxplot(len ~ dose * supp, data = ToothGrowth, col = rep(grey.colors(3), 2))