# Lecture 6
# ===================================================================
## (4) Recap question:
# Recap data set: 12 subjects labelled a-l, the reward each one received, and
# a logical flag per subject (presumably marking the subjects who were not
# paid, see question a below).
# TRUE/FALSE are written out in full: T and F are ordinary variables that can
# be reassigned, whereas TRUE and FALSE are reserved words.
subj <- letters[1:12]
reward <- c(2, 4, 3, 6, 2, 5, 3, 4, 2, 3, 4, 6)
forgotmoney <- c(
  FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
  FALSE, TRUE, FALSE, TRUE, FALSE, FALSE
)
d3 <- data.frame(subj, reward, forgotmoney)
# ===================================================================
## (4) Recap question:
# a) Logical indexing: Who are the subjects that you didn't pay? (of course, use R-code
# to tell the answer)
# b) Making a table: Make a frequency table (table()) that shows how many got paid and how
# many didn't
# c) Logical indexing: What is the mean of the rewards that are higher than 3
# d) Integration of some tricks and indexing: Which participants got an even reward?
# e) And what were those rewards?
# f) Use numeric indexing to print admin.data (the data frame created above as
# d3) starting from the bottom row. So, first, admin.data[12, ], then
# admin.data[11, ], and so forth.
# g) Select from admin.data only the subject name and the forgotmoney variable,
# for those subjects who had a reward higher than 3.
# h) Add a column to admin.data called "best" that contains 1 for all subjects who
# scored higher than 3, and 0 otherwise. The easiest way is to first add a column of
# zeros and then change some of them to ones.
# challenge exercise:
# ===================================================================
# Load data
# NOTE(review): this absolute path only exists on the original author's
# machine. It is kept because the relative path given to load() below is
# resolved against this working directory; adjust it before running.
setwd("/Users/gdutilh/Dropbox/teaching/R/RHS2016/lecture_6")
load("../lecture_5/materials_lecture_5/data_lecture_5.Rdata")
# For each participant, compare the RT of correct and error responses with a
# t-test and collect the p-values (one per participant, in order of first
# appearance in d$subj).
# Presumably the loaded file provides a data frame `d` with the columns subj,
# RT and correct -- confirm against the lecture 5 materials.
subjects <- unique(d$subj)
# Preallocate the result instead of growing it with c() on every iteration.
pvalues <- numeric(length(subjects))
for (k in seq_along(subjects)) {
  tempd <- d[d$subj == subjects[k], ]
  # RT and correct are looked up in tempd via the data argument.
  t.out <- t.test(RT ~ correct, data = tempd)
  pvalues[k] <- t.out$p.value
}
# ===================================================================
### arrays: n-dimensional matrices
# A two-dimensional array and a matrix built from the same values.
a <- array(1:12, dim = c(3, 4))
m <- matrix(1:12, nrow = 3, ncol = 4)
is(a)
is(m) # both report the same classes: they are exactly the same!
# Unlike matrices, arrays can have more than two dimensions.
# Say we have 3 participants who are asked 4 questions on 7 different days.
# An empty container for such data has 3 dimensions with 7, 4 and 3 levels:
alldata <- array(NA, dim = c(7, 4, 3))
alldata
# Create some random example data: draw 7 * 4 * 3 numbers between 1 and 5
# (with replacement), one for every cell of the array.
some.data <- sample(1:5, size = 7 * 4 * 3, replace = TRUE)
# Pour the numbers into a 7 x 4 x 3 array.
alldata <- array(some.data, dim = c(7, 4, 3))
# Bare numbers are boring, so name the dimensions and their levels and pretend
# this is interesting data.
dimnames(alldata) <- list(
  day = 1:7,
  variable = c("Q1", "Q2", "Q3", "Q4"),
  participant = c("A", "B", "C")
)
# Values are accessed the same way as in a matrix; there are simply more
# dimensions, so more commas between [ ].
alldata[2, 4, ] # day 2, question 4, all participants
alldata[, c(1, 3), 1] # all days, 1st and 3rd question, participant 1
# Indexing by name is much more insightful, e.g. the answers of participant B
# on question Q2 on all days:
alldata[, "Q2", "B"]
# You can even write the dimension name in front of each index. Note that `[`
# ignores these names and matches indices by position only, so they are just
# labels for the reader: the order must still be day, variable, participant.
alldata["day" = 2, "variable" = "Q3", ]
# is the same as
alldata["2", "Q3", ]
# is the same as (day "2" is row 2, Q3 is column 3)
alldata[2, 3, ]
# Assign new values to specific cells: days 1 and 2, question Q4, first
# participant.
alldata[1:2, "Q4", 1] <- c(33, 44)
alldata
# Suppose variable Q1 of participant C (third slice) was entered in the wrong
# order: overwrite days 1..7 with the values read from day 7 down to day 1.
alldata[1:7, "Q1", 3] <- alldata[rev(1:7), "Q1", 3]
alldata
# The same kind of fix for Q2, to learn a new function on the fly: rev()
# reverses a vector.
alldata[, "Q2", 3] <- rev(alldata[, "Q2", 3])
alldata
# ===================================================================
### apply functions: among the greatest things of R!
### apply()
# Mean answer for every question (dim 2) by participant (dim 3) combination,
# i.e. averaged over the days.
apply(alldata, MARGIN = c(2, 3), FUN = mean)
# The same, with na.rm = TRUE handed on to mean().
apply(alldata, MARGIN = c(2, 3), FUN = mean, na.rm = TRUE)
# Per participant: the average answer and its standard deviation.
apply(alldata, MARGIN = 3, FUN = mean, na.rm = TRUE)
apply(alldata, MARGIN = 3, FUN = sd, na.rm = TRUE)
# Per question, on day 1, the average answer: alldata[1, , ] is a
# question x participant matrix, so margin 1 runs over the questions.
apply(alldata[1, , ], MARGIN = 1, FUN = mean)
# Margins can also be selected by dimension name instead of number.
apply(alldata, MARGIN = c("day", "variable"), FUN = mean, na.rm = TRUE)
# All days and questions of participant A as a plain matrix.
data.ppA <- alldata[, , "A"]
# NOTE(review): this repeats the call from two statements up and data.ppA is
# not used afterwards -- possibly data.ppA was meant here; confirm.
apply(alldata, MARGIN = c("day", "variable"), FUN = mean, na.rm = TRUE)
apply(
  alldata,
  MARGIN = c("participant", "variable"),
  FUN = mean,
  na.rm = TRUE
)
# ===================================================================
### tapply()
# Use one of the example data sets that ship with R (see data() for the list).
d <- ToothGrowth
head(d)
# Which vitamin supplement gives longer teeth?
# One way is a for loop over the supplement types, printing each type followed
# by the mean tooth length of its rows:
for (i in unique(d$supp)) {
  print(i)
  print(mean(d[d$supp == i, "len"]))
}
# tapply() does the same in one line: mean of len within each level of supp.
tapply(d$len, INDEX = d$supp, FUN = mean)
# What effect does the dose have?
tapply(d$len, INDEX = d$dose, FUN = mean)
# Back to the vitamins: we want the dose effect split out per supplement.
# See ?tapply. A small but important trick is to pass a list of grouping
# variables, which gives a supp x dose table of means:
out <- tapply(d$len, INDEX = list(d$supp, d$dose), FUN = mean)
# See also aggregate(), which returns the same means in long format:
aggregate(x = d$len, by = list(d$supp, d$dose), FUN = mean)
# this, and many other functions, do something similar with a different
# input/output format.
# In fact, there are many others, including
# sapply() # Recommendable to read about
# lapply() # Recommendable to read about
# mapply() # Recommendable to read about
# with() # I never use this
# by() # I never use this
# ===================================================================