## assignment for lecture 7. To hand in before Friday 11th of November, 18.00
## (1)
# Sample 1000 random normally distributed (mean = 100, sd = 15) IQ scores and assign them
# to IQ by writing IQ = rnorm(1000, 100, 15).
# a) Make a histogram of the distribution of IQ, and use 50 breakpoints.
# b) Make it again, but change the limits of the x-axis to run from 30 to 180 and have the
# x-axis label say "Intelligence Quotient".
# c) Add a fat red vertical line at the maximum and minimum value that you sampled.
# =========================================================================================
# Draw 1000 IQ scores from a normal distribution with mean 100 and sd 15.
IQ <- rnorm(1000, mean = 100, sd = 15)
# a) Histogram of the sampled scores, asking for 50 breakpoints as the
#    question requires.
hist(IQ, breaks = 50)
# b) Same histogram, with the x-axis running from 30 to 180 and the
#    requested x-axis label.
hist(IQ, breaks = 50, xlim = c(30, 180), xlab = "Intelligence Quotient")
# c) Redraw the histogram of b) and mark the smallest and largest sampled
#    value with a fat red vertical line. range(IQ) is c(min(IQ), max(IQ)).
hist(IQ, breaks = 50, xlim = c(30, 180), xlab = "Intelligence Quotient")
abline(v = range(IQ), col = "red", lwd = 10)
# or, equivalently:
abline(v = c(min(IQ), max(IQ)), col = "red", lwd = 10)
# =========================================================================================
## (2)
# In this question, use the built-in data set ToothGrowth. I would always first assign
# ToohtGrwoth to a variable with a name that you less easily misspell, like d.
# a) Use tapply to calculate the mean of len for each supplement (Orange Juice and Vitamin C)
# crossed with each dose and assign the resulting matrix to the variable means.
# b) Use this "means" to plot the mean of "len" for each dose of supplement
# Orange Juice (OJ), with type = 'b' (both points and lines).
# c) Use lines() to add the means for Vitamin C (VC). Again, type = 'b'.
# d) There is a problem: the line doesn't fit. Change the ylim of your answer of b)
# to make it fit.
# =========================================================================================
# Work on a short-named copy of the built-in data set.
d <- ToothGrowth

# a) Mean tooth length per supplement (rows) crossed with dose (columns).
means <- with(d, tapply(len, list(supp, dose), mean))

# b) First row of the matrix, drawn as points joined by lines.
plot(means[1, ], type = "b")

# c) Second row added to the existing plot.
lines(means[2, ], type = "b")

# d) Same two steps with a y-range wide enough to show both rows.
plot(means[1, ], ylim = c(8, 26), type = "b")
lines(means[2, ], type = "b")

# or generic: let the data determine the y-range.
plot(means[1, ], ylim = range(means), type = "b")
lines(means[2, ], type = "b")
# =========================================================================================
## (3)
# The plot you made above is not that nice yet.
# Make it again, but now change the following:
# - As you see, the x-axis in your plot says 1, 2, 3. That is because you probably didn't give
# an x-input, and R just plotted the means on x = 1, 2, and 3. Add the x-input in
# accordance with the doses (0.5, 1, 2; you could get these from dimnames(means))
# - Make the axis labels interesting (set the xlab and ylab arguments of plot())
# - Make the VC line blue and the OJ line orange.
# =========================================================================================
# Plot mean tooth length against the actual dose for both supplements.
# The doses are the column names of `means` (the question suggests taking
# them from its dimnames), converted to numbers so they can serve as
# x-coordinates.
doses <- as.numeric(colnames(means))
# Rows are selected by name rather than by position so the colour cannot be
# attached to the wrong supplement: OJ is orange, VC is blue.
plot(doses, means["OJ", ], type = "b", ylim = c(8, 26), col = "orange",
     xlab = "Dose", ylab = "Length")
lines(doses, means["VC", ], type = "b", col = "blue")
# =========================================================================================
## (4)
# Recreate the figure in assignment7_4_plot.pdf, that is included in the assignment
# materials. To get the values for the points and the line, you should use one of the
# functions that you learned in lecture 2 for creating sequences. You need one call of
# plot() and add the grey lines with one call of abline(h = ..., ...).
# =========================================================================================
# Staircase outline: x moves one unit to the right per step while y drops
# one unit per step, from (1, 5) down to (6, 1).
x <- c(1, rep(2:5, each = 2), 6)
y <- rep(seq(5, 1), each = 2)
plot(x, y, type = "l", lwd = 5, xlab = "FLOOR", ylab = "WALL",
     main = "No Balls on the Stairs!")

# Four big filled circles, one per step, in the colours 1 to 4.
points(2:5 + 0.5, 4:1 + 0.5, pch = 19, cex = 10, col = 1:4)

# Red cross over the whole picture: the two diagonals of the plot region.
lines(c(1, 6), c(1, 5), lwd = 4, col = "red")
lines(c(6, 1), c(1, 5), lwd = 4, col = "red")
# =========================================================================================
## (5)
# Create: x = rnorm(n = 100, mean = 10, sd = 2) and y = x + rnorm(100, 0, 2)
# a) plot y on the y-axis against x on the x-axis (just as points)
# b) Do a linear regression from y on x and assign the output to out, by running:
# out = lm(y ~ x). Add the regression line to the plot running abline(out).
# Make sure that the direction of the axes and the regression line fit together.
# (that was not really a question)
# c) You see, abline can be used in different ways. Summarize which ways there are.
# =========================================================================================
# a)
# Simulate 100 predictor values and a response that equals the predictor
# plus normally distributed noise, then show them as a scatter plot.
x <- rnorm(100, mean = 10, sd = 2)
y <- x + rnorm(100, mean = 0, sd = 2)
plot(x, y)

# b) Regress y on x and draw the fitted line on top of the scatter plot.
out <- lm(y ~ x)
abline(out)
# c)
# You can use abline() to draw:
# 1) a linear function (by giving an intercept a and a slope b)
# 2) vertical lines, by saying v = ...
# 3) horizontal lines, by saying h = ...
# 4) input the coefficients in a vector (like option 1, but then a and b in one vector)
# 5) input the output of a linear regression (e.g., from lm(y ~ x))
# =========================================================================================
## (6) Challenge Question
# For this question, use the built-in variable ChickWeight.
# This variable contains the growth of some chickens over time.
# I save ChickWeight to d, because I like short names.
d <- ChickWeight
# Now, do the following trick: cross-tabulate weight by chick (rows) and
# time (columns), taking the mean of each cell.
means <- with(d, tapply(weight, list(Chick, Time), mean))
# It does actually not make sense to calculate the mean per chicken per day, because
# there is only one measurement per chicken, per day. However, the trick gives you a
# nice matrix of chicken by time, which we are gonna use below. (have a look at means)
# Question:
# run: plot(0, xlim = c(0, 21), ylim = c(0, 400), type = 'n')
# This creates an empty plot (because type = 'n') with the dimensions I set.
# lines() is a function, and can be applied to, e.g., each row of the means you calcu-
# lated above. Do this (apply(means, .., lines)) and see that the lines are added to
# the empty plot. The x-values against which the values are plotted should be the
# column names of "means" (those are the timepoints). You can add "x = " as last
# argument to apply.
# Check the example in the lecture!
# =========================================================================================
# Chicken-by-time matrix of weights: one row per chick, one column per
# measurement time.
d6 <- ChickWeight
means <- tapply(d6$weight, list(d6$Chick, d6$Time), mean)

# The column names of `means` are the time points, stored as character
# strings. Convert them to numbers explicitly instead of relying on lines()
# to coerce them.
times <- as.numeric(colnames(means))

# Empty canvas with the requested dimensions (type = "n" draws no points).
plot(0, xlim = c(0, 21), ylim = c(0, 400), type = "n",
     xlab = "Time", ylab = "Weight")

# Draw one growth curve per chick (per row of `means`). apply() is used only
# for its side effect, so its value is wrapped in invisible() to keep it
# from being printed at top level.
invisible(apply(means, 1, lines, x = times))
# =========================================================================================