The original code from Statistical Rethinking is shown below.
Code from Statistical Rethinking modified by R Pruim is shown below. Differences to the oringal include:
lattice
and ggplot2
rather than base graphicstidyverse
for data transformationpos <- replicate(1000, sum(runif(16, -1, 1)))
prod(1 + runif(12, 0, 0.1))
## [1] 1.844075
growth <- replicate(10000, prod(1 + runif(12, 0, 0.1)))
densityplot(~ growth)
big <- replicate(10000, prod(1 + runif(12, 0, 0.5)))
small <- replicate(10000, prod(1 + runif(12, 0, 0.01)))
log.big <- replicate(10000, log(prod(1 + runif(12, 0, 0.5))))
w <- 6
n <- 9
Water9 <-
expand.grid(
p = seq(from = 0, to = 1, by = 0.01)) %>%
mutate(prior = dunif(p, 0, 1),
likelihood = dbinom(w, n, p),
posterior.raw = prior * likelihood,
posterior1 = posterior.raw / sum(posterior.raw),
posterior = posterior1 / 0.01
)
xyplot(prior + posterior ~ p, data = Water9, type = "l")
library(rethinking)
data(Howell1)
str(Howell1)
## 'data.frame': 544 obs. of 4 variables:
## $ height: num 152 140 137 157 145 ...
## $ weight: num 47.8 36.5 31.9 53 41.3 ...
## $ age : num 63 63 65 41 51 35 32 27 19 54 ...
## $ male : int 1 0 0 1 0 1 0 1 0 1 ...
glimpse(Howell1)
## Observations: 544
## Variables: 4
## $ height <dbl> 151.7650, 139.7000, 136.5250, 156.8450, 145.4150, 163.8...
## $ weight <dbl> 47.82561, 36.48581, 31.86484, 53.04191, 41.27687, 62.99...
## $ age <dbl> 63.0, 63.0, 65.0, 41.0, 51.0, 35.0, 32.0, 27.0, 19.0, 5...
## $ male <int> 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1...
Howell1$height
HowellAdults <- Howell1 %>% filter(age >= 18)
plotDist("norm", mean = 178, sd = 20)
plotDist("unif", min = 0, max = 50)
PriorSample <- data_frame(
mu = rnorm(1e4, 178, 20),
sigma = runif(1e4, 0, 50),
height = rnorm(1e4, mu, sigma)
)
densityplot( ~ height, data = PriorSample)
llnorm <- function(x, mu, sigma) {
sum(dnorm(x, mean = mu, sd = sigma, log = TRUE))
}
# use all of x for each mu-sigma combo
llnorm <- Vectorize(llnorm, c("mu", "sigma"))
HowellAdultsGrid <-
expand.grid(
mu = seq(from = 140, to = 160, length.out = 200),
sigma = seq(from = 4, to = 9, length.out = 200)) %>%
mutate(
logprior =
dnorm(mu, 178, 20, log = TRUE) +
dunif(sigma, 0, 50, log = TRUE),
loglik = llnorm(HowellAdults$height, mu, sigma),
posterior.log = logprior + loglik,
posterior.raw = exp(posterior.log - max(posterior.log)),
posterior = posterior.raw / sum(posterior.raw)
)
contourplot(posterior ~ mu + sigma, data = HowellAdultsGrid)
levelplot(posterior ~ mu + sigma, data = HowellAdultsGrid, contour = TRUE)
levelplot(posterior.log ~ mu + sigma, data = HowellAdultsGrid, contour = TRUE)
## R code 4.17
PosteriorSample <-
sample(HowellAdultsGrid, size = 1e4, replace = TRUE,
prob = HowellAdultsGrid$posterior)
xyplot(sigma ~ mu, data = PosteriorSample,
cex = 0.5,
pch = 16,
col = col.alpha(rangi2, 0.1)
)
ggplot(PosteriorSample, aes(x = mu, y = sigma)) +
geom_point(alpha = 0.2) +
geom_density2d()
ggplot(PosteriorSample, aes(x = mu, y = sigma)) +
geom_hex()
densityplot( ~ mu, data = PosteriorSample)
densityplot( ~ sigma, data = PosteriorSample)
HPDI(PosteriorSample$mu)
## |0.89 0.89|
## 153.8693 155.1759
HPDI(PosteriorSample$sigma)
## |0.89 0.89|
## 7.266332 8.195980
# sample only heights
sample(HowellAdults$height, size = 20)
## [1] 167.005 158.115 160.020 145.415 160.655 157.480 146.685 146.050
## [9] 163.830 165.735 144.780 154.940 162.560 170.180 164.465 152.400
## [17] 147.320 149.860 165.100 160.020
# sample complete rows
sample(HowellAdults, size = 20)
## height weight age male orig.id
## 239 149.2250 45.07570 47.0 0 239
## 198 161.2900 50.43376 41.0 1 198
## 236 153.0350 39.97279 49.5 0 236
## 71 156.8450 47.62716 31.0 1 71
## 132 161.9250 56.95415 38.7 1 132
## 296 147.3200 35.55027 66.0 0 296
## 126 156.2100 44.11182 21.0 0 126
## 331 161.3000 43.30000 20.0 1 331
## 134 159.3850 50.17862 63.0 1 134
## 4 156.8450 53.04191 41.0 1 4
## 264 154.3050 48.87454 50.0 0 264
## 170 149.2250 42.12736 18.0 0 170
## 138 141.6050 31.52464 19.0 1 138
## 337 148.2852 38.44192 39.0 0 337
## 80 142.8750 32.71532 73.3 0 80
## 219 152.4000 43.85668 33.0 1 219
## 107 147.3200 36.88270 22.0 0 107
## 89 151.1300 42.26910 48.0 0 89
## 74 146.0500 42.80774 23.0 0 74
## 192 171.1198 56.55725 37.0 1 192
llnorm <- function(x, mu, sigma) {
sum(dnorm(x, mean = mu, sd = sigma, log = TRUE))
}
# use all of x for each mu-sigma combo
llnorm <- Vectorize(llnorm, c("mu", "sigma"))
Howell20 <- sample(HowellAdults, 20)
Howell20Grid <-
expand.grid(
mu = seq(from = 140, to = 160, by = 0.1),
sigma = seq(from = 2, to = 12, by = 0.1)) %>%
mutate(
logprior =
dnorm(mu, 178, 20, log = TRUE) +
dunif(sigma, 0, 50, log = TRUE),
loglik = llnorm(Howell20$height, mu, sigma),
posterior.log = logprior + loglik,
posterior.raw = exp(posterior.log - max(posterior.log)),
posterior = posterior.raw / sum(posterior.raw)
)
Howell20Posterior <-
sample(Howell20Grid, size = 1e4,
replace = TRUE,
prob = Howell20Grid$posterior)
xyplot(sigma ~ mu, data = Howell20Posterior,
cex = 0.5, alpha = 0.1,
xlab = expression(mu), ylab = expression(sigma),
pch = 16
)
ggplot(Howell20Posterior, aes(x = mu, y = sigma)) +
geom_point(alpha = 0.1) +
geom_density2d() +
labs(x = expression(mu), y = expression(sigma))
densityplot( ~ sigma, data = Howell20Posterior)
histogram( ~ sigma, data = Howell20Posterior, width = 0.1, fit = "normal")
xqqmath( ~ sigma, data = Howell20Posterior)
library(rethinking)
data(Howell1)
HowellAdults <- Howell1 %>% filter(age >= 18)
flist <-
alist(
height ~ dnorm(mu, sigma),
mu ~ dnorm(178, 20),
sigma ~ dunif(0, 50))
m4.1 <- map(flist, data = HowellAdults)
precis(m4.1)
## Mean StdDev 5.5% 94.5%
## mu 154.61 0.41 153.95 155.27
## sigma 7.73 0.29 7.27 8.20
start <-
list(
mu = mean( ~ height, data = HowellAdults),
sigma = sd( ~ height, data = HowellAdults))
m4.2 <- map(alist(height ~ dnorm(mu, sigma),
mu ~ dnorm(178, 0.1),
sigma ~ dunif(0, 50)),
data = HowellAdults)
precis(m4.2)
## Mean StdDev 5.5% 94.5%
## mu 177.86 0.10 177.70 178.02
## sigma 24.52 0.93 23.03 26.00
vcov(m4.1)
## mu sigma
## mu 0.1697396519 0.0002180449
## sigma 0.0002180449 0.0849058736
diag(vcov(m4.1))
## mu sigma
## 0.16973965 0.08490587
cov2cor(vcov(m4.1))
## mu sigma
## mu 1.000000000 0.001816291
## sigma 0.001816291 1.000000000
library(rethinking)
post <- extract.samples(m4.1, n = 1e4)
head(post)
## mu sigma
## 1 154.0952 7.818999
## 2 154.0870 7.547863
## 3 154.2969 7.712996
## 4 154.6555 7.995772
## 5 154.6835 7.668860
## 6 153.7837 7.678249
precis(post)
## Mean StdDev |0.89 0.89|
## mu 154.61 0.42 153.91 155.23
## sigma 7.73 0.29 7.28 8.20
library(MASS)
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
post <- mvrnorm(n = 1e4,
mu = coef(m4.1),
Sigma = vcov(m4.1))
m4.1_logsigma <-
map(
alist(
height ~ dnorm(mu, exp(log_sigma)),
mu ~ dnorm(178, 20),
log_sigma ~ dnorm(2, 10)
), data = HowellAdults)
m4.1_logsimga.post <-
extract.samples(m4.1_logsigma) %>%
mutate(sigma = exp(log_sigma))
xyplot(height ~ weight, data = HowellAdults)
# load data again, since it's a long way back
library(rethinking)
data(Howell1)
HowellAdults <-
Howell1 %>% filter(age > 18)
# fit model
m4.3 <-
map(
alist(
height ~ dnorm(mu, sigma),
mu <- a + b * weight,
a ~ dnorm(156, 100),
b ~ dnorm(0, 10),
sigma ~ dunif(0, 50)
),
data = HowellAdults)
# but note: this doesn't work with link()!
m4.3a <- map(alist(
height ~ dnorm(a + b * weight, sigma),
a ~ dnorm(178, 100),
b ~ dnorm(0, 10),
sigma ~ dunif(0, 50)
),
data = HowellAdults)
precis(m4.3)
## Mean StdDev 5.5% 94.5%
## a 113.82 1.94 110.73 116.92
## b 0.91 0.04 0.84 0.97
## sigma 5.11 0.19 4.80 5.42
precis(m4.3, corr = TRUE)
## Mean StdDev 5.5% 94.5% a b sigma
## a 113.82 1.94 110.73 116.92 1.00 -0.99 0
## b 0.91 0.04 0.84 0.97 -0.99 1.00 0
## sigma 5.11 0.19 4.80 5.42 0.00 0.00 1
Centering weight
:
HowellAdults <-
HowellAdults %>%
mutate(weight.c = weight - mean(weight))
m4.4 <-
map(
alist(
height ~ dnorm(mu, sigma),
mu <- a + b * weight.c,
a ~ dnorm(178, 100),
b ~ dnorm(0, 10),
sigma ~ dunif(0, 50)
),
data = HowellAdults)
precis(m4.4, corr = TRUE)
## Mean StdDev 5.5% 94.5% a b sigma
## a 154.64 0.27 154.21 155.08 1 0 0
## b 0.91 0.04 0.84 0.97 0 1 0
## sigma 5.11 0.19 4.80 5.42 0 0 1
# lattice with custom panel function to overlay multiple things
xyplot(height ~ weight, data = HowellAdults,
panel = function(x, y, ...) {
panel.abline(coef(m4.3))
panel.xyplot(x, y, ...)
}
)
# using plotFun() with add = TRUE to plot a function on top of points
xyplot(height ~ weight, data = HowellAdults)
plotFun(a + b * x ~ x, a = coef(m4.3)["a"], b = coef(m4.3)["b"], add = TRUE, col = "red")
# creating a separate function for use with plotFun()
xyplot(height ~ weight, data = HowellAdults)
line.fit <- function(x, a = coef(m4.3)["a"], b = coef(m4.3)["b"]) {
a + b * x
}
plotFun(line.fit(height) ~ height, add = TRUE, col = "red")
m4.3.post <- extract.samples(m4.3)
head(m4.3.post)
## a b sigma
## 1 114.1036 0.8953189 5.037232
## 2 110.9993 0.9517947 5.118368
## 3 113.3171 0.9219457 5.072727
## 4 113.6348 0.9145692 4.963323
## 5 116.4568 0.8592025 5.242424
## 6 113.4839 0.9185361 5.055407
## R code 4.48
HowellSmall <- Howell1 %>% sample(10)
m4.4small <-
map(alist(
height ~ dnorm(mu, sigma),
mu <- a + b * weight,
a ~ dnorm(178, 100),
b ~ dnorm(0, 10),
sigma ~ dunif(0, 50)
),
data = HowellSmall)
# extract 50 samples from the posterior
m4.4small.post <- extract.samples(m4.4small, n = 50)
# display raw data and sample size
xyplot(height ~ weight, data = HowellSmall,
xlim = c(30, 65),
ylim = c(130, 180),
col = rangi2,
main = concat("N = ", 10),
panel = function(x, y, ...) {
panel.xyplot(x, y, ...)
# plot the lines, with transparency
for (i in 1:nrow(m4.4small.post)) {
panel.abline(a = m4.4small.post$a[i], b = m4.4small.post$b[i], col = "black",
alpha = 0.1)
}
}
)
m4.4small.post <-
m4.4small.post %>%
mutate(mu_at_50 = a + b * 50)
densityplot( ~ mu_at_50, data = m4.4small.post,
col = rangi2, lwd = 2, xlab = expression(paste(mu, " | weight=50")))
HPDI(m4.4small.post$mu_at_50, prob = 0.89)
## |0.89 0.89|
## 154.9138 163.1765
mu <- link(m4.3)
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
str(mu)
## num [1:1000, 1:346] 157 158 157 157 158 ...
# define sequence of weights to compute predictions for
# these values will be on the horizontal axis
m4.3.pred <-
data.frame(weight = seq(from = 25, to = 70, by = 1))
# use link to compute mu
# for each sample from posterior
# and for each weight in weight.seq
mu <-
link(
m4.3,
data = m4.3.pred,
)
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
str(mu)
## num [1:1000, 1:46] 137 138 137 138 137 ...
# The shape of mu coming out of link() is backwards for lattice
# t() flips rows and columns to make it work
xyplot(t(mu) ~ weight, data = m4.3.pred, alpha = 0.1, col = rangi2, pch = 16)
# summarize the distribution of mu
mu.mean <- apply(mu, 2, mean)
mu.HPDI <- apply(mu, 2, HPDI, prob = 0.89)
# plot raw data
# fading out points to make line and interval more visible
xyplot(height ~ weight, data = HowellAdults, col = rangi2, alpha = 0.5)
# plot the MAP line, aka the mean mu for each weight
plotPoints(mu.mean ~ weight, data = m4.3.pred, type = "l", add = TRUE)
# plot a shaded region for 89% HPDI
# need to figure out easiest way to do this in lattice
# author's shade() function only works with his base graphics
Here’s roughly how link()
works:
post <- extract.samples(m4.3)
mu.link <- function(weight)
post$a + post$b * weight
weight.seq <- seq(from = 25, to = 70, by = 1)
mu <- sapply(weight.seq, mu.link)
mu.mean <- apply(mu, 2, mean)
mu.HPDI <- apply(mu, 2, HPDI, prob = 0.89)
sim.height <- sim(m4.3, data = m4.3.pred)
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
str(sim.height)
## num [1:1000, 1:46] 140 137 148 141 144 ...
height.PI <- apply(sim.height, 2, PI, prob = 0.89)
# plot raw data
xyplot(height ~ weight, HowellAdults, col = rangi2, alpha = 0.5)
# draw MAP line
plotPoints(mu.mean ~ weight, data = m4.3.pred,
col = "blue", type = "l", add = TRUE)
# need a replacement for shade() that works with lattice.
# draw HPDI region for line
# shade(mu.HPDI, weight.seq)
plotPoints(mu.HPDI[1,] ~ weight, data = m4.3.pred,
col = "navy", type = "l", add = TRUE)
plotPoints(mu.HPDI[2,] ~ weight, data = m4.3.pred,
col = "navy", type = "l", add = TRUE)
# draw PI region for simulated heights
# shade(height.PI, weight.seq)
plotPoints(height.PI[1, ] ~ weight, data = m4.3.pred,
col = "red", type = "l", add = TRUE)
plotPoints(height.PI[2, ] ~ weight, data = m4.3.pred,
col = "red", type = "l", add = TRUE)
sim.height <- sim(m4.3, data = m4.3.pred, n = 1e4)
## [ 1000 / 10000 ]
[ 2000 / 10000 ]
[ 3000 / 10000 ]
[ 4000 / 10000 ]
[ 5000 / 10000 ]
[ 6000 / 10000 ]
[ 7000 / 10000 ]
[ 8000 / 10000 ]
[ 9000 / 10000 ]
[ 10000 / 10000 ]
height.PI <- apply(sim.height, 2, PI, prob = 0.89)
This is roughly how sim()
works.
post <- extract.samples(m4.3)
weight.seq <- 25:70
sim.height <- sapply(weight.seq, function(weight)
rnorm(
n = nrow(post),
mean = post$a + post$b * weight,
sd = post$sigma
))
height.PI <- apply(sim.height, 2, PI, prob = 0.89)
library(rethinking)
data(Howell1)
str(Howell1)
## 'data.frame': 544 obs. of 4 variables:
## $ height: num 152 140 137 157 145 ...
## $ weight: num 47.8 36.5 31.9 53 41.3 ...
## $ age : num 63 63 65 41 51 35 32 27 19 54 ...
## $ male : int 1 0 0 1 0 1 0 1 0 1 ...
require(mosaic) # for zscore()
Howell1 <-
Howell1 %>%
mutate(
weight.s = zscore(weight),
weight.s2 = weight.s^2
)
m4.5 <- map(
alist(
height ~ dnorm(mu, sigma),
mu <- a + b1 * weight.s + b2 * weight.s2,
a ~ dnorm(178, 100),
b1 ~ dnorm(0, 10),
b2 ~ dnorm(0, 10),
sigma ~ dunif(0, 50)
),
data = Howell1
)
precis(m4.5)
## Mean StdDev 5.5% 94.5%
## a 146.66 0.37 146.07 147.26
## b1 21.40 0.29 20.94 21.86
## b2 -8.42 0.28 -8.86 -7.97
## sigma 5.75 0.17 5.47 6.03
m4.5.pred <-
data_frame(
weight.s = seq(from = -2.2, to = 2, length.out = 30),
weight.s2 = weight.s^2
)
mu <- link(m4.5, data = m4.5.pred)
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
sim.height <- sim(m4.5, data = m4.5.pred)
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
mu.PI <- apply(mu, 2, PI, prob = 0.89)
m4.5.pred <-
m4.5.pred %>%
mutate(
mu.mean = apply(mu, 2, mean),
mu.lo = apply(mu, 2, PI)[1,],
mu.hi = apply(mu, 2, PI)[2,],
sim.lo = apply(sim.height, 2, PI)[1,],
sim.hi = apply(sim.height, 2, PI)[2,]
)
xyplot(sim.hi + mu.hi + mu.mean + mu.lo + sim.lo ~ weight.s,
data = m4.5.pred, type = "l", auto.key = list(lines = TRUE, points = FALSE))
plotPoints(height ~ weight.s, data = Howell1, col = rangi2, alpha = 0.5, add = TRUE)
ggplot(aes(x = weight.s), data = m4.5.pred) +
geom_point(aes(y = height), data = Howell1, color = rangi2) +
geom_line(aes(y = mu.lo), color = "navy") +
geom_line(aes(y = mu.hi), color = "navy") +
geom_line(aes(y = sim.lo), color = "red") +
geom_line(aes(y = sim.hi), color = "red")
Howell1 <-
Howell1 %>% mutate(weight.s3 = weight.s^3)
m4.6 <- map(
alist(
height ~ dnorm(mu, sigma),
mu <- a + b1 * weight.s + b2 * weight.s2 + b3 * weight.s3,
a ~ dnorm(178, 100),
b1 ~ dnorm(0, 10),
b2 ~ dnorm(0, 10),
b3 ~ dnorm(0, 10),
sigma ~ dunif(0, 50)
),
data = Howell1
)
xyplot(
height ~ weight, data = Howell1,
col = rangi2, alpha = 0.5)
xyplot(height ~ weight, data = Howell1,
col = rangi2,
scales=list(
x = list(at = round((-2:2) * sd(Howell1$weight) + mean(Howell1$weight), 1))
)
)
## R code 4.73
xyplot(height ~ weight, data = Howell1,
col = rangi2, alpha = 0.4)