The original code from Statistical Rethinking is shown below.

Code from Statistical Rethinking modified by R Pruim is shown below. Differences to the oringal include:

R code 4.1

pos <- replicate(1000, sum(runif(16, -1, 1)))

R code 4.2

prod(1 + runif(12, 0, 0.1))
## [1] 1.844075

R code 4.3

growth <- replicate(10000, prod(1 + runif(12, 0, 0.1)))
densityplot(~ growth)

R code 4.4

big <- replicate(10000, prod(1 + runif(12, 0, 0.5)))
small <- replicate(10000, prod(1 + runif(12, 0, 0.01)))

R code 4.5

log.big <- replicate(10000, log(prod(1 + runif(12, 0, 0.5))))

R code 4.6

w <- 6
n <- 9

Water9 <-
  expand.grid(
    p = seq(from = 0, to = 1, by = 0.01)) %>%
  mutate(prior = dunif(p, 0, 1),
         likelihood = dbinom(w, n, p),
         posterior.raw = prior * likelihood,
         posterior1 = posterior.raw / sum(posterior.raw),
         posterior = posterior1 / 0.01
  )

xyplot(prior + posterior ~ p, data = Water9, type = "l")

R code 4.7

library(rethinking)
data(Howell1)

R code 4.8

str(Howell1)
## 'data.frame':    544 obs. of  4 variables:
##  $ height: num  152 140 137 157 145 ...
##  $ weight: num  47.8 36.5 31.9 53 41.3 ...
##  $ age   : num  63 63 65 41 51 35 32 27 19 54 ...
##  $ male  : int  1 0 0 1 0 1 0 1 0 1 ...
glimpse(Howell1)
## Observations: 544
## Variables: 4
## $ height <dbl> 151.7650, 139.7000, 136.5250, 156.8450, 145.4150, 163.8...
## $ weight <dbl> 47.82561, 36.48581, 31.86484, 53.04191, 41.27687, 62.99...
## $ age    <dbl> 63.0, 63.0, 65.0, 41.0, 51.0, 35.0, 32.0, 27.0, 19.0, 5...
## $ male   <int> 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1...

R code 4.9

Howell1$height

R code 4.10

HowellAdults <- Howell1 %>% filter(age >= 18)

R code 4.11

plotDist("norm", mean = 178, sd = 20)

R code 4.12

plotDist("unif", min = 0, max = 50)

R code 4.13

PriorSample <- data_frame(
  mu     = rnorm(1e4, 178, 20),
  sigma  = runif(1e4, 0, 50),
  height = rnorm(1e4, mu, sigma)
)
densityplot( ~ height, data = PriorSample)

R code 4.14

llnorm <- function(x, mu, sigma) { 
  sum(dnorm(x, mean = mu, sd = sigma, log = TRUE))
}
# use all of x for each mu-sigma combo
llnorm <- Vectorize(llnorm, c("mu", "sigma"))
                
HowellAdultsGrid <-
  expand.grid(
    mu = seq(from = 140, to = 160, length.out = 200), 
    sigma = seq(from = 4, to = 9, length.out = 200)) %>% 
  mutate(
    logprior = 
      dnorm(mu, 178, 20, log = TRUE) +
      dunif(sigma, 0, 50, log = TRUE),
    loglik = llnorm(HowellAdults$height, mu, sigma),
    posterior.log = logprior + loglik,
    posterior.raw = exp(posterior.log - max(posterior.log)),
    posterior = posterior.raw / sum(posterior.raw)
  )

R code 4.15

contourplot(posterior ~ mu + sigma, data = HowellAdultsGrid)

R code 4.16

levelplot(posterior ~ mu + sigma, data = HowellAdultsGrid, contour = TRUE)

levelplot(posterior.log ~ mu + sigma, data = HowellAdultsGrid, contour = TRUE)

## R code 4.17

PosteriorSample <- 
  sample(HowellAdultsGrid, size = 1e4, replace = TRUE, 
         prob = HowellAdultsGrid$posterior)

R code 4.18

xyplot(sigma ~  mu, data = PosteriorSample,
  cex = 0.5,
  pch = 16,
  col = col.alpha(rangi2, 0.1)
)

ggplot(PosteriorSample, aes(x = mu, y = sigma)) +
  geom_point(alpha = 0.2) +
  geom_density2d() 

ggplot(PosteriorSample, aes(x = mu, y = sigma)) +
  geom_hex() 

R code 4.19

densityplot( ~ mu, data = PosteriorSample)

densityplot( ~ sigma, data = PosteriorSample)

R code 4.20

HPDI(PosteriorSample$mu)
##    |0.89    0.89| 
## 153.8693 155.1759
HPDI(PosteriorSample$sigma)
##    |0.89    0.89| 
## 7.266332 8.195980

R code 4.21

# sample only heights
sample(HowellAdults$height, size = 20)
##  [1] 167.005 158.115 160.020 145.415 160.655 157.480 146.685 146.050
##  [9] 163.830 165.735 144.780 154.940 162.560 170.180 164.465 152.400
## [17] 147.320 149.860 165.100 160.020
# sample complete rows
sample(HowellAdults, size = 20)
##       height   weight  age male orig.id
## 239 149.2250 45.07570 47.0    0     239
## 198 161.2900 50.43376 41.0    1     198
## 236 153.0350 39.97279 49.5    0     236
## 71  156.8450 47.62716 31.0    1      71
## 132 161.9250 56.95415 38.7    1     132
## 296 147.3200 35.55027 66.0    0     296
## 126 156.2100 44.11182 21.0    0     126
## 331 161.3000 43.30000 20.0    1     331
## 134 159.3850 50.17862 63.0    1     134
## 4   156.8450 53.04191 41.0    1       4
## 264 154.3050 48.87454 50.0    0     264
## 170 149.2250 42.12736 18.0    0     170
## 138 141.6050 31.52464 19.0    1     138
## 337 148.2852 38.44192 39.0    0     337
## 80  142.8750 32.71532 73.3    0      80
## 219 152.4000 43.85668 33.0    1     219
## 107 147.3200 36.88270 22.0    0     107
## 89  151.1300 42.26910 48.0    0      89
## 74  146.0500 42.80774 23.0    0      74
## 192 171.1198 56.55725 37.0    1     192

R code 4.22

llnorm <- function(x, mu, sigma) { 
  sum(dnorm(x, mean = mu, sd = sigma, log = TRUE))
}
# use all of x for each mu-sigma combo
llnorm <- Vectorize(llnorm, c("mu", "sigma"))

Howell20 <- sample(HowellAdults, 20)
                
Howell20Grid <-
  expand.grid(
    mu = seq(from = 140, to = 160, by = 0.1),
    sigma = seq(from = 2, to = 12, by = 0.1)) %>% 
  mutate(
    logprior = 
      dnorm(mu, 178, 20, log = TRUE) +
      dunif(sigma, 0, 50, log = TRUE),
    loglik = llnorm(Howell20$height, mu, sigma),
    posterior.log = logprior + loglik,
    posterior.raw = exp(posterior.log - max(posterior.log)),
    posterior = posterior.raw / sum(posterior.raw)
  )

Howell20Posterior <- 
  sample(Howell20Grid, size = 1e4,
         replace = TRUE, 
         prob = Howell20Grid$posterior)

xyplot(sigma ~ mu, data = Howell20Posterior,
  cex = 0.5, alpha = 0.1,
  xlab = expression(mu), ylab = expression(sigma),
  pch = 16
)

ggplot(Howell20Posterior, aes(x = mu, y = sigma)) +
  geom_point(alpha = 0.1) + 
  geom_density2d() +
  labs(x = expression(mu), y = expression(sigma))

R code 4.23

densityplot( ~ sigma, data = Howell20Posterior)

histogram( ~ sigma, data = Howell20Posterior, width = 0.1, fit = "normal")

xqqmath( ~ sigma, data = Howell20Posterior)

R code 4.24

library(rethinking)
data(Howell1)
HowellAdults <- Howell1 %>% filter(age >= 18)

R code 4.25

flist <- 
  alist(
    height ~ dnorm(mu, sigma),
    mu ~ dnorm(178, 20),
    sigma ~ dunif(0, 50))

R code 4.26

m4.1 <- map(flist, data = HowellAdults)

R code 4.27

precis(m4.1)
##         Mean StdDev   5.5%  94.5%
## mu    154.61   0.41 153.95 155.27
## sigma   7.73   0.29   7.27   8.20

R code 4.28

start <- 
  list(
    mu  = mean( ~ height, data = HowellAdults),
    sigma = sd( ~ height, data = HowellAdults))

R code 4.29

m4.2 <- map(alist(height ~ dnorm(mu, sigma),
                      mu ~ dnorm(178, 0.1),
                   sigma ~ dunif(0, 50)),
            data = HowellAdults)
precis(m4.2)
##         Mean StdDev   5.5%  94.5%
## mu    177.86   0.10 177.70 178.02
## sigma  24.52   0.93  23.03  26.00

R code 4.30

vcov(m4.1)
##                 mu        sigma
## mu    0.1697396519 0.0002180449
## sigma 0.0002180449 0.0849058736

R code 4.31

diag(vcov(m4.1))
##         mu      sigma 
## 0.16973965 0.08490587
cov2cor(vcov(m4.1))
##                mu       sigma
## mu    1.000000000 0.001816291
## sigma 0.001816291 1.000000000

R code 4.32

library(rethinking)
post <- extract.samples(m4.1, n = 1e4)
head(post)
##         mu    sigma
## 1 154.0952 7.818999
## 2 154.0870 7.547863
## 3 154.2969 7.712996
## 4 154.6555 7.995772
## 5 154.6835 7.668860
## 6 153.7837 7.678249

R code 4.33

precis(post)
##         Mean StdDev  |0.89  0.89|
## mu    154.61   0.42 153.91 155.23
## sigma   7.73   0.29   7.28   8.20

R code 4.34

library(MASS)
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
post <- mvrnorm(n = 1e4,
                mu = coef(m4.1),
                Sigma = vcov(m4.1))

R code 4.5

m4.1_logsigma <- 
  map(
    alist(
      height ~ dnorm(mu, exp(log_sigma)),
      mu ~ dnorm(178, 20),
      log_sigma ~ dnorm(2, 10)
    ), data = HowellAdults)

R code 4.36

m4.1_logsimga.post <- 
  extract.samples(m4.1_logsigma) %>%
  mutate(sigma = exp(log_sigma))

Section 4.4

R code 4.37

xyplot(height ~ weight, data = HowellAdults)

R code 4.38

# load data again, since it's a long way back
library(rethinking)
data(Howell1)
HowellAdults <-
  Howell1 %>% filter(age > 18)

# fit model
m4.3 <- 
  map(
    alist(
      height ~ dnorm(mu, sigma),
      mu <- a + b * weight,
      a ~ dnorm(156, 100),
      b ~ dnorm(0, 10),
      sigma ~ dunif(0, 50)
    ),
    data = HowellAdults)

R code 4.39

# but note: this doesn't work with link()!
m4.3a <- map(alist(
  height ~ dnorm(a + b * weight, sigma),
  a ~ dnorm(178, 100),
  b ~ dnorm(0, 10),
  sigma ~ dunif(0, 50)
),
data = HowellAdults)

R code 4.40

precis(m4.3)
##         Mean StdDev   5.5%  94.5%
## a     113.82   1.94 110.73 116.92
## b       0.91   0.04   0.84   0.97
## sigma   5.11   0.19   4.80   5.42

R code 4.41

precis(m4.3, corr = TRUE)
##         Mean StdDev   5.5%  94.5%     a     b sigma
## a     113.82   1.94 110.73 116.92  1.00 -0.99     0
## b       0.91   0.04   0.84   0.97 -0.99  1.00     0
## sigma   5.11   0.19   4.80   5.42  0.00  0.00     1

R code 4.42

Centering weight:

HowellAdults <- 
  HowellAdults %>% 
  mutate(weight.c = weight - mean(weight))

R code 4.43

m4.4 <- 
  map(
    alist(
      height ~ dnorm(mu, sigma),
      mu <- a + b * weight.c,
      a ~ dnorm(178, 100),
      b ~ dnorm(0, 10),
      sigma ~ dunif(0, 50)
    ),
    data = HowellAdults)

R code 4.44

precis(m4.4, corr = TRUE)
##         Mean StdDev   5.5%  94.5% a b sigma
## a     154.64   0.27 154.21 155.08 1 0     0
## b       0.91   0.04   0.84   0.97 0 1     0
## sigma   5.11   0.19   4.80   5.42 0 0     1

R code 4.45

# lattice with custom panel function to overlay multiple things
xyplot(height ~ weight, data = HowellAdults,
       panel = function(x, y, ...) {
         panel.abline(coef(m4.3))
         panel.xyplot(x, y, ...)
       }
)

# using plotFun() with add = TRUE to plot a function on top of points
xyplot(height ~ weight, data = HowellAdults)
plotFun(a + b * x ~ x, a = coef(m4.3)["a"], b = coef(m4.3)["b"], add = TRUE, col = "red")

# creating a separate function for use with plotFun()
xyplot(height ~ weight, data = HowellAdults)

line.fit <- function(x, a = coef(m4.3)["a"], b = coef(m4.3)["b"]) {
  a + b * x
}
plotFun(line.fit(height) ~ height, add = TRUE, col = "red")

R code 4.46

m4.3.post <- extract.samples(m4.3)

R code 4.47

head(m4.3.post)
##          a         b    sigma
## 1 114.1036 0.8953189 5.037232
## 2 110.9993 0.9517947 5.118368
## 3 113.3171 0.9219457 5.072727
## 4 113.6348 0.9145692 4.963323
## 5 116.4568 0.8592025 5.242424
## 6 113.4839 0.9185361 5.055407

## R code 4.48

HowellSmall <- Howell1 %>% sample(10)
m4.4small <- 
  map(alist(
    height ~ dnorm(mu, sigma),
    mu <- a + b * weight,
    a ~ dnorm(178, 100),
    b ~ dnorm(0, 10),
    sigma ~ dunif(0, 50)
  ),
  data = HowellSmall)

R code 4.49

# extract 50 samples from the posterior
m4.4small.post <- extract.samples(m4.4small, n = 50)

# display raw data and sample size
xyplot(height ~ weight, data = HowellSmall,
       xlim = c(30, 65),
       ylim = c(130, 180),
       col = rangi2,
       main = concat("N = ", 10),
       panel = function(x, y, ...) {
         panel.xyplot(x, y, ...)
         # plot the lines, with transparency
         for (i in 1:nrow(m4.4small.post)) {
           panel.abline(a = m4.4small.post$a[i], b = m4.4small.post$b[i], col = "black",
                        alpha = 0.1)
         }
       }
)

R code 4.50

m4.4small.post <-
  m4.4small.post %>%
  mutate(mu_at_50 = a + b * 50)

R code 4.51

densityplot( ~ mu_at_50, data = m4.4small.post,
  col = rangi2, lwd = 2, xlab = expression(paste(mu, " | weight=50")))

R code 4.52

HPDI(m4.4small.post$mu_at_50, prob = 0.89)
##    |0.89    0.89| 
## 154.9138 163.1765

R code 4.53

mu <- link(m4.3)
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
str(mu)
##  num [1:1000, 1:346] 157 158 157 157 158 ...

R code 4.54

# define sequence of weights to compute predictions for
# these values will be on the horizontal axis
m4.3.pred <-
  data.frame(weight = seq(from = 25, to = 70, by = 1))

# use link to compute mu
# for each sample from posterior
# and for each weight in weight.seq
mu <- 
  link(
    m4.3, 
    data = m4.3.pred,
  )
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
str(mu)
##  num [1:1000, 1:46] 137 138 137 138 137 ...

R code 4.55

# The shape of mu coming out of link() is backwards for lattice
# t() flips rows and columns to make it work
xyplot(t(mu) ~ weight, data = m4.3.pred, alpha = 0.1, col = rangi2, pch = 16)

R code 4.56

# summarize the distribution of mu
mu.mean <- apply(mu, 2, mean)
mu.HPDI <- apply(mu, 2, HPDI, prob = 0.89)

R code 4.57

# plot raw data
# fading out points to make line and interval more visible
xyplot(height ~ weight, data = HowellAdults, col = rangi2, alpha = 0.5)

# plot the MAP line, aka the mean mu for each weight
plotPoints(mu.mean ~ weight, data = m4.3.pred, type = "l", add = TRUE)

# plot a shaded region for 89% HPDI
# need to figure out easiest way to do this in lattice
# author's shade() function only works with his base graphics

R code 4.58

Here’s roughly how link() works:

post <- extract.samples(m4.3)
mu.link <- function(weight)
  post$a + post$b * weight
weight.seq <- seq(from = 25, to = 70, by = 1)
mu <- sapply(weight.seq, mu.link)
mu.mean <- apply(mu, 2, mean)
mu.HPDI <- apply(mu, 2, HPDI, prob = 0.89)

R code 4.59

sim.height <- sim(m4.3, data = m4.3.pred)  
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
str(sim.height)
##  num [1:1000, 1:46] 140 137 148 141 144 ...

R code 4.60

height.PI <- apply(sim.height, 2, PI, prob = 0.89)

R code 4.61

# plot raw data
xyplot(height ~ weight, HowellAdults, col = rangi2, alpha = 0.5)

# draw MAP line
plotPoints(mu.mean ~ weight, data = m4.3.pred, 
           col = "blue", type = "l", add = TRUE)

# need a replacement for shade() that works with lattice.
# draw HPDI region for line
# shade(mu.HPDI, weight.seq)
plotPoints(mu.HPDI[1,] ~ weight, data = m4.3.pred, 
           col = "navy", type = "l", add = TRUE)
plotPoints(mu.HPDI[2,] ~ weight, data = m4.3.pred, 
           col = "navy", type = "l", add = TRUE)

# draw PI region for simulated heights
# shade(height.PI, weight.seq)
plotPoints(height.PI[1, ] ~ weight, data = m4.3.pred, 
           col = "red", type = "l", add = TRUE)
plotPoints(height.PI[2, ] ~ weight, data = m4.3.pred, 
           col = "red", type = "l", add = TRUE)

R code 4.62

sim.height <- sim(m4.3, data = m4.3.pred, n = 1e4)
## [ 1000 / 10000 ]
[ 2000 / 10000 ]
[ 3000 / 10000 ]
[ 4000 / 10000 ]
[ 5000 / 10000 ]
[ 6000 / 10000 ]
[ 7000 / 10000 ]
[ 8000 / 10000 ]
[ 9000 / 10000 ]
[ 10000 / 10000 ]
height.PI <- apply(sim.height, 2, PI, prob = 0.89)

R code 4.63

This is roughly how sim() works.

post <- extract.samples(m4.3)
weight.seq <- 25:70
sim.height <- sapply(weight.seq, function(weight)
  rnorm(
    n = nrow(post),
    mean = post$a + post$b * weight,
    sd = post$sigma
  ))
height.PI <- apply(sim.height, 2, PI, prob = 0.89)

R code 4.64

library(rethinking)
data(Howell1)
str(Howell1)
## 'data.frame':    544 obs. of  4 variables:
##  $ height: num  152 140 137 157 145 ...
##  $ weight: num  47.8 36.5 31.9 53 41.3 ...
##  $ age   : num  63 63 65 41 51 35 32 27 19 54 ...
##  $ male  : int  1 0 0 1 0 1 0 1 0 1 ...

R code 4.65

require(mosaic)  # for zscore()
Howell1 <-
  Howell1 %>%
  mutate(
    weight.s = zscore(weight),
    weight.s2 = weight.s^2
  )

R code 4.66

m4.5 <- map(
  alist(
    height ~ dnorm(mu, sigma),
    mu <- a + b1 * weight.s + b2 * weight.s2,
    a ~ dnorm(178, 100),
    b1 ~ dnorm(0, 10),
    b2 ~ dnorm(0, 10),
    sigma ~ dunif(0, 50)
  ),
  data = Howell1
)

R code 4.67

precis(m4.5)
##         Mean StdDev   5.5%  94.5%
## a     146.66   0.37 146.07 147.26
## b1     21.40   0.29  20.94  21.86
## b2     -8.42   0.28  -8.86  -7.97
## sigma   5.75   0.17   5.47   6.03

R code 4.68

m4.5.pred <-
  data_frame(
    weight.s = seq(from = -2.2, to = 2, length.out = 30),
    weight.s2 = weight.s^2
  )
mu <- link(m4.5, data = m4.5.pred)
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
sim.height <- sim(m4.5, data = m4.5.pred)
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
mu.PI <- apply(mu, 2, PI, prob = 0.89)
m4.5.pred <-
  m4.5.pred %>%
  mutate(
    mu.mean = apply(mu, 2, mean),
    mu.lo = apply(mu, 2, PI)[1,],
    mu.hi = apply(mu, 2, PI)[2,],
    sim.lo = apply(sim.height, 2, PI)[1,],
    sim.hi = apply(sim.height, 2, PI)[2,]
    )

R code 4.69

xyplot(sim.hi + mu.hi + mu.mean + mu.lo + sim.lo ~ weight.s, 
       data = m4.5.pred,  type = "l", auto.key = list(lines = TRUE, points = FALSE))

plotPoints(height ~ weight.s, data = Howell1, col = rangi2, alpha = 0.5, add = TRUE)

ggplot(aes(x = weight.s), data = m4.5.pred) +
  geom_point(aes(y = height), data = Howell1, color = rangi2) +
  geom_line(aes(y = mu.lo), color = "navy") +
  geom_line(aes(y = mu.hi), color = "navy") +
  geom_line(aes(y = sim.lo), color = "red") +
  geom_line(aes(y = sim.hi), color = "red") 

R code 4.70

Howell1 <- 
  Howell1 %>% mutate(weight.s3 = weight.s^3)

m4.6 <- map(
  alist(
    height ~ dnorm(mu, sigma),
    mu <- a + b1 * weight.s + b2 * weight.s2 + b3 * weight.s3,
    a ~ dnorm(178, 100),
    b1 ~ dnorm(0, 10),
    b2 ~ dnorm(0, 10),
    b3 ~ dnorm(0, 10),
    sigma ~ dunif(0, 50)
  ),
  data = Howell1
)

R code 4.71

xyplot(
  height ~ weight, data = Howell1,
  col = rangi2, alpha = 0.5)

R code 4.72

xyplot(height ~ weight, data = Howell1, 
       col = rangi2,
       scales=list(
         x = list(at =  round((-2:2) * sd(Howell1$weight) + mean(Howell1$weight), 1))
       )
)

## R code 4.73

xyplot(height ~ weight, data = Howell1,
       col = rangi2, alpha = 0.4)