Code from Statistical Rethinking, modified by R Pruim, is shown below. Differences from the original include the use of dplyr pipelines for data handling, ggformula for plotting, and mosaic helpers such as tally() and qdata().

R Code 10.1

library(rethinking)
library(dplyr)      # for %>%, mutate(), group_by(), summarise()
library(ggformula)  # for the gf_ plotting functions used below
library(mosaic)     # for tally() and qdata()
data(chimpanzees)
Chimps <- chimpanzees %>% 
  mutate( combo = paste0(prosoc_left, "/", condition) )  # used for plotting later

R Code 10.2

m10.1 <- 
  map(
    alist(pulled_left ~ dbinom(1, p),
          logit(p) <- a,
          a ~ dnorm(0, 10)),
    data = Chimps)
precis(m10.1)
##   Mean StdDev 5.5% 94.5%
## a 0.32   0.09 0.18  0.46

R Code 10.3

# two different ways to get the inverse of the logit function
logistic(c(0.18, 0.46))
## [1] 0.5448789 0.6130142
ilogit(c(0.18, 0.46))
## [1] 0.5448789 0.6130142
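Both functions compute exp(x) / (1 + exp(x)), the inverse of logit(p) = log(p / (1 - p)). As a quick check by hand (not part of the book's code):

exp(c(0.18, 0.46)) / (1 + exp(c(0.18, 0.46)))   # should reproduce the values above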

R Code 10.4

m10.2 <- 
  map(
    alist(
      pulled_left ~ dbinom(1, p),
      logit(p) <- a + bp * prosoc_left,
      a ~ dnorm(0, 10),
      bp ~ dnorm(0, 10)
    ),
    data = Chimps)
m10.3 <- 
  map(
    alist(
      pulled_left ~ dbinom(1, p),
      logit(p) <- a + (bp + bpC * condition) * prosoc_left,
      a ~ dnorm(0, 10),
      bp ~ dnorm(0, 10),
      bpC ~ dnorm(0, 10)
    ),
    data = Chimps)

R Code 10.5

compare(m10.1, m10.2, m10.3)
##        WAIC pWAIC dWAIC weight   SE  dSE
## m10.2 680.4   1.9   0.0   0.71 9.32   NA
## m10.3 682.3   3.0   2.0   0.27 9.28 0.79
## m10.1 687.9   1.0   7.6   0.02 7.20 6.11

R Code 10.6

precis(m10.3)
##      Mean StdDev  5.5% 94.5%
## a    0.05   0.13 -0.15  0.25
## bp   0.61   0.23  0.25  0.97
## bpC -0.10   0.26 -0.53  0.32

R Code 10.7

exp(0.61)
## [1] 1.840431
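On the log-odds scale the effect of prosoc_left is additive, so on the odds scale it is multiplicative: exp(0.61) is the proportional increase in the odds of pulling the left lever. A small illustration, starting from an arbitrary baseline probability (not part of the book's code):

p0 <- 0.5                              # any baseline probability works
p1 <- logistic(qlogis(p0) + 0.61)      # qlogis() is base R's logit
(p1 / (1 - p1)) / (p0 / (1 - p0))      # ratio of odds, roughly 1.84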

R Code 10.8

logistic(4)
## [1] 0.9820138

R Code 10.9

logistic(4 + 0.61)
## [1] 0.9901462

R Code 10.10

# dummy data for predictions across treatments
Chimps.pred <- 
  data_frame(
    prosoc_left = c(0, 1, 0, 1),  # right/left/right/left
    condition = c(0, 0, 1, 1),    # control/control/partner/partner
    combo = paste0(prosoc_left, "/", condition)   # used for plotting later
  )

# build prediction ensemble
chimps.ensemble <- ensemble(m10.1, m10.2, m10.3, data = Chimps.pred, refresh = 0)
## Constructing posterior predictions
## [ 1000 / 1000 ]
## Constructing posterior predictions
## [ 1000 / 1000 ]
## Constructing posterior predictions
## [ 1000 / 1000 ]
# summarize
Chimps.pred <-
  Chimps.pred %>% 
  mutate( 
    pred.p    = apply(chimps.ensemble$link, 2, mean),
    pred.p.lo = apply(chimps.ensemble$link, 2, PI)[1,],
    pred.p.hi = apply(chimps.ensemble$link, 2, PI)[2,]
  )

R Code 10.11

# stat = "summary", fun.y = mean says use the average y value in each group
gf_line(pulled_left ~ combo + group::actor, data = Chimps, 
        stat = "summary", fun.y = mean,
        alpha = 0.6) %>%
  gf_ribbon(pred.p.lo + pred.p.hi ~ combo + fill:"red" + group:"1", data = Chimps.pred) %>%
  gf_line(pred.p ~ combo + group:"1" + color:"red", data = Chimps.pred) %>%
  gf_labs(x = "prosocial on = left / has partner", y = "proportion pulled left")

R Code 10.12

# clean NAs from the data
Chimps2 <- Chimps %>% select(-recipient)

# re-use map fit to get the formula
m10.3stan <- 
  map2stan(
    m10.3,
    data = Chimps2,
    iter = 1e4,
    warmup = 1000,
    refresh = 0)
## 
##  Elapsed Time: 0.672495 seconds (Warm-up)
##                5.8845 seconds (Sampling)
##                6.55699 seconds (Total)
## 
## 
## SAMPLING FOR MODEL 'pulled_left ~ dbinom(1, p)' NOW (CHAIN 1).
## WARNING: No variance estimation is
##          performed for num_warmup < 20
## 
## 
## Chain 1, Iteration: 1 / 1 [100%]  (Sampling)
##  Elapsed Time: 3e-06 seconds (Warm-up)
##                0.000247 seconds (Sampling)
##                0.00025 seconds (Total)
## Computing WAIC
## Constructing posterior predictions
## [ 9000 / 9000 ]
precis(m10.3stan)
##      Mean StdDev lower 0.89 upper 0.89 n_eff Rhat
## a    0.05   0.13      -0.16       0.25  4884    1
## bp   0.62   0.23       0.27       1.00  4604    1
## bpC -0.11   0.26      -0.53       0.30  5331    1

R Code 10.13

pairs(m10.3stan)

R Code 10.14

m10.4 <- map2stan(
  alist(
    pulled_left ~ dbinom(1, p),
    logit(p) <- a[actor] + (bp + bpC * condition) * prosoc_left,
    a[actor] ~ dnorm(0, 10),
    bp ~ dnorm(0, 10),
    bpC ~ dnorm(0, 10)
  ),
  data = Chimps2,
  chains = 2,
  iter = 2500,
  warmup = 500,
  refresh = 0
)
## 
##  Elapsed Time: 0.630185 seconds (Warm-up)
##                2.19867 seconds (Sampling)
##                2.82885 seconds (Total)
## 
## 
##  Elapsed Time: 0.647648 seconds (Warm-up)
##                2.18752 seconds (Sampling)
##                2.83517 seconds (Total)
## Warning: There were 61 divergent transitions after warmup. Increasing adapt_delta above 0.8 may help. See
## http://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
## Warning: Examine the pairs() plot to diagnose sampling problems
## 
## SAMPLING FOR MODEL 'pulled_left ~ dbinom(1, p)' NOW (CHAIN 1).
## WARNING: No variance estimation is
##          performed for num_warmup < 20
## 
## 
## Chain 1, Iteration: 1 / 1 [100%]  (Sampling)
##  Elapsed Time: 5e-06 seconds (Warm-up)
##                0.000375 seconds (Sampling)
##                0.00038 seconds (Total)
## Computing WAIC
## Constructing posterior predictions
## [ 4000 / 4000 ]
## Warning in map2stan(alist(pulled_left ~ dbinom(1, p), logit(p) <- a[actor] + : There were 61 divergent iterations during sampling.
## Check the chains (trace plots, n_eff, Rhat) carefully to ensure they are valid.

R Code 10.15

tally(~ actor, data = Chimps)
## actor
##  1  2  3  4  5  6  7 
## 72 72 72 72 72 72 72

R Code 10.16

precis(m10.4, depth = 2)
## Warning in precis(m10.4, depth = 2): There were 61 divergent iterations during sampling.
## Check the chains (trace plots, n_eff, Rhat) carefully to ensure they are valid.
##       Mean StdDev lower 0.89 upper 0.89 n_eff Rhat
## a[1] -0.74   0.27      -1.20      -0.33  2502    1
## a[2] 11.09   5.41       3.69      18.71  1900    1
## a[3] -1.05   0.28      -1.47      -0.59  2678    1
## a[4] -1.05   0.28      -1.50      -0.60  2413    1
## a[5] -0.73   0.28      -1.18      -0.31  2703    1
## a[6]  0.22   0.27      -0.20       0.66  3025    1
## a[7]  1.81   0.40       1.11       2.37  2484    1
## bp    0.84   0.26       0.43       1.26  1737    1
## bpC  -0.14   0.30      -0.62       0.34  2206    1
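Actor 2 pulled the left lever on every trial (the aggregated counts in R Code 10.20 show x = n = 18 for that chimp), so its intercept a[2] is only weakly identified: any sufficiently large value fits about equally well, which is why the posterior for a[2] is so wide and skewed. The density plot in R Code 10.18 makes this visible.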

R Code 10.17

m10.4_post <- extract.samples(m10.4)
str(m10.4_post)
## List of 3
##  $ a  : num [1:4000, 1:7] -0.891 -0.821 -1.058 -0.779 -0.66 ...
##  $ bp : num [1:4000(1d)] 0.966 0.951 0.853 0.999 0.999 ...
##  $ bpC: num [1:4000(1d)] -0.0648 -0.1311 0.0805 -0.0667 -0.0493 ...
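Because a is indexed by actor, its samples come back as a 4000 x 7 matrix with one column per chimpanzee, while bp and bpC are plain vectors. The next chunk extracts column 2 to look at actor 2's intercept.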

R Code 10.18

gf_dens(~ (m10.4_post$a[, 2]))

as.data.frame(m10.4_post) %>% head(2)
##          a.1       a.2       a.3       a.4        a.5       a.6       a.7
## 1 -0.8910460  9.775388 -1.073532 -1.262073 -0.6483801 0.3301656 0.9398147
## 2 -0.8209078 28.384526 -1.254034 -1.139401 -0.6063624 0.2021648 1.7301476
##          bp         bpC
## 1 0.9655670 -0.06482745
## 2 0.9513141 -0.13111120
gf_dens(~ a.2, data = m10.4_post %>% as.data.frame())

R Code 10.19

Chimps.pred <-
  expand.grid(
    actor = 1:7,
    prosoc_left = 0:1,
    condition = 0:1) %>%
  mutate(
    combo = paste0(prosoc_left, "/", condition)
    )

m10.4_link <- link(m10.4, data = Chimps.pred)
## [ 1000 / 1000 ]
Chimps.pred <-
  Chimps.pred %>%
  mutate(
    pred.p = apply(m10.4_link, 2, mean),
    pred.p.lo = apply(m10.4_link, 2, PI)[1,],
    pred.p.hi = apply(m10.4_link, 2, PI)[2,]
  )
gf_ribbon(pred.p.lo + pred.p.hi ~ combo + group:"1", data = Chimps.pred) %>%
  gf_line(pred.p ~ combo + group:"1", data = Chimps.pred) %>% 
  gf_line(pulled_left ~ combo + group:"1", data = Chimps, color = "red",
          stat = "summary", fun.y = mean) %>%
  gf_facet_wrap( ~ actor) %>% 
  gf_labs(x = "prosocial on left / has partner", y = "proportion pulled left") %>%
  gf_refine(ylim(0,1))

Aggregated Binomial Model

Chimps again

We can redo the models from the previous section using data that is summarised into counts.

R Code 10.20

The book only computes x (because n is 18 in each case). But in general, both x and n might vary, so let's get into the habit of keeping both x and n in our aggregated data.

Chimps.ag <-
  Chimps %>% 
  group_by(prosoc_left, condition, actor) %>%
  summarise(x = sum(pulled_left), n = n()) %>%
  arrange(actor, condition, prosoc_left)     # put things into same order as in book
Chimps.ag %>% head()
## Source: local data frame [6 x 5]
## Groups: prosoc_left, condition [4]
## 
##   prosoc_left condition actor     x     n
##         <int>     <int> <int> <int> <int>
## 1           0         0     1     6    18
## 2           1         0     1     9    18
## 3           0         1     1     5    18
## 4           1         1     1    10    18
## 5           0         0     2    18    18
## 6           1         0     2    18    18

R Code 10.21

There is no reason to assume every chimp has the same number of observations (although they do in this case), so let's put n in place of 18.

m10.5 <- map(
  alist(
    x ~ dbinom(n, p),
    logit(p) <- a + (bp + bpC * condition) * prosoc_left,
    a ~ dnorm(0, 10),
    bp ~ dnorm(0, 10),
    bpC ~ dnorm(0, 10)
  ),
  data = Chimps.ag)

Graduate School Admissions

R Code 10.22

data(UCBadmit)
Admit <- UCBadmit %>% 
  mutate(male = ifelse(applicant.gender == "male", 1, 0))

R Code 10.23

m10.6 <-  
  map(
    alist(
      admit ~ dbinom(applications, p),
      logit(p) <- a + bm * male,
      a ~ dnorm(0, 10),
      bm ~ dnorm(0, 10)
    ),
    data = Admit)
m10.7 <- 
  map(
    alist(admit ~ dbinom(applications, p),
          logit(p) <- a,
          a ~ dnorm(0, 10)),
    data = Admit,
    start = list(a = 5))

R Code 10.24

compare(m10.6, m10.7)
##         WAIC pWAIC dWAIC weight    SE   dSE
## m10.6 5954.7   1.9   0.0      1 35.11    NA
## m10.7 6046.3   1.0  91.6      0 29.87 19.25

R Code 10.25

precis(m10.6)
##     Mean StdDev  5.5% 94.5%
## a  -0.83   0.05 -0.91 -0.75
## bm  0.61   0.06  0.51  0.71

R Code 10.26

m10.6_post <- extract.samples(m10.6, refresh = 0) %>%
  mutate(
    p.admit.male = logistic(a + bm),
    p.admit.female = logistic(a),
    p.admit.diff = p.admit.male - p.admit.female
  )
gf_dens(~p.admit.diff, data = m10.6_post)
qdata(~ p.admit.diff, c(0.025, 0.5, 0.975), data = m10.6_post)
##        quantile     p
## 2.5%  0.1126204 0.025
## 50%   0.1417237 0.500
## 97.5% 0.1698590 0.975
quantile(~ p.admit.diff, p = c(0.025, 0.5, 0.975), data = m10.6_post)
##      2.5%       50%     97.5% 
## 0.1126204 0.1417237 0.1698590
quantile(m10.6_post$p.admit.diff, c(0.025, 0.5, 0.975))
##      2.5%       50%     97.5% 
## 0.1126204 0.1417237 0.1698590

R Code 10.27

PC <- postcheck(m10.6, n = 1e4)
## [ 10000 / 10000 ]
## [ 10000 / 10000 ]
# draw lines connecting points from same dept
for (i in 1:6) {
  x <- 1 + 2 * (i - 1)
  y1 <- Admit$admit[x] / Admit$applications[x]
  y2 <- Admit$admit[x + 1] / Admit$applications[x + 1]
  lines(c(x, x + 1), c(y1, y2), col = rangi2, lwd = 2)
  text(x + 0.5,
       (y1 + y2) / 2 + 0.05,
       Admit$dept[x],
       cex = 0.8,
       col = rangi2)
}

Here’s a version that displays this information differently and is created from scratch.

m10.6_link <- link(m10.6)
## [ 1000 / 1000 ]
m10.6_sim  <-  sim(m10.6)
## [ 1000 / 1000 ]
m10.6_pc <- 
  Admit %>%
  mutate(
    p.admit = admit / applications,
    pred.admit = apply(m10.6_link, 2, median),
    link.admit.lo = apply(m10.6_link, 2, PI,prob = 0.95)[1,],
    link.admit.hi = apply(m10.6_link, 2, PI,prob = 0.95)[2,],
    sim.admit.lo = apply(m10.6_sim, 2, PI,prob = 0.95)[1,] / applications,
    sim.admit.hi = apply(m10.6_sim, 2, PI,prob = 0.95)[2,] / applications
    )
gf_point(p.admit ~ dept + color::applicant.gender, data = m10.6_pc,
         position = position_dodge(width = 0.4), size = 1.8) %>%
  gf_point(pred.admit ~ dept + color::applicant.gender, data = m10.6_pc,
           position = position_dodge(width = 0.4), shape = 3, size = 3) %>%
  gf_linerange(link.admit.lo + link.admit.hi ~ dept + color::applicant.gender, data = m10.6_pc,
           position = position_dodge(width = 0.4), alpha = 0.6, size = 1.5) %>%
  gf_linerange(sim.admit.lo + sim.admit.hi ~ dept + color::applicant.gender, data = m10.6_pc,
           position = position_dodge(width = 0.4), alpha = 0.6) %>%
  gf_labs(title = "posterior check for m10.6") %>%
  gf_refine(scale_color_manual(values = c(male = "navy", female = "red")))

R Code 10.28

# make index
Admit <-
  Admit %>%
  mutate(
    dept_id = coerce_index(dept)
  )

# model with unique intercept for each dept
m10.8 <- 
  map(
    alist(
      admit ~ dbinom(applications, p),
      logit(p) <- a[dept_id],
      a[dept_id] ~ dnorm(0, 10)), 
    data = Admit)

# model with male difference as well
m10.9 <- 
  map(
    alist(
      admit ~ dbinom(applications, p),
      logit(p) <- a[dept_id] + bm * male,
      a[dept_id] ~ dnorm(0, 10),
      bm ~ dnorm(0, 10)
    ),
    data = Admit)

R Code 10.29

compare(m10.6, m10.7, m10.8, m10.9)
##         WAIC pWAIC dWAIC weight    SE   dSE
## m10.9 5200.9   6.7   0.0   0.54 57.07    NA
## m10.8 5201.2   6.1   0.3   0.46 57.03  2.58
## m10.6 5955.0   2.0 754.0   0.00 35.05 49.42
## m10.7 6046.4   1.1 845.5   0.00 29.89 52.44

R Code 10.30

precis(m10.9, depth = 2)
##       Mean StdDev  5.5% 94.5%
## a[1]  0.68   0.10  0.52  0.84
## a[2]  0.64   0.12  0.45  0.82
## a[3] -0.58   0.07 -0.70 -0.46
## a[4] -0.61   0.09 -0.75 -0.48
## a[5] -1.06   0.10 -1.22 -0.90
## a[6] -2.62   0.16 -2.88 -2.37
## bm   -0.10   0.08 -0.23  0.03
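Once we condition on department, the estimated male effect bm is slightly negative: the apparent overall advantage for male applicants disappears (and even reverses a little). This is the usual Simpson's-paradox reading of the UCB admissions data.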

map() vs Stan

R Code 10.31

m10.9stan <- 
  map2stan(
    m10.9,
    chains = 2,
    iter = 2500,
    warmup = 500,
    refresh = 0)
## Warning: Variable 'applicant.gender' contains dots '.'.
## Will attempt to remove dots internally.
## 
##  Elapsed Time: 0.027456 seconds (Warm-up)
##                0.094403 seconds (Sampling)
##                0.121859 seconds (Total)
## 
## 
##  Elapsed Time: 0.022696 seconds (Warm-up)
##                0.095133 seconds (Sampling)
##                0.117829 seconds (Total)
## 
## 
## SAMPLING FOR MODEL 'admit ~ dbinom(applications, p)' NOW (CHAIN 1).
## WARNING: No variance estimation is
##          performed for num_warmup < 20
## 
## 
## Chain 1, Iteration: 1 / 1 [100%]  (Sampling)
##  Elapsed Time: 4e-06 seconds (Warm-up)
##                3.5e-05 seconds (Sampling)
##                3.9e-05 seconds (Total)
## Computing WAIC
## Constructing posterior predictions
## [ 4000 / 4000 ]
## Aggregated binomial counts detected. Splitting to 0/1 outcome for WAIC calculation.
precis(m10.9stan, depth = 2)
##       Mean StdDev lower 0.89 upper 0.89 n_eff Rhat
## a[1]  0.68   0.10       0.54       0.85  2250    1
## a[2]  0.64   0.12       0.46       0.83  2476    1
## a[3] -0.58   0.07      -0.71      -0.47  4000    1
## a[4] -0.61   0.09      -0.75      -0.48  4000    1
## a[5] -1.06   0.10      -1.21      -0.89  4000    1
## a[6] -2.63   0.16      -2.90      -2.37  4000    1
## bm   -0.10   0.08      -0.23       0.03  1784    1
plot(precis(m10.9, depth = 2))
plot(precis(m10.9stan, depth = 2))

Posterior Predictive Check

m10.9_link <- link(m10.9)
## [ 1000 / 1000 ]
m10.9_sim  <-  sim(m10.9)
## [ 1000 / 1000 ]
m10.9_pc <- 
  Admit %>%
  mutate(
    p.admit = admit / applications,
    pred.admit = apply(m10.9_link, 2, median),
    link.admit.lo = apply(m10.9_link, 2, PI,prob = 0.95)[1,],
    link.admit.hi = apply(m10.9_link, 2, PI,prob = 0.95)[2,],
    sim.admit.lo = apply(m10.9_sim, 2, PI,prob = 0.95)[1,] / applications,
    sim.admit.hi = apply(m10.9_sim, 2, PI,prob = 0.95)[2,] / applications
    )
gf_point(p.admit ~ dept + color::applicant.gender, data = m10.9_pc,
         position = position_dodge(width = 0.4), size = 1.8) %>%
  gf_point(pred.admit ~ dept + color::applicant.gender, data = m10.9_pc,
           position = position_dodge(width = 0.4), shape = 3, size = 3) %>%
  gf_linerange(link.admit.lo + link.admit.hi ~ dept + color::applicant.gender, data = m10.9_pc,
           position = position_dodge(width = 0.4), alpha = 0.6, size = 1.3) %>%
  gf_linerange(sim.admit.lo + sim.admit.hi ~ dept + color::applicant.gender, data = m10.9_pc,
           position = position_dodge(width = 0.4), alpha = 0.6) %>%
  gf_labs(title = "posterior check for m10.9") %>%
  gf_refine(scale_color_manual(values = c(male = "navy", female = "red")))

Fitting with glm()

R Code 10.32

m10.7glm <-
  glm(cbind(admit, reject) ~ 1, data = Admit, family = binomial)
m10.6glm <-
  glm(cbind(admit, reject) ~ male, data = Admit, family = binomial)
m10.8glm <-
  glm(cbind(admit, reject) ~ dept, data = Admit, family = binomial)
m10.9glm <- glm(cbind(admit, reject) ~ male + dept,
                data = Admit,
                family = binomial)

R Code 10.33

data(chimpanzees)
m10.4glm <- glm(
  pulled_left ~ as.factor(actor) + prosoc_left * condition - condition,
  data = chimpanzees,
  family = binomial
)

R Code 10.34

glimmer(pulled_left ~ prosoc_left * condition - condition,
        data = chimpanzees,
        family = binomial)
## alist(
##     pulled_left ~ dbinom( 1 , p ),
##     logit(p) <- Intercept +
##         b_prosoc_left*prosoc_left +
##         b_prosoc_left_X_condition*prosoc_left_X_condition,
##     Intercept ~ dnorm(0,10),
##     b_prosoc_left ~ dnorm(0,10),
##     b_prosoc_left_X_condition ~ dnorm(0,10)
## )

A synthetic example

R Code 10.35

# outcome and predictor almost perfectly associated
y <- c(rep(0, 10), rep(1, 10))
x <- c(rep(-1, 9), rep(1, 11))
# fit binomial GLM
m.bad <- glm(y ~ x, data = list(y = y, x = x), family = binomial)
precis(m.bad)
##              Mean  StdDev     5.5%   94.5%
## (Intercept) -9.13 2955.06 -4731.89 4713.63
## x           11.43 2955.06 -4711.33 4734.19

R Code 10.36

m.good <- map(alist(y ~ dbinom(1, p),
                    logit(p) <- a + b * x,
                    c(a, b) ~ dnorm(0, 10)), data = list(y = y, x = x))
precis(m.good)
##    Mean StdDev  5.5% 94.5%
## a -1.73   2.78 -6.16  2.71
## b  4.02   2.78 -0.42  8.45
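The Normal(0, 10) priors are what keep these estimates finite. Because y and x are almost perfectly associated, the flat-prior fit in R Code 10.35 is nearly completely separated, so glm() chases the slope off toward infinity and reports absurd standard errors; even a weakly informative prior is enough to regularize the estimates.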

R Code 10.37

m.good.stan <- map2stan(m.good)
## 
## SAMPLING FOR MODEL 'y ~ dbinom(1, p)' NOW (CHAIN 1).
## 
## Chain 1, Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 1, Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 1, Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 1, Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 1, Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 1, Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 1, Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 1, Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 1, Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 1, Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 1, Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 1, Iteration: 2000 / 2000 [100%]  (Sampling)
##  Elapsed Time: 0.049456 seconds (Warm-up)
##                0.050337 seconds (Sampling)
##                0.099793 seconds (Total)
## 
## 
## SAMPLING FOR MODEL 'y ~ dbinom(1, p)' NOW (CHAIN 1).
## WARNING: No variance estimation is
##          performed for num_warmup < 20
## 
## 
## Chain 1, Iteration: 1 / 1 [100%]  (Sampling)
##  Elapsed Time: 4e-06 seconds (Warm-up)
##                7.4e-05 seconds (Sampling)
##                7.8e-05 seconds (Total)
## Computing WAIC
## Constructing posterior predictions
## [ 1000 / 1000 ]
precis(m.good.stan)
##    Mean StdDev lower 0.89 upper 0.89 n_eff Rhat
## a -5.27   3.95     -10.95       0.77   130 1.01
## b  8.11   3.90       2.78      14.05   127 1.00
pairs(m.good.stan)

Poisson Regression

R Code 10.38

y <- rbinom(1e5, 1000, 1 / 1000)
c(mean(y), var(y))
## [1] 0.9978200 0.9958252
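This works because a binomial with large n and small p is approximately Poisson with lambda = n * p; here lambda = 1000 * (1/1000) = 1, so both the mean and the variance should be near 1. A quick comparison of the two probability functions (not part of the book's code):

dbinom(0:4, size = 1000, prob = 1/1000)
dpois(0:4, lambda = 1)    # nearly identical probabilities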

R Code 10.39

data(Kline)

R Code 10.40

Kline <-
  Kline %>% 
  mutate(
    log_pop = log(population),
    contact_high = ifelse(contact == "high", 1, 0)
  )

R Code 10.41

m10.10 <- 
  map(
    alist(
      total_tools ~ dpois(lambda),
      log(lambda) <- a + bp * log_pop +
        bc * contact_high + bpc * contact_high * log_pop,
      a ~ dnorm(0, 100),
      c(bp, bc, bpc) ~ dnorm(0, 1)
    ),
    data = Kline
  )

R Code 10.42

precis(m10.10, corr = TRUE)
##      Mean StdDev  5.5% 94.5%     a    bp    bc   bpc
## a    0.94   0.36  0.37  1.52  1.00 -0.98 -0.13  0.07
## bp   0.26   0.03  0.21  0.32 -0.98  1.00  0.12 -0.08
## bc  -0.09   0.84 -1.43  1.25 -0.13  0.12  1.00 -0.99
## bpc  0.04   0.09 -0.10  0.19  0.07 -0.08 -0.99  1.00
plot(precis(m10.10))
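Note the correlations in the last four columns: a and bp are correlated at -0.98, and bc and bpc at -0.99. This strong posterior correlation is what centering log_pop (R Code 10.50) is meant to reduce.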

R Code 10.43

post <- 
  extract.samples(m10.10) %>%
  mutate(
    lambda_high = exp(a + bc + (bp + bpc) * 8),
    lambda_low =  exp(a + bp * 8)
  )

R Code 10.44

diff <- with(post, lambda_high - lambda_low)   # lambda_high and lambda_low are columns of post
sum(diff > 0) / length(diff)

R Code 10.45

# no interaction
m10.11 <- map(
  alist(
    total_tools ~ dpois(lambda),
    log(lambda) <- a + bp * log_pop + bc * contact_high,
    a ~ dnorm(0, 100),
    c(bp, bc) ~ dnorm(0, 1)
  ),
  data = Kline
)

R Code 10.46

# no contact rate
m10.12 <- 
  map(
    alist(
      total_tools ~ dpois(lambda),
      log(lambda) <- a + bp * log_pop,
      a ~ dnorm(0, 100),
      bp ~ dnorm(0, 1)
    ),
    data = Kline)

# no log-population
m10.13 <- 
  map(
    alist(
      total_tools ~ dpois(lambda),
      log(lambda) <- a + bc * contact_high,
      a ~ dnorm(0, 100),
      bc ~ dnorm(0, 1)
    ),
    data = Kline)

R Code 10.47

# intercept only
m10.14 <- 
  map(
    alist(
      total_tools ~ dpois(lambda),
      log(lambda) <- a,
      a ~ dnorm(0, 100)), 
    data = Kline)
# compare all using WAIC
# adding n=1e4 for more stable WAIC estimates
# will also plot the comparison
(islands.compare <-
  compare(m10.10, m10.11, m10.12, m10.13, m10.14, n = 1e4))
##         WAIC pWAIC dWAIC weight    SE   dSE
## m10.11  79.3   4.3   0.0   0.60 11.15    NA
## m10.10  80.3   5.0   1.1   0.36 11.40  1.23
## m10.12  84.6   3.9   5.3   0.04  8.87  8.29
## m10.14 141.6   8.4  62.3   0.00 31.62 34.42
## m10.13 150.9  17.1  71.6   0.00 44.76 46.75
plot(islands.compare)

R Code 10.48

Kline.pred <-
  expand.grid(
    log_pop = seq(from = 6, to = 13, by = 0.25),
    contact_high = 0L:1L
  ) %>%
  mutate(
    contact = ifelse(contact_high == 1, "high", "low")
  )

Kline.ens <- ensemble(m10.10, m10.11, m10.12, data = Kline.pred, refresh = 0) 
## Constructing posterior predictions
## [ 1000 / 1000 ]
## Constructing posterior predictions
## [ 1000 / 1000 ]
## Constructing posterior predictions
## [ 1000 / 1000 ]
Kline.pred <- 
  Kline.pred %>%
  mutate(
    lambda.med = apply(Kline.ens$link, 2, median),
    lambda.link.lo = apply(Kline.ens$link, 2, PI)[1,],
    lambda.link.hi = apply(Kline.ens$link, 2, PI)[2,])
gf_point(total_tools ~ log_pop + shape:contact + color:contact, data = Kline) %>%
  gf_ribbon(lambda.link.lo + lambda.link.hi ~ log_pop + fill:contact, data = Kline.pred) %>%
  gf_line(lambda.med ~ log_pop + color:contact, data = Kline.pred) %>% 
  gf_refine(scale_shape_manual(values = c(16, 1)))

R Code 10.49

m10.10stan <-
  map2stan(
    m10.10,
    iter = 3000,
    warmup = 1000,
    chains = 4,
    refresh = 0)
## 
##  Elapsed Time: 0.291652 seconds (Warm-up)
##                0.296672 seconds (Sampling)
##                0.588324 seconds (Total)
## 
## 
##  Elapsed Time: 0.624632 seconds (Warm-up)
##                0.329524 seconds (Sampling)
##                0.954156 seconds (Total)
## 
## 
##  Elapsed Time: 0.188047 seconds (Warm-up)
##                0.293161 seconds (Sampling)
##                0.481208 seconds (Total)
## 
## 
##  Elapsed Time: 0.188932 seconds (Warm-up)
##                0.330265 seconds (Sampling)
##                0.519197 seconds (Total)
## 
## 
## SAMPLING FOR MODEL 'total_tools ~ dpois(lambda)' NOW (CHAIN 1).
## WARNING: No variance estimation is
##          performed for num_warmup < 20
## 
## 
## Chain 1, Iteration: 1 / 1 [100%]  (Sampling)
##  Elapsed Time: 3e-06 seconds (Warm-up)
##                2.4e-05 seconds (Sampling)
##                2.7e-05 seconds (Total)
## Computing WAIC
## Constructing posterior predictions
## [ 8000 / 8000 ]
precis(m10.10stan)
##      Mean StdDev lower 0.89 upper 0.89 n_eff Rhat
## a    0.94   0.35       0.34       1.46  2898    1
## bp   0.26   0.03       0.21       0.32  2876    1
## bc  -0.10   0.86      -1.46       1.27  2822    1
## bpc  0.04   0.09      -0.11       0.19  2823    1

R Code 10.50

# construct centered predictor
Kline <-
  Kline %>%
  mutate(log_pop_c = log_pop - mean(log_pop))

# re-estimate
m10.10stan.c <- 
  map2stan(
    alist(
      total_tools ~ dpois(lambda),
      log(lambda) <- a + bp * log_pop_c + bc * contact_high +
        bcp * log_pop_c * contact_high,
      a ~ dnorm(0, 10),
      bp ~ dnorm(0, 1),
      bc ~ dnorm(0, 1),
      bcp ~ dnorm(0, 1)
    ),
    data = Kline,
    iter = 3000,
    warmup = 1000,
    chains = 4,
    refresh = 0
  )
## 
##  Elapsed Time: 0.031641 seconds (Warm-up)
##                0.060115 seconds (Sampling)
##                0.091756 seconds (Total)
## 
## 
##  Elapsed Time: 0.035005 seconds (Warm-up)
##                0.060652 seconds (Sampling)
##                0.095657 seconds (Total)
## 
## 
##  Elapsed Time: 0.034197 seconds (Warm-up)
##                0.055311 seconds (Sampling)
##                0.089508 seconds (Total)
## 
## 
##  Elapsed Time: 0.030636 seconds (Warm-up)
##                0.053154 seconds (Sampling)
##                0.08379 seconds (Total)
## 
## 
## SAMPLING FOR MODEL 'total_tools ~ dpois(lambda)' NOW (CHAIN 1).
## WARNING: No variance estimation is
##          performed for num_warmup < 20
## 
## 
## Chain 1, Iteration: 1 / 1 [100%]  (Sampling)
##  Elapsed Time: 3e-06 seconds (Warm-up)
##                2.5e-05 seconds (Sampling)
##                2.8e-05 seconds (Total)
## Computing WAIC
## Constructing posterior predictions
## [ 8000 / 8000 ]
precis(m10.10stan.c)
##     Mean StdDev lower 0.89 upper 0.89 n_eff Rhat
## a   3.31   0.09       3.17       3.45  5314    1
## bp  0.26   0.04       0.21       0.32  6010    1
## bc  0.28   0.12       0.09       0.47  5247    1
## bcp 0.07   0.17      -0.20       0.34  6601    1
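Compare the n_eff values here with those for m10.10stan: centering log_pop removes most of the posterior correlation between intercept and slope, so the chains explore the posterior far more efficiently. The slope estimates are essentially unchanged; only the intercept's meaning shifts, since it is now the expected log count at the average log population.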

R Code 10.51

num_days <- 30
y <- rpois(num_days, 1.5)

R Code 10.52

num_weeks <- 4
y_new <- rpois(num_weeks, 0.5 * 7)

R Code 10.53

D <- 
  data_frame(
    y_all = c(y, y_new),
    days = c(rep(1, 30), rep(7, 4)),
    monastery = c(rep(0, 30), rep(1, 4))
  )

R Code 10.54

# compute the offset
D <- D %>% mutate(log_days = log(days))

# fit the model
m10.15 <- 
  map(
    alist(
      y_all ~ dpois(lambda),        # D stores the counts in a column named y_all
      log(lambda) <- log_days + a + b * monastery,
      a ~ dnorm(0, 100),
      b ~ dnorm(0, 1)
    ),
    data = D)
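The log of the observation window enters the linear model as an offset, a term with no coefficient attached, because the expected count scales with exposure: if mu is the rate per day and tau the number of days observed, then lambda = mu * tau, so log(lambda) = log(tau) + log(mu) = log_days + a + b * monastery.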

R Code 10.55

post <- extract.samples(m10.15) %>%
  mutate(
    lambda_old = exp(a),
    lambda_new = exp(a + b)
  )
precis(post %>% select(lambda_old, lambda_new))
##            Mean StdDev |0.89 0.89|
## lambda_old 1.47   0.22  1.11  1.80
## lambda_new 0.32   0.11  0.16  0.47

Careers

R Code 10.56

# simulate career choices among 500 individuals
income <- 1:3        # expected income of each career
score <- 0.5 * income  # scores for each career, based on income
# next line converts scores to probabilities
p <- softmax(score[1], score[2], score[3])
p
## [1] 0.1863237 0.3071959 0.5064804
# sample chosen career for each individual
career <- sample(1:3, size = 500, prob = p, replace = TRUE)
tally(~career, format = "proportion")
## career
##     1     2     3 
## 0.194 0.316 0.490
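softmax() just exponentiates the scores and normalizes them to sum to one. A quick check by hand (not part of the book's code):

exp(score) / sum(exp(score))    # should match p above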

R Code 10.57

# fit the model, using dcategorical and softmax link
m10.16 <- 
  map(
    alist(
      career ~ dcategorical(softmax(0, s2, s3)),
      s2 <- b * 2,    # linear model for event type 2
      s3 <- b * 3,    # linear model for event type 3
      b ~ dnorm(0, 5)),
    data = list(career = career))

R Code 10.58

F <-
  data_frame(
    family_income = runif(100),
    career = NA
  )
    
# assign a unique coefficient for each type of event
b <- (1:-1)
for (i in 1:nrow(F)) {
  score <- 0.5 * (1:3) + b * F$family_income[i]
  p <- softmax(score[1], score[2], score[3])
  F$career[i] <- sample(1:3, size = 1, prob = p)
}

m10.17 <- map(
  alist(
    career ~ dcategorical(softmax(0, s2, s3)),
    s2 <- a2 + b2 * family_income,
    s3 <- a3 + b3 * family_income,
    c(a2, a3, b2, b3) ~ dnorm(0, 5)
  ),
  data = F
)

R Code 10.59

data(UCBadmit)
Admit <- UCBadmit

R Code 10.60

# binomial model of overall admission probability
m_binom <- 
  map(
    alist(
      admit ~ dbinom(applications, p),
      logit(p) <- a,
      a ~ dnorm(0, 100)),
    data = Admit)
# Poisson model of overall admission rate and rejection rate
Admit <- Admit %>% rename(rej = reject) # 'reject' is a reserved word
m_pois <- map2stan(
  alist(
    admit ~ dpois(lambda1),
    rej ~ dpois(lambda2),
    log(lambda1) <- a1,
    log(lambda2) <- a2,
    c(a1, a2) ~ dnorm(0, 100)
  ),
  data = Admit,
  chains = 3,
  cores = 3,
  refresh = 0
)
## Warning: Variable 'applicant.gender' contains dots '.'.
## Will attempt to remove dots internally.
## 
## SAMPLING FOR MODEL 'admit ~ dpois(lambda1)' NOW (CHAIN 1).
## WARNING: No variance estimation is
##          performed for num_warmup < 20
## 
## 
## Chain 1, Iteration: 1 / 1 [100%]  (Sampling)
##  Elapsed Time: 3e-06 seconds (Warm-up)
##                4.2e-05 seconds (Sampling)
##                4.5e-05 seconds (Total)
## Computing WAIC
## Constructing posterior predictions
## [ 300 / 3000 ]
[ 600 / 3000 ]
[ 900 / 3000 ]
[ 1200 / 3000 ]
[ 1500 / 3000 ]
[ 1800 / 3000 ]
[ 2100 / 3000 ]
[ 2400 / 3000 ]
[ 2700 / 3000 ]
[ 3000 / 3000 ]

R Code 10.61

logistic(coef(m_binom))
##         a 
## 0.3877596

R Code 10.62

k <- as.numeric(coef(m_pois))
exp(k[1]) / (exp(k[1]) + exp(k[2]))
## [1] 0.3876971
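The near-agreement is no accident: if admissions and rejections are independent Poisson counts with rates lambda1 and lambda2, then conditional on the total number of applications the admissions are binomial with probability lambda1 / (lambda1 + lambda2), which is exactly the quantity computed above with lambda1 = exp(k[1]) and lambda2 = exp(k[2]).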

R Code 10.63

# simulate
simData <- function(n = 100) {
  data_frame(
    x = runif(n),
    y = rgeom(n, prob = logistic(-1 + 2 * x))
  )
}

# estimate
m10.18 <- 
  map(
    alist(y ~ dgeom(p),
          logit(p) <- a + b * x,
          a ~ dnorm(0, 10),
          b ~ dnorm(0, 1)),
    data = simData()
  )
precis(m10.18)
##    Mean StdDev  5.5% 94.5%
## a -1.24   0.23 -1.60 -0.87
## b  2.26   0.45  1.54  2.97