Statistical Rethinking (Code)

Code from Statistical Rethinking modified by R Pruim is shown below. Differences to the oringal include:

a preference for putting data into containers (data frames, mostly), rather than working with lose vectors.
use of ggplot2 (via ggformula) rather than base graphics
use of tidyverse for data transformation
better (in my opinion) naming conventions

R code 14.1

ThreePancakes <-
  data_frame(
    id = 1:3,
    A = c(0, 0, 1),
    B = c(0, 1, 1)
  )

pancake_sample <- function(n, cakes = ThreePancakes) {
  S <- sample(cakes, size = n, replace = TRUE)
  S %>% mutate(
    up_side = ifelse(rbinom(n, 1, 0.5), "A", "B"),
    up = ifelse(up_side == "A", A, B),
    down = ifelse(up_side == "A", B, A)
  )
}

# sim 10,000 pancakes
Pancakes <- pancake_sample(n = 1e4)

head(Pancakes)

## # A tibble: 6 × 7
##      id     A     B orig.id up_side    up  down
##   <int> <dbl> <dbl>   <chr>   <chr> <dbl> <dbl>
## 1     3     1     1       3       B     1     1
## 2     3     1     1       3       A     1     1
## 3     3     1     1       3       A     1     1
## 4     3     1     1       3       A     1     1
## 5     2     0     1       2       A     0     1
## 6     1     0     0       1       B     0     0

# if top is 1, what is prob that bottom is also 1
Pancakes %>%
  filter(up == 1) %>%
  summarise(n = n(), prop_down1 = sum(down) / n)

## # A tibble: 1 × 2
##       n prop_down1
##   <int>      <dbl>
## 1  5042  0.6592622

R code 14.2

data(WaffleDivorce)

gf_pointrange(
  Divorce + (Divorce - Divorce.SE) + (Divorce + Divorce.SE) ~ MedianAgeMarriage, 
  data = WaffleDivorce, alpha = .5) %>%
  gf_labs(x ="Median age marriage", y = "Divorce rate")

R code 14.3

Div2 <-
  with(WaffleDivorce,
       data_frame(
         div_obs = Divorce,
         div_sd = Divorce.SE,
         mar_obs = Marriage,
         mar_sd = Marriage.SE,
         age_obs = MedianAgeMarriage,
         population = Population,
         state = Location
       )
  )

m14.1 <- map2stan(
  alist(
    div_est ~ dnorm(mu, sigma),
    mu <- a + bA * age_obs + bR * mar_obs,
    div_obs ~ dnorm(div_est, div_sd),
    a ~ dnorm(0, 10),
    bA ~ dnorm(0, 10),
    bR ~ dnorm(0, 10),
    sigma ~ dcauchy(0, 2.5)
  ),
  data = Div2, start = list(div_est = Div2$div_obs),
  WAIC = FALSE,
  iter = 5000, warmup = 1000, chains = 2, cores = 2,
  control = list(adapt_delta = 0.95), refresh = 0
)

## 
## SAMPLING FOR MODEL 'div_est ~ dnorm(mu, sigma)' NOW (CHAIN 1).
## WARNING: No variance estimation is
##          performed for num_warmup < 20
## 
## 
## Chain 1, Iteration: 1 / 1 [100%]  (Sampling)
##  Elapsed Time: 5e-06 seconds (Warm-up)
##                4.8e-05 seconds (Sampling)
##                5.3e-05 seconds (Total)

R code 14.4

precis(m14.1, depth = 2)

##              Mean StdDev lower 0.89 upper 0.89 n_eff Rhat
## div_est[1]  11.78   0.67      10.73      12.87  8000    1
## div_est[2]  11.19   1.06       9.40      12.75  8000    1
## div_est[3]  10.47   0.62       9.44      11.42  8000    1
## div_est[4]  12.32   0.87      11.02      13.78  8000    1
## div_est[5]   8.05   0.23       7.68       8.42  8000    1
## div_est[6]  11.01   0.74       9.78      12.15  8000    1
## div_est[7]   7.23   0.64       6.16       8.22  8000    1
## div_est[8]   9.34   0.91       7.93      10.81  8000    1
## div_est[9]   7.01   1.10       5.33       8.88  4956    1
## div_est[10]  8.54   0.30       8.06       9.01  8000    1
## div_est[11] 11.14   0.52      10.29      11.97  8000    1
## div_est[12]  9.08   0.88       7.67      10.48  8000    1
## div_est[13]  9.69   0.91       8.30      11.20  3900    1
## div_est[14]  8.11   0.42       7.42       8.76  8000    1
## div_est[15] 10.68   0.56       9.75      11.55  8000    1
## div_est[16] 10.17   0.70       9.05      11.28  8000    1
## div_est[17] 10.51   0.78       9.30      11.78  8000    1
## div_est[18] 11.95   0.65      10.84      12.90  8000    1
## div_est[19] 10.48   0.70       9.35      11.57  8000    1
## div_est[20] 10.18   1.00       8.59      11.77  8000    1
## div_est[21]  8.75   0.58       7.76       9.63  8000    1
## div_est[22]  7.77   0.48       7.03       8.54  8000    1
## div_est[23]  9.15   0.48       8.36       9.90  8000    1
## div_est[24]  7.74   0.54       6.88       8.59  8000    1
## div_est[25] 10.41   0.75       9.20      11.58  8000    1
## div_est[26]  9.54   0.58       8.55      10.39  8000    1
## div_est[27]  9.43   0.96       7.95      11.01  8000    1
## div_est[28]  9.25   0.74       8.06      10.41  8000    1
## div_est[29]  9.18   0.93       7.69      10.65  8000    1
## div_est[30]  6.39   0.42       5.73       7.08  8000    1
## div_est[31]  9.97   0.78       8.71      11.20  8000    1
## div_est[32]  6.69   0.30       6.22       7.19  8000    1
## div_est[33]  9.89   0.44       9.20      10.59  8000    1
## div_est[34]  9.75   0.97       8.24      11.34  8000    1
## div_est[35]  9.43   0.41       8.80      10.12  8000    1
## div_est[36] 11.98   0.78      10.73      13.20  8000    1
## div_est[37] 10.07   0.66       9.03      11.13  8000    1
## div_est[38]  7.79   0.40       7.13       8.42  8000    1
## div_est[39]  8.22   1.03       6.60       9.81  8000    1
## div_est[40]  8.41   0.59       7.50       9.38  8000    1
## div_est[41] 10.02   1.06       8.27      11.63  8000    1
## div_est[42] 10.94   0.63       9.92      11.93  8000    1
## div_est[43] 10.03   0.33       9.51      10.56  8000    1
## div_est[44] 11.08   0.79       9.84      12.33  8000    1
## div_est[45]  8.88   1.00       7.27      10.44  8000    1
## div_est[46]  9.00   0.48       8.24       9.76  8000    1
## div_est[47]  9.95   0.55       9.10      10.85  8000    1
## div_est[48] 10.60   0.86       9.24      11.98  8000    1
## div_est[49]  8.46   0.51       7.64       9.26  8000    1
## div_est[50] 11.50   1.10       9.70      13.20  6137    1
## a           21.42   6.59      11.19      32.34  2267    1
## bA          -0.55   0.21      -0.89      -0.20  2374    1
## bR           0.13   0.08       0.00       0.24  2705    1
## sigma        1.13   0.21       0.80       1.46  2116    1

Figure 14.2

Show me the data!

We need to do a little guesswork here, because the description of the plot is missing a couple details. But they are easy enough to guess given the usual practices of the author.

likely the marriage rate was set to the mean or median for the counterfactual data
the “other model” is the one that is like m14.1 but doesn’t include estimation of div_est and just uses div_obs. We’ll call that model m14.0 below.

The plots themeselves are easily made – one you get the data into a handy format. A first step for that is to figure out where the data we need for the plot live. Here are some options:

The original data frame
A lightly transformed version of the origianl data frame (perhaps with new variable names, missing data removed, or some additional variables calculated)
Counterfactual data that we create.
The precis object.
The results of using link(), sim() or ensemble().

Sometimes these work in combination. For example, we may create counterfactual data and pass that along as the data argument to link() or sim() or ensemble(), or we might average values from one object and add them as a new column in another.

In some cases, we also need to figure out how to extract that data from those objects to put them into a data frame (because that’s the format ggformula, requires). glimpse() (or str()) can be useful for inspecting objects to see how they are organized.

Obtaining information from precis output

m14.1_precis <- precis(m14.1, depth = 2)   
glimpse(m14.1_precis)     # look at structure of the precis object

## Formal class 'precis' [package "rethinking"] with 2 slots
##   ..@ output:'data.frame':   54 obs. of  6 variables:
##   .. ..$ Mean      : num [1:54] 11.78 11.19 10.47 12.32 8.05 ...
##   .. ..$ StdDev    : num [1:54] 0.674 1.057 0.625 0.871 0.234 ...
##   .. ..$ lower 0.89: num [1:54] 10.73 9.4 9.44 11.02 7.68 ...
##   .. ..$ upper 0.89: num [1:54] 12.87 12.75 11.42 13.78 8.42 ...
##   .. ..$ n_eff     : num [1:54] 8000 8000 8000 8000 8000 ...
##   .. ..$ Rhat      : num [1:54] 1 1 1 1 1 ...
##   ..@ digits: num 2

m14.1_coef <- coef(m14.1)   
glimpse(m14.1_coef)       # look at structure of the coef object

##  Named num [1:54] 11.78 11.19 10.47 12.32 8.05 ...
##  - attr(*, "names")= chr [1:54] "div_est[1]" "div_est[2]" "div_est[3]" "div_est[4]" ...

Div3 <-
  Div2 %>%
  mutate(
    div_est = m14.1_precis@output$Mean[1:50],
    div_est2 = m14.1_coef[1:50],  # alternative method
    div_est_se = m14.1_precis@output$StdDev[1:50]
  )
gf_abline(slope = 1, yintercept = 0, color = "red") %>% 
  gf_point(div_est2 ~ div_est, data = Div3) %>% 
  gf_labs(title = "Two methods of extracting coefficients give identical results")

## Warning: Ignoring unknown parameters: yintercept

gf_point( (div_est - div_obs) ~ div_sd, data = Div3)

Using counterfactual data to look at model predictions

favstats( ~ mar_obs, data = Div3)

##   min     Q1 median   Q3  max   mean       sd  n missing
##  13.5 17.125   19.7 22.1 30.7 20.114 3.797905 50       0

CounterFactualData <-
  expand.grid(age_obs = seq(23, 30, by = 0.25), mar_obs = 20)
m14.1_link <- link(m14.1, data = CounterFactualData)
CounterFactualData <- 
  CounterFactualData %>% 
  mutate(
    div_m14.1 = apply(m14.1_link, 2, mean),
    div_m14.1_lo = apply(m14.1_link, 2, PI)[1, ],
    div_m14.1_hi = apply(m14.1_link, 2, PI)[2, ]
  )
head(CounterFactualData, 3)

##   age_obs mar_obs div_m14.1 div_m14.1_lo div_m14.1_hi
## 1   23.00      20  11.26030     10.10277     12.27469
## 2   23.25      20  11.12115     10.05672     12.05635
## 3   23.50      20  10.98201     10.01337     11.84349

gf_pointrange( 
  div_est + (div_est - div_est_se) + (div_est + div_est_se) ~ age_obs,
  data = Div3) %>%
  gf_ribbon(div_m14.1_lo + div_m14.1_hi ~ age_obs, data = CounterFactualData, fill = "navy") %>%
  gf_line(div_m14.1 ~ age_obs, data = CounterFactualData, color = "navy")

The “other model”

To get the “other model”, we need to fit a simpler model that doesn’t estimate divorce rates.

m14.0 <- map2stan(
  alist(
    div_obs ~ dnorm(mu, sigma),
    mu <- a + bA * age_obs + bR * mar_obs,
    a ~ dnorm(30, 20),
    bA ~ dnorm(0, 10),
    bR ~ dnorm(0, 10),
    sigma ~ dcauchy(0, 2.5)
  ),
  data = Div2, 
  WAIC = FALSE, iter = 5000, warmup = 1000, chains = 2, cores = 2, refresh = 0 )

## 
## SAMPLING FOR MODEL 'div_obs ~ dnorm(mu, sigma)' NOW (CHAIN 1).
## WARNING: No variance estimation is
##          performed for num_warmup < 20
## 
## 
## Chain 1, Iteration: 1 / 1 [100%]  (Sampling)
##  Elapsed Time: 2e-06 seconds (Warm-up)
##                3.1e-05 seconds (Sampling)
##                3.3e-05 seconds (Total)

m14.0_link <- link(m14.0, data = CounterFactualData)
CounterFactualData <- 
  CounterFactualData %>% 
  mutate(
    div_m14.0 = apply(m14.0_link, 2, mean),
    div_m14.0_lo = apply(m14.0_link, 2, PI)[1, ],
    div_m14.0_hi = apply(m14.0_link, 2, PI)[2, ]
  )
head(CounterFactualData, 3)

##   age_obs mar_obs div_m14.1 div_m14.1_lo div_m14.1_hi div_m14.0
## 1   23.00      20  11.26030     10.10277     12.27469  12.65466
## 2   23.25      20  11.12115     10.05672     12.05635  12.41210
## 3   23.50      20  10.98201     10.01337     11.84349  12.16954
##   div_m14.0_lo div_m14.0_hi
## 1     11.48711     13.81421
## 2     11.34316     13.49062
## 3     11.17676     13.16647

gf_pointrange( 
  div_est + (div_est - div_est_se) + (div_est + div_est_se) ~ age_obs,
  data = Div3) %>%
  gf_ribbon(div_m14.1_lo + div_m14.1_hi ~ age_obs, data = CounterFactualData, fill = "navy") %>%
  gf_line(div_m14.1 ~ age_obs, data = CounterFactualData, color = "navy") %>%
  gf_ribbon(div_m14.0_lo + div_m14.0_hi ~ age_obs, data = CounterFactualData) %>%
  gf_line(div_m14.0 ~ age_obs, data = CounterFactualData, linetype = "dashed")

R code 14.5

m14.2 <- map2stan(
  alist(
    div_est ~ dnorm(mu, sigma),
    mu <- a + bA * age_obs + bR * mar_est[i],
    div_obs ~ dnorm(div_est, div_sd),
    mar_obs ~ dnorm(mar_est, mar_sd),
    a ~ dnorm(0, 10),
    bA ~ dnorm(0, 10),
    bR ~ dnorm(0, 10),
    sigma ~ dcauchy(0, 2.5)
  ),
  data = Div2,
  start = list(div_est = Div2$div_obs, mar_est = Div2$mar_obs),
  WAIC = FALSE, iter = 5000, warmup = 1000, chains = 3, cores = 3,
  control = list(adapt_delta = 0.95), refresh = 0
)

## Warning: There were 3 divergent transitions after warmup. Increasing adapt_delta above 0.95 may help. See
## http://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup

## Warning: Examine the pairs() plot to diagnose sampling problems

## 
## SAMPLING FOR MODEL 'div_est ~ dnorm(mu, sigma)' NOW (CHAIN 1).
## WARNING: No variance estimation is
##          performed for num_warmup < 20
## 
## 
## Chain 1, Iteration: 1 / 1 [100%]  (Sampling)
##  Elapsed Time: 2e-06 seconds (Warm-up)
##                6.1e-05 seconds (Sampling)
##                6.3e-05 seconds (Total)

## Warning in map2stan(alist(div_est ~ dnorm(mu, sigma), mu <- a + bA * age_obs + : There were 3 divergent iterations during sampling.
## Check the chains (trace plots, n_eff, Rhat) carefully to ensure they are valid.

precis(m14.2)

## Warning in precis(m14.2): There were 3 divergent iterations during sampling.
## Check the chains (trace plots, n_eff, Rhat) carefully to ensure they are valid.

## 100 vector or matrix parameters omitted in display. Use depth=2 to show them.

##        Mean StdDev lower 0.89 upper 0.89 n_eff Rhat
## a     20.73   6.67      10.15      31.41  3603    1
## bA    -0.54   0.21      -0.89      -0.21  3876    1
## bR     0.14   0.08       0.01       0.27  3663    1
## sigma  1.10   0.21       0.76       1.41  3391    1

Figure 14.3

This plot is easier than Figure 14.2 because we don’t have to work with counterfactual data, so there is one less step. We can get everything we need from the precis() output.

m14.2_precis <-  precis(m14.2, depth = 2)

## Warning in precis(m14.2, depth = 2): There were 3 divergent iterations during sampling.
## Check the chains (trace plots, n_eff, Rhat) carefully to ensure they are valid.

Div4 <-
  Div2 %>%
  mutate(
    div_est = m14.2_precis@output$Mean[1:50],
    div_est_se = m14.2_precis@output$StdDev[1:50],
    mar_est = m14.2_precis@output$Mean[51:100],
    mar_est_se = m14.2_precis@output$StdDev[51:100]
  )

gf_point( (mar_est - mar_obs) ~ mar_sd, data = Div4)

The version below adds an extra element not in the text. It showing the population of the states (larger dots for smaller states because they have more uncertainty).
As we can see,

estimates generally move toward the average (the red dots are closer to the center than their associated blue dots in most cases)
the estimates change more when states are smaller

gf_point(div_est ~ mar_est + color::"estimated" + size:population, data = Div4, alpha = 0.6) %>%
  gf_point(div_obs ~ mar_obs + color::"observed" + size:population, data = Div4, alpha = 0.6) %>%
  gf_segment(div_est + div_obs ~ mar_est + mar_obs, data = Div4, alpha = 0.5) %>%
  gf_labs(x = "marriage rate", y = "divorce rate") %>%
  gf_refine(scale_size_continuous(trans = "reciprocal"))

R code 14.6

data(milk)
Milk <- milk %>%
  mutate(
    neocortex = neocortex.perc / 100,   # proportion instead of precent
    logmass = log(mass),     # map2stan() can't compute on the fly, we have to do it in advance
    kcal = kcal.per.g        # b/c we are going to delete kcal.per.g in a moment
  ) %>% 
  select(- matches("\\."))    # avoid dots -- Stan doesn't like them
# Notice the missing values
favstats( ~ neocortex, data = Milk)

##     min     Q1 median     Q3   max      mean         sd  n missing
##  0.5516 0.6454 0.6885 0.7126 0.763 0.6757588 0.05968612 17      12

R code 14.7

# fit model
m14.3 <- map2stan(
  alist(
    kcal ~ dnorm(mu, sigma),
    mu <- a + bN * neocortex + bM * logmass,
    neocortex ~ dnorm(nu, sigma_N),
    a ~ dnorm(0, 100),
    c(bN, bM) ~ dnorm(0, 10),
    nu ~ dnorm(0.5, 1),
    sigma_N ~ dcauchy(0, 1),
    sigma ~ dcauchy(0, 1)
  ),
  data = Milk, iter = 1e4, chains = 2, refresh = 0
)

## Imputing 12 missing values (NA) in variable 'neocortex'.

## 
##  Elapsed Time: 2.0675 seconds (Warm-up)
##                2.59574 seconds (Sampling)
##                4.66324 seconds (Total)
## 
## 
##  Elapsed Time: 2.17224 seconds (Warm-up)
##                2.78988 seconds (Sampling)
##                4.96212 seconds (Total)

## The following numerical problems occurred the indicated number of times on chain 2

##                                                                                 count
## Exception thrown at line 31: normal_log: Scale parameter is 0, but must be > 0!     2

## When a numerical problem occurs, the Hamiltonian proposal gets rejected.

## See http://mc-stan.org/misc/warnings.html#exception-hamiltonian-proposal-rejected

## If the number in the 'count' column is small, there is no need to ask about this message on stan-users.

## Warning: There were 2 divergent transitions after warmup. Increasing adapt_delta above 0.8 may help. See
## http://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup

## Warning: Examine the pairs() plot to diagnose sampling problems

## 
## SAMPLING FOR MODEL 'kcal ~ dnorm(mu, sigma)' NOW (CHAIN 1).
## WARNING: No variance estimation is
##          performed for num_warmup < 20
## 
## 
## Chain 1, Iteration: 1 / 1 [100%]  (Sampling)
##  Elapsed Time: 3e-06 seconds (Warm-up)
##                3.3e-05 seconds (Sampling)
##                3.6e-05 seconds (Total)

## Computing WAIC

## Constructing posterior predictions

## Warning in map2stan(alist(kcal ~ dnorm(mu, sigma), mu <- a + bN * neocortex + : There were 2 divergent iterations during sampling.
## Check the chains (trace plots, n_eff, Rhat) carefully to ensure they are valid.

R code 14.8

precis(m14.3, depth = 2)

## Warning in precis(m14.3, depth = 2): There were 2 divergent iterations during sampling.
## Check the chains (trace plots, n_eff, Rhat) carefully to ensure they are valid.

##                       Mean StdDev lower 0.89 upper 0.89 n_eff Rhat
## neocortex_impute[1]   0.63   0.05       0.55       0.71  7293    1
## neocortex_impute[2]   0.63   0.05       0.54       0.70  7499    1
## neocortex_impute[3]   0.62   0.05       0.54       0.70  6978    1
## neocortex_impute[4]   0.65   0.05       0.58       0.73 10000    1
## neocortex_impute[5]   0.70   0.05       0.62       0.78 10000    1
## neocortex_impute[6]   0.66   0.05       0.58       0.73 10000    1
## neocortex_impute[7]   0.69   0.05       0.61       0.76 10000    1
## neocortex_impute[8]   0.70   0.05       0.62       0.77 10000    1
## neocortex_impute[9]   0.71   0.05       0.64       0.79 10000    1
## neocortex_impute[10]  0.65   0.05       0.57       0.73  8415    1
## neocortex_impute[11]  0.66   0.05       0.58       0.73 10000    1
## neocortex_impute[12]  0.70   0.05       0.61       0.77 10000    1
## a                    -0.54   0.48      -1.31       0.22  1933    1
## bN                    1.90   0.75       0.73       3.12  1894    1
## bM                   -0.07   0.02      -0.10      -0.03  2624    1
## nu                    0.67   0.01       0.65       0.69  6393    1
## sigma_N               0.06   0.01       0.04       0.08  4435    1
## sigma                 0.13   0.02       0.09       0.17  3739    1

R code 14.9

# remove the rows with missing neocortex
Milkcc <- Milk %>% filter(!is.na(neocortex))

# fit model
m14.3cc <- map2stan(
  alist(
    kcal ~ dnorm(mu, sigma),
    mu <- a + bN * neocortex + bM * logmass,
    a ~ dnorm(0, 100),
    c(bN, bM) ~ dnorm(0, 10),
    sigma ~ dcauchy(0, 1)
  ),
  data = Milkcc, iter = 1e4, chains = 2, refresh = 0
)

## 
##  Elapsed Time: 0.90444 seconds (Warm-up)
##                0.988222 seconds (Sampling)
##                1.89266 seconds (Total)
## 
## 
##  Elapsed Time: 0.982238 seconds (Warm-up)
##                0.99367 seconds (Sampling)
##                1.97591 seconds (Total)

## Warning: There were 26 divergent transitions after warmup. Increasing adapt_delta above 0.8 may help. See
## http://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup

## Warning: Examine the pairs() plot to diagnose sampling problems

## 
## SAMPLING FOR MODEL 'kcal ~ dnorm(mu, sigma)' NOW (CHAIN 1).
## WARNING: No variance estimation is
##          performed for num_warmup < 20
## 
## 
## Chain 1, Iteration: 1 / 1 [100%]  (Sampling)
##  Elapsed Time: 3e-06 seconds (Warm-up)
##                2.6e-05 seconds (Sampling)
##                2.9e-05 seconds (Total)

## Computing WAIC

## Constructing posterior predictions

## Warning in map2stan(alist(kcal ~ dnorm(mu, sigma), mu <- a + bN * neocortex + : There were 26 divergent iterations during sampling.
## Check the chains (trace plots, n_eff, Rhat) carefully to ensure they are valid.

precis(m14.3cc)

## Warning in precis(m14.3cc): There were 26 divergent iterations during sampling.
## Check the chains (trace plots, n_eff, Rhat) carefully to ensure they are valid.

##        Mean StdDev lower 0.89 upper 0.89 n_eff Rhat
## a     -1.05   0.58      -1.97      -0.16  2396    1
## bN     2.73   0.90       1.33       4.14  2362    1
## bM    -0.09   0.03      -0.14      -0.05  2778    1
## sigma  0.14   0.03       0.09       0.18  2381    1

R code 14.10

m14.4 <- map2stan(
  alist(
    kcal ~ dnorm(mu, sigma),
    mu <- a + bN * neocortex + bM * logmass,
    neocortex ~ dnorm(nu, sigma_N),
    nu <- a_N + gM * logmass,
    a ~ dnorm(0, 100),
    c(bN, bM, gM) ~ dnorm(0, 10),
    a_N ~ dnorm(0.5, 1),
    sigma_N ~ dcauchy(0, 1),
    sigma ~ dcauchy(0, 1)
  ),
  data = Milk,
  iter = 1e4,
  chains = 2
)

## Imputing 12 missing values (NA) in variable 'neocortex'.

## 
## SAMPLING FOR MODEL 'kcal ~ dnorm(mu, sigma)' NOW (CHAIN 1).
## 
## Chain 1, Iteration:    1 / 10000 [  0%]  (Warmup)
## Chain 1, Iteration: 1000 / 10000 [ 10%]  (Warmup)
## Chain 1, Iteration: 2000 / 10000 [ 20%]  (Warmup)
## Chain 1, Iteration: 3000 / 10000 [ 30%]  (Warmup)
## Chain 1, Iteration: 4000 / 10000 [ 40%]  (Warmup)
## Chain 1, Iteration: 5000 / 10000 [ 50%]  (Warmup)
## Chain 1, Iteration: 5001 / 10000 [ 50%]  (Sampling)
## Chain 1, Iteration: 6000 / 10000 [ 60%]  (Sampling)
## Chain 1, Iteration: 7000 / 10000 [ 70%]  (Sampling)
## Chain 1, Iteration: 8000 / 10000 [ 80%]  (Sampling)
## Chain 1, Iteration: 9000 / 10000 [ 90%]  (Sampling)
## Chain 1, Iteration: 10000 / 10000 [100%]  (Sampling)
##  Elapsed Time: 3.51174 seconds (Warm-up)
##                3.08127 seconds (Sampling)
##                6.59301 seconds (Total)
## 
## 
## SAMPLING FOR MODEL 'kcal ~ dnorm(mu, sigma)' NOW (CHAIN 2).
## 
## Chain 2, Iteration:    1 / 10000 [  0%]  (Warmup)
## Chain 2, Iteration: 1000 / 10000 [ 10%]  (Warmup)
## Chain 2, Iteration: 2000 / 10000 [ 20%]  (Warmup)
## Chain 2, Iteration: 3000 / 10000 [ 30%]  (Warmup)
## Chain 2, Iteration: 4000 / 10000 [ 40%]  (Warmup)
## Chain 2, Iteration: 5000 / 10000 [ 50%]  (Warmup)
## Chain 2, Iteration: 5001 / 10000 [ 50%]  (Sampling)
## Chain 2, Iteration: 6000 / 10000 [ 60%]  (Sampling)
## Chain 2, Iteration: 7000 / 10000 [ 70%]  (Sampling)
## Chain 2, Iteration: 8000 / 10000 [ 80%]  (Sampling)
## Chain 2, Iteration: 9000 / 10000 [ 90%]  (Sampling)
## Chain 2, Iteration: 10000 / 10000 [100%]  (Sampling)
##  Elapsed Time: 3.46547 seconds (Warm-up)
##                4.00618 seconds (Sampling)
##                7.47165 seconds (Total)

## Warning: There were 11 divergent transitions after warmup. Increasing adapt_delta above 0.8 may help. See
## http://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup

## Warning: Examine the pairs() plot to diagnose sampling problems

## 
## SAMPLING FOR MODEL 'kcal ~ dnorm(mu, sigma)' NOW (CHAIN 1).
## WARNING: No variance estimation is
##          performed for num_warmup < 20
## 
## 
## Chain 1, Iteration: 1 / 1 [100%]  (Sampling)
##  Elapsed Time: 4e-06 seconds (Warm-up)
##                4.3e-05 seconds (Sampling)
##                4.7e-05 seconds (Total)

## Computing WAIC

## Constructing posterior predictions

## Warning in map2stan(alist(kcal ~ dnorm(mu, sigma), mu <- a + bN * neocortex + : There were 11 divergent iterations during sampling.
## Check the chains (trace plots, n_eff, Rhat) carefully to ensure they are valid.

precis(m14.4, depth = 2)

## Warning in precis(m14.4, depth = 2): There were 11 divergent iterations during sampling.
## Check the chains (trace plots, n_eff, Rhat) carefully to ensure they are valid.

##                       Mean StdDev lower 0.89 upper 0.89 n_eff Rhat
## neocortex_impute[1]   0.63   0.03       0.57       0.68 10000    1
## neocortex_impute[2]   0.63   0.04       0.57       0.69 10000    1
## neocortex_impute[3]   0.62   0.04       0.56       0.68 10000    1
## neocortex_impute[4]   0.65   0.03       0.59       0.70 10000    1
## neocortex_impute[5]   0.66   0.04       0.61       0.72 10000    1
## neocortex_impute[6]   0.63   0.03       0.57       0.68 10000    1
## neocortex_impute[7]   0.68   0.03       0.63       0.73 10000    1
## neocortex_impute[8]   0.70   0.03       0.64       0.75 10000    1
## neocortex_impute[9]   0.71   0.04       0.66       0.77 10000    1
## neocortex_impute[10]  0.66   0.03       0.61       0.72 10000    1
## neocortex_impute[11]  0.68   0.03       0.62       0.73 10000    1
## neocortex_impute[12]  0.74   0.04       0.69       0.80 10000    1
## a                    -0.86   0.48      -1.60      -0.08  3060    1
## bN                    2.43   0.74       1.23       3.60  3016    1
## bM                   -0.09   0.02      -0.13      -0.05  3629    1
## gM                    0.02   0.01       0.02       0.03  5760    1
## a_N                   0.64   0.01       0.62       0.66  4937    1
## sigma_N               0.04   0.01       0.03       0.05  4778    1
## sigma                 0.13   0.02       0.09       0.16  5656    1

R code 14.11

nc_missing <- ifelse(is.na(Milk$neocortex), 1, 0)
nc_missing <- nc_missing * cumsum(nc_missing)
nc_missing

##  [1]  0  1  2  3  4  0  0  0  5  0  0  0  0  6  7  0  8  0  9  0 10  0 11
## [24]  0  0 12  0  0  0

R code 14.12

nc <- ifelse(is.na(Milk$neocortex), -1, Milk$neocortex)
nc

##  [1]  0.5516 -1.0000 -1.0000 -1.0000 -1.0000  0.6454  0.6454  0.6764
##  [9] -1.0000  0.6885  0.5885  0.6169  0.6032 -1.0000 -1.0000  0.6997
## [17] -1.0000  0.7041 -1.0000  0.7340 -1.0000  0.6753 -1.0000  0.7126
## [25]  0.7260 -1.0000  0.7024  0.7630  0.7549

R code 14.13

model_code <- '
  data{
    int N;
    int nc_num_missing;
    vector[N] kcal;
    real neocortex[N];
    vector[N] logmass;
    int nc_missing[N];
  }
  parameters{
    real alpha;
    real<lower=0> sigma;
    real bN;
    real bM;
    vector[nc_num_missing] nc_impute;
    real mu_nc;
    real<lower=0> sigma_nc;
  }
  model{
    vector[N] mu;
    vector[N] nc_merged;
    alpha ~ normal(0,10);
    bN ~ normal(0,10);
    bM ~ normal(0,10);
    mu_nc ~ normal(0.5,1);
    sigma ~ cauchy(0,1);
    sigma_nc ~ cauchy(0,1);
    // merge missing and observed
    for (i in 1:N) {
      nc_merged[i] <- neocortex[i];
      if (nc_missing[i] > 0) nc_merged[i] <- nc_impute[nc_missing[i]];
    }
    // imputation
    nc_merged ~ normal(mu_nc, sigma_nc);
    // regression
    mu = alpha + bN*nc_merged + bM*logmass;
    kcal ~ normal(mu, sigma);
  }'

R code 14.14

data_list <- 
  with(Milk,
       list(
         N = nrow(Milk),
         kcal = kcal,
         neocortex = nc,
         logmass = logmass,
         nc_missing = nc_missing,
         nc_num_missing = max(nc_missing)
       )
  )
start <- list(
  alpha = mean(Milk$kcal),
  sigma = sd(Milk$kcal),
  bN = 0,
  bM = 0,
  mu_nc = 0.68,
  sigma_nc = 0.06,
  nc_impute = rep(0.5, max(nc_missing))
)
library(rstan)

## Loading required package: StanHeaders

## rstan (Version 2.14.2, packaged: 2017-03-19 00:42:29 UTC, GitRev: 5fa1e80eb817)

## For execution on a local, multicore CPU with excess RAM we recommend calling
## rstan_options(auto_write = TRUE)
## options(mc.cores = parallel::detectCores())

m14.3stan <-
  stan(
    model_code = model_code,
    data = data_list,
    init = list(start),
    iter = 1e4,
    chains = 1
  )

## 
## SAMPLING FOR MODEL '6bcebbd0eaf28eed9f40a6c16f8fc5fd' NOW (CHAIN 1).
## 
## Chain 1, Iteration:    1 / 10000 [  0%]  (Warmup)
## Chain 1, Iteration: 1000 / 10000 [ 10%]  (Warmup)
## Chain 1, Iteration: 2000 / 10000 [ 20%]  (Warmup)
## Chain 1, Iteration: 3000 / 10000 [ 30%]  (Warmup)
## Chain 1, Iteration: 4000 / 10000 [ 40%]  (Warmup)
## Chain 1, Iteration: 5000 / 10000 [ 50%]  (Warmup)
## Chain 1, Iteration: 5001 / 10000 [ 50%]  (Sampling)
## Chain 1, Iteration: 6000 / 10000 [ 60%]  (Sampling)
## Chain 1, Iteration: 7000 / 10000 [ 70%]  (Sampling)
## Chain 1, Iteration: 8000 / 10000 [ 80%]  (Sampling)
## Chain 1, Iteration: 9000 / 10000 [ 90%]  (Sampling)
## Chain 1, Iteration: 10000 / 10000 [100%]  (Sampling)
##  Elapsed Time: 2.44146 seconds (Warm-up)
##                3.54928 seconds (Sampling)
##                5.99073 seconds (Total)

R code 14.15

set.seed(100)
x <- c(rnorm(10), NA)
y <- c(rnorm(10, x), 100)
d <- list(x = x, y = y)

Doubling standard errors

This model using double the standard errors and doesn’t fit efficiently. (I don’t think that was the point the author was looking for, but it is true.) The “divergent iterations” warnings indicate that at best, the model is converging slowly and at worst, it is not converging at all.

m14.2b <- map2stan(
  alist(
    div_est ~ dnorm(mu, sigma),
    mu <- a + bA * age_obs + bR * mar_obs,
    div_obs ~ dnorm(div_est, div_sd),
    a ~ dnorm(0, 10),
    bA ~ dnorm(0, 10),
    bR ~ dnorm(0, 10),
    sigma ~ dcauchy(0, 2.5)
  ),
  data = Div2 %>% mutate(div_sd = 2 * div_sd, mar_sd = 2 * mar_sd),
  start = list(div_est = Div2$div_obs),
  WAIC = FALSE, iter = 500, warmup = 100, chains = 4, cores = 2,
  control = list(adapt_delta = 0.99), refresh = 0
)

## Warning: There were 136 divergent transitions after warmup. Increasing adapt_delta above 0.99 may help. See
## http://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup

## Warning: There were 4 chains where the estimated Bayesian Fraction of Missing Information was low. See
## http://mc-stan.org/misc/warnings.html#bfmi-low

## Warning: Examine the pairs() plot to diagnose sampling problems

## 
## SAMPLING FOR MODEL 'div_est ~ dnorm(mu, sigma)' NOW (CHAIN 1).
## WARNING: No variance estimation is
##          performed for num_warmup < 20
## 
## 
## Chain 1, Iteration: 1 / 1 [100%]  (Sampling)
##  Elapsed Time: 4e-06 seconds (Warm-up)
##                4.6e-05 seconds (Sampling)
##                5e-05 seconds (Total)

## Warning in map2stan(alist(div_est ~ dnorm(mu, sigma), mu <- a + bA * age_obs + : There were 136 divergent iterations during sampling.
## Check the chains (trace plots, n_eff, Rhat) carefully to ensure they are valid.

plot(m14.2b)

## Waiting to draw page 2 of 4

## Waiting to draw page 3 of 4

## Waiting to draw page 4 of 4

traceplot(m14.2b@stanfit)

## 'pars' not specified. Showing first 10 parameters by default.