Goals for today

  1. Learn how to model an integer response.

  2. Learn the GLM framework.

Logistic regression: Prosocial chimpanzees.

data(chimpanzees, package = "rethinking")
Chimps <- chimpanzees
rm(chimpanzees)

The data include two experimental factors, prosoc_left and condition, each with two levels, giving four combinations.

Chimps %>% 
  distinct(prosoc_left, condition) %>% 
  mutate(description = c("Two food items on right and no partner",
                         "Two food items on left and no partner",
                         "Two food items on right and partner present",
                         "Two food items on left and partner present")) %>% 
  knitr::kable()
| condition | prosoc_left | description                                 |
|----------:|------------:|:--------------------------------------------|
|         0 |           0 | Two food items on right and no partner      |
|         0 |           1 | Two food items on left and no partner       |
|         1 |           0 | Two food items on right and partner present |
|         1 |           1 | Two food items on left and partner present  |
Chimps <-
  Chimps %>% 
  mutate(treatment = 1 + prosoc_left + 2 * condition) %>% 
  # this will come in handy, later for plots
  mutate(
    label = factor(treatment,
                   levels = 1:4,
                   labels = c("R/N", "L/N", "R/P", "L/P")))

We can use the dplyr::count() function to get a sense of the distribution of the conditions in the data.

Chimps %>% 
  count(condition, prosoc_left, treatment, label)
##   condition prosoc_left treatment label   n
## 1         0           0         1   R/N 126
## 2         0           1         2   L/N 126
## 3         1           0         3   R/P 126
## 4         1           1         4   L/P 126

This is the same as

Chimps %>% 
  group_by(condition, prosoc_left, treatment, label) %>%
  summarise(n = n())
## `summarise()` has grouped output by 'condition', 'prosoc_left', 'treatment'. You can override using the `.groups` argument.
## # A tibble: 4 x 5
## # Groups:   condition, prosoc_left, treatment [4]
##   condition prosoc_left treatment label     n
##       <int>       <int>     <dbl> <fct> <int>
## 1         0           0         1 R/N     126
## 2         0           1         2 L/N     126
## 3         1           0         3 R/P     126
## 4         1           1         4 L/P     126

We start with the simple intercept-only logistic regression model, which follows the statistical formula

\[\begin{align*} \text{pulled_left}_i & \sim {\sf Binom}(1, p_i) \\ \operatorname{logit}(p_i) & = \alpha \\ \alpha & \sim {\sf Norm}(0, w), \end{align*}\]

where \(w\) is a distribution parameter we still need to choose.

Let’s start by trying something really flat: \(w = 10\). (Spoiler alert: This will be a bad choice.)
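
Before fitting anything, we can already get a feel for what \(w = 10\) implies by pushing a few thousand draws from \({\sf Norm}(0, 10)\) through the inverse logit by hand. This is a minimal base-R sketch; the seed and the 0.05/0.95 cutoffs are arbitrary choices.

set.seed(11)
alpha <- rnorm(1e4, mean = 0, sd = 10)   # prior draws on the log-odds scale
p <- 1 / (1 + exp(-alpha))               # inverse logit by hand
mean(p < 0.05 | p > 0.95)                # most of the prior mass lands near 0 or 1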

m11.1

m11.1 <- 
  quap(
    data = Chimps, 
    alist(pulled_left ~ dbinom(1, p),
          logit(p) <- a,
          a ~ dnorm(0, 10))
  )
prior11.1 <- extract.prior(m11.1, 1e4) 
str(prior11.1)
Prior11.1 <- prior11.1 %>%
  as.data.frame() %>%
  mutate(p = ilogit(a))
head(Prior11.1) %>% pander()
|      a |         p |
|-------:|----------:|
|  7.095 |    0.9992 |
| -1.093 |     0.251 |
| -4.535 |   0.01061 |
|  6.059 |    0.9977 |
| -18.18 | 1.273e-08 |
|  6.301 |    0.9982 |
Prior11.1 %>%
  gf_dhistogram(~p, bins = 100) %>%
  gf_dens(~ p, size = 1, color = "red") %>%
  gf_dens(~ p, adjust = 0.1, size = 1, color = "purple")

Now let's try something much tighter: \(w = 1.5\).

m11.1a <- 
  quap(
    data = Chimps, 
    alist(pulled_left ~ dbinom(1, p),
          logit(p) <- a,
          a ~ dnorm(0, 1.5))
  )
Prior11.1a <- 
  extract.prior(m11.1a, 1e4) %>%
  as.data.frame() %>%
  mutate(p = ilogit(a))

Now we can compare the two priors on the probability scale (the second plot just uses a wider bandwidth for m11.1a to smooth out its density).

gf_dens(~ p, adjust = 0.1, size = 1, color = ~"m11.1", data = Prior11.1) %>%
  gf_dens(~ p, adjust = 0.1, size = 1, color = ~"m11.1a", data = Prior11.1a) 

gf_dens(~ p, adjust = 0.1, size = 1, color = ~"m11.1", data = Prior11.1) %>%
  gf_dens(~ p, adjust = 0.5, size = 1, color = ~"m11.1a", data = Prior11.1a) 
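
To put numbers on what these density plots show, we can check how much prior mass each choice of \(w\) puts on extreme probabilities. This is a quick sketch; the 0.05/0.95 cutoffs are an arbitrary choice of ours.

bind_rows(
  Prior11.1  %>% mutate(model = "m11.1 (w = 10)"),
  Prior11.1a %>% mutate(model = "m11.1a (w = 1.5)")
) %>%
  group_by(model) %>%
  summarise(prop_extreme = mean(p < 0.05 | p > 0.95))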

m11.2

\[\begin{align*} \text{pulled_left}_i & \sim {\sf Binom}(1, p_i) \\ \operatorname{logit}(p_i) & = \alpha_{\color{#54635e}{\text{actor}}[i]} + \beta_{\color{#a4692f}{\text{treatment}}[i]} \\ \alpha_{\color{#54635e}j} & \sim {\sf Norm}(0, 1.5) \\ \beta_{\color{#a4692f}k} & \sim {\sf Norm}(0, 0.5). \end{align*}\]

This is the full model we are working toward. Model m11.2 starts simpler: a single common intercept plus treatment effects, with a deliberately flat \({\sf Norm}(0, 10)\) prior on the \(\beta\)'s.

m11.2 <- 
  quap(
    alist(
      pulled_left ~ dbinom(1, p),
      logit(p) <- a + b[treatment],
      a ~ dnorm(0, 1.5),
      b[treatment] ~ dnorm(0, 10)
    ),
    data = Chimps)

To explore other options for the width of the prior on the treatment effects \(\beta\), it is handy to write a function that, for any choice of sd, plots the prior distributions of the pairwise differences between treatments.

explore_prior_for_b <- function(sd = 1) {
  
  # build the model formula with the chosen sd substituted into the prior for b
  flist <-
    eval(
      substitute(
        alist(
          pulled_left ~ dbinom(1, p),
          logit(p) <- a + b[treatment],
          a ~ dnorm(0, 1.5),
          b[treatment] ~ dnorm(0, SD)
        ), list(SD = sd)
      )
    )
  
  m <- 
    quap(
      data = Chimps,
      flist
    )
  
  # prior draws, converted to probabilities for each treatment, plus pairwise differences
  Prior <-
    m %>% extract.prior(1e4) %>%
    as.data.frame() %>%
    mutate(p1 = ilogit(a + b.1),
           p2 = ilogit(a + b.2),
           p3 = ilogit(a + b.3),
           p4 = ilogit(a + b.4),
           diff12 = p2 - p1,
           diff13 = p3 - p1,
           diff14 = p4 - p1,
           diff23 = p3 - p2,
           diff24 = p4 - p2,
           diff34 = p4 - p3
    )
  
  Prior %>%
    gf_dens(~ diff12, adjust = 0.5, size = 1, color = ~ "1 vs 2") %>%
    gf_dens(~ diff13, adjust = 0.5, size = 1, color = ~ "1 vs 3") %>%
    gf_dens(~ diff14, adjust = 0.5, size = 1, color = ~ "1 vs 4") %>%
    gf_dens(~ diff23, adjust = 0.5, size = 1, color = ~ "2 vs 3") %>%
    gf_dens(~ diff24, adjust = 0.5, size = 1, color = ~ "2 vs 4") %>%
    gf_dens(~ diff34, adjust = 0.5, size = 1, color = ~ "3 vs 4") %>%
    gf_labs(title = paste("sd =", sd))
}
explore_prior_for_b(1.5)

explore_prior_for_b(0.5)
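
For comparison, the very flat prior used in m11.2 corresponds to calling the same helper with sd = 10:

explore_prior_for_b(10)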

m11.3

m11.3 <- 
  quap(
    data = Chimps,
    alist(
      pulled_left ~ dbinom(1, p),
      logit(p) <- a + b[treatment],
      a ~ dnorm(0, 1.5),
      b[treatment] ~ dnorm(0, 0.5)
    )
  )
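
As a quick check before moving to MCMC, we could glance at the quadratic-approximation posterior for m11.3 (output not shown here):

precis(m11.3, depth = 2)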

u11.4

u11.4 <- ulam(
  data = Chimps %>% select(pulled_left, actor, treatment, label),
  alist(
    pulled_left ~ dbinom(1, p),
    logit(p) <- a[actor] + b[treatment],
    a[actor] ~ dnorm(0, 1.5),
    b[treatment] ~ dnorm(0, 0.5)
  ),
  chains = 4, iter = 2500, warmup = 500, refresh = 0,
  file = "fits/u11.4", log_lik = TRUE
)
precis(u11.4, depth = 2)
##             mean        sd        5.5%       94.5%    n_eff    Rhat4
## a[1] -0.44826382 0.3277945 -0.97659455  0.07054808 3223.110 1.001626
## a[2]  3.90153809 0.7375350  2.78941094  5.14990447 4750.050 1.000794
## a[3] -0.74630681 0.3289169 -1.26413825 -0.22782240 3177.684 1.000988
## a[4] -0.74311111 0.3265333 -1.26616560 -0.21810146 3355.483 1.000886
## a[5] -0.44551017 0.3227926 -0.96079886  0.07065379 3109.572 1.001447
## a[6]  0.48087695 0.3246459 -0.02814224  1.00402295 3013.638 1.001862
## a[7]  1.95778814 0.4205367  1.29811744  2.64459279 3925.681 1.000517
## b[1] -0.04242607 0.2822240 -0.48685026  0.40699665 2769.351 1.001605
## b[2]  0.47848176 0.2791331  0.03035002  0.91565346 2868.849 1.001785
## b[3] -0.38674428 0.2834148 -0.84346836  0.06669854 2881.009 1.002533
## b[4]  0.36687030 0.2800094 -0.08312746  0.81016304 2892.438 1.001537
Post11.4 <-
  extract.samples(u11.4) 
str(Post11.4)
## List of 2
##  $ a: num [1:8000, 1:7] -0.585 -0.735 -0.31 -0.923 -0.613 ...
##  $ b: num [1:8000, 1:4] -0.0729 -0.1851 -0.2171 -0.0492 -0.2422 ...
##  - attr(*, "source")= chr "ulam posterior: 8000 samples from object"
Post11.4$p <-
  ilogit(Post11.4$a) 
str(Post11.4)
## List of 3
##  $ a: num [1:8000, 1:7] -0.585 -0.735 -0.31 -0.923 -0.613 ...
##  $ b: num [1:8000, 1:4] -0.0729 -0.1851 -0.2171 -0.0492 -0.2422 ...
##  $ p: num [1:8000, 1:7] 0.358 0.324 0.423 0.284 0.351 ...
##  - attr(*, "source")= chr "ulam posterior: 8000 samples from object"
Post11.4$p %>% 
  apply(2, mean_hdi) %>%
  bind_rows() %>%
  str()
## 'data.frame':    7 obs. of  6 variables:
##  $ y        : num  0.392 0.975 0.326 0.326 0.393 ...
##  $ ymin     : num  0.245 0.94 0.195 0.194 0.25 ...
##  $ ymax     : num  0.539 0.998 0.468 0.467 0.54 ...
##  $ .width   : num  0.95 0.95 0.95 0.95 0.95 0.95 0.95
##  $ .point   : chr  "mean" "mean" "mean" "mean" ...
##  $ .interval: chr  "hdi" "hdi" "hdi" "hdi" ...
Post11.4$p %>% 
  apply(2, mean_hdi) %>%
  bind_rows() %>%
  mutate(actor = factor(1:7)) %>%
  gf_pointrange(actor ~ y + ymin + ymax) %>%
  gf_vline(xintercept = ~ 0.5, color = "red", linetype = "dashed") %>%
  gf_lims(x = c(0, 1)) %>%
  gf_labs(x = "posterior probability of pulling left for each chimp")

Post11.4$b %>% 
  apply(2, mean_hdi) %>%
  bind_rows() %>%
  bind_cols(Chimps %>% select(treatment, label) %>% distinct()) %>%
  gf_pointrange(label ~ y + ymin + ymax) %>%
  gf_vline(xintercept = ~ 0.0, color = "red", linetype = "dashed") %>%
  gf_labs(y = "condition", x = "model coefficient (log odds scale)", subtitle = "u11.4")
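
The comparisons of most interest are between treatments that differ only in whether a partner is present. Here is a sketch of those contrasts using the posterior samples we already extracted; the names Contrasts11.4, diff13, and diff24 are our own, and the differences are on the log-odds scale.

Contrasts11.4 <-
  data.frame(
    diff13 = Post11.4$b[, 1] - Post11.4$b[, 3],  # R/N minus R/P
    diff24 = Post11.4$b[, 2] - Post11.4$b[, 4]   # L/N minus L/P
  )
Contrasts11.4 %>%
  gf_dens(~ diff13, adjust = 0.5, size = 1, color = ~ "R/N - R/P") %>%
  gf_dens(~ diff24, adjust = 0.5, size = 1, color = ~ "L/N - L/P") %>%
  gf_labs(x = "difference in log odds", subtitle = "u11.4")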

Chimps %>%
  group_by(actor, treatment, condition, prosoc_left, label) %>%
  summarise(prop_left = mean(pulled_left))  %>% pander()

## `summarise()` has grouped output by 'actor', 'treatment', 'condition', 'prosoc_left'. You can override using the `.groups` argument.

| actor | treatment | condition | prosoc_left | label | prop_left |
|------:|----------:|----------:|------------:|:------|----------:|
|     1 |         1 |         0 |           0 | R/N   |    0.3333 |
|     1 |         2 |         0 |           1 | L/N   |    0.5    |
|     1 |         3 |         1 |           0 | R/P   |    0.2778 |
|     1 |         4 |         1 |           1 | L/P   |    0.5556 |
|     2 |         1 |         0 |           0 | R/N   |    1      |
|     2 |         2 |         0 |           1 | L/N   |    1      |
|     2 |         3 |         1 |           0 | R/P   |    1      |
|     2 |         4 |         1 |           1 | L/P   |    1      |
|     3 |         1 |         0 |           0 | R/N   |    0.2778 |
|     3 |         2 |         0 |           1 | L/N   |    0.6111 |
|     3 |         3 |         1 |           0 | R/P   |    0.1667 |
|     3 |         4 |         1 |           1 | L/P   |    0.3333 |
|     4 |         1 |         0 |           0 | R/N   |    0.3333 |
|     4 |         2 |         0 |           1 | L/N   |    0.5    |
|     4 |         3 |         1 |           0 | R/P   |    0.1111 |
|     4 |         4 |         1 |           1 | L/P   |    0.4444 |
|     5 |         1 |         0 |           0 | R/N   |    0.3333 |
|     5 |         2 |         0 |           1 | L/N   |    0.5556 |
|     5 |         3 |         1 |           0 | R/P   |    0.2778 |
|     5 |         4 |         1 |           1 | L/P   |    0.5    |
|     6 |         1 |         0 |           0 | R/N   |    0.7778 |
|     6 |         2 |         0 |           1 | L/N   |    0.6111 |
|     6 |         3 |         1 |           0 | R/P   |    0.5556 |
|     6 |         4 |         1 |           1 | L/P   |    0.6111 |
|     7 |         1 |         0 |           0 | R/N   |    0.7778 |
|     7 |         2 |         0 |           1 | L/N   |    0.8333 |
|     7 |         3 |         1 |           0 | R/P   |    0.9444 |
|     7 |         4 |         1 |           1 | L/P   |    1      |
p11.4a <-
  Chimps %>%
  group_by(actor, treatment, condition, prosoc_left, label) %>%
  summarise(prop_left = mean(pulled_left)) %>%
  gf_point(prop_left ~ treatment | ~ actor, color = ~factor(condition)) %>%
  gf_line(prop_left ~ treatment | ~ actor, group = ~prosoc_left, color = "gray50") %>%
  gf_labs(title = "observed proportions")

p11.4a
PChimps <- 
  Chimps %>%
  select(actor, treatment, label, condition, prosoc_left) %>% distinct()
PChimps %>% glimpse()
## Rows: 28
## Columns: 5
## $ actor       <int> 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5…
## $ treatment   <dbl> 1, 2, 3, 4, 2, 1, 3, 4, 1, 2, 4, 3, 1, 2, 4, 3, 2, 1, 4, 3…
## $ label       <fct> R/N, L/N, R/P, L/P, L/N, R/N, R/P, L/P, R/N, L/N, L/P, R/P…
## $ condition   <int> 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1…
## $ prosoc_left <int> 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0…
Avg11.4 <-
  link(u11.4, data = PChimps, n = 1e4) %>%
  apply(2, mean_hdi) %>%
  bind_rows() %>%
  bind_cols(PChimps)

p11.4b <-
  Avg11.4 %>%
  gf_pointrange(y + ymin + ymax ~ treatment | ~ actor, color = ~ factor(condition)) %>%
  gf_line(y ~ treatment | ~ actor, group = ~prosoc_left, color = "gray50") %>%
  gf_labs(title = "posterior predicted proportions")

p11.4a / p11.4b

u11.5

We could also fit a model that treats condition and prosoc_left as separate index variables rather than using the combined treatment variable. (See the book's m11.5.)
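
Here is a sketch of what that model might look like with ulam, following the book's m11.5; the index variables side and cond, and the file path, are our own choices.

u11.5 <- ulam(
  data = Chimps %>%
    mutate(side = prosoc_left + 1,    # 1 = prosocial option on right, 2 = on left
           cond = condition + 1) %>%  # 1 = no partner, 2 = partner present
    select(pulled_left, actor, side, cond),
  alist(
    pulled_left ~ dbinom(1, p),
    logit(p) <- a[actor] + bs[side] + bc[cond],
    a[actor] ~ dnorm(0, 1.5),
    bs[side] ~ dnorm(0, 0.5),
    bc[cond] ~ dnorm(0, 0.5)
  ),
  chains = 4, iter = 2500, warmup = 500, refresh = 0,
  file = "fits/u11.5", log_lik = TRUE
)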

Aggregated binomial: Chimpanzees again, condensed.

Chimps2 <-
  Chimps %>%
  group_by(actor, treatment, label, condition, prosoc_left) %>%
  summarise(left_pulls = sum(pulled_left), pulls = n()) %>%
  mutate(prop_left = left_pulls / pulls) %>% 
  ungroup()

Chimps2 %>% head() %>% pander()
| actor | treatment | label | condition | prosoc_left | left_pulls | pulls | prop_left |
|------:|----------:|:------|----------:|------------:|-----------:|------:|----------:|
|     1 |         1 | R/N   |         0 |           0 |          6 |    18 |    0.3333 |
|     1 |         2 | L/N   |         0 |           1 |          9 |    18 |    0.5    |
|     1 |         3 | R/P   |         1 |           0 |          5 |    18 |    0.2778 |
|     1 |         4 | L/P   |         1 |           1 |         10 |    18 |    0.5556 |
|     2 |         1 | R/N   |         0 |           0 |         18 |    18 |    1      |
|     2 |         2 | L/N   |         0 |           1 |         18 |    18 |    1      |
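
Before refitting, a quick consistency check on the aggregation (a small sketch): each of the 7 actors contributes 18 pulls in each of the 4 treatments, so the totals should match the disaggregated data.

sum(Chimps2$pulls) == nrow(Chimps)                   # 7 * 4 * 18 = 504 pulls in total
sum(Chimps2$left_pulls) == sum(Chimps$pulled_left)   # same number of left pulls
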
u11.6 <- ulam(
  data = Chimps2 %>% select(left_pulls, pulls, actor, treatment, label),
  alist(
    left_pulls ~ dbinom(pulls, p),
    logit(p) <- a[actor] + b[treatment],
    a[actor] ~ dnorm(0, 1.5),
    b[treatment] ~ dnorm(0, 0.5)
  ),
  chains = 4, iter = 2500, warmup = 500, refresh = 0,
  file = "fits/u11.6", log_lik = TRUE
)
precis(u11.6, depth = 2)
##            mean        sd        5.5%       94.5%    n_eff     Rhat4
## a[1] -0.4435676 0.3253178 -0.96516464  0.07638391 2857.998 1.0007892
## a[2]  3.9032900 0.7544974  2.76557053  5.17033948 4396.974 1.0006135
## a[3] -0.7455684 0.3302017 -1.29206316 -0.22857475 2780.720 1.0002517
## a[4] -0.7426621 0.3331159 -1.27179935 -0.19984670 2925.146 1.0002438
## a[5] -0.4419994 0.3236938 -0.95703891  0.08375442 2782.626 1.0003624
## a[6]  0.4811539 0.3365557 -0.05210568  1.02765389 2828.542 1.0009433
## a[7]  1.9652826 0.4147323  1.31679471  2.63910008 3480.137 0.9997978
## b[1] -0.0445345 0.2828582 -0.50344568  0.39988274 2484.577 1.0003110
## b[2]  0.4741794 0.2820499  0.01100776  0.92575094 2592.811 1.0006490
## b[3] -0.3920240 0.2818646 -0.84456731  0.05461812 2441.356 1.0007101
## b[4]  0.3643734 0.2760523 -0.06594405  0.81016194 2493.932 1.0005271
coeftab(u11.4, u11.6) %>% plot()

compare(u11.4, u11.6)
## Warning in compare(u11.4, u11.6): Different numbers of observations found for at least two models.
## Model comparison is valid only for models fit to exactly the same observations.
## Number of observations for each model:
## u11.4 504 
## u11.6 28
##           WAIC        SE    dWAIC      dSE    pWAIC       weight
## u11.6 112.8606  8.105077   0.0000       NA 7.750591 1.000000e+00
## u11.4 531.7652 18.909188 418.9046 39.74621 8.267414 1.086462e-91
compare(u11.4, u11.6) %>% plot()
## Warning in compare(u11.4, u11.6): Different numbers of observations found for at least two models.
## Model comparison is valid only for models fit to exactly the same observations.
## Number of observations for each model:
## u11.4 504 
## u11.6 28

As the warning says, these WAIC values are not directly comparable: u11.4 is fit to the 504 individual pulls while u11.6 is fit to the 28 aggregated counts, so the two models are scored on different sets of observations.