Learn how to model an integer response.
Learn the GLM framework.
m11.2 <-
quap(
alist(
pulled_left ~ dbinom(1, p),
logit(p) <- a + b[treatment],
a ~ dnorm(0, 1.5),
b[treatment] ~ dnorm(0, 10)
),
data = Chimps)
To explore some other options for \(w\), it is handy to write a function that creates a plot like the previous one for any \(w\).
explore_prior_for_b <- function(sd = 1) {
flist <-
eval(
substitute(
alist(
pulled_left ~ dbinom(1, p),
logit(p) <- a + b[treatment],
a ~ dnorm(0, 1.5),
b[treatment] ~ dnorm(0, SD)
), list(SD = sd)
)
)
m <-
quap(
data = Chimps,
flist
)
Prior <-
m %>% extract.prior(1e4) %>%
as.data.frame() %>%
mutate(p1 = ilogit(a + b.1),
p2 = ilogit(a + b.2),
p3 = ilogit(a + b.3),
p4 = ilogit(a + b.4),
diff12 = p2 - p1,
diff13 = p3 - p1,
diff14 = p4 - p1,
diff23 = p3 - p2,
diff24 = p4 - p2,
diff34 = p4 - p3
)
Prior %>%
gf_dens(~ diff12, adjust = 0.5, size = 1, color = ~ "1 vs 2") %>%
gf_dens(~ diff13, adjust = 0.5, size = 1, color = ~ "1 vs 3") %>%
gf_dens(~ diff14, adjust = 0.5, size = 1, color = ~ "1 vs 4") %>%
gf_dens(~ diff23, adjust = 0.5, size = 1, color = ~ "2 vs 3") %>%
gf_dens(~ diff24, adjust = 0.5, size = 1, color = ~ "2 vs 4") %>%
gf_dens(~ diff34, adjust = 0.5, size = 1, color = ~ "3 vs 4") %>%
gf_labs(title = paste("sd =", sd))
}
explore_prior_for_b(1.5)
explore_prior_for_b(0.5)
m11.3 <-
quap(
data = Chimps,
alist(
pulled_left ~ dbinom(1, p),
logit(p) <- a + b[treatment],
a ~ dnorm(0, 1.5),
b[treatment] ~ dnorm(0, 0.5)
)
)
u11.4 <- ulam(
data = Chimps %>% select(pulled_left, actor, treatment, label),
alist(
pulled_left ~ dbinom(1, p),
logit(p) <- a[actor] + b[treatment],
a[actor] ~ dnorm(0, 1.5),
b[treatment] ~ dnorm(0, 0.5)
),
chains = 4, iter = 2500, warmup = 500, refresh = 0,
file = "fits/u11.4", log_lik = TRUE
)
precis(u11.4, depth = 2)
## mean sd 5.5% 94.5% n_eff Rhat4
## a[1] -0.44826382 0.3277945 -0.97659455 0.07054808 3223.110 1.001626
## a[2] 3.90153809 0.7375350 2.78941094 5.14990447 4750.050 1.000794
## a[3] -0.74630681 0.3289169 -1.26413825 -0.22782240 3177.684 1.000988
## a[4] -0.74311111 0.3265333 -1.26616560 -0.21810146 3355.483 1.000886
## a[5] -0.44551017 0.3227926 -0.96079886 0.07065379 3109.572 1.001447
## a[6] 0.48087695 0.3246459 -0.02814224 1.00402295 3013.638 1.001862
## a[7] 1.95778814 0.4205367 1.29811744 2.64459279 3925.681 1.000517
## b[1] -0.04242607 0.2822240 -0.48685026 0.40699665 2769.351 1.001605
## b[2] 0.47848176 0.2791331 0.03035002 0.91565346 2868.849 1.001785
## b[3] -0.38674428 0.2834148 -0.84346836 0.06669854 2881.009 1.002533
## b[4] 0.36687030 0.2800094 -0.08312746 0.81016304 2892.438 1.001537
Post11.4 <-
extract.samples(u11.4)
str(Post11.4)
## List of 2
## $ a: num [1:8000, 1:7] -0.585 -0.735 -0.31 -0.923 -0.613 ...
## $ b: num [1:8000, 1:4] -0.0729 -0.1851 -0.2171 -0.0492 -0.2422 ...
## - attr(*, "source")= chr "ulam posterior: 8000 samples from object"
Post11.4$p <-
ilogit(Post11.4$a)
str(Post11.4)
## List of 3
## $ a: num [1:8000, 1:7] -0.585 -0.735 -0.31 -0.923 -0.613 ...
## $ b: num [1:8000, 1:4] -0.0729 -0.1851 -0.2171 -0.0492 -0.2422 ...
## $ p: num [1:8000, 1:7] 0.358 0.324 0.423 0.284 0.351 ...
## - attr(*, "source")= chr "ulam posterior: 8000 samples from object"
Post11.4$p %>%
apply(2, mean_hdi) %>%
bind_rows() %>%
str()
## 'data.frame': 7 obs. of 6 variables:
## $ y : num 0.392 0.975 0.326 0.326 0.393 ...
## $ ymin : num 0.245 0.94 0.195 0.194 0.25 ...
## $ ymax : num 0.539 0.998 0.468 0.467 0.54 ...
## $ .width : num 0.95 0.95 0.95 0.95 0.95 0.95 0.95
## $ .point : chr "mean" "mean" "mean" "mean" ...
## $ .interval: chr "hdi" "hdi" "hdi" "hdi" ...
Post11.4$p %>%
apply(2, mean_hdi) %>%
bind_rows() %>%
mutate(actor = factor(1:7)) %>%
gf_pointrange(actor ~ y + ymin + ymax) %>%
gf_vline(xintercept = ~ 0.5, color = "red", linetype = "dashed") %>%
gf_lims(x = c(0, 1)) %>%
gf_labs(x = "posterior probabilty of pulling left for each chimp")
Post11.4$b %>%
apply(2, mean_hdi) %>%
bind_rows() %>%
bind_cols(Chimps %>% select(treatment, label) %>% distinct()) %>%
gf_pointrange(label ~ y + ymin + ymax) %>%
gf_vline(xintercept = ~ 0.0, color = "red", linetype = "dashed") %>%
gf_labs(y = "condition", y = "model coefficient (log odds scale)", subtitle = "u11.4")
Chimps %>%
group_by(actor, treatment, condition, prosoc_left, label) %>%
summarise(prop_left = mean(pulled_left)) %>% pander()
summarise()
has grouped output by ‘actor’, ‘treatment’, ‘condition’, ‘prosoc_left’. You can override using the .groups
argument.
actor | treatment | condition | prosoc_left | label | prop_left |
---|---|---|---|---|---|
1 | 1 | 0 | 0 | R/N | 0.3333 |
1 | 2 | 0 | 1 | L/N | 0.5 |
1 | 3 | 1 | 0 | R/P | 0.2778 |
1 | 4 | 1 | 1 | L/P | 0.5556 |
2 | 1 | 0 | 0 | R/N | 1 |
2 | 2 | 0 | 1 | L/N | 1 |
2 | 3 | 1 | 0 | R/P | 1 |
2 | 4 | 1 | 1 | L/P | 1 |
3 | 1 | 0 | 0 | R/N | 0.2778 |
3 | 2 | 0 | 1 | L/N | 0.6111 |
3 | 3 | 1 | 0 | R/P | 0.1667 |
3 | 4 | 1 | 1 | L/P | 0.3333 |
4 | 1 | 0 | 0 | R/N | 0.3333 |
4 | 2 | 0 | 1 | L/N | 0.5 |
4 | 3 | 1 | 0 | R/P | 0.1111 |
4 | 4 | 1 | 1 | L/P | 0.4444 |
5 | 1 | 0 | 0 | R/N | 0.3333 |
5 | 2 | 0 | 1 | L/N | 0.5556 |
5 | 3 | 1 | 0 | R/P | 0.2778 |
5 | 4 | 1 | 1 | L/P | 0.5 |
6 | 1 | 0 | 0 | R/N | 0.7778 |
6 | 2 | 0 | 1 | L/N | 0.6111 |
6 | 3 | 1 | 0 | R/P | 0.5556 |
6 | 4 | 1 | 1 | L/P | 0.6111 |
7 | 1 | 0 | 0 | R/N | 0.7778 |
7 | 2 | 0 | 1 | L/N | 0.8333 |
7 | 3 | 1 | 0 | R/P | 0.9444 |
7 | 4 | 1 | 1 | L/P | 1 |
p11.4a <-
Chimps %>%
group_by(actor, treatment, condition, prosoc_left, label) %>%
summarise(prop_left = mean(pulled_left)) %>%
gf_point(prop_left ~ treatment | ~ actor, color = ~factor(condition)) %>%
gf_line(prop_left ~ treatment | ~ actor, group = ~prosoc_left, color = "gray50") %>%
gf_labs(title = "observed proportions")
p11.4a
PChimps <-
Chimps %>%
select(actor, treatment, label, condition, prosoc_left) %>% distinct()
PChimps %>% glimpse()
## Rows: 28
## Columns: 5
## $ actor <int> 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5…
## $ treatment <dbl> 1, 2, 3, 4, 2, 1, 3, 4, 1, 2, 4, 3, 1, 2, 4, 3, 2, 1, 4, 3…
## $ label <fct> R/N, L/N, R/P, L/P, L/N, R/N, R/P, L/P, R/N, L/N, L/P, R/P…
## $ condition <int> 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1…
## $ prosoc_left <int> 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0…
Avg11.4 <-
link(u11.4, data = PChimps, n = 1e4) %>%
apply(2, mean_hdi) %>%
bind_rows() %>%
bind_cols(PChimps)
p11.4b <-
Avg11.4 %>%
gf_pointrange(y + ymin + ymax ~ treatment | ~ actor, color = ~ factor(condition)) %>%
gf_line(y ~ treatment | ~ actor, group = ~prosoc_left, color = "gray50") %>%
gf_labs(title = "posterior predicted proportions")
p11.4a / p11.4b
We could also fit a model that uses condition and prosocial left as separate variables. (See the book.)
Chimps2 <-
Chimps %>%
group_by(actor, treatment, label, condition, prosoc_left) %>%
summarise(left_pulls = sum(pulled_left), pulls = n()) %>%
mutate(prop_left = left_pulls / pulls) %>%
ungroup()
Chimps2 %>% head() %>% pander()
actor | treatment | label | condition | prosoc_left | left_pulls | pulls | prop_left |
---|---|---|---|---|---|---|---|
1 | 1 | R/N | 0 | 0 | 6 | 18 | 0.3333 |
1 | 2 | L/N | 0 | 1 | 9 | 18 | 0.5 |
1 | 3 | R/P | 1 | 0 | 5 | 18 | 0.2778 |
1 | 4 | L/P | 1 | 1 | 10 | 18 | 0.5556 |
2 | 1 | R/N | 0 | 0 | 18 | 18 | 1 |
2 | 2 | L/N | 0 | 1 | 18 | 18 | 1 |
u11.6 <- ulam(
data = Chimps2 %>% select(left_pulls, pulls, actor, treatment, label),
alist(
left_pulls ~ dbinom(pulls, p),
logit(p) <- a[actor] + b[treatment],
a[actor] ~ dnorm(0, 1.5),
b[treatment] ~ dnorm(0, 0.5)
),
chains = 4, iter = 2500, warmup = 500, refresh = 0,
file = "fits/u11.6", log_lik = TRUE
)
precis(u11.6, depth = 2)
## mean sd 5.5% 94.5% n_eff Rhat4
## a[1] -0.4435676 0.3253178 -0.96516464 0.07638391 2857.998 1.0007892
## a[2] 3.9032900 0.7544974 2.76557053 5.17033948 4396.974 1.0006135
## a[3] -0.7455684 0.3302017 -1.29206316 -0.22857475 2780.720 1.0002517
## a[4] -0.7426621 0.3331159 -1.27179935 -0.19984670 2925.146 1.0002438
## a[5] -0.4419994 0.3236938 -0.95703891 0.08375442 2782.626 1.0003624
## a[6] 0.4811539 0.3365557 -0.05210568 1.02765389 2828.542 1.0009433
## a[7] 1.9652826 0.4147323 1.31679471 2.63910008 3480.137 0.9997978
## b[1] -0.0445345 0.2828582 -0.50344568 0.39988274 2484.577 1.0003110
## b[2] 0.4741794 0.2820499 0.01100776 0.92575094 2592.811 1.0006490
## b[3] -0.3920240 0.2818646 -0.84456731 0.05461812 2441.356 1.0007101
## b[4] 0.3643734 0.2760523 -0.06594405 0.81016194 2493.932 1.0005271
coeftab(u11.4, u11.6) %>% plot()
compare(u11.4, u11.6)
## Warning in compare(u11.4, u11.6): Different numbers of observations found for at least two models.
## Model comparison is valid only for models fit to exactly the same observations.
## Number of observations for each model:
## u11.4 504
## u11.6 28
## WAIC SE dWAIC dSE pWAIC weight
## u11.6 112.8606 8.105077 0.0000 NA 7.750591 1.000000e+00
## u11.4 531.7652 18.909188 418.9046 39.74621 8.267414 1.086462e-91
compare(u11.4, u11.6) %>% plot()
## Warning in compare(u11.4, u11.6): Different numbers of observations found for at least two models.
## Model comparison is valid only for models fit to exactly the same observations.
## Number of observations for each model:
## u11.4 504
## u11.6 28