density
: initial tadpole density (number of tadpoles in a 1.2 x 0.8 x 0.4 m tank)
pred
: predators present? (pred or no)
size
big or small tadpoles
surv
: number of tadpoles surviving
propsurv
: proportion of tadpoles surviging (surv / density
)
tank
: each row represents a different tank
Frogs %>%
group_by(density) %>%
summarise(propsurv = mean(propsurv), n = n()) %>%
pander()
density | propsurv | n |
---|---|---|
10 | 0.8063 | 16 |
25 | 0.6675 | 16 |
35 | 0.6911 | 16 |
Frogs %>%
gf_jitter(propsurv ~ factor(density), height = 0, width = 0.1, alpha = 0.5)
u13.0 <-
ulam(
data = Frogs %>% select(surv, density, tank),
alist(
surv ~ dbinom(density, p),
logit(p) <- a,
a ~ dnorm(0, 1.5)
),
chains = 4, cores = 4, refresh = 0, log_lik = TRUE,
file = "fits/u13.0"
)
u13.1 <-
ulam(
data = Frogs %>% select(surv, density, tank),
alist(
surv ~ dbinom(density, p),
logit(p) <- a[tank],
a[tank] ~ dnorm(0, 1.5)
),
chains = 4, cores = 4, refresh = 0, log_lik = TRUE,
file = "fits/u13.1"
)
u13.2 <-
ulam(
data = Frogs %>% select(surv, density, tank),
alist(
surv ~ dbinom(density, p),
logit(p) <- a[tank],
a[tank] ~ dnorm(a_bar, sigma),
a_bar ~ dnorm(0, 1.5),
sigma ~ dexp(1)
),
iter = 4000, chains = 4, cores = 4, refresh = 0, log_lik = TRUE,
file = "fits/u13.2")
1. How many parameters does each model have? What does each parameter “mean”?
compare(u13.0, u13.1, u13.2) %>% plot()
compare(u13.1, u13.2) %>% plot()
compare(u13.1, u13.2, func = PSIS) %>% plot()
## Some Pareto k values are very high (>1). Set pointwise=TRUE to inspect individual points.
## Some Pareto k values are very high (>1). Set pointwise=TRUE to inspect individual points.
compare(u13.0, u13.1, u13.2)
## WAIC SE dWAIC dSE pWAIC weight
## u13.2 201.0908 7.341078 0.00000 NA 21.32363 9.990185e-01
## u13.1 214.9416 4.510742 13.85082 4.062775 25.72467 9.815379e-04
## u13.0 585.2671 68.882764 384.17631 67.171271 11.29516 3.773524e-84
2. How do you explain the effective number of parameters vs. the acutal number of parameters for each model?
The plot below shows the survival proportion in each tank (data
) and the mean posterior predicted survival proportion (u13.2
).
The horizontal line is the mean survival proportion over all the tanks.
Frogs %>%
gf_point(propsurv ~ tank, shape = ~"data", color = ~"data") %>%
gf_point(propsurv_est13.2 ~ tank, shape = ~ "u13.2", color = ~"u13.2") %>%
# mark posterior median probability across tanks
gf_hline(yintercept = ~ logistic(mean(Post13.2$a)), lty = 2, alpha = 0.5) %>%
gf_facet_grid( ~ density, scale="free", labeller = label_both) %>%
gf_labs(y = "proportion surviving", color = "source", shape = "source") %>%
gf_refine(scale_shape_manual(values = c(16, 1))) %>%
gf_labs("Survival Rate: Empirical vs model predicted")
3. How do the two survival proportions compare in each tank? Why is there this pattern?
4. This phenomonon is sometimes called “shrinkage”. Why is it called that?
5. Which tanks exhibit the most shrinkage? Why do you think this is? (Note: there are at least two components to this answer.)
6. What good and/or bad features about the model does this plot reveal or suggest?
Note: We can make a similar plot for model u13.1
Frogs %>%
gf_point(propsurv ~ tank, shape = ~"data", color = ~"data") %>%
gf_point(propsurv_est13.1 ~ tank, shape = ~ "u13.1", color = ~"u13.1") %>%
gf_hline(yintercept = ~ 0.5, lty = 3, alpha = 0.5) %>%
gf_facet_grid( ~ density, scale="free", labeller = label_both) %>%
gf_labs(y = "proportion surviving", color = "source", shape = "source") %>%
gf_refine(scale_shape_manual(values = c(16, 1))) %>%
gf_labs("Survival Rate: Empirical vs model predicted")
7. Why is the horizontal line for this plot at 0.5?
u13.2
models a population of tanks.
7. What does the model say about the population of tanks?
8. How can we look at what the posterior distribution says about this?
# blank starting plot
p <- gf_blank(0 ~ 0)
# show first 100 populations in the posterior
for (i in 1:100) {
p <-
p %>%
gf_dist("norm", mean = Post13.2$a_bar[i], sd = Post13.2$sigma[i],
alpha = 0.1, color = "navy")
}
p %>%
gf_labs(x = "log-odds of survival (100 posterior samples)")
# sample 8000 imaginary tanks from the posterior distribution
sim_tanks <- rnorm(8000, Post13.2$a_bar, Post13.2$sigma)
gf_dens( ~ sim_tanks) %>%
gf_labs(x = "log-odds of survival (sample of 8000 simulated tanks)")
# transform to probability and visualize
gf_dens( ~ (logistic(sim_tanks))) %>%
gf_labs(x = "probability survival (sample of 8000 simulated tanks)")
To distinguish our simulations from the real data, we refer to ponds rather than tanks.
Three levels of pooling:
Since Bayesian models are generative, for any choice of parameters, we should be able to generate data that matches the way the model thinks data arise.
10. For model u13.2
, what do we need to choose to simulate data?
FrogSim <-
function(
reps = 15L,
density = c(5L, 10L, 25L, 35L),
a_bar = 1.4,
sigma = 1.5
) {
expand_grid(density = density, rep = 1:reps) %>%
mutate(
pond = 1:n(),
a_true = rnorm(n(), mean = a_bar, sd = sigma),
surv = rbinom(n(), prob = logistic(a_true), size = density)
)
}
11. Explain what each line of FrogSim()
is doing.
Since we know “truth” in our simulation, we can compare the model predictions to “truth”. (We don’t get to do this will real data, that’s the joy of simulation.)
set.seed(12345)
SFrogs <- FrogSim()
u13.3 <- ulam(
data = SFrogs,
alist(
surv ~ dbinom(density, p),
logit(p) <- a[pond],
a[pond] ~ dnorm(a_bar, sigma),
a_bar ~ dnorm(0, 1.5),
sigma ~ dexp(1)
),
iter = 1e4, warmup = 1000, refresh = 0,
log_lik = TRUE,
file = "fits/u13.3")
By combining this code into a function, we can apply it again later without retyping. This
frog_plot <- function(model, data = model@data) {
data <-
data %>% as.data.frame() %>%
mutate(
a_pond_est = as.numeric(coef(model)[1:60]),
p_est = logistic(a_pond_est),
p_true = logistic(a_true),
p_raw = surv / density,
nopool_error = abs(p_raw - p_true),
partpool_error = abs(p_est - p_true)
)
gf_point(nopool_error ~ rep, data = data,
shape = ~"no pooling", color = ~"no pooling") %>%
gf_point(partpool_error ~ rep, data = data,
shape = ~"partial pooling", color = ~"partial pooling") %>%
gf_line(partpool_error ~ rep,
data = data %>% group_by(density) %>%
mutate(partpool_error = mean(partpool_error)),
linetype = "dashed", color = ~ "partial pooling") %>%
gf_line(nopool_error ~ rep,
data = data %>% group_by(density) %>%
mutate(nopool_error = mean(nopool_error)),
linetype = "dotted", color = ~"no pooling") %>%
gf_facet_grid( ~ density, scale = "free", labeller = label_both) %>%
gf_labs(
shape = "pooling", color = "pooling",
linetype = "pooling",
y = "absolute error"
) %>%
gf_theme(legend.position = "top") %>%
gf_refine(
scale_shape_manual(values = c(16, 1)),
scale_color_manual(values = c("black", "steelblue"))
)
}
Generating a plot from a model is a one-liner now:
frog_plot(model = u13.3)
12. Explain what each line of frog_plot()
is doing.
13. What does the resulting plot tell us about our three models?