library(cherryblossom)
Data set of the day: The milk data set
Comparative primate milk composition data, from Table 2 of Hinde and Milligan (2011), Evolutionary Anthropology 20:9-23.
library(rethinking)
data(milk)
glimpse(milk)
## Rows: 29
## Columns: 8
## $ clade <fct> Strepsirrhine, Strepsirrhine, Strepsirrhine, Strepsirrh…
## $ species <fct> Eulemur fulvus, E macaco, E mongoz, E rubriventer, Lemu…
## $ kcal.per.g <dbl> 0.49, 0.51, 0.46, 0.48, 0.60, 0.47, 0.56, 0.89, 0.91, 0…
## $ perc.fat <dbl> 16.60, 19.27, 14.11, 14.91, 27.28, 21.22, 29.66, 53.41,…
## $ perc.protein <dbl> 15.42, 16.91, 16.85, 13.18, 19.50, 23.58, 23.46, 15.80,…
## $ perc.lactose <dbl> 67.98, 63.82, 69.04, 71.91, 53.22, 55.20, 46.88, 30.79,…
## $ mass <dbl> 1.95, 2.09, 2.51, 1.62, 2.19, 5.25, 5.37, 2.51, 0.71, 0…
## $ neocortex.perc <dbl> 55.16, NA, NA, NA, NA, 64.54, 64.54, 67.64, NA, 68.85, …
- `clade`: Broad taxonomic group
- `species`: Species name
- `kcal.per.g`: Kilocalories per gram of milk
- `perc.fat`: Percent fat
- `perc.protein`: Percent protein
- `perc.lactose`: Percent lactose
- `mass`: Body mass of mother, in kilograms
- `neocortex.perc`: Percent of brain mass that is neocortex
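Before fitting anything, a quick tabulation of species per clade gives a sense of how much data each group contributes (just a one-liner with dplyr, which is already loaded):
milk %>% count(clade)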
There are two common ways to put a categorical variable like `clade` into a model: indicator variables (a separate 0/1 column for each category) and index variables (a single integer code recording which category each observation belongs to). Here we use an index variable, so the model gets one coefficient per category:
\[ \mu_i = \mbox{other stuff} + \beta_{x_i} \]
where \(x_i\) is the integer index giving the category of observation \(i\).
So if the first few rows of our data look like
tibble(x = c(1, 2, 1, 3)) %>% pander()
| x |
|---|
| 1 |
| 2 |
| 1 |
| 3 |
Then we have
\[\begin{align*}
\mu_1 &= \mbox{other stuff} + \beta_1 \\
\mu_2 &= \mbox{other stuff} + \beta_2 \\
\mu_3 &= \mbox{other stuff} + \beta_1 \\
\mu_4 &= \mbox{other stuff} + \beta_3
\end{align*}\]
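In R, an index variable is just an integer vector, and picking out \(\beta_{x_i}\) is ordinary subsetting of a coefficient vector. A tiny sketch with made-up numbers (not fitted to anything):
beta <- c(0.5, -0.2, 1.1)      # one coefficient per group (made-up values)
x <- c(1, 2, 1, 3)             # the index variable from the table above
other_stuff <- 10              # stand-in for the rest of the model
mu <- other_stuff + beta[x]    # mu_i = other stuff + beta_{x_i}
mu                             # 10.5  9.8 10.5 11.1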
gf_jitter(kcal.per.g ~ clade, data = milk, height = 0, width = 0.1)
Milk <-
milk %>%
mutate(
K = standardize(kcal.per.g),
clade_id = as.numeric(clade)  # this works because clade is a factor
)
Milk %>% select(species, matches("clade")) %>% sample_n(5)
## species clade clade_id
## 1 Miopithecus talpoin Old World Monkey 3
## 2 G gorilla beringei Ape 1
## 3 Pongo pygmaeus Ape 1
## 4 P troglodytes Ape 1
## 5 Cebuella pygmaea New World Monkey 2
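If you want to double-check which clade each `clade_id` value stands for, remember that `as.numeric()` on a factor returns the position of each level, so the mapping is just the order of the factor levels:
levels(Milk$clade)                                          # clade_id k is the k-th level
Milk %>% distinct(clade, clade_id) %>% arrange(clade_id)    # the full mapping as a small table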
m5.9a <- quap(
alist(
K ~ dnorm(mu, sigma),
mu <- a[clade_id],
a[clade_id] ~ dnorm(0, 5),
sigma ~ dexp(1)
),
data = Milk
)
m5.9b <- quap(
alist(
K ~ dnorm(mu, sigma),
mu <- a0 + a[clade_id],
a0 ~ dnorm(0, 5),
a[clade_id] ~ dnorm(0, 5),
sigma ~ dexp(1)
),
data = Milk
)
precis(m5.9a, depth = 2)
## mean sd 5.5% 94.5%
## a[1] -0.5945565 0.23342490 -0.96761461 -0.2214985
## a[2] 0.4495520 0.23342481 0.07649407 0.8226099
## a[3] 0.9054167 0.28573101 0.44876333 1.3620700
## a[4] -0.8252779 0.31290049 -1.32535331 -0.3252025
## sigma 0.7010384 0.09043311 0.55650879 0.8455679
coeftab(m5.9a, m5.9b)  # coeftab objects don't have a pander method, so just print them
plot(coeftab(m5.9a, m5.9b))
Notice the difference here.
m5.9a %>%
extract.samples(1e3) %>%
data.frame() %>%
GGally::ggpairs(aes(alpha = 0.5))
m5.9b %>%
extract.samples(1e3) %>%
data.frame() %>%
GGally::ggpairs(aes(alpha = 0.5))
Model 5.9b is much more sure about things like `a0 + a.1`. In fact, `a0 + a.1` (let's call that `b1`) looks a lot like `a.1` from model 5.9a.
Post5.9a <- m5.9a %>%
extract.samples() %>%
data.frame()
Post5.9b <- m5.9b %>%
extract.samples() %>%
data.frame() %>%
mutate(b1 = a0 + a.1)
gf_dens( ~ a.1, data = Post5.9a, color = ~ "m5.9a") %>%
gf_dens( ~ b1, data = Post5.9b, color = ~"m5.9b")
So the models are not so different after all.
m5.9c <- quap(
alist(
K ~ dnorm(mu, sigma),
mu <- a0 + a[clade_id],
a0 ~ dnorm(10, 0.5),
a[clade_id] ~ dnorm(0, 5),
sigma ~ dexp(1)
),
data = Milk
)
# precis(m5.9a, depth = 2)
# coeftab(m5.9a, m5.9b) %>% pander::pander()
plot(coeftab(m5.9a, m5.9b, m5.9c))
So what’s happening here? The likelihood is essentially the same for all three models: any combination of `a0`, `a.1`, `a.2`, `a.3`, `a.4` that produces the same value of `mu` gives the same likelihood value, and in the models with an intercept there are many such combinations. The posteriors differ only because of the priors. So when we use a model like this, the prior selects which of the likelihood-equivalent combinations are preferred.
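One way to see this with the samples we already have: in m5.9b, adding a constant to `a0` and subtracting it from every `a[k]` leaves `mu` unchanged, so the posterior should show strong negative correlations between `a0` and each of the `a`'s:
Post5.9b %>%
  select(a0, a.1, a.2, a.3, a.4) %>%
  cor() %>%
  round(2)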
data(cherry_blossoms)
Cherry <- cherry_blossoms %>%
drop_na(temp, doy) %>%
mutate(
temp_s = standardize(temp)
)
I saw a lot of this in homework.
library(splines)
knots15 <- quantile(Cherry$temp_s, (1:15)/16)
B15 <- bs(Cherry$temp_s, knots = knots15, degree = 3, intercept = TRUE)
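It can help to look at the basis functions themselves before fitting anything; each column of B15 is one basis function evaluated at the observed temperatures. A quick sketch, using `tidyr::pivot_longer()` to stack the columns:
B15 %>%
  data.frame() %>%
  mutate(temp = Cherry$temp) %>%            # attach the (raw) temperature for plotting
  arrange(temp) %>%
  tidyr::pivot_longer(-temp, names_to = "basis", values_to = "value") %>%
  gf_line(value ~ temp, color = ~ basis)    # one curve per basis function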
mc15 <-
quap(
data = Cherry,
alist(
doy ~ dnorm(mu, sigma),
mu <- a0 + B15 %*% w,
a0 ~ dnorm(100, 20),
w ~ dnorm(0, 10),
sigma ~ dexp(1/4)
),
start = list(w = rep(0, ncol(B15)))
)
But that model doesn’t reflect how we expect doy to depend on temp – it’s way too wiggly:
link(mc15) %>%
apply(2, mean_hdi, .width = 0.95) %>%
bind_rows() %>%
bind_cols(Cherry) %>%
gf_line(y ~ temp) %>%
gf_ribbon(ymin + ymax ~ temp) %>%
gf_point(doy ~ temp, data = Cherry, shape = 21, size = 0.5, alpha = 0.6, inherit = FALSE)
In fact, we don’t need any data to see this – we could look at prior samples:
PL15 <-
link(mc15, post = mc15 %>% extract.prior(10))
expand_grid(temp = Cherry$temp, sample = 1:10) %>%
mutate(doy = as.vector(PL15)) %>%
gf_line(doy ~ temp, color = ~ factor(sample), group = ~sample)
Perhaps fewer knots would be better. Let’s try 5:
knots5 <- quantile(Cherry$temp_s, (1:5)/6)
B5 <- bs(Cherry$temp_s, knots = knots5, degree = 3, intercept = TRUE)
mc5 <-
quap(
data = Cherry,
alist(
doy ~ dnorm(mu, sigma),
mu <- a0 + B5 %*% w,
a0 ~ dnorm(100, 20),
w ~ dnorm(0, 10),
sigma ~ dexp(1/4)
),
start = list(w = rep(0, ncol(B5)))
)
PL5 <-
link(mc5, post = mc5 %>% extract.prior(10))
expand_grid(temp = Cherry$temp, sample = 1:10) %>%
mutate(doy = as.vector(PL5)) %>%
gf_line(doy ~ temp, color = ~ factor(sample), group = ~sample)
Hmm… That’s probably still too wiggly. Let’s try just 3!
B3 <- bs(Cherry$temp, knots = c(5.5, 6.5, 7.5), degree = 3, intercept = TRUE)  # these knots are on the raw temperature scale
mc3 <-
quap(
data = Cherry,
alist(
doy ~ dnorm(mu, sigma),
mu <- a0 + B3 %*% w,
a0 ~ dnorm(100, 5),
w ~ dnorm(0, 5),
sigma ~ dexp(1/4)
),
start = list(w = rep(0, ncol(B3)))
)
PL3 <-
link(mc3, post = mc3 %>% extract.prior(10))
expand_grid(temp = Cherry$temp, sample = 1:10) %>%
mutate(doy = as.vector(PL3)) %>%
gf_line(doy ~ temp, color = ~ factor(sample), group = ~sample)
Those seem reasonable. (As might a linear model or a quadratic model.)
link(mc3) %>%
apply(2, mean_hdi, .width = 0.95) %>%
bind_rows() %>%
bind_cols(Cherry) %>%
gf_line(y ~ temp) %>%
gf_ribbon(ymin + ymax ~ temp) %>%
gf_point(doy ~ temp, data = Cherry, shape = 21, size = 0.5, alpha = 0.6, inherit = FALSE)
In fact, the fit is pretty close to a line, even though we allowed it to have some bend.
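Since the spline fit is nearly linear, a plain linear model in the standardized temperature is a natural thing to compare against. A minimal sketch (the name `mc_lin` and the priors here are rough choices, not carefully tuned):
mc_lin <-
  quap(
    data = Cherry,
    alist(
      doy ~ dnorm(mu, sigma),
      mu <- a0 + b * temp_s,
      a0 ~ dnorm(100, 5),    # first-bloom day of year is around 100
      b ~ dnorm(0, 5),       # change in doy per standard deviation of temperature
      sigma ~ dexp(1/4)
    )
  )
precis(mc_lin)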
This is just a reminder about one way to create residuals. If we have a plot with observed values and predicted values, we must have everything we need to compute observed - predicted:
AvgMc3 <-
link(mc3) %>%
apply(2, mean_hdi, .width = 0.95) %>%
bind_rows() %>%
bind_cols(Cherry) %>%
mutate(resid = doy - y)
AvgMc3 %>%
gf_point(resid ~ temp)
AvgMc3 %>%
gf_histogram( ~ resid)
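And a quick numeric check to go with the plots; the residuals should average out near zero, with a spread roughly comparable to the model's sigma:
AvgMc3 %>%
  summarise(
    mean_resid = mean(resid),
    sd_resid = sd(resid)
  )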