library(cherryblossom)
Data set of the day: The milk data set
Comparative primate milk composition data, from Table 2 of Hinde and Milligan. 2011. Evolutionary Anthropology 20:9-23.
library(rethinking)
data(milk)
glimpse(milk)
## Rows: 29
## Columns: 8
## $ clade <fct> Strepsirrhine, Strepsirrhine, Strepsirrhine, Strepsirrh…
## $ species <fct> Eulemur fulvus, E macaco, E mongoz, E rubriventer, Lemu…
## $ kcal.per.g <dbl> 0.49, 0.51, 0.46, 0.48, 0.60, 0.47, 0.56, 0.89, 0.91, 0…
## $ perc.fat <dbl> 16.60, 19.27, 14.11, 14.91, 27.28, 21.22, 29.66, 53.41,…
## $ perc.protein <dbl> 15.42, 16.91, 16.85, 13.18, 19.50, 23.58, 23.46, 15.80,…
## $ perc.lactose <dbl> 67.98, 63.82, 69.04, 71.91, 53.22, 55.20, 46.88, 30.79,…
## $ mass <dbl> 1.95, 2.09, 2.51, 1.62, 2.19, 5.25, 5.37, 2.51, 0.71, 0…
## $ neocortex.perc <dbl> 55.16, NA, NA, NA, NA, 64.54, 64.54, 67.64, NA, 68.85, …
clade
: Broad taxonomic group
species
: Species name
kcal.per.g
: Kilocalories per gram of milk
perc.fat
: Percent fat
perc.protein
: Percent protein
perc.lactose
: Percent lactose
mass
: Body mass of mother, in kilograms
neocortex.perc
: Percent of brain mass that is neocortex
There are two common ways to put a categorical variable like `clade` into a model:

indicator variables
: one 0/1 ("dummy") variable per category

index variables
: a single integer variable recording which category each observation belongs to

Here we use index variables, so the model has the form

\[ \mu_i = \mbox{other stuff} + \beta_{x_i} \]

where \(x_i\) is the index of the category for observation \(i\).
So if the first few rows of our data look like
tibble(x = c(1, 2, 1, 3)) %>% pander()
| x |
|---|
| 1 |
| 2 |
| 1 |
| 3 |
Then we have
\[\begin{align*} \mu_1 &= \mbox{other stuff} + \beta_1 \\ \mu_2 &= \mbox{other stuff} + \beta_2 \\ \mu_3 &= \mbox{other stuff} + \beta_1 \\ \mu_4 &= \mbox{other stuff} + \beta_3 \end{align*}\]
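For contrast, here is a minimal sketch (using base R's `model.matrix()`, which isn't used elsewhere in these notes) of what indicator coding of the same toy variable would look like:

x <- c(1, 2, 1, 3)
model.matrix(~ factor(x))      # indicator (dummy) coding: a 0/1 column for each non-reference level
model.matrix(~ 0 + factor(x))  # or one 0/1 column per level, with no reference level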
gf_jitter(kcal.per.g ~ clade, data = milk, height = 0, width = 0.1)
Milk <-
milk %>%
mutate(
K = standardize(kcal.per.g),
clade_id = as.numeric(clade) # this works because clade is a factor
)
Milk %>% select(species, matches("clade")) %>% sample_n(5)
## species clade clade_id
## 1 Miopithecus talpoin Old World Monkey 3
## 2 G gorilla beringei Ape 1
## 3 Pongo pygmaeus Ape 1
## 4 P troglodytes Ape 1
## 5 Cebuella pygmaea New World Monkey 2
m5.9a <- quap(
alist(
K ~ dnorm(mu, sigma),
mu <- a[clade_id],
a[clade_id] ~ dnorm(0, 5),
sigma ~ dexp(1)
),
data = Milk
)
m5.9b <- quap(
alist(
K ~ dnorm(mu, sigma),
mu <- a0 + a[clade_id],
a0 ~ dnorm(0, 5),
a[clade_id] ~ dnorm(0, 5),
sigma ~ dexp(1)
),
data = Milk
)
precis(m5.9a, depth = 2)
## mean sd 5.5% 94.5%
## a[1] -0.5945565 0.23342490 -0.96761461 -0.2214985
## a[2] 0.4495520 0.23342481 0.07649407 0.8226099
## a[3] 0.9054167 0.28573101 0.44876333 1.3620700
## a[4] -0.8252779 0.31290049 -1.32535331 -0.3252025
## sigma 0.7010384 0.09043311 0.55650879 0.8455679
# coeftab objects don't have a pander method; printing (and plotting) them works fine
coeftab(m5.9a, m5.9b)
plot(coeftab(m5.9a, m5.9b))
Notice the difference here.
m5.9a %>%
extract.samples(1e3) %>%
data.frame() %>%
GGally::ggpairs(aes(alpha = 0.5))
m5.9b %>%
extract.samples(1e3) %>%
data.frame() %>%
GGally::ggpairs(aes(alpha = 0.5))
Model 5.9b is much more sure about things like `a0 + a.1`. In fact, `a0 + a.1` (let's call that `b1`) looks a lot like `a.1` from model 5.9a.
Post5.9a <- m5.9a %>%
extract.samples() %>%
data.frame()
Post5.9b <- m5.9b %>%
extract.samples() %>%
data.frame() %>%
mutate(b1 = a0 + a.1)
gf_dens( ~ a.1, data = Post5.9a, color = ~ "m5.9a") %>%
gf_dens( ~ b1, data = Post5.9b, color = ~"m5.9b")
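Here is a quick numeric check to go along with the density plot (not part of the original notes): `b1 = a0 + a.1` from m5.9b should have roughly the same posterior mean and standard deviation as `a.1` from m5.9a.

c(mean(Post5.9a$a.1), sd(Post5.9a$a.1))
c(mean(Post5.9b$b1), sd(Post5.9b$b1))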
So the models are not so different after all.

But what happens if we use a prior for `a0` that is narrow and centered far from the data?
m5.9c <- quap(
alist(
K ~ dnorm(mu, sigma),
mu <- a0 + a[clade_id],
a0 ~ dnorm(10, 0.5),
a[clade_id] ~ dnorm(0, 5),
sigma ~ dexp(1)
),
data = Milk
)
# precis(m5.9a, depth = 2)
# coeftab(m5.9a, m5.9b) %>% pander::pander()
plot(coeftab(m5.9a, m5.9b, m5.9c))
So what's happening here? The likelihood is essentially the same for all three models: any combination of `a0`, `a.1`, `a.2`, `a.3`, and `a.4` that produces the same value of `mu` gives the same likelihood. But in the models with an intercept, there are many different combinations that produce the same `mu`, so their posteriors differ only because of the prior. When we use such a model, the prior selects which of these likelihood-equivalent combinations are preferred.
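To make that concrete, here is a small illustration (not from the original notes; the numbers are just the posterior means read off `precis(m5.9a)` above, rounded): shifting `a0` up by 10 and every `a[k]` down by 10 leaves `mu`, and therefore the likelihood, unchanged.

a <- c(-0.59, 0.45, 0.91, -0.83)      # roughly the posterior means from m5.9a
mu1 <- 0 + a[Milk$clade_id]           # a0 = 0
mu2 <- 10 + (a - 10)[Milk$clade_id]   # a0 = 10, every a[k] shifted down by 10
all.equal(mu1, mu2)                   # identical mu for every row ...
all.equal(
  sum(dnorm(Milk$K, mu1, 0.7, log = TRUE)),
  sum(dnorm(Milk$K, mu2, 0.7, log = TRUE))
)                                     # ... so identical (log) likelihood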
data(cherry_blossoms)
Cherry <- cherry_blossoms %>%
drop_na(temp, doy) %>%
mutate(
temp_s = standardize(temp)
)
I saw a lot of this in homework.
library(splines)
knots15 <- quantile(Cherry$temp_s, (1:15)/16)
B15 <- bs(Cherry$temp_s, knots = knots15, degree = 3, intercept = TRUE)
mc15 <-
quap(
data = Cherry,
alist(
doy ~ dnorm(mu, sigma),
mu <- a0 + B15 %*% w,
a0 ~ dnorm(100, 20),
w ~ dnorm(0, 10),
sigma ~ dexp(1/4)
),
start = list(w = rep(0, ncol(B15)))
)
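As a quick check (this isn't in the original notes): with 15 interior knots, degree 3, and `intercept = TRUE`, the basis matrix has 15 + 3 + 1 = 19 columns, so `w` in the model above is a vector of 19 weights.

dim(B15)   # one row per year in Cherry, one column per basis function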
But that model doesn't reflect how we expect `doy` to depend on `temp` – it's way too wiggly:
link(mc15) %>%
apply(2, mean_hdi, .width = 0.95) %>%
bind_rows() %>%
bind_cols(Cherry) %>%
gf_line(y ~ temp) %>%
gf_ribbon(ymin + ymax ~ temp) %>%
gf_point(doy ~ temp, data = Cherry, shape = 21, size = 0.5, alpha = 0.6, inherit = FALSE)
In fact, we don’t need any data to see this – we could look at prior samples:
PL15 <-
link(mc15, post = mc15 %>% extract.prior(10))
expand_grid(temp = Cherry$temp, sample = 1:10) %>%
mutate(doy = as.vector(PL15)) %>%
gf_line(doy ~ temp, color = ~ factor(sample), group = ~sample)
Perhaps fewer knots would be better. Let’s try 5:
knots5 <- quantile(Cherry$temp_s, (1:5)/6)
B5 <- bs(Cherry$temp_s, knots = knots5, degree = 3, intercept = TRUE)
mc5 <-
quap(
data = Cherry,
alist(
doy ~ dnorm(mu, sigma),
mu <- a0 + B5 %*% w,
a0 ~ dnorm(100, 20),
w ~ dnorm(0, 10),
sigma ~ dexp(1/4)
),
start = list(w = rep(0, ncol(B5)))
)
PL5 <-
link(mc5, post = mc5 %>% extract.prior(10))
expand_grid(temp = Cherry$temp, sample = 1:10) %>%
mutate(doy = as.vector(PL5)) %>%
gf_line(doy ~ temp, color = ~ factor(sample), group = ~sample)
Hmm… That’s probably still too wiggly. Let’s try just 3!
B3 <- bs(Cherry$temp, knots = c(5.5, 6.5, 7.5), degree = 3, intercept = TRUE) # knots chosen on the raw temperature scale (degrees C)
mc3 <-
quap(
data = Cherry,
alist(
doy ~ dnorm(mu, sigma),
mu <- a0 + B3 %*% w,
a0 ~ dnorm(100, 5),
w ~ dnorm(0, 5),
sigma ~ dexp(1/4)
),
start = list(w = rep(0, ncol(B3)))
)
PL3 <-
link(mc3, post = mc3 %>% extract.prior(10))
expand_grid(temp = Cherry$temp, sample = 1:10) %>%
mutate(doy = as.vector(PL3)) %>%
gf_line(doy ~ temp, color = ~ factor(sample), group = ~sample)
Those seem reasonable. (As might a linear model or a quadratic model.)
link(mc3) %>%
apply(2, mean_hdi, .width = 0.95) %>%
bind_rows() %>%
bind_cols(Cherry) %>%
gf_line(y ~ temp) %>%
gf_ribbon(ymin + ymax ~ temp) %>%
gf_point(doy ~ temp, data = Cherry, shape = 21, size = 0.5, alpha = 0.6, inherit = FALSE)
In fact, the fit is pretty close to a line, even though we allowed it to have some bend.
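For comparison, a straight-line model is easy to fit with the same machinery. This model (call it `mc1`) is a hypothetical addition, not part of the original notes, using priors in the same spirit as mc3:

mc1 <- quap(
  data = Cherry,
  alist(
    doy ~ dnorm(mu, sigma),
    mu <- a0 + b * temp_s,   # linear in standardized temperature
    a0 ~ dnorm(100, 5),
    b ~ dnorm(0, 5),
    sigma ~ dexp(1/4)
  )
)
precis(mc1)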
This is just a reminder about one way to create residuals. If we have a plot showing observed and predicted values, we must already have everything we need to compute observed - predicted:
AvgMc3 <-
link(mc3) %>%
apply(2, mean_hdi, .width = 0.95) %>%
bind_rows() %>%
bind_cols(Cherry) %>%
mutate(resid = doy - y)
AvgMc3 %>%
gf_point(resid ~ temp)
AvgMc3 %>%
gf_histogram( ~ resid)