Code from Statistical Rethinking modified by R Pruim is shown below. Differences from the original include:

* lattice and ggplot2 rather than base graphics
* tidyverse for data transformation
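The chunks below rely on several packages beyond base R. A minimal setup sketch, inferred from the functions used in this document (these exact library calls are an assumption, not part of the original):

library(rethinking)  # dens(), PI(), HPDI(), chainmode(), simplehist(), homeworkch3 data
library(lattice)     # xyplot(), densityplot()
library(mosaic)      # prop(), favstats(), tally(), histogram(), plotPoints(), ediff()
library(dplyr)       # %>%, mutate(), group_by(), summarize(), arrange()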
PrPV <- 0.95
PrPM <- 0.01
PrV <- 0.001
PrP <- PrPV * PrV + PrPM * (1 - PrV)
(PrVP <- PrPV * PrV / PrP)
## [1] 0.08683729
p_grid <- seq(from = 0,
to = 1,
length.out = 1000)
prior <- rep(1, 1000)
likelihood <- dbinom(6, size = 9, prob = p_grid)
posterior <- likelihood * prior
posterior <- posterior / sum(posterior)
# this function turns a "rasterized" kernel into a true (rasterized) density function
# useful for rescaling priors and posteriors when using the grid method
densify <-
function(x, y) {
if ( diff(range(diff(x))) > 0.1 * min(abs(diff(x))) )
stop("`x` must be (approximately) equally spaced.")
width <- mean(diff(x))
y / sum(y) / width
}
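# quick sanity check (added here as an illustration, not part of the original code):
# after densify(), the grid "density" should integrate to approximately 1
x <- seq(from = 0, to = 1, length.out = 1000)
sum(densify(x, dbinom(6, size = 9, prob = x)) * mean(diff(x)))  # should be 1 (up to rounding)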
Water9 <-
expand.grid(p = seq(from = 0, to = 1, length.out = 1000)) %>%
mutate(
prior = 1,
likelihood = dbinom(6, size = 9, prob = p),
posterior = likelihood * prior,
posterior = posterior / sum(posterior),
posterior.dens = densify(p, posterior)
)
samples <-
sample(p_grid,
prob = posterior,
size = 1e4,
replace = TRUE)
str(samples)
## num [1:10000] 0.896 0.813 0.594 0.709 0.353 ...
water9.ps.p <-
with(Water9, sample(p, prob = posterior, size = 1e4, replace = TRUE))
plot(samples)
xyplot(water9.ps.p ~ 1:length(water9.ps.p))
library(rethinking)
dens(samples)
densityplot( ~ water9.ps.p, plot.points = FALSE)
plotPoints( posterior.dens ~ p, type = "l", add = TRUE, col = "red", data = Water9)
# add up posterior probability where p < 0.5
sum(posterior[p_grid < 0.5])
## [1] 0.1718746
# add up posterior probability where p < 0.5; 3 different ways
with(Water9, sum(posterior[p < 0.5]))
## [1] 0.1718746
Water9 %>% summarize(sum(posterior[p < 0.5]))
## sum(posterior[p < 0.5])
## 1 0.1718746
Water9 %>% group_by(p < 0.5) %>% summarize(sum(posterior))
## # A tibble: 2 × 2
## `p < 0.5` `sum(posterior)`
## <lgl> <dbl>
## 1 FALSE 0.8281254
## 2 TRUE 0.1718746
sum(samples < 0.5) / 1e4
## [1] 0.1772
# prop() is in the mosaic package and avoids the need to use sum() and compute the denominator by hand
prop(water9.ps.p < 0.5)
## TRUE
## 0.1732
sum(samples > 0.5 & samples < 0.75) / 1e4
## [1] 0.5952
prop(water9.ps.p > 0.5 & water9.ps.p < 0.75)
## TRUE
## 0.598
quantile(samples, 0.8)
## 80%
## 0.7617618
quantile(water9.ps.p, 0.8)
## 80%
## 0.7627628
quantile(samples, c(0.1, 0.9))
## 10% 90%
## 0.4434434 0.8128128
quantile(water9.ps.p, c(0.1, 0.9))
## 10% 90%
## 0.4494494 0.8148148
p_grid <-
seq(from = 0,
to = 1,
length.out = 1000)
prior <- rep(1, 1000)
likelihood <- dbinom(3, size = 3, prob = p_grid)
posterior <- likelihood * prior
posterior <- posterior / sum(posterior)
samples <-
sample(p_grid,
size = 1e4,
replace = TRUE,
prob = posterior)
Water3 <-
expand.grid(p = seq(from = 0, to = 1, length.out = 1000)) %>%
mutate(
prior = 1, # recycling makes this into 1000 1's
prior = prior / sum(prior),
likelihood = dbinom(3, size = 3, prob = p),
posterior = likelihood * prior,
posterior = posterior / sum(posterior),
posterior.dens = densify(p, posterior) # true density
)
water3.ps.p <- with(Water3, sample(p, size = 1e5, replace = TRUE, prob = posterior))
PI(samples, prob = 0.5)
## 25% 75%
## 0.7057057 0.9289289
PI(water3.ps.p, prob = 0.5)
## 25% 75%
## 0.7087087 0.9309309
HPDI(samples, prob = 0.5)
## |0.5 0.5|
## 0.8358358 0.9989990
HPDI(water3.ps.p, prob = 0.5)
## |0.5 0.5|
## 0.8418418 1.0000000
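# the two 50% intervals above cover the same posterior mass, but the HPDI is
# narrower (roughly 0.16 wide versus roughly 0.22 for the central interval),
# because the Water3 posterior is strongly skewed toward 1
diff(PI(water3.ps.p, prob = 0.5))
diff(HPDI(water3.ps.p, prob = 0.5))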
p_grid[which.max(posterior)]
## [1] 1
Water3 %>% arrange(-posterior) %>% head(1)
## p prior likelihood posterior posterior.dens
## 1 1 0.001 1 0.003996 3.992004
chainmode(samples, adj = 0.01)
## [1] 0.9970504
chainmode(water3.ps.p, adj = 1) # default amount of smoothing
## [1] 0.9693726
chainmode(water3.ps.p, adj = 0.1) # less smoothing
## [1] 0.9950912
chainmode(water3.ps.p, adj = 0.01) # lots less smoothing
## [1] 0.9985943
densityplot( ~ water3.ps.p, adj = 1, plot.points = FALSE)
densityplot( ~ water3.ps.p, adj = 0.1, plot.points = FALSE)
densityplot( ~ water3.ps.p, adj = 0.01, plot.points = FALSE)
mean(samples)
## [1] 0.7984865
median(samples)
## [1] 0.8368368
favstats( ~ water3.ps.p)
## min Q1 median Q3 max mean sd n
## 0.05805806 0.7087087 0.8418418 0.9309309 1 0.8009678 0.1627364 100000
## missing
## 0
sum(posterior * abs(0.5 - p_grid))
## [1] 0.3128752
with(Water3, sum(posterior * abs(0.5 - p)))
## [1] 0.3128752
# We can compute this from the density too, but we need to adjust for the widths
# of the intervals.
with(Water3, sum(posterior.dens * abs(0.5 - p) * ediff(p, pad.value = 0)))
## [1] 0.3128752
# the easiest approach is to work from the posterior samples (small differences are due to sampling)
mean(abs(water3.ps.p - 0.5))
## [1] 0.3131737
loss <-
sapply(p_grid, function(d)
sum(posterior * abs(d - p_grid)))
Water3 <-
Water3 %>%
mutate(loss =
sapply(p_grid, function(d) {sum(posterior * abs(d - p_grid))})
)
p_grid[which.min(loss)]
## [1] 0.8408408
with(Water3, p[which.min(loss)])
## [1] 0.8408408
Water3 %>% arrange(loss) %>% head(1)
## p prior likelihood posterior posterior.dens loss
## 1 0.8408408 0.001 0.5944857 0.002375565 2.373189 0.1273465
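# under absolute loss, the minimizing point estimate is (approximately) the
# posterior median; compare with the favstats() output above
median(water3.ps.p)  # close to the 0.8408 found by minimizing the loss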
dbinom(0:2, size = 2, prob = 0.7)
## [1] 0.09 0.42 0.49
rbinom(1, size = 2, prob = 0.7)
## [1] 2
rbinom(10, size = 2, prob = 0.7)
## [1] 2 2 1 1 1 1 2 2 2 1
dummy_w <- rbinom(1e5, size = 2, prob = 0.7)
table(dummy_w) / 1e5
## dummy_w
## 0 1 2
## 0.08969 0.42177 0.48854
tally( ~ rbinom(1e5, size = 2, prob = 0.7), format = "proportion")
## rbinom(1e+05, size = 2, prob = 0.7)
## 0 1 2
## 0.09106 0.41795 0.49099
dummy_w <- rbinom(1e5, size = 9, prob = 0.7)
simplehist(dummy_w, xlab = "dummy water count")
histogram( ~ rbinom(1e5, size = 9, prob = 0.7), width = 1, xlab = "dummy water count")
dummy_w <- rbinom(1e5, size = 9, prob = 0.7)
histogram( ~ dummy_w, xlab = "dummy water count", width = 1)
w <- rbinom(1e4, size = 9, prob = 0.6)
w <- rbinom(1e4, size = 9, prob = samples)
w <- rbinom(1e4, size = 9, prob = water9.ps.p)
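# a quick posterior predictive check (added here as an illustration, not part of
# the original code): plot the simulated counts and see how often the observed
# 6 waters in 9 tosses is reproduced
histogram( ~ w, width = 1, xlab = "posterior predictive water count")
tally( ~ (w == 6), format = "proportion")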
p_grid <- seq(from = 0,
to = 1,
length.out = 1000)
prior <- rep(1, 1000)
likelihood <- dbinom(6, size = 9, prob = p_grid)
posterior <- likelihood * prior
posterior <- posterior / sum(posterior)
set.seed(100)
samples <-
sample(p_grid,
prob = posterior,
size = 1e4,
replace = TRUE)
Water9a <-
expand.grid(p = seq(from = 0, to = 1, length.out = 1000)) %>%
mutate(
prior = 1,
likelihood = dbinom(6, size = 9, prob = p),
posterior = likelihood * prior,
posterior = posterior / sum(posterior),
posterior.dens = densify(p, posterior)
)
set.seed(100)
sampled_p <-
with(Water9a, sample(p, prob = posterior, size = 1e4, replace = TRUE))
data(homeworkch3, package = "rethinking")
# command above avoids us having to type it all in by hand:
#
# birth1 <- c(1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1,
# 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0,
# 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1,
# 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1)
#
# birth2 <- c(0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1,
# 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1,
# 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0,
# 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0)
# this puts the data into a data frame using a more transparent coding scheme
BirthSex <-
data_frame(
first = c("F", "M") [1 + birth1],
second = c("F", "M")[1 + birth2]
) %>%
mutate(boys = (first == "M") + (second == "M"), girls = 2 - boys)
head(BirthSex)
## # A tibble: 6 × 4
## first second boys girls
## <chr> <chr> <int> <dbl>
## 1 M F 1 1
## 2 F M 1 1
## 3 F F 0 2
## 4 F M 1 1
## 5 M F 1 1
## 6 M M 2 0
library(rethinking)
data(homeworkch3)
sum(birth1) + sum(birth2)
## [1] 111
BirthSex %>%
group_by(boys) %>%
summarise(n = n()) %>%
mutate(cumsum(n), total.boys = boys * n, cumsum(total.boys))
## # A tibble: 3 × 5
## boys n `cumsum(n)` total.boys `cumsum(total.boys)`
## <int> <int> <int> <int> <int>
## 1 0 10 10 0 0
## 2 1 69 79 69 69
## 3 2 21 100 42 111
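The counts above confirm 111 boys in 200 births. As a sketch (not part of the original code), the same grid approximation used for the globe-tossing examples could be applied to these data:

Births <-
  expand.grid(p = seq(from = 0, to = 1, length.out = 1000)) %>%
  mutate(
    prior = 1,
    likelihood = dbinom(111, size = 200, prob = p),
    posterior = likelihood * prior,
    posterior = posterior / sum(posterior)
  )
Births %>% arrange(-posterior) %>% head(1)  # posterior mode should be near 111/200 = 0.555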