Code from Statistical Rethinking modified by R Pruim is shown below. Differences to the oringal include:

Max Entropy for Binomial Dists

The examples below illustrate that the Binomial distributions are the maximum intropy distributions among distributions with a fixed expected number of successes.

R Code 9.1

q <- list(
  A = c(0, 0, 10, 0, 0),
  B = c(0, 1, 8, 1, 0),
  C = c(0, 2, 6, 2, 0),
  D = c(1, 2, 4, 2, 1),
  E = c(2, 2, 2, 2, 2))

R Code 9.2

# convert counts to probabilities
p <- purrr::map(q, ~ .x / sum(.x))
p
## $A
## [1] 0 0 1 0 0
## 
## $B
## [1] 0.0 0.1 0.8 0.1 0.0
## 
## $C
## [1] 0.0 0.2 0.6 0.2 0.0
## 
## $D
## [1] 0.1 0.2 0.4 0.2 0.1
## 
## $E
## [1] 0.2 0.2 0.2 0.2 0.2

R Code 9.3

# Entropy function -- being careful to handle 0 probabilities correctly
H <- function(p) {
  - sum(ifelse(p > 0, p * log(p), 0))
}
sapply(p, H)
##         A         B         C         D         E 
## 0.0000000 0.6390319 0.9502705 1.4708085 1.6094379

R Code 9.4

ways <- c(1, 90, 1260, 37800, 113400)
log(ways) / 10
## [1] 0.0000000 0.4499810 0.7138867 1.0540064 1.1638677

R Code 9.5

# build list of the candidate distributions
p <- list(
  A = c(1 / 4, 1 / 4, 1 / 4, 1 / 4),
  B = c(2 / 6, 1 / 6, 1 / 6, 2 / 6),
  C = c(1 / 6, 2 / 6, 2 / 6, 1 / 6),
  D = c(1 / 8, 4 / 8, 2 / 8, 1 / 8)
)

# compute expected value of each -- should all be the same
sapply(p, function(x) sum(x * c(0, 1, 1, 2)))
## A B C D 
## 1 1 1 1

R Code 9.6

# compute entropy of each distribution
sapply(p, H)
##        A        B        C        D 
## 1.386294 1.329661 1.329661 1.213008
D <- data_frame(dist = LETTERS[1:4], H = sapply(p, H))
gf_point(H ~ dist, data = D) 

R Code 9.7

p <- 0.7
A <- c((1 - p) ^ 2, p * (1 - p), (1 - p) * p, p ^ 2)
A
## [1] 0.09 0.21 0.21 0.49

R Code 9.8

H(A)
## [1] 1.221729

R Code 9.9

This version of sim.p() prepares a nice data frame with all of the results. The (calculaed) expected value is addd for good measure. It should always equal the target expected value given in the call of the function.

sim.p <- function(n = 1e4, E = 1.4) {
  Q <- matrix(runif(3 *n), ncol = 3) 
  q4 <- (E * (Q[, 1] + Q[, 2] + Q[, 3]) - Q[, 2] - Q[, 3]) / (2 - E)
  Q <- cbind(Q, q4)
  S <- apply(Q, 1, sum)
  P <- Q / S
  data_frame(
    p1 = P[, 1], 
    p2 = P[, 2],
    p3 = P[, 3],
    p4 = P[, 4],
    H = apply(P, 1, H),
    E = apply(P, 1, function(q) sum(q * c(0, 1, 1, 2)))
  )
}
sim.p(3)
## # A tibble: 3 × 6
##           p1         p2        p3        p4         H     E
##        <dbl>      <dbl>     <dbl>     <dbl>     <dbl> <dbl>
## 1 0.01052492 0.46332043 0.1156297 0.4105249 1.0193330   1.4
## 2 0.23407434 0.05303432 0.0788170 0.6340743 0.9847772   1.4
## 3 0.17010913 0.14255252 0.1172292 0.5701091 1.1506709   1.4

R Code 9.10

Now let’s simulate a bunch of these and compare them to our distribution A.

Sims <- sim.p(1e5)
gf_histogram( ~ H, data = Sims, binwidth = 0.01, boundary = H(A)) %>% 
gf_vline(xintercept = H(A), color = "red")

R Code 9.11 – 9.13

Since our simulations are preparing a nice data frame, we can get the information we want very easily.

H(A)
## [1] 1.221729
Sims %>% arrange(-H) %>% head(4) %>% round(4)
## # A tibble: 4 × 6
##       p1     p2     p3     p4      H     E
##    <dbl>  <dbl>  <dbl>  <dbl>  <dbl> <dbl>
## 1 0.0900 0.2101 0.2099 0.4900 1.2217   1.4
## 2 0.0898 0.2101 0.2103 0.4898 1.2217   1.4
## 3 0.0899 0.2104 0.2098 0.4899 1.2217   1.4
## 4 0.0904 0.2094 0.2099 0.4904 1.2217   1.4