Bootstrap CIs for a mean

These examples appear in our text, but the code here is a bit different. In particular, I’m appending _star to variable names to indicate that they are calcualted for each of the resamples.

Dimes example

Bootstrap distribution

set.seed(1234)
Dimes_boot <- 
  do(2000) * df_stats(~ mass, data = resample(Dimes), 
                      xbar_star = mean, s_star = sd, n = length)
head(Dimes_boot)

##   response xbar_star     s_star  n .row .index
## 1     mass  2.253567 0.02263151 30    1      1
## 2     mass  2.249367 0.01978938 30    1      2
## 3     mass  2.254900 0.02087458 30    1      3
## 4     mass  2.261467 0.02220086 30    1      4
## 5     mass  2.254367 0.02060714 30    1      5
## 6     mass  2.254367 0.02191281 30    1      6

SE method

xbar <- mean( ~mass, data = Dimes); xbar

## [1] 2.258233

s <- sd( ~mass, data = Dimes); s

## [1] 0.02206993

SE <- sd( ~xbar_star, data = Dimes_boot); SE

## [1] 0.003925842

xbar + c(-1, 1) * qt(0.975, df= 29) * SE

## [1] 2.250204 2.266263

Percentile method

cdata(~xbar_star, data = Dimes_boot)

##         lower    upper central.p
## 2.5% 2.250666 2.265668      0.95

Boostrap t method

Note: the usual method is to use \(s\) from our original sample, not \(s^*\) from each particular bootstrap sample. This avoids one level of approximation, and for small samples, it avoids the possibility of drawing a bootstrap sample with a very small standard deviation, which would lead to a very large value of \(t^*\). In this example, we see that it would make very little difference.

xbar <- mean(~mass, data = Dimes)  # sample mean estimates population mean below
s <- sd(~mass, data = Dimes)       # sample sd estimates population sd below

Dimes_boot <- 
  Dimes_boot |>
  mutate(
    t = (xbar_star - xbar) / (s/sqrt(n)),        # this is the usual way
    t2 = (xbar_star - xbar) / (s_star/sqrt(n)),  # this is not usually done
  )
 
head(Dimes_boot)

##   response xbar_star     s_star  n .row .index          t         t2
## 1     mass  2.253567 0.02263151 30    1      1 -1.1581546 -1.1294161
## 2     mass  2.249367 0.01978938 30    1      2 -2.2004938 -2.4540807
## 3     mass  2.254900 0.02087458 30    1      3 -0.8272533 -0.8746243
## 4     mass  2.261467 0.02220086 30    1      4  0.8024357  0.7977032
## 5     mass  2.254367 0.02060714 30    1      5 -0.9596138 -1.0277317
## 6     mass  2.254367 0.02191281 30    1      6 -0.9596138 -0.9664943

gf_dens(~t, data = Dimes_boot, color = ~ "t") |>
gf_dens(~t2, data = Dimes_boot, color = ~ "t2")

SE <- s / sqrt(30); SE

## [1] 0.004029399

t_star <- cdata(~t, data = Dimes_boot, p = 0.95); t_star

##          lower    upper central.p
## 2.5% -1.878072 1.845188      0.95

xbar - t_star[2:1] * SE

##         upper    lower
## 2.5% 2.250798 2.265801

Lifetime

Small sample, but clearly not coming from a normal population. Skewed right.

Lifetime <-
  tibble(life = c(16, 34, 53, 75, 93, 120, 150, 191, 240, 339))
gf_histogram(~life, data = Lifetime, bins = 5)

gf_qq(~life, data = Lifetime, bins = 5)

Bootstrap distribution

Life_boot <- 
  do(2000) * df_stats(~life, data = resample(Lifetime), 
                      xbar_star = mean, s_star = sd, n = length)
head(Life_boot)

##   response xbar_star    s_star  n .row .index
## 1     life     175.9 113.42589 10    1      1
## 2     life      96.7  40.44214 10    1      2
## 3     life      91.6  67.38975 10    1      3
## 4     life      84.5  68.30365 10    1      4
## 5     life      77.9  58.51961 10    1      5
## 6     life     148.0 103.88241 10    1      6

gf_histogram(~xbar_star, data = Life_boot)

SE method

xbar <- mean(~life, data = Lifetime)
SE <- sd(~xbar_star, data = Life_boot)       

xbar + c(-1, 1) * qt(0.975, df = 9) * SE

## [1]  62.30303 199.89697

Percentile method

cdata(~xbar_star, data = Life_boot)

##       lower upper central.p
## 2.5% 76.595 194.3      0.95

Boostrap t method

xbar <- mean(~life, data = Lifetime)  # sample mean estiamtes population mean below
s <- sd(~life, data = Lifetime)        # sample sd estimates population sd below

# add t stat to bootstrap data
Life_boot <- 
  Life_boot |>
  mutate(
    t = (xbar - xbar_star) / (s / sqrt(n))
  )
 
head(Life_boot)

##   response xbar_star    s_star  n .row .index          t
## 1     life     175.9 113.42589 10    1      1 -1.3972366
## 2     life      96.7  40.44214 10    1      2  1.0728781
## 3     life      91.6  67.38975 10    1      3  1.2319385
## 4     life      84.5  68.30365 10    1      4  1.4533756
## 5     life      77.9  58.51961 10    1      5  1.6592185
## 6     life     148.0 103.88241 10    1      6 -0.5270826

gf_dens( ~t, data = Life_boot, color = ~"t")

SE <- s / sqrt(10); SE

## [1] 32.06329

t_star <- cdata(~t, data = Life_boot, p = 0.95); t_star

##          lower    upper central.p
## 2.5% -1.971102 1.699919      0.95

xbar - t_star[2:1] * SE

##       upper lower
## 2.5% 76.595 194.3

Bootstrap CIs for a mean

Stat 343

12/7/2021

Dimes example

Bootstrap distribution

SE method

Percentile method

Boostrap t method

Lifetime

Bootstrap distribution

SE method

Percentile method

Boostrap t method