oilSpills <- xlsx_cells('oilSpillsUS.xlsx') %>%   #convert to cell format so that tidyxl and unpivotr work
  filter(row != 1) %>%        #first row is title of the table
  select(row, col, data_type, character, numeric)   #select the columns containing data and its location

oilSpills <- oilSpills %>% 
  behead("up-left", Year) %>%  #behead Year
  filter(row < 14)  %>%   #last rows are metadata so filter them out
  behead("up", Value) %>%  #behead Incidents and Gallons spilled
  behead('left', Source) %>%  #behead Source
  pivot_wider(names_from = Value, values_from = numeric) #pivot wider so there is a column for Incidents and Gallons spilled

oilSpills <- oilSpills %>% 
 select(Year, Source, Incidents, `Gallons spilled`) %>% 
 group_by(Year, Source) %>% 
 summarise_all(list(~trimws(paste(., collapse = '')))) %>% #merge rows together
 ungroup()

oilSpills$Incidents <- gsub('.{2}$', '', oilSpills$Incidents) %>%  #trim NAs
  as.numeric() %>% #convert string to double 
  suppressWarnings()  #suppress warnings, there should be some NAs
oilSpills$`Gallons spilled` <- gsub('^[A-Za-z]{2}', '', oilSpills$`Gallons spilled`) %>% #trim NAs
  as.numeric() %>% #convert string to double
   suppressWarnings()   #suppress warnings, there should be some NAs

Data

The data came from the Bureau of Transportation Statistics. Only the pollution incidents where the Coast Guard investigated as the lead agency are included in this dataset. Data on spills where the Environmental Protection Agency or any of the state authorities are the lead agency is not included. These are mostly offshore spills as the EPA usually handles inshore spills. These statistics cover yearly gallons spilled, number of incidents, and source type from 1985, 1990, 1995-2020.

Vessel, Non-Vessel, and Mystery Sources

Gallons Spilled

oilSpills %>% filter(Source == "Vessel sources, total" | 
                     Source == "Nonvessel sources, total"|
                     Source == "Mysteryc") %>%
  plot_ly(x = ~Year, y =~`Gallons spilled`, color = ~Source, type="bar", colors = 
            c("#a6cee3", "#1f78b4", "#b2df8a"),
           width = 1000) %>%
  layout(barmode = 'dodge', yaxis = list(fixedrange = FALSE)) 

“On April 20, 2010, the oil drilling rig Deepwater Horizon, operating in the Macondo Prospect in the Gulf of Mexico, exploded and sank resulting in the death of 11 workers on the Deepwater Horizon and the largest spill of oil in the history of marine oil drilling operations.”

Incidents

oilSpills %>% filter(Source == "Vessel sources, total"| 
                     Source == "Nonvessel sources, total"|
                     Source == "Mysteryc")  %>%
  drop_na() %>%
  plot_ly(width = 1000) %>%
  add_trace(x = ~Year, y = ~Incidents, type = 'scatter', color =~Source, mode = 'lines+markers', 
            colors = c("#a6cee3", "#1f78b4", "#b2df8a"))

Source, Incidents, and Gallons Spilled

y2 <- list(
  overlaying = "y",
  side = "right",
  title = "Number of Incidents",
  automargin = T)

oilSpills %>% filter(Source != "TOTAL all spills", 
                     Source != "Vessel sources, total", 
                     Source != "Nonvessel sources, total")  %>%
  drop_na() %>%
  mutate(Source = fct_reorder(Source, desc(`Gallons spilled`))) %>%
  plot_ly(x = ~Year, y =~`Gallons spilled`, color = ~Source, type="bar", colors = "Accent",
          width = 1000, height = 500) %>%
  group_by(Year) %>%
  summarise(n = sum(Incidents)) %>%
  add_trace(x = ~Year, y = ~n, type = 'scatter',  mode = 'markers', 
            name = "Incidents", yaxis = "y2", color = I("black")) %>%
  layout(barmode = 'stack', yaxis2 = y2,
         yaxis = list(fixedrange = FALSE))

References

Reflections