The US Energy Information Administration (EIA) provides a wealth of data for energy markets. This update focuses on their regularly-updated oil and gas data sources including:

  • Spot Prices and Some Futures Market Info
  • Weekly Petroleum Status Report
  • Weekly Gas Storage Report
  • Oil and gas production updates
    • Monthly Production Data by Region
    • Drilling Productivity Reports
    • Shale Gas and Light Tight Oil Production Reports
  • Short Term Energy Outlook
  • Annual Energy Outlook

For each section, I’ve included R code to access the data as well as links to the web interface for the data. The R code relies on stadard Tidyverse packages as well as a couple of custom functions and an API key. You should be able to run this with the code provided once you get your own key here, and you’ll have to store it in a variable called KEY to get the code to work.

If you’re going to run the R code, you’ll need a few basic set-up elements to get everything to work. I’ve included the code here for your reference.

#packages used
library(RColorBrewer)
library(viridis)
library(scales) 
library(pdfetch)
library(tidyverse)
library(lubridate)
library(reshape2)
library(knitr)
library(prettydoc)
library(xml2)
library(zoo)
library(openxlsx)
library(readxl)

data_fetch<-function(key, cat){
  #key<-KEY
  #cat=476336
  ifelse(cat==999999999,
         url <- paste("https://api.eia.gov/category/?api_key=",
                      key, "&out=xml", sep="" ),
         url <- paste("https://api.eia.gov/category/?api_key=",
                      key, "&category_id=", cat, "&out=xml", sep="" )
  )
  
  #http://api.eia.gov/category/?api_key=YOUR_API_KEY_HERE&category_id=476336
  #url <- paste("https://api.eia.gov/category?api_key=",
  #             key, "&category_id=", cat, "&out=xml", sep="" )
  #https://api.eia.gov/category/?api_key=91b4dca0b858df64a2279d82f71af240&category_id=476336&out=xml
  #https://api.eia.gov/category?api_key=91b4dca0b858df64a2279d82f71af240&category_id=476336&out=xml
  
  x <- read_xml(url)
  doc <- XML::xmlParse(file=x)
  
  
  Parent_Category <- tryCatch(XML::xmlToDataFrame(,stringsAsFactors = F,nodes =
                                               XML::getNodeSet(doc, "//category/parent_category_id")),
                              warning=function(w) FALSE, error=function(w) FALSE)
  Sub_Categories <- XML::xmlToDataFrame(,stringsAsFactors = F,nodes =
                                     XML::getNodeSet(doc, "//childcategories/row"))
  Series_IDs <- XML::xmlToDataFrame(nodes =
                                 XML::getNodeSet(doc, "///childseries/row"),stringsAsFactors = F)
  Categories <- list(Parent_Category, Sub_Categories, Series_IDs)
  names(Categories) <- c("Parent_Category", "Sub_Categories", "Series_IDs")
  Categories
}

 get_children<-function(category_id=476336){
   subs<-data_fetch(KEY,cat=category_id)
   sub_cats<-subs$Sub_Categories
   #build list from sub_cats
   cat_store <- list()
   cat_count<-1
   for (cat in sub_cats$category_id) {
     #cat<-sub_cats$category_id[1]
     series<-data_fetch(KEY,cat=cat)
     cat_store[[cat_count]]<-series$Series_IDs
     cat_count<-cat_count+1
   }
   data.frame(do.call(rbind,cat_store))
 }
 #get_children()
 
 get_series<-function(category_id=476336){
   #series,name,f,units,updated
   subs<-data_fetch(KEY,cat=category_id)
   subs$Series_IDs
 }
 #get_series()
 
 pd_fix<-function(data,name){
   data<-data.frame(date=index(data), coredata(data))
   data$date<-ymd(data$date)
   data <- setNames(data, c("date",name)) 
 }
 
 EIA_to_DF<-function(series_info){
   data<- pdfetch_EIA(series_info$series_id,KEY)
   pd_fix(data,series_info$name)
   }

#Commodity Prices

Both the EIA data interface here or the API here allow you to access benchmark commodity prices we’ll talk a lot about in the first couple of classes. For most of what we do with prices, we’ll use data from the Bloomberg terminal but the EIA prices are a useful source of information that you can access from anywhere.

The first step is to access the data (click on the code button to see how to do things in R if you’re interested).

#don't need to cache this long-term - it's daily data
#GET EIA NYMEX SPOTS 
subs<-data_fetch(KEY,cat=241335)
series<-t(subs$Series_IDs$series_id)
series<- as.character(series)
names<-t(subs$Series_IDs$name)

subs<-data_fetch(KEY,cat=241347)
series<-c(series,t(subs$Series_IDs$series_id))
series<- as.character(series)
names<-c(names,t(subs$Series_IDs$name))


nymex_data<- pdfetch_EIA(series,KEY)
nymex_data <- setNames(nymex_data, names)
nymex_data<-data.frame(date=index(nymex_data), coredata(nymex_data),stringsAsFactors = F)
nymex_data$date<-as.Date(nymex_data$date,format = "%m/%d/%Y")

annual_crude<-cbind(nymex_data$date,nymex_data[,grep("Annual", colnames(nymex_data))])  #all annual columns
annual_crude<-na.omit(annual_crude)
names(annual_crude)[1]<-paste("Date")  

monthly_crude<-cbind(nymex_data$date,nymex_data[,grep("Monthly", colnames(nymex_data))])  #all monthly columns
monthly_crude<-na.omit(monthly_crude)
names(monthly_crude)[1]<-paste("Date")

weekly_crude<-cbind(nymex_data$date,nymex_data[,grep("Weekly", colnames(nymex_data))])  #all weekly_crude columns
#weekly_crude<-na.omit(weekly_crude)
names(weekly_crude)[1]<-paste("Date")
weekly_crude<-subset(weekly_crude, Date > as.Date("2007-01-01"))
weekly_crude<-na.omit(weekly_crude)

daily_crude<-cbind(nymex_data$date,nymex_data[,grep("Daily", colnames(nymex_data))])  #all weekly_crude columns
#weekly_crude<-na.omit(weekly_crude)
names(daily_crude)[1]<-paste("Date")

daily_crude$WTI_Brent_diff<- daily_crude$Europe.Brent.Spot.Price.FOB..Daily-daily_crude$Cushing..OK.WTI.Spot.Price.FOB..Daily

Then, we set up some graphing preliminaries. Again, R code provided for your interest.

#set up graph format
weekly_graphs<-function(caption_align=1){
  theme_minimal()+theme(
    plot.margin = margin(.25, .75, .25, .75, "cm"),
    legend.position = "bottom",
    legend.margin=margin(c(0,0,0,0),unit="cm"),
    legend.text = element_text(colour="black", size = 14, face = "bold"),
    plot.caption = element_text(size = 14, face = "italic",hjust=caption_align),
    plot.title = element_text(face = "bold"),
    plot.subtitle = element_text(size = 16, face = "italic"),
    panel.grid.minor = element_blank(),
    text = element_text(size = 14,face = "bold"),
    axis.title.x = element_text(size = 14,face = "bold", colour="black",margin = margin(t = 15, b = 0)),
    axis.title.y = element_text(size = 14,face = "bold", colour="black",margin = margin(r = 10)),
    axis.text = element_text(size = 14,face = "bold", colour="black",margin = margin(t = 10, b = 10)),
  )
}

weekly_small<-function(caption_align=1){
  theme_minimal()+theme(
    plot.margin = margin(.25, .75, .25, .75, "cm"),
    legend.position = "bottom",
    legend.margin=margin(c(0,0,0,0),unit="cm"),
    legend.text = element_text(colour="black", size = 9),
    plot.caption = element_text(size = 11, face = "italic",hjust=caption_align),
    plot.title = element_text(size = 16,face = "bold"),
    plot.subtitle = element_text(size = 14, face = "italic"),
    panel.grid.minor = element_blank(),
    text = element_text(size = 11,face = "bold"),
    axis.title.x = element_text(size = 11,face = "bold", colour="black",margin = margin(t = 15, b = 0)),
    axis.text = element_text(size = 11,face = "bold", colour="black",margin = margin(t = 10, b = 10)),
  )
}


#define levels chart design function
levels_chart<-function(data_sent,names,name_labels,years,title_sent="Benchmark Oil Prices",break_set="12 months",y_lab="Spot Prices ($US/bbl)")
{
  #send a data set and variable names
  #testing
    #data_sent<-daily_crude
    #names<-"Europe.Brent.Spot.Price.FOB..Daily"
    #name_labels<-"Brent"
    #years<-5   
    #break_set<-"12 months"
    #title_sent<-"Benchmark Oil Prices"
  rows_legend<-max(1,round(sum(nchar(names))/60))
  data_sent<-filter(data_sent,Date>=max(Date)-years(years))
  
  df1<-melt(data_sent,id=c("Date"),measure.vars = names)
  df1<-na.omit(df1)
  lims<-c(max(df1$Date)-years(years),max(df1$Date)+months(1))
  lims_y<-c(min(0,min(df1$value)-5),max(df1$value)+10)
  
  p<-ggplot(df1) +
    geom_line(data=filter(df1,variable!="diff"),aes(Date,value,group = variable,colour=variable),size=1.7) +
    #geom_point(size=1) +
    scale_colour_manual(NULL,values=colors_tableau10(), labels=name_labels)+
    scale_x_date(name=NULL,date_breaks = break_set, date_labels =  "%b\n%Y",limits=lims,expand=c(0,0)) +
    scale_y_continuous(expand = c(0, 0),limits=lims_y,breaks=c(-10,seq(0,lims_y[2],10))) +
    guides(colour=guide_legend(nrow=rows_legend))+
    labs(y=y_lab,x="Date",
         title=title_sent,
         caption="Data via EIA API")+
    weekly_graphs()
  p
}

Now we can make the graphs we need.

names<-c("Cushing..OK.WTI.Spot.Price.FOB..Daily","Europe.Brent.Spot.Price.FOB..Daily")
name_labels<-c("WTI (Cushing)", "Brent (Europe)")
years<-5   
break_set<-"12 months"
title_sent<-"Benchmark Oil Prices"
WTI_Brent<-levels_chart(daily_crude,names,name_labels,years,title_sent="North American and European Benchmark Oil Prices",break_set="12 months")

For example, if you access the data, you’ll see that the most recent prices for WTI and Brent are $US 79.86/bbl and $US 85.25/bbl respectively, based on the January 24, 2023 update. The most recent 5 years of data for WTI and Brent prices are shown below.

The EIA data library also provides a lot of other prices and we can use those to produce some of the other types of graphs we use in the class. For example, if you’re interested in refining margins, you can look at New York Harbour and Gulf Coast 3:2:1 crack spreads: the difference between the value of crude oil and the value of a refined product yield of 2/3 gasoline and 1/3 diesel.

# first, make the cracks
daily_crude<-daily_crude %>% mutate(
  gc_321=U.S..Gulf.Coast.Ultra.Low.Sulfur.No.2.Diesel.Spot.Price..Daily*42/3+ U.S..Gulf.Coast.Conventional.Gasoline.Regular.Spot.Price.FOB..Daily*42*2/3-Europe.Brent.Spot.Price.FOB..Daily,
  nyh_321=New.York.Harbor.Ultra.Low.Sulfur.No.2.Diesel.Spot.Price..Daily*42/3+New.York.Harbor.Conventional.Gasoline.Regular.Spot.Price.FOB..Daily*42*2/3 -Cushing..OK.WTI.Spot.Price.FOB..Daily
)

names<-c("gc_321","nyh_321")
name_labels<-c("Gulf Coast 3:2:1 Crack to Brent","NY Harbour 3:2:1 Crack to WTI")
years<-5   
break_set<-"12 months"
title<-"3:2:1 Crack Spreads"
y_label<-"3:2:1 Crack Spread ($/bbl)"

levels_chart(daily_crude,names,name_labels,years = years,title_sent = title,y_lab = y_label)

On the graph above, the last points imply that the gross profit margin from refining a barrel purchased at Brent prices on the Gulf Coast would be $US 38.51/bbl while a barrel purchased at WTI prices with refined products sold at NY Harbour prices would yield $US 38.51/bbl in gross profits, based on January 24, 2023 prices.

Weekly Petroleum Status Report

The EIA Weekly Petroleum Status Report is one of the most-watched data releases in energy markets. The report is released at 10:30 a.m. (Eastern time) on Wednesdays every week, with publication of the data to the API after 1:00 p.m. (Eastern Time) on Wednesday. You can see the full Petroleum Status Report here or you can access via the EIA API starting from here. For most of what follows, I’ll be using three data series: the full US weekly supply here and the Cushing oil stocks data here. You can download the data any way you like, but for this purpose, I’m grabbing the data with R as follows:

#first, build a list of series names we want to access
#get the information from the EIA weekly supply API
cat_store <- list()
name_store<-list()

subs<-data_fetch(KEY,cat=235081)
#take out the 4 week average data
series_ids<-subs$Series_IDs$series_id[-grep("\\b.4\\b",subs$Series_IDs$series_id)]
#make sure series ids aren't stored as factors
series_ids<- as.character(series_ids)
#same for names
names<-t(subs$Series_IDs$name[-grep("\\b.4\\b",subs$Series_IDs$series_id)])
cat_store[[1]]<-series_ids
name_store[[1]]<-names


#get the Cushing series ids and names
subs<-data_fetch(KEY,cat=235418)
series_ids<-subs$Series_IDs$series_id[]
names<-t(subs$Series_IDs$name[])
cat_store[[2]]<-series_ids
name_store[[2]]<-names

#get the gas storage series and IDs
subs<-data_fetch(KEY,cat=1709237)
series_ids<-subs$Series_IDs$series_id[]
names<-t(subs$Series_IDs$name[])
cat_store[[3]]<-series_ids
name_store[[3]]<-names

#stack all of them
series<-data.frame(do.call(c,cat_store))
name_list<-do.call(c,name_store)

#fetchez le data
all_data<- pdfetch_EIA(t(series),KEY)
all_data<-data.frame(date=index(all_data), coredata(all_data))
#fix the data names list
name_list<- gsub("\\.", " ", name_list)
name_list<- gsub("\\U S ", "US", name_list)
name_list<- gsub("  ", " ", name_list)
name_list<- gsub(", Weekly", "", name_list)
name_list<- gsub("US ", "", name_list)
name_list<- gsub("U S ", "", name_list)
name_list<- gsub("^\\s+|\\s+$", "",name_list)
#assign names
all_data <- setNames(all_data,c("date",name_list))
#build week and year indicators
all_data<-all_data%>% mutate(
week=week(date),
month=month(date),
year=year(date)
)

The weekly petroleum supply information is very detailed, and you’ll see markets move in response to its release. The most important series traders will watch are production, inventories, and trade data. I set up a basic graph to allow me to quickly compare the current release data with previous years and the trailing 5 year range. The graph code is provided under the Code icon for your reference.

ribbon_graph<-function(data_sent,series_name,unit_sent,years,alt_axis=FALSE){
#melt the data into long form
df1<-melt(data_sent,id=c("date","week","month","year"),measure.vars = series_name)
#remove NAs
df1<-na.omit(df1)
#truncate to 52 weeks for the years where there is a 53rd week.
df1$week[df1$week==53]<-52

#create range data
max_year<-max(df1$year)

#first, we want the ones we are going to graph - this year and last year
df.years <- df1 %>% 
  filter(year >= max_year-1)

#now we want the ones for the range
df.year.range <- df1 %>% 
  filter(year >= max_year - years & year < max_year) %>% 
  group_by(week) %>% 
  summarize(mean = mean(value), min = min(value), max = max(value))

y_lab<-paste(alt_axis,"\n(",unit_sent,")",sep="")
if(alt_axis==FALSE)
  y_lab<-paste(series_name,"\n(",unit_sent,")",sep="")

#We can then trick ggplot into printing a nice title for the fill on the legend, by setting fill inside aes to the intended string. Because fill is set in aes(), we control its color with scale_fill_manual.
graph<- ggplot() +
  geom_ribbon(data = df.year.range, aes(x = week, ymin = min, ymax = max, fill =  "fill1")) +
  geom_line(data = df.year.range, aes(x = week, y = mean,colour="3"),size=1.7) +
  geom_line(data = subset(df.years,year==max_year-1), aes(x = week, y = value, color = "2"),size=1.7)+ 
  geom_line(data = subset(df.years,year==max_year), aes(x = week, y = value, color = "1"),size=1.7) +
  geom_point(data = subset(df.years,year==max_year), aes(x = week, y = value, color = "1"),size=1.7) +
  #scale_fill_manual(values = '#ffccff')
  scale_fill_manual("",values = 'grey70',labels=paste(max_year-years,"-",max_year-1,"\nrange",sep =""))+     
  scale_color_viridis("",discrete=TRUE,option="C",labels=c(max_year, max_year-1,paste(max_year-years,"-",max_year-1,"\naverage",sep ="")))+
  #scale_fill_continuous()+
  scale_x_continuous(limits=c(min(df1$week),max(df1$week)),expand=c(.001,0))+
  theme_minimal()+
  theme(
    legend.position = "bottom",
    legend.margin=margin(c(0,0,0,0),unit="cm"),
    legend.text = element_text(colour="black", size = 12),
    plot.caption = element_text(size = 14, face = "italic"),
    plot.title = element_text(face = "bold"),
    plot.subtitle = element_text(size = 14, face = "italic"),
    #panel.grid.minor = element_blank(),
    text = element_text(size = 14,face = "bold"),
    axis.line = element_line(color="black", size = 1),
    axis.text.y =element_text(size = 14,face = "bold", colour="black"),
    axis.text.x=element_text(size = 14,face = "bold", colour="black",angle=90, hjust=1),
  )+
  labs(y=y_lab,x="Week",
       title=paste(series_name,sep=""),
       caption="Source: EIA API, graph by Andrew Leach.")
print(graph)
}

Crude Production

Certainly, the most-watched element in the data most weeks will be the US crude production data. The below shows the previous 5 year range of production as well as the previous 5 year average, current year, and previous year’s time series.

Crude Trade

Over the past decade, the US has moved from a major crude oil importer to a major crude oil exporter. Net imports have also decreased. Below, I’ve included both the total imports and exports as well as net imports data, with 10 year ranges and averages, as well as the current year, and previous year’s time series.

Crude Imports

Displays the most recent crude import data and historic ranges.

Crude Exports

Displays the most recent crude export data and historic ranges.

Net Imports

Displays the most recent crude net imports data and historic ranges.

Total Products Trade

Refined products like gasoline, diesel and jet fuel are traded just as crude oil is, and the US trade picture has also been changing a lot on the products front.

Products Imports

Products Exports

Net Imports of Crude Oil and Petroleum Products

Below, I’ve also included some key graphs for gasoline and diesel.

Gasoline and Diesel Trade

Each of the graphs shows the 5 year range of data as well as the 5 year average, current year, and previous year’s time series for the key refined products we’ll talk about in class.

Gasoline Imports

Gasoline Exports

Diesel Imports

Diesel Exports

Jet Imports

Jet Exports

Crude and Products Storage

The other markers that people will pay a lot of attention to in the Weekly Petroleum Status Report are storage levels for crude oil and refined products. Inventory changes tell us how quantities supplied and imported related to quantities consumed and exported, and also tell us whether the physical side of the market is looking bullish or bearish. As with the previous graphs, each of the graphs below shows the 5 year range of data as well as the 5 year average, current year, and previous year’s time series, but in this case the axis labels are in total storage levels.

Crude

Cushing Crude

Strategic Petroleum Reserve (SPR)

Gasoline

Distillate

Refining

The Weekly Petroleum Status Report also contains significant information on refining. A small sample of these are shown below. The Operable Distillation Capacity tells you what’s available each week in terms of conventional refining. We don’t have weekly operable capacity data on more complex units post-distillation, but the EIA does track these on a longer-term basis. We also have weekly data on inputs of crude oil to refineries as well as total production.

Operable Distillation Capacity

Gross Inputs to Refineries

Net Production of Gasoline

Net Production of Diesel

Weekly Natural Gas Storage Report

The weekly gas storage report is available here or via the API here. I’ve included it in my code above, so there’s no separate data pull for this section, and the graphs rely on the same code as above. Natural gas storage and use is much more seasonal than oil, so you can really see the inventory builds following a pattern that you don’t see in oil markets to the same degree. As above, each of the graphs I’ve provided below shows the 5 year range of data as well as the 5 year average, current year, and previous year’s time series.

Lower 48 States Natural Gas Working Underground Storage

East Region Natural Gas Working Underground Storage

Oil and gas production updates

The EIA also provides regular updates on production of light tight oil and shale gas as well as other play- or resource-specific data. There are two sources of data which I use, so I’m sharing code for them here: the Drilling Productivity Report and the

Monthly Production Updates by State and/or Area

The key longer-term data series that the EIA provides on oil and gas production are also found either on their website here for oil and gas. I’ll use the API here for oil production by state, here for oil production by API gravity, and here for gas. As above, you can download the data any way you like, but for this purpose, I’m again grabbing and cleaning the data with R as follows:

#first, build a list of series names we want to access
#get the information from the EIA weekly supply API
cat_store <- list()
name_store<-list()
unit_store<-list()

#monthly crude production by Area
subs<-data_fetch(KEY,cat=296686)
#take out the 4 week average data
series_ids<-subs$Series_IDs$series_id[grep("\\.M\\b",subs$Series_IDs$series_id)]
#make sure series ids aren't stored as factors
series_ids<- as.character(series_ids)
#same for names
names<-t(subs$Series_IDs$name[grep("\\.M\\b",subs$Series_IDs$series_id)])
units<-subs$Series_IDs$units[grep("\\.M\\b",subs$Series_IDs$series_id)]

cat_store[[1]]<-series_ids
name_store[[1]]<-names
unit_store[[1]]<-units


#get the natural gas production data
subs<-data_fetch(KEY,cat=474572)
series_ids<-subs$Series_IDs$series_id[grep("\\.M\\b",subs$Series_IDs$series_id)]
#make sure series ids aren't stored as factors
series_ids<- as.character(series_ids)
#same for names
names<-t(subs$Series_IDs$name[grep("\\.M\\b",subs$Series_IDs$series_id)])
units<-subs$Series_IDs$units[grep("\\.M\\b",subs$Series_IDs$series_id)]

cat_store[[2]]<-series_ids
name_store[[2]]<-names
unit_store[[2]]<-units
#get crude production by API gravity
subs<-data_fetch(KEY,cat=2477496)
series_ids<-subs$Series_IDs$series_id[grep("\\.M\\b",subs$Series_IDs$series_id)]
units<-subs$Series_IDs$units[grep("\\.M\\b",subs$Series_IDs$series_id)]

#make sure series ids aren't stored as factors
series_ids<- as.character(series_ids)
#same for names
names<-t(subs$Series_IDs$name[grep("\\.M\\b",subs$Series_IDs$series_id)])
cat_store[[3]]<-series_ids
name_store[[3]]<-names
unit_store[[3]]<-units
#stack all of them
series<-data.frame(do.call(c,cat_store))
name_list<-do.call(c,name_store)
units<-do.call(c,unit_store)
#fetchez le data
production_data<- pdfetch_EIA(t(series),KEY)
production_data<-data.frame(date=index(production_data), coredata(production_data))
#store units
attributes(production_data)$units<-c("Date",units)
#fix the data names list
name_list<- gsub("\\.", " ", name_list)
name_list<- gsub("\\U S ", "US", name_list)
name_list<- gsub("  ", " ", name_list)
name_list<- gsub(", Monthly", "", name_list)
name_list<- gsub("US ", "", name_list)
name_list<- gsub("U S ", "", name_list)
name_list<- gsub("^\\s+|\\s+$", "",name_list)
#assign names
production_data <- setNames(production_data,c("Date",name_list))

full_names<-paste(name_list,units,sep=", ")
production_data <- setNames(production_data,c("Date",full_names))

#build week and year indicators
production_data<-production_data%>% mutate(
week=week(Date),
month=month(Date),
year=year(Date)
)

These data are super-rich and far exceed the level of detail we’re going to need in the class. However, one thing we will talk a lot about is US regional crude production so let’s use this as an example for these data. First, as above, we need some graphing preliminaries

#define production chart design function
#default will be oil
oil_production_chart<-function(data_sent,names,name_labels,years,title_sent="Benchmark Oil Prices",break_set="12 months",y_lab="Production (Thousands of Barrels Per Day)")
{
  #send a data set and variable names
  #testing
    #data_sent<-daily_crude
    #names<-"Europe.Brent.Spot.Price.FOB..Daily"
    #name_labels<-"Brent"
    #years<-5   
    #break_set<-"12 months"
    #title_sent<-"Benchmark Oil Prices"
  rows_legend<-max(1,round(sum(nchar(names))/180))
  data_sent<-filter(data_sent,Date>=max(Date)-years(years))
  df1<-melt(data_sent,id=c("Date"),measure.vars = names)
  df1<-na.omit(df1)
  lims<-c(max(df1$Date)-years(years),max(df1$Date)+months(1))
  lims_y<-c(0,max(df1$value)*1.1)
  
  p<-ggplot(df1) +
    geom_line(data=filter(df1,variable!="diff"),aes(Date,value,group = variable,colour=variable),size=1.7) +
    #geom_point(size=1) +
    scale_colour_manual(NULL,values=colors_tableau10(), labels=name_labels)+
    scale_x_date(name=NULL,date_breaks = break_set, date_labels =  "%b\n%Y",limits=lims,expand=c(0,0)) +
    scale_y_continuous(expand = c(0,0),limits=lims_y) +
    guides(colour=guide_legend(nrow=rows_legend))+
    labs(y=y_lab,x="Date",
         title=title_sent,
         caption="Data via EIA API")+
    weekly_small()
  p
}

Now we can make the graphs we need. Start with a basic graph of US oil production.

names<-c("Field Production of Crude Oil, Thousand Barrels per Day")
name_labels<-c("US Crude Oil Production")
years<-10   
break_set<-"12 months"
title_sent<-"U.S. Field Production of Crude Oil"
oil_production_chart(production_data,names,name_labels,years,title_sent=title_sent,break_set=break_set)

As mentioned above, regional production of crude oil is very important for Canada, as much of the growth in US production has occurred either in or closer to markets that we traditionally serve with our exports to the US.

names<-c("East Coast (PADD 1) Field Production of Crude Oil, Thousand Barrels per Day","Midwest (PADD 2) Field Production of Crude Oil, Thousand Barrels per Day","Gulf Coast (PADD 3) Field Production of Crude Oil, Thousand Barrels per Day","Rocky Mountain (PADD 4) Field Production of Crude Oil, Thousand Barrels per Day","West Coast (PADD 5) Field Production of Crude Oil, Thousand Barrels per Day")
name_labels<-c("East Coast (PADD 1)","Midwest (PADD 2)","Gulf Coast (PADD 3)","Rocky Mountain (PADD 4)","West Coast (PADD 5)")

years<-10   
break_set<-"12 months"
title_sent<-"U.S. Field Production of Crude Oil by PADD"
oil_production_chart(production_data,names,name_labels,years,title_sent=title_sent,break_set=break_set)

There are many other sources of data in the reports, but this is enough of a sense of what’s possible for these purposes.

Drilling Productivity Reports

The US EIA also produces the Drilling Productivity Report (DPR) which provides a select set of data by region for oil and shale gas production. The DPR is accessible on their website here or you can access the Excel spreadsheet directly here. Again, the code link shows the means of downloading and processing the data via R.

dpr_file<-"https://www.eia.gov/petroleum/drilling/xls/dpr-data.xlsx"

download.file(dpr_file,"dpr-data.xlsx",mode="wb")
plays<-getSheetNames("dpr-data.xlsx")
plays<-plays[plays!="RegionCounties"]
wb <- loadWorkbook("dpr-data.xlsx")
cnames<-c("Month","Rigcount",   "Oil_Production_per_rig",   "Oil_Legacy_prod_chg",  "Total_oil_prod","Gas_Production_per_rig",  "Gas_Legacy_prod_chg",  "Total_gas_prod")


dpr_data<-data.frame()
for(play in plays){
  play_data <- read.xlsx(xlsxFile = "dpr-data.xlsx", sheet = play, startRow = 2,skipEmptyRows = TRUE,detectDates = TRUE)
  names(play_data)<-cnames
  play_data$play<-play
  dpr_data<-rbind(dpr_data,play_data)
}


dpr_data<-melt(dpr_data,id=c("Month","Total_oil_prod","Total_gas_prod"),measure.vars = c("play"),value.name = "Play")
dpr_data$variable<-NULL
dpr_data<-na.omit(dpr_data)
dpr_data$Play<-as.factor(dpr_data$Play)

There’s a ton of information in the DPR as well, but a quick snapshot here shows you production for shale gas and light tight oil by play based on the code included below.

lto_dpr<-ggplot(dpr_data,aes(Month,Total_oil_prod/1000,group = Play,colour=Play,fill=Play)) +
  geom_area(position = "stack") +
  #geom_point(size=1) +
  scale_color_viridis("",discrete=TRUE,option="D")+
  scale_fill_viridis("",discrete=TRUE,option="D")+   
  #scale_colour_brewer(NULL,labels=c("Gasoline Exports","Gasoline Imports","Net Gasoline Exports"),type = "seq", palette = "Paired", direction = 1)+
  #scale_x_date(date_breaks = "1 year", date_labels =  "%Y",limits=c(max(as.Date("2000-01-01"),min(df1$date)),Sys.Date()),expand=c(0,0)) +
  #scale_colour_manual(labels=c("Gasoline Exports","Gasoline Imports","Net Gasoline Exports",values=c("#41ae76","#238b45","#006d2c","#00441b","Black","Black","Black","Black"))+
  #scale_y_continuous(limits=c(min(df1$value),max(df1$value)),expand=c(0,0))+
  #scale_y_continuous(limits=c(min(df1$value),max(df1$value)),expand=c(0,0))+
  theme_minimal()+theme(
    legend.position = "bottom",
    legend.margin=margin(c(0,0,0,0),unit="cm"),
    legend.text = element_text(colour="black", size = 12),
    plot.caption = element_text(size = 14, face = "italic"),
    plot.title = element_text(face = "bold"),
    plot.subtitle = element_text(size = 14, face = "italic"),
    #panel.grid.minor = element_blank(),
    text = element_text(size = 14,face = "bold"),
    axis.text.y =element_text(size = 14,face = "bold", colour="black"),
    axis.text.x=element_text(size = 14,face = "bold", colour="black",angle=90, hjust=1),
  )+
  labs(y="Crude Oil Production by Play \n(Monthly, Thousands of Barrels per Day)",x="Week",
       title=paste("US Production of Tight Oil",sep=""),
       caption="Source: EIA DPR, graph by Andrew Leach.")

lto_no_permian<-ggplot(subset(dpr_data,Play != "Permian Region"),aes(Month,Total_oil_prod/1000,group = Play,colour=Play,fill=Play)) +
  geom_area(position = "stack") +
  #geom_point(size=1) +
  scale_color_viridis("",discrete=TRUE,option="D")+
  scale_fill_viridis("",discrete=TRUE,option="D")+   
  #scale_colour_brewer(NULL,labels=c("Gasoline Exports","Gasoline Imports","Net Gasoline Exports"),type = "seq", palette = "Paired", direction = 1)+
  #scale_x_date(date_breaks = "1 year", date_labels =  "%Y",limits=c(max(as.Date("2000-01-01"),min(df1$date)),Sys.Date()),expand=c(0,0)) +
  #scale_colour_manual(labels=c("Gasoline Exports","Gasoline Imports","Net Gasoline Exports",values=c("#41ae76","#238b45","#006d2c","#00441b","Black","Black","Black","Black"))+
  #scale_y_continuous(limits=c(min(df1$value),max(df1$value)),expand=c(0,0))+
  #scale_y_continuous(limits=c(min(df1$value),max(df1$value)),expand=c(0,0))+
  theme_minimal()+theme(
    legend.position = "bottom",
    legend.margin=margin(c(0,0,0,0),unit="cm"),
    legend.text = element_text(colour="black", size = 12),
    plot.caption = element_text(size = 14, face = "italic"),
    plot.title = element_text(face = "bold"),
    plot.subtitle = element_text(size = 14, face = "italic"),
    #panel.grid.minor = element_blank(),
    text = element_text(size = 14,face = "bold"),
    axis.text.y =element_text(size = 14,face = "bold", colour="black"),
    axis.text.x=element_text(size = 14,face = "bold", colour="black",angle=90, hjust=1),
  )+
  labs(y="Crude Oil Production by Play \n(Monthly, Thousands of Barrels per Day)",x="Week",
       title=paste("US Production of Tight Oil ex Permian Basin",sep=""),
       caption="Source: EIA DPR, graph by Andrew Leach.")

gas_dpr<-ggplot(dpr_data,aes(Month,Total_gas_prod/10^6,group = Play,colour=Play,fill=Play)) +
  geom_area(position = "stack") +
  #geom_point(size=1) +
  scale_color_viridis("",discrete=TRUE,option="D")+
  scale_fill_viridis("",discrete=TRUE,option="D")+   
  theme_minimal()+theme(
    legend.position = "bottom",
    legend.margin=margin(c(0,0,0,0),unit="cm"),
    legend.text = element_text(colour="black", size = 12),
    plot.caption = element_text(size = 14, face = "italic"),
    plot.title = element_text(face = "bold"),
    plot.subtitle = element_text(size = 14, face = "italic"),
    #panel.grid.minor = element_blank(),
    text = element_text(size = 14,face = "bold"),
    axis.text.y =element_text(size = 14,face = "bold", colour="black"),
    axis.text.x=element_text(size = 14,face = "bold", colour="black",angle=90, hjust=1),
  )+
  labs(y="Shale and Tight Gas Production by Play \n(Monthly, Billion Cubic Feet per Day)",x="Week",
       title=paste("US Production of Tight and Shale Gas",sep=""),
       caption="Source: EIA DPR, graph by Andrew Leach.")

Gas Production by Play

Oil Production by Play

Shale Gas and Light Tight Oil Production Reports

Finally for current data, the US EIA also produces light tight oil spreadsheet with data by play which is slightly different than the DPR data - it’s more disaggregated. You can access the Excel spreadsheet directly here for oil and here for gas. Again, the code link shows the means of downloading and processing the data via R.

oil_prod_file<-"https://www.eia.gov/energyexplained/oil-and-petroleum-products/data/US-tight-oil-production.xlsx"

download.file(oil_prod_file,"play-data.xlsx",mode="wb")
#plays<-getSheetNames("play-data.xlsx")
#plays<-plays[plays!="RegionCounties"]
play_data <- read.xlsx(xlsxFile = "play-data.xlsx", sheet = "data", startRow = 1,skipEmptyRows = TRUE,detectDates = TRUE)
play_data$X17<-NULL
play_data$X12<-NULL
play_data$Date<-as.Date(play_data$Date,origin = "1899-12-30")
df1<-melt(play_data,id=c("Date"),variable.name = "play",value.name = "volume" )
df1$play<-gsub("\\.", " ", df1$play)
df1$play<-gsub("&N", " & N", df1$play)
df1$play<-gsub("  ", " ", df1$play)
df1$play<-gsub("  ", " ", df1$play)
df1$play<-gsub("Rest of US 'tight oil'","Other Tight Oil",df1$play)
df1$play<-factor(df1$play,levels = unique(df1$play))
df1$play<-fct_relevel(df1$play, "Other Tight Oil", after = Inf)

oil_play<-ggplot(df1,aes(Date,volume*1000,group = play,fill=play)) +
  geom_area(position = "stack") +
  scale_fill_viridis("",discrete=TRUE,option="D")+
  weekly_small()+guides(fill=guide_legend(nrow=3,byrow=TRUE))+
  labs(y="Tight Oil Production by Play \n(Monthly, Thousands of Barrels per Day)",x="Week",
       title=paste("US tight oil production estimates by play",sep=""),
       caption="Source: EIA data, graph by Andrew Leach.")
gas_prod_file<-"https://www.eia.gov/naturalgas/weekly/img/shale_gas_202211.xlsx"

download.file(gas_prod_file,"play-data.xlsx",mode="wb")
#plays<-getSheetNames("play-data.xlsx")
#plays<-plays[plays!="RegionCounties"]
play_data <- read.xlsx(xlsxFile = "play-data.xlsx", sheet = "data", startRow = 1,skipEmptyRows = TRUE,detectDates = TRUE)
play_data$X13<-NULL
play_data$X14<-NULL
play_data$Date<-as.Date(play_data$Date,origin = "1899-12-30")
df1<-melt(play_data,id=c("Date"),variable.name = "play",value.name = "volume" )
df1$play<-gsub("\\.", " ", df1$play)
df1$play<-gsub("&N", " & N", df1$play)
df1$play<-gsub("  ", " ", df1$play)
df1$play<-gsub("Rest of US 'shale'","Other Shale Plays",df1$play)
df1$play<-factor(df1$play,levels = unique(df1$play))
df1$play<-fct_relevel(df1$play, "Other Shale Plays", after = Inf)


gas_play<-ggplot(df1,aes(Date,volume,group = play,fill=play)) +
  geom_area(position = "stack") +
 geom_area(position = "stack") +
  scale_fill_viridis("",discrete=TRUE,option="D")+
  weekly_small()+guides(fill=guide_legend(nrow=3,byrow=TRUE))+
  labs(y="Shale Gas Production (Dry)\n(Monthly, Billion Cubic Feet per Day)",x="Week",
       title=paste("US shale gas production estimates by play",sep=""),
       caption="Source: EIA data, graph by Andrew Leach.")

Using the same basic graph code as for the DPR, we can see the results are comparable but with a little more disaggregation.

Gas Production by Play

Light Tight Oil Production by Play

#US Trade with Canada

The US is Canada’s most important trading partner and, as far as energy is concerned, as also becoming a significant competitor. These graphs show oil, refined product, and natural gas liquids trade between Canada and the US. The code snippets show you how to first download and stack the data and then how to build the generic trade graph.

Oil, Oil Products, and Natural Gas Liquids Trade

#this section grabs data for trade with Canada

cat_store <- list()
name_store<-list()
unit_store<-list()
#Imports from Canada
subs<-data_fetch(KEY,cat=340981)
series_ids<-subs$Series_IDs$series_id[grep("\\.M\\b",subs$Series_IDs$series_id)]#monthly data
#same for names
names<-t(subs$Series_IDs$name[grep("\\.M\\b",subs$Series_IDs$series_id)])
#and units
units<-subs$Series_IDs$units[grep("\\.M\\b",subs$Series_IDs$series_id)]
cat_store[[1]]<-series_ids
name_store[[1]]<-names
unit_store[[1]]<-units

#Exports to Canada
subs<-data_fetch(KEY,cat=316977)
series_ids<-subs$Series_IDs$series_id[grep("\\.M\\b",subs$Series_IDs$series_id)]
#same for names
names<-t(subs$Series_IDs$name[grep("\\.M\\b",subs$Series_IDs$series_id)])
#and units
units<-subs$Series_IDs$units[grep("\\.M\\b",subs$Series_IDs$series_id)]
cat_store[[2]]<-series_ids
name_store[[2]]<-names
unit_store[[2]]<-units


#net imports will be in 323934

#Net Imports to Canada
subs<-data_fetch(KEY,cat=323934)
series_ids<-subs$Series_IDs$series_id[grep("\\.M\\b",subs$Series_IDs$series_id)]
#same for names
names<-t(subs$Series_IDs$name[grep("\\.M\\b",subs$Series_IDs$series_id)])
#and units
units<-subs$Series_IDs$units[grep("\\.M\\b",subs$Series_IDs$series_id)]
cat_store[[3]]<-series_ids
name_store[[3]]<-names
unit_store[[3]]<-units



#stack all of them
series<-data.frame(do.call(c,cat_store))
name_list<-do.call(c,name_store)
units<-do.call(c,unit_store)

#fetchez le data
cda_trade_data<- pdfetch_EIA(t(series),KEY)
cda_trade_data<-data.frame(date=index(cda_trade_data), coredata(cda_trade_data))
#store units
attributes(cda_trade_data)$units<-c("Date",units)
#fix the data names list
name_list<- gsub("\\.", " ", name_list)
name_list<- gsub("  ", " ", name_list) #take out extrac spaces
name_list<- gsub(", Monthly", "", name_list) #take out monthly label
name_list<- gsub("U.S. ", "", name_list)
name_list<- gsub("U S ", "", name_list)
name_list<- gsub("^\\s+|\\s+$", "",name_list)

#assign names
cda_trade_data <- setNames(cda_trade_data,c("Date",name_list))

full_names<-paste(name_list,units,sep=", ")
cda_trade_data <- setNames(cda_trade_data,c("Date",full_names))

#build week and year indicators
cda_trade_data<-cda_trade_data%>% mutate(
week=week(Date),
month=month(Date),
year=year(Date)
)

#We're interested in a few commodities

commod_list<-c("Conventional Motor Gasoline",
               "Crude Oil and Petroleum Products",
               "Residual Fuel Oil",
               "Propane",
               "Normal Butane",
               "Isobutane",
               "Pentanes Plus",
               "Natural Gas Liquids",
               #"Natural.Gasoline..Monthly",
               "Kerosene Type Jet Fuel",
               "Distillate Fuel Oil",
               "Distillate Fuel Oil, 0 to 15 ppm Sulfur",
               "Total Petroleum Products",
               "Crude Oil"
               )
commod_list<-paste(commod_list,"Thousand Barrels per Day",sep=", ")
#now cda_trade_data has what you need - call it to make the graphs  
  generic_trade_graph<-function(commod_sent,unit_sent="Thousand Barrels per Day"){
      #testing
      #commod_sent<-"Crude Oil and Petroleum Products"
      #unit_sent<-"Thousand Barrels per Day"
    export_field<-paste("Exports to Canada of ",commod_sent,", Thousand Barrels per Day",sep = "")
    net_import_field<-paste("Net Imports from Canada of ",commod_sent,", Thousand Barrels per Day",sep = "")
    import_field<-paste("Imports from Canada of ",commod_sent,", Thousand Barrels per Day",sep = "")
  
  #order is imports, exports, net imports
  df1<-melt(cda_trade_data,id=c("Date"),measure.vars = c(import_field,export_field,net_import_field))
  
  labels<-gsub(paste(",",unit_sent),"",levels(df1$variable))
  
  graph<-ggplot(df1,aes(Date,value,group = variable,colour=variable))+
    geom_line(size=1.7) +
    scale_color_manual("",values=colors_tableau10(),labels=labels)+
    scale_x_date(date_breaks = "1 year", date_labels ="%b\n%Y",limits=c(max(as.Date("2007-07-01"),min(df1$Date)),Sys.Date())) +
    #scale_colour_manual(labels=c("Gasoline Exports","Gasoline Imports","Net Gasoline Exports",values=c("#41ae76","#238b45","#006d2c","#00441b","Black","Black","Black","Black"))+
    #scale_y_continuous(limits=c(min(df1$value),max(df1$value)),expand=c(0,0))+
    #scale_y_continuous(limits=c(min(df1$value),max(df1$value)),expand=c(0,0))+
    guides(colour=guide_legend(nrow=2,byrow=TRUE))+
   weekly_small()+
    labs(y=paste("Monthly Trade (",unit_sent,")",sep=""),x="Year",
         title=paste("US ",commod_sent," Trade with Canada",sep=""),
         caption="Source: EIA API, graph by Andrew Leach.")
  graph
}

Crude Oil

Petroleum Products

Gasoline

Distillates

Low Sulphur Distillates

Pentanes Plus

Pentanes plus (or natural gasoline) is a natural gas liquid used primarily in Canada as a diluent for oil sands crude.

Short Term Energy Outlook

The Short Term Energy Outlook (STEO) is updated monthly and provides recent data and, as the name suggests, a short term (12 month) forecast of potential energy market impacts. There is, as you’d expect, significant oil and gas production and pricing information in the STEO as well. You can access the STEO on the web here as well as archived versions of it here. You can also access the data directly via the API here.

The key data series we’ll use from the STEO are U.S. Prices, U.S. and International Petroleum and Other Liquids, and U.S. Natural Gas. For this example, let’s look at their petroleum price outlooks first.

#STEO Production Outlooks
petroleum_series<-data_fetch(KEY,cat=829726)
sub_cats<-petroleum_series$Sub_Categories$category_id

data_store <- list()
stack<-1
for (cat in sub_cats) {
  #for testing
  #cat<-sub_cats$category_id[2]
  subs<-data_fetch(KEY,cat=cat)
  series <-subs$Series_IDs %>% filter(grepl("Monthly",name))#,units=="Thousand Barrels per Day")
  #series$name<-as.character(series$name)
  #series$To<-"Export"
  #series$From<-do.call("rbind",strsplit(series$name, " Exports of "))[,1]
  #series$Mode<-"Export"
  #Rest<-do.call("rbind",strsplit(series$name, " Exports of "))[,2]
  #series<-series[!grepl("Exports to",Rest),]
  #Rest<-Rest[!grepl("Exports to",Rest)] #take out exports to the PADD itself - odd
  #series$Commodity<-do.call("rbind",strsplit(Rest, ", Monthly"))[,1]
  #series$freq<-"Monthly"
  data_store[[stack]]<-series
  stack<-stack+1
}
series_proc<-data.frame(do.call(rbind,data_store),stringsAsFactors = FALSE)
series_proc<-data.frame(lapply(series_proc,as.character),stringsAsFactors = FALSE)

price_data<-data.frame(pdfetch_EIA(series_proc$series_id,KEY),stringsAsFactors = F)

price_data$date=ymd(rownames(price_data))
rownames(price_data)<-NULL
price_data<-melt(price_data,id="date",variable.name = "series_id",value.name = "price") #convert data to long form with series ids
price_data$series_id<-as.character(price_data$series_id)


price_data<-price_data %>% left_join(series_proc,by=c("series_id"="series_id")) %>%
  na.omit()%>% mutate(price=ifelse(units=="cents per gallon",price/100*42,price),
                      units=ifelse(units=="cents per gallon","dollars per barrel",units)) %>%
                filter(date>=ymd("1990-01-01"))
#adjust everything to dollars per barrel

#figure out crack spread

#add the spot price for two barrels of Gulf Coast conventional gasoline to the spot price for one barrel of Gulf Coast ultra-low sulfur diesel. 

#add a 3:2:1 crack to the price
crack_data<-dcast(price_data, date ~name,value.var="price",sum)%>%
  group_by(date) %>% summarize(price=`Refiner Wholesale Gasoline Price, Monthly`*2/3+
                                 `Diesel Fuel Refiner Wholesale Price, Monthly`*1/3-
                                 `Refiner Average Crude Oil Acquisition Cost, Monthly`)%>%
  mutate(name="crack_321",
         units="dollars per barrel",
         series_id="321_crack_calculated",
         f="M",
         updated=max(price_data$updated))

price_data<-rbind(price_data,crack_data)
series_names<-c("Refiner Average Crude Oil Acquisition Cost, Monthly",
          "Refiner Wholesale Gasoline Price, Monthly",
          "Diesel Fuel Refiner Wholesale Price, Monthly"#,
          #"Heating Oil Refiner Wholesale Price, Monthly",
          #"No. 6 Residual Fuel Oil Price, Monthly")
)
labels=c("Crude Costs", "Gasoline Rack Price", "Diesel Rack Price","3:2:1 Crack Spread")
min_forecast=dmy_hms(min(price_data$updated))
min_forecast=ymd(paste(year(min_forecast),month(min_forecast),1,sep = "-"))
max_forecast=max(price_data$date)
ggplot() +
  geom_line(data=filter(price_data,date>=ymd("1998-01-01"),name %in%series_names[1]),aes(date,price,group=name,color="A"),size=1.7) +
  geom_line(data=filter(price_data,date>=ymd("1998-01-01"),name %in%series_names[2]),aes(date,price,group=name,color="B"),size=1.7) +
  geom_line(data=filter(price_data,date>=ymd("1998-01-01"),name %in%series_names[3]),aes(date,price,group=name,color="C"),size=1.7) +
  geom_area(data=filter(price_data,date>=ymd("1998-01-01"),name =="crack_321"),aes(date,price,fill="Z")) +
  scale_color_manual("",labels=labels[1:3],values=colors_tableau10())+
  scale_fill_manual("" ,labels=labels[4],values="dodgerblue")+   
  #scale_colour_brewer(NULL,labels=c("Gasoline Exports","Gasoline Imports","Net Gasoline Exports"),type = "seq", palette = "Paired", direction = 1)+
  scale_x_date(date_breaks = "2 years", date_labels =  "%Y",expand=c(0,0)) +
  guides(colour=guide_legend(nrow=1,byrow=TRUE))+
  weekly_small()+
  labs(y="Crude Oil and Refined Product Prices ($/bbl)",x="Year",
       title=paste("STEO US Crude Oil and Refined Product Outlook"),
       caption="Source: EIA STEO, graph by Andrew Leach.")+
  annotate("rect", fill = "grey10", alpha = 0.25, 
           xmin = min_forecast, xmax =max_forecast+months(1), ymin = -Inf, ymax = Inf)+
  annotate("text", x = min_forecast+(max_forecast-min_forecast)/2+months(1), y = 130, label = "STEO\nForecast",size=1.75)

Annual Energy Outlook

Finally, for long-term outlooks, we can look to the Annual Energy Outlook which carries forecasts through to 2050 under a variety of assumptions and scenarios. The Annual Outlook which appears twice per year (in November as an Early Release and in March in final form) is available here from the EIA website and, as usual, data are available for the previous 5 editions through the API here. One of the more interesting things to do with Annual Outlook data from year to year is to see how the forward views have changed on prices or quantities in which we are interested. Below are are few examples of how the projections have evolved since 2014.

eia_aeo_comp<-function(start_year=2014,end_year=2022,api_series=".SUP_NA_LFL_NA_DCP_NA_USA_MILLBRLPDY.A",
                   label="US Total Crude Oil Production",
                   units="mmbbl/d",
                   history=FALSE,
                   hist_series="PET.MCRFPUS2.A",
                   hist_conversion=1,
                   hist_year=1950,
                   zero_y=TRUE
                   ){ #use oil as the default
  #testing
  #api_series<-".GEN_NA_ELEP_NA_SLR_PHTVL_NA_BLNKWH.A"
  #start_year=2015
  #end_year=2022
  #label="Oil Production"
  #units="mmbbl/d"
  #hist_series<-"PET.MCRFPUS2.A"
  #hist_conversion=1000
  series<-paste("AEO.",seq(start_year,end_year),".REF",seq(start_year,end_year),api_series,sep="")
  labels<-paste(seq(start_year,end_year)," AEO",sep="")
  elements=end_year-start_year+1
  data<-pd_fix(pdfetch_EIA(series,KEY),labels)%>%
    pivot_longer(-date,names_to = "variable")
  plot<-ggplot(data)+
  geom_line(aes(date,value,group=variable,color=variable,size=variable==paste(end_year,"AEO")),lty="31")+
  #geom_point(data=data %>% filter(variable==paste(end_year,"AEO")),aes(date,value,group=variable,color=variable),size=2.25)+
  scale_y_continuous(breaks=pretty_breaks(),expand=c(0,0))+
  scale_x_date(breaks=pretty_breaks(n=10),expand=c(0,0))+
  scale_color_viridis("",discrete = T,option="A",direction = -1,end = .9)+
  scale_size_manual("",values=c(1,1.5))+
  scale_linetype_manual("",values=c("solid"))+
  theme_minimal()+weekly_graphs()+
    theme(axis.title.y = element_text(margin = margin(t = 0, r = 10, b = 0, l = 0)))+
  guides(linetype=guide_legend(order = 1,keywidth = unit(1.6,"cm")),
         size="none",
          #shape = guide_legend(keywidth = unit(1.6,"cm"),nrow = 2),
         #linetype = guide_legend(keywidth = unit(1.6,"cm"),nrow = 2),
         #colour = guide_legend(keywidth = unit(1.6,"cm"),override.aes = list(lty = "11")  ,nrow = 2),
         colour = guide_legend(keywidth = unit(1.6,"cm"),nrow = trunc(elements/6)+1,
                               override.aes = list(size=c(rep(1,elements-1),1.5))),
         #fill = guide_legend(keywidth = unit(1.6,"cm"),nrow = 2)
         NULL)
  if(zero_y)
    plot<-plot+expand_limits(y=0)
  
  #get historical data
  if(history)
    {
    hist_data<-pd_fix(pdfetch_EIA(hist_series,KEY),"Historical data")%>%
      pivot_longer(-date,names_to = "variable")%>%
      mutate(value=value/hist_conversion)%>%
      filter(year(date)>=hist_year)
    plot<-plot+
    geom_line(data=hist_data,aes(date,value,lty="Historical Data"),size=1)+
    labs(y=paste(label," (",units,")",sep=""),x="",
         title=paste("Historical",label,"and EIA AEO Reference Case Projections"),
         caption="Source: Data via EIA AEO, graph by Andrew Leach.")
   }
  
  if(!history){
    plot<-plot+
    labs(y=paste(label," (",units,")",sep=""),x="",
         title=paste("EIA Annual Energy Outlook",label,"Forecasts"),
         caption="Source: Data via EIA AEO, graph by Andrew Leach.")
    }
plot
  
  }

Oil Production, Trade, and Prices

US Oil Production

eia_aeo_comp(history = TRUE,hist_conversion = 1000)

US Oil Imports

eia_aeo_comp(start_year = 2014,api_series =".TRAD_NA_LFL_NA_GIM_NA_USA_MILLBRLPDY.A",label = "US Gross Crude Oil Imports",
                          units = "million barrels per day",
                          history = TRUE,
                          hist_series = "PET.MCRIMUS2.A",
                          hist_conversion = 1000,
                          hist_year=1950)

US Oil Exports

eia_aeo_comp(start_year = 2014,api_series =".TRAD_NA_LFL_NA_EXP_NA_USA_MILLBRLPDY.A",label = "US Gross Crude Oil Exports",
                          units = "million barrels per day",
                          history = TRUE,
                          hist_series = "PET.MCREXUS2.A",
                          hist_conversion = 1000,
                          hist_year=2010)

WTI Oil Prices

eia_aeo_comp(start_year = 2015,api_series =".PRCE_NA_NA_NA_CR_WTI_USA_NDLRPBRL.A",label = "WTI (Cushing) Nominal Spot Price",
                        units = "$/bbl",
                        history = TRUE,
                        hist_series = "PET.RWTC.M",
                        hist_conversion = 1,
                        hist_year=1990)

Natural Gas Production, Trade, and Prices

US Natural Gas Production

eia_aeo_comp(start_year = 2014,api_series =".SUP_DPR_NA_NA_NG_TOT_USA_TRLCF.A",label = "US Dry Natural Gas Production",
                          units = "TCF",
                          history = TRUE,
                          hist_series = "NG.N9070US2.M",
                          hist_conversion = 10^6/12,
                          hist_year=1950)

US Natural Gas Imports

eia_aeo_comp(start_year = 2020,api_series =".SUP_IMP_NA_NA_NG_NA_NA_TRLCF.A",label = "US Gross Natural Gas Imports",
                          units = "TCF",
                          history = TRUE,
                          hist_series = "NG.N9100US2.M",
                          hist_conversion = 10^6/12,
                          hist_year=1950)

US Natural Gas Exports

eia_aeo_comp(start_year = 2014,api_series =".SUP_EXPT_NA_NA_NG_NA_NA_TRLCF.A",label = "US Gross Natural Gas Exports",
                          units = "TCF",
                          history = TRUE,
                          hist_series = "NG.N9130US2.M",
                          hist_conversion = 10^6/12,
                          hist_year=2010)

Henry Hub Natural Gas Prices

eia_aeo_comp(start_year = 2014,api_series =".PRCE_HHP_NA_NA_NG_NA_USA_NDLRPMBTU.A",label = "Henry Hub Nominal Spot Price",
                          units = "$/MMBTU",
                          history = TRUE,
                          hist_series = "NG.RNGWHHD.M",
                          hist_conversion = 1,
                          hist_year=1990)

Trade Flows

#AEO Gas trade

 export_set<-c("Exports : Pipeline Exports to Canada",
               "Exports : Pipeline Exports to Mexico",
               "Exports : Liquefied Natural Gas Exports")
 import_set<-c("Imports : Pipeline Imports from Canada",
               "Imports : Pipeline Imports from Mexico",
               "Imports : Liquefied Natural Gas Imports")
 
 
 
#imports by data_series
#http://api.eia.gov/category/?api_key=YOUR_API_KEY_HERE&category_id=476336

import_series<-get_children(476336)
import_series<-filter(import_series,grepl("U.S. Natural Gas Pipeline Imports From",name)|grepl("U.S. Liquefied Natural Gas Imports,",name),!grepl("Price",name),!grepl("Annual",name))
       
#exports by data series
#http://api.eia.gov/category/?api_key=YOUR_API_KEY_HERE&category_id=476803
export_series<-get_children(476802)
export_series<-filter(export_series,grepl("U.S. Natural Gas Pipeline Exports to",name)|grepl("Liquefied U.S. Natural Gas Exports,",name),!grepl("Price",name),!grepl("Annual",name))

gas_trade<-rbind(import_series,export_series)
gas_trade_data<-EIA_to_DF(gas_trade)
#reset to match AEO Names
names(gas_trade_data)<-c("date",                                                  
                         "Imports : Pipeline Imports from Canada",
                         "Imports : Pipeline Imports from Mexico",
                         "Imports : Liquefied Natural Gas Imports",           
                         "Exports : Pipeline Exports to Canada",  
                         "Exports : Pipeline Exports to Mexico",  
                         "Exports : Liquefied Natural Gas Exports")

#gas_trade_data$`Imports : Pipeline Imports`<-gas_trade_data$`Imports : Pipeline Imports from Canada`+gas_trade_data$`Imports : Pipeline Imports from Mexico`
#gas_trade_data$`Exports : Pipeline Exports`<-gas_trade_data$`Exports : Pipeline Exports to Canada`+gas_trade_data$`Exports : Pipeline Exports to Mexico`

#gas_trade_data$`Pipeline Net Imports`<-gas_trade_data$`Imports : Pipeline Imports`- gas_trade_data$`Exports : Pipeline Exports`
#gas_trade_data$`Liquefied Natural Gas Net Imports`<-gas_trade_data$`Imports : Liquefied Natural Gas Imports` - gas_trade_data$`Exports : Liquefied Natural Gas Exports`

  
gas_trade_data<-gas_trade_data %>% pivot_longer(-date,names_to ="series") 

#make it annual
gas_trade_data<-gas_trade_data %>% mutate(year=year(date)) %>% group_by(year,series) %>%
  summarise(value=12*mean(value,na.rm = T)) %>% ungroup %>% #now annual values based on mean non-na month
  mutate(date=ymd(paste(year,12,31,sep = "-")),year=NULL)
#adjust to TCF per year
gas_trade_data$value<-gas_trade_data$value/10^6

history_dates<-tibble(date=unique(gas_trade_data$date))

gas_trade_data<-gas_trade_data %>% filter(series %in% import_set | series %in% export_set) %>%
  mutate(value=ifelse(series %in% import_set,-1*value,value))



#build AEO list

subs<-data_fetch(KEY,cat=3162260)

#AEO_data$test<-"SUP_IMP_LIQ_NA_NG_NA_NA_TRLCF.A"

AEO_data<-subs$Series_IDs
AEO_data<-NULL
for(j in seq(2014,2019)){
  #print(paste("working on AEO",j,sep=""))
  work_data<-subs$Series_IDs
  work_data$series_id<-gsub("2019",j,work_data$series_id)
  work_data$name<-gsub("2019",j,work_data$name)
  AEO_data<-rbind(AEO_data,work_data)
}

# #get the 2022 defns
#2022 is 4577710
#2021 is 4047706
# 2019 is 3604661
 subs<-data_fetch(KEY,cat=4577710)
 for(j in seq(2020,2022)){
   #print(paste("working on AEO",j,sep=""))
   work_data<-subs$Series_IDs
   work_data$series_id<-gsub("2022",j,work_data$series_id)
   work_data$name<-gsub("2022",j,work_data$name)
   AEO_data<-rbind(AEO_data,work_data)
 }



AEO_data$name<-gsub("Natural Gas : Volumes : ","",AEO_data$name)


test<- pdfetch_EIA(AEO_data$series_id,KEY)
series_data<-data.frame(date=index(test), coredata(test))
series_data$date<-ymd(series_data$date)
series_data <- setNames(series_data, c("date",AEO_data$name))


suppressMessages(series_data <-series_data %>% full_join(history_dates))#include the dates for which we have history. They will be NA now, but we'll stack them in later


#melt it
series_data<-series_data %>% pivot_longer(-c(date),names_to="variable") #%>% na.omit()


#series_data$`Pipeline Net Imports`<-series_data$`Imports : Pipeline Imports`- gas_trade_data$`Exports : Pipeline Exports`
#series_data$`Liquefied Natural Gas Net Imports`<-gas_trade_data$`Imports : Liquefied Natural Gas Imports` - gas_trade_data$`Exports : Liquefied Natural Gas Exports`



#get the year for each
name_split<-do.call(rbind,strsplit(as.character(series_data$variable),", "))

series_data$series<-name_split[,1]
#series_data$case<-name_split[,2]
series_data$aeo_year<-name_split[,3]




suppressMessages(joint_data<-series_data%>%filter(year(date)>=2000)%>%left_join(gas_trade_data%>%rename(history=value)))

joint_data<-joint_data %>% filter(series %in% import_set | series %in% export_set) %>%
  mutate(value=ifelse(series %in% import_set,-1*value,value),
         #history=ifelse(series %in% import_set,-1*history,history)
         NULL)

#strip out history after forecast date

joint_data<-joint_data %>% 
  mutate(year=as.numeric(gsub("AEO","",aeo_year)),
    history=ifelse(year(date)<as.numeric(gsub("AEO","",aeo_year)),history,NA),
    series=gsub("Exports : ","",series),
    series=gsub("Imports : ","",series),
    series=factor(series,levels=c("Liquefied Natural Gas Exports", "Pipeline Exports to Canada",  "Pipeline Exports to Mexico",
                                  "Liquefied Natural Gas Imports", "Pipeline Imports from Canada" ,"Pipeline Imports from Mexico" ))
    
    )


gas_trade_plot<-ggplot(joint_data)+
  geom_area(aes(date,value,fill=series),position="stack",alpha=0.6,color="black",size=0.25)+
  geom_area(aes(date,history,fill=series),position="stack",color="black",size=0.25)+
  facet_wrap(~aeo_year,nrow = 1)+
  scale_y_continuous(breaks=pretty_breaks(),expand=c(0,0))+
  scale_x_date(breaks=pretty_breaks(n=5),expand=c(0,0))+
  #scale_color_viridis("",discrete = T,option="A",direction = -1,end = .9)+
  scale_fill_viridis("",discrete = T,option="A",direction = -1,end = .9)+
  scale_size_manual("",values=c(1,1.5))+
  scale_linetype_manual("",values=c("solid"))+
  expand_limits(x=ymd("1999-10-01"))+
  theme_minimal()+weekly_graphs()+
  theme(axis.title.y = element_text(margin = margin(t = 0, r = 10, b = 0, l = 0)))+
  theme(axis.text.x = element_text(angle=90,hjust = 0.5,vjust = 0.5))+
  guides(linetype=guide_legend(order = 1,keywidth = unit(1.6,"cm")),
         size="none",
         #shape = guide_legend(keywidth = unit(1.6,"cm"),nrow = 2),
         #linetype = guide_legend(keywidth = unit(1.6,"cm"),nrow = 2),
         #colour = guide_legend(keywidth = unit(1.6,"cm"),override.aes = list(lty = "11")  ,nrow = 2),
         fill = guide_legend(keywidth = unit(1,"cm"),nrow = 2,byrow = TRUE),
         #fill = guide_legend(keywidth = unit(1.6,"cm"),nrow = 2)
         NULL)+
  labs(y=paste("Annual Net Outflows (TCF)",sep=""),x="",
       title=paste("EIA Annual Energy Outlook Natural Gas Trade Projections"),
       subtitle=paste("Historical data up to forecast date in darker shade, forecasts shown with more transparency"),
       caption="Source: Data via EIA, graph by Andrew Leach.")

gas_trade_plot

Electricity supply

Total Electricity Supply

eia_aeo_comp(start_year = 2015,api_series =".GEN_NA_ELEP_NA_TEG_NA_USA_BLNKWH.A",label = "Electricity Generation",
                          units = "billion kWh",
                          history = TRUE,
                          hist_conversion = 1000,
                          hist_series = "ELEC.GEN.ALL-US-99.A"
                          )

Solar Generation

eia_aeo_comp(start_year = 2014,api_series =".GEN_NA_ALLS_NA_SLR_NA_NA_BLNKWH.A",label = "Solar Electricity Generation",
                          units = "billion kWh",
                          history = TRUE,
                          hist_conversion = 1000,
                          hist_series = "ELEC.GEN.TSN-US-99.A"
                          )

Coal Generation

eia_aeo_comp(start_year = 2018,api_series =".GEN_NA_ELEP_POW_CL_NA_USA_BLNKWH.A",label = "Coal-fired Electricity Generation",
                          units = "billion kWh",
                         history = TRUE,
                         hist_conversion = 1000,
                         hist_series = "ELEC.GEN.COW-US-98.A")