Enrique_ds

Last updated: July 21st, 2019 (2019-07-21) — Project preview
In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline

# NOTE(review): the 'ignore' filter silences every warning for the rest of
# the session, including pandas/matplotlib deprecation notices. Consider
# narrowing it to a specific category once the noisy warning is identified.
import warnings
warnings.filterwarnings('ignore')
In [2]:
# Load the monthly consumption table from the CSV export.
consumption_df = pd.read_csv(
    'consumption_monthly.csv',
    skiprows=4,     # the first four lines of the file are skipped
    header=0,       # the first line after the skipped ones holds the column names
    na_values='.',  # a lone '.' cell is read as missing
)
In [3]:
# Strip the ',' thousands separators from CONSUMPTION and store it as int64.
#
# The column is cast to str first so this cell can be re-run safely: after the
# first run the column is already integer, and calling `.str` on it directly
# would raise AttributeError. `regex=False` treats ',' as a literal character,
# and the vectorized `astype` replaces the per-row `apply(int)`.
# A missing value still raises ValueError here, as it did before.
consumption_df['CONSUMPTION'] = (
    consumption_df['CONSUMPTION']
    .astype(str)
    .str.replace(',', '', regex=False)
    .astype('int64')
)
In [4]:
# Print the column dtypes and non-null counts of the loaded table.
consumption_df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3120 entries, 0 to 3119
Data columns (total 6 columns):
YEAR                3120 non-null int64
MONTH               3120 non-null int64
STATE               3120 non-null object
TYPE OF PRODUCER    3120 non-null object
ENERGY SOURCE       3120 non-null object
CONSUMPTION         3120 non-null int64
dtypes: int64(3), object(3)
memory usage: 146.3+ KB
In [5]:
# Preview the first rows of the table.
consumption_df.head()
Out[5]:
YEAR MONTH STATE TYPE OF PRODUCER ENERGY SOURCE CONSUMPTION
0 2019 1 AK Total Electric Power Industry Coal (Short Tons) 59973
1 2019 1 AK Total Electric Power Industry Natural Gas (Mcf) 3414933
2 2019 1 AK Total Electric Power Industry Petroleum (Barrels) 157580
3 2019 1 AK Combined Heat and Power, Industrial Power Natural Gas (Mcf) 26836
4 2019 1 AK Combined Heat and Power, Industrial Power Petroleum (Barrels) 5093
In [8]:
# Total CONSUMPTION per state, keeping the 20 largest states.
#
# The 'US-Total' aggregate row is filtered out *before* ranking. The previous
# version took the top 21 rows and then dropped 'US-Total', which raised
# KeyError whenever that label was not among those 21 rows and hid the
# intended "top 20" behind a magic number.
#
# NOTE(review): the sum runs over every ENERGY SOURCE (Mcf, short tons,
# barrels, billion Btu) and every TYPE OF PRODUCER, including the
# 'Total Electric Power Industry' rows, so the totals mix units and appear to
# count the aggregate producer on top of its components. Confirm whether the
# data should be restricted to one source and one producer type first.
TOP_N_STATES = 20

states_df = (
    consumption_df[consumption_df["STATE"] != "US-Total"]
    .groupby("STATE")["CONSUMPTION"]
    .sum()
    .sort_values(ascending=False)
    .head(TOP_N_STATES)
    .to_frame()
)
states_df
Out[8]:
CONSUMPTION
STATE
TX 946505908
FL 743289836
PA 388907950
CA 364914477
LA 306878816
VA 250479040
OH 240754220
AL 236636112
GA 232638274
NY 212024376
NC 200045190
MS 199050032
AZ 189270550
OK 178081988
NJ 168271768
MI 160875665
IN 153771462
NV 106820425
WI 105814706
IL 105686016
In [9]:
# Bar chart of the per-state totals held in states_df.
ax = states_df.plot.bar(
    title="Per State Consumption",
    figsize=(15, 10),
    legend=True,
    fontsize=12,
)
ax.set_xlabel("State", fontsize=12)
ax.set_ylabel("Consumption", fontsize=12)
Out[9]:
Text(0,0.5,'Consumption')
In [10]:
# Total CONSUMPTION per producer type, largest first.
# NOTE(review): the 'Total Electric Power Industry' category is ranked next to
# the other producer types, and no STATE rows are filtered out before summing.
# Confirm that aggregate rows are meant to be included here.
producer_df = (
    consumption_df
    .groupby("TYPE OF PRODUCER")["CONSUMPTION"]
    .sum()
    .sort_values(ascending=False)
)
producer_df
Out[10]:
TYPE OF PRODUCER
Total Electric Power Industry                       6768022461
Electric Generators, Electric Utilities             3500315607
Electric Generators, Independent Power Producers    2380385395
Combined Heat and Power, Electric Power              483778096
Combined Heat and Power, Industrial Power            369631109
Combined Heat and Power, Commercial Power             33912252
Name: CONSUMPTION, dtype: int64
In [11]:
# Bar chart of the per-producer-type totals held in producer_df.
ax = producer_df.plot.bar(
    title="Consumption by Producer Type",
    figsize=(15, 10),
    legend=True,
    fontsize=12,
)
ax.set_xlabel("Type of Producer", fontsize=12)
ax.set_ylabel("Consumption", fontsize=12)
Out[11]:
Text(0,0.5,'Consumption')
In [12]:
# Total CONSUMPTION per energy source, largest first.
# NOTE(review): each source label carries its own unit (Mcf, short tons,
# barrels, billion Btu), so the ranking compares quantities in different
# units. Confirm this is intended before drawing conclusions from the order.
energy_df = (
    consumption_df
    .groupby("ENERGY SOURCE")["CONSUMPTION"]
    .sum()
    .sort_values(ascending=False)
)
energy_df
Out[12]:
ENERGY SOURCE
Natural Gas (Mcf)            12777921288
Coal (Short Tons)              712450668
Petroleum (Barrels)             45374859
Geothermal (Billion Btu)          205972
Other Gases (Billion Btu)          92133
Name: CONSUMPTION, dtype: int64
In [13]:
# Bar chart of the per-energy-source totals held in energy_df.
ax = energy_df.plot.bar(
    title="Consumption by Energy Source",
    figsize=(15, 10),
    legend=True,
    fontsize=12,
)
ax.set_xlabel("Energy Source", fontsize=12)
ax.set_ylabel("Consumption", fontsize=12)
Out[13]:
Text(0,0.5,'Consumption')
Notebooks AI
Notebooks AI Profile20060