Hands on!
In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
Loading our data:
In [17]:
# Read the Excel workbook into a DataFrame; the path is relative to the
# notebook's working directory. Every later cell works on `df`.
df = pd.read_excel('data/TRO.xlsx')
The data at a glance:
In [19]:
# Preview the first rows (head() shows 5 by default) to check the load
# and see which columns are available.
df.head()
Out[19]:
In [20]:
# (number of rows, number of columns) of the loaded frame.
df.shape
Out[20]:
In [22]:
# Print column names, dtypes, non-null counts and memory usage.
# `info` is a method and has to be called: a bare `df.info` only displays
# the bound-method repr instead of the summary.
df.info()
Out[22]:
In [23]:
# Summary statistics (count, mean, std, min, quartiles, max); by default
# pandas reports these for the numeric columns.
df.describe()
Out[23]:
In [26]:
# Same summary restricted to the 'AWS' column (labelled
# 'Average Weekly Sales' in the histogram cell further down).
df['AWS'].describe()
Out[26]:
In [27]:
# Arithmetic mean of 'AWS'.
df['AWS'].mean()
Out[27]:
In [28]:
# Median of 'AWS'; a gap between this and the mean indicates a skewed
# distribution.
df['AWS'].median()
Out[28]:
In [29]:
# Horizontal box-and-whisker plot of 'AWS' to show spread and outliers.
df['AWS'].plot.box(vert=False, figsize=(14, 6))
Out[29]:
In [30]:
# Kernel density estimate of 'AWS' ('density' is pandas' alias for 'kde').
df['AWS'].plot.kde(figsize=(14, 6))
Out[30]:
In [31]:
# Density (KDE) of 'AWS' with the mean and the median marked.
ax = df['AWS'].plot(kind='density', figsize=(14,6)) # kde
# Label both markers and draw a legend; otherwise the two vertical lines
# cannot be told apart without reading the code.
ax.axvline(df['AWS'].mean(), color='red', label='mean')
ax.axvline(df['AWS'].median(), color='green', label='median')
ax.legend();
Out[31]:
In [39]:
# Histogram of 'AWS' (default binning) with both axes labelled.
ax = df['AWS'].plot.hist(figsize=(14, 6))
ax.set_ylabel('Number of Articles')
ax.set_xlabel('Average Weekly Sales')
Out[39]:
In [41]:
# Show the first rows again as a reminder of the columns before moving on
# to the 'HFB' column.
df.head()
Out[41]:
In [42]:
# Number of rows per distinct 'HFB' value; value_counts() sorts the result
# from most to least frequent by default.
df['HFB'].value_counts()
Out[42]:
In [43]:
# Share of rows per 'HFB' value as a pie chart.
df['HFB'].value_counts().plot.pie(figsize=(6, 6))
Out[43]:
In [44]:
# Row count per 'HFB' value as a bar chart, with a labelled y-axis.
ax = df['HFB'].value_counts().plot.bar(figsize=(14, 6))
ax.set_ylabel('Number of articles')
Out[44]:
In [45]:
# Pairwise (Pearson) correlation between the numeric columns.
# The numeric columns are selected explicitly: since pandas 2.0,
# DataFrame.corr() no longer drops non-numeric columns on its own and
# raises if e.g. a text column is present. 'bool' is included to match
# what older pandas versions treated as numeric.
corr = df.select_dtypes(include=['number', 'bool']).corr()
corr
Out[45]:
In [46]:
# Heat map of the correlation matrix computed above.
fig = plt.figure(figsize=(8,8))
# Pin the colour scale to [-1, 1] so the white centre of the diverging
# 'RdBu' map always means "no correlation". Without vmin/vmax the scale is
# stretched to the data range and the colours become misleading.
plt.matshow(corr, cmap='RdBu', vmin=-1, vmax=1, fignum=fig.number)
# Use the column names as tick labels on both axes.
plt.xticks(range(len(corr.columns)), corr.columns, rotation='vertical');
plt.yticks(range(len(corr.columns)), corr.columns);
# Colour bar so the shades can be read as correlation values.
plt.colorbar(label='Correlation');
In [52]:
# Scatter plot of 'Unit Volume' against 'AWS'.
df.plot.scatter(x='AWS', y='Unit Volume', figsize=(6, 6))
Out[52]:
In [50]:
# Scatter plot of 'Sales Space Capacity' against 'AWS'.
df.plot.scatter(x='AWS', y='Sales Space Capacity', figsize=(6, 6))
Out[50]:
In [56]:
# Box plot of 'Sales Space Capacity' grouped by 'HFB' (one box per group).
# Only the value column and the grouping column are selected before
# plotting. NOTE(review): `ax` is used as a single Axes below, which
# presumably relies on exactly one value column being plotted -- confirm
# on the pandas version in use.
ax = df[['Sales Space Capacity', 'HFB']].boxplot(by='HFB', figsize=(10,6))
ax.set_ylabel('Sales Space Capacity')
Out[56]:
In [59]:
# Columns to compare side by side, one box plot per column.
boxplot_cols = [
    'HFB',
    'Service Level',
    'Lead Time',
    'PQ',
    'Actual Inventory',
    'Sales Space Capacity',
]
# subplots=True gives every column its own axes (and scale) in a 2x3 grid.
df[boxplot_cols].plot.box(subplots=True, layout=(2, 3), figsize=(14, 8))
Out[59]: