Lou_mock_project

Last updated: November 21st, 2018 (2018-11-21) — Project preview
In [108]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline

import warnings
# Blanket-silence ALL warnings for a clean report. NOTE(review): this also
# hides pandas deprecation warnings; consider a narrower filter.
warnings.filterwarnings('ignore')
# Default figure size for every seaborn/matplotlib plot in this notebook.
sns.set(rc={'figure.figsize': (18, 6)})

pd.set_option('precision', 5)
In [109]:
# IS: the in-sample parameter set. header=0 -> the first record is the header.
IS = pd.read_csv(
    'data/IS_dataset_real.csv',
    sep=',',
    engine='python',
    header=0,
    parse_dates=True,
)
#IS.head()
In [110]:
# OOS: the trend data set (see TCM).
OOS = pd.read_csv(
    'data/OOS_dataset_larger.csv',
    sep=',',
    engine='python',
    header=0,
    parse_dates=True,
)
#OOS.head()
In [111]:
# Vol: 1-period OOS vol forecasts from the HAR and ARIMA estimators.
Vol = pd.read_csv(
    'data/vol_forecasts.csv',
    sep=',',
    engine='python',
    header=0,
    parse_dates=True,
)
#Vol.tail()
In [112]:
IS.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 36 entries, 0 to 35
Data columns (total 11 columns):
ID           36 non-null int64
firstdate    36 non-null object
lastdate     36 non-null object
type         36 non-null object
vola         36 non-null float64
u_mean       36 non-null float64
u_median     36 non-null float64
u_mode       36 non-null float64
d_mean       36 non-null float64
d_median     36 non-null float64
d_mode       36 non-null float64
dtypes: float64(7), int64(1), object(3)
memory usage: 3.2+ KB
In [113]:
# Parse 'lastdate' strings into datetime64 values. (read_csv's parse_dates=True
# did not convert this column — it arrived as object dtype, per IS.info() above.)
IS['lastdate'] = pd.to_datetime(IS['lastdate'])
In [114]:
IS.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 36 entries, 0 to 35
Data columns (total 11 columns):
ID           36 non-null int64
firstdate    36 non-null object
lastdate     36 non-null datetime64[ns]
type         36 non-null object
vola         36 non-null float64
u_mean       36 non-null float64
u_median     36 non-null float64
u_mode       36 non-null float64
d_mean       36 non-null float64
d_median     36 non-null float64
d_mode       36 non-null float64
dtypes: datetime64[ns](1), float64(7), int64(1), object(2)
memory usage: 3.2+ KB
In [115]:
OOS.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 65527 entries, 0 to 65526
Data columns (total 8 columns):
Begin_DateTime    65527 non-null object
End_DateTime      65527 non-null object
au                16355 non-null float64
bu                9750 non-null float64
cu                6633 non-null float64
ad                16233 non-null float64
bd                10076 non-null float64
cd                6480 non-null float64
dtypes: float64(6), object(2)
memory usage: 4.0+ MB
In [116]:
# Same conversion for 'firstdate' — it was still object dtype after read_csv.
IS['firstdate'] = pd.to_datetime(IS['firstdate'])
In [117]:
# Parse all timestamp columns, then promote the end-of-interval timestamp
# (OOS) and the forecast date (Vol) to be the row index.
for ts_col in ('Begin_DateTime', 'End_DateTime'):
    OOS[ts_col] = pd.to_datetime(OOS[ts_col])
Vol['date'] = pd.to_datetime(Vol['date'])

OOS = OOS.set_index('End_DateTime')
Vol = Vol.set_index('date')
In [118]:
OOS.info()
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 65527 entries, 2009-09-01 06:48:00 to 2015-04-06 11:38:30
Data columns (total 7 columns):
Begin_DateTime    65527 non-null datetime64[ns]
au                16355 non-null float64
bu                9750 non-null float64
cu                6633 non-null float64
ad                16233 non-null float64
bd                10076 non-null float64
cd                6480 non-null float64
dtypes: datetime64[ns](1), float64(6)
memory usage: 4.0 MB
In [ ]:
 
In [ ]:
 
In [119]:
# Additional OOS data (late 2015 through 2016).
OOS_2015 = pd.read_csv(
    'data/OOS_dataset_2015.csv',
    sep=',',
    engine='python',
    header=0,
    parse_dates=True,
)
In [120]:
# Bring OOS_2015 into the same shape as OOS: parsed timestamps, with
# End_DateTime as the index.
for ts_col in ('Begin_DateTime', 'End_DateTime'):
    OOS_2015[ts_col] = pd.to_datetime(OOS_2015[ts_col])
OOS_2015 = OOS_2015.set_index('End_DateTime')
In [121]:
OOS_2015.info()
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 23107 entries, 2015-10-01 05:20:12 to 2016-10-05 14:07:31
Data columns (total 7 columns):
Begin_DateTime    23107 non-null datetime64[ns]
au                5704 non-null float64
bu                3473 non-null float64
cu                2281 non-null float64
ad                5799 non-null float64
bd                3585 non-null float64
cd                2265 non-null float64
dtypes: datetime64[ns](1), float64(6)
memory usage: 1.4 MB
In [122]:
# Stack the 2015-2016 rows beneath the original OOS rows (axis=0, the default,
# concatenates along the row axis).
OOS = pd.concat([OOS, OOS_2015], axis=0)
In [123]:
OOS.head()
Out[123]:
Begin_DateTime au bu cu ad bd cd
End_DateTime
2009-09-01 06:48:00 2009-09-01 06:10:00 nan 0.001542 nan nan nan nan
2009-09-01 07:09:00 2009-09-01 07:03:00 nan nan nan -0.000555 nan nan
2009-09-01 07:47:00 2009-09-01 07:34:00 nan nan nan -0.001109 nan nan
2009-09-01 08:27:00 2009-09-01 08:20:00 nan nan nan -0.000431 nan nan
2009-09-01 08:34:00 2009-09-01 06:59:00 nan nan 0.002468 nan nan nan
In [124]:
OOS.tail()
Out[124]:
Begin_DateTime au bu cu ad bd cd
End_DateTime
2016-10-05 13:01:04 2016-10-05 12:52:00 nan nan nan -0.000267 nan nan
2016-10-05 13:19:00 2016-10-05 13:10:52 nan nan nan -0.000489 nan nan
2016-10-05 13:44:22 2016-10-05 13:19:00 nan 0.001468 nan nan nan nan
2016-10-05 13:44:22 2016-10-05 11:59:49 nan nan 0.002316 nan nan nan
2016-10-05 14:07:31 2016-10-05 13:44:22 nan nan nan nan nan -0.000578
In [125]:
OOS = OOS.drop_duplicates()
# Removes rows whose COLUMN values all match an earlier row; the index is
# ignored, so this is NOT index-based de-duplication (contrary to the original
# note here). For index de-dup, OOS[~OOS.index.duplicated()] would be needed.
In [126]:
OOS.info()
#check post-merge date range and dimensions
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 88630 entries, 2009-09-01 06:48:00 to 2016-10-05 14:07:31
Data columns (total 7 columns):
Begin_DateTime    88630 non-null datetime64[ns]
au                22058 non-null float64
bu                13223 non-null float64
cu                8914 non-null float64
ad                22029 non-null float64
bd                13661 non-null float64
cd                8745 non-null float64
dtypes: datetime64[ns](1), float64(6)
memory usage: 5.4 MB
In [127]:
#OOS.head()
In [128]:
# Split the in-sample parameter table by trend type — two of the trend types.
IS_b = IS.loc[IS['type'] == 'b']
IS_a = IS.loc[IS['type'] == 'a']
In [129]:
IS_au_mode= IS_a.sort_values(by=['u_mode'])
In [130]:
IS_au_mode.head()
Out[130]:
ID firstdate lastdate type vola u_mean u_median u_mode d_mean d_median d_mode
0 160292028 2010-01-01 2012-11-01 a 0.050000 0.000371 0.000351 0.000279 0.000371 0.000351 0.000279
3 838053737 2010-01-01 2012-11-01 a 0.055000 0.000427 0.000391 0.000318 0.000427 0.000391 0.000318
6 428150932 2010-01-01 2012-11-01 a 0.060000 0.000459 0.000414 0.000325 0.000459 0.000414 0.000325
9 352918991 2010-01-01 2012-11-01 a 0.065000 0.000457 0.000417 0.000346 0.000457 0.000417 0.000345
15 801646283 2010-01-01 2012-11-01 a 0.075000 0.000524 0.000488 0.000370 0.000524 0.000488 0.000370
In [131]:
IS_b.head()
Out[131]:
ID firstdate lastdate type vola u_mean u_median u_mode d_mean d_median d_mode
1 160292028 2010-01-01 2012-11-01 b 0.050000 0.001124 0.001060 0.000846 0.001124 0.001060 0.000847
4 838053737 2010-01-01 2012-11-01 b 0.055000 0.001242 0.001136 0.000957 0.001242 0.001137 0.000958
7 428150932 2010-01-01 2012-11-01 b 0.060000 0.001332 0.001221 0.001017 0.001333 0.001221 0.001017
10 352918991 2010-01-01 2012-11-01 b 0.065000 0.001337 0.001233 0.001012 0.001338 0.001232 0.001016
13 961426529 2010-01-01 2012-11-01 b 0.070000 0.001469 0.001354 0.001149 0.001469 0.001355 0.001148
In [132]:
#IS_a['IS_median']
vola05a_umean = IS_a[IS_a['vola'] == 0.05]['u_mean'].values[0]
vola06a_umean = IS_a[IS_a['vola'] == 0.06]['u_mean'].values[0]
vola07a_umean = IS_a[IS_a['vola'] == 0.07]['u_mean'].values[0]
vola08a_umean = .00065
#filter the IS_a dataframe by vola level and extract the u_mean for each level
#store value in as numpy.float64 dtypes
#KEY: we use .values[0] because we want the first value in a numpy array
In [133]:
# Same extraction for the type-b mean up-moves; 8% value is hard-coded.
vola05b_umean = IS_b.loc[IS_b['vola'] == 0.05, 'u_mean'].iloc[0]
vola06b_umean = IS_b.loc[IS_b['vola'] == 0.06, 'u_mean'].iloc[0]
vola07b_umean = IS_b.loc[IS_b['vola'] == 0.07, 'u_mean'].iloc[0]
vola08b_umean = .00170  # hard-coded — TODO confirm
In [134]:
# Type-b median up-moves per vol bucket; 8% value is hard-coded.
vola05b_umedian = IS_b.loc[IS_b['vola'] == 0.05, 'u_median'].iloc[0]
vola06b_umedian = IS_b.loc[IS_b['vola'] == 0.06, 'u_median'].iloc[0]
vola07b_umedian = IS_b.loc[IS_b['vola'] == 0.07, 'u_median'].iloc[0]
vola08b_umedian = .00160  # hard-coded — TODO confirm
In [135]:
#IS_a['eps+'] = 0
#IS_a[IS_a['vola'] == 0.05]['eps+'] == 0.05
#vola05b_umode
In [136]:
#IS_a.at[IS_a['ID'] == 160292028]['eps+'] = 0.01
#IS_a.loc[IS_a['ID'] == 160292028].at['eps+'] = 0.01

#IS_a[IS_a['vola'] == 0.05]['eps+']
#IS_a
In [137]:
# Upper bound of each vol bucket = the NEXT bucket's mean.
# The original wrote `x + (y - x)` expanded as `x + y - x`, which is just `y`
# (up to float rounding); assign the next mean directly.
vola05a_umean_upper = vola06a_umean
vola06a_umean_upper = vola07a_umean
vola07a_umean_upper = vola08a_umean
In [138]:
#vola05a_umean
In [139]:
#vola05a_umean_upper
In [140]:
#Match the OOS ARIMA 1-period vol forecast against the IS type-a mean buckets
#(note: the ARIMA values may be medians, so this may contrast medians vs means)
in_526 = (Vol['ARIMA'] > vola05a_umean) & (Vol['ARIMA'] < vola05a_umean_upper)
in_627 = (Vol['ARIMA'] > vola06a_umean) & (Vol['ARIMA'] < vola06a_umean_upper)
in_728 = (Vol['ARIMA'] > vola07a_umean) & (Vol['ARIMA'] < vola07a_umean_upper)
Vol526a = Vol[in_526]  #ARIMA vol inside the IS_a mean range 5% to 6%
Vol627a = Vol[in_627]  #6% to 7%
Vol728a = Vol[in_728]  #7% to 8%

#NOTE: breaks should be set at means 3.7, 4.6, and 4.99
In [141]:
#Vol526a
In [193]:
Vol728a.info()
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 240 entries, 2013-07-22 to 2016-09-23
Data columns (total 2 columns):
HAR      240 non-null float64
ARIMA    240 non-null float64
dtypes: float64(2)
memory usage: 5.6 KB
In [194]:
Vol728a['HAR'].unique()
Out[194]:
array([0.112, 0.109, 0.102, 0.086, 0.087, 0.08 , 0.081, 0.078, 0.088,
       0.091, 0.094, 0.095, 0.098, 0.089, 0.09 , 0.171, 0.145, 0.141,
       0.128, 0.114, 0.113, 0.107, 0.11 , 0.1  , 0.104, 0.105, 0.106,
       0.092, 0.093, 0.153, 0.117, 0.122, 0.116, 0.101, 0.099, 0.097,
       0.096, 0.085, 0.084, 0.079, 0.083, 0.082, 0.077, 0.074, 0.072,
       0.07 , 0.069, 0.076, 0.065, 0.066, 0.067, 0.075, 0.103, 0.139,
       0.16 , 0.136, 0.144, 0.108])
In [143]:
#Vol728a.iloc[0]
Vol728a.iloc[-1].name  #OR use Vol728a.index[-1]
#.iloc[-1].name extracts the LAST index value (a Timestamp) of Vol728a;
#it is used below as the end point of a date-range slice into OOS
Out[143]:
Timestamp('2016-09-23 00:00:00')
In [144]:
#Vol728a
#Mean OOS 'au' return over the full 7%-8% vol window, minus the IS mean
#(diff = OOS mean-by-vol - IS mean-by-vol; stored as a numpy float)
start, end = Vol728a.index[0], Vol728a.index[-1]
OOSau_728_all = OOS.loc[start:end, 'au'].mean() - vola07a_umean
print("{:.5f}".format(OOSau_728_all))
-0.00010
In [145]:
#OOSau_728_all.dtypes()
In [146]:
# Same OOS-IS mean diff for 'au' over the 6%-7% window.
start, end = Vol627a.index[0], Vol627a.index[-1]
OOSau_627_all = OOS.loc[start:end, 'au'].mean() - vola06a_umean
print("{:.5f}".format(OOSau_627_all))
-0.00006
In [147]:
# Same OOS-IS mean diff for 'au' over the 5%-6% window.
start, end = Vol526a.index[0], Vol526a.index[-1]
OOSau_526_all = OOS.loc[start:end, 'au'].mean() - vola05a_umean
print("{:.5f}".format(OOSau_526_all))
0.00002
In [148]:
# OOS-IS MEDIAN diff for 'bu' over the 7%-8% window.
start, end = Vol728a.index[0], Vol728a.index[-1]
OOSbu_728_all = OOS.loc[start:end, 'bu'].median() - vola07b_umedian
print("{:.6f}".format(OOSbu_728_all))
-0.000295
In [149]:
# OOS-IS median diff for 'bu' over the 6%-7% window.
start, end = Vol627a.index[0], Vol627a.index[-1]
OOSbu_627_all = OOS.loc[start:end, 'bu'].median() - vola06b_umedian
print("{:.6f}".format(OOSbu_627_all))
-0.000162
In [150]:
# OOS-IS median diff for 'bu' over the 5%-6% window.
start, end = Vol526a.index[0], Vol526a.index[-1]
OOSbu_526_all = OOS.loc[start:end, 'bu'].median() - vola05b_umedian
print("{:.6f}".format(OOSbu_526_all))
-0.000004
In [151]:
#Vol526a.head()
In [152]:
#Vol526a.tail()
In [153]:
#OOS['2016-01-05':'2016-02-11']['bu'].median() - vola07b_umedian
#ASK: why isn't mode working?  do we need another package for mode?
In [154]:
Vol526a_2012 = Vol526a.loc['2012']
#.loc is required for partial date-string ROW selection; Vol526a['2012']
#treats '2012' as a column label (KeyError) in pandas >= 2.0
#Vol526a_2012.info()
In [155]:
Vol526a_2013 = Vol526a.loc['2013']  # .loc: year-string row slice (pandas >= 2.0 safe)
#Vol526a_2013.info()
In [156]:
Vol526a_2014 = Vol526a.loc['2014']  # .loc: year-string row slice (pandas >= 2.0 safe)
#Vol526a_2014.info()
In [157]:
Vol526a_2015 = Vol526a.loc['2015']  # .loc: year-string row slice (pandas >= 2.0 safe)
#Vol526a_2015.info()
In [158]:
#Vol526a_2016 = Vol526a['2016-01-01':'2016-12-31']
#Vol526a_2016.info()
In [159]:
Vol526a.info()
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 395 entries, 2012-07-23 to 2015-12-11
Data columns (total 2 columns):
HAR      395 non-null float64
ARIMA    395 non-null float64
dtypes: float64(2)
memory usage: 9.3 KB

''' Observation: for vola07, OOS - IS is negative by >= 2 ticks, so no offset
adjustment is needed. For vola05/06, OOS - IS is positive, which does require
an offset — i.e., this is how we adjust the IS forecast to match the expected
(and realized) OOS estimators.
'''

In [160]:
#Vol526a_2012 = Vol526a['2012-01-01':'2012-12-31']
#Filter OOS vol by vol range 526 and then by year

#Vol526a_2012.info()
In [161]:
Vol627a.info()
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 101 entries, 2013-07-08 to 2016-09-30
Data columns (total 2 columns):
HAR      101 non-null float64
ARIMA    101 non-null float64
dtypes: float64(2)
memory usage: 2.4 KB
In [162]:
Vol627a_2013 = Vol627a.loc['2013']  # .loc: year-string row slice (pandas >= 2.0 safe)
#Vol627a_2013.info()
In [163]:
Vol627a_2015 = Vol627a.loc['2015']  # .loc: year-string row slice (pandas >= 2.0 safe)
#Vol627a_2015.info()
In [164]:
Vol627a_2016 = Vol627a.loc['2016']  # .loc: year-string row slice (pandas >= 2.0 safe)
#Vol627a_2016.head()
#Vol627a_2016.info()
In [165]:
Vol728a.info()
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 240 entries, 2013-07-22 to 2016-09-23
Data columns (total 2 columns):
HAR      240 non-null float64
ARIMA    240 non-null float64
dtypes: float64(2)
memory usage: 5.6 KB
In [166]:
Vol728a['ARIMA'].count()
Out[166]:
240
In [167]:
Vol728a_2015 = Vol728a.loc['2015']  # .loc: year-string row slice (pandas >= 2.0 safe)
#Vol728a_2015.info()
In [168]:
Vol728a_2016 = Vol728a.loc['2016']  # .loc: year-string row slice (pandas >= 2.0 safe)
Vol728a_2016.info()
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 121 entries, 2016-01-04 to 2016-09-23
Data columns (total 2 columns):
HAR      121 non-null float64
ARIMA    121 non-null float64
dtypes: float64(2)
memory usage: 2.8 KB
In [169]:
#NOTE: the slice could be simplified to just the date vector in the Vol526a_2012 dataframe

#Median OOS 'bu' return over the 2012 portion of the 5%-6% vol window,
#minus the IS median (diff of OOS vs IS median 'bu' by vol range '526')
start, end = Vol526a_2012.index[0], Vol526a_2012.index[-1]
OOSbu_526_2012 = OOS.loc[start:end, 'bu'].median() - vola05b_umedian
print("{:.6f}".format(OOSbu_526_2012))
0.000026
In [ ]:
 
In [170]:
# 2013 slice of the 5%-6% bucket.
start, end = Vol526a_2013.index[0], Vol526a_2013.index[-1]
OOSbu_526_2013 = OOS.loc[start:end, 'bu'].median() - vola05b_umedian
print("{:.6f}".format(OOSbu_526_2013))
0.000071
In [171]:
# 2014 slice of the 5%-6% bucket.
start, end = Vol526a_2014.index[0], Vol526a_2014.index[-1]
OOSbu_526_2014 = OOS.loc[start:end, 'bu'].median() - vola05b_umedian
print("{:.6f}".format(OOSbu_526_2014))
-0.000097
In [172]:
# 2015 slice of the 5%-6% bucket.
start, end = Vol526a_2015.index[0], Vol526a_2015.index[-1]
OOSbu_526_2015 = OOS.loc[start:end, 'bu'].median() - vola05b_umedian
print("{:.6f}".format(OOSbu_526_2015))
0.000068
In [173]:
# 2013 slice of the 6%-7% bucket.
start, end = Vol627a_2013.index[0], Vol627a_2013.index[-1]
OOSbu_627_2013 = OOS.loc[start:end, 'bu'].median() - vola06b_umedian
print("{:.6f}".format(OOSbu_627_2013))
0.000197
In [174]:
# 2015 slice of the 6%-7% bucket.
start, end = Vol627a_2015.index[0], Vol627a_2015.index[-1]
OOSbu_627_2015 = OOS.loc[start:end, 'bu'].median() - vola06b_umedian
print("{:.6f}".format(OOSbu_627_2015))
-0.000096
In [175]:
# 2016 slice of the 6%-7% bucket.
start, end = Vol627a_2016.index[0], Vol627a_2016.index[-1]
OOSbu_627_2016 = OOS.loc[start:end, 'bu'].median() - vola06b_umedian
print("{:.6f}".format(OOSbu_627_2016))
-0.000192
In [176]:
# 2015 slice of the 7%-8% bucket.
start, end = Vol728a_2015.index[0], Vol728a_2015.index[-1]
OOSbu_728_2015 = OOS.loc[start:end, 'bu'].median() - vola07b_umedian
print("{:.6f}".format(OOSbu_728_2015))
-0.000250
In [177]:
# 2016 slice of the 7%-8% bucket.
start, end = Vol728a_2016.index[0], Vol728a_2016.index[-1]
OOSbu_728_2016 = OOS.loc[start:end, 'bu'].median() - vola07b_umedian
print("{:.6f}".format(OOSbu_728_2016))
-0.000296
In [178]:
# Yearly OOS-IS 'bu' diffs for the 5% bucket; np.nan marks years without data
# and is rendered blank via fillna('').
pd.options.display.float_format = '{:,.6f}'.format

df_diff5 = pd.DataFrame(
    {'5%': [OOSbu_526_2012, OOSbu_526_2013, OOSbu_526_2014,
            OOSbu_526_2015, np.nan, OOSbu_526_all]},
    index=['2012', '2013', '2014', '2015', '2016', 'all'],
).fillna('')

df_diff5

#NOTE: 'all' result looks suspect
#NOTE: add column for count
Out[178]:
5%
2012 0.000026
2013 0.000071
2014 -0.000097
2015 0.000068
2016
all -0.000004
In [179]:
pd.options.display.float_format = '{:,.6f}'.format

# 6% bucket: results exist only for 2013, 2015, 2016 and 'all'.
df_diff6 = pd.DataFrame(
    {'6%': [np.nan, OOSbu_627_2013, np.nan, OOSbu_627_2015,
            OOSbu_627_2016, OOSbu_627_all]},
    index=['2012', '2013', '2014', '2015', '2016', 'all'],
).fillna('')
df_diff6
Out[179]:
6%
2012
2013 0.000197
2014
2015 -0.000096
2016 -0.000192
all -0.000162
In [180]:
pd.options.display.float_format = '{:,.6f}'.format

# 7% bucket: results exist only for 2015, 2016 and 'all'.
df_diff7 = pd.DataFrame(
    {'7%': [np.nan, np.nan, np.nan, OOSbu_728_2015,
            OOSbu_728_2016, OOSbu_728_all]},
    index=['2012', '2013', '2014', '2015', '2016', 'all'],
).fillna('')
df_diff7
Out[180]:
7%
2012
2013
2014
2015 -0.000250
2016 -0.000296
all -0.000295
In [181]:
# Join the three per-bucket columns side by side in one call.
# (axis=1 concatenates COLUMNS; the original comment said "by row", which was
# wrong, and two sequential concats were redundant.)
OOS_IS = pd.concat([df_diff5, df_diff6, df_diff7], axis=1, sort=False)

OOS_IS
#NOTE: add counts for each column, add to bottom
Out[181]:
5% 6% 7%
2012 0.000026
2013 0.000071 0.000197
2014 -0.000097
2015 0.000068 -0.000096 -0.000250
2016 -0.000192 -0.000296
all -0.000004 -0.000162 -0.000295
In [182]:
#Count of non-null 'bu' records over the full 5%-6% vol window.
#Fix: a count is an integer — use "{:d}" instead of "{:.6f}", which
#misleadingly printed 5392.000000.
OOSbu_526_all_ct = OOS[Vol526a.index[0]:Vol526a.index[-1]]['bu'].count()
print("{:d}".format(OOSbu_526_all_ct))
5392.000000
In [183]:
# Non-null 'bu' count over the 6%-7% window, printed as an integer
# (fix: "{:.6f}" misformatted the count as a float).
OOSbu_627_all_ct = OOS[Vol627a.index[0]:Vol627a.index[-1]]['bu'].count()
print("{:d}".format(OOSbu_627_all_ct))
6257.000000
In [184]:
# Non-null 'bu' count over the 7%-8% window, printed as an integer
# (fix: "{:.6f}" misformatted the count as a float).
OOSbu_728_all_ct = OOS[Vol728a.index[0]:Vol728a.index[-1]]['bu'].count()
print("{:d}".format(OOSbu_728_all_ct))
6111.000000
In [185]:
pd.options.display.float_format = '{:,.6f}'.format

# One-row frame of record counts per vol bucket.
counts = [OOSbu_526_all_ct, OOSbu_627_all_ct, OOSbu_728_all_ct]
df_count = pd.DataFrame([counts], index=['Counts'], columns=['5%', '6%', '7%'])
df_count = df_count.fillna('')
df_count
Out[185]:
5% 6% 7%
Counts 5392 6257 6111
In [186]:
# Append the counts row beneath the yearly diff rows (axis=0 stacks rows).
OOS_IS = pd.concat([OOS_IS, df_count], axis=0, sort=False)
OOS_IS


#NOTE: FIX VOL BREAKS
Out[186]:
5% 6% 7%
2012 0.000026
2013 0.000071 0.000197
2014 -0.000097
2015 0.000068 -0.000096 -0.000250
2016 -0.000192 -0.000296
all -0.000004 -0.000162 -0.000295
Counts 5392 6257 6111
In [187]:
#writer = pd.ExcelWriter('output.xlsx')

#OOS_IS.to_excel(writer,'Sheet1')
#writer.save()
#(optional) write the merged dataframe to 'output.xlsx', sheet 'Sheet1'

# Export the final OOS-IS comparison table as a standalone HTML file.
OOS_IS.to_html('index.html')
In [188]:
#NOTE: what are diffs by actual trades by trend type by vol, W/L ratio?
In [189]:
IS.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 36 entries, 0 to 35
Data columns (total 11 columns):
ID           36 non-null int64
firstdate    36 non-null datetime64[ns]
lastdate     36 non-null datetime64[ns]
type         36 non-null object
vola         36 non-null float64
u_mean       36 non-null float64
u_median     36 non-null float64
u_mode       36 non-null float64
d_mean       36 non-null float64
d_median     36 non-null float64
d_mode       36 non-null float64
dtypes: datetime64[ns](2), float64(7), int64(1), object(1)
memory usage: 3.2+ KB
In [190]:
OOS.info()
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 88630 entries, 2009-09-01 06:48:00 to 2016-10-05 14:07:31
Data columns (total 7 columns):
Begin_DateTime    88630 non-null datetime64[ns]
au                22058 non-null float64
bu                13223 non-null float64
cu                8914 non-null float64
ad                22029 non-null float64
bd                13661 non-null float64
cd                8745 non-null float64
dtypes: datetime64[ns](1), float64(6)
memory usage: 7.9 MB
In [191]:
Vol.info()
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 1042 entries, 2012-07-23 to 2016-09-30
Data columns (total 2 columns):
HAR      1042 non-null float64
ARIMA    1042 non-null float64
dtypes: float64(2)
memory usage: 24.4 KB
In [192]:
Vol['2013'].head()
Out[192]:
HAR ARIMA
date
2013-01-02 0.037000 0.000300
2013-01-03 0.035000 0.000300
2013-01-04 0.036000 0.000300
2013-01-07 0.038000 0.000328
2013-01-08 0.038000 0.000328
Notebooks AI
Notebooks AI Profile20060