Misc

Last updated: July 2nd, 2020 (2020-07-02) — Project preview
In [5]:
# Display the slider + plot layout; `layout` is assigned by the In [4] cell
# that appears below in this export.
show(layout)
In [4]:
from bokeh.layouts import column
from ipywidgets import interact
from bokeh.io import push_notebook, output_notebook, output_file, show
from bokeh.plotting import figure
from math import pi
from bokeh.models import CustomJS, ColumnDataSource, Slider
from sklearn.covariance import EmpiricalCovariance, MinCovDet
from pandas import DataFrame
import numpy as np

# Output target for Bokeh: render inline in the notebook. The commented-out
# call is the alternative that would write a standalone HTML file instead.
# output_file('mcd.html')
output_notebook()

# Sample sizes: the main point cloud and the pool of outlier points.
n_inliers, n_all_outliers = 250, 20

# Fixed seed so every run generates the same points.
np.random.seed(20)

# Matrices applied to standard-normal draws. They are used as linear maps
# (z @ A), so the covariance of the generated points is A.T @ A, not A.
inliers_cov = [[2, 0], [0, 1]]
outliers_cov = [[1, 4], [4, 1]]

# Draw order (inliers first, then outliers) determines the random stream.
inliers = np.random.randn(n_inliers, 2).dot(inliers_cov)
outliers = np.random.randn(n_all_outliers, 2).dot(outliers_cov)

df_in = DataFrame(inliers, columns=['x', 'y'])

# Outlier frame carries three alpha columns, all starting at 0 ...
df_out = DataFrame(outliers, columns=['x', 'y']).assign(
    pt_alpha=0,
    em_alpha=0,
    mcd_alpha=0,
)
# ... except row 0, whose two ellipse alpha columns start at 1.
df_out.loc[0, ['em_alpha', 'mcd_alpha']] = 1

def _ellipse_params(cov):
    """Return ``(height, width, theta)`` describing the ellipse of a 2x2 covariance.

    The precision matrix ``inv(cov)`` is rotated by ``theta`` so that it becomes
    diagonal; ``width`` and ``height`` are ``6 / sqrt(d)`` for the two diagonal
    entries ``d``, i.e. twice the semi-axes of the contour
    ``x.T @ inv(cov) @ x == 9``.

    Parameters
    ----------
    cov : array-like of shape (2, 2)
        Invertible covariance matrix.

    Returns
    -------
    tuple
        ``(height, width, theta)`` with ``theta`` in radians.

    Raises
    ------
    numpy.linalg.LinAlgError
        If ``cov`` is singular.
    """
    cov_inv = np.linalg.inv(cov)
    z11, z22, z12 = cov_inv[0, 0], cov_inv[1, 1], cov_inv[0, 1]

    # Rotation that zeroes the off-diagonal term: tan(2*theta) = 2*z12 / (z11 - z22).
    denom = z11 - z22
    if denom == 0:
        # Equal diagonal entries: the axes sit at +/-45 degrees, or anywhere
        # when z12 is also 0. Avoids the 0/0 -> NaN of a plain division.
        theta = np.sign(z12) * np.pi / 4
    else:
        theta = np.arctan(2 * z12 / denom) / 2

    rot = np.array([[np.cos(theta), -np.sin(theta)],
                    [np.sin(theta), np.cos(theta)]])
    wh = rot.transpose() @ cov_inv @ rot
    width, height = 6 / np.sqrt([wh[0, 0], wh[1, 1]])
    return height, width, theta


# One (height, width, theta) entry per outlier count, for each estimator.
em_ellipse_param = []
mcd_ellipse_param = []
# The loop variable is intentionally a module-level name: code that reads
# `n_outliers` after this loop keeps seeing its final value.
for n_outliers in range(n_all_outliers):
    # Inliers plus the first `n_outliers` outliers; built once and shared by
    # both estimators.
    sample = np.append(inliers, outliers[:n_outliers, :], axis=0)

    em_cov = EmpiricalCovariance(assume_centered=True).fit(sample).covariance_
    em_ellipse_param.append(_ellipse_params(em_cov))

    mcd_cov = MinCovDet(assume_centered=True).fit(sample).covariance_
    mcd_ellipse_param.append(_ellipse_params(mcd_cov))

# Transpose the list of triples into three columns per estimator.
df_out['em_h'], df_out['em_w'], df_out['em_th'] = np.array(em_ellipse_param).T
df_out['mcd_h'], df_out['mcd_w'], df_out['mcd_th'] = np.array(mcd_ellipse_param).T

# Wrap both frames so glyphs can refer to their columns by name.
src_in, src_out = ColumnDataSource(df_in), ColumnDataSource(df_out)

# Hover tooltip spec; defined here but not currently passed to figure().
TOOLTIPS = [("(x, y)", "(@x, @y)")]

p = figure(
    tools='box_zoom,reset,save',
    plot_height=400,
    plot_width=400,
    x_range=(-10, 10),
    y_range=(-10, 10),
)

# Scatter points: inliers in black, outliers in red with per-row opacity
# taken from the 'pt_alpha' column.
marker_kwargs = dict(x='x', y='y', line_color=None, size=6)
p.circle(fill_color='black', source=src_in, **marker_kwargs)
p.circle(fill_color='red', source=src_out, fill_alpha='pt_alpha', **marker_kwargs)

# One origin-centred ellipse outline per row of src_out for each estimator;
# the per-row '*_alpha' column decides which outline is visible.
outline_kwargs = dict(x=0, y=0, fill_alpha=0, line_width=2.5, source=src_out)
p.ellipse(
    width='mcd_w',
    height='mcd_h',
    angle='mcd_th',
    line_alpha='mcd_alpha',
    line_color='orange',
    **outline_kwargs
)
p.ellipse(
    width='em_w',
    height='em_h',
    angle='em_th',
    line_alpha='em_alpha',
    line_color='green',
    **outline_kwargs
)

# Single-point lines at x=-20, outside the visible x_range; presumably they
# exist only to produce the two legend entries.
legend_kwargs = dict(x=[-20], y=[0], line_width=2.5)
p.line(color='orange', legend_label='MCD', **legend_kwargs)
p.line(color='green', legend_label='MLE', **legend_kwargs)
p.legend.location = "top_right"

# JavaScript executed in the browser whenever the slider value changes.
# It clears every ellipse alpha, sets the point alpha to 1 for rows below the
# slider value (0 otherwise), then switches on the ellipse pair stored at the
# row whose index equals the slider value.
callback_code = """
    var data = src_out.data
    var n_outliers = cb_obj.value
    var pt_alpha = data['pt_alpha']
    var em_alpha = data['em_alpha']
    var mcd_alpha = data['mcd_alpha']
    
    for (var i = 0; i < pt_alpha.length; i++) {
        em_alpha[i] = 0
        mcd_alpha[i] = 0
        if (i < n_outliers)
            pt_alpha[i] = 1
        else
            pt_alpha[i] = 0
    }
    em_alpha[n_outliers] = 1
    mcd_alpha[n_outliers] = 1
    
    src_out.change.emit()
"""
callback = CustomJS(args=dict(src_out=src_out), code=callback_code)

# The slider value is used as a row index by the callback, so its maximum is
# derived from the outlier count (last valid index) rather than from whatever
# value an earlier loop variable happened to be left holding.
slider = Slider(start=0, end=n_all_outliers - 1, value=0, step=1, title='n_outliers')
slider.js_on_change('value', callback)

# Slider stacked above the plot.
layout = column(slider, p)
Loading BokehJS ...
Notebooks AI
Notebooks AI Profile20060