# Meta - Lesson 3

Last updated: October 16th, 2020
In [1]:
import numpy as np
import pandas as pd
import altair as alt
from sklearn.cluster import KMeans
In [2]:
# Select Altair's 'mimetype' renderer for the charts displayed in this notebook.
alt.renderers.enable('mimetype')
Out[2]:
RendererRegistry.enable('mimetype')
In [3]:
In [4]:
Out[4]:
sepallength sepalwidth petallength petalwidth class
0 5.1 3.5 1.4 0.2 Iris-setosa
1 4.9 3.0 1.4 0.2 Iris-setosa
2 4.7 3.2 1.3 0.2 Iris-setosa
3 4.6 3.1 1.5 0.2 Iris-setosa
4 5.0 3.6 1.4 0.2 Iris-setosa
In [5]:
# Feature matrix for clustering: the four flower measurements as a NumPy array.
# Columns are selected by name rather than by position so the selection does
# not depend on column order or on extra columns being present in `df`.
feature_columns = ['sepallength', 'sepalwidth', 'petallength', 'petalwidth']
x = df[feature_columns].to_numpy()

# Cluster Model

In [6]:
# Notebook parameters.
COLOR_BY = 'cluster'  # column used to colour points: 'cluster' or 'class'
CLUSTERS = 4          # number of clusters for the main k-means fit
In [7]:
# Working names derived from the notebook parameters.
color_by = COLOR_BY   # colour points by 'cluster' or 'class'
clusters = CLUSTERS   # k used for the main k-means fit

# Fixed settings for the cells below.
sample_size = 5       # number of rows shown when previewing the clustered data
max_clusters = 11     # exclusive upper bound of the elbow sweep (k = 1..10)
In [8]:
# Fit k-means with the configured number of clusters and label every row.
# random_state is fixed so the cluster assignments (and their numbering) are
# the same on every Restart & Run All; without it each run can relabel rows.
kmeans = KMeans(n_clusters=clusters, random_state=0)
y_kmeans = kmeans.fit_predict(x)
In [9]:
# Attach each row's k-means label as a 'cluster' column and preview a few
# random rows.
# NOTE: `clustered_data` is a second name for `df`, not a copy, so `df`
# gains the 'cluster' column as well.
df['cluster'] = y_kmeans
clustered_data = df
clustered_data.sample(sample_size)
Out[9]:
sepallength sepalwidth petallength petalwidth class cluster
124 6.7 3.3 5.7 2.1 Iris-virginica 2
50 7.0 3.2 4.7 1.4 Iris-versicolor 0
79 5.7 2.6 3.5 1.0 Iris-versicolor 3
42 4.4 3.2 1.3 0.2 Iris-setosa 1
68 6.2 2.2 4.5 1.5 Iris-versicolor 0
In [10]:
# Elbow method: fit k-means for k = 1 .. max_clusters - 1 and record the
# inertia (within-cluster sum of squared distances) of each fit.
Error = []
cluster_counts = range(1, max_clusters)

for k in cluster_counts:
    # Fit once per k (the second, redundant fit is gone) under a separate name
    # so the main `kmeans` model fitted earlier is not overwritten.
    # random_state is fixed so the curve is reproducible between runs.
    elbow_model = KMeans(n_clusters=k, random_state=0).fit(x)
    Error.append(elbow_model.inertia_)

elbow_data = pd.DataFrame({"Error": Error, "Clusters": cluster_counts})

# Line chart of inertia against the number of clusters.
elbow_plot = alt.Chart(elbow_data).mark_line().encode(
    x="Clusters",
    y="Error"
)

elbow_plot
Out[10]:
In [11]:
# Scatter-plot grid: each petal measurement (rows) against each sepal
# measurement (columns), with points coloured by their 'cluster' label.
alt.Chart(clustered_data).mark_point().encode(
    x=alt.X(alt.repeat("column"), type='quantitative'),
    y=alt.Y(alt.repeat("row"), type='quantitative'),
    color=alt.Color('cluster:N'),
).properties(
    height=200,
    width=200,
).repeat(
    column=['sepallength', 'sepalwidth'],
    row=['petallength', 'petalwidth'],
).interactive()
Out[11]:
In [12]:
# Petal length vs. petal width, one panel per class, coloured by the column
# named in `color_by`.
# The chart reads `clustered_data` (the frame carrying the 'cluster' column)
# so its dependency on the clustering step is explicit rather than hidden.
# Fail early with a clear message if the configured colour column is missing.
assert color_by in clustered_data.columns, (
    "color_by=" + repr(color_by) + " is not a column of clustered_data"
)

alt.Chart(clustered_data).mark_point().encode(
    x='petallength:Q',
    y='petalwidth:Q',
    color=color_by + ':N',
    column='class:N'
).properties(
    width=180,
    height=180
)
Out[12]: