import pandas as pd
# Load the iris table from the working directory and preview its first rows.
data = pd.read_csv('iris.csv')
data.head(n=5)

from collections import Counter
# Tally how many rows belong to each value of the `variety` column.
Counter(data.variety)
Counter({'Setosa': 50, 'Versicolor': 50, 'Virginica': 50})
# (rows, columns) of the loaded frame.
data.shape
(150, 5)
from sklearn.cluster import KMeans

# Feature matrix: every column except the last one.
X = data.iloc[:, :-1]

# Three clusters with a fixed seed; fit() returns the fitted estimator,
# so construction and fitting can be written as one expression.
model = KMeans(n_clusters=3, random_state=10).fit(X)

# Cluster id assigned to each row of X.
model.labels_
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 2, 2, 2, 2, 1, 2, 2, 2,
       2, 2, 2, 1, 1, 2, 2, 2, 2, 1, 2, 1, 2, 1, 2, 2, 1, 1, 2, 2, 2, 2,
       2, 1, 2, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 1])
# Number of rows assigned to each cluster id.
Counter(model.labels_)
Counter({0: 50, 1: 62, 2: 38})
import numpy as np
# Palette stored as a NumPy array so it can be indexed with a whole array of
# integer ids at once (one colour per id 0, 1, 2).
color = np.array([
    'red',
    'blue',
    'green',
])
color[1]
'blue'
import matplotlib.pyplot as plt
%matplotlib inline
# Uncoloured scatter of petal length against petal width.
plt.scatter(X.petallength,X.petalwidth)
<matplotlib.collections.PathCollection at 0x24c72f26208>

from sklearn.preprocessing import LabelEncoder

# Encode the variety names as integers so they can index the `color` array.
# Bracket assignment is used because attribute-style assignment on a
# DataFrame silently creates a plain attribute if the column is missing.
enc = LabelEncoder()
data['variety'] = enc.fit_transform(data['variety'])

# plt.subplots() creates its own figure; calling plt.figure() first only
# leaves an extra, empty figure behind, so that call is omitted.
fig, ax = plt.subplots(1, 2)
ax[0].set_title('Actual Iris Data')
ax[1].set_title('KMean Clustered')

# Left panel: points coloured by encoded variety.
# Right panel: points coloured by KMeans cluster id.
# NOTE: cluster ids are arbitrary, so colours on the two panels correspond
# only when the ids happen to line up with the encoded varieties.
ax[0].scatter(X.petallength, X.petalwidth, color=color[data.variety])
ax[1].scatter(X.petallength, X.petalwidth, color=color[model.labels_])
<matplotlib.collections.PathCollection at 0x24c731569e8>




<Figure size 432x288 with 0 Axes>

PCA — уменьшение размерности

from sklearn.decomposition import PCA

# Fit PCA with its default settings and project X onto the components.
pca = PCA()
XP = pca.fit_transform(X)

# Fraction of the total variance explained by each component.
pca.explained_variance_ratio_
array([0.92461872, 0.05306648, 0.01710261, 0.00521218])
from sklearn.cluster import KMeans

# Keep only the first two columns of the PCA projection.
X_pca = XP[:, [0, 1]]

# Three clusters with a fixed seed, fitted on the reduced data; fit()
# returns the fitted estimator, so it is chained onto the constructor.
model = KMeans(n_clusters=3, random_state=10).fit(X_pca)

# Cluster id assigned to each row of X_pca.
model.labels_
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 1, 1, 1, 1, 2, 1, 1, 1,
       1, 1, 1, 2, 2, 1, 1, 1, 1, 2, 1, 2, 1, 2, 1, 1, 2, 2, 1, 1, 1, 1,
       1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 2, 1, 1, 2])
# Number of rows assigned to each cluster id after clustering on X_pca.
Counter(model.labels_)
Counter({0: 50, 1: 39, 2: 61})