import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
from sklearn.datasets.samples_generator import make_blobs
import seaborn as sns; sns.set()
# Cluster the samples of data_1024_kMeans.csv into K groups with k-means and
# draw every sample coloured/marked by the cluster it was assigned to.

# The file is tab-separated; its first line is skipped and the explicit
# column names below are used instead.
df = pd.read_csv(
    "data_1024_kMeans.csv",
    names=['Distance_Feature', 'Speeding_Feature'],
    sep='\t',
    thousands=',',
    skiprows=1,
)
# print(df.head())
f1 = df['Distance_Feature'].values
f2 = df['Speeding_Feature'].values

# Optional: preview of the raw, unclustered data (visual check only).
# plt.title('Dataset')
# plt.scatter(f1, f2)
# plt.show()

# One row per sample, one column per feature -> shape (n_samples, 2).
X = np.column_stack((f1, f2))

K = 4  # number of clusters
colors = ['b', 'g', 'r', 'y']
# NOTE(review): clusters 2 and 3 share the 's' marker and differ only by
# colour -- confirm whether that is intended.
markers = ['o', 'v', 's', 's']
if K > min(len(colors), len(markers)):
    # Each cluster label indexes into both lists below; fail with a clear
    # message instead of an IndexError in the middle of plotting.
    raise ValueError("K exceeds the number of configured colors/markers")

# KMeans algorithm
kmeans_model = KMeans(n_clusters=K).fit(X)
labels = kmeans_model.labels_

# Draw one marker-only line per cluster rather than one per sample: the
# picture is the same but matplotlib handles K artists instead of
# len(f1) artists.
for cluster in range(K):
    members = labels == cluster
    plt.plot(
        f1[members],
        f2[members],
        color=colors[cluster],
        marker=markers[cluster],
        ls='None',
    )
plt.show()
# Data source: data_1024_kMeans.xls (300544)