# K-Means in Python based on a CSV Source

 

 

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
from sklearn.datasets.samples_generator import make_blobs
import seaborn as sns; sns.set()

# Load the data set from a tab-separated file. The first line of the file is
# skipped and the column names are supplied explicitly; "," is parsed as a
# thousands separator.
df = pd.read_csv(
    "data_1024_kMeans.csv",
    names=['Distance_Feature', 'Speeding_Feature'],
    sep='\t',
    thousands=',',
    skiprows=1,
)

# Debug aid: uncomment to inspect the first rows of the loaded frame.
# print(df.head())

f1 = df['Distance_Feature'].values
f2 = df['Speeding_Feature'].values

# ==================================
# Enabling the Scatter (only visual)
# ==================================
# Uncomment to look at the raw, unclustered data first.
# plt.title('Dataset')
# plt.scatter(f1, f2)
# plt.show()

# Feature matrix with one (f1, f2) row per sample, as passed to KMeans.fit.
X = np.column_stack((f1, f2))

# One colour/marker pair per cluster index; both lists must have at least
# K entries because they are indexed by the cluster label below.
colors = ['b', 'g', 'r', 'y']
markers = ['o', 'v', 's', 's']

# KMeans algorithm
K = 4  # number of clusters
kmeans_model = KMeans(n_clusters=K).fit(X)

# Draw each cluster with a single plot call (instead of one call per point),
# selecting its members with a boolean mask over the fitted labels.
labels = kmeans_model.labels_
for cluster in range(K):
    members = labels == cluster
    plt.plot(
        f1[members],
        f2[members],
        color=colors[cluster],
        marker=markers[cluster],
        ls='None',  # markers only, no connecting line
    )

plt.show()

 

# Datasource: data_1024_kMeans.xls (300544)