"""
K-means implementation.

Seems okay: this has not been tested rigorously, but it separates
'Iris-setosa' pretty well from 'Iris-versicolor' and 'Iris-virginica',
while mixing up the latter two.

Dataset: Iris dataset.
"""
10
+
11
+ from load_data import load_iris
12
+ import numpy as np
13
+
14
+
15
# Load the Iris data once, at import time. `std=True` is forwarded to
# load_iris (presumably it requests standardised features, and `y` holds the
# class labels -- confirm against load_data).
(X_std, y) = load_iris(std=True)
16
+
17
+
18
def k_means(X, num_means=3, num_iterations=10):
    """Cluster the rows of ``X`` with the standard (Lloyd's) k-means loop.

    Centroids are initialised uniformly at random in ``[0, 1)`` along every
    dimension (``np.random.random``) and then refined by alternating an
    assignment step and a centroid-update step.

    Args:
        X: 2-D array-like of shape ``(N, D)``; each row is one datapoint.
        num_means: Number of clusters (centroids) to fit.
        num_iterations: Maximum number of assign/update rounds. The loop
            stops earlier once the assignments no longer change, because
            further rounds would reproduce the same centroids.

    Returns:
        A ``(means, assignments)`` tuple. ``means`` is a
        ``(num_means, D)`` float array of centroids. ``assignments`` is a
        length-``N`` float array holding the cluster index of each row
        (float dtype is kept for backward compatibility with callers).

    Notes:
        A cluster that receives no points keeps its previous centroid.
        Averaging an empty set would yield NaN, and ``np.argmin`` treats
        NaN as the minimum, which would pull every point into the broken
        cluster on the next round.
    """
    X = np.asarray(X, dtype=float)
    N, D = X.shape

    assignments = np.zeros(N)

    # 1. Initialise the centroids.
    means = np.random.random((num_means, D))

    # 2. Iterate.
    for iteration in range(num_iterations):
        # 2a(i). Squared Euclidean distance from every point to every
        # centroid, shape (N, num_means), computed via broadcasting.
        diffs = X[:, np.newaxis, :] - means[np.newaxis, :, :]
        dists = np.sum(diffs ** 2, axis=2)

        # 2a(ii). Assign each point to its nearest centroid.
        new_assignments = np.argmin(dists, axis=1).astype(float)

        # Converged: the centroids were already computed from exactly these
        # assignments in the previous round, so another update is a no-op.
        # Never stop on the first round: the initial `assignments` are
        # placeholders, not the result of an update.
        if iteration > 0 and np.array_equal(new_assignments, assignments):
            break
        assignments = new_assignments

        # 2b. Recompute each centroid as the mean of its members.
        for k in range(num_means):
            members = X[assignments == k]
            if members.shape[0] > 0:
                means[k] = members.mean(axis=0)
            # else: empty cluster -- keep the old centroid (see Notes).

    return means, assignments
45
+
46
+
47
# Run k-means on the loaded data using the function's default cluster count
# and iteration budget.
means, assignments = k_means(X=X_std)

# Print the predicted cluster index of every row followed by `y`
# (presumably the true labels -- confirm against load_data) for a quick
# visual comparison.
print(assignments, y)
0 commit comments