Skip to content

Commit c9f861e

Browse files
committed
feat(algs): add k means
1 parent fd95155 commit c9f861e

File tree

1 file changed

+49
-0
lines changed

1 file changed

+49
-0
lines changed

algorithms/k_means.py

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
"""
2+
K-means implementation
3+
4+
Seems okay: Have not tested this rigorously, but this separates 'Iris-setosa'
5+
pretty well from 'Iris-versicolor' and 'Iris-virginica',
6+
but mixes the latter two.
7+
8+
Dataset: Iris dataset.
9+
"""
10+
11+
from load_data import load_iris
12+
import numpy as np
13+
14+
15+
# Load the Iris data once, at import time. `std=True` presumably asks
# load_iris for standardised features (hence the name X_std), and `y` is
# presumably the species labels -- confirm both in load_data.load_iris.
X_std, y = load_iris(std=True)
16+
17+
18+
def k_means(X, num_means=3, num_iterations=10):
    """Cluster the rows of ``X`` with a fixed number of k-means iterations.

    Each iteration assigns every datapoint to its nearest centroid (squared
    Euclidean distance) and then moves each centroid to the mean of the
    points assigned to it. There is no convergence check: exactly
    ``num_iterations`` rounds are run.

    Args:
        X: 2-D array-like of shape (N, D); one datapoint per row.
        num_means: Number of centroids (clusters).
        num_iterations: Number of assign/update rounds to perform.

    Returns:
        A tuple ``(means, assignments)`` where ``means`` is a
        (num_means, D) float array of centroids and ``assignments`` is a
        length-N float array holding, for each datapoint, the index of the
        centroid it was assigned to in the final round (all zeros if
        ``num_iterations`` is 0).

    Note:
        Centroids are initialised uniformly at random in [0, 1) per
        coordinate via ``np.random.random``, so results depend on NumPy's
        global random state.
    """
    # Accept nested lists as well as arrays.
    X = np.asarray(X, dtype=float)
    N, D = X.shape

    assignments = np.zeros(N)
    dists = np.zeros((N, num_means))

    # 1. Initialise the centroids.
    means = np.random.random((num_means, D))

    # 2. Iterate.
    for _ in range(num_iterations):
        # 2a(i). Squared distance from every point to every centroid;
        # broadcasting subtracts the centroid from each row of X.
        for k in range(num_means):
            dists[:, k] = np.sum((X - means[k]) ** 2, axis=1)

        # 2a(ii). Nearest centroid per point. Cast back to float so the
        # returned array keeps the dtype callers have always received.
        assignments = np.argmin(dists, axis=1).astype(float)

        # 2b. Recompute each centroid from its members.
        for k in range(num_means):
            members = X[assignments == k]
            # An empty cluster has no mean: averaging it would yield NaN,
            # and a NaN centroid can never win a later argmin. Keep the
            # previous centroid so it can still attract points.
            if len(members):
                means[k] = members.mean(axis=0)

    return means, assignments
45+
46+
47+
# Cluster X_std using k_means' defaults (3 means, 10 iterations).
means, assignments = k_means(X_std)
48+
49+
# Print the cluster index chosen for each datapoint next to y for a by-eye
# comparison. Cluster indices are arbitrary, so they need not equal the
# values in y even when the grouping agrees.
print(assignments, y)

0 commit comments

Comments
 (0)