|
| 1 | +// |
| 2 | +// KMeans.swift |
| 3 | +// |
| 4 | +// Created by John Gill on 2/25/16. |
| 5 | + |
| 6 | +import Foundation |
| 7 | + |
/// Immutable container for an N-dimensional vector of `Double` components.
///
/// The components are fixed at construction time; the arithmetic operators
/// defined for this type build new instances instead of mutating one.
class VectorND: CustomStringConvertible {
    /// The vector's components, in order.
    private let data: [Double]

    /// Creates a vector holding the components in `d`.
    ///
    /// - Parameter d: The component values; may be empty.
    init(d: [Double]) {
        data = d
    }

    /// Textual form, e.g. `VectorND ([1.0, 2.0])`.
    var description: String { return "VectorND (\(data))" }

    /// Returns the components as an array.
    func getData() -> [Double] { return data }

    /// Returns the number of components.
    func getLength() -> Int { return data.count }
}
| 22 | + |
| 23 | +// MARK: VectorND Operators |
/// Element-wise sum of two vectors.
///
/// The result has as many components as `left`. `right` is read at the same
/// positions, so it must hold at least that many components.
func +(left: VectorND, right: VectorND) -> VectorND {
    let lhs = left.getData()
    let rhs = right.getData()
    let sums = lhs.indices.map { lhs[$0] + rhs[$0] }
    return VectorND(d: sums)
}
/// Adds `right` to `left` element-wise and stores the result in `left`.
///
/// `left` is rebound to the vector produced by `+`.
func +=(inout left: VectorND, right: VectorND) {
    let sum = left + right
    left = sum
}
/// Divides every component of `left` by the scalar `right`.
///
/// - Returns: A new vector with the same number of components as `left`.
func /(left:VectorND, right: Double) -> VectorND {
    let quotients = left.getData().map { $0 / right }
    return VectorND(d: quotients)
}
/// Divides every component of `left` by `right` and stores the result in `left`.
///
/// `left` is rebound to the vector produced by `/`.
func /=(inout left: VectorND, right: Double) {
    let quotient = left / right
    left = quotient
}
| 44 | + |
| 45 | +// MARK: Assist Functions |
/// Picks up to `k` elements at random from `samples` using reservoir sampling.
///
/// - Parameter samples: The pool of vectors to draw from.
/// - Parameter k: How many elements to pick. Values below zero are treated as
///   zero and values above `samples.count` are treated as `samples.count`.
/// - Returns: The chosen elements; `min(max(k, 0), samples.count)` of them.
func reservoirSample(samples:[VectorND], k:Int) -> [VectorND] {
    let reservoirSize = min(max(k, 0), samples.count)

    // Seed the reservoir with the leading elements.
    var result = Array(samples[0..<reservoirSize])

    // Nothing can be swapped in when the reservoir is empty or already holds
    // every sample; returning here also avoids building an empty/invalid range.
    guard reservoirSize > 0 && reservoirSize < samples.count else { return result }

    // Give each remaining element its chance to replace a reservoir slot.
    // The scan starts at `reservoirSize` itself so that the first element
    // outside the initial fill is not skipped.
    for i in reservoirSize..<samples.count {
        let j = Int(arc4random_uniform(UInt32(i + 1)))
        if j < reservoirSize {
            result[j] = samples[i]
        }
    }
    return result
}
| 63 | + |
/// Euclidean distance between `v1` and `v2`.
///
/// Iterates over the components of `v1` and reads `v2` at the same positions,
/// so `v2` must hold at least as many components as `v1`.
func euclidean(v1:VectorND, v2:VectorND) -> Double {
    let a = v1.getData()
    let b = v2.getData()
    // Accumulate squared differences in index order.
    let squaredSum = a.indices.reduce(0.0) { partial, i in
        partial + pow(a[i] - b[i], 2.0)
    }
    return sqrt(squaredSum)
}
| 72 | + |
/// Returns the index in `Centers` of the center closest to `x`.
///
/// Ties go to the lowest index. If no center is strictly closer than
/// `DBL_MAX` (including when `Centers` is empty) the result is 0.
func nearestCenter(x: VectorND, Centers: [VectorND]) -> Int {
    var best = (index: 0, distance: DBL_MAX)

    for candidate in Centers.indices {
        let distance = euclidean(x, v2: Centers[candidate])
        // Only a strictly smaller distance displaces the current best.
        guard distance < best.distance else { continue }
        best = (index: candidate, distance: distance)
    }
    return best.index
}
| 87 | + |
| 88 | +// MARK: Main Function |
/// Clusters `points` by iterating k-means until the centers stop moving.
///
/// Prints one progress line per iteration.
///
/// - Parameter numCenters: Number of clusters requested; capped at
///   `points.count`.
/// - Parameter convergeDist: Iteration stops once the summed Euclidean distance
///   moved by all centers in one pass is no greater than this value. It should
///   be non-negative: a sum of distances can never drop below zero, so a
///   negative threshold would never be met.
/// - Parameter points: The vectors to cluster. The dimension of the first
///   point is used for the accumulators — assumes all points share it.
/// - Returns: The final centers, or an empty array when `numCenters` is not
///   positive or `points` is empty.
func kMeans(numCenters: Int, convergeDist: Double, points: [VectorND]) -> [VectorND] {
    guard numCenters > 0 && !points.isEmpty else { return [] }

    let zeros = [Double](count: points[0].getLength(), repeatedValue: 0.0)

    // 1. Choose random points as the initial centers
    var kCenters = reservoirSample(points, k: min(numCenters, points.count))
    let centerCount = kCenters.count
    var centerMoveDist = 0.0

    // do following steps until convergence
    repeat {
        var cnts = [Double](count: centerCount, repeatedValue: 0.0)
        // Every slot starts out referencing the same zero vector. That is safe
        // because `+=` rebinds the slot to a new vector rather than mutating.
        var sums = [VectorND](count: centerCount, repeatedValue: VectorND(d: zeros))

        // 2. Assign points to centers
        //    a. Determine which center each point is closest to
        //    b. Record how many points are assigned to each center
        for p in points {
            let c = nearestCenter(p, Centers: kCenters)
            cnts[c] += 1
            sums[c] += p
        }

        // 3. Calculate the new centers as the mean of the assigned points.
        //    A center that attracted no points keeps its previous position;
        //    dividing by a zero count would turn it into NaN.
        var newCenters = [VectorND]()
        for idx in 0..<centerCount {
            if cnts[idx] > 0 {
                newCenters.append(sums[idx] / cnts[idx])
            } else {
                newCenters.append(kCenters[idx])
            }
        }

        // 4. Determine how far the centers moved
        centerMoveDist = 0.0
        for idx in 0..<centerCount {
            centerMoveDist += euclidean(kCenters[idx], v2: newCenters[idx])
        }

        // 5. Update centers to the newly calculated ones
        kCenters = newCenters
        print("Complete iteration coverge(\(centerMoveDist) <? \(convergeDist))")
    } while centerMoveDist > convergeDist

    return kCenters
}
| 123 | + |
| 124 | +// MARK: Sample Data |
// Demo input: `numPoints` random vectors of `numDimmensions` components each.
// Every component is a whole number below numPoints*numDimmensions.
var points = [VectorND]()
let numPoints = 10
let numDimmensions = 5
while points.count < numPoints {
    let components = (0..<numDimmensions).map { _ in
        Double(arc4random_uniform(UInt32(numPoints*numDimmensions)))
    }
    points.append(VectorND(d: components))
}

// Cluster the demo points into three groups and print each resulting center.
print("\nCenters")
kMeans(3, convergeDist: 0.01, points: points).forEach { print($0) }
0 commit comments