@@ -7,22 +7,22 @@ import Foundation
7
7
8
8
// Need a container to easily hold N Dimensional Vectors
9
9
/// A minimal container for an N-dimensional vector of `Double` components.
class VectorND : CustomStringConvertible {
    /// Components of the vector; fixed once the vector has been created.
    private let data: [Double]
    /// Number of components, captured from `data.count` in `init`.
    private let length: Int

    /// Creates a vector whose components are the elements of `d`.
    ///
    /// - Parameter d: The component values, in order.
    init(d: [Double]) {
        data = d
        length = d.count
    }

    /// Textual form of the vector, e.g. `VectorND ([1.0, 2.0])`.
    /// The closing parenthesis was previously missing from the output.
    var description: String { return "VectorND (\(data))" }

    /// Returns the components of the vector.
    func getData() -> [Double] { return data }

    /// Returns the number of components in the vector.
    func getLength() -> Int { return length }
}
22
22
23
- // Ability to use std operators on VectorND object
23
+ // MARK: VectorND Operators
24
24
func + ( left: VectorND , right: VectorND ) -> VectorND {
25
- var results = [ Float ] ( count: left. getLength ( ) , repeatedValue: 0.0 )
25
+ var results = [ Double ] ( count: left. getLength ( ) , repeatedValue: 0.0 )
26
26
for idx in 0 ..< left. getLength ( ) {
27
27
results [ idx] = left. getData ( ) [ idx] + right. getData ( ) [ idx]
28
28
}
@@ -31,35 +31,39 @@ func +(left: VectorND, right: VectorND) -> VectorND {
31
31
/// Adds `right` to `left` and stores the resulting vector back into `left`.
func += (inout left: VectorND, right: VectorND) {
    let sum = left + right
    left = sum
}
34
/// Divides every component of `left` by the scalar `right`.
///
/// - Returns: A new `VectorND` with the same number of components as `left`.
func / (left: VectorND, right: Double) -> VectorND {
    let quotients = left.getData().map { component in component / right }
    return VectorND(d: quotients)
}
41
/// Divides `left` by the scalar `right` and stores the resulting vector back into `left`.
func /= (inout left: VectorND, right: Double) {
    let scaled = left / right
    left = scaled
}
44
44
45
// MARK: Assist Functions
/// Picks `k` elements at random from `samples` using reservoir sampling.
///
/// The reservoir is seeded with the first `k` elements; every later element
/// at index `i` then replaces a random reservoir slot with probability
/// `k / (i + 1)`.
///
/// - Parameters:
///   - samples: The candidates to draw from.
///   - k: The number of elements wanted. Values larger than `samples.count`
///        are clamped (all samples are returned); values below zero are
///        treated as zero.
/// - Returns: An array of `min(max(k, 0), samples.count)` elements.
func reservoirSample(samples: [VectorND], k: Int) -> [VectorND] {
    // Clamp so that neither the seeding step nor the range below can trap
    // when the caller asks for as many (or more) elements than exist.
    let reservoirSize = max(0, min(k, samples.count))

    // Fill the reservoir with the first `reservoirSize` elements.
    var result = Array(samples.prefix(reservoirSize))

    // Randomly replace reservoir entries with the remaining elements.
    // The scan must start at index `reservoirSize` itself: starting one
    // later would make that element impossible to select.
    for i in reservoirSize ..< samples.count {
        let j = Int(arc4random_uniform(UInt32(i + 1)))
        if j < reservoirSize {
            result[j] = samples[i]
        }
    }
    return result
}
59
63
60
64
// Calculates the Euclidean distance between two VectorNDs
61
- func euclidean( v1: VectorND , v2: VectorND ) -> Float {
62
- var result : Float = 0.0
65
+ func euclidean( v1: VectorND , v2: VectorND ) -> Double {
66
+ var result = 0.0
63
67
for idx in 0 ..< v1. getLength ( ) {
64
68
result += pow ( v1. getData ( ) [ idx] - v2. getData ( ) [ idx] , 2.0 )
65
69
}
@@ -68,9 +72,9 @@ func euclidean(v1:VectorND, v2:VectorND) -> Float {
68
72
69
73
// Get the INDEX of nearest Center to X
70
74
func nearestCenter( x: VectorND , Centers: [ VectorND ] ) -> Int {
71
- var nearestDist = FLT_MAX
72
- var minIndex : Int = 0 ;
73
- // Calculate the distance from VectorND X to all the centers
75
+ var nearestDist = DBL_MAX
76
+ var minIndex = 0 ;
77
+
74
78
for (idx, c) in Centers . enumerate ( ) {
75
79
let dist = euclidean ( x, v2: c)
76
80
if dist < nearestDist {
@@ -81,50 +85,56 @@ func nearestCenter(x: VectorND, Centers: [VectorND]) -> Int {
81
85
return minIndex
82
86
}
83
87
84
// MARK: Main Function
/// Runs k-means clustering over `points` and returns the final cluster centers.
///
/// Each iteration assigns every point to its nearest center, recomputes each
/// center as the mean of its assigned points, and sums how far the centers
/// moved. Iteration stops once that total movement is no longer greater than
/// `convergeDist`.
///
/// - Parameters:
///   - numCenters: The number of clusters to look for.
///   - convergeDist: Total center movement at or below which iteration stops.
///   - points: The vectors to cluster; the dimension is taken from the first one.
/// - Returns: The cluster centers, or an empty array when `points` is empty
///   or `numCenters` is not positive.
func kMeans(numCenters: Int, convergeDist: Double, points: [VectorND]) -> [VectorND] {
    // Nothing to cluster: bail out instead of trapping on `points[0]`.
    guard let firstPoint = points.first where numCenters > 0 else { return [] }

    var centerMoveDist = 0.0
    let zeros = [Double](count: firstPoint.getLength(), repeatedValue: 0.0)

    // 1. Choose k random VectorNDs as the initial centers
    var kCenters = reservoirSample(points, k: numCenters)
    // Size all per-center bookkeeping from the centers actually selected.
    let centerCount = kCenters.count

    // Do the following steps until convergence
    repeat {
        var cnts = [Double](count: centerCount, repeatedValue: 0.0)
        var newCenters = [VectorND](count: centerCount, repeatedValue: VectorND(d: zeros))

        // 2. Assign VectorNDs to centers
        //    a. Determine which center each VectorND is closest to
        //    b. Record how many VectorNDs are assigned to each center
        for p in points {
            let c = nearestCenter(p, Centers: kCenters)
            cnts[c] += 1
            newCenters[c] += p
        }

        // 3. Calculate the new centers
        for idx in 0 ..< centerCount {
            if cnts[idx] > 0 {
                newCenters[idx] /= cnts[idx]
            } else {
                // An empty cluster would be divided by zero and turn into
                // NaN, which also poisons the convergence test below.
                // Keep the previous center instead.
                newCenters[idx] = kCenters[idx]
            }
        }

        // 4. Determine how far the centers moved
        centerMoveDist = 0.0
        for idx in 0 ..< centerCount {
            centerMoveDist += euclidean(kCenters[idx], v2: newCenters[idx])
        }

        // 5. Update centers to the newly calculated ones
        kCenters = newCenters
        print("Complete iteration coverge(\(centerMoveDist) <? \(convergeDist))")
    } while (centerMoveDist > convergeDist)
    return kCenters
}
118
123
124
+ // MARK: Sample Data
119
125
// Build `numPoints` random vectors of `numDimmensions` components each;
// every component is drawn from 0 ..< numPoints * numDimmensions.
var points = [VectorND]()
let numPoints = 10
let numDimmensions = 5
for _ in 0 ..< numPoints {
    let data: [Double] = (0 ..< numDimmensions).map { _ in
        Double(arc4random_uniform(UInt32(numPoints * numDimmensions)))
    }
    points.append(VectorND(d: data))
}

// Cluster the sample data into three groups and print each resulting center.
print("\nCenters")
kMeans(3, convergeDist: 0.01, points: points).forEach { center in
    print(center)
}
140
+
0 commit comments