|
//: Playground - noun: a place where people can play
|
2 |
| - |
3 |
import Foundation

/// A Bloom filter: a fixed-size probabilistic set.
///
/// Inserting an element marks one slot per hash function. A query answers
/// "possibly inserted" (`true`) or "definitely not inserted" (`false`), so
/// false positives are possible but false negatives are not.
public class BloomFilter<T: Hashable> {
    /// Slot storage; `true` marks a slot selected by at least one inserted element.
    fileprivate var array: [Bool]

    /// Hash functions applied to every element; each one selects a single slot.
    private var hashFunctions: [(T) -> Int]

    /// Creates an empty filter.
    ///
    /// - Parameters:
    ///   - size: Number of slots in the filter. Defaults to 1024.
    ///   - hashFunctions: Functions mapping an element to an arbitrary `Int`;
    ///     each result is reduced to a slot index.
    /// - Precondition: `hashFunctions.count <= size`. Violating it terminates
    ///   the program via `fatalError`.
    public init(size: Int = 1024, hashFunctions: [(T) -> Int]) {
        guard hashFunctions.count <= size else {
            fatalError("Number of hash functions should be less than or equal to size of array")
        }
        array = [Bool](repeating: false, count: size)
        self.hashFunctions = hashFunctions
    }

    /// Returns one slot index per hash function for `value`.
    private func computeHashes(_ value: T) -> [Int] {
        // The remainder's magnitude is always below `array.count`, so taking
        // `abs` yields a valid, non-negative index even for negative hashes.
        return hashFunctions.map { hashFunc in abs(hashFunc(value) % array.count) }
    }

    /// Adds a single element by marking every slot its hashes select.
    public func insert(_ element: T) {
        for index in computeHashes(element) {
            array[index] = true
        }
    }

    /// Adds every element of `values`, in order.
    public func insert(_ values: [T]) {
        for value in values {
            insert(value)
        }
    }

    /// Reports whether `value` may have been inserted.
    ///
    /// - Returns: `false` if `value` was definitely never inserted; `true` if
    ///   every slot selected by its hashes is set. A `true` result does NOT
    ///   imply the value is in the filter, only that it may be.
    public func query(_ value: T) -> Bool {
        // One unset slot is enough to rule the value out, so stop at the first.
        for index in computeHashes(value) where !array[index] {
            return false
        }
        return true
    }

    /// Reports whether no slot has been set, i.e. nothing has been inserted.
    public func isEmpty() -> Bool {
        // `contains` stops scanning at the first `true` slot.
        return !array.contains(true)
    }
}
|
49 | 51 |
|
/* Two hash functions, adapted from http://www.cse.yorku.ca/~oz/hash.html */
|
51 | 53 |
|
/// Computes the djb2 hash of a string.
///
/// Starting from 5381, each UTF-8 byte is folded in as `hash * 33 + byte`
/// (the multiplication is written as `(hash << 5) + hash`). Additions wrap on
/// overflow instead of trapping, so the result may be negative.
///
/// The bytes themselves are hashed rather than `Character.hashValue`, because
/// `hashValue` is not the character code and is not stable across program
/// launches.
///
/// - Parameter x: The string to hash.
/// - Returns: The hash; 5381 for the empty string.
func djb2(x: String) -> Int {
    return x.utf8.reduce(5381) { hash, byte in
        ((hash << 5) &+ hash) &+ Int(byte)
    }
}
|
59 | 61 |
|
/// Computes the sdbm hash of a string.
///
/// Starting from 0, each UTF-8 byte is folded in as
/// `byte + (hash << 6) + (hash << 16) - hash`. Additions and the subtraction
/// wrap on overflow instead of trapping, so the result may be negative.
///
/// The bytes themselves are hashed rather than `Character.hashValue`, because
/// `hashValue` is not the character code and is not stable across program
/// launches.
///
/// - Parameter x: The string to hash.
/// - Returns: The hash; 0 for the empty string.
func sdbm(x: String) -> Int {
    var hash = 0
    for byte in x.utf8 {
        hash = Int(byte) &+ (hash << 6) &+ (hash << 16) &- hash
    }
    return hash
}
|
67 | 69 |
|
/* A simple test */
|
69 |
| - |
// Demo: a 17-slot filter over strings that uses the djb2 and sdbm hash functions.
let bloom = BloomFilter<String>(size: 17, hashFunctions: [djb2, sdbm])

bloom.insert("Hello world!")
print(bloom.array)

// The parenthesized expressions below are bare values; presumably they are
// written this way so the playground shows each result inline.
// A value that was inserted always queries true. A value that was never
// inserted normally queries false, but it queries true whenever both of its
// slots happen to be set already (a false positive).
(bloom.query("Hello world!")) // true
(bloom.query("Hello WORLD")) // false, unless it is a false positive

bloom.insert("Bloom Filterz")
(bloom.array)

(bloom.query("Bloom Filterz")) // true
// With more slots set after the second insert, a false positive here is more likely.
(bloom.query("Hello WORLD")) // false, unless it is a false positive
|
0 commit comments