Skip to content

Commit 26cb9d1

Browse files
author
Bill Barbour
committed
Switched to Double to handle large prime
multiplier
1 parent f895ba1 commit 26cb9d1

File tree

6 files changed

+279
-1
lines changed

6 files changed

+279
-1
lines changed

README.markdown

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ If you're new to algorithms and data structures, here are a few good ones to sta
5656
- [Brute-Force String Search](Brute-Force String Search/). A naive method.
5757
- [Boyer-Moore](Boyer-Moore/). A fast method to search for substrings. It skips ahead based on a look-up table, to avoid looking at every character in the text.
5858
- Knuth-Morris-Pratt
59-
- Rabin-Karp
59+
- [Rabin-Karp](Rabin-Karp/). Faster search by using hashing.
6060
- [Longest Common Subsequence](Longest Common Subsequence/). Find the longest sequence of characters that appear in the same order in both strings.
6161

6262
### Sorting

Rabin-Karp/README.markdown

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
# Rabin-Karp string search algorithm
2+
3+
The Rabin-Karp string search algorithm is used to search text for a pattern.
4+
5+
Searching for a pattern within a larger text is a common programming interview question.
6+
7+
## Example
8+
9+
Given a text of "The big dog jumped over the fox" and a search pattern of "ump" this will return 13.
10+
It starts by hashing "ump" and then hashing "The". If the hashes don't match, it slides the window one character
11+
at a time (e.g. to "he ") and removes the contribution of the dropped character ("T") from the previous hash.
12+
13+
## Algorithm
14+
15+
The Rabin-Karp algorithm uses a sliding window the size of the search pattern. It starts by hashing the search pattern, then
16+
hashing the first x characters of the text string where x is the length of the search pattern. It then slides the window one character over and uses
17+
the previous hash value to calculate the new hash faster. Only when it finds a hash that matches the hash of the search pattern will it compare
18+
the two strings to see if they are the same (this prevents a hash collision from producing a false positive).
19+
20+
## The code
21+
22+
The major search method is next. More implementation details are in rabin-karp.swift
23+
24+
```swift
25+
/// Finds the first occurrence of `pattern` in `text` using the Rabin-Karp algorithm.
///
/// The hash of the pattern is compared with a rolling hash of every window of the
/// same length in the text. The characters themselves are compared only when the
/// hashes agree, so a hash collision can never produce a false match.
///
/// - Parameters:
///   - text: The string to search in.
///   - pattern: The string to search for.
/// - Returns: The zero-based offset, counted in `Character`s, of the first match;
///   `0` when `pattern` is empty; `-1` when `pattern` does not occur in `text`.
public func search(text: String, pattern: String) -> Int {
    let textChars = Array(text)
    let patternChars = Array(pattern)
    let patternLength = patternChars.count

    // An empty pattern matches at the very start; a pattern longer than the text never matches.
    guard patternLength > 0 else { return 0 }
    guard textChars.count >= patternLength else { return -1 }

    // Base of the polynomial hash. All hash arithmetic below uses the wrapping
    // operators (&+, &-, &*), i.e. exact arithmetic modulo 2^64, so a rolled hash is
    // always identical to a hash computed from scratch. Floating-point hashes lose
    // precision once the pattern is more than a few characters long.
    let multiplier: UInt64 = 69069

    // Numeric code of one character, built from all of its Unicode scalars.
    func code(_ character: Character) -> UInt64 {
        return String(character).unicodeScalars.reduce(UInt64(0)) { $0 &* 31 &+ UInt64($1.value) }
    }

    // Polynomial hash: codes[0] * m^(n-1) + codes[1] * m^(n-2) + ... + codes[n-1].
    func polynomialHash(_ codes: ArraySlice<UInt64>) -> UInt64 {
        return codes.reduce(UInt64(0)) { $0 &* multiplier &+ $1 }
    }

    let textCodes = textChars.map(code)
    let patternCodes = patternChars.map(code)
    let patternHash = polynomialHash(patternCodes[0..<patternLength])

    // Weight of the left-most character of a window: multiplier^(patternLength - 1).
    var leadingWeight: UInt64 = 1
    for _ in 1..<patternLength {
        leadingWeight = leadingWeight &* multiplier
    }

    var windowHash = polynomialHash(textCodes[0..<patternLength])
    let lastStart = textChars.count - patternLength

    for start in 0...lastStart {
        // Verify the characters on a hash match to rule out a collision.
        if windowHash == patternHash
            && textChars[start..<(start + patternLength)].elementsEqual(patternChars) {
            return start
        }

        // Slide the window: drop the left-most character, append the next one.
        if start < lastStart {
            let outgoing = textCodes[start] &* leadingWeight
            windowHash = (windowHash &- outgoing) &* multiplier &+ textCodes[start + patternLength]
        }
    }

    return -1
}
64+
```
65+
66+
This code can be tested in a playground using the following:
67+
68+
```swift
69+
search(text: "The big dog jumped", pattern: "ump")
70+
```
71+
72+
This returns 13 because "ump" starts at index 13 of the zero-based string.
73+
74+
## Additional Resources
75+
76+
[Rabin-Karp Wikipedia](https://en.wikipedia.org/wiki/Rabin%E2%80%93Karp_algorithm)
77+
78+
79+
*Written by [Bill Barbour](https://github.com/brbatwork)*
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
//: Taking our rabin-karp algorithm for a walk
2+
3+
import UIKit
4+
5+
/// Constants used by the Rabin-Karp hashing functions.
struct Constants {
    /// Multiplier (base) of the polynomial hash: each character's value is weighted
    /// by a power of this number.
    static let hashMultiplier: Int = 69069
}
8+
9+
/// Precedence of the `**` power operator: it binds tighter than multiplication and,
/// like exponentiation in mathematics, groups from the right (`a ** b ** c` is
/// `a ** (b ** c)`).
precedencegroup PowerPrecedence {
    associativity: right
    higherThan: MultiplicationPrecedence
}
infix operator ** : PowerPrecedence

/// Raises an integer to an integer power.
///
/// - Parameters:
///   - radix: The base.
///   - power: The exponent.
/// - Returns: `radix` raised to `power`. The result is exact whenever it fits in an
///   `Int`; an overflowing result traps.
func ** (radix: Int, power: Int) -> Int {
    // Negative exponents keep the floating-point path: the fractional result is
    // truncated toward zero when converted back to Int.
    guard power >= 0 else {
        return Int(pow(Double(radix), Double(power)))
    }

    // Exponentiation by squaring in integer arithmetic. Going through Double would
    // round any result above 2^53.
    var result = 1
    var base = radix
    var remaining = power
    while remaining > 0 {
        if remaining & 1 == 1 {
            result *= base
        }
        remaining >>= 1
        // Square only while it is still needed, so a representable result never
        // overflows in an unused intermediate step.
        if remaining > 0 {
            base *= base
        }
    }
    return result
}

/// Raises a floating-point number to an integer power.
///
/// - Parameters:
///   - radix: The base.
///   - power: The exponent.
/// - Returns: `radix` raised to `power`, as computed by `pow`.
func ** (radix: Double, power: Int) -> Double {
    return pow(radix, Double(power))
}
17+
18+
extension Character {
    /// The value of this character's first Unicode scalar, as an `Int`.
    ///
    /// Any further scalars of a multi-scalar character (for example a combining
    /// accent) are ignored.
    var asInt: Int {
        guard let leadingScalar = String(self).unicodeScalars.first else {
            // Not reachable: a Character always contains at least one scalar.
            return 0
        }
        return Int(leadingScalar.value)
    }
}
24+
25+
/// Finds the first occurrence of `pattern` in `text` using the Rabin-Karp algorithm.
///
/// The hash of the pattern is compared with a rolling hash of every window of the
/// same length in the text. The characters themselves are compared only when the
/// hashes agree, so a hash collision can never produce a false match.
///
/// - Parameters:
///   - text: The string to search in.
///   - pattern: The string to search for.
/// - Returns: The zero-based offset, counted in `Character`s, of the first match;
///   `0` when `pattern` is empty; `-1` when `pattern` does not occur in `text`.
public func search(text: String, pattern: String) -> Int {
    let textChars = Array(text)
    let patternChars = Array(pattern)
    let patternLength = patternChars.count

    // An empty pattern matches at the very start; a pattern longer than the text never matches.
    guard patternLength > 0 else { return 0 }
    guard textChars.count >= patternLength else { return -1 }

    // Base of the polynomial hash. All hash arithmetic below uses the wrapping
    // operators (&+, &-, &*), i.e. exact arithmetic modulo 2^64, so a rolled hash is
    // always identical to a hash computed from scratch. Floating-point hashes lose
    // precision once the pattern is more than a few characters long.
    let multiplier: UInt64 = 69069

    // Numeric code of one character, built from all of its Unicode scalars.
    func code(_ character: Character) -> UInt64 {
        return String(character).unicodeScalars.reduce(UInt64(0)) { $0 &* 31 &+ UInt64($1.value) }
    }

    // Polynomial hash: codes[0] * m^(n-1) + codes[1] * m^(n-2) + ... + codes[n-1].
    func polynomialHash(_ codes: ArraySlice<UInt64>) -> UInt64 {
        return codes.reduce(UInt64(0)) { $0 &* multiplier &+ $1 }
    }

    let textCodes = textChars.map(code)
    let patternCodes = patternChars.map(code)
    let patternHash = polynomialHash(patternCodes[0..<patternLength])

    // Weight of the left-most character of a window: multiplier^(patternLength - 1).
    var leadingWeight: UInt64 = 1
    for _ in 1..<patternLength {
        leadingWeight = leadingWeight &* multiplier
    }

    var windowHash = polynomialHash(textCodes[0..<patternLength])
    let lastStart = textChars.count - patternLength

    for start in 0...lastStart {
        // Verify the characters on a hash match to rule out a collision.
        if windowHash == patternHash
            && textChars[start..<(start + patternLength)].elementsEqual(patternChars) {
            return start
        }

        // Slide the window: drop the left-most character, append the next one.
        if start < lastStart {
            let outgoing = textCodes[start] &* leadingWeight
            windowHash = (windowHash &- outgoing) &* multiplier &+ textCodes[start + patternLength]
        }
    }

    return -1
}
65+
66+
/// Modular arithmetic shared by `hash(array:)` and `nextHash(prevHash:dropped:added:patternSize:)`.
///
/// Every intermediate value stays below 2^62, so the Int64 arithmetic never
/// overflows, and every result is below 2^31, so it converts to `Double` exactly.
private enum RabinKarpModular {
    /// 2^31 - 1.
    static let modulus: Int64 = 2_147_483_647

    /// Maps `value` into `0..<modulus`, also for negative input.
    static func reduce(_ value: Int64) -> Int64 {
        let remainder = value % modulus
        return remainder < 0 ? remainder + modulus : remainder
    }
}

/// Computes the polynomial hash of `array`, reduced modulo 2^31 - 1.
///
/// With `m = Constants.hashMultiplier` and `n = array.count` the hash is
/// `array[0] * m^(n-1) + array[1] * m^(n-2) + ... + array[n-1]`, taken modulo 2^31 - 1.
/// Reducing at every step keeps the value exactly representable; the unreduced sum
/// exceeds the precision of a `Double` after only a few elements.
///
/// - Parameter array: The values to hash (for text, one integer per character).
/// - Returns: A whole number in `0..<2147483647`; `0` for an empty array.
public func hash(array: [Int]) -> Double {
    let multiplier = RabinKarpModular.reduce(Int64(Constants.hashMultiplier))
    var total: Int64 = 0
    // Horner's scheme: multiply the running total by the base, then add the next value.
    for value in array {
        total = (total * multiplier + RabinKarpModular.reduce(Int64(value))) % RabinKarpModular.modulus
    }
    return Double(total)
}

/// Derives the hash of the next window from the hash of the previous one.
///
/// The result equals `hash(array:)` of the window obtained by removing `dropped`
/// from the front of the previous window and appending `added` at the end.
///
/// - Parameters:
///   - prevHash: Hash of the previous window, as returned by `hash(array:)` or by
///     an earlier call to this function.
///   - dropped: The value leaving the window on the left.
///   - added: The value entering the window on the right.
///   - patternSize: The exponent of the dropped value's weight, i.e. the window
///     length minus one.
/// - Returns: The hash of the shifted window, a whole number in `0..<2147483647`.
public func nextHash(prevHash: Double, dropped: Int, added: Int, patternSize: Int) -> Double {
    // NaN and infinities cannot be converted to an integer; pass them through unchanged.
    guard prevHash.isFinite else { return prevHash }

    let modulus = RabinKarpModular.modulus
    let multiplier = RabinKarpModular.reduce(Int64(Constants.hashMultiplier))

    // Weight of the dropped value: multiplier^patternSize (mod modulus).
    var weight: Int64 = 1
    for _ in stride(from: 0, to: patternSize, by: 1) {
        weight = weight * multiplier % modulus
    }

    let previous = RabinKarpModular.reduce(Int64(prevHash.truncatingRemainder(dividingBy: Double(modulus))))
    let removed = RabinKarpModular.reduce(Int64(dropped)) * weight % modulus
    // Adding the modulus before reducing keeps the difference non-negative.
    let remaining = (previous - removed + modulus) % modulus
    return Double((remaining * multiplier + RabinKarpModular.reduce(Int64(added))) % modulus)
}
81+
82+
// TESTS
83+
// Each pattern is paired with the index `search` is expected to return for the sample sentence.
let sampleSentence = "The big dog jumped over the fox"
let expectedIndices: [(pattern: String, index: Int)] = [
    ("ump", 13),
    ("missed", -1),
    ("T", 0)
]
for expectation in expectedIndices {
    assert(search(text: sampleSentence, pattern: expectation.pattern) == expectation.index, "Invalid index returned")
}
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
2+
<playground version='5.0' target-platform='ios'>
3+
<timeline fileName='timeline.xctimeline'/>
4+
</playground>

Rabin-Karp/Rabin-Karp.playground/playground.xcworkspace/contents.xcworkspacedata

Lines changed: 7 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Rabin-Karp/rabin-karp.swift

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
// The MIT License (MIT)
2+
3+
// Copyright (c) 2016 Bill Barbour (brbatwork[at]gmail.com)
4+
5+
// Permission is hereby granted, free of charge, to any person obtaining a copy
6+
// of this software and associated documentation files (the "Software"), to deal
7+
// in the Software without restriction, including without limitation the rights
8+
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9+
// copies of the Software, and to permit persons to whom the Software is
10+
// furnished to do so, subject to the following conditions:
11+
12+
// The above copyright notice and this permission notice shall be included in all
13+
// copies or substantial portions of the Software.
14+
15+
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20+
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21+
// SOFTWARE.
22+
23+
/// Constants used by the Rabin-Karp hashing functions.
struct Constants {
    /// Multiplier (base) of the polynomial hash: each character's value is weighted
    /// by a power of this number.
    static let hashMultiplier: Int = 69069
}
26+
27+
// `pow` is declared by Foundation, which this file did not import.
import Foundation

/// Precedence of the `**` power operator: it binds tighter than multiplication and,
/// like exponentiation in mathematics, groups from the right (`a ** b ** c` is
/// `a ** (b ** c)`).
precedencegroup PowerPrecedence {
    associativity: right
    higherThan: MultiplicationPrecedence
}
infix operator ** : PowerPrecedence

/// Raises an integer to an integer power.
///
/// - Parameters:
///   - radix: The base.
///   - power: The exponent.
/// - Returns: `radix` raised to `power`. The result is exact whenever it fits in an
///   `Int`; an overflowing result traps.
func ** (radix: Int, power: Int) -> Int {
    // Negative exponents keep the floating-point path: the fractional result is
    // truncated toward zero when converted back to Int.
    guard power >= 0 else {
        return Int(pow(Double(radix), Double(power)))
    }

    // Exponentiation by squaring in integer arithmetic. Going through Double would
    // round any result above 2^53.
    var result = 1
    var base = radix
    var remaining = power
    while remaining > 0 {
        if remaining & 1 == 1 {
            result *= base
        }
        remaining >>= 1
        // Square only while it is still needed, so a representable result never
        // overflows in an unused intermediate step.
        if remaining > 0 {
            base *= base
        }
    }
    return result
}

/// Raises a floating-point number to an integer power.
///
/// - Parameters:
///   - radix: The base.
///   - power: The exponent.
/// - Returns: `radix` raised to `power`, as computed by `pow`.
func ** (radix: Double, power: Int) -> Double {
    return pow(radix, Double(power))
}
35+
36+
extension Character {
    /// The value of this character's first Unicode scalar, as an `Int`.
    ///
    /// Any further scalars of a multi-scalar character (for example a combining
    /// accent) are ignored.
    var asInt: Int {
        guard let leadingScalar = String(self).unicodeScalars.first else {
            // Not reachable: a Character always contains at least one scalar.
            return 0
        }
        return Int(leadingScalar.value)
    }
}
42+
43+
/// Finds the first occurrence of `pattern` in `text` using the Rabin-Karp algorithm.
///
/// The hash of the pattern is compared with a rolling hash of every window of the
/// same length in the text. The characters themselves are compared only when the
/// hashes agree, so a hash collision can never produce a false match.
///
/// - Parameters:
///   - text: The string to search in.
///   - pattern: The string to search for.
/// - Returns: The zero-based offset, counted in `Character`s, of the first match;
///   `0` when `pattern` is empty; `-1` when `pattern` does not occur in `text`.
public func search(text: String, pattern: String) -> Int {
    let textChars = Array(text)
    let patternChars = Array(pattern)
    let patternLength = patternChars.count

    // An empty pattern matches at the very start; a pattern longer than the text never matches.
    guard patternLength > 0 else { return 0 }
    guard textChars.count >= patternLength else { return -1 }

    // Base of the polynomial hash. All hash arithmetic below uses the wrapping
    // operators (&+, &-, &*), i.e. exact arithmetic modulo 2^64, so a rolled hash is
    // always identical to a hash computed from scratch. Floating-point hashes lose
    // precision once the pattern is more than a few characters long.
    let multiplier: UInt64 = 69069

    // Numeric code of one character, built from all of its Unicode scalars.
    func code(_ character: Character) -> UInt64 {
        return String(character).unicodeScalars.reduce(UInt64(0)) { $0 &* 31 &+ UInt64($1.value) }
    }

    // Polynomial hash: codes[0] * m^(n-1) + codes[1] * m^(n-2) + ... + codes[n-1].
    func polynomialHash(_ codes: ArraySlice<UInt64>) -> UInt64 {
        return codes.reduce(UInt64(0)) { $0 &* multiplier &+ $1 }
    }

    let textCodes = textChars.map(code)
    let patternCodes = patternChars.map(code)
    let patternHash = polynomialHash(patternCodes[0..<patternLength])

    // Weight of the left-most character of a window: multiplier^(patternLength - 1).
    var leadingWeight: UInt64 = 1
    for _ in 1..<patternLength {
        leadingWeight = leadingWeight &* multiplier
    }

    var windowHash = polynomialHash(textCodes[0..<patternLength])
    let lastStart = textChars.count - patternLength

    for start in 0...lastStart {
        // Verify the characters on a hash match to rule out a collision.
        if windowHash == patternHash
            && textChars[start..<(start + patternLength)].elementsEqual(patternChars) {
            return start
        }

        // Slide the window: drop the left-most character, append the next one.
        if start < lastStart {
            let outgoing = textCodes[start] &* leadingWeight
            windowHash = (windowHash &- outgoing) &* multiplier &+ textCodes[start + patternLength]
        }
    }

    return -1
}
83+
84+
/// Modular arithmetic shared by `hash(array:)` and `nextHash(prevHash:dropped:added:patternSize:)`.
///
/// Every intermediate value stays below 2^62, so the Int64 arithmetic never
/// overflows, and every result is below 2^31, so it converts to `Double` exactly.
private enum RabinKarpModular {
    /// 2^31 - 1.
    static let modulus: Int64 = 2_147_483_647

    /// Maps `value` into `0..<modulus`, also for negative input.
    static func reduce(_ value: Int64) -> Int64 {
        let remainder = value % modulus
        return remainder < 0 ? remainder + modulus : remainder
    }
}

/// Computes the polynomial hash of `array`, reduced modulo 2^31 - 1.
///
/// With `m = Constants.hashMultiplier` and `n = array.count` the hash is
/// `array[0] * m^(n-1) + array[1] * m^(n-2) + ... + array[n-1]`, taken modulo 2^31 - 1.
/// Reducing at every step keeps the value exactly representable; the unreduced sum
/// exceeds the precision of a `Double` after only a few elements.
///
/// - Parameter array: The values to hash (for text, one integer per character).
/// - Returns: A whole number in `0..<2147483647`; `0` for an empty array.
public func hash(array: [Int]) -> Double {
    let multiplier = RabinKarpModular.reduce(Int64(Constants.hashMultiplier))
    var total: Int64 = 0
    // Horner's scheme: multiply the running total by the base, then add the next value.
    for value in array {
        total = (total * multiplier + RabinKarpModular.reduce(Int64(value))) % RabinKarpModular.modulus
    }
    return Double(total)
}

/// Derives the hash of the next window from the hash of the previous one.
///
/// The result equals `hash(array:)` of the window obtained by removing `dropped`
/// from the front of the previous window and appending `added` at the end.
///
/// - Parameters:
///   - prevHash: Hash of the previous window, as returned by `hash(array:)` or by
///     an earlier call to this function.
///   - dropped: The value leaving the window on the left.
///   - added: The value entering the window on the right.
///   - patternSize: The exponent of the dropped value's weight, i.e. the window
///     length minus one.
/// - Returns: The hash of the shifted window, a whole number in `0..<2147483647`.
public func nextHash(prevHash: Double, dropped: Int, added: Int, patternSize: Int) -> Double {
    // NaN and infinities cannot be converted to an integer; pass them through unchanged.
    guard prevHash.isFinite else { return prevHash }

    let modulus = RabinKarpModular.modulus
    let multiplier = RabinKarpModular.reduce(Int64(Constants.hashMultiplier))

    // Weight of the dropped value: multiplier^patternSize (mod modulus).
    var weight: Int64 = 1
    for _ in stride(from: 0, to: patternSize, by: 1) {
        weight = weight * multiplier % modulus
    }

    let previous = RabinKarpModular.reduce(Int64(prevHash.truncatingRemainder(dividingBy: Double(modulus))))
    let removed = RabinKarpModular.reduce(Int64(dropped)) * weight % modulus
    // Adding the modulus before reducing keeps the difference non-negative.
    let remaining = (previous - removed + modulus) % modulus
    return Double((remaining * multiplier + RabinKarpModular.reduce(Int64(added))) % modulus)
}
99+
100+
// TESTS
101+
// Each pattern is paired with the index `search` is expected to return for the sample sentence.
let sampleSentence = "The big dog jumped over the fox"
let expectedIndices: [(pattern: String, index: Int)] = [
    ("ump", 13),
    ("missed", -1),
    ("T", 0)
]
for expectation in expectedIndices {
    assert(search(text: sampleSentence, pattern: expectation.pattern) == expectation.index, "Invalid index returned")
}

0 commit comments

Comments
 (0)