Skip to content

Commit 1d7ae6b

Browse files
authored
Merge pull request kodecocodes#330 from mmazzei/master
Boyer-Moore algorithm updates
2 parents 9ac2d76 + 960bca1 commit 1d7ae6b

File tree

11 files changed

+695
-120
lines changed

11 files changed

+695
-120
lines changed

.travis.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ script:
1212
- xcodebuild test -project ./Array2D/Tests/Tests.xcodeproj -scheme Tests
1313
- xcodebuild test -project ./AVL\ Tree/Tests/Tests.xcodeproj -scheme Tests
1414
- xcodebuild test -project ./Binary\ Search/Tests/Tests.xcodeproj -scheme Tests
15+
- xcodebuild test -project ./Boyer-Moore/Tests/Tests.xcodeproj -scheme Tests
1516
# - xcodebuild test -project ./Binary\ Search\ Tree/Solution\ 1/Tests/Tests.xcodeproj -scheme Tests
1617
- xcodebuild test -project ./Bloom\ Filter/Tests/Tests.xcodeproj -scheme Tests
1718
# - xcodebuild test -project ./Bounded\ Priority\ Queue/Tests/Tests.xcodeproj -scheme Tests
Lines changed: 78 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -1,72 +1,88 @@
11
//: Playground - noun: a place where people can play
22

3+
/*
4+
Boyer-Moore string search
5+
6+
This code is based on the article "Faster String Searches" by Costas Menico
7+
from Dr Dobb's magazine, July 1989.
8+
http://www.drdobbs.com/database/faster-string-searches/184408171
9+
*/
310
extension String {
4-
func indexOf(pattern: String) -> String.Index? {
5-
// Cache the length of the search pattern because we're going to
6-
// use it a few times and it's expensive to calculate.
7-
let patternLength = pattern.characters.count
8-
assert(patternLength > 0)
9-
assert(patternLength <= characters.count)
10-
11-
// Make the skip table. This table determines how far we skip ahead
12-
// when a character from the pattern is found.
13-
var skipTable = [Character: Int]()
14-
for (i, c) in pattern.characters.enumerated() {
15-
skipTable[c] = patternLength - i - 1
16-
}
17-
18-
// This points at the last character in the pattern.
19-
let p = pattern.index(before: pattern.endIndex)
20-
let lastChar = pattern[p]
21-
22-
// The pattern is scanned right-to-left, so skip ahead in the string by
23-
// the length of the pattern. (Minus 1 because startIndex already points
24-
// at the first character in the source string.)
25-
var i = index(startIndex, offsetBy: patternLength - 1)
26-
27-
// This is a helper function that steps backwards through both strings
28-
// until we find a character that doesn’t match, or until we’ve reached
29-
// the beginning of the pattern.
30-
func backwards() -> String.Index? {
31-
var q = p
32-
var j = i
33-
while q > pattern.startIndex {
34-
j = index(before: j)
35-
q = index(before: q)
36-
if self[j] != pattern[q] { return nil }
37-
}
38-
return j
39-
}
40-
41-
// The main loop. Keep going until the end of the string is reached.
42-
while i < endIndex {
43-
let c = self[i]
44-
45-
// Does the current character match the last character from the pattern?
46-
if c == lastChar {
47-
48-
// There is a possible match. Do a brute-force search backwards.
49-
if let k = backwards() { return k }
50-
51-
// If no match, we can only safely skip one character ahead.
52-
i = index(after: i)
53-
} else {
54-
// The characters are not equal, so skip ahead. The amount to skip is
55-
// determined by the skip table. If the character is not present in the
56-
// pattern, we can skip ahead by the full pattern length. However, if
57-
// the character *is* present in the pattern, there may be a match up
58-
// ahead and we can't skip as far.
59-
i = index(i, offsetBy: skipTable[c] ?? patternLength)
60-
}
11+
/// Returns the index of the first occurrence of `pattern` in this string,
/// located with the Boyer-Moore string search algorithm.
///
/// - Parameters:
///   - pattern: The text to search for.
///   - usingHorspoolImprovement: When `true`, a failed comparison that began on
///     a matching last character skips ahead by the Horspool shift instead of a
///     single character. Both modes return the same result.
/// - Returns: The index at which the first match begins, or `nil` when
///   `pattern` is empty, longer than this string, or not present in it.
func index(of pattern: String, usingHorspoolImprovement: Bool = false) -> Index? {
    // Measure the pattern once; counting characters is a linear-time operation.
    // This goes through the string's own index API rather than the `characters`
    // view, which is deprecated and no longer available in current Swift.
    let patternLength = pattern.distance(from: pattern.startIndex, to: pattern.endIndex)
    guard patternLength > 0 else { return nil }

    // The pattern is scanned right-to-left, so begin at the character that lines
    // up with the last character of the pattern. (Minus 1 because startIndex
    // already points at the first character.) The bounded offset yields nil when
    // the string is shorter than that, and endIndex when it is exactly one
    // character too short, in which case the main loop never runs.
    guard var i = index(startIndex, offsetBy: patternLength - 1, limitedBy: endIndex) else {
        return nil
    }

    // Make the skip table. For every character of the pattern it stores the
    // distance from that character's last occurrence to the end of the pattern.
    var skipTable = [Character: Int]()

    // Shift used by the Horspool variant after a failed comparison: the distance
    // from the previous occurrence of the last character to the end of the
    // pattern, or the full pattern length if it occurs nowhere else.
    var horspoolShift = patternLength

    var remaining = patternLength
    var cursor = pattern.startIndex
    while cursor != pattern.endIndex {
        remaining -= 1
        let ch = pattern[cursor]
        if remaining == 0 {
            // About to overwrite the last character's entry with 0, so capture
            // the distance recorded for any earlier occurrence first.
            horspoolShift = skipTable[ch] ?? patternLength
        }
        skipTable[ch] = remaining
        cursor = pattern.index(after: cursor)
    }

    // This points at the last character in the pattern.
    let p = pattern.index(before: pattern.endIndex)
    let lastChar = pattern[p]

    // Steps backwards through both strings until a character does not match
    // (returns nil) or the beginning of the pattern is reached (returns the
    // index in this string where the match starts).
    func backwards() -> Index? {
        var q = p
        var j = i
        while q > pattern.startIndex {
            j = index(before: j)
            // `q` is an index into `pattern`, so it has to be moved with the
            // pattern's own index(before:); an index taken from one string is
            // not valid for navigating a different string.
            q = pattern.index(before: q)
            if self[j] != pattern[q] { return nil }
        }
        return j
    }

    // The main loop. Keep going until the end of the string is reached.
    while i < endIndex {
        let c = self[i]
        let jump: Int

        if c == lastChar {
            // There is a possible match. Do a brute-force search backwards.
            if let k = backwards() { return k }

            // No match. `skipTable[lastChar]` is always 0, so it cannot be used
            // here: the plain variant moves one character ahead, the Horspool
            // variant moves to the previous occurrence of this character.
            jump = usingHorspoolImprovement ? horspoolShift : 1
        } else {
            // The characters are not equal, so skip ahead. If the character is
            // not present in the pattern we can skip the full pattern length;
            // if it *is* present there may be a match up ahead and we can only
            // skip as far as the table allows.
            jump = skipTable[c] ?? patternLength
        }

        // Clamp to endIndex so that a jump past the end simply ends the search.
        i = index(i, offsetBy: jump, limitedBy: endIndex) ?? endIndex
    }
    return nil
}
62-
return nil
63-
}
6477
}
6578

6679
// A few simple tests
6780

68-
let s = "Hello, World"
69-
s.indexOf(pattern: "World") // 7
81+
let str = "Hello, World"
82+
str.index(of: "World") // 7
7083

7184
let animals = "🐶🐔🐷🐮🐱"
72-
animals.indexOf(pattern: "🐮") // 6
85+
animals.index(of: "🐮") // 6
86+
87+
let lorem = "Lorem ipsum dolor sit amet"
88+
lorem.index(of: "sit", usingHorspoolImprovement: true) // 18

Boyer-Moore/BoyerMoore.swift

Lines changed: 23 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -6,33 +6,32 @@
66
http://www.drdobbs.com/database/faster-string-searches/184408171
77
*/
88
extension String {
9-
func indexOf(pattern: String) -> String.Index? {
9+
func index(of pattern: String, usingHorspoolImprovement: Bool = false) -> Index? {
1010
// Cache the length of the search pattern because we're going to
1111
// use it a few times and it's expensive to calculate.
1212
let patternLength = pattern.characters.count
13-
assert(patternLength > 0)
14-
assert(patternLength <= self.characters.count)
15-
13+
guard patternLength > 0, patternLength <= characters.count else { return nil }
14+
1615
// Make the skip table. This table determines how far we skip ahead
1716
// when a character from the pattern is found.
1817
var skipTable = [Character: Int]()
1918
for (i, c) in pattern.characters.enumerated() {
2019
skipTable[c] = patternLength - i - 1
2120
}
22-
21+
2322
// This points at the last character in the pattern.
2423
let p = pattern.index(before: pattern.endIndex)
2524
let lastChar = pattern[p]
26-
25+
2726
// The pattern is scanned right-to-left, so skip ahead in the string by
2827
// the length of the pattern. (Minus 1 because startIndex already points
2928
// at the first character in the source string.)
30-
var i = self.index(startIndex, offsetBy: patternLength - 1)
31-
29+
var i = index(startIndex, offsetBy: patternLength - 1)
30+
3231
// This is a helper function that steps backwards through both strings
3332
// until we find a character that doesn’t match, or until we’ve reached
3433
// the beginning of the pattern.
35-
func backwards() -> String.Index? {
34+
func backwards() -> Index? {
3635
var q = p
3736
var j = i
3837
while q > pattern.startIndex {
@@ -42,26 +41,33 @@ extension String {
4241
}
4342
return j
4443
}
45-
44+
4645
// The main loop. Keep going until the end of the string is reached.
47-
while i < self.endIndex {
46+
while i < endIndex {
4847
let c = self[i]
49-
48+
5049
// Does the current character match the last character from the pattern?
5150
if c == lastChar {
52-
51+
5352
// There is a possible match. Do a brute-force search backwards.
5453
if let k = backwards() { return k }
55-
56-
// If no match, we can only safely skip one character ahead.
57-
i = index(after: i)
54+
55+
if !usingHorspoolImprovement {
56+
// If no match, we can only safely skip one character ahead.
57+
i = index(after: i)
58+
} else {
59+
// Make sure we jump at least one character (this is needed because the last
60+
// character of the pattern is in the skipTable, and `skipTable[lastChar] = 0`)
61+
let jumpOffset = max(skipTable[c] ?? patternLength, 1)
62+
i = index(i, offsetBy: jumpOffset, limitedBy: endIndex) ?? endIndex
63+
}
5864
} else {
5965
// The characters are not equal, so skip ahead. The amount to skip is
6066
// determined by the skip table. If the character is not present in the
6167
// pattern, we can skip ahead by the full pattern length. However, if
6268
// the character *is* present in the pattern, there may be a match up
6369
// ahead and we can't skip as far.
64-
i = self.index(i, offsetBy: skipTable[c] ?? patternLength)
70+
i = index(i, offsetBy: skipTable[c] ?? patternLength, limitedBy: endIndex) ?? endIndex
6571
}
6672
}
6773
return nil

Boyer-Moore/README.markdown

Lines changed: 65 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -32,12 +32,11 @@ Here's how you could write it in Swift:
3232

3333
```swift
3434
extension String {
35-
func indexOf(pattern: String) -> String.Index? {
35+
func index(of pattern: String) -> Index? {
3636
// Cache the length of the search pattern because we're going to
3737
// use it a few times and it's expensive to calculate.
3838
let patternLength = pattern.characters.count
39-
assert(patternLength > 0)
40-
assert(patternLength <= self.characters.count)
39+
guard patternLength > 0, patternLength <= characters.count else { return nil }
4140

4241
// Make the skip table. This table determines how far we skip ahead
4342
// when a character from the pattern is found.
@@ -53,12 +52,12 @@ extension String {
5352
// The pattern is scanned right-to-left, so skip ahead in the string by
5453
// the length of the pattern. (Minus 1 because startIndex already points
5554
// at the first character in the source string.)
56-
var i = self.index(startIndex, offsetBy: patternLength - 1)
55+
var i = index(startIndex, offsetBy: patternLength - 1)
5756

5857
// This is a helper function that steps backwards through both strings
5958
// until we find a character that doesn’t match, or until we’ve reached
6059
// the beginning of the pattern.
61-
func backwards() -> String.Index? {
60+
func backwards() -> Index? {
6261
var q = p
6362
var j = i
6463
while q > pattern.startIndex {
@@ -70,7 +69,7 @@ extension String {
7069
}
7170

7271
// The main loop. Keep going until the end of the string is reached.
73-
while i < self.endIndex {
72+
while i < endIndex {
7473
let c = self[i]
7574

7675
// Does the current character match the last character from the pattern?
@@ -87,7 +86,7 @@ extension String {
8786
// pattern, we can skip ahead by the full pattern length. However, if
8887
// the character *is* present in the pattern, there may be a match up
8988
// ahead and we can't skip as far.
90-
i = self.index(i, offsetBy: skipTable[c] ?? patternLength)
89+
i = index(i, offsetBy: skipTable[c] ?? patternLength, limitedBy: endIndex) ?? endIndex
9190
}
9291
}
9392
return nil
@@ -157,41 +156,66 @@ Here's an implementation of the Boyer-Moore-Horspool algorithm:
157156

158157
```swift
159158
extension String {
160-
func indexOf(pattern: String) -> String.Index? {
161-
let patternLength = pattern.characters.count
162-
assert(patternLength > 0)
163-
assert(patternLength <= self.characters.count)
164-
165-
var skipTable = [Character: Int]()
166-
for (i, c) in pattern.characters.enumerated() {
167-
skipTable[c] = patternLength - i - 1
168-
}
159+
func index(of pattern: String) -> Index? {
160+
// Cache the length of the search pattern because we're going to
161+
// use it a few times and it's expensive to calculate.
162+
let patternLength = pattern.characters.count
163+
guard patternLength > 0, patternLength <= characters.count else { return nil }
169164

170-
let p = pattern.index(before: pattern.endIndex)
171-
let lastChar = pattern[p]
172-
var i = self.index(startIndex, offsetBy: patternLength - 1)
173-
174-
func backwards() -> String.Index? {
175-
var q = p
176-
var j = i
177-
while q > pattern.startIndex {
178-
j = index(before: j)
179-
q = index(before: q)
180-
if self[j] != pattern[q] { return nil }
181-
}
182-
return j
183-
}
165+
// Make the skip table. This table determines how far we skip ahead
166+
// when a character from the pattern is found.
167+
var skipTable = [Character: Int]()
168+
for (i, c) in pattern.characters.enumerated() {
169+
skipTable[c] = patternLength - i - 1
170+
}
184171

185-
while i < self.endIndex {
186-
let c = self[i]
187-
if c == lastChar {
188-
if let k = backwards() { return k }
189-
i = index(after: i)
190-
} else {
191-
i = index(i, offsetBy: skipTable[c] ?? patternLength)
192-
}
193-
}
194-
return nil
172+
// This points at the last character in the pattern.
173+
let p = pattern.index(before: pattern.endIndex)
174+
let lastChar = pattern[p]
175+
176+
// The pattern is scanned right-to-left, so skip ahead in the string by
177+
// the length of the pattern. (Minus 1 because startIndex already points
178+
// at the first character in the source string.)
179+
var i = index(startIndex, offsetBy: patternLength - 1)
180+
181+
// This is a helper function that steps backwards through both strings
182+
// until we find a character that doesn’t match, or until we’ve reached
183+
// the beginning of the pattern.
184+
// Called once the current character of the string matched the last character
// of the pattern. Returns the index in the string where the match begins, or
// nil as soon as a pair of characters differs.
func backwards() -> Index? {
    var q = p
    var j = i
    while q > pattern.startIndex {
        j = index(before: j)
        // `q` is an index into `pattern`, so it has to be moved with the
        // pattern's own index(before:); an index taken from one string is
        // not valid for navigating a different string.
        q = pattern.index(before: q)
        if self[j] != pattern[q] { return nil }
    }
    return j
}
194+
195+
// The main loop. Keep going until the end of the string is reached.
196+
while i < endIndex {
197+
let c = self[i]
198+
199+
// Does the current character match the last character from the pattern?
200+
if c == lastChar {
201+
202+
// There is a possible match. Do a brute-force search backwards.
203+
if let k = backwards() { return k }
204+
205+
// Make sure we jump at least one character (this is needed because the last
206+
// character of the pattern is in the skipTable, and `skipTable[lastChar] = 0`)
207+
let jumpOffset = max(skipTable[c] ?? patternLength, 1)
208+
i = index(i, offsetBy: jumpOffset, limitedBy: endIndex) ?? endIndex
209+
} else {
210+
// The characters are not equal, so skip ahead. The amount to skip is
211+
// determined by the skip table. If the character is not present in the
212+
// pattern, we can skip ahead by the full pattern length. However, if
213+
// the character *is* present in the pattern, there may be a match up
214+
// ahead and we can't skip as far.
215+
i = index(i, offsetBy: skipTable[c] ?? patternLength, limitedBy: endIndex) ?? endIndex
216+
}
217+
}
218+
return nil
195219
}
196220
}
197221
```
@@ -200,4 +224,4 @@ In practice, the Horspool version of the algorithm tends to perform a little bet
200224

201225
Credits: This code is based on the paper: [R. N. Horspool (1980). "Practical fast searching in strings". Software - Practice & Experience 10 (6): 501–506.](http://www.cin.br/~paguso/courses/if767/bib/Horspool_1980.pdf)
202226

203-
_Written for Swift Algorithm Club by Matthijs Hollemans, updated by Andreas Neusüß_
227+
_Written for Swift Algorithm Club by Matthijs Hollemans, updated by Andreas Neusüß and [Matías Mazzei](https://github.com/mmazzei)._

0 commit comments

Comments
 (0)