Skip to content

Commit cf45ebd

Browse files
committed
Update as requested in code review.
1 parent cd2106a commit cf45ebd

File tree

4 files changed

+135
-93
lines changed

4 files changed

+135
-93
lines changed
Original file line numberDiff line numberDiff line change
@@ -1,72 +1,91 @@
11
//: Playground - noun: a place where people can play
22

3+
/*
4+
Boyer-Moore string search
5+
6+
This code is based on the article "Faster String Searches" by Costas Menico
7+
from Dr Dobb's magazine, July 1989.
8+
http://www.drdobbs.com/database/faster-string-searches/184408171
9+
*/
310
extension String {
4-
func indexOf(pattern: String) -> String.Index? {
5-
// Cache the length of the search pattern because we're going to
6-
// use it a few times and it's expensive to calculate.
7-
let patternLength = pattern.characters.count
8-
assert(patternLength > 0)
9-
assert(patternLength <= characters.count)
10-
11-
// Make the skip table. This table determines how far we skip ahead
12-
// when a character from the pattern is found.
13-
var skipTable = [Character: Int]()
14-
for (i, c) in pattern.characters.enumerated() {
15-
skipTable[c] = patternLength - i - 1
16-
}
17-
18-
// This points at the last character in the pattern.
19-
let p = pattern.index(before: pattern.endIndex)
20-
let lastChar = pattern[p]
21-
22-
// The pattern is scanned right-to-left, so skip ahead in the string by
23-
// the length of the pattern. (Minus 1 because startIndex already points
24-
// at the first character in the source string.)
25-
var i = index(startIndex, offsetBy: patternLength - 1)
26-
27-
// This is a helper function that steps backwards through both strings
28-
// until we find a character that doesn’t match, or until we’ve reached
29-
// the beginning of the pattern.
30-
func backwards() -> String.Index? {
31-
var q = p
32-
var j = i
33-
while q > pattern.startIndex {
34-
j = index(before: j)
35-
q = index(before: q)
36-
if self[j] != pattern[q] { return nil }
37-
}
38-
return j
39-
}
40-
41-
// The main loop. Keep going until the end of the string is reached.
42-
while i < endIndex {
43-
let c = self[i]
44-
45-
// Does the current character match the last character from the pattern?
46-
if c == lastChar {
47-
48-
// There is a possible match. Do a brute-force search backwards.
49-
if let k = backwards() { return k }
50-
51-
// If no match, we can only safely skip one character ahead.
52-
i = index(after: i)
53-
} else {
54-
// The characters are not equal, so skip ahead. The amount to skip is
55-
// determined by the skip table. If the character is not present in the
56-
// pattern, we can skip ahead by the full pattern length. However, if
57-
// the character *is* present in the pattern, there may be a match up
58-
// ahead and we can't skip as far.
59-
i = index(i, offsetBy: skipTable[c] ?? patternLength)
60-
}
11+
/// Searches this string for `pattern` using a Boyer-Moore style skip table.
///
/// The string is scanned left to right, comparing against the pattern from
/// its last character backwards, and the first match found is returned.
///
/// - Parameters:
///   - pattern: The text to look for.
///   - usingHorspoolImprovement: When `true`, a failed candidate match
///     advances by the skip-table entry for the current character (but never
///     less than one character) instead of always advancing by exactly one.
/// - Returns: The index in `self` where the match starts, or `nil` when the
///   string is empty, the pattern is empty, the pattern is longer than the
///   string, or no match is found.
func index(of pattern: String, usingHorspoolImprovement: Bool = false) -> Index? {
  // There is no possible match in an empty string.
  guard !isEmpty else { return nil }

  // Cache the length of the search pattern because we're going to
  // use it a few times and it's expensive to calculate.
  let patternLength = pattern.count
  guard patternLength > 0, patternLength <= count else { return nil }

  // Make the skip table. This table determines how far we skip ahead
  // when a character from the pattern is found. Later occurrences of a
  // character overwrite earlier ones, so the last character maps to 0.
  var skipTable = [Character: Int]()
  for (offset, character) in pattern.enumerated() {
    skipTable[character] = patternLength - offset - 1
  }

  // This points at the last character in the pattern.
  let p = pattern.index(before: pattern.endIndex)
  let lastChar = pattern[p]

  // The pattern is scanned right-to-left, so skip ahead in the string by
  // the length of the pattern. (Minus 1 because startIndex already points
  // at the first character in the source string.) This cannot overshoot
  // because the guard above ensures patternLength <= count.
  var i = index(startIndex, offsetBy: patternLength - 1)

  // This is a helper function that steps backwards through both strings
  // until we find a character that doesn't match, or until we've reached
  // the beginning of the pattern.
  func backwards() -> Index? {
    var q = p
    var j = i
    while q > pattern.startIndex {
      j = index(before: j)
      // `q` is an index into `pattern`, so it must be moved by `pattern`
      // itself. Moving it with `self.index(before:)` is only correct when
      // both strings happen to have the same encoded widths at that spot.
      q = pattern.index(before: q)
      if self[j] != pattern[q] { return nil }
    }
    return j
  }

  // The main loop. Keep going until the end of the string is reached.
  while i < endIndex {
    let c = self[i]

    // Does the current character match the last character from the pattern?
    if c == lastChar {

      // There is a possible match. Do a brute-force search backwards.
      if let k = backwards() { return k }

      if !usingHorspoolImprovement {
        // If no match, we can only safely skip one character ahead.
        i = index(after: i)
      } else {
        // Make sure we jump at least one character. This is needed because
        // the last character of the pattern is in the skip table with
        // `skipTable[lastChar] == 0`.
        let jumpOffset = max(skipTable[c] ?? patternLength, 1)
        i = index(i, offsetBy: jumpOffset, limitedBy: endIndex) ?? endIndex
      }
    } else {
      // The characters are not equal, so skip ahead. The amount to skip is
      // determined by the skip table. If the character is not present in the
      // pattern, we can skip ahead by the full pattern length. However, if
      // the character *is* present in the pattern, there may be a match up
      // ahead and we can't skip as far.
      i = index(i, offsetBy: skipTable[c] ?? patternLength, limitedBy: endIndex) ?? endIndex
    }
  }
  return nil
}
62-
return nil
63-
}
6480
}
6581

6682
// A few simple tests
6783

68-
let s = "Hello, World"
69-
s.indexOf(pattern: "World") // 7
84+
let str = "Hello, World"
85+
str.index(of: "World") // 7
7086

7187
let animals = "🐶🐔🐷🐮🐱"
72-
animals.indexOf(pattern: "🐮") // 6
88+
animals.index(of: "🐮") // 6
89+
90+
let lorem = "Lorem ipsum dolor sit amet"
91+
lorem.index(of: "sit", usingHorspoolImprovement: true) // 18

Boyer-Moore/BoyerMoore.swift

+18-17
Original file line numberDiff line numberDiff line change
@@ -6,33 +6,35 @@
66
http://www.drdobbs.com/database/faster-string-searches/184408171
77
*/
88
extension String {
9-
func indexOf(pattern: String, useHorspoolImprovement: Bool = false) -> String.Index? {
9+
func index(of pattern: String, usingHorspoolImprovement: Bool = false) -> Index? {
10+
// There is no possible match in an empty string
11+
guard !isEmpty else { return nil }
12+
1013
// Cache the length of the search pattern because we're going to
1114
// use it a few times and it's expensive to calculate.
1215
let patternLength = pattern.characters.count
13-
assert(patternLength > 0)
14-
assert(patternLength <= self.characters.count)
15-
16+
guard patternLength > 0, patternLength <= characters.count else { return nil }
17+
1618
// Make the skip table. This table determines how far we skip ahead
1719
// when a character from the pattern is found.
1820
var skipTable = [Character: Int]()
1921
for (i, c) in pattern.characters.enumerated() {
2022
skipTable[c] = patternLength - i - 1
2123
}
22-
24+
2325
// This points at the last character in the pattern.
2426
let p = pattern.index(before: pattern.endIndex)
2527
let lastChar = pattern[p]
26-
28+
2729
// The pattern is scanned right-to-left, so skip ahead in the string by
2830
// the length of the pattern. (Minus 1 because startIndex already points
2931
// at the first character in the source string.)
30-
var i = self.index(startIndex, offsetBy: patternLength - 1)
31-
32+
var i = index(startIndex, offsetBy: patternLength - 1)
33+
3234
// This is a helper function that steps backwards through both strings
3335
// until we find a character that doesn’t match, or until we’ve reached
3436
// the beginning of the pattern.
35-
func backwards() -> String.Index? {
37+
func backwards() -> Index? {
3638
var q = p
3739
var j = i
3840
while q > pattern.startIndex {
@@ -42,22 +44,21 @@ extension String {
4244
}
4345
return j
4446
}
45-
47+
4648
// The main loop. Keep going until the end of the string is reached.
47-
while i < self.endIndex {
49+
while i < endIndex {
4850
let c = self[i]
49-
51+
5052
// Does the current character match the last character from the pattern?
5153
if c == lastChar {
52-
54+
5355
// There is a possible match. Do a brute-force search backwards.
5456
if let k = backwards() { return k }
55-
56-
if !useHorspoolImprovement {
57+
58+
if !usingHorspoolImprovement {
5759
// If no match, we can only safely skip one character ahead.
5860
i = index(after: i)
59-
}
60-
else {
61+
} else {
6162
// Ensure to jump at least one character (this is needed because the last
6263
// character is in the skipTable, and `skipTable[lastChar] = 0`)
6364
let jumpOffset = max(skipTable[c] ?? patternLength, 1)

Boyer-Moore/README.markdown

+16-12
Original file line numberDiff line numberDiff line change
@@ -32,12 +32,14 @@ Here's how you could write it in Swift:
3232

3333
```swift
3434
extension String {
35-
func indexOf(pattern: String) -> String.Index? {
35+
func index(of pattern: String) -> Index? {
36+
// There is no possible match in an empty string
37+
guard !isEmpty else { return nil }
38+
3639
// Cache the length of the search pattern because we're going to
3740
// use it a few times and it's expensive to calculate.
3841
let patternLength = pattern.characters.count
39-
assert(patternLength > 0)
40-
assert(patternLength <= self.characters.count)
42+
guard patternLength > 0, patternLength <= characters.count else { return nil }
4143

4244
// Make the skip table. This table determines how far we skip ahead
4345
// when a character from the pattern is found.
@@ -53,12 +55,12 @@ extension String {
5355
// The pattern is scanned right-to-left, so skip ahead in the string by
5456
// the length of the pattern. (Minus 1 because startIndex already points
5557
// at the first character in the source string.)
56-
var i = self.index(startIndex, offsetBy: patternLength - 1)
58+
var i = index(startIndex, offsetBy: patternLength - 1)
5759

5860
// This is a helper function that steps backwards through both strings
5961
// until we find a character that doesn’t match, or until we’ve reached
6062
// the beginning of the pattern.
61-
func backwards() -> String.Index? {
63+
func backwards() -> Index? {
6264
var q = p
6365
var j = i
6466
while q > pattern.startIndex {
@@ -70,7 +72,7 @@ extension String {
7072
}
7173

7274
// The main loop. Keep going until the end of the string is reached.
73-
while i < self.endIndex {
75+
while i < endIndex {
7476
let c = self[i]
7577

7678
// Does the current character match the last character from the pattern?
@@ -157,12 +159,14 @@ Here's an implementation of the Boyer-Moore-Horspool algorithm:
157159

158160
```swift
159161
extension String {
160-
func indexOf(pattern: String) -> String.Index? {
162+
func index(of pattern: String) -> Index? {
163+
// There is no possible match in an empty string
164+
guard !isEmpty else { return nil }
165+
161166
// Cache the length of the search pattern because we're going to
162167
// use it a few times and it's expensive to calculate.
163168
let patternLength = pattern.characters.count
164-
assert(patternLength > 0)
165-
assert(patternLength <= self.characters.count)
169+
guard patternLength > 0, patternLength <= characters.count else { return nil }
166170

167171
// Make the skip table. This table determines how far we skip ahead
168172
// when a character from the pattern is found.
@@ -178,12 +182,12 @@ extension String {
178182
// The pattern is scanned right-to-left, so skip ahead in the string by
179183
// the length of the pattern. (Minus 1 because startIndex already points
180184
// at the first character in the source string.)
181-
var i = self.index(startIndex, offsetBy: patternLength - 1)
185+
var i = index(startIndex, offsetBy: patternLength - 1)
182186

183187
// This is a helper function that steps backwards through both strings
184188
// until we find a character that doesn’t match, or until we’ve reached
185189
// the beginning of the pattern.
186-
func backwards() -> String.Index? {
190+
func backwards() -> Index? {
187191
var q = p
188192
var j = i
189193
while q > pattern.startIndex {
@@ -195,7 +199,7 @@ extension String {
195199
}
196200

197201
// The main loop. Keep going until the end of the string is reached.
198-
while i < self.endIndex {
202+
while i < endIndex {
199203
let c = self[i]
200204

201205
// Does the current character match the last character from the pattern?

Boyer-Moore/Tests/BoyerMooreTests.swift

+20-2
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,12 @@ class BoyerMooreTest: XCTestCase {
1717
}
1818

1919
func assert(pattern: String, doesNotExistsIn string: String) {
20-
let index = string.indexOf(pattern: pattern, useHorspoolImprovement: useHorspoolImprovement)
20+
let index = string.index(of: pattern, usingHorspoolImprovement: useHorspoolImprovement)
2121
XCTAssertNil(index)
2222
}
2323

2424
func assert(pattern: String, existsIn string: String) {
25-
let index = string.indexOf(pattern: pattern, useHorspoolImprovement: useHorspoolImprovement)
25+
let index = string.index(of: pattern, usingHorspoolImprovement: useHorspoolImprovement)
2626
XCTAssertNotNil(index)
2727

2828
let startIndex = index!
@@ -31,6 +31,24 @@ class BoyerMooreTest: XCTestCase {
3131
XCTAssertEqual(match, pattern)
3232
}
3333

34+
func testSearchPatternInEmptyString() {
35+
let string = ""
36+
let pattern = "ABCDEF"
37+
assert(pattern: pattern, doesNotExistsIn: string)
38+
}
39+
40+
func testSearchEmptyPatternString() {
41+
let string = "ABCDEF"
42+
let pattern = ""
43+
assert(pattern: pattern, doesNotExistsIn: string)
44+
}
45+
46+
func testSearchPatternLongerThanString() {
47+
let string = "ABC"
48+
let pattern = "ABCDEF"
49+
assert(pattern: pattern, doesNotExistsIn: string)
50+
}
51+
3452
func testSearchTheSameString() {
3553
let string = "ABCDEF"
3654
let pattern = "ABCDEF"

0 commit comments

Comments
 (0)