|
1 | 1 | //: Playground - noun: a place where people can play
|
2 | 2 |
|
| 3 | +/* |
| 4 | + Boyer-Moore string search |
| 5 | + |
| 6 | + This code is based on the article "Faster String Searches" by Costas Menico |
| 7 | + from Dr Dobb's magazine, July 1989. |
| 8 | + http://www.drdobbs.com/database/faster-string-searches/184408171 |
| 9 | +*/ |
extension String {
    /// Returns the index of the first occurrence of `pattern` in this string,
    /// located with a Boyer-Moore search.
    ///
    /// - Parameters:
    ///   - pattern: The text to search for.
    ///   - usingHorspoolImprovement: When `true`, a failed candidate match is
    ///     followed by a skip-table jump instead of a one-character step. The
    ///     result is the same either way; only the number of characters
    ///     inspected differs.
    /// - Returns: The index of the first character of the first match, or `nil`
    ///   when either string is empty, the pattern is longer than this string,
    ///   or the pattern does not occur.
    func index(of pattern: String, usingHorspoolImprovement: Bool = false) -> Index? {
        // Neither an empty string nor an empty pattern can produce a match.
        guard !isEmpty, !pattern.isEmpty else { return nil }

        // Cache the length of the search pattern because we're going to
        // use it a few times and it's expensive to calculate. It is measured
        // as an index distance, so no separate character view is needed.
        let patternLength = pattern.distance(from: pattern.startIndex, to: pattern.endIndex)

        // This points at the last character in the pattern.
        let p = pattern.index(before: pattern.endIndex)
        let lastChar = pattern[p]

        // The pattern is scanned right-to-left, so skip ahead in the string by
        // the length of the pattern. (Minus 1 because startIndex already points
        // at the first character in the source string.) The bounded offset
        // doubles as the "pattern is longer than the string" check without
        // counting the whole string.
        guard let start = index(startIndex, offsetBy: patternLength - 1, limitedBy: endIndex),
              start < endIndex else { return nil }
        var i = start

        // Make the skip table. This table determines how far we skip ahead
        // when a character from the pattern is found. The last pattern
        // character is deliberately left out: its own position would always
        // yield a shift of 0, hiding the distance to an earlier occurrence.
        var skipTable = [Character: Int]()
        var shift = patternLength - 1
        var cursor = pattern.startIndex
        while cursor != p {
            skipTable[pattern[cursor]] = shift
            shift -= 1
            cursor = pattern.index(after: cursor)
        }

        // This is a helper function that steps backwards through both strings
        // until we find a character that doesn't match, or until we've reached
        // the beginning of the pattern.
        func backwards() -> Index? {
            var q = p
            var j = i
            while q > pattern.startIndex {
                j = index(before: j)
                // `q` belongs to `pattern`, so it must be moved by `pattern`.
                q = pattern.index(before: q)
                if self[j] != pattern[q] { return nil }
            }
            return j
        }

        // The main loop. Keep going until the end of the string is reached.
        while i < endIndex {
            let c = self[i]
            let jump: Int

            // Does the current character match the last character from the pattern?
            if c == lastChar {
                // There is a possible match. Do a brute-force search backwards.
                if let k = backwards() { return k }

                // No match. Without the Horspool improvement we only step one
                // character ahead; with it we jump to the previous occurrence
                // of this character in the pattern, or past the whole pattern
                // if there is none. Every table entry is at least 1.
                jump = usingHorspoolImprovement ? (skipTable[c] ?? patternLength) : 1
            } else {
                // The characters are not equal, so skip ahead. The amount to skip is
                // determined by the skip table. If the character is not present in the
                // pattern, we can skip ahead by the full pattern length. However, if
                // the character *is* present in the pattern, there may be a match up
                // ahead and we can't skip as far.
                jump = skipTable[c] ?? patternLength
            }

            // Clamp to endIndex so an overshooting jump just ends the loop.
            i = index(i, offsetBy: jump, limitedBy: endIndex) ?? endIndex
        }
        return nil
    }
}
|
65 | 81 |
|
// A few simple tests. The trailing comments give the position of the match
// as a character offset from the start of the searched string.

let str = "Hello, World"
str.index(of: "World")  // character offset 7

let animals = "🐶🐔🐷🐮🐱"
// Each emoji before the match is a UTF-16 surrogate pair, so the match is at
// UTF-16 offset 6.
animals.index(of: "🐮")  // character offset 3

let lorem = "Lorem ipsum dolor sit amet"
lorem.index(of: "sit", usingHorspoolImprovement: true)  // character offset 18
0 commit comments