Skip to content

Commit fa0067d

Browse files
committed
merges pull request by twmb/diff@c86783d24e76297f7a20a548c4e7ec4e9e93f9fb with minor changes
2 parents 55a6d30 + 2389af5 commit fa0067d

File tree

4 files changed

+171
-33
lines changed

4 files changed

+171
-33
lines changed

README.md

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ The algorithm is described by Eugene Myers in
88

99
Example
1010
-------
11-
You can use diff.Ints and diff.Runes
11+
You can use diff.Ints, diff.Runes, diff.ByteStrings, and diff.Bytes
1212

1313
diff.Runes([]rune("sögen"), []rune("mögen")) // returns []Change{{0,0,1,1}}
1414

@@ -27,4 +27,8 @@ and call
2727
m := &MixedInput{..}
2828
diff.Diff(len(m.A), len(m.B), m)
2929

30+
Also has granularity functions to merge changes that are close by.
31+
32+
diff.Granular(1, diff.ByteStrings("emtire", "umpire")) // returns []Change{{0,0,3,3}}
33+
3034
Documentation at http://godoc.org/github.com/mb0/diff

diff.go

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,15 @@ type Data interface {
1313
Equal(i, j int) bool
1414
}
1515

16+
// ByteStrings returns the differences of two strings in bytes.
17+
func ByteStrings(a, b string) []Change {
18+
return Diff(len(a), len(b), &strings{a, b})
19+
}
20+
21+
type strings struct{ a, b string }
22+
23+
func (d *strings) Equal(i, j int) bool { return d.a[i] == d.b[j] }
24+
1625
// Bytes returns the difference of two byte slices
1726
func Bytes(a, b []byte) []Change {
1827
return Diff(len(a), len(b), &bytes{a, b})
@@ -40,6 +49,31 @@ type runes struct{ a, b []rune }
4049

4150
func (d *runes) Equal(i, j int) bool { return d.a[i] == d.b[j] }
4251

52+
// Granular merges neighboring changes smaller than the specified granularity.
53+
// The changes must be ordered by ascending positions as returned by this package.
54+
func Granular(granularity int, changes []Change) []Change {
55+
if len(changes) == 0 {
56+
return changes
57+
}
58+
gap := 0
59+
for i := 1; i < len(changes); i++ {
60+
curr := changes[i]
61+
prev := changes[i-gap-1]
62+
// same as curr.B-(prev.B+prev.Ins); consistency is key
63+
if curr.A-(prev.A+prev.Del) <= granularity {
64+
// merge changes:
65+
curr = Change{
66+
A: prev.A, B: prev.B, // start at same spot
67+
Del: curr.A - prev.A + curr.Del, // from first to end of second
68+
Ins: curr.B - prev.B + curr.Ins, // from first to end of second
69+
}
70+
gap++
71+
}
72+
changes[i-gap] = curr
73+
}
74+
return changes[:len(changes)-gap : len(changes)-gap]
75+
}
76+
4377
// Diff returns the differences of data.
4478
// data.Equal is called repeatedly with 0<=i<n and 0<=j<m
4579
func Diff(n, m int, data Data) []Change {

diff_test.go

Lines changed: 105 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -2,63 +2,64 @@
22
// Use of this source code is governed by a BSD-style
33
// license that can be found in the LICENSE file.
44

5-
package diff
5+
package diff_test
66

77
import (
8+
"github.com/mb0/diff"
89
"testing"
910
)
1011

1112
type testcase struct {
1213
name string
1314
a, b []int
14-
res []Change
15+
res []diff.Change
1516
}
1617

1718
var tests = []testcase{
1819
{"shift",
1920
[]int{1, 2, 3},
2021
[]int{0, 1, 2, 3},
21-
[]Change{{0, 0, 0, 1}},
22+
[]diff.Change{{0, 0, 0, 1}},
2223
},
2324
{"push",
2425
[]int{1, 2, 3},
2526
[]int{1, 2, 3, 4},
26-
[]Change{{3, 3, 0, 1}},
27+
[]diff.Change{{3, 3, 0, 1}},
2728
},
2829
{"unshift",
2930
[]int{0, 1, 2, 3},
3031
[]int{1, 2, 3},
31-
[]Change{{0, 0, 1, 0}},
32+
[]diff.Change{{0, 0, 1, 0}},
3233
},
3334
{"pop",
3435
[]int{1, 2, 3, 4},
3536
[]int{1, 2, 3},
36-
[]Change{{3, 3, 1, 0}},
37+
[]diff.Change{{3, 3, 1, 0}},
3738
},
3839
{"all changed",
3940
[]int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9},
4041
[]int{10, 11, 12, 13, 14},
41-
[]Change{
42+
[]diff.Change{
4243
{0, 0, 10, 5},
4344
},
4445
},
4546
{"all same",
4647
[]int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9},
4748
[]int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9},
48-
[]Change{},
49+
[]diff.Change{},
4950
},
5051
{"wrap",
5152
[]int{1},
5253
[]int{0, 1, 2, 3},
53-
[]Change{
54+
[]diff.Change{
5455
{0, 0, 0, 1},
5556
{1, 2, 0, 2},
5657
},
5758
},
5859
{"snake",
5960
[]int{0, 1, 2, 3, 4, 5},
6061
[]int{1, 2, 3, 4, 5, 6},
61-
[]Change{
62+
[]diff.Change{
6263
{0, 0, 1, 0},
6364
{6, 5, 0, 1},
6465
},
@@ -69,7 +70,7 @@ var tests = []testcase{
6970
{"paper fig. 1",
7071
[]int{1, 2, 3, 1, 2, 2, 1},
7172
[]int{3, 2, 1, 2, 1, 3},
72-
[]Change{
73+
[]diff.Change{
7374
{0, 0, 1, 1},
7475
{2, 2, 1, 0},
7576
{5, 4, 1, 0},
@@ -80,7 +81,7 @@ var tests = []testcase{
8081

8182
func TestDiffAB(t *testing.T) {
8283
for _, test := range tests {
83-
res := Ints(test.a, test.b)
84+
res := diff.Ints(test.a, test.b)
8485
if len(res) != len(test.res) {
8586
t.Error(test.name, "expected length", len(test.res), "for", res)
8687
continue
@@ -95,36 +96,87 @@ func TestDiffAB(t *testing.T) {
9596

9697
func TestDiffBA(t *testing.T) {
9798
// interesting: fig.1 Diff(b, a) results in the same path as `diff -d a b`
98-
tests[len(tests)-1].res = []Change{
99+
tests[len(tests)-1].res = []diff.Change{
99100
{0, 0, 2, 0},
100101
{3, 1, 1, 0},
101102
{5, 2, 0, 1},
102103
{7, 5, 0, 1},
103104
}
104105
for _, test := range tests {
105-
res := Ints(test.b, test.a)
106+
res := diff.Ints(test.b, test.a)
106107
if len(res) != len(test.res) {
107108
t.Error(test.name, "expected length", len(test.res), "for", res)
108109
continue
109110
}
110111
for i, c := range test.res {
111112
// flip change data also
112-
rc := Change{c.B, c.A, c.Ins, c.Del}
113+
rc := diff.Change{c.B, c.A, c.Ins, c.Del}
113114
if rc != res[i] {
114115
t.Error(test.name, "expected ", rc, "got", res[i])
115116
}
116117
}
117118
}
118119
}
119120

121+
func diffsEqual(a, b []diff.Change) bool {
122+
if len(a) != len(b) {
123+
return false
124+
}
125+
for i := 0; i < len(a); i++ {
126+
if a[i] != b[i] {
127+
return false
128+
}
129+
}
130+
return true
131+
}
132+
133+
func TestGranularStrings(t *testing.T) {
134+
a := "abcdefghijklmnopqrstuvwxyza"
135+
b := "AbCdeFghiJklmnOpqrstUvwxyzab"
136+
// each iteration of i increases granularity and will absorb one more lower-letter-followed-by-upper-letters sequence
137+
changesI := [][]diff.Change{
138+
{{0, 0, 1, 1}, {2, 2, 1, 1}, {5, 5, 1, 1}, {9, 9, 1, 1}, {14, 14, 1, 1}, {20, 20, 1, 1}, {27, 27, 0, 1}},
139+
{{0, 0, 3, 3}, {5, 5, 1, 1}, {9, 9, 1, 1}, {14, 14, 1, 1}, {20, 20, 1, 1}, {27, 27, 0, 1}},
140+
{{0, 0, 6, 6}, {9, 9, 1, 1}, {14, 14, 1, 1}, {20, 20, 1, 1}, {27, 27, 0, 1}},
141+
{{0, 0, 10, 10}, {14, 14, 1, 1}, {20, 20, 1, 1}, {27, 27, 0, 1}},
142+
{{0, 0, 15, 15}, {20, 20, 1, 1}, {27, 27, 0, 1}},
143+
{{0, 0, 21, 21}, {27, 27, 0, 1}},
144+
{{0, 0, 27, 28}},
145+
}
146+
for i := 0; i < len(changesI); i++ {
147+
diffs := diff.Granular(i, diff.ByteStrings(a, b))
148+
if !diffsEqual(diffs, changesI[i]) {
149+
t.Errorf("expected %v, got %v", diffs, changesI[i])
150+
}
151+
}
152+
}
153+
120154
func TestDiffRunes(t *testing.T) {
121-
d := &runes{
122-
[]rune("brown fox jumps over the lazy dog"),
123-
[]rune("brwn faax junps ovver the lay dago"),
155+
a := []rune("brown fox jumps over the lazy dog")
156+
b := []rune("brwn faax junps ovver the lay dago")
157+
res := diff.Runes(a, b)
158+
echange := []diff.Change{
159+
{2, 2, 1, 0},
160+
{7, 6, 1, 2},
161+
{12, 12, 1, 1},
162+
{18, 18, 0, 1},
163+
{27, 28, 1, 0},
164+
{31, 31, 0, 2},
165+
{32, 34, 1, 0},
124166
}
125-
n, m := len(d.a), len(d.b)
126-
res := Diff(n, m, d)
127-
echange := []Change{
167+
for i, c := range res {
168+
t.Log(c)
169+
if c != echange[i] {
170+
t.Error("expected", echange[i], "got", c)
171+
}
172+
}
173+
}
174+
175+
func TestDiffByteStrings(t *testing.T) {
176+
a := "brown fox jumps over the lazy dog"
177+
b := "brwn faax junps ovver the lay dago"
178+
res := diff.ByteStrings(a, b)
179+
echange := []diff.Change{
128180
{2, 2, 1, 0},
129181
{7, 6, 1, 2},
130182
{12, 12, 1, 1},
@@ -141,19 +193,47 @@ func TestDiffRunes(t *testing.T) {
141193
}
142194
}
143195

196+
// ints pairs two int slices so they can be passed to diff.Diff as its data
// argument.
type ints struct {
	a []int
	b []int
}

// Equal reports whether element i of a equals element j of b.
func (d *ints) Equal(i, j int) bool {
	left, right := d.a[i], d.b[j]
	return left == right
}
144199
func BenchmarkDiff(b *testing.B) {
145200
t := tests[len(tests)-1]
146201
d := &ints{t.a, t.b}
147202
n, m := len(d.a), len(d.b)
148203
for i := 0; i < b.N; i++ {
149-
Diff(n, m, d)
204+
diff.Diff(n, m, d)
205+
}
206+
}
207+
208+
func BenchmarkInts(b *testing.B) {
209+
t := tests[len(tests)-1]
210+
d1 := t.a
211+
d2 := t.b
212+
for i := 0; i < b.N; i++ {
213+
diff.Ints(d1, d2)
150214
}
151215
}
152216

153217
func BenchmarkDiffRunes(b *testing.B) {
154-
d := &runes{[]rune("1231221"), []rune("321213")}
155-
n, m := len(d.a), len(d.b)
218+
d1 := []rune("1231221")
219+
d2 := []rune("321213")
220+
for i := 0; i < b.N; i++ {
221+
diff.Runes(d1, d2)
222+
}
223+
}
224+
225+
func BenchmarkDiffBytes(b *testing.B) {
226+
d1 := []byte("lorem ipsum dolor sit amet consectetur")
227+
d2 := []byte("lorem lovesum daenerys targaryen ami consecteture")
228+
for i := 0; i < b.N; i++ {
229+
diff.Bytes(d1, d2)
230+
}
231+
}
232+
233+
func BenchmarkDiffByteStrings(b *testing.B) {
234+
d1 := "lorem ipsum dolor sit amet consectetur"
235+
d2 := "lorem lovesum daenerys targaryen ami consecteture"
156236
for i := 0; i < b.N; i++ {
157-
Diff(n, m, d)
237+
diff.ByteStrings(d1, d2)
158238
}
159239
}

example_test.go

Lines changed: 27 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -9,23 +9,25 @@ import (
99
"github.com/mb0/diff"
1010
)
1111

12-
var names = map[string]int{
13-
"one": 1,
14-
"two": 2,
15-
"three": 3,
16-
}
17-
1812
// Diff on inputs with different representations
1913
type MixedInput struct {
2014
A []int
2115
B []string
2216
}
2317

18+
var names map[string]int
19+
2420
func (m *MixedInput) Equal(a, b int) bool {
2521
return m.A[a] == names[m.B[b]]
2622
}
2723

28-
func ExampleInterface() {
24+
func ExampleDiff() {
25+
names = map[string]int{
26+
"one": 1,
27+
"two": 2,
28+
"three": 3,
29+
}
30+
2931
m := &MixedInput{
3032
[]int{1, 2, 3, 1, 2, 2, 1},
3133
[]string{"three", "two", "one", "two", "one", "three"},
@@ -34,4 +36,22 @@ func ExampleInterface() {
3436
for _, c := range changes {
3537
fmt.Println("change at", c.A, c.B)
3638
}
39+
// Output:
40+
// change at 0 0
41+
// change at 2 2
42+
// change at 5 4
43+
// change at 7 5
44+
}
45+
46+
func ExampleGranularStrings() {
47+
a := "hElLo!"
48+
b := "hello!"
49+
changes := diff.Granular(5, diff.ByteStrings(a, b)) // ignore small gaps in differences
50+
for l := len(changes) - 1; l >= 0; l-- {
51+
change := changes[l]
52+
b = b[:change.B] + "|" + b[change.B:change.B+change.Ins] + "|" + b[change.B+change.Ins:]
53+
}
54+
fmt.Println(b)
55+
// Output:
56+
// h|ell|o!
3757
}

0 commit comments

Comments
 (0)