@@ -3,6 +3,55 @@ class CSVDiff
33 # Implements the CSV diff algorithm.
44 module Algorithm
55
# Holds the details of a single difference.
#
# A Diff records the kind of change (the callers in this file use :delete,
# :update, :move and :add), the field values associated with the change,
# and the 1-based row and sibling position at which the change was found.
class Diff

  # The kind of difference; writable so that a diff can be re-classified.
  attr_accessor :diff_type
  # The field values supplied for this difference (indexed by field name).
  attr_reader :fields
  # The 1-based row number of the record this difference relates to.
  attr_reader :row
  # The 1-based sibling position: an Integer, or an Array of positions
  # when the supplied positions differ (e.g. a move from one to another).
  attr_reader :sibling_position

  # @param diff_type [Symbol] The kind of difference.
  # @param fields [Hash] The field values for this difference.
  # @param row_idx [Integer] The 0-based row index; stored 1-based.
  # @param pos_idx [Integer, Array<Integer>] The 0-based sibling index, or
  #   an array of indexes; see #sibling_position=.
  def initialize(diff_type, fields, row_idx, pos_idx)
    @diff_type = diff_type
    @fields = fields
    @row = row_idx + 1
    self.sibling_position = pos_idx
  end


  # Sets the sibling position from one or more 0-based indexes.
  #
  # Nil entries are ignored. If the first and last remaining indexes
  # differ, all remaining indexes are stored (converted to 1-based) as an
  # Array; otherwise a single 1-based Integer is stored. If no index
  # remains, the position is set to nil.
  #
  # The argument is never modified, so frozen or shared arrays are safe to
  # pass in.
  #
  # @param pos_idx [Integer, Array<Integer, nil>, nil] 0-based index(es).
  def sibling_position=(pos_idx)
    # Array() wraps a scalar and turns nil into []; #compact returns a new
    # array rather than altering the caller's object.
    positions = Array(pos_idx).compact
    @sibling_position =
      if positions.empty?
        nil
      elsif positions.first != positions.last
        positions.map { |pos| pos + 1 }
      else
        positions.first + 1
      end
  end


  # For backwards compatibility and access to fields with differences.
  #
  # @param key [String, Symbol] A field name (String), or one of :action,
  #   :row or :sibling_position.
  # @return [Object, nil] The field value for a String key; the diff type
  #   with its first character upper-cased for :action; the row or sibling
  #   position for :row / :sibling_position; nil for any other key.
  def [](key)
    case key
    when String
      @fields[key]
    when :action
      # Build a new string: String#to_s returns the receiver itself, so an
      # in-place edit would corrupt a String diff_type.
      diff_type.to_s.sub(/\A./) { |first| first.upcase }
    when :row
      @row
    when :sibling_position
      @sibling_position
    end
  end

end
53+
54+
655 # Diffs two CSVSource structures.
756 #
857 # @param left [CSVSource] A CSVSource object containing the contents of
@@ -33,74 +82,83 @@ def diff_sources(left, right, key_fields, diff_fields, options = {})
3382 right_index = right . index
3483 right_values = right . lines
3584 right_keys = right_values . keys
36- parent_fields = left . parent_fields . length
85+ parent_field_count = left . parent_fields . length
3786
3887 include_adds = !options [ :ignore_adds ]
3988 include_moves = !options [ :ignore_moves ]
4089 include_updates = !options [ :ignore_updates ]
4190 include_deletes = !options [ :ignore_deletes ]
4291
43- diffs = Hash . new { |h , k | h [ k ] = { } }
92+ diffs = { }
93+ potential_moves = Hash . new { |h , k | h [ k ] = [ ] }
4494
4595 # First identify deletions
4696 if include_deletes
4797 ( left_keys - right_keys ) . each do |key |
4898 # Delete
4999 key_vals = key . split ( '~' , -1 )
50- parent = key_vals [ 0 ...parent_fields ] . join ( '~' )
100+ parent = key_vals [ 0 ...parent_field_count ] . join ( '~' )
101+ child = key_vals [ parent_field_count ..-1 ] . join ( '~' )
51102 left_parent = left_index [ parent ]
52103 left_value = left_values [ key ]
53- left_idx = left_parent . index ( key )
54- next unless left_idx
55- id = { }
56- id [ :row ] = left_keys . index ( key ) + 1
57- id [ :sibling_position ] = left_idx + 1
58- key_fields . each do |field_name |
59- id [ field_name ] = left_value [ field_name ]
60- end
61- diffs [ key ] . merge! ( id . merge ( left_values [ key ] . merge ( :action => 'Delete' ) ) )
104+ row_idx = left_keys . index ( key )
105+ sib_idx = left_parent . index ( key )
106+ raise "Can't locate key #{ key } in parent #{ parent } " unless sib_idx
107+ diffs [ key ] = d = Diff . new ( :delete , left_value , row_idx , sib_idx )
108+ potential_moves [ child ] << key
62109 #puts "Delete: #{key}"
63110 end
64111 end
65112
66113 # Now identify adds/updates
67114 right_keys . each_with_index do |key , right_row_id |
68115 key_vals = key . split ( '~' , -1 )
69- parent = key_vals [ 0 ...parent_fields ] . join ( '~' )
116+ parent = key_vals [ 0 ...parent_field_count ] . join ( '~' )
70117 left_parent = left_index [ parent ]
71118 right_parent = right_index [ parent ]
72119 left_value = left_values [ key ]
73120 right_value = right_values [ key ]
74121 left_idx = left_parent && left_parent . index ( key )
75122 right_idx = right_parent && right_parent . index ( key )
76123
77- id = { }
78- id [ :row ] = right_row_id + 1
79- id [ :sibling_position ] = right_idx + 1
80- key_fields . each do |field_name |
81- id [ field_name ] = right_value [ field_name ]
82- end
83124 if left_idx && right_idx
125+ if include_updates && ( changes = diff_row ( left_value , right_value , diff_fields , case_sensitive ) )
126+ id = id_fields ( key_fields , right_value )
127+ diffs [ key ] = Diff . new ( :update , id . merge! ( changes ) , right_row_id , right_idx )
128+ #puts "Change: #{key}"
129+ end
84130 if include_moves
85131 left_common = left_parent & right_parent
86132 right_common = right_parent & left_parent
87133 left_pos = left_common . index ( key )
88134 right_pos = right_common . index ( key )
89135 if left_pos != right_pos
90136 # Move
91- diffs [ key ] . merge! ( id . merge! ( :action => 'Move' ,
92- :sibling_position => [ left_idx + 1 , right_idx + 1 ] ) )
137+ if d = diffs [ key ]
138+ d . sibling_position = [ left_idx , right_idx ]
139+ else
140+ id = id_fields ( key_fields , right_value )
141+ diffs [ key ] = Diff . new ( :move , id , right_row_id , [ left_idx , right_idx ] )
142+ end
93143 #puts "Move #{left_idx} -> #{right_idx}: #{key}"
94144 end
95145 end
96- if include_updates && ( changes = diff_row ( left_value , right_value , diff_fields , case_sensitive ) )
97- diffs [ key ] . merge! ( id . merge ( changes . merge ( :action => 'Update' ) ) )
98- #puts "Change: #{key}"
99- end
100- elsif include_adds && right_idx
146+ elsif right_idx
101147 # Add
102- diffs [ key ] . merge! ( id . merge ( right_values [ key ] . merge ( :action => 'Add' ) ) )
103- #puts "Add: #{key}"
148+ child = key_vals [ parent_field_count ..-1 ] . join ( '~' )
149+ if potential_moves . has_key? ( child ) && old_key = potential_moves [ child ] . pop
150+ diffs . delete ( old_key )
151+ if include_updates
152+ left_value = left_values [ old_key ]
153+ id = id_fields ( right . child_fields , right_value )
154+ changes = diff_row ( left_value , right_value , left . parent_fields + diff_fields , case_sensitive )
155+ diffs [ key ] = Diff . new ( :update , id . merge! ( changes ) , right_row_id , right_idx )
156+ #puts "Update Parent: #{key}"
157+ end
158+ elsif include_adds
159+ diffs [ key ] = Diff . new ( :add , right_value , right_row_id , right_idx )
160+ #puts "Add: #{key}"
161+ end
104162 end
105163 end
106164
@@ -137,6 +195,19 @@ def diff_row(left_row, right_row, fields, case_sensitive)
137195 diffs if diffs . size > 0
138196 end
139197
198+
199+ private
200+
201+
# Builds a new hash holding only the entries named in +key_fields+.
#
# @param key_fields [Array] The names of the fields to extract.
# @param fields [#[]] The source of field values, indexed by field name.
# @return [Hash] A hash mapping each name in key_fields to fields[name]
#   (nil when the lookup yields nil).
def id_fields(key_fields, fields)
  key_fields.each_with_object({}) do |name, picked|
    picked[name] = fields[name]
  end
end
210+
140211 end
141212
142213end
0 commit comments