11#!/usr/bin/env python3
22import argparse
33import json
4+ import os
5+ import re
46import sys
57from pathlib import Path
68from typing import Literal
1113Status = Literal ["OK" , "BAD" , "FILE_ERROR" ]
1214
1315
16+ def parse_accessor (accessor_string : str ) -> list [str | int ]:
17+ """
18+ Parses a field accessor string like "['key'][0]" into a list ['key', 0].
19+ This allows for programmatic access to nested JSON elements.
20+ """
21+ # Regex to find content within brackets, e.g., ['key'] or [0]
22+ parts = re .findall (r"\[([^\]]+)\]" , accessor_string )
23+ keys = []
24+ for part in parts :
25+ try :
26+ # Try to convert to an integer for list indices
27+ keys .append (int (part ))
28+ except ValueError :
29+ # Otherwise, it's a string key; strip surrounding quotes
30+ keys .append (part .strip ("'\" " ))
31+ return keys
32+
33+
34+ def delete_path (data : dict | list , path : list [str | int ]):
35+ """
36+ Deletes a value from a nested dictionary or list based on a path.
37+ This function modifies the data in place. If the path is invalid
38+ or doesn't exist, it does nothing.
39+ """
40+ if not path :
41+ return
42+
43+ # Traverse to the parent of the target element to delete it
44+ parent = data
45+ key_to_delete = path [- 1 ]
46+ path_to_parent = path [:- 1 ]
47+
48+ try :
49+ for key in path_to_parent :
50+ parent = parent [key ]
51+
52+ # Check if the final key/index exists in the parent before deleting
53+ if isinstance (parent , dict ) and key_to_delete in parent :
54+ del parent [key_to_delete ]
55+ elif (
56+ isinstance (parent , list )
57+ and isinstance (key_to_delete , int )
58+ and 0 <= key_to_delete < len (parent )
59+ ):
60+ del parent [key_to_delete ]
61+ except (KeyError , IndexError , TypeError ):
62+ # Path is invalid (e.g., key missing, index out of bounds). Ignore and proceed.
63+ pass
64+
65+
1466def format_diff_custom (diff : DeepDiff ) -> str :
1567 """
1668 Formats a DeepDiff object into a custom human-readable string.
@@ -65,27 +117,39 @@ def format_value(value):
65117 return "\n " .join (output )
66118
67119
68- def compare_json_files (file1_path : Path , file2_path : Path ) -> Status :
120+ def compare_json_files (
121+ file1_path : Path , file2_path : Path , ignore_fields : list [str ] | None = None
122+ ) -> tuple [Status , DeepDiff | None ]:
69123 """
70- Compares the content of two JSON files without printing output .
124+ Compares two JSON files, optionally ignoring specified fields .
71125
72126 Returns:
73- "OK" if they match, "BAD" if they don't, "FILE_ERROR" on read/parse error.
127+ A tuple containing the status ("OK", "BAD", "FILE_ERROR")
128+ and the DeepDiff object if differences were found.
74129 """
75130 try :
76131 with open (file1_path , "r" , encoding = "utf-8" ) as f1 :
77132 json1 = json .load (f1 )
78133 with open (file2_path , "r" , encoding = "utf-8" ) as f2 :
79134 json2 = json .load (f2 )
80135 except (FileNotFoundError , json .JSONDecodeError ):
81- return "FILE_ERROR"
136+ return "FILE_ERROR" , None
137+
138+ # Delete ignored fields from both JSON objects before comparison
139+ if ignore_fields :
140+ for field_accessor in ignore_fields :
141+ path = parse_accessor (field_accessor )
142+ delete_path (json1 , path )
143+ delete_path (json2 , path )
82144
83145 diff = DeepDiff (json1 , json2 , ignore_order = True )
84146
85- return "BAD" if diff else "OK"
147+ return ( "BAD" , diff ) if diff else ( "OK" , None )
86148
87149
88- def process_directory_comparison (old_dir : Path , new_dir : Path ) -> bool :
150+ def process_directory_comparison (
151+ old_dir : Path , new_dir : Path , ignore_fields : list [str ] | None = None
152+ ) -> bool :
89153 """
90154 Compares JSON files across two directories and prints results in a list format.
91155 """
@@ -94,7 +158,9 @@ def process_directory_comparison(old_dir: Path, new_dir: Path) -> bool:
94158 new_files = {p .name for p in new_dir .glob ("*.json" )}
95159
96160 for filename in sorted (old_files .intersection (new_files )):
97- status = compare_json_files (old_dir / filename , new_dir / filename )
161+ status , _ = compare_json_files (
162+ old_dir / filename , new_dir / filename , ignore_fields
163+ )
98164 results ["BAD" if status != "OK" else "OK" ].append (filename )
99165
100166 for filename in sorted (old_files - new_files ):
@@ -125,8 +191,25 @@ def main():
125191 parser .add_argument (
126192 "path2" , type = Path , help = "Path to the second file or 'new' directory."
127193 )
194+ parser .add_argument (
195+ "-i" ,
196+ "--ignore" ,
197+ action = "append" ,
198+ default = [],
199+ help = "Field to ignore, as an accessor string. Can be used multiple times. "
200+ "Also reads whitespace-separated values from $DIFFJSON_IGNORE. "
201+ "Example: -i \" ['metadata']['timestamp']\" " ,
202+ )
128203 args = parser .parse_args ()
129204
205+ # --- Combine ignore fields from CLI and environment variable ---
206+ cli_ignore_fields = args .ignore
207+ env_ignore_str = os .environ .get ("DIFFJSON_IGNORE" , "" )
208+ env_ignore_fields = env_ignore_str .split () if env_ignore_str else []
209+
210+ # Combine both sources and remove duplicates
211+ all_ignore_fields = list (set (cli_ignore_fields + env_ignore_fields ))
212+
130213 path1 , path2 = args .path1 , args .path2
131214
132215 if not path1 .exists () or not path2 .exists ():
@@ -139,7 +222,7 @@ def main():
139222 # --- Handle Directory Comparison ---
140223 if path1 .is_dir () and path2 .is_dir ():
141224 print (f"Comparing directories:\n - Old: { path1 } \n - New: { path2 } \n " )
142- if process_directory_comparison (path1 , path2 ):
225+ if process_directory_comparison (path1 , path2 , all_ignore_fields ):
143226 print ("\n Comparison finished with errors." , file = sys .stderr )
144227 return 1
145228 else :
@@ -148,23 +231,17 @@ def main():
148231
149232 # --- Handle Single File Comparison ---
150233 elif path1 .is_file () and path2 .is_file ():
151- try :
152- with open (path1 , "r" , encoding = "utf-8" ) as f1 :
153- json1 = json .load (f1 )
154- with open (path2 , "r" , encoding = "utf-8" ) as f2 :
155- json2 = json .load (f2 )
156- except (FileNotFoundError , json .JSONDecodeError ) as e :
157- print (f"Error reading or parsing file: { e } " , file = sys .stderr )
158- return 1
234+ status , diff = compare_json_files (path1 , path2 , all_ignore_fields )
159235
160- diff = DeepDiff (json1 , json2 , ignore_order = True )
236+ if status == "FILE_ERROR" :
237+ print ("Error reading or parsing a file." , file = sys .stderr )
238+ return 1
161239
162- if diff :
240+ if status == "BAD" and diff :
163241 print (
164242 f"Differences found between '{ path1 .name } ' and '{ path2 .name } ':\n " ,
165243 file = sys .stderr ,
166244 )
167- # Format the diff into a custom readable format and print to stderr
168245 custom_output = format_diff_custom (diff )
169246 print (custom_output , file = sys .stderr )
170247 return 1
0 commit comments