1010import simplejson as json
1111
1212
13- def read_file (file_path ):
14- """Read in the json dataset file and return a list of python dicts."""
15- file_contents = []
def read_and_write_file(json_file_path, csv_file_path, column_names):
    """Stream a json dataset file into a csv file, one csv row per json line.

    The first csv row is ``column_names`` (in its iteration order). Each line
    of ``json_file_path`` is then parsed as a json document and turned into a
    csv row by ``get_row`` using the same ``column_names``.
    """
    # NOTE(review): binary mode is what the Python 2 csv module expects;
    # confirm the interpreter version before changing it.
    with open(csv_file_path, 'wb+') as csv_handle:
        writer = csv.writer(csv_handle)
        writer.writerow(list(column_names))
        with open(json_file_path) as json_handle:
            # Rows are generated lazily so the dataset is never held in memory.
            writer.writerows(
                get_row(json.loads(raw_line), column_names)
                for raw_line in json_handle
            )
22+
def get_superset_of_column_names_from_file(json_file_path):
    """Return the set of all column names found in the json dataset file.

    Every line of the file is parsed as a json document; the keys that
    ``get_column_names`` yields for each document are merged into one set.
    """
    superset = set()
    with open(json_file_path) as json_handle:
        for raw_line in json_handle:
            flattened = get_column_names(json.loads(raw_line))
            superset |= set(flattened.keys())
    return superset
2533
2634def get_column_names (line_contents , parent_key = '' ):
2735 """Return a list of flattened key names given a dict.
@@ -93,15 +101,6 @@ def get_row(line_contents, column_names):
93101 row .append ('' )
94102 return row
95103
def write_file(file_path, file_contents, column_names):
    """Create and write a csv file from the parsed json dataset contents.

    ``column_names`` is written as the header row (in its iteration order),
    followed by one row per entry of ``file_contents``, each built by
    ``get_row`` with the same ``column_names``.

    Only ``file_path`` is opened: the previous version additionally opened a
    file literally named 'file_path' and never closed it.
    """
    # NOTE(review): binary mode is what the Python 2 csv module expects;
    # confirm the interpreter version before changing it.
    with open(file_path, 'wb+') as fout:
        csv_file = csv.writer(fout)
        csv_file.writerow(list(column_names))
        for line_contents in file_contents:
            csv_file.writerow(get_row(line_contents, column_names))
104-
105104if __name__ == '__main__' :
106105 """Convert a yelp dataset file from json to csv."""
107106
@@ -120,5 +119,5 @@ def write_file(file_path, file_contents, column_names):
120119 json_file = args .json_file
121120 csv_file = '{0}.csv' .format (json_file .split ('.json' )[0 ])
122121
123- file_contents , column_names = read_file (json_file )
124- write_file ( csv_file , file_contents , column_names )
122+ column_names = get_superset_of_column_names_from_file (json_file )
123+ read_and_write_file ( json_file , csv_file , column_names )
0 commit comments