Skip to content
This repository was archived by the owner on May 13, 2025. It is now read-only.

Commit 338b69b

Browse files
author
Scott Clark
committed
Made json to csv converter more memory efficient
1 parent edc7966 commit 338b69b

File tree

1 file changed

+16
-17
lines changed

1 file changed

+16
-17
lines changed

json_to_csv_converter.py

Lines changed: 16 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -10,18 +10,26 @@
1010
import simplejson as json
1111

1212

13-
def read_file(file_path):
14-
"""Read in the json dataset file and return a list of python dicts."""
15-
file_contents = []
13+
def read_and_write_file(json_file_path, csv_file_path, column_names):
    """Read in the json dataset file and write it out to a csv file.

    The csv file is created (or truncated) first and a header row holding
    ``column_names`` is written to it.  The json file is then read one line
    at a time and each decoded record is converted to a csv row by
    ``get_row`` and written immediately, so the records are never collected
    in a list.

    Blank or whitespace-only lines in the json file are skipped rather than
    handed to the json parser, which cannot decode an empty document.

    Args:
        json_file_path: Path of the input file, one json document per line.
        csv_file_path: Path of the csv file to write.
        column_names: Iterable of column names; used for the header row and
            passed unchanged to ``get_row`` for every record.
    """
    import sys

    # The csv module needs a binary file on Python 2, but a text file opened
    # with newline='' on Python 3 (otherwise writerow raises TypeError).
    if sys.version_info[0] < 3:
        fout = open(csv_file_path, 'wb+')
    else:
        fout = open(csv_file_path, 'w+', newline='')

    with fout:
        csv_file = csv.writer(fout)
        csv_file.writerow(list(column_names))
        with open(json_file_path) as fin:
            for line in fin:
                if not line.strip():
                    # e.g. a trailing empty line at the end of the dataset
                    continue
                line_contents = json.loads(line)
                csv_file.writerow(get_row(line_contents, column_names))
22+
23+
def get_superset_of_column_names_from_file(json_file_path):
    """Read in the json dataset file and return the superset of column names.

    The file is read one line at a time; each line is decoded as a json
    document and the keys reported for it by ``get_column_names`` are merged
    into a single set.  Blank or whitespace-only lines are skipped rather
    than handed to the json parser, which cannot decode an empty document.

    Args:
        json_file_path: Path of the input file, one json document per line.

    Returns:
        A set containing every key ``get_column_names`` produced for any
        record in the file (empty if the file holds no records).
    """
    column_names = set()
    with open(json_file_path) as fin:
        for line in fin:
            if not line.strip():
                # e.g. a trailing empty line at the end of the dataset
                continue
            line_contents = json.loads(line)
            # set.update accepts any iterable, so no intermediate set needed
            column_names.update(get_column_names(line_contents).keys())
    return column_names
2533

2634
def get_column_names(line_contents, parent_key=''):
2735
"""Return a list of flattened key names given a dict.
@@ -93,15 +101,6 @@ def get_row(line_contents, column_names):
93101
row.append('')
94102
return row
95103

96-
def write_file(file_path, file_contents, column_names):
97-
"""Create and write a csv file given file_contents of our json dataset file and column names."""
98-
csv_file = csv.writer(open('file_path', 'wb+'))
99-
with open(file_path, 'wb+') as fin:
100-
csv_file = csv.writer(fin)
101-
csv_file.writerow(list(column_names))
102-
for line_contents in file_contents:
103-
csv_file.writerow(get_row(line_contents, column_names))
104-
105104
if __name__ == '__main__':
106105
"""Convert a yelp dataset file from json to csv."""
107106

@@ -120,5 +119,5 @@ def write_file(file_path, file_contents, column_names):
120119
json_file = args.json_file
121120
csv_file = '{0}.csv'.format(json_file.split('.json')[0])
122121

123-
file_contents, column_names = read_file(json_file)
124-
write_file(csv_file, file_contents, column_names)
122+
column_names = get_superset_of_column_names_from_file(json_file)
123+
read_and_write_file(json_file, csv_file, column_names)

0 commit comments

Comments
 (0)