Skip to content

Commit c5c3339

Browse files
committed
Optional flags allow converting Python NaN to null in the JSON writer
1 parent 1943eef commit c5c3339

File tree

3 files changed

+36
-2
lines changed

3 files changed

+36
-2
lines changed

mapreduceutils/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -310,6 +310,7 @@ def record_map(data_record):
310310
ctx = context.get()
311311
property_map = ctx.mapreduce_spec.mapper.params.get('property_map')
312312
output_format = ctx.mapreduce_spec.mapper.params.get('output_format', 'JSON')
313+
writer_args = ctx.mapreduce_spec.mapper.params.get('writer_args', dict())
313314

314315
record = MapperRecord.create(data_record)
315316
if record:
@@ -324,7 +325,7 @@ def record_map(data_record):
324325
writer = OutputWriter.get_writer(output_format)
325326
if 'mapper_key_spec' in map_rule:
326327
key = record.mapper_key(map_rule.get('mapper_key_spec'))
327-
data = writer.write(row)
328+
data = writer.write(row, **writer_args)
328329
#logging.warn("Mapper pre yield MR: {}:{}".format(key, data))
329330
yield (key, data)
330331
else:

mapreduceutils/tests/test_writers.py

Lines changed: 19 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
#!/usr/bin/env python
22
from collections import OrderedDict
3+
import datetime
34
from mapreduceutils.writers import OutputWriter
45
import unittest
56

@@ -66,3 +67,21 @@ def test_escaped_output(self):
6667
'}\r\n'
6768
)
6869
self.assertEqual(expected, out)
70+
71+
def test_nan_as_null_output(self):
72+
record = OrderedDict([
73+
("sprop_abc", float('NaN')),
74+
("sprop_bcd", 'test "message"'),
75+
("sprop_cde", datetime.datetime(2014, 8, 1, 0, 0, 0))
76+
])
77+
writer = OutputWriter.get_writer('json')
78+
out = writer.write(record, nan_to_null=True)
79+
80+
expected = (
81+
'{'
82+
'"sprop_cde": "2014-08-01 00:00:00", '
83+
'"sprop_bcd": "test \\"message\\"", '
84+
'"sprop_abc": null'
85+
'}\r\n'
86+
)
87+
self.assertEqual(expected, out)

mapreduceutils/writers.py

Lines changed: 15 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,7 @@
22
from cStringIO import StringIO
33
import datetime
44
from json import JSONEncoder
5+
import math
56

67

78
class OutputWriter:
@@ -63,5 +64,18 @@ def write(cls, data_obj):
6364

6465
class JSONWriter(OutputWriter):
6566
@classmethod
66-
def write(cls, data_obj):
67+
def write(cls, data_obj, nan_to_null=False):
68+
"""
69+
Encodes given python object to JSON
70+
71+
Args:
72+
data_obj: Python object to encode to JSON
73+
nan_to_null: (Bool) If True NaN values will be converted to None
74+
before encoding to JSON
75+
Returns:
76+
JSON string
77+
"""
78+
if nan_to_null:
79+
data_obj = {k: None if isinstance(v, float) and math.isnan(v) else v
80+
for k, v in data_obj.items()}
6781
return "{}\r\n".format(MapperJSONEncoder().encode(data_obj))

0 commit comments

Comments
 (0)