Skip to content

Commit 7a7c676

Browse files
committed
Improve type detection for pandas.Series serialization
1 parent e66644a commit 7a7c676

File tree

4 files changed

+52
-31
lines changed

4 files changed

+52
-31
lines changed

CHANGELOG.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
------------------------------------------------------------------------------
2-
qPython 1.0.0 [2015.04.08]
2+
qPython 1.0.0 [2015.04.10]
33
------------------------------------------------------------------------------
44

5+
- Improve type detection for pandas.Series serialization
56
- Fix: bug during handshake with blank username/password
67

78
------------------------------------------------------------------------------

qpython/_pandas.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,10 @@ def _write_pandas_series(self, data, qtype = None):
147147
if qtype is None:
148148
# determine type based on first element of the numpy array
149149
qtype = Q_TYPE.get(type(data[0]), QGENERAL_LIST)
150+
151+
if qtype == QSTRING:
152+
# assume we have a generic list of strings -> force representation as symbol list
153+
qtype = QSYMBOL
150154

151155
if qtype is None:
152156
raise QWriterException('Unable to serialize pandas series %s' % data)

tests/pandas_test.py

Lines changed: 37 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,17 @@
163163
('("quick"; "brown"; "fox"; "jumps"; "over"; "a lazy"; "dog")',
164164
{'data': pandas.Series(['quick', 'brown', 'fox', 'jumps', 'over', 'a lazy', 'dog']),
165165
'meta': MetaData(qtype = QSTRING_LIST) }),
166+
('`the`quick`brown`fox', ({'data': pandas.Series(numpy.array(['the', 'quick', 'brown', 'fox'])),
167+
'meta': MetaData(qtype = QSYMBOL_LIST) },
168+
{'data': pandas.Series(['the', 'quick', 'brown', 'fox']),
169+
'meta': MetaData(qtype = QSYMBOL_LIST) },
170+
pandas.Series(['the', 'quick', 'brown', 'fox'])
171+
)),
172+
('flip `name`iq!(`Dent`Beeblebrox`Prefect;98 42 126)',
173+
pandas.DataFrame(OrderedDict((('name', pandas.Series(['Dent', 'Beeblebrox', 'Prefect'])),
174+
('iq', pandas.Series(numpy.array([98, 42, 126], dtype = numpy.int64))),
175+
))),
176+
),
166177
))
167178

168179
def arrays_equal(left, right):
@@ -203,7 +214,7 @@ def compare(left, right):
203214
for c in left:
204215
if not arrays_equal(left[c], right[c]):
205216
return False
206-
217+
207218
return True
208219
elif type(left) == QFunction:
209220
return type(right) == QFunction
@@ -226,7 +237,7 @@ def init():
226237
break
227238

228239
BINARY[query] = binary
229-
240+
230241

231242
def test_reading_pandas():
232243
print('Deserialization (pandas)')
@@ -263,9 +274,9 @@ def test_reading_pandas():
263274

264275
def test_writing_pandas():
265276
w = qwriter.QWriter(None, 3)
266-
277+
267278
for query, value in PANDAS_EXPRESSIONS.iteritems():
268-
sys.stdout.write( '%-75s' % query )
279+
sys.stdout.write('%-75s' % query)
269280
if isinstance(value, dict):
270281
data = value['data']
271282
if 'index' in value:
@@ -275,25 +286,29 @@ def test_writing_pandas():
275286
else:
276287
data = value
277288
serialized = binascii.hexlify(w.write(data, 1))[16:].lower()
278-
assert serialized == BINARY[query].lower(), 'serialization failed: %s, expected: %s actual: %s' % (value, BINARY[query].lower(), serialized)
279-
sys.stdout.write( '.' )
280-
289+
assert serialized == BINARY[query].lower(), 'serialization failed: %s, expected: %s actual: %s' % (value, BINARY[query].lower(), serialized)
290+
sys.stdout.write('.')
291+
281292
print('')
282-
283-
for query, value in PANDAS_EXPRESSIONS_ALT.iteritems():
284-
sys.stdout.write( '%-75s' % query )
285-
if isinstance(value, dict):
286-
data = value['data']
287-
if 'index' in value:
288-
data.reset_index(drop = True)
289-
data = data.set_index(value['index'])
290-
data.meta = value['meta']
291-
else:
292-
data = value
293-
serialized = binascii.hexlify(w.write(data, 1))[16:].lower()
294-
assert serialized == BINARY[query].lower(), 'serialization failed: %s, expected: %s actual: %s' % (value, BINARY[query].lower(), serialized)
295-
sys.stdout.write( '.' )
296-
293+
294+
for query, variants in PANDAS_EXPRESSIONS_ALT.iteritems():
295+
sys.stdout.write('%-75s' % query)
296+
variants = [variants] if not isinstance(variants, tuple) else variants
297+
298+
for value in variants:
299+
if isinstance(value, dict):
300+
data = value['data']
301+
if 'index' in value:
302+
data.reset_index(drop = True)
303+
data = data.set_index(value['index'])
304+
data.meta = value['meta']
305+
else:
306+
data = value
307+
serialized = binascii.hexlify(w.write(data, 1))[16:].lower()
308+
assert serialized == BINARY[query].lower(), 'serialization failed: %s, expected: %s actual: %s' % (value, BINARY[query].lower(), serialized)
309+
310+
sys.stdout.write('.')
311+
297312
print('')
298313

299314

tests/qwriter_test.py

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -190,6 +190,7 @@
190190
('(`one;2 3;"456";(7;8 9))', [numpy.string_('one'), qlist(numpy.array([2, 3], dtype=numpy.int64), qtype=QLONG_LIST), '456', [numpy.int64(7), qlist(numpy.array([8, 9], dtype=numpy.int64), qtype=QLONG_LIST)]]),
191191

192192
('`jumps`over`a`lazy`dog', (numpy.array(['jumps', 'over', 'a', 'lazy', 'dog'], dtype=numpy.string_),
193+
numpy.array(['jumps', 'over', 'a', 'lazy', 'dog']),
193194
qlist(numpy.array(['jumps', 'over', 'a', 'lazy', 'dog']), qtype = QSYMBOL_LIST),
194195
qlist(['jumps', 'over', 'a', 'lazy', 'dog'], qtype = QSYMBOL_LIST))),
195196
('`the`quick`brown`fox', numpy.array([numpy.string_('the'), numpy.string_('quick'), numpy.string_('brown'), numpy.string_('fox')], dtype=numpy.object)),
@@ -307,6 +308,9 @@
307308
(qtable(qlist(numpy.array(['pos', 'dates']), qtype = QSYMBOL_LIST),
308309
[qlist(numpy.array(['d1', 'd2', 'd3']), qtype = QSYMBOL_LIST),
309310
qlist(numpy.array([366, 121, qnull(QDATE)]), qtype=QDATE_LIST)]),
311+
qtable(['pos', 'dates'],
312+
[numpy.array(['d1', 'd2', 'd3']),
313+
numpy.array([numpy.datetime64('2001-01-01'), numpy.datetime64('2000-05-01'), numpy.datetime64('NaT')], dtype='datetime64[D]')]),
310314
qtable(['pos', 'dates'],
311315
[qlist(numpy.array(['d1', 'd2', 'd3']), qtype = QSYMBOL_LIST),
312316
numpy.array([numpy.datetime64('2001-01-01'), numpy.datetime64('2000-05-01'), numpy.datetime64('NaT')], dtype='datetime64[D]')])
@@ -338,16 +342,13 @@ def init():
338342
def test_writing():
339343
w = qwriter.QWriter(None, 3)
340344

341-
for query, value in EXPRESSIONS.iteritems():
345+
for query, variants in EXPRESSIONS.iteritems():
342346
sys.stdout.write( '%-75s' % query )
343-
if isinstance(value, tuple):
344-
for object in value:
345-
sys.stdout.write( '.' )
346-
serialized = binascii.hexlify(w.write(object, 1))[16:].lower()
347-
assert serialized == BINARY[query].lower(), 'serialization failed: %s, expected: %s actual: %s' % (query, BINARY[query].lower(), serialized)
348-
else:
347+
variants = [variants] if not isinstance(variants, tuple) else variants
348+
349+
for object in variants:
349350
sys.stdout.write( '.' )
350-
serialized = binascii.hexlify(w.write(value, 1))[16:].lower()
351+
serialized = binascii.hexlify(w.write(object, 1))[16:].lower()
351352
assert serialized == BINARY[query].lower(), 'serialization failed: %s, expected: %s actual: %s' % (query, BINARY[query].lower(), serialized)
352353

353354
print('')

0 commit comments

Comments
 (0)