Skip to content

Commit 57ec6e1

Browse files
committed
Resolve exxeleron#18: pandas: null representation for strings
1 parent 1a65e6d commit 57ec6e1

File tree

5 files changed

+47
-4
lines changed

CHANGELOG.txt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,10 @@
1+
------------------------------------------------------------------------------
2+
qPython 1.0 RC2 [TBA]
3+
------------------------------------------------------------------------------
4+
5+
- Fix: handling of nested lists of homogeneous length
6+
- Fix: pandas: null representation for strings
7+
18
------------------------------------------------------------------------------
29
qPython 1.0 RC1 [2014.10.22]
310
------------------------------------------------------------------------------

qpython/_pandas.py

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ def _read_table(self, qtype = QTABLE, options = READER_CONFIGURATION):
7676
if isinstance(data[i], str):
7777
# convert character list (represented as string) to numpy representation
7878
meta[columns[i]] = QSTRING
79-
odict[columns[i]] = numpy.array(list(data[i]), dtype = numpy.str)
79+
odict[columns[i]] = pandas.Series(list(data[i]), dtype = numpy.str).replace(' ', numpy.nan)
8080
elif isinstance(data[i], (list, tuple)):
8181
meta[columns[i]] = QGENERAL_LIST
8282
tarray = numpy.ndarray(shape = len(data[i]), dtype = numpy.dtype('O'))
@@ -113,6 +113,15 @@ def _read_list(self, qtype, options):
113113
return list
114114

115115

116+
@parse(QGENERAL_LIST)
117+
def _read_general_list(self, qtype = QGENERAL_LIST, options = READER_CONFIGURATION):
118+
list = QReader._read_general_list(self, qtype, options)
119+
if options.pandas:
120+
return [numpy.nan if isinstance(element, basestring) and element == ' ' else element for element in list]
121+
else:
122+
return list
123+
124+
116125

117126
class PandasQWriter(QWriter):
118127

@@ -145,7 +154,7 @@ def _write_pandas_series(self, data, qtype = None):
145154
if qtype == QGENERAL_LIST:
146155
self._write_generic_list(data.as_matrix())
147156
elif qtype == QCHAR:
148-
self._write_string(data.as_matrix().astype(numpy.string_).tostring())
157+
self._write_string(data.replace(numpy.nan, ' ').as_matrix().astype(numpy.string_).tostring())
149158
elif data.dtype.type not in (numpy.datetime64, numpy.timedelta64):
150159
data = data.fillna(QNULLMAP[-abs(qtype)][1])
151160
data = data.as_matrix()
@@ -160,7 +169,6 @@ def _write_pandas_series(self, data, qtype = None):
160169
self._write_list(data, qtype = qtype)
161170

162171

163-
164172
@serialize(pandas.DataFrame)
165173
def _write_pandas_data_frame(self, data, qtype = None):
166174
data_columns = data.columns.values
@@ -184,3 +192,10 @@ def _write_pandas_data_frame(self, data, qtype = None):
184192
for column in data_columns:
185193
self._write_pandas_series(data[column], qtype = data.meta[column] if hasattr(data, 'meta') else None)
186194

195+
196+
@serialize(tuple, list)
197+
def _write_generic_list(self, data):
198+
self._buffer.write(struct.pack('=bxi', QGENERAL_LIST, len(data)))
199+
for element in data:
200+
# assume nan represents a string null
201+
self._write(' ' if type(element) in [float, numpy.float32, numpy.float64] and numpy.isnan(element) else element)

tests/QExpressions3.out

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,8 @@ ED00000080
132132
0B00020000000000
133133
("quick"; "brown"; "fox"; "jumps"; "over"; "a lazy"; "dog")
134134
0000070000000A0005000000717569636B0A000500000062726F776E0A0003000000666F780A00050000006A756D70730A00040000006F7665720A000600000061206C617A790A0003000000646F67
135+
("quick"; " "; "fox"; "jumps"; "over"; "a lazy"; "dog")
136+
0000070000000A0005000000717569636BF6200A0003000000666F780A00050000006A756D70730A00040000006F7665720A000600000061206C617A790A0003000000646F67
135137
2000.01.04D05:36:57.600 0Np
136138
0C000200000000C0CAFA20FE00000000000000000080
137139
(2001.01m; 0Nm)
@@ -196,6 +198,8 @@ flip `name`iq`grade!(`Dent`Beeblebrox`Prefect;98 42 126;"a c")
196198
6200630B00030000006E616D65006971006772616465000000030000000B000300000044656E7400426565626C6562726F7800507265666563740007000300000062000000000000002A000000000000007E000000000000000A0003000000612063
197199
flip `name`iq`fullname!(`Dent`Beeblebrox`Prefect;98 42 126;("Arthur Dent"; "Zaphod Beeblebrox"; "Ford Prefect"))
198200
6200630B00030000006E616D650069710066756C6C6E616D65000000030000000B000300000044656E7400426565626C6562726F7800507265666563740007000300000062000000000000002A000000000000007E000000000000000000030000000A000B0000004172746875722044656E740A00110000005A6170686F6420426565626C6562726F780A000C000000466F72642050726566656374
201+
flip `name`iq`fullname!(`Dent`Beeblebrox`Prefect;98 42 126;("Arthur Dent"; " "; "Ford Prefect"))
202+
6200630B00030000006E616D650069710066756C6C6E616D65000000030000000B000300000044656E7400426565626C6562726F7800507265666563740007000300000062000000000000002A000000000000007E000000000000000000030000000A000B0000004172746875722044656E74F6200A000C000000466F72642050726566656374
199203
flip `name`iq`misc!(`Dent`Beeblebrox`Prefect;98 42 126;("The Hitch Hiker's Guide to the Galaxy"; 160; 1979.10.12))
200204
6200630B00030000006E616D65006971006D697363000000030000000B000300000044656E7400426565626C6562726F7800507265666563740007000300000062000000000000002A000000000000007E000000000000000000030000000A00250000005468652048697463682048696B6572277320477569646520746F207468652047616C617879F9A000000000000000F226E3FFFF
201205
([] sc:1 2 3; nsc:(1 2; 3 4; 5 6 7))

tests/pandas_test.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,12 +36,15 @@
3636
{'data': pandas.Series(numpy.array([uuid.UUID('8c680a01-5a49-5aab-5a65-d4bfddb6a661'), numpy.nan])),
3737
'meta': MetaData(qtype = QGUID_LIST) }),
3838
('"quick brown fox jumps over a lazy dog"', 'quick brown fox jumps over a lazy dog'),
39+
('" "', ' '),
3940
('``quick``fox', {'data': pandas.Series(numpy.array([qnull(QSYMBOL), numpy.string_('quick'), qnull(QSYMBOL), numpy.string_('fox')])),
4041
'meta': MetaData(qtype = QSYMBOL_LIST) }),
4142
('`the`quick`brown`fox', {'data': pandas.Series(numpy.array([numpy.string_('the'), numpy.string_('quick'), numpy.string_('brown'), numpy.string_('fox')])),
4243
'meta': MetaData(qtype = QSYMBOL_LIST) }),
4344
('("quick"; "brown"; "fox"; "jumps"; "over"; "a lazy"; "dog")',
4445
['quick', 'brown', 'fox', 'jumps', 'over', 'a lazy', 'dog']),
46+
('("quick"; " "; "fox"; "jumps"; "over"; "a lazy"; "dog")',
47+
['quick', numpy.nan, 'fox', 'jumps', 'over', 'a lazy', 'dog']),
4548

4649
('(0b;1b;0b)', {'data': pandas.Series(numpy.array([False, True, False], dtype = numpy.bool)),
4750
'meta': MetaData(qtype = QBOOL_LIST) }),
@@ -110,7 +113,7 @@
110113
('flip `name`iq`grade!(`Dent`Beeblebrox`Prefect;98 42 126;"a c")',
111114
{'data': pandas.DataFrame(OrderedDict((('name', pandas.Series(['Dent', 'Beeblebrox', 'Prefect'])),
112115
('iq', pandas.Series(numpy.array([98, 42, 126], dtype = numpy.int64))),
113-
('grade', pandas.Series(list("a c"))),
116+
('grade', pandas.Series(["a", numpy.nan,"c"])),
114117
))
115118
),
116119
'meta': MetaData(**{'qtype': QTABLE, 'name': QSYMBOL_LIST, 'iq': QLONG_LIST, 'grade': QSTRING}) }),
@@ -121,6 +124,13 @@
121124
))
122125
),
123126
'meta': MetaData(**{'qtype': QTABLE, 'name': QSYMBOL_LIST, 'iq': QLONG_LIST, 'fullname': QSTRING_LIST}) }),
127+
('flip `name`iq`fullname!(`Dent`Beeblebrox`Prefect;98 42 126;("Arthur Dent"; " "; "Ford Prefect"))',
128+
{'data': pandas.DataFrame(OrderedDict((('name', pandas.Series(['Dent', 'Beeblebrox', 'Prefect'])),
129+
('iq', pandas.Series(numpy.array([98, 42, 126], dtype = numpy.int64))),
130+
('fullname', pandas.Series(["Arthur Dent", numpy.nan, "Ford Prefect"])),
131+
))
132+
),
133+
'meta': MetaData(**{'qtype': QTABLE, 'name': QSYMBOL_LIST, 'iq': QLONG_LIST, 'fullname': QSTRING_LIST}) }),
124134
('([] sc:1 2 3; nsc:(1 2; 3 4; 5 6 7))', {'data': pandas.DataFrame(OrderedDict((('sc', pandas.Series(numpy.array([1, 2, 3], dtype = numpy.int64))),
125135
('nsc', [pandas.Series(numpy.array([1, 2], dtype = numpy.int64)), pandas.Series(numpy.array([3, 4], dtype = numpy.int64)), pandas.Series(numpy.array([5, 6, 7], dtype = numpy.int64))])))
126136
),

tests/qreader_test.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,8 @@
104104
('``', qlist(numpy.array([qnull(QSYMBOL), qnull(QSYMBOL)], dtype=numpy.object), qtype=QSYMBOL_LIST)),
105105
('("quick"; "brown"; "fox"; "jumps"; "over"; "a lazy"; "dog")',
106106
['quick', 'brown', 'fox', 'jumps', 'over', 'a lazy', 'dog']),
107+
('("quick"; " "; "fox"; "jumps"; "over"; "a lazy"; "dog")',
108+
['quick', ' ', 'fox', 'jumps', 'over', 'a lazy', 'dog']),
107109
('{x+y}', QLambda('{x+y}')),
108110
('{x+y}[3]', QProjection([QLambda('{x+y}'), numpy.int64(3)])),
109111
('insert [1]', QProjection([QFunction(0), numpy.int64(1)])),
@@ -149,6 +151,11 @@
149151
[qlist(numpy.array(['Dent', 'Beeblebrox', 'Prefect']), qtype = QSYMBOL_LIST),
150152
qlist(numpy.array([98, 42, 126]), qtype = QLONG_LIST),
151153
["Arthur Dent", "Zaphod Beeblebrox", "Ford Prefect"]])),
154+
('flip `name`iq`fullname!(`Dent`Beeblebrox`Prefect;98 42 126;("Arthur Dent"; " "; "Ford Prefect"))',
155+
qtable(qlist(numpy.array(['name', 'iq', 'fullname']), qtype = QSYMBOL_LIST),
156+
[qlist(numpy.array(['Dent', 'Beeblebrox', 'Prefect']), qtype = QSYMBOL_LIST),
157+
qlist(numpy.array([98, 42, 126]), qtype = QLONG_LIST),
158+
["Arthur Dent", " ", "Ford Prefect"]])),
152159
('([] sc:1 2 3; nsc:(1 2; 3 4; 5 6 7))', qtable(qlist(numpy.array(['sc', 'nsc']), qtype = QSYMBOL_LIST),
153160
[qlist(numpy.array([1, 2, 3]), qtype = QLONG_LIST),
154161
[qlist(numpy.array([1, 2]), qtype = QLONG_LIST),

0 commit comments

Comments
 (0)