|
| 1 | +# |
| 2 | +# Copyright (c) 2011-2014 Exxeleron GmbH |
| 3 | +# |
| 4 | +# Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | +# you may not use this file except in compliance with the License. |
| 6 | +# You may obtain a copy of the License at |
| 7 | +# |
| 8 | +# http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | +# |
| 10 | +# Unless required by applicable law or agreed to in writing, software |
| 11 | +# distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | +# See the License for the specific language governing permissions and |
| 14 | +# limitations under the License. |
| 15 | +# |
| 16 | + |
| 17 | +import pandas |
| 18 | +import struct |
| 19 | + |
| 20 | +from collections import OrderedDict |
| 21 | + |
| 22 | +from qpython import MetaData |
| 23 | +from qpython.qreader import QReader, READER_CONFIGURATION, QReaderException |
| 24 | +from qpython.qcollection import QDictionary, qlist |
| 25 | +from qpython.qwriter import QWriter, QWriterException |
| 26 | +from qpython.qtype import * |
| 27 | + |
| 28 | + |
| 29 | + |
class PandasQReader(QReader):
    """QReader variant that can deserialize q data into pandas objects.

    When ``options.pandas`` is truthy, q tables are returned as
    ``pandas.DataFrame`` objects, q keyed tables as ``pandas.DataFrame``
    objects indexed by the key columns, and lists read via ``_read_list`` as
    ``pandas.Series``. Every pandas object produced here carries a ``meta``
    attribute (a ``MetaData`` instance) recording the q type information.
    When ``options.pandas`` is falsy each method delegates to ``QReader``.
    """

    parse = Mapper(QReader._reader_map)

    @parse(QDICTIONARY)
    def _read_dictionary(self, qtype = QDICTIONARY, options = READER_CONFIGURATION):
        """Read a q dictionary or keyed table.

        With ``options.pandas`` set, a dictionary whose keys deserialize to a
        ``pandas.DataFrame`` is treated as a keyed table: key and value
        columns are merged into a single frame indexed by the key columns and
        ``meta.qtype`` is set to ``QKEYED_TABLE``. Any other dictionary is
        returned as a ``QDictionary``, with ``pandas.Series`` keys/values
        unwrapped to their underlying arrays.

        Raises:
            QReaderException: if the keys are a DataFrame but the values are
                not.
        """
        if not options.pandas:
            return QReader._read_dictionary(self, qtype = qtype, options = options)

        keys = self._read_object(options = options)
        values = self._read_object(options = options)

        if not isinstance(keys, pandas.DataFrame):
            # regular dictionary: unwrap pandas series to plain arrays
            # (.values is used as Series.as_matrix() is gone from current pandas)
            if isinstance(keys, pandas.Series):
                keys = keys.values
            if isinstance(values, pandas.Series):
                values = values.values
            return QDictionary(keys, values)

        if not isinstance(values, pandas.DataFrame):
            raise QReaderException('Keyed table creation: values are expected to be of type pandas.DataFrame. Actual: %s' % type(values))

        # remember the key columns before the value columns are appended
        index_columns = list(keys.columns)

        # the keys frame is extended in place and becomes the keyed table
        table = keys
        table.meta.qtype = QKEYED_TABLE

        for column in values.columns:
            table[column] = values[column]
            table.meta[column] = values.meta[column]

        table.set_index(index_columns, inplace = True)
        return table


    @parse(QTABLE)
    def _read_table(self, qtype = QTABLE, options = READER_CONFIGURATION):
        """Read a q table.

        With ``options.pandas`` set, returns a ``pandas.DataFrame`` whose
        ``meta`` attribute maps each column name to its q type and whose
        ``meta.qtype`` is ``QTABLE``; otherwise delegates to ``QReader``.
        """
        if not options.pandas:
            return QReader._read_table(self, qtype = qtype, options = options)

        self._buffer.skip()  # ignore attributes
        self._buffer.skip()  # ignore dict type stamp

        columns = self._read_object(options = options)
        data = self._read_object(options = options)

        frame_data = OrderedDict()
        meta = MetaData(qtype = QTABLE)
        for i, name in enumerate(columns):
            column = data[i]
            if isinstance(column, str):
                # convert character list (represented as string) to numpy representation
                meta[name] = QSTRING
                frame_data[name] = numpy.array(list(column), dtype = str)
            elif isinstance(column, (list, tuple)):
                # convert generic list (represented as list/tuple) to numpy representation
                meta[name] = QGENERAL_LIST
                frame_data[name] = numpy.array(list(column))
            else:
                # any other column is expected to carry its q type in ``meta``
                meta[name] = column.meta.qtype
                frame_data[name] = column

        df = pandas.DataFrame(frame_data)
        df.meta = meta
        return df


    def _read_list(self, qtype, options):
        """Read a q list.

        With ``options.pandas`` set, the list is wrapped in a
        ``pandas.Series`` with ``meta.qtype`` set to ``qtype``; for every type
        other than ``QSYMBOL_LIST`` the q null value of the element type is
        replaced with NaN. Otherwise the ``QReader`` result is returned as is.
        """
        if options.pandas:
            # NOTE(review): this sets the flag on the options object supplied
            # by the caller and never restores it - confirm this is intended.
            options.numpy_temporals = True

        items = QReader._read_list(self, qtype = qtype, options = options)

        if not options.pandas:
            return items

        if qtype != QSYMBOL_LIST:
            null = QNULLMAP[-abs(qtype)][1]
            series = pandas.Series(data = items).replace(null, numpy.nan)
        else:
            series = pandas.Series(data = items)

        series.meta = MetaData(qtype = qtype)
        return series
| 112 | + |
| 113 | + |
| 114 | + |
class PandasQWriter(QWriter):
    """QWriter variant that can serialize pandas objects.

    Registers serializers for ``pandas.Series`` and ``pandas.DataFrame`` on
    top of the ``QWriter`` writer map. A ``meta`` attribute on the pandas
    object, when present, is consulted for q type information.
    """

    serialize = Mapper(QWriter._writer_map)

    @serialize(pandas.Series)
    def _write_pandas_series(self, data, qtype = None):
        """Serialize a ``pandas.Series``.

        The element q type is resolved in this order: the explicit ``qtype``
        argument, ``data.meta.qtype``, the ``'|S1'`` dtype (always treated as
        ``QCHAR``, overriding the former two), the ``Q_TYPE`` entry for the
        numpy dtype, the ``TEMPORAL_PY_TYPE`` entry for datetime64/timedelta64
        dtypes, and finally the Python type of the first element (falling back
        to ``QGENERAL_LIST``).

        Raises:
            QWriterException: if no q type could be determined.
        """
        if qtype is not None:
            qtype = -abs(qtype)

        if qtype is None and hasattr(data, 'meta'):
            qtype = -abs(data.meta.qtype)

        if data.dtype == '|S1':
            qtype = QCHAR

        if qtype is None:
            qtype = Q_TYPE.get(data.dtype.type, None)

        is_temporal = data.dtype.type in (numpy.datetime64, numpy.timedelta64)

        if qtype is None and is_temporal:
            qtype = TEMPORAL_PY_TYPE.get(str(data.dtype), None)

        if qtype is None:
            if len(data) > 0:
                # determine type based on the first element; iloc is
                # positional, so this works for any index (data[0] would be a
                # label lookup and fail when the index has no label 0)
                qtype = Q_TYPE.get(type(data.iloc[0]), QGENERAL_LIST)
            else:
                # nothing to inspect in an empty series
                qtype = QGENERAL_LIST

        if qtype is None:
            # defensive: only reachable if Q_TYPE maps a type to None
            raise QWriterException('Unable to serialize pandas series %s' % data)

        # .values / bytes_ / tobytes are used instead of as_matrix / string_ /
        # tostring, which are no longer available in current pandas and numpy
        if qtype == QGENERAL_LIST:
            self._write_generic_list(data.values)
        elif qtype == QCHAR:
            self._write_string(data.values.astype(numpy.bytes_).tobytes())
        elif not is_temporal:
            # substitute the q null value for missing entries before casting
            data = data.fillna(QNULLMAP[-abs(qtype)][1])
            data = data.values

            if PY_TYPE[qtype] != data.dtype:
                data = data.astype(PY_TYPE[qtype])

            self._write_list(data, qtype = qtype)
        else:
            data = data.values
            data = data.astype(TEMPORAL_Q_TYPE[qtype])
            self._write_list(data, qtype = qtype)



    @serialize(pandas.DataFrame)
    def _write_pandas_data_frame(self, data, qtype = None):
        """Serialize a ``pandas.DataFrame`` as a q table or keyed table.

        A frame whose ``meta.qtype`` equals ``QKEYED_TABLE`` is written as a
        dictionary of two tables: the index levels as the key table followed
        by the regular columns as the value table. Any other frame is written
        as a plain table of its columns. Per-column q types are taken from
        ``data.meta`` when the frame has one. ``qtype`` is not used.
        The frame passed in is not modified.
        """
        data_columns = data.columns.values
        meta = getattr(data, 'meta', None)

        if meta is not None and meta.qtype == QKEYED_TABLE:
            # data frame represents keyed table
            self._buffer.write(struct.pack('=b', QDICTIONARY))
            self._buffer.write(struct.pack('=bxb', QTABLE, QDICTIONARY))
            index_columns = data.index.names
            self._write(qlist(numpy.array(index_columns), qtype = QSYMBOL_LIST))

            # work on a de-indexed copy so the caller's frame is never left
            # without its index, even if writing a key column raises
            keys = data.reset_index()
            self._buffer.write(struct.pack('=bxi', QGENERAL_LIST, len(index_columns)))
            for column in index_columns:
                self._write_pandas_series(keys[column], qtype = meta[column])

        self._buffer.write(struct.pack('=bxb', QTABLE, QDICTIONARY))
        self._write(qlist(numpy.array(data_columns), qtype = QSYMBOL_LIST))
        self._buffer.write(struct.pack('=bxi', QGENERAL_LIST, len(data_columns)))
        for column in data_columns:
            self._write_pandas_series(data[column], qtype = meta[column] if meta is not None else None)
0 commit comments