Skip to content

Commit 7ca44bd

Browse files
jasonqng authored and tswast committed
BUG: Fix bug in type conversion of arrays; add array and struct tests (googleapis#101)
* Fix array bug in type conversion; add array and struct tests
* Update changelog
1 parent 4cab83c commit 7ca44bd

File tree

3 files changed

+54
-2
lines changed

3 files changed

+54
-2
lines changed

docs/source/changelog.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ Changelog
55
------------------
66

77
- Use the `google-cloud-bigquery <https://googlecloudplatform.github.io/google-cloud-python/latest/bigquery/usage.html>`__ library for API calls. The ``google-cloud-bigquery`` package is a new dependency, and dependencies on ``google-api-python-client`` and ``httplib2`` are removed. See the `installation guide <https://pandas-gbq.readthedocs.io/en/latest/install.html#dependencies>`__ for more details. (:issue:`93`)
8+
- Structs and arrays are now named properly (:issue:`23`) and BigQuery functions like ``array_agg`` no longer run into errors during type conversion (:issue:`22`).
89
- :func:`to_gbq` now uses a load job instead of the streaming API. Remove ``StreamingInsertError`` class, as it is no longer used by :func:`to_gbq`. (:issue:`7`, :issue:`75`)
910

1011
0.2.1 / 2017-11-27

pandas_gbq/gbq.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -867,11 +867,12 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None,
867867
)
868868

869869
# cast BOOLEAN and INTEGER columns from object to bool/int
870-
# if they dont have any nulls
870+
# if they dont have any nulls AND field mode is not repeated (i.e., array)
871871
type_map = {'BOOLEAN': bool, 'INTEGER': int}
872872
for field in schema['fields']:
873873
if field['type'].upper() in type_map and \
874-
final_df[field['name']].notnull().all():
874+
final_df[field['name']].notnull().all() and \
875+
field['mode'] != 'repeated':
875876
final_df[field['name']] = \
876877
final_df[field['name']].astype(type_map[field['type'].upper()])
877878

pandas_gbq/tests/test_gbq.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -920,6 +920,56 @@ def test_query_response_bytes(self):
920920
assert self.gbq_connector.sizeof_fmt(1.208926E24) == "1.0 YB"
921921
assert self.gbq_connector.sizeof_fmt(1.208926E28) == "10000.0 YB"
922922

923+
def test_struct(self):
924+
query = """SELECT 1 int_field,
925+
STRUCT("a" as letter, 1 as num) struct_field"""
926+
df = gbq.read_gbq(query, project_id=_get_project_id(),
927+
private_key=_get_private_key_path(),
928+
dialect='standard')
929+
tm.assert_frame_equal(df, DataFrame([[1, {"letter": "a", "num": 1}]],
930+
columns=["int_field", "struct_field"]))
931+
932+
def test_array(self):
933+
query = """select ["a","x","b","y","c","z"] as letters"""
934+
df = gbq.read_gbq(query, project_id=_get_project_id(),
935+
private_key=_get_private_key_path(),
936+
dialect='standard')
937+
tm.assert_frame_equal(df, DataFrame([[["a", "x", "b", "y", "c", "z"]]],
938+
columns=["letters"]))
939+
940+
def test_array_length_zero(self):
941+
query = """WITH t as (
942+
SELECT "a" letter, [""] as array_field
943+
UNION ALL
944+
SELECT "b" letter, [] as array_field)
945+
946+
select letter, array_field, array_length(array_field) len
947+
from t
948+
order by letter ASC"""
949+
df = gbq.read_gbq(query, project_id=_get_project_id(),
950+
private_key=_get_private_key_path(),
951+
dialect='standard')
952+
tm.assert_frame_equal(df, DataFrame([["a", [""], 1], ["b", [], 0]],
953+
columns=["letter", "array_field", "len"]))
954+
955+
def test_array_agg(self):
956+
query = """WITH t as (
957+
SELECT "a" letter, 1 num
958+
UNION ALL
959+
SELECT "b" letter, 2 num
960+
UNION ALL
961+
SELECT "a" letter, 3 num)
962+
963+
select letter, array_agg(num order by num ASC) numbers
964+
from t
965+
group by letter
966+
order by letter ASC"""
967+
df = gbq.read_gbq(query, project_id=_get_project_id(),
968+
private_key=_get_private_key_path(),
969+
dialect='standard')
970+
tm.assert_frame_equal(df, DataFrame([["a", [1, 3]], ["b", [2]]],
971+
columns=["letter", "numbers"]))
972+
923973

924974
class TestToGBQIntegrationWithServiceAccountKeyPath(object):
925975
# Changes to BigQuery table schema may take up to 2 minutes as of May 2015

0 commit comments

Comments
 (0)