From f26e390432ae5a78ea2e2c8a12801eada019e7b6 Mon Sep 17 00:00:00 2001
From: Tim Swast
Date: Fri, 25 Jan 2019 15:15:39 -0800
Subject: [PATCH 01/19] CLN: Use `to_dataframe` to download query results.

This allows us to remove logic for parsing the schema and align with
google-cloud-bigquery.
---
 benchmark/README.md                 | 16 +++++++++++++
 benchmark/read_gbq_large_results.py |  8 +++++++
 benchmark/read_gbq_small_results.py |  7 ++++++
 pandas_gbq/gbq.py                   | 37 ++++------------------------
 4 files changed, 36 insertions(+), 32 deletions(-)
 create mode 100644 benchmark/README.md
 create mode 100644 benchmark/read_gbq_large_results.py
 create mode 100644 benchmark/read_gbq_small_results.py

diff --git a/benchmark/README.md b/benchmark/README.md
new file mode 100644
index 00000000..5ede71d7
--- /dev/null
+++ b/benchmark/README.md
@@ -0,0 +1,16 @@
+# pandas-gbq benchmarks
+
+This directory contains a few scripts which are useful for performance
+testing the pandas-gbq library. Use cProfile to time the script and see
+details about where time is spent. To avoid timing how long BigQuery takes to
+execute a query, run the benchmark twice to ensure the results are cached.
+
+## `read_gbq`
+
+Read a small table (a few KB).
+
+    python -m cProfile --sort=cumtime read_gbq_small_results.py
+
+Read a large-ish table (100+ MB).
+
+    python -m cProfile --sort=cumtime read_gbq_large_results.py
diff --git a/benchmark/read_gbq_large_results.py b/benchmark/read_gbq_large_results.py
new file mode 100644
index 00000000..5a8bf268
--- /dev/null
+++ b/benchmark/read_gbq_large_results.py
@@ -0,0 +1,8 @@
+
+import pandas_gbq
+
+# Select 163 MB worth of data, to time how long it takes to download large
+# result sets.
+df = pandas_gbq.read_gbq(
+    "SELECT * FROM `bigquery-public-data.usa_names.usa_1910_2013`",
+    dialect="standard")
diff --git a/benchmark/read_gbq_small_results.py b/benchmark/read_gbq_small_results.py
new file mode 100644
index 00000000..cfff10b3
--- /dev/null
+++ b/benchmark/read_gbq_small_results.py
@@ -0,0 +1,7 @@
+
+import pandas_gbq
+
+# Select a few KB worth of data, to time downloading small result sets.
+df = pandas_gbq.read_gbq(
+    "SELECT * FROM `bigquery-public-data.utility_us.country_code_iso`",
+    dialect="standard")
diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py
index 948fd980..8cf2cf3b 100644
--- a/pandas_gbq/gbq.py
+++ b/pandas_gbq/gbq.py
@@ -1,11 +1,9 @@
 import logging
 import time
 import warnings
-from collections import OrderedDict
 from datetime import datetime
 
 import numpy as np
-from pandas import DataFrame
 
 from pandas_gbq.exceptions import AccessDenied
 
@@ -482,15 +480,9 @@ def run_query(self, query, **kwargs):
             rows_iter = query_reply.result()
         except self.http_error as ex:
             self.process_http_error(ex)
-        result_rows = list(rows_iter)
-        total_rows = rows_iter.total_rows
-        schema = {
-            "fields": [field.to_api_repr() for field in rows_iter.schema]
-        }
-
-        logger.debug("Got {} rows.\n".format(total_rows))
-
-        return schema, result_rows
+        df = rows_iter.to_dataframe()
+        logger.debug("Got {} rows.\n".format(rows_iter.total_rows))
+        return df
 
     def load_data(
         self,
@@ -661,25 +653,6 @@ def _parse_schema(schema_fields):
         yield name, dtype
 
 
-def _parse_data(schema, rows):
-
-    column_dtypes = OrderedDict(_parse_schema(schema["fields"]))
-    df = DataFrame(data=(iter(r) for r in rows), columns=column_dtypes.keys())
-
-    for column in df:
-        dtype = column_dtypes[column]
-        null_safe = (
-            df[column].notnull().all()
-            or dtype == float
-            or dtype == "datetime64[ns]"
-        )
-        if dtype and null_safe:
-            df[column] = df[column].astype(
-                column_dtypes[column], errors="ignore"
-            )
-    return df
-
-
 def read_gbq(
     query,
     project_id=None,
@@ -825,8 +798,8 @@ def read_gbq(
         credentials=credentials,
         private_key=private_key,
     )
-    schema, rows = connector.run_query(query, configuration=configuration)
-    final_df = _parse_data(schema, rows)
+
+    final_df = connector.run_query(query, configuration=configuration)
 
     # Reindex the DataFrame on the provided column
     if index_col is not None:

From 70435019d8d7eb5ac9352b9c7996715feb218bbd Mon Sep 17 00:00:00 2001
From: Tim Swast
Date: Fri, 8 Feb 2019 14:06:02 -0800
Subject: [PATCH 02/19] Supply expected dtypes to to_dataframe()
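
The BigQuery schema is available before any rows are downloaded, so pass
explicit dtypes to to_dataframe() for the types that can safely represent
NULL values. A rough sketch of the mapping this adds (illustrative input,
mirroring the dtype_map below):

    schema_fields = [
        {"name": "created_at", "type": "TIMESTAMP", "mode": "NULLABLE"},
        {"name": "row_count", "type": "INTEGER", "mode": "NULLABLE"},
    ]
    _bqschema_to_dtypes(schema_fields)
    # {'created_at': 'datetime64[ns, UTC]'}
    # INTEGER is omitted because int64 cannot represent NULL; pandas is
    # left to choose the dtype (float64 with NaN when NULLs are present).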
---
 benchmark/read_gbq_large_results.py |  3 +-
 benchmark/read_gbq_small_results.py |  3 +-
 pandas_gbq/gbq.py                   | 26 ++++++++++------
 tests/system/test_gbq.py            |  8 -----
 tests/unit/test_gbq.py              | 47 +++++++++--------------------
 5 files changed, 36 insertions(+), 51 deletions(-)

diff --git a/benchmark/read_gbq_large_results.py b/benchmark/read_gbq_large_results.py
index 5a8bf268..7dedbb13 100644
--- a/benchmark/read_gbq_large_results.py
+++ b/benchmark/read_gbq_large_results.py
@@ -5,4 +5,5 @@
 # result sets.
 df = pandas_gbq.read_gbq(
     "SELECT * FROM `bigquery-public-data.usa_names.usa_1910_2013`",
-    dialect="standard")
+    dialect="standard",
+)
diff --git a/benchmark/read_gbq_small_results.py b/benchmark/read_gbq_small_results.py
index cfff10b3..68621194 100644
--- a/benchmark/read_gbq_small_results.py
+++ b/benchmark/read_gbq_small_results.py
@@ -4,4 +4,5 @@
 # Select a few KB worth of data, to time downloading small result sets.
 df = pandas_gbq.read_gbq(
     "SELECT * FROM `bigquery-public-data.utility_us.country_code_iso`",
-    dialect="standard")
+    dialect="standard",
+)
diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py
index 8cf2cf3b..fef73ca6 100644
--- a/pandas_gbq/gbq.py
+++ b/pandas_gbq/gbq.py
@@ -480,7 +480,10 @@ def run_query(self, query, **kwargs):
             rows_iter = query_reply.result()
         except self.http_error as ex:
             self.process_http_error(ex)
-        df = rows_iter.to_dataframe()
+
+        schema_fields = [field.to_api_repr() for field in rows_iter.schema]
+        dtypes = _bqschema_to_dtypes(schema_fields)
+        df = rows_iter.to_dataframe(dtypes=dtypes)
         logger.debug("Got {} rows.\n".format(rows_iter.total_rows))
         return df
 
@@ -630,27 +633,32 @@ def delete_and_recreate_table(self, dataset_id, table_id, table_schema):
         table.create(table_id, table_schema)
 
 
-def _parse_schema(schema_fields):
+def _bqschema_to_dtypes(schema_fields):
+    # Only specify dtype when the dtype allows nulls. Otherwise, use pandas's
+    # default dtype choice.
+    #
     # see:
     # http://pandas.pydata.org/pandas-docs/dev/missing_data.html
     # #missing-data-casting-rules-and-indexing
     dtype_map = {
         "FLOAT": np.dtype(float),
-        "TIMESTAMP": "datetime64[ns]",
+        "TIMESTAMP": "datetime64[ns, UTC]",
         "TIME": "datetime64[ns]",
         "DATE": "datetime64[ns]",
         "DATETIME": "datetime64[ns]",
-        "BOOLEAN": bool,
-        "INTEGER": np.int64,
     }
 
+    dtypes = {}
     for field in schema_fields:
         name = str(field["name"])
         if field["mode"].upper() == "REPEATED":
-            yield name, object
-        else:
-            dtype = dtype_map.get(field["type"].upper())
-            yield name, dtype
+            continue
+
+        dtype = dtype_map.get(field["type"].upper())
+        if dtype:
+            dtypes[name] = dtype
+
+    return dtypes
 
 
 def read_gbq(
diff --git a/tests/system/test_gbq.py b/tests/system/test_gbq.py
index dde34cb1..765fb8e2 100644
--- a/tests/system/test_gbq.py
+++ b/tests/system/test_gbq.py
@@ -138,14 +138,6 @@ def test_should_be_able_to_get_a_bigquery_client(self, gbq_connector):
         bigquery_client = gbq_connector.get_client()
         assert bigquery_client is not None
 
-    def test_should_be_able_to_get_schema_from_query(self, gbq_connector):
-        schema, pages = gbq_connector.run_query("SELECT 1")
-        assert schema is not None
-
-    def test_should_be_able_to_get_results_from_query(self, gbq_connector):
-        schema, pages = gbq_connector.run_query("SELECT 1")
-        assert pages is not None
-
 
 def test_should_read(project, credentials):
     query = 'SELECT "PI" AS valid_string'
diff --git a/tests/unit/test_gbq.py b/tests/unit/test_gbq.py
index 4f1d18ad..4427a07e 100644
--- a/tests/unit/test_gbq.py
+++ b/tests/unit/test_gbq.py
@@ -2,6 +2,7 @@
 
 import pandas.util.testing as tm
 import pytest
+import numpy
 from pandas import DataFrame
 from pandas.compat.numpy import np_datetime64_compat
 
@@ -64,26 +65,23 @@ def no_auth(monkeypatch):
 
 
 @pytest.mark.parametrize(
-    ("input", "type_", "expected"),
+    ("type_", "expected"),
     [
-        (1, "INTEGER", int(1)),
-        (1, "FLOAT", float(1)),
-        pytest.param("false", "BOOLEAN", False, marks=pytest.mark.xfail),
-        pytest.param(
-            "0e9",
-            "TIMESTAMP",
-            np_datetime64_compat("1970-01-01T00:00:00Z"),
-            marks=pytest.mark.xfail,
-        ),
-        ("STRING", "STRING", "STRING"),
+        ("INTEGER", None),  # Can't handle NULL
+        ("BOOLEAN", None),  # Can't handle NULL
+        ("FLOAT", numpy.dtype(float)),
+        ("TIMESTAMP", "datetime64[ns, UTC]"),
+        ("DATETIME", "datetime64[ns]"),
     ],
 )
-def test_should_return_bigquery_correctly_typed(input, type_, expected):
-    result = gbq._parse_data(
-        dict(fields=[dict(name="x", type=type_, mode="NULLABLE")]),
-        rows=[[input]],
-    ).iloc[0, 0]
-    assert result == expected
+def test_should_return_bigquery_correctly_typed(type_, expected):
+    result = gbq._bqschema_to_dtypes(
+        [dict(name="x", type=type_, mode="NULLABLE")]
+    )
+    if not expected:
+        assert result == {}
+    else:
+        assert result == {"x": expected}
 
 
 def test_to_gbq_should_fail_if_invalid_table_name_passed():

From b9f931d7d106219bc8ac626c65723c3ca16d33cf Mon Sep 17 00:00:00 2001
From: Tim Swast
Date: Fri, 8 Feb 2019 14:08:25 -0800
Subject: [PATCH 03/19] Bump minimum google-cloud-bigquery version for dtypes
 argument.

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index e53d43f5..e5e40505 100644
--- a/setup.py
+++ b/setup.py
@@ -22,7 +22,7 @@ def readme():
         "pydata-google-auth",
         "google-auth",
         "google-auth-oauthlib",
-        "google-cloud-bigquery>=0.32.0",
+        "google-cloud-bigquery>=1.9.0",
     ]
 
 extras = {"tqdm": "tqdm>=4.23.0"}

From f805dba186fc3b850bb49f028b4de8c796f11a7c Mon Sep 17 00:00:00 2001
From: Tim Swast
Date: Fri, 8 Feb 2019 16:51:39 -0800
Subject: [PATCH 04/19] Update tests to match dtypes from to_dataframe().

---
 tests/system/test_gbq.py | 76 +++++++++++++++++++++++++++++++---------
 1 file changed, 59 insertions(+), 17 deletions(-)

diff --git a/tests/system/test_gbq.py b/tests/system/test_gbq.py
index 765fb8e2..82835e56 100644
--- a/tests/system/test_gbq.py
+++ b/tests/system/test_gbq.py
@@ -6,11 +6,12 @@
 
 import google.oauth2.service_account
 import numpy as np
+import pandas
 import pandas.util.testing as tm
-import pytest
-import pytz
 from pandas import DataFrame, NaT, compat
 from pandas.compat import range, u
+import pytest
+import pytz
 
 from pandas_gbq import gbq
@@ -311,7 +312,8 @@ def test_should_properly_handle_timestamp_unix_epoch(self, project_id):
         tm.assert_frame_equal(
             df,
             DataFrame(
-                {"unix_epoch": [np.datetime64("1970-01-01T00:00:00.000000Z")]}
+                {"unix_epoch": ["1970-01-01T00:00:00.000000Z"]},
+                dtype="datetime64[ns, UTC]",
             ),
         )
 
@@ -323,6 +325,38 @@ def test_should_properly_handle_arbitrary_timestamp(self, project_id):
             credentials=self.credentials,
             dialect="legacy",
         )
+        tm.assert_frame_equal(
+            df,
+            DataFrame(
+                {"valid_timestamp": ["2004-09-15T05:00:00.000000Z"]},
+                dtype="datetime64[ns, UTC]",
+            ),
+        )
+
+    def test_should_properly_handle_datetime_unix_epoch(self, project_id):
+        query = 'SELECT DATETIME("1970-01-01 00:00:00") AS unix_epoch'
+        df = gbq.read_gbq(
+            query,
+            project_id=project_id,
+            credentials=self.credentials,
+            dialect="legacy",
+        )
+        tm.assert_frame_equal(
+            df,
+            DataFrame(
+                {"unix_epoch": ["1970-01-01T00:00:00.000000Z"]},
+                dtype="datetime64[ns]",
+            ),
+        )
+
+    def test_should_properly_handle_arbitrary_datetime(self, project_id):
+        query = 'SELECT DATETIME("2004-09-15 05:00:00") AS valid_timestamp'
+        df = gbq.read_gbq(
+            query,
+            project_id=project_id,
+            credentials=self.credentials,
+            dialect="legacy",
+        )
         tm.assert_frame_equal(
             df,
             DataFrame(
@@ -338,7 +372,7 @@ def test_should_properly_handle_arbitrary_timestamp(self, project_id):
         "expression, type_",
         [
             ("current_date()", "<M8[ns]"),
-            ("current_timestamp()", "<M8[ns]"),
+            ("current_timestamp()", "datetime64[ns, UTC]"),
             ("current_time()", "<M8[ns]"),
         ],
     )

From d8e3b99e6599e174b3305402d019f62179bc669c Mon Sep 17 00:00:00 2001
From: Tim Swast
Date: Tue, 12 Feb 2019 09:53:25 -0800
Subject: [PATCH 05/19] Cast to correct dtype in empty dataframes.
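
With zero rows, pandas has no values from which to infer a dtype, so the
nullsafe dtypes from the previous commit leave empty INTEGER and BOOLEAN
columns as ``object``. Cast those columns explicitly when the frame is
empty. A rough illustration (not part of the diff):

    import pandas

    df = pandas.DataFrame({"is_bot": []})
    df["is_bot"].dtype  # object
    df["is_bot"] = df["is_bot"].astype(bool)
    df["is_bot"].dtype  # bool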
---
 pandas_gbq/gbq.py        | 43 ++++++++++++++++++++++++++++++++++++----
 tests/system/test_gbq.py |  4 ++--
 tests/unit/test_gbq.py   |  2 +-
 3 files changed, 42 insertions(+), 7 deletions(-)

diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py
index fef73ca6..75dc735c 100644
--- a/pandas_gbq/gbq.py
+++ b/pandas_gbq/gbq.py
@@ -482,8 +482,12 @@ def run_query(self, query, **kwargs):
             rows_iter = query_reply.result()
         except self.http_error as ex:
             self.process_http_error(ex)
 
         schema_fields = [field.to_api_repr() for field in rows_iter.schema]
-        dtypes = _bqschema_to_dtypes(schema_fields)
-        df = rows_iter.to_dataframe(dtypes=dtypes)
+        nullsafe_dtypes = _bqschema_to_nullsafe_dtypes(schema_fields)
+        df = rows_iter.to_dataframe(dtypes=nullsafe_dtypes)
+
+        if df.empty:
+            df = _cast_empty_df_dtypes(schema_fields, df)
+
         logger.debug("Got {} rows.\n".format(rows_iter.total_rows))
         return df
 
@@ -633,11 +637,11 @@ def delete_and_recreate_table(self, dataset_id, table_id, table_schema):
         table.create(table_id, table_schema)
 
 
-def _bqschema_to_dtypes(schema_fields):
+def _bqschema_to_nullsafe_dtypes(schema_fields):
     # Only specify dtype when the dtype allows nulls. Otherwise, use pandas's
     # default dtype choice.
     #
-    # see:
+    # See:
     # http://pandas.pydata.org/pandas-docs/dev/missing_data.html
     # #missing-data-casting-rules-and-indexing
     dtype_map = {
@@ -661,6 +665,37 @@ def _bqschema_to_dtypes(schema_fields):
     return dtypes
 
 
+def _cast_empty_df_dtypes(schema_fields, df):
+    """Cast any columns in an empty dataframe to correct type.
+
+    In an empty dataframe, pandas cannot choose a dtype unless one is
+    explicitly provided. The _bqschema_to_nullsafe_dtypes() function only
+    provides dtypes when the dtype safely handles null values. This means
+    that empty int64 and boolean columns are incorrectly classified as
+    ``object``.
+    """
+    if not df.empty:
+        raise ValueError(
+            "DataFrame must be empty in order to cast non-nullsafe dtypes"
+        )
+
+    dtype_map = {
+        "BOOLEAN": bool,
+        "INTEGER": np.int64,
+    }
+
+    for field in schema_fields:
+        column = str(field["name"])
+        if field["mode"].upper() == "REPEATED":
+            continue
+
+        dtype = dtype_map.get(field["type"].upper())
+        if dtype:
+            df[column] = df[column].astype(dtype)
+
+    return df
+
+
 def read_gbq(
     query,
     project_id=None,
diff --git a/tests/system/test_gbq.py b/tests/system/test_gbq.py
index 82835e56..de243e14 100644
--- a/tests/system/test_gbq.py
+++ b/tests/system/test_gbq.py
@@ -590,8 +590,8 @@ def test_zero_rows(self, project_id):
         )
         empty_columns = {
             "title": pandas.Series([], dtype=object),
-            "id": pandas.Series([], dtype=object),
-            "is_bot": pandas.Series([], dtype=object),
+            "id": pandas.Series([], dtype=np.dtype(int)),
+            "is_bot": pandas.Series([], dtype=np.dtype(bool)),
             "ts": pandas.Series([], dtype="datetime64[ns, UTC]"),
         }
         expected_result = DataFrame(empty_columns)
diff --git a/tests/unit/test_gbq.py b/tests/unit/test_gbq.py
index 4427a07e..fd5fa493 100644
--- a/tests/unit/test_gbq.py
+++ b/tests/unit/test_gbq.py
@@ -75,7 +75,7 @@ def no_auth(monkeypatch):
     ],
 )
 def test_should_return_bigquery_correctly_typed(type_, expected):
-    result = gbq._bqschema_to_dtypes(
+    result = gbq._bqschema_to_nullsafe_dtypes(
         [dict(name="x", type=type_, mode="NULLABLE")]
     )
     if not expected:

From 90eb9fe097eb149d9168f752f3867aeadf06343b Mon Sep 17 00:00:00 2001
From: Tim Swast
Date: Tue, 12 Feb 2019 09:54:22 -0800
Subject: [PATCH 06/19] Blacken

---
 pandas_gbq/gbq.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py
index 75dc735c..22e1b0b0 100644
--- a/pandas_gbq/gbq.py
+++ b/pandas_gbq/gbq.py
@@ -679,10 +679,7 @@ def _cast_empty_df_dtypes(schema_fields, df):
             "DataFrame must be empty in order to cast non-nullsafe dtypes"
         )
 
-    dtype_map = {
-        "BOOLEAN": bool,
-        "INTEGER": np.int64,
-    }
+    dtype_map = {"BOOLEAN": bool, "INTEGER": np.int64}
 
     for field in schema_fields:
         column = str(field["name"])

From 013b00fb7164e4a14640e66d3bea79c684384e37 Mon Sep 17 00:00:00 2001
From: Tim Swast
Date: Tue, 12 Feb 2019 10:36:20 -0800
Subject: [PATCH 07/19] Blacken benchmark.

---
 benchmark/read_gbq_large_results.py | 1 -
 benchmark/read_gbq_small_results.py | 1 -
 2 files changed, 2 deletions(-)

diff --git a/benchmark/read_gbq_large_results.py b/benchmark/read_gbq_large_results.py
index 7dedbb13..98d9ff53 100644
--- a/benchmark/read_gbq_large_results.py
+++ b/benchmark/read_gbq_large_results.py
@@ -1,4 +1,3 @@
-
 import pandas_gbq
 
 # Select 163 MB worth of data, to time how long it takes to download large
diff --git a/benchmark/read_gbq_small_results.py b/benchmark/read_gbq_small_results.py
index 68621194..8e91b0a0 100644
--- a/benchmark/read_gbq_small_results.py
+++ b/benchmark/read_gbq_small_results.py
@@ -1,4 +1,3 @@
-
 import pandas_gbq
 
 # Select a few KB worth of data, to time downloading small result sets.

From 5a526c0699989a936ea4685ccc17462ed50124ed Mon Sep 17 00:00:00 2001
From: Tim Swast
Date: Tue, 12 Feb 2019 10:57:35 -0800
Subject: [PATCH 08/19] Remove timezone from datetime tests.
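
BigQuery DATETIME values are timezone-naive, so the expected values in the
DATETIME tests should not carry a UTC ("Z") suffix; that suffix belongs
only to the timezone-aware TIMESTAMP expectations. An illustrative sketch
of the distinction (not part of the diff):

    import numpy as np

    np.datetime64("2004-09-15T05:00:00")  # DATETIME: wall-clock, naive
    # TIMESTAMP keeps the Z suffix and a timezone-aware pandas dtype:
    # "2004-09-15T05:00:00.000000Z" with dtype "datetime64[ns, UTC]"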
---
 tests/system/test_gbq.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/system/test_gbq.py b/tests/system/test_gbq.py
index de243e14..1f887fa3 100644
--- a/tests/system/test_gbq.py
+++ b/tests/system/test_gbq.py
@@ -344,7 +344,7 @@ def test_should_properly_handle_datetime_unix_epoch(self, project_id):
         tm.assert_frame_equal(
             df,
             DataFrame(
-                {"unix_epoch": ["1970-01-01T00:00:00.000000Z"]},
+                {"unix_epoch": ["1970-01-01T00:00:00"]},
                 dtype="datetime64[ns]",
             ),
         )
@@ -362,7 +362,7 @@ def test_should_properly_handle_arbitrary_datetime(self, project_id):
             DataFrame(
                 {
                     "valid_timestamp": [
-                        np.datetime64("2004-09-15T05:00:00.000000Z")
+                        np.datetime64("2004-09-15T05:00:00")
                     ]
                 }
             ),

From d8e3b99e6599e174b3305402d019f62179bc669c Mon Sep 17 00:00:00 2001
From: Tim Swast
Date: Tue, 12 Feb 2019 12:47:43 -0800
Subject: [PATCH 09/19] Blacken tests.

---
 tests/system/test_gbq.py | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/tests/system/test_gbq.py b/tests/system/test_gbq.py
index 1f887fa3..976a2bda 100644
--- a/tests/system/test_gbq.py
+++ b/tests/system/test_gbq.py
@@ -344,8 +344,7 @@ def test_should_properly_handle_datetime_unix_epoch(self, project_id):
         tm.assert_frame_equal(
             df,
             DataFrame(
-                {"unix_epoch": ["1970-01-01T00:00:00"]},
-                dtype="datetime64[ns]",
+                {"unix_epoch": ["1970-01-01T00:00:00"]}, dtype="datetime64[ns]"
             ),
         )
 
@@ -360,11 +359,7 @@ def test_should_properly_handle_arbitrary_datetime(self, project_id):
         tm.assert_frame_equal(
             df,
             DataFrame(
-                {
-                    "valid_timestamp": [
-                        np.datetime64("2004-09-15T05:00:00")
-                    ]
-                }
+                {"valid_timestamp": [np.datetime64("2004-09-15T05:00:00")]}
             ),
         )

From 1e4009a2dce3534150d6336dbaa87138e120dfda Mon Sep 17 00:00:00 2001
From: Tim Swast
Date: Tue, 12 Feb 2019 13:32:27 -0800
Subject: [PATCH 10/19] Update docs for minimum google-cloud-bigquery version.

---
 ci/requirements-3.6-0.20.1.conda |  2 +-
 docs/source/changelog.rst        | 18 ++++++++++++++++++
 pandas_gbq/gbq.py                |  2 +-
 3 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/ci/requirements-3.6-0.20.1.conda b/ci/requirements-3.6-0.20.1.conda
index a057399d..2f8d0d83 100644
--- a/ci/requirements-3.6-0.20.1.conda
+++ b/ci/requirements-3.6-0.20.1.conda
@@ -1,6 +1,6 @@
 google-auth
 google-auth-oauthlib
-google-cloud-bigquery==0.32.0
+google-cloud-bigquery==1.9.0
 pytest
 pytest-cov
 codecov
diff --git a/docs/source/changelog.rst b/docs/source/changelog.rst
index 6f3aa5cd..60861dea 100644
--- a/docs/source/changelog.rst
+++ b/docs/source/changelog.rst
@@ -1,6 +1,24 @@
 Changelog
 =========
 
+.. _changelog-0.10.0:
+
+0.10.0 / TBD
+------------
+
+Dependency updates
+~~~~~~~~~~~~~~~~~~
+
+- Update the minimum version of ``google-cloud-bigquery`` to 1.9.0.
+  (:issue:`247`)
+
+Internal changes
+~~~~~~~~~~~~~~~~
+
+- Use ``to_dataframe()`` from ``google-cloud-bigquery`` in the ``read_gbq()``
+  function. (:issue:`247`)
+
+
 .. _changelog-0.9.0:
 
 0.9.0 / 2019-01-11
diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py
index 22e1b0b0..e99eda2c 100644
--- a/pandas_gbq/gbq.py
+++ b/pandas_gbq/gbq.py
@@ -35,7 +35,7 @@ def _check_google_client_version():
         raise ImportError("Could not import pkg_resources (setuptools).")
 
     # https://github.com/GoogleCloudPlatform/google-cloud-python/blob/master/bigquery/CHANGELOG.md
-    bigquery_minimum_version = pkg_resources.parse_version("0.32.0")
+    bigquery_minimum_version = pkg_resources.parse_version("1.9.0")
     BIGQUERY_INSTALLED_VERSION = pkg_resources.get_distribution(
         "google-cloud-bigquery"
     ).parsed_version

From 827a0650a141df7cc8ddec37ae8f6c5b6467c567 Mon Sep 17 00:00:00 2001
From: Tim Swast
Date: Fri, 15 Feb 2019 10:17:59 -0800
Subject: [PATCH 11/19] Update version number in unit tests.

---
 tests/unit/test_gbq.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/unit/test_gbq.py b/tests/unit/test_gbq.py
index fd5fa493..c868d853 100644
--- a/tests/unit/test_gbq.py
+++ b/tests/unit/test_gbq.py
@@ -23,7 +23,7 @@ def min_bq_version():
     import pkg_resources
 
-    return pkg_resources.parse_version("0.32.0")
+    return pkg_resources.parse_version("1.9.0")
 
 
 def mock_none_credentials(*args, **kwargs):

From 159bda0d02c3035dfd06bbc92f9d22d3b4317515 Mon Sep 17 00:00:00 2001
From: Tim Swast
Date: Fri, 15 Feb 2019 15:40:02 -0800
Subject: [PATCH 12/19] Update dependencies for tests.

---
 ci/requirements-2.7.pip | 2 +-
 ci/requirements-3.5.pip | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/ci/requirements-2.7.pip b/ci/requirements-2.7.pip
index 48ceb439..10300f12 100644
--- a/ci/requirements-2.7.pip
+++ b/ci/requirements-2.7.pip
@@ -2,5 +2,5 @@ mock
 pandas==0.17.1
 google-auth==1.4.1
 google-auth-oauthlib==0.0.1
-google-cloud-bigquery==0.32.0
+google-cloud-bigquery==1.9.0
 pydata-google-auth==0.1.2
diff --git a/ci/requirements-3.5.pip b/ci/requirements-3.5.pip
index 980d0700..41a41891 100644
--- a/ci/requirements-3.5.pip
+++ b/ci/requirements-3.5.pip
@@ -1,5 +1,5 @@
 pandas==0.19.0
 google-auth==1.4.1
 google-auth-oauthlib==0.0.1
-google-cloud-bigquery==0.32.0
+google-cloud-bigquery==1.9.0
 pydata-google-auth==0.1.2
\ No newline at end of file

From 0ecce9bde9157fdd9c141e23131c27596eebbfdd Mon Sep 17 00:00:00 2001
From: Tim Swast
Date: Fri, 15 Feb 2019 16:50:03 -0800
Subject: [PATCH 13/19] Fix lint error.

---
 tests/unit/test_gbq.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/unit/test_gbq.py b/tests/unit/test_gbq.py
index c868d853..a1317887 100644
--- a/tests/unit/test_gbq.py
+++ b/tests/unit/test_gbq.py
@@ -4,7 +4,6 @@
 import pytest
 import numpy
 from pandas import DataFrame
-from pandas.compat.numpy import np_datetime64_compat
 
 import pandas_gbq.exceptions
 from pandas_gbq import gbq

From b80cf5c769b3c621a9c68e03a7331d6189bf5864 Mon Sep 17 00:00:00 2001
From: Tim Swast
Date: Fri, 15 Feb 2019 16:50:29 -0800
Subject: [PATCH 14/19] Specify column order on empty DataFrame.
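
With no rows to infer from, a dict-based DataFrame constructor is not
guaranteed to preserve column order on the older Python and pandas
versions we test against, so pass the expected order explicitly. A sketch
of the idea (illustrative):

    import pandas

    empty_columns = {"title": [], "id": [], "is_bot": [], "ts": []}
    expected = pandas.DataFrame(
        empty_columns, columns=["title", "id", "is_bot", "ts"]
    )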
---
 tests/system/test_gbq.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tests/system/test_gbq.py b/tests/system/test_gbq.py
index 976a2bda..8781fac9 100644
--- a/tests/system/test_gbq.py
+++ b/tests/system/test_gbq.py
@@ -589,7 +589,9 @@ def test_zero_rows(self, project_id):
             "is_bot": pandas.Series([], dtype=np.dtype(bool)),
             "ts": pandas.Series([], dtype="datetime64[ns, UTC]"),
         }
-        expected_result = DataFrame(empty_columns)
+        expected_result = DataFrame(
+            empty_columns, columns=["title", "id", "is_bot", "ts"]
+        )
         tm.assert_frame_equal(df, expected_result, check_index_type=False)
 
     def test_one_row_one_column(self, project_id):

From 20dd01a3e16b0279e6d829adacf6d4cd8adee9cd Mon Sep 17 00:00:00 2001
From: Tim Swast
Date: Fri, 15 Feb 2019 16:54:35 -0800
Subject: [PATCH 15/19] Don't wipe out conda dependencies.

---
 ci/run_conda.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ci/run_conda.sh b/ci/run_conda.sh
index 59769328..60ae6ff0 100755
--- a/ci/run_conda.sh
+++ b/ci/run_conda.sh
@@ -21,7 +21,7 @@ fi
 
 REQ="ci/requirements-${PYTHON}-${PANDAS}"
 conda install -q --file "$REQ.conda";
-python setup.py develop
+python setup.py develop --no-deps
 
 # Run the tests
 $DIR/run_tests.sh

From 59a93282efa1f81abbd37890bed40d328805eff5 Mon Sep 17 00:00:00 2001
From: Tim Swast
Date: Fri, 15 Feb 2019 17:19:05 -0800
Subject: [PATCH 16/19] Add pydata-google-auth to conda deps.

---
 ci/requirements-3.6-0.20.1.conda | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/ci/requirements-3.6-0.20.1.conda b/ci/requirements-3.6-0.20.1.conda
index 2f8d0d83..1c7eb3f2 100644
--- a/ci/requirements-3.6-0.20.1.conda
+++ b/ci/requirements-3.6-0.20.1.conda
@@ -1,5 +1,4 @@
-google-auth
-google-auth-oauthlib
+pydata-google-auth
 google-cloud-bigquery==1.9.0
 pytest
 pytest-cov

From d7c1ca5fd675cb44b4b905e302d6d4b1875e0eba Mon Sep 17 00:00:00 2001
From: Tim Swast
Date: Tue, 19 Feb 2019 09:54:23 -0800
Subject: [PATCH 17/19] Document change in behavior of TIMESTAMP columns.

---
 docs/source/changelog.rst | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/docs/source/changelog.rst b/docs/source/changelog.rst
index 60861dea..98c913b9 100644
--- a/docs/source/changelog.rst
+++ b/docs/source/changelog.rst
@@ -6,6 +6,9 @@ Changelog
 0.10.0 / TBD
 ------------
 
+- ``read_gbq()`` converts BigQuery ``TIMESTAMP`` columns to the
+  timezone-aware ``datetime64`` ``dtype``. (:issue:`247`)
+
 Dependency updates
 ~~~~~~~~~~~~~~~~~~
 

From 5b1bd0a78330f8dcd25df6f33db8554a65ea32b7 Mon Sep 17 00:00:00 2001
From: Tim Swast
Date: Fri, 22 Feb 2019 11:27:39 -0800
Subject: [PATCH 18/19] Use timezone-naive datetime64 dtype for TIMESTAMP
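
Even though BigQuery TIMESTAMP values are timezone-aware, pandas does not
accept datetime64[ns, UTC] as a dtype argument in DataFrame constructors
(see pandas-dev/pandas#12513), which breaks the empty-result code path.
Keep downloaded TIMESTAMP columns timezone-naive for now. Roughly, the
failure this avoids (illustrative):

    import pandas

    # Raises on the pandas versions we support; a tz-aware dtype cannot
    # be passed directly to the constructor.
    pandas.DataFrame({"ts": []}, dtype="datetime64[ns, UTC]")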
---
 docs/source/changelog.rst |  3 ---
 pandas_gbq/gbq.py         |  5 ++++-
 tests/system/test_gbq.py  | 10 +++++-----
 tests/unit/test_gbq.py    |  2 +-
 4 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/docs/source/changelog.rst b/docs/source/changelog.rst
index 98c913b9..60861dea 100644
--- a/docs/source/changelog.rst
+++ b/docs/source/changelog.rst
@@ -6,9 +6,6 @@ Changelog
 0.10.0 / TBD
 ------------
 
-- ``read_gbq()`` converts BigQuery ``TIMESTAMP`` columns to the
-  timezone-aware ``datetime64`` ``dtype``. (:issue:`247`)
-
 Dependency updates
 ~~~~~~~~~~~~~~~~~~
 
diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py
index e99eda2c..13d65669 100644
--- a/pandas_gbq/gbq.py
+++ b/pandas_gbq/gbq.py
@@ -646,7 +646,10 @@ def _bqschema_to_nullsafe_dtypes(schema_fields):
     # #missing-data-casting-rules-and-indexing
     dtype_map = {
         "FLOAT": np.dtype(float),
-        "TIMESTAMP": "datetime64[ns, UTC]",
+        # Even though TIMESTAMPs are timezone-aware in BigQuery, pandas doesn't
+        # support datetime64[ns, UTC] as dtype in DataFrame constructors. See:
+        # https://github.com/pandas-dev/pandas/issues/12513
+        "TIMESTAMP": "datetime64[ns]",
         "TIME": "datetime64[ns]",
         "DATE": "datetime64[ns]",
         "DATETIME": "datetime64[ns]",
diff --git a/tests/system/test_gbq.py b/tests/system/test_gbq.py
index 8781fac9..fa068642 100644
--- a/tests/system/test_gbq.py
+++ b/tests/system/test_gbq.py
@@ -313,7 +313,7 @@ def test_should_properly_handle_timestamp_unix_epoch(self, project_id):
             df,
             DataFrame(
                 {"unix_epoch": ["1970-01-01T00:00:00.000000Z"]},
-                dtype="datetime64[ns, UTC]",
+                dtype="datetime64[ns]",
             ),
         )
 
@@ -329,7 +329,7 @@ def test_should_properly_handle_arbitrary_timestamp(self, project_id):
             df,
             DataFrame(
                 {"valid_timestamp": ["2004-09-15T05:00:00.000000Z"]},
-                dtype="datetime64[ns, UTC]",
+                dtype="datetime64[ns]",
             ),
         )
 
@@ -367,7 +367,7 @@ def test_should_properly_handle_arbitrary_datetime(self, project_id):
         "expression, type_",
         [
             ("current_date()", "<M8[ns]"),
-            ("current_timestamp()", "datetime64[ns, UTC]"),
+            ("current_timestamp()", "<M8[ns]"),
             ("current_time()", "<M8[ns]"),
         ],
     )
diff --git a/tests/unit/test_gbq.py b/tests/unit/test_gbq.py
--- a/tests/unit/test_gbq.py
+++ b/tests/unit/test_gbq.py
@@ -70,7 +70,7 @@ def no_auth(monkeypatch):
         ("INTEGER", None),  # Can't handle NULL
         ("BOOLEAN", None),  # Can't handle NULL
         ("FLOAT", numpy.dtype(float)),
-        ("TIMESTAMP", "datetime64[ns, UTC]"),
+        ("TIMESTAMP", "datetime64[ns]"),
         ("DATETIME", "datetime64[ns]"),
     ],
 )

From 5b1bd0a78330f8dcd25df6f33db8554a65ea32b7 Mon Sep 17 00:00:00 2001
From: Tim Swast
Date: Fri, 22 Feb 2019 11:32:59 -0800
Subject: [PATCH 19/19] Blacken

---
 tests/system/test_gbq.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tests/system/test_gbq.py b/tests/system/test_gbq.py
index fa068642..82753a38 100644
--- a/tests/system/test_gbq.py
+++ b/tests/system/test_gbq.py
@@ -400,8 +400,7 @@ def test_should_properly_handle_null_timestamp(self, project_id):
             dialect="legacy",
         )
         tm.assert_frame_equal(
-            df,
-            DataFrame({"null_timestamp": [NaT]}, dtype="datetime64[ns]"),
+            df, DataFrame({"null_timestamp": [NaT]}, dtype="datetime64[ns]")
         )
 
     def test_should_properly_handle_null_datetime(self, project_id):