
ENH: Use tz-aware dtype for timestamp columns #269

Merged
merged 7 commits on Apr 3, 2019
Changes from 1 commit
Don't localize TIMESTAMP columns if they are already tz-aware.
tswast committed Apr 3, 2019
commit 65d31ebccffa4970d57b1c2f4121e6148ce68800
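In short, this commit changes _localize_df to skip columns that already carry a timezone, so TIMESTAMP data ends up tz-aware (UTC) whether or not pandas produced it that way. A minimal standalone sketch of that behavior, using a made-up column and schema for illustration (not taken from the diff below):

import pandas as pd

def localize_naive_timestamps(df, schema_fields):
    # Mirrors the guarded _localize_df in the diff below: only tz-naive
    # TIMESTAMP columns are localized to UTC; tz-aware columns pass through.
    for field in schema_fields:
        column = field["name"]
        if field["mode"].upper() == "REPEATED":
            continue
        if field["type"].upper() == "TIMESTAMP" and df[column].dt.tz is None:
            df[column] = df[column].dt.tz_localize("UTC")
    return df

# Hypothetical column and schema, just to exercise the helper.
frame = pd.DataFrame({"ts": pd.to_datetime(["2004-09-15T05:00:00"])})
schema = [{"name": "ts", "type": "TIMESTAMP", "mode": "NULLABLE"}]
print(localize_naive_timestamps(frame, schema)["ts"].dt.tz)  # UTC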
29 changes: 8 additions & 21 deletions pandas_gbq/gbq.py
@@ -12,7 +12,6 @@
 BIGQUERY_INSTALLED_VERSION = None
 SHOW_VERBOSE_DEPRECATION = False
 SHOW_PRIVATE_KEY_DEPRECATION = False
-USE_TZAWARE_TIMESTAMP = False
 PRIVATE_KEY_DEPRECATION_MESSAGE = (
     "private_key is deprecated and will be removed in a future version."
     "Use the credentials argument instead. See "
@@ -27,7 +26,7 @@


 def _check_google_client_version():
-    global BIGQUERY_INSTALLED_VERSION, SHOW_VERBOSE_DEPRECATION, SHOW_PRIVATE_KEY_DEPRECATION, USE_TZAWARE_TIMESTAMP
+    global BIGQUERY_INSTALLED_VERSION, SHOW_VERBOSE_DEPRECATION, SHOW_PRIVATE_KEY_DEPRECATION

     try:
         import pkg_resources
@@ -62,12 +61,6 @@ def _check_google_client_version():
     SHOW_PRIVATE_KEY_DEPRECATION = (
         pandas_installed_version >= pandas_version_with_credentials_arg
     )
-    pandas_version_supporting_tzaware_dtype = pkg_resources.parse_version(
-        "0.24.0"
-    )
-    USE_TZAWARE_TIMESTAMP = (
-        pandas_installed_version >= pandas_version_supporting_tzaware_dtype
-    )


 def _test_google_api_imports():
@@ -501,8 +494,8 @@ def run_query(self, query, **kwargs):
         if df.empty:
             df = _cast_empty_df_dtypes(schema_fields, df)

-        if not USE_TZAWARE_TIMESTAMP:
-            df = _localize_df(schema_fields, df)
+        # Ensure any TIMESTAMP columns are tz-aware.
+        df = _localize_df(schema_fields, df)

         logger.debug("Got {} rows.\n".format(rows_iter.total_rows))
         return df
@@ -662,20 +655,14 @@ def _bqschema_to_nullsafe_dtypes(schema_fields):
     See: http://pandas.pydata.org/pandas-docs/dev/missing_data.html
     #missing-data-casting-rules-and-indexing
     """
-    import pandas.api.types
-
-    # pandas doesn't support timezone-aware dtype in DataFrame/Series
-    # constructors until 0.24.0. See:
-    # https://github.com/pandas-dev/pandas/issues/25843#issuecomment-479656947
-    timestamp_dtype = "datetime64[ns]"
-    if USE_TZAWARE_TIMESTAMP:
-        timestamp_dtype = pandas.api.types.DatetimeTZDtype(unit="ns", tz="UTC")
-
     # If you update this mapping, also update the table at
     # `docs/source/reading.rst`.
     dtype_map = {
         "FLOAT": np.dtype(float),
-        "TIMESTAMP": timestamp_dtype,
+        # pandas doesn't support timezone-aware dtype in DataFrame/Series
+        # constructors. It's more idiomatic to localize after construction.
+        # https://github.com/pandas-dev/pandas/issues/25843
+        "TIMESTAMP": "datetime64[ns]",
         "TIME": "datetime64[ns]",
         "DATE": "datetime64[ns]",
         "DATETIME": "datetime64[ns]",
@@ -734,7 +721,7 @@ def _localize_df(schema_fields, df):
         if field["mode"].upper() == "REPEATED":
             continue

-        if field["type"].upper() == "TIMESTAMP":
+        if field["type"].upper() == "TIMESTAMP" and df[column].dt.tz is None:
             df[column] = df[column].dt.tz_localize("UTC")

     return df
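Two notes on the gbq.py changes above: dtype_map now always maps TIMESTAMP to naive datetime64[ns] and the timezone is attached after construction (see the pandas issue linked in the new comment), and the added dt.tz is None guard matters because pandas refuses to localize a series that is already tz-aware. A short sketch of both points; the variable names are illustrative, not from this PR:

import pandas as pd

# Construct naive first, then localize, which is the route the updated
# dtype_map takes instead of passing a tz-aware dtype to the constructor.
ts = pd.Series(pd.to_datetime(["2019-04-03T12:00:00"]))
ts = ts.dt.tz_localize("UTC")
print(ts.dtype)  # datetime64[ns, UTC]

# Localizing an already tz-aware series raises, hence the dt.tz is None guard.
try:
    ts.dt.tz_localize("UTC")
except TypeError as exc:
    print(exc)  # pandas asks for tz_convert instead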
7 changes: 4 additions & 3 deletions tests/system/test_gbq.py
@@ -329,9 +329,10 @@ def test_should_properly_handle_arbitrary_timestamp(self, project_id):
             {"valid_timestamp": ["2004-09-15T05:00:00.000000Z"]},
             dtype="datetime64[ns]",
         )
-        expected["valid_timestamp"] = expected[
-            "valid_timestamp"
-        ].dt.tz_localize("UTC")
+        if expected["valid_timestamp"].dt.tz is None:
+            expected["valid_timestamp"] = expected[
+                "valid_timestamp"
+            ].dt.tz_localize("UTC")
         tm.assert_frame_equal(df, expected)

     def test_should_properly_handle_datetime_unix_epoch(self, project_id):
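With this change, TIMESTAMP columns come back tz-aware in UTC, as the updated test expects. A hedged sketch of what downstream code might do if it needs another timezone or naive values; the series below merely stands in for a returned column:

import pandas as pd

# Stand-in for a TIMESTAMP column as returned after this change: UTC-aware.
ts = pd.Series(pd.to_datetime(["2004-09-15T05:00:00"])).dt.tz_localize("UTC")

local = ts.dt.tz_convert("America/New_York")  # shift to another timezone
naive = ts.dt.tz_localize(None)               # drop the tz, keep UTC wall time
print(local.dtype, naive.dtype)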