Skip to content

fix: update default temp table expiration to 7 days #174

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Nov 6, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion bigframes/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,4 +26,4 @@

ABSTRACT_METHOD_ERROR_MESSAGE = f"Abstract method. You have likely encountered a bug. Please share this stacktrace and how you reached it with the BigQuery DataFrames team. {FEEDBACK_LINK}"

DEFAULT_EXPIRATION = datetime.timedelta(days=1)
DEFAULT_EXPIRATION = datetime.timedelta(days=7)
4 changes: 3 additions & 1 deletion bigframes/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

from __future__ import annotations

import datetime
import re
import textwrap
import typing
Expand Down Expand Up @@ -2327,7 +2328,8 @@ def to_gbq(
self._session.bqclient,
self._session._anonymous_dataset,
# TODO(swast): allow custom expiration times, probably via session configuration.
constants.DEFAULT_EXPIRATION,
datetime.datetime.now(datetime.timezone.utc)
+ constants.DEFAULT_EXPIRATION,
)

if if_exists is not None and if_exists != "replace":
Expand Down
4 changes: 3 additions & 1 deletion bigframes/session/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -430,7 +430,9 @@ def _read_gbq_query(
index_cols = list(index_col)

destination, query_job = self._query_to_destination(
query, index_cols, api_name="read_gbq_query"
query,
index_cols,
api_name=api_name,
)

# If there was no destination table, that means the query must have
Expand Down
36 changes: 28 additions & 8 deletions bigframes/session/_io/bigquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@

"""Private module: Helpers for I/O operations."""

from __future__ import annotations

import datetime
import textwrap
import types
Expand Down Expand Up @@ -69,6 +71,29 @@ def create_export_data_statement(
)


def random_table(dataset: bigquery.DatasetReference) -> bigquery.TableReference:
    """Build a reference to a randomly named BigQuery DataFrames table.

    The table ID is produced by filling ``TEMP_TABLE_PREFIX`` with the current
    UTC date (``YYYYMMDD``) and a random UUID4 hex string. This function only
    constructs the reference; it does not create the table or check whether
    a table with that ID already exists.

    Args:
        dataset (google.cloud.bigquery.DatasetReference):
            The dataset to make the table reference in. Usually the anonymous
            dataset for the session.

    Returns:
        google.cloud.bigquery.TableReference:
            Reference to a table in ``dataset`` with a freshly generated ID.
    """
    # Use an explicit UTC clock so the date stamp does not depend on the
    # local timezone of the machine running the session.
    date_stamp = datetime.datetime.now(datetime.timezone.utc).strftime("%Y%m%d")
    return dataset.table(
        TEMP_TABLE_PREFIX.format(date=date_stamp, random_id=uuid.uuid4().hex)
    )


def table_ref_to_sql(table: bigquery.TableReference) -> str:
    """Render a table reference as a backtick-quoted, dot-separated SQL path.

    Args:
        table (google.cloud.bigquery.TableReference):
            Reference whose ``project``, ``dataset_id`` and ``table_id``
            attributes are read.

    Returns:
        str:
            The three components, each wrapped in backticks and joined
            with dots.
    """
    components = (table.project, table.dataset_id, table.table_id)
    return ".".join(f"`{component}`" for component in components)


def create_snapshot_sql(
table_ref: bigquery.TableReference, current_timestamp: datetime.datetime
) -> str:
Expand All @@ -95,17 +120,12 @@ def create_snapshot_sql(
def create_temp_table(
bqclient: bigquery.Client,
dataset: bigquery.DatasetReference,
expiration: datetime.timedelta,
expiration: datetime.datetime,
) -> str:
"""Create an empty table with an expiration in the desired dataset."""
now = datetime.datetime.now(datetime.timezone.utc)
random_id = uuid.uuid4().hex
table_id = TEMP_TABLE_PREFIX.format(
date=now.strftime("%Y%m%d"), random_id=random_id
)
table_ref = dataset.table(table_id)
table_ref = random_table(dataset)
destination = bigquery.Table(table_ref)
destination.expires = now + expiration
destination.expires = expiration
bqclient.create_table(destination)
return f"{table_ref.project}.{table_ref.dataset_id}.{table_ref.table_id}"

Expand Down
10 changes: 5 additions & 5 deletions tests/unit/session/test_io_bigquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,9 +56,9 @@ def test_create_temp_table_default_expiration():
"""Make sure the created table has an expiration."""
bqclient = mock.create_autospec(bigquery.Client)
dataset = bigquery.DatasetReference("test-project", "test_dataset")
now = datetime.datetime.now(datetime.timezone.utc)
expiration = datetime.timedelta(days=3)
expected_expires = now + expiration
expiration = datetime.datetime(
2023, 11, 2, 13, 44, 55, 678901, datetime.timezone.utc
)

bigframes.session._io.bigquery.create_temp_table(bqclient, dataset, expiration)

Expand All @@ -69,9 +69,9 @@ def test_create_temp_table_default_expiration():
assert table.dataset_id == "test_dataset"
assert table.table_id.startswith("bqdf")
assert (
(expected_expires - datetime.timedelta(minutes=1))
(expiration - datetime.timedelta(minutes=1))
< table.expires
< (expected_expires + datetime.timedelta(minutes=1))
< (expiration + datetime.timedelta(minutes=1))
)


Expand Down