fix: table schema change error #692

Merged 2 commits on Nov 1, 2023
pandas_gbq/gbq.py: 9 additions & 6 deletions

@@ -1205,12 +1205,15 @@ def to_gbq(
         )
         table_connector.create(table_id, table_schema)
     else:
-        # Convert original schema (the schema that already exists) to pandas-gbq API format
-        original_schema = pandas_gbq.schema.to_pandas_gbq(table.schema)
-
-        # Update the local `table_schema` so mode (NULLABLE/REQUIRED)
-        # matches. See: https://github.com/pydata/pandas-gbq/issues/315
-        table_schema = pandas_gbq.schema.update_schema(table_schema, original_schema)
+        if if_exists == "append":
+            # Convert original schema (the schema that already exists) to pandas-gbq API format
+            original_schema = pandas_gbq.schema.to_pandas_gbq(table.schema)
+
+            # Update the local `table_schema` so mode (NULLABLE/REQUIRED)
+            # matches. See: https://github.com/pydata/pandas-gbq/issues/315
+            table_schema = pandas_gbq.schema.update_schema(
+                table_schema, original_schema
+            )
 
     if dataframe.empty:
         # Create the table (if needed), but don't try to run a load job with an
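
The effect of this change is easier to see outside the diff. Below is a simplified, illustrative sketch of the schema merge that pandas_gbq.schema.update_schema performs; the helper name and the merge details here are assumptions for illustration, not the library's implementation. It shows why the merge should only run for if_exists="append": with "replace", merging would let the existing table's field definitions override the caller's new schema, which is the bug reported in issue #670.

# Illustrative sketch only; not pandas-gbq source code.
def resolve_effective_schema(local_schema, existing_schema, if_exists):
    """Return the field list a load job would use, under this sketch's assumptions."""
    if if_exists != "append":
        # "replace" (and plain table creation) should trust the caller's schema.
        return local_schema
    # When appending, prefer the live table's definition for any field that
    # already exists, so modes (NULLABLE/REQUIRED) and types stay compatible.
    existing = {field["name"]: field for field in existing_schema}
    return [existing.get(field["name"], field) for field in local_schema]


existing = [{"name": "field2", "type": "INTEGER", "mode": "NULLABLE"}]
local = [{"name": "field2", "type": "STRING"}]

print(resolve_effective_schema(local, existing, "append"))   # keeps INTEGER
print(resolve_effective_schema(local, existing, "replace"))  # keeps STRING, as the fix intends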
tests/system/test_gbq.py: 8 additions & 6 deletions

@@ -788,6 +788,7 @@ def test_upload_data_if_table_exists_replace(self, project_id):
         test_size = 10
         df = make_mixed_dataframe_v2(test_size)
         df_different_schema = make_mixed_dataframe_v1()
+        schema_new = gbq.generate_bq_schema(df_different_schema)
 
         # Initialize table with sample data
         gbq.to_gbq(
@@ -798,7 +799,7 @@ def test_upload_data_if_table_exists_replace(self, project_id):
             credentials=self.credentials,
         )
 
-        # Test the if_exists parameter with the value 'replace'.
+        # When if_exists == 'replace', table schema should change too.
        gbq.to_gbq(
             df_different_schema,
             self.destination_table + test_id,
@@ -807,15 +808,16 @@ def test_upload_data_if_table_exists_replace(self, project_id):
             credentials=self.credentials,
         )
 
-        result = gbq.read_gbq(
-            "SELECT COUNT(*) AS num_rows FROM {0}".format(
-                self.destination_table + test_id
-            ),
+        df_new = gbq.read_gbq(
+            "SELECT * FROM {0}".format(self.destination_table + test_id),
             project_id=project_id,
             credentials=self.credentials,
             dialect="legacy",
         )
-        assert result["num_rows"][0] == 5
+
+        schema_returned = gbq.generate_bq_schema(df_new)
+        assert schema_new == schema_returned
+        assert df_new.shape[0] == 5
 
     def test_upload_data_if_table_exists_raises_value_error(self, project_id):
         test_id = "4"
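
For reference, the end-to-end behaviour this system test exercises looks roughly like the sketch below. It is a minimal usage sketch, assuming a reachable BigQuery project ("my-project") and dataset ("my_dataset"), both hypothetical, plus default application credentials; it is not part of the test suite.

import pandas
import pandas_gbq

df_v1 = pandas.DataFrame({"num": [1, 2, 3]})
df_v2 = pandas.DataFrame({"label": ["a", "b"]})  # deliberately different schema

# First upload creates the table with df_v1's schema.
pandas_gbq.to_gbq(df_v1, "my_dataset.demo", project_id="my-project")

# With this fix, if_exists="replace" recreates the table with df_v2's schema
# instead of coercing the data back into df_v1's columns.
pandas_gbq.to_gbq(
    df_v2, "my_dataset.demo", project_id="my-project", if_exists="replace"
)

result = pandas_gbq.read_gbq(
    "SELECT * FROM my_dataset.demo", project_id="my-project"
)
print(result.dtypes)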
tests/unit/test_gbq.py: 53 additions & 0 deletions

@@ -650,6 +650,59 @@ def test_load_does_not_modify_schema_arg(mock_bigquery_client):
     assert original_schema == original_schema_cp
 
 
+def test_load_modifies_schema(mock_bigquery_client):
+    """Test of https://github.com/googleapis/python-bigquery-pandas/issues/670"""
+    from google.api_core.exceptions import NotFound
+
+    # Create table with new schema.
+    mock_bigquery_client.get_table.side_effect = NotFound("nope")
+    df = DataFrame(
+        {
+            "field1": ["a", "b"],
+            "field2": [1, 2],
+            "field3": [datetime.date(2019, 1, 1), datetime.date(2019, 5, 1)],
+        }
+    )
+    original_schema = [
+        {"name": "field1", "type": "STRING", "mode": "REQUIRED"},
+        {"name": "field2", "type": "INTEGER"},
+        {"name": "field3", "type": "DATE"},
+    ]
+    original_schema_cp = copy.deepcopy(original_schema)
+    gbq.to_gbq(
+        df,
+        "dataset.schematest",
+        project_id="my-project",
+        table_schema=original_schema,
+        if_exists="fail",
+    )
+    assert original_schema == original_schema_cp
+
+    # Test that when if_exists == "replace", the new table schema updates
+    # according to the local schema.
+    new_df = DataFrame(
+        {
+            "field1": ["a", "b"],
+            "field2": ["c", "d"],
+            "field3": [datetime.date(2019, 1, 1), datetime.date(2019, 5, 1)],
+        }
+    )
+    new_schema = [
+        {"name": "field1", "type": "STRING", "mode": "REQUIRED"},
+        {"name": "field2", "type": "STRING"},
+        {"name": "field3", "type": "DATE"},
+    ]
+    new_schema_cp = copy.deepcopy(new_schema)
+    gbq.to_gbq(
+        new_df,
+        "dataset.schematest",
+        project_id="my-project",
+        table_schema=new_schema,
+        if_exists="replace",
+    )
+    assert new_schema == new_schema_cp
+
+
 def test_read_gbq_passes_dtypes(mock_bigquery_client, mock_service_account_credentials):
     mock_service_account_credentials.project_id = "service_account_project_id"
     df = gbq.read_gbq(
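
The unit test above depends on a mock_bigquery_client pytest fixture defined in the unit-test conftest, which is not part of this diff. A rough sketch of the kind of fixture involved is shown below; it is an assumption about its general shape, not a copy of the project's fixture.

from unittest import mock

import google.cloud.bigquery
import pytest


@pytest.fixture
def mock_bigquery_client(monkeypatch):
    # Autospec keeps method names honest while avoiding any real API calls.
    client = mock.create_autospec(google.cloud.bigquery.Client)
    # Assumed patch point: wherever pandas-gbq would construct its Client,
    # hand back the mock instead.
    monkeypatch.setattr(
        google.cloud.bigquery, "Client", mock.MagicMock(return_value=client)
    )
    return client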