Skip to content

fix: Fix read_gbq with ORDER BY query and index_col set #963

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Mar 17, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion bigframes/core/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -2573,7 +2573,7 @@ def _get_rows_as_json_values(self) -> Block:
# The only way this code is used is through the df.apply(axis=1) code path
# TODO: Stop using internal API
destination, query_job = self.session._loader._query_to_destination(
json_sql, index_cols=[ordering_column_name], api_name="apply"
json_sql, cluster_candidates=[ordering_column_name], api_name="apply"
)
if not destination:
raise ValueError(f"Query job {query_job} did not produce result table")
Expand Down
7 changes: 4 additions & 3 deletions bigframes/session/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -603,9 +603,10 @@ def read_gbq_query(
time_travel_timestamp=None,
)

# No cluster candidates, as the user query might not be clusterable (e.g., because of an ORDER BY clause)
destination, query_job = self._query_to_destination(
query,
index_cols,
cluster_candidates=[],
api_name=api_name,
configuration=configuration,
)
Expand Down Expand Up @@ -642,7 +643,7 @@ def read_gbq_query(
def _query_to_destination(
self,
query: str,
index_cols: List[str],
cluster_candidates: List[str],
api_name: str,
configuration: dict = {"query": {"useQueryCache": True}},
do_clustering=True,
Expand All @@ -665,7 +666,7 @@ def _query_to_destination(
assert schema is not None
if do_clustering:
cluster_cols = bf_io_bigquery.select_cluster_cols(
schema, cluster_candidates=index_cols
schema, cluster_candidates=cluster_candidates
)
else:
cluster_cols = []
Expand Down
3 changes: 2 additions & 1 deletion tests/system/small/test_session.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,9 +129,10 @@ def test_read_gbq_w_unknown_index_col(
CONCAT(t.string_col, "_2") AS my_strings,
t.int64_col > 0 AS my_bools,
FROM `{scalars_table_id}` AS t
ORDER BY my_strings
""",
["my_strings"],
id="string_index",
id="string_index_w_order_by",
),
pytest.param(
"SELECT GENERATE_UUID() AS uuid, 0 AS my_value FROM UNNEST(GENERATE_ARRAY(1, 20))",
Expand Down