googleapis · TrevorBergeron · Mar 17, 2025 · Sep 5, 2024 · Sep 5, 2024 · Feb 3, 2025
@@ -2573,7 +2573,7 @@ def _get_rows_as_json_values(self) -> Block:
         # The only way this code is used is through the df.apply(axis=1) code path
         # TODO: Stop using internal API
         destination, query_job = self.session._loader._query_to_destination(
-            json_sql, index_cols=[ordering_column_name], api_name="apply"
+            json_sql, cluster_candidates=[ordering_column_name], api_name="apply"
         )
         if not destination:
             raise ValueError(f"Query job {query_job} did not produce result table")

@@ -603,9 +603,10 @@ def read_gbq_query(
                 time_travel_timestamp=None,
             )
 
+        # No cluster candidates, as the user query might not be clusterable (e.g. because of an ORDER BY clause)
         destination, query_job = self._query_to_destination(
             query,
-            index_cols,
+            cluster_candidates=[],
             api_name=api_name,
             configuration=configuration,
         )
@@ -642,7 +643,7 @@ def read_gbq_query(
     def _query_to_destination(
         self,
         query: str,
-        index_cols: List[str],
+        cluster_candidates: List[str],
         api_name: str,
         configuration: dict = {"query": {"useQueryCache": True}},
         do_clustering=True,
@@ -665,7 +666,7 @@ def _query_to_destination(
         assert schema is not None
         if do_clustering:
             cluster_cols = bf_io_bigquery.select_cluster_cols(
-                schema, cluster_candidates=index_cols
+                schema, cluster_candidates=cluster_candidates
             )
         else:
             cluster_cols = []

@@ -129,9 +129,10 @@ def test_read_gbq_w_unknown_index_col(
                 CONCAT(t.string_col, "_2") AS my_strings,
                 t.int64_col > 0 AS my_bools,
             FROM `{scalars_table_id}` AS t
+            ORDER BY my_strings
             """,
             ["my_strings"],
-            id="string_index",
+            id="string_index_w_order_by",
         ),
         pytest.param(
             "SELECT GENERATE_UUID() AS uuid, 0 AS my_value FROM UNNEST(GENERATE_ARRAY(1, 20))",