Skip to content

fix: avoid unnecessary row_number() on sort key for io #211

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Nov 17, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 8 additions & 4 deletions bigframes/core/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,14 +125,18 @@ def to_sql(
col_id_overrides: typing.Mapping[str, str] = {},
sorted: bool = False,
) -> str:
if sorted or offset_column:
return self._compile_ordered().to_sql(
offset_column=offset_column,
array_value = self
if offset_column:
array_value = self.promote_offsets(offset_column)
if sorted:
return array_value._compile_ordered().to_sql(
col_id_overrides=col_id_overrides,
sorted=sorted,
)
else:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: redundant `else` — the `if sorted:` branch above already returns, so the unordered return can be dedented.

return self._compile_unordered().to_sql(col_id_overrides=col_id_overrides)
return array_value._compile_unordered().to_sql(
col_id_overrides=col_id_overrides
)

def start_query(
self,
Expand Down
29 changes: 20 additions & 9 deletions bigframes/core/compile/compiled.py
Original file line number Diff line number Diff line change
Expand Up @@ -1031,31 +1031,42 @@ def _reproject_to_table(self) -> OrderedIR:

def to_sql(
self,
offset_column: typing.Optional[str] = None,
col_id_overrides: typing.Mapping[str, str] = {},
sorted: bool = False,
) -> str:
offsets_id = offset_column or ORDER_ID_COLUMN

sql = ibis_bigquery.Backend().compile(
self._to_ibis_expr(
ordering_mode="offset_col"
if (offset_column or sorted)
else "unordered",
order_col_name=offsets_id,
ordering_mode="unordered",
col_id_overrides=col_id_overrides,
expose_hidden_cols=sorted,
)
)
if sorted:
output_columns = [
col_id_overrides.get(col) if (col in col_id_overrides) else col
for col in self.column_ids
]
selection = ", ".join(map(lambda col_id: f"`{col_id}`", output_columns))
order_by_clause = self._ordering_clause(self._ordering.all_ordering_columns)

sql = textwrap.dedent(
f"SELECT * EXCEPT (`{offsets_id}`)\n"
f"SELECT {selection}\n"
"FROM (\n"
f"{sql}\n"
")\n"
f"ORDER BY `{offsets_id}`\n"
f"{order_by_clause}\n"
)
return typing.cast(str, sql)

def _ordering_clause(self, ordering: Iterable[OrderingColumnReference]) -> str:
    """Render ``ordering`` as a SQL ``ORDER BY`` clause.

    Args:
        ordering: Ordering column references. Each one is read for its
            ``column_id``, ``direction.is_ascending`` and ``na_last``
            attributes.

    Returns:
        ``"ORDER BY ..."`` with one backtick-quoted term per reference, in
        iteration order, each carrying an explicit ``ASC``/``DESC`` and
        ``NULLS LAST``/``NULLS FIRST`` modifier. Returns an empty string
        when ``ordering`` yields no references, because a bare ``ORDER BY``
        with no terms is not valid SQL.
    """
    parts = [
        f"`{col_ref.column_id}` "
        f"{'ASC' if col_ref.direction.is_ascending else 'DESC'} "
        f"{'NULLS LAST' if col_ref.na_last else 'NULLS FIRST'}"
        for col_ref in ordering
    ]
    if not parts:
        # Nothing to sort by: emit no clause rather than a dangling keyword.
        return ""
    # The " ," separator is kept as-is; it is valid SQL and callers may
    # compare the generated text verbatim.
    return f"ORDER BY {' ,'.join(parts)}"

def _to_ibis_expr(
self,
*,
Expand Down
12 changes: 4 additions & 8 deletions bigframes/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -2577,14 +2577,10 @@ def _create_io_query(self, index: bool, ordering_id: Optional[str]) -> str:
}

if ordering_id is not None:
return array_value.to_sql(
offset_column=ordering_id,
col_id_overrides=id_overrides,
)
else:
return array_value.to_sql(
col_id_overrides=id_overrides,
)
array_value = array_value.promote_offsets(ordering_id)
return array_value.to_sql(
col_id_overrides=id_overrides,
)

def _run_io_query(
self,
Expand Down