Commit 3ee60a0

Merge branch 'main' into polars_semi

2 parents 9d17286 + 7a83224

File tree

39 files changed: +2208 −281 lines

CHANGELOG.md

Lines changed: 25 additions & 0 deletions

@@ -4,6 +4,31 @@
 [1]: https://pypi.org/project/bigframes/#history

+## [2.8.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v2.7.0...v2.8.0) (2025-06-23)
+
+### ⚠ BREAKING CHANGES
+
+* add required param 'engine' to multimodal functions ([#1834](https://github.com/googleapis/python-bigquery-dataframes/issues/1834))
+
+### Features
+
+* Add `bpd.options.compute.maximum_result_rows` option to limit client data download ([#1829](https://github.com/googleapis/python-bigquery-dataframes/issues/1829)) ([e22a3f6](https://github.com/googleapis/python-bigquery-dataframes/commit/e22a3f61a02cc1b7a5155556e5a07a1a2fea1d82))
+* Add `bpd.options.display.repr_mode = "anywidget"` to create an interactive display of the results ([#1820](https://github.com/googleapis/python-bigquery-dataframes/issues/1820)) ([be0a3cf](https://github.com/googleapis/python-bigquery-dataframes/commit/be0a3cf7711dadc68d8366ea90b99855773e2a2e))
+* Add DataFrame.ai.forecast() support ([#1828](https://github.com/googleapis/python-bigquery-dataframes/issues/1828)) ([7bc7f36](https://github.com/googleapis/python-bigquery-dataframes/commit/7bc7f36fc20d233f4cf5ed688cc5dcaf100ce4fb))
+* Add describe() method to Series ([#1827](https://github.com/googleapis/python-bigquery-dataframes/issues/1827)) ([a4205f8](https://github.com/googleapis/python-bigquery-dataframes/commit/a4205f882012820c034cb15d73b2768ec4ad3ac8))
+* Add required param 'engine' to multimodal functions ([#1834](https://github.com/googleapis/python-bigquery-dataframes/issues/1834)) ([37666e4](https://github.com/googleapis/python-bigquery-dataframes/commit/37666e4c137d52c28ab13477dfbcc6e92b913334))
+
+### Performance Improvements
+
+* Produce simpler sql ([#1836](https://github.com/googleapis/python-bigquery-dataframes/issues/1836)) ([cf9c22a](https://github.com/googleapis/python-bigquery-dataframes/commit/cf9c22a09c4e668a598fa1dad0f6a07b59bc6524))
+
+### Documentation
+
+* Add ai.forecast notebook ([#1840](https://github.com/googleapis/python-bigquery-dataframes/issues/1840)) ([2430497](https://github.com/googleapis/python-bigquery-dataframes/commit/24304972fdbdfd12c25c7f4ef5a7b280f334801a))
+
 ## [2.7.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v2.6.0...v2.7.0) (2025-06-16)
bigframes/_config/compute_options.py

Lines changed: 39 additions & 30 deletions

@@ -55,29 +55,7 @@ class ComputeOptions:
     {'test2': 'abc', 'test3': False}

     Attributes:
-        maximum_bytes_billed (int, Options):
-            Limits the bytes billed for query jobs. Queries that will have
-            bytes billed beyond this limit will fail (without incurring a
-            charge). If unspecified, this will be set to your project default.
-            See `maximum_bytes_billed`: https://cloud.google.com/python/docs/reference/bigquery/latest/google.cloud.bigquery.job.QueryJobConfig#google_cloud_bigquery_job_QueryJobConfig_maximum_bytes_billed.
-
-        enable_multi_query_execution (bool, Options):
-            If enabled, large queries may be factored into multiple smaller queries
-            in order to avoid generating queries that are too complex for the query
-            engine to handle. However this comes at the cost of increase cost and latency.
-
-        extra_query_labels (Dict[str, Any], Options):
-            Stores additional custom labels for query configuration.
-
-        semantic_ops_confirmation_threshold (int, optional):
-            .. deprecated:: 1.42.0
-                Semantic operators are deprecated. Please use AI operators instead
-
-        semantic_ops_threshold_autofail (bool):
-            .. deprecated:: 1.42.0
-                Semantic operators are deprecated. Please use AI operators instead
-
-        ai_ops_confirmation_threshold (int, optional):
+        ai_ops_confirmation_threshold (int | None):
             Guards against unexpected processing of large amount of rows by semantic operators.
             If the number of rows exceeds the threshold, the user will be asked to confirm
             their operations to resume. The default value is 0. Set the value to None

@@ -87,26 +65,57 @@ class ComputeOptions:
             Guards against unexpected processing of large amount of rows by semantic operators.
             When set to True, the operation automatically fails without asking for user inputs.

-        allow_large_results (bool):
+        allow_large_results (bool | None):
             Specifies whether query results can exceed 10 GB. Defaults to False. Setting this
             to False (the default) restricts results to 10 GB for potentially faster execution;
             BigQuery will raise an error if this limit is exceeded. Setting to True removes
             this result size limit.
+
+        enable_multi_query_execution (bool | None):
+            If enabled, large queries may be factored into multiple smaller queries
+            in order to avoid generating queries that are too complex for the query
+            engine to handle. However this comes at the cost of increase cost and latency.
+
+        extra_query_labels (Dict[str, Any] | None):
+            Stores additional custom labels for query configuration.
+
+        maximum_bytes_billed (int | None):
+            Limits the bytes billed for query jobs. Queries that will have
+            bytes billed beyond this limit will fail (without incurring a
+            charge). If unspecified, this will be set to your project default.
+            See `maximum_bytes_billed`: https://cloud.google.com/python/docs/reference/bigquery/latest/google.cloud.bigquery.job.QueryJobConfig#google_cloud_bigquery_job_QueryJobConfig_maximum_bytes_billed.
+
+        maximum_result_rows (int | None):
+            Limits the number of rows in an execution result. When converting
+            a BigQuery DataFrames object to a pandas DataFrame or Series (e.g.,
+            using ``.to_pandas()``, ``.peek()``, ``.__repr__()``, direct
+            iteration), the data is downloaded from BigQuery to the client
+            machine. This option restricts the number of rows that can be
+            downloaded. If the number of rows to be downloaded exceeds this
+            limit, a ``bigframes.exceptions.MaximumResultRowsExceeded``
+            exception is raised.
+
+        semantic_ops_confirmation_threshold (int | None):
+            .. deprecated:: 1.42.0
+                Semantic operators are deprecated. Please use AI operators instead
+
+        semantic_ops_threshold_autofail (bool):
+            .. deprecated:: 1.42.0
+                Semantic operators are deprecated. Please use AI operators instead
     """

-    maximum_bytes_billed: Optional[int] = None
+    ai_ops_confirmation_threshold: Optional[int] = 0
+    ai_ops_threshold_autofail: bool = False
+    allow_large_results: Optional[bool] = None
     enable_multi_query_execution: bool = False
     extra_query_labels: Dict[str, Any] = dataclasses.field(
         default_factory=dict, init=False
     )
+    maximum_bytes_billed: Optional[int] = None
+    maximum_result_rows: Optional[int] = None
     semantic_ops_confirmation_threshold: Optional[int] = 0
     semantic_ops_threshold_autofail = False

-    ai_ops_confirmation_threshold: Optional[int] = 0
-    ai_ops_threshold_autofail: bool = False
-
-    allow_large_results: Optional[bool] = None
-
     def assign_extra_query_labels(self, **kwargs: Any) -> None:
         """
         Assigns additional custom labels for query configuration. The method updates the

bigframes/core/compile/compiler.py

Lines changed: 1 addition & 0 deletions

@@ -65,6 +65,7 @@ def compile_sql(request: configs.CompileRequest) -> configs.CompileResult:
     ordering: Optional[bf_ordering.RowOrdering] = result_node.order_by
     result_node = dataclasses.replace(result_node, order_by=None)
     result_node = cast(nodes.ResultNode, rewrites.column_pruning(result_node))
+    result_node = cast(nodes.ResultNode, rewrites.defer_selection(result_node))
     sql = compile_result_node(result_node)
     # Return the ordering iff no extra columns are needed to define the row order
     if ordering is not None:

bigframes/core/compile/googlesql/query.py

Lines changed: 1 addition & 1 deletion

@@ -125,7 +125,7 @@ def sql(self) -> str:
         return "\n".join(text)


-@dataclasses.dataclass
+@dataclasses.dataclass(frozen=True)
 class SelectExpression(abc.SQLSyntax):
     """This class represents `select_expression`."""
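Marking `SelectExpression` with `frozen=True` makes instances immutable and hashable, so they can be deduplicated in sets or used as dict keys. A small illustration with a stand-in class (not the bigframes type itself):

```python
import dataclasses


@dataclasses.dataclass(frozen=True)
class SelectExpr:  # stand-in for SelectExpression
    sql: str


# frozen=True generates __hash__ from the field values and blocks mutation,
# so equal expressions collapse to one entry in a set.
unique = {SelectExpr("a"), SelectExpr("a"), SelectExpr("b")}
assert len(unique) == 2

try:
    SelectExpr("a").sql = "c"
except dataclasses.FrozenInstanceError:
    pass  # attribute assignment raises on frozen dataclasses
```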

bigframes/core/compile/sqlglot/compiler.py

Lines changed: 13 additions & 0 deletions

@@ -87,6 +87,9 @@ def _compile_sql(self, request: configs.CompileRequest) -> configs.CompileResult
             nodes.ResultNode, rewrite.column_pruning(result_node)
         )
         result_node = self._remap_variables(result_node)
+        result_node = typing.cast(
+            nodes.ResultNode, rewrite.defer_selection(result_node)
+        )
         sql = self._compile_result_node(result_node)
         return configs.CompileResult(
             sql, result_node.schema.to_bigquery(), result_node.order_by

@@ -97,6 +100,9 @@ def _compile_sql(self, request: configs.CompileRequest) -> configs.CompileResult
         result_node = typing.cast(nodes.ResultNode, rewrite.column_pruning(result_node))

         result_node = self._remap_variables(result_node)
+        result_node = typing.cast(
+            nodes.ResultNode, rewrite.defer_selection(result_node)
+        )
         sql = self._compile_result_node(result_node)
         # Return the ordering iff no extra columns are needed to define the row order
         if ordering is not None:

@@ -205,6 +211,13 @@ def compile_projection(
         )
         return child.project(projected_cols)

+    @_compile_node.register
+    def compile_filter(
+        self, node: nodes.FilterNode, child: ir.SQLGlotIR
+    ) -> ir.SQLGlotIR:
+        condition = scalar_compiler.compile_scalar_expression(node.predicate)
+        return child.filter(condition)
+
     @_compile_node.register
     def compile_concat(
         self, node: nodes.ConcatNode, *children: ir.SQLGlotIR

bigframes/core/compile/sqlglot/scalar_compiler.py

Lines changed: 7 additions & 0 deletions

@@ -99,3 +99,10 @@ def compile_addop(op: ops.AddOp, left: TypedExpr, right: TypedExpr) -> sge.Expre

     # Numerical addition
     return sge.Add(this=left.expr, expression=right.expr)
+
+
+def compile_ge(
+    op: ops.ge_op, left: TypedExpr, right: TypedExpr  # type: ignore[valid-type]
+) -> sge.Expression:
+    return sge.GTE(this=left.expr, expression=right.expr)
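The scalar compiler maps each operator type (`AddOp`, the new `>=` op, …) to a corresponding sqlglot expression node. A toy version of the same dispatch pattern, emitting SQL strings instead of sqlglot objects (class and function names here are illustrative, not the bigframes API):

```python
from dataclasses import dataclass
from functools import singledispatch


@dataclass
class AddOp:
    pass


@dataclass
class GeOp:
    pass


@singledispatch
def compile_op(op, left: str, right: str) -> str:
    # Fallback for operators without a registered compiler.
    raise NotImplementedError(f"no compiler registered for {type(op).__name__}")


@compile_op.register
def _(op: AddOp, left: str, right: str) -> str:
    # Mirrors compile_addop returning sge.Add(this=..., expression=...)
    return f"({left} + {right})"


@compile_op.register
def _(op: GeOp, left: str, right: str) -> str:
    # Mirrors compile_ge returning sge.GTE(this=..., expression=...)
    return f"({left} >= {right})"
```

Each new operator only needs one registered handler; unhandled operators fail loudly at compile time rather than producing wrong SQL.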

bigframes/core/compile/sqlglot/sqlglot_ir.py

Lines changed: 10 additions & 0 deletions

@@ -250,6 +250,16 @@ def project(
         new_expr = self._encapsulate_as_cte().select(*projected_cols_expr, append=True)
         return SQLGlotIR(expr=new_expr, uid_gen=self.uid_gen)

+    def filter(
+        self,
+        condition: sge.Expression,
+    ) -> SQLGlotIR:
+        """Filters the query with the given condition."""
+        new_expr = self._encapsulate_as_cte()
+        return SQLGlotIR(
+            expr=new_expr.where(condition, append=False), uid_gen=self.uid_gen
+        )
+
     def insert(
         self,
         destination: bigquery.TableReference,
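The new `filter` method first wraps the current query in a CTE and only then applies `WHERE`, so the predicate runs against the query's output columns rather than interfering with inner aliases. A toy string-based version of the same encapsulate-then-filter pattern, checked against SQLite (these helpers are illustrative, not the sqlglot API):

```python
import sqlite3


def encapsulate_as_cte(sql: str, alias: str = "t0") -> str:
    # Wrap the existing query so later clauses see its output schema.
    return f"WITH {alias} AS ({sql}) SELECT * FROM {alias}"


def filter_query(sql: str, condition: str) -> str:
    # Apply the predicate on top of the wrapped query.
    return f"{encapsulate_as_cte(sql)} WHERE {condition}"


conn = sqlite3.connect(":memory:")
query = filter_query("SELECT 1 AS x UNION SELECT 5", "x >= 2")
rows = conn.execute(query).fetchall()
```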

bigframes/core/nodes.py

Lines changed: 9 additions & 1 deletion

@@ -75,7 +75,7 @@ def additive_base(self) -> BigFrameNode:
         ...

     @abc.abstractmethod
-    def replace_additive_base(self, BigFrameNode):
+    def replace_additive_base(self, BigFrameNode) -> BigFrameNode:
         ...


@@ -1568,6 +1568,10 @@ class ExplodeNode(UnaryNode):
     # Offsets are generated only if this is non-null
     offsets_col: Optional[identifiers.ColumnId] = None

+    def _validate(self):
+        for col in self.column_ids:
+            assert col.id in self.child.ids
+
     @property
     def row_preserving(self) -> bool:
         return False

@@ -1646,6 +1650,10 @@ class ResultNode(UnaryNode):
     limit: Optional[int] = None
     # TODO: CTE definitions

+    def _validate(self):
+        for ref, name in self.output_cols:
+            assert ref.id in self.child.ids
+
     @property
     def node_defined_ids(self) -> Tuple[identifiers.ColumnId, ...]:
         return ()

bigframes/core/rewrite/__init__.py

Lines changed: 2 additions & 0 deletions

@@ -22,6 +22,7 @@
     try_reduce_to_local_scan,
     try_reduce_to_table_scan,
 )
+from bigframes.core.rewrite.select_pullup import defer_selection
 from bigframes.core.rewrite.slices import pull_out_limit, pull_up_limits, rewrite_slice
 from bigframes.core.rewrite.timedeltas import rewrite_timedelta_expressions
 from bigframes.core.rewrite.windows import pull_out_window_order, rewrite_range_rolling

@@ -42,4 +43,5 @@
     "try_reduce_to_local_scan",
     "fold_row_counts",
     "pull_out_window_order",
+    "defer_selection",
 ]
bigframes/core/rewrite/select_pullup.py (new file)

Lines changed: 144 additions & 0 deletions

@@ -0,0 +1,144 @@
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import dataclasses
from typing import cast

from bigframes.core import expression, nodes


def defer_selection(
    root: nodes.BigFrameNode,
) -> nodes.BigFrameNode:
    """
    Defers SelectionNode operations in the tree, pulling them up.

    In many cases, these nodes will be merged or eliminated entirely, simplifying the overall tree.
    """
    return nodes.bottom_up(root, pull_up_select)


def pull_up_select(node: nodes.BigFrameNode) -> nodes.BigFrameNode:
    if isinstance(node, nodes.LeafNode):
        return node
    if isinstance(node, nodes.JoinNode):
        return pull_up_selects_under_join(node)
    if isinstance(node, nodes.ConcatNode):
        return handle_selects_under_concat(node)
    if isinstance(node, nodes.UnaryNode):
        return pull_up_select_unary(node)
    # shouldn't hit this, but not worth crashing over
    return node


def pull_up_select_unary(node: nodes.UnaryNode) -> nodes.BigFrameNode:
    child = node.child
    if not isinstance(child, nodes.SelectionNode):
        return node

    # Schema-preserving nodes
    if isinstance(
        node,
        (
            nodes.ReversedNode,
            nodes.OrderByNode,
            nodes.SliceNode,
            nodes.FilterNode,
            nodes.RandomSampleNode,
        ),
    ):
        pushed_down_node: nodes.BigFrameNode = node.remap_refs(
            {id: ref.id for ref, id in child.input_output_pairs}
        ).replace_child(child.child)
        pulled_up_select = cast(
            nodes.SelectionNode, child.replace_child(pushed_down_node)
        )
        return pulled_up_select
    elif isinstance(
        node,
        (
            nodes.SelectionNode,
            nodes.ResultNode,
        ),
    ):
        return node.remap_refs(
            {id: ref.id for ref, id in child.input_output_pairs}
        ).replace_child(child.child)
    elif isinstance(node, nodes.AggregateNode):
        pushed_down_agg = node.remap_refs(
            {id: ref.id for ref, id in child.input_output_pairs}
        ).replace_child(child.child)
        new_selection = tuple(
            nodes.AliasedRef.identity(id).remap_refs(
                {id: ref.id for ref, id in child.input_output_pairs}
            )
            for id in node.ids
        )
        return nodes.SelectionNode(pushed_down_agg, new_selection)
    elif isinstance(node, nodes.ExplodeNode):
        pushed_down_node = node.remap_refs(
            {id: ref.id for ref, id in child.input_output_pairs}
        ).replace_child(child.child)
        pulled_up_select = cast(
            nodes.SelectionNode, child.replace_child(pushed_down_node)
        )
        if node.offsets_col:
            pulled_up_select = dataclasses.replace(
                pulled_up_select,
                input_output_pairs=(
                    *pulled_up_select.input_output_pairs,
                    nodes.AliasedRef(
                        expression.DerefOp(node.offsets_col), node.offsets_col
                    ),
                ),
            )
        return pulled_up_select
    elif isinstance(node, nodes.AdditiveNode):
        pushed_down_node = node.replace_additive_base(child.child).remap_refs(
            {id: ref.id for ref, id in child.input_output_pairs}
        )
        new_selection = (
            *child.input_output_pairs,
            *(
                nodes.AliasedRef(expression.DerefOp(col.id), col.id)
                for col in node.added_fields
            ),
        )
        pulled_up_select = dataclasses.replace(
            child, child=pushed_down_node, input_output_pairs=new_selection
        )
        return pulled_up_select
    # shouldn't hit this, but not worth crashing over
    return node


def pull_up_selects_under_join(node: nodes.JoinNode) -> nodes.JoinNode:
    # Can in theory pull up selects here, but it is a bit dangerous, in particular for self-joins, when there are more transforms to do.
    # TODO: Safely pull up selects above join
    return node


def handle_selects_under_concat(node: nodes.ConcatNode) -> nodes.ConcatNode:
    new_children = []
    for child in node.child_nodes:
        # remove select if no-op
        if not isinstance(child, nodes.SelectionNode):
            new_children.append(child)
        else:
            inputs = tuple(ref.id for ref in child.input_output_pairs)
            if inputs == tuple(child.child.ids):
                new_children.append(child.child)
            else:
                new_children.append(child)
    return dataclasses.replace(node, children=tuple(new_children))
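A much-simplified sketch of the `FilterNode` branch of `pull_up_select_unary`: a rename (`Select`) sitting below a `Filter` is hoisted above it by rewriting the predicate's column reference through the rename mapping. The toy classes below are illustrative stand-ins, not bigframes nodes:

```python
from dataclasses import dataclass
from typing import Any, Dict


@dataclass
class Scan:
    """Leaf node producing physical columns."""


@dataclass
class Select:
    """Projection/rename: maps output name -> input column name."""
    child: Any
    mapping: Dict[str, str]


@dataclass
class Filter:
    """Keeps rows where the named column passes some predicate."""
    child: Any
    predicate_col: str


def pull_up_select(node: Any) -> Any:
    # Filter is schema-preserving, so a Select below it can be hoisted:
    # remap the predicate's reference through the rename, push the
    # Filter down, and re-apply the Select on top.
    if isinstance(node, Filter) and isinstance(node.child, Select):
        select = node.child
        pushed = Filter(select.child, select.mapping[node.predicate_col])
        return Select(pushed, select.mapping)
    return node


tree = Filter(Select(Scan(), {"a": "x"}), predicate_col="a")
out = pull_up_select(tree)
# The Select now sits above the Filter, and the Filter references the
# original column name "x".
```

Applied bottom-up over the whole tree, adjacent selections merge or disappear, which is what lets the compiler emit simpler SQL.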
