151 changes: 151 additions & 0 deletions bigframes/core/agg_expressions.py
@@ -0,0 +1,151 @@
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import annotations

import abc
import dataclasses
import functools
import itertools
import typing
from typing import Callable, Mapping, TypeVar

from bigframes import dtypes
from bigframes.core import expression
import bigframes.core.identifiers as ids
import bigframes.operations.aggregations as agg_ops

TExpression = TypeVar("TExpression", bound="Aggregation")


@dataclasses.dataclass(frozen=True)
class Aggregation(expression.Expression):
"""Represents windowing or aggregation over a column."""

op: agg_ops.WindowOp = dataclasses.field()

@property
def column_references(self) -> typing.Tuple[ids.ColumnId, ...]:
return tuple(
itertools.chain.from_iterable(
map(lambda x: x.column_references, self.inputs)
)
)

@functools.cached_property
def is_resolved(self) -> bool:
return all(input.is_resolved for input in self.inputs)

@functools.cached_property
def output_type(self) -> dtypes.ExpressionType:
if not self.is_resolved:
raise ValueError(f"Type of expression {self.op} has not been fixed.")

input_types = [input.output_type for input in self.inputs]

return self.op.output_type(*input_types)

@property
@abc.abstractmethod
def inputs(
self,
) -> typing.Tuple[expression.Expression, ...]:
...

@property
def free_variables(self) -> typing.Tuple[str, ...]:
return tuple(
itertools.chain.from_iterable(map(lambda x: x.free_variables, self.inputs))
)

@property
def is_const(self) -> bool:
return all(child.is_const for child in self.inputs)

@abc.abstractmethod
def replace_args(self: TExpression, *arg) -> TExpression:
...

def transform_children(
self: TExpression, t: Callable[[expression.Expression], expression.Expression]
) -> TExpression:
return self.replace_args(*(t(arg) for arg in self.inputs))

def bind_variables(
self: TExpression,
bindings: Mapping[str, expression.Expression],
allow_partial_bindings: bool = False,
) -> TExpression:
return self.transform_children(
lambda x: x.bind_variables(bindings, allow_partial_bindings)
)

def bind_refs(
self: TExpression,
bindings: Mapping[ids.ColumnId, expression.Expression],
allow_partial_bindings: bool = False,
) -> TExpression:
return self.transform_children(
lambda x: x.bind_refs(bindings, allow_partial_bindings)
)


@dataclasses.dataclass(frozen=True)
class NullaryAggregation(Aggregation):
op: agg_ops.NullaryWindowOp = dataclasses.field()

@property
def inputs(
self,
) -> typing.Tuple[expression.Expression, ...]:
return ()

def replace_args(self, *arg) -> NullaryAggregation:
return self


@dataclasses.dataclass(frozen=True)
class UnaryAggregation(Aggregation):
op: agg_ops.UnaryWindowOp
arg: expression.Expression

@property
def inputs(
self,
) -> typing.Tuple[expression.Expression, ...]:
return (self.arg,)

def replace_args(self, arg: expression.Expression) -> UnaryAggregation:
return UnaryAggregation(
self.op,
arg,
)


@dataclasses.dataclass(frozen=True)
class BinaryAggregation(Aggregation):
op: agg_ops.BinaryAggregateOp = dataclasses.field()
left: expression.Expression = dataclasses.field()
right: expression.Expression = dataclasses.field()

@property
def inputs(
self,
) -> typing.Tuple[expression.Expression, ...]:
return (self.left, self.right)

def replace_args(
self, larg: expression.Expression, rarg: expression.Expression
) -> BinaryAggregation:
return BinaryAggregation(self.op, larg, rarg)
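For orientation, here is a minimal sketch (not part of the diff) of how these classes compose, using the ops and the `ex.deref` helper imported elsewhere in this PR; the column ids `col_a` and `col_b` are hypothetical.

```python
# Illustrative sketch only; "col_a" / "col_b" are hypothetical column ids.
import bigframes.core.expression as ex
import bigframes.operations.aggregations as agg_ops
from bigframes.core import agg_expressions

# An input-free aggregation such as a row count:
size_agg = agg_expressions.NullaryAggregation(agg_ops.size_op)

# A single-input aggregation over a column reference:
mean_agg = agg_expressions.UnaryAggregation(agg_ops.mean_op, ex.deref("col_a"))

# The expression tree exposes the columns it references...
cols = mean_agg.column_references  # tuple of ColumnId, here for "col_a"

# ...and, since the dataclasses are frozen, rebinding yields a new object:
rebound = mean_agg.replace_args(ex.deref("col_b"))
```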
9 changes: 5 additions & 4 deletions bigframes/core/array_value.py
@@ -24,6 +24,7 @@
import pandas
import pyarrow as pa

from bigframes.core import agg_expressions
import bigframes.core.expression as ex
import bigframes.core.guid
import bigframes.core.identifiers as ids
@@ -190,7 +191,7 @@ def row_count(self) -> ArrayValue:
child=self.node,
aggregations=(
(
ex.NullaryAggregation(agg_ops.size_op),
agg_expressions.NullaryAggregation(agg_ops.size_op),
ids.ColumnId(bigframes.core.guid.generate_guid()),
),
),
@@ -379,7 +380,7 @@ def drop_columns(self, columns: Iterable[str]) -> ArrayValue:

def aggregate(
self,
aggregations: typing.Sequence[typing.Tuple[ex.Aggregation, str]],
aggregations: typing.Sequence[typing.Tuple[agg_expressions.Aggregation, str]],
by_column_ids: typing.Sequence[str] = (),
dropna: bool = True,
) -> ArrayValue:
@@ -420,15 +421,15 @@ def project_window_op(
"""

return self.project_window_expr(
ex.UnaryAggregation(op, ex.deref(column_name)),
agg_expressions.UnaryAggregation(op, ex.deref(column_name)),
window_spec,
never_skip_nulls,
skip_reproject_unsafe,
)

def project_window_expr(
self,
expression: ex.Aggregation,
expression: agg_expressions.Aggregation,
window: WindowSpec,
never_skip_nulls=False,
skip_reproject_unsafe: bool = False,
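A short usage sketch (hypothetical helper, not part of the diff) of the updated `aggregate` signature, which now pairs each `agg_expressions.Aggregation` with the id of its output column; the column names are assumptions.

```python
import bigframes.core.expression as ex
import bigframes.operations.aggregations as agg_ops
from bigframes.core import agg_expressions
from bigframes.core.array_value import ArrayValue

def count_per_group(value: ArrayValue, by_col: str, data_col: str, out_col: str) -> ArrayValue:
    # Each entry pairs an Aggregation expression with the name of its output column.
    return value.aggregate(
        aggregations=[
            (agg_expressions.UnaryAggregation(agg_ops.count_op, ex.deref(data_col)), out_col),
        ],
        by_column_ids=[by_col],
        dropna=True,
    )
```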
9 changes: 3 additions & 6 deletions bigframes/core/bigframe_node.py
@@ -20,15 +20,12 @@
import functools
import itertools
import typing
from typing import Callable, Dict, Generator, Iterable, Mapping, Sequence, Tuple, Union
from typing import Callable, Dict, Generator, Iterable, Mapping, Sequence, Tuple

from bigframes.core import expression, field, identifiers
import bigframes.core.schema as schemata
import bigframes.dtypes

if typing.TYPE_CHECKING:
import bigframes.session

COLUMN_SET = frozenset[identifiers.ColumnId]

T = typing.TypeVar("T")
@@ -281,8 +278,8 @@ def field_by_id(self) -> Mapping[identifiers.ColumnId, field.Field]:
@property
def _node_expressions(
self,
) -> Sequence[Union[expression.Expression, expression.Aggregation]]:
"""List of scalar expressions. Intended for checking engine compatibility with used ops."""
) -> Sequence[expression.Expression]:
"""List of expressions. Intended for checking engine compatibility with used ops."""
return ()

# Plan algorithms
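The narrowed return type works because `Aggregation` now subclasses `expression.Expression` (see `agg_expressions.py` above), so scalar expressions and aggregations can share one sequence type. A hypothetical override, for illustration only:

```python
from typing import Sequence

import bigframes.operations.aggregations as agg_ops
from bigframes.core import agg_expressions, expression

# Both entries satisfy Sequence[expression.Expression]; no Union is needed.
exprs: Sequence[expression.Expression] = (
    expression.deref("col_a"),  # plain scalar expression ("col_a" is hypothetical)
    agg_expressions.UnaryAggregation(agg_ops.mean_op, expression.deref("col_a")),
)
```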
22 changes: 13 additions & 9 deletions bigframes/core/block_transforms.py
@@ -21,12 +21,12 @@
import pandas as pd

import bigframes.constants
from bigframes.core import agg_expressions
import bigframes.core as core
import bigframes.core.blocks as blocks
import bigframes.core.expression as ex
import bigframes.core.ordering as ordering
import bigframes.core.window_spec as windows
import bigframes.dtypes
import bigframes.dtypes as dtypes
import bigframes.operations as ops
import bigframes.operations.aggregations as agg_ops
@@ -133,7 +133,7 @@ def quantile(
block, _ = block.aggregate(
grouping_column_ids,
tuple(
ex.UnaryAggregation(agg_ops.AnyValueOp(), ex.deref(col))
agg_expressions.UnaryAggregation(agg_ops.AnyValueOp(), ex.deref(col))
for col in quantile_cols
),
column_labels=pd.Index(labels),
@@ -363,7 +363,7 @@ def value_counts(
block = dropna(block, columns, how="any")
block, agg_ids = block.aggregate(
by_column_ids=(*grouping_keys, *columns),
aggregations=[ex.NullaryAggregation(agg_ops.size_op)],
aggregations=[agg_expressions.NullaryAggregation(agg_ops.size_op)],
dropna=drop_na and not grouping_keys,
)
count_id = agg_ids[0]
@@ -647,15 +647,15 @@ def skew(
# counts, moment3 for each column
aggregations = []
for i, col in enumerate(original_columns):
count_agg = ex.UnaryAggregation(
count_agg = agg_expressions.UnaryAggregation(
agg_ops.count_op,
ex.deref(col),
)
moment3_agg = ex.UnaryAggregation(
moment3_agg = agg_expressions.UnaryAggregation(
agg_ops.mean_op,
ex.deref(delta3_ids[i]),
)
variance_agg = ex.UnaryAggregation(
variance_agg = agg_expressions.UnaryAggregation(
agg_ops.PopVarOp(),
ex.deref(col),
)
@@ -698,9 +698,13 @@ def kurt(
# counts, moment4 for each column
aggregations = []
for i, col in enumerate(original_columns):
count_agg = ex.UnaryAggregation(agg_ops.count_op, ex.deref(col))
moment4_agg = ex.UnaryAggregation(agg_ops.mean_op, ex.deref(delta4_ids[i]))
variance_agg = ex.UnaryAggregation(agg_ops.PopVarOp(), ex.deref(col))
count_agg = agg_expressions.UnaryAggregation(agg_ops.count_op, ex.deref(col))
moment4_agg = agg_expressions.UnaryAggregation(
agg_ops.mean_op, ex.deref(delta4_ids[i])
)
variance_agg = agg_expressions.UnaryAggregation(
agg_ops.PopVarOp(), ex.deref(col)
)
aggregations.extend([count_agg, moment4_agg, variance_agg])

block, agg_ids = block.aggregate(
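The same construction pattern repeats throughout this file: build the `Aggregation` expressions, pass them to the block's `aggregate`, then read the returned aggregate column ids. A condensed sketch (hypothetical helper, column ids assumed) based on the `value_counts` change above:

```python
import bigframes.operations.aggregations as agg_ops
from bigframes.core import agg_expressions

def count_rows(block, grouping_keys, columns):
    # size_op needs no input column, so a NullaryAggregation is enough.
    block, agg_ids = block.aggregate(
        by_column_ids=(*grouping_keys, *columns),
        aggregations=[agg_expressions.NullaryAggregation(agg_ops.size_op)],
        dropna=False,
    )
    return block, agg_ids[0]  # id of the new count column
```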