Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions bigframes/core/compile/sqlglot/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,5 +14,7 @@
from __future__ import annotations

from bigframes.core.compile.sqlglot.compiler import SQLGlotCompiler
import bigframes.core.compile.sqlglot.expressions.binary_compiler # noqa: F401
import bigframes.core.compile.sqlglot.expressions.unary_compiler # noqa: F401

__all__ = ["SQLGlotCompiler"]
8 changes: 4 additions & 4 deletions bigframes/core/compile/sqlglot/aggregate_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def compile_aggregate(
return nullary_compiler.compile(aggregate.op)
if isinstance(aggregate, agg_expressions.UnaryAggregation):
column = typed_expr.TypedExpr(
scalar_compiler.compile_scalar_expression(aggregate.arg),
scalar_compiler.scalar_op_compiler.compile_expression(aggregate.arg),
aggregate.arg.output_type,
)
if not aggregate.op.order_independent:
Expand All @@ -46,11 +46,11 @@ def compile_aggregate(
return unary_compiler.compile(aggregate.op, column)
elif isinstance(aggregate, agg_expressions.BinaryAggregation):
left = typed_expr.TypedExpr(
scalar_compiler.compile_scalar_expression(aggregate.left),
scalar_compiler.scalar_op_compiler.compile_expression(aggregate.left),
aggregate.left.output_type,
)
right = typed_expr.TypedExpr(
scalar_compiler.compile_scalar_expression(aggregate.right),
scalar_compiler.scalar_op_compiler.compile_expression(aggregate.right),
aggregate.right.output_type,
)
return binary_compiler.compile(aggregate.op, left, right)
Expand All @@ -66,7 +66,7 @@ def compile_analytic(
return nullary_compiler.compile(aggregate.op)
if isinstance(aggregate, agg_expressions.UnaryAggregation):
column = typed_expr.TypedExpr(
scalar_compiler.compile_scalar_expression(aggregate.arg),
scalar_compiler.scalar_op_compiler.compile_expression(aggregate.arg),
aggregate.arg.output_type,
)
return unary_compiler.compile(aggregate.op, column, window)
Expand Down
7 changes: 5 additions & 2 deletions bigframes/core/compile/sqlglot/aggregations/windows.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,10 @@ def apply_window_if_present(
order = sge.Order(expressions=order_by) if order_by else None

group_by = (
[scalar_compiler.compile_scalar_expression(key) for key in window.grouping_keys]
[
scalar_compiler.scalar_op_compiler.compile_expression(key)
for key in window.grouping_keys
]
if window.grouping_keys
else None
)
Expand Down Expand Up @@ -101,7 +104,7 @@ def get_window_order_by(

order_by = []
for ordering_spec_item in ordering:
expr = scalar_compiler.compile_scalar_expression(
expr = scalar_compiler.scalar_op_compiler.compile_expression(
ordering_spec_item.scalar_expression
)
desc = not ordering_spec_item.direction.is_ascending
Expand Down
28 changes: 17 additions & 11 deletions bigframes/core/compile/sqlglot/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,15 +131,15 @@ def _compile_result_node(self, root: nodes.ResultNode) -> str:
# Have to bind schema as the final step before compilation.
root = typing.cast(nodes.ResultNode, schema_binding.bind_schema_to_tree(root))
selected_cols: tuple[tuple[str, sge.Expression], ...] = tuple(
(name, scalar_compiler.compile_scalar_expression(ref))
(name, scalar_compiler.scalar_op_compiler.compile_expression(ref))
for ref, name in root.output_cols
)
sqlglot_ir = self.compile_node(root.child).select(selected_cols)

if root.order_by is not None:
ordering_cols = tuple(
sge.Ordered(
this=scalar_compiler.compile_scalar_expression(
this=scalar_compiler.scalar_op_compiler.compile_expression(
ordering.scalar_expression
),
desc=ordering.direction.is_ascending is False,
Expand Down Expand Up @@ -199,7 +199,7 @@ def compile_selection(
self, node: nodes.SelectionNode, child: ir.SQLGlotIR
) -> ir.SQLGlotIR:
selected_cols: tuple[tuple[str, sge.Expression], ...] = tuple(
(id.sql, scalar_compiler.compile_scalar_expression(expr))
(id.sql, scalar_compiler.scalar_op_compiler.compile_expression(expr))
for expr, id in node.input_output_pairs
)
return child.select(selected_cols)
Expand All @@ -209,7 +209,7 @@ def compile_projection(
self, node: nodes.ProjectionNode, child: ir.SQLGlotIR
) -> ir.SQLGlotIR:
projected_cols: tuple[tuple[str, sge.Expression], ...] = tuple(
(id.sql, scalar_compiler.compile_scalar_expression(expr))
(id.sql, scalar_compiler.scalar_op_compiler.compile_expression(expr))
for expr, id in node.assignments
)
return child.project(projected_cols)
Expand All @@ -218,7 +218,9 @@ def compile_projection(
def compile_filter(
self, node: nodes.FilterNode, child: ir.SQLGlotIR
) -> ir.SQLGlotIR:
condition = scalar_compiler.compile_scalar_expression(node.predicate)
condition = scalar_compiler.scalar_op_compiler.compile_expression(
node.predicate
)
return child.filter(tuple([condition]))

@_compile_node.register
Expand All @@ -228,10 +230,12 @@ def compile_join(
conditions = tuple(
(
typed_expr.TypedExpr(
scalar_compiler.compile_scalar_expression(left), left.output_type
scalar_compiler.scalar_op_compiler.compile_expression(left),
left.output_type,
),
typed_expr.TypedExpr(
scalar_compiler.compile_scalar_expression(right), right.output_type
scalar_compiler.scalar_op_compiler.compile_expression(right),
right.output_type,
),
)
for left, right in node.conditions
Expand All @@ -250,11 +254,11 @@ def compile_isin_join(
) -> ir.SQLGlotIR:
conditions = (
typed_expr.TypedExpr(
scalar_compiler.compile_scalar_expression(node.left_col),
scalar_compiler.scalar_op_compiler.compile_expression(node.left_col),
node.left_col.output_type,
),
typed_expr.TypedExpr(
scalar_compiler.compile_scalar_expression(node.right_col),
scalar_compiler.scalar_op_compiler.compile_expression(node.right_col),
node.right_col.output_type,
),
)
Expand Down Expand Up @@ -308,7 +312,7 @@ def compile_aggregate(
for agg, id in node.aggregations
)
by_cols: tuple[sge.Expression, ...] = tuple(
scalar_compiler.compile_scalar_expression(by_col)
scalar_compiler.scalar_op_compiler.compile_expression(by_col)
for by_col in node.by_column_ids
)

Expand All @@ -332,7 +336,9 @@ def compile_window(
window_op = aggregate_compiler.compile_analytic(node.expression, window_spec)

inputs: tuple[sge.Expression, ...] = tuple(
scalar_compiler.compile_scalar_expression(expression.DerefOp(column))
scalar_compiler.scalar_op_compiler.compile_expression(
expression.DerefOp(column)
)
for column in node.expression.column_references
)

Expand Down
8 changes: 8 additions & 0 deletions bigframes/core/compile/sqlglot/expressions/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,11 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Expression implementations for the SQLGlot-based compiler.

This directory structure should mirror the layout of the
`bigframes/operations` directory, where the expressions are defined.

Prefer a few ops per file to keep file sizes manageable for text editors and LLMs.
"""
65 changes: 31 additions & 34 deletions bigframes/core/compile/sqlglot/expressions/binary_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,19 +20,16 @@
from bigframes import dtypes
from bigframes import operations as ops
import bigframes.core.compile.sqlglot.expressions.constants as constants
from bigframes.core.compile.sqlglot.expressions.op_registration import OpRegistration
from bigframes.core.compile.sqlglot.expressions.typed_expr import TypedExpr
import bigframes.core.compile.sqlglot.scalar_compiler as scalar_compiler

BINARY_OP_REGISTRATION = OpRegistration()
register_binary_op = scalar_compiler.scalar_op_compiler.register_binary_op


def compile(op: ops.BinaryOp, left: TypedExpr, right: TypedExpr) -> sge.Expression:
return BINARY_OP_REGISTRATION[op](op, left, right)
# TODO: add parentheses for operators


# TODO: add parentheses for operators
@BINARY_OP_REGISTRATION.register(ops.add_op)
def _(op, left: TypedExpr, right: TypedExpr) -> sge.Expression:
@register_binary_op(ops.add_op)
def _(left: TypedExpr, right: TypedExpr) -> sge.Expression:
if left.dtype == dtypes.STRING_DTYPE and right.dtype == dtypes.STRING_DTYPE:
# String addition
return sge.Concat(expressions=[left.expr, right.expr])
Expand Down Expand Up @@ -66,15 +63,15 @@ def _(op, left: TypedExpr, right: TypedExpr) -> sge.Expression:
)


@BINARY_OP_REGISTRATION.register(ops.eq_op)
def _(op, left: TypedExpr, right: TypedExpr) -> sge.Expression:
@register_binary_op(ops.eq_op)
def _(left: TypedExpr, right: TypedExpr) -> sge.Expression:
left_expr = _coerce_bool_to_int(left)
right_expr = _coerce_bool_to_int(right)
return sge.EQ(this=left_expr, expression=right_expr)


@BINARY_OP_REGISTRATION.register(ops.eq_null_match_op)
def _(op, left: TypedExpr, right: TypedExpr) -> sge.Expression:
@register_binary_op(ops.eq_null_match_op)
def _(left: TypedExpr, right: TypedExpr) -> sge.Expression:
left_expr = left.expr
if right.dtype != dtypes.BOOL_DTYPE:
left_expr = _coerce_bool_to_int(left)
Expand All @@ -93,8 +90,8 @@ def _(op, left: TypedExpr, right: TypedExpr) -> sge.Expression:
return sge.EQ(this=left_coalesce, expression=right_coalesce)


@BINARY_OP_REGISTRATION.register(ops.div_op)
def _(op, left: TypedExpr, right: TypedExpr) -> sge.Expression:
@register_binary_op(ops.div_op)
def _(left: TypedExpr, right: TypedExpr) -> sge.Expression:
left_expr = _coerce_bool_to_int(left)
right_expr = _coerce_bool_to_int(right)

Expand All @@ -105,8 +102,8 @@ def _(op, left: TypedExpr, right: TypedExpr) -> sge.Expression:
return result


@BINARY_OP_REGISTRATION.register(ops.floordiv_op)
def _(op, left: TypedExpr, right: TypedExpr) -> sge.Expression:
@register_binary_op(ops.floordiv_op)
def _(left: TypedExpr, right: TypedExpr) -> sge.Expression:
left_expr = _coerce_bool_to_int(left)
right_expr = _coerce_bool_to_int(right)

Expand Down Expand Up @@ -138,41 +135,41 @@ def _(op, left: TypedExpr, right: TypedExpr) -> sge.Expression:
return result


@BINARY_OP_REGISTRATION.register(ops.ge_op)
def _(op, left: TypedExpr, right: TypedExpr) -> sge.Expression:
@register_binary_op(ops.ge_op)
def _(left: TypedExpr, right: TypedExpr) -> sge.Expression:
left_expr = _coerce_bool_to_int(left)
right_expr = _coerce_bool_to_int(right)
return sge.GTE(this=left_expr, expression=right_expr)


@BINARY_OP_REGISTRATION.register(ops.gt_op)
def _(op, left: TypedExpr, right: TypedExpr) -> sge.Expression:
@register_binary_op(ops.gt_op)
def _(left: TypedExpr, right: TypedExpr) -> sge.Expression:
left_expr = _coerce_bool_to_int(left)
right_expr = _coerce_bool_to_int(right)
return sge.GT(this=left_expr, expression=right_expr)


@BINARY_OP_REGISTRATION.register(ops.JSONSet)
def _(op, left: TypedExpr, right: TypedExpr) -> sge.Expression:
@register_binary_op(ops.JSONSet, pass_op=True)
def _(left: TypedExpr, right: TypedExpr, op) -> sge.Expression:
return sge.func("JSON_SET", left.expr, sge.convert(op.json_path), right.expr)


@BINARY_OP_REGISTRATION.register(ops.lt_op)
def _(op, left: TypedExpr, right: TypedExpr) -> sge.Expression:
@register_binary_op(ops.lt_op)
def _(left: TypedExpr, right: TypedExpr) -> sge.Expression:
left_expr = _coerce_bool_to_int(left)
right_expr = _coerce_bool_to_int(right)
return sge.LT(this=left_expr, expression=right_expr)


@BINARY_OP_REGISTRATION.register(ops.le_op)
def _(op, left: TypedExpr, right: TypedExpr) -> sge.Expression:
@register_binary_op(ops.le_op)
def _(left: TypedExpr, right: TypedExpr) -> sge.Expression:
left_expr = _coerce_bool_to_int(left)
right_expr = _coerce_bool_to_int(right)
return sge.LTE(this=left_expr, expression=right_expr)


@BINARY_OP_REGISTRATION.register(ops.mul_op)
def _(op, left: TypedExpr, right: TypedExpr) -> sge.Expression:
@register_binary_op(ops.mul_op)
def _(left: TypedExpr, right: TypedExpr) -> sge.Expression:
left_expr = _coerce_bool_to_int(left)
right_expr = _coerce_bool_to_int(right)

Expand All @@ -186,20 +183,20 @@ def _(op, left: TypedExpr, right: TypedExpr) -> sge.Expression:
return result


@BINARY_OP_REGISTRATION.register(ops.ne_op)
def _(op, left: TypedExpr, right: TypedExpr) -> sge.Expression:
@register_binary_op(ops.ne_op)
def _(left: TypedExpr, right: TypedExpr) -> sge.Expression:
left_expr = _coerce_bool_to_int(left)
right_expr = _coerce_bool_to_int(right)
return sge.NEQ(this=left_expr, expression=right_expr)


@BINARY_OP_REGISTRATION.register(ops.obj_make_ref_op)
def _(op, left: TypedExpr, right: TypedExpr) -> sge.Expression:
@register_binary_op(ops.obj_make_ref_op)
def _(left: TypedExpr, right: TypedExpr) -> sge.Expression:
return sge.func("OBJ.MAKE_REF", left.expr, right.expr)


@BINARY_OP_REGISTRATION.register(ops.sub_op)
def _(op, left: TypedExpr, right: TypedExpr) -> sge.Expression:
@register_binary_op(ops.sub_op)
def _(left: TypedExpr, right: TypedExpr) -> sge.Expression:
if dtypes.is_numeric(left.dtype) and dtypes.is_numeric(right.dtype):
left_expr = _coerce_bool_to_int(left)
right_expr = _coerce_bool_to_int(right)
Expand Down
27 changes: 0 additions & 27 deletions bigframes/core/compile/sqlglot/expressions/nary_compiler.py

This file was deleted.

54 changes: 0 additions & 54 deletions bigframes/core/compile/sqlglot/expressions/op_registration.py

This file was deleted.

Loading