Skip to content

Commit ce5de57

Browse files
feat: Add str, dt accessors to pd.col Expression objects (#2488)
1 parent 47a0feb commit ce5de57

File tree

8 files changed

+174
-109
lines changed

8 files changed

+174
-109
lines changed

bigframes/core/col.py

Lines changed: 51 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,7 @@
1414
from __future__ import annotations
1515

1616
import dataclasses
17-
from typing import Any, Hashable
17+
from typing import Any, Hashable, Literal, TYPE_CHECKING
1818

1919
import bigframes_vendored.pandas.core.col as pd_col
2020

@@ -23,6 +23,10 @@
2323
import bigframes.operations as bf_ops
2424
import bigframes.operations.aggregations as agg_ops
2525

26+
if TYPE_CHECKING:
27+
import bigframes.operations.datetimes as datetimes
28+
import bigframes.operations.strings as strings
29+
2630

2731
# Not to be confused with the Expression class in `bigframes.core.expressions`
2832
# Name collision unintended
@@ -32,7 +36,7 @@ class Expression:
3236

3337
_value: bf_expression.Expression
3438

35-
def _apply_unary(self, op: bf_ops.UnaryOp) -> Expression:
39+
def _apply_unary_op(self, op: bf_ops.UnaryOp) -> Expression:
3640
return Expression(op.as_expr(self._value))
3741

3842
def _apply_unary_agg(self, op: agg_ops.UnaryAggregateOp) -> Expression:
@@ -44,7 +48,14 @@ def _apply_unary_agg(self, op: agg_ops.UnaryAggregateOp) -> Expression:
4448
agg_expressions.WindowExpression(agg_expr, window_spec.unbound())
4549
)
4650

47-
def _apply_binary(self, other: Any, op: bf_ops.BinaryOp, reverse: bool = False):
51+
# alignment is purely for series compatibility, and is ignored here
52+
def _apply_binary_op(
53+
self,
54+
other: Any,
55+
op: bf_ops.BinaryOp,
56+
alignment: Literal["outer", "left"] = "outer",
57+
reverse: bool = False,
58+
):
4859
if isinstance(other, Expression):
4960
other_value = other._value
5061
else:
@@ -55,79 +66,79 @@ def _apply_binary(self, other: Any, op: bf_ops.BinaryOp, reverse: bool = False):
5566
return Expression(op.as_expr(self._value, other_value))
5667

5768
def __add__(self, other: Any) -> Expression:
58-
return self._apply_binary(other, bf_ops.add_op)
69+
return self._apply_binary_op(other, bf_ops.add_op)
5970

6071
def __radd__(self, other: Any) -> Expression:
61-
return self._apply_binary(other, bf_ops.add_op, reverse=True)
72+
return self._apply_binary_op(other, bf_ops.add_op, reverse=True)
6273

6374
def __sub__(self, other: Any) -> Expression:
64-
return self._apply_binary(other, bf_ops.sub_op)
75+
return self._apply_binary_op(other, bf_ops.sub_op)
6576

6677
def __rsub__(self, other: Any) -> Expression:
67-
return self._apply_binary(other, bf_ops.sub_op, reverse=True)
78+
return self._apply_binary_op(other, bf_ops.sub_op, reverse=True)
6879

6980
def __mul__(self, other: Any) -> Expression:
70-
return self._apply_binary(other, bf_ops.mul_op)
81+
return self._apply_binary_op(other, bf_ops.mul_op)
7182

7283
def __rmul__(self, other: Any) -> Expression:
73-
return self._apply_binary(other, bf_ops.mul_op, reverse=True)
84+
return self._apply_binary_op(other, bf_ops.mul_op, reverse=True)
7485

7586
def __truediv__(self, other: Any) -> Expression:
76-
return self._apply_binary(other, bf_ops.div_op)
87+
return self._apply_binary_op(other, bf_ops.div_op)
7788

7889
def __rtruediv__(self, other: Any) -> Expression:
79-
return self._apply_binary(other, bf_ops.div_op, reverse=True)
90+
return self._apply_binary_op(other, bf_ops.div_op, reverse=True)
8091

8192
def __floordiv__(self, other: Any) -> Expression:
82-
return self._apply_binary(other, bf_ops.floordiv_op)
93+
return self._apply_binary_op(other, bf_ops.floordiv_op)
8394

8495
def __rfloordiv__(self, other: Any) -> Expression:
85-
return self._apply_binary(other, bf_ops.floordiv_op, reverse=True)
96+
return self._apply_binary_op(other, bf_ops.floordiv_op, reverse=True)
8697

8798
def __ge__(self, other: Any) -> Expression:
88-
return self._apply_binary(other, bf_ops.ge_op)
99+
return self._apply_binary_op(other, bf_ops.ge_op)
89100

90101
def __gt__(self, other: Any) -> Expression:
91-
return self._apply_binary(other, bf_ops.gt_op)
102+
return self._apply_binary_op(other, bf_ops.gt_op)
92103

93104
def __le__(self, other: Any) -> Expression:
94-
return self._apply_binary(other, bf_ops.le_op)
105+
return self._apply_binary_op(other, bf_ops.le_op)
95106

96107
def __lt__(self, other: Any) -> Expression:
97-
return self._apply_binary(other, bf_ops.lt_op)
108+
return self._apply_binary_op(other, bf_ops.lt_op)
98109

99110
def __eq__(self, other: object) -> Expression: # type: ignore
100-
return self._apply_binary(other, bf_ops.eq_op)
111+
return self._apply_binary_op(other, bf_ops.eq_op)
101112

102113
def __ne__(self, other: object) -> Expression: # type: ignore
103-
return self._apply_binary(other, bf_ops.ne_op)
114+
return self._apply_binary_op(other, bf_ops.ne_op)
104115

105116
def __mod__(self, other: Any) -> Expression:
106-
return self._apply_binary(other, bf_ops.mod_op)
117+
return self._apply_binary_op(other, bf_ops.mod_op)
107118

108119
def __rmod__(self, other: Any) -> Expression:
109-
return self._apply_binary(other, bf_ops.mod_op, reverse=True)
120+
return self._apply_binary_op(other, bf_ops.mod_op, reverse=True)
110121

111122
def __and__(self, other: Any) -> Expression:
112-
return self._apply_binary(other, bf_ops.and_op)
123+
return self._apply_binary_op(other, bf_ops.and_op)
113124

114125
def __rand__(self, other: Any) -> Expression:
115-
return self._apply_binary(other, bf_ops.and_op, reverse=True)
126+
return self._apply_binary_op(other, bf_ops.and_op, reverse=True)
116127

117128
def __or__(self, other: Any) -> Expression:
118-
return self._apply_binary(other, bf_ops.or_op)
129+
return self._apply_binary_op(other, bf_ops.or_op)
119130

120131
def __ror__(self, other: Any) -> Expression:
121-
return self._apply_binary(other, bf_ops.or_op, reverse=True)
132+
return self._apply_binary_op(other, bf_ops.or_op, reverse=True)
122133

123134
def __xor__(self, other: Any) -> Expression:
124-
return self._apply_binary(other, bf_ops.xor_op)
135+
return self._apply_binary_op(other, bf_ops.xor_op)
125136

126137
def __rxor__(self, other: Any) -> Expression:
127-
return self._apply_binary(other, bf_ops.xor_op, reverse=True)
138+
return self._apply_binary_op(other, bf_ops.xor_op, reverse=True)
128139

129140
def __invert__(self) -> Expression:
130-
return self._apply_unary(bf_ops.invert_op)
141+
return self._apply_unary_op(bf_ops.invert_op)
131142

132143
def sum(self) -> Expression:
133144
return self._apply_unary_agg(agg_ops.sum_op)
@@ -147,6 +158,18 @@ def min(self) -> Expression:
147158
def max(self) -> Expression:
148159
return self._apply_unary_agg(agg_ops.max_op)
149160

161+
@property
162+
def dt(self) -> datetimes.DatetimeSimpleMethods:
163+
import bigframes.operations.datetimes as datetimes
164+
165+
return datetimes.DatetimeSimpleMethods(self)
166+
167+
@property
168+
def str(self) -> strings.StringMethods:
169+
import bigframes.operations.strings as strings
170+
171+
return strings.StringMethods(self)
172+
150173

151174
def col(col_name: Hashable) -> Expression:
152175
return Expression(bf_expression.free_var(col_name))

bigframes/core/compile/ibis_compiler/scalar_op_registry.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -982,7 +982,9 @@ def isin_op_impl(x: ibis_types.Value, op: ops.IsInOp):
982982

983983
@scalar_op_compiler.register_unary_op(ops.ToDatetimeOp, pass_op=True)
984984
def to_datetime_op_impl(x: ibis_types.Value, op: ops.ToDatetimeOp):
985-
if x.type() in (ibis_dtypes.str, ibis_dtypes.Timestamp("UTC")): # type: ignore
985+
if x.type() == ibis_dtypes.Timestamp(None): # type: ignore
986+
return x # already a timestamp, no-op
987+
elif x.type() in (ibis_dtypes.str, ibis_dtypes.Timestamp("UTC")): # type: ignore
986988
return x.try_cast(ibis_dtypes.Timestamp(None)) # type: ignore
987989
else:
988990
# Numerical inputs.

bigframes/operations/datetime_ops.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -74,6 +74,7 @@ def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionT
7474
dtypes.STRING_DTYPE,
7575
dtypes.DATE_DTYPE,
7676
dtypes.TIMESTAMP_DTYPE,
77+
dtypes.DATETIME_DTYPE,
7778
):
7879
raise TypeError("expected string or numeric input")
7980
return pd.ArrowDtype(pa.timestamp("us", tz=None))
@@ -87,6 +88,8 @@ class ToTimestampOp(base_ops.UnaryOp):
8788

8889
def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionType:
8990
# Must be numeric or string
91+
if input_types[0] == dtypes.TIMESTAMP_DTYPE:
92+
raise TypeError("Already tz-aware.")
9093
if input_types[0] not in (
9194
dtypes.FLOAT_DTYPE,
9295
dtypes.INT_DTYPE,

0 commit comments

Comments
 (0)