sqlframe 1.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sqlframe/__init__.py +0 -0
- sqlframe/_version.py +16 -0
- sqlframe/base/__init__.py +0 -0
- sqlframe/base/_typing.py +39 -0
- sqlframe/base/catalog.py +1163 -0
- sqlframe/base/column.py +388 -0
- sqlframe/base/dataframe.py +1519 -0
- sqlframe/base/decorators.py +51 -0
- sqlframe/base/exceptions.py +14 -0
- sqlframe/base/function_alternatives.py +1055 -0
- sqlframe/base/functions.py +1678 -0
- sqlframe/base/group.py +102 -0
- sqlframe/base/mixins/__init__.py +0 -0
- sqlframe/base/mixins/catalog_mixins.py +419 -0
- sqlframe/base/mixins/readwriter_mixins.py +118 -0
- sqlframe/base/normalize.py +84 -0
- sqlframe/base/operations.py +87 -0
- sqlframe/base/readerwriter.py +679 -0
- sqlframe/base/session.py +585 -0
- sqlframe/base/transforms.py +13 -0
- sqlframe/base/types.py +418 -0
- sqlframe/base/util.py +242 -0
- sqlframe/base/window.py +139 -0
- sqlframe/bigquery/__init__.py +23 -0
- sqlframe/bigquery/catalog.py +255 -0
- sqlframe/bigquery/column.py +1 -0
- sqlframe/bigquery/dataframe.py +54 -0
- sqlframe/bigquery/functions.py +378 -0
- sqlframe/bigquery/group.py +14 -0
- sqlframe/bigquery/readwriter.py +29 -0
- sqlframe/bigquery/session.py +89 -0
- sqlframe/bigquery/types.py +1 -0
- sqlframe/bigquery/window.py +1 -0
- sqlframe/duckdb/__init__.py +20 -0
- sqlframe/duckdb/catalog.py +108 -0
- sqlframe/duckdb/column.py +1 -0
- sqlframe/duckdb/dataframe.py +55 -0
- sqlframe/duckdb/functions.py +47 -0
- sqlframe/duckdb/group.py +14 -0
- sqlframe/duckdb/readwriter.py +111 -0
- sqlframe/duckdb/session.py +65 -0
- sqlframe/duckdb/types.py +1 -0
- sqlframe/duckdb/window.py +1 -0
- sqlframe/postgres/__init__.py +23 -0
- sqlframe/postgres/catalog.py +106 -0
- sqlframe/postgres/column.py +1 -0
- sqlframe/postgres/dataframe.py +54 -0
- sqlframe/postgres/functions.py +61 -0
- sqlframe/postgres/group.py +14 -0
- sqlframe/postgres/readwriter.py +29 -0
- sqlframe/postgres/session.py +68 -0
- sqlframe/postgres/types.py +1 -0
- sqlframe/postgres/window.py +1 -0
- sqlframe/redshift/__init__.py +23 -0
- sqlframe/redshift/catalog.py +127 -0
- sqlframe/redshift/column.py +1 -0
- sqlframe/redshift/dataframe.py +54 -0
- sqlframe/redshift/functions.py +18 -0
- sqlframe/redshift/group.py +14 -0
- sqlframe/redshift/readwriter.py +29 -0
- sqlframe/redshift/session.py +53 -0
- sqlframe/redshift/types.py +1 -0
- sqlframe/redshift/window.py +1 -0
- sqlframe/snowflake/__init__.py +26 -0
- sqlframe/snowflake/catalog.py +134 -0
- sqlframe/snowflake/column.py +1 -0
- sqlframe/snowflake/dataframe.py +54 -0
- sqlframe/snowflake/functions.py +18 -0
- sqlframe/snowflake/group.py +14 -0
- sqlframe/snowflake/readwriter.py +29 -0
- sqlframe/snowflake/session.py +53 -0
- sqlframe/snowflake/types.py +1 -0
- sqlframe/snowflake/window.py +1 -0
- sqlframe/spark/__init__.py +23 -0
- sqlframe/spark/catalog.py +1028 -0
- sqlframe/spark/column.py +1 -0
- sqlframe/spark/dataframe.py +54 -0
- sqlframe/spark/functions.py +22 -0
- sqlframe/spark/group.py +14 -0
- sqlframe/spark/readwriter.py +29 -0
- sqlframe/spark/session.py +90 -0
- sqlframe/spark/types.py +1 -0
- sqlframe/spark/window.py +1 -0
- sqlframe/standalone/__init__.py +26 -0
- sqlframe/standalone/catalog.py +13 -0
- sqlframe/standalone/column.py +1 -0
- sqlframe/standalone/dataframe.py +36 -0
- sqlframe/standalone/functions.py +1 -0
- sqlframe/standalone/group.py +14 -0
- sqlframe/standalone/readwriter.py +19 -0
- sqlframe/standalone/session.py +40 -0
- sqlframe/standalone/types.py +1 -0
- sqlframe/standalone/window.py +1 -0
- sqlframe-1.1.3.dist-info/LICENSE +21 -0
- sqlframe-1.1.3.dist-info/METADATA +172 -0
- sqlframe-1.1.3.dist-info/RECORD +98 -0
- sqlframe-1.1.3.dist-info/WHEEL +5 -0
- sqlframe-1.1.3.dist-info/top_level.txt +1 -0
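The largest addition in this release is sqlframe/base/functions.py (the 1678-line file shown in the diff below), a PySpark-style functions module whose helpers wrap sqlglot expression builders. As a rough illustration of the pattern only (a sketch that calls sqlglot directly; the module itself goes through its own Column helpers such as invoke_expression_over_column), the snippet below builds the same kind of expression node and renders it for a dialect:

# Illustration only, not sqlframe's public API: the wrappers in
# sqlframe/base/functions.py construct sqlglot expression nodes like this
# and let sqlglot render them for the session's target dialect.
from sqlglot import exp

# Roughly what greatest("a", "b") builds: a Greatest node over the first
# column, with the remaining columns passed as `expressions`.
node = exp.Greatest(this=exp.column("a"), expressions=[exp.column("b")])
print(node.sql(dialect="duckdb"))  # e.g. GREATEST(a, b)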
|
@@ -0,0 +1,1678 @@
|
|
|
1
|
+
# This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'sqlframe' folder.
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
import typing as t
|
|
7
|
+
|
|
8
|
+
from sqlglot import exp as expression
|
|
9
|
+
from sqlglot.helper import ensure_list
|
|
10
|
+
from sqlglot.helper import flatten as _flatten
|
|
11
|
+
|
|
12
|
+
from sqlframe.base.column import Column
|
|
13
|
+
from sqlframe.base.decorators import func_metadata as meta
|
|
14
|
+
|
|
15
|
+
if t.TYPE_CHECKING:
|
|
16
|
+
from sqlframe.base._typing import ColumnOrLiteral, ColumnOrName
|
|
17
|
+
from sqlframe.base.session import DF
|
|
18
|
+
from sqlframe.base.types import ArrayType, StructType
|
|
19
|
+
|
|
20
|
+
logger = logging.getLogger(__name__)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
@meta()
|
|
24
|
+
def col(column_name: t.Union[ColumnOrName, t.Any]) -> Column:
|
|
25
|
+
from sqlframe.base.session import _BaseSession
|
|
26
|
+
|
|
27
|
+
dialect = _BaseSession().input_dialect
|
|
28
|
+
if isinstance(column_name, str):
|
|
29
|
+
return Column(
|
|
30
|
+
expression.to_column(column_name, dialect=dialect).transform(
|
|
31
|
+
dialect.normalize_identifier
|
|
32
|
+
)
|
|
33
|
+
)
|
|
34
|
+
return Column(column_name)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
@meta()
|
|
38
|
+
def lit(value: t.Optional[t.Any] = None) -> Column:
|
|
39
|
+
if isinstance(value, str):
|
|
40
|
+
return Column(expression.Literal.string(value))
|
|
41
|
+
return Column(value)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
@meta()
|
|
45
|
+
def greatest(*cols: ColumnOrName) -> Column:
|
|
46
|
+
if len(cols) > 1:
|
|
47
|
+
return Column.invoke_expression_over_column(
|
|
48
|
+
cols[0], expression.Greatest, expressions=cols[1:]
|
|
49
|
+
)
|
|
50
|
+
return Column.invoke_expression_over_column(cols[0], expression.Greatest)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
@meta()
|
|
54
|
+
def least(*cols: ColumnOrName) -> Column:
|
|
55
|
+
if len(cols) > 1:
|
|
56
|
+
return Column.invoke_expression_over_column(cols[0], expression.Least, expressions=cols[1:])
|
|
57
|
+
return Column.invoke_expression_over_column(cols[0], expression.Least)
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
@meta(unsupported_engines="bigquery")
|
|
61
|
+
def count_distinct(col: ColumnOrName, *cols: ColumnOrName) -> Column:
|
|
62
|
+
columns = [Column.ensure_col(x) for x in [col] + list(cols)]
|
|
63
|
+
return Column(
|
|
64
|
+
expression.Count(this=expression.Distinct(expressions=[x.expression for x in columns]))
|
|
65
|
+
)
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
countDistinct = count_distinct
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
@meta()
|
|
72
|
+
def when(condition: Column, value: t.Any) -> Column:
|
|
73
|
+
true_value = value if isinstance(value, Column) else lit(value)
|
|
74
|
+
return Column(
|
|
75
|
+
expression.Case(
|
|
76
|
+
ifs=[expression.If(this=condition.column_expression, true=true_value.column_expression)]
|
|
77
|
+
)
|
|
78
|
+
)
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
@meta()
|
|
82
|
+
def asc(col: ColumnOrName) -> Column:
|
|
83
|
+
return Column.ensure_col(col).asc()
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
@meta()
|
|
87
|
+
def desc(col: ColumnOrName):
|
|
88
|
+
return Column.ensure_col(col).desc()
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
@meta(unsupported_engines="*")
|
|
92
|
+
def broadcast(df: DF) -> DF:
|
|
93
|
+
return df.hint("broadcast")
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
@meta()
|
|
97
|
+
def sqrt(col: ColumnOrName) -> Column:
|
|
98
|
+
return Column.invoke_expression_over_column(col, expression.Sqrt)
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
@meta()
|
|
102
|
+
def abs(col: ColumnOrName) -> Column:
|
|
103
|
+
return Column.invoke_expression_over_column(col, expression.Abs)
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
@meta()
|
|
107
|
+
def max(col: ColumnOrName) -> Column:
|
|
108
|
+
return Column.invoke_expression_over_column(col, expression.Max)
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
@meta()
|
|
112
|
+
def min(col: ColumnOrName) -> Column:
|
|
113
|
+
return Column.invoke_expression_over_column(col, expression.Min)
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
@meta(unsupported_engines="postgres")
|
|
117
|
+
def max_by(col: ColumnOrName, ord: ColumnOrName) -> Column:
|
|
118
|
+
return Column.invoke_expression_over_column(col, expression.ArgMax, expression=ord)
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
@meta(unsupported_engines="postgres")
|
|
122
|
+
def min_by(col: ColumnOrName, ord: ColumnOrName) -> Column:
|
|
123
|
+
return Column.invoke_expression_over_column(col, expression.ArgMin, expression=ord)
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
@meta()
|
|
127
|
+
def count(col: ColumnOrName) -> Column:
|
|
128
|
+
return Column.invoke_expression_over_column(col, expression.Count)
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
@meta()
|
|
132
|
+
def sum(col: ColumnOrName) -> Column:
|
|
133
|
+
return Column.invoke_expression_over_column(col, expression.Sum)
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
@meta()
|
|
137
|
+
def avg(col: ColumnOrName) -> Column:
|
|
138
|
+
return Column.invoke_expression_over_column(col, expression.Avg)
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
mean = avg
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
@meta()
|
|
145
|
+
def sumDistinct(col: ColumnOrName) -> Column:
|
|
146
|
+
return Column(
|
|
147
|
+
expression.Sum(this=expression.Distinct(expressions=[Column.ensure_col(col).expression]))
|
|
148
|
+
)
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
sum_distinct = sumDistinct
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
@meta(unsupported_engines="*")
|
|
155
|
+
def product(col: ColumnOrName) -> Column:
|
|
156
|
+
raise NotImplementedError("Product is not currently implemented")
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
@meta()
|
|
160
|
+
def acos(col: ColumnOrName) -> Column:
|
|
161
|
+
return Column.invoke_anonymous_function(col, "ACOS")
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
@meta(unsupported_engines="duckdb")
|
|
165
|
+
def acosh(col: ColumnOrName) -> Column:
|
|
166
|
+
return Column.invoke_anonymous_function(col, "ACOSH")
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
@meta()
|
|
170
|
+
def asin(col: ColumnOrName) -> Column:
|
|
171
|
+
return Column.invoke_anonymous_function(col, "ASIN")
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
@meta(unsupported_engines="duckdb")
|
|
175
|
+
def asinh(col: ColumnOrName) -> Column:
|
|
176
|
+
return Column.invoke_anonymous_function(col, "ASINH")
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
@meta()
|
|
180
|
+
def atan(col: ColumnOrName) -> Column:
|
|
181
|
+
return Column.invoke_anonymous_function(col, "ATAN")
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
@meta()
|
|
185
|
+
def atan2(col1: t.Union[ColumnOrName, float], col2: t.Union[ColumnOrName, float]) -> Column:
|
|
186
|
+
col1_value = lit(col1) if isinstance(col1, (int, float)) else col1
|
|
187
|
+
col2_value = lit(col2) if isinstance(col2, (int, float)) else col2
|
|
188
|
+
|
|
189
|
+
return Column.invoke_anonymous_function(col1_value, "ATAN2", col2_value)
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
@meta(unsupported_engines="duckdb")
|
|
193
|
+
def atanh(col: ColumnOrName) -> Column:
|
|
194
|
+
return Column.invoke_anonymous_function(col, "ATANH")
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
@meta()
|
|
198
|
+
def cbrt(col: ColumnOrName) -> Column:
|
|
199
|
+
return Column.invoke_expression_over_column(col, expression.Cbrt)
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
@meta()
|
|
203
|
+
def ceil(col: ColumnOrName) -> Column:
|
|
204
|
+
return Column.invoke_expression_over_column(col, expression.Ceil)
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
ceiling = ceil
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
@meta()
|
|
211
|
+
def cos(col: ColumnOrName) -> Column:
|
|
212
|
+
return Column.invoke_anonymous_function(col, "COS")
|
|
213
|
+
|
|
214
|
+
|
|
215
|
+
@meta(unsupported_engines="duckdb")
|
|
216
|
+
def cosh(col: ColumnOrName) -> Column:
|
|
217
|
+
return Column.invoke_anonymous_function(col, "COSH")
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
@meta()
|
|
221
|
+
def cot(col: ColumnOrName) -> Column:
|
|
222
|
+
return Column.invoke_anonymous_function(col, "COT")
|
|
223
|
+
|
|
224
|
+
|
|
225
|
+
@meta(unsupported_engines=["duckdb", "postgres"])
|
|
226
|
+
def csc(col: ColumnOrName) -> Column:
|
|
227
|
+
return Column.invoke_anonymous_function(col, "CSC")
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
@meta()
|
|
231
|
+
def e() -> Column:
|
|
232
|
+
return Column(expression.Anonymous(this="e"))
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
@meta()
|
|
236
|
+
def exp(col: ColumnOrName) -> Column:
|
|
237
|
+
return Column.invoke_expression_over_column(col, expression.Exp)
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
@meta()
|
|
241
|
+
def expm1(col: ColumnOrName) -> Column:
|
|
242
|
+
return Column.invoke_anonymous_function(col, "EXPM1")
|
|
243
|
+
|
|
244
|
+
|
|
245
|
+
@meta()
|
|
246
|
+
def factorial(col: ColumnOrName) -> Column:
|
|
247
|
+
return Column.invoke_anonymous_function(col, "FACTORIAL")
|
|
248
|
+
|
|
249
|
+
|
|
250
|
+
@meta()
|
|
251
|
+
def floor(col: ColumnOrName) -> Column:
|
|
252
|
+
return Column.invoke_expression_over_column(col, expression.Floor)
|
|
253
|
+
|
|
254
|
+
|
|
255
|
+
@meta()
|
|
256
|
+
def log10(col: ColumnOrName) -> Column:
|
|
257
|
+
return Column.invoke_expression_over_column(lit(10), expression.Log, expression=col)
|
|
258
|
+
|
|
259
|
+
|
|
260
|
+
@meta()
|
|
261
|
+
def log1p(col: ColumnOrName) -> Column:
|
|
262
|
+
return Column.invoke_anonymous_function(col, "LOG1P")
|
|
263
|
+
|
|
264
|
+
|
|
265
|
+
@meta()
|
|
266
|
+
def log2(col: ColumnOrName) -> Column:
|
|
267
|
+
return Column.invoke_expression_over_column(lit(2), expression.Log, expression=col)
|
|
268
|
+
|
|
269
|
+
|
|
270
|
+
@meta()
|
|
271
|
+
def log(arg1: t.Union[ColumnOrName, float], arg2: t.Optional[ColumnOrName] = None) -> Column:
|
|
272
|
+
arg1_value = lit(arg1) if isinstance(arg1, (int, float)) else arg1
|
|
273
|
+
|
|
274
|
+
if arg2 is None:
|
|
275
|
+
return Column.invoke_expression_over_column(arg1_value, expression.Ln)
|
|
276
|
+
return Column.invoke_expression_over_column(arg1_value, expression.Log, expression=arg2)
|
|
277
|
+
|
|
278
|
+
|
|
279
|
+
@meta()
|
|
280
|
+
def rint(col: ColumnOrName) -> Column:
|
|
281
|
+
return Column.invoke_anonymous_function(col, "RINT")
|
|
282
|
+
|
|
283
|
+
|
|
284
|
+
@meta(unsupported_engines=["duckdb", "postgres"])
|
|
285
|
+
def sec(col: ColumnOrName) -> Column:
|
|
286
|
+
return Column.invoke_anonymous_function(col, "SEC")
|
|
287
|
+
|
|
288
|
+
|
|
289
|
+
@meta()
|
|
290
|
+
def signum(col: ColumnOrName) -> Column:
|
|
291
|
+
return Column.invoke_expression_over_column(col, expression.Sign)
|
|
292
|
+
|
|
293
|
+
|
|
294
|
+
@meta()
|
|
295
|
+
def sin(col: ColumnOrName) -> Column:
|
|
296
|
+
return Column.invoke_anonymous_function(col, "SIN")
|
|
297
|
+
|
|
298
|
+
|
|
299
|
+
@meta(unsupported_engines="duckdb")
|
|
300
|
+
def sinh(col: ColumnOrName) -> Column:
|
|
301
|
+
return Column.invoke_anonymous_function(col, "SINH")
|
|
302
|
+
|
|
303
|
+
|
|
304
|
+
@meta()
|
|
305
|
+
def tan(col: ColumnOrName) -> Column:
|
|
306
|
+
return Column.invoke_anonymous_function(col, "TAN")
|
|
307
|
+
|
|
308
|
+
|
|
309
|
+
@meta(unsupported_engines="duckdb")
|
|
310
|
+
def tanh(col: ColumnOrName) -> Column:
|
|
311
|
+
return Column.invoke_anonymous_function(col, "TANH")
|
|
312
|
+
|
|
313
|
+
|
|
314
|
+
@meta()
|
|
315
|
+
def degrees(col: ColumnOrName) -> Column:
|
|
316
|
+
return Column.invoke_anonymous_function(col, "DEGREES")
|
|
317
|
+
|
|
318
|
+
|
|
319
|
+
toDegrees = degrees
|
|
320
|
+
|
|
321
|
+
|
|
322
|
+
@meta()
|
|
323
|
+
def radians(col: ColumnOrName) -> Column:
|
|
324
|
+
return Column.invoke_anonymous_function(col, "RADIANS")
|
|
325
|
+
|
|
326
|
+
|
|
327
|
+
toRadians = radians
|
|
328
|
+
|
|
329
|
+
|
|
330
|
+
@meta()
|
|
331
|
+
def bitwiseNOT(col: ColumnOrName) -> Column:
|
|
332
|
+
return bitwise_not(col)
|
|
333
|
+
|
|
334
|
+
|
|
335
|
+
@meta()
|
|
336
|
+
def bitwise_not(col: ColumnOrName) -> Column:
|
|
337
|
+
return Column.invoke_expression_over_column(col, expression.BitwiseNot)
|
|
338
|
+
|
|
339
|
+
|
|
340
|
+
@meta()
|
|
341
|
+
def asc_nulls_first(col: ColumnOrName) -> Column:
|
|
342
|
+
return Column.ensure_col(col).asc_nulls_first()
|
|
343
|
+
|
|
344
|
+
|
|
345
|
+
@meta()
|
|
346
|
+
def asc_nulls_last(col: ColumnOrName) -> Column:
|
|
347
|
+
return Column.ensure_col(col).asc_nulls_last()
|
|
348
|
+
|
|
349
|
+
|
|
350
|
+
@meta()
|
|
351
|
+
def desc_nulls_first(col: ColumnOrName) -> Column:
|
|
352
|
+
return Column.ensure_col(col).desc_nulls_first()
|
|
353
|
+
|
|
354
|
+
|
|
355
|
+
@meta()
|
|
356
|
+
def desc_nulls_last(col: ColumnOrName) -> Column:
|
|
357
|
+
return Column.ensure_col(col).desc_nulls_last()
|
|
358
|
+
|
|
359
|
+
|
|
360
|
+
@meta()
|
|
361
|
+
def stddev(col: ColumnOrName) -> Column:
|
|
362
|
+
return Column.invoke_expression_over_column(col, expression.Stddev)
|
|
363
|
+
|
|
364
|
+
|
|
365
|
+
@meta()
|
|
366
|
+
def stddev_samp(col: ColumnOrName) -> Column:
|
|
367
|
+
return Column.invoke_expression_over_column(col, expression.StddevSamp)
|
|
368
|
+
|
|
369
|
+
|
|
370
|
+
@meta()
|
|
371
|
+
def stddev_pop(col: ColumnOrName) -> Column:
|
|
372
|
+
return Column.invoke_expression_over_column(col, expression.StddevPop)
|
|
373
|
+
|
|
374
|
+
|
|
375
|
+
@meta()
|
|
376
|
+
def variance(col: ColumnOrName) -> Column:
|
|
377
|
+
return Column.invoke_expression_over_column(col, expression.Variance)
|
|
378
|
+
|
|
379
|
+
|
|
380
|
+
@meta()
|
|
381
|
+
def var_samp(col: ColumnOrName) -> Column:
|
|
382
|
+
return Column.invoke_expression_over_column(col, expression.Variance)
|
|
383
|
+
|
|
384
|
+
|
|
385
|
+
@meta()
|
|
386
|
+
def var_pop(col: ColumnOrName) -> Column:
|
|
387
|
+
return Column.invoke_expression_over_column(col, expression.VariancePop)
|
|
388
|
+
|
|
389
|
+
|
|
390
|
+
@meta(unsupported_engines=["bigquery", "postgres"])
|
|
391
|
+
def skewness(col: ColumnOrName) -> Column:
|
|
392
|
+
return Column.invoke_anonymous_function(col, "SKEWNESS")
|
|
393
|
+
|
|
394
|
+
|
|
395
|
+
@meta(unsupported_engines=["bigquery", "postgres"])
|
|
396
|
+
def kurtosis(col: ColumnOrName) -> Column:
|
|
397
|
+
return Column.invoke_anonymous_function(col, "KURTOSIS")
|
|
398
|
+
|
|
399
|
+
|
|
400
|
+
@meta()
|
|
401
|
+
def collect_list(col: ColumnOrName) -> Column:
|
|
402
|
+
return Column.invoke_expression_over_column(col, expression.ArrayAgg)
|
|
403
|
+
|
|
404
|
+
|
|
405
|
+
@meta()
|
|
406
|
+
def collect_set(col: ColumnOrName) -> Column:
|
|
407
|
+
return Column.invoke_expression_over_column(col, expression.ArrayUniqueAgg)
|
|
408
|
+
|
|
409
|
+
|
|
410
|
+
@meta(unsupported_engines=["bigquery", "duckdb", "postgres"])
|
|
411
|
+
def hypot(col1: t.Union[ColumnOrName, float], col2: t.Union[ColumnOrName, float]) -> Column:
|
|
412
|
+
col1_value = lit(col1) if isinstance(col1, (int, float)) else col1
|
|
413
|
+
col2_value = lit(col2) if isinstance(col2, (int, float)) else col2
|
|
414
|
+
|
|
415
|
+
return Column.invoke_anonymous_function(col1_value, "HYPOT", col2_value)
|
|
416
|
+
|
|
417
|
+
|
|
418
|
+
@meta()
|
|
419
|
+
def pow(col1: t.Union[ColumnOrName, float], col2: t.Union[ColumnOrName, float]) -> Column:
|
|
420
|
+
col1_value = lit(col1) if isinstance(col1, (int, float)) else col1
|
|
421
|
+
col2_value = lit(col2) if isinstance(col2, (int, float)) else col2
|
|
422
|
+
|
|
423
|
+
return Column.invoke_expression_over_column(col1_value, expression.Pow, expression=col2_value)
|
|
424
|
+
|
|
425
|
+
|
|
426
|
+
@meta()
|
|
427
|
+
def row_number() -> Column:
|
|
428
|
+
return Column(expression.Anonymous(this="ROW_NUMBER"))
|
|
429
|
+
|
|
430
|
+
|
|
431
|
+
@meta()
|
|
432
|
+
def dense_rank() -> Column:
|
|
433
|
+
return Column(expression.Anonymous(this="DENSE_RANK"))
|
|
434
|
+
|
|
435
|
+
|
|
436
|
+
@meta()
|
|
437
|
+
def rank() -> Column:
|
|
438
|
+
return Column(expression.Anonymous(this="RANK"))
|
|
439
|
+
|
|
440
|
+
|
|
441
|
+
@meta()
|
|
442
|
+
def cume_dist() -> Column:
|
|
443
|
+
return Column(expression.Anonymous(this="CUME_DIST"))
|
|
444
|
+
|
|
445
|
+
|
|
446
|
+
@meta()
|
|
447
|
+
def percent_rank() -> Column:
|
|
448
|
+
return Column(expression.Anonymous(this="PERCENT_RANK"))
|
|
449
|
+
|
|
450
|
+
|
|
451
|
+
@meta(unsupported_engines="postgres")
|
|
452
|
+
def approx_count_distinct(col: ColumnOrName, rsd: t.Optional[float] = None) -> Column:
|
|
453
|
+
if rsd is None:
|
|
454
|
+
return Column.invoke_expression_over_column(col, expression.ApproxDistinct)
|
|
455
|
+
return Column.invoke_expression_over_column(col, expression.ApproxDistinct, accuracy=lit(rsd))
|
|
456
|
+
|
|
457
|
+
|
|
458
|
+
approxCountDistinct = approx_count_distinct
|
|
459
|
+
|
|
460
|
+
|
|
461
|
+
@meta()
|
|
462
|
+
def coalesce(*cols: ColumnOrName) -> Column:
|
|
463
|
+
if len(cols) > 1:
|
|
464
|
+
return Column.invoke_expression_over_column(
|
|
465
|
+
cols[0], expression.Coalesce, expressions=cols[1:]
|
|
466
|
+
)
|
|
467
|
+
return Column.invoke_expression_over_column(cols[0], expression.Coalesce)
|
|
468
|
+
|
|
469
|
+
|
|
470
|
+
@meta()
|
|
471
|
+
def corr(col1: ColumnOrName, col2: ColumnOrName) -> Column:
|
|
472
|
+
return Column.invoke_expression_over_column(col1, expression.Corr, expression=col2)
|
|
473
|
+
|
|
474
|
+
|
|
475
|
+
@meta()
|
|
476
|
+
def covar_pop(col1: ColumnOrName, col2: ColumnOrName) -> Column:
|
|
477
|
+
return Column.invoke_expression_over_column(col1, expression.CovarPop, expression=col2)
|
|
478
|
+
|
|
479
|
+
|
|
480
|
+
@meta()
|
|
481
|
+
def covar_samp(col1: ColumnOrName, col2: ColumnOrName) -> Column:
|
|
482
|
+
return Column.invoke_expression_over_column(col1, expression.CovarSamp, expression=col2)
|
|
483
|
+
|
|
484
|
+
|
|
485
|
+
@meta(unsupported_engines=["bigquery", "postgres"])
|
|
486
|
+
def first(col: ColumnOrName, ignorenulls: t.Optional[bool] = None) -> Column:
|
|
487
|
+
this = Column.invoke_expression_over_column(col, expression.First)
|
|
488
|
+
if ignorenulls:
|
|
489
|
+
return Column.invoke_expression_over_column(this, expression.IgnoreNulls)
|
|
490
|
+
return this
|
|
491
|
+
|
|
492
|
+
|
|
493
|
+
@meta(unsupported_engines=["bigquery", "postgres"])
|
|
494
|
+
def grouping_id(*cols: ColumnOrName) -> Column:
|
|
495
|
+
if not cols:
|
|
496
|
+
return Column.invoke_anonymous_function(None, "GROUPING_ID")
|
|
497
|
+
if len(cols) == 1:
|
|
498
|
+
return Column.invoke_anonymous_function(cols[0], "GROUPING_ID")
|
|
499
|
+
return Column.invoke_anonymous_function(cols[0], "GROUPING_ID", *cols[1:])
|
|
500
|
+
|
|
501
|
+
|
|
502
|
+
@meta()
|
|
503
|
+
def input_file_name() -> Column:
|
|
504
|
+
from sqlframe.base.session import _BaseSession
|
|
505
|
+
|
|
506
|
+
return Column(expression.Literal.string(_BaseSession()._last_loaded_file or ""))
|
|
507
|
+
|
|
508
|
+
|
|
509
|
+
@meta()
|
|
510
|
+
def isnan(col: ColumnOrName) -> Column:
|
|
511
|
+
return Column.invoke_expression_over_column(col, expression.IsNan)
|
|
512
|
+
|
|
513
|
+
|
|
514
|
+
@meta()
|
|
515
|
+
def isnull(col: ColumnOrName) -> Column:
|
|
516
|
+
return Column.invoke_anonymous_function(col, "ISNULL")
|
|
517
|
+
|
|
518
|
+
|
|
519
|
+
@meta(unsupported_engines=["bigquery", "postgres"])
|
|
520
|
+
def last(col: ColumnOrName, ignorenulls: t.Optional[bool] = None) -> Column:
|
|
521
|
+
this = Column.invoke_expression_over_column(col, expression.Last)
|
|
522
|
+
if ignorenulls:
|
|
523
|
+
return Column.invoke_expression_over_column(this, expression.IgnoreNulls)
|
|
524
|
+
return this
|
|
525
|
+
|
|
526
|
+
|
|
527
|
+
@meta(unsupported_engines=["duckdb", "postgres", "bigquery", "snowflake", "redshift"])
|
|
528
|
+
def monotonically_increasing_id() -> Column:
|
|
529
|
+
return Column.invoke_anonymous_function(None, "MONOTONICALLY_INCREASING_ID")
|
|
530
|
+
|
|
531
|
+
|
|
532
|
+
@meta()
|
|
533
|
+
def nanvl(col1: ColumnOrName, col2: ColumnOrName) -> Column:
|
|
534
|
+
return Column.invoke_anonymous_function(col1, "NANVL", col2)
|
|
535
|
+
|
|
536
|
+
|
|
537
|
+
@meta(unsupported_engines="postgres")
|
|
538
|
+
def percentile_approx(
|
|
539
|
+
col: ColumnOrName,
|
|
540
|
+
percentage: t.Union[ColumnOrLiteral, t.List[float], t.Tuple[float]],
|
|
541
|
+
accuracy: t.Optional[t.Union[ColumnOrLiteral, int]] = None,
|
|
542
|
+
) -> Column:
|
|
543
|
+
if accuracy:
|
|
544
|
+
return Column.invoke_expression_over_column(
|
|
545
|
+
col, expression.ApproxQuantile, quantile=lit(percentage), accuracy=accuracy
|
|
546
|
+
)
|
|
547
|
+
return Column.invoke_expression_over_column(
|
|
548
|
+
col, expression.ApproxQuantile, quantile=lit(percentage)
|
|
549
|
+
)
|
|
550
|
+
|
|
551
|
+
|
|
552
|
+
@meta(unsupported_engines="bigquery")
|
|
553
|
+
def percentile(
|
|
554
|
+
col: ColumnOrName,
|
|
555
|
+
percentage: t.Union[ColumnOrLiteral, t.List[float], t.Tuple[float]],
|
|
556
|
+
frequency: t.Optional[ColumnOrLiteral] = None,
|
|
557
|
+
) -> Column:
|
|
558
|
+
if frequency:
|
|
559
|
+
logger.warning("Frequency is not supported in all engines")
|
|
560
|
+
return Column.invoke_expression_over_column(
|
|
561
|
+
col, expression.PercentileDisc, expression=lit(percentage)
|
|
562
|
+
)
|
|
563
|
+
|
|
564
|
+
|
|
565
|
+
@meta()
|
|
566
|
+
def rand(seed: t.Optional[int] = None) -> Column:
|
|
567
|
+
if seed is not None:
|
|
568
|
+
return Column.invoke_expression_over_column(None, expression.Rand, this=lit(seed))
|
|
569
|
+
return Column.invoke_expression_over_column(None, expression.Rand)
|
|
570
|
+
|
|
571
|
+
|
|
572
|
+
@meta(unsupported_engines=["bigquery", "duckdb", "postgres"])
|
|
573
|
+
def randn(seed: t.Optional[int] = None) -> Column:
|
|
574
|
+
if seed is not None:
|
|
575
|
+
return Column.invoke_expression_over_column(None, expression.Randn, this=lit(seed))
|
|
576
|
+
return Column.invoke_expression_over_column(None, expression.Randn)
|
|
577
|
+
|
|
578
|
+
|
|
579
|
+
@meta()
|
|
580
|
+
def round(col: ColumnOrName, scale: t.Optional[int] = None) -> Column:
|
|
581
|
+
if scale is not None:
|
|
582
|
+
return Column.invoke_expression_over_column(col, expression.Round, decimals=scale)
|
|
583
|
+
return Column.invoke_expression_over_column(col, expression.Round)
|
|
584
|
+
|
|
585
|
+
|
|
586
|
+
@meta(unsupported_engines=["duckdb", "postgres"])
|
|
587
|
+
def bround(col: ColumnOrName, scale: t.Optional[int] = None) -> Column:
|
|
588
|
+
if scale is not None:
|
|
589
|
+
return Column.invoke_anonymous_function(col, "BROUND", lit(scale))
|
|
590
|
+
return Column.invoke_anonymous_function(col, "BROUND")
|
|
591
|
+
|
|
592
|
+
|
|
593
|
+
@meta()
|
|
594
|
+
def shiftleft(col: ColumnOrName, numBits: int) -> Column:
|
|
595
|
+
return Column.invoke_expression_over_column(
|
|
596
|
+
col, expression.BitwiseLeftShift, expression=lit(numBits)
|
|
597
|
+
)
|
|
598
|
+
|
|
599
|
+
|
|
600
|
+
shiftLeft = shiftleft
|
|
601
|
+
|
|
602
|
+
|
|
603
|
+
@meta()
|
|
604
|
+
def shiftright(col: ColumnOrName, numBits: int) -> Column:
|
|
605
|
+
return Column.invoke_expression_over_column(
|
|
606
|
+
col, expression.BitwiseRightShift, expression=lit(numBits)
|
|
607
|
+
)
|
|
608
|
+
|
|
609
|
+
|
|
610
|
+
shiftRight = shiftright
|
|
611
|
+
|
|
612
|
+
|
|
613
|
+
@meta(unsupported_engines=["bigquery", "duckdb", "postgres"])
|
|
614
|
+
def shiftrightunsigned(col: ColumnOrName, numBits: int) -> Column:
|
|
615
|
+
return Column.invoke_anonymous_function(
|
|
616
|
+
Column.ensure_col(col).cast("bigint"), "SHIFTRIGHTUNSIGNED", lit(numBits)
|
|
617
|
+
)
|
|
618
|
+
|
|
619
|
+
|
|
620
|
+
shiftRightUnsigned = shiftrightunsigned
|
|
621
|
+
|
|
622
|
+
|
|
623
|
+
@meta()
|
|
624
|
+
def expr(str: str) -> Column:
|
|
625
|
+
return Column(str)
|
|
626
|
+
|
|
627
|
+
|
|
628
|
+
@meta(unsupported_engines=["postgres"])
|
|
629
|
+
def struct(col: t.Union[ColumnOrName, t.Iterable[ColumnOrName]], *cols: ColumnOrName) -> Column:
|
|
630
|
+
columns = ensure_list(col) + list(cols)
|
|
631
|
+
return Column.invoke_expression_over_column(None, expression.Struct, expressions=columns)
|
|
632
|
+
|
|
633
|
+
|
|
634
|
+
@meta(unsupported_engines=["bigquery", "duckdb", "postgres"])
|
|
635
|
+
def conv(col: ColumnOrName, fromBase: int, toBase: int) -> Column:
|
|
636
|
+
return Column.invoke_anonymous_function(col, "CONV", lit(fromBase), lit(toBase))
|
|
637
|
+
|
|
638
|
+
|
|
639
|
+
@meta()
|
|
640
|
+
def lag(
|
|
641
|
+
col: ColumnOrName, offset: t.Optional[int] = 1, default: t.Optional[ColumnOrLiteral] = None
|
|
642
|
+
) -> Column:
|
|
643
|
+
if default is not None:
|
|
644
|
+
return Column.invoke_expression_over_column(
|
|
645
|
+
col, expression.Lag, offset=lit(offset), default=default
|
|
646
|
+
)
|
|
647
|
+
return Column.invoke_expression_over_column(col, expression.Lag, offset=lit(offset))
|
|
648
|
+
|
|
649
|
+
|
|
650
|
+
@meta()
|
|
651
|
+
def lead(
|
|
652
|
+
col: ColumnOrName, offset: t.Optional[int] = 1, default: t.Optional[t.Any] = None
|
|
653
|
+
) -> Column:
|
|
654
|
+
if default is not None:
|
|
655
|
+
return Column.invoke_expression_over_column(
|
|
656
|
+
col, expression.Lead, offset=lit(offset), default=default
|
|
657
|
+
)
|
|
658
|
+
return Column.invoke_expression_over_column(col, expression.Lead, offset=lit(offset))
|
|
659
|
+
|
|
660
|
+
|
|
661
|
+
@meta()
|
|
662
|
+
def nth_value(
|
|
663
|
+
col: ColumnOrName, offset: t.Optional[int] = 1, ignoreNulls: t.Optional[bool] = None
|
|
664
|
+
) -> Column:
|
|
665
|
+
this = Column.invoke_expression_over_column(col, expression.NthValue, offset=lit(offset))
|
|
666
|
+
if ignoreNulls is not None:
|
|
667
|
+
return Column.invoke_expression_over_column(this, expression.IgnoreNulls)
|
|
668
|
+
return this
|
|
669
|
+
|
|
670
|
+
|
|
671
|
+
@meta()
|
|
672
|
+
def ntile(n: int) -> Column:
|
|
673
|
+
return Column.invoke_anonymous_function(None, "NTILE", lit(n))
|
|
674
|
+
|
|
675
|
+
|
|
676
|
+
@meta()
|
|
677
|
+
def current_date() -> Column:
|
|
678
|
+
return Column.invoke_expression_over_column(None, expression.CurrentDate)
|
|
679
|
+
|
|
680
|
+
|
|
681
|
+
@meta()
|
|
682
|
+
def current_timestamp() -> Column:
|
|
683
|
+
return Column.invoke_expression_over_column(None, expression.CurrentTimestamp)
|
|
684
|
+
|
|
685
|
+
|
|
686
|
+
@meta()
|
|
687
|
+
def date_format(col: ColumnOrName, format: str) -> Column:
|
|
688
|
+
return Column.invoke_expression_over_column(
|
|
689
|
+
Column(expression.TimeStrToTime(this=Column.ensure_col(col).expression)),
|
|
690
|
+
expression.TimeToStr,
|
|
691
|
+
format=lit(format),
|
|
692
|
+
)
|
|
693
|
+
|
|
694
|
+
|
|
695
|
+
@meta()
|
|
696
|
+
def year(col: ColumnOrName) -> Column:
|
|
697
|
+
return Column.invoke_expression_over_column(
|
|
698
|
+
Column(expression.TsOrDsToDate(this=Column.ensure_col(col).expression)), expression.Year
|
|
699
|
+
)
|
|
700
|
+
|
|
701
|
+
|
|
702
|
+
@meta()
|
|
703
|
+
def quarter(col: ColumnOrName) -> Column:
|
|
704
|
+
return Column(
|
|
705
|
+
expression.Anonymous(
|
|
706
|
+
this="QUARTER",
|
|
707
|
+
expressions=[expression.TsOrDsToDate(this=Column.ensure_col(col).expression)],
|
|
708
|
+
)
|
|
709
|
+
)
|
|
710
|
+
|
|
711
|
+
|
|
712
|
+
@meta()
|
|
713
|
+
def month(col: ColumnOrName) -> Column:
|
|
714
|
+
return Column.invoke_expression_over_column(
|
|
715
|
+
Column(expression.TsOrDsToDate(this=Column.ensure_col(col).expression)), expression.Month
|
|
716
|
+
)
|
|
717
|
+
|
|
718
|
+
|
|
719
|
+
@meta()
|
|
720
|
+
def dayofweek(col: ColumnOrName) -> Column:
|
|
721
|
+
return Column.invoke_expression_over_column(
|
|
722
|
+
Column(expression.TsOrDsToDate(this=Column.ensure_col(col).expression)),
|
|
723
|
+
expression.DayOfWeek,
|
|
724
|
+
)
|
|
725
|
+
|
|
726
|
+
|
|
727
|
+
@meta()
|
|
728
|
+
def dayofmonth(col: ColumnOrName) -> Column:
|
|
729
|
+
return Column.invoke_expression_over_column(
|
|
730
|
+
Column(expression.TsOrDsToDate(this=Column.ensure_col(col).expression)),
|
|
731
|
+
expression.DayOfMonth,
|
|
732
|
+
)
|
|
733
|
+
|
|
734
|
+
|
|
735
|
+
@meta()
|
|
736
|
+
def dayofyear(col: ColumnOrName) -> Column:
|
|
737
|
+
return Column.invoke_expression_over_column(
|
|
738
|
+
Column(expression.TsOrDsToDate(this=Column.ensure_col(col).expression)),
|
|
739
|
+
expression.DayOfYear,
|
|
740
|
+
)
|
|
741
|
+
|
|
742
|
+
|
|
743
|
+
@meta()
|
|
744
|
+
def hour(col: ColumnOrName) -> Column:
|
|
745
|
+
return Column.invoke_anonymous_function(col, "HOUR")
|
|
746
|
+
|
|
747
|
+
|
|
748
|
+
@meta()
|
|
749
|
+
def minute(col: ColumnOrName) -> Column:
|
|
750
|
+
return Column.invoke_anonymous_function(col, "MINUTE")
|
|
751
|
+
|
|
752
|
+
|
|
753
|
+
@meta()
|
|
754
|
+
def second(col: ColumnOrName) -> Column:
|
|
755
|
+
return Column.invoke_anonymous_function(col, "SECOND")
|
|
756
|
+
|
|
757
|
+
|
|
758
|
+
@meta()
|
|
759
|
+
def weekofyear(col: ColumnOrName) -> Column:
|
|
760
|
+
return Column.invoke_expression_over_column(
|
|
761
|
+
Column(expression.TsOrDsToDate(this=Column.ensure_col(col).expression)),
|
|
762
|
+
expression.WeekOfYear,
|
|
763
|
+
)
|
|
764
|
+
|
|
765
|
+
|
|
766
|
+
@meta()
|
|
767
|
+
def make_date(year: ColumnOrName, month: ColumnOrName, day: ColumnOrName) -> Column:
|
|
768
|
+
return Column.invoke_anonymous_function(year, "MAKE_DATE", month, day)
|
|
769
|
+
|
|
770
|
+
|
|
771
|
+
@meta()
|
|
772
|
+
def date_add(
|
|
773
|
+
col: ColumnOrName, days: t.Union[ColumnOrName, int], cast_as_date: bool = True
|
|
774
|
+
) -> Column:
|
|
775
|
+
if isinstance(days, int):
|
|
776
|
+
if days < 0:
|
|
777
|
+
return date_sub(col, days * -1)
|
|
778
|
+
days = lit(days)
|
|
779
|
+
result = Column.invoke_expression_over_column(
|
|
780
|
+
Column.ensure_col(col).cast("date"),
|
|
781
|
+
expression.DateAdd,
|
|
782
|
+
expression=days,
|
|
783
|
+
unit=expression.Var(this="DAY"),
|
|
784
|
+
)
|
|
785
|
+
if cast_as_date:
|
|
786
|
+
return result.cast("date")
|
|
787
|
+
return result
|
|
788
|
+
|
|
789
|
+
|
|
790
|
+
@meta()
|
|
791
|
+
def date_sub(
|
|
792
|
+
col: ColumnOrName, days: t.Union[ColumnOrName, int], cast_as_date: bool = True
|
|
793
|
+
) -> Column:
|
|
794
|
+
"""
|
|
795
|
+
Non-standard argument: cast_as_date
|
|
796
|
+
"""
|
|
797
|
+
if isinstance(days, int):
|
|
798
|
+
if days < 0:
|
|
799
|
+
return date_add(col, days * -1)
|
|
800
|
+
days = lit(days)
|
|
801
|
+
result = Column.invoke_expression_over_column(
|
|
802
|
+
Column.ensure_col(col).cast("date"),
|
|
803
|
+
expression.DateSub,
|
|
804
|
+
expression=days,
|
|
805
|
+
unit=expression.Var(this="DAY"),
|
|
806
|
+
)
|
|
807
|
+
if cast_as_date:
|
|
808
|
+
return result.cast("date")
|
|
809
|
+
return result
|
|
810
|
+
|
|
811
|
+
|
|
812
|
+
@meta()
|
|
813
|
+
def date_diff(end: ColumnOrName, start: ColumnOrName) -> Column:
|
|
814
|
+
return Column.invoke_expression_over_column(
|
|
815
|
+
Column.ensure_col(end).cast("date"),
|
|
816
|
+
expression.DateDiff,
|
|
817
|
+
expression=Column.ensure_col(start).cast("date"),
|
|
818
|
+
)
|
|
819
|
+
|
|
820
|
+
|
|
821
|
+
@meta()
|
|
822
|
+
def add_months(
|
|
823
|
+
start: ColumnOrName, months: t.Union[ColumnOrName, int], cast_as_date: bool = True
|
|
824
|
+
) -> Column:
|
|
825
|
+
"""
|
|
826
|
+
Non-standard argument: cast_as_date
|
|
827
|
+
"""
|
|
828
|
+
start_col = Column(start).cast("date")
|
|
829
|
+
|
|
830
|
+
if isinstance(months, int):
|
|
831
|
+
if months < 0:
|
|
832
|
+
end_col = Column(
|
|
833
|
+
expression.Interval(
|
|
834
|
+
this=lit(months * -1).expression, unit=expression.Var(this="MONTH")
|
|
835
|
+
)
|
|
836
|
+
)
|
|
837
|
+
result = start_col - end_col
|
|
838
|
+
else:
|
|
839
|
+
end_col = Column(
|
|
840
|
+
expression.Interval(this=lit(months).expression, unit=expression.Var(this="MONTH"))
|
|
841
|
+
)
|
|
842
|
+
result = start_col + end_col
|
|
843
|
+
else:
|
|
844
|
+
end_col = Column(
|
|
845
|
+
expression.Interval(
|
|
846
|
+
this=Column.ensure_col(months).expression, unit=expression.Var(this="MONTH")
|
|
847
|
+
)
|
|
848
|
+
)
|
|
849
|
+
result = start_col + end_col
|
|
850
|
+
if cast_as_date:
|
|
851
|
+
return result.cast("date")
|
|
852
|
+
return result
|
|
853
|
+
|
|
854
|
+
|
|
855
|
+
@meta()
|
|
856
|
+
def months_between(
|
|
857
|
+
date1: ColumnOrName, date2: ColumnOrName, roundOff: t.Optional[bool] = None
|
|
858
|
+
) -> Column:
|
|
859
|
+
if roundOff is None:
|
|
860
|
+
return Column.invoke_expression_over_column(
|
|
861
|
+
date1, expression.MonthsBetween, expression=date2
|
|
862
|
+
)
|
|
863
|
+
|
|
864
|
+
return Column.invoke_expression_over_column(
|
|
865
|
+
date1, expression.MonthsBetween, expression=date2, roundoff=lit(roundOff)
|
|
866
|
+
)
|
|
867
|
+
|
|
868
|
+
|
|
869
|
+
@meta()
|
|
870
|
+
def to_date(col: ColumnOrName, format: t.Optional[str] = None) -> Column:
|
|
871
|
+
if format is not None:
|
|
872
|
+
return Column.invoke_expression_over_column(
|
|
873
|
+
col, expression.TsOrDsToDate, format=lit(format)
|
|
874
|
+
)
|
|
875
|
+
return Column.invoke_expression_over_column(col, expression.TsOrDsToDate)
|
|
876
|
+
|
|
877
|
+
|
|
878
|
+
@meta()
|
|
879
|
+
def to_timestamp(col: ColumnOrName, format: t.Optional[str] = None) -> Column:
|
|
880
|
+
if format is not None:
|
|
881
|
+
return Column.invoke_expression_over_column(col, expression.StrToTime, format=lit(format))
|
|
882
|
+
|
|
883
|
+
return Column.ensure_col(col).cast("timestamp")
|
|
884
|
+
|
|
885
|
+
|
|
886
|
+
@meta()
|
|
887
|
+
def trunc(col: ColumnOrName, format: str) -> Column:
|
|
888
|
+
return Column.invoke_expression_over_column(
|
|
889
|
+
Column(col).cast("date"), expression.DateTrunc, unit=lit(format)
|
|
890
|
+
).cast("date")
|
|
891
|
+
|
|
892
|
+
|
|
893
|
+
@meta()
|
|
894
|
+
def date_trunc(format: str, timestamp: ColumnOrName) -> Column:
|
|
895
|
+
return Column.invoke_expression_over_column(
|
|
896
|
+
Column(timestamp).cast("timestamp"), expression.TimestampTrunc, unit=lit(format)
|
|
897
|
+
).cast("timestamp")
|
|
898
|
+
|
|
899
|
+
|
|
900
|
+
@meta(unsupported_engines=["duckdb", "postgres"])
|
|
901
|
+
def next_day(col: ColumnOrName, dayOfWeek: str) -> Column:
|
|
902
|
+
return Column.invoke_anonymous_function(col, "NEXT_DAY", lit(dayOfWeek))
|
|
903
|
+
|
|
904
|
+
|
|
905
|
+
@meta(unsupported_engines=["duckdb", "postgres"])
|
|
906
|
+
def last_day(col: ColumnOrName) -> Column:
|
|
907
|
+
return Column.invoke_expression_over_column(col, expression.LastDay)
|
|
908
|
+
|
|
909
|
+
|
|
910
|
+
@meta()
|
|
911
|
+
def from_unixtime(col: ColumnOrName, format: t.Optional[str] = None) -> Column:
|
|
912
|
+
from sqlframe.base.session import _BaseSession
|
|
913
|
+
|
|
914
|
+
if format is None:
|
|
915
|
+
format = _BaseSession().DEFAULT_TIME_FORMAT
|
|
916
|
+
return Column.invoke_expression_over_column(col, expression.UnixToStr, format=lit(format))
|
|
917
|
+
|
|
918
|
+
|
|
919
|
+
@meta()
|
|
920
|
+
def unix_timestamp(
|
|
921
|
+
timestamp: t.Optional[ColumnOrName] = None, format: t.Optional[str] = None
|
|
922
|
+
) -> Column:
|
|
923
|
+
from sqlframe.base.session import _BaseSession
|
|
924
|
+
|
|
925
|
+
if format is None:
|
|
926
|
+
format = _BaseSession().DEFAULT_TIME_FORMAT
|
|
927
|
+
return Column.invoke_expression_over_column(
|
|
928
|
+
timestamp, expression.StrToUnix, format=lit(format)
|
|
929
|
+
).cast("bigint")
|
|
930
|
+
|
|
931
|
+
|
|
932
|
+
@meta(unsupported_engines=["duckdb", "postgres", "bigquery", "snowflake", "redshift"])
|
|
933
|
+
def from_utc_timestamp(timestamp: ColumnOrName, tz: ColumnOrName) -> Column:
|
|
934
|
+
tz_column = tz if isinstance(tz, Column) else lit(tz)
|
|
935
|
+
return Column.invoke_expression_over_column(timestamp, expression.AtTimeZone, zone=tz_column)
|
|
936
|
+
|
|
937
|
+
|
|
938
|
+
@meta(unsupported_engines=["duckdb", "postgres", "bigquery", "snowflake", "redshift"])
|
|
939
|
+
def to_utc_timestamp(timestamp: ColumnOrName, tz: ColumnOrName) -> Column:
|
|
940
|
+
tz_column = tz if isinstance(tz, Column) else lit(tz)
|
|
941
|
+
return Column.invoke_expression_over_column(timestamp, expression.FromTimeZone, zone=tz_column)
|
|
942
|
+
|
|
943
|
+
|
|
944
|
+
@meta()
|
|
945
|
+
def timestamp_seconds(col: ColumnOrName) -> Column:
|
|
946
|
+
return Column.invoke_expression_over_column(col, expression.UnixToTime)
|
|
947
|
+
|
|
948
|
+
|
|
949
|
+
@meta(unsupported_engines=["duckdb", "postgres", "bigquery", "redshift", "snowflake", "spark"])
|
|
950
|
+
def window(
|
|
951
|
+
timeColumn: ColumnOrName,
|
|
952
|
+
windowDuration: str,
|
|
953
|
+
slideDuration: t.Optional[str] = None,
|
|
954
|
+
startTime: t.Optional[str] = None,
|
|
955
|
+
) -> Column:
|
|
956
|
+
if slideDuration is not None and startTime is not None:
|
|
957
|
+
value = Column.invoke_anonymous_function(
|
|
958
|
+
timeColumn, "WINDOW", lit(windowDuration), lit(slideDuration), lit(startTime)
|
|
959
|
+
)
|
|
960
|
+
elif slideDuration is not None:
|
|
961
|
+
value = Column.invoke_anonymous_function(
|
|
962
|
+
timeColumn, "WINDOW", lit(windowDuration), lit(slideDuration)
|
|
963
|
+
)
|
|
964
|
+
elif startTime is not None:
|
|
965
|
+
value = Column.invoke_anonymous_function(
|
|
966
|
+
timeColumn, "WINDOW", lit(windowDuration), lit(windowDuration), lit(startTime)
|
|
967
|
+
)
|
|
968
|
+
else:
|
|
969
|
+
value = Column.invoke_anonymous_function(timeColumn, "WINDOW", lit(windowDuration))
|
|
970
|
+
return value
|
|
971
|
+
|
|
972
|
+
|
|
973
|
+
@meta(unsupported_engines=["duckdb", "postgres", "bigquery", "redshift", "snowflake", "spark"])
|
|
974
|
+
def session_window(timeColumn: ColumnOrName, gapDuration: ColumnOrName) -> Column:
|
|
975
|
+
gap_duration_column = gapDuration if isinstance(gapDuration, Column) else lit(gapDuration)
|
|
976
|
+
return Column.invoke_anonymous_function(timeColumn, "SESSION_WINDOW", gap_duration_column)
|
|
977
|
+
|
|
978
|
+
|
|
979
|
+
@meta(unsupported_engines=["bigquery", "duckdb", "postgres"])
|
|
980
|
+
def crc32(col: ColumnOrName) -> Column:
|
|
981
|
+
return Column.invoke_anonymous_function(col, "CRC32")
|
|
982
|
+
|
|
983
|
+
|
|
984
|
+
@meta()
|
|
985
|
+
def md5(col: ColumnOrName) -> Column:
|
|
986
|
+
return Column.invoke_expression_over_column(col, expression.MD5)
|
|
987
|
+
|
|
988
|
+
|
|
989
|
+
@meta(unsupported_engines=["duckdb", "postgres"])
|
|
990
|
+
def sha1(col: ColumnOrName) -> Column:
|
|
991
|
+
return Column.invoke_expression_over_column(col, expression.SHA)
|
|
992
|
+
|
|
993
|
+
|
|
994
|
+
@meta(unsupported_engines=["bigquery", "duckdb", "postgres"])
|
|
995
|
+
def sha2(col: ColumnOrName, numBits: int) -> Column:
|
|
996
|
+
return Column.invoke_expression_over_column(col, expression.SHA2, length=lit(numBits))
|
|
997
|
+
|
|
998
|
+
|
|
999
|
+
@meta(unsupported_engines=["postgres"])
|
|
1000
|
+
def hash(*cols: ColumnOrName) -> Column:
|
|
1001
|
+
args = cols[1:] if len(cols) > 1 else []
|
|
1002
|
+
return Column.invoke_anonymous_function(cols[0], "HASH", *args)
|
|
1003
|
+
|
|
1004
|
+
|
|
1005
|
+
@meta(unsupported_engines=["bigquery", "duckdb", "postgres"])
|
|
1006
|
+
def xxhash64(*cols: ColumnOrName) -> Column:
|
|
1007
|
+
args = cols[1:] if len(cols) > 1 else []
|
|
1008
|
+
return Column.invoke_anonymous_function(cols[0], "XXHASH64", *args)
|
|
1009
|
+
|
|
1010
|
+
|
|
1011
|
+
@meta(unsupported_engines=["duckdb", "postgres", "bigquery", "snowflake", "redshift"])
|
|
1012
|
+
def assert_true(col: ColumnOrName, errorMsg: t.Optional[ColumnOrName] = None) -> Column:
|
|
1013
|
+
if errorMsg is not None:
|
|
1014
|
+
error_msg_col = errorMsg if isinstance(errorMsg, Column) else lit(errorMsg)
|
|
1015
|
+
return Column.invoke_anonymous_function(col, "ASSERT_TRUE", error_msg_col)
|
|
1016
|
+
return Column.invoke_anonymous_function(col, "ASSERT_TRUE")
|
|
1017
|
+
|
|
1018
|
+
|
|
1019
|
+
@meta(unsupported_engines=["duckdb", "postgres", "bigquery", "snowflake", "redshift"])
|
|
1020
|
+
def raise_error(errorMsg: ColumnOrName) -> Column:
|
|
1021
|
+
error_msg_col = errorMsg if isinstance(errorMsg, Column) else lit(errorMsg)
|
|
1022
|
+
return Column.invoke_anonymous_function(error_msg_col, "RAISE_ERROR")
|
|
1023
|
+
|
|
1024
|
+
|
|
1025
|
+
@meta()
|
|
1026
|
+
def upper(col: ColumnOrName) -> Column:
|
|
1027
|
+
return Column.invoke_expression_over_column(col, expression.Upper)
|
|
1028
|
+
|
|
1029
|
+
|
|
1030
|
+
@meta()
|
|
1031
|
+
def lower(col: ColumnOrName) -> Column:
|
|
1032
|
+
return Column.invoke_expression_over_column(col, expression.Lower)
|
|
1033
|
+
|
|
1034
|
+
|
|
1035
|
+
@meta()
|
|
1036
|
+
def ascii(col: ColumnOrName) -> Column:
|
|
1037
|
+
return Column.invoke_anonymous_function(col, "ASCII")
|
|
1038
|
+
|
|
1039
|
+
|
|
1040
|
+
@meta()
|
|
1041
|
+
def base64(col: ColumnOrName) -> Column:
|
|
1042
|
+
return Column.invoke_expression_over_column(col, expression.ToBase64)
|
|
1043
|
+
|
|
1044
|
+
|
|
1045
|
+
@meta()
|
|
1046
|
+
def unbase64(col: ColumnOrName) -> Column:
|
|
1047
|
+
return Column.invoke_expression_over_column(col, expression.FromBase64)
|
|
1048
|
+
|
|
1049
|
+
|
|
1050
|
+
@meta()
|
|
1051
|
+
def ltrim(col: ColumnOrName) -> Column:
|
|
1052
|
+
return Column.invoke_anonymous_function(col, "LTRIM")
|
|
1053
|
+
|
|
1054
|
+
|
|
1055
|
+
@meta()
|
|
1056
|
+
def rtrim(col: ColumnOrName) -> Column:
|
|
1057
|
+
return Column.invoke_anonymous_function(col, "RTRIM")
|
|
1058
|
+
|
|
1059
|
+
|
|
1060
|
+
@meta()
|
|
1061
|
+
def trim(col: ColumnOrName) -> Column:
|
|
1062
|
+
return Column.invoke_expression_over_column(col, expression.Trim)
|
|
1063
|
+
|
|
1064
|
+
|
|
1065
|
+
@meta()
|
|
1066
|
+
def concat_ws(sep: str, *cols: ColumnOrName) -> Column:
|
|
1067
|
+
return Column.invoke_expression_over_column(
|
|
1068
|
+
None, expression.ConcatWs, expressions=[lit(sep)] + list(cols)
|
|
1069
|
+
)
|
|
1070
|
+
|
|
1071
|
+
|
|
1072
|
+
@meta(unsupported_engines="bigquery")
|
|
1073
|
+
def decode(col: ColumnOrName, charset: str) -> Column:
|
|
1074
|
+
return Column.invoke_expression_over_column(
|
|
1075
|
+
col, expression.Decode, charset=expression.Literal.string(charset)
|
|
1076
|
+
)
|
|
1077
|
+
|
|
1078
|
+
|
|
1079
|
+
@meta(unsupported_engines="bigquery")
|
|
1080
|
+
def encode(col: ColumnOrName, charset: str) -> Column:
|
|
1081
|
+
return Column.invoke_expression_over_column(
|
|
1082
|
+
col, expression.Encode, charset=expression.Literal.string(charset)
|
|
1083
|
+
)
|
|
1084
|
+
|
|
1085
|
+
|
|
1086
|
+
@meta(unsupported_engines="duckdb")
|
|
1087
|
+
def format_number(col: ColumnOrName, d: int) -> Column:
|
|
1088
|
+
return Column.invoke_anonymous_function(col, "FORMAT_NUMBER", lit(d))
|
|
1089
|
+
|
|
1090
|
+
|
|
1091
|
+
@meta()
|
|
1092
|
+
def format_string(format: str, *cols: ColumnOrName) -> Column:
|
|
1093
|
+
format_col = lit(format)
|
|
1094
|
+
columns = [Column.ensure_col(x) for x in cols]
|
|
1095
|
+
return Column.invoke_anonymous_function(format_col, "FORMAT_STRING", *columns)
|
|
1096
|
+
|
|
1097
|
+
|
|
1098
|
+
@meta()
|
|
1099
|
+
def instr(col: ColumnOrName, substr: str) -> Column:
|
|
1100
|
+
return Column.invoke_expression_over_column(col, expression.StrPosition, substr=lit(substr))
|
|
1101
|
+
|
|
1102
|
+
|
|
1103
|
+
@meta()
|
|
1104
|
+
def overlay(
|
|
1105
|
+
src: ColumnOrName,
|
|
1106
|
+
replace: ColumnOrName,
|
|
1107
|
+
pos: t.Union[ColumnOrName, int],
|
|
1108
|
+
len: t.Optional[t.Union[ColumnOrName, int]] = None,
|
|
1109
|
+
) -> Column:
|
|
1110
|
+
pos_value = lit(pos) if isinstance(pos, int) else pos
|
|
1111
|
+
if len is not None:
|
|
1112
|
+
len_value = lit(len) if isinstance(len, int) else len
|
|
1113
|
+
return Column.invoke_anonymous_function(src, "OVERLAY", replace, pos_value, len_value)
|
|
1114
|
+
return Column.invoke_anonymous_function(src, "OVERLAY", replace, pos_value)
|
|
1115
|
+
|
|
1116
|
+
|
|
1117
|
+
@meta(unsupported_engines=["bigquery", "duckdb", "postgres"])
|
|
1118
|
+
def sentences(
|
|
1119
|
+
string: ColumnOrName,
|
|
1120
|
+
language: t.Optional[ColumnOrName] = None,
|
|
1121
|
+
country: t.Optional[ColumnOrName] = None,
|
|
1122
|
+
) -> Column:
|
|
1123
|
+
if language is not None and country is not None:
|
|
1124
|
+
return Column.invoke_anonymous_function(string, "SENTENCES", language, country)
|
|
1125
|
+
if language is not None:
|
|
1126
|
+
return Column.invoke_anonymous_function(string, "SENTENCES", language)
|
|
1127
|
+
if country is not None:
|
|
1128
|
+
return Column.invoke_anonymous_function(string, "SENTENCES", lit("en"), country)
|
|
1129
|
+
return Column.invoke_anonymous_function(string, "SENTENCES")
|
|
1130
|
+
|
|
1131
|
+
|
|
1132
|
+
@meta()
|
|
1133
|
+
def substring(str: ColumnOrName, pos: int, len: int) -> Column:
|
|
1134
|
+
return Column.ensure_col(str).substr(pos, len)
|
|
1135
|
+
|
|
1136
|
+
|
|
1137
|
+
@meta(unsupported_engines=["duckdb", "postgres"])
|
|
1138
|
+
def substring_index(str: ColumnOrName, delim: str, count: int) -> Column:
|
|
1139
|
+
return Column.invoke_anonymous_function(str, "SUBSTRING_INDEX", lit(delim), lit(count))
|
|
1140
|
+
|
|
1141
|
+
|
|
1142
|
+
@meta(unsupported_engines="bigquery")
|
|
1143
|
+
def levenshtein(
|
|
1144
|
+
left: ColumnOrName, right: ColumnOrName, threshold: t.Optional[int] = None
|
|
1145
|
+
) -> Column:
|
|
1146
|
+
value: t.Union[expression.Case, expression.Levenshtein] = expression.Levenshtein(
|
|
1147
|
+
this=Column.ensure_col(left).expression,
|
|
1148
|
+
expression=Column.ensure_col(right).expression,
|
|
1149
|
+
)
|
|
1150
|
+
if threshold is not None:
|
|
1151
|
+
value = (
|
|
1152
|
+
expression.case()
|
|
1153
|
+
.when(expression.LTE(this=value, expression=lit(threshold).expression), value)
|
|
1154
|
+
.else_(lit(-1).expression)
|
|
1155
|
+
)
|
|
1156
|
+
return Column(value)
|
|
1157
|
+
|
|
1158
|
+
|
|
1159
|
+
@meta(unsupported_engines="bigquery")
|
|
1160
|
+
def locate(substr: str, str: ColumnOrName, pos: t.Optional[int] = None) -> Column:
|
|
1161
|
+
substr_col = lit(substr)
|
|
1162
|
+
if pos is not None:
|
|
1163
|
+
return Column.invoke_expression_over_column(
|
|
1164
|
+
str, expression.StrPosition, substr=substr_col, position=pos
|
|
1165
|
+
)
|
|
1166
|
+
return Column.invoke_expression_over_column(str, expression.StrPosition, substr=substr_col)
|
|
1167
|
+
|
|
1168
|
+
|
|
1169
|
+
@meta()
|
|
1170
|
+
def lpad(col: ColumnOrName, len: int, pad: str) -> Column:
|
|
1171
|
+
return Column.invoke_anonymous_function(col, "LPAD", lit(len), lit(pad))
|
|
1172
|
+
|
|
1173
|
+
|
|
1174
|
+
@meta()
|
|
1175
|
+
def rpad(col: ColumnOrName, len: int, pad: str) -> Column:
|
|
1176
|
+
return Column.invoke_anonymous_function(col, "RPAD", lit(len), lit(pad))
|
|
1177
|
+
|
|
1178
|
+
|
|
1179
|
+
@meta()
|
|
1180
|
+
def repeat(col: ColumnOrName, n: int) -> Column:
|
|
1181
|
+
return Column.invoke_expression_over_column(col, expression.Repeat, times=lit(n))
|
|
1182
|
+
|
|
1183
|
+
|
|
1184
|
+
@meta()
|
|
1185
|
+
def split(str: ColumnOrName, pattern: str, limit: t.Optional[int] = None) -> Column:
|
|
1186
|
+
if limit is not None:
|
|
1187
|
+
return Column.invoke_expression_over_column(
|
|
1188
|
+
str, expression.RegexpSplit, expression=lit(pattern), limit=lit(limit)
|
|
1189
|
+
)
|
|
1190
|
+
return Column.invoke_expression_over_column(
|
|
1191
|
+
str, expression.RegexpSplit, expression=lit(pattern)
|
|
1192
|
+
)
|
|
1193
|
+
|
|
1194
|
+
|
|
1195
|
+
@meta(unsupported_engines="postgres")
|
|
1196
|
+
def regexp_extract(str: ColumnOrName, pattern: str, idx: t.Optional[int] = None) -> Column:
|
|
1197
|
+
if idx is not None:
|
|
1198
|
+
return Column.invoke_expression_over_column(
|
|
1199
|
+
str,
|
|
1200
|
+
expression.RegexpExtract,
|
|
1201
|
+
expression=lit(pattern),
|
|
1202
|
+
group=lit(idx),
|
|
1203
|
+
)
|
|
1204
|
+
return Column.invoke_expression_over_column(
|
|
1205
|
+
str, expression.RegexpExtract, expression=lit(pattern)
|
|
1206
|
+
)
|
|
1207
|
+
|
|
1208
|
+
|
|
1209
|
+
@meta()
|
|
1210
|
+
def regexp_replace(
|
|
1211
|
+
str: ColumnOrName, pattern: str, replacement: str, position: t.Optional[int] = None
|
|
1212
|
+
) -> Column:
|
|
1213
|
+
if position is not None:
|
|
1214
|
+
return Column.invoke_expression_over_column(
|
|
1215
|
+
str,
|
|
1216
|
+
expression.RegexpReplace,
|
|
1217
|
+
expression=lit(pattern),
|
|
1218
|
+
replacement=lit(replacement),
|
|
1219
|
+
position=lit(position),
|
|
1220
|
+
)
|
|
1221
|
+
return Column.invoke_expression_over_column(
|
|
1222
|
+
str,
|
|
1223
|
+
expression.RegexpReplace,
|
|
1224
|
+
expression=lit(pattern),
|
|
1225
|
+
replacement=lit(replacement),
|
|
1226
|
+
)
|
|
1227
|
+
|
|
1228
|
+
|
|
1229
|
+
@meta(unsupported_engines="duckdb")
|
|
1230
|
+
def initcap(col: ColumnOrName) -> Column:
|
|
1231
|
+
return Column.invoke_expression_over_column(col, expression.Initcap)
|
|
1232
|
+
|
|
1233
|
+
|
|
1234
|
+
@meta()
|
|
1235
|
+
def soundex(col: ColumnOrName) -> Column:
|
|
1236
|
+
return Column.invoke_anonymous_function(col, "SOUNDEX")
|
|
1237
|
+
|
|
1238
|
+
|
|
1239
|
+
@meta(unsupported_engines="postgres")
|
|
1240
|
+
def bin(col: ColumnOrName) -> Column:
|
|
1241
|
+
return Column.invoke_anonymous_function(col, "BIN")
|
|
1242
|
+
|
|
1243
|
+
|
|
1244
|
+
@meta(unsupported_engines="postgres")
|
|
1245
|
+
def hex(col: ColumnOrName) -> Column:
|
|
1246
|
+
return Column.invoke_expression_over_column(col, expression.Hex)
|
|
1247
|
+
|
|
1248
|
+
|
|
1249
|
+
@meta(unsupported_engines="postgres")
|
|
1250
|
+
def unhex(col: ColumnOrName) -> Column:
|
|
1251
|
+
return Column.invoke_expression_over_column(col, expression.Unhex)
|
|
1252
|
+
|
|
1253
|
+
|
|
1254
|
+
@meta()
|
|
1255
|
+
def length(col: ColumnOrName) -> Column:
|
|
1256
|
+
return Column.invoke_expression_over_column(col, expression.Length)
|
|
1257
|
+
|
|
1258
|
+
|
|
1259
|
+
@meta(unsupported_engines="duckdb")
|
|
1260
|
+
def octet_length(col: ColumnOrName) -> Column:
|
|
1261
|
+
return Column.invoke_anonymous_function(col, "OCTET_LENGTH")
|
|
1262
|
+
|
|
1263
|
+
|
|
1264
|
+
@meta()
|
|
1265
|
+
def bit_length(col: ColumnOrName) -> Column:
|
|
1266
|
+
return Column.invoke_anonymous_function(col, "BIT_LENGTH")
|
|
1267
|
+
|
|
1268
|
+
|
|
1269
|
+
@meta()
|
|
1270
|
+
def translate(srcCol: ColumnOrName, matching: str, replace: str) -> Column:
|
|
1271
|
+
return Column.invoke_anonymous_function(srcCol, "TRANSLATE", lit(matching), lit(replace))
|
|
1272
|
+
|
|
1273
|
+
|
|
1274
|
+
@meta()
|
|
1275
|
+
def array(*cols: t.Union[ColumnOrName, t.Iterable[ColumnOrName]]) -> Column:
|
|
1276
|
+
columns = _flatten(cols) if not isinstance(cols[0], (str, Column)) else cols
|
|
1277
|
+
return Column.invoke_expression_over_column(None, expression.Array, expressions=columns)
|
|
1278
|
+
|
|
1279
|
+
|
|
1280
|
+
@meta(unsupported_engines=["bigquery", "postgres"])
|
|
1281
|
+
def create_map(*cols: t.Union[ColumnOrName, t.Iterable[ColumnOrName]]) -> Column:
|
|
1282
|
+
cols = list(_flatten(cols)) if not isinstance(cols[0], (str, Column)) else cols # type: ignore
|
|
1283
|
+
return Column.invoke_expression_over_column(
|
|
1284
|
+
None,
|
|
1285
|
+
expression.VarMap,
|
|
1286
|
+
keys=array(*cols[::2]).expression,
|
|
1287
|
+
values=array(*cols[1::2]).expression,
|
|
1288
|
+
)
|
|
1289
|
+
|
|
1290
|
+
|
|
1291
|
+
@meta(unsupported_engines=["bigquery", "postgres"])
|
|
1292
|
+
def map_from_arrays(col1: ColumnOrName, col2: ColumnOrName) -> Column:
|
|
1293
|
+
return Column.invoke_expression_over_column(None, expression.Map, keys=col1, values=col2)
|
|
1294
|
+
|
|
1295
|
+
|
|
1296
|
+@meta()
+def array_contains(col: ColumnOrName, value: ColumnOrLiteral) -> Column:
+    value_col = value if isinstance(value, Column) else lit(value)
+    return Column.invoke_expression_over_column(
+        col, expression.ArrayContains, expression=value_col.expression
+    )
+
+
+@meta(unsupported_engines="bigquery")
+def arrays_overlap(col1: ColumnOrName, col2: ColumnOrName) -> Column:
+    return Column.invoke_expression_over_column(col1, expression.ArrayOverlaps, expression=col2)
+
+
+@meta()
+def slice(
+    x: ColumnOrName, start: t.Union[ColumnOrName, int], length: t.Union[ColumnOrName, int]
+) -> Column:
+    start_col = lit(start) if isinstance(start, int) else start
+    length_col = lit(length) if isinstance(length, int) else length
+    return Column.invoke_anonymous_function(x, "SLICE", start_col, length_col)
+
+
+@meta()
+def array_join(
+    col: ColumnOrName, delimiter: str, null_replacement: t.Optional[str] = None
+) -> Column:
+    if null_replacement is not None:
+        return Column.invoke_expression_over_column(
+            col, expression.ArrayToString, expression=lit(delimiter), null=lit(null_replacement)
+        )
+    return Column.invoke_expression_over_column(
+        col, expression.ArrayToString, expression=lit(delimiter)
+    )
+
+
+@meta()
+def concat(*cols: ColumnOrName) -> Column:
+    return Column.invoke_expression_over_column(None, expression.Concat, expressions=cols)
+
+
+@meta()
+def array_position(col: ColumnOrName, value: ColumnOrLiteral) -> Column:
+    value_col = value if isinstance(value, Column) else lit(value)
+    # Some engines return NULL if item is not found but Spark expects 0 so we coalesce to 0
+    return coalesce(Column.invoke_anonymous_function(col, "ARRAY_POSITION", value_col), lit(0))
+
+
+@meta()
+def element_at(col: ColumnOrName, value: ColumnOrLiteral) -> Column:
+    value_col = value if isinstance(value, Column) else lit(value)
+    return Column.invoke_anonymous_function(col, "ELEMENT_AT", value_col)
+
+
+@meta()
+def array_remove(col: ColumnOrName, value: ColumnOrLiteral) -> Column:
+    value_col = value if isinstance(value, Column) else lit(value)
+    return Column.invoke_anonymous_function(col, "ARRAY_REMOVE", value_col)
+
+
+@meta(unsupported_engines="postgres")
+def array_distinct(col: ColumnOrName) -> Column:
+    return Column.invoke_anonymous_function(col, "ARRAY_DISTINCT")
+
+
+@meta(unsupported_engines=["bigquery", "postgres"])
+def array_intersect(col1: ColumnOrName, col2: ColumnOrName) -> Column:
+    return Column.invoke_anonymous_function(col1, "ARRAY_INTERSECT", Column.ensure_col(col2))
+
+
+@meta(unsupported_engines=["postgres"])
+def array_union(col1: ColumnOrName, col2: ColumnOrName) -> Column:
+    return Column.invoke_anonymous_function(col1, "ARRAY_UNION", Column.ensure_col(col2))
+
+
+@meta(unsupported_engines=["bigquery", "duckdb", "postgres"])
+def array_except(col1: ColumnOrName, col2: ColumnOrName) -> Column:
+    return Column.invoke_anonymous_function(col1, "ARRAY_EXCEPT", Column.ensure_col(col2))
+
+
+@meta()
+def explode(col: ColumnOrName) -> Column:
+    return Column.invoke_expression_over_column(col, expression.Explode)
+
+
+@meta(unsupported_engines=["duckdb", "postgres"])
+def posexplode(col: ColumnOrName) -> Column:
+    return Column.invoke_expression_over_column(col, expression.Posexplode)
+
+
+@meta(unsupported_engines=["duckdb", "postgres"])
+def explode_outer(col: ColumnOrName) -> Column:
+    return Column.invoke_expression_over_column(col, expression.ExplodeOuter)
+
+
+@meta(unsupported_engines=["duckdb", "postgres"])
+def posexplode_outer(col: ColumnOrName) -> Column:
+    return Column.invoke_expression_over_column(col, expression.PosexplodeOuter)
+
+
+@meta()
+def get_json_object(col: ColumnOrName, path: str) -> Column:
+    return Column.invoke_expression_over_column(col, expression.JSONExtract, expression=lit(path))
+
+
+@meta(unsupported_engines=["bigquery", "duckdb", "postgres"])
+def json_tuple(col: ColumnOrName, *fields: str) -> Column:
+    return Column.invoke_anonymous_function(col, "JSON_TUPLE", *[lit(field) for field in fields])
+
+
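The inline comment in `array_position` is the key detail here: the engine-level ARRAY_POSITION may return NULL for a missing element, so the call is wrapped in COALESCE(..., 0) to match Spark's convention of returning 0 when the value is not found. A hedged sketch (exact SQL text depends on the target dialect):

# Illustrative sketch only -- not part of the sqlframe source.
from sqlframe.base import functions as F

pos = F.array_position("tags", "urgent")
# Roughly renders as: COALESCE(ARRAY_POSITION(tags, 'urgent'), 0)
print(pos.expression.sql())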
+@meta(unsupported_engines=["bigquery", "duckdb", "postgres"])
+def from_json(
+    col: ColumnOrName,
+    schema: t.Union[ArrayType, StructType, Column, str],
+    options: t.Optional[t.Dict[str, str]] = None,
+) -> Column:
+    from sqlframe.base.types import ArrayType, StructType
+
+    if isinstance(schema, (ArrayType, StructType)):
+        schema = schema.simpleString()
+    schema = schema if isinstance(schema, Column) else lit(schema)
+    if options is not None:
+        options_col = create_map([lit(x) for x in _flatten(options.items())])
+        return Column.invoke_anonymous_function(col, "FROM_JSON", schema, options_col)
+    return Column.invoke_anonymous_function(col, "FROM_JSON", schema)
+
+
+@meta(unsupported_engines=["bigquery", "duckdb", "postgres"])
+def to_json(col: ColumnOrName, options: t.Optional[t.Dict[str, str]] = None) -> Column:
+    if options is not None:
+        options_col = create_map([lit(x) for x in _flatten(options.items())])
+        return Column.invoke_expression_over_column(col, expression.JSONFormat, options=options_col)
+    return Column.invoke_expression_over_column(col, expression.JSONFormat)
+
+
+@meta(unsupported_engines="*")
+def schema_of_json(col: ColumnOrName, options: t.Optional[t.Dict[str, str]] = None) -> Column:
+    if options is not None:
+        options_col = create_map([lit(x) for x in _flatten(options.items())])
+        return Column.invoke_anonymous_function(col, "SCHEMA_OF_JSON", options_col)
+    return Column.invoke_anonymous_function(col, "SCHEMA_OF_JSON")
+
+
+@meta(unsupported_engines="*")
+def schema_of_csv(col: ColumnOrName, options: t.Optional[t.Dict[str, str]] = None) -> Column:
+    if options is not None:
+        options_col = create_map([lit(x) for x in _flatten(options.items())])
+        return Column.invoke_anonymous_function(col, "SCHEMA_OF_CSV", options_col)
+    return Column.invoke_anonymous_function(col, "SCHEMA_OF_CSV")
+
+
+@meta(unsupported_engines=["bigquery", "duckdb", "postgres"])
+def to_csv(col: ColumnOrName, options: t.Optional[t.Dict[str, str]] = None) -> Column:
+    if options is not None:
+        options_col = create_map([lit(x) for x in _flatten(options.items())])
+        return Column.invoke_anonymous_function(col, "TO_CSV", options_col)
+    return Column.invoke_anonymous_function(col, "TO_CSV")
+
+
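The JSON/CSV helpers all funnel their `options` dict through `create_map([lit(x) for x in _flatten(options.items())])`: the dict is flattened into interleaved key/value literals and passed as a MAP argument to the engine function. A hedged usage sketch:

# Illustrative sketch only -- not part of the sqlframe source.
from sqlframe.base import functions as F

# The options dict becomes a MAP literal: the keys and values are flattened
# into lit('timestampFormat'), lit('yyyy-MM-dd') and handed to TO_JSON.
j = F.to_json("payload", {"timestampFormat": "yyyy-MM-dd"})
print(j.expression.sql())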
+@meta()
+def size(col: ColumnOrName) -> Column:
+    return Column.invoke_expression_over_column(col, expression.ArraySize)
+
+
+@meta()
+def array_min(col: ColumnOrName) -> Column:
+    return Column.invoke_anonymous_function(col, "ARRAY_MIN")
+
+
+@meta()
+def array_max(col: ColumnOrName) -> Column:
+    return Column.invoke_anonymous_function(col, "ARRAY_MAX")
+
+
+@meta(unsupported_engines="postgres")
+def sort_array(col: ColumnOrName, asc: t.Optional[bool] = None) -> Column:
+    if asc is not None:
+        return Column.invoke_expression_over_column(col, expression.SortArray, asc=lit(asc))
+    return Column.invoke_expression_over_column(col, expression.SortArray)
+
+
+@meta(unsupported_engines="postgres")
+def array_sort(
+    col: ColumnOrName,
+    comparator: t.Optional[t.Union[t.Callable[[Column, Column], Column]]] = None,
+) -> Column:
+    if comparator is not None:
+        f_expression = _get_lambda_from_func(comparator)
+        return Column.invoke_expression_over_column(
+            col, expression.ArraySort, expression=f_expression
+        )
+    return Column.invoke_expression_over_column(col, expression.ArraySort)
+
+
+@meta(unsupported_engines=["bigquery", "duckdb", "postgres"])
+def shuffle(col: ColumnOrName) -> Column:
+    return Column.invoke_anonymous_function(col, "SHUFFLE")
+
+
+@meta()
+def reverse(col: ColumnOrName) -> Column:
+    return Column.invoke_anonymous_function(col, "REVERSE")
+
+
+@meta(unsupported_engines=["bigquery", "postgres"])
+def flatten(col: ColumnOrName) -> Column:
+    return Column.invoke_expression_over_column(col, expression.Flatten)
+
+
+@meta(unsupported_engines=["bigquery", "duckdb", "postgres"])
+def map_keys(col: ColumnOrName) -> Column:
+    return Column.invoke_anonymous_function(col, "MAP_KEYS")
+
+
+@meta(unsupported_engines=["bigquery", "duckdb", "postgres"])
+def map_values(col: ColumnOrName) -> Column:
+    return Column.invoke_anonymous_function(col, "MAP_VALUES")
+
+
+@meta(unsupported_engines=["bigquery", "duckdb", "postgres"])
+def map_entries(col: ColumnOrName) -> Column:
+    return Column.invoke_anonymous_function(col, "MAP_ENTRIES")
+
+
+@meta(unsupported_engines=["bigquery", "duckdb", "postgres"])
+def map_from_entries(col: ColumnOrName) -> Column:
+    return Column.invoke_expression_over_column(col, expression.MapFromEntries)
+
+
+@meta(unsupported_engines=["bigquery", "duckdb", "postgres"])
+def array_repeat(col: ColumnOrName, count: t.Union[ColumnOrName, int]) -> Column:
+    count_col = count if isinstance(count, Column) else lit(count)
+    return Column.invoke_anonymous_function(col, "ARRAY_REPEAT", count_col)
+
+
+@meta(unsupported_engines=["bigquery", "duckdb", "postgres"])
+def arrays_zip(*cols: ColumnOrName) -> Column:
+    if len(cols) == 1:
+        return Column.invoke_anonymous_function(cols[0], "ARRAYS_ZIP")
+    return Column.invoke_anonymous_function(cols[0], "ARRAYS_ZIP", *cols[1:])
+
+
+@meta(unsupported_engines=["bigquery", "duckdb", "postgres"])
+def map_concat(*cols: t.Union[ColumnOrName, t.Iterable[ColumnOrName]]) -> Column:
+    columns = list(flatten(cols)) if not isinstance(cols[0], (str, Column)) else cols  # type: ignore
+    if len(columns) == 1:
+        return Column.invoke_anonymous_function(columns[0], "MAP_CONCAT")
+    return Column.invoke_anonymous_function(columns[0], "MAP_CONCAT", *columns[1:])
+
+
+@meta(unsupported_engines="postgres")
+def sequence(
+    start: ColumnOrName, stop: ColumnOrName, step: t.Optional[ColumnOrName] = None
+) -> Column:
+    if step is not None:
+        return Column.invoke_anonymous_function(start, "SEQUENCE", stop, step)
+    return Column.invoke_anonymous_function(start, "SEQUENCE", stop)
+
+
+@meta(unsupported_engines=["bigquery", "duckdb", "postgres"])
+def from_csv(
+    col: ColumnOrName,
+    schema: t.Union[Column, str],
+    options: t.Optional[t.Dict[str, str]] = None,
+) -> Column:
+    schema = schema if isinstance(schema, Column) else lit(schema)
+    if options is not None:
+        option_cols = create_map([lit(x) for x in _flatten(options.items())])
+        return Column.invoke_anonymous_function(col, "FROM_CSV", schema, option_cols)
+    return Column.invoke_anonymous_function(col, "FROM_CSV", schema)
+
+
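Two details worth calling out in this block: `sort_array` only attaches the `asc` flag when one is supplied, and `map_concat` flattens iterable input through the column-level `flatten()` defined above rather than the private `_flatten` helper used elsewhere in the file. A hedged sketch of the simpler call shapes (expected SQL noted in comments is approximate and dialect-dependent):

# Illustrative sketch only -- not part of the sqlframe source.
from sqlframe.base import functions as F

s_default = F.sort_array("xs")                    # SORT_ARRAY(xs)
s_desc = F.sort_array("xs", asc=False)            # SORT_ARRAY(xs, FALSE)
seq = F.sequence(F.lit(1), F.lit(10), F.lit(2))   # SEQUENCE(1, 10, 2)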
+@meta(unsupported_engines=["bigquery", "duckdb", "postgres"])
+def aggregate(
+    col: ColumnOrName,
+    initialValue: ColumnOrName,
+    merge: t.Callable[[Column, Column], Column],
+    finish: t.Optional[t.Callable[[Column], Column]] = None,
+) -> Column:
+    merge_exp = _get_lambda_from_func(merge)
+    if finish is not None:
+        finish_exp = _get_lambda_from_func(finish)
+        return Column.invoke_expression_over_column(
+            col,
+            expression.Reduce,
+            initial=initialValue,
+            merge=Column(merge_exp),
+            finish=Column(finish_exp),
+        )
+    return Column.invoke_expression_over_column(
+        col, expression.Reduce, initial=initialValue, merge=Column(merge_exp)
+    )
+
+
+@meta(unsupported_engines=["bigquery", "duckdb", "postgres"])
+def transform(
+    col: ColumnOrName,
+    f: t.Union[t.Callable[[Column], Column], t.Callable[[Column, Column], Column]],
+) -> Column:
+    f_expression = _get_lambda_from_func(f)
+    return Column.invoke_expression_over_column(
+        col, expression.Transform, expression=Column(f_expression)
+    )
+
+
+@meta(unsupported_engines=["bigquery", "duckdb", "postgres"])
+def exists(col: ColumnOrName, f: t.Callable[[Column], Column]) -> Column:
+    f_expression = _get_lambda_from_func(f)
+    return Column.invoke_anonymous_function(col, "EXISTS", Column(f_expression))
+
+
+@meta(unsupported_engines=["bigquery", "duckdb", "postgres"])
+def forall(col: ColumnOrName, f: t.Callable[[Column], Column]) -> Column:
+    f_expression = _get_lambda_from_func(f)
+    return Column.invoke_anonymous_function(col, "FORALL", Column(f_expression))
+
+
+@meta(unsupported_engines=["bigquery", "duckdb", "postgres"])
+def filter(
+    col: ColumnOrName,
+    f: t.Union[t.Callable[[Column], Column], t.Callable[[Column, Column], Column]],
+) -> Column:
+    f_expression = _get_lambda_from_func(f)
+    return Column.invoke_expression_over_column(
+        col, expression.ArrayFilter, expression=f_expression
+    )
+
+
+@meta(unsupported_engines=["bigquery", "duckdb", "postgres"])
+def zip_with(
+    left: ColumnOrName, right: ColumnOrName, f: t.Callable[[Column, Column], Column]
+) -> Column:
+    f_expression = _get_lambda_from_func(f)
+    return Column.invoke_anonymous_function(left, "ZIP_WITH", right, Column(f_expression))
+
+
+@meta(unsupported_engines=["bigquery", "duckdb", "postgres"])
+def transform_keys(col: ColumnOrName, f: t.Union[t.Callable[[Column, Column], Column]]) -> Column:
+    f_expression = _get_lambda_from_func(f)
+    return Column.invoke_anonymous_function(col, "TRANSFORM_KEYS", Column(f_expression))
+
+
+@meta(unsupported_engines=["bigquery", "duckdb", "postgres"])
+def transform_values(col: ColumnOrName, f: t.Union[t.Callable[[Column, Column], Column]]) -> Column:
+    f_expression = _get_lambda_from_func(f)
+    return Column.invoke_anonymous_function(col, "TRANSFORM_VALUES", Column(f_expression))
+
+
+@meta(unsupported_engines=["bigquery", "duckdb", "postgres"])
+def map_filter(col: ColumnOrName, f: t.Union[t.Callable[[Column, Column], Column]]) -> Column:
+    f_expression = _get_lambda_from_func(f)
+    return Column.invoke_anonymous_function(col, "MAP_FILTER", Column(f_expression))
+
+
+@meta(unsupported_engines=["bigquery", "duckdb", "postgres"])
+def map_zip_with(
+    col1: ColumnOrName,
+    col2: ColumnOrName,
+    f: t.Union[t.Callable[[Column, Column, Column], Column]],
+) -> Column:
+    f_expression = _get_lambda_from_func(f)
+    return Column.invoke_anonymous_function(col1, "MAP_ZIP_WITH", col2, Column(f_expression))
+
+
+@meta(unsupported_engines="postgres")
+def typeof(col: ColumnOrName) -> Column:
+    return Column.invoke_anonymous_function(col, "TYPEOF")
+
+
+@meta()
+def _lambda_quoted(value: str) -> t.Optional[bool]:
+    return False if value == "_" else None
+
+
+@meta()
+def _get_lambda_from_func(lambda_expression: t.Callable):
+    variables = [
+        expression.to_identifier(x, quoted=_lambda_quoted(x))
+        for x in lambda_expression.__code__.co_varnames
+    ]
+    return expression.Lambda(
+        this=lambda_expression(*[Column(x) for x in variables]).expression,
+        expressions=variables,
+    )
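`_get_lambda_from_func` is what lets the higher-order helpers (`transform`, `filter`, `aggregate`, `zip_with`, ...) accept plain Python callables: the callable's parameter names become sqlglot identifiers, the callable is invoked once with Column wrappers around those identifiers, and the resulting expression becomes the body of a sqlglot Lambda. A hedged sketch of that round trip; the SQL text in the comment is approximate and dialect-dependent, and the Column arithmetic is assumed to behave like PySpark's:

# Illustrative sketch only -- not part of the sqlframe source.
from sqlframe.base import functions as F

# The lambda's parameter name "x" becomes an identifier, the body is evaluated
# symbolically against Column("x"), and the result is wrapped as `x -> (x + 1)`.
plus_one = F.transform("numbers", lambda x: x + F.lit(1))
print(plus_one.expression.sql())  # e.g. TRANSFORM(numbers, x -> (x + 1))

# Two-argument callables work the same way via __code__.co_varnames:
zipped = F.zip_with("a", "b", lambda left, right: left + right)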