sqlframe 1.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98) hide show
  1. sqlframe/__init__.py +0 -0
  2. sqlframe/_version.py +16 -0
  3. sqlframe/base/__init__.py +0 -0
  4. sqlframe/base/_typing.py +39 -0
  5. sqlframe/base/catalog.py +1163 -0
  6. sqlframe/base/column.py +388 -0
  7. sqlframe/base/dataframe.py +1519 -0
  8. sqlframe/base/decorators.py +51 -0
  9. sqlframe/base/exceptions.py +14 -0
  10. sqlframe/base/function_alternatives.py +1055 -0
  11. sqlframe/base/functions.py +1678 -0
  12. sqlframe/base/group.py +102 -0
  13. sqlframe/base/mixins/__init__.py +0 -0
  14. sqlframe/base/mixins/catalog_mixins.py +419 -0
  15. sqlframe/base/mixins/readwriter_mixins.py +118 -0
  16. sqlframe/base/normalize.py +84 -0
  17. sqlframe/base/operations.py +87 -0
  18. sqlframe/base/readerwriter.py +679 -0
  19. sqlframe/base/session.py +585 -0
  20. sqlframe/base/transforms.py +13 -0
  21. sqlframe/base/types.py +418 -0
  22. sqlframe/base/util.py +242 -0
  23. sqlframe/base/window.py +139 -0
  24. sqlframe/bigquery/__init__.py +23 -0
  25. sqlframe/bigquery/catalog.py +255 -0
  26. sqlframe/bigquery/column.py +1 -0
  27. sqlframe/bigquery/dataframe.py +54 -0
  28. sqlframe/bigquery/functions.py +378 -0
  29. sqlframe/bigquery/group.py +14 -0
  30. sqlframe/bigquery/readwriter.py +29 -0
  31. sqlframe/bigquery/session.py +89 -0
  32. sqlframe/bigquery/types.py +1 -0
  33. sqlframe/bigquery/window.py +1 -0
  34. sqlframe/duckdb/__init__.py +20 -0
  35. sqlframe/duckdb/catalog.py +108 -0
  36. sqlframe/duckdb/column.py +1 -0
  37. sqlframe/duckdb/dataframe.py +55 -0
  38. sqlframe/duckdb/functions.py +47 -0
  39. sqlframe/duckdb/group.py +14 -0
  40. sqlframe/duckdb/readwriter.py +111 -0
  41. sqlframe/duckdb/session.py +65 -0
  42. sqlframe/duckdb/types.py +1 -0
  43. sqlframe/duckdb/window.py +1 -0
  44. sqlframe/postgres/__init__.py +23 -0
  45. sqlframe/postgres/catalog.py +106 -0
  46. sqlframe/postgres/column.py +1 -0
  47. sqlframe/postgres/dataframe.py +54 -0
  48. sqlframe/postgres/functions.py +61 -0
  49. sqlframe/postgres/group.py +14 -0
  50. sqlframe/postgres/readwriter.py +29 -0
  51. sqlframe/postgres/session.py +68 -0
  52. sqlframe/postgres/types.py +1 -0
  53. sqlframe/postgres/window.py +1 -0
  54. sqlframe/redshift/__init__.py +23 -0
  55. sqlframe/redshift/catalog.py +127 -0
  56. sqlframe/redshift/column.py +1 -0
  57. sqlframe/redshift/dataframe.py +54 -0
  58. sqlframe/redshift/functions.py +18 -0
  59. sqlframe/redshift/group.py +14 -0
  60. sqlframe/redshift/readwriter.py +29 -0
  61. sqlframe/redshift/session.py +53 -0
  62. sqlframe/redshift/types.py +1 -0
  63. sqlframe/redshift/window.py +1 -0
  64. sqlframe/snowflake/__init__.py +26 -0
  65. sqlframe/snowflake/catalog.py +134 -0
  66. sqlframe/snowflake/column.py +1 -0
  67. sqlframe/snowflake/dataframe.py +54 -0
  68. sqlframe/snowflake/functions.py +18 -0
  69. sqlframe/snowflake/group.py +14 -0
  70. sqlframe/snowflake/readwriter.py +29 -0
  71. sqlframe/snowflake/session.py +53 -0
  72. sqlframe/snowflake/types.py +1 -0
  73. sqlframe/snowflake/window.py +1 -0
  74. sqlframe/spark/__init__.py +23 -0
  75. sqlframe/spark/catalog.py +1028 -0
  76. sqlframe/spark/column.py +1 -0
  77. sqlframe/spark/dataframe.py +54 -0
  78. sqlframe/spark/functions.py +22 -0
  79. sqlframe/spark/group.py +14 -0
  80. sqlframe/spark/readwriter.py +29 -0
  81. sqlframe/spark/session.py +90 -0
  82. sqlframe/spark/types.py +1 -0
  83. sqlframe/spark/window.py +1 -0
  84. sqlframe/standalone/__init__.py +26 -0
  85. sqlframe/standalone/catalog.py +13 -0
  86. sqlframe/standalone/column.py +1 -0
  87. sqlframe/standalone/dataframe.py +36 -0
  88. sqlframe/standalone/functions.py +1 -0
  89. sqlframe/standalone/group.py +14 -0
  90. sqlframe/standalone/readwriter.py +19 -0
  91. sqlframe/standalone/session.py +40 -0
  92. sqlframe/standalone/types.py +1 -0
  93. sqlframe/standalone/window.py +1 -0
  94. sqlframe-1.1.3.dist-info/LICENSE +21 -0
  95. sqlframe-1.1.3.dist-info/METADATA +172 -0
  96. sqlframe-1.1.3.dist-info/RECORD +98 -0
  97. sqlframe-1.1.3.dist-info/WHEEL +5 -0
  98. sqlframe-1.1.3.dist-info/top_level.txt +1 -0
@@ -0,0 +1,1678 @@
1
+ # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'sqlframe' folder.
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+ import typing as t
7
+
8
+ from sqlglot import exp as expression
9
+ from sqlglot.helper import ensure_list
10
+ from sqlglot.helper import flatten as _flatten
11
+
12
+ from sqlframe.base.column import Column
13
+ from sqlframe.base.decorators import func_metadata as meta
14
+
15
+ if t.TYPE_CHECKING:
16
+ from sqlframe.base._typing import ColumnOrLiteral, ColumnOrName
17
+ from sqlframe.base.session import DF
18
+ from sqlframe.base.types import ArrayType, StructType
19
+
20
+ logger = logging.getLogger(__name__)
21
+
22
+
23
@meta()
def col(column_name: t.Union[ColumnOrName, t.Any]) -> Column:
    """
    Build a Column from ``column_name``.

    A string is parsed with ``expression.to_column`` using the session's input
    dialect and transformed with that dialect's ``normalize_identifier``; any
    other value is handed straight to the ``Column`` constructor.
    """
    from sqlframe.base.session import _BaseSession

    input_dialect = _BaseSession().input_dialect
    if not isinstance(column_name, str):
        return Column(column_name)
    parsed = expression.to_column(column_name, dialect=input_dialect)
    return Column(parsed.transform(input_dialect.normalize_identifier))
35
+
36
+
37
@meta()
def lit(value: t.Optional[t.Any] = None) -> Column:
    """Wrap ``value`` in a Column; a string is first turned into a sqlglot string literal."""
    if not isinstance(value, str):
        return Column(value)
    string_literal = expression.Literal.string(value)
    return Column(string_literal)
42
+
43
+
44
@meta()
def greatest(*cols: ColumnOrName) -> Column:
    """
    Build a Column invoking ``expression.Greatest`` over the given columns.

    The first column is the primary operand; any further columns are passed
    through the ``expressions`` keyword.
    """
    head, rest = cols[0], cols[1:]
    if not rest:
        return Column.invoke_expression_over_column(head, expression.Greatest)
    return Column.invoke_expression_over_column(head, expression.Greatest, expressions=rest)
51
+
52
+
53
@meta()
def least(*cols: ColumnOrName) -> Column:
    """
    Build a Column invoking ``expression.Least`` over the given columns.

    The first column is the primary operand; any further columns are passed
    through the ``expressions`` keyword.
    """
    head, rest = cols[0], cols[1:]
    if not rest:
        return Column.invoke_expression_over_column(head, expression.Least)
    return Column.invoke_expression_over_column(head, expression.Least, expressions=rest)
58
+
59
+
60
@meta(unsupported_engines="bigquery")
def count_distinct(col: ColumnOrName, *cols: ColumnOrName) -> Column:
    """Build ``Count(Distinct(...))`` over ``col`` and any additional ``cols``."""
    distinct_args = [Column.ensure_col(item).expression for item in (col, *cols)]
    distinct_node = expression.Distinct(expressions=distinct_args)
    return Column(expression.Count(this=distinct_node))


countDistinct = count_distinct
69
+
70
+
71
@meta()
def when(condition: Column, value: t.Any) -> Column:
    """
    Build a ``Case`` expression with a single ``If`` branch.

    ``value`` is used as the branch result; anything that is not already a
    Column is wrapped with ``lit`` first.
    """
    result_col = value if isinstance(value, Column) else lit(value)
    branch = expression.If(
        this=condition.column_expression, true=result_col.column_expression
    )
    return Column(expression.Case(ifs=[branch]))
79
+
80
+
81
@meta()
def asc(col: ColumnOrName) -> Column:
    """Coerce ``col`` with ``Column.ensure_col`` and return the result of its ``asc()``."""
    target = Column.ensure_col(col)
    return target.asc()
84
+
85
+
86
@meta()
def desc(col: ColumnOrName) -> Column:
    """
    Coerce ``col`` with ``Column.ensure_col`` and return the result of its ``desc()``.

    The return annotation matches the sibling sort helpers (``asc``,
    ``desc_nulls_first``, ...), which are all annotated as returning ``Column``.
    """
    return Column.ensure_col(col).desc()
89
+
90
+
91
@meta(unsupported_engines="*")
def broadcast(df: DF) -> DF:
    """Return the result of calling ``df.hint`` with the "broadcast" hint name."""
    hint_name = "broadcast"
    return df.hint(hint_name)
94
+
95
+
96
@meta()
def sqrt(col: ColumnOrName) -> Column:
    """Build a Column by invoking ``expression.Sqrt`` over ``col``."""
    node_type = expression.Sqrt
    return Column.invoke_expression_over_column(col, node_type)
99
+
100
+
101
@meta()
def abs(col: ColumnOrName) -> Column:
    """Build a Column by invoking ``expression.Abs`` over ``col``."""
    node_type = expression.Abs
    return Column.invoke_expression_over_column(col, node_type)
104
+
105
+
106
@meta()
def max(col: ColumnOrName) -> Column:
    """Build a Column by invoking ``expression.Max`` over ``col``."""
    node_type = expression.Max
    return Column.invoke_expression_over_column(col, node_type)
109
+
110
+
111
@meta()
def min(col: ColumnOrName) -> Column:
    """Build a Column by invoking ``expression.Min`` over ``col``."""
    node_type = expression.Min
    return Column.invoke_expression_over_column(col, node_type)
114
+
115
+
116
@meta(unsupported_engines="postgres")
def max_by(col: ColumnOrName, ord: ColumnOrName) -> Column:
    """Build a Column invoking ``expression.ArgMax`` over ``col`` with ``ord`` as ``expression``."""
    node_type = expression.ArgMax
    return Column.invoke_expression_over_column(col, node_type, expression=ord)
119
+
120
+
121
@meta(unsupported_engines="postgres")
def min_by(col: ColumnOrName, ord: ColumnOrName) -> Column:
    """Build a Column invoking ``expression.ArgMin`` over ``col`` with ``ord`` as ``expression``."""
    node_type = expression.ArgMin
    return Column.invoke_expression_over_column(col, node_type, expression=ord)
124
+
125
+
126
@meta()
def count(col: ColumnOrName) -> Column:
    """Build a Column by invoking ``expression.Count`` over ``col``."""
    node_type = expression.Count
    return Column.invoke_expression_over_column(col, node_type)
129
+
130
+
131
@meta()
def sum(col: ColumnOrName) -> Column:
    """Build a Column by invoking ``expression.Sum`` over ``col``."""
    node_type = expression.Sum
    return Column.invoke_expression_over_column(col, node_type)
134
+
135
+
136
@meta()
def avg(col: ColumnOrName) -> Column:
    """Build a Column by invoking ``expression.Avg`` over ``col``."""
    node_type = expression.Avg
    return Column.invoke_expression_over_column(col, node_type)


mean = avg
142
+
143
+
144
@meta()
def sumDistinct(col: ColumnOrName) -> Column:
    """Build ``Sum(Distinct(col))`` as a Column."""
    operand = Column.ensure_col(col).expression
    distinct_node = expression.Distinct(expressions=[operand])
    return Column(expression.Sum(this=distinct_node))


sum_distinct = sumDistinct
152
+
153
+
154
@meta(unsupported_engines="*")
def product(col: ColumnOrName) -> Column:
    """Placeholder that always raises ``NotImplementedError``."""
    message = "Product is not currently implemented"
    raise NotImplementedError(message)
157
+
158
+
159
@meta()
def acos(col: ColumnOrName) -> Column:
    """Build a Column calling the anonymous SQL function ``ACOS`` on ``col``."""
    function_name = "ACOS"
    return Column.invoke_anonymous_function(col, function_name)
162
+
163
+
164
@meta(unsupported_engines="duckdb")
def acosh(col: ColumnOrName) -> Column:
    """Build a Column calling the anonymous SQL function ``ACOSH`` on ``col``."""
    function_name = "ACOSH"
    return Column.invoke_anonymous_function(col, function_name)
167
+
168
+
169
@meta()
def asin(col: ColumnOrName) -> Column:
    """Build a Column calling the anonymous SQL function ``ASIN`` on ``col``."""
    function_name = "ASIN"
    return Column.invoke_anonymous_function(col, function_name)
172
+
173
+
174
@meta(unsupported_engines="duckdb")
def asinh(col: ColumnOrName) -> Column:
    """Build a Column calling the anonymous SQL function ``ASINH`` on ``col``."""
    function_name = "ASINH"
    return Column.invoke_anonymous_function(col, function_name)
177
+
178
+
179
@meta()
def atan(col: ColumnOrName) -> Column:
    """Build a Column calling the anonymous SQL function ``ATAN`` on ``col``."""
    function_name = "ATAN"
    return Column.invoke_anonymous_function(col, function_name)
182
+
183
+
184
@meta()
def atan2(col1: t.Union[ColumnOrName, float], col2: t.Union[ColumnOrName, float]) -> Column:
    """
    Build a Column calling the anonymous SQL function ``ATAN2`` on two operands.

    Operands given as ``int`` or ``float`` are wrapped with ``lit``; everything
    else is passed through unchanged.
    """
    first_operand = col1 if not isinstance(col1, (int, float)) else lit(col1)
    second_operand = col2 if not isinstance(col2, (int, float)) else lit(col2)
    return Column.invoke_anonymous_function(first_operand, "ATAN2", second_operand)
190
+
191
+
192
@meta(unsupported_engines="duckdb")
def atanh(col: ColumnOrName) -> Column:
    """Build a Column calling the anonymous SQL function ``ATANH`` on ``col``."""
    function_name = "ATANH"
    return Column.invoke_anonymous_function(col, function_name)
195
+
196
+
197
@meta()
def cbrt(col: ColumnOrName) -> Column:
    """Build a Column by invoking ``expression.Cbrt`` over ``col``."""
    node_type = expression.Cbrt
    return Column.invoke_expression_over_column(col, node_type)
200
+
201
+
202
@meta()
def ceil(col: ColumnOrName) -> Column:
    """Build a Column by invoking ``expression.Ceil`` over ``col``."""
    node_type = expression.Ceil
    return Column.invoke_expression_over_column(col, node_type)


ceiling = ceil
208
+
209
+
210
@meta()
def cos(col: ColumnOrName) -> Column:
    """Build a Column calling the anonymous SQL function ``COS`` on ``col``."""
    function_name = "COS"
    return Column.invoke_anonymous_function(col, function_name)
213
+
214
+
215
@meta(unsupported_engines="duckdb")
def cosh(col: ColumnOrName) -> Column:
    """Build a Column calling the anonymous SQL function ``COSH`` on ``col``."""
    function_name = "COSH"
    return Column.invoke_anonymous_function(col, function_name)
218
+
219
+
220
@meta()
def cot(col: ColumnOrName) -> Column:
    """Build a Column calling the anonymous SQL function ``COT`` on ``col``."""
    function_name = "COT"
    return Column.invoke_anonymous_function(col, function_name)
223
+
224
+
225
@meta(unsupported_engines=["duckdb", "postgres"])
def csc(col: ColumnOrName) -> Column:
    """Build a Column calling the anonymous SQL function ``CSC`` on ``col``."""
    function_name = "CSC"
    return Column.invoke_anonymous_function(col, function_name)
228
+
229
+
230
@meta()
def e() -> Column:
    """Return a Column wrapping an ``Anonymous`` expression named ``e``."""
    anonymous_node = expression.Anonymous(this="e")
    return Column(anonymous_node)
233
+
234
+
235
@meta()
def exp(col: ColumnOrName) -> Column:
    """Build a Column by invoking ``expression.Exp`` over ``col``."""
    node_type = expression.Exp
    return Column.invoke_expression_over_column(col, node_type)
238
+
239
+
240
@meta()
def expm1(col: ColumnOrName) -> Column:
    """Build a Column calling the anonymous SQL function ``EXPM1`` on ``col``."""
    function_name = "EXPM1"
    return Column.invoke_anonymous_function(col, function_name)
243
+
244
+
245
@meta()
def factorial(col: ColumnOrName) -> Column:
    """Build a Column calling the anonymous SQL function ``FACTORIAL`` on ``col``."""
    function_name = "FACTORIAL"
    return Column.invoke_anonymous_function(col, function_name)
248
+
249
+
250
@meta()
def floor(col: ColumnOrName) -> Column:
    """Build a Column by invoking ``expression.Floor`` over ``col``."""
    node_type = expression.Floor
    return Column.invoke_expression_over_column(col, node_type)
253
+
254
+
255
@meta()
def log10(col: ColumnOrName) -> Column:
    """Build ``expression.Log`` with the literal 10 as primary operand and ``col`` as ``expression``."""
    base = lit(10)
    return Column.invoke_expression_over_column(base, expression.Log, expression=col)
258
+
259
+
260
@meta()
def log1p(col: ColumnOrName) -> Column:
    """Build a Column calling the anonymous SQL function ``LOG1P`` on ``col``."""
    function_name = "LOG1P"
    return Column.invoke_anonymous_function(col, function_name)
263
+
264
+
265
@meta()
def log2(col: ColumnOrName) -> Column:
    """Build ``expression.Log`` with the literal 2 as primary operand and ``col`` as ``expression``."""
    base = lit(2)
    return Column.invoke_expression_over_column(base, expression.Log, expression=col)
268
+
269
+
270
@meta()
def log(arg1: t.Union[ColumnOrName, float], arg2: t.Optional[ColumnOrName] = None) -> Column:
    """
    Build a logarithm Column.

    With only ``arg1`` the result uses ``expression.Ln``; with ``arg2`` it uses
    ``expression.Log`` with ``arg1`` as primary operand and ``arg2`` as
    ``expression``. An ``int``/``float`` ``arg1`` is wrapped with ``lit``.
    """
    first_operand = lit(arg1) if isinstance(arg1, (int, float)) else arg1
    if arg2 is not None:
        return Column.invoke_expression_over_column(
            first_operand, expression.Log, expression=arg2
        )
    return Column.invoke_expression_over_column(first_operand, expression.Ln)
277
+
278
+
279
@meta()
def rint(col: ColumnOrName) -> Column:
    """Build a Column calling the anonymous SQL function ``RINT`` on ``col``."""
    function_name = "RINT"
    return Column.invoke_anonymous_function(col, function_name)
282
+
283
+
284
@meta(unsupported_engines=["duckdb", "postgres"])
def sec(col: ColumnOrName) -> Column:
    """Build a Column calling the anonymous SQL function ``SEC`` on ``col``."""
    function_name = "SEC"
    return Column.invoke_anonymous_function(col, function_name)
287
+
288
+
289
@meta()
def signum(col: ColumnOrName) -> Column:
    """Build a Column by invoking ``expression.Sign`` over ``col``."""
    node_type = expression.Sign
    return Column.invoke_expression_over_column(col, node_type)
292
+
293
+
294
@meta()
def sin(col: ColumnOrName) -> Column:
    """Build a Column calling the anonymous SQL function ``SIN`` on ``col``."""
    function_name = "SIN"
    return Column.invoke_anonymous_function(col, function_name)
297
+
298
+
299
@meta(unsupported_engines="duckdb")
def sinh(col: ColumnOrName) -> Column:
    """Build a Column calling the anonymous SQL function ``SINH`` on ``col``."""
    function_name = "SINH"
    return Column.invoke_anonymous_function(col, function_name)
302
+
303
+
304
@meta()
def tan(col: ColumnOrName) -> Column:
    """Build a Column calling the anonymous SQL function ``TAN`` on ``col``."""
    function_name = "TAN"
    return Column.invoke_anonymous_function(col, function_name)
307
+
308
+
309
@meta(unsupported_engines="duckdb")
def tanh(col: ColumnOrName) -> Column:
    """Build a Column calling the anonymous SQL function ``TANH`` on ``col``."""
    function_name = "TANH"
    return Column.invoke_anonymous_function(col, function_name)
312
+
313
+
314
@meta()
def degrees(col: ColumnOrName) -> Column:
    """Build a Column calling the anonymous SQL function ``DEGREES`` on ``col``."""
    function_name = "DEGREES"
    return Column.invoke_anonymous_function(col, function_name)


toDegrees = degrees
320
+
321
+
322
@meta()
def radians(col: ColumnOrName) -> Column:
    """Build a Column calling the anonymous SQL function ``RADIANS`` on ``col``."""
    function_name = "RADIANS"
    return Column.invoke_anonymous_function(col, function_name)


toRadians = radians
328
+
329
+
330
@meta()
def bitwiseNOT(col: ColumnOrName) -> Column:
    """Camel-case spelling that delegates to ``bitwise_not``."""
    negated = bitwise_not(col)
    return negated
333
+
334
+
335
@meta()
def bitwise_not(col: ColumnOrName) -> Column:
    """Build a Column by invoking ``expression.BitwiseNot`` over ``col``."""
    node_type = expression.BitwiseNot
    return Column.invoke_expression_over_column(col, node_type)
338
+
339
+
340
@meta()
def asc_nulls_first(col: ColumnOrName) -> Column:
    """Coerce ``col`` with ``Column.ensure_col`` and return its ``asc_nulls_first()``."""
    target = Column.ensure_col(col)
    return target.asc_nulls_first()
343
+
344
+
345
@meta()
def asc_nulls_last(col: ColumnOrName) -> Column:
    """Coerce ``col`` with ``Column.ensure_col`` and return its ``asc_nulls_last()``."""
    target = Column.ensure_col(col)
    return target.asc_nulls_last()
348
+
349
+
350
@meta()
def desc_nulls_first(col: ColumnOrName) -> Column:
    """Coerce ``col`` with ``Column.ensure_col`` and return its ``desc_nulls_first()``."""
    target = Column.ensure_col(col)
    return target.desc_nulls_first()
353
+
354
+
355
@meta()
def desc_nulls_last(col: ColumnOrName) -> Column:
    """Coerce ``col`` with ``Column.ensure_col`` and return its ``desc_nulls_last()``."""
    target = Column.ensure_col(col)
    return target.desc_nulls_last()
358
+
359
+
360
@meta()
def stddev(col: ColumnOrName) -> Column:
    """Build a Column by invoking ``expression.Stddev`` over ``col``."""
    node_type = expression.Stddev
    return Column.invoke_expression_over_column(col, node_type)
363
+
364
+
365
@meta()
def stddev_samp(col: ColumnOrName) -> Column:
    """Build a Column by invoking ``expression.StddevSamp`` over ``col``."""
    node_type = expression.StddevSamp
    return Column.invoke_expression_over_column(col, node_type)
368
+
369
+
370
@meta()
def stddev_pop(col: ColumnOrName) -> Column:
    """Build a Column by invoking ``expression.StddevPop`` over ``col``."""
    node_type = expression.StddevPop
    return Column.invoke_expression_over_column(col, node_type)
373
+
374
+
375
@meta()
def variance(col: ColumnOrName) -> Column:
    """Build a Column by invoking ``expression.Variance`` over ``col``."""
    node_type = expression.Variance
    return Column.invoke_expression_over_column(col, node_type)
378
+
379
+
380
@meta()
def var_samp(col: ColumnOrName) -> Column:
    """Build a Column by invoking ``expression.Variance`` over ``col`` (same node as ``variance``)."""
    node_type = expression.Variance
    return Column.invoke_expression_over_column(col, node_type)
383
+
384
+
385
@meta()
def var_pop(col: ColumnOrName) -> Column:
    """Build a Column by invoking ``expression.VariancePop`` over ``col``."""
    node_type = expression.VariancePop
    return Column.invoke_expression_over_column(col, node_type)
388
+
389
+
390
@meta(unsupported_engines=["bigquery", "postgres"])
def skewness(col: ColumnOrName) -> Column:
    """Build a Column calling the anonymous SQL function ``SKEWNESS`` on ``col``."""
    function_name = "SKEWNESS"
    return Column.invoke_anonymous_function(col, function_name)
393
+
394
+
395
@meta(unsupported_engines=["bigquery", "postgres"])
def kurtosis(col: ColumnOrName) -> Column:
    """Build a Column calling the anonymous SQL function ``KURTOSIS`` on ``col``."""
    function_name = "KURTOSIS"
    return Column.invoke_anonymous_function(col, function_name)
398
+
399
+
400
@meta()
def collect_list(col: ColumnOrName) -> Column:
    """Build a Column by invoking ``expression.ArrayAgg`` over ``col``."""
    node_type = expression.ArrayAgg
    return Column.invoke_expression_over_column(col, node_type)
403
+
404
+
405
@meta()
def collect_set(col: ColumnOrName) -> Column:
    """Build a Column by invoking ``expression.ArrayUniqueAgg`` over ``col``."""
    node_type = expression.ArrayUniqueAgg
    return Column.invoke_expression_over_column(col, node_type)
408
+
409
+
410
@meta(unsupported_engines=["bigquery", "duckdb", "postgres"])
def hypot(col1: t.Union[ColumnOrName, float], col2: t.Union[ColumnOrName, float]) -> Column:
    """
    Build a Column calling the anonymous SQL function ``HYPOT`` on two operands.

    Operands given as ``int`` or ``float`` are wrapped with ``lit``; everything
    else is passed through unchanged.
    """
    first_operand = col1 if not isinstance(col1, (int, float)) else lit(col1)
    second_operand = col2 if not isinstance(col2, (int, float)) else lit(col2)
    return Column.invoke_anonymous_function(first_operand, "HYPOT", second_operand)
416
+
417
+
418
@meta()
def pow(col1: t.Union[ColumnOrName, float], col2: t.Union[ColumnOrName, float]) -> Column:
    """
    Build a Column invoking ``expression.Pow`` with ``col1`` as primary operand
    and ``col2`` as ``expression``.

    Operands given as ``int`` or ``float`` are wrapped with ``lit`` first.
    """
    base_operand = col1 if not isinstance(col1, (int, float)) else lit(col1)
    exponent_operand = col2 if not isinstance(col2, (int, float)) else lit(col2)
    return Column.invoke_expression_over_column(
        base_operand, expression.Pow, expression=exponent_operand
    )
424
+
425
+
426
@meta()
def row_number() -> Column:
    """Return a Column wrapping an ``Anonymous`` expression named ``ROW_NUMBER``."""
    anonymous_node = expression.Anonymous(this="ROW_NUMBER")
    return Column(anonymous_node)
429
+
430
+
431
@meta()
def dense_rank() -> Column:
    """Return a Column wrapping an ``Anonymous`` expression named ``DENSE_RANK``."""
    anonymous_node = expression.Anonymous(this="DENSE_RANK")
    return Column(anonymous_node)
434
+
435
+
436
@meta()
def rank() -> Column:
    """Return a Column wrapping an ``Anonymous`` expression named ``RANK``."""
    anonymous_node = expression.Anonymous(this="RANK")
    return Column(anonymous_node)
439
+
440
+
441
@meta()
def cume_dist() -> Column:
    """Return a Column wrapping an ``Anonymous`` expression named ``CUME_DIST``."""
    anonymous_node = expression.Anonymous(this="CUME_DIST")
    return Column(anonymous_node)
444
+
445
+
446
@meta()
def percent_rank() -> Column:
    """Return a Column wrapping an ``Anonymous`` expression named ``PERCENT_RANK``."""
    anonymous_node = expression.Anonymous(this="PERCENT_RANK")
    return Column(anonymous_node)
449
+
450
+
451
@meta(unsupported_engines="postgres")
def approx_count_distinct(col: ColumnOrName, rsd: t.Optional[float] = None) -> Column:
    """
    Build a Column invoking ``expression.ApproxDistinct`` over ``col``.

    When ``rsd`` is given it is wrapped with ``lit`` and passed as ``accuracy``.
    """
    if rsd is not None:
        return Column.invoke_expression_over_column(
            col, expression.ApproxDistinct, accuracy=lit(rsd)
        )
    return Column.invoke_expression_over_column(col, expression.ApproxDistinct)


approxCountDistinct = approx_count_distinct
459
+
460
+
461
@meta()
def coalesce(*cols: ColumnOrName) -> Column:
    """
    Build a Column invoking ``expression.Coalesce`` over the given columns.

    The first column is the primary operand; any further columns are passed
    through the ``expressions`` keyword.
    """
    head, rest = cols[0], cols[1:]
    if not rest:
        return Column.invoke_expression_over_column(head, expression.Coalesce)
    return Column.invoke_expression_over_column(head, expression.Coalesce, expressions=rest)
468
+
469
+
470
@meta()
def corr(col1: ColumnOrName, col2: ColumnOrName) -> Column:
    """Build a Column invoking ``expression.Corr`` over ``col1`` with ``col2`` as ``expression``."""
    node_type = expression.Corr
    return Column.invoke_expression_over_column(col1, node_type, expression=col2)
473
+
474
+
475
@meta()
def covar_pop(col1: ColumnOrName, col2: ColumnOrName) -> Column:
    """Build a Column invoking ``expression.CovarPop`` over ``col1`` with ``col2`` as ``expression``."""
    node_type = expression.CovarPop
    return Column.invoke_expression_over_column(col1, node_type, expression=col2)
478
+
479
+
480
@meta()
def covar_samp(col1: ColumnOrName, col2: ColumnOrName) -> Column:
    """Build a Column invoking ``expression.CovarSamp`` over ``col1`` with ``col2`` as ``expression``."""
    node_type = expression.CovarSamp
    return Column.invoke_expression_over_column(col1, node_type, expression=col2)
483
+
484
+
485
@meta(unsupported_engines=["bigquery", "postgres"])
def first(col: ColumnOrName, ignorenulls: t.Optional[bool] = None) -> Column:
    """
    Build a Column invoking ``expression.First`` over ``col``.

    When ``ignorenulls`` is truthy the result is additionally wrapped in
    ``expression.IgnoreNulls``.
    """
    first_value = Column.invoke_expression_over_column(col, expression.First)
    if not ignorenulls:
        return first_value
    return Column.invoke_expression_over_column(first_value, expression.IgnoreNulls)
491
+
492
+
493
@meta(unsupported_engines=["bigquery", "postgres"])
def grouping_id(*cols: ColumnOrName) -> Column:
    """
    Build a Column calling the anonymous SQL function ``GROUPING_ID``.

    With no columns the function is invoked with ``None`` as its operand;
    otherwise the first column is the operand and the rest are extra arguments.
    """
    if cols:
        leading, *trailing = cols
        return Column.invoke_anonymous_function(leading, "GROUPING_ID", *trailing)
    return Column.invoke_anonymous_function(None, "GROUPING_ID")
500
+
501
+
502
@meta()
def input_file_name() -> Column:
    """
    Return a string-literal Column holding the session's ``_last_loaded_file``.

    An empty string is used when that attribute is falsy.
    """
    from sqlframe.base.session import _BaseSession

    file_name = _BaseSession()._last_loaded_file or ""
    return Column(expression.Literal.string(file_name))
507
+
508
+
509
@meta()
def isnan(col: ColumnOrName) -> Column:
    """Build a Column by invoking ``expression.IsNan`` over ``col``."""
    node_type = expression.IsNan
    return Column.invoke_expression_over_column(col, node_type)
512
+
513
+
514
@meta()
def isnull(col: ColumnOrName) -> Column:
    """Build a Column calling the anonymous SQL function ``ISNULL`` on ``col``."""
    function_name = "ISNULL"
    return Column.invoke_anonymous_function(col, function_name)
517
+
518
+
519
@meta(unsupported_engines=["bigquery", "postgres"])
def last(col: ColumnOrName, ignorenulls: t.Optional[bool] = None) -> Column:
    """
    Build a Column invoking ``expression.Last`` over ``col``.

    When ``ignorenulls`` is truthy the result is additionally wrapped in
    ``expression.IgnoreNulls``.
    """
    last_value = Column.invoke_expression_over_column(col, expression.Last)
    if not ignorenulls:
        return last_value
    return Column.invoke_expression_over_column(last_value, expression.IgnoreNulls)
525
+
526
+
527
@meta(unsupported_engines=["duckdb", "postgres", "bigquery", "snowflake", "redshift"])
def monotonically_increasing_id() -> Column:
    """Build a Column calling ``MONOTONICALLY_INCREASING_ID`` with no column operand."""
    function_name = "MONOTONICALLY_INCREASING_ID"
    return Column.invoke_anonymous_function(None, function_name)
530
+
531
+
532
@meta()
def nanvl(col1: ColumnOrName, col2: ColumnOrName) -> Column:
    """Build a Column calling the anonymous SQL function ``NANVL`` on ``col1`` and ``col2``."""
    function_name = "NANVL"
    return Column.invoke_anonymous_function(col1, function_name, col2)
535
+
536
+
537
@meta(unsupported_engines="postgres")
def percentile_approx(
    col: ColumnOrName,
    percentage: t.Union[ColumnOrLiteral, t.List[float], t.Tuple[float]],
    accuracy: t.Optional[t.Union[ColumnOrLiteral, int]] = None,
) -> Column:
    """
    Build a Column invoking ``expression.ApproxQuantile`` over ``col``.

    ``percentage`` is wrapped with ``lit`` and passed as ``quantile``;
    ``accuracy`` is forwarded only when it is truthy.
    """
    optional_args = {"accuracy": accuracy} if accuracy else {}
    return Column.invoke_expression_over_column(
        col, expression.ApproxQuantile, quantile=lit(percentage), **optional_args
    )
550
+
551
+
552
@meta(unsupported_engines="bigquery")
def percentile(
    col: ColumnOrName,
    percentage: t.Union[ColumnOrLiteral, t.List[float], t.Tuple[float]],
    frequency: t.Optional[ColumnOrLiteral] = None,
) -> Column:
    """
    Build a Column invoking ``expression.PercentileDisc`` over ``col``.

    ``percentage`` is wrapped with ``lit`` and passed as ``expression``.
    ``frequency`` is not forwarded; a truthy value only triggers a warning.
    """
    if frequency:
        logger.warning("Frequency is not supported in all engines")
    quantile = lit(percentage)
    return Column.invoke_expression_over_column(
        col, expression.PercentileDisc, expression=quantile
    )
563
+
564
+
565
@meta()
def rand(seed: t.Optional[int] = None) -> Column:
    """
    Build a Column invoking ``expression.Rand`` with no column operand.

    A non-``None`` ``seed`` is wrapped with ``lit`` and passed as ``this``.
    """
    seed_args = {} if seed is None else {"this": lit(seed)}
    return Column.invoke_expression_over_column(None, expression.Rand, **seed_args)
570
+
571
+
572
@meta(unsupported_engines=["bigquery", "duckdb", "postgres"])
def randn(seed: t.Optional[int] = None) -> Column:
    """
    Build a Column invoking ``expression.Randn`` with no column operand.

    A non-``None`` ``seed`` is wrapped with ``lit`` and passed as ``this``.
    """
    seed_args = {} if seed is None else {"this": lit(seed)}
    return Column.invoke_expression_over_column(None, expression.Randn, **seed_args)
577
+
578
+
579
@meta()
def round(col: ColumnOrName, scale: t.Optional[int] = None) -> Column:
    """
    Build a Column invoking ``expression.Round`` over ``col``.

    A non-``None`` ``scale`` is passed as the ``decimals`` argument.
    """
    if scale is None:
        return Column.invoke_expression_over_column(col, expression.Round)
    return Column.invoke_expression_over_column(col, expression.Round, decimals=scale)
584
+
585
+
586
@meta(unsupported_engines=["duckdb", "postgres"])
def bround(col: ColumnOrName, scale: t.Optional[int] = None) -> Column:
    """
    Build a Column calling the anonymous SQL function ``BROUND`` on ``col``.

    A non-``None`` ``scale`` is wrapped with ``lit`` and appended as an argument.
    """
    if scale is None:
        return Column.invoke_anonymous_function(col, "BROUND")
    return Column.invoke_anonymous_function(col, "BROUND", lit(scale))
591
+
592
+
593
@meta()
def shiftleft(col: ColumnOrName, numBits: int) -> Column:
    """Build ``expression.BitwiseLeftShift`` over ``col`` with ``lit(numBits)`` as ``expression``."""
    shift_amount = lit(numBits)
    return Column.invoke_expression_over_column(
        col, expression.BitwiseLeftShift, expression=shift_amount
    )


shiftLeft = shiftleft
601
+
602
+
603
@meta()
def shiftright(col: ColumnOrName, numBits: int) -> Column:
    """Build ``expression.BitwiseRightShift`` over ``col`` with ``lit(numBits)`` as ``expression``."""
    shift_amount = lit(numBits)
    return Column.invoke_expression_over_column(
        col, expression.BitwiseRightShift, expression=shift_amount
    )


shiftRight = shiftright
611
+
612
+
613
@meta(unsupported_engines=["bigquery", "duckdb", "postgres"])
def shiftrightunsigned(col: ColumnOrName, numBits: int) -> Column:
    """
    Build a Column calling the anonymous SQL function ``SHIFTRIGHTUNSIGNED``.

    ``col`` is cast to ``bigint`` first; ``numBits`` is wrapped with ``lit``.
    """
    as_bigint = Column.ensure_col(col).cast("bigint")
    return Column.invoke_anonymous_function(as_bigint, "SHIFTRIGHTUNSIGNED", lit(numBits))


shiftRightUnsigned = shiftrightunsigned
621
+
622
+
623
@meta()
def expr(str: str) -> Column:
    """Hand the given string to the ``Column`` constructor and return the result."""
    sql_text = str
    return Column(sql_text)
626
+
627
+
628
@meta(unsupported_engines=["postgres"])
def struct(col: t.Union[ColumnOrName, t.Iterable[ColumnOrName]], *cols: ColumnOrName) -> Column:
    """
    Build a Column invoking ``expression.Struct`` with no column operand.

    ``col`` is normalised with ``ensure_list`` and combined with ``cols`` to
    form the ``expressions`` argument.
    """
    members = [*ensure_list(col), *cols]
    return Column.invoke_expression_over_column(None, expression.Struct, expressions=members)
632
+
633
+
634
@meta(unsupported_engines=["bigquery", "duckdb", "postgres"])
def conv(col: ColumnOrName, fromBase: int, toBase: int) -> Column:
    """Build a Column calling ``CONV`` on ``col`` with both bases wrapped by ``lit``."""
    source_base = lit(fromBase)
    target_base = lit(toBase)
    return Column.invoke_anonymous_function(col, "CONV", source_base, target_base)
637
+
638
+
639
@meta()
def lag(
    col: ColumnOrName, offset: t.Optional[int] = 1, default: t.Optional[ColumnOrLiteral] = None
) -> Column:
    """
    Build a Column invoking ``expression.Lag`` over ``col``.

    ``offset`` is wrapped with ``lit``; ``default`` is forwarded only when it
    is not ``None``.
    """
    lag_args = {"offset": lit(offset)}
    if default is not None:
        lag_args["default"] = default
    return Column.invoke_expression_over_column(col, expression.Lag, **lag_args)
648
+
649
+
650
@meta()
def lead(
    col: ColumnOrName, offset: t.Optional[int] = 1, default: t.Optional[t.Any] = None
) -> Column:
    """
    Build a Column invoking ``expression.Lead`` over ``col``.

    ``offset`` is wrapped with ``lit``; ``default`` is forwarded only when it
    is not ``None``.
    """
    lead_args = {"offset": lit(offset)}
    if default is not None:
        lead_args["default"] = default
    return Column.invoke_expression_over_column(col, expression.Lead, **lead_args)
659
+
660
+
661
@meta()
def nth_value(
    col: ColumnOrName, offset: t.Optional[int] = 1, ignoreNulls: t.Optional[bool] = None
) -> Column:
    """
    Build a Column invoking ``expression.NthValue`` over ``col``.

    ``offset`` is wrapped with ``lit`` and passed as the ``offset`` argument.
    When ``ignoreNulls`` is truthy the result is additionally wrapped in
    ``expression.IgnoreNulls``.
    """
    this = Column.invoke_expression_over_column(col, expression.NthValue, offset=lit(offset))
    # Test truthiness, as ``first`` and ``last`` do: an explicit
    # ``ignoreNulls=False`` must not wrap the result in IgnoreNulls.
    if ignoreNulls:
        return Column.invoke_expression_over_column(this, expression.IgnoreNulls)
    return this
669
+
670
+
671
@meta()
def ntile(n: int) -> Column:
    """Build a Column calling ``NTILE`` with no column operand and ``lit(n)`` as argument."""
    bucket_count = lit(n)
    return Column.invoke_anonymous_function(None, "NTILE", bucket_count)
674
+
675
+
676
@meta()
def current_date() -> Column:
    """Build a Column invoking ``expression.CurrentDate`` with no column operand."""
    node_type = expression.CurrentDate
    return Column.invoke_expression_over_column(None, node_type)
679
+
680
+
681
@meta()
def current_timestamp() -> Column:
    """Build a Column invoking ``expression.CurrentTimestamp`` with no column operand."""
    node_type = expression.CurrentTimestamp
    return Column.invoke_expression_over_column(None, node_type)
684
+
685
+
686
@meta()
def date_format(col: ColumnOrName, format: str) -> Column:
    """
    Build ``expression.TimeToStr`` over ``col`` wrapped in ``expression.TimeStrToTime``.

    ``format`` is wrapped with ``lit`` and passed as the ``format`` argument.
    """
    as_time = Column(expression.TimeStrToTime(this=Column.ensure_col(col).expression))
    return Column.invoke_expression_over_column(
        as_time, expression.TimeToStr, format=lit(format)
    )
693
+
694
+
695
@meta()
def year(col: ColumnOrName) -> Column:
    """Build ``expression.Year`` over ``col`` wrapped in ``expression.TsOrDsToDate``."""
    as_date = expression.TsOrDsToDate(this=Column.ensure_col(col).expression)
    return Column.invoke_expression_over_column(Column(as_date), expression.Year)
700
+
701
+
702
@meta()
def quarter(col: ColumnOrName) -> Column:
    """Build an anonymous ``QUARTER`` call over ``col`` wrapped in ``expression.TsOrDsToDate``."""
    as_date = expression.TsOrDsToDate(this=Column.ensure_col(col).expression)
    quarter_call = expression.Anonymous(this="QUARTER", expressions=[as_date])
    return Column(quarter_call)
710
+
711
+
712
@meta()
def month(col: ColumnOrName) -> Column:
    """Build ``expression.Month`` over ``col`` wrapped in ``expression.TsOrDsToDate``."""
    as_date = expression.TsOrDsToDate(this=Column.ensure_col(col).expression)
    return Column.invoke_expression_over_column(Column(as_date), expression.Month)
717
+
718
+
719
@meta()
def dayofweek(col: ColumnOrName) -> Column:
    """Build ``expression.DayOfWeek`` over ``col`` wrapped in ``expression.TsOrDsToDate``."""
    as_date = expression.TsOrDsToDate(this=Column.ensure_col(col).expression)
    return Column.invoke_expression_over_column(Column(as_date), expression.DayOfWeek)
725
+
726
+
727
@meta()
def dayofmonth(col: ColumnOrName) -> Column:
    """Build ``expression.DayOfMonth`` over ``col`` wrapped in ``expression.TsOrDsToDate``."""
    as_date = expression.TsOrDsToDate(this=Column.ensure_col(col).expression)
    return Column.invoke_expression_over_column(Column(as_date), expression.DayOfMonth)
733
+
734
+
735
@meta()
def dayofyear(col: ColumnOrName) -> Column:
    """Build ``expression.DayOfYear`` over ``col`` wrapped in ``expression.TsOrDsToDate``."""
    as_date = expression.TsOrDsToDate(this=Column.ensure_col(col).expression)
    return Column.invoke_expression_over_column(Column(as_date), expression.DayOfYear)
741
+
742
+
743
@meta()
def hour(col: ColumnOrName) -> Column:
    """Build a Column calling the anonymous SQL function ``HOUR`` on ``col``."""
    function_name = "HOUR"
    return Column.invoke_anonymous_function(col, function_name)
746
+
747
+
748
@meta()
def minute(col: ColumnOrName) -> Column:
    """Build a Column calling the anonymous SQL function ``MINUTE`` on ``col``."""
    function_name = "MINUTE"
    return Column.invoke_anonymous_function(col, function_name)
751
+
752
+
753
@meta()
def second(col: ColumnOrName) -> Column:
    """Build a Column calling the anonymous SQL function ``SECOND`` on ``col``."""
    function_name = "SECOND"
    return Column.invoke_anonymous_function(col, function_name)
756
+
757
+
758
@meta()
def weekofyear(col: ColumnOrName) -> Column:
    """Build ``expression.WeekOfYear`` over ``col`` wrapped in ``expression.TsOrDsToDate``."""
    as_date = expression.TsOrDsToDate(this=Column.ensure_col(col).expression)
    return Column.invoke_expression_over_column(Column(as_date), expression.WeekOfYear)
764
+
765
+
766
@meta()
def make_date(year: ColumnOrName, month: ColumnOrName, day: ColumnOrName) -> Column:
    """Build a Column calling ``MAKE_DATE`` with ``year``, ``month`` and ``day`` in that order."""
    function_name = "MAKE_DATE"
    return Column.invoke_anonymous_function(year, function_name, month, day)
769
+
770
+
771
@meta()
def date_add(
    col: ColumnOrName, days: t.Union[ColumnOrName, int], cast_as_date: bool = True
) -> Column:
    """
    Build a Column invoking ``expression.DateAdd`` with a ``DAY`` unit.

    ``col`` is cast to ``date`` before the operation. An ``int`` ``days`` is
    wrapped with ``lit``; a negative ``int`` is delegated to ``date_sub`` with
    its sign flipped.

    Non-standard argument: cast_as_date. When true, the result is cast to
    ``date`` again before being returned.
    """
    if isinstance(days, int):
        if days < 0:
            # Forward cast_as_date so the caller's choice survives the
            # delegation instead of reverting to date_sub's default.
            return date_sub(col, days * -1, cast_as_date=cast_as_date)
        days = lit(days)
    result = Column.invoke_expression_over_column(
        Column.ensure_col(col).cast("date"),
        expression.DateAdd,
        expression=days,
        unit=expression.Var(this="DAY"),
    )
    if cast_as_date:
        return result.cast("date")
    return result
788
+
789
+
790
@meta()
def date_sub(
    col: ColumnOrName, days: t.Union[ColumnOrName, int], cast_as_date: bool = True
) -> Column:
    """
    Build a Column invoking ``expression.DateSub`` with a ``DAY`` unit.

    ``col`` is cast to ``date`` before the operation. An ``int`` ``days`` is
    wrapped with ``lit``; a negative ``int`` is delegated to ``date_add`` with
    its sign flipped.

    Non-standard argument: cast_as_date. When true, the result is cast to
    ``date`` again before being returned.
    """
    if isinstance(days, int):
        if days < 0:
            # Forward cast_as_date so the caller's choice survives the
            # delegation instead of reverting to date_add's default.
            return date_add(col, days * -1, cast_as_date=cast_as_date)
        days = lit(days)
    result = Column.invoke_expression_over_column(
        Column.ensure_col(col).cast("date"),
        expression.DateSub,
        expression=days,
        unit=expression.Var(this="DAY"),
    )
    if cast_as_date:
        return result.cast("date")
    return result
810
+
811
+
812
@meta()
def date_diff(end: ColumnOrName, start: ColumnOrName) -> Column:
    """
    Build ``expression.DateDiff`` with ``end`` as primary operand and ``start``
    as ``expression``; both are cast to ``date`` first.
    """
    end_date = Column.ensure_col(end).cast("date")
    start_date = Column.ensure_col(start).cast("date")
    return Column.invoke_expression_over_column(
        end_date, expression.DateDiff, expression=start_date
    )
819
+
820
+
821
@meta()
def add_months(
    start: ColumnOrName, months: t.Union[ColumnOrName, int], cast_as_date: bool = True
) -> Column:
    """
    Shift ``start`` (cast to ``date``) by a ``MONTH`` interval.

    A negative ``int`` ``months`` subtracts an interval of its absolute value;
    any other ``int`` or a column adds the interval.

    Non-standard argument: cast_as_date. When true, the result is cast to
    ``date`` before being returned.
    """
    base_date = Column(start).cast("date")

    # Only a negative Python int turns the operation into a subtraction.
    subtract = isinstance(months, int) and months < 0
    if isinstance(months, int):
        magnitude = lit(months * -1 if subtract else months).expression
    else:
        magnitude = Column.ensure_col(months).expression

    interval = Column(expression.Interval(this=magnitude, unit=expression.Var(this="MONTH")))
    shifted = base_date - interval if subtract else base_date + interval
    return shifted.cast("date") if cast_as_date else shifted
853
+
854
+
855
@meta()
def months_between(
    date1: ColumnOrName, date2: ColumnOrName, roundOff: t.Optional[bool] = None
) -> Column:
    """
    Build ``expression.MonthsBetween`` with ``date1`` as primary operand and
    ``date2`` as ``expression``.

    A non-``None`` ``roundOff`` is wrapped with ``lit`` and passed as ``roundoff``.
    """
    optional_args = {} if roundOff is None else {"roundoff": lit(roundOff)}
    return Column.invoke_expression_over_column(
        date1, expression.MonthsBetween, expression=date2, **optional_args
    )
867
+
868
+
869
@meta()
def to_date(col: ColumnOrName, format: t.Optional[str] = None) -> Column:
    """
    Build a Column invoking ``expression.TsOrDsToDate`` over ``col``.

    A non-``None`` ``format`` is wrapped with ``lit`` and passed as ``format``.
    """
    if format is None:
        return Column.invoke_expression_over_column(col, expression.TsOrDsToDate)
    return Column.invoke_expression_over_column(
        col, expression.TsOrDsToDate, format=lit(format)
    )
876
+
877
+
878
@meta()
def to_timestamp(col: ColumnOrName, format: t.Optional[str] = None) -> Column:
    """Cast ``col`` to timestamp, or build a ``StrToTime`` expression when ``format`` is given."""
    if format is None:
        return Column.ensure_col(col).cast("timestamp")
    return Column.invoke_expression_over_column(col, expression.StrToTime, format=lit(format))
884
+
885
+
886
@meta()
def trunc(col: ColumnOrName, format: str) -> Column:
    """Cast ``col`` to date, wrap it in ``DateTrunc`` with unit ``format``, and cast back to date."""
    as_date = Column(col).cast("date")
    truncated = Column.invoke_expression_over_column(
        as_date, expression.DateTrunc, unit=lit(format)
    )
    return truncated.cast("date")
891
+
892
+
893
@meta()
def date_trunc(format: str, timestamp: ColumnOrName) -> Column:
    """Cast ``timestamp`` to timestamp, wrap it in ``TimestampTrunc`` with unit ``format``, and cast again."""
    as_timestamp = Column(timestamp).cast("timestamp")
    truncated = Column.invoke_expression_over_column(
        as_timestamp, expression.TimestampTrunc, unit=lit(format)
    )
    return truncated.cast("timestamp")
898
+
899
+
900
@meta(unsupported_engines=["duckdb", "postgres"])
def next_day(col: ColumnOrName, dayOfWeek: str) -> Column:
    """Invoke the anonymous function ``NEXT_DAY`` on ``col`` with ``dayOfWeek`` as a literal."""
    day_literal = lit(dayOfWeek)
    return Column.invoke_anonymous_function(col, "NEXT_DAY", day_literal)
903
+
904
+
905
@meta(unsupported_engines=["duckdb", "postgres"])
def last_day(col: ColumnOrName) -> Column:
    """Wrap ``col`` in a ``LastDay`` expression."""
    expression_type = expression.LastDay
    return Column.invoke_expression_over_column(col, expression_type)
908
+
909
+
910
@meta()
def from_unixtime(col: ColumnOrName, format: t.Optional[str] = None) -> Column:
    """Build a ``UnixToStr`` expression; falls back to the session's ``DEFAULT_TIME_FORMAT``."""
    # Imported here rather than at module level, as in the original code.
    from sqlframe.base.session import _BaseSession

    time_format = _BaseSession().DEFAULT_TIME_FORMAT if format is None else format
    return Column.invoke_expression_over_column(
        col, expression.UnixToStr, format=lit(time_format)
    )
917
+
918
+
919
@meta()
def unix_timestamp(
    timestamp: t.Optional[ColumnOrName] = None, format: t.Optional[str] = None
) -> Column:
    """
    Build a ``StrToUnix`` expression cast to ``bigint``.

    ``format`` falls back to the session's ``DEFAULT_TIME_FORMAT`` when omitted.
    """
    # Imported here rather than at module level, as in the original code.
    from sqlframe.base.session import _BaseSession

    time_format = _BaseSession().DEFAULT_TIME_FORMAT if format is None else format
    unix_value = Column.invoke_expression_over_column(
        timestamp, expression.StrToUnix, format=lit(time_format)
    )
    return unix_value.cast("bigint")
930
+
931
+
932
@meta(unsupported_engines=["duckdb", "postgres", "bigquery", "snowflake", "redshift"])
def from_utc_timestamp(timestamp: ColumnOrName, tz: ColumnOrName) -> Column:
    """Build an ``AtTimeZone`` expression; a non-Column ``tz`` is treated as a literal."""
    if isinstance(tz, Column):
        zone = tz
    else:
        zone = lit(tz)
    return Column.invoke_expression_over_column(timestamp, expression.AtTimeZone, zone=zone)
936
+
937
+
938
@meta(unsupported_engines=["duckdb", "postgres", "bigquery", "snowflake", "redshift"])
def to_utc_timestamp(timestamp: ColumnOrName, tz: ColumnOrName) -> Column:
    """Build a ``FromTimeZone`` expression; a non-Column ``tz`` is treated as a literal."""
    if isinstance(tz, Column):
        zone = tz
    else:
        zone = lit(tz)
    return Column.invoke_expression_over_column(timestamp, expression.FromTimeZone, zone=zone)
942
+
943
+
944
@meta()
def timestamp_seconds(col: ColumnOrName) -> Column:
    """Wrap ``col`` in a ``UnixToTime`` expression."""
    expression_type = expression.UnixToTime
    return Column.invoke_expression_over_column(col, expression_type)
947
+
948
+
949
@meta(unsupported_engines=["duckdb", "postgres", "bigquery", "redshift", "snowflake", "spark"])
def window(
    timeColumn: ColumnOrName,
    windowDuration: str,
    slideDuration: t.Optional[str] = None,
    startTime: t.Optional[str] = None,
) -> Column:
    """
    Invoke the anonymous function ``WINDOW`` on ``timeColumn``.

    The literal arguments are the window duration, then the slide duration and
    start time when needed. If only ``startTime`` is given, the window duration
    is repeated in the slide-duration position.
    """
    if slideDuration is None and startTime is None:
        return Column.invoke_anonymous_function(timeColumn, "WINDOW", lit(windowDuration))

    call_args = [lit(windowDuration)]
    # The slide position must be filled whenever a start time follows it.
    call_args.append(lit(windowDuration) if slideDuration is None else lit(slideDuration))
    if startTime is not None:
        call_args.append(lit(startTime))
    return Column.invoke_anonymous_function(timeColumn, "WINDOW", *call_args)
971
+
972
+
973
@meta(unsupported_engines=["duckdb", "postgres", "bigquery", "redshift", "snowflake", "spark"])
def session_window(timeColumn: ColumnOrName, gapDuration: ColumnOrName) -> Column:
    """Invoke ``SESSION_WINDOW`` on ``timeColumn``; a non-Column ``gapDuration`` becomes a literal."""
    if isinstance(gapDuration, Column):
        gap = gapDuration
    else:
        gap = lit(gapDuration)
    return Column.invoke_anonymous_function(timeColumn, "SESSION_WINDOW", gap)
977
+
978
+
979
@meta(unsupported_engines=["bigquery", "duckdb", "postgres"])
def crc32(col: ColumnOrName) -> Column:
    """Invoke the anonymous function ``CRC32`` on ``col``."""
    function_name = "CRC32"
    return Column.invoke_anonymous_function(col, function_name)
982
+
983
+
984
@meta()
def md5(col: ColumnOrName) -> Column:
    """Wrap ``col`` in an ``MD5`` expression."""
    expression_type = expression.MD5
    return Column.invoke_expression_over_column(col, expression_type)
987
+
988
+
989
@meta(unsupported_engines=["duckdb", "postgres"])
def sha1(col: ColumnOrName) -> Column:
    """Wrap ``col`` in a ``SHA`` expression."""
    expression_type = expression.SHA
    return Column.invoke_expression_over_column(col, expression_type)
992
+
993
+
994
@meta(unsupported_engines=["bigquery", "duckdb", "postgres"])
def sha2(col: ColumnOrName, numBits: int) -> Column:
    """Wrap ``col`` in a ``SHA2`` expression with ``numBits`` as the literal ``length``."""
    bit_length_literal = lit(numBits)
    return Column.invoke_expression_over_column(col, expression.SHA2, length=bit_length_literal)
997
+
998
+
999
@meta(unsupported_engines=["postgres"])
def hash(*cols: ColumnOrName) -> Column:
    """Invoke the anonymous function ``HASH`` with the first column as subject and the rest as arguments."""
    first = cols[0]
    # Slicing past the end yields an empty sequence, so one column needs no special case.
    remaining = cols[1:]
    return Column.invoke_anonymous_function(first, "HASH", *remaining)
1003
+
1004
+
1005
@meta(unsupported_engines=["bigquery", "duckdb", "postgres"])
def xxhash64(*cols: ColumnOrName) -> Column:
    """Invoke the anonymous function ``XXHASH64`` with the first column as subject and the rest as arguments."""
    first = cols[0]
    # Slicing past the end yields an empty sequence, so one column needs no special case.
    remaining = cols[1:]
    return Column.invoke_anonymous_function(first, "XXHASH64", *remaining)
1009
+
1010
+
1011
@meta(unsupported_engines=["duckdb", "postgres", "bigquery", "snowflake", "redshift"])
def assert_true(col: ColumnOrName, errorMsg: t.Optional[ColumnOrName] = None) -> Column:
    """Invoke ``ASSERT_TRUE`` on ``col``, appending ``errorMsg`` (literal unless a Column) when given."""
    if errorMsg is None:
        return Column.invoke_anonymous_function(col, "ASSERT_TRUE")
    message = errorMsg if isinstance(errorMsg, Column) else lit(errorMsg)
    return Column.invoke_anonymous_function(col, "ASSERT_TRUE", message)
1017
+
1018
+
1019
@meta(unsupported_engines=["duckdb", "postgres", "bigquery", "snowflake", "redshift"])
def raise_error(errorMsg: ColumnOrName) -> Column:
    """Invoke ``RAISE_ERROR`` on ``errorMsg``, which is turned into a literal unless it is a Column."""
    if isinstance(errorMsg, Column):
        message = errorMsg
    else:
        message = lit(errorMsg)
    return Column.invoke_anonymous_function(message, "RAISE_ERROR")
1023
+
1024
+
1025
@meta()
def upper(col: ColumnOrName) -> Column:
    """Wrap ``col`` in an ``Upper`` expression."""
    expression_type = expression.Upper
    return Column.invoke_expression_over_column(col, expression_type)
1028
+
1029
+
1030
@meta()
def lower(col: ColumnOrName) -> Column:
    """Wrap ``col`` in a ``Lower`` expression."""
    expression_type = expression.Lower
    return Column.invoke_expression_over_column(col, expression_type)
1033
+
1034
+
1035
@meta()
def ascii(col: ColumnOrName) -> Column:
    """Invoke the anonymous function ``ASCII`` on ``col``."""
    function_name = "ASCII"
    return Column.invoke_anonymous_function(col, function_name)
1038
+
1039
+
1040
@meta()
def base64(col: ColumnOrName) -> Column:
    """Wrap ``col`` in a ``ToBase64`` expression."""
    expression_type = expression.ToBase64
    return Column.invoke_expression_over_column(col, expression_type)
1043
+
1044
+
1045
@meta()
def unbase64(col: ColumnOrName) -> Column:
    """Wrap ``col`` in a ``FromBase64`` expression."""
    expression_type = expression.FromBase64
    return Column.invoke_expression_over_column(col, expression_type)
1048
+
1049
+
1050
@meta()
def ltrim(col: ColumnOrName) -> Column:
    """Invoke the anonymous function ``LTRIM`` on ``col``."""
    function_name = "LTRIM"
    return Column.invoke_anonymous_function(col, function_name)
1053
+
1054
+
1055
@meta()
def rtrim(col: ColumnOrName) -> Column:
    """Invoke the anonymous function ``RTRIM`` on ``col``."""
    function_name = "RTRIM"
    return Column.invoke_anonymous_function(col, function_name)
1058
+
1059
+
1060
@meta()
def trim(col: ColumnOrName) -> Column:
    """Wrap ``col`` in a ``Trim`` expression."""
    expression_type = expression.Trim
    return Column.invoke_expression_over_column(col, expression_type)
1063
+
1064
+
1065
@meta()
def concat_ws(sep: str, *cols: ColumnOrName) -> Column:
    """Build a ``ConcatWs`` expression whose expressions are the literal ``sep`` followed by ``cols``."""
    parts = [lit(sep), *cols]
    return Column.invoke_expression_over_column(None, expression.ConcatWs, expressions=parts)
1070
+
1071
+
1072
@meta(unsupported_engines="bigquery")
def decode(col: ColumnOrName, charset: str) -> Column:
    """Wrap ``col`` in a ``Decode`` expression with ``charset`` as a string literal."""
    charset_literal = expression.Literal.string(charset)
    return Column.invoke_expression_over_column(col, expression.Decode, charset=charset_literal)
1077
+
1078
+
1079
@meta(unsupported_engines="bigquery")
def encode(col: ColumnOrName, charset: str) -> Column:
    """Wrap ``col`` in an ``Encode`` expression with ``charset`` as a string literal."""
    charset_literal = expression.Literal.string(charset)
    return Column.invoke_expression_over_column(col, expression.Encode, charset=charset_literal)
1084
+
1085
+
1086
@meta(unsupported_engines="duckdb")
def format_number(col: ColumnOrName, d: int) -> Column:
    """Invoke the anonymous function ``FORMAT_NUMBER`` on ``col`` with ``d`` as a literal."""
    decimals = lit(d)
    return Column.invoke_anonymous_function(col, "FORMAT_NUMBER", decimals)
1089
+
1090
+
1091
@meta()
def format_string(format: str, *cols: ColumnOrName) -> Column:
    """Invoke ``FORMAT_STRING`` with the literal ``format`` as subject and ``cols`` as arguments."""
    template = lit(format)
    arguments = []
    for candidate in cols:
        arguments.append(Column.ensure_col(candidate))
    return Column.invoke_anonymous_function(template, "FORMAT_STRING", *arguments)
1096
+
1097
+
1098
@meta()
def instr(col: ColumnOrName, substr: str) -> Column:
    """Wrap ``col`` in a ``StrPosition`` expression searching for the literal ``substr``."""
    needle = lit(substr)
    return Column.invoke_expression_over_column(col, expression.StrPosition, substr=needle)
1101
+
1102
+
1103
@meta()
def overlay(
    src: ColumnOrName,
    replace: ColumnOrName,
    pos: t.Union[ColumnOrName, int],
    len: t.Optional[t.Union[ColumnOrName, int]] = None,
) -> Column:
    """
    Invoke the anonymous function ``OVERLAY`` on ``src``.

    Integer ``pos`` / ``len`` values are converted to literals; ``len`` is only
    passed when it is not ``None``.
    """
    call_args = [replace, lit(pos) if isinstance(pos, int) else pos]
    if len is not None:
        call_args.append(lit(len) if isinstance(len, int) else len)
    return Column.invoke_anonymous_function(src, "OVERLAY", *call_args)
1115
+
1116
+
1117
@meta(unsupported_engines=["bigquery", "duckdb", "postgres"])
def sentences(
    string: ColumnOrName,
    language: t.Optional[ColumnOrName] = None,
    country: t.Optional[ColumnOrName] = None,
) -> Column:
    """
    Invoke the anonymous function ``SENTENCES`` on ``string``.

    ``language`` and ``country`` are appended when supplied; if only ``country``
    is given, the literal ``"en"`` fills the language position.
    """
    if language is None and country is None:
        return Column.invoke_anonymous_function(string, "SENTENCES")
    if country is None:
        return Column.invoke_anonymous_function(string, "SENTENCES", language)
    if language is None:
        return Column.invoke_anonymous_function(string, "SENTENCES", lit("en"), country)
    return Column.invoke_anonymous_function(string, "SENTENCES", language, country)
1130
+
1131
+
1132
@meta()
def substring(str: ColumnOrName, pos: int, len: int) -> Column:
    """Delegate to ``Column.substr(pos, len)`` on the column for ``str``."""
    source = Column.ensure_col(str)
    return source.substr(pos, len)
1135
+
1136
+
1137
@meta(unsupported_engines=["duckdb", "postgres"])
def substring_index(str: ColumnOrName, delim: str, count: int) -> Column:
    """Invoke ``SUBSTRING_INDEX`` on ``str`` with ``delim`` and ``count`` as literals."""
    delimiter_literal = lit(delim)
    count_literal = lit(count)
    return Column.invoke_anonymous_function(
        str, "SUBSTRING_INDEX", delimiter_literal, count_literal
    )
1140
+
1141
+
1142
@meta(unsupported_engines="bigquery")
def levenshtein(
    left: ColumnOrName, right: ColumnOrName, threshold: t.Optional[int] = None
) -> Column:
    """
    Build a ``Levenshtein`` expression between ``left`` and ``right``.

    With a ``threshold``, the result is wrapped in a CASE that yields the
    distance when it is <= ``threshold`` and the literal ``-1`` otherwise.
    """
    distance = expression.Levenshtein(
        this=Column.ensure_col(left).expression,
        expression=Column.ensure_col(right).expression,
    )
    if threshold is None:
        return Column(distance)

    within_threshold = expression.LTE(this=distance, expression=lit(threshold).expression)
    capped = expression.case().when(within_threshold, distance).else_(lit(-1).expression)
    return Column(capped)
1157
+
1158
+
1159
@meta(unsupported_engines="bigquery")
def locate(substr: str, str: ColumnOrName, pos: t.Optional[int] = None) -> Column:
    """Wrap ``str`` in a ``StrPosition`` expression for the literal ``substr``; ``position`` is passed only when ``pos`` is given."""
    options = {"substr": lit(substr)}
    if pos is not None:
        options["position"] = pos
    return Column.invoke_expression_over_column(str, expression.StrPosition, **options)
1167
+
1168
+
1169
@meta()
def lpad(col: ColumnOrName, len: int, pad: str) -> Column:
    """Invoke the anonymous function ``LPAD`` on ``col`` with ``len`` and ``pad`` as literals."""
    target_length = lit(len)
    padding = lit(pad)
    return Column.invoke_anonymous_function(col, "LPAD", target_length, padding)
1172
+
1173
+
1174
@meta()
def rpad(col: ColumnOrName, len: int, pad: str) -> Column:
    """Invoke the anonymous function ``RPAD`` on ``col`` with ``len`` and ``pad`` as literals."""
    target_length = lit(len)
    padding = lit(pad)
    return Column.invoke_anonymous_function(col, "RPAD", target_length, padding)
1177
+
1178
+
1179
@meta()
def repeat(col: ColumnOrName, n: int) -> Column:
    """Wrap ``col`` in a ``Repeat`` expression with ``n`` as the literal ``times``."""
    times_literal = lit(n)
    return Column.invoke_expression_over_column(col, expression.Repeat, times=times_literal)
1182
+
1183
+
1184
@meta()
def split(str: ColumnOrName, pattern: str, limit: t.Optional[int] = None) -> Column:
    """Wrap ``str`` in a ``RegexpSplit`` expression on the literal ``pattern``; ``limit`` is passed only when given."""
    options = {"expression": lit(pattern)}
    if limit is not None:
        options["limit"] = lit(limit)
    return Column.invoke_expression_over_column(str, expression.RegexpSplit, **options)
1193
+
1194
+
1195
@meta(unsupported_engines="postgres")
def regexp_extract(str: ColumnOrName, pattern: str, idx: t.Optional[int] = None) -> Column:
    """Wrap ``str`` in a ``RegexpExtract`` expression for the literal ``pattern``; ``group`` is passed only when ``idx`` is given."""
    options = {"expression": lit(pattern)}
    if idx is not None:
        options["group"] = lit(idx)
    return Column.invoke_expression_over_column(str, expression.RegexpExtract, **options)
1207
+
1208
+
1209
@meta()
def regexp_replace(
    str: ColumnOrName, pattern: str, replacement: str, position: t.Optional[int] = None
) -> Column:
    """
    Wrap ``str`` in a ``RegexpReplace`` expression.

    ``pattern`` and ``replacement`` are passed as literals; ``position`` is
    added as a literal only when it is not ``None``.
    """
    options = {
        "expression": lit(pattern),
        "replacement": lit(replacement),
    }
    if position is not None:
        options["position"] = lit(position)
    return Column.invoke_expression_over_column(str, expression.RegexpReplace, **options)
1227
+
1228
+
1229
@meta(unsupported_engines="duckdb")
def initcap(col: ColumnOrName) -> Column:
    """Wrap ``col`` in an ``Initcap`` expression."""
    expression_type = expression.Initcap
    return Column.invoke_expression_over_column(col, expression_type)
1232
+
1233
+
1234
@meta()
def soundex(col: ColumnOrName) -> Column:
    """Invoke the anonymous function ``SOUNDEX`` on ``col``."""
    function_name = "SOUNDEX"
    return Column.invoke_anonymous_function(col, function_name)
1237
+
1238
+
1239
@meta(unsupported_engines="postgres")
def bin(col: ColumnOrName) -> Column:
    """Invoke the anonymous function ``BIN`` on ``col``."""
    function_name = "BIN"
    return Column.invoke_anonymous_function(col, function_name)
1242
+
1243
+
1244
@meta(unsupported_engines="postgres")
def hex(col: ColumnOrName) -> Column:
    """Wrap ``col`` in a ``Hex`` expression."""
    expression_type = expression.Hex
    return Column.invoke_expression_over_column(col, expression_type)
1247
+
1248
+
1249
@meta(unsupported_engines="postgres")
def unhex(col: ColumnOrName) -> Column:
    """Wrap ``col`` in an ``Unhex`` expression."""
    expression_type = expression.Unhex
    return Column.invoke_expression_over_column(col, expression_type)
1252
+
1253
+
1254
@meta()
def length(col: ColumnOrName) -> Column:
    """Wrap ``col`` in a ``Length`` expression."""
    expression_type = expression.Length
    return Column.invoke_expression_over_column(col, expression_type)
1257
+
1258
+
1259
@meta(unsupported_engines="duckdb")
def octet_length(col: ColumnOrName) -> Column:
    """Invoke the anonymous function ``OCTET_LENGTH`` on ``col``."""
    function_name = "OCTET_LENGTH"
    return Column.invoke_anonymous_function(col, function_name)
1262
+
1263
+
1264
@meta()
def bit_length(col: ColumnOrName) -> Column:
    """Invoke the anonymous function ``BIT_LENGTH`` on ``col``."""
    function_name = "BIT_LENGTH"
    return Column.invoke_anonymous_function(col, function_name)
1267
+
1268
+
1269
@meta()
def translate(srcCol: ColumnOrName, matching: str, replace: str) -> Column:
    """Invoke ``TRANSLATE`` on ``srcCol`` with ``matching`` and ``replace`` as literals."""
    matching_literal = lit(matching)
    replace_literal = lit(replace)
    return Column.invoke_anonymous_function(
        srcCol, "TRANSLATE", matching_literal, replace_literal
    )
1272
+
1273
+
1274
@meta()
def array(*cols: t.Union[ColumnOrName, t.Iterable[ColumnOrName]]) -> Column:
    """
    Build an ``Array`` expression from the given columns.

    Columns may be passed as separate arguments or as a single iterable; when
    the first argument is neither a ``str`` nor a ``Column`` the arguments are
    flattened with ``_flatten``. Calling with no arguments yields an ``Array``
    expression with no elements instead of raising ``IndexError``.
    """
    # ``cols`` is checked for emptiness before ``cols[0]`` is inspected.
    needs_flatten = bool(cols) and not isinstance(cols[0], (str, Column))
    columns = _flatten(cols) if needs_flatten else cols
    return Column.invoke_expression_over_column(None, expression.Array, expressions=columns)
1278
+
1279
+
1280
@meta(unsupported_engines=["bigquery", "postgres"])
def create_map(*cols: t.Union[ColumnOrName, t.Iterable[ColumnOrName]]) -> Column:
    """
    Build a ``VarMap`` expression from alternating key/value columns.

    Even-indexed entries become the keys array and odd-indexed entries the
    values array. A single iterable argument is flattened first.
    """
    if not isinstance(cols[0], (str, Column)):
        cols = list(_flatten(cols))  # type: ignore
    key_array = array(*cols[::2])
    value_array = array(*cols[1::2])
    return Column.invoke_expression_over_column(
        None,
        expression.VarMap,
        keys=key_array.expression,
        values=value_array.expression,
    )
1289
+
1290
+
1291
@meta(unsupported_engines=["bigquery", "postgres"])
def map_from_arrays(col1: ColumnOrName, col2: ColumnOrName) -> Column:
    """Build a ``Map`` expression with ``col1`` as ``keys`` and ``col2`` as ``values``."""
    expression_type = expression.Map
    return Column.invoke_expression_over_column(None, expression_type, keys=col1, values=col2)
1294
+
1295
+
1296
@meta()
def array_contains(col: ColumnOrName, value: ColumnOrLiteral) -> Column:
    """Wrap ``col`` in an ``ArrayContains`` expression; a non-Column ``value`` becomes a literal."""
    if isinstance(value, Column):
        needle = value
    else:
        needle = lit(value)
    return Column.invoke_expression_over_column(
        col, expression.ArrayContains, expression=needle.expression
    )
1302
+
1303
+
1304
@meta(unsupported_engines="bigquery")
def arrays_overlap(col1: ColumnOrName, col2: ColumnOrName) -> Column:
    """Wrap ``col1`` in an ``ArrayOverlaps`` expression against ``col2``."""
    expression_type = expression.ArrayOverlaps
    return Column.invoke_expression_over_column(col1, expression_type, expression=col2)
1307
+
1308
+
1309
@meta()
def slice(
    x: ColumnOrName, start: t.Union[ColumnOrName, int], length: t.Union[ColumnOrName, int]
) -> Column:
    """Invoke the anonymous function ``SLICE`` on ``x``; integer ``start`` / ``length`` become literals."""
    bounds = [
        lit(bound) if isinstance(bound, int) else bound
        for bound in (start, length)
    ]
    return Column.invoke_anonymous_function(x, "SLICE", *bounds)
1316
+
1317
+
1318
@meta()
def array_join(
    col: ColumnOrName, delimiter: str, null_replacement: t.Optional[str] = None
) -> Column:
    """Wrap ``col`` in an ``ArrayToString`` expression with the literal ``delimiter``; ``null`` is passed only when ``null_replacement`` is given."""
    options = {"expression": lit(delimiter)}
    if null_replacement is not None:
        options["null"] = lit(null_replacement)
    return Column.invoke_expression_over_column(col, expression.ArrayToString, **options)
1329
+
1330
+
1331
@meta()
def concat(*cols: ColumnOrName) -> Column:
    """Build a ``Concat`` expression whose expressions are ``cols``."""
    expression_type = expression.Concat
    return Column.invoke_expression_over_column(None, expression_type, expressions=cols)
1334
+
1335
+
1336
@meta()
def array_position(col: ColumnOrName, value: ColumnOrLiteral) -> Column:
    """Invoke ``ARRAY_POSITION`` on ``col`` for ``value`` and coalesce the result with literal ``0``."""
    needle = value if isinstance(value, Column) else lit(value)
    position = Column.invoke_anonymous_function(col, "ARRAY_POSITION", needle)
    # Some engines return NULL if item is not found but Spark expects 0 so we coalesce to 0
    return coalesce(position, lit(0))
1341
+
1342
+
1343
@meta()
def element_at(col: ColumnOrName, value: ColumnOrLiteral) -> Column:
    """Invoke ``ELEMENT_AT`` on ``col``; a non-Column ``value`` becomes a literal."""
    if isinstance(value, Column):
        key = value
    else:
        key = lit(value)
    return Column.invoke_anonymous_function(col, "ELEMENT_AT", key)
1347
+
1348
+
1349
@meta()
def array_remove(col: ColumnOrName, value: ColumnOrLiteral) -> Column:
    """Invoke ``ARRAY_REMOVE`` on ``col``; a non-Column ``value`` becomes a literal."""
    if isinstance(value, Column):
        target = value
    else:
        target = lit(value)
    return Column.invoke_anonymous_function(col, "ARRAY_REMOVE", target)
1353
+
1354
+
1355
@meta(unsupported_engines="postgres")
def array_distinct(col: ColumnOrName) -> Column:
    """Invoke the anonymous function ``ARRAY_DISTINCT`` on ``col``."""
    function_name = "ARRAY_DISTINCT"
    return Column.invoke_anonymous_function(col, function_name)
1358
+
1359
+
1360
@meta(unsupported_engines=["bigquery", "postgres"])
def array_intersect(col1: ColumnOrName, col2: ColumnOrName) -> Column:
    """Invoke ``ARRAY_INTERSECT`` on ``col1`` with ``col2`` (ensured as a Column) as argument."""
    other = Column.ensure_col(col2)
    return Column.invoke_anonymous_function(col1, "ARRAY_INTERSECT", other)
1363
+
1364
+
1365
@meta(unsupported_engines=["postgres"])
def array_union(col1: ColumnOrName, col2: ColumnOrName) -> Column:
    """Invoke ``ARRAY_UNION`` on ``col1`` with ``col2`` (ensured as a Column) as argument."""
    other = Column.ensure_col(col2)
    return Column.invoke_anonymous_function(col1, "ARRAY_UNION", other)
1368
+
1369
+
1370
@meta(unsupported_engines=["bigquery", "duckdb", "postgres"])
def array_except(col1: ColumnOrName, col2: ColumnOrName) -> Column:
    """Invoke ``ARRAY_EXCEPT`` on ``col1`` with ``col2`` (ensured as a Column) as argument."""
    other = Column.ensure_col(col2)
    return Column.invoke_anonymous_function(col1, "ARRAY_EXCEPT", other)
1373
+
1374
+
1375
@meta()
def explode(col: ColumnOrName) -> Column:
    """Wrap ``col`` in an ``Explode`` expression."""
    expression_type = expression.Explode
    return Column.invoke_expression_over_column(col, expression_type)
1378
+
1379
+
1380
@meta(unsupported_engines=["duckdb", "postgres"])
def posexplode(col: ColumnOrName) -> Column:
    """Wrap ``col`` in a ``Posexplode`` expression."""
    expression_type = expression.Posexplode
    return Column.invoke_expression_over_column(col, expression_type)
1383
+
1384
+
1385
@meta(unsupported_engines=["duckdb", "postgres"])
def explode_outer(col: ColumnOrName) -> Column:
    """Wrap ``col`` in an ``ExplodeOuter`` expression."""
    expression_type = expression.ExplodeOuter
    return Column.invoke_expression_over_column(col, expression_type)
1388
+
1389
+
1390
@meta(unsupported_engines=["duckdb", "postgres"])
def posexplode_outer(col: ColumnOrName) -> Column:
    """Wrap ``col`` in a ``PosexplodeOuter`` expression."""
    expression_type = expression.PosexplodeOuter
    return Column.invoke_expression_over_column(col, expression_type)
1393
+
1394
+
1395
@meta()
def get_json_object(col: ColumnOrName, path: str) -> Column:
    """Wrap ``col`` in a ``JSONExtract`` expression with ``path`` as a literal."""
    path_literal = lit(path)
    return Column.invoke_expression_over_column(
        col, expression.JSONExtract, expression=path_literal
    )
1398
+
1399
+
1400
@meta(unsupported_engines=["bigquery", "duckdb", "postgres"])
def json_tuple(col: ColumnOrName, *fields: str) -> Column:
    """Invoke ``JSON_TUPLE`` on ``col`` with every entry of ``fields`` passed as a literal."""
    field_literals = []
    for field in fields:
        field_literals.append(lit(field))
    return Column.invoke_anonymous_function(col, "JSON_TUPLE", *field_literals)
1403
+
1404
+
1405
@meta(unsupported_engines=["bigquery", "duckdb", "postgres"])
def from_json(
    col: ColumnOrName,
    schema: t.Union[ArrayType, StructType, Column, str],
    options: t.Optional[t.Dict[str, str]] = None,
) -> Column:
    """
    Invoke the anonymous function ``FROM_JSON`` on ``col``.

    An ``ArrayType`` / ``StructType`` schema is reduced to its ``simpleString()``
    and any non-Column schema is then passed as a literal. When ``options`` is
    given, its flattened key/value pairs are passed as a map built by ``create_map``.
    """
    # Imported here rather than at module level, as in the original code.
    from sqlframe.base.types import ArrayType, StructType

    schema_value = schema.simpleString() if isinstance(schema, (ArrayType, StructType)) else schema
    call_args = [schema_value if isinstance(schema_value, Column) else lit(schema_value)]
    if options is not None:
        call_args.append(create_map([lit(x) for x in _flatten(options.items())]))
    return Column.invoke_anonymous_function(col, "FROM_JSON", *call_args)
1420
+
1421
+
1422
@meta(unsupported_engines=["bigquery", "duckdb", "postgres"])
def to_json(col: ColumnOrName, options: t.Optional[t.Dict[str, str]] = None) -> Column:
    """Wrap ``col`` in a ``JSONFormat`` expression; ``options`` (if any) is passed as a map of literals."""
    if options is None:
        return Column.invoke_expression_over_column(col, expression.JSONFormat)
    flattened = [lit(x) for x in _flatten(options.items())]
    return Column.invoke_expression_over_column(
        col, expression.JSONFormat, options=create_map(flattened)
    )
1428
+
1429
+
1430
@meta(unsupported_engines="*")
def schema_of_json(col: ColumnOrName, options: t.Optional[t.Dict[str, str]] = None) -> Column:
    """Invoke ``SCHEMA_OF_JSON`` on ``col``; ``options`` (if any) is appended as a map of literals."""
    if options is None:
        return Column.invoke_anonymous_function(col, "SCHEMA_OF_JSON")
    flattened = [lit(x) for x in _flatten(options.items())]
    return Column.invoke_anonymous_function(col, "SCHEMA_OF_JSON", create_map(flattened))
1436
+
1437
+
1438
@meta(unsupported_engines="*")
def schema_of_csv(col: ColumnOrName, options: t.Optional[t.Dict[str, str]] = None) -> Column:
    """Invoke ``SCHEMA_OF_CSV`` on ``col``; ``options`` (if any) is appended as a map of literals."""
    if options is None:
        return Column.invoke_anonymous_function(col, "SCHEMA_OF_CSV")
    flattened = [lit(x) for x in _flatten(options.items())]
    return Column.invoke_anonymous_function(col, "SCHEMA_OF_CSV", create_map(flattened))
1444
+
1445
+
1446
@meta(unsupported_engines=["bigquery", "duckdb", "postgres"])
def to_csv(col: ColumnOrName, options: t.Optional[t.Dict[str, str]] = None) -> Column:
    """Invoke ``TO_CSV`` on ``col``; ``options`` (if any) is appended as a map of literals."""
    if options is None:
        return Column.invoke_anonymous_function(col, "TO_CSV")
    flattened = [lit(x) for x in _flatten(options.items())]
    return Column.invoke_anonymous_function(col, "TO_CSV", create_map(flattened))
1452
+
1453
+
1454
@meta()
def size(col: ColumnOrName) -> Column:
    """Wrap ``col`` in an ``ArraySize`` expression."""
    expression_type = expression.ArraySize
    return Column.invoke_expression_over_column(col, expression_type)
1457
+
1458
+
1459
@meta()
def array_min(col: ColumnOrName) -> Column:
    """Invoke the anonymous function ``ARRAY_MIN`` on ``col``."""
    function_name = "ARRAY_MIN"
    return Column.invoke_anonymous_function(col, function_name)
1462
+
1463
+
1464
@meta()
def array_max(col: ColumnOrName) -> Column:
    """Invoke the anonymous function ``ARRAY_MAX`` on ``col``."""
    function_name = "ARRAY_MAX"
    return Column.invoke_anonymous_function(col, function_name)
1467
+
1468
+
1469
@meta(unsupported_engines="postgres")
def sort_array(col: ColumnOrName, asc: t.Optional[bool] = None) -> Column:
    """Wrap ``col`` in a ``SortArray`` expression; ``asc`` is passed as a literal only when given."""
    options = {} if asc is None else {"asc": lit(asc)}
    return Column.invoke_expression_over_column(col, expression.SortArray, **options)
1474
+
1475
+
1476
@meta(unsupported_engines="postgres")
def array_sort(
    col: ColumnOrName,
    comparator: t.Optional[t.Union[t.Callable[[Column, Column], Column]]] = None,
) -> Column:
    """Wrap ``col`` in an ``ArraySort`` expression; a ``comparator`` is converted to a lambda expression."""
    if comparator is None:
        return Column.invoke_expression_over_column(col, expression.ArraySort)
    comparator_lambda = _get_lambda_from_func(comparator)
    return Column.invoke_expression_over_column(
        col, expression.ArraySort, expression=comparator_lambda
    )
1487
+
1488
+
1489
@meta(unsupported_engines=["bigquery", "duckdb", "postgres"])
def shuffle(col: ColumnOrName) -> Column:
    """Invoke the anonymous function ``SHUFFLE`` on ``col``."""
    function_name = "SHUFFLE"
    return Column.invoke_anonymous_function(col, function_name)
1492
+
1493
+
1494
@meta()
def reverse(col: ColumnOrName) -> Column:
    """Invoke the anonymous function ``REVERSE`` on ``col``."""
    function_name = "REVERSE"
    return Column.invoke_anonymous_function(col, function_name)
1497
+
1498
+
1499
@meta(unsupported_engines=["bigquery", "postgres"])
def flatten(col: ColumnOrName) -> Column:
    """Wrap ``col`` in a ``Flatten`` expression (distinct from the ``_flatten`` iterable helper)."""
    expression_type = expression.Flatten
    return Column.invoke_expression_over_column(col, expression_type)
1502
+
1503
+
1504
@meta(unsupported_engines=["bigquery", "duckdb", "postgres"])
def map_keys(col: ColumnOrName) -> Column:
    """Invoke the anonymous function ``MAP_KEYS`` on ``col``."""
    function_name = "MAP_KEYS"
    return Column.invoke_anonymous_function(col, function_name)
1507
+
1508
+
1509
@meta(unsupported_engines=["bigquery", "duckdb", "postgres"])
def map_values(col: ColumnOrName) -> Column:
    """Invoke the anonymous function ``MAP_VALUES`` on ``col``."""
    function_name = "MAP_VALUES"
    return Column.invoke_anonymous_function(col, function_name)
1512
+
1513
+
1514
@meta(unsupported_engines=["bigquery", "duckdb", "postgres"])
def map_entries(col: ColumnOrName) -> Column:
    """Invoke the anonymous function ``MAP_ENTRIES`` on ``col``."""
    function_name = "MAP_ENTRIES"
    return Column.invoke_anonymous_function(col, function_name)
1517
+
1518
+
1519
@meta(unsupported_engines=["bigquery", "duckdb", "postgres"])
def map_from_entries(col: ColumnOrName) -> Column:
    """Wrap ``col`` in a ``MapFromEntries`` expression."""
    expression_type = expression.MapFromEntries
    return Column.invoke_expression_over_column(col, expression_type)
1522
+
1523
+
1524
@meta(unsupported_engines=["bigquery", "duckdb", "postgres"])
def array_repeat(col: ColumnOrName, count: t.Union[ColumnOrName, int]) -> Column:
    """Invoke ``ARRAY_REPEAT`` on ``col``; a non-Column ``count`` becomes a literal."""
    if isinstance(count, Column):
        repetitions = count
    else:
        repetitions = lit(count)
    return Column.invoke_anonymous_function(col, "ARRAY_REPEAT", repetitions)
1528
+
1529
+
1530
@meta(unsupported_engines=["bigquery", "duckdb", "postgres"])
def arrays_zip(*cols: ColumnOrName) -> Column:
    """Invoke ``ARRAYS_ZIP`` with the first column as subject and any remaining columns as arguments."""
    first = cols[0]
    # An empty tail unpacks to no extra arguments, which covers the single-column call.
    remaining = cols[1:]
    return Column.invoke_anonymous_function(first, "ARRAYS_ZIP", *remaining)
1535
+
1536
+
1537
@meta(unsupported_engines=["bigquery", "duckdb", "postgres"])
def map_concat(*cols: t.Union[ColumnOrName, t.Iterable[ColumnOrName]]) -> Column:
    """
    Invoke the anonymous function ``MAP_CONCAT`` over the given map columns.

    Columns may be passed as separate arguments or as a single iterable. The
    first column is the subject of the call and the rest are its arguments.
    """
    # Use the ``_flatten`` iterable helper here, as ``array`` and ``create_map``
    # do. The module-level ``flatten`` is the SQL column function and would wrap
    # the argument tuple in a ``Flatten`` expression instead of unpacking it.
    if not isinstance(cols[0], (str, Column)):
        columns = list(_flatten(cols))  # type: ignore
    else:
        columns = cols  # type: ignore
    return Column.invoke_anonymous_function(columns[0], "MAP_CONCAT", *columns[1:])
1543
+
1544
+
1545
@meta(unsupported_engines="postgres")
def sequence(
    start: ColumnOrName, stop: ColumnOrName, step: t.Optional[ColumnOrName] = None
) -> Column:
    """Invoke ``SEQUENCE`` on ``start`` with ``stop`` and, when given, ``step`` as arguments."""
    call_args = [stop] if step is None else [stop, step]
    return Column.invoke_anonymous_function(start, "SEQUENCE", *call_args)
1552
+
1553
+
1554
@meta(unsupported_engines=["bigquery", "duckdb", "postgres"])
def from_csv(
    col: ColumnOrName,
    schema: t.Union[Column, str],
    options: t.Optional[t.Dict[str, str]] = None,
) -> Column:
    """
    Invoke the anonymous function ``FROM_CSV`` on ``col``.

    A non-Column ``schema`` is passed as a literal. When ``options`` is given,
    its flattened key/value pairs are passed as a map built by ``create_map``.
    """
    call_args = [schema if isinstance(schema, Column) else lit(schema)]
    if options is not None:
        call_args.append(create_map([lit(x) for x in _flatten(options.items())]))
    return Column.invoke_anonymous_function(col, "FROM_CSV", *call_args)
1565
+
1566
+
1567
@meta(unsupported_engines=["bigquery", "duckdb", "postgres"])
def aggregate(
    col: ColumnOrName,
    initialValue: ColumnOrName,
    merge: t.Callable[[Column, Column], Column],
    finish: t.Optional[t.Callable[[Column], Column]] = None,
) -> Column:
    """
    Wrap ``col`` in a ``Reduce`` expression.

    ``merge`` (and ``finish`` when supplied) are converted to lambda
    expressions via ``_get_lambda_from_func`` and wrapped in ``Column``.
    """
    options = {
        "initial": initialValue,
        "merge": Column(_get_lambda_from_func(merge)),
    }
    if finish is not None:
        options["finish"] = Column(_get_lambda_from_func(finish))
    return Column.invoke_expression_over_column(col, expression.Reduce, **options)
1587
+
1588
+
1589
@meta(unsupported_engines=["bigquery", "duckdb", "postgres"])
def transform(
    col: ColumnOrName,
    f: t.Union[t.Callable[[Column], Column], t.Callable[[Column, Column], Column]],
) -> Column:
    """Wrap ``col`` in a ``Transform`` expression using the lambda expression built from ``f``."""
    lambda_column = Column(_get_lambda_from_func(f))
    return Column.invoke_expression_over_column(
        col, expression.Transform, expression=lambda_column
    )
1598
+
1599
+
1600
@meta(unsupported_engines=["bigquery", "duckdb", "postgres"])
def exists(col: ColumnOrName, f: t.Callable[[Column], Column]) -> Column:
    """Invoke ``EXISTS`` on ``col`` with the lambda expression built from ``f``."""
    lambda_column = Column(_get_lambda_from_func(f))
    return Column.invoke_anonymous_function(col, "EXISTS", lambda_column)
1604
+
1605
+
1606
@meta(unsupported_engines=["bigquery", "duckdb", "postgres"])
def forall(col: ColumnOrName, f: t.Callable[[Column], Column]) -> Column:
    """Invoke ``FORALL`` on ``col`` with the lambda expression built from ``f``."""
    lambda_column = Column(_get_lambda_from_func(f))
    return Column.invoke_anonymous_function(col, "FORALL", lambda_column)
1610
+
1611
+
1612
@meta(unsupported_engines=["bigquery", "duckdb", "postgres"])
def filter(
    col: ColumnOrName,
    f: t.Union[t.Callable[[Column], Column], t.Callable[[Column, Column], Column]],
) -> Column:
    """Wrap ``col`` in an ``ArrayFilter`` expression using the lambda expression built from ``f``."""
    predicate_lambda = _get_lambda_from_func(f)
    return Column.invoke_expression_over_column(
        col, expression.ArrayFilter, expression=predicate_lambda
    )
1621
+
1622
+
1623
@meta(unsupported_engines=["bigquery", "duckdb", "postgres"])
def zip_with(
    left: ColumnOrName, right: ColumnOrName, f: t.Callable[[Column, Column], Column]
) -> Column:
    """Invoke ``ZIP_WITH`` on ``left`` with ``right`` and the lambda expression built from ``f``."""
    lambda_column = Column(_get_lambda_from_func(f))
    return Column.invoke_anonymous_function(left, "ZIP_WITH", right, lambda_column)
1629
+
1630
+
1631
@meta(unsupported_engines=["bigquery", "duckdb", "postgres"])
def transform_keys(col: ColumnOrName, f: t.Union[t.Callable[[Column, Column], Column]]) -> Column:
    """Invoke ``TRANSFORM_KEYS`` on ``col`` with the lambda expression built from ``f``."""
    lambda_column = Column(_get_lambda_from_func(f))
    return Column.invoke_anonymous_function(col, "TRANSFORM_KEYS", lambda_column)
1635
+
1636
+
1637
@meta(unsupported_engines=["bigquery", "duckdb", "postgres"])
def transform_values(col: ColumnOrName, f: t.Union[t.Callable[[Column, Column], Column]]) -> Column:
    """Invoke ``TRANSFORM_VALUES`` on ``col`` with the lambda expression built from ``f``."""
    lambda_column = Column(_get_lambda_from_func(f))
    return Column.invoke_anonymous_function(col, "TRANSFORM_VALUES", lambda_column)
1641
+
1642
+
1643
@meta(unsupported_engines=["bigquery", "duckdb", "postgres"])
def map_filter(col: ColumnOrName, f: t.Union[t.Callable[[Column, Column], Column]]) -> Column:
    """Invoke ``MAP_FILTER`` on ``col`` with the lambda expression built from ``f``."""
    lambda_column = Column(_get_lambda_from_func(f))
    return Column.invoke_anonymous_function(col, "MAP_FILTER", lambda_column)
1647
+
1648
+
1649
@meta(unsupported_engines=["bigquery", "duckdb", "postgres"])
def map_zip_with(
    col1: ColumnOrName,
    col2: ColumnOrName,
    f: t.Union[t.Callable[[Column, Column, Column], Column]],
) -> Column:
    """Invoke ``MAP_ZIP_WITH`` on ``col1`` with ``col2`` and the lambda expression built from ``f``."""
    lambda_column = Column(_get_lambda_from_func(f))
    return Column.invoke_anonymous_function(col1, "MAP_ZIP_WITH", col2, lambda_column)
1657
+
1658
+
1659
@meta(unsupported_engines="postgres")
def typeof(col: ColumnOrName) -> Column:
    """Invoke the anonymous function ``TYPEOF`` on ``col``."""
    function_name = "TYPEOF"
    return Column.invoke_anonymous_function(col, function_name)
1662
+
1663
+
1664
@meta()
def _lambda_quoted(value: str) -> t.Optional[bool]:
    """Return ``False`` for the name ``"_"`` and ``None`` for every other name."""
    if value == "_":
        return False
    return None
1667
+
1668
+
1669
@meta()
def _get_lambda_from_func(lambda_expression: t.Callable):
    """
    Convert a Python callable into a ``Lambda`` expression.

    The callable's positional parameter names become the lambda's identifiers.
    The callable is invoked once with a ``Column`` per identifier, and the
    expression of the returned ``Column`` becomes the lambda body.
    """
    code = lambda_expression.__code__
    # ``co_varnames`` lists the parameters first and then every other local
    # variable. Only the first ``co_argcount`` names are positional parameters,
    # so a ``def`` with local variables is not called with extra arguments.
    parameter_names = code.co_varnames[: code.co_argcount]
    variables = [
        expression.to_identifier(name, quoted=_lambda_quoted(name)) for name in parameter_names
    ]
    return expression.Lambda(
        this=lambda_expression(*[Column(x) for x in variables]).expression,
        expressions=variables,
    )