sqlframe 3.10.0__py3-none-any.whl → 3.11.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sqlframe/_version.py +2 -2
- sqlframe/base/column.py +11 -8
- sqlframe/base/decorators.py +41 -2
- sqlframe/base/function_alternatives.py +445 -404
- sqlframe/base/functions.py +1100 -73
- sqlframe/base/session.py +34 -2
- sqlframe/bigquery/functions.py +1 -361
- sqlframe/bigquery/functions.pyi +63 -156
- sqlframe/bigquery/session.py +4 -0
- sqlframe/databricks/functions.py +0 -10
- sqlframe/databricks/functions.pyi +405 -413
- sqlframe/databricks/session.py +4 -0
- sqlframe/duckdb/functions.py +0 -40
- sqlframe/duckdb/functions.pyi +219 -216
- sqlframe/duckdb/session.py +4 -0
- sqlframe/postgres/functions.py +1 -60
- sqlframe/postgres/functions.pyi +197 -196
- sqlframe/postgres/session.py +4 -0
- sqlframe/redshift/functions.py +1 -4
- sqlframe/redshift/session.py +4 -0
- sqlframe/snowflake/functions.py +1 -55
- sqlframe/snowflake/functions.pyi +224 -220
- sqlframe/snowflake/session.py +4 -0
- sqlframe/spark/functions.py +0 -9
- sqlframe/spark/functions.pyi +411 -413
- sqlframe/spark/session.py +4 -0
- sqlframe/standalone/functions.py +1 -1
- sqlframe/standalone/session.py +4 -0
- {sqlframe-3.10.0.dist-info → sqlframe-3.11.0.dist-info}/METADATA +4 -4
- {sqlframe-3.10.0.dist-info → sqlframe-3.11.0.dist-info}/RECORD +33 -33
- {sqlframe-3.10.0.dist-info → sqlframe-3.11.0.dist-info}/LICENSE +0 -0
- {sqlframe-3.10.0.dist-info → sqlframe-3.11.0.dist-info}/WHEEL +0 -0
- {sqlframe-3.10.0.dist-info → sqlframe-3.11.0.dist-info}/top_level.txt +0 -0
|
@@ -55,13 +55,7 @@ def kurtosis_from_kurtosis_pop(col: ColumnOrName) -> Column:
|
|
|
55
55
|
|
|
56
56
|
def collect_set_from_list_distinct(col: ColumnOrName) -> Column:
|
|
57
57
|
collect_list = get_func_from_session("collect_list")
|
|
58
|
-
return collect_list(Column(expression.Distinct(expressions=[Column(col).
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
def first_always_ignore_nulls(col: ColumnOrName, ignorenulls: t.Optional[bool] = None) -> Column:
|
|
62
|
-
from sqlframe.base.functions import first
|
|
63
|
-
|
|
64
|
-
return first(col)
|
|
58
|
+
return collect_list(Column(expression.Distinct(expressions=[Column(col).column_expression])))
|
|
65
59
|
|
|
66
60
|
|
|
67
61
|
def to_timestamp_with_time_zone(col: ColumnOrName, format: t.Optional[str] = None) -> Column:
|
|
@@ -208,14 +202,18 @@ def skewness_from_skew(col: ColumnOrName) -> Column:
|
|
|
208
202
|
def isnan_using_equal(col: ColumnOrName) -> Column:
|
|
209
203
|
lit = get_func_from_session("lit")
|
|
210
204
|
return Column(
|
|
211
|
-
expression.EQ(
|
|
205
|
+
expression.EQ(
|
|
206
|
+
this=Column(col).column_expression, expression=lit(float("nan")).column_expression
|
|
207
|
+
)
|
|
212
208
|
)
|
|
213
209
|
|
|
214
210
|
|
|
215
211
|
def isnull_using_equal(col: ColumnOrName) -> Column:
|
|
216
212
|
lit = get_func_from_session("lit")
|
|
217
213
|
col_func = get_func_from_session("col")
|
|
218
|
-
return Column(
|
|
214
|
+
return Column(
|
|
215
|
+
expression.Is(this=col_func(col).column_expression, expression=lit(None).column_expression)
|
|
216
|
+
)
|
|
219
217
|
|
|
220
218
|
|
|
221
219
|
def nanvl_as_case(col1: ColumnOrName, col2: ColumnOrName) -> Column:
|
|
@@ -225,18 +223,6 @@ def nanvl_as_case(col1: ColumnOrName, col2: ColumnOrName) -> Column:
|
|
|
225
223
|
return when(~isnan(col1), col(col1)).otherwise(col(col2))
|
|
226
224
|
|
|
227
225
|
|
|
228
|
-
def percentile_approx_without_accuracy(
|
|
229
|
-
col: ColumnOrName,
|
|
230
|
-
percentage: t.Union[ColumnOrLiteral, t.List[float], t.Tuple[float]],
|
|
231
|
-
accuracy: t.Optional[float] = None,
|
|
232
|
-
) -> Column:
|
|
233
|
-
from sqlframe.base.functions import percentile_approx
|
|
234
|
-
|
|
235
|
-
if accuracy:
|
|
236
|
-
logger.warning("Accuracy is ignored since it is not supported in this dialect")
|
|
237
|
-
return percentile_approx(col, percentage)
|
|
238
|
-
|
|
239
|
-
|
|
240
226
|
def percentile_approx_without_accuracy_and_plural(
|
|
241
227
|
col: ColumnOrName,
|
|
242
228
|
percentage: t.Union[ColumnOrLiteral, t.List[float], t.Tuple[float]],
|
|
@@ -250,9 +236,9 @@ def percentile_approx_without_accuracy_and_plural(
|
|
|
250
236
|
return expression.Bracket(
|
|
251
237
|
this=expression.Anonymous(
|
|
252
238
|
this="APPROX_QUANTILES",
|
|
253
|
-
expressions=[col_func(col).
|
|
239
|
+
expressions=[col_func(col).column_expression, lit(100).column_expression],
|
|
254
240
|
),
|
|
255
|
-
expressions=[lit(int(percentage * 100)).cast("int").
|
|
241
|
+
expressions=[lit(int(percentage * 100)).cast("int").column_expression],
|
|
256
242
|
offset=0,
|
|
257
243
|
safe=False,
|
|
258
244
|
)
|
|
@@ -269,8 +255,6 @@ def percentile_approx_without_accuracy_and_max_array(
|
|
|
269
255
|
percentage: t.Union[ColumnOrLiteral, t.List[float], t.Tuple[float]],
|
|
270
256
|
accuracy: t.Optional[float] = None,
|
|
271
257
|
) -> Column:
|
|
272
|
-
from sqlframe.base.functions import percentile_approx
|
|
273
|
-
|
|
274
258
|
lit = get_func_from_session("lit")
|
|
275
259
|
array = get_func_from_session("array")
|
|
276
260
|
col_func = get_func_from_session("col")
|
|
@@ -278,14 +262,13 @@ def percentile_approx_without_accuracy_and_max_array(
|
|
|
278
262
|
def make_approx_percentile(percentage: float) -> expression.Anonymous:
|
|
279
263
|
return expression.Anonymous(
|
|
280
264
|
this="APPROX_PERCENTILE",
|
|
281
|
-
expressions=[col_func(col).
|
|
265
|
+
expressions=[col_func(col).column_expression, lit(percentage).column_expression],
|
|
282
266
|
)
|
|
283
267
|
|
|
284
268
|
if accuracy:
|
|
285
269
|
logger.warning("Accuracy is ignored since it is not supported in this dialect")
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
return percentile_approx(col, percentage)
|
|
270
|
+
|
|
271
|
+
return array(*[make_approx_percentile(p) for p in percentage]) # type: ignore
|
|
289
272
|
|
|
290
273
|
|
|
291
274
|
def percentile_without_disc(
|
|
@@ -298,8 +281,8 @@ def percentile_without_disc(
|
|
|
298
281
|
|
|
299
282
|
percentage_col = percentage if isinstance(percentage, Column) else lit(percentage)
|
|
300
283
|
func_expressions = [
|
|
301
|
-
col_func(col).
|
|
302
|
-
percentage_col.
|
|
284
|
+
col_func(col).column_expression,
|
|
285
|
+
percentage_col.column_expression,
|
|
303
286
|
]
|
|
304
287
|
if frequency:
|
|
305
288
|
func_expressions.append(frequency if isinstance(frequency, Column) else lit(frequency))
|
|
@@ -311,22 +294,6 @@ def percentile_without_disc(
|
|
|
311
294
|
)
|
|
312
295
|
|
|
313
296
|
|
|
314
|
-
def rand_no_seed(seed: t.Optional[ColumnOrLiteral] = None) -> Column:
|
|
315
|
-
from sqlframe.base.functions import rand
|
|
316
|
-
|
|
317
|
-
if seed:
|
|
318
|
-
logger.warning("Seed is ignored since it is not supported in this dialect")
|
|
319
|
-
return rand()
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
def round_cast_as_numeric(col: ColumnOrName, scale: t.Optional[int] = None) -> Column:
|
|
323
|
-
from sqlframe.base.functions import round
|
|
324
|
-
|
|
325
|
-
col_func = get_func_from_session("col")
|
|
326
|
-
|
|
327
|
-
return round(col_func(col).cast("numeric"), scale)
|
|
328
|
-
|
|
329
|
-
|
|
330
297
|
def bround_using_half_even(col: ColumnOrName, scale: t.Optional[int] = None) -> Column:
|
|
331
298
|
lit_func = get_func_from_session("lit")
|
|
332
299
|
|
|
@@ -340,7 +307,7 @@ def shiftleft_from_bitshiftleft(col: ColumnOrName, numBits: int) -> Column:
|
|
|
340
307
|
return Column(
|
|
341
308
|
expression.Anonymous(
|
|
342
309
|
this="BITSHIFTLEFT",
|
|
343
|
-
expressions=[col_func(col).
|
|
310
|
+
expressions=[col_func(col).column_expression, lit(numBits).column_expression],
|
|
344
311
|
)
|
|
345
312
|
)
|
|
346
313
|
|
|
@@ -352,7 +319,7 @@ def shiftright_from_bitshiftright(col: ColumnOrName, numBits: int) -> Column:
|
|
|
352
319
|
return Column(
|
|
353
320
|
expression.Anonymous(
|
|
354
321
|
this="BITSHIFTRIGHT",
|
|
355
|
-
expressions=[col_func(col).
|
|
322
|
+
expressions=[col_func(col).column_expression, lit(numBits).column_expression],
|
|
356
323
|
)
|
|
357
324
|
)
|
|
358
325
|
|
|
@@ -372,7 +339,7 @@ def struct_with_eq(
|
|
|
372
339
|
this=expression.parse_identifier(
|
|
373
340
|
column.alias_or_name, dialect=_BaseSession().input_dialect
|
|
374
341
|
),
|
|
375
|
-
expression=column.
|
|
342
|
+
expression=column.column_expression,
|
|
376
343
|
)
|
|
377
344
|
)
|
|
378
345
|
return Column(expression.Struct(expressions=expressions))
|
|
@@ -383,7 +350,8 @@ def year_from_extract(col: ColumnOrName) -> Column:
|
|
|
383
350
|
|
|
384
351
|
return Column(
|
|
385
352
|
expression.Extract(
|
|
386
|
-
this=expression.Var(this="year"),
|
|
353
|
+
this=expression.Var(this="year"),
|
|
354
|
+
expression=col_func(col).cast("date").column_expression,
|
|
387
355
|
)
|
|
388
356
|
)
|
|
389
357
|
|
|
@@ -393,7 +361,8 @@ def quarter_from_extract(col: ColumnOrName) -> Column:
|
|
|
393
361
|
|
|
394
362
|
return Column(
|
|
395
363
|
expression.Extract(
|
|
396
|
-
this=expression.Var(this="quarter"),
|
|
364
|
+
this=expression.Var(this="quarter"),
|
|
365
|
+
expression=col_func(col).cast("date").column_expression,
|
|
397
366
|
)
|
|
398
367
|
)
|
|
399
368
|
|
|
@@ -403,7 +372,8 @@ def month_from_extract(col: ColumnOrName) -> Column:
|
|
|
403
372
|
|
|
404
373
|
return Column(
|
|
405
374
|
expression.Extract(
|
|
406
|
-
this=expression.Var(this="month"),
|
|
375
|
+
this=expression.Var(this="month"),
|
|
376
|
+
expression=col_func(col).cast("date").column_expression,
|
|
407
377
|
)
|
|
408
378
|
)
|
|
409
379
|
|
|
@@ -413,7 +383,8 @@ def dayofweek_from_extract(col: ColumnOrName) -> Column:
|
|
|
413
383
|
|
|
414
384
|
return Column(
|
|
415
385
|
expression.Extract(
|
|
416
|
-
this=expression.Var(this="dayofweek"),
|
|
386
|
+
this=expression.Var(this="dayofweek"),
|
|
387
|
+
expression=col_func(col).cast("date").column_expression,
|
|
417
388
|
)
|
|
418
389
|
)
|
|
419
390
|
|
|
@@ -423,17 +394,8 @@ def dayofweek_from_extract_with_isodow(col: ColumnOrName) -> Column:
|
|
|
423
394
|
|
|
424
395
|
return Column(
|
|
425
396
|
expression.Extract(
|
|
426
|
-
this=expression.Var(this="isodow"),
|
|
427
|
-
|
|
428
|
-
)
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
def dayofmonth_from_extract(col: ColumnOrName) -> Column:
|
|
432
|
-
col_func = get_func_from_session("col")
|
|
433
|
-
|
|
434
|
-
return Column(
|
|
435
|
-
expression.Extract(
|
|
436
|
-
this=expression.Var(this="dayofmonth"), expression=col_func(col).cast("date").expression
|
|
397
|
+
this=expression.Var(this="isodow"),
|
|
398
|
+
expression=col_func(col).cast("date").column_expression,
|
|
437
399
|
)
|
|
438
400
|
)
|
|
439
401
|
|
|
@@ -443,7 +405,7 @@ def dayofmonth_from_extract_with_day(col: ColumnOrName) -> Column:
|
|
|
443
405
|
|
|
444
406
|
return Column(
|
|
445
407
|
expression.Extract(
|
|
446
|
-
this=expression.Var(this="day"), expression=col_func(col).cast("date").
|
|
408
|
+
this=expression.Var(this="day"), expression=col_func(col).cast("date").column_expression
|
|
447
409
|
)
|
|
448
410
|
)
|
|
449
411
|
|
|
@@ -453,7 +415,8 @@ def dayofyear_from_extract(col: ColumnOrName) -> Column:
|
|
|
453
415
|
|
|
454
416
|
return Column(
|
|
455
417
|
expression.Extract(
|
|
456
|
-
this=expression.Var(this="dayofyear"),
|
|
418
|
+
this=expression.Var(this="dayofyear"),
|
|
419
|
+
expression=col_func(col).cast("date").column_expression,
|
|
457
420
|
)
|
|
458
421
|
)
|
|
459
422
|
|
|
@@ -463,7 +426,7 @@ def dayofyear_from_extract_doy(col: ColumnOrName) -> Column:
|
|
|
463
426
|
|
|
464
427
|
return Column(
|
|
465
428
|
expression.Extract(
|
|
466
|
-
this=expression.Var(this="doy"), expression=col_func(col).cast("date").
|
|
429
|
+
this=expression.Var(this="doy"), expression=col_func(col).cast("date").column_expression
|
|
467
430
|
)
|
|
468
431
|
)
|
|
469
432
|
|
|
@@ -472,7 +435,9 @@ def hour_from_extract(col: ColumnOrName) -> Column:
|
|
|
472
435
|
col_func = get_func_from_session("col")
|
|
473
436
|
|
|
474
437
|
return Column(
|
|
475
|
-
expression.Extract(
|
|
438
|
+
expression.Extract(
|
|
439
|
+
this=expression.Var(this="hour"), expression=col_func(col).column_expression
|
|
440
|
+
)
|
|
476
441
|
)
|
|
477
442
|
|
|
478
443
|
|
|
@@ -480,7 +445,9 @@ def minute_from_extract(col: ColumnOrName) -> Column:
|
|
|
480
445
|
col_func = get_func_from_session("col")
|
|
481
446
|
|
|
482
447
|
return Column(
|
|
483
|
-
expression.Extract(
|
|
448
|
+
expression.Extract(
|
|
449
|
+
this=expression.Var(this="minute"), expression=col_func(col).column_expression
|
|
450
|
+
)
|
|
484
451
|
)
|
|
485
452
|
|
|
486
453
|
|
|
@@ -488,7 +455,9 @@ def second_from_extract(col: ColumnOrName) -> Column:
|
|
|
488
455
|
col_func = get_func_from_session("col")
|
|
489
456
|
|
|
490
457
|
return Column(
|
|
491
|
-
expression.Extract(
|
|
458
|
+
expression.Extract(
|
|
459
|
+
this=expression.Var(this="second"), expression=col_func(col).column_expression
|
|
460
|
+
)
|
|
492
461
|
)
|
|
493
462
|
|
|
494
463
|
|
|
@@ -497,7 +466,8 @@ def weekofyear_from_extract_as_week(col: ColumnOrName) -> Column:
|
|
|
497
466
|
|
|
498
467
|
return Column(
|
|
499
468
|
expression.Extract(
|
|
500
|
-
this=expression.Var(this="week"),
|
|
469
|
+
this=expression.Var(this="week"),
|
|
470
|
+
expression=col_func(col).cast("date").column_expression,
|
|
501
471
|
)
|
|
502
472
|
)
|
|
503
473
|
|
|
@@ -507,25 +477,12 @@ def weekofyear_from_extract_as_isoweek(col: ColumnOrName) -> Column:
|
|
|
507
477
|
|
|
508
478
|
return Column(
|
|
509
479
|
expression.Extract(
|
|
510
|
-
this=expression.Var(this="ISOWEEK"),
|
|
480
|
+
this=expression.Var(this="ISOWEEK"),
|
|
481
|
+
expression=col_func(col).cast("date").column_expression,
|
|
511
482
|
)
|
|
512
483
|
)
|
|
513
484
|
|
|
514
485
|
|
|
515
|
-
def make_date_casted_as_integer(
|
|
516
|
-
year: ColumnOrName, month: ColumnOrName, day: ColumnOrName
|
|
517
|
-
) -> Column:
|
|
518
|
-
from sqlframe.base.functions import make_date
|
|
519
|
-
|
|
520
|
-
col_func = get_func_from_session("col")
|
|
521
|
-
|
|
522
|
-
return make_date(
|
|
523
|
-
col_func(year).cast("integer"),
|
|
524
|
-
col_func(month).cast("integer"),
|
|
525
|
-
col_func(day).cast("integer"),
|
|
526
|
-
)
|
|
527
|
-
|
|
528
|
-
|
|
529
486
|
def make_date_from_date_func(year: ColumnOrName, month: ColumnOrName, day: ColumnOrName) -> Column:
|
|
530
487
|
col_func = get_func_from_session("col")
|
|
531
488
|
|
|
@@ -533,9 +490,9 @@ def make_date_from_date_func(year: ColumnOrName, month: ColumnOrName, day: Colum
|
|
|
533
490
|
expression.Anonymous(
|
|
534
491
|
this="DATE",
|
|
535
492
|
expressions=[
|
|
536
|
-
col_func(year).cast("integer").
|
|
537
|
-
col_func(month).cast("integer").
|
|
538
|
-
col_func(day).cast("integer").
|
|
493
|
+
col_func(year).cast("integer").column_expression,
|
|
494
|
+
col_func(month).cast("integer").column_expression,
|
|
495
|
+
col_func(day).cast("integer").column_expression,
|
|
539
496
|
],
|
|
540
497
|
)
|
|
541
498
|
)
|
|
@@ -548,9 +505,9 @@ def make_date_date_from_parts(year: ColumnOrName, month: ColumnOrName, day: Colu
|
|
|
548
505
|
expression.Anonymous(
|
|
549
506
|
this="DATE_FROM_PARTS",
|
|
550
507
|
expressions=[
|
|
551
|
-
col_func(year).cast("integer").
|
|
552
|
-
col_func(month).cast("integer").
|
|
553
|
-
col_func(day).cast("integer").
|
|
508
|
+
col_func(year).cast("integer").column_expression,
|
|
509
|
+
col_func(month).cast("integer").column_expression,
|
|
510
|
+
col_func(day).cast("integer").column_expression,
|
|
554
511
|
],
|
|
555
512
|
)
|
|
556
513
|
)
|
|
@@ -584,29 +541,6 @@ def date_sub_by_date_add(
|
|
|
584
541
|
return date_add_func(col, days * lit_func(-1), cast_as_date)
|
|
585
542
|
|
|
586
543
|
|
|
587
|
-
def to_date_from_timestamp(col: ColumnOrName, format: t.Optional[str] = None) -> Column:
|
|
588
|
-
from sqlframe.base.functions import to_date
|
|
589
|
-
|
|
590
|
-
to_timestamp = get_func_from_session("to_timestamp")
|
|
591
|
-
|
|
592
|
-
return to_date(to_timestamp(col, format))
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
def to_date_time_format(col: ColumnOrName, format: t.Optional[str] = None) -> Column:
|
|
596
|
-
from sqlframe.base.functions import to_date
|
|
597
|
-
from sqlframe.base.session import _BaseSession
|
|
598
|
-
|
|
599
|
-
return to_date(col, format=format or _BaseSession().default_time_format)
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
def last_day_with_cast(col: ColumnOrName) -> Column:
|
|
603
|
-
from sqlframe.base.functions import last_day
|
|
604
|
-
|
|
605
|
-
col_func = get_func_from_session("col")
|
|
606
|
-
|
|
607
|
-
return last_day(col_func(col).cast("date"))
|
|
608
|
-
|
|
609
|
-
|
|
610
544
|
def sha1_force_sha1_and_to_hex(col: ColumnOrName) -> Column:
|
|
611
545
|
col_func = get_func_from_session("col")
|
|
612
546
|
|
|
@@ -616,7 +550,7 @@ def sha1_force_sha1_and_to_hex(col: ColumnOrName) -> Column:
|
|
|
616
550
|
expressions=[
|
|
617
551
|
expression.Anonymous(
|
|
618
552
|
this="SHA1",
|
|
619
|
-
expressions=[col_func(col).
|
|
553
|
+
expressions=[col_func(col).column_expression],
|
|
620
554
|
)
|
|
621
555
|
],
|
|
622
556
|
)
|
|
@@ -632,64 +566,17 @@ def hash_from_farm_fingerprint(*cols: ColumnOrName) -> Column:
|
|
|
632
566
|
return Column(
|
|
633
567
|
expression.Anonymous(
|
|
634
568
|
this="FARM_FINGERPRINT",
|
|
635
|
-
expressions=[col_func(cols[0]).
|
|
569
|
+
expressions=[col_func(cols[0]).column_expression],
|
|
636
570
|
)
|
|
637
571
|
)
|
|
638
572
|
|
|
639
573
|
|
|
640
|
-
def date_add_by_multiplication(
|
|
641
|
-
col: ColumnOrName, days: t.Union[ColumnOrName, int], cast_as_date: bool = True
|
|
642
|
-
) -> Column:
|
|
643
|
-
from sqlframe.base.functions import date_add
|
|
644
|
-
|
|
645
|
-
col_func = get_func_from_session("col")
|
|
646
|
-
|
|
647
|
-
if isinstance(days, int):
|
|
648
|
-
value = date_add(col, days)
|
|
649
|
-
else:
|
|
650
|
-
value = date_add(col, 1, cast_as_date=False) * col_func(days)
|
|
651
|
-
if cast_as_date:
|
|
652
|
-
return value.cast("date")
|
|
653
|
-
return value
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
def date_sub_by_multiplication(
|
|
657
|
-
col: ColumnOrName, days: t.Union[ColumnOrName, int], cast_as_date: bool = True
|
|
658
|
-
) -> Column:
|
|
659
|
-
from sqlframe.base.functions import date_sub
|
|
660
|
-
|
|
661
|
-
col_func = get_func_from_session("col")
|
|
662
|
-
|
|
663
|
-
if isinstance(days, int):
|
|
664
|
-
value = date_sub(col, days)
|
|
665
|
-
else:
|
|
666
|
-
value = date_sub(col, 1, cast_as_date=False) * col_func(days)
|
|
667
|
-
if cast_as_date:
|
|
668
|
-
return value.cast("date")
|
|
669
|
-
return value
|
|
670
|
-
|
|
671
|
-
|
|
672
574
|
def date_diff_with_subtraction(end: ColumnOrName, start: ColumnOrName) -> Column:
|
|
673
575
|
col_func = get_func_from_session("col")
|
|
674
576
|
|
|
675
577
|
return col_func(end).cast("date") - col_func(start).cast("date")
|
|
676
578
|
|
|
677
579
|
|
|
678
|
-
def add_months_by_multiplication(
|
|
679
|
-
start: ColumnOrName, months: t.Union[ColumnOrName, int], cast_as_date: bool = True
|
|
680
|
-
) -> Column:
|
|
681
|
-
from sqlframe.base.functions import add_months
|
|
682
|
-
|
|
683
|
-
col_func = get_func_from_session("col")
|
|
684
|
-
lit = get_func_from_session("lit")
|
|
685
|
-
|
|
686
|
-
multiple_value = lit(months) if isinstance(months, int) else col_func(months)
|
|
687
|
-
value = col_func(add_months(start, 1, cast_as_date=False).expression.unnest()) * multiple_value
|
|
688
|
-
if cast_as_date:
|
|
689
|
-
return value.cast("date")
|
|
690
|
-
return value
|
|
691
|
-
|
|
692
|
-
|
|
693
580
|
def add_months_using_func(
|
|
694
581
|
start: ColumnOrName, months: t.Union[ColumnOrName, int], cast_as_date: bool = True
|
|
695
582
|
) -> Column:
|
|
@@ -704,8 +591,8 @@ def add_months_using_func(
|
|
|
704
591
|
expression.Anonymous(
|
|
705
592
|
this="ADD_MONTHS",
|
|
706
593
|
expressions=[
|
|
707
|
-
Column.ensure_col(start).
|
|
708
|
-
months.
|
|
594
|
+
Column.ensure_col(start).column_expression,
|
|
595
|
+
months.column_expression, # type: ignore
|
|
709
596
|
],
|
|
710
597
|
)
|
|
711
598
|
)
|
|
@@ -726,8 +613,8 @@ def months_between_from_age_and_extract(
|
|
|
726
613
|
age_expression = expression.Anonymous(
|
|
727
614
|
this="AGE",
|
|
728
615
|
expressions=[
|
|
729
|
-
col_func(date1).cast("date").
|
|
730
|
-
col_func(date2).cast("date").
|
|
616
|
+
col_func(date1).cast("date").column_expression,
|
|
617
|
+
col_func(date2).cast("date").column_expression,
|
|
731
618
|
],
|
|
732
619
|
)
|
|
733
620
|
return (
|
|
@@ -740,23 +627,6 @@ def months_between_from_age_and_extract(
|
|
|
740
627
|
).cast("bigint")
|
|
741
628
|
|
|
742
629
|
|
|
743
|
-
def months_between_cast_as_date_cast_roundoff(
|
|
744
|
-
date1: ColumnOrName, date2: ColumnOrName, roundOff: t.Optional[bool] = None
|
|
745
|
-
) -> Column:
|
|
746
|
-
from sqlframe.base.functions import months_between
|
|
747
|
-
|
|
748
|
-
col_func = get_func_from_session("col")
|
|
749
|
-
|
|
750
|
-
date1 = col_func(date1).cast("date")
|
|
751
|
-
date2 = col_func(date2).cast("date")
|
|
752
|
-
|
|
753
|
-
value = months_between(date1, date2)
|
|
754
|
-
|
|
755
|
-
if roundOff:
|
|
756
|
-
return value.cast("bigint")
|
|
757
|
-
return value
|
|
758
|
-
|
|
759
|
-
|
|
760
630
|
def from_unixtime_from_timestamp(col: ColumnOrName, format: t.Optional[str] = None) -> Column:
|
|
761
631
|
from sqlframe.base.session import _BaseSession
|
|
762
632
|
|
|
@@ -766,7 +636,7 @@ def from_unixtime_from_timestamp(col: ColumnOrName, format: t.Optional[str] = No
|
|
|
766
636
|
Column(
|
|
767
637
|
expression.Anonymous(
|
|
768
638
|
this="TO_TIMESTAMP",
|
|
769
|
-
expressions=[col_func(col).
|
|
639
|
+
expressions=[col_func(col).column_expression],
|
|
770
640
|
)
|
|
771
641
|
),
|
|
772
642
|
expression.TimeToStr,
|
|
@@ -781,7 +651,8 @@ def unix_timestamp_from_extract(
|
|
|
781
651
|
|
|
782
652
|
return Column(
|
|
783
653
|
expression.Extract(
|
|
784
|
-
this=expression.Var(this="epoch"),
|
|
654
|
+
this=expression.Var(this="epoch"),
|
|
655
|
+
expression=to_timestamp(timestamp, format).column_expression,
|
|
785
656
|
)
|
|
786
657
|
).cast("bigint")
|
|
787
658
|
|
|
@@ -790,10 +661,11 @@ def base64_from_blob(col: ColumnOrLiteral) -> Column:
|
|
|
790
661
|
return Column.invoke_expression_over_column(Column(col).cast("blob"), expression.ToBase64)
|
|
791
662
|
|
|
792
663
|
|
|
793
|
-
def
|
|
664
|
+
def base64_from_encode(col: ColumnOrLiteral) -> Column:
|
|
794
665
|
return Column(
|
|
795
666
|
expression.Encode(
|
|
796
|
-
this=Column(col).cast("bytea").
|
|
667
|
+
this=Column(col).cast("bytea").column_expression,
|
|
668
|
+
charset=expression.Literal.string("base64"),
|
|
797
669
|
)
|
|
798
670
|
)
|
|
799
671
|
|
|
@@ -802,14 +674,16 @@ def base64_from_base64_encode(col: ColumnOrLiteral) -> Column:
|
|
|
802
674
|
return Column(
|
|
803
675
|
expression.Anonymous(
|
|
804
676
|
this="BASE64_ENCODE",
|
|
805
|
-
expressions=[Column(col).
|
|
677
|
+
expressions=[Column(col).column_expression],
|
|
806
678
|
)
|
|
807
679
|
)
|
|
808
680
|
|
|
809
681
|
|
|
810
682
|
def unbase64_from_decode(col: ColumnOrLiteral) -> Column:
|
|
811
683
|
return Column(
|
|
812
|
-
expression.Decode(
|
|
684
|
+
expression.Decode(
|
|
685
|
+
this=Column(col).column_expression, charset=expression.Literal.string("base64")
|
|
686
|
+
)
|
|
813
687
|
)
|
|
814
688
|
|
|
815
689
|
|
|
@@ -817,7 +691,7 @@ def unbase64_from_base64_decode_string(col: ColumnOrLiteral) -> Column:
|
|
|
817
691
|
return Column(
|
|
818
692
|
expression.Anonymous(
|
|
819
693
|
this="BASE64_DECODE_STRING",
|
|
820
|
-
expressions=[Column(col).
|
|
694
|
+
expressions=[Column(col).column_expression],
|
|
821
695
|
)
|
|
822
696
|
)
|
|
823
697
|
|
|
@@ -825,7 +699,8 @@ def unbase64_from_base64_decode_string(col: ColumnOrLiteral) -> Column:
|
|
|
825
699
|
def decode_from_blob(col: ColumnOrLiteral, charset: str) -> Column:
|
|
826
700
|
return Column(
|
|
827
701
|
expression.Decode(
|
|
828
|
-
this=Column(col).cast("blob").
|
|
702
|
+
this=Column(col).cast("blob").column_expression,
|
|
703
|
+
charset=expression.Literal.string(charset),
|
|
829
704
|
)
|
|
830
705
|
)
|
|
831
706
|
|
|
@@ -834,7 +709,10 @@ def decode_from_convert_from(col: ColumnOrLiteral, charset: str) -> Column:
|
|
|
834
709
|
return Column(
|
|
835
710
|
expression.Anonymous(
|
|
836
711
|
this="CONVERT_FROM",
|
|
837
|
-
expressions=[
|
|
712
|
+
expressions=[
|
|
713
|
+
Column(col).cast("bytea").column_expression,
|
|
714
|
+
expression.Literal.string(charset),
|
|
715
|
+
],
|
|
838
716
|
)
|
|
839
717
|
)
|
|
840
718
|
|
|
@@ -845,7 +723,7 @@ def encode_from_convert_to(col: ColumnOrName, charset: str) -> Column:
|
|
|
845
723
|
return Column(
|
|
846
724
|
expression.Anonymous(
|
|
847
725
|
this="CONVERT_TO",
|
|
848
|
-
expressions=[col_func(col).
|
|
726
|
+
expressions=[col_func(col).column_expression, expression.Literal.string(charset)],
|
|
849
727
|
)
|
|
850
728
|
)
|
|
851
729
|
|
|
@@ -857,7 +735,7 @@ def concat_ws_from_array_to_string(sep: str, *cols: ColumnOrName) -> Column:
|
|
|
857
735
|
return Column(
|
|
858
736
|
expression.Anonymous(
|
|
859
737
|
this="ARRAY_TO_STRING",
|
|
860
|
-
expressions=[array(*cols).
|
|
738
|
+
expressions=[array(*cols).column_expression, lit(sep).column_expression],
|
|
861
739
|
)
|
|
862
740
|
)
|
|
863
741
|
|
|
@@ -867,7 +745,9 @@ def format_number_from_to_char(col: ColumnOrName, d: int) -> Column:
|
|
|
867
745
|
format = "FM" + ("999," * 5) + "990" + "D" + ("0" * d)
|
|
868
746
|
|
|
869
747
|
return Column(
|
|
870
|
-
expression.ToChar(
|
|
748
|
+
expression.ToChar(
|
|
749
|
+
this=round(col, d).column_expression, format=expression.Literal.string(format)
|
|
750
|
+
)
|
|
871
751
|
)
|
|
872
752
|
|
|
873
753
|
|
|
@@ -879,7 +759,7 @@ def format_string_with_format(format: str, *cols: ColumnOrName) -> Column:
|
|
|
879
759
|
this="FORMAT",
|
|
880
760
|
expressions=[
|
|
881
761
|
expression.Literal.string(format.replace("%d", "%s")),
|
|
882
|
-
*[col_func(x).cast("string").
|
|
762
|
+
*[col_func(x).cast("string").column_expression for x in ensure_list(cols)],
|
|
883
763
|
],
|
|
884
764
|
)
|
|
885
765
|
)
|
|
@@ -893,15 +773,15 @@ def format_string_with_pipes(format: str, *cols: ColumnOrName) -> Column:
|
|
|
893
773
|
if len(values) != len(cols) + 1:
|
|
894
774
|
raise ValueError("Number of values and columns do not match")
|
|
895
775
|
result = expression.DPipe(
|
|
896
|
-
this=lit(values[0]).
|
|
776
|
+
this=lit(values[0]).column_expression, expression=col_func(cols[0]).column_expression
|
|
897
777
|
)
|
|
898
778
|
for i, value in enumerate(values[1:], start=1):
|
|
899
779
|
if i == len(cols):
|
|
900
|
-
result = expression.DPipe(this=result, expression=lit(value).
|
|
780
|
+
result = expression.DPipe(this=result, expression=lit(value).column_expression)
|
|
901
781
|
else:
|
|
902
782
|
result = expression.DPipe(
|
|
903
|
-
this=expression.DPipe(this=result, expression=lit(value).
|
|
904
|
-
expression=col_func(cols[i]).
|
|
783
|
+
this=expression.DPipe(this=result, expression=lit(value).column_expression),
|
|
784
|
+
expression=col_func(cols[i]).column_expression,
|
|
905
785
|
)
|
|
906
786
|
return Column(result)
|
|
907
787
|
|
|
@@ -913,7 +793,7 @@ def instr_using_strpos(col: ColumnOrName, substr: str) -> Column:
|
|
|
913
793
|
return Column(
|
|
914
794
|
expression.Anonymous(
|
|
915
795
|
this="STRPOS",
|
|
916
|
-
expressions=[col_func(col).
|
|
796
|
+
expressions=[col_func(col).column_expression, lit(substr).column_expression],
|
|
917
797
|
)
|
|
918
798
|
)
|
|
919
799
|
|
|
@@ -932,11 +812,11 @@ def overlay_from_substr(
|
|
|
932
812
|
return Column(
|
|
933
813
|
expression.Concat(
|
|
934
814
|
expressions=[
|
|
935
|
-
substring(col_func(src), 1, col_func(pos) - lit(1)).
|
|
936
|
-
col_func(replace).
|
|
815
|
+
substring(col_func(src), 1, col_func(pos) - lit(1)).column_expression,
|
|
816
|
+
col_func(replace).column_expression,
|
|
937
817
|
substring(
|
|
938
818
|
col_func(src), col_func(pos) + col_func(length_value), length_func(src)
|
|
939
|
-
).
|
|
819
|
+
).column_expression,
|
|
940
820
|
]
|
|
941
821
|
)
|
|
942
822
|
)
|
|
@@ -950,21 +830,14 @@ def levenshtein_edit_distance(
|
|
|
950
830
|
return Column(
|
|
951
831
|
expression.Anonymous(
|
|
952
832
|
this="EDITDISTANCE",
|
|
953
|
-
expressions=[
|
|
833
|
+
expressions=[
|
|
834
|
+
Column.ensure_col(left).column_expression,
|
|
835
|
+
Column.ensure_col(right).column_expression,
|
|
836
|
+
],
|
|
954
837
|
)
|
|
955
838
|
)
|
|
956
839
|
|
|
957
840
|
|
|
958
|
-
def split_no_limit(str: ColumnOrName, pattern: str, limit: t.Optional[int] = None) -> Column:
|
|
959
|
-
from sqlframe.base.functions import split
|
|
960
|
-
|
|
961
|
-
col_func = get_func_from_session("col")
|
|
962
|
-
|
|
963
|
-
if limit is not None:
|
|
964
|
-
logger.warning("Limit is ignored since it is not supported in this dialect")
|
|
965
|
-
return split(col_func(str), pattern)
|
|
966
|
-
|
|
967
|
-
|
|
968
841
|
def split_from_regex_split_to_array(
|
|
969
842
|
str: ColumnOrName, pattern: str, limit: t.Optional[int] = None
|
|
970
843
|
) -> Column:
|
|
@@ -976,7 +849,7 @@ def split_from_regex_split_to_array(
|
|
|
976
849
|
expression.Anonymous(
|
|
977
850
|
this="REGEXP_SPLIT_TO_ARRAY",
|
|
978
851
|
expressions=[
|
|
979
|
-
col_func(str).
|
|
852
|
+
col_func(str).column_expression,
|
|
980
853
|
expression.Literal.string(pattern),
|
|
981
854
|
],
|
|
982
855
|
)
|
|
@@ -992,22 +865,11 @@ def split_with_split(str: ColumnOrName, pattern: str, limit: t.Optional[int] = N
|
|
|
992
865
|
return Column(
|
|
993
866
|
expression.Anonymous(
|
|
994
867
|
this="SPLIT",
|
|
995
|
-
expressions=[col_func(str).
|
|
868
|
+
expressions=[col_func(str).column_expression, lit(pattern).column_expression],
|
|
996
869
|
)
|
|
997
870
|
)
|
|
998
871
|
|
|
999
872
|
|
|
1000
|
-
def regexp_extract_coalesce_empty_str(
|
|
1001
|
-
str: ColumnOrName, pattern: str, idx: t.Optional[int] = None
|
|
1002
|
-
) -> Column:
|
|
1003
|
-
from sqlframe.base.functions import regexp_extract
|
|
1004
|
-
|
|
1005
|
-
coalesce = get_func_from_session("coalesce")
|
|
1006
|
-
lit_func = get_func_from_session("lit")
|
|
1007
|
-
|
|
1008
|
-
return coalesce(regexp_extract(str, pattern, idx), lit_func(""))
|
|
1009
|
-
|
|
1010
|
-
|
|
1011
873
|
def array_contains_any(col: ColumnOrName, value: ColumnOrLiteral) -> Column:
|
|
1012
874
|
lit = get_func_from_session("lit")
|
|
1013
875
|
value_col = value if isinstance(value, Column) else lit(value)
|
|
@@ -1015,8 +877,10 @@ def array_contains_any(col: ColumnOrName, value: ColumnOrLiteral) -> Column:
|
|
|
1015
877
|
|
|
1016
878
|
return Column(
|
|
1017
879
|
expression.EQ(
|
|
1018
|
-
this=value_col.
|
|
1019
|
-
expression=expression.Anonymous(
|
|
880
|
+
this=value_col.column_expression,
|
|
881
|
+
expression=expression.Anonymous(
|
|
882
|
+
this="ANY", expressions=[col_func(col).column_expression]
|
|
883
|
+
),
|
|
1020
884
|
)
|
|
1021
885
|
)
|
|
1022
886
|
|
|
@@ -1029,7 +893,10 @@ def arrays_overlap_using_intersect(col1: ColumnOrName, col2: ColumnOrName) -> Co
|
|
|
1029
893
|
this=expression.ArraySize(
|
|
1030
894
|
this=expression.Anonymous(
|
|
1031
895
|
this="ARRAY_INTERSECT",
|
|
1032
|
-
expressions=[
|
|
896
|
+
expressions=[
|
|
897
|
+
col_func(col1).column_expression,
|
|
898
|
+
col_func(col2).column_expression,
|
|
899
|
+
],
|
|
1033
900
|
)
|
|
1034
901
|
),
|
|
1035
902
|
expression=expression.Literal.number(0),
|
|
@@ -1043,7 +910,7 @@ def arrays_overlap_renamed(col1: ColumnOrName, col2: ColumnOrName) -> Column:
|
|
|
1043
910
|
return Column(
|
|
1044
911
|
expression.Anonymous(
|
|
1045
912
|
this="ARRAYS_OVERLAP",
|
|
1046
|
-
expressions=[col_func(col1).
|
|
913
|
+
expressions=[col_func(col1).column_expression, col_func(col2).column_expression],
|
|
1047
914
|
)
|
|
1048
915
|
)
|
|
1049
916
|
|
|
@@ -1069,72 +936,24 @@ def slice_with_brackets(
|
|
|
1069
936
|
|
|
1070
937
|
return Column(
|
|
1071
938
|
expression.Bracket(
|
|
1072
|
-
this=col_func(x).
|
|
939
|
+
this=col_func(x).column_expression,
|
|
1073
940
|
expressions=[
|
|
1074
941
|
expression.Slice(
|
|
1075
|
-
this=start_col.
|
|
1076
|
-
expression=(start_col + length_col).
|
|
942
|
+
this=start_col.column_expression,
|
|
943
|
+
expression=(start_col + length_col).column_expression,
|
|
1077
944
|
)
|
|
1078
945
|
],
|
|
1079
946
|
)
|
|
1080
947
|
)
|
|
1081
948
|
|
|
1082
949
|
|
|
1083
|
-
def array_join_no_null_replacement(
|
|
1084
|
-
col: ColumnOrName, delimiter: str, null_replacement: t.Optional[str] = None
|
|
1085
|
-
) -> Column:
|
|
1086
|
-
from sqlframe.base.functions import array_join
|
|
1087
|
-
|
|
1088
|
-
if null_replacement is None:
|
|
1089
|
-
logger.warning("Null replacement is ignored since it is not supported in this dialect")
|
|
1090
|
-
return array_join(col, delimiter)
|
|
1091
|
-
|
|
1092
|
-
|
|
1093
|
-
def array_join_null_replacement_with_transform(
|
|
1094
|
-
col: ColumnOrName, delimiter: str, null_replacement: t.Optional[str] = None
|
|
1095
|
-
) -> Column:
|
|
1096
|
-
from sqlframe.base.functions import array_join
|
|
1097
|
-
|
|
1098
|
-
col_func = get_func_from_session("col")
|
|
1099
|
-
|
|
1100
|
-
if null_replacement is None:
|
|
1101
|
-
return array_join(col, delimiter, null_replacement)
|
|
1102
|
-
col = Column(
|
|
1103
|
-
expression.Anonymous(
|
|
1104
|
-
this="LIST_TRANSFORM",
|
|
1105
|
-
expressions=[
|
|
1106
|
-
col_func(col).expression,
|
|
1107
|
-
expression.Lambda(
|
|
1108
|
-
this=expression.Coalesce(
|
|
1109
|
-
this=expression.Cast(
|
|
1110
|
-
this=expression.Identifier(this="x"),
|
|
1111
|
-
to=expression.DataType.build("STRING"),
|
|
1112
|
-
),
|
|
1113
|
-
expressions=[expression.Literal.string(null_replacement)],
|
|
1114
|
-
),
|
|
1115
|
-
expressions=[expression.Identifier(this="x")],
|
|
1116
|
-
),
|
|
1117
|
-
],
|
|
1118
|
-
)
|
|
1119
|
-
)
|
|
1120
|
-
return array_join(col, delimiter)
|
|
1121
|
-
|
|
1122
|
-
|
|
1123
|
-
def array_contains_cast_variant(col: ColumnOrName, value: ColumnOrLiteral) -> Column:
|
|
1124
|
-
from sqlframe.base.functions import array_contains
|
|
1125
|
-
|
|
1126
|
-
lit = get_func_from_session("lit")
|
|
1127
|
-
value_col = value if isinstance(value, Column) else lit(value)
|
|
1128
|
-
return array_contains(col, value_col.cast("variant"))
|
|
1129
|
-
|
|
1130
|
-
|
|
1131
950
|
def arrays_overlap_as_plural(col1: ColumnOrName, col2: ColumnOrName) -> Column:
|
|
1132
951
|
col_func = get_func_from_session("col")
|
|
1133
952
|
|
|
1134
953
|
return Column(
|
|
1135
954
|
expression.Anonymous(
|
|
1136
955
|
this="ARRAYS_OVERLAP",
|
|
1137
|
-
expressions=[col_func(col1).
|
|
956
|
+
expressions=[col_func(col1).column_expression, col_func(col2).column_expression],
|
|
1138
957
|
)
|
|
1139
958
|
)
|
|
1140
959
|
|
|
@@ -1166,7 +985,7 @@ def array_intersect_using_intersection(col1: ColumnOrName, col2: ColumnOrName) -
|
|
|
1166
985
|
return Column(
|
|
1167
986
|
expression.Anonymous(
|
|
1168
987
|
this="ARRAY_INTERSECTION",
|
|
1169
|
-
expressions=[col_func(col1).
|
|
988
|
+
expressions=[col_func(col1).column_expression, col_func(col2).column_expression],
|
|
1170
989
|
)
|
|
1171
990
|
)
|
|
1172
991
|
|
|
@@ -1176,9 +995,14 @@ def element_at_using_brackets(col: ColumnOrName, value: ColumnOrLiteral) -> Colu
|
|
|
1176
995
|
lit = get_func_from_session("lit")
|
|
1177
996
|
# SQLGlot will auto add 1 to whatever we pass in for the brackets even though the value is already 1 based.
|
|
1178
997
|
value = value if isinstance(value, Column) else lit(value)
|
|
1179
|
-
if [x for x in value.
|
|
998
|
+
if [x for x in value.column_expression.find_all(expression.Literal) if x.is_number]:
|
|
1180
999
|
value = value - lit(1)
|
|
1181
|
-
return Column(
|
|
1000
|
+
return Column(
|
|
1001
|
+
expression.Bracket(
|
|
1002
|
+
this=col_func(col).column_expression,
|
|
1003
|
+
expressions=[value.column_expression], # type: ignore
|
|
1004
|
+
)
|
|
1005
|
+
)
|
|
1182
1006
|
|
|
1183
1007
|
|
|
1184
1008
|
def array_remove_using_filter(col: ColumnOrName, value: ColumnOrLiteral) -> Column:
|
|
@@ -1190,10 +1014,10 @@ def array_remove_using_filter(col: ColumnOrName, value: ColumnOrLiteral) -> Colu
|
|
|
1190
1014
|
expression.Anonymous(
|
|
1191
1015
|
this="LIST_FILTER",
|
|
1192
1016
|
expressions=[
|
|
1193
|
-
col_func(col).
|
|
1017
|
+
col_func(col).column_expression,
|
|
1194
1018
|
expression.Lambda(
|
|
1195
1019
|
this=expression.NEQ(
|
|
1196
|
-
this=expression.Identifier(this="x"), expression=value.
|
|
1020
|
+
this=expression.Identifier(this="x"), expression=value.column_expression
|
|
1197
1021
|
),
|
|
1198
1022
|
expressions=[expression.Identifier(this="x")],
|
|
1199
1023
|
),
|
|
@@ -1211,7 +1035,10 @@ def array_union_using_list_concat(col1: ColumnOrName, col2: ColumnOrName) -> Col
|
|
|
1211
1035
|
expressions=[
|
|
1212
1036
|
expression.Anonymous(
|
|
1213
1037
|
this="LIST_CONCAT",
|
|
1214
|
-
expressions=[
|
|
1038
|
+
expressions=[
|
|
1039
|
+
col_func(col1).column_expression,
|
|
1040
|
+
col_func(col2).column_expression,
|
|
1041
|
+
],
|
|
1215
1042
|
)
|
|
1216
1043
|
],
|
|
1217
1044
|
)
|
|
@@ -1224,7 +1051,7 @@ def array_union_using_array_concat(col1: ColumnOrName, col2: ColumnOrName) -> Co
|
|
|
1224
1051
|
|
|
1225
1052
|
return array_distinct(
|
|
1226
1053
|
expression.ArrayConcat(
|
|
1227
|
-
this=col_func(col1).
|
|
1054
|
+
this=col_func(col1).column_expression, expressions=[col_func(col2).column_expression]
|
|
1228
1055
|
)
|
|
1229
1056
|
)
|
|
1230
1057
|
|
|
@@ -1235,7 +1062,7 @@ def get_json_object_using_arrow_op(col: ColumnOrName, path: str) -> Column:
|
|
|
1235
1062
|
return Column(
|
|
1236
1063
|
expression.JSONExtract(
|
|
1237
1064
|
this=expression.Cast(
|
|
1238
|
-
this=col_func(col).
|
|
1065
|
+
this=col_func(col).column_expression, to=expression.DataType.build("JSON")
|
|
1239
1066
|
),
|
|
1240
1067
|
expression=expression.JSONPath(
|
|
1241
1068
|
expressions=[expression.JSONPathRoot(), expression.JSONPathKey(this=path)]
|
|
@@ -1245,30 +1072,11 @@ def get_json_object_using_arrow_op(col: ColumnOrName, path: str) -> Column:
|
|
|
1245
1072
|
)
|
|
1246
1073
|
|
|
1247
1074
|
|
|
1248
|
-
def get_json_object_cast_object(col: ColumnOrName, path: str) -> Column:
|
|
1249
|
-
from sqlframe.base.functions import get_json_object
|
|
1250
|
-
|
|
1251
|
-
col_func = get_func_from_session("col")
|
|
1252
|
-
|
|
1253
|
-
return get_json_object(col_func(col).cast("variant"), path)
|
|
1254
|
-
|
|
1255
|
-
|
|
1256
1075
|
def get_json_object_using_function(col: ColumnOrName, path: str) -> Column:
|
|
1257
1076
|
lit = get_func_from_session("lit")
|
|
1258
1077
|
return Column.invoke_anonymous_function(col, "GET_JSON_OBJECT", lit(path))
|
|
1259
1078
|
|
|
1260
1079
|
|
|
1261
|
-
def create_map_with_cast(*cols: t.Union[ColumnOrName, t.Iterable[ColumnOrName]]) -> Column:
|
|
1262
|
-
from sqlframe.base.functions import create_map
|
|
1263
|
-
|
|
1264
|
-
col = get_func_from_session("col")
|
|
1265
|
-
|
|
1266
|
-
columns = list(_flatten(cols)) if not isinstance(cols[0], (str, Column)) else cols
|
|
1267
|
-
col1_dtype = col(columns[0]).dtype or "VARCHAR"
|
|
1268
|
-
col2_dtype = col(columns[1]).dtype or "VARCHAR"
|
|
1269
|
-
return create_map(*cols).cast(f"MAP({col1_dtype}, {col2_dtype})")
|
|
1270
|
-
|
|
1271
|
-
|
|
1272
1080
|
def array_min_from_sort(col: ColumnOrName) -> Column:
|
|
1273
1081
|
element_at = get_func_from_session("element_at")
|
|
1274
1082
|
array_sort = get_func_from_session("array_sort")
|
|
@@ -1283,7 +1091,7 @@ def array_min_from_subquery(col: ColumnOrName) -> Column:
|
|
|
1283
1091
|
select = expression.Select(
|
|
1284
1092
|
expressions=[
|
|
1285
1093
|
expression.Min(
|
|
1286
|
-
this=col_func("x").
|
|
1094
|
+
this=col_func("x").column_expression,
|
|
1287
1095
|
)
|
|
1288
1096
|
],
|
|
1289
1097
|
)
|
|
@@ -1311,7 +1119,7 @@ def array_max_from_subquery(col: ColumnOrName) -> Column:
|
|
|
1311
1119
|
select = expression.Select(
|
|
1312
1120
|
expressions=[
|
|
1313
1121
|
expression.Max(
|
|
1314
|
-
this=col_func("x").
|
|
1122
|
+
this=col_func("x").column_expression,
|
|
1315
1123
|
)
|
|
1316
1124
|
],
|
|
1317
1125
|
)
|
|
@@ -1328,13 +1136,13 @@ def array_max_from_subquery(col: ColumnOrName) -> Column:
|
|
|
1328
1136
|
def sort_array_using_array_sort(col: ColumnOrName, asc: t.Optional[bool] = None) -> Column:
|
|
1329
1137
|
col_func = get_func_from_session("col")
|
|
1330
1138
|
lit_func = get_func_from_session("lit")
|
|
1331
|
-
expressions = [col_func(col).
|
|
1139
|
+
expressions = [col_func(col).column_expression]
|
|
1332
1140
|
asc = asc if asc is not None else True
|
|
1333
|
-
expressions.append(lit_func(asc).
|
|
1141
|
+
expressions.append(lit_func(asc).column_expression)
|
|
1334
1142
|
if asc:
|
|
1335
|
-
expressions.append(lit_func(True).
|
|
1143
|
+
expressions.append(lit_func(True).column_expression)
|
|
1336
1144
|
else:
|
|
1337
|
-
expressions.append(lit_func(False).
|
|
1145
|
+
expressions.append(lit_func(False).column_expression)
|
|
1338
1146
|
|
|
1339
1147
|
return Column(
|
|
1340
1148
|
expression.Anonymous(
|
|
@@ -1350,7 +1158,7 @@ def flatten_using_array_flatten(col: ColumnOrName) -> Column:
|
|
|
1350
1158
|
return Column(
|
|
1351
1159
|
expression.Anonymous(
|
|
1352
1160
|
this="ARRAY_FLATTEN",
|
|
1353
|
-
expressions=[col_func(col).
|
|
1161
|
+
expressions=[col_func(col).column_expression],
|
|
1354
1162
|
)
|
|
1355
1163
|
)
|
|
1356
1164
|
|
|
@@ -1371,9 +1179,9 @@ def sequence_from_generate_series(
|
|
|
1371
1179
|
expression.Anonymous(
|
|
1372
1180
|
this="GENERATE_SERIES",
|
|
1373
1181
|
expressions=[
|
|
1374
|
-
col_func(start).
|
|
1375
|
-
col_func(stop).
|
|
1376
|
-
col_func(step).
|
|
1182
|
+
col_func(start).column_expression,
|
|
1183
|
+
col_func(stop).column_expression,
|
|
1184
|
+
col_func(step).column_expression if step else expression.Literal.number(1),
|
|
1377
1185
|
],
|
|
1378
1186
|
)
|
|
1379
1187
|
)
|
|
@@ -1388,9 +1196,9 @@ def sequence_from_generate_array(
|
|
|
1388
1196
|
expression.Anonymous(
|
|
1389
1197
|
this="GENERATE_ARRAY",
|
|
1390
1198
|
expressions=[
|
|
1391
|
-
col_func(start).
|
|
1392
|
-
col_func(stop).
|
|
1393
|
-
col_func(step).
|
|
1199
|
+
col_func(start).column_expression,
|
|
1200
|
+
col_func(stop).column_expression,
|
|
1201
|
+
col_func(step).column_expression if step else expression.Literal.number(1),
|
|
1394
1202
|
],
|
|
1395
1203
|
)
|
|
1396
1204
|
)
|
|
@@ -1407,11 +1215,11 @@ def sequence_from_array_generate_range(
|
|
|
1407
1215
|
expression.Anonymous(
|
|
1408
1216
|
this="ARRAY_GENERATE_RANGE",
|
|
1409
1217
|
expressions=[
|
|
1410
|
-
col_func(start).
|
|
1218
|
+
col_func(start).column_expression,
|
|
1411
1219
|
(
|
|
1412
1220
|
col_func(stop) + when(col_func(stop) > lit(0), lit(1)).otherwise(lit(-1))
|
|
1413
|
-
).
|
|
1414
|
-
col_func(step).
|
|
1221
|
+
).column_expression,
|
|
1222
|
+
col_func(step).column_expression if step else lit(1).column_expression,
|
|
1415
1223
|
],
|
|
1416
1224
|
)
|
|
1417
1225
|
)
|
|
@@ -1434,7 +1242,7 @@ def hex_casted_as_bytes(col: ColumnOrName) -> Column:
|
|
|
1434
1242
|
return Column(
|
|
1435
1243
|
expression.Anonymous(
|
|
1436
1244
|
this="TO_HEX",
|
|
1437
|
-
expressions=[col_func(col).cast("bytes", dialect="bigquery").
|
|
1245
|
+
expressions=[col_func(col).cast("bytes", dialect="bigquery").column_expression],
|
|
1438
1246
|
)
|
|
1439
1247
|
)
|
|
1440
1248
|
|
|
@@ -1445,7 +1253,7 @@ def hex_using_encode(col: ColumnOrName) -> Column:
|
|
|
1445
1253
|
return Column(
|
|
1446
1254
|
expression.Anonymous(
|
|
1447
1255
|
this="HEX_ENCODE",
|
|
1448
|
-
expressions=[col_func(col).
|
|
1256
|
+
expressions=[col_func(col).column_expression],
|
|
1449
1257
|
)
|
|
1450
1258
|
)
|
|
1451
1259
|
|
|
@@ -1456,7 +1264,7 @@ def unhex_hex_decode_str(col: ColumnOrName) -> Column:
|
|
|
1456
1264
|
return Column(
|
|
1457
1265
|
expression.Anonymous(
|
|
1458
1266
|
this="HEX_DECODE_STRING",
|
|
1459
|
-
expressions=[col_func(col).
|
|
1267
|
+
expressions=[col_func(col).column_expression],
|
|
1460
1268
|
)
|
|
1461
1269
|
)
|
|
1462
1270
|
|
|
@@ -1465,63 +1273,13 @@ def bit_length_from_length(col: ColumnOrName) -> Column:
|
|
|
1465
1273
|
lit = get_func_from_session("lit")
|
|
1466
1274
|
col_func = get_func_from_session("col")
|
|
1467
1275
|
|
|
1468
|
-
return Column(expression.Length(this=col_func(col).
|
|
1469
|
-
|
|
1470
|
-
|
|
1471
|
-
def any_value_always_ignore_nulls(
|
|
1472
|
-
col: ColumnOrName, ignoreNulls: t.Optional[t.Union[bool, Column]] = None
|
|
1473
|
-
) -> Column:
|
|
1474
|
-
from sqlframe.base.functions import any_value
|
|
1475
|
-
|
|
1476
|
-
if not ignoreNulls:
|
|
1477
|
-
logger.warning("Nulls are always ignored when using `ANY_VALUE` on this engine")
|
|
1478
|
-
return any_value(col)
|
|
1479
|
-
|
|
1480
|
-
|
|
1481
|
-
def any_value_ignore_nulls_not_supported(
|
|
1482
|
-
col: ColumnOrName, ignoreNulls: t.Optional[t.Union[bool, Column]] = None
|
|
1483
|
-
) -> Column:
|
|
1484
|
-
from sqlframe.base.functions import any_value
|
|
1485
|
-
|
|
1486
|
-
if ignoreNulls:
|
|
1487
|
-
logger.warning("Ignoring nulls is not supported in this dialect")
|
|
1488
|
-
return any_value(col)
|
|
1276
|
+
return Column(expression.Length(this=col_func(col).column_expression)) * lit(8)
|
|
1489
1277
|
|
|
1490
1278
|
|
|
1491
1279
|
def current_user_from_session_user() -> Column:
|
|
1492
1280
|
return Column(expression.Anonymous(this="SESSION_USER"))
|
|
1493
1281
|
|
|
1494
1282
|
|
|
1495
|
-
def extract_convert_to_var(field: ColumnOrName, source: ColumnOrName) -> Column:
|
|
1496
|
-
from sqlframe.base.functions import extract
|
|
1497
|
-
|
|
1498
|
-
field = expression.Var(this=Column.ensure_col(field).alias_or_name) # type: ignore
|
|
1499
|
-
return extract(field, source) # type: ignore
|
|
1500
|
-
|
|
1501
|
-
|
|
1502
|
-
def left_cast_len(str: ColumnOrName, len: ColumnOrName) -> Column:
|
|
1503
|
-
from sqlframe.base.functions import left
|
|
1504
|
-
|
|
1505
|
-
len = Column.ensure_col(len).cast("integer")
|
|
1506
|
-
return left(str, len)
|
|
1507
|
-
|
|
1508
|
-
|
|
1509
|
-
def right_cast_len(str: ColumnOrName, len: ColumnOrName) -> Column:
|
|
1510
|
-
from sqlframe.base.functions import right
|
|
1511
|
-
|
|
1512
|
-
len = Column.ensure_col(len).cast("integer")
|
|
1513
|
-
return right(str, len)
|
|
1514
|
-
|
|
1515
|
-
|
|
1516
|
-
def position_cast_start(
|
|
1517
|
-
substr: ColumnOrName, str: ColumnOrName, start: t.Optional[ColumnOrName] = None
|
|
1518
|
-
) -> Column:
|
|
1519
|
-
from sqlframe.base.functions import position
|
|
1520
|
-
|
|
1521
|
-
start = Column.ensure_col(start).cast("integer") if start else None
|
|
1522
|
-
return position(substr, str, start)
|
|
1523
|
-
|
|
1524
|
-
|
|
1525
1283
|
def position_as_strpos(
|
|
1526
1284
|
substr: ColumnOrName, str: ColumnOrName, start: t.Optional[ColumnOrName] = None
|
|
1527
1285
|
) -> Column:
|
|
@@ -1540,26 +1298,6 @@ def to_number_using_to_double(col: ColumnOrName, format: ColumnOrName) -> Column
|
|
|
1540
1298
|
return Column.invoke_anonymous_function(col, "TO_DOUBLE", format)
|
|
1541
1299
|
|
|
1542
1300
|
|
|
1543
|
-
def try_element_at_zero_based(col: ColumnOrName, extraction: ColumnOrName) -> Column:
|
|
1544
|
-
from sqlframe.base.functions import try_element_at
|
|
1545
|
-
|
|
1546
|
-
lit = get_func_from_session("lit")
|
|
1547
|
-
index = Column.ensure_col(extraction)
|
|
1548
|
-
if isinstance(index.expression, expression.Literal) and index.expression.is_number:
|
|
1549
|
-
index = index - lit(1)
|
|
1550
|
-
return try_element_at(col, index)
|
|
1551
|
-
|
|
1552
|
-
|
|
1553
|
-
def to_unix_timestamp_include_default_format(
|
|
1554
|
-
timestamp: ColumnOrName,
|
|
1555
|
-
format: t.Optional[ColumnOrName] = None,
|
|
1556
|
-
) -> Column:
|
|
1557
|
-
from sqlframe.base.functions import to_unix_timestamp
|
|
1558
|
-
from sqlframe.base.session import _BaseSession
|
|
1559
|
-
|
|
1560
|
-
return to_unix_timestamp(timestamp, format or _BaseSession().default_time_format)
|
|
1561
|
-
|
|
1562
|
-
|
|
1563
1301
|
def array_append_list_append(col: ColumnOrName, value: ColumnOrLiteral) -> Column:
|
|
1564
1302
|
lit = get_func_from_session("lit")
|
|
1565
1303
|
value = value if isinstance(value, Column) else lit(value)
|
|
@@ -1641,6 +1379,14 @@ def typeof_from_variant(col: ColumnOrName) -> Column:
|
|
|
1641
1379
|
return Column.invoke_anonymous_function(col, "TYPEOF")
|
|
1642
1380
|
|
|
1643
1381
|
|
|
1382
|
+
def typeof_bgutil(col: ColumnOrName) -> Column:
|
|
1383
|
+
return Column(
|
|
1384
|
+
expression.Anonymous(
|
|
1385
|
+
this="bqutil.fn.typeof", expressions=[Column.ensure_col(col).column_expression]
|
|
1386
|
+
)
|
|
1387
|
+
)
|
|
1388
|
+
|
|
1389
|
+
|
|
1644
1390
|
def regexp_replace_global_option(
|
|
1645
1391
|
str: ColumnOrName, pattern: str, replacement: str, position: t.Optional[int] = None
|
|
1646
1392
|
) -> Column:
|
|
@@ -1664,6 +1410,301 @@ def regexp_replace_global_option(
|
|
|
1664
1410
|
)
|
|
1665
1411
|
|
|
1666
1412
|
|
|
1413
|
+
def degrees_bgutil(col: ColumnOrName) -> Column:
|
|
1414
|
+
return Column(
|
|
1415
|
+
expression.Anonymous(
|
|
1416
|
+
this="bqutil.fn.degrees", expressions=[Column.ensure_col(col).column_expression]
|
|
1417
|
+
)
|
|
1418
|
+
)
|
|
1419
|
+
|
|
1420
|
+
|
|
1421
|
+
def radians_bgutil(col: ColumnOrName) -> Column:
|
|
1422
|
+
return Column(
|
|
1423
|
+
expression.Anonymous(
|
|
1424
|
+
this="bqutil.fn.radians", expressions=[Column.ensure_col(col).column_expression]
|
|
1425
|
+
)
|
|
1426
|
+
)
|
|
1427
|
+
|
|
1428
|
+
|
|
1429
|
+
def bround_bgutil(col: ColumnOrName, scale: t.Optional[int] = None) -> Column:
|
|
1430
|
+
from sqlframe.base.session import _BaseSession
|
|
1431
|
+
|
|
1432
|
+
lit = get_func_from_session("lit", _BaseSession())
|
|
1433
|
+
|
|
1434
|
+
expressions = [Column.ensure_col(col).cast("bignumeric").column_expression]
|
|
1435
|
+
if scale is not None:
|
|
1436
|
+
expressions.append(lit(scale).column_expression)
|
|
1437
|
+
return Column(
|
|
1438
|
+
expression.Anonymous(
|
|
1439
|
+
this="bqutil.fn.cw_round_half_even",
|
|
1440
|
+
expressions=expressions,
|
|
1441
|
+
)
|
|
1442
|
+
)
|
|
1443
|
+
|
|
1444
|
+
|
|
1445
|
+
def months_between_bgutils(
|
|
1446
|
+
date1: ColumnOrName, date2: ColumnOrName, roundOff: t.Optional[bool] = None
|
|
1447
|
+
) -> Column:
|
|
1448
|
+
roundOff = True if roundOff is None else roundOff
|
|
1449
|
+
round = get_func_from_session("round")
|
|
1450
|
+
lit = get_func_from_session("lit")
|
|
1451
|
+
|
|
1452
|
+
value = Column(
|
|
1453
|
+
expression.Anonymous(
|
|
1454
|
+
this="bqutil.fn.cw_months_between",
|
|
1455
|
+
expressions=[
|
|
1456
|
+
Column.ensure_col(date1).cast("datetime").column_expression,
|
|
1457
|
+
Column.ensure_col(date2).cast("datetime").column_expression,
|
|
1458
|
+
],
|
|
1459
|
+
)
|
|
1460
|
+
)
|
|
1461
|
+
if roundOff:
|
|
1462
|
+
value = round(value, lit(8))
|
|
1463
|
+
return value
|
|
1464
|
+
|
|
1465
|
+
|
|
1466
|
+
def next_day_bgutil(col: ColumnOrName, dayOfWeek: str) -> Column:
|
|
1467
|
+
lit = get_func_from_session("lit")
|
|
1468
|
+
|
|
1469
|
+
return Column(
|
|
1470
|
+
expression.Anonymous(
|
|
1471
|
+
this="bqutil.fn.cw_next_day",
|
|
1472
|
+
expressions=[
|
|
1473
|
+
Column.ensure_col(col).cast("date").column_expression,
|
|
1474
|
+
lit(dayOfWeek).column_expression,
|
|
1475
|
+
],
|
|
1476
|
+
)
|
|
1477
|
+
)
|
|
1478
|
+
|
|
1479
|
+
|
|
1480
|
+
def from_unixtime_bigutil(col: ColumnOrName, format: t.Optional[str] = None) -> Column:
|
|
1481
|
+
from sqlframe.base.session import _BaseSession
|
|
1482
|
+
|
|
1483
|
+
session: _BaseSession = _BaseSession()
|
|
1484
|
+
|
|
1485
|
+
expressions = [Column.ensure_col(col).column_expression]
|
|
1486
|
+
return Column(
|
|
1487
|
+
expression.Anonymous(
|
|
1488
|
+
this="FORMAT_TIMESTAMP",
|
|
1489
|
+
expressions=[
|
|
1490
|
+
session.format_time(format),
|
|
1491
|
+
Column(
|
|
1492
|
+
expression.Anonymous(this="TIMESTAMP_SECONDS", expressions=expressions)
|
|
1493
|
+
).column_expression,
|
|
1494
|
+
],
|
|
1495
|
+
)
|
|
1496
|
+
)
|
|
1497
|
+
|
|
1498
|
+
|
|
1499
|
+
def unix_timestamp_bgutil(
|
|
1500
|
+
timestamp: t.Optional[ColumnOrName] = None, format: t.Optional[str] = None
|
|
1501
|
+
) -> Column:
|
|
1502
|
+
from sqlframe.base.session import _BaseSession
|
|
1503
|
+
|
|
1504
|
+
lit = get_func_from_session("lit")
|
|
1505
|
+
return Column(
|
|
1506
|
+
expression.Anonymous(
|
|
1507
|
+
this="UNIX_SECONDS",
|
|
1508
|
+
expressions=[
|
|
1509
|
+
expression.Anonymous(
|
|
1510
|
+
this="PARSE_TIMESTAMP",
|
|
1511
|
+
expressions=[
|
|
1512
|
+
_BaseSession().format_time(format),
|
|
1513
|
+
Column.ensure_col(timestamp).column_expression,
|
|
1514
|
+
lit("UTC").column_expression,
|
|
1515
|
+
],
|
|
1516
|
+
)
|
|
1517
|
+
],
|
|
1518
|
+
)
|
|
1519
|
+
)
|
|
1520
|
+
|
|
1521
|
+
|
|
1522
|
+
def format_number_bgutil(col: ColumnOrName, d: int) -> Column:
|
|
1523
|
+
round = get_func_from_session("round")
|
|
1524
|
+
lit = get_func_from_session("lit")
|
|
1525
|
+
|
|
1526
|
+
return Column(
|
|
1527
|
+
expression.Anonymous(
|
|
1528
|
+
this="FORMAT",
|
|
1529
|
+
expressions=[
|
|
1530
|
+
lit(f"%'.{d}f").column_expression,
|
|
1531
|
+
round(Column.ensure_col(col).cast("float"), d).column_expression,
|
|
1532
|
+
],
|
|
1533
|
+
)
|
|
1534
|
+
)
|
|
1535
|
+
|
|
1536
|
+
|
|
1537
|
+
def substring_index_bgutil(str: ColumnOrName, delim: str, count: int) -> Column:
|
|
1538
|
+
lit = get_func_from_session("lit")
|
|
1539
|
+
|
|
1540
|
+
return Column(
|
|
1541
|
+
expression.Anonymous(
|
|
1542
|
+
this="bqutil.fn.cw_substring_index",
|
|
1543
|
+
expressions=[
|
|
1544
|
+
Column.ensure_col(str).column_expression,
|
|
1545
|
+
lit(delim).column_expression,
|
|
1546
|
+
lit(count).column_expression,
|
|
1547
|
+
],
|
|
1548
|
+
)
|
|
1549
|
+
)
|
|
1550
|
+
|
|
1551
|
+
|
|
1552
|
+
def bin_bgutil(col: ColumnOrName) -> Column:
|
|
1553
|
+
return (
|
|
1554
|
+
Column(
|
|
1555
|
+
expression.Anonymous(
|
|
1556
|
+
this="bqutil.fn.to_binary",
|
|
1557
|
+
expressions=[Column.ensure_col(col).column_expression],
|
|
1558
|
+
)
|
|
1559
|
+
)
|
|
1560
|
+
.cast("int")
|
|
1561
|
+
.cast("string")
|
|
1562
|
+
)
|
|
1563
|
+
|
|
1564
|
+
|
|
1565
|
+
def slice_bgutil(
|
|
1566
|
+
x: ColumnOrName, start: t.Union[ColumnOrName, int], length: t.Union[ColumnOrName, int]
|
|
1567
|
+
) -> Column:
|
|
1568
|
+
lit = get_func_from_session("lit")
|
|
1569
|
+
|
|
1570
|
+
start_col = start if isinstance(start, Column) else lit(start)
|
|
1571
|
+
length_col = length if isinstance(length, Column) else lit(length)
|
|
1572
|
+
|
|
1573
|
+
subquery = (
|
|
1574
|
+
expression.select(
|
|
1575
|
+
expression.column("x"),
|
|
1576
|
+
)
|
|
1577
|
+
.from_(
|
|
1578
|
+
expression.Unnest(
|
|
1579
|
+
expressions=[Column.ensure_col(x).column_expression],
|
|
1580
|
+
alias=expression.TableAlias(
|
|
1581
|
+
columns=[expression.to_identifier("x")],
|
|
1582
|
+
),
|
|
1583
|
+
offset=expression.to_identifier("offset"),
|
|
1584
|
+
)
|
|
1585
|
+
)
|
|
1586
|
+
.where(
|
|
1587
|
+
expression.Between(
|
|
1588
|
+
this=expression.column("offset"),
|
|
1589
|
+
low=(start_col - lit(1)).column_expression,
|
|
1590
|
+
high=(start_col + length_col).column_expression,
|
|
1591
|
+
)
|
|
1592
|
+
)
|
|
1593
|
+
)
|
|
1594
|
+
|
|
1595
|
+
return Column(
|
|
1596
|
+
expression.Anonymous(
|
|
1597
|
+
this="ARRAY",
|
|
1598
|
+
expressions=[subquery],
|
|
1599
|
+
)
|
|
1600
|
+
)
|
|
1601
|
+
|
|
1602
|
+
|
|
1603
|
+
def array_position_bgutil(col: ColumnOrName, value: ColumnOrLiteral) -> Column:
|
|
1604
|
+
lit = get_func_from_session("lit")
|
|
1605
|
+
|
|
1606
|
+
value_col = value if isinstance(value, Column) else lit(value)
|
|
1607
|
+
|
|
1608
|
+
return Column(
|
|
1609
|
+
expression.Coalesce(
|
|
1610
|
+
this=expression.Anonymous(
|
|
1611
|
+
this="bqutil.fn.find_in_set",
|
|
1612
|
+
expressions=[
|
|
1613
|
+
value_col.column_expression,
|
|
1614
|
+
expression.Anonymous(
|
|
1615
|
+
this="ARRAY_TO_STRING",
|
|
1616
|
+
expressions=[
|
|
1617
|
+
Column.ensure_col(col).column_expression,
|
|
1618
|
+
lit(",").column_expression,
|
|
1619
|
+
],
|
|
1620
|
+
),
|
|
1621
|
+
],
|
|
1622
|
+
),
|
|
1623
|
+
expressions=[lit(0).column_expression],
|
|
1624
|
+
)
|
|
1625
|
+
)
|
|
1626
|
+
|
|
1627
|
+
|
|
1628
|
+
def array_remove_bgutil(col: ColumnOrName, value: ColumnOrLiteral) -> Column:
|
|
1629
|
+
lit = get_func_from_session("lit")
|
|
1630
|
+
|
|
1631
|
+
value_col = value if isinstance(value, Column) else lit(value)
|
|
1632
|
+
|
|
1633
|
+
filter_subquery = expression.select(
|
|
1634
|
+
"*",
|
|
1635
|
+
).from_(
|
|
1636
|
+
expression.Unnest(
|
|
1637
|
+
expressions=[Column.ensure_col(col).column_expression],
|
|
1638
|
+
alias=expression.TableAlias(
|
|
1639
|
+
columns=[expression.to_identifier("x")],
|
|
1640
|
+
),
|
|
1641
|
+
)
|
|
1642
|
+
)
|
|
1643
|
+
|
|
1644
|
+
agg_subquery = (
|
|
1645
|
+
expression.select(
|
|
1646
|
+
expression.Anonymous(
|
|
1647
|
+
this="ARRAY_AGG",
|
|
1648
|
+
expressions=[expression.column("x")],
|
|
1649
|
+
),
|
|
1650
|
+
)
|
|
1651
|
+
.from_(filter_subquery.subquery("t"))
|
|
1652
|
+
.where(
|
|
1653
|
+
expression.NEQ(
|
|
1654
|
+
this=expression.column("x", "t"),
|
|
1655
|
+
expression=value_col.column_expression,
|
|
1656
|
+
)
|
|
1657
|
+
)
|
|
1658
|
+
)
|
|
1659
|
+
|
|
1660
|
+
return Column(agg_subquery.subquery())
|
|
1661
|
+
|
|
1662
|
+
|
|
1663
|
+
def array_distinct_bgutil(col: ColumnOrName) -> Column:
|
|
1664
|
+
return Column(
|
|
1665
|
+
expression.Anonymous(
|
|
1666
|
+
this="bqutil.fn.cw_array_distinct",
|
|
1667
|
+
expressions=[Column.ensure_col(col).column_expression],
|
|
1668
|
+
)
|
|
1669
|
+
)
|
|
1670
|
+
|
|
1671
|
+
|
|
1672
|
+
def array_min_bgutil(col: ColumnOrName) -> Column:
|
|
1673
|
+
return Column(
|
|
1674
|
+
expression.Anonymous(
|
|
1675
|
+
this="bqutil.fn.cw_array_min",
|
|
1676
|
+
expressions=[Column.ensure_col(col).column_expression],
|
|
1677
|
+
)
|
|
1678
|
+
)
|
|
1679
|
+
|
|
1680
|
+
|
|
1681
|
+
def array_max_bgutil(col: ColumnOrName) -> Column:
|
|
1682
|
+
return Column(
|
|
1683
|
+
expression.Anonymous(
|
|
1684
|
+
this="bqutil.fn.cw_array_max",
|
|
1685
|
+
expressions=[Column.ensure_col(col).column_expression],
|
|
1686
|
+
)
|
|
1687
|
+
)
|
|
1688
|
+
|
|
1689
|
+
|
|
1690
|
+
def sort_array_bgutil(col: ColumnOrName, asc: t.Optional[bool] = None) -> Column:
|
|
1691
|
+
order = "ASC" if asc or asc is None else "DESC"
|
|
1692
|
+
subquery = (
|
|
1693
|
+
expression.select("x")
|
|
1694
|
+
.from_(
|
|
1695
|
+
expression.Unnest(
|
|
1696
|
+
expressions=[Column.ensure_col(col).column_expression],
|
|
1697
|
+
alias=expression.TableAlias(
|
|
1698
|
+
columns=[expression.to_identifier("x")],
|
|
1699
|
+
),
|
|
1700
|
+
)
|
|
1701
|
+
)
|
|
1702
|
+
.order_by(f"x {order}")
|
|
1703
|
+
)
|
|
1704
|
+
|
|
1705
|
+
return Column(expression.Anonymous(this="ARRAY", expressions=[subquery]))
|
|
1706
|
+
|
|
1707
|
+
|
|
1667
1708
|
def _is_string_using_typeof_varchar(col: ColumnOrName) -> Column:
|
|
1668
1709
|
typeof = get_func_from_session("typeof")
|
|
1669
1710
|
lit = get_func_from_session("lit")
|