sqlframe 3.10.0__py3-none-any.whl → 3.11.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sqlframe/_version.py +2 -2
- sqlframe/base/column.py +11 -8
- sqlframe/base/decorators.py +41 -2
- sqlframe/base/function_alternatives.py +445 -404
- sqlframe/base/functions.py +1100 -73
- sqlframe/base/session.py +34 -2
- sqlframe/bigquery/functions.py +1 -361
- sqlframe/bigquery/functions.pyi +63 -156
- sqlframe/bigquery/session.py +4 -0
- sqlframe/databricks/functions.py +0 -10
- sqlframe/databricks/functions.pyi +405 -413
- sqlframe/databricks/session.py +4 -0
- sqlframe/duckdb/functions.py +0 -40
- sqlframe/duckdb/functions.pyi +219 -216
- sqlframe/duckdb/session.py +4 -0
- sqlframe/postgres/functions.py +1 -60
- sqlframe/postgres/functions.pyi +197 -196
- sqlframe/postgres/session.py +4 -0
- sqlframe/redshift/functions.py +1 -4
- sqlframe/redshift/session.py +4 -0
- sqlframe/snowflake/functions.py +1 -55
- sqlframe/snowflake/functions.pyi +224 -220
- sqlframe/snowflake/session.py +4 -0
- sqlframe/spark/functions.py +0 -9
- sqlframe/spark/functions.pyi +411 -413
- sqlframe/spark/session.py +4 -0
- sqlframe/standalone/functions.py +1 -1
- sqlframe/standalone/session.py +4 -0
- {sqlframe-3.10.0.dist-info → sqlframe-3.11.0.dist-info}/METADATA +4 -4
- {sqlframe-3.10.0.dist-info → sqlframe-3.11.0.dist-info}/RECORD +33 -33
- {sqlframe-3.10.0.dist-info → sqlframe-3.11.0.dist-info}/LICENSE +0 -0
- {sqlframe-3.10.0.dist-info → sqlframe-3.11.0.dist-info}/WHEEL +0 -0
- {sqlframe-3.10.0.dist-info → sqlframe-3.11.0.dist-info}/top_level.txt +0 -0
    
        sqlframe/base/functions.py
    CHANGED
    
    | @@ -21,12 +21,18 @@ if t.TYPE_CHECKING: | |
| 21 21 | 
             
                from pyspark.sql.session import SparkContext
         | 
| 22 22 |  | 
| 23 23 | 
             
                from sqlframe.base._typing import ColumnOrLiteral, ColumnOrName
         | 
| 24 | 
            -
                from sqlframe.base.session import DF
         | 
| 24 | 
            +
                from sqlframe.base.session import DF, _BaseSession
         | 
| 25 25 | 
             
                from sqlframe.base.types import ArrayType, StructType
         | 
| 26 26 |  | 
| 27 27 | 
             
            logger = logging.getLogger(__name__)
         | 
| 28 28 |  | 
| 29 29 |  | 
| 30 | 
            +
            def _get_session() -> _BaseSession:
         | 
| 31 | 
            +
                from sqlframe.base.session import _BaseSession
         | 
| 32 | 
            +
             | 
| 33 | 
            +
                return _BaseSession()
         | 
| 34 | 
            +
             | 
| 35 | 
            +
             | 
| 30 36 | 
             
            @meta()
         | 
| 31 37 | 
             
            def col(column_name: t.Union[ColumnOrName, t.Any]) -> Column:
         | 
| 32 38 | 
             
                from sqlframe.base.session import _BaseSession
         | 
| @@ -68,7 +74,9 @@ def least(*cols: ColumnOrName) -> Column: | |
| 68 74 | 
             
            def count_distinct(col: ColumnOrName, *cols: ColumnOrName) -> Column:
         | 
| 69 75 | 
             
                columns = [Column.ensure_col(x) for x in [col] + list(cols)]
         | 
| 70 76 | 
             
                return Column(
         | 
| 71 | 
            -
                    expression.Count( | 
| 77 | 
            +
                    expression.Count(
         | 
| 78 | 
            +
                        this=expression.Distinct(expressions=[x.column_expression for x in columns])
         | 
| 79 | 
            +
                    )
         | 
| 72 80 | 
             
                )
         | 
| 73 81 |  | 
| 74 82 |  | 
| @@ -151,7 +159,9 @@ mean = avg | |
| 151 159 | 
             
            @meta()
         | 
| 152 160 | 
             
            def sumDistinct(col: ColumnOrName) -> Column:
         | 
| 153 161 | 
             
                return Column(
         | 
| 154 | 
            -
                    expression.Sum( | 
| 162 | 
            +
                    expression.Sum(
         | 
| 163 | 
            +
                        this=expression.Distinct(expressions=[Column.ensure_col(col).column_expression])
         | 
| 164 | 
            +
                    )
         | 
| 155 165 | 
             
                )
         | 
| 156 166 |  | 
| 157 167 |  | 
| @@ -237,6 +247,19 @@ def csc(col: ColumnOrName) -> Column: | |
| 237 247 |  | 
| 238 248 | 
             
            @meta()
         | 
| 239 249 | 
             
            def e() -> Column:
         | 
| 250 | 
            +
                from sqlframe.base.function_alternatives import e_literal
         | 
| 251 | 
            +
             | 
| 252 | 
            +
                session = _get_session()
         | 
| 253 | 
            +
             | 
| 254 | 
            +
                if (
         | 
| 255 | 
            +
                    session._is_bigquery
         | 
| 256 | 
            +
                    or session._is_duckdb
         | 
| 257 | 
            +
                    or session._is_postgres
         | 
| 258 | 
            +
                    or session._is_redshift
         | 
| 259 | 
            +
                    or session._is_snowflake
         | 
| 260 | 
            +
                ):
         | 
| 261 | 
            +
                    return e_literal()
         | 
| 262 | 
            +
             | 
| 240 263 | 
             
                return Column(expression.Anonymous(this="e"))
         | 
| 241 264 |  | 
| 242 265 |  | 
| @@ -247,11 +270,31 @@ def exp(col: ColumnOrName) -> Column: | |
| 247 270 |  | 
| 248 271 | 
             
            @meta()
         | 
| 249 272 | 
             
            def expm1(col: ColumnOrName) -> Column:
         | 
| 273 | 
            +
                from sqlframe.base.function_alternatives import expm1_from_exp
         | 
| 274 | 
            +
             | 
| 275 | 
            +
                session = _get_session()
         | 
| 276 | 
            +
             | 
| 277 | 
            +
                if session._is_bigquery or session._is_duckdb or session._is_postgres or session._is_snowflake:
         | 
| 278 | 
            +
                    return expm1_from_exp(col)
         | 
| 279 | 
            +
             | 
| 250 280 | 
             
                return Column.invoke_anonymous_function(col, "EXPM1")
         | 
| 251 281 |  | 
| 252 282 |  | 
| 253 283 | 
             
            @meta()
         | 
| 254 284 | 
             
            def factorial(col: ColumnOrName) -> Column:
         | 
| 285 | 
            +
                from sqlframe.base.function_alternatives import (
         | 
| 286 | 
            +
                    factorial_ensure_int,
         | 
| 287 | 
            +
                    factorial_from_case_statement,
         | 
| 288 | 
            +
                )
         | 
| 289 | 
            +
             | 
| 290 | 
            +
                session = _get_session()
         | 
| 291 | 
            +
             | 
| 292 | 
            +
                if session._is_duckdb:
         | 
| 293 | 
            +
                    return factorial_ensure_int(col)
         | 
| 294 | 
            +
             | 
| 295 | 
            +
                if session._is_bigquery:
         | 
| 296 | 
            +
                    return factorial_from_case_statement(col)
         | 
| 297 | 
            +
             | 
| 255 298 | 
             
                return Column.invoke_anonymous_function(col, "FACTORIAL")
         | 
| 256 299 |  | 
| 257 300 |  | 
| @@ -267,6 +310,13 @@ def log10(col: ColumnOrName) -> Column: | |
| 267 310 |  | 
| 268 311 | 
             
            @meta()
         | 
| 269 312 | 
             
            def log1p(col: ColumnOrName) -> Column:
         | 
| 313 | 
            +
                from sqlframe.base.function_alternatives import log1p_from_log
         | 
| 314 | 
            +
             | 
| 315 | 
            +
                session = _get_session()
         | 
| 316 | 
            +
             | 
| 317 | 
            +
                if session._is_bigquery or session._is_duckdb or session._is_postgres or session._is_snowflake:
         | 
| 318 | 
            +
                    return log1p_from_log(col)
         | 
| 319 | 
            +
             | 
| 270 320 | 
             
                return Column.invoke_anonymous_function(col, "LOG1P")
         | 
| 271 321 |  | 
| 272 322 |  | 
| @@ -286,6 +336,13 @@ def log(arg1: t.Union[ColumnOrName, float], arg2: t.Optional[ColumnOrName] = Non | |
| 286 336 |  | 
| 287 337 | 
             
            @meta()
         | 
| 288 338 | 
             
            def rint(col: ColumnOrName) -> Column:
         | 
| 339 | 
            +
                from sqlframe.base.function_alternatives import rint_from_round
         | 
| 340 | 
            +
             | 
| 341 | 
            +
                session = _get_session()
         | 
| 342 | 
            +
             | 
| 343 | 
            +
                if session._is_bigquery or session._is_duckdb or session._is_postgres or session._is_snowflake:
         | 
| 344 | 
            +
                    return rint_from_round(col)
         | 
| 345 | 
            +
             | 
| 289 346 | 
             
                return Column.invoke_anonymous_function(col, "RINT")
         | 
| 290 347 |  | 
| 291 348 |  | 
| @@ -321,6 +378,13 @@ def tanh(col: ColumnOrName) -> Column: | |
| 321 378 |  | 
| 322 379 | 
             
            @meta()
         | 
| 323 380 | 
             
            def degrees(col: ColumnOrName) -> Column:
         | 
| 381 | 
            +
                from sqlframe.base.function_alternatives import degrees_bgutil
         | 
| 382 | 
            +
             | 
| 383 | 
            +
                session = _get_session()
         | 
| 384 | 
            +
             | 
| 385 | 
            +
                if session._is_bigquery:
         | 
| 386 | 
            +
                    return degrees_bgutil(col)
         | 
| 387 | 
            +
             | 
| 324 388 | 
             
                return Column.invoke_anonymous_function(col, "DEGREES")
         | 
| 325 389 |  | 
| 326 390 |  | 
| @@ -329,6 +393,13 @@ toDegrees = degrees | |
| 329 393 |  | 
| 330 394 | 
             
            @meta()
         | 
| 331 395 | 
             
            def radians(col: ColumnOrName) -> Column:
         | 
| 396 | 
            +
                from sqlframe.base.function_alternatives import radians_bgutil
         | 
| 397 | 
            +
             | 
| 398 | 
            +
                session = _get_session()
         | 
| 399 | 
            +
             | 
| 400 | 
            +
                if session._is_bigquery:
         | 
| 401 | 
            +
                    return radians_bgutil(col)
         | 
| 402 | 
            +
             | 
| 332 403 | 
             
                return Column.invoke_anonymous_function(col, "RADIANS")
         | 
| 333 404 |  | 
| 334 405 |  | 
| @@ -342,6 +413,13 @@ def bitwiseNOT(col: ColumnOrName) -> Column: | |
| 342 413 |  | 
| 343 414 | 
             
            @meta()
         | 
| 344 415 | 
             
            def bitwise_not(col: ColumnOrName) -> Column:
         | 
| 416 | 
            +
                from sqlframe.base.function_alternatives import bitwise_not_from_bitnot
         | 
| 417 | 
            +
             | 
| 418 | 
            +
                session = _get_session()
         | 
| 419 | 
            +
             | 
| 420 | 
            +
                if session._is_snowflake:
         | 
| 421 | 
            +
                    return bitwise_not_from_bitnot(col)
         | 
| 422 | 
            +
             | 
| 345 423 | 
             
                return Column.invoke_expression_over_column(col, expression.BitwiseNot)
         | 
| 346 424 |  | 
| 347 425 |  | 
| @@ -397,11 +475,25 @@ def var_pop(col: ColumnOrName) -> Column: | |
| 397 475 |  | 
| 398 476 | 
             
            @meta(unsupported_engines=["bigquery", "postgres"])
         | 
| 399 477 | 
             
            def skewness(col: ColumnOrName) -> Column:
         | 
| 478 | 
            +
                from sqlframe.base.function_alternatives import skewness_from_skew
         | 
| 479 | 
            +
             | 
| 480 | 
            +
                session = _get_session()
         | 
| 481 | 
            +
             | 
| 482 | 
            +
                if session._is_snowflake:
         | 
| 483 | 
            +
                    return skewness_from_skew(col)
         | 
| 484 | 
            +
             | 
| 400 485 | 
             
                return Column.invoke_anonymous_function(col, "SKEWNESS")
         | 
| 401 486 |  | 
| 402 487 |  | 
| 403 488 | 
             
            @meta(unsupported_engines=["bigquery", "postgres"])
         | 
| 404 489 | 
             
            def kurtosis(col: ColumnOrName) -> Column:
         | 
| 490 | 
            +
                from sqlframe.base.function_alternatives import kurtosis_from_kurtosis_pop
         | 
| 491 | 
            +
             | 
| 492 | 
            +
                session = _get_session()
         | 
| 493 | 
            +
             | 
| 494 | 
            +
                if session._is_duckdb:
         | 
| 495 | 
            +
                    return kurtosis_from_kurtosis_pop(col)
         | 
| 496 | 
            +
             | 
| 405 497 | 
             
                return Column.invoke_anonymous_function(col, "KURTOSIS")
         | 
| 406 498 |  | 
| 407 499 |  | 
| @@ -412,6 +504,13 @@ def collect_list(col: ColumnOrName) -> Column: | |
| 412 504 |  | 
| 413 505 | 
             
            @meta()
         | 
| 414 506 | 
             
            def collect_set(col: ColumnOrName) -> Column:
         | 
| 507 | 
            +
                from sqlframe.base.function_alternatives import collect_set_from_list_distinct
         | 
| 508 | 
            +
             | 
| 509 | 
            +
                session = _get_session()
         | 
| 510 | 
            +
             | 
| 511 | 
            +
                if session._is_bigquery or session._is_duckdb or session._is_postgres:
         | 
| 512 | 
            +
                    return collect_set_from_list_distinct(col)
         | 
| 513 | 
            +
             | 
| 415 514 | 
             
                return Column.invoke_expression_over_column(col, expression.ArrayUniqueAgg)
         | 
| 416 515 |  | 
| 417 516 |  | 
| @@ -495,6 +594,11 @@ def covar_samp(col1: ColumnOrName, col2: ColumnOrName) -> Column: | |
| 495 594 |  | 
| 496 595 | 
             
            @meta(unsupported_engines=["bigquery", "postgres", "snowflake"])
         | 
| 497 596 | 
             
            def first(col: ColumnOrName, ignorenulls: t.Optional[bool] = None) -> Column:
         | 
| 597 | 
            +
                session = _get_session()
         | 
| 598 | 
            +
             | 
| 599 | 
            +
                if session._is_duckdb:
         | 
| 600 | 
            +
                    ignorenulls = None
         | 
| 601 | 
            +
             | 
| 498 602 | 
             
                this = Column.invoke_expression_over_column(col, expression.First)
         | 
| 499 603 | 
             
                if ignorenulls:
         | 
| 500 604 | 
             
                    return Column.invoke_expression_over_column(this, expression.IgnoreNulls)
         | 
| @@ -519,11 +623,25 @@ def input_file_name() -> Column: | |
| 519 623 |  | 
| 520 624 | 
             
            @meta()
         | 
| 521 625 | 
             
            def isnan(col: ColumnOrName) -> Column:
         | 
| 626 | 
            +
                from sqlframe.base.function_alternatives import isnan_using_equal
         | 
| 627 | 
            +
             | 
| 628 | 
            +
                session = _get_session()
         | 
| 629 | 
            +
             | 
| 630 | 
            +
                if session._is_postgres or session._is_snowflake:
         | 
| 631 | 
            +
                    return isnan_using_equal(col)
         | 
| 632 | 
            +
             | 
| 522 633 | 
             
                return Column.invoke_expression_over_column(col, expression.IsNan)
         | 
| 523 634 |  | 
| 524 635 |  | 
| 525 636 | 
             
            @meta()
         | 
| 526 637 | 
             
            def isnull(col: ColumnOrName) -> Column:
         | 
| 638 | 
            +
                from sqlframe.base.function_alternatives import isnull_using_equal
         | 
| 639 | 
            +
             | 
| 640 | 
            +
                session = _get_session()
         | 
| 641 | 
            +
             | 
| 642 | 
            +
                if session._is_bigquery or session._is_duckdb or session._is_postgres or session._is_snowflake:
         | 
| 643 | 
            +
                    return isnull_using_equal(col)
         | 
| 644 | 
            +
             | 
| 527 645 | 
             
                return Column.invoke_anonymous_function(col, "ISNULL")
         | 
| 528 646 |  | 
| 529 647 |  | 
| @@ -542,6 +660,13 @@ def monotonically_increasing_id() -> Column: | |
| 542 660 |  | 
| 543 661 | 
             
            @meta()
         | 
| 544 662 | 
             
            def nanvl(col1: ColumnOrName, col2: ColumnOrName) -> Column:
         | 
| 663 | 
            +
                from sqlframe.base.function_alternatives import nanvl_as_case
         | 
| 664 | 
            +
             | 
| 665 | 
            +
                session = _get_session()
         | 
| 666 | 
            +
             | 
| 667 | 
            +
                if session._is_bigquery or session._is_duckdb or session._is_postgres or session._is_snowflake:
         | 
| 668 | 
            +
                    return nanvl_as_case(col1, col2)
         | 
| 669 | 
            +
             | 
| 545 670 | 
             
                return Column.invoke_anonymous_function(col1, "NANVL", col2)
         | 
| 546 671 |  | 
| 547 672 |  | 
| @@ -551,6 +676,24 @@ def percentile_approx( | |
| 551 676 | 
             
                percentage: t.Union[ColumnOrLiteral, t.List[float], t.Tuple[float]],
         | 
| 552 677 | 
             
                accuracy: t.Optional[t.Union[ColumnOrLiteral, int]] = None,
         | 
| 553 678 | 
             
            ) -> Column:
         | 
| 679 | 
            +
                from sqlframe.base.function_alternatives import (
         | 
| 680 | 
            +
                    percentile_approx_without_accuracy_and_max_array,
         | 
| 681 | 
            +
                    percentile_approx_without_accuracy_and_plural,
         | 
| 682 | 
            +
                )
         | 
| 683 | 
            +
             | 
| 684 | 
            +
                session = _get_session()
         | 
| 685 | 
            +
             | 
| 686 | 
            +
                if session._is_bigquery:
         | 
| 687 | 
            +
                    return percentile_approx_without_accuracy_and_plural(col, percentage, accuracy)  # type: ignore
         | 
| 688 | 
            +
             | 
| 689 | 
            +
                if session._is_duckdb:
         | 
| 690 | 
            +
                    if accuracy:
         | 
| 691 | 
            +
                        logger.warning("Accuracy is ignored since it is not supported in this dialect")
         | 
| 692 | 
            +
                    accuracy = None
         | 
| 693 | 
            +
             | 
| 694 | 
            +
                if session._is_snowflake and isinstance(percentage, (list, tuple)):
         | 
| 695 | 
            +
                    return percentile_approx_without_accuracy_and_max_array(col, percentage, accuracy)  # type: ignore
         | 
| 696 | 
            +
             | 
| 554 697 | 
             
                if accuracy:
         | 
| 555 698 | 
             
                    return Column.invoke_expression_over_column(
         | 
| 556 699 | 
             
                        col, expression.ApproxQuantile, quantile=lit(percentage), accuracy=accuracy
         | 
| @@ -566,6 +709,13 @@ def percentile( | |
| 566 709 | 
             
                percentage: t.Union[ColumnOrLiteral, t.List[float], t.Tuple[float]],
         | 
| 567 710 | 
             
                frequency: t.Optional[ColumnOrLiteral] = None,
         | 
| 568 711 | 
             
            ) -> Column:
         | 
| 712 | 
            +
                from sqlframe.base.function_alternatives import percentile_without_disc
         | 
| 713 | 
            +
             | 
| 714 | 
            +
                session = _get_session()
         | 
| 715 | 
            +
             | 
| 716 | 
            +
                if session._is_databricks or session._is_spark:
         | 
| 717 | 
            +
                    return percentile_without_disc(col, percentage, frequency)
         | 
| 718 | 
            +
             | 
| 569 719 | 
             
                if frequency:
         | 
| 570 720 | 
             
                    logger.warning("Frequency is not supported in all engines")
         | 
| 571 721 | 
             
                return Column.invoke_expression_over_column(
         | 
| @@ -575,6 +725,13 @@ def percentile( | |
| 575 725 |  | 
| 576 726 | 
             
            @meta()
         | 
| 577 727 | 
             
            def rand(seed: t.Optional[int] = None) -> Column:
         | 
| 728 | 
            +
                session = _get_session()
         | 
| 729 | 
            +
             | 
| 730 | 
            +
                if session._is_bigquery or session._is_duckdb or session._is_postgres:
         | 
| 731 | 
            +
                    if seed:
         | 
| 732 | 
            +
                        logger.warning("Seed is ignored since it is not supported in this dialect")
         | 
| 733 | 
            +
                    seed = None
         | 
| 734 | 
            +
             | 
| 578 735 | 
             
                if seed is not None:
         | 
| 579 736 | 
             
                    return Column.invoke_expression_over_column(None, expression.Rand, this=lit(seed))
         | 
| 580 737 | 
             
                return Column.invoke_expression_over_column(None, expression.Rand)
         | 
| @@ -589,6 +746,11 @@ def randn(seed: t.Optional[int] = None) -> Column: | |
| 589 746 |  | 
| 590 747 | 
             
            @meta()
         | 
| 591 748 | 
             
            def round(col: ColumnOrName, scale: t.Optional[int] = None) -> Column:
         | 
| 749 | 
            +
                session = _get_session()
         | 
| 750 | 
            +
             | 
| 751 | 
            +
                if session._is_postgres:
         | 
| 752 | 
            +
                    col = Column.ensure_col(col).cast("numeric")
         | 
| 753 | 
            +
             | 
| 592 754 | 
             
                if scale is not None:
         | 
| 593 755 | 
             
                    return Column.invoke_expression_over_column(col, expression.Round, decimals=scale)
         | 
| 594 756 | 
             
                return Column.invoke_expression_over_column(col, expression.Round)
         | 
| @@ -596,6 +758,19 @@ def round(col: ColumnOrName, scale: t.Optional[int] = None) -> Column: | |
| 596 758 |  | 
| 597 759 | 
             
            @meta(unsupported_engines=["duckdb", "postgres"])
         | 
| 598 760 | 
             
            def bround(col: ColumnOrName, scale: t.Optional[int] = None) -> Column:
         | 
| 761 | 
            +
                from sqlframe.base.function_alternatives import (
         | 
| 762 | 
            +
                    bround_bgutil,
         | 
| 763 | 
            +
                    bround_using_half_even,
         | 
| 764 | 
            +
                )
         | 
| 765 | 
            +
             | 
| 766 | 
            +
                session = _get_session()
         | 
| 767 | 
            +
             | 
| 768 | 
            +
                if session._is_bigquery:
         | 
| 769 | 
            +
                    return bround_bgutil(col, scale)
         | 
| 770 | 
            +
             | 
| 771 | 
            +
                if session._is_snowflake:
         | 
| 772 | 
            +
                    return bround_using_half_even(col, scale)
         | 
| 773 | 
            +
             | 
| 599 774 | 
             
                if scale is not None:
         | 
| 600 775 | 
             
                    return Column.invoke_anonymous_function(col, "BROUND", lit(scale))
         | 
| 601 776 | 
             
                return Column.invoke_anonymous_function(col, "BROUND")
         | 
| @@ -603,6 +778,13 @@ def bround(col: ColumnOrName, scale: t.Optional[int] = None) -> Column: | |
| 603 778 |  | 
| 604 779 | 
             
            @meta()
         | 
| 605 780 | 
             
            def shiftleft(col: ColumnOrName, numBits: int) -> Column:
         | 
| 781 | 
            +
                from sqlframe.base.function_alternatives import shiftleft_from_bitshiftleft
         | 
| 782 | 
            +
             | 
| 783 | 
            +
                session = _get_session()
         | 
| 784 | 
            +
             | 
| 785 | 
            +
                if session._is_snowflake:
         | 
| 786 | 
            +
                    return shiftleft_from_bitshiftleft(col, numBits)
         | 
| 787 | 
            +
             | 
| 606 788 | 
             
                return Column.invoke_expression_over_column(
         | 
| 607 789 | 
             
                    col, expression.BitwiseLeftShift, expression=lit(numBits)
         | 
| 608 790 | 
             
                )
         | 
| @@ -613,6 +795,13 @@ shiftLeft = shiftleft | |
| 613 795 |  | 
| 614 796 | 
             
            @meta()
         | 
| 615 797 | 
             
            def shiftright(col: ColumnOrName, numBits: int) -> Column:
         | 
| 798 | 
            +
                from sqlframe.base.function_alternatives import shiftright_from_bitshiftright
         | 
| 799 | 
            +
             | 
| 800 | 
            +
                session = _get_session()
         | 
| 801 | 
            +
             | 
| 802 | 
            +
                if session._is_snowflake:
         | 
| 803 | 
            +
                    return shiftright_from_bitshiftright(col, numBits)
         | 
| 804 | 
            +
             | 
| 616 805 | 
             
                return Column.invoke_expression_over_column(
         | 
| 617 806 | 
             
                    col, expression.BitwiseRightShift, expression=lit(numBits)
         | 
| 618 807 | 
             
                )
         | 
| @@ -638,6 +827,13 @@ def expr(str: str) -> Column: | |
| 638 827 |  | 
| 639 828 | 
             
            @meta(unsupported_engines=["postgres"])
         | 
| 640 829 | 
             
            def struct(col: t.Union[ColumnOrName, t.Iterable[ColumnOrName]], *cols: ColumnOrName) -> Column:
         | 
| 830 | 
            +
                from sqlframe.base.function_alternatives import struct_with_eq
         | 
| 831 | 
            +
             | 
| 832 | 
            +
                session = _get_session()
         | 
| 833 | 
            +
             | 
| 834 | 
            +
                if session._is_snowflake:
         | 
| 835 | 
            +
                    return struct_with_eq(col, *cols)
         | 
| 836 | 
            +
             | 
| 641 837 | 
             
                columns = ensure_list(col) + list(cols)
         | 
| 642 838 | 
             
                return Column.invoke_expression_over_column(None, expression.Struct, expressions=columns)
         | 
| 643 839 |  | 
| @@ -699,7 +895,7 @@ def date_format(col: ColumnOrName, format: str) -> Column: | |
| 699 895 | 
             
                from sqlframe.base.session import _BaseSession
         | 
| 700 896 |  | 
| 701 897 | 
             
                return Column.invoke_expression_over_column(
         | 
| 702 | 
            -
                    Column(expression.TimeStrToTime(this=Column.ensure_col(col). | 
| 898 | 
            +
                    Column(expression.TimeStrToTime(this=Column.ensure_col(col).column_expression)),
         | 
| 703 899 | 
             
                    expression.TimeToStr,
         | 
| 704 900 | 
             
                    format=_BaseSession().format_time(format),
         | 
| 705 901 | 
             
                )
         | 
| @@ -707,77 +903,185 @@ def date_format(col: ColumnOrName, format: str) -> Column: | |
| 707 903 |  | 
| 708 904 | 
             
            @meta()
         | 
| 709 905 | 
             
            def year(col: ColumnOrName) -> Column:
         | 
| 906 | 
            +
                from sqlframe.base.function_alternatives import year_from_extract
         | 
| 907 | 
            +
             | 
| 908 | 
            +
                session = _get_session()
         | 
| 909 | 
            +
             | 
| 910 | 
            +
                if session._is_bigquery or session._is_postgres:
         | 
| 911 | 
            +
                    return year_from_extract(col)
         | 
| 912 | 
            +
             | 
| 710 913 | 
             
                return Column.invoke_expression_over_column(
         | 
| 711 | 
            -
                    Column(expression.TsOrDsToDate(this=Column.ensure_col(col). | 
| 914 | 
            +
                    Column(expression.TsOrDsToDate(this=Column.ensure_col(col).column_expression)),
         | 
| 915 | 
            +
                    expression.Year,
         | 
| 712 916 | 
             
                )
         | 
| 713 917 |  | 
| 714 918 |  | 
| 715 919 | 
             
            @meta()
         | 
| 716 920 | 
             
            def quarter(col: ColumnOrName) -> Column:
         | 
| 921 | 
            +
                from sqlframe.base.function_alternatives import quarter_from_extract
         | 
| 922 | 
            +
             | 
| 923 | 
            +
                session = _get_session()
         | 
| 924 | 
            +
             | 
| 925 | 
            +
                if session._is_bigquery or session._is_postgres:
         | 
| 926 | 
            +
                    return quarter_from_extract(col)
         | 
| 927 | 
            +
             | 
| 717 928 | 
             
                return Column(
         | 
| 718 929 | 
             
                    expression.Anonymous(
         | 
| 719 930 | 
             
                        this="QUARTER",
         | 
| 720 | 
            -
                        expressions=[expression.TsOrDsToDate(this=Column.ensure_col(col). | 
| 931 | 
            +
                        expressions=[expression.TsOrDsToDate(this=Column.ensure_col(col).column_expression)],
         | 
| 721 932 | 
             
                    )
         | 
| 722 933 | 
             
                )
         | 
| 723 934 |  | 
| 724 935 |  | 
| 725 936 | 
             
            @meta()
         | 
| 726 937 | 
             
            def month(col: ColumnOrName) -> Column:
         | 
| 938 | 
            +
                from sqlframe.base.function_alternatives import month_from_extract
         | 
| 939 | 
            +
             | 
| 940 | 
            +
                session = _get_session()
         | 
| 941 | 
            +
             | 
| 942 | 
            +
                if session._is_bigquery or session._is_postgres:
         | 
| 943 | 
            +
                    return month_from_extract(col)
         | 
| 944 | 
            +
             | 
| 727 945 | 
             
                return Column.invoke_expression_over_column(
         | 
| 728 | 
            -
                    Column(expression.TsOrDsToDate(this=Column.ensure_col(col). | 
| 946 | 
            +
                    Column(expression.TsOrDsToDate(this=Column.ensure_col(col).column_expression)),
         | 
| 947 | 
            +
                    expression.Month,
         | 
| 729 948 | 
             
                )
         | 
| 730 949 |  | 
| 731 950 |  | 
| 732 951 | 
             
            @meta()
         | 
| 733 952 | 
             
            def dayofweek(col: ColumnOrName) -> Column:
         | 
| 953 | 
            +
                from sqlframe.base.function_alternatives import (
         | 
| 954 | 
            +
                    dayofweek_from_extract,
         | 
| 955 | 
            +
                    dayofweek_from_extract_with_isodow,
         | 
| 956 | 
            +
                )
         | 
| 957 | 
            +
             | 
| 958 | 
            +
                session = _get_session()
         | 
| 959 | 
            +
             | 
| 960 | 
            +
                if session._is_bigquery:
         | 
| 961 | 
            +
                    return dayofweek_from_extract(col)
         | 
| 962 | 
            +
             | 
| 963 | 
            +
                if session._is_postgres:
         | 
| 964 | 
            +
                    return dayofweek_from_extract_with_isodow(col)
         | 
| 965 | 
            +
             | 
| 734 966 | 
             
                return Column.invoke_expression_over_column(
         | 
| 735 | 
            -
                    Column(expression.TsOrDsToDate(this=Column.ensure_col(col). | 
| 967 | 
            +
                    Column(expression.TsOrDsToDate(this=Column.ensure_col(col).column_expression)),
         | 
| 736 968 | 
             
                    expression.DayOfWeek,
         | 
| 737 969 | 
             
                )
         | 
| 738 970 |  | 
| 739 971 |  | 
| 740 972 | 
             
            @meta()
         | 
| 741 973 | 
             
            def dayofmonth(col: ColumnOrName) -> Column:
         | 
| 974 | 
            +
                from sqlframe.base.function_alternatives import dayofmonth_from_extract_with_day
         | 
| 975 | 
            +
             | 
| 976 | 
            +
                session = _get_session()
         | 
| 977 | 
            +
             | 
| 978 | 
            +
                if session._is_bigquery or session._is_postgres:
         | 
| 979 | 
            +
                    return dayofmonth_from_extract_with_day(col)
         | 
| 980 | 
            +
             | 
| 742 981 | 
             
                return Column.invoke_expression_over_column(
         | 
| 743 | 
            -
                    Column(expression.TsOrDsToDate(this=Column.ensure_col(col). | 
| 982 | 
            +
                    Column(expression.TsOrDsToDate(this=Column.ensure_col(col).column_expression)),
         | 
| 744 983 | 
             
                    expression.DayOfMonth,
         | 
| 745 984 | 
             
                )
         | 
| 746 985 |  | 
| 747 986 |  | 
| 748 987 | 
             
            @meta()
         | 
| 749 988 | 
             
            def dayofyear(col: ColumnOrName) -> Column:
         | 
| 989 | 
            +
                from sqlframe.base.function_alternatives import (
         | 
| 990 | 
            +
                    dayofyear_from_extract,
         | 
| 991 | 
            +
                    dayofyear_from_extract_doy,
         | 
| 992 | 
            +
                )
         | 
| 993 | 
            +
             | 
| 994 | 
            +
                session = _get_session()
         | 
| 995 | 
            +
             | 
| 996 | 
            +
                if session._is_bigquery:
         | 
| 997 | 
            +
                    return dayofyear_from_extract(col)
         | 
| 998 | 
            +
             | 
| 999 | 
            +
                if session._is_postgres:
         | 
| 1000 | 
            +
                    return dayofyear_from_extract_doy(col)
         | 
| 1001 | 
            +
             | 
| 750 1002 | 
             
                return Column.invoke_expression_over_column(
         | 
| 751 | 
            -
                    Column(expression.TsOrDsToDate(this=Column.ensure_col(col). | 
| 1003 | 
            +
                    Column(expression.TsOrDsToDate(this=Column.ensure_col(col).column_expression)),
         | 
| 752 1004 | 
             
                    expression.DayOfYear,
         | 
| 753 1005 | 
             
                )
         | 
| 754 1006 |  | 
| 755 1007 |  | 
| 756 1008 | 
             
            @meta()
         | 
| 757 1009 | 
             
            def hour(col: ColumnOrName) -> Column:
         | 
| 1010 | 
            +
                from sqlframe.base.function_alternatives import hour_from_extract
         | 
| 1011 | 
            +
             | 
| 1012 | 
            +
                session = _get_session()
         | 
| 1013 | 
            +
             | 
| 1014 | 
            +
                if session._is_bigquery or session._is_postgres:
         | 
| 1015 | 
            +
                    return hour_from_extract(col)
         | 
| 1016 | 
            +
             | 
| 758 1017 | 
             
                return Column.invoke_anonymous_function(col, "HOUR")
         | 
| 759 1018 |  | 
| 760 1019 |  | 
| 761 1020 | 
             
            @meta()
         | 
| 762 1021 | 
             
            def minute(col: ColumnOrName) -> Column:
         | 
| 1022 | 
            +
                from sqlframe.base.function_alternatives import minute_from_extract
         | 
| 1023 | 
            +
             | 
| 1024 | 
            +
                session = _get_session()
         | 
| 1025 | 
            +
             | 
| 1026 | 
            +
                if session._is_bigquery or session._is_postgres:
         | 
| 1027 | 
            +
                    return minute_from_extract(col)
         | 
| 1028 | 
            +
             | 
| 763 1029 | 
             
                return Column.invoke_anonymous_function(col, "MINUTE")
         | 
| 764 1030 |  | 
| 765 1031 |  | 
| 766 1032 | 
             
            @meta()
         | 
| 767 1033 | 
             
            def second(col: ColumnOrName) -> Column:
         | 
| 1034 | 
            +
                from sqlframe.base.function_alternatives import second_from_extract
         | 
| 1035 | 
            +
             | 
| 1036 | 
            +
                session = _get_session()
         | 
| 1037 | 
            +
             | 
| 1038 | 
            +
                if session._is_bigquery or session._is_postgres:
         | 
| 1039 | 
            +
                    return second_from_extract(col)
         | 
| 1040 | 
            +
             | 
| 768 1041 | 
             
                return Column.invoke_anonymous_function(col, "SECOND")
         | 
| 769 1042 |  | 
| 770 1043 |  | 
| 771 1044 | 
             
            @meta()
         | 
| 772 1045 | 
             
            def weekofyear(col: ColumnOrName) -> Column:
         | 
| 1046 | 
            +
                from sqlframe.base.function_alternatives import (
         | 
| 1047 | 
            +
                    weekofyear_from_extract_as_isoweek,
         | 
| 1048 | 
            +
                    weekofyear_from_extract_as_week,
         | 
| 1049 | 
            +
                )
         | 
| 1050 | 
            +
             | 
| 1051 | 
            +
                session = _get_session()
         | 
| 1052 | 
            +
             | 
| 1053 | 
            +
                if session._is_bigquery:
         | 
| 1054 | 
            +
                    return weekofyear_from_extract_as_isoweek(col)
         | 
| 1055 | 
            +
             | 
| 1056 | 
            +
                if session._is_postgres:
         | 
| 1057 | 
            +
                    return weekofyear_from_extract_as_week(col)
         | 
| 1058 | 
            +
             | 
| 773 1059 | 
             
                return Column.invoke_expression_over_column(
         | 
| 774 | 
            -
                    Column(expression.TsOrDsToDate(this=Column.ensure_col(col). | 
| 1060 | 
            +
                    Column(expression.TsOrDsToDate(this=Column.ensure_col(col).column_expression)),
         | 
| 775 1061 | 
             
                    expression.WeekOfYear,
         | 
| 776 1062 | 
             
                )
         | 
| 777 1063 |  | 
| 778 1064 |  | 
| 779 1065 | 
             
            @meta()
         | 
| 780 1066 | 
             
            def make_date(year: ColumnOrName, month: ColumnOrName, day: ColumnOrName) -> Column:
         | 
| 1067 | 
            +
                from sqlframe.base.function_alternatives import (
         | 
| 1068 | 
            +
                    make_date_date_from_parts,
         | 
| 1069 | 
            +
                    make_date_from_date_func,
         | 
| 1070 | 
            +
                )
         | 
| 1071 | 
            +
             | 
| 1072 | 
            +
                session = _get_session()
         | 
| 1073 | 
            +
             | 
| 1074 | 
            +
                if session._is_bigquery:
         | 
| 1075 | 
            +
                    return make_date_from_date_func(year, month, day)
         | 
| 1076 | 
            +
             | 
| 1077 | 
            +
                if session._is_postgres:
         | 
| 1078 | 
            +
                    year = Column.ensure_col(year).cast("integer")
         | 
| 1079 | 
            +
                    month = Column.ensure_col(month).cast("integer")
         | 
| 1080 | 
            +
                    day = Column.ensure_col(day).cast("integer")
         | 
| 1081 | 
            +
             | 
| 1082 | 
            +
                if session._is_snowflake:
         | 
| 1083 | 
            +
                    return make_date_date_from_parts(year, month, day)
         | 
| 1084 | 
            +
             | 
| 781 1085 | 
             
                return Column.invoke_anonymous_function(year, "MAKE_DATE", month, day)
         | 
| 782 1086 |  | 
| 783 1087 |  | 
| @@ -785,9 +1089,22 @@ def make_date(year: ColumnOrName, month: ColumnOrName, day: ColumnOrName) -> Col | |
| 785 1089 | 
             
            def date_add(
         | 
| 786 1090 | 
             
                col: ColumnOrName, days: t.Union[ColumnOrName, int], cast_as_date: bool = True
         | 
| 787 1091 | 
             
            ) -> Column:
         | 
| 1092 | 
            +
                from sqlframe.base.function_alternatives import date_add_no_date_sub
         | 
| 1093 | 
            +
             | 
| 1094 | 
            +
                session = _get_session()
         | 
| 1095 | 
            +
                date_sub_func = get_func_from_session("date_sub")
         | 
| 1096 | 
            +
                original_days = None
         | 
| 1097 | 
            +
             | 
| 1098 | 
            +
                if session._is_postgres and not isinstance(days, int):
         | 
| 1099 | 
            +
                    original_days = days
         | 
| 1100 | 
            +
                    days = 1
         | 
| 1101 | 
            +
             | 
| 1102 | 
            +
                if session._is_snowflake:
         | 
| 1103 | 
            +
                    return date_add_no_date_sub(col, days, cast_as_date)
         | 
| 1104 | 
            +
             | 
| 788 1105 | 
             
                if isinstance(days, int):
         | 
| 789 1106 | 
             
                    if days < 0:
         | 
| 790 | 
            -
                        return  | 
| 1107 | 
            +
                        return date_sub_func(col, days * -1)
         | 
| 791 1108 | 
             
                    days = lit(days)
         | 
| 792 1109 | 
             
                result = Column.invoke_expression_over_column(
         | 
| 793 1110 | 
             
                    Column.ensure_col(col).cast("date"),
         | 
| @@ -795,6 +1112,10 @@ def date_add( | |
| 795 1112 | 
             
                    expression=days,
         | 
| 796 1113 | 
             
                    unit=expression.Var(this="DAY"),
         | 
| 797 1114 | 
             
                )
         | 
| 1115 | 
            +
             | 
| 1116 | 
            +
                if session._is_postgres and original_days is not None:
         | 
| 1117 | 
            +
                    result = result * Column.ensure_col(original_days)
         | 
| 1118 | 
            +
             | 
| 798 1119 | 
             
                if cast_as_date:
         | 
| 799 1120 | 
             
                    return result.cast("date")
         | 
| 800 1121 | 
             
                return result
         | 
| @@ -807,9 +1128,22 @@ def date_sub( | |
| 807 1128 | 
             
                """
         | 
| 808 1129 | 
             
                Non-standard argument: cast_as_date
         | 
| 809 1130 | 
             
                """
         | 
| 1131 | 
            +
                from sqlframe.base.function_alternatives import date_sub_by_date_add
         | 
| 1132 | 
            +
             | 
| 1133 | 
            +
                session = _get_session()
         | 
| 1134 | 
            +
                date_add_func = get_func_from_session("date_add")
         | 
| 1135 | 
            +
                original_days = None
         | 
| 1136 | 
            +
             | 
| 1137 | 
            +
                if session._is_postgres and not isinstance(days, int):
         | 
| 1138 | 
            +
                    original_days = days
         | 
| 1139 | 
            +
                    days = 1
         | 
| 1140 | 
            +
             | 
| 1141 | 
            +
                if session._is_snowflake:
         | 
| 1142 | 
            +
                    return date_sub_by_date_add(col, days, cast_as_date)
         | 
| 1143 | 
            +
             | 
| 810 1144 | 
             
                if isinstance(days, int):
         | 
| 811 1145 | 
             
                    if days < 0:
         | 
| 812 | 
            -
                        return  | 
| 1146 | 
            +
                        return date_add_func(col, days * -1)
         | 
| 813 1147 | 
             
                    days = lit(days)
         | 
| 814 1148 | 
             
                result = Column.invoke_expression_over_column(
         | 
| 815 1149 | 
             
                    Column.ensure_col(col).cast("date"),
         | 
| @@ -817,6 +1151,10 @@ def date_sub( | |
| 817 1151 | 
             
                    expression=days,
         | 
| 818 1152 | 
             
                    unit=expression.Var(this="DAY"),
         | 
| 819 1153 | 
             
                )
         | 
| 1154 | 
            +
             | 
| 1155 | 
            +
                if session._is_postgres and original_days is not None:
         | 
| 1156 | 
            +
                    result = result * Column.ensure_col(original_days)
         | 
| 1157 | 
            +
             | 
| 820 1158 | 
             
                if cast_as_date:
         | 
| 821 1159 | 
             
                    return result.cast("date")
         | 
| 822 1160 | 
             
                return result
         | 
| @@ -824,6 +1162,13 @@ def date_sub( | |
| 824 1162 |  | 
| 825 1163 | 
             
            @meta()
         | 
| 826 1164 | 
             
            def date_diff(end: ColumnOrName, start: ColumnOrName) -> Column:
         | 
| 1165 | 
            +
                from sqlframe.base.function_alternatives import date_diff_with_subtraction
         | 
| 1166 | 
            +
             | 
| 1167 | 
            +
                session = _get_session()
         | 
| 1168 | 
            +
             | 
| 1169 | 
            +
                if session._is_postgres:
         | 
| 1170 | 
            +
                    return date_diff_with_subtraction(end, start)
         | 
| 1171 | 
            +
             | 
| 827 1172 | 
             
                return Column.invoke_expression_over_column(
         | 
| 828 1173 | 
             
                    Column.ensure_col(end).cast("date"),
         | 
| 829 1174 | 
             
                    expression.DateDiff,
         | 
| @@ -838,28 +1183,51 @@ def add_months( | |
| 838 1183 | 
             
                """
         | 
| 839 1184 | 
             
                Non-standard argument: cast_as_date
         | 
| 840 1185 | 
             
                """
         | 
| 1186 | 
            +
                from sqlframe.base.function_alternatives import add_months_using_func
         | 
| 1187 | 
            +
             | 
| 1188 | 
            +
                lit = get_func_from_session("lit")
         | 
| 1189 | 
            +
                session = _get_session()
         | 
| 1190 | 
            +
                original_months = months
         | 
| 1191 | 
            +
             | 
| 1192 | 
            +
                if session._is_databricks or session._is_postgres or session._is_spark:
         | 
| 1193 | 
            +
                    months = 1
         | 
| 1194 | 
            +
             | 
| 1195 | 
            +
                if session._is_snowflake:
         | 
| 1196 | 
            +
                    return add_months_using_func(start, months, cast_as_date)
         | 
| 1197 | 
            +
             | 
| 841 1198 | 
             
                start_col = Column(start).cast("date")
         | 
| 842 1199 |  | 
| 843 1200 | 
             
                if isinstance(months, int):
         | 
| 844 1201 | 
             
                    if months < 0:
         | 
| 845 1202 | 
             
                        end_col = Column(
         | 
| 846 1203 | 
             
                            expression.Interval(
         | 
| 847 | 
            -
                                this=lit(months * -1). | 
| 1204 | 
            +
                                this=lit(months * -1).column_expression, unit=expression.Var(this="MONTH")
         | 
| 848 1205 | 
             
                            )
         | 
| 849 1206 | 
             
                        )
         | 
| 850 1207 | 
             
                        result = start_col - end_col
         | 
| 851 1208 | 
             
                    else:
         | 
| 852 1209 | 
             
                        end_col = Column(
         | 
| 853 | 
            -
                            expression.Interval( | 
| 1210 | 
            +
                            expression.Interval(
         | 
| 1211 | 
            +
                                this=lit(months).column_expression, unit=expression.Var(this="MONTH")
         | 
| 1212 | 
            +
                            )
         | 
| 854 1213 | 
             
                        )
         | 
| 855 1214 | 
             
                        result = start_col + end_col
         | 
| 856 1215 | 
             
                else:
         | 
| 857 1216 | 
             
                    end_col = Column(
         | 
| 858 1217 | 
             
                        expression.Interval(
         | 
| 859 | 
            -
                            this=Column.ensure_col(months). | 
| 1218 | 
            +
                            this=Column.ensure_col(months).column_expression, unit=expression.Var(this="MONTH")
         | 
| 860 1219 | 
             
                        )
         | 
| 861 1220 | 
             
                    )
         | 
| 862 1221 | 
             
                    result = start_col + end_col
         | 
| 1222 | 
            +
             | 
| 1223 | 
            +
                if session._is_databricks or session._is_postgres or session._is_spark:
         | 
| 1224 | 
            +
                    multiple_value = (
         | 
| 1225 | 
            +
                        lit(original_months)
         | 
| 1226 | 
            +
                        if isinstance(original_months, int)
         | 
| 1227 | 
            +
                        else Column.ensure_col(original_months)
         | 
| 1228 | 
            +
                    )
         | 
| 1229 | 
            +
                    result = Column.ensure_col(result.column_expression.unnest()) * multiple_value
         | 
| 1230 | 
            +
             | 
| 863 1231 | 
             
                if cast_as_date:
         | 
| 864 1232 | 
             
                    return result.cast("date")
         | 
| 865 1233 | 
             
                return result
         | 
| @@ -869,35 +1237,79 @@ def add_months( | |
| 869 1237 | 
             
            def months_between(
         | 
| 870 1238 | 
             
                date1: ColumnOrName, date2: ColumnOrName, roundOff: t.Optional[bool] = None
         | 
| 871 1239 | 
             
            ) -> Column:
         | 
| 1240 | 
            +
                from sqlframe.base.function_alternatives import (
         | 
| 1241 | 
            +
                    months_between_bgutils,
         | 
| 1242 | 
            +
                    months_between_from_age_and_extract,
         | 
| 1243 | 
            +
                )
         | 
| 1244 | 
            +
             | 
| 1245 | 
            +
                session = _get_session()
         | 
| 1246 | 
            +
                original_roundoff = roundOff
         | 
| 1247 | 
            +
             | 
| 1248 | 
            +
                if session._is_bigquery:
         | 
| 1249 | 
            +
                    return months_between_bgutils(date1, date2, roundOff)
         | 
| 1250 | 
            +
             | 
| 1251 | 
            +
                if session._is_postgres:
         | 
| 1252 | 
            +
                    return months_between_from_age_and_extract(date1, date2, roundOff)
         | 
| 1253 | 
            +
             | 
| 1254 | 
            +
                if session._is_snowflake:
         | 
| 1255 | 
            +
                    date1 = Column.ensure_col(date1).cast("date")
         | 
| 1256 | 
            +
                    date2 = Column.ensure_col(date2).cast("date")
         | 
| 1257 | 
            +
                    roundOff = None
         | 
| 1258 | 
            +
             | 
| 872 1259 | 
             
                if roundOff is None:
         | 
| 873 | 
            -
                     | 
| 1260 | 
            +
                    result = Column.invoke_expression_over_column(
         | 
| 874 1261 | 
             
                        date1, expression.MonthsBetween, expression=date2
         | 
| 875 1262 | 
             
                    )
         | 
| 1263 | 
            +
                else:
         | 
| 1264 | 
            +
                    result = Column.invoke_expression_over_column(
         | 
| 1265 | 
            +
                        date1, expression.MonthsBetween, expression=date2, roundoff=lit(roundOff)
         | 
| 1266 | 
            +
                    )
         | 
| 876 1267 |  | 
| 877 | 
            -
                 | 
| 878 | 
            -
                     | 
| 879 | 
            -
                 | 
| 1268 | 
            +
                if session._is_snowflake and original_roundoff:
         | 
| 1269 | 
            +
                    return result.cast("bigint")
         | 
| 1270 | 
            +
                return result
         | 
| 880 1271 |  | 
| 881 1272 |  | 
| 882 1273 | 
             
            @meta()
         | 
| 883 1274 | 
             
            def to_date(col: ColumnOrName, format: t.Optional[str] = None) -> Column:
         | 
| 884 | 
            -
                 | 
| 1275 | 
            +
                session = _get_session()
         | 
| 1276 | 
            +
             | 
| 1277 | 
            +
                if session._is_bigquery:
         | 
| 1278 | 
            +
                    to_timestamp_func = get_func_from_session("to_timestamp")
         | 
| 1279 | 
            +
                    col = to_timestamp_func(col, format)
         | 
| 1280 | 
            +
             | 
| 1281 | 
            +
                if session._is_snowflake:
         | 
| 1282 | 
            +
                    format = format or session.default_time_format
         | 
| 885 1283 |  | 
| 886 | 
            -
                # format = lit(format or spark_default_date_format())
         | 
| 887 1284 | 
             
                if format is not None:
         | 
| 888 1285 | 
             
                    return Column.invoke_expression_over_column(
         | 
| 889 | 
            -
                        col, expression.TsOrDsToDate, format= | 
| 1286 | 
            +
                        col, expression.TsOrDsToDate, format=session.format_time(format)
         | 
| 890 1287 | 
             
                    )
         | 
| 891 1288 | 
             
                return Column.invoke_expression_over_column(col, expression.TsOrDsToDate)
         | 
| 892 1289 |  | 
| 893 1290 |  | 
| 894 1291 | 
             
            @meta()
         | 
| 895 1292 | 
             
            def to_timestamp(col: ColumnOrName, format: t.Optional[str] = None) -> Column:
         | 
| 896 | 
            -
                from sqlframe.base. | 
| 1293 | 
            +
                from sqlframe.base.function_alternatives import (
         | 
| 1294 | 
            +
                    to_timestamp_just_timestamp,
         | 
| 1295 | 
            +
                    to_timestamp_tz,
         | 
| 1296 | 
            +
                    to_timestamp_with_time_zone,
         | 
| 1297 | 
            +
                )
         | 
| 1298 | 
            +
             | 
| 1299 | 
            +
                session = _get_session()
         | 
| 1300 | 
            +
             | 
| 1301 | 
            +
                if session._is_duckdb:
         | 
| 1302 | 
            +
                    return to_timestamp_tz(col, format)
         | 
| 1303 | 
            +
             | 
| 1304 | 
            +
                if session._is_bigquery:
         | 
| 1305 | 
            +
                    return to_timestamp_just_timestamp(col, format)
         | 
| 1306 | 
            +
             | 
| 1307 | 
            +
                if session._is_postgres:
         | 
| 1308 | 
            +
                    return to_timestamp_with_time_zone(col, format)
         | 
| 897 1309 |  | 
| 898 1310 | 
             
                if format is not None:
         | 
| 899 1311 | 
             
                    return Column.invoke_expression_over_column(
         | 
| 900 | 
            -
                        col, expression.StrToTime, format= | 
| 1312 | 
            +
                        col, expression.StrToTime, format=session.format_time(format)
         | 
| 901 1313 | 
             
                    )
         | 
| 902 1314 |  | 
| 903 1315 | 
             
                return Column.ensure_col(col).cast("timestampltz")
         | 
| @@ -919,22 +1331,45 @@ def date_trunc(format: str, timestamp: ColumnOrName) -> Column: | |
| 919 1331 |  | 
| 920 1332 | 
             
            @meta(unsupported_engines=["duckdb", "postgres"])
         | 
| 921 1333 | 
             
            def next_day(col: ColumnOrName, dayOfWeek: str) -> Column:
         | 
| 1334 | 
            +
                from sqlframe.base.function_alternatives import next_day_bgutil
         | 
| 1335 | 
            +
             | 
| 1336 | 
            +
                session = _get_session()
         | 
| 1337 | 
            +
             | 
| 1338 | 
            +
                if session._is_bigquery:
         | 
| 1339 | 
            +
                    return next_day_bgutil(col, dayOfWeek)
         | 
| 1340 | 
            +
             | 
| 922 1341 | 
             
                return Column.invoke_anonymous_function(col, "NEXT_DAY", lit(dayOfWeek))
         | 
| 923 1342 |  | 
| 924 1343 |  | 
| 925 1344 | 
             
            @meta()
         | 
| 926 1345 | 
             
            def last_day(col: ColumnOrName) -> Column:
         | 
| 1346 | 
            +
                session = _get_session()
         | 
| 1347 | 
            +
             | 
| 1348 | 
            +
                if session._is_bigquery or session._is_duckdb or session._is_postgres or session._is_snowflake:
         | 
| 1349 | 
            +
                    col = Column.ensure_col(col).cast("date")
         | 
| 1350 | 
            +
             | 
| 927 1351 | 
             
                return Column.invoke_expression_over_column(col, expression.LastDay)
         | 
| 928 1352 |  | 
| 929 1353 |  | 
| 930 1354 | 
             
            @meta()
         | 
| 931 1355 | 
             
            def from_unixtime(col: ColumnOrName, format: t.Optional[str] = None) -> Column:
         | 
| 932 | 
            -
                from sqlframe.base. | 
| 1356 | 
            +
                from sqlframe.base.function_alternatives import (
         | 
| 1357 | 
            +
                    from_unixtime_bigutil,
         | 
| 1358 | 
            +
                    from_unixtime_from_timestamp,
         | 
| 1359 | 
            +
                )
         | 
| 1360 | 
            +
             | 
| 1361 | 
            +
                session = _get_session()
         | 
| 1362 | 
            +
             | 
| 1363 | 
            +
                if session._is_bigquery:
         | 
| 1364 | 
            +
                    return from_unixtime_bigutil(col, format)
         | 
| 1365 | 
            +
             | 
| 1366 | 
            +
                if session._is_postgres or session._is_snowflake:
         | 
| 1367 | 
            +
                    return from_unixtime_from_timestamp(col, format)
         | 
| 933 1368 |  | 
| 934 1369 | 
             
                return Column.invoke_expression_over_column(
         | 
| 935 1370 | 
             
                    col,
         | 
| 936 1371 | 
             
                    expression.UnixToStr,
         | 
| 937 | 
            -
                    format= | 
| 1372 | 
            +
                    format=session.format_time(format),
         | 
| 938 1373 | 
             
                )
         | 
| 939 1374 |  | 
| 940 1375 |  | 
| @@ -942,12 +1377,23 @@ def from_unixtime(col: ColumnOrName, format: t.Optional[str] = None) -> Column: | |
| 942 1377 | 
             
            def unix_timestamp(
         | 
| 943 1378 | 
             
                timestamp: t.Optional[ColumnOrName] = None, format: t.Optional[str] = None
         | 
| 944 1379 | 
             
            ) -> Column:
         | 
| 945 | 
            -
                from sqlframe.base. | 
| 1380 | 
            +
                from sqlframe.base.function_alternatives import (
         | 
| 1381 | 
            +
                    unix_timestamp_bgutil,
         | 
| 1382 | 
            +
                    unix_timestamp_from_extract,
         | 
| 1383 | 
            +
                )
         | 
| 1384 | 
            +
             | 
| 1385 | 
            +
                session = _get_session()
         | 
| 1386 | 
            +
             | 
| 1387 | 
            +
                if session._is_bigquery:
         | 
| 1388 | 
            +
                    return unix_timestamp_bgutil(timestamp, format)
         | 
| 1389 | 
            +
             | 
| 1390 | 
            +
                if session._is_postgres or session._is_snowflake:
         | 
| 1391 | 
            +
                    return unix_timestamp_from_extract(timestamp, format)
         | 
| 946 1392 |  | 
| 947 1393 | 
             
                return Column.invoke_expression_over_column(
         | 
| 948 1394 | 
             
                    timestamp,
         | 
| 949 1395 | 
             
                    expression.StrToUnix,
         | 
| 950 | 
            -
                    format= | 
| 1396 | 
            +
                    format=session.format_time(format),
         | 
| 951 1397 | 
             
                ).cast("bigint")
         | 
| 952 1398 |  | 
| 953 1399 |  | 
| @@ -1010,6 +1456,13 @@ def md5(col: ColumnOrName) -> Column: | |
| 1010 1456 |  | 
| 1011 1457 | 
             
            @meta(unsupported_engines=["duckdb", "postgres"])
         | 
| 1012 1458 | 
             
            def sha1(col: ColumnOrName) -> Column:
         | 
| 1459 | 
            +
                from sqlframe.base.function_alternatives import sha1_force_sha1_and_to_hex
         | 
| 1460 | 
            +
             | 
| 1461 | 
            +
                session = _get_session()
         | 
| 1462 | 
            +
             | 
| 1463 | 
            +
                if session._is_bigquery:
         | 
| 1464 | 
            +
                    return sha1_force_sha1_and_to_hex(col)
         | 
| 1465 | 
            +
             | 
| 1013 1466 | 
             
                return Column.invoke_expression_over_column(col, expression.SHA)
         | 
| 1014 1467 |  | 
| 1015 1468 |  | 
| @@ -1020,6 +1473,13 @@ def sha2(col: ColumnOrName, numBits: int) -> Column: | |
| 1020 1473 |  | 
| 1021 1474 | 
             
            @meta(unsupported_engines=["postgres"])
         | 
| 1022 1475 | 
             
            def hash(*cols: ColumnOrName) -> Column:
         | 
| 1476 | 
            +
                from sqlframe.base.function_alternatives import hash_from_farm_fingerprint
         | 
| 1477 | 
            +
             | 
| 1478 | 
            +
                session = _get_session()
         | 
| 1479 | 
            +
             | 
| 1480 | 
            +
                if session._is_bigquery:
         | 
| 1481 | 
            +
                    return hash_from_farm_fingerprint(*cols)
         | 
| 1482 | 
            +
             | 
| 1023 1483 | 
             
                args = cols[1:] if len(cols) > 1 else []
         | 
| 1024 1484 | 
             
                return Column.invoke_anonymous_function(cols[0], "HASH", *args)
         | 
| 1025 1485 |  | 
| @@ -1061,11 +1521,41 @@ def ascii(col: ColumnOrName) -> Column: | |
| 1061 1521 |  | 
| 1062 1522 | 
             
            @meta()
         | 
| 1063 1523 | 
             
            def base64(col: ColumnOrName) -> Column:
         | 
| 1524 | 
            +
                from sqlframe.base.function_alternatives import (
         | 
| 1525 | 
            +
                    base64_from_base64_encode,
         | 
| 1526 | 
            +
                    base64_from_blob,
         | 
| 1527 | 
            +
                    base64_from_encode,
         | 
| 1528 | 
            +
                )
         | 
| 1529 | 
            +
             | 
| 1530 | 
            +
                session = _get_session()
         | 
| 1531 | 
            +
             | 
| 1532 | 
            +
                if session._is_bigquery or session._is_duckdb:
         | 
| 1533 | 
            +
                    return base64_from_blob(col)
         | 
| 1534 | 
            +
             | 
| 1535 | 
            +
                if session._is_postgres:
         | 
| 1536 | 
            +
                    return base64_from_encode(col)
         | 
| 1537 | 
            +
             | 
| 1538 | 
            +
                if session._is_snowflake:
         | 
| 1539 | 
            +
                    return base64_from_base64_encode(col)
         | 
| 1540 | 
            +
             | 
| 1064 1541 | 
             
                return Column.invoke_expression_over_column(col, expression.ToBase64)
         | 
| 1065 1542 |  | 
| 1066 1543 |  | 
| 1067 1544 | 
             
            @meta()
         | 
| 1068 1545 | 
             
            def unbase64(col: ColumnOrName) -> Column:
         | 
| 1546 | 
            +
                from sqlframe.base.function_alternatives import (
         | 
| 1547 | 
            +
                    unbase64_from_base64_decode_string,
         | 
| 1548 | 
            +
                    unbase64_from_decode,
         | 
| 1549 | 
            +
                )
         | 
| 1550 | 
            +
             | 
| 1551 | 
            +
                session = _get_session()
         | 
| 1552 | 
            +
             | 
| 1553 | 
            +
                if session._is_postgres:
         | 
| 1554 | 
            +
                    return unbase64_from_decode(col)
         | 
| 1555 | 
            +
             | 
| 1556 | 
            +
                if session._is_snowflake:
         | 
| 1557 | 
            +
                    return unbase64_from_base64_decode_string(col)
         | 
| 1558 | 
            +
             | 
| 1069 1559 | 
             
                return Column.invoke_expression_over_column(col, expression.FromBase64)
         | 
| 1070 1560 |  | 
| 1071 1561 |  | 
| @@ -1086,6 +1576,13 @@ def trim(col: ColumnOrName) -> Column: | |
| 1086 1576 |  | 
| 1087 1577 | 
             
            @meta()
         | 
| 1088 1578 | 
             
            def concat_ws(sep: str, *cols: ColumnOrName) -> Column:
         | 
| 1579 | 
            +
                from sqlframe.base.function_alternatives import concat_ws_from_array_to_string
         | 
| 1580 | 
            +
             | 
| 1581 | 
            +
                session = _get_session()
         | 
| 1582 | 
            +
             | 
| 1583 | 
            +
                if session._is_bigquery:
         | 
| 1584 | 
            +
                    return concat_ws_from_array_to_string(sep, *cols)
         | 
| 1585 | 
            +
             | 
| 1089 1586 | 
             
                return Column.invoke_expression_over_column(
         | 
| 1090 1587 | 
             
                    None, expression.ConcatWs, expressions=[lit(sep)] + list(cols)
         | 
| 1091 1588 | 
             
                )
         | 
| @@ -1093,6 +1590,19 @@ def concat_ws(sep: str, *cols: ColumnOrName) -> Column: | |
| 1093 1590 |  | 
| 1094 1591 | 
             
            @meta(unsupported_engines=["bigquery", "snowflake"])
         | 
| 1095 1592 | 
             
            def decode(col: ColumnOrName, charset: str) -> Column:
         | 
| 1593 | 
            +
                from sqlframe.base.function_alternatives import (
         | 
| 1594 | 
            +
                    decode_from_blob,
         | 
| 1595 | 
            +
                    decode_from_convert_from,
         | 
| 1596 | 
            +
                )
         | 
| 1597 | 
            +
             | 
| 1598 | 
            +
                session = _get_session()
         | 
| 1599 | 
            +
             | 
| 1600 | 
            +
                if session._is_duckdb:
         | 
| 1601 | 
            +
                    return decode_from_blob(col, charset)
         | 
| 1602 | 
            +
             | 
| 1603 | 
            +
                if session._is_postgres:
         | 
| 1604 | 
            +
                    return decode_from_convert_from(col, charset)
         | 
| 1605 | 
            +
             | 
| 1096 1606 | 
             
                return Column.invoke_expression_over_column(
         | 
| 1097 1607 | 
             
                    col, expression.Decode, charset=expression.Literal.string(charset)
         | 
| 1098 1608 | 
             
                )
         | 
| @@ -1100,6 +1610,13 @@ def decode(col: ColumnOrName, charset: str) -> Column: | |
| 1100 1610 |  | 
| 1101 1611 | 
             
            @meta(unsupported_engines=["bigquery", "snowflake"])
         | 
| 1102 1612 | 
             
            def encode(col: ColumnOrName, charset: str) -> Column:
         | 
| 1613 | 
            +
                from sqlframe.base.function_alternatives import encode_from_convert_to
         | 
| 1614 | 
            +
             | 
| 1615 | 
            +
                session = _get_session()
         | 
| 1616 | 
            +
             | 
| 1617 | 
            +
                if session._is_postgres:
         | 
| 1618 | 
            +
                    return encode_from_convert_to(col, charset)
         | 
| 1619 | 
            +
             | 
| 1103 1620 | 
             
                return Column.invoke_expression_over_column(
         | 
| 1104 1621 | 
             
                    col, expression.Encode, charset=expression.Literal.string(charset)
         | 
| 1105 1622 | 
             
                )
         | 
| @@ -1107,11 +1624,37 @@ def encode(col: ColumnOrName, charset: str) -> Column: | |
| 1107 1624 |  | 
| 1108 1625 | 
             
            @meta(unsupported_engines="duckdb")
         | 
| 1109 1626 | 
             
            def format_number(col: ColumnOrName, d: int) -> Column:
         | 
| 1627 | 
            +
                from sqlframe.base.function_alternatives import (
         | 
| 1628 | 
            +
                    format_number_bgutil,
         | 
| 1629 | 
            +
                    format_number_from_to_char,
         | 
| 1630 | 
            +
                )
         | 
| 1631 | 
            +
             | 
| 1632 | 
            +
                session = _get_session()
         | 
| 1633 | 
            +
             | 
| 1634 | 
            +
                if session._is_bigquery:
         | 
| 1635 | 
            +
                    return format_number_bgutil(col, d)
         | 
| 1636 | 
            +
             | 
| 1637 | 
            +
                if session._is_postgres or session._is_snowflake:
         | 
| 1638 | 
            +
                    return format_number_from_to_char(col, d)
         | 
| 1639 | 
            +
             | 
| 1110 1640 | 
             
                return Column.invoke_anonymous_function(col, "FORMAT_NUMBER", lit(d))
         | 
| 1111 1641 |  | 
| 1112 1642 |  | 
| 1113 1643 | 
             
            @meta(unsupported_engines="snowflake")
         | 
| 1114 1644 | 
             
            def format_string(format: str, *cols: ColumnOrName) -> Column:
         | 
| 1645 | 
            +
                from sqlframe.base.function_alternatives import (
         | 
| 1646 | 
            +
                    format_string_with_format,
         | 
| 1647 | 
            +
                    format_string_with_pipes,
         | 
| 1648 | 
            +
                )
         | 
| 1649 | 
            +
             | 
| 1650 | 
            +
                session = _get_session()
         | 
| 1651 | 
            +
             | 
| 1652 | 
            +
                if session._is_duckdb:
         | 
| 1653 | 
            +
                    return format_string_with_pipes(format, *cols)
         | 
| 1654 | 
            +
             | 
| 1655 | 
            +
                if session._is_bigquery or session._is_postgres:
         | 
| 1656 | 
            +
                    return format_string_with_format(format, *cols)
         | 
| 1657 | 
            +
             | 
| 1115 1658 | 
             
                format_col = lit(format)
         | 
| 1116 1659 | 
             
                columns = [Column.ensure_col(x) for x in cols]
         | 
| 1117 1660 | 
             
                return Column.invoke_anonymous_function(format_col, "FORMAT_STRING", *columns)
         | 
| @@ -1119,6 +1662,13 @@ def format_string(format: str, *cols: ColumnOrName) -> Column: | |
| 1119 1662 |  | 
| 1120 1663 | 
             
            @meta()
         | 
| 1121 1664 | 
             
            def instr(col: ColumnOrName, substr: str) -> Column:
         | 
| 1665 | 
            +
                from sqlframe.base.function_alternatives import instr_using_strpos
         | 
| 1666 | 
            +
             | 
| 1667 | 
            +
                session = _get_session()
         | 
| 1668 | 
            +
             | 
| 1669 | 
            +
                if session._is_bigquery:
         | 
| 1670 | 
            +
                    return instr_using_strpos(col, substr)
         | 
| 1671 | 
            +
             | 
| 1122 1672 | 
             
                return Column.invoke_expression_over_column(col, expression.StrPosition, substr=lit(substr))
         | 
| 1123 1673 |  | 
| 1124 1674 |  | 
| @@ -1129,13 +1679,19 @@ def overlay( | |
| 1129 1679 | 
             
                pos: t.Union[ColumnOrName, int],
         | 
| 1130 1680 | 
             
                len: t.Optional[t.Union[ColumnOrName, int]] = None,
         | 
| 1131 1681 | 
             
            ) -> Column:
         | 
| 1682 | 
            +
                from sqlframe.base.function_alternatives import overlay_from_substr
         | 
| 1683 | 
            +
             | 
| 1684 | 
            +
                session = _get_session()
         | 
| 1685 | 
            +
             | 
| 1686 | 
            +
                if session._is_bigquery or session._is_duckdb or session._is_snowflake:
         | 
| 1687 | 
            +
                    return overlay_from_substr(src, replace, pos, len)
         | 
| 1132 1688 | 
             
                return Column.invoke_expression_over_column(
         | 
| 1133 1689 | 
             
                    src,
         | 
| 1134 1690 | 
             
                    expression.Overlay,
         | 
| 1135 1691 | 
             
                    **{
         | 
| 1136 | 
            -
                        "expression": Column(replace). | 
| 1137 | 
            -
                        "from": lit(pos). | 
| 1138 | 
            -
                        "for": lit(len). | 
| 1692 | 
            +
                        "expression": Column(replace).column_expression,
         | 
| 1693 | 
            +
                        "from": lit(pos).column_expression,
         | 
| 1694 | 
            +
                        "for": lit(len).column_expression if len is not None else None,
         | 
| 1139 1695 | 
             
                    },
         | 
| 1140 1696 | 
             
                )
         | 
| 1141 1697 |  | 
| @@ -1162,6 +1718,13 @@ def substring(str: ColumnOrName, pos: int, len: int) -> Column: | |
| 1162 1718 |  | 
| 1163 1719 | 
             
            @meta(unsupported_engines=["duckdb", "postgres", "snowflake"])
         | 
| 1164 1720 | 
             
            def substring_index(str: ColumnOrName, delim: str, count: int) -> Column:
         | 
| 1721 | 
            +
                from sqlframe.base.function_alternatives import substring_index_bgutil
         | 
| 1722 | 
            +
             | 
| 1723 | 
            +
                session = _get_session()
         | 
| 1724 | 
            +
             | 
| 1725 | 
            +
                if session._is_bigquery:
         | 
| 1726 | 
            +
                    return substring_index_bgutil(str, delim, count)
         | 
| 1727 | 
            +
             | 
| 1165 1728 | 
             
                return Column.invoke_anonymous_function(str, "SUBSTRING_INDEX", lit(delim), lit(count))
         | 
| 1166 1729 |  | 
| 1167 1730 |  | 
| @@ -1169,15 +1732,22 @@ def substring_index(str: ColumnOrName, delim: str, count: int) -> Column: | |
| 1169 1732 | 
             
            def levenshtein(
         | 
| 1170 1733 | 
             
                left: ColumnOrName, right: ColumnOrName, threshold: t.Optional[int] = None
         | 
| 1171 1734 | 
             
            ) -> Column:
         | 
| 1735 | 
            +
                from sqlframe.base.function_alternatives import levenshtein_edit_distance
         | 
| 1736 | 
            +
             | 
| 1737 | 
            +
                session = _get_session()
         | 
| 1738 | 
            +
             | 
| 1739 | 
            +
                if session._is_snowflake:
         | 
| 1740 | 
            +
                    return levenshtein_edit_distance(left, right, threshold)
         | 
| 1741 | 
            +
             | 
| 1172 1742 | 
             
                value: t.Union[expression.Case, expression.Levenshtein] = expression.Levenshtein(
         | 
| 1173 | 
            -
                    this=Column.ensure_col(left). | 
| 1174 | 
            -
                    expression=Column.ensure_col(right). | 
| 1743 | 
            +
                    this=Column.ensure_col(left).column_expression,
         | 
| 1744 | 
            +
                    expression=Column.ensure_col(right).column_expression,
         | 
| 1175 1745 | 
             
                )
         | 
| 1176 1746 | 
             
                if threshold is not None:
         | 
| 1177 1747 | 
             
                    value = (
         | 
| 1178 1748 | 
             
                        expression.case()
         | 
| 1179 | 
            -
                        .when(expression.LTE(this=value, expression=lit(threshold). | 
| 1180 | 
            -
                        .else_(lit(-1). | 
| 1749 | 
            +
                        .when(expression.LTE(this=value, expression=lit(threshold).column_expression), value)
         | 
| 1750 | 
            +
                        .else_(lit(-1).column_expression)
         | 
| 1181 1751 | 
             
                    )
         | 
| 1182 1752 | 
             
                return Column(value)
         | 
| 1183 1753 |  | 
| @@ -1196,9 +1766,9 @@ def locate(substr: str, str: ColumnOrName, pos: t.Optional[int] = None) -> Colum | |
| 1196 1766 | 
             
            def lpad(col: ColumnOrName, len: int, pad: str) -> Column:
         | 
| 1197 1767 | 
             
                return Column(
         | 
| 1198 1768 | 
             
                    expression.Pad(
         | 
| 1199 | 
            -
                        this=Column.ensure_col(col). | 
| 1200 | 
            -
                        expression=lit(len). | 
| 1201 | 
            -
                        fill_pattern=lit(pad). | 
| 1769 | 
            +
                        this=Column.ensure_col(col).column_expression,
         | 
| 1770 | 
            +
                        expression=lit(len).column_expression,
         | 
| 1771 | 
            +
                        fill_pattern=lit(pad).column_expression,
         | 
| 1202 1772 | 
             
                        # We can use `invoke_expression_over_column` because this is an actual bool instead of literal bool
         | 
| 1203 1773 | 
             
                        is_left=True,
         | 
| 1204 1774 | 
             
                    )
         | 
| @@ -1209,9 +1779,9 @@ def lpad(col: ColumnOrName, len: int, pad: str) -> Column: | |
| 1209 1779 | 
             
            def rpad(col: ColumnOrName, len: int, pad: str) -> Column:
         | 
| 1210 1780 | 
             
                return Column(
         | 
| 1211 1781 | 
             
                    expression.Pad(
         | 
| 1212 | 
            -
                        this=Column.ensure_col(col). | 
| 1213 | 
            -
                        expression=lit(len). | 
| 1214 | 
            -
                        fill_pattern=lit(pad). | 
| 1782 | 
            +
                        this=Column.ensure_col(col).column_expression,
         | 
| 1783 | 
            +
                        expression=lit(len).column_expression,
         | 
| 1784 | 
            +
                        fill_pattern=lit(pad).column_expression,
         | 
| 1215 1785 | 
             
                        # We can use `invoke_expression_over_column` because this is an actual bool instead of literal bool
         | 
| 1216 1786 | 
             
                        is_left=False,
         | 
| 1217 1787 | 
             
                    )
         | 
| @@ -1225,6 +1795,24 @@ def repeat(col: ColumnOrName, n: int) -> Column: | |
| 1225 1795 |  | 
| 1226 1796 | 
             
            @meta()
         | 
| 1227 1797 | 
             
            def split(str: ColumnOrName, pattern: str, limit: t.Optional[int] = None) -> Column:
         | 
| 1798 | 
            +
                from sqlframe.base.function_alternatives import (
         | 
| 1799 | 
            +
                    split_from_regex_split_to_array,
         | 
| 1800 | 
            +
                    split_with_split,
         | 
| 1801 | 
            +
                )
         | 
| 1802 | 
            +
             | 
| 1803 | 
            +
                session = _get_session()
         | 
| 1804 | 
            +
             | 
| 1805 | 
            +
                if session._is_duckdb:
         | 
| 1806 | 
            +
                    if limit is not None:
         | 
| 1807 | 
            +
                        logger.warning("Limit is ignored since it is not supported in this dialect")
         | 
| 1808 | 
            +
                    limit = None
         | 
| 1809 | 
            +
             | 
| 1810 | 
            +
                if session._is_bigquery or session._is_snowflake:
         | 
| 1811 | 
            +
                    return split_with_split(str, pattern, limit)
         | 
| 1812 | 
            +
             | 
| 1813 | 
            +
                if session._is_postgres:
         | 
| 1814 | 
            +
                    return split_from_regex_split_to_array(str, pattern, limit)
         | 
| 1815 | 
            +
             | 
| 1228 1816 | 
             
                if limit is not None:
         | 
| 1229 1817 | 
             
                    return Column.invoke_expression_over_column(
         | 
| 1230 1818 | 
             
                        str, expression.RegexpSplit, expression=lit(pattern), limit=lit(limit)
         | 
| @@ -1236,22 +1824,39 @@ def split(str: ColumnOrName, pattern: str, limit: t.Optional[int] = None) -> Col | |
| 1236 1824 |  | 
| 1237 1825 | 
             
            @meta(unsupported_engines="postgres")
         | 
| 1238 1826 | 
             
            def regexp_extract(str: ColumnOrName, pattern: str, idx: t.Optional[int] = None) -> Column:
         | 
| 1827 | 
            +
                session = _get_session()
         | 
| 1828 | 
            +
             | 
| 1239 1829 | 
             
                if idx is not None:
         | 
| 1240 | 
            -
                     | 
| 1830 | 
            +
                    result = Column.invoke_expression_over_column(
         | 
| 1241 1831 | 
             
                        str,
         | 
| 1242 1832 | 
             
                        expression.RegexpExtract,
         | 
| 1243 1833 | 
             
                        expression=lit(pattern),
         | 
| 1244 1834 | 
             
                        group=lit(idx),
         | 
| 1245 1835 | 
             
                    )
         | 
| 1246 | 
            -
                 | 
| 1247 | 
            -
                     | 
| 1248 | 
            -
             | 
| 1836 | 
            +
                else:
         | 
| 1837 | 
            +
                    result = Column.invoke_expression_over_column(
         | 
| 1838 | 
            +
                        str, expression.RegexpExtract, expression=lit(pattern)
         | 
| 1839 | 
            +
                    )
         | 
| 1840 | 
            +
             | 
| 1841 | 
            +
                if session._is_snowflake:
         | 
| 1842 | 
            +
                    coalesce_func = get_func_from_session("coalesce")
         | 
| 1843 | 
            +
             | 
| 1844 | 
            +
                    result = coalesce_func(result, lit(""))
         | 
| 1845 | 
            +
             | 
| 1846 | 
            +
                return result
         | 
| 1249 1847 |  | 
| 1250 1848 |  | 
| 1251 1849 | 
             
            @meta()
         | 
| 1252 1850 | 
             
            def regexp_replace(
         | 
| 1253 1851 | 
             
                str: ColumnOrName, pattern: str, replacement: str, position: t.Optional[int] = None
         | 
| 1254 1852 | 
             
            ) -> Column:
         | 
| 1853 | 
            +
                from sqlframe.base.function_alternatives import regexp_replace_global_option
         | 
| 1854 | 
            +
             | 
| 1855 | 
            +
                session = _get_session()
         | 
| 1856 | 
            +
             | 
| 1857 | 
            +
                if session._is_duckdb or session._is_postgres:
         | 
| 1858 | 
            +
                    return regexp_replace_global_option(str, pattern, replacement, position)
         | 
| 1859 | 
            +
             | 
| 1255 1860 | 
             
                if position is not None:
         | 
| 1256 1861 | 
             
                    return Column.invoke_expression_over_column(
         | 
| 1257 1862 | 
             
                        str,
         | 
| @@ -1280,16 +1885,43 @@ def soundex(col: ColumnOrName) -> Column: | |
| 1280 1885 |  | 
| 1281 1886 | 
             
            @meta(unsupported_engines=["postgres", "snowflake"])
         | 
| 1282 1887 | 
             
            def bin(col: ColumnOrName) -> Column:
         | 
| 1888 | 
            +
                from sqlframe.base.function_alternatives import bin_bgutil
         | 
| 1889 | 
            +
             | 
| 1890 | 
            +
                session = _get_session()
         | 
| 1891 | 
            +
             | 
| 1892 | 
            +
                if session._is_bigquery:
         | 
| 1893 | 
            +
                    return bin_bgutil(col)
         | 
| 1894 | 
            +
             | 
| 1283 1895 | 
             
                return Column.invoke_anonymous_function(col, "BIN")
         | 
| 1284 1896 |  | 
| 1285 1897 |  | 
| 1286 1898 | 
             
            @meta(unsupported_engines="postgres")
         | 
| 1287 1899 | 
             
            def hex(col: ColumnOrName) -> Column:
         | 
| 1900 | 
            +
                from sqlframe.base.function_alternatives import (
         | 
| 1901 | 
            +
                    hex_casted_as_bytes,
         | 
| 1902 | 
            +
                    hex_using_encode,
         | 
| 1903 | 
            +
                )
         | 
| 1904 | 
            +
             | 
| 1905 | 
            +
                session = _get_session()
         | 
| 1906 | 
            +
             | 
| 1907 | 
            +
                if session._is_bigquery:
         | 
| 1908 | 
            +
                    return hex_casted_as_bytes(col)
         | 
| 1909 | 
            +
             | 
| 1910 | 
            +
                if session._is_snowflake:
         | 
| 1911 | 
            +
                    return hex_using_encode(col)
         | 
| 1912 | 
            +
             | 
| 1288 1913 | 
             
                return Column.invoke_expression_over_column(col, expression.Hex)
         | 
| 1289 1914 |  | 
| 1290 1915 |  | 
| 1291 1916 | 
             
            @meta(unsupported_engines="postgres")
         | 
| 1292 1917 | 
             
            def unhex(col: ColumnOrName) -> Column:
         | 
| 1918 | 
            +
                from sqlframe.base.function_alternatives import unhex_hex_decode_str
         | 
| 1919 | 
            +
             | 
| 1920 | 
            +
                session = _get_session()
         | 
| 1921 | 
            +
             | 
| 1922 | 
            +
                if session._is_snowflake:
         | 
| 1923 | 
            +
                    return unhex_hex_decode_str(col)
         | 
| 1924 | 
            +
             | 
| 1293 1925 | 
             
                return Column.invoke_expression_over_column(col, expression.Unhex)
         | 
| 1294 1926 |  | 
| 1295 1927 |  | 
| @@ -1305,6 +1937,13 @@ def octet_length(col: ColumnOrName) -> Column: | |
| 1305 1937 |  | 
| 1306 1938 | 
             
            @meta()
         | 
| 1307 1939 | 
             
            def bit_length(col: ColumnOrName) -> Column:
         | 
| 1940 | 
            +
                from sqlframe.base.function_alternatives import bit_length_from_length
         | 
| 1941 | 
            +
             | 
| 1942 | 
            +
                session = _get_session()
         | 
| 1943 | 
            +
             | 
| 1944 | 
            +
                if session._is_bigquery:
         | 
| 1945 | 
            +
                    return bit_length_from_length(col)
         | 
| 1946 | 
            +
             | 
| 1308 1947 | 
             
                return Column.invoke_anonymous_function(col, "BIT_LENGTH")
         | 
| 1309 1948 |  | 
| 1310 1949 |  | 
| @@ -1326,6 +1965,19 @@ def array_agg(col: ColumnOrName) -> Column: | |
| 1326 1965 |  | 
| 1327 1966 | 
             
            @meta()
         | 
| 1328 1967 | 
             
            def array_append(col: ColumnOrName, value: ColumnOrLiteral) -> Column:
         | 
| 1968 | 
            +
                from sqlframe.base.function_alternatives import (
         | 
| 1969 | 
            +
                    array_append_list_append,
         | 
| 1970 | 
            +
                    array_append_using_array_cat,
         | 
| 1971 | 
            +
                )
         | 
| 1972 | 
            +
             | 
| 1973 | 
            +
                session = _get_session()
         | 
| 1974 | 
            +
             | 
| 1975 | 
            +
                if session._is_bigquery:
         | 
| 1976 | 
            +
                    return array_append_using_array_cat(col, value)
         | 
| 1977 | 
            +
             | 
| 1978 | 
            +
                if session._is_duckdb:
         | 
| 1979 | 
            +
                    return array_append_list_append(col, value)
         | 
| 1980 | 
            +
             | 
| 1329 1981 | 
             
                value = value if isinstance(value, Column) else lit(value)
         | 
| 1330 1982 | 
             
                return Column.invoke_anonymous_function(col, "ARRAY_APPEND", value)
         | 
| 1331 1983 |  | 
| @@ -1388,13 +2040,21 @@ def getbit(col: ColumnOrName, pos: ColumnOrName) -> Column: | |
| 1388 2040 |  | 
| 1389 2041 | 
             
            @meta(unsupported_engines=["bigquery", "postgres"])
         | 
| 1390 2042 | 
             
            def create_map(*cols: t.Union[ColumnOrName, t.Iterable[ColumnOrName]]) -> Column:
         | 
| 2043 | 
            +
                session = _get_session()
         | 
| 2044 | 
            +
             | 
| 1391 2045 | 
             
                cols = list(_flatten(cols)) if not isinstance(cols[0], (str, Column)) else cols  # type: ignore
         | 
| 1392 | 
            -
                 | 
| 2046 | 
            +
                result = Column.invoke_expression_over_column(
         | 
| 1393 2047 | 
             
                    None,
         | 
| 1394 2048 | 
             
                    expression.VarMap,
         | 
| 1395 | 
            -
                    keys=array(*cols[::2]). | 
| 1396 | 
            -
                    values=array(*cols[1::2]). | 
| 2049 | 
            +
                    keys=array(*cols[::2]).column_expression,
         | 
| 2050 | 
            +
                    values=array(*cols[1::2]).column_expression,
         | 
| 1397 2051 | 
             
                )
         | 
| 2052 | 
            +
                if not session._is_snowflake:
         | 
| 2053 | 
            +
                    return result
         | 
| 2054 | 
            +
             | 
| 2055 | 
            +
                col1_dtype = col(cols[0]).dtype or "VARCHAR"
         | 
| 2056 | 
            +
                col2_dtype = col(cols[1]).dtype or "VARCHAR"
         | 
| 2057 | 
            +
                return result.cast(f"MAP({col1_dtype}, {col2_dtype})")
         | 
| 1398 2058 |  | 
| 1399 2059 |  | 
| 1400 2060 | 
             
            @meta(unsupported_engines=["bigquery", "postgres", "snowflake"])
         | 
| @@ -1404,14 +2064,43 @@ def map_from_arrays(col1: ColumnOrName, col2: ColumnOrName) -> Column: | |
| 1404 2064 |  | 
| 1405 2065 | 
             
            @meta()
         | 
| 1406 2066 | 
             
            def array_contains(col: ColumnOrName, value: ColumnOrLiteral) -> Column:
         | 
| 1407 | 
            -
                 | 
| 2067 | 
            +
                from sqlframe.base.function_alternatives import array_contains_any
         | 
| 2068 | 
            +
             | 
| 2069 | 
            +
                session = _get_session()
         | 
| 2070 | 
            +
                lit_func = get_func_from_session("lit")
         | 
| 2071 | 
            +
             | 
| 2072 | 
            +
                if session._is_postgres:
         | 
| 2073 | 
            +
                    return array_contains_any(col, value)
         | 
| 2074 | 
            +
             | 
| 2075 | 
            +
                value = value if isinstance(value, Column) else lit_func(value)
         | 
| 2076 | 
            +
             | 
| 2077 | 
            +
                if session._is_snowflake:
         | 
| 2078 | 
            +
                    value = value.cast("variant")
         | 
| 2079 | 
            +
             | 
| 1408 2080 | 
             
                return Column.invoke_expression_over_column(
         | 
| 1409 | 
            -
                    col, expression.ArrayContains, expression= | 
| 2081 | 
            +
                    col, expression.ArrayContains, expression=value.column_expression
         | 
| 1410 2082 | 
             
                )
         | 
| 1411 2083 |  | 
| 1412 2084 |  | 
| 1413 2085 | 
             
            @meta(unsupported_engines="bigquery")
         | 
| 1414 2086 | 
             
            def arrays_overlap(col1: ColumnOrName, col2: ColumnOrName) -> Column:
         | 
| 2087 | 
            +
                from sqlframe.base.function_alternatives import (
         | 
| 2088 | 
            +
                    arrays_overlap_as_plural,
         | 
| 2089 | 
            +
                    arrays_overlap_renamed,
         | 
| 2090 | 
            +
                    arrays_overlap_using_intersect,
         | 
| 2091 | 
            +
                )
         | 
| 2092 | 
            +
             | 
| 2093 | 
            +
                session = _get_session()
         | 
| 2094 | 
            +
             | 
| 2095 | 
            +
                if session._is_duckdb:
         | 
| 2096 | 
            +
                    return arrays_overlap_using_intersect(col1, col2)
         | 
| 2097 | 
            +
             | 
| 2098 | 
            +
                if session._is_databricks or session._is_spark:
         | 
| 2099 | 
            +
                    return arrays_overlap_renamed(col1, col2)
         | 
| 2100 | 
            +
             | 
| 2101 | 
            +
                if session._is_snowflake:
         | 
| 2102 | 
            +
                    return arrays_overlap_as_plural(col1, col2)
         | 
| 2103 | 
            +
             | 
| 1415 2104 | 
             
                return Column.invoke_expression_over_column(col1, expression.ArrayOverlaps, expression=col2)
         | 
| 1416 2105 |  | 
| 1417 2106 |  | 
| @@ -1419,6 +2108,27 @@ def arrays_overlap(col1: ColumnOrName, col2: ColumnOrName) -> Column: | |
| 1419 2108 | 
             
            def slice(
         | 
| 1420 2109 | 
             
                x: ColumnOrName, start: t.Union[ColumnOrName, int], length: t.Union[ColumnOrName, int]
         | 
| 1421 2110 | 
             
            ) -> Column:
         | 
| 2111 | 
            +
                from sqlframe.base.function_alternatives import (
         | 
| 2112 | 
            +
                    slice_as_array_slice,
         | 
| 2113 | 
            +
                    slice_as_list_slice,
         | 
| 2114 | 
            +
                    slice_bgutil,
         | 
| 2115 | 
            +
                    slice_with_brackets,
         | 
| 2116 | 
            +
                )
         | 
| 2117 | 
            +
             | 
| 2118 | 
            +
                session = _get_session()
         | 
| 2119 | 
            +
             | 
| 2120 | 
            +
                if session._is_bigquery:
         | 
| 2121 | 
            +
                    return slice_bgutil(x, start, length)
         | 
| 2122 | 
            +
             | 
| 2123 | 
            +
                if session._is_duckdb:
         | 
| 2124 | 
            +
                    return slice_as_list_slice(x, start, length)
         | 
| 2125 | 
            +
             | 
| 2126 | 
            +
                if session._is_postgres:
         | 
| 2127 | 
            +
                    return slice_with_brackets(x, start, length)
         | 
| 2128 | 
            +
             | 
| 2129 | 
            +
                if session._is_snowflake:
         | 
| 2130 | 
            +
                    return slice_as_array_slice(x, start, length)
         | 
| 2131 | 
            +
             | 
| 1422 2132 | 
             
                start_col = lit(start) if isinstance(start, int) else start
         | 
| 1423 2133 | 
             
                length_col = lit(length) if isinstance(length, int) else length
         | 
| 1424 2134 | 
             
                return Column.invoke_anonymous_function(x, "SLICE", start_col, length_col)
         | 
| @@ -1428,6 +2138,13 @@ def slice( | |
| 1428 2138 | 
             
            def array_join(
         | 
| 1429 2139 | 
             
                col: ColumnOrName, delimiter: str, null_replacement: t.Optional[str] = None
         | 
| 1430 2140 | 
             
            ) -> Column:
         | 
| 2141 | 
            +
                session = _get_session()
         | 
| 2142 | 
            +
             | 
| 2143 | 
            +
                if session._is_snowflake:
         | 
| 2144 | 
            +
                    if null_replacement is not None:
         | 
| 2145 | 
            +
                        logger.warning("Null replacement is ignored since it is not supported in this dialect")
         | 
| 2146 | 
            +
                    null_replacement = None
         | 
| 2147 | 
            +
             | 
| 1431 2148 | 
             
                if null_replacement is not None:
         | 
| 1432 2149 | 
             
                    return Column.invoke_expression_over_column(
         | 
| 1433 2150 | 
             
                        col, expression.ArrayToString, expression=lit(delimiter), null=lit(null_replacement)
         | 
| @@ -1444,6 +2161,19 @@ def concat(*cols: ColumnOrName) -> Column: | |
| 1444 2161 |  | 
| 1445 2162 | 
             
            @meta()
         | 
| 1446 2163 | 
             
            def array_position(col: ColumnOrName, value: ColumnOrLiteral) -> Column:
         | 
| 2164 | 
            +
                from sqlframe.base.function_alternatives import (
         | 
| 2165 | 
            +
                    array_position_bgutil,
         | 
| 2166 | 
            +
                    array_position_cast_variant_and_flip,
         | 
| 2167 | 
            +
                )
         | 
| 2168 | 
            +
             | 
| 2169 | 
            +
                session = _get_session()
         | 
| 2170 | 
            +
             | 
| 2171 | 
            +
                if session._is_bigquery:
         | 
| 2172 | 
            +
                    return array_position_bgutil(col, value)
         | 
| 2173 | 
            +
             | 
| 2174 | 
            +
                if session._is_snowflake:
         | 
| 2175 | 
            +
                    return array_position_cast_variant_and_flip(col, value)
         | 
| 2176 | 
            +
             | 
| 1447 2177 | 
             
                value_col = value if isinstance(value, Column) else lit(value)
         | 
| 1448 2178 | 
             
                # Some engines return NULL if item is not found but Spark expects 0 so we coalesce to 0
         | 
| 1449 2179 | 
             
                return coalesce(Column.invoke_anonymous_function(col, "ARRAY_POSITION", value_col), lit(0))
         | 
| @@ -1451,28 +2181,75 @@ def array_position(col: ColumnOrName, value: ColumnOrLiteral) -> Column: | |
| 1451 2181 |  | 
| 1452 2182 | 
             
            @meta()
         | 
| 1453 2183 | 
             
            def element_at(col: ColumnOrName, value: ColumnOrLiteral) -> Column:
         | 
| 2184 | 
            +
                from sqlframe.base.function_alternatives import element_at_using_brackets
         | 
| 2185 | 
            +
             | 
| 2186 | 
            +
                session = _get_session()
         | 
| 2187 | 
            +
             | 
| 2188 | 
            +
                if session._is_bigquery or session._is_duckdb or session._is_postgres or session._is_snowflake:
         | 
| 2189 | 
            +
                    return element_at_using_brackets(col, value)
         | 
| 2190 | 
            +
             | 
| 1454 2191 | 
             
                value_col = value if isinstance(value, Column) else lit(value)
         | 
| 1455 2192 | 
             
                return Column.invoke_anonymous_function(col, "ELEMENT_AT", value_col)
         | 
| 1456 2193 |  | 
| 1457 2194 |  | 
| 1458 2195 | 
             
            @meta()
         | 
| 1459 2196 | 
             
            def array_remove(col: ColumnOrName, value: ColumnOrLiteral) -> Column:
         | 
| 2197 | 
            +
                from sqlframe.base.function_alternatives import (
         | 
| 2198 | 
            +
                    array_remove_bgutil,
         | 
| 2199 | 
            +
                    array_remove_using_filter,
         | 
| 2200 | 
            +
                )
         | 
| 2201 | 
            +
             | 
| 2202 | 
            +
                session = _get_session()
         | 
| 2203 | 
            +
             | 
| 2204 | 
            +
                if session._is_bigquery:
         | 
| 2205 | 
            +
                    return array_remove_bgutil(col, value)
         | 
| 2206 | 
            +
             | 
| 2207 | 
            +
                if session._is_duckdb:
         | 
| 2208 | 
            +
                    return array_remove_using_filter(col, value)
         | 
| 2209 | 
            +
             | 
| 1460 2210 | 
             
                value_col = value if isinstance(value, Column) else lit(value)
         | 
| 1461 2211 | 
             
                return Column.invoke_anonymous_function(col, "ARRAY_REMOVE", value_col)
         | 
| 1462 2212 |  | 
| 1463 2213 |  | 
| 1464 2214 | 
             
            @meta(unsupported_engines="postgres")
         | 
| 1465 2215 | 
             
            def array_distinct(col: ColumnOrName) -> Column:
         | 
| 2216 | 
            +
                from sqlframe.base.function_alternatives import array_distinct_bgutil
         | 
| 2217 | 
            +
             | 
| 2218 | 
            +
                session = _get_session()
         | 
| 2219 | 
            +
             | 
| 2220 | 
            +
                if session._is_bigquery:
         | 
| 2221 | 
            +
                    return array_distinct_bgutil(col)
         | 
| 2222 | 
            +
             | 
| 1466 2223 | 
             
                return Column.invoke_anonymous_function(col, "ARRAY_DISTINCT")
         | 
| 1467 2224 |  | 
| 1468 2225 |  | 
| 1469 2226 | 
             
            @meta(unsupported_engines=["bigquery", "postgres"])
         | 
| 1470 2227 | 
             
            def array_intersect(col1: ColumnOrName, col2: ColumnOrName) -> Column:
         | 
| 2228 | 
            +
                from sqlframe.base.function_alternatives import array_intersect_using_intersection
         | 
| 2229 | 
            +
             | 
| 2230 | 
            +
                session = _get_session()
         | 
| 2231 | 
            +
             | 
| 2232 | 
            +
                if session._is_snowflake:
         | 
| 2233 | 
            +
                    return array_intersect_using_intersection(col1, col2)
         | 
| 2234 | 
            +
             | 
| 1471 2235 | 
             
                return Column.invoke_anonymous_function(col1, "ARRAY_INTERSECT", Column.ensure_col(col2))
         | 
| 1472 2236 |  | 
| 1473 2237 |  | 
| 1474 2238 | 
             
            @meta(unsupported_engines=["postgres"])
         | 
| 1475 2239 | 
             
            def array_union(col1: ColumnOrName, col2: ColumnOrName) -> Column:
         | 
| 2240 | 
            +
                from sqlframe.base.function_alternatives import (
         | 
| 2241 | 
            +
                    array_union_using_array_concat,
         | 
| 2242 | 
            +
                    array_union_using_list_concat,
         | 
| 2243 | 
            +
                )
         | 
| 2244 | 
            +
             | 
| 2245 | 
            +
                session = _get_session()
         | 
| 2246 | 
            +
             | 
| 2247 | 
            +
                if session._is_duckdb:
         | 
| 2248 | 
            +
                    return array_union_using_list_concat(col1, col2)
         | 
| 2249 | 
            +
             | 
| 2250 | 
            +
                if session._is_bigquery or session._is_snowflake:
         | 
| 2251 | 
            +
                    return array_union_using_array_concat(col1, col2)
         | 
| 2252 | 
            +
             | 
| 1476 2253 | 
             
                return Column.invoke_anonymous_function(col1, "ARRAY_UNION", Column.ensure_col(col2))
         | 
| 1477 2254 |  | 
| 1478 2255 |  | 
| @@ -1504,6 +2281,19 @@ def posexplode_outer(col: ColumnOrName) -> Column: | |
| 1504 2281 | 
             
            # Snowflake doesn't support JSONPath which is what this function uses
         | 
| 1505 2282 | 
             
            @meta(unsupported_engines="snowflake")
         | 
| 1506 2283 | 
             
            def get_json_object(col: ColumnOrName, path: str) -> Column:
         | 
| 2284 | 
            +
                from sqlframe.base.function_alternatives import (
         | 
| 2285 | 
            +
                    get_json_object_using_arrow_op,
         | 
| 2286 | 
            +
                    get_json_object_using_function,
         | 
| 2287 | 
            +
                )
         | 
| 2288 | 
            +
             | 
| 2289 | 
            +
                session = _get_session()
         | 
| 2290 | 
            +
             | 
| 2291 | 
            +
                if session._is_databricks:
         | 
| 2292 | 
            +
                    return get_json_object_using_function(col, path)
         | 
| 2293 | 
            +
             | 
| 2294 | 
            +
                if session._is_postgres:
         | 
| 2295 | 
            +
                    return get_json_object_using_arrow_op(col, path)
         | 
| 2296 | 
            +
             | 
| 1507 2297 | 
             
                return Column.invoke_expression_over_column(col, expression.JSONExtract, expression=lit(path))
         | 
| 1508 2298 |  | 
| 1509 2299 |  | 
| @@ -1572,16 +2362,63 @@ def size(col: ColumnOrName) -> Column: | |
| 1572 2362 |  | 
| 1573 2363 | 
             
            @meta()
         | 
| 1574 2364 | 
             
            def array_min(col: ColumnOrName) -> Column:
         | 
| 2365 | 
            +
                from sqlframe.base.function_alternatives import (
         | 
| 2366 | 
            +
                    array_min_bgutil,
         | 
| 2367 | 
            +
                    array_min_from_sort,
         | 
| 2368 | 
            +
                    array_min_from_subquery,
         | 
| 2369 | 
            +
                )
         | 
| 2370 | 
            +
             | 
| 2371 | 
            +
                session = _get_session()
         | 
| 2372 | 
            +
             | 
| 2373 | 
            +
                if session._is_bigquery:
         | 
| 2374 | 
            +
                    return array_min_bgutil(col)
         | 
| 2375 | 
            +
             | 
| 2376 | 
            +
                if session._is_duckdb:
         | 
| 2377 | 
            +
                    return array_min_from_sort(col)
         | 
| 2378 | 
            +
             | 
| 2379 | 
            +
                if session._is_postgres:
         | 
| 2380 | 
            +
                    return array_min_from_subquery(col)
         | 
| 2381 | 
            +
             | 
| 1575 2382 | 
             
                return Column.invoke_anonymous_function(col, "ARRAY_MIN")
         | 
| 1576 2383 |  | 
| 1577 2384 |  | 
| 1578 2385 | 
             
            @meta()
         | 
| 1579 2386 | 
             
            def array_max(col: ColumnOrName) -> Column:
         | 
| 2387 | 
            +
                from sqlframe.base.function_alternatives import (
         | 
| 2388 | 
            +
                    array_max_bgutil,
         | 
| 2389 | 
            +
                    array_max_from_sort,
         | 
| 2390 | 
            +
                    array_max_from_subquery,
         | 
| 2391 | 
            +
                )
         | 
| 2392 | 
            +
             | 
| 2393 | 
            +
                session = _get_session()
         | 
| 2394 | 
            +
             | 
| 2395 | 
            +
                if session._is_bigquery:
         | 
| 2396 | 
            +
                    return array_max_bgutil(col)
         | 
| 2397 | 
            +
             | 
| 2398 | 
            +
                if session._is_duckdb:
         | 
| 2399 | 
            +
                    return array_max_from_sort(col)
         | 
| 2400 | 
            +
             | 
| 2401 | 
            +
                if session._is_postgres:
         | 
| 2402 | 
            +
                    return array_max_from_subquery(col)
         | 
| 2403 | 
            +
             | 
| 1580 2404 | 
             
                return Column.invoke_anonymous_function(col, "ARRAY_MAX")
         | 
| 1581 2405 |  | 
| 1582 2406 |  | 
| 1583 2407 | 
             
            @meta(unsupported_engines="postgres")
         | 
| 1584 2408 | 
             
            def sort_array(col: ColumnOrName, asc: t.Optional[bool] = None) -> Column:
         | 
| 2409 | 
            +
                from sqlframe.base.function_alternatives import (
         | 
| 2410 | 
            +
                    sort_array_bgutil,
         | 
| 2411 | 
            +
                    sort_array_using_array_sort,
         | 
| 2412 | 
            +
                )
         | 
| 2413 | 
            +
             | 
| 2414 | 
            +
                session = _get_session()
         | 
| 2415 | 
            +
             | 
| 2416 | 
            +
                if session._is_bigquery:
         | 
| 2417 | 
            +
                    return sort_array_bgutil(col, asc)
         | 
| 2418 | 
            +
             | 
| 2419 | 
            +
                if session._is_snowflake:
         | 
| 2420 | 
            +
                    return sort_array_using_array_sort(col, asc)
         | 
| 2421 | 
            +
             | 
| 1585 2422 | 
             
                if asc is not None:
         | 
| 1586 2423 | 
             
                    return Column.invoke_expression_over_column(col, expression.SortArray, asc=lit(asc))
         | 
| 1587 2424 | 
             
                return Column.invoke_expression_over_column(col, expression.SortArray)
         | 
| @@ -1592,6 +2429,12 @@ def array_sort( | |
| 1592 2429 | 
             
                col: ColumnOrName,
         | 
| 1593 2430 | 
             
                comparator: t.Optional[t.Union[t.Callable[[Column, Column], Column]]] = None,
         | 
| 1594 2431 | 
             
            ) -> Column:
         | 
| 2432 | 
            +
                session = _get_session()
         | 
| 2433 | 
            +
                sort_array_func = get_func_from_session("sort_array")
         | 
| 2434 | 
            +
             | 
| 2435 | 
            +
                if session._is_bigquery:
         | 
| 2436 | 
            +
                    return sort_array_func(col, comparator)
         | 
| 2437 | 
            +
             | 
| 1595 2438 | 
             
                if comparator is not None:
         | 
| 1596 2439 | 
             
                    f_expression = _get_lambda_from_func(comparator)
         | 
| 1597 2440 | 
             
                    return Column.invoke_expression_over_column(
         | 
| @@ -1612,6 +2455,13 @@ def reverse(col: ColumnOrName) -> Column: | |
| 1612 2455 |  | 
| 1613 2456 | 
             
            @meta(unsupported_engines=["bigquery", "postgres"])
         | 
| 1614 2457 | 
             
            def flatten(col: ColumnOrName) -> Column:
         | 
| 2458 | 
            +
                from sqlframe.base.function_alternatives import flatten_using_array_flatten
         | 
| 2459 | 
            +
             | 
| 2460 | 
            +
                session = _get_session()
         | 
| 2461 | 
            +
             | 
| 2462 | 
            +
                if session._is_snowflake:
         | 
| 2463 | 
            +
                    return flatten_using_array_flatten(col)
         | 
| 2464 | 
            +
             | 
| 1615 2465 | 
             
                return Column.invoke_expression_over_column(col, expression.Flatten)
         | 
| 1616 2466 |  | 
| 1617 2467 |  | 
| @@ -1650,6 +2500,13 @@ def arrays_zip(*cols: ColumnOrName) -> Column: | |
| 1650 2500 |  | 
| 1651 2501 | 
             
            @meta(unsupported_engines=["bigquery", "duckdb", "postgres"])
         | 
| 1652 2502 | 
             
            def map_concat(*cols: t.Union[ColumnOrName, t.Iterable[ColumnOrName]]) -> Column:
         | 
| 2503 | 
            +
                from sqlframe.base.function_alternatives import map_concat_using_map_cat
         | 
| 2504 | 
            +
             | 
| 2505 | 
            +
                session = _get_session()
         | 
| 2506 | 
            +
             | 
| 2507 | 
            +
                if session._is_snowflake:
         | 
| 2508 | 
            +
                    return map_concat_using_map_cat(*cols)
         | 
| 2509 | 
            +
             | 
| 1653 2510 | 
             
                columns = list(flatten(cols)) if not isinstance(cols[0], (str, Column)) else cols  # type: ignore
         | 
| 1654 2511 | 
             
                if len(columns) == 1:
         | 
| 1655 2512 | 
             
                    return Column.invoke_anonymous_function(columns[0], "MAP_CONCAT")  # type: ignore
         | 
| @@ -1660,11 +2517,28 @@ def map_concat(*cols: t.Union[ColumnOrName, t.Iterable[ColumnOrName]]) -> Column | |
| 1660 2517 | 
             
            def sequence(
         | 
| 1661 2518 | 
             
                start: ColumnOrName, stop: ColumnOrName, step: t.Optional[ColumnOrName] = None
         | 
| 1662 2519 | 
             
            ) -> Column:
         | 
| 2520 | 
            +
                from sqlframe.base.function_alternatives import (
         | 
| 2521 | 
            +
                    sequence_from_array_generate_range,
         | 
| 2522 | 
            +
                    sequence_from_generate_array,
         | 
| 2523 | 
            +
                    sequence_from_generate_series,
         | 
| 2524 | 
            +
                )
         | 
| 2525 | 
            +
             | 
| 2526 | 
            +
                session = _get_session()
         | 
| 2527 | 
            +
             | 
| 2528 | 
            +
                if session._is_bigquery:
         | 
| 2529 | 
            +
                    return sequence_from_generate_array(start, stop, step)
         | 
| 2530 | 
            +
             | 
| 2531 | 
            +
                if session._is_duckdb:
         | 
| 2532 | 
            +
                    return sequence_from_generate_series(start, stop, step)
         | 
| 2533 | 
            +
             | 
| 2534 | 
            +
                if session._is_snowflake:
         | 
| 2535 | 
            +
                    return sequence_from_array_generate_range(start, stop, step)
         | 
| 2536 | 
            +
             | 
| 1663 2537 | 
             
                return Column(
         | 
| 1664 2538 | 
             
                    expression.GenerateSeries(
         | 
| 1665 | 
            -
                        start=Column.ensure_col(start). | 
| 1666 | 
            -
                        end=Column.ensure_col(stop). | 
| 1667 | 
            -
                        step=Column.ensure_col(step). | 
| 2539 | 
            +
                        start=Column.ensure_col(start).column_expression,
         | 
| 2540 | 
            +
                        end=Column.ensure_col(stop).column_expression,
         | 
| 2541 | 
            +
                        step=Column.ensure_col(step).column_expression if step is not None else None,
         | 
| 1668 2542 | 
             
                    )
         | 
| 1669 2543 | 
             
                )
         | 
| 1670 2544 |  | 
| @@ -1778,6 +2652,23 @@ def map_zip_with( | |
| 1778 2652 |  | 
| 1779 2653 | 
             
            @meta()
         | 
| 1780 2654 | 
             
            def typeof(col: ColumnOrName) -> Column:
         | 
| 2655 | 
            +
                from sqlframe.base.function_alternatives import (
         | 
| 2656 | 
            +
                    typeof_bgutil,
         | 
| 2657 | 
            +
                    typeof_from_variant,
         | 
| 2658 | 
            +
                    typeof_pg_typeof,
         | 
| 2659 | 
            +
                )
         | 
| 2660 | 
            +
             | 
| 2661 | 
            +
                session = _get_session()
         | 
| 2662 | 
            +
             | 
| 2663 | 
            +
                if session._is_bigquery:
         | 
| 2664 | 
            +
                    return typeof_bgutil(col)
         | 
| 2665 | 
            +
             | 
| 2666 | 
            +
                if session._is_postgres:
         | 
| 2667 | 
            +
                    return typeof_pg_typeof(col)
         | 
| 2668 | 
            +
             | 
| 2669 | 
            +
                if session._is_snowflake:
         | 
| 2670 | 
            +
                    return typeof_from_variant(col)
         | 
| 2671 | 
            +
             | 
| 1781 2672 | 
             
                return Column.invoke_anonymous_function(col, "TYPEOF")
         | 
| 1782 2673 |  | 
| 1783 2674 |  | 
| @@ -1913,9 +2804,21 @@ def to_binary(col: ColumnOrName, format: t.Optional[ColumnOrName] = None) -> Col | |
| 1913 2804 |  | 
| 1914 2805 | 
             
            @meta()
         | 
| 1915 2806 | 
             
            def any_value(col: ColumnOrName, ignoreNulls: t.Optional[t.Union[bool, Column]] = None) -> Column:
         | 
| 2807 | 
            +
                session = _get_session()
         | 
| 2808 | 
            +
             | 
| 2809 | 
            +
                if session._is_duckdb:
         | 
| 2810 | 
            +
                    if not ignoreNulls:
         | 
| 2811 | 
            +
                        logger.warning("Nulls are always ignored when using `ANY_VALUE` on this engine")
         | 
| 2812 | 
            +
                    ignoreNulls = None
         | 
| 2813 | 
            +
             | 
| 2814 | 
            +
                if session._is_bigquery or session._is_postgres or session._is_snowflake:
         | 
| 2815 | 
            +
                    if ignoreNulls:
         | 
| 2816 | 
            +
                        logger.warning("Ignoring nulls is not supported in this dialect")
         | 
| 2817 | 
            +
                    ignoreNulls = None
         | 
| 2818 | 
            +
             | 
| 1916 2819 | 
             
                column = Column.invoke_expression_over_column(col, expression.AnyValue)
         | 
| 1917 2820 | 
             
                if ignoreNulls:
         | 
| 1918 | 
            -
                    return Column(expression.IgnoreNulls(this=column. | 
| 2821 | 
            +
                    return Column(expression.IgnoreNulls(this=column.column_expression))
         | 
| 1919 2822 | 
             
                return column
         | 
| 1920 2823 |  | 
| 1921 2824 |  | 
| @@ -2056,7 +2959,7 @@ def cardinality(col: ColumnOrName) -> Column: | |
| 2056 2959 |  | 
| 2057 2960 | 
             
            @meta()
         | 
| 2058 2961 | 
             
            def char(col: ColumnOrName) -> Column:
         | 
| 2059 | 
            -
                return Column(expression.Chr(expressions=Column.ensure_col(col). | 
| 2962 | 
            +
                return Column(expression.Chr(expressions=Column.ensure_col(col).column_expression))
         | 
| 2060 2963 |  | 
| 2061 2964 |  | 
| 2062 2965 | 
             
            @meta(unsupported_engines="*")
         | 
| @@ -2072,7 +2975,7 @@ def character_length(str: ColumnOrName) -> Column: | |
| 2072 2975 | 
             
            @meta(unsupported_engines=["bigquery", "postgres"])
         | 
| 2073 2976 | 
             
            def contains(left: ColumnOrName, right: ColumnOrName) -> Column:
         | 
| 2074 2977 | 
             
                return Column.invoke_expression_over_column(
         | 
| 2075 | 
            -
                    left, expression.Contains, expression=Column.ensure_col(right). | 
| 2978 | 
            +
                    left, expression.Contains, expression=Column.ensure_col(right).column_expression
         | 
| 2076 2979 | 
             
                )
         | 
| 2077 2980 |  | 
| 2078 2981 |  | 
| @@ -2084,9 +2987,9 @@ def convert_timezone( | |
| 2084 2987 |  | 
| 2085 2988 | 
             
                return Column(
         | 
| 2086 2989 | 
             
                    expression.ConvertTimezone(
         | 
| 2087 | 
            -
                        timestamp=to_timestamp(Column.ensure_col(sourceTs)). | 
| 2088 | 
            -
                        source_tz=sourceTz. | 
| 2089 | 
            -
                        target_tz=Column.ensure_col(targetTz). | 
| 2990 | 
            +
                        timestamp=to_timestamp(Column.ensure_col(sourceTs)).column_expression,
         | 
| 2991 | 
            +
                        source_tz=sourceTz.column_expression if sourceTz else None,
         | 
| 2992 | 
            +
                        target_tz=Column.ensure_col(targetTz).column_expression,
         | 
| 2090 2993 | 
             
                    )
         | 
| 2091 2994 | 
             
                )
         | 
| 2092 2995 |  | 
| @@ -2180,6 +3083,13 @@ def current_timezone() -> Column: | |
| 2180 3083 |  | 
| 2181 3084 | 
             
            @meta()
         | 
| 2182 3085 | 
             
            def current_user() -> Column:
         | 
| 3086 | 
            +
                from sqlframe.base.function_alternatives import current_user_from_session_user
         | 
| 3087 | 
            +
             | 
| 3088 | 
            +
                session = _get_session()
         | 
| 3089 | 
            +
             | 
| 3090 | 
            +
                if session._is_bigquery:
         | 
| 3091 | 
            +
                    return current_user_from_session_user()
         | 
| 3092 | 
            +
             | 
| 2183 3093 | 
             
                return Column.invoke_expression_over_column(None, expression.CurrentUser)
         | 
| 2184 3094 |  | 
| 2185 3095 |  | 
| @@ -2204,6 +3114,21 @@ def datepart(field: ColumnOrName, source: ColumnOrName) -> Column: | |
| 2204 3114 |  | 
| 2205 3115 | 
             
            @meta(unsupported_engines=["bigquery", "postgres", "snowflake"])
         | 
| 2206 3116 | 
             
            def day(col: ColumnOrName) -> Column:
         | 
| 3117 | 
            +
                from sqlframe.base.function_alternatives import day_with_try_to_timestamp
         | 
| 3118 | 
            +
             | 
| 3119 | 
            +
                session = _get_session()
         | 
| 3120 | 
            +
             | 
| 3121 | 
            +
                if session._is_duckdb:
         | 
| 3122 | 
            +
                    try_to_timestamp = get_func_from_session("try_to_timestamp")
         | 
| 3123 | 
            +
                    to_date = get_func_from_session("to_date")
         | 
| 3124 | 
            +
                    when = get_func_from_session("when")
         | 
| 3125 | 
            +
                    _is_string = get_func_from_session("_is_string")
         | 
| 3126 | 
            +
                    coalesce = get_func_from_session("coalesce")
         | 
| 3127 | 
            +
                    col = when(
         | 
| 3128 | 
            +
                        _is_string(col),
         | 
| 3129 | 
            +
                        coalesce(try_to_timestamp(col), to_date(col)),
         | 
| 3130 | 
            +
                    ).otherwise(col)
         | 
| 3131 | 
            +
             | 
| 2207 3132 | 
             
                return Column.invoke_expression_over_column(col, expression.Day)
         | 
| 2208 3133 |  | 
| 2209 3134 |  | 
| @@ -2222,6 +3147,19 @@ def elt(*inputs: ColumnOrName) -> Column: | |
| 2222 3147 |  | 
| 2223 3148 | 
             
            @meta()
         | 
| 2224 3149 | 
             
            def endswith(str: ColumnOrName, suffix: ColumnOrName) -> Column:
         | 
| 3150 | 
            +
                from sqlframe.base.function_alternatives import (
         | 
| 3151 | 
            +
                    endswith_using_like,
         | 
| 3152 | 
            +
                    endswith_with_underscore,
         | 
| 3153 | 
            +
                )
         | 
| 3154 | 
            +
             | 
| 3155 | 
            +
                session = _get_session()
         | 
| 3156 | 
            +
             | 
| 3157 | 
            +
                if session._is_bigquery or session._is_duckdb:
         | 
| 3158 | 
            +
                    return endswith_with_underscore(str, suffix)
         | 
| 3159 | 
            +
             | 
| 3160 | 
            +
                if session._is_postgres:
         | 
| 3161 | 
            +
                    return endswith_using_like(str, suffix)
         | 
| 3162 | 
            +
             | 
| 2225 3163 | 
             
                return Column.invoke_anonymous_function(str, "endswith", suffix)
         | 
| 2226 3164 |  | 
| 2227 3165 |  | 
| @@ -2237,6 +3175,11 @@ def every(col: ColumnOrName) -> Column: | |
| 2237 3175 |  | 
| 2238 3176 | 
             
            @meta()
         | 
| 2239 3177 | 
             
            def extract(field: ColumnOrName, source: ColumnOrName) -> Column:
         | 
| 3178 | 
            +
                session = _get_session()
         | 
| 3179 | 
            +
             | 
| 3180 | 
            +
                if session._is_bigquery:
         | 
| 3181 | 
            +
                    field = expression.Var(this=Column.ensure_col(field).alias_or_name)  # type: ignore
         | 
| 3182 | 
            +
             | 
| 2240 3183 | 
             
                return Column.invoke_expression_over_column(field, expression.Extract, expression=source)
         | 
| 2241 3184 |  | 
| 2242 3185 |  | 
| @@ -2250,7 +3193,7 @@ def first_value(col: ColumnOrName, ignoreNulls: t.Optional[t.Union[bool, Column] | |
| 2250 3193 | 
             
                column = Column.invoke_expression_over_column(col, expression.FirstValue)
         | 
| 2251 3194 |  | 
| 2252 3195 | 
             
                if ignoreNulls:
         | 
| 2253 | 
            -
                    return Column(expression.IgnoreNulls(this=column. | 
| 3196 | 
            +
                    return Column(expression.IgnoreNulls(this=column.column_expression))
         | 
| 2254 3197 | 
             
                return column
         | 
| 2255 3198 |  | 
| 2256 3199 |  | 
| @@ -2665,8 +3608,8 @@ def ilike( | |
| 2665 3608 | 
             
                if escapeChar is not None:
         | 
| 2666 3609 | 
             
                    return Column(
         | 
| 2667 3610 | 
             
                        expression.Escape(
         | 
| 2668 | 
            -
                            this=column. | 
| 2669 | 
            -
                            expression=Column.ensure_col(escapeChar). | 
| 3611 | 
            +
                            this=column.column_expression,
         | 
| 3612 | 
            +
                            expression=Column.ensure_col(escapeChar).column_expression,
         | 
| 2670 3613 | 
             
                        )
         | 
| 2671 3614 | 
             
                    )
         | 
| 2672 3615 | 
             
                return column
         | 
| @@ -2913,7 +3856,7 @@ def last_value(col: ColumnOrName, ignoreNulls: t.Optional[t.Union[bool, Column]] | |
| 2913 3856 | 
             
                column = Column.invoke_expression_over_column(col, expression.LastValue)
         | 
| 2914 3857 |  | 
| 2915 3858 | 
             
                if ignoreNulls:
         | 
| 2916 | 
            -
                    return Column(expression.IgnoreNulls(this=column. | 
| 3859 | 
            +
                    return Column(expression.IgnoreNulls(this=column.column_expression))
         | 
| 2917 3860 | 
             
                return column
         | 
| 2918 3861 |  | 
| 2919 3862 |  | 
| @@ -2963,6 +3906,11 @@ def left(str: ColumnOrName, len: ColumnOrName) -> Column: | |
| 2963 3906 | 
             
                >>> df.select(left(df.a, df.b).alias('r')).collect()
         | 
| 2964 3907 | 
             
                [Row(r='Spa')]
         | 
| 2965 3908 | 
             
                """
         | 
| 3909 | 
            +
                session = _get_session()
         | 
| 3910 | 
            +
             | 
| 3911 | 
            +
                if session._is_postgres:
         | 
| 3912 | 
            +
                    len = Column.ensure_col(len).cast("integer")
         | 
| 3913 | 
            +
             | 
| 2966 3914 | 
             
                return Column.invoke_expression_over_column(str, expression.Left, expression=len)
         | 
| 2967 3915 |  | 
| 2968 3916 |  | 
| @@ -3014,8 +3962,8 @@ def like( | |
| 3014 3962 | 
             
                if escapeChar is not None:
         | 
| 3015 3963 | 
             
                    return Column(
         | 
| 3016 3964 | 
             
                        expression.Escape(
         | 
| 3017 | 
            -
                            this=column. | 
| 3018 | 
            -
                            expression=Column.ensure_col(escapeChar). | 
| 3965 | 
            +
                            this=column.column_expression,
         | 
| 3966 | 
            +
                            expression=Column.ensure_col(escapeChar).column_expression,
         | 
| 3019 3967 | 
             
                        )
         | 
| 3020 3968 | 
             
                    )
         | 
| 3021 3969 | 
             
                return column
         | 
| @@ -3853,6 +4801,16 @@ def position( | |
| 3853 4801 | 
             
                |                4|
         | 
| 3854 4802 | 
             
                +-----------------+
         | 
| 3855 4803 | 
             
                """
         | 
| 4804 | 
            +
                from sqlframe.base.function_alternatives import position_as_strpos
         | 
| 4805 | 
            +
             | 
| 4806 | 
            +
                session = _get_session()
         | 
| 4807 | 
            +
             | 
| 4808 | 
            +
                if session._is_bigquery:
         | 
| 4809 | 
            +
                    return position_as_strpos(substr, str, start)
         | 
| 4810 | 
            +
             | 
| 4811 | 
            +
                if session._is_postgres:
         | 
| 4812 | 
            +
                    start = Column.ensure_col(start).cast("integer") if start else None
         | 
| 4813 | 
            +
             | 
| 3856 4814 | 
             
                if start is not None:
         | 
| 3857 4815 | 
             
                    return Column.invoke_expression_over_column(
         | 
| 3858 4816 | 
             
                        str, expression.StrPosition, substr=substr, position=start
         | 
| @@ -4038,6 +4996,13 @@ def regexp(str: ColumnOrName, regexp: ColumnOrName) -> Column: | |
| 4038 4996 | 
             
                |               true|
         | 
| 4039 4997 | 
             
                +-------------------+
         | 
| 4040 4998 | 
             
                """
         | 
| 4999 | 
            +
                from sqlframe.base.function_alternatives import regexp_extract_only_one_group
         | 
| 5000 | 
            +
             | 
| 5001 | 
            +
                session = _get_session()
         | 
| 5002 | 
            +
             | 
| 5003 | 
            +
                if session._is_bigquery:
         | 
| 5004 | 
            +
                    return regexp_extract_only_one_group(str, regexp)  # type: ignore
         | 
| 5005 | 
            +
             | 
| 4041 5006 | 
             
                return Column.invoke_anonymous_function(str, "regexp", regexp)
         | 
| 4042 5007 |  | 
| 4043 5008 |  | 
| @@ -4575,6 +5540,11 @@ def right(str: ColumnOrName, len: ColumnOrName) -> Column: | |
| 4575 5540 | 
             
                >>> df.select(right(df.a, df.b).alias('r')).collect()
         | 
| 4576 5541 | 
             
                [Row(r='SQL')]
         | 
| 4577 5542 | 
             
                """
         | 
| 5543 | 
            +
                session = _get_session()
         | 
| 5544 | 
            +
             | 
| 5545 | 
            +
                if session._is_postgres:
         | 
| 5546 | 
            +
                    len = Column.ensure_col(len).cast("integer")
         | 
| 5547 | 
            +
             | 
| 4578 5548 | 
             
                return Column.invoke_expression_over_column(str, expression.Right, expression=len)
         | 
| 4579 5549 |  | 
| 4580 5550 |  | 
| @@ -5030,6 +6000,13 @@ def to_number(col: ColumnOrName, format: ColumnOrName) -> Column: | |
| 5030 6000 | 
             
                >>> df.select(to_number(df.e, lit("$99.99")).alias('r')).collect()
         | 
| 5031 6001 | 
             
                [Row(r=Decimal('78.12'))]
         | 
| 5032 6002 | 
             
                """
         | 
| 6003 | 
            +
                from sqlframe.base.function_alternatives import to_number_using_to_double
         | 
| 6004 | 
            +
             | 
| 6005 | 
            +
                session = _get_session()
         | 
| 6006 | 
            +
             | 
| 6007 | 
            +
                if session._is_snowflake:
         | 
| 6008 | 
            +
                    return to_number_using_to_double(col, format)
         | 
| 6009 | 
            +
             | 
| 5033 6010 | 
             
                return Column.invoke_expression_over_column(col, expression.ToNumber, format=format)
         | 
| 5034 6011 |  | 
| 5035 6012 |  | 
| @@ -5149,11 +6126,14 @@ def to_unix_timestamp( | |
| 5149 6126 | 
             
                [Row(r=None)]
         | 
| 5150 6127 | 
             
                >>> spark.conf.unset("spark.sql.session.timeZone")
         | 
| 5151 6128 | 
             
                """
         | 
| 5152 | 
            -
                 | 
| 6129 | 
            +
                session = _get_session()
         | 
| 6130 | 
            +
             | 
| 6131 | 
            +
                if session._is_duckdb:
         | 
| 6132 | 
            +
                    format = format or _BaseSession().default_time_format
         | 
| 5153 6133 |  | 
| 5154 6134 | 
             
                if format is not None:
         | 
| 5155 6135 | 
             
                    return Column.invoke_expression_over_column(
         | 
| 5156 | 
            -
                        timestamp, expression.StrToUnix, format= | 
| 6136 | 
            +
                        timestamp, expression.StrToUnix, format=session.format_time(format)
         | 
| 5157 6137 | 
             
                    )
         | 
| 5158 6138 | 
             
                else:
         | 
| 5159 6139 | 
             
                    return Column.invoke_expression_over_column(timestamp, expression.StrToUnix)
         | 
| @@ -5306,10 +6286,21 @@ def try_element_at(col: ColumnOrName, extraction: ColumnOrName) -> Column: | |
| 5306 6286 | 
             
                >>> df.select(try_element_at(df.data, lit("a")).alias('r')).collect()
         | 
| 5307 6287 | 
             
                [Row(r=1.0)]
         | 
| 5308 6288 | 
             
                """
         | 
| 6289 | 
            +
                session = _get_session()
         | 
| 6290 | 
            +
             | 
| 6291 | 
            +
                if session._is_databricks or session._is_duckdb or session._is_postgres or session._is_spark:
         | 
| 6292 | 
            +
                    lit = get_func_from_session("lit")
         | 
| 6293 | 
            +
                    extraction = Column.ensure_col(extraction)
         | 
| 6294 | 
            +
                    if (
         | 
| 6295 | 
            +
                        isinstance(extraction.column_expression, expression.Literal)
         | 
| 6296 | 
            +
                        and extraction.column_expression.is_number
         | 
| 6297 | 
            +
                    ):
         | 
| 6298 | 
            +
                        extraction = extraction - lit(1)
         | 
| 6299 | 
            +
             | 
| 5309 6300 | 
             
                return Column(
         | 
| 5310 6301 | 
             
                    expression.Bracket(
         | 
| 5311 | 
            -
                        this=Column.ensure_col(col). | 
| 5312 | 
            -
                        expressions=[Column.ensure_col(extraction). | 
| 6302 | 
            +
                        this=Column.ensure_col(col).column_expression,
         | 
| 6303 | 
            +
                        expressions=[Column.ensure_col(extraction).column_expression],
         | 
| 5313 6304 | 
             
                        safe=True,
         | 
| 5314 6305 | 
             
                    )
         | 
| 5315 6306 | 
             
                )
         | 
| @@ -5340,12 +6331,27 @@ def try_to_timestamp(col: ColumnOrName, format: t.Optional[ColumnOrName] = None) | |
| 5340 6331 | 
             
                >>> df.select(try_to_timestamp(df.t, lit('yyyy-MM-dd HH:mm:ss')).alias('dt')).collect()
         | 
| 5341 6332 | 
             
                [Row(dt=datetime.datetime(1997, 2, 28, 10, 30))]
         | 
| 5342 6333 | 
             
                """
         | 
| 5343 | 
            -
                from sqlframe.base. | 
| 6334 | 
            +
                from sqlframe.base.function_alternatives import (
         | 
| 6335 | 
            +
                    try_to_timestamp_pgtemp,
         | 
| 6336 | 
            +
                    try_to_timestamp_safe,
         | 
| 6337 | 
            +
                    try_to_timestamp_strptime,
         | 
| 6338 | 
            +
                )
         | 
| 6339 | 
            +
             | 
| 6340 | 
            +
                session = _get_session()
         | 
| 6341 | 
            +
             | 
| 6342 | 
            +
                if session._is_bigquery:
         | 
| 6343 | 
            +
                    return try_to_timestamp_safe(col, format)
         | 
| 6344 | 
            +
             | 
| 6345 | 
            +
                if session._is_duckdb:
         | 
| 6346 | 
            +
                    return try_to_timestamp_strptime(col, format)
         | 
| 6347 | 
            +
             | 
| 6348 | 
            +
                if session._is_postgres:
         | 
| 6349 | 
            +
                    return try_to_timestamp_pgtemp(col, format)
         | 
| 5344 6350 |  | 
| 5345 6351 | 
             
                return Column.invoke_anonymous_function(
         | 
| 5346 6352 | 
             
                    col,
         | 
| 5347 6353 | 
             
                    "try_to_timestamp",
         | 
| 5348 | 
            -
                     | 
| 6354 | 
            +
                    session.format_execution_time(format),  # type: ignore
         | 
| 5349 6355 | 
             
                )
         | 
| 5350 6356 |  | 
| 5351 6357 |  | 
| @@ -5841,6 +6847,27 @@ def years(col: ColumnOrName) -> Column: | |
| 5841 6847 | 
             
            # SQLFrame specific
         | 
| 5842 6848 | 
             
            @meta()
         | 
| 5843 6849 | 
             
            def _is_string(col: ColumnOrName) -> Column:
         | 
| 6850 | 
            +
                from sqlframe.base.function_alternatives import (
         | 
| 6851 | 
            +
                    _is_string_using_typeof_char_varying,
         | 
| 6852 | 
            +
                    _is_string_using_typeof_string,
         | 
| 6853 | 
            +
                    _is_string_using_typeof_string_lcase,
         | 
| 6854 | 
            +
                    _is_string_using_typeof_varchar,
         | 
| 6855 | 
            +
                )
         | 
| 6856 | 
            +
             | 
| 6857 | 
            +
                session = _get_session()
         | 
| 6858 | 
            +
             | 
| 6859 | 
            +
                if session._is_bigquery:
         | 
| 6860 | 
            +
                    return _is_string_using_typeof_string(col)
         | 
| 6861 | 
            +
             | 
| 6862 | 
            +
                if session._is_duckdb:
         | 
| 6863 | 
            +
                    return _is_string_using_typeof_varchar(col)
         | 
| 6864 | 
            +
             | 
| 6865 | 
            +
                if session._is_postgres:
         | 
| 6866 | 
            +
                    return _is_string_using_typeof_char_varying(col)
         | 
| 6867 | 
            +
             | 
| 6868 | 
            +
                if session._is_databricks or session._is_spark:
         | 
| 6869 | 
            +
                    return _is_string_using_typeof_string_lcase(col)
         | 
| 6870 | 
            +
             | 
| 5844 6871 | 
             
                col = Column.invoke_anonymous_function(col, "TO_VARIANT")
         | 
| 5845 6872 | 
             
                return Column.invoke_anonymous_function(col, "IS_VARCHAR")
         | 
| 5846 6873 |  | 
| @@ -5889,7 +6916,7 @@ def _get_lambda_from_func(lambda_expression: t.Callable): | |
| 5889 6916 | 
             
                    for x in lambda_expression.__code__.co_varnames
         | 
| 5890 6917 | 
             
                ]
         | 
| 5891 6918 | 
             
                return expression.Lambda(
         | 
| 5892 | 
            -
                    this=lambda_expression(*[Column(x) for x in variables]). | 
| 6919 | 
            +
                    this=lambda_expression(*[Column(x) for x in variables]).column_expression,
         | 
| 5893 6920 | 
             
                    expressions=variables,
         | 
| 5894 6921 | 
             
                )
         | 
| 5895 6922 |  |