sqlframe-1.1.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98)
  1. sqlframe/__init__.py +0 -0
  2. sqlframe/_version.py +16 -0
  3. sqlframe/base/__init__.py +0 -0
  4. sqlframe/base/_typing.py +39 -0
  5. sqlframe/base/catalog.py +1163 -0
  6. sqlframe/base/column.py +388 -0
  7. sqlframe/base/dataframe.py +1519 -0
  8. sqlframe/base/decorators.py +51 -0
  9. sqlframe/base/exceptions.py +14 -0
  10. sqlframe/base/function_alternatives.py +1055 -0
  11. sqlframe/base/functions.py +1678 -0
  12. sqlframe/base/group.py +102 -0
  13. sqlframe/base/mixins/__init__.py +0 -0
  14. sqlframe/base/mixins/catalog_mixins.py +419 -0
  15. sqlframe/base/mixins/readwriter_mixins.py +118 -0
  16. sqlframe/base/normalize.py +84 -0
  17. sqlframe/base/operations.py +87 -0
  18. sqlframe/base/readerwriter.py +679 -0
  19. sqlframe/base/session.py +585 -0
  20. sqlframe/base/transforms.py +13 -0
  21. sqlframe/base/types.py +418 -0
  22. sqlframe/base/util.py +242 -0
  23. sqlframe/base/window.py +139 -0
  24. sqlframe/bigquery/__init__.py +23 -0
  25. sqlframe/bigquery/catalog.py +255 -0
  26. sqlframe/bigquery/column.py +1 -0
  27. sqlframe/bigquery/dataframe.py +54 -0
  28. sqlframe/bigquery/functions.py +378 -0
  29. sqlframe/bigquery/group.py +14 -0
  30. sqlframe/bigquery/readwriter.py +29 -0
  31. sqlframe/bigquery/session.py +89 -0
  32. sqlframe/bigquery/types.py +1 -0
  33. sqlframe/bigquery/window.py +1 -0
  34. sqlframe/duckdb/__init__.py +20 -0
  35. sqlframe/duckdb/catalog.py +108 -0
  36. sqlframe/duckdb/column.py +1 -0
  37. sqlframe/duckdb/dataframe.py +55 -0
  38. sqlframe/duckdb/functions.py +47 -0
  39. sqlframe/duckdb/group.py +14 -0
  40. sqlframe/duckdb/readwriter.py +111 -0
  41. sqlframe/duckdb/session.py +65 -0
  42. sqlframe/duckdb/types.py +1 -0
  43. sqlframe/duckdb/window.py +1 -0
  44. sqlframe/postgres/__init__.py +23 -0
  45. sqlframe/postgres/catalog.py +106 -0
  46. sqlframe/postgres/column.py +1 -0
  47. sqlframe/postgres/dataframe.py +54 -0
  48. sqlframe/postgres/functions.py +61 -0
  49. sqlframe/postgres/group.py +14 -0
  50. sqlframe/postgres/readwriter.py +29 -0
  51. sqlframe/postgres/session.py +68 -0
  52. sqlframe/postgres/types.py +1 -0
  53. sqlframe/postgres/window.py +1 -0
  54. sqlframe/redshift/__init__.py +23 -0
  55. sqlframe/redshift/catalog.py +127 -0
  56. sqlframe/redshift/column.py +1 -0
  57. sqlframe/redshift/dataframe.py +54 -0
  58. sqlframe/redshift/functions.py +18 -0
  59. sqlframe/redshift/group.py +14 -0
  60. sqlframe/redshift/readwriter.py +29 -0
  61. sqlframe/redshift/session.py +53 -0
  62. sqlframe/redshift/types.py +1 -0
  63. sqlframe/redshift/window.py +1 -0
  64. sqlframe/snowflake/__init__.py +26 -0
  65. sqlframe/snowflake/catalog.py +134 -0
  66. sqlframe/snowflake/column.py +1 -0
  67. sqlframe/snowflake/dataframe.py +54 -0
  68. sqlframe/snowflake/functions.py +18 -0
  69. sqlframe/snowflake/group.py +14 -0
  70. sqlframe/snowflake/readwriter.py +29 -0
  71. sqlframe/snowflake/session.py +53 -0
  72. sqlframe/snowflake/types.py +1 -0
  73. sqlframe/snowflake/window.py +1 -0
  74. sqlframe/spark/__init__.py +23 -0
  75. sqlframe/spark/catalog.py +1028 -0
  76. sqlframe/spark/column.py +1 -0
  77. sqlframe/spark/dataframe.py +54 -0
  78. sqlframe/spark/functions.py +22 -0
  79. sqlframe/spark/group.py +14 -0
  80. sqlframe/spark/readwriter.py +29 -0
  81. sqlframe/spark/session.py +90 -0
  82. sqlframe/spark/types.py +1 -0
  83. sqlframe/spark/window.py +1 -0
  84. sqlframe/standalone/__init__.py +26 -0
  85. sqlframe/standalone/catalog.py +13 -0
  86. sqlframe/standalone/column.py +1 -0
  87. sqlframe/standalone/dataframe.py +36 -0
  88. sqlframe/standalone/functions.py +1 -0
  89. sqlframe/standalone/group.py +14 -0
  90. sqlframe/standalone/readwriter.py +19 -0
  91. sqlframe/standalone/session.py +40 -0
  92. sqlframe/standalone/types.py +1 -0
  93. sqlframe/standalone/window.py +1 -0
  94. sqlframe-1.1.3.dist-info/LICENSE +21 -0
  95. sqlframe-1.1.3.dist-info/METADATA +172 -0
  96. sqlframe-1.1.3.dist-info/RECORD +98 -0
  97. sqlframe-1.1.3.dist-info/WHEEL +5 -0
  98. sqlframe-1.1.3.dist-info/top_level.txt +1 -0
sqlframe/base/window.py
@@ -0,0 +1,139 @@
+ # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'sqlframe' folder.
+
+ from __future__ import annotations
+
+ import sys
+ import typing as t
+
+ from sqlglot import expressions as exp
+ from sqlglot.helper import flatten
+
+ from sqlframe.base import functions as F
+
+ if t.TYPE_CHECKING:
+     from sqlframe.base._typing import ColumnOrName
+
+
+ class Window:
+     _JAVA_MIN_LONG = -(1 << 63)  # -9223372036854775808
+     _JAVA_MAX_LONG = (1 << 63) - 1  # 9223372036854775807
+     _PRECEDING_THRESHOLD = max(-sys.maxsize, _JAVA_MIN_LONG)
+     _FOLLOWING_THRESHOLD = min(sys.maxsize, _JAVA_MAX_LONG)
+
+     unboundedPreceding: int = _JAVA_MIN_LONG
+
+     unboundedFollowing: int = _JAVA_MAX_LONG
+
+     currentRow: int = 0
+
+     @classmethod
+     def partitionBy(cls, *cols: t.Union[ColumnOrName, t.List[ColumnOrName]]) -> WindowSpec:
+         return WindowSpec().partitionBy(*cols)
+
+     @classmethod
+     def orderBy(cls, *cols: t.Union[ColumnOrName, t.List[ColumnOrName]]) -> WindowSpec:
+         return WindowSpec().orderBy(*cols)
+
+     @classmethod
+     def rowsBetween(cls, start: int, end: int) -> WindowSpec:
+         return WindowSpec().rowsBetween(start, end)
+
+     @classmethod
+     def rangeBetween(cls, start: int, end: int) -> WindowSpec:
+         return WindowSpec().rangeBetween(start, end)
+
+
+ class WindowSpec:
+     def __init__(self, expression: exp.Expression = exp.Window()):
+         self.expression = expression
+
+     def copy(self):
+         return WindowSpec(self.expression.copy())
+
+     def sql(self, **kwargs) -> str:
+         from sqlframe.base.session import _BaseSession
+
+         return self.expression.sql(dialect=_BaseSession().input_dialect, **kwargs)
+
+     def partitionBy(self, *cols: t.Union[ColumnOrName, t.List[ColumnOrName]]) -> WindowSpec:
+         from sqlframe.base.column import Column
+
+         cols = flatten(cols) if isinstance(cols[0], (list, set, tuple)) else cols  # type: ignore
+         expressions = [Column.ensure_col(x).expression for x in cols]  # type: ignore
+         window_spec = self.copy()
+         partition_by_expressions = window_spec.expression.args.get("partition_by", [])
+         partition_by_expressions.extend(expressions)
+         window_spec.expression.set("partition_by", partition_by_expressions)
+         return window_spec
+
+     def orderBy(self, *cols: t.Union[ColumnOrName, t.List[ColumnOrName]]) -> WindowSpec:
+         from sqlframe.base.column import Column
+
+         cols = flatten(cols) if isinstance(cols[0], (list, set, tuple)) else cols  # type: ignore
+         expressions = [Column.ensure_col(x).expression for x in cols]  # type: ignore
+         window_spec = self.copy()
+         if window_spec.expression.args.get("order") is None:
+             window_spec.expression.set("order", exp.Order(expressions=[]))
+         order_by = window_spec.expression.args["order"].expressions
+         order_by.extend(expressions)
+         window_spec.expression.args["order"].set("expressions", order_by)
+         return window_spec
+
+     def _calc_start_end(
+         self, start: int, end: int
+     ) -> t.Dict[str, t.Optional[t.Union[str, exp.Expression]]]:
+         kwargs: t.Dict[str, t.Optional[t.Union[str, exp.Expression]]] = {
+             "start_side": None,
+             "end_side": None,
+         }
+         if start == Window.currentRow:
+             kwargs["start"] = "CURRENT ROW"
+         else:
+             kwargs = {
+                 **kwargs,
+                 **{
+                     "start_side": "PRECEDING",
+                     "start": (
+                         "UNBOUNDED"
+                         if start <= Window.unboundedPreceding
+                         else F.lit(start).expression
+                     ),
+                 },
+             }
+         if end == Window.currentRow:
+             kwargs["end"] = "CURRENT ROW"
+         else:
+             kwargs = {
+                 **kwargs,
+                 **{
+                     "end_side": "FOLLOWING",
+                     "end": (
+                         "UNBOUNDED" if end >= Window.unboundedFollowing else F.lit(end).expression
+                     ),
+                 },
+             }
+         return kwargs
+
+     def rowsBetween(self, start: int, end: int) -> WindowSpec:
+         window_spec = self.copy()
+         spec = self._calc_start_end(start, end)
+         spec["kind"] = "ROWS"
+         window_spec.expression.set(
+             "spec",
+             exp.WindowSpec(
+                 **{**window_spec.expression.args.get("spec", exp.WindowSpec()).args, **spec}
+             ),
+         )
+         return window_spec
+
+     def rangeBetween(self, start: int, end: int) -> WindowSpec:
+         window_spec = self.copy()
+         spec = self._calc_start_end(start, end)
+         spec["kind"] = "RANGE"
+         window_spec.expression.set(
+             "spec",
+             exp.WindowSpec(
+                 **{**window_spec.expression.args.get("spec", exp.WindowSpec()).args, **spec}
+             ),
+         )
+         return window_spec
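
The Window/WindowSpec pair mirrors pyspark.sql.Window: the classmethods seed a fresh WindowSpec, and each builder method copies the spec before mutating the underlying sqlglot exp.Window, so specs compose without aliasing. A minimal usage sketch (not part of the diff; the column names are hypothetical, and the rendered SQL shown is illustrative since sql() depends on an initialized session's input dialect):

from sqlframe.base.window import Window

# Each builder call returns a new WindowSpec copy; the original is never
# mutated in place, so partial specs can be safely reused.
running_total = (
    Window.partitionBy("region")
    .orderBy("sale_date")
    .rowsBetween(Window.unboundedPreceding, Window.currentRow)
)

# Window.unboundedPreceding is the Java min long, so _calc_start_end maps any
# start <= that sentinel to the UNBOUNDED keyword. Output is roughly:
#   PARTITION BY region ORDER BY sale_date ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
print(running_total.sql())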
sqlframe/bigquery/__init__.py
@@ -0,0 +1,23 @@
+ from sqlframe.bigquery.catalog import BigQueryCatalog
+ from sqlframe.bigquery.column import Column
+ from sqlframe.bigquery.dataframe import BigQueryDataFrame, BigQueryDataFrameNaFunctions
+ from sqlframe.bigquery.group import BigQueryGroupedData
+ from sqlframe.bigquery.readwriter import (
+     BigQueryDataFrameReader,
+     BigQueryDataFrameWriter,
+ )
+ from sqlframe.bigquery.session import BigQuerySession
+ from sqlframe.bigquery.window import Window, WindowSpec
+
+ __all__ = [
+     "BigQueryCatalog",
+     "Column",
+     "BigQueryDataFrame",
+     "BigQueryDataFrameNaFunctions",
+     "BigQueryGroupedData",
+     "BigQueryDataFrameReader",
+     "BigQueryDataFrameWriter",
+     "BigQuerySession",
+     "Window",
+     "WindowSpec",
+ ]
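
Because __init__.py re-exports the engine-specific classes, callers import from the package root rather than from the submodules. A hedged sketch (the project/dataset value is hypothetical; `default_dataset` is the setting that BigQueryCatalog.currentDatabase() expects to have been defined when creating the session, per the error message in catalog.py):

from sqlframe.bigquery import BigQuerySession, Window

# Hypothetical project.dataset pair; real use requires BigQuery credentials.
session = BigQuerySession(default_dataset="my-project.my_dataset")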
sqlframe/bigquery/catalog.py
@@ -0,0 +1,255 @@
+ from __future__ import annotations
+
+ import fnmatch
+ import typing as t
+
+ from sqlglot import exp
+
+ from sqlframe.base.catalog import CatalogMetadata, Column, Function
+ from sqlframe.base.decorators import normalize
+ from sqlframe.base.mixins.catalog_mixins import (
+     ListDatabasesFromInfoSchemaMixin,
+     ListTablesFromInfoSchemaMixin,
+     _BaseInfoSchemaMixin,
+ )
+ from sqlframe.base.util import schema_, to_schema
+
+ if t.TYPE_CHECKING:
+     from google.cloud.bigquery import StandardSqlDataType
+
+     from sqlframe.bigquery.dataframe import BigQueryDataFrame  # noqa
+     from sqlframe.bigquery.session import BigQuerySession  # noqa
+
+
+ class BigQueryCatalog(
+     ListDatabasesFromInfoSchemaMixin["BigQuerySession", "BigQueryDataFrame"],
+     ListTablesFromInfoSchemaMixin["BigQuerySession", "BigQueryDataFrame"],
+     _BaseInfoSchemaMixin["BigQuerySession", "BigQueryDataFrame"],
+ ):
+     QUALIFY_INFO_SCHEMA_WITH_DATABASE = True
+     UPPERCASE_INFO_SCHEMA = True
+
+     def setCurrentCatalog(self, catalogName: str) -> None:
+         self.session.default_project = catalogName
+
+     def currentCatalog(self) -> str:
+         return self.session.default_project
+
+     def setCurrentDatabase(self, dbName: str) -> None:
+         self.session.default_dataset = dbName
+
+     def currentDatabase(self) -> str:
+         if not self.session.default_dataset:
+             raise ValueError(
+                 "No default dataset set. Define `default_dataset` when creating `BigQuerySession`."
+             )
+         return to_schema(self.session.default_dataset).db
+
+     @normalize(["tableName", "dbName"])
+     def listColumns(self, tableName: str, dbName: t.Optional[str] = None) -> t.List[Column]:
+         """Returns a list of columns for the given table/view in the specified database.
+
+         .. versionadded:: 2.0.0
+
+         Parameters
+         ----------
+         tableName : str
+             name of the table to list columns.
+
+             .. versionchanged:: 3.4.0
+                 Allow ``tableName`` to be qualified with catalog name when ``dbName`` is None.
+
+         dbName : str, optional
+             name of the database to find the table to list columns.
+
+         Returns
+         -------
+         list
+             A list of :class:`Column`.
+
+         Notes
+         -----
+         The order of arguments here is different from that of its JVM counterpart
+         because Python does not support method overloading.
+
+         If no database is specified, the current database and catalog
+         are used. This API includes all temporary views.
+
+         Examples
+         --------
+         >>> _ = spark.sql("DROP TABLE IF EXISTS tblA")
+         >>> _ = spark.sql("CREATE TABLE tblA (name STRING, age INT) USING parquet")
+         >>> spark.catalog.listColumns("tblA")
+         [Column(name='name', description=None, dataType='string', nullable=True, ...
+         >>> _ = spark.sql("DROP TABLE tblA")
+         """
+
+         # Source: https://github.com/TobikoData/sqlmesh/blob/4bf5e7aa9302e877273812842eba0b457e28af9e/sqlmesh/core/engine_adapter/bigquery.py#L186-L205
+         def dtype_to_sql(dtype: t.Optional[StandardSqlDataType]) -> str:
+             assert dtype
+
+             kind = dtype.type_kind
+             assert kind
+
+             # Not using the enum value to preserve compatibility with older versions
+             # of the BigQuery library.
+             if kind.name == "ARRAY":
+                 return f"ARRAY<{dtype_to_sql(dtype.array_element_type)}>"
+             if kind.name == "STRUCT":
+                 struct_type = dtype.struct_type
+                 assert struct_type
+                 fields = ", ".join(
+                     f"{field.name} {dtype_to_sql(field.type)}" for field in struct_type.fields
+                 )
+                 return f"STRUCT<{fields}>"
+             if kind.name == "TYPE_KIND_UNSPECIFIED":
+                 return "JSON"
+             return kind.name
+
+         if df := self.session.temp_views.get(tableName):
+             return [
+                 Column(
+                     name=x,
+                     description=None,
+                     dataType="",
+                     nullable=True,
+                     isPartition=False,
+                     isBucket=False,
+                 )
+                 for x in df.columns
+             ]
+
+         table = exp.to_table(tableName, dialect=self.session.input_dialect)
+         schema = to_schema(dbName, dialect=self.session.input_dialect) if dbName else None
+         if not table.db:
+             if schema and schema.db:
+                 table.set("db", schema.args["db"])
+             else:
+                 table.set(
+                     "db",
+                     exp.parse_identifier(
+                         self.currentDatabase(), dialect=self.session.input_dialect
+                     ),
+                 )
+         if not table.catalog:
+             if schema and schema.catalog:
+                 table.set("catalog", schema.args["catalog"])
+             else:
+                 table.set(
+                     "catalog",
+                     exp.parse_identifier(self.currentCatalog(), dialect=self.session.input_dialect),
+                 )
+         bq_table = self.session._client.get_table(table=".".join(part.name for part in table.parts))
+         columns = [
+             Column(
+                 name=field.name,
+                 description=field.description,
+                 dataType=exp.DataType.build(
+                     dtype_to_sql(field.to_standard_sql().type), dialect=self.session.input_dialect
+                 ).sql(dialect=self.session.input_dialect),
+                 nullable=field.is_nullable,
+                 isPartition=False,
+                 isBucket=False,
+             )
+             for field in bq_table.schema
+         ]
+         if bq_table.time_partitioning and not bq_table.time_partitioning.field:
+             columns.append(
+                 Column(
+                     name="_PARTITIONTIME",
+                     description=None,
+                     dataType=exp.DataType.build("TIMESTAMP").sql(
+                         dialect=self.session.input_dialect
+                     ),
+                     nullable=False,
+                     isPartition=True,
+                     isBucket=False,
+                 )
+             )
+             if bq_table.time_partitioning.type_ == "DAY":
+                 columns.append(
+                     Column(
+                         name="_PARTITIONDATE",
+                         description=None,
+                         dataType=exp.DataType.build("DATE").sql(dialect=self.session.input_dialect),
+                         nullable=False,
+                         isPartition=True,
+                         isBucket=False,
+                     )
+                 )
+         return columns
+
+     def listCatalogs(self, pattern: t.Optional[str] = None) -> t.List[CatalogMetadata]:
+         return [CatalogMetadata(name=self.session.default_project, description=None)]
+
+     def listFunctions(
+         self, dbName: t.Optional[str] = None, pattern: t.Optional[str] = None
+     ) -> t.List[Function]:
+         """
+         Returns a list of functions registered in the specified database.
+
+         .. versionadded:: 3.4.0
+
+         Parameters
+         ----------
+         dbName : str
+             name of the database to list the functions.
+             ``dbName`` can be qualified with catalog name.
+         pattern : str
+             The pattern that the function name needs to match.
+
+             .. versionchanged:: 3.5.0
+                 Adds ``pattern`` argument.
+
+         Returns
+         -------
+         list
+             A list of :class:`Function`.
+
+         Notes
+         -----
+         If no database is specified, the current database and catalog
+         are used. This API includes all temporary functions.
+
+         Examples
+         --------
+         >>> spark.catalog.listFunctions()
+         [Function(name=...
+
+         >>> spark.catalog.listFunctions(pattern="to_*")
+         [Function(name=...
+
+         >>> spark.catalog.listFunctions(pattern="*not_existing_func*")
+         []
+         """
+         if not dbName:
+             schema = schema_(
+                 db=exp.parse_identifier(
+                     self.currentDatabase(), dialect=self.session.input_dialect
+                 ),
+                 catalog=exp.parse_identifier(
+                     self.currentCatalog(), dialect=self.session.input_dialect
+                 ),
+             )
+         else:
+             schema = to_schema(dbName, dialect=self.session.input_dialect)
+         table = self._get_info_schema_table("routines", database=schema.db)
+         select = (
+             exp.select("routine_name", "specific_schema", "specific_catalog")
+             .from_(table)
+             .where(exp.column("specific_schema").eq(schema.db))
+         )
+         if schema.catalog:
+             select = select.where(exp.column("specific_catalog").eq(schema.catalog))
+         functions = self.session._fetch_rows(select)
+         if pattern:
+             functions = [x for x in functions if fnmatch.fnmatch(x["routine_name"], pattern)]
+         return [
+             Function(
+                 name=x["routine_name"],
+                 catalog=x["specific_catalog"],
+                 namespace=[x["specific_schema"]],
+                 description=None,
+                 className="",
+                 isTemporary=False,
+             )
+             for x in functions
+         ]
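
Note that listFunctions filters names with Python's fnmatch after fetching from INFORMATION_SCHEMA.ROUTINES, so patterns use shell-style wildcards rather than SQL LIKE syntax. A self-contained sketch of the matching semantics the method relies on (example names are hypothetical):

import fnmatch

names = ["to_date", "to_timestamp", "parse_json"]
print([n for n in names if fnmatch.fnmatch(n, "to_*")])    # ['to_date', 'to_timestamp']
print([n for n in names if fnmatch.fnmatch(n, "*json*")])  # ['parse_json']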
sqlframe/bigquery/column.py
@@ -0,0 +1 @@
+ from sqlframe.base.column import Column
sqlframe/bigquery/dataframe.py
@@ -0,0 +1,54 @@
+ from __future__ import annotations
+
+ import logging
+ import sys
+ import typing as t
+
+ from sqlframe.base.dataframe import (
+     _BaseDataFrame,
+     _BaseDataFrameNaFunctions,
+     _BaseDataFrameStatFunctions,
+ )
+ from sqlframe.bigquery.group import BigQueryGroupedData
+
+ if sys.version_info >= (3, 11):
+     from typing import Self
+ else:
+     from typing_extensions import Self
+
+ if t.TYPE_CHECKING:
+     from sqlframe.bigquery.readwriter import BigQueryDataFrameWriter
+     from sqlframe.bigquery.session import BigQuerySession
+
+
+ logger = logging.getLogger(__name__)
+
+
+ class BigQueryDataFrameNaFunctions(_BaseDataFrameNaFunctions["BigQueryDataFrame"]):
+     pass
+
+
+ class BigQueryDataFrameStatFunctions(_BaseDataFrameStatFunctions["BigQueryDataFrame"]):
+     pass
+
+
+ class BigQueryDataFrame(
+     _BaseDataFrame[
+         "BigQuerySession",
+         "BigQueryDataFrameWriter",
+         "BigQueryDataFrameNaFunctions",
+         "BigQueryDataFrameStatFunctions",
+         "BigQueryGroupedData",
+     ]
+ ):
+     _na = BigQueryDataFrameNaFunctions
+     _stat = BigQueryDataFrameStatFunctions
+     _group_data = BigQueryGroupedData
+
+     def cache(self) -> Self:
+         logger.warning("BigQuery does not support caching. Ignoring cache() call.")
+         return self
+
+     def persist(self) -> Self:
+         logger.warning("BigQuery does not support persist. Ignoring persist() call.")
+         return self
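
cache() and persist() are deliberate no-ops that log a warning and return self, so PySpark-style method chains keep running unmodified on BigQuery. A hedged usage sketch (assumes an initialized BigQuerySession; the table name is hypothetical, and session.read.table is assumed from the readwriter mixins in this release):

# `session` is a BigQuerySession; the dataset/table below is made up.
df = session.read.table("my_dataset.sales")
df = df.cache().where("amount > 0")  # cache() logs a warning, then the chain proceeds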