sqlframe 1.10.0__py3-none-any.whl → 1.11.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sqlframe/_version.py CHANGED
@@ -12,5 +12,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
 
-__version__ = version = '1.10.0'
-__version_tuple__ = version_tuple = (1, 10, 0)
+__version__ = version = '1.11.0'
+__version_tuple__ = version_tuple = (1, 11, 0)
sqlframe/base/dataframe.py CHANGED
@@ -22,6 +22,7 @@ from sqlglot.optimizer.pushdown_projections import pushdown_projections
 from sqlglot.optimizer.qualify import qualify
 from sqlglot.optimizer.qualify_columns import quote_identifiers
 
+from sqlframe.base.catalog import Column as CatalogColumn
 from sqlframe.base.decorators import normalize
 from sqlframe.base.operations import Operation, operation
 from sqlframe.base.transforms import replace_id_value
@@ -29,6 +30,7 @@ from sqlframe.base.util import (
     get_func_from_session,
     get_tables_from_expression_with_join,
     quote_preserving_alias_or_name,
+    sqlglot_to_spark,
     verify_openai_installed,
 )
 
@@ -231,6 +233,10 @@ class _BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
     def __copy__(self):
         return self.copy()
 
+    @property
+    def _typed_columns(self) -> t.List[CatalogColumn]:
+        raise NotImplementedError
+
     @property
     def write(self) -> WRITER:
         return self.session._writer(self)
@@ -293,7 +299,24 @@ class _BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
         StructType([StructField('age', LongType(), True),
                     StructField('name', StringType(), True)])
         """
-        raise NotImplementedError
+        from sqlframe.base import types
+
+        try:
+            return types.StructType(
+                [
+                    types.StructField(
+                        c.name,
+                        sqlglot_to_spark(
+                            exp.DataType.build(c.dataType, dialect=self.session.output_dialect)
+                        ),
+                    )
+                    for c in self._typed_columns
+                ]
+            )
+        except NotImplementedError as e:
+            raise NotImplementedError(
+                "This engine does not support schema inference likely since it does not have an active connection."
+            ) from e
 
     def _replace_cte_names_with_hashes(self, expression: exp.Select):
         replacement_mapping = {}
@@ -1537,6 +1560,36 @@ class _BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
             table.add_row(list(row))
         print(table)
 
+    def printSchema(self, level: t.Optional[int] = None) -> None:
+        def print_schema(
+            column_name: str, column_type: exp.DataType, nullable: bool, current_level: int
+        ):
+            if level and current_level >= level:
+                return
+            if current_level > 0:
+                print(" | " * current_level, end="")
+            print(
+                f" |-- {column_name}: {column_type.sql(self.session.output_dialect).lower()} (nullable = {str(nullable).lower()})"
+            )
+            if column_type.this in (exp.DataType.Type.STRUCT, exp.DataType.Type.OBJECT):
+                for column_def in column_type.expressions:
+                    print_schema(column_def.name, column_def.args["kind"], True, current_level + 1)
+            if column_type.this == exp.DataType.Type.ARRAY:
+                for data_type in column_type.expressions:
+                    print_schema("element", data_type, True, current_level + 1)
+            if column_type.this == exp.DataType.Type.MAP:
+                print_schema("key", column_type.expressions[0], True, current_level + 1)
+                print_schema("value", column_type.expressions[1], True, current_level + 1)
+
+        print("root")
+        for column in self._typed_columns:
+            print_schema(
+                column.name,
+                exp.DataType.build(column.dataType, dialect=self.session.output_dialect),
+                column.nullable,
+                0,
+            )
+
     def toPandas(self) -> pd.DataFrame:
         sql_kwargs = dict(
             pretty=False, optimize=False, dialect=self.session.output_dialect, as_list=True
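
Taken together, these hunks give _BaseDataFrame a single engine hook (`_typed_columns`) that now backs both `schema` and a generic `printSchema`. A minimal usage sketch, assuming a local DuckDB-backed session (engines without a live connection raise the NotImplementedError shown above):

    from sqlframe.duckdb import DuckDBSession

    session = DuckDBSession()  # in-memory DuckDB connection (assumed available)
    df = session.createDataFrame([(1, "a"), (2, "b")], ["id", "name"])
    df.printSchema()  # prints "root" plus one " |-- <name>: <type> (nullable = ...)" line per column
    print(df.schema)  # StructType assembled from _typed_columns via sqlglot_to_spark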
sqlframe/base/exceptions.py CHANGED
@@ -12,3 +12,15 @@ class RowError(SQLFrameException):
 
 class TableSchemaError(SQLFrameException):
     pass
+
+
+class PandasDiffError(SQLFrameException):
+    pass
+
+
+class DataFrameDiffError(SQLFrameException):
+    pass
+
+
+class SchemaDiffError(SQLFrameException):
+    pass
sqlframe/base/mixins/dataframe_mixins.py CHANGED
@@ -1,3 +1,5 @@
+import logging
+import sys
 import typing as t
 
 from sqlglot import exp
@@ -12,11 +14,30 @@ from sqlframe.base.dataframe import (
     _BaseDataFrame,
 )
 
+if sys.version_info >= (3, 11):
+    from typing import Self
+else:
+    from typing_extensions import Self
 
-class PrintSchemaFromTempObjectsMixin(
+
+logger = logging.getLogger(__name__)
+
+
+class NoCachePersistSupportMixin(_BaseDataFrame, t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
+    def cache(self) -> Self:
+        logger.warning("This engine does not support caching. Ignoring cache() call.")
+        return self
+
+    def persist(self) -> Self:
+        logger.warning("This engine does not support persist. Ignoring persist() call.")
+        return self
+
+
+class TypedColumnsFromTempViewMixin(
     _BaseDataFrame, t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]
 ):
-    def _get_columns_from_temp_object(self) -> t.List[Column]:
+    @property
+    def _typed_columns(self) -> t.List[Column]:
         table = exp.to_table(self.session._random_id)
         self.session._execute(
             exp.Create(
@@ -27,37 +48,7 @@ class PrintSchemaFromTempObjectsMixin(
                 expression=self.expression,
             )
         )
+
         return self.session.catalog.listColumns(
             table.sql(dialect=self.session.input_dialect), include_temp=True
         )
-
-    def printSchema(self, level: t.Optional[int] = None) -> None:
-        def print_schema(
-            column_name: str, column_type: exp.DataType, nullable: bool, current_level: int
-        ):
-            if level and current_level >= level:
-                return
-            if current_level > 0:
-                print(" | " * current_level, end="")
-            print(
-                f" |-- {column_name}: {column_type.sql(self.session.output_dialect).lower()} (nullable = {str(nullable).lower()})"
-            )
-            if column_type.this == exp.DataType.Type.STRUCT:
-                for column_def in column_type.expressions:
-                    print_schema(column_def.name, column_def.args["kind"], True, current_level + 1)
-            if column_type.this == exp.DataType.Type.ARRAY:
-                for data_type in column_type.expressions:
-                    print_schema("element", data_type, True, current_level + 1)
-            if column_type.this == exp.DataType.Type.MAP:
-                print_schema("key", column_type.expressions[0], True, current_level + 1)
-                print_schema("value", column_type.expressions[1], True, current_level + 1)
-
-        columns = self._get_columns_from_temp_object()
-        print("root")
-        for column in columns:
-            print_schema(
-                column.name,
-                exp.DataType.build(column.dataType, dialect=self.session.output_dialect),
-                column.nullable,
-                0,
-            )
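
The per-engine `cache`/`persist` overrides removed from the engine DataFrames later in this diff all collapse into NoCachePersistSupportMixin: engines that cannot cache simply log a warning and return the same DataFrame. A small sketch of the observable behavior, assuming a DuckDB-backed session (any engine mixing it in behaves the same):

    import logging

    from sqlframe.duckdb import DuckDBSession

    logging.basicConfig(level=logging.WARNING)
    session = DuckDBSession()
    df = session.createDataFrame([(1,)], ["a"])
    assert df.cache() is df    # warns: "This engine does not support caching. Ignoring cache() call."
    assert df.persist() is df  # warns: "This engine does not support persist. Ignoring persist() call."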
sqlframe/base/types.py CHANGED
@@ -22,14 +22,14 @@ class DataType:
         return not self.__eq__(other)
 
     def __str__(self) -> str:
-        return self.typeName()
+        return self.simpleString()
 
     @classmethod
     def typeName(cls) -> str:
         return cls.__name__[:-4].lower()
 
     def simpleString(self) -> str:
-        return str(self)
+        return self.typeName()
 
     def jsonValue(self) -> t.Union[str, t.Dict[str, t.Any]]:
         return str(self)
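
The swap removes the old indirection (where `simpleString()` deferred back to `str()`) and makes `str()` route through `simpleString()`, so subclasses that override `simpleString()` now control how they print. A toy illustration with a hypothetical subclass defined only for this example:

    from sqlframe.base import types

    class PointType(types.DataType):  # hypothetical, not part of sqlframe
        def simpleString(self) -> str:
            return "point(x,y)"

    print(str(PointType()))  # now "point(x,y)"; previously str() returned typeName(), i.e. "point"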
sqlframe/base/util.py CHANGED
@@ -291,3 +291,54 @@ def quote_preserving_alias_or_name(col: t.Union[exp.Column, exp.Alias]) -> str:
         return col.sql(dialect=_BaseSession().input_dialect)
     # We may get things like `Null()` expression or maybe literals so we just return the alias or name in those cases
     return col.alias_or_name
+
+
+def sqlglot_to_spark(sqlglot_dtype: exp.DataType) -> types.DataType:
+    from sqlframe.base import types
+
+    primitive_mapping = {
+        exp.DataType.Type.VARCHAR: types.VarcharType,
+        exp.DataType.Type.CHAR: types.CharType,
+        exp.DataType.Type.TEXT: types.StringType,
+        exp.DataType.Type.BINARY: types.BinaryType,
+        exp.DataType.Type.BOOLEAN: types.BooleanType,
+        exp.DataType.Type.INT: types.IntegerType,
+        exp.DataType.Type.BIGINT: types.LongType,
+        exp.DataType.Type.SMALLINT: types.ShortType,
+        exp.DataType.Type.FLOAT: types.FloatType,
+        exp.DataType.Type.DOUBLE: types.DoubleType,
+        exp.DataType.Type.DECIMAL: types.DecimalType,
+        exp.DataType.Type.TIMESTAMP: types.TimestampType,
+        exp.DataType.Type.TIMESTAMPTZ: types.TimestampType,
+        exp.DataType.Type.TIMESTAMPLTZ: types.TimestampType,
+        exp.DataType.Type.TIMESTAMPNTZ: types.TimestampType,
+        exp.DataType.Type.DATE: types.DateType,
+    }
+    if sqlglot_dtype.this in primitive_mapping:
+        pyspark_class = primitive_mapping[sqlglot_dtype.this]
+        if issubclass(pyspark_class, types.DataTypeWithLength) and sqlglot_dtype.expressions:
+            return pyspark_class(length=int(sqlglot_dtype.expressions[0].this.this))
+        elif issubclass(pyspark_class, types.DecimalType) and sqlglot_dtype.expressions:
+            return pyspark_class(
+                precision=int(sqlglot_dtype.expressions[0].this.this),
+                scale=int(sqlglot_dtype.expressions[1].this.this),
+            )
+        return pyspark_class()
+    if sqlglot_dtype.this == exp.DataType.Type.ARRAY:
+        return types.ArrayType(sqlglot_to_spark(sqlglot_dtype.expressions[0]))
+    elif sqlglot_dtype.this == exp.DataType.Type.MAP:
+        return types.MapType(
+            sqlglot_to_spark(sqlglot_dtype.expressions[0]),
+            sqlglot_to_spark(sqlglot_dtype.expressions[1]),
+        )
+    elif sqlglot_dtype.this in (exp.DataType.Type.STRUCT, exp.DataType.Type.OBJECT):
+        return types.StructType(
+            [
+                types.StructField(
+                    name=field.this.alias_or_name,
+                    dataType=sqlglot_to_spark(field.args["kind"]),
+                )
+                for field in sqlglot_dtype.expressions
+            ]
+        )
+    raise NotImplementedError(f"Unsupported data type: {sqlglot_dtype}")
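
`sqlglot_to_spark` is the bridge the new `schema` property relies on: it maps a sqlglot `exp.DataType` onto the PySpark-style classes in `sqlframe.base.types`, recursing through arrays, maps, and structs and raising NotImplementedError for anything else. A minimal sketch of calling it directly (output shape is indicative only):

    from sqlglot import exp

    from sqlframe.base.util import sqlglot_to_spark

    dtype = exp.DataType.build("ARRAY<STRUCT<a INT, b TEXT>>")
    print(sqlglot_to_spark(dtype))
    # roughly: ArrayType(StructType([StructField('a', IntegerType(), ...), StructField('b', StringType(), ...)]))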
sqlframe/bigquery/dataframe.py CHANGED
@@ -1,21 +1,17 @@
 from __future__ import annotations
 
 import logging
-import sys
 import typing as t
 
+from sqlframe.base.catalog import Column as CatalogColumn
 from sqlframe.base.dataframe import (
     _BaseDataFrame,
     _BaseDataFrameNaFunctions,
     _BaseDataFrameStatFunctions,
 )
+from sqlframe.base.mixins.dataframe_mixins import NoCachePersistSupportMixin
 from sqlframe.bigquery.group import BigQueryGroupedData
 
-if sys.version_info >= (3, 11):
-    from typing import Self
-else:
-    from typing_extensions import Self
-
 if t.TYPE_CHECKING:
     from sqlframe.bigquery.readwriter import BigQueryDataFrameWriter
     from sqlframe.bigquery.session import BigQuerySession
@@ -33,22 +29,46 @@ class BigQueryDataFrameStatFunctions(_BaseDataFrameStatFunctions["BigQueryDataFr
 
 
 class BigQueryDataFrame(
+    NoCachePersistSupportMixin,
     _BaseDataFrame[
         "BigQuerySession",
         "BigQueryDataFrameWriter",
         "BigQueryDataFrameNaFunctions",
         "BigQueryDataFrameStatFunctions",
         "BigQueryGroupedData",
-    ]
+    ],
 ):
     _na = BigQueryDataFrameNaFunctions
     _stat = BigQueryDataFrameStatFunctions
     _group_data = BigQueryGroupedData
 
-    def cache(self) -> Self:
-        logger.warning("BigQuery does not support caching. Ignoring cache() call.")
-        return self
+    @property
+    def _typed_columns(self) -> t.List[CatalogColumn]:
+        from google.cloud import bigquery
+
+        def field_to_column(field: bigquery.SchemaField) -> CatalogColumn:
+            if field.field_type == "RECORD":
+                data_type = "STRUCT<"
+                for subfield in field.fields:
+                    column = field_to_column(subfield)
+                    data_type += f"{column.name} {column.dataType},"
+                data_type += ">"
+            elif field.field_type == "INTEGER":
+                data_type = "INT64"
+            else:
+                data_type = field.field_type
+            if field.mode == "REPEATED":
+                data_type = f"ARRAY<{data_type}>"
+            return CatalogColumn(
+                name=field.name,
+                dataType=data_type,
+                nullable=field.is_nullable,
+                description=None,
+                isPartition=False,
+                isBucket=False,
+            )
 
-    def persist(self) -> Self:
-        logger.warning("BigQuery does not support persist. Ignoring persist() call.")
-        return self
+        job_config = bigquery.QueryJobConfig(dry_run=True, use_query_cache=False)
+        sql = self.session._to_sql(self.expression)
+        query_job = self.session._client.query(sql, job_config=job_config)
+        return [field_to_column(field) for field in query_job.schema]
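
The BigQuery version never runs the query: it compiles the DataFrame's expression to SQL, submits it as a dry-run job, and converts the returned SchemaFields into catalog columns. The same trick in isolation, as a sketch (assumes google-cloud-bigquery is installed and default credentials/project are configured):

    from google.cloud import bigquery

    client = bigquery.Client()
    job = client.query(
        "SELECT 1 AS id, ['a', 'b'] AS tags",
        job_config=bigquery.QueryJobConfig(dry_run=True, use_query_cache=False),
    )
    for field in job.schema:  # a dry-run job still reports the result schema
        print(field.name, field.field_type, field.mode)  # e.g. id INTEGER NULLABLE / tags STRING REPEATED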
sqlframe/duckdb/dataframe.py CHANGED
@@ -9,14 +9,12 @@ from sqlframe.base.dataframe import (
     _BaseDataFrameNaFunctions,
     _BaseDataFrameStatFunctions,
 )
-from sqlframe.base.mixins.dataframe_mixins import PrintSchemaFromTempObjectsMixin
+from sqlframe.base.mixins.dataframe_mixins import (
+    NoCachePersistSupportMixin,
+    TypedColumnsFromTempViewMixin,
+)
 from sqlframe.duckdb.group import DuckDBGroupedData
 
-if sys.version_info >= (3, 11):
-    from typing import Self
-else:
-    from typing_extensions import Self
-
 if t.TYPE_CHECKING:
     from sqlframe.duckdb.session import DuckDBSession  # noqa
     from sqlframe.duckdb.readwriter import DuckDBDataFrameWriter  # noqa
@@ -35,7 +33,8 @@ class DuckDBDataFrameStatFunctions(_BaseDataFrameStatFunctions["DuckDBDataFrame"
 
 
 class DuckDBDataFrame(
-    PrintSchemaFromTempObjectsMixin,
+    NoCachePersistSupportMixin,
+    TypedColumnsFromTempViewMixin,
     _BaseDataFrame[
         "DuckDBSession",
         "DuckDBDataFrameWriter",
@@ -47,11 +46,3 @@ class DuckDBDataFrame(
     _na = DuckDBDataFrameNaFunctions
     _stat = DuckDBDataFrameStatFunctions
    _group_data = DuckDBGroupedData
-
-    def cache(self) -> Self:
-        logger.warning("DuckDB does not support caching. Ignoring cache() call.")
-        return self
-
-    def persist(self) -> Self:
-        logger.warning("DuckDB does not support persist. Ignoring persist() call.")
-        return self
sqlframe/postgres/catalog.py CHANGED
@@ -7,16 +7,17 @@ import typing as t
 
 from sqlglot import exp, parse_one
 
-from sqlframe.base.catalog import Function, _BaseCatalog
+from sqlframe.base.catalog import Column, Function, _BaseCatalog
+from sqlframe.base.decorators import normalize
 from sqlframe.base.mixins.catalog_mixins import (
     GetCurrentCatalogFromFunctionMixin,
     GetCurrentDatabaseFromFunctionMixin,
     ListCatalogsFromInfoSchemaMixin,
-    ListColumnsFromInfoSchemaMixin,
     ListDatabasesFromInfoSchemaMixin,
     ListTablesFromInfoSchemaMixin,
     SetCurrentDatabaseFromSearchPathMixin,
 )
+from sqlframe.base.util import to_schema
 
 if t.TYPE_CHECKING:
     from sqlframe.postgres.session import PostgresSession  # noqa
@@ -30,12 +31,131 @@ class PostgresCatalog(
     ListCatalogsFromInfoSchemaMixin["PostgresSession", "PostgresDataFrame"],
     SetCurrentDatabaseFromSearchPathMixin["PostgresSession", "PostgresDataFrame"],
     ListTablesFromInfoSchemaMixin["PostgresSession", "PostgresDataFrame"],
-    ListColumnsFromInfoSchemaMixin["PostgresSession", "PostgresDataFrame"],
     _BaseCatalog["PostgresSession", "PostgresDataFrame"],
 ):
     CURRENT_CATALOG_EXPRESSION: exp.Expression = exp.column("current_catalog")
     TEMP_SCHEMA_FILTER = exp.column("table_schema").like("pg_temp_%")
 
+    @normalize(["tableName", "dbName"])
+    def listColumns(
+        self, tableName: str, dbName: t.Optional[str] = None, include_temp: bool = False
+    ) -> t.List[Column]:
+        """Returns a t.List of columns for the given table/view in the specified database.
+
+        .. versionadded:: 2.0.0
+
+        Parameters
+        ----------
+        tableName : str
+            name of the table to t.List columns.
+
+            .. versionchanged:: 3.4.0
+               Allow ``tableName`` to be qualified with catalog name when ``dbName`` is None.
+
+        dbName : str, t.Optional
+            name of the database to find the table to t.List columns.
+
+        Returns
+        -------
+        t.List
+            A t.List of :class:`Column`.
+
+        Notes
+        -----
+        The order of arguments here is different from that of its JVM counterpart
+        because Python does not support method overloading.
+
+        If no database is specified, the current database and catalog
+        are used. This API includes all temporary views.
+
+        Examples
+        --------
+        >>> _ = spark.sql("DROP TABLE IF EXISTS tbl1")
+        >>> _ = spark.sql("CREATE TABLE tblA (name STRING, age INT) USING parquet")
+        >>> spark.catalog.t.listColumns("tblA")
+        [Column(name='name', description=None, dataType='string', nullable=True, ...
+        >>> _ = spark.sql("DROP TABLE tblA")
+        """
+        if df := self.session.temp_views.get(tableName):
+            return [
+                Column(
+                    name=x,
+                    description=None,
+                    dataType="",
+                    nullable=True,
+                    isPartition=False,
+                    isBucket=False,
+                )
+                for x in df.columns
+            ]
+
+        table = exp.to_table(tableName, dialect=self.session.input_dialect)
+        schema = to_schema(dbName, dialect=self.session.input_dialect) if dbName else None
+        if not table.db:
+            if schema and schema.db:
+                table.set("db", schema.args["db"])
+            else:
+                table.set(
+                    "db",
+                    exp.parse_identifier(
+                        self.currentDatabase(), dialect=self.session.input_dialect
+                    ),
+                )
+        if not table.catalog:
+            if schema and schema.catalog:
+                table.set("catalog", schema.args["catalog"])
+            else:
+                table.set(
+                    "catalog",
+                    exp.parse_identifier(self.currentCatalog(), dialect=self.session.input_dialect),
+                )
+        source_table = self._get_info_schema_table("columns", database=table.db)
+        select = parse_one(
+            f"""
+            SELECT
+                att.attname AS column_name,
+                pg_catalog.format_type(att.atttypid, NULL) AS data_type,
+                col.is_nullable
+            FROM
+                pg_catalog.pg_attribute att
+            JOIN
+                pg_catalog.pg_class cls ON cls.oid = att.attrelid
+            JOIN
+                pg_catalog.pg_namespace nsp ON nsp.oid = cls.relnamespace
+            JOIN
+                information_schema.columns col ON col.table_schema = nsp.nspname AND col.table_name = cls.relname AND col.column_name = att.attname
+            WHERE
+                cls.relname = '{table.name}' AND -- replace with your table name
+                att.attnum > 0 AND
+                NOT att.attisdropped
+            ORDER BY
+                att.attnum;
+            """,
+            dialect="postgres",
+        )
+        if table.db:
+            schema_filter: exp.Expression = exp.column("table_schema").eq(table.db)
+            if include_temp and self.TEMP_SCHEMA_FILTER:
+                schema_filter = exp.Or(this=schema_filter, expression=self.TEMP_SCHEMA_FILTER)
+            select = select.where(schema_filter)  # type: ignore
+        if table.catalog:
+            catalog_filter: exp.Expression = exp.column("table_catalog").eq(table.catalog)
+            if include_temp and self.TEMP_CATALOG_FILTER:
+                catalog_filter = exp.Or(this=catalog_filter, expression=self.TEMP_CATALOG_FILTER)
+            select = select.where(catalog_filter)  # type: ignore
+        results = self.session._fetch_rows(select)
+        return [
+            Column(
+                name=x["column_name"],
+                description=None,
+                dataType=x["data_type"],
+                nullable=x["is_nullable"] == "YES",
+                isPartition=False,
+                isBucket=False,
+            )
+            for x in results
+        ]
+
     def listFunctions(
         self, dbName: t.Optional[str] = None, pattern: t.Optional[str] = None
     ) -> t.List[Function]:
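
With this override, Postgres stops using the generic information-schema mixin and resolves column types from pg_catalog, so `dataType` comes back as a Postgres type name that sqlglot can parse into an `exp.DataType`. A usage sketch against a hypothetical `public.users` table, assuming `session` is an existing PostgresSession:

    cols = session.catalog.listColumns("public.users")  # or listColumns("users", dbName="public")
    for col in cols:
        print(col.name, col.dataType, col.nullable)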
sqlframe/postgres/dataframe.py CHANGED
@@ -9,7 +9,10 @@ from sqlframe.base.dataframe import (
     _BaseDataFrameNaFunctions,
     _BaseDataFrameStatFunctions,
 )
-from sqlframe.base.mixins.dataframe_mixins import PrintSchemaFromTempObjectsMixin
+from sqlframe.base.mixins.dataframe_mixins import (
+    NoCachePersistSupportMixin,
+    TypedColumnsFromTempViewMixin,
+)
 from sqlframe.postgres.group import PostgresGroupedData
 
 if sys.version_info >= (3, 11):
@@ -34,7 +37,8 @@ class PostgresDataFrameStatFunctions(_BaseDataFrameStatFunctions["PostgresDataFr
 
 
 class PostgresDataFrame(
-    PrintSchemaFromTempObjectsMixin,
+    NoCachePersistSupportMixin,
+    TypedColumnsFromTempViewMixin,
     _BaseDataFrame[
         "PostgresSession",
         "PostgresDataFrameWriter",
@@ -46,11 +50,3 @@ class PostgresDataFrame(
     _na = PostgresDataFrameNaFunctions
     _stat = PostgresDataFrameStatFunctions
     _group_data = PostgresGroupedData
-
-    def cache(self) -> Self:
-        logger.warning("Postgres does not support caching. Ignoring cache() call.")
-        return self
-
-    def persist(self) -> Self:
-        logger.warning("Postgres does not support persist. Ignoring persist() call.")
-        return self
sqlframe/redshift/dataframe.py CHANGED
@@ -9,13 +9,9 @@ from sqlframe.base.dataframe import (
     _BaseDataFrameNaFunctions,
     _BaseDataFrameStatFunctions,
 )
+from sqlframe.base.mixins.dataframe_mixins import NoCachePersistSupportMixin
 from sqlframe.redshift.group import RedshiftGroupedData
 
-if sys.version_info >= (3, 11):
-    from typing import Self
-else:
-    from typing_extensions import Self
-
 if t.TYPE_CHECKING:
     from sqlframe.redshift.readwriter import RedshiftDataFrameWriter
     from sqlframe.redshift.session import RedshiftSession
@@ -33,22 +29,15 @@ class RedshiftDataFrameStatFunctions(_BaseDataFrameStatFunctions["RedshiftDataFr
 
 
 class RedshiftDataFrame(
+    NoCachePersistSupportMixin,
     _BaseDataFrame[
        "RedshiftSession",
         "RedshiftDataFrameWriter",
         "RedshiftDataFrameNaFunctions",
         "RedshiftDataFrameStatFunctions",
         "RedshiftGroupedData",
-    ]
+    ],
 ):
     _na = RedshiftDataFrameNaFunctions
     _stat = RedshiftDataFrameStatFunctions
     _group_data = RedshiftGroupedData
-
-    def cache(self) -> Self:
-        logger.warning("Redshift does not support caching. Ignoring cache() call.")
-        return self
-
-    def persist(self) -> Self:
-        logger.warning("Redshift does not support persist. Ignoring persist() call.")
-        return self
sqlframe/snowflake/dataframe.py CHANGED
@@ -4,18 +4,15 @@ import logging
 import sys
 import typing as t
 
+from sqlframe.base.catalog import Column as CatalogColumn
 from sqlframe.base.dataframe import (
     _BaseDataFrame,
     _BaseDataFrameNaFunctions,
     _BaseDataFrameStatFunctions,
 )
+from sqlframe.base.mixins.dataframe_mixins import NoCachePersistSupportMixin
 from sqlframe.snowflake.group import SnowflakeGroupedData
 
-if sys.version_info >= (3, 11):
-    from typing import Self
-else:
-    from typing_extensions import Self
-
 if t.TYPE_CHECKING:
     from sqlframe.snowflake.readwriter import SnowflakeDataFrameWriter
     from sqlframe.snowflake.session import SnowflakeSession
@@ -33,22 +30,35 @@ class SnowflakeDataFrameStatFunctions(_BaseDataFrameStatFunctions["SnowflakeData
 
 
 class SnowflakeDataFrame(
+    NoCachePersistSupportMixin,
    _BaseDataFrame[
         "SnowflakeSession",
         "SnowflakeDataFrameWriter",
         "SnowflakeDataFrameNaFunctions",
         "SnowflakeDataFrameStatFunctions",
         "SnowflakeGroupedData",
-    ]
+    ],
 ):
     _na = SnowflakeDataFrameNaFunctions
     _stat = SnowflakeDataFrameStatFunctions
     _group_data = SnowflakeGroupedData
 
-    def cache(self) -> Self:
-        logger.warning("Snowflake does not support caching. Ignoring cache() call.")
-        return self
-
-    def persist(self) -> Self:
-        logger.warning("Snowflake does not support persist. Ignoring persist() call.")
-        return self
+    @property
+    def _typed_columns(self) -> t.List[CatalogColumn]:
+        df = self._convert_leaf_to_cte()
+        df = df.limit(0)
+        self.session._execute(df.expression)
+        query_id = self.session._cur.sfqid
+        columns = []
+        for row in self.session._fetch_rows(f"DESCRIBE RESULT '{query_id}'"):
+            columns.append(
+                CatalogColumn(
+                    name=row.name,
+                    dataType=row.type,
+                    nullable=row["null?"] == "Y",
+                    description=row.comment,
+                    isPartition=False,
+                    isBucket=False,
+                )
+            )
+        return columns
sqlframe/spark/dataframe.py CHANGED
@@ -1,26 +1,23 @@
 from __future__ import annotations
 
 import logging
-import sys
 import typing as t
 
+from sqlglot import exp
+
+from sqlframe.base.catalog import Column
 from sqlframe.base.dataframe import (
     _BaseDataFrame,
     _BaseDataFrameNaFunctions,
     _BaseDataFrameStatFunctions,
 )
+from sqlframe.base.mixins.dataframe_mixins import NoCachePersistSupportMixin
 from sqlframe.spark.group import SparkGroupedData
 
-if sys.version_info >= (3, 11):
-    from typing import Self
-else:
-    from typing_extensions import Self
-
 if t.TYPE_CHECKING:
     from sqlframe.spark.readwriter import SparkDataFrameWriter
     from sqlframe.spark.session import SparkSession
 
-
 logger = logging.getLogger(__name__)
 
 
@@ -33,22 +30,35 @@ class SparkDataFrameStatFunctions(_BaseDataFrameStatFunctions["SparkDataFrame"])
 
 
 class SparkDataFrame(
+    NoCachePersistSupportMixin,
     _BaseDataFrame[
         "SparkSession",
         "SparkDataFrameWriter",
         "SparkDataFrameNaFunctions",
         "SparkDataFrameStatFunctions",
         "SparkGroupedData",
-    ]
+    ],
 ):
     _na = SparkDataFrameNaFunctions
     _stat = SparkDataFrameStatFunctions
     _group_data = SparkGroupedData
 
-    def cache(self) -> Self:
-        logger.warning("Spark does not support caching. Ignoring cache() call.")
-        return self
-
-    def persist(self) -> Self:
-        logger.warning("Spark does not support persist. Ignoring persist() call.")
-        return self
+    @property
+    def _typed_columns(self) -> t.List[Column]:
+        columns = []
+        for field in self.session.spark_session.sql(
+            self.session._to_sql(self.expression)
+        ).schema.fields:
+            columns.append(
+                Column(
+                    name=field.name,
+                    dataType=exp.DataType.build(field.dataType.simpleString(), dialect="spark").sql(
+                        dialect="spark"
+                    ),
+                    nullable=field.nullable,
+                    description=None,
+                    isPartition=False,
+                    isBucket=False,
+                )
+            )
+        return columns
sqlframe/testing/__init__.py ADDED
@@ -0,0 +1,3 @@
+from sqlframe.testing.utils import assertDataFrameEqual, assertSchemaEqual
+
+__all__ = ["assertDataFrameEqual", "assertSchemaEqual"]
sqlframe/testing/utils.py ADDED
@@ -0,0 +1,320 @@
+# This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'sqlframe' folder.
+from __future__ import annotations
+
+import difflib
+import os
+import typing as t
+from itertools import zip_longest
+
+from sqlframe.base import types
+from sqlframe.base.dataframe import _BaseDataFrame
+from sqlframe.base.exceptions import (
+    DataFrameDiffError,
+    SchemaDiffError,
+    SQLFrameException,
+)
+from sqlframe.base.util import verify_pandas_installed
+
+if t.TYPE_CHECKING:
+    import pandas as pd
+
+
+def _terminal_color_support():
+    try:
+        # determine if environment supports color
+        script = "$(test $(tput colors)) && $(test $(tput colors) -ge 8) && echo true || echo false"
+        return os.popen(script).read()
+    except Exception:
+        return False
+
+
+def _context_diff(actual: t.List[str], expected: t.List[str], n: int = 3):
+    """
+    Modified from difflib context_diff API,
+    see original code here: https://github.com/python/cpython/blob/main/Lib/difflib.py#L1180
+    """
+
+    def red(s: str) -> str:
+        red_color = "\033[31m"
+        no_color = "\033[0m"
+        return red_color + str(s) + no_color
+
+    prefix = dict(insert="+ ", delete="- ", replace="! ", equal=" ")
+    for group in difflib.SequenceMatcher(None, actual, expected).get_grouped_opcodes(n):
+        yield "*** actual ***"
+        if any(tag in {"replace", "delete"} for tag, _, _, _, _ in group):
+            for tag, i1, i2, _, _ in group:
+                for line in actual[i1:i2]:
+                    if tag != "equal" and _terminal_color_support():
+                        yield red(prefix[tag] + str(line))
+                    else:
+                        yield prefix[tag] + str(line)
+
+        yield "\n"
+
+        yield "*** expected ***"
+        if any(tag in {"replace", "insert"} for tag, _, _, _, _ in group):
+            for tag, _, _, j1, j2 in group:
+                for line in expected[j1:j2]:
+                    if tag != "equal" and _terminal_color_support():
+                        yield red(prefix[tag] + str(line))
+                    else:
+                        yield prefix[tag] + str(line)
+
+
+# Source: https://github.com/apache/spark/blob/master/python/pyspark/testing/utils.py#L519
+def assertDataFrameEqual(
+    actual: t.Union[_BaseDataFrame, pd.DataFrame, t.List[types.Row]],
+    expected: t.Union[_BaseDataFrame, pd.DataFrame, t.List[types.Row]],
+    checkRowOrder: bool = False,
+    rtol: float = 1e-5,
+    atol: float = 1e-8,
+):
+    r"""
+    A util function to assert equality between `actual` and `expected`
+    (DataFrames or lists of Rows), with optional parameters `checkRowOrder`, `rtol`, and `atol`.
+
+    Supports Spark, Spark Connect, pandas, and pandas-on-Spark DataFrames.
+    For more information about pandas-on-Spark DataFrame equality, see the docs for
+    `assertPandasOnSparkEqual`.
+
+    .. versionadded:: 3.5.0
+
+    Parameters
+    ----------
+    actual : DataFrame (Spark, Spark Connect, pandas, or pandas-on-Spark) or list of Rows
+        The DataFrame that is being compared or tested.
+    expected : DataFrame (Spark, Spark Connect, pandas, or pandas-on-Spark) or list of Rows
+        The expected result of the operation, for comparison with the actual result.
+    checkRowOrder : bool, optional
+        A flag indicating whether the order of rows should be considered in the comparison.
+        If set to `False` (default), the row order is not taken into account.
+        If set to `True`, the order of rows is important and will be checked during comparison.
+        (See Notes)
+    rtol : float, optional
+        The relative tolerance, used in asserting approximate equality for float values in actual
+        and expected. Set to 1e-5 by default. (See Notes)
+    atol : float, optional
+        The absolute tolerance, used in asserting approximate equality for float values in actual
+        and expected. Set to 1e-8 by default. (See Notes)
+
+    Notes
+    -----
+    When `assertDataFrameEqual` fails, the error message uses the Python `difflib` library to
+    display a diff log of each row that differs in `actual` and `expected`.
+
+    For `checkRowOrder`, note that PySpark DataFrame ordering is non-deterministic, unless
+    explicitly sorted.
+
+    Note that schema equality is checked only when `expected` is a DataFrame (not a list of Rows).
+
+    For DataFrames with float values, assertDataFrame asserts approximate equality.
+    Two float values a and b are approximately equal if the following equation is True:
+
+    ``absolute(a - b) <= (atol + rtol * absolute(b))``.
+
+    Examples
+    --------
+    >>> df1 = spark.createDataFrame(data=[("1", 1000), ("2", 3000)], schema=["id", "amount"])
+    >>> df2 = spark.createDataFrame(data=[("1", 1000), ("2", 3000)], schema=["id", "amount"])
+    >>> assertDataFrameEqual(df1, df2)  # pass, DataFrames are identical
+
+    >>> df1 = spark.createDataFrame(data=[("1", 0.1), ("2", 3.23)], schema=["id", "amount"])
+    >>> df2 = spark.createDataFrame(data=[("1", 0.109), ("2", 3.23)], schema=["id", "amount"])
+    >>> assertDataFrameEqual(df1, df2, rtol=1e-1)  # pass, DataFrames are approx equal by rtol
+
+    >>> df1 = spark.createDataFrame(data=[(1, 1000), (2, 3000)], schema=["id", "amount"])
+    >>> list_of_rows = [Row(1, 1000), Row(2, 3000)]
+    >>> assertDataFrameEqual(df1, list_of_rows)  # pass, actual and expected data are equal
+
+    >>> import pyspark.pandas as ps
+    >>> df1 = ps.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6], 'c': [7, 8, 9]})
+    >>> df2 = ps.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6], 'c': [7, 8, 9]})
+    >>> assertDataFrameEqual(df1, df2)  # pass, pandas-on-Spark DataFrames are equal
+
+    >>> df1 = spark.createDataFrame(
+    ...     data=[("1", 1000.00), ("2", 3000.00), ("3", 2000.00)], schema=["id", "amount"])
+    >>> df2 = spark.createDataFrame(
+    ...     data=[("1", 1001.00), ("2", 3000.00), ("3", 2003.00)], schema=["id", "amount"])
+    >>> assertDataFrameEqual(df1, df2)  # doctest: +IGNORE_EXCEPTION_DETAIL
+    Traceback (most recent call last):
+    ...
+    PySparkAssertionError: [DIFFERENT_ROWS] Results do not match: ( 66.66667 % )
+    *** actual ***
+    ! Row(id='1', amount=1000.0)
+    Row(id='2', amount=3000.0)
+    ! Row(id='3', amount=2000.0)
+    *** expected ***
+    ! Row(id='1', amount=1001.0)
+    Row(id='2', amount=3000.0)
+    ! Row(id='3', amount=2003.0)
+    """
+    import pandas as pd
+
+    if actual is None and expected is None:
+        return True
+    elif actual is None or expected is None:
+        raise SQLFrameException("Missing required arguments: actual and expected")
+
+    def compare_rows(r1: types.Row, r2: types.Row):
+        def compare_vals(val1, val2):
+            if isinstance(val1, list) and isinstance(val2, list):
+                return len(val1) == len(val2) and all(
+                    compare_vals(x, y) for x, y in zip(val1, val2)
+                )
+            elif isinstance(val1, types.Row) and isinstance(val2, types.Row):
+                return all(compare_vals(x, y) for x, y in zip(val1, val2))
+            elif isinstance(val1, dict) and isinstance(val2, dict):
+                return (
+                    len(val1.keys()) == len(val2.keys())
+                    and val1.keys() == val2.keys()
+                    and all(compare_vals(val1[k], val2[k]) for k in val1.keys())
+                )
+            elif isinstance(val1, float) and isinstance(val2, float):
+                if abs(val1 - val2) > (atol + rtol * abs(val2)):
+                    return False
+            else:
+                if val1 != val2:
+                    return False
+            return True
+
+        if r1 is None and r2 is None:
+            return True
+        elif r1 is None or r2 is None:
+            return False
+
+        return compare_vals(r1, r2)
+
+    def assert_rows_equal(rows1: t.List[types.Row], rows2: t.List[types.Row]):
+        zipped = list(zip_longest(rows1, rows2))
+        diff_rows_cnt = 0
+        diff_rows = False
+
+        rows_str1 = ""
+        rows_str2 = ""
+
+        # count different rows
+        for r1, r2 in zipped:
+            rows_str1 += str(r1) + "\n"
+            rows_str2 += str(r2) + "\n"
+            if not compare_rows(r1, r2):
+                diff_rows_cnt += 1
+                diff_rows = True
+
+        generated_diff = _context_diff(
+            actual=rows_str1.splitlines(), expected=rows_str2.splitlines(), n=len(zipped)
+        )
+
+        if diff_rows:
+            error_msg = "Results do not match: "
+            percent_diff = (diff_rows_cnt / len(zipped)) * 100
+            error_msg += "( %.5f %% )" % percent_diff
+            error_msg += "\n" + "\n".join(generated_diff)
+            raise DataFrameDiffError("Rows are different:\n%s" % error_msg)
+
+    # convert actual and expected to list
+    if not isinstance(actual, list) and not isinstance(expected, list):
+        # only compare schema if expected is not a List
+        assertSchemaEqual(actual.schema, expected.schema)  # type: ignore
+
+    if not isinstance(actual, list):
+        actual_list = actual.collect()  # type: ignore
+    else:
+        actual_list = actual
+
+    if not isinstance(expected, list):
+        expected_list = expected.collect()  # type: ignore
+    else:
+        expected_list = expected
+
+    if not checkRowOrder:
+        # rename duplicate columns for sorting
+        actual_list = sorted(actual_list, key=lambda x: str(x))
+        expected_list = sorted(expected_list, key=lambda x: str(x))
+
+    assert_rows_equal(actual_list, expected_list)
+
+
+def assertSchemaEqual(actual: types.StructType, expected: types.StructType):
+    r"""
+    A util function to assert equality between DataFrame schemas `actual` and `expected`.
+
+    .. versionadded:: 3.5.0
+
+    Parameters
+    ----------
+    actual : StructType
+        The DataFrame schema that is being compared or tested.
+    expected : StructType
+        The expected schema, for comparison with the actual schema.
+
+    Notes
+    -----
+    When assertSchemaEqual fails, the error message uses the Python `difflib` library to display
+    a diff log of the `actual` and `expected` schemas.
+
+    Examples
+    --------
+    >>> from pyspark.sql.types import StructType, StructField, ArrayType, IntegerType, DoubleType
+    >>> s1 = StructType([StructField("names", ArrayType(DoubleType(), True), True)])
+    >>> s2 = StructType([StructField("names", ArrayType(DoubleType(), True), True)])
+    >>> assertSchemaEqual(s1, s2)  # pass, schemas are identical
+
+    >>> df1 = spark.createDataFrame(data=[(1, 1000), (2, 3000)], schema=["id", "number"])
+    >>> df2 = spark.createDataFrame(data=[("1", 1000), ("2", 5000)], schema=["id", "amount"])
+    >>> assertSchemaEqual(df1.schema, df2.schema)  # doctest: +IGNORE_EXCEPTION_DETAIL
+    Traceback (most recent call last):
+    ...
+    PySparkAssertionError: [DIFFERENT_SCHEMA] Schemas do not match.
+    --- actual
+    +++ expected
+    - StructType([StructField('id', LongType(), True), StructField('number', LongType(), True)])
+    ?                               ^^                                ^^^^^
+    + StructType([StructField('id', StringType(), True), StructField('amount', LongType(), True)])
+    ?                               ^^^^ ++++                          ^
+    """
+    if not isinstance(actual, types.StructType):
+        raise RuntimeError("actual must be a StructType")
+    if not isinstance(expected, types.StructType):
+        raise RuntimeError("expected must be a StructType")
+
+    def compare_schemas_ignore_nullable(s1: types.StructType, s2: types.StructType):
+        if len(s1) != len(s2):
+            return False
+        zipped = zip_longest(s1, s2)
+        for sf1, sf2 in zipped:
+            if not compare_structfields_ignore_nullable(sf1, sf2):
+                return False
+        return True
+
+    def compare_structfields_ignore_nullable(
+        actualSF: types.StructField, expectedSF: types.StructField
+    ):
+        if actualSF is None and expectedSF is None:
+            return True
+        elif actualSF is None or expectedSF is None:
+            return False
+        if actualSF.name != expectedSF.name:
+            return False
+        else:
+            return compare_datatypes_ignore_nullable(actualSF.dataType, expectedSF.dataType)
+
+    def compare_datatypes_ignore_nullable(dt1: t.Any, dt2: t.Any):
+        # checks datatype equality, using recursion to ignore nullable
+        if dt1.typeName() == dt2.typeName():
+            if dt1.typeName() == "array":
+                return compare_datatypes_ignore_nullable(dt1.elementType, dt2.elementType)
+            elif dt1.typeName() == "struct":
+                return compare_schemas_ignore_nullable(dt1, dt2)
+            else:
+                return True
+        else:
+            return False
+
+    # ignore nullable flag by default
+    if not compare_schemas_ignore_nullable(actual, expected):
+        generated_diff = difflib.ndiff(str(actual).splitlines(), str(expected).splitlines())
+
+        error_msg = "\n".join(generated_diff)
+
+        raise SchemaDiffError(error_msg)
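
A short usage sketch of the new helpers, assuming a local DuckDB-backed session: floats are compared with the documented `absolute(a - b) <= (atol + rtol * absolute(b))` rule, and mismatches raise the new DataFrameDiffError/SchemaDiffError instead of PySpark's assertion errors:

    from sqlframe.duckdb import DuckDBSession
    from sqlframe.testing import assertDataFrameEqual, assertSchemaEqual

    session = DuckDBSession()
    df1 = session.createDataFrame([("1", 1000.0), ("2", 3000.0)], ["id", "amount"])
    df2 = session.createDataFrame([("1", 1000.0000001), ("2", 3000.0)], ["id", "amount"])

    assertDataFrameEqual(df1, df2)             # passes: within default rtol/atol
    assertSchemaEqual(df1.schema, df2.schema)  # passes: same column names and types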
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: sqlframe
-Version: 1.10.0
+Version: 1.11.0
 Summary: Turning PySpark Into a Universal DataFrame API
 Home-page: https://github.com/eakmanrq/sqlframe
 Author: Ryan Eakman
@@ -1,12 +1,12 @@
 sqlframe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-sqlframe/_version.py,sha256=PEdW0PLUrZm2JiH_V3EAqPOK-ZxEDfT2nPsBGV10Pow,413
+sqlframe/_version.py,sha256=rZqhcUFwPMyj_mTWUN2A6qcFr8Ptv08CSbXbruC3jR4,413
 sqlframe/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 sqlframe/base/_typing.py,sha256=DuTay8-o9W-pw3RPZCgLunKNJLS9PkaV11G_pxXp9NY,1256
 sqlframe/base/catalog.py,sha256=ATDGirouUjal05P4ymL-wIi8rgjg_8w4PoACamiO64A,37245
 sqlframe/base/column.py,sha256=5bfJWj9dnStHUxLSrWMD-gwiC4-aHKC8lhoC62nhM1k,16153
-sqlframe/base/dataframe.py,sha256=uL4neDTMy1a9XJH46YLQryzdDci4iDxNXBtiJOzfHfs,67718
+sqlframe/base/dataframe.py,sha256=Tf5euWTGxFmYirgHK5ZXUI41so5ruo-asVmUwj9DFdo,70015
 sqlframe/base/decorators.py,sha256=I5osMgx9BuCgbtp4jVM2DNwYJVLzCv-OtTedhQEik0g,1882
-sqlframe/base/exceptions.py,sha256=pCB9hXX4jxZWzNg3JN1i38cv3BmpUlee5NoLYx3YXIQ,208
+sqlframe/base/exceptions.py,sha256=9Uwvqn2eAkDpqm4BrRgbL61qM-GMCbJEMAW8otxO46s,370
 sqlframe/base/function_alternatives.py,sha256=QESqZy7Osp9-CV5Yoi6XFat5SE8PzCVZ3o7gOFmIY7g,45888
 sqlframe/base/functions.py,sha256=hJDpE7GYQpQ1iHjdr1hG_hu0mAIb60vNoghjEcgMREI,187550
 sqlframe/base/group.py,sha256=TES9CleVmH3x-0X-tqmuUKfCKSWjH5vg1aU3R6dDmFc,4059
@@ -15,17 +15,17 @@ sqlframe/base/operations.py,sha256=-AhNuEzcV7ZExoP1oY3blaKip-joQyJeQVvfBTs_2g4,3
 sqlframe/base/readerwriter.py,sha256=5NPQMiOrw6I54U243R_6-ynnWYsNksgqwRpPp4IFjIw,25288
 sqlframe/base/session.py,sha256=2C0OsPoP49AuqVNtPiazTdVpwQA1668g5WOydrYP6SA,22001
 sqlframe/base/transforms.py,sha256=y0j3SGDz3XCmNGrvassk1S-owllUWfkHyMgZlY6SFO4,467
-sqlframe/base/types.py,sha256=QbJaJvSv2jaKRxOnIVpun1zHPn8AqJeWVWMbDvNwavE,11977
-sqlframe/base/util.py,sha256=o97w8XUXqLDGEjwwjJG-87LRINDsozmVPUo1gB7yw4w,9364
+sqlframe/base/types.py,sha256=K6mjafbX7oIk65CapwamcO2I8nf-poRIpKKt9XDNEaQ,11987
+sqlframe/base/util.py,sha256=tWccrZne-Acn4N2RxYr87mfI_GDMf_K9hRD7BnhGBq0,11756
 sqlframe/base/window.py,sha256=8hOv-ignPPIsZA9FzvYzcLE9J_glalVaYjIAUdRUX3o,4943
 sqlframe/base/mixins/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 sqlframe/base/mixins/catalog_mixins.py,sha256=NhuPGxIqPjyuC_V_NALN1sn9v9h0-xwFOlJyJgsvyek,14212
-sqlframe/base/mixins/dataframe_mixins.py,sha256=U2tKIY5pCLnoPy1boAQ1YWLgK1E-ZT4x47oRWtGoYLQ,2360
+sqlframe/base/mixins/dataframe_mixins.py,sha256=FoR3bymPk-vT7NhJsDwZe6ONHheMd5UJhzID2lM1joQ,1411
 sqlframe/base/mixins/readwriter_mixins.py,sha256=QnxGVL8ftZfYlBNG0Bl24N_bnA2YioSxUsTSgKIbuvQ,4723
 sqlframe/bigquery/__init__.py,sha256=i2NsMbiXOj2xphCtPuNk6cVw4iYeq5_B1I9dVI9aGAk,712
 sqlframe/bigquery/catalog.py,sha256=h3aQAQAJg6MMvFpP8Ku0S4pcx30n5qYrqHhWSomxb6A,9319
 sqlframe/bigquery/column.py,sha256=E1tUa62Y5HajkhgFuebU9zohrGyieudcHzTT8gfalio,40
-sqlframe/bigquery/dataframe.py,sha256=fPQ6043aSS_ds30WsvrYOgNZJPH0jq7BeNHGLQ2MEW4,1372
+sqlframe/bigquery/dataframe.py,sha256=Y2uy4FEYw0KxIHgnaA9uMwdIzxJzTlD_NSzIe7P7kxA,2405
 sqlframe/bigquery/functions.py,sha256=2YqJmBG0F0o10cztFZoP-G4px1QMKuHST6jlj1snUfY,11331
 sqlframe/bigquery/functions.pyi,sha256=JiyLFLiO0jyJec6j1o4ujPVQ7Tma-c9YHlm-3UQYD9M,13642
 sqlframe/bigquery/group.py,sha256=UVBNBRTo8OqS-_cS5YwvTeJYgYxeG-d6R3kfyHmlFqw,391
@@ -36,7 +36,7 @@ sqlframe/bigquery/window.py,sha256=6GKPzuxeSapJakBaKBeT9VpED1ACdjggDv9JRILDyV0,3
 sqlframe/duckdb/__init__.py,sha256=t85TA3ufZtL1weQNFmEs8itCSwbJFtw03-p0GT4XGf8,669
 sqlframe/duckdb/catalog.py,sha256=rt3XuP3m4DbhuibOFyvx_95F2zZa6uDwCI_TmcvKy1A,3895
 sqlframe/duckdb/column.py,sha256=wkEPcp3xVsH5nC3kpacXqNkRv9htPtBgt-0uFRxIRNs,56
-sqlframe/duckdb/dataframe.py,sha256=RPdXSOv_wCB0R5E5fzRMYOGFHilb4egqRk3UgiT6WEU,1530
+sqlframe/duckdb/dataframe.py,sha256=WmBrrmrfxDpYuikSMFqacgV2Jawkx4sSYE-_mwnL4Jw,1225
 sqlframe/duckdb/functions.py,sha256=pz40eqR4U_s42p7UeaefJD5yU1vo6mqNoVz0iKN5eRk,1671
 sqlframe/duckdb/functions.pyi,sha256=nU-6a2cfLDkuMCdYrNRLfa6-i8Aa0CxQQ1nLT6roIdI,5813
 sqlframe/duckdb/group.py,sha256=IkhbW42Ng1U5YT3FkIdiB4zBqRkW4QyTb-1detY1e_4,383
@@ -45,9 +45,9 @@ sqlframe/duckdb/session.py,sha256=j75iIsmaxl5x7oqyhN_VolvEclKj7QmaFfIis-SmoKM,21
 sqlframe/duckdb/types.py,sha256=KwNyuXIo-2xVVd4bZED3YrQOobKCtemlxGrJL7DrTC8,34
 sqlframe/duckdb/window.py,sha256=6GKPzuxeSapJakBaKBeT9VpED1ACdjggDv9JRILDyV0,35
 sqlframe/postgres/__init__.py,sha256=Sz_MtgV_oh_QhfZTC7iKM07ICUmNcJEDV0kEkSW9ZKU,712
-sqlframe/postgres/catalog.py,sha256=L5heEav8PTtKJDofJTf-51_cCLpZud5lDvZC-RFZIaw,3722
+sqlframe/postgres/catalog.py,sha256=uGMKo4RXOU6fA4IjcfebukEI18QswVk3cnB_G7S6_Fw,8130
 sqlframe/postgres/column.py,sha256=E1tUa62Y5HajkhgFuebU9zohrGyieudcHzTT8gfalio,40
-sqlframe/postgres/dataframe.py,sha256=feGvQo7GD-YGmWWGc5h94CMVZm0gcgUQsdlAktXS4Ac,1492
+sqlframe/postgres/dataframe.py,sha256=f-w6UHxZtmeZ5oMbaqJaZ8FrYeOhzyveNlZOK57ke0k,1289
 sqlframe/postgres/functions.py,sha256=b9ccP5vY8EDZXkJbhE_LjAlH50_6wcUF9VbzPrariec,2374
 sqlframe/postgres/functions.pyi,sha256=um-qE2g9iPs0-53vJ46lArbfvDqAbFIwrxLJgcrPM_8,5536
 sqlframe/postgres/group.py,sha256=KUXeSFKWTSH9yCRJAhW85OvjZaG6Zr4In9LR_ie3yGU,391
@@ -58,7 +58,7 @@ sqlframe/postgres/window.py,sha256=6GKPzuxeSapJakBaKBeT9VpED1ACdjggDv9JRILDyV0,3
 sqlframe/redshift/__init__.py,sha256=jamKYQtQaKjjXnQ01QGPHvatbrZSw9sWno_VOUGSz6I,712
 sqlframe/redshift/catalog.py,sha256=JBDWIu4FQhi4_POB9pxW0T5A-6qdSK7BCq_Cp-V6tIM,4717
 sqlframe/redshift/column.py,sha256=E1tUa62Y5HajkhgFuebU9zohrGyieudcHzTT8gfalio,40
-sqlframe/redshift/dataframe.py,sha256=mtxmKVnvuYNQnirEvuXICY53WRiN8L1QCtSsvPJ-4jE,1372
+sqlframe/redshift/dataframe.py,sha256=aTC0DOPDFwWH1_b9T0Pif80cYSGudIp0D-cmkR7Ci_M,1104
 sqlframe/redshift/functions.py,sha256=DR5kodYAcKatUqopwrEQtxryI4ZSqaH47_y3WLht4Wg,455
 sqlframe/redshift/group.py,sha256=5MGZYJfHpzoRSQ0N_pn4KUk4Mk2gocQwU3K1-jAbvGg,391
 sqlframe/redshift/readwriter.py,sha256=g3FYKSsJKqcSnElprzzz29ZctoXq9tRB0Mj9Bm1HycI,870
@@ -68,7 +68,7 @@ sqlframe/redshift/window.py,sha256=6GKPzuxeSapJakBaKBeT9VpED1ACdjggDv9JRILDyV0,3
 sqlframe/snowflake/__init__.py,sha256=nuQ3cuHjDpW4ELZfbd2qOYmtXmcYl7MtsrdOrRdozo0,746
 sqlframe/snowflake/catalog.py,sha256=uDjBgDdCyxaDkGNX_8tb-lol7MwwazcClUBAZsOSj70,5014
 sqlframe/snowflake/column.py,sha256=E1tUa62Y5HajkhgFuebU9zohrGyieudcHzTT8gfalio,40
-sqlframe/snowflake/dataframe.py,sha256=OJ27NudBUE3XX9mc8ywooGhYV4ijF9nX2K_nkHRcTx4,1393
+sqlframe/snowflake/dataframe.py,sha256=jUyQNCe3K6SH4PtmrR67YN0SLqkHakMxLiB261fDgkc,1862
 sqlframe/snowflake/functions.py,sha256=cIO56ZsOpjg6ICLjTh-osG1h1UjjEtM39_ieMiWkmyI,2466
 sqlframe/snowflake/functions.pyi,sha256=MkNif_sIceHMNhl-qvLir2DJ1jPqwyaahltdpgY4Jq0,6213
 sqlframe/snowflake/group.py,sha256=pPP1l2RRo_LgkXrji8a87n2PKo-63ZRPT-WUtvVcBME,395
@@ -79,7 +79,7 @@ sqlframe/snowflake/window.py,sha256=6GKPzuxeSapJakBaKBeT9VpED1ACdjggDv9JRILDyV0,
 sqlframe/spark/__init__.py,sha256=WhYQAZMJN1EMNAVGUH7BEinxNdYtXOrrr-6HUniJOyI,649
 sqlframe/spark/catalog.py,sha256=rIX5DtPnINbcPZRUe4Z1bOpkJoNRlrO9qWkUeTQClNc,32612
 sqlframe/spark/column.py,sha256=E1tUa62Y5HajkhgFuebU9zohrGyieudcHzTT8gfalio,40
-sqlframe/spark/dataframe.py,sha256=V3z5Bx9snLgYh4bDwJfJb5mj1P7UsZF8DMlLwZXopBg,1309
+sqlframe/spark/dataframe.py,sha256=_TD-h7oz0-i80r90v17UoLDoIzcGNchU2SL13ujOOic,1779
 sqlframe/spark/functions.py,sha256=PkK4MBpVADhnDbrgFDii5zFaNrhi4y-OYX3Lcu-SW0k,530
 sqlframe/spark/functions.pyi,sha256=bjz6s8E6OB0c4KfTTsls7rhb_R9mIYvkaeaXefMziqM,11617
 sqlframe/spark/group.py,sha256=MrvV_v-YkBc6T1zz882WrEqtWjlooWIyHBCmTQg3fCA,379
@@ -97,8 +97,10 @@ sqlframe/standalone/readwriter.py,sha256=EZNyDJ4ID6sGNog3uP4-e9RvchX4biJJDNtc5hk
 sqlframe/standalone/session.py,sha256=wQmdu2sv6KMTAv0LRFk7TY7yzlh3xvmsyqilEtRecbY,1191
 sqlframe/standalone/types.py,sha256=KwNyuXIo-2xVVd4bZED3YrQOobKCtemlxGrJL7DrTC8,34
 sqlframe/standalone/window.py,sha256=6GKPzuxeSapJakBaKBeT9VpED1ACdjggDv9JRILDyV0,35
-sqlframe-1.10.0.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
-sqlframe-1.10.0.dist-info/METADATA,sha256=Z983jSJyRrSOcfu5Y8DFI-aROxse_TVWFPVGkJ-WV70,7497
-sqlframe-1.10.0.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
-sqlframe-1.10.0.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
-sqlframe-1.10.0.dist-info/RECORD,,
+sqlframe/testing/__init__.py,sha256=VVCosQhitU74A3NnE52O4mNtGZONapuEXcc20QmSlnQ,132
+sqlframe/testing/utils.py,sha256=9DDYVuocO7tygee3RaajuJNZ24sJwf_LY556kKg7kTw,13011
+sqlframe-1.11.0.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
+sqlframe-1.11.0.dist-info/METADATA,sha256=JTMUu99Ygcz_fK15KTHUb9OqQcPiQoUjQ1-7RQ09COE,7497
+sqlframe-1.11.0.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
+sqlframe-1.11.0.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
+sqlframe-1.11.0.dist-info/RECORD,,