sqlframe 1.0.0__py3-none-any.whl → 1.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sqlframe/_version.py +2 -2
- sqlframe/base/catalog.py +2 -1
- sqlframe/base/dataframe.py +7 -4
- sqlframe/base/session.py +4 -12
- sqlframe/base/util.py +2 -0
- sqlframe/duckdb/readwriter.py +17 -5
- {sqlframe-1.0.0.dist-info → sqlframe-1.1.0.dist-info}/METADATA +2 -2
- {sqlframe-1.0.0.dist-info → sqlframe-1.1.0.dist-info}/RECORD +11 -11
- {sqlframe-1.0.0.dist-info → sqlframe-1.1.0.dist-info}/LICENSE +0 -0
- {sqlframe-1.0.0.dist-info → sqlframe-1.1.0.dist-info}/WHEEL +0 -0
- {sqlframe-1.0.0.dist-info → sqlframe-1.1.0.dist-info}/top_level.txt +0 -0
sqlframe/_version.py
CHANGED
sqlframe/base/catalog.py
CHANGED
|
@@ -8,7 +8,7 @@ from sqlglot import MappingSchema, exp
|
|
|
8
8
|
|
|
9
9
|
from sqlframe.base.decorators import normalize
|
|
10
10
|
from sqlframe.base.exceptions import TableSchemaError
|
|
11
|
-
from sqlframe.base.util import to_schema
|
|
11
|
+
from sqlframe.base.util import ensure_column_mapping, to_schema
|
|
12
12
|
|
|
13
13
|
if t.TYPE_CHECKING:
|
|
14
14
|
from sqlglot.schema import ColumnMapping
|
|
@@ -82,6 +82,7 @@ class _BaseCatalog(t.Generic[SESSION, DF]):
|
|
|
82
82
|
raise TableSchemaError(
|
|
83
83
|
"This session does not have access to a catalog that can lookup column information. See docs for explicitly defining columns or using a session that can automatically determine this."
|
|
84
84
|
)
|
|
85
|
+
column_mapping = ensure_column_mapping(column_mapping) # type: ignore
|
|
85
86
|
self._schema.add_table(table, column_mapping, dialect=self.session.input_dialect)
|
|
86
87
|
|
|
87
88
|
@normalize(["dbName"])
|
sqlframe/base/dataframe.py
CHANGED
|
@@ -417,7 +417,7 @@ class _BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
|
|
|
417
417
|
from sqlframe.base.session import _BaseSession
|
|
418
418
|
|
|
419
419
|
value = expression.sql(dialect=_BaseSession().input_dialect).encode("utf-8")
|
|
420
|
-
hash = f"t{zlib.crc32(value)}"[:
|
|
420
|
+
hash = f"t{zlib.crc32(value)}"[:9]
|
|
421
421
|
return self.session._normalize_string(hash)
|
|
422
422
|
|
|
423
423
|
def _get_select_expressions(
|
|
@@ -606,8 +606,11 @@ class _BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
|
|
|
606
606
|
return df._convert_leaf_to_cte(sequence_id=new_sequence_id)
|
|
607
607
|
|
|
608
608
|
@operation(Operation.WHERE)
|
|
609
|
-
def where(self, column: t.Union[Column, bool], **kwargs) -> Self:
|
|
610
|
-
|
|
609
|
+
def where(self, column: t.Union[Column, str, bool], **kwargs) -> Self:
|
|
610
|
+
if isinstance(column, str):
|
|
611
|
+
col = sqlglot.parse_one(column, dialect=self.session.input_dialect)
|
|
612
|
+
else:
|
|
613
|
+
col = self._ensure_and_normalize_col(column)
|
|
611
614
|
return self.copy(expression=self.expression.where(col.expression))
|
|
612
615
|
|
|
613
616
|
filter = where
|
|
@@ -1094,7 +1097,7 @@ class _BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
|
|
|
1094
1097
|
)
|
|
1095
1098
|
if existing_col_index:
|
|
1096
1099
|
expression = self.expression.copy()
|
|
1097
|
-
expression.expressions[existing_col_index] = col.expression
|
|
1100
|
+
expression.expressions[existing_col_index] = col.alias(colName).expression
|
|
1098
1101
|
return self.copy(expression=expression)
|
|
1099
1102
|
return self.copy().select(col.alias(colName), append=True)
|
|
1100
1103
|
|
sqlframe/base/session.py
CHANGED
|
@@ -313,24 +313,16 @@ class _BaseSession(t.Generic[CATALOG, READER, WRITER, DF, CONN]):
|
|
|
313
313
|
sel_expression = exp.Select(**select_kwargs)
|
|
314
314
|
if empty_df:
|
|
315
315
|
sel_expression = sel_expression.where(exp.false())
|
|
316
|
-
# if empty_df:
|
|
317
|
-
# if not column_mapping:
|
|
318
|
-
# # If we don't have rows or columns then we just return a null with a false expression
|
|
319
|
-
# sel_expression = (
|
|
320
|
-
# exp.Select().select("null").from_("VALUES (NULL)").where(exp.false())
|
|
321
|
-
# )
|
|
322
|
-
# else:
|
|
323
|
-
# # Ensure no results are returned if the dataframe is expected to be empty instead of
|
|
324
|
-
# # a row of null values
|
|
325
|
-
# sel_expression = sel_expression.where(exp.false())
|
|
326
316
|
return self._create_df(sel_expression)
|
|
327
317
|
|
|
328
|
-
def sql(self, sqlQuery: t.Union[str, exp.Expression]) -> DF:
|
|
329
|
-
expression = self._optimize(
|
|
318
|
+
def sql(self, sqlQuery: t.Union[str, exp.Expression], optimize: bool = True) -> DF:
|
|
319
|
+
expression = (
|
|
330
320
|
sqlglot.parse_one(sqlQuery, read=self.input_dialect)
|
|
331
321
|
if isinstance(sqlQuery, str)
|
|
332
322
|
else sqlQuery
|
|
333
323
|
)
|
|
324
|
+
if optimize:
|
|
325
|
+
expression = self._optimize(expression)
|
|
334
326
|
if self.temp_views:
|
|
335
327
|
replacement_mapping = {}
|
|
336
328
|
for table in expression.find_all(exp.Table):
|
sqlframe/base/util.py
CHANGED
|
@@ -113,6 +113,8 @@ def ensure_column_mapping(schema: t.Union[str, StructType]) -> t.Dict:
|
|
|
113
113
|
}
|
|
114
114
|
# TODO: Make a protocol with a `simpleString` attribute as what it looks for instead of the actual
|
|
115
115
|
# `StructType` object.
|
|
116
|
+
elif hasattr(schema, "simpleString"):
|
|
117
|
+
return {struct_field.name: struct_field.dataType.simpleString() for struct_field in schema}
|
|
116
118
|
return sqlglot_ensure_column_mapping(schema) # type: ignore
|
|
117
119
|
|
|
118
120
|
|
sqlframe/duckdb/readwriter.py
CHANGED
|
@@ -5,6 +5,9 @@ from __future__ import annotations
|
|
|
5
5
|
import logging
|
|
6
6
|
import typing as t
|
|
7
7
|
|
|
8
|
+
from sqlglot import exp
|
|
9
|
+
from sqlglot.helper import ensure_list
|
|
10
|
+
|
|
8
11
|
from sqlframe.base.readerwriter import _BaseDataFrameReader, _BaseDataFrameWriter
|
|
9
12
|
from sqlframe.base.util import ensure_column_mapping, to_csv
|
|
10
13
|
|
|
@@ -69,13 +72,22 @@ class DuckDBDataFrameReader(_BaseDataFrameReader["DuckDBSession", "DuckDBDataFra
|
|
|
69
72
|
|100|NULL|
|
|
70
73
|
+---+----+
|
|
71
74
|
"""
|
|
75
|
+
if schema:
|
|
76
|
+
column_mapping = ensure_column_mapping(schema)
|
|
77
|
+
select_columns = [x.expression for x in self._to_casted_columns(column_mapping)]
|
|
78
|
+
if format == "csv":
|
|
79
|
+
duckdb_columns = ", ".join(
|
|
80
|
+
[f"'{column}': '{dtype}'" for column, dtype in column_mapping.items()]
|
|
81
|
+
)
|
|
82
|
+
options["columns"] = "{" + duckdb_columns + "}"
|
|
83
|
+
else:
|
|
84
|
+
select_columns = [exp.Star()]
|
|
72
85
|
if format:
|
|
73
|
-
|
|
86
|
+
paths = ",".join([f"'{path}'" for path in ensure_list(path)])
|
|
87
|
+
from_clause = f"read_{format}([{paths}], {to_csv(options)})"
|
|
74
88
|
else:
|
|
75
|
-
|
|
76
|
-
df = self.session.sql(
|
|
77
|
-
if schema:
|
|
78
|
-
df = df.select(*self._to_casted_columns(ensure_column_mapping(schema)))
|
|
89
|
+
from_clause = f"'{path}'"
|
|
90
|
+
df = self.session.sql(exp.select(*select_columns).from_(from_clause), optimize=False)
|
|
79
91
|
self.session._last_loaded_file = path # type: ignore
|
|
80
92
|
return df
|
|
81
93
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: sqlframe
|
|
3
|
-
Version: 1.0.0
|
|
3
|
+
Version: 1.1.0
|
|
4
4
|
Summary: Taking the Spark out of PySpark by converting to SQL
|
|
5
5
|
Home-page: https://github.com/eakmanrq/sqlframe
|
|
6
6
|
Author: Ryan Eakman
|
|
@@ -18,7 +18,7 @@ Requires-Python: >=3.8
|
|
|
18
18
|
Description-Content-Type: text/markdown
|
|
19
19
|
License-File: LICENSE
|
|
20
20
|
Requires-Dist: prettytable (<3.11.0)
|
|
21
|
-
Requires-Dist: sqlglot (<
|
|
21
|
+
Requires-Dist: sqlglot (<24.1,>=24.0.0)
|
|
22
22
|
Provides-Extra: bigquery
|
|
23
23
|
Requires-Dist: google-cloud-bigquery-storage (<3,>=2) ; extra == 'bigquery'
|
|
24
24
|
Requires-Dist: google-cloud-bigquery[pandas] (<4,>=3) ; extra == 'bigquery'
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
sqlframe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
-
sqlframe/_version.py,sha256=
|
|
2
|
+
sqlframe/_version.py,sha256=CqDGE4B1ZqZ-56mxeOFcXRTmlxrdOh4ayrjbcPjziE4,411
|
|
3
3
|
sqlframe/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
4
|
sqlframe/base/_typing.py,sha256=DuTay8-o9W-pw3RPZCgLunKNJLS9PkaV11G_pxXp9NY,1256
|
|
5
|
-
sqlframe/base/catalog.py,sha256=
|
|
5
|
+
sqlframe/base/catalog.py,sha256=P55_mLrk9KUC2LRYyLgSzVAan1Lx6EBNfdDjMEtc9DA,37086
|
|
6
6
|
sqlframe/base/column.py,sha256=K9TtpBjVsFK9NtEX9ZQscU6qZIKiVVh1zj3jG9HifyA,15110
|
|
7
|
-
sqlframe/base/dataframe.py,sha256=
|
|
7
|
+
sqlframe/base/dataframe.py,sha256=lNBMm79rX1DAt5vj4qKuxDwyhJdnhDROcOPcqVDxNHE,58971
|
|
8
8
|
sqlframe/base/decorators.py,sha256=fnqT1Hqa0J_gUurDcVY1Dcscj6SXFxFJ5PKAw-xe5sU,2097
|
|
9
9
|
sqlframe/base/exceptions.py,sha256=pCB9hXX4jxZWzNg3JN1i38cv3BmpUlee5NoLYx3YXIQ,208
|
|
10
10
|
sqlframe/base/function_alternatives.py,sha256=to0kv3MTJmQFeVTMcitz0AxBIoUJC3cu5LkEY5aJpoo,31318
|
|
@@ -13,10 +13,10 @@ sqlframe/base/group.py,sha256=sKoaI2aLMih9nJTQfqzfJ00NbBcGQtArWXYHT40motQ,4060
|
|
|
13
13
|
sqlframe/base/normalize.py,sha256=Ie6IcrD9dL-xBUKgDoh_c_gfLw68tBK5AmiprCA8MXE,3633
|
|
14
14
|
sqlframe/base/operations.py,sha256=fVlAse6-WdQnEaHghRZVHXOesQ3OnKQwBnVYv5nVRiI,3457
|
|
15
15
|
sqlframe/base/readerwriter.py,sha256=cgg7KuO7Eu8fScKOg1KyNFAcgnsjpU6yusPVs0o52a4,25213
|
|
16
|
-
sqlframe/base/session.py,sha256=
|
|
16
|
+
sqlframe/base/session.py,sha256=evVdd-FGKkp-Wg80UG5289iRtBihLFfkqrcXTH64_R8,21926
|
|
17
17
|
sqlframe/base/transforms.py,sha256=EKwUpfp83bncEs_MNmI2OO7gV6vA_Rr89ZWmE4eETSw,468
|
|
18
18
|
sqlframe/base/types.py,sha256=1CwMW9Q1inYzQcPTyjv1QANtVSHha8ZmBigmopQET98,11925
|
|
19
|
-
sqlframe/base/util.py,sha256=
|
|
19
|
+
sqlframe/base/util.py,sha256=SeUC2pcSBGnsS1W5PL1p-IGC6bJG8_2a7En2hxSTmpA,7597
|
|
20
20
|
sqlframe/base/window.py,sha256=yyKvoNi41vL2t7XK2Ysjp8Q2FNIu3BYv-9EPtp5og6k,4944
|
|
21
21
|
sqlframe/base/mixins/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
22
22
|
sqlframe/base/mixins/catalog_mixins.py,sha256=ZNzNn-cWB0RwT7L1KZCWYke2JlP-cZze0MDNOzSfHew,14093
|
|
@@ -37,7 +37,7 @@ sqlframe/duckdb/column.py,sha256=wkEPcp3xVsH5nC3kpacXqNkRv9htPtBgt-0uFRxIRNs,56
|
|
|
37
37
|
sqlframe/duckdb/dataframe.py,sha256=9T6GV4JScaApFSA4T7fixot78HMUgkjGxU7TgjolOOM,1410
|
|
38
38
|
sqlframe/duckdb/functions.py,sha256=srvzbk_Wg-wQPFGYp624dRDyYJghi47M8E-Tu7pBdY0,1507
|
|
39
39
|
sqlframe/duckdb/group.py,sha256=sYTExtNprfbW74LWc_Lyjc1G6K1FogQsdILU2599Bq8,384
|
|
40
|
-
sqlframe/duckdb/readwriter.py,sha256=
|
|
40
|
+
sqlframe/duckdb/readwriter.py,sha256=TC0LigUmCRpcdx4B8Mb5ap5ifFBrjmbqXmhUB5rG87U,4376
|
|
41
41
|
sqlframe/duckdb/session.py,sha256=TCAVsSqBGGj1Otb2iIkSkWqjbzzg1MeDAafGN928-O8,1893
|
|
42
42
|
sqlframe/duckdb/types.py,sha256=KwNyuXIo-2xVVd4bZED3YrQOobKCtemlxGrJL7DrTC8,34
|
|
43
43
|
sqlframe/duckdb/window.py,sha256=6GKPzuxeSapJakBaKBeT9VpED1ACdjggDv9JRILDyV0,35
|
|
@@ -91,8 +91,8 @@ sqlframe/standalone/readwriter.py,sha256=n2uoebNdL_t6_eaXNkpu7Zv2UmZ9I3rASuo01gG
|
|
|
91
91
|
sqlframe/standalone/session.py,sha256=xWxBh-OtH--LmWtpDboOBpwKLcaBK5JV-IF2gCra5k0,1192
|
|
92
92
|
sqlframe/standalone/types.py,sha256=KwNyuXIo-2xVVd4bZED3YrQOobKCtemlxGrJL7DrTC8,34
|
|
93
93
|
sqlframe/standalone/window.py,sha256=6GKPzuxeSapJakBaKBeT9VpED1ACdjggDv9JRILDyV0,35
|
|
94
|
-
sqlframe-1.0.0.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
|
|
95
|
-
sqlframe-1.
|
|
96
|
-
sqlframe-1.0.0.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
|
|
97
|
-
sqlframe-1.0.0.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
|
|
98
|
-
sqlframe-1.0.0.dist-info/RECORD,,
|
|
94
|
+
sqlframe-1.1.0.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
|
|
95
|
+
sqlframe-1.1.0.dist-info/METADATA,sha256=RBSfrpj8FYCqz79aL88JvpDRIkKZvbASKiwT0YqwXm0,6873
|
|
96
|
+
sqlframe-1.1.0.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
|
|
97
|
+
sqlframe-1.1.0.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
|
|
98
|
+
sqlframe-1.1.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|