sqlframe 3.39.2__py3-none-any.whl → 3.39.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sqlframe/_version.py +3 -3
- sqlframe/base/dataframe.py +25 -8
- sqlframe/base/functions.py +9 -12
- sqlframe/base/session.py +6 -1
- sqlframe/base/util.py +15 -0
- {sqlframe-3.39.2.dist-info → sqlframe-3.39.4.dist-info}/METADATA +3 -2
- {sqlframe-3.39.2.dist-info → sqlframe-3.39.4.dist-info}/RECORD +10 -10
- {sqlframe-3.39.2.dist-info → sqlframe-3.39.4.dist-info}/LICENSE +0 -0
- {sqlframe-3.39.2.dist-info → sqlframe-3.39.4.dist-info}/WHEEL +0 -0
- {sqlframe-3.39.2.dist-info → sqlframe-3.39.4.dist-info}/top_level.txt +0 -0
sqlframe/_version.py
CHANGED
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
|
|
28
28
|
commit_id: COMMIT_ID
|
29
29
|
__commit_id__: COMMIT_ID
|
30
30
|
|
31
|
-
__version__ = version = '3.39.2'
|
32
|
-
__version_tuple__ = version_tuple = (3, 39, 2)
|
31
|
+
__version__ = version = '3.39.4'
|
32
|
+
__version_tuple__ = version_tuple = (3, 39, 4)
|
33
33
|
|
34
|
-
__commit_id__ = commit_id = '
|
34
|
+
__commit_id__ = commit_id = 'g7103a1e73'
|
sqlframe/base/dataframe.py
CHANGED
@@ -31,6 +31,7 @@ from sqlframe.base.util import (
|
|
31
31
|
get_func_from_session,
|
32
32
|
get_tables_from_expression_with_join,
|
33
33
|
normalize_string,
|
34
|
+
partition_to,
|
34
35
|
quote_preserving_alias_or_name,
|
35
36
|
sqlglot_to_spark,
|
36
37
|
verify_openai_installed,
|
@@ -1633,14 +1634,30 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
|
|
1633
1634
|
|
1634
1635
|
@operation(Operation.SELECT)
|
1635
1636
|
def drop(self, *cols: t.Union[str, Column]) -> Self:
|
1636
|
-
|
1637
|
-
|
1638
|
-
|
1639
|
-
|
1640
|
-
|
1641
|
-
|
1642
|
-
|
1643
|
-
|
1637
|
+
# Separate string column names from Column objects for different handling
|
1638
|
+
column_objs, column_names = partition_to(lambda x: isinstance(x, str), cols, list, set)
|
1639
|
+
|
1640
|
+
# Normalize only the Column objects (strings will be handled as unqualified)
|
1641
|
+
drop_cols = self._ensure_and_normalize_cols(column_objs) if column_objs else []
|
1642
|
+
|
1643
|
+
# Work directly with the expression's select columns to preserve table qualifiers
|
1644
|
+
current_expressions = self.expression.expressions
|
1645
|
+
drop_sql = {drop_col.expression.sql() for drop_col in drop_cols}
|
1646
|
+
|
1647
|
+
# Create a more sophisticated matching function that considers table qualifiers
|
1648
|
+
def should_drop_expression(expr: exp.Expression) -> bool:
|
1649
|
+
# Check against fully qualified Column objects and
|
1650
|
+
# Check against unqualified string column names (drop ALL columns with this name)
|
1651
|
+
if expr.sql() in drop_sql or (
|
1652
|
+
isinstance(expr, exp.Column) and expr.alias_or_name in column_names
|
1653
|
+
):
|
1654
|
+
return True
|
1655
|
+
return False
|
1656
|
+
|
1657
|
+
new_expressions = [expr for expr in current_expressions if not should_drop_expression(expr)]
|
1658
|
+
return self.select.__wrapped__( # type: ignore
|
1659
|
+
self, *new_expressions, skip_update_display_name_mapping=True
|
1660
|
+
)
|
1644
1661
|
|
1645
1662
|
@operation(Operation.LIMIT)
|
1646
1663
|
def limit(self, num: int) -> Self:
|
sqlframe/base/functions.py
CHANGED
@@ -37,9 +37,7 @@ def _get_session() -> _BaseSession:
|
|
37
37
|
|
38
38
|
@meta()
|
39
39
|
def col(column_name: t.Union[ColumnOrName, t.Any]) -> Column:
|
40
|
-
|
41
|
-
|
42
|
-
dialect = _BaseSession().input_dialect
|
40
|
+
dialect = _get_session().input_dialect
|
43
41
|
if isinstance(column_name, str):
|
44
42
|
col_expression = expression.to_column(column_name, dialect=dialect).transform(
|
45
43
|
dialect.normalize_identifier
|
@@ -662,9 +660,7 @@ def grouping_id(*cols: ColumnOrName) -> Column:
|
|
662
660
|
|
663
661
|
@meta()
|
664
662
|
def input_file_name() -> Column:
|
665
|
-
|
666
|
-
|
667
|
-
return Column(expression.Literal.string(_BaseSession()._last_loaded_file or ""))
|
663
|
+
return Column(expression.Literal.string(_get_session()._last_loaded_file or ""))
|
668
664
|
|
669
665
|
|
670
666
|
@meta()
|
@@ -959,12 +955,10 @@ def current_timestamp() -> Column:
|
|
959
955
|
|
960
956
|
@meta()
|
961
957
|
def date_format(col: ColumnOrName, format: str) -> Column:
|
962
|
-
from sqlframe.base.session import _BaseSession
|
963
|
-
|
964
958
|
return Column.invoke_expression_over_column(
|
965
959
|
Column(expression.TimeStrToTime(this=Column.ensure_col(col).column_expression)),
|
966
960
|
expression.TimeToStr,
|
967
|
-
format=
|
961
|
+
format=_get_session().format_time(format),
|
968
962
|
)
|
969
963
|
|
970
964
|
|
@@ -1450,6 +1444,9 @@ def unix_timestamp(
|
|
1450
1444
|
|
1451
1445
|
session = _get_session()
|
1452
1446
|
|
1447
|
+
if session._is_duckdb or session._is_postgres or session._is_snowflake or session._is_bigquery:
|
1448
|
+
timestamp = Column.ensure_col(timestamp).cast("string")
|
1449
|
+
|
1453
1450
|
if session._is_bigquery:
|
1454
1451
|
return unix_timestamp_bgutil(timestamp, format)
|
1455
1452
|
|
@@ -3375,10 +3372,9 @@ def get(col: ColumnOrName, index: t.Union[ColumnOrName, int]) -> Column:
|
|
3375
3372
|
def get_active_spark_context() -> SparkContext:
|
3376
3373
|
"""Raise RuntimeError if SparkContext is not initialized,
|
3377
3374
|
otherwise, returns the active SparkContext."""
|
3378
|
-
from sqlframe.base.session import _BaseSession
|
3379
3375
|
from sqlframe.spark.session import SparkSession
|
3380
3376
|
|
3381
|
-
session
|
3377
|
+
session = _get_session()
|
3382
3378
|
if not isinstance(session, SparkSession):
|
3383
3379
|
raise RuntimeError("This function is only available in SparkSession.")
|
3384
3380
|
return session.spark_session.sparkContext
|
@@ -6341,7 +6337,8 @@ def to_unix_timestamp(
|
|
6341
6337
|
session = _get_session()
|
6342
6338
|
|
6343
6339
|
if session._is_duckdb:
|
6344
|
-
format = format or
|
6340
|
+
format = format or session.default_time_format
|
6341
|
+
timestamp = Column.ensure_col(timestamp).cast("string")
|
6345
6342
|
|
6346
6343
|
if format is not None:
|
6347
6344
|
return Column.invoke_expression_over_column(
|
sqlframe/base/session.py
CHANGED
@@ -179,7 +179,7 @@ class _BaseSession(t.Generic[CATALOG, READER, WRITER, DF, TABLE, CONN, UDF_REGIS
|
|
179
179
|
return self._table(self, *args, **kwargs)
|
180
180
|
|
181
181
|
def __new__(cls, *args, **kwargs):
|
182
|
-
if _BaseSession._instance is None:
|
182
|
+
if _BaseSession._instance is None or not isinstance(_BaseSession._instance, cls):
|
183
183
|
_BaseSession._instance = super().__new__(cls)
|
184
184
|
return _BaseSession._instance
|
185
185
|
|
@@ -194,6 +194,11 @@ class _BaseSession(t.Generic[CATALOG, READER, WRITER, DF, TABLE, CONN, UDF_REGIS
|
|
194
194
|
def getActiveSession(self) -> Self:
|
195
195
|
return self
|
196
196
|
|
197
|
+
def stop(self) -> None:
|
198
|
+
if connection := getattr(self, "_connection", None):
|
199
|
+
connection.close()
|
200
|
+
_BaseSession._instance = None
|
201
|
+
|
197
202
|
def range(
|
198
203
|
self,
|
199
204
|
start: int,
|
sqlframe/base/util.py
CHANGED
@@ -6,6 +6,7 @@ import string
|
|
6
6
|
import typing as t
|
7
7
|
import unicodedata
|
8
8
|
|
9
|
+
from more_itertools import partition
|
9
10
|
from sqlglot import expressions as exp
|
10
11
|
from sqlglot import parse_one, to_table
|
11
12
|
from sqlglot.dialects import DuckDB
|
@@ -537,3 +538,17 @@ def is_relativedelta_like(value: t.Any) -> bool:
|
|
537
538
|
and hasattr(value, "weeks")
|
538
539
|
and hasattr(value, "leapdays")
|
539
540
|
)
|
541
|
+
|
542
|
+
|
543
|
+
T = t.TypeVar("T")
|
544
|
+
R1 = t.TypeVar("R1")
|
545
|
+
R2 = t.TypeVar("R2")
|
546
|
+
|
547
|
+
|
548
|
+
def partition_to(
|
549
|
+
pred: t.Callable[[T], bool],
|
550
|
+
iterable: t.Iterable[T],
|
551
|
+
result1: t.Type[R1],
|
552
|
+
result2: t.Type[R2],
|
553
|
+
) -> tuple[R1, R2]:
|
554
|
+
return (lambda x, y: (result1(x), result2(y)))(*partition(pred, iterable)) # type: ignore
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: sqlframe
|
3
|
-
Version: 3.39.2
|
3
|
+
Version: 3.39.4
|
4
4
|
Summary: Turning PySpark Into a Universal DataFrame API
|
5
5
|
Home-page: https://github.com/eakmanrq/sqlframe
|
6
6
|
Author: Ryan Eakman
|
@@ -16,8 +16,9 @@ Classifier: Programming Language :: Python :: 3 :: Only
|
|
16
16
|
Requires-Python: >=3.9
|
17
17
|
Description-Content-Type: text/markdown
|
18
18
|
License-File: LICENSE
|
19
|
+
Requires-Dist: more-itertools
|
19
20
|
Requires-Dist: prettytable <4
|
20
|
-
Requires-Dist: sqlglot <27.
|
21
|
+
Requires-Dist: sqlglot <27.10,>=24.0.0
|
21
22
|
Requires-Dist: typing-extensions
|
22
23
|
Provides-Extra: bigquery
|
23
24
|
Requires-Dist: google-cloud-bigquery-storage <3,>=2 ; extra == 'bigquery'
|
@@ -1,25 +1,25 @@
|
|
1
1
|
sqlframe/__init__.py,sha256=SB80yLTITBXHI2GCDS6n6bN5ObHqgPjfpRPAUwxaots,3403
|
2
|
-
sqlframe/_version.py,sha256=
|
2
|
+
sqlframe/_version.py,sha256=RTACos9x6Q52oWKZxADQ1aU73aw2iil2MarQYuwWMsM,714
|
3
3
|
sqlframe/py.typed,sha256=Nqnn8clbgv-5l0PgxcTOldg8mkMKrFn4TvPL-rYUUGg,1
|
4
4
|
sqlframe/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
5
5
|
sqlframe/base/_typing.py,sha256=b2clI5HI1zEZKB_3Msx3FeAJQyft44ubUifJwQRVXyQ,1298
|
6
6
|
sqlframe/base/catalog.py,sha256=-YulM2BMK8MoWbXi05AsJIPxd4AuiZDBCZuk4HoeMlE,38900
|
7
7
|
sqlframe/base/column.py,sha256=f6rK6-hTiNx9WwJP7t6tqL3xEC2gwERPDlhWCS5iCBw,21417
|
8
|
-
sqlframe/base/dataframe.py,sha256
|
8
|
+
sqlframe/base/dataframe.py,sha256=-jeoqP5jS8Rk1fp_Og9ie_e2fjo19uX7JVpi9PeU5qI,86943
|
9
9
|
sqlframe/base/decorators.py,sha256=IhE5xNQDkwJHacCvulq5WpUKyKmXm7dL2A3o5WuKGP4,2131
|
10
10
|
sqlframe/base/exceptions.py,sha256=9Uwvqn2eAkDpqm4BrRgbL61qM-GMCbJEMAW8otxO46s,370
|
11
11
|
sqlframe/base/function_alternatives.py,sha256=aTu3nQhIAkZoxrI1IpjpaHEAMxBNms0AnhS0EMR-TwY,51727
|
12
|
-
sqlframe/base/functions.py,sha256=
|
12
|
+
sqlframe/base/functions.py,sha256=lMwNtOAj7MbmaFtweo5N8DJp-8ent1fT6lr3J3YcQsA,227753
|
13
13
|
sqlframe/base/group.py,sha256=fBm8EUve7W7xz11nybTXr09ih-yZxL_vvEiZVE1eb_0,12025
|
14
14
|
sqlframe/base/normalize.py,sha256=nXAJ5CwxVf4DV0GsH-q1w0p8gmjSMlv96k_ez1eVul8,3880
|
15
15
|
sqlframe/base/operations.py,sha256=g-YNcbvNKTOBbYm23GKfB3fmydlR7ZZDAuZUtXIHtzw,4438
|
16
16
|
sqlframe/base/readerwriter.py,sha256=Nb2VJ_HBmLQp5mK8JhnFooZh2ydAaboCAFVPb-4MNX4,31241
|
17
|
-
sqlframe/base/session.py,sha256=
|
17
|
+
sqlframe/base/session.py,sha256=99X-ShK9ohHCX6WdIJs0HhjfK23snaE3Gv6RYc5wqUI,27687
|
18
18
|
sqlframe/base/table.py,sha256=rCeh1W5SWbtEVfkLAUiexzrZwNgmZeptLEmLcM1ABkE,6961
|
19
19
|
sqlframe/base/transforms.py,sha256=y0j3SGDz3XCmNGrvassk1S-owllUWfkHyMgZlY6SFO4,467
|
20
20
|
sqlframe/base/types.py,sha256=OktuJ5f7tEogOW0oupI0RBlHfzZMmKh7zGLke9cwllo,12305
|
21
21
|
sqlframe/base/udf.py,sha256=O6hMhBUy9NVv-mhJRtfFhXTIa_-Z8Y_FkmmuOHu0l90,1117
|
22
|
-
sqlframe/base/util.py,sha256=
|
22
|
+
sqlframe/base/util.py,sha256=11rBF_GBFXGBCllSdlWWWo8EiZZATJn4me3u7OUNIFg,19782
|
23
23
|
sqlframe/base/window.py,sha256=7NaKDTlhun-95LEghukBCjFBwq0RHrPaajWQNCsLxok,4818
|
24
24
|
sqlframe/base/mixins/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
25
25
|
sqlframe/base/mixins/catalog_mixins.py,sha256=9fZGWToz9xMJSzUl1vsVtj6TH3TysP3fBCKJLnGUQzE,23353
|
@@ -130,8 +130,8 @@ sqlframe/standalone/udf.py,sha256=azmgtUjHNIPs0WMVNId05SHwiYn41MKVBhKXsQJ5dmY,27
|
|
130
130
|
sqlframe/standalone/window.py,sha256=6GKPzuxeSapJakBaKBeT9VpED1ACdjggDv9JRILDyV0,35
|
131
131
|
sqlframe/testing/__init__.py,sha256=VVCosQhitU74A3NnE52O4mNtGZONapuEXcc20QmSlnQ,132
|
132
132
|
sqlframe/testing/utils.py,sha256=PFsGZpwNUE_4-g_f43_vstTqsK0AQ2lBneb5Eb6NkFo,13008
|
133
|
-
sqlframe-3.39.2.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
|
134
|
-
sqlframe-3.39.
|
135
|
-
sqlframe-3.39.2.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
|
136
|
-
sqlframe-3.39.2.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
|
137
|
-
sqlframe-3.39.2.dist-info/RECORD,,
|
133
|
+
sqlframe-3.39.4.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
|
134
|
+
sqlframe-3.39.4.dist-info/METADATA,sha256=wv8nBkcg2ofsdZYo3qC0g8q_-QR_flvR1YPoKQ6uGeU,9070
|
135
|
+
sqlframe-3.39.4.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
|
136
|
+
sqlframe-3.39.4.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
|
137
|
+
sqlframe-3.39.4.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|