sqlframe 3.39.2__py3-none-any.whl → 3.39.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sqlframe/_version.py +3 -3
- sqlframe/base/dataframe.py +53 -24
- sqlframe/base/functions.py +4 -0
- sqlframe/base/operations.py +4 -3
- sqlframe/base/session.py +6 -1
- sqlframe/base/util.py +15 -0
- {sqlframe-3.39.2.dist-info → sqlframe-3.39.3.dist-info}/METADATA +2 -1
- {sqlframe-3.39.2.dist-info → sqlframe-3.39.3.dist-info}/RECORD +11 -11
- {sqlframe-3.39.2.dist-info → sqlframe-3.39.3.dist-info}/LICENSE +0 -0
- {sqlframe-3.39.2.dist-info → sqlframe-3.39.3.dist-info}/WHEEL +0 -0
- {sqlframe-3.39.2.dist-info → sqlframe-3.39.3.dist-info}/top_level.txt +0 -0
sqlframe/_version.py
CHANGED
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
|
|
28
28
|
commit_id: COMMIT_ID
|
29
29
|
__commit_id__: COMMIT_ID
|
30
30
|
|
31
|
-
__version__ = version = '3.39.2'
|
32
|
-
__version_tuple__ = version_tuple = (3, 39, 2)
|
31
|
+
__version__ = version = '3.39.3'
|
32
|
+
__version_tuple__ = version_tuple = (3, 39, 3)
|
33
33
|
|
34
|
-
__commit_id__ = commit_id = '
|
34
|
+
__commit_id__ = commit_id = 'g9d915cb1e'
|
sqlframe/base/dataframe.py
CHANGED
@@ -16,6 +16,7 @@ from dataclasses import dataclass
|
|
16
16
|
from uuid import uuid4
|
17
17
|
|
18
18
|
import sqlglot
|
19
|
+
from more_itertools import partition
|
19
20
|
from prettytable import PrettyTable
|
20
21
|
from sqlglot import Dialect, maybe_parse
|
21
22
|
from sqlglot import expressions as exp
|
@@ -31,6 +32,7 @@ from sqlframe.base.util import (
|
|
31
32
|
get_func_from_session,
|
32
33
|
get_tables_from_expression_with_join,
|
33
34
|
normalize_string,
|
35
|
+
partition_to,
|
34
36
|
quote_preserving_alias_or_name,
|
35
37
|
sqlglot_to_spark,
|
36
38
|
verify_openai_installed,
|
@@ -540,16 +542,23 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
|
|
540
542
|
expression.set("with", exp.With(expressions=existing_ctes))
|
541
543
|
return expression
|
542
544
|
|
545
|
+
@classmethod
|
546
|
+
def _get_outer_select_expressions(
|
547
|
+
cls, item: exp.Expression
|
548
|
+
) -> t.List[t.Union[exp.Column, exp.Alias]]:
|
549
|
+
outer_select = item.find(exp.Select)
|
550
|
+
if outer_select:
|
551
|
+
return outer_select.expressions
|
552
|
+
return []
|
553
|
+
|
543
554
|
@classmethod
|
544
555
|
def _get_outer_select_columns(cls, item: exp.Expression) -> t.List[Column]:
|
545
556
|
from sqlframe.base.session import _BaseSession
|
546
557
|
|
547
558
|
col = get_func_from_session("col", _BaseSession())
|
548
559
|
|
549
|
-
|
550
|
-
|
551
|
-
return [col(quote_preserving_alias_or_name(x)) for x in outer_select.expressions]
|
552
|
-
return []
|
560
|
+
outer_expressions = cls._get_outer_select_expressions(item)
|
561
|
+
return [col(quote_preserving_alias_or_name(x)) for x in outer_expressions]
|
553
562
|
|
554
563
|
def _create_hash_from_expression(self, expression: exp.Expression) -> str:
|
555
564
|
from sqlframe.base.session import _BaseSession
|
@@ -1503,20 +1512,23 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
|
|
1503
1512
|
"""
|
1504
1513
|
return func(self, *args, **kwargs) # type: ignore
|
1505
1514
|
|
1506
|
-
@operation(Operation.
|
1515
|
+
@operation(Operation.SELECT_CONSTRAINED)
|
1507
1516
|
def withColumn(self, colName: str, col: Column) -> Self:
|
1508
1517
|
return self.withColumns.__wrapped__(self, {colName: col}) # type: ignore
|
1509
1518
|
|
1510
|
-
@operation(Operation.
|
1519
|
+
@operation(Operation.SELECT_CONSTRAINED)
|
1511
1520
|
def withColumnRenamed(self, existing: str, new: str) -> Self:
|
1521
|
+
col_func = get_func_from_session("col", self.session)
|
1512
1522
|
expression = self.expression.copy()
|
1513
1523
|
existing = self.session._normalize_string(existing)
|
1514
|
-
|
1524
|
+
outer_expressions = self._get_outer_select_expressions(expression)
|
1515
1525
|
results = []
|
1516
1526
|
found_match = False
|
1517
|
-
for
|
1518
|
-
|
1519
|
-
|
1527
|
+
for expr in outer_expressions:
|
1528
|
+
column = col_func(expr.copy())
|
1529
|
+
if existing == quote_preserving_alias_or_name(expr):
|
1530
|
+
if isinstance(column.expression, exp.Alias):
|
1531
|
+
column.expression.set("alias", exp.to_identifier(new))
|
1520
1532
|
self._update_display_name_mapping([column], [new])
|
1521
1533
|
found_match = True
|
1522
1534
|
results.append(column)
|
@@ -1524,7 +1536,7 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
|
|
1524
1536
|
raise ValueError("Tried to rename a column that doesn't exist")
|
1525
1537
|
return self.select.__wrapped__(self, *results, skip_update_display_name_mapping=True) # type: ignore
|
1526
1538
|
|
1527
|
-
@operation(Operation.
|
1539
|
+
@operation(Operation.SELECT_CONSTRAINED)
|
1528
1540
|
def withColumnsRenamed(self, colsMap: t.Dict[str, str]) -> Self:
|
1529
1541
|
"""
|
1530
1542
|
Returns a new :class:`DataFrame` by renaming multiple columns. If a non-existing column is
|
@@ -1570,7 +1582,7 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
|
|
1570
1582
|
|
1571
1583
|
return self.select.__wrapped__(self, *results, skip_update_display_name_mapping=True) # type: ignore
|
1572
1584
|
|
1573
|
-
@operation(Operation.
|
1585
|
+
@operation(Operation.SELECT_CONSTRAINED)
|
1574
1586
|
def withColumns(self, *colsMap: t.Dict[str, Column]) -> Self:
|
1575
1587
|
"""
|
1576
1588
|
Returns a new :class:`DataFrame` by adding multiple columns or replacing the
|
@@ -1608,13 +1620,14 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
|
|
1608
1620
|
"""
|
1609
1621
|
if len(colsMap) != 1:
|
1610
1622
|
raise ValueError("Only a single map is supported")
|
1623
|
+
col_func = get_func_from_session("col")
|
1611
1624
|
col_map = {
|
1612
1625
|
self._ensure_and_normalize_col(k): (self._ensure_and_normalize_col(v), k)
|
1613
1626
|
for k, v in colsMap[0].items()
|
1614
1627
|
}
|
1615
|
-
|
1616
|
-
existing_col_names = [x.alias_or_name for x in
|
1617
|
-
select_columns =
|
1628
|
+
existing_expr = self._get_outer_select_expressions(self.expression)
|
1629
|
+
existing_col_names = [x.alias_or_name for x in existing_expr]
|
1630
|
+
select_columns = [col_func(x) for x in existing_expr]
|
1618
1631
|
for col, (col_value, display_name) in col_map.items():
|
1619
1632
|
column_name = col.alias_or_name
|
1620
1633
|
existing_col_index = (
|
@@ -1631,16 +1644,32 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
|
|
1631
1644
|
)
|
1632
1645
|
return self.select.__wrapped__(self, *select_columns, skip_update_display_name_mapping=True) # type: ignore
|
1633
1646
|
|
1634
|
-
@operation(Operation.
|
1647
|
+
@operation(Operation.SELECT_CONSTRAINED)
|
1635
1648
|
def drop(self, *cols: t.Union[str, Column]) -> Self:
|
1636
|
-
|
1637
|
-
|
1638
|
-
|
1639
|
-
|
1640
|
-
|
1641
|
-
|
1642
|
-
|
1643
|
-
|
1649
|
+
# Separate string column names from Column objects for different handling
|
1650
|
+
column_objs, column_names = partition_to(lambda x: isinstance(x, str), cols, list, set)
|
1651
|
+
|
1652
|
+
# Normalize only the Column objects (strings will be handled as unqualified)
|
1653
|
+
drop_cols = self._ensure_and_normalize_cols(column_objs) if column_objs else []
|
1654
|
+
|
1655
|
+
# Work directly with the expression's select columns to preserve table qualifiers
|
1656
|
+
current_expressions = self.expression.expressions
|
1657
|
+
drop_sql = {drop_col.expression.sql() for drop_col in drop_cols}
|
1658
|
+
|
1659
|
+
# Create a more sophisticated matching function that considers table qualifiers
|
1660
|
+
def should_drop_expression(expr: exp.Expression) -> bool:
|
1661
|
+
# Check against fully qualified Column objects and
|
1662
|
+
# Check against unqualified string column names (drop ALL columns with this name)
|
1663
|
+
if expr.sql() in drop_sql or (
|
1664
|
+
isinstance(expr, exp.Column) and expr.alias_or_name in column_names
|
1665
|
+
):
|
1666
|
+
return True
|
1667
|
+
return False
|
1668
|
+
|
1669
|
+
new_expressions = [expr for expr in current_expressions if not should_drop_expression(expr)]
|
1670
|
+
return self.select.__wrapped__( # type: ignore
|
1671
|
+
self, *new_expressions, skip_update_display_name_mapping=True
|
1672
|
+
)
|
1644
1673
|
|
1645
1674
|
@operation(Operation.LIMIT)
|
1646
1675
|
def limit(self, num: int) -> Self:
|
sqlframe/base/functions.py
CHANGED
@@ -1450,6 +1450,9 @@ def unix_timestamp(
|
|
1450
1450
|
|
1451
1451
|
session = _get_session()
|
1452
1452
|
|
1453
|
+
if session._is_duckdb or session._is_postgres or session._is_snowflake or session._is_bigquery:
|
1454
|
+
timestamp = Column.ensure_col(timestamp).cast("string")
|
1455
|
+
|
1453
1456
|
if session._is_bigquery:
|
1454
1457
|
return unix_timestamp_bgutil(timestamp, format)
|
1455
1458
|
|
@@ -6342,6 +6345,7 @@ def to_unix_timestamp(
|
|
6342
6345
|
|
6343
6346
|
if session._is_duckdb:
|
6344
6347
|
format = format or _BaseSession().default_time_format
|
6348
|
+
timestamp = Column.ensure_col(timestamp).cast("string")
|
6345
6349
|
|
6346
6350
|
if format is not None:
|
6347
6351
|
return Column.invoke_expression_over_column(
|
sqlframe/base/operations.py
CHANGED
@@ -27,9 +27,10 @@ class Operation(IntEnum):
|
|
27
27
|
WHERE = 2
|
28
28
|
GROUP_BY = 3
|
29
29
|
HAVING = 4
|
30
|
-
|
31
|
-
|
32
|
-
|
30
|
+
SELECT_CONSTRAINED = 5
|
31
|
+
SELECT = 6
|
32
|
+
ORDER_BY = 7
|
33
|
+
LIMIT = 8
|
33
34
|
|
34
35
|
|
35
36
|
# We want to decorate a function (self: DF, *args, **kwargs) -> T
|
sqlframe/base/session.py
CHANGED
@@ -179,7 +179,7 @@ class _BaseSession(t.Generic[CATALOG, READER, WRITER, DF, TABLE, CONN, UDF_REGIS
|
|
179
179
|
return self._table(self, *args, **kwargs)
|
180
180
|
|
181
181
|
def __new__(cls, *args, **kwargs):
|
182
|
-
if _BaseSession._instance is None:
|
182
|
+
if _BaseSession._instance is None or not isinstance(_BaseSession._instance, cls):
|
183
183
|
_BaseSession._instance = super().__new__(cls)
|
184
184
|
return _BaseSession._instance
|
185
185
|
|
@@ -194,6 +194,11 @@ class _BaseSession(t.Generic[CATALOG, READER, WRITER, DF, TABLE, CONN, UDF_REGIS
|
|
194
194
|
def getActiveSession(self) -> Self:
|
195
195
|
return self
|
196
196
|
|
197
|
+
def stop(self) -> None:
|
198
|
+
if connection := getattr(self, "_connection", None):
|
199
|
+
connection.close()
|
200
|
+
_BaseSession._instance = None
|
201
|
+
|
197
202
|
def range(
|
198
203
|
self,
|
199
204
|
start: int,
|
sqlframe/base/util.py
CHANGED
@@ -6,6 +6,7 @@ import string
|
|
6
6
|
import typing as t
|
7
7
|
import unicodedata
|
8
8
|
|
9
|
+
from more_itertools import partition
|
9
10
|
from sqlglot import expressions as exp
|
10
11
|
from sqlglot import parse_one, to_table
|
11
12
|
from sqlglot.dialects import DuckDB
|
@@ -537,3 +538,17 @@ def is_relativedelta_like(value: t.Any) -> bool:
|
|
537
538
|
and hasattr(value, "weeks")
|
538
539
|
and hasattr(value, "leapdays")
|
539
540
|
)
|
541
|
+
|
542
|
+
|
543
|
+
T = t.TypeVar("T")
|
544
|
+
R1 = t.TypeVar("R1")
|
545
|
+
R2 = t.TypeVar("R2")
|
546
|
+
|
547
|
+
|
548
|
+
def partition_to(
|
549
|
+
pred: t.Callable[[T], bool],
|
550
|
+
iterable: t.Iterable[T],
|
551
|
+
result1: t.Type[R1],
|
552
|
+
result2: t.Type[R2],
|
553
|
+
) -> tuple[R1, R2]:
|
554
|
+
return (lambda x, y: (result1(x), result2(y)))(*partition(pred, iterable)) # type: ignore
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: sqlframe
|
3
|
-
Version: 3.39.2
|
3
|
+
Version: 3.39.3
|
4
4
|
Summary: Turning PySpark Into a Universal DataFrame API
|
5
5
|
Home-page: https://github.com/eakmanrq/sqlframe
|
6
6
|
Author: Ryan Eakman
|
@@ -16,6 +16,7 @@ Classifier: Programming Language :: Python :: 3 :: Only
|
|
16
16
|
Requires-Python: >=3.9
|
17
17
|
Description-Content-Type: text/markdown
|
18
18
|
License-File: LICENSE
|
19
|
+
Requires-Dist: more-itertools
|
19
20
|
Requires-Dist: prettytable <4
|
20
21
|
Requires-Dist: sqlglot <27.9,>=24.0.0
|
21
22
|
Requires-Dist: typing-extensions
|
@@ -1,25 +1,25 @@
|
|
1
1
|
sqlframe/__init__.py,sha256=SB80yLTITBXHI2GCDS6n6bN5ObHqgPjfpRPAUwxaots,3403
|
2
|
-
sqlframe/_version.py,sha256=
|
2
|
+
sqlframe/_version.py,sha256=Vixv4hfZnHHXCXSmZD4wlHJUBkhCMzDLIyo5HqkJdes,714
|
3
3
|
sqlframe/py.typed,sha256=Nqnn8clbgv-5l0PgxcTOldg8mkMKrFn4TvPL-rYUUGg,1
|
4
4
|
sqlframe/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
5
5
|
sqlframe/base/_typing.py,sha256=b2clI5HI1zEZKB_3Msx3FeAJQyft44ubUifJwQRVXyQ,1298
|
6
6
|
sqlframe/base/catalog.py,sha256=-YulM2BMK8MoWbXi05AsJIPxd4AuiZDBCZuk4HoeMlE,38900
|
7
7
|
sqlframe/base/column.py,sha256=f6rK6-hTiNx9WwJP7t6tqL3xEC2gwERPDlhWCS5iCBw,21417
|
8
|
-
sqlframe/base/dataframe.py,sha256=
|
8
|
+
sqlframe/base/dataframe.py,sha256=HHjDaeap4_w4HRRj87lhQjFTczxLKhFD8b-9vhK2KsY,87592
|
9
9
|
sqlframe/base/decorators.py,sha256=IhE5xNQDkwJHacCvulq5WpUKyKmXm7dL2A3o5WuKGP4,2131
|
10
10
|
sqlframe/base/exceptions.py,sha256=9Uwvqn2eAkDpqm4BrRgbL61qM-GMCbJEMAW8otxO46s,370
|
11
11
|
sqlframe/base/function_alternatives.py,sha256=aTu3nQhIAkZoxrI1IpjpaHEAMxBNms0AnhS0EMR-TwY,51727
|
12
|
-
sqlframe/base/functions.py,sha256=
|
12
|
+
sqlframe/base/functions.py,sha256=RVNoRzM19BUwypdc0izYrrQe2Fe4_e9SbtpDkdD2bec,227981
|
13
13
|
sqlframe/base/group.py,sha256=fBm8EUve7W7xz11nybTXr09ih-yZxL_vvEiZVE1eb_0,12025
|
14
14
|
sqlframe/base/normalize.py,sha256=nXAJ5CwxVf4DV0GsH-q1w0p8gmjSMlv96k_ez1eVul8,3880
|
15
|
-
sqlframe/base/operations.py,sha256=
|
15
|
+
sqlframe/base/operations.py,sha256=8dkMNqjG3xP1w_6euAj8FpwweD7t590HYjoeoCr5LqI,4465
|
16
16
|
sqlframe/base/readerwriter.py,sha256=Nb2VJ_HBmLQp5mK8JhnFooZh2ydAaboCAFVPb-4MNX4,31241
|
17
|
-
sqlframe/base/session.py,sha256=
|
17
|
+
sqlframe/base/session.py,sha256=99X-ShK9ohHCX6WdIJs0HhjfK23snaE3Gv6RYc5wqUI,27687
|
18
18
|
sqlframe/base/table.py,sha256=rCeh1W5SWbtEVfkLAUiexzrZwNgmZeptLEmLcM1ABkE,6961
|
19
19
|
sqlframe/base/transforms.py,sha256=y0j3SGDz3XCmNGrvassk1S-owllUWfkHyMgZlY6SFO4,467
|
20
20
|
sqlframe/base/types.py,sha256=OktuJ5f7tEogOW0oupI0RBlHfzZMmKh7zGLke9cwllo,12305
|
21
21
|
sqlframe/base/udf.py,sha256=O6hMhBUy9NVv-mhJRtfFhXTIa_-Z8Y_FkmmuOHu0l90,1117
|
22
|
-
sqlframe/base/util.py,sha256=
|
22
|
+
sqlframe/base/util.py,sha256=11rBF_GBFXGBCllSdlWWWo8EiZZATJn4me3u7OUNIFg,19782
|
23
23
|
sqlframe/base/window.py,sha256=7NaKDTlhun-95LEghukBCjFBwq0RHrPaajWQNCsLxok,4818
|
24
24
|
sqlframe/base/mixins/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
25
25
|
sqlframe/base/mixins/catalog_mixins.py,sha256=9fZGWToz9xMJSzUl1vsVtj6TH3TysP3fBCKJLnGUQzE,23353
|
@@ -130,8 +130,8 @@ sqlframe/standalone/udf.py,sha256=azmgtUjHNIPs0WMVNId05SHwiYn41MKVBhKXsQJ5dmY,27
|
|
130
130
|
sqlframe/standalone/window.py,sha256=6GKPzuxeSapJakBaKBeT9VpED1ACdjggDv9JRILDyV0,35
|
131
131
|
sqlframe/testing/__init__.py,sha256=VVCosQhitU74A3NnE52O4mNtGZONapuEXcc20QmSlnQ,132
|
132
132
|
sqlframe/testing/utils.py,sha256=PFsGZpwNUE_4-g_f43_vstTqsK0AQ2lBneb5Eb6NkFo,13008
|
133
|
-
sqlframe-3.39.
|
134
|
-
sqlframe-3.39.
|
135
|
-
sqlframe-3.39.
|
136
|
-
sqlframe-3.39.
|
137
|
-
sqlframe-3.39.
|
133
|
+
sqlframe-3.39.3.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
|
134
|
+
sqlframe-3.39.3.dist-info/METADATA,sha256=eyKm8nGawKAujUOiCBn4PEFpSh_UzsnEV7LpKQVecRM,9069
|
135
|
+
sqlframe-3.39.3.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
|
136
|
+
sqlframe-3.39.3.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
|
137
|
+
sqlframe-3.39.3.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|