sqlframe 3.39.1__py3-none-any.whl → 3.39.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sqlframe/_version.py +3 -3
- sqlframe/base/dataframe.py +53 -24
- sqlframe/base/functions.py +12 -4
- sqlframe/base/operations.py +4 -3
- sqlframe/base/session.py +6 -1
- sqlframe/base/util.py +15 -0
- {sqlframe-3.39.1.dist-info → sqlframe-3.39.3.dist-info}/METADATA +3 -2
- {sqlframe-3.39.1.dist-info → sqlframe-3.39.3.dist-info}/RECORD +11 -11
- {sqlframe-3.39.1.dist-info → sqlframe-3.39.3.dist-info}/LICENSE +0 -0
- {sqlframe-3.39.1.dist-info → sqlframe-3.39.3.dist-info}/WHEEL +0 -0
- {sqlframe-3.39.1.dist-info → sqlframe-3.39.3.dist-info}/top_level.txt +0 -0
sqlframe/_version.py
CHANGED
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
|
|
28
28
|
commit_id: COMMIT_ID
|
29
29
|
__commit_id__: COMMIT_ID
|
30
30
|
|
31
|
-
__version__ = version = '3.39.1'
|
32
|
-
__version_tuple__ = version_tuple = (3, 39, 1)
|
31
|
+
__version__ = version = '3.39.3'
|
32
|
+
__version_tuple__ = version_tuple = (3, 39, 3)
|
33
33
|
|
34
|
-
__commit_id__ = commit_id = '
|
34
|
+
__commit_id__ = commit_id = 'g9d915cb1e'
|
sqlframe/base/dataframe.py
CHANGED
@@ -16,6 +16,7 @@ from dataclasses import dataclass
|
|
16
16
|
from uuid import uuid4
|
17
17
|
|
18
18
|
import sqlglot
|
19
|
+
from more_itertools import partition
|
19
20
|
from prettytable import PrettyTable
|
20
21
|
from sqlglot import Dialect, maybe_parse
|
21
22
|
from sqlglot import expressions as exp
|
@@ -31,6 +32,7 @@ from sqlframe.base.util import (
|
|
31
32
|
get_func_from_session,
|
32
33
|
get_tables_from_expression_with_join,
|
33
34
|
normalize_string,
|
35
|
+
partition_to,
|
34
36
|
quote_preserving_alias_or_name,
|
35
37
|
sqlglot_to_spark,
|
36
38
|
verify_openai_installed,
|
@@ -540,16 +542,23 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
|
|
540
542
|
expression.set("with", exp.With(expressions=existing_ctes))
|
541
543
|
return expression
|
542
544
|
|
545
|
+
@classmethod
|
546
|
+
def _get_outer_select_expressions(
|
547
|
+
cls, item: exp.Expression
|
548
|
+
) -> t.List[t.Union[exp.Column, exp.Alias]]:
|
549
|
+
outer_select = item.find(exp.Select)
|
550
|
+
if outer_select:
|
551
|
+
return outer_select.expressions
|
552
|
+
return []
|
553
|
+
|
543
554
|
@classmethod
|
544
555
|
def _get_outer_select_columns(cls, item: exp.Expression) -> t.List[Column]:
|
545
556
|
from sqlframe.base.session import _BaseSession
|
546
557
|
|
547
558
|
col = get_func_from_session("col", _BaseSession())
|
548
559
|
|
549
|
-
|
550
|
-
|
551
|
-
return [col(quote_preserving_alias_or_name(x)) for x in outer_select.expressions]
|
552
|
-
return []
|
560
|
+
outer_expressions = cls._get_outer_select_expressions(item)
|
561
|
+
return [col(quote_preserving_alias_or_name(x)) for x in outer_expressions]
|
553
562
|
|
554
563
|
def _create_hash_from_expression(self, expression: exp.Expression) -> str:
|
555
564
|
from sqlframe.base.session import _BaseSession
|
@@ -1503,20 +1512,23 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
|
|
1503
1512
|
"""
|
1504
1513
|
return func(self, *args, **kwargs) # type: ignore
|
1505
1514
|
|
1506
|
-
@operation(Operation.
|
1515
|
+
@operation(Operation.SELECT_CONSTRAINED)
|
1507
1516
|
def withColumn(self, colName: str, col: Column) -> Self:
|
1508
1517
|
return self.withColumns.__wrapped__(self, {colName: col}) # type: ignore
|
1509
1518
|
|
1510
|
-
@operation(Operation.
|
1519
|
+
@operation(Operation.SELECT_CONSTRAINED)
|
1511
1520
|
def withColumnRenamed(self, existing: str, new: str) -> Self:
|
1521
|
+
col_func = get_func_from_session("col", self.session)
|
1512
1522
|
expression = self.expression.copy()
|
1513
1523
|
existing = self.session._normalize_string(existing)
|
1514
|
-
|
1524
|
+
outer_expressions = self._get_outer_select_expressions(expression)
|
1515
1525
|
results = []
|
1516
1526
|
found_match = False
|
1517
|
-
for
|
1518
|
-
|
1519
|
-
|
1527
|
+
for expr in outer_expressions:
|
1528
|
+
column = col_func(expr.copy())
|
1529
|
+
if existing == quote_preserving_alias_or_name(expr):
|
1530
|
+
if isinstance(column.expression, exp.Alias):
|
1531
|
+
column.expression.set("alias", exp.to_identifier(new))
|
1520
1532
|
self._update_display_name_mapping([column], [new])
|
1521
1533
|
found_match = True
|
1522
1534
|
results.append(column)
|
@@ -1524,7 +1536,7 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
|
|
1524
1536
|
raise ValueError("Tried to rename a column that doesn't exist")
|
1525
1537
|
return self.select.__wrapped__(self, *results, skip_update_display_name_mapping=True) # type: ignore
|
1526
1538
|
|
1527
|
-
@operation(Operation.
|
1539
|
+
@operation(Operation.SELECT_CONSTRAINED)
|
1528
1540
|
def withColumnsRenamed(self, colsMap: t.Dict[str, str]) -> Self:
|
1529
1541
|
"""
|
1530
1542
|
Returns a new :class:`DataFrame` by renaming multiple columns. If a non-existing column is
|
@@ -1570,7 +1582,7 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
|
|
1570
1582
|
|
1571
1583
|
return self.select.__wrapped__(self, *results, skip_update_display_name_mapping=True) # type: ignore
|
1572
1584
|
|
1573
|
-
@operation(Operation.
|
1585
|
+
@operation(Operation.SELECT_CONSTRAINED)
|
1574
1586
|
def withColumns(self, *colsMap: t.Dict[str, Column]) -> Self:
|
1575
1587
|
"""
|
1576
1588
|
Returns a new :class:`DataFrame` by adding multiple columns or replacing the
|
@@ -1608,13 +1620,14 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
|
|
1608
1620
|
"""
|
1609
1621
|
if len(colsMap) != 1:
|
1610
1622
|
raise ValueError("Only a single map is supported")
|
1623
|
+
col_func = get_func_from_session("col")
|
1611
1624
|
col_map = {
|
1612
1625
|
self._ensure_and_normalize_col(k): (self._ensure_and_normalize_col(v), k)
|
1613
1626
|
for k, v in colsMap[0].items()
|
1614
1627
|
}
|
1615
|
-
|
1616
|
-
existing_col_names = [x.alias_or_name for x in
|
1617
|
-
select_columns =
|
1628
|
+
existing_expr = self._get_outer_select_expressions(self.expression)
|
1629
|
+
existing_col_names = [x.alias_or_name for x in existing_expr]
|
1630
|
+
select_columns = [col_func(x) for x in existing_expr]
|
1618
1631
|
for col, (col_value, display_name) in col_map.items():
|
1619
1632
|
column_name = col.alias_or_name
|
1620
1633
|
existing_col_index = (
|
@@ -1631,16 +1644,32 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
|
|
1631
1644
|
)
|
1632
1645
|
return self.select.__wrapped__(self, *select_columns, skip_update_display_name_mapping=True) # type: ignore
|
1633
1646
|
|
1634
|
-
@operation(Operation.
|
1647
|
+
@operation(Operation.SELECT_CONSTRAINED)
|
1635
1648
|
def drop(self, *cols: t.Union[str, Column]) -> Self:
|
1636
|
-
|
1637
|
-
|
1638
|
-
|
1639
|
-
|
1640
|
-
|
1641
|
-
|
1642
|
-
|
1643
|
-
|
1649
|
+
# Separate string column names from Column objects for different handling
|
1650
|
+
column_objs, column_names = partition_to(lambda x: isinstance(x, str), cols, list, set)
|
1651
|
+
|
1652
|
+
# Normalize only the Column objects (strings will be handled as unqualified)
|
1653
|
+
drop_cols = self._ensure_and_normalize_cols(column_objs) if column_objs else []
|
1654
|
+
|
1655
|
+
# Work directly with the expression's select columns to preserve table qualifiers
|
1656
|
+
current_expressions = self.expression.expressions
|
1657
|
+
drop_sql = {drop_col.expression.sql() for drop_col in drop_cols}
|
1658
|
+
|
1659
|
+
# Create a more sophisticated matching function that considers table qualifiers
|
1660
|
+
def should_drop_expression(expr: exp.Expression) -> bool:
|
1661
|
+
# Check against fully qualified Column objects and
|
1662
|
+
# Check against unqualified string column names (drop ALL columns with this name)
|
1663
|
+
if expr.sql() in drop_sql or (
|
1664
|
+
isinstance(expr, exp.Column) and expr.alias_or_name in column_names
|
1665
|
+
):
|
1666
|
+
return True
|
1667
|
+
return False
|
1668
|
+
|
1669
|
+
new_expressions = [expr for expr in current_expressions if not should_drop_expression(expr)]
|
1670
|
+
return self.select.__wrapped__( # type: ignore
|
1671
|
+
self, *new_expressions, skip_update_display_name_mapping=True
|
1672
|
+
)
|
1644
1673
|
|
1645
1674
|
@operation(Operation.LIMIT)
|
1646
1675
|
def limit(self, num: int) -> Self:
|
sqlframe/base/functions.py
CHANGED
@@ -1450,6 +1450,9 @@ def unix_timestamp(
|
|
1450
1450
|
|
1451
1451
|
session = _get_session()
|
1452
1452
|
|
1453
|
+
if session._is_duckdb or session._is_postgres or session._is_snowflake or session._is_bigquery:
|
1454
|
+
timestamp = Column.ensure_col(timestamp).cast("string")
|
1455
|
+
|
1453
1456
|
if session._is_bigquery:
|
1454
1457
|
return unix_timestamp_bgutil(timestamp, format)
|
1455
1458
|
|
@@ -1984,7 +1987,7 @@ def initcap(col: ColumnOrName) -> Column:
|
|
1984
1987
|
|
1985
1988
|
@meta()
|
1986
1989
|
def soundex(col: ColumnOrName) -> Column:
|
1987
|
-
return Column.
|
1990
|
+
return Column.invoke_expression_over_column(col, expression.Soundex)
|
1988
1991
|
|
1989
1992
|
|
1990
1993
|
@meta(unsupported_engines=["postgres", "snowflake"])
|
@@ -2053,7 +2056,11 @@ def bit_length(col: ColumnOrName) -> Column:
|
|
2053
2056
|
|
2054
2057
|
@meta()
|
2055
2058
|
def translate(srcCol: ColumnOrName, matching: str, replace: str) -> Column:
|
2056
|
-
return Column.
|
2059
|
+
return Column.invoke_expression_over_column(
|
2060
|
+
srcCol,
|
2061
|
+
expression.Translate,
|
2062
|
+
**{"from": lit(matching).column_expression, "to": lit(replace).column_expression},
|
2063
|
+
)
|
2057
2064
|
|
2058
2065
|
|
2059
2066
|
@meta()
|
@@ -3380,7 +3387,7 @@ def get_active_spark_context() -> SparkContext:
|
|
3380
3387
|
return session.spark_session.sparkContext
|
3381
3388
|
|
3382
3389
|
|
3383
|
-
@meta(
|
3390
|
+
@meta()
|
3384
3391
|
def grouping(col: ColumnOrName) -> Column:
|
3385
3392
|
"""
|
3386
3393
|
Aggregate function: indicates whether a specified column in a GROUP BY list is aggregated
|
@@ -3413,7 +3420,7 @@ def grouping(col: ColumnOrName) -> Column:
|
|
3413
3420
|
| Bob| 0| 5|
|
3414
3421
|
+-----+--------------+--------+
|
3415
3422
|
"""
|
3416
|
-
return Column.
|
3423
|
+
return Column(expression.Grouping(expressions=[Column.ensure_col(col).column_expression]))
|
3417
3424
|
|
3418
3425
|
|
3419
3426
|
@meta(unsupported_engines="*")
|
@@ -6338,6 +6345,7 @@ def to_unix_timestamp(
|
|
6338
6345
|
|
6339
6346
|
if session._is_duckdb:
|
6340
6347
|
format = format or _BaseSession().default_time_format
|
6348
|
+
timestamp = Column.ensure_col(timestamp).cast("string")
|
6341
6349
|
|
6342
6350
|
if format is not None:
|
6343
6351
|
return Column.invoke_expression_over_column(
|
sqlframe/base/operations.py
CHANGED
@@ -27,9 +27,10 @@ class Operation(IntEnum):
|
|
27
27
|
WHERE = 2
|
28
28
|
GROUP_BY = 3
|
29
29
|
HAVING = 4
|
30
|
-
|
31
|
-
|
32
|
-
|
30
|
+
SELECT_CONSTRAINED = 5
|
31
|
+
SELECT = 6
|
32
|
+
ORDER_BY = 7
|
33
|
+
LIMIT = 8
|
33
34
|
|
34
35
|
|
35
36
|
# We want to decorate a function (self: DF, *args, **kwargs) -> T
|
sqlframe/base/session.py
CHANGED
@@ -179,7 +179,7 @@ class _BaseSession(t.Generic[CATALOG, READER, WRITER, DF, TABLE, CONN, UDF_REGIS
|
|
179
179
|
return self._table(self, *args, **kwargs)
|
180
180
|
|
181
181
|
def __new__(cls, *args, **kwargs):
|
182
|
-
if _BaseSession._instance is None:
|
182
|
+
if _BaseSession._instance is None or not isinstance(_BaseSession._instance, cls):
|
183
183
|
_BaseSession._instance = super().__new__(cls)
|
184
184
|
return _BaseSession._instance
|
185
185
|
|
@@ -194,6 +194,11 @@ class _BaseSession(t.Generic[CATALOG, READER, WRITER, DF, TABLE, CONN, UDF_REGIS
|
|
194
194
|
def getActiveSession(self) -> Self:
|
195
195
|
return self
|
196
196
|
|
197
|
+
def stop(self) -> None:
|
198
|
+
if connection := getattr(self, "_connection", None):
|
199
|
+
connection.close()
|
200
|
+
_BaseSession._instance = None
|
201
|
+
|
197
202
|
def range(
|
198
203
|
self,
|
199
204
|
start: int,
|
sqlframe/base/util.py
CHANGED
@@ -6,6 +6,7 @@ import string
|
|
6
6
|
import typing as t
|
7
7
|
import unicodedata
|
8
8
|
|
9
|
+
from more_itertools import partition
|
9
10
|
from sqlglot import expressions as exp
|
10
11
|
from sqlglot import parse_one, to_table
|
11
12
|
from sqlglot.dialects import DuckDB
|
@@ -537,3 +538,17 @@ def is_relativedelta_like(value: t.Any) -> bool:
|
|
537
538
|
and hasattr(value, "weeks")
|
538
539
|
and hasattr(value, "leapdays")
|
539
540
|
)
|
541
|
+
|
542
|
+
|
543
|
+
T = t.TypeVar("T")
|
544
|
+
R1 = t.TypeVar("R1")
|
545
|
+
R2 = t.TypeVar("R2")
|
546
|
+
|
547
|
+
|
548
|
+
def partition_to(
|
549
|
+
pred: t.Callable[[T], bool],
|
550
|
+
iterable: t.Iterable[T],
|
551
|
+
result1: t.Type[R1],
|
552
|
+
result2: t.Type[R2],
|
553
|
+
) -> tuple[R1, R2]:
|
554
|
+
return (lambda x, y: (result1(x), result2(y)))(*partition(pred, iterable)) # type: ignore
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: sqlframe
|
3
|
-
Version: 3.39.1
|
3
|
+
Version: 3.39.3
|
4
4
|
Summary: Turning PySpark Into a Universal DataFrame API
|
5
5
|
Home-page: https://github.com/eakmanrq/sqlframe
|
6
6
|
Author: Ryan Eakman
|
@@ -16,8 +16,9 @@ Classifier: Programming Language :: Python :: 3 :: Only
|
|
16
16
|
Requires-Python: >=3.9
|
17
17
|
Description-Content-Type: text/markdown
|
18
18
|
License-File: LICENSE
|
19
|
+
Requires-Dist: more-itertools
|
19
20
|
Requires-Dist: prettytable <4
|
20
|
-
Requires-Dist: sqlglot <27.
|
21
|
+
Requires-Dist: sqlglot <27.9,>=24.0.0
|
21
22
|
Requires-Dist: typing-extensions
|
22
23
|
Provides-Extra: bigquery
|
23
24
|
Requires-Dist: google-cloud-bigquery-storage <3,>=2 ; extra == 'bigquery'
|
@@ -1,25 +1,25 @@
|
|
1
1
|
sqlframe/__init__.py,sha256=SB80yLTITBXHI2GCDS6n6bN5ObHqgPjfpRPAUwxaots,3403
|
2
|
-
sqlframe/_version.py,sha256=
|
2
|
+
sqlframe/_version.py,sha256=Vixv4hfZnHHXCXSmZD4wlHJUBkhCMzDLIyo5HqkJdes,714
|
3
3
|
sqlframe/py.typed,sha256=Nqnn8clbgv-5l0PgxcTOldg8mkMKrFn4TvPL-rYUUGg,1
|
4
4
|
sqlframe/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
5
5
|
sqlframe/base/_typing.py,sha256=b2clI5HI1zEZKB_3Msx3FeAJQyft44ubUifJwQRVXyQ,1298
|
6
6
|
sqlframe/base/catalog.py,sha256=-YulM2BMK8MoWbXi05AsJIPxd4AuiZDBCZuk4HoeMlE,38900
|
7
7
|
sqlframe/base/column.py,sha256=f6rK6-hTiNx9WwJP7t6tqL3xEC2gwERPDlhWCS5iCBw,21417
|
8
|
-
sqlframe/base/dataframe.py,sha256=
|
8
|
+
sqlframe/base/dataframe.py,sha256=HHjDaeap4_w4HRRj87lhQjFTczxLKhFD8b-9vhK2KsY,87592
|
9
9
|
sqlframe/base/decorators.py,sha256=IhE5xNQDkwJHacCvulq5WpUKyKmXm7dL2A3o5WuKGP4,2131
|
10
10
|
sqlframe/base/exceptions.py,sha256=9Uwvqn2eAkDpqm4BrRgbL61qM-GMCbJEMAW8otxO46s,370
|
11
11
|
sqlframe/base/function_alternatives.py,sha256=aTu3nQhIAkZoxrI1IpjpaHEAMxBNms0AnhS0EMR-TwY,51727
|
12
|
-
sqlframe/base/functions.py,sha256=
|
12
|
+
sqlframe/base/functions.py,sha256=RVNoRzM19BUwypdc0izYrrQe2Fe4_e9SbtpDkdD2bec,227981
|
13
13
|
sqlframe/base/group.py,sha256=fBm8EUve7W7xz11nybTXr09ih-yZxL_vvEiZVE1eb_0,12025
|
14
14
|
sqlframe/base/normalize.py,sha256=nXAJ5CwxVf4DV0GsH-q1w0p8gmjSMlv96k_ez1eVul8,3880
|
15
|
-
sqlframe/base/operations.py,sha256=
|
15
|
+
sqlframe/base/operations.py,sha256=8dkMNqjG3xP1w_6euAj8FpwweD7t590HYjoeoCr5LqI,4465
|
16
16
|
sqlframe/base/readerwriter.py,sha256=Nb2VJ_HBmLQp5mK8JhnFooZh2ydAaboCAFVPb-4MNX4,31241
|
17
|
-
sqlframe/base/session.py,sha256=
|
17
|
+
sqlframe/base/session.py,sha256=99X-ShK9ohHCX6WdIJs0HhjfK23snaE3Gv6RYc5wqUI,27687
|
18
18
|
sqlframe/base/table.py,sha256=rCeh1W5SWbtEVfkLAUiexzrZwNgmZeptLEmLcM1ABkE,6961
|
19
19
|
sqlframe/base/transforms.py,sha256=y0j3SGDz3XCmNGrvassk1S-owllUWfkHyMgZlY6SFO4,467
|
20
20
|
sqlframe/base/types.py,sha256=OktuJ5f7tEogOW0oupI0RBlHfzZMmKh7zGLke9cwllo,12305
|
21
21
|
sqlframe/base/udf.py,sha256=O6hMhBUy9NVv-mhJRtfFhXTIa_-Z8Y_FkmmuOHu0l90,1117
|
22
|
-
sqlframe/base/util.py,sha256=
|
22
|
+
sqlframe/base/util.py,sha256=11rBF_GBFXGBCllSdlWWWo8EiZZATJn4me3u7OUNIFg,19782
|
23
23
|
sqlframe/base/window.py,sha256=7NaKDTlhun-95LEghukBCjFBwq0RHrPaajWQNCsLxok,4818
|
24
24
|
sqlframe/base/mixins/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
25
25
|
sqlframe/base/mixins/catalog_mixins.py,sha256=9fZGWToz9xMJSzUl1vsVtj6TH3TysP3fBCKJLnGUQzE,23353
|
@@ -130,8 +130,8 @@ sqlframe/standalone/udf.py,sha256=azmgtUjHNIPs0WMVNId05SHwiYn41MKVBhKXsQJ5dmY,27
|
|
130
130
|
sqlframe/standalone/window.py,sha256=6GKPzuxeSapJakBaKBeT9VpED1ACdjggDv9JRILDyV0,35
|
131
131
|
sqlframe/testing/__init__.py,sha256=VVCosQhitU74A3NnE52O4mNtGZONapuEXcc20QmSlnQ,132
|
132
132
|
sqlframe/testing/utils.py,sha256=PFsGZpwNUE_4-g_f43_vstTqsK0AQ2lBneb5Eb6NkFo,13008
|
133
|
-
sqlframe-3.39.
|
134
|
-
sqlframe-3.39.
|
135
|
-
sqlframe-3.39.
|
136
|
-
sqlframe-3.39.
|
137
|
-
sqlframe-3.39.
|
133
|
+
sqlframe-3.39.3.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
|
134
|
+
sqlframe-3.39.3.dist-info/METADATA,sha256=eyKm8nGawKAujUOiCBn4PEFpSh_UzsnEV7LpKQVecRM,9069
|
135
|
+
sqlframe-3.39.3.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
|
136
|
+
sqlframe-3.39.3.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
|
137
|
+
sqlframe-3.39.3.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|