sqlframe 3.39.3__py3-none-any.whl → 3.39.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sqlframe/_version.py +3 -3
- sqlframe/base/dataframe.py +16 -28
- sqlframe/base/functions.py +5 -12
- sqlframe/base/operations.py +3 -4
- {sqlframe-3.39.3.dist-info → sqlframe-3.39.4.dist-info}/METADATA +2 -2
- {sqlframe-3.39.3.dist-info → sqlframe-3.39.4.dist-info}/RECORD +9 -9
- {sqlframe-3.39.3.dist-info → sqlframe-3.39.4.dist-info}/LICENSE +0 -0
- {sqlframe-3.39.3.dist-info → sqlframe-3.39.4.dist-info}/WHEEL +0 -0
- {sqlframe-3.39.3.dist-info → sqlframe-3.39.4.dist-info}/top_level.txt +0 -0
sqlframe/_version.py
CHANGED
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
|
|
28
28
|
commit_id: COMMIT_ID
|
29
29
|
__commit_id__: COMMIT_ID
|
30
30
|
|
31
|
-
__version__ = version = '3.39.3'
|
32
|
-
__version_tuple__ = version_tuple = (3, 39, 3)
|
31
|
+
__version__ = version = '3.39.4'
|
32
|
+
__version_tuple__ = version_tuple = (3, 39, 4)
|
33
33
|
|
34
|
-
__commit_id__ = commit_id = '
|
34
|
+
__commit_id__ = commit_id = 'g7103a1e73'
|
sqlframe/base/dataframe.py
CHANGED
@@ -16,7 +16,6 @@ from dataclasses import dataclass
|
|
16
16
|
from uuid import uuid4
|
17
17
|
|
18
18
|
import sqlglot
|
19
|
-
from more_itertools import partition
|
20
19
|
from prettytable import PrettyTable
|
21
20
|
from sqlglot import Dialect, maybe_parse
|
22
21
|
from sqlglot import expressions as exp
|
@@ -542,23 +541,16 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
|
|
542
541
|
expression.set("with", exp.With(expressions=existing_ctes))
|
543
542
|
return expression
|
544
543
|
|
545
|
-
@classmethod
|
546
|
-
def _get_outer_select_expressions(
|
547
|
-
cls, item: exp.Expression
|
548
|
-
) -> t.List[t.Union[exp.Column, exp.Alias]]:
|
549
|
-
outer_select = item.find(exp.Select)
|
550
|
-
if outer_select:
|
551
|
-
return outer_select.expressions
|
552
|
-
return []
|
553
|
-
|
554
544
|
@classmethod
|
555
545
|
def _get_outer_select_columns(cls, item: exp.Expression) -> t.List[Column]:
|
556
546
|
from sqlframe.base.session import _BaseSession
|
557
547
|
|
558
548
|
col = get_func_from_session("col", _BaseSession())
|
559
549
|
|
560
|
-
|
561
|
-
|
550
|
+
outer_select = item.find(exp.Select)
|
551
|
+
if outer_select:
|
552
|
+
return [col(quote_preserving_alias_or_name(x)) for x in outer_select.expressions]
|
553
|
+
return []
|
562
554
|
|
563
555
|
def _create_hash_from_expression(self, expression: exp.Expression) -> str:
|
564
556
|
from sqlframe.base.session import _BaseSession
|
@@ -1512,23 +1504,20 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
|
|
1512
1504
|
"""
|
1513
1505
|
return func(self, *args, **kwargs) # type: ignore
|
1514
1506
|
|
1515
|
-
@operation(Operation.
|
1507
|
+
@operation(Operation.SELECT)
|
1516
1508
|
def withColumn(self, colName: str, col: Column) -> Self:
|
1517
1509
|
return self.withColumns.__wrapped__(self, {colName: col}) # type: ignore
|
1518
1510
|
|
1519
|
-
@operation(Operation.
|
1511
|
+
@operation(Operation.SELECT)
|
1520
1512
|
def withColumnRenamed(self, existing: str, new: str) -> Self:
|
1521
|
-
col_func = get_func_from_session("col", self.session)
|
1522
1513
|
expression = self.expression.copy()
|
1523
1514
|
existing = self.session._normalize_string(existing)
|
1524
|
-
|
1515
|
+
columns = self._get_outer_select_columns(expression)
|
1525
1516
|
results = []
|
1526
1517
|
found_match = False
|
1527
|
-
for
|
1528
|
-
column
|
1529
|
-
|
1530
|
-
if isinstance(column.expression, exp.Alias):
|
1531
|
-
column.expression.set("alias", exp.to_identifier(new))
|
1518
|
+
for column in columns:
|
1519
|
+
if column.alias_or_name == existing:
|
1520
|
+
column = column.alias(new)
|
1532
1521
|
self._update_display_name_mapping([column], [new])
|
1533
1522
|
found_match = True
|
1534
1523
|
results.append(column)
|
@@ -1536,7 +1525,7 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
|
|
1536
1525
|
raise ValueError("Tried to rename a column that doesn't exist")
|
1537
1526
|
return self.select.__wrapped__(self, *results, skip_update_display_name_mapping=True) # type: ignore
|
1538
1527
|
|
1539
|
-
@operation(Operation.
|
1528
|
+
@operation(Operation.SELECT)
|
1540
1529
|
def withColumnsRenamed(self, colsMap: t.Dict[str, str]) -> Self:
|
1541
1530
|
"""
|
1542
1531
|
Returns a new :class:`DataFrame` by renaming multiple columns. If a non-existing column is
|
@@ -1582,7 +1571,7 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
|
|
1582
1571
|
|
1583
1572
|
return self.select.__wrapped__(self, *results, skip_update_display_name_mapping=True) # type: ignore
|
1584
1573
|
|
1585
|
-
@operation(Operation.
|
1574
|
+
@operation(Operation.SELECT)
|
1586
1575
|
def withColumns(self, *colsMap: t.Dict[str, Column]) -> Self:
|
1587
1576
|
"""
|
1588
1577
|
Returns a new :class:`DataFrame` by adding multiple columns or replacing the
|
@@ -1620,14 +1609,13 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
|
|
1620
1609
|
"""
|
1621
1610
|
if len(colsMap) != 1:
|
1622
1611
|
raise ValueError("Only a single map is supported")
|
1623
|
-
col_func = get_func_from_session("col")
|
1624
1612
|
col_map = {
|
1625
1613
|
self._ensure_and_normalize_col(k): (self._ensure_and_normalize_col(v), k)
|
1626
1614
|
for k, v in colsMap[0].items()
|
1627
1615
|
}
|
1628
|
-
|
1629
|
-
existing_col_names = [x.alias_or_name for x in
|
1630
|
-
select_columns =
|
1616
|
+
existing_cols = self._get_outer_select_columns(self.expression)
|
1617
|
+
existing_col_names = [x.alias_or_name for x in existing_cols]
|
1618
|
+
select_columns = existing_cols
|
1631
1619
|
for col, (col_value, display_name) in col_map.items():
|
1632
1620
|
column_name = col.alias_or_name
|
1633
1621
|
existing_col_index = (
|
@@ -1644,7 +1632,7 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
|
|
1644
1632
|
)
|
1645
1633
|
return self.select.__wrapped__(self, *select_columns, skip_update_display_name_mapping=True) # type: ignore
|
1646
1634
|
|
1647
|
-
@operation(Operation.
|
1635
|
+
@operation(Operation.SELECT)
|
1648
1636
|
def drop(self, *cols: t.Union[str, Column]) -> Self:
|
1649
1637
|
# Separate string column names from Column objects for different handling
|
1650
1638
|
column_objs, column_names = partition_to(lambda x: isinstance(x, str), cols, list, set)
|
sqlframe/base/functions.py
CHANGED
@@ -37,9 +37,7 @@ def _get_session() -> _BaseSession:
|
|
37
37
|
|
38
38
|
@meta()
|
39
39
|
def col(column_name: t.Union[ColumnOrName, t.Any]) -> Column:
|
40
|
-
|
41
|
-
|
42
|
-
dialect = _BaseSession().input_dialect
|
40
|
+
dialect = _get_session().input_dialect
|
43
41
|
if isinstance(column_name, str):
|
44
42
|
col_expression = expression.to_column(column_name, dialect=dialect).transform(
|
45
43
|
dialect.normalize_identifier
|
@@ -662,9 +660,7 @@ def grouping_id(*cols: ColumnOrName) -> Column:
|
|
662
660
|
|
663
661
|
@meta()
|
664
662
|
def input_file_name() -> Column:
|
665
|
-
|
666
|
-
|
667
|
-
return Column(expression.Literal.string(_BaseSession()._last_loaded_file or ""))
|
663
|
+
return Column(expression.Literal.string(_get_session()._last_loaded_file or ""))
|
668
664
|
|
669
665
|
|
670
666
|
@meta()
|
@@ -959,12 +955,10 @@ def current_timestamp() -> Column:
|
|
959
955
|
|
960
956
|
@meta()
|
961
957
|
def date_format(col: ColumnOrName, format: str) -> Column:
|
962
|
-
from sqlframe.base.session import _BaseSession
|
963
|
-
|
964
958
|
return Column.invoke_expression_over_column(
|
965
959
|
Column(expression.TimeStrToTime(this=Column.ensure_col(col).column_expression)),
|
966
960
|
expression.TimeToStr,
|
967
|
-
format=
|
961
|
+
format=_get_session().format_time(format),
|
968
962
|
)
|
969
963
|
|
970
964
|
|
@@ -3378,10 +3372,9 @@ def get(col: ColumnOrName, index: t.Union[ColumnOrName, int]) -> Column:
|
|
3378
3372
|
def get_active_spark_context() -> SparkContext:
|
3379
3373
|
"""Raise RuntimeError if SparkContext is not initialized,
|
3380
3374
|
otherwise, returns the active SparkContext."""
|
3381
|
-
from sqlframe.base.session import _BaseSession
|
3382
3375
|
from sqlframe.spark.session import SparkSession
|
3383
3376
|
|
3384
|
-
session
|
3377
|
+
session = _get_session()
|
3385
3378
|
if not isinstance(session, SparkSession):
|
3386
3379
|
raise RuntimeError("This function is only available in SparkSession.")
|
3387
3380
|
return session.spark_session.sparkContext
|
@@ -6344,7 +6337,7 @@ def to_unix_timestamp(
|
|
6344
6337
|
session = _get_session()
|
6345
6338
|
|
6346
6339
|
if session._is_duckdb:
|
6347
|
-
format = format or
|
6340
|
+
format = format or session.default_time_format
|
6348
6341
|
timestamp = Column.ensure_col(timestamp).cast("string")
|
6349
6342
|
|
6350
6343
|
if format is not None:
|
sqlframe/base/operations.py
CHANGED
@@ -27,10 +27,9 @@ class Operation(IntEnum):
|
|
27
27
|
WHERE = 2
|
28
28
|
GROUP_BY = 3
|
29
29
|
HAVING = 4
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
LIMIT = 8
|
30
|
+
SELECT = 5
|
31
|
+
ORDER_BY = 6
|
32
|
+
LIMIT = 7
|
34
33
|
|
35
34
|
|
36
35
|
# We want to decorate a function (self: DF, *args, **kwargs) -> T
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: sqlframe
|
3
|
-
Version: 3.39.3
|
3
|
+
Version: 3.39.4
|
4
4
|
Summary: Turning PySpark Into a Universal DataFrame API
|
5
5
|
Home-page: https://github.com/eakmanrq/sqlframe
|
6
6
|
Author: Ryan Eakman
|
@@ -18,7 +18,7 @@ Description-Content-Type: text/markdown
|
|
18
18
|
License-File: LICENSE
|
19
19
|
Requires-Dist: more-itertools
|
20
20
|
Requires-Dist: prettytable <4
|
21
|
-
Requires-Dist: sqlglot <27.
|
21
|
+
Requires-Dist: sqlglot <27.10,>=24.0.0
|
22
22
|
Requires-Dist: typing-extensions
|
23
23
|
Provides-Extra: bigquery
|
24
24
|
Requires-Dist: google-cloud-bigquery-storage <3,>=2 ; extra == 'bigquery'
|
@@ -1,18 +1,18 @@
|
|
1
1
|
sqlframe/__init__.py,sha256=SB80yLTITBXHI2GCDS6n6bN5ObHqgPjfpRPAUwxaots,3403
|
2
|
-
sqlframe/_version.py,sha256=
|
2
|
+
sqlframe/_version.py,sha256=RTACos9x6Q52oWKZxADQ1aU73aw2iil2MarQYuwWMsM,714
|
3
3
|
sqlframe/py.typed,sha256=Nqnn8clbgv-5l0PgxcTOldg8mkMKrFn4TvPL-rYUUGg,1
|
4
4
|
sqlframe/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
5
5
|
sqlframe/base/_typing.py,sha256=b2clI5HI1zEZKB_3Msx3FeAJQyft44ubUifJwQRVXyQ,1298
|
6
6
|
sqlframe/base/catalog.py,sha256=-YulM2BMK8MoWbXi05AsJIPxd4AuiZDBCZuk4HoeMlE,38900
|
7
7
|
sqlframe/base/column.py,sha256=f6rK6-hTiNx9WwJP7t6tqL3xEC2gwERPDlhWCS5iCBw,21417
|
8
|
-
sqlframe/base/dataframe.py,sha256
|
8
|
+
sqlframe/base/dataframe.py,sha256=-jeoqP5jS8Rk1fp_Og9ie_e2fjo19uX7JVpi9PeU5qI,86943
|
9
9
|
sqlframe/base/decorators.py,sha256=IhE5xNQDkwJHacCvulq5WpUKyKmXm7dL2A3o5WuKGP4,2131
|
10
10
|
sqlframe/base/exceptions.py,sha256=9Uwvqn2eAkDpqm4BrRgbL61qM-GMCbJEMAW8otxO46s,370
|
11
11
|
sqlframe/base/function_alternatives.py,sha256=aTu3nQhIAkZoxrI1IpjpaHEAMxBNms0AnhS0EMR-TwY,51727
|
12
|
-
sqlframe/base/functions.py,sha256=
|
12
|
+
sqlframe/base/functions.py,sha256=lMwNtOAj7MbmaFtweo5N8DJp-8ent1fT6lr3J3YcQsA,227753
|
13
13
|
sqlframe/base/group.py,sha256=fBm8EUve7W7xz11nybTXr09ih-yZxL_vvEiZVE1eb_0,12025
|
14
14
|
sqlframe/base/normalize.py,sha256=nXAJ5CwxVf4DV0GsH-q1w0p8gmjSMlv96k_ez1eVul8,3880
|
15
|
-
sqlframe/base/operations.py,sha256=
|
15
|
+
sqlframe/base/operations.py,sha256=g-YNcbvNKTOBbYm23GKfB3fmydlR7ZZDAuZUtXIHtzw,4438
|
16
16
|
sqlframe/base/readerwriter.py,sha256=Nb2VJ_HBmLQp5mK8JhnFooZh2ydAaboCAFVPb-4MNX4,31241
|
17
17
|
sqlframe/base/session.py,sha256=99X-ShK9ohHCX6WdIJs0HhjfK23snaE3Gv6RYc5wqUI,27687
|
18
18
|
sqlframe/base/table.py,sha256=rCeh1W5SWbtEVfkLAUiexzrZwNgmZeptLEmLcM1ABkE,6961
|
@@ -130,8 +130,8 @@ sqlframe/standalone/udf.py,sha256=azmgtUjHNIPs0WMVNId05SHwiYn41MKVBhKXsQJ5dmY,27
|
|
130
130
|
sqlframe/standalone/window.py,sha256=6GKPzuxeSapJakBaKBeT9VpED1ACdjggDv9JRILDyV0,35
|
131
131
|
sqlframe/testing/__init__.py,sha256=VVCosQhitU74A3NnE52O4mNtGZONapuEXcc20QmSlnQ,132
|
132
132
|
sqlframe/testing/utils.py,sha256=PFsGZpwNUE_4-g_f43_vstTqsK0AQ2lBneb5Eb6NkFo,13008
|
133
|
-
sqlframe-3.39.
|
134
|
-
sqlframe-3.39.
|
135
|
-
sqlframe-3.39.
|
136
|
-
sqlframe-3.39.
|
137
|
-
sqlframe-3.39.
|
133
|
+
sqlframe-3.39.4.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
|
134
|
+
sqlframe-3.39.4.dist-info/METADATA,sha256=wv8nBkcg2ofsdZYo3qC0g8q_-QR_flvR1YPoKQ6uGeU,9070
|
135
|
+
sqlframe-3.39.4.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
|
136
|
+
sqlframe-3.39.4.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
|
137
|
+
sqlframe-3.39.4.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|