sqlframe-3.39.2-py3-none-any.whl → sqlframe-3.39.3-py3-none-any.whl

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
sqlframe/_version.py CHANGED
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
28
28
  commit_id: COMMIT_ID
29
29
  __commit_id__: COMMIT_ID
30
30
 
31
- __version__ = version = '3.39.2'
32
- __version_tuple__ = version_tuple = (3, 39, 2)
31
+ __version__ = version = '3.39.3'
32
+ __version_tuple__ = version_tuple = (3, 39, 3)
33
33
 
34
- __commit_id__ = commit_id = 'g772b3a6bf'
34
+ __commit_id__ = commit_id = 'g9d915cb1e'
sqlframe/base/dataframe.py CHANGED
@@ -16,6 +16,7 @@ from dataclasses import dataclass
16
16
  from uuid import uuid4
17
17
 
18
18
  import sqlglot
19
+ from more_itertools import partition
19
20
  from prettytable import PrettyTable
20
21
  from sqlglot import Dialect, maybe_parse
21
22
  from sqlglot import expressions as exp
@@ -31,6 +32,7 @@ from sqlframe.base.util import (
31
32
  get_func_from_session,
32
33
  get_tables_from_expression_with_join,
33
34
  normalize_string,
35
+ partition_to,
34
36
  quote_preserving_alias_or_name,
35
37
  sqlglot_to_spark,
36
38
  verify_openai_installed,
@@ -540,16 +542,23 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
540
542
  expression.set("with", exp.With(expressions=existing_ctes))
541
543
  return expression
542
544
 
545
+ @classmethod
546
+ def _get_outer_select_expressions(
547
+ cls, item: exp.Expression
548
+ ) -> t.List[t.Union[exp.Column, exp.Alias]]:
549
+ outer_select = item.find(exp.Select)
550
+ if outer_select:
551
+ return outer_select.expressions
552
+ return []
553
+
543
554
  @classmethod
544
555
  def _get_outer_select_columns(cls, item: exp.Expression) -> t.List[Column]:
545
556
  from sqlframe.base.session import _BaseSession
546
557
 
547
558
  col = get_func_from_session("col", _BaseSession())
548
559
 
549
- outer_select = item.find(exp.Select)
550
- if outer_select:
551
- return [col(quote_preserving_alias_or_name(x)) for x in outer_select.expressions]
552
- return []
560
+ outer_expressions = cls._get_outer_select_expressions(item)
561
+ return [col(quote_preserving_alias_or_name(x)) for x in outer_expressions]
553
562
 
554
563
  def _create_hash_from_expression(self, expression: exp.Expression) -> str:
555
564
  from sqlframe.base.session import _BaseSession
@@ -1503,20 +1512,23 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
1503
1512
  """
1504
1513
  return func(self, *args, **kwargs) # type: ignore
1505
1514
 
1506
- @operation(Operation.SELECT)
1515
+ @operation(Operation.SELECT_CONSTRAINED)
1507
1516
  def withColumn(self, colName: str, col: Column) -> Self:
1508
1517
  return self.withColumns.__wrapped__(self, {colName: col}) # type: ignore
1509
1518
 
1510
- @operation(Operation.SELECT)
1519
+ @operation(Operation.SELECT_CONSTRAINED)
1511
1520
  def withColumnRenamed(self, existing: str, new: str) -> Self:
1521
+ col_func = get_func_from_session("col", self.session)
1512
1522
  expression = self.expression.copy()
1513
1523
  existing = self.session._normalize_string(existing)
1514
- columns = self._get_outer_select_columns(expression)
1524
+ outer_expressions = self._get_outer_select_expressions(expression)
1515
1525
  results = []
1516
1526
  found_match = False
1517
- for column in columns:
1518
- if column.alias_or_name == existing:
1519
- column = column.alias(new)
1527
+ for expr in outer_expressions:
1528
+ column = col_func(expr.copy())
1529
+ if existing == quote_preserving_alias_or_name(expr):
1530
+ if isinstance(column.expression, exp.Alias):
1531
+ column.expression.set("alias", exp.to_identifier(new))
1520
1532
  self._update_display_name_mapping([column], [new])
1521
1533
  found_match = True
1522
1534
  results.append(column)
@@ -1524,7 +1536,7 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
1524
1536
  raise ValueError("Tried to rename a column that doesn't exist")
1525
1537
  return self.select.__wrapped__(self, *results, skip_update_display_name_mapping=True) # type: ignore
1526
1538
 
1527
- @operation(Operation.SELECT)
1539
+ @operation(Operation.SELECT_CONSTRAINED)
1528
1540
  def withColumnsRenamed(self, colsMap: t.Dict[str, str]) -> Self:
1529
1541
  """
1530
1542
  Returns a new :class:`DataFrame` by renaming multiple columns. If a non-existing column is
@@ -1570,7 +1582,7 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
1570
1582
 
1571
1583
  return self.select.__wrapped__(self, *results, skip_update_display_name_mapping=True) # type: ignore
1572
1584
 
1573
- @operation(Operation.SELECT)
1585
+ @operation(Operation.SELECT_CONSTRAINED)
1574
1586
  def withColumns(self, *colsMap: t.Dict[str, Column]) -> Self:
1575
1587
  """
1576
1588
  Returns a new :class:`DataFrame` by adding multiple columns or replacing the
@@ -1608,13 +1620,14 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
1608
1620
  """
1609
1621
  if len(colsMap) != 1:
1610
1622
  raise ValueError("Only a single map is supported")
1623
+ col_func = get_func_from_session("col")
1611
1624
  col_map = {
1612
1625
  self._ensure_and_normalize_col(k): (self._ensure_and_normalize_col(v), k)
1613
1626
  for k, v in colsMap[0].items()
1614
1627
  }
1615
- existing_cols = self._get_outer_select_columns(self.expression)
1616
- existing_col_names = [x.alias_or_name for x in existing_cols]
1617
- select_columns = existing_cols
1628
+ existing_expr = self._get_outer_select_expressions(self.expression)
1629
+ existing_col_names = [x.alias_or_name for x in existing_expr]
1630
+ select_columns = [col_func(x) for x in existing_expr]
1618
1631
  for col, (col_value, display_name) in col_map.items():
1619
1632
  column_name = col.alias_or_name
1620
1633
  existing_col_index = (
@@ -1631,16 +1644,32 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
1631
1644
  )
1632
1645
  return self.select.__wrapped__(self, *select_columns, skip_update_display_name_mapping=True) # type: ignore
1633
1646
 
1634
- @operation(Operation.SELECT)
1647
+ @operation(Operation.SELECT_CONSTRAINED)
1635
1648
  def drop(self, *cols: t.Union[str, Column]) -> Self:
1636
- all_columns = self._get_outer_select_columns(self.expression)
1637
- drop_cols = self._ensure_and_normalize_cols(cols)
1638
- new_columns = [
1639
- col
1640
- for col in all_columns
1641
- if col.alias_or_name not in [drop_column.alias_or_name for drop_column in drop_cols]
1642
- ]
1643
- return self.copy().select(*new_columns, append=False)
1649
+ # Separate string column names from Column objects for different handling
1650
+ column_objs, column_names = partition_to(lambda x: isinstance(x, str), cols, list, set)
1651
+
1652
+ # Normalize only the Column objects (strings will be handled as unqualified)
1653
+ drop_cols = self._ensure_and_normalize_cols(column_objs) if column_objs else []
1654
+
1655
+ # Work directly with the expression's select columns to preserve table qualifiers
1656
+ current_expressions = self.expression.expressions
1657
+ drop_sql = {drop_col.expression.sql() for drop_col in drop_cols}
1658
+
1659
+ # Create a more sophisticated matching function that considers table qualifiers
1660
+ def should_drop_expression(expr: exp.Expression) -> bool:
1661
+ # Check against fully qualified Column objects and
1662
+ # Check against unqualified string column names (drop ALL columns with this name)
1663
+ if expr.sql() in drop_sql or (
1664
+ isinstance(expr, exp.Column) and expr.alias_or_name in column_names
1665
+ ):
1666
+ return True
1667
+ return False
1668
+
1669
+ new_expressions = [expr for expr in current_expressions if not should_drop_expression(expr)]
1670
+ return self.select.__wrapped__( # type: ignore
1671
+ self, *new_expressions, skip_update_display_name_mapping=True
1672
+ )
1644
1673
 
1645
1674
  @operation(Operation.LIMIT)
1646
1675
  def limit(self, num: int) -> Self:
sqlframe/base/functions.py CHANGED
@@ -1450,6 +1450,9 @@ def unix_timestamp(
1450
1450
 
1451
1451
  session = _get_session()
1452
1452
 
1453
+ if session._is_duckdb or session._is_postgres or session._is_snowflake or session._is_bigquery:
1454
+ timestamp = Column.ensure_col(timestamp).cast("string")
1455
+
1453
1456
  if session._is_bigquery:
1454
1457
  return unix_timestamp_bgutil(timestamp, format)
1455
1458
 
@@ -6342,6 +6345,7 @@ def to_unix_timestamp(
6342
6345
 
6343
6346
  if session._is_duckdb:
6344
6347
  format = format or _BaseSession().default_time_format
6348
+ timestamp = Column.ensure_col(timestamp).cast("string")
6345
6349
 
6346
6350
  if format is not None:
6347
6351
  return Column.invoke_expression_over_column(
sqlframe/base/operations.py CHANGED
@@ -27,9 +27,10 @@ class Operation(IntEnum):
27
27
  WHERE = 2
28
28
  GROUP_BY = 3
29
29
  HAVING = 4
30
- SELECT = 5
31
- ORDER_BY = 6
32
- LIMIT = 7
30
+ SELECT_CONSTRAINED = 5
31
+ SELECT = 6
32
+ ORDER_BY = 7
33
+ LIMIT = 8
33
34
 
34
35
 
35
36
  # We want to decorate a function (self: DF, *args, **kwargs) -> T
sqlframe/base/session.py CHANGED
@@ -179,7 +179,7 @@ class _BaseSession(t.Generic[CATALOG, READER, WRITER, DF, TABLE, CONN, UDF_REGIS
179
179
  return self._table(self, *args, **kwargs)
180
180
 
181
181
  def __new__(cls, *args, **kwargs):
182
- if _BaseSession._instance is None:
182
+ if _BaseSession._instance is None or not isinstance(_BaseSession._instance, cls):
183
183
  _BaseSession._instance = super().__new__(cls)
184
184
  return _BaseSession._instance
185
185
 
@@ -194,6 +194,11 @@ class _BaseSession(t.Generic[CATALOG, READER, WRITER, DF, TABLE, CONN, UDF_REGIS
194
194
  def getActiveSession(self) -> Self:
195
195
  return self
196
196
 
197
+ def stop(self) -> None:
198
+ if connection := getattr(self, "_connection", None):
199
+ connection.close()
200
+ _BaseSession._instance = None
201
+
197
202
  def range(
198
203
  self,
199
204
  start: int,
sqlframe/base/util.py CHANGED
@@ -6,6 +6,7 @@ import string
6
6
  import typing as t
7
7
  import unicodedata
8
8
 
9
+ from more_itertools import partition
9
10
  from sqlglot import expressions as exp
10
11
  from sqlglot import parse_one, to_table
11
12
  from sqlglot.dialects import DuckDB
@@ -537,3 +538,17 @@ def is_relativedelta_like(value: t.Any) -> bool:
537
538
  and hasattr(value, "weeks")
538
539
  and hasattr(value, "leapdays")
539
540
  )
541
+
542
+
543
+ T = t.TypeVar("T")
544
+ R1 = t.TypeVar("R1")
545
+ R2 = t.TypeVar("R2")
546
+
547
+
548
+ def partition_to(
549
+ pred: t.Callable[[T], bool],
550
+ iterable: t.Iterable[T],
551
+ result1: t.Type[R1],
552
+ result2: t.Type[R2],
553
+ ) -> tuple[R1, R2]:
554
+ return (lambda x, y: (result1(x), result2(y)))(*partition(pred, iterable)) # type: ignore
sqlframe-3.39.3.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sqlframe
3
- Version: 3.39.2
3
+ Version: 3.39.3
4
4
  Summary: Turning PySpark Into a Universal DataFrame API
5
5
  Home-page: https://github.com/eakmanrq/sqlframe
6
6
  Author: Ryan Eakman
@@ -16,6 +16,7 @@ Classifier: Programming Language :: Python :: 3 :: Only
16
16
  Requires-Python: >=3.9
17
17
  Description-Content-Type: text/markdown
18
18
  License-File: LICENSE
19
+ Requires-Dist: more-itertools
19
20
  Requires-Dist: prettytable <4
20
21
  Requires-Dist: sqlglot <27.9,>=24.0.0
21
22
  Requires-Dist: typing-extensions
sqlframe-3.39.3.dist-info/RECORD CHANGED
@@ -1,25 +1,25 @@
1
1
  sqlframe/__init__.py,sha256=SB80yLTITBXHI2GCDS6n6bN5ObHqgPjfpRPAUwxaots,3403
2
- sqlframe/_version.py,sha256=2ZMyDPGKBrqT_KjBcZ7ni5_lsj0fVr5EDt184buBf6w,714
2
+ sqlframe/_version.py,sha256=Vixv4hfZnHHXCXSmZD4wlHJUBkhCMzDLIyo5HqkJdes,714
3
3
  sqlframe/py.typed,sha256=Nqnn8clbgv-5l0PgxcTOldg8mkMKrFn4TvPL-rYUUGg,1
4
4
  sqlframe/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
5
  sqlframe/base/_typing.py,sha256=b2clI5HI1zEZKB_3Msx3FeAJQyft44ubUifJwQRVXyQ,1298
6
6
  sqlframe/base/catalog.py,sha256=-YulM2BMK8MoWbXi05AsJIPxd4AuiZDBCZuk4HoeMlE,38900
7
7
  sqlframe/base/column.py,sha256=f6rK6-hTiNx9WwJP7t6tqL3xEC2gwERPDlhWCS5iCBw,21417
8
- sqlframe/base/dataframe.py,sha256=0diYONDlet8iZt49LC3vcmfXHAAZ2MovPL2pTXYHj2U,85974
8
+ sqlframe/base/dataframe.py,sha256=HHjDaeap4_w4HRRj87lhQjFTczxLKhFD8b-9vhK2KsY,87592
9
9
  sqlframe/base/decorators.py,sha256=IhE5xNQDkwJHacCvulq5WpUKyKmXm7dL2A3o5WuKGP4,2131
10
10
  sqlframe/base/exceptions.py,sha256=9Uwvqn2eAkDpqm4BrRgbL61qM-GMCbJEMAW8otxO46s,370
11
11
  sqlframe/base/function_alternatives.py,sha256=aTu3nQhIAkZoxrI1IpjpaHEAMxBNms0AnhS0EMR-TwY,51727
12
- sqlframe/base/functions.py,sha256=9hW5aYke5EFU4C7Epx-TlyG2ZxjYnFGskv4LwHiQ2dw,227752
12
+ sqlframe/base/functions.py,sha256=RVNoRzM19BUwypdc0izYrrQe2Fe4_e9SbtpDkdD2bec,227981
13
13
  sqlframe/base/group.py,sha256=fBm8EUve7W7xz11nybTXr09ih-yZxL_vvEiZVE1eb_0,12025
14
14
  sqlframe/base/normalize.py,sha256=nXAJ5CwxVf4DV0GsH-q1w0p8gmjSMlv96k_ez1eVul8,3880
15
- sqlframe/base/operations.py,sha256=g-YNcbvNKTOBbYm23GKfB3fmydlR7ZZDAuZUtXIHtzw,4438
15
+ sqlframe/base/operations.py,sha256=8dkMNqjG3xP1w_6euAj8FpwweD7t590HYjoeoCr5LqI,4465
16
16
  sqlframe/base/readerwriter.py,sha256=Nb2VJ_HBmLQp5mK8JhnFooZh2ydAaboCAFVPb-4MNX4,31241
17
- sqlframe/base/session.py,sha256=8oaEgGbyctKKEaI0GW6k7Praku7nwx3YRYgAW3mZNk0,27481
17
+ sqlframe/base/session.py,sha256=99X-ShK9ohHCX6WdIJs0HhjfK23snaE3Gv6RYc5wqUI,27687
18
18
  sqlframe/base/table.py,sha256=rCeh1W5SWbtEVfkLAUiexzrZwNgmZeptLEmLcM1ABkE,6961
19
19
  sqlframe/base/transforms.py,sha256=y0j3SGDz3XCmNGrvassk1S-owllUWfkHyMgZlY6SFO4,467
20
20
  sqlframe/base/types.py,sha256=OktuJ5f7tEogOW0oupI0RBlHfzZMmKh7zGLke9cwllo,12305
21
21
  sqlframe/base/udf.py,sha256=O6hMhBUy9NVv-mhJRtfFhXTIa_-Z8Y_FkmmuOHu0l90,1117
22
- sqlframe/base/util.py,sha256=D4HAhtu4DMz5mXyxlUHRP_GrsjLJACpBYlLriyGoT0g,19435
22
+ sqlframe/base/util.py,sha256=11rBF_GBFXGBCllSdlWWWo8EiZZATJn4me3u7OUNIFg,19782
23
23
  sqlframe/base/window.py,sha256=7NaKDTlhun-95LEghukBCjFBwq0RHrPaajWQNCsLxok,4818
24
24
  sqlframe/base/mixins/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
25
25
  sqlframe/base/mixins/catalog_mixins.py,sha256=9fZGWToz9xMJSzUl1vsVtj6TH3TysP3fBCKJLnGUQzE,23353
@@ -130,8 +130,8 @@ sqlframe/standalone/udf.py,sha256=azmgtUjHNIPs0WMVNId05SHwiYn41MKVBhKXsQJ5dmY,27
130
130
  sqlframe/standalone/window.py,sha256=6GKPzuxeSapJakBaKBeT9VpED1ACdjggDv9JRILDyV0,35
131
131
  sqlframe/testing/__init__.py,sha256=VVCosQhitU74A3NnE52O4mNtGZONapuEXcc20QmSlnQ,132
132
132
  sqlframe/testing/utils.py,sha256=PFsGZpwNUE_4-g_f43_vstTqsK0AQ2lBneb5Eb6NkFo,13008
133
- sqlframe-3.39.2.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
134
- sqlframe-3.39.2.dist-info/METADATA,sha256=M0k0V_XPUzeL9-tCwZWKMMv9DVhVstFonKVOWRc7wRk,9039
135
- sqlframe-3.39.2.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
136
- sqlframe-3.39.2.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
137
- sqlframe-3.39.2.dist-info/RECORD,,
133
+ sqlframe-3.39.3.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
134
+ sqlframe-3.39.3.dist-info/METADATA,sha256=eyKm8nGawKAujUOiCBn4PEFpSh_UzsnEV7LpKQVecRM,9069
135
+ sqlframe-3.39.3.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
136
+ sqlframe-3.39.3.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
137
+ sqlframe-3.39.3.dist-info/RECORD,,