sqlframe 3.39.2__py3-none-any.whl → 3.39.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sqlframe/_version.py CHANGED
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
28
28
  commit_id: COMMIT_ID
29
29
  __commit_id__: COMMIT_ID
30
30
 
31
- __version__ = version = '3.39.2'
32
- __version_tuple__ = version_tuple = (3, 39, 2)
31
+ __version__ = version = '3.39.4'
32
+ __version_tuple__ = version_tuple = (3, 39, 4)
33
33
 
34
- __commit_id__ = commit_id = 'g772b3a6bf'
34
+ __commit_id__ = commit_id = 'g7103a1e73'
sqlframe/base/dataframe.py CHANGED
@@ -31,6 +31,7 @@ from sqlframe.base.util import (
31
31
  get_func_from_session,
32
32
  get_tables_from_expression_with_join,
33
33
  normalize_string,
34
+ partition_to,
34
35
  quote_preserving_alias_or_name,
35
36
  sqlglot_to_spark,
36
37
  verify_openai_installed,
@@ -1633,14 +1634,30 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
1633
1634
 
1634
1635
  @operation(Operation.SELECT)
1635
1636
  def drop(self, *cols: t.Union[str, Column]) -> Self:
1636
- all_columns = self._get_outer_select_columns(self.expression)
1637
- drop_cols = self._ensure_and_normalize_cols(cols)
1638
- new_columns = [
1639
- col
1640
- for col in all_columns
1641
- if col.alias_or_name not in [drop_column.alias_or_name for drop_column in drop_cols]
1642
- ]
1643
- return self.copy().select(*new_columns, append=False)
1637
+ # Separate string column names from Column objects for different handling
1638
+ column_objs, column_names = partition_to(lambda x: isinstance(x, str), cols, list, set)
1639
+
1640
+ # Normalize only the Column objects (strings will be handled as unqualified)
1641
+ drop_cols = self._ensure_and_normalize_cols(column_objs) if column_objs else []
1642
+
1643
+ # Work directly with the expression's select columns to preserve table qualifiers
1644
+ current_expressions = self.expression.expressions
1645
+ drop_sql = {drop_col.expression.sql() for drop_col in drop_cols}
1646
+
1647
+ # Create a more sophisticated matching function that considers table qualifiers
1648
+ def should_drop_expression(expr: exp.Expression) -> bool:
1649
+ # Check against fully qualified Column objects and
1650
+ # Check against unqualified string column names (drop ALL columns with this name)
1651
+ if expr.sql() in drop_sql or (
1652
+ isinstance(expr, exp.Column) and expr.alias_or_name in column_names
1653
+ ):
1654
+ return True
1655
+ return False
1656
+
1657
+ new_expressions = [expr for expr in current_expressions if not should_drop_expression(expr)]
1658
+ return self.select.__wrapped__( # type: ignore
1659
+ self, *new_expressions, skip_update_display_name_mapping=True
1660
+ )
1644
1661
 
1645
1662
  @operation(Operation.LIMIT)
1646
1663
  def limit(self, num: int) -> Self:
sqlframe/base/functions.py CHANGED
@@ -37,9 +37,7 @@ def _get_session() -> _BaseSession:
37
37
 
38
38
  @meta()
39
39
  def col(column_name: t.Union[ColumnOrName, t.Any]) -> Column:
40
- from sqlframe.base.session import _BaseSession
41
-
42
- dialect = _BaseSession().input_dialect
40
+ dialect = _get_session().input_dialect
43
41
  if isinstance(column_name, str):
44
42
  col_expression = expression.to_column(column_name, dialect=dialect).transform(
45
43
  dialect.normalize_identifier
@@ -662,9 +660,7 @@ def grouping_id(*cols: ColumnOrName) -> Column:
662
660
 
663
661
  @meta()
664
662
  def input_file_name() -> Column:
665
- from sqlframe.base.session import _BaseSession
666
-
667
- return Column(expression.Literal.string(_BaseSession()._last_loaded_file or ""))
663
+ return Column(expression.Literal.string(_get_session()._last_loaded_file or ""))
668
664
 
669
665
 
670
666
  @meta()
@@ -959,12 +955,10 @@ def current_timestamp() -> Column:
959
955
 
960
956
  @meta()
961
957
  def date_format(col: ColumnOrName, format: str) -> Column:
962
- from sqlframe.base.session import _BaseSession
963
-
964
958
  return Column.invoke_expression_over_column(
965
959
  Column(expression.TimeStrToTime(this=Column.ensure_col(col).column_expression)),
966
960
  expression.TimeToStr,
967
- format=_BaseSession().format_time(format),
961
+ format=_get_session().format_time(format),
968
962
  )
969
963
 
970
964
 
@@ -1450,6 +1444,9 @@ def unix_timestamp(
1450
1444
 
1451
1445
  session = _get_session()
1452
1446
 
1447
+ if session._is_duckdb or session._is_postgres or session._is_snowflake or session._is_bigquery:
1448
+ timestamp = Column.ensure_col(timestamp).cast("string")
1449
+
1453
1450
  if session._is_bigquery:
1454
1451
  return unix_timestamp_bgutil(timestamp, format)
1455
1452
 
@@ -3375,10 +3372,9 @@ def get(col: ColumnOrName, index: t.Union[ColumnOrName, int]) -> Column:
3375
3372
  def get_active_spark_context() -> SparkContext:
3376
3373
  """Raise RuntimeError if SparkContext is not initialized,
3377
3374
  otherwise, returns the active SparkContext."""
3378
- from sqlframe.base.session import _BaseSession
3379
3375
  from sqlframe.spark.session import SparkSession
3380
3376
 
3381
- session: _BaseSession = _BaseSession()
3377
+ session = _get_session()
3382
3378
  if not isinstance(session, SparkSession):
3383
3379
  raise RuntimeError("This function is only available in SparkSession.")
3384
3380
  return session.spark_session.sparkContext
@@ -6341,7 +6337,8 @@ def to_unix_timestamp(
6341
6337
  session = _get_session()
6342
6338
 
6343
6339
  if session._is_duckdb:
6344
- format = format or _BaseSession().default_time_format
6340
+ format = format or session.default_time_format
6341
+ timestamp = Column.ensure_col(timestamp).cast("string")
6345
6342
 
6346
6343
  if format is not None:
6347
6344
  return Column.invoke_expression_over_column(
sqlframe/base/session.py CHANGED
@@ -179,7 +179,7 @@ class _BaseSession(t.Generic[CATALOG, READER, WRITER, DF, TABLE, CONN, UDF_REGIS
179
179
  return self._table(self, *args, **kwargs)
180
180
 
181
181
  def __new__(cls, *args, **kwargs):
182
- if _BaseSession._instance is None:
182
+ if _BaseSession._instance is None or not isinstance(_BaseSession._instance, cls):
183
183
  _BaseSession._instance = super().__new__(cls)
184
184
  return _BaseSession._instance
185
185
 
@@ -194,6 +194,11 @@ class _BaseSession(t.Generic[CATALOG, READER, WRITER, DF, TABLE, CONN, UDF_REGIS
194
194
  def getActiveSession(self) -> Self:
195
195
  return self
196
196
 
197
+ def stop(self) -> None:
198
+ if connection := getattr(self, "_connection", None):
199
+ connection.close()
200
+ _BaseSession._instance = None
201
+
197
202
  def range(
198
203
  self,
199
204
  start: int,
sqlframe/base/util.py CHANGED
@@ -6,6 +6,7 @@ import string
6
6
  import typing as t
7
7
  import unicodedata
8
8
 
9
+ from more_itertools import partition
9
10
  from sqlglot import expressions as exp
10
11
  from sqlglot import parse_one, to_table
11
12
  from sqlglot.dialects import DuckDB
@@ -537,3 +538,17 @@ def is_relativedelta_like(value: t.Any) -> bool:
537
538
  and hasattr(value, "weeks")
538
539
  and hasattr(value, "leapdays")
539
540
  )
541
+
542
+
543
+ T = t.TypeVar("T")
544
+ R1 = t.TypeVar("R1")
545
+ R2 = t.TypeVar("R2")
546
+
547
+
548
+ def partition_to(
549
+ pred: t.Callable[[T], bool],
550
+ iterable: t.Iterable[T],
551
+ result1: t.Type[R1],
552
+ result2: t.Type[R2],
553
+ ) -> tuple[R1, R2]:
554
+ return (lambda x, y: (result1(x), result2(y)))(*partition(pred, iterable)) # type: ignore
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sqlframe
3
- Version: 3.39.2
3
+ Version: 3.39.4
4
4
  Summary: Turning PySpark Into a Universal DataFrame API
5
5
  Home-page: https://github.com/eakmanrq/sqlframe
6
6
  Author: Ryan Eakman
@@ -16,8 +16,9 @@ Classifier: Programming Language :: Python :: 3 :: Only
16
16
  Requires-Python: >=3.9
17
17
  Description-Content-Type: text/markdown
18
18
  License-File: LICENSE
19
+ Requires-Dist: more-itertools
19
20
  Requires-Dist: prettytable <4
20
- Requires-Dist: sqlglot <27.9,>=24.0.0
21
+ Requires-Dist: sqlglot <27.10,>=24.0.0
21
22
  Requires-Dist: typing-extensions
22
23
  Provides-Extra: bigquery
23
24
  Requires-Dist: google-cloud-bigquery-storage <3,>=2 ; extra == 'bigquery'
@@ -1,25 +1,25 @@
1
1
  sqlframe/__init__.py,sha256=SB80yLTITBXHI2GCDS6n6bN5ObHqgPjfpRPAUwxaots,3403
2
- sqlframe/_version.py,sha256=2ZMyDPGKBrqT_KjBcZ7ni5_lsj0fVr5EDt184buBf6w,714
2
+ sqlframe/_version.py,sha256=RTACos9x6Q52oWKZxADQ1aU73aw2iil2MarQYuwWMsM,714
3
3
  sqlframe/py.typed,sha256=Nqnn8clbgv-5l0PgxcTOldg8mkMKrFn4TvPL-rYUUGg,1
4
4
  sqlframe/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
5
  sqlframe/base/_typing.py,sha256=b2clI5HI1zEZKB_3Msx3FeAJQyft44ubUifJwQRVXyQ,1298
6
6
  sqlframe/base/catalog.py,sha256=-YulM2BMK8MoWbXi05AsJIPxd4AuiZDBCZuk4HoeMlE,38900
7
7
  sqlframe/base/column.py,sha256=f6rK6-hTiNx9WwJP7t6tqL3xEC2gwERPDlhWCS5iCBw,21417
8
- sqlframe/base/dataframe.py,sha256=0diYONDlet8iZt49LC3vcmfXHAAZ2MovPL2pTXYHj2U,85974
8
+ sqlframe/base/dataframe.py,sha256=-jeoqP5jS8Rk1fp_Og9ie_e2fjo19uX7JVpi9PeU5qI,86943
9
9
  sqlframe/base/decorators.py,sha256=IhE5xNQDkwJHacCvulq5WpUKyKmXm7dL2A3o5WuKGP4,2131
10
10
  sqlframe/base/exceptions.py,sha256=9Uwvqn2eAkDpqm4BrRgbL61qM-GMCbJEMAW8otxO46s,370
11
11
  sqlframe/base/function_alternatives.py,sha256=aTu3nQhIAkZoxrI1IpjpaHEAMxBNms0AnhS0EMR-TwY,51727
12
- sqlframe/base/functions.py,sha256=9hW5aYke5EFU4C7Epx-TlyG2ZxjYnFGskv4LwHiQ2dw,227752
12
+ sqlframe/base/functions.py,sha256=lMwNtOAj7MbmaFtweo5N8DJp-8ent1fT6lr3J3YcQsA,227753
13
13
  sqlframe/base/group.py,sha256=fBm8EUve7W7xz11nybTXr09ih-yZxL_vvEiZVE1eb_0,12025
14
14
  sqlframe/base/normalize.py,sha256=nXAJ5CwxVf4DV0GsH-q1w0p8gmjSMlv96k_ez1eVul8,3880
15
15
  sqlframe/base/operations.py,sha256=g-YNcbvNKTOBbYm23GKfB3fmydlR7ZZDAuZUtXIHtzw,4438
16
16
  sqlframe/base/readerwriter.py,sha256=Nb2VJ_HBmLQp5mK8JhnFooZh2ydAaboCAFVPb-4MNX4,31241
17
- sqlframe/base/session.py,sha256=8oaEgGbyctKKEaI0GW6k7Praku7nwx3YRYgAW3mZNk0,27481
17
+ sqlframe/base/session.py,sha256=99X-ShK9ohHCX6WdIJs0HhjfK23snaE3Gv6RYc5wqUI,27687
18
18
  sqlframe/base/table.py,sha256=rCeh1W5SWbtEVfkLAUiexzrZwNgmZeptLEmLcM1ABkE,6961
19
19
  sqlframe/base/transforms.py,sha256=y0j3SGDz3XCmNGrvassk1S-owllUWfkHyMgZlY6SFO4,467
20
20
  sqlframe/base/types.py,sha256=OktuJ5f7tEogOW0oupI0RBlHfzZMmKh7zGLke9cwllo,12305
21
21
  sqlframe/base/udf.py,sha256=O6hMhBUy9NVv-mhJRtfFhXTIa_-Z8Y_FkmmuOHu0l90,1117
22
- sqlframe/base/util.py,sha256=D4HAhtu4DMz5mXyxlUHRP_GrsjLJACpBYlLriyGoT0g,19435
22
+ sqlframe/base/util.py,sha256=11rBF_GBFXGBCllSdlWWWo8EiZZATJn4me3u7OUNIFg,19782
23
23
  sqlframe/base/window.py,sha256=7NaKDTlhun-95LEghukBCjFBwq0RHrPaajWQNCsLxok,4818
24
24
  sqlframe/base/mixins/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
25
25
  sqlframe/base/mixins/catalog_mixins.py,sha256=9fZGWToz9xMJSzUl1vsVtj6TH3TysP3fBCKJLnGUQzE,23353
@@ -130,8 +130,8 @@ sqlframe/standalone/udf.py,sha256=azmgtUjHNIPs0WMVNId05SHwiYn41MKVBhKXsQJ5dmY,27
130
130
  sqlframe/standalone/window.py,sha256=6GKPzuxeSapJakBaKBeT9VpED1ACdjggDv9JRILDyV0,35
131
131
  sqlframe/testing/__init__.py,sha256=VVCosQhitU74A3NnE52O4mNtGZONapuEXcc20QmSlnQ,132
132
132
  sqlframe/testing/utils.py,sha256=PFsGZpwNUE_4-g_f43_vstTqsK0AQ2lBneb5Eb6NkFo,13008
133
- sqlframe-3.39.2.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
134
- sqlframe-3.39.2.dist-info/METADATA,sha256=M0k0V_XPUzeL9-tCwZWKMMv9DVhVstFonKVOWRc7wRk,9039
135
- sqlframe-3.39.2.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
136
- sqlframe-3.39.2.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
137
- sqlframe-3.39.2.dist-info/RECORD,,
133
+ sqlframe-3.39.4.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
134
+ sqlframe-3.39.4.dist-info/METADATA,sha256=wv8nBkcg2ofsdZYo3qC0g8q_-QR_flvR1YPoKQ6uGeU,9070
135
+ sqlframe-3.39.4.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
136
+ sqlframe-3.39.4.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
137
+ sqlframe-3.39.4.dist-info/RECORD,,