sqlframe 3.32.0__py3-none-any.whl → 3.33.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sqlframe/_version.py CHANGED
@@ -17,5 +17,5 @@ __version__: str
17
17
  __version_tuple__: VERSION_TUPLE
18
18
  version_tuple: VERSION_TUPLE
19
19
 
20
- __version__ = version = '3.32.0'
21
- __version_tuple__ = version_tuple = (3, 32, 0)
20
+ __version__ = version = '3.33.0'
21
+ __version_tuple__ = version_tuple = (3, 33, 0)
sqlframe/base/catalog.py CHANGED
@@ -6,9 +6,16 @@ import typing as t
6
6
  from collections import defaultdict
7
7
 
8
8
  from sqlglot import MappingSchema, exp
9
+ from sqlglot.helper import seq_get
9
10
 
11
+ from sqlframe.base import types
10
12
  from sqlframe.base.exceptions import TableSchemaError
11
- from sqlframe.base.util import ensure_column_mapping, normalize_string, to_schema
13
+ from sqlframe.base.util import (
14
+ ensure_column_mapping,
15
+ normalize_string,
16
+ spark_to_sqlglot,
17
+ to_schema,
18
+ )
12
19
 
13
20
  if t.TYPE_CHECKING:
14
21
  from sqlglot.schema import ColumnMapping
@@ -99,6 +106,10 @@ class _BaseCatalog(t.Generic[SESSION, DF, TABLE]):
99
106
  "This session does not have access to a catalog that can lookup column information. See docs for explicitly defining columns or using a session that can automatically determine this."
100
107
  )
101
108
  column_mapping = ensure_column_mapping(column_mapping) # type: ignore
109
+ if isinstance(column_mapping, dict) and isinstance(
110
+ seq_get(list(column_mapping.values()), 0), types.DataType
111
+ ):
112
+ column_mapping = {k: spark_to_sqlglot(v) for k, v in column_mapping.items()}
102
113
  for column_name in column_mapping:
103
114
  column = exp.to_column(column_name, dialect=self.session.input_dialect)
104
115
  if column.this.quoted:
sqlframe/base/column.py CHANGED
@@ -517,3 +517,7 @@ class Column:
517
517
  +---+
518
518
  """
519
519
  return self.getItem(name)
520
+
521
+ def contains(self, value: t.Union[str, Column]) -> Column:
522
+ value = self._lit(value) if not isinstance(value, Column) else value
523
+ return self.invoke_expression_over_column(self, exp.Contains, expression=value.expression)
sqlframe/base/dataframe.py CHANGED
@@ -260,10 +260,6 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
260
260
  def __copy__(self):
261
261
  return self.copy()
262
262
 
263
- def __repr__(self) -> str:
264
- fields = [f"{field.name}: {field.dataType}" for field in self.schema]
265
- return "DataFrame[" + ", ".join(fields) + "]"
266
-
267
263
  def _display_(self) -> str:
268
264
  return self.__repr__()
269
265
 
sqlframe/base/function_alternatives.py CHANGED
@@ -78,17 +78,6 @@ def to_timestamp_tz(col: ColumnOrName, format: t.Optional[str] = None) -> Column
78
78
  return Column.ensure_col(col).cast("timestamptz", dialect="duckdb")
79
79
 
80
80
 
81
- def to_timestamp_just_timestamp(col: ColumnOrName, format: t.Optional[str] = None) -> Column:
82
- from sqlframe.base.session import _BaseSession
83
-
84
- if format is not None:
85
- return Column.invoke_expression_over_column(
86
- col, expression.StrToTime, format=_BaseSession().format_time(format)
87
- )
88
-
89
- return Column.ensure_col(col).cast("datetime", dialect="bigquery")
90
-
91
-
92
81
  def bitwise_not_from_bitnot(col: ColumnOrName) -> Column:
93
82
  return Column.invoke_anonymous_function(col, "BITNOT")
94
83
 
sqlframe/base/functions.py CHANGED
@@ -1356,7 +1356,6 @@ def to_date(col: ColumnOrName, format: t.Optional[str] = None) -> Column:
1356
1356
  @meta()
1357
1357
  def to_timestamp(col: ColumnOrName, format: t.Optional[str] = None) -> Column:
1358
1358
  from sqlframe.base.function_alternatives import (
1359
- to_timestamp_just_timestamp,
1360
1359
  to_timestamp_tz,
1361
1360
  to_timestamp_with_time_zone,
1362
1361
  )
@@ -1366,9 +1365,6 @@ def to_timestamp(col: ColumnOrName, format: t.Optional[str] = None) -> Column:
1366
1365
  if session._is_duckdb:
1367
1366
  return to_timestamp_tz(col, format)
1368
1367
 
1369
- if session._is_bigquery:
1370
- return to_timestamp_just_timestamp(col, format)
1371
-
1372
1368
  if session._is_postgres:
1373
1369
  return to_timestamp_with_time_zone(col, format)
1374
1370
 
@@ -3068,7 +3064,7 @@ def character_length(str: ColumnOrName) -> Column:
3068
3064
  return Column.invoke_expression_over_column(str, expression.Length)
3069
3065
 
3070
3066
 
3071
- @meta(unsupported_engines=["bigquery", "postgres"])
3067
+ @meta(unsupported_engines=["postgres"])
3072
3068
  def contains(left: ColumnOrName, right: ColumnOrName) -> Column:
3073
3069
  return Column.invoke_expression_over_column(
3074
3070
  left, expression.Contains, expression=Column.ensure_col(right).column_expression
@@ -6594,27 +6590,16 @@ def unix_micros(col: ColumnOrName) -> Column:
6594
6590
  """
6595
6591
  from sqlframe.base.function_alternatives import unix_micros_multiply_epoch
6596
6592
 
6593
+ to_timestamp = get_func_from_session("to_timestamp")
6594
+
6597
6595
  if _get_session()._is_duckdb:
6598
6596
  return Column.invoke_anonymous_function(col, "epoch_us")
6599
6597
 
6600
- if _get_session()._is_bigquery:
6601
- return Column(
6602
- expression.Anonymous(
6603
- this="UNIX_MICROS",
6604
- expressions=[
6605
- expression.Anonymous(
6606
- this="TIMESTAMP",
6607
- expressions=[
6608
- Column.ensure_col(col).column_expression,
6609
- ],
6610
- )
6611
- ],
6612
- )
6613
- )
6614
-
6615
6598
  if _get_session()._is_postgres or _get_session()._is_snowflake:
6616
6599
  return unix_micros_multiply_epoch(col)
6617
6600
 
6601
+ col = to_timestamp(col)
6602
+
6618
6603
  return Column.invoke_anonymous_function(col, "unix_micros")
6619
6604
 
6620
6605
 
@@ -6666,22 +6651,6 @@ def unix_seconds(col: ColumnOrName) -> Column:
6666
6651
  if _get_session()._is_postgres:
6667
6652
  return unix_seconds_extract_epoch(col)
6668
6653
 
6669
- if _get_session()._is_bigquery:
6670
- return Column(
6671
- expression.Anonymous(
6672
- this="UNIX_SECONDS",
6673
- expressions=[
6674
- expression.Anonymous(
6675
- this="TIMESTAMP",
6676
- expressions=[
6677
- Column.ensure_col(col).column_expression,
6678
- expression.Literal.string("UTC"),
6679
- ],
6680
- )
6681
- ],
6682
- )
6683
- )
6684
-
6685
6654
  return Column.invoke_expression_over_column(col, expression.UnixSeconds)
6686
6655
 
6687
6656
 
sqlframe/base/session.py CHANGED
@@ -304,7 +304,10 @@ class _BaseSession(t.Generic[CATALOG, READER, WRITER, DF, TABLE, CONN, UDF_REGIS
304
304
  elif isinstance(value, float):
305
305
  return "double"
306
306
  elif isinstance(value, datetime.datetime):
307
- return "timestamp"
307
+ if value.tzinfo:
308
+ # Spark defaults `timestamp` to be a timestamp with timezone
309
+ return "timestamp"
310
+ return "timestampntz"
308
311
  elif isinstance(value, datetime.date):
309
312
  return "date"
310
313
  elif isinstance(value, str):
sqlframe/base/util.py CHANGED
@@ -347,6 +347,93 @@ def sqlglot_to_spark(sqlglot_dtype: exp.DataType) -> types.DataType:
347
347
  raise NotImplementedError(f"Unsupported data type: {sqlglot_dtype}")
348
348
 
349
349
 
350
+ def spark_to_sqlglot(spark_dtype: types.DataType) -> exp.DataType:
351
+ """
352
+ Convert a Spark data type to a SQLGlot data type.
353
+
354
+ This function is the opposite of sqlglot_to_spark.
355
+
356
+ Args:
357
+ spark_dtype: A Spark data type
358
+
359
+ Returns:
360
+ The equivalent SQLGlot data type
361
+ """
362
+ from sqlframe.base import types
363
+
364
+ # Handle primitive types
365
+ if isinstance(spark_dtype, types.StringType):
366
+ return exp.DataType(this=exp.DataType.Type.TEXT)
367
+ elif isinstance(spark_dtype, types.VarcharType):
368
+ return exp.DataType(
369
+ this=exp.DataType.Type.VARCHAR,
370
+ expressions=[exp.DataTypeParam(this=exp.Literal.number(spark_dtype.length))],
371
+ )
372
+ elif isinstance(spark_dtype, types.CharType):
373
+ return exp.DataType(
374
+ this=exp.DataType.Type.CHAR,
375
+ expressions=[exp.DataTypeParam(this=exp.Literal.number(spark_dtype.length))],
376
+ )
377
+ elif isinstance(spark_dtype, types.BinaryType):
378
+ return exp.DataType(this=exp.DataType.Type.BINARY)
379
+ elif isinstance(spark_dtype, types.BooleanType):
380
+ return exp.DataType(this=exp.DataType.Type.BOOLEAN)
381
+ elif isinstance(spark_dtype, types.IntegerType):
382
+ return exp.DataType(this=exp.DataType.Type.INT)
383
+ elif isinstance(spark_dtype, types.LongType):
384
+ return exp.DataType(this=exp.DataType.Type.BIGINT)
385
+ elif isinstance(spark_dtype, types.ShortType):
386
+ return exp.DataType(this=exp.DataType.Type.SMALLINT)
387
+ elif isinstance(spark_dtype, types.ByteType):
388
+ return exp.DataType(this=exp.DataType.Type.TINYINT)
389
+ elif isinstance(spark_dtype, types.FloatType):
390
+ return exp.DataType(this=exp.DataType.Type.FLOAT)
391
+ elif isinstance(spark_dtype, types.DoubleType):
392
+ return exp.DataType(this=exp.DataType.Type.DOUBLE)
393
+ elif isinstance(spark_dtype, types.DecimalType):
394
+ if spark_dtype.precision is not None and spark_dtype.scale is not None:
395
+ return exp.DataType(
396
+ this=exp.DataType.Type.DECIMAL,
397
+ expressions=[
398
+ exp.DataTypeParam(this=exp.Literal.number(spark_dtype.precision)),
399
+ exp.DataTypeParam(this=exp.Literal.number(spark_dtype.scale)),
400
+ ],
401
+ )
402
+ return exp.DataType(this=exp.DataType.Type.DECIMAL)
403
+ elif isinstance(spark_dtype, types.TimestampType):
404
+ return exp.DataType(this=exp.DataType.Type.TIMESTAMP)
405
+ elif isinstance(spark_dtype, types.TimestampNTZType):
406
+ return exp.DataType(this=exp.DataType.Type.TIMESTAMPNTZ)
407
+ elif isinstance(spark_dtype, types.DateType):
408
+ return exp.DataType(this=exp.DataType.Type.DATE)
409
+
410
+ # Handle complex types
411
+ elif isinstance(spark_dtype, types.ArrayType):
412
+ return exp.DataType(
413
+ this=exp.DataType.Type.ARRAY, expressions=[spark_to_sqlglot(spark_dtype.elementType)]
414
+ )
415
+ elif isinstance(spark_dtype, types.MapType):
416
+ return exp.DataType(
417
+ this=exp.DataType.Type.MAP,
418
+ expressions=[
419
+ spark_to_sqlglot(spark_dtype.keyType),
420
+ spark_to_sqlglot(spark_dtype.valueType),
421
+ ],
422
+ )
423
+ elif isinstance(spark_dtype, types.StructType):
424
+ return exp.DataType(
425
+ this=exp.DataType.Type.STRUCT,
426
+ expressions=[
427
+ exp.ColumnDef(
428
+ this=exp.to_identifier(field.name), kind=spark_to_sqlglot(field.dataType)
429
+ )
430
+ for field in spark_dtype
431
+ ],
432
+ )
433
+
434
+ raise NotImplementedError(f"Unsupported data type: {spark_dtype}")
435
+
436
+
350
437
  def normalize_string(
351
438
  value: t.Union[str, exp.Expression],
352
439
  from_dialect: DialectType = None,
sqlframe-3.32.0.dist-info/METADATA → sqlframe-3.33.0.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sqlframe
3
- Version: 3.32.0
3
+ Version: 3.33.0
4
4
  Summary: Turning PySpark Into a Universal DataFrame API
5
5
  Home-page: https://github.com/eakmanrq/sqlframe
6
6
  Author: Ryan Eakman
sqlframe-3.32.0.dist-info/RECORD → sqlframe-3.33.0.dist-info/RECORD CHANGED
@@ -1,25 +1,25 @@
1
1
  sqlframe/__init__.py,sha256=SB80yLTITBXHI2GCDS6n6bN5ObHqgPjfpRPAUwxaots,3403
2
- sqlframe/_version.py,sha256=EatXYAvXc8eLZh8r-obXLCaLyBPqfgPtx9AXEI2rZ_E,513
2
+ sqlframe/_version.py,sha256=ov9Fneoqycyrmto-1SaECibEXFbQJlZrt2fFCQsBGtY,513
3
3
  sqlframe/py.typed,sha256=Nqnn8clbgv-5l0PgxcTOldg8mkMKrFn4TvPL-rYUUGg,1
4
4
  sqlframe/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
5
  sqlframe/base/_typing.py,sha256=b2clI5HI1zEZKB_3Msx3FeAJQyft44ubUifJwQRVXyQ,1298
6
- sqlframe/base/catalog.py,sha256=ZuU_qmt4yjSoTYgecSGnOhitOdh3rJbGCUjnUBp5mlc,38564
7
- sqlframe/base/column.py,sha256=sp3fJstA49FslE2CcgvVFHyi7Jxsxk8qHTd-Z0cAEWc,19932
8
- sqlframe/base/dataframe.py,sha256=V_qRAPsdUji12PwWg7Ce8Cln9fsBoVY-3s4J4KwbINs,84538
6
+ sqlframe/base/catalog.py,sha256=-YulM2BMK8MoWbXi05AsJIPxd4AuiZDBCZuk4HoeMlE,38900
7
+ sqlframe/base/column.py,sha256=zDG9YT-5M7H8tDISOkJ6uMBU7Q3enTmc6d7rzZ08q40,20172
8
+ sqlframe/base/dataframe.py,sha256=6L8xTdwwQCkUzpJ6K3QlCcz5zqk2QQmGzteI-1EJ23A,84374
9
9
  sqlframe/base/decorators.py,sha256=IhE5xNQDkwJHacCvulq5WpUKyKmXm7dL2A3o5WuKGP4,2131
10
10
  sqlframe/base/exceptions.py,sha256=9Uwvqn2eAkDpqm4BrRgbL61qM-GMCbJEMAW8otxO46s,370
11
- sqlframe/base/function_alternatives.py,sha256=Bs1bwl25fN3Yy9rb4GnUWBGunQ1C_yelkb2yV9DSZIY,53918
12
- sqlframe/base/functions.py,sha256=i93fc9t7HooXMo8p35VLHd3FeYazVZztVIWqGBmsMYA,227188
11
+ sqlframe/base/function_alternatives.py,sha256=dEymHSOQgUzhoYtfY5acC9AxpMoGoHXX7v6yTadKzn8,53527
12
+ sqlframe/base/functions.py,sha256=jNuCezcQl3j7hj1JsukaZLIvJqDtgQetiHnnTK5LU5w,226189
13
13
  sqlframe/base/group.py,sha256=OY4w1WRsCqLgW-Pi7DjF63zbbxSLISCF3qjAbzI2CQ4,4283
14
14
  sqlframe/base/normalize.py,sha256=nXAJ5CwxVf4DV0GsH-q1w0p8gmjSMlv96k_ez1eVul8,3880
15
15
  sqlframe/base/operations.py,sha256=g-YNcbvNKTOBbYm23GKfB3fmydlR7ZZDAuZUtXIHtzw,4438
16
16
  sqlframe/base/readerwriter.py,sha256=Nb2VJ_HBmLQp5mK8JhnFooZh2ydAaboCAFVPb-4MNX4,31241
17
- sqlframe/base/session.py,sha256=tSNlIlo7XeZUQFsZ3wnffqFLdxynY9i7oz60GUF-V14,27104
17
+ sqlframe/base/session.py,sha256=djXPmuW0cIQYuoE7hegfyvZuKC2D3ABZCjvw-fa1C24,27260
18
18
  sqlframe/base/table.py,sha256=rCeh1W5SWbtEVfkLAUiexzrZwNgmZeptLEmLcM1ABkE,6961
19
19
  sqlframe/base/transforms.py,sha256=y0j3SGDz3XCmNGrvassk1S-owllUWfkHyMgZlY6SFO4,467
20
20
  sqlframe/base/types.py,sha256=iBNk9bpFtb2NBIogYS8i7OlQZMRvpR6XxqzBebsjQDU,12280
21
21
  sqlframe/base/udf.py,sha256=O6hMhBUy9NVv-mhJRtfFhXTIa_-Z8Y_FkmmuOHu0l90,1117
22
- sqlframe/base/util.py,sha256=P8NcogrbNGAS69uWfsBx-50c-4QgVQxUqZC2IrWX_Ts,15522
22
+ sqlframe/base/util.py,sha256=gv_kRc3LxCuQy3t4dHFldV7elB8RU5PMqIN5-xSkWSo,19107
23
23
  sqlframe/base/window.py,sha256=7NaKDTlhun-95LEghukBCjFBwq0RHrPaajWQNCsLxok,4818
24
24
  sqlframe/base/mixins/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
25
25
  sqlframe/base/mixins/catalog_mixins.py,sha256=9fZGWToz9xMJSzUl1vsVtj6TH3TysP3fBCKJLnGUQzE,23353
@@ -130,8 +130,8 @@ sqlframe/standalone/udf.py,sha256=azmgtUjHNIPs0WMVNId05SHwiYn41MKVBhKXsQJ5dmY,27
130
130
  sqlframe/standalone/window.py,sha256=6GKPzuxeSapJakBaKBeT9VpED1ACdjggDv9JRILDyV0,35
131
131
  sqlframe/testing/__init__.py,sha256=VVCosQhitU74A3NnE52O4mNtGZONapuEXcc20QmSlnQ,132
132
132
  sqlframe/testing/utils.py,sha256=PFsGZpwNUE_4-g_f43_vstTqsK0AQ2lBneb5Eb6NkFo,13008
133
- sqlframe-3.32.0.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
134
- sqlframe-3.32.0.dist-info/METADATA,sha256=JXzQRL-VpYCTvuSF7QmUH0spMzGT2B5CoCIDDUNwq40,8987
135
- sqlframe-3.32.0.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
136
- sqlframe-3.32.0.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
137
- sqlframe-3.32.0.dist-info/RECORD,,
133
+ sqlframe-3.33.0.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
134
+ sqlframe-3.33.0.dist-info/METADATA,sha256=I0vB586w0J4WQoBE4hd1YDnh2fger8Ra7zEXdcbe9AY,8987
135
+ sqlframe-3.33.0.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
136
+ sqlframe-3.33.0.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
137
+ sqlframe-3.33.0.dist-info/RECORD,,