sqlframe-3.32.0-py3-none-any.whl → sqlframe-3.33.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sqlframe/_version.py +2 -2
- sqlframe/base/catalog.py +12 -1
- sqlframe/base/column.py +4 -0
- sqlframe/base/dataframe.py +0 -4
- sqlframe/base/function_alternatives.py +0 -11
- sqlframe/base/functions.py +5 -36
- sqlframe/base/session.py +4 -1
- sqlframe/base/util.py +87 -0
- {sqlframe-3.32.0.dist-info → sqlframe-3.33.0.dist-info}/METADATA +1 -1
- {sqlframe-3.32.0.dist-info → sqlframe-3.33.0.dist-info}/RECORD +13 -13
- {sqlframe-3.32.0.dist-info → sqlframe-3.33.0.dist-info}/LICENSE +0 -0
- {sqlframe-3.32.0.dist-info → sqlframe-3.33.0.dist-info}/WHEEL +0 -0
- {sqlframe-3.32.0.dist-info → sqlframe-3.33.0.dist-info}/top_level.txt +0 -0
sqlframe/_version.py
CHANGED
sqlframe/base/catalog.py
CHANGED
@@ -6,9 +6,16 @@ import typing as t
|
|
6
6
|
from collections import defaultdict
|
7
7
|
|
8
8
|
from sqlglot import MappingSchema, exp
|
9
|
+
from sqlglot.helper import seq_get
|
9
10
|
|
11
|
+
from sqlframe.base import types
|
10
12
|
from sqlframe.base.exceptions import TableSchemaError
|
11
|
-
from sqlframe.base.util import
|
13
|
+
from sqlframe.base.util import (
|
14
|
+
ensure_column_mapping,
|
15
|
+
normalize_string,
|
16
|
+
spark_to_sqlglot,
|
17
|
+
to_schema,
|
18
|
+
)
|
12
19
|
|
13
20
|
if t.TYPE_CHECKING:
|
14
21
|
from sqlglot.schema import ColumnMapping
|
@@ -99,6 +106,10 @@ class _BaseCatalog(t.Generic[SESSION, DF, TABLE]):
|
|
99
106
|
"This session does not have access to a catalog that can lookup column information. See docs for explicitly defining columns or using a session that can automatically determine this."
|
100
107
|
)
|
101
108
|
column_mapping = ensure_column_mapping(column_mapping) # type: ignore
|
109
|
+
if isinstance(column_mapping, dict) and isinstance(
|
110
|
+
seq_get(list(column_mapping.values()), 0), types.DataType
|
111
|
+
):
|
112
|
+
column_mapping = {k: spark_to_sqlglot(v) for k, v in column_mapping.items()}
|
102
113
|
for column_name in column_mapping:
|
103
114
|
column = exp.to_column(column_name, dialect=self.session.input_dialect)
|
104
115
|
if column.this.quoted:
|
sqlframe/base/column.py
CHANGED
@@ -517,3 +517,7 @@ class Column:
|
|
517
517
|
+---+
|
518
518
|
"""
|
519
519
|
return self.getItem(name)
|
520
|
+
|
521
|
+
def contains(self, value: t.Union[str, Column]) -> Column:
|
522
|
+
value = self._lit(value) if not isinstance(value, Column) else value
|
523
|
+
return self.invoke_expression_over_column(self, exp.Contains, expression=value.expression)
|
sqlframe/base/dataframe.py
CHANGED
@@ -260,10 +260,6 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
|
|
260
260
|
def __copy__(self):
|
261
261
|
return self.copy()
|
262
262
|
|
263
|
-
def __repr__(self) -> str:
|
264
|
-
fields = [f"{field.name}: {field.dataType}" for field in self.schema]
|
265
|
-
return "DataFrame[" + ", ".join(fields) + "]"
|
266
|
-
|
267
263
|
def _display_(self) -> str:
|
268
264
|
return self.__repr__()
|
269
265
|
|
sqlframe/base/function_alternatives.py
CHANGED
@@ -78,17 +78,6 @@ def to_timestamp_tz(col: ColumnOrName, format: t.Optional[str] = None) -> Column
|
|
78
78
|
return Column.ensure_col(col).cast("timestamptz", dialect="duckdb")
|
79
79
|
|
80
80
|
|
81
|
-
def to_timestamp_just_timestamp(col: ColumnOrName, format: t.Optional[str] = None) -> Column:
|
82
|
-
from sqlframe.base.session import _BaseSession
|
83
|
-
|
84
|
-
if format is not None:
|
85
|
-
return Column.invoke_expression_over_column(
|
86
|
-
col, expression.StrToTime, format=_BaseSession().format_time(format)
|
87
|
-
)
|
88
|
-
|
89
|
-
return Column.ensure_col(col).cast("datetime", dialect="bigquery")
|
90
|
-
|
91
|
-
|
92
81
|
def bitwise_not_from_bitnot(col: ColumnOrName) -> Column:
|
93
82
|
return Column.invoke_anonymous_function(col, "BITNOT")
|
94
83
|
|
sqlframe/base/functions.py
CHANGED
@@ -1356,7 +1356,6 @@ def to_date(col: ColumnOrName, format: t.Optional[str] = None) -> Column:
|
|
1356
1356
|
@meta()
|
1357
1357
|
def to_timestamp(col: ColumnOrName, format: t.Optional[str] = None) -> Column:
|
1358
1358
|
from sqlframe.base.function_alternatives import (
|
1359
|
-
to_timestamp_just_timestamp,
|
1360
1359
|
to_timestamp_tz,
|
1361
1360
|
to_timestamp_with_time_zone,
|
1362
1361
|
)
|
@@ -1366,9 +1365,6 @@ def to_timestamp(col: ColumnOrName, format: t.Optional[str] = None) -> Column:
|
|
1366
1365
|
if session._is_duckdb:
|
1367
1366
|
return to_timestamp_tz(col, format)
|
1368
1367
|
|
1369
|
-
if session._is_bigquery:
|
1370
|
-
return to_timestamp_just_timestamp(col, format)
|
1371
|
-
|
1372
1368
|
if session._is_postgres:
|
1373
1369
|
return to_timestamp_with_time_zone(col, format)
|
1374
1370
|
|
@@ -3068,7 +3064,7 @@ def character_length(str: ColumnOrName) -> Column:
|
|
3068
3064
|
return Column.invoke_expression_over_column(str, expression.Length)
|
3069
3065
|
|
3070
3066
|
|
3071
|
-
@meta(unsupported_engines=["
|
3067
|
+
@meta(unsupported_engines=["postgres"])
|
3072
3068
|
def contains(left: ColumnOrName, right: ColumnOrName) -> Column:
|
3073
3069
|
return Column.invoke_expression_over_column(
|
3074
3070
|
left, expression.Contains, expression=Column.ensure_col(right).column_expression
|
@@ -6594,27 +6590,16 @@ def unix_micros(col: ColumnOrName) -> Column:
|
|
6594
6590
|
"""
|
6595
6591
|
from sqlframe.base.function_alternatives import unix_micros_multiply_epoch
|
6596
6592
|
|
6593
|
+
to_timestamp = get_func_from_session("to_timestamp")
|
6594
|
+
|
6597
6595
|
if _get_session()._is_duckdb:
|
6598
6596
|
return Column.invoke_anonymous_function(col, "epoch_us")
|
6599
6597
|
|
6600
|
-
if _get_session()._is_bigquery:
|
6601
|
-
return Column(
|
6602
|
-
expression.Anonymous(
|
6603
|
-
this="UNIX_MICROS",
|
6604
|
-
expressions=[
|
6605
|
-
expression.Anonymous(
|
6606
|
-
this="TIMESTAMP",
|
6607
|
-
expressions=[
|
6608
|
-
Column.ensure_col(col).column_expression,
|
6609
|
-
],
|
6610
|
-
)
|
6611
|
-
],
|
6612
|
-
)
|
6613
|
-
)
|
6614
|
-
|
6615
6598
|
if _get_session()._is_postgres or _get_session()._is_snowflake:
|
6616
6599
|
return unix_micros_multiply_epoch(col)
|
6617
6600
|
|
6601
|
+
col = to_timestamp(col)
|
6602
|
+
|
6618
6603
|
return Column.invoke_anonymous_function(col, "unix_micros")
|
6619
6604
|
|
6620
6605
|
|
@@ -6666,22 +6651,6 @@ def unix_seconds(col: ColumnOrName) -> Column:
|
|
6666
6651
|
if _get_session()._is_postgres:
|
6667
6652
|
return unix_seconds_extract_epoch(col)
|
6668
6653
|
|
6669
|
-
if _get_session()._is_bigquery:
|
6670
|
-
return Column(
|
6671
|
-
expression.Anonymous(
|
6672
|
-
this="UNIX_SECONDS",
|
6673
|
-
expressions=[
|
6674
|
-
expression.Anonymous(
|
6675
|
-
this="TIMESTAMP",
|
6676
|
-
expressions=[
|
6677
|
-
Column.ensure_col(col).column_expression,
|
6678
|
-
expression.Literal.string("UTC"),
|
6679
|
-
],
|
6680
|
-
)
|
6681
|
-
],
|
6682
|
-
)
|
6683
|
-
)
|
6684
|
-
|
6685
6654
|
return Column.invoke_expression_over_column(col, expression.UnixSeconds)
|
6686
6655
|
|
6687
6656
|
|
sqlframe/base/session.py
CHANGED
@@ -304,7 +304,10 @@ class _BaseSession(t.Generic[CATALOG, READER, WRITER, DF, TABLE, CONN, UDF_REGIS
|
|
304
304
|
elif isinstance(value, float):
|
305
305
|
return "double"
|
306
306
|
elif isinstance(value, datetime.datetime):
|
307
|
-
|
307
|
+
if value.tzinfo:
|
308
|
+
# Spark defaults `timestamp` to be a timestamp with timezone
|
309
|
+
return "timestamp"
|
310
|
+
return "timestampntz"
|
308
311
|
elif isinstance(value, datetime.date):
|
309
312
|
return "date"
|
310
313
|
elif isinstance(value, str):
|
sqlframe/base/util.py
CHANGED
@@ -347,6 +347,93 @@ def sqlglot_to_spark(sqlglot_dtype: exp.DataType) -> types.DataType:
|
|
347
347
|
raise NotImplementedError(f"Unsupported data type: {sqlglot_dtype}")
|
348
348
|
|
349
349
|
|
350
|
+
def spark_to_sqlglot(spark_dtype: types.DataType) -> exp.DataType:
|
351
|
+
"""
|
352
|
+
Convert a Spark data type to a SQLGlot data type.
|
353
|
+
|
354
|
+
This function is the opposite of sqlglot_to_spark.
|
355
|
+
|
356
|
+
Args:
|
357
|
+
spark_dtype: A Spark data type
|
358
|
+
|
359
|
+
Returns:
|
360
|
+
The equivalent SQLGlot data type
|
361
|
+
"""
|
362
|
+
from sqlframe.base import types
|
363
|
+
|
364
|
+
# Handle primitive types
|
365
|
+
if isinstance(spark_dtype, types.StringType):
|
366
|
+
return exp.DataType(this=exp.DataType.Type.TEXT)
|
367
|
+
elif isinstance(spark_dtype, types.VarcharType):
|
368
|
+
return exp.DataType(
|
369
|
+
this=exp.DataType.Type.VARCHAR,
|
370
|
+
expressions=[exp.DataTypeParam(this=exp.Literal.number(spark_dtype.length))],
|
371
|
+
)
|
372
|
+
elif isinstance(spark_dtype, types.CharType):
|
373
|
+
return exp.DataType(
|
374
|
+
this=exp.DataType.Type.CHAR,
|
375
|
+
expressions=[exp.DataTypeParam(this=exp.Literal.number(spark_dtype.length))],
|
376
|
+
)
|
377
|
+
elif isinstance(spark_dtype, types.BinaryType):
|
378
|
+
return exp.DataType(this=exp.DataType.Type.BINARY)
|
379
|
+
elif isinstance(spark_dtype, types.BooleanType):
|
380
|
+
return exp.DataType(this=exp.DataType.Type.BOOLEAN)
|
381
|
+
elif isinstance(spark_dtype, types.IntegerType):
|
382
|
+
return exp.DataType(this=exp.DataType.Type.INT)
|
383
|
+
elif isinstance(spark_dtype, types.LongType):
|
384
|
+
return exp.DataType(this=exp.DataType.Type.BIGINT)
|
385
|
+
elif isinstance(spark_dtype, types.ShortType):
|
386
|
+
return exp.DataType(this=exp.DataType.Type.SMALLINT)
|
387
|
+
elif isinstance(spark_dtype, types.ByteType):
|
388
|
+
return exp.DataType(this=exp.DataType.Type.TINYINT)
|
389
|
+
elif isinstance(spark_dtype, types.FloatType):
|
390
|
+
return exp.DataType(this=exp.DataType.Type.FLOAT)
|
391
|
+
elif isinstance(spark_dtype, types.DoubleType):
|
392
|
+
return exp.DataType(this=exp.DataType.Type.DOUBLE)
|
393
|
+
elif isinstance(spark_dtype, types.DecimalType):
|
394
|
+
if spark_dtype.precision is not None and spark_dtype.scale is not None:
|
395
|
+
return exp.DataType(
|
396
|
+
this=exp.DataType.Type.DECIMAL,
|
397
|
+
expressions=[
|
398
|
+
exp.DataTypeParam(this=exp.Literal.number(spark_dtype.precision)),
|
399
|
+
exp.DataTypeParam(this=exp.Literal.number(spark_dtype.scale)),
|
400
|
+
],
|
401
|
+
)
|
402
|
+
return exp.DataType(this=exp.DataType.Type.DECIMAL)
|
403
|
+
elif isinstance(spark_dtype, types.TimestampType):
|
404
|
+
return exp.DataType(this=exp.DataType.Type.TIMESTAMP)
|
405
|
+
elif isinstance(spark_dtype, types.TimestampNTZType):
|
406
|
+
return exp.DataType(this=exp.DataType.Type.TIMESTAMPNTZ)
|
407
|
+
elif isinstance(spark_dtype, types.DateType):
|
408
|
+
return exp.DataType(this=exp.DataType.Type.DATE)
|
409
|
+
|
410
|
+
# Handle complex types
|
411
|
+
elif isinstance(spark_dtype, types.ArrayType):
|
412
|
+
return exp.DataType(
|
413
|
+
this=exp.DataType.Type.ARRAY, expressions=[spark_to_sqlglot(spark_dtype.elementType)]
|
414
|
+
)
|
415
|
+
elif isinstance(spark_dtype, types.MapType):
|
416
|
+
return exp.DataType(
|
417
|
+
this=exp.DataType.Type.MAP,
|
418
|
+
expressions=[
|
419
|
+
spark_to_sqlglot(spark_dtype.keyType),
|
420
|
+
spark_to_sqlglot(spark_dtype.valueType),
|
421
|
+
],
|
422
|
+
)
|
423
|
+
elif isinstance(spark_dtype, types.StructType):
|
424
|
+
return exp.DataType(
|
425
|
+
this=exp.DataType.Type.STRUCT,
|
426
|
+
expressions=[
|
427
|
+
exp.ColumnDef(
|
428
|
+
this=exp.to_identifier(field.name), kind=spark_to_sqlglot(field.dataType)
|
429
|
+
)
|
430
|
+
for field in spark_dtype
|
431
|
+
],
|
432
|
+
)
|
433
|
+
|
434
|
+
raise NotImplementedError(f"Unsupported data type: {spark_dtype}")
|
435
|
+
|
436
|
+
|
350
437
|
def normalize_string(
|
351
438
|
value: t.Union[str, exp.Expression],
|
352
439
|
from_dialect: DialectType = None,
|
{sqlframe-3.32.0.dist-info → sqlframe-3.33.0.dist-info}/RECORD
CHANGED
@@ -1,25 +1,25 @@
|
|
1
1
|
sqlframe/__init__.py,sha256=SB80yLTITBXHI2GCDS6n6bN5ObHqgPjfpRPAUwxaots,3403
|
2
|
-
sqlframe/_version.py,sha256=
|
2
|
+
sqlframe/_version.py,sha256=ov9Fneoqycyrmto-1SaECibEXFbQJlZrt2fFCQsBGtY,513
|
3
3
|
sqlframe/py.typed,sha256=Nqnn8clbgv-5l0PgxcTOldg8mkMKrFn4TvPL-rYUUGg,1
|
4
4
|
sqlframe/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
5
5
|
sqlframe/base/_typing.py,sha256=b2clI5HI1zEZKB_3Msx3FeAJQyft44ubUifJwQRVXyQ,1298
|
6
|
-
sqlframe/base/catalog.py,sha256
|
7
|
-
sqlframe/base/column.py,sha256=
|
8
|
-
sqlframe/base/dataframe.py,sha256=
|
6
|
+
sqlframe/base/catalog.py,sha256=-YulM2BMK8MoWbXi05AsJIPxd4AuiZDBCZuk4HoeMlE,38900
|
7
|
+
sqlframe/base/column.py,sha256=zDG9YT-5M7H8tDISOkJ6uMBU7Q3enTmc6d7rzZ08q40,20172
|
8
|
+
sqlframe/base/dataframe.py,sha256=6L8xTdwwQCkUzpJ6K3QlCcz5zqk2QQmGzteI-1EJ23A,84374
|
9
9
|
sqlframe/base/decorators.py,sha256=IhE5xNQDkwJHacCvulq5WpUKyKmXm7dL2A3o5WuKGP4,2131
|
10
10
|
sqlframe/base/exceptions.py,sha256=9Uwvqn2eAkDpqm4BrRgbL61qM-GMCbJEMAW8otxO46s,370
|
11
|
-
sqlframe/base/function_alternatives.py,sha256=
|
12
|
-
sqlframe/base/functions.py,sha256=
|
11
|
+
sqlframe/base/function_alternatives.py,sha256=dEymHSOQgUzhoYtfY5acC9AxpMoGoHXX7v6yTadKzn8,53527
|
12
|
+
sqlframe/base/functions.py,sha256=jNuCezcQl3j7hj1JsukaZLIvJqDtgQetiHnnTK5LU5w,226189
|
13
13
|
sqlframe/base/group.py,sha256=OY4w1WRsCqLgW-Pi7DjF63zbbxSLISCF3qjAbzI2CQ4,4283
|
14
14
|
sqlframe/base/normalize.py,sha256=nXAJ5CwxVf4DV0GsH-q1w0p8gmjSMlv96k_ez1eVul8,3880
|
15
15
|
sqlframe/base/operations.py,sha256=g-YNcbvNKTOBbYm23GKfB3fmydlR7ZZDAuZUtXIHtzw,4438
|
16
16
|
sqlframe/base/readerwriter.py,sha256=Nb2VJ_HBmLQp5mK8JhnFooZh2ydAaboCAFVPb-4MNX4,31241
|
17
|
-
sqlframe/base/session.py,sha256=
|
17
|
+
sqlframe/base/session.py,sha256=djXPmuW0cIQYuoE7hegfyvZuKC2D3ABZCjvw-fa1C24,27260
|
18
18
|
sqlframe/base/table.py,sha256=rCeh1W5SWbtEVfkLAUiexzrZwNgmZeptLEmLcM1ABkE,6961
|
19
19
|
sqlframe/base/transforms.py,sha256=y0j3SGDz3XCmNGrvassk1S-owllUWfkHyMgZlY6SFO4,467
|
20
20
|
sqlframe/base/types.py,sha256=iBNk9bpFtb2NBIogYS8i7OlQZMRvpR6XxqzBebsjQDU,12280
|
21
21
|
sqlframe/base/udf.py,sha256=O6hMhBUy9NVv-mhJRtfFhXTIa_-Z8Y_FkmmuOHu0l90,1117
|
22
|
-
sqlframe/base/util.py,sha256=
|
22
|
+
sqlframe/base/util.py,sha256=gv_kRc3LxCuQy3t4dHFldV7elB8RU5PMqIN5-xSkWSo,19107
|
23
23
|
sqlframe/base/window.py,sha256=7NaKDTlhun-95LEghukBCjFBwq0RHrPaajWQNCsLxok,4818
|
24
24
|
sqlframe/base/mixins/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
25
25
|
sqlframe/base/mixins/catalog_mixins.py,sha256=9fZGWToz9xMJSzUl1vsVtj6TH3TysP3fBCKJLnGUQzE,23353
|
@@ -130,8 +130,8 @@ sqlframe/standalone/udf.py,sha256=azmgtUjHNIPs0WMVNId05SHwiYn41MKVBhKXsQJ5dmY,27
|
|
130
130
|
sqlframe/standalone/window.py,sha256=6GKPzuxeSapJakBaKBeT9VpED1ACdjggDv9JRILDyV0,35
|
131
131
|
sqlframe/testing/__init__.py,sha256=VVCosQhitU74A3NnE52O4mNtGZONapuEXcc20QmSlnQ,132
|
132
132
|
sqlframe/testing/utils.py,sha256=PFsGZpwNUE_4-g_f43_vstTqsK0AQ2lBneb5Eb6NkFo,13008
|
133
|
-
sqlframe-3.
|
134
|
-
sqlframe-3.
|
135
|
-
sqlframe-3.
|
136
|
-
sqlframe-3.
|
137
|
-
sqlframe-3.
|
133
|
+
sqlframe-3.33.0.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
|
134
|
+
sqlframe-3.33.0.dist-info/METADATA,sha256=I0vB586w0J4WQoBE4hd1YDnh2fger8Ra7zEXdcbe9AY,8987
|
135
|
+
sqlframe-3.33.0.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
|
136
|
+
sqlframe-3.33.0.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
|
137
|
+
sqlframe-3.33.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|