sqlframe 3.18.0__py3-none-any.whl → 3.19.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sqlframe/_version.py +2 -2
- sqlframe/base/dataframe.py +1 -1
- sqlframe/base/functions.py +52 -9
- sqlframe/base/session.py +8 -7
- sqlframe/spark/session.py +1 -1
- {sqlframe-3.18.0.dist-info → sqlframe-3.19.0.dist-info}/METADATA +2 -2
- {sqlframe-3.18.0.dist-info → sqlframe-3.19.0.dist-info}/RECORD +10 -10
- {sqlframe-3.18.0.dist-info → sqlframe-3.19.0.dist-info}/LICENSE +0 -0
- {sqlframe-3.18.0.dist-info → sqlframe-3.19.0.dist-info}/WHEEL +0 -0
- {sqlframe-3.18.0.dist-info → sqlframe-3.19.0.dist-info}/top_level.txt +0 -0
sqlframe/_version.py
CHANGED
sqlframe/base/dataframe.py
CHANGED
@@ -1066,7 +1066,7 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
|
|
1066
1066
|
left_col.sql(dialect=self.session.input_dialect),
|
1067
1067
|
right_col.sql(dialect=self.session.input_dialect),
|
1068
1068
|
).alias(left_col.alias_or_name)
|
1069
|
-
if
|
1069
|
+
if join_type == "full outer"
|
1070
1070
|
else left_col.alias_or_name
|
1071
1071
|
for left_col, right_col in join_column_pairs
|
1072
1072
|
]
|
sqlframe/base/functions.py
CHANGED
@@ -8,6 +8,7 @@ import typing as t
|
|
8
8
|
|
9
9
|
from sqlglot import Dialect
|
10
10
|
from sqlglot import exp as expression
|
11
|
+
from sqlglot.dialects.dialect import time_format
|
11
12
|
from sqlglot.helper import ensure_list
|
12
13
|
from sqlglot.helper import flatten as _flatten
|
13
14
|
|
@@ -2016,9 +2017,12 @@ def array_prepend(col: ColumnOrName, value: ColumnOrLiteral) -> Column:
|
|
2016
2017
|
return Column.invoke_anonymous_function(col, "ARRAY_PREPEND", value)
|
2017
2018
|
|
2018
2019
|
|
2019
|
-
@meta(
|
2020
|
+
@meta()
|
2020
2021
|
def array_size(col: ColumnOrName) -> Column:
|
2021
|
-
|
2022
|
+
session = _get_session()
|
2023
|
+
if session._is_spark or session._is_databricks:
|
2024
|
+
return Column.invoke_anonymous_function(col, "ARRAY_SIZE")
|
2025
|
+
return Column.invoke_expression_over_column(col, expression.ArraySize)
|
2022
2026
|
|
2023
2027
|
|
2024
2028
|
@meta(unsupported_engines="*")
|
@@ -6088,7 +6092,7 @@ def to_timestamp_ltz(
|
|
6088
6092
|
return Column.invoke_anonymous_function(timestamp, "to_timestamp_ltz")
|
6089
6093
|
|
6090
6094
|
|
6091
|
-
@meta(
|
6095
|
+
@meta()
|
6092
6096
|
def to_timestamp_ntz(
|
6093
6097
|
timestamp: ColumnOrName,
|
6094
6098
|
format: t.Optional[ColumnOrName] = None,
|
@@ -6118,6 +6122,32 @@ def to_timestamp_ntz(
|
|
6118
6122
|
... # doctest: +SKIP
|
6119
6123
|
[Row(r=datetime.datetime(2016, 4, 8, 0, 0))]
|
6120
6124
|
"""
|
6125
|
+
session = _get_session()
|
6126
|
+
|
6127
|
+
if session._is_duckdb:
|
6128
|
+
to_timestamp_func = get_func_from_session("to_timestamp")
|
6129
|
+
return to_timestamp_func(timestamp, format)
|
6130
|
+
|
6131
|
+
if session._is_bigquery:
|
6132
|
+
if format is not None:
|
6133
|
+
return Column.invoke_anonymous_function(
|
6134
|
+
session.format_execution_time(format), # type: ignore
|
6135
|
+
"parse_datetime",
|
6136
|
+
timestamp,
|
6137
|
+
)
|
6138
|
+
else:
|
6139
|
+
return Column.ensure_col(timestamp).cast("datetime", dialect="bigquery")
|
6140
|
+
|
6141
|
+
if session._is_postgres:
|
6142
|
+
if format is not None:
|
6143
|
+
return Column.invoke_anonymous_function(
|
6144
|
+
timestamp,
|
6145
|
+
"to_timestamp",
|
6146
|
+
session.format_execution_time(format), # type: ignore
|
6147
|
+
)
|
6148
|
+
else:
|
6149
|
+
return Column.ensure_col(timestamp).cast("timestamp", dialect="postgres")
|
6150
|
+
|
6121
6151
|
if format is not None:
|
6122
6152
|
return Column.invoke_anonymous_function(timestamp, "to_timestamp_ntz", format)
|
6123
6153
|
else:
|
@@ -6442,12 +6472,25 @@ def unix_micros(col: ColumnOrName) -> Column:
|
|
6442
6472
|
"""
|
6443
6473
|
from sqlframe.base.function_alternatives import unix_micros_multiply_epoch
|
6444
6474
|
|
6445
|
-
if (
|
6446
|
-
|
6447
|
-
|
6448
|
-
|
6449
|
-
|
6450
|
-
|
6475
|
+
if _get_session()._is_duckdb:
|
6476
|
+
return Column.invoke_anonymous_function(col, "epoch_us")
|
6477
|
+
|
6478
|
+
if _get_session()._is_bigquery:
|
6479
|
+
return Column(
|
6480
|
+
expression.Anonymous(
|
6481
|
+
this="UNIX_MICROS",
|
6482
|
+
expressions=[
|
6483
|
+
expression.Anonymous(
|
6484
|
+
this="TIMESTAMP",
|
6485
|
+
expressions=[
|
6486
|
+
Column.ensure_col(col).column_expression,
|
6487
|
+
],
|
6488
|
+
)
|
6489
|
+
],
|
6490
|
+
)
|
6491
|
+
)
|
6492
|
+
|
6493
|
+
if _get_session()._is_postgres or _get_session()._is_snowflake:
|
6451
6494
|
return unix_micros_multiply_epoch(col)
|
6452
6495
|
|
6453
6496
|
return Column.invoke_anonymous_function(col, "unix_micros")
|
sqlframe/base/session.py
CHANGED
@@ -267,10 +267,6 @@ class _BaseSession(t.Generic[CATALOG, READER, WRITER, DF, TABLE, CONN, UDF_REGIS
|
|
267
267
|
else:
|
268
268
|
column_mapping = {}
|
269
269
|
|
270
|
-
column_mapping = {
|
271
|
-
normalize_identifiers(k, self.input_dialect).sql(dialect=self.input_dialect): v
|
272
|
-
for k, v in column_mapping.items()
|
273
|
-
}
|
274
270
|
empty_df = not data
|
275
271
|
rows = [[None] * len(column_mapping)] if empty_df else list(data) # type: ignore
|
276
272
|
|
@@ -327,7 +323,6 @@ class _BaseSession(t.Generic[CATALOG, READER, WRITER, DF, TABLE, CONN, UDF_REGIS
|
|
327
323
|
if isinstance(sample_row, Row):
|
328
324
|
sample_row = sample_row.asDict()
|
329
325
|
if isinstance(sample_row, dict):
|
330
|
-
sample_row = normalize_dict(self, sample_row)
|
331
326
|
default_data_type = get_default_data_type(sample_row[name])
|
332
327
|
updated_mapping[name] = (
|
333
328
|
exp.DataType.build(default_data_type, dialect="spark")
|
@@ -387,7 +382,11 @@ class _BaseSession(t.Generic[CATALOG, READER, WRITER, DF, TABLE, CONN, UDF_REGIS
|
|
387
382
|
sel_expression = exp.Select(**select_kwargs)
|
388
383
|
if empty_df:
|
389
384
|
sel_expression = sel_expression.where(exp.false())
|
390
|
-
|
385
|
+
df = self._create_df(sel_expression)
|
386
|
+
df._update_display_name_mapping(
|
387
|
+
df._ensure_and_normalize_cols(list(column_mapping.keys())), list(column_mapping.keys())
|
388
|
+
)
|
389
|
+
return df
|
391
390
|
|
392
391
|
def sql(
|
393
392
|
self,
|
@@ -526,7 +525,9 @@ class _BaseSession(t.Generic[CATALOG, READER, WRITER, DF, TABLE, CONN, UDF_REGIS
|
|
526
525
|
col_id._meta = {"case_sensitive": True, **(col_id._meta or {})}
|
527
526
|
case_sensitive_cols.append(col_id)
|
528
527
|
columns = [
|
529
|
-
normalize_string(
|
528
|
+
normalize_string(
|
529
|
+
x, from_dialect="execution", to_dialect="output", to_string_literal=True
|
530
|
+
)
|
530
531
|
for x in case_sensitive_cols
|
531
532
|
]
|
532
533
|
return [self._to_row(columns, row) for row in result]
|
sqlframe/spark/session.py
CHANGED
@@ -86,7 +86,7 @@ class SparkSession(
|
|
86
86
|
col_id = exp.parse_identifier(k, dialect=self.execution_dialect)
|
87
87
|
col_id._meta = {"case_sensitive": True, **(col_id._meta or {})}
|
88
88
|
col_name = normalize_string(
|
89
|
-
col_id, from_dialect="execution", to_dialect="output",
|
89
|
+
col_id, from_dialect="execution", to_dialect="output", to_string_literal=True
|
90
90
|
)
|
91
91
|
rows_normalized[col_name] = v
|
92
92
|
results.append(Row(**rows_normalized))
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: sqlframe
|
3
|
-
Version: 3.18.0
|
3
|
+
Version: 3.19.0
|
4
4
|
Summary: Turning PySpark Into a Universal DataFrame API
|
5
5
|
Home-page: https://github.com/eakmanrq/sqlframe
|
6
6
|
Author: Ryan Eakman
|
@@ -17,7 +17,7 @@ Requires-Python: >=3.9
|
|
17
17
|
Description-Content-Type: text/markdown
|
18
18
|
License-File: LICENSE
|
19
19
|
Requires-Dist: prettytable <4
|
20
|
-
Requires-Dist: sqlglot <26.
|
20
|
+
Requires-Dist: sqlglot <26.7,>=24.0.0
|
21
21
|
Requires-Dist: typing-extensions
|
22
22
|
Provides-Extra: bigquery
|
23
23
|
Requires-Dist: google-cloud-bigquery-storage <3,>=2 ; extra == 'bigquery'
|
@@ -1,19 +1,19 @@
|
|
1
1
|
sqlframe/__init__.py,sha256=wfqm98eLoLid9oV_FzzpG5loKC6LxOhj2lXpfN7SARo,3138
|
2
|
-
sqlframe/_version.py,sha256=
|
2
|
+
sqlframe/_version.py,sha256=bRGLbmtauY86O6qq58KRvSDdCcwrGM24X-Zm0Elw0sU,413
|
3
3
|
sqlframe/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
4
4
|
sqlframe/base/_typing.py,sha256=b2clI5HI1zEZKB_3Msx3FeAJQyft44ubUifJwQRVXyQ,1298
|
5
5
|
sqlframe/base/catalog.py,sha256=SzFQalTWdhWzxUY-4ut1f9TfOECp_JmJEgNPfrRKCe0,38457
|
6
6
|
sqlframe/base/column.py,sha256=oHVwkSWABO3ZlAbgBShsxSSlgbI06BOup5XJrRhgqJI,18097
|
7
|
-
sqlframe/base/dataframe.py,sha256=
|
7
|
+
sqlframe/base/dataframe.py,sha256=mKXbIKYiKH5mh6qj0Dg7L_znmCL85q9kHlmHtCW4kJ4,79352
|
8
8
|
sqlframe/base/decorators.py,sha256=ms-CvDOIW3T8IVB9VqDmLwAiaEsqXLYRXEqVQaxktiM,1890
|
9
9
|
sqlframe/base/exceptions.py,sha256=9Uwvqn2eAkDpqm4BrRgbL61qM-GMCbJEMAW8otxO46s,370
|
10
10
|
sqlframe/base/function_alternatives.py,sha256=NV31IaEhVYmfUSWetAEFISAvLzs2DxQ7bp-iMNgj0hQ,53786
|
11
|
-
sqlframe/base/functions.py,sha256=
|
11
|
+
sqlframe/base/functions.py,sha256=j_Sh4qIcR-2lesJT_2TzBlTIM46os35AcmMuwBm86DE,222512
|
12
12
|
sqlframe/base/group.py,sha256=fsyG5990_Pd7gFPjTFrH9IEoAquL_wEkVpIlBAIkZJU,4091
|
13
13
|
sqlframe/base/normalize.py,sha256=nXAJ5CwxVf4DV0GsH-q1w0p8gmjSMlv96k_ez1eVul8,3880
|
14
14
|
sqlframe/base/operations.py,sha256=xSPw74e59wYvNd6U1AlwziNCTG6Aftrbl4SybN9u9VE,3450
|
15
15
|
sqlframe/base/readerwriter.py,sha256=w8926cqIrXF7NGHiINw5UHzP_3xpjsqbijTBTzycBRM,26605
|
16
|
-
sqlframe/base/session.py,sha256=
|
16
|
+
sqlframe/base/session.py,sha256=G5_bI_z1iJtAGm2SgEdjkKiyJmS0yOUopx9P5TEGdR4,27273
|
17
17
|
sqlframe/base/table.py,sha256=rCeh1W5SWbtEVfkLAUiexzrZwNgmZeptLEmLcM1ABkE,6961
|
18
18
|
sqlframe/base/transforms.py,sha256=y0j3SGDz3XCmNGrvassk1S-owllUWfkHyMgZlY6SFO4,467
|
19
19
|
sqlframe/base/types.py,sha256=iBNk9bpFtb2NBIogYS8i7OlQZMRvpR6XxqzBebsjQDU,12280
|
@@ -110,7 +110,7 @@ sqlframe/spark/functions.py,sha256=MYCgHsjRQWylT-rezWRBuLV6BivcaVarbaQtP4T0toQ,3
|
|
110
110
|
sqlframe/spark/functions.pyi,sha256=GyOdUzv2Z7Qt99JAKEPKgV2t2Rn274OuqwAfcoAXlN0,24259
|
111
111
|
sqlframe/spark/group.py,sha256=MrvV_v-YkBc6T1zz882WrEqtWjlooWIyHBCmTQg3fCA,379
|
112
112
|
sqlframe/spark/readwriter.py,sha256=zXZcCPWpQMMN90wdIx8AD4Y5tWBcpRSL4-yKX2aZyik,874
|
113
|
-
sqlframe/spark/session.py,sha256=
|
113
|
+
sqlframe/spark/session.py,sha256=irlsTky06pKRKAyPLwVzUtLGe4O8mALSgxIqLvqJNF8,5675
|
114
114
|
sqlframe/spark/table.py,sha256=puWV8h_CqA64zwpzq0ydY9LoygMAvprkODyxyzZeF9M,186
|
115
115
|
sqlframe/spark/types.py,sha256=KwNyuXIo-2xVVd4bZED3YrQOobKCtemlxGrJL7DrTC8,34
|
116
116
|
sqlframe/spark/udf.py,sha256=owB8NDaGVkUQ0WGm7SZt2t9zfvLFCfi0W48QiPfgjck,1153
|
@@ -129,8 +129,8 @@ sqlframe/standalone/udf.py,sha256=azmgtUjHNIPs0WMVNId05SHwiYn41MKVBhKXsQJ5dmY,27
|
|
129
129
|
sqlframe/standalone/window.py,sha256=6GKPzuxeSapJakBaKBeT9VpED1ACdjggDv9JRILDyV0,35
|
130
130
|
sqlframe/testing/__init__.py,sha256=VVCosQhitU74A3NnE52O4mNtGZONapuEXcc20QmSlnQ,132
|
131
131
|
sqlframe/testing/utils.py,sha256=PFsGZpwNUE_4-g_f43_vstTqsK0AQ2lBneb5Eb6NkFo,13008
|
132
|
-
sqlframe-3.18.0.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
|
133
|
-
sqlframe-3.
|
134
|
-
sqlframe-3.18.0.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
|
135
|
-
sqlframe-3.18.0.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
|
136
|
-
sqlframe-3.18.0.dist-info/RECORD,,
|
132
|
+
sqlframe-3.19.0.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
|
133
|
+
sqlframe-3.19.0.dist-info/METADATA,sha256=t_G87pTEVYezUc-A5TIumPN-sHNsgTjW8vNgZ4Jvjpw,8970
|
134
|
+
sqlframe-3.19.0.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
|
135
|
+
sqlframe-3.19.0.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
|
136
|
+
sqlframe-3.19.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|