sqlframe 3.18.0__py3-none-any.whl → 3.19.0__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- sqlframe/_version.py +2 -2
- sqlframe/base/dataframe.py +1 -1
- sqlframe/base/functions.py +52 -9
- sqlframe/base/session.py +8 -7
- sqlframe/spark/session.py +1 -1
- {sqlframe-3.18.0.dist-info → sqlframe-3.19.0.dist-info}/METADATA +2 -2
- {sqlframe-3.18.0.dist-info → sqlframe-3.19.0.dist-info}/RECORD +10 -10
- {sqlframe-3.18.0.dist-info → sqlframe-3.19.0.dist-info}/LICENSE +0 -0
- {sqlframe-3.18.0.dist-info → sqlframe-3.19.0.dist-info}/WHEEL +0 -0
- {sqlframe-3.18.0.dist-info → sqlframe-3.19.0.dist-info}/top_level.txt +0 -0
sqlframe/_version.py
CHANGED
sqlframe/base/dataframe.py
CHANGED
@@ -1066,7 +1066,7 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
|
|
1066
1066
|
left_col.sql(dialect=self.session.input_dialect),
|
1067
1067
|
right_col.sql(dialect=self.session.input_dialect),
|
1068
1068
|
).alias(left_col.alias_or_name)
|
1069
|
-
if
|
1069
|
+
if join_type == "full outer"
|
1070
1070
|
else left_col.alias_or_name
|
1071
1071
|
for left_col, right_col in join_column_pairs
|
1072
1072
|
]
|
sqlframe/base/functions.py
CHANGED
@@ -8,6 +8,7 @@ import typing as t
|
|
8
8
|
|
9
9
|
from sqlglot import Dialect
|
10
10
|
from sqlglot import exp as expression
|
11
|
+
from sqlglot.dialects.dialect import time_format
|
11
12
|
from sqlglot.helper import ensure_list
|
12
13
|
from sqlglot.helper import flatten as _flatten
|
13
14
|
|
@@ -2016,9 +2017,12 @@ def array_prepend(col: ColumnOrName, value: ColumnOrLiteral) -> Column:
|
|
2016
2017
|
return Column.invoke_anonymous_function(col, "ARRAY_PREPEND", value)
|
2017
2018
|
|
2018
2019
|
|
2019
|
-
@meta(
|
2020
|
+
@meta()
|
2020
2021
|
def array_size(col: ColumnOrName) -> Column:
|
2021
|
-
|
2022
|
+
session = _get_session()
|
2023
|
+
if session._is_spark or session._is_databricks:
|
2024
|
+
return Column.invoke_anonymous_function(col, "ARRAY_SIZE")
|
2025
|
+
return Column.invoke_expression_over_column(col, expression.ArraySize)
|
2022
2026
|
|
2023
2027
|
|
2024
2028
|
@meta(unsupported_engines="*")
|
@@ -6088,7 +6092,7 @@ def to_timestamp_ltz(
|
|
6088
6092
|
return Column.invoke_anonymous_function(timestamp, "to_timestamp_ltz")
|
6089
6093
|
|
6090
6094
|
|
6091
|
-
@meta(
|
6095
|
+
@meta()
|
6092
6096
|
def to_timestamp_ntz(
|
6093
6097
|
timestamp: ColumnOrName,
|
6094
6098
|
format: t.Optional[ColumnOrName] = None,
|
@@ -6118,6 +6122,32 @@ def to_timestamp_ntz(
|
|
6118
6122
|
... # doctest: +SKIP
|
6119
6123
|
[Row(r=datetime.datetime(2016, 4, 8, 0, 0))]
|
6120
6124
|
"""
|
6125
|
+
session = _get_session()
|
6126
|
+
|
6127
|
+
if session._is_duckdb:
|
6128
|
+
to_timestamp_func = get_func_from_session("to_timestamp")
|
6129
|
+
return to_timestamp_func(timestamp, format)
|
6130
|
+
|
6131
|
+
if session._is_bigquery:
|
6132
|
+
if format is not None:
|
6133
|
+
return Column.invoke_anonymous_function(
|
6134
|
+
session.format_execution_time(format), # type: ignore
|
6135
|
+
"parse_datetime",
|
6136
|
+
timestamp,
|
6137
|
+
)
|
6138
|
+
else:
|
6139
|
+
return Column.ensure_col(timestamp).cast("datetime", dialect="bigquery")
|
6140
|
+
|
6141
|
+
if session._is_postgres:
|
6142
|
+
if format is not None:
|
6143
|
+
return Column.invoke_anonymous_function(
|
6144
|
+
timestamp,
|
6145
|
+
"to_timestamp",
|
6146
|
+
session.format_execution_time(format), # type: ignore
|
6147
|
+
)
|
6148
|
+
else:
|
6149
|
+
return Column.ensure_col(timestamp).cast("timestamp", dialect="postgres")
|
6150
|
+
|
6121
6151
|
if format is not None:
|
6122
6152
|
return Column.invoke_anonymous_function(timestamp, "to_timestamp_ntz", format)
|
6123
6153
|
else:
|
@@ -6442,12 +6472,25 @@ def unix_micros(col: ColumnOrName) -> Column:
|
|
6442
6472
|
"""
|
6443
6473
|
from sqlframe.base.function_alternatives import unix_micros_multiply_epoch
|
6444
6474
|
|
6445
|
-
if (
|
6446
|
-
|
6447
|
-
|
6448
|
-
|
6449
|
-
|
6450
|
-
|
6475
|
+
if _get_session()._is_duckdb:
|
6476
|
+
return Column.invoke_anonymous_function(col, "epoch_us")
|
6477
|
+
|
6478
|
+
if _get_session()._is_bigquery:
|
6479
|
+
return Column(
|
6480
|
+
expression.Anonymous(
|
6481
|
+
this="UNIX_MICROS",
|
6482
|
+
expressions=[
|
6483
|
+
expression.Anonymous(
|
6484
|
+
this="TIMESTAMP",
|
6485
|
+
expressions=[
|
6486
|
+
Column.ensure_col(col).column_expression,
|
6487
|
+
],
|
6488
|
+
)
|
6489
|
+
],
|
6490
|
+
)
|
6491
|
+
)
|
6492
|
+
|
6493
|
+
if _get_session()._is_postgres or _get_session()._is_snowflake:
|
6451
6494
|
return unix_micros_multiply_epoch(col)
|
6452
6495
|
|
6453
6496
|
return Column.invoke_anonymous_function(col, "unix_micros")
|
sqlframe/base/session.py
CHANGED
@@ -267,10 +267,6 @@ class _BaseSession(t.Generic[CATALOG, READER, WRITER, DF, TABLE, CONN, UDF_REGIS
|
|
267
267
|
else:
|
268
268
|
column_mapping = {}
|
269
269
|
|
270
|
-
column_mapping = {
|
271
|
-
normalize_identifiers(k, self.input_dialect).sql(dialect=self.input_dialect): v
|
272
|
-
for k, v in column_mapping.items()
|
273
|
-
}
|
274
270
|
empty_df = not data
|
275
271
|
rows = [[None] * len(column_mapping)] if empty_df else list(data) # type: ignore
|
276
272
|
|
@@ -327,7 +323,6 @@ class _BaseSession(t.Generic[CATALOG, READER, WRITER, DF, TABLE, CONN, UDF_REGIS
|
|
327
323
|
if isinstance(sample_row, Row):
|
328
324
|
sample_row = sample_row.asDict()
|
329
325
|
if isinstance(sample_row, dict):
|
330
|
-
sample_row = normalize_dict(self, sample_row)
|
331
326
|
default_data_type = get_default_data_type(sample_row[name])
|
332
327
|
updated_mapping[name] = (
|
333
328
|
exp.DataType.build(default_data_type, dialect="spark")
|
@@ -387,7 +382,11 @@ class _BaseSession(t.Generic[CATALOG, READER, WRITER, DF, TABLE, CONN, UDF_REGIS
|
|
387
382
|
sel_expression = exp.Select(**select_kwargs)
|
388
383
|
if empty_df:
|
389
384
|
sel_expression = sel_expression.where(exp.false())
|
390
|
-
|
385
|
+
df = self._create_df(sel_expression)
|
386
|
+
df._update_display_name_mapping(
|
387
|
+
df._ensure_and_normalize_cols(list(column_mapping.keys())), list(column_mapping.keys())
|
388
|
+
)
|
389
|
+
return df
|
391
390
|
|
392
391
|
def sql(
|
393
392
|
self,
|
@@ -526,7 +525,9 @@ class _BaseSession(t.Generic[CATALOG, READER, WRITER, DF, TABLE, CONN, UDF_REGIS
|
|
526
525
|
col_id._meta = {"case_sensitive": True, **(col_id._meta or {})}
|
527
526
|
case_sensitive_cols.append(col_id)
|
528
527
|
columns = [
|
529
|
-
normalize_string(
|
528
|
+
normalize_string(
|
529
|
+
x, from_dialect="execution", to_dialect="output", to_string_literal=True
|
530
|
+
)
|
530
531
|
for x in case_sensitive_cols
|
531
532
|
]
|
532
533
|
return [self._to_row(columns, row) for row in result]
|
sqlframe/spark/session.py
CHANGED
@@ -86,7 +86,7 @@ class SparkSession(
|
|
86
86
|
col_id = exp.parse_identifier(k, dialect=self.execution_dialect)
|
87
87
|
col_id._meta = {"case_sensitive": True, **(col_id._meta or {})}
|
88
88
|
col_name = normalize_string(
|
89
|
-
col_id, from_dialect="execution", to_dialect="output",
|
89
|
+
col_id, from_dialect="execution", to_dialect="output", to_string_literal=True
|
90
90
|
)
|
91
91
|
rows_normalized[col_name] = v
|
92
92
|
results.append(Row(**rows_normalized))
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: sqlframe
|
3
|
-
Version: 3.
|
3
|
+
Version: 3.19.0
|
4
4
|
Summary: Turning PySpark Into a Universal DataFrame API
|
5
5
|
Home-page: https://github.com/eakmanrq/sqlframe
|
6
6
|
Author: Ryan Eakman
|
@@ -17,7 +17,7 @@ Requires-Python: >=3.9
|
|
17
17
|
Description-Content-Type: text/markdown
|
18
18
|
License-File: LICENSE
|
19
19
|
Requires-Dist: prettytable <4
|
20
|
-
Requires-Dist: sqlglot <26.
|
20
|
+
Requires-Dist: sqlglot <26.7,>=24.0.0
|
21
21
|
Requires-Dist: typing-extensions
|
22
22
|
Provides-Extra: bigquery
|
23
23
|
Requires-Dist: google-cloud-bigquery-storage <3,>=2 ; extra == 'bigquery'
|
@@ -1,19 +1,19 @@
|
|
1
1
|
sqlframe/__init__.py,sha256=wfqm98eLoLid9oV_FzzpG5loKC6LxOhj2lXpfN7SARo,3138
|
2
|
-
sqlframe/_version.py,sha256=
|
2
|
+
sqlframe/_version.py,sha256=bRGLbmtauY86O6qq58KRvSDdCcwrGM24X-Zm0Elw0sU,413
|
3
3
|
sqlframe/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
4
4
|
sqlframe/base/_typing.py,sha256=b2clI5HI1zEZKB_3Msx3FeAJQyft44ubUifJwQRVXyQ,1298
|
5
5
|
sqlframe/base/catalog.py,sha256=SzFQalTWdhWzxUY-4ut1f9TfOECp_JmJEgNPfrRKCe0,38457
|
6
6
|
sqlframe/base/column.py,sha256=oHVwkSWABO3ZlAbgBShsxSSlgbI06BOup5XJrRhgqJI,18097
|
7
|
-
sqlframe/base/dataframe.py,sha256=
|
7
|
+
sqlframe/base/dataframe.py,sha256=mKXbIKYiKH5mh6qj0Dg7L_znmCL85q9kHlmHtCW4kJ4,79352
|
8
8
|
sqlframe/base/decorators.py,sha256=ms-CvDOIW3T8IVB9VqDmLwAiaEsqXLYRXEqVQaxktiM,1890
|
9
9
|
sqlframe/base/exceptions.py,sha256=9Uwvqn2eAkDpqm4BrRgbL61qM-GMCbJEMAW8otxO46s,370
|
10
10
|
sqlframe/base/function_alternatives.py,sha256=NV31IaEhVYmfUSWetAEFISAvLzs2DxQ7bp-iMNgj0hQ,53786
|
11
|
-
sqlframe/base/functions.py,sha256=
|
11
|
+
sqlframe/base/functions.py,sha256=j_Sh4qIcR-2lesJT_2TzBlTIM46os35AcmMuwBm86DE,222512
|
12
12
|
sqlframe/base/group.py,sha256=fsyG5990_Pd7gFPjTFrH9IEoAquL_wEkVpIlBAIkZJU,4091
|
13
13
|
sqlframe/base/normalize.py,sha256=nXAJ5CwxVf4DV0GsH-q1w0p8gmjSMlv96k_ez1eVul8,3880
|
14
14
|
sqlframe/base/operations.py,sha256=xSPw74e59wYvNd6U1AlwziNCTG6Aftrbl4SybN9u9VE,3450
|
15
15
|
sqlframe/base/readerwriter.py,sha256=w8926cqIrXF7NGHiINw5UHzP_3xpjsqbijTBTzycBRM,26605
|
16
|
-
sqlframe/base/session.py,sha256=
|
16
|
+
sqlframe/base/session.py,sha256=G5_bI_z1iJtAGm2SgEdjkKiyJmS0yOUopx9P5TEGdR4,27273
|
17
17
|
sqlframe/base/table.py,sha256=rCeh1W5SWbtEVfkLAUiexzrZwNgmZeptLEmLcM1ABkE,6961
|
18
18
|
sqlframe/base/transforms.py,sha256=y0j3SGDz3XCmNGrvassk1S-owllUWfkHyMgZlY6SFO4,467
|
19
19
|
sqlframe/base/types.py,sha256=iBNk9bpFtb2NBIogYS8i7OlQZMRvpR6XxqzBebsjQDU,12280
|
@@ -110,7 +110,7 @@ sqlframe/spark/functions.py,sha256=MYCgHsjRQWylT-rezWRBuLV6BivcaVarbaQtP4T0toQ,3
|
|
110
110
|
sqlframe/spark/functions.pyi,sha256=GyOdUzv2Z7Qt99JAKEPKgV2t2Rn274OuqwAfcoAXlN0,24259
|
111
111
|
sqlframe/spark/group.py,sha256=MrvV_v-YkBc6T1zz882WrEqtWjlooWIyHBCmTQg3fCA,379
|
112
112
|
sqlframe/spark/readwriter.py,sha256=zXZcCPWpQMMN90wdIx8AD4Y5tWBcpRSL4-yKX2aZyik,874
|
113
|
-
sqlframe/spark/session.py,sha256=
|
113
|
+
sqlframe/spark/session.py,sha256=irlsTky06pKRKAyPLwVzUtLGe4O8mALSgxIqLvqJNF8,5675
|
114
114
|
sqlframe/spark/table.py,sha256=puWV8h_CqA64zwpzq0ydY9LoygMAvprkODyxyzZeF9M,186
|
115
115
|
sqlframe/spark/types.py,sha256=KwNyuXIo-2xVVd4bZED3YrQOobKCtemlxGrJL7DrTC8,34
|
116
116
|
sqlframe/spark/udf.py,sha256=owB8NDaGVkUQ0WGm7SZt2t9zfvLFCfi0W48QiPfgjck,1153
|
@@ -129,8 +129,8 @@ sqlframe/standalone/udf.py,sha256=azmgtUjHNIPs0WMVNId05SHwiYn41MKVBhKXsQJ5dmY,27
|
|
129
129
|
sqlframe/standalone/window.py,sha256=6GKPzuxeSapJakBaKBeT9VpED1ACdjggDv9JRILDyV0,35
|
130
130
|
sqlframe/testing/__init__.py,sha256=VVCosQhitU74A3NnE52O4mNtGZONapuEXcc20QmSlnQ,132
|
131
131
|
sqlframe/testing/utils.py,sha256=PFsGZpwNUE_4-g_f43_vstTqsK0AQ2lBneb5Eb6NkFo,13008
|
132
|
-
sqlframe-3.
|
133
|
-
sqlframe-3.
|
134
|
-
sqlframe-3.
|
135
|
-
sqlframe-3.
|
136
|
-
sqlframe-3.
|
132
|
+
sqlframe-3.19.0.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
|
133
|
+
sqlframe-3.19.0.dist-info/METADATA,sha256=t_G87pTEVYezUc-A5TIumPN-sHNsgTjW8vNgZ4Jvjpw,8970
|
134
|
+
sqlframe-3.19.0.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
|
135
|
+
sqlframe-3.19.0.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
|
136
|
+
sqlframe-3.19.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|