sqlframe 3.18.0__py3-none-any.whl → 3.19.0__py3-none-any.whl

This diff shows the differences in content between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
sqlframe/_version.py CHANGED
@@ -12,5 +12,5 @@ __version__: str
12
12
  __version_tuple__: VERSION_TUPLE
13
13
  version_tuple: VERSION_TUPLE
14
14
 
15
- __version__ = version = '3.18.0'
16
- __version_tuple__ = version_tuple = (3, 18, 0)
15
+ __version__ = version = '3.19.0'
16
+ __version_tuple__ = version_tuple = (3, 19, 0)
@@ -1066,7 +1066,7 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
1066
1066
  left_col.sql(dialect=self.session.input_dialect),
1067
1067
  right_col.sql(dialect=self.session.input_dialect),
1068
1068
  ).alias(left_col.alias_or_name)
1069
- if how == "full"
1069
+ if join_type == "full outer"
1070
1070
  else left_col.alias_or_name
1071
1071
  for left_col, right_col in join_column_pairs
1072
1072
  ]
@@ -8,6 +8,7 @@ import typing as t
8
8
 
9
9
  from sqlglot import Dialect
10
10
  from sqlglot import exp as expression
11
+ from sqlglot.dialects.dialect import time_format
11
12
  from sqlglot.helper import ensure_list
12
13
  from sqlglot.helper import flatten as _flatten
13
14
 
@@ -2016,9 +2017,12 @@ def array_prepend(col: ColumnOrName, value: ColumnOrLiteral) -> Column:
2016
2017
  return Column.invoke_anonymous_function(col, "ARRAY_PREPEND", value)
2017
2018
 
2018
2019
 
2019
- @meta(unsupported_engines="*")
2020
+ @meta()
2020
2021
  def array_size(col: ColumnOrName) -> Column:
2021
- return Column.invoke_anonymous_function(col, "ARRAY_SIZE")
2022
+ session = _get_session()
2023
+ if session._is_spark or session._is_databricks:
2024
+ return Column.invoke_anonymous_function(col, "ARRAY_SIZE")
2025
+ return Column.invoke_expression_over_column(col, expression.ArraySize)
2022
2026
 
2023
2027
 
2024
2028
  @meta(unsupported_engines="*")
@@ -6088,7 +6092,7 @@ def to_timestamp_ltz(
6088
6092
  return Column.invoke_anonymous_function(timestamp, "to_timestamp_ltz")
6089
6093
 
6090
6094
 
6091
- @meta(unsupported_engines="*")
6095
+ @meta()
6092
6096
  def to_timestamp_ntz(
6093
6097
  timestamp: ColumnOrName,
6094
6098
  format: t.Optional[ColumnOrName] = None,
@@ -6118,6 +6122,32 @@ def to_timestamp_ntz(
6118
6122
  ... # doctest: +SKIP
6119
6123
  [Row(r=datetime.datetime(2016, 4, 8, 0, 0))]
6120
6124
  """
6125
+ session = _get_session()
6126
+
6127
+ if session._is_duckdb:
6128
+ to_timestamp_func = get_func_from_session("to_timestamp")
6129
+ return to_timestamp_func(timestamp, format)
6130
+
6131
+ if session._is_bigquery:
6132
+ if format is not None:
6133
+ return Column.invoke_anonymous_function(
6134
+ session.format_execution_time(format), # type: ignore
6135
+ "parse_datetime",
6136
+ timestamp,
6137
+ )
6138
+ else:
6139
+ return Column.ensure_col(timestamp).cast("datetime", dialect="bigquery")
6140
+
6141
+ if session._is_postgres:
6142
+ if format is not None:
6143
+ return Column.invoke_anonymous_function(
6144
+ timestamp,
6145
+ "to_timestamp",
6146
+ session.format_execution_time(format), # type: ignore
6147
+ )
6148
+ else:
6149
+ return Column.ensure_col(timestamp).cast("timestamp", dialect="postgres")
6150
+
6121
6151
  if format is not None:
6122
6152
  return Column.invoke_anonymous_function(timestamp, "to_timestamp_ntz", format)
6123
6153
  else:
@@ -6442,12 +6472,25 @@ def unix_micros(col: ColumnOrName) -> Column:
6442
6472
  """
6443
6473
  from sqlframe.base.function_alternatives import unix_micros_multiply_epoch
6444
6474
 
6445
- if (
6446
- _get_session()._is_bigquery
6447
- or _get_session()._is_duckdb
6448
- or _get_session()._is_postgres
6449
- or _get_session()._is_snowflake
6450
- ):
6475
+ if _get_session()._is_duckdb:
6476
+ return Column.invoke_anonymous_function(col, "epoch_us")
6477
+
6478
+ if _get_session()._is_bigquery:
6479
+ return Column(
6480
+ expression.Anonymous(
6481
+ this="UNIX_MICROS",
6482
+ expressions=[
6483
+ expression.Anonymous(
6484
+ this="TIMESTAMP",
6485
+ expressions=[
6486
+ Column.ensure_col(col).column_expression,
6487
+ ],
6488
+ )
6489
+ ],
6490
+ )
6491
+ )
6492
+
6493
+ if _get_session()._is_postgres or _get_session()._is_snowflake:
6451
6494
  return unix_micros_multiply_epoch(col)
6452
6495
 
6453
6496
  return Column.invoke_anonymous_function(col, "unix_micros")
sqlframe/base/session.py CHANGED
@@ -267,10 +267,6 @@ class _BaseSession(t.Generic[CATALOG, READER, WRITER, DF, TABLE, CONN, UDF_REGIS
267
267
  else:
268
268
  column_mapping = {}
269
269
 
270
- column_mapping = {
271
- normalize_identifiers(k, self.input_dialect).sql(dialect=self.input_dialect): v
272
- for k, v in column_mapping.items()
273
- }
274
270
  empty_df = not data
275
271
  rows = [[None] * len(column_mapping)] if empty_df else list(data) # type: ignore
276
272
 
@@ -327,7 +323,6 @@ class _BaseSession(t.Generic[CATALOG, READER, WRITER, DF, TABLE, CONN, UDF_REGIS
327
323
  if isinstance(sample_row, Row):
328
324
  sample_row = sample_row.asDict()
329
325
  if isinstance(sample_row, dict):
330
- sample_row = normalize_dict(self, sample_row)
331
326
  default_data_type = get_default_data_type(sample_row[name])
332
327
  updated_mapping[name] = (
333
328
  exp.DataType.build(default_data_type, dialect="spark")
@@ -387,7 +382,11 @@ class _BaseSession(t.Generic[CATALOG, READER, WRITER, DF, TABLE, CONN, UDF_REGIS
387
382
  sel_expression = exp.Select(**select_kwargs)
388
383
  if empty_df:
389
384
  sel_expression = sel_expression.where(exp.false())
390
- return self._create_df(sel_expression)
385
+ df = self._create_df(sel_expression)
386
+ df._update_display_name_mapping(
387
+ df._ensure_and_normalize_cols(list(column_mapping.keys())), list(column_mapping.keys())
388
+ )
389
+ return df
391
390
 
392
391
  def sql(
393
392
  self,
@@ -526,7 +525,9 @@ class _BaseSession(t.Generic[CATALOG, READER, WRITER, DF, TABLE, CONN, UDF_REGIS
526
525
  col_id._meta = {"case_sensitive": True, **(col_id._meta or {})}
527
526
  case_sensitive_cols.append(col_id)
528
527
  columns = [
529
- normalize_string(x, from_dialect="execution", to_dialect="output")
528
+ normalize_string(
529
+ x, from_dialect="execution", to_dialect="output", to_string_literal=True
530
+ )
530
531
  for x in case_sensitive_cols
531
532
  ]
532
533
  return [self._to_row(columns, row) for row in result]
sqlframe/spark/session.py CHANGED
@@ -86,7 +86,7 @@ class SparkSession(
86
86
  col_id = exp.parse_identifier(k, dialect=self.execution_dialect)
87
87
  col_id._meta = {"case_sensitive": True, **(col_id._meta or {})}
88
88
  col_name = normalize_string(
89
- col_id, from_dialect="execution", to_dialect="output", is_column=True
89
+ col_id, from_dialect="execution", to_dialect="output", to_string_literal=True
90
90
  )
91
91
  rows_normalized[col_name] = v
92
92
  results.append(Row(**rows_normalized))
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sqlframe
3
- Version: 3.18.0
3
+ Version: 3.19.0
4
4
  Summary: Turning PySpark Into a Universal DataFrame API
5
5
  Home-page: https://github.com/eakmanrq/sqlframe
6
6
  Author: Ryan Eakman
@@ -17,7 +17,7 @@ Requires-Python: >=3.9
17
17
  Description-Content-Type: text/markdown
18
18
  License-File: LICENSE
19
19
  Requires-Dist: prettytable <4
20
- Requires-Dist: sqlglot <26.5,>=24.0.0
20
+ Requires-Dist: sqlglot <26.7,>=24.0.0
21
21
  Requires-Dist: typing-extensions
22
22
  Provides-Extra: bigquery
23
23
  Requires-Dist: google-cloud-bigquery-storage <3,>=2 ; extra == 'bigquery'
@@ -1,19 +1,19 @@
1
1
  sqlframe/__init__.py,sha256=wfqm98eLoLid9oV_FzzpG5loKC6LxOhj2lXpfN7SARo,3138
2
- sqlframe/_version.py,sha256=sbqQ7wMrRCbK9h4-ahk9DoaetF-oPVAV49BG-2oJfiQ,413
2
+ sqlframe/_version.py,sha256=bRGLbmtauY86O6qq58KRvSDdCcwrGM24X-Zm0Elw0sU,413
3
3
  sqlframe/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
4
  sqlframe/base/_typing.py,sha256=b2clI5HI1zEZKB_3Msx3FeAJQyft44ubUifJwQRVXyQ,1298
5
5
  sqlframe/base/catalog.py,sha256=SzFQalTWdhWzxUY-4ut1f9TfOECp_JmJEgNPfrRKCe0,38457
6
6
  sqlframe/base/column.py,sha256=oHVwkSWABO3ZlAbgBShsxSSlgbI06BOup5XJrRhgqJI,18097
7
- sqlframe/base/dataframe.py,sha256=5gF_zvafC60rZ1OdnB4klKALw35mmxllYTVgIIiZPY0,79340
7
+ sqlframe/base/dataframe.py,sha256=mKXbIKYiKH5mh6qj0Dg7L_znmCL85q9kHlmHtCW4kJ4,79352
8
8
  sqlframe/base/decorators.py,sha256=ms-CvDOIW3T8IVB9VqDmLwAiaEsqXLYRXEqVQaxktiM,1890
9
9
  sqlframe/base/exceptions.py,sha256=9Uwvqn2eAkDpqm4BrRgbL61qM-GMCbJEMAW8otxO46s,370
10
10
  sqlframe/base/function_alternatives.py,sha256=NV31IaEhVYmfUSWetAEFISAvLzs2DxQ7bp-iMNgj0hQ,53786
11
- sqlframe/base/functions.py,sha256=1LHxazgC9tZ_GzyWNsjU945SRnAsQjUH2easMJLU3h4,221012
11
+ sqlframe/base/functions.py,sha256=j_Sh4qIcR-2lesJT_2TzBlTIM46os35AcmMuwBm86DE,222512
12
12
  sqlframe/base/group.py,sha256=fsyG5990_Pd7gFPjTFrH9IEoAquL_wEkVpIlBAIkZJU,4091
13
13
  sqlframe/base/normalize.py,sha256=nXAJ5CwxVf4DV0GsH-q1w0p8gmjSMlv96k_ez1eVul8,3880
14
14
  sqlframe/base/operations.py,sha256=xSPw74e59wYvNd6U1AlwziNCTG6Aftrbl4SybN9u9VE,3450
15
15
  sqlframe/base/readerwriter.py,sha256=w8926cqIrXF7NGHiINw5UHzP_3xpjsqbijTBTzycBRM,26605
16
- sqlframe/base/session.py,sha256=aSp83JXEW_zEMfe1JxPFcjqq2yUThwYboCnk0LqhUko,27290
16
+ sqlframe/base/session.py,sha256=G5_bI_z1iJtAGm2SgEdjkKiyJmS0yOUopx9P5TEGdR4,27273
17
17
  sqlframe/base/table.py,sha256=rCeh1W5SWbtEVfkLAUiexzrZwNgmZeptLEmLcM1ABkE,6961
18
18
  sqlframe/base/transforms.py,sha256=y0j3SGDz3XCmNGrvassk1S-owllUWfkHyMgZlY6SFO4,467
19
19
  sqlframe/base/types.py,sha256=iBNk9bpFtb2NBIogYS8i7OlQZMRvpR6XxqzBebsjQDU,12280
@@ -110,7 +110,7 @@ sqlframe/spark/functions.py,sha256=MYCgHsjRQWylT-rezWRBuLV6BivcaVarbaQtP4T0toQ,3
110
110
  sqlframe/spark/functions.pyi,sha256=GyOdUzv2Z7Qt99JAKEPKgV2t2Rn274OuqwAfcoAXlN0,24259
111
111
  sqlframe/spark/group.py,sha256=MrvV_v-YkBc6T1zz882WrEqtWjlooWIyHBCmTQg3fCA,379
112
112
  sqlframe/spark/readwriter.py,sha256=zXZcCPWpQMMN90wdIx8AD4Y5tWBcpRSL4-yKX2aZyik,874
113
- sqlframe/spark/session.py,sha256=fYu8aVSDRAJ7ZnA7zgba7acXjP8ROJshfX5UYmEq5mI,5667
113
+ sqlframe/spark/session.py,sha256=irlsTky06pKRKAyPLwVzUtLGe4O8mALSgxIqLvqJNF8,5675
114
114
  sqlframe/spark/table.py,sha256=puWV8h_CqA64zwpzq0ydY9LoygMAvprkODyxyzZeF9M,186
115
115
  sqlframe/spark/types.py,sha256=KwNyuXIo-2xVVd4bZED3YrQOobKCtemlxGrJL7DrTC8,34
116
116
  sqlframe/spark/udf.py,sha256=owB8NDaGVkUQ0WGm7SZt2t9zfvLFCfi0W48QiPfgjck,1153
@@ -129,8 +129,8 @@ sqlframe/standalone/udf.py,sha256=azmgtUjHNIPs0WMVNId05SHwiYn41MKVBhKXsQJ5dmY,27
129
129
  sqlframe/standalone/window.py,sha256=6GKPzuxeSapJakBaKBeT9VpED1ACdjggDv9JRILDyV0,35
130
130
  sqlframe/testing/__init__.py,sha256=VVCosQhitU74A3NnE52O4mNtGZONapuEXcc20QmSlnQ,132
131
131
  sqlframe/testing/utils.py,sha256=PFsGZpwNUE_4-g_f43_vstTqsK0AQ2lBneb5Eb6NkFo,13008
132
- sqlframe-3.18.0.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
133
- sqlframe-3.18.0.dist-info/METADATA,sha256=5riMB4lxhi4MDtVWW-kgUK6_PQwdjLDaC5NXSYNNrQ8,8970
134
- sqlframe-3.18.0.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
135
- sqlframe-3.18.0.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
136
- sqlframe-3.18.0.dist-info/RECORD,,
132
+ sqlframe-3.19.0.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
133
+ sqlframe-3.19.0.dist-info/METADATA,sha256=t_G87pTEVYezUc-A5TIumPN-sHNsgTjW8vNgZ4Jvjpw,8970
134
+ sqlframe-3.19.0.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
135
+ sqlframe-3.19.0.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
136
+ sqlframe-3.19.0.dist-info/RECORD,,