sqlframe 3.18.0__py3-none-any.whl → 3.19.0__py3-none-any.whl

sqlframe/_version.py CHANGED
@@ -12,5 +12,5 @@ __version__: str
  __version_tuple__: VERSION_TUPLE
  version_tuple: VERSION_TUPLE

- __version__ = version = '3.18.0'
- __version_tuple__ = version_tuple = (3, 18, 0)
+ __version__ = version = '3.19.0'
+ __version_tuple__ = version_tuple = (3, 19, 0)
sqlframe/base/dataframe.py CHANGED
@@ -1066,7 +1066,7 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
  left_col.sql(dialect=self.session.input_dialect),
  right_col.sql(dialect=self.session.input_dialect),
  ).alias(left_col.alias_or_name)
- if how == "full"
+ if join_type == "full outer"
  else left_col.alias_or_name
  for left_col, right_col in join_column_pairs
  ]
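
The condition above now keys the coalescing of shared join columns off the normalized join_type ("full outer") rather than the raw how argument. A minimal usage sketch of the behavior it guards, assuming the StandaloneSession and the toy schema below (neither is part of the diff):

    from sqlframe.standalone import StandaloneSession

    # Sketch only: session choice and data are illustrative assumptions.
    session = StandaloneSession()
    left = session.createDataFrame([(1, "a")], ["id", "left_val"])
    right = session.createDataFrame([(1, "b")], ["id", "right_val"])

    # For a full outer join on a shared key, the shared "id" column is
    # expected to be emitted once, coalesced across both sides.
    joined = left.join(right, on="id", how="full")
    print(joined.sql())  # inspect the generated SQL rather than executing it
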
sqlframe/base/functions.py CHANGED
@@ -8,6 +8,7 @@ import typing as t

  from sqlglot import Dialect
  from sqlglot import exp as expression
+ from sqlglot.dialects.dialect import time_format
  from sqlglot.helper import ensure_list
  from sqlglot.helper import flatten as _flatten

@@ -2016,9 +2017,12 @@ def array_prepend(col: ColumnOrName, value: ColumnOrLiteral) -> Column:
      return Column.invoke_anonymous_function(col, "ARRAY_PREPEND", value)


- @meta(unsupported_engines="*")
+ @meta()
  def array_size(col: ColumnOrName) -> Column:
-     return Column.invoke_anonymous_function(col, "ARRAY_SIZE")
+     session = _get_session()
+     if session._is_spark or session._is_databricks:
+         return Column.invoke_anonymous_function(col, "ARRAY_SIZE")
+     return Column.invoke_expression_over_column(col, expression.ArraySize)


  @meta(unsupported_engines="*")
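
array_size is no longer marked unsupported for every engine: Spark and Databricks keep the raw ARRAY_SIZE call, while other engines go through sqlglot's ArraySize expression. A hedged usage sketch, assuming an in-memory DuckDB session and that the engine-level functions module re-exports array_size in 3.19.0:

    from sqlframe.duckdb import DuckDBSession
    from sqlframe.duckdb import functions as F

    # Sketch only: the engine and data below are assumptions, not part of the diff.
    session = DuckDBSession()
    df = session.createDataFrame([(1,)], ["id"])

    # On a non-Spark engine this should compile through expression.ArraySize
    # (e.g. DuckDB's ARRAY_LENGTH) rather than a literal ARRAY_SIZE call.
    df.select(F.array_size(F.array(F.lit(1), F.lit(2), F.lit(3))).alias("n")).show()
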
@@ -6088,7 +6092,7 @@ def to_timestamp_ltz(
      return Column.invoke_anonymous_function(timestamp, "to_timestamp_ltz")


- @meta(unsupported_engines="*")
+ @meta()
  def to_timestamp_ntz(
      timestamp: ColumnOrName,
      format: t.Optional[ColumnOrName] = None,
@@ -6118,6 +6122,32 @@ def to_timestamp_ntz(
      ... # doctest: +SKIP
      [Row(r=datetime.datetime(2016, 4, 8, 0, 0))]
      """
+     session = _get_session()
+
+     if session._is_duckdb:
+         to_timestamp_func = get_func_from_session("to_timestamp")
+         return to_timestamp_func(timestamp, format)
+
+     if session._is_bigquery:
+         if format is not None:
+             return Column.invoke_anonymous_function(
+                 session.format_execution_time(format), # type: ignore
+                 "parse_datetime",
+                 timestamp,
+             )
+         else:
+             return Column.ensure_col(timestamp).cast("datetime", dialect="bigquery")
+
+     if session._is_postgres:
+         if format is not None:
+             return Column.invoke_anonymous_function(
+                 timestamp,
+                 "to_timestamp",
+                 session.format_execution_time(format), # type: ignore
+             )
+         else:
+             return Column.ensure_col(timestamp).cast("timestamp", dialect="postgres")
+
      if format is not None:
          return Column.invoke_anonymous_function(timestamp, "to_timestamp_ntz", format)
      else:
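
The new branches give to_timestamp_ntz engine-specific SQL: DuckDB defers to to_timestamp, BigQuery emits parse_datetime or a DATETIME cast, and Postgres emits to_timestamp or a TIMESTAMP cast. A hedged sketch of the call this enables, assuming DuckDB and that to_timestamp_ntz is exported by the engine's functions module in 3.19.0:

    from sqlframe.duckdb import DuckDBSession
    from sqlframe.duckdb import functions as F

    # Sketch only: engine, column name, and sample value are assumptions.
    session = DuckDBSession()
    df = session.createDataFrame([("2016-04-08 12:30:00",)], ["ts"])

    # With no format argument, the DuckDB branch above should fall back to
    # the engine's to_timestamp handling for the bare column.
    df.select(F.to_timestamp_ntz(F.col("ts")).alias("r")).show()
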
@@ -6442,12 +6472,25 @@ def unix_micros(col: ColumnOrName) -> Column:
      """
      from sqlframe.base.function_alternatives import unix_micros_multiply_epoch

-     if (
-         _get_session()._is_bigquery
-         or _get_session()._is_duckdb
-         or _get_session()._is_postgres
-         or _get_session()._is_snowflake
-     ):
+     if _get_session()._is_duckdb:
+         return Column.invoke_anonymous_function(col, "epoch_us")
+
+     if _get_session()._is_bigquery:
+         return Column(
+             expression.Anonymous(
+                 this="UNIX_MICROS",
+                 expressions=[
+                     expression.Anonymous(
+                         this="TIMESTAMP",
+                         expressions=[
+                             Column.ensure_col(col).column_expression,
+                         ],
+                     )
+                 ],
+             )
+         )
+
+     if _get_session()._is_postgres or _get_session()._is_snowflake:
          return unix_micros_multiply_epoch(col)

      return Column.invoke_anonymous_function(col, "unix_micros")
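
unix_micros now branches per engine: epoch_us on DuckDB, a nested UNIX_MICROS(TIMESTAMP(...)) call on BigQuery, and the epoch-multiplication fallback on Postgres and Snowflake. A hedged sketch, again assuming a DuckDB session and illustrative data:

    from sqlframe.duckdb import DuckDBSession
    from sqlframe.duckdb import functions as F

    # Sketch only: engine and sample timestamp are assumptions.
    session = DuckDBSession()
    df = session.createDataFrame([("2015-07-22 10:00:00",)], ["t"])

    # On DuckDB this call should now compile to epoch_us(...) per the branch above.
    df.select(F.unix_micros(F.col("t").cast("timestamp")).alias("micros")).show()
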
sqlframe/base/session.py CHANGED
@@ -267,10 +267,6 @@ class _BaseSession(t.Generic[CATALOG, READER, WRITER, DF, TABLE, CONN, UDF_REGIS
  else:
      column_mapping = {}

- column_mapping = {
-     normalize_identifiers(k, self.input_dialect).sql(dialect=self.input_dialect): v
-     for k, v in column_mapping.items()
- }
  empty_df = not data
  rows = [[None] * len(column_mapping)] if empty_df else list(data) # type: ignore

@@ -327,7 +323,6 @@ class _BaseSession(t.Generic[CATALOG, READER, WRITER, DF, TABLE, CONN, UDF_REGIS
  if isinstance(sample_row, Row):
      sample_row = sample_row.asDict()
  if isinstance(sample_row, dict):
-     sample_row = normalize_dict(self, sample_row)
      default_data_type = get_default_data_type(sample_row[name])
      updated_mapping[name] = (
          exp.DataType.build(default_data_type, dialect="spark")
@@ -387,7 +382,11 @@ class _BaseSession(t.Generic[CATALOG, READER, WRITER, DF, TABLE, CONN, UDF_REGIS
  sel_expression = exp.Select(**select_kwargs)
  if empty_df:
      sel_expression = sel_expression.where(exp.false())
- return self._create_df(sel_expression)
+ df = self._create_df(sel_expression)
+ df._update_display_name_mapping(
+     df._ensure_and_normalize_cols(list(column_mapping.keys())), list(column_mapping.keys())
+ )
+ return df

  def sql(
      self,
@@ -526,7 +525,9 @@ class _BaseSession(t.Generic[CATALOG, READER, WRITER, DF, TABLE, CONN, UDF_REGIS
  col_id._meta = {"case_sensitive": True, **(col_id._meta or {})}
  case_sensitive_cols.append(col_id)
  columns = [
-     normalize_string(x, from_dialect="execution", to_dialect="output")
+     normalize_string(
+         x, from_dialect="execution", to_dialect="output", to_string_literal=True
+     )
      for x in case_sensitive_cols
  ]
  return [self._to_row(columns, row) for row in result]
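
Taken together, the session changes stop normalizing caller-supplied column names up front in createDataFrame and instead record them on the new DataFrame via _update_display_name_mapping, so the names round-trip as given. A hedged sketch of the observable effect, with the engine and column names as assumptions:

    from sqlframe.duckdb import DuckDBSession

    # Sketch only: engine and schema names are illustrative assumptions.
    session = DuckDBSession()
    df = session.createDataFrame([(1, "a")], ["UserId", "UserName"])

    # The mixed-case names supplied above are expected to survive into the
    # DataFrame's displayed columns rather than being normalized away.
    print(df.columns)
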
sqlframe/spark/session.py CHANGED
@@ -86,7 +86,7 @@ class SparkSession(
  col_id = exp.parse_identifier(k, dialect=self.execution_dialect)
  col_id._meta = {"case_sensitive": True, **(col_id._meta or {})}
  col_name = normalize_string(
-     col_id, from_dialect="execution", to_dialect="output", is_column=True
+     col_id, from_dialect="execution", to_dialect="output", to_string_literal=True
  )
  rows_normalized[col_name] = v
  results.append(Row(**rows_normalized))
{sqlframe-3.18.0 → sqlframe-3.19.0}.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: sqlframe
- Version: 3.18.0
+ Version: 3.19.0
  Summary: Turning PySpark Into a Universal DataFrame API
  Home-page: https://github.com/eakmanrq/sqlframe
  Author: Ryan Eakman
@@ -17,7 +17,7 @@ Requires-Python: >=3.9
  Description-Content-Type: text/markdown
  License-File: LICENSE
  Requires-Dist: prettytable <4
- Requires-Dist: sqlglot <26.5,>=24.0.0
+ Requires-Dist: sqlglot <26.7,>=24.0.0
  Requires-Dist: typing-extensions
  Provides-Extra: bigquery
  Requires-Dist: google-cloud-bigquery-storage <3,>=2 ; extra == 'bigquery'
{sqlframe-3.18.0 → sqlframe-3.19.0}.dist-info/RECORD RENAMED
@@ -1,19 +1,19 @@
  sqlframe/__init__.py,sha256=wfqm98eLoLid9oV_FzzpG5loKC6LxOhj2lXpfN7SARo,3138
- sqlframe/_version.py,sha256=sbqQ7wMrRCbK9h4-ahk9DoaetF-oPVAV49BG-2oJfiQ,413
+ sqlframe/_version.py,sha256=bRGLbmtauY86O6qq58KRvSDdCcwrGM24X-Zm0Elw0sU,413
  sqlframe/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  sqlframe/base/_typing.py,sha256=b2clI5HI1zEZKB_3Msx3FeAJQyft44ubUifJwQRVXyQ,1298
  sqlframe/base/catalog.py,sha256=SzFQalTWdhWzxUY-4ut1f9TfOECp_JmJEgNPfrRKCe0,38457
  sqlframe/base/column.py,sha256=oHVwkSWABO3ZlAbgBShsxSSlgbI06BOup5XJrRhgqJI,18097
- sqlframe/base/dataframe.py,sha256=5gF_zvafC60rZ1OdnB4klKALw35mmxllYTVgIIiZPY0,79340
+ sqlframe/base/dataframe.py,sha256=mKXbIKYiKH5mh6qj0Dg7L_znmCL85q9kHlmHtCW4kJ4,79352
  sqlframe/base/decorators.py,sha256=ms-CvDOIW3T8IVB9VqDmLwAiaEsqXLYRXEqVQaxktiM,1890
  sqlframe/base/exceptions.py,sha256=9Uwvqn2eAkDpqm4BrRgbL61qM-GMCbJEMAW8otxO46s,370
  sqlframe/base/function_alternatives.py,sha256=NV31IaEhVYmfUSWetAEFISAvLzs2DxQ7bp-iMNgj0hQ,53786
- sqlframe/base/functions.py,sha256=1LHxazgC9tZ_GzyWNsjU945SRnAsQjUH2easMJLU3h4,221012
+ sqlframe/base/functions.py,sha256=j_Sh4qIcR-2lesJT_2TzBlTIM46os35AcmMuwBm86DE,222512
  sqlframe/base/group.py,sha256=fsyG5990_Pd7gFPjTFrH9IEoAquL_wEkVpIlBAIkZJU,4091
  sqlframe/base/normalize.py,sha256=nXAJ5CwxVf4DV0GsH-q1w0p8gmjSMlv96k_ez1eVul8,3880
  sqlframe/base/operations.py,sha256=xSPw74e59wYvNd6U1AlwziNCTG6Aftrbl4SybN9u9VE,3450
  sqlframe/base/readerwriter.py,sha256=w8926cqIrXF7NGHiINw5UHzP_3xpjsqbijTBTzycBRM,26605
- sqlframe/base/session.py,sha256=aSp83JXEW_zEMfe1JxPFcjqq2yUThwYboCnk0LqhUko,27290
+ sqlframe/base/session.py,sha256=G5_bI_z1iJtAGm2SgEdjkKiyJmS0yOUopx9P5TEGdR4,27273
  sqlframe/base/table.py,sha256=rCeh1W5SWbtEVfkLAUiexzrZwNgmZeptLEmLcM1ABkE,6961
  sqlframe/base/transforms.py,sha256=y0j3SGDz3XCmNGrvassk1S-owllUWfkHyMgZlY6SFO4,467
  sqlframe/base/types.py,sha256=iBNk9bpFtb2NBIogYS8i7OlQZMRvpR6XxqzBebsjQDU,12280
@@ -110,7 +110,7 @@ sqlframe/spark/functions.py,sha256=MYCgHsjRQWylT-rezWRBuLV6BivcaVarbaQtP4T0toQ,3
  sqlframe/spark/functions.pyi,sha256=GyOdUzv2Z7Qt99JAKEPKgV2t2Rn274OuqwAfcoAXlN0,24259
  sqlframe/spark/group.py,sha256=MrvV_v-YkBc6T1zz882WrEqtWjlooWIyHBCmTQg3fCA,379
  sqlframe/spark/readwriter.py,sha256=zXZcCPWpQMMN90wdIx8AD4Y5tWBcpRSL4-yKX2aZyik,874
- sqlframe/spark/session.py,sha256=fYu8aVSDRAJ7ZnA7zgba7acXjP8ROJshfX5UYmEq5mI,5667
+ sqlframe/spark/session.py,sha256=irlsTky06pKRKAyPLwVzUtLGe4O8mALSgxIqLvqJNF8,5675
  sqlframe/spark/table.py,sha256=puWV8h_CqA64zwpzq0ydY9LoygMAvprkODyxyzZeF9M,186
  sqlframe/spark/types.py,sha256=KwNyuXIo-2xVVd4bZED3YrQOobKCtemlxGrJL7DrTC8,34
  sqlframe/spark/udf.py,sha256=owB8NDaGVkUQ0WGm7SZt2t9zfvLFCfi0W48QiPfgjck,1153
@@ -129,8 +129,8 @@ sqlframe/standalone/udf.py,sha256=azmgtUjHNIPs0WMVNId05SHwiYn41MKVBhKXsQJ5dmY,27
  sqlframe/standalone/window.py,sha256=6GKPzuxeSapJakBaKBeT9VpED1ACdjggDv9JRILDyV0,35
  sqlframe/testing/__init__.py,sha256=VVCosQhitU74A3NnE52O4mNtGZONapuEXcc20QmSlnQ,132
  sqlframe/testing/utils.py,sha256=PFsGZpwNUE_4-g_f43_vstTqsK0AQ2lBneb5Eb6NkFo,13008
- sqlframe-3.18.0.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
- sqlframe-3.18.0.dist-info/METADATA,sha256=5riMB4lxhi4MDtVWW-kgUK6_PQwdjLDaC5NXSYNNrQ8,8970
- sqlframe-3.18.0.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
- sqlframe-3.18.0.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
- sqlframe-3.18.0.dist-info/RECORD,,
+ sqlframe-3.19.0.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
+ sqlframe-3.19.0.dist-info/METADATA,sha256=t_G87pTEVYezUc-A5TIumPN-sHNsgTjW8vNgZ4Jvjpw,8970
+ sqlframe-3.19.0.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
+ sqlframe-3.19.0.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
+ sqlframe-3.19.0.dist-info/RECORD,,