sqlframe 1.12.0__py3-none-any.whl → 1.14.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sqlframe/_version.py CHANGED
@@ -12,5 +12,5 @@ __version__: str
  __version_tuple__: VERSION_TUPLE
  version_tuple: VERSION_TUPLE
 
- __version__ = version = '1.12.0'
- __version_tuple__ = version_tuple = (1, 12, 0)
+ __version__ = version = '1.14.0'
+ __version_tuple__ = version_tuple = (1, 14, 0)
sqlframe/base/function_alternatives.py CHANGED
@@ -6,11 +6,16 @@ import re
  import typing as t
 
  from sqlglot import exp as expression
+ from sqlglot.dialects.dialect import build_formatted_time
  from sqlglot.helper import ensure_list
  from sqlglot.helper import flatten as _flatten
 
  from sqlframe.base.column import Column
- from sqlframe.base.util import get_func_from_session
+ from sqlframe.base.util import (
+     format_time_from_spark,
+     get_func_from_session,
+     spark_default_time_format,
+ )
 
  if t.TYPE_CHECKING:
      from sqlframe.base._typing import ColumnOrLiteral, ColumnOrName
@@ -556,6 +561,14 @@ def to_date_from_timestamp(col: ColumnOrName, format: t.Optional[str] = None) -> Column:
      return to_date(to_timestamp(col, format))
 
 
+ def to_date_time_format(col: ColumnOrName, format: t.Optional[str] = None) -> Column:
+     from sqlframe.base.functions import to_date
+
+     lit = get_func_from_session("lit")
+     format = lit(format or spark_default_time_format())
+     return to_date(col, format=format)
+
+
  def last_day_with_cast(col: ColumnOrName) -> Column:
      from sqlframe.base.functions import last_day
 
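Note: the fallback above now comes from sqlglot's Spark dialect, via the spark_default_time_format helper added to sqlframe/base/util.py later in this diff, rather than from a per-session constant. A quick sanity check of what that default resolves to (value as of the sqlglot range pinned by this release):

    from sqlframe.base.util import spark_default_time_format

    # sqlglot stores Spark's TIME_FORMAT as a quoted SQL literal
    # ("'yyyy-MM-dd HH:mm:ss'"); the helper strips the quotes.
    assert spark_default_time_format() == "yyyy-MM-dd HH:mm:ss"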
@@ -715,14 +728,10 @@ def months_between_cast_as_date_cast_roundoff(
 
 
  def from_unixtime_from_timestamp(col: ColumnOrName, format: t.Optional[str] = None) -> Column:
-     from sqlframe.base.session import _BaseSession
-
-     session: _BaseSession = _BaseSession()
      lit = get_func_from_session("lit")
      col_func = get_func_from_session("col")
 
-     if format is None:
-         format = session.DEFAULT_TIME_FORMAT
+     format = lit(format or spark_default_time_format())
      return Column.invoke_expression_over_column(
          Column(
              expression.Anonymous(
@@ -731,7 +740,7 @@ def from_unixtime_from_timestamp(col: ColumnOrName, format: t.Optional[str] = None) -> Column:
              )
          ),
          expression.TimeToStr,
-         format=lit(format),
+         format=format_time_from_spark(format),  # type: ignore
      )
 
 
@@ -1511,10 +1520,106 @@ def to_unix_timestamp_include_default_format(
      format: t.Optional[ColumnOrName] = None,
  ) -> Column:
      from sqlframe.base.functions import to_unix_timestamp
+     from sqlframe.base.session import _BaseSession
+
+     if not format:
+         format = _BaseSession().output_dialect.TIME_FORMAT
+     else:
+         format = format_time_from_spark(format)
+     return to_unix_timestamp(timestamp, format)
+
+
+ def array_append_list_append(col: ColumnOrName, value: ColumnOrLiteral) -> Column:
+     lit = get_func_from_session("lit")
+     value = value if isinstance(value, Column) else lit(value)
+     return Column.invoke_anonymous_function(col, "LIST_APPEND", value)
 
+
+ def array_append_using_array_cat(col: ColumnOrName, value: ColumnOrLiteral) -> Column:
      lit = get_func_from_session("lit")
+     array = get_func_from_session("array")
+     value = value if isinstance(value, Column) else lit(value)
+     return Column.invoke_anonymous_function(col, "ARRAY_CONCAT", array(value))
 
-     if not format:
-         format = lit("%Y-%m-%d %H:%M:%S")
 
-     return to_unix_timestamp(timestamp, format)
+ def day_with_try_to_timestamp(col: ColumnOrName) -> Column:
+     from sqlframe.base.functions import day
+
+     try_to_timestamp = get_func_from_session("try_to_timestamp")
+     to_date = get_func_from_session("to_date")
+     when = get_func_from_session("when")
+     _is_string = get_func_from_session("_is_string")
+     coalesce = get_func_from_session("coalesce")
+     return day(
+         when(
+             _is_string(col),
+             coalesce(try_to_timestamp(col), to_date(col)),
+         ).otherwise(col)
+     )
+
+
+ def try_to_timestamp_strptime(col: ColumnOrName, format: t.Optional[ColumnOrName] = None) -> Column:
+     lit = get_func_from_session("lit")
+
+     format = lit(format or spark_default_time_format())
+     return Column.invoke_anonymous_function(col, "TRY_STRPTIME", format_time_from_spark(format))  # type: ignore
+
+
+ def try_to_timestamp_safe(col: ColumnOrName, format: t.Optional[ColumnOrName] = None) -> Column:
+     lit = get_func_from_session("lit")
+
+     format = lit(format or spark_default_time_format())
+     return Column.invoke_anonymous_function(
+         format_time_from_spark(format),  # type: ignore
+         "SAFE.PARSE_TIMESTAMP",
+         col,  # type: ignore
+     )
+
+
+ def try_to_timestamp_pgtemp(col: ColumnOrName, format: t.Optional[ColumnOrName] = None) -> Column:
+     lit = get_func_from_session("lit")
+
+     format = lit(format or spark_default_time_format())
+     return Column.invoke_anonymous_function(
+         col,
+         "pg_temp.TRY_TO_TIMESTAMP",
+         format_time_from_spark(format),  # type: ignore
+     )
+
+
+ def typeof_pg_typeof(col: ColumnOrName) -> Column:
+     return Column.invoke_anonymous_function(col, "pg_typeof").cast("regtype").cast("text")
+
+
+ def typeof_from_variant(col: ColumnOrName) -> Column:
+     col = Column.invoke_anonymous_function(col, "TO_VARIANT")
+     return Column.invoke_anonymous_function(col, "TYPEOF")
+
+
+ def _is_string_using_typeof_varchar(col: ColumnOrName) -> Column:
+     typeof = get_func_from_session("typeof")
+     lit = get_func_from_session("lit")
+     return lit(typeof(col) == lit("VARCHAR"))
+
+
+ def _is_string_using_typeof_char_varying(col: ColumnOrName) -> Column:
+     typeof = get_func_from_session("typeof")
+     lit = get_func_from_session("lit")
+     return lit(
+         (typeof(col) == lit("text"))
+         | (typeof(col) == lit("character varying"))
+         | (typeof(col) == lit("unknown"))
+         | (typeof(col) == lit("text"))
+     )
+
+
+ def _is_string_using_typeof_string(col: ColumnOrName) -> Column:
+     typeof = get_func_from_session("typeof")
+     lit = get_func_from_session("lit")
+     return lit(typeof(col) == lit("STRING"))
+
+
+ def _is_string_using_typeof_string_lcase(col: ColumnOrName) -> Column:
+     typeof = get_func_from_session("typeof")
+     lit = get_func_from_session("lit")
+     return lit(typeof(col) == lit("string"))
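These alternatives resolve their building blocks at call time through get_func_from_session, so each helper composes with whatever the active engine module exports (for example, _is_string may be the VARCHAR, STRING, or pg_typeof variant). A minimal sketch of the same pattern, using a hypothetical helper modeled on day_with_try_to_timestamp above:

    from sqlframe.base.column import Column
    from sqlframe.base.util import get_func_from_session

    def upper_if_string(col) -> Column:  # hypothetical example, not part of sqlframe
        # Each lookup resolves against the active session's functions module.
        when = get_func_from_session("when")
        upper = get_func_from_session("upper")
        _is_string = get_func_from_session("_is_string")
        return when(_is_string(col), upper(col)).otherwise(col)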
sqlframe/base/functions.py CHANGED
@@ -6,12 +6,19 @@ import decimal
  import logging
  import typing as t
 
+ from sqlglot import Dialect
  from sqlglot import exp as expression
  from sqlglot.helper import ensure_list
  from sqlglot.helper import flatten as _flatten
 
  from sqlframe.base.column import Column
  from sqlframe.base.decorators import func_metadata as meta
+ from sqlframe.base.util import (
+     format_time_from_spark,
+     get_func_from_session,
+     spark_default_date_format,
+     spark_default_time_format,
+ )
 
  if t.TYPE_CHECKING:
      from pyspark.sql.session import SparkContext
@@ -695,7 +702,7 @@ def date_format(col: ColumnOrName, format: str) -> Column:
      return Column.invoke_expression_over_column(
          Column(expression.TimeStrToTime(this=Column.ensure_col(col).expression)),
          expression.TimeToStr,
-         format=lit(format),
+         format=format_time_from_spark(format),
      )
 
 
@@ -875,17 +882,21 @@ def months_between(
 
  @meta()
  def to_date(col: ColumnOrName, format: t.Optional[str] = None) -> Column:
+     format = lit(format or spark_default_date_format())
      if format is not None:
          return Column.invoke_expression_over_column(
-             col, expression.TsOrDsToDate, format=lit(format)
+             col, expression.TsOrDsToDate, format=format_time_from_spark(format)
          )
      return Column.invoke_expression_over_column(col, expression.TsOrDsToDate)
 
 
  @meta()
  def to_timestamp(col: ColumnOrName, format: t.Optional[str] = None) -> Column:
+     format = lit(format or spark_default_time_format())
      if format is not None:
-         return Column.invoke_expression_over_column(col, expression.StrToTime, format=lit(format))
+         return Column.invoke_expression_over_column(
+             col, expression.StrToTime, format=format_time_from_spark(format)
+         )
 
      return Column.ensure_col(col).cast("timestamp")
 
@@ -916,23 +927,23 @@ def last_day(col: ColumnOrName) -> Column:
 
  @meta()
  def from_unixtime(col: ColumnOrName, format: t.Optional[str] = None) -> Column:
-     from sqlframe.base.session import _BaseSession
-
-     if format is None:
-         format = _BaseSession().DEFAULT_TIME_FORMAT
-     return Column.invoke_expression_over_column(col, expression.UnixToStr, format=lit(format))
+     format = lit(format or spark_default_time_format())
+     return Column.invoke_expression_over_column(
+         col,
+         expression.UnixToStr,
+         format=format_time_from_spark(format),  # type: ignore
+     )
 
 
  @meta()
  def unix_timestamp(
      timestamp: t.Optional[ColumnOrName] = None, format: t.Optional[str] = None
  ) -> Column:
-     from sqlframe.base.session import _BaseSession
-
-     if format is None:
-         format = _BaseSession().DEFAULT_TIME_FORMAT
+     format = lit(format or spark_default_time_format())
      return Column.invoke_expression_over_column(
-         timestamp, expression.StrToUnix, format=lit(format)
+         timestamp,
+         expression.StrToUnix,
+         format=format_time_from_spark(format),  # type: ignore
      ).cast("bigint")
 
 
@@ -1289,7 +1300,7 @@ def array_agg(col: ColumnOrName) -> Column:
      return Column.invoke_expression_over_column(col, expression.ArrayAgg)
 
 
- @meta(unsupported_engines="*")
+ @meta()
  def array_append(col: ColumnOrName, value: ColumnOrLiteral) -> Column:
      value = value if isinstance(value, Column) else lit(value)
      return Column.invoke_anonymous_function(col, "ARRAY_APPEND", value)
@@ -1737,7 +1748,7 @@ def map_zip_with(
      return Column.invoke_anonymous_function(col1, "MAP_ZIP_WITH", col2, Column(f_expression))
 
 
- @meta(unsupported_engines=["postgres", "snowflake"])
+ @meta()
  def typeof(col: ColumnOrName) -> Column:
      return Column.invoke_anonymous_function(col, "TYPEOF")
 
@@ -2156,7 +2167,7 @@ def datepart(field: ColumnOrName, source: ColumnOrName) -> Column:
      return Column.invoke_anonymous_function(field, "datepart", source)
 
 
- @meta(unsupported_engines="*")
+ @meta(unsupported_engines=["bigquery", "postgres", "snowflake"])
  def day(col: ColumnOrName) -> Column:
      return Column.invoke_expression_over_column(col, expression.Day)
 
@@ -5106,8 +5117,11 @@ def to_unix_timestamp(
      [Row(r=None)]
      >>> spark.conf.unset("spark.sql.session.timeZone")
      """
+     format = lit(spark_default_time_format()) if format is None else format
      if format is not None:
-         return Column.invoke_expression_over_column(timestamp, expression.StrToUnix, format=format)
+         return Column.invoke_expression_over_column(
+             timestamp, expression.StrToUnix, format=format_time_from_spark(format)
+         )
      else:
          return Column.invoke_expression_over_column(timestamp, expression.StrToUnix)
 
@@ -5268,7 +5282,7 @@ def try_element_at(col: ColumnOrName, extraction: ColumnOrName) -> Column:
      )
 
 
- @meta(unsupported_engines="*")
+ @meta()
  def try_to_timestamp(col: ColumnOrName, format: t.Optional[ColumnOrName] = None) -> Column:
      """
      Parses the `col` with the `format` to a timestamp. The function always
@@ -5293,10 +5307,8 @@ def try_to_timestamp(col: ColumnOrName, format: t.Optional[ColumnOrName] = None) -> Column:
      >>> df.select(try_to_timestamp(df.t, lit('yyyy-MM-dd HH:mm:ss')).alias('dt')).collect()
      [Row(dt=datetime.datetime(1997, 2, 28, 10, 30))]
      """
-     if format is not None:
-         return Column.invoke_anonymous_function(col, "try_to_timestamp", format)
-     else:
-         return Column.invoke_anonymous_function(col, "try_to_timestamp")
+     format = lit(format or spark_default_time_format())
+     return Column.invoke_anonymous_function(col, "try_to_timestamp", format_time_from_spark(format))  # type: ignore
 
 
  @meta()
@@ -5324,7 +5336,7 @@ def ucase(str: ColumnOrName) -> Column:
      return Column.invoke_expression_over_column(str, expression.Upper)
 
 
- @meta()
+ @meta(unsupported_engines=["bigquery", "snowflake"])
  def unix_date(col: ColumnOrName) -> Column:
      """Returns the number of days since 1970-01-01.
 
@@ -5788,6 +5800,20 @@ def years(col: ColumnOrName) -> Column:
      return Column.invoke_anonymous_function(col, "years")
 
 
+ # SQLFrame specific
+ @meta()
+ def _is_string(col: ColumnOrName) -> Column:
+     col = Column.invoke_anonymous_function(col, "TO_VARIANT")
+     return Column.invoke_anonymous_function(col, "IS_VARCHAR")
+
+
+ @meta()
+ def _is_date(col: ColumnOrName) -> Column:
+     typeof = get_func_from_session("typeof")
+     upper = get_func_from_session("upper")
+     return lit(upper(typeof(col)) == lit("DATE"))
+
+
  @meta()
  def _lambda_quoted(value: str) -> t.Optional[bool]:
      return False if value == "_" else None
sqlframe/base/session.py CHANGED
@@ -72,7 +72,6 @@ class _BaseSession(t.Generic[CATALOG, READER, WRITER, DF, CONN]):
      _df: t.Type[DF]
 
      SANITIZE_COLUMN_NAMES = False
-     DEFAULT_TIME_FORMAT = "yyyy-MM-dd HH:mm:ss"
 
      def __init__(
          self,
@@ -114,6 +113,10 @@ class _BaseSession(t.Generic[CATALOG, READER, WRITER, DF, CONN]):
      def _cur(self) -> DBAPICursorWithPandas:
          return self._conn.cursor()
 
+     @property
+     def default_time_format(self) -> str:
+         return self.output_dialect.TIME_FORMAT.strip("'")
+
      def _sanitize_column_name(self, name: str) -> str:
          if self.SANITIZE_COLUMN_NAMES:
              return name.replace("(", "_").replace(")", "_")
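The new default_time_format property derives the engine's default from the output dialect's TIME_FORMAT, which is what lets the per-engine DEFAULT_TIME_FORMAT constants be deleted in the session classes below. Expected values, assuming a sqlglot version in the pinned range:

    from sqlglot.dialects.dialect import Dialect

    # TIME_FORMAT is stored as a quoted SQL literal, hence the .strip("'") above.
    Dialect["spark"].TIME_FORMAT   # "'yyyy-MM-dd HH:mm:ss'"
    Dialect["duckdb"].TIME_FORMAT  # "'%Y-%m-%d %H:%M:%S'"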
sqlframe/base/util.py CHANGED
@@ -13,7 +13,12 @@ if t.TYPE_CHECKING:
      from pyspark.sql.dataframe import SparkSession as PySparkSession
 
      from sqlframe.base import types
-     from sqlframe.base._typing import OptionalPrimitiveType, SchemaInput
+     from sqlframe.base._typing import (
+         ColumnOrLiteral,
+         OptionalPrimitiveType,
+         SchemaInput,
+     )
+     from sqlframe.base.column import Column
      from sqlframe.base.session import _BaseSession
      from sqlframe.base.types import StructType
 
@@ -342,3 +347,25 @@ def sqlglot_to_spark(sqlglot_dtype: exp.DataType) -> types.DataType:
          ]
      )
      raise NotImplementedError(f"Unsupported data type: {sqlglot_dtype}")
+
+
+ def format_time_from_spark(value: ColumnOrLiteral) -> Column:
+     from sqlframe.base.column import Column
+     from sqlframe.base.session import _BaseSession
+
+     lit = get_func_from_session("lit")
+     value = lit(value) if not isinstance(value, Column) else value
+     formatted_time = Dialect["spark"].format_time(value.expression)
+     return Column(
+         _BaseSession()
+         .output_dialect.generator()
+         .format_time(exp.StrToTime(this=exp.Null(), format=formatted_time))
+     )
+
+
+ def spark_default_time_format() -> str:
+     return Dialect["spark"].TIME_FORMAT.strip("'")
+
+
+ def spark_default_date_format() -> str:
+     return Dialect["spark"].DATE_FORMAT.strip("'")
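format_time_from_spark is the pivot for the format changes throughout this release: it parses a Spark-style format string with the Spark dialect, then re-renders it for the session's output dialect. A standalone approximation using sqlglot directly, with the output dialect fixed to DuckDB instead of read from the active session:

    from sqlglot import exp
    from sqlglot.dialects.dialect import Dialect

    spark_fmt = exp.Literal.string("yyyy-MM-dd HH:mm:ss")
    # Spark tokens -> sqlglot's internal strftime-style tokens
    internal = Dialect["spark"].format_time(spark_fmt)
    # internal tokens -> the output dialect's tokens, as a quoted SQL literal
    rendered = Dialect["duckdb"]().generator().format_time(
        exp.StrToTime(this=exp.Null(), format=internal)
    )
    print(rendered)  # expected: '%Y-%m-%d %H:%M:%S'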
sqlframe/bigquery/functions.py CHANGED
@@ -7,7 +7,11 @@ import typing as t
  from sqlglot import exp as sqlglot_expression
 
  import sqlframe.base.functions
- from sqlframe.base.util import get_func_from_session
+ from sqlframe.base.util import (
+     format_time_from_spark,
+     get_func_from_session,
+     spark_default_time_format,
+ )
  from sqlframe.bigquery.column import Column
 
  if t.TYPE_CHECKING:
@@ -68,6 +72,9 @@ from sqlframe.base.function_alternatives import (  # noqa
      array_union_using_array_concat as array_union,
      sequence_from_generate_array as sequence,
      position_as_strpos as position,
+     try_to_timestamp_safe as try_to_timestamp,
+     _is_string_using_typeof_string as _is_string,
+     array_append_using_array_cat as array_append,
  )
 
 
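Because these are plain module-level re-exports, the BigQuery wiring can be checked without credentials (assuming the module imports cleanly without the BigQuery client libraries, since its heavy imports sit under TYPE_CHECKING):

    from sqlframe.base import function_alternatives as alt
    from sqlframe.bigquery import functions as F

    # try_to_timestamp now routes through SAFE.PARSE_TIMESTAMP, which returns
    # NULL instead of raising on unparseable input.
    assert F.try_to_timestamp is alt.try_to_timestamp_safe
    assert F.array_append is alt.array_append_using_array_cat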
@@ -148,23 +155,15 @@ def from_unixtime(col: ColumnOrName, format: t.Optional[str] = None) -> Column:
 
      session: _BaseSession = _BaseSession()
      lit = get_func_from_session("lit")
-     to_timestamp = get_func_from_session("to_timestamp")
 
      expressions = [Column.ensure_col(col).expression]
-     if format is not None:
-         expressions.append(lit(format).expression)
      return Column(
          sqlglot_expression.Anonymous(
              this="FORMAT_TIMESTAMP",
              expressions=[
-                 lit(session.DEFAULT_TIME_FORMAT).expression,
-                 to_timestamp(
-                     Column(
-                         sqlglot_expression.Anonymous(
-                             this="TIMESTAMP_SECONDS", expressions=expressions
-                         )
-                     ),
-                     format,
+                 lit(session.default_time_format).expression,
+                 Column(
+                     sqlglot_expression.Anonymous(this="TIMESTAMP_SECONDS", expressions=expressions)
                  ).expression,
              ],
          )
@@ -174,12 +173,9 @@ def unix_timestamp(
  def unix_timestamp(
      timestamp: t.Optional[ColumnOrName] = None, format: t.Optional[str] = None
  ) -> Column:
-     from sqlframe.base.session import _BaseSession
-
      lit = get_func_from_session("lit")
 
-     if format is None:
-         format = _BaseSession().DEFAULT_TIME_FORMAT
+     format = lit(format or spark_default_time_format())
      return Column(
          sqlglot_expression.Anonymous(
              this="UNIX_SECONDS",
@@ -187,7 +183,7 @@
              sqlglot_expression.Anonymous(
                  this="PARSE_TIMESTAMP",
                  expressions=[
-                     lit(format).expression,
+                     format_time_from_spark(format).expression,
                      Column.ensure_col(timestamp).expression,
                      lit("UTC").expression,
                  ],
sqlframe/bigquery/functions.pyi CHANGED
@@ -267,7 +267,6 @@ from sqlframe.base.functions import trunc as trunc
  from sqlframe.base.functions import ucase as ucase
  from sqlframe.base.functions import unbase64 as unbase64
  from sqlframe.base.functions import unhex as unhex
- from sqlframe.base.functions import unix_date as unix_date
  from sqlframe.base.functions import upper as upper
  from sqlframe.base.functions import user as user
  from sqlframe.base.functions import var_pop as var_pop
sqlframe/bigquery/session.py CHANGED
@@ -32,7 +32,6 @@ class BigQuerySession(
      _writer = BigQueryDataFrameWriter
      _df = BigQueryDataFrame
 
-     DEFAULT_TIME_FORMAT = "%Y-%m-%d %H:%M:%S"
      QUALIFY_INFO_SCHEMA_WITH_DATABASE = True
      SANITIZE_COLUMN_NAMES = True
 
sqlframe/duckdb/functions.py CHANGED
@@ -46,5 +46,8 @@ from sqlframe.base.function_alternatives import (  # noqa
      array_max_from_sort as array_max,
      sequence_from_generate_series as sequence,
      try_element_at_zero_based as try_element_at,
-     to_unix_timestamp_include_default_format as to_unix_timestamp,
+     day_with_try_to_timestamp as day,
+     try_to_timestamp_strptime as try_to_timestamp,
+     _is_string_using_typeof_varchar as _is_string,
+     array_append_list_append as array_append,
  )
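DuckDB is embeddable, so the new wiring can be exercised end to end. A rough sketch, assuming the duckdb extra is installed and the PySpark-style createDataFrame/show surface:

    from sqlframe.base import function_alternatives as alt
    from sqlframe.duckdb import DuckDBSession
    from sqlframe.duckdb import functions as F

    # The generic names now point at the DuckDB-specific variants.
    assert F.try_to_timestamp is alt.try_to_timestamp_strptime  # TRY_STRPTIME-based
    assert F.array_append is alt.array_append_list_append       # LIST_APPEND-based

    session = DuckDBSession()  # in-memory DuckDB connection
    df = session.createDataFrame([("2024-01-02 03:04:05",)], ["ts"])
    # With no explicit format, the Spark default is translated to strptime tokens.
    df.select(F.try_to_timestamp(F.col("ts")).alias("parsed")).show()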
sqlframe/duckdb/session.py CHANGED
@@ -33,8 +33,6 @@ class DuckDBSession(
      _writer = DuckDBDataFrameWriter
      _df = DuckDBDataFrame
 
-     DEFAULT_TIME_FORMAT = "%Y-%m-%d %H:%M:%S"
-
      def __init__(self, conn: t.Optional[DuckDBPyConnection] = None, *args, **kwargs):
          import duckdb
          from duckdb.typing import VARCHAR
sqlframe/postgres/functions.py CHANGED
@@ -64,4 +64,7 @@ from sqlframe.base.function_alternatives import (  # noqa
      right_cast_len as right,
      position_cast_start as position,
      try_element_at_zero_based as try_element_at,
+     try_to_timestamp_pgtemp as try_to_timestamp,
+     typeof_pg_typeof as typeof,
+     _is_string_using_typeof_char_varying as _is_string,
  )
sqlframe/postgres/session.py CHANGED
@@ -34,12 +34,18 @@ class PostgresSession(
      _writer = PostgresDataFrameWriter
      _df = PostgresDataFrame
 
-     DEFAULT_TIME_FORMAT = "yyyy-MM-dd HH:MI:SS"
-
      def __init__(self, conn: t.Optional[psycopg2_connection] = None):
          if not hasattr(self, "_conn"):
              super().__init__(conn)
              self._execute("CREATE EXTENSION IF NOT EXISTS fuzzystrmatch")
+             self._execute("""CREATE OR REPLACE FUNCTION pg_temp.try_to_timestamp(input_text TEXT, format TEXT)
+ RETURNS TIMESTAMP AS $$
+ BEGIN
+     RETURN TO_TIMESTAMP(input_text, format);
+ EXCEPTION WHEN OTHERS THEN
+     RETURN NULL;
+ END;
+ $$ LANGUAGE plpgsql;""")
 
      def _fetch_rows(
          self, sql: t.Union[str, exp.Expression], *, quote_identifiers: bool = True
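A minimal behavior sketch for the temporary UDF registered above, assuming a reachable Postgres server and psycopg2 (the DSN is hypothetical):

    import psycopg2

    from sqlframe.postgres.session import PostgresSession

    conn = psycopg2.connect("dbname=postgres")  # hypothetical DSN
    PostgresSession(conn)  # registers pg_temp.try_to_timestamp on this connection

    with conn.cursor() as cur:
        cur.execute("SELECT pg_temp.try_to_timestamp('not-a-date', 'YYYY-MM-DD')")
        print(cur.fetchone())  # (None,) -- parse failures return NULL instead of raising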
sqlframe/snowflake/functions.py CHANGED
@@ -63,4 +63,6 @@ from sqlframe.base.function_alternatives import (  # noqa
      map_concat_using_map_cat as map_concat,
      sequence_from_array_generate_range as sequence,
      to_number_using_to_double as to_number,
+     typeof_from_variant as typeof,
+     to_date_time_format as to_date,
  )
sqlframe/snowflake/functions.pyi CHANGED
@@ -207,7 +207,6 @@ from sqlframe.base.functions import (
      trim as trim,
      trunc as trunc,
      ucase as ucase,
-     unix_date as unix_date,
      upper as upper,
      user as user,
      var_pop as var_pop,
sqlframe/snowflake/session.py CHANGED
@@ -57,8 +57,6 @@ class SnowflakeSession(
      _writer = SnowflakeDataFrameWriter
      _df = SnowflakeDataFrame
 
-     DEFAULT_TIME_FORMAT = "YYYY-MM-DD HH:MI:SS"
-
      def __init__(self, conn: t.Optional[SnowflakeConnection] = None):
          import snowflake
 
sqlframe/spark/functions.py CHANGED
@@ -17,4 +17,5 @@ from sqlframe.base.function_alternatives import (  # noqa
      percentile_without_disc as percentile,
      add_months_by_multiplication as add_months,
      arrays_overlap_renamed as arrays_overlap,
+     _is_string_using_typeof_string_lcase as _is_string,
  )
sqlframe-1.12.0.dist-info/METADATA → sqlframe-1.14.0.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: sqlframe
- Version: 1.12.0
+ Version: 1.14.0
  Summary: Turning PySpark Into a Universal DataFrame API
  Home-page: https://github.com/eakmanrq/sqlframe
  Author: Ryan Eakman
@@ -18,7 +18,7 @@ Requires-Python: >=3.8
  Description-Content-Type: text/markdown
  License-File: LICENSE
  Requires-Dist: prettytable (<3.11.0)
- Requires-Dist: sqlglot (<25.4,>=24.0.0)
+ Requires-Dist: sqlglot (<25.5,>=24.0.0)
  Requires-Dist: typing-extensions (<5,>=4.8)
  Provides-Extra: bigquery
  Requires-Dist: google-cloud-bigquery-storage (<3,>=2) ; extra == 'bigquery'
@@ -35,7 +35,7 @@ Requires-Dist: pyspark (<3.6,>=2) ; extra == 'dev'
  Requires-Dist: pytest-postgresql (<7,>=6) ; extra == 'dev'
  Requires-Dist: pytest-xdist (<3.7,>=3.6) ; extra == 'dev'
  Requires-Dist: pytest (<8.3,>=8.2.0) ; extra == 'dev'
- Requires-Dist: ruff (<0.5,>=0.4.4) ; extra == 'dev'
+ Requires-Dist: ruff (<0.6,>=0.4.4) ; extra == 'dev'
  Requires-Dist: types-psycopg2 (<3,>=2.9) ; extra == 'dev'
  Requires-Dist: pre-commit (>=3.5) ; (python_version == "3.8") and extra == 'dev'
  Requires-Dist: pre-commit (<3.8,>=3.7) ; (python_version >= "3.9") and extra == 'dev'
sqlframe-1.12.0.dist-info/RECORD → sqlframe-1.14.0.dist-info/RECORD CHANGED
@@ -1,5 +1,5 @@
  sqlframe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- sqlframe/_version.py,sha256=cgR9Mx-45EIoPNcTiuL_LLmCR2oVTYIPi5z0W11owvc,413
+ sqlframe/_version.py,sha256=P7Qh1JtG4Is-HJ-aT0XbCm84DoRrTTlY2cMmIjRTLGQ,413
  sqlframe/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  sqlframe/base/_typing.py,sha256=DuTay8-o9W-pw3RPZCgLunKNJLS9PkaV11G_pxXp9NY,1256
  sqlframe/base/catalog.py,sha256=ATDGirouUjal05P4ymL-wIi8rgjg_8w4PoACamiO64A,37245
@@ -7,16 +7,16 @@ sqlframe/base/column.py,sha256=y41rFV7y_seTNkAK3SSqnggGi2otXt0ejKzsMyHCYT4,17515
  sqlframe/base/dataframe.py,sha256=75ZM9r52fufFmVShtntcDUr6dZ1stX9HDmXLuDrYTAU,71004
  sqlframe/base/decorators.py,sha256=I5osMgx9BuCgbtp4jVM2DNwYJVLzCv-OtTedhQEik0g,1882
  sqlframe/base/exceptions.py,sha256=9Uwvqn2eAkDpqm4BrRgbL61qM-GMCbJEMAW8otxO46s,370
- sqlframe/base/function_alternatives.py,sha256=l6Fu0mZ-eleObpYcCAnOXV1HvuHugeoCFUcSV7NmFis,45916
- sqlframe/base/functions.py,sha256=hSLuyO03m2dXPJdmVKp9of-_xj4V2sUhSzJ65Ti240Q,187616
+ sqlframe/base/function_alternatives.py,sha256=B4UkHuUD1COc4xRm20pNtDloyMU6mhhesEZ3yWwaBJE,49702
+ sqlframe/base/functions.py,sha256=L_I028dDt2th9DeKIheidMLY8jjFICvze4Gw7F62NUk,188446
  sqlframe/base/group.py,sha256=TES9CleVmH3x-0X-tqmuUKfCKSWjH5vg1aU3R6dDmFc,4059
  sqlframe/base/normalize.py,sha256=nXAJ5CwxVf4DV0GsH-q1w0p8gmjSMlv96k_ez1eVul8,3880
  sqlframe/base/operations.py,sha256=-AhNuEzcV7ZExoP1oY3blaKip-joQyJeQVvfBTs_2g4,3456
  sqlframe/base/readerwriter.py,sha256=5NPQMiOrw6I54U243R_6-ynnWYsNksgqwRpPp4IFjIw,25288
- sqlframe/base/session.py,sha256=2C0OsPoP49AuqVNtPiazTdVpwQA1668g5WOydrYP6SA,22001
+ sqlframe/base/session.py,sha256=gg0OX6MK6sV4t91aS7GtyYGXhefXnm33433XDw5GpLY,22068
  sqlframe/base/transforms.py,sha256=y0j3SGDz3XCmNGrvassk1S-owllUWfkHyMgZlY6SFO4,467
  sqlframe/base/types.py,sha256=iBNk9bpFtb2NBIogYS8i7OlQZMRvpR6XxqzBebsjQDU,12280
- sqlframe/base/util.py,sha256=tWccrZne-Acn4N2RxYr87mfI_GDMf_K9hRD7BnhGBq0,11756
+ sqlframe/base/util.py,sha256=l6zu-3SzE2e0-iDMH0GD55gsxYx362tVc0QA6eLPvYk,12530
  sqlframe/base/window.py,sha256=8hOv-ignPPIsZA9FzvYzcLE9J_glalVaYjIAUdRUX3o,4943
  sqlframe/base/mixins/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  sqlframe/base/mixins/catalog_mixins.py,sha256=NhuPGxIqPjyuC_V_NALN1sn9v9h0-xwFOlJyJgsvyek,14212
@@ -26,33 +26,33 @@ sqlframe/bigquery/__init__.py,sha256=i2NsMbiXOj2xphCtPuNk6cVw4iYeq5_B1I9dVI9aGAk
  sqlframe/bigquery/catalog.py,sha256=h3aQAQAJg6MMvFpP8Ku0S4pcx30n5qYrqHhWSomxb6A,9319
  sqlframe/bigquery/column.py,sha256=E1tUa62Y5HajkhgFuebU9zohrGyieudcHzTT8gfalio,40
  sqlframe/bigquery/dataframe.py,sha256=Y2uy4FEYw0KxIHgnaA9uMwdIzxJzTlD_NSzIe7P7kxA,2405
- sqlframe/bigquery/functions.py,sha256=ifJxEyHDwSp2iA-yBt7XHLh9GhVPpgzs5YckMFvag8w,11370
- sqlframe/bigquery/functions.pyi,sha256=JiyLFLiO0jyJec6j1o4ujPVQ7Tma-c9YHlm-3UQYD9M,13642
+ sqlframe/bigquery/functions.py,sha256=Suy4KX75pYIpYrXgN1Af0NLf73ahuS7PmhNY2FkQhnk,11255
+ sqlframe/bigquery/functions.pyi,sha256=BCYqHpZzv4KWVtTuiC9wCSzXdxeHsz9gwkEvKzwHnoY,13583
  sqlframe/bigquery/group.py,sha256=UVBNBRTo8OqS-_cS5YwvTeJYgYxeG-d6R3kfyHmlFqw,391
  sqlframe/bigquery/readwriter.py,sha256=WAD3ZMwkkjOpvPPoZXfaLLNM6tRTeUvdEj-hQZAzXeo,870
- sqlframe/bigquery/session.py,sha256=1-hE1Wr2b6SqfD4M_-OGMqjaSbhD6wSQd74v71xHZv8,2709
+ sqlframe/bigquery/session.py,sha256=rOFAtCGWXjtM5Vc5_XiDlJv7H8K_Ga9h2M97if3F0mM,2663
  sqlframe/bigquery/types.py,sha256=KwNyuXIo-2xVVd4bZED3YrQOobKCtemlxGrJL7DrTC8,34
  sqlframe/bigquery/window.py,sha256=6GKPzuxeSapJakBaKBeT9VpED1ACdjggDv9JRILDyV0,35
  sqlframe/duckdb/__init__.py,sha256=t85TA3ufZtL1weQNFmEs8itCSwbJFtw03-p0GT4XGf8,669
  sqlframe/duckdb/catalog.py,sha256=rt3XuP3m4DbhuibOFyvx_95F2zZa6uDwCI_TmcvKy1A,3895
  sqlframe/duckdb/column.py,sha256=wkEPcp3xVsH5nC3kpacXqNkRv9htPtBgt-0uFRxIRNs,56
  sqlframe/duckdb/dataframe.py,sha256=WmBrrmrfxDpYuikSMFqacgV2Jawkx4sSYE-_mwnL4Jw,1225
- sqlframe/duckdb/functions.py,sha256=pz40eqR4U_s42p7UeaefJD5yU1vo6mqNoVz0iKN5eRk,1671
+ sqlframe/duckdb/functions.py,sha256=RXwfP3Ls9OYpNNHT5f0yjKxxaDqrqSAj2qCD1esMW-U,1790
  sqlframe/duckdb/functions.pyi,sha256=nU-6a2cfLDkuMCdYrNRLfa6-i8Aa0CxQQ1nLT6roIdI,5813
  sqlframe/duckdb/group.py,sha256=IkhbW42Ng1U5YT3FkIdiB4zBqRkW4QyTb-1detY1e_4,383
  sqlframe/duckdb/readwriter.py,sha256=6xiyE3JKzY9ieKqvbAOBlifiHE6NpYISHul3Idlmoa0,4542
- sqlframe/duckdb/session.py,sha256=j75iIsmaxl5x7oqyhN_VolvEclKj7QmaFfIis-SmoKM,2147
+ sqlframe/duckdb/session.py,sha256=pk1b-eR2RPr7SJYftnHKfZj5EXuN-D5xanUHLoWNMYU,2100
  sqlframe/duckdb/types.py,sha256=KwNyuXIo-2xVVd4bZED3YrQOobKCtemlxGrJL7DrTC8,34
  sqlframe/duckdb/window.py,sha256=6GKPzuxeSapJakBaKBeT9VpED1ACdjggDv9JRILDyV0,35
  sqlframe/postgres/__init__.py,sha256=Sz_MtgV_oh_QhfZTC7iKM07ICUmNcJEDV0kEkSW9ZKU,712
  sqlframe/postgres/catalog.py,sha256=uGMKo4RXOU6fA4IjcfebukEI18QswVk3cnB_G7S6_Fw,8130
  sqlframe/postgres/column.py,sha256=E1tUa62Y5HajkhgFuebU9zohrGyieudcHzTT8gfalio,40
  sqlframe/postgres/dataframe.py,sha256=f-w6UHxZtmeZ5oMbaqJaZ8FrYeOhzyveNlZOK57ke0k,1289
- sqlframe/postgres/functions.py,sha256=b9ccP5vY8EDZXkJbhE_LjAlH50_6wcUF9VbzPrariec,2374
+ sqlframe/postgres/functions.py,sha256=HV9wLwrFtlwRJlTW4ipEx4XerFLREeGdKnBbqYe70WE,2511
  sqlframe/postgres/functions.pyi,sha256=um-qE2g9iPs0-53vJ46lArbfvDqAbFIwrxLJgcrPM_8,5536
  sqlframe/postgres/group.py,sha256=KUXeSFKWTSH9yCRJAhW85OvjZaG6Zr4In9LR_ie3yGU,391
  sqlframe/postgres/readwriter.py,sha256=L1e3yKXzFVNR_W5s1DHaWol7G8x7l4jcZ5sLGualyMk,870
- sqlframe/postgres/session.py,sha256=oKh8-j9MN6msVheQNCYoGmej9ktFLTTHmlMP58uZ3nw,1936
+ sqlframe/postgres/session.py,sha256=YuWvzuPhhCt9bSN_ZCUp9WYWx4XW7_PGbMdTGRuqGzE,2147
  sqlframe/postgres/types.py,sha256=KwNyuXIo-2xVVd4bZED3YrQOobKCtemlxGrJL7DrTC8,34
  sqlframe/postgres/window.py,sha256=6GKPzuxeSapJakBaKBeT9VpED1ACdjggDv9JRILDyV0,35
  sqlframe/redshift/__init__.py,sha256=jamKYQtQaKjjXnQ01QGPHvatbrZSw9sWno_VOUGSz6I,712
@@ -69,18 +69,18 @@ sqlframe/snowflake/__init__.py,sha256=nuQ3cuHjDpW4ELZfbd2qOYmtXmcYl7MtsrdOrRdozo
  sqlframe/snowflake/catalog.py,sha256=uDjBgDdCyxaDkGNX_8tb-lol7MwwazcClUBAZsOSj70,5014
  sqlframe/snowflake/column.py,sha256=E1tUa62Y5HajkhgFuebU9zohrGyieudcHzTT8gfalio,40
  sqlframe/snowflake/dataframe.py,sha256=jUyQNCe3K6SH4PtmrR67YN0SLqkHakMxLiB261fDgkc,1862
- sqlframe/snowflake/functions.py,sha256=cIO56ZsOpjg6ICLjTh-osG1h1UjjEtM39_ieMiWkmyI,2466
- sqlframe/snowflake/functions.pyi,sha256=MkNif_sIceHMNhl-qvLir2DJ1jPqwyaahltdpgY4Jq0,6213
+ sqlframe/snowflake/functions.py,sha256=lyKIiChgfr97EaxI4tAapaFSToUgT2RqyqfiHl91ZNo,2537
+ sqlframe/snowflake/functions.pyi,sha256=wqUPXuZxjRY0rPY2BRAb7XXkWYP1DyuDzvlriRySdSw,6185
  sqlframe/snowflake/group.py,sha256=pPP1l2RRo_LgkXrji8a87n2PKo-63ZRPT-WUtvVcBME,395
  sqlframe/snowflake/readwriter.py,sha256=yhRc2HcMq6PwV3ghZWC-q-qaE7LE4aEjZEXCip4OOlQ,884
- sqlframe/snowflake/session.py,sha256=bDOlnuIiQ9j_zfF7F5H1gTLmpHUjruIxr2CfXcS_7YU,3284
+ sqlframe/snowflake/session.py,sha256=_EBXr6-GQAksAYqAE-Dv3HJMnrAEGINH0IzCDoIswJE,3235
  sqlframe/snowflake/types.py,sha256=KwNyuXIo-2xVVd4bZED3YrQOobKCtemlxGrJL7DrTC8,34
  sqlframe/snowflake/window.py,sha256=6GKPzuxeSapJakBaKBeT9VpED1ACdjggDv9JRILDyV0,35
  sqlframe/spark/__init__.py,sha256=WhYQAZMJN1EMNAVGUH7BEinxNdYtXOrrr-6HUniJOyI,649
  sqlframe/spark/catalog.py,sha256=rIX5DtPnINbcPZRUe4Z1bOpkJoNRlrO9qWkUeTQClNc,32612
  sqlframe/spark/column.py,sha256=E1tUa62Y5HajkhgFuebU9zohrGyieudcHzTT8gfalio,40
  sqlframe/spark/dataframe.py,sha256=_TD-h7oz0-i80r90v17UoLDoIzcGNchU2SL13ujOOic,1779
- sqlframe/spark/functions.py,sha256=PkK4MBpVADhnDbrgFDii5zFaNrhi4y-OYX3Lcu-SW0k,530
+ sqlframe/spark/functions.py,sha256=AQfqfvaojJzCuo9DyDklz0JYZPhn_3dzWvztsklBO0o,586
  sqlframe/spark/functions.pyi,sha256=bjz6s8E6OB0c4KfTTsls7rhb_R9mIYvkaeaXefMziqM,11617
  sqlframe/spark/group.py,sha256=MrvV_v-YkBc6T1zz882WrEqtWjlooWIyHBCmTQg3fCA,379
  sqlframe/spark/readwriter.py,sha256=w68EImTcGJv64X7pc1tk5tDjDxb1nAnn-MiIaaN9Dc8,812
@@ -99,8 +99,8 @@ sqlframe/standalone/types.py,sha256=KwNyuXIo-2xVVd4bZED3YrQOobKCtemlxGrJL7DrTC8,
  sqlframe/standalone/window.py,sha256=6GKPzuxeSapJakBaKBeT9VpED1ACdjggDv9JRILDyV0,35
  sqlframe/testing/__init__.py,sha256=VVCosQhitU74A3NnE52O4mNtGZONapuEXcc20QmSlnQ,132
  sqlframe/testing/utils.py,sha256=9DDYVuocO7tygee3RaajuJNZ24sJwf_LY556kKg7kTw,13011
- sqlframe-1.12.0.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
- sqlframe-1.12.0.dist-info/METADATA,sha256=pSBFDDNjWo6RscllTU6EquCE1DG0C8FXmuad1RINxgk,7497
- sqlframe-1.12.0.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
- sqlframe-1.12.0.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
- sqlframe-1.12.0.dist-info/RECORD,,
+ sqlframe-1.14.0.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
+ sqlframe-1.14.0.dist-info/METADATA,sha256=Bm9d-eqk6pN20TX6hzR2xeppjjWuelyqOf87i8e4eRQ,7497
+ sqlframe-1.14.0.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
+ sqlframe-1.14.0.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
+ sqlframe-1.14.0.dist-info/RECORD,,