sqlframe 3.10.1__py3-none-any.whl → 3.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. sqlframe/_version.py +2 -2
  2. sqlframe/base/column.py +11 -8
  3. sqlframe/base/dataframe.py +5 -3
  4. sqlframe/base/decorators.py +41 -2
  5. sqlframe/base/function_alternatives.py +445 -404
  6. sqlframe/base/functions.py +1100 -73
  7. sqlframe/base/mixins/dataframe_mixins.py +3 -3
  8. sqlframe/base/operations.py +5 -5
  9. sqlframe/base/session.py +36 -4
  10. sqlframe/bigquery/dataframe.py +2 -2
  11. sqlframe/bigquery/functions.py +1 -361
  12. sqlframe/bigquery/functions.pyi +63 -156
  13. sqlframe/bigquery/session.py +4 -0
  14. sqlframe/databricks/dataframe.py +2 -2
  15. sqlframe/databricks/functions.py +0 -10
  16. sqlframe/databricks/functions.pyi +405 -413
  17. sqlframe/databricks/session.py +4 -0
  18. sqlframe/duckdb/dataframe.py +2 -2
  19. sqlframe/duckdb/functions.py +0 -40
  20. sqlframe/duckdb/functions.pyi +219 -216
  21. sqlframe/duckdb/session.py +4 -0
  22. sqlframe/postgres/dataframe.py +2 -2
  23. sqlframe/postgres/functions.py +1 -60
  24. sqlframe/postgres/functions.pyi +197 -196
  25. sqlframe/postgres/session.py +4 -0
  26. sqlframe/redshift/dataframe.py +2 -2
  27. sqlframe/redshift/functions.py +1 -4
  28. sqlframe/redshift/session.py +4 -0
  29. sqlframe/snowflake/dataframe.py +2 -2
  30. sqlframe/snowflake/functions.py +1 -55
  31. sqlframe/snowflake/functions.pyi +224 -220
  32. sqlframe/snowflake/session.py +4 -0
  33. sqlframe/spark/dataframe.py +2 -2
  34. sqlframe/spark/functions.py +0 -9
  35. sqlframe/spark/functions.pyi +411 -413
  36. sqlframe/spark/session.py +4 -0
  37. sqlframe/standalone/dataframe.py +2 -2
  38. sqlframe/standalone/functions.py +1 -1
  39. sqlframe/standalone/session.py +4 -0
  40. sqlframe/testing/utils.py +3 -3
  41. {sqlframe-3.10.1.dist-info → sqlframe-3.12.0.dist-info}/METADATA +2 -2
  42. {sqlframe-3.10.1.dist-info → sqlframe-3.12.0.dist-info}/RECORD +45 -45
  43. {sqlframe-3.10.1.dist-info → sqlframe-3.12.0.dist-info}/LICENSE +0 -0
  44. {sqlframe-3.10.1.dist-info → sqlframe-3.12.0.dist-info}/WHEEL +0 -0
  45. {sqlframe-3.10.1.dist-info → sqlframe-3.12.0.dist-info}/top_level.txt +0 -0
sqlframe/_version.py CHANGED
@@ -12,5 +12,5 @@ __version__: str
12
12
  __version_tuple__: VERSION_TUPLE
13
13
  version_tuple: VERSION_TUPLE
14
14
 
15
- __version__ = version = '3.10.1'
16
- __version_tuple__ = version_tuple = (3, 10, 1)
15
+ __version__ = version = '3.12.0'
16
+ __version_tuple__ = version_tuple = (3, 12, 0)
sqlframe/base/column.py CHANGED
@@ -9,6 +9,7 @@ import typing as t
9
9
  import sqlglot
10
10
  from sqlglot import Dialect
11
11
  from sqlglot import expressions as exp
12
+ from sqlglot.expressions import paren
12
13
  from sqlglot.helper import flatten, is_iterable
13
14
  from sqlglot.optimizer.normalize_identifiers import normalize_identifiers
14
15
 
@@ -63,10 +64,10 @@ class Column:
63
64
  return self.binary_op(exp.LTE, other)
64
65
 
65
66
  def __and__(self, other: ColumnOrLiteral) -> Column:
66
- return self.binary_op(exp.And, other)
67
+ return self.binary_op(exp.And, other, paren=True)
67
68
 
68
69
  def __or__(self, other: ColumnOrLiteral) -> Column:
69
- return self.binary_op(exp.Or, other)
70
+ return self.binary_op(exp.Or, other, paren=True)
70
71
 
71
72
  def __mod__(self, other: ColumnOrLiteral) -> Column:
72
73
  return self.binary_op(exp.Mod, other, paren=True)
@@ -181,7 +182,7 @@ class Column:
181
182
  ) -> Column:
182
183
  columns = [] if column is None else [cls.ensure_col(column)]
183
184
  column_args = [cls.ensure_col(arg) for arg in args]
184
- expressions = [x.expression for x in columns + column_args]
185
+ expressions = [x.column_expression for x in columns + column_args]
185
186
  new_expression = exp.Anonymous(this=func_name.upper(), expressions=expressions)
186
187
  return Column(new_expression)
187
188
 
@@ -192,9 +193,9 @@ class Column:
192
193
  ensured_column = None if column is None else cls.ensure_col(column)
193
194
  ensure_expression_values = {
194
195
  k: (
195
- [cls.ensure_col(x).expression for x in v]
196
+ [cls.ensure_col(x).column_expression for x in v]
196
197
  if is_iterable(v)
197
- else cls.ensure_col(v).expression
198
+ else cls.ensure_col(v).column_expression
198
199
  )
199
200
  for k, v in kwargs.items()
200
201
  if v is not None
@@ -316,10 +317,12 @@ class Column:
316
317
  from sqlframe.base.functions import when
317
318
 
318
319
  column_with_if = when(condition, value)
319
- if not isinstance(self.expression, exp.Case):
320
+ if not isinstance(self.column_expression, exp.Case):
320
321
  return column_with_if
321
322
  new_column = self.copy()
322
- new_column.expression.args["ifs"].extend(column_with_if.expression.args["ifs"])
323
+ new_column.column_expression.args["ifs"].extend(
324
+ column_with_if.column_expression.args["ifs"]
325
+ )
323
326
  return new_column
324
327
 
325
328
  def otherwise(self, value: t.Any) -> Column:
@@ -327,7 +330,7 @@ class Column:
327
330
 
328
331
  true_value = value if isinstance(value, Column) else lit(value)
329
332
  new_column = self.copy()
330
- new_column.expression.set("default", true_value.column_expression)
333
+ new_column.column_expression.set("default", true_value.column_expression)
331
334
  return new_column
332
335
 
333
336
  def isNull(self) -> Column:
sqlframe/base/dataframe.py CHANGED
@@ -80,7 +80,7 @@ JOIN_HINTS = {
80
80
  }
81
81
 
82
82
 
83
- DF = t.TypeVar("DF", bound="_BaseDataFrame")
83
+ DF = t.TypeVar("DF", bound="BaseDataFrame")
84
84
 
85
85
 
86
86
  class OpenAIMode(enum.Enum):
@@ -198,7 +198,7 @@ class _BaseDataFrameStatFunctions(t.Generic[DF]):
198
198
  STAT = t.TypeVar("STAT", bound=_BaseDataFrameStatFunctions)
199
199
 
200
200
 
201
- class _BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
201
+ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
202
202
  _na: t.Type[NA]
203
203
  _stat: t.Type[STAT]
204
204
  _group_data: t.Type[GROUP_DATA]
@@ -1624,7 +1624,9 @@ class _BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
1624
1624
  raise NotImplementedError("Vertical show is not yet supported")
1625
1625
  if truncate:
1626
1626
  logger.warning("Truncate is ignored so full results will be displayed")
1627
- result = self.limit(n).collect()
1627
+ # Make sure that the limit we add doesn't affect the results
1628
+ df = self._convert_leaf_to_cte()
1629
+ result = df.limit(n).collect()
1628
1630
  table = PrettyTable()
1629
1631
  if row := seq_get(result, 0):
1630
1632
  table.field_names = row._unique_field_names
sqlframe/base/decorators.py CHANGED
@@ -1,15 +1,54 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import re
3
4
  import typing as t
4
5
 
6
+ from sqlglot import exp
5
7
  from sqlglot.helper import ensure_list
6
8
 
9
+ from sqlframe.base.column import Column
10
+
7
11
  CALLING_CLASS = t.TypeVar("CALLING_CLASS")
8
12
 
9
13
 
10
14
  def func_metadata(unsupported_engines: t.Optional[t.Union[str, t.List[str]]] = None) -> t.Callable:
11
15
  def _metadata(func: t.Callable) -> t.Callable:
12
- func.unsupported_engines = ensure_list(unsupported_engines) if unsupported_engines else [] # type: ignore
13
- return func
16
+ def wrapper(*args, **kwargs):
17
+ funcs_to_not_auto_alias = [
18
+ "posexplode",
19
+ "explode_outer",
20
+ "json_tuple",
21
+ "posexplode_outer",
22
+ "stack",
23
+ "inline",
24
+ "inline_outer",
25
+ "window",
26
+ "session_window",
27
+ "window_time",
28
+ ]
29
+
30
+ result = func(*args, **kwargs)
31
+ if (
32
+ isinstance(result, Column)
33
+ and isinstance(result.column_expression, exp.Func)
34
+ and not isinstance(result.expression, exp.Alias)
35
+ and func.__name__ not in funcs_to_not_auto_alias
36
+ ):
37
+ col_name = result.column_expression.find(exp.Identifier)
38
+ if col_name:
39
+ col_name = col_name.name
40
+ else:
41
+ col_name = result.column_expression.find(exp.Literal)
42
+ if col_name:
43
+ col_name = col_name.this
44
+ alias_name = f"{func.__name__}__{col_name or ''}__"
45
+ # BigQuery has restrictions on alias names so we constrain it to alphanumeric characters and underscores
46
+ return result.alias(re.sub("\W", "_", alias_name)) # type: ignore
47
+ return result
48
+
49
+ wrapper.unsupported_engines = ( # type: ignore
50
+ ensure_list(unsupported_engines) if unsupported_engines else []
51
+ )
52
+ return wrapper
14
53
 
15
54
  return _metadata