sqlframe 1.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98) hide show
  1. sqlframe/__init__.py +0 -0
  2. sqlframe/_version.py +16 -0
  3. sqlframe/base/__init__.py +0 -0
  4. sqlframe/base/_typing.py +39 -0
  5. sqlframe/base/catalog.py +1163 -0
  6. sqlframe/base/column.py +388 -0
  7. sqlframe/base/dataframe.py +1519 -0
  8. sqlframe/base/decorators.py +51 -0
  9. sqlframe/base/exceptions.py +14 -0
  10. sqlframe/base/function_alternatives.py +1055 -0
  11. sqlframe/base/functions.py +1678 -0
  12. sqlframe/base/group.py +102 -0
  13. sqlframe/base/mixins/__init__.py +0 -0
  14. sqlframe/base/mixins/catalog_mixins.py +419 -0
  15. sqlframe/base/mixins/readwriter_mixins.py +118 -0
  16. sqlframe/base/normalize.py +84 -0
  17. sqlframe/base/operations.py +87 -0
  18. sqlframe/base/readerwriter.py +679 -0
  19. sqlframe/base/session.py +585 -0
  20. sqlframe/base/transforms.py +13 -0
  21. sqlframe/base/types.py +418 -0
  22. sqlframe/base/util.py +242 -0
  23. sqlframe/base/window.py +139 -0
  24. sqlframe/bigquery/__init__.py +23 -0
  25. sqlframe/bigquery/catalog.py +255 -0
  26. sqlframe/bigquery/column.py +1 -0
  27. sqlframe/bigquery/dataframe.py +54 -0
  28. sqlframe/bigquery/functions.py +378 -0
  29. sqlframe/bigquery/group.py +14 -0
  30. sqlframe/bigquery/readwriter.py +29 -0
  31. sqlframe/bigquery/session.py +89 -0
  32. sqlframe/bigquery/types.py +1 -0
  33. sqlframe/bigquery/window.py +1 -0
  34. sqlframe/duckdb/__init__.py +20 -0
  35. sqlframe/duckdb/catalog.py +108 -0
  36. sqlframe/duckdb/column.py +1 -0
  37. sqlframe/duckdb/dataframe.py +55 -0
  38. sqlframe/duckdb/functions.py +47 -0
  39. sqlframe/duckdb/group.py +14 -0
  40. sqlframe/duckdb/readwriter.py +111 -0
  41. sqlframe/duckdb/session.py +65 -0
  42. sqlframe/duckdb/types.py +1 -0
  43. sqlframe/duckdb/window.py +1 -0
  44. sqlframe/postgres/__init__.py +23 -0
  45. sqlframe/postgres/catalog.py +106 -0
  46. sqlframe/postgres/column.py +1 -0
  47. sqlframe/postgres/dataframe.py +54 -0
  48. sqlframe/postgres/functions.py +61 -0
  49. sqlframe/postgres/group.py +14 -0
  50. sqlframe/postgres/readwriter.py +29 -0
  51. sqlframe/postgres/session.py +68 -0
  52. sqlframe/postgres/types.py +1 -0
  53. sqlframe/postgres/window.py +1 -0
  54. sqlframe/redshift/__init__.py +23 -0
  55. sqlframe/redshift/catalog.py +127 -0
  56. sqlframe/redshift/column.py +1 -0
  57. sqlframe/redshift/dataframe.py +54 -0
  58. sqlframe/redshift/functions.py +18 -0
  59. sqlframe/redshift/group.py +14 -0
  60. sqlframe/redshift/readwriter.py +29 -0
  61. sqlframe/redshift/session.py +53 -0
  62. sqlframe/redshift/types.py +1 -0
  63. sqlframe/redshift/window.py +1 -0
  64. sqlframe/snowflake/__init__.py +26 -0
  65. sqlframe/snowflake/catalog.py +134 -0
  66. sqlframe/snowflake/column.py +1 -0
  67. sqlframe/snowflake/dataframe.py +54 -0
  68. sqlframe/snowflake/functions.py +18 -0
  69. sqlframe/snowflake/group.py +14 -0
  70. sqlframe/snowflake/readwriter.py +29 -0
  71. sqlframe/snowflake/session.py +53 -0
  72. sqlframe/snowflake/types.py +1 -0
  73. sqlframe/snowflake/window.py +1 -0
  74. sqlframe/spark/__init__.py +23 -0
  75. sqlframe/spark/catalog.py +1028 -0
  76. sqlframe/spark/column.py +1 -0
  77. sqlframe/spark/dataframe.py +54 -0
  78. sqlframe/spark/functions.py +22 -0
  79. sqlframe/spark/group.py +14 -0
  80. sqlframe/spark/readwriter.py +29 -0
  81. sqlframe/spark/session.py +90 -0
  82. sqlframe/spark/types.py +1 -0
  83. sqlframe/spark/window.py +1 -0
  84. sqlframe/standalone/__init__.py +26 -0
  85. sqlframe/standalone/catalog.py +13 -0
  86. sqlframe/standalone/column.py +1 -0
  87. sqlframe/standalone/dataframe.py +36 -0
  88. sqlframe/standalone/functions.py +1 -0
  89. sqlframe/standalone/group.py +14 -0
  90. sqlframe/standalone/readwriter.py +19 -0
  91. sqlframe/standalone/session.py +40 -0
  92. sqlframe/standalone/types.py +1 -0
  93. sqlframe/standalone/window.py +1 -0
  94. sqlframe-1.1.3.dist-info/LICENSE +21 -0
  95. sqlframe-1.1.3.dist-info/METADATA +172 -0
  96. sqlframe-1.1.3.dist-info/RECORD +98 -0
  97. sqlframe-1.1.3.dist-info/WHEEL +5 -0
  98. sqlframe-1.1.3.dist-info/top_level.txt +1 -0
@@ -0,0 +1,84 @@
1
+ # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'sqlframe' folder.
2
+
3
+ from __future__ import annotations
4
+
5
+ import typing as t
6
+
7
+ from sqlglot import expressions as exp
8
+ from sqlglot.helper import ensure_list
9
+
10
+ from sqlframe.base.column import Column
11
+ from sqlframe.base.util import get_tables_from_expression_with_join
12
+
13
+ if t.TYPE_CHECKING:
14
+ from sqlframe.base.dataframe import SESSION
15
+
16
+ NORMALIZE_INPUT = t.TypeVar("NORMALIZE_INPUT", bound=t.Union[str, exp.Expression, Column])
17
+
18
+
19
def normalize(session: SESSION, expression_context: exp.Select, expr: t.List[NORMALIZE_INPUT]):
    """Normalize every identifier in ``expr`` against the session's input dialect.

    Each identifier is normalized in place, then — when it refers to a known
    DataFrame alias, branch id, or sequence id — rewritten to reference the
    corresponding CTE name inside ``expression_context``.
    """
    expressions = _ensure_expressions(ensure_list(expr))
    for expression in expressions:
        for identifier in expression.find_all(exp.Identifier):
            identifier.transform(session.input_dialect.normalize_identifier)
            replace_alias_name_with_cte_name(session, expression_context, identifier)
            replace_branch_and_sequence_ids_with_cte_name(session, expression_context, identifier)
28
+
29
+
30
def replace_alias_name_with_cte_name(
    session: SESSION, expression_context: exp.Select, id: exp.Identifier
):
    """If ``id`` matches a user-visible DataFrame alias, point it at the most
    recent CTE created for that alias (searching CTEs newest-first)."""
    normalized_id = session._normalize_string(id.alias_or_name)
    mapping = session.name_to_sequence_id_mapping
    if normalized_id not in mapping:
        return
    sequence_ids = mapping[normalized_id]
    for cte in reversed(expression_context.ctes):
        if cte.args["sequence_id"] in sequence_ids:
            _set_alias_name(id, cte.alias_or_name)
            break
39
+
40
+
41
def replace_branch_and_sequence_ids_with_cte_name(
    session: SESSION, expression_context: exp.Select, id: exp.Identifier
):
    """If ``id`` is a known branch or sequence id, rewrite it to the name of the
    matching CTE in ``expression_context`` (searching CTEs newest-first)."""
    normalized_id = session._normalize_string(id.alias_or_name)
    if normalized_id not in session.known_ids:
        return

    # Check if we have a join and if both the tables in that join share a common branch id.
    # If so we need to have this reference the left table by default unless the id is a sequence
    # id then it keeps that reference. This handles the weird edge case in spark that shouldn't
    # be common in practice.
    if expression_context.args.get("joins") and normalized_id in session.known_branch_ids:
        join_table_aliases = [
            x.alias_or_name for x in get_tables_from_expression_with_join(expression_context)
        ]
        ctes_in_join = [
            cte for cte in expression_context.ctes if cte.alias_or_name in join_table_aliases
        ]
        # Fix: the original asserted len(ctes_in_join) == 2 only *after* indexing
        # ctes_in_join[1], so fewer than two matches raised an opaque IndexError.
        # Checking the length up front makes any other count fall through to the
        # generic branch/sequence lookup below instead of crashing.
        if (
            len(ctes_in_join) == 2
            and ctes_in_join[0].args["branch_id"] == ctes_in_join[1].args["branch_id"]
        ):
            _set_alias_name(id, ctes_in_join[0].alias_or_name)
            return

    for cte in reversed(expression_context.ctes):
        if normalized_id in (cte.args["branch_id"], cte.args["sequence_id"]):
            _set_alias_name(id, cte.alias_or_name)
            return
66
+
67
+
68
+ def _set_alias_name(id: exp.Identifier, name: str):
69
+ id.set("this", name)
70
+ id.set("quoted", False)
71
+
72
+
73
def _ensure_expressions(values: t.List[NORMALIZE_INPUT]) -> t.List[exp.Expression]:
    """Coerce each input (column name string, Column, or raw sqlglot expression)
    into a sqlglot expression, raising ``ValueError`` for anything else."""

    def _to_expression(value: NORMALIZE_INPUT) -> exp.Expression:
        if isinstance(value, str):
            return Column.ensure_col(value).expression
        if isinstance(value, Column):
            return value.expression
        if isinstance(value, exp.Expression):
            return value
        raise ValueError(f"Got an invalid type to normalize: {type(value)}")

    return [_to_expression(value) for value in values]
@@ -0,0 +1,87 @@
1
+ # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'sqlframe' folder.
2
+
3
+ from __future__ import annotations
4
+
5
+ import functools
6
+ import typing as t
7
+ from enum import IntEnum
8
+
9
+ if t.TYPE_CHECKING:
10
+ from sqlframe.base.dataframe import _BaseDataFrame
11
+ from sqlframe.base.group import _BaseGroupedData
12
+
13
+
14
class Operation(IntEnum):
    """Ordered classification of DataFrame operations by SQL clause.

    The integer values mirror the logical evaluation order of a SELECT
    statement; the `operation`/`group_operation` decorators compare them to
    decide when the current expression must be pushed into a CTE (i.e. when a
    "smaller" operation follows a "larger" one, the previous state is frozen
    into a CTE first).
    """

    # Sentinel: DataFrame freshly created, no operation applied yet.
    INIT = -1
    # Operation that should not advance or reset the clause ordering.
    NO_OP = 0
    FROM = 1
    WHERE = 2
    GROUP_BY = 3
    HAVING = 4
    SELECT = 5
    ORDER_BY = 6
    LIMIT = 7
24
+
25
+
26
def operation(op: Operation) -> t.Callable[[t.Callable], t.Callable]:
    """
    Decorator used around DataFrame methods to indicate what type of operation is being performed from the
    ordered Operation enums. This is used to determine which operations should be performed on a CTE vs.
    included with the previous operation.

    Ex: After a user does a join we want to allow them to select which columns for the different
    tables that they want to carry through to the following operation. If we put that join in
    a CTE preemptively then the user would not have a chance to select which column they want
    in cases where there is overlap in names.
    """

    def decorator(func: t.Callable) -> t.Callable:
        @functools.wraps(func)
        def wrapper(self: _BaseDataFrame, *args, **kwargs) -> _BaseDataFrame:
            # A brand-new DataFrame is first frozen into a CTE so subsequent
            # clauses build on a stable base.
            if self.last_op == Operation.INIT:
                self = self._convert_leaf_to_cte()
                self.last_op = Operation.NO_OP
            last_op = self.last_op
            # NO_OP operations inherit the current clause position.
            new_op = op if op != Operation.NO_OP else last_op
            # Going "backwards" in clause order (or stacking SELECT on SELECT)
            # means the pending expression must become a CTE first.
            if new_op < last_op or (last_op == new_op == Operation.SELECT):
                self = self._convert_leaf_to_cte()
            df: t.Union[_BaseDataFrame, _BaseGroupedData] = func(self, *args, **kwargs)
            df.last_op = new_op  # type: ignore
            return df  # type: ignore

        # Fix: dropped the explicit `wrapper.__wrapped__ = func` — functools.wraps
        # already sets __wrapped__ (via functools.update_wrapper), so the manual
        # assignment was redundant.
        return wrapper

    return decorator
56
+
57
+
58
def group_operation(op: Operation) -> t.Callable[[t.Callable], t.Callable]:
    """
    Decorator used around GroupedData methods to indicate what type of operation is being performed
    from the ordered Operation enums. This is the grouped-data counterpart of `operation`: the
    clause-ordering state lives on the underlying DataFrame (`self._df`) rather than on `self`,
    and it is used to determine which operations should be performed on a CTE vs. included with
    the previous operation.
    """

    def decorator(func: t.Callable) -> t.Callable:
        @functools.wraps(func)
        def wrapper(self: _BaseGroupedData, *args, **kwargs) -> _BaseDataFrame:
            # A brand-new underlying DataFrame is first frozen into a CTE so
            # subsequent clauses build on a stable base.
            if self._df.last_op == Operation.INIT:
                self._df = self._df._convert_leaf_to_cte()
                self._df.last_op = Operation.NO_OP
            last_op = self._df.last_op
            # NO_OP operations inherit the current clause position.
            new_op = op if op != Operation.NO_OP else last_op
            # Going "backwards" in clause order (or stacking SELECT on SELECT)
            # means the pending expression must become a CTE first.
            if new_op < last_op or (last_op == new_op == Operation.SELECT):
                self._df = self._df._convert_leaf_to_cte()
            df: _BaseDataFrame = func(self, *args, **kwargs)
            df.last_op = new_op  # type: ignore
            return df

        # Fix: dropped the explicit `wrapper.__wrapped__ = func` — functools.wraps
        # already sets __wrapped__ (via functools.update_wrapper), so the manual
        # assignment was redundant.
        return wrapper

    return decorator