sqlframe 1.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98) hide show
  1. sqlframe/__init__.py +0 -0
  2. sqlframe/_version.py +16 -0
  3. sqlframe/base/__init__.py +0 -0
  4. sqlframe/base/_typing.py +39 -0
  5. sqlframe/base/catalog.py +1163 -0
  6. sqlframe/base/column.py +388 -0
  7. sqlframe/base/dataframe.py +1519 -0
  8. sqlframe/base/decorators.py +51 -0
  9. sqlframe/base/exceptions.py +14 -0
  10. sqlframe/base/function_alternatives.py +1055 -0
  11. sqlframe/base/functions.py +1678 -0
  12. sqlframe/base/group.py +102 -0
  13. sqlframe/base/mixins/__init__.py +0 -0
  14. sqlframe/base/mixins/catalog_mixins.py +419 -0
  15. sqlframe/base/mixins/readwriter_mixins.py +118 -0
  16. sqlframe/base/normalize.py +84 -0
  17. sqlframe/base/operations.py +87 -0
  18. sqlframe/base/readerwriter.py +679 -0
  19. sqlframe/base/session.py +585 -0
  20. sqlframe/base/transforms.py +13 -0
  21. sqlframe/base/types.py +418 -0
  22. sqlframe/base/util.py +242 -0
  23. sqlframe/base/window.py +139 -0
  24. sqlframe/bigquery/__init__.py +23 -0
  25. sqlframe/bigquery/catalog.py +255 -0
  26. sqlframe/bigquery/column.py +1 -0
  27. sqlframe/bigquery/dataframe.py +54 -0
  28. sqlframe/bigquery/functions.py +378 -0
  29. sqlframe/bigquery/group.py +14 -0
  30. sqlframe/bigquery/readwriter.py +29 -0
  31. sqlframe/bigquery/session.py +89 -0
  32. sqlframe/bigquery/types.py +1 -0
  33. sqlframe/bigquery/window.py +1 -0
  34. sqlframe/duckdb/__init__.py +20 -0
  35. sqlframe/duckdb/catalog.py +108 -0
  36. sqlframe/duckdb/column.py +1 -0
  37. sqlframe/duckdb/dataframe.py +55 -0
  38. sqlframe/duckdb/functions.py +47 -0
  39. sqlframe/duckdb/group.py +14 -0
  40. sqlframe/duckdb/readwriter.py +111 -0
  41. sqlframe/duckdb/session.py +65 -0
  42. sqlframe/duckdb/types.py +1 -0
  43. sqlframe/duckdb/window.py +1 -0
  44. sqlframe/postgres/__init__.py +23 -0
  45. sqlframe/postgres/catalog.py +106 -0
  46. sqlframe/postgres/column.py +1 -0
  47. sqlframe/postgres/dataframe.py +54 -0
  48. sqlframe/postgres/functions.py +61 -0
  49. sqlframe/postgres/group.py +14 -0
  50. sqlframe/postgres/readwriter.py +29 -0
  51. sqlframe/postgres/session.py +68 -0
  52. sqlframe/postgres/types.py +1 -0
  53. sqlframe/postgres/window.py +1 -0
  54. sqlframe/redshift/__init__.py +23 -0
  55. sqlframe/redshift/catalog.py +127 -0
  56. sqlframe/redshift/column.py +1 -0
  57. sqlframe/redshift/dataframe.py +54 -0
  58. sqlframe/redshift/functions.py +18 -0
  59. sqlframe/redshift/group.py +14 -0
  60. sqlframe/redshift/readwriter.py +29 -0
  61. sqlframe/redshift/session.py +53 -0
  62. sqlframe/redshift/types.py +1 -0
  63. sqlframe/redshift/window.py +1 -0
  64. sqlframe/snowflake/__init__.py +26 -0
  65. sqlframe/snowflake/catalog.py +134 -0
  66. sqlframe/snowflake/column.py +1 -0
  67. sqlframe/snowflake/dataframe.py +54 -0
  68. sqlframe/snowflake/functions.py +18 -0
  69. sqlframe/snowflake/group.py +14 -0
  70. sqlframe/snowflake/readwriter.py +29 -0
  71. sqlframe/snowflake/session.py +53 -0
  72. sqlframe/snowflake/types.py +1 -0
  73. sqlframe/snowflake/window.py +1 -0
  74. sqlframe/spark/__init__.py +23 -0
  75. sqlframe/spark/catalog.py +1028 -0
  76. sqlframe/spark/column.py +1 -0
  77. sqlframe/spark/dataframe.py +54 -0
  78. sqlframe/spark/functions.py +22 -0
  79. sqlframe/spark/group.py +14 -0
  80. sqlframe/spark/readwriter.py +29 -0
  81. sqlframe/spark/session.py +90 -0
  82. sqlframe/spark/types.py +1 -0
  83. sqlframe/spark/window.py +1 -0
  84. sqlframe/standalone/__init__.py +26 -0
  85. sqlframe/standalone/catalog.py +13 -0
  86. sqlframe/standalone/column.py +1 -0
  87. sqlframe/standalone/dataframe.py +36 -0
  88. sqlframe/standalone/functions.py +1 -0
  89. sqlframe/standalone/group.py +14 -0
  90. sqlframe/standalone/readwriter.py +19 -0
  91. sqlframe/standalone/session.py +40 -0
  92. sqlframe/standalone/types.py +1 -0
  93. sqlframe/standalone/window.py +1 -0
  94. sqlframe-1.1.3.dist-info/LICENSE +21 -0
  95. sqlframe-1.1.3.dist-info/METADATA +172 -0
  96. sqlframe-1.1.3.dist-info/RECORD +98 -0
  97. sqlframe-1.1.3.dist-info/WHEEL +5 -0
  98. sqlframe-1.1.3.dist-info/top_level.txt +1 -0
@@ -0,0 +1,388 @@
1
+ # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'sqlframe' folder.
2
+
3
+ from __future__ import annotations
4
+
5
+ import datetime
6
+ import math
7
+ import typing as t
8
+
9
+ import sqlglot
10
+ from sqlglot import expressions as exp
11
+ from sqlglot.helper import flatten, is_iterable
12
+
13
+ from sqlframe.base.types import DataType
14
+ from sqlframe.base.util import get_func_from_session
15
+
16
+ if t.TYPE_CHECKING:
17
+ from sqlframe.base._typing import ColumnOrLiteral, ColumnOrName
18
+ from sqlframe.base.window import WindowSpec
19
+
20
+
21
class Column:
    """Wrap a sqlglot expression behind a fluent, operator-overloaded column API.

    The wrapped node lives on ``self.expression``. Comparison, boolean and
    arithmetic operators do not evaluate anything: each one builds a new
    sqlglot node and returns it wrapped in a new ``Column``.
    """

    def __init__(self, expression: t.Optional[t.Union[ColumnOrLiteral, exp.Expression]]):
        """Build a column from a ``Column``, a literal, a SQL string or an expression.

        * ``Column``: its wrapped expression is reused as-is (not copied).
        * ``None`` or any value that is neither ``str`` nor ``exp.Expression``:
          converted through :meth:`_lit`.
        * ``str`` or an expression that is not an ``exp.Column``: passed through
          ``sqlglot.maybe_parse`` with the session's input dialect, then its
          identifiers are normalized in place.
        * ``exp.Column``: stored untouched.

        Raises:
            ValueError: if the resulting expression is ``None``.
        """
        from sqlframe.base.session import _BaseSession

        # Keep the caller's input so the error below can report it; the
        # rebound ``expression`` is always ``None`` by the time we raise.
        original = expression
        if isinstance(expression, Column):
            expression = expression.expression  # type: ignore
        elif expression is None or not isinstance(expression, (str, exp.Expression)):
            expression = self._lit(expression).expression  # type: ignore
        elif not isinstance(expression, exp.Column):
            dialect = _BaseSession().input_dialect
            expression = sqlglot.maybe_parse(expression, dialect=dialect).transform(
                dialect.normalize_identifier, copy=False
            )
        if expression is None:
            raise ValueError(f"Could not parse {original!r}")

        self.expression: exp.Expression = expression  # type: ignore

    def __repr__(self):
        """Return the ``repr`` of the wrapped expression."""
        return repr(self.expression)

    def __hash__(self):
        """Hash by the wrapped expression.

        Defined explicitly because overriding ``__eq__`` would otherwise make
        instances unhashable.
        """
        return hash(self.expression)

    # ------------------------------------------------------------------
    # Comparison operators: each returns a Column wrapping the comparison
    # node, never a bool.
    # ------------------------------------------------------------------

    def __eq__(self, other: ColumnOrLiteral) -> Column:  # type: ignore
        return self.binary_op(exp.EQ, other)

    def __ne__(self, other: ColumnOrLiteral) -> Column:  # type: ignore
        return self.binary_op(exp.NEQ, other)

    def __gt__(self, other: ColumnOrLiteral) -> Column:
        return self.binary_op(exp.GT, other)

    def __ge__(self, other: ColumnOrLiteral) -> Column:
        return self.binary_op(exp.GTE, other)

    def __lt__(self, other: ColumnOrLiteral) -> Column:
        return self.binary_op(exp.LT, other)

    def __le__(self, other: ColumnOrLiteral) -> Column:
        return self.binary_op(exp.LTE, other)

    # ------------------------------------------------------------------
    # Boolean operators (``&`` / ``|``); not parenthesized.
    # ------------------------------------------------------------------

    def __and__(self, other: ColumnOrLiteral) -> Column:
        return self.binary_op(exp.And, other)

    def __or__(self, other: ColumnOrLiteral) -> Column:
        return self.binary_op(exp.Or, other)

    # ------------------------------------------------------------------
    # Arithmetic operators: ``paren=True`` wraps the result in ``exp.Paren``
    # so the grouping survives when the column is combined further.
    # ------------------------------------------------------------------

    def __mod__(self, other: ColumnOrLiteral) -> Column:
        return self.binary_op(exp.Mod, other, paren=True)

    def __add__(self, other: ColumnOrLiteral) -> Column:
        return self.binary_op(exp.Add, other, paren=True)

    def __sub__(self, other: ColumnOrLiteral) -> Column:
        return self.binary_op(exp.Sub, other, paren=True)

    def __mul__(self, other: ColumnOrLiteral) -> Column:
        return self.binary_op(exp.Mul, other, paren=True)

    def __truediv__(self, other: ColumnOrLiteral) -> Column:
        return self.binary_op(exp.Div, other, paren=True)

    def __div__(self, other: ColumnOrLiteral) -> Column:
        return self.binary_op(exp.Div, other, paren=True)

    def __neg__(self) -> Column:
        return self.unary_op(exp.Neg)

    # ------------------------------------------------------------------
    # Reflected arithmetic: ``other`` becomes the left operand.
    # ------------------------------------------------------------------

    def __radd__(self, other: ColumnOrLiteral) -> Column:
        return self.inverse_binary_op(exp.Add, other, paren=True)

    def __rsub__(self, other: ColumnOrLiteral) -> Column:
        return self.inverse_binary_op(exp.Sub, other, paren=True)

    def __rmul__(self, other: ColumnOrLiteral) -> Column:
        return self.inverse_binary_op(exp.Mul, other, paren=True)

    def __rdiv__(self, other: ColumnOrLiteral) -> Column:
        return self.inverse_binary_op(exp.Div, other, paren=True)

    def __rtruediv__(self, other: ColumnOrLiteral) -> Column:
        return self.inverse_binary_op(exp.Div, other, paren=True)

    def __rmod__(self, other: ColumnOrLiteral) -> Column:
        return self.inverse_binary_op(exp.Mod, other, paren=True)

    def __pow__(self, power: ColumnOrLiteral, modulo=None):
        """Return ``self ** power`` as an ``exp.Pow`` column; ``modulo`` is ignored."""
        return Column(exp.Pow(this=self.expression, expression=Column(power).expression))

    def __rpow__(self, power: ColumnOrLiteral):
        """Return ``power ** self`` as an ``exp.Pow`` column."""
        return Column(exp.Pow(this=Column(power).expression, expression=self.expression))

    def __invert__(self):
        """Return the logical negation (``~col``) as an ``exp.Not`` column."""
        return self.unary_op(exp.Not)

    def __rand__(self, other: ColumnOrLiteral) -> Column:
        return self.inverse_binary_op(exp.And, other)

    def __ror__(self, other: ColumnOrLiteral) -> Column:
        return self.inverse_binary_op(exp.Or, other)

    # ------------------------------------------------------------------
    # Construction helpers
    # ------------------------------------------------------------------

    @classmethod
    def ensure_col(cls, value: t.Optional[t.Union[ColumnOrName, exp.Expression]]) -> Column:
        """Convert ``value`` with the ``col`` function obtained from ``get_func_from_session``."""
        col = get_func_from_session("col")

        return col(value)

    @classmethod
    def ensure_cols(cls, args: t.List[t.Union[ColumnOrName, exp.Expression]]) -> t.List[Column]:
        """Return ``args`` as Columns; existing ``Column`` items pass through unchanged."""
        return [x if isinstance(x, Column) else cls.ensure_col(x) for x in args]

    @classmethod
    def _lit(cls, value: ColumnOrLiteral) -> Column:
        """Convert a Python value into a literal column.

        Handled shapes, checked in this order:

        * ``Row`` -> ``exp.Struct`` of ``exp.PropertyEQ`` pairs built from
          ``value.asDict()``, with normalized identifiers as keys.
        * ``list`` / ``set`` -> ``exp.Array``.
        * ``tuple`` -> ``exp.Tuple``.
        * ``dict`` -> ``exp.VarMap`` holding parallel key and value arrays.
        * ``float`` NaN -> the string ``"NaN"`` cast to ``float``.
        * ``datetime.datetime`` -> ISO string (space separator) cast to
          ``TIMESTAMP`` when naive, or converted to UTC and cast to
          ``TIMESTAMPTZ`` when timezone-aware.
        * anything else -> ``exp.convert(value)``.

        Container elements are converted recursively.
        """
        from sqlframe.base.session import _BaseSession
        from sqlframe.base.types import Row

        # Row is tested before tuple on purpose: the original ordering is kept
        # so a Row is never treated as a plain tuple.
        if isinstance(value, Row):
            normalize = _BaseSession().input_dialect.normalize_identifier
            columns = [
                exp.PropertyEQ(
                    this=exp.to_identifier(k).transform(normalize, copy=False),
                    expression=cls._lit(v).expression,
                )
                for k, v in value.asDict().items()
            ]
            return cls(exp.Struct(expressions=columns))
        if isinstance(value, (list, set)):
            return cls(exp.Array(expressions=[cls._lit(x).expression for x in value]))
        if isinstance(value, tuple):
            return cls(exp.Tuple(expressions=[cls._lit(x).expression for x in value]))
        if isinstance(value, dict):
            return cls(
                exp.VarMap(
                    keys=exp.Array(expressions=[cls._lit(k).expression for k in value.keys()]),
                    values=exp.Array(expressions=[cls._lit(v).expression for v in value.values()]),
                )
            )
        if isinstance(value, float) and math.isnan(value):
            return cls(exp.cast(exp.Literal.string("NaN"), exp.DataType.build("float")))
        if isinstance(value, datetime.datetime):
            if value.tzinfo is None:
                naive_text = value.isoformat(sep=" ")
                return cls(exp.cast(exp.Literal.string(naive_text), exp.DataType.Type.TIMESTAMP))
            utc_text = value.astimezone(datetime.timezone.utc).isoformat(sep=" ")
            return cls(exp.cast(exp.Literal.string(utc_text), exp.DataType.Type.TIMESTAMPTZ))
        return cls(exp.convert(value))

    @classmethod
    def invoke_anonymous_function(
        cls, column: t.Optional[ColumnOrName], func_name: str, *args: t.Optional[ColumnOrName]
    ) -> Column:
        """Build an ``exp.Anonymous`` call named ``func_name.upper()``.

        ``column`` (when not ``None``) is the first argument, followed by
        ``args``; every argument is passed through :meth:`ensure_col`.
        """
        columns = [] if column is None else [cls.ensure_col(column)]
        column_args = [cls.ensure_col(arg) for arg in args]
        expressions = [x.expression for x in columns + column_args]
        new_expression = exp.Anonymous(this=func_name.upper(), expressions=expressions)
        return Column(new_expression)

    @classmethod
    def invoke_expression_over_column(
        cls, column: t.Optional[ColumnOrName], callable_expression: t.Callable, **kwargs
    ) -> Column:
        """Call ``callable_expression`` with ``column`` as ``this`` plus ``kwargs``.

        Keyword values that are ``None`` are dropped. Iterable values (per
        sqlglot's ``is_iterable``) are converted element by element, and other
        values directly, through :meth:`ensure_col`. When ``column`` is
        ``None`` no ``this`` argument is supplied.
        """
        ensured_column = None if column is None else cls.ensure_col(column)
        ensure_expression_values = {
            k: (
                [cls.ensure_col(x).expression for x in v]
                if is_iterable(v)
                else cls.ensure_col(v).expression
            )
            for k, v in kwargs.items()
            if v is not None
        }
        new_expression = (
            callable_expression(**ensure_expression_values)
            if ensured_column is None
            else callable_expression(
                this=ensured_column.column_expression, **ensure_expression_values
            )
        )
        return Column(new_expression)

    def binary_op(
        self, klass: t.Callable, other: ColumnOrLiteral, paren: bool = False, **kwargs
    ) -> Column:
        """Return ``klass(this=self, expression=other)``, optionally inside ``exp.Paren``.

        Both operands are un-aliased first (see :attr:`column_expression`).
        """
        op = klass(
            this=self.column_expression, expression=Column(other).column_expression, **kwargs
        )
        if paren:
            return Column(exp.Paren(this=op))
        return Column(op)

    def inverse_binary_op(
        self, klass: t.Callable, other: ColumnOrLiteral, paren: bool = False, **kwargs
    ) -> Column:
        """Like :meth:`binary_op` but with ``other`` on the left and ``self`` on the right."""
        op = klass(
            this=Column(other).column_expression, expression=self.column_expression, **kwargs
        )
        if paren:
            return Column(exp.Paren(this=op))
        return Column(op)

    def unary_op(self, klass: t.Callable, **kwargs) -> Column:
        """Return ``klass(this=self)`` applied to the un-aliased expression."""
        return Column(klass(this=self.column_expression, **kwargs))

    # ------------------------------------------------------------------
    # Introspection
    # ------------------------------------------------------------------

    @property
    def is_alias(self):
        """``True`` when the wrapped expression is an ``exp.Alias``."""
        return isinstance(self.expression, exp.Alias)

    @property
    def is_column(self):
        """``True`` when the wrapped expression is an ``exp.Column``."""
        return isinstance(self.expression, exp.Column)

    @property
    def column_expression(self) -> t.Union[exp.Column, exp.Literal]:
        """Return ``self.expression.unalias()``: the expression without an alias wrapper."""
        return self.expression.unalias()

    @property
    def alias_or_name(self) -> str:
        """Return the wrapped expression's ``alias_or_name``."""
        return self.expression.alias_or_name

    @classmethod
    def ensure_literal(cls, value) -> Column:
        """Return ``value`` as a column holding an ``exp.Literal``.

        A ``Column`` is unwrapped first. Anything that is not already an
        ``exp.Literal`` is handed to ``sqlframe.base.functions.lit``.
        """
        from sqlframe.base.functions import lit

        if isinstance(value, cls):
            value = value.expression
        if not isinstance(value, exp.Literal):
            return lit(value)
        return Column(value)

    def copy(self) -> Column:
        """Return a new Column wrapping a copy of the expression."""
        return Column(self.expression.copy())

    def set_table_name(self, table_name: str, copy=False) -> Column:
        """Set the expression's ``table`` arg to ``table_name``.

        With ``copy=False`` (the default) the wrapped expression is mutated in
        place; with ``copy=True`` a copy is modified instead.
        """
        expression = self.expression.copy() if copy else self.expression
        expression.set("table", exp.to_identifier(table_name))
        return Column(expression)

    def sql(self, **kwargs) -> str:
        """Render the expression as SQL.

        The session's input dialect is the default ``dialect``; ``kwargs``
        override it and are forwarded to the expression's ``sql`` method.
        """
        from sqlframe.base.session import _BaseSession

        return self.expression.sql(**{"dialect": _BaseSession().input_dialect, **kwargs})

    def alias(self, name: str) -> Column:
        """Return this column (un-aliased) wrapped in an ``exp.Alias`` called ``name``.

        ``name`` is parsed with ``exp.parse_identifier`` using the session's
        input dialect.
        """
        from sqlframe.base.session import _BaseSession

        dialect = _BaseSession().input_dialect
        alias: exp.Expression = exp.parse_identifier(name, dialect=dialect)
        new_expression = exp.Alias(
            this=self.column_expression,
            # If parsing produced a full column node, keep only its identifier.
            alias=alias.this if isinstance(alias, exp.Column) else alias,
        )
        return Column(new_expression)

    # ------------------------------------------------------------------
    # Ordering
    # ------------------------------------------------------------------

    def asc(self) -> Column:
        """Ascending order, nulls first."""
        new_expression = exp.Ordered(this=self.column_expression, desc=False, nulls_first=True)
        return Column(new_expression)

    def desc(self) -> Column:
        """Descending order, nulls last."""
        new_expression = exp.Ordered(this=self.column_expression, desc=True, nulls_first=False)
        return Column(new_expression)

    # ``asc`` already places nulls first, so the explicit name is an alias.
    asc_nulls_first = asc

    def asc_nulls_last(self) -> Column:
        """Ascending order, nulls last."""
        new_expression = exp.Ordered(this=self.column_expression, desc=False, nulls_first=False)
        return Column(new_expression)

    def desc_nulls_first(self) -> Column:
        """Descending order, nulls first."""
        new_expression = exp.Ordered(this=self.column_expression, desc=True, nulls_first=True)
        return Column(new_expression)

    # ``desc`` already places nulls last, so the explicit name is an alias.
    desc_nulls_last = desc

    # ------------------------------------------------------------------
    # Conditionals
    # ------------------------------------------------------------------

    def when(self, condition: Column, value: t.Any) -> Column:
        """Add a ``condition -> value`` branch.

        If this column does not wrap an ``exp.Case`` the result of
        ``functions.when(condition, value)`` is returned on its own; otherwise
        the new branch is appended to a copy of this column's ``ifs``.
        """
        from sqlframe.base.functions import when

        column_with_if = when(condition, value)
        if not isinstance(self.expression, exp.Case):
            return column_with_if
        new_column = self.copy()
        new_column.expression.args["ifs"].extend(column_with_if.expression.args["ifs"])
        return new_column

    def otherwise(self, value: t.Any) -> Column:
        """Return a copy of this column with its ``default`` arg set to ``value``.

        Non-Column values are converted with ``functions.lit``.
        """
        from sqlframe.base.functions import lit

        true_value = value if isinstance(value, Column) else lit(value)
        new_column = self.copy()
        new_column.expression.set("default", true_value.column_expression)
        return new_column

    # ------------------------------------------------------------------
    # Predicates and casts
    # ------------------------------------------------------------------

    def isNull(self) -> Column:
        """Return ``<expr> IS NULL``."""
        new_expression = exp.Is(this=self.column_expression, expression=exp.Null())
        return Column(new_expression)

    def isNotNull(self) -> Column:
        """Return ``NOT (<expr> IS NULL)``."""
        new_expression = exp.Not(this=exp.Is(this=self.column_expression, expression=exp.Null()))
        return Column(new_expression)

    def cast(self, dataType: t.Union[str, DataType]) -> Column:
        """Cast to ``dataType``.

        A ``DataType`` instance is reduced to its ``simpleString()`` first; the
        type string is interpreted with the session's input dialect.
        """
        from sqlframe.base.session import _BaseSession

        if isinstance(dataType, DataType):
            dataType = dataType.simpleString()
        return Column(
            exp.cast(self.column_expression, dataType, dialect=_BaseSession().input_dialect)
        )

    def startswith(self, value: t.Union[str, Column]) -> Column:
        """Return an ``exp.StartsWith`` test; non-Column ``value`` is treated as a literal."""
        value = value if isinstance(value, Column) else self._lit(value)
        return self.invoke_expression_over_column(self, exp.StartsWith, expression=value.expression)

    def endswith(self, value: t.Union[str, Column]) -> Column:
        """Return an anonymous ``ENDSWITH(self, value)`` call; non-Column ``value`` is a literal."""
        value = value if isinstance(value, Column) else self._lit(value)
        return self.invoke_anonymous_function(self, "ENDSWITH", value)

    def rlike(self, regexp: str) -> Column:
        """Return an ``exp.RegexpLike`` match against the literal ``regexp``."""
        return self.invoke_expression_over_column(
            column=self, callable_expression=exp.RegexpLike, expression=self._lit(regexp).expression
        )

    def like(self, other: str):
        """Return an ``exp.Like`` match against the literal pattern ``other``."""
        return self.invoke_expression_over_column(
            self, exp.Like, expression=self._lit(other).expression
        )

    def ilike(self, other: str):
        """Return an ``exp.ILike`` match against the literal pattern ``other``."""
        return self.invoke_expression_over_column(
            self, exp.ILike, expression=self._lit(other).expression
        )

    def substr(self, startPos: t.Union[int, Column], length: t.Union[int, Column]) -> Column:
        """Return an ``exp.Substring``; non-Column bounds are treated as literals."""
        startPos = startPos if isinstance(startPos, Column) else self._lit(startPos)
        length = length if isinstance(length, Column) else self._lit(length)
        return Column.invoke_expression_over_column(
            self, exp.Substring, start=startPos.expression, length=length.expression
        )

    def isin(self, *cols: t.Union[ColumnOrLiteral, t.Iterable[ColumnOrLiteral]]):
        """Return an ``exp.In`` membership test.

        Accepts either individual values or, when the first argument is a
        ``list``/``set``/``tuple``, nested collections that are flattened.
        Calling with no arguments produces an ``exp.In`` with no candidate
        expressions instead of raising ``IndexError``.
        """
        # ``cols`` may be empty, so guard before peeking at the first element.
        first_is_collection = bool(cols) and isinstance(cols[0], (list, set, tuple))
        columns = flatten(cols) if first_is_collection else cols  # type: ignore
        expressions = [self._lit(x).expression for x in columns]
        return Column.invoke_expression_over_column(self, exp.In, expressions=expressions)  # type: ignore

    def between(
        self,
        lowerBound: t.Union[ColumnOrLiteral],
        upperBound: t.Union[ColumnOrLiteral],
    ) -> Column:
        """Return an ``exp.Between`` test; non-Column bounds are treated as literals."""
        lower_bound_exp = lowerBound if isinstance(lowerBound, Column) else self._lit(lowerBound)
        upper_bound_exp = upperBound if isinstance(upperBound, Column) else self._lit(upperBound)
        return Column(
            exp.Between(
                this=self.column_expression,
                low=lower_bound_exp.expression,
                high=upper_bound_exp.expression,
            )
        )

    def over(self, window: WindowSpec) -> Column:
        """Apply this column over ``window``.

        The window's expression is copied, so ``window`` itself is not
        modified, and this column becomes the copy's ``this``.
        """
        window_expression = window.expression.copy()
        window_expression.set("this", self.column_expression)
        return Column(window_expression)