sqlframe 3.10.1__py3-none-any.whl → 3.11.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sqlframe/_version.py +2 -2
- sqlframe/base/column.py +11 -8
- sqlframe/base/dataframe.py +3 -1
- sqlframe/base/decorators.py +41 -2
- sqlframe/base/function_alternatives.py +445 -404
- sqlframe/base/functions.py +1100 -73
- sqlframe/base/session.py +34 -2
- sqlframe/bigquery/functions.py +1 -361
- sqlframe/bigquery/functions.pyi +63 -156
- sqlframe/bigquery/session.py +4 -0
- sqlframe/databricks/functions.py +0 -10
- sqlframe/databricks/functions.pyi +405 -413
- sqlframe/databricks/session.py +4 -0
- sqlframe/duckdb/functions.py +0 -40
- sqlframe/duckdb/functions.pyi +219 -216
- sqlframe/duckdb/session.py +4 -0
- sqlframe/postgres/functions.py +1 -60
- sqlframe/postgres/functions.pyi +197 -196
- sqlframe/postgres/session.py +4 -0
- sqlframe/redshift/functions.py +1 -4
- sqlframe/redshift/session.py +4 -0
- sqlframe/snowflake/functions.py +1 -55
- sqlframe/snowflake/functions.pyi +224 -220
- sqlframe/snowflake/session.py +4 -0
- sqlframe/spark/functions.py +0 -9
- sqlframe/spark/functions.pyi +411 -413
- sqlframe/spark/session.py +4 -0
- sqlframe/standalone/functions.py +1 -1
- sqlframe/standalone/session.py +4 -0
- {sqlframe-3.10.1.dist-info → sqlframe-3.11.0.dist-info}/METADATA +2 -2
- {sqlframe-3.10.1.dist-info → sqlframe-3.11.0.dist-info}/RECORD +34 -34
- {sqlframe-3.10.1.dist-info → sqlframe-3.11.0.dist-info}/LICENSE +0 -0
- {sqlframe-3.10.1.dist-info → sqlframe-3.11.0.dist-info}/WHEEL +0 -0
- {sqlframe-3.10.1.dist-info → sqlframe-3.11.0.dist-info}/top_level.txt +0 -0
sqlframe/base/session.py
CHANGED
|
@@ -329,9 +329,9 @@ class _BaseSession(t.Generic[CATALOG, READER, WRITER, DF, CONN, UDF_REGISTRATION
|
|
|
329
329
|
row = row.asDict()
|
|
330
330
|
if isinstance(row, dict):
|
|
331
331
|
row = row.values() # type: ignore
|
|
332
|
-
data_expressions.append(exp.tuple_(*[F.lit(x).expression for x in row]))
|
|
332
|
+
data_expressions.append(exp.tuple_(*[F.lit(x).column_expression for x in row]))
|
|
333
333
|
else:
|
|
334
|
-
data_expressions.append(exp.tuple_(*[F.lit(row).expression]))
|
|
334
|
+
data_expressions.append(exp.tuple_(*[F.lit(row).column_expression]))
|
|
335
335
|
|
|
336
336
|
if column_mapping:
|
|
337
337
|
sel_columns = [
|
|
@@ -583,6 +583,38 @@ class _BaseSession(t.Generic[CATALOG, READER, WRITER, DF, CONN, UDF_REGISTRATION
|
|
|
583
583
|
converted_values.append(cls._to_value(value))
|
|
584
584
|
return _create_row(columns, converted_values)
|
|
585
585
|
|
|
586
|
+
@property
|
|
587
|
+
def _is_bigquery(self) -> bool:
|
|
588
|
+
return False
|
|
589
|
+
|
|
590
|
+
@property
|
|
591
|
+
def _is_databricks(self) -> bool:
|
|
592
|
+
return False
|
|
593
|
+
|
|
594
|
+
@property
|
|
595
|
+
def _is_duckdb(self) -> bool:
|
|
596
|
+
return False
|
|
597
|
+
|
|
598
|
+
@property
|
|
599
|
+
def _is_postgres(self) -> bool:
|
|
600
|
+
return False
|
|
601
|
+
|
|
602
|
+
@property
|
|
603
|
+
def _is_redshift(self) -> bool:
|
|
604
|
+
return False
|
|
605
|
+
|
|
606
|
+
@property
|
|
607
|
+
def _is_snowflake(self) -> bool:
|
|
608
|
+
return False
|
|
609
|
+
|
|
610
|
+
@property
|
|
611
|
+
def _is_spark(self) -> bool:
|
|
612
|
+
return False
|
|
613
|
+
|
|
614
|
+
@property
|
|
615
|
+
def _is_standalone(self) -> bool:
|
|
616
|
+
return False
|
|
617
|
+
|
|
586
618
|
class Builder:
|
|
587
619
|
SQLFRAME_INPUT_DIALECT_KEY = "sqlframe.input.dialect"
|
|
588
620
|
SQLFRAME_OUTPUT_DIALECT_KEY = "sqlframe.output.dialect"
|
sqlframe/bigquery/functions.py
CHANGED
|
@@ -2,17 +2,8 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import inspect
|
|
4
4
|
import sys
|
|
5
|
-
import typing as t
|
|
6
5
|
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
from sqlframe.base.util import (
|
|
10
|
-
get_func_from_session,
|
|
11
|
-
)
|
|
12
|
-
from sqlframe.bigquery.column import Column
|
|
13
|
-
|
|
14
|
-
if t.TYPE_CHECKING:
|
|
15
|
-
from sqlframe.base._typing import ColumnOrLiteral, ColumnOrName
|
|
6
|
+
import sqlframe.base.functions # noqa
|
|
16
7
|
|
|
17
8
|
module = sys.modules["sqlframe.base.functions"]
|
|
18
9
|
globals().update(
|
|
@@ -24,354 +15,3 @@ globals().update(
|
|
|
24
15
|
and "*" not in func.unsupported_engines
|
|
25
16
|
}
|
|
26
17
|
)
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
from sqlframe.base.function_alternatives import ( # noqa
|
|
30
|
-
any_value_ignore_nulls_not_supported as any_value,
|
|
31
|
-
current_user_from_session_user as current_user,
|
|
32
|
-
e_literal as e,
|
|
33
|
-
expm1_from_exp as expm1,
|
|
34
|
-
extract_convert_to_var as extract,
|
|
35
|
-
factorial_from_case_statement as factorial,
|
|
36
|
-
log1p_from_log as log1p,
|
|
37
|
-
rint_from_round as rint,
|
|
38
|
-
collect_set_from_list_distinct as collect_set,
|
|
39
|
-
isnull_using_equal as isnull,
|
|
40
|
-
nanvl_as_case as nanvl,
|
|
41
|
-
percentile_approx_without_accuracy_and_plural as percentile_approx,
|
|
42
|
-
rand_no_seed as rand,
|
|
43
|
-
year_from_extract as year,
|
|
44
|
-
quarter_from_extract as quarter,
|
|
45
|
-
month_from_extract as month,
|
|
46
|
-
dayofweek_from_extract as dayofweek,
|
|
47
|
-
dayofmonth_from_extract_with_day as dayofmonth,
|
|
48
|
-
dayofyear_from_extract as dayofyear,
|
|
49
|
-
hour_from_extract as hour,
|
|
50
|
-
minute_from_extract as minute,
|
|
51
|
-
second_from_extract as second,
|
|
52
|
-
weekofyear_from_extract_as_isoweek as weekofyear,
|
|
53
|
-
make_date_from_date_func as make_date,
|
|
54
|
-
to_date_from_timestamp as to_date,
|
|
55
|
-
last_day_with_cast as last_day,
|
|
56
|
-
sha1_force_sha1_and_to_hex as sha,
|
|
57
|
-
sha1_force_sha1_and_to_hex as sha1,
|
|
58
|
-
hash_from_farm_fingerprint as hash,
|
|
59
|
-
base64_from_blob as base64,
|
|
60
|
-
concat_ws_from_array_to_string as concat_ws,
|
|
61
|
-
format_string_with_format as format_string,
|
|
62
|
-
instr_using_strpos as instr,
|
|
63
|
-
overlay_from_substr as overlay,
|
|
64
|
-
split_with_split as split,
|
|
65
|
-
regexp_extract_only_one_group as regexp_extract,
|
|
66
|
-
hex_casted_as_bytes as hex,
|
|
67
|
-
bit_length_from_length as bit_length,
|
|
68
|
-
element_at_using_brackets as element_at,
|
|
69
|
-
array_union_using_array_concat as array_union,
|
|
70
|
-
sequence_from_generate_array as sequence,
|
|
71
|
-
position_as_strpos as position,
|
|
72
|
-
try_to_timestamp_safe as try_to_timestamp,
|
|
73
|
-
_is_string_using_typeof_string as _is_string,
|
|
74
|
-
array_append_using_array_cat as array_append,
|
|
75
|
-
endswith_with_underscore as endswith,
|
|
76
|
-
to_timestamp_just_timestamp as to_timestamp,
|
|
77
|
-
)
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
def typeof(col: ColumnOrName) -> Column:
|
|
81
|
-
return Column(
|
|
82
|
-
sqlglot_expression.Anonymous(
|
|
83
|
-
this="bqutil.fn.typeof", expressions=[Column.ensure_col(col).expression]
|
|
84
|
-
)
|
|
85
|
-
)
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
def degrees(col: ColumnOrName) -> Column:
|
|
89
|
-
return Column(
|
|
90
|
-
sqlglot_expression.Anonymous(
|
|
91
|
-
this="bqutil.fn.degrees", expressions=[Column.ensure_col(col).expression]
|
|
92
|
-
)
|
|
93
|
-
)
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
def radians(col: ColumnOrName) -> Column:
|
|
97
|
-
return Column(
|
|
98
|
-
sqlglot_expression.Anonymous(
|
|
99
|
-
this="bqutil.fn.radians", expressions=[Column.ensure_col(col).expression]
|
|
100
|
-
)
|
|
101
|
-
)
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
def bround(col: ColumnOrName, scale: t.Optional[int] = None) -> Column:
|
|
105
|
-
from sqlframe.base.session import _BaseSession
|
|
106
|
-
|
|
107
|
-
lit = get_func_from_session("lit", _BaseSession())
|
|
108
|
-
|
|
109
|
-
expressions = [Column.ensure_col(col).cast("bignumeric").expression]
|
|
110
|
-
if scale is not None:
|
|
111
|
-
expressions.append(lit(scale).expression)
|
|
112
|
-
return Column(
|
|
113
|
-
sqlglot_expression.Anonymous(
|
|
114
|
-
this="bqutil.fn.cw_round_half_even",
|
|
115
|
-
expressions=expressions,
|
|
116
|
-
)
|
|
117
|
-
)
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
def months_between(
|
|
121
|
-
date1: ColumnOrName, date2: ColumnOrName, roundOff: t.Optional[bool] = None
|
|
122
|
-
) -> Column:
|
|
123
|
-
roundOff = True if roundOff is None else roundOff
|
|
124
|
-
round = get_func_from_session("round")
|
|
125
|
-
lit = get_func_from_session("lit")
|
|
126
|
-
|
|
127
|
-
value = Column(
|
|
128
|
-
sqlglot_expression.Anonymous(
|
|
129
|
-
this="bqutil.fn.cw_months_between",
|
|
130
|
-
expressions=[
|
|
131
|
-
Column.ensure_col(date1).cast("datetime").expression,
|
|
132
|
-
Column.ensure_col(date2).cast("datetime").expression,
|
|
133
|
-
],
|
|
134
|
-
)
|
|
135
|
-
)
|
|
136
|
-
if roundOff:
|
|
137
|
-
value = round(value, lit(8))
|
|
138
|
-
return value
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
def next_day(col: ColumnOrName, dayOfWeek: str) -> Column:
|
|
142
|
-
lit = get_func_from_session("lit")
|
|
143
|
-
|
|
144
|
-
return Column(
|
|
145
|
-
sqlglot_expression.Anonymous(
|
|
146
|
-
this="bqutil.fn.cw_next_day",
|
|
147
|
-
expressions=[Column.ensure_col(col).cast("date").expression, lit(dayOfWeek).expression],
|
|
148
|
-
)
|
|
149
|
-
)
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
def from_unixtime(col: ColumnOrName, format: t.Optional[str] = None) -> Column:
|
|
153
|
-
from sqlframe.base.session import _BaseSession
|
|
154
|
-
|
|
155
|
-
session: _BaseSession = _BaseSession()
|
|
156
|
-
|
|
157
|
-
expressions = [Column.ensure_col(col).expression]
|
|
158
|
-
return Column(
|
|
159
|
-
sqlglot_expression.Anonymous(
|
|
160
|
-
this="FORMAT_TIMESTAMP",
|
|
161
|
-
expressions=[
|
|
162
|
-
session.format_time(format),
|
|
163
|
-
Column(
|
|
164
|
-
sqlglot_expression.Anonymous(this="TIMESTAMP_SECONDS", expressions=expressions)
|
|
165
|
-
).expression,
|
|
166
|
-
],
|
|
167
|
-
)
|
|
168
|
-
)
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
def unix_timestamp(
|
|
172
|
-
timestamp: t.Optional[ColumnOrName] = None, format: t.Optional[str] = None
|
|
173
|
-
) -> Column:
|
|
174
|
-
from sqlframe.base.session import _BaseSession
|
|
175
|
-
|
|
176
|
-
lit = get_func_from_session("lit")
|
|
177
|
-
return Column(
|
|
178
|
-
sqlglot_expression.Anonymous(
|
|
179
|
-
this="UNIX_SECONDS",
|
|
180
|
-
expressions=[
|
|
181
|
-
sqlglot_expression.Anonymous(
|
|
182
|
-
this="PARSE_TIMESTAMP",
|
|
183
|
-
expressions=[
|
|
184
|
-
_BaseSession().format_time(format),
|
|
185
|
-
Column.ensure_col(timestamp).expression,
|
|
186
|
-
lit("UTC").expression,
|
|
187
|
-
],
|
|
188
|
-
)
|
|
189
|
-
],
|
|
190
|
-
)
|
|
191
|
-
)
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
def format_number(col: ColumnOrName, d: int) -> Column:
|
|
195
|
-
round = get_func_from_session("round")
|
|
196
|
-
lit = get_func_from_session("lit")
|
|
197
|
-
|
|
198
|
-
return Column(
|
|
199
|
-
sqlglot_expression.Anonymous(
|
|
200
|
-
this="FORMAT",
|
|
201
|
-
expressions=[
|
|
202
|
-
lit(f"%'.{d}f").expression,
|
|
203
|
-
round(Column.ensure_col(col).cast("float"), d).expression,
|
|
204
|
-
],
|
|
205
|
-
)
|
|
206
|
-
)
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
def substring_index(str: ColumnOrName, delim: str, count: int) -> Column:
|
|
210
|
-
lit = get_func_from_session("lit")
|
|
211
|
-
|
|
212
|
-
return Column(
|
|
213
|
-
sqlglot_expression.Anonymous(
|
|
214
|
-
this="bqutil.fn.cw_substring_index",
|
|
215
|
-
expressions=[
|
|
216
|
-
Column.ensure_col(str).expression,
|
|
217
|
-
lit(delim).expression,
|
|
218
|
-
lit(count).expression,
|
|
219
|
-
],
|
|
220
|
-
)
|
|
221
|
-
)
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
def bin(col: ColumnOrName) -> Column:
|
|
225
|
-
return (
|
|
226
|
-
Column(
|
|
227
|
-
sqlglot_expression.Anonymous(
|
|
228
|
-
this="bqutil.fn.to_binary",
|
|
229
|
-
expressions=[Column.ensure_col(col).expression],
|
|
230
|
-
)
|
|
231
|
-
)
|
|
232
|
-
.cast("int")
|
|
233
|
-
.cast("string")
|
|
234
|
-
)
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
def slice(
|
|
238
|
-
x: ColumnOrName, start: t.Union[ColumnOrName, int], length: t.Union[ColumnOrName, int]
|
|
239
|
-
) -> Column:
|
|
240
|
-
lit = get_func_from_session("lit")
|
|
241
|
-
|
|
242
|
-
start_col = start if isinstance(start, Column) else lit(start)
|
|
243
|
-
length_col = length if isinstance(length, Column) else lit(length)
|
|
244
|
-
|
|
245
|
-
subquery = (
|
|
246
|
-
sqlglot_expression.select(
|
|
247
|
-
sqlglot_expression.column("x"),
|
|
248
|
-
)
|
|
249
|
-
.from_(
|
|
250
|
-
sqlglot_expression.Unnest(
|
|
251
|
-
expressions=[Column.ensure_col(x).expression],
|
|
252
|
-
alias=sqlglot_expression.TableAlias(
|
|
253
|
-
columns=[sqlglot_expression.to_identifier("x")],
|
|
254
|
-
),
|
|
255
|
-
offset=sqlglot_expression.to_identifier("offset"),
|
|
256
|
-
)
|
|
257
|
-
)
|
|
258
|
-
.where(
|
|
259
|
-
sqlglot_expression.Between(
|
|
260
|
-
this=sqlglot_expression.column("offset"),
|
|
261
|
-
low=(start_col - lit(1)).expression,
|
|
262
|
-
high=(start_col + length_col).expression,
|
|
263
|
-
)
|
|
264
|
-
)
|
|
265
|
-
)
|
|
266
|
-
|
|
267
|
-
return Column(
|
|
268
|
-
sqlglot_expression.Anonymous(
|
|
269
|
-
this="ARRAY",
|
|
270
|
-
expressions=[subquery],
|
|
271
|
-
)
|
|
272
|
-
)
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
def array_position(col: ColumnOrName, value: ColumnOrLiteral) -> Column:
|
|
276
|
-
lit = get_func_from_session("lit")
|
|
277
|
-
|
|
278
|
-
value_col = value if isinstance(value, Column) else lit(value)
|
|
279
|
-
|
|
280
|
-
return Column(
|
|
281
|
-
sqlglot_expression.Coalesce(
|
|
282
|
-
this=sqlglot_expression.Anonymous(
|
|
283
|
-
this="bqutil.fn.find_in_set",
|
|
284
|
-
expressions=[
|
|
285
|
-
value_col.expression,
|
|
286
|
-
sqlglot_expression.Anonymous(
|
|
287
|
-
this="ARRAY_TO_STRING",
|
|
288
|
-
expressions=[Column.ensure_col(col).expression, lit(",").expression],
|
|
289
|
-
),
|
|
290
|
-
],
|
|
291
|
-
),
|
|
292
|
-
expressions=[lit(0).expression],
|
|
293
|
-
)
|
|
294
|
-
)
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
def array_remove(col: ColumnOrName, value: ColumnOrLiteral) -> Column:
|
|
298
|
-
lit = get_func_from_session("lit")
|
|
299
|
-
|
|
300
|
-
value_col = value if isinstance(value, Column) else lit(value)
|
|
301
|
-
|
|
302
|
-
filter_subquery = sqlglot_expression.select(
|
|
303
|
-
"*",
|
|
304
|
-
).from_(
|
|
305
|
-
sqlglot_expression.Unnest(
|
|
306
|
-
expressions=[Column.ensure_col(col).expression],
|
|
307
|
-
alias=sqlglot_expression.TableAlias(
|
|
308
|
-
columns=[sqlglot_expression.to_identifier("x")],
|
|
309
|
-
),
|
|
310
|
-
)
|
|
311
|
-
)
|
|
312
|
-
|
|
313
|
-
agg_subquery = (
|
|
314
|
-
sqlglot_expression.select(
|
|
315
|
-
sqlglot_expression.Anonymous(
|
|
316
|
-
this="ARRAY_AGG",
|
|
317
|
-
expressions=[sqlglot_expression.column("x")],
|
|
318
|
-
),
|
|
319
|
-
)
|
|
320
|
-
.from_(filter_subquery.subquery("t"))
|
|
321
|
-
.where(
|
|
322
|
-
sqlglot_expression.NEQ(
|
|
323
|
-
this=sqlglot_expression.column("x", "t"),
|
|
324
|
-
expression=value_col.expression,
|
|
325
|
-
)
|
|
326
|
-
)
|
|
327
|
-
)
|
|
328
|
-
|
|
329
|
-
return Column(agg_subquery.subquery())
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
def array_distinct(col: ColumnOrName) -> Column:
|
|
333
|
-
return Column(
|
|
334
|
-
sqlglot_expression.Anonymous(
|
|
335
|
-
this="bqutil.fn.cw_array_distinct",
|
|
336
|
-
expressions=[Column.ensure_col(col).expression],
|
|
337
|
-
)
|
|
338
|
-
)
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
def array_min(col: ColumnOrName) -> Column:
|
|
342
|
-
return Column(
|
|
343
|
-
sqlglot_expression.Anonymous(
|
|
344
|
-
this="bqutil.fn.cw_array_min",
|
|
345
|
-
expressions=[Column.ensure_col(col).expression],
|
|
346
|
-
)
|
|
347
|
-
)
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
def array_max(col: ColumnOrName) -> Column:
|
|
351
|
-
return Column(
|
|
352
|
-
sqlglot_expression.Anonymous(
|
|
353
|
-
this="bqutil.fn.cw_array_max",
|
|
354
|
-
expressions=[Column.ensure_col(col).expression],
|
|
355
|
-
)
|
|
356
|
-
)
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
def sort_array(col: ColumnOrName, asc: t.Optional[bool] = None) -> Column:
|
|
360
|
-
order = "ASC" if asc or asc is None else "DESC"
|
|
361
|
-
subquery = (
|
|
362
|
-
sqlglot_expression.select("x")
|
|
363
|
-
.from_(
|
|
364
|
-
sqlglot_expression.Unnest(
|
|
365
|
-
expressions=[Column.ensure_col(col).expression],
|
|
366
|
-
alias=sqlglot_expression.TableAlias(
|
|
367
|
-
columns=[sqlglot_expression.to_identifier("x")],
|
|
368
|
-
),
|
|
369
|
-
)
|
|
370
|
-
)
|
|
371
|
-
.order_by(f"x {order}")
|
|
372
|
-
)
|
|
373
|
-
|
|
374
|
-
return Column(sqlglot_expression.Anonymous(this="ARRAY", expressions=[subquery]))
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
array_sort = sort_array
|