sqlframe 1.9.0__py3-none-any.whl → 1.11.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sqlframe/_version.py +2 -2
- sqlframe/base/dataframe.py +54 -1
- sqlframe/base/exceptions.py +12 -0
- sqlframe/base/function_alternatives.py +96 -0
- sqlframe/base/functions.py +4013 -1
- sqlframe/base/mixins/dataframe_mixins.py +24 -33
- sqlframe/base/session.py +2 -2
- sqlframe/base/types.py +3 -3
- sqlframe/base/util.py +56 -0
- sqlframe/bigquery/dataframe.py +33 -13
- sqlframe/bigquery/functions.py +4 -0
- sqlframe/bigquery/functions.pyi +37 -1
- sqlframe/duckdb/dataframe.py +6 -15
- sqlframe/duckdb/functions.py +3 -0
- sqlframe/duckdb/functions.pyi +29 -0
- sqlframe/postgres/catalog.py +123 -3
- sqlframe/postgres/dataframe.py +6 -10
- sqlframe/postgres/functions.py +6 -0
- sqlframe/postgres/functions.pyi +28 -0
- sqlframe/redshift/dataframe.py +3 -14
- sqlframe/snowflake/dataframe.py +23 -13
- sqlframe/snowflake/functions.py +3 -0
- sqlframe/snowflake/functions.pyi +27 -0
- sqlframe/spark/dataframe.py +25 -15
- sqlframe/spark/functions.pyi +161 -1
- sqlframe/testing/__init__.py +3 -0
- sqlframe/testing/utils.py +320 -0
- {sqlframe-1.9.0.dist-info → sqlframe-1.11.0.dist-info}/METADATA +1 -1
- {sqlframe-1.9.0.dist-info → sqlframe-1.11.0.dist-info}/RECORD +32 -30
- {sqlframe-1.9.0.dist-info → sqlframe-1.11.0.dist-info}/LICENSE +0 -0
- {sqlframe-1.9.0.dist-info → sqlframe-1.11.0.dist-info}/WHEEL +0 -0
- {sqlframe-1.9.0.dist-info → sqlframe-1.11.0.dist-info}/top_level.txt +0 -0
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import sys
|
|
1
3
|
import typing as t
|
|
2
4
|
|
|
3
5
|
from sqlglot import exp
|
|
@@ -12,11 +14,30 @@ from sqlframe.base.dataframe import (
|
|
|
12
14
|
_BaseDataFrame,
|
|
13
15
|
)
|
|
14
16
|
|
|
17
|
+
if sys.version_info >= (3, 11):
|
|
18
|
+
from typing import Self
|
|
19
|
+
else:
|
|
20
|
+
from typing_extensions import Self
|
|
15
21
|
|
|
16
|
-
|
|
22
|
+
|
|
23
|
+
logger = logging.getLogger(__name__)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class NoCachePersistSupportMixin(_BaseDataFrame, t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
|
|
27
|
+
def cache(self) -> Self:
|
|
28
|
+
logger.warning("This engine does not support caching. Ignoring cache() call.")
|
|
29
|
+
return self
|
|
30
|
+
|
|
31
|
+
def persist(self) -> Self:
|
|
32
|
+
logger.warning("This engine does not support persist. Ignoring persist() call.")
|
|
33
|
+
return self
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class TypedColumnsFromTempViewMixin(
|
|
17
37
|
_BaseDataFrame, t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]
|
|
18
38
|
):
|
|
19
|
-
|
|
39
|
+
@property
|
|
40
|
+
def _typed_columns(self) -> t.List[Column]:
|
|
20
41
|
table = exp.to_table(self.session._random_id)
|
|
21
42
|
self.session._execute(
|
|
22
43
|
exp.Create(
|
|
@@ -27,37 +48,7 @@ class PrintSchemaFromTempObjectsMixin(
|
|
|
27
48
|
expression=self.expression,
|
|
28
49
|
)
|
|
29
50
|
)
|
|
51
|
+
|
|
30
52
|
return self.session.catalog.listColumns(
|
|
31
53
|
table.sql(dialect=self.session.input_dialect), include_temp=True
|
|
32
54
|
)
|
|
33
|
-
|
|
34
|
-
def printSchema(self, level: t.Optional[int] = None) -> None:
|
|
35
|
-
def print_schema(
|
|
36
|
-
column_name: str, column_type: exp.DataType, nullable: bool, current_level: int
|
|
37
|
-
):
|
|
38
|
-
if level and current_level >= level:
|
|
39
|
-
return
|
|
40
|
-
if current_level > 0:
|
|
41
|
-
print(" | " * current_level, end="")
|
|
42
|
-
print(
|
|
43
|
-
f" |-- {column_name}: {column_type.sql(self.session.output_dialect).lower()} (nullable = {str(nullable).lower()})"
|
|
44
|
-
)
|
|
45
|
-
if column_type.this == exp.DataType.Type.STRUCT:
|
|
46
|
-
for column_def in column_type.expressions:
|
|
47
|
-
print_schema(column_def.name, column_def.args["kind"], True, current_level + 1)
|
|
48
|
-
if column_type.this == exp.DataType.Type.ARRAY:
|
|
49
|
-
for data_type in column_type.expressions:
|
|
50
|
-
print_schema("element", data_type, True, current_level + 1)
|
|
51
|
-
if column_type.this == exp.DataType.Type.MAP:
|
|
52
|
-
print_schema("key", column_type.expressions[0], True, current_level + 1)
|
|
53
|
-
print_schema("value", column_type.expressions[1], True, current_level + 1)
|
|
54
|
-
|
|
55
|
-
columns = self._get_columns_from_temp_object()
|
|
56
|
-
print("root")
|
|
57
|
-
for column in columns:
|
|
58
|
-
print_schema(
|
|
59
|
-
column.name,
|
|
60
|
-
exp.DataType.build(column.dataType, dialect=self.session.output_dialect),
|
|
61
|
-
column.nullable,
|
|
62
|
-
0,
|
|
63
|
-
)
|
sqlframe/base/session.py
CHANGED
|
@@ -442,12 +442,12 @@ class _BaseSession(t.Generic[CATALOG, READER, WRITER, DF, CONN]):
|
|
|
442
442
|
|
|
443
443
|
@classmethod
|
|
444
444
|
def _to_row(cls, columns: t.List[str], values: t.Iterable[t.Any]) -> Row:
|
|
445
|
-
from sqlframe.base.types import Row
|
|
445
|
+
from sqlframe.base.types import Row, _create_row
|
|
446
446
|
|
|
447
447
|
converted_values = []
|
|
448
448
|
for value in values:
|
|
449
449
|
converted_values.append(cls._to_value(value))
|
|
450
|
-
return
|
|
450
|
+
return _create_row(columns, converted_values)
|
|
451
451
|
|
|
452
452
|
def _fetch_rows(
|
|
453
453
|
self, sql: t.Union[str, exp.Expression], *, quote_identifiers: bool = True
|
sqlframe/base/types.py
CHANGED
|
@@ -22,14 +22,14 @@ class DataType:
|
|
|
22
22
|
return not self.__eq__(other)
|
|
23
23
|
|
|
24
24
|
def __str__(self) -> str:
|
|
25
|
-
return self.
|
|
25
|
+
return self.simpleString()
|
|
26
26
|
|
|
27
27
|
@classmethod
|
|
28
28
|
def typeName(cls) -> str:
|
|
29
29
|
return cls.__name__[:-4].lower()
|
|
30
30
|
|
|
31
31
|
def simpleString(self) -> str:
|
|
32
|
-
return
|
|
32
|
+
return self.typeName()
|
|
33
33
|
|
|
34
34
|
def jsonValue(self) -> t.Union[str, t.Dict[str, t.Any]]:
|
|
35
35
|
return str(self)
|
|
@@ -222,7 +222,7 @@ class StructType(DataType):
|
|
|
222
222
|
def _create_row(
|
|
223
223
|
fields: t.Union[Row, t.List[str]], values: t.Union[t.Tuple[t.Any, ...], t.List[t.Any]]
|
|
224
224
|
) -> Row:
|
|
225
|
-
row = Row(*values)
|
|
225
|
+
row = Row(*[float(x) if isinstance(x, Decimal) else x for x in values])
|
|
226
226
|
row.__fields__ = fields
|
|
227
227
|
return row
|
|
228
228
|
|
sqlframe/base/util.py
CHANGED
|
@@ -71,6 +71,11 @@ def get_column_mapping_from_schema_input(
|
|
|
71
71
|
col_name_type_strs = [x.strip() for x in schema.split(",")]
|
|
72
72
|
if len(col_name_type_strs) == 1 and len(col_name_type_strs[0].split(" ")) == 1:
|
|
73
73
|
value = {"value": col_name_type_strs[0].strip()}
|
|
74
|
+
elif schema.startswith("struct<") and schema.endswith(">"):
|
|
75
|
+
value = {
|
|
76
|
+
name_type_str.split(":")[0].strip(): name_type_str.split(":")[1].strip()
|
|
77
|
+
for name_type_str in schema[7:-1].split(",")
|
|
78
|
+
}
|
|
74
79
|
else:
|
|
75
80
|
value = {
|
|
76
81
|
name_type_str.split(" ")[0].strip(): name_type_str.split(" ")[1].strip()
|
|
@@ -286,3 +291,54 @@ def quote_preserving_alias_or_name(col: t.Union[exp.Column, exp.Alias]) -> str:
|
|
|
286
291
|
return col.sql(dialect=_BaseSession().input_dialect)
|
|
287
292
|
# We may get things like `Null()` expression or maybe literals so we just return the alias or name in those cases
|
|
288
293
|
return col.alias_or_name
|
|
294
|
+
|
|
295
|
+
|
|
296
|
+
def sqlglot_to_spark(sqlglot_dtype: exp.DataType) -> types.DataType:
|
|
297
|
+
from sqlframe.base import types
|
|
298
|
+
|
|
299
|
+
primitive_mapping = {
|
|
300
|
+
exp.DataType.Type.VARCHAR: types.VarcharType,
|
|
301
|
+
exp.DataType.Type.CHAR: types.CharType,
|
|
302
|
+
exp.DataType.Type.TEXT: types.StringType,
|
|
303
|
+
exp.DataType.Type.BINARY: types.BinaryType,
|
|
304
|
+
exp.DataType.Type.BOOLEAN: types.BooleanType,
|
|
305
|
+
exp.DataType.Type.INT: types.IntegerType,
|
|
306
|
+
exp.DataType.Type.BIGINT: types.LongType,
|
|
307
|
+
exp.DataType.Type.SMALLINT: types.ShortType,
|
|
308
|
+
exp.DataType.Type.FLOAT: types.FloatType,
|
|
309
|
+
exp.DataType.Type.DOUBLE: types.DoubleType,
|
|
310
|
+
exp.DataType.Type.DECIMAL: types.DecimalType,
|
|
311
|
+
exp.DataType.Type.TIMESTAMP: types.TimestampType,
|
|
312
|
+
exp.DataType.Type.TIMESTAMPTZ: types.TimestampType,
|
|
313
|
+
exp.DataType.Type.TIMESTAMPLTZ: types.TimestampType,
|
|
314
|
+
exp.DataType.Type.TIMESTAMPNTZ: types.TimestampType,
|
|
315
|
+
exp.DataType.Type.DATE: types.DateType,
|
|
316
|
+
}
|
|
317
|
+
if sqlglot_dtype.this in primitive_mapping:
|
|
318
|
+
pyspark_class = primitive_mapping[sqlglot_dtype.this]
|
|
319
|
+
if issubclass(pyspark_class, types.DataTypeWithLength) and sqlglot_dtype.expressions:
|
|
320
|
+
return pyspark_class(length=int(sqlglot_dtype.expressions[0].this.this))
|
|
321
|
+
elif issubclass(pyspark_class, types.DecimalType) and sqlglot_dtype.expressions:
|
|
322
|
+
return pyspark_class(
|
|
323
|
+
precision=int(sqlglot_dtype.expressions[0].this.this),
|
|
324
|
+
scale=int(sqlglot_dtype.expressions[1].this.this),
|
|
325
|
+
)
|
|
326
|
+
return pyspark_class()
|
|
327
|
+
if sqlglot_dtype.this == exp.DataType.Type.ARRAY:
|
|
328
|
+
return types.ArrayType(sqlglot_to_spark(sqlglot_dtype.expressions[0]))
|
|
329
|
+
elif sqlglot_dtype.this == exp.DataType.Type.MAP:
|
|
330
|
+
return types.MapType(
|
|
331
|
+
sqlglot_to_spark(sqlglot_dtype.expressions[0]),
|
|
332
|
+
sqlglot_to_spark(sqlglot_dtype.expressions[1]),
|
|
333
|
+
)
|
|
334
|
+
elif sqlglot_dtype.this in (exp.DataType.Type.STRUCT, exp.DataType.Type.OBJECT):
|
|
335
|
+
return types.StructType(
|
|
336
|
+
[
|
|
337
|
+
types.StructField(
|
|
338
|
+
name=field.this.alias_or_name,
|
|
339
|
+
dataType=sqlglot_to_spark(field.args["kind"]),
|
|
340
|
+
)
|
|
341
|
+
for field in sqlglot_dtype.expressions
|
|
342
|
+
]
|
|
343
|
+
)
|
|
344
|
+
raise NotImplementedError(f"Unsupported data type: {sqlglot_dtype}")
|
sqlframe/bigquery/dataframe.py
CHANGED
|
@@ -1,21 +1,17 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import logging
|
|
4
|
-
import sys
|
|
5
4
|
import typing as t
|
|
6
5
|
|
|
6
|
+
from sqlframe.base.catalog import Column as CatalogColumn
|
|
7
7
|
from sqlframe.base.dataframe import (
|
|
8
8
|
_BaseDataFrame,
|
|
9
9
|
_BaseDataFrameNaFunctions,
|
|
10
10
|
_BaseDataFrameStatFunctions,
|
|
11
11
|
)
|
|
12
|
+
from sqlframe.base.mixins.dataframe_mixins import NoCachePersistSupportMixin
|
|
12
13
|
from sqlframe.bigquery.group import BigQueryGroupedData
|
|
13
14
|
|
|
14
|
-
if sys.version_info >= (3, 11):
|
|
15
|
-
from typing import Self
|
|
16
|
-
else:
|
|
17
|
-
from typing_extensions import Self
|
|
18
|
-
|
|
19
15
|
if t.TYPE_CHECKING:
|
|
20
16
|
from sqlframe.bigquery.readwriter import BigQueryDataFrameWriter
|
|
21
17
|
from sqlframe.bigquery.session import BigQuerySession
|
|
@@ -33,22 +29,46 @@ class BigQueryDataFrameStatFunctions(_BaseDataFrameStatFunctions["BigQueryDataFr
|
|
|
33
29
|
|
|
34
30
|
|
|
35
31
|
class BigQueryDataFrame(
|
|
32
|
+
NoCachePersistSupportMixin,
|
|
36
33
|
_BaseDataFrame[
|
|
37
34
|
"BigQuerySession",
|
|
38
35
|
"BigQueryDataFrameWriter",
|
|
39
36
|
"BigQueryDataFrameNaFunctions",
|
|
40
37
|
"BigQueryDataFrameStatFunctions",
|
|
41
38
|
"BigQueryGroupedData",
|
|
42
|
-
]
|
|
39
|
+
],
|
|
43
40
|
):
|
|
44
41
|
_na = BigQueryDataFrameNaFunctions
|
|
45
42
|
_stat = BigQueryDataFrameStatFunctions
|
|
46
43
|
_group_data = BigQueryGroupedData
|
|
47
44
|
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
45
|
+
@property
|
|
46
|
+
def _typed_columns(self) -> t.List[CatalogColumn]:
|
|
47
|
+
from google.cloud import bigquery
|
|
48
|
+
|
|
49
|
+
def field_to_column(field: bigquery.SchemaField) -> CatalogColumn:
|
|
50
|
+
if field.field_type == "RECORD":
|
|
51
|
+
data_type = "STRUCT<"
|
|
52
|
+
for subfield in field.fields:
|
|
53
|
+
column = field_to_column(subfield)
|
|
54
|
+
data_type += f"{column.name} {column.dataType},"
|
|
55
|
+
data_type += ">"
|
|
56
|
+
elif field.field_type == "INTEGER":
|
|
57
|
+
data_type = "INT64"
|
|
58
|
+
else:
|
|
59
|
+
data_type = field.field_type
|
|
60
|
+
if field.mode == "REPEATED":
|
|
61
|
+
data_type = f"ARRAY<{data_type}>"
|
|
62
|
+
return CatalogColumn(
|
|
63
|
+
name=field.name,
|
|
64
|
+
dataType=data_type,
|
|
65
|
+
nullable=field.is_nullable,
|
|
66
|
+
description=None,
|
|
67
|
+
isPartition=False,
|
|
68
|
+
isBucket=False,
|
|
69
|
+
)
|
|
51
70
|
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
71
|
+
job_config = bigquery.QueryJobConfig(dry_run=True, use_query_cache=False)
|
|
72
|
+
sql = self.session._to_sql(self.expression)
|
|
73
|
+
query_job = self.session._client.query(sql, job_config=job_config)
|
|
74
|
+
return [field_to_column(field) for field in query_job.schema]
|
sqlframe/bigquery/functions.py
CHANGED
|
@@ -26,8 +26,11 @@ globals().update(
|
|
|
26
26
|
|
|
27
27
|
|
|
28
28
|
from sqlframe.base.function_alternatives import ( # noqa
|
|
29
|
+
any_value_ignore_nulls_not_supported as any_value,
|
|
30
|
+
current_user_from_session_user as current_user,
|
|
29
31
|
e_literal as e,
|
|
30
32
|
expm1_from_exp as expm1,
|
|
33
|
+
extract_convert_to_var as extract,
|
|
31
34
|
factorial_from_case_statement as factorial,
|
|
32
35
|
log1p_from_log as log1p,
|
|
33
36
|
rint_from_round as rint,
|
|
@@ -63,6 +66,7 @@ from sqlframe.base.function_alternatives import ( # noqa
|
|
|
63
66
|
element_at_using_brackets as element_at,
|
|
64
67
|
array_union_using_array_concat as array_union,
|
|
65
68
|
sequence_from_generate_array as sequence,
|
|
69
|
+
position_as_strpos as position,
|
|
66
70
|
)
|
|
67
71
|
|
|
68
72
|
|
sqlframe/bigquery/functions.pyi
CHANGED
|
@@ -1,6 +1,9 @@
|
|
|
1
1
|
import typing as t
|
|
2
2
|
|
|
3
3
|
from sqlframe.base.column import Column as Column
|
|
4
|
+
from sqlframe.base.function_alternatives import ( # noqa
|
|
5
|
+
any_value_ignore_nulls_not_supported as any_value,
|
|
6
|
+
)
|
|
4
7
|
from sqlframe.base.function_alternatives import (
|
|
5
8
|
array_union_using_array_concat as array_union,
|
|
6
9
|
)
|
|
@@ -16,6 +19,9 @@ from sqlframe.base.function_alternatives import (
|
|
|
16
19
|
from sqlframe.base.function_alternatives import (
|
|
17
20
|
concat_ws_from_array_to_string as concat_ws,
|
|
18
21
|
)
|
|
22
|
+
from sqlframe.base.function_alternatives import (
|
|
23
|
+
current_user_from_session_user as current_user,
|
|
24
|
+
)
|
|
19
25
|
from sqlframe.base.function_alternatives import (
|
|
20
26
|
dayofmonth_from_extract_with_day as dayofmonth,
|
|
21
27
|
)
|
|
@@ -25,7 +31,7 @@ from sqlframe.base.function_alternatives import (
|
|
|
25
31
|
from sqlframe.base.function_alternatives import (
|
|
26
32
|
dayofyear_from_extract as dayofyear,
|
|
27
33
|
)
|
|
28
|
-
from sqlframe.base.function_alternatives import (
|
|
34
|
+
from sqlframe.base.function_alternatives import (
|
|
29
35
|
e_literal as e,
|
|
30
36
|
)
|
|
31
37
|
from sqlframe.base.function_alternatives import (
|
|
@@ -34,6 +40,9 @@ from sqlframe.base.function_alternatives import (
|
|
|
34
40
|
from sqlframe.base.function_alternatives import (
|
|
35
41
|
expm1_from_exp as expm1,
|
|
36
42
|
)
|
|
43
|
+
from sqlframe.base.function_alternatives import (
|
|
44
|
+
extract_convert_to_var as extract,
|
|
45
|
+
)
|
|
37
46
|
from sqlframe.base.function_alternatives import (
|
|
38
47
|
factorial_from_case_statement as factorial,
|
|
39
48
|
)
|
|
@@ -79,6 +88,9 @@ from sqlframe.base.function_alternatives import (
|
|
|
79
88
|
from sqlframe.base.function_alternatives import (
|
|
80
89
|
percentile_approx_without_accuracy_and_plural as percentile_approx,
|
|
81
90
|
)
|
|
91
|
+
from sqlframe.base.function_alternatives import (
|
|
92
|
+
position_as_strpos as position,
|
|
93
|
+
)
|
|
82
94
|
from sqlframe.base.function_alternatives import (
|
|
83
95
|
quarter_from_extract as quarter,
|
|
84
96
|
)
|
|
@@ -133,9 +145,13 @@ from sqlframe.base.functions import atanh as atanh
|
|
|
133
145
|
from sqlframe.base.functions import avg as avg
|
|
134
146
|
from sqlframe.base.functions import bitwise_not as bitwise_not
|
|
135
147
|
from sqlframe.base.functions import bitwiseNOT as bitwiseNOT
|
|
148
|
+
from sqlframe.base.functions import bool_and as bool_and
|
|
149
|
+
from sqlframe.base.functions import bool_or as bool_or
|
|
150
|
+
from sqlframe.base.functions import call_function as call_function
|
|
136
151
|
from sqlframe.base.functions import cbrt as cbrt
|
|
137
152
|
from sqlframe.base.functions import ceil as ceil
|
|
138
153
|
from sqlframe.base.functions import ceiling as ceiling
|
|
154
|
+
from sqlframe.base.functions import char as char
|
|
139
155
|
from sqlframe.base.functions import coalesce as coalesce
|
|
140
156
|
from sqlframe.base.functions import col as col
|
|
141
157
|
from sqlframe.base.functions import collect_list as collect_list
|
|
@@ -145,6 +161,7 @@ from sqlframe.base.functions import cos as cos
|
|
|
145
161
|
from sqlframe.base.functions import cosh as cosh
|
|
146
162
|
from sqlframe.base.functions import cot as cot
|
|
147
163
|
from sqlframe.base.functions import count as count
|
|
164
|
+
from sqlframe.base.functions import count_if as count_if
|
|
148
165
|
from sqlframe.base.functions import covar_pop as covar_pop
|
|
149
166
|
from sqlframe.base.functions import covar_samp as covar_samp
|
|
150
167
|
from sqlframe.base.functions import csc as csc
|
|
@@ -156,6 +173,8 @@ from sqlframe.base.functions import date_diff as date_diff
|
|
|
156
173
|
from sqlframe.base.functions import date_format as date_format
|
|
157
174
|
from sqlframe.base.functions import date_sub as date_sub
|
|
158
175
|
from sqlframe.base.functions import date_trunc as date_trunc
|
|
176
|
+
from sqlframe.base.functions import dateadd as dateadd
|
|
177
|
+
from sqlframe.base.functions import datediff as datediff
|
|
159
178
|
from sqlframe.base.functions import dense_rank as dense_rank
|
|
160
179
|
from sqlframe.base.functions import desc as desc
|
|
161
180
|
from sqlframe.base.functions import desc_nulls_first as desc_nulls_first
|
|
@@ -167,14 +186,18 @@ from sqlframe.base.functions import expr as expr
|
|
|
167
186
|
from sqlframe.base.functions import floor as floor
|
|
168
187
|
from sqlframe.base.functions import get_json_object as get_json_object
|
|
169
188
|
from sqlframe.base.functions import greatest as greatest
|
|
189
|
+
from sqlframe.base.functions import ifnull as ifnull
|
|
170
190
|
from sqlframe.base.functions import initcap as initcap
|
|
171
191
|
from sqlframe.base.functions import input_file_name as input_file_name
|
|
172
192
|
from sqlframe.base.functions import isnan as isnan
|
|
173
193
|
from sqlframe.base.functions import lag as lag
|
|
194
|
+
from sqlframe.base.functions import lcase as lcase
|
|
174
195
|
from sqlframe.base.functions import lead as lead
|
|
175
196
|
from sqlframe.base.functions import least as least
|
|
197
|
+
from sqlframe.base.functions import left as left
|
|
176
198
|
from sqlframe.base.functions import length as length
|
|
177
199
|
from sqlframe.base.functions import lit as lit
|
|
200
|
+
from sqlframe.base.functions import ln as ln
|
|
178
201
|
from sqlframe.base.functions import log as log
|
|
179
202
|
from sqlframe.base.functions import log2 as log2
|
|
180
203
|
from sqlframe.base.functions import log10 as log10
|
|
@@ -187,33 +210,43 @@ from sqlframe.base.functions import md5 as md5
|
|
|
187
210
|
from sqlframe.base.functions import mean as mean
|
|
188
211
|
from sqlframe.base.functions import min as min
|
|
189
212
|
from sqlframe.base.functions import min_by as min_by
|
|
213
|
+
from sqlframe.base.functions import now as now
|
|
190
214
|
from sqlframe.base.functions import nth_value as nth_value
|
|
191
215
|
from sqlframe.base.functions import ntile as ntile
|
|
192
216
|
from sqlframe.base.functions import nullif as nullif
|
|
217
|
+
from sqlframe.base.functions import nvl as nvl
|
|
218
|
+
from sqlframe.base.functions import nvl2 as nvl2
|
|
193
219
|
from sqlframe.base.functions import octet_length as octet_length
|
|
194
220
|
from sqlframe.base.functions import percent_rank as percent_rank
|
|
195
221
|
from sqlframe.base.functions import posexplode as posexplode
|
|
196
222
|
from sqlframe.base.functions import posexplode_outer as posexplode_outer
|
|
197
223
|
from sqlframe.base.functions import pow as pow
|
|
224
|
+
from sqlframe.base.functions import power as power
|
|
198
225
|
from sqlframe.base.functions import rank as rank
|
|
226
|
+
from sqlframe.base.functions import regexp_like as regexp_like
|
|
199
227
|
from sqlframe.base.functions import regexp_replace as regexp_replace
|
|
200
228
|
from sqlframe.base.functions import repeat as repeat
|
|
201
229
|
from sqlframe.base.functions import reverse as reverse
|
|
230
|
+
from sqlframe.base.functions import right as right
|
|
231
|
+
from sqlframe.base.functions import rlike as rlike
|
|
202
232
|
from sqlframe.base.functions import round as round
|
|
203
233
|
from sqlframe.base.functions import row_number as row_number
|
|
204
234
|
from sqlframe.base.functions import rpad as rpad
|
|
205
235
|
from sqlframe.base.functions import rtrim as rtrim
|
|
206
236
|
from sqlframe.base.functions import sec as sec
|
|
237
|
+
from sqlframe.base.functions import sha as sha
|
|
207
238
|
from sqlframe.base.functions import shiftLeft as shiftLeft
|
|
208
239
|
from sqlframe.base.functions import shiftleft as shiftleft
|
|
209
240
|
from sqlframe.base.functions import shiftRight as shiftRight
|
|
210
241
|
from sqlframe.base.functions import shiftright as shiftright
|
|
242
|
+
from sqlframe.base.functions import sign as sign
|
|
211
243
|
from sqlframe.base.functions import signum as signum
|
|
212
244
|
from sqlframe.base.functions import sin as sin
|
|
213
245
|
from sqlframe.base.functions import sinh as sinh
|
|
214
246
|
from sqlframe.base.functions import size as size
|
|
215
247
|
from sqlframe.base.functions import soundex as soundex
|
|
216
248
|
from sqlframe.base.functions import sqrt as sqrt
|
|
249
|
+
from sqlframe.base.functions import startswith as startswith
|
|
217
250
|
from sqlframe.base.functions import stddev as stddev
|
|
218
251
|
from sqlframe.base.functions import stddev_pop as stddev_pop
|
|
219
252
|
from sqlframe.base.functions import stddev_samp as stddev_samp
|
|
@@ -231,9 +264,12 @@ from sqlframe.base.functions import toRadians as toRadians
|
|
|
231
264
|
from sqlframe.base.functions import translate as translate
|
|
232
265
|
from sqlframe.base.functions import trim as trim
|
|
233
266
|
from sqlframe.base.functions import trunc as trunc
|
|
267
|
+
from sqlframe.base.functions import ucase as ucase
|
|
234
268
|
from sqlframe.base.functions import unbase64 as unbase64
|
|
235
269
|
from sqlframe.base.functions import unhex as unhex
|
|
270
|
+
from sqlframe.base.functions import unix_date as unix_date
|
|
236
271
|
from sqlframe.base.functions import upper as upper
|
|
272
|
+
from sqlframe.base.functions import user as user
|
|
237
273
|
from sqlframe.base.functions import var_pop as var_pop
|
|
238
274
|
from sqlframe.base.functions import var_samp as var_samp
|
|
239
275
|
from sqlframe.base.functions import variance as variance
|
sqlframe/duckdb/dataframe.py
CHANGED
|
@@ -9,14 +9,12 @@ from sqlframe.base.dataframe import (
|
|
|
9
9
|
_BaseDataFrameNaFunctions,
|
|
10
10
|
_BaseDataFrameStatFunctions,
|
|
11
11
|
)
|
|
12
|
-
from sqlframe.base.mixins.dataframe_mixins import
|
|
12
|
+
from sqlframe.base.mixins.dataframe_mixins import (
|
|
13
|
+
NoCachePersistSupportMixin,
|
|
14
|
+
TypedColumnsFromTempViewMixin,
|
|
15
|
+
)
|
|
13
16
|
from sqlframe.duckdb.group import DuckDBGroupedData
|
|
14
17
|
|
|
15
|
-
if sys.version_info >= (3, 11):
|
|
16
|
-
from typing import Self
|
|
17
|
-
else:
|
|
18
|
-
from typing_extensions import Self
|
|
19
|
-
|
|
20
18
|
if t.TYPE_CHECKING:
|
|
21
19
|
from sqlframe.duckdb.session import DuckDBSession # noqa
|
|
22
20
|
from sqlframe.duckdb.readwriter import DuckDBDataFrameWriter # noqa
|
|
@@ -35,7 +33,8 @@ class DuckDBDataFrameStatFunctions(_BaseDataFrameStatFunctions["DuckDBDataFrame"
|
|
|
35
33
|
|
|
36
34
|
|
|
37
35
|
class DuckDBDataFrame(
|
|
38
|
-
|
|
36
|
+
NoCachePersistSupportMixin,
|
|
37
|
+
TypedColumnsFromTempViewMixin,
|
|
39
38
|
_BaseDataFrame[
|
|
40
39
|
"DuckDBSession",
|
|
41
40
|
"DuckDBDataFrameWriter",
|
|
@@ -47,11 +46,3 @@ class DuckDBDataFrame(
|
|
|
47
46
|
_na = DuckDBDataFrameNaFunctions
|
|
48
47
|
_stat = DuckDBDataFrameStatFunctions
|
|
49
48
|
_group_data = DuckDBGroupedData
|
|
50
|
-
|
|
51
|
-
def cache(self) -> Self:
|
|
52
|
-
logger.warning("DuckDB does not support caching. Ignoring cache() call.")
|
|
53
|
-
return self
|
|
54
|
-
|
|
55
|
-
def persist(self) -> Self:
|
|
56
|
-
logger.warning("DuckDB does not support persist. Ignoring persist() call.")
|
|
57
|
-
return self
|
sqlframe/duckdb/functions.py
CHANGED
|
@@ -18,6 +18,7 @@ globals().update(
|
|
|
18
18
|
|
|
19
19
|
|
|
20
20
|
from sqlframe.base.function_alternatives import ( # noqa
|
|
21
|
+
any_value_always_ignore_nulls as any_value,
|
|
21
22
|
e_literal as e,
|
|
22
23
|
expm1_from_exp as expm1,
|
|
23
24
|
log1p_from_log as log1p,
|
|
@@ -44,4 +45,6 @@ from sqlframe.base.function_alternatives import ( # noqa
|
|
|
44
45
|
array_min_from_sort as array_min,
|
|
45
46
|
array_max_from_sort as array_max,
|
|
46
47
|
sequence_from_generate_series as sequence,
|
|
48
|
+
try_element_at_zero_based as try_element_at,
|
|
49
|
+
to_unix_timestamp_include_default_format as to_unix_timestamp,
|
|
47
50
|
)
|
sqlframe/duckdb/functions.pyi
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
from sqlframe.base.function_alternatives import ( # noqa
|
|
2
|
+
any_value_always_ignore_nulls as any_value,
|
|
2
3
|
e_literal as e,
|
|
3
4
|
expm1_from_exp as expm1,
|
|
4
5
|
log1p_from_log as log1p,
|
|
@@ -25,6 +26,8 @@ from sqlframe.base.function_alternatives import ( # noqa
|
|
|
25
26
|
array_min_from_sort as array_min,
|
|
26
27
|
array_max_from_sort as array_max,
|
|
27
28
|
sequence_from_generate_series as sequence,
|
|
29
|
+
try_element_at_zero_based as try_element_at,
|
|
30
|
+
to_unix_timestamp_include_default_format as to_unix_timestamp,
|
|
28
31
|
)
|
|
29
32
|
from sqlframe.base.functions import (
|
|
30
33
|
abs as abs,
|
|
@@ -50,9 +53,13 @@ from sqlframe.base.functions import (
|
|
|
50
53
|
bit_length as bit_length,
|
|
51
54
|
bitwiseNOT as bitwiseNOT,
|
|
52
55
|
bitwise_not as bitwise_not,
|
|
56
|
+
bool_and as bool_and,
|
|
57
|
+
bool_or as bool_or,
|
|
58
|
+
call_function as call_function,
|
|
53
59
|
cbrt as cbrt,
|
|
54
60
|
ceil as ceil,
|
|
55
61
|
ceiling as ceiling,
|
|
62
|
+
char as char,
|
|
56
63
|
coalesce as coalesce,
|
|
57
64
|
col as col,
|
|
58
65
|
collect_list as collect_list,
|
|
@@ -64,17 +71,21 @@ from sqlframe.base.functions import (
|
|
|
64
71
|
count as count,
|
|
65
72
|
countDistinct as countDistinct,
|
|
66
73
|
count_distinct as count_distinct,
|
|
74
|
+
count_if as count_if,
|
|
67
75
|
covar_pop as covar_pop,
|
|
68
76
|
covar_samp as covar_samp,
|
|
69
77
|
create_map as create_map,
|
|
70
78
|
cume_dist as cume_dist,
|
|
71
79
|
current_date as current_date,
|
|
72
80
|
current_timestamp as current_timestamp,
|
|
81
|
+
current_user as current_user,
|
|
73
82
|
date_add as date_add,
|
|
74
83
|
date_diff as date_diff,
|
|
75
84
|
date_format as date_format,
|
|
76
85
|
date_sub as date_sub,
|
|
77
86
|
date_trunc as date_trunc,
|
|
87
|
+
dateadd as dateadd,
|
|
88
|
+
datediff as datediff,
|
|
78
89
|
dayofmonth as dayofmonth,
|
|
79
90
|
dayofweek as dayofweek,
|
|
80
91
|
dayofyear as dayofyear,
|
|
@@ -87,6 +98,7 @@ from sqlframe.base.functions import (
|
|
|
87
98
|
exp as exp,
|
|
88
99
|
explode as explode,
|
|
89
100
|
expr as expr,
|
|
101
|
+
extract as extract,
|
|
90
102
|
flatten as flatten,
|
|
91
103
|
floor as floor,
|
|
92
104
|
from_unixtime as from_unixtime,
|
|
@@ -96,16 +108,20 @@ from sqlframe.base.functions import (
|
|
|
96
108
|
hash as hash,
|
|
97
109
|
hex as hex,
|
|
98
110
|
hour as hour,
|
|
111
|
+
ifnull as ifnull,
|
|
99
112
|
input_file_name as input_file_name,
|
|
100
113
|
instr as instr,
|
|
101
114
|
isnan as isnan,
|
|
102
115
|
lag as lag,
|
|
103
116
|
last as last,
|
|
117
|
+
lcase as lcase,
|
|
104
118
|
lead as lead,
|
|
105
119
|
least as least,
|
|
120
|
+
left as left,
|
|
106
121
|
length as length,
|
|
107
122
|
levenshtein as levenshtein,
|
|
108
123
|
lit as lit,
|
|
124
|
+
ln as ln,
|
|
109
125
|
locate as locate,
|
|
110
126
|
log as log,
|
|
111
127
|
log10 as log10,
|
|
@@ -124,19 +140,27 @@ from sqlframe.base.functions import (
|
|
|
124
140
|
minute as minute,
|
|
125
141
|
month as month,
|
|
126
142
|
months_between as months_between,
|
|
143
|
+
now as now,
|
|
127
144
|
nth_value as nth_value,
|
|
128
145
|
ntile as ntile,
|
|
129
146
|
nullif as nullif,
|
|
147
|
+
nvl as nvl,
|
|
148
|
+
nvl2 as nvl2,
|
|
130
149
|
percent_rank as percent_rank,
|
|
131
150
|
percentile as percentile,
|
|
151
|
+
position as position,
|
|
132
152
|
pow as pow,
|
|
153
|
+
power as power,
|
|
133
154
|
quarter as quarter,
|
|
134
155
|
radians as radians,
|
|
135
156
|
rank as rank,
|
|
136
157
|
regexp_extract as regexp_extract,
|
|
158
|
+
regexp_like as regexp_like,
|
|
137
159
|
regexp_replace as regexp_replace,
|
|
138
160
|
repeat as repeat,
|
|
139
161
|
reverse as reverse,
|
|
162
|
+
right as right,
|
|
163
|
+
rlike as rlike,
|
|
140
164
|
round as round,
|
|
141
165
|
row_number as row_number,
|
|
142
166
|
rpad as rpad,
|
|
@@ -146,6 +170,7 @@ from sqlframe.base.functions import (
|
|
|
146
170
|
shiftRight as shiftRight,
|
|
147
171
|
shiftleft as shiftleft,
|
|
148
172
|
shiftright as shiftright,
|
|
173
|
+
sign as sign,
|
|
149
174
|
signum as signum,
|
|
150
175
|
sin as sin,
|
|
151
176
|
size as size,
|
|
@@ -153,6 +178,7 @@ from sqlframe.base.functions import (
|
|
|
153
178
|
sort_array as sort_array,
|
|
154
179
|
soundex as soundex,
|
|
155
180
|
sqrt as sqrt,
|
|
181
|
+
startswith as startswith,
|
|
156
182
|
stddev as stddev,
|
|
157
183
|
stddev_pop as stddev_pop,
|
|
158
184
|
stddev_samp as stddev_samp,
|
|
@@ -171,10 +197,13 @@ from sqlframe.base.functions import (
|
|
|
171
197
|
trim as trim,
|
|
172
198
|
trunc as trunc,
|
|
173
199
|
typeof as typeof,
|
|
200
|
+
ucase as ucase,
|
|
174
201
|
unbase64 as unbase64,
|
|
175
202
|
unhex as unhex,
|
|
203
|
+
unix_date as unix_date,
|
|
176
204
|
unix_timestamp as unix_timestamp,
|
|
177
205
|
upper as upper,
|
|
206
|
+
user as user,
|
|
178
207
|
var_pop as var_pop,
|
|
179
208
|
var_samp as var_samp,
|
|
180
209
|
variance as variance,
|