sqlframe 3.8.2__py3-none-any.whl → 3.9.0__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to their public registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in the registry.
- sqlframe/__init__.py +1 -0
- sqlframe/_version.py +2 -2
- sqlframe/base/dataframe.py +2 -2
- sqlframe/databricks/__init__.py +32 -0
- sqlframe/databricks/catalog.py +302 -0
- sqlframe/databricks/column.py +1 -0
- sqlframe/databricks/dataframe.py +69 -0
- sqlframe/databricks/functions.py +22 -0
- sqlframe/databricks/functions.pyi +416 -0
- sqlframe/databricks/group.py +14 -0
- sqlframe/databricks/readwriter.py +96 -0
- sqlframe/databricks/session.py +59 -0
- sqlframe/databricks/types.py +1 -0
- sqlframe/databricks/udf.py +11 -0
- sqlframe/databricks/window.py +1 -0
- {sqlframe-3.8.2.dist-info → sqlframe-3.9.0.dist-info}/METADATA +16 -5
- {sqlframe-3.8.2.dist-info → sqlframe-3.9.0.dist-info}/RECORD +20 -8
- {sqlframe-3.8.2.dist-info → sqlframe-3.9.0.dist-info}/LICENSE +0 -0
- {sqlframe-3.8.2.dist-info → sqlframe-3.9.0.dist-info}/WHEEL +0 -0
- {sqlframe-3.8.2.dist-info → sqlframe-3.9.0.dist-info}/top_level.txt +0 -0
sqlframe/__init__.py
CHANGED
sqlframe/_version.py
CHANGED
sqlframe/base/dataframe.py
CHANGED
@@ -629,10 +629,10 @@ class _BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
             # We will drop the "view" if it exists before running the cache table
             output_expressions.append(exp.Drop(this=cache_table, exists=True, kind="VIEW"))
         elif expression_type == exp.Create:
-            expression = df.output_expression_container.copy()
+            expression = df.output_expression_container.copy()  # type: ignore
             expression.set("expression", select_expression)
         elif expression_type == exp.Insert:
-            expression = df.output_expression_container.copy()
+            expression = df.output_expression_container.copy()  # type: ignore
             select_without_ctes = select_expression.copy()
             select_without_ctes.set("with", None)
             expression.set("expression", select_without_ctes)
sqlframe/databricks/__init__.py
ADDED
@@ -0,0 +1,32 @@
+from sqlframe.databricks.catalog import DatabricksCatalog
+from sqlframe.databricks.column import Column
+from sqlframe.databricks.dataframe import (
+    DatabricksDataFrame,
+    DatabricksDataFrameNaFunctions,
+    DatabricksDataFrameStatFunctions,
+)
+from sqlframe.databricks.group import DatabricksGroupedData
+from sqlframe.databricks.readwriter import (
+    DatabricksDataFrameReader,
+    DatabricksDataFrameWriter,
+)
+from sqlframe.databricks.session import DatabricksSession
+from sqlframe.databricks.types import Row
+from sqlframe.databricks.udf import DatabricksUDFRegistration
+from sqlframe.databricks.window import Window, WindowSpec
+
+__all__ = [
+    "Column",
+    "Row",
+    "DatabricksCatalog",
+    "DatabricksDataFrame",
+    "DatabricksDataFrameNaFunctions",
+    "DatabricksGroupedData",
+    "DatabricksDataFrameReader",
+    "DatabricksDataFrameWriter",
+    "DatabricksSession",
+    "DatabricksDataFrameStatFunctions",
+    "DatabricksUDFRegistration",
+    "Window",
+    "WindowSpec",
+]
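The new package exposes the same surface as sqlframe's existing engine packages. A minimal sketch of the import side, using only the names re-exported in `__all__` above (session construction is shown later, under session.py):

```python
# Names re-exported by sqlframe/databricks/__init__.py, mirroring the
# layout of the other engine packages (e.g. sqlframe.bigquery).
from sqlframe.databricks import (
    DatabricksSession,
    DatabricksDataFrame,
    Row,
    Window,
)
```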
sqlframe/databricks/catalog.py
ADDED
@@ -0,0 +1,302 @@
+# This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'sqlframe' folder.
+
+from __future__ import annotations
+
+import fnmatch
+import json
+import typing as t
+
+from sqlglot import exp, parse_one
+
+from sqlframe.base.catalog import Column, Function, _BaseCatalog
+from sqlframe.base.mixins.catalog_mixins import (
+    GetCurrentCatalogFromFunctionMixin,
+    GetCurrentDatabaseFromFunctionMixin,
+    ListCatalogsFromInfoSchemaMixin,
+    ListDatabasesFromInfoSchemaMixin,
+    ListTablesFromInfoSchemaMixin,
+    SetCurrentCatalogFromUseMixin,
+    SetCurrentDatabaseFromUseMixin,
+)
+from sqlframe.base.util import normalize_string, schema_, to_schema
+
+if t.TYPE_CHECKING:
+    from sqlframe.databricks.session import DatabricksSession  # noqa
+    from sqlframe.databricks.dataframe import DatabricksDataFrame  # noqa
+
+
+class DatabricksCatalog(
+    SetCurrentCatalogFromUseMixin["DatabricksSession", "DatabricksDataFrame"],
+    GetCurrentCatalogFromFunctionMixin["DatabricksSession", "DatabricksDataFrame"],
+    GetCurrentDatabaseFromFunctionMixin["DatabricksSession", "DatabricksDataFrame"],
+    ListDatabasesFromInfoSchemaMixin["DatabricksSession", "DatabricksDataFrame"],
+    ListCatalogsFromInfoSchemaMixin["DatabricksSession", "DatabricksDataFrame"],
+    SetCurrentDatabaseFromUseMixin["DatabricksSession", "DatabricksDataFrame"],
+    ListTablesFromInfoSchemaMixin["DatabricksSession", "DatabricksDataFrame"],
+    _BaseCatalog["DatabricksSession", "DatabricksDataFrame"],
+):
+    CURRENT_CATALOG_EXPRESSION: exp.Expression = exp.func("current_catalog")
+    UPPERCASE_INFO_SCHEMA = True
+
+    def listFunctions(
+        self, dbName: t.Optional[str] = None, pattern: t.Optional[str] = None
+    ) -> t.List[Function]:
+        """
+        Returns a t.List of functions registered in the specified database.
+
+        .. versionadded:: 3.4.0
+
+        Parameters
+        ----------
+        dbName : str
+            name of the database to t.List the functions.
+            ``dbName`` can be qualified with catalog name.
+        pattern : str
+            The pattern that the function name needs to match.
+
+            .. versionchanged: 3.5.0
+                Adds ``pattern`` argument.
+
+        Returns
+        -------
+        t.List
+            A t.List of :class:`Function`.
+
+        Notes
+        -----
+        If no database is specified, the current database and catalog
+        are used. This API includes all temporary functions.
+
+        Examples
+        --------
+        >>> spark.catalog.t.listFunctions()
+        [Function(name=...
+
+        >>> spark.catalog.t.listFunctions(pattern="to_*")
+        [Function(name=...
+
+        >>> spark.catalog.t.listFunctions(pattern="*not_existing_func*")
+        []
+        """
+        if dbName is None:
+            schema = schema_(
+                db=exp.parse_identifier(
+                    self.currentDatabase(), dialect=self.session.output_dialect
+                ),
+                catalog=exp.parse_identifier(
+                    self.currentCatalog(), dialect=self.session.output_dialect
+                ),
+            )
+        else:
+            dbName = normalize_string(dbName, from_dialect="input", is_schema=True)
+            schema = to_schema(dbName, dialect=self.session.input_dialect)
+            if not schema.catalog:
+                schema.set(
+                    "catalog",
+                    exp.parse_identifier(
+                        self.currentCatalog(), dialect=self.session.output_dialect
+                    ),
+                )
+        current_catalog = self.currentCatalog()
+        self.session._collect(f"USE CATALOG {schema.catalog}")
+
+        query = parse_one(
+            f"""SHOW USER FUNCTIONS IN {schema.sql(dialect=self.session.input_dialect)}""",
+            dialect=self.session.input_dialect,
+        )
+        functions = [
+            Function(
+                name=normalize_string(x["function"], from_dialect="execution", to_dialect="output"),
+                catalog=normalize_string(
+                    schema.catalog, from_dialect="execution", to_dialect="output"
+                ),
+                namespace=[
+                    normalize_string(schema.db, from_dialect="execution", to_dialect="output")
+                ],
+                description=None,
+                className="",
+                isTemporary=False,
+            )
+            for x in self.session._collect(query)
+        ]
+        if pattern:
+            normalized_pattern = normalize_string(
+                pattern, from_dialect="input", to_dialect="output", is_pattern=True
+            )
+            functions = [x for x in functions if fnmatch.fnmatch(x.name, normalized_pattern)]
+        self.session._collect(f"USE CATALOG {current_catalog}")
+        return functions
+
+    def get_columns(self, table: exp.Table | str) -> t.Dict[str, exp.DataType]:
+        table = (
+            normalize_string(table, from_dialect="input", is_table=True)
+            if isinstance(table, str)
+            else table
+        )
+        table = exp.to_table(table, dialect=self.session.input_dialect)
+        if not table.catalog:
+            table.set(
+                "catalog",
+                exp.parse_identifier(
+                    normalize_string(
+                        self.currentCatalog(), from_dialect="output", to_dialect="input"
+                    ),
+                    dialect=self.session.input_dialect,
+                ),
+            )
+        if not table.db:
+            table.set(
+                "db",
+                exp.parse_identifier(
+                    normalize_string(
+                        self.currentDatabase(), from_dialect="output", to_dialect="input"
+                    ),
+                    dialect=self.session.input_dialect,
+                ),
+            )
+        sql = f"DESCRIBE TABLE {table.sql(dialect=self.session.input_dialect)}"
+        results = self.session._collect(sql)
+        return {
+            normalize_string(
+                row["col_name"],
+                from_dialect="execution",
+                to_dialect="output",
+                is_column=True,
+            ): exp.DataType.build(
+                normalize_string(
+                    row["data_type"],
+                    from_dialect="execution",
+                    to_dialect="output",
+                    is_datatype=True,
+                ),
+                dialect=self.session.output_dialect,
+                udt=True,
+            )
+            for row in results
+            if row["data_type"] != "" and row["data_type"] != "data_type"
+        }
+
+    def listColumns(
+        self, tableName: str, dbName: t.Optional[str] = None, include_temp: bool = False
+    ) -> t.List[Column]:
+        """Returns a t.List of columns for the given table/view in the specified database.
+
+        .. versionadded:: 2.0.0
+
+        Parameters
+        ----------
+        tableName : str
+            name of the table to t.List columns.
+
+            .. versionchanged:: 3.4.0
+               Allow ``tableName`` to be qualified with catalog name when ``dbName`` is None.
+
+        dbName : str, t.Optional
+            name of the database to find the table to t.List columns.
+
+        Returns
+        -------
+        t.List
+            A t.List of :class:`Column`.
+
+        Notes
+        -----
+        The order of arguments here is different from that of its JVM counterpart
+        because Python does not support method overloading.
+
+        If no database is specified, the current database and catalog
+        are used. This API includes all temporary views.
+
+        Examples
+        --------
+        >>> _ = spark.sql("DROP TABLE IF EXISTS tbl1")
+        >>> _ = spark.sql("CREATE TABLE tblA (name STRING, age INT) USING parquet")
+        >>> spark.catalog.t.listColumns("tblA")
+        [Column(name='name', description=None, dataType='string', nullable=True, ...
+        >>> _ = spark.sql("DROP TABLE tblA")
+        """
+        tableName = normalize_string(
+            tableName, from_dialect=self.session.input_dialect, is_table=True
+        )
+        dbName = (
+            normalize_string(dbName, from_dialect=self.session.input_dialect, is_schema=True)
+            if dbName
+            else None
+        )
+        if df := self.session.temp_views.get(tableName):
+            return [
+                Column(
+                    name=x,
+                    description=None,
+                    dataType="",
+                    nullable=True,
+                    isPartition=False,
+                    isBucket=False,
+                )
+                for x in df.columns
+            ]
+
+        table = exp.to_table(tableName, dialect=self.session.input_dialect)
+        schema = to_schema(dbName, dialect=self.session.input_dialect) if dbName else None
+        if not table.db:
+            if schema and schema.db:
+                table.set("db", schema.args["db"])
+            else:
+                current_database = normalize_string(
+                    self.currentDatabase(),
+                    from_dialect=self.session.output_dialect,
+                    to_dialect=self.session.input_dialect,
+                )
+                table.set(
+                    "db",
+                    exp.parse_identifier(current_database, dialect=self.session.input_dialect),
+                )
+        if not table.catalog:
+            if schema and schema.catalog:
+                table.set("catalog", schema.args["catalog"])
+            else:
+                current_catalog = normalize_string(
+                    self.currentCatalog(),
+                    from_dialect=self.session.output_dialect,
+                    to_dialect=self.session.input_dialect,
+                )
+                table.set(
+                    "catalog",
+                    exp.parse_identifier(current_catalog, dialect=self.session.input_dialect),
+                )
+        sql = f"DESCRIBE TABLE {'.'.join(part.name for part in table.parts)}"
+        results = self.session._collect(sql)
+
+        is_partition = False
+        partitions = set([])
+        for row in results:
+            if row["col_name"] == "# Partition Information":
+                is_partition = True
+            if is_partition and row["data_type"] != "" and row["data_type"] != "data_type":
+                partitions.add(row["col_name"])
+
+        columns = []
+        for row in results:
+            if row["data_type"] == "" or row["data_type"] == "data_type":
+                break
+            columns.append(
+                Column(
+                    name=normalize_string(
+                        row["col_name"],
+                        from_dialect=self.session.execution_dialect,
+                        to_dialect=self.session.output_dialect,
+                    ),
+                    description=row["comment"],
+                    dataType=normalize_string(
+                        row["data_type"],
+                        from_dialect=self.session.execution_dialect,
+                        to_dialect=self.session.output_dialect,
+                        is_datatype=True,
+                    ),
+                    nullable=True,
+                    isPartition=True if row["col_name"] in partitions else False,
+                    isBucket=False,
+                )
+            )
+
+        return columns
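A hedged usage sketch of the two new catalog entry points. It assumes an already-connected `DatabricksSession` exposing the PySpark-style `catalog` attribute; the catalog, schema, and table names are placeholders:

```python
# Assumes `session` is a connected DatabricksSession; names are placeholders.
catalog = session.catalog

# listFunctions runs SHOW USER FUNCTIONS against the resolved
# catalog.schema (temporarily switching via USE CATALOG), then filters
# the results client-side with fnmatch:
funcs = catalog.listFunctions(dbName="main.default", pattern="to_*")

# listColumns parses DESCRIBE TABLE output; rows listed after the
# "# Partition Information" marker come back with isPartition=True:
for c in catalog.listColumns("trips", dbName="main.default"):
    print(c.name, c.dataType, c.isPartition)
```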
sqlframe/databricks/column.py
ADDED
@@ -0,0 +1 @@
+from sqlframe.base.column import Column
sqlframe/databricks/dataframe.py
ADDED
@@ -0,0 +1,69 @@
+from __future__ import annotations
+
+import logging
+import sys
+import typing as t
+
+from sqlframe.base.catalog import Column as CatalogColumn
+from sqlframe.base.dataframe import (
+    _BaseDataFrame,
+    _BaseDataFrameNaFunctions,
+    _BaseDataFrameStatFunctions,
+)
+from sqlframe.base.mixins.dataframe_mixins import NoCachePersistSupportMixin
+from sqlframe.base.util import normalize_string
+from sqlframe.databricks.group import DatabricksGroupedData
+
+if t.TYPE_CHECKING:
+    from sqlframe.databricks.readwriter import DatabricksDataFrameWriter
+    from sqlframe.databricks.session import DatabricksSession
+
+
+logger = logging.getLogger(__name__)
+
+
+class DatabricksDataFrameNaFunctions(_BaseDataFrameNaFunctions["DatabricksDataFrame"]):
+    pass
+
+
+class DatabricksDataFrameStatFunctions(_BaseDataFrameStatFunctions["DatabricksDataFrame"]):
+    pass
+
+
+class DatabricksDataFrame(
+    NoCachePersistSupportMixin,
+    _BaseDataFrame[
+        "DatabricksSession",
+        "DatabricksDataFrameWriter",
+        "DatabricksDataFrameNaFunctions",
+        "DatabricksDataFrameStatFunctions",
+        "DatabricksGroupedData",
+    ],
+):
+    _na = DatabricksDataFrameNaFunctions
+    _stat = DatabricksDataFrameStatFunctions
+    _group_data = DatabricksGroupedData
+
+    @property
+    def _typed_columns(self) -> t.List[CatalogColumn]:
+        sql = self.session._to_sql(self.expression)
+        columns = []
+        for row in self.session._collect(r"DESCRIBE QUERY ({sql})".format(sql=sql)):
+            columns.append(
+                CatalogColumn(
+                    name=normalize_string(
+                        row.col_name, from_dialect="execution", to_dialect="output"
+                    ),
+                    dataType=normalize_string(
+                        row.data_type,
+                        from_dialect="execution",
+                        to_dialect="output",
+                        is_datatype=True,
+                    ),
+                    nullable=True,
+                    description=row.comment,
+                    isPartition=False,
+                    isBucket=False,
+                )
+            )
+        return columns
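Two behaviors stand out in this file: `_typed_columns` resolves a frame's typed schema by round-tripping a `DESCRIBE QUERY` over the generated SQL, and `NoCachePersistSupportMixin` (judging by its name) makes `cache()`/`persist()` harmless no-ops rather than errors. A hedged sketch, assuming a connected session:

```python
# Assumes `session` is a connected DatabricksSession.
df = session.sql("SELECT 1 AS id, 'a' AS name")

# Typed schema access goes through _typed_columns, which issues
# DESCRIBE QUERY (<generated sql>) against the warehouse.
# cache() is accepted for Spark compatibility but, per
# NoCachePersistSupportMixin, does not issue CACHE TABLE here.
df.cache()
```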
sqlframe/databricks/functions.py
ADDED
@@ -0,0 +1,22 @@
+import inspect
+import sys
+
+import sqlframe.base.functions  # noqa
+
+module = sys.modules["sqlframe.base.functions"]
+globals().update(
+    {
+        name: func
+        for name, func in inspect.getmembers(module, inspect.isfunction)
+        if hasattr(func, "unsupported_engines") and "databricks" not in func.unsupported_engines
+    }
+)
+
+
+from sqlframe.base.function_alternatives import (  # noqa
+    percentile_without_disc as percentile,
+    add_months_by_multiplication as add_months,
+    arrays_overlap_renamed as arrays_overlap,
+    _is_string_using_typeof_string_lcase as _is_string,
+    try_element_at_zero_based as try_element_at,
+)
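At import time this module copies into its namespace every function from `sqlframe.base.functions` whose `unsupported_engines` metadata does not list `databricks`, then overrides a handful with Databricks-specific alternatives (e.g. `add_months` becomes the multiplication-based variant). A hedged sketch of the resulting call surface:

```python
# The names resolve normally even though most are injected dynamically.
from sqlframe.databricks import functions as F

expr = F.add_months(F.col("start_date"), 3)  # add_months_by_multiplication
med = F.percentile(F.col("amount"), 0.5)     # percentile_without_disc
```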
sqlframe/databricks/functions.pyi
ADDED
@@ -0,0 +1,416 @@
+from sqlframe.base.function_alternatives import (  # noqa
+    percentile_without_disc as percentile,
+    add_months_by_multiplication as add_months,
+    arrays_overlap_renamed as arrays_overlap,
+    try_element_at_zero_based as try_element_at,
+)
+from sqlframe.base.functions import (
+    abs as abs,
+    acos as acos,
+    acosh as acosh,
+    aes_decrypt as aes_decrypt,
+    aes_encrypt as aes_encrypt,
+    aggregate as aggregate,
+    any_value as any_value,
+    approxCountDistinct as approxCountDistinct,
+    approx_count_distinct as approx_count_distinct,
+    approx_percentile as approx_percentile,
+    array as array,
+    array_agg as array_agg,
+    array_append as array_append,
+    array_compact as array_compact,
+    array_contains as array_contains,
+    array_distinct as array_distinct,
+    array_except as array_except,
+    array_insert as array_insert,
+    array_intersect as array_intersect,
+    array_join as array_join,
+    array_max as array_max,
+    array_min as array_min,
+    array_position as array_position,
+    array_prepend as array_prepend,
+    array_remove as array_remove,
+    array_repeat as array_repeat,
+    array_size as array_size,
+    array_sort as array_sort,
+    array_union as array_union,
+    arrays_zip as arrays_zip,
+    asc as asc,
+    asc_nulls_first as asc_nulls_first,
+    asc_nulls_last as asc_nulls_last,
+    ascii as ascii,
+    asin as asin,
+    asinh as asinh,
+    assert_true as assert_true,
+    atan as atan,
+    atan2 as atan2,
+    atanh as atanh,
+    avg as avg,
+    base64 as base64,
+    bin as bin,
+    bit_and as bit_and,
+    bit_count as bit_count,
+    bit_get as bit_get,
+    bit_length as bit_length,
+    bit_or as bit_or,
+    bit_xor as bit_xor,
+    bitmap_bit_position as bitmap_bit_position,
+    bitmap_bucket_number as bitmap_bucket_number,
+    bitmap_construct_agg as bitmap_construct_agg,
+    bitmap_count as bitmap_count,
+    bitmap_or_agg as bitmap_or_agg,
+    bitwiseNOT as bitwiseNOT,
+    bitwise_not as bitwise_not,
+    bool_and as bool_and,
+    bool_or as bool_or,
+    broadcast as broadcast,
+    bround as bround,
+    btrim as btrim,
+    bucket as bucket,
+    call_function as call_function,
+    cardinality as cardinality,
+    cbrt as cbrt,
+    ceil as ceil,
+    ceiling as ceiling,
+    char as char,
+    char_length as char_length,
+    character_length as character_length,
+    coalesce as coalesce,
+    col as col,
+    collect_list as collect_list,
+    collect_set as collect_set,
+    concat as concat,
+    concat_ws as concat_ws,
+    contains as contains,
+    conv as conv,
+    convert_timezone as convert_timezone,
+    corr as corr,
+    cos as cos,
+    cosh as cosh,
+    cot as cot,
+    count as count,
+    countDistinct as countDistinct,
+    count_distinct as count_distinct,
+    count_if as count_if,
+    count_min_sketch as count_min_sketch,
+    covar_pop as covar_pop,
+    covar_samp as covar_samp,
+    crc32 as crc32,
+    create_map as create_map,
+    csc as csc,
+    cume_dist as cume_dist,
+    curdate as curdate,
+    current_catalog as current_catalog,
+    current_database as current_database,
+    current_date as current_date,
+    current_schema as current_schema,
+    current_timestamp as current_timestamp,
+    current_timezone as current_timezone,
+    current_user as current_user,
+    date_add as date_add,
+    date_diff as date_diff,
+    date_format as date_format,
+    date_from_unix_date as date_from_unix_date,
+    date_part as date_part,
+    date_sub as date_sub,
+    date_trunc as date_trunc,
+    dateadd as dateadd,
+    datediff as datediff,
+    datepart as datepart,
+    day as day,
+    dayofmonth as dayofmonth,
+    dayofweek as dayofweek,
+    dayofyear as dayofyear,
+    days as days,
+    decode as decode,
+    degrees as degrees,
+    dense_rank as dense_rank,
+    desc as desc,
+    desc_nulls_first as desc_nulls_first,
+    desc_nulls_last as desc_nulls_last,
+    e as e,
+    element_at as element_at,
+    elt as elt,
+    encode as encode,
+    endswith as endswith,
+    equal_null as equal_null,
+    every as every,
+    exists as exists,
+    exp as exp,
+    explode as explode,
+    explode_outer as explode_outer,
+    expm1 as expm1,
+    expr as expr,
+    extract as extract,
+    factorial as factorial,
+    filter as filter,
+    find_in_set as find_in_set,
+    first as first,
+    first_value as first_value,
+    flatten as flatten,
+    floor as floor,
+    forall as forall,
+    format_number as format_number,
+    format_string as format_string,
+    from_csv as from_csv,
+    from_json as from_json,
+    from_unixtime as from_unixtime,
+    from_utc_timestamp as from_utc_timestamp,
+    get as get,
+    get_active_spark_context as get_active_spark_context,
+    get_json_object as get_json_object,
+    getbit as getbit,
+    greatest as greatest,
+    grouping as grouping,
+    grouping_id as grouping_id,
+    hash as hash,
+    hex as hex,
+    histogram_numeric as histogram_numeric,
+    hll_sketch_agg as hll_sketch_agg,
+    hll_sketch_estimate as hll_sketch_estimate,
+    hll_union as hll_union,
+    hll_union_agg as hll_union_agg,
+    hour as hour,
+    hours as hours,
+    hypot as hypot,
+    ifnull as ifnull,
+    ilike as ilike,
+    initcap as initcap,
+    inline as inline,
+    inline_outer as inline_outer,
+    input_file_name as input_file_name,
+    instr as instr,
+    isnan as isnan,
+    isnotnull as isnotnull,
+    isnull as isnull,
+    java_method as java_method,
+    json_array_length as json_array_length,
+    json_object_keys as json_object_keys,
+    json_tuple as json_tuple,
+    kurtosis as kurtosis,
+    lag as lag,
+    last as last,
+    last_day as last_day,
+    last_value as last_value,
+    lcase as lcase,
+    lead as lead,
+    least as least,
+    left as left,
+    length as length,
+    levenshtein as levenshtein,
+    like as like,
+    lit as lit,
+    ln as ln,
+    localtimestamp as localtimestamp,
+    locate as locate,
+    log as log,
+    log10 as log10,
+    log1p as log1p,
+    log2 as log2,
+    lower as lower,
+    lpad as lpad,
+    ltrim as ltrim,
+    make_date as make_date,
+    make_dt_interval as make_dt_interval,
+    make_interval as make_interval,
+    make_timestamp as make_timestamp,
+    make_timestamp_ltz as make_timestamp_ltz,
+    make_timestamp_ntz as make_timestamp_ntz,
+    make_ym_interval as make_ym_interval,
+    map_concat as map_concat,
+    map_contains_key as map_contains_key,
+    map_entries as map_entries,
+    map_filter as map_filter,
+    map_from_arrays as map_from_arrays,
+    map_from_entries as map_from_entries,
+    map_keys as map_keys,
+    map_values as map_values,
+    map_zip_with as map_zip_with,
+    mask as mask,
+    max as max,
+    max_by as max_by,
+    md5 as md5,
+    mean as mean,
+    median as median,
+    min as min,
+    min_by as min_by,
+    minute as minute,
+    mode as mode,
+    monotonically_increasing_id as monotonically_increasing_id,
+    month as month,
+    months as months,
+    months_between as months_between,
+    named_struct as named_struct,
+    nanvl as nanvl,
+    negate as negate,
+    negative as negative,
+    next_day as next_day,
+    now as now,
+    nth_value as nth_value,
+    ntile as ntile,
+    nullif as nullif,
+    nvl as nvl,
+    nvl2 as nvl2,
+    octet_length as octet_length,
+    overlay as overlay,
+    parse_url as parse_url,
+    percent_rank as percent_rank,
+    percentile_approx as percentile_approx,
+    pi as pi,
+    pmod as pmod,
+    posexplode as posexplode,
+    posexplode_outer as posexplode_outer,
+    position as position,
+    positive as positive,
+    pow as pow,
+    power as power,
+    printf as printf,
+    quarter as quarter,
+    radians as radians,
+    raise_error as raise_error,
+    rand as rand,
+    randn as randn,
+    rank as rank,
+    reduce as reduce,
+    reflect as reflect,
+    regexp as regexp,
+    regexp_count as regexp_count,
+    regexp_extract as regexp_extract,
+    regexp_extract_all as regexp_extract_all,
+    regexp_instr as regexp_instr,
+    regexp_like as regexp_like,
+    regexp_replace as regexp_replace,
+    regexp_substr as regexp_substr,
+    regr_avgx as regr_avgx,
+    regr_avgy as regr_avgy,
+    regr_count as regr_count,
+    regr_intercept as regr_intercept,
+    regr_r2 as regr_r2,
+    regr_slope as regr_slope,
+    regr_sxx as regr_sxx,
+    regr_sxy as regr_sxy,
+    regr_syy as regr_syy,
+    repeat as repeat,
+    replace as replace,
+    reverse as reverse,
+    right as right,
+    rint as rint,
+    rlike as rlike,
+    round as round,
+    row_number as row_number,
+    rpad as rpad,
+    rtrim as rtrim,
+    schema_of_csv as schema_of_csv,
+    schema_of_json as schema_of_json,
+    sec as sec,
+    second as second,
+    sentences as sentences,
+    sequence as sequence,
+    sha as sha,
+    sha1 as sha1,
+    sha2 as sha2,
+    shiftLeft as shiftLeft,
+    shiftRight as shiftRight,
+    shiftRightUnsigned as shiftRightUnsigned,
+    shiftleft as shiftleft,
+    shiftright as shiftright,
+    shiftrightunsigned as shiftrightunsigned,
+    shuffle as shuffle,
+    sign as sign,
+    signum as signum,
+    sin as sin,
+    sinh as sinh,
+    size as size,
+    skewness as skewness,
+    slice as slice,
+    some as some,
+    sort_array as sort_array,
+    soundex as soundex,
+    spark_partition_id as spark_partition_id,
+    split as split,
+    split_part as split_part,
+    sqrt as sqrt,
+    stack as stack,
+    startswith as startswith,
+    std as std,
+    stddev as stddev,
+    stddev_pop as stddev_pop,
+    stddev_samp as stddev_samp,
+    str_to_map as str_to_map,
+    struct as struct,
+    substr as substr,
+    substring as substring,
+    substring_index as substring_index,
+    sum as sum,
+    sumDistinct as sumDistinct,
+    sum_distinct as sum_distinct,
+    tan as tan,
+    tanh as tanh,
+    timestamp_micros as timestamp_micros,
+    timestamp_millis as timestamp_millis,
+    timestamp_seconds as timestamp_seconds,
+    toDegrees as toDegrees,
+    toRadians as toRadians,
+    to_binary as to_binary,
+    to_char as to_char,
+    to_csv as to_csv,
+    to_date as to_date,
+    to_json as to_json,
+    to_number as to_number,
+    to_timestamp as to_timestamp,
+    to_timestamp_ltz as to_timestamp_ltz,
+    to_timestamp_ntz as to_timestamp_ntz,
+    to_unix_timestamp as to_unix_timestamp,
+    to_utc_timestamp as to_utc_timestamp,
+    to_varchar as to_varchar,
+    transform as transform,
+    transform_keys as transform_keys,
+    transform_values as transform_values,
+    translate as translate,
+    trim as trim,
+    trunc as trunc,
+    try_add as try_add,
+    try_aes_decrypt as try_aes_decrypt,
+    try_avg as try_avg,
+    try_divide as try_divide,
+    try_multiply as try_multiply,
+    try_subtract as try_subtract,
+    try_sum as try_sum,
+    try_to_binary as try_to_binary,
+    try_to_number as try_to_number,
+    try_to_timestamp as try_to_timestamp,
+    typeof as typeof,
+    ucase as ucase,
+    unbase64 as unbase64,
+    unhex as unhex,
+    unix_date as unix_date,
+    unix_micros as unix_micros,
+    unix_millis as unix_millis,
+    unix_seconds as unix_seconds,
+    unix_timestamp as unix_timestamp,
+    upper as upper,
+    url_decode as url_decode,
+    url_encode as url_encode,
+    user as user,
+    var_pop as var_pop,
+    var_samp as var_samp,
+    variance as variance,
+    version as version,
+    weekday as weekday,
+    weekofyear as weekofyear,
+    when as when,
+    width_bucket as width_bucket,
+    xpath as xpath,
+    xpath_boolean as xpath_boolean,
+    xpath_double as xpath_double,
+    xpath_float as xpath_float,
+    xpath_int as xpath_int,
+    xpath_long as xpath_long,
+    xpath_number as xpath_number,
+    xpath_short as xpath_short,
+    xpath_string as xpath_string,
+    xxhash64 as xxhash64,
+    year as year,
+    years as years,
+    zip_with as zip_with,
+)
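The stub file exists because static type checkers cannot follow the dynamic `globals().update(...)` in `functions.py`; the `.pyi` re-exports the same names statically. A sketch of the effect:

```python
# Resolves for mypy/pyright via the stub, even though the runtime
# binding is created dynamically at import time:
from sqlframe.databricks.functions import col, add_months
```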
sqlframe/databricks/group.py
ADDED
@@ -0,0 +1,14 @@
+# This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'sqlframe' folder.
+
+from __future__ import annotations
+
+import typing as t
+
+from sqlframe.base.group import _BaseGroupedData
+
+if t.TYPE_CHECKING:
+    from sqlframe.databricks.dataframe import DatabricksDataFrame
+
+
+class DatabricksGroupedData(_BaseGroupedData["DatabricksDataFrame"]):
+    pass
sqlframe/databricks/readwriter.py
ADDED
@@ -0,0 +1,96 @@
+# This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'sqlframe' folder.
+
+from __future__ import annotations
+
+import sys
+import typing as t
+
+import sqlglot as sg
+from sqlglot import exp
+
+if sys.version_info >= (3, 11):
+    from typing import Self
+else:
+    from typing_extensions import Self
+
+from sqlframe.base.mixins.readwriter_mixins import PandasLoaderMixin, PandasWriterMixin
+from sqlframe.base.readerwriter import (
+    _BaseDataFrameReader,
+    _BaseDataFrameWriter,
+)
+from sqlframe.base.util import normalize_string
+
+if t.TYPE_CHECKING:
+    from sqlframe.databricks.session import DatabricksSession  # noqa
+    from sqlframe.databricks.dataframe import DatabricksDataFrame  # noqa
+
+
+class DatabricksDataFrameReader(
+    PandasLoaderMixin["DatabricksSession", "DatabricksDataFrame"],
+    _BaseDataFrameReader["DatabricksSession", "DatabricksDataFrame"],
+):
+    pass
+
+
+class DatabricksDataFrameWriter(
+    PandasWriterMixin["DatabricksSession", "DatabricksDataFrame"],
+    _BaseDataFrameWriter["DatabricksSession", "DatabricksDataFrame"],
+):
+    def saveAsTable(
+        self,
+        name: str,
+        format: t.Optional[str] = None,
+        mode: t.Optional[str] = None,
+        partitionBy: t.Optional[t.Union[str, t.List[str]]] = None,
+        clusterBy: t.Optional[t.Union[str, t.List[str]]] = None,
+        **options,
+    ) -> Self:
+        if format is not None:
+            raise NotImplementedError("Providing Format in the save as table is not supported")
+        exists, replace, mode = None, None, mode or str(self._mode)
+        if mode == "append":
+            return self.insertInto(name)
+        if mode == "ignore":
+            exists = True
+        if mode == "overwrite":
+            replace = True
+        name = normalize_string(name, from_dialect="input", is_table=True)
+
+        properties: t.List[exp.Expression] = []
+        if partitionBy is not None:
+            if isinstance(partitionBy, str):
+                partition_by = [partitionBy]
+            else:
+                partition_by = partitionBy
+            properties.append(
+                exp.PartitionedByProperty(
+                    this=exp.Tuple(expressions=list(map(sg.to_identifier, partition_by)))
+                )
+            )
+        if clusterBy is not None:
+            if isinstance(clusterBy, str):
+                cluster_by = [clusterBy]
+            else:
+                cluster_by = clusterBy
+            properties.append(
+                exp.Cluster(
+                    expressions=[exp.Tuple(expressions=list(map(sg.to_identifier, cluster_by)))]
+                )
+            )
+
+        properties.extend(
+            exp.Property(this=sg.to_identifier(name), value=exp.convert(value))
+            for name, value in (options or {}).items()
+        )
+
+        output_expression_container = exp.Create(
+            this=exp.to_table(name, dialect=self._session.input_dialect),
+            kind="TABLE",
+            exists=exists,
+            replace=replace,
+            properties=exp.Properties(expressions=properties),
+        )
+        df = self._df.copy(output_expression_container=output_expression_container)
+        if self._session._has_connection:
+            df.collect()
+        return self.copy(_df=df)
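A hedged sketch of the new `saveAsTable` options; `df` is assumed to be a `DatabricksDataFrame` and the table name and extra option key are placeholders. Per the code above, `mode="append"` short-circuits to `insertInto`, `"ignore"` adds `IF NOT EXISTS`, `"overwrite"` produces `CREATE OR REPLACE`, and any extra keyword options become table properties:

```python
df.write.saveAsTable(
    "main.analytics.events",       # placeholder table name
    mode="overwrite",              # -> CREATE OR REPLACE TABLE
    partitionBy=["event_date"],    # -> PARTITIONED BY (event_date)
    clusterBy="user_id",           # -> CLUSTER BY (user_id)
    comment="daily events",        # placeholder kwarg -> table property
)
```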
sqlframe/databricks/session.py
ADDED
@@ -0,0 +1,59 @@
+from __future__ import annotations
+
+import typing as t
+import warnings
+
+from sqlframe.base.session import _BaseSession
+from sqlframe.databricks.catalog import DatabricksCatalog
+from sqlframe.databricks.dataframe import DatabricksDataFrame
+from sqlframe.databricks.readwriter import (
+    DatabricksDataFrameReader,
+    DatabricksDataFrameWriter,
+)
+from sqlframe.databricks.udf import DatabricksUDFRegistration
+
+if t.TYPE_CHECKING:
+    from databricks.sql.client import Connection as DatabricksConnection
+else:
+    DatabricksConnection = t.Any
+
+
+class DatabricksSession(
+    _BaseSession[  # type: ignore
+        DatabricksCatalog,
+        DatabricksDataFrameReader,
+        DatabricksDataFrameWriter,
+        DatabricksDataFrame,
+        DatabricksConnection,
+        DatabricksUDFRegistration,
+    ],
+):
+    _catalog = DatabricksCatalog
+    _reader = DatabricksDataFrameReader
+    _writer = DatabricksDataFrameWriter
+    _df = DatabricksDataFrame
+    _udf_registration = DatabricksUDFRegistration
+
+    def __init__(
+        self,
+        conn: t.Optional[DatabricksConnection] = None,
+        server_hostname: t.Optional[str] = None,
+        http_path: t.Optional[str] = None,
+        access_token: t.Optional[str] = None,
+    ):
+        from databricks import sql
+
+        if not hasattr(self, "_conn"):
+            super().__init__(conn or sql.connect(server_hostname, http_path, access_token))
+
+    class Builder(_BaseSession.Builder):
+        DEFAULT_EXECUTION_DIALECT = "databricks"
+
+        @property
+        def session(self) -> DatabricksSession:
+            return DatabricksSession(**self._session_kwargs)
+
+        def getOrCreate(self) -> DatabricksSession:
+            return super().getOrCreate()  # type: ignore
+
+    builder = Builder()
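Two hedged ways to construct the session, following the `__init__` signature above; the hostname, HTTP path, and token are placeholders. A PySpark-style `builder` is also defined:

```python
from sqlframe.databricks import DatabricksSession

# 1. Let the session open the connection via databricks-sql-connector:
session = DatabricksSession(
    server_hostname="dbc-xxxx.cloud.databricks.com",  # placeholder
    http_path="/sql/1.0/warehouses/abc123",           # placeholder
    access_token="dapi-...",                          # placeholder
)

# 2. Or hand it an existing databricks.sql connection object:
from databricks import sql

conn = sql.connect(
    server_hostname="dbc-xxxx.cloud.databricks.com",
    http_path="/sql/1.0/warehouses/abc123",
    access_token="dapi-...",
)
session = DatabricksSession(conn=conn)
```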
sqlframe/databricks/types.py
ADDED
@@ -0,0 +1 @@
+from sqlframe.base.types import *
sqlframe/databricks/udf.py
ADDED
@@ -0,0 +1,11 @@
+from __future__ import annotations
+
+import typing as t
+
+from sqlframe.base.udf import _BaseUDFRegistration
+
+if t.TYPE_CHECKING:
+    from sqlframe.databricks.session import DatabricksSession
+
+
+class DatabricksUDFRegistration(_BaseUDFRegistration["DatabricksSession"]): ...
sqlframe/databricks/window.py
ADDED
@@ -0,0 +1 @@
+from sqlframe.base.window import *
{sqlframe-3.8.2.dist-info → sqlframe-3.9.0.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: sqlframe
-Version: 3.8.2
+Version: 3.9.0
 Summary: Turning PySpark Into a Universal DataFrame API
 Home-page: https://github.com/eakmanrq/sqlframe
 Author: Ryan Eakman
@@ -18,16 +18,18 @@ Requires-Python: >=3.8
 Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: prettytable (<3.12.1)
-Requires-Dist: sqlglot (<25.
+Requires-Dist: sqlglot (<25.33,>=24.0.0)
 Requires-Dist: typing-extensions (<5,>=4.8)
 Provides-Extra: bigquery
 Requires-Dist: google-cloud-bigquery-storage (<3,>=2) ; extra == 'bigquery'
 Requires-Dist: google-cloud-bigquery[pandas] (<4,>=3) ; extra == 'bigquery'
+Provides-Extra: databricks
+Requires-Dist: databricks-sql-connector (<4,>=3.6) ; extra == 'databricks'
 Provides-Extra: dev
 Requires-Dist: duckdb (<1.2,>=0.9) ; extra == 'dev'
 Requires-Dist: findspark (<3,>=2) ; extra == 'dev'
 Requires-Dist: mypy (<1.14,>=1.10.0) ; extra == 'dev'
-Requires-Dist: openai (<1.
+Requires-Dist: openai (<1.56,>=1.30) ; extra == 'dev'
 Requires-Dist: pandas-stubs (<3,>=2) ; extra == 'dev'
 Requires-Dist: pandas (<3,>=2) ; extra == 'dev'
 Requires-Dist: psycopg (<4,>=3.1) ; extra == 'dev'
@@ -36,7 +38,7 @@ Requires-Dist: pyspark (<3.6,>=2) ; extra == 'dev'
 Requires-Dist: pytest-postgresql (<7,>=6) ; extra == 'dev'
 Requires-Dist: pytest-xdist (<3.7,>=3.6) ; extra == 'dev'
 Requires-Dist: pytest (<8.4,>=8.2.0) ; extra == 'dev'
-Requires-Dist: ruff (<0.
+Requires-Dist: ruff (<0.9,>=0.4.4) ; extra == 'dev'
 Requires-Dist: types-psycopg2 (<3,>=2.9) ; extra == 'dev'
 Requires-Dist: pre-commit (>=3.5) ; (python_version == "3.8") and extra == 'dev'
 Requires-Dist: pre-commit (<4.1,>=3.7) ; (python_version >= "3.9") and extra == 'dev'
@@ -50,7 +52,7 @@ Provides-Extra: duckdb
 Requires-Dist: duckdb (<1.2,>=0.9) ; extra == 'duckdb'
 Requires-Dist: pandas (<3,>=2) ; extra == 'duckdb'
 Provides-Extra: openai
-Requires-Dist: openai (<1.
+Requires-Dist: openai (<1.56,>=1.30) ; extra == 'openai'
 Provides-Extra: pandas
 Requires-Dist: pandas (<3,>=2) ; extra == 'pandas'
 Provides-Extra: postgres
@@ -76,6 +78,11 @@ SQLFrame currently supports the following engines (many more in development):
 * [Snowflake](https://sqlframe.readthedocs.io/en/stable/snowflake)
 * [Spark](https://sqlframe.readthedocs.io/en/stable/spark)
 
+There are also two engines in development. These engines lack test coverage and robust documentation, but are available for early testing:
+
+* [Redshift](https://sqlframe.readthedocs.io/en/stable/redshift)
+* [Databricks](https://sqlframe.readthedocs.io/en/stable/databricks)
+
 SQLFrame also has a "Standalone" session that be used to generate SQL without any connection to a database engine.
 
 * [Standalone](https://sqlframe.readthedocs.io/en/stable/standalone)
@@ -100,6 +107,10 @@ pip install "sqlframe[postgres]"
 pip install "sqlframe[snowflake]"
 # Spark
 pip install "sqlframe[spark]"
+# Redshift (in development)
+pip install "sqlframe[redshift]"
+# Databricks (in development)
+pip install "sqlframe[databricks]"
 # Standalone
 pip install sqlframe
 ```
{sqlframe-3.8.2.dist-info → sqlframe-3.9.0.dist-info}/RECORD
CHANGED
@@ -1,10 +1,10 @@
-sqlframe/__init__.py,sha256=
-sqlframe/_version.py,sha256=
+sqlframe/__init__.py,sha256=wfqm98eLoLid9oV_FzzpG5loKC6LxOhj2lXpfN7SARo,3138
+sqlframe/_version.py,sha256=nlCEABnIq3wuDiPbHxDLhorQ-m5w3H6kBSUgZhHE6gc,411
 sqlframe/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 sqlframe/base/_typing.py,sha256=b2clI5HI1zEZKB_3Msx3FeAJQyft44ubUifJwQRVXyQ,1298
 sqlframe/base/catalog.py,sha256=SzFQalTWdhWzxUY-4ut1f9TfOECp_JmJEgNPfrRKCe0,38457
 sqlframe/base/column.py,sha256=06fhVZ2nCn2QLxnfjdK-oYKeTFJC_smgSxu7u2UYlVg,17878
-sqlframe/base/dataframe.py,sha256=
+sqlframe/base/dataframe.py,sha256=DtSeTMNdvfF7ItAIIOoZQlsW4J-GZKmmx3-pz7T9e90,72924
 sqlframe/base/decorators.py,sha256=Jy4bf8MhZ-AJ6CWTj59bBJRqamtLbPC0USUMFrY6g0w,449
 sqlframe/base/exceptions.py,sha256=9Uwvqn2eAkDpqm4BrRgbL61qM-GMCbJEMAW8otxO46s,370
 sqlframe/base/function_alternatives.py,sha256=jofb2-nweefqcjUsd4xVqfRmJSZ-T_0Iq5roW2pL0OA,50768
@@ -35,6 +35,18 @@ sqlframe/bigquery/session.py,sha256=uSiEWWiDEryq3gIJJUmsu1DIalRGomNiymVulxt439c,
 sqlframe/bigquery/types.py,sha256=KwNyuXIo-2xVVd4bZED3YrQOobKCtemlxGrJL7DrTC8,34
 sqlframe/bigquery/udf.py,sha256=ZZ1-P1zWZhQqmhBqwAxfNeKl31nDkkZgkuz7Dn28P_0,264
 sqlframe/bigquery/window.py,sha256=6GKPzuxeSapJakBaKBeT9VpED1ACdjggDv9JRILDyV0,35
+sqlframe/databricks/__init__.py,sha256=BkB_eO1UYwcf8j6x7bi4BWmDCMkfn0CUMwossWgwaG4,993
+sqlframe/databricks/catalog.py,sha256=LpAn2UahzyIC3VKvQ8_6OVevaxFS_mf2HM2K4e4azZ8,11309
+sqlframe/databricks/column.py,sha256=E1tUa62Y5HajkhgFuebU9zohrGyieudcHzTT8gfalio,40
+sqlframe/databricks/dataframe.py,sha256=2VbzMs6ehrXUzgFCKNCARNMVO-FNnmEeku8hPIv948c,2141
+sqlframe/databricks/functions.py,sha256=rDTnOyj_SKm6mzFtL4GraDNXRnbYyfpijtn8dOQDmog,640
+sqlframe/databricks/functions.pyi,sha256=jiZr-EGGuXEodEeKq56MwibcXBk4Lpy4H3brFd0DDVA,11628
+sqlframe/databricks/group.py,sha256=dU3g0DVLRlfOSCamKchQFXRd1WTFbdxoXkpEX8tPD6Y,399
+sqlframe/databricks/readwriter.py,sha256=Lter5V3y6YqDSyv6FOrLF9JRfM6f5kIgB8AC-4nfJJo,3285
+sqlframe/databricks/session.py,sha256=BOpYMy2bgtYZ7XvEhQD2gqzl5XHEvFHt3R2V40y2isI,1752
+sqlframe/databricks/types.py,sha256=KwNyuXIo-2xVVd4bZED3YrQOobKCtemlxGrJL7DrTC8,34
+sqlframe/databricks/udf.py,sha256=3rmxv_6zSLfIxH8P8P050ZO-ki0aqBb9wWuUQBtl4m8,272
+sqlframe/databricks/window.py,sha256=6GKPzuxeSapJakBaKBeT9VpED1ACdjggDv9JRILDyV0,35
 sqlframe/duckdb/__init__.py,sha256=KAw_uZEhFMwi3D9Wj6AgHAKqLNk-EAx2uDIYu56oL44,872
 sqlframe/duckdb/catalog.py,sha256=YYYVmetLUaJOdObKw4AJ7L0P-msshkta4xHlcZQ9zEA,4795
 sqlframe/duckdb/column.py,sha256=E1tUa62Y5HajkhgFuebU9zohrGyieudcHzTT8gfalio,40
@@ -107,8 +119,8 @@ sqlframe/standalone/udf.py,sha256=azmgtUjHNIPs0WMVNId05SHwiYn41MKVBhKXsQJ5dmY,27
 sqlframe/standalone/window.py,sha256=6GKPzuxeSapJakBaKBeT9VpED1ACdjggDv9JRILDyV0,35
 sqlframe/testing/__init__.py,sha256=VVCosQhitU74A3NnE52O4mNtGZONapuEXcc20QmSlnQ,132
 sqlframe/testing/utils.py,sha256=9DDYVuocO7tygee3RaajuJNZ24sJwf_LY556kKg7kTw,13011
-sqlframe-3.
-sqlframe-3.
-sqlframe-3.
-sqlframe-3.
-sqlframe-3.
+sqlframe-3.9.0.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
+sqlframe-3.9.0.dist-info/METADATA,sha256=AKqgRmEJB00qxx_FXfzKHeFS4346nx_W3i6jP62o7mo,9142
+sqlframe-3.9.0.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
+sqlframe-3.9.0.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
+sqlframe-3.9.0.dist-info/RECORD,,
{sqlframe-3.8.2.dist-info → sqlframe-3.9.0.dist-info}/LICENSE
File without changes
{sqlframe-3.8.2.dist-info → sqlframe-3.9.0.dist-info}/WHEEL
File without changes
{sqlframe-3.8.2.dist-info → sqlframe-3.9.0.dist-info}/top_level.txt
File without changes