ygg 0.1.30__py3-none-any.whl → 0.1.32__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ygg-0.1.30.dist-info → ygg-0.1.32.dist-info}/METADATA +1 -1
- ygg-0.1.32.dist-info/RECORD +60 -0
- yggdrasil/__init__.py +2 -0
- yggdrasil/databricks/__init__.py +2 -0
- yggdrasil/databricks/compute/__init__.py +2 -0
- yggdrasil/databricks/compute/cluster.py +241 -2
- yggdrasil/databricks/compute/execution_context.py +100 -11
- yggdrasil/databricks/compute/remote.py +16 -0
- yggdrasil/databricks/jobs/__init__.py +5 -0
- yggdrasil/databricks/jobs/config.py +31 -34
- yggdrasil/databricks/sql/__init__.py +2 -0
- yggdrasil/databricks/sql/engine.py +217 -36
- yggdrasil/databricks/sql/exceptions.py +1 -0
- yggdrasil/databricks/sql/statement_result.py +148 -1
- yggdrasil/databricks/sql/types.py +49 -1
- yggdrasil/databricks/workspaces/__init__.py +4 -1
- yggdrasil/databricks/workspaces/filesytem.py +344 -0
- yggdrasil/databricks/workspaces/io.py +1123 -0
- yggdrasil/databricks/workspaces/path.py +1415 -0
- yggdrasil/databricks/workspaces/path_kind.py +13 -0
- yggdrasil/databricks/workspaces/workspace.py +298 -154
- yggdrasil/dataclasses/__init__.py +2 -0
- yggdrasil/dataclasses/dataclass.py +42 -1
- yggdrasil/libs/__init__.py +2 -0
- yggdrasil/libs/databrickslib.py +9 -0
- yggdrasil/libs/extensions/__init__.py +2 -0
- yggdrasil/libs/extensions/polars_extensions.py +72 -0
- yggdrasil/libs/extensions/spark_extensions.py +116 -0
- yggdrasil/libs/pandaslib.py +7 -0
- yggdrasil/libs/polarslib.py +7 -0
- yggdrasil/libs/sparklib.py +41 -0
- yggdrasil/pyutils/__init__.py +4 -0
- yggdrasil/pyutils/callable_serde.py +106 -0
- yggdrasil/pyutils/exceptions.py +16 -0
- yggdrasil/pyutils/modules.py +44 -1
- yggdrasil/pyutils/parallel.py +29 -0
- yggdrasil/pyutils/python_env.py +301 -0
- yggdrasil/pyutils/retry.py +57 -0
- yggdrasil/requests/__init__.py +4 -0
- yggdrasil/requests/msal.py +124 -3
- yggdrasil/requests/session.py +18 -0
- yggdrasil/types/__init__.py +2 -0
- yggdrasil/types/cast/__init__.py +2 -1
- yggdrasil/types/cast/arrow_cast.py +131 -0
- yggdrasil/types/cast/cast_options.py +119 -1
- yggdrasil/types/cast/pandas_cast.py +29 -0
- yggdrasil/types/cast/polars_cast.py +47 -0
- yggdrasil/types/cast/polars_pandas_cast.py +29 -0
- yggdrasil/types/cast/registry.py +176 -0
- yggdrasil/types/cast/spark_cast.py +76 -0
- yggdrasil/types/cast/spark_pandas_cast.py +29 -0
- yggdrasil/types/cast/spark_polars_cast.py +28 -0
- yggdrasil/types/libs.py +2 -0
- yggdrasil/types/python_arrow.py +191 -0
- yggdrasil/types/python_defaults.py +73 -0
- yggdrasil/version.py +1 -0
- ygg-0.1.30.dist-info/RECORD +0 -56
- yggdrasil/databricks/workspaces/databricks_path.py +0 -784
- {ygg-0.1.30.dist-info → ygg-0.1.32.dist-info}/WHEEL +0 -0
- {ygg-0.1.30.dist-info → ygg-0.1.32.dist-info}/entry_points.txt +0 -0
- {ygg-0.1.30.dist-info → ygg-0.1.32.dist-info}/licenses/LICENSE +0 -0
- {ygg-0.1.30.dist-info → ygg-0.1.32.dist-info}/top_level.txt +0 -0
yggdrasil/databricks/sql/engine.py

@@ -1,17 +1,18 @@
+"""Databricks SQL engine utilities and helpers."""
+
 import dataclasses
-import io
 import logging
 import random
 import string
 import time
-from typing import Optional, Union, Any, Dict, List
+from typing import Optional, Union, Any, Dict, List, Literal

 import pyarrow as pa
 import pyarrow.parquet as pq

 from .statement_result import StatementResult
 from .types import column_info_to_arrow_field
-from .. import DatabricksPathKind
+from .. import DatabricksPathKind, DatabricksPath
 from ..workspaces import WorkspaceService
 from ...libs.databrickslib import databricks_sdk
 from ...libs.sparklib import SparkSession, SparkDataFrame, pyspark
@@ -56,11 +57,12 @@ __all__ = [


 class SqlExecutionError(RuntimeError):
-
+    """Raised when a SQL statement execution fails."""


 @dataclasses.dataclass
 class SQLEngine(WorkspaceService):
+    """Execute SQL statements and manage tables via Databricks."""
     warehouse_id: Optional[str] = None
     catalog_name: Optional[str] = None
     schema_name: Optional[str] = None
@@ -72,6 +74,17 @@ class SQLEngine(WorkspaceService):
         table_name: Optional[str] = None,
         safe_chars: bool = True
     ):
+        """Build a fully qualified table name for the current catalog/schema.
+
+        Args:
+            catalog_name: Optional catalog override.
+            schema_name: Optional schema override.
+            table_name: Table name to qualify.
+            safe_chars: Whether to wrap identifiers in backticks.
+
+        Returns:
+            The fully qualified table name.
+        """
         catalog_name = catalog_name or self.catalog_name
         schema_name = schema_name or self.schema_name

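The added docstring documents how `table_full_name` falls back to the engine's configured catalog and schema. As a rough illustration (the engine instance and all identifier values below are assumptions, not taken from the package):

```python
# `sql_engine` is assumed to be an already-configured SQLEngine whose
# catalog_name / schema_name fields were set when it was created.
full_name = sql_engine.table_full_name(
    table_name="events",        # illustrative table name
    catalog_name="main",        # optional override of sql_engine.catalog_name
    schema_name="analytics",    # optional override of sql_engine.schema_name
    safe_chars=True,            # wrap each identifier in backticks
)
# Per the docstring, the result is a fully qualified name such as
# `main`.`analytics`.`events`.
```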
@@ -87,6 +100,14 @@
         self,
         full_name: str,
     ):
+        """Parse a catalog.schema.table string into components.
+
+        Args:
+            full_name: A fully qualified name or partial name.
+
+        Returns:
+            A tuple of (catalog_name, schema_name, table_name).
+        """
         parts = [
             _.strip("`") for _ in full_name.split(".")
         ]
@@ -108,6 +129,14 @@
         self,
         cluster_size: str = "Small"
     ):
+        """Return a default SQL warehouse matching the desired size.
+
+        Args:
+            cluster_size: Desired warehouse size filter.
+
+        Returns:
+            The matched warehouse object.
+        """
         wk = self.workspace.sdk()
         existing = list(wk.warehouses.list())
         first = None
@@ -131,6 +160,14 @@
         self,
         cluster_size = "Small"
     ):
+        """Return the configured warehouse id or a default one.
+
+        Args:
+            cluster_size: Desired warehouse size filter.
+
+        Returns:
+            The warehouse id string.
+        """
         if not self.warehouse_id:
             dft = self._default_warehouse(cluster_size=cluster_size)

@@ -139,6 +176,14 @@

     @staticmethod
     def _random_suffix(prefix: str = "") -> str:
+        """Generate a unique suffix for temporary resources.
+
+        Args:
+            prefix: Optional prefix to prepend.
+
+        Returns:
+            A unique suffix string.
+        """
         unique = ''.join(random.choice(string.ascii_lowercase + string.digits) for _ in range(8))
         timestamp = int(time.time() * 1000)
         return f"{prefix}{timestamp}_{unique}"
@@ -147,6 +192,7 @@
         self,
         statement: Optional[str] = None,
         *,
+        engine: Optional[Literal["spark", "api"]] = None,
         warehouse_id: Optional[str] = None,
         byte_limit: Optional[int] = None,
         disposition: Optional["Disposition"] = None,
@@ -158,6 +204,7 @@
         catalog_name: Optional[str] = None,
         schema_name: Optional[str] = None,
         table_name: Optional[str] = None,
+        wait_result: bool = True,
         **kwargs,
     ) -> "StatementResult":
         """
@@ -167,19 +214,46 @@
         - On SUCCEEDED: return final statement object
         - On FAILED / CANCELED: raise SqlExecutionError
         - If wait=False: return initial execution handle without polling.
+
+        Args:
+            statement: SQL statement to execute. If omitted, selects from the table.
+            engine: Execution engine ("spark" or "api").
+            warehouse_id: Optional warehouse id override.
+            byte_limit: Optional byte limit for results.
+            disposition: Result disposition mode.
+            format: Result format for Databricks SQL API.
+            on_wait_timeout: Timeout behavior for waiting.
+            parameters: Optional statement parameters.
+            row_limit: Optional row limit.
+            wait_timeout: Optional API wait timeout.
+            catalog_name: Optional catalog override.
+            schema_name: Optional schema override.
+            table_name: Optional table name override.
+            wait_result: Whether to block until completion.
+            **kwargs: Additional API parameters.
+
+        Returns:
+            A StatementResult wrapper for the execution.
         """
-        if
+        if not engine:
+            if pyspark is not None:
+                spark_session = SparkSession.getActiveSession()
+
+                if spark_session is not None:
+                    engine = "spark"
+
+        if engine == "spark":
             spark_session = SparkSession.getActiveSession()

-            if spark_session is
-
+            if spark_session is None:
+                raise ValueError("No spark session found to run sql query")

-
-
-
-
-
-
+            return StatementResult(
+                engine=self,
+                statement_id="sparksql",
+                disposition=Disposition.EXTERNAL_LINKS,
+                _spark_df=spark_session.sql(statement)
+            )

         if format is None:
             format = Format.ARROW_STREAM
@@ -217,7 +291,7 @@
             disposition=disposition
         )

-        return execution
+        return execution.wait() if wait_result else wait_result

     def spark_table(
         self,
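Taken together, the execute() hunks add an engine selector and a wait_result flag: when `engine` is omitted, an active SparkSession (if any) routes the statement through `spark.sql` and wraps it in a StatementResult; otherwise the Databricks SQL Statement Execution API path is used, and `wait_result` controls whether the call blocks on completion. A hedged usage sketch (the `sql_engine` object and the queries are illustrative):

```python
# `sql_engine` is assumed to be a configured SQLEngine instance.

# Explicitly use the SQL Statement Execution API and block until done.
api_result = sql_engine.execute(
    "SELECT 1 AS probe",   # illustrative statement
    engine="api",
    wait_result=True,
)

# Inside a Databricks runtime with an active SparkSession, leaving `engine`
# unset lets execute() pick the "spark" path, returning a StatementResult
# that wraps spark_session.sql(statement).
spark_result = sql_engine.execute("SELECT current_date() AS today")
```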
@@ -226,6 +300,17 @@
         schema_name: Optional[str] = None,
         table_name: Optional[str] = None,
     ):
+        """Return a DeltaTable handle for a given table name.
+
+        Args:
+            full_name: Fully qualified table name.
+            catalog_name: Optional catalog override.
+            schema_name: Optional schema override.
+            table_name: Optional table name override.
+
+        Returns:
+            A Spark DeltaTable handle.
+        """
         if not full_name:
             full_name = self.table_full_name(
                 catalog_name=catalog_name,
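The new docstring says spark_table returns a Delta table handle for the resolved name. The method body is not shown in this hunk, but with delta-spark a handle for a fully qualified table is conventionally obtained like this (purely illustrative, not necessarily what the package does internally):

```python
from delta.tables import DeltaTable
from pyspark.sql import SparkSession

spark = SparkSession.getActiveSession()  # assumes an active session exists

# Illustrative fully qualified name; in SQLEngine.spark_table it would come
# from table_full_name(catalog_name=..., schema_name=..., table_name=...).
events = DeltaTable.forName(spark, "main.analytics.events")
events.toDF().show()
```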
@@ -258,6 +343,27 @@
         spark_session: Optional[SparkSession] = None,
         spark_options: Optional[Dict[str, Any]] = None
     ):
+        """Insert data into a table using Spark or Arrow paths.
+
+        Args:
+            data: Arrow or Spark data to insert.
+            location: Fully qualified table name override.
+            catalog_name: Optional catalog override.
+            schema_name: Optional schema override.
+            table_name: Optional table name override.
+            mode: Insert mode ("auto", "append", "overwrite").
+            cast_options: Optional casting options.
+            overwrite_schema: Whether to overwrite schema (Spark).
+            match_by: Optional merge keys for upserts.
+            zorder_by: Optional Z-ORDER columns.
+            optimize_after_merge: Whether to run OPTIMIZE after merge.
+            vacuum_hours: Optional VACUUM retention window.
+            spark_session: Optional SparkSession override.
+            spark_options: Optional Spark write options.
+
+        Returns:
+            None for Arrow inserts, or the Spark insert result.
+        """
         # -------- existing logic you provided (kept intact) ----------
         if pyspark is not None:
             spark_session = SparkSession.getActiveSession() if spark_session is None else spark_session
@@ -310,8 +416,30 @@
         zorder_by: list[str] = None,
         optimize_after_merge: bool = False,
         vacuum_hours: int | None = None,  # e.g., 168 for 7 days
-        existing_schema: pa.Schema | None = None
+        existing_schema: pa.Schema | None = None,
+        temp_volume_path: Optional[Union[str, DatabricksPath]] = None
     ):
+        """Insert Arrow data by staging to a temp volume and running SQL.
+
+        Args:
+            data: Arrow table/batch data to insert.
+            location: Fully qualified table name override.
+            catalog_name: Optional catalog override.
+            schema_name: Optional schema override.
+            table_name: Optional table name override.
+            mode: Insert mode ("auto", "append", "overwrite").
+            cast_options: Optional casting options.
+            overwrite_schema: Whether to overwrite schema.
+            match_by: Optional merge keys for upserts.
+            zorder_by: Optional Z-ORDER columns.
+            optimize_after_merge: Whether to run OPTIMIZE after merge.
+            vacuum_hours: Optional VACUUM retention window.
+            existing_schema: Optional pre-fetched schema.
+            temp_volume_path: Optional temp volume path override.
+
+        Returns:
+            None.
+        """
         location, catalog_name, schema_name, table_name = self._check_location_params(
             location=location,
             catalog_name=catalog_name,
@@ -375,14 +503,14 @@
             data = convert(data, pa.Table, options=cast_options, target_field=existing_schema)

             # Write in temp volume
-
+            temp_volume_path = connected.dbfs_path(
                 kind=DatabricksPathKind.VOLUME,
-                parts=[catalog_name, schema_name, "tmp",
-            )
-
+                parts=[catalog_name, schema_name, "tmp", "sql", transaction_id]
+            ) if temp_volume_path is None else DatabricksPath.parse(obj=temp_volume_path, workspace=connected.workspace)
+
+            temp_volume_path.mkdir()

-
-                pq.write_table(data, f, compression="snappy")
+            temp_volume_path.write_arrow_table(data)

             # get column list from arrow schema
             columns = [c for c in existing_schema.names]
@@ -412,7 +540,7 @@

             merge_sql = f"""MERGE INTO {location} AS T
 USING (
-    SELECT {cols_quoted} FROM parquet.`{
+    SELECT {cols_quoted} FROM parquet.`{temp_volume_path}`
 ) AS S
 ON {on_condition}
 {update_clause}
@@ -424,12 +552,12 @@ ON {on_condition}
         if mode.lower() in ("overwrite",):
             insert_sql = f"""INSERT OVERWRITE {location}
 SELECT {cols_quoted}
-FROM parquet.`{
+FROM parquet.`{temp_volume_path}`"""
         else:
             # default: append
             insert_sql = f"""INSERT INTO {location} ({cols_quoted})
 SELECT {cols_quoted}
-FROM parquet.`{
+FROM parquet.`{temp_volume_path}`"""
         statements.append(insert_sql)

         # Execute statements (use your existing execute helper)
@@ -439,7 +567,7 @@ FROM parquet.`{databricks_tmp_folder}`"""
                 connected.execute(stmt.strip())
         finally:
             try:
-
+                temp_volume_path.rmdir(recursive=True)
             except Exception as e:
                 logger.warning(e)

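These hunks replace the old io/pyarrow.parquet staging with the new DatabricksPath helpers: the Arrow table is written under a temporary volume directory (or a caller-supplied temp_volume_path), referenced from parquet.`...` in the generated MERGE/INSERT statements, and removed in the finally block. A condensed sketch of that flow using only calls visible in this diff (the catalog, schema, table, and transaction id values are illustrative, and the top-level import of DatabricksPathKind is assumed from the relative import in engine.py):

```python
import pyarrow as pa
from yggdrasil.databricks import DatabricksPathKind  # assumed re-export; engine.py does `from .. import DatabricksPathKind`

# `connected` is assumed to be a connected SQLEngine, as in the diff.
data = pa.table({"id": [1, 2, 3], "value": ["a", "b", "c"]})

# Stage the Arrow table in a temporary Unity Catalog volume directory ...
tmp = connected.dbfs_path(
    kind=DatabricksPathKind.VOLUME,
    parts=["main", "analytics", "tmp", "sql", "txn_0001"],  # illustrative parts
)
tmp.mkdir()
tmp.write_arrow_table(data)

# ... load it into the target table via SQL, then clean up.
connected.execute(
    f"INSERT INTO `main`.`analytics`.`events` (`id`, `value`) "
    f"SELECT `id`, `value` FROM parquet.`{tmp}`"
)
tmp.rmdir(recursive=True)
```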
@@ -474,6 +602,26 @@ FROM parquet.`{databricks_tmp_folder}`"""
         vacuum_hours: int | None = None,  # e.g., 168 for 7 days
         spark_options: Optional[Dict[str, Any]] = None,
     ):
+        """Insert a Spark DataFrame into a Delta table with optional merge semantics.
+
+        Args:
+            data: Spark DataFrame to insert.
+            location: Fully qualified table name override.
+            catalog_name: Optional catalog override.
+            schema_name: Optional schema override.
+            table_name: Optional table name override.
+            mode: Insert mode ("auto", "append", "overwrite").
+            cast_options: Optional casting options.
+            overwrite_schema: Whether to overwrite schema.
+            match_by: Optional merge keys for upserts.
+            zorder_by: Optional Z-ORDER columns.
+            optimize_after_merge: Whether to run OPTIMIZE after merge.
+            vacuum_hours: Optional VACUUM retention window.
+            spark_options: Optional Spark write options.
+
+        Returns:
+            None.
+        """
         location, catalog_name, schema_name, table_name = self._check_location_params(
             location=location,
             catalog_name=catalog_name,
@@ -573,6 +721,17 @@ FROM parquet.`{databricks_tmp_folder}`"""
         table_name: Optional[str] = None,
         to_arrow_schema: bool = True
     ) -> Union[pa.Field, pa.Schema]:
+        """Fetch a table schema from Unity Catalog as Arrow types.
+
+        Args:
+            catalog_name: Optional catalog override.
+            schema_name: Optional schema override.
+            table_name: Optional table name override.
+            to_arrow_schema: Whether to return an Arrow schema or field.
+
+        Returns:
+            Arrow Schema or Field representing the table.
+        """
         full_name = self.table_full_name(
             catalog_name=catalog_name,
             schema_name=schema_name,
@@ -603,6 +762,17 @@ FROM parquet.`{databricks_tmp_folder}`"""
         schema_name: Optional[str] = None,
         table_name: Optional[str] = None,
     ):
+        """Drop a table if it exists.
+
+        Args:
+            location: Fully qualified table name override.
+            catalog_name: Optional catalog override.
+            schema_name: Optional schema override.
+            table_name: Optional table name override.
+
+        Returns:
+            The StatementResult from executing the drop statement.
+        """
         location, _, _, _ = self._check_location_params(
             location=location,
             catalog_name=catalog_name,
@@ -656,23 +826,22 @@ FROM parquet.`{databricks_tmp_folder}`"""
             safe_chars=True
         )

-        # Create the DDL statement
-        sql = [f"CREATE TABLE {'IF NOT EXISTS ' if if_not_exists else ''}{location} ("]
-
-        # Generate column definitions
-        column_defs = []
-
         if pa.types.is_struct(field.type):
             children = list(field.type)
         else:
             children = [field]

-
-
-
+        # Create the DDL statement
+        column_definitions = [
+            self._field_to_ddl(child)
+            for child in children
+        ]

-        sql
-
+        sql = [
+            f"CREATE TABLE {'IF NOT EXISTS ' if if_not_exists else ''}{location} (",
+            ",\n ".join(column_definitions),
+            ")"
+        ]

         # Add partition by clause if provided
         if partition_by and len(partition_by) > 0:
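The refactor first maps each Arrow child field to a column definition via _field_to_ddl and only then assembles the CREATE TABLE statement as a list of fragments. A self-contained sketch of the same shape (the type mapping below is a hypothetical stand-in, since _field_to_ddl's body is not part of this diff, and the table name is illustrative):

```python
import pyarrow as pa

def field_to_ddl(field: pa.Field) -> str:
    # Hypothetical minimal mapping; the package's _field_to_ddl presumably
    # covers the full Arrow -> Databricks SQL type matrix.
    if pa.types.is_integer(field.type):
        sql_type = "BIGINT"
    elif pa.types.is_floating(field.type):
        sql_type = "DOUBLE"
    else:
        sql_type = "STRING"
    return f"`{field.name}` {sql_type}"

field = pa.field("row", pa.struct([("id", pa.int64()), ("name", pa.string())]))
children = list(field.type) if pa.types.is_struct(field.type) else [field]

column_definitions = [field_to_ddl(child) for child in children]
sql = [
    "CREATE TABLE IF NOT EXISTS `main`.`analytics`.`events` (",  # illustrative name
    ",\n ".join(column_definitions),
    ")",
]
print("\n".join(sql))
```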
@@ -729,6 +898,18 @@ FROM parquet.`{databricks_tmp_folder}`"""
         table_name: Optional[str] = None,
         safe_chars: bool = True
     ):
+        """Resolve location/catalog/schema/table parameters to a full name.
+
+        Args:
+            location: Fully qualified table name override.
+            catalog_name: Optional catalog override.
+            schema_name: Optional schema override.
+            table_name: Optional table name override.
+            safe_chars: Whether to wrap identifiers in backticks.
+
+        Returns:
+            A tuple of (location, catalog_name, schema_name, table_name).
+        """
         if location:
             c, s, t = self._catalog_schema_table_names(location)
             catalog_name, schema_name, table_name = catalog_name or c, schema_name or s, table_name or t

yggdrasil/databricks/sql/exceptions.py

@@ -0,0 +1 @@
+"""Custom exceptions for Databricks SQL helpers."""
|