ygg 0.1.56__py3-none-any.whl → 0.1.60__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. {ygg-0.1.56.dist-info → ygg-0.1.60.dist-info}/METADATA +1 -1
  2. ygg-0.1.60.dist-info/RECORD +74 -0
  3. {ygg-0.1.56.dist-info → ygg-0.1.60.dist-info}/WHEEL +1 -1
  4. yggdrasil/ai/__init__.py +2 -0
  5. yggdrasil/ai/session.py +89 -0
  6. yggdrasil/ai/sql_session.py +310 -0
  7. yggdrasil/databricks/__init__.py +0 -3
  8. yggdrasil/databricks/compute/cluster.py +68 -113
  9. yggdrasil/databricks/compute/command_execution.py +674 -0
  10. yggdrasil/databricks/compute/exceptions.py +7 -2
  11. yggdrasil/databricks/compute/execution_context.py +465 -277
  12. yggdrasil/databricks/compute/remote.py +4 -14
  13. yggdrasil/databricks/exceptions.py +10 -0
  14. yggdrasil/databricks/sql/__init__.py +0 -4
  15. yggdrasil/databricks/sql/engine.py +161 -173
  16. yggdrasil/databricks/sql/exceptions.py +9 -1
  17. yggdrasil/databricks/sql/statement_result.py +108 -120
  18. yggdrasil/databricks/sql/warehouse.py +331 -92
  19. yggdrasil/databricks/workspaces/io.py +92 -9
  20. yggdrasil/databricks/workspaces/path.py +120 -74
  21. yggdrasil/databricks/workspaces/workspace.py +212 -68
  22. yggdrasil/libs/databrickslib.py +23 -18
  23. yggdrasil/libs/extensions/spark_extensions.py +1 -1
  24. yggdrasil/libs/pandaslib.py +15 -6
  25. yggdrasil/libs/polarslib.py +49 -13
  26. yggdrasil/pyutils/__init__.py +1 -0
  27. yggdrasil/pyutils/callable_serde.py +12 -19
  28. yggdrasil/pyutils/exceptions.py +16 -0
  29. yggdrasil/pyutils/mimetypes.py +0 -0
  30. yggdrasil/pyutils/python_env.py +13 -12
  31. yggdrasil/pyutils/waiting_config.py +171 -0
  32. yggdrasil/types/cast/arrow_cast.py +3 -0
  33. yggdrasil/types/cast/pandas_cast.py +157 -169
  34. yggdrasil/types/cast/polars_cast.py +11 -43
  35. yggdrasil/types/dummy_class.py +81 -0
  36. yggdrasil/version.py +1 -1
  37. ygg-0.1.56.dist-info/RECORD +0 -68
  38. yggdrasil/databricks/ai/__init__.py +0 -1
  39. yggdrasil/databricks/ai/loki.py +0 -374
  40. {ygg-0.1.56.dist-info → ygg-0.1.60.dist-info}/entry_points.txt +0 -0
  41. {ygg-0.1.56.dist-info → ygg-0.1.60.dist-info}/licenses/LICENSE +0 -0
  42. {ygg-0.1.56.dist-info → ygg-0.1.60.dist-info}/top_level.txt +0 -0
yggdrasil/databricks/compute/remote.py
@@ -36,11 +36,8 @@ def databricks_remote_compute(
     cluster_name: Optional[str] = None,
     workspace: Optional[Union[Workspace, str]] = None,
     cluster: Optional["Cluster"] = None,
-    timeout: Optional[dt.timedelta] = None,
     env_keys: Optional[List[str]] = None,
     force_local: bool = False,
-    update_timeout: Optional[Union[float, dt.timedelta]] = None,
-    **options
 ) -> Callable[[Callable[..., ReturnType]], Callable[..., ReturnType]]:
     """Return a decorator that executes functions on a remote cluster.

@@ -50,11 +47,8 @@ def databricks_remote_compute(
         cluster_name: Optional cluster name to target.
         workspace: Workspace instance or host string for lookup.
         cluster: Pre-configured Cluster instance to reuse.
-        timeout: Optional execution timeout for remote calls.
         env_keys: Optional environment variable names to forward.
         force_local: Force local execution
-        update_timeout: creation or update wait timeout
-        **options: Extra options forwarded to the execution decorator.

     Returns:
         A decorator that runs functions on the resolved Databricks cluster.
@@ -85,14 +79,10 @@ def databricks_remote_compute(
         workspace=workspace,
         cluster_name=cluster_name,
         single_user_name=workspace.current_user.user_name,
-        update_timeout=update_timeout
+        wait_update=False
     )

-    cluster.ensure_running(wait_timeout=None)
-
-    return cluster.execution_decorator(
-        _func=_func,
-        env_keys=env_keys,
-        timeout=timeout,
-        **options
+    return cluster.system_context.decorate(
+        func=_func,
+        environ=env_keys,
     )
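With timeout, update_timeout and **options gone from the signature, a call site now looks roughly like the sketch below; the cluster name and environment variable are hypothetical, the import path is the module shown above, and waiting for the cluster is handled by the execution context rather than by decorator arguments.

from yggdrasil.databricks.compute.remote import databricks_remote_compute

@databricks_remote_compute(
    cluster_name="shared-dev-cluster",   # hypothetical cluster to resolve in the workspace
    env_keys=["MY_FEATURE_FLAG"],        # environment variables forwarded to the remote run
)
def heavy_transform(n: int) -> int:
    # Executes on the resolved Databricks cluster unless force_local=True is passed
    return n * n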
yggdrasil/databricks/exceptions.py (new file)
@@ -0,0 +1,10 @@
+from ..exceptions import YGGException
+
+
+__all__ = [
+    "DatabricksException"
+]
+
+
+class DatabricksException(YGGException):
+    pass
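A possible call-site sketch for the new base class; the wrapped operation is a placeholder, and it assumes the compute/SQL exception classes touched in this release subclass DatabricksException.

from yggdrasil.databricks.exceptions import DatabricksException

def run_safely(operation):
    # One except clause for any yggdrasil Databricks failure (assumption: the
    # subpackage exception classes derive from DatabricksException).
    try:
        return operation()
    except DatabricksException as exc:
        print(f"Databricks call failed: {exc}")
        return None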
yggdrasil/databricks/sql/__init__.py
@@ -2,7 +2,3 @@

 from .engine import SQLEngine, StatementResult
 from .exceptions import SqlStatementError
-
-# Backwards compatibility
-DBXSQL = SQLEngine
-DBXStatementResult = StatementResult
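Code that still imports the removed aliases needs a one-line migration:

# Up to 0.1.56 the deprecated aliases still resolved:
#   from yggdrasil.databricks.sql import DBXSQL, DBXStatementResult
# From 0.1.60 on, import the canonical names instead:
from yggdrasil.databricks.sql import SQLEngine, StatementResult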
yggdrasil/databricks/sql/engine.py
@@ -16,16 +16,20 @@ import logging
 import random
 import string
 import time
+from threading import Thread
 from typing import Optional, Union, Any, Dict, List, Literal

 import pyarrow as pa
+import pyarrow.dataset as pds

 from .statement_result import StatementResult
 from .types import column_info_to_arrow_field
-from .. import DatabricksPathKind, DatabricksPath
-from ..workspaces import WorkspaceService
-from ...libs.databrickslib import databricks_sdk
+from .warehouse import SQLWarehouse
+from ..workspaces import WorkspaceService, DatabricksPath
+from ...ai.sql_session import SQLAISession, SQLFlavor
+from ...libs.databrickslib import databricks_sdk, DatabricksDummyClass
 from ...libs.sparklib import SparkSession, SparkDataFrame, pyspark
+from ...pyutils.waiting_config import WaitingConfigArg
 from ...types import is_arrow_type_string_like, is_arrow_type_binary_like
 from ...types.cast.cast_options import CastOptions
 from ...types.cast.registry import convert
@@ -43,13 +47,14 @@ except ImportError:

 if databricks_sdk is not None:
     from databricks.sdk.service.sql import (
-        StatementResponse, Disposition, Format,
+        Disposition, Format,
         ExecuteStatementRequestOnWaitTimeout, StatementParameterListItem
     )
-    StatementResponse = StatementResponse
 else:
-    class StatementResponse:  # pragma: no cover
-        pass
+    Disposition = DatabricksDummyClass
+    Format = DatabricksDummyClass
+    ExecuteStatementRequestOnWaitTimeout = DatabricksDummyClass
+    StatementParameterListItem = DatabricksDummyClass


 logger = logging.getLogger(__name__)
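The else-branch above follows the usual optional-dependency pattern; a generic sketch of the same idea, with a placeholder class rather than the package's DatabricksDummyClass:

try:
    from databricks.sdk.service.sql import Disposition, Format
except ImportError:
    class _SdkPlaceholder:
        """Stand-in so annotations and defaults still resolve without databricks-sdk."""

    Disposition = _SdkPlaceholder
    Format = _SdkPlaceholder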
@@ -57,7 +62,11 @@ logger = logging.getLogger(__name__)
 if pyspark is not None:
     import pyspark.sql.functions as F

-__all__ = ["SQLEngine", "StatementResult"]
+
+__all__ = [
+    "SQLEngine",
+    "StatementResult"
+]


 @dataclasses.dataclass
@@ -88,10 +97,12 @@ def _needs_column_mapping(col_name: str) -> bool:
 @dataclasses.dataclass
 class SQLEngine(WorkspaceService):
     """Execute SQL statements and manage tables via Databricks SQL / Spark."""
-    warehouse_id: Optional[str] = None
     catalog_name: Optional[str] = None
     schema_name: Optional[str] = None

+    _warehouse: Optional[SQLWarehouse] = dataclasses.field(default=None, repr=False, hash=False, compare=False)
+    _ai_session: Optional[SQLAISession] = dataclasses.field(default=None, repr=False, hash=False, compare=False)
+
     def table_full_name(
         self,
         catalog_name: Optional[str] = None,
@@ -147,68 +158,8 @@ class SQLEngine(WorkspaceService):
             return self.catalog_name, parts[0], parts[1]

         catalog_name, schema_name, table_name = parts[-3], parts[-2], parts[-1]
-        catalog_name = catalog_name or self.catalog_name
-        schema_name = schema_name or self.schema_name
-        return catalog_name, schema_name, table_name
-
-    def _default_warehouse(
-        self,
-        cluster_size: str = "Small"
-    ):
-        """Pick a default SQL warehouse (best-effort) matching the desired size.
-
-        Args:
-            cluster_size: Desired warehouse size (Databricks "cluster_size"), e.g. "Small".
-                If empty/None, returns the first warehouse encountered.

-        Returns:
-            Warehouse object.
-
-        Raises:
-            ValueError: If no warehouses exist in the workspace.
-        """
-        wk = self.workspace.sdk()
-        existing = list(wk.warehouses.list())
-        first = None
-
-        for warehouse in existing:
-            if first is None:
-                first = warehouse
-
-            if cluster_size:
-                if getattr(warehouse, "cluster_size", None) == cluster_size:
-                    logger.debug("Default warehouse match found: id=%s cluster_size=%s", warehouse.id, warehouse.cluster_size)
-                    return warehouse
-            else:
-                logger.debug("Default warehouse selected (first): id=%s", warehouse.id)
-                return warehouse
-
-        if first is not None:
-            logger.info(
-                "No warehouse matched cluster_size=%s; falling back to first warehouse id=%s cluster_size=%s",
-                cluster_size,
-                getattr(first, "id", None),
-                getattr(first, "cluster_size", None),
-            )
-            return first
-
-        raise ValueError(f"No default warehouse found in {wk.config.host}")
-
-    def _get_or_default_warehouse_id(self, cluster_size: str = "Small") -> str:
-        """Return configured warehouse_id or resolve a default one.
-
-        Args:
-            cluster_size: Desired warehouse size filter used when resolving defaults.
-
-        Returns:
-            Warehouse id string.
-        """
-        if not self.warehouse_id:
-            dft = self._default_warehouse(cluster_size=cluster_size)
-            self.warehouse_id = dft.id
-            logger.info("Resolved default warehouse_id=%s (cluster_size=%s)", self.warehouse_id, cluster_size)
-
-        return self.warehouse_id
+        return catalog_name or self.catalog_name, schema_name or self.schema_name, table_name

     @staticmethod
     def _random_suffix(prefix: str = "") -> str:
@@ -217,12 +168,44 @@ class SQLEngine(WorkspaceService):
         timestamp = int(time.time() * 1000)
         return f"{prefix}{timestamp}_{unique}"

-    @staticmethod
-    def _sql_preview(sql: str, limit: int = 220) -> str:
-        """Short, single-line preview for logs (avoids spewing giant SQL)."""
-        if not sql:
-            return ""
-        return sql[:limit] + ("…" if len(sql) > limit else "")
+    def warehouse(
+        self,
+        warehouse_id: Optional[str] = None,
+        warehouse_name: Optional[str] = None,
+    ) -> SQLWarehouse:
+        if self._warehouse is None:
+            wh = SQLWarehouse(
+                workspace=self.workspace,
+                warehouse_id=warehouse_id,
+                warehouse_name=warehouse_name
+            )
+
+            self._warehouse = wh.find_warehouse(
+                warehouse_id=warehouse_id,
+                warehouse_name=warehouse_name,
+                raise_error=False
+            )
+
+            if self._warehouse is None:
+                self._warehouse = wh.create_or_update()
+
+        return self._warehouse.find_warehouse(
+            warehouse_id=warehouse_id,
+            warehouse_name=warehouse_name,
+            raise_error=True
+        )
+
+    def ai_session(
+        self,
+        model: str = "databricks-gemini-2-5-pro",
+        flavor: SQLFlavor = SQLFlavor.DATABRICKS
+    ):
+        return SQLAISession(
+            model=model,
+            api_key=self.workspace.current_token(),
+            base_url="%s/serving-endpoints" % self.workspace.safe_host,
+            flavor=flavor
+        )

     def execute(
         self,
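The hunk above swaps the _sql_preview helper for warehouse() and ai_session() accessors backed by the new SQLWarehouse and SQLAISession classes. Usage is roughly as sketched here, assuming an SQLEngine built around an already-configured Workspace (warehouse name and model are placeholders):

engine = SQLEngine(workspace=workspace, catalog_name="main", schema_name="analytics")

# Resolves (or creates) a SQL warehouse once and caches it on the engine.
wh = engine.warehouse(warehouse_name="shared-endpoint")

# AI helper pointed at the workspace's serving endpoints.
ai = engine.ai_session(model="databricks-gemini-2-5-pro")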
@@ -230,17 +213,17 @@ class SQLEngine(WorkspaceService):
         *,
         engine: Optional[Literal["spark", "api"]] = None,
         warehouse_id: Optional[str] = None,
+        warehouse_name: Optional[str] = None,
         byte_limit: Optional[int] = None,
-        disposition: Optional["Disposition"] = None,
-        format: Optional["Format"] = None,
-        on_wait_timeout: Optional["ExecuteStatementRequestOnWaitTimeout"] = None,
-        parameters: Optional[List["StatementParameterListItem"]] = None,
+        disposition: Optional[Disposition] = None,
+        format: Optional[Format] = None,
+        on_wait_timeout: Optional[ExecuteStatementRequestOnWaitTimeout] = None,
+        parameters: Optional[List[StatementParameterListItem]] = None,
         row_limit: Optional[int] = None,
         wait_timeout: Optional[str] = None,
         catalog_name: Optional[str] = None,
         schema_name: Optional[str] = None,
-        table_name: Optional[str] = None,
-        wait_result: bool = True,
+        wait: Optional[WaitingConfigArg] = True
     ) -> "StatementResult":
         """Execute a SQL statement via Spark or Databricks SQL Statement Execution API.

@@ -256,6 +239,7 @@ class SQLEngine(WorkspaceService):
             statement: SQL statement to execute. If None, a `SELECT *` is generated from the table params.
             engine: "spark" or "api".
             warehouse_id: Warehouse override (for API engine).
+            warehouse_name: Warehouse name override (for API engine).
             byte_limit: Optional byte limit for results.
             disposition: Result disposition mode (API engine).
             format: Result format (API engine).
@@ -265,8 +249,7 @@ class SQLEngine(WorkspaceService):
             wait_timeout: API wait timeout value.
             catalog_name: Optional catalog override for API engine.
             schema_name: Optional schema override for API engine.
-            table_name: Optional table override used when `statement` is None.
-            wait_result: Whether to block until completion (API engine).
+            wait: Whether to block until completion (API engine).

         Returns:
             StatementResult.
@@ -284,72 +267,44 @@ class SQLEngine(WorkspaceService):
             if spark_session is None:
                 raise ValueError("No spark session found to run sql query")

-            df: SparkDataFrame = spark_session.sql(statement)
-
-            if row_limit:
-                df = df.limit(row_limit)
-
             logger.debug(
-                "SPARK SQL executed query:\n%s",
+                "SPARK SQL executing query:\n%s",
                 statement
             )

-            # Avoid Disposition dependency if SDK imports are absent
-            spark_disp = disposition if disposition is not None else getattr(globals().get("Disposition", object), "EXTERNAL_LINKS", None)
+            df: SparkDataFrame = spark_session.sql(statement)
+
+            if row_limit:
+                df = df.limit(row_limit)

             return StatementResult(
-                engine=self,
-                statement_id="sparksql",
-                disposition=spark_disp,
+                workspace_client=self.workspace.sdk(),
+                warehouse_id="SparkSQL",
+                statement_id="SparkSQL",
+                disposition=Disposition.EXTERNAL_LINKS,
                 _spark_df=df,
             )

-        # --- API path defaults ---
-        if format is None:
-            format = Format.ARROW_STREAM
-
-        if (disposition is None or disposition == Disposition.INLINE) and format in [Format.CSV, Format.ARROW_STREAM]:
-            disposition = Disposition.EXTERNAL_LINKS
-
-        if not statement:
-            full_name = self.table_full_name(catalog_name=catalog_name, schema_name=schema_name, table_name=table_name)
-            statement = f"SELECT * FROM {full_name}"
-
-        if not warehouse_id:
-            warehouse_id = self._get_or_default_warehouse_id()
+        wh = self.warehouse(
+            warehouse_id=warehouse_id,
+            warehouse_name=warehouse_name,
+        )

-        response = self.workspace.sdk().statement_execution.execute_statement(
+        return wh.execute(
             statement=statement,
             warehouse_id=warehouse_id,
+            warehouse_name=warehouse_name,
             byte_limit=byte_limit,
             disposition=disposition,
             format=format,
             on_wait_timeout=on_wait_timeout,
             parameters=parameters,
-            row_limit=row_limit,
             wait_timeout=wait_timeout,
-            catalog=catalog_name or self.catalog_name,
-            schema=schema_name or self.schema_name,
-        )
-
-        execution = StatementResult(
-            engine=self,
-            statement_id=response.statement_id,
-            _response=response,
-            disposition=disposition,
-        )
-
-        logger.info(
-            "API SQL executed statement '%s'",
-            execution.statement_id
-        )
-        logger.debug(
-            "API SQL executed query:\n%s",
-            statement
+            catalog_name=catalog_name,
+            schema_name=schema_name,
+            wait=wait
         )

-        return execution.wait() if wait_result else execution
-
     def spark_table(
         self,
         full_name: Optional[str] = None,
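With the API path now delegated to SQLWarehouse.execute, a typical call (continuing the sketch above) looks roughly like this; wait=True replaces the old wait_result flag and also accepts the new WaitingConfigArg values, and the table and warehouse names are hypothetical:

result = engine.execute(
    "SELECT COUNT(*) AS n FROM main.analytics.events",
    warehouse_name="shared-endpoint",
    wait=True,   # block until the statement finishes
)
# `result` is a StatementResult, as before.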
@@ -412,7 +367,7 @@ class SQLEngine(WorkspaceService):
            None (mutates the destination table).
        """

-        if pyspark is not None:
+        if pyspark is not None or spark_session is not None:
            spark_session = SparkSession.getActiveSession() if spark_session is None else spark_session

            if spark_session is not None or isinstance(data, SparkDataFrame):
@@ -502,6 +457,7 @@ class SQLEngine(WorkspaceService):
        if existing_schema is None:
            try:
                existing_schema = connected.get_table_schema(
+                    location=location,
                    catalog_name=catalog_name,
                    schema_name=schema_name,
                    table_name=table_name,
@@ -511,8 +467,7 @@ class SQLEngine(WorkspaceService):
                data_tbl = convert(data, pa.Table)
                existing_schema = data_tbl.schema
                logger.warning(
-                    "Table %s not found (%s). Creating it from input schema (columns=%s)",
-                    location,
+                    "%s, creating it from input schema (columns=%s)",
                    exc,
                    existing_schema.names,
                )
@@ -544,13 +499,11 @@ class SQLEngine(WorkspaceService):
            except Exception:
                logger.exception("Arrow insert failed after auto-creating %s; attempting cleanup (DROP TABLE)", location)
                try:
-                    connected.drop_table(location=location)
+                    connected.drop_table(location=location, wait=True)
                except Exception:
                    logger.exception("Failed to drop table %s after auto creation error", location)
                raise

-        transaction_id = self._random_suffix()
-
        data_tbl = convert(
            data, pa.Table,
            options=cast_options, target_field=existing_schema
@@ -567,14 +520,15 @@ class SQLEngine(WorkspaceService):
        )

        # Write in temp volume
-        temp_volume_path = connected.dbfs_path(
-            kind=DatabricksPathKind.VOLUME,
-            parts=[catalog_name, schema_name, "tmp", "sql", transaction_id],
+        temp_volume_path = self.workspace.tmp_path(
+            catalog_name=catalog_name,
+            schema_name=schema_name,
+            volume_name="tmp",
+            extension="parquet"
        ) if temp_volume_path is None else DatabricksPath.parse(obj=temp_volume_path, workspace=connected.workspace)

        logger.debug("Staging Parquet to temp volume: %s", temp_volume_path)
-        temp_volume_path.mkdir()
-        temp_volume_path.write_arrow_table(data_tbl)
+        temp_volume_path.write_arrow_table(data_tbl, file_format=pds.ParquetFileFormat())

        columns = list(existing_schema.names)
        cols_quoted = ", ".join([f"`{c}`" for c in columns])
@@ -620,7 +574,12 @@ FROM parquet.`{temp_volume_path}`"""
            connected.execute(stmt.strip())
        finally:
            try:
-                temp_volume_path.rmdir(recursive=True)
+                Thread(
+                    target=temp_volume_path.rmdir,
+                    kwargs={
+                        "recursive": True
+                    }
+                ).start()
            except Exception:
                logger.exception("Failed cleaning temp volume: %s", temp_volume_path)

@@ -732,8 +691,6 @@ FROM parquet.`{temp_volume_path}`"""
        cast_options = CastOptions.check_arg(options=cast_options, target_field=existing_schema)
        data = cast_spark_dataframe(data, options=cast_options)

-        logger.debug("Incoming Spark columns: %s", data.columns)
-
        if match_by:
            notnull = None
            for k in match_by:
@@ -788,6 +745,7 @@ FROM parquet.`{temp_volume_path}`"""

    def get_table_schema(
        self,
+        location: Optional[str] = None,
        catalog_name: Optional[str] = None,
        schema_name: Optional[str] = None,
        table_name: Optional[str] = None,
@@ -796,6 +754,7 @@ FROM parquet.`{temp_volume_path}`"""
        """Fetch a table schema from Unity Catalog and convert it to Arrow types.

        Args:
+            location: Optional fully qualified location name.
            catalog_name: Optional catalog override.
            schema_name: Optional schema override.
            table_name: Optional table name override.
@@ -804,25 +763,44 @@ FROM parquet.`{temp_volume_path}`"""
        Returns:
            Arrow Schema or a STRUCT Field representing the table.
        """
-        full_name = self.table_full_name(
+        location, catalog_name, schema_name, table_name = self._check_location_params(
+            location=location,
            catalog_name=catalog_name,
            schema_name=schema_name,
            table_name=table_name,
            safe_chars=False,
        )

-        wk = self.workspace.sdk()
+        client = self.workspace.sdk().tables

        try:
-            table = wk.tables.get(full_name)
+            table = client.get(location)
        except Exception as e:
-            raise ValueError(f"Table %s not found, {type(e)} {e}" % full_name)
+            raise ValueError(f"Table %s not found, {type(e)} {e}" % location)

-        fields = [column_info_to_arrow_field(_) for _ in table.columns]
+        fields = [
+            column_info_to_arrow_field(_) for _ in table.columns
+        ]
+
+        metadata = {
+            b"engine": b"databricks",
+            b"full_name": location,
+            b"catalog_name": catalog_name,
+            b"schema_name": schema_name,
+            b"table_name": table_name,
+        }

        if to_arrow_schema:
-            return pa.schema(fields, metadata={b"name": table_name})
-        return pa.field(table.name, pa.struct(fields))
+            return pa.schema(
+                fields,
+                metadata=metadata
+            )
+
+        return pa.field(
+            location,
+            pa.struct(fields),
+            metadata=metadata
+        )

    def drop_table(
        self,
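Continuing the sketch, the extended lookup can now be keyed by a fully qualified location, and the returned Arrow schema carries the table coordinates in its metadata (table name hypothetical):

schema = engine.get_table_schema(
    location="main.analytics.events",
    to_arrow_schema=True,
)
print(schema.names)     # column names converted from Unity Catalog
print(schema.metadata)  # engine, full_name, catalog/schema/table entries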
@@ -830,6 +808,7 @@ FROM parquet.`{temp_volume_path}`"""
        catalog_name: Optional[str] = None,
        schema_name: Optional[str] = None,
        table_name: Optional[str] = None,
+        wait: Optional[WaitingConfigArg] = True
    ):
        """Drop a table if it exists."""
        location, _, _, _ = self._check_location_params(
@@ -839,13 +818,17 @@ FROM parquet.`{temp_volume_path}`"""
            table_name=table_name,
            safe_chars=True,
        )
-        logger.info("Dropping table if exists: %s", location)
-        return self.execute(f"DROP TABLE IF EXISTS {location}")
+
+        logger.debug("Dropping table if exists: %s", location)
+
+        self.execute(f"DROP TABLE IF EXISTS {location}", wait=wait)
+
+        logger.info("Dropped table if exists: %s", location)

    def create_table(
        self,
        field: Union[pa.Field, pa.Schema],
-        table_fqn: Optional[str] = None,  # e.g. catalog.schema.table
+        full_name: Optional[str] = None,  # e.g. catalog.schema.table
        catalog_name: Optional[str] = None,
        schema_name: Optional[str] = None,
        table_name: Optional[str] = None,
@@ -853,7 +836,7 @@ FROM parquet.`{temp_volume_path}`"""
        partition_by: Optional[list[str]] = None,
        cluster_by: Optional[bool | list[str]] = True,
        comment: Optional[str] = None,
-        tblproperties: Optional[dict[str, Any]] = None,
+        properties: Optional[dict[str, Any]] = None,
        if_not_exists: bool = True,
        or_replace: bool = False,
        using: str = "DELTA",
@@ -886,7 +869,7 @@ FROM parquet.`{temp_volume_path}`"""
                - If `pa.Schema`, all schema fields are used as columns.
                - If `pa.Field` with struct type, its children become columns.
                - If `pa.Field` non-struct, it becomes a single-column table.
-            table_fqn:
+            full_name:
                Fully-qualified table name, e.g. `"catalog.schema.table"`.
                If provided, it takes precedence over `catalog_name`/`schema_name`/`table_name`.
                Parts are quoted as needed.
@@ -906,7 +889,7 @@ FROM parquet.`{temp_volume_path}`"""
                - list[str] -> emits `CLUSTER BY (<cols...>)` (all cols must exist in schema)
            comment:
                Optional table comment. If not provided and Arrow metadata contains `b"comment"`, that is used.
-            tblproperties:
+            properties:
                Additional/override Delta table properties (final say).
                Example: `{"delta.enableChangeDataFeed": "true"}` or `{"delta.logRetentionDuration": "30 days"}`
            if_not_exists:
@@ -973,19 +956,22 @@ FROM parquet.`{temp_volume_path}`"""
        Examples
        --------
        Create a managed Delta table with auto clustering and auto column mapping:
-        >>> plan = client.create_table(schema, table_fqn="main.analytics.events", execute=False, return_plan=True)
+        >>> plan = client.create_table(schema, full_name="main.analytics.events", execute=False, return_plan=True)
        >>> print(plan.sql)

        External table with explicit partitioning and CDF:
        >>> client.create_table(
        ...     schema,
-        ...     table_fqn="main.analytics.events",
+        ...     full_name="main.analytics.events",
        ...     storage_location="abfss://.../events",
        ...     partition_by=["event_date"],
        ...     enable_cdf=True,
        ... )
        """

+        if not isinstance(field, (pa.Field, pa.Schema)):
+            field = convert(field, pa.Field)
+
        # ---- Normalize Arrow input ----
        if isinstance(field, pa.Schema):
            arrow_fields = list(field)
@@ -998,16 +984,13 @@ FROM parquet.`{temp_volume_path}`"""
        else:
            arrow_fields = [field]

-        # ---- Resolve table FQN ----
-        # Prefer explicit table_fqn. Else build from catalog/schema/table_name.
-        if table_fqn is None:
-            if not (catalog_name and schema_name and table_name):
-                raise ValueError("Provide table_fqn or (catalog_name, schema_name, table_name).")
-            table_fqn = ".".join(map(_quote_ident, [catalog_name, schema_name, table_name]))
-        else:
-            # If caller passes raw "cat.schema.table", quote each part safely
-            parts = table_fqn.split(".")
-            table_fqn = ".".join(_quote_ident(p) for p in parts)
+        full_name, catalog_name, schema_name, table_name = self._check_location_params(
+            location=full_name,
+            catalog_name=catalog_name,
+            schema_name=schema_name,
+            table_name=table_name,
+            safe_chars=True
+        )

        # ---- Comments ----
        if comment is None and schema_metadata:
@@ -1051,7 +1034,7 @@ FROM parquet.`{temp_volume_path}`"""
        create_kw = "CREATE TABLE IF NOT EXISTS"

        sql_parts: list[str] = [
-            f"{create_kw} {table_fqn} (",
+            f"{create_kw} {full_name} (",
            " " + ",\n ".join(column_definitions),
            ")",
            f"USING {using}",
@@ -1096,8 +1079,8 @@ FROM parquet.`{temp_volume_path}`"""
            pass

        # Let caller override anything (final say)
-        if tblproperties:
-            props.update(tblproperties)
+        if properties:
+            props.update(properties)

        if any_invalid and column_mapping_mode == "none":
            warnings.append(
@@ -1105,6 +1088,11 @@ FROM parquet.`{temp_volume_path}`"""
                "This will fail unless you rename/escape columns."
            )

+        default_tags = self.workspace.default_tags()
+
+        for k, v in default_tags.items():
+            props[f"tags.{k}"] = v
+
        if props:
            def fmt(k: str, v: Any) -> str:
                if isinstance(v, str):
@@ -1122,7 +1110,7 @@ FROM parquet.`{temp_volume_path}`"""
        if not execute:
            return plan if return_plan else statement

-        res = self.execute(statement, wait_result=wait_result)
+        res = self.execute(statement, wait=wait_result)
        plan.result = res
        return plan if return_plan else res
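For create_table callers the visible migration is the two renamed keyword arguments; mirroring the docstring example above, with hypothetical names:

# 0.1.56: create_table(schema, table_fqn=..., tblproperties=...)
# 0.1.60: create_table(schema, full_name=..., properties=...)
plan = engine.create_table(
    schema,
    full_name="main.analytics.events",
    properties={"delta.enableChangeDataFeed": "true"},
    execute=False,
    return_plan=True,
)
print(plan.sql)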