ygg 0.1.57__py3-none-any.whl → 0.1.64__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. {ygg-0.1.57.dist-info → ygg-0.1.64.dist-info}/METADATA +2 -2
  2. ygg-0.1.64.dist-info/RECORD +74 -0
  3. yggdrasil/ai/__init__.py +2 -0
  4. yggdrasil/ai/session.py +87 -0
  5. yggdrasil/ai/sql_session.py +310 -0
  6. yggdrasil/databricks/__init__.py +0 -3
  7. yggdrasil/databricks/compute/cluster.py +68 -113
  8. yggdrasil/databricks/compute/command_execution.py +674 -0
  9. yggdrasil/databricks/compute/exceptions.py +19 -0
  10. yggdrasil/databricks/compute/execution_context.py +491 -282
  11. yggdrasil/databricks/compute/remote.py +4 -14
  12. yggdrasil/databricks/exceptions.py +10 -0
  13. yggdrasil/databricks/sql/__init__.py +0 -4
  14. yggdrasil/databricks/sql/engine.py +178 -178
  15. yggdrasil/databricks/sql/exceptions.py +9 -1
  16. yggdrasil/databricks/sql/statement_result.py +108 -120
  17. yggdrasil/databricks/sql/warehouse.py +339 -92
  18. yggdrasil/databricks/workspaces/io.py +185 -40
  19. yggdrasil/databricks/workspaces/path.py +114 -100
  20. yggdrasil/databricks/workspaces/workspace.py +210 -61
  21. yggdrasil/exceptions.py +7 -0
  22. yggdrasil/libs/databrickslib.py +22 -18
  23. yggdrasil/libs/extensions/spark_extensions.py +1 -1
  24. yggdrasil/libs/pandaslib.py +15 -6
  25. yggdrasil/libs/polarslib.py +49 -13
  26. yggdrasil/pyutils/__init__.py +1 -2
  27. yggdrasil/pyutils/callable_serde.py +12 -19
  28. yggdrasil/pyutils/exceptions.py +16 -0
  29. yggdrasil/pyutils/modules.py +6 -7
  30. yggdrasil/pyutils/python_env.py +16 -21
  31. yggdrasil/pyutils/waiting_config.py +171 -0
  32. yggdrasil/requests/msal.py +9 -96
  33. yggdrasil/types/cast/arrow_cast.py +3 -0
  34. yggdrasil/types/cast/pandas_cast.py +157 -169
  35. yggdrasil/types/cast/polars_cast.py +11 -43
  36. yggdrasil/types/dummy_class.py +81 -0
  37. yggdrasil/types/file_format.py +6 -2
  38. yggdrasil/types/python_defaults.py +92 -76
  39. yggdrasil/version.py +1 -1
  40. ygg-0.1.57.dist-info/RECORD +0 -66
  41. yggdrasil/databricks/ai/loki.py +0 -53
  42. {ygg-0.1.57.dist-info → ygg-0.1.64.dist-info}/WHEEL +0 -0
  43. {ygg-0.1.57.dist-info → ygg-0.1.64.dist-info}/entry_points.txt +0 -0
  44. {ygg-0.1.57.dist-info → ygg-0.1.64.dist-info}/licenses/LICENSE +0 -0
  45. {ygg-0.1.57.dist-info → ygg-0.1.64.dist-info}/top_level.txt +0 -0
  46. /yggdrasil/{databricks/ai/__init__.py → pyutils/mimetypes.py} +0 -0
@@ -16,17 +16,20 @@ import logging
  import random
  import string
  import time
- from typing import Optional, Union, Any, Dict, List, Literal
+ from threading import Thread
+ from typing import Optional, Union, Any, Dict, List, Literal, TYPE_CHECKING

  import pyarrow as pa
+ import pyarrow.dataset as pds

  from .statement_result import StatementResult
  from .types import column_info_to_arrow_field
- from .. import DatabricksPathKind, DatabricksPath
- from ..workspaces import WorkspaceService
- from ...libs.databrickslib import databricks_sdk
+ from .warehouse import SQLWarehouse
+ from ..workspaces import WorkspaceService, DatabricksPath
+ from ...libs.databrickslib import databricks_sdk, DatabricksDummyClass
  from ...libs.sparklib import SparkSession, SparkDataFrame, pyspark
- from ...types import is_arrow_type_string_like, is_arrow_type_binary_like
+ from ...pyutils.waiting_config import WaitingConfigArg
+ from ...types import is_arrow_type_string_like, is_arrow_type_binary_like, cast_arrow_tabular
  from ...types.cast.cast_options import CastOptions
  from ...types.cast.registry import convert
  from ...types.cast.spark_cast import cast_spark_dataframe
@@ -43,13 +46,14 @@ except ImportError:

  if databricks_sdk is not None:
  from databricks.sdk.service.sql import (
- StatementResponse, Disposition, Format,
+ Disposition, Format,
  ExecuteStatementRequestOnWaitTimeout, StatementParameterListItem
  )
- StatementResponse = StatementResponse
  else:
- class StatementResponse: # pragma: no cover
- pass
+ Disposition = DatabricksDummyClass
+ Format = DatabricksDummyClass
+ ExecuteStatementRequestOnWaitTimeout = DatabricksDummyClass
+ StatementParameterListItem = DatabricksDummyClass


  logger = logging.getLogger(__name__)
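
Note: with this change the module no longer stubs only StatementResponse; every SDK type referenced in the signatures further down is bound to DatabricksDummyClass when databricks-sdk is missing, so annotations still resolve at import time. A minimal sketch of that placeholder pattern — the class name and its failure behaviour here are illustrative, not the actual yggdrasil implementation:

    # Sketch of the optional-dependency fallback (assumed behaviour).
    try:
        from databricks.sdk.service.sql import Disposition, Format
    except ImportError:
        class _MissingSDKType:
            """Placeholder so type annotations resolve without databricks-sdk."""
            def __init__(self, *args, **kwargs):
                raise ImportError("databricks-sdk is required to use this type")

        Disposition = _MissingSDKType
        Format = _MissingSDKType
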
@@ -57,7 +61,15 @@ logger = logging.getLogger(__name__)
  if pyspark is not None:
  import pyspark.sql.functions as F

- __all__ = ["SQLEngine", "StatementResult"]
+
+ if TYPE_CHECKING:
+ from ...ai.sql_session import SQLAISession, SQLFlavor
+
+
+ __all__ = [
+ "SQLEngine",
+ "StatementResult"
+ ]


  @dataclasses.dataclass
@@ -88,10 +100,12 @@ def _needs_column_mapping(col_name: str) -> bool:
  @dataclasses.dataclass
  class SQLEngine(WorkspaceService):
  """Execute SQL statements and manage tables via Databricks SQL / Spark."""
- warehouse_id: Optional[str] = None
  catalog_name: Optional[str] = None
  schema_name: Optional[str] = None

+ _warehouse: Optional[SQLWarehouse] = dataclasses.field(default=None, repr=False, hash=False, compare=False)
+ _ai_session: Optional["SQLAISession"] = dataclasses.field(default=None, repr=False, hash=False, compare=False)
+
  def table_full_name(
  self,
  catalog_name: Optional[str] = None,
@@ -147,82 +161,59 @@ class SQLEngine(WorkspaceService):
  return self.catalog_name, parts[0], parts[1]

  catalog_name, schema_name, table_name = parts[-3], parts[-2], parts[-1]
- catalog_name = catalog_name or self.catalog_name
- schema_name = schema_name or self.schema_name
- return catalog_name, schema_name, table_name

- def _default_warehouse(
- self,
- cluster_size: str = "Small"
- ):
- """Pick a default SQL warehouse (best-effort) matching the desired size.
-
- Args:
- cluster_size: Desired warehouse size (Databricks "cluster_size"), e.g. "Small".
- If empty/None, returns the first warehouse encountered.
-
- Returns:
- Warehouse object.
+ return catalog_name or self.catalog_name, schema_name or self.schema_name, table_name

- Raises:
- ValueError: If no warehouses exist in the workspace.
- """
- wk = self.workspace.sdk()
- existing = list(wk.warehouses.list())
- first = None
-
- for warehouse in existing:
- if first is None:
- first = warehouse
-
- if cluster_size:
- if getattr(warehouse, "cluster_size", None) == cluster_size:
- logger.debug("Default warehouse match found: id=%s cluster_size=%s", warehouse.id, warehouse.cluster_size)
- return warehouse
- else:
- logger.debug("Default warehouse selected (first): id=%s", warehouse.id)
- return warehouse
+ @staticmethod
+ def _random_suffix(prefix: str = "") -> str:
+ """Generate a unique suffix for temporary resources."""
+ unique = "".join(random.choice(string.ascii_lowercase + string.digits) for _ in range(8))
+ timestamp = int(time.time() * 1000)
+ return f"{prefix}{timestamp}_{unique}"

- if first is not None:
- logger.info(
- "No warehouse matched cluster_size=%s; falling back to first warehouse id=%s cluster_size=%s",
- cluster_size,
- getattr(first, "id", None),
- getattr(first, "cluster_size", None),
+ def warehouse(
+ self,
+ warehouse_id: Optional[str] = None,
+ warehouse_name: Optional[str] = None,
+ ) -> SQLWarehouse:
+ if self._warehouse is None:
+ wh = SQLWarehouse(
+ workspace=self.workspace,
+ warehouse_id=warehouse_id,
+ warehouse_name=warehouse_name
  )
- return first
-
- raise ValueError(f"No default warehouse found in {wk.config.host}")

- def _get_or_default_warehouse_id(self, cluster_size: str = "Small") -> str:
- """Return configured warehouse_id or resolve a default one.
+ self._warehouse = wh.find_warehouse(
+ warehouse_id=warehouse_id,
+ warehouse_name=warehouse_name,
+ raise_error=False
+ )

- Args:
- cluster_size: Desired warehouse size filter used when resolving defaults.
+ if self._warehouse is None:
+ self._warehouse = wh.create_or_update()

- Returns:
- Warehouse id string.
- """
- if not self.warehouse_id:
- dft = self._default_warehouse(cluster_size=cluster_size)
- self.warehouse_id = dft.id
- logger.info("Resolved default warehouse_id=%s (cluster_size=%s)", self.warehouse_id, cluster_size)
+ return self._warehouse.find_warehouse(
+ warehouse_id=warehouse_id,
+ warehouse_name=warehouse_name,
+ raise_error=True
+ )

- return self.warehouse_id
+ def ai_session(
+ self,
+ model: str = "databricks-gemini-2-5-pro",
+ flavor: Optional["SQLFlavor"] = None
+ ):
+ from ...ai.sql_session import SQLAISession, SQLFlavor

- @staticmethod
- def _random_suffix(prefix: str = "") -> str:
- """Generate a unique suffix for temporary resources."""
- unique = "".join(random.choice(string.ascii_lowercase + string.digits) for _ in range(8))
- timestamp = int(time.time() * 1000)
- return f"{prefix}{timestamp}_{unique}"
+ if flavor is None:
+ flavor = SQLFlavor.DATABRICKS

- @staticmethod
- def _sql_preview(sql: str, limit: int = 220) -> str:
- """Short, single-line preview for logs (avoids spewing giant SQL)."""
- if not sql:
- return ""
- return sql[:limit] + ("…" if len(sql) > limit else "")
+ return SQLAISession(
+ model=model,
+ api_key=self.workspace.current_token(),
+ base_url="%s/serving-endpoints" % self.workspace.safe_host,
+ flavor=flavor
+ )

  def execute(
  self,
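
Note: the hunk above drops the warehouse_id field and the default-warehouse scan in favour of a cached SQLWarehouse accessor (find an existing warehouse, otherwise create_or_update one) plus an ai_session() helper bound to the workspace serving endpoints. A hedged usage sketch; how the SQLEngine instance itself is constructed is not shown in this diff:

    # Hypothetical usage of the new accessors on an existing SQLEngine `engine`.
    wh = engine.warehouse(warehouse_name="shared-small")  # resolved (or newly created) SQLWarehouse
    ai = engine.ai_session()                              # SQLAISession using the workspace token,
                                                          # default model "databricks-gemini-2-5-pro"
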
@@ -230,18 +221,18 @@ class SQLEngine(WorkspaceService):
  *,
  engine: Optional[Literal["spark", "api"]] = None,
  warehouse_id: Optional[str] = None,
+ warehouse_name: Optional[str] = None,
  byte_limit: Optional[int] = None,
- disposition: Optional["Disposition"] = None,
- format: Optional["Format"] = None,
- on_wait_timeout: Optional["ExecuteStatementRequestOnWaitTimeout"] = None,
- parameters: Optional[List["StatementParameterListItem"]] = None,
+ disposition: Optional[Disposition] = None,
+ format: Optional[Format] = None,
+ on_wait_timeout: Optional[ExecuteStatementRequestOnWaitTimeout] = None,
+ parameters: Optional[List[StatementParameterListItem]] = None,
  row_limit: Optional[int] = None,
  wait_timeout: Optional[str] = None,
  catalog_name: Optional[str] = None,
  schema_name: Optional[str] = None,
- table_name: Optional[str] = None,
- wait_result: bool = True,
- ) -> "StatementResult":
+ wait: Optional[WaitingConfigArg] = True
+ ) -> StatementResult:
  """Execute a SQL statement via Spark or Databricks SQL Statement Execution API.

  Engine resolution:
@@ -256,6 +247,7 @@ class SQLEngine(WorkspaceService):
  statement: SQL statement to execute. If None, a `SELECT *` is generated from the table params.
  engine: "spark" or "api".
  warehouse_id: Warehouse override (for API engine).
+ warehouse_name: Warehouse name override (for API engine).
  byte_limit: Optional byte limit for results.
  disposition: Result disposition mode (API engine).
  format: Result format (API engine).
@@ -265,8 +257,7 @@ class SQLEngine(WorkspaceService):
  wait_timeout: API wait timeout value.
  catalog_name: Optional catalog override for API engine.
  schema_name: Optional schema override for API engine.
- table_name: Optional table override used when `statement` is None.
- wait_result: Whether to block until completion (API engine).
+ wait: Whether to block until completion (API engine).

  Returns:
  StatementResult.
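
Note: together these signature and docstring hunks replace table_name/wait_result with a single wait argument (a WaitingConfigArg) and add warehouse selection by name. A sketch under those assumptions — accepted wait values other than True/False come from pyutils/waiting_config.py, which is not shown here:

    # Hypothetical calls on a configured SQLEngine `engine`.
    result = engine.execute(
        "SELECT 1 AS ok",
        engine="api",
        warehouse_name="shared-small",  # new: pick the warehouse by name
        wait=True,                      # replaces wait_result=True
    )

    handle = engine.execute("OPTIMIZE main.analytics.events", wait=False)  # returns without blocking
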
@@ -284,72 +275,44 @@ class SQLEngine(WorkspaceService):
  if spark_session is None:
  raise ValueError("No spark session found to run sql query")

- df: SparkDataFrame = spark_session.sql(statement)
-
- if row_limit:
- df = df.limit(row_limit)
-
  logger.debug(
- "SPARK SQL executed query:\n%s",
+ "SPARK SQL executing query:\n%s",
  statement
  )

- # Avoid Disposition dependency if SDK imports are absent
- spark_disp = disposition if disposition is not None else getattr(globals().get("Disposition", object), "EXTERNAL_LINKS", None)
+ df: SparkDataFrame = spark_session.sql(statement)
+
+ if row_limit:
+ df = df.limit(row_limit)

  return StatementResult(
- engine=self,
- statement_id="sparksql",
- disposition=spark_disp,
+ workspace_client=self.workspace.sdk(),
+ warehouse_id="SparkSQL",
+ statement_id="SparkSQL",
+ disposition=Disposition.EXTERNAL_LINKS,
  _spark_df=df,
  )

- # --- API path defaults ---
- if format is None:
- format = Format.ARROW_STREAM
-
- if (disposition is None or disposition == Disposition.INLINE) and format in [Format.CSV, Format.ARROW_STREAM]:
- disposition = Disposition.EXTERNAL_LINKS
-
- if not statement:
- full_name = self.table_full_name(catalog_name=catalog_name, schema_name=schema_name, table_name=table_name)
- statement = f"SELECT * FROM {full_name}"
-
- if not warehouse_id:
- warehouse_id = self._get_or_default_warehouse_id()
+ wh = self.warehouse(
+ warehouse_id=warehouse_id,
+ warehouse_name=warehouse_name,
+ )

- response = self.workspace.sdk().statement_execution.execute_statement(
+ return wh.execute(
  statement=statement,
  warehouse_id=warehouse_id,
+ warehouse_name=warehouse_name,
  byte_limit=byte_limit,
  disposition=disposition,
  format=format,
  on_wait_timeout=on_wait_timeout,
  parameters=parameters,
- row_limit=row_limit,
  wait_timeout=wait_timeout,
- catalog=catalog_name or self.catalog_name,
- schema=schema_name or self.schema_name,
- )
-
- execution = StatementResult(
- engine=self,
- statement_id=response.statement_id,
- _response=response,
- disposition=disposition,
- )
-
- logger.info(
- "API SQL executed statement '%s'",
- execution.statement_id
- )
- logger.debug(
- "API SQL executed query:\n%s",
- statement
+ catalog_name=catalog_name,
+ schema_name=schema_name,
+ wait=wait
  )

- return execution.wait() if wait_result else execution
-
  def spark_table(
  self,
  full_name: Optional[str] = None,
@@ -412,7 +375,7 @@ class SQLEngine(WorkspaceService):
  None (mutates the destination table).
  """

- if pyspark is not None:
+ if pyspark is not None or spark_session is not None:
  spark_session = SparkSession.getActiveSession() if spark_session is None else spark_session

  if spark_session is not None or isinstance(data, SparkDataFrame):
@@ -502,6 +465,7 @@ class SQLEngine(WorkspaceService):
  if existing_schema is None:
  try:
  existing_schema = connected.get_table_schema(
+ location=location,
  catalog_name=catalog_name,
  schema_name=schema_name,
  table_name=table_name,
@@ -511,8 +475,7 @@ class SQLEngine(WorkspaceService):
  data_tbl = convert(data, pa.Table)
  existing_schema = data_tbl.schema
  logger.warning(
- "Table %s not found (%s). Creating it from input schema (columns=%s)",
- location,
+ "%s, creating it from input schema (columns=%s)",
  exc,
  existing_schema.names,
  )
@@ -544,17 +507,18 @@ class SQLEngine(WorkspaceService):
  except Exception:
  logger.exception("Arrow insert failed after auto-creating %s; attempting cleanup (DROP TABLE)", location)
  try:
- connected.drop_table(location=location)
+ connected.drop_table(location=location, wait=True)
  except Exception:
  logger.exception("Failed to drop table %s after auto creation error", location)
  raise

- transaction_id = self._random_suffix()
+ cast_options = CastOptions.check_arg(options=cast_options, target_field=existing_schema)
+
+ if isinstance(data, (pa.Table, pa.RecordBatch)):
+ data_tbl = cast_arrow_tabular(data, options=cast_options)
+ else:
+ data_tbl = convert(data, pa.Table, options=cast_options)

- data_tbl = convert(
- data, pa.Table,
- options=cast_options, target_field=existing_schema
- )
  num_rows = data_tbl.num_rows

  logger.debug(
@@ -567,14 +531,16 @@ class SQLEngine(WorkspaceService):
  )

  # Write in temp volume
- temp_volume_path = connected.dbfs_path(
- kind=DatabricksPathKind.VOLUME,
- parts=[catalog_name, schema_name, "tmp", "sql", transaction_id],
+ temp_volume_path = self.workspace.tmp_path(
+ catalog_name=catalog_name,
+ schema_name=schema_name,
+ volume_name="tmp",
+ extension="parquet",
+ max_lifetime=3600,
  ) if temp_volume_path is None else DatabricksPath.parse(obj=temp_volume_path, workspace=connected.workspace)

  logger.debug("Staging Parquet to temp volume: %s", temp_volume_path)
- temp_volume_path.mkdir()
- temp_volume_path.write_arrow_table(data_tbl)
+ temp_volume_path.write_arrow_table(data_tbl, file_format=pds.ParquetFileFormat())

  columns = list(existing_schema.names)
  cols_quoted = ", ".join([f"`{c}`" for c in columns])
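
Note: the staging step above now goes through workspace.tmp_path(...) with an explicit Parquet file format, and the following hunk shows the staged files being removed on a background thread after the INSERT ... SELECT over parquet.`<temp path>` runs. The overall shape of that pattern, with placeholder paths and a placeholder execute function (none of these names are the yggdrasil API):

    import pyarrow as pa
    import pyarrow.parquet as pq
    from threading import Thread
    import shutil

    def run_sql(statement: str) -> None:
        print(statement)  # stands in for warehouse/Spark execution

    data = pa.table({"id": [1, 2], "value": ["a", "b"]})
    staging = "/tmp/ygg_staging"        # stands in for the temp volume path
    pq.write_to_dataset(data, staging)  # stage the Arrow table as Parquet

    run_sql(f"INSERT INTO main.analytics.events (`id`, `value`) "
            f"SELECT `id`, `value` FROM parquet.`{staging}`")

    # Fire-and-forget cleanup; errors inside the thread do not reach the caller.
    Thread(target=shutil.rmtree, kwargs={"path": staging, "ignore_errors": True}).start()
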
@@ -620,7 +586,12 @@ FROM parquet.`{temp_volume_path}`"""
  connected.execute(stmt.strip())
  finally:
  try:
- temp_volume_path.rmdir(recursive=True)
+ Thread(
+ target=temp_volume_path.remove,
+ kwargs={
+ "recursive": True
+ }
+ ).start()
  except Exception:
  logger.exception("Failed cleaning temp volume: %s", temp_volume_path)

@@ -732,8 +703,6 @@ FROM parquet.`{temp_volume_path}`"""
  cast_options = CastOptions.check_arg(options=cast_options, target_field=existing_schema)
  data = cast_spark_dataframe(data, options=cast_options)

- logger.debug("Incoming Spark columns: %s", data.columns)
-
  if match_by:
  notnull = None
  for k in match_by:
@@ -788,6 +757,7 @@ FROM parquet.`{temp_volume_path}`"""

  def get_table_schema(
  self,
+ location: Optional[str] = None,
  catalog_name: Optional[str] = None,
  schema_name: Optional[str] = None,
  table_name: Optional[str] = None,
@@ -796,6 +766,7 @@ FROM parquet.`{temp_volume_path}`"""
  """Fetch a table schema from Unity Catalog and convert it to Arrow types.

  Args:
+ location: Optional Fully qualified location name
  catalog_name: Optional catalog override.
  schema_name: Optional schema override.
  table_name: Optional table name override.
@@ -804,25 +775,44 @@ FROM parquet.`{temp_volume_path}`"""
  Returns:
  Arrow Schema or a STRUCT Field representing the table.
  """
- full_name = self.table_full_name(
+ location, catalog_name, schema_name, table_name = self._check_location_params(
+ location=location,
  catalog_name=catalog_name,
  schema_name=schema_name,
  table_name=table_name,
  safe_chars=False,
  )

- wk = self.workspace.sdk()
+ client = self.workspace.sdk().tables

  try:
- table = wk.tables.get(full_name)
+ table = client.get(location)
  except Exception as e:
- raise ValueError(f"Table %s not found, {type(e)} {e}" % full_name)
+ raise ValueError(f"Table %s not found, {type(e)} {e}" % location)
+
+ fields = [
+ column_info_to_arrow_field(_) for _ in table.columns
+ ]

- fields = [column_info_to_arrow_field(_) for _ in table.columns]
+ metadata = {
+ b"engine": b"databricks",
+ b"full_name": location,
+ b"catalog_name": catalog_name,
+ b"schema_name": schema_name,
+ b"table_name": table_name,
+ }

  if to_arrow_schema:
- return pa.schema(fields, metadata={b"name": table_name})
- return pa.field(table.name, pa.struct(fields))
+ return pa.schema(
+ fields,
+ metadata=metadata
+ )
+
+ return pa.field(
+ location,
+ pa.struct(fields),
+ metadata=metadata
+ )

  def drop_table(
  self,
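
Note: get_table_schema() now accepts a pre-resolved location and attaches engine/catalog/schema/table names as Arrow schema metadata instead of a single name key. A hedged sketch of consuming that metadata; the table name and engine variable are hypothetical:

    # Hypothetical call; to_arrow_schema=True yields a pyarrow.Schema.
    schema = engine.get_table_schema(location="main.analytics.events", to_arrow_schema=True)

    meta = schema.metadata or {}
    print(meta.get(b"full_name"), meta.get(b"catalog_name"), meta.get(b"table_name"))
    print([f.name for f in schema])  # Arrow fields converted from Unity Catalog column info
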
@@ -830,6 +820,7 @@ FROM parquet.`{temp_volume_path}`"""
  catalog_name: Optional[str] = None,
  schema_name: Optional[str] = None,
  table_name: Optional[str] = None,
+ wait: Optional[WaitingConfigArg] = True
  ):
  """Drop a table if it exists."""
  location, _, _, _ = self._check_location_params(
@@ -839,13 +830,17 @@ FROM parquet.`{temp_volume_path}`"""
  table_name=table_name,
  safe_chars=True,
  )
- logger.info("Dropping table if exists: %s", location)
- return self.execute(f"DROP TABLE IF EXISTS {location}")
+
+ logger.debug("Dropping table if exists: %s", location)
+
+ self.execute(f"DROP TABLE IF EXISTS {location}", wait=wait)
+
+ logger.info("Dropped table if exists: %s", location)

  def create_table(
  self,
  field: Union[pa.Field, pa.Schema],
- table_fqn: Optional[str] = None, # e.g. catalog.schema.table
+ full_name: Optional[str] = None, # e.g. catalog.schema.table
  catalog_name: Optional[str] = None,
  schema_name: Optional[str] = None,
  table_name: Optional[str] = None,
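
Note: for callers moving from 0.1.57 to 0.1.64, the keyword renames in this and the following hunks are breaking: table_fqn becomes full_name, tblproperties becomes properties, execute()'s wait_result becomes wait, and drop_table() gains a wait argument. A hedged before/after sketch with illustrative values:

    # 0.1.57-style keywords
    engine.create_table(schema, table_fqn="main.analytics.events",
                        tblproperties={"delta.enableChangeDataFeed": "true"})

    # 0.1.64-style keywords
    engine.create_table(schema, full_name="main.analytics.events",
                        properties={"delta.enableChangeDataFeed": "true"})
    engine.drop_table(catalog_name="main", schema_name="analytics",
                      table_name="events", wait=True)
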
@@ -853,7 +848,7 @@ FROM parquet.`{temp_volume_path}`"""
  partition_by: Optional[list[str]] = None,
  cluster_by: Optional[bool | list[str]] = True,
  comment: Optional[str] = None,
- tblproperties: Optional[dict[str, Any]] = None,
+ properties: Optional[dict[str, Any]] = None,
  if_not_exists: bool = True,
  or_replace: bool = False,
  using: str = "DELTA",
@@ -886,7 +881,7 @@ FROM parquet.`{temp_volume_path}`"""
  - If `pa.Schema`, all schema fields are used as columns.
  - If `pa.Field` with struct type, its children become columns.
  - If `pa.Field` non-struct, it becomes a single-column table.
- table_fqn:
+ full_name:
  Fully-qualified table name, e.g. `"catalog.schema.table"`.
  If provided, it takes precedence over `catalog_name`/`schema_name`/`table_name`.
  Parts are quoted as needed.
@@ -906,7 +901,7 @@ FROM parquet.`{temp_volume_path}`"""
  - list[str] -> emits `CLUSTER BY (<cols...>)` (all cols must exist in schema)
  comment:
  Optional table comment. If not provided and Arrow metadata contains `b"comment"`, that is used.
- tblproperties:
+ properties:
  Additional/override Delta table properties (final say).
  Example: `{"delta.enableChangeDataFeed": "true"}` or `{"delta.logRetentionDuration": "30 days"}`
  if_not_exists:
@@ -973,19 +968,22 @@ FROM parquet.`{temp_volume_path}`"""
  Examples
  --------
  Create a managed Delta table with auto clustering and auto column mapping:
- >>> plan = client.create_table(schema, table_fqn="main.analytics.events", execute=False, return_plan=True)
+ >>> plan = client.create_table(schema, full_name="main.analytics.events", execute=False, return_plan=True)
  >>> print(plan.sql)

  External table with explicit partitioning and CDF:
  >>> client.create_table(
  ... schema,
- ... table_fqn="main.analytics.events",
+ ... full_name="main.analytics.events",
  ... storage_location="abfss://.../events",
  ... partition_by=["event_date"],
  ... enable_cdf=True,
  ... )
  """

+ if not isinstance(field, (pa.Field, pa.Schema)):
+ field = convert(field, pa.Field)
+
  # ---- Normalize Arrow input ----
  if isinstance(field, pa.Schema):
  arrow_fields = list(field)
@@ -998,16 +996,13 @@ FROM parquet.`{temp_volume_path}`"""
  else:
  arrow_fields = [field]

- # ---- Resolve table FQN ----
- # Prefer explicit table_fqn. Else build from catalog/schema/table_name.
- if table_fqn is None:
- if not (catalog_name and schema_name and table_name):
- raise ValueError("Provide table_fqn or (catalog_name, schema_name, table_name).")
- table_fqn = ".".join(map(_quote_ident, [catalog_name, schema_name, table_name]))
- else:
- # If caller passes raw "cat.schema.table", quote each part safely
- parts = table_fqn.split(".")
- table_fqn = ".".join(_quote_ident(p) for p in parts)
+ full_name, catalog_name, schema_name, table_name = self._check_location_params(
+ location=full_name,
+ catalog_name=catalog_name,
+ schema_name=schema_name,
+ table_name=table_name,
+ safe_chars=True
+ )

  # ---- Comments ----
  if comment is None and schema_metadata:
@@ -1051,7 +1046,7 @@ FROM parquet.`{temp_volume_path}`"""
  create_kw = "CREATE TABLE IF NOT EXISTS"

  sql_parts: list[str] = [
- f"{create_kw} {table_fqn} (",
+ f"{create_kw} {full_name} (",
  " " + ",\n ".join(column_definitions),
  ")",
  f"USING {using}",
@@ -1096,8 +1091,8 @@ FROM parquet.`{temp_volume_path}`"""
  pass

  # Let caller override anything (final say)
- if tblproperties:
- props.update(tblproperties)
+ if properties:
+ props.update(properties)

  if any_invalid and column_mapping_mode == "none":
  warnings.append(
@@ -1105,6 +1100,11 @@ FROM parquet.`{temp_volume_path}`"""
  "This will fail unless you rename/escape columns."
  )

+ default_tags = self.workspace.default_tags()
+
+ for k, v in default_tags.items():
+ props[f"tags.{k}"] = v
+
  if props:
  def fmt(k: str, v: Any) -> str:
  if isinstance(v, str):
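
Note: the added loop folds the workspace's default tags into the Delta table properties under a tags. prefix before they are rendered into the TBLPROPERTIES clause. Illustratively, assuming default_tags() returned {"owner": "data-eng"} (the real keys come from the workspace configuration):

    default_tags = {"owner": "data-eng"}          # assumed output of workspace.default_tags()
    props = {"delta.enableChangeDataFeed": "true"}

    for k, v in default_tags.items():
        props[f"tags.{k}"] = v

    print(props)  # also contains {"tags.owner": "data-eng"}, emitted as TBLPROPERTIES
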
@@ -1122,7 +1122,7 @@ FROM parquet.`{temp_volume_path}`"""
  if not execute:
  return plan if return_plan else statement

- res = self.execute(statement, wait_result=wait_result)
+ res = self.execute(statement, wait=wait_result)
  plan.result = res
  return plan if return_plan else res