ygg 0.1.29__py3-none-any.whl → 0.1.30__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ygg-0.1.29.dist-info → ygg-0.1.30.dist-info}/METADATA +1 -1
- {ygg-0.1.29.dist-info → ygg-0.1.30.dist-info}/RECORD +16 -16
- yggdrasil/databricks/compute/cluster.py +41 -21
- yggdrasil/databricks/compute/execution_context.py +9 -10
- yggdrasil/databricks/compute/remote.py +10 -6
- yggdrasil/databricks/sql/engine.py +4 -2
- yggdrasil/databricks/sql/statement_result.py +17 -2
- yggdrasil/databricks/workspaces/databricks_path.py +192 -283
- yggdrasil/databricks/workspaces/workspace.py +53 -416
- yggdrasil/pyutils/callable_serde.py +1 -0
- yggdrasil/pyutils/modules.py +1 -1
- yggdrasil/pyutils/python_env.py +81 -264
- {ygg-0.1.29.dist-info → ygg-0.1.30.dist-info}/WHEEL +0 -0
- {ygg-0.1.29.dist-info → ygg-0.1.30.dist-info}/entry_points.txt +0 -0
- {ygg-0.1.29.dist-info → ygg-0.1.30.dist-info}/licenses/LICENSE +0 -0
- {ygg-0.1.29.dist-info → ygg-0.1.30.dist-info}/top_level.txt +0 -0
yggdrasil/databricks/workspaces/workspace.py CHANGED

@@ -1,29 +1,28 @@
-import base64
 import dataclasses
-import io
 import logging
 import os
 import posixpath
 from abc import ABC
-from concurrent.futures import ThreadPoolExecutor, as_completed
 from dataclasses import dataclass
 from pathlib import Path
 from typing import (
     Any,
     BinaryIO,
     Iterator,
-    List,
     Optional,
-    Union
+    Union, TYPE_CHECKING, List
 )
 
-
+if TYPE_CHECKING:
+    from ..compute.cluster import Cluster
+
+from .databricks_path import DatabricksPath, DatabricksPathKind
 from ...libs.databrickslib import require_databricks_sdk, databricks_sdk
 
 if databricks_sdk is not None:
     from databricks.sdk import WorkspaceClient
     from databricks.sdk.errors import ResourceDoesNotExist, NotFound
-    from databricks.sdk.service.workspace import
+    from databricks.sdk.service.workspace import ExportFormat, ObjectInfo
     from databricks.sdk.service import catalog as catalog_svc
     from databricks.sdk.dbutils import FileInfo
     from databricks.sdk.service.files import DirectoryEntry
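The notable change in this hunk is the `if TYPE_CHECKING:` block: `Cluster` becomes available for annotations without a runtime import of `compute.cluster`. A minimal sketch of the pattern, assuming (not shown in this diff) that the motivation is an import cycle between the workspace and compute modules:

```python
from typing import TYPE_CHECKING, Optional

if TYPE_CHECKING:
    # Seen only by type checkers; never executed at runtime, so it cannot
    # re-trigger a workspace <-> compute import cycle.
    from yggdrasil.databricks.compute.cluster import Cluster


class Workspace:
    def clusters(self, cluster_id: Optional[str] = None) -> "Cluster":
        # The runtime import is deferred to call time, as in the diff below.
        from yggdrasil.databricks.compute.cluster import Cluster
        return Cluster(workspace=self, cluster_id=cluster_id)
```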
@@ -62,31 +61,8 @@ def _get_env_product_tag():
     v = os.getenv("DATABRICKS_PRODUCT_TAG")
 
     if not v:
-        return "default"
-
-    return v.strip().lower()
-
-
-def _get_remote_size(sdk, target_path: str) -> Optional[int]:
-    """
-    Best-effort fetch remote file size for target_path across
-    DBFS, Volumes, and Workspace. Returns None if not found.
-    """
-    try:
-        if target_path.startswith("dbfs:/"):
-            st = sdk.dbfs.get_status(target_path)
-            return getattr(st, "file_size", None)
-
-        if target_path.startswith("/Volumes"):
-            st = sdk.files.get_status(file_path=target_path)
-            return getattr(st, "file_size", None)
-
-        # Workspace path
-        st = sdk.workspace.get_status(target_path)
-        return getattr(st, "size", None)
-
-    except ResourceDoesNotExist:
         return None
+    return v.strip().lower()
 
 
 @dataclass
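Reading the context lines, the unset-variable fallback of `_get_env_product_tag()` changes from `"default"` to `None` (the surviving `return None` now belongs to this function), and `_get_remote_size()` is deleted; its only caller visible in this diff is the `upload_file_content` helper removed further down. The resulting function, reproduced for clarity:

```python
import os


def _get_env_product_tag():
    v = os.getenv("DATABRICKS_PRODUCT_TAG")

    if not v:
        return None
    return v.strip().lower()


print(_get_env_product_tag())  # None unless DATABRICKS_PRODUCT_TAG is set
```

Callers that relied on the old `"default"` sentinel now have to handle `None`.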
@@ -140,9 +116,7 @@ class Workspace:
         state = self.__dict__.copy()
         state.pop("_sdk", None)
 
-
-
-        state["_was_connected"] = was_connected
+        state["_was_connected"] = self._sdk is not None
         state["_cached_token"] = self.current_token()
 
         return state
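This hunk is a bug fix as much as a cleanup: the removed line read `was_connected`, a name defined nowhere in the surrounding context (a likely `NameError` when `__getstate__` ran), while the replacement computes the flag inline from `_sdk`. A runnable sketch of the resulting pickle contract, with `host` as a hypothetical config field and `current_token()` stubbed:

```python
import pickle


class Workspace:
    def __init__(self, host: str = ""):
        self.host = host   # hypothetical config field
        self._sdk = None   # live WorkspaceClient; not picklable

    def current_token(self):
        return None        # stub for the sketch

    def __getstate__(self):
        state = self.__dict__.copy()
        state.pop("_sdk", None)                          # drop the live client
        state["_was_connected"] = self._sdk is not None  # record the fact instead
        state["_cached_token"] = self.current_token()
        return state


ws = pickle.loads(pickle.dumps(Workspace(host="https://example")))
assert ws._was_connected is False and not hasattr(ws, "_sdk")
```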
@@ -169,8 +143,13 @@ class Workspace:
     # -------------------------
     # Clone
     # -------------------------
-    def clone(
-
+    def clone(
+        self,
+        **kwargs
+    ) -> "Workspace":
+        state = self.__getstate__()
+        state.update(kwargs)
+        return Workspace().__setstate__(state)
 
     # -------------------------
     # SDK connection
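`clone()` now builds a sibling `Workspace` from the pickled state plus keyword overrides. Two reviewer notes: any state key produced by `__getstate__` (such as `_cached_token`) can be overridden, and the one-liner only works if `__setstate__` returns `self`, which the standard pickle protocol does not require, so this leans on a convention in Workspace's own `__setstate__` (not shown in this diff). Hypothetical usage:

```python
ws_copy = ws.clone()                     # same config, no shared SDK client
ws_fresh = ws.clone(_cached_token=None)  # override a single state entry
```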
@@ -308,17 +287,30 @@ class Workspace:
     # ------------------------------------------------------------------ #
     # Path helpers
     # ------------------------------------------------------------------ #
-    def
+    def dbfs_path(
+        self,
+        parts: Union[List[str], str],
+        kind: Optional[DatabricksPathKind] = None,
+        workspace: Optional["Workspace"] = None
+    ):
+        workspace = self if workspace is None else workspace
+
+        if kind is None or isinstance(parts, str):
+            return DatabricksPath.parse(
+                parts=parts,
+                workspace=workspace
+            )
+
         return DatabricksPath(
-
-
-
+            kind=kind,
+            parts=parts,
+            workspace=workspace
         )
 
-    @staticmethod
     def shared_cache_path(
+        self,
         suffix: Optional[str] = None
-    ) ->
+    ) -> DatabricksPath:
         """
         Shared cache base under Volumes for the current user.
         """
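`dbfs_path()` (replacing a helper whose old name the diff rendering lost) dispatches on its inputs: a plain string, or any call without `kind`, goes through `DatabricksPath.parse`, while an explicit `kind` plus a parts list constructs `DatabricksPath` directly. A sketch of both branches; the path and the `DatabricksPathKind.DBFS` member are illustrative, not taken from this diff:

```python
# Branch 1: parse a full path string.
p1 = ws.dbfs_path("dbfs:/FileStore/.ygg/tmp/alice/data.bin")

# Branch 2: explicit kind plus parts list (enum member hypothetical).
p2 = ws.dbfs_path(["FileStore", ".ygg", "tmp"], kind=DatabricksPathKind.DBFS)
```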
@@ -328,31 +320,7 @@ class Workspace:
             return base
 
         suffix = suffix.lstrip("/")
-        return f"{base}/{suffix}"
-
-    def temp_volume_folder(
-        self,
-        suffix: Optional[str] = None,
-        catalog_name: Optional[str] = None,
-        schema_name: Optional[str] = None,
-        volume_name: Optional[str] = None,
-    ) -> str:
-        """
-        Temporary folder either under a UC Volume or dbfs:/FileStore/.ygg/tmp/<user>.
-        """
-        if volume_name:
-            catalog_name = catalog_name or os.getenv("DATABRICKS_CATALOG_NAME")
-            schema_name = schema_name or os.getenv("DATABRICKS_SCHEMA_NAME")
-
-            base = f"/Volumes/{catalog_name}/{schema_name}/{volume_name}"
-        else:
-            base = f"dbfs:/FileStore/.ygg/tmp/{self.current_user.user_name}"
-
-        if not suffix:
-            return base
-
-        suffix = suffix.lstrip("/")
-        return f"{base}/{suffix}"
+        return self.dbfs_path(f"{base}/{suffix}")
 
     # ------------------------------------------------------------------ #
     # SDK access / connection
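Two API changes land in this hunk: `shared_cache_path()` is no longer a `@staticmethod`, and it returns a `DatabricksPath` built via `self.dbfs_path(...)` rather than an f-string, so callers that concatenated onto the old string result may need adjusting, depending on how `DatabricksPath` stringifies. `temp_volume_folder()` is removed outright; the closest surviving equivalent is composing its default base (taken from the deleted code) with `dbfs_path()`:

```python
cache = ws.shared_cache_path("wheels")  # now a DatabricksPath, not a str

# Rough stand-in for the removed temp_volume_folder() default branch;
# the composition is mine, the base path comes from the deleted code.
tmp = ws.dbfs_path(f"dbfs:/FileStore/.ygg/tmp/{ws.current_user.user_name}/scratch")
```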
@@ -415,346 +383,6 @@ class Workspace:
         # 4) finally create the directory path itself
         sdk.files.create_directory(target_path)
 
-    # ------------------------------------------------------------------ #
-    # Upload helpers
-    # ------------------------------------------------------------------ #
-    def upload_file_content(
-        self,
-        content: Union[bytes, BinaryIO],
-        target_path: str,
-        makedirs: bool = True,
-        overwrite: bool = True,
-        only_if_size_diff: bool = False,
-        parallel_pool: Optional[ThreadPoolExecutor] = None,
-    ):
-        """
-        Upload a single content blob into Databricks (Workspace / Volumes / DBFS).
-
-        content:
-            bytes or a binary file-like object.
-
-        target_path:
-            - "dbfs:/..." → DBFS via dbfs.put
-            - "/Volumes/..." → Unity Catalog Volumes via files.upload
-            - anything else → Workspace via workspace.upload
-
-        If parallel_pool is provided, this schedules the upload on the pool
-        and returns a Future. The underlying call is non-parallel (no nested pool).
-
-        If only_if_size_diff=True, it will:
-        - compute local content size (len(bytes))
-        - fetch remote size (best-effort)
-        - skip upload if sizes match.
-        """
-        # If we're doing this in a pool, normalize content to bytes *before*
-        # submitting so we don't share a live file handle across threads.
-        if parallel_pool is not None:
-            if hasattr(content, "read"):
-                data = content.read()
-            else:
-                data = content
-
-            # use a cloned workspace so clients don't collide across threads
-            return parallel_pool.submit(
-                self.clone().upload_file_content,
-                content=data,
-                target_path=target_path,
-                makedirs=makedirs,
-                overwrite=overwrite,
-                only_if_size_diff=only_if_size_diff,
-                parallel_pool=None,
-            )
-
-        with self.connect() as connected:
-            sdk = connected.sdk()
-
-            # Normalize content to bytes once
-            if hasattr(content, "read"):  # BinaryIO
-                data = content.read()
-            else:
-                data = content
-
-            if not isinstance(data, (bytes, bytearray)):
-                if isinstance(data, str):
-                    data = data.encode()
-                else:
-                    raise TypeError(
-                        f"content must be bytes or BinaryIO, got {type(content)!r}"
-                    )
-
-            data_bytes = bytes(data)
-            local_size = len(data_bytes)
-
-            # Only-if-size-diff: check remote size and bail early if equal
-            if only_if_size_diff:
-                remote_size = _get_remote_size(sdk, target_path)
-                if remote_size is not None and remote_size == local_size:
-                    # Same size remotely -> skip upload
-                    return None
-
-            # Ensure parent directory if requested
-            parent = os.path.dirname(target_path)
-
-            if target_path.startswith("dbfs:/"):
-                # --- DBFS path ---
-                if makedirs and parent and parent != "dbfs:/":
-                    sdk.dbfs.mkdirs(parent)
-
-                data_str = base64.b64encode(data_bytes).decode("utf-8")
-                sdk.dbfs.put(
-                    path=target_path,
-                    contents=data_str,
-                    overwrite=overwrite,
-                )
-
-            elif target_path.startswith("/Volumes"):
-                # --- Unity Catalog Volumes path ---
-                if makedirs and parent and parent != "/":
-                    try:
-                        sdk.files.create_directory(parent)
-                    except NotFound:
-                        connected.ensure_uc_volume_and_dir(parent)
-
-                sdk.files.upload(
-                    file_path=target_path,
-                    contents=io.BytesIO(data_bytes),
-                    overwrite=overwrite,
-                )
-
-            else:
-                # --- Workspace Files / Notebooks ---
-                if makedirs and parent:
-                    sdk.workspace.mkdirs(parent)
-
-                sdk.workspace.upload(
-                    path=target_path,
-                    format=ImportFormat.RAW,
-                    content=data_bytes,
-                    overwrite=overwrite,
-                )
-
-    def upload_local_path(
-        self,
-        local_path: str,
-        target_path: str,
-        makedirs: bool = True,
-        overwrite: bool = True,
-        only_if_size_diff: bool = False,
-        parallel_pool: Optional[ThreadPoolExecutor] = None,
-    ):
-        if os.path.isfile(local_path):
-            return self.upload_local_file(
-                local_path=local_path,
-                target_path=target_path,
-                makedirs=makedirs,
-                overwrite=overwrite,
-                only_if_size_diff=only_if_size_diff,
-                parallel_pool=parallel_pool
-            )
-        else:
-            return self.upload_local_folder(
-                local_path=local_path,
-                target_path=target_path,
-                makedirs=makedirs,
-                only_if_size_diff=only_if_size_diff,
-                parallel_pool=parallel_pool
-            )
-
-    def upload_local_file(
-        self,
-        local_path: str,
-        target_path: str,
-        makedirs: bool = True,
-        overwrite: bool = True,
-        only_if_size_diff: bool = False,
-        parallel_pool: Optional[ThreadPoolExecutor] = None,
-    ):
-        """
-        Upload a single local file into Databricks.
-
-        If parallel_pool is provided, this schedules the upload on the pool
-        and returns a Future.
-
-        If only_if_size_diff=True, it will:
-        - For large files (>4 MiB), check remote file status
-        - Skip upload if remote size == local size
-        """
-        if parallel_pool is not None:
-            # Submit a *non-parallel* variant into the pool
-            return parallel_pool.submit(
-                self.upload_local_file,
-                local_path=local_path,
-                target_path=target_path,
-                makedirs=makedirs,
-                overwrite=overwrite,
-                only_if_size_diff=only_if_size_diff,
-                parallel_pool=None,
-            )
-
-        sdk = self.sdk()
-
-        local_size = os.path.getsize(local_path)
-        large_threshold = 32 * 1024
-
-        if only_if_size_diff and local_size > large_threshold:
-            try:
-                info = sdk.workspace.get_status(path=target_path)
-                remote_size = getattr(info, "size", None)
-
-                if remote_size is not None and remote_size == local_size:
-                    return
-            except ResourceDoesNotExist:
-                # Doesn't exist → upload below
-                pass
-
-        with open(local_path, "rb") as f:
-            content = f.read()
-
-        return self.upload_file_content(
-            content=content,
-            target_path=target_path,
-            makedirs=makedirs,
-            overwrite=overwrite,
-            only_if_size_diff=False,
-            parallel_pool=parallel_pool,
-        )
-
-    def upload_local_folder(
-        self,
-        local_path: str,
-        target_path: str,
-        makedirs: bool = True,
-        only_if_size_diff: bool = True,
-        exclude_dir_names: Optional[List[str]] = None,
-        exclude_hidden: bool = True,
-        parallel_pool: Optional[Union[ThreadPoolExecutor, int]] = None,
-    ):
-        """
-        Recursively upload a local folder into Databricks Workspace Files.
-
-        - Traverses subdirectories recursively.
-        - Optionally skips files that match size/mtime of remote entries.
-        - Can upload files in parallel using a ThreadPoolExecutor.
-
-        Args:
-            local_path: Local directory to upload from.
-            target_path: Workspace path to upload into.
-            makedirs: Create remote directories as needed.
-            only_if_size_diff: Skip upload if remote file exists with same size and newer mtime.
-            exclude_dir_names: Directory names to skip entirely.
-            exclude_hidden: Skip dot-prefixed files/directories.
-            parallel_pool: None | ThreadPoolExecutor | int (max_workers).
-        """
-        sdk = self.sdk()
-        local_path = os.path.abspath(local_path)
-        exclude_dirs_set = set(exclude_dir_names or [])
-
-        try:
-            existing_objs = list(sdk.workspace.list(target_path))
-        except ResourceDoesNotExist:
-            existing_objs = []
-
-        # --- setup pool semantics ---
-        created_pool: Optional[ThreadPoolExecutor] = None
-        if isinstance(parallel_pool, int):
-            created_pool = ThreadPoolExecutor(max_workers=parallel_pool)
-            pool: Optional[ThreadPoolExecutor] = created_pool
-        elif isinstance(parallel_pool, ThreadPoolExecutor):
-            pool = parallel_pool
-        else:
-            pool = None
-
-        futures = []
-
-        def _upload_dir(local_root: str, remote_root: str, ensure_dir: bool):
-            # Ensure remote directory exists if requested
-            existing_remote_root_obj = [
-                _ for _ in existing_objs
-                if _.path.startswith(remote_root)
-            ]
-
-            if ensure_dir and not existing_remote_root_obj:
-                sdk.workspace.mkdirs(remote_root)
-
-            try:
-                local_entries = list(os.scandir(local_root))
-            except FileNotFoundError:
-                return
-
-            local_files = []
-            local_dirs = []
-
-            for local_entry in local_entries:
-                # Skip hidden if requested
-                if exclude_hidden and local_entry.name.startswith("."):
-                    continue
-
-                if local_entry.is_dir():
-                    if local_entry.name in exclude_dirs_set:
-                        continue
-                    local_dirs.append(local_entry)
-                elif existing_objs:
-                    found_same_remote = None
-                    for exiting_obj in existing_objs:
-                        existing_obj_name = os.path.basename(exiting_obj.path)
-                        if existing_obj_name == local_entry.name:
-                            found_same_remote = exiting_obj
-                            break
-
-                    if found_same_remote:
-                        found_same_remote_epoch = found_same_remote.modified_at / 1000
-                        local_stats = local_entry.stat()
-
-                        if (
-                            only_if_size_diff
-                            and found_same_remote.size
-                            and found_same_remote.size != local_stats.st_size
-                        ):
-                            pass  # size diff -> upload
-                        elif local_stats.st_mtime < found_same_remote_epoch:
-                            # remote is newer -> skip
-                            continue
-                        else:
-                            local_files.append(local_entry)
-                    else:
-                        local_files.append(local_entry)
-                else:
-                    local_files.append(local_entry)
-
-            # ---- upload files in this directory ----
-            for local_entry in local_files:
-                remote_path = posixpath.join(remote_root, local_entry.name)
-
-                entry_fut = self.upload_local_file(
-                    local_path=local_entry.path,
-                    target_path=remote_path,
-                    makedirs=False,
-                    overwrite=True,
-                    only_if_size_diff=False,
-                    parallel_pool=pool,
-                )
-
-                if pool is not None:
-                    futures.append(entry_fut)
-
-            # ---- recurse into subdirectories ----
-            for local_entry in local_dirs:
-                _upload_dir(
-                    local_entry.path,
-                    posixpath.join(remote_root, local_entry.name),
-                    ensure_dir=makedirs,
-                )
-
-        try:
-            _upload_dir(local_path, target_path, ensure_dir=makedirs)
-
-            if pool is not None:
-                for fut in as_completed(futures):
-                    fut.result()
-        finally:
-            if created_pool is not None:
-                created_pool.shutdown(wait=True)
-
     # ------------------------------------------------------------------ #
     # List / open / delete / SQL
     # ------------------------------------------------------------------ #
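This removal is the bulk of the file's -416: `upload_file_content`, `upload_local_path`, `upload_local_file`, and the recursive, optionally parallel `upload_local_folder` all leave `Workspace`, along with their `ThreadPoolExecutor` plumbing. The summary's `databricks_path.py +192 -283` suggests the upload responsibility moved behind `DatabricksPath`, but that file is not shown here, so treat the migration target as an assumption. For code pinned to the old API, the deleted helper's three-way dispatch can be reproduced with the same SDK calls it used (condensed; `makedirs`, size checks, and pooling omitted):

```python
import base64
import io

from databricks.sdk import WorkspaceClient
from databricks.sdk.service.workspace import ImportFormat


def upload_bytes(sdk: WorkspaceClient, data: bytes, target_path: str, overwrite: bool = True):
    """Three-way upload dispatch, condensed from the deleted upload_file_content."""
    if target_path.startswith("dbfs:/"):
        # DBFS takes base64-encoded string contents (as in the old dbfs.put call).
        sdk.dbfs.put(
            path=target_path,
            contents=base64.b64encode(data).decode("utf-8"),
            overwrite=overwrite,
        )
    elif target_path.startswith("/Volumes"):
        # Unity Catalog Volumes take a binary stream.
        sdk.files.upload(file_path=target_path, contents=io.BytesIO(data), overwrite=overwrite)
    else:
        # Everything else is treated as a Workspace file.
        sdk.workspace.upload(path=target_path, format=ImportFormat.RAW, content=data, overwrite=overwrite)
```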
@@ -895,15 +523,15 @@ class Workspace:
             **kwargs
         )
 
-    def
-
-
-
-
-
+    def clusters(
+        self,
+        cluster_id: Optional[str] = None,
+        cluster_name: Optional[str] = None,
+        **kwargs
+    ) -> "Cluster":
         from ..compute.cluster import Cluster
 
-        return Cluster(workspace=self, **kwargs)
+        return Cluster(workspace=self, cluster_id=cluster_id, cluster_name=cluster_name, **kwargs)
 
 
 # ---------------------------------------------------------------------------
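`clusters()` trades the opaque `**kwargs` pass-through for named `cluster_id`/`cluster_name` parameters and a real `-> "Cluster"` annotation (enabled by the `TYPE_CHECKING` import at the top of the file). Illustrative calls; the identifiers are examples:

```python
cl = ws.clusters(cluster_name="shared-dev")          # resolve by name
cl = ws.clusters(cluster_id="0123-456789-abcdefgh")  # or by cluster id
```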
@@ -935,8 +563,17 @@ class WorkspaceService(ABC):
         self.workspace = self.workspace.connect()
         return self
 
-    def
-
+    def dbfs_path(
+        self,
+        parts: Union[List[str], str],
+        kind: Optional[DatabricksPathKind] = None,
+        workspace: Optional["Workspace"] = None
+    ):
+        return self.workspace.dbfs_path(
+            kind=kind,
+            parts=parts,
+            workspace=workspace
+        )
 
     def sdk(self):
         return self.workspace.sdk()
yggdrasil/pyutils/modules.py CHANGED