ygg 0.1.48__tar.gz → 0.1.50__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ygg-0.1.48 → ygg-0.1.50}/PKG-INFO +3 -1
- {ygg-0.1.48 → ygg-0.1.50}/pyproject.toml +15 -8
- {ygg-0.1.48 → ygg-0.1.50}/src/ygg.egg-info/PKG-INFO +3 -1
- {ygg-0.1.48 → ygg-0.1.50}/src/ygg.egg-info/requires.txt +2 -0
- {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/databricks/compute/cluster.py +45 -19
- {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/databricks/compute/execution_context.py +19 -11
- {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/databricks/compute/remote.py +4 -1
- {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/databricks/sql/statement_result.py +12 -5
- {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/databricks/workspaces/io.py +80 -56
- {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/databricks/workspaces/path.py +101 -50
- {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/databricks/workspaces/workspace.py +45 -27
- {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/libs/pandaslib.py +6 -0
- {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/libs/polarslib.py +5 -0
- {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/pyutils/python_env.py +7 -4
- {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/types/cast/polars_cast.py +1 -0
- ygg-0.1.50/src/yggdrasil/version.py +1 -0
- ygg-0.1.48/src/yggdrasil/version.py +0 -1
- {ygg-0.1.48 → ygg-0.1.50}/LICENSE +0 -0
- {ygg-0.1.48 → ygg-0.1.50}/README.md +0 -0
- {ygg-0.1.48 → ygg-0.1.50}/setup.cfg +0 -0
- {ygg-0.1.48 → ygg-0.1.50}/src/ygg.egg-info/SOURCES.txt +0 -0
- {ygg-0.1.48 → ygg-0.1.50}/src/ygg.egg-info/dependency_links.txt +0 -0
- {ygg-0.1.48 → ygg-0.1.50}/src/ygg.egg-info/entry_points.txt +0 -0
- {ygg-0.1.48 → ygg-0.1.50}/src/ygg.egg-info/top_level.txt +0 -0
- {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/__init__.py +0 -0
- {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/databricks/__init__.py +0 -0
- {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/databricks/compute/__init__.py +0 -0
- {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/databricks/jobs/__init__.py +0 -0
- {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/databricks/jobs/config.py +0 -0
- {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/databricks/sql/__init__.py +0 -0
- {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/databricks/sql/engine.py +0 -0
- {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/databricks/sql/exceptions.py +0 -0
- {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/databricks/sql/types.py +0 -0
- {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/databricks/sql/warehouse.py +0 -0
- {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/databricks/workspaces/__init__.py +0 -0
- {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/databricks/workspaces/filesytem.py +0 -0
- {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/databricks/workspaces/path_kind.py +0 -0
- {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/dataclasses/__init__.py +0 -0
- {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/dataclasses/dataclass.py +0 -0
- {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/libs/__init__.py +0 -0
- {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/libs/databrickslib.py +0 -0
- {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/libs/extensions/__init__.py +0 -0
- {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/libs/extensions/polars_extensions.py +0 -0
- {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/libs/extensions/spark_extensions.py +0 -0
- {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/libs/sparklib.py +0 -0
- {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/pyutils/__init__.py +0 -0
- {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/pyutils/callable_serde.py +0 -0
- {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/pyutils/equality.py +0 -0
- {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/pyutils/exceptions.py +0 -0
- {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/pyutils/expiring_dict.py +0 -0
- {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/pyutils/modules.py +0 -0
- {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/pyutils/parallel.py +0 -0
- {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/pyutils/retry.py +0 -0
- {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/requests/__init__.py +0 -0
- {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/requests/msal.py +0 -0
- {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/requests/session.py +0 -0
- {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/types/__init__.py +0 -0
- {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/types/cast/__init__.py +0 -0
- {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/types/cast/arrow_cast.py +0 -0
- {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/types/cast/cast_options.py +0 -0
- {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/types/cast/pandas_cast.py +0 -0
- {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/types/cast/polars_pandas_cast.py +0 -0
- {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/types/cast/registry.py +0 -0
- {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/types/cast/spark_cast.py +0 -0
- {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/types/cast/spark_pandas_cast.py +0 -0
- {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/types/cast/spark_polars_cast.py +0 -0
- {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/types/python_arrow.py +0 -0
- {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/types/python_defaults.py +0 -0
--- ygg-0.1.48/PKG-INFO
+++ ygg-0.1.50/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ygg
-Version: 0.1.48
+Version: 0.1.50
 Summary: Type-friendly utilities for moving data between Python objects, Arrow, Polars, Pandas, Spark, and Databricks
 Author: Yggdrasil contributors
 License: Apache License
@@ -235,6 +235,8 @@ Requires-Dist: pytest-asyncio; extra == "dev"
 Requires-Dist: black; extra == "dev"
 Requires-Dist: ruff; extra == "dev"
 Requires-Dist: mypy; extra == "dev"
+Requires-Dist: build; extra == "dev"
+Requires-Dist: twine; extra == "dev"
 Dynamic: license-file
 
 # Yggdrasil (Python)
--- ygg-0.1.48/pyproject.toml
+++ ygg-0.1.50/pyproject.toml
@@ -1,17 +1,16 @@
 [build-system]
-
+# bump setuptools so type-info files are handled sanely
+requires = ["setuptools>=69", "wheel"]
 build-backend = "setuptools.build_meta"
 
 [project]
 name = "ygg"
-version = "0.1.48"
+version = "0.1.50"
 description = "Type-friendly utilities for moving data between Python objects, Arrow, Polars, Pandas, Spark, and Databricks"
 readme = { file = "README.md", content-type = "text/markdown" }
 license = { file = "LICENSE" }
 requires-python = ">=3.10"
-authors = [
-    { name = "Yggdrasil contributors" },
-]
+authors = [{ name = "Yggdrasil contributors" }]
 keywords = ["arrow", "polars", "pandas", "spark", "databricks", "typing", "dataclass", "serialization"]
 classifiers = [
     "Development Status :: 3 - Alpha",
@@ -42,6 +41,8 @@ dev = [
     "black",
     "ruff",
     "mypy",
+    "build",
+    "twine",
 ]
 
 [project.scripts]
@@ -55,9 +56,15 @@ Documentation = "https://github.com/Platob/Yggdrasil/tree/main/python/docs"
 [tool.setuptools]
 package-dir = { "" = "src" }
 license-files = ["LICENSE"]
-
-[tool.uv]
-native-tls = true
+include-package-data = true
 
 [tool.setuptools.packages.find]
 where = ["src"]
+
+# If your import package is yggdrasil (seems likely from yggenv entrypoint),
+# ship the PEP 561 marker file. Put `py.typed` inside src/yggdrasil/
+[tool.setuptools.package-data]
+yggdrasil = ["py.typed"]
+
+[tool.uv]
+native-tls = true
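Packaging note: 0.1.50 turns on include-package-data and declares a PEP 561 `py.typed` marker, so type checkers such as mypy will start honoring the package's inline annotations. A minimal post-build check, assuming a wheel has been built into dist/ (the glob pattern and assertion are illustrative, not part of the package):

import glob
from zipfile import ZipFile

# PEP 561 requires the marker file inside the import package itself.
wheel_path = glob.glob("dist/ygg-0.1.50-*.whl")[0]
with ZipFile(wheel_path) as wheel:
    names = wheel.namelist()
    assert any(n.endswith("yggdrasil/py.typed") for n in names), "py.typed not shipped"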
--- ygg-0.1.48/src/ygg.egg-info/PKG-INFO
+++ ygg-0.1.50/src/ygg.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ygg
-Version: 0.1.48
+Version: 0.1.50
 Summary: Type-friendly utilities for moving data between Python objects, Arrow, Polars, Pandas, Spark, and Databricks
 Author: Yggdrasil contributors
 License: Apache License
@@ -235,6 +235,8 @@ Requires-Dist: pytest-asyncio; extra == "dev"
 Requires-Dist: black; extra == "dev"
 Requires-Dist: ruff; extra == "dev"
 Requires-Dist: mypy; extra == "dev"
+Requires-Dist: build; extra == "dev"
+Requires-Dist: twine; extra == "dev"
 Dynamic: license-file
 
 # Yggdrasil (Python)
--- ygg-0.1.48/src/yggdrasil/databricks/compute/cluster.py
+++ ygg-0.1.50/src/yggdrasil/databricks/compute/cluster.py
@@ -144,6 +144,7 @@ class Cluster(WorkspaceService):
         single_user_name: Optional[str] = None,
         runtime_engine: Optional["RuntimeEngine"] = None,
         libraries: Optional[list[str]] = None,
+        update_timeout: Optional[Union[float, dt.timedelta]] = dt.timedelta(minutes=20),
         **kwargs
     ) -> "Cluster":
         """Create or reuse a cluster that mirrors the current Python environment.
@@ -152,9 +153,10 @@ class Cluster(WorkspaceService):
             workspace: Workspace to use for the cluster.
             cluster_id: Optional cluster id to reuse.
             cluster_name: Optional cluster name to reuse.
-            single_user_name: Optional
+            single_user_name: Optional username for single-user clusters.
             runtime_engine: Optional Databricks runtime engine.
             libraries: Optional list of libraries to install.
+            update_timeout: wait timeout, if None it will not wait completion
             **kwargs: Additional cluster specification overrides.
 
         Returns:
@@ -176,6 +178,7 @@ class Cluster(WorkspaceService):
             single_user_name=single_user_name,
             runtime_engine=runtime_engine,
             libraries=libraries,
+            update_timeout=update_timeout,
             **kwargs
         )
     )
@@ -190,6 +193,7 @@ class Cluster(WorkspaceService):
         single_user_name: Optional[str] = "current",
         runtime_engine: Optional["RuntimeEngine"] = None,
         libraries: Optional[list[str]] = None,
+        update_timeout: Optional[Union[float, dt.timedelta]] = dt.timedelta(minutes=20),
         **kwargs
     ) -> "Cluster":
         """Create/update a cluster to match the local Python environment.
@@ -198,9 +202,10 @@ class Cluster(WorkspaceService):
             source: Optional PythonEnv to mirror (defaults to current).
             cluster_id: Optional cluster id to update.
             cluster_name: Optional cluster name to update.
-            single_user_name: Optional single
+            single_user_name: Optional single username for the cluster.
             runtime_engine: Optional runtime engine selection.
             libraries: Optional list of libraries to install.
+            update_timeout: wait timeout, if None it will not wait completion
             **kwargs: Additional cluster specification overrides.
 
         Returns:
@@ -242,6 +247,7 @@ class Cluster(WorkspaceService):
             single_user_name=single_user_name,
             runtime_engine=runtime_engine or RuntimeEngine.PHOTON,
             libraries=libraries,
+            update_timeout=update_timeout,
             **kwargs
         )
 
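The hunks above thread a new update_timeout parameter (float seconds, datetime.timedelta, or None to skip waiting) through the environment-replication entry points. A hedged usage sketch; the workspace object and cluster name are assumptions, only the method and parameter names come from this diff:

import datetime as dt

# `workspace` is a configured Workspace instance (construction not shown in this diff)
cluster = workspace.clusters().replicated_current_environment(
    workspace=workspace,
    cluster_name="ygg-mirror",                # hypothetical name
    update_timeout=dt.timedelta(minutes=30),  # None returns without waiting for completion
)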
@@ -380,7 +386,9 @@ class Cluster(WorkspaceService):
         start = time.time()
         sleep_time = tick
 
-        if
+        if not timeout:
+            timeout = 20 * 60.0
+        elif isinstance(timeout, dt.timedelta):
             timeout = timeout.total_seconds()
 
         while self.is_pending:
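wait_for_status now normalizes its timeout argument before polling: falsy values (None or 0) fall back to the 20-minute default and timedeltas are converted to seconds. The rule, isolated as a runnable sketch:

import datetime as dt
from typing import Optional, Union

def normalize_timeout(timeout: Optional[Union[float, dt.timedelta]]) -> float:
    # Mirrors the new prologue: falsy -> 20 minutes, timedelta -> seconds, float -> as-is.
    if not timeout:
        return 20 * 60.0
    if isinstance(timeout, dt.timedelta):
        return timeout.total_seconds()
    return timeout

assert normalize_timeout(None) == 1200.0
assert normalize_timeout(dt.timedelta(minutes=5)) == 300.0
assert normalize_timeout(90.0) == 90.0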
@@ -412,12 +420,14 @@ class Cluster(WorkspaceService):
         # Extract "major.minor" from strings like "17.3.x-scala2.13-ml-gpu"
         v = self.spark_version
 
-        if v
+        if not v:
             return None
 
         parts = v.split(".")
+
         if len(parts) < 2:
             return None
+
         return ".".join(parts[:2])  # e.g. "17.3"
 
     @property
@@ -428,8 +438,10 @@ class Cluster(WorkspaceService):
         When the runtime can't be mapped, returns ``None``.
         """
         v = self.runtime_version
-
+
+        if not v:
             return None
+
         return _PYTHON_BY_DBR.get(v)
 
     # ------------------------------------------------------------------ #
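Both version properties now guard against empty values with `if not v`. The "major.minor" extraction that runtime_version performs, as a self-contained sketch:

from typing import Optional

def major_minor(spark_version: Optional[str]) -> Optional[str]:
    # e.g. "17.3.x-scala2.13-ml-gpu" -> "17.3"
    if not spark_version:
        return None
    parts = spark_version.split(".")
    if len(parts) < 2:
        return None
    return ".".join(parts[:2])

assert major_minor("17.3.x-scala2.13-ml-gpu") == "17.3"
assert major_minor("") is None
assert major_minor("17") is None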
@@ -586,6 +598,7 @@ class Cluster(WorkspaceService):
         cluster_id: Optional[str] = None,
         cluster_name: Optional[str] = None,
         libraries: Optional[List[Union[str, "Library"]]] = None,
+        update_timeout: Optional[Union[float, dt.timedelta]] = dt.timedelta(minutes=20),
         **cluster_spec: Any
     ):
         """Create a new cluster or update an existing one.
@@ -594,6 +607,7 @@ class Cluster(WorkspaceService):
             cluster_id: Optional cluster id to update.
             cluster_name: Optional cluster name to update or create.
             libraries: Optional libraries to install.
+            update_timeout: wait timeout, if None it will not wait completion
             **cluster_spec: Cluster specification overrides.
 
         Returns:
@@ -609,24 +623,28 @@ class Cluster(WorkspaceService):
             return found.update(
                 cluster_name=cluster_name,
                 libraries=libraries,
+                wait_timeout=update_timeout,
                 **cluster_spec
             )
 
         return self.create(
             cluster_name=cluster_name,
             libraries=libraries,
+            wait_timeout=update_timeout,
             **cluster_spec
         )
 
     def create(
         self,
         libraries: Optional[List[Union[str, "Library"]]] = None,
+        wait_timeout: Union[float, dt.timedelta] = dt.timedelta(minutes=20),
         **cluster_spec: Any
     ) -> str:
         """Create a new cluster and optionally install libraries.
 
         Args:
             libraries: Optional list of libraries to install after creation.
+            wait_timeout: wait timeout, if None it will not wait completion
             **cluster_spec: Cluster specification overrides.
 
         Returns:
@@ -646,14 +664,17 @@ class Cluster(WorkspaceService):
             update_details,
         )
 
-        self.details = self.clusters_client().
+        self.details = self.clusters_client().create(**update_details)
 
         LOGGER.info(
             "Created %s",
             self
         )
 
-        self.install_libraries(libraries=libraries, raise_error=False)
+        self.install_libraries(libraries=libraries, raise_error=False, wait_timeout=None)
+
+        if wait_timeout:
+            self.wait_for_status(timeout=wait_timeout)
 
         return self
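create() changes behavior in two ways: install_libraries is now fired without waiting (wait_timeout=None), and the method itself optionally blocks until the cluster is up. A hedged caller sketch; the spec values are hypothetical, the parameter names come from the hunks above:

import datetime as dt

# given a Cluster instance `cluster`:
cluster.create(
    cluster_name="ygg-worker",              # forwarded via **cluster_spec
    libraries=["pandas", "polars"],         # installation no longer blocks creation
    wait_timeout=dt.timedelta(minutes=10),  # None returns immediately after create
)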
@@ -661,7 +682,7 @@ class Cluster(WorkspaceService):
         self,
         libraries: Optional[List[Union[str, "Library"]]] = None,
         access_control_list: Optional[List["ClusterAccessControlRequest"]] = None,
-        wait_timeout: Union[float, dt.timedelta] = dt.timedelta(minutes=20),
+        wait_timeout: Optional[Union[float, dt.timedelta]] = dt.timedelta(minutes=20),
         **cluster_spec: Any
     ) -> "Cluster":
         """Update cluster configuration and optionally install libraries.
@@ -708,7 +729,7 @@ class Cluster(WorkspaceService):
             self, diff
         )
 
-        self.wait_for_status()
+        self.wait_for_status(timeout=wait_timeout)
         self.clusters_client().edit(**update_details)
         self.update_permissions(access_control_list=access_control_list)
 
@@ -727,7 +748,7 @@ class Cluster(WorkspaceService):
         access_control_list: Optional[List["ClusterAccessControlRequest"]] = None,
     ):
         if not access_control_list:
-
+            return self
 
         access_control_list = self._check_permission(access_control_list)
 
@@ -745,6 +766,7 @@ class Cluster(WorkspaceService):
                 permission_level=ClusterPermissionLevel.CAN_MANAGE
             )
             for name in current_groups
+            if name not in {"users"}
         ]
 
     def _check_permission(
@@ -862,18 +884,22 @@ class Cluster(WorkspaceService):
         Returns:
             The current Cluster instance.
         """
+        if self.is_running:
+            return self
+
         self.wait_for_status()
 
-        if
-
+        if self.is_running:
+            return self
 
-
-
-
-        else:
-            self.clusters_client().start(cluster_id=self.cluster_id)
+        LOGGER.debug("Starting %s", self)
+
+        self.clusters_client().start(cluster_id=self.cluster_id)
 
-
+        LOGGER.info("Started %s", self)
+
+        if wait_timeout:
+            self.wait_for_status(timeout=wait_timeout.total_seconds())
 
         return self
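start() is now idempotent: it returns early when the cluster is already running, checked both before and after settling a pending state, and waits after the start call only when asked. One sharp edge: the final wait calls wait_timeout.total_seconds(), so here wait_timeout must be a datetime.timedelta; a plain float, accepted by the other new timeout parameters, would raise AttributeError. A defensive caller sketch (assuming wait_timeout is the method's parameter; its signature sits outside this hunk):

import datetime as dt

# given a Cluster instance `cluster`:
cluster.start(wait_timeout=dt.timedelta(minutes=15))  # keep it a timedelta, not a float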
@@ -889,7 +915,7 @@ class Cluster(WorkspaceService):
 
         if self.is_running:
             self.details = self.clusters_client().restart_and_wait(cluster_id=self.cluster_id)
-            return self
+            return self
 
         return self.start()
--- ygg-0.1.48/src/yggdrasil/databricks/compute/execution_context.py
+++ ygg-0.1.50/src/yggdrasil/databricks/compute/execution_context.py
@@ -180,7 +180,7 @@ print(json.dumps(meta))"""
         """
         return self.cluster.workspace.sdk()
 
-    def
+    def create_command(
         self,
         language: "Language",
     ) -> any:
@@ -192,17 +192,29 @@ print(json.dumps(meta))"""
         Returns:
             The created command execution context response.
         """
-        self.cluster.ensure_running()
-
         LOGGER.debug(
             "Creating Databricks command execution context for %s",
             self.cluster
         )
 
-        created = self._workspace_client().command_execution.create_and_wait(
-            cluster_id=self.cluster.cluster_id,
-            language=language,
+        try:
+            created = self._workspace_client().command_execution.create_and_wait(
+                cluster_id=self.cluster.cluster_id,
+                language=language,
+            )
+        except:
+            self.cluster.ensure_running()
+
+            created = self._workspace_client().command_execution.create_and_wait(
+                cluster_id=self.cluster.cluster_id,
+                language=language,
+            )
+
+        LOGGER.info(
+            "Created Databricks command execution context %s",
+            self
         )
+
         created = getattr(created, "response", created)
 
         return created
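create_command now attempts context creation optimistically and only calls ensure_running() on failure, saving a cluster-state round trip on the happy path. Note the bare `except:` in the source, which also catches KeyboardInterrupt and SystemExit; the same try-once-then-retry shape with a narrower filter, as a sketch:

def create_context(client, cluster, cluster_id, language):
    # First attempt assumes the cluster is usable; on any error, ensure it runs and retry once.
    try:
        return client.command_execution.create_and_wait(cluster_id=cluster_id, language=language)
    except Exception:  # narrower than the bare except: used in the diff
        cluster.ensure_running()
        return client.command_execution.create_and_wait(cluster_id=cluster_id, language=language)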
@@ -220,10 +232,6 @@ print(json.dumps(meta))"""
             The connected ExecutionContext instance.
         """
         if self.context_id is not None:
-            LOGGER.debug(
-                "Execution context already open for %s",
-                self
-            )
             return self
 
         self.language = language or self.language
@@ -231,7 +239,7 @@ print(json.dumps(meta))"""
         if self.language is None:
             self.language = Language.PYTHON
 
-        ctx = self.
+        ctx = self.create_command(language=self.language)
 
         context_id = ctx.id
         if not context_id:
--- ygg-0.1.48/src/yggdrasil/databricks/compute/remote.py
+++ ygg-0.1.50/src/yggdrasil/databricks/compute/remote.py
@@ -39,6 +39,7 @@ def databricks_remote_compute(
     timeout: Optional[dt.timedelta] = None,
     env_keys: Optional[List[str]] = None,
     force_local: bool = False,
+    update_timeout: Optional[Union[float, dt.timedelta]] = None,
     **options
 ) -> Callable[[Callable[..., ReturnType]], Callable[..., ReturnType]]:
     """Return a decorator that executes functions on a remote cluster.
@@ -52,6 +53,7 @@ def databricks_remote_compute(
         timeout: Optional execution timeout for remote calls.
         env_keys: Optional environment variable names to forward.
         force_local: Force local execution
+        update_timeout: creation or update wait timeout
         **options: Extra options forwarded to the execution decorator.
 
     Returns:
@@ -82,7 +84,8 @@ def databricks_remote_compute(
         cluster = workspace.clusters().replicated_current_environment(
             workspace=workspace,
             cluster_name=cluster_name,
-            single_user_name=workspace.current_user.user_name
+            single_user_name=workspace.current_user.user_name,
+            update_timeout=update_timeout
         )
 
         cluster.ensure_running(wait_timeout=None)
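Since update_timeout defaults to None here, the decorator no longer blocks while the replicated cluster is created or updated. A hedged usage sketch; the import path is inferred from the file list, the cluster name is hypothetical, and other binding options are omitted:

import datetime as dt
from yggdrasil.databricks.compute.remote import databricks_remote_compute

@databricks_remote_compute(
    cluster_name="ygg-remote",
    update_timeout=dt.timedelta(minutes=20),  # opt back in to waiting on create/update
)
def row_total(n: int) -> int:
    return sum(range(n))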
--- ygg-0.1.48/src/yggdrasil/databricks/sql/statement_result.py
+++ ygg-0.1.50/src/yggdrasil/databricks/sql/statement_result.py
@@ -344,10 +344,17 @@ class StatementResult:
         if self.persisted:
             if self._arrow_table is not None:
                 return self._arrow_table.schema
-
+            elif self._spark_df is not None:
+                return spark_schema_to_arrow_schema(self._spark_df.schema)
+            raise NotImplementedError("")
+
+        manifest = self.manifest
+
+        if manifest is None:
+            return pa.schema([])
 
         fields = [
-            column_info_to_arrow_field(_) for _ in
+            column_info_to_arrow_field(_) for _ in manifest.schema.columns
         ]
 
         return pa.schema(fields)
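arrow_schema() now resolves from whichever materialization is available and degrades to an empty schema rather than failing when no manifest exists. The priority order, condensed into a sketch (helper names are the ones this hunk calls; the persisted-flag handling is simplified):

import pyarrow as pa

def resolve_schema(arrow_table, spark_df, manifest):
    if arrow_table is not None:
        return arrow_table.schema
    if spark_df is not None:
        return spark_schema_to_arrow_schema(spark_df.schema)  # helper used in the hunk
    if manifest is None:
        return pa.schema([])  # empty schema instead of an error
    return pa.schema([column_info_to_arrow_field(c) for c in manifest.schema.columns])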
@@ -362,7 +369,7 @@ class StatementResult:
             An Arrow Table containing all rows.
         """
         if self.persisted:
-            if self._arrow_table:
+            if self._arrow_table is not None:
                 return self._arrow_table
             else:
                 return self._spark_df.toArrow()
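The move from `if self._arrow_table:` to an identity check is not cosmetic: pyarrow tables define their length as the row count, so a cached zero-row result is falsy and the old check would wrongly fall through to the Spark branch. Concretely:

import pyarrow as pa

empty = pa.table({"x": pa.array([], type=pa.int64())})
assert len(empty) == 0
assert not empty            # a zero-row table is falsy
assert empty is not None    # the identity check still sees the cached object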
@@ -370,7 +377,6 @@ class StatementResult:
         batches = list(self.to_arrow_batches(parallel_pool=parallel_pool))
 
         if not batches:
-            # empty table with no columns
             return pa.Table.from_batches([], schema=self.arrow_schema())
 
         return pa.Table.from_batches(batches)
@@ -501,8 +507,9 @@ class StatementResult:
         Returns:
             A Spark DataFrame with the result rows.
         """
-        if self._spark_df:
+        if self._spark_df is not None:
             return self._spark_df
 
         self._spark_df = arrow_table_to_spark_dataframe(self.to_arrow_table())
+
         return self._spark_df