ygg 0.1.29__tar.gz → 0.1.31__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ygg-0.1.29 → ygg-0.1.31}/PKG-INFO +1 -1
- {ygg-0.1.29 → ygg-0.1.31}/pyproject.toml +1 -1
- {ygg-0.1.29 → ygg-0.1.31}/src/ygg.egg-info/PKG-INFO +1 -1
- {ygg-0.1.29 → ygg-0.1.31}/src/ygg.egg-info/SOURCES.txt +4 -1
- {ygg-0.1.29 → ygg-0.1.31}/src/yggdrasil/databricks/compute/cluster.py +41 -21
- {ygg-0.1.29 → ygg-0.1.31}/src/yggdrasil/databricks/compute/execution_context.py +9 -10
- {ygg-0.1.29 → ygg-0.1.31}/src/yggdrasil/databricks/compute/remote.py +10 -6
- {ygg-0.1.29 → ygg-0.1.31}/src/yggdrasil/databricks/jobs/config.py +2 -30
- {ygg-0.1.29 → ygg-0.1.31}/src/yggdrasil/databricks/sql/engine.py +4 -2
- {ygg-0.1.29 → ygg-0.1.31}/src/yggdrasil/databricks/sql/statement_result.py +18 -3
- {ygg-0.1.29 → ygg-0.1.31}/src/yggdrasil/databricks/sql/types.py +16 -0
- ygg-0.1.31/src/yggdrasil/databricks/workspaces/__init__.py +4 -0
- ygg-0.1.31/src/yggdrasil/databricks/workspaces/filesytem.py +161 -0
- ygg-0.1.31/src/yggdrasil/databricks/workspaces/io.py +745 -0
- ygg-0.1.31/src/yggdrasil/databricks/workspaces/path.py +1120 -0
- ygg-0.1.31/src/yggdrasil/databricks/workspaces/path_kind.py +10 -0
- ygg-0.1.31/src/yggdrasil/databricks/workspaces/workspace.py +530 -0
- {ygg-0.1.29 → ygg-0.1.31}/src/yggdrasil/pyutils/callable_serde.py +1 -0
- {ygg-0.1.29 → ygg-0.1.31}/src/yggdrasil/pyutils/modules.py +1 -1
- {ygg-0.1.29 → ygg-0.1.31}/src/yggdrasil/pyutils/python_env.py +81 -264
- {ygg-0.1.29 → ygg-0.1.31}/src/yggdrasil/types/cast/arrow_cast.py +9 -0
- ygg-0.1.29/src/yggdrasil/databricks/workspaces/__init__.py +0 -2
- ygg-0.1.29/src/yggdrasil/databricks/workspaces/databricks_path.py +0 -875
- ygg-0.1.29/src/yggdrasil/databricks/workspaces/workspace.py +0 -946
- {ygg-0.1.29 → ygg-0.1.31}/LICENSE +0 -0
- {ygg-0.1.29 → ygg-0.1.31}/README.md +0 -0
- {ygg-0.1.29 → ygg-0.1.31}/setup.cfg +0 -0
- {ygg-0.1.29 → ygg-0.1.31}/src/ygg.egg-info/dependency_links.txt +0 -0
- {ygg-0.1.29 → ygg-0.1.31}/src/ygg.egg-info/entry_points.txt +0 -0
- {ygg-0.1.29 → ygg-0.1.31}/src/ygg.egg-info/requires.txt +0 -0
- {ygg-0.1.29 → ygg-0.1.31}/src/ygg.egg-info/top_level.txt +0 -0
- {ygg-0.1.29 → ygg-0.1.31}/src/yggdrasil/__init__.py +0 -0
- {ygg-0.1.29 → ygg-0.1.31}/src/yggdrasil/databricks/__init__.py +0 -0
- {ygg-0.1.29 → ygg-0.1.31}/src/yggdrasil/databricks/compute/__init__.py +0 -0
- {ygg-0.1.29 → ygg-0.1.31}/src/yggdrasil/databricks/jobs/__init__.py +0 -0
- {ygg-0.1.29 → ygg-0.1.31}/src/yggdrasil/databricks/sql/__init__.py +0 -0
- {ygg-0.1.29 → ygg-0.1.31}/src/yggdrasil/databricks/sql/exceptions.py +0 -0
- {ygg-0.1.29 → ygg-0.1.31}/src/yggdrasil/dataclasses/__init__.py +0 -0
- {ygg-0.1.29 → ygg-0.1.31}/src/yggdrasil/dataclasses/dataclass.py +0 -0
- {ygg-0.1.29 → ygg-0.1.31}/src/yggdrasil/libs/__init__.py +0 -0
- {ygg-0.1.29 → ygg-0.1.31}/src/yggdrasil/libs/databrickslib.py +0 -0
- {ygg-0.1.29 → ygg-0.1.31}/src/yggdrasil/libs/extensions/__init__.py +0 -0
- {ygg-0.1.29 → ygg-0.1.31}/src/yggdrasil/libs/extensions/polars_extensions.py +0 -0
- {ygg-0.1.29 → ygg-0.1.31}/src/yggdrasil/libs/extensions/spark_extensions.py +0 -0
- {ygg-0.1.29 → ygg-0.1.31}/src/yggdrasil/libs/pandaslib.py +0 -0
- {ygg-0.1.29 → ygg-0.1.31}/src/yggdrasil/libs/polarslib.py +0 -0
- {ygg-0.1.29 → ygg-0.1.31}/src/yggdrasil/libs/sparklib.py +0 -0
- {ygg-0.1.29 → ygg-0.1.31}/src/yggdrasil/pyutils/__init__.py +0 -0
- {ygg-0.1.29 → ygg-0.1.31}/src/yggdrasil/pyutils/exceptions.py +0 -0
- {ygg-0.1.29 → ygg-0.1.31}/src/yggdrasil/pyutils/parallel.py +0 -0
- {ygg-0.1.29 → ygg-0.1.31}/src/yggdrasil/pyutils/retry.py +0 -0
- {ygg-0.1.29 → ygg-0.1.31}/src/yggdrasil/requests/__init__.py +0 -0
- {ygg-0.1.29 → ygg-0.1.31}/src/yggdrasil/requests/msal.py +0 -0
- {ygg-0.1.29 → ygg-0.1.31}/src/yggdrasil/requests/session.py +0 -0
- {ygg-0.1.29 → ygg-0.1.31}/src/yggdrasil/types/__init__.py +0 -0
- {ygg-0.1.29 → ygg-0.1.31}/src/yggdrasil/types/cast/__init__.py +0 -0
- {ygg-0.1.29 → ygg-0.1.31}/src/yggdrasil/types/cast/cast_options.py +0 -0
- {ygg-0.1.29 → ygg-0.1.31}/src/yggdrasil/types/cast/pandas_cast.py +0 -0
- {ygg-0.1.29 → ygg-0.1.31}/src/yggdrasil/types/cast/polars_cast.py +0 -0
- {ygg-0.1.29 → ygg-0.1.31}/src/yggdrasil/types/cast/polars_pandas_cast.py +0 -0
- {ygg-0.1.29 → ygg-0.1.31}/src/yggdrasil/types/cast/registry.py +0 -0
- {ygg-0.1.29 → ygg-0.1.31}/src/yggdrasil/types/cast/spark_cast.py +0 -0
- {ygg-0.1.29 → ygg-0.1.31}/src/yggdrasil/types/cast/spark_pandas_cast.py +0 -0
- {ygg-0.1.29 → ygg-0.1.31}/src/yggdrasil/types/cast/spark_polars_cast.py +0 -0
- {ygg-0.1.29 → ygg-0.1.31}/src/yggdrasil/types/libs.py +0 -0
- {ygg-0.1.29 → ygg-0.1.31}/src/yggdrasil/types/python_arrow.py +0 -0
- {ygg-0.1.29 → ygg-0.1.31}/src/yggdrasil/types/python_defaults.py +0 -0
--- ygg-0.1.29/pyproject.toml
+++ ygg-0.1.31/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "ygg"
-version = "0.1.29"
+version = "0.1.31"
 description = "Type-friendly utilities for moving data between Python objects, Arrow, Polars, Pandas, Spark, and Databricks"
 readme = { file = "README.md", content-type = "text/markdown" }
 license = { file = "LICENSE" }
--- ygg-0.1.29/src/ygg.egg-info/SOURCES.txt
+++ ygg-0.1.31/src/ygg.egg-info/SOURCES.txt
@@ -21,7 +21,10 @@ src/yggdrasil/databricks/sql/exceptions.py
 src/yggdrasil/databricks/sql/statement_result.py
 src/yggdrasil/databricks/sql/types.py
 src/yggdrasil/databricks/workspaces/__init__.py
-src/yggdrasil/databricks/workspaces/databricks_path.py
+src/yggdrasil/databricks/workspaces/filesytem.py
+src/yggdrasil/databricks/workspaces/io.py
+src/yggdrasil/databricks/workspaces/path.py
+src/yggdrasil/databricks/workspaces/path_kind.py
 src/yggdrasil/databricks/workspaces/workspace.py
 src/yggdrasil/dataclasses/__init__.py
 src/yggdrasil/dataclasses/dataclass.py
--- ygg-0.1.29/src/yggdrasil/databricks/compute/cluster.py
+++ ygg-0.1.31/src/yggdrasil/databricks/compute/cluster.py
@@ -141,7 +141,7 @@ class Cluster(WorkspaceService):
         source: Optional[PythonEnv] = None,
         cluster_id: Optional[str] = None,
         cluster_name: Optional[str] = None,
-        single_user_name: Optional[str] = None,
+        single_user_name: Optional[str] = "current",
         runtime_engine: Optional["RuntimeEngine"] = None,
         libraries: Optional[list[str]] = None,
         **kwargs
@@ -153,7 +153,6 @@ class Cluster(WorkspaceService):
         libraries.extend([
             _ for _ in [
                 "ygg",
-                "dill",
                 "uv",
             ] if _ not in libraries
         ])
@@ -165,11 +164,22 @@ class Cluster(WorkspaceService):
         elif python_version[1] < 11:
             python_version = None
 
+        current_user_name = self.workspace.current_user.user_name
+
+        if single_user_name == "current":
+            single_user_name = current_user_name
+
+        cluster_id = cluster_id or self.cluster_id
+        cluster_name = cluster_name or self.cluster_name
+
+        if not cluster_id and not cluster_name:
+            cluster_name = current_user_name
+
         inst = self.create_or_update(
             cluster_id=cluster_id,
-            cluster_name=cluster_name
+            cluster_name=cluster_name,
             python_version=python_version,
-            single_user_name=single_user_name
+            single_user_name=single_user_name,
             runtime_engine=runtime_engine or RuntimeEngine.PHOTON,
             libraries=libraries,
             **kwargs
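The new defaults resolve `single_user_name="current"` to the calling user and, when neither `cluster_id` nor `cluster_name` is supplied, name the cluster after that user. A minimal usage sketch, assuming only the `Workspace` and `clusters()` accessors visible elsewhere in this diff (the host is a placeholder):

```python
from yggdrasil.databricks.workspaces.workspace import Workspace

ws = Workspace(host="https://example.cloud.databricks.com")  # hypothetical host

# With the new defaults this creates or updates a Photon cluster dedicated to
# the calling user, named after that user, with "ygg" and "uv" as libraries.
cluster = ws.clusters().replicated_current_environment()
```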
@@ -180,11 +190,10 @@ class Cluster(WorkspaceService):
     def pull_python_environment(
         self,
         name: Optional[str] = None,
-        target: Optional[PythonEnv] = None,
+        target: PythonEnv | str | None = None,
     ):
         with self.context() as c:
             m = c.remote_metadata
-            requirements = m.requirements
             version_info = m.version_info
 
             python_version = ".".join(str(_) for _ in version_info)
@@ -192,14 +201,20 @@ class Cluster(WorkspaceService):
             if target is None:
                 target = PythonEnv.create(
                     name=name or self.name,
-                    requirements=requirements,
                     python=python_version
                 )
-
-                target.
-
-
-
+            elif isinstance(target, str):
+                if target.casefold() == "current":
+                    target = PythonEnv.get_current()
+                else:
+                    target = PythonEnv.create(
+                        name=target,
+                        python=python_version
+                    )
+
+            target.update(
+                python=python_version,
+            )
 
             return target
 
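`pull_python_environment` now accepts a string target as well as a `PythonEnv`. Based on the branches above, a hedged sketch of the three call shapes:

```python
# None: create a local env named after the cluster, pinned to its Python version
env = cluster.pull_python_environment()

# "current" (case-insensitive): reuse the currently active local environment
env = cluster.pull_python_environment(target="current")

# any other string: create a new env with that name (name shown is hypothetical)
env = cluster.pull_python_environment(target="scratch-env")
```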
@@ -670,6 +685,9 @@ class Cluster(WorkspaceService):
             def h(z): ...
         """
         def decorator(func: Callable):
+            if os.getenv("DATABRICKS_RUNTIME_VERSION") is not None:
+                return func
+
             context = self.context(language=language or Language.PYTHON)
             serialized = CallableSerde.from_callable(func)
 
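The new guard makes the decorator a no-op when the code is already running on a Databricks cluster, where the `DATABRICKS_RUNTIME_VERSION` environment variable is set, so the callable executes in place instead of being serialized and round-tripped. The check in isolation (function name is hypothetical):

```python
import os

def on_databricks_runtime() -> bool:
    # Databricks runtimes export e.g. DATABRICKS_RUNTIME_VERSION="14.3"
    return os.getenv("DATABRICKS_RUNTIME_VERSION") is not None
```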
@@ -850,8 +868,11 @@ class Cluster(WorkspaceService):
             target_path = self.workspace.shared_cache_path(
                 suffix=f"/clusters/{self.cluster_id}/{os.path.basename(value)}"
             )
-
-            value =
+
+            with open(value, mode="rb") as f:
+                target_path.write_bytes(f.read())
+
+            value = str(target_path)
         elif "." in value and not "/" in value:
             value = value.split(".")[0]
 
@@ -865,13 +886,12 @@ class Cluster(WorkspaceService):
 
         repo = None
 
-        if pip_settings.extra_index_url
-
-
-
-
-            repo = pip_settings.extra_index_url
+        if pip_settings.extra_index_url and (
+            value.startswith("datamanagement")
+            or value.startswith("TSSecrets")
+            or value.startswith("tgp_")
+        ):
+            repo = pip_settings.extra_index_url
 
         return Library(
             pypi=PythonPyPiLibrary(
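The rewritten condition routes only distributions with specific internal-looking prefixes to the configured extra index; everything else installs from the default index. Equivalent standalone logic (prefix list copied from the hunk, function name hypothetical):

```python
INTERNAL_PREFIXES = ("datamanagement", "TSSecrets", "tgp_")

def choose_repo(package_name: str, extra_index_url: str | None) -> str | None:
    # str.startswith accepts a tuple, so one call covers all prefixes.
    if extra_index_url and package_name.startswith(INTERNAL_PREFIXES):
        return extra_index_url
    return None
```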
--- ygg-0.1.29/src/yggdrasil/databricks/compute/execution_context.py
+++ ygg-0.1.31/src/yggdrasil/databricks/compute/execution_context.py
@@ -273,7 +273,6 @@ print(json.dumps(meta))"""
         print_stdout: Optional[bool] = True,
         timeout: Optional[dt.timedelta] = None,
         command: Optional[str] = None,
-        use_dill: Optional[bool] = None
     ) -> Any:
         if self.is_in_databricks_environment():
             args = args or []
@@ -291,16 +290,18 @@ print(json.dumps(meta))"""
 
         serialized = CallableSerde.from_callable(func)
 
-
+        if serialized.pkg_root:
+            self.install_temporary_libraries(libraries=serialized.pkg_root)
 
-        # Use dill of same version
         current_version = (sys.version_info.major, sys.version_info.minor)
 
-        if
-
-
-
-
+        if current_version != self.cluster.python_version[:2]:
+            raise RuntimeError(
+                f"Cannot execute callable: local Python version "
+                f"{current_version[0]}.{current_version[1]} does not match "
+                f"remote cluster Python version "
+                f"{self.cluster.python_version[0]}.{self.cluster.python_version[1]}"
+            )
 
         result_tag = "<<<RESULT>>>"
 
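With dill gone, cross-version pickling is no longer papered over: execution now fails fast when the local interpreter's (major, minor) differs from the cluster's. A self-contained sketch of the same check (the remote tuple stands in for `self.cluster.python_version[:2]`):

```python
import sys

local = (sys.version_info.major, sys.version_info.minor)
remote = (3, 11)  # hypothetical value of cluster.python_version[:2]

if local != remote:
    raise RuntimeError(
        f"Cannot execute callable: local Python {local[0]}.{local[1]} "
        f"does not match remote cluster Python {remote[0]}.{remote[1]}"
    )
```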
@@ -340,7 +341,6 @@ print(json.dumps(meta))"""
             print_stdout=print_stdout,
             timeout=timeout,
             command=command,
-            use_dill=use_dill
         )
         raise remote_module_error
 
@@ -497,7 +497,6 @@ with zipfile.ZipFile(buf, "r") as zf:
         ]
 
         resolved = resolve_local_lib_path(libraries)
-        resolved_str = str(resolved)
 
         remote_site_packages_path = self.remote_metadata.site_packages_path
         if resolved.is_dir():
--- ygg-0.1.29/src/yggdrasil/databricks/compute/remote.py
+++ ygg-0.1.31/src/yggdrasil/databricks/compute/remote.py
@@ -4,9 +4,12 @@ from typing import (
     Callable,
     Optional,
     TypeVar,
-    List,
+    List, TYPE_CHECKING,
 )
 
+if TYPE_CHECKING:
+    from .cluster import Cluster
+
 from ..workspaces.workspace import Workspace
 
 ReturnType = TypeVar("ReturnType")
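Moving the `Cluster` import under `TYPE_CHECKING` keeps the name available for annotations without importing `cluster.py` (and its dependencies) at runtime, the usual way to break an import cycle. A generic sketch of the pattern:

```python
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    from .cluster import Cluster  # evaluated only by static type checkers

def attach(cluster: "Cluster") -> None:  # string annotation, no runtime import
    ...
```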
@@ -23,16 +26,17 @@ def databricks_remote_compute(
     env_keys: Optional[List[str]] = None,
     **options
 ) -> Callable[[Callable[..., ReturnType]], Callable[..., ReturnType]]:
-    from .. import Cluster
-
     if isinstance(workspace, str):
         workspace = Workspace(host=workspace)
 
     if cluster is None:
-        if cluster_id:
-            cluster =
+        if cluster_id or cluster_name:
+            cluster = workspace.clusters(
+                cluster_id=cluster_id,
+                cluster_name=cluster_name
+            )
         else:
-            cluster =
+            cluster = workspace.clusters().replicated_current_environment(
                 workspace=workspace,
                 cluster_name=cluster_name
             )
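Cluster resolution now honors `cluster_name` as well as `cluster_id`, with a fallback to replicating the current environment. A hedged usage sketch; the host and cluster name are placeholders, and the import path follows the file layout in this diff:

```python
from yggdrasil.databricks.compute.remote import databricks_remote_compute

@databricks_remote_compute(
    workspace="https://example.cloud.databricks.com",  # hypothetical host
    cluster_name="team-cluster",                       # hypothetical name
)
def heavy_job(n: int) -> int:
    return sum(range(n))

# Resolved via workspace.clusters(cluster_name=...); with no id or name it
# falls back to replicated_current_environment().
result = heavy_job(1_000_000)
```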
--- ygg-0.1.29/src/yggdrasil/databricks/jobs/config.py
+++ ygg-0.1.31/src/yggdrasil/databricks/jobs/config.py
@@ -2,11 +2,11 @@ import builtins
 import dataclasses
 import datetime as dt
 import inspect
+import logging
 from dataclasses import dataclass, fields
 from enum import Enum
 from inspect import isclass
-from typing import Any, Dict, List, get_type_hints,
-import logging
+from typing import Any, Dict, List, get_type_hints, get_origin
 
 from ...libs.sparklib import SparkSession
 from ...types.cast.registry import convert
@@ -308,31 +308,3 @@ class NotebookConfig:
         spark_session.conf.set("spark.sql.session.timeZone", "UTC")
 
         return cls.from_environment()
-
-
-class ExampleEnum(Enum):
-    """Example enum for widget demonstration"""
-    OPTION1 = "option1"
-    OPTION2 = "option2"
-    OPTION3 = "option3"
-
-
-@dataclass
-class CompleteNotebookConfig(NotebookConfig):
-    """Example JobConfig with various field types to demonstrate widget handling"""
-    # Basic types
-    text_field: str
-    integer_field: int = 42
-    float_field: float = 3.14
-    boolean_field: bool = True
-
-    # Special types
-    date_field: dt.date = dt.date(2023, 1, 1)
-    datetime_field: dt.datetime = dt.datetime(2023, 1, 1, 12, 0, 0)
-    enum_field: ExampleEnum = ExampleEnum.OPTION1
-
-    # Collection types
-    list_of_strings: List[str] = None  # Will be displayed as multiselect
-
-    # Optional fields
-    optional_text: Optional[str] = None
--- ygg-0.1.29/src/yggdrasil/databricks/sql/engine.py
+++ ygg-0.1.31/src/yggdrasil/databricks/sql/engine.py
@@ -11,6 +11,7 @@ import pyarrow.parquet as pq
 
 from .statement_result import StatementResult
 from .types import column_info_to_arrow_field
+from .. import DatabricksPathKind
 from ..workspaces import WorkspaceService
 from ...libs.databrickslib import databricks_sdk
 from ...libs.sparklib import SparkSession, SparkDataFrame, pyspark
@@ -374,8 +375,9 @@ class SQLEngine(WorkspaceService):
         data = convert(data, pa.Table, options=cast_options, target_field=existing_schema)
 
         # Write in temp volume
-        databricks_tmp_path = connected.
-
+        databricks_tmp_path = connected.dbfs_path(
+            kind=DatabricksPathKind.VOLUME,
+            parts=[catalog_name, schema_name, "tmp", transaction_id, "data.parquet"]
         )
         databricks_tmp_folder = databricks_tmp_path.parent
 
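Temporary staging now goes through an explicit Unity Catalog volume path. A hedged sketch of the path construction; `connected` stands for the connected workspace handle from the surrounding method, the values are placeholders, and `dbfs_path` semantics are inferred from the hunk:

```python
from yggdrasil.databricks import DatabricksPathKind  # re-export used by the hunk's `from .. import`

tmp_path = connected.dbfs_path(
    kind=DatabricksPathKind.VOLUME,
    parts=["main", "analytics", "tmp", "txn-0001", "data.parquet"],  # hypothetical
)
tmp_folder = tmp_path.parent  # e.g. the /Volumes/main/analytics/tmp/txn-0001 folder
```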
--- ygg-0.1.29/src/yggdrasil/databricks/sql/statement_result.py
+++ ygg-0.1.31/src/yggdrasil/databricks/sql/statement_result.py
@@ -96,7 +96,7 @@ class StatementResult:
         if self.is_spark_sql:
             return self._response
 
-        if
+        if self.statement_id and not self.done and time.time() - self._response_refresh_time > delay:
             self.response = self.workspace.sdk().statement_execution.get_statement(self.statement_id)
 
         return self._response
@@ -143,11 +143,25 @@ class StatementResult:
 
     @property
     def done(self):
-
+        if self.persisted:
+            return True
+
+        if self._response is None:
+            return False
+
+        return self._response.status.state in [
+            StatementState.CANCELED, StatementState.CLOSED, StatementState.FAILED, StatementState.SUCCEEDED
+        ]
 
     @property
     def failed(self):
-
+        if self.persisted:
+            return True
+
+        if self._response is None:
+            return False
+
+        return self._response.status.state in [StatementState.CANCELED, StatementState.FAILED]
 
     @property
     def persisted(self):
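`done` and `failed` now key off the SDK's terminal `StatementState` values, with persisted results short-circuiting. That supports a simple polling loop over a `StatementResult` (the interval is arbitrary):

```python
import time

while not result.done:   # terminal: CANCELED, CLOSED, FAILED, SUCCEEDED
    time.sleep(1.0)

if result.failed:        # CANCELED or FAILED
    raise RuntimeError("statement did not succeed")
```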
@@ -163,6 +177,7 @@ class StatementResult:
             self, self.disposition, Disposition.EXTERNAL_LINKS
         )
 
+        self.wait()
         result_data = self.result
         wsdk = self.workspace.sdk()
 
--- ygg-0.1.29/src/yggdrasil/databricks/sql/types.py
+++ ygg-0.1.31/src/yggdrasil/databricks/sql/types.py
@@ -102,6 +102,18 @@ def _split_top_level_commas(s: str):
     return parts
 
 
+def _safe_bytes(obj):
+    if not isinstance(obj, bytes):
+        if not obj:
+            return b""
+
+        if not isinstance(obj, str):
+            obj = str(obj)
+
+        return obj.encode("utf-8")
+    return obj
+
+
 def parse_sql_type_to_pa(type_str: str) -> pa.DataType:
     """
     Adapted parser that:
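Arrow requires field metadata keys and values to be bytes-like; `_safe_bytes` coerces whatever appears in the catalog's type JSON before it is attached to a field. Expected behavior of the helper above:

```python
assert _safe_bytes(b"raw") == b"raw"         # bytes pass through unchanged
assert _safe_bytes(None) == b""              # falsy non-bytes -> empty bytes
assert _safe_bytes("comment") == b"comment"  # str -> UTF-8 bytes
assert _safe_bytes(42) == b"42"              # other types go through str() first
```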
@@ -170,6 +182,10 @@ def column_info_to_arrow_field(col: Union[SQLColumnInfo, CatalogColumnInfo]):
     if isinstance(col, CatalogColumnInfo):
         parsed = json.loads(col.type_json)
         md = parsed.get("metadata", {}) or {}
+        md = {
+            _safe_bytes(k): _safe_bytes(v)
+            for k, v in md.items()
+        }
         nullable = col.nullable
     elif isinstance(col, SQLColumnInfo):
         md = {}
--- /dev/null
+++ ygg-0.1.31/src/yggdrasil/databricks/workspaces/filesytem.py
@@ -0,0 +1,161 @@
+__all__ = [
+    "DatabricksFileSystem",
+    "DatabricksFileSystemHandler"
+]
+
+from typing import TYPE_CHECKING, Any, Union, List, Optional
+
+from pyarrow import PythonFile
+from pyarrow.fs import FileSystem, FileInfo, FileSelector, PyFileSystem, FileSystemHandler
+
+if TYPE_CHECKING:
+    from ..workspaces.workspace import Workspace
+    from .path import DatabricksPath
+
+
+class DatabricksFileSystemHandler(FileSystemHandler):
+
+    def __init__(
+        self,
+        workspace: "Workspace",
+    ):
+        super().__init__()
+        self.workspace = workspace
+
+    def __enter__(self):
+        return self.connect(clone=True)
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        self.workspace.__exit__(exc_type, exc_val, exc_tb)
+
+    def _parse_path(self, obj: Any) -> "DatabricksPath":
+        from .path import DatabricksPath
+
+        return DatabricksPath.parse(obj, workspace=self.workspace)
+
+    def connect(self, clone: bool = True):
+        workspace = self.connect(clone=clone)
+
+        if clone:
+            return DatabricksFileSystemHandler(
+                workspace=workspace
+            )
+
+        self.workspace = workspace
+        return self
+
+    def close(self):
+        self.workspace.close()
+
+    def copy_file(self, src, dest, *, chunk_size: int = 4 * 1024 * 1024):
+        src = self._parse_path(src)
+        dest = self._parse_path(dest)
+
+        with src.open("rb") as r, dest.open("wb") as w:
+            while True:
+                chunk = r.read(chunk_size)
+                if not chunk:
+                    break
+                w.write(chunk)
+
+    def create_dir(self, path, *args, recursive: bool = True, **kwargs):
+        return self._parse_path(path).mkdir(parents=recursive)
+
+    def delete_dir(self, path):
+        return self._parse_path(path).rmdir(recursive=True)
+
+    def delete_dir_contents(self, path, *args, accept_root_dir: bool = False, **kwargs):
+        return self._parse_path(path).rmdir(recursive=True)
+
+    def delete_root_dir_contents(self):
+        return self.delete_dir_contents("/", accept_root_dir=True)
+
+    def delete_file(self, path):
+        return self._parse_path(path).rmfile()
+
+    def equals(self, other: FileSystem):
+        return self == other
+
+    def from_uri(self, uri):
+        uri = self._parse_path(uri)
+
+        return self.__class__(
+            workspace=uri.workspace
+        )
+
+    def get_file_info(
+        self,
+        paths_or_selector: Union[FileSelector, str, "DatabricksPath", List[Union[str, "DatabricksPath"]]]
+    ) -> Union[FileInfo, List[FileInfo]]:
+        from .path import DatabricksPath
+
+        if isinstance(paths_or_selector, (str, DatabricksPath)):
+            result = self._parse_path(paths_or_selector).file_info
+
+            return result
+
+        if isinstance(paths_or_selector, FileSelector):
+            return self.get_file_info_selector(paths_or_selector)
+
+        return [
+            self.get_file_info(obj)
+            for obj in paths_or_selector
+        ]
+
+    def get_file_info_selector(
+        self,
+        selector: FileSelector
+    ):
+        base_dir = self._parse_path(selector.base_dir)
+
+        return [
+            p.file_info
+            for p in base_dir.ls(
+                recursive=selector.recursive,
+                allow_not_found=selector.allow_not_found
+            )
+        ]
+
+    def get_type_name(self):
+        return "dbfs"
+
+    def move(self, src, dest):
+        src = self._parse_path(src)
+
+        src.copy_to(dest)
+
+        src.remove(recursive=True)
+
+    def normalize_path(self, path):
+        return self._parse_path(path).full_path()
+
+    def open(
+        self,
+        path,
+        mode: str = "r+",
+        encoding: Optional[str] = None,
+    ):
+        return self._parse_path(path).open(mode=mode, encoding=encoding, clone=False)
+
+    def open_append_stream(self, path, compression='detect', buffer_size=None, metadata=None):
+        return self._parse_path(path).open(mode="ab")
+
+    def open_input_file(self, path, mode: str = "rb", **kwargs):
+        buf = self._parse_path(path).open(mode=mode).connect(clone=True)
+
+        return PythonFile(
+            buf,
+            mode=mode
+        )
+
+    def open_input_stream(self, path, compression='detect', buffer_size=None):
+        return self._parse_path(path).open(mode="rb")
+
+    def open_output_stream(self, path, compression='detect', buffer_size=None, metadata=None):
+        return self._parse_path(path).open(mode="wb")
+
+
+class DatabricksFileSystem(PyFileSystem):
+
+    def __init__(self, handler):  # real signature unknown; restored from __doc__
+        super().__init__(handler)
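Because the handler implements pyarrow's `FileSystemHandler` interface, wrapping it in `PyFileSystem` (which `DatabricksFileSystem` subclasses) makes workspace paths usable anywhere pyarrow accepts a filesystem. A hedged sketch; the host and volume path are placeholders:

```python
import pyarrow.dataset as ds

from yggdrasil.databricks.workspaces.filesytem import (
    DatabricksFileSystem,
    DatabricksFileSystemHandler,
)
from yggdrasil.databricks.workspaces.workspace import Workspace

ws = Workspace(host="https://example.cloud.databricks.com")  # hypothetical host
fs = DatabricksFileSystem(DatabricksFileSystemHandler(workspace=ws))

# Any pyarrow API taking `filesystem=` can now address Databricks storage.
dataset = ds.dataset("Volumes/main/analytics/tbl", format="parquet", filesystem=fs)
```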