ygg 0.1.28__tar.gz → 0.1.30__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ygg-0.1.28 → ygg-0.1.30}/PKG-INFO +1 -1
- {ygg-0.1.28 → ygg-0.1.30}/pyproject.toml +1 -1
- {ygg-0.1.28 → ygg-0.1.30}/src/ygg.egg-info/PKG-INFO +1 -1
- {ygg-0.1.28 → ygg-0.1.30}/src/yggdrasil/databricks/compute/cluster.py +41 -25
- {ygg-0.1.28 → ygg-0.1.30}/src/yggdrasil/databricks/compute/execution_context.py +9 -10
- {ygg-0.1.28 → ygg-0.1.30}/src/yggdrasil/databricks/compute/remote.py +10 -6
- {ygg-0.1.28 → ygg-0.1.30}/src/yggdrasil/databricks/sql/engine.py +4 -2
- {ygg-0.1.28 → ygg-0.1.30}/src/yggdrasil/databricks/sql/statement_result.py +17 -2
- {ygg-0.1.28 → ygg-0.1.30}/src/yggdrasil/databricks/workspaces/databricks_path.py +192 -283
- {ygg-0.1.28 → ygg-0.1.30}/src/yggdrasil/databricks/workspaces/workspace.py +53 -416
- {ygg-0.1.28 → ygg-0.1.30}/src/yggdrasil/pyutils/callable_serde.py +2 -28
- {ygg-0.1.28 → ygg-0.1.30}/src/yggdrasil/pyutils/modules.py +1 -1
- {ygg-0.1.28 → ygg-0.1.30}/src/yggdrasil/pyutils/python_env.py +81 -264
- {ygg-0.1.28 → ygg-0.1.30}/LICENSE +0 -0
- {ygg-0.1.28 → ygg-0.1.30}/README.md +0 -0
- {ygg-0.1.28 → ygg-0.1.30}/setup.cfg +0 -0
- {ygg-0.1.28 → ygg-0.1.30}/src/ygg.egg-info/SOURCES.txt +0 -0
- {ygg-0.1.28 → ygg-0.1.30}/src/ygg.egg-info/dependency_links.txt +0 -0
- {ygg-0.1.28 → ygg-0.1.30}/src/ygg.egg-info/entry_points.txt +0 -0
- {ygg-0.1.28 → ygg-0.1.30}/src/ygg.egg-info/requires.txt +0 -0
- {ygg-0.1.28 → ygg-0.1.30}/src/ygg.egg-info/top_level.txt +0 -0
- {ygg-0.1.28 → ygg-0.1.30}/src/yggdrasil/__init__.py +0 -0
- {ygg-0.1.28 → ygg-0.1.30}/src/yggdrasil/databricks/__init__.py +0 -0
- {ygg-0.1.28 → ygg-0.1.30}/src/yggdrasil/databricks/compute/__init__.py +0 -0
- {ygg-0.1.28 → ygg-0.1.30}/src/yggdrasil/databricks/jobs/__init__.py +0 -0
- {ygg-0.1.28 → ygg-0.1.30}/src/yggdrasil/databricks/jobs/config.py +0 -0
- {ygg-0.1.28 → ygg-0.1.30}/src/yggdrasil/databricks/sql/__init__.py +0 -0
- {ygg-0.1.28 → ygg-0.1.30}/src/yggdrasil/databricks/sql/exceptions.py +0 -0
- {ygg-0.1.28 → ygg-0.1.30}/src/yggdrasil/databricks/sql/types.py +0 -0
- {ygg-0.1.28 → ygg-0.1.30}/src/yggdrasil/databricks/workspaces/__init__.py +0 -0
- {ygg-0.1.28 → ygg-0.1.30}/src/yggdrasil/dataclasses/__init__.py +0 -0
- {ygg-0.1.28 → ygg-0.1.30}/src/yggdrasil/dataclasses/dataclass.py +0 -0
- {ygg-0.1.28 → ygg-0.1.30}/src/yggdrasil/libs/__init__.py +0 -0
- {ygg-0.1.28 → ygg-0.1.30}/src/yggdrasil/libs/databrickslib.py +0 -0
- {ygg-0.1.28 → ygg-0.1.30}/src/yggdrasil/libs/extensions/__init__.py +0 -0
- {ygg-0.1.28 → ygg-0.1.30}/src/yggdrasil/libs/extensions/polars_extensions.py +0 -0
- {ygg-0.1.28 → ygg-0.1.30}/src/yggdrasil/libs/extensions/spark_extensions.py +0 -0
- {ygg-0.1.28 → ygg-0.1.30}/src/yggdrasil/libs/pandaslib.py +0 -0
- {ygg-0.1.28 → ygg-0.1.30}/src/yggdrasil/libs/polarslib.py +0 -0
- {ygg-0.1.28 → ygg-0.1.30}/src/yggdrasil/libs/sparklib.py +0 -0
- {ygg-0.1.28 → ygg-0.1.30}/src/yggdrasil/pyutils/__init__.py +0 -0
- {ygg-0.1.28 → ygg-0.1.30}/src/yggdrasil/pyutils/exceptions.py +0 -0
- {ygg-0.1.28 → ygg-0.1.30}/src/yggdrasil/pyutils/parallel.py +0 -0
- {ygg-0.1.28 → ygg-0.1.30}/src/yggdrasil/pyutils/retry.py +0 -0
- {ygg-0.1.28 → ygg-0.1.30}/src/yggdrasil/requests/__init__.py +0 -0
- {ygg-0.1.28 → ygg-0.1.30}/src/yggdrasil/requests/msal.py +0 -0
- {ygg-0.1.28 → ygg-0.1.30}/src/yggdrasil/requests/session.py +0 -0
- {ygg-0.1.28 → ygg-0.1.30}/src/yggdrasil/types/__init__.py +0 -0
- {ygg-0.1.28 → ygg-0.1.30}/src/yggdrasil/types/cast/__init__.py +0 -0
- {ygg-0.1.28 → ygg-0.1.30}/src/yggdrasil/types/cast/arrow_cast.py +0 -0
- {ygg-0.1.28 → ygg-0.1.30}/src/yggdrasil/types/cast/cast_options.py +0 -0
- {ygg-0.1.28 → ygg-0.1.30}/src/yggdrasil/types/cast/pandas_cast.py +0 -0
- {ygg-0.1.28 → ygg-0.1.30}/src/yggdrasil/types/cast/polars_cast.py +0 -0
- {ygg-0.1.28 → ygg-0.1.30}/src/yggdrasil/types/cast/polars_pandas_cast.py +0 -0
- {ygg-0.1.28 → ygg-0.1.30}/src/yggdrasil/types/cast/registry.py +0 -0
- {ygg-0.1.28 → ygg-0.1.30}/src/yggdrasil/types/cast/spark_cast.py +0 -0
- {ygg-0.1.28 → ygg-0.1.30}/src/yggdrasil/types/cast/spark_pandas_cast.py +0 -0
- {ygg-0.1.28 → ygg-0.1.30}/src/yggdrasil/types/cast/spark_polars_cast.py +0 -0
- {ygg-0.1.28 → ygg-0.1.30}/src/yggdrasil/types/libs.py +0 -0
- {ygg-0.1.28 → ygg-0.1.30}/src/yggdrasil/types/python_arrow.py +0 -0
- {ygg-0.1.28 → ygg-0.1.30}/src/yggdrasil/types/python_defaults.py +0 -0
pyproject.toml

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "ygg"
-version = "0.1.28"
+version = "0.1.30"
 description = "Type-friendly utilities for moving data between Python objects, Arrow, Polars, Pandas, Spark, and Databricks"
 readme = { file = "README.md", content-type = "text/markdown" }
 license = { file = "LICENSE" }
src/yggdrasil/databricks/compute/cluster.py

@@ -141,7 +141,7 @@ class Cluster(WorkspaceService):
         source: Optional[PythonEnv] = None,
         cluster_id: Optional[str] = None,
         cluster_name: Optional[str] = None,
-        single_user_name: Optional[str] =
+        single_user_name: Optional[str] = "current",
         runtime_engine: Optional["RuntimeEngine"] = None,
         libraries: Optional[list[str]] = None,
         **kwargs
@@ -153,7 +153,6 @@ class Cluster(WorkspaceService):
         libraries.extend([
             _ for _ in [
                 "ygg",
-                "dill",
                 "uv",
             ] if _ not in libraries
         ])
@@ -165,11 +164,22 @@ class Cluster(WorkspaceService):
         elif python_version[1] < 11:
             python_version = None
 
+        current_user_name = self.workspace.current_user.user_name
+
+        if single_user_name == "current":
+            single_user_name = current_user_name
+
+        cluster_id = cluster_id or self.cluster_id
+        cluster_name = cluster_name or self.cluster_name
+
+        if not cluster_id and not cluster_name:
+            cluster_name = current_user_name
+
         inst = self.create_or_update(
             cluster_id=cluster_id,
-            cluster_name=cluster_name
+            cluster_name=cluster_name,
             python_version=python_version,
-            single_user_name=single_user_name
+            single_user_name=single_user_name,
             runtime_engine=runtime_engine or RuntimeEngine.PHOTON,
             libraries=libraries,
             **kwargs
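The new defaulting logic resolves `single_user_name="current"` to the calling user and, when neither `cluster_id` nor `cluster_name` is supplied, falls back to a cluster named after that user. A minimal, self-contained sketch of that fallback chain (the function name and return shape are illustrative, not part of the package):

```python
from typing import Optional


def resolve_cluster_identity(
    current_user_name: str,
    single_user_name: Optional[str] = "current",
    cluster_id: Optional[str] = None,
    cluster_name: Optional[str] = None,
) -> tuple[Optional[str], Optional[str], Optional[str]]:
    # "current" is a sentinel that resolves to the workspace's calling user.
    if single_user_name == "current":
        single_user_name = current_user_name

    # With no explicit cluster, default to a per-user cluster name.
    if not cluster_id and not cluster_name:
        cluster_name = current_user_name

    return single_user_name, cluster_id, cluster_name


print(resolve_cluster_identity("alice@example.com"))
# -> ('alice@example.com', None, 'alice@example.com')
```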
@@ -180,11 +190,10 @@ class Cluster(WorkspaceService):
     def pull_python_environment(
         self,
         name: Optional[str] = None,
-        target:
+        target: PythonEnv | str | None = None,
     ):
         with self.context() as c:
             m = c.remote_metadata
-            requirements = m.requirements
             version_info = m.version_info
 
         python_version = ".".join(str(_) for _ in version_info)
@@ -192,14 +201,20 @@ class Cluster(WorkspaceService):
         if target is None:
             target = PythonEnv.create(
                 name=name or self.name,
-                requirements=requirements,
                 python=python_version
             )
-
-        target.
-
-
-
+        elif isinstance(target, str):
+            if target.casefold() == "current":
+                target = PythonEnv.get_current()
+            else:
+                target = PythonEnv.create(
+                    name=target,
+                    python=python_version
+                )
+
+        target.update(
+            python=python_version,
+        )
 
         return target
 
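With the reworked `target` parameter, `pull_python_environment` accepts an existing `PythonEnv`, the string `"current"`, or an environment name. A hedged usage sketch built on accessors visible elsewhere in this diff (`Workspace(host=...)` and `workspace.clusters(cluster_name=...)`); the host and cluster name are placeholders:

```python
from yggdrasil.databricks.workspaces.workspace import Workspace

# Placeholder host and cluster name.
cluster = Workspace(host="https://adb-1234567890123456.7.azuredatabricks.net").clusters(
    cluster_name="shared-etl"
)

# target=None: create a local PythonEnv named after the cluster, pinned to the
# remote interpreter's version.
env_default = cluster.pull_python_environment()

# target="current": reuse the interpreter the caller is already running in.
env_current = cluster.pull_python_environment(target="current")

# Any other string is treated as the name of a new environment to create.
env_named = cluster.pull_python_environment(target="scratch-env")
```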
@@ -646,7 +661,6 @@ class Cluster(WorkspaceService):
         self,
         _func: Optional[Callable] = None,
         *,
-        before: Optional[Callable] = None,
         language: Optional["Language"] = None,
         env_keys: Optional[List[str]] = None,
         env_variables: Optional[Dict[str, str]] = None,
@@ -671,17 +685,17 @@ class Cluster(WorkspaceService):
             def h(z): ...
         """
         def decorator(func: Callable):
+            if os.getenv("DATABRICKS_RUNTIME_VERSION") is not None:
+                return func
+
             context = self.context(language=language or Language.PYTHON)
             serialized = CallableSerde.from_callable(func)
-            do_before = CallableSerde.from_callable(before)
 
             @functools.wraps(func)
             def wrapper(*args, **kwargs):
                 if os.getenv("DATABRICKS_RUNTIME_VERSION") is not None:
                     return func(*args, **kwargs)
 
-                do_before()
-
                 return context.execute(
                     obj=serialized,
                     args=list(args),
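The decorator now short-circuits at decoration time: if `DATABRICKS_RUNTIME_VERSION` is already set, the original function is returned untouched instead of being wrapped. A minimal, self-contained sketch of that pattern (the `remote_execute` stand-in is illustrative, not the package's API):

```python
import functools
import os
from typing import Any, Callable


def remote_if_local(func: Callable[..., Any]) -> Callable[..., Any]:
    if os.getenv("DATABRICKS_RUNTIME_VERSION") is not None:
        # Already running on a Databricks cluster: no wrapping, no re-dispatch.
        return func

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        # Off-cluster: hand the call to a remote execution backend.
        return remote_execute(func, args, kwargs)

    return wrapper


def remote_execute(func, args, kwargs):
    # Placeholder backend so the sketch runs as-is.
    return func(*args, **kwargs)


@remote_if_local
def add(a: int, b: int) -> int:
    return a + b


print(add(2, 3))  # -> 5
```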
@@ -854,8 +868,11 @@ class Cluster(WorkspaceService):
             target_path = self.workspace.shared_cache_path(
                 suffix=f"/clusters/{self.cluster_id}/{os.path.basename(value)}"
             )
-
-            value =
+
+            with open(value, mode="rb") as f:
+                target_path.write_bytes(f.read())
+
+            value = str(target_path)
         elif "." in value and not "/" in value:
             value = value.split(".")[0]
 
@@ -869,13 +886,12 @@ class Cluster(WorkspaceService):
 
         repo = None
 
-        if pip_settings.extra_index_url
-
-
-
-
-
-            repo = pip_settings.extra_index_url
+        if pip_settings.extra_index_url and (
+            value.startswith("datamanagement")
+            or value.startswith("TSSecrets")
+            or value.startswith("tgp_")
+        ):
+            repo = pip_settings.extra_index_url
 
         return Library(
             pypi=PythonPyPiLibrary(
src/yggdrasil/databricks/compute/execution_context.py

@@ -273,7 +273,6 @@ print(json.dumps(meta))"""
         print_stdout: Optional[bool] = True,
         timeout: Optional[dt.timedelta] = None,
         command: Optional[str] = None,
-        use_dill: Optional[bool] = None
     ) -> Any:
         if self.is_in_databricks_environment():
             args = args or []
@@ -291,16 +290,18 @@ print(json.dumps(meta))"""
 
         serialized = CallableSerde.from_callable(func)
 
-
+        if serialized.pkg_root:
+            self.install_temporary_libraries(libraries=serialized.pkg_root)
 
-        # Use dill of same version
         current_version = (sys.version_info.major, sys.version_info.minor)
 
-        if
-
-
-
-
+        if current_version != self.cluster.python_version[:2]:
+            raise RuntimeError(
+                f"Cannot execute callable: local Python version "
+                f"{current_version[0]}.{current_version[1]} does not match "
+                f"remote cluster Python version "
+                f"{self.cluster.python_version[0]}.{self.cluster.python_version[1]}"
+            )
 
         result_tag = "<<<RESULT>>>"
 
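Instead of pinning a matching `dill` version, execution now requires the local interpreter to match the remote cluster's major.minor Python version before a serialized callable is shipped. A standalone illustration of that guard (the `remote_python_version` tuple stands in for `self.cluster.python_version`):

```python
import sys


def check_interpreter_match(remote_python_version: tuple[int, ...]) -> None:
    """Raise if the local interpreter's major.minor differs from the remote one."""
    current_version = (sys.version_info.major, sys.version_info.minor)
    if current_version != tuple(remote_python_version[:2]):
        raise RuntimeError(
            f"Cannot execute callable: local Python version "
            f"{current_version[0]}.{current_version[1]} does not match "
            f"remote cluster Python version "
            f"{remote_python_version[0]}.{remote_python_version[1]}"
        )


# Matching versions pass silently; a mismatch raises RuntimeError.
check_interpreter_match((sys.version_info.major, sys.version_info.minor))
```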
@@ -340,7 +341,6 @@ print(json.dumps(meta))"""
                 print_stdout=print_stdout,
                 timeout=timeout,
                 command=command,
-                use_dill=use_dill
             )
         raise remote_module_error
 
@@ -497,7 +497,6 @@ with zipfile.ZipFile(buf, "r") as zf:
         ]
 
         resolved = resolve_local_lib_path(libraries)
-        resolved_str = str(resolved)
 
         remote_site_packages_path = self.remote_metadata.site_packages_path
         if resolved.is_dir():
src/yggdrasil/databricks/compute/remote.py

@@ -4,9 +4,12 @@ from typing import (
     Callable,
     Optional,
     TypeVar,
-    List,
+    List, TYPE_CHECKING,
 )
 
+if TYPE_CHECKING:
+    from .cluster import Cluster
+
 from ..workspaces.workspace import Workspace
 
 ReturnType = TypeVar("ReturnType")
@@ -23,16 +26,17 @@ def databricks_remote_compute(
     env_keys: Optional[List[str]] = None,
     **options
 ) -> Callable[[Callable[..., ReturnType]], Callable[..., ReturnType]]:
-    from .. import Cluster
-
     if isinstance(workspace, str):
         workspace = Workspace(host=workspace)
 
     if cluster is None:
-        if cluster_id:
-            cluster =
+        if cluster_id or cluster_name:
+            cluster = workspace.clusters(
+                cluster_id=cluster_id,
+                cluster_name=cluster_name
+            )
         else:
-            cluster =
+            cluster = workspace.clusters().replicated_current_environment(
                 workspace=workspace,
                 cluster_name=cluster_name
             )
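Cluster resolution in `databricks_remote_compute` now honours `cluster_name` as well as `cluster_id`, and only falls back to a cluster replicating the current local environment when neither is given. A hedged usage sketch (the import path follows the file layout above; host and cluster names are placeholders):

```python
from yggdrasil.databricks.compute.remote import databricks_remote_compute

HOST = "https://adb-1234567890123456.7.azuredatabricks.net"  # placeholder


# Either cluster_id or cluster_name now selects an existing cluster.
@databricks_remote_compute(workspace=HOST, cluster_name="shared-etl")
def heavy_transform(n: int) -> int:
    return n * n


# Neither given: a cluster replicating the caller's local environment is used.
@databricks_remote_compute(workspace=HOST)
def quick_job() -> str:
    return "done"
```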
src/yggdrasil/databricks/sql/engine.py

@@ -11,6 +11,7 @@ import pyarrow.parquet as pq
 
 from .statement_result import StatementResult
 from .types import column_info_to_arrow_field
+from .. import DatabricksPathKind
 from ..workspaces import WorkspaceService
 from ...libs.databrickslib import databricks_sdk
 from ...libs.sparklib import SparkSession, SparkDataFrame, pyspark
@@ -374,8 +375,9 @@ class SQLEngine(WorkspaceService):
         data = convert(data, pa.Table, options=cast_options, target_field=existing_schema)
 
         # Write in temp volume
-        databricks_tmp_path = connected.
-
+        databricks_tmp_path = connected.dbfs_path(
+            kind=DatabricksPathKind.VOLUME,
+            parts=[catalog_name, schema_name, "tmp", transaction_id, "data.parquet"]
         )
         databricks_tmp_folder = databricks_tmp_path.parent
 
src/yggdrasil/databricks/sql/statement_result.py

@@ -143,11 +143,25 @@ class StatementResult:
 
     @property
     def done(self):
-
+        if self.persisted:
+            return True
+
+        if self._response is None:
+            return False
+
+        return self._response.status.state in [
+            StatementState.CANCELED, StatementState.CLOSED, StatementState.FAILED, StatementState.SUCCEEDED
+        ]
 
     @property
     def failed(self):
-
+        if self.persisted:
+            return True
+
+        if self._response is None:
+            return False
+
+        return self._response.status.state in [StatementState.CANCELED, StatementState.FAILED]
 
     @property
     def persisted(self):
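The reworked `done` property reports a terminal statement state (or an already-persisted result), which makes simple polling loops possible. A minimal sketch under that assumption; the helper name, interval, and timeout are illustrative:

```python
import time


def wait_until_done(statement_result, poll_interval: float = 2.0, timeout: float = 600.0):
    """Poll a StatementResult until `done` is True or the timeout expires."""
    deadline = time.monotonic() + timeout
    while not statement_result.done:
        if time.monotonic() > deadline:
            raise TimeoutError("statement did not reach a terminal state in time")
        time.sleep(poll_interval)
    return statement_result
```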
@@ -163,6 +177,7 @@ class StatementResult:
             self, self.disposition, Disposition.EXTERNAL_LINKS
         )
 
+        self.wait()
         result_data = self.result
         wsdk = self.workspace.sdk()
 