ygg 0.1.29__tar.gz → 0.1.30__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. {ygg-0.1.29 → ygg-0.1.30}/PKG-INFO +1 -1
  2. {ygg-0.1.29 → ygg-0.1.30}/pyproject.toml +1 -1
  3. {ygg-0.1.29 → ygg-0.1.30}/src/ygg.egg-info/PKG-INFO +1 -1
  4. {ygg-0.1.29 → ygg-0.1.30}/src/yggdrasil/databricks/compute/cluster.py +41 -21
  5. {ygg-0.1.29 → ygg-0.1.30}/src/yggdrasil/databricks/compute/execution_context.py +9 -10
  6. {ygg-0.1.29 → ygg-0.1.30}/src/yggdrasil/databricks/compute/remote.py +10 -6
  7. {ygg-0.1.29 → ygg-0.1.30}/src/yggdrasil/databricks/sql/engine.py +4 -2
  8. {ygg-0.1.29 → ygg-0.1.30}/src/yggdrasil/databricks/sql/statement_result.py +17 -2
  9. {ygg-0.1.29 → ygg-0.1.30}/src/yggdrasil/databricks/workspaces/databricks_path.py +192 -283
  10. {ygg-0.1.29 → ygg-0.1.30}/src/yggdrasil/databricks/workspaces/workspace.py +53 -416
  11. {ygg-0.1.29 → ygg-0.1.30}/src/yggdrasil/pyutils/callable_serde.py +1 -0
  12. {ygg-0.1.29 → ygg-0.1.30}/src/yggdrasil/pyutils/modules.py +1 -1
  13. {ygg-0.1.29 → ygg-0.1.30}/src/yggdrasil/pyutils/python_env.py +81 -264
  14. {ygg-0.1.29 → ygg-0.1.30}/LICENSE +0 -0
  15. {ygg-0.1.29 → ygg-0.1.30}/README.md +0 -0
  16. {ygg-0.1.29 → ygg-0.1.30}/setup.cfg +0 -0
  17. {ygg-0.1.29 → ygg-0.1.30}/src/ygg.egg-info/SOURCES.txt +0 -0
  18. {ygg-0.1.29 → ygg-0.1.30}/src/ygg.egg-info/dependency_links.txt +0 -0
  19. {ygg-0.1.29 → ygg-0.1.30}/src/ygg.egg-info/entry_points.txt +0 -0
  20. {ygg-0.1.29 → ygg-0.1.30}/src/ygg.egg-info/requires.txt +0 -0
  21. {ygg-0.1.29 → ygg-0.1.30}/src/ygg.egg-info/top_level.txt +0 -0
  22. {ygg-0.1.29 → ygg-0.1.30}/src/yggdrasil/__init__.py +0 -0
  23. {ygg-0.1.29 → ygg-0.1.30}/src/yggdrasil/databricks/__init__.py +0 -0
  24. {ygg-0.1.29 → ygg-0.1.30}/src/yggdrasil/databricks/compute/__init__.py +0 -0
  25. {ygg-0.1.29 → ygg-0.1.30}/src/yggdrasil/databricks/jobs/__init__.py +0 -0
  26. {ygg-0.1.29 → ygg-0.1.30}/src/yggdrasil/databricks/jobs/config.py +0 -0
  27. {ygg-0.1.29 → ygg-0.1.30}/src/yggdrasil/databricks/sql/__init__.py +0 -0
  28. {ygg-0.1.29 → ygg-0.1.30}/src/yggdrasil/databricks/sql/exceptions.py +0 -0
  29. {ygg-0.1.29 → ygg-0.1.30}/src/yggdrasil/databricks/sql/types.py +0 -0
  30. {ygg-0.1.29 → ygg-0.1.30}/src/yggdrasil/databricks/workspaces/__init__.py +0 -0
  31. {ygg-0.1.29 → ygg-0.1.30}/src/yggdrasil/dataclasses/__init__.py +0 -0
  32. {ygg-0.1.29 → ygg-0.1.30}/src/yggdrasil/dataclasses/dataclass.py +0 -0
  33. {ygg-0.1.29 → ygg-0.1.30}/src/yggdrasil/libs/__init__.py +0 -0
  34. {ygg-0.1.29 → ygg-0.1.30}/src/yggdrasil/libs/databrickslib.py +0 -0
  35. {ygg-0.1.29 → ygg-0.1.30}/src/yggdrasil/libs/extensions/__init__.py +0 -0
  36. {ygg-0.1.29 → ygg-0.1.30}/src/yggdrasil/libs/extensions/polars_extensions.py +0 -0
  37. {ygg-0.1.29 → ygg-0.1.30}/src/yggdrasil/libs/extensions/spark_extensions.py +0 -0
  38. {ygg-0.1.29 → ygg-0.1.30}/src/yggdrasil/libs/pandaslib.py +0 -0
  39. {ygg-0.1.29 → ygg-0.1.30}/src/yggdrasil/libs/polarslib.py +0 -0
  40. {ygg-0.1.29 → ygg-0.1.30}/src/yggdrasil/libs/sparklib.py +0 -0
  41. {ygg-0.1.29 → ygg-0.1.30}/src/yggdrasil/pyutils/__init__.py +0 -0
  42. {ygg-0.1.29 → ygg-0.1.30}/src/yggdrasil/pyutils/exceptions.py +0 -0
  43. {ygg-0.1.29 → ygg-0.1.30}/src/yggdrasil/pyutils/parallel.py +0 -0
  44. {ygg-0.1.29 → ygg-0.1.30}/src/yggdrasil/pyutils/retry.py +0 -0
  45. {ygg-0.1.29 → ygg-0.1.30}/src/yggdrasil/requests/__init__.py +0 -0
  46. {ygg-0.1.29 → ygg-0.1.30}/src/yggdrasil/requests/msal.py +0 -0
  47. {ygg-0.1.29 → ygg-0.1.30}/src/yggdrasil/requests/session.py +0 -0
  48. {ygg-0.1.29 → ygg-0.1.30}/src/yggdrasil/types/__init__.py +0 -0
  49. {ygg-0.1.29 → ygg-0.1.30}/src/yggdrasil/types/cast/__init__.py +0 -0
  50. {ygg-0.1.29 → ygg-0.1.30}/src/yggdrasil/types/cast/arrow_cast.py +0 -0
  51. {ygg-0.1.29 → ygg-0.1.30}/src/yggdrasil/types/cast/cast_options.py +0 -0
  52. {ygg-0.1.29 → ygg-0.1.30}/src/yggdrasil/types/cast/pandas_cast.py +0 -0
  53. {ygg-0.1.29 → ygg-0.1.30}/src/yggdrasil/types/cast/polars_cast.py +0 -0
  54. {ygg-0.1.29 → ygg-0.1.30}/src/yggdrasil/types/cast/polars_pandas_cast.py +0 -0
  55. {ygg-0.1.29 → ygg-0.1.30}/src/yggdrasil/types/cast/registry.py +0 -0
  56. {ygg-0.1.29 → ygg-0.1.30}/src/yggdrasil/types/cast/spark_cast.py +0 -0
  57. {ygg-0.1.29 → ygg-0.1.30}/src/yggdrasil/types/cast/spark_pandas_cast.py +0 -0
  58. {ygg-0.1.29 → ygg-0.1.30}/src/yggdrasil/types/cast/spark_polars_cast.py +0 -0
  59. {ygg-0.1.29 → ygg-0.1.30}/src/yggdrasil/types/libs.py +0 -0
  60. {ygg-0.1.29 → ygg-0.1.30}/src/yggdrasil/types/python_arrow.py +0 -0
  61. {ygg-0.1.29 → ygg-0.1.30}/src/yggdrasil/types/python_defaults.py +0 -0
PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ygg
-Version: 0.1.29
+Version: 0.1.30
 Summary: Type-friendly utilities for moving data between Python objects, Arrow, Polars, Pandas, Spark, and Databricks
 Author: Yggdrasil contributors
 License: Apache License
pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "ygg"
-version = "0.1.29"
+version = "0.1.30"
 description = "Type-friendly utilities for moving data between Python objects, Arrow, Polars, Pandas, Spark, and Databricks"
 readme = { file = "README.md", content-type = "text/markdown" }
 license = { file = "LICENSE" }
src/ygg.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ygg
-Version: 0.1.29
+Version: 0.1.30
 Summary: Type-friendly utilities for moving data between Python objects, Arrow, Polars, Pandas, Spark, and Databricks
 Author: Yggdrasil contributors
 License: Apache License
src/yggdrasil/databricks/compute/cluster.py
@@ -141,7 +141,7 @@ class Cluster(WorkspaceService):
         source: Optional[PythonEnv] = None,
         cluster_id: Optional[str] = None,
         cluster_name: Optional[str] = None,
-        single_user_name: Optional[str] = None,
+        single_user_name: Optional[str] = "current",
         runtime_engine: Optional["RuntimeEngine"] = None,
         libraries: Optional[list[str]] = None,
         **kwargs
@@ -153,7 +153,6 @@ class Cluster(WorkspaceService):
         libraries.extend([
             _ for _ in [
                 "ygg",
-                "dill",
                 "uv",
             ] if _ not in libraries
         ])
@@ -165,11 +164,22 @@ class Cluster(WorkspaceService):
         elif python_version[1] < 11:
             python_version = None
 
+        current_user_name = self.workspace.current_user.user_name
+
+        if single_user_name == "current":
+            single_user_name = current_user_name
+
+        cluster_id = cluster_id or self.cluster_id
+        cluster_name = cluster_name or self.cluster_name
+
+        if not cluster_id and not cluster_name:
+            cluster_name = current_user_name
+
         inst = self.create_or_update(
             cluster_id=cluster_id,
-            cluster_name=cluster_name or self.cluster_name or self.workspace.current_user.user_name,
+            cluster_name=cluster_name,
             python_version=python_version,
-            single_user_name=single_user_name or self.workspace.current_user.user_name,
+            single_user_name=single_user_name,
             runtime_engine=runtime_engine or RuntimeEngine.PHOTON,
             libraries=libraries,
             **kwargs
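
With the signature change above, single_user_name now defaults to the sentinel "current" and is resolved to the calling user, and a cluster with neither id nor name falls back to one named after that user. A hedged usage sketch of the resulting defaults (placeholder host; assuming the patched method is replicated_current_environment, consistent with the call sites later in this diff):

    # Hedged usage sketch; not taken verbatim from the package.
    from yggdrasil.databricks.workspaces.workspace import Workspace

    workspace = Workspace(host="https://example.cloud.databricks.com")  # placeholder host

    # With the new defaults, omitting cluster_id/cluster_name and leaving
    # single_user_name at "current" targets a cluster named after the calling user:
    cluster = workspace.clusters().replicated_current_environment(workspace=workspace)
    # cluster_name      -> workspace.current_user.user_name
    # single_user_name  -> workspace.current_user.user_name
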
@@ -180,11 +190,10 @@ class Cluster(WorkspaceService):
     def pull_python_environment(
         self,
         name: Optional[str] = None,
-        target: Optional[PythonEnv] = None,
+        target: PythonEnv | str | None = None,
     ):
         with self.context() as c:
             m = c.remote_metadata
-            requirements = m.requirements
             version_info = m.version_info
 
             python_version = ".".join(str(_) for _ in version_info)
@@ -192,14 +201,20 @@ class Cluster(WorkspaceService):
         if target is None:
             target = PythonEnv.create(
                 name=name or self.name,
-                requirements=requirements,
                 python=python_version
             )
-        else:
-            target.update(
-                requirements=requirements,
-                python=python_version,
-            )
+        elif isinstance(target, str):
+            if target.casefold() == "current":
+                target = PythonEnv.get_current()
+            else:
+                target = PythonEnv.create(
+                    name=target,
+                    python=python_version
+                )
+
+        target.update(
+            python=python_version,
+        )
 
         return target
 
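
After this change, target may be an existing PythonEnv, the string "current", or a plain environment name, and every branch finishes by aligning the environment to the cluster's Python version. A hedged usage sketch (placeholder workspace and cluster names; method names taken from the hunks in this diff):

    # Hedged usage sketch; not taken verbatim from the package.
    from yggdrasil.databricks.workspaces.workspace import Workspace

    workspace = Workspace(host="https://example.cloud.databricks.com")  # placeholder host
    cluster = workspace.clusters(cluster_name="my-cluster")             # placeholder cluster name

    env = cluster.pull_python_environment()                  # new env named after the cluster
    env = cluster.pull_python_environment(target="current")  # align the interpreter running this code
    env = cluster.pull_python_environment(target="scratch")  # new env named "scratch"
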
@@ -670,6 +685,9 @@ class Cluster(WorkspaceService):
        def h(z): ...
        """
        def decorator(func: Callable):
+           if os.getenv("DATABRICKS_RUNTIME_VERSION") is not None:
+               return func
+
            context = self.context(language=language or Language.PYTHON)
            serialized = CallableSerde.from_callable(func)
 
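
The new guard returns the undecorated function whenever the code already runs on a Databricks cluster, so remote dispatch only happens from outside the workspace. A minimal standalone sketch of the pattern (the wrapper below is a hypothetical stand-in, not the package API):

    # Minimal sketch of the guard in isolation; wrap_for_remote_execution is a hypothetical
    # stand-in for the serialization/dispatch performed by the real decorator above.
    import os
    from typing import Callable

    def wrap_for_remote_execution(func: Callable) -> Callable:
        return func  # placeholder for the real remote-execution wrapper

    def remote_unless_on_databricks(func: Callable) -> Callable:
        # DATABRICKS_RUNTIME_VERSION is set by the Databricks runtime itself, so its presence
        # means the code is already on the cluster and the function can run in-process.
        if os.getenv("DATABRICKS_RUNTIME_VERSION") is not None:
            return func
        return wrap_for_remote_execution(func)
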
@@ -850,8 +868,11 @@ class Cluster(WorkspaceService):
             target_path = self.workspace.shared_cache_path(
                 suffix=f"/clusters/{self.cluster_id}/{os.path.basename(value)}"
             )
-            self.workspace.upload_local_path(local_path=value, target_path=target_path)
-            value = target_path
+
+            with open(value, mode="rb") as f:
+                target_path.write_bytes(f.read())
+
+            value = str(target_path)
         elif "." in value and not "/" in value:
             value = value.split(".")[0]
 
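
The artifact upload now goes through the target path object's write_bytes rather than the removed workspace.upload_local_path helper, presumably in line with the databricks_path.py/workspace.py rework listed above. A hedged sketch of the new upload step (the workspace object, cluster id, and wheel path are placeholders):

    # Hedged sketch; 'workspace' and 'cluster_id' stand in for the objects used in the hunk above.
    import os

    local_whl = "/tmp/my_package-0.1.0-py3-none-any.whl"  # placeholder wheel
    target_path = workspace.shared_cache_path(
        suffix=f"/clusters/{cluster_id}/{os.path.basename(local_whl)}"
    )

    with open(local_whl, mode="rb") as f:
        target_path.write_bytes(f.read())  # path-object write replacing upload_local_path

    library_spec = str(target_path)  # the string path is what the caller uses from here on
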
@@ -865,13 +886,12 @@ class Cluster(WorkspaceService):
 
         repo = None
 
-        if pip_settings.extra_index_url:
-            if (
-                value.startswith("datamanagement")
-                or value.startswith("TSSecrets")
-                or value.startswith("tgp_")
-            ):
-                repo = pip_settings.extra_index_url
+        if pip_settings.extra_index_url and (
+            value.startswith("datamanagement")
+            or value.startswith("TSSecrets")
+            or value.startswith("tgp_")
+        ):
+            repo = pip_settings.extra_index_url
 
         return Library(
             pypi=PythonPyPiLibrary(
src/yggdrasil/databricks/compute/execution_context.py
@@ -273,7 +273,6 @@ print(json.dumps(meta))"""
         print_stdout: Optional[bool] = True,
         timeout: Optional[dt.timedelta] = None,
         command: Optional[str] = None,
-        use_dill: Optional[bool] = None
     ) -> Any:
         if self.is_in_databricks_environment():
             args = args or []
@@ -291,16 +290,18 @@ print(json.dumps(meta))"""
 
         serialized = CallableSerde.from_callable(func)
 
-        self.install_temporary_libraries(libraries=serialized.pkg_root)
+        if serialized.pkg_root:
+            self.install_temporary_libraries(libraries=serialized.pkg_root)
 
-        # Use dill of same version
         current_version = (sys.version_info.major, sys.version_info.minor)
 
-        if use_dill is None:
-            if current_version == self.cluster.python_version:
-                use_dill = True
-            else:
-                use_dill = False
+        if current_version != self.cluster.python_version[:2]:
+            raise RuntimeError(
+                f"Cannot execute callable: local Python version "
+                f"{current_version[0]}.{current_version[1]} does not match "
+                f"remote cluster Python version "
+                f"{self.cluster.python_version[0]}.{self.cluster.python_version[1]}"
+            )
 
         result_tag = "<<<RESULT>>>"
 
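
With the use_dill toggle gone, the execution context now refuses to ship a callable when the local and remote interpreters differ in (major, minor) version, since the serialized callable is only guaranteed to load on a matching Python. A standalone sketch of the check (the function name is hypothetical):

    # Standalone sketch of the version guard; assert_python_matches is a hypothetical name.
    import sys

    def assert_python_matches(remote_version: tuple[int, ...]) -> None:
        local = (sys.version_info.major, sys.version_info.minor)
        if local != tuple(remote_version[:2]):  # compare (major, minor) only
            raise RuntimeError(
                f"Cannot execute callable: local Python {local[0]}.{local[1]} "
                f"does not match remote cluster Python {remote_version[0]}.{remote_version[1]}"
            )

    assert_python_matches((3, 11, 0))  # passes only on a local Python 3.11 interpreter
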
@@ -340,7 +341,6 @@ print(json.dumps(meta))"""
             print_stdout=print_stdout,
             timeout=timeout,
             command=command,
-            use_dill=use_dill
         )
         raise remote_module_error
 
@@ -497,7 +497,6 @@ with zipfile.ZipFile(buf, "r") as zf:
         ]
 
         resolved = resolve_local_lib_path(libraries)
-        resolved_str = str(resolved)
 
         remote_site_packages_path = self.remote_metadata.site_packages_path
         if resolved.is_dir():
src/yggdrasil/databricks/compute/remote.py
@@ -4,9 +4,12 @@ from typing import (
     Callable,
     Optional,
     TypeVar,
-    List,
+    List, TYPE_CHECKING,
 )
 
+if TYPE_CHECKING:
+    from .cluster import Cluster
+
 from ..workspaces.workspace import Workspace
 
 ReturnType = TypeVar("ReturnType")
@@ -23,16 +26,17 @@ def databricks_remote_compute(
    env_keys: Optional[List[str]] = None,
    **options
 ) -> Callable[[Callable[..., ReturnType]], Callable[..., ReturnType]]:
-    from .. import Cluster
-
     if isinstance(workspace, str):
         workspace = Workspace(host=workspace)
 
     if cluster is None:
-        if cluster_id:
-            cluster = Cluster(workspace=workspace, cluster_id=cluster_id)
+        if cluster_id or cluster_name:
+            cluster = workspace.clusters(
+                cluster_id=cluster_id,
+                cluster_name=cluster_name
+            )
         else:
-            cluster = Cluster.replicated_current_environment(
+            cluster = workspace.clusters().replicated_current_environment(
                 workspace=workspace,
                 cluster_name=cluster_name
             )
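
With the Cluster import deferred and the lookup routed through workspace.clusters(), the decorator can be pointed at a cluster by name as well as by id. A hedged usage sketch (placeholder host and cluster name; parameters as shown in this hunk):

    # Hedged usage sketch; not taken verbatim from the package docs.
    from yggdrasil.databricks.compute.remote import databricks_remote_compute

    @databricks_remote_compute(
        workspace="https://example.cloud.databricks.com",  # a str host is wrapped in Workspace(host=...)
        cluster_name="shared-etl-cluster",                 # name alone is now enough to resolve the cluster
    )
    def heavy_job(n: int) -> int:
        return sum(range(n))
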
src/yggdrasil/databricks/sql/engine.py
@@ -11,6 +11,7 @@ import pyarrow.parquet as pq
 
 from .statement_result import StatementResult
 from .types import column_info_to_arrow_field
+from .. import DatabricksPathKind
 from ..workspaces import WorkspaceService
 from ...libs.databrickslib import databricks_sdk
 from ...libs.sparklib import SparkSession, SparkDataFrame, pyspark
@@ -374,8 +375,9 @@ class SQLEngine(WorkspaceService):
         data = convert(data, pa.Table, options=cast_options, target_field=existing_schema)
 
         # Write in temp volume
-        databricks_tmp_path = connected.path(
-            "/Volumes", catalog_name, schema_name, "tmp", transaction_id, "data.parquet",
+        databricks_tmp_path = connected.dbfs_path(
+            kind=DatabricksPathKind.VOLUME,
+            parts=[catalog_name, schema_name, "tmp", transaction_id, "data.parquet"]
         )
         databricks_tmp_folder = databricks_tmp_path.parent
 
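
The temporary location is now built with the typed dbfs_path helper and DatabricksPathKind.VOLUME instead of hand-joining "/Volumes" segments. A hedged sketch of the call (catalog, schema, and transaction id are placeholders; connected stands in for the connected engine used above):

    # Hedged sketch; 'connected' is a placeholder for the object used in the hunk above.
    from yggdrasil.databricks import DatabricksPathKind  # import path used by engine.py in this diff

    tmp_path = connected.dbfs_path(
        kind=DatabricksPathKind.VOLUME,
        parts=["my_catalog", "my_schema", "tmp", "txn-0001", "data.parquet"],
    )
    # Expected to resolve to the Unity Catalog Volumes location the old code built by hand:
    # /Volumes/my_catalog/my_schema/tmp/txn-0001/data.parquet
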
src/yggdrasil/databricks/sql/statement_result.py
@@ -143,11 +143,25 @@ class StatementResult:
 
     @property
     def done(self):
-        return self.persisted or self.state in [StatementState.CANCELED, StatementState.CLOSED, StatementState.FAILED, StatementState.SUCCEEDED]
+        if self.persisted:
+            return True
+
+        if self._response is None:
+            return False
+
+        return self._response.status.state in [
+            StatementState.CANCELED, StatementState.CLOSED, StatementState.FAILED, StatementState.SUCCEEDED
+        ]
 
     @property
     def failed(self):
-        return self.state in [StatementState.CANCELED, StatementState.FAILED]
+        if self.persisted:
+            return True
+
+        if self._response is None:
+            return False
+
+        return self._response.status.state in [StatementState.CANCELED, StatementState.FAILED]
 
     @property
     def persisted(self):
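
The rewritten properties short-circuit on persisted results and treat a missing response as not yet done or failed, reading the terminal state directly from the raw response. A simplified, self-contained sketch of the pattern (class and attribute names are illustrative, not the package API):

    # Simplified stand-in for the done/failed logic above; names are illustrative only.
    from enum import Enum, auto
    from typing import Optional

    class State(Enum):
        PENDING = auto()
        SUCCEEDED = auto()
        FAILED = auto()
        CANCELED = auto()
        CLOSED = auto()

    class PollingResult:
        def __init__(self) -> None:
            self.persisted = False
            self._response: Optional[State] = None  # stays None until the first poll returns

        @property
        def done(self) -> bool:
            if self.persisted:          # already materialized somewhere durable
                return True
            if self._response is None:  # nothing fetched yet -> keep waiting
                return False
            return self._response in {State.CANCELED, State.CLOSED, State.FAILED, State.SUCCEEDED}
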
@@ -163,6 +177,7 @@ class StatementResult:
             self, self.disposition, Disposition.EXTERNAL_LINKS
         )
 
+        self.wait()
         result_data = self.result
         wsdk = self.workspace.sdk()