ygg 0.1.48__tar.gz → 0.1.50__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. {ygg-0.1.48 → ygg-0.1.50}/PKG-INFO +3 -1
  2. {ygg-0.1.48 → ygg-0.1.50}/pyproject.toml +15 -8
  3. {ygg-0.1.48 → ygg-0.1.50}/src/ygg.egg-info/PKG-INFO +3 -1
  4. {ygg-0.1.48 → ygg-0.1.50}/src/ygg.egg-info/requires.txt +2 -0
  5. {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/databricks/compute/cluster.py +45 -19
  6. {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/databricks/compute/execution_context.py +19 -11
  7. {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/databricks/compute/remote.py +4 -1
  8. {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/databricks/sql/statement_result.py +12 -5
  9. {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/databricks/workspaces/io.py +80 -56
  10. {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/databricks/workspaces/path.py +101 -50
  11. {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/databricks/workspaces/workspace.py +45 -27
  12. {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/libs/pandaslib.py +6 -0
  13. {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/libs/polarslib.py +5 -0
  14. {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/pyutils/python_env.py +7 -4
  15. {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/types/cast/polars_cast.py +1 -0
  16. ygg-0.1.50/src/yggdrasil/version.py +1 -0
  17. ygg-0.1.48/src/yggdrasil/version.py +0 -1
  18. {ygg-0.1.48 → ygg-0.1.50}/LICENSE +0 -0
  19. {ygg-0.1.48 → ygg-0.1.50}/README.md +0 -0
  20. {ygg-0.1.48 → ygg-0.1.50}/setup.cfg +0 -0
  21. {ygg-0.1.48 → ygg-0.1.50}/src/ygg.egg-info/SOURCES.txt +0 -0
  22. {ygg-0.1.48 → ygg-0.1.50}/src/ygg.egg-info/dependency_links.txt +0 -0
  23. {ygg-0.1.48 → ygg-0.1.50}/src/ygg.egg-info/entry_points.txt +0 -0
  24. {ygg-0.1.48 → ygg-0.1.50}/src/ygg.egg-info/top_level.txt +0 -0
  25. {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/__init__.py +0 -0
  26. {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/databricks/__init__.py +0 -0
  27. {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/databricks/compute/__init__.py +0 -0
  28. {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/databricks/jobs/__init__.py +0 -0
  29. {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/databricks/jobs/config.py +0 -0
  30. {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/databricks/sql/__init__.py +0 -0
  31. {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/databricks/sql/engine.py +0 -0
  32. {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/databricks/sql/exceptions.py +0 -0
  33. {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/databricks/sql/types.py +0 -0
  34. {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/databricks/sql/warehouse.py +0 -0
  35. {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/databricks/workspaces/__init__.py +0 -0
  36. {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/databricks/workspaces/filesytem.py +0 -0
  37. {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/databricks/workspaces/path_kind.py +0 -0
  38. {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/dataclasses/__init__.py +0 -0
  39. {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/dataclasses/dataclass.py +0 -0
  40. {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/libs/__init__.py +0 -0
  41. {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/libs/databrickslib.py +0 -0
  42. {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/libs/extensions/__init__.py +0 -0
  43. {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/libs/extensions/polars_extensions.py +0 -0
  44. {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/libs/extensions/spark_extensions.py +0 -0
  45. {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/libs/sparklib.py +0 -0
  46. {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/pyutils/__init__.py +0 -0
  47. {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/pyutils/callable_serde.py +0 -0
  48. {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/pyutils/equality.py +0 -0
  49. {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/pyutils/exceptions.py +0 -0
  50. {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/pyutils/expiring_dict.py +0 -0
  51. {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/pyutils/modules.py +0 -0
  52. {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/pyutils/parallel.py +0 -0
  53. {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/pyutils/retry.py +0 -0
  54. {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/requests/__init__.py +0 -0
  55. {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/requests/msal.py +0 -0
  56. {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/requests/session.py +0 -0
  57. {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/types/__init__.py +0 -0
  58. {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/types/cast/__init__.py +0 -0
  59. {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/types/cast/arrow_cast.py +0 -0
  60. {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/types/cast/cast_options.py +0 -0
  61. {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/types/cast/pandas_cast.py +0 -0
  62. {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/types/cast/polars_pandas_cast.py +0 -0
  63. {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/types/cast/registry.py +0 -0
  64. {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/types/cast/spark_cast.py +0 -0
  65. {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/types/cast/spark_pandas_cast.py +0 -0
  66. {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/types/cast/spark_polars_cast.py +0 -0
  67. {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/types/python_arrow.py +0 -0
  68. {ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/types/python_defaults.py +0 -0
{ygg-0.1.48 → ygg-0.1.50}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ygg
-Version: 0.1.48
+Version: 0.1.50
 Summary: Type-friendly utilities for moving data between Python objects, Arrow, Polars, Pandas, Spark, and Databricks
 Author: Yggdrasil contributors
 License: Apache License
@@ -235,6 +235,8 @@ Requires-Dist: pytest-asyncio; extra == "dev"
 Requires-Dist: black; extra == "dev"
 Requires-Dist: ruff; extra == "dev"
 Requires-Dist: mypy; extra == "dev"
+Requires-Dist: build; extra == "dev"
+Requires-Dist: twine; extra == "dev"
 Dynamic: license-file
 
 # Yggdrasil (Python)
{ygg-0.1.48 → ygg-0.1.50}/pyproject.toml
@@ -1,17 +1,16 @@
 [build-system]
-requires = ["setuptools>=61", "wheel"]
+# bump setuptools so type-info files are handled sanely
+requires = ["setuptools>=69", "wheel"]
 build-backend = "setuptools.build_meta"
 
 [project]
 name = "ygg"
-version = "0.1.48"
+version = "0.1.50"
 description = "Type-friendly utilities for moving data between Python objects, Arrow, Polars, Pandas, Spark, and Databricks"
 readme = { file = "README.md", content-type = "text/markdown" }
 license = { file = "LICENSE" }
 requires-python = ">=3.10"
-authors = [
-    { name = "Yggdrasil contributors" },
-]
+authors = [{ name = "Yggdrasil contributors" }]
 keywords = ["arrow", "polars", "pandas", "spark", "databricks", "typing", "dataclass", "serialization"]
 classifiers = [
     "Development Status :: 3 - Alpha",
@@ -42,6 +41,8 @@ dev = [
     "black",
     "ruff",
     "mypy",
+    "build",
+    "twine",
 ]
 
 [project.scripts]
@@ -55,9 +56,15 @@ Documentation = "https://github.com/Platob/Yggdrasil/tree/main/python/docs"
 [tool.setuptools]
 package-dir = { "" = "src" }
 license-files = ["LICENSE"]
-
-[tool.uv]
-native-tls = true
+include-package-data = true
 
 [tool.setuptools.packages.find]
 where = ["src"]
+
+# If your import package is yggdrasil (seems likely from yggenv entrypoint),
+# ship the PEP 561 marker file. Put `py.typed` inside src/yggdrasil/
+[tool.setuptools.package-data]
+yggdrasil = ["py.typed"]
+
+[tool.uv]
+native-tls = true
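
Note: the `include-package-data` flag plus the new `[tool.setuptools.package-data]` table is what actually ships the PEP 561 `py.typed` marker in built distributions. A minimal sanity check, assuming `python -m build` has produced a wheel in `dist/` (the glob and marker path are illustrative, matching the `yggdrasil` layout above):

import glob
import zipfile

# Pick the newest ygg wheel from dist/ (hypothetical local path).
wheel = sorted(glob.glob("dist/ygg-*.whl"))[-1]

with zipfile.ZipFile(wheel) as zf:
    # The marker must sit inside the import package for type checkers to see it.
    assert "yggdrasil/py.typed" in zf.namelist(), "py.typed missing from wheel"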
{ygg-0.1.48 → ygg-0.1.50}/src/ygg.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ygg
-Version: 0.1.48
+Version: 0.1.50
 Summary: Type-friendly utilities for moving data between Python objects, Arrow, Polars, Pandas, Spark, and Databricks
 Author: Yggdrasil contributors
 License: Apache License
@@ -235,6 +235,8 @@ Requires-Dist: pytest-asyncio; extra == "dev"
 Requires-Dist: black; extra == "dev"
 Requires-Dist: ruff; extra == "dev"
 Requires-Dist: mypy; extra == "dev"
+Requires-Dist: build; extra == "dev"
+Requires-Dist: twine; extra == "dev"
 Dynamic: license-file
 
 # Yggdrasil (Python)
{ygg-0.1.48 → ygg-0.1.50}/src/ygg.egg-info/requires.txt
@@ -11,3 +11,5 @@ pytest-asyncio
 black
 ruff
 mypy
+build
+twine
{ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/databricks/compute/cluster.py
@@ -144,6 +144,7 @@ class Cluster(WorkspaceService):
         single_user_name: Optional[str] = None,
         runtime_engine: Optional["RuntimeEngine"] = None,
         libraries: Optional[list[str]] = None,
+        update_timeout: Optional[Union[float, dt.timedelta]] = dt.timedelta(minutes=20),
         **kwargs
     ) -> "Cluster":
         """Create or reuse a cluster that mirrors the current Python environment.
@@ -152,9 +153,10 @@ class Cluster(WorkspaceService):
             workspace: Workspace to use for the cluster.
             cluster_id: Optional cluster id to reuse.
             cluster_name: Optional cluster name to reuse.
-            single_user_name: Optional user name for single-user clusters.
+            single_user_name: Optional username for single-user clusters.
             runtime_engine: Optional Databricks runtime engine.
             libraries: Optional list of libraries to install.
+            update_timeout: wait timeout, if None it will not wait completion
             **kwargs: Additional cluster specification overrides.
 
         Returns:
@@ -176,6 +178,7 @@ class Cluster(WorkspaceService):
                 single_user_name=single_user_name,
                 runtime_engine=runtime_engine,
                 libraries=libraries,
+                update_timeout=update_timeout,
                 **kwargs
             )
         )
@@ -190,6 +193,7 @@ class Cluster(WorkspaceService):
         single_user_name: Optional[str] = "current",
         runtime_engine: Optional["RuntimeEngine"] = None,
         libraries: Optional[list[str]] = None,
+        update_timeout: Optional[Union[float, dt.timedelta]] = dt.timedelta(minutes=20),
         **kwargs
     ) -> "Cluster":
         """Create/update a cluster to match the local Python environment.
@@ -198,9 +202,10 @@ class Cluster(WorkspaceService):
             source: Optional PythonEnv to mirror (defaults to current).
             cluster_id: Optional cluster id to update.
             cluster_name: Optional cluster name to update.
-            single_user_name: Optional single user name for the cluster.
+            single_user_name: Optional single username for the cluster.
             runtime_engine: Optional runtime engine selection.
             libraries: Optional list of libraries to install.
+            update_timeout: wait timeout, if None it will not wait completion
             **kwargs: Additional cluster specification overrides.
 
         Returns:
@@ -242,6 +247,7 @@ class Cluster(WorkspaceService):
             single_user_name=single_user_name,
             runtime_engine=runtime_engine or RuntimeEngine.PHOTON,
             libraries=libraries,
+            update_timeout=update_timeout,
             **kwargs
         )
 
@@ -380,7 +386,9 @@ class Cluster(WorkspaceService):
         start = time.time()
         sleep_time = tick
 
-        if isinstance(timeout, dt.timedelta):
+        if not timeout:
+            timeout = 20 * 60.0
+        elif isinstance(timeout, dt.timedelta):
             timeout = timeout.total_seconds()
 
         while self.is_pending:
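
Note: the `wait_for_status` change above makes a falsy timeout (None or 0) fall back to a 20-minute default instead of being passed through. A standalone sketch of the resulting normalization, reusing the names from the diff:

import datetime as dt
from typing import Optional, Union

def normalize_timeout(timeout: Optional[Union[float, dt.timedelta]]) -> float:
    """Illustrative helper mirroring the diff's falsy-default-then-convert logic."""
    if not timeout:                       # None, 0, or a zero-length timedelta
        return 20 * 60.0                  # default: 20 minutes, in seconds
    if isinstance(timeout, dt.timedelta):
        return timeout.total_seconds()
    return float(timeout)

assert normalize_timeout(None) == 1200.0
assert normalize_timeout(dt.timedelta(minutes=5)) == 300.0
assert normalize_timeout(90) == 90.0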
@@ -412,12 +420,14 @@
         # Extract "major.minor" from strings like "17.3.x-scala2.13-ml-gpu"
         v = self.spark_version
 
-        if v is None:
+        if not v:
             return None
 
         parts = v.split(".")
+
         if len(parts) < 2:
             return None
+
         return ".".join(parts[:2])  # e.g. "17.3"
 
     @property
@@ -428,8 +438,10 @@
         When the runtime can't be mapped, returns ``None``.
         """
         v = self.runtime_version
-        if v is None:
+
+        if not v:
             return None
+
         return _PYTHON_BY_DBR.get(v)
 
     # ------------------------------------------------------------------ #
@@ -586,6 +598,7 @@ class Cluster(WorkspaceService):
         cluster_id: Optional[str] = None,
         cluster_name: Optional[str] = None,
         libraries: Optional[List[Union[str, "Library"]]] = None,
+        update_timeout: Optional[Union[float, dt.timedelta]] = dt.timedelta(minutes=20),
         **cluster_spec: Any
     ):
         """Create a new cluster or update an existing one.
@@ -594,6 +607,7 @@ class Cluster(WorkspaceService):
             cluster_id: Optional cluster id to update.
             cluster_name: Optional cluster name to update or create.
             libraries: Optional libraries to install.
+            update_timeout: wait timeout, if None it will not wait completion
             **cluster_spec: Cluster specification overrides.
 
         Returns:
@@ -609,24 +623,28 @@ class Cluster(WorkspaceService):
             return found.update(
                 cluster_name=cluster_name,
                 libraries=libraries,
+                wait_timeout=update_timeout,
                 **cluster_spec
             )
 
         return self.create(
             cluster_name=cluster_name,
             libraries=libraries,
+            wait_timeout=update_timeout,
             **cluster_spec
         )
 
     def create(
         self,
         libraries: Optional[List[Union[str, "Library"]]] = None,
+        wait_timeout: Union[float, dt.timedelta] = dt.timedelta(minutes=20),
         **cluster_spec: Any
     ) -> str:
         """Create a new cluster and optionally install libraries.
 
         Args:
             libraries: Optional list of libraries to install after creation.
+            wait_timeout: wait timeout, if None it will not wait completion
             **cluster_spec: Cluster specification overrides.
 
         Returns:
@@ -646,14 +664,17 @@ class Cluster(WorkspaceService):
             update_details,
         )
 
-        self.details = self.clusters_client().create_and_wait(**update_details)
+        self.details = self.clusters_client().create(**update_details)
 
         LOGGER.info(
             "Created %s",
             self
         )
 
-        self.install_libraries(libraries=libraries, raise_error=False)
+        self.install_libraries(libraries=libraries, raise_error=False, wait_timeout=None)
+
+        if wait_timeout:
+            self.wait_for_status(timeout=wait_timeout)
 
         return self
 
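Note: `create` now submits the request with `create` rather than `create_and_wait`, queues library installs without blocking, and waits at the end only if `wait_timeout` is set. A hedged caller-side sketch, with `cluster` standing in for an existing `Cluster` instance:

import datetime as dt

# Fire-and-forget: returns as soon as the create request is accepted.
cluster.create(libraries=["ygg==0.1.50"], wait_timeout=None)

# Or bound the blocking wait explicitly (the new default is 20 minutes).
cluster.create(libraries=["ygg==0.1.50"], wait_timeout=dt.timedelta(minutes=5))
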
@@ -661,7 +682,7 @@ class Cluster(WorkspaceService):
         self,
         libraries: Optional[List[Union[str, "Library"]]] = None,
         access_control_list: Optional[List["ClusterAccessControlRequest"]] = None,
-        wait_timeout: Union[float, dt.timedelta] = dt.timedelta(minutes=20),
+        wait_timeout: Optional[Union[float, dt.timedelta]] = dt.timedelta(minutes=20),
         **cluster_spec: Any
     ) -> "Cluster":
         """Update cluster configuration and optionally install libraries.
@@ -708,7 +729,7 @@ class Cluster(WorkspaceService):
             self, diff
         )
 
-        self.wait_for_status()
+        self.wait_for_status(timeout=wait_timeout)
         self.clusters_client().edit(**update_details)
         self.update_permissions(access_control_list=access_control_list)
 
@@ -727,7 +748,7 @@ class Cluster(WorkspaceService):
         access_control_list: Optional[List["ClusterAccessControlRequest"]] = None,
     ):
         if not access_control_list:
-            access_control_list = self.default_permissions()
+            return self
 
         access_control_list = self._check_permission(access_control_list)
 
@@ -745,6 +766,7 @@ class Cluster(WorkspaceService):
                 permission_level=ClusterPermissionLevel.CAN_MANAGE
            )
             for name in current_groups
+            if name not in {"users"}
         ]
 
     def _check_permission(
@@ -862,18 +884,22 @@ class Cluster(WorkspaceService):
         Returns:
             The current Cluster instance.
         """
+        if self.is_running:
+            return self
+
         self.wait_for_status()
 
-        if not self.is_running:
-            LOGGER.debug("Starting %s", self)
+        if self.is_running:
+            return self
 
-            if wait_timeout:
-                self.clusters_client().start(cluster_id=self.cluster_id)
-                self.wait_for_status(timeout=wait_timeout.total_seconds())
-            else:
-                self.clusters_client().start(cluster_id=self.cluster_id)
+        LOGGER.debug("Starting %s", self)
+
+        self.clusters_client().start(cluster_id=self.cluster_id)
 
-            LOGGER.info("Started %s", self)
+        LOGGER.info("Started %s", self)
+
+        if wait_timeout:
+            self.wait_for_status(timeout=wait_timeout.total_seconds())
 
         return self
 
@@ -889,7 +915,7 @@ class Cluster(WorkspaceService):
 
         if self.is_running:
             self.details = self.clusters_client().restart_and_wait(cluster_id=self.cluster_id)
-            return self.wait_for_status()
+            return self
 
         return self.start()
 
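Note: the rewritten `start` replaces the old nested branches with guard clauses. A standalone rendering of the new control flow, for readability only (attribute and method names are those visible in the diff):

import datetime as dt
from typing import Optional

def start(cluster, wait_timeout: Optional[dt.timedelta] = dt.timedelta(minutes=20)):
    if cluster.is_running:                 # fast path: nothing to do
        return cluster

    cluster.wait_for_status()              # let any pending state settle

    if cluster.is_running:                 # it may have come up while waiting
        return cluster

    cluster.clusters_client().start(cluster_id=cluster.cluster_id)

    if wait_timeout:                       # None means fire-and-forget
        cluster.wait_for_status(timeout=wait_timeout.total_seconds())

    return cluster
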
{ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/databricks/compute/execution_context.py
@@ -180,7 +180,7 @@ print(json.dumps(meta))"""
         """
         return self.cluster.workspace.sdk()
 
-    def _create_command(
+    def create_command(
         self,
         language: "Language",
     ) -> any:
@@ -192,17 +192,29 @@ print(json.dumps(meta))"""
         Returns:
             The created command execution context response.
         """
-        self.cluster.ensure_running()
-
         LOGGER.debug(
             "Creating Databricks command execution context for %s",
             self.cluster
         )
 
-        created = self._workspace_client().command_execution.create_and_wait(
-            cluster_id=self.cluster.cluster_id,
-            language=language,
+        try:
+            created = self._workspace_client().command_execution.create_and_wait(
+                cluster_id=self.cluster.cluster_id,
+                language=language,
+            )
+        except:
+            self.cluster.ensure_running()
+
+            created = self._workspace_client().command_execution.create_and_wait(
+                cluster_id=self.cluster.cluster_id,
+                language=language,
+            )
+
+        LOGGER.info(
+            "Created Databricks command execution context %s",
+            self
         )
+
         created = getattr(created, "response", created)
 
         return created
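
Note: `create_command` now attempts the API call optimistically and pays the `ensure_running()` cost only on failure, retrying once. A hedged distillation of the pattern; the diff uses a bare `except:`, which also catches things like `KeyboardInterrupt`, so this sketch narrows it to `Exception`:

def create_with_retry(ctx, language):
    """Illustrative: optimistic call, then ensure-running and retry once."""
    client = ctx._workspace_client().command_execution
    try:
        return client.create_and_wait(
            cluster_id=ctx.cluster.cluster_id,
            language=language,
        )
    except Exception:                  # narrower than the diff's bare except
        ctx.cluster.ensure_running()   # bring the cluster up, then retry
        return client.create_and_wait(
            cluster_id=ctx.cluster.cluster_id,
            language=language,
        )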
@@ -220,10 +232,6 @@ print(json.dumps(meta))"""
             The connected ExecutionContext instance.
         """
         if self.context_id is not None:
-            LOGGER.debug(
-                "Execution context already open for %s",
-                self
-            )
             return self
 
         self.language = language or self.language
@@ -231,7 +239,7 @@ print(json.dumps(meta))"""
         if self.language is None:
             self.language = Language.PYTHON
 
-        ctx = self._create_command(language=self.language)
+        ctx = self.create_command(language=self.language)
 
         context_id = ctx.id
         if not context_id:
{ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/databricks/compute/remote.py
@@ -39,6 +39,7 @@ def databricks_remote_compute(
     timeout: Optional[dt.timedelta] = None,
     env_keys: Optional[List[str]] = None,
     force_local: bool = False,
+    update_timeout: Optional[Union[float, dt.timedelta]] = None,
     **options
 ) -> Callable[[Callable[..., ReturnType]], Callable[..., ReturnType]]:
     """Return a decorator that executes functions on a remote cluster.
@@ -52,6 +53,7 @@ def databricks_remote_compute(
         timeout: Optional execution timeout for remote calls.
         env_keys: Optional environment variable names to forward.
         force_local: Force local execution
+        update_timeout: creation or update wait timeout
         **options: Extra options forwarded to the execution decorator.
 
     Returns:
@@ -82,7 +84,8 @@
             cluster = workspace.clusters().replicated_current_environment(
                 workspace=workspace,
                 cluster_name=cluster_name,
-                single_user_name=workspace.current_user.user_name
+                single_user_name=workspace.current_user.user_name,
+                update_timeout=update_timeout
             )
 
             cluster.ensure_running(wait_timeout=None)
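
Note: `databricks_remote_compute` forwards the new `update_timeout` into `replicated_current_environment`, so the cluster create/update wait is bounded independently of the non-blocking `ensure_running(wait_timeout=None)` call above. A hedged usage sketch; the cluster name and function body are illustrative:

import datetime as dt

@databricks_remote_compute(
    cluster_name="ygg-remote",                  # hypothetical cluster name
    update_timeout=dt.timedelta(minutes=5),     # bound the create/update wait
)
def heavy_transform(rows: list[dict]) -> int:
    return len(rows)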
{ygg-0.1.48 → ygg-0.1.50}/src/yggdrasil/databricks/sql/statement_result.py
@@ -344,10 +344,17 @@ class StatementResult:
         if self.persisted:
             if self._arrow_table is not None:
                 return self._arrow_table.schema
-            return spark_schema_to_arrow_schema(self._spark_df.schema)
+            elif self._spark_df is not None:
+                return spark_schema_to_arrow_schema(self._spark_df.schema)
+            raise NotImplementedError("")
+
+        manifest = self.manifest
+
+        if manifest is None:
+            return pa.schema([])
 
         fields = [
-            column_info_to_arrow_field(_) for _ in self.manifest.schema.columns
+            column_info_to_arrow_field(_) for _ in manifest.schema.columns
         ]
 
         return pa.schema(fields)
@@ -362,7 +369,7 @@ class StatementResult:
             An Arrow Table containing all rows.
         """
         if self.persisted:
-            if self._arrow_table:
+            if self._arrow_table is not None:
                 return self._arrow_table
             else:
                 return self._spark_df.toArrow()
@@ -370,7 +377,6 @@ class StatementResult:
         batches = list(self.to_arrow_batches(parallel_pool=parallel_pool))
 
         if not batches:
-            # empty table with no columns
             return pa.Table.from_batches([], schema=self.arrow_schema())
 
         return pa.Table.from_batches(batches)
@@ -501,8 +507,9 @@ class StatementResult:
         Returns:
             A Spark DataFrame with the result rows.
         """
-        if self._spark_df:
+        if self._spark_df is not None:
             return self._spark_df
 
         self._spark_df = arrow_table_to_spark_dataframe(self.to_arrow_table())
+
         return self._spark_df
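
Note: several hunks in this file swap truthiness checks for `is not None`. For Arrow tables the distinction is load-bearing: `pa.Table` reports its row count through `len()`, so a cached zero-row result is falsy, and `if self._arrow_table:` would wrongly skip the cache and recompute. A minimal demonstration:

import pyarrow as pa

# A real table with a schema but zero rows.
empty = pa.table({"a": pa.array([], type=pa.int64())})

assert len(empty) == 0        # len() is the row count
assert not empty              # ...so a 0-row table is falsy
assert empty is not None      # the identity check still sees the cached value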