ygg 0.1.35__py3-none-any.whl → 0.1.38__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ygg
3
- Version: 0.1.35
3
+ Version: 0.1.38
4
4
  Summary: Type-friendly utilities for moving data between Python objects, Arrow, Polars, Pandas, Spark, and Databricks
5
5
  Author: Yggdrasil contributors
6
6
  License: Apache License
@@ -1,17 +1,17 @@
1
- ygg-0.1.35.dist-info/licenses/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
1
+ ygg-0.1.38.dist-info/licenses/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
2
2
  yggdrasil/__init__.py,sha256=PfH7Xwt6uue6oqe6S5V8NhDJcVQClkKrBE1KXhdelZc,117
3
- yggdrasil/version.py,sha256=dvUFqQgabIeithBNW7swqwxg6R59T0-i289dJYgomYQ,22
3
+ yggdrasil/version.py,sha256=pPo3-5ffVm5NbQms9PN259Sjq7R-Q9UY4HYJ8M2iom4,22
4
4
  yggdrasil/databricks/__init__.py,sha256=skctY2c8W-hI81upx9F_PWRe5ishL3hrdiTuizgDjdw,152
5
5
  yggdrasil/databricks/compute/__init__.py,sha256=NvdzmaJSNYY1uJthv1hHdBuNu3bD_-Z65DWnaJt9yXg,289
6
- yggdrasil/databricks/compute/cluster.py,sha256=yx3xEJ5Vgg-IPeyYxxdf74_9DSBPHvg2-FYs8ptJrl0,40404
6
+ yggdrasil/databricks/compute/cluster.py,sha256=mnNzjCx7X3iK22oZ7K3pqot0AXq9JTdg97kT61j2_UU,40729
7
7
  yggdrasil/databricks/compute/execution_context.py,sha256=nxrNXoarq_JAB-Cpj0udHhq2jx-DmMbRWJdAezLrPis,22347
8
8
  yggdrasil/databricks/compute/remote.py,sha256=nEN_Fr1Ouul_iKOf4B5QjEGscYAcl7nHjGsl2toRzrU,2874
9
9
  yggdrasil/databricks/jobs/__init__.py,sha256=snxGSJb0M5I39v0y3IR-uEeSlZR248cQ_4DJ1sYs-h8,154
10
10
  yggdrasil/databricks/jobs/config.py,sha256=9LGeHD04hbfy0xt8_6oobC4moKJh4_DTjZiK4Q2Tqjk,11557
11
11
  yggdrasil/databricks/sql/__init__.py,sha256=y1n5yg-drZ8QVZbEgznsRG24kdJSnFis9l2YfYCsaCM,234
12
- yggdrasil/databricks/sql/engine.py,sha256=kUFBddJJQC0AgDqH0l7GFs7d_Ony5rc8fOv4inLU6Vw,41051
12
+ yggdrasil/databricks/sql/engine.py,sha256=Azx3gKtWOMy3D9I2FhkLmpthZPWAJZ9iZkaDivmt_0s,41002
13
13
  yggdrasil/databricks/sql/exceptions.py,sha256=Jqd_gT_VyPL8klJEHYEzpv5eHtmdY43WiQ7HZBaEqSk,53
14
- yggdrasil/databricks/sql/statement_result.py,sha256=VlHXhTcvTVya_2aJ-uUfUooZF_MqQuOZ8k7g6PBDhOM,17227
14
+ yggdrasil/databricks/sql/statement_result.py,sha256=tik53oB7Eq6nA4Unv8qH6fe9RJJO9HcumI5BAkW3omA,16684
15
15
  yggdrasil/databricks/sql/types.py,sha256=5G-BM9_eOsRKEMzeDTWUsWW5g4Idvs-czVCpOCrMhdA,6412
16
16
  yggdrasil/databricks/workspaces/__init__.py,sha256=Ti1I99JTC3koYJaCy8WYvkAox4KdcuMRk8b2rHroWCY,133
17
17
  yggdrasil/databricks/workspaces/filesytem.py,sha256=Z8JXU7_XUEbw9fpTQT1avRQKi-IAP2KemXBMPkUoY4w,9805
@@ -55,8 +55,8 @@ yggdrasil/types/cast/registry.py,sha256=_zdFGmUBB7P-e_LIcJlOxMcxAkXoA-UXB6HqLMgT
55
55
  yggdrasil/types/cast/spark_cast.py,sha256=_KAsl1DqmKMSfWxqhVE7gosjYdgiL1C5bDQv6eP3HtA,24926
56
56
  yggdrasil/types/cast/spark_pandas_cast.py,sha256=BuTiWrdCANZCdD_p2MAytqm74eq-rdRXd-LGojBRrfU,5023
57
57
  yggdrasil/types/cast/spark_polars_cast.py,sha256=btmZNHXn2NSt3fUuB4xg7coaE0RezIBdZD92H8NK0Jw,9073
58
- ygg-0.1.35.dist-info/METADATA,sha256=qSh-Cd0LtjU_CV9Je0E1ns1AnuRbIUPMIpvVJoJtIUA,19204
59
- ygg-0.1.35.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
60
- ygg-0.1.35.dist-info/entry_points.txt,sha256=6q-vpWG3kvw2dhctQ0LALdatoeefkN855Ev02I1dKGY,70
61
- ygg-0.1.35.dist-info/top_level.txt,sha256=iBe9Kk4VIVbLpgv_p8OZUIfxgj4dgJ5wBg6vO3rigso,10
62
- ygg-0.1.35.dist-info/RECORD,,
58
+ ygg-0.1.38.dist-info/METADATA,sha256=2G6ZcCvdN5TLyP9Z7z3GHQio7xjzBCd-ZEOO2qnL2Yc,19204
59
+ ygg-0.1.38.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
60
+ ygg-0.1.38.dist-info/entry_points.txt,sha256=6q-vpWG3kvw2dhctQ0LALdatoeefkN855Ev02I1dKGY,70
61
+ ygg-0.1.38.dist-info/top_level.txt,sha256=iBe9Kk4VIVbLpgv_p8OZUIfxgj4dgJ5wBg6vO3rigso,10
62
+ ygg-0.1.38.dist-info/RECORD,,
@@ -310,6 +310,11 @@ class Cluster(WorkspaceService):
310
310
  self.details = self.clusters_client().get(cluster_id=self.cluster_id)
311
311
  return self._details
312
312
 
313
+ def refresh(self, max_delay: float | None = None):
314
+ self.details = self.fresh_details(max_delay=max_delay)
315
+
316
+ return self
317
+
313
318
  @details.setter
314
319
  def details(self, value: "ClusterDetails"):
315
320
  """Cache cluster details and update identifiers."""
@@ -322,10 +327,10 @@ class Cluster(WorkspaceService):
322
327
  @property
323
328
  def state(self):
324
329
  """Return the current cluster state."""
325
- details = self.fresh_details(max_delay=10)
330
+ self.refresh()
326
331
 
327
- if details is not None:
328
- return details.state
332
+ if self._details is not None:
333
+ return self._details.state
329
334
  return State.UNKNOWN
330
335
 
331
336
  @property
@@ -356,7 +361,7 @@ class Cluster(WorkspaceService):
356
361
  def wait_for_status(
357
362
  self,
358
363
  tick: float = 0.5,
359
- timeout: float = 600,
364
+ timeout: Union[float, dt.timedelta] = 600,
360
365
  backoff: int = 2,
361
366
  max_sleep_time: float = 15
362
367
  ):
@@ -374,6 +379,9 @@ class Cluster(WorkspaceService):
374
379
  start = time.time()
375
380
  sleep_time = tick
376
381
 
382
+ if isinstance(timeout, dt.timedelta):
383
+ timeout = timeout.total_seconds()
384
+
377
385
  while self.is_pending:
378
386
  time.sleep(sleep_time)
379
387
 
@@ -691,7 +699,8 @@ class Cluster(WorkspaceService):
691
699
  )
692
700
 
693
701
  self.wait_for_status()
694
- self.details = self.clusters_client().edit_and_wait(**update_details)
702
+ self.details = self.clusters_client().edit(**update_details)
703
+ self.wait_for_status()
695
704
 
696
705
  logger.info(
697
706
  "Updated %s",
@@ -754,7 +763,10 @@ class Cluster(WorkspaceService):
754
763
  return None
755
764
 
756
765
  return Cluster(
757
- workspace=self.workspace, cluster_id=details.cluster_id, _details=details
766
+ workspace=self.workspace,
767
+ cluster_id=details.cluster_id,
768
+ cluster_name=details.cluster_name,
769
+ _details=details
758
770
  )
759
771
 
760
772
  for cluster in self.list_clusters():
@@ -796,10 +808,8 @@ class Cluster(WorkspaceService):
796
808
  logger.info("Starting %s", self)
797
809
 
798
810
  if wait_timeout:
799
- self.details = (
800
- self.clusters_client()
801
- .start_and_wait(cluster_id=self.cluster_id, timeout=wait_timeout)
802
- )
811
+ self.clusters_client().start(cluster_id=self.cluster_id)
812
+ self.wait_for_status(timeout=wait_timeout.total_seconds())
803
813
  self.wait_installed_libraries(timeout=wait_timeout)
804
814
  else:
805
815
  self.clusters_client().start(cluster_id=self.cluster_id)
@@ -309,7 +309,6 @@ class SQLEngine(WorkspaceService):
309
309
  engine=self,
310
310
  statement_id=response.statement_id,
311
311
  _response=response,
312
- _response_refresh_time=time.time(),
313
312
  disposition=disposition,
314
313
  )
315
314
 
@@ -44,6 +44,15 @@ if TYPE_CHECKING:
44
44
  from .engine import SQLEngine
45
45
 
46
46
 
47
+ DONE_STATES = {
48
+ StatementState.CANCELED, StatementState.CLOSED, StatementState.FAILED,
49
+ StatementState.SUCCEEDED
50
+ }
51
+
52
+ FAILED_STATES = {
53
+ StatementState.FAILED, StatementState.CANCELED
54
+ }
55
+
47
56
  __all__ = [
48
57
  "StatementResult"
49
58
  ]
@@ -57,7 +66,6 @@ class StatementResult:
57
66
  disposition: "Disposition"
58
67
 
59
68
  _response: Optional[StatementResponse] = dataclasses.field(default=None, repr=False)
60
- _response_refresh_time: float = dataclasses.field(default=0, repr=False)
61
69
 
62
70
  _spark_df: Optional[SparkDataFrame] = dataclasses.field(default=None, repr=False)
63
71
  _arrow_table: Optional[pa.Table] = dataclasses.field(default=None, repr=False)
@@ -101,8 +109,30 @@ class StatementResult:
101
109
  Returns:
102
110
  The current StatementResponse object.
103
111
  """
104
- if self._response is None and not self.is_spark_sql:
105
- self.response = self.workspace.sdk().statement_execution.get_statement(self.statement_id)
112
+ if self.is_spark_sql:
113
+ return StatementResponse(
114
+ statement_id=self.statement_id or "sparksql",
115
+ status=StatementStatus(
116
+ state=StatementState.SUCCEEDED
117
+ )
118
+ )
119
+ elif not self.statement_id:
120
+ return StatementResponse(
121
+ statement_id="unknown",
122
+ status=StatementStatus(
123
+ state=StatementState.PENDING
124
+ )
125
+ )
126
+
127
+ statement_execution = self.workspace.sdk().statement_execution
128
+
129
+ if self._response is None:
130
+ # Initialize
131
+ self._response = statement_execution.get_statement(self.statement_id)
132
+ elif self._response.status.state not in DONE_STATES:
133
+ # Refresh
134
+ self._response = statement_execution.get_statement(self.statement_id)
135
+
106
136
  return self._response
107
137
 
108
138
  @response.setter
@@ -113,27 +143,8 @@ class StatementResult:
113
143
  value: StatementResponse to cache.
114
144
  """
115
145
  self._response = value
116
- self._response_refresh_time = time.time()
117
-
118
146
  self.statement_id = self._response.statement_id
119
147
 
120
- def fresh_response(self, delay: float):
121
- """Refresh the response if it is older than ``delay`` seconds.
122
-
123
- Args:
124
- delay: Minimum age in seconds before refreshing.
125
-
126
- Returns:
127
- The refreshed StatementResponse object.
128
- """
129
- if self.is_spark_sql:
130
- return self._response
131
-
132
- if self.statement_id and not self.done and time.time() - self._response_refresh_time > delay:
133
- self.response = self.workspace.sdk().statement_execution.get_statement(self.statement_id)
134
-
135
- return self._response
136
-
137
148
  def result_data_at(self, chunk_index: int):
138
149
  """Fetch a specific result chunk by index.
139
150
 
@@ -166,17 +177,7 @@ class StatementResult:
166
177
  Returns:
167
178
  A StatementStatus object.
168
179
  """
169
- if self.persisted:
170
- return StatementStatus(
171
- state=StatementState.SUCCEEDED
172
- )
173
-
174
- if not self.statement_id:
175
- return StatementStatus(
176
- state=StatementState.PENDING
177
- )
178
-
179
- return self.fresh_response(delay=1).status
180
+ return self.response.status
180
181
 
181
182
  @property
182
183
  def state(self):
@@ -194,8 +195,6 @@ class StatementResult:
194
195
  Returns:
195
196
  The result manifest or None for Spark SQL results.
196
197
  """
197
- if self.is_spark_sql:
198
- return None
199
198
  return self.response.manifest
200
199
 
201
200
  @property
@@ -214,15 +213,7 @@ class StatementResult:
214
213
  Returns:
215
214
  True if the statement is done, otherwise False.
216
215
  """
217
- if self.persisted:
218
- return True
219
-
220
- if self._response is None:
221
- return False
222
-
223
- return self._response.status.state in [
224
- StatementState.CANCELED, StatementState.CLOSED, StatementState.FAILED, StatementState.SUCCEEDED
225
- ]
216
+ return self.state in DONE_STATES
226
217
 
227
218
  @property
228
219
  def failed(self):
@@ -231,13 +222,7 @@ class StatementResult:
231
222
  Returns:
232
223
  True if the statement failed or was cancelled.
233
224
  """
234
- if self.persisted:
235
- return True
236
-
237
- if self._response is None:
238
- return False
239
-
240
- return self._response.status.state in [StatementState.CANCELED, StatementState.FAILED]
225
+ return self.state in FAILED_STATES
241
226
 
242
227
  @property
243
228
  def persisted(self):
yggdrasil/version.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.1.35"
1
+ __version__ = "0.1.38"
File without changes