ygg-0.1.57-py3-none-any.whl → ygg-0.1.60-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. {ygg-0.1.57.dist-info → ygg-0.1.60.dist-info}/METADATA +1 -1
  2. ygg-0.1.60.dist-info/RECORD +74 -0
  3. yggdrasil/ai/__init__.py +2 -0
  4. yggdrasil/ai/session.py +89 -0
  5. yggdrasil/ai/sql_session.py +310 -0
  6. yggdrasil/databricks/__init__.py +0 -3
  7. yggdrasil/databricks/compute/cluster.py +68 -113
  8. yggdrasil/databricks/compute/command_execution.py +674 -0
  9. yggdrasil/databricks/compute/exceptions.py +19 -0
  10. yggdrasil/databricks/compute/execution_context.py +491 -282
  11. yggdrasil/databricks/compute/remote.py +4 -14
  12. yggdrasil/databricks/exceptions.py +10 -0
  13. yggdrasil/databricks/sql/__init__.py +0 -4
  14. yggdrasil/databricks/sql/engine.py +161 -173
  15. yggdrasil/databricks/sql/exceptions.py +9 -1
  16. yggdrasil/databricks/sql/statement_result.py +108 -120
  17. yggdrasil/databricks/sql/warehouse.py +331 -92
  18. yggdrasil/databricks/workspaces/io.py +89 -9
  19. yggdrasil/databricks/workspaces/path.py +120 -72
  20. yggdrasil/databricks/workspaces/workspace.py +214 -61
  21. yggdrasil/exceptions.py +7 -0
  22. yggdrasil/libs/databrickslib.py +23 -18
  23. yggdrasil/libs/extensions/spark_extensions.py +1 -1
  24. yggdrasil/libs/pandaslib.py +15 -6
  25. yggdrasil/libs/polarslib.py +49 -13
  26. yggdrasil/pyutils/__init__.py +1 -2
  27. yggdrasil/pyutils/callable_serde.py +12 -19
  28. yggdrasil/pyutils/exceptions.py +16 -0
  29. yggdrasil/pyutils/python_env.py +14 -13
  30. yggdrasil/pyutils/waiting_config.py +171 -0
  31. yggdrasil/types/cast/arrow_cast.py +3 -0
  32. yggdrasil/types/cast/pandas_cast.py +157 -169
  33. yggdrasil/types/cast/polars_cast.py +11 -43
  34. yggdrasil/types/dummy_class.py +81 -0
  35. yggdrasil/version.py +1 -1
  36. ygg-0.1.57.dist-info/RECORD +0 -66
  37. yggdrasil/databricks/ai/loki.py +0 -53
  38. {ygg-0.1.57.dist-info → ygg-0.1.60.dist-info}/WHEEL +0 -0
  39. {ygg-0.1.57.dist-info → ygg-0.1.60.dist-info}/entry_points.txt +0 -0
  40. {ygg-0.1.57.dist-info → ygg-0.1.60.dist-info}/licenses/LICENSE +0 -0
  41. {ygg-0.1.57.dist-info → ygg-0.1.60.dist-info}/top_level.txt +0 -0
  42. /yggdrasil/{databricks/ai/__init__.py → pyutils/mimetypes.py} +0 -0
yggdrasil/databricks/compute/execution_context.py
@@ -2,7 +2,6 @@
 
 import base64
 import dataclasses as dc
-import datetime as dt
 import io
 import json
 import logging
@@ -12,28 +11,57 @@ import re
 import sys
 import threading
 import zipfile
+from concurrent.futures import ThreadPoolExecutor
 from threading import Thread
 from types import ModuleType
 from typing import TYPE_CHECKING, Optional, Any, Callable, List, Dict, Union, Iterable, Tuple
 
-from ...libs.databrickslib import databricks_sdk
+from .command_execution import CommandExecution
+from .exceptions import ClientTerminatedSession
+from ...libs.databrickslib import databricks_sdk, DatabricksDummyClass
+from ...pyutils.callable_serde import CallableSerde
 from ...pyutils.exceptions import raise_parsed_traceback
 from ...pyutils.expiring_dict import ExpiringDict
 from ...pyutils.modules import resolve_local_lib_path
-from ...pyutils.callable_serde import CallableSerde
+from ...pyutils.waiting_config import WaitingConfig, WaitingConfigArg
 
 if TYPE_CHECKING:
     from .cluster import Cluster
 
 if databricks_sdk is not None:
-    from databricks.sdk.service.compute import Language, ResultType
+    from databricks.sdk.service.compute import Language, ResultType, CommandStatusResponse
+else:
+    Language = DatabricksDummyClass
+    ResultType = DatabricksDummyClass
+    CommandStatusResponse = DatabricksDummyClass
+
 
 __all__ = [
     "ExecutionContext"
 ]
 
 LOGGER = logging.getLogger(__name__)
+UPLOADED_PACKAGE_ROOTS: Dict[str, ExpiringDict] = {}
+BytesLike = Union[bytes, bytearray, memoryview]
+
+@dc.dataclass(frozen=True)
+class BytesSource:
+    """
+    Hashable wrapper for in-memory content so it can be used as a dict key.
 
+    name: only used for debugging / metadata (not required to match remote basename)
+    data: bytes-like payload
+    """
+    name: str
+    data: bytes
+
+LocalSpec = Union[
+    str,
+    os.PathLike,
+    bytes,                  # raw bytes as key (works, but no name)
+    BytesSource,            # recommended for buffers
+    Tuple[str, BytesLike],  # (name, data) helper
+]
 
 @dc.dataclass
 class RemoteMetadata:
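
The new `LocalSpec` union gives `upload_local_path` (rewritten later in this diff) several payload shapes. A minimal usage sketch, assuming `ctx` is an `ExecutionContext` and the remote paths are purely illustrative:

```python
ctx.upload_local_path({
    "/tmp/model.bin": "/databricks/driver/model.bin",                    # local file -> remote file path
    "./my_pkg": "/databricks/driver/libs",                               # local dir -> remote dir root
    b'{"k": 1}': "/databricks/driver/cfg.json",                          # raw bytes -> remote file (no name)
    BytesSource("notes.txt", b"hello"): "/databricks/driver/notes.txt",  # named buffer
    ("blob.bin", b"\x00\x01"): "/databricks/driver/blob.bin",            # (name, data) helper
})
```

Because `BytesSource` is a frozen dataclass it is hashable, which is what lets raw buffers act as dict keys here.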
@@ -41,6 +69,7 @@ class RemoteMetadata:
     site_packages_path: Optional[str] = dc.field(default=None)
     os_env: Dict[str, str] = dc.field(default_factory=dict)
     version_info: Tuple[int, int, int] = dc.field(default=(0, 0, 0))
+    temp_path: str = ""
 
     def os_env_diff(
         self,
@@ -76,14 +105,14 @@ class ExecutionContext:
         ctx.execute("print(x + 1)")
     """
     cluster: "Cluster"
-    language: Optional["Language"] = None
     context_id: Optional[str] = None
 
-    _was_connected: Optional[bool] = dc.field(default=None, repr=False)
-    _remote_metadata: Optional[RemoteMetadata] = dc.field(default=None, repr=False)
-    _uploaded_package_roots: Optional[ExpiringDict] = dc.field(default_factory=ExpiringDict, repr=False)
+    language: Optional[Language] = dc.field(default=None, repr=False, compare=False, hash=False)
 
-    _lock: threading.RLock = dc.field(default_factory=threading.RLock, init=False, repr=False)
+    _was_connected: Optional[bool] = dc.field(default=None, repr=False, compare=False, hash=False)
+    _remote_metadata: Optional[RemoteMetadata] = dc.field(default=None, repr=False, compare=False, hash=False)
+    _uploaded_package_roots: Optional[ExpiringDict] = dc.field(default_factory=ExpiringDict, repr=False, compare=False, hash=False)
+    _lock: threading.RLock = dc.field(default_factory=threading.RLock, init=False, repr=False, compare=False, hash=False)
 
     # --- Pickle / cloudpickle support (don't serialize locks or cached remote metadata) ---
     def __getstate__(self):
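
The `compare=False, hash=False` markers keep runtime-only state out of the dataclass-generated `__eq__` (and any `__hash__`). A self-contained sketch of the same idiom, with hypothetical names:

```python
import dataclasses as dc

@dc.dataclass
class Ctx:
    cluster_id: str
    # runtime-only cache: hidden from repr and excluded from equality
    _cache: dict = dc.field(default_factory=dict, repr=False, compare=False, hash=False)

a = Ctx("c-1")
a._cache["meta"] = {"ok": True}
b = Ctx("c-1")
assert a == b  # caches differ, but identity is defined by cluster_id alone
```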
@@ -110,16 +139,31 @@ class ExecutionContext:
     def __exit__(self, exc_type, exc_val, exc_tb):
         """Exit the context manager and close the remote context if created."""
         if not self._was_connected:
-            self.close()
+            self.close(wait=False)
         self.cluster.__exit__(exc_type, exc_val=exc_val, exc_tb=exc_tb)
 
-    def __del__(self):
-        """Best-effort cleanup for the remote execution context."""
-        if self.context_id:
-            try:
-                Thread(target=self.close).start()
-            except BaseException:
-                pass
+    def __repr__(self):
+        return "%s(url=%s)" % (
+            self.__class__.__name__,
+            self.url()
+        )
+
+    def __str__(self):
+        return self.url()
+
+    def url(self) -> str:
+        return "%s/context/%s" % (
+            self.cluster.url(),
+            self.context_id or "unknown"
+        )
+
+    @property
+    def workspace(self):
+        return self.cluster.workspace
+
+    @property
+    def cluster_id(self):
+        return self.cluster.cluster_id
 
     @property
     def remote_metadata(self) -> RemoteMetadata:
@@ -133,46 +177,43 @@ class ExecutionContext:
         with self._lock:
             # double-check after acquiring lock
             if self._remote_metadata is None:
-                cmd = r"""import glob, json, os
+                cmd = r"""import glob, json, os, tempfile
 from yggdrasil.pyutils.python_env import PythonEnv
 
 current_env = PythonEnv.get_current()
 meta = {}
 
+# temp dir (explicit + stable for downstream code)
+tmp_dir = tempfile.mkdtemp(prefix="tmp_")
+meta["temp_path"] = tmp_dir
+os.environ["TMPDIR"] = tmp_dir  # many libs respect this
+
+# find site-packages
 for path in glob.glob('/local_**/.ephemeral_nfs/cluster_libraries/python/lib/python*/site-*', recursive=False):
     if path.endswith('site-packages'):
         meta["site_packages_path"] = path
         break
 
+# env vars snapshot
 os_env = meta["os_env"] = {}
 for k, v in os.environ.items():
     os_env[k] = v
-
+
 meta["version_info"] = current_env.version_info
 
 print(json.dumps(meta))"""
 
-                try:
-                    content = self.execute_command(
-                        command=cmd,
-                        result_tag="<<RESULT>>",
-                        print_stdout=False,
-                    )
-                except ImportError:
-                    self.cluster.ensure_running()
-
-                    content = self.execute_command(
-                        command=cmd,
-                        result_tag="<<RESULT>>",
-                        print_stdout=False,
-                    )
-
-                self._remote_metadata = RemoteMetadata(**json.loads(content))
+                content = self.command(
+                    command=cmd,
+                    language=Language.PYTHON,
+                ).start().wait().result(unpickle=True)
+
+                self._remote_metadata = RemoteMetadata(**content)
 
         return self._remote_metadata
 
     # ------------ internal helpers ------------
-    def _workspace_client(self):
+    def workspace_client(self):
         """Return the Databricks SDK client for command execution.
 
         Returns:
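
The metadata probe now goes through the fluent `CommandExecution` API added in `command_execution.py` (listed but not shown in this diff): `command(...).start().wait().result(...)`. A hedged sketch of the same round trip, assuming a connected `ctx` and assuming `result(unpickle=True)` decodes the command's printed output back into Python objects, as the probe above relies on:

```python
info = ctx.command(
    command='import json, sys; print(json.dumps({"py": list(sys.version_info[:3])}))',
    language=Language.PYTHON,
).start().wait().result(unpickle=True)
# info -> {"py": [3, 10, 12]} on a 3.10 runtime, for example
```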
@@ -180,14 +221,26 @@ print(json.dumps(meta))"""
         """
         return self.cluster.workspace.sdk()
 
-    def create_command(
+    def shared_cache_path(
+        self,
+        suffix: str
+    ):
+        assert suffix, "Missing suffix arg"
+
+        return self.cluster.shared_cache_path(
+            suffix="/context/%s" % suffix.lstrip("/")
+        )
+
+    def create(
         self,
         language: "Language",
-    ) -> any:
+        wait: Optional[WaitingConfigArg] = True,
+    ) -> "ExecutionContext":
         """Create a command execution context, retrying if needed.
 
         Args:
             language: The Databricks command language to use.
+            wait: Waiting config to update
 
         Returns:
             The created command execution context response.
@@ -197,62 +250,82 @@ print(json.dumps(meta))"""
             self.cluster
         )
 
+        client = self.workspace_client().command_execution
+
         try:
-            created = self._workspace_client().command_execution.create_and_wait(
-                cluster_id=self.cluster.cluster_id,
-                language=language,
-            )
-        except:
-            self.cluster.ensure_running()
+            with ThreadPoolExecutor(max_workers=1) as ex:
+                fut = ex.submit(
+                    client.create,
+                    cluster_id=self.cluster_id,
+                    language=language,
+                )
+
+                try:
+                    created = fut.result(timeout=10).response
+                except TimeoutError:
+                    self.cluster.ensure_running(wait=True)
+
+                    created = client.create(
+                        cluster_id=self.cluster_id,
+                        language=language,
+                    ).response
+        except Exception as e:
+            LOGGER.warning(e)
 
-            created = self._workspace_client().command_execution.create_and_wait(
-                cluster_id=self.cluster.cluster_id,
+            self.cluster.ensure_running(wait=True)
+
+            created = client.create(
+                cluster_id=self.cluster_id,
                 language=language,
-            )
+            ).response
 
         LOGGER.info(
-            "Created Databricks command execution context %s",
+            "Created %s",
             self
         )
 
-        created = getattr(created, "response", created)
+        self.context_id = created.id
 
-        return created
+        return self
 
     def connect(
         self,
-        language: Optional["Language"] = None
+        language: Optional[Language] = None,
+        wait: Optional[WaitingConfigArg] = True,
+        reset: bool = False,
     ) -> "ExecutionContext":
         """Create a remote command execution context if not already open.
 
         Args:
             language: Optional language override for the context.
+            wait: Wait config
+            reset: Reset existing if connected
 
         Returns:
             The connected ExecutionContext instance.
         """
         if self.context_id is not None:
-            return self
+            if not reset:
+                return self
 
-        self.language = language or self.language
+            LOGGER.info(
+                "%s reset connection",
+                self
+            )
 
-        if self.language is None:
-            self.language = Language.PYTHON
+            self.close(wait=False)
 
-        ctx = self.create_command(language=self.language)
+        language = language or self.language
 
-        context_id = ctx.id
-        if not context_id:
-            raise RuntimeError("Failed to create command execution context")
+        if language is None:
+            language = Language.PYTHON
 
-        self.context_id = context_id
-        LOGGER.info(
-            "Opened execution context for %s",
-            self
+        return self.create(
+            language=language,
+            wait=wait
         )
-        return self
 
-    def close(self) -> None:
+    def close(self, wait: bool = True) -> None:
         """Destroy the remote command execution context if it exists.
 
         Returns:
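
`create()` now bounds the first SDK call with a single-worker executor so a stopped cluster cannot hang the caller indefinitely; on timeout it starts the cluster and retries. A generic sketch of that pattern (the names here are illustrative, not part of the Databricks SDK):

```python
from concurrent.futures import ThreadPoolExecutor, TimeoutError as FutureTimeout

def call_with_deadline(fn, deadline_s, on_timeout, **kwargs):
    ex = ThreadPoolExecutor(max_workers=1)
    fut = ex.submit(fn, **kwargs)
    try:
        return fut.result(timeout=deadline_s)
    except FutureTimeout:
        on_timeout()             # e.g. cluster.ensure_running(wait=True)
        return fn(**kwargs)      # retry synchronously once the cluster is up
    finally:
        ex.shutdown(wait=False)  # don't join the possibly-hung first attempt
```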
@@ -261,18 +334,91 @@ print(json.dumps(meta))"""
         if not self.context_id:
             return
 
+        client = self.workspace_client()
+
         try:
-            self._workspace_client().command_execution.destroy(
-                cluster_id=self.cluster.cluster_id,
-                context_id=self.context_id,
-            )
-        except Exception:
+            if wait:
+                client.command_execution.destroy(
+                    cluster_id=self.cluster.cluster_id,
+                    context_id=self.context_id,
+                )
+            else:
+                Thread(
+                    target=client.command_execution.destroy,
+                    kwargs={
+                        "cluster_id": self.cluster.cluster_id,
+                        "context_id": self.context_id,
+                    }
+                ).start()
+        except BaseException:
             # non-fatal: context cleanup best-effort
             pass
         finally:
             self.context_id = None
 
     # ------------ public API ------------
+    def command(
+        self,
+        context: Optional["ExecutionContext"] = None,
+        func: Optional[Callable] = None,
+        command_id: Optional[str] = None,
+        command: Optional[str] = None,
+        language: Optional[Language] = None,
+        environ: Optional[Dict[str, str]] = None,
+    ):
+        context = self if context is None else context
+
+        return CommandExecution(
+            context=context,
+            command_id=command_id,
+            language=language,
+            command=command,
+        ).create(
+            context=context,
+            language=language,
+            command=command,
+            func=func,
+            environ=environ
+        )
+
+    def decorate(
+        self,
+        func: Optional[Callable] = None,
+        command: Optional[str] = None,
+        language: Optional[Language] = None,
+        command_id: Optional[str] = None,
+        environ: Optional[Union[Iterable[str], Dict[str, str]]] = None,
+    ) -> Callable:
+        language = Language.PYTHON if language is None else language
+
+        def decorator(
+            f: Callable,
+            c: ExecutionContext = self,
+            cmd: Optional[str] = command,
+            l: Optional[Language] = language,
+            cid: Optional[str] = command_id,
+            env: Optional[Union[Iterable[str], Dict[str, str]]] = environ,
+        ):
+            if c.is_in_databricks_environment():
+                return func
+
+            c.cluster.ensure_running(
+                wait=False
+            )
+
+            return c.command(
+                context=c,
+                func=f,
+                command_id=cid,
+                command=cmd,
+                language=l,
+                environ=env
+            )
+
+        if func is not None and callable(func):
+            return decorator(f=func)
+        return decorator
+
 
     def execute(
         self,
         obj: Union[str, Callable],
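
`decorate` turns a plain function into a remotely executed command unless the code is already running on Databricks, in which case the function is returned untouched. A usage sketch, assuming `ctx` is an `ExecutionContext`; how the returned `CommandExecution` is invoked is defined in the new `command_execution.py`, which this diff only lists:

```python
@ctx.decorate
def heavy_job(n: int) -> int:
    return sum(range(n))
# on a Databricks runtime: heavy_job is the original function
# elsewhere: heavy_job is a CommandExecution bound to ctx's cluster
```

Note that on the Databricks branch the inner `decorator` returns the outer `func` rather than its own `f` argument, which looks like an oversight when `decorate` is called as a decorator factory (`@ctx.decorate(...)`), since `func` is then `None`.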
@@ -281,9 +427,7 @@ print(json.dumps(meta))"""
         kwargs: Dict[str, Any] = None,
         env_keys: Optional[List[str]] = None,
         env_variables: Optional[dict[str, str]] = None,
-        timeout: Optional[dt.timedelta] = None,
-        result_tag: Optional[str] = None,
-        **options
+        timeout: Optional[WaitingConfigArg] = True,
     ):
         """Execute a string command or a callable in the remote context.
 
@@ -294,8 +438,6 @@ print(json.dumps(meta))"""
             env_keys: Environment variable names to forward.
             env_variables: Environment variables to inject remotely.
             timeout: Optional timeout for execution.
-            result_tag: Optional result tag for parsing output.
-            **options: Additional execution options.
 
         Returns:
             The decoded execution result.
@@ -303,9 +445,7 @@ print(json.dumps(meta))"""
         if isinstance(obj, str):
             return self.execute_command(
                 command=obj,
-                timeout=timeout,
-                result_tag=result_tag,
-                **options
+                wait=timeout,
             )
         elif callable(obj):
             return self.execute_callable(
@@ -315,9 +455,9 @@ print(json.dumps(meta))"""
                 env_keys=env_keys,
                 env_variables=env_variables,
                 timeout=timeout,
-                **options
             )
-        raise ValueError(f"Cannot execute {type(obj)}")
+        else:
+            raise ValueError(f"Cannot execute {type(obj)}")
 
     def is_in_databricks_environment(self):
         """Return True when running on a Databricks runtime."""
@@ -330,8 +470,7 @@ print(json.dumps(meta))"""
         kwargs: Dict[str, Any] = None,
         env_keys: Optional[Iterable[str]] = None,
         env_variables: Optional[Dict[str, str]] = None,
-        print_stdout: Optional[bool] = True,
-        timeout: Optional[dt.timedelta] = None,
+        timeout: Optional[WaitingConfigArg] = True,
         command: Optional[str] = None,
     ) -> Any:
         """Execute a Python callable remotely and return the decoded result.
@@ -342,7 +481,6 @@ print(json.dumps(meta))"""
             kwargs: Keyword arguments for the callable.
             env_keys: Environment variable names to forward.
             env_variables: Environment variables to inject remotely.
-            print_stdout: Whether to print stdout from the command output.
             timeout: Optional timeout for execution.
             command: Optional prebuilt command string override.
 
@@ -389,48 +527,14 @@ print(json.dumps(meta))"""
 
         raw_result = self.execute_command(
             command,
-            timeout=timeout, result_tag=result_tag, print_stdout=print_stdout
+            wait=timeout
         )
 
-        try:
-            result = serialized.parse_command_result(
-                raw_result,
-                result_tag=result_tag,
-                workspace=self.cluster.workspace
-            )
-        except ModuleNotFoundError as remote_module_error:
-            _MOD_NOT_FOUND_RE = re.compile(r"No module named ['\"]([^'\"]+)['\"]")
-            module_name = _MOD_NOT_FOUND_RE.search(str(remote_module_error))
-            module_name = module_name.group(1) if module_name else None
-            module_name = module_name.split(".")[0]
-
-            if module_name and "yggdrasil" not in module_name:
-                LOGGER.debug(
-                    "Installing missing module %s from local environment",
-                    module_name,
-                )
-
-                self.install_temporary_libraries(
-                    libraries=[module_name],
-                )
-
-                LOGGER.warning(
-                    "Installed missing module %s from local environment",
-                    module_name,
-                )
-
-                return self.execute_callable(
-                    func=func,
-                    args=args,
-                    kwargs=kwargs,
-                    env_keys=env_keys,
-                    env_variables=env_variables,
-                    print_stdout=print_stdout,
-                    timeout=timeout,
-                    command=command,
-                )
-
-            raise remote_module_error
+        result = serialized.parse_command_result(
+            raw_result,
+            result_tag=result_tag,
+            workspace=self.cluster.workspace
+        )
 
         return result
 
@@ -438,152 +542,256 @@ print(json.dumps(meta))"""
         self,
         command: str,
         *,
-        timeout: Optional[dt.timedelta] = dt.timedelta(minutes=20),
-        result_tag: Optional[str] = None,
-        print_stdout: Optional[bool] = True,
+        language: Optional[Language] = None,
+        wait: Optional[WaitingConfigArg] = True,
+        raise_error: bool = True
     ) -> str:
         """Execute a command in this context and return decoded output.
 
         Args:
             command: The command string to execute.
-            timeout: Optional timeout for execution.
-            result_tag: Optional tag to extract a specific result segment.
-            print_stdout: Whether to print stdout for tagged output.
+            language: Language to execute
+            wait: Optional timeout for execution.
+            raise_error: Raises error if failed
 
         Returns:
             The decoded command output string.
         """
-        self.connect()
-
-        client = self._workspace_client()
-        result = client.command_execution.execute_and_wait(
-            cluster_id=self.cluster.cluster_id,
-            context_id=self.context_id,
-            language=self.language,
+        return self.command(
             command=command,
-            timeout=timeout or dt.timedelta(minutes=20)
-        )
-
-        try:
-            return self._decode_result(result, result_tag=result_tag, print_stdout=print_stdout)
-        except ModuleNotFoundError as remote_module_error:
-            _MOD_NOT_FOUND_RE = re.compile(r"No module named ['\"]([^'\"]+)['\"]")
-            module_name = _MOD_NOT_FOUND_RE.search(str(remote_module_error))
-            module_name = module_name.group(1) if module_name else None
-            module_name = module_name.split(".")[0]
-
-            if module_name and "yggdrasil" not in module_name:
-                LOGGER.debug(
-                    "Installing missing module %s from local environment",
-                    module_name,
-                )
-
-                self.install_temporary_libraries(
-                    libraries=[module_name],
-                )
-
-                LOGGER.warning(
-                    "Installed missing module %s from local environment",
-                    module_name,
-                )
-
-                return self.execute_command(
-                    command=command,
-                    timeout=timeout,
-                    result_tag=result_tag,
-                    print_stdout=print_stdout
-                )
-
-            raise remote_module_error
+            language=language,
+        ).wait(wait=wait, raise_error=raise_error)
 
     # ------------------------------------------------------------------
     # generic local → remote uploader, via remote python
     # ------------------------------------------------------------------
-    def upload_local_path(self, local_path: str, remote_path: str) -> None:
-        """
-        Generic uploader.
-
-        - If local_path is a file:
-            remote_path is the *file* path on remote.
-        - If local_path is a directory:
-            remote_path is the *directory root* on remote; the directory
-            contents are mirrored under it.
-        Args:
-            local_path: Local file or directory to upload.
-            remote_path: Target path on the remote cluster.
-
-        Returns:
-            None.
+    def upload_local_path(
+        self,
+        paths: Union[Iterable[Tuple[LocalSpec, str]], Dict[LocalSpec, str]],
+        byte_limit: int = 64 * 1024
+    ) -> None:
         """
-        local_path = os.path.abspath(local_path)
-        if not os.path.exists(local_path):
-            raise FileNotFoundError(f"Local path not found: {local_path}")
-
-        # normalize to POSIX for remote (Linux)
-        remote_path = remote_path.replace("\\", "/")
-
-        if os.path.isfile(local_path):
-            # ---------- single file ----------
-            with open(local_path, "rb") as f:
-                data_b64 = base64.b64encode(f.read()).decode("ascii")
+        One-shot uploader. Sends exactly ONE remote command.
 
-            cmd = f"""import base64, os
+        paths: dict[local_spec -> remote_target]
 
-remote_file = {remote_path!r}
-data_b64 = {data_b64!r}
-
-os.makedirs(os.path.dirname(remote_file), exist_ok=True)
-with open(remote_file, "wb") as f:
-    f.write(base64.b64decode(data_b64))
-"""
-
-            self.execute_command(command=cmd, print_stdout=False)
-            return
+        local_spec can be:
+          - str | PathLike: local file or directory
+          - bytes/bytearray/memoryview: raw content (remote_target must be a file path)
+          - BytesSource(name, data): raw content with a name
+          - (name, bytes-like): raw content with a name
 
-        # ---------- directory ----------
+        remote_target:
+          - if local_spec is file: full remote file path
+          - if local_spec is dir: remote directory root
+          - if local_spec is bytes: full remote file path
+        """
+        if isinstance(paths, dict):
+            paths = paths.items()
+
+        def _to_bytes(x: BytesLike) -> bytes:
+            if isinstance(x, bytes):
+                return x
+            if isinstance(x, bytearray):
+                return bytes(x)
+            if isinstance(x, memoryview):
+                return x.tobytes()
+            elif isinstance(x, io.BytesIO):
+                return x.getvalue()
+            raise TypeError(f"Unsupported bytes-like: {type(x)!r}")
+
+        # normalize + validate + build a unified "work list"
+        work: list[dict[str, Any]] = []
+        for local_spec, remote in paths:
+            if not isinstance(remote, str) or not remote:
+                raise TypeError("remote_target must be a non-empty string")
+
+            remote_posix = remote.replace("\\", "/")
+
+            # --- bytes payloads ---
+            if isinstance(local_spec, BytesSource):
+                work.append({
+                    "kind": "bytes",
+                    "name": local_spec.name,
+                    "data": local_spec.data,
+                    "remote": remote_posix,
+                })
+                continue
+
+            if isinstance(local_spec, tuple) and len(local_spec) == 2 and isinstance(local_spec[0], str):
+                name, data = local_spec
+                work.append({
+                    "kind": "bytes",
+                    "name": name,
+                    "data": _to_bytes(data),
+                    "remote": remote_posix,
+                })
+                continue
+
+            if isinstance(local_spec, (bytes, bytearray, memoryview, io.BytesIO)):
+                work.append({
+                    "kind": "bytes",
+                    "name": "blob",
+                    "data": _to_bytes(local_spec),
+                    "remote": remote_posix,
+                })
+                continue
+
+            # --- filesystem payloads ---
+            if isinstance(local_spec, os.PathLike):
+                local_spec = os.fspath(local_spec)
+
+            if isinstance(local_spec, str):
+                local_abs = os.path.abspath(local_spec)
+                if not os.path.exists(local_abs):
+                    raise FileNotFoundError(f"Local path not found: {local_spec}")
+
+                if os.path.isfile(local_abs):
+                    work.append({
+                        "kind": "file",
+                        "local": local_abs,
+                        "remote": remote_posix,
+                    })
+                else:
+                    work.append({
+                        "kind": "dir",
+                        "local": local_abs,
+                        "remote_root": remote_posix.rstrip("/"),
+                        "top": os.path.basename(local_abs.rstrip(os.sep)) or "dir",
+                    })
+                continue
+
+            raise TypeError(f"Unsupported local_spec type: {type(local_spec)!r}")
+
+        # build one zip containing all content
+        manifest: list[dict[str, Any]] = []
         buf = io.BytesIO()
-        local_root = local_path
+        with zipfile.ZipFile(buf, "w", compression=zipfile.ZIP_DEFLATED) as zf:
+            for idx, item in enumerate(work):
+                kind = item["kind"]
+
+                if kind == "bytes":
+                    zip_name = f"BYTES/{idx}"
+                    zf.writestr(zip_name, item["data"])
+                    manifest.append({
+                        "kind": "bytes",
+                        "zip": zip_name,
+                        "remote": item["remote"],
+                    })
+
+                elif kind == "file":
+                    zip_name = f"FILE/{idx}"
+                    zf.write(item["local"], arcname=zip_name)
+                    manifest.append({
+                        "kind": "file",
+                        "zip": zip_name,
+                        "remote": item["remote"],
+                    })
+
+                elif kind == "dir":
+                    local_root = item["local"]
+                    top = item["top"]
+                    prefix = f"DIR/{idx}/{top}"
+
+                    for root, dirs, files in os.walk(local_root):
+                        dirs[:] = [d for d in dirs if d != "__pycache__"]
+
+                        rel_root = os.path.relpath(root, local_root)
+                        rel_root = "" if rel_root == "." else rel_root
+
+                        for name in files:
+                            if name.endswith((".pyc", ".pyo")):
+                                continue
+                            full = os.path.join(root, name)
+                            rel_path = os.path.join(rel_root, name) if rel_root else name
+                            zip_name = f"{prefix}/{rel_path}".replace("\\", "/")
+                            zf.write(full, arcname=zip_name)
+
+                    manifest.append({
+                        "kind": "dir",
+                        "zip_prefix": f"{prefix}/",
+                        "remote_root": item["remote_root"],
+                    })
+
+                else:
+                    raise ValueError(f"Unknown kind in work list: {kind}")
+
+        raw = buf.getvalue()
+
+        # optional zlib on top of zip
+        algo = "none"
+        payload = raw
+        if len(raw) > byte_limit:
+            import zlib
+            compressed = zlib.compress(raw, level=9)
+            if len(compressed) < int(len(raw) * 0.95):
+                algo = "zlib"
+                payload = compressed
+
+        packed = b"ALG:" + algo.encode("ascii") + b"\n" + payload
+        data_b64 = base64.b64encode(packed).decode("ascii")
+
+        cmd = f"""import base64, io, os, zipfile, zlib
+
+packed_b64 = {data_b64!r}
+manifest = {manifest!r}
+
+packed = base64.b64decode(packed_b64)
+nl = packed.find(b"\\n")
+if nl == -1 or not packed.startswith(b"ALG:"):
+    raise ValueError("Bad payload header")
+
+algo = packed[4:nl].decode("ascii")
+payload = packed[nl+1:]
+
+if algo == "none":
+    raw = payload
+elif algo == "zlib":
+    raw = zlib.decompress(payload)
+else:
+    raise ValueError(f"Unknown compression algo: {{algo}}")
+
+buf = io.BytesIO(raw)
+with zipfile.ZipFile(buf, "r") as zf:
+    names = set(zf.namelist())
 
-        # zip local folder into memory
-        with zipfile.ZipFile(buf, "w", zipfile.ZIP_DEFLATED) as zf:
-            for root, dirs, files in os.walk(local_root):
-                # skip __pycache__
-                dirs[:] = [d for d in dirs if d != "__pycache__"]
+    for item in manifest:
+        kind = item["kind"]
 
-                rel_root = os.path.relpath(root, local_root)
-                if rel_root == ".":
-                    rel_root = ""
-                for name in files:
-                    if name.endswith((".pyc", ".pyo")):
-                        continue
-                    full = os.path.join(root, name)
-                    arcname = os.path.join(rel_root, name) if rel_root else name
-                    zf.write(full, arcname=arcname)
+        if kind in ("file", "bytes"):
+            zip_name = item["zip"]
+            remote_file = item["remote"]
+            if zip_name not in names:
+                raise FileNotFoundError(f"Missing in zip: {{zip_name}}")
 
-        data_b64 = base64.b64encode(buf.getvalue()).decode("ascii")
+            parent = os.path.dirname(remote_file)
+            if parent:
+                os.makedirs(parent, exist_ok=True)
 
-        cmd = f"""import base64, io, os, zipfile
+            with zf.open(zip_name, "r") as src, open(remote_file, "wb") as dst:
+                dst.write(src.read())
 
-remote_root = {remote_path!r}
-data_b64 = {data_b64!r}
+        elif kind == "dir":
+            prefix = item["zip_prefix"]
+            remote_root = item["remote_root"]
+            os.makedirs(remote_root, exist_ok=True)
 
-os.makedirs(remote_root, exist_ok=True)
+            for n in names:
+                if not n.startswith(prefix):
+                    continue
+                rel = n[len(prefix):]
+                if not rel or rel.endswith("/"):
+                    continue
 
-buf = io.BytesIO(base64.b64decode(data_b64))
-with zipfile.ZipFile(buf, "r") as zf:
-    for member in zf.infolist():
-        rel_name = member.filename
-        target_path = os.path.join(remote_root, rel_name)
+                target = os.path.join(remote_root, rel)
+                os.makedirs(os.path.dirname(target), exist_ok=True)
+                with zf.open(n, "r") as src, open(target, "wb") as dst:
+                    dst.write(src.read())
 
-        if member.is_dir() or rel_name.endswith("/"):
-            os.makedirs(target_path, exist_ok=True)
         else:
-            os.makedirs(os.path.dirname(target_path), exist_ok=True)
-            with zf.open(member, "r") as src, open(target_path, "wb") as dst:
-                dst.write(src.read())
+            raise ValueError(f"Unknown manifest kind: {{kind}}")
 """
-
-        self.execute_command(command=cmd, print_stdout=False)
+        self.execute_command(command=cmd)
 
     # ------------------------------------------------------------------
     # upload local lib into remote site-packages
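
The upload payload is framed as `b"ALG:<algo>\n"` followed by the (optionally zlib-compressed) zip bytes. A sketch of both directions of that framing, assuming the same header convention as the code above:

```python
import zlib

def pack(raw: bytes, byte_limit: int = 64 * 1024) -> bytes:
    algo, payload = "none", raw
    if len(raw) > byte_limit:
        compressed = zlib.compress(raw, level=9)
        if len(compressed) < int(len(raw) * 0.95):  # only keep a real win
            algo, payload = "zlib", compressed
    return b"ALG:" + algo.encode("ascii") + b"\n" + payload

def unpack(packed: bytes) -> bytes:
    if not packed.startswith(b"ALG:"):
        raise ValueError("Bad payload header")
    header, _, payload = packed.partition(b"\n")
    algo = header[4:].decode("ascii")
    if algo == "none":
        return payload
    if algo == "zlib":
        return zlib.decompress(payload)
    raise ValueError(f"Unknown compression algo: {algo}")

assert unpack(pack(b"x" * 100_000)) == b"x" * 100_000
```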
@@ -591,7 +799,6 @@ with zipfile.ZipFile(buf, "r") as zf:
     def install_temporary_libraries(
         self,
         libraries: str | ModuleType | List[str | ModuleType],
-        with_dependencies: bool = True
    ) -> Union[str, ModuleType, List[str | ModuleType]]:
        """
        Upload a local Python lib/module into the remote cluster's
@@ -604,7 +811,6 @@ with zipfile.ZipFile(buf, "r") as zf:
          - module object (e.g. import ygg; workspace.upload_local_lib(ygg))
         Args:
             libraries: Library path, name, module, or iterable of these.
-            with_dependencies: Whether to include dependencies (unused).
 
         Returns:
             The resolved library or list of libraries uploaded.
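
A usage sketch for the simplified signature, assuming `ctx` is a connected `ExecutionContext` and `my_local_pkg` is a hypothetical importable package in the local environment:

```python
import my_local_pkg

ctx.install_temporary_libraries(my_local_pkg)     # module object
ctx.install_temporary_libraries("my_local_pkg")   # or by name / path
```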
@@ -615,6 +821,13 @@ with zipfile.ZipFile(buf, "r") as zf:
         ]
 
         resolved = resolve_local_lib_path(libraries)
+
+        LOGGER.debug(
+            "Installing temporary lib '%s' in %s",
+            resolved,
+            self
+        )
+
         str_resolved = str(resolved)
         existing = self._uploaded_package_roots.get(str_resolved)
 
@@ -628,57 +841,53 @@ with zipfile.ZipFile(buf, "r") as zf:
         # site-packages/<module_file>
         remote_target = posixpath.join(remote_site_packages_path, resolved.name)
 
-        self.upload_local_path(resolved, remote_target)
+        self.upload_local_path({
+            str_resolved: remote_target
+        })
 
         self._uploaded_package_roots[str_resolved] = remote_target
 
+        LOGGER.info(
+            "Installed temporary lib '%s' in %s",
+            resolved,
+            self
+        )
+
         return libraries
 
-    def _decode_result(
-        self,
-        result: Any,
-        *,
-        result_tag: Optional[str],
-        print_stdout: Optional[bool] = True
-    ) -> str:
-        """Mirror the old Cluster.execute_command result handling.
 
-        Args:
-            result: Raw command execution response.
-            result_tag: Optional tag to extract a segment from output.
-            print_stdout: Whether to print stdout when using tags.
+def _decode_result(
+    result: CommandStatusResponse,
+    language: Language
+) -> str:
+    """Mirror the old Cluster.execute_command result handling.
 
-        Returns:
-            The decoded output string.
-        """
-        if not getattr(result, "results", None):
-            raise RuntimeError("Command execution returned no results")
+    Args:
+        result: Raw command execution response.
 
-        res = result.results
+    Returns:
+        The decoded output string.
+    """
+    res = result.results
 
-        # error handling
-        if res.result_type == ResultType.ERROR:
-            message = res.cause or "Command execution failed"
+    # error handling
+    if res.result_type == ResultType.ERROR:
+        message = res.cause or "Command execution failed"
 
-            if self.language == Language.PYTHON:
-                raise_parsed_traceback(message)
+        if "client terminated the session" in message:
+            raise ClientTerminatedSession(message)
 
-            remote_tb = (
-                getattr(res, "data", None)
-                or getattr(res, "stack_trace", None)
-                or getattr(res, "traceback", None)
-            )
-            if remote_tb:
-                message = f"{message}\n{remote_tb}"
+        if language == Language.PYTHON:
+            raise_parsed_traceback(message)
 
-            raise RuntimeError(message)
+        raise RuntimeError(message)
 
-        # normal output
-        if res.result_type == ResultType.TEXT:
-            output = getattr(res, "data", "") or ""
-        elif getattr(res, "data", None) is not None:
-            output = str(res.data)
-        else:
-            output = ""
+    # normal output
+    if res.result_type == ResultType.TEXT:
+        output = res.data or ""
+    elif res.data is not None:
+        output = str(res.data)
+    else:
+        output = ""
 
-        return output
+    return output