ygg 0.1.56__py3-none-any.whl → 0.1.60__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ygg-0.1.56.dist-info → ygg-0.1.60.dist-info}/METADATA +1 -1
- ygg-0.1.60.dist-info/RECORD +74 -0
- {ygg-0.1.56.dist-info → ygg-0.1.60.dist-info}/WHEEL +1 -1
- yggdrasil/ai/__init__.py +2 -0
- yggdrasil/ai/session.py +89 -0
- yggdrasil/ai/sql_session.py +310 -0
- yggdrasil/databricks/__init__.py +0 -3
- yggdrasil/databricks/compute/cluster.py +68 -113
- yggdrasil/databricks/compute/command_execution.py +674 -0
- yggdrasil/databricks/compute/exceptions.py +7 -2
- yggdrasil/databricks/compute/execution_context.py +465 -277
- yggdrasil/databricks/compute/remote.py +4 -14
- yggdrasil/databricks/exceptions.py +10 -0
- yggdrasil/databricks/sql/__init__.py +0 -4
- yggdrasil/databricks/sql/engine.py +161 -173
- yggdrasil/databricks/sql/exceptions.py +9 -1
- yggdrasil/databricks/sql/statement_result.py +108 -120
- yggdrasil/databricks/sql/warehouse.py +331 -92
- yggdrasil/databricks/workspaces/io.py +92 -9
- yggdrasil/databricks/workspaces/path.py +120 -74
- yggdrasil/databricks/workspaces/workspace.py +212 -68
- yggdrasil/libs/databrickslib.py +23 -18
- yggdrasil/libs/extensions/spark_extensions.py +1 -1
- yggdrasil/libs/pandaslib.py +15 -6
- yggdrasil/libs/polarslib.py +49 -13
- yggdrasil/pyutils/__init__.py +1 -0
- yggdrasil/pyutils/callable_serde.py +12 -19
- yggdrasil/pyutils/exceptions.py +16 -0
- yggdrasil/pyutils/mimetypes.py +0 -0
- yggdrasil/pyutils/python_env.py +13 -12
- yggdrasil/pyutils/waiting_config.py +171 -0
- yggdrasil/types/cast/arrow_cast.py +3 -0
- yggdrasil/types/cast/pandas_cast.py +157 -169
- yggdrasil/types/cast/polars_cast.py +11 -43
- yggdrasil/types/dummy_class.py +81 -0
- yggdrasil/version.py +1 -1
- ygg-0.1.56.dist-info/RECORD +0 -68
- yggdrasil/databricks/ai/__init__.py +0 -1
- yggdrasil/databricks/ai/loki.py +0 -374
- {ygg-0.1.56.dist-info → ygg-0.1.60.dist-info}/entry_points.txt +0 -0
- {ygg-0.1.56.dist-info → ygg-0.1.60.dist-info}/licenses/LICENSE +0 -0
- {ygg-0.1.56.dist-info → ygg-0.1.60.dist-info}/top_level.txt +0 -0
|
@@ -497,9 +497,8 @@ class CallableSerde:
|
|
|
497
497
|
kwargs: Optional[Dict[str, Any]] = None,
|
|
498
498
|
*,
|
|
499
499
|
result_tag: str = "__CALLABLE_SERDE_RESULT__",
|
|
500
|
-
prefer: str = "dill",
|
|
501
500
|
byte_limit: int = 64 * 1024,
|
|
502
|
-
dump_env: str = "none",
|
|
501
|
+
dump_env: str = "none", # "none" | "globals" | "closure" | "both"
|
|
503
502
|
filter_used_globals: bool = True,
|
|
504
503
|
env_keys: Optional[Iterable[str]] = None,
|
|
505
504
|
env_variables: Optional[Dict[str, str]] = None,
|
|
@@ -697,26 +696,18 @@ sys.stdout.flush()
|
|
|
697
696
|
string_result.replace("DBXPATH:", "")
|
|
698
697
|
)
|
|
699
698
|
|
|
700
|
-
|
|
701
|
-
|
|
702
|
-
|
|
703
|
-
with path.open(mode="rb") as f:
|
|
704
|
-
buf = io.BytesIO(f.read_all_bytes())
|
|
705
|
-
|
|
699
|
+
try:
|
|
700
|
+
df = path.read_pandas()
|
|
701
|
+
finally:
|
|
706
702
|
path.rmfile()
|
|
707
|
-
buf.seek(0)
|
|
708
|
-
return pandas.read_parquet(buf)
|
|
709
703
|
|
|
710
|
-
|
|
711
|
-
blob = f.read_all_bytes()
|
|
704
|
+
return df
|
|
712
705
|
|
|
713
|
-
|
|
714
|
-
|
|
715
|
-
|
|
716
|
-
|
|
717
|
-
|
|
718
|
-
except (UnicodeEncodeError, binascii.Error) as e:
|
|
719
|
-
raise ValueError("Invalid base64 payload after result tag (corrupted/contaminated).") from e
|
|
706
|
+
# Strict base64 decode (rejects junk chars)
|
|
707
|
+
try:
|
|
708
|
+
blob = base64.b64decode(string_result.encode("ascii"), validate=True)
|
|
709
|
+
except (UnicodeEncodeError, binascii.Error) as e:
|
|
710
|
+
raise ValueError("Invalid base64 payload after result tag (corrupted/contaminated).") from e
|
|
720
711
|
|
|
721
712
|
raw = _decode_result_blob(blob)
|
|
722
713
|
try:
|
|
@@ -725,3 +716,5 @@ sys.stdout.flush()
|
|
|
725
716
|
raise ValueError("Failed to dill.loads decoded payload") from e
|
|
726
717
|
|
|
727
718
|
return result
|
|
719
|
+
|
|
720
|
+
|
yggdrasil/pyutils/exceptions.py
CHANGED
|
@@ -86,6 +86,19 @@ def parse_exception_from_traceback(tb_text: str) -> ParsedException:
|
|
|
86
86
|
return ParsedException(RuntimeError, clean, "RuntimeError")
|
|
87
87
|
|
|
88
88
|
|
|
89
|
+
def missing_module_name(exc: BaseException) -> str | None:
|
|
90
|
+
if isinstance(exc, ModuleNotFoundError):
|
|
91
|
+
if getattr(exc, "name", None):
|
|
92
|
+
return exc.name
|
|
93
|
+
|
|
94
|
+
# fallback: parse from message/args
|
|
95
|
+
msg = exc.args[0] if exc.args else str(exc)
|
|
96
|
+
m = re.search(r"No module named ['\"]([^'\"]+)['\"]", msg)
|
|
97
|
+
return m.group(1) if m else None
|
|
98
|
+
|
|
99
|
+
return None
|
|
100
|
+
|
|
101
|
+
|
|
89
102
|
def raise_parsed_traceback(tb_text: str, *, attach_as_cause: bool = True) -> None:
|
|
90
103
|
"""
|
|
91
104
|
Infer exception from traceback text and raise it.
|
|
@@ -94,6 +107,9 @@ def raise_parsed_traceback(tb_text: str, *, attach_as_cause: bool = True) -> Non
|
|
|
94
107
|
parsed = parse_exception_from_traceback(tb_text)
|
|
95
108
|
exc = parsed.exc_type(parsed.message) if parsed.message else parsed.exc_type()
|
|
96
109
|
|
|
110
|
+
if isinstance(exc, ModuleNotFoundError):
|
|
111
|
+
exc.name = missing_module_name(exc)
|
|
112
|
+
|
|
97
113
|
if attach_as_cause:
|
|
98
114
|
raise exc from RemoteTraceback(tb_text)
|
|
99
115
|
raise exc
|
|
File without changes
|
yggdrasil/pyutils/python_env.py
CHANGED
|
@@ -441,19 +441,20 @@ class PythonEnv:
|
|
|
441
441
|
Returns:
|
|
442
442
|
PythonEnv representing the current environment.
|
|
443
443
|
"""
|
|
444
|
-
|
|
445
|
-
if venv:
|
|
446
|
-
log.debug("current env from VIRTUAL_ENV=%s", venv)
|
|
447
|
-
return cls(Path(venv))
|
|
444
|
+
global CURRENT_PYTHON_ENV
|
|
448
445
|
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
log.debug("current env inferred from sys.executable=%s", str(exe))
|
|
453
|
-
return cls(parent.parent)
|
|
446
|
+
if CURRENT_PYTHON_ENV is None:
|
|
447
|
+
exe = Path(sys.executable).expanduser().resolve()
|
|
448
|
+
parent = exe.parent
|
|
454
449
|
|
|
455
|
-
|
|
456
|
-
|
|
450
|
+
if parent.name in ("bin", "Scripts"):
|
|
451
|
+
log.debug("current env inferred from sys.executable=%s", str(exe))
|
|
452
|
+
CURRENT_PYTHON_ENV = cls(parent.parent)
|
|
453
|
+
else:
|
|
454
|
+
log.debug("current env fallback to sys.prefix=%s", sys.prefix)
|
|
455
|
+
CURRENT_PYTHON_ENV = cls(Path(sys.prefix))
|
|
456
|
+
|
|
457
|
+
return CURRENT_PYTHON_ENV
|
|
457
458
|
|
|
458
459
|
@classmethod
|
|
459
460
|
def ensure_uv(
|
|
@@ -1510,7 +1511,7 @@ print("RESULT:" + json.dumps(top_level))""".strip()
|
|
|
1510
1511
|
|
|
1511
1512
|
|
|
1512
1513
|
# Snapshot singleton (import-time)
|
|
1513
|
-
CURRENT_PYTHON_ENV: PythonEnv =
|
|
1514
|
+
CURRENT_PYTHON_ENV: PythonEnv = None
|
|
1514
1515
|
|
|
1515
1516
|
|
|
1516
1517
|
if __name__ == "__main__":
|
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
import datetime as dt
|
|
2
|
+
import time
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
from typing import Optional, Union
|
|
5
|
+
|
|
6
|
+
__all__ = ["WaitingConfig", "WaitingConfigArg"]
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def _safe_seconds_tick(ticks: Union[int, float, dt.timedelta]):
|
|
10
|
+
if isinstance(ticks, dt.timedelta):
|
|
11
|
+
return ticks.total_seconds()
|
|
12
|
+
return ticks
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
DEFAULT_TIMEOUT_TICKS = float(20 * 60)  # 20 minutes
# Every type WaitingConfig.check_arg accepts as its ``arg`` parameter.
WaitingConfigArg = Union[
    "WaitingConfig", dict, int, float, dt.timedelta, dt.datetime, bool
]


@dataclass(frozen=True)
class WaitingConfig:
    """Immutable polling/wait settings; all durations are in seconds.

    Attributes are documented inline below. ``check_arg`` builds an instance
    from loosely-typed input and ``sleep`` performs one wait step.
    """

    # Overall time budget in seconds; ``sleep`` only enforces it when > 0.
    timeout: float = DEFAULT_TIMEOUT_TICKS
    # Base delay between attempts; 0 disables sleeping entirely.
    interval: float = 2.0
    # Multiplier applied per iteration: delay = interval * backoff**iteration.
    backoff: float = 1.0
    # Upper bound for a single delay; 0 (or less) means "no cap".
    max_interval: float = 10.0

    @property
    def timeout_timedelta(self) -> dt.timedelta:
        """Return ``timeout`` as a ``timedelta``."""
        return dt.timedelta(seconds=self.timeout)

    @classmethod
    def default(cls):
        """Return the shared module-level default configuration."""
        return DEFAULT_WAITING_CONFIG

    @staticmethod
    def _to_seconds(value) -> Optional[float]:
        """Convert ``None``/int/float/timedelta to float seconds.

        Raises:
            TypeError: for any other type.
        """
        if value is None:
            return None
        if isinstance(value, dt.timedelta):
            return float(value.total_seconds())
        if isinstance(value, (int, float)):
            return float(value)
        raise TypeError(f"Expected seconds as int/float/timedelta, got {type(value)!r}")

    @staticmethod
    def _deadline_to_timeout(deadline: dt.datetime) -> float:
        """Return the seconds from now until ``deadline`` (may be negative).

        An aware deadline is compared against "now" in its own timezone, a
        naive one against the naive local "now".

        Raises:
            TypeError: if ``deadline`` is not a ``datetime``.
        """
        if not isinstance(deadline, dt.datetime):
            raise TypeError(f"deadline must be datetime, got {type(deadline)!r}")
        now = dt.datetime.now(tz=deadline.tzinfo) if deadline.tzinfo else dt.datetime.now()
        return (deadline - now).total_seconds()

    @classmethod
    def check_arg(
        cls,
        arg: Optional[WaitingConfigArg] = None,
        timeout: Optional[Union[int, float, dt.timedelta]] = None,
        interval: Optional[Union[int, float, dt.timedelta]] = None,
        backoff: Optional[Union[int, float, dt.timedelta]] = None,
        max_interval: Optional[Union[int, float, dt.timedelta]] = None,
    ) -> "WaitingConfig":
        """Normalize loosely-typed wait settings into a ``WaitingConfig``.

        Args:
            arg: Base settings. Accepted forms:
                * ``WaitingConfig`` - returned as-is when no override is given;
                * ``bool`` - ``True`` uses the default timeout, ``False`` uses 0,
                  both with interval 2, backoff 2 and max_interval 15;
                * ``int``/``float``/``timedelta`` - the timeout;
                * ``datetime`` - a deadline, converted to a timeout from now;
                * ``dict`` - keys ``timeout`` or ``deadline`` (not both),
                  ``interval``, ``backoff``, ``max_interval``.
            timeout: Explicit override; wins over ``arg``.
            interval: Explicit override; wins over ``arg``.
            backoff: Explicit override; wins over ``arg``.
            max_interval: Explicit override; wins over ``arg``.

        Returns:
            A ``WaitingConfig``. Missing values fall back to timeout 0,
            interval 2, backoff 2, max_interval 10; a negative timeout is
            clamped to 0 and a backoff below 1 is replaced by 2.

        Raises:
            TypeError: for an unsupported ``arg`` type or a non-numeric value.
            ValueError: when a dict carries both ``deadline`` and ``timeout``.
        """
        base_timeout: Optional[float] = None
        base_interval: Optional[float] = None
        base_backoff: Optional[float] = None
        base_max_interval: Optional[float] = None

        if arg is not None:
            if isinstance(arg, cls):
                if timeout is None and interval is None and backoff is None and max_interval is None:
                    return arg

                base_timeout = arg.timeout
                base_interval = arg.interval
                base_backoff = arg.backoff
                base_max_interval = arg.max_interval

            # bool must be tested before int: bool is an int subclass.
            elif isinstance(arg, bool):
                base_timeout = DEFAULT_TIMEOUT_TICKS if arg else 0.0
                base_interval = 2.0
                base_backoff = 2.0
                base_max_interval = 15.0

            elif isinstance(arg, (int, float, dt.timedelta)):
                base_timeout = cls._to_seconds(arg)

            elif isinstance(arg, dt.datetime):
                base_timeout = float(cls._deadline_to_timeout(arg))

            elif isinstance(arg, dict):
                if "deadline" in arg and "timeout" in arg:
                    raise ValueError("Provide only one of 'deadline' or 'timeout' in WaitingConfig dict.")

                if "deadline" in arg and arg["deadline"] is not None:
                    base_timeout = float(cls._deadline_to_timeout(arg["deadline"]))
                else:
                    base_timeout = cls._to_seconds(arg.get("timeout"))

                base_interval = cls._to_seconds(arg.get("interval"))
                base_backoff = cls._to_seconds(arg.get("backoff"))
                base_max_interval = cls._to_seconds(arg.get("max_interval"))

            else:
                raise TypeError(f"Unsupported WaitingConfig arg type: {type(arg)!r}")

        # explicit kwargs win
        final_timeout = cls._to_seconds(timeout) if timeout is not None else base_timeout
        final_interval = cls._to_seconds(interval) if interval is not None else base_interval
        final_backoff = cls._to_seconds(backoff) if backoff is not None else base_backoff
        final_max_interval = cls._to_seconds(max_interval) if max_interval is not None else base_max_interval

        # Fill in defaults and clamp out-of-range values.
        if final_timeout is None or final_timeout < 0:
            final_timeout = 0.0

        if final_interval is None:
            final_interval = 2.0

        if final_backoff is None or final_backoff < 1:
            final_backoff = 2.0

        if final_max_interval is None:
            final_max_interval = 10.0

        return cls(
            timeout=float(final_timeout),
            interval=float(final_interval),
            backoff=float(final_backoff),
            max_interval=float(final_max_interval),
        )

    def sleep(self, iteration: int, start: Optional[float] = None) -> None:
        """Sleep for the delay that belongs to wait step ``iteration``.

        iteration is 0-based (first wait => iteration=0)

        - interval == 0 => no sleep
        - backoff >= 1 => interval * backoff**iteration
        - max_interval == 0 => no cap, else cap sleep to max_interval
        - if start is provided and timeout > 0:
            * raise TimeoutError if already out of time
            * cap sleep so we don't oversleep past timeout

        Args:
            iteration: 0-based index of the wait step.
            start: ``time.time()`` timestamp at which waiting began, if the
                timeout should be enforced.

        Raises:
            ValueError: if ``iteration`` is negative.
            TimeoutError: if ``start`` is given, ``timeout > 0`` and the
                budget is already exhausted.
        """
        if iteration < 0:
            raise ValueError(f"iteration must be >= 0, got {iteration}")

        if self.interval == 0:
            return

        try:
            sleep_s = self.interval * (self.backoff ** int(iteration))
        except OverflowError:
            # backoff**iteration no longer fits in a float. Treat the delay as
            # unbounded (keeping the sign of interval) so the max_interval and
            # timeout caps below can still bring it back to a finite value.
            sleep_s = float("inf") if self.interval > 0 else float("-inf")

        if self.max_interval > 0:
            sleep_s = min(sleep_s, self.max_interval)

        if sleep_s <= 0:
            return

        if start is not None and self.timeout > 0:
            elapsed = time.time() - float(start)
            remaining = self.timeout - elapsed
            if remaining <= 0:
                raise TimeoutError(f"Timed out waiting after {self.timeout:.3f}s")
            sleep_s = min(sleep_s, remaining)

        if sleep_s <= 0:
            return

        time.sleep(sleep_s)


DEFAULT_WAITING_CONFIG = WaitingConfig()
|