ygg 0.1.56__py3-none-any.whl → 0.1.60__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. {ygg-0.1.56.dist-info → ygg-0.1.60.dist-info}/METADATA +1 -1
  2. ygg-0.1.60.dist-info/RECORD +74 -0
  3. {ygg-0.1.56.dist-info → ygg-0.1.60.dist-info}/WHEEL +1 -1
  4. yggdrasil/ai/__init__.py +2 -0
  5. yggdrasil/ai/session.py +89 -0
  6. yggdrasil/ai/sql_session.py +310 -0
  7. yggdrasil/databricks/__init__.py +0 -3
  8. yggdrasil/databricks/compute/cluster.py +68 -113
  9. yggdrasil/databricks/compute/command_execution.py +674 -0
  10. yggdrasil/databricks/compute/exceptions.py +7 -2
  11. yggdrasil/databricks/compute/execution_context.py +465 -277
  12. yggdrasil/databricks/compute/remote.py +4 -14
  13. yggdrasil/databricks/exceptions.py +10 -0
  14. yggdrasil/databricks/sql/__init__.py +0 -4
  15. yggdrasil/databricks/sql/engine.py +161 -173
  16. yggdrasil/databricks/sql/exceptions.py +9 -1
  17. yggdrasil/databricks/sql/statement_result.py +108 -120
  18. yggdrasil/databricks/sql/warehouse.py +331 -92
  19. yggdrasil/databricks/workspaces/io.py +92 -9
  20. yggdrasil/databricks/workspaces/path.py +120 -74
  21. yggdrasil/databricks/workspaces/workspace.py +212 -68
  22. yggdrasil/libs/databrickslib.py +23 -18
  23. yggdrasil/libs/extensions/spark_extensions.py +1 -1
  24. yggdrasil/libs/pandaslib.py +15 -6
  25. yggdrasil/libs/polarslib.py +49 -13
  26. yggdrasil/pyutils/__init__.py +1 -0
  27. yggdrasil/pyutils/callable_serde.py +12 -19
  28. yggdrasil/pyutils/exceptions.py +16 -0
  29. yggdrasil/pyutils/mimetypes.py +0 -0
  30. yggdrasil/pyutils/python_env.py +13 -12
  31. yggdrasil/pyutils/waiting_config.py +171 -0
  32. yggdrasil/types/cast/arrow_cast.py +3 -0
  33. yggdrasil/types/cast/pandas_cast.py +157 -169
  34. yggdrasil/types/cast/polars_cast.py +11 -43
  35. yggdrasil/types/dummy_class.py +81 -0
  36. yggdrasil/version.py +1 -1
  37. ygg-0.1.56.dist-info/RECORD +0 -68
  38. yggdrasil/databricks/ai/__init__.py +0 -1
  39. yggdrasil/databricks/ai/loki.py +0 -374
  40. {ygg-0.1.56.dist-info → ygg-0.1.60.dist-info}/entry_points.txt +0 -0
  41. {ygg-0.1.56.dist-info → ygg-0.1.60.dist-info}/licenses/LICENSE +0 -0
  42. {ygg-0.1.56.dist-info → ygg-0.1.60.dist-info}/top_level.txt +0 -0
@@ -497,9 +497,8 @@ class CallableSerde:
497
497
  kwargs: Optional[Dict[str, Any]] = None,
498
498
  *,
499
499
  result_tag: str = "__CALLABLE_SERDE_RESULT__",
500
- prefer: str = "dill",
501
500
  byte_limit: int = 64 * 1024,
502
- dump_env: str = "none", # "none" | "globals" | "closure" | "both"
501
+ dump_env: str = "none", # "none" | "globals" | "closure" | "both"
503
502
  filter_used_globals: bool = True,
504
503
  env_keys: Optional[Iterable[str]] = None,
505
504
  env_variables: Optional[Dict[str, str]] = None,
@@ -697,26 +696,18 @@ sys.stdout.flush()
697
696
  string_result.replace("DBXPATH:", "")
698
697
  )
699
698
 
700
- if path.name.endswith(".parquet"):
701
- import pandas
702
-
703
- with path.open(mode="rb") as f:
704
- buf = io.BytesIO(f.read_all_bytes())
705
-
699
+ try:
700
+ df = path.read_pandas()
701
+ finally:
706
702
  path.rmfile()
707
- buf.seek(0)
708
- return pandas.read_parquet(buf)
709
703
 
710
- with path.open(mode="rb") as f:
711
- blob = f.read_all_bytes()
704
+ return df
712
705
 
713
- path.rmfile()
714
- else:
715
- # Strict base64 decode (rejects junk chars)
716
- try:
717
- blob = base64.b64decode(string_result.encode("ascii"), validate=True)
718
- except (UnicodeEncodeError, binascii.Error) as e:
719
- raise ValueError("Invalid base64 payload after result tag (corrupted/contaminated).") from e
706
+ # Strict base64 decode (rejects junk chars)
707
+ try:
708
+ blob = base64.b64decode(string_result.encode("ascii"), validate=True)
709
+ except (UnicodeEncodeError, binascii.Error) as e:
710
+ raise ValueError("Invalid base64 payload after result tag (corrupted/contaminated).") from e
720
711
 
721
712
  raw = _decode_result_blob(blob)
722
713
  try:
@@ -725,3 +716,5 @@ sys.stdout.flush()
725
716
  raise ValueError("Failed to dill.loads decoded payload") from e
726
717
 
727
718
  return result
719
+
720
+
@@ -86,6 +86,19 @@ def parse_exception_from_traceback(tb_text: str) -> ParsedException:
86
86
  return ParsedException(RuntimeError, clean, "RuntimeError")
87
87
 
88
88
 
89
+ def missing_module_name(exc: BaseException) -> str | None:
90
+ if isinstance(exc, ModuleNotFoundError):
91
+ if getattr(exc, "name", None):
92
+ return exc.name
93
+
94
+ # fallback: parse from message/args
95
+ msg = exc.args[0] if exc.args else str(exc)
96
+ m = re.search(r"No module named ['\"]([^'\"]+)['\"]", msg)
97
+ return m.group(1) if m else None
98
+
99
+ return None
100
+
101
+
89
102
  def raise_parsed_traceback(tb_text: str, *, attach_as_cause: bool = True) -> None:
90
103
  """
91
104
  Infer exception from traceback text and raise it.
@@ -94,6 +107,9 @@ def raise_parsed_traceback(tb_text: str, *, attach_as_cause: bool = True) -> Non
94
107
  parsed = parse_exception_from_traceback(tb_text)
95
108
  exc = parsed.exc_type(parsed.message) if parsed.message else parsed.exc_type()
96
109
 
110
+ if isinstance(exc, ModuleNotFoundError):
111
+ exc.name = missing_module_name(exc)
112
+
97
113
  if attach_as_cause:
98
114
  raise exc from RemoteTraceback(tb_text)
99
115
  raise exc
File without changes
@@ -441,19 +441,20 @@ class PythonEnv:
441
441
  Returns:
442
442
  PythonEnv representing the current environment.
443
443
  """
444
- venv = os.environ.get("VIRTUAL_ENV")
445
- if venv:
446
- log.debug("current env from VIRTUAL_ENV=%s", venv)
447
- return cls(Path(venv))
444
+ global CURRENT_PYTHON_ENV
448
445
 
449
- exe = Path(sys.executable).expanduser().resolve()
450
- parent = exe.parent
451
- if parent.name in ("bin", "Scripts"):
452
- log.debug("current env inferred from sys.executable=%s", str(exe))
453
- return cls(parent.parent)
446
+ if CURRENT_PYTHON_ENV is None:
447
+ exe = Path(sys.executable).expanduser().resolve()
448
+ parent = exe.parent
454
449
 
455
- log.debug("current env fallback to sys.prefix=%s", sys.prefix)
456
- return cls(Path(sys.prefix))
450
+ if parent.name in ("bin", "Scripts"):
451
+ log.debug("current env inferred from sys.executable=%s", str(exe))
452
+ CURRENT_PYTHON_ENV = cls(parent.parent)
453
+ else:
454
+ log.debug("current env fallback to sys.prefix=%s", sys.prefix)
455
+ CURRENT_PYTHON_ENV = cls(Path(sys.prefix))
456
+
457
+ return CURRENT_PYTHON_ENV
457
458
 
458
459
  @classmethod
459
460
  def ensure_uv(
@@ -1510,7 +1511,7 @@ print("RESULT:" + json.dumps(top_level))""".strip()
1510
1511
 
1511
1512
 
1512
1513
  # Snapshot singleton (import-time)
1513
- CURRENT_PYTHON_ENV: PythonEnv = PythonEnv.get_current()
1514
+ CURRENT_PYTHON_ENV: PythonEnv = None
1514
1515
 
1515
1516
 
1516
1517
  if __name__ == "__main__":
@@ -0,0 +1,171 @@
1
+ import datetime as dt
2
+ import time
3
+ from dataclasses import dataclass
4
+ from typing import Optional, Union
5
+
6
+ __all__ = ["WaitingConfig", "WaitingConfigArg"]
7
+
8
+
9
def _safe_seconds_tick(ticks: Union[int, float, dt.timedelta]):
    """Return ``ticks`` in seconds.

    A ``timedelta`` is converted with ``total_seconds()``; any other value
    is handed back unchanged.
    """
    return ticks.total_seconds() if isinstance(ticks, dt.timedelta) else ticks
13
+
14
+
15
DEFAULT_TIMEOUT_TICKS = float(20 * 60)  # 20 minutes

# Every shape WaitingConfig.check_arg() accepts as its first argument.
# timedelta is listed because check_arg() handles it explicitly.
WaitingConfigArg = Union[
    "WaitingConfig", dict, int, float, dt.timedelta, dt.datetime, bool
]


@dataclass(frozen=True)
class WaitingConfig:
    """Immutable wait/poll settings: a time budget plus a retry interval.

    All durations are expressed in seconds.
    """

    # Overall time budget; sleep() only enforces it when it is > 0.
    timeout: float = DEFAULT_TIMEOUT_TICKS
    # Base delay between attempts; 0 disables sleeping entirely.
    interval: float = 2.0
    # Multiplier applied per iteration (interval * backoff**iteration).
    backoff: float = 1.0
    # Upper bound for a single sleep; 0 means "no cap".
    max_interval: float = 10.0

    @property
    def timeout_timedelta(self) -> dt.timedelta:
        """Return ``timeout`` as a ``timedelta``."""
        return dt.timedelta(seconds=self.timeout)

    @classmethod
    def default(cls):
        """Return the shared module-level default configuration."""
        return DEFAULT_WAITING_CONFIG

    @staticmethod
    def _to_seconds(value) -> Optional[float]:
        """Convert ``None``/number/``timedelta`` to float seconds.

        Raises:
            TypeError: if ``value`` is none of the supported types.
        """
        if value is None:
            return None
        if isinstance(value, dt.timedelta):
            return float(value.total_seconds())
        if isinstance(value, (int, float)):
            return float(value)
        raise TypeError(f"Expected seconds as int/float/timedelta, got {type(value)!r}")

    @staticmethod
    def _deadline_to_timeout(deadline: dt.datetime) -> float:
        """Return the seconds left until ``deadline`` (may be negative).

        An aware deadline is compared against "now" in its own timezone; a
        naive one is compared against the naive local ``datetime.now()``.

        Raises:
            TypeError: if ``deadline`` is not a ``datetime``.
        """
        if not isinstance(deadline, dt.datetime):
            raise TypeError(f"deadline must be datetime, got {type(deadline)!r}")
        now = dt.datetime.now(tz=deadline.tzinfo) if deadline.tzinfo else dt.datetime.now()
        return (deadline - now).total_seconds()

    @classmethod
    def check_arg(
        cls,
        arg: Optional[WaitingConfigArg] = None,
        timeout: Optional[Union[int, float, dt.timedelta]] = None,
        interval: Optional[Union[int, float, dt.timedelta]] = None,
        backoff: Optional[Union[int, float, dt.timedelta]] = None,
        max_interval: Optional[Union[int, float, dt.timedelta]] = None,
    ) -> "WaitingConfig":
        """Normalize a loosely-typed wait argument into a ``WaitingConfig``.

        Args:
            arg: One of
                * ``WaitingConfig`` - returned as-is when no keyword
                  override is given, otherwise used as the base values;
                * ``bool`` - ``True`` uses the default timeout, ``False`` a
                  zero timeout (interval 2, backoff 2, max_interval 15);
                * ``int``/``float``/``timedelta`` - the timeout;
                * ``datetime`` - a deadline, converted to a timeout;
                * ``dict`` - keys ``timeout`` or ``deadline`` (not both),
                  ``interval``, ``backoff``, ``max_interval``;
                * ``None`` - no base values.
            timeout, interval, backoff, max_interval: explicit overrides;
                any that is not ``None`` wins over the value from ``arg``.

        Returns:
            A ``WaitingConfig``; this method never returns ``None``.
            Missing values fall back to timeout 0, interval 2, backoff 2,
            max_interval 10. A negative timeout is clamped to 0 and a
            backoff below 1 is replaced by 2.

        Raises:
            TypeError: for an unsupported ``arg`` or value type.
            ValueError: when a dict carries both ``deadline`` and ``timeout``.
        """
        base_timeout: Optional[float] = None
        base_interval: Optional[float] = None
        base_backoff: Optional[float] = None
        base_max_interval: Optional[float] = None

        if arg is not None:
            if isinstance(arg, cls):
                if timeout is None and interval is None and backoff is None and max_interval is None:
                    return arg

                base_timeout = arg.timeout
                base_interval = arg.interval
                base_backoff = arg.backoff
                base_max_interval = arg.max_interval

            # bool must be tested before int: bool is a subclass of int.
            elif isinstance(arg, bool):
                base_timeout = DEFAULT_TIMEOUT_TICKS if arg else 0.0
                base_interval = 2.0
                base_backoff = 2.0
                base_max_interval = 15.0

            elif isinstance(arg, (int, float, dt.timedelta)):
                base_timeout = cls._to_seconds(arg)

            elif isinstance(arg, dt.datetime):
                base_timeout = float(cls._deadline_to_timeout(arg))

            elif isinstance(arg, dict):
                if "deadline" in arg and "timeout" in arg:
                    raise ValueError("Provide only one of 'deadline' or 'timeout' in WaitingOptions dict.")

                if "deadline" in arg and arg["deadline"] is not None:
                    base_timeout = float(cls._deadline_to_timeout(arg["deadline"]))
                else:
                    base_timeout = cls._to_seconds(arg.get("timeout"))

                base_interval = cls._to_seconds(arg.get("interval"))
                base_backoff = cls._to_seconds(arg.get("backoff"))
                base_max_interval = cls._to_seconds(arg.get("max_interval"))

            else:
                raise TypeError(f"Unsupported WaitingOptions arg type: {type(arg)!r}")

        # explicit kwargs win
        final_timeout = cls._to_seconds(timeout) if timeout is not None else base_timeout
        final_interval = cls._to_seconds(interval) if interval is not None else base_interval
        final_backoff = cls._to_seconds(backoff) if backoff is not None else base_backoff
        final_max_interval = cls._to_seconds(max_interval) if max_interval is not None else base_max_interval

        # Fill the gaps so every field of the result is a concrete float.
        if final_timeout is None or final_timeout < 0:
            final_timeout = 0.0

        if final_interval is None:
            final_interval = 2.0

        if final_backoff is None or final_backoff < 1:
            final_backoff = 2.0

        if final_max_interval is None:
            final_max_interval = 10.0

        return cls(
            timeout=float(final_timeout),
            interval=float(final_interval),
            backoff=float(final_backoff),
            max_interval=float(final_max_interval),
        )

    def sleep(self, iteration: int, start: Optional[float] = None) -> None:
        """Sleep before the next attempt of a polling loop.

        Args:
            iteration: 0-based attempt counter (first wait => 0).
            start: ``time.time()`` value taken when waiting began; when
                given and ``timeout > 0`` the time budget is enforced.

        Behavior:
            - interval == 0 => no sleep
            - delay is interval * backoff**iteration
            - max_interval == 0 => no cap, else the delay is capped
            - with ``start`` and ``timeout > 0``: raise ``TimeoutError`` if
              the budget is spent, otherwise never sleep past it

        Raises:
            ValueError: if ``iteration`` is negative.
            TimeoutError: if the time budget is already exhausted.
            OverflowError: if the delay overflows and nothing bounds it.
        """
        if iteration < 0:
            raise ValueError(f"iteration must be >= 0, got {iteration}")

        if self.interval == 0:
            return

        try:
            sleep_s = self.interval * (self.backoff ** int(iteration))
        except OverflowError:
            # backoff**iteration no longer fits in a float. With a positive
            # backoff the true delay is simply "huge", so treat it as
            # infinite and let max_interval / the remaining budget bound it
            # instead of crashing a long-running poll.
            if self.backoff < 0:
                raise  # sign of the real value is ambiguous; keep the error
            sleep_s = float("inf") if self.interval > 0 else float("-inf")

        if self.max_interval > 0:
            sleep_s = min(sleep_s, self.max_interval)

        if sleep_s <= 0:
            return

        if start is not None and self.timeout > 0:
            elapsed = time.time() - float(start)
            remaining = self.timeout - elapsed
            if remaining <= 0:
                raise TimeoutError(f"Timed out waiting after {self.timeout:.3f}s")
            sleep_s = min(sleep_s, remaining)

        if sleep_s <= 0:
            return

        if sleep_s == float("inf"):
            # Neither a cap nor a deadline bounded the overflowed delay.
            raise OverflowError(
                f"sleep interval overflowed at iteration {iteration} with no max_interval or timeout to bound it"
            )

        time.sleep(sleep_s)


DEFAULT_WAITING_CONFIG = WaitingConfig()
@@ -26,6 +26,9 @@ __all__ = [
26
26
  "to_spark_arrow_type",
27
27
  "to_polars_arrow_type",
28
28
  "arrow_field_to_schema",
29
+ "is_arrow_type_binary_like",
30
+ "is_arrow_type_string_like",
31
+ "is_arrow_type_list_like",
29
32
  ]
30
33
 
31
34
  logger = logging.getLogger(__name__)