ygg 0.1.56__py3-none-any.whl → 0.1.57__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ygg
3
- Version: 0.1.56
3
+ Version: 0.1.57
4
4
  Summary: Type-friendly utilities for moving data between Python objects, Arrow, Polars, Pandas, Spark, and Databricks
5
5
  Author: Yggdrasil contributors
6
6
  License: Apache License
@@ -1,14 +1,12 @@
1
- ygg-0.1.56.dist-info/licenses/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
1
+ ygg-0.1.57.dist-info/licenses/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
2
2
  yggdrasil/__init__.py,sha256=4-ghPak2S6zfMqmnlxW2GCgPb5s79znpKa2hGEGXcE4,24
3
- yggdrasil/exceptions.py,sha256=NEpbDFn-8ZRsLiEgJicCwrTHNMWAGtdrTJzosfAeVJo,82
4
- yggdrasil/version.py,sha256=c0ITmemMU7anWgDBUD3t_BAhA3li2gt3XgswCbHv1oU,22
3
+ yggdrasil/version.py,sha256=mM67BdyYZ17u9xAi4WRzFQM2e6yfmX4MPd36R3L920M,22
5
4
  yggdrasil/databricks/__init__.py,sha256=skctY2c8W-hI81upx9F_PWRe5ishL3hrdiTuizgDjdw,152
6
- yggdrasil/databricks/ai/__init__.py,sha256=Mkp70UOVBzDQvdPNsqncHcyzxe5PnSGYE_bHnYxA1eA,21
7
- yggdrasil/databricks/ai/loki.py,sha256=HyVWxzJgfW03YO6TMOTJ1oNvrBJovnqYDn_MeNV6Ni0,11989
5
+ yggdrasil/databricks/ai/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
+ yggdrasil/databricks/ai/loki.py,sha256=1AhMOquMTsZZGYw5cGoXn-QQhdBRMXM9ZRPEUAv4Y3k,1216
8
7
  yggdrasil/databricks/compute/__init__.py,sha256=NvdzmaJSNYY1uJthv1hHdBuNu3bD_-Z65DWnaJt9yXg,289
9
8
  yggdrasil/databricks/compute/cluster.py,sha256=YomLfvB0oxbgl6WDgBRxI1UXsxwlEbR6gq3FUbPHscY,44199
10
- yggdrasil/databricks/compute/exceptions.py,sha256=Ug0ioxu5m2atdTX4OLH0s4R4dylHNxEdn7VhQI66b5M,209
11
- yggdrasil/databricks/compute/execution_context.py,sha256=mhcwSvKTxgcUHdb7huSEjCVU_feiXSGq0JLyLXldjQM,23952
9
+ yggdrasil/databricks/compute/execution_context.py,sha256=jIV6uru2NeX3O5lg-3KEqmXtLxxq45CFgkBQgQIIOHQ,23327
12
10
  yggdrasil/databricks/compute/remote.py,sha256=yicEhyQypssRa2ByscO36s3cBkEgORFsRME9aaq91Pc,3045
13
11
  yggdrasil/databricks/jobs/__init__.py,sha256=snxGSJb0M5I39v0y3IR-uEeSlZR248cQ_4DJ1sYs-h8,154
14
12
  yggdrasil/databricks/jobs/config.py,sha256=9LGeHD04hbfy0xt8_6oobC4moKJh4_DTjZiK4Q2Tqjk,11557
@@ -20,11 +18,11 @@ yggdrasil/databricks/sql/types.py,sha256=5G-BM9_eOsRKEMzeDTWUsWW5g4Idvs-czVCpOCr
20
18
  yggdrasil/databricks/sql/warehouse.py,sha256=1J0dyQLJb-OS1_1xU1eAVZ4CoL2-FhFeowKSvU3RzFc,9773
21
19
  yggdrasil/databricks/workspaces/__init__.py,sha256=dv2zotoFVhNFlTCdRq6gwf5bEzeZkOZszoNZMs0k59g,114
22
20
  yggdrasil/databricks/workspaces/filesytem.py,sha256=Z8JXU7_XUEbw9fpTQT1avRQKi-IAP2KemXBMPkUoY4w,9805
23
- yggdrasil/databricks/workspaces/io.py,sha256=PAoxIxYvTC162Dx2qL2hk8oAdt8BnYrQ3jJHcJm4VkA,33116
24
- yggdrasil/databricks/workspaces/path.py,sha256=h1j3bvjwKcDhJvlU_kAaLcLVz4jrdaWgjqPQMarZRHU,55233
21
+ yggdrasil/databricks/workspaces/io.py,sha256=hErGeSKJ9XpSUvlYAAckh_8IKQQmGeDOqbdl2rh9Fbs,33240
22
+ yggdrasil/databricks/workspaces/path.py,sha256=KkvLFHrps3UFr4ogYdESbJHEMfQBcWfWfXjlrv_7rTU,55180
25
23
  yggdrasil/databricks/workspaces/path_kind.py,sha256=rhWe1ky7uPD0du0bZSv2S4fK4C5zWd7zAF3UeS2iiPU,283
26
24
  yggdrasil/databricks/workspaces/volumes_path.py,sha256=s8CA33cG3jpMVJy5MILLlkEBcFg_qInDCF2jozLj1Fg,2431
27
- yggdrasil/databricks/workspaces/workspace.py,sha256=5DCPz5io_rmrpGNi5I6RChmyZ8kjlNUFGQl8mzQJThg,25511
25
+ yggdrasil/databricks/workspaces/workspace.py,sha256=Tl1pYzTGNpjsPmHCJ62HoJvdzHGiZb43vQxrI3Sk7js,25233
28
26
  yggdrasil/dataclasses/__init__.py,sha256=_RkhfF3KC1eSORby1dzvBXQ0-UGG3u6wyUQWX2jq1Pc,108
29
27
  yggdrasil/dataclasses/dataclass.py,sha256=LxrCjwvmBnb8yRI_N-c31RHHxB4XoJPixmKg9iBIuaI,1148
30
28
  yggdrasil/libs/__init__.py,sha256=zdC9OU0Xy36CLY9mg2drxN6S7isPR8aTLzJA6xVIeLE,91
@@ -35,14 +33,14 @@ yggdrasil/libs/sparklib.py,sha256=FQ3W1iz2EIpQreorOiQuFt15rdhq2QhGEAWp8Zrbl9A,10
35
33
  yggdrasil/libs/extensions/__init__.py,sha256=mcXW5Li3Cbprbs4Ci-b5A0Ju0wmLcfvEiFusTx6xNjU,117
36
34
  yggdrasil/libs/extensions/polars_extensions.py,sha256=RTkGi8llhPJjX7x9egix7-yXWo2X24zIAPSKXV37SSA,12397
37
35
  yggdrasil/libs/extensions/spark_extensions.py,sha256=E64n-3SFTDgMuXwWitX6vOYP9ln2lpGKb0htoBLEZgc,16745
38
- yggdrasil/pyutils/__init__.py,sha256=a7LKg9xWZGtv2o5jr_7wrSZJ9ZmwolKWc--8iRgKlL4,225
36
+ yggdrasil/pyutils/__init__.py,sha256=tl-LapAc71TV7RMgf2ftKwrzr8iiLOGHeJgA3RvO93w,293
39
37
  yggdrasil/pyutils/callable_serde.py,sha256=1XckmFO-ThP0MedxgXwB71u9jWUuhM1btOzW9gJ8w9g,23117
40
38
  yggdrasil/pyutils/equality.py,sha256=Xyf8D1dLUCm3spDEir8Zyj7O4US_fBJwEylJCfJ9slI,3080
41
39
  yggdrasil/pyutils/exceptions.py,sha256=ssKNm-rjhavHUOZmGA7_1Gq9tSHDrb2EFI-cnBuWgng,3388
42
40
  yggdrasil/pyutils/expiring_dict.py,sha256=pr2u25LGwPVbLfsLptiHGovUtYRRo0AMjaJtCtJl7nQ,8477
43
41
  yggdrasil/pyutils/modules.py,sha256=B7IP99YqUMW6-DIESFzBx8-09V1d0a8qrIJUDFhhL2g,11424
44
42
  yggdrasil/pyutils/parallel.py,sha256=ubuq2m9dJzWYUyKCga4Y_9bpaeMYUrleYxdp49CHr44,6781
45
- yggdrasil/pyutils/python_env.py,sha256=nmXXcIUSSbpKWHBnYfCY1QIW-LQ8IILV9Ceijufu5Pg,51086
43
+ yggdrasil/pyutils/python_env.py,sha256=Gh5geFK9ABpyWEfyegGUfIJUoPxKwcH0pqLBiMrW9Rw,51103
46
44
  yggdrasil/pyutils/retry.py,sha256=gXBtn1DdmIYIUmGKOUr8-SUT7MOu97LykN2YR4uocgc,11917
47
45
  yggdrasil/requests/__init__.py,sha256=dMesyzq97_DmI765x0TwaDPEfsxFtgGNgchk8LvEN-o,103
48
46
  yggdrasil/requests/msal.py,sha256=s2GCyzbgFdgdlJ1JqMrZ4qYVbmoG46-ZOTcaVQhZ-sQ,9220
@@ -61,8 +59,8 @@ yggdrasil/types/cast/registry.py,sha256=OOqIfbIjPH-a3figvu-zTvEtUDTEWhe2xIl3cCA4
61
59
  yggdrasil/types/cast/spark_cast.py,sha256=_KAsl1DqmKMSfWxqhVE7gosjYdgiL1C5bDQv6eP3HtA,24926
62
60
  yggdrasil/types/cast/spark_pandas_cast.py,sha256=BuTiWrdCANZCdD_p2MAytqm74eq-rdRXd-LGojBRrfU,5023
63
61
  yggdrasil/types/cast/spark_polars_cast.py,sha256=btmZNHXn2NSt3fUuB4xg7coaE0RezIBdZD92H8NK0Jw,9073
64
- ygg-0.1.56.dist-info/METADATA,sha256=Rr4DBB8q39XEEzBTQtYB_pja448GR74cEh3gc8-BKvY,18528
65
- ygg-0.1.56.dist-info/WHEEL,sha256=qELbo2s1Yzl39ZmrAibXA2jjPLUYfnVhUNTlyF1rq0Y,92
66
- ygg-0.1.56.dist-info/entry_points.txt,sha256=6q-vpWG3kvw2dhctQ0LALdatoeefkN855Ev02I1dKGY,70
67
- ygg-0.1.56.dist-info/top_level.txt,sha256=iBe9Kk4VIVbLpgv_p8OZUIfxgj4dgJ5wBg6vO3rigso,10
68
- ygg-0.1.56.dist-info/RECORD,,
62
+ ygg-0.1.57.dist-info/METADATA,sha256=0VEcri5fh3BUYJxxhQ_icTZfhkry6KgEhbKQEEDrKJ4,18528
63
+ ygg-0.1.57.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
64
+ ygg-0.1.57.dist-info/entry_points.txt,sha256=6q-vpWG3kvw2dhctQ0LALdatoeefkN855Ev02I1dKGY,70
65
+ ygg-0.1.57.dist-info/top_level.txt,sha256=iBe9Kk4VIVbLpgv_p8OZUIfxgj4dgJ5wBg6vO3rigso,10
66
+ ygg-0.1.57.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (80.10.1)
2
+ Generator: setuptools (80.10.2)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1 +0,0 @@
1
- from .loki import *
@@ -1,374 +1,53 @@
1
- """
2
- loki.py
3
-
4
- Databricks Model Serving (OpenAI-compatible) wrapper with:
5
- - Loki.ask(): stateless call
6
- - TradingChatSession: stateful commodity trading analytics chat
7
- - SqlChatSession: stateful Databricks SQL generator chat
8
-
9
- Important constraint:
10
- - Gemini models only support ONE system prompt.
11
- => We must NOT send multiple system messages.
12
- => We fold summary + context blocks into a single system string.
13
- """
14
-
15
- from __future__ import annotations
16
-
17
- import json
18
- from dataclasses import dataclass, field
19
- from typing import Any, Dict, List, Optional, Union
1
+ from typing import Optional
2
+ from dataclasses import field, dataclass
20
3
 
21
4
  from ..workspaces.workspace import WorkspaceService
22
5
 
23
6
  try:
24
- from openai import OpenAI as _OpenAI # noqa: F401
25
- except ImportError:
26
- _OpenAI = None # type: ignore
27
-
28
-
29
- def make_ai_client(api_key: str, base_url: str):
30
- """Late import so module can load even if openai isn't installed."""
31
7
  from openai import OpenAI
32
- return OpenAI(api_key=api_key, base_url=base_url)
33
-
34
-
35
- __all__ = ["Loki", "TradingChatSession", "SqlChatSession"]
36
-
37
-
38
- DEFAULT_TRADING_SYSTEM_PROMPT = """You are Loki: a conversational commodity trading analytics copilot.
39
8
 
40
- Scope:
41
- - Commodity trading analytics: curves, forwards, spreads, basis, hedging, risk, PnL explain, inventory, scheduling.
42
- - Databricks-first workflows: Spark/Delta/Unity Catalog, Databricks SQL, performant Python.
43
-
44
- Rules:
45
- - Do NOT invent prices, positions, PnL, risk, or market facts not provided.
46
- - State assumptions explicitly (units, time conventions, delivery months, calendars).
47
- - Prefer actionable output (SQL + efficient Python). Avoid slow patterns.
48
- - If data is missing, list exactly what you need and proceed with a reasonable template.
49
-
50
- Style:
51
- - Be concise, practical, performance-focused.
52
- """
53
-
54
- DEFAULT_SQL_SYSTEM_PROMPT = """You are LokiSQL: a Databricks SQL generator for commodity trading analytics.
55
-
56
- Hard rules:
57
- - Output ONLY SQL unless the user explicitly asks for explanation.
58
- - Use Databricks SQL / Spark SQL dialect.
59
- - Prefer readable CTEs, explicit column lists, deterministic joins.
60
- - Do NOT invent table/column names. If missing, use placeholders like <table>, <col>.
61
- - If ambiguous, output best-effort SQL template with SQL comments (-- TODO ...) and placeholders.
62
- - Performance: push filters early, avoid exploding joins, avoid SELECT *.
9
+ def make_openai_client(
10
+ api_key: str,
11
+ base_url: str
12
+ ):
13
+ return OpenAI(
14
+ api_key=api_key,
15
+ base_url=base_url
16
+ )
17
+ except ImportError:
18
+ class OpenAI:
19
+ pass
20
+
21
+ def make_openai_client(
22
+ api_key: str,
23
+ base_url: str
24
+ ):
25
+ from openai import OpenAI
26
+
27
+ return OpenAI(
28
+ api_key=api_key,
29
+ base_url=base_url
30
+ )
63
31
 
64
- Default assumptions:
65
- - Dates UTC unless specified.
66
- """
32
+ __all__ = [
33
+ "Loki"
34
+ ]
67
35
 
68
36
 
69
37
  @dataclass
70
38
  class Loki(WorkspaceService):
71
- """
72
- Loki wraps an OpenAI-compatible client pointing at Databricks Model Serving endpoints.
73
- """
74
-
75
39
  model: str = "databricks-gemini-2-5-flash"
76
- _ai_client: Optional[Any] = field(repr=False, hash=False, default=None)
40
+
41
+ _openai_client: Optional[OpenAI] = field(repr=False, hash=False, default=None)
77
42
 
78
43
  @property
79
- def ai_client(self):
80
- if self._ai_client is None:
81
- self._ai_client = self.make_aiclient()
82
- return self._ai_client
44
+ def openai_client(self):
45
+ if self._openai_client is None:
46
+ self._openai_client = self.make_openai_client()
47
+ return self._openai_client
83
48
 
84
- def make_aiclient(self):
85
- host = self.workspace.host.rstrip("/")
86
- return make_ai_client(
49
+ def make_openai_client(self):
50
+ return make_openai_client(
87
51
  api_key=self.workspace.current_token(),
88
- base_url=f"{host}/serving-endpoints",
89
- )
90
-
91
- def ask(
92
- self,
93
- command: str,
94
- *,
95
- system: Optional[str] = None,
96
- max_tokens: int = 5000,
97
- temperature: Optional[float] = None,
98
- extra_messages: Optional[List[Dict[str, str]]] = None,
99
- **kwargs: Any,
100
- ) -> str:
101
- """
102
- Stateless single call to the model.
103
-
104
- NOTE (Gemini constraint):
105
- - Provide at most ONE system prompt (i.e., a single system message).
106
- - Do not pass additional messages with role="system".
107
- """
108
- messages: List[Dict[str, str]] = []
109
- if system:
110
- messages.append({"role": "system", "content": system})
111
-
112
- if extra_messages:
113
- # IMPORTANT: caller must not include any "system" roles here for Gemini models
114
- messages.extend(extra_messages)
115
-
116
- messages.append({"role": "user", "content": command})
117
-
118
- params: Dict[str, Any] = dict(
119
- model=self.model,
120
- messages=messages,
121
- max_tokens=max_tokens,
122
- **kwargs,
123
- )
124
- if temperature is not None:
125
- params["temperature"] = temperature
126
-
127
- resp = self.ai_client.chat.completions.create(**params)
128
- return resp.choices[0].message.content or ""
129
-
130
- def new_trading_chat(
131
- self,
132
- *,
133
- system_prompt: str = DEFAULT_TRADING_SYSTEM_PROMPT,
134
- max_context_turns: int = 20,
135
- max_context_chars: int = 120_000,
136
- ) -> "TradingChatSession":
137
- return TradingChatSession(
138
- loki=self,
139
- system_prompt=system_prompt,
140
- max_context_turns=max_context_turns,
141
- max_context_chars=max_context_chars,
142
- )
143
-
144
- def new_sql_chat(
145
- self,
146
- *,
147
- system_prompt: str = DEFAULT_SQL_SYSTEM_PROMPT,
148
- max_context_turns: int = 20,
149
- max_context_chars: int = 120_000,
150
- ) -> "SqlChatSession":
151
- return SqlChatSession(
152
- loki=self,
153
- system_prompt=system_prompt,
154
- max_context_turns=max_context_turns,
155
- max_context_chars=max_context_chars,
156
- )
157
-
158
-
159
- @dataclass
160
- class _BaseChatSession:
161
- """
162
- Stateful session that maintains history + injected context blocks.
163
-
164
- Gemini constraint:
165
- - We must fold ALL system content into one system string.
166
- - Therefore summary/context_blocks are concatenated into the system prompt.
167
- """
168
- loki: Loki
169
- system_prompt: str
170
-
171
- history: List[Dict[str, str]] = field(default_factory=list)
172
- summary: Optional[str] = None
173
- context_blocks: List[str] = field(default_factory=list)
174
-
175
- max_context_turns: int = 20
176
- max_context_chars: int = 120_000
177
-
178
- def reset(self) -> None:
179
- self.history.clear()
180
- self.summary = None
181
- self.context_blocks.clear()
182
-
183
- def add_context(self, title: str, payload: Union[str, Dict[str, Any], List[Any]]) -> None:
184
- if isinstance(payload, str):
185
- payload_str = payload
186
- else:
187
- payload_str = json.dumps(payload, ensure_ascii=False, indent=2)
188
-
189
- self.context_blocks.append(f"[Context: {title}]\n{payload_str}".strip())
190
- self._trim()
191
-
192
- def _estimate_chars(self, msgs: List[Dict[str, str]]) -> int:
193
- return sum(len(m.get("content", "")) for m in msgs)
194
-
195
- def _build_system(self, extra_system: Optional[str] = None) -> str:
196
- parts: List[str] = [self.system_prompt.strip()]
197
- if extra_system:
198
- parts.append(extra_system.strip())
199
- if self.summary:
200
- parts.append(f"[ConversationSummary]\n{self.summary}".strip())
201
- if self.context_blocks:
202
- parts.append("\n\n".join(self.context_blocks).strip())
203
- return "\n\n".join(p for p in parts if p)
204
-
205
- def _trim(self) -> None:
206
- # Turn trim (keep last N turns => N*2 messages)
207
- if self.max_context_turns > 0:
208
- max_msgs = self.max_context_turns * 2
209
- if len(self.history) > max_msgs:
210
- self.history = self.history[-max_msgs:]
211
-
212
- # Char trim: shrink history first, then context blocks if needed
213
- def total_chars() -> int:
214
- sys_len = len(self._build_system())
215
- return sys_len + self._estimate_chars(self.history)
216
-
217
- while total_chars() > self.max_context_chars and self.history:
218
- self.history = self.history[1:]
219
-
220
- while total_chars() > self.max_context_chars and self.context_blocks:
221
- self.context_blocks = self.context_blocks[1:]
222
-
223
-
224
- @dataclass
225
- class TradingChatSession(_BaseChatSession):
226
- """
227
- Commodity trading analytics chat session.
228
- Optionally returns structured JSON for downstream automation.
229
- """
230
-
231
- def chat(
232
- self,
233
- user_text: str,
234
- *,
235
- structured: bool = True,
236
- max_tokens: int = 12000,
237
- temperature: Optional[float] = None,
238
- **kwargs: Any,
239
- ) -> Union[str, Dict[str, Any]]:
240
- self._trim()
241
-
242
- extra_system = None
243
- if structured:
244
- extra_system = (
245
- "Respond ONLY as valid JSON with keys: "
246
- "final_answer (string), assumptions (array of strings), data_needed (array of strings), "
247
- "sql (string or null), python (string or null). "
248
- "No markdown. No extra keys."
249
- )
250
-
251
- system = self._build_system(extra_system=extra_system)
252
-
253
- assistant_text = self.loki.ask(
254
- user_text,
255
- system=system,
256
- extra_messages=self.history, # NOTE: history must contain no system roles
257
- max_tokens=max_tokens,
258
- temperature=temperature,
259
- **kwargs,
52
+ base_url=self.workspace.host + "/serving-endpoints"
260
53
  )
261
-
262
- self.history.append({"role": "user", "content": user_text})
263
- self.history.append({"role": "assistant", "content": assistant_text})
264
- self._trim()
265
-
266
- if structured:
267
- parsed = _try_parse_json_object(assistant_text)
268
- if parsed is not None:
269
- return parsed
270
-
271
- return assistant_text
272
-
273
-
274
- @dataclass
275
- class SqlChatSession(_BaseChatSession):
276
- """
277
- SQL-only conversational session that generates Databricks SQL.
278
-
279
- Uses a single system message with strict instructions to output SQL only.
280
- """
281
-
282
- def generate_sql(
283
- self,
284
- request: str,
285
- *,
286
- max_tokens: int = 12000,
287
- temperature: Optional[float] = None,
288
- sql_only: bool = True,
289
- **kwargs: Any,
290
- ) -> str:
291
- self._trim()
292
-
293
- extra_system = None
294
- if sql_only:
295
- extra_system = (
296
- "Reminder: Output ONLY SQL. "
297
- "If ambiguity exists, use SQL comments (-- TODO ...) and placeholders, but still output SQL only."
298
- )
299
-
300
- system = self._build_system(extra_system=extra_system)
301
-
302
- sql = self.loki.ask(
303
- request,
304
- system=system,
305
- extra_messages=self.history, # history must contain no system roles
306
- max_tokens=max_tokens,
307
- temperature=temperature,
308
- **kwargs,
309
- ).strip()
310
-
311
- self.history.append({"role": "user", "content": request})
312
- self.history.append({"role": "assistant", "content": sql})
313
- self._trim()
314
-
315
- return _strip_sql_fences(sql)
316
-
317
-
318
- def _strip_sql_fences(text: str) -> str:
319
- t = text.strip()
320
- if t.startswith("```"):
321
- lines = t.splitlines()
322
- lines = lines[1:] # drop ``` or ```sql
323
- if lines and lines[-1].strip().startswith("```"):
324
- lines = lines[:-1]
325
- return "\n".join(lines).strip()
326
- return t
327
-
328
-
329
- def _strip_markdown_fences(text: str) -> str:
330
- """
331
- Remove ```lang ... ``` fences if present.
332
- Keeps inner content unchanged.
333
- """
334
- t = text.strip()
335
- if not t.startswith("```"):
336
- return t
337
-
338
- lines = t.splitlines()
339
- if not lines:
340
- return t
341
-
342
- # Drop first line: ``` or ```json
343
- lines = lines[1:]
344
-
345
- # Drop last line if it's ```
346
- if lines and lines[-1].strip().startswith("```"):
347
- lines = lines[:-1]
348
-
349
- return "\n".join(lines).strip()
350
-
351
-
352
- def _try_parse_json_object(text: str) -> Optional[Dict[str, Any]]:
353
- t = _strip_markdown_fences(text).strip()
354
-
355
- # Best effort extraction if there's extra junk around JSON
356
- if not t.startswith("{"):
357
- start = t.find("{")
358
- end = t.rfind("}")
359
- if start != -1 and end != -1 and end > start:
360
- t = t[start : end + 1]
361
-
362
- try:
363
- obj = json.loads(t)
364
- except Exception:
365
- return None
366
-
367
- if not isinstance(obj, dict):
368
- return None
369
-
370
- required = {"final_answer", "assumptions", "data_needed", "sql", "python"}
371
- if not required.issubset(set(obj.keys())):
372
- return None
373
-
374
- return obj
@@ -16,7 +16,6 @@ from threading import Thread
16
16
  from types import ModuleType
17
17
  from typing import TYPE_CHECKING, Optional, Any, Callable, List, Dict, Union, Iterable, Tuple
18
18
 
19
- from .exceptions import CommandAborted
20
19
  from ...libs.databrickslib import databricks_sdk
21
20
  from ...pyutils.exceptions import raise_parsed_traceback
22
21
  from ...pyutils.expiring_dict import ExpiringDict
@@ -111,12 +110,16 @@ class ExecutionContext:
111
110
  def __exit__(self, exc_type, exc_val, exc_tb):
112
111
  """Exit the context manager and close the remote context if created."""
113
112
  if not self._was_connected:
114
- self.close(wait=False)
113
+ self.close()
115
114
  self.cluster.__exit__(exc_type, exc_val=exc_val, exc_tb=exc_tb)
116
115
 
117
116
  def __del__(self):
118
117
  """Best-effort cleanup for the remote execution context."""
119
- self.close(wait=False)
118
+ if self.context_id:
119
+ try:
120
+ Thread(target=self.close).start()
121
+ except BaseException:
122
+ pass
120
123
 
121
124
  @property
122
125
  def remote_metadata(self) -> RemoteMetadata:
@@ -177,7 +180,7 @@ print(json.dumps(meta))"""
177
180
  """
178
181
  return self.cluster.workspace.sdk()
179
182
 
180
- def create(
183
+ def create_command(
181
184
  self,
182
185
  language: "Language",
183
186
  ) -> any:
@@ -194,17 +197,15 @@ print(json.dumps(meta))"""
194
197
  self.cluster
195
198
  )
196
199
 
197
- client = self._workspace_client().command_execution
198
-
199
200
  try:
200
- created = client.create_and_wait(
201
+ created = self._workspace_client().command_execution.create_and_wait(
201
202
  cluster_id=self.cluster.cluster_id,
202
203
  language=language,
203
204
  )
204
205
  except:
205
206
  self.cluster.ensure_running()
206
207
 
207
- created = client.create_and_wait(
208
+ created = self._workspace_client().command_execution.create_and_wait(
208
209
  cluster_id=self.cluster.cluster_id,
209
210
  language=language,
210
211
  )
@@ -216,38 +217,42 @@ print(json.dumps(meta))"""
216
217
 
217
218
  created = getattr(created, "response", created)
218
219
 
219
- self.context_id = created.id
220
-
221
- return self
220
+ return created
222
221
 
223
222
  def connect(
224
223
  self,
225
- language: Optional["Language"] = None,
226
- reset: bool = False
224
+ language: Optional["Language"] = None
227
225
  ) -> "ExecutionContext":
228
226
  """Create a remote command execution context if not already open.
229
227
 
230
228
  Args:
231
229
  language: Optional language override for the context.
232
- reset: Reset existing if connected
233
230
 
234
231
  Returns:
235
232
  The connected ExecutionContext instance.
236
233
  """
237
234
  if self.context_id is not None:
238
- if not reset:
239
- return self
235
+ return self
240
236
 
241
- self.close(wait=False)
237
+ self.language = language or self.language
242
238
 
243
- language = language or self.language
239
+ if self.language is None:
240
+ self.language = Language.PYTHON
244
241
 
245
- if language is None:
246
- language = Language.PYTHON
242
+ ctx = self.create_command(language=self.language)
247
243
 
248
- return self.create(language=language)
244
+ context_id = ctx.id
245
+ if not context_id:
246
+ raise RuntimeError("Failed to create command execution context")
249
247
 
250
- def close(self, wait: bool = True) -> None:
248
+ self.context_id = context_id
249
+ LOGGER.info(
250
+ "Opened execution context for %s",
251
+ self
252
+ )
253
+ return self
254
+
255
+ def close(self) -> None:
251
256
  """Destroy the remote command execution context if it exists.
252
257
 
253
258
  Returns:
@@ -256,23 +261,12 @@ print(json.dumps(meta))"""
256
261
  if not self.context_id:
257
262
  return
258
263
 
259
- client = self._workspace_client()
260
-
261
264
  try:
262
- if wait:
263
- client.command_execution.destroy(
264
- cluster_id=self.cluster.cluster_id,
265
- context_id=self.context_id,
266
- )
267
- else:
268
- Thread(
269
- target=client.command_execution.destroy,
270
- kwargs={
271
- "cluster_id": self.cluster.cluster_id,
272
- "context_id": self.context_id,
273
- }
274
- ).start()
275
- except BaseException:
265
+ self._workspace_client().command_execution.destroy(
266
+ cluster_id=self.cluster.cluster_id,
267
+ context_id=self.context_id,
268
+ )
269
+ except Exception:
276
270
  # non-fatal: context cleanup best-effort
277
271
  pass
278
272
  finally:
@@ -471,18 +465,7 @@ print(json.dumps(meta))"""
471
465
  )
472
466
 
473
467
  try:
474
- return self._decode_result(
475
- result,
476
- result_tag=result_tag,
477
- print_stdout=print_stdout
478
- )
479
- except CommandAborted:
480
- return self.connect(language=self.language, reset=True).execute_command(
481
- command=command,
482
- timeout=timeout,
483
- result_tag=result_tag,
484
- print_stdout=print_stdout
485
- )
468
+ return self._decode_result(result, result_tag=result_tag, print_stdout=print_stdout)
486
469
  except ModuleNotFoundError as remote_module_error:
487
470
  _MOD_NOT_FOUND_RE = re.compile(r"No module named ['\"]([^'\"]+)['\"]")
488
471
  module_name = _MOD_NOT_FOUND_RE.search(str(remote_module_error))
@@ -677,9 +660,6 @@ with zipfile.ZipFile(buf, "r") as zf:
677
660
  if res.result_type == ResultType.ERROR:
678
661
  message = res.cause or "Command execution failed"
679
662
 
680
- if "client terminated the session" in message:
681
- raise CommandAborted(message)
682
-
683
663
  if self.language == Language.PYTHON:
684
664
  raise_parsed_traceback(message)
685
665
 
@@ -688,7 +668,6 @@ with zipfile.ZipFile(buf, "r") as zf:
688
668
  or getattr(res, "stack_trace", None)
689
669
  or getattr(res, "traceback", None)
690
670
  )
691
-
692
671
  if remote_tb:
693
672
  message = f"{message}\n{remote_tb}"
694
673
 
@@ -20,6 +20,7 @@ from .path_kind import DatabricksPathKind
20
20
  from ...libs.databrickslib import databricks
21
21
  from ...libs.pandaslib import PandasDataFrame
22
22
  from ...libs.polarslib import polars, PolarsDataFrame
23
+ from ...pyutils import retry
23
24
  from ...types.cast.registry import convert
24
25
  from ...types.file_format import ExcelFileFormat
25
26
 
@@ -30,6 +31,7 @@ if databricks is not None:
30
31
  ResourceDoesNotExist,
31
32
  BadRequest,
32
33
  )
34
+ from databricks.sdk.errors import InternalError
33
35
 
34
36
  if TYPE_CHECKING:
35
37
  from .path import DatabricksPath
@@ -1036,6 +1038,7 @@ class DatabricksVolumeIO(DatabricksIO):
1036
1038
  end = start + length
1037
1039
  return data[start:end]
1038
1040
 
1041
+ @retry(exceptions=(InternalError,))
1039
1042
  def write_all_bytes(self, data: bytes):
1040
1043
  """Write bytes to a volume file.
1041
1044
 
@@ -30,7 +30,7 @@ from ...types.cast.registry import convert, register_converter
30
30
  from ...types.file_format import ExcelFileFormat
31
31
 
32
32
  if databricks is not None:
33
- from databricks.sdk.service.catalog import VolumeType, VolumeInfo
33
+ from databricks.sdk.service.catalog import VolumeType, PathOperation, VolumeInfo
34
34
  from databricks.sdk.service.workspace import ObjectType
35
35
  from databricks.sdk.errors.platform import (
36
36
  NotFound,
@@ -1236,8 +1236,6 @@ class DatabricksPath:
1236
1236
  self,
1237
1237
  operation: Optional["PathOperation"] = None
1238
1238
  ):
1239
- from databricks.sdk.service.catalog import PathOperation
1240
-
1241
1239
  if self.kind != DatabricksPathKind.VOLUME:
1242
1240
  raise ValueError(f"Cannot generate temporary credentials for {repr(self)}")
1243
1241
 
@@ -652,9 +652,9 @@ class Workspace:
652
652
  """Return a Cluster helper bound to this workspace.
653
653
 
654
654
  Args:
655
- workspace: Optional workspace override.
656
655
  cluster_id: Optional cluster id.
657
656
  cluster_name: Optional cluster name.
657
+ **kwargs: Additional Cluster parameters.
658
658
 
659
659
  Returns:
660
660
  A Cluster instance.
@@ -662,7 +662,7 @@ class Workspace:
662
662
  from ..compute.cluster import Cluster
663
663
 
664
664
  return Cluster(
665
- workspace=self if workspace is None else workspace,
665
+ workspace=self,
666
666
  cluster_id=cluster_id,
667
667
  cluster_name=cluster_name,
668
668
  )
@@ -671,19 +671,10 @@ class Workspace:
671
671
  self,
672
672
  workspace: Optional["Workspace"] = None,
673
673
  ):
674
- """
675
- Return a Cluster helper bound to this workspace.
676
-
677
- Args:
678
- workspace: Optional workspace override.
679
-
680
- Returns:
681
- A Loki AI instance.
682
- """
683
674
  from ..ai.loki import Loki
684
675
 
685
676
  return Loki(
686
- workspace=self if workspace is None else workspace,
677
+ workspace=self,
687
678
  )
688
679
 
689
680
  # ---------------------------------------------------------------------------
@@ -4,3 +4,5 @@ from .retry import retry
4
4
  from .parallel import parallelize
5
5
  from .python_env import PythonEnv
6
6
  from .callable_serde import CallableSerde
7
+
8
+ __all__ = ["retry", "parallelize", "PythonEnv", "CallableSerde"]
@@ -20,7 +20,7 @@ from dataclasses import dataclass, field
20
20
  from pathlib import Path
21
21
  from typing import Any, Iterable, Iterator, Mapping, MutableMapping, Optional, Union, List, Tuple
22
22
 
23
- from .modules import PipIndexSettings
23
+ from yggdrasil.pyutils.modules import PipIndexSettings
24
24
 
25
25
  log = logging.getLogger(__name__)
26
26
 
yggdrasil/version.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.1.56"
1
+ __version__ = "0.1.57"
@@ -1,14 +0,0 @@
1
- from ...exceptions import YGGException
2
-
3
- __all__ = [
4
- "ComputeException",
5
- "CommandAborted"
6
- ]
7
-
8
-
9
- class ComputeException(YGGException):
10
- pass
11
-
12
-
13
- class CommandAborted(YGGException):
14
- pass
yggdrasil/exceptions.py DELETED
@@ -1,7 +0,0 @@
1
- __all__ = [
2
- "YGGException"
3
- ]
4
-
5
-
6
- class YGGException(Exception):
7
- pass