ygg 0.1.56__py3-none-any.whl → 0.1.60__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ygg-0.1.56.dist-info → ygg-0.1.60.dist-info}/METADATA +1 -1
- ygg-0.1.60.dist-info/RECORD +74 -0
- {ygg-0.1.56.dist-info → ygg-0.1.60.dist-info}/WHEEL +1 -1
- yggdrasil/ai/__init__.py +2 -0
- yggdrasil/ai/session.py +89 -0
- yggdrasil/ai/sql_session.py +310 -0
- yggdrasil/databricks/__init__.py +0 -3
- yggdrasil/databricks/compute/cluster.py +68 -113
- yggdrasil/databricks/compute/command_execution.py +674 -0
- yggdrasil/databricks/compute/exceptions.py +7 -2
- yggdrasil/databricks/compute/execution_context.py +465 -277
- yggdrasil/databricks/compute/remote.py +4 -14
- yggdrasil/databricks/exceptions.py +10 -0
- yggdrasil/databricks/sql/__init__.py +0 -4
- yggdrasil/databricks/sql/engine.py +161 -173
- yggdrasil/databricks/sql/exceptions.py +9 -1
- yggdrasil/databricks/sql/statement_result.py +108 -120
- yggdrasil/databricks/sql/warehouse.py +331 -92
- yggdrasil/databricks/workspaces/io.py +92 -9
- yggdrasil/databricks/workspaces/path.py +120 -74
- yggdrasil/databricks/workspaces/workspace.py +212 -68
- yggdrasil/libs/databrickslib.py +23 -18
- yggdrasil/libs/extensions/spark_extensions.py +1 -1
- yggdrasil/libs/pandaslib.py +15 -6
- yggdrasil/libs/polarslib.py +49 -13
- yggdrasil/pyutils/__init__.py +1 -0
- yggdrasil/pyutils/callable_serde.py +12 -19
- yggdrasil/pyutils/exceptions.py +16 -0
- yggdrasil/pyutils/mimetypes.py +0 -0
- yggdrasil/pyutils/python_env.py +13 -12
- yggdrasil/pyutils/waiting_config.py +171 -0
- yggdrasil/types/cast/arrow_cast.py +3 -0
- yggdrasil/types/cast/pandas_cast.py +157 -169
- yggdrasil/types/cast/polars_cast.py +11 -43
- yggdrasil/types/dummy_class.py +81 -0
- yggdrasil/version.py +1 -1
- ygg-0.1.56.dist-info/RECORD +0 -68
- yggdrasil/databricks/ai/__init__.py +0 -1
- yggdrasil/databricks/ai/loki.py +0 -374
- {ygg-0.1.56.dist-info → ygg-0.1.60.dist-info}/entry_points.txt +0 -0
- {ygg-0.1.56.dist-info → ygg-0.1.60.dist-info}/licenses/LICENSE +0 -0
- {ygg-0.1.56.dist-info → ygg-0.1.60.dist-info}/top_level.txt +0 -0
yggdrasil/databricks/ai/loki.py
DELETED
|
@@ -1,374 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
loki.py
|
|
3
|
-
|
|
4
|
-
Databricks Model Serving (OpenAI-compatible) wrapper with:
|
|
5
|
-
- Loki.ask(): stateless call
|
|
6
|
-
- TradingChatSession: stateful commodity trading analytics chat
|
|
7
|
-
- SqlChatSession: stateful Databricks SQL generator chat
|
|
8
|
-
|
|
9
|
-
Important constraint:
|
|
10
|
-
- Gemini models only support ONE system prompt.
|
|
11
|
-
=> We must NOT send multiple system messages.
|
|
12
|
-
=> We fold summary + context blocks into a single system string.
|
|
13
|
-
"""
|
|
14
|
-
|
|
15
|
-
from __future__ import annotations
|
|
16
|
-
|
|
17
|
-
import json
|
|
18
|
-
from dataclasses import dataclass, field
|
|
19
|
-
from typing import Any, Dict, List, Optional, Union
|
|
20
|
-
|
|
21
|
-
from ..workspaces.workspace import WorkspaceService
|
|
22
|
-
|
|
23
|
-
try:
|
|
24
|
-
from openai import OpenAI as _OpenAI # noqa: F401
|
|
25
|
-
except ImportError:
|
|
26
|
-
_OpenAI = None # type: ignore
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
def make_ai_client(api_key: str, base_url: str):
    """
    Build an OpenAI-compatible client bound to ``base_url``.

    The ``openai`` package is imported inside the function, so importing this
    module does not require ``openai`` to be installed; a missing package only
    surfaces as an ImportError when a client is actually requested.
    """
    from openai import OpenAI as client_factory

    client_options = {"api_key": api_key, "base_url": base_url}
    return client_factory(**client_options)
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
__all__ = ["Loki", "TradingChatSession", "SqlChatSession"]
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
# Default system prompt for trading chat sessions.
# Written one literal per prompt line; adjacent literals are concatenated.
DEFAULT_TRADING_SYSTEM_PROMPT = (
    "You are Loki: a conversational commodity trading analytics copilot.\n"
    "\n"
    "Scope:\n"
    "- Commodity trading analytics: curves, forwards, spreads, basis, hedging, risk, PnL explain, inventory, scheduling.\n"
    "- Databricks-first workflows: Spark/Delta/Unity Catalog, Databricks SQL, performant Python.\n"
    "\n"
    "Rules:\n"
    "- Do NOT invent prices, positions, PnL, risk, or market facts not provided.\n"
    "- State assumptions explicitly (units, time conventions, delivery months, calendars).\n"
    "- Prefer actionable output (SQL + efficient Python). Avoid slow patterns.\n"
    "- If data is missing, list exactly what you need and proceed with a reasonable template.\n"
    "\n"
    "Style:\n"
    "- Be concise, practical, performance-focused.\n"
)
|
|
53
|
-
|
|
54
|
-
# Default system prompt for SQL-generation chat sessions.
# Written one literal per prompt line; adjacent literals are concatenated.
DEFAULT_SQL_SYSTEM_PROMPT = (
    "You are LokiSQL: a Databricks SQL generator for commodity trading analytics.\n"
    "\n"
    "Hard rules:\n"
    "- Output ONLY SQL unless the user explicitly asks for explanation.\n"
    "- Use Databricks SQL / Spark SQL dialect.\n"
    "- Prefer readable CTEs, explicit column lists, deterministic joins.\n"
    "- Do NOT invent table/column names. If missing, use placeholders like <table>, <col>.\n"
    "- If ambiguous, output best-effort SQL template with SQL comments (-- TODO ...) and placeholders.\n"
    "- Performance: push filters early, avoid exploding joins, avoid SELECT *.\n"
    "\n"
    "Default assumptions:\n"
    "- Dates UTC unless specified.\n"
)
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
@dataclass
class Loki(WorkspaceService):
    """
    OpenAI-compatible chat client for Databricks Model Serving endpoints.

    The underlying client is created lazily on first access of ``ai_client``
    and targets ``<workspace host>/serving-endpoints`` using the workspace's
    current token.

    Gemini constraint: a request may carry only ONE system message, so all
    system content must be folded into the single ``system`` string.
    """

    # Model (serving endpoint) name sent with every completion request.
    model: str = "databricks-gemini-2-5-flash"
    # Lazily created client; kept out of repr and hash.
    _ai_client: Optional[Any] = field(repr=False, hash=False, default=None)

    @property
    def ai_client(self):
        """Return the cached client, creating it on first use."""
        if self._ai_client is None:
            self._ai_client = self.make_aiclient()
        return self._ai_client

    def make_aiclient(self):
        """Create a new client pointed at this workspace's serving endpoints."""
        host = self.workspace.host.rstrip("/")
        return make_ai_client(
            api_key=self.workspace.current_token(),
            base_url=f"{host}/serving-endpoints",
        )

    def ask(
        self,
        command: str,
        *,
        system: Optional[str] = None,
        max_tokens: int = 5000,
        temperature: Optional[float] = None,
        extra_messages: Optional[List[Dict[str, str]]] = None,
        **kwargs: Any,
    ) -> str:
        """
        Stateless single call to the model.

        Args:
            command: User message, sent as the last message of the request.
            system: Optional system prompt, sent as the first message.
            max_tokens: Completion token limit forwarded to the API.
            temperature: Forwarded only when not ``None``.
            extra_messages: Prior messages inserted between the system prompt
                and ``command``. The caller's list is not modified.
            **kwargs: Extra parameters forwarded to ``chat.completions.create``.

        Returns:
            The content of the first choice, or ``""`` when the response has
            no choices or the choice has no content.

        NOTE (Gemini constraint):
        - Provide at most ONE system prompt (i.e., a single system message).
        - Do not pass additional messages with role="system".
        """
        messages: List[Dict[str, str]] = []
        if system:
            messages.append({"role": "system", "content": system})

        if extra_messages:
            # IMPORTANT: caller must not include any "system" roles here for Gemini models
            messages.extend(extra_messages)

        messages.append({"role": "user", "content": command})

        params: Dict[str, Any] = dict(
            model=self.model,
            messages=messages,
            max_tokens=max_tokens,
            **kwargs,
        )
        if temperature is not None:
            params["temperature"] = temperature

        resp = self.ai_client.chat.completions.create(**params)

        # An empty choices list is treated like missing content instead of
        # raising IndexError.
        choices = resp.choices
        if not choices:
            return ""
        return choices[0].message.content or ""

    def new_trading_chat(
        self,
        *,
        system_prompt: str = DEFAULT_TRADING_SYSTEM_PROMPT,
        max_context_turns: int = 20,
        max_context_chars: int = 120_000,
    ) -> "TradingChatSession":
        """Start a trading analytics chat session backed by this client."""
        return TradingChatSession(
            loki=self,
            system_prompt=system_prompt,
            max_context_turns=max_context_turns,
            max_context_chars=max_context_chars,
        )

    def new_sql_chat(
        self,
        *,
        system_prompt: str = DEFAULT_SQL_SYSTEM_PROMPT,
        max_context_turns: int = 20,
        max_context_chars: int = 120_000,
    ) -> "SqlChatSession":
        """Start a SQL-generation chat session backed by this client."""
        return SqlChatSession(
            loki=self,
            system_prompt=system_prompt,
            max_context_turns=max_context_turns,
            max_context_chars=max_context_chars,
        )
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
@dataclass
|
|
160
|
-
class _BaseChatSession:
|
|
161
|
-
"""
|
|
162
|
-
Stateful session that maintains history + injected context blocks.
|
|
163
|
-
|
|
164
|
-
Gemini constraint:
|
|
165
|
-
- We must fold ALL system content into one system string.
|
|
166
|
-
- Therefore summary/context_blocks are concatenated into the system prompt.
|
|
167
|
-
"""
|
|
168
|
-
loki: Loki
|
|
169
|
-
system_prompt: str
|
|
170
|
-
|
|
171
|
-
history: List[Dict[str, str]] = field(default_factory=list)
|
|
172
|
-
summary: Optional[str] = None
|
|
173
|
-
context_blocks: List[str] = field(default_factory=list)
|
|
174
|
-
|
|
175
|
-
max_context_turns: int = 20
|
|
176
|
-
max_context_chars: int = 120_000
|
|
177
|
-
|
|
178
|
-
def reset(self) -> None:
|
|
179
|
-
self.history.clear()
|
|
180
|
-
self.summary = None
|
|
181
|
-
self.context_blocks.clear()
|
|
182
|
-
|
|
183
|
-
def add_context(self, title: str, payload: Union[str, Dict[str, Any], List[Any]]) -> None:
|
|
184
|
-
if isinstance(payload, str):
|
|
185
|
-
payload_str = payload
|
|
186
|
-
else:
|
|
187
|
-
payload_str = json.dumps(payload, ensure_ascii=False, indent=2)
|
|
188
|
-
|
|
189
|
-
self.context_blocks.append(f"[Context: {title}]\n{payload_str}".strip())
|
|
190
|
-
self._trim()
|
|
191
|
-
|
|
192
|
-
def _estimate_chars(self, msgs: List[Dict[str, str]]) -> int:
|
|
193
|
-
return sum(len(m.get("content", "")) for m in msgs)
|
|
194
|
-
|
|
195
|
-
def _build_system(self, extra_system: Optional[str] = None) -> str:
|
|
196
|
-
parts: List[str] = [self.system_prompt.strip()]
|
|
197
|
-
if extra_system:
|
|
198
|
-
parts.append(extra_system.strip())
|
|
199
|
-
if self.summary:
|
|
200
|
-
parts.append(f"[ConversationSummary]\n{self.summary}".strip())
|
|
201
|
-
if self.context_blocks:
|
|
202
|
-
parts.append("\n\n".join(self.context_blocks).strip())
|
|
203
|
-
return "\n\n".join(p for p in parts if p)
|
|
204
|
-
|
|
205
|
-
def _trim(self) -> None:
|
|
206
|
-
# Turn trim (keep last N turns => N*2 messages)
|
|
207
|
-
if self.max_context_turns > 0:
|
|
208
|
-
max_msgs = self.max_context_turns * 2
|
|
209
|
-
if len(self.history) > max_msgs:
|
|
210
|
-
self.history = self.history[-max_msgs:]
|
|
211
|
-
|
|
212
|
-
# Char trim: shrink history first, then context blocks if needed
|
|
213
|
-
def total_chars() -> int:
|
|
214
|
-
sys_len = len(self._build_system())
|
|
215
|
-
return sys_len + self._estimate_chars(self.history)
|
|
216
|
-
|
|
217
|
-
while total_chars() > self.max_context_chars and self.history:
|
|
218
|
-
self.history = self.history[1:]
|
|
219
|
-
|
|
220
|
-
while total_chars() > self.max_context_chars and self.context_blocks:
|
|
221
|
-
self.context_blocks = self.context_blocks[1:]
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
@dataclass
class TradingChatSession(_BaseChatSession):
    """
    Stateful chat for commodity trading analytics questions.

    With ``structured`` enabled the model is instructed to answer as a JSON
    object; a reply that parses into the expected shape is returned as a dict,
    otherwise the raw reply text is returned.
    """

    def chat(
        self,
        user_text: str,
        *,
        structured: bool = True,
        max_tokens: int = 12000,
        temperature: Optional[float] = None,
        **kwargs: Any,
    ) -> Union[str, Dict[str, Any]]:
        """
        Send ``user_text`` with the current history and record the exchange.

        The user message and the raw assistant reply are appended to
        ``history`` only after the model call returns.
        """
        self._trim()

        # Only structured calls get the JSON-format instruction appended.
        json_instruction = (
            (
                "Respond ONLY as valid JSON with keys: "
                "final_answer (string), assumptions (array of strings), data_needed (array of strings), "
                "sql (string or null), python (string or null). "
                "No markdown. No extra keys."
            )
            if structured
            else None
        )
        system_text = self._build_system(extra_system=json_instruction)

        reply = self.loki.ask(
            user_text,
            system=system_text,
            extra_messages=self.history,  # NOTE: history must contain no system roles
            max_tokens=max_tokens,
            temperature=temperature,
            **kwargs,
        )

        self.history.extend(
            [
                {"role": "user", "content": user_text},
                {"role": "assistant", "content": reply},
            ]
        )
        self._trim()

        if not structured:
            return reply

        # Fall back to the raw text when the reply is not the expected JSON.
        parsed = _try_parse_json_object(reply)
        return reply if parsed is None else parsed
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
@dataclass
class SqlChatSession(_BaseChatSession):
    """
    Conversational session that produces Databricks SQL.

    All instructions travel in a single system message; with ``sql_only`` a
    reminder to output nothing but SQL is appended to it.
    """

    def generate_sql(
        self,
        request: str,
        *,
        max_tokens: int = 12000,
        temperature: Optional[float] = None,
        sql_only: bool = True,
        **kwargs: Any,
    ) -> str:
        """
        Ask the model for SQL answering ``request`` and record the exchange.

        History keeps the stripped reply as the model produced it; the
        returned string additionally has Markdown code fences removed.
        """
        self._trim()

        reminder = (
            (
                "Reminder: Output ONLY SQL. "
                "If ambiguity exists, use SQL comments (-- TODO ...) and placeholders, but still output SQL only."
            )
            if sql_only
            else None
        )
        system_text = self._build_system(extra_system=reminder)

        raw_reply = self.loki.ask(
            request,
            system=system_text,
            extra_messages=self.history,  # history must contain no system roles
            max_tokens=max_tokens,
            temperature=temperature,
            **kwargs,
        )
        reply_sql = raw_reply.strip()

        self.history.extend(
            [
                {"role": "user", "content": request},
                {"role": "assistant", "content": reply_sql},
            ]
        )
        self._trim()

        return _strip_sql_fences(reply_sql)
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
def _strip_sql_fences(text: str) -> str:
|
|
319
|
-
t = text.strip()
|
|
320
|
-
if t.startswith("```"):
|
|
321
|
-
lines = t.splitlines()
|
|
322
|
-
lines = lines[1:] # drop ``` or ```sql
|
|
323
|
-
if lines and lines[-1].strip().startswith("```"):
|
|
324
|
-
lines = lines[:-1]
|
|
325
|
-
return "\n".join(lines).strip()
|
|
326
|
-
return t
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
def _strip_markdown_fences(text: str) -> str:
|
|
330
|
-
"""
|
|
331
|
-
Remove ```lang ... ``` fences if present.
|
|
332
|
-
Keeps inner content unchanged.
|
|
333
|
-
"""
|
|
334
|
-
t = text.strip()
|
|
335
|
-
if not t.startswith("```"):
|
|
336
|
-
return t
|
|
337
|
-
|
|
338
|
-
lines = t.splitlines()
|
|
339
|
-
if not lines:
|
|
340
|
-
return t
|
|
341
|
-
|
|
342
|
-
# Drop first line: ``` or ```json
|
|
343
|
-
lines = lines[1:]
|
|
344
|
-
|
|
345
|
-
# Drop last line if it's ```
|
|
346
|
-
if lines and lines[-1].strip().startswith("```"):
|
|
347
|
-
lines = lines[:-1]
|
|
348
|
-
|
|
349
|
-
return "\n".join(lines).strip()
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
def _try_parse_json_object(text: str) -> Optional[Dict[str, Any]]:
|
|
353
|
-
t = _strip_markdown_fences(text).strip()
|
|
354
|
-
|
|
355
|
-
# Best effort extraction if there's extra junk around JSON
|
|
356
|
-
if not t.startswith("{"):
|
|
357
|
-
start = t.find("{")
|
|
358
|
-
end = t.rfind("}")
|
|
359
|
-
if start != -1 and end != -1 and end > start:
|
|
360
|
-
t = t[start : end + 1]
|
|
361
|
-
|
|
362
|
-
try:
|
|
363
|
-
obj = json.loads(t)
|
|
364
|
-
except Exception:
|
|
365
|
-
return None
|
|
366
|
-
|
|
367
|
-
if not isinstance(obj, dict):
|
|
368
|
-
return None
|
|
369
|
-
|
|
370
|
-
required = {"final_answer", "assumptions", "data_needed", "sql", "python"}
|
|
371
|
-
if not required.issubset(set(obj.keys())):
|
|
372
|
-
return None
|
|
373
|
-
|
|
374
|
-
return obj
|
|
File without changes
|
|
File without changes
|
|
File without changes
|