ygg 0.1.56__py3-none-any.whl → 0.1.60__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. {ygg-0.1.56.dist-info → ygg-0.1.60.dist-info}/METADATA +1 -1
  2. ygg-0.1.60.dist-info/RECORD +74 -0
  3. {ygg-0.1.56.dist-info → ygg-0.1.60.dist-info}/WHEEL +1 -1
  4. yggdrasil/ai/__init__.py +2 -0
  5. yggdrasil/ai/session.py +89 -0
  6. yggdrasil/ai/sql_session.py +310 -0
  7. yggdrasil/databricks/__init__.py +0 -3
  8. yggdrasil/databricks/compute/cluster.py +68 -113
  9. yggdrasil/databricks/compute/command_execution.py +674 -0
  10. yggdrasil/databricks/compute/exceptions.py +7 -2
  11. yggdrasil/databricks/compute/execution_context.py +465 -277
  12. yggdrasil/databricks/compute/remote.py +4 -14
  13. yggdrasil/databricks/exceptions.py +10 -0
  14. yggdrasil/databricks/sql/__init__.py +0 -4
  15. yggdrasil/databricks/sql/engine.py +161 -173
  16. yggdrasil/databricks/sql/exceptions.py +9 -1
  17. yggdrasil/databricks/sql/statement_result.py +108 -120
  18. yggdrasil/databricks/sql/warehouse.py +331 -92
  19. yggdrasil/databricks/workspaces/io.py +92 -9
  20. yggdrasil/databricks/workspaces/path.py +120 -74
  21. yggdrasil/databricks/workspaces/workspace.py +212 -68
  22. yggdrasil/libs/databrickslib.py +23 -18
  23. yggdrasil/libs/extensions/spark_extensions.py +1 -1
  24. yggdrasil/libs/pandaslib.py +15 -6
  25. yggdrasil/libs/polarslib.py +49 -13
  26. yggdrasil/pyutils/__init__.py +1 -0
  27. yggdrasil/pyutils/callable_serde.py +12 -19
  28. yggdrasil/pyutils/exceptions.py +16 -0
  29. yggdrasil/pyutils/mimetypes.py +0 -0
  30. yggdrasil/pyutils/python_env.py +13 -12
  31. yggdrasil/pyutils/waiting_config.py +171 -0
  32. yggdrasil/types/cast/arrow_cast.py +3 -0
  33. yggdrasil/types/cast/pandas_cast.py +157 -169
  34. yggdrasil/types/cast/polars_cast.py +11 -43
  35. yggdrasil/types/dummy_class.py +81 -0
  36. yggdrasil/version.py +1 -1
  37. ygg-0.1.56.dist-info/RECORD +0 -68
  38. yggdrasil/databricks/ai/__init__.py +0 -1
  39. yggdrasil/databricks/ai/loki.py +0 -374
  40. {ygg-0.1.56.dist-info → ygg-0.1.60.dist-info}/entry_points.txt +0 -0
  41. {ygg-0.1.56.dist-info → ygg-0.1.60.dist-info}/licenses/LICENSE +0 -0
  42. {ygg-0.1.56.dist-info → ygg-0.1.60.dist-info}/top_level.txt +0 -0
@@ -1,374 +0,0 @@
1
- """
2
- loki.py
3
-
4
- Databricks Model Serving (OpenAI-compatible) wrapper with:
5
- - Loki.ask(): stateless call
6
- - TradingChatSession: stateful commodity trading analytics chat
7
- - SqlChatSession: stateful Databricks SQL generator chat
8
-
9
- Important constraint:
10
- - Gemini models only support ONE system prompt.
11
- => We must NOT send multiple system messages.
12
- => We fold summary + context blocks into a single system string.
13
- """
14
-
15
- from __future__ import annotations
16
-
17
- import json
18
- from dataclasses import dataclass, field
19
- from typing import Any, Dict, List, Optional, Union
20
-
21
- from ..workspaces.workspace import WorkspaceService
22
-
23
- try:
24
- from openai import OpenAI as _OpenAI # noqa: F401
25
- except ImportError:
26
- _OpenAI = None # type: ignore
27
-
28
-
29
def make_ai_client(api_key: str, base_url: str):
    """Build an OpenAI-compatible client for the given endpoint.

    The ``openai`` package is imported inside the function so this module can
    still be imported when the dependency is not installed; the ImportError
    only surfaces once a client is actually requested.
    """
    import openai

    client = openai.OpenAI(api_key=api_key, base_url=base_url)
    return client
33
-
34
-
35
# Names exported by ``from <module> import *``.
__all__ = ["Loki", "TradingChatSession", "SqlChatSession"]


# Default system prompt for trading chat sessions; used as the default value
# of ``system_prompt`` in ``Loki.new_trading_chat``.
DEFAULT_TRADING_SYSTEM_PROMPT = """You are Loki: a conversational commodity trading analytics copilot.

Scope:
- Commodity trading analytics: curves, forwards, spreads, basis, hedging, risk, PnL explain, inventory, scheduling.
- Databricks-first workflows: Spark/Delta/Unity Catalog, Databricks SQL, performant Python.

Rules:
- Do NOT invent prices, positions, PnL, risk, or market facts not provided.
- State assumptions explicitly (units, time conventions, delivery months, calendars).
- Prefer actionable output (SQL + efficient Python). Avoid slow patterns.
- If data is missing, list exactly what you need and proceed with a reasonable template.

Style:
- Be concise, practical, performance-focused.
"""

# Default system prompt for SQL-generation sessions; used as the default value
# of ``system_prompt`` in ``Loki.new_sql_chat``.
DEFAULT_SQL_SYSTEM_PROMPT = """You are LokiSQL: a Databricks SQL generator for commodity trading analytics.

Hard rules:
- Output ONLY SQL unless the user explicitly asks for explanation.
- Use Databricks SQL / Spark SQL dialect.
- Prefer readable CTEs, explicit column lists, deterministic joins.
- Do NOT invent table/column names. If missing, use placeholders like <table>, <col>.
- If ambiguous, output best-effort SQL template with SQL comments (-- TODO ...) and placeholders.
- Performance: push filters early, avoid exploding joins, avoid SELECT *.

Default assumptions:
- Dates UTC unless specified.
"""
67
-
68
-
69
@dataclass
class Loki(WorkspaceService):
    """
    Chat client for an OpenAI-compatible API served under the workspace host.

    The underlying client is created lazily on first use and then cached on
    the instance. ``ask`` performs a single stateless completion; the
    ``new_*_chat`` factories return stateful sessions bound to this instance.
    """

    # Name of the serving endpoint / model passed to every completion call.
    model: str = "databricks-gemini-2-5-flash"
    # Cached client instance; populated on first access of ``ai_client``.
    _ai_client: Optional[Any] = field(default=None, repr=False, hash=False)

    @property
    def ai_client(self):
        """Return the cached client, building it on first access."""
        client = self._ai_client
        if client is None:
            client = self._ai_client = self.make_aiclient()
        return client

    def make_aiclient(self):
        """Create a new client pointed at ``<workspace host>/serving-endpoints``."""
        # NOTE(review): ``self.workspace`` is presumably provided by
        # WorkspaceService -- it is not defined in this module.
        base = self.workspace.host.rstrip("/")
        token = self.workspace.current_token()
        return make_ai_client(api_key=token, base_url=f"{base}/serving-endpoints")

    def ask(
        self,
        command: str,
        *,
        system: Optional[str] = None,
        max_tokens: int = 5000,
        temperature: Optional[float] = None,
        extra_messages: Optional[List[Dict[str, str]]] = None,
        **kwargs: Any,
    ) -> str:
        """
        Send one stateless chat completion and return the reply text.

        The request is assembled as: optional system message, then
        ``extra_messages`` in order, then ``command`` as the final user
        message. ``temperature`` is only sent when it is not None, and any
        extra keyword arguments are forwarded to the completion call.

        NOTE (Gemini constraint):
        - At most ONE system message may be sent, so ``extra_messages`` must
          not contain entries with role="system".

        Returns the first choice's content, or "" when that content is empty.
        """
        conversation: List[Dict[str, str]] = []
        if system:
            conversation.append({"role": "system", "content": system})
        # Caller is responsible for keeping "system" roles out of this list.
        conversation.extend(extra_messages or [])
        conversation.append({"role": "user", "content": command})

        # dict(...) keeps the original behaviour of raising TypeError when
        # kwargs repeats one of the explicit keys.
        request: Dict[str, Any] = dict(
            model=self.model,
            messages=conversation,
            max_tokens=max_tokens,
            **kwargs,
        )
        if temperature is not None:
            request["temperature"] = temperature

        response = self.ai_client.chat.completions.create(**request)
        reply = response.choices[0].message.content
        return reply or ""

    def new_trading_chat(
        self,
        *,
        system_prompt: str = DEFAULT_TRADING_SYSTEM_PROMPT,
        max_context_turns: int = 20,
        max_context_chars: int = 120_000,
    ) -> "TradingChatSession":
        """Create a TradingChatSession bound to this instance."""
        settings = {
            "loki": self,
            "system_prompt": system_prompt,
            "max_context_turns": max_context_turns,
            "max_context_chars": max_context_chars,
        }
        return TradingChatSession(**settings)

    def new_sql_chat(
        self,
        *,
        system_prompt: str = DEFAULT_SQL_SYSTEM_PROMPT,
        max_context_turns: int = 20,
        max_context_chars: int = 120_000,
    ) -> "SqlChatSession":
        """Create a SqlChatSession bound to this instance."""
        settings = {
            "loki": self,
            "system_prompt": system_prompt,
            "max_context_turns": max_context_turns,
            "max_context_chars": max_context_chars,
        }
        return SqlChatSession(**settings)
157
-
158
-
159
@dataclass
class _BaseChatSession:
    """
    Stateful session holding message history plus injected context blocks.

    Gemini constraint:
    - Only ONE system message may be sent, so the base prompt, any extra
      instructions, the conversation summary and all context blocks are
      concatenated into a single system string by ``_build_system``.
    """

    # Client object whose ``ask`` method subclasses call to reach the model.
    loki: "Loki"
    # Base system prompt; always the first section of the system string.
    system_prompt: str

    # Prior messages as {"role": ..., "content": ...} dicts, oldest first.
    history: List[Dict[str, str]] = field(default_factory=list)
    # Optional summary folded into the system string when set.
    summary: Optional[str] = None
    # Pre-rendered "[Context: title]\n<payload>" blocks, oldest first.
    context_blocks: List[str] = field(default_factory=list)

    # Keep at most this many turns (2 messages each); <= 0 disables the limit.
    max_context_turns: int = 20
    # Character budget for system string + history contents.
    max_context_chars: int = 120_000

    def reset(self) -> None:
        """Forget history, summary and context blocks (lists cleared in place)."""
        self.history.clear()
        self.summary = None
        self.context_blocks.clear()

    def add_context(self, title: str, payload: Union[str, Dict[str, Any], List[Any]]) -> None:
        """
        Append a titled context block, then re-apply the size limits.

        String payloads are used verbatim; anything else is rendered as
        indented JSON (non-ASCII characters are kept as-is).
        """
        if isinstance(payload, str):
            payload_str = payload
        else:
            payload_str = json.dumps(payload, ensure_ascii=False, indent=2)

        self.context_blocks.append(f"[Context: {title}]\n{payload_str}".strip())
        self._trim()

    def _estimate_chars(self, msgs: List[Dict[str, str]]) -> int:
        """Return the total length of the messages' ``content`` values."""
        # ``or ""`` also covers an explicit ``content: None`` entry, which
        # previously raised TypeError in len().
        return sum(len(m.get("content") or "") for m in msgs)

    def _build_system(self, extra_system: Optional[str] = None) -> str:
        """
        Build the single system string sent to the model.

        Sections, in order and separated by a blank line: base prompt,
        ``extra_system``, conversation summary, context blocks. Empty
        sections are skipped.
        """
        parts: List[str] = [self.system_prompt.strip()]
        if extra_system:
            parts.append(extra_system.strip())
        if self.summary:
            parts.append(f"[ConversationSummary]\n{self.summary}".strip())
        if self.context_blocks:
            parts.append("\n\n".join(self.context_blocks).strip())
        return "\n\n".join(p for p in parts if p)

    def _trim(self) -> None:
        """
        Enforce ``max_context_turns`` and ``max_context_chars``.

        Order of eviction: history is first cut to the newest N turns, then
        the oldest history messages are dropped until the character budget is
        met, and only once history is exhausted are the oldest context blocks
        dropped.
        """
        # Turn trim (keep last N turns => N*2 messages)
        if self.max_context_turns > 0:
            max_msgs = self.max_context_turns * 2
            if len(self.history) > max_msgs:
                self.history = self.history[-max_msgs:]

        budget = self.max_context_chars
        # The system string does not depend on history, so it is measured
        # once here instead of being rebuilt for every dropped message.
        system_chars = len(self._build_system())
        history_chars = self._estimate_chars(self.history)

        # Char trim, step 1: count how many of the oldest messages must go,
        # keeping a running total, then slice the list a single time.
        dropped = 0
        for msg in self.history:
            if system_chars + history_chars <= budget:
                break
            history_chars -= len(msg.get("content") or "")
            dropped += 1
        if dropped:
            self.history = self.history[dropped:]

        # Char trim, step 2: still over budget => shed oldest context blocks.
        # The system string is rebuilt per block because separators and
        # strip() make its length awkward to update incrementally.
        while system_chars + history_chars > budget and self.context_blocks:
            self.context_blocks = self.context_blocks[1:]
            system_chars = len(self._build_system())
222
-
223
-
224
@dataclass
class TradingChatSession(_BaseChatSession):
    """
    Chat session for commodity trading analytics.

    Replies can optionally be requested (and returned) as structured JSON for
    downstream automation.
    """

    def chat(
        self,
        user_text: str,
        *,
        structured: bool = True,
        max_tokens: int = 12000,
        temperature: Optional[float] = None,
        **kwargs: Any,
    ) -> Union[str, Dict[str, Any]]:
        """
        Send ``user_text`` with the session history and record the exchange.

        When ``structured`` is true, an instruction demanding a fixed JSON
        shape is folded into the system string and the reply is parsed; the
        parsed dict is returned if parsing succeeds, otherwise the raw reply
        text. When ``structured`` is false the raw reply text is returned.
        Extra keyword arguments are forwarded to ``loki.ask``.
        """
        self._trim()

        json_contract = (
            "Respond ONLY as valid JSON with keys: "
            "final_answer (string), assumptions (array of strings), data_needed (array of strings), "
            "sql (string or null), python (string or null). "
            "No markdown. No extra keys."
        ) if structured else None

        reply = self.loki.ask(
            user_text,
            system=self._build_system(extra_system=json_contract),
            extra_messages=self.history,  # NOTE: history must contain no system roles
            max_tokens=max_tokens,
            temperature=temperature,
            **kwargs,
        )

        # Record the turn (raw reply text), then re-apply the size limits.
        self.history.extend(
            [
                {"role": "user", "content": user_text},
                {"role": "assistant", "content": reply},
            ]
        )
        self._trim()

        if not structured:
            return reply

        parsed = _try_parse_json_object(reply)
        return reply if parsed is None else parsed
272
-
273
-
274
@dataclass
class SqlChatSession(_BaseChatSession):
    """
    Conversational session that generates Databricks SQL.

    Everything is sent in a single system message carrying strict
    instructions to output SQL only.
    """

    def generate_sql(
        self,
        request: str,
        *,
        max_tokens: int = 12000,
        temperature: Optional[float] = None,
        sql_only: bool = True,
        **kwargs: Any,
    ) -> str:
        """
        Ask the model for SQL answering ``request`` and record the exchange.

        When ``sql_only`` is true an extra "output only SQL" reminder is
        folded into the system string. The whitespace-stripped reply is stored
        in history as-is; the returned value additionally has any surrounding
        code fence removed. Extra keyword arguments are forwarded to
        ``loki.ask``.
        """
        self._trim()

        reminder = None
        if sql_only:
            reminder = (
                "Reminder: Output ONLY SQL. "
                "If ambiguity exists, use SQL comments (-- TODO ...) and placeholders, but still output SQL only."
            )

        raw_reply = self.loki.ask(
            request,
            system=self._build_system(extra_system=reminder),
            extra_messages=self.history,  # history must contain no system roles
            max_tokens=max_tokens,
            temperature=temperature,
            **kwargs,
        )
        answer = raw_reply.strip()

        # Record the turn, then re-apply the size limits.
        self.history.extend(
            [
                {"role": "user", "content": request},
                {"role": "assistant", "content": answer},
            ]
        )
        self._trim()

        return _strip_sql_fences(answer)
316
-
317
-
318
def _strip_sql_fences(text: str) -> str:
    """
    Return ``text`` without a surrounding Markdown code fence.

    Handled shapes:
    - no leading fence: the stripped text is returned unchanged;
    - multi-line fence: the opening line (``` or ```sql) is dropped, and the
      last line too when it is a closing fence;
    - single-line fence such as "```sql SELECT 1```": the backticks and an
      optional leading ``sql`` tag are removed. Previously this shape
      returned "" and the generated SQL was silently lost.
    """
    t = text.strip()
    if not t.startswith("```"):
        return t

    lines = t.splitlines()

    # Opening and closing fence on the same line: keep what is between them.
    if len(lines) == 1 and t.endswith("```"):
        inner = t.strip("`").strip()
        pieces = inner.split(None, 1)
        if pieces and pieces[0].lower() == "sql":
            # Leading token is the language tag, not part of the statement.
            inner = pieces[1].strip() if len(pieces) > 1 else ""
        return inner

    body = lines[1:]  # drop ``` or ```sql
    if body and body[-1].strip().startswith("```"):
        body = body[:-1]
    return "\n".join(body).strip()
327
-
328
-
329
def _strip_markdown_fences(text: str) -> str:
    """
    Remove ```lang ... ``` fences if present.

    - No leading fence: the stripped text is returned unchanged.
    - Multi-line fence: the opening line (``` or ```json) is dropped, and the
      last line too when it is a closing fence; inner content is unchanged.
    - Single-line fence ("```{...}```"): only the backticks are removed. Any
      language tag stays in place because it cannot be told apart from
      content here. Previously this shape returned "".
    """
    t = text.strip()
    if not t.startswith("```"):
        return t

    lines = t.splitlines()

    # Opening and closing fence on the same line: keep what is between them.
    if len(lines) == 1 and t.endswith("```"):
        return t.strip("`").strip()

    # Drop first line: ``` or ```json
    body = lines[1:]

    # Drop last line if it's ```
    if body and body[-1].strip().startswith("```"):
        body = body[:-1]

    return "\n".join(body).strip()


def _try_parse_json_object(text: str) -> Optional[Dict[str, Any]]:
    """
    Best-effort parse of a structured model reply.

    Returns the decoded dict when ``text`` (after fence removal) contains a
    JSON object with at least the keys final_answer, assumptions,
    data_needed, sql and python; returns None otherwise. Never raises for
    malformed JSON.

    Two candidates are tried in order: the whole text, then the span from the
    first "{" to the last "}". The second candidate is now also tried when
    the text starts with "{" but is followed by trailing junk, which
    previously made the function give up.
    """
    t = _strip_markdown_fences(text).strip()

    candidates = [t]
    start = t.find("{")
    end = t.rfind("}")
    if start != -1 and end > start:
        snippet = t[start : end + 1]
        if snippet != t:
            candidates.append(snippet)

    required = {"final_answer", "assumptions", "data_needed", "sql", "python"}
    for candidate in candidates:
        try:
            obj = json.loads(candidate)
        except (ValueError, RecursionError):
            # json.JSONDecodeError is a ValueError; RecursionError covers
            # pathologically nested input. Anything else is a real bug.
            continue
        if isinstance(obj, dict) and required.issubset(obj):
            return obj

    return None