ygg 0.1.56__py3-none-any.whl → 0.1.57__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ygg-0.1.56.dist-info → ygg-0.1.57.dist-info}/METADATA +1 -1
- {ygg-0.1.56.dist-info → ygg-0.1.57.dist-info}/RECORD +15 -17
- {ygg-0.1.56.dist-info → ygg-0.1.57.dist-info}/WHEEL +1 -1
- yggdrasil/databricks/ai/__init__.py +0 -1
- yggdrasil/databricks/ai/loki.py +36 -357
- yggdrasil/databricks/compute/execution_context.py +33 -54
- yggdrasil/databricks/workspaces/io.py +3 -0
- yggdrasil/databricks/workspaces/path.py +1 -3
- yggdrasil/databricks/workspaces/workspace.py +3 -12
- yggdrasil/pyutils/__init__.py +2 -0
- yggdrasil/pyutils/python_env.py +1 -1
- yggdrasil/version.py +1 -1
- yggdrasil/databricks/compute/exceptions.py +0 -14
- yggdrasil/exceptions.py +0 -7
- {ygg-0.1.56.dist-info → ygg-0.1.57.dist-info}/entry_points.txt +0 -0
- {ygg-0.1.56.dist-info → ygg-0.1.57.dist-info}/licenses/LICENSE +0 -0
- {ygg-0.1.56.dist-info → ygg-0.1.57.dist-info}/top_level.txt +0 -0
|
@@ -1,14 +1,12 @@
|
|
|
1
|
-
ygg-0.1.
|
|
1
|
+
ygg-0.1.57.dist-info/licenses/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
|
|
2
2
|
yggdrasil/__init__.py,sha256=4-ghPak2S6zfMqmnlxW2GCgPb5s79znpKa2hGEGXcE4,24
|
|
3
|
-
yggdrasil/
|
|
4
|
-
yggdrasil/version.py,sha256=c0ITmemMU7anWgDBUD3t_BAhA3li2gt3XgswCbHv1oU,22
|
|
3
|
+
yggdrasil/version.py,sha256=mM67BdyYZ17u9xAi4WRzFQM2e6yfmX4MPd36R3L920M,22
|
|
5
4
|
yggdrasil/databricks/__init__.py,sha256=skctY2c8W-hI81upx9F_PWRe5ishL3hrdiTuizgDjdw,152
|
|
6
|
-
yggdrasil/databricks/ai/__init__.py,sha256=
|
|
7
|
-
yggdrasil/databricks/ai/loki.py,sha256=
|
|
5
|
+
yggdrasil/databricks/ai/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
6
|
+
yggdrasil/databricks/ai/loki.py,sha256=1AhMOquMTsZZGYw5cGoXn-QQhdBRMXM9ZRPEUAv4Y3k,1216
|
|
8
7
|
yggdrasil/databricks/compute/__init__.py,sha256=NvdzmaJSNYY1uJthv1hHdBuNu3bD_-Z65DWnaJt9yXg,289
|
|
9
8
|
yggdrasil/databricks/compute/cluster.py,sha256=YomLfvB0oxbgl6WDgBRxI1UXsxwlEbR6gq3FUbPHscY,44199
|
|
10
|
-
yggdrasil/databricks/compute/
|
|
11
|
-
yggdrasil/databricks/compute/execution_context.py,sha256=mhcwSvKTxgcUHdb7huSEjCVU_feiXSGq0JLyLXldjQM,23952
|
|
9
|
+
yggdrasil/databricks/compute/execution_context.py,sha256=jIV6uru2NeX3O5lg-3KEqmXtLxxq45CFgkBQgQIIOHQ,23327
|
|
12
10
|
yggdrasil/databricks/compute/remote.py,sha256=yicEhyQypssRa2ByscO36s3cBkEgORFsRME9aaq91Pc,3045
|
|
13
11
|
yggdrasil/databricks/jobs/__init__.py,sha256=snxGSJb0M5I39v0y3IR-uEeSlZR248cQ_4DJ1sYs-h8,154
|
|
14
12
|
yggdrasil/databricks/jobs/config.py,sha256=9LGeHD04hbfy0xt8_6oobC4moKJh4_DTjZiK4Q2Tqjk,11557
|
|
@@ -20,11 +18,11 @@ yggdrasil/databricks/sql/types.py,sha256=5G-BM9_eOsRKEMzeDTWUsWW5g4Idvs-czVCpOCr
|
|
|
20
18
|
yggdrasil/databricks/sql/warehouse.py,sha256=1J0dyQLJb-OS1_1xU1eAVZ4CoL2-FhFeowKSvU3RzFc,9773
|
|
21
19
|
yggdrasil/databricks/workspaces/__init__.py,sha256=dv2zotoFVhNFlTCdRq6gwf5bEzeZkOZszoNZMs0k59g,114
|
|
22
20
|
yggdrasil/databricks/workspaces/filesytem.py,sha256=Z8JXU7_XUEbw9fpTQT1avRQKi-IAP2KemXBMPkUoY4w,9805
|
|
23
|
-
yggdrasil/databricks/workspaces/io.py,sha256=
|
|
24
|
-
yggdrasil/databricks/workspaces/path.py,sha256=
|
|
21
|
+
yggdrasil/databricks/workspaces/io.py,sha256=hErGeSKJ9XpSUvlYAAckh_8IKQQmGeDOqbdl2rh9Fbs,33240
|
|
22
|
+
yggdrasil/databricks/workspaces/path.py,sha256=KkvLFHrps3UFr4ogYdESbJHEMfQBcWfWfXjlrv_7rTU,55180
|
|
25
23
|
yggdrasil/databricks/workspaces/path_kind.py,sha256=rhWe1ky7uPD0du0bZSv2S4fK4C5zWd7zAF3UeS2iiPU,283
|
|
26
24
|
yggdrasil/databricks/workspaces/volumes_path.py,sha256=s8CA33cG3jpMVJy5MILLlkEBcFg_qInDCF2jozLj1Fg,2431
|
|
27
|
-
yggdrasil/databricks/workspaces/workspace.py,sha256=
|
|
25
|
+
yggdrasil/databricks/workspaces/workspace.py,sha256=Tl1pYzTGNpjsPmHCJ62HoJvdzHGiZb43vQxrI3Sk7js,25233
|
|
28
26
|
yggdrasil/dataclasses/__init__.py,sha256=_RkhfF3KC1eSORby1dzvBXQ0-UGG3u6wyUQWX2jq1Pc,108
|
|
29
27
|
yggdrasil/dataclasses/dataclass.py,sha256=LxrCjwvmBnb8yRI_N-c31RHHxB4XoJPixmKg9iBIuaI,1148
|
|
30
28
|
yggdrasil/libs/__init__.py,sha256=zdC9OU0Xy36CLY9mg2drxN6S7isPR8aTLzJA6xVIeLE,91
|
|
@@ -35,14 +33,14 @@ yggdrasil/libs/sparklib.py,sha256=FQ3W1iz2EIpQreorOiQuFt15rdhq2QhGEAWp8Zrbl9A,10
|
|
|
35
33
|
yggdrasil/libs/extensions/__init__.py,sha256=mcXW5Li3Cbprbs4Ci-b5A0Ju0wmLcfvEiFusTx6xNjU,117
|
|
36
34
|
yggdrasil/libs/extensions/polars_extensions.py,sha256=RTkGi8llhPJjX7x9egix7-yXWo2X24zIAPSKXV37SSA,12397
|
|
37
35
|
yggdrasil/libs/extensions/spark_extensions.py,sha256=E64n-3SFTDgMuXwWitX6vOYP9ln2lpGKb0htoBLEZgc,16745
|
|
38
|
-
yggdrasil/pyutils/__init__.py,sha256=
|
|
36
|
+
yggdrasil/pyutils/__init__.py,sha256=tl-LapAc71TV7RMgf2ftKwrzr8iiLOGHeJgA3RvO93w,293
|
|
39
37
|
yggdrasil/pyutils/callable_serde.py,sha256=1XckmFO-ThP0MedxgXwB71u9jWUuhM1btOzW9gJ8w9g,23117
|
|
40
38
|
yggdrasil/pyutils/equality.py,sha256=Xyf8D1dLUCm3spDEir8Zyj7O4US_fBJwEylJCfJ9slI,3080
|
|
41
39
|
yggdrasil/pyutils/exceptions.py,sha256=ssKNm-rjhavHUOZmGA7_1Gq9tSHDrb2EFI-cnBuWgng,3388
|
|
42
40
|
yggdrasil/pyutils/expiring_dict.py,sha256=pr2u25LGwPVbLfsLptiHGovUtYRRo0AMjaJtCtJl7nQ,8477
|
|
43
41
|
yggdrasil/pyutils/modules.py,sha256=B7IP99YqUMW6-DIESFzBx8-09V1d0a8qrIJUDFhhL2g,11424
|
|
44
42
|
yggdrasil/pyutils/parallel.py,sha256=ubuq2m9dJzWYUyKCga4Y_9bpaeMYUrleYxdp49CHr44,6781
|
|
45
|
-
yggdrasil/pyutils/python_env.py,sha256=
|
|
43
|
+
yggdrasil/pyutils/python_env.py,sha256=Gh5geFK9ABpyWEfyegGUfIJUoPxKwcH0pqLBiMrW9Rw,51103
|
|
46
44
|
yggdrasil/pyutils/retry.py,sha256=gXBtn1DdmIYIUmGKOUr8-SUT7MOu97LykN2YR4uocgc,11917
|
|
47
45
|
yggdrasil/requests/__init__.py,sha256=dMesyzq97_DmI765x0TwaDPEfsxFtgGNgchk8LvEN-o,103
|
|
48
46
|
yggdrasil/requests/msal.py,sha256=s2GCyzbgFdgdlJ1JqMrZ4qYVbmoG46-ZOTcaVQhZ-sQ,9220
|
|
@@ -61,8 +59,8 @@ yggdrasil/types/cast/registry.py,sha256=OOqIfbIjPH-a3figvu-zTvEtUDTEWhe2xIl3cCA4
|
|
|
61
59
|
yggdrasil/types/cast/spark_cast.py,sha256=_KAsl1DqmKMSfWxqhVE7gosjYdgiL1C5bDQv6eP3HtA,24926
|
|
62
60
|
yggdrasil/types/cast/spark_pandas_cast.py,sha256=BuTiWrdCANZCdD_p2MAytqm74eq-rdRXd-LGojBRrfU,5023
|
|
63
61
|
yggdrasil/types/cast/spark_polars_cast.py,sha256=btmZNHXn2NSt3fUuB4xg7coaE0RezIBdZD92H8NK0Jw,9073
|
|
64
|
-
ygg-0.1.
|
|
65
|
-
ygg-0.1.
|
|
66
|
-
ygg-0.1.
|
|
67
|
-
ygg-0.1.
|
|
68
|
-
ygg-0.1.
|
|
62
|
+
ygg-0.1.57.dist-info/METADATA,sha256=0VEcri5fh3BUYJxxhQ_icTZfhkry6KgEhbKQEEDrKJ4,18528
|
|
63
|
+
ygg-0.1.57.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
64
|
+
ygg-0.1.57.dist-info/entry_points.txt,sha256=6q-vpWG3kvw2dhctQ0LALdatoeefkN855Ev02I1dKGY,70
|
|
65
|
+
ygg-0.1.57.dist-info/top_level.txt,sha256=iBe9Kk4VIVbLpgv_p8OZUIfxgj4dgJ5wBg6vO3rigso,10
|
|
66
|
+
ygg-0.1.57.dist-info/RECORD,,
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
from .loki import *
|
yggdrasil/databricks/ai/loki.py
CHANGED
|
@@ -1,374 +1,53 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
Databricks Model Serving (OpenAI-compatible) wrapper with:
|
|
5
|
-
- Loki.ask(): stateless call
|
|
6
|
-
- TradingChatSession: stateful commodity trading analytics chat
|
|
7
|
-
- SqlChatSession: stateful Databricks SQL generator chat
|
|
8
|
-
|
|
9
|
-
Important constraint:
|
|
10
|
-
- Gemini models only support ONE system prompt.
|
|
11
|
-
=> We must NOT send multiple system messages.
|
|
12
|
-
=> We fold summary + context blocks into a single system string.
|
|
13
|
-
"""
|
|
14
|
-
|
|
15
|
-
from __future__ import annotations
|
|
16
|
-
|
|
17
|
-
import json
|
|
18
|
-
from dataclasses import dataclass, field
|
|
19
|
-
from typing import Any, Dict, List, Optional, Union
|
|
1
|
+
from typing import Optional
|
|
2
|
+
from dataclasses import field, dataclass
|
|
20
3
|
|
|
21
4
|
from ..workspaces.workspace import WorkspaceService
|
|
22
5
|
|
|
23
6
|
try:
|
|
24
|
-
from openai import OpenAI as _OpenAI # noqa: F401
|
|
25
|
-
except ImportError:
|
|
26
|
-
_OpenAI = None # type: ignore
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
def make_ai_client(api_key: str, base_url: str):
|
|
30
|
-
"""Late import so module can load even if openai isn't installed."""
|
|
31
7
|
from openai import OpenAI
|
|
32
|
-
return OpenAI(api_key=api_key, base_url=base_url)
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
__all__ = ["Loki", "TradingChatSession", "SqlChatSession"]
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
DEFAULT_TRADING_SYSTEM_PROMPT = """You are Loki: a conversational commodity trading analytics copilot.
|
|
39
8
|
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
- Performance: push filters early, avoid exploding joins, avoid SELECT *.
|
|
9
|
+
def make_openai_client(
|
|
10
|
+
api_key: str,
|
|
11
|
+
base_url: str
|
|
12
|
+
):
|
|
13
|
+
return OpenAI(
|
|
14
|
+
api_key=api_key,
|
|
15
|
+
base_url=base_url
|
|
16
|
+
)
|
|
17
|
+
except ImportError:
|
|
18
|
+
class OpenAI:
|
|
19
|
+
pass
|
|
20
|
+
|
|
21
|
+
def make_openai_client(
|
|
22
|
+
api_key: str,
|
|
23
|
+
base_url: str
|
|
24
|
+
):
|
|
25
|
+
from openai import OpenAI
|
|
26
|
+
|
|
27
|
+
return OpenAI(
|
|
28
|
+
api_key=api_key,
|
|
29
|
+
base_url=base_url
|
|
30
|
+
)
|
|
63
31
|
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
32
|
+
__all__ = [
|
|
33
|
+
"Loki"
|
|
34
|
+
]
|
|
67
35
|
|
|
68
36
|
|
|
69
37
|
@dataclass
|
|
70
38
|
class Loki(WorkspaceService):
|
|
71
|
-
"""
|
|
72
|
-
Loki wraps an OpenAI-compatible client pointing at Databricks Model Serving endpoints.
|
|
73
|
-
"""
|
|
74
|
-
|
|
75
39
|
model: str = "databricks-gemini-2-5-flash"
|
|
76
|
-
|
|
40
|
+
|
|
41
|
+
_openai_client: Optional[OpenAI] = field(repr=False, hash=False, default=None)
|
|
77
42
|
|
|
78
43
|
@property
|
|
79
|
-
def
|
|
80
|
-
if self.
|
|
81
|
-
self.
|
|
82
|
-
return self.
|
|
44
|
+
def openai_client(self):
|
|
45
|
+
if self._openai_client is None:
|
|
46
|
+
self._openai_client = self.make_openai_client()
|
|
47
|
+
return self._openai_client
|
|
83
48
|
|
|
84
|
-
def
|
|
85
|
-
|
|
86
|
-
return make_ai_client(
|
|
49
|
+
def make_openai_client(self):
|
|
50
|
+
return make_openai_client(
|
|
87
51
|
api_key=self.workspace.current_token(),
|
|
88
|
-
base_url=
|
|
89
|
-
)
|
|
90
|
-
|
|
91
|
-
def ask(
|
|
92
|
-
self,
|
|
93
|
-
command: str,
|
|
94
|
-
*,
|
|
95
|
-
system: Optional[str] = None,
|
|
96
|
-
max_tokens: int = 5000,
|
|
97
|
-
temperature: Optional[float] = None,
|
|
98
|
-
extra_messages: Optional[List[Dict[str, str]]] = None,
|
|
99
|
-
**kwargs: Any,
|
|
100
|
-
) -> str:
|
|
101
|
-
"""
|
|
102
|
-
Stateless single call to the model.
|
|
103
|
-
|
|
104
|
-
NOTE (Gemini constraint):
|
|
105
|
-
- Provide at most ONE system prompt (i.e., a single system message).
|
|
106
|
-
- Do not pass additional messages with role="system".
|
|
107
|
-
"""
|
|
108
|
-
messages: List[Dict[str, str]] = []
|
|
109
|
-
if system:
|
|
110
|
-
messages.append({"role": "system", "content": system})
|
|
111
|
-
|
|
112
|
-
if extra_messages:
|
|
113
|
-
# IMPORTANT: caller must not include any "system" roles here for Gemini models
|
|
114
|
-
messages.extend(extra_messages)
|
|
115
|
-
|
|
116
|
-
messages.append({"role": "user", "content": command})
|
|
117
|
-
|
|
118
|
-
params: Dict[str, Any] = dict(
|
|
119
|
-
model=self.model,
|
|
120
|
-
messages=messages,
|
|
121
|
-
max_tokens=max_tokens,
|
|
122
|
-
**kwargs,
|
|
123
|
-
)
|
|
124
|
-
if temperature is not None:
|
|
125
|
-
params["temperature"] = temperature
|
|
126
|
-
|
|
127
|
-
resp = self.ai_client.chat.completions.create(**params)
|
|
128
|
-
return resp.choices[0].message.content or ""
|
|
129
|
-
|
|
130
|
-
def new_trading_chat(
|
|
131
|
-
self,
|
|
132
|
-
*,
|
|
133
|
-
system_prompt: str = DEFAULT_TRADING_SYSTEM_PROMPT,
|
|
134
|
-
max_context_turns: int = 20,
|
|
135
|
-
max_context_chars: int = 120_000,
|
|
136
|
-
) -> "TradingChatSession":
|
|
137
|
-
return TradingChatSession(
|
|
138
|
-
loki=self,
|
|
139
|
-
system_prompt=system_prompt,
|
|
140
|
-
max_context_turns=max_context_turns,
|
|
141
|
-
max_context_chars=max_context_chars,
|
|
142
|
-
)
|
|
143
|
-
|
|
144
|
-
def new_sql_chat(
|
|
145
|
-
self,
|
|
146
|
-
*,
|
|
147
|
-
system_prompt: str = DEFAULT_SQL_SYSTEM_PROMPT,
|
|
148
|
-
max_context_turns: int = 20,
|
|
149
|
-
max_context_chars: int = 120_000,
|
|
150
|
-
) -> "SqlChatSession":
|
|
151
|
-
return SqlChatSession(
|
|
152
|
-
loki=self,
|
|
153
|
-
system_prompt=system_prompt,
|
|
154
|
-
max_context_turns=max_context_turns,
|
|
155
|
-
max_context_chars=max_context_chars,
|
|
156
|
-
)
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
@dataclass
|
|
160
|
-
class _BaseChatSession:
|
|
161
|
-
"""
|
|
162
|
-
Stateful session that maintains history + injected context blocks.
|
|
163
|
-
|
|
164
|
-
Gemini constraint:
|
|
165
|
-
- We must fold ALL system content into one system string.
|
|
166
|
-
- Therefore summary/context_blocks are concatenated into the system prompt.
|
|
167
|
-
"""
|
|
168
|
-
loki: Loki
|
|
169
|
-
system_prompt: str
|
|
170
|
-
|
|
171
|
-
history: List[Dict[str, str]] = field(default_factory=list)
|
|
172
|
-
summary: Optional[str] = None
|
|
173
|
-
context_blocks: List[str] = field(default_factory=list)
|
|
174
|
-
|
|
175
|
-
max_context_turns: int = 20
|
|
176
|
-
max_context_chars: int = 120_000
|
|
177
|
-
|
|
178
|
-
def reset(self) -> None:
|
|
179
|
-
self.history.clear()
|
|
180
|
-
self.summary = None
|
|
181
|
-
self.context_blocks.clear()
|
|
182
|
-
|
|
183
|
-
def add_context(self, title: str, payload: Union[str, Dict[str, Any], List[Any]]) -> None:
|
|
184
|
-
if isinstance(payload, str):
|
|
185
|
-
payload_str = payload
|
|
186
|
-
else:
|
|
187
|
-
payload_str = json.dumps(payload, ensure_ascii=False, indent=2)
|
|
188
|
-
|
|
189
|
-
self.context_blocks.append(f"[Context: {title}]\n{payload_str}".strip())
|
|
190
|
-
self._trim()
|
|
191
|
-
|
|
192
|
-
def _estimate_chars(self, msgs: List[Dict[str, str]]) -> int:
|
|
193
|
-
return sum(len(m.get("content", "")) for m in msgs)
|
|
194
|
-
|
|
195
|
-
def _build_system(self, extra_system: Optional[str] = None) -> str:
|
|
196
|
-
parts: List[str] = [self.system_prompt.strip()]
|
|
197
|
-
if extra_system:
|
|
198
|
-
parts.append(extra_system.strip())
|
|
199
|
-
if self.summary:
|
|
200
|
-
parts.append(f"[ConversationSummary]\n{self.summary}".strip())
|
|
201
|
-
if self.context_blocks:
|
|
202
|
-
parts.append("\n\n".join(self.context_blocks).strip())
|
|
203
|
-
return "\n\n".join(p for p in parts if p)
|
|
204
|
-
|
|
205
|
-
def _trim(self) -> None:
|
|
206
|
-
# Turn trim (keep last N turns => N*2 messages)
|
|
207
|
-
if self.max_context_turns > 0:
|
|
208
|
-
max_msgs = self.max_context_turns * 2
|
|
209
|
-
if len(self.history) > max_msgs:
|
|
210
|
-
self.history = self.history[-max_msgs:]
|
|
211
|
-
|
|
212
|
-
# Char trim: shrink history first, then context blocks if needed
|
|
213
|
-
def total_chars() -> int:
|
|
214
|
-
sys_len = len(self._build_system())
|
|
215
|
-
return sys_len + self._estimate_chars(self.history)
|
|
216
|
-
|
|
217
|
-
while total_chars() > self.max_context_chars and self.history:
|
|
218
|
-
self.history = self.history[1:]
|
|
219
|
-
|
|
220
|
-
while total_chars() > self.max_context_chars and self.context_blocks:
|
|
221
|
-
self.context_blocks = self.context_blocks[1:]
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
@dataclass
|
|
225
|
-
class TradingChatSession(_BaseChatSession):
|
|
226
|
-
"""
|
|
227
|
-
Commodity trading analytics chat session.
|
|
228
|
-
Optionally returns structured JSON for downstream automation.
|
|
229
|
-
"""
|
|
230
|
-
|
|
231
|
-
def chat(
|
|
232
|
-
self,
|
|
233
|
-
user_text: str,
|
|
234
|
-
*,
|
|
235
|
-
structured: bool = True,
|
|
236
|
-
max_tokens: int = 12000,
|
|
237
|
-
temperature: Optional[float] = None,
|
|
238
|
-
**kwargs: Any,
|
|
239
|
-
) -> Union[str, Dict[str, Any]]:
|
|
240
|
-
self._trim()
|
|
241
|
-
|
|
242
|
-
extra_system = None
|
|
243
|
-
if structured:
|
|
244
|
-
extra_system = (
|
|
245
|
-
"Respond ONLY as valid JSON with keys: "
|
|
246
|
-
"final_answer (string), assumptions (array of strings), data_needed (array of strings), "
|
|
247
|
-
"sql (string or null), python (string or null). "
|
|
248
|
-
"No markdown. No extra keys."
|
|
249
|
-
)
|
|
250
|
-
|
|
251
|
-
system = self._build_system(extra_system=extra_system)
|
|
252
|
-
|
|
253
|
-
assistant_text = self.loki.ask(
|
|
254
|
-
user_text,
|
|
255
|
-
system=system,
|
|
256
|
-
extra_messages=self.history, # NOTE: history must contain no system roles
|
|
257
|
-
max_tokens=max_tokens,
|
|
258
|
-
temperature=temperature,
|
|
259
|
-
**kwargs,
|
|
52
|
+
base_url=self.workspace.host + "/serving-endpoints"
|
|
260
53
|
)
|
|
261
|
-
|
|
262
|
-
self.history.append({"role": "user", "content": user_text})
|
|
263
|
-
self.history.append({"role": "assistant", "content": assistant_text})
|
|
264
|
-
self._trim()
|
|
265
|
-
|
|
266
|
-
if structured:
|
|
267
|
-
parsed = _try_parse_json_object(assistant_text)
|
|
268
|
-
if parsed is not None:
|
|
269
|
-
return parsed
|
|
270
|
-
|
|
271
|
-
return assistant_text
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
@dataclass
|
|
275
|
-
class SqlChatSession(_BaseChatSession):
|
|
276
|
-
"""
|
|
277
|
-
SQL-only conversational session that generates Databricks SQL.
|
|
278
|
-
|
|
279
|
-
Uses a single system message with strict instructions to output SQL only.
|
|
280
|
-
"""
|
|
281
|
-
|
|
282
|
-
def generate_sql(
|
|
283
|
-
self,
|
|
284
|
-
request: str,
|
|
285
|
-
*,
|
|
286
|
-
max_tokens: int = 12000,
|
|
287
|
-
temperature: Optional[float] = None,
|
|
288
|
-
sql_only: bool = True,
|
|
289
|
-
**kwargs: Any,
|
|
290
|
-
) -> str:
|
|
291
|
-
self._trim()
|
|
292
|
-
|
|
293
|
-
extra_system = None
|
|
294
|
-
if sql_only:
|
|
295
|
-
extra_system = (
|
|
296
|
-
"Reminder: Output ONLY SQL. "
|
|
297
|
-
"If ambiguity exists, use SQL comments (-- TODO ...) and placeholders, but still output SQL only."
|
|
298
|
-
)
|
|
299
|
-
|
|
300
|
-
system = self._build_system(extra_system=extra_system)
|
|
301
|
-
|
|
302
|
-
sql = self.loki.ask(
|
|
303
|
-
request,
|
|
304
|
-
system=system,
|
|
305
|
-
extra_messages=self.history, # history must contain no system roles
|
|
306
|
-
max_tokens=max_tokens,
|
|
307
|
-
temperature=temperature,
|
|
308
|
-
**kwargs,
|
|
309
|
-
).strip()
|
|
310
|
-
|
|
311
|
-
self.history.append({"role": "user", "content": request})
|
|
312
|
-
self.history.append({"role": "assistant", "content": sql})
|
|
313
|
-
self._trim()
|
|
314
|
-
|
|
315
|
-
return _strip_sql_fences(sql)
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
def _strip_sql_fences(text: str) -> str:
|
|
319
|
-
t = text.strip()
|
|
320
|
-
if t.startswith("```"):
|
|
321
|
-
lines = t.splitlines()
|
|
322
|
-
lines = lines[1:] # drop ``` or ```sql
|
|
323
|
-
if lines and lines[-1].strip().startswith("```"):
|
|
324
|
-
lines = lines[:-1]
|
|
325
|
-
return "\n".join(lines).strip()
|
|
326
|
-
return t
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
def _strip_markdown_fences(text: str) -> str:
|
|
330
|
-
"""
|
|
331
|
-
Remove ```lang ... ``` fences if present.
|
|
332
|
-
Keeps inner content unchanged.
|
|
333
|
-
"""
|
|
334
|
-
t = text.strip()
|
|
335
|
-
if not t.startswith("```"):
|
|
336
|
-
return t
|
|
337
|
-
|
|
338
|
-
lines = t.splitlines()
|
|
339
|
-
if not lines:
|
|
340
|
-
return t
|
|
341
|
-
|
|
342
|
-
# Drop first line: ``` or ```json
|
|
343
|
-
lines = lines[1:]
|
|
344
|
-
|
|
345
|
-
# Drop last line if it's ```
|
|
346
|
-
if lines and lines[-1].strip().startswith("```"):
|
|
347
|
-
lines = lines[:-1]
|
|
348
|
-
|
|
349
|
-
return "\n".join(lines).strip()
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
def _try_parse_json_object(text: str) -> Optional[Dict[str, Any]]:
|
|
353
|
-
t = _strip_markdown_fences(text).strip()
|
|
354
|
-
|
|
355
|
-
# Best effort extraction if there's extra junk around JSON
|
|
356
|
-
if not t.startswith("{"):
|
|
357
|
-
start = t.find("{")
|
|
358
|
-
end = t.rfind("}")
|
|
359
|
-
if start != -1 and end != -1 and end > start:
|
|
360
|
-
t = t[start : end + 1]
|
|
361
|
-
|
|
362
|
-
try:
|
|
363
|
-
obj = json.loads(t)
|
|
364
|
-
except Exception:
|
|
365
|
-
return None
|
|
366
|
-
|
|
367
|
-
if not isinstance(obj, dict):
|
|
368
|
-
return None
|
|
369
|
-
|
|
370
|
-
required = {"final_answer", "assumptions", "data_needed", "sql", "python"}
|
|
371
|
-
if not required.issubset(set(obj.keys())):
|
|
372
|
-
return None
|
|
373
|
-
|
|
374
|
-
return obj
|
|
@@ -16,7 +16,6 @@ from threading import Thread
|
|
|
16
16
|
from types import ModuleType
|
|
17
17
|
from typing import TYPE_CHECKING, Optional, Any, Callable, List, Dict, Union, Iterable, Tuple
|
|
18
18
|
|
|
19
|
-
from .exceptions import CommandAborted
|
|
20
19
|
from ...libs.databrickslib import databricks_sdk
|
|
21
20
|
from ...pyutils.exceptions import raise_parsed_traceback
|
|
22
21
|
from ...pyutils.expiring_dict import ExpiringDict
|
|
@@ -111,12 +110,16 @@ class ExecutionContext:
|
|
|
111
110
|
def __exit__(self, exc_type, exc_val, exc_tb):
|
|
112
111
|
"""Exit the context manager and close the remote context if created."""
|
|
113
112
|
if not self._was_connected:
|
|
114
|
-
self.close(
|
|
113
|
+
self.close()
|
|
115
114
|
self.cluster.__exit__(exc_type, exc_val=exc_val, exc_tb=exc_tb)
|
|
116
115
|
|
|
117
116
|
def __del__(self):
|
|
118
117
|
"""Best-effort cleanup for the remote execution context."""
|
|
119
|
-
self.
|
|
118
|
+
if self.context_id:
|
|
119
|
+
try:
|
|
120
|
+
Thread(target=self.close).start()
|
|
121
|
+
except BaseException:
|
|
122
|
+
pass
|
|
120
123
|
|
|
121
124
|
@property
|
|
122
125
|
def remote_metadata(self) -> RemoteMetadata:
|
|
@@ -177,7 +180,7 @@ print(json.dumps(meta))"""
|
|
|
177
180
|
"""
|
|
178
181
|
return self.cluster.workspace.sdk()
|
|
179
182
|
|
|
180
|
-
def
|
|
183
|
+
def create_command(
|
|
181
184
|
self,
|
|
182
185
|
language: "Language",
|
|
183
186
|
) -> any:
|
|
@@ -194,17 +197,15 @@ print(json.dumps(meta))"""
|
|
|
194
197
|
self.cluster
|
|
195
198
|
)
|
|
196
199
|
|
|
197
|
-
client = self._workspace_client().command_execution
|
|
198
|
-
|
|
199
200
|
try:
|
|
200
|
-
created =
|
|
201
|
+
created = self._workspace_client().command_execution.create_and_wait(
|
|
201
202
|
cluster_id=self.cluster.cluster_id,
|
|
202
203
|
language=language,
|
|
203
204
|
)
|
|
204
205
|
except:
|
|
205
206
|
self.cluster.ensure_running()
|
|
206
207
|
|
|
207
|
-
created =
|
|
208
|
+
created = self._workspace_client().command_execution.create_and_wait(
|
|
208
209
|
cluster_id=self.cluster.cluster_id,
|
|
209
210
|
language=language,
|
|
210
211
|
)
|
|
@@ -216,38 +217,42 @@ print(json.dumps(meta))"""
|
|
|
216
217
|
|
|
217
218
|
created = getattr(created, "response", created)
|
|
218
219
|
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
return self
|
|
220
|
+
return created
|
|
222
221
|
|
|
223
222
|
def connect(
|
|
224
223
|
self,
|
|
225
|
-
language: Optional["Language"] = None
|
|
226
|
-
reset: bool = False
|
|
224
|
+
language: Optional["Language"] = None
|
|
227
225
|
) -> "ExecutionContext":
|
|
228
226
|
"""Create a remote command execution context if not already open.
|
|
229
227
|
|
|
230
228
|
Args:
|
|
231
229
|
language: Optional language override for the context.
|
|
232
|
-
reset: Reset existing if connected
|
|
233
230
|
|
|
234
231
|
Returns:
|
|
235
232
|
The connected ExecutionContext instance.
|
|
236
233
|
"""
|
|
237
234
|
if self.context_id is not None:
|
|
238
|
-
|
|
239
|
-
return self
|
|
235
|
+
return self
|
|
240
236
|
|
|
241
|
-
|
|
237
|
+
self.language = language or self.language
|
|
242
238
|
|
|
243
|
-
|
|
239
|
+
if self.language is None:
|
|
240
|
+
self.language = Language.PYTHON
|
|
244
241
|
|
|
245
|
-
|
|
246
|
-
language = Language.PYTHON
|
|
242
|
+
ctx = self.create_command(language=self.language)
|
|
247
243
|
|
|
248
|
-
|
|
244
|
+
context_id = ctx.id
|
|
245
|
+
if not context_id:
|
|
246
|
+
raise RuntimeError("Failed to create command execution context")
|
|
249
247
|
|
|
250
|
-
|
|
248
|
+
self.context_id = context_id
|
|
249
|
+
LOGGER.info(
|
|
250
|
+
"Opened execution context for %s",
|
|
251
|
+
self
|
|
252
|
+
)
|
|
253
|
+
return self
|
|
254
|
+
|
|
255
|
+
def close(self) -> None:
|
|
251
256
|
"""Destroy the remote command execution context if it exists.
|
|
252
257
|
|
|
253
258
|
Returns:
|
|
@@ -256,23 +261,12 @@ print(json.dumps(meta))"""
|
|
|
256
261
|
if not self.context_id:
|
|
257
262
|
return
|
|
258
263
|
|
|
259
|
-
client = self._workspace_client()
|
|
260
|
-
|
|
261
264
|
try:
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
else:
|
|
268
|
-
Thread(
|
|
269
|
-
target=client.command_execution.destroy,
|
|
270
|
-
kwargs={
|
|
271
|
-
"cluster_id": self.cluster.cluster_id,
|
|
272
|
-
"context_id": self.context_id,
|
|
273
|
-
}
|
|
274
|
-
).start()
|
|
275
|
-
except BaseException:
|
|
265
|
+
self._workspace_client().command_execution.destroy(
|
|
266
|
+
cluster_id=self.cluster.cluster_id,
|
|
267
|
+
context_id=self.context_id,
|
|
268
|
+
)
|
|
269
|
+
except Exception:
|
|
276
270
|
# non-fatal: context cleanup best-effort
|
|
277
271
|
pass
|
|
278
272
|
finally:
|
|
@@ -471,18 +465,7 @@ print(json.dumps(meta))"""
|
|
|
471
465
|
)
|
|
472
466
|
|
|
473
467
|
try:
|
|
474
|
-
return self._decode_result(
|
|
475
|
-
result,
|
|
476
|
-
result_tag=result_tag,
|
|
477
|
-
print_stdout=print_stdout
|
|
478
|
-
)
|
|
479
|
-
except CommandAborted:
|
|
480
|
-
return self.connect(language=self.language, reset=True).execute_command(
|
|
481
|
-
command=command,
|
|
482
|
-
timeout=timeout,
|
|
483
|
-
result_tag=result_tag,
|
|
484
|
-
print_stdout=print_stdout
|
|
485
|
-
)
|
|
468
|
+
return self._decode_result(result, result_tag=result_tag, print_stdout=print_stdout)
|
|
486
469
|
except ModuleNotFoundError as remote_module_error:
|
|
487
470
|
_MOD_NOT_FOUND_RE = re.compile(r"No module named ['\"]([^'\"]+)['\"]")
|
|
488
471
|
module_name = _MOD_NOT_FOUND_RE.search(str(remote_module_error))
|
|
@@ -677,9 +660,6 @@ with zipfile.ZipFile(buf, "r") as zf:
|
|
|
677
660
|
if res.result_type == ResultType.ERROR:
|
|
678
661
|
message = res.cause or "Command execution failed"
|
|
679
662
|
|
|
680
|
-
if "client terminated the session" in message:
|
|
681
|
-
raise CommandAborted(message)
|
|
682
|
-
|
|
683
663
|
if self.language == Language.PYTHON:
|
|
684
664
|
raise_parsed_traceback(message)
|
|
685
665
|
|
|
@@ -688,7 +668,6 @@ with zipfile.ZipFile(buf, "r") as zf:
|
|
|
688
668
|
or getattr(res, "stack_trace", None)
|
|
689
669
|
or getattr(res, "traceback", None)
|
|
690
670
|
)
|
|
691
|
-
|
|
692
671
|
if remote_tb:
|
|
693
672
|
message = f"{message}\n{remote_tb}"
|
|
694
673
|
|
|
@@ -20,6 +20,7 @@ from .path_kind import DatabricksPathKind
|
|
|
20
20
|
from ...libs.databrickslib import databricks
|
|
21
21
|
from ...libs.pandaslib import PandasDataFrame
|
|
22
22
|
from ...libs.polarslib import polars, PolarsDataFrame
|
|
23
|
+
from ...pyutils import retry
|
|
23
24
|
from ...types.cast.registry import convert
|
|
24
25
|
from ...types.file_format import ExcelFileFormat
|
|
25
26
|
|
|
@@ -30,6 +31,7 @@ if databricks is not None:
|
|
|
30
31
|
ResourceDoesNotExist,
|
|
31
32
|
BadRequest,
|
|
32
33
|
)
|
|
34
|
+
from databricks.sdk.errors import InternalError
|
|
33
35
|
|
|
34
36
|
if TYPE_CHECKING:
|
|
35
37
|
from .path import DatabricksPath
|
|
@@ -1036,6 +1038,7 @@ class DatabricksVolumeIO(DatabricksIO):
|
|
|
1036
1038
|
end = start + length
|
|
1037
1039
|
return data[start:end]
|
|
1038
1040
|
|
|
1041
|
+
@retry(exceptions=(InternalError,))
|
|
1039
1042
|
def write_all_bytes(self, data: bytes):
|
|
1040
1043
|
"""Write bytes to a volume file.
|
|
1041
1044
|
|
|
@@ -30,7 +30,7 @@ from ...types.cast.registry import convert, register_converter
|
|
|
30
30
|
from ...types.file_format import ExcelFileFormat
|
|
31
31
|
|
|
32
32
|
if databricks is not None:
|
|
33
|
-
from databricks.sdk.service.catalog import VolumeType, VolumeInfo
|
|
33
|
+
from databricks.sdk.service.catalog import VolumeType, PathOperation, VolumeInfo
|
|
34
34
|
from databricks.sdk.service.workspace import ObjectType
|
|
35
35
|
from databricks.sdk.errors.platform import (
|
|
36
36
|
NotFound,
|
|
@@ -1236,8 +1236,6 @@ class DatabricksPath:
|
|
|
1236
1236
|
self,
|
|
1237
1237
|
operation: Optional["PathOperation"] = None
|
|
1238
1238
|
):
|
|
1239
|
-
from databricks.sdk.service.catalog import PathOperation
|
|
1240
|
-
|
|
1241
1239
|
if self.kind != DatabricksPathKind.VOLUME:
|
|
1242
1240
|
raise ValueError(f"Cannot generate temporary credentials for {repr(self)}")
|
|
1243
1241
|
|
|
@@ -652,9 +652,9 @@ class Workspace:
|
|
|
652
652
|
"""Return a Cluster helper bound to this workspace.
|
|
653
653
|
|
|
654
654
|
Args:
|
|
655
|
-
workspace: Optional workspace override.
|
|
656
655
|
cluster_id: Optional cluster id.
|
|
657
656
|
cluster_name: Optional cluster name.
|
|
657
|
+
**kwargs: Additional Cluster parameters.
|
|
658
658
|
|
|
659
659
|
Returns:
|
|
660
660
|
A Cluster instance.
|
|
@@ -662,7 +662,7 @@ class Workspace:
|
|
|
662
662
|
from ..compute.cluster import Cluster
|
|
663
663
|
|
|
664
664
|
return Cluster(
|
|
665
|
-
workspace=self
|
|
665
|
+
workspace=self,
|
|
666
666
|
cluster_id=cluster_id,
|
|
667
667
|
cluster_name=cluster_name,
|
|
668
668
|
)
|
|
@@ -671,19 +671,10 @@ class Workspace:
|
|
|
671
671
|
self,
|
|
672
672
|
workspace: Optional["Workspace"] = None,
|
|
673
673
|
):
|
|
674
|
-
"""
|
|
675
|
-
Return a Cluster helper bound to this workspace.
|
|
676
|
-
|
|
677
|
-
Args:
|
|
678
|
-
workspace: Optional workspace override.
|
|
679
|
-
|
|
680
|
-
Returns:
|
|
681
|
-
A Loki AI instance.
|
|
682
|
-
"""
|
|
683
674
|
from ..ai.loki import Loki
|
|
684
675
|
|
|
685
676
|
return Loki(
|
|
686
|
-
workspace=self
|
|
677
|
+
workspace=self,
|
|
687
678
|
)
|
|
688
679
|
|
|
689
680
|
# ---------------------------------------------------------------------------
|
yggdrasil/pyutils/__init__.py
CHANGED
yggdrasil/pyutils/python_env.py
CHANGED
|
@@ -20,7 +20,7 @@ from dataclasses import dataclass, field
|
|
|
20
20
|
from pathlib import Path
|
|
21
21
|
from typing import Any, Iterable, Iterator, Mapping, MutableMapping, Optional, Union, List, Tuple
|
|
22
22
|
|
|
23
|
-
from .modules import PipIndexSettings
|
|
23
|
+
from yggdrasil.pyutils.modules import PipIndexSettings
|
|
24
24
|
|
|
25
25
|
log = logging.getLogger(__name__)
|
|
26
26
|
|
yggdrasil/version.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "0.1.
|
|
1
|
+
__version__ = "0.1.57"
|
yggdrasil/exceptions.py
DELETED
|
File without changes
|
|
File without changes
|
|
File without changes
|