sunholo 0.116.2__py3-none-any.whl → 0.118.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sunholo/chunker/message_data.py +1 -1
- sunholo/chunker/pdfs.py +1 -1
- sunholo/chunker/publish.py +1 -1
- sunholo/cli/cli_init.py +1 -1
- sunholo/genai/__init__.py +2 -1
- sunholo/genai/genaiv2.py +542 -0
- sunholo/{types.py → langchain_types.py} +5 -5
- sunholo/langfuse/evals.py +2 -1
- sunholo/mcp/cli.py +0 -2
- sunholo/templates/agent/__init__.py +0 -0
- sunholo/templates/agent/agent_service.py +157 -0
- sunholo/templates/agent/app.py +16 -0
- sunholo/templates/agent/my_log.py +3 -0
- sunholo/templates/agent/tools/__init__.py +0 -0
- sunholo/templates/agent/tools/your_agent.py +78 -0
- sunholo/templates/agent/vac_service.py +73 -0
- sunholo/templates/project/__init__.py +0 -0
- sunholo/templates/project/app.py +17 -0
- sunholo/templates/project/my_log.py +3 -0
- sunholo/templates/project/vac_service.py +71 -0
- sunholo/templates/system_services/__init__.py +0 -0
- sunholo/templates/system_services/app.py +49 -0
- sunholo/templates/system_services/my_log.py +3 -0
- sunholo/utils/big_context.py +11 -4
- {sunholo-0.116.2.dist-info → sunholo-0.118.1.dist-info}/METADATA +15 -7
- {sunholo-0.116.2.dist-info → sunholo-0.118.1.dist-info}/RECORD +30 -15
- {sunholo-0.116.2.dist-info → sunholo-0.118.1.dist-info}/WHEEL +1 -1
- {sunholo-0.116.2.dist-info → sunholo-0.118.1.dist-info}/LICENSE.txt +0 -0
- {sunholo-0.116.2.dist-info → sunholo-0.118.1.dist-info}/entry_points.txt +0 -0
- {sunholo-0.116.2.dist-info → sunholo-0.118.1.dist-info}/top_level.txt +0 -0
sunholo/chunker/message_data.py
CHANGED
sunholo/chunker/pdfs.py
CHANGED
sunholo/chunker/publish.py
CHANGED
@@ -2,7 +2,7 @@ from ..custom_logging import log
 from ..pubsub import PubSubManager
 from ..utils.parsers import contains_url, extract_urls
 from ..utils.gcp_project import get_gcp_project
-from ..types import Document
+from ..langchain_types import Document

 def publish_if_urls(the_content, vector_name):
     """
sunholo/cli/cli_init.py
CHANGED
@@ -200,7 +200,7 @@ def write_vac_config(project_dir: str, service_name: str):

     # Write the YAML configuration to the file
     with open(vac_config_path, 'w') as file:
-        yaml.dump(vac_config_content, file
+        yaml.dump(vac_config_content, file)

     print(f"{vac_config_path} written successfully with service name '{service_name}'.")

sunholo/genai/__init__.py
CHANGED
@@ -1,4 +1,5 @@
 from .process_funcs_cls import GenAIFunctionProcessor
 from .safety import genai_safety
 from .init import init_genai
-from .file_handling import download_gcs_upload_genai, construct_file_content
+from .file_handling import download_gcs_upload_genai, construct_file_content
+from .genaiv2 import GoogleAI, GoogleAIConfig
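
With this change the new v2 client is re-exported at package level, so downstream code can import it without referencing the genaiv2 module directly:

    from sunholo.genai import GoogleAI, GoogleAIConfig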
sunholo/genai/genaiv2.py
ADDED
@@ -0,0 +1,542 @@
+from typing import Optional, List, Union, Dict, Any, TypedDict, TYPE_CHECKING, Generator
+
+import enum
+import json
+from pydantic import BaseModel
+import time
+try:
+    from google import genai
+    from google.genai import types
+except ImportError:
+    genai = None
+
+try:
+    import sounddevice as sd
+except ImportError:
+    sd = None
+except OSError:
+    sd = None
+
+try:
+    import numpy as np
+except ImportError:
+    np = None
+
+try:
+    import cv2 as cv2
+except ImportError:
+    cv2 = None
+
+if TYPE_CHECKING:
+    from google import genai
+    from google.genai import types
+    from google.genai.types import Tool, GenerateContentConfig, EmbedContentConfig
+else:
+    genai = None
+
+class GoogleAIConfig(BaseModel):
+    """Configuration class for GoogleAI client initialization.
+    See https://ai.google.dev/gemini-api/docs/models/gemini-v2
+    """
+    api_key: Optional[str] = None
+    project_id: Optional[str] = None
+    location: str = "us-central1"
+    use_vertex: bool = False
+
+class GoogleAI:
+    """A wrapper class for Google's v2 Generative AI APIs.
+    See https://ai.google.dev/gemini-api/docs/models/gemini-v2
+    """
+
+    def __init__(self, config: GoogleAIConfig):
+        """Initialize the GoogleAI client.
+
+        Args:
+            config (GoogleAIConfig): Configuration for client initialization
+        """
+        if genai is None:
+            raise ImportError("GoogleAI requires google-genai to be installed, try sunholo[gcp]")
+        if config.use_vertex:
+            if not config.project_id:
+                raise ValueError("project_id is required for Vertex AI")
+            self.client = genai.Client(
+                vertexai=True,
+                project=config.project_id,
+                location=config.location
+            )
+        else:
+            if not config.api_key:
+                raise ValueError("api_key is required for Google AI API")
+            self.client = genai.Client(api_key=config.api_key)
+
+        self.default_model = "gemini-2.0-flash-exp"
+
+    def google_search_tool(self) -> "types.Tool":
+        from google.genai.types import Tool, GoogleSearch
+        return Tool(
+            google_search = GoogleSearch()
+        )
+    def generate_text(
+        self,
+        prompt: str,
+        model: Optional[str] = None,
+        temperature: float = 0.7,
+        max_output_tokens: int = 1024,
+        top_p: float = 0.95,
+        top_k: int = 20,
+        stop_sequences: Optional[List[str]] = None,
+        system_prompt: Optional[str] = None,
+        tools: Optional[List["types.Tool"]] = None
+    ) -> str:
+        """Generate text using the specified model.
+
+        Args:
+            prompt (str): The input prompt
+            model (Optional[str]): Model name to use
+            temperature (float): Controls randomness (0.0-1.0)
+            max_output_tokens (int): Maximum number of tokens to generate
+            top_p (float): Nucleus sampling parameter
+            top_k (int): Top-k sampling parameter
+            stop_sequences (Optional[List[str]]): Sequences that stop generation
+            system_prompt (Optional[str]): System-level instruction
+            tools: list of python functions or Tool objects
+
+        Returns:
+            str: Generated text response
+        """
+        model = model or self.default_model
+
+        config = types.GenerateContentConfig(
+            temperature=temperature,
+            max_output_tokens=max_output_tokens,
+            top_p=top_p,
+            top_k=top_k,
+            stop_sequences=stop_sequences or [],
+            tools=tools or []
+        )
+
+        if system_prompt:
+            config.system_instruction = system_prompt
+
+        response = self.client.models.generate_content(
+            model=model,
+            contents=prompt,
+            config=config
+        )
+
+        return response.text
+
+    async def generate_text_async(
+        self,
+        prompt: str,
+        model: Optional[str] = None,
+        **kwargs
+    ) -> str:
+        """Async version of generate_text."""
+        model = model or self.default_model
+        response = await self.client.aio.models.generate_content(
+            model=model,
+            contents=prompt,
+            config=types.GenerateContentConfig(**kwargs)
+        )
+        return response.text
+
+    async def _record_audio(
+        self,
+        duration: float = 5.0,
+        sample_rate: int = 16000
+    ) -> bytes:
+        """Internal method to record audio."""
+        if sd is None or np is None:
+            raise ImportError("sounddevice and numpy are required for audio. Install with pip install sunholo[tts]")
+
+        print(f"Recording for {duration} seconds...")
+        audio_data = sd.rec(
+            int(duration * sample_rate),
+            samplerate=sample_rate,
+            channels=1,
+            dtype=np.int16
+        )
+        sd.wait()
+        print("Recording complete")
+        return audio_data.tobytes()
+
+    async def _record_video(
+        self,
+        duration: float = 5.0
+    ) -> List[bytes]:
+        """Internal method to record video frames."""
+        import cv2
+
+        frames = []
+        screen = cv2.VideoCapture(0)
+        start_time = time.time()
+
+        try:
+            while time.time() - start_time < duration:
+                ret, frame = screen.read()
+                if ret:
+                    _, buffer = cv2.imencode('.jpg', frame)
+                    frames.append(buffer.tobytes())
+                time.sleep(0.1)  # Limit frame rate
+        finally:
+            screen.release()
+
+        return frames
+
+    async def _process_responses(self, session) -> List[str]:
+        """Internal method to process session responses."""
+        responses = []
+        i = 1
+        async for response in session.receive():
+            model_turn = response.server_content.model_turn
+            if model_turn is None:
+                continue
+            for part in model_turn.parts:
+                text = part.text
+                print(f"[{i}] {text}")
+                i += 1
+                responses.append(text)
+        return responses
+
+    async def live_async(
+        self,
+        prompt: Optional[Union[str, List[Union[str, bytes]]]] = None,
+        input_type: str = "text",  # "text", "audio", or "video"
+        duration: Optional[float] = None,  # For audio/video recording duration
+        model: Optional[str] = None,
+        **kwargs
+    ) -> str:
+        """Live Multimodal API with support for text, audio, and video inputs.
+
+        Args:
+            input_type: Type of input ("text", "audio", or "video")
+            prompt: Text prompt or list of text/binary chunks
+            duration: Recording duration for audio/video in seconds
+            model: Optional model name
+            **kwargs: Additional configuration parameters
+
+        Returns:
+            str: Generated response text
+        """
+        client = genai.Client(
+            http_options={
+                'api_version': 'v1alpha',
+                'url': 'generativelanguage.googleapis.com',
+            }
+        )
+
+        config = {
+            "generation_config": {"response_modalities": ["TEXT"]}
+        }
+
+        async with client.aio.live.connect(model=self.default_model, config=config) as session:
+            # Handle different input types
+            if input_type == "text":
+                message = {
+                    "client_content": {
+                        "turns": [
+                            {
+                                "parts": [{"text": prompt}],
+                                "role": "user"
+                            }
+                        ],
+                        "turn_complete": True
+                    }
+                }
+                await session.send(json.dumps(message), end_of_turn=True)
+
+            elif input_type == "audio":
+                audio_data = await self._record_audio(duration=duration or 5.0)
+                message = {"media_chunks": [audio_data]}
+                await session.send(message)
+                await session.send(json.dumps({"turn_complete": True}), end_of_turn=True)
+
+            elif input_type == "video":
+                frames = await self._record_video(duration=duration or 5.0)
+                for frame in frames:
+                    message = {"media_chunks": [frame]}
+                    await session.send(message)
+                await session.send(json.dumps({"turn_complete": True}), end_of_turn=True)
+
+            else:
+                raise ValueError(f"Unsupported input_type: {input_type}")
+
+            # Process responses
+            responses = await self._process_responses(session)
+        return "OK"
+
+    def gs_uri(self, uri, mime_type=None):
+
+        if mime_type is None:
+            from ..utils.mime import guess_mime_type
+            mime_type = guess_mime_type(uri)
+
+        return types.Part.from_uri(
+            file_uri=uri,
+            mime_type=mime_type,
+        )
+
+    def local_file(self, filename, mime_type=None):
+        if mime_type is None:
+            from ..utils.mime import guess_mime_type
+            mime_type = guess_mime_type(filename)
+
+        with open(filename, 'rb') as f:
+            image_bytes = f.read()
+
+        if image_bytes and mime_type:
+            return types.Part.from_bytes(
+                data=image_bytes,
+                mime_type=mime_type,
+            )
+        else:
+            raise ValueError(f"Could not read bytes or mime_type for {filename=} - {mime_type=}")
+
+    def stream_text(
+        self,
+        prompt: str,
+        model: Optional[str] = None,
+        **kwargs
+    ) -> "Generator[str, None, None]":
+        """Stream text generation responses.
+
+        Args:
+            prompt (str): The input prompt
+            model (Optional[str]): Model name to use
+            **kwargs: Additional configuration parameters
+
+        Yields:
+            str: Chunks of generated text
+        """
+        model = model or self.default_model
+        for chunk in self.client.models.generate_content_stream(
+            model=model,
+            contents=prompt,
+            config=types.GenerateContentConfig(**kwargs)
+        ):
+            yield chunk.text
+
+    def get_embedding(
+        self,
+        text: Union[str, List[str]],
+        model: str = "text-embedding-004",
+        output_dim: Optional[int] = None
+    ) -> Union[List[float], List[List[float]]]:
+        """Get text embeddings.
+
+        Args:
+            text (Union[str, List[str]]): Text to embed
+            model (str): Embedding model to use
+            output_dim (Optional[int]): Desired embedding dimension
+
+        Returns:
+            Union[List[float], List[List[float]]]: Embeddings
+        """
+        config = {}
+        if output_dim:
+            config["output_dimensionality"] = output_dim
+
+        response = self.client.models.embed_content(
+            model=model,
+            contents=text,
+            config=types.EmbedContentConfig(**config) if config else None
+        )
+
+        if isinstance(text, str):
+            return response.embeddings[0].values
+        return [emb.values for emb in response.embeddings]
+
+    def structured_output(
+        self,
+        prompt: str,
+        schema: Union[BaseModel, Dict, type, TypedDict],
+        model: Optional[str] = None,
+        is_list: bool = False
+    ) -> Dict:
+        """Generate structured output according to a schema.
+
+        Args:
+            prompt (str): Input prompt
+            schema (Union[BaseModel, Dict, type]): Schema definition (Pydantic model, TypedDict, or raw schema)
+            model (Optional[str]): Model to use
+            is_list (bool): Whether to wrap the schema in a list
+
+        Returns:
+            Dict: Structured response matching schema
+        """
+        model = model or self.default_model
+
+        # Handle TypedDict differently than Pydantic models
+        if isinstance(schema, type):
+            if hasattr(schema, '__origin__') and schema.__origin__ is list:
+                # Handle list[TypedDict] case
+                inner_type = schema.__args__[0]
+                config = types.GenerateContentConfig(
+                    response_mime_type='application/json',
+                    response_schema=schema
+                )
+            elif hasattr(schema, '__annotations__'):  # TypedDict check
+                # Create API-compatible schema
+                schema_dict = {
+                    'type': 'OBJECT',
+                    'properties': {},
+                    'required': list(schema.__annotations__.keys())  # TypedDict fields are required by default
+                }
+
+                for field_name, field_type in schema.__annotations__.items():
+                    if hasattr(field_type, '__base__') and field_type.__base__ == enum.Enum:
+                        schema_dict['properties'][field_name] = {
+                            'type': 'STRING',
+                            'enum': [e.value for e in field_type]
+                        }
+                    elif field_type is str:
+                        schema_dict['properties'][field_name] = {'type': 'STRING'}
+                    elif field_type is int:
+                        schema_dict['properties'][field_name] = {'type': 'INTEGER'}
+                    elif field_type is float:
+                        schema_dict['properties'][field_name] = {'type': 'NUMBER'}
+                    elif field_type is bool:
+                        schema_dict['properties'][field_name] = {'type': 'BOOLEAN'}
+                    else:
+                        schema_dict['properties'][field_name] = {'type': 'STRING'}
+
+                if is_list:
+                    schema_dict = {
+                        'type': 'ARRAY',
+                        'items': schema_dict
+                    }
+
+                config = types.GenerateContentConfig(
+                    response_mime_type='application/json',
+                    response_schema=schema_dict
+                )
+        elif hasattr(schema, '__origin__') and schema.__origin__ is list:
+            # Handle List[TypeDict] case
+            inner_type = schema.__args__[0]
+            if hasattr(inner_type, '__annotations__'):  # Check if inner type is TypedDict
+                # Create API-compatible schema for the inner TypedDict
+                schema_dict = {
+                    'type': 'OBJECT',
+                    'properties': {},
+                    'required': list(inner_type.__annotations__.keys())
+                }
+
+                for field_name, field_type in inner_type.__annotations__.items():
+                    if hasattr(field_type, '__base__') and field_type.__base__ == enum.Enum:
+                        schema_dict['properties'][field_name] = {
+                            'type': 'STRING',
+                            'enum': [e.value for e in field_type]
+                        }
+                    elif field_type is str:
+                        schema_dict['properties'][field_name] = {'type': 'STRING'}
+                    elif field_type is int:
+                        schema_dict['properties'][field_name] = {'type': 'INTEGER'}
+                    elif field_type is float:
+                        schema_dict['properties'][field_name] = {'type': 'NUMBER'}
+                    elif field_type is bool:
+                        schema_dict['properties'][field_name] = {'type': 'BOOLEAN'}
+                    else:
+                        schema_dict['properties'][field_name] = {'type': 'STRING'}
+
+                # Wrap in array type
+                array_schema = {
+                    'type': 'ARRAY',
+                    'items': schema_dict
+                }
+
+                config = types.GenerateContentConfig(
+                    response_mime_type='application/json',
+                    response_schema=array_schema
+                )
+        elif issubclass(schema, BaseModel):
+            # Convert Pydantic model to Google's schema format
+            schema_dict = {
+                'type': 'OBJECT',
+                'properties': {},
+                'required': []
+            }
+
+            for field_name, field_info in schema.model_fields.items():
+                # Handle different field types including enums
+                if isinstance(field_info.annotation, type) and issubclass(field_info.annotation, enum.Enum):
+                    field_type = {
+                        'type': 'STRING',
+                        'enum': [e.value for e in field_info.annotation]
+                    }
+                elif field_info.annotation is str:
+                    field_type = {'type': 'STRING'}
+                elif field_info.annotation is int:
+                    field_type = {'type': 'INTEGER'}
+                elif field_info.annotation is float:
+                    field_type = {'type': 'NUMBER'}
+                elif field_info.annotation is bool:
+                    field_type = {'type': 'BOOLEAN'}
+                elif field_info.annotation is list or (
+                    hasattr(field_info.annotation, '__origin__') and
+                    field_info.annotation.__origin__ is list
+                ):
+                    # Handle typed lists
+                    if hasattr(field_info.annotation, '__args__'):
+                        inner_type = field_info.annotation.__args__[0]
+                        if inner_type is str:
+                            item_type = 'STRING'
+                        elif inner_type is int:
+                            item_type = 'INTEGER'
+                        elif inner_type is float:
+                            item_type = 'NUMBER'
+                        elif inner_type is bool:
+                            item_type = 'BOOLEAN'
+                        else:
+                            item_type = 'STRING'
+                    else:
+                        item_type = 'STRING'
+                    field_type = {'type': 'ARRAY', 'items': {'type': item_type}}
+                else:
+                    field_type = {'type': 'STRING'}
+
+                schema_dict['properties'][field_name] = field_type
+                if field_info.is_required:
+                    schema_dict['required'].append(field_name)
+
+            if is_list:
+                schema_dict = {
+                    'type': 'ARRAY',
+                    'items': schema_dict
+                }
+
+            config = types.GenerateContentConfig(
+                response_mime_type='application/json',
+                response_schema=schema_dict
+            )
+        else:
+            # Handle raw schema dict
+            config = types.GenerateContentConfig(
+                response_mime_type='application/json',
+                response_schema=schema
+            )
+
+        response = self.client.models.generate_content(
+            model=model,
+            contents=prompt,
+            config=config
+        )
+
+        return response.text
+    def count_tokens(self, text: str, model: Optional[str] = None) -> int:
+        """Count the number of tokens in the text.
+
+        Args:
+            text (str): Input text
+            model (Optional[str]): Model to use for tokenization
+
+        Returns:
+            int: Number of tokens
+        """
+        model = model or self.default_model
+        response = self.client.models.count_tokens(
+            model=model,
+            contents=text
+        )
+        return response.total_tokens
+
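
The file above defines the whole public surface of the new wrapper. A minimal usage sketch based only on the signatures it adds; the API key, prompts, and the Recipe schema are placeholders, and gemini-2.0-flash-exp is the default model set in __init__:

    from typing import TypedDict
    from sunholo.genai import GoogleAI, GoogleAIConfig

    # Placeholder key; use_vertex=False selects the Google AI API key path
    ai = GoogleAI(GoogleAIConfig(api_key="YOUR_API_KEY"))

    # Plain generation and streaming
    print(ai.generate_text("Say hello in three languages"))
    for chunk in ai.stream_text("Count to five"):
        print(chunk, end="")

    # Structured output from a TypedDict: the method builds a JSON response
    # schema from the annotations and returns the model's JSON text
    class Recipe(TypedDict):
        name: str
        minutes: int

    print(ai.structured_output("Invent a quick pasta recipe", schema=Recipe))

    # Embeddings and token counting
    vec = ai.get_embedding("hello world")
    n = ai.count_tokens("hello world")

Note that structured_output is annotated as returning Dict but actually returns response.text, so callers should expect a JSON string to parse.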
sunholo/{types.py → langchain_types.py}
RENAMED

@@ -1,10 +1,9 @@
-from typing import Any, Dict, Optional, TYPE_CHECKING, Union
 from dataclasses import dataclass, asdict
 import json
+from typing import Dict, Any

-
-
-
+# Note: Moved TYPE_CHECKING to only be used where needed
+from typing import TYPE_CHECKING

 @dataclass
 class Document:

@@ -36,7 +35,8 @@ class Document:
         """Convert to JSON string - for compatibility with LangChain's Document."""
         return json.dumps(self.to_dict())

-
+# Move the type checking import and annotation inside the function
+def convert_to_langchain_doc(doc: Document) -> Any:  # Remove Union and LangchainDocument from return type
     """Convert our Document to a LangChain Document.

     Returns Any when LangChain isn't available to avoid type errors.
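
Because types.py is renamed to langchain_types.py, any downstream import of the old path breaks; the publish.py hunk earlier shows the in-package fix. A hypothetical downstream migration:

    # Before (0.116.2)
    from sunholo.types import Document

    # After (0.118.1)
    from sunholo.langchain_types import Document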
sunholo/langfuse/evals.py
CHANGED
@@ -2,7 +2,7 @@ import os
 import json

 from ..pubsub import decode_pubsub_message
-from langfuse import Langfuse
+
 import traceback


@@ -55,6 +55,7 @@ def direct_langfuse_evals(data, eval_funcs: list=[eval_length]):


 def do_evals(trace_id, eval_funcs: list=[eval_length], **kwargs) -> dict:
+    from langfuse import Langfuse
     # Initialize Langfuse with environment variables
     langfuse = Langfuse(
         secret_key=os.environ["LANGFUSE_SECRET_KEY"],
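
Moving the Langfuse import from module level into do_evals defers the optional dependency: the evals module can now be imported even when the langfuse package is absent, and any ImportError only surfaces when evals actually run. A generic sketch of the pattern, with a hypothetical run_evals name rather than the package's exact code:

    def run_evals(trace_id: str) -> dict:
        # Deferred import: the module loads without langfuse installed;
        # the failure only surfaces if this code path executes
        from langfuse import Langfuse
        client = Langfuse()
        return {"trace_id": trace_id}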
sunholo/mcp/cli.py
CHANGED
@@ -3,7 +3,6 @@ import asyncio
 from typing import Any, Sequence
 from functools import lru_cache
 import subprocess
-from ..utils.version import sunholo_version

 try:
     from mcp.server import Server

@@ -68,7 +67,6 @@ class SunholoMCPServer:
         async def read_resource(uri: AnyUrl) -> str:
             """Read Sunholo resources based on URI"""
             logger.info(f"{uri} available")
-            console.print(f"{uri} available")
             if str(uri) == "sunholo://vacs/list":
                 try:
                     # Execute sunholo vac list command