sunholo 0.143.16__py3-none-any.whl → 0.144.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1042 @@
+ # Copyright [2024] [Holosun ApS]
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ from __future__ import annotations
+
+ import json
+ import traceback
+ import datetime
+ import uuid
+ import inspect
+ import asyncio
+ from typing import Dict, List, Optional, Callable, Any, TYPE_CHECKING
+ from functools import partial
+ from contextlib import asynccontextmanager
+
+ if TYPE_CHECKING:
+     from fastapi import FastAPI, Request, Response, HTTPException
+     from fastapi.responses import StreamingResponse, JSONResponse
+     from pydantic import BaseModel
+
+ try:
+     from fastapi import FastAPI, Request, Response, HTTPException
+     from fastapi.responses import StreamingResponse, JSONResponse
+     from pydantic import BaseModel
+     FASTAPI_AVAILABLE = True
+ except ImportError:
+     FastAPI = None
+     Request = None
+     Response = None
+     HTTPException = None
+     StreamingResponse = None
+     JSONResponse = None
+     BaseModel = object
+     FASTAPI_AVAILABLE = False
+
+ from ..chat_history import extract_chat_history_with_cache, extract_chat_history_async_cached
+ from ...qna.parsers import parse_output
+ from ...streaming import start_streaming_chat, start_streaming_chat_async
+ from ...archive import archive_qa
+ from ...custom_logging import log
+ from ...utils import ConfigManager
+ from ...utils.version import sunholo_version
+
+ try:
+     from ...mcp.mcp_manager import MCPClientManager
+ except ImportError:
+     MCPClientManager = None
+
+ try:
+     from ...mcp.vac_mcp_server import VACMCPServer
+ except ImportError:
+     VACMCPServer = None
+
+ try:
+     from ...a2a.vac_a2a_agent import VACA2AAgent
+ except (ImportError, SyntaxError):
+     VACA2AAgent = None
+
+
+ class VACRequest(BaseModel):
+     """Request model for VAC endpoints."""
+     user_input: str
+     chat_history: Optional[List] = None
+     stream_wait_time: Optional[int] = 7
+     stream_timeout: Optional[int] = 120
+     vector_name: Optional[str] = None
+     trace_id: Optional[str] = None
+     eval_percent: Optional[float] = 0.01
+
+
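+ # Illustrative request body for the VAC endpoints below (values are
+ # placeholders; defaults come from the VACRequest model above):
+ #
+ #   {
+ #       "user_input": "What is in the knowledge base?",
+ #       "chat_history": [],
+ #       "stream_wait_time": 7,
+ #       "stream_timeout": 120
+ #   }
+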
+ class VACRoutesFastAPI:
+     """
+     FastAPI implementation of VAC routes with streaming support.
+
+     This class provides a FastAPI-compatible version of the Flask VACRoutes,
+     with proper async streaming support using callbacks.
+
+     Usage Example:
+     ```python
+     from fastapi import FastAPI
+     from sunholo.agents.fastapi import VACRoutesFastAPI
+
+     app = FastAPI()
+
+     async def stream_interpreter(question, vector_name, chat_history, callback, **kwargs):
+         # Implement your streaming logic with callbacks
+         ...
+
+     async def vac_interpreter(question, vector_name, chat_history, **kwargs):
+         # Implement your static VAC logic
+         ...
+
+     vac_routes = VACRoutesFastAPI(
+         app,
+         stream_interpreter,
+         vac_interpreter,
+         enable_mcp_server=True
+     )
+     ```
+     """
+
+     def __init__(
+         self,
+         app: FastAPI,
+         stream_interpreter: Callable,
+         vac_interpreter: Optional[Callable] = None,
+         additional_routes: Optional[List[Dict]] = None,
+         mcp_servers: Optional[List[Dict[str, Any]]] = None,
+         add_langfuse_eval: bool = True,
+         enable_mcp_server: bool = False,
+         enable_a2a_agent: bool = False,
+         a2a_vac_names: Optional[List[str]] = None
+     ):
+         """
+         Initialize FastAPI VAC routes.
+
+         Args:
+             app: FastAPI application instance
+             stream_interpreter: Async or sync function for streaming responses
+             vac_interpreter: Optional function for non-streaming responses
+             additional_routes: List of additional routes to register
+             mcp_servers: List of MCP server configurations
+             add_langfuse_eval: Whether to add Langfuse evaluation
+             enable_mcp_server: Whether to enable MCP server endpoint
+             enable_a2a_agent: Whether to enable A2A agent endpoints
+             a2a_vac_names: List of VAC names for A2A agent
+         """
+         self.app = app
+         self.stream_interpreter = stream_interpreter
+         # A bare partial() wrapper adds nothing; use the bound method directly
+         self.vac_interpreter = vac_interpreter or self.vac_interpreter_default
+
+         # Detect if interpreters are async
+         self.stream_is_async = inspect.iscoroutinefunction(stream_interpreter)
+         self.vac_is_async = inspect.iscoroutinefunction(self.vac_interpreter)
+
+         # MCP client initialization
+         self.mcp_servers = mcp_servers or []
+         self.mcp_client_manager = MCPClientManager() if MCPClientManager else None
+         self._mcp_initialized = False
+
+         # MCP server initialization
+         self.enable_mcp_server = enable_mcp_server
+         self.vac_mcp_server = None
+         if self.enable_mcp_server and VACMCPServer:
+             self.vac_mcp_server = VACMCPServer(
+                 stream_interpreter=self.stream_interpreter,
+                 vac_interpreter=self.vac_interpreter
+             )
+
+         # A2A agent initialization
+         self.enable_a2a_agent = enable_a2a_agent
+         self.vac_a2a_agent = None
+         self.a2a_vac_names = a2a_vac_names
+
+         self.additional_routes = additional_routes or []
+         self.add_langfuse_eval = add_langfuse_eval
+
+         self.register_routes()
+
+     async def vac_interpreter_default(self, question: str, vector_name: str, chat_history=None, **kwargs):
+         """Default VAC interpreter that uses the stream interpreter without streaming."""
+         class NoOpCallback:
+             def on_llm_new_token(self, token):
+                 pass
+             def on_llm_end(self, response):
+                 pass
+             async def async_on_llm_new_token(self, token):
+                 pass
+             async def async_on_llm_end(self, response):
+                 pass
+
+         callback = NoOpCallback()
+
+         if self.stream_is_async:
+             result = await self.stream_interpreter(
+                 question=question,
+                 vector_name=vector_name,
+                 chat_history=chat_history or [],
+                 callback=callback,
+                 **kwargs
+             )
+         else:
+             # Run the sync function in an executor; run_in_executor() only
+             # forwards positional arguments, so bind kwargs with partial()
+             loop = asyncio.get_running_loop()
+             result = await loop.run_in_executor(
+                 None,
+                 partial(
+                     self.stream_interpreter,
+                     question,
+                     vector_name,
+                     chat_history or [],
+                     callback,
+                     **kwargs
+                 )
+             )
+
+         return result
+
+     def register_routes(self):
+         """Register all VAC routes with the FastAPI application."""
+         # Basic routes
+         self.app.get("/")(self.home)
+         self.app.get("/health")(self.health)
+
+         # Streaming endpoints - both SSE and plain text
+         self.app.post("/vac/streaming/{vector_name}")(self.handle_stream_vac)
+         self.app.post("/vac/streaming/{vector_name}/sse")(self.handle_stream_vac_sse)
+
+         # Static VAC endpoint
+         self.app.post("/vac/{vector_name}")(self.handle_process_vac)
+
+         # OpenAI compatible endpoints
+         self.app.get("/openai/health")(self.openai_health)
+         self.app.post("/openai/health")(self.openai_health)
+         self.app.post("/openai/v1/chat/completions")(self.handle_openai_compatible)
+         self.app.post("/openai/v1/chat/completions/{vector_name}")(self.handle_openai_compatible)
+
+         # MCP client routes
+         if self.mcp_servers and self.mcp_client_manager:
+             self.app.get("/mcp/tools")(self.handle_mcp_list_tools)
+             self.app.get("/mcp/tools/{server_name}")(self.handle_mcp_list_tools)
+             self.app.post("/mcp/call")(self.handle_mcp_call_tool)
+             self.app.get("/mcp/resources")(self.handle_mcp_list_resources)
+             self.app.post("/mcp/resources/read")(self.handle_mcp_read_resource)
+
+         # MCP server endpoint
+         if self.enable_mcp_server and self.vac_mcp_server:
+             self.app.post("/mcp")(self.handle_mcp_server)
+             self.app.get("/mcp")(self.handle_mcp_server_info)
+
+         # A2A agent endpoints
+         if self.enable_a2a_agent:
+             self.app.get("/.well-known/agent.json")(self.handle_a2a_agent_card)
+             self.app.post("/a2a/tasks/send")(self.handle_a2a_task_send)
+             self.app.post("/a2a/tasks/sendSubscribe")(self.handle_a2a_task_send_subscribe)
+             self.app.post("/a2a/tasks/get")(self.handle_a2a_task_get)
+             self.app.post("/a2a/tasks/cancel")(self.handle_a2a_task_cancel)
+             self.app.post("/a2a/tasks/pushNotification/set")(self.handle_a2a_push_notification)
+
+         # Register additional custom routes
+         for route in self.additional_routes:
+             self.app.add_api_route(
+                 route["path"],
+                 route["handler"],
+                 methods=route.get("methods", ["GET"])
+             )
+
+         # Set up lifespan for MCP initialization
+         self._setup_lifespan()
+
+     def _setup_lifespan(self):
+         """Set up lifespan context manager for app initialization."""
+         # Only set lifespan if we have MCP servers to initialize
+         if not (self.mcp_servers and self.mcp_client_manager):
+             return
+
+         # Store the existing lifespan if any
+         existing_lifespan = getattr(self.app, 'router', self.app).lifespan_context
+
+         @asynccontextmanager
+         async def lifespan(app: FastAPI):
+             # Startup
+             if not self._mcp_initialized:
+                 await self._initialize_mcp_servers()
+                 self._mcp_initialized = True
+
+             # Call existing lifespan startup if any
+             if existing_lifespan:
+                 async with existing_lifespan(app) as lifespan_state:
+                     yield lifespan_state
+             else:
+                 yield
+
+             # Shutdown (no cleanup needed for now)
+
+         # Set the new lifespan
+         self.app.router.lifespan_context = lifespan
+
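+     # Illustrative constructor configuration. The shape of each mcp_servers
+     # entry follows _initialize_mcp_servers() below ("name", "command",
+     # optional "args"); server package and handler names are placeholders:
+     #
+     #   vac_routes = VACRoutesFastAPI(
+     #       app,
+     #       stream_interpreter,
+     #       mcp_servers=[{"name": "filesystem",
+     #                     "command": "npx",
+     #                     "args": ["-y", "@modelcontextprotocol/server-filesystem", "/tmp"]}],
+     #       additional_routes=[{"path": "/custom", "handler": my_handler, "methods": ["GET"]}],
+     #   )
+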
+     async def home(self):
+         """Home endpoint."""
+         return JSONResponse(content="OK")
+
+     async def health(self):
+         """Health check endpoint."""
+         return JSONResponse(content={"status": "healthy"})
+
+     async def handle_stream_vac(self, vector_name: str, request: Request):
+         """
+         Handle streaming VAC requests with plain text response.
+         Compatible with the Flask implementation.
+         """
+         data = await request.json()
+         vac_request = VACRequest(**data)
+
+         prep = await self.prep_vac_async(vac_request, vector_name)
+         all_input = prep["all_input"]
+
+         log.info(f'Streaming data with: {all_input}')
+
+         async def generate_response():
+             try:
+                 if self.stream_is_async:
+                     # Use async streaming
+                     async for chunk in start_streaming_chat_async(
+                         question=all_input["user_input"],
+                         vector_name=vector_name,
+                         qna_func_async=self.stream_interpreter,
+                         chat_history=all_input["chat_history"],
+                         wait_time=all_input["stream_wait_time"],
+                         timeout=all_input["stream_timeout"],
+                         **all_input["kwargs"]
+                     ):
+                         if isinstance(chunk, dict) and 'answer' in chunk:
+                             archive_qa(chunk, vector_name)  # sync function, not async
+                             yield json.dumps(chunk)
+                         else:
+                             yield chunk
+                 else:
+                     # Run sync streaming in an executor
+                     loop = asyncio.get_running_loop()
+
+                     # Create a queue for passing chunks from sync to async
+                     queue = asyncio.Queue()
+
+                     def run_sync_streaming():
+                         try:
+                             for chunk in start_streaming_chat(
+                                 question=all_input["user_input"],
+                                 vector_name=vector_name,
+                                 qna_func=self.stream_interpreter,
+                                 chat_history=all_input["chat_history"],
+                                 wait_time=all_input["stream_wait_time"],
+                                 timeout=all_input["stream_timeout"],
+                                 **all_input["kwargs"]
+                             ):
+                                 asyncio.run_coroutine_threadsafe(queue.put(chunk), loop)
+                         finally:
+                             # Always enqueue the sentinel so the consumer loop exits
+                             asyncio.run_coroutine_threadsafe(queue.put(None), loop)
+
+                     # Start the sync producer in a thread without awaiting it,
+                     # otherwise nothing is yielded until streaming has finished
+                     producer = loop.run_in_executor(None, run_sync_streaming)
+
+                     # Yield from the queue as chunks arrive
+                     while True:
+                         chunk = await queue.get()
+                         if chunk is None:
+                             break
+                         if isinstance(chunk, dict) and 'answer' in chunk:
+                             archive_qa(chunk, vector_name)  # sync function, not async
+                             yield json.dumps(chunk)
+                         else:
+                             yield chunk
+
+                     # Surface any exception raised in the producer thread
+                     await producer
+
+             except Exception as e:
+                 yield f"Streaming Error: {str(e)} {traceback.format_exc()}"
+
+         return StreamingResponse(
+             generate_response(),
+             media_type='text/plain; charset=utf-8',
+             headers={
+                 'Transfer-Encoding': 'chunked',
+                 'Cache-Control': 'no-cache',
+                 'Connection': 'keep-alive'
+             }
+         )
+
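+     # Illustrative client usage for the plain-text streaming endpoint above
+     # (host and VAC name are placeholders):
+     #
+     #   curl -N -X POST http://localhost:8000/vac/streaming/my_vac \
+     #        -H "Content-Type: application/json" \
+     #        -d '{"user_input": "Hello"}'
+     #
+     # Text chunks arrive as they are generated; the final chunk is the full
+     # JSON answer object that was archived via archive_qa().
+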
+     async def handle_stream_vac_sse(self, vector_name: str, request: Request):
+         """
+         Handle streaming VAC requests with Server-Sent Events format.
+         Better for browser-based clients.
+         """
+         data = await request.json()
+         vac_request = VACRequest(**data)
+
+         prep = await self.prep_vac_async(vac_request, vector_name)
+         all_input = prep["all_input"]
+
+         log.info(f'SSE Streaming data with: {all_input}')
+
+         async def generate_sse():
+             try:
+                 if self.stream_is_async:
+                     log.info(f"Starting async streaming for {vector_name}")
+                     async for chunk in start_streaming_chat_async(
+                         question=all_input["user_input"],
+                         vector_name=vector_name,
+                         qna_func_async=self.stream_interpreter,
+                         chat_history=all_input["chat_history"],
+                         wait_time=all_input["stream_wait_time"],
+                         timeout=all_input["stream_timeout"],
+                         **all_input["kwargs"]
+                     ):
+                         log.info(f"Got chunk from start_streaming_chat_async: type={type(chunk)}, is_dict={isinstance(chunk, dict)}, has_answer={'answer' in chunk if isinstance(chunk, dict) else 'N/A'}")
+                         if isinstance(chunk, dict) and 'answer' in chunk:
+                             # This is the final response with answer and sources
+                             log.info(f"Final response received: {list(chunk.keys())}")
+                             archive_qa(chunk, vector_name)  # sync function, not async
+                             # Send the complete response with sources
+                             final_data = f"data: {json.dumps(chunk)}\n\n"
+                             log.info(f"Yielding final response: {final_data[:100]}...")
+                             yield final_data
+                             # Then send the completion signal
+                             done_signal = "data: [DONE]\n\n"
+                             log.info("Yielding [DONE] signal")
+                             yield done_signal
+                             log.info("Sent [DONE] signal, breaking loop")
+                             break  # Exit after sending final response
+                         elif chunk:  # Only send non-empty chunks
+                             # This is a streaming text chunk
+                             yield f"data: {json.dumps({'chunk': chunk})}\n\n"
+                     log.info("SSE generator completed")
+                 else:
+                     # Handle sync interpreter - similar to above
+                     loop = asyncio.get_running_loop()
+                     queue = asyncio.Queue()
+
+                     def run_sync_streaming():
+                         try:
+                             for chunk in start_streaming_chat(
+                                 question=all_input["user_input"],
+                                 vector_name=vector_name,
+                                 qna_func=self.stream_interpreter,
+                                 chat_history=all_input["chat_history"],
+                                 wait_time=all_input["stream_wait_time"],
+                                 timeout=all_input["stream_timeout"],
+                                 **all_input["kwargs"]
+                             ):
+                                 asyncio.run_coroutine_threadsafe(queue.put(chunk), loop)
+                         finally:
+                             # Always enqueue the sentinel so the consumer loop exits
+                             asyncio.run_coroutine_threadsafe(queue.put(None), loop)
+
+                     # Start the producer without awaiting it, so chunks stream
+                     # as they arrive instead of only after completion
+                     producer = loop.run_in_executor(None, run_sync_streaming)
+
+                     while True:
+                         chunk = await queue.get()
+                         if chunk is None:
+                             break
+                         if isinstance(chunk, dict) and 'answer' in chunk:
+                             # This is the final response with answer and sources
+                             archive_qa(chunk, vector_name)  # sync function, not async
+                             # Send the complete response with sources
+                             yield f"data: {json.dumps(chunk)}\n\n"
+                             # Then send the completion signal
+                             yield "data: [DONE]\n\n"
+                             break  # Exit after sending final response
+                         elif chunk:  # Only send non-empty chunks
+                             # This is a streaming text chunk
+                             yield f"data: {json.dumps({'chunk': chunk})}\n\n"
+
+                     # Surface any exception raised in the producer thread
+                     await producer
+
+             except Exception as e:
+                 # traceback is already imported at module level
+                 log.error(f"Error in SSE generator: {e}\n{traceback.format_exc()}")
+                 yield f"data: {json.dumps({'error': str(e)})}\n\n"
+
+         return StreamingResponse(
+             generate_sse(),
+             media_type='text/event-stream',
+             headers={
+                 'Cache-Control': 'no-cache',
+                 'Connection': 'keep-alive',
+                 'X-Accel-Buffering': 'no'
+             }
+         )
+
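+     # Illustrative SSE consumption (host and VAC name are placeholders):
+     #
+     #   curl -N -X POST http://localhost:8000/vac/streaming/my_vac/sse \
+     #        -H "Content-Type: application/json" \
+     #        -d '{"user_input": "Hello"}'
+     #
+     # Events arrive as 'data: {"chunk": "..."}' lines, followed by the final
+     # 'data: {"answer": ..., ...}' object and a terminating 'data: [DONE]'.
+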
+     async def handle_process_vac(self, vector_name: str, request: Request):
+         """Handle non-streaming VAC requests."""
+         data = await request.json()
+         vac_request = VACRequest(**data)
+
+         prep = await self.prep_vac_async(vac_request, vector_name)
+         all_input = prep["all_input"]
+
+         try:
+             if self.vac_is_async:
+                 bot_output = await self.vac_interpreter(
+                     question=all_input["user_input"],
+                     vector_name=vector_name,
+                     chat_history=all_input["chat_history"],
+                     **all_input["kwargs"]
+                 )
+             else:
+                 # Run the sync function in an executor; bind kwargs with
+                 # partial() since run_in_executor() is positional-only
+                 loop = asyncio.get_running_loop()
+                 bot_output = await loop.run_in_executor(
+                     None,
+                     partial(
+                         self.vac_interpreter,
+                         all_input["user_input"],
+                         vector_name,
+                         all_input["chat_history"],
+                         **all_input["kwargs"]
+                     )
+                 )
+
+             bot_output = parse_output(bot_output)
+             archive_qa(bot_output, vector_name)  # sync function, not async
+             log.info(f'==LLM Q:{all_input["user_input"]} - A:{bot_output}')
+
+         except Exception as err:
+             bot_output = {
+                 'answer': f'QNA_ERROR: An error occurred while processing /vac/{vector_name}: {str(err)} traceback: {traceback.format_exc()}'
+             }
+
+         return JSONResponse(content=bot_output)
+
+     async def prep_vac_async(self, vac_request: VACRequest, vector_name: str):
+         """Prepare VAC request data asynchronously."""
+         try:
+             vac_config = ConfigManager(vector_name)
+         except Exception as e:
+             raise ValueError(f"Unable to find vac_config for {vector_name} - {str(e)}")
+
+         # Extract chat history
+         paired_messages = await extract_chat_history_async_cached(vac_request.chat_history)
+
+         all_input = {
+             'user_input': vac_request.user_input.strip(),
+             'vector_name': vac_request.vector_name or vector_name,
+             'chat_history': paired_messages,
+             'stream_wait_time': vac_request.stream_wait_time,
+             'stream_timeout': vac_request.stream_timeout,
+             'eval_percent': vac_request.eval_percent,
+             'kwargs': {}
+         }
+
+         return {
+             "all_input": all_input,
+             "vac_config": vac_config
+         }
+
+     async def openai_health(self):
+         """OpenAI health check endpoint."""
+         return JSONResponse(content={'message': 'Success'})
+
+     async def handle_openai_compatible(self, request: Request, vector_name: Optional[str] = None):
+         """Handle OpenAI-compatible chat completion requests."""
+         data = await request.json()
+         log.info(f'OpenAI compatible endpoint got data: {data} for vector: {vector_name}')
+
+         vector_name = vector_name or data.pop('model', None)
+         messages = data.pop('messages', None)
+         chat_history = data.pop('chat_history', None)
+         stream = data.pop('stream', False)
+
+         if not messages:
+             return JSONResponse(content={"error": "No messages provided"}, status_code=400)
+
+         # Extract the most recent user message
+         user_message = None
+         for msg in reversed(messages):
+             if msg['role'] == 'user':
+                 if isinstance(msg['content'], list):
+                     for content_item in msg['content']:
+                         if content_item['type'] == 'text':
+                             user_message = content_item['text']
+                             break
+                 else:
+                     user_message = msg['content']
+                 break
+
+         if not user_message:
+             return JSONResponse(content={"error": "No user message provided"}, status_code=400)
+
+         response_id = str(uuid.uuid4())
+
+         if stream:
+             async def generate_openai_stream():
+                 if self.stream_is_async:
+                     async for chunk in start_streaming_chat_async(
+                         question=user_message,
+                         vector_name=vector_name,
+                         qna_func_async=self.stream_interpreter,
+                         chat_history=chat_history or [],
+                         wait_time=data.get("stream_wait_time", 1),
+                         timeout=data.get("stream_timeout", 60),
+                         **data
+                     ):
+                         if isinstance(chunk, dict) and 'answer' in chunk:
+                             openai_chunk = {
+                                 "id": response_id,
+                                 "object": "chat.completion.chunk",
+                                 "created": int(datetime.datetime.now().timestamp()),
+                                 "model": vector_name,
+                                 "system_fingerprint": sunholo_version(),
+                                 "choices": [{
+                                     "index": 0,
+                                     "delta": {"content": chunk['answer']},
+                                     "logprobs": None,
+                                     "finish_reason": None
+                                 }]
+                             }
+                             yield f"data: {json.dumps(openai_chunk)}\n\n"
+                         else:
+                             # Stream partial content
+                             openai_chunk = {
+                                 "id": response_id,
+                                 "object": "chat.completion.chunk",
+                                 "created": int(datetime.datetime.now().timestamp()),
+                                 "model": vector_name,
+                                 "choices": [{
+                                     "index": 0,
+                                     "delta": {"content": chunk},
+                                     "finish_reason": None
+                                 }]
+                             }
+                             yield f"data: {json.dumps(openai_chunk)}\n\n"
+
+                 # Send final chunk
+                 final_chunk = {
+                     "id": response_id,
+                     "object": "chat.completion.chunk",
+                     "created": int(datetime.datetime.now().timestamp()),
+                     "model": vector_name,
+                     "choices": [{
+                         "index": 0,
+                         "delta": {},
+                         "finish_reason": "stop"
+                     }]
+                 }
+                 yield f"data: {json.dumps(final_chunk)}\n\n"
+                 yield "data: [DONE]\n\n"
+
+             return StreamingResponse(
+                 generate_openai_stream(),
+                 media_type='text/event-stream'
+             )
+         else:
+             # Non-streaming response
+             try:
+                 if self.vac_is_async:
+                     bot_output = await self.vac_interpreter(
+                         question=user_message,
+                         vector_name=vector_name,
+                         chat_history=chat_history or [],
+                         **data
+                     )
+                 else:
+                     # Bind kwargs with partial(); run_in_executor() is positional-only
+                     loop = asyncio.get_running_loop()
+                     bot_output = await loop.run_in_executor(
+                         None,
+                         partial(
+                             self.vac_interpreter,
+                             user_message,
+                             vector_name,
+                             chat_history or [],
+                             **data
+                         )
+                     )
+
+                 bot_output = parse_output(bot_output)
+                 answer = bot_output.get('answer', '')
+
+                 openai_response = {
+                     "id": response_id,
+                     "object": "chat.completion",
+                     "created": int(datetime.datetime.now().timestamp()),
+                     "model": vector_name,
+                     "system_fingerprint": sunholo_version(),
+                     "choices": [{
+                         "index": 0,
+                         "message": {
+                             "role": "assistant",
+                             "content": answer,
+                         },
+                         "logprobs": None,
+                         "finish_reason": "stop"
+                     }],
+                     "usage": {
+                         # Whitespace token counts are a rough approximation
+                         "prompt_tokens": len(user_message.split()),
+                         "completion_tokens": len(answer.split()),
+                         "total_tokens": len(user_message.split()) + len(answer.split())
+                     }
+                 }
+
+                 return JSONResponse(content=openai_response)
+
+             except Exception as err:
+                 log.error(f"OpenAI response error: {str(err)} traceback: {traceback.format_exc()}")
+                 return JSONResponse(
+                     content={"error": f"ERROR: {str(err)}"},
+                     status_code=500
+                 )
+
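+     # Illustrative use with the official OpenAI Python client (v1 SDK);
+     # base_url, port and model/VAC name are placeholders:
+     #
+     #   from openai import OpenAI
+     #   client = OpenAI(base_url="http://localhost:8000/openai/v1", api_key="unused")
+     #   resp = client.chat.completions.create(
+     #       model="my_vac",
+     #       messages=[{"role": "user", "content": "Hello"}],
+     #   )
+     #   print(resp.choices[0].message.content)
+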
+     async def _initialize_mcp_servers(self):
+         """Initialize connections to configured MCP servers."""
+         for server_config in self.mcp_servers:
+             try:
+                 await self.mcp_client_manager.connect_to_server(
+                     server_name=server_config["name"],
+                     command=server_config["command"],
+                     args=server_config.get("args", [])
+                 )
+                 log.info(f"Connected to MCP server: {server_config['name']}")
+             except Exception as e:
+                 log.error(f"Failed to connect to MCP server {server_config['name']}: {e}")
+
+     async def handle_mcp_list_tools(self, server_name: Optional[str] = None):
+         """List available MCP tools."""
+         if not self.mcp_client_manager:
+             raise HTTPException(status_code=501, detail="MCP client not available")
+
+         tools = await self.mcp_client_manager.list_tools(server_name)
+         return JSONResponse(content={
+             "tools": [
+                 {
+                     "name": tool.name,
+                     "description": tool.description,
+                     "inputSchema": tool.inputSchema,
+                     "server": tool.metadata.get("server") if tool.metadata else server_name
+                 }
+                 for tool in tools
+             ]
+         })
+
+     async def handle_mcp_call_tool(self, request: Request):
+         """Call an MCP tool."""
+         if not self.mcp_client_manager:
+             raise HTTPException(status_code=501, detail="MCP client not available")
+
+         data = await request.json()
+         server_name = data.get("server")
+         tool_name = data.get("tool")
+         arguments = data.get("arguments", {})
+
+         if not server_name or not tool_name:
+             raise HTTPException(status_code=400, detail="Missing 'server' or 'tool' parameter")
+
+         try:
+             result = await self.mcp_client_manager.call_tool(server_name, tool_name, arguments)
+
+             # Convert result to JSON-serializable format
+             if hasattr(result, 'content'):
+                 if hasattr(result.content, 'text'):
+                     return JSONResponse(content={"result": result.content.text})
+                 elif hasattr(result.content, 'data'):
+                     return JSONResponse(content={"result": result.content.data})
+                 else:
+                     return JSONResponse(content={"result": str(result.content)})
+             else:
+                 return JSONResponse(content={"result": str(result)})
+
+         except Exception as e:
+             raise HTTPException(status_code=500, detail=str(e))
+
+     async def handle_mcp_list_resources(self, request: Request):
+         """List available MCP resources."""
+         if not self.mcp_client_manager:
+             raise HTTPException(status_code=501, detail="MCP client not available")
+
+         server_name = request.query_params.get("server")
+         resources = await self.mcp_client_manager.list_resources(server_name)
+
+         return JSONResponse(content={
+             "resources": [
+                 {
+                     "uri": resource.uri,
+                     "name": resource.name,
+                     "description": resource.description,
+                     "mimeType": resource.mimeType,
+                     "server": resource.metadata.get("server") if resource.metadata else server_name
+                 }
+                 for resource in resources
+             ]
+         })
+
+     async def handle_mcp_read_resource(self, request: Request):
+         """Read an MCP resource."""
+         if not self.mcp_client_manager:
+             raise HTTPException(status_code=501, detail="MCP client not available")
+
+         data = await request.json()
+         server_name = data.get("server")
+         uri = data.get("uri")
+
+         if not server_name or not uri:
+             raise HTTPException(status_code=400, detail="Missing 'server' or 'uri' parameter")
+
+         try:
+             contents = await self.mcp_client_manager.read_resource(server_name, uri)
+             return JSONResponse(content={
+                 "contents": [
+                     {"text": content.text} if hasattr(content, 'text') else {"data": str(content)}
+                     for content in contents
+                 ]
+             })
+         except Exception as e:
+             raise HTTPException(status_code=500, detail=str(e))
+
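+     # Illustrative request body for POST /mcp/call (server and tool names are
+     # placeholders for whatever was registered via mcp_servers):
+     #
+     #   {
+     #       "server": "filesystem",
+     #       "tool": "read_file",
+     #       "arguments": {"path": "/tmp/example.txt"}
+     #   }
+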
+     async def handle_mcp_server(self, request: Request):
+         """Handle MCP server requests."""
+         if not self.vac_mcp_server:
+             raise HTTPException(status_code=501, detail="MCP server not enabled")
+
+         data = await request.json()
+         log.info(f"MCP server received: {data}")
+
+         # Process MCP request - simplified version
+         # Full implementation would handle all MCP protocol methods
+         method = data.get("method")
+         params = data.get("params", {})
+         request_id = data.get("id")
+
+         try:
+             if method == "initialize":
+                 response = {
+                     "jsonrpc": "2.0",
+                     "result": {
+                         "protocolVersion": "2025-06-18",
+                         "capabilities": {"tools": {}},
+                         "serverInfo": {
+                             "name": "sunholo-vac-server",
+                             "version": sunholo_version()
+                         }
+                     },
+                     "id": request_id
+                 }
+             elif method == "tools/list":
+                 tools = [
+                     {
+                         "name": "vac_stream",
+                         "description": "Stream responses from a Sunholo VAC",
+                         "inputSchema": {
+                             "type": "object",
+                             "properties": {
+                                 "vector_name": {"type": "string"},
+                                 "user_input": {"type": "string"},
+                                 "chat_history": {"type": "array", "default": []}
+                             },
+                             "required": ["vector_name", "user_input"]
+                         }
+                     }
+                 ]
+                 if self.vac_interpreter:
+                     tools.append({
+                         "name": "vac_query",
+                         "description": "Query a Sunholo VAC (non-streaming)",
+                         "inputSchema": {
+                             "type": "object",
+                             "properties": {
+                                 "vector_name": {"type": "string"},
+                                 "user_input": {"type": "string"},
+                                 "chat_history": {"type": "array", "default": []}
+                             },
+                             "required": ["vector_name", "user_input"]
+                         }
+                     })
+                 response = {
+                     "jsonrpc": "2.0",
+                     "result": {"tools": tools},
+                     "id": request_id
+                 }
+             elif method == "tools/call":
+                 tool_name = params.get("name")
+                 arguments = params.get("arguments", {})
+
+                 if tool_name == "vac_stream":
+                     result = await self.vac_mcp_server._handle_vac_stream(arguments)
+                 elif tool_name == "vac_query":
+                     result = await self.vac_mcp_server._handle_vac_query(arguments)
+                 else:
+                     raise ValueError(f"Unknown tool: {tool_name}")
+
+                 response = {
+                     "jsonrpc": "2.0",
+                     "result": {"content": [item.model_dump() for item in result]},
+                     "id": request_id
+                 }
+             else:
+                 raise ValueError(f"Unknown method: {method}")
+
+         except Exception as e:
+             response = {
+                 "jsonrpc": "2.0",
+                 "error": {
+                     "code": -32603,
+                     "message": str(e)
+                 },
+                 "id": request_id
+             }
+
+         return JSONResponse(content=response)
+
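+     # Illustrative JSON-RPC exchange with the /mcp endpoint above (VAC name
+     # is a placeholder):
+     #
+     #   -> {"jsonrpc": "2.0", "method": "tools/call", "id": 1,
+     #       "params": {"name": "vac_query",
+     #                  "arguments": {"vector_name": "my_vac", "user_input": "Hi"}}}
+     #   <- {"jsonrpc": "2.0", "result": {"content": [...]}, "id": 1}
+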
+     async def handle_mcp_server_info(self):
+         """Return MCP server information."""
+         return JSONResponse(content={
+             "name": "sunholo-vac-server",
+             "version": "1.0.0",
+             "transport": "http",
+             "endpoint": "/mcp",
+             "tools": ["vac_stream", "vac_query"] if self.vac_interpreter else ["vac_stream"]
+         })
+
+     def _get_or_create_a2a_agent(self, request: Request):
+         """Get or create the A2A agent instance with current request context."""
+         if not self.enable_a2a_agent or not VACA2AAgent:
+             return None
+
+         if self.vac_a2a_agent is None:
+             base_url = str(request.base_url).rstrip('/')
+             self.vac_a2a_agent = VACA2AAgent(
+                 base_url=base_url,
+                 stream_interpreter=self.stream_interpreter,
+                 vac_interpreter=self.vac_interpreter,
+                 vac_names=self.a2a_vac_names
+             )
+
+         return self.vac_a2a_agent
+
+     async def handle_a2a_agent_card(self, request: Request):
+         """Handle A2A agent card discovery request."""
+         agent = self._get_or_create_a2a_agent(request)
+         if not agent:
+             raise HTTPException(status_code=501, detail="A2A agent not enabled")
+
+         return JSONResponse(content=agent.get_agent_card())
+
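+     # Illustrative discovery call (host is a placeholder):
+     #
+     #   curl http://localhost:8000/.well-known/agent.json
+     #
+     # returns the agent card built by VACA2AAgent.get_agent_card().
+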
+     async def handle_a2a_task_send(self, request: Request):
+         """Handle A2A task send request."""
+         agent = self._get_or_create_a2a_agent(request)
+         if not agent:
+             raise HTTPException(status_code=501, detail="A2A agent not enabled")
+
+         data = None
+         try:
+             data = await request.json()
+             response = await agent.handle_task_send(data)
+             return JSONResponse(content=response)
+         except Exception as e:
+             log.error(f"A2A task send error: {e}")
+             return JSONResponse(
+                 content={
+                     "jsonrpc": "2.0",
+                     "error": {
+                         "code": -32603,
+                         "message": f"Internal error: {str(e)}"
+                     },
+                     "id": data.get("id") if data else None
+                 },
+                 status_code=500
+             )
+
+     async def handle_a2a_task_send_subscribe(self, request: Request):
+         """Handle A2A task send with subscription (SSE)."""
+         agent = self._get_or_create_a2a_agent(request)
+         if not agent:
+             raise HTTPException(status_code=501, detail="A2A agent not enabled")
+
+         try:
+             data = await request.json()
+
+             async def sse_generator():
+                 async for chunk in agent.handle_task_send_subscribe(data):
+                     yield chunk
+
+             return StreamingResponse(
+                 sse_generator(),
+                 media_type='text/event-stream'
+             )
+
+         except Exception as e:
+             log.error(f"A2A task send subscribe error: {e}")
+             error_message = str(e)
+
+             async def error_generator():
+                 # json.dumps keeps the payload valid even if the message
+                 # contains quotes or newlines
+                 yield f"data: {json.dumps({'error': f'Internal error: {error_message}'})}\n\n"
+
+             return StreamingResponse(
+                 error_generator(),
+                 media_type='text/event-stream'
+             )
+
+     async def handle_a2a_task_get(self, request: Request):
+         """Handle A2A task get request."""
+         agent = self._get_or_create_a2a_agent(request)
+         if not agent:
+             raise HTTPException(status_code=501, detail="A2A agent not enabled")
+
+         data = None
+         try:
+             data = await request.json()
+             response = await agent.handle_task_get(data)
+             return JSONResponse(content=response)
+         except Exception as e:
+             log.error(f"A2A task get error: {e}")
+             return JSONResponse(
+                 content={
+                     "jsonrpc": "2.0",
+                     "error": {
+                         "code": -32603,
+                         "message": f"Internal error: {str(e)}"
+                     },
+                     "id": data.get("id") if data else None
+                 },
+                 status_code=500
+             )
+
+     async def handle_a2a_task_cancel(self, request: Request):
+         """Handle A2A task cancel request."""
+         agent = self._get_or_create_a2a_agent(request)
+         if not agent:
+             raise HTTPException(status_code=501, detail="A2A agent not enabled")
+
+         data = None
+         try:
+             data = await request.json()
+             response = await agent.handle_task_cancel(data)
+             return JSONResponse(content=response)
+         except Exception as e:
+             log.error(f"A2A task cancel error: {e}")
+             return JSONResponse(
+                 content={
+                     "jsonrpc": "2.0",
+                     "error": {
+                         "code": -32603,
+                         "message": f"Internal error: {str(e)}"
+                     },
+                     "id": data.get("id") if data else None
+                 },
+                 status_code=500
+             )
+
+     async def handle_a2a_push_notification(self, request: Request):
+         """Handle A2A push notification settings."""
+         agent = self._get_or_create_a2a_agent(request)
+         if not agent:
+             raise HTTPException(status_code=501, detail="A2A agent not enabled")
+
+         data = None
+         try:
+             data = await request.json()
+             response = await agent.handle_push_notification_set(data)
+             return JSONResponse(content=response)
+         except Exception as e:
+             log.error(f"A2A push notification error: {e}")
+             return JSONResponse(
+                 content={
+                     "jsonrpc": "2.0",
+                     "error": {
+                         "code": -32603,
+                         "message": f"Internal error: {str(e)}"
+                     },
+                     "id": data.get("id") if data else None
+                 },
+                 status_code=500
+             )