sunholo 0.143.16__py3-none-any.whl → 0.144.0__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only and reflects the changes between the versions as they appear in their public registry.
@@ -0,0 +1,1017 @@
+ # Copyright [2024] [Holosun ApS]
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ from __future__ import annotations
+
+ import json
+ import traceback
+ import datetime
+ import uuid
+ import inspect
+ import asyncio
+ from typing import Dict, List, Optional, Callable, Any, TYPE_CHECKING
+ from functools import partial
+
+ if TYPE_CHECKING:
+     from fastapi import FastAPI, Request, Response, HTTPException
+     from fastapi.responses import StreamingResponse, JSONResponse
+     from pydantic import BaseModel
+
+ try:
+     from fastapi import FastAPI, Request, Response, HTTPException
+     from fastapi.responses import StreamingResponse, JSONResponse
+     from pydantic import BaseModel
+     FASTAPI_AVAILABLE = True
+ except ImportError:
+     FastAPI = None
+     Request = None
+     Response = None
+     HTTPException = None
+     StreamingResponse = None
+     JSONResponse = None
+     BaseModel = object
+     FASTAPI_AVAILABLE = False
+
+ from ..chat_history import extract_chat_history_with_cache, extract_chat_history_async_cached
+ from ...qna.parsers import parse_output
+ from ...streaming import start_streaming_chat, start_streaming_chat_async
+ from ...archive import archive_qa
+ from ...custom_logging import log
+ from ...utils import ConfigManager
+ from ...utils.version import sunholo_version
+
+ try:
+     from ...mcp.mcp_manager import MCPClientManager
+ except ImportError:
+     MCPClientManager = None
+
+ try:
+     from ...mcp.vac_mcp_server import VACMCPServer
+ except ImportError:
+     VACMCPServer = None
+
+ try:
+     from ...a2a.vac_a2a_agent import VACA2AAgent
+ except (ImportError, SyntaxError):
+     VACA2AAgent = None
+
+
+ class VACRequest(BaseModel):
+     """Request model for VAC endpoints."""
+     user_input: str
+     chat_history: Optional[List] = None
+     stream_wait_time: Optional[int] = 7
+     stream_timeout: Optional[int] = 120
+     vector_name: Optional[str] = None
+     trace_id: Optional[str] = None
+     eval_percent: Optional[float] = 0.01
+
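+
+ # Example request body accepted by the VAC endpoints below (illustrative values;
+ # the fields mirror the VACRequest model above):
+ #   {"user_input": "What is Sunholo?", "chat_history": [], "stream_wait_time": 7}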
+
+ class VACRoutesFastAPI:
+     """
+     FastAPI implementation of VAC routes with streaming support.
+
+     This class provides a FastAPI-compatible version of the Flask VACRoutes,
+     with proper async streaming support using callbacks.
+
+     Usage Example:
+     ```python
+     from fastapi import FastAPI
+     from sunholo.agents.fastapi import VACRoutesFastAPI
+
+     app = FastAPI()
+
+     async def stream_interpreter(question, vector_name, chat_history, callback, **kwargs):
+         # Implement your streaming logic with callbacks
+         ...
+
+     async def vac_interpreter(question, vector_name, chat_history, **kwargs):
+         # Implement your static VAC logic
+         ...
+
+     vac_routes = VACRoutesFastAPI(
+         app,
+         stream_interpreter,
+         vac_interpreter,
+         enable_mcp_server=True
+     )
+     ```
+     """
+
+     def __init__(
+         self,
+         app: FastAPI,
+         stream_interpreter: Callable,
+         vac_interpreter: Optional[Callable] = None,
+         additional_routes: Optional[List[Dict]] = None,
+         mcp_servers: Optional[List[Dict[str, Any]]] = None,
+         add_langfuse_eval: bool = True,
+         enable_mcp_server: bool = False,
+         enable_a2a_agent: bool = False,
+         a2a_vac_names: Optional[List[str]] = None
+     ):
+         """
+         Initialize FastAPI VAC routes.
+
+         Args:
+             app: FastAPI application instance
+             stream_interpreter: Async or sync function for streaming responses
+             vac_interpreter: Optional function for non-streaming responses
+             additional_routes: List of additional routes to register
+             mcp_servers: List of MCP server configurations
+             add_langfuse_eval: Whether to add Langfuse evaluation
+             enable_mcp_server: Whether to enable MCP server endpoint
+             enable_a2a_agent: Whether to enable A2A agent endpoints
+             a2a_vac_names: List of VAC names for A2A agent
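+
+         Example for additional_routes (my_handler is an illustrative placeholder):
+             additional_routes=[
+                 {"path": "/custom", "handler": my_handler, "methods": ["GET"]}
+             ]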
+         """
+         self.app = app
+         self.stream_interpreter = stream_interpreter
+         # Keep the bound method directly (not wrapped in partial) so that
+         # inspect.iscoroutinefunction below detects the async default correctly
+         self.vac_interpreter = vac_interpreter or self.vac_interpreter_default
+
+         # Detect if interpreters are async
+         self.stream_is_async = inspect.iscoroutinefunction(stream_interpreter)
+         self.vac_is_async = inspect.iscoroutinefunction(self.vac_interpreter)
+
+         # MCP client initialization
+         self.mcp_servers = mcp_servers or []
+         self.mcp_client_manager = MCPClientManager() if MCPClientManager else None
+         self._mcp_initialized = False
+
+         # MCP server initialization
+         self.enable_mcp_server = enable_mcp_server
+         self.vac_mcp_server = None
+         if self.enable_mcp_server and VACMCPServer:
+             self.vac_mcp_server = VACMCPServer(
+                 stream_interpreter=self.stream_interpreter,
+                 vac_interpreter=self.vac_interpreter
+             )
+
+         # A2A agent initialization
+         self.enable_a2a_agent = enable_a2a_agent
+         self.vac_a2a_agent = None
+         self.a2a_vac_names = a2a_vac_names
+
+         self.additional_routes = additional_routes or []
+         self.add_langfuse_eval = add_langfuse_eval
+
+         self.register_routes()
+
+     async def vac_interpreter_default(self, question: str, vector_name: str, chat_history=None, **kwargs):
+         """Default VAC interpreter that uses the stream interpreter without streaming."""
+         class NoOpCallback:
+             def on_llm_new_token(self, token):
+                 pass
+             def on_llm_end(self, response):
+                 pass
+             async def async_on_llm_new_token(self, token):
+                 pass
+             async def async_on_llm_end(self, response):
+                 pass
+
+         callback = NoOpCallback()
+
+         if self.stream_is_async:
+             result = await self.stream_interpreter(
+                 question=question,
+                 vector_name=vector_name,
+                 chat_history=chat_history or [],
+                 callback=callback,
+                 **kwargs
+             )
+         else:
+             # Run sync function in executor; run_in_executor only forwards
+             # positional args, so bind kwargs with functools.partial
+             loop = asyncio.get_running_loop()
+             result = await loop.run_in_executor(
+                 None,
+                 partial(
+                     self.stream_interpreter,
+                     question,
+                     vector_name,
+                     chat_history or [],
+                     callback,
+                     **kwargs
+                 )
+             )
+
+         return result
+
+     def register_routes(self):
+         """Register all VAC routes with the FastAPI application."""
+         # Basic routes
+         self.app.get("/")(self.home)
+         self.app.get("/health")(self.health)
+
+         # Streaming endpoints - both SSE and plain text
+         self.app.post("/vac/streaming/{vector_name}")(self.handle_stream_vac)
+         self.app.post("/vac/streaming/{vector_name}/sse")(self.handle_stream_vac_sse)
+
+         # Static VAC endpoint
+         self.app.post("/vac/{vector_name}")(self.handle_process_vac)
+
+         # OpenAI compatible endpoints
+         self.app.get("/openai/health")(self.openai_health)
+         self.app.post("/openai/health")(self.openai_health)
+         self.app.post("/openai/v1/chat/completions")(self.handle_openai_compatible)
+         self.app.post("/openai/v1/chat/completions/{vector_name}")(self.handle_openai_compatible)
+
+         # MCP client routes
+         if self.mcp_servers and self.mcp_client_manager:
+             self.app.get("/mcp/tools")(self.handle_mcp_list_tools)
+             self.app.get("/mcp/tools/{server_name}")(self.handle_mcp_list_tools)
+             self.app.post("/mcp/call")(self.handle_mcp_call_tool)
+             self.app.get("/mcp/resources")(self.handle_mcp_list_resources)
+             self.app.post("/mcp/resources/read")(self.handle_mcp_read_resource)
+
+         # MCP server endpoint
+         if self.enable_mcp_server and self.vac_mcp_server:
+             self.app.post("/mcp")(self.handle_mcp_server)
+             self.app.get("/mcp")(self.handle_mcp_server_info)
+
+         # A2A agent endpoints
+         if self.enable_a2a_agent:
+             self.app.get("/.well-known/agent.json")(self.handle_a2a_agent_card)
+             self.app.post("/a2a/tasks/send")(self.handle_a2a_task_send)
+             self.app.post("/a2a/tasks/sendSubscribe")(self.handle_a2a_task_send_subscribe)
+             self.app.post("/a2a/tasks/get")(self.handle_a2a_task_get)
+             self.app.post("/a2a/tasks/cancel")(self.handle_a2a_task_cancel)
+             self.app.post("/a2a/tasks/pushNotification/set")(self.handle_a2a_push_notification)
+
+         # Register additional custom routes
+         for route in self.additional_routes:
+             self.app.add_api_route(
+                 route["path"],
+                 route["handler"],
+                 methods=route.get("methods", ["GET"])
+             )
+
+         # Register startup event for MCP initialization
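+         # (on_event is deprecated in newer FastAPI releases; a lifespan
+         # handler is the modern equivalent.)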
+         @self.app.on_event("startup")
+         async def startup_event():
+             if self.mcp_servers and self.mcp_client_manager and not self._mcp_initialized:
+                 await self._initialize_mcp_servers()
+                 self._mcp_initialized = True
+
+     async def home(self):
+         """Home endpoint."""
+         return JSONResponse(content="OK")
+
+     async def health(self):
+         """Health check endpoint."""
+         return JSONResponse(content={"status": "healthy"})
+
+     async def handle_stream_vac(self, vector_name: str, request: Request):
+         """
+         Handle streaming VAC requests with plain text response.
+         Compatible with Flask implementation.
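+
+         Example request (illustrative):
+             POST /vac/streaming/my_vac
+             {"user_input": "Hello", "chat_history": []}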
+         """
+         data = await request.json()
+         vac_request = VACRequest(**data)
+
+         prep = await self.prep_vac_async(vac_request, vector_name)
+         all_input = prep["all_input"]
+
+         log.info(f'Streaming data with: {all_input}')
+
+         async def generate_response():
+             try:
+                 if self.stream_is_async:
+                     # Use async streaming
+                     async for chunk in start_streaming_chat_async(
+                         question=all_input["user_input"],
+                         vector_name=vector_name,
+                         qna_func_async=self.stream_interpreter,
+                         chat_history=all_input["chat_history"],
+                         wait_time=all_input["stream_wait_time"],
+                         timeout=all_input["stream_timeout"],
+                         **all_input["kwargs"]
+                     ):
+                         if isinstance(chunk, dict) and 'answer' in chunk:
+                             archive_qa(chunk, vector_name)  # This is a sync function, not async
+                             yield json.dumps(chunk)
+                         else:
+                             yield chunk
+                 else:
+                     # Run sync streaming in an executor thread
+                     loop = asyncio.get_running_loop()
+
+                     # Create a queue for passing chunks from sync to async
+                     queue = asyncio.Queue()
+
+                     def run_sync_streaming():
+                         for chunk in start_streaming_chat(
+                             question=all_input["user_input"],
+                             vector_name=vector_name,
+                             qna_func=self.stream_interpreter,
+                             chat_history=all_input["chat_history"],
+                             wait_time=all_input["stream_wait_time"],
+                             timeout=all_input["stream_timeout"],
+                             **all_input["kwargs"]
+                         ):
+                             asyncio.run_coroutine_threadsafe(queue.put(chunk), loop)
+                         asyncio.run_coroutine_threadsafe(queue.put(None), loop)
+
+                     # Start the producer thread without awaiting it, so chunks
+                     # are consumed as they arrive instead of after the stream ends
+                     producer = loop.run_in_executor(None, run_sync_streaming)
+
+                     # Yield from queue
+                     while True:
+                         chunk = await queue.get()
+                         if chunk is None:
+                             break
+                         if isinstance(chunk, dict) and 'answer' in chunk:
+                             archive_qa(chunk, vector_name)  # This is a sync function, not async
+                             yield json.dumps(chunk)
+                         else:
+                             yield chunk
+
+                     await producer
+
+             except Exception as e:
+                 yield f"Streaming Error: {str(e)} {traceback.format_exc()}"
+
+         return StreamingResponse(
+             generate_response(),
+             media_type='text/plain; charset=utf-8',
+             headers={
+                 'Transfer-Encoding': 'chunked',
+                 'Cache-Control': 'no-cache',
+                 'Connection': 'keep-alive'
+             }
+         )
+
+     async def handle_stream_vac_sse(self, vector_name: str, request: Request):
+         """
+         Handle streaming VAC requests with Server-Sent Events format.
+         Better for browser-based clients.
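+
+         The SSE stream emits events like (illustrative):
+             data: {"chunk": "partial text"}
+             data: {"answer": "...", ...}   (final response with sources)
+             data: [DONE]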
+         """
+         data = await request.json()
+         vac_request = VACRequest(**data)
+
+         prep = await self.prep_vac_async(vac_request, vector_name)
+         all_input = prep["all_input"]
+
+         log.info(f'SSE Streaming data with: {all_input}')
+
+         async def generate_sse():
+             try:
+                 if self.stream_is_async:
+                     log.info(f"Starting async streaming for {vector_name}")
+                     async for chunk in start_streaming_chat_async(
+                         question=all_input["user_input"],
+                         vector_name=vector_name,
+                         qna_func_async=self.stream_interpreter,
+                         chat_history=all_input["chat_history"],
+                         wait_time=all_input["stream_wait_time"],
+                         timeout=all_input["stream_timeout"],
+                         **all_input["kwargs"]
+                     ):
+                         log.info(f"Got chunk from start_streaming_chat_async: type={type(chunk)}, is_dict={isinstance(chunk, dict)}, has_answer={'answer' in chunk if isinstance(chunk, dict) else 'N/A'}")
+                         if isinstance(chunk, dict) and 'answer' in chunk:
+                             # This is the final response with answer and sources
+                             log.info(f"Final response received: {list(chunk.keys())}")
+                             archive_qa(chunk, vector_name)  # This is a sync function, not async
+                             # Send the complete response with sources
+                             final_data = f"data: {json.dumps(chunk)}\n\n"
+                             log.info(f"Yielding final response: {final_data[:100]}...")
+                             yield final_data
+                             # Then send the completion signal
+                             done_signal = "data: [DONE]\n\n"
+                             log.info("Yielding [DONE] signal")
+                             yield done_signal
+                             log.info("Sent [DONE] signal, breaking loop")
+                             break  # Exit after sending final response
+                         elif chunk:  # Only send non-empty chunks
+                             # This is a streaming text chunk
+                             yield f"data: {json.dumps({'chunk': chunk})}\n\n"
+                     log.info("SSE generator completed")
+                 else:
+                     # Handle sync interpreter - similar to above
+                     loop = asyncio.get_running_loop()
+                     queue = asyncio.Queue()
+
+                     def run_sync_streaming():
+                         for chunk in start_streaming_chat(
+                             question=all_input["user_input"],
+                             vector_name=vector_name,
+                             qna_func=self.stream_interpreter,
+                             chat_history=all_input["chat_history"],
+                             wait_time=all_input["stream_wait_time"],
+                             timeout=all_input["stream_timeout"],
+                             **all_input["kwargs"]
+                         ):
+                             asyncio.run_coroutine_threadsafe(queue.put(chunk), loop)
+                         asyncio.run_coroutine_threadsafe(queue.put(None), loop)
+
+                     # Start the producer without awaiting it, so chunks stream
+                     # as they arrive rather than after the sync call completes
+                     producer = loop.run_in_executor(None, run_sync_streaming)
+
+                     while True:
+                         chunk = await queue.get()
+                         if chunk is None:
+                             break
+                         if isinstance(chunk, dict) and 'answer' in chunk:
+                             # This is the final response with answer and sources
+                             archive_qa(chunk, vector_name)  # This is a sync function, not async
+                             # Send the complete response with sources
+                             yield f"data: {json.dumps(chunk)}\n\n"
+                             # Then send the completion signal
+                             yield "data: [DONE]\n\n"
+                             break  # Exit after sending final response
+                         elif chunk:  # Only send non-empty chunks
+                             # This is a streaming text chunk
+                             yield f"data: {json.dumps({'chunk': chunk})}\n\n"
+
+                     await producer
+
+             except Exception as e:
+                 # traceback is already imported at module level
+                 log.error(f"Error in SSE generator: {e}\n{traceback.format_exc()}")
+                 yield f"data: {json.dumps({'error': str(e)})}\n\n"
+
+         return StreamingResponse(
+             generate_sse(),
+             media_type='text/event-stream',
+             headers={
+                 'Cache-Control': 'no-cache',
+                 'Connection': 'keep-alive',
+                 'X-Accel-Buffering': 'no'
+             }
+         )
+
+     async def handle_process_vac(self, vector_name: str, request: Request):
+         """Handle non-streaming VAC requests."""
+         data = await request.json()
+         vac_request = VACRequest(**data)
+
+         prep = await self.prep_vac_async(vac_request, vector_name)
+         all_input = prep["all_input"]
+
+         try:
+             if self.vac_is_async:
+                 bot_output = await self.vac_interpreter(
+                     question=all_input["user_input"],
+                     vector_name=vector_name,
+                     chat_history=all_input["chat_history"],
+                     **all_input["kwargs"]
+                 )
+             else:
+                 # Run sync function in executor; bind kwargs with partial
+                 # since run_in_executor only forwards positional args
+                 loop = asyncio.get_running_loop()
+                 bot_output = await loop.run_in_executor(
+                     None,
+                     partial(
+                         self.vac_interpreter,
+                         all_input["user_input"],
+                         vector_name,
+                         all_input["chat_history"],
+                         **all_input["kwargs"]
+                     )
+                 )
+
+             bot_output = parse_output(bot_output)
+             archive_qa(bot_output, vector_name)  # This is a sync function, not async
+             log.info(f'==LLM Q:{all_input["user_input"]} - A:{bot_output}')
+
+         except Exception as err:
+             bot_output = {
+                 'answer': f'QNA_ERROR: An error occurred while processing /vac/{vector_name}: {str(err)} traceback: {traceback.format_exc()}'
+             }
+
+         return JSONResponse(content=bot_output)
+
+     async def prep_vac_async(self, vac_request: VACRequest, vector_name: str):
+         """Prepare VAC request data asynchronously."""
+         try:
+             vac_config = ConfigManager(vector_name)
+         except Exception as e:
+             raise ValueError(f"Unable to find vac_config for {vector_name} - {str(e)}")
+
+         # Extract chat history
+         paired_messages = await extract_chat_history_async_cached(vac_request.chat_history)
+
+         all_input = {
+             'user_input': vac_request.user_input.strip(),
+             'vector_name': vac_request.vector_name or vector_name,
+             'chat_history': paired_messages,
+             'stream_wait_time': vac_request.stream_wait_time,
+             'stream_timeout': vac_request.stream_timeout,
+             'eval_percent': vac_request.eval_percent,
+             'kwargs': {}
+         }
+
+         return {
+             "all_input": all_input,
+             "vac_config": vac_config
+         }
+
+     async def openai_health(self):
+         """OpenAI health check endpoint."""
+         return JSONResponse(content={'message': 'Success'})
+
+     async def handle_openai_compatible(self, request: Request, vector_name: Optional[str] = None):
+         """Handle OpenAI-compatible chat completion requests."""
+         data = await request.json()
+         log.info(f'OpenAI compatible endpoint got data: {data} for vector: {vector_name}')
+
+         vector_name = vector_name or data.pop('model', None)
+         messages = data.pop('messages', None)
+         chat_history = data.pop('chat_history', None)
+         stream = data.pop('stream', False)
+
+         if not messages:
+             return JSONResponse(content={"error": "No messages provided"}, status_code=400)
+
+         # Extract user message
+         user_message = None
+         for msg in reversed(messages):
+             if msg['role'] == 'user':
+                 if isinstance(msg['content'], list):
+                     for content_item in msg['content']:
+                         if content_item['type'] == 'text':
+                             user_message = content_item['text']
+                             break
+                 else:
+                     user_message = msg['content']
+                 break
+
+         if not user_message:
+             return JSONResponse(content={"error": "No user message provided"}, status_code=400)
+
+         response_id = str(uuid.uuid4())
+
+         if stream:
+             async def generate_openai_stream():
+                 if self.stream_is_async:
+                     async for chunk in start_streaming_chat_async(
+                         question=user_message,
+                         vector_name=vector_name,
+                         qna_func_async=self.stream_interpreter,
+                         chat_history=chat_history or [],
+                         wait_time=data.get("stream_wait_time", 1),
+                         timeout=data.get("stream_timeout", 60),
+                         **data
+                     ):
+                         if isinstance(chunk, dict) and 'answer' in chunk:
+                             openai_chunk = {
+                                 "id": response_id,
+                                 "object": "chat.completion.chunk",
+                                 "created": int(datetime.datetime.now().timestamp()),
+                                 "model": vector_name,
+                                 "system_fingerprint": sunholo_version(),
+                                 "choices": [{
+                                     "index": 0,
+                                     "delta": {"content": chunk['answer']},
+                                     "logprobs": None,
+                                     "finish_reason": None
+                                 }]
+                             }
+                             yield f"data: {json.dumps(openai_chunk)}\n\n"
+                         else:
+                             # Stream partial content
+                             openai_chunk = {
+                                 "id": response_id,
+                                 "object": "chat.completion.chunk",
+                                 "created": int(datetime.datetime.now().timestamp()),
+                                 "model": vector_name,
+                                 "choices": [{
+                                     "index": 0,
+                                     "delta": {"content": chunk},
+                                     "finish_reason": None
+                                 }]
+                             }
+                             yield f"data: {json.dumps(openai_chunk)}\n\n"
+
+                 # Send final chunk
+                 final_chunk = {
+                     "id": response_id,
+                     "object": "chat.completion.chunk",
+                     "created": int(datetime.datetime.now().timestamp()),
+                     "model": vector_name,
+                     "choices": [{
+                         "index": 0,
+                         "delta": {},
+                         "finish_reason": "stop"
+                     }]
+                 }
+                 yield f"data: {json.dumps(final_chunk)}\n\n"
+                 yield "data: [DONE]\n\n"
+
+             return StreamingResponse(
+                 generate_openai_stream(),
+                 media_type='text/event-stream'
+             )
+         else:
+             # Non-streaming response
+             try:
+                 if self.vac_is_async:
+                     bot_output = await self.vac_interpreter(
+                         question=user_message,
+                         vector_name=vector_name,
+                         chat_history=chat_history or [],
+                         **data
+                     )
+                 else:
+                     # Bind kwargs with partial; run_in_executor only
+                     # forwards positional args
+                     loop = asyncio.get_running_loop()
+                     bot_output = await loop.run_in_executor(
+                         None,
+                         partial(
+                             self.vac_interpreter,
+                             user_message,
+                             vector_name,
+                             chat_history or [],
+                             **data
+                         )
+                     )
+
+                 bot_output = parse_output(bot_output)
+                 answer = bot_output.get('answer', '')
+
+                 openai_response = {
+                     "id": response_id,
+                     "object": "chat.completion",
+                     "created": int(datetime.datetime.now().timestamp()),
+                     "model": vector_name,
+                     "system_fingerprint": sunholo_version(),
+                     "choices": [{
+                         "index": 0,
+                         "message": {
+                             "role": "assistant",
+                             "content": answer,
+                         },
+                         "logprobs": None,
+                         "finish_reason": "stop"
+                     }],
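+                     # Rough whitespace-based estimate, not a real tokenizer count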
+                     "usage": {
+                         "prompt_tokens": len(user_message.split()),
+                         "completion_tokens": len(answer.split()),
+                         "total_tokens": len(user_message.split()) + len(answer.split())
+                     }
+                 }
+
+                 return JSONResponse(content=openai_response)
+
+             except Exception as err:
+                 log.error(f"OpenAI response error: {str(err)} traceback: {traceback.format_exc()}")
+                 return JSONResponse(
+                     content={"error": f"ERROR: {str(err)}"},
+                     status_code=500
+                 )
+
+     async def _initialize_mcp_servers(self):
+         """Initialize connections to configured MCP servers."""
+         for server_config in self.mcp_servers:
+             try:
+                 await self.mcp_client_manager.connect_to_server(
+                     server_name=server_config["name"],
+                     command=server_config["command"],
+                     args=server_config.get("args", [])
+                 )
+                 log.info(f"Connected to MCP server: {server_config['name']}")
+             except Exception as e:
+                 log.error(f"Failed to connect to MCP server {server_config['name']}: {e}")
+
+     async def handle_mcp_list_tools(self, server_name: Optional[str] = None):
+         """List available MCP tools."""
+         if not self.mcp_client_manager:
+             raise HTTPException(status_code=501, detail="MCP client not available")
+
+         tools = await self.mcp_client_manager.list_tools(server_name)
+         return JSONResponse(content={
+             "tools": [
+                 {
+                     "name": tool.name,
+                     "description": tool.description,
+                     "inputSchema": tool.inputSchema,
+                     "server": tool.metadata.get("server") if tool.metadata else server_name
+                 }
+                 for tool in tools
+             ]
+         })
+
+     async def handle_mcp_call_tool(self, request: Request):
+         """Call an MCP tool."""
+         if not self.mcp_client_manager:
+             raise HTTPException(status_code=501, detail="MCP client not available")
+
+         data = await request.json()
+         server_name = data.get("server")
+         tool_name = data.get("tool")
+         arguments = data.get("arguments", {})
+
+         if not server_name or not tool_name:
+             raise HTTPException(status_code=400, detail="Missing 'server' or 'tool' parameter")
+
+         try:
+             result = await self.mcp_client_manager.call_tool(server_name, tool_name, arguments)
+
+             # Convert result to JSON-serializable format
+             if hasattr(result, 'content'):
+                 if hasattr(result.content, 'text'):
+                     return JSONResponse(content={"result": result.content.text})
+                 elif hasattr(result.content, 'data'):
+                     return JSONResponse(content={"result": result.content.data})
+                 else:
+                     return JSONResponse(content={"result": str(result.content)})
+             else:
+                 return JSONResponse(content={"result": str(result)})
+
+         except Exception as e:
+             raise HTTPException(status_code=500, detail=str(e))
+
+     async def handle_mcp_list_resources(self, request: Request):
+         """List available MCP resources."""
+         if not self.mcp_client_manager:
+             raise HTTPException(status_code=501, detail="MCP client not available")
+
+         server_name = request.query_params.get("server")
+         resources = await self.mcp_client_manager.list_resources(server_name)
+
+         return JSONResponse(content={
+             "resources": [
+                 {
+                     "uri": resource.uri,
+                     "name": resource.name,
+                     "description": resource.description,
+                     "mimeType": resource.mimeType,
+                     "server": resource.metadata.get("server") if resource.metadata else server_name
+                 }
+                 for resource in resources
+             ]
+         })
+
+     async def handle_mcp_read_resource(self, request: Request):
+         """Read an MCP resource."""
+         if not self.mcp_client_manager:
+             raise HTTPException(status_code=501, detail="MCP client not available")
+
+         data = await request.json()
+         server_name = data.get("server")
+         uri = data.get("uri")
+
+         if not server_name or not uri:
+             raise HTTPException(status_code=400, detail="Missing 'server' or 'uri' parameter")
+
+         try:
+             contents = await self.mcp_client_manager.read_resource(server_name, uri)
+             return JSONResponse(content={
+                 "contents": [
+                     {"text": content.text} if hasattr(content, 'text') else {"data": str(content)}
+                     for content in contents
+                 ]
+             })
+         except Exception as e:
+             raise HTTPException(status_code=500, detail=str(e))
+
+     async def handle_mcp_server(self, request: Request):
+         """Handle MCP server requests."""
+         if not self.vac_mcp_server:
+             raise HTTPException(status_code=501, detail="MCP server not enabled")
+
+         data = await request.json()
+         log.info(f"MCP server received: {data}")
+
+         # Process MCP request - simplified version
+         # Full implementation would handle all MCP protocol methods
+         method = data.get("method")
+         params = data.get("params", {})
+         request_id = data.get("id")
+
+         try:
+             if method == "initialize":
+                 response = {
+                     "jsonrpc": "2.0",
+                     "result": {
+                         "protocolVersion": "2025-06-18",
+                         "capabilities": {"tools": {}},
+                         "serverInfo": {
+                             "name": "sunholo-vac-server",
+                             "version": sunholo_version()
+                         }
+                     },
+                     "id": request_id
+                 }
+             elif method == "tools/list":
+                 tools = [
+                     {
+                         "name": "vac_stream",
+                         "description": "Stream responses from a Sunholo VAC",
+                         "inputSchema": {
+                             "type": "object",
+                             "properties": {
+                                 "vector_name": {"type": "string"},
+                                 "user_input": {"type": "string"},
+                                 "chat_history": {"type": "array", "default": []}
+                             },
+                             "required": ["vector_name", "user_input"]
+                         }
+                     }
+                 ]
+                 if self.vac_interpreter:
+                     tools.append({
+                         "name": "vac_query",
+                         "description": "Query a Sunholo VAC (non-streaming)",
+                         "inputSchema": {
+                             "type": "object",
+                             "properties": {
+                                 "vector_name": {"type": "string"},
+                                 "user_input": {"type": "string"},
+                                 "chat_history": {"type": "array", "default": []}
+                             },
+                             "required": ["vector_name", "user_input"]
+                         }
+                     })
+                 response = {
+                     "jsonrpc": "2.0",
+                     "result": {"tools": tools},
+                     "id": request_id
+                 }
+             elif method == "tools/call":
+                 tool_name = params.get("name")
+                 arguments = params.get("arguments", {})
+
+                 if tool_name == "vac_stream":
+                     result = await self.vac_mcp_server._handle_vac_stream(arguments)
+                 elif tool_name == "vac_query":
+                     result = await self.vac_mcp_server._handle_vac_query(arguments)
+                 else:
+                     raise ValueError(f"Unknown tool: {tool_name}")
+
+                 response = {
+                     "jsonrpc": "2.0",
+                     "result": {"content": [item.model_dump() for item in result]},
+                     "id": request_id
+                 }
+             else:
+                 raise ValueError(f"Unknown method: {method}")
+
+         except Exception as e:
+             response = {
+                 "jsonrpc": "2.0",
+                 "error": {
+                     "code": -32603,
+                     "message": str(e)
+                 },
+                 "id": request_id
+             }
+
+         return JSONResponse(content=response)
+
+     async def handle_mcp_server_info(self):
+         """Return MCP server information."""
+         return JSONResponse(content={
+             "name": "sunholo-vac-server",
+             "version": "1.0.0",
+             "transport": "http",
+             "endpoint": "/mcp",
+             "tools": ["vac_stream", "vac_query"] if self.vac_interpreter else ["vac_stream"]
+         })
+
+     def _get_or_create_a2a_agent(self, request: Request):
+         """Get or create the A2A agent instance with current request context."""
+         if not self.enable_a2a_agent or not VACA2AAgent:
+             return None
+
+         if self.vac_a2a_agent is None:
+             base_url = str(request.base_url).rstrip('/')
+             self.vac_a2a_agent = VACA2AAgent(
+                 base_url=base_url,
+                 stream_interpreter=self.stream_interpreter,
+                 vac_interpreter=self.vac_interpreter,
+                 vac_names=self.a2a_vac_names
+             )
+
+         return self.vac_a2a_agent
+
+     async def handle_a2a_agent_card(self, request: Request):
+         """Handle A2A agent card discovery request."""
+         agent = self._get_or_create_a2a_agent(request)
+         if not agent:
+             raise HTTPException(status_code=501, detail="A2A agent not enabled")
+
+         return JSONResponse(content=agent.get_agent_card())
+
+     async def handle_a2a_task_send(self, request: Request):
+         """Handle A2A task send request."""
+         agent = self._get_or_create_a2a_agent(request)
+         if not agent:
+             raise HTTPException(status_code=501, detail="A2A agent not enabled")
+
+         try:
+             data = await request.json()
+             response = await agent.handle_task_send(data)
+             return JSONResponse(content=response)
+         except Exception as e:
+             log.error(f"A2A task send error: {e}")
+             return JSONResponse(
+                 content={
+                     "jsonrpc": "2.0",
+                     "error": {
+                         "code": -32603,
+                         "message": f"Internal error: {str(e)}"
+                     },
+                     "id": data.get("id") if 'data' in locals() else None
+                 },
+                 status_code=500
+             )
+
+     async def handle_a2a_task_send_subscribe(self, request: Request):
+         """Handle A2A task send with subscription (SSE)."""
+         agent = self._get_or_create_a2a_agent(request)
+         if not agent:
+             raise HTTPException(status_code=501, detail="A2A agent not enabled")
+
+         try:
+             data = await request.json()
+
+             async def sse_generator():
+                 async for chunk in agent.handle_task_send_subscribe(data):
+                     yield chunk
+
+             return StreamingResponse(
+                 sse_generator(),
+                 media_type='text/event-stream'
+             )
+
+         except Exception as e:
+             log.error(f"A2A task send subscribe error: {e}")
+             error_message = str(e)
+
+             async def error_generator():
+                 # json.dumps keeps the payload valid JSON even if the
+                 # error message contains quotes or other special characters
+                 yield f"data: {json.dumps({'error': f'Internal error: {error_message}'})}\n\n"
+
+             return StreamingResponse(
+                 error_generator(),
+                 media_type='text/event-stream'
+             )
+
+     async def handle_a2a_task_get(self, request: Request):
+         """Handle A2A task get request."""
+         agent = self._get_or_create_a2a_agent(request)
+         if not agent:
+             raise HTTPException(status_code=501, detail="A2A agent not enabled")
+
+         try:
+             data = await request.json()
+             response = await agent.handle_task_get(data)
+             return JSONResponse(content=response)
+         except Exception as e:
+             log.error(f"A2A task get error: {e}")
+             return JSONResponse(
+                 content={
+                     "jsonrpc": "2.0",
+                     "error": {
+                         "code": -32603,
+                         "message": f"Internal error: {str(e)}"
+                     },
+                     "id": data.get("id") if 'data' in locals() else None
+                 },
+                 status_code=500
+             )
+
+     async def handle_a2a_task_cancel(self, request: Request):
+         """Handle A2A task cancel request."""
+         agent = self._get_or_create_a2a_agent(request)
+         if not agent:
+             raise HTTPException(status_code=501, detail="A2A agent not enabled")
+
+         try:
+             data = await request.json()
+             response = await agent.handle_task_cancel(data)
+             return JSONResponse(content=response)
+         except Exception as e:
+             log.error(f"A2A task cancel error: {e}")
+             return JSONResponse(
+                 content={
+                     "jsonrpc": "2.0",
+                     "error": {
+                         "code": -32603,
+                         "message": f"Internal error: {str(e)}"
+                     },
+                     "id": data.get("id") if 'data' in locals() else None
+                 },
+                 status_code=500
+             )
+
+     async def handle_a2a_push_notification(self, request: Request):
+         """Handle A2A push notification settings."""
+         agent = self._get_or_create_a2a_agent(request)
+         if not agent:
+             raise HTTPException(status_code=501, detail="A2A agent not enabled")
+
+         try:
+             data = await request.json()
+             response = await agent.handle_push_notification_set(data)
+             return JSONResponse(content=response)
+         except Exception as e:
+             log.error(f"A2A push notification error: {e}")
+             return JSONResponse(
+                 content={
+                     "jsonrpc": "2.0",
+                     "error": {
+                         "code": -32603,
+                         "message": f"Internal error: {str(e)}"
+                     },
+                     "id": data.get("id") if 'data' in locals() else None
+                 },
+                 status_code=500
+             )