sunholo 0.139.1__py3-none-any.whl → 0.140.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sunholo/agents/__init__.py +1 -1
- sunholo/agents/chat_history.py +63 -0
- sunholo/agents/flask/__init__.py +0 -1
- sunholo/agents/flask/vac_routes.py +118 -7
- {sunholo-0.139.1.dist-info → sunholo-0.140.4.dist-info}/METADATA +1 -1
- {sunholo-0.139.1.dist-info → sunholo-0.140.4.dist-info}/RECORD +10 -11
- {sunholo-0.139.1.dist-info → sunholo-0.140.4.dist-info}/WHEEL +1 -1
- sunholo/agents/flask/qna_routes.py +0 -604
- {sunholo-0.139.1.dist-info → sunholo-0.140.4.dist-info}/entry_points.txt +0 -0
- {sunholo-0.139.1.dist-info → sunholo-0.140.4.dist-info}/licenses/LICENSE.txt +0 -0
- {sunholo-0.139.1.dist-info → sunholo-0.140.4.dist-info}/top_level.txt +0 -0
sunholo/agents/__init__.py
CHANGED
@@ -2,6 +2,6 @@ from .chat_history import extract_chat_history
 from .dispatch_to_qa import send_to_qa, send_to_qa_async
 from .pubsub import process_pubsub
 from .special_commands import handle_special_commands, app_to_store, handle_files
-from .flask import
+from .flask import create_app, VACRoutes
 from .fastapi import register_qna_fastapi_routes, create_fastapi_app
 from .swagger import config_to_swagger
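With this change, VACRoutes (alongside create_app) is importable directly from sunholo.agents. A minimal sketch of what that enables, assuming a plain Flask app and a hypothetical interpreter function:

from flask import Flask
from sunholo.agents import VACRoutes

app = Flask(__name__)

def my_stream_interpreter(question, vector_name, chat_history, **kwargs):
    # Hypothetical interpreter; the return shape follows the {"answer": ...}
    # convention seen elsewhere in this diff.
    return {"answer": f"echo: {question}"}

# Registers /, /health, /vac/<vector_name> and /vac/streaming/<vector_name>
VACRoutes(app, my_stream_interpreter)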
sunholo/agents/chat_history.py
CHANGED
@@ -1,6 +1,69 @@
 import json
 from ..custom_logging import log
 
+
+async def extract_chat_history_async(chat_history=None):
+    """
+    Extracts paired chat history between human and AI messages.
+
+    For this lightweight processing, we use a simpler approach that minimizes overhead.
+
+    Args:
+        chat_history (list): List of chat messages.
+
+    Returns:
+        list: List of tuples with paired human and AI messages.
+    """
+    if not chat_history:
+        log.info("No chat history found")
+        return []
+
+    log.info(f"Extracting chat history: {chat_history}")
+    paired_messages = []
+
+    # Handle special case of initial bot message
+    if chat_history and is_bot(chat_history[0]):
+        first_message = chat_history[0]
+        log.info(f"Extracting first_message: {first_message}")
+        blank_human_message = {"name": "Human", "content": "", "embeds": []}
+
+        # Since create_message_element is so lightweight, we don't need async here
+        blank_element = create_message_element(blank_human_message)
+        bot_element = create_message_element(first_message)
+
+        paired_messages.append((blank_element, bot_element))
+        chat_history = chat_history[1:]
+
+    # Pre-process all messages in one batch (more efficient than one-by-one)
+    message_types = []
+    message_contents = []
+
+    for message in chat_history:
+        is_human_msg = is_human(message)
+        is_bot_msg = is_bot(message)
+
+        # Extract content for all messages at once
+        content = create_message_element(message)
+
+        message_types.append((is_human_msg, is_bot_msg))
+        message_contents.append(content)
+
+    # Pair messages efficiently
+    last_human_message = ""
+    for i, ((is_human_msg, is_bot_msg), content) in enumerate(zip(message_types, message_contents)):
+        if is_human_msg:
+            last_human_message = content
+            log.info(f"Extracted human message: {last_human_message}")
+        elif is_bot_msg:
+            ai_message = content
+            log.info(f"Extracted AI message: {ai_message}")
+            paired_messages.append((last_human_message, ai_message))
+            last_human_message = ""
+
+    log.info(f"Paired messages: {paired_messages}")
+    return paired_messages
+
+
 def extract_chat_history(chat_history=None):
     """
     Extracts paired chat history between human and AI messages.
sunholo/agents/flask/vac_routes.py
CHANGED
@@ -8,6 +8,7 @@ import inspect
 import asyncio
 
 from ...agents import extract_chat_history, handle_special_commands
+from ..chat_history import extract_chat_history_async
 from ...qna.parsers import parse_output
 from ...streaming import start_streaming_chat, start_streaming_chat_async
 from ...archive import archive_qa
@@ -57,11 +58,12 @@ if __name__ == "__main__":
     ```
 
     """
-    def __init__(self, app, stream_interpreter, vac_interpreter=None, additional_routes=None):
+    def __init__(self, app, stream_interpreter: callable, vac_interpreter:callable=None, additional_routes:dict=None, async_stream:bool=False):
         self.app = app
         self.stream_interpreter = stream_interpreter
         self.vac_interpreter = vac_interpreter or partial(self.vac_interpreter_default)
         self.additional_routes = additional_routes if additional_routes is not None else []
+        self.async_stream = async_stream
         self.register_routes()
 
 
@@ -94,12 +96,16 @@ if __name__ == "__main__":
         # Basic routes
         self.app.route("/", methods=['GET'])(self.home)
         self.app.route("/health", methods=['GET'])(self.health)
-
-        #
-
-
-
-
+
+        if self.async_stream: # Use async treatment
+            log.info("async_stream enabled")
+            self.app.route('/vac/streaming/<vector_name>',
+                           methods=['POST'],
+                           provide_automatic_options=False)(self.handle_stream_vac_async)
+        else:
+            self.app.route('/vac/streaming/<vector_name>',
+                           methods=['POST'],
+                           provide_automatic_options=False)(self.handle_stream_vac)
         # Static VAC
         self.app.route('/vac/<vector_name>',
                        methods=['POST'],
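A minimal sketch of opting in to the new async route; the interpreter here is hypothetical, but it must be a coroutine function because the async handler in the next hunk rejects anything else:

from flask import Flask
from sunholo.agents import VACRoutes

app = Flask(__name__)

async def my_async_interpreter(question, vector_name, chat_history, **kwargs):
    # Hypothetical coroutine interpreter, used only for illustration.
    return {"answer": f"echo: {question}"}

# async_stream=True wires /vac/streaming/<vector_name> to handle_stream_vac_async
VACRoutes(app, my_async_interpreter, async_stream=True)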
@@ -332,6 +338,51 @@ if __name__ == "__main__":
 
         return response
 
+    async def handle_stream_vac_async(self, vector_name):
+        observed_stream_interpreter = self.stream_interpreter
+        is_async = inspect.iscoroutinefunction(self.stream_interpreter)
+
+        if not is_async:
+            raise ValueError(f"Stream interpreter must be async: {observed_stream_interpreter}")
+
+        # Use the async version of prep_vac
+        prep = await self.prep_vac_async(request, vector_name)
+        log.info(f"Processing async prep: {prep}")
+        all_input = prep["all_input"]
+
+        log.info(f'Streaming async data with: {all_input}')
+
+        async def generate_response_content():
+            try:
+                # Direct async handling without the queue/thread approach
+                async_gen = start_streaming_chat_async(
+                    question=all_input["user_input"],
+                    vector_name=vector_name,
+                    qna_func_async=observed_stream_interpreter,
+                    chat_history=all_input["chat_history"],
+                    wait_time=all_input["stream_wait_time"],
+                    timeout=all_input["stream_timeout"],
+                    **all_input["kwargs"]
+                )
+
+                log.info(f"{async_gen=}")
+                async for chunk in async_gen:
+                    if isinstance(chunk, dict) and 'answer' in chunk:
+                        await archive_qa(chunk, vector_name)
+                        yield json.dumps(chunk)
+                    else:
+                        yield chunk
+
+            except Exception as e:
+                yield f"Streaming async Error: {str(e)} {traceback.format_exc()}"
+
+        response = Response(generate_response_content(), content_type='text/plain; charset=utf-8')
+        response.headers['Transfer-Encoding'] = 'chunked'
+
+        log.debug(f"streaming async response: {response}")
+
+        return response
+
     @staticmethod
     async def _async_generator_to_stream(async_gen_func):
         """Helper function to stream the async generator's values to the client."""
@@ -699,6 +750,66 @@ if __name__ == "__main__":
             "vac_config": vac_config
         }
 
+    async def prep_vac_async(self, request, vector_name):
+        """Async version of prep_vac."""
+        # Parse request data
+        if request.content_type.startswith('application/json'):
+            data = request.get_json()
+        elif request.content_type.startswith('multipart/form-data'):
+            data = request.form.to_dict()
+            if 'file' in request.files:
+                file = request.files['file']
+                if file.filename != '':
+                    log.info(f"Found file: {file.filename} to upload to GCS")
+                    try:
+                        # Make file upload async if possible
+                        image_uri, mime_type = await self.handle_file_upload_async(file, vector_name)
+                        data["image_uri"] = image_uri
+                        data["mime"] = mime_type
+                    except Exception as e:
+                        log.error(traceback.format_exc())
+                        return jsonify({'error': str(e), 'traceback': traceback.format_exc()}), 500
+                else:
+                    log.error("No file selected")
+                    return jsonify({"error": "No file selected"}), 400
+        else:
+            return jsonify({"error": "Unsupported content type"}), 400
+
+        log.info(f"vac/{vector_name} got data: {data}")
+
+        # Run these operations concurrently
+        tasks = []
+
+        # Extract other data while configs load
+        user_input = data.pop('user_input').strip()
+        stream_wait_time = data.pop('stream_wait_time', 7)
+        stream_timeout = data.pop('stream_timeout', 120)
+        chat_history = data.pop('chat_history', None)
+        vector_name_param = data.pop('vector_name', vector_name)
+        data.pop('trace_id', None) # to ensure not in kwargs
+
+        # Task 3: Process chat history
+        chat_history_task = asyncio.create_task(extract_chat_history_async(chat_history))
+        tasks.append(chat_history_task)
+
+        # Await all tasks concurrently
+        results = await asyncio.gather(*tasks, return_exceptions=True)
+
+        paired_messages = results[0] if not isinstance(results[0], Exception) else []
+
+        # Only create span after we have trace
+        all_input = {
+            'user_input': user_input,
+            'vector_name': vector_name_param,
+            'chat_history': paired_messages,
+            'stream_wait_time': stream_wait_time,
+            'stream_timeout': stream_timeout,
+            'kwargs': data
+        }
+
+        return {
+            "all_input": all_input
+        }
 
     def handle_file_upload(self, file, vector_name):
         try:
{sunholo-0.139.1.dist-info → sunholo-0.140.4.dist-info}/RECORD
CHANGED
@@ -1,8 +1,8 @@
 sunholo/__init__.py,sha256=InRbX4V0-qdNHo9zYH3GEye7ASLR6LX8-SMvPV4Jsaw,1212
 sunholo/custom_logging.py,sha256=JXZTnXp_DixP3jwYfKw4LYRDS9IuTq7ctCgfZbI2rxA,22023
 sunholo/langchain_types.py,sha256=uZ4zvgej_f7pLqjtu4YP7qMC_eZD5ym_5x4pyvA1Ih4,1834
-sunholo/agents/__init__.py,sha256=
-sunholo/agents/chat_history.py,sha256=
+sunholo/agents/__init__.py,sha256=AauG3l0y4r5Fzx1zJfZ634M4o-0o7B7J5T8k_gPvNqE,370
+sunholo/agents/chat_history.py,sha256=4jGCHBP8dZfUjSJPxgKyh6nOqhnHRn1x9U3CnGb0I5E,7624
 sunholo/agents/dispatch_to_qa.py,sha256=NHihwAoCJ5_Lk11e_jZnucVUGQyZHCB-YpkfMHBCpQk,8882
 sunholo/agents/langserve.py,sha256=C46ph2mnygr6bdHijYWYyfQDI9ylAF0_9Kx2PfcCJpU,4414
 sunholo/agents/pubsub.py,sha256=TscZN_6am6DfaQkC-Yl18ZIBOoLE-0nDSiil6GpQEh4,1344
@@ -12,10 +12,9 @@ sunholo/agents/swagger.py,sha256=2tzGmpveUMmTREykZvVnDj3j295wyOMu7mUFDnXdY3c,106
 sunholo/agents/fastapi/__init__.py,sha256=S_pj4_bTUmDGoq_exaREHlOKThi0zTuGT0VZY0YfODQ,88
 sunholo/agents/fastapi/base.py,sha256=W-cyF8ZDUH40rc-c-Apw3-_8IIi2e4Y9qRtnoVnsc1Q,2521
 sunholo/agents/fastapi/qna_routes.py,sha256=lKHkXPmwltu9EH3RMwmD153-J6pE7kWQ4BhBlV3to-s,3864
-sunholo/agents/flask/__init__.py,sha256=
+sunholo/agents/flask/__init__.py,sha256=dEoByI3gDNUOjpX1uVKP7uPjhfFHJubbiaAv3xLopnk,63
 sunholo/agents/flask/base.py,sha256=HLz3Z5efWaewTwSFEM6JH48NA9otoJBoVFJlARGk9L8,788
-sunholo/agents/flask/
-sunholo/agents/flask/vac_routes.py,sha256=9bytTeoOJQOYxsPGLIXLItDmnbB9zDXmYM0lBIwDe8w,28335
+sunholo/agents/flask/vac_routes.py,sha256=Dk9QrPvXNRzAWxaTWsYgHVxmK-Rjrvgd6-sAuvqt9P8,33236
 sunholo/archive/__init__.py,sha256=qNHWm5rGPVOlxZBZCpA1wTYPbalizRT7f8X4rs2t290,31
 sunholo/archive/archive.py,sha256=PxVfDtO2_2ZEEbnhXSCbXLdeoHoQVImo4y3Jr2XkCFY,1204
 sunholo/auth/__init__.py,sha256=TeP-OY0XGxYV_8AQcVGoh35bvyWhNUcMRfhuD5l44Sk,91
@@ -169,9 +168,9 @@ sunholo/vertex/init.py,sha256=1OQwcPBKZYBTDPdyU7IM4X4OmiXLdsNV30C-fee2scQ,2875
 sunholo/vertex/memory_tools.py,sha256=tBZxqVZ4InTmdBvLlOYwoSEWu4-kGquc-gxDwZCC4FA,7667
 sunholo/vertex/safety.py,sha256=S9PgQT1O_BQAkcqauWncRJaydiP8Q_Jzmu9gxYfy1VA,2482
 sunholo/vertex/type_dict_to_json.py,sha256=uTzL4o9tJRao4u-gJOFcACgWGkBOtqACmb6ihvCErL8,4694
-sunholo-0.
-sunholo-0.
-sunholo-0.
-sunholo-0.
-sunholo-0.
-sunholo-0.
+sunholo-0.140.4.dist-info/licenses/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
+sunholo-0.140.4.dist-info/METADATA,sha256=oKjtRKqFPtwaoV177G0nRWfv3P9xfGB3U4fonaffJrk,10067
+sunholo-0.140.4.dist-info/WHEEL,sha256=Nw36Djuh_5VDukK0H78QzOX-_FQEo6V37m3nkm96gtU,91
+sunholo-0.140.4.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
+sunholo-0.140.4.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
+sunholo-0.140.4.dist-info/RECORD,,
sunholo/agents/flask/qna_routes.py
DELETED
@@ -1,604 +0,0 @@
-# Copyright [2024] [Holosun ApS]
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-import json
-import traceback
-import uuid
-
-from ...agents import extract_chat_history, handle_special_commands
-from ...qna.parsers import parse_output
-from ...streaming import start_streaming_chat
-from ...archive import archive_qa
-from ...custom_logging import log
-from ...utils.config import load_config
-from ...utils import ConfigManager
-from ...utils.version import sunholo_version
-import os
-from ...gcs.add_file import add_file_to_gcs, handle_base64_image
-from ..swagger import validate_api_key
-from datetime import datetime, timedelta
-
-try:
-    from flask import request, jsonify, Response
-except ImportError:
-    pass
-
-try:
-    from langfuse.decorators import langfuse_context, observe
-except ImportError:
-    pass
-
-# Cache dictionary to store validated API keys
-api_key_cache = {}
-cache_duration = timedelta(minutes=5) # Cache duration
-
-def make_openai_response(user_message, vector_name, answer):
-    response_id = str(uuid.uuid4())
-    log.info(f"openai response: Q: {user_message} to VECTOR_NAME: {vector_name} - A: {answer}")
-    openai_response = {
-        "id": response_id,
-        "object": "chat.completion",
-        "created": str(int(datetime.now().timestamp())),
-        "model": vector_name,
-        "system_fingerprint": sunholo_version(),
-        "choices": [{
-            "index": 0,
-            "message": {
-                "role": "assistant",
-                "content": answer,
-            },
-            "logprobs": None,
-            "finish_reason": "stop"
-        }],
-        "usage": {
-            "prompt_tokens": 0,
-            "completion_tokens": 0,
-            "total_tokens": 0
-        }
-    }
-
-    log.info(f"OpenAI response: {openai_response}")
-    return jsonify(openai_response)
-
-def register_qna_routes(app, stream_interpreter, vac_interpreter):
-    """
-    Register Q&A routes for a Flask application.
-
-    This function sets up multiple routes for handling Q&A operations,
-    including streaming responses and processing static responses.
-
-    Args:
-        app (Flask): The Flask application instance.
-        stream_interpreter (function): Function to handle streaming Q&A responses.
-        vac_interpreter (function): Function to handle static Q&A responses.
-
-    Returns:
-        None
-
-    Example:
-        from flask import Flask
-        app = Flask(__name__)
-
-        def dummy_stream_interpreter(...):
-            ...
-
-        def dummy_vac_interpreter(...):
-            ...
-
-        register_qna_routes(app, dummy_stream_interpreter, dummy_vac_interpreter)
-    """
-    @app.route("/")
-    def home():
-        return jsonify("OK")
-
-    @app.route("/health")
-    def health():
-        return jsonify({"status": "healthy"})
-
-    @app.route('/vac/streaming/<vector_name>', methods=['POST'])
-    def stream_qa(vector_name):
-        """
-        Handle streaming Q&A responses.
-
-        This function sets up a route to handle streaming Q&A responses based on
-        the provided vector name.
-
-        Args:
-            vector_name (str): The name of the vector for the request.
-
-        Returns:
-            Response: A Flask response object streaming the Q&A response content.
-
-        Example:
-            response = stream_qa("example_vector")
-        """
-        observed_stream_interpreter = observe()(stream_interpreter)
-        prep = prep_vac(request, vector_name)
-        log.debug(f"Processing prep: {prep}")
-        trace = prep["trace"]
-        span = prep["span"]
-        command_response = prep["command_response"]
-        vac_config = prep["vac_config"]
-        all_input = prep["all_input"]
-
-        if command_response:
-            return jsonify(command_response)
-
-        log.info(f'Streaming data with: {all_input}')
-        if span:
-            generation = span.generation(
-                name="start_streaming_chat",
-                metadata=vac_config,
-                input = all_input,
-                completion_start_time=datetime.now(),
-                model=vac_config.get("model") or vac_config.get("llm")
-            )
-
-        def generate_response_content():
-
-            for chunk in start_streaming_chat(question=all_input["user_input"],
-                                              vector_name=vector_name,
-                                              qna_func=observed_stream_interpreter,
-                                              chat_history=all_input["chat_history"],
-                                              wait_time=all_input["stream_wait_time"],
-                                              timeout=all_input["stream_timeout"],
-                                              #kwargs
-                                              **all_input["kwargs"]
-                                              ):
-                if isinstance(chunk, dict) and 'answer' in chunk:
-                    # When we encounter the dictionary, we yield it as a JSON string
-                    # and stop the generator.
-                    if trace:
-                        chunk["trace"] = trace.id
-                        chunk["trace_url"] = trace.get_trace_url()
-                    archive_qa(chunk, vector_name)
-                    if trace:
-                        generation.end(output=json.dumps(chunk))
-                        span.end(output=json.dumps(chunk))
-                        trace.update(output=json.dumps(chunk))
-
-                    return json.dumps(chunk)
-
-                else:
-                    # Otherwise, we yield the plain text chunks as they come in.
-                    yield chunk
-
-        # Here, the generator function will handle streaming the content to the client.
-        response = Response(generate_response_content(), content_type='text/plain; charset=utf-8')
-        response.headers['Transfer-Encoding'] = 'chunked'
-
-        log.debug(f"streaming response: {response}")
-        if trace:
-            generation.end(output=response)
-            span.end(output=response)
-            trace.update(output=response)
-
-        #if 'user_id' in all_input["kwargs"]:
-        # kwargs = all_input["kwargs"]
-        # config = ConfigManager(vector_name)
-        # add_user_history_rag(kwargs.pop('user_id'),
-        # config,
-        # question=all_input.pop("user_input"),
-        # answer=response.get('answer'),
-        # metadata=all_input)
-
-        return response
-
-    @app.route('/vac/<vector_name>', methods=['POST'])
-    def process_qna(vector_name):
-        """
-        Handle static Q&A responses.
-
-        This function sets up a route to handle static Q&A responses based on
-        the provided vector name.
-
-        Args:
-            vector_name (str): The name of the vector for the request.
-
-        Returns:
-            Response: A Flask response object with the Q&A response content.
-
-        Example:
-            response = process_qna("example_vector")
-        """
-        observed_vac_interpreter = observe()(vac_interpreter)
-        prep = prep_vac(request, vector_name)
-        log.debug(f"Processing prep: {prep}")
-        trace = prep["trace"]
-        span = prep["span"]
-        command_response = prep["command_response"]
-        vac_config = prep["vac_config"]
-        all_input = prep["all_input"]
-
-        if command_response:
-            return jsonify(command_response)
-
-        try:
-            if span:
-                generation = span.generation(
-                    name="vac_interpreter",
-                    metadata=vac_config,
-                    input = all_input,
-                    model=vac_config.get("model") or vac_config.get("llm")
-                )
-            bot_output = observed_vac_interpreter(
-                question=all_input["user_input"],
-                vector_name=vector_name,
-                chat_history=all_input["chat_history"],
-                **all_input["kwargs"]
-            )
-            if span:
-                generation.end(output=bot_output)
-            # {"answer": "The answer", "source_documents": [{"page_content": "The page content", "metadata": "The metadata"}]}
-            bot_output = parse_output(bot_output)
-            if trace:
-                bot_output["trace"] = trace.id
-                bot_output["trace_url"] = trace.get_trace_url()
-            archive_qa(bot_output, vector_name)
-            log.info(f'==LLM Q:{all_input["user_input"]} - A:{bot_output}')
-
-
-        except Exception as err:
-            bot_output = {'answer': f'QNA_ERROR: An error occurred while processing /vac/{vector_name}: {str(err)} traceback: {traceback.format_exc()}'}
-
-        if trace:
-            span.end(output=jsonify(bot_output))
-            trace.update(output=jsonify(bot_output))
-
-        # {'answer': 'output'}
-        return jsonify(bot_output)
-
-    @app.before_request
-    def check_authentication_header():
-        if request.path.startswith('/openai/'):
-            log.debug(f'Request headers: {request.headers}')
-            # the header forwarded
-            auth_header = request.headers.get('X-Forwarded-Authorization')
-            if auth_header:
-
-                if auth_header.startswith('Bearer '):
-                    api_key = auth_header.split(' ')[1] # Assuming "Bearer <api_key>"
-                else:
-                    return jsonify({'error': 'Invalid authorization header does not start with "Bearer " - got: {auth_header}'}), 401
-
-                endpoints_host = os.getenv('_ENDPOINTS_HOST')
-                if not endpoints_host:
-                    return jsonify({'error': '_ENDPOINTS_HOST environment variable not found'}), 401
-
-                # Check cache first
-                current_time = datetime.now()
-                if api_key in api_key_cache:
-                    cached_result, cache_time = api_key_cache[api_key]
-                    if current_time - cache_time < cache_duration:
-                        if not cached_result:
-                            return jsonify({'error': 'Invalid cached API key'}), 401
-                        else:
-                            return # Valid API key, continue to the endpoint
-                    else:
-                        # Cache expired, remove from cache
-                        del api_key_cache[api_key]
-
-                # Validate API key
-                is_valid = validate_api_key(api_key, endpoints_host)
-                # Update cache
-                api_key_cache[api_key] = (is_valid, current_time)
-
-                if not is_valid:
-                    return jsonify({'error': 'Invalid API key'}), 401
-            else:
-                return jsonify({'error': 'Missing Authorization header'}), 401
-
-    @app.route('/openai/health', methods=['GET', 'POST'])
-    def openai_health_endpoint():
-        return jsonify({'message': 'Success'})
-
-    @app.route('/openai/v1/chat/completions', methods=['POST'])
-    @app.route('/openai/v1/chat/completions/<vector_name>', methods=['POST'])
-    def openai_compatible_endpoint(vector_name=None):
-        """
-        Handle OpenAI-compatible chat completions.
-
-        This function sets up routes to handle OpenAI-compatible chat completion requests,
-        both with and without a specified vector name.
-
-        Args:
-            vector_name (str, optional): The name of the vector for the request. Defaults to None.
-
-        Returns:
-            Response: A Flask response object with the chat completion content.
-
-        Example:
-            response = openai_compatible_endpoint("example_vector")
-        """
-        data = request.get_json()
-        log.info(f'openai_compatible_endpoint got data: {data} for vector: {vector_name}')
-
-        vector_name = vector_name or data.pop('model', None)
-        messages = data.pop('messages', None)
-        chat_history = data.pop('chat_history', None)
-        stream = data.pop('stream', False)
-
-        if not messages:
-            return jsonify({"error": "No messages provided"}), 400
-
-        user_message = None
-        image_uri = None
-        mime_type = None
-
-        for msg in reversed(messages):
-            if msg['role'] == 'user':
-                if isinstance(msg['content'], list):
-                    for content_item in msg['content']:
-                        if content_item['type'] == 'text':
-                            user_message = content_item['text']
-                        elif content_item['type'] == 'image_url':
-                            base64_data = content_item['image_url']['url']
-                            image_uri, mime_type = handle_base64_image(base64_data, vector_name)
-                else:
-                    user_message = msg['content']
-                break
-
-        if not user_message:
-            return jsonify({"error": "No user message provided"}), 400
-        else:
-            log.info(f"User message: {user_message}")
-
-        paired_messages = extract_chat_history(chat_history)
-        command_response = handle_special_commands(user_message, vector_name, paired_messages)
-
-        if command_response is not None:
-
-            return make_openai_response(user_message, vector_name, command_response)
-
-        if image_uri:
-            data["image_uri"] = image_uri
-            data["mime"] = mime_type
-
-        all_input = {
-            "user_input": user_message,
-            "chat_history": chat_history,
-            "kwargs": data
-        }
-
-        observed_stream_interpreter = observe()(stream_interpreter)
-
-        response_id = str(uuid.uuid4())
-
-        def generate_response_content():
-            for chunk in start_streaming_chat(question=user_message,
-                                              vector_name=vector_name,
-                                              qna_func=observed_stream_interpreter,
-                                              chat_history=all_input["chat_history"],
-                                              wait_time=all_input.get("stream_wait_time", 1),
-                                              timeout=all_input.get("stream_timeout", 60),
-                                              **all_input["kwargs"]
-                                              ):
-                if isinstance(chunk, dict) and 'answer' in chunk:
-                    openai_chunk = {
-                        "id": response_id,
-                        "object": "chat.completion.chunk",
-                        "created": str(int(datetime.now().timestamp())),
-                        "model": vector_name,
-                        "system_fingerprint": sunholo_version(),
-                        "choices": [{
-                            "index": 0,
-                            "delta": {"content": chunk['answer']},
-                            "logprobs": None,
-                            "finish_reason": None
-                        }]
-                    }
-                    yield json.dumps(openai_chunk) + "\n"
-                else:
-                    log.info(f"Unknown chunk: {chunk}")
-
-            final_chunk = {
-                "id": response_id,
-                "object": "chat.completion.chunk",
-                "created": str(int(datetime.now().timestamp())),
-                "model": vector_name,
-                "system_fingerprint": sunholo_version(),
-                "choices": [{
-                    "index": 0,
-                    "delta": {},
-                    "logprobs": None,
-                    "finish_reason": "stop"
-                }]
-            }
-            yield json.dumps(final_chunk) + "\n"
-
-        if stream:
-            log.info("Streaming openai chunks")
-            return Response(generate_response_content(), content_type='text/plain; charset=utf-8')
-
-        try:
-            observed_vac_interpreter = observe()(vac_interpreter)
-            bot_output = observed_vac_interpreter(
-                question=user_message,
-                vector_name=vector_name,
-                chat_history=all_input["chat_history"],
-                **all_input["kwargs"]
-            )
-            bot_output = parse_output(bot_output)
-
-            log.info(f"Bot output: {bot_output}")
-            if bot_output:
-                return make_openai_response(user_message, vector_name, bot_output.get('answer', ''))
-            else:
-                return make_openai_response(user_message, vector_name, 'ERROR: could not find an answer')
-
-        except Exception as err:
-            log.error(f"OpenAI response error: {str(err)} traceback: {traceback.format_exc()}")
-
-            return make_openai_response(user_message, vector_name, f'ERROR: {str(err)}')
-
-
-def create_langfuse_trace(request, vector_name):
-    """
-    Create a Langfuse trace for tracking requests.
-
-    This function initializes a Langfuse trace object based on the request headers
-    and vector name.
-
-    Args:
-        request (Request): The Flask request object.
-        vector_name (str): The name of the vector for the request.
-
-    Returns:
-        Langfuse.Trace: The Langfuse trace object.
-
-    Example:
-        trace = create_langfuse_trace(request, "example_vector")
-    """
-    try:
-        from langfuse import Langfuse
-        langfuse = Langfuse()
-    except ImportError as err:
-        print(f"No langfuse installed for agents.flask.register_qna_routes, install via `pip install sunholo[http]` - {str(err)}")
-
-        return None
-
-    user_id = request.headers.get("X-User-ID")
-    session_id = request.headers.get("X-Session-ID")
-    message_source = request.headers.get("X-Message-Source")
-
-    package_version = sunholo_version()
-    tags = [package_version]
-    if message_source:
-        tags.append(message_source)
-
-    return langfuse.trace(
-        name = f"/vac/{vector_name}",
-        user_id = user_id,
-        session_id = session_id,
-        tags = tags,
-        release = f"sunholo-v{package_version}"
-    )
-
-def prep_vac(request, vector_name):
-    """
-    Prepare the input data for a VAC request.
-
-    This function processes the incoming request data, extracts relevant
-    information, and prepares the data for VAC processing.
-
-    Args:
-        request (Request): The Flask request object.
-        vector_name (str): The name of the vector for the request.
-
-    Returns:
-        dict: A dictionary containing prepared input data and metadata.
-
-    Example:
-        prep_data = prep_vac(request, "example_vector")
-    """
-    #trace = create_langfuse_trace(request, vector_name)
-    trace = None
-    span = None
-
-    if request.content_type.startswith('application/json'):
-        data = request.get_json()
-    elif request.content_type.startswith('multipart/form-data'):
-        data = request.form.to_dict()
-        if 'file' in request.files:
-            file = request.files['file']
-            if file.filename != '':
-                log.info(f"Found file: {file.filename} to upload to GCS")
-                try:
-                    image_uri, mime_type = handle_file_upload(file, vector_name)
-                    data["image_uri"] = image_uri
-                    data["image_url"] = image_uri
-                    data["mime"] = mime_type
-                except Exception as e:
-                    log.error(f"Error uploading file: {str(e)}")
-            else:
-                log.info("No file selected to upload to GCS")
-    else:
-        log.warning(f"Error uploading file: Unsupported content type {request.content_type}")
-
-    log.info(f"vac/{vector_name} got data: {data}")
-
-    config, _ = load_config("config/llm_config.yaml")
-    vac_configs = config.get("vac")
-    if vac_configs:
-        vac_config = vac_configs.get(vector_name)
-        if not vac_config:
-            log.warning("Not a local configured VAC, may be a remote config not synced yet")
-
-    if trace and vac_config:
-        trace.update(input=data, metadata=vac_config)
-
-    user_input = data.pop('user_input').strip()
-    stream_wait_time = data.pop('stream_wait_time', 7)
-    stream_timeout = data.pop('stream_timeout', 120)
-    chat_history = data.pop('chat_history', None)
-    vector_name = data.pop('vector_name', vector_name)
-
-    log.info("Turning chat_history into paired tuples")
-    paired_messages = extract_chat_history(chat_history)
-
-    all_input = {'user_input': user_input,
-                 'vector_name': vector_name,
-                 'chat_history': paired_messages,
-                 'stream_wait_time': stream_wait_time,
-                 'stream_timeout': stream_timeout,
-                 'kwargs': data}
-
-    if trace:
-        span = trace.span(
-            name="VAC",
-            metadata=vac_config,
-            input = all_input
-        )
-    command_response = handle_special_commands(user_input, vector_name, paired_messages)
-    if command_response is not None:
-        if trace:
-            trace.update(output=jsonify(command_response))
-
-    return {
-        "trace": trace,
-        "span": span,
-        "command_response": command_response,
-        "all_input": all_input,
-        "vac_config": vac_config
-    }
-
-
-def handle_file_upload(file, vector_name):
-    """
-    Handle file upload and store the file in Google Cloud Storage.
-
-    This function saves the uploaded file locally, uploads it to Google Cloud Storage,
-    and then removes the local copy.
-
-    Args:
-        file (FileStorage): The uploaded file.
-        vector_name (str): The name of the vector for the request.
-
-    Returns:
-        tuple: A tuple containing the URI of the uploaded file and its MIME type.
-
-    Raises:
-        Exception: If the file upload fails.
-
-    Example:
-        uri, mime_type = handle_file_upload(file, "example_vector")
-    """
-    try:
-        file.save(file.filename)
-        image_uri = add_file_to_gcs(file.filename, vector_name)
-        os.remove(file.filename) # Clean up the saved file
-        return image_uri, file.mimetype
-    except Exception as e:
-        raise Exception(f'File upload failed: {str(e)}')
{sunholo-0.139.1.dist-info → sunholo-0.140.4.dist-info}/entry_points.txt
File without changes
{sunholo-0.139.1.dist-info → sunholo-0.140.4.dist-info}/licenses/LICENSE.txt
File without changes
{sunholo-0.139.1.dist-info → sunholo-0.140.4.dist-info}/top_level.txt
File without changes