sunholo 0.139.1__py3-none-any.whl → 0.140.4__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to its public registry. It is provided for informational purposes only.
@@ -2,6 +2,6 @@ from .chat_history import extract_chat_history
2
2
  from .dispatch_to_qa import send_to_qa, send_to_qa_async
3
3
  from .pubsub import process_pubsub
4
4
  from .special_commands import handle_special_commands, app_to_store, handle_files
5
- from .flask import register_qna_routes, create_app, VACRoutes
5
+ from .flask import create_app, VACRoutes
6
6
  from .fastapi import register_qna_fastapi_routes, create_fastapi_app
7
7
  from .swagger import config_to_swagger
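`register_qna_routes` is no longer exported from `sunholo.agents` (its Flask module is removed entirely at the bottom of this diff); only the class-based `create_app` and `VACRoutes` remain. A minimal migration sketch, assuming `VACRoutes` covers the same Flask Q&A routes:

```python
# Before (0.139.1):
# from sunholo.agents import register_qna_routes
# register_qna_routes(app, stream_interpreter, vac_interpreter)

# After (0.140.4):
from sunholo.agents import VACRoutes

VACRoutes(app, stream_interpreter, vac_interpreter)
```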
@@ -1,6 +1,69 @@
1
1
  import json
2
2
  from ..custom_logging import log
3
3
 
4
+
5
+ async def extract_chat_history_async(chat_history=None):
6
+ """
7
+ Extracts paired chat history between human and AI messages.
8
+
9
+ For this lightweight processing, we use a simpler approach that minimizes overhead.
10
+
11
+ Args:
12
+ chat_history (list): List of chat messages.
13
+
14
+ Returns:
15
+ list: List of tuples with paired human and AI messages.
16
+ """
17
+ if not chat_history:
18
+ log.info("No chat history found")
19
+ return []
20
+
21
+ log.info(f"Extracting chat history: {chat_history}")
22
+ paired_messages = []
23
+
24
+ # Handle special case of initial bot message
25
+ if chat_history and is_bot(chat_history[0]):
26
+ first_message = chat_history[0]
27
+ log.info(f"Extracting first_message: {first_message}")
28
+ blank_human_message = {"name": "Human", "content": "", "embeds": []}
29
+
30
+ # Since create_message_element is so lightweight, we don't need async here
31
+ blank_element = create_message_element(blank_human_message)
32
+ bot_element = create_message_element(first_message)
33
+
34
+ paired_messages.append((blank_element, bot_element))
35
+ chat_history = chat_history[1:]
36
+
37
+ # Pre-process all messages in one batch (more efficient than one-by-one)
38
+ message_types = []
39
+ message_contents = []
40
+
41
+ for message in chat_history:
42
+ is_human_msg = is_human(message)
43
+ is_bot_msg = is_bot(message)
44
+
45
+ # Extract content for all messages at once
46
+ content = create_message_element(message)
47
+
48
+ message_types.append((is_human_msg, is_bot_msg))
49
+ message_contents.append(content)
50
+
51
+ # Pair messages efficiently
52
+ last_human_message = ""
53
+ for i, ((is_human_msg, is_bot_msg), content) in enumerate(zip(message_types, message_contents)):
54
+ if is_human_msg:
55
+ last_human_message = content
56
+ log.info(f"Extracted human message: {last_human_message}")
57
+ elif is_bot_msg:
58
+ ai_message = content
59
+ log.info(f"Extracted AI message: {ai_message}")
60
+ paired_messages.append((last_human_message, ai_message))
61
+ last_human_message = ""
62
+
63
+ log.info(f"Paired messages: {paired_messages}")
64
+ return paired_messages
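A usage sketch for the new `extract_chat_history_async`. The dict schema with `"name"`/`"content"` keys mirrors the blank placeholder above; that `is_bot()` keys off `"name": "AI"` and that `create_message_element()` returns the content string are assumptions:

```python
import asyncio
from sunholo.agents.chat_history import extract_chat_history_async

history = [
    {"name": "Human", "content": "Hello", "embeds": []},
    {"name": "AI", "content": "Hi, how can I help?", "embeds": []},  # "AI" name is an assumption
]

pairs = asyncio.run(extract_chat_history_async(history))
# Assuming create_message_element() returns the content string:
# pairs == [("Hello", "Hi, how can I help?")]
```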
65
+
66
+
4
67
  def extract_chat_history(chat_history=None):
5
68
  """
6
69
  Extracts paired chat history between human and AI messages.
@@ -1,3 +1,2 @@
1
- from .qna_routes import register_qna_routes
2
1
  from .base import create_app
3
2
  from .vac_routes import VACRoutes
@@ -8,6 +8,7 @@ import inspect
8
8
  import asyncio
9
9
 
10
10
  from ...agents import extract_chat_history, handle_special_commands
11
+ from ..chat_history import extract_chat_history_async
11
12
  from ...qna.parsers import parse_output
12
13
  from ...streaming import start_streaming_chat, start_streaming_chat_async
13
14
  from ...archive import archive_qa
@@ -57,11 +58,12 @@ if __name__ == "__main__":
57
58
  ```
58
59
 
59
60
  """
60
- def __init__(self, app, stream_interpreter, vac_interpreter=None, additional_routes=None):
61
+ def __init__(self, app, stream_interpreter: callable, vac_interpreter:callable=None, additional_routes:dict=None, async_stream:bool=False):
61
62
  self.app = app
62
63
  self.stream_interpreter = stream_interpreter
63
64
  self.vac_interpreter = vac_interpreter or partial(self.vac_interpreter_default)
64
65
  self.additional_routes = additional_routes if additional_routes is not None else []
66
+ self.async_stream = async_stream
65
67
  self.register_routes()
66
68
 
67
69
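With the new `async_stream` flag, `/vac/streaming/<vector_name>` is wired to the async handler added further down (which raises a `ValueError` unless the stream interpreter is a coroutine function). A minimal sketch; the interpreter body and its keyword arguments are placeholders:

```python
from flask import Flask
from sunholo.agents import VACRoutes

app = Flask(__name__)

async def my_stream_interpreter(question, vector_name=None, chat_history=None, **kwargs):
    # hypothetical async interpreter; the real signature depends on start_streaming_chat_async
    return {"answer": f"You asked: {question}"}

VACRoutes(app, my_stream_interpreter, async_stream=True)
```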
 
@@ -94,12 +96,16 @@ if __name__ == "__main__":
94
96
  # Basic routes
95
97
  self.app.route("/", methods=['GET'])(self.home)
96
98
  self.app.route("/health", methods=['GET'])(self.health)
97
-
98
- # Streaming VAC
99
- self.app.route('/vac/streaming/<vector_name>',
100
- methods=['POST'],
101
- provide_automatic_options=False)(self.handle_stream_vac)
102
-
99
+
100
+ if self.async_stream: # Use async treatment
101
+ log.info("async_stream enabled")
102
+ self.app.route('/vac/streaming/<vector_name>',
103
+ methods=['POST'],
104
+ provide_automatic_options=False)(self.handle_stream_vac_async)
105
+ else:
106
+ self.app.route('/vac/streaming/<vector_name>',
107
+ methods=['POST'],
108
+ provide_automatic_options=False)(self.handle_stream_vac)
103
109
  # Static VAC
104
110
  self.app.route('/vac/<vector_name>',
105
111
  methods=['POST'],
@@ -332,6 +338,51 @@ if __name__ == "__main__":
332
338
 
333
339
  return response
334
340
 
341
+ async def handle_stream_vac_async(self, vector_name):
342
+ observed_stream_interpreter = self.stream_interpreter
343
+ is_async = inspect.iscoroutinefunction(self.stream_interpreter)
344
+
345
+ if not is_async:
346
+ raise ValueError(f"Stream interpreter must be async: {observed_stream_interpreter}")
347
+
348
+ # Use the async version of prep_vac
349
+ prep = await self.prep_vac_async(request, vector_name)
350
+ log.info(f"Processing async prep: {prep}")
351
+ all_input = prep["all_input"]
352
+
353
+ log.info(f'Streaming async data with: {all_input}')
354
+
355
+ async def generate_response_content():
356
+ try:
357
+ # Direct async handling without the queue/thread approach
358
+ async_gen = start_streaming_chat_async(
359
+ question=all_input["user_input"],
360
+ vector_name=vector_name,
361
+ qna_func_async=observed_stream_interpreter,
362
+ chat_history=all_input["chat_history"],
363
+ wait_time=all_input["stream_wait_time"],
364
+ timeout=all_input["stream_timeout"],
365
+ **all_input["kwargs"]
366
+ )
367
+
368
+ log.info(f"{async_gen=}")
369
+ async for chunk in async_gen:
370
+ if isinstance(chunk, dict) and 'answer' in chunk:
371
+ await archive_qa(chunk, vector_name)
372
+ yield json.dumps(chunk)
373
+ else:
374
+ yield chunk
375
+
376
+ except Exception as e:
377
+ yield f"Streaming async Error: {str(e)} {traceback.format_exc()}"
378
+
379
+ response = Response(generate_response_content(), content_type='text/plain; charset=utf-8')
380
+ response.headers['Transfer-Encoding'] = 'chunked'
381
+
382
+ log.debug(f"streaming async response: {response}")
383
+
384
+ return response
385
+
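A client-side sketch of calling the async streaming route registered above. The host, port and VAC name are assumptions; the payload fields come from `prep_vac_async` below:

```python
import requests

resp = requests.post(
    "http://localhost:8080/vac/streaming/my_vac",  # hypothetical host and VAC name
    json={"user_input": "What changed in this release?", "chat_history": []},
    stream=True,
)
for chunk in resp.iter_content(chunk_size=None, decode_unicode=True):
    # Plain-text chunks stream first; the final chunk is a JSON object with 'answer'.
    print(chunk, end="")
```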
335
386
  @staticmethod
336
387
  async def _async_generator_to_stream(async_gen_func):
337
388
  """Helper function to stream the async generator's values to the client."""
@@ -699,6 +750,66 @@ if __name__ == "__main__":
699
750
  "vac_config": vac_config
700
751
  }
701
752
 
753
+ async def prep_vac_async(self, request, vector_name):
754
+ """Async version of prep_vac."""
755
+ # Parse request data
756
+ if request.content_type.startswith('application/json'):
757
+ data = request.get_json()
758
+ elif request.content_type.startswith('multipart/form-data'):
759
+ data = request.form.to_dict()
760
+ if 'file' in request.files:
761
+ file = request.files['file']
762
+ if file.filename != '':
763
+ log.info(f"Found file: {file.filename} to upload to GCS")
764
+ try:
765
+ # Make file upload async if possible
766
+ image_uri, mime_type = await self.handle_file_upload_async(file, vector_name)
767
+ data["image_uri"] = image_uri
768
+ data["mime"] = mime_type
769
+ except Exception as e:
770
+ log.error(traceback.format_exc())
771
+ return jsonify({'error': str(e), 'traceback': traceback.format_exc()}), 500
772
+ else:
773
+ log.error("No file selected")
774
+ return jsonify({"error": "No file selected"}), 400
775
+ else:
776
+ return jsonify({"error": "Unsupported content type"}), 400
777
+
778
+ log.info(f"vac/{vector_name} got data: {data}")
779
+
780
+ # Run these operations concurrently
781
+ tasks = []
782
+
783
+ # Extract other data while configs load
784
+ user_input = data.pop('user_input').strip()
785
+ stream_wait_time = data.pop('stream_wait_time', 7)
786
+ stream_timeout = data.pop('stream_timeout', 120)
787
+ chat_history = data.pop('chat_history', None)
788
+ vector_name_param = data.pop('vector_name', vector_name)
789
+ data.pop('trace_id', None) # to ensure not in kwargs
790
+
791
+ # Task 3: Process chat history
792
+ chat_history_task = asyncio.create_task(extract_chat_history_async(chat_history))
793
+ tasks.append(chat_history_task)
794
+
795
+ # Await all tasks concurrently
796
+ results = await asyncio.gather(*tasks, return_exceptions=True)
797
+
798
+ paired_messages = results[0] if not isinstance(results[0], Exception) else []
799
+
800
+ # Only create span after we have trace
801
+ all_input = {
802
+ 'user_input': user_input,
803
+ 'vector_name': vector_name_param,
804
+ 'chat_history': paired_messages,
805
+ 'stream_wait_time': stream_wait_time,
806
+ 'stream_timeout': stream_timeout,
807
+ 'kwargs': data
808
+ }
809
+
810
+ return {
811
+ "all_input": all_input
812
+ }
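For reference, a sketch of the JSON payload `prep_vac_async` accepts, based on the fields popped above (values are hypothetical). Anything not popped stays in `data` and is forwarded to the interpreter via `kwargs`; `trace_id` is discarded:

```python
payload = {
    "user_input": "Hello",        # required; whitespace is stripped
    "chat_history": [],           # paired via extract_chat_history_async
    "stream_wait_time": 7,        # optional, default 7
    "stream_timeout": 120,        # optional, default 120
    "vector_name": "my_vac",      # optional override of the URL path value
    "temperature": 0.2,           # hypothetical extra field, passed through in kwargs
}
```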
702
813
 
703
814
  def handle_file_upload(self, file, vector_name):
704
815
  try:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sunholo
3
- Version: 0.139.1
3
+ Version: 0.140.4
4
4
  Summary: AI DevOps - a package to help deploy GenAI to the Cloud.
5
5
  Author-email: Holosun ApS <multivac@sunholo.com>
6
6
  License: Apache License, Version 2.0
@@ -1,8 +1,8 @@
1
1
  sunholo/__init__.py,sha256=InRbX4V0-qdNHo9zYH3GEye7ASLR6LX8-SMvPV4Jsaw,1212
2
2
  sunholo/custom_logging.py,sha256=JXZTnXp_DixP3jwYfKw4LYRDS9IuTq7ctCgfZbI2rxA,22023
3
3
  sunholo/langchain_types.py,sha256=uZ4zvgej_f7pLqjtu4YP7qMC_eZD5ym_5x4pyvA1Ih4,1834
4
- sunholo/agents/__init__.py,sha256=X2I3pPkGeKWjc3d0QgSpkTyqD8J8JtrEWqwrumf1MMc,391
5
- sunholo/agents/chat_history.py,sha256=Gph_CdlP2otYnNdR1q1Umyyyvcad2F6K3LxU5yBQ9l0,5387
4
+ sunholo/agents/__init__.py,sha256=AauG3l0y4r5Fzx1zJfZ634M4o-0o7B7J5T8k_gPvNqE,370
5
+ sunholo/agents/chat_history.py,sha256=4jGCHBP8dZfUjSJPxgKyh6nOqhnHRn1x9U3CnGb0I5E,7624
6
6
  sunholo/agents/dispatch_to_qa.py,sha256=NHihwAoCJ5_Lk11e_jZnucVUGQyZHCB-YpkfMHBCpQk,8882
7
7
  sunholo/agents/langserve.py,sha256=C46ph2mnygr6bdHijYWYyfQDI9ylAF0_9Kx2PfcCJpU,4414
8
8
  sunholo/agents/pubsub.py,sha256=TscZN_6am6DfaQkC-Yl18ZIBOoLE-0nDSiil6GpQEh4,1344
@@ -12,10 +12,9 @@ sunholo/agents/swagger.py,sha256=2tzGmpveUMmTREykZvVnDj3j295wyOMu7mUFDnXdY3c,106
12
12
  sunholo/agents/fastapi/__init__.py,sha256=S_pj4_bTUmDGoq_exaREHlOKThi0zTuGT0VZY0YfODQ,88
13
13
  sunholo/agents/fastapi/base.py,sha256=W-cyF8ZDUH40rc-c-Apw3-_8IIi2e4Y9qRtnoVnsc1Q,2521
14
14
  sunholo/agents/fastapi/qna_routes.py,sha256=lKHkXPmwltu9EH3RMwmD153-J6pE7kWQ4BhBlV3to-s,3864
15
- sunholo/agents/flask/__init__.py,sha256=poJDKMr2qj8qMb99JqCvCPSiEt1tj2tLQ3hKW3f2aVw,107
15
+ sunholo/agents/flask/__init__.py,sha256=dEoByI3gDNUOjpX1uVKP7uPjhfFHJubbiaAv3xLopnk,63
16
16
  sunholo/agents/flask/base.py,sha256=HLz3Z5efWaewTwSFEM6JH48NA9otoJBoVFJlARGk9L8,788
17
- sunholo/agents/flask/qna_routes.py,sha256=uwUD1yrzOPH27m2AXpiQrPk_2VfJOQOM6dAynOWQtoQ,22532
18
- sunholo/agents/flask/vac_routes.py,sha256=9bytTeoOJQOYxsPGLIXLItDmnbB9zDXmYM0lBIwDe8w,28335
17
+ sunholo/agents/flask/vac_routes.py,sha256=Dk9QrPvXNRzAWxaTWsYgHVxmK-Rjrvgd6-sAuvqt9P8,33236
19
18
  sunholo/archive/__init__.py,sha256=qNHWm5rGPVOlxZBZCpA1wTYPbalizRT7f8X4rs2t290,31
20
19
  sunholo/archive/archive.py,sha256=PxVfDtO2_2ZEEbnhXSCbXLdeoHoQVImo4y3Jr2XkCFY,1204
21
20
  sunholo/auth/__init__.py,sha256=TeP-OY0XGxYV_8AQcVGoh35bvyWhNUcMRfhuD5l44Sk,91
@@ -169,9 +168,9 @@ sunholo/vertex/init.py,sha256=1OQwcPBKZYBTDPdyU7IM4X4OmiXLdsNV30C-fee2scQ,2875
169
168
  sunholo/vertex/memory_tools.py,sha256=tBZxqVZ4InTmdBvLlOYwoSEWu4-kGquc-gxDwZCC4FA,7667
170
169
  sunholo/vertex/safety.py,sha256=S9PgQT1O_BQAkcqauWncRJaydiP8Q_Jzmu9gxYfy1VA,2482
171
170
  sunholo/vertex/type_dict_to_json.py,sha256=uTzL4o9tJRao4u-gJOFcACgWGkBOtqACmb6ihvCErL8,4694
172
- sunholo-0.139.1.dist-info/licenses/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
173
- sunholo-0.139.1.dist-info/METADATA,sha256=vOG7X6ZpBgF3og9_BNDil-Loy2tAW38orcqYo3ObTTk,10067
174
- sunholo-0.139.1.dist-info/WHEEL,sha256=DnLRTWE75wApRYVsjgc6wsVswC54sMSJhAEd4xhDpBk,91
175
- sunholo-0.139.1.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
176
- sunholo-0.139.1.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
177
- sunholo-0.139.1.dist-info/RECORD,,
171
+ sunholo-0.140.4.dist-info/licenses/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
172
+ sunholo-0.140.4.dist-info/METADATA,sha256=oKjtRKqFPtwaoV177G0nRWfv3P9xfGB3U4fonaffJrk,10067
173
+ sunholo-0.140.4.dist-info/WHEEL,sha256=Nw36Djuh_5VDukK0H78QzOX-_FQEo6V37m3nkm96gtU,91
174
+ sunholo-0.140.4.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
175
+ sunholo-0.140.4.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
176
+ sunholo-0.140.4.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (80.4.0)
2
+ Generator: setuptools (80.7.1)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,604 +0,0 @@
1
- # Copyright [2024] [Holosun ApS]
2
- #
3
- # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
6
- #
7
- # http://www.apache.org/licenses/LICENSE-2.0
8
- #
9
- # Unless required by applicable law or agreed to in writing, software
10
- # distributed under the License is distributed on an "AS IS" BASIS,
11
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- # See the License for the specific language governing permissions and
13
- # limitations under the License.
14
-
15
-
16
- import json
17
- import traceback
18
- import uuid
19
-
20
- from ...agents import extract_chat_history, handle_special_commands
21
- from ...qna.parsers import parse_output
22
- from ...streaming import start_streaming_chat
23
- from ...archive import archive_qa
24
- from ...custom_logging import log
25
- from ...utils.config import load_config
26
- from ...utils import ConfigManager
27
- from ...utils.version import sunholo_version
28
- import os
29
- from ...gcs.add_file import add_file_to_gcs, handle_base64_image
30
- from ..swagger import validate_api_key
31
- from datetime import datetime, timedelta
32
-
33
- try:
34
- from flask import request, jsonify, Response
35
- except ImportError:
36
- pass
37
-
38
- try:
39
- from langfuse.decorators import langfuse_context, observe
40
- except ImportError:
41
- pass
42
-
43
- # Cache dictionary to store validated API keys
44
- api_key_cache = {}
45
- cache_duration = timedelta(minutes=5) # Cache duration
46
-
47
- def make_openai_response(user_message, vector_name, answer):
48
- response_id = str(uuid.uuid4())
49
- log.info(f"openai response: Q: {user_message} to VECTOR_NAME: {vector_name} - A: {answer}")
50
- openai_response = {
51
- "id": response_id,
52
- "object": "chat.completion",
53
- "created": str(int(datetime.now().timestamp())),
54
- "model": vector_name,
55
- "system_fingerprint": sunholo_version(),
56
- "choices": [{
57
- "index": 0,
58
- "message": {
59
- "role": "assistant",
60
- "content": answer,
61
- },
62
- "logprobs": None,
63
- "finish_reason": "stop"
64
- }],
65
- "usage": {
66
- "prompt_tokens": 0,
67
- "completion_tokens": 0,
68
- "total_tokens": 0
69
- }
70
- }
71
-
72
- log.info(f"OpenAI response: {openai_response}")
73
- return jsonify(openai_response)
74
-
75
- def register_qna_routes(app, stream_interpreter, vac_interpreter):
76
- """
77
- Register Q&A routes for a Flask application.
78
-
79
- This function sets up multiple routes for handling Q&A operations,
80
- including streaming responses and processing static responses.
81
-
82
- Args:
83
- app (Flask): The Flask application instance.
84
- stream_interpreter (function): Function to handle streaming Q&A responses.
85
- vac_interpreter (function): Function to handle static Q&A responses.
86
-
87
- Returns:
88
- None
89
-
90
- Example:
91
- from flask import Flask
92
- app = Flask(__name__)
93
-
94
- def dummy_stream_interpreter(...):
95
- ...
96
-
97
- def dummy_vac_interpreter(...):
98
- ...
99
-
100
- register_qna_routes(app, dummy_stream_interpreter, dummy_vac_interpreter)
101
- """
102
- @app.route("/")
103
- def home():
104
- return jsonify("OK")
105
-
106
- @app.route("/health")
107
- def health():
108
- return jsonify({"status": "healthy"})
109
-
110
- @app.route('/vac/streaming/<vector_name>', methods=['POST'])
111
- def stream_qa(vector_name):
112
- """
113
- Handle streaming Q&A responses.
114
-
115
- This function sets up a route to handle streaming Q&A responses based on
116
- the provided vector name.
117
-
118
- Args:
119
- vector_name (str): The name of the vector for the request.
120
-
121
- Returns:
122
- Response: A Flask response object streaming the Q&A response content.
123
-
124
- Example:
125
- response = stream_qa("example_vector")
126
- """
127
- observed_stream_interpreter = observe()(stream_interpreter)
128
- prep = prep_vac(request, vector_name)
129
- log.debug(f"Processing prep: {prep}")
130
- trace = prep["trace"]
131
- span = prep["span"]
132
- command_response = prep["command_response"]
133
- vac_config = prep["vac_config"]
134
- all_input = prep["all_input"]
135
-
136
- if command_response:
137
- return jsonify(command_response)
138
-
139
- log.info(f'Streaming data with: {all_input}')
140
- if span:
141
- generation = span.generation(
142
- name="start_streaming_chat",
143
- metadata=vac_config,
144
- input = all_input,
145
- completion_start_time=datetime.now(),
146
- model=vac_config.get("model") or vac_config.get("llm")
147
- )
148
-
149
- def generate_response_content():
150
-
151
- for chunk in start_streaming_chat(question=all_input["user_input"],
152
- vector_name=vector_name,
153
- qna_func=observed_stream_interpreter,
154
- chat_history=all_input["chat_history"],
155
- wait_time=all_input["stream_wait_time"],
156
- timeout=all_input["stream_timeout"],
157
- #kwargs
158
- **all_input["kwargs"]
159
- ):
160
- if isinstance(chunk, dict) and 'answer' in chunk:
161
- # When we encounter the dictionary, we yield it as a JSON string
162
- # and stop the generator.
163
- if trace:
164
- chunk["trace"] = trace.id
165
- chunk["trace_url"] = trace.get_trace_url()
166
- archive_qa(chunk, vector_name)
167
- if trace:
168
- generation.end(output=json.dumps(chunk))
169
- span.end(output=json.dumps(chunk))
170
- trace.update(output=json.dumps(chunk))
171
-
172
- return json.dumps(chunk)
173
-
174
- else:
175
- # Otherwise, we yield the plain text chunks as they come in.
176
- yield chunk
177
-
178
- # Here, the generator function will handle streaming the content to the client.
179
- response = Response(generate_response_content(), content_type='text/plain; charset=utf-8')
180
- response.headers['Transfer-Encoding'] = 'chunked'
181
-
182
- log.debug(f"streaming response: {response}")
183
- if trace:
184
- generation.end(output=response)
185
- span.end(output=response)
186
- trace.update(output=response)
187
-
188
- #if 'user_id' in all_input["kwargs"]:
189
- # kwargs = all_input["kwargs"]
190
- # config = ConfigManager(vector_name)
191
- # add_user_history_rag(kwargs.pop('user_id'),
192
- # config,
193
- # question=all_input.pop("user_input"),
194
- # answer=response.get('answer'),
195
- # metadata=all_input)
196
-
197
- return response
198
-
199
- @app.route('/vac/<vector_name>', methods=['POST'])
200
- def process_qna(vector_name):
201
- """
202
- Handle static Q&A responses.
203
-
204
- This function sets up a route to handle static Q&A responses based on
205
- the provided vector name.
206
-
207
- Args:
208
- vector_name (str): The name of the vector for the request.
209
-
210
- Returns:
211
- Response: A Flask response object with the Q&A response content.
212
-
213
- Example:
214
- response = process_qna("example_vector")
215
- """
216
- observed_vac_interpreter = observe()(vac_interpreter)
217
- prep = prep_vac(request, vector_name)
218
- log.debug(f"Processing prep: {prep}")
219
- trace = prep["trace"]
220
- span = prep["span"]
221
- command_response = prep["command_response"]
222
- vac_config = prep["vac_config"]
223
- all_input = prep["all_input"]
224
-
225
- if command_response:
226
- return jsonify(command_response)
227
-
228
- try:
229
- if span:
230
- generation = span.generation(
231
- name="vac_interpreter",
232
- metadata=vac_config,
233
- input = all_input,
234
- model=vac_config.get("model") or vac_config.get("llm")
235
- )
236
- bot_output = observed_vac_interpreter(
237
- question=all_input["user_input"],
238
- vector_name=vector_name,
239
- chat_history=all_input["chat_history"],
240
- **all_input["kwargs"]
241
- )
242
- if span:
243
- generation.end(output=bot_output)
244
- # {"answer": "The answer", "source_documents": [{"page_content": "The page content", "metadata": "The metadata"}]}
245
- bot_output = parse_output(bot_output)
246
- if trace:
247
- bot_output["trace"] = trace.id
248
- bot_output["trace_url"] = trace.get_trace_url()
249
- archive_qa(bot_output, vector_name)
250
- log.info(f'==LLM Q:{all_input["user_input"]} - A:{bot_output}')
251
-
252
-
253
- except Exception as err:
254
- bot_output = {'answer': f'QNA_ERROR: An error occurred while processing /vac/{vector_name}: {str(err)} traceback: {traceback.format_exc()}'}
255
-
256
- if trace:
257
- span.end(output=jsonify(bot_output))
258
- trace.update(output=jsonify(bot_output))
259
-
260
- # {'answer': 'output'}
261
- return jsonify(bot_output)
262
-
263
- @app.before_request
264
- def check_authentication_header():
265
- if request.path.startswith('/openai/'):
266
- log.debug(f'Request headers: {request.headers}')
267
- # the header forwarded
268
- auth_header = request.headers.get('X-Forwarded-Authorization')
269
- if auth_header:
270
-
271
- if auth_header.startswith('Bearer '):
272
- api_key = auth_header.split(' ')[1] # Assuming "Bearer <api_key>"
273
- else:
274
- return jsonify({'error': 'Invalid authorization header does not start with "Bearer " - got: {auth_header}'}), 401
275
-
276
- endpoints_host = os.getenv('_ENDPOINTS_HOST')
277
- if not endpoints_host:
278
- return jsonify({'error': '_ENDPOINTS_HOST environment variable not found'}), 401
279
-
280
- # Check cache first
281
- current_time = datetime.now()
282
- if api_key in api_key_cache:
283
- cached_result, cache_time = api_key_cache[api_key]
284
- if current_time - cache_time < cache_duration:
285
- if not cached_result:
286
- return jsonify({'error': 'Invalid cached API key'}), 401
287
- else:
288
- return # Valid API key, continue to the endpoint
289
- else:
290
- # Cache expired, remove from cache
291
- del api_key_cache[api_key]
292
-
293
- # Validate API key
294
- is_valid = validate_api_key(api_key, endpoints_host)
295
- # Update cache
296
- api_key_cache[api_key] = (is_valid, current_time)
297
-
298
- if not is_valid:
299
- return jsonify({'error': 'Invalid API key'}), 401
300
- else:
301
- return jsonify({'error': 'Missing Authorization header'}), 401
302
-
303
- @app.route('/openai/health', methods=['GET', 'POST'])
304
- def openai_health_endpoint():
305
- return jsonify({'message': 'Success'})
306
-
307
- @app.route('/openai/v1/chat/completions', methods=['POST'])
308
- @app.route('/openai/v1/chat/completions/<vector_name>', methods=['POST'])
309
- def openai_compatible_endpoint(vector_name=None):
310
- """
311
- Handle OpenAI-compatible chat completions.
312
-
313
- This function sets up routes to handle OpenAI-compatible chat completion requests,
314
- both with and without a specified vector name.
315
-
316
- Args:
317
- vector_name (str, optional): The name of the vector for the request. Defaults to None.
318
-
319
- Returns:
320
- Response: A Flask response object with the chat completion content.
321
-
322
- Example:
323
- response = openai_compatible_endpoint("example_vector")
324
- """
325
- data = request.get_json()
326
- log.info(f'openai_compatible_endpoint got data: {data} for vector: {vector_name}')
327
-
328
- vector_name = vector_name or data.pop('model', None)
329
- messages = data.pop('messages', None)
330
- chat_history = data.pop('chat_history', None)
331
- stream = data.pop('stream', False)
332
-
333
- if not messages:
334
- return jsonify({"error": "No messages provided"}), 400
335
-
336
- user_message = None
337
- image_uri = None
338
- mime_type = None
339
-
340
- for msg in reversed(messages):
341
- if msg['role'] == 'user':
342
- if isinstance(msg['content'], list):
343
- for content_item in msg['content']:
344
- if content_item['type'] == 'text':
345
- user_message = content_item['text']
346
- elif content_item['type'] == 'image_url':
347
- base64_data = content_item['image_url']['url']
348
- image_uri, mime_type = handle_base64_image(base64_data, vector_name)
349
- else:
350
- user_message = msg['content']
351
- break
352
-
353
- if not user_message:
354
- return jsonify({"error": "No user message provided"}), 400
355
- else:
356
- log.info(f"User message: {user_message}")
357
-
358
- paired_messages = extract_chat_history(chat_history)
359
- command_response = handle_special_commands(user_message, vector_name, paired_messages)
360
-
361
- if command_response is not None:
362
-
363
- return make_openai_response(user_message, vector_name, command_response)
364
-
365
- if image_uri:
366
- data["image_uri"] = image_uri
367
- data["mime"] = mime_type
368
-
369
- all_input = {
370
- "user_input": user_message,
371
- "chat_history": chat_history,
372
- "kwargs": data
373
- }
374
-
375
- observed_stream_interpreter = observe()(stream_interpreter)
376
-
377
- response_id = str(uuid.uuid4())
378
-
379
- def generate_response_content():
380
- for chunk in start_streaming_chat(question=user_message,
381
- vector_name=vector_name,
382
- qna_func=observed_stream_interpreter,
383
- chat_history=all_input["chat_history"],
384
- wait_time=all_input.get("stream_wait_time", 1),
385
- timeout=all_input.get("stream_timeout", 60),
386
- **all_input["kwargs"]
387
- ):
388
- if isinstance(chunk, dict) and 'answer' in chunk:
389
- openai_chunk = {
390
- "id": response_id,
391
- "object": "chat.completion.chunk",
392
- "created": str(int(datetime.now().timestamp())),
393
- "model": vector_name,
394
- "system_fingerprint": sunholo_version(),
395
- "choices": [{
396
- "index": 0,
397
- "delta": {"content": chunk['answer']},
398
- "logprobs": None,
399
- "finish_reason": None
400
- }]
401
- }
402
- yield json.dumps(openai_chunk) + "\n"
403
- else:
404
- log.info(f"Unknown chunk: {chunk}")
405
-
406
- final_chunk = {
407
- "id": response_id,
408
- "object": "chat.completion.chunk",
409
- "created": str(int(datetime.now().timestamp())),
410
- "model": vector_name,
411
- "system_fingerprint": sunholo_version(),
412
- "choices": [{
413
- "index": 0,
414
- "delta": {},
415
- "logprobs": None,
416
- "finish_reason": "stop"
417
- }]
418
- }
419
- yield json.dumps(final_chunk) + "\n"
420
-
421
- if stream:
422
- log.info("Streaming openai chunks")
423
- return Response(generate_response_content(), content_type='text/plain; charset=utf-8')
424
-
425
- try:
426
- observed_vac_interpreter = observe()(vac_interpreter)
427
- bot_output = observed_vac_interpreter(
428
- question=user_message,
429
- vector_name=vector_name,
430
- chat_history=all_input["chat_history"],
431
- **all_input["kwargs"]
432
- )
433
- bot_output = parse_output(bot_output)
434
-
435
- log.info(f"Bot output: {bot_output}")
436
- if bot_output:
437
- return make_openai_response(user_message, vector_name, bot_output.get('answer', ''))
438
- else:
439
- return make_openai_response(user_message, vector_name, 'ERROR: could not find an answer')
440
-
441
- except Exception as err:
442
- log.error(f"OpenAI response error: {str(err)} traceback: {traceback.format_exc()}")
443
-
444
- return make_openai_response(user_message, vector_name, f'ERROR: {str(err)}')
445
-
446
-
447
- def create_langfuse_trace(request, vector_name):
448
- """
449
- Create a Langfuse trace for tracking requests.
450
-
451
- This function initializes a Langfuse trace object based on the request headers
452
- and vector name.
453
-
454
- Args:
455
- request (Request): The Flask request object.
456
- vector_name (str): The name of the vector for the request.
457
-
458
- Returns:
459
- Langfuse.Trace: The Langfuse trace object.
460
-
461
- Example:
462
- trace = create_langfuse_trace(request, "example_vector")
463
- """
464
- try:
465
- from langfuse import Langfuse
466
- langfuse = Langfuse()
467
- except ImportError as err:
468
- print(f"No langfuse installed for agents.flask.register_qna_routes, install via `pip install sunholo[http]` - {str(err)}")
469
-
470
- return None
471
-
472
- user_id = request.headers.get("X-User-ID")
473
- session_id = request.headers.get("X-Session-ID")
474
- message_source = request.headers.get("X-Message-Source")
475
-
476
- package_version = sunholo_version()
477
- tags = [package_version]
478
- if message_source:
479
- tags.append(message_source)
480
-
481
- return langfuse.trace(
482
- name = f"/vac/{vector_name}",
483
- user_id = user_id,
484
- session_id = session_id,
485
- tags = tags,
486
- release = f"sunholo-v{package_version}"
487
- )
488
-
489
- def prep_vac(request, vector_name):
490
- """
491
- Prepare the input data for a VAC request.
492
-
493
- This function processes the incoming request data, extracts relevant
494
- information, and prepares the data for VAC processing.
495
-
496
- Args:
497
- request (Request): The Flask request object.
498
- vector_name (str): The name of the vector for the request.
499
-
500
- Returns:
501
- dict: A dictionary containing prepared input data and metadata.
502
-
503
- Example:
504
- prep_data = prep_vac(request, "example_vector")
505
- """
506
- #trace = create_langfuse_trace(request, vector_name)
507
- trace = None
508
- span = None
509
-
510
- if request.content_type.startswith('application/json'):
511
- data = request.get_json()
512
- elif request.content_type.startswith('multipart/form-data'):
513
- data = request.form.to_dict()
514
- if 'file' in request.files:
515
- file = request.files['file']
516
- if file.filename != '':
517
- log.info(f"Found file: {file.filename} to upload to GCS")
518
- try:
519
- image_uri, mime_type = handle_file_upload(file, vector_name)
520
- data["image_uri"] = image_uri
521
- data["image_url"] = image_uri
522
- data["mime"] = mime_type
523
- except Exception as e:
524
- log.error(f"Error uploading file: {str(e)}")
525
- else:
526
- log.info("No file selected to upload to GCS")
527
- else:
528
- log.warning(f"Error uploading file: Unsupported content type {request.content_type}")
529
-
530
- log.info(f"vac/{vector_name} got data: {data}")
531
-
532
- config, _ = load_config("config/llm_config.yaml")
533
- vac_configs = config.get("vac")
534
- if vac_configs:
535
- vac_config = vac_configs.get(vector_name)
536
- if not vac_config:
537
- log.warning("Not a local configured VAC, may be a remote config not synced yet")
538
-
539
- if trace and vac_config:
540
- trace.update(input=data, metadata=vac_config)
541
-
542
- user_input = data.pop('user_input').strip()
543
- stream_wait_time = data.pop('stream_wait_time', 7)
544
- stream_timeout = data.pop('stream_timeout', 120)
545
- chat_history = data.pop('chat_history', None)
546
- vector_name = data.pop('vector_name', vector_name)
547
-
548
- log.info("Turning chat_history into paired tuples")
549
- paired_messages = extract_chat_history(chat_history)
550
-
551
- all_input = {'user_input': user_input,
552
- 'vector_name': vector_name,
553
- 'chat_history': paired_messages,
554
- 'stream_wait_time': stream_wait_time,
555
- 'stream_timeout': stream_timeout,
556
- 'kwargs': data}
557
-
558
- if trace:
559
- span = trace.span(
560
- name="VAC",
561
- metadata=vac_config,
562
- input = all_input
563
- )
564
- command_response = handle_special_commands(user_input, vector_name, paired_messages)
565
- if command_response is not None:
566
- if trace:
567
- trace.update(output=jsonify(command_response))
568
-
569
- return {
570
- "trace": trace,
571
- "span": span,
572
- "command_response": command_response,
573
- "all_input": all_input,
574
- "vac_config": vac_config
575
- }
576
-
577
-
578
- def handle_file_upload(file, vector_name):
579
- """
580
- Handle file upload and store the file in Google Cloud Storage.
581
-
582
- This function saves the uploaded file locally, uploads it to Google Cloud Storage,
583
- and then removes the local copy.
584
-
585
- Args:
586
- file (FileStorage): The uploaded file.
587
- vector_name (str): The name of the vector for the request.
588
-
589
- Returns:
590
- tuple: A tuple containing the URI of the uploaded file and its MIME type.
591
-
592
- Raises:
593
- Exception: If the file upload fails.
594
-
595
- Example:
596
- uri, mime_type = handle_file_upload(file, "example_vector")
597
- """
598
- try:
599
- file.save(file.filename)
600
- image_uri = add_file_to_gcs(file.filename, vector_name)
601
- os.remove(file.filename) # Clean up the saved file
602
- return image_uri, file.mimetype
603
- except Exception as e:
604
- raise Exception(f'File upload failed: {str(e)}')