sunholo 0.67.10__tar.gz → 0.68.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sunholo-0.67.10 → sunholo-0.68.1}/PKG-INFO +2 -2
- {sunholo-0.67.10 → sunholo-0.68.1}/setup.py +1 -1
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/agents/chat_history.py +5 -1
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/agents/flask/qna_routes.py +9 -3
- sunholo-0.68.1/sunholo/agents/flask/vac_routes.py +501 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/components/retriever.py +9 -4
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/database/alloydb.py +3 -183
- sunholo-0.68.1/sunholo/database/alloydb_client.py +196 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/gcs/add_file.py +4 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo.egg-info/PKG-INFO +2 -2
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo.egg-info/SOURCES.txt +2 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/LICENSE.txt +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/MANIFEST.in +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/README.md +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/setup.cfg +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/__init__.py +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/agents/__init__.py +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/agents/dispatch_to_qa.py +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/agents/fastapi/__init__.py +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/agents/fastapi/base.py +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/agents/fastapi/qna_routes.py +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/agents/flask/__init__.py +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/agents/flask/base.py +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/agents/langserve.py +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/agents/pubsub.py +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/agents/route.py +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/agents/special_commands.py +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/agents/swagger.py +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/archive/__init__.py +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/archive/archive.py +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/auth/__init__.py +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/auth/run.py +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/bots/__init__.py +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/bots/discord.py +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/bots/github_webhook.py +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/bots/webapp.py +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/chunker/__init__.py +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/chunker/data_to_embed_pubsub.py +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/chunker/doc_handling.py +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/chunker/images.py +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/chunker/loaders.py +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/chunker/message_data.py +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/chunker/pdfs.py +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/chunker/publish.py +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/chunker/splitter.py +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/cli/__init__.py +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/cli/chat_vac.py +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/cli/cli.py +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/cli/cli_init.py +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/cli/configs.py +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/cli/deploy.py +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/cli/embedder.py +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/cli/merge_texts.py +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/cli/run_proxy.py +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/cli/sun_rich.py +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/cli/swagger.py +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/components/__init__.py +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/components/llm.py +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/components/vectorstore.py +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/database/__init__.py +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/database/database.py +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/database/lancedb.py +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/database/sql/sb/create_function.sql +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/database/sql/sb/create_function_time.sql +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/database/sql/sb/create_table.sql +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/database/sql/sb/delete_source_row.sql +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/database/sql/sb/return_sources.sql +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/database/sql/sb/setup.sql +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/database/static_dbs.py +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/database/uuid.py +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/embedder/__init__.py +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/embedder/embed_chunk.py +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/gcs/__init__.py +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/gcs/download_url.py +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/gcs/metadata.py +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/langfuse/__init__.py +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/langfuse/callback.py +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/langfuse/prompts.py +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/llamaindex/__init__.py +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/llamaindex/generate.py +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/llamaindex/get_files.py +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/llamaindex/import_files.py +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/logging.py +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/lookup/__init__.py +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/lookup/model_lookup.yaml +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/patches/__init__.py +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/patches/langchain/__init__.py +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/patches/langchain/lancedb.py +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/patches/langchain/vertexai.py +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/pubsub/__init__.py +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/pubsub/process_pubsub.py +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/pubsub/pubsub_manager.py +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/qna/__init__.py +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/qna/parsers.py +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/qna/retry.py +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/streaming/__init__.py +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/streaming/content_buffer.py +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/streaming/langserve.py +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/streaming/stream_lookup.py +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/streaming/streaming.py +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/summarise/__init__.py +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/summarise/summarise.py +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/utils/__init__.py +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/utils/api_key.py +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/utils/big_context.py +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/utils/config.py +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/utils/config_schema.py +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/utils/gcp.py +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/utils/gcp_project.py +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/utils/parsers.py +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/utils/timedelta.py +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/utils/user_ids.py +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/utils/version.py +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/vertex/__init__.py +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/vertex/init.py +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/vertex/memory_tools.py +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo/vertex/safety.py +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo.egg-info/dependency_links.txt +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo.egg-info/entry_points.txt +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo.egg-info/requires.txt +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/sunholo.egg-info/top_level.txt +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/tests/test_chat_history.py +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/tests/test_chunker.py +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/tests/test_config.py +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/tests/test_dispatch_to_qa.py +0 -0
- {sunholo-0.67.10 → sunholo-0.68.1}/tests/test_swagger.py +0 -0
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: sunholo
|
|
3
|
-
Version: 0.67.10
|
|
3
|
+
Version: 0.68.1
|
|
4
4
|
Summary: Large Language Model DevOps - a package to help deploy LLMs to the Cloud.
|
|
5
5
|
Home-page: https://github.com/sunholo-data/sunholo-py
|
|
6
|
-
Download-URL: https://github.com/sunholo-data/sunholo-py/archive/refs/tags/v0.67.10.tar.gz
|
|
6
|
+
Download-URL: https://github.com/sunholo-data/sunholo-py/archive/refs/tags/v0.68.1.tar.gz
|
|
7
7
|
Author: Holosun ApS
|
|
8
8
|
Author-email: multivac@sunholo.com
|
|
9
9
|
License: Apache License, Version 2.0
|
|
@@ -103,7 +103,7 @@ def create_message_element(message: dict):
|
|
|
103
103
|
if 'text' in message: # This is a Slack or Google Chat message
|
|
104
104
|
log.info(f"Found text element - {message['text']}")
|
|
105
105
|
return message['text']
|
|
106
|
-
elif 'content' in message: # Discord message
|
|
106
|
+
elif 'content' in message: # Discord or OpenAI history message
|
|
107
107
|
log.info(f"Found content element - {message['content']}")
|
|
108
108
|
return message['content']
|
|
109
109
|
else:
|
|
@@ -130,6 +130,8 @@ def is_human(message: dict):
|
|
|
130
130
|
return message["name"] == "Human"
|
|
131
131
|
elif 'sender' in message: # Google Chat
|
|
132
132
|
return message['sender']['type'] == 'HUMAN'
|
|
133
|
+
elif 'role' in message:
|
|
134
|
+
return message['role'] == 'user'
|
|
133
135
|
else:
|
|
134
136
|
# Slack: Check for the 'user' field and absence of 'bot_id' field
|
|
135
137
|
return 'user' in message and 'bot_id' not in message
|
|
@@ -174,5 +176,7 @@ def is_ai(message: dict):
|
|
|
174
176
|
return message["name"] == "AI"
|
|
175
177
|
elif 'sender' in message: # Google Chat
|
|
176
178
|
return message['sender']['type'] == 'BOT'
|
|
179
|
+
elif 'role' in message:
|
|
180
|
+
return message['role'] == 'assistant'
|
|
177
181
|
else:
|
|
178
182
|
return 'bot_id' in message # Slack
|
|
@@ -258,7 +258,12 @@ def register_qna_routes(app, stream_interpreter, vac_interpreter):
|
|
|
258
258
|
# the header forwarded
|
|
259
259
|
auth_header = request.headers.get('X-Forwarded-Authorization')
|
|
260
260
|
if auth_header:
|
|
261
|
-
|
|
261
|
+
|
|
262
|
+
if auth_header.startswith('Bearer '):
|
|
263
|
+
api_key = auth_header.split(' ')[1] # Assuming "Bearer <api_key>"
|
|
264
|
+
else:
|
|
265
|
+
return jsonify({'error': 'Invalid authorization header does not start with "Bearer " - got: {auth_header}'}), 401
|
|
266
|
+
|
|
262
267
|
endpoints_host = os.getenv('_ENDPOINTS_HOST')
|
|
263
268
|
if not endpoints_host:
|
|
264
269
|
return jsonify({'error': '_ENDPOINTS_HOST environment variable not found'}), 401
|
|
@@ -425,8 +430,9 @@ def register_qna_routes(app, stream_interpreter, vac_interpreter):
|
|
|
425
430
|
return make_openai_response(user_message, vector_name, 'ERROR: could not find an answer')
|
|
426
431
|
|
|
427
432
|
except Exception as err:
|
|
428
|
-
log.error(f"OpenAI response error: {err}")
|
|
429
|
-
|
|
433
|
+
log.error(f"OpenAI response error: {str(err)} traceback: {traceback.format_exc()}")
|
|
434
|
+
|
|
435
|
+
return make_openai_response(user_message, vector_name, f'ERROR: {str(err)}')
|
|
430
436
|
|
|
431
437
|
|
|
432
438
|
def create_langfuse_trace(request, vector_name):
|
|
@@ -0,0 +1,501 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import traceback
|
|
3
|
+
import datetime
|
|
4
|
+
import uuid
|
|
5
|
+
|
|
6
|
+
from ...agents import extract_chat_history, handle_special_commands
|
|
7
|
+
from ...qna.parsers import parse_output
|
|
8
|
+
from ...streaming import start_streaming_chat
|
|
9
|
+
from ...archive import archive_qa
|
|
10
|
+
from ...logging import log
|
|
11
|
+
from ...utils.config import load_config
|
|
12
|
+
from ...utils.version import sunholo_version
|
|
13
|
+
import os
|
|
14
|
+
from ...gcs.add_file import add_file_to_gcs, handle_base64_image
|
|
15
|
+
from ..swagger import validate_api_key
|
|
16
|
+
from datetime import datetime, timedelta
|
|
17
|
+
|
|
18
|
+
try:
|
|
19
|
+
from flask import request, jsonify, Response
|
|
20
|
+
except ImportError:
|
|
21
|
+
pass
|
|
22
|
+
|
|
23
|
+
try:
|
|
24
|
+
from langfuse.decorators import langfuse_context, observe
|
|
25
|
+
except ImportError:
|
|
26
|
+
pass
|
|
27
|
+
|
|
28
|
+
# Cache dictionary to store validated API keys
|
|
29
|
+
api_key_cache = {}
|
|
30
|
+
cache_duration = timedelta(minutes=5) # Cache duration
|
|
31
|
+
|
|
32
|
+
class VACRoutes:
|
|
33
|
+
"""
|
|
34
|
+
**Usage Example:**
|
|
35
|
+
|
|
36
|
+
```python
|
|
37
|
+
from agents.flask import VACRoutes
|
|
38
|
+
|
|
39
|
+
app = Flask(__name__)
|
|
40
|
+
|
|
41
|
+
def stream_interpreter(question, vector_name, chat_history, **kwargs):
|
|
42
|
+
# Implement your streaming logic
|
|
43
|
+
...
|
|
44
|
+
|
|
45
|
+
def vac_interpreter(question, vector_name, chat_history, **kwargs):
|
|
46
|
+
# Implement your static VAC logic
|
|
47
|
+
...
|
|
48
|
+
|
|
49
|
+
vac_routes = VACRoutes(app, stream_interpreter, vac_interpreter)
|
|
50
|
+
|
|
51
|
+
if __name__ == "__main__":
|
|
52
|
+
app.run(debug=True)
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
"""
|
|
56
|
+
def __init__(self, app, stream_interpreter, vac_interpreter):
|
|
57
|
+
self.app = app
|
|
58
|
+
self.stream_interpreter = stream_interpreter
|
|
59
|
+
self.vac_interpreter = vac_interpreter
|
|
60
|
+
self.register_routes()
|
|
61
|
+
|
|
62
|
+
def register_routes(self):
|
|
63
|
+
"""
|
|
64
|
+
Registers all the VAC routes for the Flask application.
|
|
65
|
+
"""
|
|
66
|
+
# Basic routes
|
|
67
|
+
self.app.route("/", methods=['GET'])(self.home)
|
|
68
|
+
self.app.route("/health", methods=['GET'])(self.health)
|
|
69
|
+
|
|
70
|
+
# Streaming VAC
|
|
71
|
+
self.app.route('/vac/streaming/<vector_name>', methods=['POST'])(self.handle_stream_vac)
|
|
72
|
+
|
|
73
|
+
# Static VAC
|
|
74
|
+
self.app.route('/vac/<vector_name>', methods=['POST'])(self.handle_process_vac)
|
|
75
|
+
|
|
76
|
+
# Authentication middleware
|
|
77
|
+
self.app.before_request(self.check_authentication)
|
|
78
|
+
|
|
79
|
+
# OpenAI health endpoint
|
|
80
|
+
self.app.route('/openai/health', methods=['GET', 'POST'])(self.openai_health_endpoint)
|
|
81
|
+
|
|
82
|
+
# OpenAI compatible endpoint
|
|
83
|
+
self.app.route('/openai/v1/chat/completions', methods=['POST'])(self.handle_openai_compatible_endpoint)
|
|
84
|
+
self.app.route('/openai/v1/chat/completions/<vector_name>', methods=['POST'])(self.handle_openai_compatible_endpoint)
|
|
85
|
+
|
|
86
|
+
def home(self):
|
|
87
|
+
return jsonify("OK")
|
|
88
|
+
|
|
89
|
+
def health(self):
|
|
90
|
+
return jsonify({"status": "healthy"})
|
|
91
|
+
|
|
92
|
+
def make_openai_response(self, user_message, vector_name, answer):
|
|
93
|
+
response_id = str(uuid.uuid4())
|
|
94
|
+
log.info("openai response: Q: {user_message} to VECTOR_NAME: {vector_name} - A: {answer}")
|
|
95
|
+
openai_response = {
|
|
96
|
+
"id": response_id,
|
|
97
|
+
"object": "chat.completion",
|
|
98
|
+
"created": str(int(datetime.now().timestamp())),
|
|
99
|
+
"model": vector_name,
|
|
100
|
+
"system_fingerprint": sunholo_version(),
|
|
101
|
+
"choices": [{
|
|
102
|
+
"index": 0,
|
|
103
|
+
"message": {
|
|
104
|
+
"role": "assistant",
|
|
105
|
+
"content": answer,
|
|
106
|
+
},
|
|
107
|
+
"logprobs": None,
|
|
108
|
+
"finish_reason": "stop"
|
|
109
|
+
}],
|
|
110
|
+
"usage": {
|
|
111
|
+
"prompt_tokens": len(user_message.split()),
|
|
112
|
+
"completion_tokens": len(answer.split()),
|
|
113
|
+
"total_tokens": len(user_message.split()) + len(answer.split())
|
|
114
|
+
}
|
|
115
|
+
}
|
|
116
|
+
|
|
117
|
+
log.info(f"OpenAI response: {openai_response}")
|
|
118
|
+
return jsonify(openai_response)
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def handle_stream_vac(self, vector_name):
|
|
122
|
+
observed_stream_interpreter = observe()(self.stream_interpreter)
|
|
123
|
+
prep = self.prep_vac(request, vector_name)
|
|
124
|
+
log.debug(f"Processing prep: {prep}")
|
|
125
|
+
trace = prep["trace"]
|
|
126
|
+
span = prep["span"]
|
|
127
|
+
command_response = prep["command_response"]
|
|
128
|
+
vac_config = prep["vac_config"]
|
|
129
|
+
all_input = prep["all_input"]
|
|
130
|
+
|
|
131
|
+
if command_response:
|
|
132
|
+
return jsonify(command_response)
|
|
133
|
+
|
|
134
|
+
log.info(f'Streaming data with: {all_input}')
|
|
135
|
+
if span:
|
|
136
|
+
generation = span.generation(
|
|
137
|
+
name="start_streaming_chat",
|
|
138
|
+
metadata=vac_config,
|
|
139
|
+
input = all_input,
|
|
140
|
+
completion_start_time=datetime.datetime.now(),
|
|
141
|
+
model=vac_config.get("model") or vac_config.get("llm")
|
|
142
|
+
)
|
|
143
|
+
|
|
144
|
+
def generate_response_content():
|
|
145
|
+
|
|
146
|
+
for chunk in start_streaming_chat(question=all_input["user_input"],
|
|
147
|
+
vector_name=vector_name,
|
|
148
|
+
qna_func=observed_stream_interpreter,
|
|
149
|
+
chat_history=all_input["chat_history"],
|
|
150
|
+
wait_time=all_input["stream_wait_time"],
|
|
151
|
+
timeout=all_input["stream_timeout"],
|
|
152
|
+
#kwargs
|
|
153
|
+
**all_input["kwargs"]
|
|
154
|
+
):
|
|
155
|
+
if isinstance(chunk, dict) and 'answer' in chunk:
|
|
156
|
+
# When we encounter the dictionary, we yield it as a JSON string
|
|
157
|
+
# and stop the generator.
|
|
158
|
+
if trace:
|
|
159
|
+
chunk["trace"] = trace.id
|
|
160
|
+
chunk["trace_url"] = trace.get_trace_url()
|
|
161
|
+
archive_qa(chunk, vector_name)
|
|
162
|
+
if trace:
|
|
163
|
+
generation.end(output=json.dumps(chunk))
|
|
164
|
+
span.end(output=json.dumps(chunk))
|
|
165
|
+
trace.update(output=json.dumps(chunk))
|
|
166
|
+
|
|
167
|
+
return json.dumps(chunk)
|
|
168
|
+
|
|
169
|
+
else:
|
|
170
|
+
# Otherwise, we yield the plain text chunks as they come in.
|
|
171
|
+
yield chunk
|
|
172
|
+
|
|
173
|
+
# Here, the generator function will handle streaming the content to the client.
|
|
174
|
+
response = Response(generate_response_content(), content_type='text/plain; charset=utf-8')
|
|
175
|
+
response.headers['Transfer-Encoding'] = 'chunked'
|
|
176
|
+
|
|
177
|
+
log.debug(f"streaming response: {response}")
|
|
178
|
+
if trace:
|
|
179
|
+
generation.end(output=response)
|
|
180
|
+
span.end(output=response)
|
|
181
|
+
trace.update(output=response)
|
|
182
|
+
|
|
183
|
+
return response
|
|
184
|
+
|
|
185
|
+
def handle_process_vac(self, vector_name):
|
|
186
|
+
observed_vac_interpreter = observe()(self.vac_interpreter)
|
|
187
|
+
prep = self.prep_vac(request, vector_name)
|
|
188
|
+
log.debug(f"Processing prep: {prep}")
|
|
189
|
+
trace = prep["trace"]
|
|
190
|
+
span = prep["span"]
|
|
191
|
+
command_response = prep["command_response"]
|
|
192
|
+
vac_config = prep["vac_config"]
|
|
193
|
+
all_input = prep["all_input"]
|
|
194
|
+
|
|
195
|
+
if command_response:
|
|
196
|
+
return jsonify(command_response)
|
|
197
|
+
|
|
198
|
+
try:
|
|
199
|
+
if span:
|
|
200
|
+
generation = span.generation(
|
|
201
|
+
name="vac_interpreter",
|
|
202
|
+
metadata=vac_config,
|
|
203
|
+
input = all_input,
|
|
204
|
+
model=vac_config.get("model") or vac_config.get("llm")
|
|
205
|
+
)
|
|
206
|
+
bot_output = observed_vac_interpreter(
|
|
207
|
+
question=all_input["user_input"],
|
|
208
|
+
vector_name=vector_name,
|
|
209
|
+
chat_history=all_input["chat_history"],
|
|
210
|
+
**all_input["kwargs"]
|
|
211
|
+
)
|
|
212
|
+
if span:
|
|
213
|
+
generation.end(output=bot_output)
|
|
214
|
+
# {"answer": "The answer", "source_documents": [{"page_content": "The page content", "metadata": "The metadata"}]}
|
|
215
|
+
bot_output = parse_output(bot_output)
|
|
216
|
+
if trace:
|
|
217
|
+
bot_output["trace"] = trace.id
|
|
218
|
+
bot_output["trace_url"] = trace.get_trace_url()
|
|
219
|
+
archive_qa(bot_output, vector_name)
|
|
220
|
+
log.info(f'==LLM Q:{all_input["user_input"]} - A:{bot_output}')
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
except Exception as err:
|
|
224
|
+
bot_output = {'answer': f'QNA_ERROR: An error occurred while processing /vac/{vector_name}: {str(err)} traceback: {traceback.format_exc()}'}
|
|
225
|
+
|
|
226
|
+
if trace:
|
|
227
|
+
span.end(output=jsonify(bot_output))
|
|
228
|
+
trace.update(output=jsonify(bot_output))
|
|
229
|
+
|
|
230
|
+
# {'answer': 'output'}
|
|
231
|
+
return jsonify(bot_output)
|
|
232
|
+
|
|
233
|
+
def check_authentication(self):
|
|
234
|
+
if request.path.startswith('/openai/'):
|
|
235
|
+
log.debug(f'Request headers: {request.headers}')
|
|
236
|
+
# the header forwarded
|
|
237
|
+
auth_header = request.headers.get('X-Forwarded-Authorization')
|
|
238
|
+
if auth_header:
|
|
239
|
+
|
|
240
|
+
if auth_header.startswith('Bearer '):
|
|
241
|
+
api_key = auth_header.split(' ')[1] # Assuming "Bearer <api_key>"
|
|
242
|
+
else:
|
|
243
|
+
return jsonify({'error': 'Invalid authorization header does not start with "Bearer " - got: {auth_header}'}), 401
|
|
244
|
+
|
|
245
|
+
endpoints_host = os.getenv('_ENDPOINTS_HOST')
|
|
246
|
+
if not endpoints_host:
|
|
247
|
+
return jsonify({'error': '_ENDPOINTS_HOST environment variable not found'}), 401
|
|
248
|
+
|
|
249
|
+
# Check cache first
|
|
250
|
+
current_time = datetime.now()
|
|
251
|
+
if api_key in api_key_cache:
|
|
252
|
+
cached_result, cache_time = api_key_cache[api_key]
|
|
253
|
+
if current_time - cache_time < cache_duration:
|
|
254
|
+
if not cached_result:
|
|
255
|
+
return jsonify({'error': 'Invalid cached API key'}), 401
|
|
256
|
+
else:
|
|
257
|
+
return # Valid API key, continue to the endpoint
|
|
258
|
+
else:
|
|
259
|
+
# Cache expired, remove from cache
|
|
260
|
+
del api_key_cache[api_key]
|
|
261
|
+
|
|
262
|
+
# Validate API key
|
|
263
|
+
is_valid = validate_api_key(api_key, endpoints_host)
|
|
264
|
+
# Update cache
|
|
265
|
+
api_key_cache[api_key] = (is_valid, current_time)
|
|
266
|
+
|
|
267
|
+
if not is_valid:
|
|
268
|
+
return jsonify({'error': 'Invalid API key'}), 401
|
|
269
|
+
else:
|
|
270
|
+
return jsonify({'error': 'Missing Authorization header'}), 401
|
|
271
|
+
|
|
272
|
+
def openai_health_endpoint():
|
|
273
|
+
return jsonify({'message': 'Success'})
|
|
274
|
+
|
|
275
|
+
def handle_openai_compatible_endpoint(self, vector_name=None):
|
|
276
|
+
data = request.get_json()
|
|
277
|
+
log.info(f'openai_compatible_endpoint got data: {data} for vector: {vector_name}')
|
|
278
|
+
|
|
279
|
+
vector_name = vector_name or data.pop('model', None)
|
|
280
|
+
messages = data.pop('messages', None)
|
|
281
|
+
chat_history = data.pop('chat_history', None)
|
|
282
|
+
stream = data.pop('stream', False)
|
|
283
|
+
|
|
284
|
+
if not messages:
|
|
285
|
+
return jsonify({"error": "No messages provided"}), 400
|
|
286
|
+
|
|
287
|
+
user_message = None
|
|
288
|
+
image_uri = None
|
|
289
|
+
mime_type = None
|
|
290
|
+
|
|
291
|
+
for msg in reversed(messages):
|
|
292
|
+
if msg['role'] == 'user':
|
|
293
|
+
if isinstance(msg['content'], list):
|
|
294
|
+
for content_item in msg['content']:
|
|
295
|
+
if content_item['type'] == 'text':
|
|
296
|
+
user_message = content_item['text']
|
|
297
|
+
elif content_item['type'] == 'image_url':
|
|
298
|
+
base64_data = content_item['image_url']['url']
|
|
299
|
+
image_uri, mime_type = handle_base64_image(base64_data, vector_name)
|
|
300
|
+
else:
|
|
301
|
+
user_message = msg['content']
|
|
302
|
+
break
|
|
303
|
+
|
|
304
|
+
if not user_message:
|
|
305
|
+
return jsonify({"error": "No user message provided"}), 400
|
|
306
|
+
else:
|
|
307
|
+
log.info(f"User message: {user_message}")
|
|
308
|
+
|
|
309
|
+
paired_messages = extract_chat_history(chat_history)
|
|
310
|
+
command_response = handle_special_commands(user_message, vector_name, paired_messages)
|
|
311
|
+
|
|
312
|
+
if command_response is not None:
|
|
313
|
+
|
|
314
|
+
return self.make_openai_response(user_message, vector_name, command_response)
|
|
315
|
+
|
|
316
|
+
if image_uri:
|
|
317
|
+
data["image_uri"] = image_uri
|
|
318
|
+
data["mime"] = mime_type
|
|
319
|
+
|
|
320
|
+
all_input = {
|
|
321
|
+
"user_input": user_message,
|
|
322
|
+
"chat_history": chat_history,
|
|
323
|
+
"kwargs": data
|
|
324
|
+
}
|
|
325
|
+
|
|
326
|
+
observed_stream_interpreter = observe()(self.stream_interpreter)
|
|
327
|
+
|
|
328
|
+
response_id = str(uuid.uuid4())
|
|
329
|
+
|
|
330
|
+
def generate_response_content():
|
|
331
|
+
for chunk in start_streaming_chat(question=user_message,
|
|
332
|
+
vector_name=vector_name,
|
|
333
|
+
qna_func=observed_stream_interpreter,
|
|
334
|
+
chat_history=all_input["chat_history"],
|
|
335
|
+
wait_time=all_input.get("stream_wait_time", 1),
|
|
336
|
+
timeout=all_input.get("stream_timeout", 60),
|
|
337
|
+
**all_input["kwargs"]
|
|
338
|
+
):
|
|
339
|
+
if isinstance(chunk, dict) and 'answer' in chunk:
|
|
340
|
+
openai_chunk = {
|
|
341
|
+
"id": response_id,
|
|
342
|
+
"object": "chat.completion.chunk",
|
|
343
|
+
"created": str(int(datetime.now().timestamp())),
|
|
344
|
+
"model": vector_name,
|
|
345
|
+
"system_fingerprint": sunholo_version(),
|
|
346
|
+
"choices": [{
|
|
347
|
+
"index": 0,
|
|
348
|
+
"delta": {"content": chunk['answer']},
|
|
349
|
+
"logprobs": None,
|
|
350
|
+
"finish_reason": None
|
|
351
|
+
}]
|
|
352
|
+
}
|
|
353
|
+
yield json.dumps(openai_chunk) + "\n"
|
|
354
|
+
else:
|
|
355
|
+
log.info(f"Unknown chunk: {chunk}")
|
|
356
|
+
|
|
357
|
+
final_chunk = {
|
|
358
|
+
"id": response_id,
|
|
359
|
+
"object": "chat.completion.chunk",
|
|
360
|
+
"created": str(int(datetime.now().timestamp())),
|
|
361
|
+
"model": vector_name,
|
|
362
|
+
"system_fingerprint": sunholo_version(),
|
|
363
|
+
"choices": [{
|
|
364
|
+
"index": 0,
|
|
365
|
+
"delta": {},
|
|
366
|
+
"logprobs": None,
|
|
367
|
+
"finish_reason": "stop"
|
|
368
|
+
}]
|
|
369
|
+
}
|
|
370
|
+
yield json.dumps(final_chunk) + "\n"
|
|
371
|
+
|
|
372
|
+
if stream:
|
|
373
|
+
log.info("Streaming openai chunks")
|
|
374
|
+
return Response(generate_response_content(), content_type='text/plain; charset=utf-8')
|
|
375
|
+
|
|
376
|
+
try:
|
|
377
|
+
observed_vac_interpreter = observe()(self.vac_interpreter)
|
|
378
|
+
bot_output = observed_vac_interpreter(
|
|
379
|
+
question=user_message,
|
|
380
|
+
vector_name=vector_name,
|
|
381
|
+
chat_history=all_input["chat_history"],
|
|
382
|
+
**all_input["kwargs"]
|
|
383
|
+
)
|
|
384
|
+
bot_output = parse_output(bot_output)
|
|
385
|
+
|
|
386
|
+
log.info(f"Bot output: {bot_output}")
|
|
387
|
+
if bot_output:
|
|
388
|
+
return self.make_openai_response(user_message, vector_name, bot_output.get('answer', ''))
|
|
389
|
+
else:
|
|
390
|
+
return self.make_openai_response(user_message, vector_name, 'ERROR: could not find an answer')
|
|
391
|
+
|
|
392
|
+
except Exception as err:
|
|
393
|
+
log.error(f"OpenAI response error: {str(err)} traceback: {traceback.format_exc()}")
|
|
394
|
+
|
|
395
|
+
return self.make_openai_response(user_message, vector_name, f'ERROR: {str(err)}')
|
|
396
|
+
|
|
397
|
+
|
|
398
|
+
def create_langfuse_trace(self, request, vector_name):
|
|
399
|
+
try:
|
|
400
|
+
from langfuse import Langfuse
|
|
401
|
+
langfuse = Langfuse()
|
|
402
|
+
except ImportError as err:
|
|
403
|
+
print(f"No langfuse installed for agents.flask.register_qna_routes, install via `pip install sunholo[http]` - {str(err)}")
|
|
404
|
+
|
|
405
|
+
return None
|
|
406
|
+
|
|
407
|
+
user_id = request.headers.get("X-User-ID")
|
|
408
|
+
session_id = request.headers.get("X-Session-ID")
|
|
409
|
+
message_source = request.headers.get("X-Message-Source")
|
|
410
|
+
|
|
411
|
+
package_version = sunholo_version()
|
|
412
|
+
tags = [package_version]
|
|
413
|
+
if message_source:
|
|
414
|
+
tags.append(message_source)
|
|
415
|
+
|
|
416
|
+
return langfuse.trace(
|
|
417
|
+
name = f"/vac/{vector_name}",
|
|
418
|
+
user_id = user_id,
|
|
419
|
+
session_id = session_id,
|
|
420
|
+
tags = tags,
|
|
421
|
+
release = f"sunholo-v{package_version}"
|
|
422
|
+
)
|
|
423
|
+
|
|
424
|
+
def prep_vac(self, request, vector_name):
|
|
425
|
+
trace = self.create_langfuse_trace(request, vector_name)
|
|
426
|
+
span = None
|
|
427
|
+
|
|
428
|
+
if request.content_type.startswith('application/json'):
|
|
429
|
+
data = request.get_json()
|
|
430
|
+
elif request.content_type.startswith('multipart/form-data'):
|
|
431
|
+
data = request.form.to_dict()
|
|
432
|
+
if 'file' in request.files:
|
|
433
|
+
file = request.files['file']
|
|
434
|
+
if file.filename != '':
|
|
435
|
+
log.info(f"Found file: {file.filename} to upload to GCS")
|
|
436
|
+
try:
|
|
437
|
+
image_uri, mime_type = self.handle_file_upload(file, vector_name)
|
|
438
|
+
data["image_uri"] = image_uri
|
|
439
|
+
data["mime"] = mime_type
|
|
440
|
+
except Exception as e:
|
|
441
|
+
return jsonify({'error': str(e), 'traceback': traceback.format_exc()}), 500
|
|
442
|
+
else:
|
|
443
|
+
return jsonify({"error": "No file selected"}), 400
|
|
444
|
+
else:
|
|
445
|
+
return jsonify({"error": "Unsupported content type"}), 400
|
|
446
|
+
|
|
447
|
+
log.info(f"vac/{vector_name} got data: {data}")
|
|
448
|
+
|
|
449
|
+
config, _ = load_config("config/llm_config.yaml")
|
|
450
|
+
vac_configs = config.get("vac")
|
|
451
|
+
if vac_configs:
|
|
452
|
+
vac_config = vac_configs[vector_name]
|
|
453
|
+
|
|
454
|
+
if trace:
|
|
455
|
+
trace.update(input=data, metadata=vac_config)
|
|
456
|
+
|
|
457
|
+
user_input = data.pop('user_input').strip()
|
|
458
|
+
stream_wait_time = data.pop('stream_wait_time', 7)
|
|
459
|
+
stream_timeout = data.pop('stream_timeout', 120)
|
|
460
|
+
chat_history = data.pop('chat_history', None)
|
|
461
|
+
vector_name = data.pop('vector_name', vector_name)
|
|
462
|
+
|
|
463
|
+
paired_messages = extract_chat_history(chat_history)
|
|
464
|
+
|
|
465
|
+
all_input = {'user_input': user_input,
|
|
466
|
+
'vector_name': vector_name,
|
|
467
|
+
'chat_history': paired_messages,
|
|
468
|
+
'stream_wait_time': stream_wait_time,
|
|
469
|
+
'stream_timeout': stream_timeout,
|
|
470
|
+
'kwargs': data}
|
|
471
|
+
|
|
472
|
+
if trace:
|
|
473
|
+
span = trace.span(
|
|
474
|
+
name="VAC",
|
|
475
|
+
metadata=vac_config,
|
|
476
|
+
input = all_input
|
|
477
|
+
)
|
|
478
|
+
command_response = handle_special_commands(user_input, vector_name, paired_messages)
|
|
479
|
+
if command_response is not None:
|
|
480
|
+
if trace:
|
|
481
|
+
trace.update(output=jsonify(command_response))
|
|
482
|
+
|
|
483
|
+
return {
|
|
484
|
+
"trace": trace,
|
|
485
|
+
"span": span,
|
|
486
|
+
"command_response": command_response,
|
|
487
|
+
"all_input": all_input,
|
|
488
|
+
"vac_config": vac_config
|
|
489
|
+
}
|
|
490
|
+
|
|
491
|
+
|
|
492
|
+
def handle_file_upload(self, file, vector_name):
|
|
493
|
+
try:
|
|
494
|
+
file.save(file.filename)
|
|
495
|
+
image_uri = add_file_to_gcs(file.filename, vector_name)
|
|
496
|
+
os.remove(file.filename) # Clean up the saved file
|
|
497
|
+
return image_uri, file.mimetype
|
|
498
|
+
except Exception as e:
|
|
499
|
+
raise Exception(f'File upload failed: {str(e)}')
|
|
500
|
+
|
|
501
|
+
|
|
@@ -54,10 +54,15 @@ def pick_retriever(vector_name, embeddings=None):
|
|
|
54
54
|
|
|
55
55
|
embeddings = embeddings or get_embeddings(vector_name)
|
|
56
56
|
read_only = value.get('read_only')
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
57
|
+
try:
|
|
58
|
+
vectorstore = pick_vectorstore(vectorstore,
|
|
59
|
+
vector_name=vector_name,
|
|
60
|
+
embeddings=embeddings,
|
|
61
|
+
read_only=read_only)
|
|
62
|
+
except Exception as e:
|
|
63
|
+
log.error(f"Failed to pick_vectorstore {vectorstore} for {vector_name} - {str(e)} - skipping")
|
|
64
|
+
continue
|
|
65
|
+
|
|
61
66
|
k_override = value.get('k', 3)
|
|
62
67
|
vs_retriever = vectorstore.as_retriever(search_kwargs=dict(k=k_override))
|
|
63
68
|
retriever_list.append(vs_retriever)
|