sunholo 0.139.0__tar.gz → 0.140.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sunholo-0.139.0/src/sunholo.egg-info → sunholo-0.140.2}/PKG-INFO +1 -1
- {sunholo-0.139.0 → sunholo-0.140.2}/pyproject.toml +1 -1
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/agents/chat_history.py +63 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/agents/flask/vac_routes.py +117 -2
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/gcs/download_gcs_text.py +20 -2
- sunholo-0.140.2/src/sunholo/gcs/metadata.py +66 -0
- {sunholo-0.139.0 → sunholo-0.140.2/src/sunholo.egg-info}/PKG-INFO +1 -1
- sunholo-0.139.0/src/sunholo/gcs/metadata.py +0 -33
- {sunholo-0.139.0 → sunholo-0.140.2}/LICENSE.txt +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/MANIFEST.in +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/README.md +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/setup.cfg +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/__init__.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/agents/__init__.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/agents/dispatch_to_qa.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/agents/fastapi/__init__.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/agents/fastapi/base.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/agents/fastapi/qna_routes.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/agents/flask/__init__.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/agents/flask/base.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/agents/flask/qna_routes.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/agents/langserve.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/agents/pubsub.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/agents/route.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/agents/special_commands.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/agents/swagger.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/archive/__init__.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/archive/archive.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/auth/__init__.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/auth/gcloud.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/auth/refresh.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/auth/run.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/azure/__init__.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/azure/auth.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/azure/blobs.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/azure/event_grid.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/bots/__init__.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/bots/discord.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/bots/github_webhook.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/bots/webapp.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/chunker/__init__.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/chunker/azure.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/chunker/doc_handling.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/chunker/encode_metadata.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/chunker/images.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/chunker/loaders.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/chunker/message_data.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/chunker/pdfs.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/chunker/process_chunker_data.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/chunker/publish.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/chunker/pubsub.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/chunker/splitter.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/cli/__init__.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/cli/chat_vac.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/cli/cli.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/cli/cli_init.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/cli/configs.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/cli/deploy.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/cli/embedder.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/cli/merge_texts.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/cli/run_proxy.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/cli/sun_rich.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/cli/swagger.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/cli/vertex.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/components/__init__.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/components/llm.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/components/retriever.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/components/vectorstore.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/custom_logging.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/database/__init__.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/database/alloydb.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/database/alloydb_client.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/database/database.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/database/lancedb.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/database/sql/sb/create_function.sql +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/database/sql/sb/create_function_time.sql +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/database/sql/sb/create_table.sql +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/database/sql/sb/delete_source_row.sql +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/database/sql/sb/return_sources.sql +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/database/sql/sb/setup.sql +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/database/static_dbs.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/database/uuid.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/discovery_engine/__init__.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/discovery_engine/chunker_handler.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/discovery_engine/cli.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/discovery_engine/create_new.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/discovery_engine/discovery_engine_client.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/discovery_engine/get_ai_search_chunks.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/embedder/__init__.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/embedder/embed_chunk.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/embedder/embed_metadata.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/excel/__init__.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/excel/plugin.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/gcs/__init__.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/gcs/add_file.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/gcs/download_folder.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/gcs/download_url.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/gcs/extract_and_sign.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/genai/__init__.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/genai/file_handling.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/genai/genaiv2.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/genai/images.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/genai/init.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/genai/process_funcs_cls.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/genai/safety.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/invoke/__init__.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/invoke/async_class.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/invoke/direct_vac_func.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/invoke/invoke_vac_utils.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/langchain_types.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/langfuse/__init__.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/langfuse/callback.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/langfuse/evals.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/langfuse/prompts.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/llamaindex/__init__.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/llamaindex/get_files.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/llamaindex/import_files.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/llamaindex/llamaindex_class.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/llamaindex/user_history.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/lookup/__init__.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/lookup/model_lookup.yaml +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/mcp/__init__.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/mcp/cli.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/ollama/__init__.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/ollama/ollama_images.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/pubsub/__init__.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/pubsub/process_pubsub.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/pubsub/pubsub_manager.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/qna/__init__.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/qna/parsers.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/qna/retry.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/senses/__init__.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/senses/stream_voice.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/streaming/__init__.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/streaming/content_buffer.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/streaming/langserve.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/streaming/stream_lookup.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/streaming/streaming.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/summarise/__init__.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/summarise/summarise.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/templates/agent/__init__.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/templates/agent/agent_service.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/templates/agent/app.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/templates/agent/my_log.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/templates/agent/tools/__init__.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/templates/agent/tools/your_agent.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/templates/agent/vac_service.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/templates/project/__init__.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/templates/project/app.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/templates/project/my_log.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/templates/project/vac_service.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/templates/system_services/__init__.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/templates/system_services/app.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/templates/system_services/my_log.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/terraform/__init__.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/terraform/tfvars_editor.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/tools/__init__.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/tools/web_browser.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/utils/__init__.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/utils/api_key.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/utils/big_context.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/utils/config.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/utils/config_class.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/utils/config_schema.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/utils/gcp.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/utils/gcp_project.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/utils/mime.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/utils/parsers.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/utils/timedelta.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/utils/user_ids.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/utils/version.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/vertex/__init__.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/vertex/extensions_call.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/vertex/extensions_class.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/vertex/genai_functions.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/vertex/init.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/vertex/memory_tools.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/vertex/safety.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo/vertex/type_dict_to_json.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo.egg-info/SOURCES.txt +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo.egg-info/dependency_links.txt +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo.egg-info/entry_points.txt +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo.egg-info/requires.txt +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/src/sunholo.egg-info/top_level.txt +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/tests/test_async.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/tests/test_async_genai2.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/tests/test_chat_history.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/tests/test_config.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/tests/test_genai2.py +0 -0
- {sunholo-0.139.0 → sunholo-0.140.2}/tests/test_unstructured.py +0 -0
@@ -1,6 +1,69 @@
|
|
1
1
|
import json
|
2
2
|
from ..custom_logging import log
|
3
3
|
|
4
|
+
|
5
|
+
async def extract_chat_history_async(chat_history=None):
    """
    Build (human, AI) message pairs from a flat list of chat messages.

    The coroutine contains no ``await``; it is declared ``async`` so that
    callers can await or schedule it alongside other coroutines.

    Pairing rules, as implemented below:
      * If the very first message is from the bot, it is paired with a blank
        human message and removed from the remaining history.
      * Each bot message is paired with the most recent human message seen
        since the previous pair (or ``""`` if there was none).
      * Messages that are neither human nor bot are ignored.

    Args:
        chat_history (list): List of chat messages.

    Returns:
        list: List of tuples with paired human and AI messages; empty when
            no history was supplied.
    """
    if not chat_history:
        log.info("No chat history found")
        return []

    log.info(f"Extracting chat history: {chat_history}")
    pairs = []

    # A conversation opened by the bot gets an empty human turn as its partner.
    opener = chat_history[0]
    if is_bot(opener):
        log.info(f"Extracting first_message: {opener}")
        placeholder = {"name": "Human", "content": "", "embeds": []}
        pairs.append(
            (create_message_element(placeholder), create_message_element(opener))
        )
        chat_history = chat_history[1:]

    # Phase 1: classify every message and extract its content up front.
    classified = [
        (is_human(entry), is_bot(entry), create_message_element(entry))
        for entry in chat_history
    ]

    # Phase 2: walk the classified messages and emit a pair per bot reply.
    pending_human = ""
    for from_human, from_bot, element in classified:
        if from_human:
            pending_human = element
            log.info(f"Extracted human message: {pending_human}")
        elif from_bot:
            log.info(f"Extracted AI message: {element}")
            pairs.append((pending_human, element))
            pending_human = ""

    log.info(f"Paired messages: {pairs}")
    return pairs
|
65
|
+
|
66
|
+
|
4
67
|
def extract_chat_history(chat_history=None):
|
5
68
|
"""
|
6
69
|
Extracts paired chat history between human and AI messages.
|
@@ -8,6 +8,7 @@ import inspect
|
|
8
8
|
import asyncio
|
9
9
|
|
10
10
|
from ...agents import extract_chat_history, handle_special_commands
|
11
|
+
from ..chat_history import extract_chat_history_async
|
11
12
|
from ...qna.parsers import parse_output
|
12
13
|
from ...streaming import start_streaming_chat, start_streaming_chat_async
|
13
14
|
from ...archive import archive_qa
|
@@ -57,11 +58,12 @@ if __name__ == "__main__":
|
|
57
58
|
```
|
58
59
|
|
59
60
|
"""
|
60
|
-
def __init__(self, app, stream_interpreter, vac_interpreter=None, additional_routes=None):
|
61
|
+
def __init__(self, app, stream_interpreter: callable, vac_interpreter:callable=None, additional_routes:dict=None, async_stream:bool=False):
|
61
62
|
self.app = app
|
62
63
|
self.stream_interpreter = stream_interpreter
|
63
64
|
self.vac_interpreter = vac_interpreter or partial(self.vac_interpreter_default)
|
64
65
|
self.additional_routes = additional_routes if additional_routes is not None else []
|
66
|
+
self.async_stream = async_stream
|
65
67
|
self.register_routes()
|
66
68
|
|
67
69
|
|
@@ -99,7 +101,15 @@ if __name__ == "__main__":
|
|
99
101
|
self.app.route('/vac/streaming/<vector_name>',
|
100
102
|
methods=['POST'],
|
101
103
|
provide_automatic_options=False)(self.handle_stream_vac)
|
102
|
-
|
104
|
+
|
105
|
+
if self.async_stream: # Use async treatment
|
106
|
+
self.app.route('/vac/streaming/<vector_name>',
|
107
|
+
methods=['POST'],
|
108
|
+
provide_automatic_options=False)(self.handle_stream_vac_async)
|
109
|
+
else:
|
110
|
+
self.app.route('/vac/streaming/<vector_name>',
|
111
|
+
methods=['POST'],
|
112
|
+
provide_automatic_options=False)(self.handle_stream_vac)
|
103
113
|
# Static VAC
|
104
114
|
self.app.route('/vac/<vector_name>',
|
105
115
|
methods=['POST'],
|
@@ -332,6 +342,51 @@ if __name__ == "__main__":
|
|
332
342
|
|
333
343
|
return response
|
334
344
|
|
345
|
+
async def handle_stream_vac_async(self, vector_name):
    """
    Async request handler that streams a VAC answer back to the client.

    Requires ``self.stream_interpreter`` to be a coroutine function; the
    request is parsed with ``prep_vac_async`` and the answer is produced by
    ``start_streaming_chat_async``.

    Args:
        vector_name: The VAC name taken from the route.

    Returns:
        A chunked ``text/plain`` streaming ``Response``, or, when
        ``prep_vac_async`` reports a request error, the error value it
        returned (a ``(response, status)`` pair).

    Raises:
        ValueError: If the configured stream interpreter is not async.
    """
    observed_stream_interpreter = self.stream_interpreter
    if not inspect.iscoroutinefunction(observed_stream_interpreter):
        raise ValueError(f"Stream interpreter must be async: {observed_stream_interpreter}")

    # Use the async version of prep_vac
    prep = await self.prep_vac_async(request, vector_name)

    # prep_vac_async signals bad requests by returning (jsonify(...), status)
    # instead of a dict. Hand that straight back rather than failing on
    # prep["all_input"] and masking a client error as a server error.
    if not isinstance(prep, dict):
        log.warning(f"Request for vac/{vector_name} rejected during preparation")
        return prep

    log.info(f"Processing prep: {prep}")
    all_input = prep["all_input"]

    log.info(f'Streaming data with: {all_input}')

    async def generate_response_content():
        try:
            # Direct async handling without the queue/thread approach
            async_gen = start_streaming_chat_async(
                question=all_input["user_input"],
                vector_name=vector_name,
                qna_func_async=observed_stream_interpreter,
                chat_history=all_input["chat_history"],
                wait_time=all_input["stream_wait_time"],
                timeout=all_input["stream_timeout"],
                **all_input["kwargs"]
            )

            log.info(f"{async_gen=}")
            async for chunk in async_gen:
                # A dict carrying 'answer' is archived and sent as JSON;
                # every other chunk is forwarded unchanged.
                if isinstance(chunk, dict) and 'answer' in chunk:
                    # NOTE(review): archive_qa is awaited here - confirm it is
                    # a coroutine function and not the synchronous helper.
                    await archive_qa(chunk, vector_name)
                    yield json.dumps(chunk)
                else:
                    yield chunk

        except Exception as e:
            # Errors are reported in-band because the response has already started.
            yield f"Streaming Error: {str(e)} {traceback.format_exc()}"

    # NOTE(review): the body is an *async* generator - confirm the serving
    # stack can iterate it (a plain WSGI response expects a sync iterable).
    response = Response(generate_response_content(), content_type='text/plain; charset=utf-8')
    response.headers['Transfer-Encoding'] = 'chunked'

    log.debug(f"streaming response: {response}")

    return response
|
389
|
+
|
335
390
|
@staticmethod
|
336
391
|
async def _async_generator_to_stream(async_gen_func):
|
337
392
|
"""Helper function to stream the async generator's values to the client."""
|
@@ -699,6 +754,66 @@ if __name__ == "__main__":
|
|
699
754
|
"vac_config": vac_config
|
700
755
|
}
|
701
756
|
|
757
|
+
async def prep_vac_async(self, request, vector_name):
    """
    Async version of prep_vac: parse the request and assemble streaming inputs.

    Accepts JSON bodies and multipart form posts (optionally with a ``file``
    part, which is uploaded and recorded as ``image_uri`` / ``mime``).

    Args:
        request: The incoming request object.
        vector_name: VAC name from the route; used as the default for the
            ``vector_name`` field of the payload.

    Returns:
        dict: ``{"all_input": {...}}`` holding ``user_input``,
            ``vector_name``, ``chat_history`` (paired messages),
            ``stream_wait_time``, ``stream_timeout`` and ``kwargs`` (all
            remaining payload fields).
        tuple: ``(jsonify(error), status)`` when the request is unusable
            (unsupported content type, empty file selection, failed upload,
            or missing ``user_input``).
    """
    # content_type is None when the client sends no Content-Type header.
    content_type = request.content_type or ""

    # Parse request data
    if content_type.startswith('application/json'):
        data = request.get_json()
    elif content_type.startswith('multipart/form-data'):
        data = request.form.to_dict()
        if 'file' in request.files:
            file = request.files['file']
            if file.filename != '':
                log.info(f"Found file: {file.filename} to upload to GCS")
                try:
                    # NOTE(review): relies on self.handle_file_upload_async
                    # being defined on the class - confirm it exists.
                    image_uri, mime_type = await self.handle_file_upload_async(file, vector_name)
                    data["image_uri"] = image_uri
                    data["mime"] = mime_type
                except Exception as e:
                    log.error(traceback.format_exc())
                    return jsonify({'error': str(e), 'traceback': traceback.format_exc()}), 500
            else:
                log.error("No file selected")
                return jsonify({"error": "No file selected"}), 400
    else:
        return jsonify({"error": "Unsupported content type"}), 400

    log.info(f"vac/{vector_name} got data: {data}")

    # Reject unusable payloads the same way as the other request errors
    # above, instead of letting data.pop raise.
    if not isinstance(data, dict) or 'user_input' not in data:
        log.error("No user_input found in request data")
        return jsonify({"error": "Missing required field: user_input"}), 400

    user_input = data.pop('user_input').strip()
    stream_wait_time = data.pop('stream_wait_time', 7)
    stream_timeout = data.pop('stream_timeout', 120)
    chat_history = data.pop('chat_history', None)
    vector_name_param = data.pop('vector_name', vector_name)
    data.pop('trace_id', None)  # to ensure not in kwargs

    # Chat history is best-effort: a failure degrades to "no history", but
    # it is logged so the problem is visible rather than silently dropped.
    try:
        paired_messages = await extract_chat_history_async(chat_history)
    except Exception:
        log.error(f"Could not extract chat history: {traceback.format_exc()}")
        paired_messages = []

    all_input = {
        'user_input': user_input,
        'vector_name': vector_name_param,
        'chat_history': paired_messages,
        'stream_wait_time': stream_wait_time,
        'stream_timeout': stream_timeout,
        'kwargs': data
    }

    return {
        "all_input": all_input
    }
|
702
817
|
|
703
818
|
def handle_file_upload(self, file, vector_name):
|
704
819
|
try:
|
@@ -3,12 +3,21 @@ import json
|
|
3
3
|
|
4
4
|
from ..custom_logging import log
|
5
5
|
from ..utils.mime import get_mime_type_gemini
|
6
|
+
from .metadata import check_gcs_file_size
|
6
7
|
from .download_url import get_bytes_from_gcs
|
7
8
|
|
8
|
-
def download_gcs_source_to_string(source:str) -> str:
|
9
|
+
def download_gcs_source_to_string(source:str, max_size_bytes: int = 1024*1024) -> str:
|
9
10
|
"""
|
10
|
-
|
11
|
+
Download a file from Google Cloud Storage and convert it to a string.
|
12
|
+
|
13
|
+
Args:
|
14
|
+
source: str The Google Cloud Storage URI of the file to download (e.g., 'gs://bucket_name/file_name').
|
15
|
+
max_size_bytes: int Maximum file size to download, defaults to 1MB (1024*1024 bytes)
|
16
|
+
|
17
|
+
Returns:
|
18
|
+
str: The contents of the file as a string, or an empty string if the file could not be downloaded.
|
11
19
|
"""
|
20
|
+
|
12
21
|
mime_type = get_mime_type_gemini(source)
|
13
22
|
if mime_type == "":
|
14
23
|
log.warning(f"Can not download to string file source {source}")
|
@@ -52,6 +61,15 @@ def download_gcs_source_to_string(source:str) -> str:
|
|
52
61
|
|
53
62
|
try:
|
54
63
|
log.info(f"Extracting text for {source}")
|
64
|
+
# Check file size before downloading
|
65
|
+
file_size = check_gcs_file_size(source)
|
66
|
+
if file_size == -1:
|
67
|
+
log.warning(f"Could not determine file size for {source}")
|
68
|
+
return ""
|
69
|
+
elif file_size > max_size_bytes:
|
70
|
+
log.warning(f"File size {file_size} bytes exceeds maximum size limit of {max_size_bytes} bytes for {source}")
|
71
|
+
return ""
|
72
|
+
|
55
73
|
bytes = get_bytes_from_gcs(source)
|
56
74
|
string = bytes.decode('utf-8', errors='replace')
|
57
75
|
log.info(f"Extracted {len(string)} characters from {source}: {string[:100]}")
|
@@ -0,0 +1,66 @@
|
|
1
|
+
try:
|
2
|
+
from google.cloud import storage
|
3
|
+
except ImportError:
|
4
|
+
storage = None
|
5
|
+
|
6
|
+
from ..custom_logging import log
|
7
|
+
|
8
|
+
|
9
|
+
def get_object_metadata(bucket_name, object_name):
    """
    Fetch the custom metadata attached to a Google Cloud Storage object.

    Args:
        bucket_name: Name of the bucket holding the object.
        object_name: Name (path) of the object within the bucket.

    Returns:
        The blob's custom metadata, or None when the storage library is
        unavailable, either argument is None, or a storage client cannot
        be created.
    """
    # The storage import is optional; without it there is nothing to query.
    if not storage:
        return None

    if any(arg is None for arg in (bucket_name, object_name)):
        log.warning("Got invalid bucket name and object name")
        return None

    try:
        client = storage.Client()
    except Exception as err:
        log.warning(f"Could not connect to Google Cloud Storage for metadata: {str(err)}")
        return None

    target = client.bucket(bucket_name).blob(object_name)

    # reload() refreshes the blob so the metadata read below is current.
    target.reload()
    metadata = target.metadata

    log.info(f"Custom Metadata for {object_name}: {metadata}")
    return metadata
|
34
|
+
|
35
|
+
def check_gcs_file_size(source: str) -> int:
    """
    Check the size of a file in Google Cloud Storage without downloading the entire file.

    Every failure mode maps to -1 so callers only need one check: the
    storage library is not installed, the URI is malformed, the metadata
    lookup fails, or no size is reported.

    Args:
        source: str The Google Cloud Storage URI of the file to check (e.g., 'gs://bucket_name/file_name').

    Returns:
        int: The size of the file in bytes, or -1 if the size cannot be determined.
    """
    # Use the module's optional import rather than importing again here, so
    # a missing dependency yields -1 instead of an ImportError.
    if not storage:
        log.warning(f"google-cloud-storage is not available, cannot check file size for {source}")
        return -1

    # Parse the GCS URI
    if not isinstance(source, str) or not source.startswith('gs://'):
        log.warning(f"Invalid GCS URI format: {source}")
        return -1

    # Both a bucket and an object path are required: gs://<bucket>/<path>
    bucket_name, _, blob_path = source[5:].partition('/')
    if not bucket_name or not blob_path:
        log.warning(f"Invalid GCS URI format: {source}")
        return -1

    try:
        storage_client = storage.Client()
        blob = storage_client.bucket(bucket_name).blob(blob_path)
        blob.reload()  # Fetch the latest metadata
    except Exception as err:
        log.error(f"Error checking file size for {source}: {str(err)}")
        return -1

    if blob.size is None:
        log.warning(f"No size reported for {source}")
        return -1

    return blob.size
|
@@ -1,33 +0,0 @@
|
|
1
|
-
try:
|
2
|
-
from google.cloud import storage
|
3
|
-
except ImportError:
|
4
|
-
storage = None
|
5
|
-
|
6
|
-
from ..custom_logging import log
|
7
|
-
|
8
|
-
|
9
|
-
def get_object_metadata(bucket_name, object_name):
|
10
|
-
|
11
|
-
if not storage:
|
12
|
-
return None
|
13
|
-
|
14
|
-
if bucket_name is None or object_name is None:
|
15
|
-
log.warning("Got invalid bucket name and object name")
|
16
|
-
return None
|
17
|
-
try:
|
18
|
-
storage_client = storage.Client()
|
19
|
-
except Exception as e:
|
20
|
-
log.warning(f"Could not connect to Google Cloud Storage for metadata: {str(e)}")
|
21
|
-
return None
|
22
|
-
|
23
|
-
bucket = storage_client.bucket(bucket_name)
|
24
|
-
blob = bucket.blob(object_name)
|
25
|
-
|
26
|
-
# Fetch the blob's metadata
|
27
|
-
blob.reload() # Make sure to reload the blob to get the most up-to-date metadata
|
28
|
-
|
29
|
-
# Access custom metadata
|
30
|
-
custom_metadata = blob.metadata
|
31
|
-
|
32
|
-
log.info(f"Custom Metadata for {object_name}: {custom_metadata}")
|
33
|
-
return custom_metadata
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|