sunholo 0.138.0__tar.gz → 0.139.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sunholo-0.138.0/src/sunholo.egg-info → sunholo-0.139.0}/PKG-INFO +1 -1
- {sunholo-0.138.0 → sunholo-0.139.0}/pyproject.toml +1 -1
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/agents/special_commands.py +3 -2
- sunholo-0.139.0/src/sunholo/gcs/download_gcs_text.py +132 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/utils/config.py +8 -8
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/utils/mime.py +50 -0
- {sunholo-0.138.0 → sunholo-0.139.0/src/sunholo.egg-info}/PKG-INFO +1 -1
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo.egg-info/SOURCES.txt +1 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/LICENSE.txt +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/MANIFEST.in +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/README.md +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/setup.cfg +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/__init__.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/agents/__init__.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/agents/chat_history.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/agents/dispatch_to_qa.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/agents/fastapi/__init__.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/agents/fastapi/base.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/agents/fastapi/qna_routes.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/agents/flask/__init__.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/agents/flask/base.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/agents/flask/qna_routes.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/agents/flask/vac_routes.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/agents/langserve.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/agents/pubsub.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/agents/route.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/agents/swagger.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/archive/__init__.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/archive/archive.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/auth/__init__.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/auth/gcloud.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/auth/refresh.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/auth/run.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/azure/__init__.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/azure/auth.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/azure/blobs.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/azure/event_grid.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/bots/__init__.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/bots/discord.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/bots/github_webhook.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/bots/webapp.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/chunker/__init__.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/chunker/azure.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/chunker/doc_handling.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/chunker/encode_metadata.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/chunker/images.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/chunker/loaders.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/chunker/message_data.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/chunker/pdfs.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/chunker/process_chunker_data.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/chunker/publish.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/chunker/pubsub.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/chunker/splitter.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/cli/__init__.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/cli/chat_vac.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/cli/cli.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/cli/cli_init.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/cli/configs.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/cli/deploy.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/cli/embedder.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/cli/merge_texts.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/cli/run_proxy.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/cli/sun_rich.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/cli/swagger.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/cli/vertex.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/components/__init__.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/components/llm.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/components/retriever.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/components/vectorstore.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/custom_logging.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/database/__init__.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/database/alloydb.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/database/alloydb_client.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/database/database.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/database/lancedb.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/database/sql/sb/create_function.sql +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/database/sql/sb/create_function_time.sql +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/database/sql/sb/create_table.sql +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/database/sql/sb/delete_source_row.sql +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/database/sql/sb/return_sources.sql +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/database/sql/sb/setup.sql +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/database/static_dbs.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/database/uuid.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/discovery_engine/__init__.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/discovery_engine/chunker_handler.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/discovery_engine/cli.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/discovery_engine/create_new.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/discovery_engine/discovery_engine_client.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/discovery_engine/get_ai_search_chunks.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/embedder/__init__.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/embedder/embed_chunk.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/embedder/embed_metadata.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/excel/__init__.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/excel/plugin.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/gcs/__init__.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/gcs/add_file.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/gcs/download_folder.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/gcs/download_url.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/gcs/extract_and_sign.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/gcs/metadata.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/genai/__init__.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/genai/file_handling.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/genai/genaiv2.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/genai/images.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/genai/init.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/genai/process_funcs_cls.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/genai/safety.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/invoke/__init__.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/invoke/async_class.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/invoke/direct_vac_func.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/invoke/invoke_vac_utils.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/langchain_types.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/langfuse/__init__.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/langfuse/callback.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/langfuse/evals.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/langfuse/prompts.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/llamaindex/__init__.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/llamaindex/get_files.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/llamaindex/import_files.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/llamaindex/llamaindex_class.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/llamaindex/user_history.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/lookup/__init__.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/lookup/model_lookup.yaml +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/mcp/__init__.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/mcp/cli.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/ollama/__init__.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/ollama/ollama_images.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/pubsub/__init__.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/pubsub/process_pubsub.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/pubsub/pubsub_manager.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/qna/__init__.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/qna/parsers.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/qna/retry.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/senses/__init__.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/senses/stream_voice.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/streaming/__init__.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/streaming/content_buffer.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/streaming/langserve.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/streaming/stream_lookup.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/streaming/streaming.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/summarise/__init__.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/summarise/summarise.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/templates/agent/__init__.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/templates/agent/agent_service.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/templates/agent/app.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/templates/agent/my_log.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/templates/agent/tools/__init__.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/templates/agent/tools/your_agent.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/templates/agent/vac_service.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/templates/project/__init__.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/templates/project/app.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/templates/project/my_log.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/templates/project/vac_service.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/templates/system_services/__init__.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/templates/system_services/app.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/templates/system_services/my_log.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/terraform/__init__.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/terraform/tfvars_editor.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/tools/__init__.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/tools/web_browser.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/utils/__init__.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/utils/api_key.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/utils/big_context.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/utils/config_class.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/utils/config_schema.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/utils/gcp.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/utils/gcp_project.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/utils/parsers.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/utils/timedelta.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/utils/user_ids.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/utils/version.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/vertex/__init__.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/vertex/extensions_call.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/vertex/extensions_class.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/vertex/genai_functions.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/vertex/init.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/vertex/memory_tools.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/vertex/safety.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo/vertex/type_dict_to_json.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo.egg-info/dependency_links.txt +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo.egg-info/entry_points.txt +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo.egg-info/requires.txt +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/src/sunholo.egg-info/top_level.txt +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/tests/test_async.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/tests/test_async_genai2.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/tests/test_chat_history.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/tests/test_config.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/tests/test_genai2.py +0 -0
- {sunholo-0.138.0 → sunholo-0.139.0}/tests/test_unstructured.py +0 -0
@@ -25,7 +25,7 @@ from ..database import delete_row_from_source, return_sources_last24
|
|
25
25
|
from ..utils.parsers import contains_url, extract_urls
|
26
26
|
from ..chunker.publish import publish_text
|
27
27
|
from ..gcs.add_file import add_file_to_gcs
|
28
|
-
from ..utils
|
28
|
+
from ..utils import ConfigManager
|
29
29
|
from ..custom_logging import log
|
30
30
|
|
31
31
|
# config file?
|
@@ -48,8 +48,9 @@ def handle_special_commands(user_input,
|
|
48
48
|
|
49
49
|
user_input = user_input.strip()
|
50
50
|
|
51
|
+
config = ConfigManager("vector_name")
|
51
52
|
if not cmds:
|
52
|
-
cmds =
|
53
|
+
cmds = config.vacConfig("user_special_cmds")
|
53
54
|
if not cmds:
|
54
55
|
return None
|
55
56
|
|
@@ -0,0 +1,132 @@
|
|
1
|
+
import os
|
2
|
+
import json
|
3
|
+
|
4
|
+
from ..custom_logging import log
|
5
|
+
from ..utils.mime import get_mime_type_gemini
|
6
|
+
from .download_url import get_bytes_from_gcs
|
7
|
+
|
8
|
+
def download_gcs_source_to_string(source:str) -> str:
    """
    Download a text-like file from Google Cloud Storage and return its content as a string.

    The MIME type is looked up from the file extension via `get_mime_type_gemini`.
    Images and PDFs are skipped because they cannot be represented as plain text.
    Files with a `.json` extension that parse as valid JSON are rendered through
    `json_data_to_markdown`; invalid JSON is returned as the raw decoded text.

    Args:
        source (str): The Google Cloud Storage URI of the file to download
            (e.g., 'gs://bucket_name/file_name').

    Returns:
        str: The decoded (UTF-8, undecodable bytes replaced) text of the file,
            or "" when the extension has no known MIME type, the file is an
            image/PDF, or fetching/decoding the bytes raised an exception.

    Raises:
        ValueError: If the bytes were fetched and decoded but the resulting text is empty.
    """
    mime_type = get_mime_type_gemini(source)
    # Falsy check covers both "" and None for an unsupported extension.
    if not mime_type:
        log.warning(f"Can not download to string file source {source}")
        return ""

    if mime_type.startswith("image/") or mime_type == "application/pdf":
        log.warning(f"Can not download to string file source {source} of type {mime_type}")
        return ""

    try:
        log.info(f"Extracting text for {source}")
        # Named raw_bytes so the builtin `bytes` type is not shadowed.
        raw_bytes = get_bytes_from_gcs(source)
        string = raw_bytes.decode('utf-8', errors='replace')
        log.info(f"Extracted {len(string)} characters from {source}: {string[:100]}")
    except Exception as err:
        # Best-effort boundary: any fetch/decode failure is logged and reported as "no text".
        log.error(f"Could not extract string text for {source}: {str(err)}")
        return ""

    if not string:
        raise ValueError(f"No string text for {source}")

    file_ext = os.path.splitext(source)[1].lower().lstrip('.')
    if file_ext == "json":
        try:
            extracted_data = json.loads(string)
            log.debug("Turning json text into markdown format so as not to confuse structured output", log_struct=extracted_data)
            string = json_data_to_markdown(extracted_data)
        except json.JSONDecodeError:
            # Keep the raw text when the .json file does not contain valid JSON.
            log.warning(f"Could not get valid json from .json file: {source}")

    return string
|
77
|
+
|
78
|
+
def json_data_to_markdown(data, indent_level: int = 0) -> str:
    """
    Recursively converts a Python object (from parsed JSON) into a Markdown string.

    Rendering rules:
      * dict  -> one `**key**: value` line per entry; non-empty containers and
                 multi-line values are placed on the lines below `**key**:`.
      * list  -> one `- item` bullet per element.
      * str   -> the text itself; lines after the first get a hanging indent.
      * None  -> `*null*`; bool -> `true` / `false`; numbers -> `str(number)`.
      * empty dict / list -> `(empty object)` / `(empty list)`.

    Args:
        data: The parsed JSON value (dict, list, str, int, float, bool or None).
        indent_level (int): Nesting depth; each level indents by two spaces.

    Returns:
        str: The Markdown rendering, with every line indented for `indent_level`.
    """
    unit = "  "  # Use 2 spaces for indentation (also the width of the "- " bullet)
    indent = unit * indent_level
    child_indent = unit * (indent_level + 1)

    if isinstance(data, dict):
        if not data:
            return f"{indent}(empty object)"
        markdown_parts = []
        for key, value in data.items():
            # Process the value recursively
            value_md = json_data_to_markdown(value, indent_level + 1)
            # Complex render: multi-line output, or a non-empty dict/list
            is_complex_render = "\n" in value_md.strip() or (
                isinstance(value, (dict, list)) and bool(value)
            )
            if is_complex_render:
                markdown_parts.append(f"{indent}**{key}**:")
                markdown_parts.append(value_md)
            else:
                # Simple value rendering, strip its own indent before adding key
                markdown_parts.append(f"{indent}**{key}**: {value_md.strip()}")
        return "\n".join(markdown_parts)

    if isinstance(data, list):
        if not data:
            return f"{indent}(empty list)"
        markdown_parts = []
        for item in data:
            item_md = json_data_to_markdown(item, indent_level + 1)
            # Every branch prefixes its first line with exactly child_indent, so
            # slicing removes only that indent and keeps the item's own spaces.
            first_line, *rest_lines = item_md[len(child_indent):].split("\n")
            markdown_parts.append(f"{indent}- {first_line}")
            # The remaining lines were already indented one level deeper by the
            # recursive call, which lines them up under the bullet text; adding
            # another prefix here would compound the indent at every nesting level.
            markdown_parts.extend(rest_lines)
        return "\n".join(markdown_parts)

    if isinstance(data, str):
        # Handle multi-line strings: indent subsequent lines
        first_line, *rest_lines = data.split("\n")
        return "\n".join(
            [f"{indent}{first_line}", *(f"{indent}{unit}{line}" for line in rest_lines)]
        )

    if data is None:
        return f"{indent}*null*"  # Represent None distinctly

    if isinstance(data, bool):
        return f"{indent}{str(data).lower()}"  # true / false

    # Numbers (int, float)
    return f"{indent}{str(data)}"
|
@@ -58,12 +58,12 @@ def load_all_configs():
|
|
58
58
|
"""
|
59
59
|
from ..custom_logging import log
|
60
60
|
|
61
|
-
if not os.getenv("
|
62
|
-
log.
|
61
|
+
if not os.getenv("VAC_CONFIG_FOLDER"):
|
62
|
+
log.warning("VAC_CONFIG_FOLDER is not set, using os.getcwd() instead")
|
63
63
|
else:
|
64
|
-
log.
|
64
|
+
log.warning(f"VAC_CONFIG_FOLDER set to: {os.getenv('VAC_CONFIG_FOLDER')}")
|
65
65
|
|
66
|
-
config_folder = os.getenv("
|
66
|
+
config_folder = os.getenv("VAC_CONFIG_FOLDER", os.getcwd())
|
67
67
|
config_folder = os.path.join(config_folder, "config")
|
68
68
|
|
69
69
|
log.debug(f"Loading all configs from folder: {config_folder}")
|
@@ -119,7 +119,7 @@ def reload_config_file(config_file, filename):
|
|
119
119
|
def load_config(filename: str=None) -> tuple[dict, str]:
|
120
120
|
"""
|
121
121
|
Load configuration from a yaml or json file.
|
122
|
-
Will look relative to `
|
122
|
+
Will look relative to `VAC_CONFIG_FOLDER` environment variable if available, else current directory.
|
123
123
|
|
124
124
|
Args:
|
125
125
|
filename (str, optional): The name of the configuration file. Defaults to the `_CONFIG_FILE` environment variable.
|
@@ -154,11 +154,11 @@ def load_config(filename: str=None) -> tuple[dict, str]:
|
|
154
154
|
log.debug(f"Cache expired for {filename}, reloading...")
|
155
155
|
|
156
156
|
|
157
|
-
if os.getenv("
|
158
|
-
log.debug(f"
|
157
|
+
if os.getenv("VAC_CONFIG_FOLDER"):
|
158
|
+
log.debug(f"VAC_CONFIG_FOLDER: {os.getenv('VAC_CONFIG_FOLDER')}")
|
159
159
|
|
160
160
|
# Join the script directory with the filename
|
161
|
-
config_folder = os.getenv("
|
161
|
+
config_folder = os.getenv("VAC_CONFIG_FOLDER") if os.getenv("VAC_CONFIG_FOLDER") else os.getcwd()
|
162
162
|
|
163
163
|
config_file = os.path.join(config_folder, filename)
|
164
164
|
log.debug(f"Loading config file {config_file}")
|
@@ -66,3 +66,53 @@ def guess_mime_type(file_path: str) -> str:
|
|
66
66
|
|
67
67
|
return mime
|
68
68
|
|
69
|
+
|
70
|
+
# Extension (lowercase, no dot) -> MIME type accepted for Gemini inputs.
# Built once at import time instead of on every call.
_GEMINI_MIME_TYPES = {
    # Images
    'png': 'image/png',
    'jpg': 'image/jpeg',
    'jpeg': 'image/jpeg',
    'gif': 'image/gif',
    'webp': 'image/webp',

    # Document formats
    'pdf': 'application/pdf',

    # Programming languages
    'js': 'text/javascript',
    'py': 'text/x-python',

    # Web formats
    'html': 'text/html',
    'htm': 'text/html',
    'css': 'text/css',

    # Text formats
    'txt': 'text/plain',
    'md': 'text/md',
    'csv': 'text/csv',
    'xml': 'text/xml',
    'rtf': 'text/rtf',

    # Special case: JSON files are treated as plain text
    'json': 'text/plain',
}


def get_mime_type_gemini(file_path:str) -> str:
    """
    Determine the MIME type based on file extension.
    Only returns valid Gemini formats, or an empty string if the extension is not supported.

    Args:
        file_path (str): Path (or URI) to the file; only its extension is inspected,
            case-insensitively.

    Returns:
        str: The appropriate MIME type for the file, or "" when the extension is
            missing or not in the supported mapping.
    """
    # Extract the file extension (lowercase, without the leading dot)
    ext = os.path.splitext(file_path)[1].lower().lstrip('.')

    # Return the appropriate MIME type, defaulting to "" if unknown
    return _GEMINI_MIME_TYPES.get(ext, "")
|
@@ -95,6 +95,7 @@ src/sunholo/excel/plugin.py
|
|
95
95
|
src/sunholo/gcs/__init__.py
|
96
96
|
src/sunholo/gcs/add_file.py
|
97
97
|
src/sunholo/gcs/download_folder.py
|
98
|
+
src/sunholo/gcs/download_gcs_text.py
|
98
99
|
src/sunholo/gcs/download_url.py
|
99
100
|
src/sunholo/gcs/extract_and_sign.py
|
100
101
|
src/sunholo/gcs/metadata.py
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|