sunholo 0.72.0__tar.gz → 0.73.3__tar.gz

This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registries.
Files changed (137)
  1. {sunholo-0.72.0 → sunholo-0.73.3}/PKG-INFO +4 -3
  2. {sunholo-0.72.0 → sunholo-0.73.3}/setup.py +3 -2
  3. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/agents/dispatch_to_qa.py +10 -7
  4. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/agents/langserve.py +1 -1
  5. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/agents/route.py +24 -9
  6. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/cli/chat_vac.py +119 -67
  7. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/cli/cli.py +3 -3
  8. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/cli/embedder.py +2 -1
  9. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/components/retriever.py +2 -2
  10. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/gcs/add_file.py +19 -11
  11. sunholo-0.73.3/sunholo/invoke/__init__.py +1 -0
  12. sunholo-0.73.3/sunholo/invoke/invoke_vac_utils.py +151 -0
  13. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/langfuse/prompts.py +9 -3
  14. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/llamaindex/import_files.py +8 -7
  15. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/streaming/langserve.py +4 -1
  16. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/utils/config.py +1 -1
  17. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/utils/config_class.py +21 -9
  18. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/vertex/extensions_class.py +179 -64
  19. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/vertex/memory_tools.py +1 -1
  20. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo.egg-info/PKG-INFO +4 -3
  21. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo.egg-info/SOURCES.txt +3 -4
  22. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo.egg-info/requires.txt +2 -1
  23. sunholo-0.72.0/sunholo/vertex/extensions.py +0 -326
  24. sunholo-0.72.0/tests/test_dispatch_to_qa.py +0 -28
  25. sunholo-0.72.0/tests/test_swagger.py +0 -15
  26. {sunholo-0.72.0 → sunholo-0.73.3}/LICENSE.txt +0 -0
  27. {sunholo-0.72.0 → sunholo-0.73.3}/MANIFEST.in +0 -0
  28. {sunholo-0.72.0 → sunholo-0.73.3}/README.md +0 -0
  29. {sunholo-0.72.0 → sunholo-0.73.3}/setup.cfg +0 -0
  30. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/__init__.py +0 -0
  31. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/agents/__init__.py +0 -0
  32. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/agents/chat_history.py +0 -0
  33. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/agents/fastapi/__init__.py +0 -0
  34. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/agents/fastapi/base.py +0 -0
  35. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/agents/fastapi/qna_routes.py +0 -0
  36. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/agents/flask/__init__.py +0 -0
  37. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/agents/flask/base.py +0 -0
  38. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/agents/flask/qna_routes.py +0 -0
  39. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/agents/flask/vac_routes.py +0 -0
  40. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/agents/pubsub.py +0 -0
  41. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/agents/special_commands.py +0 -0
  42. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/agents/swagger.py +0 -0
  43. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/archive/__init__.py +0 -0
  44. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/archive/archive.py +0 -0
  45. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/auth/__init__.py +0 -0
  46. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/auth/run.py +0 -0
  47. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/bots/__init__.py +0 -0
  48. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/bots/discord.py +0 -0
  49. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/bots/github_webhook.py +0 -0
  50. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/bots/webapp.py +0 -0
  51. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/chunker/__init__.py +0 -0
  52. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/chunker/data_to_embed_pubsub.py +0 -0
  53. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/chunker/doc_handling.py +0 -0
  54. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/chunker/images.py +0 -0
  55. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/chunker/loaders.py +0 -0
  56. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/chunker/message_data.py +0 -0
  57. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/chunker/pdfs.py +0 -0
  58. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/chunker/publish.py +0 -0
  59. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/chunker/splitter.py +0 -0
  60. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/cli/__init__.py +0 -0
  61. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/cli/cli_init.py +0 -0
  62. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/cli/configs.py +0 -0
  63. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/cli/deploy.py +0 -0
  64. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/cli/merge_texts.py +0 -0
  65. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/cli/run_proxy.py +0 -0
  66. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/cli/sun_rich.py +0 -0
  67. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/cli/swagger.py +0 -0
  68. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/components/__init__.py +0 -0
  69. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/components/llm.py +0 -0
  70. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/components/vectorstore.py +0 -0
  71. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/database/__init__.py +0 -0
  72. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/database/alloydb.py +0 -0
  73. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/database/alloydb_client.py +0 -0
  74. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/database/database.py +0 -0
  75. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/database/lancedb.py +0 -0
  76. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/database/sql/sb/create_function.sql +0 -0
  77. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/database/sql/sb/create_function_time.sql +0 -0
  78. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/database/sql/sb/create_table.sql +0 -0
  79. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/database/sql/sb/delete_source_row.sql +0 -0
  80. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/database/sql/sb/return_sources.sql +0 -0
  81. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/database/sql/sb/setup.sql +0 -0
  82. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/database/static_dbs.py +0 -0
  83. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/database/uuid.py +0 -0
  84. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/discovery_engine/__init__.py +0 -0
  85. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/discovery_engine/chunker_handler.py +0 -0
  86. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/discovery_engine/create_new.py +0 -0
  87. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/discovery_engine/discovery_engine_client.py +0 -0
  88. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/embedder/__init__.py +0 -0
  89. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/embedder/embed_chunk.py +0 -0
  90. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/gcs/__init__.py +0 -0
  91. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/gcs/download_url.py +0 -0
  92. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/gcs/metadata.py +0 -0
  93. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/langfuse/__init__.py +0 -0
  94. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/langfuse/callback.py +0 -0
  95. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/llamaindex/__init__.py +0 -0
  96. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/llamaindex/generate.py +0 -0
  97. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/llamaindex/get_files.py +0 -0
  98. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/logging.py +0 -0
  99. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/lookup/__init__.py +0 -0
  100. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/lookup/model_lookup.yaml +0 -0
  101. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/patches/__init__.py +0 -0
  102. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/patches/langchain/__init__.py +0 -0
  103. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/patches/langchain/lancedb.py +0 -0
  104. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/patches/langchain/vertexai.py +0 -0
  105. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/pubsub/__init__.py +0 -0
  106. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/pubsub/process_pubsub.py +0 -0
  107. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/pubsub/pubsub_manager.py +0 -0
  108. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/qna/__init__.py +0 -0
  109. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/qna/parsers.py +0 -0
  110. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/qna/retry.py +0 -0
  111. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/streaming/__init__.py +0 -0
  112. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/streaming/content_buffer.py +0 -0
  113. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/streaming/stream_lookup.py +0 -0
  114. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/streaming/streaming.py +0 -0
  115. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/summarise/__init__.py +0 -0
  116. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/summarise/summarise.py +0 -0
  117. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/tools/__init__.py +0 -0
  118. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/tools/web_browser.py +0 -0
  119. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/utils/__init__.py +0 -0
  120. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/utils/api_key.py +0 -0
  121. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/utils/big_context.py +0 -0
  122. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/utils/config_schema.py +0 -0
  123. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/utils/gcp.py +0 -0
  124. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/utils/gcp_project.py +0 -0
  125. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/utils/parsers.py +0 -0
  126. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/utils/timedelta.py +0 -0
  127. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/utils/user_ids.py +0 -0
  128. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/utils/version.py +0 -0
  129. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/vertex/__init__.py +0 -0
  130. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/vertex/init.py +0 -0
  131. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo/vertex/safety.py +0 -0
  132. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo.egg-info/dependency_links.txt +0 -0
  133. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo.egg-info/entry_points.txt +0 -0
  134. {sunholo-0.72.0 → sunholo-0.73.3}/sunholo.egg-info/top_level.txt +0 -0
  135. {sunholo-0.72.0 → sunholo-0.73.3}/tests/test_chat_history.py +0 -0
  136. {sunholo-0.72.0 → sunholo-0.73.3}/tests/test_chunker.py +0 -0
  137. {sunholo-0.72.0 → sunholo-0.73.3}/tests/test_config.py +0 -0
@@ -1,9 +1,9 @@
  Metadata-Version: 2.1
  Name: sunholo
- Version: 0.72.0
+ Version: 0.73.3
  Summary: Large Language Model DevOps - a package to help deploy LLMs to the Cloud.
  Home-page: https://github.com/sunholo-data/sunholo-py
- Download-URL: https://github.com/sunholo-data/sunholo-py/archive/refs/tags/v0.72.0.tar.gz
+ Download-URL: https://github.com/sunholo-data/sunholo-py/archive/refs/tags/v0.73.3.tar.gz
  Author: Holosun ApS
  Author-email: multivac@sunholo.com
  License: Apache License, Version 2.0
@@ -92,7 +92,7 @@ Requires-Dist: google-api-python-client; extra == "gcp"
  Requires-Dist: google-cloud-alloydb-connector[pg8000]; extra == "gcp"
  Requires-Dist: google-auth-httplib2; extra == "gcp"
  Requires-Dist: google-auth-oauthlib; extra == "gcp"
- Requires-Dist: google-cloud-aiplatform; extra == "gcp"
+ Requires-Dist: google-cloud-aiplatform>=1.58.0; extra == "gcp"
  Requires-Dist: google-cloud-bigquery; extra == "gcp"
  Requires-Dist: google-cloud-build; extra == "gcp"
  Requires-Dist: google-cloud-service-control; extra == "gcp"
@@ -110,6 +110,7 @@ Requires-Dist: tiktoken; extra == "openai"
  Provides-Extra: anthropic
  Requires-Dist: langchain-anthropic>=0.1.13; extra == "anthropic"
  Provides-Extra: tools
+ Requires-Dist: openapi-spec-validator; extra == "tools"
  Requires-Dist: playwright; extra == "tools"
  Provides-Extra: http
  Requires-Dist: fastapi; extra == "http"
@@ -1,7 +1,7 @@
  from setuptools import setup, find_packages

  # Define your base version
- version = '0.72.0'
+ version = '0.73.3'

  setup(
  name='sunholo',
@@ -112,7 +112,7 @@ setup(
  "google-cloud-alloydb-connector[pg8000]",
  "google-auth-httplib2",
  "google-auth-oauthlib",
- "google-cloud-aiplatform",
+ "google-cloud-aiplatform>=1.58.0",
  "google-cloud-bigquery",
  "google-cloud-build",
  "google-cloud-service-control",
@@ -134,6 +134,7 @@ setup(
  "langchain-anthropic>=0.1.13",
  ],
  'tools' : [
+ 'openapi-spec-validator',
  'playwright'
  ],
  'http': [
@@ -12,7 +12,7 @@
  # See the License for the specific language governing permissions and
  # limitations under the License.
  from ..logging import log
- from ..utils import load_config_key
+ from ..utils import ConfigManager
  from ..auth import get_header
  import requests
  import aiohttp
@@ -46,26 +46,29 @@ def prep_request_payload(user_input, chat_history, vector_name, stream, **kwargs
  ```
  """

+ config = ConfigManager(vector_name)
+
  # Add chat_history/vector_name to kwargs so langserve can use them too
  kwargs['chat_history'] = chat_history

- agent = load_config_key("agent", vector_name=vector_name, kind="vacConfig")
- agent_type = load_config_key("agent_type", vector_name=vector_name, kind="vacConfig")
+ agent = config.vacConfig("agent")
+ agent_type = config.vacConfig("agent_type")

  override_endpoint = kwargs.get("override_endpoint")
  if override_endpoint:
  log.info(f"Overriding endpoint with {override_endpoint}")

  # {'stream': '', 'invoke': ''}
- endpoints = route_endpoint(vector_name, override_endpoint=override_endpoint)
+ post_endpoints = route_endpoint(override_endpoint=override_endpoint, config=config)

  if stream:
- qna_endpoint = endpoints["stream"]
+ qna_endpoint = post_endpoints["stream"]
  else:
- qna_endpoint = endpoints["invoke"]
+ qna_endpoint = post_endpoints["invoke"]

  if agent == "langserve" or agent_type == "langserve":
- qna_data = prepare_request_data(user_input, endpoints["input_schema"], vector_name, **kwargs)
+ get_endpoints = route_endpoint(override_endpoint=override_endpoint, method = 'get', config=config)
+ qna_data = prepare_request_data(user_input, get_endpoints["input_schema"], vector_name, **kwargs)
  else:
  # Base qna_data dictionary
  qna_data = {
@@ -97,6 +97,6 @@ def prepare_request_data(user_input, endpoint, vector_name, **kwargs):

  return request_data
  else:
- log.error("Invalid or no input schema available.")
+ log.error(f"Invalid or no input schema available for {endpoint=} {input_schema=}")
  return None

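The hunks above replace the old load_config_key() helper with the new ConfigManager class introduced in this release. A minimal sketch of the new accessor pattern, assuming the external import path sunholo.utils and a made-up VAC name "my_vac":

    from sunholo.utils import ConfigManager

    config = ConfigManager("my_vac")                 # one instance per VAC / vector_name
    agent = config.vacConfig("agent")                # was load_config_key("agent", vector_name=..., kind="vacConfig")
    agent_type = config.vacConfig("agent_type")      # was load_config_key("agent_type", ...)
    agents_config = config.agentConfig(agent_type)   # was load_config_key(agent_type, ..., kind="agentConfig")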
@@ -12,18 +12,24 @@
  # See the License for the specific language governing permissions and
  # limitations under the License.
  from ..logging import log
- from ..utils import load_config_key, load_config
+ from ..utils import load_config, ConfigManager

- def route_vac(vector_name: str) -> str :
+ def route_vac(vector_name: str=None, config=None) -> str :
  """
  Considers what VAC this vector_name belongs to
  """
- agent_url = load_config_key('agent_url', vector_name=vector_name, kind="vacConfig")
+ if not vector_name and not config:
+ raise ValueError("Must provide config or vector_name argument")
+
+ if not config:
+ config = ConfigManager(vector_name)
+
+ agent_url = config.vacConfig('agent_url')
  if agent_url:
  log.info('agent_url found in llm_config.yaml')
  return agent_url

- agent = load_config_key('agent', vector_name, kind="vacConfig")
+ agent = config.vacConfig('agent')
  log.info(f'agent_type: {agent}')

  agent_route, _ = load_config('config/cloud_run_urls.json')
@@ -37,15 +43,24 @@ def route_vac(vector_name: str) -> str :
  log.info(f'agent_url: {agent_url}')
  return agent_url

- def route_endpoint(vector_name, method = 'post', override_endpoint=None):
+ def route_endpoint(vector_name=None, method = 'post', override_endpoint=None, config=None):
+
+ if vector_name is None and config is None:
+ raise ValueError('vector_name and config can not both be None')
+
+ if config:
+ vector_name = config.vector_name
+
+ if not config:
+ config = ConfigManager(vector_name)

- agent_type = load_config_key('agent_type', vector_name, kind="vacConfig")
+ agent_type = config.vacConfig('agent_type')
  if not agent_type:
- agent_type = load_config_key('agent', vector_name, kind="vacConfig")
+ agent_type = config.vacConfig('agent')

- stem = route_vac(vector_name) if not override_endpoint else override_endpoint
+ stem = route_vac(config=config) if not override_endpoint else override_endpoint

- agents_config = load_config_key(agent_type, vector_name, kind="agentConfig")
+ agents_config = config.agentConfig(agent_type)

  log.info(f"agents_config: {agents_config}")
  if method not in agents_config:
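route_endpoint() and route_vac() can now be driven by a ConfigManager instance instead of a bare vector_name. A hedged usage sketch based on how dispatch_to_qa.py consumes the result in this diff; the VAC name is a placeholder and the dictionary keys ("stream", "invoke", "input_schema") are taken from that calling code, not from documented API guarantees:

    from sunholo.utils import ConfigManager
    from sunholo.agents.route import route_endpoint

    config = ConfigManager("my_vac")
    post_endpoints = route_endpoint(config=config)               # method='post' is the default
    get_endpoints = route_endpoint(method='get', config=config)

    stream_url = post_endpoints["stream"]
    invoke_url = post_endpoints["invoke"]
    input_schema_url = get_endpoints["input_schema"]             # used for langserve agents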
@@ -1,19 +1,21 @@
1
1
  from ..agents import send_to_qa, handle_special_commands
2
2
  from ..streaming import generate_proxy_stream, can_agent_stream
3
3
  from ..utils.user_ids import generate_user_id
4
- from ..utils.config import load_config_key
4
+ from ..utils import ConfigManager
5
5
  from ..utils.api_key import has_multivac_api_key
6
6
  from ..logging import log
7
7
  from ..qna.parsers import parse_output
8
8
  from ..gcs.add_file import add_file_to_gcs
9
9
  from .run_proxy import clean_proxy_list, start_proxy, stop_proxy
10
+ from ..invoke import invoke_vac
11
+ from ..utils.big_context import has_text_extension, merge_text_files, load_gitignore_patterns, build_file_tree
12
+ import tempfile
10
13
 
11
14
  import uuid
12
15
  import os
13
16
  import sys
14
17
  import subprocess
15
18
  import json
16
- import requests
17
19
  from pathlib import Path
18
20
 
19
21
  from rich import print
@@ -24,13 +26,62 @@ from rich.panel import Panel
24
26
  from rich.text import Text
25
27
  from rich.table import Table
26
28
 
29
+ def read_and_add_to_user_input(user_input):
30
+ read_input = None
31
+
32
+ path = user_input.split(" ", 1)[1] if " " in user_input else None
33
+ if not path:
34
+ console.print("[bold red]Please provide a valid file or folder path.[/bold red]")
35
+ return None
36
+
37
+ if os.path.isfile(path):
38
+ if not has_text_extension(path):
39
+ console.print("[bold red]Unsupported file type. Please provide a text file or preprocess to text, or use !upload (e.g. images) or `sunholo embed`.[/bold red]")
40
+ return None
41
+
42
+ try:
43
+ with open(path, 'r', encoding='utf-8') as file:
44
+ file_content = file.read()
45
+ read_input = file_content
46
+ console.print(f"[bold yellow]File content from {path} read into user_input: [{len(read_input.split())}] words[/bold yellow]")
47
+ except FileNotFoundError:
48
+ console.print("[bold red]File not found. Please check the path and try again.[/bold red]")
49
+ return None
50
+ except IOError:
51
+ console.print("[bold red]File could not be read. Please ensure it is a readable text file.[/bold red]")
52
+ return None
53
+ elif os.path.isdir(path):
54
+ patterns = []
55
+ gitignore_path = os.path.join(path, '.gitignore')
56
+
57
+ if os.path.exists(gitignore_path):
58
+ patterns = load_gitignore_patterns(gitignore_path)
59
+
60
+ try:
61
+ with tempfile.NamedTemporaryFile(delete=False, mode='w+', encoding='utf-8') as temp_file:
62
+ temp_file_path = temp_file.name
63
+ file_tree = merge_text_files(path, temp_file_path, patterns)
64
+ console.print(f"[bold yellow]Contents of the folder '{path}' have been merged add added to input.[/bold yellow]")
65
+ console.print("\n".join(file_tree))
66
+ temp_file.seek(0)
67
+ read_input = temp_file.read()
68
+ console.print(f"[bold yellow]Total words: [{len(read_input.split())}] - watch out for high token costs! Use !clear_read to reset[/bold yellow]")
69
+ os.remove(temp_file_path) # Clean up the temporary file
70
+ except Exception as e:
71
+ console.print(f"[bold red]An error occurred while reading the folder: {str(e)}[/bold red]")
72
+ return None
73
+ else:
74
+ console.print("[bold red]The provided path is neither a file nor a folder. Please check the path and try again.[/bold red]")
75
+ return None
76
+
77
+ return read_input
27
78
 
28
79
  def get_service_url(vac_name, project, region, no_config=False):
29
80
 
30
81
  if no_config:
31
82
  agent_name = vac_name
32
83
  else:
33
- agent_name = load_config_key("agent", vac_name, kind="vacConfig")
84
+ agent_name = ConfigManager(vac_name).vacConfig("agent")
34
85
 
35
86
  proxies = clean_proxy_list()
36
87
  if agent_name in proxies:
@@ -50,7 +101,7 @@ def handle_file_upload(file, vector_name):
50
101
  if not Path(file).is_file():
51
102
  return None
52
103
 
53
- agent_name = load_config_key("agent", vector_name, kind="vacConfig")
104
+ agent_name = ConfigManager(vector_name).vacConfig("agent")
54
105
  # vertex can't handle directories
55
106
  bucket_filepath = f"{vector_name}/uploads/{os.path.basename(file)}" if agent_name != "vertex-genai" else os.path.basename(file)
56
107
 
@@ -65,7 +116,10 @@ def stream_chat_session(service_url, service_name, stream=True):
65
116
 
66
117
  user_id = generate_user_id()
67
118
  chat_history = []
68
- agent_name = load_config_key("agent", service_name, kind="vacConfig")
119
+ agent_name = ConfigManager(service_name).vacConfig("agent")
120
+ file_reply = None
121
+ read_file = None
122
+ read_file_count = None
69
123
  while True:
70
124
  session_id = str(uuid.uuid4())
71
125
  user_input = Prompt.ask("[bold cyan]You[/bold cyan]")
@@ -80,9 +134,26 @@ def stream_chat_session(service_url, service_name, stream=True):
80
134
 
81
135
  if special_reply:
82
136
  console.print(f"[bold yellow]{service_name}:[/bold yellow] {special_reply}", end='\n')
83
- continue
84
-
85
- if user_input.lower().startswith("upload"):
137
+ continue
138
+
139
+ if user_input.lower().startswith("!read"):
140
+ read_file = read_and_add_to_user_input(user_input)
141
+ if read_file:
142
+ read_file_count = len(read_file.split())
143
+ continue
144
+
145
+ if user_input.lower().startswith("!ls"):
146
+ items = os.listdir(os.getcwd())
147
+ for item in items:
148
+ console.print(item)
149
+ continue
150
+
151
+ if user_input.lower().startswith("!tree"):
152
+ tree = build_file_tree(os.getcwd(), patterns=[])
153
+ console.print(tree)
154
+ continue
155
+
156
+ if user_input.lower().startswith("!upload"):
86
157
  file_path = user_input.split(" ", 1)[1] if " " in user_input else None
87
158
  if not file_path:
88
159
  console.print("[bold red]Please provide a valid file path.[/bold red]")
@@ -94,7 +165,7 @@ def stream_chat_session(service_url, service_name, stream=True):
94
165
  console.print("[bold red]Invalid file upload[/bold red]")
95
166
  continue
96
167
 
97
- console.print(f"[bold yellow]{service_name}:[/bold yellow] Uploaded {file_path} to {file_reply} - image will be sent each reply until you issue 'clear_upload' ", end='\n')
168
+ console.print(f"[bold yellow]{service_name}:[/bold yellow] Uploaded {file_path} to {file_reply} - image will be sent each reply until you issue '!clear_upload' ", end='\n')
98
169
 
99
170
  except FileNotFoundError:
100
171
  console.print("[bold red]File not found. Please check the path and try again.[/bold red]")
@@ -102,10 +173,25 @@ def stream_chat_session(service_url, service_name, stream=True):
102
173
  # file_reply stays for each message from now on
103
174
  continue
104
175
 
105
- if user_input.lower().startswith("clear_upload"):
176
+ if user_input.lower().startswith("!clear_upload"):
106
177
  console.print("[bold yellow]File upload path cleared.[/bold yellow]")
107
178
  file_path = None
179
+ continue
180
+
181
+ if user_input.lower().startswith("!clear_read"):
182
+ console.print("[bold yellow]Read in file(s) cleared.[/bold yellow]")
183
+ read_file = None
184
+ read_file_count = None
185
+ continue
108
186
 
187
+ if read_file:
188
+ user_input = f"<user added file>{read_file}</user added file>\n{user_input}"
189
+
190
+ # guardrail
191
+ if len(user_input)> 1000000:
192
+ console.print("[bold red]Over 1 million characters in user_input, aborting as probably unintentional. Use API directly instead.[/bold red]")
193
+ continue
194
+
109
195
  if not stream:
110
196
  vac_response = send_to_qa(user_input,
111
197
  vector_name=service_name,
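The chat loop now understands several bang-prefixed local commands (!read, !ls, !tree, !upload, !clear_upload, !clear_read). The new read_and_add_to_user_input() helper shown earlier in this file backs !read; a hypothetical programmatic call, with the path and follow-up prompt invented purely for illustration:

    from sunholo.cli.chat_vac import read_and_add_to_user_input

    read_file = read_and_add_to_user_input("!read ./docs/overview.md")
    if read_file:
        # the chat loop wraps the read content around the next question like this
        user_input = f"<user added file>{read_file}</user added file>\nSummarise the file above."

Note the guardrail added above: prompts over 1,000,000 characters are rejected before being sent to the VAC.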
@@ -165,8 +251,15 @@ def stream_chat_session(service_url, service_name, stream=True):
  response_started = False
  vac_response = ""

- # point or star?
- with console.status(f"[bold orange]Thinking...{file_reply}[/bold orange]", spinner="star") as status:
+
+ thinking = "[bold orange]Thinking...[/bold orange]"
+ if file_reply:
+ thinking = f"[bold orange]Thinking with upload {file_reply} - issue !clear_upload to remove...[/bold orange]"
+
+ if read_file:
+ thinking = f"{thinking} - [bold orange]additional [{read_file_count}] words added via !read_file contents - issue !clear_read to remove[/bold orange]"
+
+ with console.status(thinking, spinner="star") as status:
  for token in stream_response():
  if not response_started:
  status.stop()
@@ -274,15 +367,19 @@ def resolve_service_url(args, no_config=False):

  return args.url_override

- agent_name = load_config_key("agent", args.vac_name, kind="vacConfig")
- agent_url = load_config_key("agent_url", args.vac_name, "vacConfig")
+ config = ConfigManager(args.vac_name)
+ global_config = ConfigManager("global")
+
+ agent_name = config.vacConfig("agent")
+ agent_url = config.vacConfig("agent_url")
+
  if agent_url:
  console.print("Found agent_url within vacConfig: {agent_url}")

  # via public cloud endpoints - assumes no gcloud auth
  if has_multivac_api_key():
  log.debug("Found MULTIVAC_API_KEY")
- gcp_config = load_config_key("gcp_config", "global", "vacConfig")
+ gcp_config = global_config.vacConfig("gcp_config")
  endpoints_base_url = gcp_config.get("endpoints_base_url")
  if not endpoints_base_url:
  console.print("[bold red]MULTIVAC_API_KEY env var is set but no config.gcp_config.endpoints_base_url can be found[/bold red]")
@@ -310,6 +407,8 @@ def resolve_service_url(args, no_config=False):

  def vac_command(args):

+ config = ConfigManager(args.vac_name)
+
  if args.action == 'list':

  list_cloud_run_services(args.project, args.region)
@@ -324,7 +423,7 @@ def vac_command(args):

  elif args.action == 'chat':
  service_url = resolve_service_url(args)
- agent_name = load_config_key("agent", args.vac_name, kind="vacConfig")
+ agent_name = config.vacConfig("agent")

  streamer = can_agent_stream(agent_name)
  log.debug(f"streamer: {streamer}")
@@ -334,9 +433,10 @@ def vac_command(args):
  if args.headless:
  headless_mode(service_url, args.vac_name, args.user_input, args.chat_history, stream=streamer)
  else:
- display_name = load_config_key("display_name", vector_name=args.vac_name, kind="vacConfig")
- description = load_config_key("description", vector_name=args.vac_name, kind="vacConfig")
- endpoints_config = load_config_key(agent_name, "dummy_value", kind="agentConfig")
+ display_name = config.vacConfig("display_name")
+ description = config.vacConfig("description")
+ endpoints_config = config.agentConfig(agent_name)
+
  post_endpoints = endpoints_config['post']

  display_endpoints = ' '.join(f"{key}: {value}" for key, value in post_endpoints.items())
@@ -362,54 +462,6 @@ def vac_command(args):

  invoke_vac(service_url, args.data, is_file=args.is_file)

- def invoke_vac(service_url, data, vector_name=None, metadata=None, is_file=False):
- try:
- if is_file:
- console.print("Uploading file...")
- # Handle file upload
- if not isinstance(data, Path) or not data.is_file():
- raise ValueError("For file uploads, 'data' must be a Path object pointing to a valid file.")
-
- files = {
- 'file': (data.name, open(data, 'rb')),
- }
- form_data = {
- 'vector_name': vector_name,
- 'metadata': json.dumps(metadata) if metadata else '',
- }
-
- response = requests.post(service_url, files=files, data=form_data)
- else:
- console.print("Uploading JSON...")
- try:
- if isinstance(data, dict):
- json_data = data
- else:
- json_data = json.loads(data)
- except json.JSONDecodeError as err:
- console.print(f"[bold red]ERROR: invalid JSON: {str(err)} [/bold red]")
- sys.exit(1)
- except Exception as err:
- console.print(f"[bold red]ERROR: could not parse JSON: {str(err)} [/bold red]")
- sys.exit(1)
-
- log.debug(f"Sending data: {data} or json_data: {json.dumps(json_data)}")
- # Handle JSON data
- headers = {"Content-Type": "application/json"}
- response = requests.post(service_url, headers=headers, data=json.dumps(json_data))
-
- response.raise_for_status()
-
- the_data = response.json()
- console.print(the_data)
-
- return the_data
-
- except requests.exceptions.RequestException as e:
- console.print(f"[bold red]ERROR: Failed to invoke VAC: {e}[/bold red]")
- except Exception as e:
- console.print(f"[bold red]ERROR: An unexpected error occurred: {e}[/bold red]")
-

  def list_cloud_run_services(project, region):
  """
@@ -10,7 +10,7 @@ from .chat_vac import setup_vac_subparser
  from .embedder import setup_embedder_subparser
  from .swagger import setup_swagger_subparser

- from ..utils.config import load_config_key
+ from ..utils import ConfigManager

  from ..logging import log

@@ -20,9 +20,9 @@ from rich.panel import Panel

  def load_default_gcp_config():
  try:
- gcp_config = load_config_key('gcp_config', 'global', kind="vacConfig")
+ gcp_config = ConfigManager("global").vacConfig("gcp_config")
  except FileNotFoundError as e:
- console.print(f"{e} - move config/ folder to working directory or set the _CONFIG_FOLDER environment variable to its location")
+ console.print(f"{e} - move config/ folder to working directory or set the VAC_CONFIG_FOLDER environment variable to its location")
  sys.exit(1)

  if gcp_config:
@@ -8,7 +8,8 @@ from pathlib import Path
  from .sun_rich import console
  from rich.progress import Progress

- from .chat_vac import resolve_service_url, invoke_vac
+ from ..invoke import invoke_vac
+ from .chat_vac import resolve_service_url
  from .run_proxy import stop_proxy

  def create_metadata(vac, metadata):
@@ -13,7 +13,7 @@
  # limitations under the License.
  from ..logging import log
  from .vectorstore import pick_vectorstore
- from ..utils import load_config_key
+ from ..utils import load_config_key, ConfigManager
  from .llm import get_embeddings
  from ..utils.gcp_project import get_gcp_project

@@ -27,7 +27,7 @@ from langchain.retrievers import ContextualCompressionRetriever


  def load_memories(vector_name):
- memories = load_config_key("memory", vector_name, kind="vacConfig")
+ memories = ConfigManager(vector_name).vacConfig("memory")
  log.info(f"Found memory settings for {vector_name}: {memories}")
  if not memories or len(memories) == 0:
  log.info(f"No memory settings found for {vector_name}")
@@ -22,7 +22,7 @@ except ImportError:
  storage = None

  from ..logging import log
- from ..utils.config import load_config_key
+ from ..utils import load_config_key, ConfigManager


  def handle_base64_image(base64_data: str, vector_name: str, extension: str):
@@ -37,7 +37,8 @@ def handle_base64_image(base64_data: str, vector_name: str, extension: str):
  Returns:
  Tuple[str, str]: The URI of the uploaded image and the MIME type.
  """
- model = load_config_key("llm", vector_name, "vacConfig")
+
+ model = ConfigManager(vector_name).vacConfig("llm")
  if model.startswith("openai"): # pass it to gpt directly
  return base64_data, base64_data.split(",", 1)

@@ -69,16 +70,19 @@ def handle_base64_image(base64_data: str, vector_name: str, extension: str):

  def resolve_bucket(vector_name):
  if os.getenv('EXTENSIONS_BUCKET'):
+ log.warning('Resolving to EXTENSIONS_BUCKET environment variable')
  return os.getenv('EXTENSIONS_BUCKET')

- bucket_config = load_config_key("upload", vector_name, "vacConfig")
- if bucket_config:
- if bucket_config.get("buckets"):
- bucket_name = bucket_config.get("buckets").get("all")
- else:
- bucket_name = os.getenv('GCS_BUCKET')
- if not bucket_name:
- raise ValueError("No bucket found to upload to: GCS_BUCKET returned None")
+ if vector_name:
+ bucket_config = ConfigManager(vector_name).vacConfig("upload")
+
+ if bucket_config:
+ if bucket_config.get("buckets"):
+ bucket_name = bucket_config.get("buckets").get("all")
+
+ bucket_name = bucket_name or os.getenv('GCS_BUCKET')
+ if not bucket_name:
+ raise ValueError("No bucket found to upload to: GCS_BUCKET returned None")

  if bucket_name.startswith("gs://"):
  bucket_name = bucket_name.removeprefix("gs://")
@@ -86,7 +90,7 @@ def resolve_bucket(vector_name):
  return bucket_name

  def add_file_to_gcs(filename: str,
- vector_name:str,
+ vector_name:str=None,
  bucket_name: str=None,
  metadata:dict=None,
  bucket_filepath:str=None):
@@ -114,7 +118,11 @@ def add_file_to_gcs(filename: str,
  if os.getenv('EXTENSIONS_BUCKET'):
  bucket_filepath = os.path.basename(filename)

+ if vector_name is None:
+ vector_name = "global"
+
  if not bucket_filepath:
+
  bucket_filepath = f"{vector_name}/{year}/{month}/{day}/{hour}/{os.path.basename(filename)}"
  bucket_filepath_prev = f"{vector_name}/{year}/{month}/{day}/{hour_prev}/{os.path.basename(filename)}"

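With these changes vector_name becomes optional for add_file_to_gcs(): when omitted it falls back to a "global" path prefix, and the bucket can be taken from EXTENSIONS_BUCKET, the VAC's upload config, or GCS_BUCKET (per resolve_bucket above, assuming it is used when no bucket_name is passed). A sketch under those assumptions; the filename is a placeholder, and treating the return value as the upload destination follows how chat_vac.py reports uploads:

    from sunholo.gcs.add_file import add_file_to_gcs

    # bucket resolved via EXTENSIONS_BUCKET / GCS_BUCKET; path becomes global/YYYY/MM/DD/HH/report.pdf
    destination = add_file_to_gcs("report.pdf")

    # per-VAC prefix and bucket lookup, as before
    destination = add_file_to_gcs("report.pdf", vector_name="my_vac")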
@@ -0,0 +1 @@
+ from .invoke_vac_utils import invoke_vac, invoke_vac_qa
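The invoke_vac() helper deleted from sunholo/cli/chat_vac.py above now lives in the new sunholo.invoke package (sunholo/invoke/invoke_vac_utils.py), alongside an invoke_vac_qa() companion. A hedged sketch, assuming the signature is unchanged from the removed CLI version; the URLs and payloads are placeholders:

    from pathlib import Path
    from sunholo.invoke import invoke_vac

    # send a JSON payload to a VAC endpoint
    invoke_vac("https://my-vac.example.com/vac/my_vac", {"user_input": "hello"})

    # upload a file instead (is_file=True expected a Path object in the old implementation)
    invoke_vac("https://my-vac.example.com/vac/my_vac", Path("report.pdf"),
               vector_name="my_vac", is_file=True)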