sunholo 0.80.4__tar.gz → 0.80.6__tar.gz

This diff compares the contents of two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (150)
  1. {sunholo-0.80.4 → sunholo-0.80.6}/PKG-INFO +2 -2
  2. {sunholo-0.80.4 → sunholo-0.80.6}/setup.py +1 -1
  3. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/chunker/process_chunker_data.py +4 -1
  4. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/components/retriever.py +4 -0
  5. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/discovery_engine/__init__.py +1 -0
  6. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/discovery_engine/chunker_handler.py +34 -16
  7. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/discovery_engine/create_new.py +4 -4
  8. sunholo-0.80.6/sunholo/discovery_engine/get_ai_search_chunks.py +53 -0
  9. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/gcs/add_file.py +3 -2
  10. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo.egg-info/PKG-INFO +2 -2
  11. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo.egg-info/SOURCES.txt +1 -0
  12. {sunholo-0.80.4 → sunholo-0.80.6}/LICENSE.txt +0 -0
  13. {sunholo-0.80.4 → sunholo-0.80.6}/MANIFEST.in +0 -0
  14. {sunholo-0.80.4 → sunholo-0.80.6}/README.md +0 -0
  15. {sunholo-0.80.4 → sunholo-0.80.6}/setup.cfg +0 -0
  16. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/__init__.py +0 -0
  17. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/agents/__init__.py +0 -0
  18. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/agents/chat_history.py +0 -0
  19. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/agents/dispatch_to_qa.py +0 -0
  20. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/agents/fastapi/__init__.py +0 -0
  21. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/agents/fastapi/base.py +0 -0
  22. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/agents/fastapi/qna_routes.py +0 -0
  23. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/agents/flask/__init__.py +0 -0
  24. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/agents/flask/base.py +0 -0
  25. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/agents/flask/qna_routes.py +0 -0
  26. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/agents/flask/vac_routes.py +0 -0
  27. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/agents/langserve.py +0 -0
  28. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/agents/pubsub.py +0 -0
  29. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/agents/route.py +0 -0
  30. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/agents/special_commands.py +0 -0
  31. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/agents/swagger.py +0 -0
  32. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/archive/__init__.py +0 -0
  33. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/archive/archive.py +0 -0
  34. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/auth/__init__.py +0 -0
  35. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/auth/gcloud.py +0 -0
  36. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/auth/refresh.py +0 -0
  37. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/auth/run.py +0 -0
  38. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/azure/__init__.py +0 -0
  39. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/azure/auth.py +0 -0
  40. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/azure/blobs.py +0 -0
  41. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/azure/event_grid.py +0 -0
  42. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/bots/__init__.py +0 -0
  43. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/bots/discord.py +0 -0
  44. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/bots/github_webhook.py +0 -0
  45. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/bots/webapp.py +0 -0
  46. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/chunker/__init__.py +0 -0
  47. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/chunker/azure.py +0 -0
  48. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/chunker/doc_handling.py +0 -0
  49. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/chunker/encode_metadata.py +0 -0
  50. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/chunker/images.py +0 -0
  51. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/chunker/loaders.py +0 -0
  52. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/chunker/message_data.py +0 -0
  53. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/chunker/pdfs.py +0 -0
  54. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/chunker/publish.py +0 -0
  55. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/chunker/pubsub.py +0 -0
  56. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/chunker/splitter.py +0 -0
  57. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/cli/__init__.py +0 -0
  58. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/cli/chat_vac.py +0 -0
  59. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/cli/cli.py +0 -0
  60. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/cli/cli_init.py +0 -0
  61. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/cli/configs.py +0 -0
  62. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/cli/deploy.py +0 -0
  63. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/cli/embedder.py +0 -0
  64. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/cli/merge_texts.py +0 -0
  65. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/cli/run_proxy.py +0 -0
  66. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/cli/sun_rich.py +0 -0
  67. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/cli/swagger.py +0 -0
  68. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/cli/vertex.py +0 -0
  69. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/components/__init__.py +0 -0
  70. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/components/llm.py +0 -0
  71. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/components/vectorstore.py +0 -0
  72. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/custom_logging.py +0 -0
  73. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/database/__init__.py +0 -0
  74. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/database/alloydb.py +0 -0
  75. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/database/alloydb_client.py +0 -0
  76. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/database/database.py +0 -0
  77. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/database/lancedb.py +0 -0
  78. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/database/sql/sb/create_function.sql +0 -0
  79. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/database/sql/sb/create_function_time.sql +0 -0
  80. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/database/sql/sb/create_table.sql +0 -0
  81. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/database/sql/sb/delete_source_row.sql +0 -0
  82. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/database/sql/sb/return_sources.sql +0 -0
  83. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/database/sql/sb/setup.sql +0 -0
  84. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/database/static_dbs.py +0 -0
  85. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/database/uuid.py +0 -0
  86. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/discovery_engine/discovery_engine_client.py +0 -0
  87. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/embedder/__init__.py +0 -0
  88. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/embedder/embed_chunk.py +0 -0
  89. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/gcs/__init__.py +0 -0
  90. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/gcs/download_folder.py +0 -0
  91. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/gcs/download_url.py +0 -0
  92. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/gcs/metadata.py +0 -0
  93. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/invoke/__init__.py +0 -0
  94. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/invoke/direct_vac_func.py +0 -0
  95. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/invoke/invoke_vac_utils.py +0 -0
  96. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/langfuse/__init__.py +0 -0
  97. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/langfuse/callback.py +0 -0
  98. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/langfuse/prompts.py +0 -0
  99. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/llamaindex/__init__.py +0 -0
  100. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/llamaindex/get_files.py +0 -0
  101. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/llamaindex/import_files.py +0 -0
  102. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/llamaindex/llamaindex_class.py +0 -0
  103. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/llamaindex/user_history.py +0 -0
  104. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/lookup/__init__.py +0 -0
  105. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/lookup/model_lookup.yaml +0 -0
  106. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/patches/__init__.py +0 -0
  107. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/patches/langchain/__init__.py +0 -0
  108. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/patches/langchain/lancedb.py +0 -0
  109. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/patches/langchain/vertexai.py +0 -0
  110. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/pubsub/__init__.py +0 -0
  111. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/pubsub/process_pubsub.py +0 -0
  112. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/pubsub/pubsub_manager.py +0 -0
  113. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/qna/__init__.py +0 -0
  114. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/qna/parsers.py +0 -0
  115. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/qna/retry.py +0 -0
  116. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/streaming/__init__.py +0 -0
  117. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/streaming/content_buffer.py +0 -0
  118. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/streaming/langserve.py +0 -0
  119. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/streaming/stream_lookup.py +0 -0
  120. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/streaming/streaming.py +0 -0
  121. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/summarise/__init__.py +0 -0
  122. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/summarise/summarise.py +0 -0
  123. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/tools/__init__.py +0 -0
  124. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/tools/web_browser.py +0 -0
  125. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/utils/__init__.py +0 -0
  126. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/utils/api_key.py +0 -0
  127. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/utils/big_context.py +0 -0
  128. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/utils/config.py +0 -0
  129. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/utils/config_class.py +0 -0
  130. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/utils/config_schema.py +0 -0
  131. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/utils/gcp.py +0 -0
  132. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/utils/gcp_project.py +0 -0
  133. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/utils/parsers.py +0 -0
  134. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/utils/timedelta.py +0 -0
  135. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/utils/user_ids.py +0 -0
  136. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/utils/version.py +0 -0
  137. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/vertex/__init__.py +0 -0
  138. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/vertex/extensions_call.py +0 -0
  139. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/vertex/extensions_class.py +0 -0
  140. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/vertex/genai_functions.py +0 -0
  141. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/vertex/init.py +0 -0
  142. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/vertex/memory_tools.py +0 -0
  143. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/vertex/safety.py +0 -0
  144. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo/vertex/type_dict_to_json.py +0 -0
  145. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo.egg-info/dependency_links.txt +0 -0
  146. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo.egg-info/entry_points.txt +0 -0
  147. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo.egg-info/requires.txt +0 -0
  148. {sunholo-0.80.4 → sunholo-0.80.6}/sunholo.egg-info/top_level.txt +0 -0
  149. {sunholo-0.80.4 → sunholo-0.80.6}/tests/test_chat_history.py +0 -0
  150. {sunholo-0.80.4 → sunholo-0.80.6}/tests/test_config.py +0 -0
@@ -1,9 +1,9 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sunholo
3
- Version: 0.80.4
3
+ Version: 0.80.6
4
4
  Summary: Large Language Model DevOps - a package to help deploy LLMs to the Cloud.
5
5
  Home-page: https://github.com/sunholo-data/sunholo-py
6
- Download-URL: https://github.com/sunholo-data/sunholo-py/archive/refs/tags/v0.80.4.tar.gz
6
+ Download-URL: https://github.com/sunholo-data/sunholo-py/archive/refs/tags/v0.80.6.tar.gz
7
7
  Author: Holosun ApS
8
8
  Author-email: multivac@sunholo.com
9
9
  License: Apache License, Version 2.0
@@ -1,7 +1,7 @@
1
1
  from setuptools import setup, find_packages
2
2
 
3
3
  # Define your base version
4
- version = '0.80.4'
4
+ version = '0.80.6'
5
5
 
6
6
  setup(
7
7
  name='sunholo',
@@ -15,6 +15,7 @@ from ..discovery_engine.chunker_handler import discovery_engine_chunker_check
15
15
  from .publish import process_docs_chunks_vector_name
16
16
  from .splitter import chunk_doc_to_docs
17
17
  from ..azure.blobs import is_azure_blob
18
+ from ..utils import ConfigManager
18
19
 
19
20
  from ..custom_logging import log
20
21
 
@@ -29,13 +30,15 @@ def process_chunker_data(message_data, metadata, vector_name):
29
30
 
30
31
  log.debug(f"Found metadata in pubsub: {metadata=}")
31
32
 
33
+ config=ConfigManager(vector_name)
34
+
32
35
  # checks if only a llamaindex chunking/embedder, return early as no other processing needed
33
36
  llamacheck = llamaindex_chunker_check(message_data, metadata, vector_name)
34
37
  if llamacheck:
35
38
  return llamacheck
36
39
 
37
40
  # if only a discovery engine memory, return early as no other processing needed
38
- discovery_check = discovery_engine_chunker_check(message_data, metadata, vector_name)
41
+ discovery_check = discovery_engine_chunker_check(message_data, metadata, config=config)
39
42
  if discovery_check:
40
43
  return discovery_check
41
44
 
@@ -60,6 +60,9 @@ def pick_retriever(vector_name:str=None, config:ConfigManager=None, embeddings=N
60
60
 
61
61
  if vectorstore == "vertex_ai_search" or vectorstore == "discovery_engine":
62
62
  # use direct retriever
63
+ if value.get('chunks'):
64
+ log.warning(f"{config.vector_name} will not be using GoogleVertexAISearchRetriever with chunks vertex AI search as not supported yet")
65
+ continue
63
66
  from langchain.retrievers import GoogleVertexAISearchRetriever
64
67
  gcp_config = config.vacConfig('gcp_config')
65
68
  try:
@@ -76,6 +79,7 @@ def pick_retriever(vector_name:str=None, config:ConfigManager=None, embeddings=N
76
79
  continue
77
80
 
78
81
  retriever_list.append(gcp_retriever)
82
+ continue
79
83
 
80
84
  from_metadata_id = value.get('from_metadata_id')
81
85
  if from_metadata_id:
@@ -1 +1,2 @@
1
1
  from .discovery_engine_client import DiscoveryEngineClient
2
+ from .get_ai_search_chunks import get_all_chunks
@@ -1,5 +1,5 @@
1
1
  from ..custom_logging import log
2
- from ..utils.config import load_config_key
2
+ from ..utils import ConfigManager
3
3
  from ..utils.gcp_project import get_gcp_project
4
4
  from ..components import load_memories
5
5
 
@@ -7,7 +7,7 @@ from .discovery_engine_client import DiscoveryEngineClient
7
7
  from .create_new import create_new_discovery_engine
8
8
 
9
9
 
10
- def do_discovery_engine(message_data, metadata, vector_name):
10
+ def do_discovery_engine(message_data:str, metadata:dict, config:ConfigManager=None):
11
11
  """
12
12
 
13
13
  Example:
@@ -15,13 +15,13 @@ def do_discovery_engine(message_data, metadata, vector_name):
15
15
  message_data = "gs://bucket_name/path_to_file.txt"
16
16
  metadata = {"user": "admin"}
17
17
  vector_name = "example_vector"
18
- response = do_discovery_engine(message_data, metadata, vector_name)
18
+ response = do_discovery_engine(message_data, metadata, config=config)
19
19
  print(response)
20
20
  # Imported file to corpus: {'status': 'success'}
21
21
  ```
22
22
  """
23
23
 
24
- memories = load_memories(vector_name)
24
+ memories = load_memories(config=config)
25
25
  tools = []
26
26
 
27
27
  if not memories:
@@ -38,7 +38,7 @@ def do_discovery_engine(message_data, metadata, vector_name):
38
38
  continue
39
39
  #location = gcp_config.get('location')
40
40
  corpus = DiscoveryEngineClient(
41
- data_store_id=vector_name,
41
+ data_store_id=config.vector_name,
42
42
  project_id=get_gcp_project(),
43
43
  # location needs to be 'eu' or 'us' which doesn't work with other configurations
44
44
  #location=location or global_location
@@ -65,14 +65,14 @@ def do_discovery_engine(message_data, metadata, vector_name):
65
65
  log.error(f"Error importing {message_data} - {corp=} - {str(err)}")
66
66
 
67
67
  if str(err).startswith("404"):
68
- log.info(f"Attempting to create a new DiscoveryEngine corpus: {vector_name}")
68
+ log.info(f"Attempting to create a new DiscoveryEngine corpus: {config.vector_name}")
69
69
  try:
70
- new_corp = create_new_discovery_engine(vector_name)
70
+ new_corp = create_new_discovery_engine(config)
71
71
  except Exception as err:
72
- log.error(f"Failed to create new DiscoveryEngine {vector_name} - {str(err)}")
72
+ log.error(f"Failed to create new DiscoveryEngine {config.vector_name} - {str(err)}")
73
73
  continue
74
74
  if new_corp:
75
- log.info(f"Found new DiscoveryEngine {vector_name=} - {new_corp=}")
75
+ log.info(f"Found new DiscoveryEngine {config.vector_name=} - {new_corp=}")
76
76
  response = corp.import_documents(
77
77
  gcs_uri=message_data
78
78
  )
@@ -86,8 +86,9 @@ def do_discovery_engine(message_data, metadata, vector_name):
86
86
  log.warning("Only gs:// data is supported for Discovery Engine")
87
87
 
88
88
 
89
- def check_discovery_engine_in_memory(vector_name):
90
- memories = load_config_key("memory", vector_name=vector_name, kind="vacConfig")
89
+ def check_discovery_engine_in_memory(config:ConfigManager):
90
+ memories = config.vacConfig("memory")
91
+
91
92
  for memory in memories: # Iterate over the list
92
93
  for key, value in memory.items(): # Now iterate over the dictionary
93
94
  log.info(f"Found memory {key}")
@@ -99,16 +100,33 @@ def check_discovery_engine_in_memory(vector_name):
99
100
 
100
101
  return False
101
102
 
102
- def discovery_engine_chunker_check(message_data, metadata, vector_name):
103
+ def check_write_memories(config:ConfigManager):
104
+ write_mem = []
105
+ memories = config.vacConfig("memory")
106
+ for memory in memories:
107
+ for key, value in memory.items():
108
+ if value.get('read_only'):
109
+ continue
110
+ write_mem.append(memory)
111
+
112
+ return write_mem
113
+
114
+ def discovery_engine_chunker_check(message_data, metadata, vector_name:str=None, config:ConfigManager=None):
115
+
116
+ if config is None:
117
+ if vector_name is None:
118
+ raise ValueError("Must provide config or vector_name")
119
+ config = ConfigManager(vector_name=vector_name)
120
+
103
121
  # discovery engine handles its own chunking/embedding
104
- memories = load_config_key("memory", vector_name=vector_name, kind="vacConfig")
122
+ memories = config.vacConfig("memory")
105
123
  if not memories:
106
124
  return None
107
125
 
108
- total_memories = len(memories)
126
+ total_memories = len(check_write_memories(config))
109
127
  llama = None
110
- if check_discovery_engine_in_memory(vector_name):
111
- llama = do_discovery_engine(message_data, metadata, vector_name)
128
+ if check_discovery_engine_in_memory(config):
129
+ llama = do_discovery_engine(message_data, metadata, config=config)
112
130
  log.info(f"Processed discovery engine: {llama}")
113
131
 
114
132
  # If discovery engine is the only entry, return
@@ -1,10 +1,10 @@
1
1
  from .discovery_engine_client import DiscoveryEngineClient
2
- from ..utils.config import load_config_key
2
+ from ..utils import ConfigManager
3
3
  from ..utils.gcp_project import get_gcp_project
4
4
 
5
- def create_new_discovery_engine(vector_name):
5
+ def create_new_discovery_engine(config:ConfigManager):
6
6
 
7
- chunker_config = load_config_key("chunker", vector_name=vector_name, kind="vacConfig")
7
+ chunker_config = config.vacConfig("chunker")
8
8
 
9
9
  chunk_size = 500
10
10
  if chunker_config:
@@ -18,7 +18,7 @@ def create_new_discovery_engine(vector_name):
18
18
  #location = gcp_config.get('location')
19
19
 
20
20
  de = DiscoveryEngineClient(
21
- data_store_id=vector_name,
21
+ data_store_id=config.vector_name,
22
22
  project_id=project_id,
23
23
  # location needs to be 'eu' or 'us' which doesn't work with other configurations
24
24
  #location=location
@@ -0,0 +1,53 @@
1
+ from ..utils import ConfigManager
2
+ from ..utils.gcp_project import get_gcp_project
3
+ from ..custom_logging import log
4
+ from .discovery_engine_client import DiscoveryEngineClient
5
+ from ..components import load_memories
6
+
7
+ def get_all_chunks(question:str, config:ConfigManager):
8
+ """
9
+ Look through a config memory key and find all Vertex AI Search retrievers, call them and return a joined string of chunks
10
+
11
+ args: question - question to search similarity for
12
+ config: A ConfigManager object
13
+
14
+ returns: a big string of chunks
15
+ """
16
+ memories = load_memories(config=config)
17
+ chunks = []
18
+
19
+ if not memories:
20
+ return None
21
+
22
+ vector_name = config.vector_name
23
+ for memory in memories:
24
+ for key, value in memory.items(): # Now iterate over the dictionary
25
+ log.info(f"Found memory {key}")
26
+ vectorstore = value.get('vectorstore')
27
+ if vectorstore == "discovery_engine" or vectorstore == "vertex_ai_search":
28
+ if value.get('read_only'):
29
+ new_vector_name = value.get('vector_name')
30
+ if not new_vector_name:
31
+ log.warning("read_only specified but no new vector_name to read from")
32
+ vector_name = new_vector_name
33
+
34
+ num_chunks = value.get('num_chunks') or 3
35
+
36
+ chunk = get_chunks(question, vector_name, num_chunks)
37
+ if chunk:
38
+ chunks.append(chunk)
39
+ if chunks:
40
+ return "\n".join(chunks)
41
+
42
+ log.warning(f"No chunks found for {vector_name}")
43
+ return None
44
+
45
+ def get_chunks(question, vector_name, num_chunks):
46
+ de = DiscoveryEngineClient(vector_name, project_id=get_gcp_project())
47
+ try:
48
+ return de.get_chunks(question, num_previous_chunks=num_chunks, num_next_chunks=num_chunks)
49
+ except Exception as err:
50
+ log.error(f"No discovery engine chunks found: {str(err)}")
51
+
52
+
53
+
@@ -118,12 +118,13 @@ def add_file_to_gcs(filename: str,
118
118
  if os.getenv('EXTENSIONS_BUCKET'):
119
119
  bucket_filepath = os.path.basename(filename)
120
120
 
121
- if vector_name is None:
122
- vector_name = "global"
121
+ if not vector_name:
122
+ vector_name = "global"
123
123
 
124
124
  if not bucket_filepath:
125
125
 
126
126
  bucket_filepath = f"{vector_name}/{year}/{month}/{day}/{hour}/{os.path.basename(filename)}"
127
+
127
128
  bucket_filepath_prev = f"{vector_name}/{year}/{month}/{day}/{hour_prev}/{os.path.basename(filename)}"
128
129
 
129
130
  blob = bucket.blob(bucket_filepath)
@@ -1,9 +1,9 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sunholo
3
- Version: 0.80.4
3
+ Version: 0.80.6
4
4
  Summary: Large Language Model DevOps - a package to help deploy LLMs to the Cloud.
5
5
  Home-page: https://github.com/sunholo-data/sunholo-py
6
- Download-URL: https://github.com/sunholo-data/sunholo-py/archive/refs/tags/v0.80.4.tar.gz
6
+ Download-URL: https://github.com/sunholo-data/sunholo-py/archive/refs/tags/v0.80.6.tar.gz
7
7
  Author: Holosun ApS
8
8
  Author-email: multivac@sunholo.com
9
9
  License: Apache License, Version 2.0
@@ -85,6 +85,7 @@ sunholo/discovery_engine/__init__.py
85
85
  sunholo/discovery_engine/chunker_handler.py
86
86
  sunholo/discovery_engine/create_new.py
87
87
  sunholo/discovery_engine/discovery_engine_client.py
88
+ sunholo/discovery_engine/get_ai_search_chunks.py
88
89
  sunholo/embedder/__init__.py
89
90
  sunholo/embedder/embed_chunk.py
90
91
  sunholo/gcs/__init__.py
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes