sunholo 0.80.5__tar.gz → 0.81.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (151)
  1. {sunholo-0.80.5 → sunholo-0.81.0}/PKG-INFO +2 -2
  2. sunholo-0.81.0/setup.cfg +16 -0
  3. {sunholo-0.80.5 → sunholo-0.81.0}/setup.py +1 -1
  4. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/chunker/process_chunker_data.py +4 -1
  5. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/components/retriever.py +1 -0
  6. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/database/__init__.py +1 -0
  7. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/database/alloydb.py +7 -2
  8. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/database/alloydb_client.py +99 -5
  9. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/discovery_engine/__init__.py +1 -0
  10. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/discovery_engine/chunker_handler.py +10 -10
  11. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/discovery_engine/create_new.py +4 -4
  12. sunholo-0.81.0/sunholo/discovery_engine/get_ai_search_chunks.py +53 -0
  13. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/utils/config_class.py +2 -2
  14. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/utils/gcp_project.py +1 -0
  15. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo.egg-info/PKG-INFO +2 -2
  16. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo.egg-info/SOURCES.txt +1 -0
  17. sunholo-0.80.5/setup.cfg +0 -7
  18. {sunholo-0.80.5 → sunholo-0.81.0}/LICENSE.txt +0 -0
  19. {sunholo-0.80.5 → sunholo-0.81.0}/MANIFEST.in +0 -0
  20. {sunholo-0.80.5 → sunholo-0.81.0}/README.md +0 -0
  21. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/__init__.py +0 -0
  22. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/agents/__init__.py +0 -0
  23. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/agents/chat_history.py +0 -0
  24. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/agents/dispatch_to_qa.py +0 -0
  25. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/agents/fastapi/__init__.py +0 -0
  26. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/agents/fastapi/base.py +0 -0
  27. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/agents/fastapi/qna_routes.py +0 -0
  28. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/agents/flask/__init__.py +0 -0
  29. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/agents/flask/base.py +0 -0
  30. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/agents/flask/qna_routes.py +0 -0
  31. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/agents/flask/vac_routes.py +0 -0
  32. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/agents/langserve.py +0 -0
  33. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/agents/pubsub.py +0 -0
  34. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/agents/route.py +0 -0
  35. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/agents/special_commands.py +0 -0
  36. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/agents/swagger.py +0 -0
  37. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/archive/__init__.py +0 -0
  38. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/archive/archive.py +0 -0
  39. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/auth/__init__.py +0 -0
  40. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/auth/gcloud.py +0 -0
  41. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/auth/refresh.py +0 -0
  42. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/auth/run.py +0 -0
  43. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/azure/__init__.py +0 -0
  44. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/azure/auth.py +0 -0
  45. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/azure/blobs.py +0 -0
  46. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/azure/event_grid.py +0 -0
  47. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/bots/__init__.py +0 -0
  48. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/bots/discord.py +0 -0
  49. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/bots/github_webhook.py +0 -0
  50. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/bots/webapp.py +0 -0
  51. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/chunker/__init__.py +0 -0
  52. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/chunker/azure.py +0 -0
  53. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/chunker/doc_handling.py +0 -0
  54. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/chunker/encode_metadata.py +0 -0
  55. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/chunker/images.py +0 -0
  56. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/chunker/loaders.py +0 -0
  57. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/chunker/message_data.py +0 -0
  58. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/chunker/pdfs.py +0 -0
  59. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/chunker/publish.py +0 -0
  60. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/chunker/pubsub.py +0 -0
  61. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/chunker/splitter.py +0 -0
  62. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/cli/__init__.py +0 -0
  63. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/cli/chat_vac.py +0 -0
  64. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/cli/cli.py +0 -0
  65. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/cli/cli_init.py +0 -0
  66. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/cli/configs.py +0 -0
  67. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/cli/deploy.py +0 -0
  68. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/cli/embedder.py +0 -0
  69. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/cli/merge_texts.py +0 -0
  70. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/cli/run_proxy.py +0 -0
  71. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/cli/sun_rich.py +0 -0
  72. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/cli/swagger.py +0 -0
  73. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/cli/vertex.py +0 -0
  74. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/components/__init__.py +0 -0
  75. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/components/llm.py +0 -0
  76. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/components/vectorstore.py +0 -0
  77. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/custom_logging.py +0 -0
  78. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/database/database.py +0 -0
  79. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/database/lancedb.py +0 -0
  80. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/database/sql/sb/create_function.sql +0 -0
  81. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/database/sql/sb/create_function_time.sql +0 -0
  82. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/database/sql/sb/create_table.sql +0 -0
  83. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/database/sql/sb/delete_source_row.sql +0 -0
  84. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/database/sql/sb/return_sources.sql +0 -0
  85. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/database/sql/sb/setup.sql +0 -0
  86. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/database/static_dbs.py +0 -0
  87. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/database/uuid.py +0 -0
  88. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/discovery_engine/discovery_engine_client.py +0 -0
  89. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/embedder/__init__.py +0 -0
  90. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/embedder/embed_chunk.py +0 -0
  91. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/gcs/__init__.py +0 -0
  92. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/gcs/add_file.py +0 -0
  93. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/gcs/download_folder.py +0 -0
  94. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/gcs/download_url.py +0 -0
  95. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/gcs/metadata.py +0 -0
  96. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/invoke/__init__.py +0 -0
  97. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/invoke/direct_vac_func.py +0 -0
  98. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/invoke/invoke_vac_utils.py +0 -0
  99. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/langfuse/__init__.py +0 -0
  100. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/langfuse/callback.py +0 -0
  101. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/langfuse/prompts.py +0 -0
  102. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/llamaindex/__init__.py +0 -0
  103. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/llamaindex/get_files.py +0 -0
  104. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/llamaindex/import_files.py +0 -0
  105. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/llamaindex/llamaindex_class.py +0 -0
  106. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/llamaindex/user_history.py +0 -0
  107. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/lookup/__init__.py +0 -0
  108. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/lookup/model_lookup.yaml +0 -0
  109. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/patches/__init__.py +0 -0
  110. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/patches/langchain/__init__.py +0 -0
  111. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/patches/langchain/lancedb.py +0 -0
  112. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/patches/langchain/vertexai.py +0 -0
  113. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/pubsub/__init__.py +0 -0
  114. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/pubsub/process_pubsub.py +0 -0
  115. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/pubsub/pubsub_manager.py +0 -0
  116. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/qna/__init__.py +0 -0
  117. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/qna/parsers.py +0 -0
  118. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/qna/retry.py +0 -0
  119. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/streaming/__init__.py +0 -0
  120. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/streaming/content_buffer.py +0 -0
  121. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/streaming/langserve.py +0 -0
  122. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/streaming/stream_lookup.py +0 -0
  123. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/streaming/streaming.py +0 -0
  124. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/summarise/__init__.py +0 -0
  125. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/summarise/summarise.py +0 -0
  126. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/tools/__init__.py +0 -0
  127. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/tools/web_browser.py +0 -0
  128. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/utils/__init__.py +0 -0
  129. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/utils/api_key.py +0 -0
  130. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/utils/big_context.py +0 -0
  131. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/utils/config.py +0 -0
  132. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/utils/config_schema.py +0 -0
  133. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/utils/gcp.py +0 -0
  134. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/utils/parsers.py +0 -0
  135. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/utils/timedelta.py +0 -0
  136. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/utils/user_ids.py +0 -0
  137. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/utils/version.py +0 -0
  138. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/vertex/__init__.py +0 -0
  139. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/vertex/extensions_call.py +0 -0
  140. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/vertex/extensions_class.py +0 -0
  141. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/vertex/genai_functions.py +0 -0
  142. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/vertex/init.py +0 -0
  143. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/vertex/memory_tools.py +0 -0
  144. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/vertex/safety.py +0 -0
  145. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo/vertex/type_dict_to_json.py +0 -0
  146. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo.egg-info/dependency_links.txt +0 -0
  147. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo.egg-info/entry_points.txt +0 -0
  148. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo.egg-info/requires.txt +0 -0
  149. {sunholo-0.80.5 → sunholo-0.81.0}/sunholo.egg-info/top_level.txt +0 -0
  150. {sunholo-0.80.5 → sunholo-0.81.0}/tests/test_chat_history.py +0 -0
  151. {sunholo-0.80.5 → sunholo-0.81.0}/tests/test_config.py +0 -0
@@ -1,9 +1,9 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sunholo
3
- Version: 0.80.5
3
+ Version: 0.81.0
4
4
  Summary: Large Language Model DevOps - a package to help deploy LLMs to the Cloud.
5
5
  Home-page: https://github.com/sunholo-data/sunholo-py
6
- Download-URL: https://github.com/sunholo-data/sunholo-py/archive/refs/tags/v0.80.5.tar.gz
6
+ Download-URL: https://github.com/sunholo-data/sunholo-py/archive/refs/tags/v0.81.0.tar.gz
7
7
  Author: Holosun ApS
8
8
  Author-email: multivac@sunholo.com
9
9
  License: Apache License, Version 2.0
@@ -0,0 +1,16 @@
1
+ [metadata]
2
+ description-file = README.md
3
+
4
+ [mypy]
5
+ files = sunholo, tests
6
+ python_version = 3.9
7
+ disallow_untyped_calls = True
8
+ warn_return_any = True
9
+ warn_unused_ignores = True
10
+ strict_optional = True
11
+ ignore_missing_imports = True
12
+
13
+ [egg_info]
14
+ tag_build =
15
+ tag_date = 0
16
+
@@ -1,7 +1,7 @@
1
1
  from setuptools import setup, find_packages
2
2
 
3
3
  # Define your base version
4
- version = '0.80.5'
4
+ version = '0.81.0'
5
5
 
6
6
  setup(
7
7
  name='sunholo',
@@ -15,6 +15,7 @@ from ..discovery_engine.chunker_handler import discovery_engine_chunker_check
15
15
  from .publish import process_docs_chunks_vector_name
16
16
  from .splitter import chunk_doc_to_docs
17
17
  from ..azure.blobs import is_azure_blob
18
+ from ..utils import ConfigManager
18
19
 
19
20
  from ..custom_logging import log
20
21
 
@@ -29,13 +30,15 @@ def process_chunker_data(message_data, metadata, vector_name):
29
30
 
30
31
  log.debug(f"Found metadata in pubsub: {metadata=}")
31
32
 
33
+ config=ConfigManager(vector_name)
34
+
32
35
  # checks if only a llamaindex chunking/embedder, return early as no other processing needed
33
36
  llamacheck = llamaindex_chunker_check(message_data, metadata, vector_name)
34
37
  if llamacheck:
35
38
  return llamacheck
36
39
 
37
40
  # if only a discovery engine memory, return early as no other processing needed
38
- discovery_check = discovery_engine_chunker_check(message_data, metadata, vector_name)
41
+ discovery_check = discovery_engine_chunker_check(message_data, metadata, config=config)
39
42
  if discovery_check:
40
43
  return discovery_check
41
44
 
@@ -170,4 +170,5 @@ def process_retrieval(retriever_list: list, config: ConfigManager):
170
170
  base_compressor=pipeline, base_retriever=lotr,
171
171
  k=k_override)
172
172
 
173
+ log.info(f"Returning Langchain retrieval object: {retriever}")
173
174
  return retriever
@@ -4,3 +4,4 @@ from .database import setup_database
4
4
  from .database import return_sources_last24
5
5
  from .database import delete_row_from_source
6
6
  from .static_dbs import get_db_directory
7
+ from .alloydb_client import AlloyDBClient
@@ -14,6 +14,8 @@ from .alloydb_client import AlloyDBClient
14
14
  from ..custom_logging import log
15
15
  from ..utils.config import load_config_key
16
16
 
17
+ from typing import List
18
+
17
19
 
18
20
  def create_alloydb_engine(vector_name):
19
21
 
@@ -231,8 +233,11 @@ async def load_alloydb_sql_async(sql, vector_name):
231
233
 
232
234
  return documents
233
235
 
234
- def and_or_ilike(sources, search_type="OR", operator="ILIKE"):
235
- unique_sources = set(sources.split())
236
+ def and_or_ilike(sources:List[str], search_type:str="OR", operator:str="ILIKE"):
237
+ if not isinstance(sources, list) or not all(isinstance(source, str) for source in sources):
238
+ raise TypeError("The `sources` argument must be a list of strings.")
239
+
240
+ unique_sources = set(sources)
236
241
  # Choose the delimiter based on the search_type argument
237
242
  delimiter = ' AND ' if search_type.upper() == "AND" else ' OR '
238
243
 
@@ -1,3 +1,4 @@
1
+ import os
1
2
  try:
2
3
  import pg8000
3
4
  import sqlalchemy
@@ -9,6 +10,7 @@ except ImportError:
9
10
 
10
11
  from .database import get_vector_size
11
12
  from ..custom_logging import log
13
+ from ..utils import ConfigManager
12
14
 
13
15
  class AlloyDBClient:
14
16
  """
@@ -35,11 +37,12 @@ class AlloyDBClient:
35
37
  """
36
38
 
37
39
  def __init__(self,
38
- project_id: str,
39
- region: str,
40
- cluster_name:str,
41
- instance_name:str,
42
- user:str,
40
+ config:ConfigManager,
41
+ project_id: str=None,
42
+ region: str=None,
43
+ cluster_name:str=None,
44
+ instance_name:str=None,
45
+ user:str=None,
43
46
  password=None,
44
47
  db="postgres"):
45
48
  """Initializes the AlloyDB client.
@@ -51,6 +54,23 @@ class AlloyDBClient:
51
54
  - password (str): The database user's password.
52
55
  - db_name (str): The name of the database.
53
56
  """
57
+ if config is None:
58
+ if project_id is None or region is None or cluster_name is None or instance_name is None:
59
+ raise ValueError("Must specify config or project_id, region, cluster_name, instance_name")
60
+ if config:
61
+ alloydb_config = config.vacConfig("alloydb_config")
62
+ if not alloydb_config:
63
+ raise ValueError("Must specify vac.alloydb_config")
64
+ project_id = alloydb_config["project_id"]
65
+ region = alloydb_config["region"]
66
+ cluster_name = alloydb_config["cluster"]
67
+ instance_name = alloydb_config["instance"]
68
+
69
+ ALLOYDB_DB = os.environ.get("ALLOYDB_DB")
70
+ if ALLOYDB_DB is None and alloydb_config.get("database") is None:
71
+ log.warning("Could not locate ALLOYDB_DB environment variable or 'alloydb_config.database'")
72
+
73
+ self.database = alloydb_config.get("database") or ALLOYDB_DB,
54
74
  self.connector = Connector()
55
75
  self.inst_uri = self._build_instance_uri(project_id, region, cluster_name, instance_name)
56
76
  self.engine = self._create_engine(self.inst_uri, user, password, db)
@@ -100,6 +120,80 @@ class AlloyDBClient:
100
120
  conn.close()
101
121
 
102
122
  return result
123
+
124
+ async def execute_sql_async(self, sql_statement):
125
+ """Executes a given SQL statement asynchronously with error handling."""
126
+ sql_ = sqlalchemy.text(sql_statement)
127
+ result = None
128
+ async with self.engine.connect() as conn:
129
+ try:
130
+ log.info(f"Executing SQL statement asynchronously: {sql_}")
131
+ result = await conn.execute(sql_)
132
+ except DatabaseError as e:
133
+ if "already exists" in str(e):
134
+ log.warning(f"Error ignored: {str(e)}. Assuming object already exists.")
135
+ else:
136
+ raise
137
+ finally:
138
+ await conn.close()
139
+
140
+ return result
141
+
142
+ async def get_sources_from_docstore_async(self, sources, vector_name, search_type="OR", just_source_name=False):
143
+ """Fetches sources from the docstore asynchronously."""
144
+ if just_source_name:
145
+ query = self._list_sources_from_docstore(sources, vector_name=vector_name, search_type=search_type)
146
+ else:
147
+ query = self._get_sources_from_docstore(sources, vector_name=vector_name, search_type=search_type)
148
+
149
+ if not query:
150
+ return []
151
+
152
+ documents = await self.execute_sql_async(query)
153
+ return documents
154
+
155
+ def _get_sources_from_docstore(self, sources, vector_name, search_type="OR"):
156
+ """Helper function to build the SQL query for fetching sources."""
157
+ if not sources:
158
+ log.warning("No sources found for alloydb fetch")
159
+ return ""
160
+
161
+ table_name = f"{vector_name}_docstore"
162
+
163
+ conditions = self._and_or_ilike(sources, search_type=search_type)
164
+
165
+ query = f"""
166
+ SELECT *
167
+ FROM {table_name}
168
+ WHERE {conditions}
169
+ ORDER BY langchain_metadata->>'objectId' ASC
170
+ LIMIT 500;
171
+ """
172
+
173
+ return query
174
+
175
+ def _list_sources_from_docstore(self, sources, vector_name, search_type="OR"):
176
+ """Helper function to build the SQL query for listing sources."""
177
+ table_name = f"{vector_name}_docstore"
178
+
179
+ if sources:
180
+ conditions = self._and_or_ilike(sources, search_type=search_type)
181
+ query = f"""
182
+ SELECT DISTINCT langchain_metadata->>'objectId' AS objectId
183
+ FROM {table_name}
184
+ WHERE {conditions}
185
+ ORDER BY langchain_metadata->>'objectId' ASC
186
+ LIMIT 500;
187
+ """
188
+ else:
189
+ query = f"""
190
+ SELECT DISTINCT langchain_metadata->>'objectId' AS objectId
191
+ FROM {table_name}
192
+ ORDER BY langchain_metadata->>'objectId' ASC
193
+ LIMIT 500;
194
+ """
195
+
196
+ return query
103
197
 
104
198
  @staticmethod
105
199
  def _and_or_ilike(sources, search_type="OR", operator="ILIKE"):
@@ -1 +1,2 @@
1
1
  from .discovery_engine_client import DiscoveryEngineClient
2
+ from .get_ai_search_chunks import get_all_chunks
@@ -1,5 +1,5 @@
1
1
  from ..custom_logging import log
2
- from ..utils import load_config_key, ConfigManager
2
+ from ..utils import ConfigManager
3
3
  from ..utils.gcp_project import get_gcp_project
4
4
  from ..components import load_memories
5
5
 
@@ -7,7 +7,7 @@ from .discovery_engine_client import DiscoveryEngineClient
7
7
  from .create_new import create_new_discovery_engine
8
8
 
9
9
 
10
- def do_discovery_engine(message_data, metadata, vector_name):
10
+ def do_discovery_engine(message_data:str, metadata:dict, config:ConfigManager=None):
11
11
  """
12
12
 
13
13
  Example:
@@ -15,13 +15,13 @@ def do_discovery_engine(message_data, metadata, vector_name):
15
15
  message_data = "gs://bucket_name/path_to_file.txt"
16
16
  metadata = {"user": "admin"}
17
17
  vector_name = "example_vector"
18
- response = do_discovery_engine(message_data, metadata, vector_name)
18
+ response = do_discovery_engine(message_data, metadata, config=config)
19
19
  print(response)
20
20
  # Imported file to corpus: {'status': 'success'}
21
21
  ```
22
22
  """
23
23
 
24
- memories = load_memories(vector_name)
24
+ memories = load_memories(config=config)
25
25
  tools = []
26
26
 
27
27
  if not memories:
@@ -38,7 +38,7 @@ def do_discovery_engine(message_data, metadata, vector_name):
38
38
  continue
39
39
  #location = gcp_config.get('location')
40
40
  corpus = DiscoveryEngineClient(
41
- data_store_id=vector_name,
41
+ data_store_id=config.vector_name,
42
42
  project_id=get_gcp_project(),
43
43
  # location needs to be 'eu' or 'us' which doesn't work with other configurations
44
44
  #location=location or global_location
@@ -65,14 +65,14 @@ def do_discovery_engine(message_data, metadata, vector_name):
65
65
  log.error(f"Error importing {message_data} - {corp=} - {str(err)}")
66
66
 
67
67
  if str(err).startswith("404"):
68
- log.info(f"Attempting to create a new DiscoveryEngine corpus: {vector_name}")
68
+ log.info(f"Attempting to create a new DiscoveryEngine corpus: {config.vector_name}")
69
69
  try:
70
- new_corp = create_new_discovery_engine(vector_name)
70
+ new_corp = create_new_discovery_engine(config)
71
71
  except Exception as err:
72
- log.error(f"Failed to create new DiscoveryEngine {vector_name} - {str(err)}")
72
+ log.error(f"Failed to create new DiscoveryEngine {config.vector_name} - {str(err)}")
73
73
  continue
74
74
  if new_corp:
75
- log.info(f"Found new DiscoveryEngine {vector_name=} - {new_corp=}")
75
+ log.info(f"Found new DiscoveryEngine {config.vector_name=} - {new_corp=}")
76
76
  response = corp.import_documents(
77
77
  gcs_uri=message_data
78
78
  )
@@ -126,7 +126,7 @@ def discovery_engine_chunker_check(message_data, metadata, vector_name:str=None,
126
126
  total_memories = len(check_write_memories(config))
127
127
  llama = None
128
128
  if check_discovery_engine_in_memory(config):
129
- llama = do_discovery_engine(message_data, metadata, vector_name)
129
+ llama = do_discovery_engine(message_data, metadata, config=config)
130
130
  log.info(f"Processed discovery engine: {llama}")
131
131
 
132
132
  # If discovery engine is the only entry, return
@@ -1,10 +1,10 @@
1
1
  from .discovery_engine_client import DiscoveryEngineClient
2
- from ..utils.config import load_config_key
2
+ from ..utils import ConfigManager
3
3
  from ..utils.gcp_project import get_gcp_project
4
4
 
5
- def create_new_discovery_engine(vector_name):
5
+ def create_new_discovery_engine(config:ConfigManager):
6
6
 
7
- chunker_config = load_config_key("chunker", vector_name=vector_name, kind="vacConfig")
7
+ chunker_config = config.vacConfig("chunker")
8
8
 
9
9
  chunk_size = 500
10
10
  if chunker_config:
@@ -18,7 +18,7 @@ def create_new_discovery_engine(vector_name):
18
18
  #location = gcp_config.get('location')
19
19
 
20
20
  de = DiscoveryEngineClient(
21
- data_store_id=vector_name,
21
+ data_store_id=config.vector_name,
22
22
  project_id=project_id,
23
23
  # location needs to be 'eu' or 'us' which doesn't work with other configurations
24
24
  #location=location
@@ -0,0 +1,53 @@
1
+ from ..utils import ConfigManager
2
+ from ..utils.gcp_project import get_gcp_project
3
+ from ..custom_logging import log
4
+ from .discovery_engine_client import DiscoveryEngineClient
5
+ from ..components import load_memories
6
+
7
+ def get_all_chunks(question:str, config:ConfigManager):
8
+ """
9
+ Look through a config memory key and find all Vertex AI Search retrievers, call them and return a joined string of chunks
10
+
11
+ args: question - question to search similarity for
12
+ config: A ConfigManager object
13
+
14
+ returns: a big string of chunks
15
+ """
16
+ memories = load_memories(config=config)
17
+ chunks = []
18
+
19
+ if not memories:
20
+ return None
21
+
22
+ vector_name = config.vector_name
23
+ for memory in memories:
24
+ for key, value in memory.items(): # Now iterate over the dictionary
25
+ log.info(f"Found memory {key}")
26
+ vectorstore = value.get('vectorstore')
27
+ if vectorstore == "discovery_engine" or vectorstore == "vertex_ai_search":
28
+ if value.get('read_only'):
29
+ new_vector_name = value.get('vector_name')
30
+ if not new_vector_name:
31
+ log.warning("read_only specified but no new vector_name to read from")
32
+ vector_name = new_vector_name
33
+
34
+ num_chunks = value.get('num_chunks') or 3
35
+
36
+ chunk = get_chunks(question, vector_name, num_chunks)
37
+ if chunk:
38
+ chunks.append(chunk)
39
+ if chunks:
40
+ return "\n".join(chunks)
41
+
42
+ log.warning(f"No chunks found for {vector_name}")
43
+ return None
44
+
45
+ def get_chunks(question, vector_name, num_chunks):
46
+ de = DiscoveryEngineClient(vector_name, project_id=get_gcp_project(include_config=True))
47
+ try:
48
+ return de.get_chunks(question, num_previous_chunks=num_chunks, num_next_chunks=num_chunks)
49
+ except Exception as err:
50
+ log.error(f"No discovery engine chunks found: {str(err)}")
51
+
52
+
53
+
@@ -37,9 +37,10 @@ class ConfigManager:
37
37
  self.config_folder = os.getenv("VAC_CONFIG_FOLDER", os.getcwd())
38
38
  self.local_config_folder = local_config_folder
39
39
  self.configs_by_kind = self.load_all_configs()
40
+ self.validate = validate
40
41
 
41
42
  test_agent = self.vacConfig("agent")
42
- if not test_agent and self.vector_name != "global" and validate:
43
+ if not test_agent and self.vector_name != "global" and self.validate:
43
44
  print(f"WARNING: No vacConfig.agent found for {self.vector_name} - are you in right folder? {local_config_folder=} {self.config_folder=}")
44
45
 
45
46
  def load_all_configs(self):
@@ -125,7 +126,6 @@ class ConfigManager:
125
126
  else:
126
127
  config = yaml.safe_load(file)
127
128
  self.config_cache[filename] = (config, datetime.now())
128
- log.debug(f"Loaded and cached {config_file}")
129
129
  if is_local:
130
130
  log.warning(f"Local configuration override for {filename} via {self.local_config_folder}")
131
131
  return config
@@ -24,6 +24,7 @@ def get_gcp_project(include_config=False):
24
24
  gcp_config = load_config_key("gcp_config", "global", "vacConfig")
25
25
  if gcp_config:
26
26
  if gcp_config.get('project_id'):
27
+ logging.info("Using project_id from vacConfig.gcp_config.project_id")
27
28
  return gcp_config.get('project_id')
28
29
 
29
30
  project_id = get_env_project_id()
@@ -1,9 +1,9 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sunholo
3
- Version: 0.80.5
3
+ Version: 0.81.0
4
4
  Summary: Large Language Model DevOps - a package to help deploy LLMs to the Cloud.
5
5
  Home-page: https://github.com/sunholo-data/sunholo-py
6
- Download-URL: https://github.com/sunholo-data/sunholo-py/archive/refs/tags/v0.80.5.tar.gz
6
+ Download-URL: https://github.com/sunholo-data/sunholo-py/archive/refs/tags/v0.81.0.tar.gz
7
7
  Author: Holosun ApS
8
8
  Author-email: multivac@sunholo.com
9
9
  License: Apache License, Version 2.0
@@ -85,6 +85,7 @@ sunholo/discovery_engine/__init__.py
85
85
  sunholo/discovery_engine/chunker_handler.py
86
86
  sunholo/discovery_engine/create_new.py
87
87
  sunholo/discovery_engine/discovery_engine_client.py
88
+ sunholo/discovery_engine/get_ai_search_chunks.py
88
89
  sunholo/embedder/__init__.py
89
90
  sunholo/embedder/embed_chunk.py
90
91
  sunholo/gcs/__init__.py
sunholo-0.80.5/setup.cfg DELETED
@@ -1,7 +0,0 @@
1
- [metadata]
2
- description-file = README.md
3
-
4
- [egg_info]
5
- tag_build =
6
- tag_date = 0
7
-
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes