sunholo 0.69.0__tar.gz → 0.69.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (130) hide show
  1. {sunholo-0.69.0 → sunholo-0.69.3}/PKG-INFO +2 -2
  2. {sunholo-0.69.0 → sunholo-0.69.3}/setup.py +1 -1
  3. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/chunker/data_to_embed_pubsub.py +8 -3
  4. sunholo-0.69.3/sunholo/discovery_engine/chunker_handler.py +109 -0
  5. sunholo-0.69.3/sunholo/discovery_engine/create_new.py +28 -0
  6. sunholo-0.69.0/sunholo/database/discovery_engine.py → sunholo-0.69.3/sunholo/discovery_engine/discovery_engine_client.py +60 -38
  7. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/llamaindex/import_files.py +2 -22
  8. sunholo-0.69.3/sunholo/patches/langchain/__init__.py +0 -0
  9. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo.egg-info/PKG-INFO +2 -2
  10. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo.egg-info/SOURCES.txt +4 -1
  11. {sunholo-0.69.0 → sunholo-0.69.3}/LICENSE.txt +0 -0
  12. {sunholo-0.69.0 → sunholo-0.69.3}/MANIFEST.in +0 -0
  13. {sunholo-0.69.0 → sunholo-0.69.3}/README.md +0 -0
  14. {sunholo-0.69.0 → sunholo-0.69.3}/setup.cfg +0 -0
  15. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/__init__.py +0 -0
  16. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/agents/__init__.py +0 -0
  17. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/agents/chat_history.py +0 -0
  18. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/agents/dispatch_to_qa.py +0 -0
  19. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/agents/fastapi/__init__.py +0 -0
  20. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/agents/fastapi/base.py +0 -0
  21. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/agents/fastapi/qna_routes.py +0 -0
  22. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/agents/flask/__init__.py +0 -0
  23. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/agents/flask/base.py +0 -0
  24. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/agents/flask/qna_routes.py +0 -0
  25. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/agents/flask/vac_routes.py +0 -0
  26. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/agents/langserve.py +0 -0
  27. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/agents/pubsub.py +0 -0
  28. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/agents/route.py +0 -0
  29. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/agents/special_commands.py +0 -0
  30. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/agents/swagger.py +0 -0
  31. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/archive/__init__.py +0 -0
  32. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/archive/archive.py +0 -0
  33. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/auth/__init__.py +0 -0
  34. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/auth/run.py +0 -0
  35. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/bots/__init__.py +0 -0
  36. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/bots/discord.py +0 -0
  37. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/bots/github_webhook.py +0 -0
  38. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/bots/webapp.py +0 -0
  39. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/chunker/__init__.py +0 -0
  40. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/chunker/doc_handling.py +0 -0
  41. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/chunker/images.py +0 -0
  42. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/chunker/loaders.py +0 -0
  43. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/chunker/message_data.py +0 -0
  44. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/chunker/pdfs.py +0 -0
  45. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/chunker/publish.py +0 -0
  46. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/chunker/splitter.py +0 -0
  47. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/cli/__init__.py +0 -0
  48. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/cli/chat_vac.py +0 -0
  49. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/cli/cli.py +0 -0
  50. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/cli/cli_init.py +0 -0
  51. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/cli/configs.py +0 -0
  52. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/cli/deploy.py +0 -0
  53. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/cli/embedder.py +0 -0
  54. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/cli/merge_texts.py +0 -0
  55. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/cli/run_proxy.py +0 -0
  56. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/cli/sun_rich.py +0 -0
  57. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/cli/swagger.py +0 -0
  58. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/components/__init__.py +0 -0
  59. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/components/llm.py +0 -0
  60. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/components/retriever.py +0 -0
  61. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/components/vectorstore.py +0 -0
  62. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/database/__init__.py +0 -0
  63. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/database/alloydb.py +0 -0
  64. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/database/alloydb_client.py +0 -0
  65. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/database/database.py +0 -0
  66. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/database/lancedb.py +0 -0
  67. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/database/sql/sb/create_function.sql +0 -0
  68. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/database/sql/sb/create_function_time.sql +0 -0
  69. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/database/sql/sb/create_table.sql +0 -0
  70. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/database/sql/sb/delete_source_row.sql +0 -0
  71. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/database/sql/sb/return_sources.sql +0 -0
  72. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/database/sql/sb/setup.sql +0 -0
  73. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/database/static_dbs.py +0 -0
  74. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/database/uuid.py +0 -0
  75. {sunholo-0.69.0/sunholo/langfuse → sunholo-0.69.3/sunholo/discovery_engine}/__init__.py +0 -0
  76. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/embedder/__init__.py +0 -0
  77. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/embedder/embed_chunk.py +0 -0
  78. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/gcs/__init__.py +0 -0
  79. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/gcs/add_file.py +0 -0
  80. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/gcs/download_url.py +0 -0
  81. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/gcs/metadata.py +0 -0
  82. {sunholo-0.69.0/sunholo/llamaindex → sunholo-0.69.3/sunholo/langfuse}/__init__.py +0 -0
  83. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/langfuse/callback.py +0 -0
  84. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/langfuse/prompts.py +0 -0
  85. {sunholo-0.69.0/sunholo/lookup → sunholo-0.69.3/sunholo/llamaindex}/__init__.py +0 -0
  86. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/llamaindex/generate.py +0 -0
  87. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/llamaindex/get_files.py +0 -0
  88. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/logging.py +0 -0
  89. {sunholo-0.69.0/sunholo/patches → sunholo-0.69.3/sunholo/lookup}/__init__.py +0 -0
  90. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/lookup/model_lookup.yaml +0 -0
  91. {sunholo-0.69.0/sunholo/patches/langchain → sunholo-0.69.3/sunholo/patches}/__init__.py +0 -0
  92. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/patches/langchain/lancedb.py +0 -0
  93. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/patches/langchain/vertexai.py +0 -0
  94. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/pubsub/__init__.py +0 -0
  95. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/pubsub/process_pubsub.py +0 -0
  96. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/pubsub/pubsub_manager.py +0 -0
  97. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/qna/__init__.py +0 -0
  98. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/qna/parsers.py +0 -0
  99. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/qna/retry.py +0 -0
  100. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/streaming/__init__.py +0 -0
  101. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/streaming/content_buffer.py +0 -0
  102. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/streaming/langserve.py +0 -0
  103. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/streaming/stream_lookup.py +0 -0
  104. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/streaming/streaming.py +0 -0
  105. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/summarise/__init__.py +0 -0
  106. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/summarise/summarise.py +0 -0
  107. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/utils/__init__.py +0 -0
  108. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/utils/api_key.py +0 -0
  109. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/utils/big_context.py +0 -0
  110. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/utils/config.py +0 -0
  111. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/utils/config_schema.py +0 -0
  112. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/utils/gcp.py +0 -0
  113. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/utils/gcp_project.py +0 -0
  114. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/utils/parsers.py +0 -0
  115. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/utils/timedelta.py +0 -0
  116. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/utils/user_ids.py +0 -0
  117. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/utils/version.py +0 -0
  118. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/vertex/__init__.py +0 -0
  119. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/vertex/init.py +0 -0
  120. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/vertex/memory_tools.py +0 -0
  121. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo/vertex/safety.py +0 -0
  122. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo.egg-info/dependency_links.txt +0 -0
  123. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo.egg-info/entry_points.txt +0 -0
  124. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo.egg-info/requires.txt +0 -0
  125. {sunholo-0.69.0 → sunholo-0.69.3}/sunholo.egg-info/top_level.txt +0 -0
  126. {sunholo-0.69.0 → sunholo-0.69.3}/tests/test_chat_history.py +0 -0
  127. {sunholo-0.69.0 → sunholo-0.69.3}/tests/test_chunker.py +0 -0
  128. {sunholo-0.69.0 → sunholo-0.69.3}/tests/test_config.py +0 -0
  129. {sunholo-0.69.0 → sunholo-0.69.3}/tests/test_dispatch_to_qa.py +0 -0
  130. {sunholo-0.69.0 → sunholo-0.69.3}/tests/test_swagger.py +0 -0
@@ -1,9 +1,9 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sunholo
3
- Version: 0.69.0
3
+ Version: 0.69.3
4
4
  Summary: Large Language Model DevOps - a package to help deploy LLMs to the Cloud.
5
5
  Home-page: https://github.com/sunholo-data/sunholo-py
6
- Download-URL: https://github.com/sunholo-data/sunholo-py/archive/refs/tags/v0.69.0.tar.gz
6
+ Download-URL: https://github.com/sunholo-data/sunholo-py/archive/refs/tags/v0.69.3.tar.gz
7
7
  Author: Holosun ApS
8
8
  Author-email: multivac@sunholo.com
9
9
  License: Apache License, Version 2.0
@@ -1,7 +1,7 @@
1
1
  from setuptools import setup, find_packages
2
2
 
3
3
  # Define your base version
4
- version = '0.69.0'
4
+ version = '0.69.3'
5
5
 
6
6
  setup(
7
7
  name='sunholo',
@@ -20,6 +20,8 @@ from .publish import process_docs_chunks_vector_name
20
20
  from .splitter import chunk_doc_to_docs
21
21
 
22
22
  from ..llamaindex.import_files import llamaindex_chunker_check
23
+ from ..discovery_engine.chunker_handler import discovery_engine_chunker_check
24
+
23
25
  from . import loaders
24
26
 
25
27
  def direct_file_to_embed(file_name: pathlib.Path, metadata: dict, vector_name: str):
@@ -58,17 +60,20 @@ def process_chunker_data(message_data, metadata, vector_name):
58
60
  metadata["vector_name"] = vector_name
59
61
 
60
62
  if message_data is None:
61
- log.error("No message_data was found in data: {data}")
63
+ log.error(f"No message_data was found in data: {message_data}")
62
64
  return
63
65
 
64
66
  log.debug(f"Found metadata in pubsub: {metadata}")
65
67
 
66
68
  # checks if only a llamaindex chunking/embedder, return early as no other processing needed
67
69
  llamacheck = llamaindex_chunker_check(message_data, metadata, vector_name)
68
-
69
70
  if llamacheck:
70
-
71
71
  return llamacheck
72
+
73
+ # if only a discovery engine memory, return early as no other processing needed
74
+ discovery_check = discovery_engine_chunker_check(message_data, metadata, vector_name)
75
+ if discovery_check:
76
+ return discovery_check
72
77
 
73
78
  chunks = []
74
79
 
@@ -0,0 +1,109 @@
1
+ from ..logging import log
2
+ from ..utils.config import load_config_key
3
+ from ..components import load_memories
4
+
5
+ from .discovery_engine_client import DiscoveryEngineClient
6
+
7
+
8
+ def do_discovery_engine(message_data, metadata, vector_name):
9
+ """
10
+
11
+ Example:
12
+ ```python
13
+ message_data = "gs://bucket_name/path_to_file.txt"
14
+ metadata = {"user": "admin"}
15
+ vector_name = "example_vector"
16
+ response = do_discovery_engine(message_data, metadata, vector_name)
17
+ print(response)
18
+ # Imported file to corpus: {'status': 'success'}
19
+ ```
20
+ """
21
+
22
+ gcp_config = load_config_key("gcp_config", vector_name=vector_name, kind="vacConfig")
23
+ if not gcp_config:
24
+ raise ValueError(f"Need config.{vector_name}.gcp_config to configure discovery engine")
25
+
26
+ global_project_id = gcp_config.get('project_id')
27
+ #global_location = gcp_config.get('location')
28
+ global_data_store_id = gcp_config.get('data_store_id')
29
+
30
+ memories = load_memories(vector_name)
31
+ tools = []
32
+
33
+ if not memories:
34
+ return tools
35
+
36
+ corpuses = []
37
+ for memory in memories:
38
+ for key, value in memory.items(): # Now iterate over the dictionary
39
+ log.info(f"Found memory {key}")
40
+ vectorstore = value.get('vectorstore')
41
+ if vectorstore == "discovery_engine" or vectorstore == "vertex_ai_search":
42
+ log.info(f"Found vectorstore {vectorstore}")
43
+ data_store_id = value.get('data_store_id')
44
+ project_id = gcp_config.get('project_id')
45
+ #location = gcp_config.get('location')
46
+ corpus = DiscoveryEngineClient(
47
+ data_store_id=data_store_id or global_data_store_id,
48
+ project_id=project_id or global_project_id,
49
+ # location needs to be 'eu' or 'us' which doesn't work with other configurations
50
+ #location=location or global_location
51
+ )
52
+
53
+ corpuses.append(corpus)
54
+ if not corpuses:
55
+ log.error("Could not find any Discovery Engine corpus to import data to")
56
+ return None
57
+
58
+ log.info(f"Found Discovery Engine / Vertex AI Search {corpuses=}")
59
+
60
+ if message_data.startswith("gs://"):
61
+ log.info(f"DiscoveryEngineClient.import_files for {message_data}")
62
+ for corp in corpuses:
63
+ try:
64
+ response = corp.import_documents(
65
+ gcs_uri=message_data
66
+ )
67
+ log.info(f"Imported file to corpus: {response} with metadata: {metadata}")
68
+ except Exception as err:
69
+ log.error(f"Error importing {message_data} - {corp=} - {str(err)}")
70
+ continue
71
+
72
+ metadata["source"] = message_data
73
+ return metadata
74
+
75
+ else:
76
+ log.warning("Only gs:// data is supported for Discovery Engine")
77
+
78
+
79
+ def check_discovery_engine_in_memory(vector_name):
80
+ memories = load_config_key("memory", vector_name=vector_name, kind="vacConfig")
81
+ for memory in memories: # Iterate over the list
82
+ for key, value in memory.items(): # Now iterate over the dictionary
83
+ log.info(f"Found memory {key}")
84
+ vectorstore = value.get('vectorstore')
85
+ if vectorstore:
86
+ if vectorstore == "discovery_engine" or vectorstore == "vertex_ai_search":
87
+ log.info(f"Found vectorstore {vectorstore}")
88
+ return True
89
+
90
+ return False
91
+
92
+ def discovery_engine_chunker_check(message_data, metadata, vector_name):
93
+ # discovery engine handles its own chunking/embedding
94
+ memories = load_config_key("memory", vector_name=vector_name, kind="vacConfig")
95
+ total_memories = len(memories)
96
+ llama = None
97
+ if check_discovery_engine_in_memory(vector_name):
98
+ llama = do_discovery_engine(message_data, metadata, vector_name)
99
+ log.info(f"Processed discovery engine: {llama}")
100
+
101
+ # If discovery engine is the only entry, return
102
+ if llama and total_memories == 1:
103
+
104
+ return llama
105
+
106
+ elif llama:
107
+ log.info("Discovery Engine found but not the only memory, continuing with other processes.")
108
+
109
+ return None
@@ -0,0 +1,28 @@
1
+ from .discovery_engine_client import DiscoveryEngineClient
2
+ from ..utils.config import load_config_key
3
+
4
+ def create_new_discovery_engine(vector_name):
5
+ gcp_config = load_config_key("gcp_config", vector_name=vector_name, kind="vacConfig")
6
+
7
+ chunker_config = load_config_key("chunker", vector_name=vector_name, kind="vacConfig")
8
+
9
+ if chunker_config:
10
+ chunk_size = chunker_config.get("chunk_size")
11
+
12
+ if not chunk_size:
13
+ chunk_size = 500
14
+
15
+
16
+ project_id = gcp_config.get('project_id')
17
+ #location = gcp_config.get('location')
18
+
19
+ de = DiscoveryEngineClient(
20
+ data_store_id=vector_name,
21
+ project_id=project_id,
22
+ # location needs to be 'eu' or 'us' which doesn't work with other configurations
23
+ #location=location
24
+ )
25
+
26
+ new_store = de.create_data_store(chunk_size=chunk_size)
27
+
28
+ return new_store
@@ -6,6 +6,7 @@ except ImportError:
6
6
  discoveryengine = None
7
7
 
8
8
  from ..logging import log
9
+ from typing import Optional
9
10
 
10
11
  class DiscoveryEngineClient:
11
12
  """
@@ -19,7 +20,7 @@ class DiscoveryEngineClient:
19
20
  Example:
20
21
  ```python
21
22
  client = DiscoveryEngineClient(project_id='your-project-id', data_store_id='your-data-store-id')
22
-
23
+
23
24
  # Create a collection
24
25
  collection_name = client.create_collection("my_new_collection")
25
26
 
@@ -61,39 +62,13 @@ class DiscoveryEngineClient:
61
62
  else None
62
63
  )
63
64
  self.client = discoveryengine.DataStoreServiceClient(client_options=client_options)
64
-
65
-
66
- def create_collection(self, collection_id: str) -> str:
67
- """
68
- Creates a new collection within the specified data store.
69
-
70
- Args:
71
- collection_id (str): The ID of the collection to create.
72
-
73
- Returns:
74
- str: The resource name of the created collection.
75
-
76
- Example:
77
- ```python
78
- collection_name = client.create_collection('my_new_collection')
79
- `
80
- """
81
-
82
- parent = self.client.data_store_path(
83
- project=self.project_id, location=self.location, data_store=self.data_store_id
84
- )
85
-
86
- collection = discoveryengine.Collection(display_name=collection_id)
87
- request = discoveryengine.CreateCollectionRequest(
88
- parent=parent, collection_id=collection_id, collection=collection
65
+ self.parent = self.client.branch_path(
66
+ project=project_id,
67
+ location=location,
68
+ data_store=data_store_id,
69
+ branch="default_branch",
89
70
  )
90
71
 
91
- operation = self.client.create_collection(request=request)
92
- log.info(f"Waiting for operation to complete: {operation.operation.name}")
93
- response = operation.result()
94
-
95
- return response.name
96
-
97
72
  def create_data_store(
98
73
  self, chunk_size: int = 500
99
74
  ) -> str:
@@ -106,7 +81,6 @@ class DiscoveryEngineClient:
106
81
  Returns:
107
82
  str: The name of the long-running operation for data store creation.
108
83
  """
109
- parent = self.client.common_location_path(project=self.project_id, location=self.location)
110
84
 
111
85
  # https://cloud.google.com/python/docs/reference/discoveryengine/latest/google.cloud.discoveryengine_v1alpha.types.DocumentProcessingConfig
112
86
  doc_config = discoveryengine.DocumentProcessingConfig(
@@ -137,7 +111,7 @@ class DiscoveryEngineClient:
137
111
 
138
112
  # https://cloud.google.com/python/docs/reference/discoveryengine/0.11.4/google.cloud.discoveryengine_v1alpha.types.CreateDataStoreRequest
139
113
  request = discoveryengine.CreateDataStoreRequest(
140
- parent=parent,
114
+ parent=self.parent,
141
115
  data_store_id=self.data_store_id,
142
116
  data_store=data_store,
143
117
  # Optional: For Advanced Site Search Only
@@ -162,7 +136,6 @@ class DiscoveryEngineClient:
162
136
  def get_chunks(
163
137
  self,
164
138
  query: str,
165
- collection_id: str,
166
139
  num_previous_chunks: int = 3,
167
140
  num_next_chunks: int = 3,
168
141
  page_size: int = 10,
@@ -196,13 +169,10 @@ class DiscoveryEngineClient:
196
169
  serving_config="default_serving_config")
197
170
  ).name
198
171
 
199
- filter = f'content_search=true AND collection_id="{collection_id}"'
200
-
201
172
  search_request = discoveryengine.SearchRequest(
202
173
  serving_config=serving_config,
203
174
  query=query,
204
175
  page_size=page_size,
205
- filter=filter,
206
176
  content_search_spec=discoveryengine.SearchRequest.ContentSearchSpec(
207
177
  #snippet_spec=discoveryengine.SearchRequest.ContentSearchSpec.SnippetSpec(
208
178
  # return_snippet=True
@@ -219,3 +189,55 @@ class DiscoveryEngineClient:
219
189
 
220
190
  return search_response
221
191
 
192
+ def import_documents(self,
193
+ gcs_uri: Optional[str] = None,
194
+ data_schema="content",
195
+ bigquery_dataset: Optional[str] = None,
196
+ bigquery_table: Optional[str] = None,
197
+ bigquery_project_id: Optional[str] = None,
198
+ ) -> str:
199
+ """
200
+ Args:
201
+ - gcs_uri: Required. List of Cloud Storage URIs to input files. Each URI can be up to 2000 characters long. URIs can match the full object path (for example, gs://bucket/directory/object.json) or a pattern matching one or more files, such as gs://bucket/directory/*.json. A request can contain at most 100 files (or 100,000 files if data_schema is content). Each file can be up to 2 GB (or 100 MB if data_schema is content).
202
+ - data_schema: Must be one of 'user_event', 'custom' or 'document' if using BigQuery. Default 'content' only for GCS. The schema to use when parsing the data from the source. Supported values for document imports: - document (default): One JSON Document per line. Each document must have a valid Document.id. - content: Unstructured data (e.g. PDF, HTML). Each file matched by input_uris becomes a document, with the ID set to the first 128 bits of SHA256(URI) encoded as a hex string. - custom: One custom data JSON per row in arbitrary format that conforms to the defined Schema of the data store. This can only be used by the GENERIC Data Store vertical. - csv: A CSV file with header conforming to the defined Schema of the data store. Each entry after the header is imported as a Document. This can only be used by the GENERIC Data Store vertical. Supported values for user event imports: - user_event (default): One JSON UserEvent per line.
203
+
204
+ """
205
+
206
+ if gcs_uri:
207
+ request = discoveryengine.ImportDocumentsRequest(
208
+ parent=self.parent,
209
+ # https://cloud.google.com/python/docs/reference/discoveryengine/latest/google.cloud.discoveryengine_v1alpha.types.GcsSource
210
+ gcs_source=discoveryengine.GcsSource(
211
+ input_uris=[gcs_uri], data_schema=data_schema,
212
+ ),
213
+ # Options: `FULL`, `INCREMENTAL`
214
+ reconciliation_mode=discoveryengine.ImportDocumentsRequest.ReconciliationMode.INCREMENTAL,
215
+ )
216
+ else:
217
+ request = discoveryengine.ImportDocumentsRequest(
218
+ parent=self.parent,
219
+ bigquery_source=discoveryengine.BigQuerySource(
220
+ project_id=bigquery_project_id or self.project_id,
221
+ dataset_id=bigquery_dataset,
222
+ table_id=bigquery_table,
223
+ data_schema=data_schema,
224
+ ),
225
+ # Options: `FULL`, `INCREMENTAL`
226
+ reconciliation_mode=discoveryengine.ImportDocumentsRequest.ReconciliationMode.INCREMENTAL,
227
+ )
228
+
229
+ # Make the request
230
+ operation = self.client.import_documents(request=request)
231
+
232
+ log.info(f"Waiting for operation to complete: {operation.operation.name}")
233
+ response = operation.result()
234
+
235
+ # Once the operation is complete,
236
+ # get information from operation metadata
237
+ metadata = discoveryengine.ImportDocumentsMetadata(operation.metadata)
238
+
239
+ # Handle the response
240
+ log.info(f"{response=} {metadata=}")
241
+
242
+ return operation.operation.name
243
+
@@ -105,28 +105,8 @@ def do_llamaindex(message_data, metadata, vector_name):
105
105
  return metadata
106
106
 
107
107
  else:
108
- raise NotImplementedError("Only gs:// and https://drive data is supported")
109
- # write text to file and upload it
110
- # TODO(developer): Update and un-comment below lines
111
- # project_id = "PROJECT_ID"
112
- # corpus_name = "projects/{project_id}/locations/us-central1/ragCorpora/{rag_corpus_id}"
113
- # path = "path/to/local/file.txt"
114
- # display_name = "file_display_name"
115
- # description = "file description"
116
-
117
- # Initialize Vertex AI API once per session
118
- #path = 'path/to/local/file.txt'
119
-
120
- # Write the message_data to a file
121
- #with open(path, 'w') as file:
122
- # file.write(message_data)
123
-
124
- #rag_file = rag.upload_file(
125
- # corpus_name=corpus_name,
126
- # path=path,
127
- # display_name=display_name,
128
- # description=description,
129
- #)
108
+ log.warning("Only gs:// and https://drive data is supported for llamaindex")
109
+
130
110
 
131
111
  def check_llamaindex_in_memory(vector_name):
132
112
  memories = load_config_key("memory", vector_name=vector_name, kind="vacConfig")
File without changes
@@ -1,9 +1,9 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sunholo
3
- Version: 0.69.0
3
+ Version: 0.69.3
4
4
  Summary: Large Language Model DevOps - a package to help deploy LLMs to the Cloud.
5
5
  Home-page: https://github.com/sunholo-data/sunholo-py
6
- Download-URL: https://github.com/sunholo-data/sunholo-py/archive/refs/tags/v0.69.0.tar.gz
6
+ Download-URL: https://github.com/sunholo-data/sunholo-py/archive/refs/tags/v0.69.3.tar.gz
7
7
  Author: Holosun ApS
8
8
  Author-email: multivac@sunholo.com
9
9
  License: Apache License, Version 2.0
@@ -62,7 +62,6 @@ sunholo/database/__init__.py
62
62
  sunholo/database/alloydb.py
63
63
  sunholo/database/alloydb_client.py
64
64
  sunholo/database/database.py
65
- sunholo/database/discovery_engine.py
66
65
  sunholo/database/lancedb.py
67
66
  sunholo/database/static_dbs.py
68
67
  sunholo/database/uuid.py
@@ -72,6 +71,10 @@ sunholo/database/sql/sb/create_table.sql
72
71
  sunholo/database/sql/sb/delete_source_row.sql
73
72
  sunholo/database/sql/sb/return_sources.sql
74
73
  sunholo/database/sql/sb/setup.sql
74
+ sunholo/discovery_engine/__init__.py
75
+ sunholo/discovery_engine/chunker_handler.py
76
+ sunholo/discovery_engine/create_new.py
77
+ sunholo/discovery_engine/discovery_engine_client.py
75
78
  sunholo/embedder/__init__.py
76
79
  sunholo/embedder/embed_chunk.py
77
80
  sunholo/gcs/__init__.py
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes