sunholo 0.80.5__py3-none-any.whl → 0.80.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sunholo/chunker/process_chunker_data.py +4 -1
- sunholo/discovery_engine/__init__.py +1 -0
- sunholo/discovery_engine/chunker_handler.py +10 -10
- sunholo/discovery_engine/create_new.py +4 -4
- sunholo/discovery_engine/get_ai_search_chunks.py +53 -0
- {sunholo-0.80.5.dist-info → sunholo-0.80.6.dist-info}/METADATA +2 -2
- {sunholo-0.80.5.dist-info → sunholo-0.80.6.dist-info}/RECORD +11 -10
- {sunholo-0.80.5.dist-info → sunholo-0.80.6.dist-info}/LICENSE.txt +0 -0
- {sunholo-0.80.5.dist-info → sunholo-0.80.6.dist-info}/WHEEL +0 -0
- {sunholo-0.80.5.dist-info → sunholo-0.80.6.dist-info}/entry_points.txt +0 -0
- {sunholo-0.80.5.dist-info → sunholo-0.80.6.dist-info}/top_level.txt +0 -0
|
@@ -15,6 +15,7 @@ from ..discovery_engine.chunker_handler import discovery_engine_chunker_check
|
|
|
15
15
|
from .publish import process_docs_chunks_vector_name
|
|
16
16
|
from .splitter import chunk_doc_to_docs
|
|
17
17
|
from ..azure.blobs import is_azure_blob
|
|
18
|
+
from ..utils import ConfigManager
|
|
18
19
|
|
|
19
20
|
from ..custom_logging import log
|
|
20
21
|
|
|
@@ -29,13 +30,15 @@ def process_chunker_data(message_data, metadata, vector_name):
|
|
|
29
30
|
|
|
30
31
|
log.debug(f"Found metadata in pubsub: {metadata=}")
|
|
31
32
|
|
|
33
|
+
config=ConfigManager(vector_name)
|
|
34
|
+
|
|
32
35
|
# checks if only a llamaindex chunking/embedder, return early as no other processing needed
|
|
33
36
|
llamacheck = llamaindex_chunker_check(message_data, metadata, vector_name)
|
|
34
37
|
if llamacheck:
|
|
35
38
|
return llamacheck
|
|
36
39
|
|
|
37
40
|
# if only a discovery engine memory, return early as no other processing needed
|
|
38
|
-
discovery_check = discovery_engine_chunker_check(message_data, metadata,
|
|
41
|
+
discovery_check = discovery_engine_chunker_check(message_data, metadata, config=config)
|
|
39
42
|
if discovery_check:
|
|
40
43
|
return discovery_check
|
|
41
44
|
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
from ..custom_logging import log
|
|
2
|
-
from ..utils import
|
|
2
|
+
from ..utils import ConfigManager
|
|
3
3
|
from ..utils.gcp_project import get_gcp_project
|
|
4
4
|
from ..components import load_memories
|
|
5
5
|
|
|
@@ -7,7 +7,7 @@ from .discovery_engine_client import DiscoveryEngineClient
|
|
|
7
7
|
from .create_new import create_new_discovery_engine
|
|
8
8
|
|
|
9
9
|
|
|
10
|
-
def do_discovery_engine(message_data, metadata, vector_name):
|
|
10
|
+
def do_discovery_engine(message_data:str, metadata:dict, config:ConfigManager=None):
|
|
11
11
|
"""
|
|
12
12
|
|
|
13
13
|
Example:
|
|
@@ -15,13 +15,13 @@ def do_discovery_engine(message_data, metadata, vector_name):
|
|
|
15
15
|
message_data = "gs://bucket_name/path_to_file.txt"
|
|
16
16
|
metadata = {"user": "admin"}
|
|
17
17
|
vector_name = "example_vector"
|
|
18
|
-
response = do_discovery_engine(message_data, metadata,
|
|
18
|
+
response = do_discovery_engine(message_data, metadata, config=config)
|
|
19
19
|
print(response)
|
|
20
20
|
# Imported file to corpus: {'status': 'success'}
|
|
21
21
|
```
|
|
22
22
|
"""
|
|
23
23
|
|
|
24
|
-
memories = load_memories(
|
|
24
|
+
memories = load_memories(config=config)
|
|
25
25
|
tools = []
|
|
26
26
|
|
|
27
27
|
if not memories:
|
|
@@ -38,7 +38,7 @@ def do_discovery_engine(message_data, metadata, vector_name):
|
|
|
38
38
|
continue
|
|
39
39
|
#location = gcp_config.get('location')
|
|
40
40
|
corpus = DiscoveryEngineClient(
|
|
41
|
-
data_store_id=vector_name,
|
|
41
|
+
data_store_id=config.vector_name,
|
|
42
42
|
project_id=get_gcp_project(),
|
|
43
43
|
# location needs to be 'eu' or 'us' which doesn't work with other configurations
|
|
44
44
|
#location=location or global_location
|
|
@@ -65,14 +65,14 @@ def do_discovery_engine(message_data, metadata, vector_name):
|
|
|
65
65
|
log.error(f"Error importing {message_data} - {corp=} - {str(err)}")
|
|
66
66
|
|
|
67
67
|
if str(err).startswith("404"):
|
|
68
|
-
log.info(f"Attempting to create a new DiscoveryEngine corpus: {vector_name}")
|
|
68
|
+
log.info(f"Attempting to create a new DiscoveryEngine corpus: {config.vector_name}")
|
|
69
69
|
try:
|
|
70
|
-
new_corp = create_new_discovery_engine(
|
|
70
|
+
new_corp = create_new_discovery_engine(config)
|
|
71
71
|
except Exception as err:
|
|
72
|
-
log.error(f"Failed to create new DiscoveryEngine {vector_name} - {str(err)}")
|
|
72
|
+
log.error(f"Failed to create new DiscoveryEngine {config.vector_name} - {str(err)}")
|
|
73
73
|
continue
|
|
74
74
|
if new_corp:
|
|
75
|
-
log.info(f"Found new DiscoveryEngine {vector_name=} - {new_corp=}")
|
|
75
|
+
log.info(f"Found new DiscoveryEngine {config.vector_name=} - {new_corp=}")
|
|
76
76
|
response = corp.import_documents(
|
|
77
77
|
gcs_uri=message_data
|
|
78
78
|
)
|
|
@@ -126,7 +126,7 @@ def discovery_engine_chunker_check(message_data, metadata, vector_name:str=None,
|
|
|
126
126
|
total_memories = len(check_write_memories(config))
|
|
127
127
|
llama = None
|
|
128
128
|
if check_discovery_engine_in_memory(config):
|
|
129
|
-
llama = do_discovery_engine(message_data, metadata,
|
|
129
|
+
llama = do_discovery_engine(message_data, metadata, config=config)
|
|
130
130
|
log.info(f"Processed discovery engine: {llama}")
|
|
131
131
|
|
|
132
132
|
# If discovery engine is the only entry, return
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
from .discovery_engine_client import DiscoveryEngineClient
|
|
2
|
-
from ..utils
|
|
2
|
+
from ..utils import ConfigManager
|
|
3
3
|
from ..utils.gcp_project import get_gcp_project
|
|
4
4
|
|
|
5
|
-
def create_new_discovery_engine(vector_name):
|
|
5
|
+
def create_new_discovery_engine(config:ConfigManager):
|
|
6
6
|
|
|
7
|
-
chunker_config =
|
|
7
|
+
chunker_config = config.vacConfig("chunker")
|
|
8
8
|
|
|
9
9
|
chunk_size = 500
|
|
10
10
|
if chunker_config:
|
|
@@ -18,7 +18,7 @@ def create_new_discovery_engine(vector_name):
|
|
|
18
18
|
#location = gcp_config.get('location')
|
|
19
19
|
|
|
20
20
|
de = DiscoveryEngineClient(
|
|
21
|
-
data_store_id=vector_name,
|
|
21
|
+
data_store_id=config.vector_name,
|
|
22
22
|
project_id=project_id,
|
|
23
23
|
# location needs to be 'eu' or 'us' which doesn't work with other configurations
|
|
24
24
|
#location=location
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
from ..utils import ConfigManager
|
|
2
|
+
from ..utils.gcp_project import get_gcp_project
|
|
3
|
+
from ..custom_logging import log
|
|
4
|
+
from .discovery_engine_client import DiscoveryEngineClient
|
|
5
|
+
from ..components import load_memories
|
|
6
|
+
|
|
7
|
+
def get_all_chunks(question:str, config:ConfigManager):
    """
    Look through a config memory key and find all Vertex AI Search retrievers, call them and return a joined string of chunks

    args: question - question to search similarity for
          config: A ConfigManager object

    returns: a big string of chunks joined by newlines, or None when no
             memories are loaded or no retriever returned any chunks
    """
    memories = load_memories(config=config)
    if not memories:
        return None

    default_vector_name = config.vector_name
    chunks = []

    for memory in memories:
        for key, value in memory.items():
            log.info(f"Found memory {key}")
            vectorstore = value.get('vectorstore')
            if vectorstore not in ("discovery_engine", "vertex_ai_search"):
                continue

            # Resolve the data store name per memory entry so that a
            # read_only override on one entry does not leak into the
            # entries that follow it.
            vector_name = default_vector_name
            if value.get('read_only'):
                new_vector_name = value.get('vector_name')
                if new_vector_name:
                    vector_name = new_vector_name
                else:
                    # Keep the config's own vector_name instead of
                    # querying with None.
                    log.warning("read_only specified but no new vector_name to read from")

            num_chunks = value.get('num_chunks') or 3

            chunk = get_chunks(question, vector_name, num_chunks)
            if chunk:
                chunks.append(chunk)

    if chunks:
        return "\n".join(chunks)

    log.warning(f"No chunks found for {default_vector_name}")
    return None
|
|
44
|
+
|
|
45
|
+
def get_chunks(question, vector_name, num_chunks):
    """
    Query a single Discovery Engine data store for chunks matching a question.

    args: question - the query passed to DiscoveryEngineClient.get_chunks
          vector_name - first positional argument of DiscoveryEngineClient
                        (presumably the data store id - confirm against the client)
          num_chunks - used for both num_previous_chunks and num_next_chunks

    returns: whatever DiscoveryEngineClient.get_chunks returns, or None if
             that call raised (the error is logged, not re-raised)
    """
    # Client construction stays outside the try: only the query is best-effort.
    client = DiscoveryEngineClient(vector_name, project_id=get_gcp_project())
    try:
        found = client.get_chunks(
            question,
            num_previous_chunks=num_chunks,
            num_next_chunks=num_chunks,
        )
    except Exception as err:
        log.error(f"No discovery engine chunks found: {str(err)}")
        return None
    return found
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: sunholo
|
|
3
|
-
Version: 0.80.5
|
|
3
|
+
Version: 0.80.6
|
|
4
4
|
Summary: Large Language Model DevOps - a package to help deploy LLMs to the Cloud.
|
|
5
5
|
Home-page: https://github.com/sunholo-data/sunholo-py
|
|
6
|
-
Download-URL: https://github.com/sunholo-data/sunholo-py/archive/refs/tags/v0.80.5.tar.gz
|
|
6
|
+
Download-URL: https://github.com/sunholo-data/sunholo-py/archive/refs/tags/v0.80.6.tar.gz
|
|
7
7
|
Author: Holosun ApS
|
|
8
8
|
Author-email: multivac@sunholo.com
|
|
9
9
|
License: Apache License, Version 2.0
|
|
@@ -37,7 +37,7 @@ sunholo/chunker/images.py,sha256=id2PBu6XyGEOtgafq2v0c9_O6kxaC_pYFMnbsIitkSg,186
|
|
|
37
37
|
sunholo/chunker/loaders.py,sha256=CCB0IGigNAWT__2ImVin_j83W3eGS2Qe5I6U18YQzoM,10275
|
|
38
38
|
sunholo/chunker/message_data.py,sha256=EaiY7_HClpcfPUAYaAm6Zk5ReeZ9s9F_jBVd0kDgI-4,10836
|
|
39
39
|
sunholo/chunker/pdfs.py,sha256=njDPop751GMHi3cOwIKd2Yct-_lWR2gqcB7WykfHphs,2480
|
|
40
|
-
sunholo/chunker/process_chunker_data.py,sha256=
|
|
40
|
+
sunholo/chunker/process_chunker_data.py,sha256=uO-YOEHIjAOy0ZMJ0vea9OMNsQBISHfhbtgoyuHiP6s,3598
|
|
41
41
|
sunholo/chunker/publish.py,sha256=AX5u-fcyDytED67IfizMzvOMcYPXEo6XBJvyk_7maK8,2939
|
|
42
42
|
sunholo/chunker/pubsub.py,sha256=48bhuAcszN7LGe3-ksPSLHHhq0uKxiXOrizck5qpcP0,1012
|
|
43
43
|
sunholo/chunker/splitter.py,sha256=QLAEsJOpEYFZr9-UGZUuAlNVyjfCWb8jvzCHg0rVShE,6751
|
|
@@ -70,10 +70,11 @@ sunholo/database/sql/sb/create_table.sql,sha256=SbcOrf5tUiVKGUohu1lau7IsbDRbTFbr
|
|
|
70
70
|
sunholo/database/sql/sb/delete_source_row.sql,sha256=r6fEuUKdbiLHCDGKSbKINDCpJjsmfHZNNOo1ptwLLSo,75
|
|
71
71
|
sunholo/database/sql/sb/return_sources.sql,sha256=89KAnxfK8n_qGK9jy1OQT8f9n4uYUtYL5cCxbC2mj_c,255
|
|
72
72
|
sunholo/database/sql/sb/setup.sql,sha256=CvoFvZQev2uWjmFa3aj3m3iuPFzAAJZ0S7Qi3L3-zZI,89
|
|
73
|
-
sunholo/discovery_engine/__init__.py,sha256=
|
|
74
|
-
sunholo/discovery_engine/chunker_handler.py,sha256=
|
|
75
|
-
sunholo/discovery_engine/create_new.py,sha256=
|
|
73
|
+
sunholo/discovery_engine/__init__.py,sha256=P00bB8aVVWefOZbCQvzHsVMuP_sd-_d_4o5xCuCpN3g,108
|
|
74
|
+
sunholo/discovery_engine/chunker_handler.py,sha256=Fv4BLOBi_7ap3AiAy4TlTN48CLZSMurJ3TkvC75Euro,5123
|
|
75
|
+
sunholo/discovery_engine/create_new.py,sha256=NzhSh6nG6nQ5J9gZh8IDph4JiEVT_DC5GGvP0GuwTWs,943
|
|
76
76
|
sunholo/discovery_engine/discovery_engine_client.py,sha256=oORB2SVVqrYrz7E3srPrknyuR6Dl3SJJwaVrbVXJER4,17726
|
|
77
|
+
sunholo/discovery_engine/get_ai_search_chunks.py,sha256=7yMpGaLU1nL3ttFLE-cIhNao-Vq9SY1edFC3T-7wH2I,1944
|
|
77
78
|
sunholo/embedder/__init__.py,sha256=sI4N_CqgEVcrMDxXgxKp1FsfsB4FpjoXgPGkl4N_u4I,44
|
|
78
79
|
sunholo/embedder/embed_chunk.py,sha256=MCbTePWjUbIRVDFFhHJ94BvOZvIom62-mTr0PmfQyt0,6951
|
|
79
80
|
sunholo/gcs/__init__.py,sha256=SZvbsMFDko40sIRHTHppA37IijvJTae54vrhooEF5-4,90
|
|
@@ -133,9 +134,9 @@ sunholo/vertex/init.py,sha256=1OQwcPBKZYBTDPdyU7IM4X4OmiXLdsNV30C-fee2scQ,2875
|
|
|
133
134
|
sunholo/vertex/memory_tools.py,sha256=pgSahVDh7GPEulu3nl-w0jb5lTClb4TCnVxPnMokNZY,7533
|
|
134
135
|
sunholo/vertex/safety.py,sha256=S9PgQT1O_BQAkcqauWncRJaydiP8Q_Jzmu9gxYfy1VA,2482
|
|
135
136
|
sunholo/vertex/type_dict_to_json.py,sha256=uTzL4o9tJRao4u-gJOFcACgWGkBOtqACmb6ihvCErL8,4694
|
|
136
|
-
sunholo-0.80.
|
|
137
|
-
sunholo-0.80.
|
|
138
|
-
sunholo-0.80.
|
|
139
|
-
sunholo-0.80.
|
|
140
|
-
sunholo-0.80.
|
|
141
|
-
sunholo-0.80.
|
|
137
|
+
sunholo-0.80.6.dist-info/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
|
|
138
|
+
sunholo-0.80.6.dist-info/METADATA,sha256=CQEUn95SrlED6d_I0m_V-ZhLHTOXD0nVugPZbWl3N4k,7348
|
|
139
|
+
sunholo-0.80.6.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
|
|
140
|
+
sunholo-0.80.6.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
|
|
141
|
+
sunholo-0.80.6.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
|
|
142
|
+
sunholo-0.80.6.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|