sunholo 0.119.8__py3-none-any.whl → 0.119.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sunholo/chunker/process_chunker_data.py +1 -1
- sunholo/discovery_engine/chunker_handler.py +6 -2
- sunholo/discovery_engine/cli.py +2 -3
- sunholo/discovery_engine/get_ai_search_chunks.py +28 -11
- {sunholo-0.119.8.dist-info → sunholo-0.119.10.dist-info}/METADATA +1 -1
- {sunholo-0.119.8.dist-info → sunholo-0.119.10.dist-info}/RECORD +10 -10
- {sunholo-0.119.8.dist-info → sunholo-0.119.10.dist-info}/WHEEL +1 -1
- {sunholo-0.119.8.dist-info → sunholo-0.119.10.dist-info}/LICENSE.txt +0 -0
- {sunholo-0.119.8.dist-info → sunholo-0.119.10.dist-info}/entry_points.txt +0 -0
- {sunholo-0.119.8.dist-info → sunholo-0.119.10.dist-info}/top_level.txt +0 -0
|
@@ -38,7 +38,7 @@ def process_chunker_data(message_data, metadata, vector_name):
|
|
|
38
38
|
return llamacheck
|
|
39
39
|
|
|
40
40
|
# if only a discovery engine memory, return early as no other processing needed
|
|
41
|
-
discovery_check = discovery_engine_chunker_check(message_data, metadata, config=config)
|
|
41
|
+
discovery_check = discovery_engine_chunker_check(message_data, metadata, config=config, process=False)
|
|
42
42
|
if discovery_check:
|
|
43
43
|
return discovery_check
|
|
44
44
|
|
|
@@ -125,7 +125,11 @@ def check_write_memories(config:ConfigManager):
|
|
|
125
125
|
|
|
126
126
|
return write_mem
|
|
127
127
|
|
|
128
|
-
def discovery_engine_chunker_check(message_data,
|
|
128
|
+
def discovery_engine_chunker_check(message_data,
|
|
129
|
+
metadata,
|
|
130
|
+
vector_name:str=None,
|
|
131
|
+
config:ConfigManager=None,
|
|
132
|
+
process:bool=True):
|
|
129
133
|
|
|
130
134
|
if config is None:
|
|
131
135
|
if vector_name is None:
|
|
@@ -139,7 +143,7 @@ def discovery_engine_chunker_check(message_data, metadata, vector_name:str=None,
|
|
|
139
143
|
|
|
140
144
|
total_memories = len(check_write_memories(config))
|
|
141
145
|
llama = None
|
|
142
|
-
if check_discovery_engine_in_memory(config):
|
|
146
|
+
if check_discovery_engine_in_memory(config) and process:
|
|
143
147
|
llama = do_discovery_engine(message_data, metadata, config=config)
|
|
144
148
|
log.info(f"Processed discovery engine: {llama}")
|
|
145
149
|
|
sunholo/discovery_engine/cli.py
CHANGED
|
@@ -223,12 +223,11 @@ def setup_discovery_engine_subparser(subparsers):
|
|
|
223
223
|
# Search subcommand
|
|
224
224
|
search_parser = discovery_engine_subparsers.add_parser('search', help='Search a Discovery Engine datastore')
|
|
225
225
|
search_parser.add_argument('--query', required=True, help='The search query')
|
|
226
|
-
search_parser.add_argument('--
|
|
227
|
-
search_parser.add_argument('--num-next-chunks', type=int, default=3, help='Number of next chunks to return for context')
|
|
226
|
+
search_parser.add_argument('--data-store-id', required=True, help='Data store ID to search')
|
|
228
227
|
search_parser.add_argument('--page-size', type=int, default=10, help='The maximum number of results to return per page')
|
|
229
228
|
search_parser.add_argument('--parse-chunks-to-string', action='store_true', help='Combine chunks into a single string')
|
|
230
229
|
search_parser.add_argument('--serving-config', default='default_serving_config', help='The serving configuration to use')
|
|
231
|
-
|
|
230
|
+
|
|
232
231
|
search_parser.set_defaults(func=discovery_engine_command)
|
|
233
232
|
|
|
234
233
|
# Search by ID and/or Date subcommand
|
|
@@ -5,12 +5,13 @@ from .discovery_engine_client import DiscoveryEngineClient
|
|
|
5
5
|
from ..components import load_memories
|
|
6
6
|
import traceback
|
|
7
7
|
|
|
8
|
-
def get_all_chunks(question:str, config:ConfigManager):
|
|
8
|
+
def get_all_chunks(question:str, config:ConfigManager, filter_str=None):
|
|
9
9
|
"""
|
|
10
10
|
Look through a config memory key and find all Vertex AI Search retrievers, call them and return a joined string of chunks
|
|
11
11
|
|
|
12
12
|
args: question - question to search similarity for
|
|
13
13
|
config: A ConfigManager object
|
|
14
|
+
filter_str: A filter that will restrict ai search via its metadata. See https://cloud.google.com/generative-ai-app-builder/docs/filter-search-metadata
|
|
14
15
|
|
|
15
16
|
returns: a big string of chunks
|
|
16
17
|
"""
|
|
@@ -39,7 +40,7 @@ def get_all_chunks(question:str, config:ConfigManager):
|
|
|
39
40
|
project_id = gcp_config.get('project_id')
|
|
40
41
|
serving_config = value.get('serving_config')
|
|
41
42
|
|
|
42
|
-
chunk = get_chunks(question, vector_name, num_chunks, project_id=project_id, serving_config=serving_config)
|
|
43
|
+
chunk = get_chunks(question, vector_name, num_chunks, filter_str=filter_str, project_id=project_id, serving_config=serving_config)
|
|
43
44
|
if chunk:
|
|
44
45
|
chunks.append(chunk)
|
|
45
46
|
if chunks:
|
|
@@ -48,23 +49,30 @@ def get_all_chunks(question:str, config:ConfigManager):
|
|
|
48
49
|
log.warning(f"No chunks found for {vector_name}")
|
|
49
50
|
return None
|
|
50
51
|
|
|
51
|
-
def get_chunks(question, vector_name, num_chunks, project_id=None, serving_config=None):
|
|
52
|
+
def get_chunks(question, vector_name, num_chunks, filter_str=None, project_id=None, serving_config=None):
|
|
52
53
|
if serving_config is None:
|
|
53
54
|
serving_config = "default_serving_config"
|
|
54
55
|
de = DiscoveryEngineClient(vector_name, project_id=project_id or get_gcp_project(include_config=True))
|
|
55
56
|
try:
|
|
56
|
-
|
|
57
|
+
if filter_str:
|
|
58
|
+
return de.search_with_filters(query=question,
|
|
59
|
+
filter_str=filter_str,
|
|
60
|
+
num_previous_chunks=num_chunks,
|
|
61
|
+
num_next_chunks=num_chunks)
|
|
62
|
+
else:
|
|
63
|
+
return de.get_chunks(question, num_previous_chunks=num_chunks, num_next_chunks=num_chunks, serving_config=serving_config)
|
|
57
64
|
except Exception as err:
|
|
58
|
-
log.error(f"No discovery engine chunks found: {str(err)}")
|
|
65
|
+
log.error(f"No discovery engine chunks found: {str(err)} {traceback.format_exc()}")
|
|
59
66
|
|
|
60
67
|
|
|
61
68
|
|
|
62
|
-
async def async_get_all_chunks(question:str, config:ConfigManager):
|
|
69
|
+
async def async_get_all_chunks(question:str, config:ConfigManager, filter_str=None):
|
|
63
70
|
"""
|
|
64
71
|
Look through a config memory key and find all Vertex AI Search retrievers, call them and return a joined string of chunks
|
|
65
72
|
|
|
66
73
|
args: question - question to search similarity for
|
|
67
74
|
config: A ConfigManager object
|
|
75
|
+
filter_str: A filter that will restrict ai search via its metadata. See https://cloud.google.com/generative-ai-app-builder/docs/filter-search-metadata
|
|
68
76
|
|
|
69
77
|
returns: a big string of chunks
|
|
70
78
|
"""
|
|
@@ -87,10 +95,16 @@ async def async_get_all_chunks(question:str, config:ConfigManager):
|
|
|
87
95
|
continue
|
|
88
96
|
else:
|
|
89
97
|
vector_name = new_vector_name
|
|
90
|
-
|
|
98
|
+
|
|
99
|
+
project_id = value.get('project_id') or get_gcp_project(include_config=True)
|
|
91
100
|
num_chunks = value.get('num_chunks') or 3
|
|
92
101
|
|
|
93
|
-
chunk = await async_get_chunks(question,
|
|
102
|
+
chunk = await async_get_chunks(question,
|
|
103
|
+
vector_name=vector_name,
|
|
104
|
+
num_chunks=num_chunks,
|
|
105
|
+
filter_str=filter_str,
|
|
106
|
+
project_id=project_id)
|
|
107
|
+
|
|
94
108
|
if chunk:
|
|
95
109
|
chunks.append(chunk)
|
|
96
110
|
if chunks:
|
|
@@ -99,9 +113,12 @@ async def async_get_all_chunks(question:str, config:ConfigManager):
|
|
|
99
113
|
log.warning(f"No chunks found for {vector_name}")
|
|
100
114
|
return None
|
|
101
115
|
|
|
102
|
-
async def async_get_chunks(question, vector_name, num_chunks):
|
|
103
|
-
de = DiscoveryEngineClient(vector_name, project_id=
|
|
116
|
+
async def async_get_chunks(question, vector_name, num_chunks, filter_str, project_id=None):
|
|
117
|
+
de = DiscoveryEngineClient(vector_name, project_id=project_id)
|
|
104
118
|
try:
|
|
105
|
-
return await de.
|
|
119
|
+
return await de.search_with_filters(query=question,
|
|
120
|
+
filter_str=filter_str,
|
|
121
|
+
num_previous_chunks=num_chunks,
|
|
122
|
+
num_next_chunks=num_chunks)
|
|
106
123
|
except Exception as err:
|
|
107
124
|
log.error(f"No discovery engine chunks found: {str(err)} {traceback.format_exc()}")
|
|
@@ -38,7 +38,7 @@ sunholo/chunker/images.py,sha256=id2PBu6XyGEOtgafq2v0c9_O6kxaC_pYFMnbsIitkSg,186
|
|
|
38
38
|
sunholo/chunker/loaders.py,sha256=5NXrMxV-WdbFpxeLhFzccw0_zhf1UQ7yKFFeaMkc9Bc,11105
|
|
39
39
|
sunholo/chunker/message_data.py,sha256=bpb8QWQttqazm5lr7fTFJ5JDwf-P0SQ5cOIf6NikNyI,10836
|
|
40
40
|
sunholo/chunker/pdfs.py,sha256=xwbuMJrbypcyPXfZ8tiUidWeMr80C2NhfTC1mwa8SHY,2477
|
|
41
|
-
sunholo/chunker/process_chunker_data.py,sha256=
|
|
41
|
+
sunholo/chunker/process_chunker_data.py,sha256=xjOAf1FvHDwQaBm7kgDzLQUnwm6AW8qf4fTrwDnwmtc,3613
|
|
42
42
|
sunholo/chunker/publish.py,sha256=IDud-NhRcEZFv9GkyWJFRKwfptIU052kSPKEx8AYW68,2943
|
|
43
43
|
sunholo/chunker/pubsub.py,sha256=48bhuAcszN7LGe3-ksPSLHHhq0uKxiXOrizck5qpcP0,1012
|
|
44
44
|
sunholo/chunker/splitter.py,sha256=RfekLPkjhCcNd1PFXIj_FxusJMJ8_3cyWl7bsYvtQ0g,7068
|
|
@@ -72,11 +72,11 @@ sunholo/database/sql/sb/delete_source_row.sql,sha256=r6fEuUKdbiLHCDGKSbKINDCpJjs
|
|
|
72
72
|
sunholo/database/sql/sb/return_sources.sql,sha256=89KAnxfK8n_qGK9jy1OQT8f9n4uYUtYL5cCxbC2mj_c,255
|
|
73
73
|
sunholo/database/sql/sb/setup.sql,sha256=CvoFvZQev2uWjmFa3aj3m3iuPFzAAJZ0S7Qi3L3-zZI,89
|
|
74
74
|
sunholo/discovery_engine/__init__.py,sha256=hLgqRDJ22Aov9o2QjAEfsVgnL3kMdM-g5p8RJ9OyKdQ,130
|
|
75
|
-
sunholo/discovery_engine/chunker_handler.py,sha256=
|
|
76
|
-
sunholo/discovery_engine/cli.py,sha256=
|
|
75
|
+
sunholo/discovery_engine/chunker_handler.py,sha256=CYsyZLz_VZ0S_TcVlYmPxYz3JiAiPtw3tbcjvE3IFYA,6023
|
|
76
|
+
sunholo/discovery_engine/cli.py,sha256=KGVle5rkLL49oF9TQhrGI--8017IvvLOEoYur545Qb0,12790
|
|
77
77
|
sunholo/discovery_engine/create_new.py,sha256=WUi4_xh_dFaGX3xA9jkNKZhaR6LCELjMPeRb0hyj4FU,1226
|
|
78
78
|
sunholo/discovery_engine/discovery_engine_client.py,sha256=0h3h_Vy8vCcvfSBhGl8qSgVzDakOmsaPPFqHIGh3mv8,33053
|
|
79
|
-
sunholo/discovery_engine/get_ai_search_chunks.py,sha256=
|
|
79
|
+
sunholo/discovery_engine/get_ai_search_chunks.py,sha256=IDb9cm4X5XqQbUytZVQrBsm_oiSQxe3KjBAxaSZ3JyQ,5548
|
|
80
80
|
sunholo/embedder/__init__.py,sha256=sI4N_CqgEVcrMDxXgxKp1FsfsB4FpjoXgPGkl4N_u4I,44
|
|
81
81
|
sunholo/embedder/embed_chunk.py,sha256=did2pKkWM2o0KkRcb0H9l2x_WjCq6OyuHDxGbITFKPM,6530
|
|
82
82
|
sunholo/embedder/embed_metadata.py,sha256=2ziUIdVwnbCUU8gOwQWEvkrRcyp-7IeyZfSsWNkMquA,866
|
|
@@ -166,9 +166,9 @@ sunholo/vertex/init.py,sha256=1OQwcPBKZYBTDPdyU7IM4X4OmiXLdsNV30C-fee2scQ,2875
|
|
|
166
166
|
sunholo/vertex/memory_tools.py,sha256=tBZxqVZ4InTmdBvLlOYwoSEWu4-kGquc-gxDwZCC4FA,7667
|
|
167
167
|
sunholo/vertex/safety.py,sha256=S9PgQT1O_BQAkcqauWncRJaydiP8Q_Jzmu9gxYfy1VA,2482
|
|
168
168
|
sunholo/vertex/type_dict_to_json.py,sha256=uTzL4o9tJRao4u-gJOFcACgWGkBOtqACmb6ihvCErL8,4694
|
|
169
|
-
sunholo-0.119.
|
|
170
|
-
sunholo-0.119.
|
|
171
|
-
sunholo-0.119.
|
|
172
|
-
sunholo-0.119.
|
|
173
|
-
sunholo-0.119.
|
|
174
|
-
sunholo-0.119.
|
|
169
|
+
sunholo-0.119.10.dist-info/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
|
|
170
|
+
sunholo-0.119.10.dist-info/METADATA,sha256=svQpxFiR6l0HgzfJ6Kb5ihFL3_3iv1DYZllOFDjGPwA,9655
|
|
171
|
+
sunholo-0.119.10.dist-info/WHEEL,sha256=nn6H5-ilmfVryoAQl3ZQ2l8SH5imPWFpm1A5FgEuFV4,91
|
|
172
|
+
sunholo-0.119.10.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
|
|
173
|
+
sunholo-0.119.10.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
|
|
174
|
+
sunholo-0.119.10.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|