sunholo-0.119.7-py3-none-any.whl → sunholo-0.119.9-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sunholo/chunker/process_chunker_data.py +1 -1
- sunholo/discovery_engine/chunker_handler.py +6 -2
- sunholo/discovery_engine/cli.py +2 -3
- sunholo/discovery_engine/discovery_engine_client.py +7 -1
- {sunholo-0.119.7.dist-info → sunholo-0.119.9.dist-info}/METADATA +1 -1
- {sunholo-0.119.7.dist-info → sunholo-0.119.9.dist-info}/RECORD +10 -10
- {sunholo-0.119.7.dist-info → sunholo-0.119.9.dist-info}/WHEEL +1 -1
- {sunholo-0.119.7.dist-info → sunholo-0.119.9.dist-info}/LICENSE.txt +0 -0
- {sunholo-0.119.7.dist-info → sunholo-0.119.9.dist-info}/entry_points.txt +0 -0
- {sunholo-0.119.7.dist-info → sunholo-0.119.9.dist-info}/top_level.txt +0 -0
|
@@ -38,7 +38,7 @@ def process_chunker_data(message_data, metadata, vector_name):
|
|
|
38
38
|
return llamacheck
|
|
39
39
|
|
|
40
40
|
# if only a discovery engine memory, return early as no other processing needed
|
|
41
|
-
discovery_check = discovery_engine_chunker_check(message_data, metadata, config=config)
|
|
41
|
+
discovery_check = discovery_engine_chunker_check(message_data, metadata, config=config, process=False)
|
|
42
42
|
if discovery_check:
|
|
43
43
|
return discovery_check
|
|
44
44
|
|
|
@@ -125,7 +125,11 @@ def check_write_memories(config:ConfigManager):
|
|
|
125
125
|
|
|
126
126
|
return write_mem
|
|
127
127
|
|
|
128
|
-
def discovery_engine_chunker_check(message_data,
|
|
128
|
+
def discovery_engine_chunker_check(message_data,
|
|
129
|
+
metadata,
|
|
130
|
+
vector_name:str=None,
|
|
131
|
+
config:ConfigManager=None,
|
|
132
|
+
process:bool=True):
|
|
129
133
|
|
|
130
134
|
if config is None:
|
|
131
135
|
if vector_name is None:
|
|
@@ -139,7 +143,7 @@ def discovery_engine_chunker_check(message_data, metadata, vector_name:str=None,
|
|
|
139
143
|
|
|
140
144
|
total_memories = len(check_write_memories(config))
|
|
141
145
|
llama = None
|
|
142
|
-
if check_discovery_engine_in_memory(config):
|
|
146
|
+
if check_discovery_engine_in_memory(config) and process:
|
|
143
147
|
llama = do_discovery_engine(message_data, metadata, config=config)
|
|
144
148
|
log.info(f"Processed discovery engine: {llama}")
|
|
145
149
|
|
sunholo/discovery_engine/cli.py
CHANGED
|
@@ -223,12 +223,11 @@ def setup_discovery_engine_subparser(subparsers):
|
|
|
223
223
|
# Search subcommand
|
|
224
224
|
search_parser = discovery_engine_subparsers.add_parser('search', help='Search a Discovery Engine datastore')
|
|
225
225
|
search_parser.add_argument('--query', required=True, help='The search query')
|
|
226
|
-
search_parser.add_argument('--
|
|
227
|
-
search_parser.add_argument('--num-next-chunks', type=int, default=3, help='Number of next chunks to return for context')
|
|
226
|
+
search_parser.add_argument('--data-store-id', required=True, help='Data store ID to search')
|
|
228
227
|
search_parser.add_argument('--page-size', type=int, default=10, help='The maximum number of results to return per page')
|
|
229
228
|
search_parser.add_argument('--parse-chunks-to-string', action='store_true', help='Combine chunks into a single string')
|
|
230
229
|
search_parser.add_argument('--serving-config', default='default_serving_config', help='The serving configuration to use')
|
|
231
|
-
|
|
230
|
+
|
|
232
231
|
search_parser.set_defaults(func=discovery_engine_command)
|
|
233
232
|
|
|
234
233
|
# Search by ID and/or Date subcommand
|
|
@@ -607,6 +607,12 @@ class DiscoveryEngineClient:
|
|
|
607
607
|
|
|
608
608
|
return self._import_document_request(request)
|
|
609
609
|
|
|
610
|
+
def _create_unique_gsuri_docid(self, gcs_uri:str):
|
|
611
|
+
import hashlib
|
|
612
|
+
# Create SHA-256 hash of the URI
|
|
613
|
+
hash_object = hashlib.sha256(gcs_uri.encode())
|
|
614
|
+
# Take first 16 bytes (128 bits) and encode as hex
|
|
615
|
+
return hash_object.hexdigest()[:32]
|
|
610
616
|
|
|
611
617
|
def import_document_with_metadata(self, gcs_uri: str, metadata: dict, branch="default_branch"):
|
|
612
618
|
"""
|
|
@@ -622,7 +628,7 @@ class DiscoveryEngineClient:
|
|
|
622
628
|
"""
|
|
623
629
|
try:
|
|
624
630
|
# 1. Generate a unique document ID
|
|
625
|
-
document_id =
|
|
631
|
+
document_id = self._create_unique_gsuri_docid(gcs_uri)
|
|
626
632
|
|
|
627
633
|
# 2. Create a Document object
|
|
628
634
|
parent = self.doc_client.branch_path(
|
|
@@ -38,7 +38,7 @@ sunholo/chunker/images.py,sha256=id2PBu6XyGEOtgafq2v0c9_O6kxaC_pYFMnbsIitkSg,186
|
|
|
38
38
|
sunholo/chunker/loaders.py,sha256=5NXrMxV-WdbFpxeLhFzccw0_zhf1UQ7yKFFeaMkc9Bc,11105
|
|
39
39
|
sunholo/chunker/message_data.py,sha256=bpb8QWQttqazm5lr7fTFJ5JDwf-P0SQ5cOIf6NikNyI,10836
|
|
40
40
|
sunholo/chunker/pdfs.py,sha256=xwbuMJrbypcyPXfZ8tiUidWeMr80C2NhfTC1mwa8SHY,2477
|
|
41
|
-
sunholo/chunker/process_chunker_data.py,sha256=
|
|
41
|
+
sunholo/chunker/process_chunker_data.py,sha256=xjOAf1FvHDwQaBm7kgDzLQUnwm6AW8qf4fTrwDnwmtc,3613
|
|
42
42
|
sunholo/chunker/publish.py,sha256=IDud-NhRcEZFv9GkyWJFRKwfptIU052kSPKEx8AYW68,2943
|
|
43
43
|
sunholo/chunker/pubsub.py,sha256=48bhuAcszN7LGe3-ksPSLHHhq0uKxiXOrizck5qpcP0,1012
|
|
44
44
|
sunholo/chunker/splitter.py,sha256=RfekLPkjhCcNd1PFXIj_FxusJMJ8_3cyWl7bsYvtQ0g,7068
|
|
@@ -72,10 +72,10 @@ sunholo/database/sql/sb/delete_source_row.sql,sha256=r6fEuUKdbiLHCDGKSbKINDCpJjs
|
|
|
72
72
|
sunholo/database/sql/sb/return_sources.sql,sha256=89KAnxfK8n_qGK9jy1OQT8f9n4uYUtYL5cCxbC2mj_c,255
|
|
73
73
|
sunholo/database/sql/sb/setup.sql,sha256=CvoFvZQev2uWjmFa3aj3m3iuPFzAAJZ0S7Qi3L3-zZI,89
|
|
74
74
|
sunholo/discovery_engine/__init__.py,sha256=hLgqRDJ22Aov9o2QjAEfsVgnL3kMdM-g5p8RJ9OyKdQ,130
|
|
75
|
-
sunholo/discovery_engine/chunker_handler.py,sha256=
|
|
76
|
-
sunholo/discovery_engine/cli.py,sha256=
|
|
75
|
+
sunholo/discovery_engine/chunker_handler.py,sha256=CYsyZLz_VZ0S_TcVlYmPxYz3JiAiPtw3tbcjvE3IFYA,6023
|
|
76
|
+
sunholo/discovery_engine/cli.py,sha256=KGVle5rkLL49oF9TQhrGI--8017IvvLOEoYur545Qb0,12790
|
|
77
77
|
sunholo/discovery_engine/create_new.py,sha256=WUi4_xh_dFaGX3xA9jkNKZhaR6LCELjMPeRb0hyj4FU,1226
|
|
78
|
-
sunholo/discovery_engine/discovery_engine_client.py,sha256=
|
|
78
|
+
sunholo/discovery_engine/discovery_engine_client.py,sha256=0h3h_Vy8vCcvfSBhGl8qSgVzDakOmsaPPFqHIGh3mv8,33053
|
|
79
79
|
sunholo/discovery_engine/get_ai_search_chunks.py,sha256=hsFGOQugSeTMPEaQ16XTs_D45F8NABBm2IsAEdTk7kQ,4316
|
|
80
80
|
sunholo/embedder/__init__.py,sha256=sI4N_CqgEVcrMDxXgxKp1FsfsB4FpjoXgPGkl4N_u4I,44
|
|
81
81
|
sunholo/embedder/embed_chunk.py,sha256=did2pKkWM2o0KkRcb0H9l2x_WjCq6OyuHDxGbITFKPM,6530
|
|
@@ -166,9 +166,9 @@ sunholo/vertex/init.py,sha256=1OQwcPBKZYBTDPdyU7IM4X4OmiXLdsNV30C-fee2scQ,2875
|
|
|
166
166
|
sunholo/vertex/memory_tools.py,sha256=tBZxqVZ4InTmdBvLlOYwoSEWu4-kGquc-gxDwZCC4FA,7667
|
|
167
167
|
sunholo/vertex/safety.py,sha256=S9PgQT1O_BQAkcqauWncRJaydiP8Q_Jzmu9gxYfy1VA,2482
|
|
168
168
|
sunholo/vertex/type_dict_to_json.py,sha256=uTzL4o9tJRao4u-gJOFcACgWGkBOtqACmb6ihvCErL8,4694
|
|
169
|
-
sunholo-0.119.
|
|
170
|
-
sunholo-0.119.
|
|
171
|
-
sunholo-0.119.
|
|
172
|
-
sunholo-0.119.
|
|
173
|
-
sunholo-0.119.
|
|
174
|
-
sunholo-0.119.
|
|
169
|
+
sunholo-0.119.9.dist-info/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
|
|
170
|
+
sunholo-0.119.9.dist-info/METADATA,sha256=XDQ0g73OJrI5qnlk8SuhbYxaUPF7yuAZQyMUUqkZ0_4,9654
|
|
171
|
+
sunholo-0.119.9.dist-info/WHEEL,sha256=nn6H5-ilmfVryoAQl3ZQ2l8SH5imPWFpm1A5FgEuFV4,91
|
|
172
|
+
sunholo-0.119.9.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
|
|
173
|
+
sunholo-0.119.9.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
|
|
174
|
+
sunholo-0.119.9.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|