sunholo 0.126.2__py3-none-any.whl → 0.126.4__py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- sunholo/discovery_engine/chunker_handler.py +21 -6
- sunholo/discovery_engine/discovery_engine_client.py +7 -2
- sunholo/embedder/embed_metadata.py +1 -1
- {sunholo-0.126.2.dist-info → sunholo-0.126.4.dist-info}/METADATA +1 -1
- {sunholo-0.126.2.dist-info → sunholo-0.126.4.dist-info}/RECORD +9 -9
- {sunholo-0.126.2.dist-info → sunholo-0.126.4.dist-info}/WHEEL +0 -0
- {sunholo-0.126.2.dist-info → sunholo-0.126.4.dist-info}/entry_points.txt +0 -0
- {sunholo-0.126.2.dist-info → sunholo-0.126.4.dist-info}/licenses/LICENSE.txt +0 -0
- {sunholo-0.126.2.dist-info → sunholo-0.126.4.dist-info}/top_level.txt +0 -0
@@ -6,6 +6,7 @@ from ..components import load_memories
|
|
6
6
|
from .discovery_engine_client import DiscoveryEngineClient
|
7
7
|
from .create_new import create_new_discovery_engine
|
8
8
|
from ..embedder.embed_metadata import audit_metadata
|
9
|
+
import traceback
|
9
10
|
|
10
11
|
def do_discovery_engine(message_data:str, metadata:dict, config:ConfigManager=None):
|
11
12
|
"""
|
@@ -35,6 +36,7 @@ def do_discovery_engine(message_data:str, metadata:dict, config:ConfigManager=No
|
|
35
36
|
if vectorstore == "discovery_engine" or vectorstore == "vertex_ai_search":
|
36
37
|
log.info(f"Found vectorstore {vectorstore}")
|
37
38
|
if value.get('read_only'):
|
39
|
+
log.info(f"{vectorstore} is read only, skipping")
|
38
40
|
continue
|
39
41
|
|
40
42
|
project_id = value.get("project_id")
|
@@ -49,6 +51,7 @@ def do_discovery_engine(message_data:str, metadata:dict, config:ConfigManager=No
|
|
49
51
|
if not project_id:
|
50
52
|
raise ValueError("Couldn't retrieve project_id for vertex_ai_search")
|
51
53
|
|
54
|
+
log.info(f"Using {project_id} and {location} for DiscoveryEngineClient")
|
52
55
|
corpus = DiscoveryEngineClient(
|
53
56
|
data_store_id=config.vector_name,
|
54
57
|
project_id=project_id,
|
@@ -66,10 +69,13 @@ def do_discovery_engine(message_data:str, metadata:dict, config:ConfigManager=No
|
|
66
69
|
if message_data.startswith("gs://"):
|
67
70
|
log.info(f"DiscoveryEngineClient.import_files for {message_data}")
|
68
71
|
if "/pdf_parts/" in message_data:
|
72
|
+
log.info(f"Not processing files with /pdf_parts/ - {message_data}")
|
69
73
|
return None
|
70
74
|
for corp in corpuses:
|
71
75
|
try:
|
76
|
+
|
72
77
|
metadata = audit_metadata(metadata, chunk_length=500)
|
78
|
+
log.info(f"Importing {message_data} {metadata=} to {corp}")
|
73
79
|
response = corp.import_document_with_metadata(
|
74
80
|
gcs_uri=message_data,
|
75
81
|
metadata=metadata
|
@@ -79,7 +85,7 @@ def do_discovery_engine(message_data:str, metadata:dict, config:ConfigManager=No
|
|
79
85
|
else:
|
80
86
|
log.warning(f"Could not import {message_data} got not response")
|
81
87
|
except Exception as err:
|
82
|
-
log.error(f"Error importing {message_data} - {corp=} - {str(err)}")
|
88
|
+
log.error(f"Error importing {message_data} - {corp=} - {str(err)} {traceback.format_exc()}")
|
83
89
|
|
84
90
|
if str(err).startswith("404"):
|
85
91
|
log.info(f"Attempting to create a new DiscoveryEngine corpus: {config.vector_name}")
|
@@ -94,8 +100,8 @@ def do_discovery_engine(message_data:str, metadata:dict, config:ConfigManager=No
|
|
94
100
|
gcs_uri=message_data,
|
95
101
|
metadata=metadata
|
96
102
|
)
|
97
|
-
|
98
|
-
|
103
|
+
else:
|
104
|
+
raise Exception(f"Error importing {message_data} - {corp=} - {str(err)}")
|
99
105
|
|
100
106
|
metadata["source"] = message_data
|
101
107
|
return metadata
|
@@ -156,9 +162,18 @@ def discovery_engine_chunker_check(message_data,
|
|
156
162
|
return metadata
|
157
163
|
|
158
164
|
if total_discovery_memories > 0:
|
159
|
-
|
160
|
-
|
161
|
-
|
165
|
+
try:
|
166
|
+
log.info(f"Process discovery engine for {metadata}")
|
167
|
+
disc_meta = do_discovery_engine(message_data, metadata, config=config)
|
168
|
+
if disc_meta is None:
|
169
|
+
log.error(f"No disc_meta found for {metadata}")
|
170
|
+
else:
|
171
|
+
log.info(f"Processed discovery engine: {disc_meta}")
|
172
|
+
except Exception as err:
|
173
|
+
log.error(f"Error processing discovery engine: {str(err)} {traceback.format_exc()}")
|
174
|
+
disc_meta = None
|
175
|
+
|
176
|
+
return disc_meta
|
162
177
|
|
163
178
|
# If discovery engine is the only entry, return
|
164
179
|
if total_discovery_memories == total_memories:
|
@@ -16,6 +16,7 @@ import asyncio
|
|
16
16
|
import json
|
17
17
|
import uuid
|
18
18
|
from ..utils.mime import guess_mime_type
|
19
|
+
import traceback
|
19
20
|
|
20
21
|
class DiscoveryEngineClient:
|
21
22
|
"""
|
@@ -80,8 +81,10 @@ class DiscoveryEngineClient:
|
|
80
81
|
self.async_search_client = discoveryengine.SearchServiceAsyncClient(client_options=client_options)
|
81
82
|
except RuntimeError:
|
82
83
|
# No event loop in non-async environment, set async client to None
|
83
|
-
log.info("No event loop detected; skipping async client initialization")
|
84
|
+
log.info("No event loop detected; skipping Discoveryengine async client initialization")
|
84
85
|
self.async_search_client = None
|
86
|
+
|
87
|
+
log.info(f"Discovery Engine client initialized with {self.project_id=}, {self.data_store_id=}, {self.location=}")
|
85
88
|
|
86
89
|
@classmethod
|
87
90
|
def my_retry(cls):
|
@@ -490,6 +493,7 @@ class DiscoveryEngineClient:
|
|
490
493
|
return doc_client.import_documents(request=request)
|
491
494
|
|
492
495
|
try:
|
496
|
+
log.debug(f"Requesting import of documents: {request=}")
|
493
497
|
operation = import_documents_with_retry(self.doc_client, request)
|
494
498
|
except ResourceExhausted as e:
|
495
499
|
log.error(f"DiscoveryEngine Operation failed after retries due to quota exceeded: {e}")
|
@@ -632,6 +636,7 @@ class DiscoveryEngineClient:
|
|
632
636
|
str: The operation name.
|
633
637
|
"""
|
634
638
|
try:
|
639
|
+
log.info(f"Importing doc with metadata: {gcs_uri=}, {metadata=}")
|
635
640
|
# 1. Generate a unique document ID
|
636
641
|
document_id = self._create_unique_gsuri_docid(gcs_uri)
|
637
642
|
|
@@ -662,7 +667,7 @@ class DiscoveryEngineClient:
|
|
662
667
|
return self._import_document_request(request)
|
663
668
|
|
664
669
|
except Exception as e:
|
665
|
-
log.error(f"Error importing document with metadata: {e}")
|
670
|
+
log.error(f"Error importing document with metadata: {e} {traceback.format_exc()}")
|
666
671
|
raise e
|
667
672
|
|
668
673
|
def get_mime_type(self, uri:str):
|
@@ -6,7 +6,7 @@ from ..utils.mime import guess_mime_type
|
|
6
6
|
|
7
7
|
from ..custom_logging import log
|
8
8
|
|
9
|
-
def audit_metadata(metadata, chunk_length=None):
|
9
|
+
def audit_metadata(metadata:dict, chunk_length:int=None) -> dict:
|
10
10
|
|
11
11
|
if 'eventTime' not in metadata:
|
12
12
|
metadata['eventTime'] = datetime.datetime.now().isoformat(timespec='microseconds') + "Z"
|
@@ -72,14 +72,14 @@ sunholo/database/sql/sb/delete_source_row.sql,sha256=r6fEuUKdbiLHCDGKSbKINDCpJjs
|
|
72
72
|
sunholo/database/sql/sb/return_sources.sql,sha256=89KAnxfK8n_qGK9jy1OQT8f9n4uYUtYL5cCxbC2mj_c,255
|
73
73
|
sunholo/database/sql/sb/setup.sql,sha256=CvoFvZQev2uWjmFa3aj3m3iuPFzAAJZ0S7Qi3L3-zZI,89
|
74
74
|
sunholo/discovery_engine/__init__.py,sha256=hLgqRDJ22Aov9o2QjAEfsVgnL3kMdM-g5p8RJ9OyKdQ,130
|
75
|
-
sunholo/discovery_engine/chunker_handler.py,sha256=
|
75
|
+
sunholo/discovery_engine/chunker_handler.py,sha256=44qlTpdtz2GKzrhoQrxVMk-RPVFp7vQDPJoe9KmCcsw,7517
|
76
76
|
sunholo/discovery_engine/cli.py,sha256=KGVle5rkLL49oF9TQhrGI--8017IvvLOEoYur545Qb0,12790
|
77
77
|
sunholo/discovery_engine/create_new.py,sha256=WUi4_xh_dFaGX3xA9jkNKZhaR6LCELjMPeRb0hyj4FU,1226
|
78
|
-
sunholo/discovery_engine/discovery_engine_client.py,sha256=
|
78
|
+
sunholo/discovery_engine/discovery_engine_client.py,sha256=8jebH3cccdGxl1XO5txjj0cA1JPgzEZmYSfv9z86UdA,37271
|
79
79
|
sunholo/discovery_engine/get_ai_search_chunks.py,sha256=I6Dt1CznqEvE7XIZ2PkLqopmjpO96iVEWJJqL5cJjOU,5554
|
80
80
|
sunholo/embedder/__init__.py,sha256=sI4N_CqgEVcrMDxXgxKp1FsfsB4FpjoXgPGkl4N_u4I,44
|
81
81
|
sunholo/embedder/embed_chunk.py,sha256=did2pKkWM2o0KkRcb0H9l2x_WjCq6OyuHDxGbITFKPM,6530
|
82
|
-
sunholo/embedder/embed_metadata.py,sha256=
|
82
|
+
sunholo/embedder/embed_metadata.py,sha256=qjv6oELuJRYKvR5SU5YHt-JAc_QfNOTNHbYeEXlQd1o,6617
|
83
83
|
sunholo/excel/__init__.py,sha256=AqTMN9K4qJYi4maEgoORc5oxDVGO_eqmwzDaVP37JgY,56
|
84
84
|
sunholo/excel/plugin.py,sha256=TJJdcKWyqEIce1agCJImvqvNp2CvLhzi4wUmLYHcLc8,4032
|
85
85
|
sunholo/gcs/__init__.py,sha256=SZvbsMFDko40sIRHTHppA37IijvJTae54vrhooEF5-4,90
|
@@ -168,9 +168,9 @@ sunholo/vertex/init.py,sha256=1OQwcPBKZYBTDPdyU7IM4X4OmiXLdsNV30C-fee2scQ,2875
|
|
168
168
|
sunholo/vertex/memory_tools.py,sha256=tBZxqVZ4InTmdBvLlOYwoSEWu4-kGquc-gxDwZCC4FA,7667
|
169
169
|
sunholo/vertex/safety.py,sha256=S9PgQT1O_BQAkcqauWncRJaydiP8Q_Jzmu9gxYfy1VA,2482
|
170
170
|
sunholo/vertex/type_dict_to_json.py,sha256=uTzL4o9tJRao4u-gJOFcACgWGkBOtqACmb6ihvCErL8,4694
|
171
|
-
sunholo-0.126.
|
172
|
-
sunholo-0.126.
|
173
|
-
sunholo-0.126.
|
174
|
-
sunholo-0.126.
|
175
|
-
sunholo-0.126.
|
176
|
-
sunholo-0.126.
|
171
|
+
sunholo-0.126.4.dist-info/licenses/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
|
172
|
+
sunholo-0.126.4.dist-info/METADATA,sha256=RInwd4S0z69DV7OLnEaqcJV3CiQXzoJ1OOJl_AZTXBQ,10001
|
173
|
+
sunholo-0.126.4.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
|
174
|
+
sunholo-0.126.4.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
|
175
|
+
sunholo-0.126.4.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
|
176
|
+
sunholo-0.126.4.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|