sunholo 0.126.3__py3-none-any.whl → 0.126.4__py3-none-any.whl
This diff shows the changes between the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
- sunholo/discovery_engine/chunker_handler.py +9 -1
- sunholo/discovery_engine/discovery_engine_client.py +7 -2
- sunholo/embedder/embed_metadata.py +1 -1
- {sunholo-0.126.3.dist-info → sunholo-0.126.4.dist-info}/METADATA +1 -1
- {sunholo-0.126.3.dist-info → sunholo-0.126.4.dist-info}/RECORD +9 -9
- {sunholo-0.126.3.dist-info → sunholo-0.126.4.dist-info}/WHEEL +0 -0
- {sunholo-0.126.3.dist-info → sunholo-0.126.4.dist-info}/entry_points.txt +0 -0
- {sunholo-0.126.3.dist-info → sunholo-0.126.4.dist-info}/licenses/LICENSE.txt +0 -0
- {sunholo-0.126.3.dist-info → sunholo-0.126.4.dist-info}/top_level.txt +0 -0
@@ -36,6 +36,7 @@ def do_discovery_engine(message_data:str, metadata:dict, config:ConfigManager=No
|
|
36
36
|
if vectorstore == "discovery_engine" or vectorstore == "vertex_ai_search":
|
37
37
|
log.info(f"Found vectorstore {vectorstore}")
|
38
38
|
if value.get('read_only'):
|
39
|
+
log.info(f"{vectorstore} is read only, skipping")
|
39
40
|
continue
|
40
41
|
|
41
42
|
project_id = value.get("project_id")
|
@@ -50,6 +51,7 @@ def do_discovery_engine(message_data:str, metadata:dict, config:ConfigManager=No
|
|
50
51
|
if not project_id:
|
51
52
|
raise ValueError("Couldn't retrieve project_id for vertex_ai_search")
|
52
53
|
|
54
|
+
log.info(f"Using {project_id} and {location} for DiscoveryEngineClient")
|
53
55
|
corpus = DiscoveryEngineClient(
|
54
56
|
data_store_id=config.vector_name,
|
55
57
|
project_id=project_id,
|
@@ -67,10 +69,13 @@ def do_discovery_engine(message_data:str, metadata:dict, config:ConfigManager=No
|
|
67
69
|
if message_data.startswith("gs://"):
|
68
70
|
log.info(f"DiscoveryEngineClient.import_files for {message_data}")
|
69
71
|
if "/pdf_parts/" in message_data:
|
72
|
+
log.info(f"Not processing files with /pdf_parts/ - {message_data}")
|
70
73
|
return None
|
71
74
|
for corp in corpuses:
|
72
75
|
try:
|
76
|
+
|
73
77
|
metadata = audit_metadata(metadata, chunk_length=500)
|
78
|
+
log.info(f"Importing {message_data} {metadata=} to {corp}")
|
74
79
|
response = corp.import_document_with_metadata(
|
75
80
|
gcs_uri=message_data,
|
76
81
|
metadata=metadata
|
@@ -160,7 +165,10 @@ def discovery_engine_chunker_check(message_data,
|
|
160
165
|
try:
|
161
166
|
log.info(f"Process discovery engine for {metadata}")
|
162
167
|
disc_meta = do_discovery_engine(message_data, metadata, config=config)
|
163
|
-
|
168
|
+
if disc_meta is None:
|
169
|
+
log.error(f"No disc_meta found for {metadata}")
|
170
|
+
else:
|
171
|
+
log.info(f"Processed discovery engine: {disc_meta}")
|
164
172
|
except Exception as err:
|
165
173
|
log.error(f"Error processing discovery engine: {str(err)} {traceback.format_exc()}")
|
166
174
|
disc_meta = None
|
@@ -16,6 +16,7 @@ import asyncio
|
|
16
16
|
import json
|
17
17
|
import uuid
|
18
18
|
from ..utils.mime import guess_mime_type
|
19
|
+
import traceback
|
19
20
|
|
20
21
|
class DiscoveryEngineClient:
|
21
22
|
"""
|
@@ -80,8 +81,10 @@ class DiscoveryEngineClient:
|
|
80
81
|
self.async_search_client = discoveryengine.SearchServiceAsyncClient(client_options=client_options)
|
81
82
|
except RuntimeError:
|
82
83
|
# No event loop in non-async environment, set async client to None
|
83
|
-
log.info("No event loop detected; skipping async client initialization")
|
84
|
+
log.info("No event loop detected; skipping Discoveryengine async client initialization")
|
84
85
|
self.async_search_client = None
|
86
|
+
|
87
|
+
log.info(f"Discovery Engine client initialized with {self.project_id=}, {self.data_store_id=}, {self.location=}")
|
85
88
|
|
86
89
|
@classmethod
|
87
90
|
def my_retry(cls):
|
@@ -490,6 +493,7 @@ class DiscoveryEngineClient:
|
|
490
493
|
return doc_client.import_documents(request=request)
|
491
494
|
|
492
495
|
try:
|
496
|
+
log.debug(f"Requesting import of documents: {request=}")
|
493
497
|
operation = import_documents_with_retry(self.doc_client, request)
|
494
498
|
except ResourceExhausted as e:
|
495
499
|
log.error(f"DiscoveryEngine Operation failed after retries due to quota exceeded: {e}")
|
@@ -632,6 +636,7 @@ class DiscoveryEngineClient:
|
|
632
636
|
str: The operation name.
|
633
637
|
"""
|
634
638
|
try:
|
639
|
+
log.info(f"Importing doc with metadata: {gcs_uri=}, {metadata=}")
|
635
640
|
# 1. Generate a unique document ID
|
636
641
|
document_id = self._create_unique_gsuri_docid(gcs_uri)
|
637
642
|
|
@@ -662,7 +667,7 @@ class DiscoveryEngineClient:
|
|
662
667
|
return self._import_document_request(request)
|
663
668
|
|
664
669
|
except Exception as e:
|
665
|
-
log.error(f"Error importing document with metadata: {e}")
|
670
|
+
log.error(f"Error importing document with metadata: {e} {traceback.format_exc()}")
|
666
671
|
raise e
|
667
672
|
|
668
673
|
def get_mime_type(self, uri:str):
|
@@ -6,7 +6,7 @@ from ..utils.mime import guess_mime_type
|
|
6
6
|
|
7
7
|
from ..custom_logging import log
|
8
8
|
|
9
|
-
def audit_metadata(metadata, chunk_length=None):
|
9
|
+
def audit_metadata(metadata:dict, chunk_length:int=None) -> dict:
|
10
10
|
|
11
11
|
if 'eventTime' not in metadata:
|
12
12
|
metadata['eventTime'] = datetime.datetime.now().isoformat(timespec='microseconds') + "Z"
|
@@ -72,14 +72,14 @@ sunholo/database/sql/sb/delete_source_row.sql,sha256=r6fEuUKdbiLHCDGKSbKINDCpJjs
|
|
72
72
|
sunholo/database/sql/sb/return_sources.sql,sha256=89KAnxfK8n_qGK9jy1OQT8f9n4uYUtYL5cCxbC2mj_c,255
|
73
73
|
sunholo/database/sql/sb/setup.sql,sha256=CvoFvZQev2uWjmFa3aj3m3iuPFzAAJZ0S7Qi3L3-zZI,89
|
74
74
|
sunholo/discovery_engine/__init__.py,sha256=hLgqRDJ22Aov9o2QjAEfsVgnL3kMdM-g5p8RJ9OyKdQ,130
|
75
|
-
sunholo/discovery_engine/chunker_handler.py,sha256=
|
75
|
+
sunholo/discovery_engine/chunker_handler.py,sha256=44qlTpdtz2GKzrhoQrxVMk-RPVFp7vQDPJoe9KmCcsw,7517
|
76
76
|
sunholo/discovery_engine/cli.py,sha256=KGVle5rkLL49oF9TQhrGI--8017IvvLOEoYur545Qb0,12790
|
77
77
|
sunholo/discovery_engine/create_new.py,sha256=WUi4_xh_dFaGX3xA9jkNKZhaR6LCELjMPeRb0hyj4FU,1226
|
78
|
-
sunholo/discovery_engine/discovery_engine_client.py,sha256=
|
78
|
+
sunholo/discovery_engine/discovery_engine_client.py,sha256=8jebH3cccdGxl1XO5txjj0cA1JPgzEZmYSfv9z86UdA,37271
|
79
79
|
sunholo/discovery_engine/get_ai_search_chunks.py,sha256=I6Dt1CznqEvE7XIZ2PkLqopmjpO96iVEWJJqL5cJjOU,5554
|
80
80
|
sunholo/embedder/__init__.py,sha256=sI4N_CqgEVcrMDxXgxKp1FsfsB4FpjoXgPGkl4N_u4I,44
|
81
81
|
sunholo/embedder/embed_chunk.py,sha256=did2pKkWM2o0KkRcb0H9l2x_WjCq6OyuHDxGbITFKPM,6530
|
82
|
-
sunholo/embedder/embed_metadata.py,sha256=
|
82
|
+
sunholo/embedder/embed_metadata.py,sha256=qjv6oELuJRYKvR5SU5YHt-JAc_QfNOTNHbYeEXlQd1o,6617
|
83
83
|
sunholo/excel/__init__.py,sha256=AqTMN9K4qJYi4maEgoORc5oxDVGO_eqmwzDaVP37JgY,56
|
84
84
|
sunholo/excel/plugin.py,sha256=TJJdcKWyqEIce1agCJImvqvNp2CvLhzi4wUmLYHcLc8,4032
|
85
85
|
sunholo/gcs/__init__.py,sha256=SZvbsMFDko40sIRHTHppA37IijvJTae54vrhooEF5-4,90
|
@@ -168,9 +168,9 @@ sunholo/vertex/init.py,sha256=1OQwcPBKZYBTDPdyU7IM4X4OmiXLdsNV30C-fee2scQ,2875
|
|
168
168
|
sunholo/vertex/memory_tools.py,sha256=tBZxqVZ4InTmdBvLlOYwoSEWu4-kGquc-gxDwZCC4FA,7667
|
169
169
|
sunholo/vertex/safety.py,sha256=S9PgQT1O_BQAkcqauWncRJaydiP8Q_Jzmu9gxYfy1VA,2482
|
170
170
|
sunholo/vertex/type_dict_to_json.py,sha256=uTzL4o9tJRao4u-gJOFcACgWGkBOtqACmb6ihvCErL8,4694
|
171
|
-
sunholo-0.126.
|
172
|
-
sunholo-0.126.
|
173
|
-
sunholo-0.126.
|
174
|
-
sunholo-0.126.
|
175
|
-
sunholo-0.126.
|
176
|
-
sunholo-0.126.
|
171
|
+
sunholo-0.126.4.dist-info/licenses/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
|
172
|
+
sunholo-0.126.4.dist-info/METADATA,sha256=RInwd4S0z69DV7OLnEaqcJV3CiQXzoJ1OOJl_AZTXBQ,10001
|
173
|
+
sunholo-0.126.4.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
|
174
|
+
sunholo-0.126.4.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
|
175
|
+
sunholo-0.126.4.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
|
176
|
+
sunholo-0.126.4.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|