sunholo 0.126.3__py3-none-any.whl → 0.126.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -36,6 +36,7 @@ def do_discovery_engine(message_data:str, metadata:dict, config:ConfigManager=No
36
36
  if vectorstore == "discovery_engine" or vectorstore == "vertex_ai_search":
37
37
  log.info(f"Found vectorstore {vectorstore}")
38
38
  if value.get('read_only'):
39
+ log.info(f"{vectorstore} is read only, skipping")
39
40
  continue
40
41
 
41
42
  project_id = value.get("project_id")
@@ -50,6 +51,7 @@ def do_discovery_engine(message_data:str, metadata:dict, config:ConfigManager=No
50
51
  if not project_id:
51
52
  raise ValueError("Couldn't retrieve project_id for vertex_ai_search")
52
53
 
54
+ log.info(f"Using {project_id} and {location} for DiscoveryEngineClient")
53
55
  corpus = DiscoveryEngineClient(
54
56
  data_store_id=config.vector_name,
55
57
  project_id=project_id,
@@ -67,10 +69,13 @@ def do_discovery_engine(message_data:str, metadata:dict, config:ConfigManager=No
67
69
  if message_data.startswith("gs://"):
68
70
  log.info(f"DiscoveryEngineClient.import_files for {message_data}")
69
71
  if "/pdf_parts/" in message_data:
72
+ log.info(f"Not processing files with /pdf_parts/ - {message_data}")
70
73
  return None
71
74
  for corp in corpuses:
72
75
  try:
76
+
73
77
  metadata = audit_metadata(metadata, chunk_length=500)
78
+ log.info(f"Importing {message_data} {metadata=} to {corp}")
74
79
  response = corp.import_document_with_metadata(
75
80
  gcs_uri=message_data,
76
81
  metadata=metadata
@@ -160,7 +165,10 @@ def discovery_engine_chunker_check(message_data,
160
165
  try:
161
166
  log.info(f"Process discovery engine for {metadata}")
162
167
  disc_meta = do_discovery_engine(message_data, metadata, config=config)
163
- log.info(f"Processed discovery engine: {disc_meta}")
168
+ if disc_meta is None:
169
+ log.error(f"No disc_meta found for {metadata}")
170
+ else:
171
+ log.info(f"Processed discovery engine: {disc_meta}")
164
172
  except Exception as err:
165
173
  log.error(f"Error processing discovery engine: {str(err)} {traceback.format_exc()}")
166
174
  disc_meta = None
@@ -16,6 +16,7 @@ import asyncio
16
16
  import json
17
17
  import uuid
18
18
  from ..utils.mime import guess_mime_type
19
+ import traceback
19
20
 
20
21
  class DiscoveryEngineClient:
21
22
  """
@@ -80,8 +81,10 @@ class DiscoveryEngineClient:
80
81
  self.async_search_client = discoveryengine.SearchServiceAsyncClient(client_options=client_options)
81
82
  except RuntimeError:
82
83
  # No event loop in non-async environment, set async client to None
83
- log.info("No event loop detected; skipping async client initialization")
84
+ log.info("No event loop detected; skipping Discoveryengine async client initialization")
84
85
  self.async_search_client = None
86
+
87
+ log.info(f"Discovery Engine client initialized with {self.project_id=}, {self.data_store_id=}, {self.location=}")
85
88
 
86
89
  @classmethod
87
90
  def my_retry(cls):
@@ -490,6 +493,7 @@ class DiscoveryEngineClient:
490
493
  return doc_client.import_documents(request=request)
491
494
 
492
495
  try:
496
+ log.debug(f"Requesting import of documents: {request=}")
493
497
  operation = import_documents_with_retry(self.doc_client, request)
494
498
  except ResourceExhausted as e:
495
499
  log.error(f"DiscoveryEngine Operation failed after retries due to quota exceeded: {e}")
@@ -632,6 +636,7 @@ class DiscoveryEngineClient:
632
636
  str: The operation name.
633
637
  """
634
638
  try:
639
+ log.info(f"Importing doc with metadata: {gcs_uri=}, {metadata=}")
635
640
  # 1. Generate a unique document ID
636
641
  document_id = self._create_unique_gsuri_docid(gcs_uri)
637
642
 
@@ -662,7 +667,7 @@ class DiscoveryEngineClient:
662
667
  return self._import_document_request(request)
663
668
 
664
669
  except Exception as e:
665
- log.error(f"Error importing document with metadata: {e}")
670
+ log.error(f"Error importing document with metadata: {e} {traceback.format_exc()}")
666
671
  raise e
667
672
 
668
673
  def get_mime_type(self, uri:str):
@@ -6,7 +6,7 @@ from ..utils.mime import guess_mime_type
6
6
 
7
7
  from ..custom_logging import log
8
8
 
9
- def audit_metadata(metadata, chunk_length=None):
9
+ def audit_metadata(metadata:dict, chunk_length:int=None) -> dict:
10
10
 
11
11
  if 'eventTime' not in metadata:
12
12
  metadata['eventTime'] = datetime.datetime.now().isoformat(timespec='microseconds') + "Z"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sunholo
3
- Version: 0.126.3
3
+ Version: 0.126.4
4
4
  Summary: Large Language Model DevOps - a package to help deploy LLMs to the Cloud.
5
5
  Author-email: Holosun ApS <multivac@sunholo.com>
6
6
  License: Apache License, Version 2.0
@@ -72,14 +72,14 @@ sunholo/database/sql/sb/delete_source_row.sql,sha256=r6fEuUKdbiLHCDGKSbKINDCpJjs
72
72
  sunholo/database/sql/sb/return_sources.sql,sha256=89KAnxfK8n_qGK9jy1OQT8f9n4uYUtYL5cCxbC2mj_c,255
73
73
  sunholo/database/sql/sb/setup.sql,sha256=CvoFvZQev2uWjmFa3aj3m3iuPFzAAJZ0S7Qi3L3-zZI,89
74
74
  sunholo/discovery_engine/__init__.py,sha256=hLgqRDJ22Aov9o2QjAEfsVgnL3kMdM-g5p8RJ9OyKdQ,130
75
- sunholo/discovery_engine/chunker_handler.py,sha256=3KL33prRvN_fwaiZpOIXzxpKFUJdJiPQRZzhxYqunj0,7065
75
+ sunholo/discovery_engine/chunker_handler.py,sha256=44qlTpdtz2GKzrhoQrxVMk-RPVFp7vQDPJoe9KmCcsw,7517
76
76
  sunholo/discovery_engine/cli.py,sha256=KGVle5rkLL49oF9TQhrGI--8017IvvLOEoYur545Qb0,12790
77
77
  sunholo/discovery_engine/create_new.py,sha256=WUi4_xh_dFaGX3xA9jkNKZhaR6LCELjMPeRb0hyj4FU,1226
78
- sunholo/discovery_engine/discovery_engine_client.py,sha256=NjIcP10I2-8yj6QZKrxGzNbh3SqQ5vGYsq9OwxCpWas,36935
78
+ sunholo/discovery_engine/discovery_engine_client.py,sha256=8jebH3cccdGxl1XO5txjj0cA1JPgzEZmYSfv9z86UdA,37271
79
79
  sunholo/discovery_engine/get_ai_search_chunks.py,sha256=I6Dt1CznqEvE7XIZ2PkLqopmjpO96iVEWJJqL5cJjOU,5554
80
80
  sunholo/embedder/__init__.py,sha256=sI4N_CqgEVcrMDxXgxKp1FsfsB4FpjoXgPGkl4N_u4I,44
81
81
  sunholo/embedder/embed_chunk.py,sha256=did2pKkWM2o0KkRcb0H9l2x_WjCq6OyuHDxGbITFKPM,6530
82
- sunholo/embedder/embed_metadata.py,sha256=h9_L3Mkd7Mtnr8OwV4nNRrdSKoxhqh9LnSsht6j-vIY,6600
82
+ sunholo/embedder/embed_metadata.py,sha256=qjv6oELuJRYKvR5SU5YHt-JAc_QfNOTNHbYeEXlQd1o,6617
83
83
  sunholo/excel/__init__.py,sha256=AqTMN9K4qJYi4maEgoORc5oxDVGO_eqmwzDaVP37JgY,56
84
84
  sunholo/excel/plugin.py,sha256=TJJdcKWyqEIce1agCJImvqvNp2CvLhzi4wUmLYHcLc8,4032
85
85
  sunholo/gcs/__init__.py,sha256=SZvbsMFDko40sIRHTHppA37IijvJTae54vrhooEF5-4,90
@@ -168,9 +168,9 @@ sunholo/vertex/init.py,sha256=1OQwcPBKZYBTDPdyU7IM4X4OmiXLdsNV30C-fee2scQ,2875
168
168
  sunholo/vertex/memory_tools.py,sha256=tBZxqVZ4InTmdBvLlOYwoSEWu4-kGquc-gxDwZCC4FA,7667
169
169
  sunholo/vertex/safety.py,sha256=S9PgQT1O_BQAkcqauWncRJaydiP8Q_Jzmu9gxYfy1VA,2482
170
170
  sunholo/vertex/type_dict_to_json.py,sha256=uTzL4o9tJRao4u-gJOFcACgWGkBOtqACmb6ihvCErL8,4694
171
- sunholo-0.126.3.dist-info/licenses/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
172
- sunholo-0.126.3.dist-info/METADATA,sha256=_ekEbwlgG0P-PXQWNNfe9JnJDNDU_zXQ7ehPUNZprTM,10001
173
- sunholo-0.126.3.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
174
- sunholo-0.126.3.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
175
- sunholo-0.126.3.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
176
- sunholo-0.126.3.dist-info/RECORD,,
171
+ sunholo-0.126.4.dist-info/licenses/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
172
+ sunholo-0.126.4.dist-info/METADATA,sha256=RInwd4S0z69DV7OLnEaqcJV3CiQXzoJ1OOJl_AZTXBQ,10001
173
+ sunholo-0.126.4.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
174
+ sunholo-0.126.4.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
175
+ sunholo-0.126.4.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
176
+ sunholo-0.126.4.dist-info/RECORD,,