sunholo 0.134.2__py3-none-any.whl → 0.134.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -198,16 +198,28 @@ def search_command(args):
198
198
  data_store_id=args.data_store_id, # Target datastore
199
199
  location=args.location
200
200
  )
201
- # This calls get_chunks which returns string or pager
202
- results_data = client.get_chunks(
203
- query=args.query,
204
- # num_previous_chunks=args.num_previous_chunks, # Ensure these args are added to parser if needed
205
- # num_next_chunks=args.num_next_chunks, # Ensure these args are added to parser if needed
206
- page_size=args.page_size,
207
- parse_chunks_to_string=args.parse_chunks_to_string,
208
- serving_config=args.serving_config,
209
- # data_store_ids=args.data_store_ids # Ensure these args are added to parser if needed
210
- )
201
+
202
+ if args.content_search_spec_type == "chunks":
203
+ # This calls get_chunks which returns string or pager
204
+ results_data = client.get_chunks(
205
+ query=args.query,
206
+ # num_previous_chunks=args.num_previous_chunks, # Ensure these args are added to parser if needed
207
+ # num_next_chunks=args.num_next_chunks, # Ensure these args are added to parser if needed
208
+ page_size=args.page_size,
209
+ parse_chunks_to_string=args.parse_chunks_to_string,
210
+ serving_config=args.serving_config,
211
+ # data_store_ids=args.data_store_ids # Ensure these args are added to parser if needed
212
+ )
213
+ elif args.content_search_spec_type == "documents":
214
+ results_data = client.get_documents(
215
+ query=args.query,
216
+ page_size=args.page_size,
217
+ parse_documents_to_string=args.parse_chunks_to_string,
218
+ serving_config=args.serving_config,
219
+ # data_store_ids=args.data_store_ids # Ensure these args are added to parser if needed
220
+ )
221
+ else:
222
+ raise ValueError("Invalid content_search_spec_type. Must be 'chunks' or 'documents'.")
211
223
 
212
224
  if args.parse_chunks_to_string:
213
225
  console.print("\n[bold magenta]--- Combined Chunk String ---[/bold magenta]")
@@ -512,8 +524,9 @@ def setup_discovery_engine_subparser(subparsers):
512
524
  search_parser.add_argument('--query', required=True, help='The search query')
513
525
  search_parser.add_argument('--data-store-id', required=True, help='Data store ID to search')
514
526
  search_parser.add_argument('--page-size', type=int, default=10, help='Max results per page')
515
- search_parser.add_argument('--parse-chunks-to-string', action='store_true', help='Output results as one formatted string')
527
+ search_parser.add_argument('--parse-chunks-to-string', action='store_true', help='Output results as one formatted string. Only applicable for "chunks"')
516
528
  search_parser.add_argument('--serving-config', default='default_config', help='Serving config ID for the data store')
529
+ search_parser.add_argument('--content_search_spec_type', default="chunks", help='"chunks" or "documents" depending on data store type')
517
530
  # Add arguments for num_previous_chunks, num_next_chunks, data_store_ids if needed
518
531
  # search_parser.add_argument('--num-previous-chunks', type=int, default=3)
519
532
  # search_parser.add_argument('--num-next-chunks', type=int, default=3)
@@ -529,6 +542,8 @@ def setup_discovery_engine_subparser(subparsers):
529
542
  search_by_id_parser.add_argument('--page-size', type=int, default=10, help='Max results per page')
530
543
  search_by_id_parser.add_argument('--parse-chunks-to-string', action='store_true', help='Output results as one formatted string')
531
544
  search_by_id_parser.add_argument('--serving-config', default='default_config', help='Serving config ID')
545
+ search_by_id_parser.add_argument('--content_search_spec_type', default="chunks", help='"chunks" or "documents" depending on data store type')
546
+
532
547
  # Add arguments for num_previous_chunks, num_next_chunks, data_store_ids if needed
533
548
  # search_by_id_parser.add_argument('--num-previous-chunks', type=int, default=3)
534
549
  # search_by_id_parser.add_argument('--num-next-chunks', type=int, default=3)
@@ -218,16 +218,16 @@ class DiscoveryEngineClient:
218
218
 
219
219
  Args:
220
220
  query (str): The search query.
221
- collection_id (str): The ID of the collection to search.
222
221
  num_previous_chunks (int, optional): Number of previous chunks to return for context (default is 3).
223
222
  num_next_chunks (int, optional): Number of next chunks to return for context (default is 3).
224
223
  page_size (int, optional): The maximum number of results to return per page (default is 10).
225
224
  parse_chunks_to_string: If True will put chunks in one big string, False will return object
226
225
  serving_config: The resource name of the Search serving config
227
- data_store_ids: If you want to search over many data stores, not just the one that was used to init the class. They should be of the format projects/{project}/locations/{location}/collections/{collection_id}/dataStores/{data_store_id}
226
+ data_store_ids: If you want to search over many data stores, not just the one that was used to init the class.
227
+ They should be of the format projects/{project}/locations/{location}/collections/{collection_id}/dataStores/{data_store_id}
228
228
 
229
229
  Returns:
230
- discoveryengine.SearchResponse: The search response object containing the search results.
230
+ discoveryengine.SearchResponse or str: The search response object or string of chunks.
231
231
 
232
232
  Example:
233
233
  ```python
@@ -237,52 +237,19 @@ class DiscoveryEngineClient:
237
237
  print(f"Chunk: {chunk.snippet}, document name: {chunk.document_name}")
238
238
  ```
239
239
  """
240
-
241
- serving_config_path = self.search_client.serving_config_path(
242
- self.project_id,
243
- self.location,
244
- self.data_store_id,
245
- serving_config
246
- )
247
-
248
- search_request = discoveryengine.SearchRequest(
249
- serving_config=serving_config_path,
240
+ # Use search_with_filters with filter_str=None to perform a regular search
241
+ return self.search_with_filters(
250
242
  query=query,
251
- page_size=page_size,
252
- content_search_spec=discoveryengine.SearchRequest.ContentSearchSpec(
253
- search_result_mode="CHUNKS",
254
- chunk_spec=discoveryengine.SearchRequest.ContentSearchSpec.ChunkSpec(
255
- num_previous_chunks=num_previous_chunks,
256
- num_next_chunks=num_next_chunks,
257
- ),
258
- ),
243
+ filter_str=None,
244
+ num_previous_chunks=num_previous_chunks,
245
+ num_next_chunks=num_next_chunks,
246
+ page_size=page_size,
247
+ parse_chunks_to_string=parse_chunks_to_string,
248
+ serving_config=serving_config,
249
+ data_store_ids=data_store_ids,
250
+ content_search_spec_type="chunks"
259
251
  )
260
252
 
261
- if data_store_ids:
262
- search_request.data_store_specs = [
263
- discoveryengine.SearchRequest.DataStoreSpec(
264
- data_store=self._search_data_store_path(data_store_id, serving_config=serving_config)
265
- )
266
- for data_store_id in data_store_ids
267
- ]
268
-
269
- try:
270
- log.info(f"Discovery engine request: {search_request=}")
271
- search_response = self.search_client.search(search_request)
272
- except Exception as err:
273
- log.warning(f"Error searching {search_request=} - no results found? {str(err)}")
274
- search_response = []
275
-
276
- if parse_chunks_to_string:
277
-
278
- big_string = self.process_chunks(search_response)
279
- log.info(f"Discovery engine chunks string sample: {big_string[:100]}")
280
-
281
- return big_string
282
-
283
- log.info("Discovery engine response object")
284
- return search_response
285
-
286
253
  async def async_get_chunks(
287
254
  self,
288
255
  query: str,
@@ -293,73 +260,32 @@ class DiscoveryEngineClient:
293
260
  serving_config: str = "default_serving_config",
294
261
  data_store_ids: Optional[List[str]] = None,
295
262
  ):
296
- """Retrieves chunks or documents based on a query.
263
+ """Asynchronously retrieves chunks or documents based on a query.
297
264
 
298
265
  Args:
299
266
  query (str): The search query.
300
- collection_id (str): The ID of the collection to search.
301
267
  num_previous_chunks (int, optional): Number of previous chunks to return for context (default is 3).
302
268
  num_next_chunks (int, optional): Number of next chunks to return for context (default is 3).
303
269
  page_size (int, optional): The maximum number of results to return per page (default is 10).
304
270
  parse_chunks_to_string: If True will put chunks in one big string, False will return object
305
271
  serving_config: The resource name of the Search serving config
306
- data_store_ids: If you want to search over many data stores, not just the one that was used to init the class. They should be of the format projects/{project}/locations/{location}/collections/{collection_id}/dataStores/{data_store_id}
272
+ data_store_ids: If you want to search over many data stores, not just the one that was used to init the class.
273
+ They should be of the format projects/{project}/locations/{location}/collections/{collection_id}/dataStores/{data_store_id}
307
274
 
308
275
  Returns:
309
- discoveryengine.SearchResponse: The search response object containing the search results.
310
-
311
- Example:
312
- ```python
313
- search_response = client.get_chunks('your query', 'your_collection_id')
314
- for result in search_response.results:
315
- for chunk in result.document.chunks:
316
- print(f"Chunk: {chunk.snippet}, document name: {chunk.document_name}")
317
- ```
276
+ discoveryengine.SearchResponse or str: The search response object or string of chunks.
318
277
  """
319
-
320
- serving_config_path = self.async_search_client.serving_config_path(
321
- self.project_id,
322
- self.location,
323
- self.data_store_id,
324
- serving_config
325
- )
326
-
327
-
328
- search_request = discoveryengine.SearchRequest(
329
- serving_config=serving_config_path,
278
+ # Use async_search_with_filters with filter_str=None to perform a regular search
279
+ return await self.async_search_with_filters(
330
280
  query=query,
331
- page_size=page_size,
332
- content_search_spec=discoveryengine.SearchRequest.ContentSearchSpec(
333
- search_result_mode="CHUNKS",
334
- chunk_spec=discoveryengine.SearchRequest.ContentSearchSpec.ChunkSpec(
335
- num_previous_chunks=num_previous_chunks,
336
- num_next_chunks=num_next_chunks,
337
- ),
338
- ),
281
+ filter_str=None,
282
+ num_previous_chunks=num_previous_chunks,
283
+ num_next_chunks=num_next_chunks,
284
+ page_size=page_size,
285
+ parse_chunks_to_string=parse_chunks_to_string,
286
+ serving_config=serving_config,
287
+ data_store_ids=data_store_ids
339
288
  )
340
-
341
- if data_store_ids:
342
- search_request.data_store_specs = [
343
- discoveryengine.SearchRequest.DataStoreSpec(data_store=data_store_id)
344
- for data_store_id in data_store_ids
345
- ]
346
-
347
- try:
348
- log.info(f"Discovery engine request: {search_request=}")
349
- search_response = self.async_search_client.search(search_request)
350
- except Exception as err:
351
- log.warning(f"Error searching {search_request=} - no results found? {str(err)}")
352
- search_response = []
353
-
354
- if parse_chunks_to_string:
355
-
356
- big_string = await self.async_process_chunks(search_response)
357
- log.info(f"Discovery engine chunks string sample: {big_string[:100]}")
358
-
359
- return big_string
360
-
361
- log.info("Discovery engine response object")
362
- return search_response
363
289
 
364
290
  def chunk_format(self, chunk):
365
291
 
@@ -432,6 +358,79 @@ class DiscoveryEngineClient:
432
358
  result_string = "\n".join(all_chunks)
433
359
 
434
360
  return result_string
361
+
362
+ def get_documents(
363
+ self,
364
+ query: str,
365
+ page_size: int = 10,
366
+ parse_documents_to_string: bool = True,
367
+ serving_config: str = "default_serving_config",
368
+ data_store_ids: Optional[List[str]] = None,
369
+ ):
370
+ """Retrieves entire documents based on a query.
371
+
372
+ Args:
373
+ query (str): The search query.
374
+ page_size (int, optional): The maximum number of results to return per page (default is 10).
375
+ parse_documents_to_string: If True will put documents in one big string, False will return object
376
+ serving_config: The resource name of the Search serving config
377
+ data_store_ids: If you want to search over many data stores, not just the one that was used to init the class.
378
+ They should be of the format projects/{project}/locations/{location}/collections/{collection_id}/dataStores/{data_store_id}
379
+
380
+ Returns:
381
+ discoveryengine.SearchResponse or str: The search response object or string of documents.
382
+
383
+ Example:
384
+ ```python
385
+ search_response = client.get_documents('your query')
386
+ for result in search_response.results:
387
+ doc = result.document
388
+ print(f"Document: {doc.name}, Title: {doc.derived_struct_data.get('title')}")
389
+ ```
390
+ """
391
+ # Use search_with_filters with content_search_spec_type="documents" to get documents instead of chunks
392
+ return self.search_with_filters(
393
+ query=query,
394
+ filter_str=None,
395
+ page_size=page_size,
396
+ parse_chunks_to_string=parse_documents_to_string,
397
+ serving_config=serving_config,
398
+ data_store_ids=data_store_ids,
399
+ content_search_spec_type="documents"
400
+ )
401
+
402
+ async def async_get_documents(
403
+ self,
404
+ query: str,
405
+ page_size: int = 10,
406
+ parse_documents_to_string: bool = True,
407
+ serving_config: str = "default_serving_config",
408
+ data_store_ids: Optional[List[str]] = None,
409
+ ):
410
+ """Asynchronously retrieves entire documents based on a query.
411
+
412
+ Args:
413
+ query (str): The search query.
414
+ page_size (int, optional): The maximum number of results to return per page (default is 10).
415
+ parse_documents_to_string: If True will put documents in one big string, False will return object
416
+ serving_config: The resource name of the Search serving config
417
+ data_store_ids: If you want to search over many data stores, not just the one that was used to init the class.
418
+ They should be of the format projects/{project}/locations/{location}/collections/{collection_id}/dataStores/{data_store_id}
419
+
420
+ Returns:
421
+ discoveryengine.SearchResponse or str: The search response object or string of documents.
422
+ """
423
+ # async_search_with_filters accepts content_search_spec_type, so passing
424
+ # "documents" here requests whole documents instead of chunks
425
+ return await self.async_search_with_filters(
426
+ query=query,
427
+ filter_str=None,
428
+ page_size=page_size,
429
+ parse_chunks_to_string=parse_documents_to_string,
430
+ serving_config=serving_config,
431
+ data_store_ids=data_store_ids,
432
+ content_search_spec_type="documents"
433
+ )
435
434
 
436
435
  def create_engine(self,
437
436
  engine_id: str,
@@ -693,7 +692,8 @@ class DiscoveryEngineClient:
693
692
  num_previous_chunks=3, num_next_chunks=3,
694
693
  page_size=10, parse_chunks_to_string=True,
695
694
  serving_config="default_serving_config",
696
- data_store_ids: Optional[List[str]] = None):
695
+ data_store_ids: Optional[List[str]] = None,
696
+ content_search_spec_type="chunks"):
697
697
  """
698
698
  Searches with a generic filter string.
699
699
 
@@ -713,17 +713,26 @@ class DiscoveryEngineClient:
713
713
  serving_config
714
714
  )
715
715
 
716
+ if content_search_spec_type == "chunks":
717
+ content_search_spec=discoveryengine.SearchRequest.ContentSearchSpec(
718
+ search_result_mode="CHUNKS",
719
+ chunk_spec=discoveryengine.SearchRequest.ContentSearchSpec.ChunkSpec(
720
+ num_previous_chunks=num_previous_chunks,
721
+ num_next_chunks=num_next_chunks,
722
+ ),
723
+ )
724
+ elif content_search_spec_type == "documents":
725
+ content_search_spec=discoveryengine.SearchRequest.ContentSearchSpec(
726
+ search_result_mode="DOCUMENTS"
727
+ )
728
+ else:
729
+ raise ValueError(f"Unknown content_search_spec_type={content_search_spec_type}")
730
+
716
731
  search_request = discoveryengine.SearchRequest(
717
732
  serving_config=serving_config_path,
718
733
  query=query,
719
734
  page_size=page_size,
720
- content_search_spec=discoveryengine.SearchRequest.ContentSearchSpec(
721
- search_result_mode="CHUNKS",
722
- chunk_spec=discoveryengine.SearchRequest.ContentSearchSpec.ChunkSpec(
723
- num_previous_chunks=num_previous_chunks,
724
- num_next_chunks=num_next_chunks,
725
- ),
726
- ),
735
+ content_search_spec=content_search_spec,
727
736
  filter=filter_str # name:'ANY("king kong")'
728
737
  )
729
738
 
@@ -756,7 +765,8 @@ class DiscoveryEngineClient:
756
765
  num_previous_chunks=3, num_next_chunks=3,
757
766
  page_size=10, parse_chunks_to_string=True,
758
767
  serving_config="default_serving_config",
759
- data_store_ids: Optional[List[str]] = None):
768
+ data_store_ids: Optional[List[str]] = None,
769
+ content_search_spec_type="chunks"):
760
770
  """
761
771
  Searches with a generic filter string asynchronously.
762
772
 
@@ -776,17 +786,26 @@ class DiscoveryEngineClient:
776
786
  serving_config
777
787
  )
778
788
 
789
+ if content_search_spec_type == "chunks":
790
+ content_search_spec=discoveryengine.SearchRequest.ContentSearchSpec(
791
+ search_result_mode="CHUNKS",
792
+ chunk_spec=discoveryengine.SearchRequest.ContentSearchSpec.ChunkSpec(
793
+ num_previous_chunks=num_previous_chunks,
794
+ num_next_chunks=num_next_chunks,
795
+ ),
796
+ )
797
+ elif content_search_spec_type == "documents":
798
+ content_search_spec=discoveryengine.SearchRequest.ContentSearchSpec(
799
+ search_result_mode="DOCUMENTS"
800
+ )
801
+ else:
802
+ raise ValueError(f"Unknown content_search_spec_type={content_search_spec_type}")
803
+
779
804
  search_request = discoveryengine.SearchRequest(
780
805
  serving_config=serving_config_path,
781
806
  query=query,
782
807
  page_size=page_size,
783
- content_search_spec=discoveryengine.SearchRequest.ContentSearchSpec(
784
- search_result_mode="CHUNKS",
785
- chunk_spec=discoveryengine.SearchRequest.ContentSearchSpec.ChunkSpec(
786
- num_previous_chunks=num_previous_chunks,
787
- num_next_chunks=num_next_chunks,
788
- ),
789
- ),
808
+ content_search_spec=content_search_spec,
790
809
  filter=filter_str # name:'ANY("king kong")'
791
810
  )
792
811
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sunholo
3
- Version: 0.134.2
3
+ Version: 0.134.3
4
4
  Summary: AI DevOps - a package to help deploy GenAI to the Cloud.
5
5
  Author-email: Holosun ApS <multivac@sunholo.com>
6
6
  License: Apache License, Version 2.0
@@ -73,9 +73,9 @@ sunholo/database/sql/sb/return_sources.sql,sha256=89KAnxfK8n_qGK9jy1OQT8f9n4uYUt
73
73
  sunholo/database/sql/sb/setup.sql,sha256=CvoFvZQev2uWjmFa3aj3m3iuPFzAAJZ0S7Qi3L3-zZI,89
74
74
  sunholo/discovery_engine/__init__.py,sha256=hLgqRDJ22Aov9o2QjAEfsVgnL3kMdM-g5p8RJ9OyKdQ,130
75
75
  sunholo/discovery_engine/chunker_handler.py,sha256=wkvXl4rFtYfN6AZUKdW9_QD49Whf77BukDbO82UwlAg,7480
76
- sunholo/discovery_engine/cli.py,sha256=so8blTu708TjbyifWdZG_eju0p9L98wLq5Lpl9T-yGo,33159
76
+ sunholo/discovery_engine/cli.py,sha256=bUhCPoKrkMtdeTBHLyCZf9syVwHn5kE0yXpqDBIzmTc,34120
77
77
  sunholo/discovery_engine/create_new.py,sha256=WUi4_xh_dFaGX3xA9jkNKZhaR6LCELjMPeRb0hyj4FU,1226
78
- sunholo/discovery_engine/discovery_engine_client.py,sha256=lB6D05ZOXm9Avl6hM6vJZvPZD_TzNroyBl-E5cJYWAk,52661
78
+ sunholo/discovery_engine/discovery_engine_client.py,sha256=D_OiMiMDScwC426xzgbMpAPNV9Q8xaz4y_waDeRPhVQ,54496
79
79
  sunholo/discovery_engine/get_ai_search_chunks.py,sha256=I6Dt1CznqEvE7XIZ2PkLqopmjpO96iVEWJJqL5cJjOU,5554
80
80
  sunholo/embedder/__init__.py,sha256=sI4N_CqgEVcrMDxXgxKp1FsfsB4FpjoXgPGkl4N_u4I,44
81
81
  sunholo/embedder/embed_chunk.py,sha256=did2pKkWM2o0KkRcb0H9l2x_WjCq6OyuHDxGbITFKPM,6530
@@ -168,9 +168,9 @@ sunholo/vertex/init.py,sha256=1OQwcPBKZYBTDPdyU7IM4X4OmiXLdsNV30C-fee2scQ,2875
168
168
  sunholo/vertex/memory_tools.py,sha256=tBZxqVZ4InTmdBvLlOYwoSEWu4-kGquc-gxDwZCC4FA,7667
169
169
  sunholo/vertex/safety.py,sha256=S9PgQT1O_BQAkcqauWncRJaydiP8Q_Jzmu9gxYfy1VA,2482
170
170
  sunholo/vertex/type_dict_to_json.py,sha256=uTzL4o9tJRao4u-gJOFcACgWGkBOtqACmb6ihvCErL8,4694
171
- sunholo-0.134.2.dist-info/licenses/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
172
- sunholo-0.134.2.dist-info/METADATA,sha256=Ujn_UARqsdpkBJdyHwDnHwS38VO5l58T9vNMD6R55x8,10067
173
- sunholo-0.134.2.dist-info/WHEEL,sha256=pxyMxgL8-pra_rKaQ4drOZAegBVuX-G_4nRHjjgWbmo,91
174
- sunholo-0.134.2.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
175
- sunholo-0.134.2.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
176
- sunholo-0.134.2.dist-info/RECORD,,
171
+ sunholo-0.134.3.dist-info/licenses/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
172
+ sunholo-0.134.3.dist-info/METADATA,sha256=XicSY1z4sd8PfmmNYnZyKvKYEDOMauxj-uf7WCOs328,10067
173
+ sunholo-0.134.3.dist-info/WHEEL,sha256=pxyMxgL8-pra_rKaQ4drOZAegBVuX-G_4nRHjjgWbmo,91
174
+ sunholo-0.134.3.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
175
+ sunholo-0.134.3.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
176
+ sunholo-0.134.3.dist-info/RECORD,,