sunholo 0.134.2__py3-none-any.whl → 0.134.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sunholo/discovery_engine/cli.py +26 -11
- sunholo/discovery_engine/discovery_engine_client.py +238 -126
- {sunholo-0.134.2.dist-info → sunholo-0.134.4.dist-info}/METADATA +1 -1
- {sunholo-0.134.2.dist-info → sunholo-0.134.4.dist-info}/RECORD +8 -8
- {sunholo-0.134.2.dist-info → sunholo-0.134.4.dist-info}/WHEEL +0 -0
- {sunholo-0.134.2.dist-info → sunholo-0.134.4.dist-info}/entry_points.txt +0 -0
- {sunholo-0.134.2.dist-info → sunholo-0.134.4.dist-info}/licenses/LICENSE.txt +0 -0
- {sunholo-0.134.2.dist-info → sunholo-0.134.4.dist-info}/top_level.txt +0 -0
sunholo/discovery_engine/cli.py
CHANGED
@@ -198,16 +198,28 @@ def search_command(args):
|
|
198
198
|
data_store_id=args.data_store_id, # Target datastore
|
199
199
|
location=args.location
|
200
200
|
)
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
201
|
+
|
202
|
+
if args.content_search_spec_type == "chunks":
|
203
|
+
# This calls get_chunks which returns string or pager
|
204
|
+
results_data = client.get_chunks(
|
205
|
+
query=args.query,
|
206
|
+
# num_previous_chunks=args.num_previous_chunks, # Ensure these args are added to parser if needed
|
207
|
+
# num_next_chunks=args.num_next_chunks, # Ensure these args are added to parser if needed
|
208
|
+
page_size=args.page_size,
|
209
|
+
parse_chunks_to_string=args.parse_chunks_to_string,
|
210
|
+
serving_config=args.serving_config,
|
211
|
+
# data_store_ids=args.data_store_ids # Ensure these args are added to parser if needed
|
212
|
+
)
|
213
|
+
elif args.content_search_spec_type == "documents":
|
214
|
+
results_data = client.get_documents(
|
215
|
+
query=args.query,
|
216
|
+
page_size=args.page_size,
|
217
|
+
parse_documents_to_string=args.parse_chunks_to_string,
|
218
|
+
serving_config=args.serving_config,
|
219
|
+
# data_store_ids=args.data_store_ids # Ensure these args are added to parser if needed
|
220
|
+
)
|
221
|
+
else:
|
222
|
+
raise ValueError("Invalid content_search_spec_type. Must be 'chunks' or 'documents'.")
|
211
223
|
|
212
224
|
if args.parse_chunks_to_string:
|
213
225
|
console.print("\n[bold magenta]--- Combined Chunk String ---[/bold magenta]")
|
@@ -512,8 +524,9 @@ def setup_discovery_engine_subparser(subparsers):
|
|
512
524
|
search_parser.add_argument('--query', required=True, help='The search query')
|
513
525
|
search_parser.add_argument('--data-store-id', required=True, help='Data store ID to search')
|
514
526
|
search_parser.add_argument('--page-size', type=int, default=10, help='Max results per page')
|
515
|
-
search_parser.add_argument('--parse-chunks-to-string', action='store_true', help='Output results as one formatted string')
|
527
|
+
search_parser.add_argument('--parse-chunks-to-string', action='store_true', help='Output results as one formatted string. Only applicable for "chunks"')
|
516
528
|
search_parser.add_argument('--serving-config', default='default_config', help='Serving config ID for the data store')
|
529
|
+
search_parser.add_argument('--content_search_spec_type', default="chunks", help='"chunks" or "documents" depending on data store type')
|
517
530
|
# Add arguments for num_previous_chunks, num_next_chunks, data_store_ids if needed
|
518
531
|
# search_parser.add_argument('--num-previous-chunks', type=int, default=3)
|
519
532
|
# search_parser.add_argument('--num-next-chunks', type=int, default=3)
|
@@ -529,6 +542,8 @@ def setup_discovery_engine_subparser(subparsers):
|
|
529
542
|
search_by_id_parser.add_argument('--page-size', type=int, default=10, help='Max results per page')
|
530
543
|
search_by_id_parser.add_argument('--parse-chunks-to-string', action='store_true', help='Output results as one formatted string')
|
531
544
|
search_by_id_parser.add_argument('--serving-config', default='default_config', help='Serving config ID')
|
545
|
+
search_by_id_parser.add_argument('--content_search_spec_type', default="chunks", help='"chunks" or "documents" depending on data store type')
|
546
|
+
|
532
547
|
# Add arguments for num_previous_chunks, num_next_chunks, data_store_ids if needed
|
533
548
|
# search_by_id_parser.add_argument('--num-previous-chunks', type=int, default=3)
|
534
549
|
# search_by_id_parser.add_argument('--num-next-chunks', type=int, default=3)
|
@@ -218,16 +218,16 @@ class DiscoveryEngineClient:
|
|
218
218
|
|
219
219
|
Args:
|
220
220
|
query (str): The search query.
|
221
|
-
collection_id (str): The ID of the collection to search.
|
222
221
|
num_previous_chunks (int, optional): Number of previous chunks to return for context (default is 3).
|
223
222
|
num_next_chunks (int, optional): Number of next chunks to return for context (default is 3).
|
224
223
|
page_size (int, optional): The maximum number of results to return per page (default is 10).
|
225
224
|
parse_chunks_to_string: If True will put chunks in one big string, False will return object
|
226
225
|
serving_config: The resource name of the Search serving config
|
227
|
-
data_store_ids: If you want to search over many data stores, not just the one that was used to init the class.
|
226
|
+
data_store_ids: If you want to search over many data stores, not just the one that was used to init the class.
|
227
|
+
They should be of the format projects/{project}/locations/{location}/collections/{collection_id}/dataStores/{data_store_id}
|
228
228
|
|
229
229
|
Returns:
|
230
|
-
discoveryengine.SearchResponse: The search response object
|
230
|
+
discoveryengine.SearchResponse or str: The search response object or string of chunks.
|
231
231
|
|
232
232
|
Example:
|
233
233
|
```python
|
@@ -237,52 +237,19 @@ class DiscoveryEngineClient:
|
|
237
237
|
print(f"Chunk: {chunk.snippet}, document name: {chunk.document_name}")
|
238
238
|
```
|
239
239
|
"""
|
240
|
-
|
241
|
-
|
242
|
-
self.project_id,
|
243
|
-
self.location,
|
244
|
-
self.data_store_id,
|
245
|
-
serving_config
|
246
|
-
)
|
247
|
-
|
248
|
-
search_request = discoveryengine.SearchRequest(
|
249
|
-
serving_config=serving_config_path,
|
240
|
+
# Use search_with_filters with filter_str=None to perform a regular search
|
241
|
+
return self.search_with_filters(
|
250
242
|
query=query,
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
243
|
+
filter_str=None,
|
244
|
+
num_previous_chunks=num_previous_chunks,
|
245
|
+
num_next_chunks=num_next_chunks,
|
246
|
+
page_size=page_size,
|
247
|
+
parse_chunks_to_string=parse_chunks_to_string,
|
248
|
+
serving_config=serving_config,
|
249
|
+
data_store_ids=data_store_ids,
|
250
|
+
content_search_spec_type="chunks"
|
259
251
|
)
|
260
252
|
|
261
|
-
if data_store_ids:
|
262
|
-
search_request.data_store_specs = [
|
263
|
-
discoveryengine.SearchRequest.DataStoreSpec(
|
264
|
-
data_store=self._search_data_store_path(data_store_id, serving_config=serving_config)
|
265
|
-
)
|
266
|
-
for data_store_id in data_store_ids
|
267
|
-
]
|
268
|
-
|
269
|
-
try:
|
270
|
-
log.info(f"Discovery engine request: {search_request=}")
|
271
|
-
search_response = self.search_client.search(search_request)
|
272
|
-
except Exception as err:
|
273
|
-
log.warning(f"Error searching {search_request=} - no results found? {str(err)}")
|
274
|
-
search_response = []
|
275
|
-
|
276
|
-
if parse_chunks_to_string:
|
277
|
-
|
278
|
-
big_string = self.process_chunks(search_response)
|
279
|
-
log.info(f"Discovery engine chunks string sample: {big_string[:100]}")
|
280
|
-
|
281
|
-
return big_string
|
282
|
-
|
283
|
-
log.info("Discovery engine response object")
|
284
|
-
return search_response
|
285
|
-
|
286
253
|
async def async_get_chunks(
|
287
254
|
self,
|
288
255
|
query: str,
|
@@ -293,73 +260,32 @@ class DiscoveryEngineClient:
|
|
293
260
|
serving_config: str = "default_serving_config",
|
294
261
|
data_store_ids: Optional[List[str]] = None,
|
295
262
|
):
|
296
|
-
"""
|
263
|
+
"""Asynchronously retrieves chunks or documents based on a query.
|
297
264
|
|
298
265
|
Args:
|
299
266
|
query (str): The search query.
|
300
|
-
collection_id (str): The ID of the collection to search.
|
301
267
|
num_previous_chunks (int, optional): Number of previous chunks to return for context (default is 3).
|
302
268
|
num_next_chunks (int, optional): Number of next chunks to return for context (default is 3).
|
303
269
|
page_size (int, optional): The maximum number of results to return per page (default is 10).
|
304
270
|
parse_chunks_to_string: If True will put chunks in one big string, False will return object
|
305
271
|
serving_config: The resource name of the Search serving config
|
306
|
-
data_store_ids: If you want to search over many data stores, not just the one that was used to init the class.
|
272
|
+
data_store_ids: If you want to search over many data stores, not just the one that was used to init the class.
|
273
|
+
They should be of the format projects/{project}/locations/{location}/collections/{collection_id}/dataStores/{data_store_id}
|
307
274
|
|
308
275
|
Returns:
|
309
|
-
discoveryengine.SearchResponse: The search response object
|
310
|
-
|
311
|
-
Example:
|
312
|
-
```python
|
313
|
-
search_response = client.get_chunks('your query', 'your_collection_id')
|
314
|
-
for result in search_response.results:
|
315
|
-
for chunk in result.document.chunks:
|
316
|
-
print(f"Chunk: {chunk.snippet}, document name: {chunk.document_name}")
|
317
|
-
```
|
276
|
+
discoveryengine.SearchResponse or str: The search response object or string of chunks.
|
318
277
|
"""
|
319
|
-
|
320
|
-
|
321
|
-
self.project_id,
|
322
|
-
self.location,
|
323
|
-
self.data_store_id,
|
324
|
-
serving_config
|
325
|
-
)
|
326
|
-
|
327
|
-
|
328
|
-
search_request = discoveryengine.SearchRequest(
|
329
|
-
serving_config=serving_config_path,
|
278
|
+
# Use async_search_with_filters with filter_str=None to perform a regular search
|
279
|
+
return await self.async_search_with_filters(
|
330
280
|
query=query,
|
331
|
-
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
|
336
|
-
|
337
|
-
|
338
|
-
),
|
281
|
+
filter_str=None,
|
282
|
+
num_previous_chunks=num_previous_chunks,
|
283
|
+
num_next_chunks=num_next_chunks,
|
284
|
+
page_size=page_size,
|
285
|
+
parse_chunks_to_string=parse_chunks_to_string,
|
286
|
+
serving_config=serving_config,
|
287
|
+
data_store_ids=data_store_ids
|
339
288
|
)
|
340
|
-
|
341
|
-
if data_store_ids:
|
342
|
-
search_request.data_store_specs = [
|
343
|
-
discoveryengine.SearchRequest.DataStoreSpec(data_store=data_store_id)
|
344
|
-
for data_store_id in data_store_ids
|
345
|
-
]
|
346
|
-
|
347
|
-
try:
|
348
|
-
log.info(f"Discovery engine request: {search_request=}")
|
349
|
-
search_response = self.async_search_client.search(search_request)
|
350
|
-
except Exception as err:
|
351
|
-
log.warning(f"Error searching {search_request=} - no results found? {str(err)}")
|
352
|
-
search_response = []
|
353
|
-
|
354
|
-
if parse_chunks_to_string:
|
355
|
-
|
356
|
-
big_string = await self.async_process_chunks(search_response)
|
357
|
-
log.info(f"Discovery engine chunks string sample: {big_string[:100]}")
|
358
|
-
|
359
|
-
return big_string
|
360
|
-
|
361
|
-
log.info("Discovery engine response object")
|
362
|
-
return search_response
|
363
289
|
|
364
290
|
def chunk_format(self, chunk):
|
365
291
|
|
@@ -383,8 +309,11 @@ class DiscoveryEngineClient:
|
|
383
309
|
|
384
310
|
# Iterate through each result in the response
|
385
311
|
for result in response.results:
|
386
|
-
|
387
|
-
|
312
|
+
if hasattr(result, 'chunk'):
|
313
|
+
chunk = result.chunk
|
314
|
+
chunk_metadata = chunk.ChunkMetadata
|
315
|
+
else:
|
316
|
+
log.warning("No chunk found in result")
|
388
317
|
|
389
318
|
if hasattr(chunk_metadata, 'previous_chunks'):
|
390
319
|
# Process previous chunks
|
@@ -413,8 +342,11 @@ class DiscoveryEngineClient:
|
|
413
342
|
|
414
343
|
# Iterate through each result in the response
|
415
344
|
for result in response.results:
|
416
|
-
|
417
|
-
|
345
|
+
if hasattr(result, 'chunk'):
|
346
|
+
chunk = result.chunk
|
347
|
+
chunk_metadata = chunk.ChunkMetadata
|
348
|
+
else:
|
349
|
+
log.warning("No chunk found in result")
|
418
350
|
|
419
351
|
if hasattr(chunk_metadata, 'previous_chunks'):
|
420
352
|
# Process previous chunks
|
@@ -432,6 +364,156 @@ class DiscoveryEngineClient:
|
|
432
364
|
result_string = "\n".join(all_chunks)
|
433
365
|
|
434
366
|
return result_string
|
367
|
+
|
368
|
+
def get_documents(
|
369
|
+
self,
|
370
|
+
query: str,
|
371
|
+
page_size: int = 10,
|
372
|
+
parse_documents_to_string: bool = True,
|
373
|
+
serving_config: str = "default_serving_config",
|
374
|
+
data_store_ids: Optional[List[str]] = None,
|
375
|
+
):
|
376
|
+
"""Retrieves entire documents based on a query.
|
377
|
+
|
378
|
+
Args:
|
379
|
+
query (str): The search query.
|
380
|
+
page_size (int, optional): The maximum number of results to return per page (default is 10).
|
381
|
+
parse_documents_to_string: If True will put documents in one big string, False will return object
|
382
|
+
serving_config: The resource name of the Search serving config
|
383
|
+
data_store_ids: If you want to search over many data stores, not just the one that was used to init the class.
|
384
|
+
They should be of the format projects/{project}/locations/{location}/collections/{collection_id}/dataStores/{data_store_id}
|
385
|
+
|
386
|
+
Returns:
|
387
|
+
discoveryengine.SearchResponse or str: The search response object or string of documents.
|
388
|
+
|
389
|
+
Example:
|
390
|
+
```python
|
391
|
+
search_response = client.get_documents('your query')
|
392
|
+
for result in search_response.results:
|
393
|
+
doc = result.document
|
394
|
+
print(f"Document: {doc.name}, Title: {doc.derived_struct_data.get('title')}")
|
395
|
+
```
|
396
|
+
"""
|
397
|
+
# Use search_with_filters with content_search_spec_type="documents" to get documents instead of chunks
|
398
|
+
return self.search_with_filters(
|
399
|
+
query=query,
|
400
|
+
filter_str=None,
|
401
|
+
page_size=page_size,
|
402
|
+
parse_chunks_to_string=parse_documents_to_string,
|
403
|
+
serving_config=serving_config,
|
404
|
+
data_store_ids=data_store_ids,
|
405
|
+
content_search_spec_type="documents"
|
406
|
+
)
|
407
|
+
|
408
|
+
async def async_get_documents(
|
409
|
+
self,
|
410
|
+
query: str,
|
411
|
+
page_size: int = 10,
|
412
|
+
parse_documents_to_string: bool = True,
|
413
|
+
serving_config: str = "default_serving_config",
|
414
|
+
data_store_ids: Optional[List[str]] = None,
|
415
|
+
):
|
416
|
+
"""Asynchronously retrieves entire documents based on a query.
|
417
|
+
|
418
|
+
Args:
|
419
|
+
query (str): The search query.
|
420
|
+
page_size (int, optional): The maximum number of results to return per page (default is 10).
|
421
|
+
parse_documents_to_string: If True will put documents in one big string, False will return object
|
422
|
+
serving_config: The resource name of the Search serving config
|
423
|
+
data_store_ids: If you want to search over many data stores, not just the one that was used to init the class.
|
424
|
+
They should be of the format projects/{project}/locations/{location}/collections/{collection_id}/dataStores/{data_store_id}
|
425
|
+
|
426
|
+
Returns:
|
427
|
+
discoveryengine.SearchResponse or str: The search response object or string of documents.
|
428
|
+
"""
|
429
|
+
# Note: You'll need to update async_search_with_filters to handle content_search_spec_type
|
430
|
+
# as it doesn't currently have that parameter
|
431
|
+
return await self.async_search_with_filters(
|
432
|
+
query=query,
|
433
|
+
filter_str=None,
|
434
|
+
page_size=page_size,
|
435
|
+
parse_chunks_to_string=parse_documents_to_string,
|
436
|
+
serving_config=serving_config,
|
437
|
+
data_store_ids=data_store_ids,
|
438
|
+
content_search_spec_type="documents"
|
439
|
+
)
|
440
|
+
|
441
|
+
def document_format(self, document):
|
442
|
+
"""Format a document for string output."""
|
443
|
+
# Extract useful fields from the document
|
444
|
+
document_id = document.id
|
445
|
+
document_name = document.name
|
446
|
+
|
447
|
+
# Get content if available
|
448
|
+
content = ""
|
449
|
+
if hasattr(document, 'content') and document.content:
|
450
|
+
if hasattr(document.content, 'uri') and document.content.uri:
|
451
|
+
content = f"Content URI: {document.content.uri}\n"
|
452
|
+
if hasattr(document.content, 'mime_type') and document.content.mime_type:
|
453
|
+
content += f"Content Type: {document.content.mime_type}\n"
|
454
|
+
|
455
|
+
# Get structured data if available
|
456
|
+
struct_data = ""
|
457
|
+
if hasattr(document, 'struct_data') and document.struct_data:
|
458
|
+
struct_data = f"Structured Data: {dict(document.struct_data)}\n"
|
459
|
+
|
460
|
+
# Get derived structured data if available
|
461
|
+
derived_data = ""
|
462
|
+
if hasattr(document, 'derived_struct_data') and document.derived_struct_data:
|
463
|
+
derived_data = f"Derived Data: {dict(document.derived_struct_data)}\n"
|
464
|
+
|
465
|
+
# Return formatted document string
|
466
|
+
return (
|
467
|
+
f"# Document: {document_id}\n"
|
468
|
+
f"Resource Name: {document_name}\n"
|
469
|
+
f"{content}"
|
470
|
+
f"{struct_data}"
|
471
|
+
f"{derived_data}"
|
472
|
+
)
|
473
|
+
|
474
|
+
def process_documents(self, response):
|
475
|
+
"""Process a search response containing documents into a formatted string."""
|
476
|
+
all_documents = []
|
477
|
+
|
478
|
+
# Check if the response contains results
|
479
|
+
if not hasattr(response, 'results') or not response.results:
|
480
|
+
log.info(f'No results found in response: {response=}')
|
481
|
+
return []
|
482
|
+
|
483
|
+
# Iterate through each result in the response
|
484
|
+
for result in response.results:
|
485
|
+
if hasattr(result, 'document'):
|
486
|
+
document = result.document
|
487
|
+
all_documents.append(self.document_format(document))
|
488
|
+
else:
|
489
|
+
log.warning("No document found in result")
|
490
|
+
|
491
|
+
# Combine all documents into one long string
|
492
|
+
result_string = "\n\n".join(all_documents)
|
493
|
+
|
494
|
+
return result_string
|
495
|
+
|
496
|
+
async def async_process_documents(self, response):
|
497
|
+
"""Process a search response containing documents into a formatted string asynchronously."""
|
498
|
+
all_documents = []
|
499
|
+
|
500
|
+
# Check if the response contains results
|
501
|
+
if not hasattr(response, 'results') or not response.results:
|
502
|
+
log.info(f'No results found in response: {response=}')
|
503
|
+
return []
|
504
|
+
|
505
|
+
# Iterate through each result in the response
|
506
|
+
for result in response.results:
|
507
|
+
if hasattr(result, 'document'):
|
508
|
+
document = result.document
|
509
|
+
all_documents.append(self.document_format(document))
|
510
|
+
else:
|
511
|
+
log.warning("No document found in result")
|
512
|
+
|
513
|
+
# Combine all documents into one long string
|
514
|
+
result_string = "\n\n".join(all_documents)
|
515
|
+
|
516
|
+
return result_string
|
435
517
|
|
436
518
|
def create_engine(self,
|
437
519
|
engine_id: str,
|
@@ -693,7 +775,8 @@ class DiscoveryEngineClient:
|
|
693
775
|
num_previous_chunks=3, num_next_chunks=3,
|
694
776
|
page_size=10, parse_chunks_to_string=True,
|
695
777
|
serving_config="default_serving_config",
|
696
|
-
data_store_ids: Optional[List[str]] = None
|
778
|
+
data_store_ids: Optional[List[str]] = None,
|
779
|
+
content_search_spec_type="chunks"):
|
697
780
|
"""
|
698
781
|
Searches with a generic filter string.
|
699
782
|
|
@@ -713,17 +796,26 @@ class DiscoveryEngineClient:
|
|
713
796
|
serving_config
|
714
797
|
)
|
715
798
|
|
799
|
+
if content_search_spec_type == "chunks":
|
800
|
+
content_search_spec=discoveryengine.SearchRequest.ContentSearchSpec(
|
801
|
+
search_result_mode="CHUNKS",
|
802
|
+
chunk_spec=discoveryengine.SearchRequest.ContentSearchSpec.ChunkSpec(
|
803
|
+
num_previous_chunks=num_previous_chunks,
|
804
|
+
num_next_chunks=num_next_chunks,
|
805
|
+
),
|
806
|
+
)
|
807
|
+
elif content_search_spec_type == "documents":
|
808
|
+
content_search_spec=discoveryengine.SearchRequest.ContentSearchSpec(
|
809
|
+
search_result_mode="DOCUMENTS"
|
810
|
+
)
|
811
|
+
else:
|
812
|
+
raise ValueError(f"Unknown content_search_spec_type={content_search_spec_type}")
|
813
|
+
|
716
814
|
search_request = discoveryengine.SearchRequest(
|
717
815
|
serving_config=serving_config_path,
|
718
816
|
query=query,
|
719
817
|
page_size=page_size,
|
720
|
-
content_search_spec=
|
721
|
-
search_result_mode="CHUNKS",
|
722
|
-
chunk_spec=discoveryengine.SearchRequest.ContentSearchSpec.ChunkSpec(
|
723
|
-
num_previous_chunks=num_previous_chunks,
|
724
|
-
num_next_chunks=num_next_chunks,
|
725
|
-
),
|
726
|
-
),
|
818
|
+
content_search_spec=content_search_spec,
|
727
819
|
filter=filter_str # name:'ANY("king kong")'
|
728
820
|
)
|
729
821
|
|
@@ -744,9 +836,14 @@ class DiscoveryEngineClient:
|
|
744
836
|
log.info(f"No results {search_request.data_store_specs=}: {str(e)}")
|
745
837
|
return None
|
746
838
|
|
747
|
-
if
|
748
|
-
|
749
|
-
|
839
|
+
if content_search_spec_type=="chunks":
|
840
|
+
if parse_chunks_to_string:
|
841
|
+
big_string = self.process_chunks(search_response)
|
842
|
+
log.info(f"Discovery engine chunks string sample: {big_string[:100]}")
|
843
|
+
return big_string
|
844
|
+
elif content_search_spec_type=="documents":
|
845
|
+
big_string = self.process_documents(search_response)
|
846
|
+
log.info(f"Discovery engine documents string sample: {big_string[:100]}")
|
750
847
|
return big_string
|
751
848
|
|
752
849
|
log.info("Discovery engine response object")
|
@@ -756,7 +853,8 @@ class DiscoveryEngineClient:
|
|
756
853
|
num_previous_chunks=3, num_next_chunks=3,
|
757
854
|
page_size=10, parse_chunks_to_string=True,
|
758
855
|
serving_config="default_serving_config",
|
759
|
-
data_store_ids: Optional[List[str]] = None
|
856
|
+
data_store_ids: Optional[List[str]] = None,
|
857
|
+
content_search_spec_type="chunks"):
|
760
858
|
"""
|
761
859
|
Searches with a generic filter string asynchronously.
|
762
860
|
|
@@ -776,17 +874,26 @@ class DiscoveryEngineClient:
|
|
776
874
|
serving_config
|
777
875
|
)
|
778
876
|
|
877
|
+
if content_search_spec_type == "chunks":
|
878
|
+
content_search_spec=discoveryengine.SearchRequest.ContentSearchSpec(
|
879
|
+
search_result_mode="CHUNKS",
|
880
|
+
chunk_spec=discoveryengine.SearchRequest.ContentSearchSpec.ChunkSpec(
|
881
|
+
num_previous_chunks=num_previous_chunks,
|
882
|
+
num_next_chunks=num_next_chunks,
|
883
|
+
),
|
884
|
+
)
|
885
|
+
elif content_search_spec_type == "documents":
|
886
|
+
content_search_spec=discoveryengine.SearchRequest.ContentSearchSpec(
|
887
|
+
search_result_mode="DOCUMENTS"
|
888
|
+
)
|
889
|
+
else:
|
890
|
+
raise ValueError(f"Unknown content_search_spec_type={content_search_spec_type}")
|
891
|
+
|
779
892
|
search_request = discoveryengine.SearchRequest(
|
780
893
|
serving_config=serving_config_path,
|
781
894
|
query=query,
|
782
895
|
page_size=page_size,
|
783
|
-
content_search_spec=
|
784
|
-
search_result_mode="CHUNKS",
|
785
|
-
chunk_spec=discoveryengine.SearchRequest.ContentSearchSpec.ChunkSpec(
|
786
|
-
num_previous_chunks=num_previous_chunks,
|
787
|
-
num_next_chunks=num_next_chunks,
|
788
|
-
),
|
789
|
-
),
|
896
|
+
content_search_spec=content_search_spec,
|
790
897
|
filter=filter_str # name:'ANY("king kong")'
|
791
898
|
)
|
792
899
|
|
@@ -805,9 +912,14 @@ class DiscoveryEngineClient:
|
|
805
912
|
log.info(f"No results {search_request.data_store_specs=}: {str(e)}")
|
806
913
|
return None
|
807
914
|
|
808
|
-
if
|
809
|
-
|
810
|
-
|
915
|
+
if content_search_spec_type=="chunks":
|
916
|
+
if parse_chunks_to_string:
|
917
|
+
big_string = self.process_chunks(search_response)
|
918
|
+
log.info(f"Discovery engine chunks string sample: {big_string[:100]}")
|
919
|
+
return big_string
|
920
|
+
elif content_search_spec_type=="documents":
|
921
|
+
big_string = self.process_documents(search_response)
|
922
|
+
log.info(f"Discovery engine documents string sample: {big_string[:100]}")
|
811
923
|
return big_string
|
812
924
|
|
813
925
|
log.info("Discovery engine response object")
|
@@ -73,9 +73,9 @@ sunholo/database/sql/sb/return_sources.sql,sha256=89KAnxfK8n_qGK9jy1OQT8f9n4uYUt
|
|
73
73
|
sunholo/database/sql/sb/setup.sql,sha256=CvoFvZQev2uWjmFa3aj3m3iuPFzAAJZ0S7Qi3L3-zZI,89
|
74
74
|
sunholo/discovery_engine/__init__.py,sha256=hLgqRDJ22Aov9o2QjAEfsVgnL3kMdM-g5p8RJ9OyKdQ,130
|
75
75
|
sunholo/discovery_engine/chunker_handler.py,sha256=wkvXl4rFtYfN6AZUKdW9_QD49Whf77BukDbO82UwlAg,7480
|
76
|
-
sunholo/discovery_engine/cli.py,sha256=
|
76
|
+
sunholo/discovery_engine/cli.py,sha256=bUhCPoKrkMtdeTBHLyCZf9syVwHn5kE0yXpqDBIzmTc,34120
|
77
77
|
sunholo/discovery_engine/create_new.py,sha256=WUi4_xh_dFaGX3xA9jkNKZhaR6LCELjMPeRb0hyj4FU,1226
|
78
|
-
sunholo/discovery_engine/discovery_engine_client.py,sha256=
|
78
|
+
sunholo/discovery_engine/discovery_engine_client.py,sha256=Ak3VpadtgpPWfIEot87EiNh4vbDUg9gQVa-1UDnoGMA,58442
|
79
79
|
sunholo/discovery_engine/get_ai_search_chunks.py,sha256=I6Dt1CznqEvE7XIZ2PkLqopmjpO96iVEWJJqL5cJjOU,5554
|
80
80
|
sunholo/embedder/__init__.py,sha256=sI4N_CqgEVcrMDxXgxKp1FsfsB4FpjoXgPGkl4N_u4I,44
|
81
81
|
sunholo/embedder/embed_chunk.py,sha256=did2pKkWM2o0KkRcb0H9l2x_WjCq6OyuHDxGbITFKPM,6530
|
@@ -168,9 +168,9 @@ sunholo/vertex/init.py,sha256=1OQwcPBKZYBTDPdyU7IM4X4OmiXLdsNV30C-fee2scQ,2875
|
|
168
168
|
sunholo/vertex/memory_tools.py,sha256=tBZxqVZ4InTmdBvLlOYwoSEWu4-kGquc-gxDwZCC4FA,7667
|
169
169
|
sunholo/vertex/safety.py,sha256=S9PgQT1O_BQAkcqauWncRJaydiP8Q_Jzmu9gxYfy1VA,2482
|
170
170
|
sunholo/vertex/type_dict_to_json.py,sha256=uTzL4o9tJRao4u-gJOFcACgWGkBOtqACmb6ihvCErL8,4694
|
171
|
-
sunholo-0.134.
|
172
|
-
sunholo-0.134.
|
173
|
-
sunholo-0.134.
|
174
|
-
sunholo-0.134.
|
175
|
-
sunholo-0.134.
|
176
|
-
sunholo-0.134.
|
171
|
+
sunholo-0.134.4.dist-info/licenses/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
|
172
|
+
sunholo-0.134.4.dist-info/METADATA,sha256=g2a99cYiyPfDeVa55srGe2k_5_MHsJhzwBll43hQ56M,10067
|
173
|
+
sunholo-0.134.4.dist-info/WHEEL,sha256=pxyMxgL8-pra_rKaQ4drOZAegBVuX-G_4nRHjjgWbmo,91
|
174
|
+
sunholo-0.134.4.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
|
175
|
+
sunholo-0.134.4.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
|
176
|
+
sunholo-0.134.4.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|