sunholo 0.134.4__py3-none-any.whl → 0.134.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sunholo/discovery_engine/cli.py +57 -32
- sunholo/discovery_engine/discovery_engine_client.py +21 -12
- {sunholo-0.134.4.dist-info → sunholo-0.134.6.dist-info}/METADATA +1 -1
- {sunholo-0.134.4.dist-info → sunholo-0.134.6.dist-info}/RECORD +8 -8
- {sunholo-0.134.4.dist-info → sunholo-0.134.6.dist-info}/WHEEL +0 -0
- {sunholo-0.134.4.dist-info → sunholo-0.134.6.dist-info}/entry_points.txt +0 -0
- {sunholo-0.134.4.dist-info → sunholo-0.134.6.dist-info}/licenses/LICENSE.txt +0 -0
- {sunholo-0.134.4.dist-info → sunholo-0.134.6.dist-info}/top_level.txt +0 -0
sunholo/discovery_engine/cli.py
CHANGED
@@ -191,7 +191,7 @@ def import_document_with_metadata_command(args):
|
|
191
191
|
|
192
192
|
def search_command(args):
|
193
193
|
"""Handles the `discovery-engine search` subcommand (Data Store Chunks)."""
|
194
|
-
console.print(f"[cyan]Searching data store '{args.data_store_id}' for query: '{args.query}' (mode:
|
194
|
+
console.print(f"[cyan]Searching data store '{args.data_store_id}' for query: '{args.query}' (mode: {args.content_search_spec_type})[/cyan]")
|
195
195
|
try:
|
196
196
|
client = DiscoveryEngineClient(
|
197
197
|
project_id=args.project,
|
@@ -208,7 +208,7 @@ def search_command(args):
|
|
208
208
|
page_size=args.page_size,
|
209
209
|
parse_chunks_to_string=args.parse_chunks_to_string,
|
210
210
|
serving_config=args.serving_config,
|
211
|
-
|
211
|
+
filter_str=args.filter,
|
212
212
|
)
|
213
213
|
elif args.content_search_spec_type == "documents":
|
214
214
|
results_data = client.get_documents(
|
@@ -216,7 +216,7 @@ def search_command(args):
|
|
216
216
|
page_size=args.page_size,
|
217
217
|
parse_documents_to_string=args.parse_chunks_to_string,
|
218
218
|
serving_config=args.serving_config,
|
219
|
-
|
219
|
+
filter_str=args.filter,
|
220
220
|
)
|
221
221
|
else:
|
222
222
|
raise ValueError("Invalid content_search_spec_type. Must be 'chunks' or 'documents'.")
|
@@ -224,34 +224,56 @@ def search_command(args):
|
|
224
224
|
if args.parse_chunks_to_string:
|
225
225
|
console.print("\n[bold magenta]--- Combined Chunk String ---[/bold magenta]")
|
226
226
|
console.print(results_data if results_data else "[yellow]No results found or error occurred.[/yellow]")
|
227
|
+
elif isinstance(results_data, str):
|
228
|
+
# Handle string result when parse_chunks_to_string is False but a string was returned anyway
|
229
|
+
console.print("\n[bold magenta]--- Results String ---[/bold magenta]")
|
230
|
+
console.print(results_data)
|
227
231
|
elif results_data: # It's a pager object
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
232
|
+
if args.content_search_spec_type == "chunks":
|
233
|
+
console.print("\n[bold magenta]--- Individual Chunks ---[/bold magenta]")
|
234
|
+
chunk_count = 0
|
235
|
+
try:
|
236
|
+
# Iterate through the pager returned by get_chunks
|
237
|
+
for page in results_data.pages:
|
238
|
+
if not hasattr(page, 'results') or not page.results: continue
|
239
|
+
for result in page.results:
|
240
|
+
# Ensure the result structure is as expected by get_chunks
|
241
|
+
if hasattr(result, 'chunk'):
|
242
|
+
chunk_count += 1
|
243
|
+
console.print(f"\n[bold]Chunk {chunk_count}:[/bold]")
|
244
|
+
# Use the client's formatter if available
|
245
|
+
console.print(client.chunk_format(result.chunk))
|
246
|
+
elif hasattr(result, 'document') and hasattr(result.document, 'chunks'):
|
247
|
+
# Fallback if structure is different (e.g., document with chunks)
|
248
|
+
for chunk in result.document.chunks:
|
249
|
+
chunk_count += 1
|
250
|
+
console.print(f"\n[bold]Chunk {chunk_count} (from doc {result.document.id}):[/bold]")
|
251
|
+
console.print(f" Content: {getattr(chunk, 'content', 'N/A')}")
|
252
|
+
console.print(f" Doc Name: {getattr(chunk, 'document_metadata', {}).get('name', 'N/A')}")
|
253
|
+
if chunk_count == 0:
|
254
|
+
console.print("[yellow]No chunks found in the results.[/yellow]")
|
255
|
+
|
256
|
+
except Exception as page_err:
|
257
|
+
console.print(f"[bold red]Error processing search results pager: {page_err}[/bold red]")
|
258
|
+
console.print(f"[red]{traceback.format_exc()}[/red]")
|
259
|
+
elif args.content_search_spec_type == "documents":
|
260
|
+
console.print("\n[bold magenta]--- Individual Documents ---[/bold magenta]")
|
261
|
+
doc_count = 0
|
262
|
+
try:
|
263
|
+
# Iterate through the pager returned by get_documents
|
264
|
+
for page in results_data.pages:
|
265
|
+
if not hasattr(page, 'results') or not page.results: continue
|
266
|
+
for result in page.results:
|
267
|
+
if hasattr(result, 'document'):
|
268
|
+
doc_count += 1
|
269
|
+
console.print(f"\n[bold]Document {doc_count}:[/bold]")
|
270
|
+
console.print(client.document_format(result.document))
|
271
|
+
|
272
|
+
if doc_count == 0:
|
273
|
+
console.print("[yellow]No documents found in the results.[/yellow]")
|
274
|
+
except Exception as page_err:
|
275
|
+
console.print(f"[bold red]Error processing document results: {page_err}[/bold red]")
|
276
|
+
console.print(f"[red]{traceback.format_exc()}[/red]")
|
255
277
|
else:
|
256
278
|
console.print("[yellow]No results found or error occurred.[/yellow]")
|
257
279
|
|
@@ -519,14 +541,17 @@ def setup_discovery_engine_subparser(subparsers):
|
|
519
541
|
import_doc_meta_parser.add_argument('--branch', default='default_branch', help='Target branch')
|
520
542
|
import_doc_meta_parser.set_defaults(func=discovery_engine_command)
|
521
543
|
|
522
|
-
# --- Search Data Store (Chunks) subcommand ---
|
523
|
-
search_parser = discovery_engine_subparsers.add_parser('search', help='Search a datastore (fetches chunks)')
|
544
|
+
# --- Search Data Store (Chunks/Documents) subcommand ---
|
545
|
+
search_parser = discovery_engine_subparsers.add_parser('search', help='Search a datastore (fetches chunks or documents)')
|
524
546
|
search_parser.add_argument('--query', required=True, help='The search query')
|
525
547
|
search_parser.add_argument('--data-store-id', required=True, help='Data store ID to search')
|
526
548
|
search_parser.add_argument('--page-size', type=int, default=10, help='Max results per page')
|
527
549
|
search_parser.add_argument('--parse-chunks-to-string', action='store_true', help='Output results as one formatted string. Only applicable for "chunks"')
|
528
550
|
search_parser.add_argument('--serving-config', default='default_config', help='Serving config ID for the data store')
|
529
551
|
search_parser.add_argument('--content_search_spec_type', default="chunks", help='"chunks" or "documents" depending on data store type')
|
552
|
+
search_parser.add_argument('--filter', help='filter for the search')
|
553
|
+
|
554
|
+
|
530
555
|
# Add arguments for num_previous_chunks, num_next_chunks, data_store_ids if needed
|
531
556
|
# search_parser.add_argument('--num-previous-chunks', type=int, default=3)
|
532
557
|
# search_parser.add_argument('--num-next-chunks', type=int, default=3)
|
@@ -213,6 +213,7 @@ class DiscoveryEngineClient:
|
|
213
213
|
parse_chunks_to_string: bool = True,
|
214
214
|
serving_config: str = "default_serving_config",
|
215
215
|
data_store_ids: Optional[List[str]] = None,
|
216
|
+
filter_str:str=None
|
216
217
|
):
|
217
218
|
"""Retrieves chunks or documents based on a query.
|
218
219
|
|
@@ -237,10 +238,10 @@ class DiscoveryEngineClient:
|
|
237
238
|
print(f"Chunk: {chunk.snippet}, document name: {chunk.document_name}")
|
238
239
|
```
|
239
240
|
"""
|
240
|
-
|
241
|
+
|
241
242
|
return self.search_with_filters(
|
242
243
|
query=query,
|
243
|
-
filter_str=
|
244
|
+
filter_str=filter_str,
|
244
245
|
num_previous_chunks=num_previous_chunks,
|
245
246
|
num_next_chunks=num_next_chunks,
|
246
247
|
page_size=page_size,
|
@@ -259,6 +260,7 @@ class DiscoveryEngineClient:
|
|
259
260
|
parse_chunks_to_string: bool = True,
|
260
261
|
serving_config: str = "default_serving_config",
|
261
262
|
data_store_ids: Optional[List[str]] = None,
|
263
|
+
filter_str:str=None
|
262
264
|
):
|
263
265
|
"""Asynchronously retrieves chunks or documents based on a query.
|
264
266
|
|
@@ -372,6 +374,7 @@ class DiscoveryEngineClient:
|
|
372
374
|
parse_documents_to_string: bool = True,
|
373
375
|
serving_config: str = "default_serving_config",
|
374
376
|
data_store_ids: Optional[List[str]] = None,
|
377
|
+
filter_str:str=None
|
375
378
|
):
|
376
379
|
"""Retrieves entire documents based on a query.
|
377
380
|
|
@@ -397,7 +400,7 @@ class DiscoveryEngineClient:
|
|
397
400
|
# Use search_with_filters with content_search_spec_type="documents" to get documents instead of chunks
|
398
401
|
return self.search_with_filters(
|
399
402
|
query=query,
|
400
|
-
filter_str=
|
403
|
+
filter_str=filter_str,
|
401
404
|
page_size=page_size,
|
402
405
|
parse_chunks_to_string=parse_documents_to_string,
|
403
406
|
serving_config=serving_config,
|
@@ -412,6 +415,7 @@ class DiscoveryEngineClient:
|
|
412
415
|
parse_documents_to_string: bool = True,
|
413
416
|
serving_config: str = "default_serving_config",
|
414
417
|
data_store_ids: Optional[List[str]] = None,
|
418
|
+
filter_str:str=None
|
415
419
|
):
|
416
420
|
"""Asynchronously retrieves entire documents based on a query.
|
417
421
|
|
@@ -430,7 +434,7 @@ class DiscoveryEngineClient:
|
|
430
434
|
# as it doesn't currently have that parameter
|
431
435
|
return await self.async_search_with_filters(
|
432
436
|
query=query,
|
433
|
-
filter_str=
|
437
|
+
filter_str=filter_str,
|
434
438
|
page_size=page_size,
|
435
439
|
parse_chunks_to_string=parse_documents_to_string,
|
436
440
|
serving_config=serving_config,
|
@@ -836,17 +840,22 @@ class DiscoveryEngineClient:
|
|
836
840
|
log.info(f"No results {search_request.data_store_specs=}: {str(e)}")
|
837
841
|
return None
|
838
842
|
|
839
|
-
if
|
840
|
-
if
|
841
|
-
|
842
|
-
|
843
|
+
if parse_chunks_to_string:
|
844
|
+
if content_search_spec_type=="chunks":
|
845
|
+
if parse_chunks_to_string:
|
846
|
+
big_string = self.process_chunks(search_response)
|
847
|
+
log.info(f"Discovery engine chunks string sample: {big_string[:100]}")
|
848
|
+
|
849
|
+
return big_string
|
850
|
+
|
851
|
+
elif content_search_spec_type=="documents":
|
852
|
+
big_string = self.process_documents(search_response)
|
853
|
+
log.info(f"Discovery engine documents string sample: {big_string[:100]}")
|
854
|
+
|
843
855
|
return big_string
|
844
|
-
elif content_search_spec_type=="documents":
|
845
|
-
big_string = self.process_documents(search_response)
|
846
|
-
log.info(f"Discovery engine documents string sample: {big_string[:100]}")
|
847
|
-
return big_string
|
848
856
|
|
849
857
|
log.info("Discovery engine response object")
|
858
|
+
|
850
859
|
return search_response
|
851
860
|
|
852
861
|
async def async_search_with_filters(self, query, filter_str=None,
|
@@ -73,9 +73,9 @@ sunholo/database/sql/sb/return_sources.sql,sha256=89KAnxfK8n_qGK9jy1OQT8f9n4uYUt
|
|
73
73
|
sunholo/database/sql/sb/setup.sql,sha256=CvoFvZQev2uWjmFa3aj3m3iuPFzAAJZ0S7Qi3L3-zZI,89
|
74
74
|
sunholo/discovery_engine/__init__.py,sha256=hLgqRDJ22Aov9o2QjAEfsVgnL3kMdM-g5p8RJ9OyKdQ,130
|
75
75
|
sunholo/discovery_engine/chunker_handler.py,sha256=wkvXl4rFtYfN6AZUKdW9_QD49Whf77BukDbO82UwlAg,7480
|
76
|
-
sunholo/discovery_engine/cli.py,sha256=
|
76
|
+
sunholo/discovery_engine/cli.py,sha256=tsKqNSDCEsDTz5-wuNwjttb3Xt35D97-KyyEiaqolMQ,35628
|
77
77
|
sunholo/discovery_engine/create_new.py,sha256=WUi4_xh_dFaGX3xA9jkNKZhaR6LCELjMPeRb0hyj4FU,1226
|
78
|
-
sunholo/discovery_engine/discovery_engine_client.py,sha256=
|
78
|
+
sunholo/discovery_engine/discovery_engine_client.py,sha256=XOKPx2C9sXSx1Z1_23IynrA19NTzWwcXFdvZm-jYXQY,58589
|
79
79
|
sunholo/discovery_engine/get_ai_search_chunks.py,sha256=I6Dt1CznqEvE7XIZ2PkLqopmjpO96iVEWJJqL5cJjOU,5554
|
80
80
|
sunholo/embedder/__init__.py,sha256=sI4N_CqgEVcrMDxXgxKp1FsfsB4FpjoXgPGkl4N_u4I,44
|
81
81
|
sunholo/embedder/embed_chunk.py,sha256=did2pKkWM2o0KkRcb0H9l2x_WjCq6OyuHDxGbITFKPM,6530
|
@@ -168,9 +168,9 @@ sunholo/vertex/init.py,sha256=1OQwcPBKZYBTDPdyU7IM4X4OmiXLdsNV30C-fee2scQ,2875
|
|
168
168
|
sunholo/vertex/memory_tools.py,sha256=tBZxqVZ4InTmdBvLlOYwoSEWu4-kGquc-gxDwZCC4FA,7667
|
169
169
|
sunholo/vertex/safety.py,sha256=S9PgQT1O_BQAkcqauWncRJaydiP8Q_Jzmu9gxYfy1VA,2482
|
170
170
|
sunholo/vertex/type_dict_to_json.py,sha256=uTzL4o9tJRao4u-gJOFcACgWGkBOtqACmb6ihvCErL8,4694
|
171
|
-
sunholo-0.134.
|
172
|
-
sunholo-0.134.
|
173
|
-
sunholo-0.134.
|
174
|
-
sunholo-0.134.
|
175
|
-
sunholo-0.134.
|
176
|
-
sunholo-0.134.
|
171
|
+
sunholo-0.134.6.dist-info/licenses/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
|
172
|
+
sunholo-0.134.6.dist-info/METADATA,sha256=olUgsXM6SFdKO9eS9oAh8I5EZ1zLhm8i_lEJ4WxoZak,10067
|
173
|
+
sunholo-0.134.6.dist-info/WHEEL,sha256=pxyMxgL8-pra_rKaQ4drOZAegBVuX-G_4nRHjjgWbmo,91
|
174
|
+
sunholo-0.134.6.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
|
175
|
+
sunholo-0.134.6.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
|
176
|
+
sunholo-0.134.6.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|