sunholo 0.134.3__py3-none-any.whl → 0.134.5__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
@@ -224,34 +224,56 @@ def search_command(args):
     if args.parse_chunks_to_string:
         console.print("\n[bold magenta]--- Combined Chunk String ---[/bold magenta]")
         console.print(results_data if results_data else "[yellow]No results found or error occurred.[/yellow]")
+    elif isinstance(results_data, str):
+        # Handle string result when parse_chunks_to_string is False but a string was returned anyway
+        console.print("\n[bold magenta]--- Results String ---[/bold magenta]")
+        console.print(results_data)
     elif results_data: # It's a pager object
-        console.print("\n[bold magenta]--- Individual Chunks ---[/bold magenta]")
-        chunk_count = 0
-        try:
-            # Iterate through the pager returned by get_chunks
-            for page in results_data.pages:
-                if not hasattr(page, 'results') or not page.results: continue
-                for result in page.results:
-                    # Ensure the result structure is as expected by get_chunks
-                    if hasattr(result, 'chunk'):
-                        chunk_count += 1
-                        console.print(f"\n[bold]Chunk {chunk_count}:[/bold]")
-                        # Use the client's formatter if available
-                        console.print(client.chunk_format(result.chunk))
-                    elif hasattr(result, 'document') and hasattr(result.document, 'chunks'):
-                        # Fallback if structure is different (e.g., document with chunks)
-                        for chunk in result.document.chunks:
-                            chunk_count += 1
-                            console.print(f"\n[bold]Chunk {chunk_count} (from doc {result.document.id}):[/bold]")
-                            console.print(f" Content: {getattr(chunk, 'content', 'N/A')}")
-                            console.print(f" Doc Name: {getattr(chunk, 'document_metadata', {}).get('name', 'N/A')}") # Example access
-
-            if chunk_count == 0:
-                console.print("[yellow]No chunks found in the results.[/yellow]")
-
-        except Exception as page_err:
-            console.print(f"[bold red]Error processing search results pager: {page_err}[/bold red]")
-            console.print(f"[red]{traceback.format_exc()}[/red]")
+        if args.content_search_spec_type == "chunks":
+            console.print("\n[bold magenta]--- Individual Chunks ---[/bold magenta]")
+            chunk_count = 0
+            try:
+                # Iterate through the pager returned by get_chunks
+                for page in results_data.pages:
+                    if not hasattr(page, 'results') or not page.results: continue
+                    for result in page.results:
+                        # Ensure the result structure is as expected by get_chunks
+                        if hasattr(result, 'chunk'):
+                            chunk_count += 1
+                            console.print(f"\n[bold]Chunk {chunk_count}:[/bold]")
+                            # Use the client's formatter if available
+                            console.print(client.chunk_format(result.chunk))
+                        elif hasattr(result, 'document') and hasattr(result.document, 'chunks'):
+                            # Fallback if structure is different (e.g., document with chunks)
+                            for chunk in result.document.chunks:
+                                chunk_count += 1
+                                console.print(f"\n[bold]Chunk {chunk_count} (from doc {result.document.id}):[/bold]")
+                                console.print(f" Content: {getattr(chunk, 'content', 'N/A')}")
+                                console.print(f" Doc Name: {getattr(chunk, 'document_metadata', {}).get('name', 'N/A')}")
+                if chunk_count == 0:
+                    console.print("[yellow]No chunks found in the results.[/yellow]")
+
+            except Exception as page_err:
+                console.print(f"[bold red]Error processing search results pager: {page_err}[/bold red]")
+                console.print(f"[red]{traceback.format_exc()}[/red]")
+        elif args.content_search_spec_type == "documents":
+            console.print("\n[bold magenta]--- Individual Documents ---[/bold magenta]")
+            doc_count = 0
+            try:
+                # Iterate through the pager returned by get_documents
+                for page in results_data.pages:
+                    if not hasattr(page, 'results') or not page.results: continue
+                    for result in page.results:
+                        if hasattr(result, 'document'):
+                            doc_count += 1
+                            console.print(f"\n[bold]Document {doc_count}:[/bold]")
+                            console.print(client.document_format(result.document))
+
+                if doc_count == 0:
+                    console.print("[yellow]No documents found in the results.[/yellow]")
+            except Exception as page_err:
+                console.print(f"[bold red]Error processing document results: {page_err}[/bold red]")
+                console.print(f"[red]{traceback.format_exc()}[/red]")
     else:
         console.print("[yellow]No results found or error occurred.[/yellow]")
 
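For orientation, the new `documents` branch above walks the same pager shape as the chunks branch: `results_data.pages` yields pages, each page exposes `results`, and each result carries a `document`. Below is a minimal, self-contained sketch of that pattern; the `render_documents` helper and the `SimpleNamespace` stand-ins are illustrative only, not part of the sunholo or Discovery Engine APIs.

    from types import SimpleNamespace

    def render_documents(pager, format_doc):
        """Walk a pager shaped like the search pager above:
        pager.pages -> pages, page.results -> results, result.document -> document."""
        count = 0
        for page in pager.pages:
            if not getattr(page, 'results', None):
                continue
            for result in page.results:
                if hasattr(result, 'document'):
                    count += 1
                    print(f"Document {count}:")
                    print(format_doc(result.document))
        if count == 0:
            print("No documents found in the results.")

    # Fake pager with the same attribute shape, for illustration only
    doc = SimpleNamespace(id="doc-1", name="projects/p/locations/eu/dataStores/ds/documents/doc-1")
    pager = SimpleNamespace(pages=[SimpleNamespace(results=[SimpleNamespace(document=doc)])])
    render_documents(pager, lambda d: f"# Document: {d.id}\nResource Name: {d.name}")
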
@@ -309,8 +309,11 @@ class DiscoveryEngineClient:
 
         # Iterate through each result in the response
         for result in response.results:
-            chunk = result.chunk
-            chunk_metadata = chunk.ChunkMetadata
+            if hasattr(result, 'chunk'):
+                chunk = result.chunk
+                chunk_metadata = chunk.ChunkMetadata
+            else:
+                log.warning("No chunk found in result")
 
             if hasattr(chunk_metadata, 'previous_chunks'):
                 # Process previous chunks
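The change above replaces direct attribute access with a `hasattr` guard, so results without a `chunk` field are logged instead of raising `AttributeError`. A standalone illustration of the same guard pattern follows; the `SimpleNamespace` results are stand-ins, not Discovery Engine types.

    import logging
    from types import SimpleNamespace

    log = logging.getLogger(__name__)

    results = [
        SimpleNamespace(chunk=SimpleNamespace(content="hello world")),
        SimpleNamespace(document="a document-only result"),
    ]

    for result in results:
        if hasattr(result, 'chunk'):
            # Safe: only touch result.chunk when it is actually present
            chunk = result.chunk
            print(chunk.content)
        else:
            # Without the guard, result.chunk would raise AttributeError here
            log.warning("No chunk found in result")
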
@@ -339,8 +342,11 @@ class DiscoveryEngineClient:
 
         # Iterate through each result in the response
         for result in response.results:
-            chunk = result.chunk
-            chunk_metadata = chunk.ChunkMetadata
+            if hasattr(result, 'chunk'):
+                chunk = result.chunk
+                chunk_metadata = chunk.ChunkMetadata
+            else:
+                log.warning("No chunk found in result")
 
             if hasattr(chunk_metadata, 'previous_chunks'):
                 # Process previous chunks
@@ -431,6 +437,83 @@ class DiscoveryEngineClient:
             data_store_ids=data_store_ids,
             content_search_spec_type="documents"
         )
+
+    def document_format(self, document):
+        """Format a document for string output."""
+        # Extract useful fields from the document
+        document_id = document.id
+        document_name = document.name
+
+        # Get content if available
+        content = ""
+        if hasattr(document, 'content') and document.content:
+            if hasattr(document.content, 'uri') and document.content.uri:
+                content = f"Content URI: {document.content.uri}\n"
+            if hasattr(document.content, 'mime_type') and document.content.mime_type:
+                content += f"Content Type: {document.content.mime_type}\n"
+
+        # Get structured data if available
+        struct_data = ""
+        if hasattr(document, 'struct_data') and document.struct_data:
+            struct_data = f"Structured Data: {dict(document.struct_data)}\n"
+
+        # Get derived structured data if available
+        derived_data = ""
+        if hasattr(document, 'derived_struct_data') and document.derived_struct_data:
+            derived_data = f"Derived Data: {dict(document.derived_struct_data)}\n"
+
+        # Return formatted document string
+        return (
+            f"# Document: {document_id}\n"
+            f"Resource Name: {document_name}\n"
+            f"{content}"
+            f"{struct_data}"
+            f"{derived_data}"
+        )
+
+    def process_documents(self, response):
+        """Process a search response containing documents into a formatted string."""
+        all_documents = []
+
+        # Check if the response contains results
+        if not hasattr(response, 'results') or not response.results:
+            log.info(f'No results found in response: {response=}')
+            return []
+
+        # Iterate through each result in the response
+        for result in response.results:
+            if hasattr(result, 'document'):
+                document = result.document
+                all_documents.append(self.document_format(document))
+            else:
+                log.warning("No document found in result")
+
+        # Combine all documents into one long string
+        result_string = "\n\n".join(all_documents)
+
+        return result_string
+
+    async def async_process_documents(self, response):
+        """Process a search response containing documents into a formatted string asynchronously."""
+        all_documents = []
+
+        # Check if the response contains results
+        if not hasattr(response, 'results') or not response.results:
+            log.info(f'No results found in response: {response=}')
+            return []
+
+        # Iterate through each result in the response
+        for result in response.results:
+            if hasattr(result, 'document'):
+                document = result.document
+                all_documents.append(self.document_format(document))
+            else:
+                log.warning("No document found in result")
+
+        # Combine all documents into one long string
+        result_string = "\n\n".join(all_documents)
+
+        return result_string
 
     def create_engine(self,
                       engine_id: str,
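Because the new `document_format` helper only reads plain attributes (`id`, `name`, and optional `content`, `struct_data`, `derived_struct_data`), its output can be previewed with a simple stand-in object. The `doc` below is an illustrative `SimpleNamespace`, not a real Discovery Engine `Document`, and the client construction is left commented out since its arguments are not shown in this diff.

    from types import SimpleNamespace

    # Stand-in carrying the attributes document_format reads; in practice this
    # object comes from a Discovery Engine search result (result.document).
    doc = SimpleNamespace(
        id="doc-123",
        name="projects/p/locations/eu/dataStores/ds/branches/0/documents/doc-123",
        content=SimpleNamespace(uri="gs://bucket/report.pdf", mime_type="application/pdf"),
        struct_data={"source": "gcs"},
        derived_struct_data={},
    )

    # client = DiscoveryEngineClient(...)  # construction arguments omitted
    # client.document_format(doc) would return roughly:
    #   # Document: doc-123
    #   Resource Name: projects/p/locations/eu/dataStores/ds/branches/0/documents/doc-123
    #   Content URI: gs://bucket/report.pdf
    #   Content Type: application/pdf
    #   Structured Data: {'source': 'gcs'}
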
@@ -753,9 +836,14 @@ class DiscoveryEngineClient:
             log.info(f"No results {search_request.data_store_specs=}: {str(e)}")
             return None
 
-        if parse_chunks_to_string:
-            big_string = self.process_chunks(search_response)
-            log.info(f"Discovery engine chunks string sample: {big_string[:100]}")
+        if content_search_spec_type=="chunks":
+            if parse_chunks_to_string:
+                big_string = self.process_chunks(search_response)
+                log.info(f"Discovery engine chunks string sample: {big_string[:100]}")
+                return big_string
+        elif content_search_spec_type=="documents":
+            big_string = self.process_documents(search_response)
+            log.info(f"Discovery engine documents string sample: {big_string[:100]}")
             return big_string
 
         log.info("Discovery engine response object")
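Read together with the CLI hunk at the top of this diff, the branch above determines which representation the search helper returns. A compact paraphrase of that decision logic follows; the standalone `describe_return_value` function is illustrative, not part of the sunholo API.

    def describe_return_value(content_search_spec_type: str, parse_chunks_to_string: bool) -> str:
        """Mirror the branching above: what does the caller get back?"""
        if content_search_spec_type == "chunks":
            if parse_chunks_to_string:
                return "combined chunk string (process_chunks)"
            # no early return in the diff here: execution continues to the response-object path
            return "raw search response / pager"
        elif content_search_spec_type == "documents":
            return "combined document string (process_documents)"
        return "raw search response / pager"

    assert describe_return_value("documents", False) == "combined document string (process_documents)"
    assert describe_return_value("chunks", True) == "combined chunk string (process_chunks)"
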
@@ -824,9 +912,14 @@ class DiscoveryEngineClient:
             log.info(f"No results {search_request.data_store_specs=}: {str(e)}")
             return None
 
-        if parse_chunks_to_string:
-            big_string = await self.async_process_chunks(search_response)
-            log.info(f"Discovery engine chunks string sample: {big_string[:100]}")
+        if content_search_spec_type=="chunks":
+            if parse_chunks_to_string:
+                big_string = self.process_chunks(search_response)
+                log.info(f"Discovery engine chunks string sample: {big_string[:100]}")
+                return big_string
+        elif content_search_spec_type=="documents":
+            big_string = self.process_documents(search_response)
+            log.info(f"Discovery engine documents string sample: {big_string[:100]}")
             return big_string
 
         log.info("Discovery engine response object")
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: sunholo
-Version: 0.134.3
+Version: 0.134.5
 Summary: AI DevOps - a package to help deploy GenAI to the Cloud.
 Author-email: Holosun ApS <multivac@sunholo.com>
 License: Apache License, Version 2.0
@@ -73,9 +73,9 @@ sunholo/database/sql/sb/return_sources.sql,sha256=89KAnxfK8n_qGK9jy1OQT8f9n4uYUt
 sunholo/database/sql/sb/setup.sql,sha256=CvoFvZQev2uWjmFa3aj3m3iuPFzAAJZ0S7Qi3L3-zZI,89
 sunholo/discovery_engine/__init__.py,sha256=hLgqRDJ22Aov9o2QjAEfsVgnL3kMdM-g5p8RJ9OyKdQ,130
 sunholo/discovery_engine/chunker_handler.py,sha256=wkvXl4rFtYfN6AZUKdW9_QD49Whf77BukDbO82UwlAg,7480
-sunholo/discovery_engine/cli.py,sha256=bUhCPoKrkMtdeTBHLyCZf9syVwHn5kE0yXpqDBIzmTc,34120
+sunholo/discovery_engine/cli.py,sha256=0FKitDCqnKwtONyGt7gmsRoE5W6HHCIDqaTt8S0Dw4s,35631
 sunholo/discovery_engine/create_new.py,sha256=WUi4_xh_dFaGX3xA9jkNKZhaR6LCELjMPeRb0hyj4FU,1226
-sunholo/discovery_engine/discovery_engine_client.py,sha256=D_OiMiMDScwC426xzgbMpAPNV9Q8xaz4y_waDeRPhVQ,54496
+sunholo/discovery_engine/discovery_engine_client.py,sha256=Ak3VpadtgpPWfIEot87EiNh4vbDUg9gQVa-1UDnoGMA,58442
 sunholo/discovery_engine/get_ai_search_chunks.py,sha256=I6Dt1CznqEvE7XIZ2PkLqopmjpO96iVEWJJqL5cJjOU,5554
 sunholo/embedder/__init__.py,sha256=sI4N_CqgEVcrMDxXgxKp1FsfsB4FpjoXgPGkl4N_u4I,44
 sunholo/embedder/embed_chunk.py,sha256=did2pKkWM2o0KkRcb0H9l2x_WjCq6OyuHDxGbITFKPM,6530
@@ -168,9 +168,9 @@ sunholo/vertex/init.py,sha256=1OQwcPBKZYBTDPdyU7IM4X4OmiXLdsNV30C-fee2scQ,2875
 sunholo/vertex/memory_tools.py,sha256=tBZxqVZ4InTmdBvLlOYwoSEWu4-kGquc-gxDwZCC4FA,7667
 sunholo/vertex/safety.py,sha256=S9PgQT1O_BQAkcqauWncRJaydiP8Q_Jzmu9gxYfy1VA,2482
 sunholo/vertex/type_dict_to_json.py,sha256=uTzL4o9tJRao4u-gJOFcACgWGkBOtqACmb6ihvCErL8,4694
-sunholo-0.134.3.dist-info/licenses/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
-sunholo-0.134.3.dist-info/METADATA,sha256=XicSY1z4sd8PfmmNYnZyKvKYEDOMauxj-uf7WCOs328,10067
-sunholo-0.134.3.dist-info/WHEEL,sha256=pxyMxgL8-pra_rKaQ4drOZAegBVuX-G_4nRHjjgWbmo,91
-sunholo-0.134.3.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
-sunholo-0.134.3.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
-sunholo-0.134.3.dist-info/RECORD,,
+sunholo-0.134.5.dist-info/licenses/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
+sunholo-0.134.5.dist-info/METADATA,sha256=OyzGPXRwE0gTZBioO_oT-pHLXH4s-Fw51ws6pWmT_Jc,10067
+sunholo-0.134.5.dist-info/WHEEL,sha256=pxyMxgL8-pra_rKaQ4drOZAegBVuX-G_4nRHjjgWbmo,91
+sunholo-0.134.5.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
+sunholo-0.134.5.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
+sunholo-0.134.5.dist-info/RECORD,,