PyPI - sunholo - Versions diffs - 0.134.3__py3-none-any.whl → 0.134.5__py3-none-any.whl - Mend

sunholo 0.134.3__py3-none-any.whl → 0.134.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8)

sunholo/discovery_engine/cli.py CHANGED Viewed

@@ -224,34 +224,56 @@ def search_command(args):
         if args.parse_chunks_to_string:
             console.print("\n[bold magenta]--- Combined Chunk String ---[/bold magenta]")
             console.print(results_data if results_data else "[yellow]No results found or error occurred.[/yellow]")
+        elif isinstance(results_data, str):
+            # Handle string result when parse_chunks_to_string is False but a string was returned anyway
+            console.print("\n[bold magenta]--- Results String ---[/bold magenta]")
+            console.print(results_data)
         elif results_data: # It's a pager object
-            console.print("\n[bold magenta]--- Individual Chunks ---[/bold magenta]")
-            chunk_count = 0
-            try:
-                 # Iterate through the pager returned by get_chunks
-                 for page in results_data.pages:
-                     if not hasattr(page, 'results') or not page.results: continue
-                     for result in page.results:
-                          # Ensure the result structure is as expected by get_chunks
-                          if hasattr(result, 'chunk'):
-                               chunk_count += 1
-                               console.print(f"\n[bold]Chunk {chunk_count}:[/bold]")
-                               # Use the client's formatter if available
-                               console.print(client.chunk_format(result.chunk))
-                          elif hasattr(result, 'document') and hasattr(result.document, 'chunks'):
-                               # Fallback if structure is different (e.g., document with chunks)
-                               for chunk in result.document.chunks:
-                                    chunk_count += 1
-                                    console.print(f"\n[bold]Chunk {chunk_count} (from doc {result.document.id}):[/bold]")
-                                    console.print(f"  Content: {getattr(chunk, 'content', 'N/A')}")
-                                    console.print(f"  Doc Name: {getattr(chunk, 'document_metadata', {}).get('name', 'N/A')}") # Example access
-                 if chunk_count == 0:
-                     console.print("[yellow]No chunks found in the results.[/yellow]")
-            except Exception as page_err:
-                 console.print(f"[bold red]Error processing search results pager: {page_err}[/bold red]")
-                 console.print(f"[red]{traceback.format_exc()}[/red]")
+            if args.content_search_spec_type == "chunks":
+                console.print("\n[bold magenta]--- Individual Chunks ---[/bold magenta]")
+                chunk_count = 0
+                try:
+                    # Iterate through the pager returned by get_chunks
+                    for page in results_data.pages:
+                        if not hasattr(page, 'results') or not page.results: continue
+                        for result in page.results:
+                            # Ensure the result structure is as expected by get_chunks
+                            if hasattr(result, 'chunk'):
+                                chunk_count += 1
+                                console.print(f"\n[bold]Chunk {chunk_count}:[/bold]")
+                                # Use the client's formatter if available
+                                console.print(client.chunk_format(result.chunk))
+                            elif hasattr(result, 'document') and hasattr(result.document, 'chunks'):
+                                # Fallback if structure is different (e.g., document with chunks)
+                                for chunk in result.document.chunks:
+                                        chunk_count += 1
+                                        console.print(f"\n[bold]Chunk {chunk_count} (from doc {result.document.id}):[/bold]")
+                                        console.print(f"  Content: {getattr(chunk, 'content', 'N/A')}")
+                                        console.print(f"  Doc Name: {getattr(chunk, 'document_metadata', {}).get('name', 'N/A')}")
+                    if chunk_count == 0:
+                        console.print("[yellow]No chunks found in the results.[/yellow]")
+                except Exception as page_err:
+                    console.print(f"[bold red]Error processing search results pager: {page_err}[/bold red]")
+                    console.print(f"[red]{traceback.format_exc()}[/red]")
+            elif args.content_search_spec_type == "documents":
+                console.print("\n[bold magenta]--- Individual Documents ---[/bold magenta]")
+                doc_count = 0
+                try:
+                    # Iterate through the pager returned by get_documents
+                    for page in results_data.pages:
+                        if not hasattr(page, 'results') or not page.results: continue
+                        for result in page.results:
+                            if hasattr(result, 'document'):
+                                doc_count += 1
+                                console.print(f"\n[bold]Document {doc_count}:[/bold]")
+                                console.print(client.document_format(result.document))
+                    if doc_count == 0:
+                        console.print("[yellow]No documents found in the results.[/yellow]")
+                except Exception as page_err:
+                    console.print(f"[bold red]Error processing document results: {page_err}[/bold red]")
+                    console.print(f"[red]{traceback.format_exc()}[/red]")
         else:
             console.print("[yellow]No results found or error occurred.[/yellow]")

sunholo/discovery_engine/discovery_engine_client.py CHANGED Viewed

@@ -309,8 +309,11 @@ class DiscoveryEngineClient:
         # Iterate through each result in the response
         for result in response.results:
-            chunk = result.chunk
-            chunk_metadata = chunk.ChunkMetadata
+            if hasattr(result, 'chunk'):
+                chunk = result.chunk
+                chunk_metadata = chunk.ChunkMetadata
+            else:
+                log.warning("No chunk found in result")
             if hasattr(chunk_metadata, 'previous_chunks'):
                 # Process previous chunks
@@ -339,8 +342,11 @@ class DiscoveryEngineClient:
         # Iterate through each result in the response
         for result in response.results:
-            chunk = result.chunk
-            chunk_metadata = chunk.ChunkMetadata
+            if hasattr(result, 'chunk'):
+                chunk = result.chunk
+                chunk_metadata = chunk.ChunkMetadata
+            else:
+                log.warning("No chunk found in result")
             if hasattr(chunk_metadata, 'previous_chunks'):
                 # Process previous chunks
@@ -431,6 +437,83 @@ class DiscoveryEngineClient:
             data_store_ids=data_store_ids,
             content_search_spec_type="documents"
         )
+    def document_format(self, document):
+        """Format a document for string output."""
+        # Extract useful fields from the document
+        document_id = document.id
+        document_name = document.name
+        # Get content if available
+        content = ""
+        if hasattr(document, 'content') and document.content:
+            if hasattr(document.content, 'uri') and document.content.uri:
+                content = f"Content URI: {document.content.uri}\n"
+            if hasattr(document.content, 'mime_type') and document.content.mime_type:
+                content += f"Content Type: {document.content.mime_type}\n"
+        # Get structured data if available
+        struct_data = ""
+        if hasattr(document, 'struct_data') and document.struct_data:
+            struct_data = f"Structured Data: {dict(document.struct_data)}\n"
+        # Get derived structured data if available
+        derived_data = ""
+        if hasattr(document, 'derived_struct_data') and document.derived_struct_data:
+            derived_data = f"Derived Data: {dict(document.derived_struct_data)}\n"
+        # Return formatted document string
+        return (
+            f"# Document: {document_id}\n"
+            f"Resource Name: {document_name}\n"
+            f"{content}"
+            f"{struct_data}"
+            f"{derived_data}"
+        )
+    def process_documents(self, response):
+        """Process a search response containing documents into a formatted string."""
+        all_documents = []
+        # Check if the response contains results
+        if not hasattr(response, 'results') or not response.results:
+            log.info(f'No results found in response: {response=}')
+            return []
+        # Iterate through each result in the response
+        for result in response.results:
+            if hasattr(result, 'document'):
+                document = result.document
+                all_documents.append(self.document_format(document))
+            else:
+                log.warning("No document found in result")
+        # Combine all documents into one long string
+        result_string = "\n\n".join(all_documents)
+        return result_string
+    async def async_process_documents(self, response):
+        """Process a search response containing documents into a formatted string asynchronously."""
+        all_documents = []
+        # Check if the response contains results
+        if not hasattr(response, 'results') or not response.results:
+            log.info(f'No results found in response: {response=}')
+            return []
+        # Iterate through each result in the response
+        for result in response.results:
+            if hasattr(result, 'document'):
+                document = result.document
+                all_documents.append(self.document_format(document))
+            else:
+                log.warning("No document found in result")
+        # Combine all documents into one long string
+        result_string = "\n\n".join(all_documents)
+        return result_string
     def create_engine(self,
         engine_id: str,
@@ -753,9 +836,14 @@ class DiscoveryEngineClient:
             log.info(f"No results {search_request.data_store_specs=}: {str(e)}")
             return None
-        if parse_chunks_to_string:
-            big_string = self.process_chunks(search_response)
-            log.info(f"Discovery engine chunks string sample: {big_string[:100]}")
+        if content_search_spec_type=="chunks":
+            if parse_chunks_to_string:
+                big_string = self.process_chunks(search_response)
+                log.info(f"Discovery engine chunks string sample: {big_string[:100]}")
+                return big_string
+        elif content_search_spec_type=="documents":
+            big_string = self.process_documents(search_response)
+            log.info(f"Discovery engine documents string sample: {big_string[:100]}")
             return big_string
         log.info("Discovery engine response object")
@@ -824,9 +912,14 @@ class DiscoveryEngineClient:
             log.info(f"No results {search_request.data_store_specs=}: {str(e)}")
             return None
-        if parse_chunks_to_string:
-            big_string = await self.async_process_chunks(search_response)
-            log.info(f"Discovery engine chunks string sample: {big_string[:100]}")
+        if content_search_spec_type=="chunks":
+            if parse_chunks_to_string:
+                big_string = self.process_chunks(search_response)
+                log.info(f"Discovery engine chunks string sample: {big_string[:100]}")
+                return big_string
+        elif content_search_spec_type=="documents":
+            big_string = self.process_documents(search_response)
+            log.info(f"Discovery engine documents string sample: {big_string[:100]}")
             return big_string
         log.info("Discovery engine response object")

{sunholo-0.134.3.dist-info → sunholo-0.134.5.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: sunholo
-Version: 0.134.3
+Version: 0.134.5
 Summary: AI DevOps - a package to help deploy GenAI to the Cloud.
 Author-email: Holosun ApS <multivac@sunholo.com>
 License: Apache License, Version 2.0

{sunholo-0.134.3.dist-info → sunholo-0.134.5.dist-info}/RECORD RENAMED Viewed

@@ -73,9 +73,9 @@ sunholo/database/sql/sb/return_sources.sql,sha256=89KAnxfK8n_qGK9jy1OQT8f9n4uYUt
 sunholo/database/sql/sb/setup.sql,sha256=CvoFvZQev2uWjmFa3aj3m3iuPFzAAJZ0S7Qi3L3-zZI,89
 sunholo/discovery_engine/__init__.py,sha256=hLgqRDJ22Aov9o2QjAEfsVgnL3kMdM-g5p8RJ9OyKdQ,130
 sunholo/discovery_engine/chunker_handler.py,sha256=wkvXl4rFtYfN6AZUKdW9_QD49Whf77BukDbO82UwlAg,7480
-sunholo/discovery_engine/cli.py,sha256=bUhCPoKrkMtdeTBHLyCZf9syVwHn5kE0yXpqDBIzmTc,34120
+sunholo/discovery_engine/cli.py,sha256=0FKitDCqnKwtONyGt7gmsRoE5W6HHCIDqaTt8S0Dw4s,35631
 sunholo/discovery_engine/create_new.py,sha256=WUi4_xh_dFaGX3xA9jkNKZhaR6LCELjMPeRb0hyj4FU,1226
-sunholo/discovery_engine/discovery_engine_client.py,sha256=D_OiMiMDScwC426xzgbMpAPNV9Q8xaz4y_waDeRPhVQ,54496
+sunholo/discovery_engine/discovery_engine_client.py,sha256=Ak3VpadtgpPWfIEot87EiNh4vbDUg9gQVa-1UDnoGMA,58442
 sunholo/discovery_engine/get_ai_search_chunks.py,sha256=I6Dt1CznqEvE7XIZ2PkLqopmjpO96iVEWJJqL5cJjOU,5554
 sunholo/embedder/__init__.py,sha256=sI4N_CqgEVcrMDxXgxKp1FsfsB4FpjoXgPGkl4N_u4I,44
 sunholo/embedder/embed_chunk.py,sha256=did2pKkWM2o0KkRcb0H9l2x_WjCq6OyuHDxGbITFKPM,6530
@@ -168,9 +168,9 @@ sunholo/vertex/init.py,sha256=1OQwcPBKZYBTDPdyU7IM4X4OmiXLdsNV30C-fee2scQ,2875
 sunholo/vertex/memory_tools.py,sha256=tBZxqVZ4InTmdBvLlOYwoSEWu4-kGquc-gxDwZCC4FA,7667
 sunholo/vertex/safety.py,sha256=S9PgQT1O_BQAkcqauWncRJaydiP8Q_Jzmu9gxYfy1VA,2482
 sunholo/vertex/type_dict_to_json.py,sha256=uTzL4o9tJRao4u-gJOFcACgWGkBOtqACmb6ihvCErL8,4694
-sunholo-0.134.3.dist-info/licenses/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
-sunholo-0.134.3.dist-info/METADATA,sha256=XicSY1z4sd8PfmmNYnZyKvKYEDOMauxj-uf7WCOs328,10067
-sunholo-0.134.3.dist-info/WHEEL,sha256=pxyMxgL8-pra_rKaQ4drOZAegBVuX-G_4nRHjjgWbmo,91
-sunholo-0.134.3.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
-sunholo-0.134.3.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
-sunholo-0.134.3.dist-info/RECORD,,
+sunholo-0.134.5.dist-info/licenses/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
+sunholo-0.134.5.dist-info/METADATA,sha256=OyzGPXRwE0gTZBioO_oT-pHLXH4s-Fw51ws6pWmT_Jc,10067
+sunholo-0.134.5.dist-info/WHEEL,sha256=pxyMxgL8-pra_rKaQ4drOZAegBVuX-G_4nRHjjgWbmo,91
+sunholo-0.134.5.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
+sunholo-0.134.5.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
+sunholo-0.134.5.dist-info/RECORD,,

{sunholo-0.134.3.dist-info → sunholo-0.134.5.dist-info}/WHEEL RENAMED Viewed

File without changes

{sunholo-0.134.3.dist-info → sunholo-0.134.5.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{sunholo-0.134.3.dist-info → sunholo-0.134.5.dist-info}/licenses/LICENSE.txt RENAMED Viewed

File without changes

{sunholo-0.134.3.dist-info → sunholo-0.134.5.dist-info}/top_level.txt RENAMED Viewed

File without changes

sunholo 0.134.3__py3-none-any.whl → 0.134.5__py3-none-any.whl

sunholo 0.134.3__py3-none-any.whl → 0.134.5__py3-none-any.whl