wizit-context-ingestor 0.3.0b2__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- wizit_context_ingestor/__init__.py +2 -1
- wizit_context_ingestor/application/context_chunk_service.py +21 -105
- wizit_context_ingestor/application/interfaces.py +52 -23
- wizit_context_ingestor/application/kdb_service.py +59 -0
- wizit_context_ingestor/application/transcription_service.py +46 -15
- wizit_context_ingestor/data/kdb.py +5 -2
- wizit_context_ingestor/data/prompts.py +2 -4
- wizit_context_ingestor/data/storage.py +1 -1
- wizit_context_ingestor/domain/services.py +6 -11
- wizit_context_ingestor/infra/rag/chroma_embeddings.py +11 -6
- wizit_context_ingestor/infra/rag/pg_embeddings.py +117 -127
- wizit_context_ingestor/infra/rag/semantic_chunks.py +2 -3
- wizit_context_ingestor/infra/secrets/aws_secrets_manager.py +3 -4
- wizit_context_ingestor/main.py +15 -14
- wizit_context_ingestor/main_chunks.py +173 -0
- wizit_context_ingestor/utils/file_utils.py +7 -8
- {wizit_context_ingestor-0.3.0b2.dist-info → wizit_context_ingestor-0.4.0.dist-info}/METADATA +4 -1
- {wizit_context_ingestor-0.3.0b2.dist-info → wizit_context_ingestor-0.4.0.dist-info}/RECORD +19 -17
- {wizit_context_ingestor-0.3.0b2.dist-info → wizit_context_ingestor-0.4.0.dist-info}/WHEEL +0 -0

wizit_context_ingestor/application/context_chunk_service.py
@@ -1,18 +1,20 @@
+import asyncio
+import logging
+from typing import Any, Dict, List, Optional
+
+from langchain_core.documents import Document
+from langchain_core.messages.human import HumanMessage
 from langchain_core.output_parsers.pydantic import PydanticOutputParser
 from langchain_core.prompts import ChatPromptTemplate
-
+
 from ..data.prompts import CONTEXT_CHUNKS_IN_DOCUMENT_SYSTEM_PROMPT, ContextChunk
-from langchain_core.messages.human import HumanMessage
 from ..workflows.context_workflow import ContextWorkflow
-from typing import Dict, Any, Optional, List
 from .interfaces import (
     AiApplicationService,
+    EmbeddingsManager,
     PersistenceService,
     RagChunker,
-    EmbeddingsManager,
 )
-import logging
-
 
 logger = logging.getLogger(__name__)
 
@@ -38,13 +40,13 @@ class ContextChunksInDocumentService:
         self.rag_chunker = rag_chunker
         self.embeddings_manager = embeddings_manager
         self.target_language = target_language
-        self.embeddings_manager.init_vector_store()
+        # self.embeddings_manager.init_vector_store()
         self.chat_model = self.ai_application_service.load_chat_model()
         # TODO
         self.context_additional_instructions = ""
         self.metadata_source = "source"
 
-    def _retrieve_context_chunk_in_document_with_workflow(
+    async def _retrieve_context_chunk_in_document_with_workflow(
         self,
         workflow,
         markdown_content: str,
@@ -53,7 +55,7 @@ class ContextChunksInDocumentService:
     ) -> Document:
         """Retrieve context chunks in document."""
         try:
-            result = workflow.
+            result = await workflow.ainvoke(
                 {
                     "messages": [
                         HumanMessage(
@@ -74,9 +76,7 @@ class ContextChunksInDocumentService:
                     }
                 },
             )
-
-            # f"Context:{result['context']}, Content:{chunk.page_content}"
-            # )
+            chunk.page_content = f"<context>\n{result['context']}\n</context>\n <content>\n{chunk.page_content}\n</content>"
             chunk.metadata["context"] = result["context"]
             if chunk_metadata:
                 for key, value in chunk_metadata.items():
@@ -86,75 +86,7 @@ class ContextChunksInDocumentService:
             logger.error(f"Failed to retrieve context chunks in document: {str(e)}")
             raise
 
-
-    # self,
-    # markdown_content: str,
-    # chunk: Document,
-    # chunk_metadata: Optional[Dict[str, Any]] = None,
-    # ) -> Document:
-    # """Retrieve context chunks in document."""
-    # try:
-    # chunk_output_parser = PydanticOutputParser(pydantic_object=ContextChunk)
-    # # Create the prompt template with image
-    # prompt = ChatPromptTemplate.from_messages(
-    # [
-    # ("system", CONTEXT_CHUNKS_IN_DOCUMENT_SYSTEM_PROMPT),
-    # (
-    # "human",
-    # [
-    # {
-    # "type": "text",
-    # "text": f"Generate context for the following chunk: <chunk>{chunk.page_content}</chunk>, ensure all content chunks are generated in '{self.target_language}' language",
-    # }
-    # ],
-    # ),
-    # ]
-    # ).partial(
-    # document_content=markdown_content,
-    # format_instructions=chunk_output_parser.get_format_instructions(),
-    # )
-    # model_with_structured_output = self.chat_model.with_structured_output(
-    # ContextChunk
-    # )
-    # # Create the chain
-    # chain = prompt | model_with_structured_output
-    # # Process the image
-    # results = chain.invoke({})
-    # # chunk.page_content = (
-    # # f"Context:{results.context}, Content:{chunk.page_content}"
-    # # )
-    # chunk.metadata["context"] = results.context
-    # if chunk_metadata:
-    # for key, value in chunk_metadata.items():
-    # chunk.metadata[key] = value
-    # return chunk
-
-    # except Exception as e:
-    # logger.error(f"Failed to retrieve context chunks in document: {str(e)}")
-    # raise
-
-    # def retrieve_context_chunks_in_document(
-    # self,
-    # markdown_content: str,
-    # chunks: List[Document],
-    # chunks_metadata: Optional[Dict[str, Any]] = None,
-    # ) -> List[Document]:
-    # """Retrieve context chunks in document."""
-    # try:
-    # context_chunks = list(
-    # map(
-    # lambda chunk: self._retrieve_context_chunk_in_document(
-    # markdown_content, chunk, chunks_metadata
-    # ),
-    # chunks,
-    # )
-    # )
-    # return context_chunks
-    # except Exception as e:
-    # logger.error(f"Failed to retrieve context chunks in document: {str(e)}")
-    # raise
-
-    def retrieve_context_chunks_in_document_with_workflow(
+    async def retrieve_context_chunks_in_document_with_workflow(
         self,
         markdown_content: str,
         chunks: List[Document],
@@ -167,7 +99,7 @@ class ContextChunksInDocumentService:
             )
             compiled_context_workflow = context_workflow.gen_workflow()
             compiled_context_workflow = compiled_context_workflow.compile()
-
+            context_chunks_workflow_invocations = list(
                 map(
                     lambda chunk: self._retrieve_context_chunk_in_document_with_workflow(
                         compiled_context_workflow,
@@ -178,12 +110,13 @@ class ContextChunksInDocumentService:
                     chunks,
                 )
             )
+            context_chunks = await asyncio.gather(*context_chunks_workflow_invocations)
             return context_chunks
         except Exception as e:
             logger.error(f"Failed to retrieve context chunks in document: {str(e)}")
             raise
 
-    def get_context_chunks_in_document(self, file_key: str, file_tags: dict = {}):
+    async def get_context_chunks_in_document(self, file_key: str, file_tags: dict = {}):
         """
         Get the context chunks in a document.
         """
@@ -199,30 +132,13 @@ class ContextChunksInDocumentService:
             logger.info(f"Document loaded:{file_key}")
             chunks = self.rag_chunker.gen_chunks_for_document(langchain_rag_document)
             logger.info(f"Chunks generated:{len(chunks)}")
-            context_chunks =
-
+            context_chunks = (
+                await self.retrieve_context_chunks_in_document_with_workflow(
+                    markdown_content, chunks, file_tags
+                )
             )
             logger.info(f"Context chunks generated:{len(context_chunks)}")
-            # upsert validation
-            try:
-                print(f"deleting chunks: {file_key}")
-                self.delete_document_context_chunks(file_key)
-            except Exception as e:
-                logger.error(f"could not delete by source: {e}")
-            self.embeddings_manager.index_documents(context_chunks)
             return context_chunks
         except Exception as e:
-            logger.error("Error
-            raise e
-
-    def delete_document_context_chunks(self, file_key: str):
-        """
-        Delete the context chunks in a document.
-        """
-        try:
-            self.embeddings_manager.delete_documents_by_metadata_key(
-                self.metadata_source, file_key
-            )
-        except Exception as e:
-            logger.error(f"Error delete_document_context_chunks: {str(e)}")
+            logger.error(f"Error: {str(e)}")
             raise e
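
The service now fans chunk-level context generation out with asyncio: each chunk gets its own workflow coroutine and the results are collected with `asyncio.gather`. A minimal sketch of that pattern with illustrative names only (not the package's actual classes), assuming the workflow object exposes an async `ainvoke`:

```python
import asyncio


async def enrich_chunk(workflow, chunk: str) -> str:
    # Stand-in for the per-chunk workflow call; `ainvoke` is an assumption here.
    result = await workflow.ainvoke({"content": chunk})
    return f"<context>\n{result['context']}\n</context>\n<content>\n{chunk}\n</content>"


async def enrich_all(workflow, chunks: list[str]) -> list[str]:
    # Build one coroutine per chunk, then run them concurrently.
    tasks = [enrich_chunk(workflow, chunk) for chunk in chunks]
    return await asyncio.gather(*tasks)


# results = asyncio.run(enrich_all(compiled_workflow, ["chunk one", "chunk two"]))
```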

wizit_context_ingestor/application/interfaces.py
@@ -1,13 +1,19 @@
 """
 Application interfaces defining application layer contracts.
 """
+
 from abc import ABC, abstractmethod
-from
-
-from
+from typing import List, Optional, Union
+
+from langchain.indexes import IndexingResult, SQLRecordManager
 from langchain_aws import ChatBedrockConverse
+from langchain_core.documents import Document
 from langchain_google_vertexai import ChatVertexAI
 from langchain_google_vertexai.model_garden import ChatAnthropicVertex
+from langchain_postgres import PGVectorStore
+
+from ..domain.models import ParsedDoc, ParsedDocPage
+
 
 class TranscriptionService(ABC):
     """Interface for transcription services."""
@@ -17,6 +23,7 @@ class TranscriptionService(ABC):
         """Parse a document page."""
         pass
 
+
 class AiApplicationService(ABC):
     """Interface for AI application services."""
 
@@ -26,7 +33,9 @@ class AiApplicationService(ABC):
     # pass
 
     @abstractmethod
-    def load_chat_model(
+    def load_chat_model(
+        self, **kwargs
+    ) -> Union[ChatVertexAI, ChatAnthropicVertex, ChatBedrockConverse]:
         """Load a chat model."""
         pass
 
@@ -40,7 +49,9 @@ class PersistenceService(ABC):
     """Interface for persistence services."""
 
     @abstractmethod
-    def save_parsed_document(
+    def save_parsed_document(
+        self, file_key: str, parsed_document: ParsedDoc, file_tags: Optional[dict] = {}
+    ):
         """Save a parsed document."""
         pass
 
@@ -70,38 +81,56 @@ class EmbeddingsManager(ABC):
     @abstractmethod
     def configure_vector_store(
         self,
-        table_name: str = "langchain_pg_embedding",
-        vector_size: int = 768,
-        content_column: str = "document",
-        id_column: str = "id",
-        metadata_json_column: str = "cmetadata",
-        pg_record_manager: str = "postgres/langchain_pg_collection"
     ):
         """Configure the vector store."""
         pass
 
+    # @abstractmethod
+    # async def init_vector_store(
+    # self,
+    # table_name: str = "tenant_embeddings",
+    # content_column: str = "document",
+    # metadata_json_column: str = "cmetadata",
+    # id_column: str = "id",
+    # ):
+    # """Initialize the vector store."""
+    # pass
+
     @abstractmethod
-    def
+    def retrieve_vector_store(
         self,
-
-
-        metadata_json_column: str = "cmetadata",
-        id_column: str = "id",
-    ):
-        """Initialize the vector store."""
+    ) -> tuple[PGVectorStore, SQLRecordManager]:
+        """Retrieve the vector store."""
         pass
 
     @abstractmethod
-    def index_documents(
+    def index_documents(
+        self,
+        docs: list[Document],
+    ) -> IndexingResult:
         """Index documents."""
         pass
 
     @abstractmethod
-    def
-
+    def search_records(
+        self,
+        query: str,
+    ) -> list[Document]:
+        """Search documents."""
         pass
 
     @abstractmethod
-    def
-
+    def create_index(
+        self,
+    ):
         pass
+
+    # @abstractmethod
+    # def get_documents_keys_by_source_id(self, source_id: str):
+    # """Get documents keys by source ID."""
+    # pass
+
+    # @abstractmethod
+    # def delete_documents_by_source_id(self, source_id: str):
+    # """Delete documents by source ID."""
+    # pass
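
The reworked EmbeddingsManager contract asks implementations to hand back the vector store together with its record manager, report indexing results, and support search and index creation. A rough skeleton of a conforming subclass; the import path is taken from the package's file layout, only the methods touched by this diff are shown (the ABC may declare others), and the bodies are placeholders:

```python
from langchain.indexes import IndexingResult, SQLRecordManager
from langchain_core.documents import Document
from langchain_postgres import PGVectorStore

# Import path inferred from wizit_context_ingestor/application/interfaces.py above.
from wizit_context_ingestor.application.interfaces import EmbeddingsManager


class MyPgEmbeddingsManager(EmbeddingsManager):
    def configure_vector_store(self):
        ...  # create the embeddings table / collection as needed

    def retrieve_vector_store(self) -> tuple[PGVectorStore, SQLRecordManager]:
        ...  # return the store plus the record manager used for indexing

    def index_documents(self, docs: list[Document]) -> IndexingResult:
        ...  # e.g. delegate to langchain.indexes.index(...)

    def search_records(self, query: str) -> list[Document]:
        ...  # similarity search against the store

    def create_index(self):
        ...  # e.g. build an HNSW index on the embedding column
```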

wizit_context_ingestor/application/kdb_service.py
@@ -0,0 +1,59 @@
+import logging
+
+from langchain.indexes import SQLRecordManager
+from langchain_core.documents import Document
+from langchain_postgres import PGVectorStore
+
+from .interfaces import (
+    EmbeddingsManager,
+    RagChunker,
+)
+
+logger = logging.getLogger(__name__)
+
+
+class KdbService:
+    """
+    Service for chunking documents.
+    """
+
+    def __init__(
+        self,
+        embeddings_manager: EmbeddingsManager,
+    ):
+        """
+        Initialize the ChunkerService.
+        """
+        self.embeddings_manager = embeddings_manager
+        self._vector_store = None
+        self._records_manager = None
+
+    def configure_kdb(self):
+        try:
+            self.embeddings_manager.configure_vector_store()
+        except Exception as e:
+            raise Exception(f"Error configuring KDB: {e}")
+
+    def create_vector_store_hsnw_index(self):
+        try:
+            self.embeddings_manager.create_index()
+        except Exception as e:
+            logger.error(f"Error creating vector store index: {e}")
+            raise Exception(f"Error creating vector store index: {e}")
+
+    def search(self, query: str) -> list[Document]:
+        try:
+            records = []
+            records = self.embeddings_manager.search_records(query)
+            print(records)
+            return records
+        except Exception as e:
+            logger.error(f"Error indexing documents: {e}")
+            raise Exception(f"Error indexing documents: {e}")
+
+    def index_documents_in_vector_store(self, documents: list[Document]) -> None:
+        try:
+            self.embeddings_manager.index_documents(documents)
+        except Exception as e:
+            logger.error(f"Error indexing documents: {e}")
+            raise Exception(f"Error indexing documents: {e}")
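
A short usage sketch for the new KdbService; the embeddings manager argument is a placeholder for any EmbeddingsManager implementation, and the import path follows the package's file layout:

```python
from langchain_core.documents import Document

from wizit_context_ingestor.application.kdb_service import KdbService


def build_and_query(embeddings_manager) -> list[Document]:
    """embeddings_manager: any EmbeddingsManager implementation (placeholder)."""
    kdb = KdbService(embeddings_manager=embeddings_manager)
    kdb.configure_kdb()                   # delegates to configure_vector_store()
    kdb.create_vector_store_hsnw_index()  # delegates to create_index()
    kdb.index_documents_in_vector_store(
        [Document(page_content="hello world", metadata={"source": "demo"})]
    )
    return kdb.search("hello")
```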

wizit_context_ingestor/application/transcription_service.py
@@ -1,3 +1,4 @@
+import asyncio
 from typing import Tuple, List, Dict, Optional
 from langchain_core.prompts import ChatPromptTemplate
 from langchain_core.output_parsers.pydantic import PydanticOutputParser
@@ -23,15 +24,15 @@ class TranscriptionService:
         persistence_service: PersistenceService,
         target_language: str = "es",
         transcription_additional_instructions: str = "",
-        transcription_accuracy_threshold:
+        transcription_accuracy_threshold: float = 0.90,
         max_transcription_retries: int = 2,
     ):
         self.ai_application_service = ai_application_service
         self.persistence_service = persistence_service
         self.target_language = target_language
         if (
-            transcription_accuracy_threshold < 0
-            or transcription_accuracy_threshold > 95
+            transcription_accuracy_threshold < 0.0
+            or transcription_accuracy_threshold > 0.95
         ):
             raise ValueError(
                 "transcription_accuracy_threshold must be between 0 and 95"
@@ -46,6 +47,15 @@ class TranscriptionService:
             transcription_additional_instructions
         )
         self.chat_model = self.ai_application_service.load_chat_model()
+        self.transcription_workflow = TranscriptionWorkflow(
+            self.chat_model, self.transcription_additional_instructions
+        )
+        self.compiled_transcription_workflow = (
+            self.transcription_workflow.gen_workflow()
+        )
+        self.compiled_transcription_workflow = (
+            self.compiled_transcription_workflow.compile()
+        )
 
     # def parse_doc_page(self, document: ParsedDocPage) -> ParsedDocPage:
     # """Transcribe an image to text.
@@ -101,19 +111,19 @@ class TranscriptionService:
     # logger.error(f"Failed to parse document page: {str(e)}")
     # raise
 
-    def parse_doc_page_with_workflow(
+    async def parse_doc_page_with_workflow(
+        self, document: ParsedDocPage, retries: int = 0
+    ) -> ParsedDocPage:
         """Transcribe an image to text using an agent.
         Args:
             document: The document with the image to transcribe
         Returns:
             Processed text
         """
-
-
-
-
-        compiled_transcription_workflow = compiled_transcription_workflow.compile()
-        result = compiled_transcription_workflow.invoke(
+        if retries > 1:
+            logger.info("Max retries exceeded")
+            return document
+        result = await self.compiled_transcription_workflow.ainvoke(
             {
                 "messages": [
                     HumanMessage(
@@ -143,23 +153,44 @@ class TranscriptionService:
                 }
             },
         )
-        if
+        if "transcription" in result:
             document.page_text = result["transcription"]
         else:
-
+            return await self.parse_doc_page_with_workflow(
+                document, retries=retries + 1
+            )
         return document
 
-    def process_document(self, file_key: str) -> Tuple[List[ParsedDocPage], ParsedDoc]:
+    # def process_document(self, file_key: str) -> Tuple[List[ParsedDocPage], ParsedDoc]:
+    # """
+    # Process a document by parsing it and returning the parsed content.
+    # """
+    # raw_file_path = self.persistence_service.retrieve_raw_file(file_key)
+    # parse_doc_model_service = ParseDocModelService(raw_file_path)
+    # document_pages = parse_doc_model_service.parse_document_to_base64()
+    # parsed_pages = []
+    # for page in document_pages:
+    # page = self.parse_doc_page_with_workflow(page)
+    # parsed_pages.append(page)
+    # logger.info(f"Parsed {len(parsed_pages)} pages")
+    # parsed_document = parse_doc_model_service.create_md_content(parsed_pages)
+    # return parsed_pages, parsed_document
+
+    async def process_document(
+        self, file_key: str
+    ) -> Tuple[List[ParsedDocPage], ParsedDoc]:
         """
         Process a document by parsing it and returning the parsed content.
         """
         raw_file_path = self.persistence_service.retrieve_raw_file(file_key)
         parse_doc_model_service = ParseDocModelService(raw_file_path)
         document_pages = parse_doc_model_service.parse_document_to_base64()
+        parse_pages_workflow_tasks = []
         parsed_pages = []
         for page in document_pages:
-
-
+            parse_pages_workflow_tasks.append(self.parse_doc_page_with_workflow(page))
+        # here
+        parsed_pages = await asyncio.gather(*parse_pages_workflow_tasks)
         logger.info(f"Parsed {len(parsed_pages)} pages")
         parsed_document = parse_doc_model_service.create_md_content(parsed_pages)
         return parsed_pages, parsed_document
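
Transcription now retries the workflow a bounded number of times when the result lacks a "transcription" key. A generic sketch of that guard, assuming a hypothetical workflow object with an async `ainvoke` method:

```python
async def run_with_retry(workflow, payload: dict, retries: int = 0, max_retries: int = 1) -> dict:
    # Mirrors the bounded-retry guard above: give up after max_retries attempts.
    if retries > max_retries:
        return {}  # caller keeps the original document unchanged
    result = await workflow.ainvoke(payload)
    if "transcription" in result:
        return result
    return await run_with_retry(workflow, payload, retries=retries + 1, max_retries=max_retries)
```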

wizit_context_ingestor/data/kdb.py
@@ -2,9 +2,12 @@ from enum import Enum
 from typing import Literal
 
 
-class KdbServices(Enum):
+class KdbServices(str, Enum):
     REDIS = "redis"
     CHROMA = "chroma"
+    PG = "pg"
 
 
-kdb_services = Literal[
+kdb_services = Literal[
+    KdbServices.REDIS.value, KdbServices.CHROMA.value, KdbServices.PG.value
+]
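
Mixing `str` into the enum makes each member a real string, so it compares equal to the raw literal and can be passed wherever the `Literal` alias expects a plain string. A standalone illustration mirroring the enum above:

```python
from enum import Enum


class KdbServices(str, Enum):
    REDIS = "redis"
    CHROMA = "chroma"
    PG = "pg"


assert KdbServices.PG == "pg"           # true only because of the str mixin
assert isinstance(KdbServices.PG, str)  # members can be used where plain strings are expected
```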

wizit_context_ingestor/data/prompts.py
@@ -227,7 +227,7 @@ Generate the optimized context following these specifications:
 
 WORKFLOW_CONTEXT_CHUNKS_IN_DOCUMENT_SYSTEM_PROMPT = """
 You are an expert RAG (Retrieval-Augmented Generation) context generator that creates optimized contextual chunks from markdown document content for enhanced search and retrieval performance.
-OBJECTIVE: Generate
+OBJECTIVE: Generate concise, searchable context descriptions that maximize retrieval accuracy and relevance in RAG systems.
 WORKFLOW:
 <task_analysis>
 1. LANGUAGE DETECTION: Identify the primary language used in the document content
@@ -243,10 +243,7 @@ Your generated context must synthesize ALL of these elements into a coherent des
 - chunk_keywords: Primary and secondary keywords, technical terms, and searchable phrases that would help users find this content
 - chunk_description: Clear explanation of what the chunk contains, including data types, concepts, and information presented
 - chunk_function: The chunk's specific purpose and role (e.g., definition, explanation, example, instruction, procedure, list, summary, analysis, conclusion)
-- chunk_structure: Format and organizational pattern (paragraph, bulleted list, numbered steps, table, code block, heading, etc.)
-- chunk_main_idea: The central concept, message, or takeaway that the chunk communicates
 - chunk_domain: Subject area or field of knowledge (e.g., technical documentation, legal text, medical information, business process)
-- chunk_audience: Intended reader level and background (e.g., beginner, expert, general audience, specific role)
 </context_elements>
 CRITICAL RULES:
 <critical_rules>
@@ -258,6 +255,7 @@ CRITICAL RULES:
 - Do NOT reproduce or quote the original chunk content verbatim
 - Ensure context is self-contained and understandable without the original chunk
 - Use natural language that flows well while incorporating all required elements
+- Do not generate extensive contexts, two sentences or less is required, ensure concise and succinct context.
 </critical_rules>
 
 SEARCH OPTIMIZATION GUIDELINES:

wizit_context_ingestor/domain/services.py
@@ -8,8 +8,9 @@ from ..domain.models import ParsedDocPage, ParsedDoc
 
 logger = logging.getLogger(__name__)
 
+
 # CHECK THIS THING IMPROVE THE WAY CODE IS STRUCTURED
-class ParseDocModelService():
+class ParseDocModelService:
     """
     Class for parsing PDF documents, converting pages to base64 images
     """
@@ -25,7 +26,6 @@ class ParseDocModelService():
         self.pdf_document = pymupdf.open(file_path)
         self.page_count = self.pdf_document.page_count
 
-
     def pdf_page_to_base64(self, page_number: int) -> ParsedDocPage:
         """
         Convert a PDF page to a base64-encoded PNG image.
@@ -48,10 +48,7 @@ class ParseDocModelService():
             img.save(buffer, format="PNG")
             b64_encoded_image = base64.b64encode(buffer.getvalue()).decode("utf-8")
             logger.info(f"Page {page_number} encoded successfully")
-            return ParsedDocPage(
-                page_number=page_number,
-                page_base64=b64_encoded_image
-            )
+            return ParsedDocPage(page_number=page_number, page_base64=b64_encoded_image)
         except Exception as e:
             logger.error(f"Failed to parse b64 image: {str(e)}")
             raise
@@ -87,12 +84,10 @@ class ParseDocModelService():
         Create a markdown content from a list of parsed pages.
         """
         md_content = ""
-
+        sorted_pages = sorted(parsed_pages, key=lambda page: page.page_number)
+        for page in sorted_pages:
             md_content += f"## Page {page.page_number}\n\n"
             md_content += f"{page.page_text}\n\n"
-        return ParsedDoc(
-            pages=parsed_pages,
-            document_text=md_content
-        )
+        return ParsedDoc(pages=parsed_pages, document_text=md_content)
 
     # def
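
For orientation, a compact sketch of the page-to-base64 idea the class implements, rendering directly with PyMuPDF; the package's own code goes through PIL and its exact parameters may differ:

```python
import base64

import pymupdf  # PyMuPDF


def page_to_base64_png(pdf_path: str, page_number: int) -> str:
    # Render one page to PNG bytes and base64-encode them.
    doc = pymupdf.open(pdf_path)
    pixmap = doc[page_number].get_pixmap()
    return base64.b64encode(pixmap.tobytes("png")).decode("utf-8")
```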

wizit_context_ingestor/infra/rag/chroma_embeddings.py
@@ -46,26 +46,29 @@ class ChromaEmbeddingsManager(EmbeddingsManager):
             logger.error(f"Failed to initialize ChromaEmbeddingsManager: {str(e)}")
             raise
 
-    def configure_vector_store(
+    async def configure_vector_store(
         self,
         table_name: str = "",
         vector_size: int = 768,
         content_column: str = "document",
         id_column: str = "id",
+        metadata_json_column: str = "cmetadata",
+        pg_record_manager: str = "postgres/langchain_pg_collection",
     ):
         """Configure the vector store."""
         pass
 
-    def init_vector_store(
+    async def init_vector_store(
         self,
         table_name: str = "",
         content_column: str = "document",
+        metadata_json_column: str = "cmetadata",
         id_column: str = "id",
     ):
         """Initialize the vector store."""
         pass
 
-    def index_documents(self, documents: list[Document]):
+    async def index_documents(self, documents: list[Document]):
         """
         Add documents to the vector store with their embeddings.
 
@@ -85,7 +88,7 @@ class ChromaEmbeddingsManager(EmbeddingsManager):
         """
         try:
             logger.info(f"Indexing {len(documents)} documents in vector store")
-            self.chroma.
+            await self.chroma.aadd_documents(documents)
         except Exception as e:
             logger.error(f"Error indexing documents: {str(e)}")
             raise
@@ -110,12 +113,14 @@ class ChromaEmbeddingsManager(EmbeddingsManager):
             logger.error(f"Error deleting documents by ID: {str(e)}")
             raise
 
-    def delete_documents_by_metadata_key(
+    async def delete_documents_by_metadata_key(
+        self, metadata_key: str, metadata_value: str
+    ):
         """
         Delete documents by filter from the vector store.
         """
         try:
-            self.chroma.
+            await self.chroma.adelete(where={metadata_key: metadata_value})
         except Exception as error:
             logger.error(
                 f"Error deleting documents by filter: {str(filter)}, error: {error} "