wizit-context-ingestor 0.2.5b3__py3-none-any.whl → 0.3.0b2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- wizit_context_ingestor/__init__.py +2 -2
- wizit_context_ingestor/application/context_chunk_service.py +149 -35
- wizit_context_ingestor/application/transcription_service.py +132 -52
- wizit_context_ingestor/data/kdb.py +10 -0
- wizit_context_ingestor/data/prompts.py +150 -3
- wizit_context_ingestor/data/storage.py +10 -0
- wizit_context_ingestor/infra/persistence/local_storage.py +19 -9
- wizit_context_ingestor/infra/persistence/s3_storage.py +29 -23
- wizit_context_ingestor/infra/rag/chroma_embeddings.py +30 -31
- wizit_context_ingestor/infra/rag/pg_embeddings.py +57 -54
- wizit_context_ingestor/infra/rag/redis_embeddings.py +34 -25
- wizit_context_ingestor/infra/rag/semantic_chunks.py +9 -1
- wizit_context_ingestor/infra/vertex_model.py +56 -28
- wizit_context_ingestor/main.py +192 -106
- wizit_context_ingestor/utils/file_utils.py +13 -0
- wizit_context_ingestor/workflows/context_nodes.py +73 -0
- wizit_context_ingestor/workflows/context_state.py +10 -0
- wizit_context_ingestor/workflows/context_tools.py +58 -0
- wizit_context_ingestor/workflows/context_workflow.py +42 -0
- wizit_context_ingestor/workflows/transcription_nodes.py +136 -0
- wizit_context_ingestor/workflows/transcription_schemas.py +25 -0
- wizit_context_ingestor/workflows/transcription_state.py +17 -0
- wizit_context_ingestor/workflows/transcription_tools.py +54 -0
- wizit_context_ingestor/workflows/transcription_workflow.py +42 -0
- {wizit_context_ingestor-0.2.5b3.dist-info → wizit_context_ingestor-0.3.0b2.dist-info}/METADATA +9 -1
- wizit_context_ingestor-0.3.0b2.dist-info/RECORD +44 -0
- {wizit_context_ingestor-0.2.5b3.dist-info → wizit_context_ingestor-0.3.0b2.dist-info}/WHEEL +1 -1
- wizit_context_ingestor-0.2.5b3.dist-info/RECORD +0 -32
wizit_context_ingestor/infra/vertex_model.py CHANGED

```diff
@@ -15,14 +15,23 @@ class VertexModels(AiApplicationService):
     A wrapper class for Google Cloud Vertex AI models that handles credentials and
     provides methods to load embeddings and chat models.
     """
-
+
+    __slots__ = (
+        "project_id",
+        "location",
+        "json_service_account",
+        "scopes",
+        "llm_model_id",
+    )
+
     def __init__(
+        self,
+        project_id: str,
+        location: str,
+        json_service_account: Dict[str, Any],
+        scopes: Optional[List[str]] = None,
+        llm_model_id: str = "claude-sonnet-4@20250514",
+    ):
         """
         Initialize the VertexModels class with Google Cloud credentials.
@@ -36,25 +45,24 @@ class VertexModels(AiApplicationService):
             print(location)
             self.scopes = scopes or ["https://www.googleapis.com/auth/cloud-platform"]
             self.credentials = service_account.Credentials.from_service_account_info(
-                json_service_account,
-                scopes=self.scopes
+                json_service_account, scopes=self.scopes
             )
             self.llm_model_id = llm_model_id
             self.project_id = project_id
             self.location = location
             vertexai_init(
-                project=project_id,
+                project=project_id, location=location, credentials=self.credentials
+            )
+            logger.info(
+                f"VertexModels initialized with project {project_id} in {location}"
             )
-            logger.info(f"VertexModels initialized with project {project_id} in {location}")
         except Exception as e:
             logger.error(f"Failed to initialize VertexModels: {str(e)}")
             raise
 
     def load_embeddings_model(
-        self,
+        self, embeddings_model_id: str = "text-multilingual-embedding-002"
+    ) -> VertexAIEmbeddings:  # noqa: E125
         """
         Load and return a Vertex AI embeddings model.
         default embeddings length is 768 https://cloud.google.com/vertex-ai/generative-ai/docs/embeddings/get-text-embeddings
@@ -73,14 +81,18 @@ class VertexModels(AiApplicationService):
             logger.debug(f"Loaded embedding model: {embeddings_model_id}")
             return embeddings
         except Exception as e:
-            logger.error(
+            logger.error(
+                f"Failed to load embeddings model {embeddings_model_id}: {str(e)}"
+            )
             raise
 
-    def load_chat_model(
+    def load_chat_model(
+        self,
         temperature: float = 0.15,
         max_tokens: int = 8192,
         stop: Optional[List[str]] = None,
-        **chat_model_params
+        **chat_model_params,
+    ) -> Union[ChatVertexAI, ChatAnthropicVertex]:
         """
         Load a Vertex AI chat model for text generation.
@@ -98,21 +110,35 @@ class VertexModels(AiApplicationService):
         """
         try:
             if "gemini" in self.llm_model_id:
-                return self.load_chat_model_gemini(
+                return self.load_chat_model_gemini(
+                    self.llm_model_id,
+                    temperature,
+                    max_tokens,
+                    stop,
+                    **chat_model_params,
+                )
             elif "claude" in self.llm_model_id:
-                return self.load_chat_model_anthropic(
+                return self.load_chat_model_anthropic(
+                    self.llm_model_id,
+                    temperature,
+                    max_tokens,
+                    stop,
+                    **chat_model_params,
+                )
             else:
                 raise ValueError(f"Unsupported chat model: {self.llm_model_id}")
         except Exception as e:
             logger.error(f"Failed to retrieve chat model {self.llm_model_id}: {str(e)}")
             raise
 
-    def load_chat_model_gemini(
+    def load_chat_model_gemini(
+        self,
         chat_model_id: str = "publishers/google/models/gemini-2.5-flash",
         temperature: float = 0.15,
-        max_tokens: int =
+        max_tokens: int = 64000,
         stop: Optional[List[str]] = None,
-        **chat_model_params
+        **chat_model_params,
+    ) -> ChatVertexAI:
         """
         Load a Vertex AI chat model for text generation.
@@ -137,7 +163,7 @@ class VertexModels(AiApplicationService):
                 max_tokens=max_tokens,
                 max_retries=1,
                 stop=stop,
-                **chat_model_params
+                **chat_model_params,
             )
             logger.debug(f"Retrieved chat model: {chat_model_id}")
             return self.llm_model
@@ -145,12 +171,14 @@ class VertexModels(AiApplicationService):
             logger.error(f"Failed to retrieve chat model {chat_model_id}: {str(e)}")
             raise
 
-    def load_chat_model_anthropic(
+    def load_chat_model_anthropic(
+        self,
         chat_model_id: str = "claude-3-5-haiku@20241022",
         temperature: float = 0.7,
-        max_tokens: int =
+        max_tokens: int = 64000,
         stop: Optional[List[str]] = None,
-        **chat_model_params
+        **chat_model_params,
+    ) -> ChatAnthropicVertex:
         """
         Load a Vertex AI chat model for text generation.
         """
@@ -163,7 +191,7 @@ class VertexModels(AiApplicationService):
                 max_tokens=max_tokens,
                 max_retries=1,
                 stop=stop,
-                **chat_model_params
+                **chat_model_params,
             )
             logger.debug(f"Retrieved chat model: {chat_model_id}")
             return self.llm_model
```
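In `vertex_model.py` the bulk of the change is a formatter pass (trailing commas, wrapped calls), with a few substantive tweaks: the class now declares `__slots__`, the Gemini and Anthropic loaders default to `max_tokens=64000`, and the loaders gain return-type annotations. A minimal usage sketch, assuming valid GCP service-account credentials; the file path, project id, and location below are placeholders:

```python
# Illustrative only: "sa.json", the project id, and the location are placeholders.
import json

from wizit_context_ingestor.infra.vertex_model import VertexModels

with open("sa.json") as f:
    sa_dict = json.load(f)  # service-account JSON as a dict

models = VertexModels(
    project_id="my-gcp-project",
    location="us-east5",
    json_service_account=sa_dict,
    llm_model_id="claude-sonnet-4@20250514",
)
embeddings = models.load_embeddings_model()  # text-multilingual-embedding-002
chat = models.load_chat_model(temperature=0.15, max_tokens=8192)
```

`load_chat_model` dispatches on a substring check: an `llm_model_id` containing "gemini" returns a `ChatVertexAI`, one containing "claude" returns a `ChatAnthropicVertex`, and anything else raises `ValueError`.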
wizit_context_ingestor/main.py CHANGED

```diff
@@ -1,4 +1,5 @@
 import json
+from typing import Dict, Any, Literal
 from .infra.vertex_model import VertexModels
 from .application.transcription_service import TranscriptionService
 from .application.context_chunk_service import ContextChunksInDocumentService
@@ -6,17 +7,79 @@ from .infra.persistence.s3_storage import S3StorageService
 from .infra.persistence.local_storage import LocalStorageService
 from .infra.rag.semantic_chunks import SemanticChunks
 from .infra.rag.redis_embeddings import RedisEmbeddingsManager
+from .infra.rag.chroma_embeddings import ChromaEmbeddingsManager
 from .infra.secrets.aws_secrets_manager import AwsSecretsManager
+from .data.storage import storage_services, StorageServices
+from .data.kdb import kdb_services, KdbServices
+from .utils.file_utils import has_invalid_file_name_format
+from langsmith import Client, tracing_context
 
-class DeelabTranscribeManager:
 
+class KdbManager:
+    def __init__(
+        self, embeddings_model, kdb_service: kdb_services, kdb_params: Dict[Any, Any]
+    ):
+        self.kdb_service = kdb_service
+        self.kdb_params = kdb_params
+        self.embeddings_model = embeddings_model
+
+    def retrieve_kdb_service(self):
+        if self.kdb_service == KdbServices.REDIS.value:
+            return RedisEmbeddingsManager(
+                self.embeddings_model,
+                **self.kdb_params,
+            )
+        elif self.kdb_service == KdbServices.CHROMA.value:
+            return ChromaEmbeddingsManager(
+                self.embeddings_model,
+                **self.kdb_params,
+            )
+        else:
+            raise ValueError(f"Unsupported kdb provider: {self.kdb_service}")
+
+
+class PersistenceManager:
+    def __init__(
+        self,
+        storage_service: storage_services,
+        source_storage_route,
+        target_storage_route,
+    ):
+        self.storage_service = storage_service
+        self.source_storage_route = source_storage_route
+        self.target_storage_route = target_storage_route
+
+    def retrieve_storage_service(self):
+        if self.storage_service == StorageServices.S3.value:
+            return S3StorageService(
+                origin_bucket_name=self.source_storage_route,
+                target_bucket_name=self.target_storage_route,
+            )
+        elif self.storage_service == StorageServices.LOCAL.value:
+            return LocalStorageService(
+                source_storage_route=self.source_storage_route,
+                target_storage_route=self.target_storage_route,
+            )
+        else:
+            raise ValueError(f"Unsupported storage service: {self.storage_service}")
+
+
+class TranscriptionManager:
+    def __init__(
+        self,
         gcp_project_id: str,
         gcp_project_location: str,
         gcp_secret_name: str,
+        langsmith_api_key: str,
+        langsmith_project_name: str,
+        storage_service: storage_services,
+        source_storage_route: str,
+        target_storage_route: str,
         llm_model_id: str = "claude-sonnet-4@20250514",
-        target_language: str =
-        transcription_additional_instructions: str =
+        target_language: str = "es",
+        transcription_additional_instructions: str = "",
+        transcription_accuracy_threshold: int = 90,
+        max_transcription_retries: int = 2,
     ):
         self.gcp_project_id = gcp_project_id
         self.gcp_project_location = gcp_project_location
```
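The former `DeelabTranscribeManager` is split into smaller pieces: `KdbManager` and `PersistenceManager` are factories that dispatch on the new `KdbServices` and `StorageServices` enum values, and `TranscriptionManager` now takes LangSmith credentials, storage configuration, and accuracy/retry thresholds up front. A construction sketch; every value is a placeholder, and the `"s3"` string assumes the `StorageServices` enum value implied by the dispatch code:

```python
# Placeholder values throughout; secret names and routes depend on your deployment.
from wizit_context_ingestor.main import TranscriptionManager

manager = TranscriptionManager(
    gcp_project_id="my-gcp-project",
    gcp_project_location="us-east5",
    gcp_secret_name="vertex/service-account",  # resolved via AwsSecretsManager
    langsmith_api_key="lsv2_...",
    langsmith_project_name="ingestion-traces",
    storage_service="s3",  # assumed StorageServices.S3.value; "local" selects LocalStorageService
    source_storage_route="raw-documents-bucket",
    target_storage_route="parsed-documents-bucket",
)
result_key = manager.transcribe_document("contract_2024.pdf")  # -> "contract_2024.pdf.md"
```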
```diff
@@ -24,9 +87,19 @@ class DeelabTranscribeManager:
         self.gcp_secret_name = gcp_secret_name
         self.llm_model_id = llm_model_id
         self.target_language = target_language
-        self.
+        self.storage_service = storage_service
+        self.source_storage_route = source_storage_route
+        self.target_storage_route = target_storage_route
+        self.transcription_additional_instructions = (
+            transcription_additional_instructions
+        )
+        self.transcription_accuracy_threshold = transcription_accuracy_threshold
+        self.max_transcription_retries = max_transcription_retries
         self.gcp_sa_dict = self._get_gcp_sa_dict(gcp_secret_name)
         self.vertex_model = self._get_vertex_model()
+        self.langsmith_api_key = langsmith_api_key
+        self.langsmith_project_name = langsmith_project_name
+        self.langsmith_client = Client(api_key=self.langsmith_api_key)
 
     def _get_gcp_sa_dict(self, gcp_secret_name: str):
         vertex_gcp_sa = self.aws_secrets_manager.get_secret(gcp_secret_name)
@@ -38,51 +111,92 @@ class DeelabTranscribeManager:
             self.gcp_project_id,
             self.gcp_project_location,
             self.gcp_sa_dict,
-            llm_model_id=self.llm_model_id
+            llm_model_id=self.llm_model_id,
         )
         return vertex_model
 
-    def
+    def tracing(func):
+        def gen_tracing_context(self, *args, **kwargs):
+            with tracing_context(
+                enabled=True,
+                project_name=self.langsmith_project_name,
+                client=self.langsmith_client,
+            ):
+                return func(self, *args, **kwargs)
+
+        return gen_tracing_context
+
+    @tracing
+    def transcribe_document(self, file_key: str):
+        """Transcribe a document from source storage to target storage.
+
+        This method serves as a generic interface for transcribing documents from
+        various storage sources to target destinations. The specific implementation
+        depends on the storage route types provided.
+
+        Args:
+            file_key (str): The unique identifier or path of the file to be transcribed.
+        Returns:
+            The result of the transcription process, typically the path or identifier
+            of the transcribed document.
+
+        Raises:
+            Exception: If an error occurs during the transcription process.
+        """
         try:
+            if has_invalid_file_name_format(file_key):
+                raise ValueError(
+                    "Invalid file name format, do not provide special characters or spaces (instead use underscores or hyphens)"
+                )
+            persistence_layer = PersistenceManager(
+                self.storage_service,
+                self.source_storage_route,
+                self.target_storage_route,
             )
+            persistence_service = persistence_layer.retrieve_storage_service()
 
             transcribe_document_service = TranscriptionService(
                 ai_application_service=self.vertex_model,
-                persistence_service=
+                persistence_service=persistence_service,
                 target_language=self.target_language,
-                transcription_additional_instructions=self.transcription_additional_instructions
+                transcription_additional_instructions=self.transcription_additional_instructions,
+                transcription_accuracy_threshold=self.transcription_accuracy_threshold,
+                max_transcription_retries=self.max_transcription_retries,
+            )
+            parsed_pages, parsed_document = (
+                transcribe_document_service.process_document(file_key)
+            )
+            source_storage_file_tags = {}
+            if persistence_service.supports_tagging:
+                # source_storage_file_tags.tag_file(file_key, {"status": "transcribed"})
+                source_storage_file_tags = persistence_service.retrieve_file_tags(
+                    file_key, self.source_storage_route
+                )
+            transcribe_document_service.save_parsed_document(
+                f"{file_key}.md", parsed_document, source_storage_file_tags
             )
-            parsed_pages, parsed_document = transcribe_document_service.process_document(file_key)
-            origin_bucket_file_tags = s3_persistence_service.retrieve_file_tags(file_key, s3_origin_bucket_name)
-            transcribe_document_service.save_parsed_document(f"{file_key}.md", parsed_document, origin_bucket_file_tags)
             # create md document from parsed_pages
             print("parsed_pages", len(parsed_pages))
             # print("parsed_document", parsed_document)
             return f"{file_key}.md"
         except Exception as e:
-            print(f"Error
+            print(f"Error processing document: {e}")
             raise e
 
 
-class
+class ChunksManager:
     def __init__(
+        self,
+        gcp_project_id: str,
+        gcp_project_location: str,
+        gcp_secret_name: str,
+        langsmith_api_key: str,
+        langsmith_project_name: str,
+        storage_service: storage_services,
+        kdb_service: Literal["redis", "chroma"],
+        kdb_params: Dict[Any, Any],
+        llm_model_id: str = "claude-3-5-haiku@20241022",
+        embeddings_model_id: str = "text-multilingual-embedding-002",
+        target_language: str = "es",
     ):
         self.gcp_project_id = gcp_project_id
         self.gcp_project_location = gcp_project_location
```
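Both managers define a `tracing` decorator that runs the wrapped call inside a LangSmith `tracing_context` tied to the instance's client and project (the `ChunksManager` copy spells its inner function `gen_tacing_context`, a typo preserved from the released code). The same pattern in isolation; the class and method names here are illustrative, not from the package:

```python
# Standalone sketch of the decorator pattern used by both managers.
# MyService and run() are illustrative names.
from langsmith import Client, tracing_context


def tracing(func):
    # Forward self so the wrapper can read the instance's LangSmith settings.
    def gen_tracing_context(self, *args, **kwargs):
        with tracing_context(
            enabled=True,
            project_name=self.langsmith_project_name,
            client=self.langsmith_client,
        ):
            return func(self, *args, **kwargs)

    return gen_tracing_context


class MyService:
    def __init__(self, api_key: str, project_name: str):
        self.langsmith_client = Client(api_key=api_key)
        self.langsmith_project_name = project_name

    @tracing
    def run(self, payload: str) -> str:
        return payload.upper()  # the traced work
```

Because the package defines `tracing` inside the class body and applies it at class-definition time, it acts as a plain function: it never receives `self` itself, it only forwards it to the wrapped method.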
```diff
@@ -91,9 +205,16 @@ class DeelabRedisChunksManager:
         self.llm_model_id = llm_model_id
         self.target_language = target_language
         self.gcp_sa_dict = self._get_gcp_sa_dict(gcp_secret_name)
-        self.
+        self.storage_service = storage_service
+        self.kdb_params = kdb_params
+        self.kdb_service = kdb_service
         self.vertex_model = self._get_vertex_model()
-        self.embeddings_model = self.vertex_model.load_embeddings_model(
+        self.embeddings_model = self.vertex_model.load_embeddings_model(
+            embeddings_model_id
+        )
+        self.langsmith_api_key = langsmith_api_key
+        self.langsmith_project_name = langsmith_project_name
+        self.langsmith_client = Client(api_key=self.langsmith_api_key)
 
     def _get_gcp_sa_dict(self, gcp_secret_name: str):
         vertex_gcp_sa = self.aws_secrets_manager.get_secret(gcp_secret_name)
@@ -105,92 +226,57 @@ class DeelabRedisChunksManager:
             self.gcp_project_id,
             self.gcp_project_location,
             self.gcp_sa_dict,
-            llm_model_id=self.llm_model_id
+            llm_model_id=self.llm_model_id,
         )
         return vertex_model
 
-    def
-        self,
-        self
-                "file_key": file_key
-            }
-        )
-        local_persistence_service = LocalStorageService()
-        context_chunks_in_document_service = ContextChunksInDocumentService(
-            ai_application_service=self.vertex_model,
-            persistence_service=local_persistence_service,
-            rag_chunker=rag_chunker,
-            embeddings_manager=redis_embeddings_manager,
-            target_language=self.target_language
-        )
-        context_chunks = context_chunks_in_document_service.get_context_chunks_in_document(file_key)
-        print("context_chunks", context_chunks)
-        return context_chunks
-    except Exception as e:
-        print(f"Error getting context chunks in document: {e}")
-        raise e
+    def tracing(func):
+        def gen_tacing_context(self, *args, **kwargs):
+            with tracing_context(
+                enabled=True,
+                project_name=self.langsmith_project_name,
+                client=self.langsmith_client,
+            ):
+                return func(self, *args, **kwargs)
+
+        return gen_tacing_context
 
-    def
-        s3_origin_bucket_name: str,
-        s3_target_bucket_name: str
-    ):
+    @tracing
+    def gen_context_chunks(
+        self, file_key: str, source_storage_route: str, target_storage_route: str
+    ):
         try:
+            if has_invalid_file_name_format(file_key):
+                raise ValueError(
+                    "Invalid file name format, do not provide special characters or spaces (instead use underscores or hyphens)"
+                )
+            persistence_layer = PersistenceManager(
+                self.storage_service, source_storage_route, target_storage_route
             )
+            persistence_service = persistence_layer.retrieve_storage_service()
+            target_bucket_file_tags = []
+            if persistence_service.supports_tagging:
+                target_bucket_file_tags = persistence_service.retrieve_file_tags(
+                    file_key, target_storage_route
+                )
             rag_chunker = SemanticChunks(self.embeddings_model)
-                redis_conn_string=self.redis_connection_string,
-                metadata_tags=target_bucket_file_tags
+            kdb_manager = KdbManager(
+                self.embeddings_model, self.kdb_service, self.kdb_params
             )
+            kdb_service = kdb_manager.retrieve_kdb_service()
             context_chunks_in_document_service = ContextChunksInDocumentService(
                 ai_application_service=self.vertex_model,
-                persistence_service=
+                persistence_service=persistence_service,
                 rag_chunker=rag_chunker,
-                embeddings_manager=
-                target_language=self.target_language
+                embeddings_manager=kdb_service,
+                target_language=self.target_language,
+            )
+            context_chunks = (
+                context_chunks_in_document_service.get_context_chunks_in_document(
+                    file_key, target_bucket_file_tags
+                )
             )
-            context_chunks = context_chunks_in_document_service.get_context_chunks_in_document(file_key, target_bucket_file_tags)
             return context_chunks
         except Exception as e:
             print(f"Error getting context chunks in document: {e}")
             raise e
-
-
-    def delete_document_context_chunks_from_aws_cloud(
-        self,
-        file_key: str,
-        s3_origin_bucket_name: str,
-        s3_target_bucket_name: str
-    ):
-        pass
-        # rag_chunker = SemanticChunks(self.embeddings_model)
-        # pg_embeddings_manager = PgEmbeddingsManager(
-        #     embeddings_model=self.embeddings_model,
-        #     pg_connection=self.vector_store_connection
-        # )
-        # s3_persistence_service = S3StorageService(
-        #     origin_bucket_name=s3_origin_bucket_name,
-        #     target_bucket_name=s3_target_bucket_name
-        # )
-        # context_chunks_in_document_service = ContextChunksInDocumentService(
-        #     ai_application_service=self.vertex_model,
-        #     persistence_service=s3_persistence_service,
-        #     rag_chunker=rag_chunker,
-        #     embeddings_manager=pg_embeddings_manager
-        # )
-        # context_chunks_in_document_service.delete_document_context_chunks(file_key)
```
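`gen_context_chunks` replaces the old Redis/S3-specific flow: it validates the file name, resolves the storage backend and knowledge base through the factories, reads file tags only when the backend reports `supports_tagging`, and hands the chosen embeddings manager to `ContextChunksInDocumentService`. An illustrative call; all values are placeholders, and the `kdb_params` key is an assumption extrapolated from the removed `redis_conn_string` argument:

```python
# Placeholder configuration; kdb_params keys depend on the chosen backend's constructor.
from wizit_context_ingestor.main import ChunksManager

chunks_manager = ChunksManager(
    gcp_project_id="my-gcp-project",
    gcp_project_location="us-east5",
    gcp_secret_name="vertex/service-account",
    langsmith_api_key="lsv2_...",
    langsmith_project_name="ingestion-traces",
    storage_service="s3",
    kdb_service="redis",  # or "chroma"
    kdb_params={"redis_conn_string": "redis://localhost:6379"},  # assumed key
)
context_chunks = chunks_manager.gen_context_chunks(
    "contract_2024.pdf.md",
    source_storage_route="raw-documents-bucket",
    target_storage_route="parsed-documents-bucket",
)
```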
wizit_context_ingestor/utils/file_utils.py ADDED

```diff
@@ -0,0 +1,13 @@
+import re
+
+
+def has_invalid_file_name_format(file_name):
+    """Check if file name has special characters or spaces instead of underscores"""
+    # Check for spaces
+    if " " in file_name:
+        return True
+
+    # Check for special characters (anything that's not alphanumeric, underscore, dash, or dot)
+    if re.search(r"[^a-zA-Z0-9_.-]", file_name):
+        return True
+    return False
```
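The validator rejects anything outside `[a-zA-Z0-9_.-]`, including spaces, path separators, and accented letters; both managers call it before touching storage. For example:

```python
from wizit_context_ingestor.utils.file_utils import has_invalid_file_name_format

has_invalid_file_name_format("quarterly report.pdf")     # True: contains a space
has_invalid_file_name_format("informe_señal.pdf")        # True: "ñ" is outside [a-zA-Z0-9_.-]
has_invalid_file_name_format("reports/q1.pdf")           # True: "/" is rejected too
has_invalid_file_name_format("quarterly-report_v2.pdf")  # False: letters, digits, "-", "_", "."
```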
wizit_context_ingestor/workflows/context_nodes.py ADDED

```diff
@@ -0,0 +1,73 @@
+from ..data.prompts import WORKFLOW_CONTEXT_CHUNKS_IN_DOCUMENT_SYSTEM_PROMPT
+from langchain_core.prompts import ChatPromptTemplate
+from langchain_core.prompts import MessagesPlaceholder
+from langchain_core.messages import SystemMessage, ToolMessage
+from langgraph.graph import END
+from langgraph.pregel.main import Command
+from .context_state import ContextState
+
+
+class ContextNodes:
+    def __init__(self, llm_model, tools, context_additional_instructions):
+        self.llm_model = llm_model
+        self.tools = tools
+        self.tools_by_name = {tool.name: tool for tool in tools}
+        self.context_additional_instructions = context_additional_instructions
+
+    def gen_context(self, state: ContextState, config):
+        try:
+            messages = state["messages"]
+            document_content = state["document_content"]
+            if not messages:
+                raise ValueError("No messages provided")
+            # parser = PydanticOutputParser(pydantic_object=Transcription)
+            # format_instructions=parser.get_format_instructions(),
+            formatted_context_system_prompt = WORKFLOW_CONTEXT_CHUNKS_IN_DOCUMENT_SYSTEM_PROMPT.format(
+                context_additional_instructions=self.context_additional_instructions,
+                document_content=document_content,
+            )
+
+            prompt = ChatPromptTemplate.from_messages(
+                [
+                    SystemMessage(content=formatted_context_system_prompt),
+                    MessagesPlaceholder("messages"),
+                ]
+            )
+            model_with_structured_output = self.llm_model.bind_tools(self.tools)
+            context_chain = prompt | model_with_structured_output
+            context_result = context_chain.invoke({"messages": messages})
+            return {"messages": [context_result]}
+        except Exception as e:
+            print(f"Error occurred: {e}")
+            raise e
+
+    def return_context(self, state: ContextState, config):
+        latest_message = state["messages"][-1]
+        if type(latest_message) is ToolMessage:
+            return Command(goto=END, update={"context": latest_message.content})
+        else:
+            raise ValueError("Invalid message type to return context")
+
+    def tool_node(self, state: ContextState, config):
+        messages = state["messages"]
+        tool_calls = messages[-1].tool_calls
+        should_end_workflow = False
+        observations = []
+        for tool_call in tool_calls:
+            tool_name = tool_call["name"]
+            tool = self.tools_by_name[tool_name]
+            tool_result = tool.invoke(tool_call["args"])
+            observations.append(
+                ToolMessage(
+                    content=tool_result,
+                    name=tool_call["name"],
+                    tool_call_id=tool_call["id"],
+                )
+            )
+            if tool_call["name"] == "complete_context_gen":
+                should_end_workflow = True
+
+        if should_end_workflow:
+            return Command(goto="return_context", update={"messages": observations})
+        else:
+            return Command(goto="gen_context", update={"messages": observations})
```
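`context_nodes.py` implements a tool-calling loop: `gen_context` formats the system prompt with the document content and invokes the tool-bound model, `tool_node` executes any requested tools and routes back to `gen_context` (or on to `return_context` once the model calls `complete_context_gen`), and `return_context` copies the final `ToolMessage` content into the `context` field. The actual graph assembly lives in `context_workflow.py`, which this diff does not show, so the wiring below is a sketch inferred from the `Command(goto=...)` targets rather than the package's own code:

```python
# Hypothetical assembly; node names follow the Command(goto=...) targets above.
from langgraph.graph import StateGraph, START

from wizit_context_ingestor.workflows.context_nodes import ContextNodes
from wizit_context_ingestor.workflows.context_state import ContextState


def build_context_graph(llm_model, tools, extra_instructions: str = ""):
    nodes = ContextNodes(llm_model, tools, extra_instructions)
    graph = StateGraph(ContextState)
    graph.add_node("gen_context", nodes.gen_context)
    graph.add_node("tool_node", nodes.tool_node)  # routes itself via Command
    graph.add_node("return_context", nodes.return_context)  # ends via Command(goto=END)
    graph.add_edge(START, "gen_context")
    graph.add_edge("gen_context", "tool_node")
    return graph.compile()
```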
wizit_context_ingestor/workflows/context_state.py ADDED

```diff
@@ -0,0 +1,10 @@
+from typing_extensions import Annotated, TypedDict, Sequence
+from langchain_core.messages import BaseMessage
+from langgraph.graph.message import add_messages
+
+
+class ContextState(TypedDict):
+    messages: Annotated[Sequence[BaseMessage], add_messages]
+    document_content: str
+    context: str
+    context_relevance: float
```