PyPI - wizit-context-ingestor - Versions diffs - 0.2.3b0__tar.gz → 0.2.4__tar.gz - Mend

wizit-context-ingestor 0.2.3b0.tar.gz → 0.2.4.tar.gz

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. The information in this diff is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of wizit-context-ingestor might be problematic. Click here for more details.

Files changed (31) hide show

{wizit_context_ingestor-0.2.3b0 → wizit_context_ingestor-0.2.4}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: wizit-context-ingestor
-Version: 0.2.3b0
+Version: 0.2.4
 Summary: Contextual Rag with Cloud Solutions
 Requires-Dist: anthropic[vertex]>=0.66.0
 Requires-Dist: boto3>=1.40.23
@@ -79,6 +79,14 @@ from main import context_chunks_in_document
 # Get semantic chunks from a document
 context_chunks_in_document("your-document.pdf")
 ```
+## Running Memory Profiler
+To run the memory profiler, use the following command:
+```bash
+python -m memray run test_redis.py
+```
 ## Project Structure

{wizit_context_ingestor-0.2.3b0 → wizit_context_ingestor-0.2.4}/README.md RENAMED Viewed

@@ -64,6 +64,14 @@ from main import context_chunks_in_document
 # Get semantic chunks from a document
 context_chunks_in_document("your-document.pdf")
 ```
+## Running Memory Profiler
+To run the memory profiler, use the following command:
+```bash
+python -m memray run test_redis.py
+```
 ## Project Structure

{wizit_context_ingestor-0.2.3b0 → wizit_context_ingestor-0.2.4}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "wizit_context_ingestor"
-version = "0.2.3-beta"
+version = "0.2.4"
 description = "Contextual Rag with Cloud Solutions"
 readme = "README.md"
 requires-python = ">=3.12"
@@ -17,6 +17,8 @@ dependencies = [
 [dependency-groups]
 dev = [
+    "memray>=1.18.0",
+    "pyinstrument>=5.1.1",
     "python-dotenv>=1.1.1",
 ]

{wizit_context_ingestor-0.2.3b0 → wizit_context_ingestor-0.2.4}/src/wizit_context_ingestor/application/transcription_service.py RENAMED Viewed

@@ -56,6 +56,12 @@ class TranscriptionService:
                 # Create the chain
                 chain = prompt | model_with_structured_output
                 # Process the image
+                chain = chain.with_retry(
+                    stop_after_attempt=3,
+                    exponential_jitter_params={
+                        "initial": 60
+                    }
+                )
                 result = chain.invoke({})
                 if result.transcription:
                     document.page_text = result.transcription

{wizit_context_ingestor-0.2.3b0 → wizit_context_ingestor-0.2.4}/src/wizit_context_ingestor/infra/aws_model.py RENAMED Viewed

@@ -12,7 +12,7 @@ class AWSModels(AiApplicationService):
     A wrapper class for Google Cloud Vertex AI models that handles credentials and
     provides methods to load embeddings and chat models.
     """
+    __slots__ = ('llm_model_id')
     def __init__(
         self,
         llm_model_id: str = "us.anthropic.claude-3-5-sonnet-20241022-v2:0"

{wizit_context_ingestor-0.2.3b0 → wizit_context_ingestor-0.2.4}/src/wizit_context_ingestor/infra/persistence/s3_storage.py RENAMED Viewed

@@ -1,6 +1,6 @@
 from ...application.interfaces import PersistenceService
 from ...domain.models import ParsedDoc
-import boto3
+from boto3 import client as boto3_client
 import logging
 import os
 from botocore.exceptions import ClientError
@@ -10,9 +10,9 @@ logger = logging.getLogger(__name__)
 class S3StorageService(PersistenceService):
     """Persistence service for S3 storage."""
+    __slots__ = ('origin_bucket_name', 'target_bucket_name', 'region_name')
     def __init__(self, origin_bucket_name: str, target_bucket_name: str, region_name: str = 'us-east-1'):
-        self.s3 = boto3.client('s3', region_name=region_name)
+        self.s3 = boto3_client('s3', region_name=region_name)
         self.origin_bucket_name = origin_bucket_name
         self.target_bucket_name = target_bucket_name

{wizit_context_ingestor-0.2.3b0 → wizit_context_ingestor-0.2.4}/src/wizit_context_ingestor/infra/rag/pg_embeddings.py RENAMED Viewed

@@ -38,7 +38,7 @@ class PgEmbeddingsManager(EmbeddingsManager):
       ... )
       >>> documents = [Document(page_content="Sample text", metadata={"source": "example"})]
     """
+    __slots__ = ("embeddings_model", "pg_connection")
     def __init__(self, embeddings_model, pg_connection: str):
         """
           Initialize the PgEmbeddingsManager.

{wizit_context_ingestor-0.2.3b0 → wizit_context_ingestor-0.2.4}/src/wizit_context_ingestor/infra/rag/redis_embeddings.py RENAMED Viewed

@@ -11,6 +11,7 @@ logger = logging.getLogger(__name__)
 class RedisEmbeddingsManager(EmbeddingsManager):
+    __slots__ = ("embeddings_model", "redis_conn_string", "metadata_tags")
     def __init__(self, embeddings_model, redis_conn_string: str, metadata_tags: dict):
         """
         Initialize the RedisEmbeddingsManager.

{wizit_context_ingestor-0.2.3b0 → wizit_context_ingestor-0.2.4}/src/wizit_context_ingestor/infra/rag/semantic_chunks.py RENAMED Viewed

@@ -16,7 +16,7 @@ class SemanticChunks(RagChunker):
     Class for semantically chunking documents into smaller pieces based on semantic similarity.
     Uses LangChain's SemanticChunker to create semantically coherent document chunks.
     """
+    __slots__ = ("embeddings_model",)
     def __init__(self, embeddings_model: Any):
         """
         Initialize a document chunker with an embeddings model.

{wizit_context_ingestor-0.2.3b0 → wizit_context_ingestor-0.2.4}/src/wizit_context_ingestor/infra/secrets/aws_secrets_manager.py RENAMED Viewed

@@ -1,4 +1,4 @@
-import boto3
+from boto3 import client as boto3_client
 import logging
 logger = logging.getLogger(__name__)
@@ -6,7 +6,7 @@ logger = logging.getLogger(__name__)
 class AwsSecretsManager:
     def __init__(self):
-        self.client = boto3.client('secretsmanager')
+        self.client = boto3_client('secretsmanager')
     def get_secret(self, secret_name):
@@ -30,4 +30,4 @@ class AwsSecretsManager:
             return msg
         except Exception as e:
             logger.error(f"An unknown error occurred: {str(e)}.")
-            raise
+            raise

{wizit_context_ingestor-0.2.3b0 → wizit_context_ingestor-0.2.4}/src/wizit_context_ingestor/infra/vertex_model.py RENAMED Viewed

@@ -1,4 +1,4 @@
-import vertexai
+from vertexai import init as vertexai_init
 from google.oauth2 import service_account
 from langchain_google_vertexai import VertexAIEmbeddings, ChatVertexAI
 from langchain_google_vertexai.model_garden import ChatAnthropicVertex
@@ -15,7 +15,7 @@ class VertexModels(AiApplicationService):
     A wrapper class for Google Cloud Vertex AI models that handles credentials and
     provides methods to load embeddings and chat models.
     """
+    __slots__ = ('project_id', 'location', 'json_service_account', 'scopes', 'llm_model_id')
     def __init__(
             self,
             project_id: str,
@@ -42,7 +42,7 @@ class VertexModels(AiApplicationService):
             self.llm_model_id = llm_model_id
             self.project_id = project_id
             self.location = location
-            vertexai.init(
+            vertexai_init(
                 project=project_id,
                 location=location,
                 credentials=self.credentials
@@ -54,7 +54,7 @@ class VertexModels(AiApplicationService):
     def load_embeddings_model(
         self,
-        embeddings_model_id: str = "text-embedding-005") -> VertexAIEmbeddings:  # noqa: E125
+        embeddings_model_id: str = "text-multilingual-embedding-002") -> VertexAIEmbeddings:  # noqa: E125
         """
         Load and return a Vertex AI embeddings model.
         default embeddings length is 768 https://cloud.google.com/vertex-ai/generative-ai/docs/embeddings/get-text-embeddings

{wizit_context_ingestor-0.2.3b0 → wizit_context_ingestor-0.2.4}/src/wizit_context_ingestor/main.py RENAMED Viewed

@@ -78,6 +78,7 @@ class DeelabRedisChunksManager:
             gcp_secret_name: str,
             redis_connection_string: str,
             llm_model_id: str = "claude-3-5-haiku@20241022",
+            embeddings_model_id: str = "text-multilingual-embedding-002",
             target_language: str = "es"
     ):
         self.gcp_project_id = gcp_project_id
@@ -89,7 +90,7 @@ class DeelabRedisChunksManager:
         self.gcp_sa_dict = self._get_gcp_sa_dict(gcp_secret_name)
         self.redis_connection_string = redis_connection_string
         self.vertex_model = self._get_vertex_model()
-        self.embeddings_model = self.vertex_model.load_embeddings_model()
+        self.embeddings_model = self.vertex_model.load_embeddings_model(embeddings_model_id)
     def _get_gcp_sa_dict(self, gcp_secret_name: str):
         vertex_gcp_sa = self.aws_secrets_manager.get_secret(gcp_secret_name)