wizit-context-ingestor 0.2.3b0__tar.gz → 0.2.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of wizit-context-ingestor might be problematic. Click here for more details.

Files changed (31)
  1. {wizit_context_ingestor-0.2.3b0 → wizit_context_ingestor-0.2.4}/PKG-INFO +9 -1
  2. {wizit_context_ingestor-0.2.3b0 → wizit_context_ingestor-0.2.4}/README.md +8 -0
  3. {wizit_context_ingestor-0.2.3b0 → wizit_context_ingestor-0.2.4}/pyproject.toml +3 -1
  4. {wizit_context_ingestor-0.2.3b0 → wizit_context_ingestor-0.2.4}/src/wizit_context_ingestor/application/transcription_service.py +6 -0
  5. {wizit_context_ingestor-0.2.3b0 → wizit_context_ingestor-0.2.4}/src/wizit_context_ingestor/infra/aws_model.py +1 -1
  6. {wizit_context_ingestor-0.2.3b0 → wizit_context_ingestor-0.2.4}/src/wizit_context_ingestor/infra/persistence/s3_storage.py +3 -3
  7. {wizit_context_ingestor-0.2.3b0 → wizit_context_ingestor-0.2.4}/src/wizit_context_ingestor/infra/rag/pg_embeddings.py +1 -1
  8. {wizit_context_ingestor-0.2.3b0 → wizit_context_ingestor-0.2.4}/src/wizit_context_ingestor/infra/rag/redis_embeddings.py +1 -0
  9. {wizit_context_ingestor-0.2.3b0 → wizit_context_ingestor-0.2.4}/src/wizit_context_ingestor/infra/rag/semantic_chunks.py +1 -1
  10. {wizit_context_ingestor-0.2.3b0 → wizit_context_ingestor-0.2.4}/src/wizit_context_ingestor/infra/secrets/aws_secrets_manager.py +3 -3
  11. {wizit_context_ingestor-0.2.3b0 → wizit_context_ingestor-0.2.4}/src/wizit_context_ingestor/infra/vertex_model.py +4 -4
  12. {wizit_context_ingestor-0.2.3b0 → wizit_context_ingestor-0.2.4}/src/wizit_context_ingestor/main.py +2 -1
  13. {wizit_context_ingestor-0.2.3b0 → wizit_context_ingestor-0.2.4}/src/wizit_context_ingestor/.DS_Store +0 -0
  14. {wizit_context_ingestor-0.2.3b0 → wizit_context_ingestor-0.2.4}/src/wizit_context_ingestor/__init__.py +0 -0
  15. {wizit_context_ingestor-0.2.3b0 → wizit_context_ingestor-0.2.4}/src/wizit_context_ingestor/application/__init__.py +0 -0
  16. {wizit_context_ingestor-0.2.3b0 → wizit_context_ingestor-0.2.4}/src/wizit_context_ingestor/application/context_chunk_service.py +0 -0
  17. {wizit_context_ingestor-0.2.3b0 → wizit_context_ingestor-0.2.4}/src/wizit_context_ingestor/application/interfaces.py +0 -0
  18. {wizit_context_ingestor-0.2.3b0 → wizit_context_ingestor-0.2.4}/src/wizit_context_ingestor/data/__init__.py +0 -0
  19. {wizit_context_ingestor-0.2.3b0 → wizit_context_ingestor-0.2.4}/src/wizit_context_ingestor/data/prompts.py +0 -0
  20. {wizit_context_ingestor-0.2.3b0 → wizit_context_ingestor-0.2.4}/src/wizit_context_ingestor/domain/__init__.py +0 -0
  21. {wizit_context_ingestor-0.2.3b0 → wizit_context_ingestor-0.2.4}/src/wizit_context_ingestor/domain/models.py +0 -0
  22. {wizit_context_ingestor-0.2.3b0 → wizit_context_ingestor-0.2.4}/src/wizit_context_ingestor/domain/services.py +0 -0
  23. {wizit_context_ingestor-0.2.3b0 → wizit_context_ingestor-0.2.4}/src/wizit_context_ingestor/infra/__init__.py +0 -0
  24. {wizit_context_ingestor-0.2.3b0 → wizit_context_ingestor-0.2.4}/src/wizit_context_ingestor/infra/persistence/__init__.py +0 -0
  25. {wizit_context_ingestor-0.2.3b0 → wizit_context_ingestor-0.2.4}/src/wizit_context_ingestor/infra/persistence/local_storage.py +0 -0
  26. {wizit_context_ingestor-0.2.3b0 → wizit_context_ingestor-0.2.4}/src/wizit_context_ingestor/infra/secrets/__init__.py +0 -0
  27. {wizit_context_ingestor-0.2.3b0 → wizit_context_ingestor-0.2.4}/src/wizit_context_ingestor/services/.DS_Store +0 -0
  28. {wizit_context_ingestor-0.2.3b0 → wizit_context_ingestor-0.2.4}/src/wizit_context_ingestor/services/__init__.py +0 -0
  29. {wizit_context_ingestor-0.2.3b0 → wizit_context_ingestor-0.2.4}/src/wizit_context_ingestor/services/chunks.py +0 -0
  30. {wizit_context_ingestor-0.2.3b0 → wizit_context_ingestor-0.2.4}/src/wizit_context_ingestor/services/parse_doc.py +0 -0
  31. {wizit_context_ingestor-0.2.3b0 → wizit_context_ingestor-0.2.4}/src/wizit_context_ingestor/services/pg_embeddings_manager.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: wizit-context-ingestor
3
- Version: 0.2.3b0
3
+ Version: 0.2.4
4
4
  Summary: Contextual Rag with Cloud Solutions
5
5
  Requires-Dist: anthropic[vertex]>=0.66.0
6
6
  Requires-Dist: boto3>=1.40.23
@@ -79,6 +79,14 @@ from main import context_chunks_in_document
79
79
  # Get semantic chunks from a document
80
80
  context_chunks_in_document("your-document.pdf")
81
81
  ```
82
+ ## Running Memory Profiler
83
+
84
+ To run the memory profiler, use the following command:
85
+
86
+ ```bash
87
+ python -m memray run test_redis.py
88
+ ```
89
+
82
90
 
83
91
  ## Project Structure
84
92
 
@@ -64,6 +64,14 @@ from main import context_chunks_in_document
64
64
  # Get semantic chunks from a document
65
65
  context_chunks_in_document("your-document.pdf")
66
66
  ```
67
+ ## Running Memory Profiler
68
+
69
+ To run the memory profiler, use the following command:
70
+
71
+ ```bash
72
+ python -m memray run test_redis.py
73
+ ```
74
+
67
75
 
68
76
  ## Project Structure
69
77
 
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "wizit_context_ingestor"
3
- version = "0.2.3-beta"
3
+ version = "0.2.4"
4
4
  description = "Contextual Rag with Cloud Solutions"
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.12"
@@ -17,6 +17,8 @@ dependencies = [
17
17
 
18
18
  [dependency-groups]
19
19
  dev = [
20
+ "memray>=1.18.0",
21
+ "pyinstrument>=5.1.1",
20
22
  "python-dotenv>=1.1.1",
21
23
  ]
22
24
 
@@ -56,6 +56,12 @@ class TranscriptionService:
56
56
  # Create the chain
57
57
  chain = prompt | model_with_structured_output
58
58
  # Process the image
59
+ chain = chain.with_retry(
60
+ stop_after_attempt=3,
61
+ exponential_jitter_params={
62
+ "initial": 60
63
+ }
64
+ )
59
65
  result = chain.invoke({})
60
66
  if result.transcription:
61
67
  document.page_text = result.transcription
@@ -12,7 +12,7 @@ class AWSModels(AiApplicationService):
12
12
  A wrapper class for Google Cloud Vertex AI models that handles credentials and
13
13
  provides methods to load embeddings and chat models.
14
14
  """
15
-
15
+ __slots__ = ('llm_model_id')
16
16
  def __init__(
17
17
  self,
18
18
  llm_model_id: str = "us.anthropic.claude-3-5-sonnet-20241022-v2:0"
@@ -1,6 +1,6 @@
1
1
  from ...application.interfaces import PersistenceService
2
2
  from ...domain.models import ParsedDoc
3
- import boto3
3
+ from boto3 import client as boto3_client
4
4
  import logging
5
5
  import os
6
6
  from botocore.exceptions import ClientError
@@ -10,9 +10,9 @@ logger = logging.getLogger(__name__)
10
10
 
11
11
  class S3StorageService(PersistenceService):
12
12
  """Persistence service for S3 storage."""
13
-
13
+ __slots__ = ('origin_bucket_name', 'target_bucket_name', 'region_name')
14
14
  def __init__(self, origin_bucket_name: str, target_bucket_name: str, region_name: str = 'us-east-1'):
15
- self.s3 = boto3.client('s3', region_name=region_name)
15
+ self.s3 = boto3_client('s3', region_name=region_name)
16
16
  self.origin_bucket_name = origin_bucket_name
17
17
  self.target_bucket_name = target_bucket_name
18
18
 
@@ -38,7 +38,7 @@ class PgEmbeddingsManager(EmbeddingsManager):
38
38
  ... )
39
39
  >>> documents = [Document(page_content="Sample text", metadata={"source": "example"})]
40
40
  """
41
-
41
+ __slots__ = ("embeddings_model", "pg_connection")
42
42
  def __init__(self, embeddings_model, pg_connection: str):
43
43
  """
44
44
  Initialize the PgEmbeddingsManager.
@@ -11,6 +11,7 @@ logger = logging.getLogger(__name__)
11
11
 
12
12
  class RedisEmbeddingsManager(EmbeddingsManager):
13
13
 
14
+ __slots__ = ("embeddings_model", "redis_conn_string", "metadata_tags")
14
15
  def __init__(self, embeddings_model, redis_conn_string: str, metadata_tags: dict):
15
16
  """
16
17
  Initialize the RedisEmbeddingsManager.
@@ -16,7 +16,7 @@ class SemanticChunks(RagChunker):
16
16
  Class for semantically chunking documents into smaller pieces based on semantic similarity.
17
17
  Uses LangChain's SemanticChunker to create semantically coherent document chunks.
18
18
  """
19
-
19
+ __slots__ = ("embeddings_model",)
20
20
  def __init__(self, embeddings_model: Any):
21
21
  """
22
22
  Initialize a document chunker with an embeddings model.
@@ -1,4 +1,4 @@
1
- import boto3
1
+ from boto3 import client as boto3_client
2
2
  import logging
3
3
 
4
4
  logger = logging.getLogger(__name__)
@@ -6,7 +6,7 @@ logger = logging.getLogger(__name__)
6
6
  class AwsSecretsManager:
7
7
 
8
8
  def __init__(self):
9
- self.client = boto3.client('secretsmanager')
9
+ self.client = boto3_client('secretsmanager')
10
10
 
11
11
 
12
12
  def get_secret(self, secret_name):
@@ -30,4 +30,4 @@ class AwsSecretsManager:
30
30
  return msg
31
31
  except Exception as e:
32
32
  logger.error(f"An unknown error occurred: {str(e)}.")
33
- raise
33
+ raise
@@ -1,4 +1,4 @@
1
- import vertexai
1
+ from vertexai import init as vertexai_init
2
2
  from google.oauth2 import service_account
3
3
  from langchain_google_vertexai import VertexAIEmbeddings, ChatVertexAI
4
4
  from langchain_google_vertexai.model_garden import ChatAnthropicVertex
@@ -15,7 +15,7 @@ class VertexModels(AiApplicationService):
15
15
  A wrapper class for Google Cloud Vertex AI models that handles credentials and
16
16
  provides methods to load embeddings and chat models.
17
17
  """
18
-
18
+ __slots__ = ('project_id', 'location', 'json_service_account', 'scopes', 'llm_model_id')
19
19
  def __init__(
20
20
  self,
21
21
  project_id: str,
@@ -42,7 +42,7 @@ class VertexModels(AiApplicationService):
42
42
  self.llm_model_id = llm_model_id
43
43
  self.project_id = project_id
44
44
  self.location = location
45
- vertexai.init(
45
+ vertexai_init(
46
46
  project=project_id,
47
47
  location=location,
48
48
  credentials=self.credentials
@@ -54,7 +54,7 @@ class VertexModels(AiApplicationService):
54
54
 
55
55
  def load_embeddings_model(
56
56
  self,
57
- embeddings_model_id: str = "text-embedding-005") -> VertexAIEmbeddings: # noqa: E125
57
+ embeddings_model_id: str = "text-multilingual-embedding-002") -> VertexAIEmbeddings: # noqa: E125
58
58
  """
59
59
  Load and return a Vertex AI embeddings model.
60
60
  default embeddings length is 768 https://cloud.google.com/vertex-ai/generative-ai/docs/embeddings/get-text-embeddings
@@ -78,6 +78,7 @@ class DeelabRedisChunksManager:
78
78
  gcp_secret_name: str,
79
79
  redis_connection_string: str,
80
80
  llm_model_id: str = "claude-3-5-haiku@20241022",
81
+ embeddings_model_id: str = "text-multilingual-embedding-002",
81
82
  target_language: str = "es"
82
83
  ):
83
84
  self.gcp_project_id = gcp_project_id
@@ -89,7 +90,7 @@ class DeelabRedisChunksManager:
89
90
  self.gcp_sa_dict = self._get_gcp_sa_dict(gcp_secret_name)
90
91
  self.redis_connection_string = redis_connection_string
91
92
  self.vertex_model = self._get_vertex_model()
92
- self.embeddings_model = self.vertex_model.load_embeddings_model()
93
+ self.embeddings_model = self.vertex_model.load_embeddings_model(embeddings_model_id)
93
94
 
94
95
  def _get_gcp_sa_dict(self, gcp_secret_name: str):
95
96
  vertex_gcp_sa = self.aws_secrets_manager.get_secret(gcp_secret_name)