wizit-context-ingestor 0.3.0b1__tar.gz → 0.3.0b3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of wizit-context-ingestor might be problematic. Click here for more details.

Files changed (44)
  1. {wizit_context_ingestor-0.3.0b1 → wizit_context_ingestor-0.3.0b3}/PKG-INFO +1 -1
  2. {wizit_context_ingestor-0.3.0b1 → wizit_context_ingestor-0.3.0b3}/pyproject.toml +1 -1
  3. {wizit_context_ingestor-0.3.0b1 → wizit_context_ingestor-0.3.0b3}/src/wizit_context_ingestor/application/transcription_service.py +1 -1
  4. {wizit_context_ingestor-0.3.0b1 → wizit_context_ingestor-0.3.0b3}/src/wizit_context_ingestor/infra/rag/chroma_embeddings.py +0 -4
  5. {wizit_context_ingestor-0.3.0b1 → wizit_context_ingestor-0.3.0b3}/src/wizit_context_ingestor/infra/secrets/aws_secrets_manager.py +3 -4
  6. {wizit_context_ingestor-0.3.0b1 → wizit_context_ingestor-0.3.0b3}/src/wizit_context_ingestor/main.py +35 -1
  7. {wizit_context_ingestor-0.3.0b1 → wizit_context_ingestor-0.3.0b3}/README.md +0 -0
  8. {wizit_context_ingestor-0.3.0b1 → wizit_context_ingestor-0.3.0b3}/src/wizit_context_ingestor/.DS_Store +0 -0
  9. {wizit_context_ingestor-0.3.0b1 → wizit_context_ingestor-0.3.0b3}/src/wizit_context_ingestor/__init__.py +0 -0
  10. {wizit_context_ingestor-0.3.0b1 → wizit_context_ingestor-0.3.0b3}/src/wizit_context_ingestor/application/__init__.py +0 -0
  11. {wizit_context_ingestor-0.3.0b1 → wizit_context_ingestor-0.3.0b3}/src/wizit_context_ingestor/application/context_chunk_service.py +0 -0
  12. {wizit_context_ingestor-0.3.0b1 → wizit_context_ingestor-0.3.0b3}/src/wizit_context_ingestor/application/interfaces.py +0 -0
  13. {wizit_context_ingestor-0.3.0b1 → wizit_context_ingestor-0.3.0b3}/src/wizit_context_ingestor/data/__init__.py +0 -0
  14. {wizit_context_ingestor-0.3.0b1 → wizit_context_ingestor-0.3.0b3}/src/wizit_context_ingestor/data/kdb.py +0 -0
  15. {wizit_context_ingestor-0.3.0b1 → wizit_context_ingestor-0.3.0b3}/src/wizit_context_ingestor/data/prompts.py +0 -0
  16. {wizit_context_ingestor-0.3.0b1 → wizit_context_ingestor-0.3.0b3}/src/wizit_context_ingestor/data/storage.py +0 -0
  17. {wizit_context_ingestor-0.3.0b1 → wizit_context_ingestor-0.3.0b3}/src/wizit_context_ingestor/domain/__init__.py +0 -0
  18. {wizit_context_ingestor-0.3.0b1 → wizit_context_ingestor-0.3.0b3}/src/wizit_context_ingestor/domain/models.py +0 -0
  19. {wizit_context_ingestor-0.3.0b1 → wizit_context_ingestor-0.3.0b3}/src/wizit_context_ingestor/domain/services.py +0 -0
  20. {wizit_context_ingestor-0.3.0b1 → wizit_context_ingestor-0.3.0b3}/src/wizit_context_ingestor/infra/__init__.py +0 -0
  21. {wizit_context_ingestor-0.3.0b1 → wizit_context_ingestor-0.3.0b3}/src/wizit_context_ingestor/infra/aws_model.py +0 -0
  22. {wizit_context_ingestor-0.3.0b1 → wizit_context_ingestor-0.3.0b3}/src/wizit_context_ingestor/infra/persistence/__init__.py +0 -0
  23. {wizit_context_ingestor-0.3.0b1 → wizit_context_ingestor-0.3.0b3}/src/wizit_context_ingestor/infra/persistence/local_storage.py +0 -0
  24. {wizit_context_ingestor-0.3.0b1 → wizit_context_ingestor-0.3.0b3}/src/wizit_context_ingestor/infra/persistence/s3_storage.py +0 -0
  25. {wizit_context_ingestor-0.3.0b1 → wizit_context_ingestor-0.3.0b3}/src/wizit_context_ingestor/infra/rag/pg_embeddings.py +0 -0
  26. {wizit_context_ingestor-0.3.0b1 → wizit_context_ingestor-0.3.0b3}/src/wizit_context_ingestor/infra/rag/redis_embeddings.py +0 -0
  27. {wizit_context_ingestor-0.3.0b1 → wizit_context_ingestor-0.3.0b3}/src/wizit_context_ingestor/infra/rag/semantic_chunks.py +0 -0
  28. {wizit_context_ingestor-0.3.0b1 → wizit_context_ingestor-0.3.0b3}/src/wizit_context_ingestor/infra/secrets/__init__.py +0 -0
  29. {wizit_context_ingestor-0.3.0b1 → wizit_context_ingestor-0.3.0b3}/src/wizit_context_ingestor/infra/vertex_model.py +0 -0
  30. {wizit_context_ingestor-0.3.0b1 → wizit_context_ingestor-0.3.0b3}/src/wizit_context_ingestor/services/.DS_Store +0 -0
  31. {wizit_context_ingestor-0.3.0b1 → wizit_context_ingestor-0.3.0b3}/src/wizit_context_ingestor/services/__init__.py +0 -0
  32. {wizit_context_ingestor-0.3.0b1 → wizit_context_ingestor-0.3.0b3}/src/wizit_context_ingestor/services/chunks.py +0 -0
  33. {wizit_context_ingestor-0.3.0b1 → wizit_context_ingestor-0.3.0b3}/src/wizit_context_ingestor/services/parse_doc.py +0 -0
  34. {wizit_context_ingestor-0.3.0b1 → wizit_context_ingestor-0.3.0b3}/src/wizit_context_ingestor/services/pg_embeddings_manager.py +0 -0
  35. {wizit_context_ingestor-0.3.0b1 → wizit_context_ingestor-0.3.0b3}/src/wizit_context_ingestor/utils/file_utils.py +0 -0
  36. {wizit_context_ingestor-0.3.0b1 → wizit_context_ingestor-0.3.0b3}/src/wizit_context_ingestor/workflows/context_nodes.py +0 -0
  37. {wizit_context_ingestor-0.3.0b1 → wizit_context_ingestor-0.3.0b3}/src/wizit_context_ingestor/workflows/context_state.py +0 -0
  38. {wizit_context_ingestor-0.3.0b1 → wizit_context_ingestor-0.3.0b3}/src/wizit_context_ingestor/workflows/context_tools.py +0 -0
  39. {wizit_context_ingestor-0.3.0b1 → wizit_context_ingestor-0.3.0b3}/src/wizit_context_ingestor/workflows/context_workflow.py +0 -0
  40. {wizit_context_ingestor-0.3.0b1 → wizit_context_ingestor-0.3.0b3}/src/wizit_context_ingestor/workflows/transcription_nodes.py +0 -0
  41. {wizit_context_ingestor-0.3.0b1 → wizit_context_ingestor-0.3.0b3}/src/wizit_context_ingestor/workflows/transcription_schemas.py +0 -0
  42. {wizit_context_ingestor-0.3.0b1 → wizit_context_ingestor-0.3.0b3}/src/wizit_context_ingestor/workflows/transcription_state.py +0 -0
  43. {wizit_context_ingestor-0.3.0b1 → wizit_context_ingestor-0.3.0b3}/src/wizit_context_ingestor/workflows/transcription_tools.py +0 -0
  44. {wizit_context_ingestor-0.3.0b1 → wizit_context_ingestor-0.3.0b3}/src/wizit_context_ingestor/workflows/transcription_workflow.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: wizit-context-ingestor
3
- Version: 0.3.0b1
3
+ Version: 0.3.0b3
4
4
  Summary: Contextual Rag with Cloud Solutions
5
5
  Requires-Dist: anthropic[vertex]>=0.66.0
6
6
  Requires-Dist: boto3>=1.40.23
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "wizit_context_ingestor"
3
- version = "0.3.0-beta-1"
3
+ version = "0.3.0-beta-3"
4
4
  description = "Contextual Rag with Cloud Solutions"
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.12"
@@ -146,7 +146,7 @@ class TranscriptionService:
146
146
  if result["transcription"]:
147
147
  document.page_text = result["transcription"]
148
148
  else:
149
- raise ValueError("No transcription found")
149
+ raise ValueError(f"No transcription found: {result} ")
150
150
  return document
151
151
 
152
152
  def process_document(self, file_key: str) -> Tuple[List[ParsedDocPage], ParsedDoc]:
@@ -1,10 +1,6 @@
1
- from typing_extensions import Sequence
2
- from test.test_typing import CoolEmployee
3
1
  from langchain_core.documents import Document
4
2
  from langchain_chroma import Chroma
5
- from typing import List
6
3
  import logging
7
- from uuid import uuid4
8
4
  from ...application.interfaces import EmbeddingsManager
9
5
 
10
6
  # load_dotenv()
@@ -3,11 +3,10 @@ import logging
3
3
 
4
4
  logger = logging.getLogger(__name__)
5
5
 
6
- class AwsSecretsManager:
7
-
8
- def __init__(self):
9
- self.client = boto3_client('secretsmanager')
10
6
 
7
+ class AwsSecretsManager:
8
+ def __init__(self, aws_region="us-east-1"):
9
+ self.client = boto3_client("secretsmanager", region_name=aws_region)
11
10
 
12
11
  def get_secret(self, secret_name):
13
12
  """
@@ -12,6 +12,7 @@ from .infra.secrets.aws_secrets_manager import AwsSecretsManager
12
12
  from .data.storage import storage_services, StorageServices
13
13
  from .data.kdb import kdb_services, KdbServices
14
14
  from .utils.file_utils import has_invalid_file_name_format
15
+ from langsmith import Client, tracing_context
15
16
 
16
17
 
17
18
  class KdbManager:
@@ -69,6 +70,8 @@ class TranscriptionManager:
69
70
  gcp_project_id: str,
70
71
  gcp_project_location: str,
71
72
  gcp_secret_name: str,
73
+ langsmith_api_key: str,
74
+ langsmith_project_name: str,
72
75
  storage_service: storage_services,
73
76
  source_storage_route: str,
74
77
  target_storage_route: str,
@@ -94,6 +97,9 @@ class TranscriptionManager:
94
97
  self.max_transcription_retries = max_transcription_retries
95
98
  self.gcp_sa_dict = self._get_gcp_sa_dict(gcp_secret_name)
96
99
  self.vertex_model = self._get_vertex_model()
100
+ self.langsmith_api_key = langsmith_api_key
101
+ self.langsmith_project_name = langsmith_project_name
102
+ self.langsmith_client = Client(api_key=self.langsmith_api_key)
97
103
 
98
104
  def _get_gcp_sa_dict(self, gcp_secret_name: str):
99
105
  vertex_gcp_sa = self.aws_secrets_manager.get_secret(gcp_secret_name)
@@ -109,6 +115,18 @@ class TranscriptionManager:
109
115
  )
110
116
  return vertex_model
111
117
 
118
+ def tracing(func):
119
+ def gen_tracing_context(self, *args, **kwargs):
120
+ with tracing_context(
121
+ enabled=True,
122
+ project_name=self.langsmith_project_name,
123
+ client=self.langsmith_client,
124
+ ):
125
+ return func(self, *args, **kwargs)
126
+
127
+ return gen_tracing_context
128
+
129
+ @tracing
112
130
  def transcribe_document(self, file_key: str):
113
131
  """Transcribe a document from source storage to target storage.
114
132
  This method serves as a generic interface for transcribing documents from
@@ -171,6 +189,8 @@ class ChunksManager:
171
189
  gcp_project_id: str,
172
190
  gcp_project_location: str,
173
191
  gcp_secret_name: str,
192
+ langsmith_api_key: str,
193
+ langsmith_project_name: str,
174
194
  storage_service: storage_services,
175
195
  kdb_service: Literal["redis", "chroma"],
176
196
  kdb_params: Dict[Any, Any],
@@ -188,11 +208,13 @@ class ChunksManager:
188
208
  self.storage_service = storage_service
189
209
  self.kdb_params = kdb_params
190
210
  self.kdb_service = kdb_service
191
- # self.redis_connection_string = redis_connection_string
192
211
  self.vertex_model = self._get_vertex_model()
193
212
  self.embeddings_model = self.vertex_model.load_embeddings_model(
194
213
  embeddings_model_id
195
214
  )
215
+ self.langsmith_api_key = langsmith_api_key
216
+ self.langsmith_project_name = langsmith_project_name
217
+ self.langsmith_client = Client(api_key=self.langsmith_api_key)
196
218
 
197
219
  def _get_gcp_sa_dict(self, gcp_secret_name: str):
198
220
  vertex_gcp_sa = self.aws_secrets_manager.get_secret(gcp_secret_name)
@@ -208,6 +230,18 @@ class ChunksManager:
208
230
  )
209
231
  return vertex_model
210
232
 
233
+ def tracing(func):
234
+ def gen_tacing_context(self, *args, **kwargs):
235
+ with tracing_context(
236
+ enabled=True,
237
+ project_name=self.langsmith_project_name,
238
+ client=self.langsmith_client,
239
+ ):
240
+ return func(self, *args, **kwargs)
241
+
242
+ return gen_tacing_context
243
+
244
+ @tracing
211
245
  def gen_context_chunks(
212
246
  self, file_key: str, source_storage_route: str, target_storage_route: str
213
247
  ):