wizit-context-ingestor 0.3.0b1__tar.gz → 0.3.0b3__tar.gz
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of wizit-context-ingestor might be problematic. Click here for more details.
- {wizit_context_ingestor-0.3.0b1 → wizit_context_ingestor-0.3.0b3}/PKG-INFO +1 -1
- {wizit_context_ingestor-0.3.0b1 → wizit_context_ingestor-0.3.0b3}/pyproject.toml +1 -1
- {wizit_context_ingestor-0.3.0b1 → wizit_context_ingestor-0.3.0b3}/src/wizit_context_ingestor/application/transcription_service.py +1 -1
- {wizit_context_ingestor-0.3.0b1 → wizit_context_ingestor-0.3.0b3}/src/wizit_context_ingestor/infra/rag/chroma_embeddings.py +0 -4
- {wizit_context_ingestor-0.3.0b1 → wizit_context_ingestor-0.3.0b3}/src/wizit_context_ingestor/infra/secrets/aws_secrets_manager.py +3 -4
- {wizit_context_ingestor-0.3.0b1 → wizit_context_ingestor-0.3.0b3}/src/wizit_context_ingestor/main.py +35 -1
- {wizit_context_ingestor-0.3.0b1 → wizit_context_ingestor-0.3.0b3}/README.md +0 -0
- {wizit_context_ingestor-0.3.0b1 → wizit_context_ingestor-0.3.0b3}/src/wizit_context_ingestor/.DS_Store +0 -0
- {wizit_context_ingestor-0.3.0b1 → wizit_context_ingestor-0.3.0b3}/src/wizit_context_ingestor/__init__.py +0 -0
- {wizit_context_ingestor-0.3.0b1 → wizit_context_ingestor-0.3.0b3}/src/wizit_context_ingestor/application/__init__.py +0 -0
- {wizit_context_ingestor-0.3.0b1 → wizit_context_ingestor-0.3.0b3}/src/wizit_context_ingestor/application/context_chunk_service.py +0 -0
- {wizit_context_ingestor-0.3.0b1 → wizit_context_ingestor-0.3.0b3}/src/wizit_context_ingestor/application/interfaces.py +0 -0
- {wizit_context_ingestor-0.3.0b1 → wizit_context_ingestor-0.3.0b3}/src/wizit_context_ingestor/data/__init__.py +0 -0
- {wizit_context_ingestor-0.3.0b1 → wizit_context_ingestor-0.3.0b3}/src/wizit_context_ingestor/data/kdb.py +0 -0
- {wizit_context_ingestor-0.3.0b1 → wizit_context_ingestor-0.3.0b3}/src/wizit_context_ingestor/data/prompts.py +0 -0
- {wizit_context_ingestor-0.3.0b1 → wizit_context_ingestor-0.3.0b3}/src/wizit_context_ingestor/data/storage.py +0 -0
- {wizit_context_ingestor-0.3.0b1 → wizit_context_ingestor-0.3.0b3}/src/wizit_context_ingestor/domain/__init__.py +0 -0
- {wizit_context_ingestor-0.3.0b1 → wizit_context_ingestor-0.3.0b3}/src/wizit_context_ingestor/domain/models.py +0 -0
- {wizit_context_ingestor-0.3.0b1 → wizit_context_ingestor-0.3.0b3}/src/wizit_context_ingestor/domain/services.py +0 -0
- {wizit_context_ingestor-0.3.0b1 → wizit_context_ingestor-0.3.0b3}/src/wizit_context_ingestor/infra/__init__.py +0 -0
- {wizit_context_ingestor-0.3.0b1 → wizit_context_ingestor-0.3.0b3}/src/wizit_context_ingestor/infra/aws_model.py +0 -0
- {wizit_context_ingestor-0.3.0b1 → wizit_context_ingestor-0.3.0b3}/src/wizit_context_ingestor/infra/persistence/__init__.py +0 -0
- {wizit_context_ingestor-0.3.0b1 → wizit_context_ingestor-0.3.0b3}/src/wizit_context_ingestor/infra/persistence/local_storage.py +0 -0
- {wizit_context_ingestor-0.3.0b1 → wizit_context_ingestor-0.3.0b3}/src/wizit_context_ingestor/infra/persistence/s3_storage.py +0 -0
- {wizit_context_ingestor-0.3.0b1 → wizit_context_ingestor-0.3.0b3}/src/wizit_context_ingestor/infra/rag/pg_embeddings.py +0 -0
- {wizit_context_ingestor-0.3.0b1 → wizit_context_ingestor-0.3.0b3}/src/wizit_context_ingestor/infra/rag/redis_embeddings.py +0 -0
- {wizit_context_ingestor-0.3.0b1 → wizit_context_ingestor-0.3.0b3}/src/wizit_context_ingestor/infra/rag/semantic_chunks.py +0 -0
- {wizit_context_ingestor-0.3.0b1 → wizit_context_ingestor-0.3.0b3}/src/wizit_context_ingestor/infra/secrets/__init__.py +0 -0
- {wizit_context_ingestor-0.3.0b1 → wizit_context_ingestor-0.3.0b3}/src/wizit_context_ingestor/infra/vertex_model.py +0 -0
- {wizit_context_ingestor-0.3.0b1 → wizit_context_ingestor-0.3.0b3}/src/wizit_context_ingestor/services/.DS_Store +0 -0
- {wizit_context_ingestor-0.3.0b1 → wizit_context_ingestor-0.3.0b3}/src/wizit_context_ingestor/services/__init__.py +0 -0
- {wizit_context_ingestor-0.3.0b1 → wizit_context_ingestor-0.3.0b3}/src/wizit_context_ingestor/services/chunks.py +0 -0
- {wizit_context_ingestor-0.3.0b1 → wizit_context_ingestor-0.3.0b3}/src/wizit_context_ingestor/services/parse_doc.py +0 -0
- {wizit_context_ingestor-0.3.0b1 → wizit_context_ingestor-0.3.0b3}/src/wizit_context_ingestor/services/pg_embeddings_manager.py +0 -0
- {wizit_context_ingestor-0.3.0b1 → wizit_context_ingestor-0.3.0b3}/src/wizit_context_ingestor/utils/file_utils.py +0 -0
- {wizit_context_ingestor-0.3.0b1 → wizit_context_ingestor-0.3.0b3}/src/wizit_context_ingestor/workflows/context_nodes.py +0 -0
- {wizit_context_ingestor-0.3.0b1 → wizit_context_ingestor-0.3.0b3}/src/wizit_context_ingestor/workflows/context_state.py +0 -0
- {wizit_context_ingestor-0.3.0b1 → wizit_context_ingestor-0.3.0b3}/src/wizit_context_ingestor/workflows/context_tools.py +0 -0
- {wizit_context_ingestor-0.3.0b1 → wizit_context_ingestor-0.3.0b3}/src/wizit_context_ingestor/workflows/context_workflow.py +0 -0
- {wizit_context_ingestor-0.3.0b1 → wizit_context_ingestor-0.3.0b3}/src/wizit_context_ingestor/workflows/transcription_nodes.py +0 -0
- {wizit_context_ingestor-0.3.0b1 → wizit_context_ingestor-0.3.0b3}/src/wizit_context_ingestor/workflows/transcription_schemas.py +0 -0
- {wizit_context_ingestor-0.3.0b1 → wizit_context_ingestor-0.3.0b3}/src/wizit_context_ingestor/workflows/transcription_state.py +0 -0
- {wizit_context_ingestor-0.3.0b1 → wizit_context_ingestor-0.3.0b3}/src/wizit_context_ingestor/workflows/transcription_tools.py +0 -0
- {wizit_context_ingestor-0.3.0b1 → wizit_context_ingestor-0.3.0b3}/src/wizit_context_ingestor/workflows/transcription_workflow.py +0 -0
|
@@ -146,7 +146,7 @@ class TranscriptionService:
|
|
|
146
146
|
if result["transcription"]:
|
|
147
147
|
document.page_text = result["transcription"]
|
|
148
148
|
else:
|
|
149
|
-
raise ValueError("No transcription found")
|
|
149
|
+
raise ValueError(f"No transcription found: {result} ")
|
|
150
150
|
return document
|
|
151
151
|
|
|
152
152
|
def process_document(self, file_key: str) -> Tuple[List[ParsedDocPage], ParsedDoc]:
|
|
@@ -1,10 +1,6 @@
|
|
|
1
|
-
from typing_extensions import Sequence
|
|
2
|
-
from test.test_typing import CoolEmployee
|
|
3
1
|
from langchain_core.documents import Document
|
|
4
2
|
from langchain_chroma import Chroma
|
|
5
|
-
from typing import List
|
|
6
3
|
import logging
|
|
7
|
-
from uuid import uuid4
|
|
8
4
|
from ...application.interfaces import EmbeddingsManager
|
|
9
5
|
|
|
10
6
|
# load_dotenv()
|
|
@@ -3,11 +3,10 @@ import logging
|
|
|
3
3
|
|
|
4
4
|
logger = logging.getLogger(__name__)
|
|
5
5
|
|
|
6
|
-
class AwsSecretsManager:
|
|
7
|
-
|
|
8
|
-
def __init__(self):
|
|
9
|
-
self.client = boto3_client('secretsmanager')
|
|
10
6
|
|
|
7
|
+
class AwsSecretsManager:
|
|
8
|
+
def __init__(self, aws_region="us-east-1"):
|
|
9
|
+
self.client = boto3_client("secretsmanager", region_name=aws_region)
|
|
11
10
|
|
|
12
11
|
def get_secret(self, secret_name):
|
|
13
12
|
"""
|
{wizit_context_ingestor-0.3.0b1 → wizit_context_ingestor-0.3.0b3}/src/wizit_context_ingestor/main.py
RENAMED
|
@@ -12,6 +12,7 @@ from .infra.secrets.aws_secrets_manager import AwsSecretsManager
|
|
|
12
12
|
from .data.storage import storage_services, StorageServices
|
|
13
13
|
from .data.kdb import kdb_services, KdbServices
|
|
14
14
|
from .utils.file_utils import has_invalid_file_name_format
|
|
15
|
+
from langsmith import Client, tracing_context
|
|
15
16
|
|
|
16
17
|
|
|
17
18
|
class KdbManager:
|
|
@@ -69,6 +70,8 @@ class TranscriptionManager:
|
|
|
69
70
|
gcp_project_id: str,
|
|
70
71
|
gcp_project_location: str,
|
|
71
72
|
gcp_secret_name: str,
|
|
73
|
+
langsmith_api_key: str,
|
|
74
|
+
langsmith_project_name: str,
|
|
72
75
|
storage_service: storage_services,
|
|
73
76
|
source_storage_route: str,
|
|
74
77
|
target_storage_route: str,
|
|
@@ -94,6 +97,9 @@ class TranscriptionManager:
|
|
|
94
97
|
self.max_transcription_retries = max_transcription_retries
|
|
95
98
|
self.gcp_sa_dict = self._get_gcp_sa_dict(gcp_secret_name)
|
|
96
99
|
self.vertex_model = self._get_vertex_model()
|
|
100
|
+
self.langsmith_api_key = langsmith_api_key
|
|
101
|
+
self.langsmith_project_name = langsmith_project_name
|
|
102
|
+
self.langsmith_client = Client(api_key=self.langsmith_api_key)
|
|
97
103
|
|
|
98
104
|
def _get_gcp_sa_dict(self, gcp_secret_name: str):
|
|
99
105
|
vertex_gcp_sa = self.aws_secrets_manager.get_secret(gcp_secret_name)
|
|
@@ -109,6 +115,18 @@ class TranscriptionManager:
|
|
|
109
115
|
)
|
|
110
116
|
return vertex_model
|
|
111
117
|
|
|
118
|
+
def tracing(func):
|
|
119
|
+
def gen_tracing_context(self, *args, **kwargs):
|
|
120
|
+
with tracing_context(
|
|
121
|
+
enabled=True,
|
|
122
|
+
project_name=self.langsmith_project_name,
|
|
123
|
+
client=self.langsmith_client,
|
|
124
|
+
):
|
|
125
|
+
return func(self, *args, **kwargs)
|
|
126
|
+
|
|
127
|
+
return gen_tracing_context
|
|
128
|
+
|
|
129
|
+
@tracing
|
|
112
130
|
def transcribe_document(self, file_key: str):
|
|
113
131
|
"""Transcribe a document from source storage to target storage.
|
|
114
132
|
This method serves as a generic interface for transcribing documents from
|
|
@@ -171,6 +189,8 @@ class ChunksManager:
|
|
|
171
189
|
gcp_project_id: str,
|
|
172
190
|
gcp_project_location: str,
|
|
173
191
|
gcp_secret_name: str,
|
|
192
|
+
langsmith_api_key: str,
|
|
193
|
+
langsmith_project_name: str,
|
|
174
194
|
storage_service: storage_services,
|
|
175
195
|
kdb_service: Literal["redis", "chroma"],
|
|
176
196
|
kdb_params: Dict[Any, Any],
|
|
@@ -188,11 +208,13 @@ class ChunksManager:
|
|
|
188
208
|
self.storage_service = storage_service
|
|
189
209
|
self.kdb_params = kdb_params
|
|
190
210
|
self.kdb_service = kdb_service
|
|
191
|
-
# self.redis_connection_string = redis_connection_string
|
|
192
211
|
self.vertex_model = self._get_vertex_model()
|
|
193
212
|
self.embeddings_model = self.vertex_model.load_embeddings_model(
|
|
194
213
|
embeddings_model_id
|
|
195
214
|
)
|
|
215
|
+
self.langsmith_api_key = langsmith_api_key
|
|
216
|
+
self.langsmith_project_name = langsmith_project_name
|
|
217
|
+
self.langsmith_client = Client(api_key=self.langsmith_api_key)
|
|
196
218
|
|
|
197
219
|
def _get_gcp_sa_dict(self, gcp_secret_name: str):
|
|
198
220
|
vertex_gcp_sa = self.aws_secrets_manager.get_secret(gcp_secret_name)
|
|
@@ -208,6 +230,18 @@ class ChunksManager:
|
|
|
208
230
|
)
|
|
209
231
|
return vertex_model
|
|
210
232
|
|
|
233
|
+
def tracing(func):
|
|
234
|
+
def gen_tacing_context(self, *args, **kwargs):
|
|
235
|
+
with tracing_context(
|
|
236
|
+
enabled=True,
|
|
237
|
+
project_name=self.langsmith_project_name,
|
|
238
|
+
client=self.langsmith_client,
|
|
239
|
+
):
|
|
240
|
+
return func(self, *args, **kwargs)
|
|
241
|
+
|
|
242
|
+
return gen_tacing_context
|
|
243
|
+
|
|
244
|
+
@tracing
|
|
211
245
|
def gen_context_chunks(
|
|
212
246
|
self, file_key: str, source_storage_route: str, target_storage_route: str
|
|
213
247
|
):
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|