wizit-context-ingestor 0.3.0b2__tar.gz → 0.3.0b4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of wizit-context-ingestor might be problematic.
- {wizit_context_ingestor-0.3.0b2 → wizit_context_ingestor-0.3.0b4}/PKG-INFO +1 -1
- {wizit_context_ingestor-0.3.0b2 → wizit_context_ingestor-0.3.0b4}/pyproject.toml +1 -1
- {wizit_context_ingestor-0.3.0b2 → wizit_context_ingestor-0.3.0b4}/src/wizit_context_ingestor/application/context_chunk_service.py +12 -10
- {wizit_context_ingestor-0.3.0b2 → wizit_context_ingestor-0.3.0b4}/src/wizit_context_ingestor/application/transcription_service.py +40 -14
- {wizit_context_ingestor-0.3.0b2 → wizit_context_ingestor-0.3.0b4}/src/wizit_context_ingestor/domain/services.py +6 -11
- {wizit_context_ingestor-0.3.0b2 → wizit_context_ingestor-0.3.0b4}/src/wizit_context_ingestor/infra/secrets/aws_secrets_manager.py +3 -4
- {wizit_context_ingestor-0.3.0b2 → wizit_context_ingestor-0.3.0b4}/src/wizit_context_ingestor/main.py +13 -12
- {wizit_context_ingestor-0.3.0b2 → wizit_context_ingestor-0.3.0b4}/README.md +0 -0
- {wizit_context_ingestor-0.3.0b2 → wizit_context_ingestor-0.3.0b4}/src/wizit_context_ingestor/.DS_Store +0 -0
- {wizit_context_ingestor-0.3.0b2 → wizit_context_ingestor-0.3.0b4}/src/wizit_context_ingestor/__init__.py +0 -0
- {wizit_context_ingestor-0.3.0b2 → wizit_context_ingestor-0.3.0b4}/src/wizit_context_ingestor/application/__init__.py +0 -0
- {wizit_context_ingestor-0.3.0b2 → wizit_context_ingestor-0.3.0b4}/src/wizit_context_ingestor/application/interfaces.py +0 -0
- {wizit_context_ingestor-0.3.0b2 → wizit_context_ingestor-0.3.0b4}/src/wizit_context_ingestor/data/__init__.py +0 -0
- {wizit_context_ingestor-0.3.0b2 → wizit_context_ingestor-0.3.0b4}/src/wizit_context_ingestor/data/kdb.py +0 -0
- {wizit_context_ingestor-0.3.0b2 → wizit_context_ingestor-0.3.0b4}/src/wizit_context_ingestor/data/prompts.py +0 -0
- {wizit_context_ingestor-0.3.0b2 → wizit_context_ingestor-0.3.0b4}/src/wizit_context_ingestor/data/storage.py +0 -0
- {wizit_context_ingestor-0.3.0b2 → wizit_context_ingestor-0.3.0b4}/src/wizit_context_ingestor/domain/__init__.py +0 -0
- {wizit_context_ingestor-0.3.0b2 → wizit_context_ingestor-0.3.0b4}/src/wizit_context_ingestor/domain/models.py +0 -0
- {wizit_context_ingestor-0.3.0b2 → wizit_context_ingestor-0.3.0b4}/src/wizit_context_ingestor/infra/__init__.py +0 -0
- {wizit_context_ingestor-0.3.0b2 → wizit_context_ingestor-0.3.0b4}/src/wizit_context_ingestor/infra/aws_model.py +0 -0
- {wizit_context_ingestor-0.3.0b2 → wizit_context_ingestor-0.3.0b4}/src/wizit_context_ingestor/infra/persistence/__init__.py +0 -0
- {wizit_context_ingestor-0.3.0b2 → wizit_context_ingestor-0.3.0b4}/src/wizit_context_ingestor/infra/persistence/local_storage.py +0 -0
- {wizit_context_ingestor-0.3.0b2 → wizit_context_ingestor-0.3.0b4}/src/wizit_context_ingestor/infra/persistence/s3_storage.py +0 -0
- {wizit_context_ingestor-0.3.0b2 → wizit_context_ingestor-0.3.0b4}/src/wizit_context_ingestor/infra/rag/chroma_embeddings.py +0 -0
- {wizit_context_ingestor-0.3.0b2 → wizit_context_ingestor-0.3.0b4}/src/wizit_context_ingestor/infra/rag/pg_embeddings.py +0 -0
- {wizit_context_ingestor-0.3.0b2 → wizit_context_ingestor-0.3.0b4}/src/wizit_context_ingestor/infra/rag/redis_embeddings.py +0 -0
- {wizit_context_ingestor-0.3.0b2 → wizit_context_ingestor-0.3.0b4}/src/wizit_context_ingestor/infra/rag/semantic_chunks.py +0 -0
- {wizit_context_ingestor-0.3.0b2 → wizit_context_ingestor-0.3.0b4}/src/wizit_context_ingestor/infra/secrets/__init__.py +0 -0
- {wizit_context_ingestor-0.3.0b2 → wizit_context_ingestor-0.3.0b4}/src/wizit_context_ingestor/infra/vertex_model.py +0 -0
- {wizit_context_ingestor-0.3.0b2 → wizit_context_ingestor-0.3.0b4}/src/wizit_context_ingestor/services/.DS_Store +0 -0
- {wizit_context_ingestor-0.3.0b2 → wizit_context_ingestor-0.3.0b4}/src/wizit_context_ingestor/services/__init__.py +0 -0
- {wizit_context_ingestor-0.3.0b2 → wizit_context_ingestor-0.3.0b4}/src/wizit_context_ingestor/services/chunks.py +0 -0
- {wizit_context_ingestor-0.3.0b2 → wizit_context_ingestor-0.3.0b4}/src/wizit_context_ingestor/services/parse_doc.py +0 -0
- {wizit_context_ingestor-0.3.0b2 → wizit_context_ingestor-0.3.0b4}/src/wizit_context_ingestor/services/pg_embeddings_manager.py +0 -0
- {wizit_context_ingestor-0.3.0b2 → wizit_context_ingestor-0.3.0b4}/src/wizit_context_ingestor/utils/file_utils.py +0 -0
- {wizit_context_ingestor-0.3.0b2 → wizit_context_ingestor-0.3.0b4}/src/wizit_context_ingestor/workflows/context_nodes.py +0 -0
- {wizit_context_ingestor-0.3.0b2 → wizit_context_ingestor-0.3.0b4}/src/wizit_context_ingestor/workflows/context_state.py +0 -0
- {wizit_context_ingestor-0.3.0b2 → wizit_context_ingestor-0.3.0b4}/src/wizit_context_ingestor/workflows/context_tools.py +0 -0
- {wizit_context_ingestor-0.3.0b2 → wizit_context_ingestor-0.3.0b4}/src/wizit_context_ingestor/workflows/context_workflow.py +0 -0
- {wizit_context_ingestor-0.3.0b2 → wizit_context_ingestor-0.3.0b4}/src/wizit_context_ingestor/workflows/transcription_nodes.py +0 -0
- {wizit_context_ingestor-0.3.0b2 → wizit_context_ingestor-0.3.0b4}/src/wizit_context_ingestor/workflows/transcription_schemas.py +0 -0
- {wizit_context_ingestor-0.3.0b2 → wizit_context_ingestor-0.3.0b4}/src/wizit_context_ingestor/workflows/transcription_state.py +0 -0
- {wizit_context_ingestor-0.3.0b2 → wizit_context_ingestor-0.3.0b4}/src/wizit_context_ingestor/workflows/transcription_tools.py +0 -0
- {wizit_context_ingestor-0.3.0b2 → wizit_context_ingestor-0.3.0b4}/src/wizit_context_ingestor/workflows/transcription_workflow.py +0 -0
{wizit_context_ingestor-0.3.0b2 → wizit_context_ingestor-0.3.0b4}/src/wizit_context_ingestor/application/context_chunk_service.py
@@ -1,3 +1,4 @@
+import asyncio
 from langchain_core.output_parsers.pydantic import PydanticOutputParser
 from langchain_core.prompts import ChatPromptTemplate
 from langchain_core.documents import Document
@@ -44,7 +45,7 @@ class ContextChunksInDocumentService:
         self.context_additional_instructions = ""
         self.metadata_source = "source"

-    def _retrieve_context_chunk_in_document_with_workflow(
+    async def _retrieve_context_chunk_in_document_with_workflow(
         self,
         workflow,
         markdown_content: str,
@@ -53,7 +54,7 @@ class ContextChunksInDocumentService:
     ) -> Document:
         """Retrieve context chunks in document."""
         try:
-            result = workflow.invoke(
+            result = await workflow.ainvoke(
                 {
                     "messages": [
                         HumanMessage(
@@ -74,9 +75,7 @@ class ContextChunksInDocumentService:
                     }
                 },
             )
-
-            # f"Context:{result['context']}, Content:{chunk.page_content}"
-            # )
+            chunk.page_content = f"<context>\n{result['context']}\n</context>\n <content>\n{chunk.page_content}\n</content>"
             chunk.metadata["context"] = result["context"]
             if chunk_metadata:
                 for key, value in chunk_metadata.items():
@@ -154,7 +153,7 @@ class ContextChunksInDocumentService:
         # logger.error(f"Failed to retrieve context chunks in document: {str(e)}")
         # raise

-    def retrieve_context_chunks_in_document_with_workflow(
+    async def retrieve_context_chunks_in_document_with_workflow(
         self,
         markdown_content: str,
         chunks: List[Document],
@@ -167,7 +166,7 @@ class ContextChunksInDocumentService:
             )
             compiled_context_workflow = context_workflow.gen_workflow()
             compiled_context_workflow = compiled_context_workflow.compile()
-
+            context_chunks_workflow_invocations = list(
                 map(
                     lambda chunk: self._retrieve_context_chunk_in_document_with_workflow(
                         compiled_context_workflow,
@@ -178,12 +177,13 @@ class ContextChunksInDocumentService:
                     chunks,
                 )
             )
+            context_chunks = await asyncio.gather(*context_chunks_workflow_invocations)
             return context_chunks
         except Exception as e:
             logger.error(f"Failed to retrieve context chunks in document: {str(e)}")
             raise

-    def get_context_chunks_in_document(self, file_key: str, file_tags: dict = {}):
+    async def get_context_chunks_in_document(self, file_key: str, file_tags: dict = {}):
         """
         Get the context chunks in a document.
         """
@@ -199,8 +199,10 @@ class ContextChunksInDocumentService:
         logger.info(f"Document loaded:{file_key}")
         chunks = self.rag_chunker.gen_chunks_for_document(langchain_rag_document)
         logger.info(f"Chunks generated:{len(chunks)}")
-        context_chunks =
-
+        context_chunks = (
+            await self.retrieve_context_chunks_in_document_with_workflow(
+                markdown_content, chunks, file_tags
+            )
         )
         logger.info(f"Context chunks generated:{len(context_chunks)}")
         # upsert validation
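The change above converts per-chunk contextualization to async: each chunk gets its own workflow invocation via ainvoke, and the coroutines are collected with asyncio.gather. Below is a minimal sketch of that fan-out pattern, assuming a compiled LangGraph-style workflow object exposing ainvoke; the helper names (annotate_chunk, annotate_all) and the input payload keys are illustrative, not taken from the package.

import asyncio
from typing import List

from langchain_core.documents import Document


async def annotate_chunk(workflow, markdown_content: str, chunk: Document) -> Document:
    # One workflow invocation per chunk; ainvoke yields control while the model call is in flight.
    result = await workflow.ainvoke(
        {"document": markdown_content, "chunk": chunk.page_content}  # illustrative payload
    )
    chunk.metadata["context"] = result["context"]
    return chunk


async def annotate_all(workflow, markdown_content: str, chunks: List[Document]) -> List[Document]:
    # Fan out one coroutine per chunk and run them concurrently.
    # gather preserves argument order, so results line up with the input chunk list.
    invocations = [annotate_chunk(workflow, markdown_content, chunk) for chunk in chunks]
    return await asyncio.gather(*invocations)

Because gather preserves argument order, the service can return the gathered list directly as context_chunks without re-sorting.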
{wizit_context_ingestor-0.3.0b2 → wizit_context_ingestor-0.3.0b4}/src/wizit_context_ingestor/application/transcription_service.py
@@ -1,3 +1,4 @@
+import asyncio
 from typing import Tuple, List, Dict, Optional
 from langchain_core.prompts import ChatPromptTemplate
 from langchain_core.output_parsers.pydantic import PydanticOutputParser
@@ -23,15 +24,15 @@ class TranscriptionService:
         persistence_service: PersistenceService,
         target_language: str = "es",
         transcription_additional_instructions: str = "",
-        transcription_accuracy_threshold:
+        transcription_accuracy_threshold: float = 0.90,
         max_transcription_retries: int = 2,
     ):
         self.ai_application_service = ai_application_service
         self.persistence_service = persistence_service
         self.target_language = target_language
         if (
-            transcription_accuracy_threshold < 0
-            or transcription_accuracy_threshold > 95
+            transcription_accuracy_threshold < 0.0
+            or transcription_accuracy_threshold > 0.95
         ):
             raise ValueError(
                 "transcription_accuracy_threshold must be between 0 and 95"
@@ -46,6 +47,15 @@ class TranscriptionService:
             transcription_additional_instructions
         )
         self.chat_model = self.ai_application_service.load_chat_model()
+        self.transcription_workflow = TranscriptionWorkflow(
+            self.chat_model, self.transcription_additional_instructions
+        )
+        self.compiled_transcription_workflow = (
+            self.transcription_workflow.gen_workflow()
+        )
+        self.compiled_transcription_workflow = (
+            self.compiled_transcription_workflow.compile()
+        )

     # def parse_doc_page(self, document: ParsedDocPage) -> ParsedDocPage:
     #     """Transcribe an image to text.
@@ -101,19 +111,16 @@ class TranscriptionService:
     #         logger.error(f"Failed to parse document page: {str(e)}")
     #         raise

-    def parse_doc_page_with_workflow(self, document: ParsedDocPage) -> ParsedDocPage:
+    async def parse_doc_page_with_workflow(
+        self, document: ParsedDocPage
+    ) -> ParsedDocPage:
         """Transcribe an image to text using an agent.
         Args:
             document: The document with the image to transcribe
         Returns:
             Processed text
         """
-        transcription_workflow = TranscriptionWorkflow(
-            self.chat_model, self.transcription_additional_instructions
-        )
-        compiled_transcription_workflow = transcription_workflow.gen_workflow()
-        compiled_transcription_workflow = compiled_transcription_workflow.compile()
-        result = compiled_transcription_workflow.invoke(
+        result = await self.compiled_transcription_workflow.ainvoke(
             {
                 "messages": [
                     HumanMessage(
@@ -146,20 +153,39 @@ class TranscriptionService:
         if result["transcription"]:
             document.page_text = result["transcription"]
         else:
-            raise ValueError("No transcription found")
+            raise ValueError(f"No transcription found: {result} ")
         return document

-    def process_document(self, file_key: str) -> Tuple[List[ParsedDocPage], ParsedDoc]:
+    # def process_document(self, file_key: str) -> Tuple[List[ParsedDocPage], ParsedDoc]:
+    #     """
+    #     Process a document by parsing it and returning the parsed content.
+    #     """
+    #     raw_file_path = self.persistence_service.retrieve_raw_file(file_key)
+    #     parse_doc_model_service = ParseDocModelService(raw_file_path)
+    #     document_pages = parse_doc_model_service.parse_document_to_base64()
+    #     parsed_pages = []
+    #     for page in document_pages:
+    #         page = self.parse_doc_page_with_workflow(page)
+    #         parsed_pages.append(page)
+    #     logger.info(f"Parsed {len(parsed_pages)} pages")
+    #     parsed_document = parse_doc_model_service.create_md_content(parsed_pages)
+    #     return parsed_pages, parsed_document
+
+    async def process_document(
+        self, file_key: str
+    ) -> Tuple[List[ParsedDocPage], ParsedDoc]:
         """
         Process a document by parsing it and returning the parsed content.
         """
         raw_file_path = self.persistence_service.retrieve_raw_file(file_key)
         parse_doc_model_service = ParseDocModelService(raw_file_path)
         document_pages = parse_doc_model_service.parse_document_to_base64()
+        parse_pages_workflow_tasks = []
         parsed_pages = []
         for page in document_pages:
-            page = self.parse_doc_page_with_workflow(page)
-            parsed_pages.append(page)
+            parse_pages_workflow_tasks.append(self.parse_doc_page_with_workflow(page))
+        # here
+        parsed_pages = await asyncio.gather(*parse_pages_workflow_tasks)
         logger.info(f"Parsed {len(parsed_pages)} pages")
         parsed_document = parse_doc_model_service.create_md_content(parsed_pages)
         return parsed_pages, parsed_document
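Two things change here: the transcription workflow is now built and compiled once in __init__ instead of per page, and process_document fans out one coroutine per page with asyncio.gather. Below is a rough sketch of the same shape, assuming a workflow factory whose gen_workflow().compile() returns an object with ainvoke; PageTranscriber and its payload key are illustrative stand-ins, not the package's API.

import asyncio
from typing import List


class PageTranscriber:
    """Sketch: compile the transcription workflow once, reuse it for every page."""

    def __init__(self, workflow_factory):
        # 0.3.0b2 rebuilt and recompiled the workflow inside every parse_doc_page_with_workflow
        # call; compiling once here amortises that cost over the whole document.
        self.compiled_workflow = workflow_factory.gen_workflow().compile()

    async def transcribe_page(self, page_base64: str) -> str:
        result = await self.compiled_workflow.ainvoke({"page_base64": page_base64})  # illustrative payload
        if not result.get("transcription"):
            raise ValueError(f"No transcription found: {result}")
        return result["transcription"]

    async def transcribe_document(self, pages_base64: List[str]) -> List[str]:
        # All pages are transcribed concurrently; gather returns them in page order.
        return await asyncio.gather(*(self.transcribe_page(p) for p in pages_base64))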
{wizit_context_ingestor-0.3.0b2 → wizit_context_ingestor-0.3.0b4}/src/wizit_context_ingestor/domain/services.py
@@ -8,8 +8,9 @@ from ..domain.models import ParsedDocPage, ParsedDoc

 logger = logging.getLogger(__name__)

+
 # CHECK THIS THING IMPROVE THE WAY CODE IS STRUCTURED
-class ParseDocModelService():
+class ParseDocModelService:
     """
     Class for parsing PDF documents, converting pages to base64 images
     """
@@ -25,7 +26,6 @@ class ParseDocModelService():
         self.pdf_document = pymupdf.open(file_path)
         self.page_count = self.pdf_document.page_count

-
     def pdf_page_to_base64(self, page_number: int) -> ParsedDocPage:
         """
         Convert a PDF page to a base64-encoded PNG image.
@@ -48,10 +48,7 @@ class ParseDocModelService():
             img.save(buffer, format="PNG")
             b64_encoded_image = base64.b64encode(buffer.getvalue()).decode("utf-8")
             logger.info(f"Page {page_number} encoded successfully")
-            return ParsedDocPage(
-                page_number=page_number,
-                page_base64=b64_encoded_image
-            )
+            return ParsedDocPage(page_number=page_number, page_base64=b64_encoded_image)
         except Exception as e:
             logger.error(f"Failed to parse b64 image: {str(e)}")
             raise
@@ -87,12 +84,10 @@ class ParseDocModelService():
         Create a markdown content from a list of parsed pages.
         """
         md_content = ""
-        for page in parsed_pages:
+        sorted_pages = sorted(parsed_pages, key=lambda page: page.page_number)
+        for page in sorted_pages:
             md_content += f"## Page {page.page_number}\n\n"
             md_content += f"{page.page_text}\n\n"
-        return ParsedDoc(
-            pages=parsed_pages,
-            document_text=md_content
-        )
+        return ParsedDoc(pages=parsed_pages, document_text=md_content)

     # def
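create_md_content now sorts pages by page_number before assembling the markdown, which keeps the output in document order even if the parsed pages arrive out of order (for example from concurrent transcription). Here is a small self-contained sketch of that assembly step; the Page dataclass merely stands in for the package's ParsedDocPage model.

from dataclasses import dataclass
from typing import List


@dataclass
class Page:
    page_number: int
    page_text: str


def build_markdown(pages: List[Page]) -> str:
    # Sort defensively by page_number so the markdown reads in document order,
    # regardless of the order in which the pages were produced.
    md_content = ""
    for page in sorted(pages, key=lambda p: p.page_number):
        md_content += f"## Page {page.page_number}\n\n"
        md_content += f"{page.page_text}\n\n"
    return md_content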
{wizit_context_ingestor-0.3.0b2 → wizit_context_ingestor-0.3.0b4}/src/wizit_context_ingestor/infra/secrets/aws_secrets_manager.py
@@ -3,11 +3,10 @@ import logging

 logger = logging.getLogger(__name__)

-class AwsSecretsManager:
-
-    def __init__(self):
-        self.client = boto3_client('secretsmanager')

+class AwsSecretsManager:
+    def __init__(self, aws_region="us-east-1"):
+        self.client = boto3_client("secretsmanager", region_name=aws_region)

     def get_secret(self, secret_name):
         """
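The constructor now pins the Secrets Manager client to an explicit region (defaulting to us-east-1) instead of relying on the ambient AWS configuration. A minimal sketch of that pattern follows; SecretsReader is an illustrative stand-in, and the get_secret body shown here uses the standard boto3 get_secret_value call rather than reproducing the package's implementation, which the diff does not show.

import boto3


class SecretsReader:
    def __init__(self, aws_region: str = "us-east-1"):
        # Passing region_name explicitly avoids depending on AWS_DEFAULT_REGION
        # being set in the runtime environment (e.g. a Lambda or bare container).
        self.client = boto3.client("secretsmanager", region_name=aws_region)

    def get_secret(self, secret_name: str) -> str:
        response = self.client.get_secret_value(SecretId=secret_name)
        return response["SecretString"]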
{wizit_context_ingestor-0.3.0b2 → wizit_context_ingestor-0.3.0b4}/src/wizit_context_ingestor/main.py RENAMED
@@ -78,7 +78,7 @@ class TranscriptionManager:
         llm_model_id: str = "claude-sonnet-4@20250514",
         target_language: str = "es",
         transcription_additional_instructions: str = "",
-        transcription_accuracy_threshold:
+        transcription_accuracy_threshold: float = 0.90,
         max_transcription_retries: int = 2,
     ):
         self.gcp_project_id = gcp_project_id
@@ -116,18 +116,18 @@ class TranscriptionManager:
         return vertex_model

     def tracing(func):
-        def gen_tracing_context(self, *args, **kwargs):
+        async def gen_tracing_context(self, *args, **kwargs):
             with tracing_context(
                 enabled=True,
                 project_name=self.langsmith_project_name,
                 client=self.langsmith_client,
             ):
-                return func(self, *args, **kwargs)
+                return await func(self, *args, **kwargs)

         return gen_tracing_context

     @tracing
-    def transcribe_document(self, file_key: str):
+    async def transcribe_document(self, file_key: str):
         """Transcribe a document from source storage to target storage.
         This method serves as a generic interface for transcribing documents from
         various storage sources to target destinations. The specific implementation
@@ -162,9 +162,10 @@ class TranscriptionManager:
             transcription_accuracy_threshold=self.transcription_accuracy_threshold,
             max_transcription_retries=self.max_transcription_retries,
         )
-
-
-
+        (
+            parsed_pages,
+            parsed_document,
+        ) = await transcribe_document_service.process_document(file_key)
         source_storage_file_tags = {}
         if persistence_service.supports_tagging:
             # source_storage_file_tags.tag_file(file_key, {"status": "transcribed"})
@@ -231,18 +232,18 @@ class ChunksManager:
         return vertex_model

     def tracing(func):
-        def
+        async def gen_tracing_context(self, *args, **kwargs):
             with tracing_context(
                 enabled=True,
                 project_name=self.langsmith_project_name,
                 client=self.langsmith_client,
             ):
-                return func(self, *args, **kwargs)
+                return await func(self, *args, **kwargs)

-        return
+        return gen_tracing_context

     @tracing
-    def gen_context_chunks(
+    async def gen_context_chunks(
         self, file_key: str, source_storage_route: str, target_storage_route: str
     ):
         try:
@@ -272,7 +273,7 @@ class ChunksManager:
                 target_language=self.target_language,
             )
             context_chunks = (
-                context_chunks_in_document_service.get_context_chunks_in_document(
+                await context_chunks_in_document_service.get_context_chunks_in_document(
                     file_key, target_bucket_file_tags
                 )
             )
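The tracing decorator in main.py had to become async-aware: once transcribe_document and gen_context_chunks are coroutines, a plain wrapper would return an un-awaited coroutine object and the tracing context would close before any traced work ran. Below is a minimal sketch of that pattern with a dummy context manager standing in for LangSmith's tracing_context and client, which are omitted here.

import asyncio
import functools
from contextlib import contextmanager


@contextmanager
def tracing_context(enabled: bool, project_name: str):
    # Dummy stand-in for the real tracing context manager used in main.py.
    print(f"tracing enabled={enabled} project={project_name}")
    yield


def tracing(func):
    # The wrapper itself must be async: a synchronous wrapper would return the
    # coroutine without awaiting it, so the context manager would exit before
    # the decorated method actually executed.
    @functools.wraps(func)
    async def gen_tracing_context(self, *args, **kwargs):
        with tracing_context(enabled=True, project_name=self.project_name):
            return await func(self, *args, **kwargs)
    return gen_tracing_context


class Manager:
    project_name = "demo"

    @tracing
    async def transcribe_document(self, file_key: str) -> str:
        return f"transcribed {file_key}"


# usage: print(asyncio.run(Manager().transcribe_document("doc.pdf")))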