wizit-context-ingestor 0.3.0b4__py3-none-any.whl → 0.3.0b6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of wizit-context-ingestor might be problematic. Click here for more details.
- wizit_context_ingestor/application/transcription_service.py +8 -3
- wizit_context_ingestor/utils/file_utils.py +2 -2
- {wizit_context_ingestor-0.3.0b4.dist-info → wizit_context_ingestor-0.3.0b6.dist-info}/METADATA +1 -1
- {wizit_context_ingestor-0.3.0b4.dist-info → wizit_context_ingestor-0.3.0b6.dist-info}/RECORD +5 -5
- {wizit_context_ingestor-0.3.0b4.dist-info → wizit_context_ingestor-0.3.0b6.dist-info}/WHEEL +0 -0
|
@@ -112,7 +112,7 @@ class TranscriptionService:
|
|
|
112
112
|
# raise
|
|
113
113
|
|
|
114
114
|
async def parse_doc_page_with_workflow(
|
|
115
|
-
self, document: ParsedDocPage
|
|
115
|
+
self, document: ParsedDocPage, retries: int = 0
|
|
116
116
|
) -> ParsedDocPage:
|
|
117
117
|
"""Transcribe an image to text using an agent.
|
|
118
118
|
Args:
|
|
@@ -120,6 +120,9 @@ class TranscriptionService:
|
|
|
120
120
|
Returns:
|
|
121
121
|
Processed text
|
|
122
122
|
"""
|
|
123
|
+
if retries > 1:
|
|
124
|
+
logger.info("Max retries exceeded")
|
|
125
|
+
return document
|
|
123
126
|
result = await self.compiled_transcription_workflow.ainvoke(
|
|
124
127
|
{
|
|
125
128
|
"messages": [
|
|
@@ -150,10 +153,12 @@ class TranscriptionService:
|
|
|
150
153
|
}
|
|
151
154
|
},
|
|
152
155
|
)
|
|
153
|
-
if
|
|
156
|
+
if "transcription" in result:
|
|
154
157
|
document.page_text = result["transcription"]
|
|
155
158
|
else:
|
|
156
|
-
|
|
159
|
+
return await self.parse_doc_page_with_workflow(
|
|
160
|
+
document, retries=retries + 1
|
|
161
|
+
)
|
|
157
162
|
return document
|
|
158
163
|
|
|
159
164
|
# def process_document(self, file_key: str) -> Tuple[List[ParsedDocPage], ParsedDoc]:
|
|
@@ -7,7 +7,7 @@ def has_invalid_file_name_format(file_name):
|
|
|
7
7
|
if " " in file_name:
|
|
8
8
|
return True
|
|
9
9
|
|
|
10
|
-
# Check for special characters (anything that's not alphanumeric, underscore, dash, or
|
|
11
|
-
if re.search(r"[^a-zA-Z0-9_
|
|
10
|
+
# Check for special characters (anything that's not alphanumeric, underscore, dash, dot, slash, or backslash)
|
|
11
|
+
if re.search(r"[^a-zA-Z0-9_.-/\\]", file_name):
|
|
12
12
|
return True
|
|
13
13
|
return False
|
{wizit_context_ingestor-0.3.0b4.dist-info → wizit_context_ingestor-0.3.0b6.dist-info}/RECORD
RENAMED
|
@@ -3,7 +3,7 @@ wizit_context_ingestor/__init__.py,sha256=TSTm5qSpNNCz9ilKYkXRUxupvmWG2AHfv7RBWF
|
|
|
3
3
|
wizit_context_ingestor/application/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
4
|
wizit_context_ingestor/application/context_chunk_service.py,sha256=LYRKBsY30IT2LczkgkYdPx7W3yycRy-0m7t3KKgq6Nw,9046
|
|
5
5
|
wizit_context_ingestor/application/interfaces.py,sha256=W0qonE3t-S-zwAoKtDYc4oyW_GOILKVmrdy8LnC8MVI,3193
|
|
6
|
-
wizit_context_ingestor/application/transcription_service.py,sha256=
|
|
6
|
+
wizit_context_ingestor/application/transcription_service.py,sha256=FlUcMGyAotAO8MmT5UMlPMbgIWVQLg7YO6rJx9ANn7A,8567
|
|
7
7
|
wizit_context_ingestor/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
8
8
|
wizit_context_ingestor/data/kdb.py,sha256=GCkXQmnk2JCXV_VJ-h0k55AOIX8qohzBJN2v-9D1dlU,194
|
|
9
9
|
wizit_context_ingestor/data/prompts.py,sha256=EnocoriDjPcFPd6Af9G6TUTB8NkO4EFN4AUHfpRVqYU,14406
|
|
@@ -29,7 +29,7 @@ wizit_context_ingestor/services/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm
|
|
|
29
29
|
wizit_context_ingestor/services/chunks.py,sha256=tQQsdsOscZWzqVY5WxVxr3ii62FOJ3nMARaJJz6CvjQ,2011
|
|
30
30
|
wizit_context_ingestor/services/parse_doc.py,sha256=3CyZoGbiUfxbs0SXUWXjQevtusSzTBgvUVeNNSdxJLE,4491
|
|
31
31
|
wizit_context_ingestor/services/pg_embeddings_manager.py,sha256=n1HOmu_Z_Z71H-rVAyJS3FdPKbBckm5W8_XethY8nuM,4998
|
|
32
|
-
wizit_context_ingestor/utils/file_utils.py,sha256=
|
|
32
|
+
wizit_context_ingestor/utils/file_utils.py,sha256=Wuua14LivrfL8oBP-j3ZtSqc7uq4rrQzT-T-p7pxEpM,414
|
|
33
33
|
wizit_context_ingestor/workflows/context_nodes.py,sha256=3qlFcxPUmehx04mQHpmouneKq--To8rwSDHCRFyWICo,3168
|
|
34
34
|
wizit_context_ingestor/workflows/context_state.py,sha256=4MTIUjK-F2pWvIldovWZhMAqqCOpViKbvitJzETkSkY,324
|
|
35
35
|
wizit_context_ingestor/workflows/context_tools.py,sha256=E9VTL3AC0MwSIuc1e-juZK7XCxnZfFv0-KpHfR2CNH4,2764
|
|
@@ -39,6 +39,6 @@ wizit_context_ingestor/workflows/transcription_schemas.py,sha256=CQCl7LXD5voxhJO
|
|
|
39
39
|
wizit_context_ingestor/workflows/transcription_state.py,sha256=2Z_t2aZFEH_nAjdEO6RFBEmi_fwvr9cV0aLS1eIxiCQ,590
|
|
40
40
|
wizit_context_ingestor/workflows/transcription_tools.py,sha256=FtIfWFITn8_Rr5SEobCeR55aJGZoHRMgF2UxRT5vJ-E,1373
|
|
41
41
|
wizit_context_ingestor/workflows/transcription_workflow.py,sha256=77cLsYGdv01Py2GaKYpACuifPeSxH7tkVodvLv97sdg,1621
|
|
42
|
-
wizit_context_ingestor-0.3.
|
|
43
|
-
wizit_context_ingestor-0.3.
|
|
44
|
-
wizit_context_ingestor-0.3.
|
|
42
|
+
wizit_context_ingestor-0.3.0b6.dist-info/WHEEL,sha256=eh7sammvW2TypMMMGKgsM83HyA_3qQ5Lgg3ynoecH3M,79
|
|
43
|
+
wizit_context_ingestor-0.3.0b6.dist-info/METADATA,sha256=zp_AzRkME9KJkF_9yyiVLE2dNwxPxROK9bmivV8tOMw,3768
|
|
44
|
+
wizit_context_ingestor-0.3.0b6.dist-info/RECORD,,
|
|
File without changes
|