wizit-context-ingestor: wizit_context_ingestor-0.3.0b4-py3-none-any.whl → wizit_context_ingestor-0.3.0b6-py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of wizit-context-ingestor might be problematic. Click here for more details.

@@ -112,7 +112,7 @@ class TranscriptionService:
112
112
  # raise
113
113
 
114
114
  async def parse_doc_page_with_workflow(
115
- self, document: ParsedDocPage
115
+ self, document: ParsedDocPage, retries: int = 0
116
116
  ) -> ParsedDocPage:
117
117
  """Transcribe an image to text using an agent.
118
118
  Args:
@@ -120,6 +120,9 @@ class TranscriptionService:
120
120
  Returns:
121
121
  Processed text
122
122
  """
123
+ if retries > 1:
124
+ logger.info("Max retries exceeded")
125
+ return document
123
126
  result = await self.compiled_transcription_workflow.ainvoke(
124
127
  {
125
128
  "messages": [
@@ -150,10 +153,12 @@ class TranscriptionService:
150
153
  }
151
154
  },
152
155
  )
153
- if result["transcription"]:
156
+ if "transcription" in result:
154
157
  document.page_text = result["transcription"]
155
158
  else:
156
- raise ValueError(f"No transcription found: {result} ")
159
+ return await self.parse_doc_page_with_workflow(
160
+ document, retries=retries + 1
161
+ )
157
162
  return document
158
163
 
159
164
  # def process_document(self, file_key: str) -> Tuple[List[ParsedDocPage], ParsedDoc]:
@@ -7,7 +7,7 @@ def has_invalid_file_name_format(file_name):
7
7
  if " " in file_name:
8
8
  return True
9
9
 
10
- # Check for special characters (anything that's not alphanumeric, underscore, dash, or dot)
11
- if re.search(r"[^a-zA-Z0-9_.-]", file_name):
10
+ # Check for special characters (anything that's not alphanumeric, underscore, dash, dot, slash, or backslash)
11
+ if re.search(r"[^a-zA-Z0-9_.-/\\]", file_name):
12
12
  return True
13
13
  return False
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: wizit-context-ingestor
3
- Version: 0.3.0b4
3
+ Version: 0.3.0b6
4
4
  Summary: Contextual Rag with Cloud Solutions
5
5
  Requires-Dist: anthropic[vertex]>=0.66.0
6
6
  Requires-Dist: boto3>=1.40.23
@@ -3,7 +3,7 @@ wizit_context_ingestor/__init__.py,sha256=TSTm5qSpNNCz9ilKYkXRUxupvmWG2AHfv7RBWF
3
3
  wizit_context_ingestor/application/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
4
  wizit_context_ingestor/application/context_chunk_service.py,sha256=LYRKBsY30IT2LczkgkYdPx7W3yycRy-0m7t3KKgq6Nw,9046
5
5
  wizit_context_ingestor/application/interfaces.py,sha256=W0qonE3t-S-zwAoKtDYc4oyW_GOILKVmrdy8LnC8MVI,3193
6
- wizit_context_ingestor/application/transcription_service.py,sha256=jAjQE_sR0E3CSHLf0lq-24scl-_VKWy3crGhiodkoSM,8394
6
+ wizit_context_ingestor/application/transcription_service.py,sha256=FlUcMGyAotAO8MmT5UMlPMbgIWVQLg7YO6rJx9ANn7A,8567
7
7
  wizit_context_ingestor/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
8
  wizit_context_ingestor/data/kdb.py,sha256=GCkXQmnk2JCXV_VJ-h0k55AOIX8qohzBJN2v-9D1dlU,194
9
9
  wizit_context_ingestor/data/prompts.py,sha256=EnocoriDjPcFPd6Af9G6TUTB8NkO4EFN4AUHfpRVqYU,14406
@@ -29,7 +29,7 @@ wizit_context_ingestor/services/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm
29
29
  wizit_context_ingestor/services/chunks.py,sha256=tQQsdsOscZWzqVY5WxVxr3ii62FOJ3nMARaJJz6CvjQ,2011
30
30
  wizit_context_ingestor/services/parse_doc.py,sha256=3CyZoGbiUfxbs0SXUWXjQevtusSzTBgvUVeNNSdxJLE,4491
31
31
  wizit_context_ingestor/services/pg_embeddings_manager.py,sha256=n1HOmu_Z_Z71H-rVAyJS3FdPKbBckm5W8_XethY8nuM,4998
32
- wizit_context_ingestor/utils/file_utils.py,sha256=QnyncN0X5E-LjAYxFPxQiOrAj0DHcAcL2GliLVikF5o,393
32
+ wizit_context_ingestor/utils/file_utils.py,sha256=Wuua14LivrfL8oBP-j3ZtSqc7uq4rrQzT-T-p7pxEpM,414
33
33
  wizit_context_ingestor/workflows/context_nodes.py,sha256=3qlFcxPUmehx04mQHpmouneKq--To8rwSDHCRFyWICo,3168
34
34
  wizit_context_ingestor/workflows/context_state.py,sha256=4MTIUjK-F2pWvIldovWZhMAqqCOpViKbvitJzETkSkY,324
35
35
  wizit_context_ingestor/workflows/context_tools.py,sha256=E9VTL3AC0MwSIuc1e-juZK7XCxnZfFv0-KpHfR2CNH4,2764
@@ -39,6 +39,6 @@ wizit_context_ingestor/workflows/transcription_schemas.py,sha256=CQCl7LXD5voxhJO
39
39
  wizit_context_ingestor/workflows/transcription_state.py,sha256=2Z_t2aZFEH_nAjdEO6RFBEmi_fwvr9cV0aLS1eIxiCQ,590
40
40
  wizit_context_ingestor/workflows/transcription_tools.py,sha256=FtIfWFITn8_Rr5SEobCeR55aJGZoHRMgF2UxRT5vJ-E,1373
41
41
  wizit_context_ingestor/workflows/transcription_workflow.py,sha256=77cLsYGdv01Py2GaKYpACuifPeSxH7tkVodvLv97sdg,1621
42
- wizit_context_ingestor-0.3.0b4.dist-info/WHEEL,sha256=eh7sammvW2TypMMMGKgsM83HyA_3qQ5Lgg3ynoecH3M,79
43
- wizit_context_ingestor-0.3.0b4.dist-info/METADATA,sha256=iPcDYUP3VQKukxafk_9HeJQqzw8WqLKrZH71cmbuIYw,3768
44
- wizit_context_ingestor-0.3.0b4.dist-info/RECORD,,
42
+ wizit_context_ingestor-0.3.0b6.dist-info/WHEEL,sha256=eh7sammvW2TypMMMGKgsM83HyA_3qQ5Lgg3ynoecH3M,79
43
+ wizit_context_ingestor-0.3.0b6.dist-info/METADATA,sha256=zp_AzRkME9KJkF_9yyiVLE2dNwxPxROK9bmivV8tOMw,3768
44
+ wizit_context_ingestor-0.3.0b6.dist-info/RECORD,,