wizit-context-ingestor 0.3.0b5__tar.gz → 0.3.0b6__tar.gz
This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of wizit-context-ingestor might be problematic. Click here for more details.
- {wizit_context_ingestor-0.3.0b5 → wizit_context_ingestor-0.3.0b6}/PKG-INFO +1 -1
- {wizit_context_ingestor-0.3.0b5 → wizit_context_ingestor-0.3.0b6}/pyproject.toml +1 -1
- {wizit_context_ingestor-0.3.0b5 → wizit_context_ingestor-0.3.0b6}/src/wizit_context_ingestor/application/transcription_service.py +3 -1
- {wizit_context_ingestor-0.3.0b5 → wizit_context_ingestor-0.3.0b6}/src/wizit_context_ingestor/utils/file_utils.py +2 -2
- {wizit_context_ingestor-0.3.0b5 → wizit_context_ingestor-0.3.0b6}/README.md +0 -0
- {wizit_context_ingestor-0.3.0b5 → wizit_context_ingestor-0.3.0b6}/src/wizit_context_ingestor/.DS_Store +0 -0
- {wizit_context_ingestor-0.3.0b5 → wizit_context_ingestor-0.3.0b6}/src/wizit_context_ingestor/__init__.py +0 -0
- {wizit_context_ingestor-0.3.0b5 → wizit_context_ingestor-0.3.0b6}/src/wizit_context_ingestor/application/__init__.py +0 -0
- {wizit_context_ingestor-0.3.0b5 → wizit_context_ingestor-0.3.0b6}/src/wizit_context_ingestor/application/context_chunk_service.py +0 -0
- {wizit_context_ingestor-0.3.0b5 → wizit_context_ingestor-0.3.0b6}/src/wizit_context_ingestor/application/interfaces.py +0 -0
- {wizit_context_ingestor-0.3.0b5 → wizit_context_ingestor-0.3.0b6}/src/wizit_context_ingestor/data/__init__.py +0 -0
- {wizit_context_ingestor-0.3.0b5 → wizit_context_ingestor-0.3.0b6}/src/wizit_context_ingestor/data/kdb.py +0 -0
- {wizit_context_ingestor-0.3.0b5 → wizit_context_ingestor-0.3.0b6}/src/wizit_context_ingestor/data/prompts.py +0 -0
- {wizit_context_ingestor-0.3.0b5 → wizit_context_ingestor-0.3.0b6}/src/wizit_context_ingestor/data/storage.py +0 -0
- {wizit_context_ingestor-0.3.0b5 → wizit_context_ingestor-0.3.0b6}/src/wizit_context_ingestor/domain/__init__.py +0 -0
- {wizit_context_ingestor-0.3.0b5 → wizit_context_ingestor-0.3.0b6}/src/wizit_context_ingestor/domain/models.py +0 -0
- {wizit_context_ingestor-0.3.0b5 → wizit_context_ingestor-0.3.0b6}/src/wizit_context_ingestor/domain/services.py +0 -0
- {wizit_context_ingestor-0.3.0b5 → wizit_context_ingestor-0.3.0b6}/src/wizit_context_ingestor/infra/__init__.py +0 -0
- {wizit_context_ingestor-0.3.0b5 → wizit_context_ingestor-0.3.0b6}/src/wizit_context_ingestor/infra/aws_model.py +0 -0
- {wizit_context_ingestor-0.3.0b5 → wizit_context_ingestor-0.3.0b6}/src/wizit_context_ingestor/infra/persistence/__init__.py +0 -0
- {wizit_context_ingestor-0.3.0b5 → wizit_context_ingestor-0.3.0b6}/src/wizit_context_ingestor/infra/persistence/local_storage.py +0 -0
- {wizit_context_ingestor-0.3.0b5 → wizit_context_ingestor-0.3.0b6}/src/wizit_context_ingestor/infra/persistence/s3_storage.py +0 -0
- {wizit_context_ingestor-0.3.0b5 → wizit_context_ingestor-0.3.0b6}/src/wizit_context_ingestor/infra/rag/chroma_embeddings.py +0 -0
- {wizit_context_ingestor-0.3.0b5 → wizit_context_ingestor-0.3.0b6}/src/wizit_context_ingestor/infra/rag/pg_embeddings.py +0 -0
- {wizit_context_ingestor-0.3.0b5 → wizit_context_ingestor-0.3.0b6}/src/wizit_context_ingestor/infra/rag/redis_embeddings.py +0 -0
- {wizit_context_ingestor-0.3.0b5 → wizit_context_ingestor-0.3.0b6}/src/wizit_context_ingestor/infra/rag/semantic_chunks.py +0 -0
- {wizit_context_ingestor-0.3.0b5 → wizit_context_ingestor-0.3.0b6}/src/wizit_context_ingestor/infra/secrets/__init__.py +0 -0
- {wizit_context_ingestor-0.3.0b5 → wizit_context_ingestor-0.3.0b6}/src/wizit_context_ingestor/infra/secrets/aws_secrets_manager.py +0 -0
- {wizit_context_ingestor-0.3.0b5 → wizit_context_ingestor-0.3.0b6}/src/wizit_context_ingestor/infra/vertex_model.py +0 -0
- {wizit_context_ingestor-0.3.0b5 → wizit_context_ingestor-0.3.0b6}/src/wizit_context_ingestor/main.py +0 -0
- {wizit_context_ingestor-0.3.0b5 → wizit_context_ingestor-0.3.0b6}/src/wizit_context_ingestor/services/.DS_Store +0 -0
- {wizit_context_ingestor-0.3.0b5 → wizit_context_ingestor-0.3.0b6}/src/wizit_context_ingestor/services/__init__.py +0 -0
- {wizit_context_ingestor-0.3.0b5 → wizit_context_ingestor-0.3.0b6}/src/wizit_context_ingestor/services/chunks.py +0 -0
- {wizit_context_ingestor-0.3.0b5 → wizit_context_ingestor-0.3.0b6}/src/wizit_context_ingestor/services/parse_doc.py +0 -0
- {wizit_context_ingestor-0.3.0b5 → wizit_context_ingestor-0.3.0b6}/src/wizit_context_ingestor/services/pg_embeddings_manager.py +0 -0
- {wizit_context_ingestor-0.3.0b5 → wizit_context_ingestor-0.3.0b6}/src/wizit_context_ingestor/workflows/context_nodes.py +0 -0
- {wizit_context_ingestor-0.3.0b5 → wizit_context_ingestor-0.3.0b6}/src/wizit_context_ingestor/workflows/context_state.py +0 -0
- {wizit_context_ingestor-0.3.0b5 → wizit_context_ingestor-0.3.0b6}/src/wizit_context_ingestor/workflows/context_tools.py +0 -0
- {wizit_context_ingestor-0.3.0b5 → wizit_context_ingestor-0.3.0b6}/src/wizit_context_ingestor/workflows/context_workflow.py +0 -0
- {wizit_context_ingestor-0.3.0b5 → wizit_context_ingestor-0.3.0b6}/src/wizit_context_ingestor/workflows/transcription_nodes.py +0 -0
- {wizit_context_ingestor-0.3.0b5 → wizit_context_ingestor-0.3.0b6}/src/wizit_context_ingestor/workflows/transcription_schemas.py +0 -0
- {wizit_context_ingestor-0.3.0b5 → wizit_context_ingestor-0.3.0b6}/src/wizit_context_ingestor/workflows/transcription_state.py +0 -0
- {wizit_context_ingestor-0.3.0b5 → wizit_context_ingestor-0.3.0b6}/src/wizit_context_ingestor/workflows/transcription_tools.py +0 -0
- {wizit_context_ingestor-0.3.0b5 → wizit_context_ingestor-0.3.0b6}/src/wizit_context_ingestor/workflows/transcription_workflow.py +0 -0
|
@@ -156,7 +156,9 @@ class TranscriptionService:
|
|
|
156
156
|
if "transcription" in result:
|
|
157
157
|
document.page_text = result["transcription"]
|
|
158
158
|
else:
|
|
159
|
-
await self.parse_doc_page_with_workflow(
|
|
159
|
+
return await self.parse_doc_page_with_workflow(
|
|
160
|
+
document, retries=retries + 1
|
|
161
|
+
)
|
|
160
162
|
return document
|
|
161
163
|
|
|
162
164
|
# def process_document(self, file_key: str) -> Tuple[List[ParsedDocPage], ParsedDoc]:
|
|
@@ -7,7 +7,7 @@ def has_invalid_file_name_format(file_name):
|
|
|
7
7
|
if " " in file_name:
|
|
8
8
|
return True
|
|
9
9
|
|
|
10
|
-
# Check for special characters (anything that's not alphanumeric, underscore, dash, or
|
|
11
|
-
if re.search(r"[^a-zA-Z0-9_
|
|
10
|
+
# Check for special characters (anything that's not alphanumeric, underscore, dash, dot, slash, or backslash)
|
|
11
|
+
if re.search(r"[^a-zA-Z0-9_.-/\\]", file_name):
|
|
12
12
|
return True
|
|
13
13
|
return False
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{wizit_context_ingestor-0.3.0b5 → wizit_context_ingestor-0.3.0b6}/src/wizit_context_ingestor/main.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|