xfmr-zem 0.2.7__tar.gz → 0.2.9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/CHANGELOG.md +12 -0
- {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/PKG-INFO +5 -1
- xfmr_zem-0.2.9/data/chunk_122.wav +0 -0
- xfmr_zem-0.2.9/data/sample_digital.pdf +0 -0
- xfmr_zem-0.2.9/data/sample_scanned.pdf +1447 -0
- {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/pyproject.toml +6 -1
- {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/cli.py +38 -1
- {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/client.py +38 -11
- {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/servers/ocr/server.py +1 -0
- xfmr_zem-0.2.9/src/xfmr_zem/servers/voice/engines.py +66 -0
- xfmr_zem-0.2.9/src/xfmr_zem/servers/voice/parameters.yml +2 -0
- xfmr_zem-0.2.9/src/xfmr_zem/servers/voice/server.py +54 -0
- xfmr_zem-0.2.9/tests/manual/pdf_ocr_test.yaml +25 -0
- xfmr_zem-0.2.9/tests/manual/voice_test.yaml +15 -0
- {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/uv.lock +245 -2
- {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/.github/workflows/deploy.yml +0 -0
- {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/.github/workflows/pypi-publish.yml +0 -0
- {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/.gitignore +0 -0
- {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/AGENTS.md +0 -0
- {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/LICENSE +0 -0
- {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/README.md +0 -0
- {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/data/big_data_output.parquet +0 -0
- {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/data/big_data_sim.parquet +0 -0
- {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/data/dup_cleaned.parquet +0 -0
- {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/data/dup_data.parquet +0 -0
- {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/data/dup_data_large.parquet +0 -0
- {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/data/nemo_full_stack_result.parquet +0 -0
- {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/data/nemo_real_result.parquet +0 -0
- {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/data/ocr_test.png +0 -0
- {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/data/output_result.jsonl +0 -0
- {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/data/sample.jsonl +0 -0
- {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/data/vietnamese_ocr.png +0 -0
- {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/parameters.yml +0 -0
- {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/__init__.py +0 -0
- {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/orchestrators/parallel_local.py +0 -0
- {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/schemas.py +0 -0
- {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/server.py +0 -0
- {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/servers/data_juicer/parameters.yml +0 -0
- {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/servers/data_juicer/server.py +0 -0
- {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/servers/instruction_gen/parameters.yml +0 -0
- {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/servers/instruction_gen/server.py +0 -0
- {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/servers/io/parameters.yml +0 -0
- {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/servers/io/server.py +0 -0
- {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/servers/llm/parameters.yml +0 -0
- {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/servers/llm/server.py +0 -0
- {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/servers/nemo_curator/parameters.yml +0 -0
- {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/servers/nemo_curator/server.py +0 -0
- {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/servers/ocr/deepdoc_vietocr/__init__.py +0 -0
- {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/servers/ocr/deepdoc_vietocr/implementations.py +0 -0
- {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/servers/ocr/deepdoc_vietocr/layout_recognizer.py +0 -0
- {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/servers/ocr/deepdoc_vietocr/ocr.py +0 -0
- {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/servers/ocr/deepdoc_vietocr/onnx/.gitattributes +0 -0
- {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/servers/ocr/deepdoc_vietocr/onnx/README.md +0 -0
- {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/servers/ocr/deepdoc_vietocr/onnx/ocr.res +0 -0
- {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/servers/ocr/deepdoc_vietocr/operators.py +0 -0
- {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/servers/ocr/deepdoc_vietocr/phases.py +0 -0
- {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/servers/ocr/deepdoc_vietocr/pipeline.py +0 -0
- {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/servers/ocr/deepdoc_vietocr/postprocess.py +0 -0
- {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/servers/ocr/deepdoc_vietocr/recognizer.py +0 -0
- {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/servers/ocr/deepdoc_vietocr/table_structure_recognizer.py +0 -0
- {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/servers/ocr/deepdoc_vietocr/utils/__init__.py +0 -0
- {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/servers/ocr/deepdoc_vietocr/utils/file_utils.py +0 -0
- {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/servers/ocr/deepdoc_vietocr/vietocr/__init__.py +0 -0
- {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/servers/ocr/deepdoc_vietocr/vietocr/config/base.yml +0 -0
- {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/servers/ocr/deepdoc_vietocr/vietocr/config/vgg-seq2seq.yml +0 -0
- {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/servers/ocr/deepdoc_vietocr/vietocr/model/__init__.py +0 -0
- {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/servers/ocr/deepdoc_vietocr/vietocr/model/backbone/cnn.py +0 -0
- {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/servers/ocr/deepdoc_vietocr/vietocr/model/backbone/vgg.py +0 -0
- {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/servers/ocr/deepdoc_vietocr/vietocr/model/seqmodel/seq2seq.py +0 -0
- {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/servers/ocr/deepdoc_vietocr/vietocr/model/transformerocr.py +0 -0
- {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/servers/ocr/deepdoc_vietocr/vietocr/model/vocab.py +0 -0
- {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/servers/ocr/deepdoc_vietocr/vietocr/tool/config.py +0 -0
- {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/servers/ocr/deepdoc_vietocr/vietocr/tool/translate.py +0 -0
- {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/servers/ocr/engines.py +0 -0
- {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/servers/ocr/install_models.py +0 -0
- {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/servers/ocr/parameters.yml +0 -0
- {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/servers/profiler/parameters.yml +0 -0
- {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/servers/profiler/server.py +0 -0
- {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/servers/sinks/parameters.yml +0 -0
- {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/servers/sinks/server.py +0 -0
- {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/servers/unstructured/parameters.yml +0 -0
- {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/servers/unstructured/server.py +0 -0
- {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/zenml_wrapper.py +0 -0
- {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/tests/manual/caching_test.yaml +0 -0
- {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/tests/manual/hf_ocr_test.yaml +0 -0
- {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/tests/manual/llm_test.yaml +0 -0
- {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/tests/manual/multimodal_test.yaml +0 -0
- {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/tests/manual/ocr_test.yaml +0 -0
- {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/tests/manual/parallel_test.yaml +0 -0
- {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/tests/manual/parquet_test.yaml +0 -0
- {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/tests/manual/phase4_test.yaml +0 -0
- {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/tests/manual/profiler_test.yaml +0 -0
- {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/tests/manual/standard_data_pipeline.yaml +0 -0
- {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/tests/manual/viet_ocr_test.yaml +0 -0
- {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/website/docs/docs.css +0 -0
- {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/website/docs/index.html +0 -0
- {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/website/index.html +0 -0
- {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/website/style.css +0 -0
|
@@ -2,6 +2,18 @@
|
|
|
2
2
|
|
|
3
3
|
All notable changes to this project will be documented in this file.
|
|
4
4
|
|
|
5
|
+
## [0.2.9] - 2026-02-03
|
|
6
|
+
|
|
7
|
+
### Added
|
|
8
|
+
- **Voice Processing Module**: Added a new `voice` server with Automatic Speech Recognition (ASR) support using OpenAI Whisper.
|
|
9
|
+
- **Voice Transcription Tool**: Introduced `transcribe` tool for high-quality audio-to-text conversion.
|
|
10
|
+
|
|
11
|
+
## [0.2.8] - 2026-02-03
|
|
12
|
+
|
|
13
|
+
### Fixed
|
|
14
|
+
- **Parameter Support**: Fixed dot-notation support for hierarchical parameters in pipeline configurations (e.g., `ocr.temp_dir`).
|
|
15
|
+
- **OCR Server**: Added detailed debug logging for temporary file operations.
|
|
16
|
+
|
|
5
17
|
## [0.2.7] - 2026-02-03
|
|
6
18
|
|
|
7
19
|
### Added
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: xfmr-zem
|
|
3
|
-
Version: 0.2.7
|
|
3
|
+
Version: 0.2.9
|
|
4
4
|
Summary: Zem: Unified Data Pipeline Framework (ZenML + NeMo Curator + DataJuicer) for multi-domain processing
|
|
5
5
|
Project-URL: Homepage, https://github.com/OAI-Labs/xfmr-zem
|
|
6
6
|
Project-URL: Repository, https://github.com/OAI-Labs/xfmr-zem
|
|
@@ -58,6 +58,10 @@ Requires-Dist: shapely; extra == 'ocr'
|
|
|
58
58
|
Requires-Dist: torch==2.5.1; extra == 'ocr'
|
|
59
59
|
Requires-Dist: torchvision==0.20.1; extra == 'ocr'
|
|
60
60
|
Requires-Dist: transformers>=4.40.0; extra == 'ocr'
|
|
61
|
+
Provides-Extra: voice
|
|
62
|
+
Requires-Dist: librosa; extra == 'voice'
|
|
63
|
+
Requires-Dist: openai-whisper; extra == 'voice'
|
|
64
|
+
Requires-Dist: soundfile; extra == 'voice'
|
|
61
65
|
Provides-Extra: zenml
|
|
62
66
|
Requires-Dist: zenml>=0.75.0; extra == 'zenml'
|
|
63
67
|
Description-Content-Type: text/markdown
|
|
Binary file
|
|
Binary file
|