xfmr-zem 0.2.6__tar.gz → 0.2.8__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {xfmr_zem-0.2.6 → xfmr_zem-0.2.8}/CHANGELOG.md +11 -0
- {xfmr_zem-0.2.6 → xfmr_zem-0.2.8}/PKG-INFO +1 -1
- xfmr_zem-0.2.8/data/sample_digital.pdf +0 -0
- xfmr_zem-0.2.8/data/sample_scanned.pdf +1447 -0
- {xfmr_zem-0.2.6 → xfmr_zem-0.2.8}/pyproject.toml +1 -1
- {xfmr_zem-0.2.6 → xfmr_zem-0.2.8}/src/xfmr_zem/client.py +38 -11
- {xfmr_zem-0.2.6 → xfmr_zem-0.2.8}/src/xfmr_zem/servers/ocr/parameters.yml +3 -0
- {xfmr_zem-0.2.6 → xfmr_zem-0.2.8}/src/xfmr_zem/servers/ocr/server.py +36 -8
- xfmr_zem-0.2.8/tests/manual/pdf_ocr_test.yaml +25 -0
- {xfmr_zem-0.2.6 → xfmr_zem-0.2.8}/uv.lock +1 -1
- {xfmr_zem-0.2.6 → xfmr_zem-0.2.8}/.github/workflows/deploy.yml +0 -0
- {xfmr_zem-0.2.6 → xfmr_zem-0.2.8}/.github/workflows/pypi-publish.yml +0 -0
- {xfmr_zem-0.2.6 → xfmr_zem-0.2.8}/.gitignore +0 -0
- {xfmr_zem-0.2.6 → xfmr_zem-0.2.8}/AGENTS.md +0 -0
- {xfmr_zem-0.2.6 → xfmr_zem-0.2.8}/LICENSE +0 -0
- {xfmr_zem-0.2.6 → xfmr_zem-0.2.8}/README.md +0 -0
- {xfmr_zem-0.2.6 → xfmr_zem-0.2.8}/data/big_data_output.parquet +0 -0
- {xfmr_zem-0.2.6 → xfmr_zem-0.2.8}/data/big_data_sim.parquet +0 -0
- {xfmr_zem-0.2.6 → xfmr_zem-0.2.8}/data/dup_cleaned.parquet +0 -0
- {xfmr_zem-0.2.6 → xfmr_zem-0.2.8}/data/dup_data.parquet +0 -0
- {xfmr_zem-0.2.6 → xfmr_zem-0.2.8}/data/dup_data_large.parquet +0 -0
- {xfmr_zem-0.2.6 → xfmr_zem-0.2.8}/data/nemo_full_stack_result.parquet +0 -0
- {xfmr_zem-0.2.6 → xfmr_zem-0.2.8}/data/nemo_real_result.parquet +0 -0
- {xfmr_zem-0.2.6 → xfmr_zem-0.2.8}/data/ocr_test.png +0 -0
- {xfmr_zem-0.2.6 → xfmr_zem-0.2.8}/data/output_result.jsonl +0 -0
- {xfmr_zem-0.2.6 → xfmr_zem-0.2.8}/data/sample.jsonl +0 -0
- {xfmr_zem-0.2.6 → xfmr_zem-0.2.8}/data/vietnamese_ocr.png +0 -0
- {xfmr_zem-0.2.6 → xfmr_zem-0.2.8}/parameters.yml +0 -0
- {xfmr_zem-0.2.6 → xfmr_zem-0.2.8}/src/xfmr_zem/__init__.py +0 -0
- {xfmr_zem-0.2.6 → xfmr_zem-0.2.8}/src/xfmr_zem/cli.py +0 -0
- {xfmr_zem-0.2.6 → xfmr_zem-0.2.8}/src/xfmr_zem/orchestrators/parallel_local.py +0 -0
- {xfmr_zem-0.2.6 → xfmr_zem-0.2.8}/src/xfmr_zem/schemas.py +0 -0
- {xfmr_zem-0.2.6 → xfmr_zem-0.2.8}/src/xfmr_zem/server.py +0 -0
- {xfmr_zem-0.2.6 → xfmr_zem-0.2.8}/src/xfmr_zem/servers/data_juicer/parameters.yml +0 -0
- {xfmr_zem-0.2.6 → xfmr_zem-0.2.8}/src/xfmr_zem/servers/data_juicer/server.py +0 -0
- {xfmr_zem-0.2.6 → xfmr_zem-0.2.8}/src/xfmr_zem/servers/instruction_gen/parameters.yml +0 -0
- {xfmr_zem-0.2.6 → xfmr_zem-0.2.8}/src/xfmr_zem/servers/instruction_gen/server.py +0 -0
- {xfmr_zem-0.2.6 → xfmr_zem-0.2.8}/src/xfmr_zem/servers/io/parameters.yml +0 -0
- {xfmr_zem-0.2.6 → xfmr_zem-0.2.8}/src/xfmr_zem/servers/io/server.py +0 -0
- {xfmr_zem-0.2.6 → xfmr_zem-0.2.8}/src/xfmr_zem/servers/llm/parameters.yml +0 -0
- {xfmr_zem-0.2.6 → xfmr_zem-0.2.8}/src/xfmr_zem/servers/llm/server.py +0 -0
- {xfmr_zem-0.2.6 → xfmr_zem-0.2.8}/src/xfmr_zem/servers/nemo_curator/parameters.yml +0 -0
- {xfmr_zem-0.2.6 → xfmr_zem-0.2.8}/src/xfmr_zem/servers/nemo_curator/server.py +0 -0
- {xfmr_zem-0.2.6 → xfmr_zem-0.2.8}/src/xfmr_zem/servers/ocr/deepdoc_vietocr/__init__.py +0 -0
- {xfmr_zem-0.2.6 → xfmr_zem-0.2.8}/src/xfmr_zem/servers/ocr/deepdoc_vietocr/implementations.py +0 -0
- {xfmr_zem-0.2.6 → xfmr_zem-0.2.8}/src/xfmr_zem/servers/ocr/deepdoc_vietocr/layout_recognizer.py +0 -0
- {xfmr_zem-0.2.6 → xfmr_zem-0.2.8}/src/xfmr_zem/servers/ocr/deepdoc_vietocr/ocr.py +0 -0
- {xfmr_zem-0.2.6 → xfmr_zem-0.2.8}/src/xfmr_zem/servers/ocr/deepdoc_vietocr/onnx/.gitattributes +0 -0
- {xfmr_zem-0.2.6 → xfmr_zem-0.2.8}/src/xfmr_zem/servers/ocr/deepdoc_vietocr/onnx/README.md +0 -0
- {xfmr_zem-0.2.6 → xfmr_zem-0.2.8}/src/xfmr_zem/servers/ocr/deepdoc_vietocr/onnx/ocr.res +0 -0
- {xfmr_zem-0.2.6 → xfmr_zem-0.2.8}/src/xfmr_zem/servers/ocr/deepdoc_vietocr/operators.py +0 -0
- {xfmr_zem-0.2.6 → xfmr_zem-0.2.8}/src/xfmr_zem/servers/ocr/deepdoc_vietocr/phases.py +0 -0
- {xfmr_zem-0.2.6 → xfmr_zem-0.2.8}/src/xfmr_zem/servers/ocr/deepdoc_vietocr/pipeline.py +0 -0
- {xfmr_zem-0.2.6 → xfmr_zem-0.2.8}/src/xfmr_zem/servers/ocr/deepdoc_vietocr/postprocess.py +0 -0
- {xfmr_zem-0.2.6 → xfmr_zem-0.2.8}/src/xfmr_zem/servers/ocr/deepdoc_vietocr/recognizer.py +0 -0
- {xfmr_zem-0.2.6 → xfmr_zem-0.2.8}/src/xfmr_zem/servers/ocr/deepdoc_vietocr/table_structure_recognizer.py +0 -0
- {xfmr_zem-0.2.6 → xfmr_zem-0.2.8}/src/xfmr_zem/servers/ocr/deepdoc_vietocr/utils/__init__.py +0 -0
- {xfmr_zem-0.2.6 → xfmr_zem-0.2.8}/src/xfmr_zem/servers/ocr/deepdoc_vietocr/utils/file_utils.py +0 -0
- {xfmr_zem-0.2.6 → xfmr_zem-0.2.8}/src/xfmr_zem/servers/ocr/deepdoc_vietocr/vietocr/__init__.py +0 -0
- {xfmr_zem-0.2.6 → xfmr_zem-0.2.8}/src/xfmr_zem/servers/ocr/deepdoc_vietocr/vietocr/config/base.yml +0 -0
- {xfmr_zem-0.2.6 → xfmr_zem-0.2.8}/src/xfmr_zem/servers/ocr/deepdoc_vietocr/vietocr/config/vgg-seq2seq.yml +0 -0
- {xfmr_zem-0.2.6 → xfmr_zem-0.2.8}/src/xfmr_zem/servers/ocr/deepdoc_vietocr/vietocr/model/__init__.py +0 -0
- {xfmr_zem-0.2.6 → xfmr_zem-0.2.8}/src/xfmr_zem/servers/ocr/deepdoc_vietocr/vietocr/model/backbone/cnn.py +0 -0
- {xfmr_zem-0.2.6 → xfmr_zem-0.2.8}/src/xfmr_zem/servers/ocr/deepdoc_vietocr/vietocr/model/backbone/vgg.py +0 -0
- {xfmr_zem-0.2.6 → xfmr_zem-0.2.8}/src/xfmr_zem/servers/ocr/deepdoc_vietocr/vietocr/model/seqmodel/seq2seq.py +0 -0
- {xfmr_zem-0.2.6 → xfmr_zem-0.2.8}/src/xfmr_zem/servers/ocr/deepdoc_vietocr/vietocr/model/transformerocr.py +0 -0
- {xfmr_zem-0.2.6 → xfmr_zem-0.2.8}/src/xfmr_zem/servers/ocr/deepdoc_vietocr/vietocr/model/vocab.py +0 -0
- {xfmr_zem-0.2.6 → xfmr_zem-0.2.8}/src/xfmr_zem/servers/ocr/deepdoc_vietocr/vietocr/tool/config.py +0 -0
- {xfmr_zem-0.2.6 → xfmr_zem-0.2.8}/src/xfmr_zem/servers/ocr/deepdoc_vietocr/vietocr/tool/translate.py +0 -0
- {xfmr_zem-0.2.6 → xfmr_zem-0.2.8}/src/xfmr_zem/servers/ocr/engines.py +0 -0
- {xfmr_zem-0.2.6 → xfmr_zem-0.2.8}/src/xfmr_zem/servers/ocr/install_models.py +0 -0
- {xfmr_zem-0.2.6 → xfmr_zem-0.2.8}/src/xfmr_zem/servers/profiler/parameters.yml +0 -0
- {xfmr_zem-0.2.6 → xfmr_zem-0.2.8}/src/xfmr_zem/servers/profiler/server.py +0 -0
- {xfmr_zem-0.2.6 → xfmr_zem-0.2.8}/src/xfmr_zem/servers/sinks/parameters.yml +0 -0
- {xfmr_zem-0.2.6 → xfmr_zem-0.2.8}/src/xfmr_zem/servers/sinks/server.py +0 -0
- {xfmr_zem-0.2.6 → xfmr_zem-0.2.8}/src/xfmr_zem/servers/unstructured/parameters.yml +0 -0
- {xfmr_zem-0.2.6 → xfmr_zem-0.2.8}/src/xfmr_zem/servers/unstructured/server.py +0 -0
- {xfmr_zem-0.2.6 → xfmr_zem-0.2.8}/src/xfmr_zem/zenml_wrapper.py +0 -0
- {xfmr_zem-0.2.6 → xfmr_zem-0.2.8}/tests/manual/caching_test.yaml +0 -0
- {xfmr_zem-0.2.6 → xfmr_zem-0.2.8}/tests/manual/hf_ocr_test.yaml +0 -0
- {xfmr_zem-0.2.6 → xfmr_zem-0.2.8}/tests/manual/llm_test.yaml +0 -0
- {xfmr_zem-0.2.6 → xfmr_zem-0.2.8}/tests/manual/multimodal_test.yaml +0 -0
- {xfmr_zem-0.2.6 → xfmr_zem-0.2.8}/tests/manual/ocr_test.yaml +0 -0
- {xfmr_zem-0.2.6 → xfmr_zem-0.2.8}/tests/manual/parallel_test.yaml +0 -0
- {xfmr_zem-0.2.6 → xfmr_zem-0.2.8}/tests/manual/parquet_test.yaml +0 -0
- {xfmr_zem-0.2.6 → xfmr_zem-0.2.8}/tests/manual/phase4_test.yaml +0 -0
- {xfmr_zem-0.2.6 → xfmr_zem-0.2.8}/tests/manual/profiler_test.yaml +0 -0
- {xfmr_zem-0.2.6 → xfmr_zem-0.2.8}/tests/manual/standard_data_pipeline.yaml +0 -0
- {xfmr_zem-0.2.6 → xfmr_zem-0.2.8}/tests/manual/viet_ocr_test.yaml +0 -0
- {xfmr_zem-0.2.6 → xfmr_zem-0.2.8}/website/docs/docs.css +0 -0
- {xfmr_zem-0.2.6 → xfmr_zem-0.2.8}/website/docs/index.html +0 -0
- {xfmr_zem-0.2.6 → xfmr_zem-0.2.8}/website/index.html +0 -0
- {xfmr_zem-0.2.6 → xfmr_zem-0.2.8}/website/style.css +0 -0
|
@@ -2,6 +2,17 @@
|
|
|
2
2
|
|
|
3
3
|
All notable changes to this project will be documented in this file.
|
|
4
4
|
|
|
5
|
+
## [0.2.8] - 2026-02-03
|
|
6
|
+
|
|
7
|
+
### Fixed
|
|
8
|
+
- **Parameter Support**: Fixed dot-notation support for hierarchical parameters in pipeline configurations (e.g., `ocr.temp_dir`).
|
|
9
|
+
- **OCR Server**: Added detailed debug logging for temporary file operations.
|
|
10
|
+
|
|
11
|
+
## [0.2.7] - 2026-02-03
|
|
12
|
+
|
|
13
|
+
### Added
|
|
14
|
+
- **Configurable OCR Parameters**: Added `scanned_threshold`, `zoom`, and `temp_dir` parameters to the OCR server for finer control over PDF processing.
|
|
15
|
+
|
|
5
16
|
## [0.2.6] - 2026-02-03
|
|
6
17
|
|
|
7
18
|
### Added
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: xfmr-zem
|
|
3
|
-
Version: 0.2.6
|
|
3
|
+
Version: 0.2.8
|
|
4
4
|
Summary: Zem: Unified Data Pipeline Framework (ZenML + NeMo Curator + DataJuicer) for multi-domain processing
|
|
5
5
|
Project-URL: Homepage, https://github.com/OAI-Labs/xfmr-zem
|
|
6
6
|
Project-URL: Repository, https://github.com/OAI-Labs/xfmr-zem
|
|
Binary file
|