xfmr-zem 0.2.7__tar.gz → 0.2.9__tar.gz

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (98)
  1. {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/CHANGELOG.md +12 -0
  2. {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/PKG-INFO +5 -1
  3. xfmr_zem-0.2.9/data/chunk_122.wav +0 -0
  4. xfmr_zem-0.2.9/data/sample_digital.pdf +0 -0
  5. xfmr_zem-0.2.9/data/sample_scanned.pdf +1447 -0
  6. {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/pyproject.toml +6 -1
  7. {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/cli.py +38 -1
  8. {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/client.py +38 -11
  9. {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/servers/ocr/server.py +1 -0
  10. xfmr_zem-0.2.9/src/xfmr_zem/servers/voice/engines.py +66 -0
  11. xfmr_zem-0.2.9/src/xfmr_zem/servers/voice/parameters.yml +2 -0
  12. xfmr_zem-0.2.9/src/xfmr_zem/servers/voice/server.py +54 -0
  13. xfmr_zem-0.2.9/tests/manual/pdf_ocr_test.yaml +25 -0
  14. xfmr_zem-0.2.9/tests/manual/voice_test.yaml +15 -0
  15. {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/uv.lock +245 -2
  16. {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/.github/workflows/deploy.yml +0 -0
  17. {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/.github/workflows/pypi-publish.yml +0 -0
  18. {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/.gitignore +0 -0
  19. {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/AGENTS.md +0 -0
  20. {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/LICENSE +0 -0
  21. {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/README.md +0 -0
  22. {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/data/big_data_output.parquet +0 -0
  23. {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/data/big_data_sim.parquet +0 -0
  24. {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/data/dup_cleaned.parquet +0 -0
  25. {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/data/dup_data.parquet +0 -0
  26. {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/data/dup_data_large.parquet +0 -0
  27. {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/data/nemo_full_stack_result.parquet +0 -0
  28. {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/data/nemo_real_result.parquet +0 -0
  29. {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/data/ocr_test.png +0 -0
  30. {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/data/output_result.jsonl +0 -0
  31. {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/data/sample.jsonl +0 -0
  32. {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/data/vietnamese_ocr.png +0 -0
  33. {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/parameters.yml +0 -0
  34. {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/__init__.py +0 -0
  35. {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/orchestrators/parallel_local.py +0 -0
  36. {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/schemas.py +0 -0
  37. {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/server.py +0 -0
  38. {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/servers/data_juicer/parameters.yml +0 -0
  39. {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/servers/data_juicer/server.py +0 -0
  40. {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/servers/instruction_gen/parameters.yml +0 -0
  41. {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/servers/instruction_gen/server.py +0 -0
  42. {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/servers/io/parameters.yml +0 -0
  43. {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/servers/io/server.py +0 -0
  44. {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/servers/llm/parameters.yml +0 -0
  45. {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/servers/llm/server.py +0 -0
  46. {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/servers/nemo_curator/parameters.yml +0 -0
  47. {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/servers/nemo_curator/server.py +0 -0
  48. {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/servers/ocr/deepdoc_vietocr/__init__.py +0 -0
  49. {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/servers/ocr/deepdoc_vietocr/implementations.py +0 -0
  50. {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/servers/ocr/deepdoc_vietocr/layout_recognizer.py +0 -0
  51. {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/servers/ocr/deepdoc_vietocr/ocr.py +0 -0
  52. {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/servers/ocr/deepdoc_vietocr/onnx/.gitattributes +0 -0
  53. {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/servers/ocr/deepdoc_vietocr/onnx/README.md +0 -0
  54. {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/servers/ocr/deepdoc_vietocr/onnx/ocr.res +0 -0
  55. {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/servers/ocr/deepdoc_vietocr/operators.py +0 -0
  56. {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/servers/ocr/deepdoc_vietocr/phases.py +0 -0
  57. {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/servers/ocr/deepdoc_vietocr/pipeline.py +0 -0
  58. {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/servers/ocr/deepdoc_vietocr/postprocess.py +0 -0
  59. {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/servers/ocr/deepdoc_vietocr/recognizer.py +0 -0
  60. {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/servers/ocr/deepdoc_vietocr/table_structure_recognizer.py +0 -0
  61. {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/servers/ocr/deepdoc_vietocr/utils/__init__.py +0 -0
  62. {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/servers/ocr/deepdoc_vietocr/utils/file_utils.py +0 -0
  63. {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/servers/ocr/deepdoc_vietocr/vietocr/__init__.py +0 -0
  64. {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/servers/ocr/deepdoc_vietocr/vietocr/config/base.yml +0 -0
  65. {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/servers/ocr/deepdoc_vietocr/vietocr/config/vgg-seq2seq.yml +0 -0
  66. {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/servers/ocr/deepdoc_vietocr/vietocr/model/__init__.py +0 -0
  67. {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/servers/ocr/deepdoc_vietocr/vietocr/model/backbone/cnn.py +0 -0
  68. {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/servers/ocr/deepdoc_vietocr/vietocr/model/backbone/vgg.py +0 -0
  69. {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/servers/ocr/deepdoc_vietocr/vietocr/model/seqmodel/seq2seq.py +0 -0
  70. {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/servers/ocr/deepdoc_vietocr/vietocr/model/transformerocr.py +0 -0
  71. {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/servers/ocr/deepdoc_vietocr/vietocr/model/vocab.py +0 -0
  72. {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/servers/ocr/deepdoc_vietocr/vietocr/tool/config.py +0 -0
  73. {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/servers/ocr/deepdoc_vietocr/vietocr/tool/translate.py +0 -0
  74. {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/servers/ocr/engines.py +0 -0
  75. {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/servers/ocr/install_models.py +0 -0
  76. {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/servers/ocr/parameters.yml +0 -0
  77. {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/servers/profiler/parameters.yml +0 -0
  78. {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/servers/profiler/server.py +0 -0
  79. {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/servers/sinks/parameters.yml +0 -0
  80. {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/servers/sinks/server.py +0 -0
  81. {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/servers/unstructured/parameters.yml +0 -0
  82. {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/servers/unstructured/server.py +0 -0
  83. {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/src/xfmr_zem/zenml_wrapper.py +0 -0
  84. {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/tests/manual/caching_test.yaml +0 -0
  85. {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/tests/manual/hf_ocr_test.yaml +0 -0
  86. {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/tests/manual/llm_test.yaml +0 -0
  87. {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/tests/manual/multimodal_test.yaml +0 -0
  88. {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/tests/manual/ocr_test.yaml +0 -0
  89. {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/tests/manual/parallel_test.yaml +0 -0
  90. {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/tests/manual/parquet_test.yaml +0 -0
  91. {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/tests/manual/phase4_test.yaml +0 -0
  92. {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/tests/manual/profiler_test.yaml +0 -0
  93. {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/tests/manual/standard_data_pipeline.yaml +0 -0
  94. {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/tests/manual/viet_ocr_test.yaml +0 -0
  95. {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/website/docs/docs.css +0 -0
  96. {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/website/docs/index.html +0 -0
  97. {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/website/index.html +0 -0
  98. {xfmr_zem-0.2.7 → xfmr_zem-0.2.9}/website/style.css +0 -0
@@ -2,6 +2,18 @@
2
2
 
3
3
  All notable changes to this project will be documented in this file.
4
4
 
5
+ ## [0.2.9] - 2026-02-03
6
+
7
+ ### Added
8
+ - **Voice Processing Module**: Added a new `voice` server with Automatic Speech Recognition (ASR) support using OpenAI Whisper.
9
+ - **Voice Transcription Tool**: Introduced `transcribe` tool for high-quality audio-to-text conversion.
10
+
11
+ ## [0.2.8] - 2026-02-03
12
+
13
+ ### Fixed
14
+ - **Parameter Support**: Fixed dot-notation support for hierarchical parameters in pipeline configurations (e.g., `ocr.temp_dir`).
15
+ - **OCR Server**: Added detailed debug logging for temporary file operations.
16
+
5
17
  ## [0.2.7] - 2026-02-03
6
18
 
7
19
  ### Added
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: xfmr-zem
3
- Version: 0.2.7
3
+ Version: 0.2.9
4
4
  Summary: Zem: Unified Data Pipeline Framework (ZenML + NeMo Curator + DataJuicer) for multi-domain processing
5
5
  Project-URL: Homepage, https://github.com/OAI-Labs/xfmr-zem
6
6
  Project-URL: Repository, https://github.com/OAI-Labs/xfmr-zem
@@ -58,6 +58,10 @@ Requires-Dist: shapely; extra == 'ocr'
58
58
  Requires-Dist: torch==2.5.1; extra == 'ocr'
59
59
  Requires-Dist: torchvision==0.20.1; extra == 'ocr'
60
60
  Requires-Dist: transformers>=4.40.0; extra == 'ocr'
61
+ Provides-Extra: voice
62
+ Requires-Dist: librosa; extra == 'voice'
63
+ Requires-Dist: openai-whisper; extra == 'voice'
64
+ Requires-Dist: soundfile; extra == 'voice'
61
65
  Provides-Extra: zenml
62
66
  Requires-Dist: zenml>=0.75.0; extra == 'zenml'
63
67
  Description-Content-Type: text/markdown
Binary file
Binary file