xfmr-zem 0.2.2__py3-none-any.whl → 0.2.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. xfmr_zem/cli.py +32 -3
  2. xfmr_zem/client.py +59 -8
  3. xfmr_zem/server.py +21 -4
  4. xfmr_zem/servers/data_juicer/server.py +1 -1
  5. xfmr_zem/servers/instruction_gen/server.py +1 -1
  6. xfmr_zem/servers/io/server.py +1 -1
  7. xfmr_zem/servers/llm/parameters.yml +10 -0
  8. xfmr_zem/servers/nemo_curator/server.py +1 -1
  9. xfmr_zem/servers/ocr/deepdoc_vietocr/__init__.py +90 -0
  10. xfmr_zem/servers/ocr/deepdoc_vietocr/implementations.py +1286 -0
  11. xfmr_zem/servers/ocr/deepdoc_vietocr/layout_recognizer.py +562 -0
  12. xfmr_zem/servers/ocr/deepdoc_vietocr/ocr.py +512 -0
  13. xfmr_zem/servers/ocr/deepdoc_vietocr/onnx/.gitattributes +35 -0
  14. xfmr_zem/servers/ocr/deepdoc_vietocr/onnx/README.md +5 -0
  15. xfmr_zem/servers/ocr/deepdoc_vietocr/onnx/ocr.res +6623 -0
  16. xfmr_zem/servers/ocr/deepdoc_vietocr/operators.py +725 -0
  17. xfmr_zem/servers/ocr/deepdoc_vietocr/phases.py +191 -0
  18. xfmr_zem/servers/ocr/deepdoc_vietocr/pipeline.py +561 -0
  19. xfmr_zem/servers/ocr/deepdoc_vietocr/postprocess.py +370 -0
  20. xfmr_zem/servers/ocr/deepdoc_vietocr/recognizer.py +436 -0
  21. xfmr_zem/servers/ocr/deepdoc_vietocr/table_structure_recognizer.py +569 -0
  22. xfmr_zem/servers/ocr/deepdoc_vietocr/utils/__init__.py +81 -0
  23. xfmr_zem/servers/ocr/deepdoc_vietocr/utils/file_utils.py +246 -0
  24. xfmr_zem/servers/ocr/deepdoc_vietocr/vietocr/__init__.py +0 -0
  25. xfmr_zem/servers/ocr/deepdoc_vietocr/vietocr/config/base.yml +58 -0
  26. xfmr_zem/servers/ocr/deepdoc_vietocr/vietocr/config/vgg-seq2seq.yml +38 -0
  27. xfmr_zem/servers/ocr/deepdoc_vietocr/vietocr/model/__init__.py +0 -0
  28. xfmr_zem/servers/ocr/deepdoc_vietocr/vietocr/model/backbone/cnn.py +25 -0
  29. xfmr_zem/servers/ocr/deepdoc_vietocr/vietocr/model/backbone/vgg.py +51 -0
  30. xfmr_zem/servers/ocr/deepdoc_vietocr/vietocr/model/seqmodel/seq2seq.py +175 -0
  31. xfmr_zem/servers/ocr/deepdoc_vietocr/vietocr/model/transformerocr.py +29 -0
  32. xfmr_zem/servers/ocr/deepdoc_vietocr/vietocr/model/vocab.py +36 -0
  33. xfmr_zem/servers/ocr/deepdoc_vietocr/vietocr/tool/config.py +37 -0
  34. xfmr_zem/servers/ocr/deepdoc_vietocr/vietocr/tool/translate.py +111 -0
  35. xfmr_zem/servers/ocr/engines.py +242 -0
  36. xfmr_zem/servers/ocr/install_models.py +63 -0
  37. xfmr_zem/servers/ocr/parameters.yml +4 -0
  38. xfmr_zem/servers/ocr/server.py +44 -0
  39. xfmr_zem/servers/profiler/parameters.yml +4 -0
  40. xfmr_zem/servers/sinks/parameters.yml +6 -0
  41. xfmr_zem/servers/unstructured/parameters.yml +6 -0
  42. xfmr_zem/servers/unstructured/server.py +62 -0
  43. xfmr_zem/zenml_wrapper.py +20 -7
  44. {xfmr_zem-0.2.2.dist-info → xfmr_zem-0.2.5.dist-info}/METADATA +19 -1
  45. xfmr_zem-0.2.5.dist-info/RECORD +58 -0
  46. xfmr_zem-0.2.2.dist-info/RECORD +0 -23
  47. /xfmr_zem/servers/data_juicer/{parameter.yaml → parameters.yml} +0 -0
  48. /xfmr_zem/servers/instruction_gen/{parameter.yaml → parameters.yml} +0 -0
  49. /xfmr_zem/servers/io/{parameter.yaml → parameters.yml} +0 -0
  50. /xfmr_zem/servers/nemo_curator/{parameter.yaml → parameters.yml} +0 -0
  51. {xfmr_zem-0.2.2.dist-info → xfmr_zem-0.2.5.dist-info}/WHEEL +0 -0
  52. {xfmr_zem-0.2.2.dist-info → xfmr_zem-0.2.5.dist-info}/entry_points.txt +0 -0
  53. {xfmr_zem-0.2.2.dist-info → xfmr_zem-0.2.5.dist-info}/licenses/LICENSE +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: xfmr-zem
3
- Version: 0.2.2
3
+ Version: 0.2.5
4
4
  Summary: Zem: Unified Data Pipeline Framework (ZenML + NeMo Curator + DataJuicer) for multi-domain processing
5
5
  Project-URL: Homepage, https://github.com/OAI-Labs/xfmr-zem
6
6
  Project-URL: Repository, https://github.com/OAI-Labs/xfmr-zem
@@ -22,8 +22,10 @@ Requires-Dist: numpy>=1.24.0
22
22
  Requires-Dist: pandas>=2.0.0
23
23
  Requires-Dist: pyarrow>=15.0.0
24
24
  Requires-Dist: pydantic>=2.0.0
25
+ Requires-Dist: python-magic>=0.4.27
25
26
  Requires-Dist: pyyaml>=6.0
26
27
  Requires-Dist: rich>=13.0.0
28
+ Requires-Dist: unstructured[all-docs]>=0.16.0
27
29
  Requires-Dist: zenml[local,server]>=0.75.0
28
30
  Provides-Extra: all
29
31
  Requires-Dist: nemo-curator>=0.6.0; extra == 'all'
@@ -39,6 +41,22 @@ Requires-Dist: pytest>=7.0.0; extra == 'dev'
39
41
  Requires-Dist: ruff>=0.1.0; extra == 'dev'
40
42
  Provides-Extra: nemo
41
43
  Requires-Dist: nemo-curator>=0.6.0; extra == 'nemo'
44
+ Provides-Extra: ocr
45
+ Requires-Dist: cachetools>=5.0.0; extra == 'ocr'
46
+ Requires-Dist: einops; extra == 'ocr'
47
+ Requires-Dist: onnxruntime>=1.16.0; extra == 'ocr'
48
+ Requires-Dist: opencv-python>=4.8.0; extra == 'ocr'
49
+ Requires-Dist: paddleocr>=2.7.0; extra == 'ocr'
50
+ Requires-Dist: paddlepaddle>=2.6.0; extra == 'ocr'
51
+ Requires-Dist: pdfplumber>=0.11.0; extra == 'ocr'
52
+ Requires-Dist: pillow>=10.0.0; extra == 'ocr'
53
+ Requires-Dist: pyclipper; extra == 'ocr'
54
+ Requires-Dist: pytesseract>=0.3.10; extra == 'ocr'
55
+ Requires-Dist: ruamel-yaml>=0.17.0; extra == 'ocr'
56
+ Requires-Dist: shapely; extra == 'ocr'
57
+ Requires-Dist: torch==2.5.1; extra == 'ocr'
58
+ Requires-Dist: torchvision==0.20.1; extra == 'ocr'
59
+ Requires-Dist: transformers>=4.40.0; extra == 'ocr'
42
60
  Provides-Extra: zenml
43
61
  Requires-Dist: zenml>=0.75.0; extra == 'zenml'
44
62
  Description-Content-Type: text/markdown
@@ -0,0 +1,58 @@
1
+ xfmr_zem/__init__.py,sha256=Abx2BepsZu-e7E93N2lOgu9w0b4TBZLN6MEzCzDCn_A,1138
2
+ xfmr_zem/cli.py,sha256=5oz4qxXthU4mXu7bSbfKreVkAvCqrieXpGoKhJBXBvk,12538
3
+ xfmr_zem/client.py,sha256=wf9N_fILDBvWd-08TnNq3B1PqKQPhR0pvVuJq0vidk0,11435
4
+ xfmr_zem/schemas.py,sha256=0tHM0ftOWTWxNiqmAZn_MyIYJwF2p9brHK0MHlOMlKY,494
5
+ xfmr_zem/server.py,sha256=EeohfqhUiCm0cGnV85H2ODZ4FLXjcTjbkdHrHuGHW4I,8363
6
+ xfmr_zem/zenml_wrapper.py,sha256=LHgDewuPBjCl4EiU6JZVU-_lyEi-ATURDSG9Vf7PbEY,6739
7
+ xfmr_zem/orchestrators/parallel_local.py,sha256=_ve7UBmDM3yoLFljKBu0cS6TcZsyo6pgDs554YmTWiQ,3037
8
+ xfmr_zem/servers/data_juicer/parameters.yml,sha256=dl7YdcDlCCAjF_upLmuI8YwD5gti5gLR3SWHcqE8L2c,299
9
+ xfmr_zem/servers/data_juicer/server.py,sha256=AGWnpy-17NqmXOOQda30zi0dyY4_60HRIco7Qkps2kQ,3242
10
+ xfmr_zem/servers/instruction_gen/parameters.yml,sha256=q5cnper2ufdH1ceYxo95aHJ5nXtOHbd_tc75VzRt2rc,505
11
+ xfmr_zem/servers/instruction_gen/server.py,sha256=XRxnNIUWJ8RdquukrKo5OGhK6_o4a5zILILpuVnsXhI,3129
12
+ xfmr_zem/servers/io/parameters.yml,sha256=CDyETx0Mbo85BUmrQ_okGVhcbKNfkFj-63VXvd_989k,182
13
+ xfmr_zem/servers/io/server.py,sha256=af48i4v-1n97N0Dsb1gENM-E8_TswhF6Bg7-AtHMiMc,3272
14
+ xfmr_zem/servers/llm/parameters.yml,sha256=IQqAHqPHQS8_ffA3CfNGsprRi8OqF-N8cBouTDDnhjU,157
15
+ xfmr_zem/servers/llm/server.py,sha256=ugCQ7bIuZmc-j_DCjo5GDI5AmC2fbFPx7SXAvwj1VAo,1930
16
+ xfmr_zem/servers/nemo_curator/parameters.yml,sha256=EGEzo0heI-ajkwFFy3xxq_YD7cXUO4n4bjl73XoFZpI,357
17
+ xfmr_zem/servers/nemo_curator/server.py,sha256=zcHoSwxxoK_rMaDIAbEy1s8qfdp68Ue4B-XBcjGxQak,3848
18
+ xfmr_zem/servers/ocr/engines.py,sha256=zScn4Qjxbpl2nB8UXEf3kd9l8z84TEwGs6bV5ka8Lks,10295
19
+ xfmr_zem/servers/ocr/install_models.py,sha256=t02zpoy8djVhITOLEaRJ2mjiMrFfA9H6fpeHD3hXuio,2135
20
+ xfmr_zem/servers/ocr/parameters.yml,sha256=04v59-6QXwN6XEpnHLc5pz6iTgNBDhloHtCCjHr8YRA,89
21
+ xfmr_zem/servers/ocr/server.py,sha256=Yef1CYJR5RDH38jffgbcpGE-1VZLaU4w1wi572oPZcY,1571
22
+ xfmr_zem/servers/ocr/deepdoc_vietocr/__init__.py,sha256=XJE7RnOu5oo5p902HPWPDBd7FhVQXetmnr2-kWEG0nI,2419
23
+ xfmr_zem/servers/ocr/deepdoc_vietocr/implementations.py,sha256=79fYr76fx8yZda3HaFcK1d5G-4sDVf1JFHNW_OBQAk8,47348
24
+ xfmr_zem/servers/ocr/deepdoc_vietocr/layout_recognizer.py,sha256=7BeLHzf9FQUkkHMb5jDpggruJmfXVMU78MF_EeZ9PG4,10462
25
+ xfmr_zem/servers/ocr/deepdoc_vietocr/ocr.py,sha256=Iqiqof4LtMjIbU93INYgtWRtkvftAKFk6__gFvV0TUQ,18948
26
+ xfmr_zem/servers/ocr/deepdoc_vietocr/operators.py,sha256=HT2VHEnk1cgGEMlaPRaYeHzgDyYgvNivm4w1savPhUA,23999
27
+ xfmr_zem/servers/ocr/deepdoc_vietocr/phases.py,sha256=3gt__xNlKRrHso7qRRFK_BdWQMYMNunn-V_PfKZBTyI,6605
28
+ xfmr_zem/servers/ocr/deepdoc_vietocr/pipeline.py,sha256=3nTfvbIianpIMwxKkcGiMKho_E844qAhONvk1awbZs8,21992
29
+ xfmr_zem/servers/ocr/deepdoc_vietocr/postprocess.py,sha256=8Ik32oIdRVOalp3qQS-IdWp7VCZkKqRqnqMNSnenGsU,13210
30
+ xfmr_zem/servers/ocr/deepdoc_vietocr/recognizer.py,sha256=dvU9SCid_vqL-hdkBKpPuAhKedGtIcIMQf384KBeZPU,16076
31
+ xfmr_zem/servers/ocr/deepdoc_vietocr/table_structure_recognizer.py,sha256=QFQtPUXQgx4kRhK7EXIdkMkpUzOJLkbzLXIgxMDYliU,21933
32
+ xfmr_zem/servers/ocr/deepdoc_vietocr/onnx/.gitattributes,sha256=Ea1--iSXXuSww8OjjtGHN_Blil91oKlnh7V2p4oCM2E,1519
33
+ xfmr_zem/servers/ocr/deepdoc_vietocr/onnx/README.md,sha256=oIP5wxeqC9ps7KWh3sQFUGbi0_mMdvIL6RQrRSI_zFA,119
34
+ xfmr_zem/servers/ocr/deepdoc_vietocr/onnx/ocr.res,sha256=KLI2KtSrLcOHaapy_rU146nds_0qdYWgWSDmOTsdx_c,26249
35
+ xfmr_zem/servers/ocr/deepdoc_vietocr/utils/__init__.py,sha256=Eoefn8qLo4YhhlhAd5R4lNhKCL-HumlywFhicfeodmw,2488
36
+ xfmr_zem/servers/ocr/deepdoc_vietocr/utils/file_utils.py,sha256=sjw2t8WkjZpIW_eZJEjh60Cp3VBsZIsbYDEaMXmSXSw,7780
37
+ xfmr_zem/servers/ocr/deepdoc_vietocr/vietocr/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
38
+ xfmr_zem/servers/ocr/deepdoc_vietocr/vietocr/config/base.yml,sha256=8ujp9qH6FK3Lvaztiv0-scDxbXigtbAgMCfGdzYEkPQ,1808
39
+ xfmr_zem/servers/ocr/deepdoc_vietocr/vietocr/config/vgg-seq2seq.yml,sha256=NeNBuoHlksCYmwpa3lDgakv2lOwbTIPSG6Y9HfXDmIQ,748
40
+ xfmr_zem/servers/ocr/deepdoc_vietocr/vietocr/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
41
+ xfmr_zem/servers/ocr/deepdoc_vietocr/vietocr/model/transformerocr.py,sha256=_qhwMR2YIQgQuw7NfBEj0gN5D88Q2sXinudJV2BnwMw,803
42
+ xfmr_zem/servers/ocr/deepdoc_vietocr/vietocr/model/vocab.py,sha256=se7YdFVh0bXe2A1ytUh2yL-GFVckQj9L283jylvN7Zw,944
43
+ xfmr_zem/servers/ocr/deepdoc_vietocr/vietocr/model/backbone/cnn.py,sha256=ag4VGrDGDv9CPDy_09smqx0GJ_mjd2zL5GJv49V90zo,673
44
+ xfmr_zem/servers/ocr/deepdoc_vietocr/vietocr/model/backbone/vgg.py,sha256=_9nhphrAvib2v8xXFQdrh_3HDAGJn3FKcZfX34pWQOA,1584
45
+ xfmr_zem/servers/ocr/deepdoc_vietocr/vietocr/model/seqmodel/seq2seq.py,sha256=EMSXVGmo5ZD8ygpvbnzeZdIvIn2ifry0JMW5fSUOXVg,5731
46
+ xfmr_zem/servers/ocr/deepdoc_vietocr/vietocr/tool/config.py,sha256=UGc4hsSWbPzMJMrVgwyxcOMmtNducAi2MJ_dWXHjSSA,1096
47
+ xfmr_zem/servers/ocr/deepdoc_vietocr/vietocr/tool/translate.py,sha256=nWMcewxW6mBp568eyfRGx1frNaTf7SSG6QpFNfDZI6s,3377
48
+ xfmr_zem/servers/profiler/parameters.yml,sha256=2ruRnZ47GkiJoyAyGpDxq9IAFR_HvizFCP2CFfXlCxM,99
49
+ xfmr_zem/servers/profiler/server.py,sha256=GcBzroxHIQ9SwMgdgHSwaoqvFrKeGfUu9Y6Dk_OaTwM,2397
50
+ xfmr_zem/servers/sinks/parameters.yml,sha256=9HAnv84Utw2qWsVZH8uOjVE62lnAKBkzv4PZcPtDo6w,108
51
+ xfmr_zem/servers/sinks/server.py,sha256=jI_r4sq_U_avNwF1PiE0alpaDrYpzOI-qPeLU7hgHP0,1589
52
+ xfmr_zem/servers/unstructured/parameters.yml,sha256=N31cmc56GTr3rkVhbni4yOpbnHISReN8f-KnRZTDbBc,118
53
+ xfmr_zem/servers/unstructured/server.py,sha256=0XmXWMAUNEJboX-J4bn_8EBUfMHIqu_ylNC_s9YOZdk,1996
54
+ xfmr_zem-0.2.5.dist-info/METADATA,sha256=QxGjfN7Y4zZOGmcDwohYh9HcFj2JDw7XmKyC4400z6M,6332
55
+ xfmr_zem-0.2.5.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
56
+ xfmr_zem-0.2.5.dist-info/entry_points.txt,sha256=uxs-IXFxpSakHivpFN3mEr13cz-z-0vkeSF_4dEBMa4,65
57
+ xfmr_zem-0.2.5.dist-info/licenses/LICENSE,sha256=kf_ILr0zLkSy5-EBu0VF2PGaOykYo83z3UijI-bZeAE,11342
58
+ xfmr_zem-0.2.5.dist-info/RECORD,,
@@ -1,23 +0,0 @@
1
- xfmr_zem/__init__.py,sha256=Abx2BepsZu-e7E93N2lOgu9w0b4TBZLN6MEzCzDCn_A,1138
2
- xfmr_zem/cli.py,sha256=u3qzzoxPIBSgBy7f80X_pr8SyjACHP7R8uHwRxwjMWk,11367
3
- xfmr_zem/client.py,sha256=sAMhIB_N-JjmaUh9g0fSyxhbXvqctugsCOzf_0ctv8w,9027
4
- xfmr_zem/schemas.py,sha256=0tHM0ftOWTWxNiqmAZn_MyIYJwF2p9brHK0MHlOMlKY,494
5
- xfmr_zem/server.py,sha256=8ayF-v6P_YO60akD0SRjHBnsB3ZBsJ1ZY_BaHf3qR3I,7517
6
- xfmr_zem/zenml_wrapper.py,sha256=p6FbvIHFvakKAekzRGiauKi5AbWL0kJMw69iPrHJ8C0,6364
7
- xfmr_zem/orchestrators/parallel_local.py,sha256=_ve7UBmDM3yoLFljKBu0cS6TcZsyo6pgDs554YmTWiQ,3037
8
- xfmr_zem/servers/data_juicer/parameter.yaml,sha256=dl7YdcDlCCAjF_upLmuI8YwD5gti5gLR3SWHcqE8L2c,299
9
- xfmr_zem/servers/data_juicer/server.py,sha256=qmH6SeYa9OL6kMYIO3tTroKJUwoyefqI8SmuY08D_pk,3242
10
- xfmr_zem/servers/instruction_gen/parameter.yaml,sha256=q5cnper2ufdH1ceYxo95aHJ5nXtOHbd_tc75VzRt2rc,505
11
- xfmr_zem/servers/instruction_gen/server.py,sha256=orM1QSNjc37APgOHdDTa5joZEOvfM5KlNrBrNuX51Sw,3129
12
- xfmr_zem/servers/io/parameter.yaml,sha256=CDyETx0Mbo85BUmrQ_okGVhcbKNfkFj-63VXvd_989k,182
13
- xfmr_zem/servers/io/server.py,sha256=dQ3yWDeKXn7A8Fkwty3-6Yy-FmA0BpEDjzejHref7G0,3272
14
- xfmr_zem/servers/llm/server.py,sha256=ugCQ7bIuZmc-j_DCjo5GDI5AmC2fbFPx7SXAvwj1VAo,1930
15
- xfmr_zem/servers/nemo_curator/parameter.yaml,sha256=EGEzo0heI-ajkwFFy3xxq_YD7cXUO4n4bjl73XoFZpI,357
16
- xfmr_zem/servers/nemo_curator/server.py,sha256=lqN8I4uYhAOKyDyVV6BOewdijfvKTjksuwdr7JLKnkg,3848
17
- xfmr_zem/servers/profiler/server.py,sha256=GcBzroxHIQ9SwMgdgHSwaoqvFrKeGfUu9Y6Dk_OaTwM,2397
18
- xfmr_zem/servers/sinks/server.py,sha256=jI_r4sq_U_avNwF1PiE0alpaDrYpzOI-qPeLU7hgHP0,1589
19
- xfmr_zem-0.2.2.dist-info/METADATA,sha256=dWy86svcBMDE4qku23k3HsUALOxaQ8X0avxepNFO_n8,5516
20
- xfmr_zem-0.2.2.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
21
- xfmr_zem-0.2.2.dist-info/entry_points.txt,sha256=uxs-IXFxpSakHivpFN3mEr13cz-z-0vkeSF_4dEBMa4,65
22
- xfmr_zem-0.2.2.dist-info/licenses/LICENSE,sha256=kf_ILr0zLkSy5-EBu0VF2PGaOykYo83z3UijI-bZeAE,11342
23
- xfmr_zem-0.2.2.dist-info/RECORD,,
File without changes