xgen-doc2chunk 0.1.5__tar.gz → 0.1.51__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (162)
  1. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/PKG-INFO +1 -1
  2. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/pyproject.toml +1 -1
  3. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/pdf_helpers/pdf_table_validator.py +5 -5
  4. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/.gitignore +0 -0
  5. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/LICENSE +0 -0
  6. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/README.md +0 -0
  7. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/__init__.py +0 -0
  8. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/chunking/__init__.py +0 -0
  9. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/chunking/chunking.py +0 -0
  10. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/chunking/constants.py +0 -0
  11. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/chunking/page_chunker.py +0 -0
  12. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/chunking/protected_regions.py +0 -0
  13. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/chunking/sheet_processor.py +0 -0
  14. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/chunking/table_chunker.py +0 -0
  15. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/chunking/table_parser.py +0 -0
  16. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/chunking/text_chunker.py +0 -0
  17. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/__init__.py +0 -0
  18. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/document_processor.py +0 -0
  19. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/functions/__init__.py +0 -0
  20. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/functions/chart_extractor.py +0 -0
  21. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/functions/chart_processor.py +0 -0
  22. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/functions/file_converter.py +0 -0
  23. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/functions/img_processor.py +0 -0
  24. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/functions/metadata_extractor.py +0 -0
  25. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/functions/page_tag_processor.py +0 -0
  26. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/functions/preprocessor.py +0 -0
  27. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/functions/storage_backend.py +0 -0
  28. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/functions/table_extractor.py +0 -0
  29. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/functions/table_processor.py +0 -0
  30. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/functions/utils.py +0 -0
  31. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/__init__.py +0 -0
  32. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/base_handler.py +0 -0
  33. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/csv_handler.py +0 -0
  34. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/csv_helper/__init__.py +0 -0
  35. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/csv_helper/csv_constants.py +0 -0
  36. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/csv_helper/csv_encoding.py +0 -0
  37. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/csv_helper/csv_file_converter.py +0 -0
  38. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/csv_helper/csv_image_processor.py +0 -0
  39. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/csv_helper/csv_metadata.py +0 -0
  40. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/csv_helper/csv_parser.py +0 -0
  41. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/csv_helper/csv_preprocessor.py +0 -0
  42. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/csv_helper/csv_table.py +0 -0
  43. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/doc_handler.py +0 -0
  44. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/doc_helpers/__init__.py +0 -0
  45. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/doc_helpers/doc_file_converter.py +0 -0
  46. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/doc_helpers/doc_image_processor.py +0 -0
  47. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/doc_helpers/doc_preprocessor.py +0 -0
  48. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/docx_handler.py +0 -0
  49. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/docx_helper/__init__.py +0 -0
  50. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/docx_helper/docx_chart_extractor.py +0 -0
  51. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/docx_helper/docx_constants.py +0 -0
  52. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/docx_helper/docx_file_converter.py +0 -0
  53. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/docx_helper/docx_image.py +0 -0
  54. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/docx_helper/docx_image_processor.py +0 -0
  55. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/docx_helper/docx_metadata.py +0 -0
  56. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/docx_helper/docx_paragraph.py +0 -0
  57. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/docx_helper/docx_preprocessor.py +0 -0
  58. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/docx_helper/docx_table_extractor.py +0 -0
  59. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/docx_helper/docx_table_processor.py +0 -0
  60. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/excel_handler.py +0 -0
  61. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/excel_helper/__init__.py +0 -0
  62. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/excel_helper/excel_chart_extractor.py +0 -0
  63. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/excel_helper/excel_file_converter.py +0 -0
  64. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/excel_helper/excel_image_processor.py +0 -0
  65. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/excel_helper/excel_layout_detector.py +0 -0
  66. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/excel_helper/excel_metadata.py +0 -0
  67. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/excel_helper/excel_preprocessor.py +0 -0
  68. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/excel_helper/excel_table_xls.py +0 -0
  69. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/excel_helper/excel_table_xlsx.py +0 -0
  70. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/excel_helper/excel_textbox.py +0 -0
  71. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/html_helper/__init__.py +0 -0
  72. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/html_helper/html_file_converter.py +0 -0
  73. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/html_helper/html_preprocessor.py +0 -0
  74. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/html_reprocessor.py +0 -0
  75. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/hwp_handler.py +0 -0
  76. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/hwp_helper/__init__.py +0 -0
  77. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/hwp_helper/hwp_chart_extractor.py +0 -0
  78. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/hwp_helper/hwp_constants.py +0 -0
  79. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/hwp_helper/hwp_decoder.py +0 -0
  80. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/hwp_helper/hwp_docinfo.py +0 -0
  81. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/hwp_helper/hwp_file_converter.py +0 -0
  82. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/hwp_helper/hwp_image_processor.py +0 -0
  83. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/hwp_helper/hwp_metadata.py +0 -0
  84. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/hwp_helper/hwp_preprocessor.py +0 -0
  85. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/hwp_helper/hwp_record.py +0 -0
  86. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/hwp_helper/hwp_recovery.py +0 -0
  87. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/hwp_helper/hwp_table.py +0 -0
  88. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/hwpx_handler.py +0 -0
  89. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/hwpx_helper/__init__.py +0 -0
  90. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/hwpx_helper/hwpx_chart_extractor.py +0 -0
  91. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/hwpx_helper/hwpx_constants.py +0 -0
  92. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/hwpx_helper/hwpx_file_converter.py +0 -0
  93. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/hwpx_helper/hwpx_image_processor.py +0 -0
  94. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/hwpx_helper/hwpx_metadata.py +0 -0
  95. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/hwpx_helper/hwpx_preprocessor.py +0 -0
  96. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/hwpx_helper/hwpx_section.py +0 -0
  97. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/hwpx_helper/hwpx_table_extractor.py +0 -0
  98. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/hwpx_helper/hwpx_table_processor.py +0 -0
  99. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/image_file_handler.py +0 -0
  100. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/image_file_helper/__init__.py +0 -0
  101. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/image_file_helper/image_file_converter.py +0 -0
  102. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/image_file_helper/image_file_image_processor.py +0 -0
  103. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/image_file_helper/image_file_preprocessor.py +0 -0
  104. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/pdf_handler.py +0 -0
  105. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/pdf_helpers/__init__.py +0 -0
  106. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/pdf_helpers/pdf_block_image_engine.py +0 -0
  107. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/pdf_helpers/pdf_cell_analysis.py +0 -0
  108. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/pdf_helpers/pdf_complexity_analyzer.py +0 -0
  109. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/pdf_helpers/pdf_element_merger.py +0 -0
  110. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/pdf_helpers/pdf_file_converter.py +0 -0
  111. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/pdf_helpers/pdf_graphic_detector.py +0 -0
  112. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/pdf_helpers/pdf_image_processor.py +0 -0
  113. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/pdf_helpers/pdf_layout_block_detector.py +0 -0
  114. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/pdf_helpers/pdf_line_analysis.py +0 -0
  115. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/pdf_helpers/pdf_metadata.py +0 -0
  116. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/pdf_helpers/pdf_page_analyzer.py +0 -0
  117. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/pdf_helpers/pdf_preprocessor.py +0 -0
  118. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/pdf_helpers/pdf_table_detection.py +0 -0
  119. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/pdf_helpers/pdf_table_processor.py +0 -0
  120. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/pdf_helpers/pdf_table_quality_analyzer.py +0 -0
  121. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/pdf_helpers/pdf_text_extractor.py +0 -0
  122. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/pdf_helpers/pdf_text_quality_analyzer.py +0 -0
  123. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/pdf_helpers/pdf_utils.py +0 -0
  124. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/pdf_helpers/pdf_vector_text_ocr.py +0 -0
  125. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/pdf_helpers/types.py +0 -0
  126. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/ppt_handler.py +0 -0
  127. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/ppt_helper/__init__.py +0 -0
  128. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/ppt_helper/ppt_bullet.py +0 -0
  129. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/ppt_helper/ppt_chart_extractor.py +0 -0
  130. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/ppt_helper/ppt_constants.py +0 -0
  131. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/ppt_helper/ppt_file_converter.py +0 -0
  132. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/ppt_helper/ppt_image_processor.py +0 -0
  133. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/ppt_helper/ppt_metadata.py +0 -0
  134. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/ppt_helper/ppt_preprocessor.py +0 -0
  135. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/ppt_helper/ppt_shape.py +0 -0
  136. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/ppt_helper/ppt_slide.py +0 -0
  137. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/ppt_helper/ppt_table.py +0 -0
  138. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/rtf_handler.py +0 -0
  139. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/rtf_helper/__init__.py +0 -0
  140. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/rtf_helper/rtf_constants.py +0 -0
  141. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/rtf_helper/rtf_content_extractor.py +0 -0
  142. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/rtf_helper/rtf_decoder.py +0 -0
  143. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/rtf_helper/rtf_file_converter.py +0 -0
  144. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/rtf_helper/rtf_metadata_extractor.py +0 -0
  145. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/rtf_helper/rtf_preprocessor.py +0 -0
  146. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/rtf_helper/rtf_region_finder.py +0 -0
  147. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/rtf_helper/rtf_table_extractor.py +0 -0
  148. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/rtf_helper/rtf_text_cleaner.py +0 -0
  149. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/text_handler.py +0 -0
  150. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/text_helper/__init__.py +0 -0
  151. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/text_helper/text_file_converter.py +0 -0
  152. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/text_helper/text_image_processor.py +0 -0
  153. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/core/processor/text_helper/text_preprocessor.py +0 -0
  154. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/ocr/__init__.py +0 -0
  155. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/ocr/base.py +0 -0
  156. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/ocr/ocr_engine/__init__.py +0 -0
  157. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/ocr/ocr_engine/anthropic_ocr.py +0 -0
  158. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/ocr/ocr_engine/bedrock_ocr.py +0 -0
  159. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/ocr/ocr_engine/gemini_ocr.py +0 -0
  160. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/ocr/ocr_engine/openai_ocr.py +0 -0
  161. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/ocr/ocr_engine/vllm_ocr.py +0 -0
  162. {xgen_doc2chunk-0.1.5 → xgen_doc2chunk-0.1.51}/xgen_doc2chunk/ocr/ocr_processor.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: xgen-doc2chunk
3
- Version: 0.1.5
3
+ Version: 0.1.51
4
4
  Summary: Convert raw documents into AI-understandable context with intelligent text extraction, table detection, and semantic chunking
5
5
  Project-URL: Homepage, https://github.com/master0419/doc2chunk
6
6
  Project-URL: Documentation, https://github.com/master0419/doc2chunk#readme
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "xgen-doc2chunk"
7
- version = "0.1.5"
7
+ version = "0.1.51"
8
8
  description = "Convert raw documents into AI-understandable context with intelligent text extraction, table detection, and semantic chunking"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.12"
@@ -383,11 +383,11 @@ class TableQualityValidator:
383
383
  # if num_rows > 5 and col2_has_paragraphs >= 2:
384
384
  # return False, f"col2_paragraphs({col2_has_paragraphs})"
385
385
 
386
- # Pattern 3: If first column is short and second is long overall, likely body text not key-value
387
- if num_rows > 10:
388
- col1_short_ratio = (col1_empty_count + col1_short_count) / num_rows
389
- if col1_short_ratio >= 0.8 and col2_long_count >= 5:
390
- return False, f"asymmetric_cols(short1={col1_short_ratio:.0%}, long2={col2_long_count})"
386
+ # # Pattern 3: If first column is short and second is long overall, likely body text not key-value
387
+ # if num_rows > 10:
388
+ # col1_short_ratio = (col1_empty_count + col1_short_count) / num_rows
389
+ # if col1_short_ratio >= 0.8 and col2_long_count >= 5:
390
+ # return False, f"asymmetric_cols(short1={col1_short_ratio:.0%}, long2={col2_long_count})"
391
391
 
392
392
  return True, "valid"
393
393
 
File without changes