xgen-doc2chunk 0.1.0__py3-none-any.whl → 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (162) hide show
  1. xgen_doc2chunk/__init__.py +42 -0
  2. xgen_doc2chunk/chunking/__init__.py +168 -0
  3. xgen_doc2chunk/chunking/chunking.py +786 -0
  4. xgen_doc2chunk/chunking/constants.py +134 -0
  5. xgen_doc2chunk/chunking/page_chunker.py +248 -0
  6. xgen_doc2chunk/chunking/protected_regions.py +715 -0
  7. xgen_doc2chunk/chunking/sheet_processor.py +406 -0
  8. xgen_doc2chunk/chunking/table_chunker.py +832 -0
  9. xgen_doc2chunk/chunking/table_parser.py +172 -0
  10. xgen_doc2chunk/chunking/text_chunker.py +443 -0
  11. xgen_doc2chunk/core/__init__.py +64 -0
  12. xgen_doc2chunk/core/document_processor.py +1307 -0
  13. xgen_doc2chunk/core/functions/__init__.py +85 -0
  14. xgen_doc2chunk/core/functions/chart_extractor.py +144 -0
  15. xgen_doc2chunk/core/functions/chart_processor.py +534 -0
  16. xgen_doc2chunk/core/functions/file_converter.py +220 -0
  17. xgen_doc2chunk/core/functions/img_processor.py +649 -0
  18. xgen_doc2chunk/core/functions/metadata_extractor.py +542 -0
  19. xgen_doc2chunk/core/functions/page_tag_processor.py +393 -0
  20. xgen_doc2chunk/core/functions/preprocessor.py +162 -0
  21. xgen_doc2chunk/core/functions/storage_backend.py +381 -0
  22. xgen_doc2chunk/core/functions/table_extractor.py +468 -0
  23. xgen_doc2chunk/core/functions/table_processor.py +299 -0
  24. xgen_doc2chunk/core/functions/utils.py +159 -0
  25. xgen_doc2chunk/core/processor/__init__.py +96 -0
  26. xgen_doc2chunk/core/processor/base_handler.py +544 -0
  27. xgen_doc2chunk/core/processor/csv_handler.py +135 -0
  28. xgen_doc2chunk/core/processor/csv_helper/__init__.py +89 -0
  29. xgen_doc2chunk/core/processor/csv_helper/csv_constants.py +63 -0
  30. xgen_doc2chunk/core/processor/csv_helper/csv_encoding.py +104 -0
  31. xgen_doc2chunk/core/processor/csv_helper/csv_file_converter.py +78 -0
  32. xgen_doc2chunk/core/processor/csv_helper/csv_image_processor.py +75 -0
  33. xgen_doc2chunk/core/processor/csv_helper/csv_metadata.py +168 -0
  34. xgen_doc2chunk/core/processor/csv_helper/csv_parser.py +225 -0
  35. xgen_doc2chunk/core/processor/csv_helper/csv_preprocessor.py +86 -0
  36. xgen_doc2chunk/core/processor/csv_helper/csv_table.py +266 -0
  37. xgen_doc2chunk/core/processor/doc_handler.py +579 -0
  38. xgen_doc2chunk/core/processor/doc_helpers/__init__.py +25 -0
  39. xgen_doc2chunk/core/processor/doc_helpers/doc_file_converter.py +160 -0
  40. xgen_doc2chunk/core/processor/doc_helpers/doc_image_processor.py +179 -0
  41. xgen_doc2chunk/core/processor/doc_helpers/doc_preprocessor.py +83 -0
  42. xgen_doc2chunk/core/processor/docx_handler.py +376 -0
  43. xgen_doc2chunk/core/processor/docx_helper/__init__.py +84 -0
  44. xgen_doc2chunk/core/processor/docx_helper/docx_chart_extractor.py +436 -0
  45. xgen_doc2chunk/core/processor/docx_helper/docx_constants.py +75 -0
  46. xgen_doc2chunk/core/processor/docx_helper/docx_file_converter.py +76 -0
  47. xgen_doc2chunk/core/processor/docx_helper/docx_image.py +145 -0
  48. xgen_doc2chunk/core/processor/docx_helper/docx_image_processor.py +410 -0
  49. xgen_doc2chunk/core/processor/docx_helper/docx_metadata.py +71 -0
  50. xgen_doc2chunk/core/processor/docx_helper/docx_paragraph.py +126 -0
  51. xgen_doc2chunk/core/processor/docx_helper/docx_preprocessor.py +82 -0
  52. xgen_doc2chunk/core/processor/docx_helper/docx_table_extractor.py +527 -0
  53. xgen_doc2chunk/core/processor/docx_helper/docx_table_processor.py +220 -0
  54. xgen_doc2chunk/core/processor/excel_handler.py +353 -0
  55. xgen_doc2chunk/core/processor/excel_helper/__init__.py +97 -0
  56. xgen_doc2chunk/core/processor/excel_helper/excel_chart_extractor.py +498 -0
  57. xgen_doc2chunk/core/processor/excel_helper/excel_file_converter.py +157 -0
  58. xgen_doc2chunk/core/processor/excel_helper/excel_image_processor.py +316 -0
  59. xgen_doc2chunk/core/processor/excel_helper/excel_layout_detector.py +739 -0
  60. xgen_doc2chunk/core/processor/excel_helper/excel_metadata.py +145 -0
  61. xgen_doc2chunk/core/processor/excel_helper/excel_preprocessor.py +83 -0
  62. xgen_doc2chunk/core/processor/excel_helper/excel_table_xls.py +357 -0
  63. xgen_doc2chunk/core/processor/excel_helper/excel_table_xlsx.py +361 -0
  64. xgen_doc2chunk/core/processor/excel_helper/excel_textbox.py +266 -0
  65. xgen_doc2chunk/core/processor/html_helper/__init__.py +7 -0
  66. xgen_doc2chunk/core/processor/html_helper/html_file_converter.py +92 -0
  67. xgen_doc2chunk/core/processor/html_helper/html_preprocessor.py +74 -0
  68. xgen_doc2chunk/core/processor/html_reprocessor.py +140 -0
  69. xgen_doc2chunk/core/processor/hwp_handler.py +401 -0
  70. xgen_doc2chunk/core/processor/hwp_helper/__init__.py +120 -0
  71. xgen_doc2chunk/core/processor/hwp_helper/hwp_chart_extractor.py +373 -0
  72. xgen_doc2chunk/core/processor/hwp_helper/hwp_constants.py +78 -0
  73. xgen_doc2chunk/core/processor/hwp_helper/hwp_decoder.py +106 -0
  74. xgen_doc2chunk/core/processor/hwp_helper/hwp_docinfo.py +174 -0
  75. xgen_doc2chunk/core/processor/hwp_helper/hwp_file_converter.py +60 -0
  76. xgen_doc2chunk/core/processor/hwp_helper/hwp_image_processor.py +413 -0
  77. xgen_doc2chunk/core/processor/hwp_helper/hwp_metadata.py +236 -0
  78. xgen_doc2chunk/core/processor/hwp_helper/hwp_preprocessor.py +82 -0
  79. xgen_doc2chunk/core/processor/hwp_helper/hwp_record.py +149 -0
  80. xgen_doc2chunk/core/processor/hwp_helper/hwp_recovery.py +217 -0
  81. xgen_doc2chunk/core/processor/hwp_helper/hwp_table.py +205 -0
  82. xgen_doc2chunk/core/processor/hwpx_handler.py +191 -0
  83. xgen_doc2chunk/core/processor/hwpx_helper/__init__.py +85 -0
  84. xgen_doc2chunk/core/processor/hwpx_helper/hwpx_chart_extractor.py +464 -0
  85. xgen_doc2chunk/core/processor/hwpx_helper/hwpx_constants.py +30 -0
  86. xgen_doc2chunk/core/processor/hwpx_helper/hwpx_file_converter.py +70 -0
  87. xgen_doc2chunk/core/processor/hwpx_helper/hwpx_image_processor.py +258 -0
  88. xgen_doc2chunk/core/processor/hwpx_helper/hwpx_metadata.py +163 -0
  89. xgen_doc2chunk/core/processor/hwpx_helper/hwpx_preprocessor.py +80 -0
  90. xgen_doc2chunk/core/processor/hwpx_helper/hwpx_section.py +242 -0
  91. xgen_doc2chunk/core/processor/hwpx_helper/hwpx_table_extractor.py +462 -0
  92. xgen_doc2chunk/core/processor/hwpx_helper/hwpx_table_processor.py +220 -0
  93. xgen_doc2chunk/core/processor/image_file_handler.py +212 -0
  94. xgen_doc2chunk/core/processor/image_file_helper/__init__.py +17 -0
  95. xgen_doc2chunk/core/processor/image_file_helper/image_file_converter.py +69 -0
  96. xgen_doc2chunk/core/processor/image_file_helper/image_file_image_processor.py +123 -0
  97. xgen_doc2chunk/core/processor/image_file_helper/image_file_preprocessor.py +84 -0
  98. xgen_doc2chunk/core/processor/pdf_handler.py +597 -0
  99. xgen_doc2chunk/core/processor/pdf_helpers/__init__.py +229 -0
  100. xgen_doc2chunk/core/processor/pdf_helpers/pdf_block_image_engine.py +667 -0
  101. xgen_doc2chunk/core/processor/pdf_helpers/pdf_cell_analysis.py +493 -0
  102. xgen_doc2chunk/core/processor/pdf_helpers/pdf_complexity_analyzer.py +598 -0
  103. xgen_doc2chunk/core/processor/pdf_helpers/pdf_element_merger.py +46 -0
  104. xgen_doc2chunk/core/processor/pdf_helpers/pdf_file_converter.py +72 -0
  105. xgen_doc2chunk/core/processor/pdf_helpers/pdf_graphic_detector.py +332 -0
  106. xgen_doc2chunk/core/processor/pdf_helpers/pdf_image_processor.py +321 -0
  107. xgen_doc2chunk/core/processor/pdf_helpers/pdf_layout_block_detector.py +1244 -0
  108. xgen_doc2chunk/core/processor/pdf_helpers/pdf_line_analysis.py +420 -0
  109. xgen_doc2chunk/core/processor/pdf_helpers/pdf_metadata.py +101 -0
  110. xgen_doc2chunk/core/processor/pdf_helpers/pdf_page_analyzer.py +114 -0
  111. xgen_doc2chunk/core/processor/pdf_helpers/pdf_preprocessor.py +106 -0
  112. xgen_doc2chunk/core/processor/pdf_helpers/pdf_table_detection.py +1346 -0
  113. xgen_doc2chunk/core/processor/pdf_helpers/pdf_table_processor.py +897 -0
  114. xgen_doc2chunk/core/processor/pdf_helpers/pdf_table_quality_analyzer.py +750 -0
  115. xgen_doc2chunk/core/processor/pdf_helpers/pdf_table_validator.py +401 -0
  116. xgen_doc2chunk/core/processor/pdf_helpers/pdf_text_extractor.py +155 -0
  117. xgen_doc2chunk/core/processor/pdf_helpers/pdf_text_quality_analyzer.py +655 -0
  118. xgen_doc2chunk/core/processor/pdf_helpers/pdf_utils.py +183 -0
  119. xgen_doc2chunk/core/processor/pdf_helpers/pdf_vector_text_ocr.py +302 -0
  120. xgen_doc2chunk/core/processor/pdf_helpers/types.py +278 -0
  121. xgen_doc2chunk/core/processor/ppt_handler.py +288 -0
  122. xgen_doc2chunk/core/processor/ppt_helper/__init__.py +96 -0
  123. xgen_doc2chunk/core/processor/ppt_helper/ppt_bullet.py +332 -0
  124. xgen_doc2chunk/core/processor/ppt_helper/ppt_chart_extractor.py +182 -0
  125. xgen_doc2chunk/core/processor/ppt_helper/ppt_constants.py +119 -0
  126. xgen_doc2chunk/core/processor/ppt_helper/ppt_file_converter.py +55 -0
  127. xgen_doc2chunk/core/processor/ppt_helper/ppt_image_processor.py +196 -0
  128. xgen_doc2chunk/core/processor/ppt_helper/ppt_metadata.py +71 -0
  129. xgen_doc2chunk/core/processor/ppt_helper/ppt_preprocessor.py +77 -0
  130. xgen_doc2chunk/core/processor/ppt_helper/ppt_shape.py +189 -0
  131. xgen_doc2chunk/core/processor/ppt_helper/ppt_slide.py +69 -0
  132. xgen_doc2chunk/core/processor/ppt_helper/ppt_table.py +386 -0
  133. xgen_doc2chunk/core/processor/rtf_handler.py +290 -0
  134. xgen_doc2chunk/core/processor/rtf_helper/__init__.py +128 -0
  135. xgen_doc2chunk/core/processor/rtf_helper/rtf_constants.py +94 -0
  136. xgen_doc2chunk/core/processor/rtf_helper/rtf_content_extractor.py +211 -0
  137. xgen_doc2chunk/core/processor/rtf_helper/rtf_decoder.py +141 -0
  138. xgen_doc2chunk/core/processor/rtf_helper/rtf_file_converter.py +87 -0
  139. xgen_doc2chunk/core/processor/rtf_helper/rtf_metadata_extractor.py +179 -0
  140. xgen_doc2chunk/core/processor/rtf_helper/rtf_preprocessor.py +426 -0
  141. xgen_doc2chunk/core/processor/rtf_helper/rtf_region_finder.py +91 -0
  142. xgen_doc2chunk/core/processor/rtf_helper/rtf_table_extractor.py +482 -0
  143. xgen_doc2chunk/core/processor/rtf_helper/rtf_text_cleaner.py +389 -0
  144. xgen_doc2chunk/core/processor/text_handler.py +95 -0
  145. xgen_doc2chunk/core/processor/text_helper/__init__.py +17 -0
  146. xgen_doc2chunk/core/processor/text_helper/text_file_converter.py +28 -0
  147. xgen_doc2chunk/core/processor/text_helper/text_image_processor.py +75 -0
  148. xgen_doc2chunk/core/processor/text_helper/text_preprocessor.py +82 -0
  149. xgen_doc2chunk/ocr/__init__.py +67 -0
  150. xgen_doc2chunk/ocr/base.py +209 -0
  151. xgen_doc2chunk/ocr/ocr_engine/__init__.py +22 -0
  152. xgen_doc2chunk/ocr/ocr_engine/anthropic_ocr.py +91 -0
  153. xgen_doc2chunk/ocr/ocr_engine/bedrock_ocr.py +172 -0
  154. xgen_doc2chunk/ocr/ocr_engine/gemini_ocr.py +91 -0
  155. xgen_doc2chunk/ocr/ocr_engine/openai_ocr.py +100 -0
  156. xgen_doc2chunk/ocr/ocr_engine/vllm_ocr.py +116 -0
  157. xgen_doc2chunk/ocr/ocr_processor.py +387 -0
  158. {xgen_doc2chunk-0.1.0.dist-info → xgen_doc2chunk-0.1.1.dist-info}/METADATA +1 -1
  159. xgen_doc2chunk-0.1.1.dist-info/RECORD +161 -0
  160. xgen_doc2chunk-0.1.0.dist-info/RECORD +0 -4
  161. {xgen_doc2chunk-0.1.0.dist-info → xgen_doc2chunk-0.1.1.dist-info}/WHEEL +0 -0
  162. {xgen_doc2chunk-0.1.0.dist-info → xgen_doc2chunk-0.1.1.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,161 @@
1
+ xgen_doc2chunk/__init__.py,sha256=YpWNnu7L3bWzl5vGrxSDgfr0IB3-0re3WknnWKIJGsg,1038
2
+ xgen_doc2chunk/chunking/__init__.py,sha256=p_ci5OS47tQ-0lcbbsPUvmC9P95eZG_jMlMM8MpkLZ4,4480
3
+ xgen_doc2chunk/chunking/chunking.py,sha256=kav4CDhvNLvZBBr5QGtBQ9qhBow14-NZX-zxJpmM_J0,30420
4
+ xgen_doc2chunk/chunking/constants.py,sha256=hLUcSYtNT4LE4n8ke9SEexXjEz2QYb_EiGqXTgyZl0M,5298
5
+ xgen_doc2chunk/chunking/page_chunker.py,sha256=4xYPRfP1NBuXL2HTNv8mjNkS13l4m1HpHvDi_Z9SDZg,8705
6
+ xgen_doc2chunk/chunking/protected_regions.py,sha256=mMQ0HxtQl-Ad7QiRtqhp-6WALSdESXsDuRTTyqfnMVg,33085
7
+ xgen_doc2chunk/chunking/sheet_processor.py,sha256=MuuVnlhvM9oY48lHD49F2Y3IzL7jPdiE8_6j71jT2nY,14674
8
+ xgen_doc2chunk/chunking/table_chunker.py,sha256=kOmHsPiEVbzVBel1m6vSUMD9hSxaTnUD8frqLlOHX_I,28072
9
+ xgen_doc2chunk/chunking/table_parser.py,sha256=rz3RodBg42YSjR_enZDeVs2WIzULtmJW-tKtekS31UA,5009
10
+ xgen_doc2chunk/chunking/text_chunker.py,sha256=pPVgUB1zs93wDl3ED1F_YsFOSzjC6-PBarS0Y6CEWTI,13620
11
+ xgen_doc2chunk/core/__init__.py,sha256=Jxy4eDe8DmCQ6irFZ86eDilg_0u-14xnvwLzkOA-368,1849
12
+ xgen_doc2chunk/core/document_processor.py,sha256=fwzldukixYwooA6-zDDsUvaJIMCoHyIXE7-AOrsnUg4,48083
13
+ xgen_doc2chunk/core/functions/__init__.py,sha256=aHu0Oj-pFqqTE7GbcJxB55orOrhxP3j2XWsAGYPFcJ4,2305
14
+ xgen_doc2chunk/core/functions/chart_extractor.py,sha256=_b4i4LQhxd_nCRGwApbnt1XieLM4a_ywNKUb23SmMHI,4356
15
+ xgen_doc2chunk/core/functions/chart_processor.py,sha256=qVy_VVeOffoAcaGwYxyHE_aqWBtouoslBDwFFPPWITQ,16097
16
+ xgen_doc2chunk/core/functions/file_converter.py,sha256=8HPdBx23cnzhn0aXWBE-AvUywhzmTRZf1nPwacmqWYo,6537
17
+ xgen_doc2chunk/core/functions/img_processor.py,sha256=oCUBp8qg9zZv_mEvro-T5j67ZEtUW5dSgFFma27WHSo,22036
18
+ xgen_doc2chunk/core/functions/metadata_extractor.py,sha256=Qu8yUu5MkbcSLh1FDbCrrCrjoGZJVzqSA8B_eQNFtSE,17901
19
+ xgen_doc2chunk/core/functions/page_tag_processor.py,sha256=JSnYJc6ixBP_IZZ87Fsx79n9IR1fMJt0nZ5qrzzg9UA,12719
20
+ xgen_doc2chunk/core/functions/preprocessor.py,sha256=sqdQ88iStzOzaHRPeGgisQr_pmdIQAaPi5gjLeLv-TY,5063
21
+ xgen_doc2chunk/core/functions/storage_backend.py,sha256=meNKScMI4uVvwM6l37DnZRAxmRUL9bkGZ7moZZt2viw,10784
22
+ xgen_doc2chunk/core/functions/table_extractor.py,sha256=E5KAb4pcQ5sqvD8Q-rgoMrQ8FYkj9POaZCa_nI-EGEQ,17756
23
+ xgen_doc2chunk/core/functions/table_processor.py,sha256=OWG-x0_fwu5yJMIXt0M8ExOwcjPXZSMSQIkdoJRts6E,11933
24
+ xgen_doc2chunk/core/functions/utils.py,sha256=FXIamLR6qxO4QTX12QooGzqD8yQAYMIPBIHmWUOmtJc,5206
25
+ xgen_doc2chunk/core/processor/__init__.py,sha256=RzlwESn8--LpB6N6WZ-i117TnIlh1EDZ_ImDaIjwYvA,3202
26
+ xgen_doc2chunk/core/processor/base_handler.py,sha256=z1L29Nx7JBHv2N1rlcowzGXBgwtTXL8zhI7xWN1_J6o,19546
27
+ xgen_doc2chunk/core/processor/csv_handler.py,sha256=SnAzRWycVivuRV4gjBLiI0HfLYxsGUpBK4Z4UyeyfWQ,4779
28
+ xgen_doc2chunk/core/processor/doc_handler.py,sha256=TCk1pNKEqqJHqV-dXusAJiD2NUAaxwOq_Nn9BN5fVRE,23146
29
+ xgen_doc2chunk/core/processor/docx_handler.py,sha256=jWeQe1C3tolMupweB41fgF42E8AsZq29BlSbz33HjVM,14676
30
+ xgen_doc2chunk/core/processor/excel_handler.py,sha256=VmJsTkFWn9bhj_tvHhBEkzQIgm2LGk1DdkiBJyiN05c,13502
31
+ xgen_doc2chunk/core/processor/html_reprocessor.py,sha256=yrufNBPKUCHu6tcWPS9sKHMCB6Vj_t1fJ3EgPHkTaBc,5076
32
+ xgen_doc2chunk/core/processor/hwp_handler.py,sha256=DJUbIj8JukoW4fRx5774QRmYgVELZFofh4ZfYJ4Zx4Y,15667
33
+ xgen_doc2chunk/core/processor/hwpx_handler.py,sha256=Kx-psS5sJraLDYkkMi7lesreohik_m7VNXpDQxWSC3g,7602
34
+ xgen_doc2chunk/core/processor/image_file_handler.py,sha256=6ND8_BeoYmJVUqiu0ZcnaYSUbElwEd6NBHacwM0LD2Y,8191
35
+ xgen_doc2chunk/core/processor/pdf_handler.py,sha256=g-blAJqL_81ZaN56CqNNUvVORDmZaumH1eWY5KxaknM,23242
36
+ xgen_doc2chunk/core/processor/ppt_handler.py,sha256=_Gll2c7JTYSWVuZddLSbnNfj09Y0Wr7XmU2CrSAldPo,12183
37
+ xgen_doc2chunk/core/processor/rtf_handler.py,sha256=H4NnNhNCKh_oc3nAM4irY2F1yc2NaO_9BGyHw4szrss,9607
38
+ xgen_doc2chunk/core/processor/text_handler.py,sha256=BWW0uTMt48O8bC-JqdXAZCEWcpRa3x1uHB2NgVipk7Q,3653
39
+ xgen_doc2chunk/core/processor/csv_helper/__init__.py,sha256=Zdqz3LfhroAGkomEzT4Z3c4O4TBOYu8ME3T7tWCR9TY,2004
40
+ xgen_doc2chunk/core/processor/csv_helper/csv_constants.py,sha256=JvcItnjSAxeTfqKNas6Rlin0tdmlFcapTI2CvGlh7IU,1341
41
+ xgen_doc2chunk/core/processor/csv_helper/csv_encoding.py,sha256=p5n3wVBdA21RlY0g_4MJbj9xvm5NRwAl27Y4-40TTtY,3110
42
+ xgen_doc2chunk/core/processor/csv_helper/csv_file_converter.py,sha256=XmQGZ42fqiNr8wOlHV-mfxklAh82-peO6hDetdES9Qc,2467
43
+ xgen_doc2chunk/core/processor/csv_helper/csv_image_processor.py,sha256=hOJ6Y5AhjuEa8YRBqpvt2UTpjs-RaQcU60o-nPa_358,2376
44
+ xgen_doc2chunk/core/processor/csv_helper/csv_metadata.py,sha256=lS6NEtOuKAvJDsuOcHXw0CiEjAJLOA9fK_kE2LRhaVY,5223
45
+ xgen_doc2chunk/core/processor/csv_helper/csv_parser.py,sha256=w2dlQhfkqqU52kP7WhH0qZnMojq1JDliX1jrpdUkzXI,5523
46
+ xgen_doc2chunk/core/processor/csv_helper/csv_preprocessor.py,sha256=mXYb68i9QtGSCdWKn7V7oevwb4LbHW7dj9zUXjMTV0o,2673
47
+ xgen_doc2chunk/core/processor/csv_helper/csv_table.py,sha256=mp7dVf0MOCJvGNtNHWmXrdYYp2Tqbt66PrQnWRdLLbs,8596
48
+ xgen_doc2chunk/core/processor/doc_helpers/__init__.py,sha256=W5d3zbwBscS4jIwcU843sPrLDEglC_VtCAVTspwGub0,736
49
+ xgen_doc2chunk/core/processor/doc_helpers/doc_file_converter.py,sha256=D7Kl5WVYjvd8jvLq4yo0b6NdvSKVErvpc2SF_sYbxug,5365
50
+ xgen_doc2chunk/core/processor/doc_helpers/doc_image_processor.py,sha256=r-esUKeyJHY2y4KviFjxq6jQapecoQjiWV0pbrQRpRE,5576
51
+ xgen_doc2chunk/core/processor/doc_helpers/doc_preprocessor.py,sha256=3iERWwNcxXjF0Vz0R5xo0Oec_ES9iLtQXBlY4G6Y56g,2636
52
+ xgen_doc2chunk/core/processor/docx_helper/__init__.py,sha256=D-JeAVe1PcJOKlq37RX36fTUe1NvMnpJK5jZMD-qmgM,2394
53
+ xgen_doc2chunk/core/processor/docx_helper/docx_chart_extractor.py,sha256=-7ONbBeOKeyyAEDfVdkJh8DGdR3FYJESlD1doHiw0A8,16224
54
+ xgen_doc2chunk/core/processor/docx_helper/docx_constants.py,sha256=2aERy2K0EpHEbrTWz1pzcBrdk2vJcCyBDXamyhHVlo8,2330
55
+ xgen_doc2chunk/core/processor/docx_helper/docx_file_converter.py,sha256=w6QtsXlT59PSGJm7bYaqspCUlLGcnwYUTQxTJ3CkswM,2076
56
+ xgen_doc2chunk/core/processor/docx_helper/docx_image.py,sha256=7L3_BGlI9KQ6A4ZPqawHRFMvzamxovGourMALEACz7o,4688
57
+ xgen_doc2chunk/core/processor/docx_helper/docx_image_processor.py,sha256=CFFd0ITAWeSP-IhSOvp1BzVQLmYDkmKN8BTQMWGl66c,13324
58
+ xgen_doc2chunk/core/processor/docx_helper/docx_metadata.py,sha256=-A1mJqyTPe5FGpbTq5m9tJRU675GJ28qxSQlRvihoZk,2196
59
+ xgen_doc2chunk/core/processor/docx_helper/docx_paragraph.py,sha256=2rzi27HKgzcA9Uqn_SIyfctZYfdmc4IObtgPxPRZcyU,4262
60
+ xgen_doc2chunk/core/processor/docx_helper/docx_preprocessor.py,sha256=DziwtwHne-XWdr8TA7eJxLZtUgQpGg4hftQgfv318bs,2584
61
+ xgen_doc2chunk/core/processor/docx_helper/docx_table_extractor.py,sha256=vMUI4bYVk-nzV3QixhGWziAadDXYD-DxUxkI2PEdefQ,20589
62
+ xgen_doc2chunk/core/processor/docx_helper/docx_table_processor.py,sha256=Ap6KSLvbDnynD5WewDkCpOdUc6bZiTJhnxkDnjK2y_s,6692
63
+ xgen_doc2chunk/core/processor/excel_helper/__init__.py,sha256=szH7y_DpRHGiIQdniGdfLNYxR01jRpXK-nVjPqwTuNM,2694
64
+ xgen_doc2chunk/core/processor/excel_helper/excel_chart_extractor.py,sha256=FvFKTbYwtI0bHkTgL7UXYdbYIg7IDtzg06EpAz5lr9M,17575
65
+ xgen_doc2chunk/core/processor/excel_helper/excel_file_converter.py,sha256=zVa6sfxLU6GvT81n9UFg1tK1dZUNyyuzTbAasY8CpgI,4773
66
+ xgen_doc2chunk/core/processor/excel_helper/excel_image_processor.py,sha256=r4HBU6UZ_FmU4iIdzaqvhkjagLHy1D2z7kF87m2Mdyc,10118
67
+ xgen_doc2chunk/core/processor/excel_helper/excel_layout_detector.py,sha256=WfArTRizXTOGBMkwu9ivWDI7bYO443mAJeyG8q9Ds7s,26206
68
+ xgen_doc2chunk/core/processor/excel_helper/excel_metadata.py,sha256=4pQDwqDI4eBDSUAFCsnfxmI0ThzG3OzjGChAa7Ke7C8,4448
69
+ xgen_doc2chunk/core/processor/excel_helper/excel_preprocessor.py,sha256=Lp10oXWDg8RiaAOhnJzUtpFYb3LNUubBX5U9orGMk9Q,2722
70
+ xgen_doc2chunk/core/processor/excel_helper/excel_table_xls.py,sha256=y1CqM02Z5LIlpUnQ9Vza4om2vIhEqpgG7A5S1QxDi-s,12320
71
+ xgen_doc2chunk/core/processor/excel_helper/excel_table_xlsx.py,sha256=bkVzHFiXNIFdeUEpI_myAJhgBWH301h8PcJ5i5UuZn0,14444
72
+ xgen_doc2chunk/core/processor/excel_helper/excel_textbox.py,sha256=fh21emzbdJTuCIZ2TQNNzXOouZ-c_mtm-OnwcQkq8_I,9703
73
+ xgen_doc2chunk/core/processor/html_helper/__init__.py,sha256=Ips5uxBa1uT4oVBO3mGsH6rkPGUxkbOSd3haS7kH0ps,234
74
+ xgen_doc2chunk/core/processor/html_helper/html_file_converter.py,sha256=jMIHwWhOMFKD-KlfK35uZJYJFNPQL39f18al3xqwiBs,2662
75
+ xgen_doc2chunk/core/processor/html_helper/html_preprocessor.py,sha256=h3Sty1XAeW2vV1EI037mMqAAN7DHbFZqT_IUKt4scg0,2146
76
+ xgen_doc2chunk/core/processor/hwp_helper/__init__.py,sha256=2pCdhldUcVSCXctTxmn9zJwUFQStUUHTRF204O7aLPc,3023
77
+ xgen_doc2chunk/core/processor/hwp_helper/hwp_chart_extractor.py,sha256=x25lkD2EgShYTQ_7CwRUpuSt1rRDT8ohzbwmdX0Oa8A,12848
78
+ xgen_doc2chunk/core/processor/hwp_helper/hwp_constants.py,sha256=tqxYKzBjX0ZFTeyTTh0fheIrtNh9g1dExptfQ_R2IxE,2909
79
+ xgen_doc2chunk/core/processor/hwp_helper/hwp_decoder.py,sha256=GM6PxtdSbvBQV1JFmkuGlNd3Yn0cQg-YU5h9KQaAOK0,2795
80
+ xgen_doc2chunk/core/processor/hwp_helper/hwp_docinfo.py,sha256=RgK_kb0MzEnlPjiiQZdQLg69JZeJhPPahqYzs2E6CvI,6958
81
+ xgen_doc2chunk/core/processor/hwp_helper/hwp_file_converter.py,sha256=FVwuO-6Bng7q0jJcX_pDiEOP4ZUPyngG1DKRD4UTTNk,1716
82
+ xgen_doc2chunk/core/processor/hwp_helper/hwp_image_processor.py,sha256=I4D6AKp0VoTsAItkhfT1zJgWix753Xc0GM9yVRGcsqo,13546
83
+ xgen_doc2chunk/core/processor/hwp_helper/hwp_metadata.py,sha256=JVCsUEanpMYx7FJh3ymhr20w_hpJIU-JuMryZJUUb_s,8838
84
+ xgen_doc2chunk/core/processor/hwp_helper/hwp_preprocessor.py,sha256=p7Tvv2ABvtmLTe1sr4I4RU_DijV90LVRvWg2-u85Kz4,2601
85
+ xgen_doc2chunk/core/processor/hwp_helper/hwp_record.py,sha256=UZg3xyOyAbO9g6u_uSVRv2H9z3EMnX1JBPc7oA0WUJE,4858
86
+ xgen_doc2chunk/core/processor/hwp_helper/hwp_recovery.py,sha256=f4MJHYKJ4ae78Wjrp1MoRSkUSYXT_omu1A5b1YIbu9w,5974
87
+ xgen_doc2chunk/core/processor/hwp_helper/hwp_table.py,sha256=Qt4v_1MtLGNfT9JAdvc7pnxr-qUic4q43OiHFEUDpww,6514
88
+ xgen_doc2chunk/core/processor/hwpx_helper/__init__.py,sha256=LsvxE0XXvQmHdC0SWT4SeLBDDj_8E5M8FNEjd4edV2Q,2368
89
+ xgen_doc2chunk/core/processor/hwpx_helper/hwpx_chart_extractor.py,sha256=x8RHspKVNasoB2-DQC7ThXZtoEuLufhtQMGLCxhMmmE,16510
90
+ xgen_doc2chunk/core/processor/hwpx_helper/hwpx_constants.py,sha256=Pb1HY2gwjh5kTWTOhX9ZeLJzDkl4BBktWHrwFV17bcs,956
91
+ xgen_doc2chunk/core/processor/hwpx_helper/hwpx_file_converter.py,sha256=y5lkgZou5IyVPUFNQ2Frug6ZJQ6r-yERHV1vE-Rtios,2033
92
+ xgen_doc2chunk/core/processor/hwpx_helper/hwpx_image_processor.py,sha256=tPOhy7jXanVgcv11KTq0q3pZMPokmD05Fa0n1iCikGw,7654
93
+ xgen_doc2chunk/core/processor/hwpx_helper/hwpx_metadata.py,sha256=ieFNgTv-ZN8D5GOQu4HREyGMD24WBZ5j8jsdI-AR33I,6200
94
+ xgen_doc2chunk/core/processor/hwpx_helper/hwpx_preprocessor.py,sha256=3h8dLsIQ7n8RtxYFfPAx7d19JFS4jN-MUrM6rXAM9d0,2487
95
+ xgen_doc2chunk/core/processor/hwpx_helper/hwpx_section.py,sha256=QE-z60JDvnFnGuj-gXiQK0-fqA3JJSy5D1pHIMSZsLI,8378
96
+ xgen_doc2chunk/core/processor/hwpx_helper/hwpx_table_extractor.py,sha256=4oDdDnyjbBZP9StPKLxwYb1Lxx08H-7m8YVSlbypxq8,17844
97
+ xgen_doc2chunk/core/processor/hwpx_helper/hwpx_table_processor.py,sha256=bhDfxhMBEw7T0ZsKNfOEeGrzqhXaGa2hHvJ_V9yvY1s,6692
98
+ xgen_doc2chunk/core/processor/image_file_helper/__init__.py,sha256=Y9Q4_GPNQt_nfypxuv5C_JnxNt8rY_bq81QSU-REhVc,422
99
+ xgen_doc2chunk/core/processor/image_file_helper/image_file_converter.py,sha256=D-KDRFpyIGLQa1ia57zgse35wRXD_Kca39gegoSVCPw,1999
100
+ xgen_doc2chunk/core/processor/image_file_helper/image_file_image_processor.py,sha256=XSXsWxXlszffcO7Z9CF7clZiLgOGOtfFfBsAsJ6fNps,4011
101
+ xgen_doc2chunk/core/processor/image_file_helper/image_file_preprocessor.py,sha256=RQumey6KpzoAwzK9as5tlMpNEfKH87Vvf8OhGRClK1k,2715
102
+ xgen_doc2chunk/core/processor/pdf_helpers/__init__.py,sha256=wdktoKiz_Bq5dqe6XjqwkSYaMMSNcciixP91xvTlxp8,5586
103
+ xgen_doc2chunk/core/processor/pdf_helpers/pdf_block_image_engine.py,sha256=Ew0YSd7y601G87pVmpA4vZXOKCwm4rS0mz_zL95qxtI,22460
104
+ xgen_doc2chunk/core/processor/pdf_helpers/pdf_cell_analysis.py,sha256=42WSUYlz5fkkaxnD2G7cLdC-aPxqo6eWts_TTSeMz_w,17232
105
+ xgen_doc2chunk/core/processor/pdf_helpers/pdf_complexity_analyzer.py,sha256=JGmdpyRX_UMaRC6v46_wGLfa5-jFPSOpbIpt32vXYRU,21303
106
+ xgen_doc2chunk/core/processor/pdf_helpers/pdf_element_merger.py,sha256=ozCLdxC8NwmsBWNc1qflEHLry35AOtDfRtYO8tB9ZEQ,1014
107
+ xgen_doc2chunk/core/processor/pdf_helpers/pdf_file_converter.py,sha256=u1It40gaSY14nQMLzzu9FjKOyZVv-ILLyfE7In6e9lk,1937
108
+ xgen_doc2chunk/core/processor/pdf_helpers/pdf_graphic_detector.py,sha256=RtUScXEbkDZRakjsQ0uGhWiECtlmvAVzYUe87WEzILQ,12329
109
+ xgen_doc2chunk/core/processor/pdf_helpers/pdf_image_processor.py,sha256=AQiUX0gI1cN6NAoALeU-Z_Fop2gArcxke1WQz65S1VY,10016
110
+ xgen_doc2chunk/core/processor/pdf_helpers/pdf_layout_block_detector.py,sha256=ZJWvrCgmXDQFJwr7p15m7reaGOFdd3htakE4rhjD-4k,47636
111
+ xgen_doc2chunk/core/processor/pdf_helpers/pdf_line_analysis.py,sha256=j8y8Tbycj7l8rD3JvhFViC_yfzm6p5ssBAkYyrcZQms,14805
112
+ xgen_doc2chunk/core/processor/pdf_helpers/pdf_metadata.py,sha256=7ZTeHXAfUqa_W9H7SQVvAGc76g1t390rg8hTSF3KEHw,2935
113
+ xgen_doc2chunk/core/processor/pdf_helpers/pdf_page_analyzer.py,sha256=4kpY8WY9hH-cfjd-Ai6vA4V7I8KwE5hSq8Yt4QXliqM,3009
114
+ xgen_doc2chunk/core/processor/pdf_helpers/pdf_preprocessor.py,sha256=qPgtMTMbaTm7_QyU7kKwVDtGAldf_yV4rTyoGVVgkTU,3406
115
+ xgen_doc2chunk/core/processor/pdf_helpers/pdf_table_detection.py,sha256=bwD6MVUuZJVYe3bWDsD6BpK1UZKKPsVyKOG6oHeoumw,47042
116
+ xgen_doc2chunk/core/processor/pdf_helpers/pdf_table_processor.py,sha256=KQ0eGnf-uZbooIK_BTr-Q_O6pTQaUhh1OAtHvErT72s,28164
117
+ xgen_doc2chunk/core/processor/pdf_helpers/pdf_table_quality_analyzer.py,sha256=v6VH-E6clI71-G2zJcT5754VFcPYqb1Qz4l3UcPeDeM,27863
118
+ xgen_doc2chunk/core/processor/pdf_helpers/pdf_table_validator.py,sha256=HXHl0tukTUHFSIWxQUcrYs8lYJ8gZnYV12HtSezWIho,16069
119
+ xgen_doc2chunk/core/processor/pdf_helpers/pdf_text_extractor.py,sha256=wAnOCAQ3cTsVgMg0uVavodZHV2DAvrVkugqA0c4MhTY,4754
120
+ xgen_doc2chunk/core/processor/pdf_helpers/pdf_text_quality_analyzer.py,sha256=8rCAnLvNRSVvIAbEiggXawrMOo-zWpMxwDc5Rrk19Co,22520
121
+ xgen_doc2chunk/core/processor/pdf_helpers/pdf_utils.py,sha256=W72HOARz7LjSzwzFTLo4-XTDQWvwBTGlqdovFyPBU7M,4724
122
+ xgen_doc2chunk/core/processor/pdf_helpers/pdf_vector_text_ocr.py,sha256=KWkaj7LT5ih5Nkb2EDggA02JuHIsIy3Sbm7pVIhxWuE,11736
123
+ xgen_doc2chunk/core/processor/pdf_helpers/types.py,sha256=IXV493hkpPa67DPZfH319m2rh6sIgL0R4nOd6pcd-to,9030
124
+ xgen_doc2chunk/core/processor/ppt_helper/__init__.py,sha256=ZXbfcH0U87sNm5JsrZP5jBwtRrX2gnORgjvT4RBhi6I,2344
125
+ xgen_doc2chunk/core/processor/ppt_helper/ppt_bullet.py,sha256=LKdtQwo-MGbEyOIt89_bAnT6699_ZrrtT-6DyPwBfvk,10250
126
+ xgen_doc2chunk/core/processor/ppt_helper/ppt_chart_extractor.py,sha256=caltXJ3S0OH0Wc4o9vUe4GcUH7kVHDa1FHxn7r-84is,6032
127
+ xgen_doc2chunk/core/processor/ppt_helper/ppt_constants.py,sha256=uOqYdYBRHjgW4qj7mLPEttLxINkE67c4tU85q31f_A0,3539
128
+ xgen_doc2chunk/core/processor/ppt_helper/ppt_file_converter.py,sha256=bADoYpgkhV9tHSbJAZoqbPNrRpjA-wcrGZfIvZjLNV4,1559
129
+ xgen_doc2chunk/core/processor/ppt_helper/ppt_image_processor.py,sha256=r_Kk78qK5aHlvUC0v8BCcMMSKyDwjSO2BW96KbpBUUk,5808
130
+ xgen_doc2chunk/core/processor/ppt_helper/ppt_metadata.py,sha256=Eoh5v6Px0_RhRW9ZAYi_llbdjso5e2EbBAgueikvmzc,2142
131
+ xgen_doc2chunk/core/processor/ppt_helper/ppt_preprocessor.py,sha256=sBSNAhOnGm11YY5VfD-spN-4STJGPvoLngxS2FxweW8,2416
132
+ xgen_doc2chunk/core/processor/ppt_helper/ppt_shape.py,sha256=0JrFJbmVbFO6eAmTlKkan6OK71LcA0phCOJRm9tn99Q,6000
133
+ xgen_doc2chunk/core/processor/ppt_helper/ppt_slide.py,sha256=Oe5UvQpWyoPmt8-iu4-cpaaM4sH2r7gKLxbMdNp5f3M,2071
134
+ xgen_doc2chunk/core/processor/ppt_helper/ppt_table.py,sha256=OrK-rH1CPW8h_r8JYwkB6LSba3vPBgbD0fO9M0yKoOA,12188
135
+ xgen_doc2chunk/core/processor/rtf_helper/__init__.py,sha256=MFCKnARRwzVY5HYDNWrAxOIT-VhpJXnkYtL2Zl2YtKc,3169
136
+ xgen_doc2chunk/core/processor/rtf_helper/rtf_constants.py,sha256=FY6qeXiSjeTBX6HoJDS149g4ddXe_ShUod3mLphCsjA,3007
137
+ xgen_doc2chunk/core/processor/rtf_helper/rtf_content_extractor.py,sha256=Zs9SXrleJdlYHTew0XWjnlDZFNDuJpekHs_i47w3MzM,6443
138
+ xgen_doc2chunk/core/processor/rtf_helper/rtf_decoder.py,sha256=fMONsYw4Hqq3Wdp2foAliD8yhtOAa_NW5SLQjM1S8_M,3498
139
+ xgen_doc2chunk/core/processor/rtf_helper/rtf_file_converter.py,sha256=wRTmha1MHoAvbjWyVvyqzJEzsxcoESj_B8B3TEZCpKc,2306
140
+ xgen_doc2chunk/core/processor/rtf_helper/rtf_metadata_extractor.py,sha256=_ollUqUvnsdtnfd42XJ7alb2Jm99cvATFOfH5bGrXtU,5828
141
+ xgen_doc2chunk/core/processor/rtf_helper/rtf_preprocessor.py,sha256=mSnbpd2guQt9FSs45k2xU6bpvLAiGMagwJaAWEAMvDg,13868
142
+ xgen_doc2chunk/core/processor/rtf_helper/rtf_region_finder.py,sha256=R_TnneRM8FzctSpCKpkP1aHsr40WTIK7uuzKEjEExS4,2652
143
+ xgen_doc2chunk/core/processor/rtf_helper/rtf_table_extractor.py,sha256=-nkMHYJgiMQFbvpQEyrSWs9jNAFvLx_ugsqPO-Dw-ek,15465
144
+ xgen_doc2chunk/core/processor/rtf_helper/rtf_text_cleaner.py,sha256=wFgOnl2dWXzfL2hr1J4xyfiIp1c9B8kkGgr3vLggVHg,12146
145
+ xgen_doc2chunk/core/processor/text_helper/__init__.py,sha256=cN_XvFbC7eP5rz9Seg1NzBd40CY_pEJ3io8c6z6QzAQ,382
146
+ xgen_doc2chunk/core/processor/text_helper/text_file_converter.py,sha256=jEngq-gxgzGcyXeZQzU_9p2P2dNrMMY3ey5vy93SsSE,855
147
+ xgen_doc2chunk/core/processor/text_helper/text_image_processor.py,sha256=XPdC9PK8SWWS3Y0gor25ReBD3BaVqXTobPxISyioEvw,2396
148
+ xgen_doc2chunk/core/processor/text_helper/text_preprocessor.py,sha256=Pg5hOsnN3ONnbRsHFwVB59GMAo4LzwBImn05e9S579E,2435
149
+ xgen_doc2chunk/ocr/__init__.py,sha256=LOnnKVrde_Sf_wF0bzzgWH0jExpNXWpjzWrCqYFGxko,2141
150
+ xgen_doc2chunk/ocr/base.py,sha256=ODlwB9vM8WGuWCSNKBgC-Hj-P61_qDFWzFvSYeo6na0,7621
151
+ xgen_doc2chunk/ocr/ocr_processor.py,sha256=XNc664BiY-Hntg0pVP-flugER1MuSeFyKtBWcYVr_Q8,12750
152
+ xgen_doc2chunk/ocr/ocr_engine/__init__.py,sha256=wNBjV2meBZwt5JYM3r2mdrJTuXYq9bq6FUKV2Z98HZM,580
153
+ xgen_doc2chunk/ocr/ocr_engine/anthropic_ocr.py,sha256=IGZEk0ZUrBQXh8NP6zLSK7nmtKdKbi0-CQHwYFfDvno,2975
154
+ xgen_doc2chunk/ocr/ocr_engine/bedrock_ocr.py,sha256=4kIPb8u2_GSJ435GHJFXiIeQavMvBhuD2bP1ekt36qw,6310
155
+ xgen_doc2chunk/ocr/ocr_engine/gemini_ocr.py,sha256=A4V_AcC0tySYB4q-lNW7Tuhg7aTq0atj_RhMrCftKsM,2972
156
+ xgen_doc2chunk/ocr/ocr_engine/openai_ocr.py,sha256=ZN-3Dq1BehFmwFvxTaYmiEAdFUqujviONNDiR8c5X4A,3194
157
+ xgen_doc2chunk/ocr/ocr_engine/vllm_ocr.py,sha256=TeQOdPCPKQW8o4IyUb-4o6v6uTVzKupr4qh9NLjIj24,3672
158
+ xgen_doc2chunk-0.1.1.dist-info/METADATA,sha256=3_AmX8rdUCYS-X8x-h-GvoqA3m42729OGphprhcbmsw,7623
159
+ xgen_doc2chunk-0.1.1.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
160
+ xgen_doc2chunk-0.1.1.dist-info/licenses/LICENSE,sha256=pokMTCMoEcrcnjBAJ8cb7UVADBMGce6GLFbbRfqJVJc,11346
161
+ xgen_doc2chunk-0.1.1.dist-info/RECORD,,
@@ -1,4 +0,0 @@
1
- xgen_doc2chunk-0.1.0.dist-info/METADATA,sha256=AhjXWVu7rrLqyXgKmF674gMqlNvvG4CRJ54djJcFM4c,7623
2
- xgen_doc2chunk-0.1.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
3
- xgen_doc2chunk-0.1.0.dist-info/licenses/LICENSE,sha256=pokMTCMoEcrcnjBAJ8cb7UVADBMGce6GLFbbRfqJVJc,11346
4
- xgen_doc2chunk-0.1.0.dist-info/RECORD,,