xgen-doc2chunk 0.1.0__py3-none-any.whl → 0.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- xgen_doc2chunk/__init__.py +42 -0
- xgen_doc2chunk/chunking/__init__.py +168 -0
- xgen_doc2chunk/chunking/chunking.py +786 -0
- xgen_doc2chunk/chunking/constants.py +134 -0
- xgen_doc2chunk/chunking/page_chunker.py +248 -0
- xgen_doc2chunk/chunking/protected_regions.py +715 -0
- xgen_doc2chunk/chunking/sheet_processor.py +406 -0
- xgen_doc2chunk/chunking/table_chunker.py +832 -0
- xgen_doc2chunk/chunking/table_parser.py +172 -0
- xgen_doc2chunk/chunking/text_chunker.py +443 -0
- xgen_doc2chunk/core/__init__.py +64 -0
- xgen_doc2chunk/core/document_processor.py +1307 -0
- xgen_doc2chunk/core/functions/__init__.py +85 -0
- xgen_doc2chunk/core/functions/chart_extractor.py +144 -0
- xgen_doc2chunk/core/functions/chart_processor.py +534 -0
- xgen_doc2chunk/core/functions/file_converter.py +220 -0
- xgen_doc2chunk/core/functions/img_processor.py +649 -0
- xgen_doc2chunk/core/functions/metadata_extractor.py +542 -0
- xgen_doc2chunk/core/functions/page_tag_processor.py +393 -0
- xgen_doc2chunk/core/functions/preprocessor.py +162 -0
- xgen_doc2chunk/core/functions/storage_backend.py +381 -0
- xgen_doc2chunk/core/functions/table_extractor.py +468 -0
- xgen_doc2chunk/core/functions/table_processor.py +299 -0
- xgen_doc2chunk/core/functions/utils.py +159 -0
- xgen_doc2chunk/core/processor/__init__.py +96 -0
- xgen_doc2chunk/core/processor/base_handler.py +544 -0
- xgen_doc2chunk/core/processor/csv_handler.py +135 -0
- xgen_doc2chunk/core/processor/csv_helper/__init__.py +89 -0
- xgen_doc2chunk/core/processor/csv_helper/csv_constants.py +63 -0
- xgen_doc2chunk/core/processor/csv_helper/csv_encoding.py +104 -0
- xgen_doc2chunk/core/processor/csv_helper/csv_file_converter.py +78 -0
- xgen_doc2chunk/core/processor/csv_helper/csv_image_processor.py +75 -0
- xgen_doc2chunk/core/processor/csv_helper/csv_metadata.py +168 -0
- xgen_doc2chunk/core/processor/csv_helper/csv_parser.py +225 -0
- xgen_doc2chunk/core/processor/csv_helper/csv_preprocessor.py +86 -0
- xgen_doc2chunk/core/processor/csv_helper/csv_table.py +266 -0
- xgen_doc2chunk/core/processor/doc_handler.py +579 -0
- xgen_doc2chunk/core/processor/doc_helpers/__init__.py +25 -0
- xgen_doc2chunk/core/processor/doc_helpers/doc_file_converter.py +160 -0
- xgen_doc2chunk/core/processor/doc_helpers/doc_image_processor.py +179 -0
- xgen_doc2chunk/core/processor/doc_helpers/doc_preprocessor.py +83 -0
- xgen_doc2chunk/core/processor/docx_handler.py +376 -0
- xgen_doc2chunk/core/processor/docx_helper/__init__.py +84 -0
- xgen_doc2chunk/core/processor/docx_helper/docx_chart_extractor.py +436 -0
- xgen_doc2chunk/core/processor/docx_helper/docx_constants.py +75 -0
- xgen_doc2chunk/core/processor/docx_helper/docx_file_converter.py +76 -0
- xgen_doc2chunk/core/processor/docx_helper/docx_image.py +145 -0
- xgen_doc2chunk/core/processor/docx_helper/docx_image_processor.py +410 -0
- xgen_doc2chunk/core/processor/docx_helper/docx_metadata.py +71 -0
- xgen_doc2chunk/core/processor/docx_helper/docx_paragraph.py +126 -0
- xgen_doc2chunk/core/processor/docx_helper/docx_preprocessor.py +82 -0
- xgen_doc2chunk/core/processor/docx_helper/docx_table_extractor.py +527 -0
- xgen_doc2chunk/core/processor/docx_helper/docx_table_processor.py +220 -0
- xgen_doc2chunk/core/processor/excel_handler.py +353 -0
- xgen_doc2chunk/core/processor/excel_helper/__init__.py +97 -0
- xgen_doc2chunk/core/processor/excel_helper/excel_chart_extractor.py +498 -0
- xgen_doc2chunk/core/processor/excel_helper/excel_file_converter.py +157 -0
- xgen_doc2chunk/core/processor/excel_helper/excel_image_processor.py +316 -0
- xgen_doc2chunk/core/processor/excel_helper/excel_layout_detector.py +739 -0
- xgen_doc2chunk/core/processor/excel_helper/excel_metadata.py +145 -0
- xgen_doc2chunk/core/processor/excel_helper/excel_preprocessor.py +83 -0
- xgen_doc2chunk/core/processor/excel_helper/excel_table_xls.py +357 -0
- xgen_doc2chunk/core/processor/excel_helper/excel_table_xlsx.py +361 -0
- xgen_doc2chunk/core/processor/excel_helper/excel_textbox.py +266 -0
- xgen_doc2chunk/core/processor/html_helper/__init__.py +7 -0
- xgen_doc2chunk/core/processor/html_helper/html_file_converter.py +92 -0
- xgen_doc2chunk/core/processor/html_helper/html_preprocessor.py +74 -0
- xgen_doc2chunk/core/processor/html_reprocessor.py +140 -0
- xgen_doc2chunk/core/processor/hwp_handler.py +401 -0
- xgen_doc2chunk/core/processor/hwp_helper/__init__.py +120 -0
- xgen_doc2chunk/core/processor/hwp_helper/hwp_chart_extractor.py +373 -0
- xgen_doc2chunk/core/processor/hwp_helper/hwp_constants.py +78 -0
- xgen_doc2chunk/core/processor/hwp_helper/hwp_decoder.py +106 -0
- xgen_doc2chunk/core/processor/hwp_helper/hwp_docinfo.py +174 -0
- xgen_doc2chunk/core/processor/hwp_helper/hwp_file_converter.py +60 -0
- xgen_doc2chunk/core/processor/hwp_helper/hwp_image_processor.py +413 -0
- xgen_doc2chunk/core/processor/hwp_helper/hwp_metadata.py +236 -0
- xgen_doc2chunk/core/processor/hwp_helper/hwp_preprocessor.py +82 -0
- xgen_doc2chunk/core/processor/hwp_helper/hwp_record.py +149 -0
- xgen_doc2chunk/core/processor/hwp_helper/hwp_recovery.py +217 -0
- xgen_doc2chunk/core/processor/hwp_helper/hwp_table.py +205 -0
- xgen_doc2chunk/core/processor/hwpx_handler.py +191 -0
- xgen_doc2chunk/core/processor/hwpx_helper/__init__.py +85 -0
- xgen_doc2chunk/core/processor/hwpx_helper/hwpx_chart_extractor.py +464 -0
- xgen_doc2chunk/core/processor/hwpx_helper/hwpx_constants.py +30 -0
- xgen_doc2chunk/core/processor/hwpx_helper/hwpx_file_converter.py +70 -0
- xgen_doc2chunk/core/processor/hwpx_helper/hwpx_image_processor.py +258 -0
- xgen_doc2chunk/core/processor/hwpx_helper/hwpx_metadata.py +163 -0
- xgen_doc2chunk/core/processor/hwpx_helper/hwpx_preprocessor.py +80 -0
- xgen_doc2chunk/core/processor/hwpx_helper/hwpx_section.py +242 -0
- xgen_doc2chunk/core/processor/hwpx_helper/hwpx_table_extractor.py +462 -0
- xgen_doc2chunk/core/processor/hwpx_helper/hwpx_table_processor.py +220 -0
- xgen_doc2chunk/core/processor/image_file_handler.py +212 -0
- xgen_doc2chunk/core/processor/image_file_helper/__init__.py +17 -0
- xgen_doc2chunk/core/processor/image_file_helper/image_file_converter.py +69 -0
- xgen_doc2chunk/core/processor/image_file_helper/image_file_image_processor.py +123 -0
- xgen_doc2chunk/core/processor/image_file_helper/image_file_preprocessor.py +84 -0
- xgen_doc2chunk/core/processor/pdf_handler.py +597 -0
- xgen_doc2chunk/core/processor/pdf_helpers/__init__.py +229 -0
- xgen_doc2chunk/core/processor/pdf_helpers/pdf_block_image_engine.py +667 -0
- xgen_doc2chunk/core/processor/pdf_helpers/pdf_cell_analysis.py +493 -0
- xgen_doc2chunk/core/processor/pdf_helpers/pdf_complexity_analyzer.py +598 -0
- xgen_doc2chunk/core/processor/pdf_helpers/pdf_element_merger.py +46 -0
- xgen_doc2chunk/core/processor/pdf_helpers/pdf_file_converter.py +72 -0
- xgen_doc2chunk/core/processor/pdf_helpers/pdf_graphic_detector.py +332 -0
- xgen_doc2chunk/core/processor/pdf_helpers/pdf_image_processor.py +321 -0
- xgen_doc2chunk/core/processor/pdf_helpers/pdf_layout_block_detector.py +1244 -0
- xgen_doc2chunk/core/processor/pdf_helpers/pdf_line_analysis.py +420 -0
- xgen_doc2chunk/core/processor/pdf_helpers/pdf_metadata.py +101 -0
- xgen_doc2chunk/core/processor/pdf_helpers/pdf_page_analyzer.py +114 -0
- xgen_doc2chunk/core/processor/pdf_helpers/pdf_preprocessor.py +106 -0
- xgen_doc2chunk/core/processor/pdf_helpers/pdf_table_detection.py +1346 -0
- xgen_doc2chunk/core/processor/pdf_helpers/pdf_table_processor.py +897 -0
- xgen_doc2chunk/core/processor/pdf_helpers/pdf_table_quality_analyzer.py +750 -0
- xgen_doc2chunk/core/processor/pdf_helpers/pdf_table_validator.py +401 -0
- xgen_doc2chunk/core/processor/pdf_helpers/pdf_text_extractor.py +155 -0
- xgen_doc2chunk/core/processor/pdf_helpers/pdf_text_quality_analyzer.py +655 -0
- xgen_doc2chunk/core/processor/pdf_helpers/pdf_utils.py +183 -0
- xgen_doc2chunk/core/processor/pdf_helpers/pdf_vector_text_ocr.py +302 -0
- xgen_doc2chunk/core/processor/pdf_helpers/types.py +278 -0
- xgen_doc2chunk/core/processor/ppt_handler.py +288 -0
- xgen_doc2chunk/core/processor/ppt_helper/__init__.py +96 -0
- xgen_doc2chunk/core/processor/ppt_helper/ppt_bullet.py +332 -0
- xgen_doc2chunk/core/processor/ppt_helper/ppt_chart_extractor.py +182 -0
- xgen_doc2chunk/core/processor/ppt_helper/ppt_constants.py +119 -0
- xgen_doc2chunk/core/processor/ppt_helper/ppt_file_converter.py +55 -0
- xgen_doc2chunk/core/processor/ppt_helper/ppt_image_processor.py +196 -0
- xgen_doc2chunk/core/processor/ppt_helper/ppt_metadata.py +71 -0
- xgen_doc2chunk/core/processor/ppt_helper/ppt_preprocessor.py +77 -0
- xgen_doc2chunk/core/processor/ppt_helper/ppt_shape.py +189 -0
- xgen_doc2chunk/core/processor/ppt_helper/ppt_slide.py +69 -0
- xgen_doc2chunk/core/processor/ppt_helper/ppt_table.py +386 -0
- xgen_doc2chunk/core/processor/rtf_handler.py +290 -0
- xgen_doc2chunk/core/processor/rtf_helper/__init__.py +128 -0
- xgen_doc2chunk/core/processor/rtf_helper/rtf_constants.py +94 -0
- xgen_doc2chunk/core/processor/rtf_helper/rtf_content_extractor.py +211 -0
- xgen_doc2chunk/core/processor/rtf_helper/rtf_decoder.py +141 -0
- xgen_doc2chunk/core/processor/rtf_helper/rtf_file_converter.py +87 -0
- xgen_doc2chunk/core/processor/rtf_helper/rtf_metadata_extractor.py +179 -0
- xgen_doc2chunk/core/processor/rtf_helper/rtf_preprocessor.py +426 -0
- xgen_doc2chunk/core/processor/rtf_helper/rtf_region_finder.py +91 -0
- xgen_doc2chunk/core/processor/rtf_helper/rtf_table_extractor.py +482 -0
- xgen_doc2chunk/core/processor/rtf_helper/rtf_text_cleaner.py +389 -0
- xgen_doc2chunk/core/processor/text_handler.py +95 -0
- xgen_doc2chunk/core/processor/text_helper/__init__.py +17 -0
- xgen_doc2chunk/core/processor/text_helper/text_file_converter.py +28 -0
- xgen_doc2chunk/core/processor/text_helper/text_image_processor.py +75 -0
- xgen_doc2chunk/core/processor/text_helper/text_preprocessor.py +82 -0
- xgen_doc2chunk/ocr/__init__.py +67 -0
- xgen_doc2chunk/ocr/base.py +209 -0
- xgen_doc2chunk/ocr/ocr_engine/__init__.py +22 -0
- xgen_doc2chunk/ocr/ocr_engine/anthropic_ocr.py +91 -0
- xgen_doc2chunk/ocr/ocr_engine/bedrock_ocr.py +172 -0
- xgen_doc2chunk/ocr/ocr_engine/gemini_ocr.py +91 -0
- xgen_doc2chunk/ocr/ocr_engine/openai_ocr.py +100 -0
- xgen_doc2chunk/ocr/ocr_engine/vllm_ocr.py +116 -0
- xgen_doc2chunk/ocr/ocr_processor.py +387 -0
- {xgen_doc2chunk-0.1.0.dist-info → xgen_doc2chunk-0.1.2.dist-info}/METADATA +1 -1
- xgen_doc2chunk-0.1.2.dist-info/RECORD +161 -0
- xgen_doc2chunk-0.1.0.dist-info/RECORD +0 -4
- {xgen_doc2chunk-0.1.0.dist-info → xgen_doc2chunk-0.1.2.dist-info}/WHEEL +0 -0
- {xgen_doc2chunk-0.1.0.dist-info → xgen_doc2chunk-0.1.2.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
# xgen_doc2chunk/core/functions/__init__.py
|
|
2
|
+
"""
|
|
3
|
+
Functions - Common Utility Functions Module
|
|
4
|
+
|
|
5
|
+
Provides common utility functions used in document processing.
|
|
6
|
+
|
|
7
|
+
Module Components:
|
|
8
|
+
- utils: Text cleaning, code cleaning, JSON sanitization utilities
|
|
9
|
+
- img_processor: Image processing and storage (ImageProcessor class)
|
|
10
|
+
- storage_backend: Storage backend implementations (Local, MinIO, S3)
|
|
11
|
+
- metadata_extractor: Document metadata extraction interface
|
|
12
|
+
|
|
13
|
+
Usage Example:
|
|
14
|
+
from xgen_doc2chunk.core.functions import clean_text, clean_code_text
|
|
15
|
+
from xgen_doc2chunk.core.functions import ImageProcessor, save_image_to_file
|
|
16
|
+
from xgen_doc2chunk.core.functions.storage_backend import LocalStorageBackend
|
|
17
|
+
from xgen_doc2chunk.core.functions.utils import sanitize_text_for_json
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
from xgen_doc2chunk.core.functions.utils import (
|
|
21
|
+
clean_text,
|
|
22
|
+
clean_code_text,
|
|
23
|
+
sanitize_text_for_json,
|
|
24
|
+
)
|
|
25
|
+
|
|
26
|
+
# Storage backend module
|
|
27
|
+
from xgen_doc2chunk.core.functions.storage_backend import (
|
|
28
|
+
StorageType,
|
|
29
|
+
BaseStorageBackend,
|
|
30
|
+
LocalStorageBackend,
|
|
31
|
+
MinIOStorageBackend,
|
|
32
|
+
S3StorageBackend,
|
|
33
|
+
create_storage_backend,
|
|
34
|
+
get_default_backend,
|
|
35
|
+
)
|
|
36
|
+
|
|
37
|
+
# Image processor module
|
|
38
|
+
from xgen_doc2chunk.core.functions.img_processor import (
|
|
39
|
+
ImageProcessor,
|
|
40
|
+
ImageProcessorConfig,
|
|
41
|
+
ImageFormat,
|
|
42
|
+
NamingStrategy,
|
|
43
|
+
save_image_to_file,
|
|
44
|
+
create_image_processor,
|
|
45
|
+
DEFAULT_IMAGE_CONFIG,
|
|
46
|
+
)
|
|
47
|
+
|
|
48
|
+
# Metadata extraction module
|
|
49
|
+
from xgen_doc2chunk.core.functions.metadata_extractor import (
|
|
50
|
+
MetadataField,
|
|
51
|
+
DocumentMetadata,
|
|
52
|
+
MetadataFormatter,
|
|
53
|
+
BaseMetadataExtractor,
|
|
54
|
+
format_metadata,
|
|
55
|
+
)
|
|
56
|
+
|
|
57
|
+
__all__ = [
|
|
58
|
+
# Text utilities
|
|
59
|
+
"clean_text",
|
|
60
|
+
"clean_code_text",
|
|
61
|
+
"sanitize_text_for_json",
|
|
62
|
+
# Storage backends
|
|
63
|
+
"StorageType",
|
|
64
|
+
"BaseStorageBackend",
|
|
65
|
+
"LocalStorageBackend",
|
|
66
|
+
"MinIOStorageBackend",
|
|
67
|
+
"S3StorageBackend",
|
|
68
|
+
"create_storage_backend",
|
|
69
|
+
"get_default_backend",
|
|
70
|
+
# Image processor (base class for all format-specific processors)
|
|
71
|
+
"ImageProcessor",
|
|
72
|
+
"ImageProcessorConfig",
|
|
73
|
+
"ImageFormat",
|
|
74
|
+
"NamingStrategy",
|
|
75
|
+
"save_image_to_file",
|
|
76
|
+
"create_image_processor",
|
|
77
|
+
"DEFAULT_IMAGE_CONFIG",
|
|
78
|
+
# Metadata extraction
|
|
79
|
+
"MetadataField",
|
|
80
|
+
"DocumentMetadata",
|
|
81
|
+
"MetadataFormatter",
|
|
82
|
+
"BaseMetadataExtractor",
|
|
83
|
+
"format_metadata",
|
|
84
|
+
]
|
|
85
|
+
|
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Chart Extractor Base Module
|
|
3
|
+
|
|
4
|
+
Abstract base class for chart extraction across different file formats.
|
|
5
|
+
Each file handler should have its own ChartExtractor implementation.
|
|
6
|
+
|
|
7
|
+
Output format:
|
|
8
|
+
{chart_prefix}
|
|
9
|
+
Title: {chart_title}
|
|
10
|
+
Chart Type: {chart_type}
|
|
11
|
+
<table>...</table>
|
|
12
|
+
{chart_suffix}
|
|
13
|
+
"""
|
|
14
|
+
from abc import ABC, abstractmethod
|
|
15
|
+
from dataclasses import dataclass
|
|
16
|
+
from typing import Any, Dict, List, Optional, TYPE_CHECKING
|
|
17
|
+
|
|
18
|
+
if TYPE_CHECKING:
|
|
19
|
+
from xgen_doc2chunk.core.functions.chart_processor import ChartProcessor
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@dataclass
class ChartData:
    """
    Format-neutral container for the data pulled out of a chart.

    Format-specific extractors fill in this structure so that formatting
    code can work on one common shape regardless of the source format.

    Attributes:
        chart_type: Kind of chart (e.g., "Bar Chart", "Line Chart", "Pie Chart");
            defaults to "Chart".
        title: Chart title, or None when there is none.
        categories: Category labels (X-axis values), or None.
        series: Series entries; each is a dict expected to hold 'name' and 'values'.
    """
    chart_type: str = "Chart"
    title: Optional[str] = None
    categories: Optional[List[str]] = None
    series: Optional[List[Dict[str, Any]]] = None

    def has_data(self) -> bool:
        """Report whether at least one series has a truthy 'values' entry."""
        entries = self.series
        if entries:
            # A single series with non-empty values is enough.
            for entry in entries:
                if entry.get('values'):
                    return True
        # No series at all, or every series is missing/empty in 'values'.
        return False
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
class BaseChartExtractor(ABC):
    """
    Abstract base class for chart extraction.

    Each file format handler should implement its own ChartExtractor
    that inherits from this class and provides ``extract``. ``process``
    then formats the extracted data through the ChartProcessor passed to
    the constructor.

    Usage:
        class ExcelChartExtractor(BaseChartExtractor):
            def extract(self, chart_element) -> ChartData:
                # Excel-specific extraction logic
                ...
    """

    def __init__(self, chart_processor: "ChartProcessor"):
        """
        Initialize chart extractor.

        Args:
            chart_processor: ChartProcessor instance for formatting output
        """
        self._chart_processor = chart_processor

    @property
    def chart_processor(self) -> "ChartProcessor":
        """ChartProcessor instance."""
        return self._chart_processor

    @abstractmethod
    def extract(self, chart_element: Any) -> ChartData:
        """
        Extract chart data from format-specific chart element.

        Args:
            chart_element: Format-specific chart object/element

        Returns:
            ChartData with extracted information
        """

    def process(self, chart_element: Any) -> str:
        """
        Extract and format chart data.

        This is the main entry point for chart processing.
        Extracts data using format-specific logic, then formats using ChartProcessor.

        Any exception raised while extracting or formatting is logged with
        its traceback and converted into a fallback block, so one broken
        chart does not abort processing of the surrounding document.

        Args:
            chart_element: Format-specific chart object/element

        Returns:
            Formatted chart string with tags
        """
        try:
            chart_data = self.extract(chart_element)

            if chart_data.has_data():
                return self._chart_processor.format_chart_data(
                    chart_type=chart_data.chart_type,
                    title=chart_data.title,
                    categories=chart_data.categories,
                    series=chart_data.series
                )

            # Nothing to tabulate: emit the fallback with what is known.
            return self._chart_processor.format_chart_fallback(
                chart_type=chart_data.chart_type,
                title=chart_data.title
            )
        except Exception as e:
            # Deliberate best-effort boundary: keep returning a fallback,
            # but record the traceback so the failure can be diagnosed.
            import logging

            logging.getLogger(__name__).warning(
                "Chart extraction failed: %s", e, exc_info=True
            )
            return self._chart_processor.format_chart_fallback(
                chart_type="Unknown",
                message=f"Error extracting chart: {e}"
            )
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
class NullChartExtractor(BaseChartExtractor):
    """
    No-op chart extractor for handlers without chart support.

    Intended for file formats such as PDF, CSV and TXT that don't contain charts.
    """

    def process(self, chart_element: Any) -> str:
        """Ignore the element and produce no output (an empty string)."""
        return ""

    def extract(self, chart_element: Any) -> ChartData:
        """Produce a ChartData labelled "Unsupported" with no other fields set."""
        placeholder = ChartData(chart_type="Unsupported")
        return placeholder
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
__all__ = [
|
|
141
|
+
'ChartData',
|
|
142
|
+
'BaseChartExtractor',
|
|
143
|
+
'NullChartExtractor',
|
|
144
|
+
]
|