splurge-dsv 2025.1.1__tar.gz → 2025.1.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {splurge_dsv-2025.1.1/splurge_dsv.egg-info → splurge_dsv-2025.1.2}/PKG-INFO +27 -2
- {splurge_dsv-2025.1.1 → splurge_dsv-2025.1.2}/README.md +26 -1
- {splurge_dsv-2025.1.1 → splurge_dsv-2025.1.2}/pyproject.toml +27 -1
- splurge_dsv-2025.1.2/splurge_dsv/__init__.py +84 -0
- splurge_dsv-2025.1.2/splurge_dsv/__main__.py +15 -0
- splurge_dsv-2025.1.2/splurge_dsv/cli.py +158 -0
- {splurge_dsv-2025.1.1 → splurge_dsv-2025.1.2}/splurge_dsv/dsv_helper.py +29 -46
- {splurge_dsv-2025.1.1 → splurge_dsv-2025.1.2}/splurge_dsv/exceptions.py +22 -9
- {splurge_dsv-2025.1.1 → splurge_dsv-2025.1.2}/splurge_dsv/path_validator.py +102 -79
- {splurge_dsv-2025.1.1 → splurge_dsv-2025.1.2}/splurge_dsv/resource_manager.py +77 -138
- {splurge_dsv-2025.1.1 → splurge_dsv-2025.1.2}/splurge_dsv/string_tokenizer.py +5 -24
- {splurge_dsv-2025.1.1 → splurge_dsv-2025.1.2}/splurge_dsv/text_file_helper.py +42 -64
- {splurge_dsv-2025.1.1 → splurge_dsv-2025.1.2/splurge_dsv.egg-info}/PKG-INFO +27 -2
- {splurge_dsv-2025.1.1 → splurge_dsv-2025.1.2}/splurge_dsv.egg-info/SOURCES.txt +2 -7
- splurge_dsv-2025.1.1/splurge_dsv/__init__.py +0 -0
- splurge_dsv-2025.1.1/splurge_dsv/__main__.py +0 -0
- splurge_dsv-2025.1.1/tests/test_dsv_helper.py +0 -525
- splurge_dsv-2025.1.1/tests/test_exceptions.py +0 -255
- splurge_dsv-2025.1.1/tests/test_path_validator.py +0 -411
- splurge_dsv-2025.1.1/tests/test_resource_manager.py +0 -805
- splurge_dsv-2025.1.1/tests/test_string_tokenizer.py +0 -359
- splurge_dsv-2025.1.1/tests/test_text_file_helper.py +0 -579
- {splurge_dsv-2025.1.1 → splurge_dsv-2025.1.2}/LICENSE +0 -0
- {splurge_dsv-2025.1.1 → splurge_dsv-2025.1.2}/setup.cfg +0 -0
- {splurge_dsv-2025.1.1 → splurge_dsv-2025.1.2}/splurge_dsv.egg-info/dependency_links.txt +0 -0
- {splurge_dsv-2025.1.1 → splurge_dsv-2025.1.2}/splurge_dsv.egg-info/requires.txt +0 -0
- {splurge_dsv-2025.1.1 → splurge_dsv-2025.1.2}/splurge_dsv.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: splurge-dsv
|
3
|
-
Version: 2025.1.1
|
3
|
+
Version: 2025.1.2
|
4
4
|
Summary: A utility library for working with DSV (Delimited String Values) files
|
5
5
|
Author: Jim Schilling
|
6
6
|
License-Expression: MIT
|
@@ -243,7 +243,32 @@ The project follows strict coding standards:
|
|
243
243
|
|
244
244
|
## Changelog
|
245
245
|
|
246
|
-
### 2025.1.1
|
246
|
+
### 2025.1.2 (2025-09-02)
|
247
|
+
|
248
|
+
#### 🧪 Comprehensive End-to-End Testing
|
249
|
+
- **Complete E2E Test Suite**: Implemented 25 comprehensive end-to-end workflow tests covering all major CLI functionality
|
250
|
+
- **Real CLI Execution**: Tests run actual `splurge-dsv` commands with real files, not just mocked components
|
251
|
+
- **Workflow Coverage**: Tests cover CSV/TSV parsing, file operations, data processing, error handling, and performance scenarios
|
252
|
+
- **Cross-Platform Compatibility**: Handles Windows-specific encoding issues and platform differences gracefully
|
253
|
+
- **Performance Testing**: Large file processing tests (1,000+ and 10,000+ rows) with streaming and chunking validation
|
254
|
+
|
255
|
+
#### 📊 Test Coverage Improvements
|
256
|
+
- **CLI Coverage**: Increased from 64% to **95%** with comprehensive CLI workflow testing
|
257
|
+
- **DSV Helper Coverage**: Improved from 75% to **93%** with real-world usage scenarios
|
258
|
+
- **Overall Coverage**: Improved from 60% to **73%** across the entire codebase
|
259
|
+
- **Integration Testing**: Added real file system operations and complete pipeline validation
|
260
|
+
|
261
|
+
#### 🔄 Test Categories
|
262
|
+
- **CLI Workflows**: 19 tests covering basic parsing, custom delimiters, header/footer skipping, streaming, and error scenarios
|
263
|
+
- **Error Handling**: 3 tests for invalid arguments, missing parameters, and CLI error conditions
|
264
|
+
- **Integration Scenarios**: 3 tests for data analysis, transformation, and multi-format workflows
|
265
|
+
|
266
|
+
#### 📚 Documentation & Examples
|
267
|
+
- **E2E Testing Guide**: Created comprehensive documentation (`docs/e2e_testing_coverage.md`) explaining test coverage and usage
|
268
|
+
- **Real-World Examples**: Tests serve as practical examples of library usage patterns
|
269
|
+
- **Error Scenario Coverage**: Comprehensive testing of edge cases and failure conditions
|
270
|
+
|
271
|
+
### 2025.1.1 (2025-08-XX)
|
247
272
|
|
248
273
|
#### 🔧 Code Quality Improvements
|
249
274
|
- **Refactored Complex Regex Logic**: Extracted Windows drive letter validation logic from `_check_dangerous_characters` into a dedicated `_is_valid_windows_drive_pattern` helper method in `PathValidator` for better readability and maintainability
|
@@ -214,7 +214,32 @@ The project follows strict coding standards:
|
|
214
214
|
|
215
215
|
## Changelog
|
216
216
|
|
217
|
-
### 2025.1.1
|
217
|
+
### 2025.1.2 (2025-09-02)
|
218
|
+
|
219
|
+
#### 🧪 Comprehensive End-to-End Testing
|
220
|
+
- **Complete E2E Test Suite**: Implemented 25 comprehensive end-to-end workflow tests covering all major CLI functionality
|
221
|
+
- **Real CLI Execution**: Tests run actual `splurge-dsv` commands with real files, not just mocked components
|
222
|
+
- **Workflow Coverage**: Tests cover CSV/TSV parsing, file operations, data processing, error handling, and performance scenarios
|
223
|
+
- **Cross-Platform Compatibility**: Handles Windows-specific encoding issues and platform differences gracefully
|
224
|
+
- **Performance Testing**: Large file processing tests (1,000+ and 10,000+ rows) with streaming and chunking validation
|
225
|
+
|
226
|
+
#### 📊 Test Coverage Improvements
|
227
|
+
- **CLI Coverage**: Increased from 64% to **95%** with comprehensive CLI workflow testing
|
228
|
+
- **DSV Helper Coverage**: Improved from 75% to **93%** with real-world usage scenarios
|
229
|
+
- **Overall Coverage**: Improved from 60% to **73%** across the entire codebase
|
230
|
+
- **Integration Testing**: Added real file system operations and complete pipeline validation
|
231
|
+
|
232
|
+
#### 🔄 Test Categories
|
233
|
+
- **CLI Workflows**: 19 tests covering basic parsing, custom delimiters, header/footer skipping, streaming, and error scenarios
|
234
|
+
- **Error Handling**: 3 tests for invalid arguments, missing parameters, and CLI error conditions
|
235
|
+
- **Integration Scenarios**: 3 tests for data analysis, transformation, and multi-format workflows
|
236
|
+
|
237
|
+
#### 📚 Documentation & Examples
|
238
|
+
- **E2E Testing Guide**: Created comprehensive documentation (`docs/e2e_testing_coverage.md`) explaining test coverage and usage
|
239
|
+
- **Real-World Examples**: Tests serve as practical examples of library usage patterns
|
240
|
+
- **Error Scenario Coverage**: Comprehensive testing of edge cases and failure conditions
|
241
|
+
|
242
|
+
### 2025.1.1 (2025-08-XX)
|
218
243
|
|
219
244
|
#### 🔧 Code Quality Improvements
|
220
245
|
- **Refactored Complex Regex Logic**: Extracted Windows drive letter validation logic from `_check_dangerous_characters` into a dedicated `_is_valid_windows_drive_pattern` helper method in `PathValidator` for better readability and maintainability
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
4
4
|
|
5
5
|
[project]
|
6
6
|
name = "splurge-dsv"
|
7
|
-
version = "2025.1.1"
|
7
|
+
version = "2025.1.2"
|
8
8
|
description = "A utility library for working with DSV (Delimited String Values) files"
|
9
9
|
readme = "README.md"
|
10
10
|
requires-python = ">=3.10"
|
@@ -82,3 +82,29 @@ exclude_lines = [
|
|
82
82
|
|
83
83
|
[tool.coverage.html]
|
84
84
|
directory = "htmlcov"
|
85
|
+
|
86
|
+
[tool.ruff]
|
87
|
+
target-version = "py310"
|
88
|
+
line-length = 120
|
89
|
+
|
90
|
+
[tool.ruff.lint]
|
91
|
+
select = [
|
92
|
+
"E", # pycodestyle errors
|
93
|
+
"W", # pycodestyle warnings
|
94
|
+
"F", # pyflakes
|
95
|
+
"I", # isort
|
96
|
+
"B", # flake8-bugbear
|
97
|
+
"C4", # flake8-comprehensions
|
98
|
+
"UP", # pyupgrade
|
99
|
+
]
|
100
|
+
ignore = [
|
101
|
+
"E501", # line too long, handled by line-length
|
102
|
+
"B008", # do not perform function calls in argument defaults
|
103
|
+
"C901", # too complex
|
104
|
+
]
|
105
|
+
|
106
|
+
[tool.ruff.format]
|
107
|
+
quote-style = "double"
|
108
|
+
indent-style = "space"
|
109
|
+
skip-magic-trailing-comma = false
|
110
|
+
line-ending = "auto"
|
@@ -0,0 +1,84 @@
|
|
1
|
+
"""
|
2
|
+
Splurge DSV - A utility library for working with DSV (Delimited String Values) files.
|
3
|
+
|
4
|
+
This package provides utilities for parsing, processing, and manipulating
|
5
|
+
delimited string value files with support for various delimiters, text bookends,
|
6
|
+
and streaming operations.
|
7
|
+
|
8
|
+
Copyright (c) 2025 Jim Schilling
|
9
|
+
|
10
|
+
This module is licensed under the MIT License.
|
11
|
+
"""
|
12
|
+
|
13
|
+
# Local imports
|
14
|
+
from splurge_dsv.dsv_helper import DsvHelper
|
15
|
+
from splurge_dsv.exceptions import (
|
16
|
+
SplurgeConfigurationError,
|
17
|
+
SplurgeDataProcessingError,
|
18
|
+
SplurgeDsvError,
|
19
|
+
SplurgeFileEncodingError,
|
20
|
+
SplurgeFileNotFoundError,
|
21
|
+
SplurgeFileOperationError,
|
22
|
+
SplurgeFilePermissionError,
|
23
|
+
SplurgeFormatError,
|
24
|
+
SplurgeParameterError,
|
25
|
+
SplurgeParsingError,
|
26
|
+
SplurgePathValidationError,
|
27
|
+
SplurgePerformanceWarning,
|
28
|
+
SplurgeRangeError,
|
29
|
+
SplurgeResourceAcquisitionError,
|
30
|
+
SplurgeResourceError,
|
31
|
+
SplurgeResourceReleaseError,
|
32
|
+
SplurgeStreamingError,
|
33
|
+
SplurgeTypeConversionError,
|
34
|
+
SplurgeValidationError,
|
35
|
+
)
|
36
|
+
from splurge_dsv.path_validator import PathValidator
|
37
|
+
from splurge_dsv.resource_manager import (
|
38
|
+
FileResourceManager,
|
39
|
+
ResourceManager,
|
40
|
+
StreamResourceManager,
|
41
|
+
safe_file_operation,
|
42
|
+
safe_stream_operation,
|
43
|
+
)
|
44
|
+
from splurge_dsv.string_tokenizer import StringTokenizer
|
45
|
+
from splurge_dsv.text_file_helper import TextFileHelper
|
46
|
+
|
47
|
+
__version__ = "2025.1.2"
|
48
|
+
__author__ = "Jim Schilling"
|
49
|
+
__license__ = "MIT"
|
50
|
+
|
51
|
+
__all__ = [
|
52
|
+
# Main helper class
|
53
|
+
"DsvHelper",
|
54
|
+
# Exceptions
|
55
|
+
"SplurgeDsvError",
|
56
|
+
"SplurgeValidationError",
|
57
|
+
"SplurgeFileOperationError",
|
58
|
+
"SplurgeFileNotFoundError",
|
59
|
+
"SplurgeFilePermissionError",
|
60
|
+
"SplurgeFileEncodingError",
|
61
|
+
"SplurgePathValidationError",
|
62
|
+
"SplurgeDataProcessingError",
|
63
|
+
"SplurgeParsingError",
|
64
|
+
"SplurgeTypeConversionError",
|
65
|
+
"SplurgeStreamingError",
|
66
|
+
"SplurgeConfigurationError",
|
67
|
+
"SplurgeResourceError",
|
68
|
+
"SplurgeResourceAcquisitionError",
|
69
|
+
"SplurgeResourceReleaseError",
|
70
|
+
"SplurgePerformanceWarning",
|
71
|
+
"SplurgeParameterError",
|
72
|
+
"SplurgeRangeError",
|
73
|
+
"SplurgeFormatError",
|
74
|
+
# Utility classes
|
75
|
+
"StringTokenizer",
|
76
|
+
"TextFileHelper",
|
77
|
+
"PathValidator",
|
78
|
+
"ResourceManager",
|
79
|
+
"FileResourceManager",
|
80
|
+
"StreamResourceManager",
|
81
|
+
# Context managers
|
82
|
+
"safe_file_operation",
|
83
|
+
"safe_stream_operation",
|
84
|
+
]
|
@@ -0,0 +1,15 @@
|
|
1
|
+
"""
|
2
|
+
Command-line interface entry point for splurge-dsv.
|
3
|
+
|
4
|
+
This module serves as the entry point when running the package as a module.
|
5
|
+
It imports and calls the main CLI function from the cli module.
|
6
|
+
"""
|
7
|
+
|
8
|
+
# Standard library imports
|
9
|
+
import sys
|
10
|
+
|
11
|
+
# Local imports
|
12
|
+
from splurge_dsv.cli import main
|
13
|
+
|
14
|
+
if __name__ == "__main__":
|
15
|
+
sys.exit(main())
|
@@ -0,0 +1,158 @@
|
|
1
|
+
"""
|
2
|
+
Command-line interface for splurge-dsv.
|
3
|
+
|
4
|
+
This module provides a command-line interface for the splurge-dsv library,
|
5
|
+
allowing users to parse DSV files from the command line.
|
6
|
+
|
7
|
+
Usage:
|
8
|
+
python -m splurge_dsv <file_path> [options]
|
9
|
+
python -m splurge_dsv --help
|
10
|
+
"""
|
11
|
+
|
12
|
+
# Standard library imports
|
13
|
+
import argparse
|
14
|
+
import sys
|
15
|
+
from pathlib import Path
|
16
|
+
|
17
|
+
# Local imports
|
18
|
+
from splurge_dsv.dsv_helper import DsvHelper
|
19
|
+
from splurge_dsv.exceptions import SplurgeDsvError
|
20
|
+
|
21
|
+
|
22
|
+
def parse_arguments() -> argparse.Namespace:
|
23
|
+
"""Parse command line arguments."""
|
24
|
+
parser = argparse.ArgumentParser(
|
25
|
+
description="Parse DSV (Delimited String Values) files",
|
26
|
+
formatter_class=argparse.RawDescriptionHelpFormatter,
|
27
|
+
epilog="""
|
28
|
+
Examples:
|
29
|
+
python -m splurge_dsv data.csv --delimiter ,
|
30
|
+
python -m splurge_dsv data.tsv --delimiter "\\t"
|
31
|
+
python -m splurge_dsv data.txt --delimiter "|" --bookend '"'
|
32
|
+
""",
|
33
|
+
)
|
34
|
+
|
35
|
+
parser.add_argument("file_path", type=str, help="Path to the DSV file to parse")
|
36
|
+
|
37
|
+
parser.add_argument("--delimiter", "-d", type=str, required=True, help="Delimiter character to use for parsing")
|
38
|
+
|
39
|
+
parser.add_argument("--bookend", "-b", type=str, help="Bookend character for text fields (e.g., '\"')")
|
40
|
+
|
41
|
+
parser.add_argument("--no-strip", action="store_true", help="Don't strip whitespace from values")
|
42
|
+
|
43
|
+
parser.add_argument("--no-bookend-strip", action="store_true", help="Don't strip whitespace from bookends")
|
44
|
+
|
45
|
+
parser.add_argument("--encoding", "-e", type=str, default="utf-8", help="File encoding (default: utf-8)")
|
46
|
+
|
47
|
+
parser.add_argument("--skip-header", type=int, default=0, help="Number of header rows to skip (default: 0)")
|
48
|
+
|
49
|
+
parser.add_argument("--skip-footer", type=int, default=0, help="Number of footer rows to skip (default: 0)")
|
50
|
+
|
51
|
+
parser.add_argument(
|
52
|
+
"--stream", "-s", action="store_true", help="Stream the file in chunks instead of loading entirely into memory"
|
53
|
+
)
|
54
|
+
|
55
|
+
parser.add_argument("--chunk-size", type=int, default=500, help="Chunk size for streaming (default: 500)")
|
56
|
+
|
57
|
+
parser.add_argument("--version", action="version", version="%(prog)s 2025.1.2")
|
58
|
+
|
59
|
+
return parser.parse_args()
|
60
|
+
|
61
|
+
|
62
|
+
def print_results(rows: list[list[str]], delimiter: str) -> None:
|
63
|
+
"""Print parsed results in a formatted way."""
|
64
|
+
if not rows:
|
65
|
+
print("No data found.")
|
66
|
+
return
|
67
|
+
|
68
|
+
# Find the maximum width for each column
|
69
|
+
if rows:
|
70
|
+
max_widths = []
|
71
|
+
for col_idx in range(len(rows[0])):
|
72
|
+
max_width = max(len(str(row[col_idx])) for row in rows)
|
73
|
+
max_widths.append(max_width)
|
74
|
+
|
75
|
+
# Print header separator
|
76
|
+
print("-" * (sum(max_widths) + len(max_widths) * 3 - 1))
|
77
|
+
|
78
|
+
# Print each row
|
79
|
+
for row_idx, row in enumerate(rows):
|
80
|
+
formatted_row = []
|
81
|
+
for col_idx, value in enumerate(row):
|
82
|
+
formatted_value = str(value).ljust(max_widths[col_idx])
|
83
|
+
formatted_row.append(formatted_value)
|
84
|
+
print(f"| {' | '.join(formatted_row)} |")
|
85
|
+
|
86
|
+
# Print separator after header
|
87
|
+
if row_idx == 0:
|
88
|
+
print("-" * (sum(max_widths) + len(max_widths) * 3 - 1))
|
89
|
+
|
90
|
+
|
91
|
+
def main() -> int:
|
92
|
+
"""Main entry point for the command-line interface."""
|
93
|
+
try:
|
94
|
+
args = parse_arguments()
|
95
|
+
|
96
|
+
# Validate file path
|
97
|
+
file_path = Path(args.file_path)
|
98
|
+
if not file_path.exists():
|
99
|
+
print(f"Error: File '{args.file_path}' not found.", file=sys.stderr)
|
100
|
+
return 1
|
101
|
+
|
102
|
+
if not file_path.is_file():
|
103
|
+
print(f"Error: '{args.file_path}' is not a file.", file=sys.stderr)
|
104
|
+
return 1
|
105
|
+
|
106
|
+
# Parse the file
|
107
|
+
if args.stream:
|
108
|
+
print(f"Streaming file '{args.file_path}' with delimiter '{args.delimiter}'...")
|
109
|
+
chunk_count = 0
|
110
|
+
total_rows = 0
|
111
|
+
|
112
|
+
for chunk in DsvHelper.parse_stream(
|
113
|
+
file_path,
|
114
|
+
delimiter=args.delimiter,
|
115
|
+
strip=not args.no_strip,
|
116
|
+
bookend=args.bookend,
|
117
|
+
bookend_strip=not args.no_bookend_strip,
|
118
|
+
encoding=args.encoding,
|
119
|
+
skip_header_rows=args.skip_header,
|
120
|
+
skip_footer_rows=args.skip_footer,
|
121
|
+
chunk_size=args.chunk_size,
|
122
|
+
):
|
123
|
+
chunk_count += 1
|
124
|
+
total_rows += len(chunk)
|
125
|
+
print(f"Chunk {chunk_count}: {len(chunk)} rows")
|
126
|
+
print_results(chunk, args.delimiter)
|
127
|
+
print()
|
128
|
+
|
129
|
+
print(f"Total: {total_rows} rows in {chunk_count} chunks")
|
130
|
+
else:
|
131
|
+
print(f"Parsing file '{args.file_path}' with delimiter '{args.delimiter}'...")
|
132
|
+
rows = DsvHelper.parse_file(
|
133
|
+
file_path,
|
134
|
+
delimiter=args.delimiter,
|
135
|
+
strip=not args.no_strip,
|
136
|
+
bookend=args.bookend,
|
137
|
+
bookend_strip=not args.no_bookend_strip,
|
138
|
+
encoding=args.encoding,
|
139
|
+
skip_header_rows=args.skip_header,
|
140
|
+
skip_footer_rows=args.skip_footer,
|
141
|
+
)
|
142
|
+
|
143
|
+
print(f"Parsed {len(rows)} rows")
|
144
|
+
print_results(rows, args.delimiter)
|
145
|
+
|
146
|
+
return 0
|
147
|
+
|
148
|
+
except KeyboardInterrupt:
|
149
|
+
print("\nOperation cancelled by user.", file=sys.stderr)
|
150
|
+
return 130
|
151
|
+
except SplurgeDsvError as e:
|
152
|
+
print(f"Error: {e.message}", file=sys.stderr)
|
153
|
+
if e.details:
|
154
|
+
print(f"Details: {e.details}", file=sys.stderr)
|
155
|
+
return 1
|
156
|
+
except Exception as e:
|
157
|
+
print(f"Unexpected error: {e}", file=sys.stderr)
|
158
|
+
return 1
|
@@ -8,12 +8,15 @@ Please preserve this header and all related material when sharing!
|
|
8
8
|
This module is licensed under the MIT License.
|
9
9
|
"""
|
10
10
|
|
11
|
+
# Standard library imports
|
12
|
+
from collections.abc import Iterator
|
11
13
|
from os import PathLike
|
12
|
-
from typing import Iterator
|
13
14
|
|
15
|
+
# Local imports
|
16
|
+
from splurge_dsv.exceptions import SplurgeParameterError
|
14
17
|
from splurge_dsv.string_tokenizer import StringTokenizer
|
15
18
|
from splurge_dsv.text_file_helper import TextFileHelper
|
16
|
-
|
19
|
+
|
17
20
|
|
18
21
|
class DsvHelper:
|
19
22
|
"""
|
@@ -38,7 +41,7 @@ class DsvHelper:
|
|
38
41
|
delimiter: str,
|
39
42
|
strip: bool = DEFAULT_STRIP,
|
40
43
|
bookend: str | None = None,
|
41
|
-
bookend_strip: bool = DEFAULT_BOOKEND_STRIP
|
44
|
+
bookend_strip: bool = DEFAULT_BOOKEND_STRIP,
|
42
45
|
) -> list[str]:
|
43
46
|
"""
|
44
47
|
Parse a string into a list of strings.
|
@@ -68,10 +71,7 @@ class DsvHelper:
|
|
68
71
|
tokens: list[str] = StringTokenizer.parse(content, delimiter=delimiter, strip=strip)
|
69
72
|
|
70
73
|
if bookend:
|
71
|
-
tokens = [
|
72
|
-
StringTokenizer.remove_bookends(token, bookend=bookend, strip=bookend_strip)
|
73
|
-
for token in tokens
|
74
|
-
]
|
74
|
+
tokens = [StringTokenizer.remove_bookends(token, bookend=bookend, strip=bookend_strip) for token in tokens]
|
75
75
|
|
76
76
|
return tokens
|
77
77
|
|
@@ -83,7 +83,7 @@ class DsvHelper:
|
|
83
83
|
delimiter: str,
|
84
84
|
strip: bool = DEFAULT_STRIP,
|
85
85
|
bookend: str | None = None,
|
86
|
-
bookend_strip: bool = DEFAULT_BOOKEND_STRIP
|
86
|
+
bookend_strip: bool = DEFAULT_BOOKEND_STRIP,
|
87
87
|
) -> list[list[str]]:
|
88
88
|
"""
|
89
89
|
Parse a list of strings into a list of lists of strings.
|
@@ -108,7 +108,7 @@ class DsvHelper:
|
|
108
108
|
"""
|
109
109
|
if not isinstance(content, list):
|
110
110
|
raise SplurgeParameterError("content must be a list")
|
111
|
-
|
111
|
+
|
112
112
|
if not all(isinstance(item, str) for item in content):
|
113
113
|
raise SplurgeParameterError("content must be a list of strings")
|
114
114
|
|
@@ -128,7 +128,7 @@ class DsvHelper:
|
|
128
128
|
bookend_strip: bool = DEFAULT_BOOKEND_STRIP,
|
129
129
|
encoding: str = DEFAULT_ENCODING,
|
130
130
|
skip_header_rows: int = DEFAULT_SKIP_HEADER_ROWS,
|
131
|
-
skip_footer_rows: int = DEFAULT_SKIP_FOOTER_ROWS
|
131
|
+
skip_footer_rows: int = DEFAULT_SKIP_FOOTER_ROWS,
|
132
132
|
) -> list[list[str]]:
|
133
133
|
"""
|
134
134
|
Parse a file into a list of lists of strings.
|
@@ -157,19 +157,10 @@ class DsvHelper:
|
|
157
157
|
[['header1', 'header2'], ['value1', 'value2']]
|
158
158
|
"""
|
159
159
|
lines: list[str] = TextFileHelper.read(
|
160
|
-
file_path,
|
161
|
-
encoding=encoding,
|
162
|
-
skip_header_rows=skip_header_rows,
|
163
|
-
skip_footer_rows=skip_footer_rows
|
160
|
+
file_path, encoding=encoding, skip_header_rows=skip_header_rows, skip_footer_rows=skip_footer_rows
|
164
161
|
)
|
165
162
|
|
166
|
-
return cls.parses(
|
167
|
-
lines,
|
168
|
-
delimiter=delimiter,
|
169
|
-
strip=strip,
|
170
|
-
bookend=bookend,
|
171
|
-
bookend_strip=bookend_strip
|
172
|
-
)
|
163
|
+
return cls.parses(lines, delimiter=delimiter, strip=strip, bookend=bookend, bookend_strip=bookend_strip)
|
173
164
|
|
174
165
|
@classmethod
|
175
166
|
def _process_stream_chunk(
|
@@ -179,28 +170,22 @@ class DsvHelper:
|
|
179
170
|
delimiter: str,
|
180
171
|
strip: bool = DEFAULT_STRIP,
|
181
172
|
bookend: str | None = None,
|
182
|
-
bookend_strip: bool = DEFAULT_BOOKEND_STRIP
|
173
|
+
bookend_strip: bool = DEFAULT_BOOKEND_STRIP,
|
183
174
|
) -> list[list[str]]:
|
184
175
|
"""
|
185
176
|
Process a chunk of lines from the stream.
|
186
|
-
|
177
|
+
|
187
178
|
Args:
|
188
179
|
chunk: List of lines to process
|
189
180
|
delimiter: Delimiter to use for parsing
|
190
181
|
strip: Whether to strip whitespace
|
191
182
|
bookend: Bookend character for text fields
|
192
183
|
bookend_strip: Whether to strip whitespace from bookends
|
193
|
-
|
184
|
+
|
194
185
|
Returns:
|
195
186
|
list[list[str]]: Parsed rows
|
196
187
|
"""
|
197
|
-
return cls.parses(
|
198
|
-
chunk,
|
199
|
-
delimiter=delimiter,
|
200
|
-
strip=strip,
|
201
|
-
bookend=bookend,
|
202
|
-
bookend_strip=bookend_strip
|
203
|
-
)
|
188
|
+
return cls.parses(chunk, delimiter=delimiter, strip=strip, bookend=bookend, bookend_strip=bookend_strip)
|
204
189
|
|
205
190
|
@classmethod
|
206
191
|
def parse_stream(
|
@@ -214,7 +199,7 @@ class DsvHelper:
|
|
214
199
|
encoding: str = DEFAULT_ENCODING,
|
215
200
|
skip_header_rows: int = DEFAULT_SKIP_HEADER_ROWS,
|
216
201
|
skip_footer_rows: int = DEFAULT_SKIP_FOOTER_ROWS,
|
217
|
-
chunk_size: int = DEFAULT_CHUNK_SIZE
|
202
|
+
chunk_size: int = DEFAULT_CHUNK_SIZE,
|
218
203
|
) -> Iterator[list[list[str]]]:
|
219
204
|
"""
|
220
205
|
Stream-parse a DSV file in chunks of lines.
|
@@ -247,17 +232,15 @@ class DsvHelper:
|
|
247
232
|
skip_footer_rows = max(skip_footer_rows, cls.DEFAULT_SKIP_FOOTER_ROWS)
|
248
233
|
|
249
234
|
# Use TextFileHelper.read_as_stream for consistent error handling
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
bookend_strip=bookend_strip
|
263
|
-
)
|
235
|
+
yield from (
|
236
|
+
cls._process_stream_chunk(
|
237
|
+
chunk, delimiter=delimiter, strip=strip, bookend=bookend, bookend_strip=bookend_strip
|
238
|
+
)
|
239
|
+
for chunk in TextFileHelper.read_as_stream(
|
240
|
+
file_path,
|
241
|
+
encoding=encoding,
|
242
|
+
skip_header_rows=skip_header_rows,
|
243
|
+
skip_footer_rows=skip_footer_rows,
|
244
|
+
chunk_size=chunk_size,
|
245
|
+
)
|
246
|
+
)
|
@@ -14,16 +14,11 @@ This module is licensed under the MIT License.
|
|
14
14
|
|
15
15
|
class SplurgeDsvError(Exception):
|
16
16
|
"""Base exception for all splurge-dsv errors."""
|
17
|
-
|
18
|
-
def __init__(
|
19
|
-
self,
|
20
|
-
message: str,
|
21
|
-
*,
|
22
|
-
details: str | None = None
|
23
|
-
) -> None:
|
17
|
+
|
18
|
+
def __init__(self, message: str, *, details: str | None = None) -> None:
|
24
19
|
"""
|
25
20
|
Initialize SplurgeDsvError.
|
26
|
-
|
21
|
+
|
27
22
|
Args:
|
28
23
|
message: Primary error message
|
29
24
|
details: Additional error details
|
@@ -35,89 +30,107 @@ class SplurgeDsvError(Exception):
|
|
35
30
|
|
36
31
|
class SplurgeValidationError(SplurgeDsvError):
|
37
32
|
"""Raised when data validation fails."""
|
33
|
+
|
38
34
|
pass
|
39
35
|
|
40
36
|
|
41
37
|
class SplurgeFileOperationError(SplurgeDsvError):
|
42
38
|
"""Base exception for file operation errors."""
|
39
|
+
|
43
40
|
pass
|
44
41
|
|
45
42
|
|
46
43
|
class SplurgeFileNotFoundError(SplurgeFileOperationError):
|
47
44
|
"""Raised when a file is not found."""
|
45
|
+
|
48
46
|
pass
|
49
47
|
|
50
48
|
|
51
49
|
class SplurgeFilePermissionError(SplurgeFileOperationError):
|
52
50
|
"""Raised when there are permission issues with file operations."""
|
51
|
+
|
53
52
|
pass
|
54
53
|
|
55
54
|
|
56
55
|
class SplurgeFileEncodingError(SplurgeFileOperationError):
|
57
56
|
"""Raised when there are encoding issues with file operations."""
|
57
|
+
|
58
58
|
pass
|
59
59
|
|
60
60
|
|
61
61
|
class SplurgePathValidationError(SplurgeFileOperationError):
|
62
62
|
"""Raised when file path validation fails."""
|
63
|
+
|
63
64
|
pass
|
64
65
|
|
65
66
|
|
66
67
|
class SplurgeDataProcessingError(SplurgeDsvError):
|
67
68
|
"""Base exception for data processing errors."""
|
69
|
+
|
68
70
|
pass
|
69
71
|
|
70
72
|
|
71
73
|
class SplurgeParsingError(SplurgeDataProcessingError):
|
72
74
|
"""Raised when data parsing fails."""
|
75
|
+
|
73
76
|
pass
|
74
77
|
|
75
78
|
|
76
79
|
class SplurgeTypeConversionError(SplurgeDataProcessingError):
|
77
80
|
"""Raised when type conversion fails."""
|
81
|
+
|
78
82
|
pass
|
79
83
|
|
80
84
|
|
81
85
|
class SplurgeStreamingError(SplurgeDataProcessingError):
|
82
86
|
"""Raised when streaming operations fail."""
|
87
|
+
|
83
88
|
pass
|
84
89
|
|
85
90
|
|
86
91
|
class SplurgeConfigurationError(SplurgeDsvError):
|
87
92
|
"""Raised when configuration is invalid."""
|
93
|
+
|
88
94
|
pass
|
89
95
|
|
90
96
|
|
91
97
|
class SplurgeResourceError(SplurgeDsvError):
|
92
98
|
"""Base exception for resource management errors."""
|
99
|
+
|
93
100
|
pass
|
94
101
|
|
95
102
|
|
96
103
|
class SplurgeResourceAcquisitionError(SplurgeResourceError):
|
97
104
|
"""Raised when resource acquisition fails."""
|
105
|
+
|
98
106
|
pass
|
99
107
|
|
100
108
|
|
101
109
|
class SplurgeResourceReleaseError(SplurgeResourceError):
|
102
110
|
"""Raised when resource release fails."""
|
111
|
+
|
103
112
|
pass
|
104
113
|
|
105
114
|
|
106
115
|
class SplurgePerformanceWarning(SplurgeDsvError):
|
107
116
|
"""Warning for performance-related issues."""
|
117
|
+
|
108
118
|
pass
|
109
119
|
|
110
120
|
|
111
121
|
class SplurgeParameterError(SplurgeValidationError):
|
112
122
|
"""Raised when function parameters are invalid."""
|
123
|
+
|
113
124
|
pass
|
114
125
|
|
115
126
|
|
116
127
|
class SplurgeRangeError(SplurgeValidationError):
|
117
128
|
"""Raised when values are outside expected ranges."""
|
129
|
+
|
118
130
|
pass
|
119
131
|
|
120
132
|
|
121
133
|
class SplurgeFormatError(SplurgeValidationError):
|
122
134
|
"""Raised when data format is invalid."""
|
123
|
-
|
135
|
+
|
136
|
+
pass
|