splurge-dsv 2025.1.3__py3-none-any.whl → 2025.1.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- splurge_dsv/__init__.py +1 -1
- splurge_dsv/__main__.py +3 -3
- splurge_dsv/cli.py +53 -12
- {splurge_dsv-2025.1.3.dist-info → splurge_dsv-2025.1.5.dist-info}/METADATA +12 -87
- {splurge_dsv-2025.1.3.dist-info → splurge_dsv-2025.1.5.dist-info}/RECORD +8 -8
- {splurge_dsv-2025.1.3.dist-info → splurge_dsv-2025.1.5.dist-info}/WHEEL +0 -0
- {splurge_dsv-2025.1.3.dist-info → splurge_dsv-2025.1.5.dist-info}/licenses/LICENSE +0 -0
- {splurge_dsv-2025.1.3.dist-info → splurge_dsv-2025.1.5.dist-info}/top_level.txt +0 -0
splurge_dsv/__init__.py
CHANGED
@@ -44,7 +44,7 @@ from splurge_dsv.resource_manager import (
|
|
44
44
|
from splurge_dsv.string_tokenizer import StringTokenizer
|
45
45
|
from splurge_dsv.text_file_helper import TextFileHelper
|
46
46
|
|
47
|
-
__version__ = "2025.1.3"
|
47
|
+
__version__ = "2025.1.5"
|
48
48
|
__author__ = "Jim Schilling"
|
49
49
|
__license__ = "MIT"
|
50
50
|
|
splurge_dsv/__main__.py
CHANGED
@@ -2,14 +2,14 @@
|
|
2
2
|
Command-line interface entry point for splurge-dsv.
|
3
3
|
|
4
4
|
This module serves as the entry point when running the package as a module.
|
5
|
-
It imports and calls the main function from the cli module.
|
5
|
+
It imports and calls the run_cli function from the cli module.
|
6
6
|
"""
|
7
7
|
|
8
8
|
# Standard library imports
|
9
9
|
import sys
|
10
10
|
|
11
11
|
# Local imports
|
12
|
-
from splurge_dsv.cli import main
|
12
|
+
from splurge_dsv.cli import run_cli
|
13
13
|
|
14
14
|
if __name__ == "__main__":
|
15
|
-
sys.exit(main())
|
15
|
+
sys.exit(run_cli())
|
splurge_dsv/cli.py
CHANGED
@@ -13,10 +13,12 @@ Please preserve this header and all related material when sharing!
|
|
13
13
|
|
14
14
|
# Standard library imports
|
15
15
|
import argparse
|
16
|
+
import json
|
16
17
|
import sys
|
17
18
|
from pathlib import Path
|
18
19
|
|
19
20
|
# Local imports
|
21
|
+
from splurge_dsv import __version__
|
20
22
|
from splurge_dsv.dsv_helper import DsvHelper
|
21
23
|
from splurge_dsv.exceptions import SplurgeDsvError
|
22
24
|
|
@@ -56,7 +58,14 @@ Examples:
|
|
56
58
|
|
57
59
|
parser.add_argument("--chunk-size", type=int, default=500, help="Chunk size for streaming (default: 500)")
|
58
60
|
|
59
|
-
parser.add_argument(
|
61
|
+
parser.add_argument(
|
62
|
+
"--output-format",
|
63
|
+
choices=["table", "json", "ndjson"],
|
64
|
+
default="table",
|
65
|
+
help="Output format for results (default: table)",
|
66
|
+
)
|
67
|
+
|
68
|
+
parser.add_argument("--version", action="version", version=f"%(prog)s {__version__}")
|
60
69
|
|
61
70
|
return parser.parse_args()
|
62
71
|
|
@@ -90,8 +99,25 @@ def print_results(rows: list[list[str]], delimiter: str) -> None:
|
|
90
99
|
print("-" * (sum(max_widths) + len(max_widths) * 3 - 1))
|
91
100
|
|
92
101
|
|
93
|
-
def main() -> int:
|
94
|
-
"""
|
102
|
+
def run_cli() -> int:
|
103
|
+
"""Run the command-line interface for DSV file parsing.
|
104
|
+
|
105
|
+
This function serves as the main entry point for the splurge-dsv CLI tool.
|
106
|
+
It parses command-line arguments, validates the input file, and processes
|
107
|
+
DSV files according to the specified options. Supports both regular parsing
|
108
|
+
and streaming modes for large files.
|
109
|
+
|
110
|
+
Returns:
|
111
|
+
int: Exit code indicating success or failure:
|
112
|
+
- 0: Success
|
113
|
+
- 1: Generic error (file not found, parsing error, etc.)
|
114
|
+
- 2: Invalid arguments
|
115
|
+
- 130: Operation interrupted (Ctrl+C)
|
116
|
+
|
117
|
+
Raises:
|
118
|
+
SystemExit: Terminates the program with the appropriate exit code.
|
119
|
+
This is handled internally and should not be caught by callers.
|
120
|
+
"""
|
95
121
|
try:
|
96
122
|
args = parse_arguments()
|
97
123
|
|
@@ -107,7 +133,8 @@ def main() -> int:
|
|
107
133
|
|
108
134
|
# Parse the file
|
109
135
|
if args.stream:
|
110
|
-
|
136
|
+
if args.output_format != "json":
|
137
|
+
print(f"Streaming file '{args.file_path}' with delimiter '{args.delimiter}'...")
|
111
138
|
chunk_count = 0
|
112
139
|
total_rows = 0
|
113
140
|
|
@@ -124,13 +151,21 @@ def main() -> int:
|
|
124
151
|
):
|
125
152
|
chunk_count += 1
|
126
153
|
total_rows += len(chunk)
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
154
|
+
if args.output_format == "json":
|
155
|
+
print(json.dumps(chunk, ensure_ascii=False))
|
156
|
+
elif args.output_format == "ndjson":
|
157
|
+
for row in chunk:
|
158
|
+
print(json.dumps(row, ensure_ascii=False))
|
159
|
+
else:
|
160
|
+
print(f"Chunk {chunk_count}: {len(chunk)} rows")
|
161
|
+
print_results(chunk, args.delimiter)
|
162
|
+
print()
|
163
|
+
|
164
|
+
if args.output_format not in ["json", "ndjson"]:
|
165
|
+
print(f"Total: {total_rows} rows in {chunk_count} chunks")
|
132
166
|
else:
|
133
|
-
|
167
|
+
if args.output_format not in ["json", "ndjson"]:
|
168
|
+
print(f"Parsing file '{args.file_path}' with delimiter '{args.delimiter}'...")
|
134
169
|
rows = DsvHelper.parse_file(
|
135
170
|
file_path,
|
136
171
|
delimiter=args.delimiter,
|
@@ -142,8 +177,14 @@ def main() -> int:
|
|
142
177
|
skip_footer_rows=args.skip_footer,
|
143
178
|
)
|
144
179
|
|
145
|
-
|
146
|
-
|
180
|
+
if args.output_format == "json":
|
181
|
+
print(json.dumps(rows, ensure_ascii=False))
|
182
|
+
elif args.output_format == "ndjson":
|
183
|
+
for row in rows:
|
184
|
+
print(json.dumps(row, ensure_ascii=False))
|
185
|
+
else:
|
186
|
+
print(f"Parsed {len(rows)} rows")
|
187
|
+
print_results(rows, args.delimiter)
|
147
188
|
|
148
189
|
return 0
|
149
190
|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: splurge-dsv
|
3
|
-
Version: 2025.1.3
|
3
|
+
Version: 2025.1.5
|
4
4
|
Summary: A utility library for working with DSV (Delimited String Values) files
|
5
5
|
Author: Jim Schilling
|
6
6
|
License-Expression: MIT
|
@@ -29,6 +29,11 @@ Dynamic: license-file
|
|
29
29
|
|
30
30
|
# splurge-dsv
|
31
31
|
|
32
|
+
[PyPI version](https://pypi.org/project/splurge-dsv/)
|
33
|
+
[Python versions](https://pypi.org/project/splurge-dsv/)
|
34
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
35
|
+
[GitHub repository](https://github.com/jim-schilling/splurge-dsv)
|
36
|
+
|
32
37
|
A robust Python library for parsing and processing delimited-separated value (DSV) files with advanced features for data validation, streaming, and error handling.
|
33
38
|
|
34
39
|
## Features
|
@@ -243,97 +248,17 @@ The project follows strict coding standards:
|
|
243
248
|
|
244
249
|
## Changelog
|
245
250
|
|
246
|
-
|
247
|
-
|
248
|
-
#### 🔧 Maintenance & Consistency
|
249
|
-
- **Version Alignment**: Bumped `__version__` and CLI `--version` to `2025.1.3` to match `pyproject.toml`.
|
250
|
-
- **CLI Path Validation**: Centralized validation using `PathValidator.validate_path(...)` for consistent error handling.
|
251
|
-
- **Type Correctness**: Fixed `PathValidator._is_valid_windows_drive_pattern` to return `bool` explicitly.
|
252
|
-
- **Docs Alignment**: Updated README coverage claims to reflect the `>=85%` coverage gate configured in CI.
|
253
|
-
|
254
|
-
### 2025.1.2 (2025-09-02)
|
255
|
-
|
256
|
-
#### 🧪 Comprehensive End-to-End Testing
|
257
|
-
- **Complete E2E Test Suite**: Implemented 25 comprehensive end-to-end workflow tests covering all major CLI functionality
|
258
|
-
- **Real CLI Execution**: Tests run actual `splurge-dsv` commands with real files, not just mocked components
|
259
|
-
- **Workflow Coverage**: Tests cover CSV/TSV parsing, file operations, data processing, error handling, and performance scenarios
|
260
|
-
- **Cross-Platform Compatibility**: Handles Windows-specific encoding issues and platform differences gracefully
|
261
|
-
- **Performance Testing**: Large file processing tests (1,000+ and 10,000+ rows) with streaming and chunking validation
|
262
|
-
|
263
|
-
#### 📊 Test Coverage Improvements
|
264
|
-
- **Integration Testing**: Added real file system operations and complete pipeline validation
|
265
|
-
|
266
|
-
#### 🔄 Test Categories
|
267
|
-
- **CLI Workflows**: 19 tests covering basic parsing, custom delimiters, header/footer skipping, streaming, and error scenarios
|
268
|
-
- **Error Handling**: 3 tests for invalid arguments, missing parameters, and CLI error conditions
|
269
|
-
- **Integration Scenarios**: 3 tests for data analysis, transformation, and multi-format workflows
|
270
|
-
|
271
|
-
#### 📚 Documentation & Examples
|
272
|
-
- **E2E Testing Guide**: Created comprehensive documentation (`docs/e2e_testing_coverage.md`) explaining test coverage and usage
|
273
|
-
- **Real-World Examples**: Tests serve as practical examples of library usage patterns
|
274
|
-
- **Error Scenario Coverage**: Comprehensive testing of edge cases and failure conditions
|
275
|
-
|
276
|
-
### 2025.1.1 (2025-08-XX)
|
277
|
-
|
278
|
-
#### 🔧 Code Quality Improvements
|
279
|
-
- **Refactored Complex Regex Logic**: Extracted Windows drive letter validation logic from `_check_dangerous_characters` into a dedicated `_is_valid_windows_drive_pattern` helper method in `PathValidator` for better readability and maintainability
|
280
|
-
- **Exception Handling Consistency**: Fixed inconsistency in `ResourceManager.acquire()` method to properly re-raise `NotImplementedError` without wrapping it in `SplurgeResourceAcquisitionError`
|
281
|
-
- **Import Organization**: Moved all imports to the top of modules across the entire codebase for better code structure and PEP 8 compliance
|
282
|
-
|
283
|
-
#### 🧪 Testing Enhancements
|
284
|
-
- **Public API Focus**: Removed all tests that validated private implementation details, focusing exclusively on public API behavior validation
|
285
|
-
- **Comprehensive Resource Manager Tests**: Added extensive test suite for `ResourceManager` module covering all public methods, edge cases, error scenarios, and context manager behavior
|
286
|
-
- **Bookend Logic Clarification**: Updated and corrected all tests related to `StringTokenizer.remove_bookends` to properly reflect its single-character, symmetric bookend matching behavior
|
287
|
-
- **Path Validation Test Clarity**: Clarified test expectations and comments for Windows drive-relative paths (e.g., "C:file.txt") to reflect the validator's intentionally strict security design
|
288
|
-
|
289
|
-
#### 🐛 Bug Fixes
|
290
|
-
- **Test Reliability**: Fixed failing tests in `ResourceManager` context manager scenarios by properly handling file truncation and line ending normalization
|
291
|
-
- **Ruff Compliance**: Resolved all linting warnings including unused variables and imports
|
292
|
-
|
293
|
-
#### 📚 Documentation Updates
|
294
|
-
- **Method Documentation**: Updated `ResourceManager.acquire()` docstring to include `NotImplementedError` in the Raises section
|
295
|
-
- **Test Comments**: Enhanced test documentation with clearer explanations of expected behaviors and edge cases
|
296
|
-
|
297
|
-
### 2025.1.0 (2025-08-25)
|
298
|
-
|
299
|
-
#### 🎉 Major Features
|
300
|
-
- **Complete DSV Parser**: Full-featured delimited-separated value parser with support for CSV, TSV, and custom delimiters
|
301
|
-
- **Streaming Support**: Memory-efficient streaming for large files with configurable chunk sizes
|
302
|
-
- **Advanced Parsing Options**: Bookend removal, whitespace handling, and encoding support
|
303
|
-
- **Header/Footer Skipping**: Skip specified numbers of rows from start or end of files
|
304
|
-
|
305
|
-
#### 🛡️ Security Enhancements
|
306
|
-
- **Path Validation System**: Comprehensive file path security validation with traversal attack prevention
|
307
|
-
- **File Permission Checks**: Automatic file accessibility and permission validation
|
308
|
-
- **Encoding Validation**: Robust encoding error detection and handling
|
309
|
-
|
310
|
-
#### 🔧 Core Components
|
311
|
-
- **DsvHelper**: Main DSV parsing class with parse, parses, parse_file, and parse_stream methods
|
312
|
-
- **TextFileHelper**: Utility class for text file operations (line counting, preview, reading, streaming)
|
313
|
-
- **PathValidator**: Security-focused path validation utilities
|
314
|
-
- **ResourceManager**: Context managers for safe resource handling
|
315
|
-
- **StringTokenizer**: Core string parsing functionality
|
316
|
-
|
317
|
-
#### 🧪 Testing & Quality
|
318
|
-
- **Comprehensive Test Suite**: 250+ tests with 85%+ coverage gate
|
319
|
-
- **Cross-Platform Testing**: Tested on Windows, Linux, and macOS
|
320
|
-
- **Type Safety**: Full type annotations throughout the codebase
|
321
|
-
- **Error Handling**: Custom exception hierarchy with detailed error messages
|
322
|
-
|
323
|
-
#### 📚 Documentation
|
324
|
-
- **Complete API Documentation**: Google-style docstrings for all public methods
|
325
|
-
- **Usage Examples**: Comprehensive examples for all major features
|
326
|
-
- **Error Documentation**: Detailed error handling documentation
|
327
|
-
|
328
|
-
#### 🚀 Performance
|
329
|
-
- **Memory Efficiency**: Streaming support for large files
|
330
|
-
- **Optimized Parsing**: Efficient string tokenization and processing
|
331
|
-
- **Resource Management**: Automatic cleanup and resource management
|
251
|
+
See the [CHANGELOG](CHANGELOG.md) for full release notes.
|
332
252
|
|
333
253
|
## License
|
334
254
|
|
335
255
|
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
|
336
256
|
|
257
|
+
## More Documentation
|
258
|
+
|
259
|
+
- Detailed docs: [docs/README-details.md](docs/README-details.md)
|
260
|
+
- E2E testing coverage: [docs/e2e_testing_coverage.md](docs/e2e_testing_coverage.md)
|
261
|
+
|
337
262
|
## Contributing
|
338
263
|
|
339
264
|
Contributions are welcome! Please feel free to submit a Pull Request. For major changes, please open an issue first to discuss what you would like to change.
|
@@ -1,14 +1,14 @@
|
|
1
|
-
splurge_dsv/__init__.py,sha256=
|
2
|
-
splurge_dsv/__main__.py,sha256=
|
3
|
-
splurge_dsv/cli.py,sha256=
|
1
|
+
splurge_dsv/__init__.py,sha256=cqq1MreFbu8ib04OyzkydhLc6ZpJSnc3jyMqoEouABo,2415
|
2
|
+
splurge_dsv/__main__.py,sha256=CX8zVMaK6vaaCaC396FyXA1E_-jqKz4zh3CVW8mGKac,359
|
3
|
+
splurge_dsv/cli.py,sha256=QLFdb22MSpQF3iXwxdEhETYWTup9y2pGfT6Vt3TzKuA,7612
|
4
4
|
splurge_dsv/dsv_helper.py,sha256=VRq2ejx6y-JBtFKJdAaK-GD4V0eoxZZfmoX0I3CSbDI,9428
|
5
5
|
splurge_dsv/exceptions.py,sha256=cu9Jd2pGhy7GBbbngH6zs0lfZzLp_OvGwbnsG5khp80,3035
|
6
6
|
splurge_dsv/path_validator.py,sha256=RuRFjtHWE1Z5-DlSBBZMoHJegQEFYEQ0HJgN7ndre2k,9969
|
7
7
|
splurge_dsv/resource_manager.py,sha256=g4igv1hhJdPtw4A1P-WdaNKcYMfEGCtZ4xaU4ynVbKw,12045
|
8
8
|
splurge_dsv/string_tokenizer.py,sha256=wBKWdi68rreTqf3RF-8Oxh1nz6QdnbOyWflw2x8pGWY,4022
|
9
9
|
splurge_dsv/text_file_helper.py,sha256=vdhEv9uK0hsNXvTO5SoWwFsI9fPkhdIUXuaJs80DVDA,13573
|
10
|
-
splurge_dsv-2025.1.
|
11
|
-
splurge_dsv-2025.1.
|
12
|
-
splurge_dsv-2025.1.
|
13
|
-
splurge_dsv-2025.1.
|
14
|
-
splurge_dsv-2025.1.
|
10
|
+
splurge_dsv-2025.1.5.dist-info/licenses/LICENSE,sha256=fPgtg-tIFHinQvJH0arRfv50AuxikD5eHw6rrPy2A5w,1091
|
11
|
+
splurge_dsv-2025.1.5.dist-info/METADATA,sha256=-dWt2wSfflEcg7KWeftltEFaNEve5Mdwxx3MZ9SRFAQ,9023
|
12
|
+
splurge_dsv-2025.1.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
13
|
+
splurge_dsv-2025.1.5.dist-info/top_level.txt,sha256=D6Si3FTfpRYqH7kzM7tSQAyaKbbraO6UPLpcqcY4XXM,12
|
14
|
+
splurge_dsv-2025.1.5.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|