splurge-dsv 2025.1.2__py3-none-any.whl → 2025.1.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- splurge_dsv/__init__.py +1 -1
- splurge_dsv/__main__.py +3 -3
- splurge_dsv/cli.py +53 -16
- splurge_dsv/path_validator.py +1 -1
- {splurge_dsv-2025.1.2.dist-info → splurge_dsv-2025.1.4.dist-info}/METADATA +10 -85
- splurge_dsv-2025.1.4.dist-info/RECORD +14 -0
- splurge_dsv-2025.1.2.dist-info/RECORD +0 -14
- {splurge_dsv-2025.1.2.dist-info → splurge_dsv-2025.1.4.dist-info}/WHEEL +0 -0
- {splurge_dsv-2025.1.2.dist-info → splurge_dsv-2025.1.4.dist-info}/licenses/LICENSE +0 -0
- {splurge_dsv-2025.1.2.dist-info → splurge_dsv-2025.1.4.dist-info}/top_level.txt +0 -0
splurge_dsv/__init__.py
CHANGED
@@ -44,7 +44,7 @@ from splurge_dsv.resource_manager import (
|
|
44
44
|
from splurge_dsv.string_tokenizer import StringTokenizer
|
45
45
|
from splurge_dsv.text_file_helper import TextFileHelper
|
46
46
|
|
47
|
-
__version__ = "2025.1.2"
|
47
|
+
__version__ = "2025.1.4"
|
48
48
|
__author__ = "Jim Schilling"
|
49
49
|
__license__ = "MIT"
|
50
50
|
|
splurge_dsv/__main__.py
CHANGED
@@ -2,14 +2,14 @@
|
|
2
2
|
Command-line interface entry point for splurge-dsv.
|
3
3
|
|
4
4
|
This module serves as the entry point when running the package as a module.
|
5
|
-
It imports and calls the main function from the cli module.
|
5
|
+
It imports and calls the run_cli function from the cli module.
|
6
6
|
"""
|
7
7
|
|
8
8
|
# Standard library imports
|
9
9
|
import sys
|
10
10
|
|
11
11
|
# Local imports
|
12
|
-
from splurge_dsv.cli import main
|
12
|
+
from splurge_dsv.cli import run_cli
|
13
13
|
|
14
14
|
if __name__ == "__main__":
|
15
|
-
sys.exit(main())
|
15
|
+
sys.exit(run_cli())
|
splurge_dsv/cli.py
CHANGED
@@ -4,17 +4,21 @@ Command-line interface for splurge-dsv.
|
|
4
4
|
This module provides a command-line interface for the splurge-dsv library,
|
5
5
|
allowing users to parse DSV files from the command line.
|
6
6
|
|
7
|
-
|
8
|
-
|
9
|
-
|
7
|
+
Copyright (c) 2025 Jim Schilling
|
8
|
+
|
9
|
+
This module is licensed under the MIT License.
|
10
|
+
|
11
|
+
Please preserve this header and all related material when sharing!
|
10
12
|
"""
|
11
13
|
|
12
14
|
# Standard library imports
|
13
15
|
import argparse
|
16
|
+
import json
|
14
17
|
import sys
|
15
18
|
from pathlib import Path
|
16
19
|
|
17
20
|
# Local imports
|
21
|
+
from splurge_dsv import __version__
|
18
22
|
from splurge_dsv.dsv_helper import DsvHelper
|
19
23
|
from splurge_dsv.exceptions import SplurgeDsvError
|
20
24
|
|
@@ -54,7 +58,14 @@ Examples:
|
|
54
58
|
|
55
59
|
parser.add_argument("--chunk-size", type=int, default=500, help="Chunk size for streaming (default: 500)")
|
56
60
|
|
57
|
-
parser.add_argument(
|
61
|
+
parser.add_argument(
|
62
|
+
"--output-format",
|
63
|
+
choices=["table", "json"],
|
64
|
+
default="table",
|
65
|
+
help="Output format for results (default: table)",
|
66
|
+
)
|
67
|
+
|
68
|
+
parser.add_argument("--version", action="version", version=f"%(prog)s {__version__}")
|
58
69
|
|
59
70
|
return parser.parse_args()
|
60
71
|
|
@@ -88,12 +99,29 @@ def print_results(rows: list[list[str]], delimiter: str) -> None:
|
|
88
99
|
print("-" * (sum(max_widths) + len(max_widths) * 3 - 1))
|
89
100
|
|
90
101
|
|
91
|
-
def main() -> int:
|
92
|
-
"""
|
102
|
+
def run_cli() -> int:
|
103
|
+
"""Run the command-line interface for DSV file parsing.
|
104
|
+
|
105
|
+
This function serves as the main entry point for the splurge-dsv CLI tool.
|
106
|
+
It parses command-line arguments, validates the input file, and processes
|
107
|
+
DSV files according to the specified options. Supports both regular parsing
|
108
|
+
and streaming modes for large files.
|
109
|
+
|
110
|
+
Returns:
|
111
|
+
int: Exit code indicating success or failure:
|
112
|
+
- 0: Success
|
113
|
+
- 1: Generic error (file not found, parsing error, etc.)
|
114
|
+
- 2: Invalid arguments
|
115
|
+
- 130: Operation interrupted (Ctrl+C)
|
116
|
+
|
117
|
+
Raises:
|
118
|
+
SystemExit: Terminates the program with the appropriate exit code.
|
119
|
+
This is handled internally and should not be caught by callers.
|
120
|
+
"""
|
93
121
|
try:
|
94
122
|
args = parse_arguments()
|
95
123
|
|
96
|
-
# Validate file path
|
124
|
+
# Validate file path (kept local to maintain test compatibility)
|
97
125
|
file_path = Path(args.file_path)
|
98
126
|
if not file_path.exists():
|
99
127
|
print(f"Error: File '{args.file_path}' not found.", file=sys.stderr)
|
@@ -105,7 +133,8 @@ def main() -> int:
|
|
105
133
|
|
106
134
|
# Parse the file
|
107
135
|
if args.stream:
|
108
|
-
|
136
|
+
if args.output_format != "json":
|
137
|
+
print(f"Streaming file '{args.file_path}' with delimiter '{args.delimiter}'...")
|
109
138
|
chunk_count = 0
|
110
139
|
total_rows = 0
|
111
140
|
|
@@ -122,13 +151,18 @@ def main() -> int:
|
|
122
151
|
):
|
123
152
|
chunk_count += 1
|
124
153
|
total_rows += len(chunk)
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
154
|
+
if args.output_format == "json":
|
155
|
+
print(json.dumps(chunk, ensure_ascii=False))
|
156
|
+
else:
|
157
|
+
print(f"Chunk {chunk_count}: {len(chunk)} rows")
|
158
|
+
print_results(chunk, args.delimiter)
|
159
|
+
print()
|
160
|
+
|
161
|
+
if args.output_format != "json":
|
162
|
+
print(f"Total: {total_rows} rows in {chunk_count} chunks")
|
130
163
|
else:
|
131
|
-
|
164
|
+
if args.output_format != "json":
|
165
|
+
print(f"Parsing file '{args.file_path}' with delimiter '{args.delimiter}'...")
|
132
166
|
rows = DsvHelper.parse_file(
|
133
167
|
file_path,
|
134
168
|
delimiter=args.delimiter,
|
@@ -140,8 +174,11 @@ def main() -> int:
|
|
140
174
|
skip_footer_rows=args.skip_footer,
|
141
175
|
)
|
142
176
|
|
143
|
-
|
144
|
-
|
177
|
+
if args.output_format == "json":
|
178
|
+
print(json.dumps(rows, ensure_ascii=False))
|
179
|
+
else:
|
180
|
+
print(f"Parsed {len(rows)} rows")
|
181
|
+
print_results(rows, args.delimiter)
|
145
182
|
|
146
183
|
return 0
|
147
184
|
|
splurge_dsv/path_validator.py
CHANGED
@@ -198,7 +198,7 @@ class PathValidator:
|
|
198
198
|
False otherwise
|
199
199
|
"""
|
200
200
|
# Must be C: at the end of the string, or C:\ (or C:/) followed by path
|
201
|
-
return re.match(r"^[A-Za-z]:$", path_str) or re.match(r"^[A-Za-z]:[\\/]", path_str)
|
201
|
+
return bool(re.match(r"^[A-Za-z]:$", path_str)) or bool(re.match(r"^[A-Za-z]:[\\/]", path_str))
|
202
202
|
|
203
203
|
@classmethod
|
204
204
|
def _check_dangerous_characters(cls, path_str: str) -> None:
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: splurge-dsv
|
3
|
-
Version: 2025.1.2
|
3
|
+
Version: 2025.1.4
|
4
4
|
Summary: A utility library for working with DSV (Delimited String Values) files
|
5
5
|
Author: Jim Schilling
|
6
6
|
License-Expression: MIT
|
@@ -53,8 +53,8 @@ A robust Python library for parsing and processing delimited-separated value (DS
|
|
53
53
|
- **Error Recovery**: Graceful error handling with detailed error messages
|
54
54
|
|
55
55
|
### 🧪 Testing & Quality
|
56
|
-
- **Comprehensive Test Suite**:
|
57
|
-
- **Cross-Platform Support**: Tested on Windows, Linux
|
56
|
+
- **Comprehensive Test Suite**: 250+ tests with 85%+ coverage gate
|
57
|
+
- **Cross-Platform Support**: Tested on Windows, and should pass on Linux and macOS
|
58
58
|
- **Type Safety**: Full type annotations and validation
|
59
59
|
- **Documentation**: Complete API documentation with examples
|
60
60
|
|
@@ -238,97 +238,22 @@ The project follows strict coding standards:
|
|
238
238
|
- PEP 8 compliance
|
239
239
|
- Type annotations for all functions
|
240
240
|
- Google-style docstrings
|
241
|
-
-
|
241
|
+
- 85%+ coverage gate enforced via CI
|
242
242
|
- Comprehensive error handling
|
243
243
|
|
244
244
|
## Changelog
|
245
245
|
|
246
|
-
|
247
|
-
|
248
|
-
#### 🧪 Comprehensive End-to-End Testing
|
249
|
-
- **Complete E2E Test Suite**: Implemented 25 comprehensive end-to-end workflow tests covering all major CLI functionality
|
250
|
-
- **Real CLI Execution**: Tests run actual `splurge-dsv` commands with real files, not just mocked components
|
251
|
-
- **Workflow Coverage**: Tests cover CSV/TSV parsing, file operations, data processing, error handling, and performance scenarios
|
252
|
-
- **Cross-Platform Compatibility**: Handles Windows-specific encoding issues and platform differences gracefully
|
253
|
-
- **Performance Testing**: Large file processing tests (1,000+ and 10,000+ rows) with streaming and chunking validation
|
254
|
-
|
255
|
-
#### 📊 Test Coverage Improvements
|
256
|
-
- **CLI Coverage**: Increased from 64% to **95%** with comprehensive CLI workflow testing
|
257
|
-
- **DSV Helper Coverage**: Improved from 75% to **93%** with real-world usage scenarios
|
258
|
-
- **Overall Coverage**: Improved from 60% to **73%** across the entire codebase
|
259
|
-
- **Integration Testing**: Added real file system operations and complete pipeline validation
|
260
|
-
|
261
|
-
#### 🔄 Test Categories
|
262
|
-
- **CLI Workflows**: 19 tests covering basic parsing, custom delimiters, header/footer skipping, streaming, and error scenarios
|
263
|
-
- **Error Handling**: 3 tests for invalid arguments, missing parameters, and CLI error conditions
|
264
|
-
- **Integration Scenarios**: 3 tests for data analysis, transformation, and multi-format workflows
|
265
|
-
|
266
|
-
#### 📚 Documentation & Examples
|
267
|
-
- **E2E Testing Guide**: Created comprehensive documentation (`docs/e2e_testing_coverage.md`) explaining test coverage and usage
|
268
|
-
- **Real-World Examples**: Tests serve as practical examples of library usage patterns
|
269
|
-
- **Error Scenario Coverage**: Comprehensive testing of edge cases and failure conditions
|
270
|
-
|
271
|
-
### 2025.1.1 (2025-08-XX)
|
272
|
-
|
273
|
-
#### 🔧 Code Quality Improvements
|
274
|
-
- **Refactored Complex Regex Logic**: Extracted Windows drive letter validation logic from `_check_dangerous_characters` into a dedicated `_is_valid_windows_drive_pattern` helper method in `PathValidator` for better readability and maintainability
|
275
|
-
- **Exception Handling Consistency**: Fixed inconsistency in `ResourceManager.acquire()` method to properly re-raise `NotImplementedError` without wrapping it in `SplurgeResourceAcquisitionError`
|
276
|
-
- **Import Organization**: Moved all imports to the top of modules across the entire codebase for better code structure and PEP 8 compliance
|
277
|
-
|
278
|
-
#### 🧪 Testing Enhancements
|
279
|
-
- **Public API Focus**: Removed all tests that validated private implementation details, focusing exclusively on public API behavior validation
|
280
|
-
- **Comprehensive Resource Manager Tests**: Added extensive test suite for `ResourceManager` module covering all public methods, edge cases, error scenarios, and context manager behavior
|
281
|
-
- **Bookend Logic Clarification**: Updated and corrected all tests related to `StringTokenizer.remove_bookends` to properly reflect its single-character, symmetric bookend matching behavior
|
282
|
-
- **Path Validation Test Clarity**: Clarified test expectations and comments for Windows drive-relative paths (e.g., "C:file.txt") to reflect the validator's intentionally strict security design
|
283
|
-
|
284
|
-
#### 🐛 Bug Fixes
|
285
|
-
- **Test Reliability**: Fixed failing tests in `ResourceManager` context manager scenarios by properly handling file truncation and line ending normalization
|
286
|
-
- **Ruff Compliance**: Resolved all linting warnings including unused variables and imports
|
287
|
-
|
288
|
-
#### 📚 Documentation Updates
|
289
|
-
- **Method Documentation**: Updated `ResourceManager.acquire()` docstring to include `NotImplementedError` in the Raises section
|
290
|
-
- **Test Comments**: Enhanced test documentation with clearer explanations of expected behaviors and edge cases
|
291
|
-
|
292
|
-
### 2025.1.0 (2025-08-25)
|
293
|
-
|
294
|
-
#### 🎉 Major Features
|
295
|
-
- **Complete DSV Parser**: Full-featured delimited-separated value parser with support for CSV, TSV, and custom delimiters
|
296
|
-
- **Streaming Support**: Memory-efficient streaming for large files with configurable chunk sizes
|
297
|
-
- **Advanced Parsing Options**: Bookend removal, whitespace handling, and encoding support
|
298
|
-
- **Header/Footer Skipping**: Skip specified numbers of rows from start or end of files
|
299
|
-
|
300
|
-
#### 🛡️ Security Enhancements
|
301
|
-
- **Path Validation System**: Comprehensive file path security validation with traversal attack prevention
|
302
|
-
- **File Permission Checks**: Automatic file accessibility and permission validation
|
303
|
-
- **Encoding Validation**: Robust encoding error detection and handling
|
304
|
-
|
305
|
-
#### 🔧 Core Components
|
306
|
-
- **DsvHelper**: Main DSV parsing class with parse, parses, parse_file, and parse_stream methods
|
307
|
-
- **TextFileHelper**: Utility class for text file operations (line counting, preview, reading, streaming)
|
308
|
-
- **PathValidator**: Security-focused path validation utilities
|
309
|
-
- **ResourceManager**: Context managers for safe resource handling
|
310
|
-
- **StringTokenizer**: Core string parsing functionality
|
311
|
-
|
312
|
-
#### 🧪 Testing & Quality
|
313
|
-
- **Comprehensive Test Suite**: 250+ tests with 90%+ code coverage
|
314
|
-
- **Cross-Platform Testing**: Tested on Windows, Linux, and macOS
|
315
|
-
- **Type Safety**: Full type annotations throughout the codebase
|
316
|
-
- **Error Handling**: Custom exception hierarchy with detailed error messages
|
317
|
-
|
318
|
-
#### 📚 Documentation
|
319
|
-
- **Complete API Documentation**: Google-style docstrings for all public methods
|
320
|
-
- **Usage Examples**: Comprehensive examples for all major features
|
321
|
-
- **Error Documentation**: Detailed error handling documentation
|
322
|
-
|
323
|
-
#### 🚀 Performance
|
324
|
-
- **Memory Efficiency**: Streaming support for large files
|
325
|
-
- **Optimized Parsing**: Efficient string tokenization and processing
|
326
|
-
- **Resource Management**: Automatic cleanup and resource management
|
246
|
+
See the [CHANGELOG](CHANGELOG.md) for full release notes.
|
327
247
|
|
328
248
|
## License
|
329
249
|
|
330
250
|
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
|
331
251
|
|
252
|
+
## More Documentation
|
253
|
+
|
254
|
+
- Detailed docs: [docs/README-details.md](docs/README-details.md)
|
255
|
+
- E2E testing coverage: [docs/e2e_testing_coverage.md](docs/e2e_testing_coverage.md)
|
256
|
+
|
332
257
|
## Contributing
|
333
258
|
|
334
259
|
Contributions are welcome! Please feel free to submit a Pull Request. For major changes, please open an issue first to discuss what you would like to change.
|
@@ -0,0 +1,14 @@
|
|
1
|
+
splurge_dsv/__init__.py,sha256=I_BAM3zHWRZLyY5ndCweBCg1w_cbBDZhdKlxUp6Sx8Q,2415
|
2
|
+
splurge_dsv/__main__.py,sha256=CX8zVMaK6vaaCaC396FyXA1E_-jqKz4zh3CVW8mGKac,359
|
3
|
+
splurge_dsv/cli.py,sha256=hcoF2OC3gHrN4SegSUFQeGxHuWWIBAJn-fTNW19KnyA,7261
|
4
|
+
splurge_dsv/dsv_helper.py,sha256=VRq2ejx6y-JBtFKJdAaK-GD4V0eoxZZfmoX0I3CSbDI,9428
|
5
|
+
splurge_dsv/exceptions.py,sha256=cu9Jd2pGhy7GBbbngH6zs0lfZzLp_OvGwbnsG5khp80,3035
|
6
|
+
splurge_dsv/path_validator.py,sha256=RuRFjtHWE1Z5-DlSBBZMoHJegQEFYEQ0HJgN7ndre2k,9969
|
7
|
+
splurge_dsv/resource_manager.py,sha256=g4igv1hhJdPtw4A1P-WdaNKcYMfEGCtZ4xaU4ynVbKw,12045
|
8
|
+
splurge_dsv/string_tokenizer.py,sha256=wBKWdi68rreTqf3RF-8Oxh1nz6QdnbOyWflw2x8pGWY,4022
|
9
|
+
splurge_dsv/text_file_helper.py,sha256=vdhEv9uK0hsNXvTO5SoWwFsI9fPkhdIUXuaJs80DVDA,13573
|
10
|
+
splurge_dsv-2025.1.4.dist-info/licenses/LICENSE,sha256=fPgtg-tIFHinQvJH0arRfv50AuxikD5eHw6rrPy2A5w,1091
|
11
|
+
splurge_dsv-2025.1.4.dist-info/METADATA,sha256=sxdtsP6DfbVRkdVx6LcgcH23DEguS01gniAzN9ELlZM,8573
|
12
|
+
splurge_dsv-2025.1.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
13
|
+
splurge_dsv-2025.1.4.dist-info/top_level.txt,sha256=D6Si3FTfpRYqH7kzM7tSQAyaKbbraO6UPLpcqcY4XXM,12
|
14
|
+
splurge_dsv-2025.1.4.dist-info/RECORD,,
|
@@ -1,14 +0,0 @@
|
|
1
|
-
splurge_dsv/__init__.py,sha256=UBfew2IVQhg5VbHjTe_O7e2b1QeoYen_so--4B6p0h4,2415
|
2
|
-
splurge_dsv/__main__.py,sha256=RkXcU3P3tHpFa6iGJfx8sAZ4aUcz6z7TIN_1q_5N9xE,354
|
3
|
-
splurge_dsv/cli.py,sha256=tRIQT0InflJPzy8SGUaHyD0aPMr7CgmyHbSVRkaQeoo,5759
|
4
|
-
splurge_dsv/dsv_helper.py,sha256=VRq2ejx6y-JBtFKJdAaK-GD4V0eoxZZfmoX0I3CSbDI,9428
|
5
|
-
splurge_dsv/exceptions.py,sha256=cu9Jd2pGhy7GBbbngH6zs0lfZzLp_OvGwbnsG5khp80,3035
|
6
|
-
splurge_dsv/path_validator.py,sha256=R314U2TmQjYVe_KOv4ni08z1_tLuo4_Uuld__dhX_gw,9957
|
7
|
-
splurge_dsv/resource_manager.py,sha256=g4igv1hhJdPtw4A1P-WdaNKcYMfEGCtZ4xaU4ynVbKw,12045
|
8
|
-
splurge_dsv/string_tokenizer.py,sha256=wBKWdi68rreTqf3RF-8Oxh1nz6QdnbOyWflw2x8pGWY,4022
|
9
|
-
splurge_dsv/text_file_helper.py,sha256=vdhEv9uK0hsNXvTO5SoWwFsI9fPkhdIUXuaJs80DVDA,13573
|
10
|
-
splurge_dsv-2025.1.2.dist-info/licenses/LICENSE,sha256=fPgtg-tIFHinQvJH0arRfv50AuxikD5eHw6rrPy2A5w,1091
|
11
|
-
splurge_dsv-2025.1.2.dist-info/METADATA,sha256=z0qLKtJ0Y8PgVhstBLpVUTATsWRQEv-GI34aaJOxbfs,14021
|
12
|
-
splurge_dsv-2025.1.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
13
|
-
splurge_dsv-2025.1.2.dist-info/top_level.txt,sha256=D6Si3FTfpRYqH7kzM7tSQAyaKbbraO6UPLpcqcY4XXM,12
|
14
|
-
splurge_dsv-2025.1.2.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|