splurge-dsv 2025.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- splurge_dsv-2025.1.0/LICENSE +21 -0
- splurge_dsv-2025.1.0/PKG-INFO +292 -0
- splurge_dsv-2025.1.0/README.md +263 -0
- splurge_dsv-2025.1.0/pyproject.toml +84 -0
- splurge_dsv-2025.1.0/setup.cfg +4 -0
- splurge_dsv-2025.1.0/splurge_dsv/__init__.py +0 -0
- splurge_dsv-2025.1.0/splurge_dsv/__main__.py +0 -0
- splurge_dsv-2025.1.0/splurge_dsv/dsv_helper.py +263 -0
- splurge_dsv-2025.1.0/splurge_dsv/exceptions.py +123 -0
- splurge_dsv-2025.1.0/splurge_dsv/path_validator.py +262 -0
- splurge_dsv-2025.1.0/splurge_dsv/resource_manager.py +432 -0
- splurge_dsv-2025.1.0/splurge_dsv/string_tokenizer.py +136 -0
- splurge_dsv-2025.1.0/splurge_dsv/text_file_helper.py +343 -0
- splurge_dsv-2025.1.0/splurge_dsv.egg-info/PKG-INFO +292 -0
- splurge_dsv-2025.1.0/splurge_dsv.egg-info/SOURCES.txt +22 -0
- splurge_dsv-2025.1.0/splurge_dsv.egg-info/dependency_links.txt +1 -0
- splurge_dsv-2025.1.0/splurge_dsv.egg-info/requires.txt +5 -0
- splurge_dsv-2025.1.0/splurge_dsv.egg-info/top_level.txt +1 -0
- splurge_dsv-2025.1.0/tests/test_dsv_helper.py +521 -0
- splurge_dsv-2025.1.0/tests/test_exceptions.py +255 -0
- splurge_dsv-2025.1.0/tests/test_path_validator.py +413 -0
- splurge_dsv-2025.1.0/tests/test_resource_manager.py +504 -0
- splurge_dsv-2025.1.0/tests/test_string_tokenizer.py +297 -0
- splurge_dsv-2025.1.0/tests/test_text_file_helper.py +580 -0
@@ -0,0 +1,21 @@
|
|
1
|
+
MIT License
|
2
|
+
|
3
|
+
Copyright (c) 2025 Jim Schilling
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
+
SOFTWARE.
|
@@ -0,0 +1,292 @@
|
|
1
|
+
Metadata-Version: 2.4
|
2
|
+
Name: splurge-dsv
|
3
|
+
Version: 2025.1.0
|
4
|
+
Summary: A utility library for working with DSV (Delimited String Values) files
|
5
|
+
Author: Jim Schilling
|
6
|
+
License-Expression: MIT
|
7
|
+
Project-URL: Homepage, https://github.com/jim-schilling/splurge-dsv
|
8
|
+
Project-URL: Repository, https://github.com/jim-schilling/splurge-dsv
|
9
|
+
Project-URL: Documentation, https://github.com/jim-schilling/splurge-dsv#readme
|
10
|
+
Project-URL: Bug Tracker, https://github.com/jim-schilling/splurge-dsv/issues
|
11
|
+
Keywords: dsv,csv,tsv,delimited,parsing,file-processing
|
12
|
+
Classifier: Development Status :: 3 - Alpha
|
13
|
+
Classifier: Intended Audience :: Developers
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
15
|
+
Classifier: Programming Language :: Python :: 3.10
|
16
|
+
Classifier: Programming Language :: Python :: 3.11
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
18
|
+
Classifier: Programming Language :: Python :: 3.13
|
19
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
20
|
+
Classifier: Topic :: Text Processing :: Filters
|
21
|
+
Requires-Python: >=3.10
|
22
|
+
Description-Content-Type: text/markdown
|
23
|
+
License-File: LICENSE
|
24
|
+
Provides-Extra: dev
|
25
|
+
Requires-Dist: pytest>=7.0.0; extra == "dev"
|
26
|
+
Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
|
27
|
+
Requires-Dist: pytest-xdist>=3.0.0; extra == "dev"
|
28
|
+
Dynamic: license-file
|
29
|
+
|
30
|
+
# splurge-dsv
|
31
|
+
|
32
|
+
A robust Python library for parsing and processing delimiter-separated values (DSV) files with advanced features for data validation, streaming, and error handling.
|
33
|
+
|
34
|
+
## Features
|
35
|
+
|
36
|
+
### 🔧 Core Functionality
|
37
|
+
- **Multi-format DSV Support**: Parse CSV, TSV, pipe-delimited, semicolon-delimited, and custom delimiter files
|
38
|
+
- **Flexible Parsing Options**: Configurable whitespace handling, bookend removal, and encoding support
|
39
|
+
- **Memory-Efficient Streaming**: Process large files without loading entire content into memory
|
40
|
+
- **Header/Footer Skipping**: Skip specified numbers of rows from start or end of files
|
41
|
+
- **Unicode Support**: Full Unicode character and delimiter support
|
42
|
+
|
43
|
+
### 🛡️ Security & Validation
|
44
|
+
- **Path Validation**: Comprehensive file path security validation with traversal attack prevention
|
45
|
+
- **File Permission Checks**: Automatic file accessibility and permission validation
|
46
|
+
- **Encoding Validation**: Robust encoding error detection and handling
|
47
|
+
- **Resource Management**: Automatic file handle cleanup and resource management
|
48
|
+
|
49
|
+
### 📊 Advanced Processing
|
50
|
+
- **Chunked Processing**: Configurable chunk sizes for streaming large datasets
|
51
|
+
- **Mixed Content Handling**: Support for quoted and unquoted values in the same file
|
52
|
+
- **Line Ending Flexibility**: Automatic handling of different line ending formats
|
53
|
+
- **Error Recovery**: Graceful error handling with detailed error messages
|
54
|
+
|
55
|
+
### 🧪 Testing & Quality
|
56
|
+
- **Comprehensive Test Suite**: 90%+ code coverage with 250+ tests
|
57
|
+
- **Cross-Platform Support**: Tested on Windows, Linux, and macOS
|
58
|
+
- **Type Safety**: Full type annotations and validation
|
59
|
+
- **Documentation**: Complete API documentation with examples
|
60
|
+
|
61
|
+
## Installation
|
62
|
+
|
63
|
+
```bash
|
64
|
+
pip install splurge-dsv
|
65
|
+
```
|
66
|
+
|
67
|
+
## Quick Start
|
68
|
+
|
69
|
+
### Basic CSV Parsing
|
70
|
+
|
71
|
+
```python
|
72
|
+
from splurge_dsv import DsvHelper
|
73
|
+
|
74
|
+
# Parse a simple CSV string
|
75
|
+
data = DsvHelper.parse("a,b,c", delimiter=",")
|
76
|
+
print(data) # ['a', 'b', 'c']
|
77
|
+
|
78
|
+
# Parse a CSV file
|
79
|
+
rows = DsvHelper.parse_file("data.csv", delimiter=",")
|
80
|
+
for row in rows:
|
81
|
+
    print(row) # ['col1', 'col2', 'col3']
|
82
|
+
```
|
83
|
+
|
84
|
+
### Streaming Large Files
|
85
|
+
|
86
|
+
```python
|
87
|
+
from splurge_dsv import DsvHelper
|
88
|
+
|
89
|
+
# Stream a large CSV file in chunks
|
90
|
+
for chunk in DsvHelper.parse_stream("large_file.csv", delimiter=",", chunk_size=1000):
|
91
|
+
    for row in chunk:
|
92
|
+
        process_row(row)
|
93
|
+
```
|
94
|
+
|
95
|
+
### Advanced Parsing Options
|
96
|
+
|
97
|
+
```python
|
98
|
+
from splurge_dsv import DsvHelper
|
99
|
+
|
100
|
+
# Parse with custom options
|
101
|
+
data = DsvHelper.parse(
|
102
|
+
'"a","b","c"',
|
103
|
+
delimiter=",",
|
104
|
+
bookend='"',
|
105
|
+
strip=True,
|
106
|
+
bookend_strip=True
|
107
|
+
)
|
108
|
+
print(data) # ['a', 'b', 'c']
|
109
|
+
|
110
|
+
# Skip header and footer rows
|
111
|
+
rows = DsvHelper.parse_file(
|
112
|
+
"data.csv",
|
113
|
+
delimiter=",",
|
114
|
+
skip_header_rows=1,
|
115
|
+
skip_footer_rows=2
|
116
|
+
)
|
117
|
+
```
|
118
|
+
|
119
|
+
### Text File Operations
|
120
|
+
|
121
|
+
```python
|
122
|
+
from splurge_dsv import TextFileHelper
|
123
|
+
|
124
|
+
# Count lines in a file
|
125
|
+
line_count = TextFileHelper.line_count("data.txt")
|
126
|
+
|
127
|
+
# Preview first N lines
|
128
|
+
preview = TextFileHelper.preview("data.txt", max_lines=10)
|
129
|
+
|
130
|
+
# Read entire file with options
|
131
|
+
lines = TextFileHelper.read(
|
132
|
+
"data.txt",
|
133
|
+
strip=True,
|
134
|
+
skip_header_rows=1,
|
135
|
+
skip_footer_rows=1
|
136
|
+
)
|
137
|
+
|
138
|
+
# Stream file content
|
139
|
+
for chunk in TextFileHelper.read_as_stream("large_file.txt", chunk_size=500):
|
140
|
+
    process_chunk(chunk)
|
141
|
+
```
|
142
|
+
|
143
|
+
### Path Validation
|
144
|
+
|
145
|
+
```python
|
146
|
+
from splurge_dsv import PathValidator
|
147
|
+
|
148
|
+
# Validate a file path
|
149
|
+
valid_path = PathValidator.validate_path(
|
150
|
+
"data.csv",
|
151
|
+
must_exist=True,
|
152
|
+
must_be_file=True,
|
153
|
+
must_be_readable=True
|
154
|
+
)
|
155
|
+
|
156
|
+
# Check if path is safe
|
157
|
+
is_safe = PathValidator.is_safe_path("user_input_path.txt")
|
158
|
+
```
|
159
|
+
|
160
|
+
## API Reference
|
161
|
+
|
162
|
+
### DsvHelper
|
163
|
+
|
164
|
+
Main class for DSV parsing operations.
|
165
|
+
|
166
|
+
#### Methods
|
167
|
+
|
168
|
+
- `parse(content, delimiter, strip=True, bookend=None, bookend_strip=True)` - Parse a single string
|
169
|
+
- `parses(content_list, delimiter, strip=True, bookend=None, bookend_strip=True)` - Parse multiple strings
|
170
|
+
- `parse_file(file_path, delimiter, strip=True, bookend=None, bookend_strip=True, skip_header_rows=0, skip_footer_rows=0, encoding='utf-8')` - Parse a file
|
171
|
+
- `parse_stream(file_path, delimiter, strip=True, bookend=None, bookend_strip=True, skip_header_rows=0, skip_footer_rows=0, encoding='utf-8', chunk_size=500)` - Stream parse a file
|
172
|
+
|
173
|
+
### TextFileHelper
|
174
|
+
|
175
|
+
Utility class for text file operations.
|
176
|
+
|
177
|
+
#### Methods
|
178
|
+
|
179
|
+
- `line_count(file_path, encoding='utf-8')` - Count lines in a file
|
180
|
+
- `preview(file_path, max_lines=100, strip=True, encoding='utf-8', skip_header_rows=0)` - Preview file content
|
181
|
+
- `read(file_path, strip=True, encoding='utf-8', skip_header_rows=0, skip_footer_rows=0)` - Read entire file
|
182
|
+
- `read_as_stream(file_path, strip=True, encoding='utf-8', skip_header_rows=0, skip_footer_rows=0, chunk_size=500)` - Stream read file
|
183
|
+
|
184
|
+
### PathValidator
|
185
|
+
|
186
|
+
Security-focused path validation utilities.
|
187
|
+
|
188
|
+
#### Methods
|
189
|
+
|
190
|
+
- `validate_path(file_path, must_exist=False, must_be_file=False, must_be_readable=False, allow_relative=False, base_directory=None)` - Validate file path
|
191
|
+
- `is_safe_path(file_path)` - Check if path is safe
|
192
|
+
- `sanitize_filename(filename, default_name='file')` - Sanitize filename
|
193
|
+
|
194
|
+
### ResourceManager
|
195
|
+
|
196
|
+
Context managers for safe resource handling.
|
197
|
+
|
198
|
+
#### Classes
|
199
|
+
|
200
|
+
- `FileResourceManager` - Context manager for file operations
|
201
|
+
- `StreamResourceManager` - Context manager for stream operations
|
202
|
+
|
203
|
+
#### Functions
|
204
|
+
|
205
|
+
- `safe_file_operation(file_path, mode='r', encoding='utf-8', ...)` - Safe file operation context manager
|
206
|
+
- `safe_stream_operation(stream, auto_close=True)` - Safe stream operation context manager
|
207
|
+
|
208
|
+
## Error Handling
|
209
|
+
|
210
|
+
The library provides comprehensive error handling with custom exception classes:
|
211
|
+
|
212
|
+
- `SplurgeParameterError` - Invalid parameter values
|
213
|
+
- `SplurgeFileNotFoundError` - File not found
|
214
|
+
- `SplurgeFilePermissionError` - File permission issues
|
215
|
+
- `SplurgeFileEncodingError` - File encoding problems
|
216
|
+
- `SplurgePathValidationError` - Path validation failures
|
217
|
+
- `SplurgeResourceAcquisitionError` - Resource acquisition failures
|
218
|
+
- `SplurgeResourceReleaseError` - Resource cleanup failures
|
219
|
+
|
220
|
+
## Development
|
221
|
+
|
222
|
+
### Running Tests
|
223
|
+
|
224
|
+
```bash
|
225
|
+
# Run all tests
|
226
|
+
pytest tests/ -v
|
227
|
+
|
228
|
+
# Run with coverage
|
229
|
+
pytest tests/ --cov=splurge_dsv --cov-report=html
|
230
|
+
|
231
|
+
# Run specific test file
|
232
|
+
pytest tests/test_dsv_helper.py -v
|
233
|
+
```
|
234
|
+
|
235
|
+
### Code Quality
|
236
|
+
|
237
|
+
The project follows strict coding standards:
|
238
|
+
- PEP 8 compliance
|
239
|
+
- Type annotations for all functions
|
240
|
+
- Google-style docstrings
|
241
|
+
- 90%+ test coverage requirement
|
242
|
+
- Comprehensive error handling
|
243
|
+
|
244
|
+
## Changelog
|
245
|
+
|
246
|
+
### 2025.1.0 (2025-08-25)
|
247
|
+
|
248
|
+
#### 🎉 Major Features
|
249
|
+
- **Complete DSV Parser**: Full-featured delimiter-separated values parser with support for CSV, TSV, and custom delimiters
|
250
|
+
- **Streaming Support**: Memory-efficient streaming for large files with configurable chunk sizes
|
251
|
+
- **Advanced Parsing Options**: Bookend removal, whitespace handling, and encoding support
|
252
|
+
- **Header/Footer Skipping**: Skip specified numbers of rows from start or end of files
|
253
|
+
|
254
|
+
#### 🛡️ Security Enhancements
|
255
|
+
- **Path Validation System**: Comprehensive file path security validation with traversal attack prevention
|
256
|
+
- **File Permission Checks**: Automatic file accessibility and permission validation
|
257
|
+
- **Encoding Validation**: Robust encoding error detection and handling
|
258
|
+
|
259
|
+
#### 🔧 Core Components
|
260
|
+
- **DsvHelper**: Main DSV parsing class with parse, parses, parse_file, and parse_stream methods
|
261
|
+
- **TextFileHelper**: Utility class for text file operations (line counting, preview, reading, streaming)
|
262
|
+
- **PathValidator**: Security-focused path validation utilities
|
263
|
+
- **ResourceManager**: Context managers for safe resource handling
|
264
|
+
- **StringTokenizer**: Core string parsing functionality
|
265
|
+
|
266
|
+
#### 🧪 Testing & Quality
|
267
|
+
- **Comprehensive Test Suite**: 250+ tests with 90%+ code coverage
|
268
|
+
- **Cross-Platform Testing**: Tested on Windows, Linux, and macOS
|
269
|
+
- **Type Safety**: Full type annotations throughout the codebase
|
270
|
+
- **Error Handling**: Custom exception hierarchy with detailed error messages
|
271
|
+
|
272
|
+
#### 📚 Documentation
|
273
|
+
- **Complete API Documentation**: Google-style docstrings for all public methods
|
274
|
+
- **Usage Examples**: Comprehensive examples for all major features
|
275
|
+
- **Error Documentation**: Detailed error handling documentation
|
276
|
+
|
277
|
+
#### 🚀 Performance
|
278
|
+
- **Memory Efficiency**: Streaming support for large files
|
279
|
+
- **Optimized Parsing**: Efficient string tokenization and processing
|
280
|
+
- **Resource Management**: Automatic cleanup and resource management
|
281
|
+
|
282
|
+
## License
|
283
|
+
|
284
|
+
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
|
285
|
+
|
286
|
+
## Contributing
|
287
|
+
|
288
|
+
Contributions are welcome! Please feel free to submit a Pull Request. For major changes, please open an issue first to discuss what you would like to change.
|
289
|
+
|
290
|
+
## Support
|
291
|
+
|
292
|
+
For support, please open an issue on the GitHub repository or contact the maintainers.
|
@@ -0,0 +1,263 @@
|
|
1
|
+
# splurge-dsv
|
2
|
+
|
3
|
+
A robust Python library for parsing and processing delimiter-separated values (DSV) files with advanced features for data validation, streaming, and error handling.
|
4
|
+
|
5
|
+
## Features
|
6
|
+
|
7
|
+
### 🔧 Core Functionality
|
8
|
+
- **Multi-format DSV Support**: Parse CSV, TSV, pipe-delimited, semicolon-delimited, and custom delimiter files
|
9
|
+
- **Flexible Parsing Options**: Configurable whitespace handling, bookend removal, and encoding support
|
10
|
+
- **Memory-Efficient Streaming**: Process large files without loading entire content into memory
|
11
|
+
- **Header/Footer Skipping**: Skip specified numbers of rows from start or end of files
|
12
|
+
- **Unicode Support**: Full Unicode character and delimiter support
|
13
|
+
|
14
|
+
### 🛡️ Security & Validation
|
15
|
+
- **Path Validation**: Comprehensive file path security validation with traversal attack prevention
|
16
|
+
- **File Permission Checks**: Automatic file accessibility and permission validation
|
17
|
+
- **Encoding Validation**: Robust encoding error detection and handling
|
18
|
+
- **Resource Management**: Automatic file handle cleanup and resource management
|
19
|
+
|
20
|
+
### 📊 Advanced Processing
|
21
|
+
- **Chunked Processing**: Configurable chunk sizes for streaming large datasets
|
22
|
+
- **Mixed Content Handling**: Support for quoted and unquoted values in the same file
|
23
|
+
- **Line Ending Flexibility**: Automatic handling of different line ending formats
|
24
|
+
- **Error Recovery**: Graceful error handling with detailed error messages
|
25
|
+
|
26
|
+
### 🧪 Testing & Quality
|
27
|
+
- **Comprehensive Test Suite**: 90%+ code coverage with 250+ tests
|
28
|
+
- **Cross-Platform Support**: Tested on Windows, Linux, and macOS
|
29
|
+
- **Type Safety**: Full type annotations and validation
|
30
|
+
- **Documentation**: Complete API documentation with examples
|
31
|
+
|
32
|
+
## Installation
|
33
|
+
|
34
|
+
```bash
|
35
|
+
pip install splurge-dsv
|
36
|
+
```
|
37
|
+
|
38
|
+
## Quick Start
|
39
|
+
|
40
|
+
### Basic CSV Parsing
|
41
|
+
|
42
|
+
```python
|
43
|
+
from splurge_dsv import DsvHelper
|
44
|
+
|
45
|
+
# Parse a simple CSV string
|
46
|
+
data = DsvHelper.parse("a,b,c", delimiter=",")
|
47
|
+
print(data) # ['a', 'b', 'c']
|
48
|
+
|
49
|
+
# Parse a CSV file
|
50
|
+
rows = DsvHelper.parse_file("data.csv", delimiter=",")
|
51
|
+
for row in rows:
|
52
|
+
    print(row) # ['col1', 'col2', 'col3']
|
53
|
+
```
|
54
|
+
|
55
|
+
### Streaming Large Files
|
56
|
+
|
57
|
+
```python
|
58
|
+
from splurge_dsv import DsvHelper
|
59
|
+
|
60
|
+
# Stream a large CSV file in chunks
|
61
|
+
for chunk in DsvHelper.parse_stream("large_file.csv", delimiter=",", chunk_size=1000):
|
62
|
+
    for row in chunk:
|
63
|
+
        process_row(row)
|
64
|
+
```
|
65
|
+
|
66
|
+
### Advanced Parsing Options
|
67
|
+
|
68
|
+
```python
|
69
|
+
from splurge_dsv import DsvHelper
|
70
|
+
|
71
|
+
# Parse with custom options
|
72
|
+
data = DsvHelper.parse(
|
73
|
+
'"a","b","c"',
|
74
|
+
delimiter=",",
|
75
|
+
bookend='"',
|
76
|
+
strip=True,
|
77
|
+
bookend_strip=True
|
78
|
+
)
|
79
|
+
print(data) # ['a', 'b', 'c']
|
80
|
+
|
81
|
+
# Skip header and footer rows
|
82
|
+
rows = DsvHelper.parse_file(
|
83
|
+
"data.csv",
|
84
|
+
delimiter=",",
|
85
|
+
skip_header_rows=1,
|
86
|
+
skip_footer_rows=2
|
87
|
+
)
|
88
|
+
```
|
89
|
+
|
90
|
+
### Text File Operations
|
91
|
+
|
92
|
+
```python
|
93
|
+
from splurge_dsv import TextFileHelper
|
94
|
+
|
95
|
+
# Count lines in a file
|
96
|
+
line_count = TextFileHelper.line_count("data.txt")
|
97
|
+
|
98
|
+
# Preview first N lines
|
99
|
+
preview = TextFileHelper.preview("data.txt", max_lines=10)
|
100
|
+
|
101
|
+
# Read entire file with options
|
102
|
+
lines = TextFileHelper.read(
|
103
|
+
"data.txt",
|
104
|
+
strip=True,
|
105
|
+
skip_header_rows=1,
|
106
|
+
skip_footer_rows=1
|
107
|
+
)
|
108
|
+
|
109
|
+
# Stream file content
|
110
|
+
for chunk in TextFileHelper.read_as_stream("large_file.txt", chunk_size=500):
|
111
|
+
    process_chunk(chunk)
|
112
|
+
```
|
113
|
+
|
114
|
+
### Path Validation
|
115
|
+
|
116
|
+
```python
|
117
|
+
from splurge_dsv import PathValidator
|
118
|
+
|
119
|
+
# Validate a file path
|
120
|
+
valid_path = PathValidator.validate_path(
|
121
|
+
"data.csv",
|
122
|
+
must_exist=True,
|
123
|
+
must_be_file=True,
|
124
|
+
must_be_readable=True
|
125
|
+
)
|
126
|
+
|
127
|
+
# Check if path is safe
|
128
|
+
is_safe = PathValidator.is_safe_path("user_input_path.txt")
|
129
|
+
```
|
130
|
+
|
131
|
+
## API Reference
|
132
|
+
|
133
|
+
### DsvHelper
|
134
|
+
|
135
|
+
Main class for DSV parsing operations.
|
136
|
+
|
137
|
+
#### Methods
|
138
|
+
|
139
|
+
- `parse(content, delimiter, strip=True, bookend=None, bookend_strip=True)` - Parse a single string
|
140
|
+
- `parses(content_list, delimiter, strip=True, bookend=None, bookend_strip=True)` - Parse multiple strings
|
141
|
+
- `parse_file(file_path, delimiter, strip=True, bookend=None, bookend_strip=True, skip_header_rows=0, skip_footer_rows=0, encoding='utf-8')` - Parse a file
|
142
|
+
- `parse_stream(file_path, delimiter, strip=True, bookend=None, bookend_strip=True, skip_header_rows=0, skip_footer_rows=0, encoding='utf-8', chunk_size=500)` - Stream parse a file
|
143
|
+
|
144
|
+
### TextFileHelper
|
145
|
+
|
146
|
+
Utility class for text file operations.
|
147
|
+
|
148
|
+
#### Methods
|
149
|
+
|
150
|
+
- `line_count(file_path, encoding='utf-8')` - Count lines in a file
|
151
|
+
- `preview(file_path, max_lines=100, strip=True, encoding='utf-8', skip_header_rows=0)` - Preview file content
|
152
|
+
- `read(file_path, strip=True, encoding='utf-8', skip_header_rows=0, skip_footer_rows=0)` - Read entire file
|
153
|
+
- `read_as_stream(file_path, strip=True, encoding='utf-8', skip_header_rows=0, skip_footer_rows=0, chunk_size=500)` - Stream read file
|
154
|
+
|
155
|
+
### PathValidator
|
156
|
+
|
157
|
+
Security-focused path validation utilities.
|
158
|
+
|
159
|
+
#### Methods
|
160
|
+
|
161
|
+
- `validate_path(file_path, must_exist=False, must_be_file=False, must_be_readable=False, allow_relative=False, base_directory=None)` - Validate file path
|
162
|
+
- `is_safe_path(file_path)` - Check if path is safe
|
163
|
+
- `sanitize_filename(filename, default_name='file')` - Sanitize filename
|
164
|
+
|
165
|
+
### ResourceManager
|
166
|
+
|
167
|
+
Context managers for safe resource handling.
|
168
|
+
|
169
|
+
#### Classes
|
170
|
+
|
171
|
+
- `FileResourceManager` - Context manager for file operations
|
172
|
+
- `StreamResourceManager` - Context manager for stream operations
|
173
|
+
|
174
|
+
#### Functions
|
175
|
+
|
176
|
+
- `safe_file_operation(file_path, mode='r', encoding='utf-8', ...)` - Safe file operation context manager
|
177
|
+
- `safe_stream_operation(stream, auto_close=True)` - Safe stream operation context manager
|
178
|
+
|
179
|
+
## Error Handling
|
180
|
+
|
181
|
+
The library provides comprehensive error handling with custom exception classes:
|
182
|
+
|
183
|
+
- `SplurgeParameterError` - Invalid parameter values
|
184
|
+
- `SplurgeFileNotFoundError` - File not found
|
185
|
+
- `SplurgeFilePermissionError` - File permission issues
|
186
|
+
- `SplurgeFileEncodingError` - File encoding problems
|
187
|
+
- `SplurgePathValidationError` - Path validation failures
|
188
|
+
- `SplurgeResourceAcquisitionError` - Resource acquisition failures
|
189
|
+
- `SplurgeResourceReleaseError` - Resource cleanup failures
|
190
|
+
|
191
|
+
## Development
|
192
|
+
|
193
|
+
### Running Tests
|
194
|
+
|
195
|
+
```bash
|
196
|
+
# Run all tests
|
197
|
+
pytest tests/ -v
|
198
|
+
|
199
|
+
# Run with coverage
|
200
|
+
pytest tests/ --cov=splurge_dsv --cov-report=html
|
201
|
+
|
202
|
+
# Run specific test file
|
203
|
+
pytest tests/test_dsv_helper.py -v
|
204
|
+
```
|
205
|
+
|
206
|
+
### Code Quality
|
207
|
+
|
208
|
+
The project follows strict coding standards:
|
209
|
+
- PEP 8 compliance
|
210
|
+
- Type annotations for all functions
|
211
|
+
- Google-style docstrings
|
212
|
+
- 90%+ test coverage requirement
|
213
|
+
- Comprehensive error handling
|
214
|
+
|
215
|
+
## Changelog
|
216
|
+
|
217
|
+
### 2025.1.0 (2025-08-25)
|
218
|
+
|
219
|
+
#### 🎉 Major Features
|
220
|
+
- **Complete DSV Parser**: Full-featured delimiter-separated values parser with support for CSV, TSV, and custom delimiters
|
221
|
+
- **Streaming Support**: Memory-efficient streaming for large files with configurable chunk sizes
|
222
|
+
- **Advanced Parsing Options**: Bookend removal, whitespace handling, and encoding support
|
223
|
+
- **Header/Footer Skipping**: Skip specified numbers of rows from start or end of files
|
224
|
+
|
225
|
+
#### 🛡️ Security Enhancements
|
226
|
+
- **Path Validation System**: Comprehensive file path security validation with traversal attack prevention
|
227
|
+
- **File Permission Checks**: Automatic file accessibility and permission validation
|
228
|
+
- **Encoding Validation**: Robust encoding error detection and handling
|
229
|
+
|
230
|
+
#### 🔧 Core Components
|
231
|
+
- **DsvHelper**: Main DSV parsing class with parse, parses, parse_file, and parse_stream methods
|
232
|
+
- **TextFileHelper**: Utility class for text file operations (line counting, preview, reading, streaming)
|
233
|
+
- **PathValidator**: Security-focused path validation utilities
|
234
|
+
- **ResourceManager**: Context managers for safe resource handling
|
235
|
+
- **StringTokenizer**: Core string parsing functionality
|
236
|
+
|
237
|
+
#### 🧪 Testing & Quality
|
238
|
+
- **Comprehensive Test Suite**: 250+ tests with 90%+ code coverage
|
239
|
+
- **Cross-Platform Testing**: Tested on Windows, Linux, and macOS
|
240
|
+
- **Type Safety**: Full type annotations throughout the codebase
|
241
|
+
- **Error Handling**: Custom exception hierarchy with detailed error messages
|
242
|
+
|
243
|
+
#### 📚 Documentation
|
244
|
+
- **Complete API Documentation**: Google-style docstrings for all public methods
|
245
|
+
- **Usage Examples**: Comprehensive examples for all major features
|
246
|
+
- **Error Documentation**: Detailed error handling documentation
|
247
|
+
|
248
|
+
#### 🚀 Performance
|
249
|
+
- **Memory Efficiency**: Streaming support for large files
|
250
|
+
- **Optimized Parsing**: Efficient string tokenization and processing
|
251
|
+
- **Resource Management**: Automatic cleanup and resource management
|
252
|
+
|
253
|
+
## License
|
254
|
+
|
255
|
+
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
|
256
|
+
|
257
|
+
## Contributing
|
258
|
+
|
259
|
+
Contributions are welcome! Please feel free to submit a Pull Request. For major changes, please open an issue first to discuss what you would like to change.
|
260
|
+
|
261
|
+
## Support
|
262
|
+
|
263
|
+
For support, please open an issue on the GitHub repository or contact the maintainers.
|
@@ -0,0 +1,84 @@
|
|
1
|
+
[build-system]
|
2
|
+
requires = ["setuptools>=61.0", "wheel"]
|
3
|
+
build-backend = "setuptools.build_meta"
|
4
|
+
|
5
|
+
[project]
|
6
|
+
name = "splurge-dsv"
|
7
|
+
version = "2025.1.0"
|
8
|
+
description = "A utility library for working with DSV (Delimited String Values) files"
|
9
|
+
readme = "README.md"
|
10
|
+
requires-python = ">=3.10"
|
11
|
+
license = "MIT"
|
12
|
+
authors = [
|
13
|
+
{name = "Jim Schilling"}
|
14
|
+
]
|
15
|
+
keywords = ["dsv", "csv", "tsv", "delimited", "parsing", "file-processing"]
|
16
|
+
classifiers = [
|
17
|
+
"Development Status :: 3 - Alpha",
|
18
|
+
"Intended Audience :: Developers",
|
19
|
+
"Programming Language :: Python :: 3",
|
20
|
+
"Programming Language :: Python :: 3.10",
|
21
|
+
"Programming Language :: Python :: 3.11",
|
22
|
+
"Programming Language :: Python :: 3.12",
|
23
|
+
"Programming Language :: Python :: 3.13",
|
24
|
+
"Topic :: Software Development :: Libraries :: Python Modules",
|
25
|
+
"Topic :: Text Processing :: Filters",
|
26
|
+
]
|
27
|
+
|
28
|
+
dependencies = []
|
29
|
+
|
30
|
+
[project.optional-dependencies]
|
31
|
+
dev = [
|
32
|
+
"pytest>=7.0.0",
|
33
|
+
"pytest-cov>=4.0.0",
|
34
|
+
"pytest-xdist>=3.0.0",
|
35
|
+
]
|
36
|
+
|
37
|
+
[project.urls]
|
38
|
+
Homepage = "https://github.com/jim-schilling/splurge-dsv"
|
39
|
+
Repository = "https://github.com/jim-schilling/splurge-dsv"
|
40
|
+
Documentation = "https://github.com/jim-schilling/splurge-dsv#readme"
|
41
|
+
"Bug Tracker" = "https://github.com/jim-schilling/splurge-dsv/issues"
|
42
|
+
|
43
|
+
[tool.setuptools.packages.find]
|
44
|
+
where = ["."]
|
45
|
+
include = ["splurge_dsv*"]
|
46
|
+
|
47
|
+
[tool.pytest.ini_options]
|
48
|
+
minversion = "7.0"
|
49
|
+
addopts = "-x -v --cov=splurge_dsv --cov-report=term-missing --cov-report=html --cov-fail-under=85"
|
50
|
+
testpaths = ["tests"]
|
51
|
+
python_files = ["test_*.py"]
|
52
|
+
python_classes = ["Test*"]
|
53
|
+
python_functions = ["test_*"]
|
54
|
+
filterwarnings = [
|
55
|
+
"ignore::DeprecationWarning",
|
56
|
+
"ignore::PendingDeprecationWarning",
|
57
|
+
]
|
58
|
+
|
59
|
+
[tool.coverage.run]
|
60
|
+
source = ["splurge_dsv"]
|
61
|
+
omit = [
|
62
|
+
"*/tests/*",
|
63
|
+
"*/test_*",
|
64
|
+
"*/__pycache__/*",
|
65
|
+
"*/venv/*",
|
66
|
+
"*/env/*",
|
67
|
+
]
|
68
|
+
|
69
|
+
[tool.coverage.report]
|
70
|
+
exclude_lines = [
|
71
|
+
"pragma: no cover",
|
72
|
+
"def __repr__",
|
73
|
+
"if self.debug:",
|
74
|
+
"if settings.DEBUG",
|
75
|
+
"raise AssertionError",
|
76
|
+
"raise NotImplementedError",
|
77
|
+
"if 0:",
|
78
|
+
"if __name__ == .__main__.:",
|
79
|
+
"class .*\\bProtocol\\):",
|
80
|
+
"@(abc\\.)?abstractmethod",
|
81
|
+
]
|
82
|
+
|
83
|
+
[tool.coverage.html]
|
84
|
+
directory = "htmlcov"
|
File without changes
|
File without changes
|