splurge-dsv 2025.1.4__py3-none-any.whl → 2025.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- splurge_dsv/__init__.py +70 -58
- splurge_dsv/__main__.py +10 -6
- splurge_dsv/cli.py +56 -46
- splurge_dsv/dsv.py +256 -0
- splurge_dsv/dsv_helper.py +63 -52
- splurge_dsv/exceptions.py +92 -75
- splurge_dsv/path_validator.py +49 -36
- splurge_dsv/safe_text_file_reader.py +177 -0
- splurge_dsv/safe_text_file_writer.py +136 -0
- splurge_dsv/string_tokenizer.py +34 -26
- splurge_dsv/text_file_helper.py +96 -177
- splurge_dsv-2025.2.0.dist-info/METADATA +217 -0
- splurge_dsv-2025.2.0.dist-info/RECORD +17 -0
- splurge_dsv-2025.2.0.dist-info/entry_points.txt +2 -0
- splurge_dsv/resource_manager.py +0 -371
- splurge_dsv-2025.1.4.dist-info/METADATA +0 -263
- splurge_dsv-2025.1.4.dist-info/RECORD +0 -14
- {splurge_dsv-2025.1.4.dist-info → splurge_dsv-2025.2.0.dist-info}/WHEEL +0 -0
- {splurge_dsv-2025.1.4.dist-info → splurge_dsv-2025.2.0.dist-info}/licenses/LICENSE +0 -0
- {splurge_dsv-2025.1.4.dist-info → splurge_dsv-2025.2.0.dist-info}/top_level.txt +0 -0
splurge_dsv/text_file_helper.py
CHANGED
@@ -23,21 +23,24 @@ This module is licensed under the MIT License.
|
|
23
23
|
"""
|
24
24
|
|
25
25
|
# Standard library imports
|
26
|
-
from collections import deque
|
27
26
|
from collections.abc import Iterator
|
28
27
|
from os import PathLike
|
29
28
|
from pathlib import Path
|
30
29
|
|
31
30
|
# Local imports
|
32
|
-
from splurge_dsv.exceptions import
|
31
|
+
from splurge_dsv.exceptions import SplurgeDsvParameterError
|
33
32
|
from splurge_dsv.path_validator import PathValidator
|
34
|
-
from splurge_dsv.
|
33
|
+
from splurge_dsv.safe_text_file_reader import SafeTextFileReader
|
35
34
|
|
36
35
|
|
37
36
|
class TextFileHelper:
|
38
|
-
"""
|
39
|
-
|
40
|
-
All methods are
|
37
|
+
"""Utility helpers for working with text files.
|
38
|
+
|
39
|
+
All methods are provided as classmethods and are designed to be memory
|
40
|
+
efficient. This module enforces a deterministic newline policy: CRLF
|
41
|
+
("\r\n"), CR ("\r"), and LF ("\n") are normalized to a single ``\n``
|
42
|
+
newline. Methods return logical, normalized lines which makes behavior
|
43
|
+
consistent across platforms and simplifies testing.
|
41
44
|
"""
|
42
45
|
|
43
46
|
DEFAULT_ENCODING = "utf-8"
|
@@ -51,32 +54,35 @@ class TextFileHelper:
|
|
51
54
|
|
52
55
|
@classmethod
|
53
56
|
def line_count(cls, file_path: PathLike[str] | str, *, encoding: str = DEFAULT_ENCODING) -> int:
|
54
|
-
"""
|
55
|
-
Count the number of lines in a text file.
|
57
|
+
"""Return the number of logical lines in ``file_path``.
|
56
58
|
|
57
|
-
|
58
|
-
|
59
|
+
The file is iterated efficiently without reading the entire contents
|
60
|
+
into memory. Newlines are normalized according to the package newline
|
61
|
+
policy before counting.
|
59
62
|
|
60
63
|
Args:
|
61
|
-
file_path: Path to the text file
|
62
|
-
encoding:
|
64
|
+
file_path: Path to the text file to inspect.
|
65
|
+
encoding: Text encoding to use when reading the file.
|
63
66
|
|
64
67
|
Returns:
|
65
|
-
|
68
|
+
The number of logical lines in the file.
|
66
69
|
|
67
70
|
Raises:
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
71
|
+
SplurgeDsvFileNotFoundError: If ``file_path`` does not exist.
|
72
|
+
SplurgeDsvFilePermissionError: If the file cannot be read due to
|
73
|
+
permissions.
|
74
|
+
SplurgeDsvFileEncodingError: If the file cannot be decoded using the
|
75
|
+
provided ``encoding``.
|
76
|
+
SplurgeDsvPathValidationError: If path validation fails.
|
72
77
|
"""
|
73
78
|
# Validate file path
|
74
79
|
validated_path = PathValidator.validate_path(
|
75
80
|
Path(file_path), must_exist=True, must_be_file=True, must_be_readable=True
|
76
81
|
)
|
77
82
|
|
78
|
-
|
79
|
-
|
83
|
+
# Delegate to SafeTextFileReader which centralizes newline normalization
|
84
|
+
reader = SafeTextFileReader(validated_path, encoding=encoding)
|
85
|
+
return len(reader.read(strip=False))
|
80
86
|
|
81
87
|
@classmethod
|
82
88
|
def preview(
|
@@ -88,31 +94,33 @@ class TextFileHelper:
|
|
88
94
|
encoding: str = DEFAULT_ENCODING,
|
89
95
|
skip_header_rows: int = DEFAULT_SKIP_HEADER_ROWS,
|
90
96
|
) -> list[str]:
|
91
|
-
"""
|
92
|
-
Preview the first N lines of a text file.
|
97
|
+
"""Return the first ``max_lines`` logical lines from ``file_path``.
|
93
98
|
|
94
|
-
|
95
|
-
|
99
|
+
The preview respects header skipping and optional whitespace
|
100
|
+
stripping. Lines returned are normalized according to the package
|
101
|
+
newline policy.
|
96
102
|
|
97
103
|
Args:
|
98
|
-
file_path: Path to the text file
|
99
|
-
max_lines: Maximum number of lines to
|
100
|
-
strip:
|
101
|
-
encoding: File encoding to use
|
102
|
-
skip_header_rows: Number of
|
104
|
+
file_path: Path to the text file.
|
105
|
+
max_lines: Maximum number of lines to return (must be >= 1).
|
106
|
+
strip: If True, strip leading/trailing whitespace from each line.
|
107
|
+
encoding: File encoding to use when reading the file.
|
108
|
+
skip_header_rows: Number of leading lines to ignore before previewing.
|
103
109
|
|
104
110
|
Returns:
|
105
|
-
list
|
111
|
+
A list of logical lines (strings), up to ``max_lines`` in length.
|
106
112
|
|
107
113
|
Raises:
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
114
|
+
SplurgeDsvParameterError: If ``max_lines`` is less than 1.
|
115
|
+
SplurgeDsvFileNotFoundError: If ``file_path`` does not exist.
|
116
|
+
SplurgeDsvFilePermissionError: If the file cannot be read due to
|
117
|
+
permissions.
|
118
|
+
SplurgeDsvFileEncodingError: If the file cannot be decoded using the
|
119
|
+
provided ``encoding``.
|
120
|
+
SplurgeDsvPathValidationError: If path validation fails.
|
113
121
|
"""
|
114
122
|
if max_lines < 1:
|
115
|
-
raise
|
123
|
+
raise SplurgeDsvParameterError(
|
116
124
|
"TextFileHelper.preview: max_lines is less than 1", details="max_lines must be at least 1"
|
117
125
|
)
|
118
126
|
|
@@ -122,22 +130,8 @@ class TextFileHelper:
|
|
122
130
|
)
|
123
131
|
|
124
132
|
skip_header_rows = max(skip_header_rows, cls.DEFAULT_SKIP_HEADER_ROWS)
|
125
|
-
|
126
|
-
|
127
|
-
with safe_file_operation(validated_path, encoding=encoding, mode=cls.DEFAULT_MODE) as stream:
|
128
|
-
# Skip header rows
|
129
|
-
for _ in range(skip_header_rows):
|
130
|
-
if not stream.readline():
|
131
|
-
return lines
|
132
|
-
|
133
|
-
# Read up to max_lines after skipping headers
|
134
|
-
for _ in range(max_lines):
|
135
|
-
line = stream.readline()
|
136
|
-
if not line:
|
137
|
-
break
|
138
|
-
lines.append(line.strip() if strip else line.rstrip("\n"))
|
139
|
-
|
140
|
-
return lines
|
133
|
+
reader = SafeTextFileReader(validated_path, encoding=encoding)
|
134
|
+
return reader.preview(max_lines=max_lines, strip=strip, skip_header_rows=skip_header_rows)
|
141
135
|
|
142
136
|
@classmethod
|
143
137
|
def read_as_stream(
|
@@ -150,33 +144,38 @@ class TextFileHelper:
|
|
150
144
|
skip_footer_rows: int = DEFAULT_SKIP_FOOTER_ROWS,
|
151
145
|
chunk_size: int = DEFAULT_CHUNK_SIZE,
|
152
146
|
) -> Iterator[list[str]]:
|
153
|
-
"""
|
154
|
-
Read a text file as a stream of line chunks.
|
147
|
+
"""Yield the file contents as successive chunks of logical lines.
|
155
148
|
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
149
|
+
Each yielded value is a list of lines (strings), where each chunk
|
150
|
+
contains up to ``chunk_size`` lines. Footer skipping is implemented
|
151
|
+
using a sliding-window technique so the file is not fully loaded into
|
152
|
+
memory.
|
160
153
|
|
161
154
|
Args:
|
162
|
-
file_path: Path to the text file
|
163
|
-
strip:
|
164
|
-
encoding:
|
165
|
-
skip_header_rows: Number of
|
166
|
-
skip_footer_rows: Number of
|
167
|
-
|
155
|
+
file_path: Path to the text file to stream.
|
156
|
+
strip: If True, strip leading/trailing whitespace from each line.
|
157
|
+
encoding: Text encoding used to read the file.
|
158
|
+
skip_header_rows: Number of leading lines to skip before yielding.
|
159
|
+
skip_footer_rows: Number of trailing lines to skip (handled via
|
160
|
+
an internal buffer; does not require reading the whole file).
|
161
|
+
chunk_size: Target number of lines per yielded chunk.
|
168
162
|
|
169
163
|
Yields:
|
170
|
-
|
164
|
+
Lists of logical lines (each a list[str]) for each chunk.
|
171
165
|
|
172
166
|
Raises:
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
167
|
+
SplurgeDsvFileNotFoundError: If ``file_path`` does not exist.
|
168
|
+
SplurgeDsvFilePermissionError: If the file cannot be read due to
|
169
|
+
permissions.
|
170
|
+
SplurgeDsvFileEncodingError: If the file cannot be decoded using the
|
171
|
+
provided ``encoding``.
|
172
|
+
SplurgeDsvPathValidationError: If path validation fails.
|
177
173
|
"""
|
178
|
-
#
|
179
|
-
chunk_size
|
174
|
+
# Allow small chunk sizes for testing, but enforce minimum for performance
|
175
|
+
# Only enforce minimum if chunk_size is "moderately small" (to prevent accidental small chunks)
|
176
|
+
if chunk_size >= 10: # If someone sets a chunk size >= 10, enforce minimum for performance
|
177
|
+
chunk_size = max(chunk_size, cls.DEFAULT_MIN_CHUNK_SIZE)
|
178
|
+
# For very small chunk sizes (like 1-9), allow them (useful for testing)
|
180
179
|
skip_header_rows = max(skip_header_rows, cls.DEFAULT_SKIP_HEADER_ROWS)
|
181
180
|
skip_footer_rows = max(skip_footer_rows, cls.DEFAULT_SKIP_FOOTER_ROWS)
|
182
181
|
|
@@ -185,61 +184,11 @@ class TextFileHelper:
|
|
185
184
|
Path(file_path), must_exist=True, must_be_file=True, must_be_readable=True
|
186
185
|
)
|
187
186
|
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
# Use a sliding window to handle footer skipping efficiently
|
195
|
-
if skip_footer_rows > 0:
|
196
|
-
# Buffer to hold the last skip_footer_rows lines
|
197
|
-
buffer: deque[str] = deque(maxlen=skip_footer_rows + 1)
|
198
|
-
current_chunk: list[str] = []
|
199
|
-
|
200
|
-
for line in stream:
|
201
|
-
processed_line = line.strip() if strip else line.rstrip("\n")
|
202
|
-
|
203
|
-
# Add current line to buffer
|
204
|
-
buffer.append(processed_line)
|
205
|
-
|
206
|
-
# Wait until the buffer is full (skip_footer_rows + 1 lines) before processing lines.
|
207
|
-
# This ensures we have enough lines to reliably identify and skip the footer rows at the end.
|
208
|
-
if len(buffer) < skip_footer_rows + 1:
|
209
|
-
continue
|
210
|
-
|
211
|
-
# Once the buffer contains more than skip_footer_rows lines, the oldest line (removed with popleft)
|
212
|
-
# is guaranteed not to be part of the footer and can be safely processed and added to the current chunk.
|
213
|
-
safe_line = buffer.popleft()
|
214
|
-
current_chunk.append(safe_line)
|
215
|
-
|
216
|
-
# Yield chunk when it reaches the desired size
|
217
|
-
if len(current_chunk) >= chunk_size:
|
218
|
-
yield current_chunk
|
219
|
-
current_chunk = []
|
220
|
-
|
221
|
-
# At the end, the buffer contains exactly the footer rows to skip
|
222
|
-
# All other lines have already been processed and yielded
|
223
|
-
|
224
|
-
# Yield any remaining lines in the final chunk
|
225
|
-
if current_chunk:
|
226
|
-
yield current_chunk
|
227
|
-
else:
|
228
|
-
# No footer skipping needed - simple streaming
|
229
|
-
chunk: list[str] = []
|
230
|
-
|
231
|
-
for line in stream:
|
232
|
-
processed_line = line.strip() if strip else line.rstrip("\n")
|
233
|
-
chunk.append(processed_line)
|
234
|
-
|
235
|
-
# Yield chunk when it reaches the desired size
|
236
|
-
if len(chunk) >= chunk_size:
|
237
|
-
yield chunk
|
238
|
-
chunk = []
|
239
|
-
|
240
|
-
# Yield any remaining lines in the final chunk
|
241
|
-
if chunk:
|
242
|
-
yield chunk
|
187
|
+
# Use SafeTextFileReader to centralize newline normalization and streaming behavior.
|
188
|
+
reader = SafeTextFileReader(validated_path, encoding=encoding)
|
189
|
+
yield from reader.read_as_stream(
|
190
|
+
strip=strip, skip_header_rows=skip_header_rows, skip_footer_rows=skip_footer_rows, chunk_size=chunk_size
|
191
|
+
)
|
243
192
|
|
244
193
|
@classmethod
|
245
194
|
def read(
|
@@ -251,27 +200,30 @@ class TextFileHelper:
|
|
251
200
|
skip_header_rows: int = DEFAULT_SKIP_HEADER_ROWS,
|
252
201
|
skip_footer_rows: int = DEFAULT_SKIP_FOOTER_ROWS,
|
253
202
|
) -> list[str]:
|
254
|
-
"""
|
255
|
-
Read the entire contents of a text file into a list of strings.
|
203
|
+
"""Read all logical lines from ``file_path`` into memory.
|
256
204
|
|
257
|
-
This method
|
258
|
-
|
205
|
+
This convenience method returns the entire file as a list of
|
206
|
+
normalized lines. Header and footer rows may be skipped with the
|
207
|
+
corresponding parameters.
|
259
208
|
|
260
209
|
Args:
|
261
|
-
file_path: Path to the text file
|
262
|
-
strip:
|
263
|
-
encoding:
|
264
|
-
skip_header_rows: Number of
|
265
|
-
skip_footer_rows: Number of
|
210
|
+
file_path: Path to the text file to read.
|
211
|
+
strip: If True, strip leading/trailing whitespace from each line.
|
212
|
+
encoding: Text encoding used to read the file.
|
213
|
+
skip_header_rows: Number of leading lines to ignore.
|
214
|
+
skip_footer_rows: Number of trailing lines to ignore.
|
266
215
|
|
267
216
|
Returns:
|
268
|
-
|
217
|
+
A list containing every logical line from the file except skipped
|
218
|
+
header/footer lines.
|
269
219
|
|
270
220
|
Raises:
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
221
|
+
SplurgeDsvFileNotFoundError: If ``file_path`` does not exist.
|
222
|
+
SplurgeDsvFilePermissionError: If the file cannot be read due to
|
223
|
+
permissions.
|
224
|
+
SplurgeDsvFileEncodingError: If the file cannot be decoded using the
|
225
|
+
provided ``encoding``.
|
226
|
+
SplurgeDsvPathValidationError: If path validation fails.
|
275
227
|
"""
|
276
228
|
# Validate file path
|
277
229
|
validated_path = PathValidator.validate_path(
|
@@ -281,41 +233,8 @@ class TextFileHelper:
|
|
281
233
|
skip_header_rows = max(skip_header_rows, cls.DEFAULT_SKIP_HEADER_ROWS)
|
282
234
|
skip_footer_rows = max(skip_footer_rows, cls.DEFAULT_SKIP_FOOTER_ROWS)
|
283
235
|
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
try:
|
290
|
-
if skip_footer_rows > 0:
|
291
|
-
# Buffer to hold the last skip_footer_rows + 1 lines
|
292
|
-
buffer = deque(maxlen=skip_footer_rows + 1)
|
293
|
-
result: list[str] = []
|
294
|
-
|
295
|
-
for line in stream:
|
296
|
-
processed_line = line.strip() if strip else line.rstrip("\n")
|
297
|
-
|
298
|
-
# Add current line to buffer
|
299
|
-
buffer.append(processed_line)
|
300
|
-
|
301
|
-
# Wait until the buffer is full (skip_footer_rows + 1 lines) before processing lines.
|
302
|
-
# This ensures we have enough lines to reliably identify and skip the footer rows at the end.
|
303
|
-
if len(buffer) < skip_footer_rows + 1:
|
304
|
-
continue
|
305
|
-
|
306
|
-
# Once the buffer contains more than skip_footer_rows lines, the oldest line (removed with popleft)
|
307
|
-
# is guaranteed not to be part of the footer and can be safely processed and added to the result.
|
308
|
-
safe_line = buffer.popleft()
|
309
|
-
result.append(safe_line)
|
310
|
-
|
311
|
-
# At the end, the buffer contains exactly the footer rows to skip
|
312
|
-
# All other lines have already been processed and added to result
|
313
|
-
return result
|
314
|
-
else:
|
315
|
-
result: list[str] = []
|
316
|
-
for line in stream:
|
317
|
-
processed_line = line.strip() if strip else line.rstrip("\n")
|
318
|
-
result.append(processed_line)
|
319
|
-
return result
|
320
|
-
except UnicodeDecodeError as e:
|
321
|
-
raise SplurgeFileEncodingError(f"Encoding error reading file: {validated_path}", details=str(e)) from e
|
236
|
+
skip_header_rows = max(skip_header_rows, cls.DEFAULT_SKIP_HEADER_ROWS)
|
237
|
+
skip_footer_rows = max(skip_footer_rows, cls.DEFAULT_SKIP_FOOTER_ROWS)
|
238
|
+
|
239
|
+
reader = SafeTextFileReader(validated_path, encoding=encoding)
|
240
|
+
return reader.read(strip=strip, skip_header_rows=skip_header_rows, skip_footer_rows=skip_footer_rows)
|
@@ -0,0 +1,217 @@
|
|
1
|
+
Metadata-Version: 2.4
|
2
|
+
Name: splurge-dsv
|
3
|
+
Version: 2025.2.0
|
4
|
+
Summary: A utility library for working with DSV (Delimited String Values) files
|
5
|
+
Author: Jim Schilling
|
6
|
+
License-Expression: MIT
|
7
|
+
Project-URL: Homepage, https://github.com/jim-schilling/splurge-dsv
|
8
|
+
Project-URL: Repository, https://github.com/jim-schilling/splurge-dsv
|
9
|
+
Project-URL: Documentation, https://github.com/jim-schilling/splurge-dsv#readme
|
10
|
+
Project-URL: Bug Tracker, https://github.com/jim-schilling/splurge-dsv/issues
|
11
|
+
Keywords: dsv,csv,tsv,delimited,parsing,file-processing
|
12
|
+
Classifier: Development Status :: 3 - Alpha
|
13
|
+
Classifier: Intended Audience :: Developers
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
15
|
+
Classifier: Programming Language :: Python :: 3.10
|
16
|
+
Classifier: Programming Language :: Python :: 3.11
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
18
|
+
Classifier: Programming Language :: Python :: 3.13
|
19
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
20
|
+
Classifier: Topic :: Text Processing :: Filters
|
21
|
+
Requires-Python: >=3.10
|
22
|
+
Description-Content-Type: text/markdown
|
23
|
+
License-File: LICENSE
|
24
|
+
Provides-Extra: dev
|
25
|
+
Requires-Dist: pytest>=7.0.0; extra == "dev"
|
26
|
+
Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
|
27
|
+
Requires-Dist: pytest-xdist>=3.0.0; extra == "dev"
|
28
|
+
Requires-Dist: mypy>=1.0.0; extra == "dev"
|
29
|
+
Requires-Dist: ruff>=0.0.241; extra == "dev"
|
30
|
+
Requires-Dist: pytest-mock>=3.0.0; extra == "dev"
|
31
|
+
Requires-Dist: hypothesis>=6.0.0; extra == "dev"
|
32
|
+
Dynamic: license-file
|
33
|
+
|
34
|
+
# splurge-dsv
|
35
|
+
|
36
|
+
[](https://pypi.org/project/splurge-dsv/)
|
37
|
+
[](https://pypi.org/project/splurge-dsv/)
|
38
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
39
|
+
|
40
|
+
[CI quick test](https://github.com/jim-schilling/splurge-dsv/actions/workflows/ci-quick-test.yml)
|
41
|
+
[](https://github.com/jim-schilling/splurge-dsv)
|
42
|
+
[Ruff](https://github.com/astral-sh/ruff)
|
43
|
+
[mypy](https://mypy-lang.org/)
|
44
|
+
|
45
|
+
A robust Python library for parsing and processing delimiter-separated values (DSV) files with advanced features for data validation, streaming, and error handling.
|
46
|
+
|
47
|
+
## Features
|
48
|
+
|
49
|
+
- **Multi-format DSV Support**: Parse CSV, TSV, pipe-delimited, and custom delimiter files
|
50
|
+
- **Memory-Efficient Streaming**: Process large files without loading entire content into memory
|
51
|
+
- **Security & Validation**: Comprehensive path validation and file permission checks
|
52
|
+
- **Unicode Support**: Full Unicode character and encoding support
|
53
|
+
- **Type Safety**: Full type annotations with mypy validation
|
54
|
+
- **Comprehensive Testing**: 420 tests (409 passed, 11 skipped) with 94% code coverage including property-based testing, edge case testing, and cross-platform compatibility
|
55
|
+
|
56
|
+
**⚠️ BREAKING CHANGES in v2025.2.0**
|
57
|
+
>
|
58
|
+
> - **Exception Names Changed**: All exceptions now use the `SplurgeDsv*` prefix (e.g., `SplurgeParameterError` → `SplurgeDsvParameterError`)
|
59
|
+
> - **Resource Manager Removed**: The `ResourceManager` module and all related classes have been completely removed
|
60
|
+
>
|
61
|
+
> See the [CHANGELOG](CHANGELOG.md) for migration guidance.
|
62
|
+
|
63
|
+
## Installation
|
64
|
+
|
65
|
+
```bash
|
66
|
+
pip install splurge-dsv
|
67
|
+
```
|
68
|
+
|
69
|
+
## Quick Start
|
70
|
+
|
71
|
+
### CLI Usage
|
72
|
+
|
73
|
+
```bash
|
74
|
+
# Parse a CSV file
|
75
|
+
python -m splurge_dsv data.csv --delimiter ,
|
76
|
+
|
77
|
+
# Stream a large file
|
78
|
+
python -m splurge_dsv large_file.csv --delimiter , --stream --chunk-size 1000
|
79
|
+
```
|
80
|
+
|
81
|
+
### API Usage
|
82
|
+
|
83
|
+
```python
|
84
|
+
from splurge_dsv import DsvHelper
|
85
|
+
|
86
|
+
# Parse a CSV string
|
87
|
+
data = DsvHelper.parse("a,b,c", delimiter=",")
|
88
|
+
print(data) # ['a', 'b', 'c']
|
89
|
+
|
90
|
+
# Parse a CSV file
|
91
|
+
rows = DsvHelper.parse_file("data.csv", delimiter=",")
|
92
|
+
```
|
93
|
+
|
94
|
+
### Modern API
|
95
|
+
|
96
|
+
```python
|
97
|
+
from splurge_dsv import Dsv, DsvConfig
|
98
|
+
|
99
|
+
# Create configuration and parser
|
100
|
+
config = DsvConfig.csv(skip_header=1)
|
101
|
+
dsv = Dsv(config)
|
102
|
+
|
103
|
+
# Parse files
|
104
|
+
rows = dsv.parse_file("data.csv")
|
105
|
+
```
|
106
|
+
|
107
|
+
## Documentation
|
108
|
+
|
109
|
+
- **[Detailed Documentation](docs/README-details.md)**: Complete API reference, CLI options, and examples
|
110
|
+
- **[Testing Best Practices](docs/testing_best_practices.md)**: Comprehensive testing guidelines and patterns
|
111
|
+
- **[Hypothesis Usage Patterns](docs/hypothesis_usage_patterns.md)**: Property-based testing guide
|
112
|
+
- **[Changelog](CHANGELOG.md)**: Release notes and migration guides
|
113
|
+
|
114
|
+
## License
|
115
|
+
|
116
|
+
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
|
117
|
+
----------------------------
|
118
|
+
|
119
|
+
This library enforces deterministic newline handling for text files. The reader
|
120
|
+
normalizes CRLF (`\r\n`), CR (`\r`) and LF (`\n`) to LF internally and
|
121
|
+
returns logical lines. The writer utilities normalize any input newlines to LF
|
122
|
+
before writing. This avoids platform-dependent differences when reading files
|
123
|
+
produced by diverse sources.
|
124
|
+
|
125
|
+
Recommended usage:
|
126
|
+
|
127
|
+
- When creating files inside the project, prefer the `open_text_writer` context
|
128
|
+
manager or `SafeTextFileWriter` which will normalize to LF.
|
129
|
+
- When reading unknown files, `open_text` / `SafeTextFileReader` will
|
130
|
+
provide deterministic normalization regardless of the source.
|
131
|
+
- `SplurgeResourceAcquisitionError` - Resource acquisition failures
|
132
|
+
- `SplurgeResourceReleaseError` - Resource cleanup failures
|
133
|
+
|
134
|
+
## Development
|
135
|
+
|
136
|
+
### Testing Suite
|
137
|
+
|
138
|
+
splurge-dsv features a comprehensive testing suite designed for robustness and reliability:
|
139
|
+
|
140
|
+
#### Test Categories
|
141
|
+
- **Unit Tests**: Core functionality testing (300+ tests)
|
142
|
+
- **Integration Tests**: End-to-end workflow validation (50+ tests)
|
143
|
+
- **Property-Based Tests**: Hypothesis-driven testing for edge cases (50+ tests)
|
144
|
+
- **Edge Case Tests**: Malformed input, encoding issues, filesystem anomalies
|
145
|
+
- **Cross-Platform Tests**: Path handling, line endings, encoding consistency
|
146
|
+
|
147
|
+
#### Running Tests
|
148
|
+
|
149
|
+
```bash
|
150
|
+
# Run all tests
|
151
|
+
pytest tests/ -v
|
152
|
+
|
153
|
+
# Run with coverage report
|
154
|
+
pytest tests/ --cov=splurge_dsv --cov-report=html
|
155
|
+
|
156
|
+
# Run specific test categories
|
157
|
+
pytest tests/unit/ -v # Unit tests only
|
158
|
+
pytest tests/integration/ -v # Integration tests only
|
159
|
+
pytest tests/property/ -v # Property-based tests only
|
160
|
+
pytest tests/platform/ -v # Cross-platform tests only
|
161
|
+
|
162
|
+
# Run with parallel execution
|
163
|
+
pytest tests/ -n 4 --cov=splurge_dsv
|
164
|
+
|
165
|
+
# Run performance benchmarks
|
166
|
+
pytest tests/ --durations=10
|
167
|
+
```
|
168
|
+
|
169
|
+
#### Test Quality Standards
|
170
|
+
- **94%+ Code Coverage**: All public APIs and critical paths covered
|
171
|
+
- **Property-Based Testing**: Hypothesis framework validates complex scenarios
|
172
|
+
- **Cross-Platform Compatibility**: Tests run on Windows, Linux, and macOS
|
173
|
+
- **Performance Regression Detection**: Automated benchmarks prevent slowdowns
|
174
|
+
- **Zero False Positives**: All property tests pass without spurious failures
|
175
|
+
|
176
|
+
#### Testing Best Practices
|
177
|
+
- Tests use `pytest-mock` for modern mocking patterns
|
178
|
+
- Property tests use Hypothesis strategies for comprehensive input generation
|
179
|
+
- Edge case tests validate error handling and boundary conditions
|
180
|
+
- Cross-platform tests ensure consistent behavior across operating systems
|
181
|
+
|
182
|
+
### Code Quality
|
183
|
+
|
184
|
+
The project follows strict coding standards:
|
185
|
+
- PEP 8 compliance
|
186
|
+
- Type annotations for all functions
|
187
|
+
- Google-style docstrings
|
188
|
+
- 85%+ coverage gate enforced via CI
|
189
|
+
- Comprehensive error handling
|
190
|
+
|
191
|
+
## Changelog
|
192
|
+
|
193
|
+
See the [CHANGELOG](CHANGELOG.md) for full release notes.
|
194
|
+
|
195
|
+
## License
|
196
|
+
|
197
|
+
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
|
198
|
+
|
199
|
+
## More Documentation
|
200
|
+
|
201
|
+
- Detailed docs: [docs/README-details.md](docs/README-details.md)
|
202
|
+
- E2E testing coverage: [docs/e2e_testing_coverage.md](docs/e2e_testing_coverage.md)
|
203
|
+
|
204
|
+
## Contributing
|
205
|
+
|
206
|
+
Contributions are welcome! Please see our [Contributing Guide](CONTRIBUTING.md) for detailed information on:
|
207
|
+
|
208
|
+
- Development setup and workflow
|
209
|
+
- Coding standards and best practices
|
210
|
+
- Testing requirements and guidelines
|
211
|
+
- Pull request process and review criteria
|
212
|
+
|
213
|
+
For major changes, please open an issue first to discuss what you would like to change.
|
214
|
+
|
215
|
+
## Support
|
216
|
+
|
217
|
+
For support, please open an issue on the GitHub repository or contact the maintainers.
|
@@ -0,0 +1,17 @@
|
|
1
|
+
splurge_dsv/__init__.py,sha256=cLZbXey-8aogRkpE2easqfaaJdkHCqv3M_KZlrDo3WU,3337
|
2
|
+
splurge_dsv/__main__.py,sha256=6dpfX_96hEpOqxv5X4bK73xX86YTgK0Adad1uTWSABM,426
|
3
|
+
splurge_dsv/cli.py,sha256=xdPL7dSdWGDUsG2-FbsIgRnbYuQRN-grLlx8iJ-HxZc,7600
|
4
|
+
splurge_dsv/dsv.py,sha256=6P0Ujma_8xED-HsTywLaTQYD_Wxo7J8xU25RdE-5I38,8859
|
5
|
+
splurge_dsv/dsv_helper.py,sha256=lAEquK2WDOB3yS1ynxmOcg_pn9Q3njHJgFs_wyCOi_k,10293
|
6
|
+
splurge_dsv/exceptions.py,sha256=hefUTjk3ULca5TdXoKe5L-cME7SU1RFcWVHxNpZ_w-Y,5274
|
7
|
+
splurge_dsv/path_validator.py,sha256=r08PkuMdL0eBY_iao00_irBMdT6ORJ2-cNK5AUssEKs,10681
|
8
|
+
splurge_dsv/safe_text_file_reader.py,sha256=9GCOGCTDDP5FJD0u2wZ107SQNEIj9Rm1zN6shYiKq7g,6659
|
9
|
+
splurge_dsv/safe_text_file_writer.py,sha256=zQIsDZ6jRN_ZWwLX4dpUZI35iudxzuv1Gjv7K1vSFJk,4562
|
10
|
+
splurge_dsv/string_tokenizer.py,sha256=jFgkqeGx5PnmKAvu7sn3xxHcQklZTZUy8x_eo5e6TWI,4497
|
11
|
+
splurge_dsv/text_file_helper.py,sha256=2SxbYtZtpMtHQ-5g1aQzgvQobBrlQH4EsrhBY5t3Xx4,10362
|
12
|
+
splurge_dsv-2025.2.0.dist-info/licenses/LICENSE,sha256=fPgtg-tIFHinQvJH0arRfv50AuxikD5eHw6rrPy2A5w,1091
|
13
|
+
splurge_dsv-2025.2.0.dist-info/METADATA,sha256=n8wCT3oOj_-60a4Et6AvbQ708tdM-pi3DL0lSlBsYuo,8518
|
14
|
+
splurge_dsv-2025.2.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
15
|
+
splurge_dsv-2025.2.0.dist-info/entry_points.txt,sha256=QmGyc3qHYtY61uanRxNOXw-waSJ01qypSCI8Kb3zgsU,56
|
16
|
+
splurge_dsv-2025.2.0.dist-info/top_level.txt,sha256=D6Si3FTfpRYqH7kzM7tSQAyaKbbraO6UPLpcqcY4XXM,12
|
17
|
+
splurge_dsv-2025.2.0.dist-info/RECORD,,
|