splurge-dsv 2025.1.1__py3-none-any.whl → 2025.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- splurge_dsv/__init__.py +84 -0
- splurge_dsv/__main__.py +15 -0
- splurge_dsv/cli.py +158 -0
- splurge_dsv/dsv_helper.py +29 -46
- splurge_dsv/exceptions.py +22 -9
- splurge_dsv/path_validator.py +102 -79
- splurge_dsv/resource_manager.py +77 -138
- splurge_dsv/string_tokenizer.py +5 -24
- splurge_dsv/text_file_helper.py +42 -64
- {splurge_dsv-2025.1.1.dist-info → splurge_dsv-2025.1.2.dist-info}/METADATA +27 -2
- splurge_dsv-2025.1.2.dist-info/RECORD +14 -0
- splurge_dsv-2025.1.1.dist-info/RECORD +0 -13
- {splurge_dsv-2025.1.1.dist-info → splurge_dsv-2025.1.2.dist-info}/WHEEL +0 -0
- {splurge_dsv-2025.1.1.dist-info → splurge_dsv-2025.1.2.dist-info}/licenses/LICENSE +0 -0
- {splurge_dsv-2025.1.1.dist-info → splurge_dsv-2025.1.2.dist-info}/top_level.txt +0 -0
splurge_dsv/text_file_helper.py
CHANGED
@@ -22,15 +22,14 @@ Please preserve this header and all related material when sharing!
|
|
22
22
|
This module is licensed under the MIT License.
|
23
23
|
"""
|
24
24
|
|
25
|
+
# Standard library imports
|
25
26
|
from collections import deque
|
27
|
+
from collections.abc import Iterator
|
26
28
|
from os import PathLike
|
27
29
|
from pathlib import Path
|
28
|
-
from typing import Iterator
|
29
30
|
|
30
|
-
from splurge_dsv.exceptions import (
|
31
|
-
SplurgeParameterError,
|
32
|
-
SplurgeFileEncodingError
|
33
|
-
)
|
31
|
+
# Local imports
|
32
|
+
from splurge_dsv.exceptions import SplurgeFileEncodingError, SplurgeParameterError
|
34
33
|
from splurge_dsv.path_validator import PathValidator
|
35
34
|
from splurge_dsv.resource_manager import safe_file_operation
|
36
35
|
|
@@ -51,12 +50,7 @@ class TextFileHelper:
|
|
51
50
|
DEFAULT_MODE = "r"
|
52
51
|
|
53
52
|
@classmethod
|
54
|
-
def line_count(
|
55
|
-
cls,
|
56
|
-
file_path: PathLike[str] | str,
|
57
|
-
*,
|
58
|
-
encoding: str = DEFAULT_ENCODING
|
59
|
-
) -> int:
|
53
|
+
def line_count(cls, file_path: PathLike[str] | str, *, encoding: str = DEFAULT_ENCODING) -> int:
|
60
54
|
"""
|
61
55
|
Count the number of lines in a text file.
|
62
56
|
|
@@ -78,12 +72,9 @@ class TextFileHelper:
|
|
78
72
|
"""
|
79
73
|
# Validate file path
|
80
74
|
validated_path = PathValidator.validate_path(
|
81
|
-
Path(file_path),
|
82
|
-
must_exist=True,
|
83
|
-
must_be_file=True,
|
84
|
-
must_be_readable=True
|
75
|
+
Path(file_path), must_exist=True, must_be_file=True, must_be_readable=True
|
85
76
|
)
|
86
|
-
|
77
|
+
|
87
78
|
with safe_file_operation(validated_path, encoding=encoding, mode=cls.DEFAULT_MODE) as stream:
|
88
79
|
return sum(1 for _ in stream)
|
89
80
|
|
@@ -95,7 +86,7 @@ class TextFileHelper:
|
|
95
86
|
max_lines: int = DEFAULT_MAX_LINES,
|
96
87
|
strip: bool = DEFAULT_STRIP,
|
97
88
|
encoding: str = DEFAULT_ENCODING,
|
98
|
-
skip_header_rows: int = DEFAULT_SKIP_HEADER_ROWS
|
89
|
+
skip_header_rows: int = DEFAULT_SKIP_HEADER_ROWS,
|
99
90
|
) -> list[str]:
|
100
91
|
"""
|
101
92
|
Preview the first N lines of a text file.
|
@@ -122,34 +113,30 @@ class TextFileHelper:
|
|
122
113
|
"""
|
123
114
|
if max_lines < 1:
|
124
115
|
raise SplurgeParameterError(
|
125
|
-
"TextFileHelper.preview: max_lines is less than 1",
|
126
|
-
details="max_lines must be at least 1"
|
116
|
+
"TextFileHelper.preview: max_lines is less than 1", details="max_lines must be at least 1"
|
127
117
|
)
|
128
|
-
|
118
|
+
|
129
119
|
# Validate file path
|
130
120
|
validated_path = PathValidator.validate_path(
|
131
|
-
Path(file_path),
|
132
|
-
must_exist=True,
|
133
|
-
must_be_file=True,
|
134
|
-
must_be_readable=True
|
121
|
+
Path(file_path), must_exist=True, must_be_file=True, must_be_readable=True
|
135
122
|
)
|
136
|
-
|
123
|
+
|
137
124
|
skip_header_rows = max(skip_header_rows, cls.DEFAULT_SKIP_HEADER_ROWS)
|
138
125
|
lines: list[str] = []
|
139
|
-
|
126
|
+
|
140
127
|
with safe_file_operation(validated_path, encoding=encoding, mode=cls.DEFAULT_MODE) as stream:
|
141
128
|
# Skip header rows
|
142
129
|
for _ in range(skip_header_rows):
|
143
130
|
if not stream.readline():
|
144
131
|
return lines
|
145
|
-
|
132
|
+
|
146
133
|
# Read up to max_lines after skipping headers
|
147
134
|
for _ in range(max_lines):
|
148
135
|
line = stream.readline()
|
149
136
|
if not line:
|
150
137
|
break
|
151
138
|
lines.append(line.strip() if strip else line.rstrip("\n"))
|
152
|
-
|
139
|
+
|
153
140
|
return lines
|
154
141
|
|
155
142
|
@classmethod
|
@@ -161,7 +148,7 @@ class TextFileHelper:
|
|
161
148
|
encoding: str = DEFAULT_ENCODING,
|
162
149
|
skip_header_rows: int = DEFAULT_SKIP_HEADER_ROWS,
|
163
150
|
skip_footer_rows: int = DEFAULT_SKIP_FOOTER_ROWS,
|
164
|
-
chunk_size: int = DEFAULT_CHUNK_SIZE
|
151
|
+
chunk_size: int = DEFAULT_CHUNK_SIZE,
|
165
152
|
) -> Iterator[list[str]]:
|
166
153
|
"""
|
167
154
|
Read a text file as a stream of line chunks.
|
@@ -192,67 +179,64 @@ class TextFileHelper:
|
|
192
179
|
chunk_size = max(chunk_size, cls.DEFAULT_MIN_CHUNK_SIZE)
|
193
180
|
skip_header_rows = max(skip_header_rows, cls.DEFAULT_SKIP_HEADER_ROWS)
|
194
181
|
skip_footer_rows = max(skip_footer_rows, cls.DEFAULT_SKIP_FOOTER_ROWS)
|
195
|
-
|
182
|
+
|
196
183
|
# Validate file path
|
197
184
|
validated_path = PathValidator.validate_path(
|
198
|
-
Path(file_path),
|
199
|
-
must_exist=True,
|
200
|
-
must_be_file=True,
|
201
|
-
must_be_readable=True
|
202
|
-
)
|
203
|
-
|
185
|
+
Path(file_path), must_exist=True, must_be_file=True, must_be_readable=True
|
186
|
+
)
|
187
|
+
|
204
188
|
with safe_file_operation(validated_path, encoding=encoding, mode=cls.DEFAULT_MODE) as stream:
|
205
189
|
# Skip header rows
|
206
190
|
for _ in range(skip_header_rows):
|
207
191
|
if not stream.readline():
|
208
192
|
return
|
209
|
-
|
193
|
+
|
210
194
|
# Use a sliding window to handle footer skipping efficiently
|
211
195
|
if skip_footer_rows > 0:
|
212
196
|
# Buffer to hold the last skip_footer_rows lines
|
213
197
|
buffer: deque[str] = deque(maxlen=skip_footer_rows + 1)
|
214
198
|
current_chunk: list[str] = []
|
215
|
-
|
199
|
+
|
216
200
|
for line in stream:
|
217
201
|
processed_line = line.strip() if strip else line.rstrip("\n")
|
218
|
-
|
202
|
+
|
219
203
|
# Add current line to buffer
|
220
204
|
buffer.append(processed_line)
|
221
|
-
|
205
|
+
|
222
206
|
# Wait until the buffer is full (skip_footer_rows + 1 lines) before processing lines.
|
223
207
|
# This ensures we have enough lines to reliably identify and skip the footer rows at the end.
|
224
208
|
if len(buffer) < skip_footer_rows + 1:
|
225
209
|
continue
|
226
|
-
|
210
|
+
|
227
211
|
# Once the buffer contains more than skip_footer_rows lines, the oldest line (removed with popleft)
|
228
212
|
# is guaranteed not to be part of the footer and can be safely processed and added to the current chunk.
|
229
213
|
safe_line = buffer.popleft()
|
230
214
|
current_chunk.append(safe_line)
|
231
|
-
|
215
|
+
|
232
216
|
# Yield chunk when it reaches the desired size
|
233
217
|
if len(current_chunk) >= chunk_size:
|
234
218
|
yield current_chunk
|
235
219
|
current_chunk = []
|
236
|
-
|
220
|
+
|
237
221
|
# At the end, the buffer contains exactly the footer rows to skip
|
238
222
|
# All other lines have already been processed and yielded
|
239
|
-
|
223
|
+
|
240
224
|
# Yield any remaining lines in the final chunk
|
241
225
|
if current_chunk:
|
242
226
|
yield current_chunk
|
243
227
|
else:
|
244
228
|
# No footer skipping needed - simple streaming
|
245
229
|
chunk: list[str] = []
|
246
|
-
|
230
|
+
|
247
231
|
for line in stream:
|
248
232
|
processed_line = line.strip() if strip else line.rstrip("\n")
|
249
233
|
chunk.append(processed_line)
|
250
|
-
|
234
|
+
|
251
235
|
# Yield chunk when it reaches the desired size
|
252
236
|
if len(chunk) >= chunk_size:
|
253
237
|
yield chunk
|
254
238
|
chunk = []
|
255
|
-
|
239
|
+
|
256
240
|
# Yield any remaining lines in the final chunk
|
257
241
|
if chunk:
|
258
242
|
yield chunk
|
@@ -265,7 +249,7 @@ class TextFileHelper:
|
|
265
249
|
strip: bool = DEFAULT_STRIP,
|
266
250
|
encoding: str = DEFAULT_ENCODING,
|
267
251
|
skip_header_rows: int = DEFAULT_SKIP_HEADER_ROWS,
|
268
|
-
skip_footer_rows: int = DEFAULT_SKIP_FOOTER_ROWS
|
252
|
+
skip_footer_rows: int = DEFAULT_SKIP_FOOTER_ROWS,
|
269
253
|
) -> list[str]:
|
270
254
|
"""
|
271
255
|
Read the entire contents of a text file into a list of strings.
|
@@ -291,42 +275,39 @@ class TextFileHelper:
|
|
291
275
|
"""
|
292
276
|
# Validate file path
|
293
277
|
validated_path = PathValidator.validate_path(
|
294
|
-
Path(file_path),
|
295
|
-
must_exist=True,
|
296
|
-
must_be_file=True,
|
297
|
-
must_be_readable=True
|
278
|
+
Path(file_path), must_exist=True, must_be_file=True, must_be_readable=True
|
298
279
|
)
|
299
|
-
|
280
|
+
|
300
281
|
skip_header_rows = max(skip_header_rows, cls.DEFAULT_SKIP_HEADER_ROWS)
|
301
282
|
skip_footer_rows = max(skip_footer_rows, cls.DEFAULT_SKIP_FOOTER_ROWS)
|
302
|
-
|
283
|
+
|
303
284
|
with safe_file_operation(validated_path, encoding=encoding, mode=cls.DEFAULT_MODE) as stream:
|
304
285
|
for _ in range(skip_header_rows):
|
305
286
|
if not stream.readline():
|
306
287
|
return []
|
307
|
-
|
288
|
+
|
308
289
|
try:
|
309
290
|
if skip_footer_rows > 0:
|
310
291
|
# Buffer to hold the last skip_footer_rows + 1 lines
|
311
292
|
buffer = deque(maxlen=skip_footer_rows + 1)
|
312
293
|
result: list[str] = []
|
313
|
-
|
294
|
+
|
314
295
|
for line in stream:
|
315
296
|
processed_line = line.strip() if strip else line.rstrip("\n")
|
316
|
-
|
297
|
+
|
317
298
|
# Add current line to buffer
|
318
299
|
buffer.append(processed_line)
|
319
|
-
|
300
|
+
|
320
301
|
# Wait until the buffer is full (skip_footer_rows + 1 lines) before processing lines.
|
321
302
|
# This ensures we have enough lines to reliably identify and skip the footer rows at the end.
|
322
303
|
if len(buffer) < skip_footer_rows + 1:
|
323
304
|
continue
|
324
|
-
|
305
|
+
|
325
306
|
# Once the buffer contains more than skip_footer_rows lines, the oldest line (removed with popleft)
|
326
307
|
# is guaranteed not to be part of the footer and can be safely processed and added to the result.
|
327
308
|
safe_line = buffer.popleft()
|
328
309
|
result.append(safe_line)
|
329
|
-
|
310
|
+
|
330
311
|
# At the end, the buffer contains exactly the footer rows to skip
|
331
312
|
# All other lines have already been processed and added to result
|
332
313
|
return result
|
@@ -337,7 +318,4 @@ class TextFileHelper:
|
|
337
318
|
result.append(processed_line)
|
338
319
|
return result
|
339
320
|
except UnicodeDecodeError as e:
|
340
|
-
raise SplurgeFileEncodingError(
|
341
|
-
f"Encoding error reading file: {validated_path}",
|
342
|
-
details=str(e)
|
343
|
-
)
|
321
|
+
raise SplurgeFileEncodingError(f"Encoding error reading file: {validated_path}", details=str(e)) from e
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: splurge-dsv
|
3
|
-
Version: 2025.1.1
|
3
|
+
Version: 2025.1.2
|
4
4
|
Summary: A utility library for working with DSV (Delimited String Values) files
|
5
5
|
Author: Jim Schilling
|
6
6
|
License-Expression: MIT
|
@@ -243,7 +243,32 @@ The project follows strict coding standards:
|
|
243
243
|
|
244
244
|
## Changelog
|
245
245
|
|
246
|
-
### 2025.1.1
|
246
|
+
### 2025.1.2 (2025-09-02)
|
247
|
+
|
248
|
+
#### 🧪 Comprehensive End-to-End Testing
|
249
|
+
- **Complete E2E Test Suite**: Implemented 25 comprehensive end-to-end workflow tests covering all major CLI functionality
|
250
|
+
- **Real CLI Execution**: Tests run actual `splurge-dsv` commands with real files, not just mocked components
|
251
|
+
- **Workflow Coverage**: Tests cover CSV/TSV parsing, file operations, data processing, error handling, and performance scenarios
|
252
|
+
- **Cross-Platform Compatibility**: Handles Windows-specific encoding issues and platform differences gracefully
|
253
|
+
- **Performance Testing**: Large file processing tests (1,000+ and 10,000+ rows) with streaming and chunking validation
|
254
|
+
|
255
|
+
#### 📊 Test Coverage Improvements
|
256
|
+
- **CLI Coverage**: Increased from 64% to **95%** with comprehensive CLI workflow testing
|
257
|
+
- **DSV Helper Coverage**: Improved from 75% to **93%** with real-world usage scenarios
|
258
|
+
- **Overall Coverage**: Improved from 60% to **73%** across the entire codebase
|
259
|
+
- **Integration Testing**: Added real file system operations and complete pipeline validation
|
260
|
+
|
261
|
+
#### 🔄 Test Categories
|
262
|
+
- **CLI Workflows**: 19 tests covering basic parsing, custom delimiters, header/footer skipping, streaming, and error scenarios
|
263
|
+
- **Error Handling**: 3 tests for invalid arguments, missing parameters, and CLI error conditions
|
264
|
+
- **Integration Scenarios**: 3 tests for data analysis, transformation, and multi-format workflows
|
265
|
+
|
266
|
+
#### 📚 Documentation & Examples
|
267
|
+
- **E2E Testing Guide**: Created comprehensive documentation (`docs/e2e_testing_coverage.md`) explaining test coverage and usage
|
268
|
+
- **Real-World Examples**: Tests serve as practical examples of library usage patterns
|
269
|
+
- **Error Scenario Coverage**: Comprehensive testing of edge cases and failure conditions
|
270
|
+
|
271
|
+
### 2025.1.1 (2025-08-XX)
|
247
272
|
|
248
273
|
#### 🔧 Code Quality Improvements
|
249
274
|
- **Refactored Complex Regex Logic**: Extracted Windows drive letter validation logic from `_check_dangerous_characters` into a dedicated `_is_valid_windows_drive_pattern` helper method in `PathValidator` for better readability and maintainability
|
@@ -0,0 +1,14 @@
|
|
1
|
+
splurge_dsv/__init__.py,sha256=UBfew2IVQhg5VbHjTe_O7e2b1QeoYen_so--4B6p0h4,2415
|
2
|
+
splurge_dsv/__main__.py,sha256=RkXcU3P3tHpFa6iGJfx8sAZ4aUcz6z7TIN_1q_5N9xE,354
|
3
|
+
splurge_dsv/cli.py,sha256=tRIQT0InflJPzy8SGUaHyD0aPMr7CgmyHbSVRkaQeoo,5759
|
4
|
+
splurge_dsv/dsv_helper.py,sha256=VRq2ejx6y-JBtFKJdAaK-GD4V0eoxZZfmoX0I3CSbDI,9428
|
5
|
+
splurge_dsv/exceptions.py,sha256=cu9Jd2pGhy7GBbbngH6zs0lfZzLp_OvGwbnsG5khp80,3035
|
6
|
+
splurge_dsv/path_validator.py,sha256=R314U2TmQjYVe_KOv4ni08z1_tLuo4_Uuld__dhX_gw,9957
|
7
|
+
splurge_dsv/resource_manager.py,sha256=g4igv1hhJdPtw4A1P-WdaNKcYMfEGCtZ4xaU4ynVbKw,12045
|
8
|
+
splurge_dsv/string_tokenizer.py,sha256=wBKWdi68rreTqf3RF-8Oxh1nz6QdnbOyWflw2x8pGWY,4022
|
9
|
+
splurge_dsv/text_file_helper.py,sha256=vdhEv9uK0hsNXvTO5SoWwFsI9fPkhdIUXuaJs80DVDA,13573
|
10
|
+
splurge_dsv-2025.1.2.dist-info/licenses/LICENSE,sha256=fPgtg-tIFHinQvJH0arRfv50AuxikD5eHw6rrPy2A5w,1091
|
11
|
+
splurge_dsv-2025.1.2.dist-info/METADATA,sha256=z0qLKtJ0Y8PgVhstBLpVUTATsWRQEv-GI34aaJOxbfs,14021
|
12
|
+
splurge_dsv-2025.1.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
13
|
+
splurge_dsv-2025.1.2.dist-info/top_level.txt,sha256=D6Si3FTfpRYqH7kzM7tSQAyaKbbraO6UPLpcqcY4XXM,12
|
14
|
+
splurge_dsv-2025.1.2.dist-info/RECORD,,
|
@@ -1,13 +0,0 @@
|
|
1
|
-
splurge_dsv/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
2
|
-
splurge_dsv/__main__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
3
|
-
splurge_dsv/dsv_helper.py,sha256=gv9Wwf_soqdXuZWkBpxSyvJVqryKDfEoSf9SoAaRX1A,9651
|
4
|
-
splurge_dsv/exceptions.py,sha256=tPVLXxf8LPc0yd8L8xjik331B4-NUVYfyd6ifPLUtm4,3052
|
5
|
-
splurge_dsv/path_validator.py,sha256=2vXsNnmRTLbdYedDrF5kQZsOUvfLhSxkXyRj26OZyj8,10036
|
6
|
-
splurge_dsv/resource_manager.py,sha256=fldvZQpzznANf4ahHre_lSqQ5_FUWndFuSlyccBylGE,13013
|
7
|
-
splurge_dsv/string_tokenizer.py,sha256=GDAtJht-d9vt8Kb4yglxfl2iMfSMWGT3esksISNklMU,4203
|
8
|
-
splurge_dsv/text_file_helper.py,sha256=htYT1Z0aKuQt24X2IfSKnFHB1AOY3dV7lRU8Vr6iqn0,14237
|
9
|
-
splurge_dsv-2025.1.1.dist-info/licenses/LICENSE,sha256=fPgtg-tIFHinQvJH0arRfv50AuxikD5eHw6rrPy2A5w,1091
|
10
|
-
splurge_dsv-2025.1.1.dist-info/METADATA,sha256=R9-qCXkrt-orUGkLNJbnc-cGLY67B8P1uJFTyjmIJoY,12245
|
11
|
-
splurge_dsv-2025.1.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
12
|
-
splurge_dsv-2025.1.1.dist-info/top_level.txt,sha256=D6Si3FTfpRYqH7kzM7tSQAyaKbbraO6UPLpcqcY4XXM,12
|
13
|
-
splurge_dsv-2025.1.1.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|