splurge-dsv 2025.1.1__py3-none-any.whl → 2025.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -22,15 +22,14 @@ Please preserve this header and all related material when sharing!
22
22
  This module is licensed under the MIT License.
23
23
  """
24
24
 
25
+ # Standard library imports
25
26
  from collections import deque
27
+ from collections.abc import Iterator
26
28
  from os import PathLike
27
29
  from pathlib import Path
28
- from typing import Iterator
29
30
 
30
- from splurge_dsv.exceptions import (
31
- SplurgeParameterError,
32
- SplurgeFileEncodingError
33
- )
31
+ # Local imports
32
+ from splurge_dsv.exceptions import SplurgeFileEncodingError, SplurgeParameterError
34
33
  from splurge_dsv.path_validator import PathValidator
35
34
  from splurge_dsv.resource_manager import safe_file_operation
36
35
 
@@ -51,12 +50,7 @@ class TextFileHelper:
51
50
  DEFAULT_MODE = "r"
52
51
 
53
52
  @classmethod
54
- def line_count(
55
- cls,
56
- file_path: PathLike[str] | str,
57
- *,
58
- encoding: str = DEFAULT_ENCODING
59
- ) -> int:
53
+ def line_count(cls, file_path: PathLike[str] | str, *, encoding: str = DEFAULT_ENCODING) -> int:
60
54
  """
61
55
  Count the number of lines in a text file.
62
56
 
@@ -78,12 +72,9 @@ class TextFileHelper:
78
72
  """
79
73
  # Validate file path
80
74
  validated_path = PathValidator.validate_path(
81
- Path(file_path),
82
- must_exist=True,
83
- must_be_file=True,
84
- must_be_readable=True
75
+ Path(file_path), must_exist=True, must_be_file=True, must_be_readable=True
85
76
  )
86
-
77
+
87
78
  with safe_file_operation(validated_path, encoding=encoding, mode=cls.DEFAULT_MODE) as stream:
88
79
  return sum(1 for _ in stream)
89
80
 
@@ -95,7 +86,7 @@ class TextFileHelper:
95
86
  max_lines: int = DEFAULT_MAX_LINES,
96
87
  strip: bool = DEFAULT_STRIP,
97
88
  encoding: str = DEFAULT_ENCODING,
98
- skip_header_rows: int = DEFAULT_SKIP_HEADER_ROWS
89
+ skip_header_rows: int = DEFAULT_SKIP_HEADER_ROWS,
99
90
  ) -> list[str]:
100
91
  """
101
92
  Preview the first N lines of a text file.
@@ -122,34 +113,30 @@ class TextFileHelper:
122
113
  """
123
114
  if max_lines < 1:
124
115
  raise SplurgeParameterError(
125
- "TextFileHelper.preview: max_lines is less than 1",
126
- details="max_lines must be at least 1"
116
+ "TextFileHelper.preview: max_lines is less than 1", details="max_lines must be at least 1"
127
117
  )
128
-
118
+
129
119
  # Validate file path
130
120
  validated_path = PathValidator.validate_path(
131
- Path(file_path),
132
- must_exist=True,
133
- must_be_file=True,
134
- must_be_readable=True
121
+ Path(file_path), must_exist=True, must_be_file=True, must_be_readable=True
135
122
  )
136
-
123
+
137
124
  skip_header_rows = max(skip_header_rows, cls.DEFAULT_SKIP_HEADER_ROWS)
138
125
  lines: list[str] = []
139
-
126
+
140
127
  with safe_file_operation(validated_path, encoding=encoding, mode=cls.DEFAULT_MODE) as stream:
141
128
  # Skip header rows
142
129
  for _ in range(skip_header_rows):
143
130
  if not stream.readline():
144
131
  return lines
145
-
132
+
146
133
  # Read up to max_lines after skipping headers
147
134
  for _ in range(max_lines):
148
135
  line = stream.readline()
149
136
  if not line:
150
137
  break
151
138
  lines.append(line.strip() if strip else line.rstrip("\n"))
152
-
139
+
153
140
  return lines
154
141
 
155
142
  @classmethod
@@ -161,7 +148,7 @@ class TextFileHelper:
161
148
  encoding: str = DEFAULT_ENCODING,
162
149
  skip_header_rows: int = DEFAULT_SKIP_HEADER_ROWS,
163
150
  skip_footer_rows: int = DEFAULT_SKIP_FOOTER_ROWS,
164
- chunk_size: int = DEFAULT_CHUNK_SIZE
151
+ chunk_size: int = DEFAULT_CHUNK_SIZE,
165
152
  ) -> Iterator[list[str]]:
166
153
  """
167
154
  Read a text file as a stream of line chunks.
@@ -192,67 +179,64 @@ class TextFileHelper:
192
179
  chunk_size = max(chunk_size, cls.DEFAULT_MIN_CHUNK_SIZE)
193
180
  skip_header_rows = max(skip_header_rows, cls.DEFAULT_SKIP_HEADER_ROWS)
194
181
  skip_footer_rows = max(skip_footer_rows, cls.DEFAULT_SKIP_FOOTER_ROWS)
195
-
182
+
196
183
  # Validate file path
197
184
  validated_path = PathValidator.validate_path(
198
- Path(file_path),
199
- must_exist=True,
200
- must_be_file=True,
201
- must_be_readable=True
202
- )
203
-
185
+ Path(file_path), must_exist=True, must_be_file=True, must_be_readable=True
186
+ )
187
+
204
188
  with safe_file_operation(validated_path, encoding=encoding, mode=cls.DEFAULT_MODE) as stream:
205
189
  # Skip header rows
206
190
  for _ in range(skip_header_rows):
207
191
  if not stream.readline():
208
192
  return
209
-
193
+
210
194
  # Use a sliding window to handle footer skipping efficiently
211
195
  if skip_footer_rows > 0:
212
196
  # Buffer to hold the last skip_footer_rows lines
213
197
  buffer: deque[str] = deque(maxlen=skip_footer_rows + 1)
214
198
  current_chunk: list[str] = []
215
-
199
+
216
200
  for line in stream:
217
201
  processed_line = line.strip() if strip else line.rstrip("\n")
218
-
202
+
219
203
  # Add current line to buffer
220
204
  buffer.append(processed_line)
221
-
205
+
222
206
  # Wait until the buffer is full (skip_footer_rows + 1 lines) before processing lines.
223
207
  # This ensures we have enough lines to reliably identify and skip the footer rows at the end.
224
208
  if len(buffer) < skip_footer_rows + 1:
225
209
  continue
226
-
210
+
227
211
  # Once the buffer contains more than skip_footer_rows lines, the oldest line (removed with popleft)
228
212
  # is guaranteed not to be part of the footer and can be safely processed and added to the current chunk.
229
213
  safe_line = buffer.popleft()
230
214
  current_chunk.append(safe_line)
231
-
215
+
232
216
  # Yield chunk when it reaches the desired size
233
217
  if len(current_chunk) >= chunk_size:
234
218
  yield current_chunk
235
219
  current_chunk = []
236
-
220
+
237
221
  # At the end, the buffer contains exactly the footer rows to skip
238
222
  # All other lines have already been processed and yielded
239
-
223
+
240
224
  # Yield any remaining lines in the final chunk
241
225
  if current_chunk:
242
226
  yield current_chunk
243
227
  else:
244
228
  # No footer skipping needed - simple streaming
245
229
  chunk: list[str] = []
246
-
230
+
247
231
  for line in stream:
248
232
  processed_line = line.strip() if strip else line.rstrip("\n")
249
233
  chunk.append(processed_line)
250
-
234
+
251
235
  # Yield chunk when it reaches the desired size
252
236
  if len(chunk) >= chunk_size:
253
237
  yield chunk
254
238
  chunk = []
255
-
239
+
256
240
  # Yield any remaining lines in the final chunk
257
241
  if chunk:
258
242
  yield chunk
@@ -265,7 +249,7 @@ class TextFileHelper:
265
249
  strip: bool = DEFAULT_STRIP,
266
250
  encoding: str = DEFAULT_ENCODING,
267
251
  skip_header_rows: int = DEFAULT_SKIP_HEADER_ROWS,
268
- skip_footer_rows: int = DEFAULT_SKIP_FOOTER_ROWS
252
+ skip_footer_rows: int = DEFAULT_SKIP_FOOTER_ROWS,
269
253
  ) -> list[str]:
270
254
  """
271
255
  Read the entire contents of a text file into a list of strings.
@@ -291,42 +275,39 @@ class TextFileHelper:
291
275
  """
292
276
  # Validate file path
293
277
  validated_path = PathValidator.validate_path(
294
- Path(file_path),
295
- must_exist=True,
296
- must_be_file=True,
297
- must_be_readable=True
278
+ Path(file_path), must_exist=True, must_be_file=True, must_be_readable=True
298
279
  )
299
-
280
+
300
281
  skip_header_rows = max(skip_header_rows, cls.DEFAULT_SKIP_HEADER_ROWS)
301
282
  skip_footer_rows = max(skip_footer_rows, cls.DEFAULT_SKIP_FOOTER_ROWS)
302
-
283
+
303
284
  with safe_file_operation(validated_path, encoding=encoding, mode=cls.DEFAULT_MODE) as stream:
304
285
  for _ in range(skip_header_rows):
305
286
  if not stream.readline():
306
287
  return []
307
-
288
+
308
289
  try:
309
290
  if skip_footer_rows > 0:
310
291
  # Buffer to hold the last skip_footer_rows + 1 lines
311
292
  buffer = deque(maxlen=skip_footer_rows + 1)
312
293
  result: list[str] = []
313
-
294
+
314
295
  for line in stream:
315
296
  processed_line = line.strip() if strip else line.rstrip("\n")
316
-
297
+
317
298
  # Add current line to buffer
318
299
  buffer.append(processed_line)
319
-
300
+
320
301
  # Wait until the buffer is full (skip_footer_rows + 1 lines) before processing lines.
321
302
  # This ensures we have enough lines to reliably identify and skip the footer rows at the end.
322
303
  if len(buffer) < skip_footer_rows + 1:
323
304
  continue
324
-
305
+
325
306
  # Once the buffer contains more than skip_footer_rows lines, the oldest line (removed with popleft)
326
307
  # is guaranteed not to be part of the footer and can be safely processed and added to the result.
327
308
  safe_line = buffer.popleft()
328
309
  result.append(safe_line)
329
-
310
+
330
311
  # At the end, the buffer contains exactly the footer rows to skip
331
312
  # All other lines have already been processed and added to result
332
313
  return result
@@ -337,7 +318,4 @@ class TextFileHelper:
337
318
  result.append(processed_line)
338
319
  return result
339
320
  except UnicodeDecodeError as e:
340
- raise SplurgeFileEncodingError(
341
- f"Encoding error reading file: {validated_path}",
342
- details=str(e)
343
- )
321
+ raise SplurgeFileEncodingError(f"Encoding error reading file: {validated_path}", details=str(e)) from e
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: splurge-dsv
3
- Version: 2025.1.1
3
+ Version: 2025.1.3
4
4
  Summary: A utility library for working with DSV (Delimited String Values) files
5
5
  Author: Jim Schilling
6
6
  License-Expression: MIT
@@ -53,8 +53,8 @@ A robust Python library for parsing and processing delimited-separated value (DS
53
53
  - **Error Recovery**: Graceful error handling with detailed error messages
54
54
 
55
55
  ### 🧪 Testing & Quality
56
- - **Comprehensive Test Suite**: 90%+ code coverage with 250+ tests
57
- - **Cross-Platform Support**: Tested on Windows, Linux, and macOS
56
+ - **Comprehensive Test Suite**: 250+ tests with 85%+ coverage gate
57
+ - **Cross-Platform Support**: Tested on Windows; expected to pass on Linux and macOS
58
58
  - **Type Safety**: Full type annotations and validation
59
59
  - **Documentation**: Complete API documentation with examples
60
60
 
@@ -238,12 +238,42 @@ The project follows strict coding standards:
238
238
  - PEP 8 compliance
239
239
  - Type annotations for all functions
240
240
  - Google-style docstrings
241
- - 90%+ test coverage requirement
241
+ - 85%+ coverage gate enforced via CI
242
242
  - Comprehensive error handling
243
243
 
244
244
  ## Changelog
245
245
 
246
- ### 2025.1.1 (2025-01-XX)
246
+ ### 2025.1.3 (2025-09-03)
247
+
248
+ #### 🔧 Maintenance & Consistency
249
+ - **Version Alignment**: Bumped `__version__` and CLI `--version` to `2025.1.3` to match `pyproject.toml`.
250
+ - **CLI Path Validation**: Centralized validation using `PathValidator.validate_path(...)` for consistent error handling.
251
+ - **Type Correctness**: Fixed `PathValidator._is_valid_windows_drive_pattern` to return `bool` explicitly.
252
+ - **Docs Alignment**: Updated README coverage claims to reflect the `>=85%` coverage gate configured in CI.
253
+
254
+ ### 2025.1.2 (2025-09-02)
255
+
256
+ #### 🧪 Comprehensive End-to-End Testing
257
+ - **Complete E2E Test Suite**: Implemented 25 comprehensive end-to-end workflow tests covering all major CLI functionality
258
+ - **Real CLI Execution**: Tests run actual `splurge-dsv` commands with real files, not just mocked components
259
+ - **Workflow Coverage**: Tests cover CSV/TSV parsing, file operations, data processing, error handling, and performance scenarios
260
+ - **Cross-Platform Compatibility**: Handles Windows-specific encoding issues and platform differences gracefully
261
+ - **Performance Testing**: Large file processing tests (1,000+ and 10,000+ rows) with streaming and chunking validation
262
+
263
+ #### 📊 Test Coverage Improvements
264
+ - **Integration Testing**: Added real file system operations and complete pipeline validation
265
+
266
+ #### 🔄 Test Categories
267
+ - **CLI Workflows**: 19 tests covering basic parsing, custom delimiters, header/footer skipping, streaming, and error scenarios
268
+ - **Error Handling**: 3 tests for invalid arguments, missing parameters, and CLI error conditions
269
+ - **Integration Scenarios**: 3 tests for data analysis, transformation, and multi-format workflows
270
+
271
+ #### 📚 Documentation & Examples
272
+ - **E2E Testing Guide**: Created comprehensive documentation (`docs/e2e_testing_coverage.md`) explaining test coverage and usage
273
+ - **Real-World Examples**: Tests serve as practical examples of library usage patterns
274
+ - **Error Scenario Coverage**: Comprehensive testing of edge cases and failure conditions
275
+
276
+ ### 2025.1.1 (2025-08-XX)
247
277
 
248
278
  #### 🔧 Code Quality Improvements
249
279
  - **Refactored Complex Regex Logic**: Extracted Windows drive letter validation logic from `_check_dangerous_characters` into a dedicated `_is_valid_windows_drive_pattern` helper method in `PathValidator` for better readability and maintainability
@@ -285,7 +315,7 @@ The project follows strict coding standards:
285
315
  - **StringTokenizer**: Core string parsing functionality
286
316
 
287
317
  #### 🧪 Testing & Quality
288
- - **Comprehensive Test Suite**: 250+ tests with 90%+ code coverage
318
+ - **Comprehensive Test Suite**: 250+ tests with 85%+ coverage gate
289
319
  - **Cross-Platform Testing**: Tested on Windows, Linux, and macOS
290
320
  - **Type Safety**: Full type annotations throughout the codebase
291
321
  - **Error Handling**: Custom exception hierarchy with detailed error messages
@@ -0,0 +1,14 @@
1
+ splurge_dsv/__init__.py,sha256=YaY3yCQF5bNVO76yaITvqggFOg3QkpkY3CjtkkLf-C8,2415
2
+ splurge_dsv/__main__.py,sha256=RkXcU3P3tHpFa6iGJfx8sAZ4aUcz6z7TIN_1q_5N9xE,354
3
+ splurge_dsv/cli.py,sha256=hFchXI5DdLTpfdOgvh0Unq_nvsN8YNzJg86DgygdLrg,5866
4
+ splurge_dsv/dsv_helper.py,sha256=VRq2ejx6y-JBtFKJdAaK-GD4V0eoxZZfmoX0I3CSbDI,9428
5
+ splurge_dsv/exceptions.py,sha256=cu9Jd2pGhy7GBbbngH6zs0lfZzLp_OvGwbnsG5khp80,3035
6
+ splurge_dsv/path_validator.py,sha256=RuRFjtHWE1Z5-DlSBBZMoHJegQEFYEQ0HJgN7ndre2k,9969
7
+ splurge_dsv/resource_manager.py,sha256=g4igv1hhJdPtw4A1P-WdaNKcYMfEGCtZ4xaU4ynVbKw,12045
8
+ splurge_dsv/string_tokenizer.py,sha256=wBKWdi68rreTqf3RF-8Oxh1nz6QdnbOyWflw2x8pGWY,4022
9
+ splurge_dsv/text_file_helper.py,sha256=vdhEv9uK0hsNXvTO5SoWwFsI9fPkhdIUXuaJs80DVDA,13573
10
+ splurge_dsv-2025.1.3.dist-info/licenses/LICENSE,sha256=fPgtg-tIFHinQvJH0arRfv50AuxikD5eHw6rrPy2A5w,1091
11
+ splurge_dsv-2025.1.3.dist-info/METADATA,sha256=yKp4mvzuf18nvn2gD6iDBokkVHMqTiX5tfpYuqWtWk8,14298
12
+ splurge_dsv-2025.1.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
13
+ splurge_dsv-2025.1.3.dist-info/top_level.txt,sha256=D6Si3FTfpRYqH7kzM7tSQAyaKbbraO6UPLpcqcY4XXM,12
14
+ splurge_dsv-2025.1.3.dist-info/RECORD,,
@@ -1,13 +0,0 @@
1
- splurge_dsv/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- splurge_dsv/__main__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
- splurge_dsv/dsv_helper.py,sha256=gv9Wwf_soqdXuZWkBpxSyvJVqryKDfEoSf9SoAaRX1A,9651
4
- splurge_dsv/exceptions.py,sha256=tPVLXxf8LPc0yd8L8xjik331B4-NUVYfyd6ifPLUtm4,3052
5
- splurge_dsv/path_validator.py,sha256=2vXsNnmRTLbdYedDrF5kQZsOUvfLhSxkXyRj26OZyj8,10036
6
- splurge_dsv/resource_manager.py,sha256=fldvZQpzznANf4ahHre_lSqQ5_FUWndFuSlyccBylGE,13013
7
- splurge_dsv/string_tokenizer.py,sha256=GDAtJht-d9vt8Kb4yglxfl2iMfSMWGT3esksISNklMU,4203
8
- splurge_dsv/text_file_helper.py,sha256=htYT1Z0aKuQt24X2IfSKnFHB1AOY3dV7lRU8Vr6iqn0,14237
9
- splurge_dsv-2025.1.1.dist-info/licenses/LICENSE,sha256=fPgtg-tIFHinQvJH0arRfv50AuxikD5eHw6rrPy2A5w,1091
10
- splurge_dsv-2025.1.1.dist-info/METADATA,sha256=R9-qCXkrt-orUGkLNJbnc-cGLY67B8P1uJFTyjmIJoY,12245
11
- splurge_dsv-2025.1.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
12
- splurge_dsv-2025.1.1.dist-info/top_level.txt,sha256=D6Si3FTfpRYqH7kzM7tSQAyaKbbraO6UPLpcqcY4XXM,12
13
- splurge_dsv-2025.1.1.dist-info/RECORD,,