splurge-dsv 2025.1.0__py3-none-any.whl → 2025.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -22,15 +22,14 @@ Please preserve this header and all related material when sharing!
22
22
  This module is licensed under the MIT License.
23
23
  """
24
24
 
25
+ # Standard library imports
25
26
  from collections import deque
27
+ from collections.abc import Iterator
26
28
  from os import PathLike
27
29
  from pathlib import Path
28
- from typing import Iterator
29
30
 
30
- from splurge_dsv.exceptions import (
31
- SplurgeParameterError,
32
- SplurgeFileEncodingError
33
- )
31
+ # Local imports
32
+ from splurge_dsv.exceptions import SplurgeFileEncodingError, SplurgeParameterError
34
33
  from splurge_dsv.path_validator import PathValidator
35
34
  from splurge_dsv.resource_manager import safe_file_operation
36
35
 
@@ -51,12 +50,7 @@ class TextFileHelper:
51
50
  DEFAULT_MODE = "r"
52
51
 
53
52
  @classmethod
54
- def line_count(
55
- cls,
56
- file_path: PathLike[str] | str,
57
- *,
58
- encoding: str = DEFAULT_ENCODING
59
- ) -> int:
53
+ def line_count(cls, file_path: PathLike[str] | str, *, encoding: str = DEFAULT_ENCODING) -> int:
60
54
  """
61
55
  Count the number of lines in a text file.
62
56
 
@@ -78,12 +72,9 @@ class TextFileHelper:
78
72
  """
79
73
  # Validate file path
80
74
  validated_path = PathValidator.validate_path(
81
- Path(file_path),
82
- must_exist=True,
83
- must_be_file=True,
84
- must_be_readable=True
75
+ Path(file_path), must_exist=True, must_be_file=True, must_be_readable=True
85
76
  )
86
-
77
+
87
78
  with safe_file_operation(validated_path, encoding=encoding, mode=cls.DEFAULT_MODE) as stream:
88
79
  return sum(1 for _ in stream)
89
80
 
@@ -95,7 +86,7 @@ class TextFileHelper:
95
86
  max_lines: int = DEFAULT_MAX_LINES,
96
87
  strip: bool = DEFAULT_STRIP,
97
88
  encoding: str = DEFAULT_ENCODING,
98
- skip_header_rows: int = DEFAULT_SKIP_HEADER_ROWS
89
+ skip_header_rows: int = DEFAULT_SKIP_HEADER_ROWS,
99
90
  ) -> list[str]:
100
91
  """
101
92
  Preview the first N lines of a text file.
@@ -122,34 +113,30 @@ class TextFileHelper:
122
113
  """
123
114
  if max_lines < 1:
124
115
  raise SplurgeParameterError(
125
- "TextFileHelper.preview: max_lines is less than 1",
126
- details="max_lines must be at least 1"
116
+ "TextFileHelper.preview: max_lines is less than 1", details="max_lines must be at least 1"
127
117
  )
128
-
118
+
129
119
  # Validate file path
130
120
  validated_path = PathValidator.validate_path(
131
- Path(file_path),
132
- must_exist=True,
133
- must_be_file=True,
134
- must_be_readable=True
121
+ Path(file_path), must_exist=True, must_be_file=True, must_be_readable=True
135
122
  )
136
-
123
+
137
124
  skip_header_rows = max(skip_header_rows, cls.DEFAULT_SKIP_HEADER_ROWS)
138
125
  lines: list[str] = []
139
-
126
+
140
127
  with safe_file_operation(validated_path, encoding=encoding, mode=cls.DEFAULT_MODE) as stream:
141
128
  # Skip header rows
142
129
  for _ in range(skip_header_rows):
143
130
  if not stream.readline():
144
131
  return lines
145
-
132
+
146
133
  # Read up to max_lines after skipping headers
147
134
  for _ in range(max_lines):
148
135
  line = stream.readline()
149
136
  if not line:
150
137
  break
151
138
  lines.append(line.strip() if strip else line.rstrip("\n"))
152
-
139
+
153
140
  return lines
154
141
 
155
142
  @classmethod
@@ -161,7 +148,7 @@ class TextFileHelper:
161
148
  encoding: str = DEFAULT_ENCODING,
162
149
  skip_header_rows: int = DEFAULT_SKIP_HEADER_ROWS,
163
150
  skip_footer_rows: int = DEFAULT_SKIP_FOOTER_ROWS,
164
- chunk_size: int = DEFAULT_CHUNK_SIZE
151
+ chunk_size: int = DEFAULT_CHUNK_SIZE,
165
152
  ) -> Iterator[list[str]]:
166
153
  """
167
154
  Read a text file as a stream of line chunks.
@@ -192,67 +179,64 @@ class TextFileHelper:
192
179
  chunk_size = max(chunk_size, cls.DEFAULT_MIN_CHUNK_SIZE)
193
180
  skip_header_rows = max(skip_header_rows, cls.DEFAULT_SKIP_HEADER_ROWS)
194
181
  skip_footer_rows = max(skip_footer_rows, cls.DEFAULT_SKIP_FOOTER_ROWS)
195
-
182
+
196
183
  # Validate file path
197
184
  validated_path = PathValidator.validate_path(
198
- Path(file_path),
199
- must_exist=True,
200
- must_be_file=True,
201
- must_be_readable=True
202
- )
203
-
185
+ Path(file_path), must_exist=True, must_be_file=True, must_be_readable=True
186
+ )
187
+
204
188
  with safe_file_operation(validated_path, encoding=encoding, mode=cls.DEFAULT_MODE) as stream:
205
189
  # Skip header rows
206
190
  for _ in range(skip_header_rows):
207
191
  if not stream.readline():
208
192
  return
209
-
193
+
210
194
  # Use a sliding window to handle footer skipping efficiently
211
195
  if skip_footer_rows > 0:
212
196
  # Buffer to hold the last skip_footer_rows lines
213
197
  buffer: deque[str] = deque(maxlen=skip_footer_rows + 1)
214
198
  current_chunk: list[str] = []
215
-
199
+
216
200
  for line in stream:
217
201
  processed_line = line.strip() if strip else line.rstrip("\n")
218
-
202
+
219
203
  # Add current line to buffer
220
204
  buffer.append(processed_line)
221
-
205
+
222
206
  # Wait until the buffer is full (skip_footer_rows + 1 lines) before processing lines.
223
207
  # This ensures we have enough lines to reliably identify and skip the footer rows at the end.
224
208
  if len(buffer) < skip_footer_rows + 1:
225
209
  continue
226
-
210
+
227
211
  # Once the buffer contains more than skip_footer_rows lines, the oldest line (removed with popleft)
228
212
  # is guaranteed not to be part of the footer and can be safely processed and added to the current chunk.
229
213
  safe_line = buffer.popleft()
230
214
  current_chunk.append(safe_line)
231
-
215
+
232
216
  # Yield chunk when it reaches the desired size
233
217
  if len(current_chunk) >= chunk_size:
234
218
  yield current_chunk
235
219
  current_chunk = []
236
-
220
+
237
221
  # At the end, the buffer contains exactly the footer rows to skip
238
222
  # All other lines have already been processed and yielded
239
-
223
+
240
224
  # Yield any remaining lines in the final chunk
241
225
  if current_chunk:
242
226
  yield current_chunk
243
227
  else:
244
228
  # No footer skipping needed - simple streaming
245
229
  chunk: list[str] = []
246
-
230
+
247
231
  for line in stream:
248
232
  processed_line = line.strip() if strip else line.rstrip("\n")
249
233
  chunk.append(processed_line)
250
-
234
+
251
235
  # Yield chunk when it reaches the desired size
252
236
  if len(chunk) >= chunk_size:
253
237
  yield chunk
254
238
  chunk = []
255
-
239
+
256
240
  # Yield any remaining lines in the final chunk
257
241
  if chunk:
258
242
  yield chunk
@@ -265,7 +249,7 @@ class TextFileHelper:
265
249
  strip: bool = DEFAULT_STRIP,
266
250
  encoding: str = DEFAULT_ENCODING,
267
251
  skip_header_rows: int = DEFAULT_SKIP_HEADER_ROWS,
268
- skip_footer_rows: int = DEFAULT_SKIP_FOOTER_ROWS
252
+ skip_footer_rows: int = DEFAULT_SKIP_FOOTER_ROWS,
269
253
  ) -> list[str]:
270
254
  """
271
255
  Read the entire contents of a text file into a list of strings.
@@ -291,42 +275,39 @@ class TextFileHelper:
291
275
  """
292
276
  # Validate file path
293
277
  validated_path = PathValidator.validate_path(
294
- Path(file_path),
295
- must_exist=True,
296
- must_be_file=True,
297
- must_be_readable=True
278
+ Path(file_path), must_exist=True, must_be_file=True, must_be_readable=True
298
279
  )
299
-
280
+
300
281
  skip_header_rows = max(skip_header_rows, cls.DEFAULT_SKIP_HEADER_ROWS)
301
282
  skip_footer_rows = max(skip_footer_rows, cls.DEFAULT_SKIP_FOOTER_ROWS)
302
-
283
+
303
284
  with safe_file_operation(validated_path, encoding=encoding, mode=cls.DEFAULT_MODE) as stream:
304
285
  for _ in range(skip_header_rows):
305
286
  if not stream.readline():
306
287
  return []
307
-
288
+
308
289
  try:
309
290
  if skip_footer_rows > 0:
310
291
  # Buffer to hold the last skip_footer_rows + 1 lines
311
292
  buffer = deque(maxlen=skip_footer_rows + 1)
312
293
  result: list[str] = []
313
-
294
+
314
295
  for line in stream:
315
296
  processed_line = line.strip() if strip else line.rstrip("\n")
316
-
297
+
317
298
  # Add current line to buffer
318
299
  buffer.append(processed_line)
319
-
300
+
320
301
  # Wait until the buffer is full (skip_footer_rows + 1 lines) before processing lines.
321
302
  # This ensures we have enough lines to reliably identify and skip the footer rows at the end.
322
303
  if len(buffer) < skip_footer_rows + 1:
323
304
  continue
324
-
305
+
325
306
  # Once the buffer contains more than skip_footer_rows lines, the oldest line (removed with popleft)
326
307
  # is guaranteed not to be part of the footer and can be safely processed and added to the result.
327
308
  safe_line = buffer.popleft()
328
309
  result.append(safe_line)
329
-
310
+
330
311
  # At the end, the buffer contains exactly the footer rows to skip
331
312
  # All other lines have already been processed and added to result
332
313
  return result
@@ -337,7 +318,4 @@ class TextFileHelper:
337
318
  result.append(processed_line)
338
319
  return result
339
320
  except UnicodeDecodeError as e:
340
- raise SplurgeFileEncodingError(
341
- f"Encoding error reading file: {validated_path}",
342
- details=str(e)
343
- )
321
+ raise SplurgeFileEncodingError(f"Encoding error reading file: {validated_path}", details=str(e)) from e
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: splurge-dsv
3
- Version: 2025.1.0
3
+ Version: 2025.1.2
4
4
  Summary: A utility library for working with DSV (Delimited String Values) files
5
5
  Author: Jim Schilling
6
6
  License-Expression: MIT
@@ -243,6 +243,52 @@ The project follows strict coding standards:
243
243
 
244
244
  ## Changelog
245
245
 
246
+ ### 2025.1.2 (2025-09-02)
247
+
248
+ #### 🧪 Comprehensive End-to-End Testing
249
+ - **Complete E2E Test Suite**: Implemented 25 comprehensive end-to-end workflow tests covering all major CLI functionality
250
+ - **Real CLI Execution**: Tests run actual `splurge-dsv` commands with real files, not just mocked components
251
+ - **Workflow Coverage**: Tests cover CSV/TSV parsing, file operations, data processing, error handling, and performance scenarios
252
+ - **Cross-Platform Compatibility**: Handles Windows-specific encoding issues and platform differences gracefully
253
+ - **Performance Testing**: Large file processing tests (1,000+ and 10,000+ rows) with streaming and chunking validation
254
+
255
+ #### 📊 Test Coverage Improvements
256
+ - **CLI Coverage**: Increased from 64% to **95%** with comprehensive CLI workflow testing
257
+ - **DSV Helper Coverage**: Improved from 75% to **93%** with real-world usage scenarios
258
+ - **Overall Coverage**: Improved from 60% to **73%** across the entire codebase
259
+ - **Integration Testing**: Added real file system operations and complete pipeline validation
260
+
261
+ #### 🔄 Test Categories
262
+ - **CLI Workflows**: 19 tests covering basic parsing, custom delimiters, header/footer skipping, streaming, and error scenarios
263
+ - **Error Handling**: 3 tests for invalid arguments, missing parameters, and CLI error conditions
264
+ - **Integration Scenarios**: 3 tests for data analysis, transformation, and multi-format workflows
265
+
266
+ #### 📚 Documentation & Examples
267
+ - **E2E Testing Guide**: Created comprehensive documentation (`docs/e2e_testing_coverage.md`) explaining test coverage and usage
268
+ - **Real-World Examples**: Tests serve as practical examples of library usage patterns
269
+ - **Error Scenario Coverage**: Comprehensive testing of edge cases and failure conditions
270
+
271
+ ### 2025.1.1 (2025-08-XX)
272
+
273
+ #### 🔧 Code Quality Improvements
274
+ - **Refactored Complex Regex Logic**: Extracted Windows drive letter validation logic from `_check_dangerous_characters` into a dedicated `_is_valid_windows_drive_pattern` helper method in `PathValidator` for better readability and maintainability
275
+ - **Exception Handling Consistency**: Fixed inconsistency in `ResourceManager.acquire()` method to properly re-raise `NotImplementedError` without wrapping it in `SplurgeResourceAcquisitionError`
276
+ - **Import Organization**: Moved all imports to the top of modules across the entire codebase for better code structure and PEP 8 compliance
277
+
278
+ #### 🧪 Testing Enhancements
279
+ - **Public API Focus**: Removed all tests that validated private implementation details, focusing exclusively on public API behavior validation
280
+ - **Comprehensive Resource Manager Tests**: Added extensive test suite for `ResourceManager` module covering all public methods, edge cases, error scenarios, and context manager behavior
281
+ - **Bookend Logic Clarification**: Updated and corrected all tests related to `StringTokenizer.remove_bookends` to properly reflect its single-character, symmetric bookend matching behavior
282
+ - **Path Validation Test Clarity**: Clarified test expectations and comments for Windows drive-relative paths (e.g., "C:file.txt") to reflect the validator's intentionally strict security design
283
+
284
+ #### 🐛 Bug Fixes
285
+ - **Test Reliability**: Fixed failing tests in `ResourceManager` context manager scenarios by properly handling file truncation and line ending normalization
286
+ - **Ruff Compliance**: Resolved all linting warnings including unused variables and imports
287
+
288
+ #### 📚 Documentation Updates
289
+ - **Method Documentation**: Updated `ResourceManager.acquire()` docstring to include `NotImplementedError` in the Raises section
290
+ - **Test Comments**: Enhanced test documentation with clearer explanations of expected behaviors and edge cases
291
+
246
292
  ### 2025.1.0 (2025-08-25)
247
293
 
248
294
  #### 🎉 Major Features
@@ -0,0 +1,14 @@
1
+ splurge_dsv/__init__.py,sha256=UBfew2IVQhg5VbHjTe_O7e2b1QeoYen_so--4B6p0h4,2415
2
+ splurge_dsv/__main__.py,sha256=RkXcU3P3tHpFa6iGJfx8sAZ4aUcz6z7TIN_1q_5N9xE,354
3
+ splurge_dsv/cli.py,sha256=tRIQT0InflJPzy8SGUaHyD0aPMr7CgmyHbSVRkaQeoo,5759
4
+ splurge_dsv/dsv_helper.py,sha256=VRq2ejx6y-JBtFKJdAaK-GD4V0eoxZZfmoX0I3CSbDI,9428
5
+ splurge_dsv/exceptions.py,sha256=cu9Jd2pGhy7GBbbngH6zs0lfZzLp_OvGwbnsG5khp80,3035
6
+ splurge_dsv/path_validator.py,sha256=R314U2TmQjYVe_KOv4ni08z1_tLuo4_Uuld__dhX_gw,9957
7
+ splurge_dsv/resource_manager.py,sha256=g4igv1hhJdPtw4A1P-WdaNKcYMfEGCtZ4xaU4ynVbKw,12045
8
+ splurge_dsv/string_tokenizer.py,sha256=wBKWdi68rreTqf3RF-8Oxh1nz6QdnbOyWflw2x8pGWY,4022
9
+ splurge_dsv/text_file_helper.py,sha256=vdhEv9uK0hsNXvTO5SoWwFsI9fPkhdIUXuaJs80DVDA,13573
10
+ splurge_dsv-2025.1.2.dist-info/licenses/LICENSE,sha256=fPgtg-tIFHinQvJH0arRfv50AuxikD5eHw6rrPy2A5w,1091
11
+ splurge_dsv-2025.1.2.dist-info/METADATA,sha256=z0qLKtJ0Y8PgVhstBLpVUTATsWRQEv-GI34aaJOxbfs,14021
12
+ splurge_dsv-2025.1.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
13
+ splurge_dsv-2025.1.2.dist-info/top_level.txt,sha256=D6Si3FTfpRYqH7kzM7tSQAyaKbbraO6UPLpcqcY4XXM,12
14
+ splurge_dsv-2025.1.2.dist-info/RECORD,,
@@ -1,13 +0,0 @@
1
- splurge_dsv/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- splurge_dsv/__main__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
- splurge_dsv/dsv_helper.py,sha256=gv9Wwf_soqdXuZWkBpxSyvJVqryKDfEoSf9SoAaRX1A,9651
4
- splurge_dsv/exceptions.py,sha256=tPVLXxf8LPc0yd8L8xjik331B4-NUVYfyd6ifPLUtm4,3052
5
- splurge_dsv/path_validator.py,sha256=2vXsNnmRTLbdYedDrF5kQZsOUvfLhSxkXyRj26OZyj8,10036
6
- splurge_dsv/resource_manager.py,sha256=fldvZQpzznANf4ahHre_lSqQ5_FUWndFuSlyccBylGE,13013
7
- splurge_dsv/string_tokenizer.py,sha256=GDAtJht-d9vt8Kb4yglxfl2iMfSMWGT3esksISNklMU,4203
8
- splurge_dsv/text_file_helper.py,sha256=htYT1Z0aKuQt24X2IfSKnFHB1AOY3dV7lRU8Vr6iqn0,14237
9
- splurge_dsv-2025.1.0.dist-info/licenses/LICENSE,sha256=fPgtg-tIFHinQvJH0arRfv50AuxikD5eHw6rrPy2A5w,1091
10
- splurge_dsv-2025.1.0.dist-info/METADATA,sha256=MUa4M1124GYLHmOYzLIne6-s_biC8wFi9a1PaBEmMXA,10282
11
- splurge_dsv-2025.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
12
- splurge_dsv-2025.1.0.dist-info/top_level.txt,sha256=D6Si3FTfpRYqH7kzM7tSQAyaKbbraO6UPLpcqcY4XXM,12
13
- splurge_dsv-2025.1.0.dist-info/RECORD,,