PyPI - splurge-dsv - Versions diffs - 2025.1.1__py3-none-any.whl → 2025.1.3__py3-none-any.whl - Mend

splurge-dsv 2025.1.1__py3-none-any.whl → 2025.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15) hide show

splurge_dsv/__init__.py +84 -0
splurge_dsv/__main__.py +15 -0
splurge_dsv/cli.py +160 -0
splurge_dsv/dsv_helper.py +29 -46
splurge_dsv/exceptions.py +22 -9
splurge_dsv/path_validator.py +102 -79
splurge_dsv/resource_manager.py +77 -138
splurge_dsv/string_tokenizer.py +5 -24
splurge_dsv/text_file_helper.py +42 -64
{splurge_dsv-2025.1.1.dist-info → splurge_dsv-2025.1.3.dist-info}/METADATA +36 -6
splurge_dsv-2025.1.3.dist-info/RECORD +14 -0
splurge_dsv-2025.1.1.dist-info/RECORD +0 -13
{splurge_dsv-2025.1.1.dist-info → splurge_dsv-2025.1.3.dist-info}/WHEEL +0 -0
{splurge_dsv-2025.1.1.dist-info → splurge_dsv-2025.1.3.dist-info}/licenses/LICENSE +0 -0
{splurge_dsv-2025.1.1.dist-info → splurge_dsv-2025.1.3.dist-info}/top_level.txt +0 -0

splurge_dsv/text_file_helper.py CHANGED Viewed

@@ -22,15 +22,14 @@ Please preserve this header and all related material when sharing!
 This module is licensed under the MIT License.
 """
+# Standard library imports
 from collections import deque
+from collections.abc import Iterator
 from os import PathLike
 from pathlib import Path
-from typing import Iterator
-from splurge_dsv.exceptions import (
-    SplurgeParameterError,
-    SplurgeFileEncodingError
-)
+# Local imports
+from splurge_dsv.exceptions import SplurgeFileEncodingError, SplurgeParameterError
 from splurge_dsv.path_validator import PathValidator
 from splurge_dsv.resource_manager import safe_file_operation
@@ -51,12 +50,7 @@ class TextFileHelper:
     DEFAULT_MODE = "r"
     @classmethod
-    def line_count(
-        cls,
-        file_path: PathLike[str] | str,
-        *,
-        encoding: str = DEFAULT_ENCODING
-    ) -> int:
+    def line_count(cls, file_path: PathLike[str] | str, *, encoding: str = DEFAULT_ENCODING) -> int:
         """
         Count the number of lines in a text file.
@@ -78,12 +72,9 @@ class TextFileHelper:
         """
         # Validate file path
         validated_path = PathValidator.validate_path(
-            Path(file_path),
-            must_exist=True,
-            must_be_file=True,
-            must_be_readable=True
+            Path(file_path), must_exist=True, must_be_file=True, must_be_readable=True
         )
         with safe_file_operation(validated_path, encoding=encoding, mode=cls.DEFAULT_MODE) as stream:
             return sum(1 for _ in stream)
@@ -95,7 +86,7 @@ class TextFileHelper:
         max_lines: int = DEFAULT_MAX_LINES,
         strip: bool = DEFAULT_STRIP,
         encoding: str = DEFAULT_ENCODING,
-        skip_header_rows: int = DEFAULT_SKIP_HEADER_ROWS
+        skip_header_rows: int = DEFAULT_SKIP_HEADER_ROWS,
     ) -> list[str]:
         """
         Preview the first N lines of a text file.
@@ -122,34 +113,30 @@ class TextFileHelper:
         """
         if max_lines < 1:
             raise SplurgeParameterError(
-                "TextFileHelper.preview: max_lines is less than 1",
-                details="max_lines must be at least 1"
+                "TextFileHelper.preview: max_lines is less than 1", details="max_lines must be at least 1"
             )
         # Validate file path
         validated_path = PathValidator.validate_path(
-            Path(file_path),
-            must_exist=True,
-            must_be_file=True,
-            must_be_readable=True
+            Path(file_path), must_exist=True, must_be_file=True, must_be_readable=True
         )
         skip_header_rows = max(skip_header_rows, cls.DEFAULT_SKIP_HEADER_ROWS)
         lines: list[str] = []
         with safe_file_operation(validated_path, encoding=encoding, mode=cls.DEFAULT_MODE) as stream:
             # Skip header rows
             for _ in range(skip_header_rows):
                 if not stream.readline():
                     return lines
             # Read up to max_lines after skipping headers
             for _ in range(max_lines):
                 line = stream.readline()
                 if not line:
                     break
                 lines.append(line.strip() if strip else line.rstrip("\n"))
         return lines
     @classmethod
@@ -161,7 +148,7 @@ class TextFileHelper:
         encoding: str = DEFAULT_ENCODING,
         skip_header_rows: int = DEFAULT_SKIP_HEADER_ROWS,
         skip_footer_rows: int = DEFAULT_SKIP_FOOTER_ROWS,
-        chunk_size: int = DEFAULT_CHUNK_SIZE
+        chunk_size: int = DEFAULT_CHUNK_SIZE,
     ) -> Iterator[list[str]]:
         """
         Read a text file as a stream of line chunks.
@@ -192,67 +179,64 @@ class TextFileHelper:
         chunk_size = max(chunk_size, cls.DEFAULT_MIN_CHUNK_SIZE)
         skip_header_rows = max(skip_header_rows, cls.DEFAULT_SKIP_HEADER_ROWS)
         skip_footer_rows = max(skip_footer_rows, cls.DEFAULT_SKIP_FOOTER_ROWS)
         # Validate file path
         validated_path = PathValidator.validate_path(
-            Path(file_path),
-            must_exist=True,
-            must_be_file=True,
-            must_be_readable=True
-        )
+            Path(file_path), must_exist=True, must_be_file=True, must_be_readable=True
+        )
         with safe_file_operation(validated_path, encoding=encoding, mode=cls.DEFAULT_MODE) as stream:
             # Skip header rows
             for _ in range(skip_header_rows):
                 if not stream.readline():
                     return
             # Use a sliding window to handle footer skipping efficiently
             if skip_footer_rows > 0:
                 # Buffer to hold the last skip_footer_rows lines
                 buffer: deque[str] = deque(maxlen=skip_footer_rows + 1)
                 current_chunk: list[str] = []
                 for line in stream:
                     processed_line = line.strip() if strip else line.rstrip("\n")
                     # Add current line to buffer
                     buffer.append(processed_line)
                     # Wait until the buffer is full (skip_footer_rows + 1 lines) before processing lines.
                     # This ensures we have enough lines to reliably identify and skip the footer rows at the end.
                     if len(buffer) < skip_footer_rows + 1:
                         continue
                     # Once the buffer contains more than skip_footer_rows lines, the oldest line (removed with popleft)
                     # is guaranteed not to be part of the footer and can be safely processed and added to the current chunk.
                     safe_line = buffer.popleft()
                     current_chunk.append(safe_line)
                     # Yield chunk when it reaches the desired size
                     if len(current_chunk) >= chunk_size:
                         yield current_chunk
                         current_chunk = []
                 # At the end, the buffer contains exactly the footer rows to skip
                 # All other lines have already been processed and yielded
                 # Yield any remaining lines in the final chunk
                 if current_chunk:
                     yield current_chunk
             else:
                 # No footer skipping needed - simple streaming
                 chunk: list[str] = []
                 for line in stream:
                     processed_line = line.strip() if strip else line.rstrip("\n")
                     chunk.append(processed_line)
                     # Yield chunk when it reaches the desired size
                     if len(chunk) >= chunk_size:
                         yield chunk
                         chunk = []
                 # Yield any remaining lines in the final chunk
                 if chunk:
                     yield chunk
@@ -265,7 +249,7 @@ class TextFileHelper:
         strip: bool = DEFAULT_STRIP,
         encoding: str = DEFAULT_ENCODING,
         skip_header_rows: int = DEFAULT_SKIP_HEADER_ROWS,
-        skip_footer_rows: int = DEFAULT_SKIP_FOOTER_ROWS
+        skip_footer_rows: int = DEFAULT_SKIP_FOOTER_ROWS,
     ) -> list[str]:
         """
         Read the entire contents of a text file into a list of strings.
@@ -291,42 +275,39 @@ class TextFileHelper:
         """
         # Validate file path
         validated_path = PathValidator.validate_path(
-            Path(file_path),
-            must_exist=True,
-            must_be_file=True,
-            must_be_readable=True
+            Path(file_path), must_exist=True, must_be_file=True, must_be_readable=True
         )
         skip_header_rows = max(skip_header_rows, cls.DEFAULT_SKIP_HEADER_ROWS)
         skip_footer_rows = max(skip_footer_rows, cls.DEFAULT_SKIP_FOOTER_ROWS)
         with safe_file_operation(validated_path, encoding=encoding, mode=cls.DEFAULT_MODE) as stream:
             for _ in range(skip_header_rows):
                 if not stream.readline():
                     return []
             try:
                 if skip_footer_rows > 0:
                     # Buffer to hold the last skip_footer_rows + 1 lines
                     buffer = deque(maxlen=skip_footer_rows + 1)
                     result: list[str] = []
                     for line in stream:
                         processed_line = line.strip() if strip else line.rstrip("\n")
                         # Add current line to buffer
                         buffer.append(processed_line)
                         # Wait until the buffer is full (skip_footer_rows + 1 lines) before processing lines.
                         # This ensures we have enough lines to reliably identify and skip the footer rows at the end.
                         if len(buffer) < skip_footer_rows + 1:
                             continue
                         # Once the buffer contains more than skip_footer_rows lines, the oldest line (removed with popleft)
                         # is guaranteed not to be part of the footer and can be safely processed and added to the result.
                         safe_line = buffer.popleft()
                         result.append(safe_line)
                     # At the end, the buffer contains exactly the footer rows to skip
                     # All other lines have already been processed and added to result
                     return result
@@ -337,7 +318,4 @@ class TextFileHelper:
                         result.append(processed_line)
                     return result
             except UnicodeDecodeError as e:
-                raise SplurgeFileEncodingError(
-                    f"Encoding error reading file: {validated_path}",
-                    details=str(e)
-                )
+                raise SplurgeFileEncodingError(f"Encoding error reading file: {validated_path}", details=str(e)) from e

{splurge_dsv-2025.1.1.dist-info → splurge_dsv-2025.1.3.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: splurge-dsv
-Version: 2025.1.1
+Version: 2025.1.3
 Summary: A utility library for working with DSV (Delimited String Values) files
 Author: Jim Schilling
 License-Expression: MIT
@@ -53,8 +53,8 @@ A robust Python library for parsing and processing delimited-separated value (DS
 - **Error Recovery**: Graceful error handling with detailed error messages
 ### 🧪 Testing & Quality
-- **Comprehensive Test Suite**: 90%+ code coverage with 250+ tests
-- **Cross-Platform Support**: Tested on Windows, Linux, and macOS
+- **Comprehensive Test Suite**: 250+ tests with 85%+ coverage gate
+- **Cross-Platform Support**: Tested on Windows, and should pass on Linux and macOS
 - **Type Safety**: Full type annotations and validation
 - **Documentation**: Complete API documentation with examples
@@ -238,12 +238,42 @@ The project follows strict coding standards:
 - PEP 8 compliance
 - Type annotations for all functions
 - Google-style docstrings
-- 90%+ test coverage requirement
+- 85%+ coverage gate enforced via CI
 - Comprehensive error handling
 ## Changelog
-### 2025.1.1 (2025-01-XX)
+### 2025.1.3 (2025-09-03)
+#### 🔧 Maintenance & Consistency
+- **Version Alignment**: Bumped `__version__` and CLI `--version` to `2025.1.3` to match `pyproject.toml`.
+- **CLI Path Validation**: Centralized validation using `PathValidator.validate_path(...)` for consistent error handling.
+- **Type Correctness**: Fixed `PathValidator._is_valid_windows_drive_pattern` to return `bool` explicitly.
+- **Docs Alignment**: Updated README coverage claims to reflect the `>=85%` coverage gate configured in CI.
+### 2025.1.2 (2025-09-02)
+#### 🧪 Comprehensive End-to-End Testing
+- **Complete E2E Test Suite**: Implemented 25 comprehensive end-to-end workflow tests covering all major CLI functionality
+- **Real CLI Execution**: Tests run actual `splurge-dsv` commands with real files, not just mocked components
+- **Workflow Coverage**: Tests cover CSV/TSV parsing, file operations, data processing, error handling, and performance scenarios
+- **Cross-Platform Compatibility**: Handles Windows-specific encoding issues and platform differences gracefully
+- **Performance Testing**: Large file processing tests (1,000+ and 10,000+ rows) with streaming and chunking validation
+#### 📊 Test Coverage Improvements
+- **Integration Testing**: Added real file system operations and complete pipeline validation
+#### 🔄 Test Categories
+- **CLI Workflows**: 19 tests covering basic parsing, custom delimiters, header/footer skipping, streaming, and error scenarios
+- **Error Handling**: 3 tests for invalid arguments, missing parameters, and CLI error conditions
+- **Integration Scenarios**: 3 tests for data analysis, transformation, and multi-format workflows
+#### 📚 Documentation & Examples
+- **E2E Testing Guide**: Created comprehensive documentation (`docs/e2e_testing_coverage.md`) explaining test coverage and usage
+- **Real-World Examples**: Tests serve as practical examples of library usage patterns
+- **Error Scenario Coverage**: Comprehensive testing of edge cases and failure conditions
+### 2025.1.1 (2025-08-XX)
 #### 🔧 Code Quality Improvements
 - **Refactored Complex Regex Logic**: Extracted Windows drive letter validation logic from `_check_dangerous_characters` into a dedicated `_is_valid_windows_drive_pattern` helper method in `PathValidator` for better readability and maintainability
@@ -285,7 +315,7 @@ The project follows strict coding standards:
 - **StringTokenizer**: Core string parsing functionality
 #### 🧪 Testing & Quality
-- **Comprehensive Test Suite**: 250+ tests with 90%+ code coverage
+- **Comprehensive Test Suite**: 250+ tests with 85%+ coverage gate
 - **Cross-Platform Testing**: Tested on Windows, Linux, and macOS
 - **Type Safety**: Full type annotations throughout the codebase
 - **Error Handling**: Custom exception hierarchy with detailed error messages

splurge_dsv-2025.1.3.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,14 @@
+splurge_dsv/__init__.py,sha256=YaY3yCQF5bNVO76yaITvqggFOg3QkpkY3CjtkkLf-C8,2415
+splurge_dsv/__main__.py,sha256=RkXcU3P3tHpFa6iGJfx8sAZ4aUcz6z7TIN_1q_5N9xE,354
+splurge_dsv/cli.py,sha256=hFchXI5DdLTpfdOgvh0Unq_nvsN8YNzJg86DgygdLrg,5866
+splurge_dsv/dsv_helper.py,sha256=VRq2ejx6y-JBtFKJdAaK-GD4V0eoxZZfmoX0I3CSbDI,9428
+splurge_dsv/exceptions.py,sha256=cu9Jd2pGhy7GBbbngH6zs0lfZzLp_OvGwbnsG5khp80,3035
+splurge_dsv/path_validator.py,sha256=RuRFjtHWE1Z5-DlSBBZMoHJegQEFYEQ0HJgN7ndre2k,9969
+splurge_dsv/resource_manager.py,sha256=g4igv1hhJdPtw4A1P-WdaNKcYMfEGCtZ4xaU4ynVbKw,12045
+splurge_dsv/string_tokenizer.py,sha256=wBKWdi68rreTqf3RF-8Oxh1nz6QdnbOyWflw2x8pGWY,4022
+splurge_dsv/text_file_helper.py,sha256=vdhEv9uK0hsNXvTO5SoWwFsI9fPkhdIUXuaJs80DVDA,13573
+splurge_dsv-2025.1.3.dist-info/licenses/LICENSE,sha256=fPgtg-tIFHinQvJH0arRfv50AuxikD5eHw6rrPy2A5w,1091
+splurge_dsv-2025.1.3.dist-info/METADATA,sha256=yKp4mvzuf18nvn2gD6iDBokkVHMqTiX5tfpYuqWtWk8,14298
+splurge_dsv-2025.1.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+splurge_dsv-2025.1.3.dist-info/top_level.txt,sha256=D6Si3FTfpRYqH7kzM7tSQAyaKbbraO6UPLpcqcY4XXM,12
+splurge_dsv-2025.1.3.dist-info/RECORD,,

splurge_dsv-2025.1.1.dist-info/RECORD DELETED Viewed

@@ -1,13 +0,0 @@
-splurge_dsv/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-splurge_dsv/__main__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-splurge_dsv/dsv_helper.py,sha256=gv9Wwf_soqdXuZWkBpxSyvJVqryKDfEoSf9SoAaRX1A,9651
-splurge_dsv/exceptions.py,sha256=tPVLXxf8LPc0yd8L8xjik331B4-NUVYfyd6ifPLUtm4,3052
-splurge_dsv/path_validator.py,sha256=2vXsNnmRTLbdYedDrF5kQZsOUvfLhSxkXyRj26OZyj8,10036
-splurge_dsv/resource_manager.py,sha256=fldvZQpzznANf4ahHre_lSqQ5_FUWndFuSlyccBylGE,13013
-splurge_dsv/string_tokenizer.py,sha256=GDAtJht-d9vt8Kb4yglxfl2iMfSMWGT3esksISNklMU,4203
-splurge_dsv/text_file_helper.py,sha256=htYT1Z0aKuQt24X2IfSKnFHB1AOY3dV7lRU8Vr6iqn0,14237
-splurge_dsv-2025.1.1.dist-info/licenses/LICENSE,sha256=fPgtg-tIFHinQvJH0arRfv50AuxikD5eHw6rrPy2A5w,1091
-splurge_dsv-2025.1.1.dist-info/METADATA,sha256=R9-qCXkrt-orUGkLNJbnc-cGLY67B8P1uJFTyjmIJoY,12245
-splurge_dsv-2025.1.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-splurge_dsv-2025.1.1.dist-info/top_level.txt,sha256=D6Si3FTfpRYqH7kzM7tSQAyaKbbraO6UPLpcqcY4XXM,12
-splurge_dsv-2025.1.1.dist-info/RECORD,,

{splurge_dsv-2025.1.1.dist-info → splurge_dsv-2025.1.3.dist-info}/WHEEL RENAMED Viewed

File without changes

{splurge_dsv-2025.1.1.dist-info → splurge_dsv-2025.1.3.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{splurge_dsv-2025.1.1.dist-info → splurge_dsv-2025.1.3.dist-info}/top_level.txt RENAMED Viewed

File without changes

splurge-dsv 2025.1.1__py3-none-any.whl → 2025.1.3__py3-none-any.whl

splurge-dsv 2025.1.1__py3-none-any.whl → 2025.1.3__py3-none-any.whl