PyPI - splurge-dsv - Versions diffs - 2025.1.0__py3-none-any.whl - Mend

splurge-dsv 2025.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

splurge_dsv/__init__.py +0 -0
splurge_dsv/__main__.py +0 -0
splurge_dsv/dsv_helper.py +263 -0
splurge_dsv/exceptions.py +123 -0
splurge_dsv/path_validator.py +262 -0
splurge_dsv/resource_manager.py +432 -0
splurge_dsv/string_tokenizer.py +136 -0
splurge_dsv/text_file_helper.py +343 -0
splurge_dsv-2025.1.0.dist-info/METADATA +292 -0
splurge_dsv-2025.1.0.dist-info/RECORD +13 -0
splurge_dsv-2025.1.0.dist-info/WHEEL +5 -0
splurge_dsv-2025.1.0.dist-info/licenses/LICENSE +21 -0
splurge_dsv-2025.1.0.dist-info/top_level.txt +1 -0

splurge_dsv/__init__.py ADDED Viewed

File without changes

splurge_dsv/__main__.py ADDED Viewed

File without changes

splurge_dsv/dsv_helper.py ADDED Viewed

@@ -0,0 +1,263 @@
+"""
+A utility module for working with DSV (Delimited String Values) files.
+Copyright (c) 2025 Jim Schilling
+Please preserve this header and all related material when sharing!
+This module is licensed under the MIT License.
+"""
+from os import PathLike
+from typing import Iterator
+from splurge_dsv.string_tokenizer import StringTokenizer
+from splurge_dsv.text_file_helper import TextFileHelper
+from splurge_dsv.exceptions import SplurgeParameterError
+class DsvHelper:
+    """
+    Stateless helpers for parsing DSV (Delimited String Values) content.
+    Parsing is offered for a single string, a list of strings, a whole file,
+    or a file streamed in chunks of lines. The delimiter, the text bookend, and
+    whitespace stripping are configurable on every call.
+    """
+    DEFAULT_CHUNK_SIZE = 500  # Default number of lines per chunk for streaming operations
+    DEFAULT_ENCODING = "utf-8"  # Default text encoding for file operations
+    DEFAULT_SKIP_HEADER_ROWS = 0  # Default number of header rows to skip
+    DEFAULT_SKIP_FOOTER_ROWS = 0  # Default number of footer rows to skip
+    DEFAULT_MIN_CHUNK_SIZE = 100  # Lower bound applied to chunk_size by parse_stream
+    DEFAULT_STRIP = True  # Default for the ``strip`` option
+    DEFAULT_BOOKEND_STRIP = True  # Default for the ``bookend_strip`` option
+    @staticmethod
+    def _validate_delimiter(delimiter: str) -> None:
+        """
+        Reject a missing or empty delimiter.
+        Shared by every public entry point so they all fail the same way,
+        even when there is no content to parse.
+        Args:
+            delimiter (str): The delimiter to check.
+        Raises:
+            SplurgeParameterError: If delimiter is empty or None.
+        """
+        if delimiter is None or delimiter == "":
+            raise SplurgeParameterError("delimiter cannot be empty or None")
+    @staticmethod
+    def parse(
+        content: str,
+        *,
+        delimiter: str,
+        strip: bool = DEFAULT_STRIP,
+        bookend: str | None = None,
+        bookend_strip: bool = DEFAULT_BOOKEND_STRIP
+    ) -> list[str]:
+        """
+        Parse a string into a list of strings.
+        Args:
+            content (str): The string to parse.
+            delimiter (str): The delimiter to use.
+            strip (bool): Whether to strip whitespace from the strings.
+            bookend (str | None): The bookend to use for text fields. When falsy
+                (None or empty), no bookend removal is attempted.
+            bookend_strip (bool): Passed as ``strip`` to
+                StringTokenizer.remove_bookends for each token.
+        Returns:
+            list[str]: The list of strings.
+        Raises:
+            SplurgeParameterError: If delimiter is empty or None.
+        Example:
+            >>> DsvHelper.parse("a,b,c", delimiter=",")
+            ['a', 'b', 'c']
+            >>> DsvHelper.parse('"a","b","c"', delimiter=",", bookend='"')
+            ['a', 'b', 'c']
+        """
+        DsvHelper._validate_delimiter(delimiter)
+        tokens: list[str] = StringTokenizer.parse(content, delimiter=delimiter, strip=strip)
+        if not bookend:
+            return tokens
+        return [
+            StringTokenizer.remove_bookends(token, bookend=bookend, strip=bookend_strip)
+            for token in tokens
+        ]
+    @classmethod
+    def parses(
+        cls,
+        content: list[str],
+        *,
+        delimiter: str,
+        strip: bool = DEFAULT_STRIP,
+        bookend: str | None = None,
+        bookend_strip: bool = DEFAULT_BOOKEND_STRIP
+    ) -> list[list[str]]:
+        """
+        Parse a list of strings into a list of lists of strings.
+        Args:
+            content (list[str]): The list of strings to parse.
+            delimiter (str): The delimiter to use.
+            strip (bool): Whether to strip whitespace from the strings.
+            bookend (str | None): The bookend to use for text fields.
+            bookend_strip (bool): Whether to strip whitespace when removing bookends.
+        Returns:
+            list[list[str]]: One list of tokens per input string, in input order.
+        Raises:
+            SplurgeParameterError: If content is not a list of strings.
+            SplurgeParameterError: If delimiter is empty or None (checked even
+                when content is empty).
+        Example:
+            >>> DsvHelper.parses(["a,b,c", "d,e,f"], delimiter=",")
+            [['a', 'b', 'c'], ['d', 'e', 'f']]
+        """
+        if not isinstance(content, list):
+            raise SplurgeParameterError("content must be a list")
+        if not all(isinstance(item, str) for item in content):
+            raise SplurgeParameterError("content must be a list of strings")
+        # Validate after the content checks so their errors keep precedence,
+        # but before the loop so an empty list cannot hide a bad delimiter.
+        cls._validate_delimiter(delimiter)
+        return [
+            cls.parse(item, delimiter=delimiter, strip=strip, bookend=bookend, bookend_strip=bookend_strip)
+            for item in content
+        ]
+    @classmethod
+    def parse_file(
+        cls,
+        file_path: PathLike[str] | str,
+        *,
+        delimiter: str,
+        strip: bool = DEFAULT_STRIP,
+        bookend: str | None = None,
+        bookend_strip: bool = DEFAULT_BOOKEND_STRIP,
+        encoding: str = DEFAULT_ENCODING,
+        skip_header_rows: int = DEFAULT_SKIP_HEADER_ROWS,
+        skip_footer_rows: int = DEFAULT_SKIP_FOOTER_ROWS
+    ) -> list[list[str]]:
+        """
+        Parse a file into a list of lists of strings.
+        The lines are obtained from TextFileHelper.read and then handed to parses().
+        Args:
+            file_path (PathLike[str] | str): The path to the file to parse.
+            delimiter (str): The delimiter to use.
+            strip (bool): Whether to strip whitespace from the strings.
+            bookend (str | None): The bookend to use for text fields.
+            bookend_strip (bool): Whether to strip whitespace when removing bookends.
+            encoding (str): The file encoding.
+            skip_header_rows (int): Number of header rows to skip.
+            skip_footer_rows (int): Number of footer rows to skip.
+        Returns:
+            list[list[str]]: The list of lists of strings.
+        Raises:
+            SplurgeParameterError: If delimiter is empty or None (checked before
+                the file is read).
+            SplurgeFileNotFoundError: If the file does not exist.
+            SplurgeFilePermissionError: If the file cannot be accessed.
+            SplurgeFileEncodingError: If the file cannot be decoded with the specified encoding.
+        Example:
+            >>> DsvHelper.parse_file("data.csv", delimiter=",")
+            [['header1', 'header2'], ['value1', 'value2']]
+        """
+        # Fail fast on a bad delimiter instead of paying for file I/O first.
+        cls._validate_delimiter(delimiter)
+        lines: list[str] = TextFileHelper.read(
+            file_path,
+            encoding=encoding,
+            skip_header_rows=skip_header_rows,
+            skip_footer_rows=skip_footer_rows
+        )
+        return cls.parses(
+            lines,
+            delimiter=delimiter,
+            strip=strip,
+            bookend=bookend,
+            bookend_strip=bookend_strip
+        )
+    @classmethod
+    def _process_stream_chunk(
+        cls,
+        chunk: list[str],
+        *,
+        delimiter: str,
+        strip: bool = DEFAULT_STRIP,
+        bookend: str | None = None,
+        bookend_strip: bool = DEFAULT_BOOKEND_STRIP
+    ) -> list[list[str]]:
+        """
+        Process a chunk of lines from the stream by delegating to parses().
+        Args:
+            chunk: List of lines to process
+            delimiter: Delimiter to use for parsing
+            strip: Whether to strip whitespace
+            bookend: Bookend character for text fields
+            bookend_strip: Whether to strip whitespace when removing bookends
+        Returns:
+            list[list[str]]: Parsed rows
+        """
+        return cls.parses(
+            chunk,
+            delimiter=delimiter,
+            strip=strip,
+            bookend=bookend,
+            bookend_strip=bookend_strip
+        )
+    @classmethod
+    def parse_stream(
+        cls,
+        file_path: PathLike[str] | str,
+        *,
+        delimiter: str,
+        strip: bool = DEFAULT_STRIP,
+        bookend: str | None = None,
+        bookend_strip: bool = DEFAULT_BOOKEND_STRIP,
+        encoding: str = DEFAULT_ENCODING,
+        skip_header_rows: int = DEFAULT_SKIP_HEADER_ROWS,
+        skip_footer_rows: int = DEFAULT_SKIP_FOOTER_ROWS,
+        chunk_size: int = DEFAULT_CHUNK_SIZE
+    ) -> Iterator[list[list[str]]]:
+        """
+        Stream-parse a DSV file in chunks of lines.
+        This is a generator: nothing runs (including the delimiter check)
+        until the caller starts iterating.
+        Args:
+            file_path (PathLike[str] | str): The path to the file to parse.
+            delimiter (str): The delimiter to use.
+            strip (bool): Whether to strip whitespace from the strings.
+            bookend (str | None): The bookend to use for text fields.
+            bookend_strip (bool): Whether to strip whitespace when removing bookends.
+            encoding (str): The file encoding.
+            skip_header_rows (int): Number of header rows to skip; values below
+                DEFAULT_SKIP_HEADER_ROWS (0) are raised to it.
+            skip_footer_rows (int): Number of footer rows to skip; values below
+                DEFAULT_SKIP_FOOTER_ROWS (0) are raised to it.
+            chunk_size (int): Number of lines per chunk (default: 500); values
+                below DEFAULT_MIN_CHUNK_SIZE (100) are raised to it.
+        Yields:
+            list[list[str]]: Parsed rows for each chunk.
+        Raises:
+            SplurgeParameterError: If delimiter is empty or None.
+            SplurgeFileNotFoundError: If the file does not exist.
+            SplurgeFilePermissionError: If the file cannot be accessed.
+            SplurgeFileEncodingError: If the file cannot be decoded with the specified encoding.
+        """
+        cls._validate_delimiter(delimiter)
+        # Clamp numeric options to their documented lower bounds.
+        chunk_size = max(chunk_size, cls.DEFAULT_MIN_CHUNK_SIZE)
+        skip_header_rows = max(skip_header_rows, cls.DEFAULT_SKIP_HEADER_ROWS)
+        skip_footer_rows = max(skip_footer_rows, cls.DEFAULT_SKIP_FOOTER_ROWS)
+        # Use TextFileHelper.read_as_stream for consistent error handling
+        for chunk in TextFileHelper.read_as_stream(
+            file_path,
+            encoding=encoding,
+            skip_header_rows=skip_header_rows,
+            skip_footer_rows=skip_footer_rows,
+            chunk_size=chunk_size
+        ):
+            yield cls._process_stream_chunk(
+                chunk,
+                delimiter=delimiter,
+                strip=strip,
+                bookend=bookend,
+                bookend_strip=bookend_strip
+            )

splurge_dsv/exceptions.py ADDED Viewed

@@ -0,0 +1,123 @@
+"""
+Custom exceptions for the splurge-dsv package.
+This module provides a hierarchy of custom exceptions for better error handling
+and more specific error messages throughout the package.
+Copyright (c) 2025 Jim Schilling
+Please preserve this header and all related material when sharing!
+This module is licensed under the MIT License.
+"""
+class SplurgeDsvError(Exception):
+    """Root of the splurge-dsv exception hierarchy."""
+    def __init__(self, message: str, *, details: str | None = None) -> None:
+        """
+        Record the message and optional details on the instance.
+        Args:
+            message: Primary error message; also the only argument forwarded
+                to Exception, so it is what str(error) shows.
+            details: Additional error details (keyword-only), stored on the
+                instance but not forwarded to Exception.
+        """
+        super().__init__(message)
+        self.message = message
+        self.details = details
+class SplurgeValidationError(SplurgeDsvError):
+    """Signals that data validation failed."""
+class SplurgeFileOperationError(SplurgeDsvError):
+    """Common parent for errors raised by file operations."""
+class SplurgeFileNotFoundError(SplurgeFileOperationError):
+    """Signals that a file could not be found."""
+class SplurgeFilePermissionError(SplurgeFileOperationError):
+    """Signals a permission problem during a file operation."""
+class SplurgeFileEncodingError(SplurgeFileOperationError):
+    """Signals an encoding problem during a file operation."""
+class SplurgePathValidationError(SplurgeFileOperationError):
+    """Signals that a file path did not pass validation."""
+class SplurgeDataProcessingError(SplurgeDsvError):
+    """Common parent for errors raised while processing data."""
+class SplurgeParsingError(SplurgeDataProcessingError):
+    """Signals that data could not be parsed."""
+class SplurgeTypeConversionError(SplurgeDataProcessingError):
+    """Signals that a type conversion failed."""
+class SplurgeStreamingError(SplurgeDataProcessingError):
+    """Signals that a streaming operation failed."""
+class SplurgeConfigurationError(SplurgeDsvError):
+    """Signals an invalid configuration."""
+class SplurgeResourceError(SplurgeDsvError):
+    """Common parent for resource management errors."""
+class SplurgeResourceAcquisitionError(SplurgeResourceError):
+    """Signals that acquiring a resource failed."""
+class SplurgeResourceReleaseError(SplurgeResourceError):
+    """Signals that releasing a resource failed."""
+class SplurgePerformanceWarning(SplurgeDsvError):
+    """
+    Flags a performance-related issue.
+    NOTE(review): despite the name, this derives from SplurgeDsvError rather
+    than Warning, so it is not usable as a ``warnings.warn`` category; confirm
+    that is intended.
+    """
+class SplurgeParameterError(SplurgeValidationError):
+    """Signals that a function received invalid parameters."""
+class SplurgeRangeError(SplurgeValidationError):
+    """Signals that a value falls outside its expected range."""
+class SplurgeFormatError(SplurgeValidationError):
+    """Signals that data is in an invalid format."""

splurge_dsv/path_validator.py ADDED Viewed

@@ -0,0 +1,262 @@
+"""
+File path validation utilities for secure file operations.
+This module provides utilities for validating file paths to prevent
+path traversal attacks and ensure secure file operations.
+Copyright (c) 2025 Jim Schilling
+Please preserve this header and all related material when sharing!
+This module is licensed under the MIT License.
+"""
+import os
+import re
+from pathlib import Path
+from splurge_dsv.exceptions import (
+    SplurgePathValidationError,
+    SplurgeFileNotFoundError,
+    SplurgeFilePermissionError
+)
+# Module-level constants for path validation
+_MAX_PATH_LENGTH = 4096  # Maximum path length for most filesystems
+_DEFAULT_FILENAME = "unnamed_file"  # Default filename when sanitization results in empty string
+class PathValidator:
+    """
+    Utility class for validating file paths securely.
+    The raw path string is screened for dangerous characters, traversal
+    patterns, and excessive length before the path is resolved. Optional
+    checks then cover containment in a base directory, existence, file type,
+    and readability. All methods are class methods.
+    """
+    # Regex patterns that cause a path to be rejected by _check_path_traversal
+    _PATH_TRAVERSAL_PATTERNS = [
+        r'\.\.',  # Directory traversal
+        r'//+',   # Multiple forward slashes (including //)
+        r'\\{2,}',  # Two or more consecutive backslashes (not normal Windows paths)
+        r'~',     # Home directory expansion
+    ]
+    # Characters that cause a path to be rejected by _check_dangerous_characters
+    _DANGEROUS_CHARS = [
+        '<', '>', '"', '|', '?', '*',  # Windows reserved characters (excluding ':' for drive letters)
+        '\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07',  # Control characters
+        '\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f',
+        '\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17',
+        '\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f',
+    ]
+    MAX_PATH_LENGTH = _MAX_PATH_LENGTH
+    # Drive letter + colon, followed by end of string or a path separator
+    # (matches "C:", "C:\..." and "C:/...").
+    _WINDOWS_DRIVE_RE = re.compile(r'^[A-Za-z]:(?:$|[\\/])')
+    # One-pass table for sanitize_filename: Windows reserved characters become
+    # '_' and control characters (code points 0-31) are deleted.
+    _FILENAME_TRANSLATION = str.maketrans('<>:"|?*', '_' * 7, ''.join(map(chr, range(32))))
+    @classmethod
+    def validate_path(
+        cls,
+        file_path: str | Path,
+        *,
+        must_exist: bool = False,
+        must_be_file: bool = False,
+        must_be_readable: bool = False,
+        allow_relative: bool = True,
+        base_directory: str | Path | None = None
+    ) -> Path:
+        """
+        Validate a file path for security and correctness.
+        Args:
+            file_path: Path to validate (a string or any path-like object)
+            must_exist: Whether the file must exist
+            must_be_file: Whether the path must be a file (not directory);
+                only enforced when the path exists
+            must_be_readable: Whether the file must be readable (implies it
+                must exist)
+            allow_relative: Whether to allow relative paths
+            base_directory: Base directory for relative path resolution; when
+                given, the resolved path must lie inside it
+        Returns:
+            The resolved Path object
+        Raises:
+            SplurgePathValidationError: If path validation fails
+            SplurgeFileNotFoundError: If file doesn't exist when required
+            SplurgeFilePermissionError: If file is not readable when required
+        """
+        # Path() accepts str and any os.PathLike, so non-str input need not
+        # already be a Path instance.
+        path = Path(file_path)
+        # Validate the caller's original text where available, since Path
+        # normalization can hide patterns such as repeated slashes.
+        path_str = file_path if isinstance(file_path, str) else str(path)
+        cls._check_dangerous_characters(path_str)
+        cls._check_path_traversal(path_str)
+        cls._check_path_length(path_str)
+        if not allow_relative and not path.is_absolute():
+            raise SplurgePathValidationError(
+                f"Relative paths are not allowed: {path}",
+                details="Set allow_relative=True to allow relative paths"
+            )
+        # Resolve path (handles symlinks and normalizes)
+        try:
+            if base_directory:
+                base_path = Path(base_directory).resolve()
+                candidate = path if path.is_absolute() else base_path / path
+                resolved_path = candidate.resolve()
+                # Ensure resolved path is within base directory
+                try:
+                    resolved_path.relative_to(base_path)
+                except ValueError as exc:
+                    raise SplurgePathValidationError(
+                        f"Path {path} resolves outside base directory {base_directory}",
+                        details="Path traversal detected"
+                    ) from exc
+            else:
+                resolved_path = path.resolve()
+        except (OSError, RuntimeError) as e:
+            raise SplurgePathValidationError(
+                f"Failed to resolve path {path}: {e}",
+                details="Check if path contains invalid characters or symlinks"
+            ) from e
+        if must_exist and not resolved_path.exists():
+            raise SplurgeFileNotFoundError(
+                f"File does not exist: {resolved_path}",
+                details="Set must_exist=False to allow non-existent files"
+            )
+        # A non-existent path is not rejected here; that is must_exist's job.
+        if must_be_file and resolved_path.exists() and not resolved_path.is_file():
+            raise SplurgePathValidationError(
+                f"Path is not a file: {resolved_path}",
+                details="Path exists but is not a regular file"
+            )
+        if must_be_readable:
+            if not resolved_path.exists():
+                raise SplurgeFileNotFoundError(
+                    f"Cannot check readability of non-existent file: {resolved_path}",
+                    details="File must exist to check readability"
+                )
+            if not os.access(resolved_path, os.R_OK):
+                raise SplurgeFilePermissionError(
+                    f"File is not readable: {resolved_path}",
+                    details="Check file permissions"
+                )
+        return resolved_path
+    @classmethod
+    def _is_valid_windows_drive_pattern(cls, path_str: str) -> bool:
+        """
+        Check if a path string starts with a valid Windows drive letter pattern.
+        Args:
+            path_str: Path string to validate
+        Returns:
+            True if the string is exactly a drive such as "C:" or begins with
+            "C:\\" or "C:/"; False otherwise. Always a real bool, never a
+            match object.
+        """
+        return cls._WINDOWS_DRIVE_RE.match(path_str) is not None
+    @classmethod
+    def _check_dangerous_characters(cls, path_str: str) -> None:
+        """
+        Reject path strings containing dangerous characters or misplaced colons.
+        Raises:
+            SplurgePathValidationError: If any character from _DANGEROUS_CHARS
+                is present, or if a colon appears anywhere other than as the
+                single colon of a leading Windows drive letter.
+        """
+        for char in cls._DANGEROUS_CHARS:
+            position = path_str.find(char)
+            if position != -1:
+                raise SplurgePathValidationError(
+                    f"Path contains dangerous character: {repr(char)}",
+                    details=f"Character at position {position}"
+                )
+        # Colons are allowed only as the drive-letter colon. A second colon
+        # (e.g. "C:/a:b") is rejected even when the drive prefix is valid.
+        if ':' in path_str:
+            drive_ok = cls._is_valid_windows_drive_pattern(path_str)
+            if not drive_ok or path_str.count(':') > 1:
+                raise SplurgePathValidationError(
+                    "Path contains colon in invalid position",
+                    details="Colons are only allowed in Windows drive letters (e.g., C: or C:\\)"
+                )
+    @classmethod
+    def _check_path_traversal(cls, path_str: str) -> None:
+        """
+        Reject path strings that match any pattern in _PATH_TRAVERSAL_PATTERNS.
+        Raises:
+            SplurgePathValidationError: On the first matching pattern.
+        """
+        for pattern in cls._PATH_TRAVERSAL_PATTERNS:
+            if re.search(pattern, path_str):
+                raise SplurgePathValidationError(
+                    f"Path contains traversal pattern: {pattern}",
+                    details="Path traversal attacks are not allowed"
+                )
+    @classmethod
+    def _check_path_length(cls, path_str: str) -> None:
+        """
+        Reject path strings longer than MAX_PATH_LENGTH characters.
+        Raises:
+            SplurgePathValidationError: If the string is too long.
+        """
+        length = len(path_str)
+        if length > cls.MAX_PATH_LENGTH:
+            raise SplurgePathValidationError(
+                f"Path is too long: {length} characters",
+                details=f"Maximum allowed length is {cls.MAX_PATH_LENGTH} characters"
+            )
+    @classmethod
+    def sanitize_filename(cls, filename: str) -> str:
+        """
+        Sanitize a filename by removing dangerous characters.
+        Windows reserved characters (< > : " | ? *) become underscores,
+        control characters (code points below 32) are removed, and leading or
+        trailing spaces and dots are stripped.
+        Args:
+            filename: Original filename
+        Returns:
+            Sanitized filename, or "unnamed_file" if nothing is left
+        """
+        sanitized = filename.translate(cls._FILENAME_TRANSLATION).strip(' .')
+        return sanitized or _DEFAULT_FILENAME
+    @classmethod
+    def is_safe_path(cls, file_path: str | Path) -> bool:
+        """
+        Check if a path is safe without raising exceptions.
+        Runs validate_path with its default options and converts the
+        package's validation errors into a False result.
+        Args:
+            file_path: Path to check
+        Returns:
+            True if path is safe, False otherwise
+        """
+        try:
+            cls.validate_path(file_path)
+        except (SplurgePathValidationError, SplurgeFileNotFoundError, SplurgeFilePermissionError):
+            return False
+        return True