splurge-dsv 2025.1.5__py3-none-any.whl → 2025.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- splurge_dsv/__init__.py +70 -58
- splurge_dsv/__main__.py +10 -6
- splurge_dsv/cli.py +47 -43
- splurge_dsv/dsv.py +256 -0
- splurge_dsv/dsv_helper.py +63 -52
- splurge_dsv/exceptions.py +92 -75
- splurge_dsv/path_validator.py +49 -36
- splurge_dsv/safe_text_file_reader.py +177 -0
- splurge_dsv/safe_text_file_writer.py +136 -0
- splurge_dsv/string_tokenizer.py +34 -26
- splurge_dsv/text_file_helper.py +96 -177
- splurge_dsv-2025.2.0.dist-info/METADATA +217 -0
- splurge_dsv-2025.2.0.dist-info/RECORD +17 -0
- splurge_dsv-2025.2.0.dist-info/entry_points.txt +2 -0
- splurge_dsv/resource_manager.py +0 -371
- splurge_dsv-2025.1.5.dist-info/METADATA +0 -268
- splurge_dsv-2025.1.5.dist-info/RECORD +0 -14
- {splurge_dsv-2025.1.5.dist-info → splurge_dsv-2025.2.0.dist-info}/WHEEL +0 -0
- {splurge_dsv-2025.1.5.dist-info → splurge_dsv-2025.2.0.dist-info}/licenses/LICENSE +0 -0
- {splurge_dsv-2025.1.5.dist-info → splurge_dsv-2025.2.0.dist-info}/top_level.txt +0 -0
splurge_dsv/dsv_helper.py
CHANGED
@@ -13,7 +13,7 @@ from collections.abc import Iterator
|
|
13
13
|
from os import PathLike
|
14
14
|
|
15
15
|
# Local imports
|
16
|
-
from splurge_dsv.exceptions import
|
16
|
+
from splurge_dsv.exceptions import SplurgeDsvParameterError
|
17
17
|
from splurge_dsv.string_tokenizer import StringTokenizer
|
18
18
|
from splurge_dsv.text_file_helper import TextFileHelper
|
19
19
|
|
@@ -43,30 +43,34 @@ class DsvHelper:
|
|
43
43
|
bookend: str | None = None,
|
44
44
|
bookend_strip: bool = DEFAULT_BOOKEND_STRIP,
|
45
45
|
) -> list[str]:
|
46
|
-
"""
|
47
|
-
|
46
|
+
"""Parse a single DSV line into tokens.
|
47
|
+
|
48
|
+
This method tokenizes a single line of DSV text using the provided
|
49
|
+
``delimiter``. It optionally strips surrounding whitespace from each
|
50
|
+
token and may remove configured bookend characters (for example,
|
51
|
+
double-quotes used around fields).
|
48
52
|
|
49
53
|
Args:
|
50
|
-
content
|
51
|
-
delimiter
|
52
|
-
strip
|
53
|
-
bookend
|
54
|
-
bookend_strip
|
54
|
+
content: The input line to tokenize.
|
55
|
+
delimiter: A single-character delimiter string (e.g. "," or "\t").
|
56
|
+
strip: If True, strip leading/trailing whitespace from each token.
|
57
|
+
bookend: Optional bookend character to remove from token ends.
|
58
|
+
bookend_strip: If True, strip whitespace after removing bookends.
|
55
59
|
|
56
60
|
Returns:
|
57
|
-
|
61
|
+
A list of parsed token strings.
|
58
62
|
|
59
63
|
Raises:
|
60
|
-
|
64
|
+
SplurgeDsvParameterError: If ``delimiter`` is empty or None.
|
61
65
|
|
62
|
-
|
66
|
+
Examples:
|
63
67
|
>>> DsvHelper.parse("a,b,c", delimiter=",")
|
64
68
|
['a', 'b', 'c']
|
65
69
|
>>> DsvHelper.parse('"a","b","c"', delimiter=",", bookend='"')
|
66
70
|
['a', 'b', 'c']
|
67
71
|
"""
|
68
72
|
if delimiter is None or delimiter == "":
|
69
|
-
raise
|
73
|
+
raise SplurgeDsvParameterError("delimiter cannot be empty or None")
|
70
74
|
|
71
75
|
tokens: list[str] = StringTokenizer.parse(content, delimiter=delimiter, strip=strip)
|
72
76
|
|
@@ -85,32 +89,34 @@ class DsvHelper:
|
|
85
89
|
bookend: str | None = None,
|
86
90
|
bookend_strip: bool = DEFAULT_BOOKEND_STRIP,
|
87
91
|
) -> list[list[str]]:
|
88
|
-
"""
|
89
|
-
|
92
|
+
"""Parse multiple DSV lines.
|
93
|
+
|
94
|
+
Given a list of lines (for example, the result of reading a file),
|
95
|
+
return a list where each element is the list of tokens for that line.
|
90
96
|
|
91
97
|
Args:
|
92
|
-
content
|
93
|
-
delimiter
|
94
|
-
strip
|
95
|
-
bookend
|
96
|
-
bookend_strip
|
98
|
+
content: A list of input lines to parse.
|
99
|
+
delimiter: Delimiter used to split each line.
|
100
|
+
strip: If True, strip whitespace from tokens.
|
101
|
+
bookend: Optional bookend character to remove from tokens.
|
102
|
+
bookend_strip: If True, strip whitespace after removing bookends.
|
97
103
|
|
98
104
|
Returns:
|
99
|
-
|
105
|
+
A list of token lists, one per input line.
|
100
106
|
|
101
107
|
Raises:
|
102
|
-
|
103
|
-
|
108
|
+
SplurgeDsvParameterError: If ``content`` is not a list of strings or
|
109
|
+
if ``delimiter`` is empty or None.
|
104
110
|
|
105
111
|
Example:
|
106
112
|
>>> DsvHelper.parses(["a,b,c", "d,e,f"], delimiter=",")
|
107
113
|
[['a', 'b', 'c'], ['d', 'e', 'f']]
|
108
114
|
"""
|
109
115
|
if not isinstance(content, list):
|
110
|
-
raise
|
116
|
+
raise SplurgeDsvParameterError("content must be a list")
|
111
117
|
|
112
118
|
if not all(isinstance(item, str) for item in content):
|
113
|
-
raise
|
119
|
+
raise SplurgeDsvParameterError("content must be a list of strings")
|
114
120
|
|
115
121
|
return [
|
116
122
|
cls.parse(item, delimiter=delimiter, strip=strip, bookend=bookend, bookend_strip=bookend_strip)
|
@@ -130,31 +136,33 @@ class DsvHelper:
|
|
130
136
|
skip_header_rows: int = DEFAULT_SKIP_HEADER_ROWS,
|
131
137
|
skip_footer_rows: int = DEFAULT_SKIP_FOOTER_ROWS,
|
132
138
|
) -> list[list[str]]:
|
133
|
-
"""
|
134
|
-
|
139
|
+
"""Read and parse an entire DSV file.
|
140
|
+
|
141
|
+
This convenience reads all lines from ``file_path`` using
|
142
|
+
:class:`splurge_dsv.text_file_helper.TextFileHelper` and then parses each
|
143
|
+
line into tokens. Header and footer rows may be skipped via the
|
144
|
+
``skip_header_rows`` and ``skip_footer_rows`` parameters.
|
135
145
|
|
136
146
|
Args:
|
137
|
-
file_path
|
138
|
-
delimiter
|
139
|
-
strip
|
140
|
-
bookend
|
141
|
-
bookend_strip
|
142
|
-
encoding
|
143
|
-
skip_header_rows
|
144
|
-
skip_footer_rows
|
147
|
+
file_path: Path to the file to read.
|
148
|
+
delimiter: Delimiter to split fields on.
|
149
|
+
strip: If True, strip whitespace from tokens.
|
150
|
+
bookend: Optional bookend character to remove from tokens.
|
151
|
+
bookend_strip: If True, strip whitespace after removing bookends.
|
152
|
+
encoding: Text encoding to use when reading the file.
|
153
|
+
skip_header_rows: Number of leading lines to ignore.
|
154
|
+
skip_footer_rows: Number of trailing lines to ignore.
|
145
155
|
|
146
156
|
Returns:
|
147
|
-
|
157
|
+
A list of token lists (one list per non-skipped line).
|
148
158
|
|
149
159
|
Raises:
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
>>> DsvHelper.parse_file("data.csv", delimiter=",")
|
157
|
-
[['header1', 'header2'], ['value1', 'value2']]
|
160
|
+
SplurgeDsvParameterError: If ``delimiter`` is empty or None.
|
161
|
+
SplurgeDsvFileNotFoundError: If the file at ``file_path`` does not exist.
|
162
|
+
SplurgeDsvFilePermissionError: If the file cannot be accessed due to
|
163
|
+
permission restrictions.
|
164
|
+
SplurgeDsvFileEncodingError: If the file cannot be decoded using
|
165
|
+
the provided ``encoding``.
|
158
166
|
"""
|
159
167
|
lines: list[str] = TextFileHelper.read(
|
160
168
|
file_path, encoding=encoding, skip_header_rows=skip_header_rows, skip_footer_rows=skip_footer_rows
|
@@ -172,18 +180,21 @@ class DsvHelper:
|
|
172
180
|
bookend: str | None = None,
|
173
181
|
bookend_strip: bool = DEFAULT_BOOKEND_STRIP,
|
174
182
|
) -> list[list[str]]:
|
175
|
-
"""
|
176
|
-
|
183
|
+
"""Parse a chunk of lines into tokenized rows.
|
184
|
+
|
185
|
+
Designed to be used by :meth:`parse_stream` as a helper for converting a
|
186
|
+
batch of raw lines into parsed rows.
|
177
187
|
|
178
188
|
Args:
|
179
|
-
chunk:
|
180
|
-
delimiter: Delimiter to
|
181
|
-
strip:
|
182
|
-
bookend:
|
183
|
-
bookend_strip:
|
189
|
+
chunk: A list of raw input lines.
|
190
|
+
delimiter: Delimiter used to split each line.
|
191
|
+
strip: If True, strip whitespace from tokens.
|
192
|
+
bookend: Optional bookend character to remove from tokens.
|
193
|
+
bookend_strip: If True, strip whitespace after removing bookends.
|
184
194
|
|
185
195
|
Returns:
|
186
|
-
list
|
196
|
+
A list where each element is the token list for a corresponding
|
197
|
+
input line from ``chunk``.
|
187
198
|
"""
|
188
199
|
return cls.parses(chunk, delimiter=delimiter, strip=strip, bookend=bookend, bookend_strip=bookend_strip)
|
189
200
|
|
@@ -225,7 +236,7 @@ class DsvHelper:
|
|
225
236
|
SplurgeFileEncodingError: If the file cannot be decoded with the specified encoding.
|
226
237
|
"""
|
227
238
|
if delimiter is None or delimiter == "":
|
228
|
-
raise
|
239
|
+
raise SplurgeDsvParameterError("delimiter cannot be empty or None")
|
229
240
|
|
230
241
|
chunk_size = max(chunk_size, cls.DEFAULT_MIN_CHUNK_SIZE)
|
231
242
|
skip_header_rows = max(skip_header_rows, cls.DEFAULT_SKIP_HEADER_ROWS)
|
splurge_dsv/exceptions.py
CHANGED
@@ -1,136 +1,153 @@
|
|
1
|
-
"""
|
2
|
-
Custom exceptions for the splurge-dsv package.
|
1
|
+
"""Custom exceptions used across the splurge-dsv package.
|
3
2
|
|
4
|
-
This module
|
5
|
-
|
3
|
+
This module defines a clear exception hierarchy so callers can catch
|
4
|
+
specific error categories (file, validation, parsing, streaming, etc.)
|
5
|
+
instead of dealing with generic builtins. Each exception stores a
|
6
|
+
human-readable ``message`` and optional ``details`` for diagnostic output.
|
6
7
|
|
7
|
-
|
8
|
+
Module contents are intentionally lightweight: exceptions are primarily
|
9
|
+
containers for structured error information.
|
8
10
|
|
9
|
-
|
11
|
+
Example:
|
12
|
+
raise SplurgeDsvFileNotFoundError("File not found", details="/data/foo.csv")
|
10
13
|
|
11
|
-
|
14
|
+
License: MIT
|
15
|
+
|
16
|
+
Copyright (c) 2025 Jim Schilling
|
12
17
|
"""
|
13
18
|
|
14
19
|
|
15
20
|
class SplurgeDsvError(Exception):
|
16
|
-
"""Base exception
|
21
|
+
"""Base exception carrying a message and optional details.
|
17
22
|
|
18
|
-
|
19
|
-
|
20
|
-
|
23
|
+
Args:
|
24
|
+
message: Primary error message to display to the user.
|
25
|
+
details: Optional machine-readable details useful for debugging.
|
26
|
+
|
27
|
+
Attributes:
|
28
|
+
message: User-facing error message.
|
29
|
+
details: Optional additional diagnostic information.
|
30
|
+
"""
|
21
31
|
|
22
|
-
|
23
|
-
message: Primary error message
|
24
|
-
details: Additional error details
|
25
|
-
"""
|
32
|
+
def __init__(self, message: str, *, details: str | None = None) -> None:
|
26
33
|
self.message = message
|
27
34
|
self.details = details
|
28
35
|
super().__init__(self.message)
|
29
36
|
|
30
37
|
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
pass
|
35
|
-
|
36
|
-
|
37
|
-
class SplurgeFileOperationError(SplurgeDsvError):
|
38
|
-
"""Base exception for file operation errors."""
|
39
|
-
|
40
|
-
pass
|
41
|
-
|
42
|
-
|
43
|
-
class SplurgeFileNotFoundError(SplurgeFileOperationError):
|
44
|
-
"""Raised when a file is not found."""
|
38
|
+
# New-style exception names. Use a SplurgeDsv* prefix to avoid colliding with
|
39
|
+
# Python builtins. We keep the Splurge* aliases for backward compatibility.
|
45
40
|
|
46
|
-
pass
|
47
41
|
|
42
|
+
class SplurgeDsvValidationError(SplurgeDsvError):
|
43
|
+
"""Raised when data validation fails.
|
48
44
|
|
49
|
-
|
50
|
-
|
45
|
+
This exception indicates input or configuration values do not meet
|
46
|
+
expected constraints (for example: invalid delimiter, out-of-range
|
47
|
+
parameters, or malformed metadata).
|
48
|
+
"""
|
51
49
|
|
52
|
-
pass
|
53
50
|
|
51
|
+
class SplurgeDsvFileOperationError(SplurgeDsvError):
|
52
|
+
"""Base exception for file operation errors.
|
54
53
|
|
55
|
-
|
56
|
-
|
54
|
+
Used as a parent for file-related conditions such as not found,
|
55
|
+
permission denied, or encoding issues.
|
56
|
+
"""
|
57
57
|
|
58
|
-
pass
|
59
58
|
|
59
|
+
class SplurgeDsvFileNotFoundError(SplurgeDsvFileOperationError):
|
60
|
+
"""Raised when an expected file cannot be located.
|
60
61
|
|
61
|
-
|
62
|
-
|
62
|
+
This typically maps to ``FileNotFoundError`` semantics but uses the
|
63
|
+
package-specific exception hierarchy so callers can distinguish
|
64
|
+
file errors from other error types.
|
65
|
+
"""
|
63
66
|
|
64
|
-
pass
|
65
67
|
|
68
|
+
class SplurgeDsvFilePermissionError(SplurgeDsvFileOperationError):
|
69
|
+
"""Raised for permission or access-related file errors.
|
66
70
|
|
67
|
-
|
68
|
-
|
71
|
+
For example, attempting to open a file without read permission will
|
72
|
+
raise this exception.
|
73
|
+
"""
|
69
74
|
|
70
|
-
pass
|
71
75
|
|
76
|
+
class SplurgeDsvFileEncodingError(SplurgeDsvFileOperationError):
|
77
|
+
"""Raised when decoding or encoding a text file fails.
|
72
78
|
|
73
|
-
|
74
|
-
|
79
|
+
The exception typically wraps the underlying decoding error and
|
80
|
+
provides a descriptive message and optional details for diagnostics.
|
81
|
+
"""
|
75
82
|
|
76
|
-
pass
|
77
83
|
|
84
|
+
class SplurgeDsvPathValidationError(SplurgeDsvFileOperationError):
|
85
|
+
"""Raised when a provided filesystem path fails validation checks.
|
78
86
|
|
79
|
-
|
80
|
-
|
87
|
+
Use this exception for path traversal, dangerous characters, or other
|
88
|
+
validation failures detected by the path validation utilities.
|
89
|
+
"""
|
81
90
|
|
82
|
-
pass
|
83
91
|
|
92
|
+
class SplurgeDsvDataProcessingError(SplurgeDsvError):
|
93
|
+
"""Base exception for errors that occur during data processing (parsing, conversion).
|
84
94
|
|
85
|
-
|
86
|
-
|
95
|
+
This groups parsing, type conversion, and streaming errors that occur
|
96
|
+
while transforming file content into structured data.
|
97
|
+
"""
|
87
98
|
|
88
|
-
pass
|
89
99
|
|
100
|
+
class SplurgeDsvParsingError(SplurgeDsvDataProcessingError):
|
101
|
+
"""Raised when parsing fails due to malformed or unexpected content."""
|
90
102
|
|
91
|
-
class SplurgeConfigurationError(SplurgeDsvError):
|
92
|
-
"""Raised when configuration is invalid."""
|
93
103
|
|
94
|
-
|
104
|
+
class SplurgeDsvTypeConversionError(SplurgeDsvDataProcessingError):
|
105
|
+
"""Raised when a value cannot be converted to the requested type."""
|
95
106
|
|
96
107
|
|
97
|
-
class
|
98
|
-
"""
|
108
|
+
class SplurgeDsvStreamingError(SplurgeDsvDataProcessingError):
|
109
|
+
"""Raised for errors during streaming (e.g., partial reads, IO interruptions)."""
|
99
110
|
|
100
|
-
pass
|
101
111
|
|
112
|
+
class SplurgeDsvConfigurationError(SplurgeDsvError):
|
113
|
+
"""Raised when an invalid configuration is provided to an API.
|
102
114
|
|
103
|
-
|
104
|
-
|
115
|
+
Examples include invalid chunk sizes, missing delimiters, or mutually
|
116
|
+
exclusive options supplied together.
|
117
|
+
"""
|
105
118
|
|
106
|
-
pass
|
107
119
|
|
120
|
+
class SplurgeDsvResourceError(SplurgeDsvError):
|
121
|
+
"""Base exception for resource acquisition and release errors."""
|
108
122
|
|
109
|
-
class SplurgeResourceReleaseError(SplurgeResourceError):
|
110
|
-
"""Raised when resource release fails."""
|
111
123
|
|
112
|
-
|
124
|
+
class SplurgeDsvResourceAcquisitionError(SplurgeDsvResourceError):
|
125
|
+
"""Raised when acquiring external resources (files, streams) fails."""
|
113
126
|
|
114
127
|
|
115
|
-
class
|
116
|
-
"""
|
128
|
+
class SplurgeDsvResourceReleaseError(SplurgeDsvResourceError):
|
129
|
+
"""Raised when releasing resources (closing files or handles) fails."""
|
117
130
|
|
118
|
-
pass
|
119
131
|
|
132
|
+
class SplurgeDsvPerformanceWarning(SplurgeDsvError):
|
133
|
+
"""Raised to indicate performance-related concerns that may need attention.
|
120
134
|
|
121
|
-
|
122
|
-
|
135
|
+
This is not a fatal error but can be used to signal suboptimal usage
|
136
|
+
patterns (for example, very small streaming chunk sizes) to callers.
|
137
|
+
"""
|
123
138
|
|
124
|
-
pass
|
125
139
|
|
140
|
+
class SplurgeDsvParameterError(SplurgeDsvValidationError):
|
141
|
+
"""Raised when a function or method receives invalid parameters.
|
126
142
|
|
127
|
-
|
128
|
-
|
143
|
+
Use this for invalid types, missing required values, or arguments that
|
144
|
+
violate expected constraints.
|
145
|
+
"""
|
129
146
|
|
130
|
-
pass
|
131
147
|
|
148
|
+
class SplurgeDsvRangeError(SplurgeDsvValidationError):
|
149
|
+
"""Raised when a value falls outside an expected numeric or length range."""
|
132
150
|
|
133
|
-
class SplurgeFormatError(SplurgeValidationError):
|
134
|
-
"""Raised when data format is invalid."""
|
135
151
|
|
136
|
-
|
152
|
+
class SplurgeDsvFormatError(SplurgeDsvValidationError):
|
153
|
+
"""Raised when the data format is invalid or cannot be parsed as expected."""
|
splurge_dsv/path_validator.py
CHANGED
@@ -17,7 +17,11 @@ import re
|
|
17
17
|
from pathlib import Path
|
18
18
|
|
19
19
|
# Local imports
|
20
|
-
from splurge_dsv.exceptions import
|
20
|
+
from splurge_dsv.exceptions import (
|
21
|
+
SplurgeDsvFileNotFoundError,
|
22
|
+
SplurgeDsvFilePermissionError,
|
23
|
+
SplurgeDsvPathValidationError,
|
24
|
+
)
|
21
25
|
|
22
26
|
# Module-level constants for path validation
|
23
27
|
_MAX_PATH_LENGTH = 4096 # Maximum path length for most filesystems
|
@@ -94,24 +98,27 @@ class PathValidator:
|
|
94
98
|
allow_relative: bool = True,
|
95
99
|
base_directory: str | Path | None = None,
|
96
100
|
) -> Path:
|
97
|
-
"""
|
98
|
-
|
101
|
+
"""Validate a filesystem path for security and correctness.
|
102
|
+
|
103
|
+
This is the central path validation routine used across the package.
|
99
104
|
|
100
105
|
Args:
|
101
|
-
file_path: Path to validate
|
102
|
-
must_exist:
|
103
|
-
must_be_file:
|
104
|
-
must_be_readable:
|
105
|
-
allow_relative:
|
106
|
-
base_directory:
|
106
|
+
file_path: Path or string to validate.
|
107
|
+
must_exist: If True, require the path to exist.
|
108
|
+
must_be_file: If True, require the path to be a regular file.
|
109
|
+
must_be_readable: If True, check read permission via os.access().
|
110
|
+
allow_relative: If False, disallow relative paths.
|
111
|
+
base_directory: Optional directory to resolve relative paths
|
112
|
+
against and to restrict the resolved path to.
|
107
113
|
|
108
114
|
Returns:
|
109
|
-
|
115
|
+
pathlib.Path: Resolved and normalized path.
|
110
116
|
|
111
117
|
Raises:
|
112
|
-
|
113
|
-
|
114
|
-
|
118
|
+
SplurgeDsvPathValidationError: If any validation rule fails.
|
119
|
+
SplurgeDsvFileNotFoundError: If must_exist is True and file is missing.
|
120
|
+
SplurgeDsvFilePermissionError: If must_be_readable is True and the
|
121
|
+
file is not readable.
|
115
122
|
"""
|
116
123
|
# Convert to Path object
|
117
124
|
path = Path(file_path) if isinstance(file_path, str) else file_path
|
@@ -130,7 +137,7 @@ class PathValidator:
|
|
130
137
|
|
131
138
|
# Handle relative paths
|
132
139
|
if not path.is_absolute() and not allow_relative:
|
133
|
-
raise
|
140
|
+
raise SplurgeDsvPathValidationError(
|
134
141
|
f"Relative paths are not allowed: {path}", details="Set allow_relative=True to allow relative paths"
|
135
142
|
)
|
136
143
|
|
@@ -147,39 +154,39 @@ class PathValidator:
|
|
147
154
|
try:
|
148
155
|
resolved_path.relative_to(base_path)
|
149
156
|
except ValueError:
|
150
|
-
raise
|
157
|
+
raise SplurgeDsvPathValidationError(
|
151
158
|
f"Path {path} resolves outside base directory {base_directory}",
|
152
159
|
details="Path traversal detected",
|
153
160
|
) from None
|
154
161
|
else:
|
155
162
|
resolved_path = path.resolve()
|
156
163
|
except (OSError, RuntimeError) as e:
|
157
|
-
raise
|
164
|
+
raise SplurgeDsvPathValidationError(
|
158
165
|
f"Failed to resolve path {path}: {e}", details="Check if path contains invalid characters or symlinks"
|
159
166
|
) from e
|
160
167
|
|
161
168
|
# Check if file exists
|
162
169
|
if must_exist and not resolved_path.exists():
|
163
|
-
raise
|
170
|
+
raise SplurgeDsvFileNotFoundError(
|
164
171
|
f"File does not exist: {resolved_path}", details="Set must_exist=False to allow non-existent files"
|
165
172
|
)
|
166
173
|
|
167
174
|
# Check if it's a file (not directory)
|
168
175
|
if must_be_file and resolved_path.exists() and not resolved_path.is_file():
|
169
|
-
raise
|
176
|
+
raise SplurgeDsvPathValidationError(
|
170
177
|
f"Path is not a file: {resolved_path}", details="Path exists but is not a regular file"
|
171
178
|
)
|
172
179
|
|
173
180
|
# Check if file is readable
|
174
181
|
if must_be_readable:
|
175
182
|
if not resolved_path.exists():
|
176
|
-
raise
|
183
|
+
raise SplurgeDsvFileNotFoundError(
|
177
184
|
f"Cannot check readability of non-existent file: {resolved_path}",
|
178
185
|
details="File must exist to check readability",
|
179
186
|
)
|
180
187
|
|
181
188
|
if not os.access(resolved_path, os.R_OK):
|
182
|
-
raise
|
189
|
+
raise SplurgeDsvFilePermissionError(
|
183
190
|
f"File is not readable: {resolved_path}", details="Check file permissions"
|
184
191
|
)
|
185
192
|
|
@@ -187,26 +194,24 @@ class PathValidator:
|
|
187
194
|
|
188
195
|
@classmethod
|
189
196
|
def _is_valid_windows_drive_pattern(cls, path_str: str) -> bool:
|
190
|
-
"""
|
191
|
-
Check if a path string contains a valid Windows drive letter pattern.
|
197
|
+
"""Return True if ``path_str`` looks like a valid Windows drive pattern.
|
192
198
|
|
193
|
-
|
194
|
-
path_str: Path string to validate
|
195
|
-
|
196
|
-
Returns:
|
197
|
-
True if the path contains a valid Windows drive letter pattern,
|
198
|
-
False otherwise
|
199
|
+
Accepts both ``C:`` and ``C:\\...`` or ``C:/...`` forms.
|
199
200
|
"""
|
200
201
|
# Must be C: at the end of the string, or C:\ (or C:/) followed by path
|
201
202
|
return bool(re.match(r"^[A-Za-z]:$", path_str)) or bool(re.match(r"^[A-Za-z]:[\\/]", path_str))
|
202
203
|
|
203
204
|
@classmethod
|
204
205
|
def _check_dangerous_characters(cls, path_str: str) -> None:
|
205
|
-
"""
|
206
|
+
"""Raise if ``path_str`` contains characters disallowed by policy.
|
207
|
+
|
208
|
+
This guards against NULs, control characters, and reserved filesystem
|
209
|
+
characters which may be used in injection or traversal attacks.
|
210
|
+
"""
|
206
211
|
# Check for dangerous characters, but allow colons in Windows drive letters
|
207
212
|
for char in cls._DANGEROUS_CHARS:
|
208
213
|
if char in path_str:
|
209
|
-
raise
|
214
|
+
raise SplurgeDsvPathValidationError(
|
210
215
|
f"Path contains dangerous character: {repr(char)}",
|
211
216
|
details=f"Character at position {path_str.find(char)}",
|
212
217
|
)
|
@@ -214,25 +219,33 @@ class PathValidator:
|
|
214
219
|
# Special handling for colons - only allow them in Windows drive letters (e.g., C:)
|
215
220
|
if ":" in path_str:
|
216
221
|
if not cls._is_valid_windows_drive_pattern(path_str):
|
217
|
-
raise
|
222
|
+
raise SplurgeDsvPathValidationError(
|
218
223
|
"Path contains colon in invalid position",
|
219
224
|
details="Colons are only allowed in Windows drive letters (e.g., C: or C:\\)",
|
220
225
|
)
|
221
226
|
|
222
227
|
@classmethod
|
223
228
|
def _check_path_traversal(cls, path_str: str) -> None:
|
224
|
-
"""
|
229
|
+
"""Raise if ``path_str`` contains obvious traversal patterns.
|
230
|
+
|
231
|
+
This is a best-effort check that catches sequences such as ``..``
|
232
|
+
and unusual repeated separators that are likely malicious.
|
233
|
+
"""
|
225
234
|
for pattern in cls._PATH_TRAVERSAL_PATTERNS:
|
226
235
|
if re.search(pattern, path_str):
|
227
|
-
raise
|
236
|
+
raise SplurgeDsvPathValidationError(
|
228
237
|
f"Path contains traversal pattern: {pattern}", details="Path traversal attacks are not allowed"
|
229
238
|
)
|
230
239
|
|
231
240
|
@classmethod
|
232
241
|
def _check_path_length(cls, path_str: str) -> None:
|
233
|
-
"""
|
242
|
+
"""Raise if the path exceeds the configured maximum length.
|
243
|
+
|
244
|
+
Long paths can indicate malformed input or attempt to overflow
|
245
|
+
downstream APIs; this check enforces a sane upper bound.
|
246
|
+
"""
|
234
247
|
if len(path_str) > cls.MAX_PATH_LENGTH:
|
235
|
-
raise
|
248
|
+
raise SplurgeDsvPathValidationError(
|
236
249
|
f"Path is too long: {len(path_str)} characters",
|
237
250
|
details=f"Maximum allowed length is {cls.MAX_PATH_LENGTH} characters",
|
238
251
|
)
|
@@ -281,5 +294,5 @@ class PathValidator:
|
|
281
294
|
try:
|
282
295
|
cls.validate_path(file_path)
|
283
296
|
return True
|
284
|
-
except (
|
297
|
+
except (SplurgeDsvPathValidationError, SplurgeDsvFileNotFoundError, SplurgeDsvFilePermissionError):
|
285
298
|
return False
|