splurge-dsv 2025.2.1__py3-none-any.whl → 2025.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- splurge_dsv/__init__.py +16 -5
- splurge_dsv/cli.py +137 -26
- splurge_dsv/dsv.py +100 -30
- splurge_dsv/dsv_helper.py +415 -90
- splurge_dsv/exceptions.py +22 -1
- splurge_dsv/string_tokenizer.py +7 -1
- {splurge_dsv-2025.2.1.dist-info → splurge_dsv-2025.3.1.dist-info}/METADATA +78 -5
- splurge_dsv-2025.3.1.dist-info/RECORD +13 -0
- splurge_dsv/path_validator.py +0 -298
- splurge_dsv/safe_text_file_reader.py +0 -177
- splurge_dsv/safe_text_file_writer.py +0 -136
- splurge_dsv/text_file_helper.py +0 -240
- splurge_dsv-2025.2.1.dist-info/RECORD +0 -17
- {splurge_dsv-2025.2.1.dist-info → splurge_dsv-2025.3.1.dist-info}/WHEEL +0 -0
- {splurge_dsv-2025.2.1.dist-info → splurge_dsv-2025.3.1.dist-info}/entry_points.txt +0 -0
- {splurge_dsv-2025.2.1.dist-info → splurge_dsv-2025.3.1.dist-info}/licenses/LICENSE +0 -0
- {splurge_dsv-2025.2.1.dist-info → splurge_dsv-2025.3.1.dist-info}/top_level.txt +0 -0
@@ -1,136 +0,0 @@
|
|
1
|
-
"""Deterministic text-only writer utilities.
|
2
|
-
|
3
|
-
This module implements :class:`SafeTextFileWriter` and a convenience
|
4
|
-
``open_text_writer`` context manager. Writes always use the configured
|
5
|
-
encoding and normalize newline characters to a canonical form (LF) to
|
6
|
-
ensure consistent files across platforms.
|
7
|
-
|
8
|
-
Example:
|
9
|
-
with open_text_writer("out.txt") as buf:
|
10
|
-
buf.write("line1\nline2\n")
|
11
|
-
|
12
|
-
Copyright (c) 2025 Jim Schilling
|
13
|
-
Please preserve this header and all related material when sharing!
|
14
|
-
|
15
|
-
License: MIT
|
16
|
-
"""
|
17
|
-
|
18
|
-
from __future__ import annotations
|
19
|
-
|
20
|
-
import io
|
21
|
-
from collections.abc import Iterable, Iterator
|
22
|
-
from contextlib import contextmanager
|
23
|
-
from pathlib import Path
|
24
|
-
from typing import cast
|
25
|
-
|
26
|
-
from .exceptions import SplurgeDsvFileEncodingError
|
27
|
-
|
28
|
-
|
29
|
-
class SafeTextFileWriter:
|
30
|
-
"""Helper for deterministic text writes with newline normalization.
|
31
|
-
|
32
|
-
Args:
|
33
|
-
file_path: Destination file path.
|
34
|
-
encoding: Text encoding to use (default: 'utf-8').
|
35
|
-
newline: Canonical newline sequence to write (default: '\n').
|
36
|
-
|
37
|
-
The class exposes a minimal file-like API and will raise
|
38
|
-
:class:`SplurgeDsvFileEncodingError` when the underlying file cannot be
|
39
|
-
opened with the requested encoding.
|
40
|
-
"""
|
41
|
-
|
42
|
-
def __init__(self, file_path: Path, *, encoding: str = "utf-8", newline: str | None = "\n") -> None:
|
43
|
-
self._path = Path(file_path)
|
44
|
-
self._encoding = encoding
|
45
|
-
# newline is the canonical newline we will write; default to LF
|
46
|
-
self._newline = "\n" if newline is None else newline
|
47
|
-
self._file: io.TextIOBase | None = None
|
48
|
-
|
49
|
-
def open(self, mode: str = "w") -> io.TextIOBase:
|
50
|
-
"""Open the underlying file for text writing.
|
51
|
-
|
52
|
-
Args:
|
53
|
-
mode: File open mode (default: 'w').
|
54
|
-
|
55
|
-
Returns:
|
56
|
-
The opened text file object.
|
57
|
-
|
58
|
-
Raises:
|
59
|
-
SplurgeDsvFileEncodingError: If the file cannot be opened with the
|
60
|
-
requested encoding or underlying OS error occurs.
|
61
|
-
"""
|
62
|
-
try:
|
63
|
-
# open with newline="" to allow us to manage newline normalization
|
64
|
-
fp = open(self._path, mode, encoding=self._encoding, newline="")
|
65
|
-
# cast to TextIOBase for precise typing
|
66
|
-
self._file = cast(io.TextIOBase, fp)
|
67
|
-
return self._file
|
68
|
-
except (LookupError, OSError) as exc:
|
69
|
-
raise SplurgeDsvFileEncodingError(str(exc)) from exc
|
70
|
-
|
71
|
-
def write(self, text: str) -> int:
|
72
|
-
"""Normalize newlines and write ``text`` to the opened file.
|
73
|
-
|
74
|
-
Args:
|
75
|
-
text: Text to write (newlines will be normalized).
|
76
|
-
|
77
|
-
Returns:
|
78
|
-
Number of characters written.
|
79
|
-
"""
|
80
|
-
if self._file is None:
|
81
|
-
raise ValueError("file not opened")
|
82
|
-
normalized = text.replace("\r\n", "\n").replace("\r", "\n")
|
83
|
-
return self._file.write(normalized)
|
84
|
-
|
85
|
-
def writelines(self, lines: Iterable[str]) -> None:
|
86
|
-
if self._file is None:
|
87
|
-
raise ValueError("file not opened")
|
88
|
-
for line in lines:
|
89
|
-
self.write(line)
|
90
|
-
|
91
|
-
def flush(self) -> None:
|
92
|
-
if self._file is None:
|
93
|
-
return
|
94
|
-
self._file.flush()
|
95
|
-
|
96
|
-
def close(self) -> None:
|
97
|
-
if self._file is None:
|
98
|
-
return
|
99
|
-
try:
|
100
|
-
self._file.close()
|
101
|
-
finally:
|
102
|
-
self._file = None
|
103
|
-
|
104
|
-
|
105
|
-
@contextmanager
def open_text_writer(file_path: Path | str, *, encoding: str = "utf-8", mode: str = "w") -> Iterator[io.StringIO]:
    """Stage text in memory and persist it only when the block succeeds.

    The caller receives an in-memory ``StringIO``. When the ``with`` body
    finishes without raising, the staged text is handed to
    :class:`SafeTextFileWriter`, which normalizes it and writes it to
    ``file_path``. When the body raises, the exception propagates and no
    file is opened or written.

    Args:
        file_path: Destination path to write to on successful exit.
        encoding: Encoding to use when writing.
        mode: File open mode passed to writer (default: 'w').

    Yields:
        io.StringIO: Buffer to write textual content into.
    """
    target = Path(file_path)
    staging = io.StringIO()

    # An exception raised inside the ``with`` body is re-raised at this
    # ``yield``; the code below is then skipped, so nothing reaches the disk.
    yield staging

    staged_text = staging.getvalue()
    sink = SafeTextFileWriter(target, encoding=encoding)
    try:
        sink.open(mode=mode)
        sink.write(staged_text)
        sink.flush()
    finally:
        # close() is a no-op if open() failed before a handle was stored.
        sink.close()
|
splurge_dsv/text_file_helper.py
DELETED
@@ -1,240 +0,0 @@
|
|
1
|
-
"""
|
2
|
-
Text file utility functions for common file operations.
|
3
|
-
|
4
|
-
This module provides helper methods for working with text files, including
|
5
|
-
line counting, file previewing, and file loading capabilities. The TextFileHelper
|
6
|
-
class implements static methods for efficient file operations without requiring
|
7
|
-
class instantiation.
|
8
|
-
|
9
|
-
Key features:
|
10
|
-
- Line counting for text files
|
11
|
-
- File previewing with configurable line limits
|
12
|
-
- Complete file loading with header/footer skipping
|
13
|
-
- Streaming file loading with configurable chunk sizes
|
14
|
-
- Configurable whitespace handling and encoding
|
15
|
-
- Secure file path validation
|
16
|
-
- Resource management with context managers
|
17
|
-
|
18
|
-
Copyright (c) 2025 Jim Schilling
|
19
|
-
|
20
|
-
Please preserve this header and all related material when sharing!
|
21
|
-
|
22
|
-
This module is licensed under the MIT License.
|
23
|
-
"""
|
24
|
-
|
25
|
-
# Standard library imports
|
26
|
-
from collections.abc import Iterator
|
27
|
-
from os import PathLike
|
28
|
-
from pathlib import Path
|
29
|
-
|
30
|
-
# Local imports
|
31
|
-
from splurge_dsv.exceptions import SplurgeDsvParameterError
|
32
|
-
from splurge_dsv.path_validator import PathValidator
|
33
|
-
from splurge_dsv.safe_text_file_reader import SafeTextFileReader
|
34
|
-
|
35
|
-
|
36
|
-
class TextFileHelper:
|
37
|
-
"""Utility helpers for working with text files.
|
38
|
-
|
39
|
-
All methods are provided as classmethods and are designed to be memory
|
40
|
-
efficient. This module enforces a deterministic newline policy: CRLF
|
41
|
-
("\r\n"), CR ("\r"), and LF ("\n") are normalized to a single ``\n``
|
42
|
-
newline. Methods return logical, normalized lines which makes behavior
|
43
|
-
consistent across platforms and simplifies testing.
|
44
|
-
"""
|
45
|
-
|
46
|
-
DEFAULT_ENCODING = "utf-8"
|
47
|
-
DEFAULT_MAX_LINES = 100
|
48
|
-
DEFAULT_CHUNK_SIZE = 500
|
49
|
-
DEFAULT_MIN_CHUNK_SIZE = 100
|
50
|
-
DEFAULT_SKIP_HEADER_ROWS = 0
|
51
|
-
DEFAULT_SKIP_FOOTER_ROWS = 0
|
52
|
-
DEFAULT_STRIP = True
|
53
|
-
DEFAULT_MODE = "r"
|
54
|
-
|
55
|
-
@classmethod
|
56
|
-
def line_count(cls, file_path: PathLike[str] | str, *, encoding: str = DEFAULT_ENCODING) -> int:
|
57
|
-
"""Return the number of logical lines in ``file_path``.
|
58
|
-
|
59
|
-
The file is iterated efficiently without reading the entire contents
|
60
|
-
into memory. Newlines are normalized according to the package newline
|
61
|
-
policy before counting.
|
62
|
-
|
63
|
-
Args:
|
64
|
-
file_path: Path to the text file to inspect.
|
65
|
-
encoding: Text encoding to use when reading the file.
|
66
|
-
|
67
|
-
Returns:
|
68
|
-
The number of logical lines in the file.
|
69
|
-
|
70
|
-
Raises:
|
71
|
-
SplurgeDsvFileNotFoundError: If ``file_path`` does not exist.
|
72
|
-
SplurgeDsvFilePermissionError: If the file cannot be read due to
|
73
|
-
permissions.
|
74
|
-
SplurgeDsvFileEncodingError: If the file cannot be decoded using the
|
75
|
-
provided ``encoding``.
|
76
|
-
SplurgeDsvPathValidationError: If path validation fails.
|
77
|
-
"""
|
78
|
-
# Validate file path
|
79
|
-
validated_path = PathValidator.validate_path(
|
80
|
-
Path(file_path), must_exist=True, must_be_file=True, must_be_readable=True
|
81
|
-
)
|
82
|
-
|
83
|
-
# Delegate to SafeTextFileReader which centralizes newline normalization
|
84
|
-
reader = SafeTextFileReader(validated_path, encoding=encoding)
|
85
|
-
return len(reader.read(strip=False))
|
86
|
-
|
87
|
-
@classmethod
|
88
|
-
def preview(
|
89
|
-
cls,
|
90
|
-
file_path: PathLike[str] | str,
|
91
|
-
*,
|
92
|
-
max_lines: int = DEFAULT_MAX_LINES,
|
93
|
-
strip: bool = DEFAULT_STRIP,
|
94
|
-
encoding: str = DEFAULT_ENCODING,
|
95
|
-
skip_header_rows: int = DEFAULT_SKIP_HEADER_ROWS,
|
96
|
-
) -> list[str]:
|
97
|
-
"""Return the first ``max_lines`` logical lines from ``file_path``.
|
98
|
-
|
99
|
-
The preview respects header skipping and optional whitespace
|
100
|
-
stripping. Lines returned are normalized according to the package
|
101
|
-
newline policy.
|
102
|
-
|
103
|
-
Args:
|
104
|
-
file_path: Path to the text file.
|
105
|
-
max_lines: Maximum number of lines to return (must be >= 1).
|
106
|
-
strip: If True, strip leading/trailing whitespace from each line.
|
107
|
-
encoding: File encoding to use when reading the file.
|
108
|
-
skip_header_rows: Number of leading lines to ignore before previewing.
|
109
|
-
|
110
|
-
Returns:
|
111
|
-
A list of logical lines (strings), up to ``max_lines`` in length.
|
112
|
-
|
113
|
-
Raises:
|
114
|
-
SplurgeDsvParameterError: If ``max_lines`` is less than 1.
|
115
|
-
SplurgeDsvFileNotFoundError: If ``file_path`` does not exist.
|
116
|
-
SplurgeDsvFilePermissionError: If the file cannot be read due to
|
117
|
-
permissions.
|
118
|
-
SplurgeDsvFileEncodingError: If the file cannot be decoded using the
|
119
|
-
provided ``encoding``.
|
120
|
-
SplurgeDsvPathValidationError: If path validation fails.
|
121
|
-
"""
|
122
|
-
if max_lines < 1:
|
123
|
-
raise SplurgeDsvParameterError(
|
124
|
-
"TextFileHelper.preview: max_lines is less than 1", details="max_lines must be at least 1"
|
125
|
-
)
|
126
|
-
|
127
|
-
# Validate file path
|
128
|
-
validated_path = PathValidator.validate_path(
|
129
|
-
Path(file_path), must_exist=True, must_be_file=True, must_be_readable=True
|
130
|
-
)
|
131
|
-
|
132
|
-
skip_header_rows = max(skip_header_rows, cls.DEFAULT_SKIP_HEADER_ROWS)
|
133
|
-
reader = SafeTextFileReader(validated_path, encoding=encoding)
|
134
|
-
return reader.preview(max_lines=max_lines, strip=strip, skip_header_rows=skip_header_rows)
|
135
|
-
|
136
|
-
@classmethod
|
137
|
-
def read_as_stream(
|
138
|
-
cls,
|
139
|
-
file_path: PathLike[str] | str,
|
140
|
-
*,
|
141
|
-
strip: bool = DEFAULT_STRIP,
|
142
|
-
encoding: str = DEFAULT_ENCODING,
|
143
|
-
skip_header_rows: int = DEFAULT_SKIP_HEADER_ROWS,
|
144
|
-
skip_footer_rows: int = DEFAULT_SKIP_FOOTER_ROWS,
|
145
|
-
chunk_size: int = DEFAULT_CHUNK_SIZE,
|
146
|
-
) -> Iterator[list[str]]:
|
147
|
-
"""Yield the file contents as successive chunks of logical lines.
|
148
|
-
|
149
|
-
Each yielded value is a list of lines (strings), where each chunk
|
150
|
-
contains up to ``chunk_size`` lines. Footer skipping is implemented
|
151
|
-
using a sliding-window technique so the file is not fully loaded into
|
152
|
-
memory.
|
153
|
-
|
154
|
-
Args:
|
155
|
-
file_path: Path to the text file to stream.
|
156
|
-
strip: If True, strip leading/trailing whitespace from each line.
|
157
|
-
encoding: Text encoding used to read the file.
|
158
|
-
skip_header_rows: Number of leading lines to skip before yielding.
|
159
|
-
skip_footer_rows: Number of trailing lines to skip (handled via
|
160
|
-
an internal buffer; does not require reading the whole file).
|
161
|
-
chunk_size: Target number of lines per yielded chunk.
|
162
|
-
|
163
|
-
Yields:
|
164
|
-
Lists of logical lines (each a list[str]) for each chunk.
|
165
|
-
|
166
|
-
Raises:
|
167
|
-
SplurgeDsvFileNotFoundError: If ``file_path`` does not exist.
|
168
|
-
SplurgeDsvFilePermissionError: If the file cannot be read due to
|
169
|
-
permissions.
|
170
|
-
SplurgeDsvFileEncodingError: If the file cannot be decoded using the
|
171
|
-
provided ``encoding``.
|
172
|
-
SplurgeDsvPathValidationError: If path validation fails.
|
173
|
-
"""
|
174
|
-
# Allow small chunk sizes for testing, but enforce minimum for performance
|
175
|
-
# Only enforce minimum if chunk_size is "moderately small" (to prevent accidental small chunks)
|
176
|
-
if chunk_size >= 10: # If someone sets a chunk size >= 10, enforce minimum for performance
|
177
|
-
chunk_size = max(chunk_size, cls.DEFAULT_MIN_CHUNK_SIZE)
|
178
|
-
# For very small chunk sizes (like 1-9), allow them (useful for testing)
|
179
|
-
skip_header_rows = max(skip_header_rows, cls.DEFAULT_SKIP_HEADER_ROWS)
|
180
|
-
skip_footer_rows = max(skip_footer_rows, cls.DEFAULT_SKIP_FOOTER_ROWS)
|
181
|
-
|
182
|
-
# Validate file path
|
183
|
-
validated_path = PathValidator.validate_path(
|
184
|
-
Path(file_path), must_exist=True, must_be_file=True, must_be_readable=True
|
185
|
-
)
|
186
|
-
|
187
|
-
# Use SafeTextFileReader to centralize newline normalization and streaming behavior.
|
188
|
-
reader = SafeTextFileReader(validated_path, encoding=encoding)
|
189
|
-
yield from reader.read_as_stream(
|
190
|
-
strip=strip, skip_header_rows=skip_header_rows, skip_footer_rows=skip_footer_rows, chunk_size=chunk_size
|
191
|
-
)
|
192
|
-
|
193
|
-
@classmethod
|
194
|
-
def read(
|
195
|
-
cls,
|
196
|
-
file_path: PathLike[str] | str,
|
197
|
-
*,
|
198
|
-
strip: bool = DEFAULT_STRIP,
|
199
|
-
encoding: str = DEFAULT_ENCODING,
|
200
|
-
skip_header_rows: int = DEFAULT_SKIP_HEADER_ROWS,
|
201
|
-
skip_footer_rows: int = DEFAULT_SKIP_FOOTER_ROWS,
|
202
|
-
) -> list[str]:
|
203
|
-
"""Read all logical lines from ``file_path`` into memory.
|
204
|
-
|
205
|
-
This convenience method returns the entire file as a list of
|
206
|
-
normalized lines. Header and footer rows may be skipped with the
|
207
|
-
corresponding parameters.
|
208
|
-
|
209
|
-
Args:
|
210
|
-
file_path: Path to the text file to read.
|
211
|
-
strip: If True, strip leading/trailing whitespace from each line.
|
212
|
-
encoding: Text encoding used to read the file.
|
213
|
-
skip_header_rows: Number of leading lines to ignore.
|
214
|
-
skip_footer_rows: Number of trailing lines to ignore.
|
215
|
-
|
216
|
-
Returns:
|
217
|
-
A list containing every logical line from the file except skipped
|
218
|
-
header/footer lines.
|
219
|
-
|
220
|
-
Raises:
|
221
|
-
SplurgeDsvFileNotFoundError: If ``file_path`` does not exist.
|
222
|
-
SplurgeDsvFilePermissionError: If the file cannot be read due to
|
223
|
-
permissions.
|
224
|
-
SplurgeDsvFileEncodingError: If the file cannot be decoded using the
|
225
|
-
provided ``encoding``.
|
226
|
-
SplurgeDsvPathValidationError: If path validation fails.
|
227
|
-
"""
|
228
|
-
# Validate file path
|
229
|
-
validated_path = PathValidator.validate_path(
|
230
|
-
Path(file_path), must_exist=True, must_be_file=True, must_be_readable=True
|
231
|
-
)
|
232
|
-
|
233
|
-
skip_header_rows = max(skip_header_rows, cls.DEFAULT_SKIP_HEADER_ROWS)
|
234
|
-
skip_footer_rows = max(skip_footer_rows, cls.DEFAULT_SKIP_FOOTER_ROWS)
|
235
|
-
|
236
|
-
skip_header_rows = max(skip_header_rows, cls.DEFAULT_SKIP_HEADER_ROWS)
|
237
|
-
skip_footer_rows = max(skip_footer_rows, cls.DEFAULT_SKIP_FOOTER_ROWS)
|
238
|
-
|
239
|
-
reader = SafeTextFileReader(validated_path, encoding=encoding)
|
240
|
-
return reader.read(strip=strip, skip_header_rows=skip_header_rows, skip_footer_rows=skip_footer_rows)
|
@@ -1,17 +0,0 @@
|
|
1
|
-
splurge_dsv/__init__.py,sha256=5TfARRtn0dMytGL4TnlEOWBon3HJiwN8MKEdHMItPZI,3337
|
2
|
-
splurge_dsv/__main__.py,sha256=6dpfX_96hEpOqxv5X4bK73xX86YTgK0Adad1uTWSABM,426
|
3
|
-
splurge_dsv/cli.py,sha256=qm7ZwgkUjMW5ASj14kWFyXrXY2T-MGhiCFHh8XUHi38,7605
|
4
|
-
splurge_dsv/dsv.py,sha256=5wDtHDk8Iio2SAIPO7Ce01dGhzH3fv12by8hQcPkJVI,9873
|
5
|
-
splurge_dsv/dsv_helper.py,sha256=ppFVZ4LNSepWbVJtYMQvsZGmMBDr6nbP4yKZettWczk,12060
|
6
|
-
splurge_dsv/exceptions.py,sha256=hefUTjk3ULca5TdXoKe5L-cME7SU1RFcWVHxNpZ_w-Y,5274
|
7
|
-
splurge_dsv/path_validator.py,sha256=r08PkuMdL0eBY_iao00_irBMdT6ORJ2-cNK5AUssEKs,10681
|
8
|
-
splurge_dsv/safe_text_file_reader.py,sha256=9GCOGCTDDP5FJD0u2wZ107SQNEIj9Rm1zN6shYiKq7g,6659
|
9
|
-
splurge_dsv/safe_text_file_writer.py,sha256=zQIsDZ6jRN_ZWwLX4dpUZI35iudxzuv1Gjv7K1vSFJk,4562
|
10
|
-
splurge_dsv/string_tokenizer.py,sha256=jFgkqeGx5PnmKAvu7sn3xxHcQklZTZUy8x_eo5e6TWI,4497
|
11
|
-
splurge_dsv/text_file_helper.py,sha256=2SxbYtZtpMtHQ-5g1aQzgvQobBrlQH4EsrhBY5t3Xx4,10362
|
12
|
-
splurge_dsv-2025.2.1.dist-info/licenses/LICENSE,sha256=fPgtg-tIFHinQvJH0arRfv50AuxikD5eHw6rrPy2A5w,1091
|
13
|
-
splurge_dsv-2025.2.1.dist-info/METADATA,sha256=V1gWLbEKupAzTYWWvgx1UviuOtkjEhAYuPQ6pwOsDYM,8518
|
14
|
-
splurge_dsv-2025.2.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
15
|
-
splurge_dsv-2025.2.1.dist-info/entry_points.txt,sha256=QmGyc3qHYtY61uanRxNOXw-waSJ01qypSCI8Kb3zgsU,56
|
16
|
-
splurge_dsv-2025.2.1.dist-info/top_level.txt,sha256=D6Si3FTfpRYqH7kzM7tSQAyaKbbraO6UPLpcqcY4XXM,12
|
17
|
-
splurge_dsv-2025.2.1.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|