splurge-dsv 2025.2.0__py3-none-any.whl → 2025.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,136 +0,0 @@
1
- """Deterministic text-only writer utilities.
2
-
3
- This module implements :class:`SafeTextFileWriter` and a convenience
4
- ``open_text_writer`` context manager. Writes always use the configured
5
- encoding and normalize newline characters to a canonical form (LF) to
6
- ensure consistent files across platforms.
7
-
8
- Example:
9
- with open_text_writer("out.txt") as buf:
10
- buf.write("line1\nline2\n")
11
-
12
- Copyright (c) 2025 Jim Schilling
13
- Please preserve this header and all related material when sharing!
14
-
15
- License: MIT
16
- """
17
-
18
- from __future__ import annotations
19
-
20
- import io
21
- from collections.abc import Iterable, Iterator
22
- from contextlib import contextmanager
23
- from pathlib import Path
24
- from typing import cast
25
-
26
- from .exceptions import SplurgeDsvFileEncodingError
27
-
28
-
29
- class SafeTextFileWriter:
30
- """Helper for deterministic text writes with newline normalization.
31
-
32
- Args:
33
- file_path: Destination file path.
34
- encoding: Text encoding to use (default: 'utf-8').
35
- newline: Canonical newline sequence to write (default: '\n').
36
-
37
- The class exposes a minimal file-like API and will raise
38
- :class:`SplurgeDsvFileEncodingError` when the underlying file cannot be
39
- opened with the requested encoding.
40
- """
41
-
42
- def __init__(self, file_path: Path, *, encoding: str = "utf-8", newline: str | None = "\n") -> None:
43
- self._path = Path(file_path)
44
- self._encoding = encoding
45
- # newline is the canonical newline we will write; default to LF
46
- self._newline = "\n" if newline is None else newline
47
- self._file: io.TextIOBase | None = None
48
-
49
- def open(self, mode: str = "w") -> io.TextIOBase:
50
- """Open the underlying file for text writing.
51
-
52
- Args:
53
- mode: File open mode (default: 'w').
54
-
55
- Returns:
56
- The opened text file object.
57
-
58
- Raises:
59
- SplurgeDsvFileEncodingError: If the file cannot be opened with the
60
- requested encoding or underlying OS error occurs.
61
- """
62
- try:
63
- # open with newline="" to allow us to manage newline normalization
64
- fp = open(self._path, mode, encoding=self._encoding, newline="")
65
- # cast to TextIOBase for precise typing
66
- self._file = cast(io.TextIOBase, fp)
67
- return self._file
68
- except (LookupError, OSError) as exc:
69
- raise SplurgeDsvFileEncodingError(str(exc)) from exc
70
-
71
- def write(self, text: str) -> int:
72
- """Normalize newlines and write ``text`` to the opened file.
73
-
74
- Args:
75
- text: Text to write (newlines will be normalized).
76
-
77
- Returns:
78
- Number of characters written.
79
- """
80
- if self._file is None:
81
- raise ValueError("file not opened")
82
- normalized = text.replace("\r\n", "\n").replace("\r", "\n")
83
- return self._file.write(normalized)
84
-
85
- def writelines(self, lines: Iterable[str]) -> None:
86
- if self._file is None:
87
- raise ValueError("file not opened")
88
- for line in lines:
89
- self.write(line)
90
-
91
- def flush(self) -> None:
92
- if self._file is None:
93
- return
94
- self._file.flush()
95
-
96
- def close(self) -> None:
97
- if self._file is None:
98
- return
99
- try:
100
- self._file.close()
101
- finally:
102
- self._file = None
103
-
104
-
105
- @contextmanager
106
- def open_text_writer(file_path: Path | str, *, encoding: str = "utf-8", mode: str = "w") -> Iterator[io.StringIO]:
107
- """Context manager yielding an in-memory StringIO to accumulate text.
108
-
109
- On successful exit, the buffered content is normalized and written to
110
- disk using :class:`SafeTextFileWriter`. If an exception occurs inside
111
- the context, nothing is written and the exception is propagated.
112
-
113
- Args:
114
- file_path: Destination path to write to on successful exit.
115
- encoding: Encoding to use when writing.
116
- mode: File open mode passed to writer (default: 'w').
117
-
118
- Yields:
119
- io.StringIO: Buffer to write textual content into.
120
- """
121
- path = Path(file_path)
122
- buffer = io.StringIO()
123
- try:
124
- yield buffer
125
- except Exception:
126
- # Do not write on exceptions; re-raise
127
- raise
128
- else:
129
- content = buffer.getvalue()
130
- writer = SafeTextFileWriter(path, encoding=encoding)
131
- try:
132
- writer.open(mode=mode)
133
- writer.write(content)
134
- writer.flush()
135
- finally:
136
- writer.close()
@@ -1,240 +0,0 @@
1
- """
2
- Text file utility functions for common file operations.
3
-
4
- This module provides helper methods for working with text files, including
5
- line counting, file previewing, and file loading capabilities. The TextFileHelper
6
- class implements static methods for efficient file operations without requiring
7
- class instantiation.
8
-
9
- Key features:
10
- - Line counting for text files
11
- - File previewing with configurable line limits
12
- - Complete file loading with header/footer skipping
13
- - Streaming file loading with configurable chunk sizes
14
- - Configurable whitespace handling and encoding
15
- - Secure file path validation
16
- - Resource management with context managers
17
-
18
- Copyright (c) 2025 Jim Schilling
19
-
20
- Please preserve this header and all related material when sharing!
21
-
22
- This module is licensed under the MIT License.
23
- """
24
-
25
- # Standard library imports
26
- from collections.abc import Iterator
27
- from os import PathLike
28
- from pathlib import Path
29
-
30
- # Local imports
31
- from splurge_dsv.exceptions import SplurgeDsvParameterError
32
- from splurge_dsv.path_validator import PathValidator
33
- from splurge_dsv.safe_text_file_reader import SafeTextFileReader
34
-
35
-
36
- class TextFileHelper:
37
- """Utility helpers for working with text files.
38
-
39
- All methods are provided as classmethods and are designed to be memory
40
- efficient. This module enforces a deterministic newline policy: CRLF
41
- ("\r\n"), CR ("\r"), and LF ("\n") are normalized to a single ``\n``
42
- newline. Methods return logical, normalized lines which makes behavior
43
- consistent across platforms and simplifies testing.
44
- """
45
-
46
- DEFAULT_ENCODING = "utf-8"
47
- DEFAULT_MAX_LINES = 100
48
- DEFAULT_CHUNK_SIZE = 500
49
- DEFAULT_MIN_CHUNK_SIZE = 100
50
- DEFAULT_SKIP_HEADER_ROWS = 0
51
- DEFAULT_SKIP_FOOTER_ROWS = 0
52
- DEFAULT_STRIP = True
53
- DEFAULT_MODE = "r"
54
-
55
- @classmethod
56
- def line_count(cls, file_path: PathLike[str] | str, *, encoding: str = DEFAULT_ENCODING) -> int:
57
- """Return the number of logical lines in ``file_path``.
58
-
59
- The file is iterated efficiently without reading the entire contents
60
- into memory. Newlines are normalized according to the package newline
61
- policy before counting.
62
-
63
- Args:
64
- file_path: Path to the text file to inspect.
65
- encoding: Text encoding to use when reading the file.
66
-
67
- Returns:
68
- The number of logical lines in the file.
69
-
70
- Raises:
71
- SplurgeDsvFileNotFoundError: If ``file_path`` does not exist.
72
- SplurgeDsvFilePermissionError: If the file cannot be read due to
73
- permissions.
74
- SplurgeDsvFileEncodingError: If the file cannot be decoded using the
75
- provided ``encoding``.
76
- SplurgeDsvPathValidationError: If path validation fails.
77
- """
78
- # Validate file path
79
- validated_path = PathValidator.validate_path(
80
- Path(file_path), must_exist=True, must_be_file=True, must_be_readable=True
81
- )
82
-
83
- # Delegate to SafeTextFileReader which centralizes newline normalization
84
- reader = SafeTextFileReader(validated_path, encoding=encoding)
85
- return len(reader.read(strip=False))
86
-
87
- @classmethod
88
- def preview(
89
- cls,
90
- file_path: PathLike[str] | str,
91
- *,
92
- max_lines: int = DEFAULT_MAX_LINES,
93
- strip: bool = DEFAULT_STRIP,
94
- encoding: str = DEFAULT_ENCODING,
95
- skip_header_rows: int = DEFAULT_SKIP_HEADER_ROWS,
96
- ) -> list[str]:
97
- """Return the first ``max_lines`` logical lines from ``file_path``.
98
-
99
- The preview respects header skipping and optional whitespace
100
- stripping. Lines returned are normalized according to the package
101
- newline policy.
102
-
103
- Args:
104
- file_path: Path to the text file.
105
- max_lines: Maximum number of lines to return (must be >= 1).
106
- strip: If True, strip leading/trailing whitespace from each line.
107
- encoding: File encoding to use when reading the file.
108
- skip_header_rows: Number of leading lines to ignore before previewing.
109
-
110
- Returns:
111
- A list of logical lines (strings), up to ``max_lines`` in length.
112
-
113
- Raises:
114
- SplurgeDsvParameterError: If ``max_lines`` is less than 1.
115
- SplurgeDsvFileNotFoundError: If ``file_path`` does not exist.
116
- SplurgeDsvFilePermissionError: If the file cannot be read due to
117
- permissions.
118
- SplurgeDsvFileEncodingError: If the file cannot be decoded using the
119
- provided ``encoding``.
120
- SplurgeDsvPathValidationError: If path validation fails.
121
- """
122
- if max_lines < 1:
123
- raise SplurgeDsvParameterError(
124
- "TextFileHelper.preview: max_lines is less than 1", details="max_lines must be at least 1"
125
- )
126
-
127
- # Validate file path
128
- validated_path = PathValidator.validate_path(
129
- Path(file_path), must_exist=True, must_be_file=True, must_be_readable=True
130
- )
131
-
132
- skip_header_rows = max(skip_header_rows, cls.DEFAULT_SKIP_HEADER_ROWS)
133
- reader = SafeTextFileReader(validated_path, encoding=encoding)
134
- return reader.preview(max_lines=max_lines, strip=strip, skip_header_rows=skip_header_rows)
135
-
136
- @classmethod
137
- def read_as_stream(
138
- cls,
139
- file_path: PathLike[str] | str,
140
- *,
141
- strip: bool = DEFAULT_STRIP,
142
- encoding: str = DEFAULT_ENCODING,
143
- skip_header_rows: int = DEFAULT_SKIP_HEADER_ROWS,
144
- skip_footer_rows: int = DEFAULT_SKIP_FOOTER_ROWS,
145
- chunk_size: int = DEFAULT_CHUNK_SIZE,
146
- ) -> Iterator[list[str]]:
147
- """Yield the file contents as successive chunks of logical lines.
148
-
149
- Each yielded value is a list of lines (strings), where each chunk
150
- contains up to ``chunk_size`` lines. Footer skipping is implemented
151
- using a sliding-window technique so the file is not fully loaded into
152
- memory.
153
-
154
- Args:
155
- file_path: Path to the text file to stream.
156
- strip: If True, strip leading/trailing whitespace from each line.
157
- encoding: Text encoding used to read the file.
158
- skip_header_rows: Number of leading lines to skip before yielding.
159
- skip_footer_rows: Number of trailing lines to skip (handled via
160
- an internal buffer; does not require reading the whole file).
161
- chunk_size: Target number of lines per yielded chunk.
162
-
163
- Yields:
164
- Lists of logical lines (each a list[str]) for each chunk.
165
-
166
- Raises:
167
- SplurgeDsvFileNotFoundError: If ``file_path`` does not exist.
168
- SplurgeDsvFilePermissionError: If the file cannot be read due to
169
- permissions.
170
- SplurgeDsvFileEncodingError: If the file cannot be decoded using the
171
- provided ``encoding``.
172
- SplurgeDsvPathValidationError: If path validation fails.
173
- """
174
- # Allow small chunk sizes for testing, but enforce minimum for performance
175
- # Only enforce minimum if chunk_size is "moderately small" (to prevent accidental small chunks)
176
- if chunk_size >= 10: # If someone sets a chunk size >= 10, enforce minimum for performance
177
- chunk_size = max(chunk_size, cls.DEFAULT_MIN_CHUNK_SIZE)
178
- # For very small chunk sizes (like 1-9), allow them (useful for testing)
179
- skip_header_rows = max(skip_header_rows, cls.DEFAULT_SKIP_HEADER_ROWS)
180
- skip_footer_rows = max(skip_footer_rows, cls.DEFAULT_SKIP_FOOTER_ROWS)
181
-
182
- # Validate file path
183
- validated_path = PathValidator.validate_path(
184
- Path(file_path), must_exist=True, must_be_file=True, must_be_readable=True
185
- )
186
-
187
- # Use SafeTextFileReader to centralize newline normalization and streaming behavior.
188
- reader = SafeTextFileReader(validated_path, encoding=encoding)
189
- yield from reader.read_as_stream(
190
- strip=strip, skip_header_rows=skip_header_rows, skip_footer_rows=skip_footer_rows, chunk_size=chunk_size
191
- )
192
-
193
- @classmethod
194
- def read(
195
- cls,
196
- file_path: PathLike[str] | str,
197
- *,
198
- strip: bool = DEFAULT_STRIP,
199
- encoding: str = DEFAULT_ENCODING,
200
- skip_header_rows: int = DEFAULT_SKIP_HEADER_ROWS,
201
- skip_footer_rows: int = DEFAULT_SKIP_FOOTER_ROWS,
202
- ) -> list[str]:
203
- """Read all logical lines from ``file_path`` into memory.
204
-
205
- This convenience method returns the entire file as a list of
206
- normalized lines. Header and footer rows may be skipped with the
207
- corresponding parameters.
208
-
209
- Args:
210
- file_path: Path to the text file to read.
211
- strip: If True, strip leading/trailing whitespace from each line.
212
- encoding: Text encoding used to read the file.
213
- skip_header_rows: Number of leading lines to ignore.
214
- skip_footer_rows: Number of trailing lines to ignore.
215
-
216
- Returns:
217
- A list containing every logical line from the file except skipped
218
- header/footer lines.
219
-
220
- Raises:
221
- SplurgeDsvFileNotFoundError: If ``file_path`` does not exist.
222
- SplurgeDsvFilePermissionError: If the file cannot be read due to
223
- permissions.
224
- SplurgeDsvFileEncodingError: If the file cannot be decoded using the
225
- provided ``encoding``.
226
- SplurgeDsvPathValidationError: If path validation fails.
227
- """
228
- # Validate file path
229
- validated_path = PathValidator.validate_path(
230
- Path(file_path), must_exist=True, must_be_file=True, must_be_readable=True
231
- )
232
-
233
- skip_header_rows = max(skip_header_rows, cls.DEFAULT_SKIP_HEADER_ROWS)
234
- skip_footer_rows = max(skip_footer_rows, cls.DEFAULT_SKIP_FOOTER_ROWS)
235
-
236
- skip_header_rows = max(skip_header_rows, cls.DEFAULT_SKIP_HEADER_ROWS)
237
- skip_footer_rows = max(skip_footer_rows, cls.DEFAULT_SKIP_FOOTER_ROWS)
238
-
239
- reader = SafeTextFileReader(validated_path, encoding=encoding)
240
- return reader.read(strip=strip, skip_header_rows=skip_header_rows, skip_footer_rows=skip_footer_rows)
@@ -1,17 +0,0 @@
1
- splurge_dsv/__init__.py,sha256=cLZbXey-8aogRkpE2easqfaaJdkHCqv3M_KZlrDo3WU,3337
2
- splurge_dsv/__main__.py,sha256=6dpfX_96hEpOqxv5X4bK73xX86YTgK0Adad1uTWSABM,426
3
- splurge_dsv/cli.py,sha256=xdPL7dSdWGDUsG2-FbsIgRnbYuQRN-grLlx8iJ-HxZc,7600
4
- splurge_dsv/dsv.py,sha256=6P0Ujma_8xED-HsTywLaTQYD_Wxo7J8xU25RdE-5I38,8859
5
- splurge_dsv/dsv_helper.py,sha256=lAEquK2WDOB3yS1ynxmOcg_pn9Q3njHJgFs_wyCOi_k,10293
6
- splurge_dsv/exceptions.py,sha256=hefUTjk3ULca5TdXoKe5L-cME7SU1RFcWVHxNpZ_w-Y,5274
7
- splurge_dsv/path_validator.py,sha256=r08PkuMdL0eBY_iao00_irBMdT6ORJ2-cNK5AUssEKs,10681
8
- splurge_dsv/safe_text_file_reader.py,sha256=9GCOGCTDDP5FJD0u2wZ107SQNEIj9Rm1zN6shYiKq7g,6659
9
- splurge_dsv/safe_text_file_writer.py,sha256=zQIsDZ6jRN_ZWwLX4dpUZI35iudxzuv1Gjv7K1vSFJk,4562
10
- splurge_dsv/string_tokenizer.py,sha256=jFgkqeGx5PnmKAvu7sn3xxHcQklZTZUy8x_eo5e6TWI,4497
11
- splurge_dsv/text_file_helper.py,sha256=2SxbYtZtpMtHQ-5g1aQzgvQobBrlQH4EsrhBY5t3Xx4,10362
12
- splurge_dsv-2025.2.0.dist-info/licenses/LICENSE,sha256=fPgtg-tIFHinQvJH0arRfv50AuxikD5eHw6rrPy2A5w,1091
13
- splurge_dsv-2025.2.0.dist-info/METADATA,sha256=n8wCT3oOj_-60a4Et6AvbQ708tdM-pi3DL0lSlBsYuo,8518
14
- splurge_dsv-2025.2.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
15
- splurge_dsv-2025.2.0.dist-info/entry_points.txt,sha256=QmGyc3qHYtY61uanRxNOXw-waSJ01qypSCI8Kb3zgsU,56
16
- splurge_dsv-2025.2.0.dist-info/top_level.txt,sha256=D6Si3FTfpRYqH7kzM7tSQAyaKbbraO6UPLpcqcY4XXM,12
17
- splurge_dsv-2025.2.0.dist-info/RECORD,,