splurge-dsv 2025.1.5__py3-none-any.whl → 2025.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
splurge_dsv/dsv_helper.py CHANGED
@@ -9,11 +9,12 @@ This module is licensed under the MIT License.
9
9
  """
10
10
 
11
11
  # Standard library imports
12
+ import warnings
12
13
  from collections.abc import Iterator
13
14
  from os import PathLike
14
15
 
15
16
  # Local imports
16
- from splurge_dsv.exceptions import SplurgeParameterError
17
+ from splurge_dsv.exceptions import SplurgeDsvParameterError
17
18
  from splurge_dsv.string_tokenizer import StringTokenizer
18
19
  from splurge_dsv.text_file_helper import TextFileHelper
19
20
 
@@ -43,30 +44,34 @@ class DsvHelper:
43
44
  bookend: str | None = None,
44
45
  bookend_strip: bool = DEFAULT_BOOKEND_STRIP,
45
46
  ) -> list[str]:
46
- """
47
- Parse a string into a list of strings.
47
+ """Parse a single DSV line into tokens.
48
+
49
+ This method tokenizes a single line of DSV text using the provided
50
+ ``delimiter``. It optionally strips surrounding whitespace from each
51
+ token and may remove configured bookend characters (for example,
52
+ double-quotes used around fields).
48
53
 
49
54
  Args:
50
- content (str): The string to parse.
51
- delimiter (str): The delimiter to use.
52
- strip (bool): Whether to strip whitespace from the strings.
53
- bookend (str | None): The bookend to use for text fields.
54
- bookend_strip (bool): Whether to strip whitespace from the bookend.
55
+ content: The input line to tokenize.
56
+ delimiter: A single-character delimiter string (e.g. "," or "\t").
57
+ strip: If True, strip leading/trailing whitespace from each token.
58
+ bookend: Optional bookend character to remove from token ends.
59
+ bookend_strip: If True, strip whitespace after removing bookends.
55
60
 
56
61
  Returns:
57
- list[str]: The list of strings.
62
+ A list of parsed token strings.
58
63
 
59
64
  Raises:
60
- SplurgeParameterError: If delimiter is empty or None.
65
+ SplurgeDsvParameterError: If ``delimiter`` is empty or None.
61
66
 
62
- Example:
67
+ Examples:
63
68
  >>> DsvHelper.parse("a,b,c", delimiter=",")
64
69
  ['a', 'b', 'c']
65
70
  >>> DsvHelper.parse('"a","b","c"', delimiter=",", bookend='"')
66
71
  ['a', 'b', 'c']
67
72
  """
68
73
  if delimiter is None or delimiter == "":
69
- raise SplurgeParameterError("delimiter cannot be empty or None")
74
+ raise SplurgeDsvParameterError("delimiter cannot be empty or None")
70
75
 
71
76
  tokens: list[str] = StringTokenizer.parse(content, delimiter=delimiter, strip=strip)
72
77
 
@@ -85,32 +90,34 @@ class DsvHelper:
85
90
  bookend: str | None = None,
86
91
  bookend_strip: bool = DEFAULT_BOOKEND_STRIP,
87
92
  ) -> list[list[str]]:
88
- """
89
- Parse a list of strings into a list of lists of strings.
93
+ """Parse multiple DSV lines.
94
+
95
+ Given a list of lines (for example, the result of reading a file),
96
+ return a list where each element is the list of tokens for that line.
90
97
 
91
98
  Args:
92
- content (list[str]): The list of strings to parse.
93
- delimiter (str): The delimiter to use.
94
- strip (bool): Whether to strip whitespace from the strings.
95
- bookend (str | None): The bookend to use for text fields.
96
- bookend_strip (bool): Whether to strip whitespace from the bookend.
99
+ content: A list of input lines to parse.
100
+ delimiter: Delimiter used to split each line.
101
+ strip: If True, strip whitespace from tokens.
102
+ bookend: Optional bookend character to remove from tokens.
103
+ bookend_strip: If True, strip whitespace after removing bookends.
97
104
 
98
105
  Returns:
99
- list[list[str]]: The list of lists of strings.
106
+ A list of token lists, one per input line.
100
107
 
101
108
  Raises:
102
- SplurgeParameterError: If delimiter is empty or None.
103
- SplurgeParameterError: If content is not a list of strings.
109
+ SplurgeDsvParameterError: If ``content`` is not a list of strings or
110
+ if ``delimiter`` is empty or None.
104
111
 
105
112
  Example:
106
113
  >>> DsvHelper.parses(["a,b,c", "d,e,f"], delimiter=",")
107
114
  [['a', 'b', 'c'], ['d', 'e', 'f']]
108
115
  """
109
116
  if not isinstance(content, list):
110
- raise SplurgeParameterError("content must be a list")
117
+ raise SplurgeDsvParameterError("content must be a list")
111
118
 
112
119
  if not all(isinstance(item, str) for item in content):
113
- raise SplurgeParameterError("content must be a list of strings")
120
+ raise SplurgeDsvParameterError("content must be a list of strings")
114
121
 
115
122
  return [
116
123
  cls.parse(item, delimiter=delimiter, strip=strip, bookend=bookend, bookend_strip=bookend_strip)
@@ -130,31 +137,33 @@ class DsvHelper:
130
137
  skip_header_rows: int = DEFAULT_SKIP_HEADER_ROWS,
131
138
  skip_footer_rows: int = DEFAULT_SKIP_FOOTER_ROWS,
132
139
  ) -> list[list[str]]:
133
- """
134
- Parse a file into a list of lists of strings.
140
+ """Read and parse an entire DSV file.
141
+
142
+ This convenience reads all lines from ``file_path`` using
143
+ :class:`splurge_dsv.text_file_helper.TextFileHelper` and then parses each
144
+ line into tokens. Header and footer rows may be skipped via the
145
+ ``skip_header_rows`` and ``skip_footer_rows`` parameters.
135
146
 
136
147
  Args:
137
- file_path (PathLike[str] | str): The path to the file to parse.
138
- delimiter (str): The delimiter to use.
139
- strip (bool): Whether to strip whitespace from the strings.
140
- bookend (str | None): The bookend to use for text fields.
141
- bookend_strip (bool): Whether to strip whitespace from the bookend.
142
- encoding (str): The file encoding.
143
- skip_header_rows (int): Number of header rows to skip.
144
- skip_footer_rows (int): Number of footer rows to skip.
148
+ file_path: Path to the file to read.
149
+ delimiter: Delimiter to split fields on.
150
+ strip: If True, strip whitespace from tokens.
151
+ bookend: Optional bookend character to remove from tokens.
152
+ bookend_strip: If True, strip whitespace after removing bookends.
153
+ encoding: Text encoding to use when reading the file.
154
+ skip_header_rows: Number of leading lines to ignore.
155
+ skip_footer_rows: Number of trailing lines to ignore.
145
156
 
146
157
  Returns:
147
- list[list[str]]: The list of lists of strings.
158
+ A list of token lists (one list per non-skipped line).
148
159
 
149
160
  Raises:
150
- SplurgeParameterError: If delimiter is empty or None.
151
- SplurgeFileNotFoundError: If the file does not exist.
152
- SplurgeFilePermissionError: If the file cannot be accessed.
153
- SplurgeFileEncodingError: If the file cannot be decoded with the specified encoding.
154
-
155
- Example:
156
- >>> DsvHelper.parse_file("data.csv", delimiter=",")
157
- [['header1', 'header2'], ['value1', 'value2']]
161
+ SplurgeDsvParameterError: If ``delimiter`` is empty or None.
162
+ SplurgeDsvFileNotFoundError: If the file at ``file_path`` does not exist.
163
+ SplurgeDsvFilePermissionError: If the file cannot be accessed due to
164
+ permission restrictions.
165
+ SplurgeDsvFileEncodingError: If the file cannot be decoded using
166
+ the provided ``encoding``.
158
167
  """
159
168
  lines: list[str] = TextFileHelper.read(
160
169
  file_path, encoding=encoding, skip_header_rows=skip_header_rows, skip_footer_rows=skip_footer_rows
@@ -172,23 +181,26 @@ class DsvHelper:
172
181
  bookend: str | None = None,
173
182
  bookend_strip: bool = DEFAULT_BOOKEND_STRIP,
174
183
  ) -> list[list[str]]:
175
- """
176
- Process a chunk of lines from the stream.
184
+ """Parse a chunk of lines into tokenized rows.
185
+
186
+ Designed to be used by :meth:`parse_stream` as a helper for converting a
187
+ batch of raw lines into parsed rows.
177
188
 
178
189
  Args:
179
- chunk: List of lines to process
180
- delimiter: Delimiter to use for parsing
181
- strip: Whether to strip whitespace
182
- bookend: Bookend character for text fields
183
- bookend_strip: Whether to strip whitespace from bookends
190
+ chunk: A list of raw input lines.
191
+ delimiter: Delimiter used to split each line.
192
+ strip: If True, strip whitespace from tokens.
193
+ bookend: Optional bookend character to remove from tokens.
194
+ bookend_strip: If True, strip whitespace after removing bookends.
184
195
 
185
196
  Returns:
186
- list[list[str]]: Parsed rows
197
+ A list where each element is the token list for a corresponding
198
+ input line from ``chunk``.
187
199
  """
188
200
  return cls.parses(chunk, delimiter=delimiter, strip=strip, bookend=bookend, bookend_strip=bookend_strip)
189
201
 
190
202
  @classmethod
191
- def parse_stream(
203
+ def parse_file_stream(
192
204
  cls,
193
205
  file_path: PathLike[str] | str,
194
206
  *,
@@ -202,7 +214,7 @@ class DsvHelper:
202
214
  chunk_size: int = DEFAULT_CHUNK_SIZE,
203
215
  ) -> Iterator[list[list[str]]]:
204
216
  """
205
- Stream-parse a DSV file in chunks of lines.
217
+ Stream-parse a DSV file into chunks of lines.
206
218
 
207
219
  Args:
208
220
  file_path (PathLike[str] | str): The path to the file to parse.
@@ -225,7 +237,7 @@ class DsvHelper:
225
237
  SplurgeDsvFileEncodingError: If the file cannot be decoded with the specified encoding.
226
238
  """
227
239
  if delimiter is None or delimiter == "":
228
- raise SplurgeParameterError("delimiter cannot be empty or None")
240
+ raise SplurgeDsvParameterError("delimiter cannot be empty or None")
229
241
 
230
242
  chunk_size = max(chunk_size, cls.DEFAULT_MIN_CHUNK_SIZE)
231
243
  skip_header_rows = max(skip_header_rows, cls.DEFAULT_SKIP_HEADER_ROWS)
@@ -244,3 +256,51 @@ class DsvHelper:
244
256
  chunk_size=chunk_size,
245
257
  )
246
258
  )
259
+
260
+ @classmethod
261
+ def parse_stream(
262
+ cls,
263
+ file_path: PathLike[str] | str,
264
+ *,
265
+ delimiter: str,
266
+ strip: bool = DEFAULT_STRIP,
267
+ bookend: str | None = None,
268
+ bookend_strip: bool = DEFAULT_BOOKEND_STRIP,
269
+ encoding: str = DEFAULT_ENCODING,
270
+ skip_header_rows: int = DEFAULT_SKIP_HEADER_ROWS,
271
+ skip_footer_rows: int = DEFAULT_SKIP_FOOTER_ROWS,
272
+ chunk_size: int = DEFAULT_CHUNK_SIZE,
273
+ ) -> Iterator[list[list[str]]]:
274
+ """
275
+ Stream-parse a DSV file, yielding chunks of parsed rows.
276
+
277
+ The method yields lists of parsed rows (each row itself is a list of
278
+ strings). Chunk sizing is controlled by the ``chunk_size`` keyword
279
+ argument, which is forwarded to ``parse_file_stream``.
280
+
281
+ Args:
282
+ file_path: Path to the file to parse.
283
+
284
+ Yields:
285
+ Lists of parsed rows, each list containing up to ``chunk_size`` rows.
286
+
287
+ Deprecated: Use `parse_file_stream` instead. This method will be removed in a future release.
288
+ """
289
+ # Emit a DeprecationWarning to signal removal in a future release
290
+ warnings.warn(
291
+ "DsvHelper.parse_stream() is deprecated and will be removed in a future release; use DsvHelper.parse_file_stream() instead.",
292
+ DeprecationWarning,
293
+ stacklevel=2,
294
+ )
295
+
296
+ return cls.parse_file_stream(
297
+ file_path,
298
+ delimiter=delimiter,
299
+ strip=strip,
300
+ bookend=bookend,
301
+ bookend_strip=bookend_strip,
302
+ encoding=encoding,
303
+ skip_header_rows=skip_header_rows,
304
+ skip_footer_rows=skip_footer_rows,
305
+ chunk_size=chunk_size,
306
+ )
splurge_dsv/exceptions.py CHANGED
@@ -1,136 +1,153 @@
1
- """
2
- Custom exceptions for the splurge-dsv package.
1
+ """Custom exceptions used across the splurge-dsv package.
3
2
 
4
- This module provides a hierarchy of custom exceptions for better error handling
5
- and more specific error messages throughout the package.
3
+ This module defines a clear exception hierarchy so callers can catch
4
+ specific error categories (file, validation, parsing, streaming, etc.)
5
+ instead of dealing with generic builtins. Each exception stores a
6
+ human-readable ``message`` and optional ``details`` for diagnostic output.
6
7
 
7
- Copyright (c) 2025 Jim Schilling
8
+ Module contents are intentionally lightweight: exceptions are primarily
9
+ containers for structured error information.
8
10
 
9
- Please preserve this header and all related material when sharing!
11
+ Example:
12
+ raise SplurgeDsvFileNotFoundError("File not found", details="/data/foo.csv")
10
13
 
11
- This module is licensed under the MIT License.
14
+ License: MIT
15
+
16
+ Copyright (c) 2025 Jim Schilling
12
17
  """
13
18
 
14
19
 
15
20
  class SplurgeDsvError(Exception):
16
- """Base exception for all splurge-dsv errors."""
21
+ """Base exception carrying a message and optional details.
17
22
 
18
- def __init__(self, message: str, *, details: str | None = None) -> None:
19
- """
20
- Initialize SplurgeDsvError.
23
+ Args:
24
+ message: Primary error message to display to the user.
25
+ details: Optional machine-readable details useful for debugging.
26
+
27
+ Attributes:
28
+ message: User-facing error message.
29
+ details: Optional additional diagnostic information.
30
+ """
21
31
 
22
- Args:
23
- message: Primary error message
24
- details: Additional error details
25
- """
32
+ def __init__(self, message: str, *, details: str | None = None) -> None:
26
33
  self.message = message
27
34
  self.details = details
28
35
  super().__init__(self.message)
29
36
 
30
37
 
31
- class SplurgeValidationError(SplurgeDsvError):
32
- """Raised when data validation fails."""
33
-
34
- pass
35
-
36
-
37
- class SplurgeFileOperationError(SplurgeDsvError):
38
- """Base exception for file operation errors."""
39
-
40
- pass
41
-
42
-
43
- class SplurgeFileNotFoundError(SplurgeFileOperationError):
44
- """Raised when a file is not found."""
38
+ # New-style exception names. Use a SplurgeDsv* prefix to avoid colliding with
39
+ # Python builtins. NOTE(review): no Splurge* aliases are defined in this module; confirm whether they exist elsewhere.
45
40
 
46
- pass
47
41
 
42
+ class SplurgeDsvValidationError(SplurgeDsvError):
43
+ """Raised when data validation fails.
48
44
 
49
- class SplurgeFilePermissionError(SplurgeFileOperationError):
50
- """Raised when there are permission issues with file operations."""
45
+ This exception indicates input or configuration values do not meet
46
+ expected constraints (for example: invalid delimiter, out-of-range
47
+ parameters, or malformed metadata).
48
+ """
51
49
 
52
- pass
53
50
 
51
+ class SplurgeDsvFileOperationError(SplurgeDsvError):
52
+ """Base exception for file operation errors.
54
53
 
55
- class SplurgeFileEncodingError(SplurgeFileOperationError):
56
- """Raised when there are encoding issues with file operations."""
54
+ Used as a parent for file-related conditions such as not found,
55
+ permission denied, or encoding issues.
56
+ """
57
57
 
58
- pass
59
58
 
59
+ class SplurgeDsvFileNotFoundError(SplurgeDsvFileOperationError):
60
+ """Raised when an expected file cannot be located.
60
61
 
61
- class SplurgePathValidationError(SplurgeFileOperationError):
62
- """Raised when file path validation fails."""
62
+ This typically maps to ``FileNotFoundError`` semantics but uses the
63
+ package-specific exception hierarchy so callers can distinguish
64
+ file errors from other error types.
65
+ """
63
66
 
64
- pass
65
67
 
68
+ class SplurgeDsvFilePermissionError(SplurgeDsvFileOperationError):
69
+ """Raised for permission or access-related file errors.
66
70
 
67
- class SplurgeDataProcessingError(SplurgeDsvError):
68
- """Base exception for data processing errors."""
71
+ For example, attempting to open a file without read permission will
72
+ raise this exception.
73
+ """
69
74
 
70
- pass
71
75
 
76
+ class SplurgeDsvFileEncodingError(SplurgeDsvFileOperationError):
77
+ """Raised when decoding or encoding a text file fails.
72
78
 
73
- class SplurgeParsingError(SplurgeDataProcessingError):
74
- """Raised when data parsing fails."""
79
+ The exception typically wraps the underlying decoding error and
80
+ provides a descriptive message and optional details for diagnostics.
81
+ """
75
82
 
76
- pass
77
83
 
84
+ class SplurgeDsvPathValidationError(SplurgeDsvFileOperationError):
85
+ """Raised when a provided filesystem path fails validation checks.
78
86
 
79
- class SplurgeTypeConversionError(SplurgeDataProcessingError):
80
- """Raised when type conversion fails."""
87
+ Use this exception for path traversal, dangerous characters, or other
88
+ validation failures detected by the path validation utilities.
89
+ """
81
90
 
82
- pass
83
91
 
92
+ class SplurgeDsvDataProcessingError(SplurgeDsvError):
93
+ """Base exception for errors that occur during data processing (parsing, conversion).
84
94
 
85
- class SplurgeStreamingError(SplurgeDataProcessingError):
86
- """Raised when streaming operations fail."""
95
+ This groups parsing, type conversion, and streaming errors that occur
96
+ while transforming file content into structured data.
97
+ """
87
98
 
88
- pass
89
99
 
100
+ class SplurgeDsvParsingError(SplurgeDsvDataProcessingError):
101
+ """Raised when parsing fails due to malformed or unexpected content."""
90
102
 
91
- class SplurgeConfigurationError(SplurgeDsvError):
92
- """Raised when configuration is invalid."""
93
103
 
94
- pass
104
+ class SplurgeDsvTypeConversionError(SplurgeDsvDataProcessingError):
105
+ """Raised when a value cannot be converted to the requested type."""
95
106
 
96
107
 
97
- class SplurgeResourceError(SplurgeDsvError):
98
- """Base exception for resource management errors."""
108
+ class SplurgeDsvStreamingError(SplurgeDsvDataProcessingError):
109
+ """Raised for errors during streaming (e.g., partial reads, IO interruptions)."""
99
110
 
100
- pass
101
111
 
112
+ class SplurgeDsvConfigurationError(SplurgeDsvError):
113
+ """Raised when an invalid configuration is provided to an API.
102
114
 
103
- class SplurgeResourceAcquisitionError(SplurgeResourceError):
104
- """Raised when resource acquisition fails."""
115
+ Examples include invalid chunk sizes, missing delimiters, or mutually
116
+ exclusive options supplied together.
117
+ """
105
118
 
106
- pass
107
119
 
120
+ class SplurgeDsvResourceError(SplurgeDsvError):
121
+ """Base exception for resource acquisition and release errors."""
108
122
 
109
- class SplurgeResourceReleaseError(SplurgeResourceError):
110
- """Raised when resource release fails."""
111
123
 
112
- pass
124
+ class SplurgeDsvResourceAcquisitionError(SplurgeDsvResourceError):
125
+ """Raised when acquiring external resources (files, streams) fails."""
113
126
 
114
127
 
115
- class SplurgePerformanceWarning(SplurgeDsvError):
116
- """Warning for performance-related issues."""
128
+ class SplurgeDsvResourceReleaseError(SplurgeDsvResourceError):
129
+ """Raised when releasing resources (closing files or handles) fails."""
117
130
 
118
- pass
119
131
 
132
+ class SplurgeDsvPerformanceWarning(SplurgeDsvError):
133
+ """Raised to indicate performance-related concerns that may need attention.
120
134
 
121
- class SplurgeParameterError(SplurgeValidationError):
122
- """Raised when function parameters are invalid."""
135
+ This is not a fatal error but can be used to signal suboptimal usage
136
+ patterns (for example, very small streaming chunk sizes) to callers.
137
+ """
123
138
 
124
- pass
125
139
 
140
+ class SplurgeDsvParameterError(SplurgeDsvValidationError):
141
+ """Raised when a function or method receives invalid parameters.
126
142
 
127
- class SplurgeRangeError(SplurgeValidationError):
128
- """Raised when values are outside expected ranges."""
143
+ Use this for invalid types, missing required values, or arguments that
144
+ violate expected constraints.
145
+ """
129
146
 
130
- pass
131
147
 
148
+ class SplurgeDsvRangeError(SplurgeDsvValidationError):
149
+ """Raised when a value falls outside an expected numeric or length range."""
132
150
 
133
- class SplurgeFormatError(SplurgeValidationError):
134
- """Raised when data format is invalid."""
135
151
 
136
- pass
152
+ class SplurgeDsvFormatError(SplurgeDsvValidationError):
153
+ """Raised when the data format is invalid or cannot be parsed as expected."""