splurge-dsv 2025.1.4__py3-none-any.whl → 2025.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
splurge_dsv/__init__.py CHANGED
@@ -1,84 +1,96 @@
1
- """
2
- Splurge DSV - A utility library for working with DSV (Delimited String Values) files.
1
+ """Top-level package for Splurge DSV.
3
2
 
4
- This package provides utilities for parsing, processing, and manipulating
5
- delimited string value files with support for various delimiters, text bookends,
6
- and streaming operations.
3
+ This package provides utilities for parsing, processing and manipulating
4
+ delimited string value (DSV) files. It exposes the high-level API objects
5
+ such as :class:`~splurge_dsv.dsv.Dsv` and :class:`~splurge_dsv.dsv.DsvConfig`,
6
+ convenience helpers, and the package's exception types.
7
7
 
8
+ License: MIT
8
9
  Copyright (c) 2025 Jim Schilling
9
-
10
- This module is licensed under the MIT License.
11
10
  """
12
11
 
12
+ # Ensure current working directory exists. Some test environments or earlier
13
+ # test cases may remove the process working directory which causes calls to
14
+ # os.getcwd() to raise FileNotFoundError later during test execution. Guard
15
+ # against that here by switching to this package directory when cwd is missing.
16
+ import os
17
+ from pathlib import Path as _Path
18
+
19
try:
    try:
        # Path.cwd() is backed by os.getcwd(), so it raises FileNotFoundError
        # itself when the process working directory has been removed; that
        # case is handled by the except clause below. When a path is
        # returned, confirm the directory still exists and otherwise switch
        # to the directory that contains this package file.
        if not _Path.cwd().exists():
            os.chdir(_Path(__file__).resolve().parent)
    except FileNotFoundError:
        # The working directory is gone: fall back to the package directory.
        os.chdir(_Path(__file__).resolve().parent)
except Exception:
    # Deliberate best-effort: any failure in the guard (including a failing
    # chdir in the fallback above) is swallowed so that importing the package
    # is never broken by this check.
    pass
33
+
13
34
  # Local imports
35
+ from splurge_dsv.dsv import Dsv, DsvConfig
14
36
  from splurge_dsv.dsv_helper import DsvHelper
15
37
  from splurge_dsv.exceptions import (
16
- SplurgeConfigurationError,
17
- SplurgeDataProcessingError,
38
+ SplurgeDsvConfigurationError,
39
+ SplurgeDsvDataProcessingError,
40
+ # canonical SplurgeDsv* exception names
18
41
  SplurgeDsvError,
19
- SplurgeFileEncodingError,
20
- SplurgeFileNotFoundError,
21
- SplurgeFileOperationError,
22
- SplurgeFilePermissionError,
23
- SplurgeFormatError,
24
- SplurgeParameterError,
25
- SplurgeParsingError,
26
- SplurgePathValidationError,
27
- SplurgePerformanceWarning,
28
- SplurgeRangeError,
29
- SplurgeResourceAcquisitionError,
30
- SplurgeResourceError,
31
- SplurgeResourceReleaseError,
32
- SplurgeStreamingError,
33
- SplurgeTypeConversionError,
34
- SplurgeValidationError,
42
+ SplurgeDsvFileEncodingError,
43
+ SplurgeDsvFileNotFoundError,
44
+ SplurgeDsvFileOperationError,
45
+ SplurgeDsvFilePermissionError,
46
+ SplurgeDsvFormatError,
47
+ SplurgeDsvParameterError,
48
+ SplurgeDsvParsingError,
49
+ SplurgeDsvPathValidationError,
50
+ SplurgeDsvPerformanceWarning,
51
+ SplurgeDsvRangeError,
52
+ SplurgeDsvResourceAcquisitionError,
53
+ SplurgeDsvResourceError,
54
+ SplurgeDsvResourceReleaseError,
55
+ SplurgeDsvStreamingError,
56
+ SplurgeDsvTypeConversionError,
57
+ SplurgeDsvValidationError,
35
58
  )
36
59
  from splurge_dsv.path_validator import PathValidator
37
- from splurge_dsv.resource_manager import (
38
- FileResourceManager,
39
- ResourceManager,
40
- StreamResourceManager,
41
- safe_file_operation,
42
- safe_stream_operation,
43
- )
44
60
  from splurge_dsv.string_tokenizer import StringTokenizer
45
61
  from splurge_dsv.text_file_helper import TextFileHelper
46
62
 
47
- __version__ = "2025.1.4"
63
+ __version__ = "2025.2.0"
48
64
  __author__ = "Jim Schilling"
49
65
  __license__ = "MIT"
50
66
 
51
67
  __all__ = [
52
- # Main helper class
68
+ # Main classes
69
+ "Dsv",
70
+ "DsvConfig",
53
71
  "DsvHelper",
54
72
  # Exceptions
55
73
  "SplurgeDsvError",
56
- "SplurgeValidationError",
57
- "SplurgeFileOperationError",
58
- "SplurgeFileNotFoundError",
59
- "SplurgeFilePermissionError",
60
- "SplurgeFileEncodingError",
61
- "SplurgePathValidationError",
62
- "SplurgeDataProcessingError",
63
- "SplurgeParsingError",
64
- "SplurgeTypeConversionError",
65
- "SplurgeStreamingError",
66
- "SplurgeConfigurationError",
67
- "SplurgeResourceError",
68
- "SplurgeResourceAcquisitionError",
69
- "SplurgeResourceReleaseError",
70
- "SplurgePerformanceWarning",
71
- "SplurgeParameterError",
72
- "SplurgeRangeError",
73
- "SplurgeFormatError",
74
+ "SplurgeDsvValidationError",
75
+ "SplurgeDsvFileOperationError",
76
+ "SplurgeDsvFileNotFoundError",
77
+ "SplurgeDsvFilePermissionError",
78
+ "SplurgeDsvFileEncodingError",
79
+ "SplurgeDsvPathValidationError",
80
+ "SplurgeDsvDataProcessingError",
81
+ "SplurgeDsvParsingError",
82
+ "SplurgeDsvTypeConversionError",
83
+ "SplurgeDsvStreamingError",
84
+ "SplurgeDsvConfigurationError",
85
+ "SplurgeDsvResourceError",
86
+ "SplurgeDsvResourceAcquisitionError",
87
+ "SplurgeDsvResourceReleaseError",
88
+ "SplurgeDsvPerformanceWarning",
89
+ "SplurgeDsvParameterError",
90
+ "SplurgeDsvRangeError",
91
+ "SplurgeDsvFormatError",
74
92
  # Utility classes
75
93
  "StringTokenizer",
76
94
  "TextFileHelper",
77
95
  "PathValidator",
78
- "ResourceManager",
79
- "FileResourceManager",
80
- "StreamResourceManager",
81
- # Context managers
82
- "safe_file_operation",
83
- "safe_stream_operation",
84
96
  ]
splurge_dsv/__main__.py CHANGED
@@ -1,14 +1,18 @@
1
- """
2
- Command-line interface entry point for splurge-dsv.
1
+ """Module entrypoint for running splurge-dsv as a script.
2
+
3
+ When executed (``python -m splurge_dsv``), this module delegates to
4
+ ``splurge_dsv.cli.run_cli`` and returns the resulting exit code.
3
5
 
4
- This module serves as the entry point when running the package as a module.
5
- It imports and calls the run_cli function from the cli module.
6
+ License: MIT
7
+ Copyright (c) 2025 Jim Schilling
6
8
  """
7
9
 
8
- # Standard library imports
10
+ from __future__ import annotations
11
+
12
+ # Standard library
9
13
  import sys
10
14
 
11
- # Local imports
15
+ # Local
12
16
  from splurge_dsv.cli import run_cli
13
17
 
14
18
  if __name__ == "__main__":
splurge_dsv/cli.py CHANGED
@@ -1,14 +1,17 @@
1
- """
2
- Command-line interface for splurge-dsv.
1
+ """Command-line interface utilities for splurge-dsv.
3
2
 
4
- This module provides a command-line interface for the splurge-dsv library,
5
- allowing users to parse DSV files from the command line.
3
+ This module provides the CLI entry points and helpers for parsing DSV
4
+ files from the command line. It exposes a thin wrapper around the
5
+ library API suitable for use as ``python -m splurge_dsv``.
6
6
 
7
- Copyright (c) 2025 Jim Schilling
7
+ Public API:
8
+ - parse_arguments: Build and parse the CLI argument parser.
9
+ - print_results: Nicely format parsed rows to stdout.
10
+ - run_cli: Main entrypoint invoked by ``__main__``.
8
11
 
9
- This module is licensed under the MIT License.
12
+ License: MIT
10
13
 
11
- Please preserve this header and all related material when sharing!
14
+ Copyright (c) 2025 Jim Schilling
12
15
  """
13
16
 
14
17
  # Standard library imports
@@ -19,12 +22,17 @@ from pathlib import Path
19
22
 
20
23
  # Local imports
21
24
  from splurge_dsv import __version__
22
- from splurge_dsv.dsv_helper import DsvHelper
25
+ from splurge_dsv.dsv import Dsv, DsvConfig
23
26
  from splurge_dsv.exceptions import SplurgeDsvError
24
27
 
25
28
 
26
29
  def parse_arguments() -> argparse.Namespace:
27
- """Parse command line arguments."""
30
+ """Construct and parse command-line arguments for the CLI.
31
+
32
+ Returns:
33
+ argparse.Namespace: Parsed arguments with attributes matching the
34
+ defined options.
35
+ """
28
36
  parser = argparse.ArgumentParser(
29
37
  description="Parse DSV (Delimited String Values) files",
30
38
  formatter_class=argparse.RawDescriptionHelpFormatter,
@@ -60,7 +68,7 @@ Examples:
60
68
 
61
69
  parser.add_argument(
62
70
  "--output-format",
63
- choices=["table", "json"],
71
+ choices=["table", "json", "ndjson"],
64
72
  default="table",
65
73
  help="Output format for results (default: table)",
66
74
  )
@@ -71,7 +79,15 @@ Examples:
71
79
 
72
80
 
73
81
  def print_results(rows: list[list[str]], delimiter: str) -> None:
74
- """Print parsed results in a formatted way."""
82
+ """Print parsed rows in a human-readable table format.
83
+
84
+ The function computes column widths and prints a simple ASCII table.
85
+
86
+ Args:
87
+ rows: Parsed rows to print (first row is treated as header).
88
+ delimiter: Delimiter used (included here for compatibility; printing
89
+ does not depend on it directly).
90
+ """
75
91
  if not rows:
76
92
  print("No data found.")
77
93
  return
@@ -100,23 +116,17 @@ def print_results(rows: list[list[str]], delimiter: str) -> None:
100
116
 
101
117
 
102
118
  def run_cli() -> int:
103
- """Run the command-line interface for DSV file parsing.
119
+ """Main entry point for running the splurge-dsv CLI.
104
120
 
105
- This function serves as the main entry point for the splurge-dsv CLI tool.
106
- It parses command-line arguments, validates the input file, and processes
107
- DSV files according to the specified options. Supports both regular parsing
108
- and streaming modes for large files.
121
+ The function handles argument parsing, basic path validation, constructing
122
+ the ``DsvConfig`` and ``Dsv`` objects, and printing results in the
123
+ requested format. Designed to be invoked from ``__main__``.
109
124
 
110
125
  Returns:
111
- int: Exit code indicating success or failure:
112
- - 0: Success
113
- - 1: Generic error (file not found, parsing error, etc.)
114
- - 2: Invalid arguments
115
- - 130: Operation interrupted (Ctrl+C)
126
+ Exit code (0 success, non-zero error codes on failure).
116
127
 
117
128
  Raises:
118
- SystemExit: Terminates the program with the appropriate exit code.
119
- This is handled internally and should not be caught by callers.
129
+ SystemExit: On argument parser termination (handled internally).
120
130
  """
121
131
  try:
122
132
  args = parse_arguments()
@@ -131,6 +141,19 @@ def run_cli() -> int:
131
141
  print(f"Error: '{args.file_path}' is not a file.", file=sys.stderr)
132
142
  return 1
133
143
 
144
+ # Create configuration and Dsv instance for parsing
145
+ config = DsvConfig(
146
+ delimiter=args.delimiter,
147
+ strip=not args.no_strip,
148
+ bookend=args.bookend,
149
+ bookend_strip=not args.no_bookend_strip,
150
+ encoding=args.encoding,
151
+ skip_header_rows=args.skip_header,
152
+ skip_footer_rows=args.skip_footer,
153
+ chunk_size=args.chunk_size,
154
+ )
155
+ dsv = Dsv(config)
156
+
134
157
  # Parse the file
135
158
  if args.stream:
136
159
  if args.output_format != "json":
@@ -138,44 +161,31 @@ def run_cli() -> int:
138
161
  chunk_count = 0
139
162
  total_rows = 0
140
163
 
141
- for chunk in DsvHelper.parse_stream(
142
- file_path,
143
- delimiter=args.delimiter,
144
- strip=not args.no_strip,
145
- bookend=args.bookend,
146
- bookend_strip=not args.no_bookend_strip,
147
- encoding=args.encoding,
148
- skip_header_rows=args.skip_header,
149
- skip_footer_rows=args.skip_footer,
150
- chunk_size=args.chunk_size,
151
- ):
164
+ for chunk in dsv.parse_stream(file_path):
152
165
  chunk_count += 1
153
166
  total_rows += len(chunk)
154
167
  if args.output_format == "json":
155
168
  print(json.dumps(chunk, ensure_ascii=False))
169
+ elif args.output_format == "ndjson":
170
+ for row in chunk:
171
+ print(json.dumps(row, ensure_ascii=False))
156
172
  else:
157
173
  print(f"Chunk {chunk_count}: {len(chunk)} rows")
158
174
  print_results(chunk, args.delimiter)
159
175
  print()
160
176
 
161
- if args.output_format != "json":
177
+ if args.output_format not in ["json", "ndjson"]:
162
178
  print(f"Total: {total_rows} rows in {chunk_count} chunks")
163
179
  else:
164
- if args.output_format != "json":
180
+ if args.output_format not in ["json", "ndjson"]:
165
181
  print(f"Parsing file '{args.file_path}' with delimiter '{args.delimiter}'...")
166
- rows = DsvHelper.parse_file(
167
- file_path,
168
- delimiter=args.delimiter,
169
- strip=not args.no_strip,
170
- bookend=args.bookend,
171
- bookend_strip=not args.no_bookend_strip,
172
- encoding=args.encoding,
173
- skip_header_rows=args.skip_header,
174
- skip_footer_rows=args.skip_footer,
175
- )
182
+ rows = dsv.parse_file(file_path)
176
183
 
177
184
  if args.output_format == "json":
178
185
  print(json.dumps(rows, ensure_ascii=False))
186
+ elif args.output_format == "ndjson":
187
+ for row in rows:
188
+ print(json.dumps(row, ensure_ascii=False))
179
189
  else:
180
190
  print(f"Parsed {len(rows)} rows")
181
191
  print_results(rows, args.delimiter)
splurge_dsv/dsv.py ADDED
@@ -0,0 +1,256 @@
1
+ """DSV parsing primitives and configuration objects.
2
+
3
+ This module exposes the :class:`DsvConfig` dataclass and the :class:`Dsv`
4
+ parser. ``DsvConfig`` encapsulates parsing options such as delimiter,
5
+ encoding and header/footer skipping. ``Dsv`` is a thin, stateful wrapper
6
+ around :mod:`splurge_dsv.dsv_helper` that binds a configuration to
7
+ parsing operations and provides convenience methods for parsing strings,
8
+ files, and streaming large inputs.
9
+
10
+ Public API:
11
+ - DsvConfig: Configuration dataclass for parsing behavior.
12
+ - Dsv: Parser instance that performs parse/parse_file/parse_stream.
13
+
14
+ License: MIT
15
+
16
+ Copyright (c) 2025 Jim Schilling
17
+ """
18
+
19
+ # Standard library imports
20
+ from collections.abc import Iterator
21
+ from dataclasses import dataclass, fields
22
+ from os import PathLike
23
+
24
+ # Local imports
25
+ from splurge_dsv.dsv_helper import DsvHelper
26
+ from splurge_dsv.exceptions import SplurgeDsvParameterError
27
+
28
+
29
+ @dataclass(frozen=True)
30
+ class DsvConfig:
31
+ """Configuration for DSV parsing operations.
32
+
33
+ This frozen dataclass stores parsing options and performs basic
34
+ validation in :meth:`__post_init__`.
35
+
36
+ Args:
37
+ delimiter: The delimiter character used to separate values.
38
+ strip: Whether to strip whitespace from parsed values.
39
+ bookend: Optional character that wraps text fields (e.g., quotes).
40
+ bookend_strip: Whether to strip whitespace from bookend characters.
41
+ encoding: Text encoding for file operations.
42
+ skip_header_rows: Number of header rows to skip when reading files.
43
+ skip_footer_rows: Number of footer rows to skip when reading files.
44
+ chunk_size: Size of chunks for streaming operations.
45
+
46
+ Raises:
47
+ SplurgeDsvParameterError: If delimiter is empty, chunk_size is too
48
+ small, or skip counts are negative.
49
+ """
50
+
51
+ delimiter: str
52
+ strip: bool = True
53
+ bookend: str | None = None
54
+ bookend_strip: bool = True
55
+ encoding: str = "utf-8"
56
+ skip_header_rows: int = 0
57
+ skip_footer_rows: int = 0
58
+ chunk_size: int = 500
59
+
60
+ def __post_init__(self) -> None:
61
+ """Validate configuration after initialization.
62
+
63
+ Ensures required fields are present and numeric ranges are valid.
64
+ """
65
+ if not self.delimiter:
66
+ raise SplurgeDsvParameterError("delimiter cannot be empty or None")
67
+
68
+ if self.chunk_size < DsvHelper.DEFAULT_MIN_CHUNK_SIZE:
69
+ raise SplurgeDsvParameterError(
70
+ f"chunk_size must be at least {DsvHelper.DEFAULT_MIN_CHUNK_SIZE}, got {self.chunk_size}"
71
+ )
72
+
73
+ if self.skip_header_rows < 0:
74
+ raise SplurgeDsvParameterError(f"skip_header_rows cannot be negative, got {self.skip_header_rows}")
75
+
76
+ if self.skip_footer_rows < 0:
77
+ raise SplurgeDsvParameterError(f"skip_footer_rows cannot be negative, got {self.skip_footer_rows}")
78
+
79
+ @classmethod
80
+ def csv(cls, **overrides) -> "DsvConfig":
81
+ """
82
+ Create a CSV configuration with sensible defaults.
83
+
84
+ Args:
85
+ **overrides: Any configuration values to override
86
+
87
+ Returns:
88
+ DsvConfig: CSV configuration object
89
+
90
+ Example:
91
+ >>> config = DsvConfig.csv(skip_header_rows=1)
92
+ >>> config.delimiter
93
+ ','
94
+ """
95
+ return cls(delimiter=",", **overrides)
96
+
97
+ @classmethod
98
+ def tsv(cls, **overrides) -> "DsvConfig":
99
+ """
100
+ Create a TSV configuration with sensible defaults.
101
+
102
+ Args:
103
+ **overrides: Any configuration values to override
104
+
105
+ Returns:
106
+ DsvConfig: TSV configuration object
107
+
108
+ Example:
109
+ >>> config = DsvConfig.tsv(encoding="utf-16")
110
+ >>> config.delimiter
111
+ '\t'
112
+ """
113
+ return cls(delimiter="\t", **overrides)
114
+
115
+ @classmethod
116
+ def from_params(cls, **kwargs) -> "DsvConfig":
117
+ """
118
+ Create a DsvConfig from arbitrary keyword arguments.
119
+
120
+ This method filters out any invalid parameters that don't correspond
121
+ to DsvConfig fields, making it safe to pass through arbitrary parameter
122
+ dictionaries (useful for migration from existing APIs).
123
+
124
+ Args:
125
+ **kwargs: Configuration parameters (invalid ones are ignored)
126
+
127
+ Returns:
128
+ DsvConfig: Configuration object with valid parameters
129
+
130
+ Example:
131
+ >>> config = DsvConfig.from_params(delimiter=",", invalid_param="ignored")
132
+ >>> config.delimiter
133
+ ','
134
+ """
135
+ valid_fields = {f.name for f in fields(cls)}
136
+ filtered_kwargs = {k: v for k, v in kwargs.items() if k in valid_fields}
137
+ return cls(**filtered_kwargs)
138
+
139
+
140
class Dsv:
    """Parser bound to a single :class:`DsvConfig`.

    Every method forwards to the matching ``DsvHelper`` static API, filling
    in the keyword arguments from the stored configuration so callers can
    parse repeatedly without restating the options.

    Attributes:
        config (DsvConfig): Configuration instance used for parsing calls.
    """

    def __init__(self, config: DsvConfig) -> None:
        """Store the configuration used by all parsing methods.

        Args:
            config: DsvConfig object containing parsing parameters.

        Example:
            >>> config = DsvConfig(delimiter=",")
            >>> parser = Dsv(config)
        """
        self.config = config

    def parse(self, content: str) -> list[str]:
        """Split one DSV record into its tokens.

        Args:
            content: Input string representing a single DSV record.

        Returns:
            List of parsed tokens as strings.

        Raises:
            SplurgeDsvParameterError: If the configured delimiter is invalid.
        """
        cfg = self.config
        return DsvHelper.parse(
            content,
            delimiter=cfg.delimiter,
            strip=cfg.strip,
            bookend=cfg.bookend,
            bookend_strip=cfg.bookend_strip,
        )

    def parses(self, content: list[str]) -> list[list[str]]:
        """Split several DSV records, one token list per input string.

        Args:
            content: List of strings to parse.

        Returns:
            List of lists of parsed strings.

        Example:
            >>> parser = Dsv(DsvConfig(delimiter=","))
            >>> parser.parses(["a,b", "c,d"])
            [['a', 'b'], ['c', 'd']]
        """
        cfg = self.config
        return DsvHelper.parses(
            content,
            delimiter=cfg.delimiter,
            strip=cfg.strip,
            bookend=cfg.bookend,
            bookend_strip=cfg.bookend_strip,
        )

    def parse_file(self, file_path: PathLike[str] | str) -> list[list[str]]:
        """Read a DSV file and return every row as a list of strings.

        The encoding and header/footer skip counts come from the bound
        configuration.

        Args:
            file_path: Path to the file to parse.

        Returns:
            A list of rows, where each row is a list of string tokens.

        Raises:
            SplurgeDsvFileNotFoundError: If the file cannot be found.
            SplurgeDsvFilePermissionError: If the file cannot be read.
            SplurgeDsvFileEncodingError: If the file cannot be decoded with
                the configured encoding.
        """
        cfg = self.config
        return DsvHelper.parse_file(
            file_path,
            delimiter=cfg.delimiter,
            strip=cfg.strip,
            bookend=cfg.bookend,
            bookend_strip=cfg.bookend_strip,
            encoding=cfg.encoding,
            skip_header_rows=cfg.skip_header_rows,
            skip_footer_rows=cfg.skip_footer_rows,
        )

    def parse_stream(self, file_path: PathLike[str] | str) -> Iterator[list[list[str]]]:
        """Stream-parse a DSV file in chunks of parsed rows.

        This method returns the iterator produced by
        ``DsvHelper.parse_stream``; chunk sizing is controlled by the bound
        configuration's ``chunk_size`` value.

        Args:
            file_path: Path to the file to parse.

        Yields:
            Lists of parsed rows, each list containing up to ``chunk_size`` rows.
        """
        cfg = self.config
        return DsvHelper.parse_stream(
            file_path,
            delimiter=cfg.delimiter,
            strip=cfg.strip,
            bookend=cfg.bookend,
            bookend_strip=cfg.bookend_strip,
            encoding=cfg.encoding,
            skip_header_rows=cfg.skip_header_rows,
            skip_footer_rows=cfg.skip_footer_rows,
            chunk_size=cfg.chunk_size,
        )