splurge-dsv 2025.1.5__py3-none-any.whl → 2025.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- splurge_dsv/__init__.py +70 -58
- splurge_dsv/__main__.py +10 -6
- splurge_dsv/cli.py +47 -43
- splurge_dsv/dsv.py +280 -0
- splurge_dsv/dsv_helper.py +114 -54
- splurge_dsv/exceptions.py +92 -75
- splurge_dsv/path_validator.py +49 -36
- splurge_dsv/safe_text_file_reader.py +177 -0
- splurge_dsv/safe_text_file_writer.py +136 -0
- splurge_dsv/string_tokenizer.py +34 -26
- splurge_dsv/text_file_helper.py +96 -177
- splurge_dsv-2025.2.1.dist-info/METADATA +217 -0
- splurge_dsv-2025.2.1.dist-info/RECORD +17 -0
- splurge_dsv-2025.2.1.dist-info/entry_points.txt +2 -0
- splurge_dsv/resource_manager.py +0 -371
- splurge_dsv-2025.1.5.dist-info/METADATA +0 -268
- splurge_dsv-2025.1.5.dist-info/RECORD +0 -14
- {splurge_dsv-2025.1.5.dist-info → splurge_dsv-2025.2.1.dist-info}/WHEEL +0 -0
- {splurge_dsv-2025.1.5.dist-info → splurge_dsv-2025.2.1.dist-info}/licenses/LICENSE +0 -0
- {splurge_dsv-2025.1.5.dist-info → splurge_dsv-2025.2.1.dist-info}/top_level.txt +0 -0
splurge_dsv/__init__.py
CHANGED
@@ -1,84 +1,96 @@
|
|
1
|
-
"""
|
2
|
-
Splurge DSV - A utility library for working with DSV (Delimited String Values) files.
|
1
|
+
"""Top-level package for Splurge DSV.
|
3
2
|
|
4
|
-
This package provides utilities for parsing, processing
|
5
|
-
delimited string value files
|
6
|
-
|
3
|
+
This package provides utilities for parsing, processing and manipulating
|
4
|
+
delimited string value (DSV) files. It exposes the high-level API objects
|
5
|
+
such as :class:`~splurge_dsv.dsv.Dsv` and :class:`~splurge_dsv.dsv.DsvConfig`,
|
6
|
+
convenience helpers, and the package's exception types.
|
7
7
|
|
8
|
+
License: MIT
|
8
9
|
Copyright (c) 2025 Jim Schilling
|
9
|
-
|
10
|
-
This module is licensed under the MIT License.
|
11
10
|
"""
|
12
11
|
|
12
|
+
# Ensure current working directory exists. Some test environments or earlier
|
13
|
+
# test cases may remove the process working directory which causes calls to
|
14
|
+
# os.getcwd() to raise FileNotFoundError later during test execution. Guard
|
15
|
+
# against that here by switching to this package directory when cwd is missing.
|
16
|
+
import os
|
17
|
+
from pathlib import Path as _Path
|
18
|
+
|
19
|
+
def _ensure_cwd_exists() -> None:
    """Switch to the package directory when the process cwd no longer exists.

    ``Path.cwd()`` raises ``FileNotFoundError`` when the original working
    directory has been removed, so that error is treated the same as a cwd
    path that resolves but does not exist.
    """
    try:
        cwd_present = _Path.cwd().exists()
    except FileNotFoundError:
        # The working directory was deleted out from under the process.
        cwd_present = False

    if not cwd_present:
        os.chdir(_Path(__file__).resolve().parent)


try:
    _ensure_cwd_exists()
except Exception:
    # Deliberate best effort: a failure to repair the working directory must
    # never break ``import splurge_dsv``; any later cwd-dependent call will
    # surface the original problem on its own.
    pass
|
33
|
+
|
13
34
|
# Local imports
|
35
|
+
from splurge_dsv.dsv import Dsv, DsvConfig
|
14
36
|
from splurge_dsv.dsv_helper import DsvHelper
|
15
37
|
from splurge_dsv.exceptions import (
|
16
|
-
|
17
|
-
|
38
|
+
SplurgeDsvConfigurationError,
|
39
|
+
SplurgeDsvDataProcessingError,
|
40
|
+
# canonical SplurgeDsv* exception names
|
18
41
|
SplurgeDsvError,
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
42
|
+
SplurgeDsvFileEncodingError,
|
43
|
+
SplurgeDsvFileNotFoundError,
|
44
|
+
SplurgeDsvFileOperationError,
|
45
|
+
SplurgeDsvFilePermissionError,
|
46
|
+
SplurgeDsvFormatError,
|
47
|
+
SplurgeDsvParameterError,
|
48
|
+
SplurgeDsvParsingError,
|
49
|
+
SplurgeDsvPathValidationError,
|
50
|
+
SplurgeDsvPerformanceWarning,
|
51
|
+
SplurgeDsvRangeError,
|
52
|
+
SplurgeDsvResourceAcquisitionError,
|
53
|
+
SplurgeDsvResourceError,
|
54
|
+
SplurgeDsvResourceReleaseError,
|
55
|
+
SplurgeDsvStreamingError,
|
56
|
+
SplurgeDsvTypeConversionError,
|
57
|
+
SplurgeDsvValidationError,
|
35
58
|
)
|
36
59
|
from splurge_dsv.path_validator import PathValidator
|
37
|
-
from splurge_dsv.resource_manager import (
|
38
|
-
FileResourceManager,
|
39
|
-
ResourceManager,
|
40
|
-
StreamResourceManager,
|
41
|
-
safe_file_operation,
|
42
|
-
safe_stream_operation,
|
43
|
-
)
|
44
60
|
from splurge_dsv.string_tokenizer import StringTokenizer
|
45
61
|
from splurge_dsv.text_file_helper import TextFileHelper
|
46
62
|
|
47
|
-
__version__ = "2025.1
|
63
|
+
__version__ = "2025.2.1"
|
48
64
|
__author__ = "Jim Schilling"
|
49
65
|
__license__ = "MIT"
|
50
66
|
|
51
67
|
__all__ = [
|
52
|
-
# Main
|
68
|
+
# Main classes
|
69
|
+
"Dsv",
|
70
|
+
"DsvConfig",
|
53
71
|
"DsvHelper",
|
54
72
|
# Exceptions
|
55
73
|
"SplurgeDsvError",
|
56
|
-
"
|
57
|
-
"
|
58
|
-
"
|
59
|
-
"
|
60
|
-
"
|
61
|
-
"
|
62
|
-
"
|
63
|
-
"
|
64
|
-
"
|
65
|
-
"
|
66
|
-
"
|
67
|
-
"
|
68
|
-
"
|
69
|
-
"
|
70
|
-
"
|
71
|
-
"
|
72
|
-
"
|
73
|
-
"
|
74
|
+
"SplurgeDsvValidationError",
|
75
|
+
"SplurgeDsvFileOperationError",
|
76
|
+
"SplurgeDsvFileNotFoundError",
|
77
|
+
"SplurgeDsvFilePermissionError",
|
78
|
+
"SplurgeDsvFileEncodingError",
|
79
|
+
"SplurgeDsvPathValidationError",
|
80
|
+
"SplurgeDsvDataProcessingError",
|
81
|
+
"SplurgeDsvParsingError",
|
82
|
+
"SplurgeDsvTypeConversionError",
|
83
|
+
"SplurgeDsvStreamingError",
|
84
|
+
"SplurgeDsvConfigurationError",
|
85
|
+
"SplurgeDsvResourceError",
|
86
|
+
"SplurgeDsvResourceAcquisitionError",
|
87
|
+
"SplurgeDsvResourceReleaseError",
|
88
|
+
"SplurgeDsvPerformanceWarning",
|
89
|
+
"SplurgeDsvParameterError",
|
90
|
+
"SplurgeDsvRangeError",
|
91
|
+
"SplurgeDsvFormatError",
|
74
92
|
# Utility classes
|
75
93
|
"StringTokenizer",
|
76
94
|
"TextFileHelper",
|
77
95
|
"PathValidator",
|
78
|
-
"ResourceManager",
|
79
|
-
"FileResourceManager",
|
80
|
-
"StreamResourceManager",
|
81
|
-
# Context managers
|
82
|
-
"safe_file_operation",
|
83
|
-
"safe_stream_operation",
|
84
96
|
]
|
splurge_dsv/__main__.py
CHANGED
@@ -1,14 +1,18 @@
|
|
1
|
-
"""
|
2
|
-
|
1
|
+
"""Module entrypoint for running splurge-dsv as a script.
|
2
|
+
|
3
|
+
When executed (``python -m splurge_dsv``), this module delegates to
|
4
|
+
``splurge_dsv.cli.run_cli`` and returns the resulting exit code.
|
3
5
|
|
4
|
-
|
5
|
-
|
6
|
+
License: MIT
|
7
|
+
Copyright (c) 2025 Jim Schilling
|
6
8
|
"""
|
7
9
|
|
8
|
-
|
10
|
+
from __future__ import annotations
|
11
|
+
|
12
|
+
# Standard library
|
9
13
|
import sys
|
10
14
|
|
11
|
-
# Local
|
15
|
+
# Local
|
12
16
|
from splurge_dsv.cli import run_cli
|
13
17
|
|
14
18
|
if __name__ == "__main__":
|
splurge_dsv/cli.py
CHANGED
@@ -1,14 +1,17 @@
|
|
1
|
-
"""
|
2
|
-
Command-line interface for splurge-dsv.
|
1
|
+
"""Command-line interface utilities for splurge-dsv.
|
3
2
|
|
4
|
-
This module provides
|
5
|
-
|
3
|
+
This module provides the CLI entry points and helpers for parsing DSV
|
4
|
+
files from the command line. It exposes a thin wrapper around the
|
5
|
+
library API suitable for use as ``python -m splurge_dsv``.
|
6
6
|
|
7
|
-
|
7
|
+
Public API:
|
8
|
+
- parse_arguments: Build and parse the CLI argument parser.
|
9
|
+
- print_results: Nicely format parsed rows to stdout.
|
10
|
+
- run_cli: Main entrypoint invoked by ``__main__``.
|
8
11
|
|
9
|
-
|
12
|
+
License: MIT
|
10
13
|
|
11
|
-
|
14
|
+
Copyright (c) 2025 Jim Schilling
|
12
15
|
"""
|
13
16
|
|
14
17
|
# Standard library imports
|
@@ -19,12 +22,17 @@ from pathlib import Path
|
|
19
22
|
|
20
23
|
# Local imports
|
21
24
|
from splurge_dsv import __version__
|
22
|
-
from splurge_dsv.
|
25
|
+
from splurge_dsv.dsv import Dsv, DsvConfig
|
23
26
|
from splurge_dsv.exceptions import SplurgeDsvError
|
24
27
|
|
25
28
|
|
26
29
|
def parse_arguments() -> argparse.Namespace:
|
27
|
-
"""
|
30
|
+
"""Construct and parse command-line arguments for the CLI.
|
31
|
+
|
32
|
+
Returns:
|
33
|
+
argparse.Namespace: Parsed arguments with attributes matching the
|
34
|
+
defined options.
|
35
|
+
"""
|
28
36
|
parser = argparse.ArgumentParser(
|
29
37
|
description="Parse DSV (Delimited String Values) files",
|
30
38
|
formatter_class=argparse.RawDescriptionHelpFormatter,
|
@@ -71,7 +79,15 @@ Examples:
|
|
71
79
|
|
72
80
|
|
73
81
|
def print_results(rows: list[list[str]], delimiter: str) -> None:
|
74
|
-
"""Print parsed
|
82
|
+
"""Print parsed rows in a human-readable table format.
|
83
|
+
|
84
|
+
The function computes column widths and prints a simple ASCII table.
|
85
|
+
|
86
|
+
Args:
|
87
|
+
rows: Parsed rows to print (first row is treated as header).
|
88
|
+
delimiter: Delimiter used (included here for compatibility; printing
|
89
|
+
does not depend on it directly).
|
90
|
+
"""
|
75
91
|
if not rows:
|
76
92
|
print("No data found.")
|
77
93
|
return
|
@@ -100,23 +116,17 @@ def print_results(rows: list[list[str]], delimiter: str) -> None:
|
|
100
116
|
|
101
117
|
|
102
118
|
def run_cli() -> int:
|
103
|
-
"""
|
119
|
+
"""Main entry point for running the splurge-dsv CLI.
|
104
120
|
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
and streaming modes for large files.
|
121
|
+
The function handles argument parsing, basic path validation, constructing
|
122
|
+
the ``DsvConfig`` and ``Dsv`` objects, and printing results in the
|
123
|
+
requested format. Designed to be invoked from ``__main__``.
|
109
124
|
|
110
125
|
Returns:
|
111
|
-
|
112
|
-
- 0: Success
|
113
|
-
- 1: Generic error (file not found, parsing error, etc.)
|
114
|
-
- 2: Invalid arguments
|
115
|
-
- 130: Operation interrupted (Ctrl+C)
|
126
|
+
Exit code (0 success, non-zero error codes on failure).
|
116
127
|
|
117
128
|
Raises:
|
118
|
-
SystemExit:
|
119
|
-
This is handled internally and should not be caught by callers.
|
129
|
+
SystemExit: On argument parser termination (handled internally).
|
120
130
|
"""
|
121
131
|
try:
|
122
132
|
args = parse_arguments()
|
@@ -131,6 +141,19 @@ def run_cli() -> int:
|
|
131
141
|
print(f"Error: '{args.file_path}' is not a file.", file=sys.stderr)
|
132
142
|
return 1
|
133
143
|
|
144
|
+
# Create configuration and Dsv instance for parsing
|
145
|
+
config = DsvConfig(
|
146
|
+
delimiter=args.delimiter,
|
147
|
+
strip=not args.no_strip,
|
148
|
+
bookend=args.bookend,
|
149
|
+
bookend_strip=not args.no_bookend_strip,
|
150
|
+
encoding=args.encoding,
|
151
|
+
skip_header_rows=args.skip_header,
|
152
|
+
skip_footer_rows=args.skip_footer,
|
153
|
+
chunk_size=args.chunk_size,
|
154
|
+
)
|
155
|
+
dsv = Dsv(config)
|
156
|
+
|
134
157
|
# Parse the file
|
135
158
|
if args.stream:
|
136
159
|
if args.output_format != "json":
|
@@ -138,17 +161,7 @@ def run_cli() -> int:
|
|
138
161
|
chunk_count = 0
|
139
162
|
total_rows = 0
|
140
163
|
|
141
|
-
for chunk in
|
142
|
-
file_path,
|
143
|
-
delimiter=args.delimiter,
|
144
|
-
strip=not args.no_strip,
|
145
|
-
bookend=args.bookend,
|
146
|
-
bookend_strip=not args.no_bookend_strip,
|
147
|
-
encoding=args.encoding,
|
148
|
-
skip_header_rows=args.skip_header,
|
149
|
-
skip_footer_rows=args.skip_footer,
|
150
|
-
chunk_size=args.chunk_size,
|
151
|
-
):
|
164
|
+
for chunk in dsv.parse_file_stream(file_path):
|
152
165
|
chunk_count += 1
|
153
166
|
total_rows += len(chunk)
|
154
167
|
if args.output_format == "json":
|
@@ -166,16 +179,7 @@ def run_cli() -> int:
|
|
166
179
|
else:
|
167
180
|
if args.output_format not in ["json", "ndjson"]:
|
168
181
|
print(f"Parsing file '{args.file_path}' with delimiter '{args.delimiter}'...")
|
169
|
-
rows =
|
170
|
-
file_path,
|
171
|
-
delimiter=args.delimiter,
|
172
|
-
strip=not args.no_strip,
|
173
|
-
bookend=args.bookend,
|
174
|
-
bookend_strip=not args.no_bookend_strip,
|
175
|
-
encoding=args.encoding,
|
176
|
-
skip_header_rows=args.skip_header,
|
177
|
-
skip_footer_rows=args.skip_footer,
|
178
|
-
)
|
182
|
+
rows = dsv.parse_file(file_path)
|
179
183
|
|
180
184
|
if args.output_format == "json":
|
181
185
|
print(json.dumps(rows, ensure_ascii=False))
|
splurge_dsv/dsv.py
ADDED
@@ -0,0 +1,280 @@
|
|
1
|
+
"""DSV parsing primitives and configuration objects.
|
2
|
+
|
3
|
+
This module exposes the :class:`DsvConfig` dataclass and the :class:`Dsv`
|
4
|
+
parser. ``DsvConfig`` encapsulates parsing options such as delimiter,
|
5
|
+
encoding and header/footer skipping. ``Dsv`` is a thin, stateful wrapper
|
6
|
+
around :mod:`splurge_dsv.dsv_helper` that binds a configuration to
|
7
|
+
parsing operations and provides convenience methods for parsing strings,
|
8
|
+
files, and streaming large inputs.
|
9
|
+
|
10
|
+
Public API:
|
11
|
+
- DsvConfig: Configuration dataclass for parsing behavior.
|
12
|
+
- Dsv: Parser instance that performs parse/parses/parse_file/parse_file_stream (parse_stream is a deprecated alias).
|
13
|
+
|
14
|
+
License: MIT
|
15
|
+
|
16
|
+
Copyright (c) 2025 Jim Schilling
|
17
|
+
"""
|
18
|
+
|
19
|
+
# Standard library imports
|
20
|
+
import warnings
|
21
|
+
from collections.abc import Iterator
|
22
|
+
from dataclasses import dataclass, fields
|
23
|
+
from os import PathLike
|
24
|
+
|
25
|
+
# Local imports
|
26
|
+
from splurge_dsv.dsv_helper import DsvHelper
|
27
|
+
from splurge_dsv.exceptions import SplurgeDsvParameterError
|
28
|
+
|
29
|
+
|
30
|
+
@dataclass(frozen=True)
class DsvConfig:
    """Immutable bundle of options that control DSV parsing.

    Instances are frozen; the values are checked once, right after
    construction, by :meth:`__post_init__`.

    Args:
        delimiter: The delimiter character used to separate values.
        strip: Whether to strip whitespace from parsed values.
        bookend: Optional character that wraps text fields (e.g., quotes).
        bookend_strip: Whether to strip whitespace from bookend characters.
        encoding: Text encoding for file operations.
        skip_header_rows: Number of header rows to skip when reading files.
        skip_footer_rows: Number of footer rows to skip when reading files.
        chunk_size: Size of chunks for streaming operations.

    Raises:
        SplurgeDsvParameterError: If ``delimiter`` is empty, ``chunk_size``
            is below ``DsvHelper.DEFAULT_MIN_CHUNK_SIZE``, or either skip
            count is negative.
    """

    delimiter: str
    strip: bool = True
    bookend: str | None = None
    bookend_strip: bool = True
    encoding: str = "utf-8"
    skip_header_rows: int = 0
    skip_footer_rows: int = 0
    chunk_size: int = 500

    def __post_init__(self) -> None:
        """Reject configurations with a missing delimiter or out-of-range numbers.

        Checks run in a fixed order (delimiter, chunk size, header skip,
        footer skip) and the first failing check raises.
        """
        if not self.delimiter:
            raise SplurgeDsvParameterError("delimiter cannot be empty or None")

        minimum_chunk = DsvHelper.DEFAULT_MIN_CHUNK_SIZE
        if self.chunk_size < minimum_chunk:
            raise SplurgeDsvParameterError(f"chunk_size must be at least {minimum_chunk}, got {self.chunk_size}")

        header_skip = self.skip_header_rows
        if header_skip < 0:
            raise SplurgeDsvParameterError(f"skip_header_rows cannot be negative, got {header_skip}")

        footer_skip = self.skip_footer_rows
        if footer_skip < 0:
            raise SplurgeDsvParameterError(f"skip_footer_rows cannot be negative, got {footer_skip}")

    @classmethod
    def csv(cls, **overrides) -> "DsvConfig":
        """Build a comma-delimited configuration.

        Args:
            **overrides: Values for any field other than ``delimiter``
                (``delimiter`` is already supplied here, so passing it again
                is a duplicate keyword argument).

        Returns:
            DsvConfig: Configuration whose delimiter is ``","``.

        Example:
            >>> config = DsvConfig.csv(skip_header_rows=1)
            >>> config.delimiter
            ','
        """
        return cls(delimiter=",", **overrides)

    @classmethod
    def tsv(cls, **overrides) -> "DsvConfig":
        """Build a tab-delimited configuration.

        Args:
            **overrides: Values for any field other than ``delimiter``
                (``delimiter`` is already supplied here, so passing it again
                is a duplicate keyword argument).

        Returns:
            DsvConfig: Configuration whose delimiter is a tab character.

        Example:
            >>> config = DsvConfig.tsv(encoding="utf-16")
            >>> config.delimiter
            '\\t'
        """
        return cls(delimiter="\t", **overrides)

    @classmethod
    def from_params(cls, **kwargs) -> "DsvConfig":
        """Build a configuration from a loose bag of keyword arguments.

        Keys that are not :class:`DsvConfig` field names are dropped
        silently, which makes it safe to forward parameter dictionaries
        coming from other APIs. ``delimiter`` has no default, so it must
        still be present among the keys.

        Args:
            **kwargs: Candidate configuration values; unknown keys are ignored.

        Returns:
            DsvConfig: Configuration built from the recognised keys only.

        Example:
            >>> config = DsvConfig.from_params(delimiter=",", invalid_param="ignored")
            >>> config.delimiter
            ','
        """
        recognised = {spec.name for spec in fields(cls)}
        accepted = dict(item for item in kwargs.items() if item[0] in recognised)
        return cls(**accepted)
|
139
|
+
|
140
|
+
|
141
|
+
class Dsv:
|
142
|
+
"""Parser class that binds a :class:`DsvConfig` to parsing operations.
|
143
|
+
|
144
|
+
The class delegates actual parsing to :mod:`splurge_dsv.dsv_helper` while
|
145
|
+
providing a convenient instance API for repeated parsing tasks with the
|
146
|
+
same configuration.
|
147
|
+
|
148
|
+
Attributes:
|
149
|
+
config (DsvConfig): Configuration instance used for parsing calls.
|
150
|
+
"""
|
151
|
+
|
152
|
+
def __init__(self, config: DsvConfig) -> None:
|
153
|
+
"""
|
154
|
+
Initialize DSV parser with configuration.
|
155
|
+
|
156
|
+
Args:
|
157
|
+
config: DsvConfig object containing parsing parameters
|
158
|
+
|
159
|
+
Example:
|
160
|
+
>>> config = DsvConfig(delimiter=",")
|
161
|
+
>>> parser = Dsv(config)
|
162
|
+
"""
|
163
|
+
self.config = config
|
164
|
+
|
165
|
+
def parse(self, content: str) -> list[str]:
|
166
|
+
"""Parse a single DSV record (string) into a list of tokens.
|
167
|
+
|
168
|
+
Args:
|
169
|
+
content: Input string representing a single DSV record.
|
170
|
+
|
171
|
+
Returns:
|
172
|
+
List of parsed tokens as strings.
|
173
|
+
|
174
|
+
Raises:
|
175
|
+
SplurgeDsvParameterError: If the configured delimiter is invalid.
|
176
|
+
"""
|
177
|
+
return DsvHelper.parse(
|
178
|
+
content,
|
179
|
+
delimiter=self.config.delimiter,
|
180
|
+
strip=self.config.strip,
|
181
|
+
bookend=self.config.bookend,
|
182
|
+
bookend_strip=self.config.bookend_strip,
|
183
|
+
)
|
184
|
+
|
185
|
+
def parses(self, content: list[str]) -> list[list[str]]:
|
186
|
+
"""
|
187
|
+
Parse a list of strings into a list of lists of strings.
|
188
|
+
|
189
|
+
Args:
|
190
|
+
content: List of strings to parse
|
191
|
+
|
192
|
+
Returns:
|
193
|
+
List of lists of parsed strings
|
194
|
+
|
195
|
+
Example:
|
196
|
+
>>> parser = Dsv(DsvConfig(delimiter=","))
|
197
|
+
>>> parser.parses(["a,b", "c,d"])
|
198
|
+
[['a', 'b'], ['c', 'd']]
|
199
|
+
"""
|
200
|
+
return DsvHelper.parses(
|
201
|
+
content,
|
202
|
+
delimiter=self.config.delimiter,
|
203
|
+
strip=self.config.strip,
|
204
|
+
bookend=self.config.bookend,
|
205
|
+
bookend_strip=self.config.bookend_strip,
|
206
|
+
)
|
207
|
+
|
208
|
+
def parse_file(self, file_path: PathLike[str] | str) -> list[list[str]]:
|
209
|
+
"""Parse a DSV file and return all rows as lists of strings.
|
210
|
+
|
211
|
+
Args:
|
212
|
+
file_path: Path to the file to parse.
|
213
|
+
|
214
|
+
Returns:
|
215
|
+
A list of rows, where each row is a list of string tokens.
|
216
|
+
|
217
|
+
Raises:
|
218
|
+
SplurgeDsvFileNotFoundError: If the file cannot be found.
|
219
|
+
SplurgeDsvFilePermissionError: If the file cannot be read.
|
220
|
+
SplurgeDsvFileEncodingError: If the file cannot be decoded with
|
221
|
+
the configured encoding.
|
222
|
+
"""
|
223
|
+
return DsvHelper.parse_file(
|
224
|
+
file_path,
|
225
|
+
delimiter=self.config.delimiter,
|
226
|
+
strip=self.config.strip,
|
227
|
+
bookend=self.config.bookend,
|
228
|
+
bookend_strip=self.config.bookend_strip,
|
229
|
+
encoding=self.config.encoding,
|
230
|
+
skip_header_rows=self.config.skip_header_rows,
|
231
|
+
skip_footer_rows=self.config.skip_footer_rows,
|
232
|
+
)
|
233
|
+
|
234
|
+
def parse_file_stream(self, file_path: PathLike[str] | str) -> Iterator[list[list[str]]]:
|
235
|
+
"""Stream-parse a DSV file, yielding chunks of parsed rows.
|
236
|
+
|
237
|
+
The method yields lists of parsed rows (each row itself is a list of
|
238
|
+
strings). Chunk sizing is controlled by the bound configuration's
|
239
|
+
``chunk_size`` value.
|
240
|
+
|
241
|
+
Args:
|
242
|
+
file_path: Path to the file to parse.
|
243
|
+
|
244
|
+
Yields:
|
245
|
+
Lists of parsed rows, each list containing up to ``chunk_size`` rows.
|
246
|
+
"""
|
247
|
+
return DsvHelper.parse_file_stream(
|
248
|
+
file_path,
|
249
|
+
delimiter=self.config.delimiter,
|
250
|
+
strip=self.config.strip,
|
251
|
+
bookend=self.config.bookend,
|
252
|
+
bookend_strip=self.config.bookend_strip,
|
253
|
+
encoding=self.config.encoding,
|
254
|
+
skip_header_rows=self.config.skip_header_rows,
|
255
|
+
skip_footer_rows=self.config.skip_footer_rows,
|
256
|
+
chunk_size=self.config.chunk_size,
|
257
|
+
)
|
258
|
+
|
259
|
+
def parse_stream(self, file_path: PathLike[str] | str) -> Iterator[list[list[str]]]:
|
260
|
+
"""Stream-parse a DSV file, yielding chunks of parsed rows.
|
261
|
+
|
262
|
+
The method yields lists of parsed rows (each row itself is a list of
|
263
|
+
strings). Chunk sizing is controlled by the bound configuration's
|
264
|
+
``chunk_size`` value.
|
265
|
+
|
266
|
+
Args:
|
267
|
+
file_path: Path to the file to parse.
|
268
|
+
|
269
|
+
Yields:
|
270
|
+
Lists of parsed rows, each list containing up to ``chunk_size`` rows.
|
271
|
+
|
272
|
+
Deprecated: Use `parse_file_stream` instead. This method will be removed in a future release.
|
273
|
+
"""
|
274
|
+
# Emit a DeprecationWarning to signal removal in a future release
|
275
|
+
warnings.warn(
|
276
|
+
"Dsv.parse_stream() is deprecated and will be removed in a future release; use Dsv.parse_file_stream() instead.",
|
277
|
+
DeprecationWarning,
|
278
|
+
stacklevel=2,
|
279
|
+
)
|
280
|
+
return Dsv.parse_file_stream(self, file_path)
|