splurge-dsv 2025.1.0__py3-none-any.whl → 2025.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
splurge_dsv/__init__.py CHANGED
@@ -0,0 +1,84 @@
1
+ """
2
+ Splurge DSV - A utility library for working with DSV (Delimited String Values) files.
3
+
4
+ This package provides utilities for parsing, processing, and manipulating
5
+ delimited string value files with support for various delimiters, text bookends,
6
+ and streaming operations.
7
+
8
+ Copyright (c) 2025 Jim Schilling
9
+
10
+ This module is licensed under the MIT License.
11
+ """
12
+
13
+ # Local imports
14
+ from splurge_dsv.dsv_helper import DsvHelper
15
+ from splurge_dsv.exceptions import (
16
+ SplurgeConfigurationError,
17
+ SplurgeDataProcessingError,
18
+ SplurgeDsvError,
19
+ SplurgeFileEncodingError,
20
+ SplurgeFileNotFoundError,
21
+ SplurgeFileOperationError,
22
+ SplurgeFilePermissionError,
23
+ SplurgeFormatError,
24
+ SplurgeParameterError,
25
+ SplurgeParsingError,
26
+ SplurgePathValidationError,
27
+ SplurgePerformanceWarning,
28
+ SplurgeRangeError,
29
+ SplurgeResourceAcquisitionError,
30
+ SplurgeResourceError,
31
+ SplurgeResourceReleaseError,
32
+ SplurgeStreamingError,
33
+ SplurgeTypeConversionError,
34
+ SplurgeValidationError,
35
+ )
36
+ from splurge_dsv.path_validator import PathValidator
37
+ from splurge_dsv.resource_manager import (
38
+ FileResourceManager,
39
+ ResourceManager,
40
+ StreamResourceManager,
41
+ safe_file_operation,
42
+ safe_stream_operation,
43
+ )
44
+ from splurge_dsv.string_tokenizer import StringTokenizer
45
+ from splurge_dsv.text_file_helper import TextFileHelper
46
+
47
+ __version__ = "2025.1.2"
48
+ __author__ = "Jim Schilling"
49
+ __license__ = "MIT"
50
+
51
+ __all__ = [
52
+ # Main helper class
53
+ "DsvHelper",
54
+ # Exceptions
55
+ "SplurgeDsvError",
56
+ "SplurgeValidationError",
57
+ "SplurgeFileOperationError",
58
+ "SplurgeFileNotFoundError",
59
+ "SplurgeFilePermissionError",
60
+ "SplurgeFileEncodingError",
61
+ "SplurgePathValidationError",
62
+ "SplurgeDataProcessingError",
63
+ "SplurgeParsingError",
64
+ "SplurgeTypeConversionError",
65
+ "SplurgeStreamingError",
66
+ "SplurgeConfigurationError",
67
+ "SplurgeResourceError",
68
+ "SplurgeResourceAcquisitionError",
69
+ "SplurgeResourceReleaseError",
70
+ "SplurgePerformanceWarning",
71
+ "SplurgeParameterError",
72
+ "SplurgeRangeError",
73
+ "SplurgeFormatError",
74
+ # Utility classes
75
+ "StringTokenizer",
76
+ "TextFileHelper",
77
+ "PathValidator",
78
+ "ResourceManager",
79
+ "FileResourceManager",
80
+ "StreamResourceManager",
81
+ # Context managers
82
+ "safe_file_operation",
83
+ "safe_stream_operation",
84
+ ]
splurge_dsv/__main__.py CHANGED
@@ -0,0 +1,15 @@
1
+ """
2
+ Command-line interface entry point for splurge-dsv.
3
+
4
+ This module serves as the entry point when running the package as a module.
5
+ It imports and calls the main CLI function from the cli module.
6
+ """
7
+
8
+ # Standard library imports
9
+ import sys
10
+
11
+ # Local imports
12
+ from splurge_dsv.cli import main
13
+
14
+ if __name__ == "__main__":
15
+ sys.exit(main())
splurge_dsv/cli.py ADDED
@@ -0,0 +1,158 @@
1
+ """
2
+ Command-line interface for splurge-dsv.
3
+
4
+ This module provides a command-line interface for the splurge-dsv library,
5
+ allowing users to parse DSV files from the command line.
6
+
7
+ Usage:
8
+ python -m splurge_dsv <file_path> [options]
9
+ python -m splurge_dsv --help
10
+ """
11
+
12
+ # Standard library imports
13
+ import argparse
14
+ import sys
15
+ from pathlib import Path
16
+
17
+ # Local imports
18
+ from splurge_dsv.dsv_helper import DsvHelper
19
+ from splurge_dsv.exceptions import SplurgeDsvError
20
+
21
+
22
+ def parse_arguments() -> argparse.Namespace:
23
+ """Parse command line arguments."""
24
+ parser = argparse.ArgumentParser(
25
+ description="Parse DSV (Delimited String Values) files",
26
+ formatter_class=argparse.RawDescriptionHelpFormatter,
27
+ epilog="""
28
+ Examples:
29
+ python -m splurge_dsv data.csv --delimiter ,
30
+ python -m splurge_dsv data.tsv --delimiter "\\t"
31
+ python -m splurge_dsv data.txt --delimiter "|" --bookend '"'
32
+ """,
33
+ )
34
+
35
+ parser.add_argument("file_path", type=str, help="Path to the DSV file to parse")
36
+
37
+ parser.add_argument("--delimiter", "-d", type=str, required=True, help="Delimiter character to use for parsing")
38
+
39
+ parser.add_argument("--bookend", "-b", type=str, help="Bookend character for text fields (e.g., '\"')")
40
+
41
+ parser.add_argument("--no-strip", action="store_true", help="Don't strip whitespace from values")
42
+
43
+ parser.add_argument("--no-bookend-strip", action="store_true", help="Don't strip whitespace from bookends")
44
+
45
+ parser.add_argument("--encoding", "-e", type=str, default="utf-8", help="File encoding (default: utf-8)")
46
+
47
+ parser.add_argument("--skip-header", type=int, default=0, help="Number of header rows to skip (default: 0)")
48
+
49
+ parser.add_argument("--skip-footer", type=int, default=0, help="Number of footer rows to skip (default: 0)")
50
+
51
+ parser.add_argument(
52
+ "--stream", "-s", action="store_true", help="Stream the file in chunks instead of loading entirely into memory"
53
+ )
54
+
55
+ parser.add_argument("--chunk-size", type=int, default=500, help="Chunk size for streaming (default: 500)")
56
+
57
+ parser.add_argument("--version", action="version", version="%(prog)s 2025.1.2")
58
+
59
+ return parser.parse_args()
60
+
61
+
62
def print_results(rows: list[list[str]], delimiter: str) -> None:
    """Print parsed rows to stdout as a simple pipe-separated table.

    The first row is treated as a header: a dashed separator line is printed
    before it and again directly after it. Every column is left-justified to
    the width of its longest value.

    Rows may have differing lengths. The table is sized to the widest row and
    shorter rows are padded with empty cells, so ragged input no longer raises
    ``IndexError``.

    Args:
        rows: Parsed rows to display; each row is a list of cell values.
        delimiter: Delimiter used when parsing. Not used for formatting; kept
            so existing callers keep working.
    """
    if not rows:
        print("No data found.")
        return

    # Size the table by the widest row, not just the first one.
    column_count = max(len(row) for row in rows)
    max_widths = [0] * column_count
    for row in rows:
        for col_idx, value in enumerate(row):
            max_widths[col_idx] = max(max_widths[col_idx], len(str(value)))

    # Same separator width formula as before, computed once and reused.
    separator = "-" * (sum(max_widths) + len(max_widths) * 3 - 1)

    # Print header separator
    print(separator)

    for row_idx, row in enumerate(rows):
        # Pad short rows so every printed line has the same number of cells.
        padded_row = list(row) + [""] * (column_count - len(row))
        formatted_row = [str(value).ljust(width) for value, width in zip(padded_row, max_widths)]
        print(f"| {' | '.join(formatted_row)} |")

        # Print separator after header
        if row_idx == 0:
            print(separator)
89
+
90
+
91
def main() -> int:
    """Run the splurge-dsv command-line interface.

    Parses the command line, checks that the target path exists and is a
    regular file, then either parses the whole file or streams it in chunks,
    printing each result table to stdout.

    Returns:
        int: ``0`` on success, ``1`` when the path is missing, is not a file,
        or any error is raised while parsing, and ``130`` when the user
        interrupts the run.
    """
    try:
        args = parse_arguments()

        # Validate file path
        target = Path(args.file_path)
        if not target.exists():
            print(f"Error: File '{args.file_path}' not found.", file=sys.stderr)
            return 1
        if not target.is_file():
            print(f"Error: '{args.file_path}' is not a file.", file=sys.stderr)
            return 1

        # Options shared by the whole-file and streaming code paths.
        parse_options = {
            "delimiter": args.delimiter,
            "strip": not args.no_strip,
            "bookend": args.bookend,
            "bookend_strip": not args.no_bookend_strip,
            "encoding": args.encoding,
            "skip_header_rows": args.skip_header,
            "skip_footer_rows": args.skip_footer,
        }

        if not args.stream:
            print(f"Parsing file '{args.file_path}' with delimiter '{args.delimiter}'...")
            rows = DsvHelper.parse_file(target, **parse_options)
            print(f"Parsed {len(rows)} rows")
            print_results(rows, args.delimiter)
            return 0

        print(f"Streaming file '{args.file_path}' with delimiter '{args.delimiter}'...")
        total_rows = 0
        chunk_count = 0  # stays 0 when the stream yields nothing
        chunks = DsvHelper.parse_stream(target, chunk_size=args.chunk_size, **parse_options)
        for chunk_count, chunk in enumerate(chunks, start=1):
            total_rows += len(chunk)
            print(f"Chunk {chunk_count}: {len(chunk)} rows")
            print_results(chunk, args.delimiter)
            print()

        print(f"Total: {total_rows} rows in {chunk_count} chunks")
        return 0

    except KeyboardInterrupt:
        print("\nOperation cancelled by user.", file=sys.stderr)
        return 130
    except SplurgeDsvError as exc:
        print(f"Error: {exc.message}", file=sys.stderr)
        if exc.details:
            print(f"Details: {exc.details}", file=sys.stderr)
        return 1
    except Exception as exc:
        # Top-level boundary: report anything unexpected instead of a traceback.
        print(f"Unexpected error: {exc}", file=sys.stderr)
        return 1
splurge_dsv/dsv_helper.py CHANGED
@@ -8,12 +8,15 @@ Please preserve this header and all related material when sharing!
8
8
  This module is licensed under the MIT License.
9
9
  """
10
10
 
11
+ # Standard library imports
12
+ from collections.abc import Iterator
11
13
  from os import PathLike
12
- from typing import Iterator
13
14
 
15
+ # Local imports
16
+ from splurge_dsv.exceptions import SplurgeParameterError
14
17
  from splurge_dsv.string_tokenizer import StringTokenizer
15
18
  from splurge_dsv.text_file_helper import TextFileHelper
16
- from splurge_dsv.exceptions import SplurgeParameterError
19
+
17
20
 
18
21
  class DsvHelper:
19
22
  """
@@ -38,7 +41,7 @@ class DsvHelper:
38
41
  delimiter: str,
39
42
  strip: bool = DEFAULT_STRIP,
40
43
  bookend: str | None = None,
41
- bookend_strip: bool = DEFAULT_BOOKEND_STRIP
44
+ bookend_strip: bool = DEFAULT_BOOKEND_STRIP,
42
45
  ) -> list[str]:
43
46
  """
44
47
  Parse a string into a list of strings.
@@ -68,10 +71,7 @@ class DsvHelper:
68
71
  tokens: list[str] = StringTokenizer.parse(content, delimiter=delimiter, strip=strip)
69
72
 
70
73
  if bookend:
71
- tokens = [
72
- StringTokenizer.remove_bookends(token, bookend=bookend, strip=bookend_strip)
73
- for token in tokens
74
- ]
74
+ tokens = [StringTokenizer.remove_bookends(token, bookend=bookend, strip=bookend_strip) for token in tokens]
75
75
 
76
76
  return tokens
77
77
 
@@ -83,7 +83,7 @@ class DsvHelper:
83
83
  delimiter: str,
84
84
  strip: bool = DEFAULT_STRIP,
85
85
  bookend: str | None = None,
86
- bookend_strip: bool = DEFAULT_BOOKEND_STRIP
86
+ bookend_strip: bool = DEFAULT_BOOKEND_STRIP,
87
87
  ) -> list[list[str]]:
88
88
  """
89
89
  Parse a list of strings into a list of lists of strings.
@@ -108,7 +108,7 @@ class DsvHelper:
108
108
  """
109
109
  if not isinstance(content, list):
110
110
  raise SplurgeParameterError("content must be a list")
111
-
111
+
112
112
  if not all(isinstance(item, str) for item in content):
113
113
  raise SplurgeParameterError("content must be a list of strings")
114
114
 
@@ -128,7 +128,7 @@ class DsvHelper:
128
128
  bookend_strip: bool = DEFAULT_BOOKEND_STRIP,
129
129
  encoding: str = DEFAULT_ENCODING,
130
130
  skip_header_rows: int = DEFAULT_SKIP_HEADER_ROWS,
131
- skip_footer_rows: int = DEFAULT_SKIP_FOOTER_ROWS
131
+ skip_footer_rows: int = DEFAULT_SKIP_FOOTER_ROWS,
132
132
  ) -> list[list[str]]:
133
133
  """
134
134
  Parse a file into a list of lists of strings.
@@ -157,19 +157,10 @@ class DsvHelper:
157
157
  [['header1', 'header2'], ['value1', 'value2']]
158
158
  """
159
159
  lines: list[str] = TextFileHelper.read(
160
- file_path,
161
- encoding=encoding,
162
- skip_header_rows=skip_header_rows,
163
- skip_footer_rows=skip_footer_rows
160
+ file_path, encoding=encoding, skip_header_rows=skip_header_rows, skip_footer_rows=skip_footer_rows
164
161
  )
165
162
 
166
- return cls.parses(
167
- lines,
168
- delimiter=delimiter,
169
- strip=strip,
170
- bookend=bookend,
171
- bookend_strip=bookend_strip
172
- )
163
+ return cls.parses(lines, delimiter=delimiter, strip=strip, bookend=bookend, bookend_strip=bookend_strip)
173
164
 
174
165
  @classmethod
175
166
  def _process_stream_chunk(
@@ -179,28 +170,22 @@ class DsvHelper:
179
170
  delimiter: str,
180
171
  strip: bool = DEFAULT_STRIP,
181
172
  bookend: str | None = None,
182
- bookend_strip: bool = DEFAULT_BOOKEND_STRIP
173
+ bookend_strip: bool = DEFAULT_BOOKEND_STRIP,
183
174
  ) -> list[list[str]]:
184
175
  """
185
176
  Process a chunk of lines from the stream.
186
-
177
+
187
178
  Args:
188
179
  chunk: List of lines to process
189
180
  delimiter: Delimiter to use for parsing
190
181
  strip: Whether to strip whitespace
191
182
  bookend: Bookend character for text fields
192
183
  bookend_strip: Whether to strip whitespace from bookends
193
-
184
+
194
185
  Returns:
195
186
  list[list[str]]: Parsed rows
196
187
  """
197
- return cls.parses(
198
- chunk,
199
- delimiter=delimiter,
200
- strip=strip,
201
- bookend=bookend,
202
- bookend_strip=bookend_strip
203
- )
188
+ return cls.parses(chunk, delimiter=delimiter, strip=strip, bookend=bookend, bookend_strip=bookend_strip)
204
189
 
205
190
  @classmethod
206
191
  def parse_stream(
@@ -214,7 +199,7 @@ class DsvHelper:
214
199
  encoding: str = DEFAULT_ENCODING,
215
200
  skip_header_rows: int = DEFAULT_SKIP_HEADER_ROWS,
216
201
  skip_footer_rows: int = DEFAULT_SKIP_FOOTER_ROWS,
217
- chunk_size: int = DEFAULT_CHUNK_SIZE
202
+ chunk_size: int = DEFAULT_CHUNK_SIZE,
218
203
  ) -> Iterator[list[list[str]]]:
219
204
  """
220
205
  Stream-parse a DSV file in chunks of lines.
@@ -247,17 +232,15 @@ class DsvHelper:
247
232
  skip_footer_rows = max(skip_footer_rows, cls.DEFAULT_SKIP_FOOTER_ROWS)
248
233
 
249
234
  # Use TextFileHelper.read_as_stream for consistent error handling
250
- for chunk in TextFileHelper.read_as_stream(
251
- file_path,
252
- encoding=encoding,
253
- skip_header_rows=skip_header_rows,
254
- skip_footer_rows=skip_footer_rows,
255
- chunk_size=chunk_size
256
- ):
257
- yield cls._process_stream_chunk(
258
- chunk,
259
- delimiter=delimiter,
260
- strip=strip,
261
- bookend=bookend,
262
- bookend_strip=bookend_strip
263
- )
235
+ yield from (
236
+ cls._process_stream_chunk(
237
+ chunk, delimiter=delimiter, strip=strip, bookend=bookend, bookend_strip=bookend_strip
238
+ )
239
+ for chunk in TextFileHelper.read_as_stream(
240
+ file_path,
241
+ encoding=encoding,
242
+ skip_header_rows=skip_header_rows,
243
+ skip_footer_rows=skip_footer_rows,
244
+ chunk_size=chunk_size,
245
+ )
246
+ )
splurge_dsv/exceptions.py CHANGED
@@ -14,16 +14,11 @@ This module is licensed under the MIT License.
14
14
 
15
15
  class SplurgeDsvError(Exception):
16
16
  """Base exception for all splurge-dsv errors."""
17
-
18
- def __init__(
19
- self,
20
- message: str,
21
- *,
22
- details: str | None = None
23
- ) -> None:
17
+
18
+ def __init__(self, message: str, *, details: str | None = None) -> None:
24
19
  """
25
20
  Initialize SplurgeDsvError.
26
-
21
+
27
22
  Args:
28
23
  message: Primary error message
29
24
  details: Additional error details
@@ -35,89 +30,107 @@ class SplurgeDsvError(Exception):
35
30
 
36
31
  class SplurgeValidationError(SplurgeDsvError):
37
32
  """Raised when data validation fails."""
33
+
38
34
  pass
39
35
 
40
36
 
41
37
  class SplurgeFileOperationError(SplurgeDsvError):
42
38
  """Base exception for file operation errors."""
39
+
43
40
  pass
44
41
 
45
42
 
46
43
  class SplurgeFileNotFoundError(SplurgeFileOperationError):
47
44
  """Raised when a file is not found."""
45
+
48
46
  pass
49
47
 
50
48
 
51
49
  class SplurgeFilePermissionError(SplurgeFileOperationError):
52
50
  """Raised when there are permission issues with file operations."""
51
+
53
52
  pass
54
53
 
55
54
 
56
55
  class SplurgeFileEncodingError(SplurgeFileOperationError):
57
56
  """Raised when there are encoding issues with file operations."""
57
+
58
58
  pass
59
59
 
60
60
 
61
61
  class SplurgePathValidationError(SplurgeFileOperationError):
62
62
  """Raised when file path validation fails."""
63
+
63
64
  pass
64
65
 
65
66
 
66
67
  class SplurgeDataProcessingError(SplurgeDsvError):
67
68
  """Base exception for data processing errors."""
69
+
68
70
  pass
69
71
 
70
72
 
71
73
  class SplurgeParsingError(SplurgeDataProcessingError):
72
74
  """Raised when data parsing fails."""
75
+
73
76
  pass
74
77
 
75
78
 
76
79
  class SplurgeTypeConversionError(SplurgeDataProcessingError):
77
80
  """Raised when type conversion fails."""
81
+
78
82
  pass
79
83
 
80
84
 
81
85
  class SplurgeStreamingError(SplurgeDataProcessingError):
82
86
  """Raised when streaming operations fail."""
87
+
83
88
  pass
84
89
 
85
90
 
86
91
  class SplurgeConfigurationError(SplurgeDsvError):
87
92
  """Raised when configuration is invalid."""
93
+
88
94
  pass
89
95
 
90
96
 
91
97
  class SplurgeResourceError(SplurgeDsvError):
92
98
  """Base exception for resource management errors."""
99
+
93
100
  pass
94
101
 
95
102
 
96
103
  class SplurgeResourceAcquisitionError(SplurgeResourceError):
97
104
  """Raised when resource acquisition fails."""
105
+
98
106
  pass
99
107
 
100
108
 
101
109
  class SplurgeResourceReleaseError(SplurgeResourceError):
102
110
  """Raised when resource release fails."""
111
+
103
112
  pass
104
113
 
105
114
 
106
115
  class SplurgePerformanceWarning(SplurgeDsvError):
107
116
  """Warning for performance-related issues."""
117
+
108
118
  pass
109
119
 
110
120
 
111
121
  class SplurgeParameterError(SplurgeValidationError):
112
122
  """Raised when function parameters are invalid."""
123
+
113
124
  pass
114
125
 
115
126
 
116
127
  class SplurgeRangeError(SplurgeValidationError):
117
128
  """Raised when values are outside expected ranges."""
129
+
118
130
  pass
119
131
 
120
132
 
121
133
  class SplurgeFormatError(SplurgeValidationError):
122
134
  """Raised when data format is invalid."""
123
- pass
135
+
136
+ pass