typedcsv_lib-0.1.0-py3-none-any.whl

typedcsv/__init__.py ADDED
@@ -0,0 +1,753 @@
1
+ """
2
+ typedcsv — “Typed CSVs” with header-embedded types + validators (stdlib-only).
3
+
4
+ Contract (v0):
5
+ - Typing + validators live ONLY in the header row.
6
+ - Suffix sigils OR explicit ":type" (not both).
7
+ - Suffix-only sigils:
8
+ # -> int, % -> float, ? -> bool, @ -> datetime, $ -> str
9
+ Explicit types: :int, :float, :bool, :datetime, :str
10
+ - Untyped defaults to str.
11
+ - Validators: optional bracket clause after the type marker:
12
+ col# [min=0 max=10]
13
+ col:str [in=A|B|C]
14
+ col$ [re=^[A-Z]{3}\\d{2}$]
15
+ Parsed as space-separated key=value pairs inside [ ... ].
16
+ Values with spaces must be double-quoted.
17
+ "re=" uses re.fullmatch.
18
+ - Missing values: empty string cell "" is missing:
19
+ str columns -> "" (kept)
20
+ non-str columns -> None
21
+ Missing values skip validation.
22
+ - Errors: raise immediately with TypedCSVError including row/col/context.
23
+ - Writing: None -> "", bool -> true/false, datetime -> isoformat(), float -> repr(f),
24
+ header preserved exactly as provided to DictWriter(fieldnames).
25
+
26
+ API (csv-like, but typed readers consume header):
27
+ - reader(f, ...) -> iterator of typed list rows
28
+ - DictReader(f, ...) -> iterator of typed dict rows
29
+ - writer(f, ...) -> basic csv.writer wrapper for lists (no schema enforcement)
30
+ - DictWriter(f, fieldnames, ...) -> typed-aware dict writer; preserve typed fieldnames
31
+
32
+ Python: 3.10+
33
+ """
34
+
35
+ from __future__ import annotations
36
+
37
+ import csv
38
+ import re
39
+ from dataclasses import dataclass, field
40
+ from datetime import datetime
41
+ from typing import Any, Callable, Dict, Iterable, Iterator, List, Mapping, Optional, Sequence, Tuple, Union
42
+
43
+
44
+ # ----------------------------
45
+ # Exceptions
46
+ # ----------------------------
47
+
48
+ class TypedCSVError(ValueError):
49
+ """Raised on header parse, cell parse, or validation failures with context."""
50
+
51
+ def __init__(
52
+ self,
53
+ *,
54
+ row: int,
55
+ col: int,
56
+ column: str,
57
+ header: str,
58
+ value: str,
59
+ reason: str,
60
+ ) -> None:
61
+ msg = (
62
+ "TypedCSVError(" +
63
+ f"row={row}, col={col}, column={column!r}, "
64
+ f"header={header!r}, value={value!r}): {reason}"
65
+ )
66
+ super().__init__(msg)
67
+ self.row = row # 1-based row index in CSV (header row is 1)
68
+ self.col = col # 0-based column index
69
+ self.column = column # logical column name (stripped)
70
+ self.header = header # raw header cell text
71
+ self.value = value # raw cell text
72
+ self.reason = reason
73
+
74
+
75
+ # ----------------------------
76
+ # Type dialect
77
+ # ----------------------------
78
+
79
+ TypeName = str # "int" | "float" | "str" | "bool" | "datetime"
80
+
81
+ @dataclass(frozen=True)
82
+ class TypeDialect:
83
+ sigils: Mapping[str, TypeName] = field(
84
+ default_factory=lambda: {
85
+ "#": "int",
86
+ "%": "float",
87
+ "?": "bool",
88
+ "@": "datetime",
89
+ "$": "str",
90
+ }
91
+ )
92
+ explicit_sep: str = ":"
93
+ validators_open: str = "["
94
+ validators_close: str = "]"
95
+ in_sep: str = "|"
96
+ # bool parsing (case-insensitive)
97
+ bool_true: Tuple[str, ...] = ("true", "t", "yes", "y", "1")
98
+ bool_false: Tuple[str, ...] = ("false", "f", "no", "n", "0")
99
+ datetime_parser: Callable[[str], datetime] = staticmethod(datetime.fromisoformat)
100
+ datetime_formatter: Callable[[datetime], str] = staticmethod(lambda dt: dt.isoformat())
101
+
102
+
103
+ DEFAULT = TypeDialect()
104
+ DEFAULT_TYPE_DIALECT = DEFAULT
105
+
106
+ _EXPLICIT_TYPES: Tuple[TypeName, ...] = ("int", "float", "str", "bool", "datetime")
107
+ __version__ = "0.1.0"
108
+
109
+
110
+ # ----------------------------
111
+ # Schema / column spec
112
+ # ----------------------------
113
+
114
+ @dataclass(frozen=True)
115
+ class ColumnSpec:
116
+ name: str # logical name (sigil/type stripped)
117
+ type_name: TypeName # one of _EXPLICIT_TYPES
118
+ raw_header: str # raw header cell (including markers/validators)
119
+ validators: Dict[str, str] # raw validator values (strings) from header
120
+ parser: Callable[[str], Any]
121
+ formatter: Callable[[Any], str]
122
+
123
+
124
+ @dataclass(frozen=True)
125
+ class Schema:
126
+ columns: List[ColumnSpec]
127
+
128
+ @property
129
+ def names(self) -> List[str]:
130
+ return [c.name for c in self.columns]
131
+
132
+
133
+ # ----------------------------
134
+ # Header parsing helpers
135
+ # ----------------------------
136
+
137
+ def _split_validators(cell: str, td: TypeDialect, *, row: int, col: int) -> Tuple[str, Optional[str]]:
138
+ """
139
+ Split "prefix [k=v ...]" into ("prefix", "[k=v ...]") or ("prefix", None).
140
+ If '[' appears, require a trailing validators clause; otherwise raise.
141
+ """
142
+ s = cell.strip()
143
+ if not s:
144
+ return s, None
145
+
146
+ idx = s.find(td.validators_open)
147
+ if idx == -1:
148
+ return s, None
149
+
150
+ if not s.endswith(td.validators_close):
151
+ raise TypedCSVError(
152
+ row=row, col=col, column="", header=cell, value="",
153
+ reason="Malformed validators clause (missing closing ']')"
154
+ )
155
+
156
+ prefix = s[:idx].rstrip()
157
+ validators = s[idx:].strip()
158
+ return prefix, validators
159
+
160
+
161
+ def parse_validators(
162
+ text: str,
163
+ td: TypeDialect,
164
+ *,
165
+ row: int,
166
+ col: int,
167
+ column: str,
168
+ header: str,
169
+ ) -> Dict[str, str]:
170
+ """
171
+ Parse validators clause like: [min=0 max=10 re=^a+$ in=A|B|C]
172
+ - Space-separated key=value pairs.
173
+ - Values may be double-quoted if they contain spaces.
174
+ """
175
+ t = text.strip()
176
+ if not (t.startswith(td.validators_open) and t.endswith(td.validators_close)):
177
+ raise TypedCSVError(
178
+ row=row, col=col, column=column, header=header, value="",
179
+ reason=f"Malformed validators clause: {text!r}"
180
+ )
181
+
182
+ inner = t[1:-1].strip()
183
+ if not inner:
184
+ return {}
185
+
186
+ i, n = 0, len(inner)
187
+ out: Dict[str, str] = {}
188
+
189
+ def skip_ws(j: int) -> int:
190
+ while j < n and inner[j].isspace():
191
+ j += 1
192
+ return j
193
+
194
+ i = skip_ws(i)
195
+ while i < n:
196
+ eq = inner.find("=", i)
197
+ if eq == -1:
198
+ raise TypedCSVError(
199
+ row=row, col=col, column=column, header=header, value="",
200
+ reason=f"Invalid validator token (missing '=') near: {inner[i:]!r}"
201
+ )
202
+ key = inner[i:eq].strip()
203
+ if not key:
204
+ raise TypedCSVError(
205
+ row=row, col=col, column=column, header=header, value="",
206
+ reason=f"Invalid validator key near: {inner[i:]!r}"
207
+ )
208
+ j = eq + 1
209
+ if j >= n:
210
+ raise TypedCSVError(
211
+ row=row, col=col, column=column, header=header, value="",
212
+ reason=f"Validator {key!r} missing value"
213
+ )
214
+
215
+ if inner[j] == '"':
216
+ j += 1
217
+ start = j
218
+ while j < n and inner[j] != '"':
219
+ j += 1
220
+ if j >= n:
221
+ raise TypedCSVError(
222
+ row=row, col=col, column=column, header=header, value="",
223
+ reason=f"Unterminated quote in validator {key!r}"
224
+ )
225
+ value = inner[start:j]
226
+ j += 1
227
+ else:
228
+ start = j
229
+ while j < n and not inner[j].isspace():
230
+ j += 1
231
+ value = inner[start:j]
232
+
233
+ if key in out:
234
+ raise TypedCSVError(
235
+ row=row, col=col, column=column, header=header, value="",
236
+ reason=f"Duplicate validator key: {key!r}"
237
+ )
238
+ out[key] = value
239
+
240
+ i = skip_ws(j)
241
+
242
+ return out
243
+
244
+
245
+ def parse_header_cell(cell: str, td: TypeDialect, *, row: int, col: int) -> ColumnSpec:
246
+ """Parse one header cell into ColumnSpec. Raises TypedCSVError on invalid syntax."""
247
+ raw_header = cell
248
+ prefix, vtext = _split_validators(cell, td, row=row, col=col)
249
+
250
+ base = prefix.strip()
251
+ if not base:
252
+ raise TypedCSVError(
253
+ row=row, col=col, column="", header=raw_header, value="",
254
+ reason="Empty header cell"
255
+ )
256
+
257
+ used_explicit = False
258
+ used_sigil = False
259
+ type_name: TypeName = "str"
260
+
261
+ if td.explicit_sep in base:
262
+ name_part, type_part = base.rsplit(td.explicit_sep, 1)
263
+ if type_part not in _EXPLICIT_TYPES:
264
+ raise TypedCSVError(
265
+ row=row, col=col, column="", header=raw_header, value="",
266
+ reason=f"Unknown explicit type: {type_part!r}"
267
+ )
268
+ used_explicit = True
269
+ type_name = type_part
270
+ base_name = name_part.strip()
271
+ else:
272
+ base_name = base
273
+
274
+ if not used_explicit and base_name and base_name[-1] in td.sigils:
275
+ used_sigil = True
276
+ sig = base_name[-1]
277
+ type_name = td.sigils[sig]
278
+ base_name = base_name[:-1].rstrip()
279
+
280
+ if used_explicit and base_name and base_name[-1] in td.sigils:
281
+ raise TypedCSVError(
282
+ row=row, col=col, column="", header=raw_header, value="",
283
+ reason="Header uses both explicit type and sigil (not allowed)"
284
+ )
285
+
286
+ if used_explicit and used_sigil:
287
+ raise TypedCSVError(
288
+ row=row, col=col, column="", header=raw_header, value="",
289
+ reason="Header uses both explicit type and sigil (not allowed)"
290
+ )
291
+
292
+ name = base_name.strip()
293
+ if not name:
294
+ raise TypedCSVError(
295
+ row=row, col=col, column="", header=raw_header, value="",
296
+ reason="Header name is empty after stripping type marker"
297
+ )
298
+
299
+ validators: Dict[str, str] = {}
300
+ if vtext is not None:
301
+ validators = parse_validators(vtext, td, row=row, col=col, column=name, header=raw_header)
302
+
303
+ parser, formatter = _get_type_codec(type_name, td)
304
+ _validate_validator_keys(type_name, validators, td, row=row, col=col, column=name, header=raw_header)
305
+
306
+ return ColumnSpec(
307
+ name=name,
308
+ type_name=type_name,
309
+ raw_header=raw_header,
310
+ validators=validators,
311
+ parser=parser,
312
+ formatter=formatter,
313
+ )
314
+
315
+
316
+ def parse_header_row(headers: Sequence[str], td: TypeDialect) -> Schema:
317
+ cols: List[ColumnSpec] = []
318
+ for i, cell in enumerate(headers):
319
+ cols.append(parse_header_cell(cell, td, row=1, col=i))
320
+
321
+ names = [c.name for c in cols]
322
+ if len(set(names)) != len(names):
323
+ seen = set()
324
+ for idx, n in enumerate(names):
325
+ if n in seen:
326
+ raise TypedCSVError(
327
+ row=1, col=idx, column=n, header=headers[idx], value="",
328
+ reason=f"Duplicate logical column name: {n!r}"
329
+ )
330
+ seen.add(n)
331
+
332
+ return Schema(columns=cols)
333
+
334
+
335
+ # ----------------------------
336
+ # Type codecs (parse/format)
337
+ # ----------------------------
338
+
339
+ def _parse_bool(raw: str, td: TypeDialect) -> bool:
340
+ s = raw.strip().lower()
341
+ if s in td.bool_true:
342
+ return True
343
+ if s in td.bool_false:
344
+ return False
345
+ raise ValueError(f"Invalid bool literal: {raw!r}")
346
+
347
+
348
+ def _format_bool(v: Any) -> str:
349
+ return "true" if bool(v) else "false"
350
+
351
+
352
+ def _get_type_codec(type_name: TypeName, td: TypeDialect) -> Tuple[Callable[[str], Any], Callable[[Any], str]]:
353
+ if type_name == "str":
354
+ return (lambda s: s), (lambda v: "" if v is None else str(v))
355
+ if type_name == "int":
356
+ return int, (lambda v: "" if v is None else str(int(v)))
357
+ if type_name == "float":
358
+ return float, (lambda v: "" if v is None else repr(float(v)))
359
+ if type_name == "bool":
360
+ return (lambda s: _parse_bool(s, td)), (lambda v: "" if v is None else _format_bool(v))
361
+ if type_name == "datetime":
362
+ return td.datetime_parser, (lambda v: "" if v is None else td.datetime_formatter(v))
363
+ raise AssertionError(f"Unsupported type: {type_name!r}")
364
+
365
+
366
+ # ----------------------------
367
+ # Validators
368
+ # ----------------------------
369
+
370
+ _ALLOWED_VALIDATORS: Dict[TypeName, Tuple[str, ...]] = {
371
+ "int": ("min", "max", "in"),
372
+ "float": ("min", "max", "in"),
373
+ "str": ("minlen", "maxlen", "in", "re"),
374
+ "datetime": ("min", "max"),
375
+ "bool": tuple(),
376
+ }
377
+
378
+
379
+ def _validate_validator_keys(
380
+ type_name: TypeName,
381
+ validators: Mapping[str, str],
382
+ td: TypeDialect,
383
+ *,
384
+ row: int,
385
+ col: int,
386
+ column: str,
387
+ header: str,
388
+ ) -> None:
389
+ allowed = set(_ALLOWED_VALIDATORS.get(type_name, tuple()))
390
+ for k in validators:
391
+ if k not in allowed:
392
+ raise TypedCSVError(
393
+ row=row, col=col, column=column, header=header, value="",
394
+ reason=f"Validator {k!r} not allowed for type {type_name!r}"
395
+ )
396
+
397
+
398
+ def _parse_in_set(type_name: TypeName, raw: str, td: TypeDialect) -> set:
399
+ parts = raw.split(td.in_sep) if raw != "" else []
400
+ if type_name == "str":
401
+ return set(parts)
402
+ if type_name == "int":
403
+ return {int(p) for p in parts}
404
+ if type_name == "float":
405
+ return {float(p) for p in parts}
406
+ raise AssertionError("in= not supported for this type in v0")
407
+
408
+
409
+ def _validate_value(spec: ColumnSpec, value: Any, *, row: int, col: int, td: TypeDialect) -> None:
410
+ if value is None:
411
+ return
412
+
413
+ v = spec.validators
414
+ t = spec.type_name
415
+
416
+ try:
417
+ if t in ("int", "float"):
418
+ if "min" in v and value < spec.parser(v["min"]):
419
+ raise ValueError(f"value {value!r} < min {v['min']!r}")
420
+ if "max" in v and value > spec.parser(v["max"]):
421
+ raise ValueError(f"value {value!r} > max {v['max']!r}")
422
+ if "in" in v:
423
+ allowed = _parse_in_set(t, v["in"], td)
424
+ if value not in allowed:
425
+ raise ValueError(f"value {value!r} not in {sorted(allowed)!r}")
426
+
427
+ elif t == "str":
428
+ s = value if isinstance(value, str) else str(value)
429
+ if "minlen" in v and len(s) < int(v["minlen"]):
430
+ raise ValueError(f"len {len(s)} < minlen {v['minlen']!r}")
431
+ if "maxlen" in v and len(s) > int(v["maxlen"]):
432
+ raise ValueError(f"len {len(s)} > maxlen {v['maxlen']!r}")
433
+ if "in" in v:
434
+ allowed = set(v["in"].split(td.in_sep)) if v["in"] != "" else set()
435
+ if s not in allowed:
436
+ raise ValueError(f"value {s!r} not in {sorted(allowed)!r}")
437
+ if "re" in v:
438
+ pattern = v["re"]
439
+ if re.fullmatch(pattern, s) is None:
440
+ raise ValueError(f"value {s!r} does not fullmatch /{pattern}/")
441
+
442
+ elif t == "datetime":
443
+ if "min" in v:
444
+ mn = td.datetime_parser(v["min"])
445
+ if value < mn:
446
+ raise ValueError(f"value {value!r} < min {v['min']!r}")
447
+ if "max" in v:
448
+ mx = td.datetime_parser(v["max"])
449
+ if value > mx:
450
+ raise ValueError(f"value {value!r} > max {v['max']!r}")
451
+
452
+ elif t == "bool":
453
+ return
454
+
455
+ except TypedCSVError:
456
+ raise
457
+ except Exception as e:
458
+ raise TypedCSVError(
459
+ row=row, col=col, column=spec.name, header=spec.raw_header, value=str(value),
460
+ reason=f"Validation failed: {e}"
461
+ ) from e
462
+
463
+
464
+ # ----------------------------
465
+ # Cell parsing (incl. missing handling)
466
+ # ----------------------------
467
+
468
+ def _parse_cell(spec: ColumnSpec, raw: str, *, row: int, col: int, td: TypeDialect) -> Any:
469
+ if raw == "":
470
+ if spec.type_name == "str":
471
+ return ""
472
+ return None
473
+
474
+ try:
475
+ value = spec.parser(raw)
476
+ except Exception as e:
477
+ raise TypedCSVError(
478
+ row=row, col=col, column=spec.name, header=spec.raw_header, value=raw,
479
+ reason=f"Parse failed for type {spec.type_name!r}: {e}"
480
+ ) from e
481
+
482
+ _validate_value(spec, value, row=row, col=col, td=td)
483
+ return value
484
+
485
+
486
+ # ----------------------------
487
+ # Optional inference (conservative)
488
+ # ----------------------------
489
+
490
+ def infer_type(values: Iterable[str], td: TypeDialect = DEFAULT) -> TypeName:
491
+ """
492
+ Infer a type for an *untyped* column from sample values.
493
+ Conservative: int -> float -> bool, else str. Empty strings ignored.
494
+ """
495
+ samples = [v for v in values if v != ""]
496
+ if not samples:
497
+ return "str"
498
+
499
+ def can_parse_all(parse_fn: Callable[[str], Any]) -> bool:
500
+ try:
501
+ for s in samples:
502
+ parse_fn(s)
503
+ return True
504
+ except Exception:
505
+ return False
506
+
507
+ if can_parse_all(int):
508
+ return "int"
509
+ if can_parse_all(float):
510
+ return "float"
511
+ if can_parse_all(lambda s: _parse_bool(s, td)):
512
+ return "bool"
513
+ return "str"
514
+
515
+
516
+ # ----------------------------
517
+ # Readers
518
+ # ----------------------------
519
+
520
+ class TypedReader:
521
+ """Typed equivalent of csv.reader that consumes the header row to build schema."""
522
+
523
+ def __init__(
524
+ self,
525
+ f: Any,
526
+ dialect: Union[str, csv.Dialect] = "excel",
527
+ *,
528
+ type_dialect: TypeDialect = DEFAULT,
529
+ infer_types: bool = False,
530
+ infer_rows: int = 50,
531
+ **fmtparams: Any,
532
+ ) -> None:
533
+ self._csv = csv.reader(f, dialect=dialect, **fmtparams)
534
+ self._td = type_dialect
535
+
536
+ try:
537
+ raw_headers = next(self._csv)
538
+ except StopIteration:
539
+ self.schema = Schema(columns=[])
540
+ self._buffer = []
541
+ self._row_index = 1
542
+ return
543
+
544
+ schema = parse_header_row(raw_headers, type_dialect)
545
+
546
+ if infer_types and schema.columns:
547
+ cols = list(schema.columns)
548
+ peek: List[List[str]] = []
549
+ for _ in range(infer_rows):
550
+ try:
551
+ r = next(self._csv)
552
+ except StopIteration:
553
+ break
554
+ peek.append(r)
555
+
556
+ for j, spec in enumerate(cols):
557
+ raw = spec.raw_header.strip()
558
+ prefix, _ = _split_validators(raw, type_dialect, row=1, col=j)
559
+ p = prefix.strip()
560
+ has_explicit = (type_dialect.explicit_sep in p)
561
+ has_sigil = (len(p) > 0 and p[-1] in type_dialect.sigils)
562
+ has_validators = _ is not None
563
+ if has_explicit or has_sigil or has_validators:
564
+ continue
565
+ if spec.type_name != "str":
566
+ continue
567
+ col_vals = [row[j] if j < len(row) else "" for row in peek]
568
+ inferred = infer_type(col_vals, type_dialect)
569
+ if inferred != "str":
570
+ parser, formatter = _get_type_codec(inferred, type_dialect)
571
+ cols[j] = ColumnSpec(
572
+ name=spec.name,
573
+ type_name=inferred,
574
+ raw_header=spec.raw_header,
575
+ validators=spec.validators,
576
+ parser=parser,
577
+ formatter=formatter,
578
+ )
579
+
580
+ self.schema = Schema(columns=cols)
581
+ self._buffer = peek
582
+ else:
583
+ self.schema = schema
584
+ self._buffer = []
585
+
586
+ self._row_index = 1
587
+
588
+ def __iter__(self) -> "TypedReader":
589
+ return self
590
+
591
+ def __next__(self) -> List[Any]:
592
+ if not hasattr(self, "_iter"):
593
+ self._iter = self._iter_rows()
594
+ return next(self._iter)
595
+
596
+ def _iter_rows(self) -> Iterator[List[Any]]:
597
+ for r in self._buffer:
598
+ self._row_index += 1
599
+ yield self._parse_row(r)
600
+ for r in self._csv:
601
+ self._row_index += 1
602
+ yield self._parse_row(r)
603
+
604
+ def _parse_row(self, row: List[str]) -> List[Any]:
605
+ out: List[Any] = []
606
+ for j, spec in enumerate(self.schema.columns):
607
+ raw = row[j] if j < len(row) else ""
608
+ out.append(_parse_cell(spec, raw, row=self._row_index, col=j, td=self._td))
609
+ return out
610
+
611
+
612
+ class TypedDictReader:
613
+ """Typed equivalent of csv.DictReader that consumes header and yields typed dicts."""
614
+
615
+ def __init__(
616
+ self,
617
+ f: Any,
618
+ dialect: Union[str, csv.Dialect] = "excel",
619
+ *,
620
+ type_dialect: TypeDialect = DEFAULT,
621
+ infer_types: bool = False,
622
+ infer_rows: int = 50,
623
+ **fmtparams: Any,
624
+ ) -> None:
625
+ self._tr = TypedReader(
626
+ f,
627
+ dialect=dialect,
628
+ type_dialect=type_dialect,
629
+ infer_types=infer_types,
630
+ infer_rows=infer_rows,
631
+ **fmtparams,
632
+ )
633
+ self.fieldnames = self._tr.schema.names
634
+
635
+ def __iter__(self) -> Iterator[Dict[str, Any]]:
636
+ for row in self._tr:
637
+ yield dict(zip(self.fieldnames, row))
638
+
639
+
640
+ def reader(
641
+ f: Any,
642
+ dialect: Union[str, csv.Dialect] = "excel",
643
+ *,
644
+ type_dialect: TypeDialect = DEFAULT,
645
+ infer_types: bool = False,
646
+ infer_rows: int = 50,
647
+ **fmtparams: Any,
648
+ ) -> TypedReader:
649
+ return TypedReader(
650
+ f,
651
+ dialect=dialect,
652
+ type_dialect=type_dialect,
653
+ infer_types=infer_types,
654
+ infer_rows=infer_rows,
655
+ **fmtparams,
656
+ )
657
+
658
+
659
+ def DictReader(
660
+ f: Any,
661
+ dialect: Union[str, csv.Dialect] = "excel",
662
+ *,
663
+ type_dialect: TypeDialect = DEFAULT,
664
+ infer_types: bool = False,
665
+ infer_rows: int = 50,
666
+ **fmtparams: Any,
667
+ ) -> TypedDictReader:
668
+ return TypedDictReader(
669
+ f,
670
+ dialect=dialect,
671
+ type_dialect=type_dialect,
672
+ infer_types=infer_types,
673
+ infer_rows=infer_rows,
674
+ **fmtparams,
675
+ )
676
+
677
+
678
+ # ----------------------------
679
+ # Writers
680
+ # ----------------------------
681
+
682
+ class TypedDictWriter:
683
+ """
684
+ Typed-aware dict writer. `fieldnames` are header cells (may include typing/validators).
685
+ - Preserves header exactly as provided.
686
+ - Formats values canonically based on parsed types.
687
+ """
688
+
689
+ def __init__(
690
+ self,
691
+ f: Any,
692
+ fieldnames: Sequence[str],
693
+ dialect: Union[str, csv.Dialect] = "excel",
694
+ *,
695
+ type_dialect: TypeDialect = DEFAULT,
696
+ **fmtparams: Any,
697
+ ) -> None:
698
+ self._csv = csv.writer(f, dialect=dialect, **fmtparams)
699
+ self._td = type_dialect
700
+ self.raw_fieldnames = list(fieldnames)
701
+
702
+ self.schema = parse_header_row(self.raw_fieldnames, type_dialect)
703
+ self.fieldnames = self.schema.names
704
+
705
+ def writeheader(self) -> int:
706
+ return int(self._csv.writerow(self.raw_fieldnames))
707
+
708
+ def writerow(self, rowdict: Mapping[str, Any]) -> int:
709
+ out: List[str] = []
710
+ for spec in self.schema.columns:
711
+ v = rowdict.get(spec.name, None)
712
+ if v is None:
713
+ out.append("")
714
+ else:
715
+ out.append(spec.formatter(v))
716
+ return int(self._csv.writerow(out))
717
+
718
+ def writerows(self, rows: Iterable[Mapping[str, Any]]) -> None:
719
+ for r in rows:
720
+ self.writerow(r)
721
+
722
+
723
+ def writer(
724
+ f: Any,
725
+ dialect: Union[str, csv.Dialect] = "excel",
726
+ **fmtparams: Any,
727
+ ) -> Any:
728
+ return csv.writer(f, dialect=dialect, **fmtparams)
729
+
730
+
731
+ def DictWriter(
732
+ f: Any,
733
+ fieldnames: Sequence[str],
734
+ dialect: Union[str, csv.Dialect] = "excel",
735
+ *,
736
+ type_dialect: TypeDialect = DEFAULT,
737
+ **fmtparams: Any,
738
+ ) -> TypedDictWriter:
739
+ return TypedDictWriter(f, fieldnames, dialect=dialect, type_dialect=type_dialect, **fmtparams)
740
+
741
+
742
+ __all__ = [
743
+ "TypedCSVError",
744
+ "TypeDialect",
745
+ "DEFAULT",
746
+ "DEFAULT_TYPE_DIALECT",
747
+ "__version__",
748
+ "DictReader",
749
+ "DictWriter",
750
+ "reader",
751
+ "writer",
752
+ "infer_type",
753
+ ]
typedcsv_lib-0.1.0.dist-info/METADATA ADDED
@@ -0,0 +1,182 @@
1
+ Metadata-Version: 2.4
2
+ Name: typedcsv-lib
3
+ Version: 0.1.0
4
+ Summary: Typed CSVs via header-embedded types and validators
5
+ Author: Tiago Tresoldi
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/tresoldi/typedcsv
8
+ Requires-Python: >=3.10
9
+ Description-Content-Type: text/markdown
10
+ License-File: LICENSE
11
+ Provides-Extra: dev
12
+ Requires-Dist: pytest; extra == "dev"
13
+ Requires-Dist: ruff; extra == "dev"
14
+ Requires-Dist: mypy; extra == "dev"
15
+ Requires-Dist: twine; extra == "dev"
16
+ Dynamic: license-file
17
+
18
+ # typedcsv
19
+
20
+ Typed CSVs via **header-embedded types** (sigils or `:type`) plus optional **header-embedded validation** — **stdlib-only**, Python **3.10+**.
21
+
22
+ The core lives in a single module and is copy-pasteable into projects.
23
+
24
+ ```bash
25
+ pip install typedcsv-lib
26
+ ```
27
+
28
+ ---
29
+
30
+ ## Header typing
31
+
32
+ Declare types using either **suffix sigils** or **explicit `:type`** (not both on the same column).
33
+
34
+ | Type | Sigil (suffix) | Explicit |
35
+ |------------|-----------------|--------------|
36
+ | `int` | `#` | `:int` |
37
+ | `float` | `%` | `:float` |
38
+ | `bool` | `?` | `:bool` |
39
+ | `datetime` | `@` | `:datetime` |
40
+ | `str` | `$` | `:str` |
41
+
42
+ Untyped columns default to `str`.
43
+
44
+ **Logical column names** are the header names with the type marker removed:
45
+
46
+ - `age#` becomes key `"age"`
47
+ - `created:datetime` becomes key `"created"`
48
+
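+ For example, this header row mixes the two styles across columns (illustrative column names, not taken from the package):
+
+ ```csv
+ id#,name,price:float,active?
+ ```
+
+ It yields the logical keys `id`, `name`, `price`, and `active`, with `name` defaulting to `str`.
+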
49
+ ---
50
+
51
+ ## Validators
52
+
53
+ Add an optional validator clause after the type marker:
54
+
55
+ ```text
56
+ age# [min=0 max=120]
57
+ ratio% [min=0 max=1]
58
+ status$ [in=OPEN|CLOSED|PENDING]
59
+ code$ [re=^[A-Z]{3}\d{2}$]
60
+ created@ [min=2020-01-01T00:00:00 max=2030-12-31T23:59:59]
61
+ ```
62
+
63
+ Notes:
64
+
65
+ - Validators are space-separated `key=value` pairs inside `[ ... ]`.
66
+ - `re=` uses Python `re.fullmatch`.
67
+ - `in=` uses `|` as separator.
68
+ - Unknown validator keys raise an error.
69
+
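+ A minimal sketch of validators in action (hypothetical column names):
+
+ ```python
+ import io
+ import typedcsv
+
+ # Values are checked as each row is parsed.
+ data = "age# [min=0 max=120],status$ [in=OPEN|CLOSED]\n42,OPEN\n"
+ rows = list(typedcsv.DictReader(io.StringIO(data)))
+ # rows == [{'age': 42, 'status': 'OPEN'}]
+ ```
+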
70
+ ---
71
+
72
+ ## Missing values (nullable by default)
73
+
74
+ - An empty cell (`""`) is missing.
75
+ - For `str` columns, missing stays `""`.
76
+ - For non-`str` columns, missing becomes `None`.
77
+ - Missing values skip validation.
78
+
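+ A minimal sketch (hypothetical column names):
+
+ ```python
+ import io
+ import typedcsv
+
+ # In the second data row, both cells are empty.
+ data = "id#,note\n7,hello\n,\n"
+ rows = list(typedcsv.DictReader(io.StringIO(data)))
+ # rows == [{'id': 7, 'note': 'hello'}, {'id': None, 'note': ''}]
+ ```
+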
79
+ ---
80
+
81
+ ## Reading
82
+
83
+ ```python
84
+ import typedcsv
85
+
86
+ with open("data.csv", newline="") as f:
87
+     for row in typedcsv.DictReader(f):
88
+         print(row)
89
+ ```
90
+
91
+ Example CSV:
92
+
93
+ ```csv
94
+ id#,name$,active?,created@
95
+ 1,Alice,true,2021-05-01T12:30:00
96
+ 2,Bob,false,
97
+ ```
98
+
99
+ Produces:
100
+
101
+ ```python
102
+ {'id': 1, 'name': 'Alice', 'active': True, 'created': datetime(2021, 5, 1, 12, 30)}
103
+ {'id': 2, 'name': 'Bob', 'active': False, 'created': None}
104
+ ```
105
+
106
+ ---
107
+
108
+ ## Writing (canonical formatting)
109
+
110
+ - `None` → empty cell
111
+ - `bool` → `true` / `false`
112
+ - `datetime` → `isoformat()`
113
+ - `float` → `repr(f)`
114
+ - Header preserved exactly as provided to `DictWriter(fieldnames=...)`
115
+
116
+ ```python
117
+ import typedcsv
118
+ from datetime import datetime
119
+
120
+ rows = [
121
+ {"id": 1, "name": "Alice", "active": True, "created": datetime(2021, 5, 1, 12, 30)},
122
+ {"id": 2, "name": "Bob", "active": False, "created": None},
123
+ ]
124
+
125
+ with open("out.csv", "w", newline="") as f:
126
+     w = typedcsv.DictWriter(f, fieldnames=["id#", "name$", "active?", "created@"])
127
+     w.writeheader()
128
+     w.writerows(rows)
129
+ ```
130
+
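+ With the default `excel` dialect, the snippet above writes a file equivalent to the Reading example's input:
+
+ ```csv
+ id#,name$,active?,created@
+ 1,Alice,true,2021-05-01T12:30:00
+ 2,Bob,false,
+ ```
+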
131
+ ---
132
+
133
+ ## More examples
134
+
135
+ ### Validators with quoted values
136
+
137
+ ```csv
138
+ name$ [in="Alice Smith|Bob"]
139
+ ```
140
+
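+ The quotes wrap the whole value; the `|` split happens after unquoting, so this set accepts `Alice Smith` and `Bob`.
+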
141
+ ### Regex validation (fullmatch)
142
+
143
+ ```csv
144
+ code$ [re=^[A-Z]{3}\d{2}$]
145
+ ABC12
146
+ ```
147
+
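+ Because the pattern is applied with `re.fullmatch`, a cell such as `ABC123` would be rejected, while `ABC12` passes.
+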
148
+ ### Type inference for untyped columns
149
+
150
+ ```python
151
+ import typedcsv
152
+ import io
153
+
154
+ data = "a,b\n1,true\n2,false\n"
155
+ rows = list(typedcsv.DictReader(io.StringIO(data), infer_types=True))
156
+ ```
157
+
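+ With the defaults, `a` is inferred as `int` and `b` as `bool`, so `rows == [{'a': 1, 'b': True}, {'a': 2, 'b': False}]`. Inference is conservative: it only applies to columns whose header carries no sigil, no explicit `:type`, and no validators, and at most `infer_rows` rows (50 by default) are buffered as the sample.
+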
158
+ ---
159
+
160
+ ## Errors
161
+
162
+ Parsing/validation failures raise `TypedCSVError` with context:
163
+
164
+ - `row` (1-based; header row is 1)
165
+ - `col` (0-based)
166
+ - `column` (logical name)
167
+ - `header` (raw header cell)
168
+ - `value` (raw cell)
169
+ - `reason` (short message)
170
+
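+ A minimal sketch of catching a validation failure (hypothetical data):
+
+ ```python
+ import io
+ import typedcsv
+
+ data = "age# [min=0 max=120],name$\n200,Alice\n"
+ try:
+     list(typedcsv.DictReader(io.StringIO(data)))
+ except typedcsv.TypedCSVError as e:
+     print(e.row, e.col, e.column)  # 2 0 age
+     print(e.reason)                # e.g. Validation failed: value 200 > max '120'
+ ```
+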
171
+ ---
172
+
173
+ ## API reference (csv-compatible)
174
+
175
+ typedcsv mirrors Python's `csv` module API and is designed to be a drop-in replacement where you want typed rows.
176
+
177
+ - `typedcsv.reader(f, ...)` → yields typed list rows (header consumed)
178
+ - `typedcsv.DictReader(f, ...)` → yields typed dict rows keyed by logical names (header consumed)
179
+ - `typedcsv.writer(f, ...)` → returns a standard `csv.writer`
180
+ - `typedcsv.DictWriter(f, fieldnames, ...)` → writes typed dict rows with canonical formatting
181
+
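+ Two quick sketches with hypothetical data. First, the list-based reader, which also exposes the parsed schema:
+
+ ```python
+ import io
+ import typedcsv
+
+ r = typedcsv.reader(io.StringIO("id#,name$\n1,Alice\n"))
+ print(r.schema.names)   # ['id', 'name']
+ for row in r:
+     print(row)          # [1, 'Alice']
+ ```
+
+ All readers and writers also accept a `type_dialect`. For instance, a dialect that only recognizes `on`/`off` as booleans (an illustrative customization, not a built-in), continuing from the imports above:
+
+ ```python
+ td = typedcsv.TypeDialect(bool_true=("on",), bool_false=("off",))
+ rows = list(typedcsv.DictReader(io.StringIO("ok?\non\n"), type_dialect=td))
+ # rows == [{'ok': True}]
+ ```
+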
182
+ ---
typedcsv_lib-0.1.0.dist-info/RECORD ADDED
@@ -0,0 +1,6 @@
1
+ typedcsv/__init__.py,sha256=HrakWu7_DClLO_fr7QxuMiiCRkh-KKmml6peQp1MrrA,23231
2
+ typedcsv_lib-0.1.0.dist-info/licenses/LICENSE,sha256=Zk08FTz7vcsaHBzOtcIq2TV3ysOk7E1FWBMy1TSW4nU,1071
3
+ typedcsv_lib-0.1.0.dist-info/METADATA,sha256=luDxHzfHrRYVIVkX-2s9wkfgWBB3VihXhFxDjsgZ8Ng,4184
4
+ typedcsv_lib-0.1.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
5
+ typedcsv_lib-0.1.0.dist-info/top_level.txt,sha256=bmp_SckH38pANjXLr22XYDA37A4hJGK4ilx4R_uLqUA,9
6
+ typedcsv_lib-0.1.0.dist-info/RECORD,,
typedcsv_lib-0.1.0.dist-info/WHEEL ADDED
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (80.10.2)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
typedcsv_lib-0.1.0.dist-info/licenses/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Tiago Tresoldi
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
typedcsv_lib-0.1.0.dist-info/top_level.txt ADDED
@@ -0,0 +1 @@
1
+ typedcsv