typedcsv-lib 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Tiago Tresoldi
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,182 @@
1
+ Metadata-Version: 2.4
2
+ Name: typedcsv-lib
3
+ Version: 0.1.0
4
+ Summary: Typed CSVs via header-embedded types and validators
5
+ Author: Tiago Tresoldi
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/tresoldi/typedcsv
8
+ Requires-Python: >=3.10
9
+ Description-Content-Type: text/markdown
10
+ License-File: LICENSE
11
+ Provides-Extra: dev
12
+ Requires-Dist: pytest; extra == "dev"
13
+ Requires-Dist: ruff; extra == "dev"
14
+ Requires-Dist: mypy; extra == "dev"
15
+ Requires-Dist: twine; extra == "dev"
16
+ Dynamic: license-file
17
+
18
+ # typedcsv
19
+
20
+ Typed CSVs via **header-embedded types** (sigils or `:type`) plus optional **header-embedded validation** — **stdlib-only**, Python **3.10+**.
21
+
22
+ The core lives in a single module and is copy-pasteable into projects.
23
+
24
+ ```bash
25
+ pip install typedcsv-lib
26
+ ```
27
+
28
+ ---
29
+
30
+ ## Header typing
31
+
32
+ Declare types using either **suffix sigils** or **explicit `:type`** (not both on the same column).
33
+
34
+ | Type | Sigil (suffix) | Explicit |
35
+ |------------|-----------------|--------------|
36
+ | `int` | `#` | `:int` |
37
+ | `float` | `%` | `:float` |
38
+ | `bool` | `?` | `:bool` |
39
+ | `datetime` | `@` | `:datetime` |
40
+ | `str` | `$` | `:str` |
41
+
42
+ Untyped columns default to `str`.
43
+
44
+ **Logical column names** are the header names with the type marker removed:
45
+
46
+ - `age#` becomes key `"age"`
47
+ - `created:datetime` becomes key `"created"`
48
+
49
+ ---
50
+
51
+ ## Validators
52
+
53
+ Add an optional validator clause after the type marker:
54
+
55
+ ```text
56
+ age# [min=0 max=120]
57
+ ratio% [min=0 max=1]
58
+ status$ [in=OPEN|CLOSED|PENDING]
59
+ code$ [re=^[A-Z]{3}\d{2}$]
60
+ created@ [min=2020-01-01T00:00:00 max=2030-12-31T23:59:59]
61
+ ```
62
+
63
+ Notes:
64
+
65
+ - Validators are space-separated `key=value` pairs inside `[ ... ]`.
66
+ - `re=` uses Python `re.fullmatch`.
67
+ - `in=` uses `|` as separator.
68
+ - Unknown validator keys raise an error.
69
+
70
+ ---
71
+
72
+ ## Missing values (nullable by default)
73
+
74
+ - An empty cell (`""`) is treated as a missing value.
75
+ - For `str` columns, missing stays `""`.
76
+ - For non-`str` columns, missing becomes `None`.
77
+ - Missing values skip validation.
78
+
79
+ ---
80
+
81
+ ## Reading
82
+
83
+ ```python
84
+ import typedcsv
85
+
86
+ with open("data.csv", newline="") as f:
87
+ for row in typedcsv.DictReader(f):
88
+ print(row)
89
+ ```
90
+
91
+ Example CSV:
92
+
93
+ ```csv
94
+ id#,name$,active?,created@
95
+ 1,Alice,true,2021-05-01T12:30:00
96
+ 2,Bob,false,
97
+ ```
98
+
99
+ Produces:
100
+
101
+ ```python
102
+ {'id': 1, 'name': 'Alice', 'active': True, 'created': datetime(2021, 5, 1, 12, 30)}
103
+ {'id': 2, 'name': 'Bob', 'active': False, 'created': None}
104
+ ```
105
+
106
+ ---
107
+
108
+ ## Writing (canonical formatting)
109
+
110
+ - `None` → empty cell
111
+ - `bool` → `true` / `false`
112
+ - `datetime` → `isoformat()`
113
+ - `float` → `repr(f)`
114
+ - Header preserved exactly as provided to `DictWriter(fieldnames=...)`
115
+
116
+ ```python
117
+ import typedcsv
118
+ from datetime import datetime
119
+
120
+ rows = [
121
+ {"id": 1, "name": "Alice", "active": True, "created": datetime(2021, 5, 1, 12, 30)},
122
+ {"id": 2, "name": "Bob", "active": False, "created": None},
123
+ ]
124
+
125
+ with open("out.csv", "w", newline="") as f:
126
+ w = typedcsv.DictWriter(f, fieldnames=["id#", "name$", "active?", "created@"])
127
+ w.writeheader()
128
+ w.writerows(rows)
129
+ ```
130
+
131
+ ---
132
+
133
+ ## More examples
134
+
135
+ ### Validators with quoted values
136
+
137
+ ```csv
138
+ name$ [in="Alice Smith"|Bob]
139
+ ```
140
+
141
+ ### Regex validation (fullmatch)
142
+
143
+ ```csv
144
+ code$ [re=^[A-Z]{3}\d{2}$]
145
+ ABC12
146
+ ```
147
+
148
+ ### Type inference for untyped columns
149
+
150
+ ```python
151
+ import typedcsv
152
+ import io
153
+
154
+ data = "a,b\n1,true\n2,false\n"
155
+ rows = list(typedcsv.DictReader(io.StringIO(data), infer_types=True))
156
+ ```
157
+
158
+ ---
159
+
160
+ ## Errors
161
+
162
+ Parsing/validation failures raise `TypedCSVError` with context:
163
+
164
+ - `row` (1-based; header row is 1)
165
+ - `col` (0-based)
166
+ - `column` (logical name)
167
+ - `header` (raw header cell)
168
+ - `value` (raw cell)
169
+ - `reason` (short message)
170
+
171
+ ---
172
+
173
+ ## API reference (csv-compatible)
174
+
175
+ typedcsv mirrors Python's `csv` module API and is designed to be a drop-in replacement where you want typed rows.
176
+
177
+ - `typedcsv.reader(f, ...)` → yields typed list rows (header consumed)
178
+ - `typedcsv.DictReader(f, ...)` → yields typed dict rows keyed by logical names (header consumed)
179
+ - `typedcsv.writer(f, ...)` → returns a standard `csv.writer`
180
+ - `typedcsv.DictWriter(f, fieldnames, ...)` → writes typed dict rows with canonical formatting
181
+
182
+ ---
@@ -0,0 +1,165 @@
1
+ # typedcsv
2
+
3
+ Typed CSVs via **header-embedded types** (sigils or `:type`) plus optional **header-embedded validation** — **stdlib-only**, Python **3.10+**.
4
+
5
+ The core lives in a single module and is copy-pasteable into projects.
6
+
7
+ ```bash
8
+ pip install typedcsv-lib
9
+ ```
10
+
11
+ ---
12
+
13
+ ## Header typing
14
+
15
+ Declare types using either **suffix sigils** or **explicit `:type`** (not both on the same column).
16
+
17
+ | Type | Sigil (suffix) | Explicit |
18
+ |------------|-----------------|--------------|
19
+ | `int` | `#` | `:int` |
20
+ | `float` | `%` | `:float` |
21
+ | `bool` | `?` | `:bool` |
22
+ | `datetime` | `@` | `:datetime` |
23
+ | `str` | `$` | `:str` |
24
+
25
+ Untyped columns default to `str`.
26
+
27
+ **Logical column names** are the header names with the type marker removed:
28
+
29
+ - `age#` becomes key `"age"`
30
+ - `created:datetime` becomes key `"created"`
31
+
32
+ ---
33
+
34
+ ## Validators
35
+
36
+ Add an optional validator clause after the type marker:
37
+
38
+ ```text
39
+ age# [min=0 max=120]
40
+ ratio% [min=0 max=1]
41
+ status$ [in=OPEN|CLOSED|PENDING]
42
+ code$ [re=^[A-Z]{3}\d{2}$]
43
+ created@ [min=2020-01-01T00:00:00 max=2030-12-31T23:59:59]
44
+ ```
45
+
46
+ Notes:
47
+
48
+ - Validators are space-separated `key=value` pairs inside `[ ... ]`.
49
+ - `re=` uses Python `re.fullmatch`.
50
+ - `in=` uses `|` as separator.
51
+ - Unknown validator keys raise an error.
52
+
53
+ ---
54
+
55
+ ## Missing values (nullable by default)
56
+
57
+ - An empty cell (`""`) is treated as a missing value.
58
+ - For `str` columns, missing stays `""`.
59
+ - For non-`str` columns, missing becomes `None`.
60
+ - Missing values skip validation.
61
+
62
+ ---
63
+
64
+ ## Reading
65
+
66
+ ```python
67
+ import typedcsv
68
+
69
+ with open("data.csv", newline="") as f:
70
+ for row in typedcsv.DictReader(f):
71
+ print(row)
72
+ ```
73
+
74
+ Example CSV:
75
+
76
+ ```csv
77
+ id#,name$,active?,created@
78
+ 1,Alice,true,2021-05-01T12:30:00
79
+ 2,Bob,false,
80
+ ```
81
+
82
+ Produces:
83
+
84
+ ```python
85
+ {'id': 1, 'name': 'Alice', 'active': True, 'created': datetime(2021, 5, 1, 12, 30)}
86
+ {'id': 2, 'name': 'Bob', 'active': False, 'created': None}
87
+ ```
88
+
89
+ ---
90
+
91
+ ## Writing (canonical formatting)
92
+
93
+ - `None` → empty cell
94
+ - `bool` → `true` / `false`
95
+ - `datetime` → `isoformat()`
96
+ - `float` → `repr(f)`
97
+ - Header preserved exactly as provided to `DictWriter(fieldnames=...)`
98
+
99
+ ```python
100
+ import typedcsv
101
+ from datetime import datetime
102
+
103
+ rows = [
104
+ {"id": 1, "name": "Alice", "active": True, "created": datetime(2021, 5, 1, 12, 30)},
105
+ {"id": 2, "name": "Bob", "active": False, "created": None},
106
+ ]
107
+
108
+ with open("out.csv", "w", newline="") as f:
109
+ w = typedcsv.DictWriter(f, fieldnames=["id#", "name$", "active?", "created@"])
110
+ w.writeheader()
111
+ w.writerows(rows)
112
+ ```
113
+
114
+ ---
115
+
116
+ ## More examples
117
+
118
+ ### Validators with quoted values
119
+
120
+ ```csv
121
+ name$ [in="Alice Smith"|Bob]
122
+ ```
123
+
124
+ ### Regex validation (fullmatch)
125
+
126
+ ```csv
127
+ code$ [re=^[A-Z]{3}\d{2}$]
128
+ ABC12
129
+ ```
130
+
131
+ ### Type inference for untyped columns
132
+
133
+ ```python
134
+ import typedcsv
135
+ import io
136
+
137
+ data = "a,b\n1,true\n2,false\n"
138
+ rows = list(typedcsv.DictReader(io.StringIO(data), infer_types=True))
139
+ ```
140
+
141
+ ---
142
+
143
+ ## Errors
144
+
145
+ Parsing/validation failures raise `TypedCSVError` with context:
146
+
147
+ - `row` (1-based; header row is 1)
148
+ - `col` (0-based)
149
+ - `column` (logical name)
150
+ - `header` (raw header cell)
151
+ - `value` (raw cell)
152
+ - `reason` (short message)
153
+
154
+ ---
155
+
156
+ ## API reference (csv-compatible)
157
+
158
+ typedcsv mirrors Python's `csv` module API and is designed to be a drop-in replacement where you want typed rows.
159
+
160
+ - `typedcsv.reader(f, ...)` → yields typed list rows (header consumed)
161
+ - `typedcsv.DictReader(f, ...)` → yields typed dict rows keyed by logical names (header consumed)
162
+ - `typedcsv.writer(f, ...)` → returns a standard `csv.writer`
163
+ - `typedcsv.DictWriter(f, fieldnames, ...)` → writes typed dict rows with canonical formatting
164
+
165
+ ---
@@ -0,0 +1,35 @@
1
[build-system]
# The [project] table uses PEP 639 metadata (an SPDX `license` string and
# `license-files`); setuptools only understands these from the 77 series on,
# so older backends would reject this file.
requires = ["setuptools>=77.0.3"]
build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "typedcsv-lib"
7
+ version = "0.1.0"
8
+ description = "Typed CSVs via header-embedded types and validators"
9
+ readme = "README.md"
10
+ requires-python = ">=3.10"
11
+ license = "MIT"
12
+ license-files = ["LICENSE"]
13
+ authors = [
14
+ {name = "Tiago Tresoldi"}
15
+ ]
16
+ urls = {"Homepage" = "https://github.com/tresoldi/typedcsv"}
17
+
18
+ [project.optional-dependencies]
19
+ dev = ["pytest", "ruff", "mypy", "twine"]
20
+
21
+ [tool.setuptools]
22
+ packages = ["typedcsv"]
23
+
24
+ [tool.mypy]
25
+ python_version = "3.10"
26
+ warn_unused_ignores = true
27
+ warn_redundant_casts = true
28
+ warn_return_any = true
29
+ disallow_untyped_defs = true
30
+ check_untyped_defs = true
31
+ no_implicit_optional = true
32
+
33
+ [[tool.mypy.overrides]]
34
+ module = ["test_typedcsv", "tests.test_typedcsv"]
35
+ disallow_untyped_defs = false
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,157 @@
1
+ import io
2
+ import importlib.util
3
+ import sys
4
+ from datetime import datetime
5
+ from pathlib import Path
6
+
7
+ import pytest
8
+
9
# Load the package's __init__.py by file path, one directory above this test
# file, and register it in sys.modules under the private name "typedcsv_pkg"
# before executing it.
_PKG_INIT = Path(__file__).resolve().parent.parent / "typedcsv" / "__init__.py"
_SPEC = importlib.util.spec_from_file_location("typedcsv_pkg", _PKG_INIT)
assert _SPEC is not None and _SPEC.loader is not None
typedcsv = importlib.util.module_from_spec(_SPEC)
sys.modules[_SPEC.name] = typedcsv
_SPEC.loader.exec_module(typedcsv)
15
+
16
+
17
def read_rows(text, **kwargs):
    """Feed ``text`` to ``typedcsv.reader`` and return all yielded rows as a list.

    Extra keyword arguments are forwarded to ``typedcsv.reader`` unchanged.
    """
    return list(typedcsv.reader(io.StringIO(text), **kwargs))
20
+
21
+
22
def read_dicts(text, **kwargs):
    """Feed ``text`` to ``typedcsv.DictReader`` and return all yielded rows as a list.

    Extra keyword arguments are forwarded to ``typedcsv.DictReader`` unchanged.
    """
    return list(typedcsv.DictReader(io.StringIO(text), **kwargs))
25
+
26
+
27
def test_header_parsing_conflict_sigil_and_explicit():
    """A header cell with both a sigil and an explicit ``:type`` raises TypedCSVError."""
    with pytest.raises(typedcsv.TypedCSVError) as excinfo:
        read_rows("age#:int\n1\n")
    message = str(excinfo.value)
    assert "both explicit type and sigil" in message
32
+
33
+
34
def test_header_parsing_unknown_explicit_type_is_error():
    """An explicit type name that is not recognised (``:integer``) raises TypedCSVError."""
    with pytest.raises(typedcsv.TypedCSVError) as excinfo:
        read_rows("age:integer\n1\n")
    message = str(excinfo.value)
    assert "Unknown explicit type" in message
39
+
40
+
41
def test_header_parsing_duplicate_logical_names():
    """Two header cells that reduce to the same logical name raise TypedCSVError."""
    with pytest.raises(typedcsv.TypedCSVError) as excinfo:
        read_rows("age#,age:int\n1,2\n")
    message = str(excinfo.value)
    assert "Duplicate logical column name" in message
46
+
47
+
48
def test_header_parsing_malformed_validators_clause():
    """A validators clause missing its closing bracket raises TypedCSVError."""
    with pytest.raises(typedcsv.TypedCSVError) as excinfo:
        read_rows("age# [min=0\n1\n")
    message = str(excinfo.value)
    assert "Malformed validators clause" in message
53
+
54
+
55
def test_validators_enforcement_int_min_max_in():
    """The row ``2`` is accepted; the row ``4`` (not among the ``in=`` choices) raises."""
    source = io.StringIO("age# [min=0 max=10 in=1|2|3]\n2\n4\n")
    rows = typedcsv.reader(source)
    accepted = next(rows)
    assert accepted[0] == 2
    # Rows are read one at a time so the failure is pinned to the second row.
    with pytest.raises(typedcsv.TypedCSVError):
        next(rows)
62
+
63
+
64
def test_validators_unknown_key_error():
    """A validator key the library does not accept (``nope``) raises TypedCSVError."""
    with pytest.raises(typedcsv.TypedCSVError) as excinfo:
        read_rows("age# [nope=1]\n1\n")
    message = str(excinfo.value)
    assert "not allowed" in message
69
+
70
+
71
def test_missing_values_behavior():
    """An empty ``str`` cell stays ``""`` while an empty ``int`` cell becomes ``None``."""
    first_row = read_rows("name$,age#\n,\n")[0]
    assert first_row[0] == ""
    assert first_row[1] is None
76
+
77
+
78
def test_missing_values_skip_validation():
    """An empty cell yields ``None`` without error although the header declares ``min=10``."""
    first_row = read_rows("age# [min=10]\n\n")[0]
    assert first_row[0] is None
82
+
83
+
84
def test_bool_parsing_variants():
    """Each accepted spelling of true/false parses to the matching ``bool``."""
    # The eight data rows alternate between a true spelling and a false spelling.
    parsed = [row[0] for row in read_rows("b?\ntrue\nFALSE\nT\nf\nYes\nno\n1\n0\n")]
    assert parsed == [True, False] * 4
88
+
89
+
90
def test_datetime_parsing_and_min_max():
    """A timestamp inside the declared range parses to ``datetime``; one before ``min`` raises."""
    source = io.StringIO(
        "ts@ [min=2020-01-01T00:00:00 max=2020-12-31T23:59:59]\n2020-06-01T12:00:00\n2019-01-01T00:00:00\n"
    )
    rows = typedcsv.reader(source)
    in_range = next(rows)
    assert isinstance(in_range[0], datetime)
    # The second row is dated 2019, earlier than the declared minimum.
    with pytest.raises(typedcsv.TypedCSVError):
        next(rows)
97
+
98
+
99
def test_re_fullmatch_for_str():
    """``ABC12`` satisfies the ``re=`` validator; ``AB12`` does not and raises."""
    source = io.StringIO("code$ [re=^[A-Z]{3}\\d{2}$]\nABC12\nAB12\n")
    rows = typedcsv.reader(source)
    matching = next(rows)
    assert matching[0] == "ABC12"
    with pytest.raises(typedcsv.TypedCSVError):
        next(rows)
106
+
107
+
108
def test_error_context_fields_for_parse_error():
    """A cell that cannot be parsed as ``int`` raises an error carrying full context."""
    with pytest.raises(typedcsv.TypedCSVError) as excinfo:
        read_rows("age#\nnope\n")
    error = excinfo.value
    # Checked in the same order as the attributes are listed: position first,
    # then names, then the offending raw value.
    expected_context = [
        ("row", 2),
        ("col", 0),
        ("column", "age"),
        ("header", "age#"),
        ("value", "nope"),
    ]
    for attribute, expected in expected_context:
        assert getattr(error, attribute) == expected
    assert "Parse failed" in error.reason
119
+
120
+
121
def test_writer_canonicalization_and_float_repr():
    """DictWriter emits the header verbatim and formats each typed value canonically."""
    buffer = io.StringIO()
    writer = typedcsv.DictWriter(buffer, fieldnames=["i#", "b?", "f%", "t@", "s$"])
    writer.writeheader()
    record = {
        "i": 1,
        "b": True,
        "f": 1.5,
        "t": datetime(2021, 5, 1, 12, 30),
        "s": "x",
    }
    writer.writerow(record)
    lines = buffer.getvalue().splitlines()
    assert lines[0] == "i#,b?,f%,t@,s$"
    assert lines[1].startswith("1,true,1.5,2021-05-01T12:30:00,x")
136
+
137
def test_round_trip_dictwriter_to_dictreader():
    """Rows written by DictWriter are read back by DictReader with their typed values."""
    buffer = io.StringIO()
    writer = typedcsv.DictWriter(buffer, fieldnames=["id#", "name$", "active?", "created@"])
    writer.writeheader()
    writer.writerow(
        {"id": 1, "name": "Alice", "active": True, "created": datetime(2021, 5, 1, 12, 30)}
    )
    writer.writerow({"id": 2, "name": "Bob", "active": False, "created": None})

    # Rewind so the reader sees what was just written.
    buffer.seek(0)
    rows = list(typedcsv.DictReader(buffer))
    alice = rows[0]
    assert alice["id"] == 1
    assert alice["name"] == "Alice"
    assert alice["active"] is True
    assert alice["created"] == datetime(2021, 5, 1, 12, 30)
    assert rows[1]["created"] is None
151
+
152
+
153
def test_infer_types_untyped_only_without_validators():
    """With ``infer_types=True``, untyped ``a`` reads as ``1`` while ``b`` (with validators) stays ``"x"``."""
    first_row = read_rows("a,b [minlen=1]\n1,x\n2,y\n", infer_types=True)[0]
    assert first_row[0] == 1
    assert first_row[1] == "x"