tha-num-runner 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,36 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: ["main"]
6
+ pull_request:
7
+
8
+ jobs:
9
+ test:
10
+ runs-on: ubuntu-latest
11
+ strategy:
12
+ matrix:
13
+ python-version: ["3.10", "3.11", "3.12"]
14
+
15
+ steps:
16
+ - uses: actions/checkout@v4
17
+
18
+ - name: Install uv
19
+ uses: astral-sh/setup-uv@v4
20
+ with:
21
+ version: "latest"
22
+
23
+ - name: Set up Python ${{ matrix.python-version }}
24
+ run: uv python install ${{ matrix.python-version }}
25
+
26
+ - name: Install dependencies
27
+ run: uv sync --extra dev --python ${{ matrix.python-version }}
28
+
29
+ - name: Lint
30
+ run: uv run ruff check src/ tests/
31
+
32
+ - name: Test
33
+ run: uv run pytest
34
+
35
+ - name: Type check
36
+ run: uv run mypy src/
@@ -0,0 +1,53 @@
1
+ name: Publish
2
+
3
+ on:
4
+ push:
5
+ tags:
6
+ - "v*"
7
+
8
+ jobs:
9
+ build:
10
+ runs-on: ubuntu-latest
11
+ steps:
12
+ - uses: actions/checkout@v4
13
+ - name: Install uv
14
+ uses: astral-sh/setup-uv@v4
15
+ - name: Build
16
+ run: uv build
17
+ - name: Upload dist
18
+ uses: actions/upload-artifact@v4
19
+ with:
20
+ name: dist
21
+ path: dist/
22
+
23
+ publish-testpypi:
24
+ needs: build
25
+ runs-on: ubuntu-latest
26
+ environment: testpypi
27
+ permissions:
28
+ id-token: write
29
+ steps:
30
+ - name: Download dist
31
+ uses: actions/download-artifact@v4
32
+ with:
33
+ name: dist
34
+ path: dist/
35
+ - name: Publish to TestPyPI
36
+ uses: pypa/gh-action-pypi-publish@release/v1
37
+ with:
38
+ repository-url: https://test.pypi.org/legacy/
39
+
40
+ publish-pypi:
41
+ needs: publish-testpypi
42
+ runs-on: ubuntu-latest
43
+ environment: pypi
44
+ permissions:
45
+ id-token: write
46
+ steps:
47
+ - name: Download dist
48
+ uses: actions/download-artifact@v4
49
+ with:
50
+ name: dist
51
+ path: dist/
52
+ - name: Publish to PyPI
53
+ uses: pypa/gh-action-pypi-publish@release/v1
@@ -0,0 +1,218 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[codz]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py.cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ # Pipfile.lock
96
+
97
+ # UV
98
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ # uv.lock
102
+
103
+ # poetry
104
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
105
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
106
+ # commonly ignored for libraries.
107
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
108
+ # poetry.lock
109
+ # poetry.toml
110
+
111
+ # pdm
112
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
113
+ # pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
114
+ # https://pdm-project.org/en/latest/usage/project/#working-with-version-control
115
+ # pdm.lock
116
+ # pdm.toml
117
+ .pdm-python
118
+ .pdm-build/
119
+
120
+ # pixi
121
+ # Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
122
+ # pixi.lock
123
+ # Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
124
+ # in the .venv directory. It is recommended not to include this directory in version control.
125
+ .pixi
126
+
127
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
128
+ __pypackages__/
129
+
130
+ # Celery stuff
131
+ celerybeat-schedule
132
+ celerybeat.pid
133
+
134
+ # Redis
135
+ *.rdb
136
+ *.aof
137
+ *.pid
138
+
139
+ # RabbitMQ
140
+ mnesia/
141
+ rabbitmq/
142
+ rabbitmq-data/
143
+
144
+ # ActiveMQ
145
+ activemq-data/
146
+
147
+ # SageMath parsed files
148
+ *.sage.py
149
+
150
+ # Environments
151
+ .env
152
+ .envrc
153
+ .venv
154
+ env/
155
+ venv/
156
+ ENV/
157
+ env.bak/
158
+ venv.bak/
159
+
160
+ # Spyder project settings
161
+ .spyderproject
162
+ .spyproject
163
+
164
+ # Rope project settings
165
+ .ropeproject
166
+
167
+ # mkdocs documentation
168
+ /site
169
+
170
+ # mypy
171
+ .mypy_cache/
172
+ .dmypy.json
173
+ dmypy.json
174
+
175
+ # Pyre type checker
176
+ .pyre/
177
+
178
+ # pytype static type analyzer
179
+ .pytype/
180
+
181
+ # Cython debug symbols
182
+ cython_debug/
183
+
184
+ # PyCharm
185
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
186
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
187
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
188
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
189
+ # .idea/
190
+
191
+ # Abstra
192
+ # Abstra is an AI-powered process automation framework.
193
+ # Ignore directories containing user credentials, local state, and settings.
194
+ # Learn more at https://abstra.io/docs
195
+ .abstra/
196
+
197
+ # Visual Studio Code
198
+ # Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
199
+ # that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
200
+ # and can be added to the global gitignore or merged into this file. However, if you prefer,
201
+ # you could uncomment the following to ignore the entire vscode folder
202
+ # .vscode/
203
+ # Temporary file for partial code execution
204
+ tempCodeRunnerFile.py
205
+
206
+ # Ruff stuff:
207
+ .ruff_cache/
208
+
209
+ # PyPI configuration file
210
+ .pypirc
211
+
212
+ # Marimo
213
+ marimo/_static/
214
+ marimo/_lsp/
215
+ __marimo__/
216
+
217
+ # Streamlit
218
+ .streamlit/secrets.toml
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Nathan Wright
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,151 @@
1
+ Metadata-Version: 2.4
2
+ Name: tha-num-runner
3
+ Version: 0.1.0
4
+ Summary: A small Python library that cleans and parses numeric strings — strips currency symbols, commas, and casts to int or float, on single values or CSV-style row dicts.
5
+ License: MIT
6
+ License-File: LICENSE
7
+ Keywords: csv,currency,number,numeric,parse,rows
8
+ Classifier: License :: OSI Approved :: MIT License
9
+ Classifier: Operating System :: OS Independent
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Typing :: Typed
12
+ Requires-Python: >=3.10
13
+ Provides-Extra: dev
14
+ Requires-Dist: mypy; extra == 'dev'
15
+ Requires-Dist: pytest; extra == 'dev'
16
+ Requires-Dist: ruff; extra == 'dev'
17
+ Description-Content-Type: text/markdown
18
+
19
+ # tha-num-runner
20
+
21
+ [![CI](https://github.com/tha-guy-nate/tha-num-runner/actions/workflows/ci.yml/badge.svg)](https://github.com/tha-guy-nate/tha-num-runner/actions/workflows/ci.yml)
22
+
23
+ A small Python library that cleans and parses numeric strings — strips currency symbols, commas, and casts to int or float, on single values or CSV-style row dicts.
24
+
25
+ ## Install
26
+
27
+ ```bash
28
+ pip install tha-num-runner
29
+ ```
30
+
31
+ ## Quick start
32
+
33
+ ```python
34
+ from tha_num_runner import ThaNum
35
+
36
+ formatter = ThaNum()
37
+
38
+ # Single value
39
+ ThaNum.format_num("$1,234.56") # 1234.56
40
+ ThaNum.format_num("£2,000.00", cast="int") # 2000
41
+ ThaNum.format_num("(500.75)", round_to=1) # -500.8
42
+ ThaNum.format_num("€9.99", cast="int") # 9
43
+
44
+ # Row dicts
45
+ rows = [
46
+ {"Org BK": "school-001", "Budget": "$1,200.00"},
47
+ {"Org BK": "school-002", "Budget": "£800.50"},
48
+ ]
49
+
50
+ result = formatter.format_num_rows(rows, column="Budget", cast="float", round_to=2)
51
+ # [{"Org BK": "school-001", "Budget": 1200.0}, ...]
52
+ ```
53
+
54
+ ## Cleaned automatically
55
+
56
+ | Input | Output |
57
+ |---|---|
58
+ | `"$1,234.56"` | `1234.56` |
59
+ | `"£2,000"` | `2000.0` |
60
+ | `"€9.99"` | `9.99` |
61
+ | `"(500)"` | `-500.0` |
62
+ | `"($1,200.00)"` | `-1200.0` |
63
+ | `" 42 "` | `42.0` |
64
+
65
+ Supported currency symbols: `$`, `€`, `£`, `¥`, `₹`, `₩`, `₽`, `₺`, `₫`, `฿`, `₱`, `₴`
66
+
67
+ ## API
68
+
69
+ ### `ThaNum`
70
+
71
+ ```python
72
+ ThaNum()
73
+ ```
74
+
75
+ ### `ThaNum.format_num()`
76
+
77
+ ```python
78
+ ThaNum.format_num(
79
+ value: str | int | float,
80
+ *,
81
+ strip_currency: bool = True, # remove currency symbols
82
+ strip_commas: bool = True, # remove comma thousand separators
83
+ round_to: int | None = None, # decimal places to round to
84
+ cast: str = "float", # "float" | "int"
85
+ ) -> float | int
86
+ ```
87
+
88
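+ Also callable as an instance method. Raises `NumError` on unparseable input or invalid `cast`. With `cast="int"` the parsed value is truncated toward zero (after any `round_to` rounding), which is why `"€9.99"` becomes `9`.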
89
+
90
+ Parenthetical negatives (`(100)`) are converted to negative numbers automatically.
91
+
92
+ ### `formatter.format_num_rows()`
93
+
94
+ ```python
95
+ formatter.format_num_rows(
96
+ rows, # list of row dicts
97
+ column, # column containing numeric strings
98
+ *,
99
+ strip_currency=True,
100
+ strip_commas=True,
101
+ round_to=None,
102
+ cast="float",
103
+ out_column=None, # write to a new column instead of overwriting
104
+ on_error="error", # "error" | "skip" | "blank"
105
+ skip_statuses=None, # rows whose "row status" is in this list are copied through unparsed; None means ["error", "warning"]
106
+ ) -> list[dict]
107
+ ```
108
+
109
+ Results are also stored in `formatter.rows`.
110
+
111
+ #### `on_error`
112
+
113
+ | Value | Behaviour |
114
+ |---|---|
115
+ | `"error"` | `row status="error"`, `message=...`, output column set to `""` |
116
+ | `"skip"` | Row returned unchanged |
117
+ | `"blank"` | Output column set to `""`, row status untouched |
118
+
119
+ ### Composing with `tha-csv-runner`
120
+
121
+ ```python
122
+ from tha_csv_runner import ThaCSV
123
+ from tha_num_runner import ThaNum
124
+
125
+ runner = ThaCSV()
126
+ runner.read("Step 1 of 2", "input.csv", ["Org BK", "Budget"])
127
+
128
+ formatter = ThaNum()
129
+ enriched = formatter.format_num_rows(
130
+ rows=runner.rows,
131
+ column="Budget",
132
+ cast="float",
133
+ round_to=2,
134
+ )
135
+
136
+ runner.write("Step 2 of 2", "output.csv", rows=enriched)
137
+ ```
138
+
139
+ ## Alternatives
140
+
141
+ This library is intentionally limited in scope — it handles one specific pattern: cleaning messy numeric strings from CSV exports and casting them to Python numbers, with row-level error capture for the `tha-*` ecosystem. For more general needs:
142
+
143
+ - [**babel**](https://babel.pocoo.org) — locale-aware number parsing (`babel.numbers.parse_number`) that handles locale-specific decimal and grouping separators
144
+ - [**price-parser**](https://github.com/scrapinghub/price-parser) — extracts prices and currency from arbitrary text, useful when the format is completely unknown
145
+ - [**pandas**](https://pandas.pydata.org) — `pd.to_numeric()` with `errors="coerce"` for vectorized numeric coercion on DataFrames
146
+
147
+ Choose this library when you want currency stripping, comma cleaning, and parenthetical negative handling AND per-row error capture that slots into the `tha-*` pipeline — no other single package gives you all of that with the `row status` pattern.
148
+
149
+ ## License
150
+
151
+ MIT
@@ -0,0 +1,133 @@
1
+ # tha-num-runner
2
+
3
+ [![CI](https://github.com/tha-guy-nate/tha-num-runner/actions/workflows/ci.yml/badge.svg)](https://github.com/tha-guy-nate/tha-num-runner/actions/workflows/ci.yml)
4
+
5
+ A small Python library that cleans and parses numeric strings — strips currency symbols, commas, and casts to int or float, on single values or CSV-style row dicts.
6
+
7
+ ## Install
8
+
9
+ ```bash
10
+ pip install tha-num-runner
11
+ ```
12
+
13
+ ## Quick start
14
+
15
+ ```python
16
+ from tha_num_runner import ThaNum
17
+
18
+ formatter = ThaNum()
19
+
20
+ # Single value
21
+ ThaNum.format_num("$1,234.56") # 1234.56
22
+ ThaNum.format_num("£2,000.00", cast="int") # 2000
23
+ ThaNum.format_num("(500.75)", round_to=1) # -500.8
24
+ ThaNum.format_num("€9.99", cast="int") # 9
25
+
26
+ # Row dicts
27
+ rows = [
28
+ {"Org BK": "school-001", "Budget": "$1,200.00"},
29
+ {"Org BK": "school-002", "Budget": "£800.50"},
30
+ ]
31
+
32
+ result = formatter.format_num_rows(rows, column="Budget", cast="float", round_to=2)
33
+ # [{"Org BK": "school-001", "Budget": 1200.0}, ...]
34
+ ```
35
+
36
+ ## Cleaned automatically
37
+
38
+ | Input | Output |
39
+ |---|---|
40
+ | `"$1,234.56"` | `1234.56` |
41
+ | `"£2,000"` | `2000.0` |
42
+ | `"€9.99"` | `9.99` |
43
+ | `"(500)"` | `-500.0` |
44
+ | `"($1,200.00)"` | `-1200.0` |
45
+ | `" 42 "` | `42.0` |
46
+
47
+ Supported currency symbols: `$`, `€`, `£`, `¥`, `₹`, `₩`, `₽`, `₺`, `₫`, `฿`, `₱`, `₴`
48
+
49
+ ## API
50
+
51
+ ### `ThaNum`
52
+
53
+ ```python
54
+ ThaNum()
55
+ ```
56
+
57
+ ### `ThaNum.format_num()`
58
+
59
+ ```python
60
+ ThaNum.format_num(
61
+ value: str | int | float,
62
+ *,
63
+ strip_currency: bool = True, # remove currency symbols
64
+ strip_commas: bool = True, # remove comma thousand separators
65
+ round_to: int | None = None, # decimal places to round to
66
+ cast: str = "float", # "float" | "int"
67
+ ) -> float | int
68
+ ```
69
+
70
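+ Also callable as an instance method. Raises `NumError` on unparseable input or invalid `cast`. With `cast="int"` the parsed value is truncated toward zero (after any `round_to` rounding), which is why `"€9.99"` becomes `9`.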
71
+
72
+ Parenthetical negatives (`(100)`) are converted to negative numbers automatically.
73
+
74
+ ### `formatter.format_num_rows()`
75
+
76
+ ```python
77
+ formatter.format_num_rows(
78
+ rows, # list of row dicts
79
+ column, # column containing numeric strings
80
+ *,
81
+ strip_currency=True,
82
+ strip_commas=True,
83
+ round_to=None,
84
+ cast="float",
85
+ out_column=None, # write to a new column instead of overwriting
86
+ on_error="error", # "error" | "skip" | "blank"
87
+ skip_statuses=None, # rows whose "row status" is in this list are copied through unparsed; None means ["error", "warning"]
88
+ ) -> list[dict]
89
+ ```
90
+
91
+ Results are also stored in `formatter.rows`.
92
+
93
+ #### `on_error`
94
+
95
+ | Value | Behaviour |
96
+ |---|---|
97
+ | `"error"` | `row status="error"`, `message=...`, output column set to `""` |
98
+ | `"skip"` | Row returned unchanged |
99
+ | `"blank"` | Output column set to `""`, row status untouched |
100
+
101
+ ### Composing with `tha-csv-runner`
102
+
103
+ ```python
104
+ from tha_csv_runner import ThaCSV
105
+ from tha_num_runner import ThaNum
106
+
107
+ runner = ThaCSV()
108
+ runner.read("Step 1 of 2", "input.csv", ["Org BK", "Budget"])
109
+
110
+ formatter = ThaNum()
111
+ enriched = formatter.format_num_rows(
112
+ rows=runner.rows,
113
+ column="Budget",
114
+ cast="float",
115
+ round_to=2,
116
+ )
117
+
118
+ runner.write("Step 2 of 2", "output.csv", rows=enriched)
119
+ ```
120
+
121
+ ## Alternatives
122
+
123
+ This library is intentionally limited in scope — it handles one specific pattern: cleaning messy numeric strings from CSV exports and casting them to Python numbers, with row-level error capture for the `tha-*` ecosystem. For more general needs:
124
+
125
+ - [**babel**](https://babel.pocoo.org) — locale-aware number parsing (`babel.numbers.parse_number`) that handles locale-specific decimal and grouping separators
126
+ - [**price-parser**](https://github.com/scrapinghub/price-parser) — extracts prices and currency from arbitrary text, useful when the format is completely unknown
127
+ - [**pandas**](https://pandas.pydata.org) — `pd.to_numeric()` with `errors="coerce"` for vectorized numeric coercion on DataFrames
128
+
129
+ Choose this library when you want currency stripping, comma cleaning, and parenthetical negative handling AND per-row error capture that slots into the `tha-*` pipeline — no other single package gives you all of that with the `row status` pattern.
130
+
131
+ ## License
132
+
133
+ MIT
@@ -0,0 +1,30 @@
1
+ [project]
2
+ name = "tha-num-runner"
3
+ version = "0.1.0"
4
+ description = "A small Python library that cleans and parses numeric strings — strips currency symbols, commas, and casts to int or float, on single values or CSV-style row dicts."
5
+ readme = "README.md"
6
+ requires-python = ">=3.10"
7
+ license = { text = "MIT" }
8
+ keywords = ["number", "numeric", "currency", "parse", "csv", "rows"]
9
+ classifiers = [
10
+ "Programming Language :: Python :: 3",
11
+ "License :: OSI Approved :: MIT License",
12
+ "Operating System :: OS Independent",
13
+ "Typing :: Typed",
14
+ ]
15
+
16
+ [project.optional-dependencies]
17
+ dev = ["pytest", "ruff", "mypy"]
18
+
19
+ [build-system]
20
+ requires = ["hatchling"]
21
+ build-backend = "hatchling.build"
22
+
23
+ [tool.hatch.build.targets.wheel]
24
+ packages = ["src/tha_num_runner"]
25
+
26
+ [tool.ruff]
27
+ line-length = 100
28
+
29
+ [tool.mypy]
30
+ strict = true
@@ -0,0 +1,7 @@
1
+ """tha-num-runner: clean and parse numeric strings on single values or row dicts."""
2
+
3
+ from .errors import NumError
4
+ from .runner import ThaNum
5
+
6
+ __version__ = "0.1.0"
7
+ __all__ = ["ThaNum", "NumError"]
@@ -0,0 +1,2 @@
1
class NumError(Exception):
    """Signal a bad tha-num-runner option or a value that cannot be parsed as a number."""
File without changes
@@ -0,0 +1,117 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Any
4
+
5
+ from .errors import NumError
6
+
7
+ _CURRENCY = frozenset("$€£¥₹₩₽₺₫฿₱₴")
8
+
9
+
10
class ThaNum:
    """Clean and parse numeric strings, either one value at a time or per row dict."""

    def __init__(self) -> None:
        # Output of the most recent format_num_rows() call (empty until then).
        self.rows: list[dict[str, Any]] = []

    # ------------------------------------------------------------------
    # Single-value method
    # ------------------------------------------------------------------

    @staticmethod
    def format_num(
        value: str | int | float,
        *,
        strip_currency: bool = True,
        strip_commas: bool = True,
        round_to: int | None = None,
        cast: str = "float",
    ) -> float | int:
        """Parse *value* into a float or int.

        Strings are trimmed; a value wrapped in parentheses such as ``"(100)"``
        is negated; currency symbols and commas are optionally removed before
        the text is handed to ``float()``. ``int`` and ``float`` inputs skip the
        string clean-up and are only converted, rounded and cast.

        Args:
            value: Text or number to parse. ``bool`` is rejected.
            strip_currency: Drop every character found in ``_CURRENCY``.
            strip_commas: Drop every ``","`` character.
            round_to: Number of decimal places passed to ``round()``;
                ``None`` leaves the value unrounded.
            cast: ``"float"`` or ``"int"``. ``"int"`` truncates toward zero
                after any rounding.

        Returns:
            The parsed number as ``float`` or ``int`` according to *cast*.

        Raises:
            NumError: If *cast* is invalid, *value* is a ``bool``, the text is
                not a number, or the number cannot be represented in the
                requested type (for example ``"inf"`` with ``cast="int"``).
        """
        if cast not in ("float", "int"):
            raise NumError(f"Invalid cast {cast!r} — must be 'float' or 'int'")

        # bool is a subclass of int, so it must be rejected before the numeric branch.
        if isinstance(value, bool):
            raise NumError(f"Cannot parse {value!r} as a number")

        if isinstance(value, (int, float)):
            try:
                result = float(value)
            except OverflowError as exc:
                # Integers too large for a float used to leak OverflowError.
                raise NumError(f"Cannot parse {value!r} as a number") from exc
        else:
            cleaned = str(value).strip()

            # Handle parenthetical negatives: (100) → -100
            negative = cleaned.startswith("(") and cleaned.endswith(")")
            if negative:
                cleaned = cleaned[1:-1]

            if strip_currency:
                cleaned = "".join(ch for ch in cleaned if ch not in _CURRENCY)

            if strip_commas:
                cleaned = cleaned.replace(",", "")

            # One final trim covers whitespace exposed by the removals above.
            cleaned = cleaned.strip()

            try:
                result = float(cleaned)
            except ValueError as exc:
                raise NumError(f"Cannot parse {value!r} as a number") from exc

            if negative:
                result = -result

        if round_to is not None:
            result = round(result, round_to)

        if cast == "int":
            try:
                return int(result)
            except (ValueError, OverflowError) as exc:
                # float() accepts "nan"/"inf", which int() cannot represent.
                raise NumError(f"Cannot convert {value!r} to int") from exc
        return result

    # ------------------------------------------------------------------
    # Row method
    # ------------------------------------------------------------------

    @staticmethod
    def _record_failure(
        row: dict[str, Any], target: str, on_error: str, message: str
    ) -> None:
        """Apply the *on_error* policy to *row* (mutated in place) after a failed parse."""
        if on_error == "skip":
            return  # leave the row exactly as it was
        row[target] = ""
        if on_error == "error":
            row["row status"] = "error"
            row["message"] = message

    def format_num_rows(
        self,
        rows: list[dict[str, Any]],
        column: str,
        *,
        strip_currency: bool = True,
        strip_commas: bool = True,
        round_to: int | None = None,
        cast: str = "float",
        out_column: str | None = None,
        on_error: str = "error",
        skip_statuses: list[str] | None = None,
    ) -> list[dict[str, Any]]:
        """Run :meth:`format_num` over ``row[column]`` for every row.

        Input rows are never mutated; each output row is a shallow copy. Rows
        whose ``"row status"`` is in *skip_statuses* are copied through
        unparsed. The result is also stored on ``self.rows``.

        Args:
            rows: Row dicts to process.
            column: Key holding the value to parse.
            strip_currency, strip_commas, round_to, cast: Forwarded to
                :meth:`format_num`.
            out_column: Key to write the parsed value to; defaults to *column*.
            on_error: What to do when a row fails: ``"error"`` blanks the
                output column and sets ``"row status"``/``"message"``;
                ``"blank"`` only blanks the output column; ``"skip"`` leaves
                the row unchanged.
            skip_statuses: Statuses to pass through untouched; ``None`` means
                ``["error", "warning"]``.

        Returns:
            A new list of new row dicts.

        Raises:
            NumError: If *on_error* or *cast* is not one of the allowed values.
        """
        if on_error not in ("error", "skip", "blank"):
            raise NumError(f"Invalid on_error {on_error!r} — must be 'error', 'skip', or 'blank'")
        # Validate cast once here: it is a configuration mistake, not a per-row
        # data problem, and must surface even when rows is empty.
        if cast not in ("float", "int"):
            raise NumError(f"Invalid cast {cast!r} — must be 'float' or 'int'")

        _skip = ["error", "warning"] if skip_statuses is None else skip_statuses
        target = out_column if out_column is not None else column
        result: list[dict[str, Any]] = []

        for row in rows:
            row_copy = dict(row)

            if row.get("row status") in _skip:
                result.append(row_copy)
                continue

            if column not in row_copy:
                # Report a readable message instead of KeyError's bare "'name'".
                self._record_failure(
                    row_copy, target, on_error, f"Column {column!r} not found in row"
                )
            else:
                try:
                    row_copy[target] = self.format_num(
                        row_copy[column],
                        strip_currency=strip_currency,
                        strip_commas=strip_commas,
                        round_to=round_to,
                        cast=cast,
                    )
                except Exception as exc:
                    # Deliberately broad: on_error is a per-row best-effort
                    # boundary, so any failure is captured on the row.
                    self._record_failure(row_copy, target, on_error, str(exc))

            result.append(row_copy)

        self.rows = result
        return result
@@ -0,0 +1,16 @@
1
+ import pytest
2
+ from tha_num_runner import ThaNum
3
+
4
+
5
@pytest.fixture
def runner() -> ThaNum:
    """Provide a fresh ThaNum instance for each test."""
    instance = ThaNum()
    return instance
8
+
9
+
10
@pytest.fixture
def rows() -> list[dict]:
    """Provide three sample rows with blank "row status" and "message" fields."""
    samples = (("1", "$1,234.56"), ("2", "£2,000.00"), ("3", "500"))
    return [
        {"id": row_id, "Amount": amount, "row status": "", "message": ""}
        for row_id, amount in samples
    ]
@@ -0,0 +1,152 @@
1
+ import pytest
2
+ from tha_num_runner import ThaNum, NumError
3
+
4
+
5
+ # ---------------------------------------------------------------------------
6
+ # format_num — single value
7
+ # ---------------------------------------------------------------------------
8
+
9
def test_format_num_plain_float():
    """A bare decimal string parses to the matching float."""
    parsed = ThaNum.format_num("3.14")
    assert parsed == 3.14


def test_format_num_plain_int_string():
    """An integer-looking string still comes back as a float by default."""
    parsed = ThaNum.format_num("42")
    assert parsed == 42.0


def test_format_num_currency_dollar():
    """Dollar sign and thousands comma are removed."""
    parsed = ThaNum.format_num("$1,234.56")
    assert parsed == 1234.56


def test_format_num_currency_pound():
    """Pound sign and thousands comma are removed."""
    parsed = ThaNum.format_num("£2,000.00")
    assert parsed == 2000.0


def test_format_num_currency_euro():
    """Euro sign is removed."""
    parsed = ThaNum.format_num("€9.99")
    assert parsed == 9.99


def test_format_num_no_strip_currency():
    """With strip_currency off, a currency symbol makes the value unparseable."""
    with pytest.raises(NumError):
        ThaNum.format_num("$100", strip_currency=False)


def test_format_num_no_strip_commas():
    """With strip_commas off, a thousands comma makes the value unparseable."""
    with pytest.raises(NumError):
        ThaNum.format_num("1,000", strip_commas=False)


def test_format_num_parenthetical_negative():
    """Parentheses around a number mean negative."""
    parsed = ThaNum.format_num("(500)")
    assert parsed == -500.0


def test_format_num_parenthetical_negative_with_currency():
    """Parenthetical negatives combine with currency and comma stripping."""
    parsed = ThaNum.format_num("($1,200.00)")
    assert parsed == -1200.0


def test_format_num_cast_int():
    """cast="int" truncates and returns a real int."""
    parsed = ThaNum.format_num("3.7", cast="int")
    assert parsed == 3
    assert isinstance(parsed, int)


def test_format_num_cast_float_default():
    """The default cast yields a float."""
    parsed = ThaNum.format_num("3")
    assert isinstance(parsed, float)


def test_format_num_round_to():
    """round_to limits the number of decimal places."""
    parsed = ThaNum.format_num("3.14159", round_to=2)
    assert parsed == 3.14


def test_format_num_from_int():
    """An int input is converted to float."""
    parsed = ThaNum.format_num(42)
    assert parsed == 42.0


def test_format_num_from_float():
    """A float input passes through."""
    parsed = ThaNum.format_num(3.14)
    assert parsed == 3.14


def test_format_num_whitespace():
    """Surrounding whitespace is ignored."""
    parsed = ThaNum.format_num("  100  ")
    assert parsed == 100.0


def test_format_num_invalid_string():
    """Non-numeric text raises NumError."""
    with pytest.raises(NumError):
        ThaNum.format_num("N/A")


def test_format_num_empty_string():
    """An empty string raises NumError."""
    with pytest.raises(NumError):
        ThaNum.format_num("")


def test_format_num_bool_raises():
    """Booleans are rejected even though bool subclasses int."""
    with pytest.raises(NumError):
        ThaNum.format_num(True)  # type: ignore[arg-type]


def test_format_num_invalid_cast():
    """An unknown cast name raises NumError."""
    with pytest.raises(NumError):
        ThaNum.format_num("1", cast="str")
73
+
74
+
75
+ # ---------------------------------------------------------------------------
76
+ # format_num_rows
77
+ # ---------------------------------------------------------------------------
78
+
79
def test_format_num_rows_basic(runner, rows):
    """Every row's Amount is parsed in order."""
    parsed = runner.format_num_rows(rows, "Amount")
    amounts = [item["Amount"] for item in parsed]
    assert amounts == [1234.56, 2000.0, 500.0]


def test_format_num_rows_immutability(runner, rows):
    """The input row dicts are left untouched."""
    before = rows[0]["Amount"]
    runner.format_num_rows(rows, "Amount")
    assert rows[0]["Amount"] == before


def test_format_num_rows_new_list(runner, rows):
    """A new list object is returned."""
    parsed = runner.format_num_rows(rows, "Amount")
    assert parsed is not rows


def test_format_num_rows_out_column(runner, rows):
    """out_column receives the parsed value and the source column is kept."""
    parsed = runner.format_num_rows(rows, "Amount", out_column="Amount Parsed")
    first = parsed[0]
    assert "Amount Parsed" in first
    assert first["Amount"] == "$1,234.56"


def test_format_num_rows_stores_self_rows(runner, rows):
    """The returned list is the same object stored on runner.rows."""
    parsed = runner.format_num_rows(rows, "Amount")
    assert runner.rows is parsed


def test_format_num_rows_cast_int(runner, rows):
    """cast="int" is forwarded to the single-value parser."""
    parsed = runner.format_num_rows(rows, "Amount", cast="int")
    amount = parsed[0]["Amount"]
    assert amount == 1234
    assert isinstance(amount, int)


def test_format_num_rows_round_to(runner, rows):
    """round_to is forwarded to the single-value parser."""
    parsed = runner.format_num_rows(rows, "Amount", round_to=1)
    assert parsed[0]["Amount"] == 1234.6


def test_format_num_rows_on_error_error(runner):
    """The default policy marks the row as an error and blanks the column."""
    bad_rows = [{"Amount": "N/A", "row status": "", "message": ""}]
    first = runner.format_num_rows(bad_rows, "Amount")[0]
    assert first["row status"] == "error"
    assert first["Amount"] == ""


def test_format_num_rows_on_error_skip(runner):
    """on_error="skip" leaves a failing row unchanged."""
    bad_rows = [{"Amount": "N/A", "row status": "", "message": ""}]
    first = runner.format_num_rows(bad_rows, "Amount", on_error="skip")[0]
    assert first["Amount"] == "N/A"
    assert first["row status"] == ""


def test_format_num_rows_on_error_blank(runner):
    """on_error="blank" blanks the column but not the status."""
    bad_rows = [{"Amount": "N/A", "row status": "", "message": ""}]
    first = runner.format_num_rows(bad_rows, "Amount", on_error="blank")[0]
    assert first["Amount"] == ""
    assert first["row status"] == ""


def test_format_num_rows_skip_statuses_default(runner, rows):
    """Rows already marked "error" are passed through by default."""
    rows[0]["row status"] = "error"
    parsed = runner.format_num_rows(rows, "Amount")
    assert parsed[0]["Amount"] == "$1,234.56"  # skipped
    assert parsed[1]["Amount"] == 2000.0


def test_format_num_rows_skip_statuses_custom(runner, rows):
    """A custom skip_statuses list controls which rows are passed through."""
    rows[0]["row status"] = "pending"
    parsed = runner.format_num_rows(rows, "Amount", skip_statuses=["pending"])
    assert parsed[0]["Amount"] == "$1,234.56"  # skipped
    assert parsed[1]["Amount"] == 2000.0


def test_format_num_rows_skip_statuses_empty(runner, rows):
    """An empty skip_statuses list processes every row."""
    rows[0]["row status"] = "error"
    parsed = runner.format_num_rows(rows, "Amount", skip_statuses=[])
    assert parsed[0]["Amount"] == 1234.56  # processed despite error status


def test_format_num_rows_invalid_on_error(runner, rows):
    """An unknown on_error value raises NumError."""
    with pytest.raises(NumError):
        runner.format_num_rows(rows, "Amount", on_error="raise")


def test_format_num_rows_empty_input(runner):
    """No rows in, no rows out."""
    parsed = runner.format_num_rows([], "Amount")
    assert parsed == []