tha-str-runner 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,36 @@
1
---
# Continuous integration: lint, test, and type-check the package on every
# Python version listed in the matrix.
name: CI

on:
  push:
    branches:
      - main
  pull_request:

# Least privilege: every step below only needs to read the repository.
permissions:
  contents: read

jobs:
  test:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        # Versions are quoted so that "3.10" is not parsed as the float 3.1.
        python-version: ["3.10", "3.11", "3.12"]

    steps:
      - uses: actions/checkout@v4

      - name: Install uv
        uses: astral-sh/setup-uv@v4
        with:
          version: "latest"

      - name: Set up Python ${{ matrix.python-version }}
        run: uv python install ${{ matrix.python-version }}

      # The "dev" extra provides pytest, ruff, and mypy used by the steps below.
      - name: Install dependencies
        run: uv sync --extra dev --python ${{ matrix.python-version }}

      - name: Lint
        run: uv run ruff check src/ tests/

      - name: Test
        run: uv run pytest

      - name: Type check
        run: uv run mypy src/
@@ -0,0 +1,53 @@
1
---
# Release pipeline: build once on a version tag, publish the same artifacts to
# TestPyPI first, and only then to PyPI.
name: Publish

on:
  push:
    tags:
      - "v*"

# Least-privilege default for jobs that do not declare their own permissions
# (the build job). The publish jobs override this with their own block.
permissions:
  contents: read

jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Install uv
        uses: astral-sh/setup-uv@v4
      - name: Build
        run: uv build
      # Hand the built distributions to the publish jobs.
      - name: Upload dist
        uses: actions/upload-artifact@v4
        with:
          name: dist
          path: dist/

  publish-testpypi:
    needs: build
    runs-on: ubuntu-latest
    environment: testpypi
    permissions:
      # Allows the job to request an OIDC token; no password is configured, so
      # the publish action relies on it (trusted publishing).
      id-token: write
    steps:
      - name: Download dist
        uses: actions/download-artifact@v4
        with:
          name: dist
          path: dist/
      - name: Publish to TestPyPI
        uses: pypa/gh-action-pypi-publish@release/v1
        with:
          repository-url: https://test.pypi.org/legacy/
          # Tolerate files already uploaded by an earlier run of the same tag
          # so a retry is not blocked before it reaches the PyPI job. The PyPI
          # job deliberately keeps failing loudly on duplicates.
          skip-existing: true

  publish-pypi:
    needs: publish-testpypi
    runs-on: ubuntu-latest
    environment: pypi
    permissions:
      id-token: write
    steps:
      - name: Download dist
        uses: actions/download-artifact@v4
        with:
          name: dist
          path: dist/
      - name: Publish to PyPI
        uses: pypa/gh-action-pypi-publish@release/v1
@@ -0,0 +1,218 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[codz]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py.cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ # Pipfile.lock
96
+
97
+ # UV
98
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ # uv.lock
102
+
103
+ # poetry
104
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
105
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
106
+ # commonly ignored for libraries.
107
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
108
+ # poetry.lock
109
+ # poetry.toml
110
+
111
+ # pdm
112
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
113
+ # pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
114
+ # https://pdm-project.org/en/latest/usage/project/#working-with-version-control
115
+ # pdm.lock
116
+ # pdm.toml
117
+ .pdm-python
118
+ .pdm-build/
119
+
120
+ # pixi
121
+ # Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
122
+ # pixi.lock
123
+ # Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
124
+ # in the .venv directory. It is recommended not to include this directory in version control.
125
+ .pixi
126
+
127
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
128
+ __pypackages__/
129
+
130
+ # Celery stuff
131
+ celerybeat-schedule
132
+ celerybeat.pid
133
+
134
+ # Redis
135
+ *.rdb
136
+ *.aof
137
+ *.pid
138
+
139
+ # RabbitMQ
140
+ mnesia/
141
+ rabbitmq/
142
+ rabbitmq-data/
143
+
144
+ # ActiveMQ
145
+ activemq-data/
146
+
147
+ # SageMath parsed files
148
+ *.sage.py
149
+
150
+ # Environments
151
+ .env
152
+ .envrc
153
+ .venv
154
+ env/
155
+ venv/
156
+ ENV/
157
+ env.bak/
158
+ venv.bak/
159
+
160
+ # Spyder project settings
161
+ .spyderproject
162
+ .spyproject
163
+
164
+ # Rope project settings
165
+ .ropeproject
166
+
167
+ # mkdocs documentation
168
+ /site
169
+
170
+ # mypy
171
+ .mypy_cache/
172
+ .dmypy.json
173
+ dmypy.json
174
+
175
+ # Pyre type checker
176
+ .pyre/
177
+
178
+ # pytype static type analyzer
179
+ .pytype/
180
+
181
+ # Cython debug symbols
182
+ cython_debug/
183
+
184
+ # PyCharm
185
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
186
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
187
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
188
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
189
+ # .idea/
190
+
191
+ # Abstra
192
+ # Abstra is an AI-powered process automation framework.
193
+ # Ignore directories containing user credentials, local state, and settings.
194
+ # Learn more at https://abstra.io/docs
195
+ .abstra/
196
+
197
+ # Visual Studio Code
198
+ # Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
199
+ # that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
200
+ # and can be added to the global gitignore or merged into this file. However, if you prefer,
201
+ # you could uncomment the following to ignore the entire vscode folder
202
+ # .vscode/
203
+ # Temporary file for partial code execution
204
+ tempCodeRunnerFile.py
205
+
206
+ # Ruff stuff:
207
+ .ruff_cache/
208
+
209
+ # PyPI configuration file
210
+ .pypirc
211
+
212
+ # Marimo
213
+ marimo/_static/
214
+ marimo/_lsp/
215
+ __marimo__/
216
+
217
+ # Streamlit
218
+ .streamlit/secrets.toml
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Nathan Wright
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,191 @@
1
+ Metadata-Version: 2.4
2
+ Name: tha-str-runner
3
+ Version: 0.1.0
4
+ Summary: A small Python library that normalizes and slugifies strings — works on single values or CSV-style row dicts.
5
+ License: MIT
6
+ License-File: LICENSE
7
+ Keywords: csv,normalize,rows,slugify,string
8
+ Classifier: License :: OSI Approved :: MIT License
9
+ Classifier: Operating System :: OS Independent
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Typing :: Typed
12
+ Requires-Python: >=3.10
13
+ Provides-Extra: dev
14
+ Requires-Dist: mypy; extra == 'dev'
15
+ Requires-Dist: pytest; extra == 'dev'
16
+ Requires-Dist: ruff; extra == 'dev'
17
+ Description-Content-Type: text/markdown
18
+
19
+ # tha-str-runner
20
+
21
+ [![CI](https://github.com/tha-guy-nate/tha-str-runner/actions/workflows/ci.yml/badge.svg)](https://github.com/tha-guy-nate/tha-str-runner/actions/workflows/ci.yml)
22
+
23
+ A small Python library that normalizes and slugifies strings — works on single values or CSV-style row dicts.
24
+
25
+ ## Install
26
+
27
+ ```bash
28
+ pip install tha-str-runner
29
+ ```
30
+
31
+ ## Quick start
32
+
33
+ ```python
34
+ from tha_str_runner import ThaStr
35
+
36
+ formatter = ThaStr()
37
+
38
+ # Single value — normalize
39
+ ThaStr.format_str(" Hello World ", case="lower") # "hello world"
40
+ ThaStr.format_str("foo & bar", replace={"&": "and"}) # "foo and bar"
41
+ ThaStr.format_str("foo123", replace={r"\d+": "#"}, regex=True) # "foo#"
42
+
43
+ # Single value — slugify
44
+ ThaStr.slugify("Hello World") # "hello-world"
45
+ ThaStr.slugify("café au lait") # "cafe-au-lait"
46
+ ThaStr.slugify("School 001", prefix="org-") # "org-school-001"
47
+
48
+ # Row dicts — normalize
49
+ rows = [
50
+ {"Org BK": "school-001", "Name": " Lincoln Elementary "},
51
+ {"Org BK": "school-002", "Name": "ROOSEVELT MIDDLE"},
52
+ ]
53
+
54
+ result = formatter.format_str_rows(rows, column="Name", case="title")
55
+ # [{"Org BK": "school-001", "Name": "Lincoln Elementary"}, ...]
56
+
57
+ # Row dicts — slugify (combine columns into a key)
58
+ result = formatter.slugify_rows(rows, columns=["Org BK", "Name"], out_column="Slug")
59
+ # [{"Org BK": "school-001", ..., "Slug": "school-001-lincoln-elementary"}, ...]
60
+ ```
61
+
62
+ ## API
63
+
64
+ ### `ThaStr`
65
+
66
+ ```python
67
+ ThaStr()
68
+ ```
69
+
70
+ ### `ThaStr.format_str()`
71
+
72
+ ```python
73
+ ThaStr.format_str(
74
+ value: str,
75
+ *,
76
+ strip: bool = True, # strip leading/trailing whitespace
77
+ case: str | None = None, # "upper" | "lower" | "title" | None
78
+ replace: dict[str, str] | None = None, # {old: new} substitutions
79
+ regex: bool = False, # treat replace keys as regex patterns
80
+ ) -> str
81
+ ```
82
+
83
+ Also callable as an instance method. Raises `StrError` for invalid `case`.
84
+
85
+ ### `formatter.format_str_rows()`
86
+
87
+ ```python
88
+ formatter.format_str_rows(
89
+ rows, # list of row dicts
90
+ column, # column containing strings
91
+ *,
92
+ strip=True,
93
+ case=None,
94
+ replace=None,
95
+ regex=False,
96
+ out_column=None, # write to a new column instead of overwriting
97
+ on_error="error", # "error" | "skip" | "blank"
98
+ skip_statuses=["error", "warning"],
99
+ ) -> list[dict]
100
+ ```
101
+
102
+ Results are also stored in `formatter.rows`.
103
+
104
+ #### `on_error`
105
+
106
+ | Value | Behaviour |
107
+ |---|---|
108
+ | `"error"` | `row status="error"`, `message=...`, output column set to `""` |
109
+ | `"skip"` | Row returned unchanged |
110
+ | `"blank"` | Output column set to `""`, row status untouched |
111
+
112
+ ### `ThaStr.slugify()`
113
+
114
+ ```python
115
+ ThaStr.slugify(
116
+ value: str,
117
+ *,
118
+ sep: str = "-", # separator between slug segments
119
+ prefix: str = "", # prepended to the slug
120
+ suffix: str = "", # appended to the slug
121
+ ) -> str
122
+ ```
123
+
124
+ Normalizes unicode to ASCII, lowercases, replaces non-alphanumeric runs with `sep`, and strips leading/trailing separators.
125
+
126
+ ```python
127
+ ThaStr.slugify("café au lait") # "cafe-au-lait"
128
+ ThaStr.slugify("Hello World", sep="_") # "hello_world"
129
+ ThaStr.slugify("abc", prefix="id-") # "id-abc"
130
+ ```
131
+
132
+ ### `formatter.slugify_rows()`
133
+
134
+ ```python
135
+ formatter.slugify_rows(
136
+ rows, # list of row dicts
137
+ columns, # column name, or list of column names to combine
138
+ out_column, # output column (always a new/derived column)
139
+ *,
140
+ sep="-",
141
+ prefix="",
142
+ suffix="",
143
+ on_error="error", # "error" | "skip" | "blank"
144
+ skip_statuses=["error", "warning"],
145
+ ) -> list[dict]
146
+ ```
147
+
148
+ When `columns` is a list, values are joined with `sep` before slugifying.
149
+
150
+ Results are also stored in `formatter.rows`.
151
+
152
+ ### Composing with `tha-csv-runner`
153
+
154
+ ```python
155
+ from tha_csv_runner import ThaCSV
156
+ from tha_str_runner import ThaStr
157
+
158
+ runner = ThaCSV()
159
+ runner.read("Step 1 of 3", "input.csv", ["Org BK", "Name"])
160
+
161
+ formatter = ThaStr()
162
+ enriched = formatter.format_str_rows(
163
+ rows=runner.rows,
164
+ column="Name",
165
+ case="title",
166
+ strip=True,
167
+ )
168
+ enriched = formatter.slugify_rows(
169
+ rows=enriched,
170
+ columns=["Org BK", "Name"],
171
+ out_column="Slug",
172
+ prefix="org-",
173
+ )
174
+
175
+ runner.write("Step 3 of 3", "output.csv", rows=enriched)
176
+ ```
177
+
178
+ ## Alternatives
179
+
180
+ This library is intentionally limited in scope — it applies a fixed set of common normalization operations (strip, case, replace) and slugifies values, with row-level integration for the `tha-*` ecosystem. For more flexible string handling:
181
+
182
+ - [**python-slugify**](https://github.com/un33k/python-slugify) — robust slugification with full unicode transliteration support and configurable stopword removal
183
+ - [**stringcase**](https://github.com/okunishinishi/python-stringcase) — case conversion (camel, snake, pascal, etc.) beyond upper/lower/title
184
+ - [**ftfy**](https://ftfy.readthedocs.io) — fixes broken unicode text (mojibake, garbled encodings) before normalization
185
+ - [**regex**](https://pypi.org/project/regex/) (drop-in replacement for the stdlib `re` module) — more powerful regex engine when `re` isn't enough for complex replace patterns
186
+
187
+ Choose this library when you want strip/case/replace normalization AND slugification AND per-row error capture that slots into the `tha-*` pipeline — no other single package gives you all three with the `row status` pattern.
188
+
189
+ ## License
190
+
191
+ MIT
@@ -0,0 +1,173 @@
1
+ # tha-str-runner
2
+
3
+ [![CI](https://github.com/tha-guy-nate/tha-str-runner/actions/workflows/ci.yml/badge.svg)](https://github.com/tha-guy-nate/tha-str-runner/actions/workflows/ci.yml)
4
+
5
+ A small Python library that normalizes and slugifies strings — works on single values or CSV-style row dicts.
6
+
7
+ ## Install
8
+
9
+ ```bash
10
+ pip install tha-str-runner
11
+ ```
12
+
13
+ ## Quick start
14
+
15
+ ```python
16
+ from tha_str_runner import ThaStr
17
+
18
+ formatter = ThaStr()
19
+
20
+ # Single value — normalize
21
+ ThaStr.format_str(" Hello World ", case="lower") # "hello world"
22
+ ThaStr.format_str("foo & bar", replace={"&": "and"}) # "foo and bar"
23
+ ThaStr.format_str("foo123", replace={r"\d+": "#"}, regex=True) # "foo#"
24
+
25
+ # Single value — slugify
26
+ ThaStr.slugify("Hello World") # "hello-world"
27
+ ThaStr.slugify("café au lait") # "cafe-au-lait"
28
+ ThaStr.slugify("School 001", prefix="org-") # "org-school-001"
29
+
30
+ # Row dicts — normalize
31
+ rows = [
32
+ {"Org BK": "school-001", "Name": " Lincoln Elementary "},
33
+ {"Org BK": "school-002", "Name": "ROOSEVELT MIDDLE"},
34
+ ]
35
+
36
+ result = formatter.format_str_rows(rows, column="Name", case="title")
37
+ # [{"Org BK": "school-001", "Name": "Lincoln Elementary"}, ...]
38
+
39
+ # Row dicts — slugify (combine columns into a key)
40
+ result = formatter.slugify_rows(rows, columns=["Org BK", "Name"], out_column="Slug")
41
+ # [{"Org BK": "school-001", ..., "Slug": "school-001-lincoln-elementary"}, ...]
42
+ ```
43
+
44
+ ## API
45
+
46
+ ### `ThaStr`
47
+
48
+ ```python
49
+ ThaStr()
50
+ ```
51
+
52
+ ### `ThaStr.format_str()`
53
+
54
+ ```python
55
+ ThaStr.format_str(
56
+ value: str,
57
+ *,
58
+ strip: bool = True, # strip leading/trailing whitespace
59
+ case: str | None = None, # "upper" | "lower" | "title" | None
60
+ replace: dict[str, str] | None = None, # {old: new} substitutions
61
+ regex: bool = False, # treat replace keys as regex patterns
62
+ ) -> str
63
+ ```
64
+
65
+ Also callable as an instance method. Raises `StrError` for invalid `case`.
66
+
67
+ ### `formatter.format_str_rows()`
68
+
69
+ ```python
70
+ formatter.format_str_rows(
71
+ rows, # list of row dicts
72
+ column, # column containing strings
73
+ *,
74
+ strip=True,
75
+ case=None,
76
+ replace=None,
77
+ regex=False,
78
+ out_column=None, # write to a new column instead of overwriting
79
+ on_error="error", # "error" | "skip" | "blank"
80
+ skip_statuses=["error", "warning"],
81
+ ) -> list[dict]
82
+ ```
83
+
84
+ Results are also stored in `formatter.rows`.
85
+
86
+ #### `on_error`
87
+
88
+ | Value | Behaviour |
89
+ |---|---|
90
+ | `"error"` | `row status="error"`, `message=...`, output column set to `""` |
91
+ | `"skip"` | Row returned unchanged |
92
+ | `"blank"` | Output column set to `""`, row status untouched |
93
+
94
+ ### `ThaStr.slugify()`
95
+
96
+ ```python
97
+ ThaStr.slugify(
98
+ value: str,
99
+ *,
100
+ sep: str = "-", # separator between slug segments
101
+ prefix: str = "", # prepended to the slug
102
+ suffix: str = "", # appended to the slug
103
+ ) -> str
104
+ ```
105
+
106
+ Normalizes unicode to ASCII, lowercases, replaces non-alphanumeric runs with `sep`, and strips leading/trailing separators.
107
+
108
+ ```python
109
+ ThaStr.slugify("café au lait") # "cafe-au-lait"
110
+ ThaStr.slugify("Hello World", sep="_") # "hello_world"
111
+ ThaStr.slugify("abc", prefix="id-") # "id-abc"
112
+ ```
113
+
114
+ ### `formatter.slugify_rows()`
115
+
116
+ ```python
117
+ formatter.slugify_rows(
118
+ rows, # list of row dicts
119
+ columns, # column name, or list of column names to combine
120
+ out_column, # output column (always a new/derived column)
121
+ *,
122
+ sep="-",
123
+ prefix="",
124
+ suffix="",
125
+ on_error="error", # "error" | "skip" | "blank"
126
+ skip_statuses=["error", "warning"],
127
+ ) -> list[dict]
128
+ ```
129
+
130
+ When `columns` is a list, values are joined with `sep` before slugifying.
131
+
132
+ Results are also stored in `formatter.rows`.
133
+
134
+ ### Composing with `tha-csv-runner`
135
+
136
+ ```python
137
+ from tha_csv_runner import ThaCSV
138
+ from tha_str_runner import ThaStr
139
+
140
+ runner = ThaCSV()
141
+ runner.read("Step 1 of 3", "input.csv", ["Org BK", "Name"])
142
+
143
+ formatter = ThaStr()
144
+ enriched = formatter.format_str_rows(
145
+ rows=runner.rows,
146
+ column="Name",
147
+ case="title",
148
+ strip=True,
149
+ )
150
+ enriched = formatter.slugify_rows(
151
+ rows=enriched,
152
+ columns=["Org BK", "Name"],
153
+ out_column="Slug",
154
+ prefix="org-",
155
+ )
156
+
157
+ runner.write("Step 3 of 3", "output.csv", rows=enriched)
158
+ ```
159
+
160
+ ## Alternatives
161
+
162
+ This library is intentionally limited in scope — it normalizes a fixed set of common string operations and slugifies values, with row-level integration for the `tha-*` ecosystem. For more flexible string handling:
163
+
164
+ - [**python-slugify**](https://github.com/un33k/python-slugify) — robust slugification with full unicode transliteration support and configurable stopword removal
165
+ - [**stringcase**](https://github.com/okunishinishi/python-stringcase) — case conversion (camel, snake, pascal, etc.) beyond upper/lower/title
166
+ - [**ftfy**](https://ftfy.readthedocs.io) — fixes broken unicode text (mojibake, garbled encodings) before normalization
167
+ - [**regex**](https://pypi.org/project/regex/) (stdlib drop-in) — more powerful regex engine when `re` isn't enough for complex replace patterns
168
+
169
+ Choose this library when you want strip/case/replace normalization AND slugification AND per-row error capture that slots into the `tha-*` pipeline — no other single package gives you all three with the `row status` pattern.
170
+
171
+ ## License
172
+
173
+ MIT
@@ -0,0 +1,30 @@
1
+ [project]
2
+ name = "tha-str-runner"
3
+ version = "0.1.0"
4
+ description = "A small Python library that normalizes and slugifies strings — works on single values or CSV-style row dicts."
5
+ readme = "README.md"
6
+ requires-python = ">=3.10"
7
+ license = { text = "MIT" }
8
+ keywords = ["string", "normalize", "slugify", "csv", "rows"]
9
+ classifiers = [
10
+ "Programming Language :: Python :: 3",
11
+ "License :: OSI Approved :: MIT License",
12
+ "Operating System :: OS Independent",
13
+ "Typing :: Typed",
14
+ ]
15
+
16
+ [project.optional-dependencies]
17
+ dev = ["pytest", "ruff", "mypy"]
18
+
19
+ [build-system]
20
+ requires = ["hatchling"]
21
+ build-backend = "hatchling.build"
22
+
23
+ [tool.hatch.build.targets.wheel]
24
+ packages = ["src/tha_str_runner"]
25
+
26
+ [tool.ruff]
27
+ line-length = 100
28
+
29
+ [tool.mypy]
30
+ strict = true
@@ -0,0 +1,7 @@
1
"""tha-str-runner: normalize and slugify strings, on single values or row dicts."""

# Re-export the public classes so callers can import them from the package root.
from .runner import ThaStr
from .errors import StrError

# Names exposed by ``from tha_str_runner import *``.
__all__ = [
    "ThaStr",
    "StrError",
]

# Package version string.
__version__ = "0.1.0"
@@ -0,0 +1,2 @@
1
class StrError(Exception):
    """Signals an invalid tha-str-runner configuration value.

    Subclasses ``Exception`` directly and adds no behaviour of its own.
    """
File without changes
@@ -0,0 +1,161 @@
1
+ from __future__ import annotations
2
+
3
+ import re
4
+ import unicodedata
5
+ from typing import Any
6
+
7
+ from .errors import StrError
8
+
9
+
10
+ class ThaStr:
11
+ def __init__(self) -> None:
12
+ self.rows: list[dict[str, Any]] = []
13
+
14
+ # ------------------------------------------------------------------
15
+ # Single-value methods
16
+ # ------------------------------------------------------------------
17
+
18
+ @staticmethod
19
+ def format_str(
20
+ value: str,
21
+ *,
22
+ strip: bool = True,
23
+ case: str | None = None,
24
+ replace: dict[str, str] | None = None,
25
+ regex: bool = False,
26
+ ) -> str:
27
+ if case not in (None, "upper", "lower", "title"):
28
+ raise StrError(f"Invalid case {case!r} — must be 'upper', 'lower', 'title', or None")
29
+
30
+ result = value
31
+ if strip:
32
+ result = result.strip()
33
+ if case == "upper":
34
+ result = result.upper()
35
+ elif case == "lower":
36
+ result = result.lower()
37
+ elif case == "title":
38
+ result = result.title()
39
+ if replace:
40
+ for pattern, repl in replace.items():
41
+ if regex:
42
+ result = re.sub(pattern, repl, result)
43
+ else:
44
+ result = result.replace(pattern, repl)
45
+ return result
46
+
47
+ @staticmethod
48
+ def slugify(
49
+ value: str,
50
+ *,
51
+ sep: str = "-",
52
+ prefix: str = "",
53
+ suffix: str = "",
54
+ ) -> str:
55
+ normalized = unicodedata.normalize("NFKD", value)
56
+ ascii_str = normalized.encode("ascii", "ignore").decode("ascii")
57
+ lowered = ascii_str.lower()
58
+ slug = re.sub(r"[^a-z0-9]+", sep, lowered)
59
+ slug = slug.strip(sep)
60
+ return f"{prefix}{slug}{suffix}"
61
+
62
+ # ------------------------------------------------------------------
63
+ # Row methods
64
+ # ------------------------------------------------------------------
65
+
66
+ def format_str_rows(
67
+ self,
68
+ rows: list[dict[str, Any]],
69
+ column: str,
70
+ *,
71
+ strip: bool = True,
72
+ case: str | None = None,
73
+ replace: dict[str, str] | None = None,
74
+ regex: bool = False,
75
+ out_column: str | None = None,
76
+ on_error: str = "error",
77
+ skip_statuses: list[str] | None = None,
78
+ ) -> list[dict[str, Any]]:
79
+ if on_error not in ("error", "skip", "blank"):
80
+ raise StrError(f"Invalid on_error {on_error!r} — must be 'error', 'skip', or 'blank'")
81
+
82
+ _skip = ["error", "warning"] if skip_statuses is None else skip_statuses
83
+ target = out_column if out_column is not None else column
84
+ result = []
85
+
86
+ for row in rows:
87
+ if row.get("row status") in _skip:
88
+ result.append(dict(row))
89
+ continue
90
+
91
+ row_copy = dict(row)
92
+ try:
93
+ formatted = self.format_str(
94
+ row_copy[column],
95
+ strip=strip,
96
+ case=case,
97
+ replace=replace,
98
+ regex=regex,
99
+ )
100
+ row_copy[target] = formatted
101
+ except Exception as exc:
102
+ if on_error == "error":
103
+ row_copy[target] = ""
104
+ row_copy["row status"] = "error"
105
+ row_copy["message"] = str(exc)
106
+ elif on_error == "blank":
107
+ row_copy[target] = ""
108
+ # on_error == "skip": row unchanged
109
+
110
+ result.append(row_copy)
111
+
112
+ self.rows = result
113
+ return result
114
+
115
+ def slugify_rows(
116
+ self,
117
+ rows: list[dict[str, Any]],
118
+ columns: str | list[str],
119
+ out_column: str,
120
+ *,
121
+ sep: str = "-",
122
+ prefix: str = "",
123
+ suffix: str = "",
124
+ on_error: str = "error",
125
+ skip_statuses: list[str] | None = None,
126
+ ) -> list[dict[str, Any]]:
127
+ if on_error not in ("error", "skip", "blank"):
128
+ raise StrError(f"Invalid on_error {on_error!r} — must be 'error', 'skip', or 'blank'")
129
+
130
+ _skip = ["error", "warning"] if skip_statuses is None else skip_statuses
131
+ col_list = [columns] if isinstance(columns, str) else columns
132
+ result = []
133
+
134
+ for row in rows:
135
+ if row.get("row status") in _skip:
136
+ result.append(dict(row))
137
+ continue
138
+
139
+ row_copy = dict(row)
140
+ try:
141
+ values = []
142
+ for c in col_list:
143
+ val = row_copy[c]
144
+ if not isinstance(val, str):
145
+ raise TypeError(f"Column {c!r} value must be a string, got {type(val).__name__}")
146
+ values.append(val)
147
+ combined = sep.join(values)
148
+ row_copy[out_column] = self.slugify(combined, sep=sep, prefix=prefix, suffix=suffix)
149
+ except Exception as exc:
150
+ if on_error == "error":
151
+ row_copy[out_column] = ""
152
+ row_copy["row status"] = "error"
153
+ row_copy["message"] = str(exc)
154
+ elif on_error == "blank":
155
+ row_copy[out_column] = ""
156
+ # on_error == "skip": row unchanged
157
+
158
+ result.append(row_copy)
159
+
160
+ self.rows = result
161
+ return result
@@ -0,0 +1,16 @@
1
+ import pytest
2
+ from tha_str_runner import ThaStr
3
+
4
+
5
+ @pytest.fixture
6
+ def runner() -> ThaStr:
7
+ return ThaStr()
8
+
9
+
10
+ @pytest.fixture
11
+ def rows() -> list[dict]:
12
+ return [
13
+ {"id": "1", "Name": " Alice Smith ", "row status": "", "message": ""},
14
+ {"id": "2", "Name": "BOB JONES", "row status": "", "message": ""},
15
+ {"id": "3", "Name": "carol white", "row status": "", "message": ""},
16
+ ]
@@ -0,0 +1,182 @@
1
+ import pytest
2
+ from tha_str_runner import ThaStr, StrError
3
+
4
+
5
# ---------------------------------------------------------------------------
# format_str — single value
# ---------------------------------------------------------------------------


def test_format_str_strip():
    """Surrounding whitespace is removed by default."""
    formatted = ThaStr.format_str(" hello ")
    assert formatted == "hello"


def test_format_str_no_strip():
    """With strip=False the value keeps its surrounding whitespace."""
    formatted = ThaStr.format_str(" hello ", strip=False)
    assert formatted == " hello "


def test_format_str_upper():
    """case="upper" upper-cases the value."""
    formatted = ThaStr.format_str("hello", case="upper")
    assert formatted == "HELLO"


def test_format_str_lower():
    """case="lower" lower-cases the value."""
    formatted = ThaStr.format_str("HELLO", case="lower")
    assert formatted == "hello"


def test_format_str_title():
    """case="title" title-cases the value."""
    formatted = ThaStr.format_str("hello world", case="title")
    assert formatted == "Hello World"


def test_format_str_replace_literal():
    """Replacement keys are substituted literally by default."""
    substitutions = {"world": "there"}
    formatted = ThaStr.format_str("hello world", replace=substitutions)
    assert formatted == "hello there"


def test_format_str_replace_regex():
    """With regex=True replacement keys are treated as patterns."""
    substitutions = {r"\d+": "#"}
    formatted = ThaStr.format_str("foo123bar", replace=substitutions, regex=True)
    assert formatted == "foo#bar"


def test_format_str_invalid_case():
    """An unsupported case value raises StrError."""
    with pytest.raises(StrError):
        ThaStr.format_str("hello", case="sentence")


def test_format_str_combined():
    """Strip, case, and replace can be combined in one call."""
    substitutions = {"world": "there"}
    formatted = ThaStr.format_str(" Hello World ", case="lower", replace=substitutions)
    assert formatted == "hello there"
36
+
37
+
38
# ---------------------------------------------------------------------------
# slugify — single value
# ---------------------------------------------------------------------------


def test_slugify_basic():
    """Words are lower-cased and joined with the default separator."""
    slug = ThaStr.slugify("Hello World")
    assert slug == "hello-world"


def test_slugify_special_chars():
    """Punctuation is collapsed into a single separator and trimmed."""
    slug = ThaStr.slugify("foo & bar!")
    assert slug == "foo-bar"


def test_slugify_unicode():
    """Accented characters are reduced to their ASCII base letters."""
    slug = ThaStr.slugify("café")
    assert slug == "cafe"


def test_slugify_custom_sep():
    """A custom separator replaces the default hyphen."""
    slug = ThaStr.slugify("Hello World", sep="_")
    assert slug == "hello_world"


def test_slugify_prefix_suffix():
    """Prefix and suffix are attached around the slug."""
    slug = ThaStr.slugify("hello", prefix="pre-", suffix="-suf")
    assert slug == "pre-hello-suf"


def test_slugify_collapses_seps():
    """Whitespace between words yields exactly one separator."""
    slug = ThaStr.slugify("foo bar")
    assert slug == "foo-bar"


def test_slugify_strips_leading_trailing_sep():
    """Separators at either end of the input do not survive in the slug."""
    slug = ThaStr.slugify("--hello--")
    assert slug == "hello"
62
+
63
+
64
# ---------------------------------------------------------------------------
# format_str_rows
# ---------------------------------------------------------------------------


def test_format_str_rows_basic(runner, rows):
    """The named column is normalized in the returned rows."""
    formatted = runner.format_str_rows(rows, "Name", case="lower")
    first, second = formatted[0], formatted[1]
    assert first["Name"] == "alice smith"
    assert second["Name"] == "bob jones"


def test_format_str_rows_immutability(runner, rows):
    """The caller's row dicts are not modified."""
    snapshot = [dict(r) for r in rows]
    runner.format_str_rows(rows, "Name", case="upper")
    assert rows[0]["Name"] == snapshot[0]["Name"]


def test_format_str_rows_new_list(runner, rows):
    """The returned list is a different object from the input list."""
    formatted = runner.format_str_rows(rows, "Name")
    assert formatted is not rows


def test_format_str_rows_out_column(runner, rows):
    """out_column receives the result and the source column is left alone."""
    formatted = runner.format_str_rows(rows, "Name", case="lower", out_column="Name Lower")
    first = formatted[0]
    assert "Name Lower" in first
    assert first["Name"] == " Alice Smith "


def test_format_str_rows_stores_self_rows(runner, rows):
    """The returned list is also stored on the instance."""
    formatted = runner.format_str_rows(rows, "Name")
    assert runner.rows is formatted


def test_format_str_rows_on_error_error(runner):
    """Default on_error marks the row as an error and blanks the output."""
    bad_rows = [{"Name": 123, "row status": "", "message": ""}]
    first = runner.format_str_rows(bad_rows, "Name")[0]
    assert first["row status"] == "error"
    assert first["Name"] == ""


def test_format_str_rows_on_error_skip(runner):
    """on_error="skip" returns the failing row unchanged."""
    bad_rows = [{"Name": 123, "row status": "", "message": ""}]
    first = runner.format_str_rows(bad_rows, "Name", on_error="skip")[0]
    assert first["Name"] == 123
    assert first["row status"] == ""


def test_format_str_rows_on_error_blank(runner):
    """on_error="blank" blanks the output but leaves the status alone."""
    bad_rows = [{"Name": 123, "row status": "", "message": ""}]
    first = runner.format_str_rows(bad_rows, "Name", on_error="blank")[0]
    assert first["Name"] == ""
    assert first["row status"] == ""


def test_format_str_rows_skip_statuses_default(runner, rows):
    """Rows already marked "error" are passed through untouched by default."""
    rows[0]["row status"] = "error"
    formatted = runner.format_str_rows(rows, "Name", case="upper")
    assert formatted[0]["Name"] == " Alice Smith "  # skipped
    assert formatted[1]["Name"] == "BOB JONES"


def test_format_str_rows_skip_statuses_custom(runner, rows):
    """A custom skip_statuses list controls which rows are passed through."""
    rows[0]["row status"] = "pending"
    formatted = runner.format_str_rows(rows, "Name", case="upper", skip_statuses=["pending"])
    assert formatted[0]["Name"] == " Alice Smith "  # skipped
    assert formatted[1]["Name"] == "BOB JONES"


def test_format_str_rows_skip_statuses_empty(runner, rows):
    """An empty skip_statuses list processes every row."""
    rows[0]["row status"] = "error"
    formatted = runner.format_str_rows(rows, "Name", case="upper", skip_statuses=[])
    assert formatted[0]["Name"] == "ALICE SMITH"  # processed despite error status


def test_format_str_rows_invalid_on_error(runner, rows):
    """An unsupported on_error value raises StrError."""
    with pytest.raises(StrError):
        runner.format_str_rows(rows, "Name", on_error="raise")
129
+
130
+
131
# ---------------------------------------------------------------------------
# slugify_rows
# ---------------------------------------------------------------------------


def test_slugify_rows_single_column(runner, rows):
    """A single column name produces a slug from that column."""
    slugged = runner.slugify_rows(rows, "Name", out_column="Slug")
    assert slugged[0]["Slug"] == "alice-smith"


def test_slugify_rows_multiple_columns(runner):
    """A list of columns is combined into one slug."""
    rows = [{"First": "Alice", "Last": "Smith", "row status": "", "message": ""}]
    slugged = runner.slugify_rows(rows, ["First", "Last"], out_column="Slug")
    assert slugged[0]["Slug"] == "alice-smith"


def test_slugify_rows_immutability(runner, rows):
    """The caller's row dicts are not modified."""
    snapshot = [dict(r) for r in rows]
    runner.slugify_rows(rows, "Name", out_column="Slug")
    first_input = rows[0]
    assert "Slug" not in first_input
    assert first_input["Name"] == snapshot[0]["Name"]


def test_slugify_rows_new_list(runner, rows):
    """The returned list is a different object from the input list."""
    slugged = runner.slugify_rows(rows, "Name", out_column="Slug")
    assert slugged is not rows


def test_slugify_rows_stores_self_rows(runner, rows):
    """The returned list is also stored on the instance."""
    slugged = runner.slugify_rows(rows, "Name", out_column="Slug")
    assert runner.rows is slugged


def test_slugify_rows_prefix_suffix(runner):
    """Prefix and suffix are attached around each row's slug."""
    rows = [{"Code": "abc 123", "row status": "", "message": ""}]
    slugged = runner.slugify_rows(rows, "Code", out_column="Slug", prefix="id-", suffix="-v1")
    assert slugged[0]["Slug"] == "id-abc-123-v1"


def test_slugify_rows_skip_statuses_default(runner, rows):
    """Rows already marked "error" get no output column by default."""
    rows[0]["row status"] = "error"
    slugged = runner.slugify_rows(rows, "Name", out_column="Slug")
    assert "Slug" not in slugged[0]


def test_slugify_rows_on_error_error(runner):
    """Default on_error marks the row as an error and blanks the output."""
    bad_rows = [{"Name": None, "row status": "", "message": ""}]
    first = runner.slugify_rows(bad_rows, "Name", out_column="Slug")[0]
    assert first["row status"] == "error"
    assert first["Slug"] == ""


def test_slugify_rows_on_error_blank(runner):
    """on_error="blank" blanks the output but leaves the status alone."""
    bad_rows = [{"Name": None, "row status": "", "message": ""}]
    first = runner.slugify_rows(bad_rows, "Name", out_column="Slug", on_error="blank")[0]
    assert first["Slug"] == ""
    assert first["row status"] == ""


def test_slugify_rows_invalid_on_error(runner, rows):
    """An unsupported on_error value raises StrError."""
    with pytest.raises(StrError):
        runner.slugify_rows(rows, "Name", out_column="Slug", on_error="raise")