transformplan 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32) hide show
  1. transformplan-0.1.0/LICENSE +21 -0
  2. transformplan-0.1.0/PKG-INFO +151 -0
  3. transformplan-0.1.0/README.md +122 -0
  4. transformplan-0.1.0/pyproject.toml +138 -0
  5. transformplan-0.1.0/setup.cfg +4 -0
  6. transformplan-0.1.0/tests/test_chunking.py +861 -0
  7. transformplan-0.1.0/tests/test_column.py +352 -0
  8. transformplan-0.1.0/tests/test_core.py +405 -0
  9. transformplan-0.1.0/tests/test_datetime.py +342 -0
  10. transformplan-0.1.0/tests/test_filters.py +604 -0
  11. transformplan-0.1.0/tests/test_integration.py +340 -0
  12. transformplan-0.1.0/tests/test_map.py +343 -0
  13. transformplan-0.1.0/tests/test_map_encoding.py +515 -0
  14. transformplan-0.1.0/tests/test_math.py +356 -0
  15. transformplan-0.1.0/tests/test_math_scaling.py +389 -0
  16. transformplan-0.1.0/tests/test_protocol.py +487 -0
  17. transformplan-0.1.0/tests/test_rows.py +411 -0
  18. transformplan-0.1.0/tests/test_string.py +316 -0
  19. transformplan-0.1.0/tests/test_validation.py +670 -0
  20. transformplan-0.1.0/transformplan/__init__.py +52 -0
  21. transformplan-0.1.0/transformplan/chunking.py +611 -0
  22. transformplan-0.1.0/transformplan/core.py +667 -0
  23. transformplan-0.1.0/transformplan/filters.py +1049 -0
  24. transformplan-0.1.0/transformplan/plan.py +47 -0
  25. transformplan-0.1.0/transformplan/protocol.py +532 -0
  26. transformplan-0.1.0/transformplan/py.typed +0 -0
  27. transformplan-0.1.0/transformplan/validation.py +1579 -0
  28. transformplan-0.1.0/transformplan.egg-info/PKG-INFO +151 -0
  29. transformplan-0.1.0/transformplan.egg-info/SOURCES.txt +30 -0
  30. transformplan-0.1.0/transformplan.egg-info/dependency_links.txt +1 -0
  31. transformplan-0.1.0/transformplan.egg-info/requires.txt +3 -0
  32. transformplan-0.1.0/transformplan.egg-info/top_level.txt +1 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 TransformPlan Contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,151 @@
1
+ Metadata-Version: 2.4
2
+ Name: transformplan
3
+ Version: 0.1.0
4
+ Summary: Safe, reproducible data transformations with built-in auditing and validation
5
+ License: MIT
6
+ Project-URL: Homepage, https://github.com/limebit/transformplan
7
+ Project-URL: Repository, https://github.com/limebit/transformplan
8
+ Keywords: data,transformation,polars,pandas,audit,validation
9
+ Classifier: Development Status :: 4 - Beta
10
+ Classifier: Intended Audience :: Developers
11
+ Classifier: Intended Audience :: Science/Research
12
+ Classifier: License :: OSI Approved :: MIT License
13
+ Classifier: Programming Language :: Python :: 3
14
+ Classifier: Programming Language :: Python :: 3.10
15
+ Classifier: Programming Language :: Python :: 3.11
16
+ Classifier: Programming Language :: Python :: 3.12
17
+ Classifier: Programming Language :: Python :: 3.13
18
+ Classifier: Programming Language :: Python :: 3.14
19
+ Classifier: Topic :: Scientific/Engineering
20
+ Classifier: Topic :: Software Development
21
+ Classifier: Typing :: Typed
22
+ Requires-Python: >=3.10
23
+ Description-Content-Type: text/markdown
24
+ License-File: LICENSE
25
+ Requires-Dist: polars
26
+ Requires-Dist: pandas
27
+ Requires-Dist: pyarrow
28
+ Dynamic: license-file
29
+
30
+ <div align="center">
31
+ <img src="docs/assets/images/logo_wordmark_black.png" alt="TransformPlan" width="600">
32
+ </div>
33
+
34
+ <h1 align="center">TransformPlan: Auditable Data Transformation Pipelines</h1>
35
+
36
+ <div align="center">
37
+ <img src="https://img.shields.io/badge/python-3.10%20%7C%203.11%20%7C%203.12%20%7C%203.13%20%7C%203.14-blue" alt="Python 3.10+">
38
+ <img src="./coverage.svg" alt="Coverage">
39
+ </div>
40
+
41
+ ## Features
42
+
43
+ - **Declarative transformations**: Build transformation pipelines using method chaining
44
+ - **Schema validation**: Validate operations before execution with dry-run capability
45
+ - **Audit trails**: Generate complete audit protocols with deterministic DataFrame hashing
46
+ - **Multi-backend support**: Works with both Polars (primary) and Pandas DataFrames
47
+ - **Serializable pipelines**: Save and load transformation plans as JSON
48
+
49
+ ## Quick Example
50
+
51
+ ```python
52
+ from transformplan import TransformPlan, Col
53
+
54
+ # Build readable pipelines with 75+ chainable operations
55
+ plan = (
56
+ TransformPlan()
57
+ # Standardize column names
58
+ .col_rename(column="PatientID", new_name="patient_id")
59
+ .col_rename(column="DOB", new_name="date_of_birth")
60
+ .str_strip(column="patient_id")
61
+
62
+ # Calculate derived values
63
+ .dt_age_years(column="date_of_birth", new_column="age")
64
+ .math_clamp(column="age", min_value=0, max_value=120)
65
+
66
+ # Categorize patients by age
67
+ .map_discretize(column="age", bins=[18, 40, 65], labels=["young", "adult", "senior"], new_column="age_group")
68
+
69
+ # Filter and clean
70
+ .rows_filter(Col("age") >= 18)
71
+ .rows_drop_nulls(columns=["patient_id", "age"])
72
+ .col_drop(column="date_of_birth")
73
+ )
74
+
75
+ # Execute with schema validation — catch errors before they hit production
76
+ df_result, protocol = plan.process(df, validate=True)
77
+
78
+ # Serialize pipelines to JSON — version control your transformations
79
+ plan.to_json("patient_transform.json")
80
+
81
+ # Reload and reapply — reproducible results across environments
82
+ plan = TransformPlan.from_json("patient_transform.json")
83
+ df_result, protocol = plan.process(new_data)
84
+ ```
85
+
86
+ ### Full Audit Trail — Every Step Tracked and Hashed
87
+
88
+ ```python
89
+ protocol.print(show_params=False)
90
+ ```
91
+
92
+ ```
93
+ ======================================================================
94
+ TRANSFORM PROTOCOL
95
+ ======================================================================
96
+ Input: 1000 rows × 5 cols [a4f8b2c1]
97
+ Output: 847 rows × 6 cols [e7d3f9a2]
98
+ Total time: 0.0247s
99
+ ----------------------------------------------------------------------
100
+
101
+ # Operation Rows Cols Time Hash
102
+ ----------------------------------------------------------------------
103
+ 0 input 1000 5 - a4f8b2c1
104
+ 1 col_rename 1000 5 0.0012s b2e4a7f3
105
+ 2 col_rename 1000 5 0.0008s c9d1e5b8
106
+ 3 str_strip 1000 5 0.0013s c9d1e5b8 ○
107
+ 4 dt_age_years 1000 6 (+1) 0.0041s d4f2c8a1
108
+ 5 math_clamp 1000 6 0.0015s e1b7d3f9
109
+ 6 map_discretize 1000 7 (+1) 0.0028s f8a4c2e6
110
+ 7 rows_filter 858 (-142) 7 0.0037s a2e9f4b7
111
+ 8 rows_drop_nulls 847 (-11) 7 0.0019s b5c1d8e3
112
+ 9 col_drop 847 6 (-1) 0.0006s e7d3f9a2
113
+ ======================================================================
114
+ ○ = no effect (steps 3 did not change data)
115
+ ```
116
+
117
+ ## Available Operations
118
+
119
+ | Category | Description | Examples |
120
+ | ---------- | ------------------------- | ---------------------------------------------------------------------------- |
121
+ | **col\_** | Column operations | `col_rename`, `col_drop`, `col_cast`, `col_add`, `col_select` |
122
+ | **math\_** | Arithmetic & scaling | `math_add`, `math_multiply`, `math_standardize`, `math_minmax`, `math_clamp` |
123
+ | **rows\_** | Row filtering & reshaping | `rows_filter`, `rows_drop_nulls`, `rows_sort`, `rows_unique`, `rows_pivot` |
124
+ | **str\_** | String operations | `str_lower`, `str_upper`, `str_strip`, `str_replace`, `str_split` |
125
+ | **dt\_** | Datetime operations | `dt_year`, `dt_month`, `dt_parse`, `dt_age_years`, `dt_diff_days` |
126
+ | **map\_** | Value mapping & encoding | `map_values`, `map_discretize`, `map_onehot`, `map_ordinal` |
127
+
128
+ ## Installation
129
+
130
+ ```bash
131
+ pip install transformplan
132
+ ```
133
+
134
+ Or with uv:
135
+
136
+ ```bash
137
+ uv add transformplan
138
+ ```
139
+
140
+ ## Development Setup
141
+
142
+ ```bash
143
+ make install-dev # Install with dev dependencies
144
+ make test # Run the test suite
145
+ make lint # Run ruff linting and pyright type checking
146
+ make format # Fix import sorting and format code
147
+ ```
148
+
149
+ ## License
150
+
151
+ MIT License - see [LICENSE](LICENSE) for details.
@@ -0,0 +1,122 @@
1
+ <div align="center">
2
+ <img src="docs/assets/images/logo_wordmark_black.png" alt="TransformPlan" width="600">
3
+ </div>
4
+
5
+ <h1 align="center">TransformPlan: Auditable Data Transformation Pipelines</h1>
6
+
7
+ <div align="center">
8
+ <img src="https://img.shields.io/badge/python-3.10%20%7C%203.11%20%7C%203.12%20%7C%203.13%20%7C%203.14-blue" alt="Python 3.10+">
9
+ <img src="./coverage.svg" alt="Coverage">
10
+ </div>
11
+
12
+ ## Features
13
+
14
+ - **Declarative transformations**: Build transformation pipelines using method chaining
15
+ - **Schema validation**: Validate operations before execution with dry-run capability
16
+ - **Audit trails**: Generate complete audit protocols with deterministic DataFrame hashing
17
+ - **Multi-backend support**: Works with both Polars (primary) and Pandas DataFrames
18
+ - **Serializable pipelines**: Save and load transformation plans as JSON
19
+
20
+ ## Quick Example
21
+
22
+ ```python
23
+ from transformplan import TransformPlan, Col
24
+
25
+ # Build readable pipelines with 75+ chainable operations
26
+ plan = (
27
+ TransformPlan()
28
+ # Standardize column names
29
+ .col_rename(column="PatientID", new_name="patient_id")
30
+ .col_rename(column="DOB", new_name="date_of_birth")
31
+ .str_strip(column="patient_id")
32
+
33
+ # Calculate derived values
34
+ .dt_age_years(column="date_of_birth", new_column="age")
35
+ .math_clamp(column="age", min_value=0, max_value=120)
36
+
37
+ # Categorize patients by age
38
+ .map_discretize(column="age", bins=[18, 40, 65], labels=["young", "adult", "senior"], new_column="age_group")
39
+
40
+ # Filter and clean
41
+ .rows_filter(Col("age") >= 18)
42
+ .rows_drop_nulls(columns=["patient_id", "age"])
43
+ .col_drop(column="date_of_birth")
44
+ )
45
+
46
+ # Execute with schema validation — catch errors before they hit production
47
+ df_result, protocol = plan.process(df, validate=True)
48
+
49
+ # Serialize pipelines to JSON — version control your transformations
50
+ plan.to_json("patient_transform.json")
51
+
52
+ # Reload and reapply — reproducible results across environments
53
+ plan = TransformPlan.from_json("patient_transform.json")
54
+ df_result, protocol = plan.process(new_data)
55
+ ```
56
+
57
+ ### Full Audit Trail — Every Step Tracked and Hashed
58
+
59
+ ```python
60
+ protocol.print(show_params=False)
61
+ ```
62
+
63
+ ```
64
+ ======================================================================
65
+ TRANSFORM PROTOCOL
66
+ ======================================================================
67
+ Input: 1000 rows × 5 cols [a4f8b2c1]
68
+ Output: 847 rows × 6 cols [e7d3f9a2]
69
+ Total time: 0.0247s
70
+ ----------------------------------------------------------------------
71
+
72
+ # Operation Rows Cols Time Hash
73
+ ----------------------------------------------------------------------
74
+ 0 input 1000 5 - a4f8b2c1
75
+ 1 col_rename 1000 5 0.0012s b2e4a7f3
76
+ 2 col_rename 1000 5 0.0008s c9d1e5b8
77
+ 3 str_strip 1000 5 0.0013s c9d1e5b8 ○
78
+ 4 dt_age_years 1000 6 (+1) 0.0041s d4f2c8a1
79
+ 5 math_clamp 1000 6 0.0015s e1b7d3f9
80
+ 6 map_discretize 1000 7 (+1) 0.0028s f8a4c2e6
81
+ 7 rows_filter 858 (-142) 7 0.0037s a2e9f4b7
82
+ 8 rows_drop_nulls 847 (-11) 7 0.0019s b5c1d8e3
83
+ 9 col_drop 847 6 (-1) 0.0006s e7d3f9a2
84
+ ======================================================================
85
+ ○ = no effect (steps 3 did not change data)
86
+ ```
87
+
88
+ ## Available Operations
89
+
90
+ | Category | Description | Examples |
91
+ | ---------- | ------------------------- | ---------------------------------------------------------------------------- |
92
+ | **col\_** | Column operations | `col_rename`, `col_drop`, `col_cast`, `col_add`, `col_select` |
93
+ | **math\_** | Arithmetic & scaling | `math_add`, `math_multiply`, `math_standardize`, `math_minmax`, `math_clamp` |
94
+ | **rows\_** | Row filtering & reshaping | `rows_filter`, `rows_drop_nulls`, `rows_sort`, `rows_unique`, `rows_pivot` |
95
+ | **str\_** | String operations | `str_lower`, `str_upper`, `str_strip`, `str_replace`, `str_split` |
96
+ | **dt\_** | Datetime operations | `dt_year`, `dt_month`, `dt_parse`, `dt_age_years`, `dt_diff_days` |
97
+ | **map\_** | Value mapping & encoding | `map_values`, `map_discretize`, `map_onehot`, `map_ordinal` |
98
+
99
+ ## Installation
100
+
101
+ ```bash
102
+ pip install transformplan
103
+ ```
104
+
105
+ Or with uv:
106
+
107
+ ```bash
108
+ uv add transformplan
109
+ ```
110
+
111
+ ## Development Setup
112
+
113
+ ```bash
114
+ make install-dev # Install with dev dependencies
115
+ make test # Run the test suite
116
+ make lint # Run ruff linting and pyright type checking
117
+ make format # Fix import sorting and format code
118
+ ```
119
+
120
+ ## License
121
+
122
+ MIT License - see [LICENSE](LICENSE) for details.
@@ -0,0 +1,138 @@
1
+ [project]
2
+ name = "transformplan"
3
+ version = "0.1.0"
4
+ description = "Safe, reproducible data transformations with built-in auditing and validation"
5
+ readme = "README.md"
6
+ requires-python = ">=3.10"
7
+ license = { text = "MIT" }
8
+ keywords = ["data", "transformation", "polars", "pandas", "audit", "validation"]
9
+ classifiers = [
10
+ "Development Status :: 4 - Beta",
11
+ "Intended Audience :: Developers",
12
+ "Intended Audience :: Science/Research",
13
+ "License :: OSI Approved :: MIT License",
14
+ "Programming Language :: Python :: 3",
15
+ "Programming Language :: Python :: 3.10",
16
+ "Programming Language :: Python :: 3.11",
17
+ "Programming Language :: Python :: 3.12",
18
+ "Programming Language :: Python :: 3.13",
19
+ "Programming Language :: Python :: 3.14",
20
+ "Topic :: Scientific/Engineering",
21
+ "Topic :: Software Development",
22
+ "Typing :: Typed",
23
+ ]
24
+ dependencies = ["polars", "pandas", "pyarrow"]
25
+
26
+ [project.urls]
27
+ Homepage = "https://github.com/limebit/transformplan"
28
+ Repository = "https://github.com/limebit/transformplan"
29
+
30
+ [dependency-groups]
31
+ tests = ["pytest>=8.2.2", "pytest-cov>=7.0.0"]
32
+ docs = [
33
+ "mkdocs>=1.6.0",
34
+ "mkdocs-material>=9.5.0",
35
+ "mkdocstrings[python]>=0.24.0",
36
+ ]
37
+ dev = [
38
+ { include-group = "docs" },
39
+ { include-group = "tests" },
40
+ "ruff>=0.4.9",
41
+ "pyright==1.1.408",
42
+ ]
43
+
44
+ [build-system]
45
+ requires = ["setuptools", "wheel"]
46
+ build-backend = "setuptools.build_meta"
47
+
48
+ [tool.setuptools]
49
+ packages = ["transformplan"]
50
+
51
+ [tool.ruff]
52
+ exclude = ["__pycache__", ".git", ".github", ".ruff_cache", ".venv"]
53
+ line-length = 88
54
+
55
+ [tool.ruff.lint]
56
+ select = [
57
+ "E", # PEP 8 codestyle errors
58
+ "F", # pyflakes
59
+ "I", # isort
60
+ "N", # PEP 8 naming
61
+ "DOC", # pydoclint (preview); complementary to "D"
62
+ "D", # pydocstyle; PEP 257
63
+ "FA", # future annotations linting; PEP 563
64
+ "W", # pycodestyle warnings; PEP 8
65
+ "SIM", # flake8 simplify; simplify code
66
+ "ANN", # flake8 function annotations; PEP 3107
67
+ "B", # bugbear extension for flake8; opinionated, not based on any PEP
68
+ "C4", # list/set/dict comprehensions
69
+ "T10", # Check for debugging leftovers: pdb/ipdb imports and set traces
70
+ "EM", # error messages
71
+ "LOG", # logging module usage linting
72
+ "G", # logging format strings
73
+ "T20", # print statements
74
+ "PYI", # lint stub files .pyi
75
+ "PT", # pytest linting
76
+ "RET", # return values
77
+ "TCH", # type checking
78
+ "PTH", # pathlib usage
79
+ "PERF", # performance linting
80
+ "FURB", # modern python code patterns
81
+ "RUF", # ruff specific rules
82
+ "FBT", # no bool as function param
83
+ "TD", # todo linting
84
+ "C90", # mccabe complexity
85
+ ]
86
+ preview = true
87
+ ignore = [
88
+ "E501", # Line length managed by formatter
89
+ # indentation linters conflicting with formatter:
90
+ "W191",
91
+ "E111",
92
+ "E114",
93
+ "E117",
94
+ "D206",
95
+ # quotation linters conflicting with formatter:
96
+ "D300",
97
+ "Q000",
98
+ "Q001",
99
+ "Q002",
100
+ "Q003",
101
+ # comma linters conflicting with formatter:
102
+ "COM812",
103
+ "COM819",
104
+ # string concatenation linters conflicting with formatter:
105
+ "ISC001",
106
+ "ISC002",
107
+ ]
108
+
109
+ [tool.ruff.lint.per-file-ignores]
110
+ "*/__init__.py" = ["D", "DOC"]
111
+ "tests/**" = ["D", "DOC", "ANN", "PT009", "FBT", "B905", "W505", "PT017"]
112
+
113
+ [tool.ruff.lint.pydocstyle]
114
+ convention = "google"
115
+
116
+ [tool.ruff.lint.pycodestyle]
117
+ max-doc-length = 88
118
+
119
+ [tool.ruff.format]
120
+ docstring-code-format = true
121
+ docstring-code-line-length = 88
122
+
123
+ [tool.pyright]
124
+ typeCheckingMode = "strict"
125
+ reportPrivateUsage = false
126
+ reportIncompatibleMethodOverride = false
127
+ reportUnknownArgumentType = false
128
+ reportUnknownVariableType = false
129
+ reportUnknownMemberType = false
130
+ reportMissingModuleSource = false
131
+ reportMissingTypeStubs = false
132
+ enableTypeIgnoreComments = false
133
+
134
+ [tool.pytest.ini_options]
135
+ filterwarnings = [
136
+ "error",
137
+ "ignore:.*`default` parameter for `replace` is deprecated.*:DeprecationWarning",
138
+ ]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+