tabcaddy 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. tabcaddy-0.1.0/.github/workflows/ci.yml +30 -0
  2. tabcaddy-0.1.0/.gitignore +31 -0
  3. tabcaddy-0.1.0/.pre-commit-config.yaml +18 -0
  4. tabcaddy-0.1.0/AGENTS.md +31 -0
  5. tabcaddy-0.1.0/LICENSE +174 -0
  6. tabcaddy-0.1.0/PKG-INFO +65 -0
  7. tabcaddy-0.1.0/README.md +47 -0
  8. tabcaddy-0.1.0/plan.md +1333 -0
  9. tabcaddy-0.1.0/pyproject.toml +37 -0
  10. tabcaddy-0.1.0/tabcaddy/__init__.py +3 -0
  11. tabcaddy-0.1.0/tabcaddy/__main__.py +5 -0
  12. tabcaddy-0.1.0/tabcaddy/application/compile_dataset.py +75 -0
  13. tabcaddy-0.1.0/tabcaddy/application/diff_datasets.py +24 -0
  14. tabcaddy-0.1.0/tabcaddy/application/generate_analysis.py +32 -0
  15. tabcaddy-0.1.0/tabcaddy/application/scaffold_transform.py +45 -0
  16. tabcaddy-0.1.0/tabcaddy/application/transform_dataset.py +101 -0
  17. tabcaddy-0.1.0/tabcaddy/cli/app.py +116 -0
  18. tabcaddy-0.1.0/tabcaddy/domain/__init__.py +25 -0
  19. tabcaddy-0.1.0/tabcaddy/domain/models.py +89 -0
  20. tabcaddy-0.1.0/tabcaddy/domain/serialization.py +132 -0
  21. tabcaddy-0.1.0/tabcaddy/infrastructure/analysis_builder.py +282 -0
  22. tabcaddy-0.1.0/tabcaddy/infrastructure/cache_manager.py +73 -0
  23. tabcaddy-0.1.0/tabcaddy/infrastructure/compiled_dataset_differ.py +22 -0
  24. tabcaddy-0.1.0/tabcaddy/infrastructure/csv_reader.py +13 -0
  25. tabcaddy-0.1.0/tabcaddy/infrastructure/csv_writer.py +10 -0
  26. tabcaddy-0.1.0/tabcaddy/infrastructure/diff_support.py +104 -0
  27. tabcaddy-0.1.0/tabcaddy/infrastructure/feather_reader.py +13 -0
  28. tabcaddy-0.1.0/tabcaddy/infrastructure/feather_writer.py +10 -0
  29. tabcaddy-0.1.0/tabcaddy/infrastructure/file_differ.py +19 -0
  30. tabcaddy-0.1.0/tabcaddy/infrastructure/folder_differ.py +46 -0
  31. tabcaddy-0.1.0/tabcaddy/infrastructure/metadata_builder.py +28 -0
  32. tabcaddy-0.1.0/tabcaddy/infrastructure/parquet_dataset_reader.py +28 -0
  33. tabcaddy-0.1.0/tabcaddy/infrastructure/parquet_dataset_writer.py +19 -0
  34. tabcaddy-0.1.0/tabcaddy/infrastructure/schema_analyzer.py +118 -0
  35. tabcaddy-0.1.0/tabcaddy/infrastructure/source_resolver.py +46 -0
  36. tabcaddy-0.1.0/tabcaddy/infrastructure/transform_loader.py +51 -0
  37. tabcaddy-0.1.0/tabcaddy/rendering/charts/bar_chart.py +15 -0
  38. tabcaddy-0.1.0/tabcaddy/rendering/charts/line_chart.py +11 -0
  39. tabcaddy-0.1.0/tabcaddy/rendering/console.py +19 -0
  40. tabcaddy-0.1.0/tabcaddy/rendering/views/diff.py +28 -0
  41. tabcaddy-0.1.0/tabcaddy/rendering/views/schema.py +68 -0
  42. tabcaddy-0.1.0/tabcaddy/rendering/views/summary.py +102 -0
  43. tabcaddy-0.1.0/tests/conftest.py +60 -0
  44. tabcaddy-0.1.0/tests/snapshots/summary_output.txt +36 -0
  45. tabcaddy-0.1.0/tests/test_analysis_and_cache.py +39 -0
  46. tabcaddy-0.1.0/tests/test_cli.py +72 -0
  47. tabcaddy-0.1.0/tests/test_cli_integration.py +61 -0
  48. tabcaddy-0.1.0/tests/test_core.py +104 -0
  49. tabcaddy-0.1.0/tests/test_diff.py +31 -0
  50. tabcaddy-0.1.0/tests/test_render_snapshots.py +94 -0
  51. tabcaddy-0.1.0/tests/test_rendering.py +58 -0
  52. tabcaddy-0.1.0/tests/test_schema_analyzer.py +20 -0
  53. tabcaddy-0.1.0/uv.lock +540 -0
@@ -0,0 +1,30 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches:
6
+ - main
7
+ pull_request:
8
+
9
+ jobs:
10
+ pre-commit:
11
+ runs-on: ubuntu-latest
12
+ steps:
13
+ - uses: actions/checkout@v4
14
+ - uses: actions/setup-python@v5
15
+ with:
16
+ python-version: "3.13"
17
+ - uses: pre-commit/action@v3.0.1
18
+
19
+ tests:
20
+ runs-on: ubuntu-latest
21
+ steps:
22
+ - uses: actions/checkout@v4
23
+ - uses: astral-sh/setup-uv@v5
24
+ - uses: actions/setup-python@v5
25
+ with:
26
+ python-version: "3.13"
27
+ - name: Sync dependencies
28
+ run: uv sync --group dev
29
+ - name: Run tests
30
+ run: uv run pytest
@@ -0,0 +1,31 @@
1
+ # Python bytecode and caches
2
+ __pycache__/
3
+ *.py[cod]
4
+ *.pyo
5
+
6
+ # Virtual environments
7
+ .venv/
8
+ .env
9
+ .env.*
10
+
11
+ # Tool caches
12
+ .pytest_cache/
13
+ .ruff_cache/
14
+ .mypy_cache/
15
+ .coverage
16
+ .coverage.*
17
+ htmlcov/
18
+
19
+ # Build artifacts
20
+ build/
21
+ dist/
22
+ *.egg-info/
23
+
24
+ # Local dataset and project outputs
25
+ .tabcaddy/
26
+
27
+ # Editor and OS files
28
+ .vscode/
29
+ .idea/
30
+ Thumbs.db
31
+ .DS_Store
@@ -0,0 +1,18 @@
1
+ repos:
2
+ - repo: https://github.com/pre-commit/pre-commit-hooks
3
+ rev: v5.0.0
4
+ hooks:
5
+ - id: check-added-large-files
6
+ - id: check-merge-conflict
7
+ - id: check-toml
8
+ - id: check-yaml
9
+ - id: end-of-file-fixer
10
+ - id: mixed-line-ending
11
+ - id: trailing-whitespace
12
+
13
+ - repo: https://github.com/astral-sh/ruff-pre-commit
14
+ rev: v0.11.13
15
+ hooks:
16
+ - id: ruff
17
+ args: [--fix]
18
+ - id: ruff-format
@@ -0,0 +1,31 @@
1
+ # Implementation Guidelines
2
+
3
+ ## Code Style
4
+
5
+ - ALWAYS PRODUCE PROFESSIONAL, STATE-OF-THE-ART, AND WELL-ENGINEERED CODE!
6
+ - Prefer lean Python code with clear local flow over extra abstraction.
7
+ - Avoid deep call stacks and multiple layers of indirection.
8
+ - Do not introduce helper functions that have one call site and add little semantic value. Do not add "shallow" few-line wrappers.
9
+ - Avoid temporary internal dataclasses, tuples, or helper objects whose only job is to shuttle a few computed values to one consumer.
10
+ - Keep changes minimal and targeted.
11
+ - When fixing a bug or addressing a code review comment, do the change precisely and to the smallest possible scope that fully addresses the issue.
12
+ - Avoid "hacks" that introduce technical debt or reduce code clarity.
13
+
14
+ ## Comments and docstrings
15
+ - Add compact comments to clarify non-obvious code, especially if it contains non-trivial logic or policy.
16
+ - IMPORTANT: Don't remove existing inline comments unless they are incorrect! Check them for accuracy and update them if needed!
17
+
18
+ ## Architecture
19
+
20
+ - The code should be well organized into modules that separate concerns.
21
+ - Maintainability is key! Avoid over-engineering or premature abstraction.
22
+ - Extract a helper only when at least one of these is true:
23
+ - the logic is reused
24
+ - the logic carries real policy that benefits from a name
25
+ - the logic materially improves testability
26
+ - the logic hides non-trivial complexity
27
+
28
+ ## After completing implementation
29
+
30
+ - Check the implementation for any violations of the above guidelines and refactor all found violations as needed.
31
+ - Iterate checking and refactoring until no meaningful violations remain.
tabcaddy-0.1.0/LICENSE ADDED
@@ -0,0 +1,174 @@
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction, and
10
+ distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by the copyright
13
+ owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all other
16
+ entities that control, are controlled by, or are under common control with that
17
+ entity. For the purposes of this definition, "control" means (i) the power,
18
+ direct or indirect, to cause the direction or management of such entity,
19
+ whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or
20
+ more of the outstanding shares, or (iii) beneficial ownership of such entity.
21
+
22
+ "You" (or "Your") shall mean an individual or Legal Entity exercising
23
+ permissions granted by this License.
24
+
25
+ "Source" form shall mean the preferred form for making modifications,
26
+ including but not limited to software source code, documentation source, and
27
+ configuration files.
28
+
29
+ "Object" form shall mean any form resulting from mechanical transformation or
30
+ translation of a Source form, including but not limited to compiled object
31
+ code, generated documentation, and conversions to other media types.
32
+
33
+ "Work" shall mean the work of authorship, whether in Source or Object form,
34
+ made available under the License, as indicated by a copyright notice that is
35
+ included in or attached to the work (an example is provided in the Appendix
36
+ below).
37
+
38
+ "Derivative Works" shall mean any work, whether in Source or Object form, that
39
+ is based on (or derived from) the Work and for which the editorial revisions,
40
+ annotations, elaborations, or other modifications represent, as a whole, an
41
+ original work of authorship. For the purposes of this License, Derivative Works
42
+ shall not include works that remain separable from, or merely link (or bind by
43
+ name) to the interfaces of, the Work and Derivative Works thereof.
44
+
45
+ "Contribution" shall mean any work of authorship, including the original
46
+ version of the Work and any modifications or additions to that Work or
47
+ Derivative Works thereof, that is intentionally submitted to Licensor for
48
+ inclusion in the Work by the copyright owner or by an individual or Legal
49
+ Entity authorized to submit on behalf of the copyright owner. For the purposes
50
+ of this definition, "submitted" means any form of electronic, verbal, or
51
+ written communication sent to the Licensor or its representatives, including
52
+ but not limited to communication on electronic mailing lists, source code
53
+ control systems, and issue tracking systems that are managed by, or on behalf
54
+ of, the Licensor for the purpose of discussing and improving the Work, but
55
+ excluding communication that is conspicuously marked or otherwise designated in
56
+ writing by the copyright owner as "Not a Contribution."
57
+
58
+ "Contributor" shall mean Licensor and any individual or Legal Entity on behalf
59
+ of whom a Contribution has been received by Licensor and subsequently
60
+ incorporated within the Work.
61
+
62
+ 2. Grant of Copyright License. Subject to the terms and conditions of this
63
+ License, each Contributor hereby grants to You a perpetual, worldwide,
64
+ non-exclusive, no-charge, royalty-free, irrevocable copyright license to
65
+ reproduce, prepare Derivative Works of, publicly display, publicly perform,
66
+ sublicense, and distribute the Work and such Derivative Works in Source or
67
+ Object form.
68
+
69
+ 3. Grant of Patent License. Subject to the terms and conditions of this
70
+ License, each Contributor hereby grants to You a perpetual, worldwide,
71
+ non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this
72
+ section) patent license to make, have made, use, offer to sell, sell, import,
73
+ and otherwise transfer the Work, where such license applies only to those
74
+ patent claims licensable by such Contributor that are necessarily infringed by
75
+ their Contribution(s) alone or by combination of their Contribution(s) with the
76
+ Work to which such Contribution(s) was submitted. If You institute patent
77
+ litigation against any entity (including a cross-claim or counterclaim in a
78
+ lawsuit) alleging that the Work or a Contribution incorporated within the Work
79
+ constitutes direct or contributory patent infringement, then any patent
80
+ licenses granted to You under this License for that Work shall terminate as of
81
+ the date such litigation is filed.
82
+
83
+ 4. Redistribution. You may reproduce and distribute copies of the Work or
84
+ Derivative Works thereof in any medium, with or without modifications, and in
85
+ Source or Object form, provided that You meet the following conditions:
86
+
87
+ (a) You must give any other recipients of the Work or Derivative Works a copy
88
+ of this License; and
89
+
90
+ (b) You must cause any modified files to carry prominent notices stating that
91
+ You changed the files; and
92
+
93
+ (c) You must retain, in the Source form of any Derivative Works that You
94
+ distribute, all copyright, patent, trademark, and attribution notices from the
95
+ Source form of the Work, excluding those notices that do not pertain to any
96
+ part of the Derivative Works; and
97
+
98
+ (d) If the Work includes a "NOTICE" text file as part of its distribution, then
99
+ any Derivative Works that You distribute must include a readable copy of the
100
+ attribution notices contained within such NOTICE file, excluding those notices
101
+ that do not pertain to any part of the Derivative Works, in at least one of the
102
+ following places: within a NOTICE text file distributed as part of the
103
+ Derivative Works; within the Source form or documentation, if provided along
104
+ with the Derivative Works; or, within a display generated by the Derivative
105
+ Works, if and wherever such third-party notices normally appear. The contents
106
+ of the NOTICE file are for informational purposes only and do not modify the
107
+ License. You may add Your own attribution notices within Derivative Works that
108
+ You distribute, alongside or as an addendum to the NOTICE text from the Work,
109
+ provided that such additional attribution notices cannot be construed as
110
+ modifying the License.
111
+
112
+ You may add Your own copyright statement to Your modifications and may provide
113
+ additional or different license terms and conditions for use, reproduction, or
114
+ distribution of Your modifications, or for any such Derivative Works as a
115
+ whole, provided Your use, reproduction, and distribution of the Work otherwise
116
+ complies with the conditions stated in this License.
117
+
118
+ 5. Submission of Contributions. Unless You explicitly state otherwise, any
119
+ Contribution intentionally submitted for inclusion in the Work by You to the
120
+ Licensor shall be under the terms and conditions of this License, without any
121
+ additional terms or conditions. Notwithstanding the above, nothing herein shall
122
+ supersede or modify the terms of any separate license agreement you may have
123
+ executed with Licensor regarding such Contributions.
124
+
125
+ 6. Trademarks. This License does not grant permission to use the trade names,
126
+ trademarks, service marks, or product names of the Licensor, except as required
127
+ for reasonable and customary use in describing the origin of the Work and
128
+ reproducing the content of the NOTICE file.
129
+
130
+ 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in
131
+ writing, Licensor provides the Work (and each Contributor provides its
132
+ Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
133
+ KIND, either express or implied, including, without limitation, any warranties
134
+ or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
135
+ PARTICULAR PURPOSE. You are solely responsible for determining the
136
+ appropriateness of using or redistributing the Work and assume any risks
137
+ associated with Your exercise of permissions under this License.
138
+
139
+ 8. Limitation of Liability. In no event and under no legal theory, whether in
140
+ tort (including negligence), contract, or otherwise, unless required by
141
+ applicable law (such as deliberate and grossly negligent acts) or agreed to in
142
+ writing, shall any Contributor be liable to You for damages, including any
143
+ direct, indirect, special, incidental, or consequential damages of any
144
+ character arising as a result of this License or out of the use or inability to
145
+ use the Work (including but not limited to damages for loss of goodwill, work
146
+ stoppage, computer failure or malfunction, or any and all other commercial
147
+ damages or losses), even if such Contributor has been advised of the
148
+ possibility of such damages.
149
+
150
+ 9. Accepting Warranty or Additional Liability. While redistributing the Work or
151
+ Derivative Works thereof, You may choose to offer, and charge a fee for,
152
+ acceptance of support, warranty, indemnity, or other liability obligations
153
+ and/or rights consistent with this License. However, in accepting such
154
+ obligations, You may act only on Your own behalf and on Your sole
155
+ responsibility, not on behalf of any other Contributor, and only if You agree
156
+ to indemnify, defend, and hold each Contributor harmless for any liability
157
+ incurred by, or claims asserted against, such Contributor by reason of your
158
+ accepting any such warranty or additional liability.
159
+
160
+ END OF TERMS AND CONDITIONS
161
+
162
+ Copyright 2026 Matthias Lenga
163
+
164
+ Licensed under the Apache License, Version 2.0 (the "License");
165
+ you may not use this file except in compliance with the License.
166
+ You may obtain a copy of the License at
167
+
168
+ http://www.apache.org/licenses/LICENSE-2.0
169
+
170
+ Unless required by applicable law or agreed to in writing, software
171
+ distributed under the License is distributed on an "AS IS" BASIS,
172
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
173
+ See the License for the specific language governing permissions and
174
+ limitations under the License.
@@ -0,0 +1,65 @@
1
+ Metadata-Version: 2.4
2
+ Name: tabcaddy
3
+ Version: 0.1.0
4
+ Summary: Dataset-centric CLI toolkit for exploring, compiling, transforming, and diffing tabular data
5
+ Author: Matthias Lenga
6
+ License-Expression: Apache-2.0
7
+ License-File: LICENSE
8
+ Requires-Python: >=3.13
9
+ Requires-Dist: asciichartpy>=1.5.25
10
+ Requires-Dist: numpy>=2.4.6
11
+ Requires-Dist: polars>=1.41.2
12
+ Requires-Dist: pyarrow>=24.0.0
13
+ Requires-Dist: pydantic>=2.13.4
14
+ Requires-Dist: pytest>=9.0.3
15
+ Requires-Dist: rich>=15.0.0
16
+ Requires-Dist: typer>=0.26.7
17
+ Description-Content-Type: text/markdown
18
+
19
+ ## TabCaddy
20
+
21
+ [![CI](https://github.com/MatthiasLen/TabCaddy/actions/workflows/ci.yml/badge.svg)](https://github.com/MatthiasLen/TabCaddy/actions/workflows/ci.yml)
22
+
23
+ TabCaddy is a dataset-centric CLI for exploring, compiling, transforming, and diffing CSV, Feather, and compiled parquet datasets.
24
+
25
+ ### Commands
26
+
27
+ - `tabcaddy summary <source>`
28
+ - `tabcaddy schema <source>`
29
+ - `tabcaddy compile <folder> [--schema N]`
30
+ - `tabcaddy transform <input> <transform.py> [output]`
31
+ - `tabcaddy scaffold-transform <source> [--output transform_template.py]`
32
+ - `tabcaddy diff <left> <right> [--level metadata|statistics|full]`
33
+
34
+ ### Profiles
35
+
36
+ - `quick`: metadata and schema counts
37
+ - `standard`: metadata, schema overview, lightweight statistics
38
+ - `deep`: full statistics, uniqueness estimates, histograms, and column hashes
39
+
40
+ Run with the local virtual environment:
41
+
42
+ ```powershell
43
+ .\.venv\Scripts\python -m tabcaddy --help
44
+ ```
45
+
46
+ ### Development Checks
47
+
48
+ Install the dev tools and register the hooks:
49
+
50
+ ```powershell
51
+ uv sync --group dev
52
+ uv run pre-commit install
53
+ ```
54
+
55
+ Run the same checks locally that GitHub Actions runs:
56
+
57
+ ```powershell
58
+ uv run pre-commit run --all-files
59
+ ```
60
+
61
+ Run the test suite:
62
+
63
+ ```powershell
64
+ .\.venv\Scripts\python -m pytest
65
+ ```
@@ -0,0 +1,47 @@
1
+ ## TabCaddy
2
+
3
+ [![CI](https://github.com/MatthiasLen/TabCaddy/actions/workflows/ci.yml/badge.svg)](https://github.com/MatthiasLen/TabCaddy/actions/workflows/ci.yml)
4
+
5
+ TabCaddy is a dataset-centric CLI for exploring, compiling, transforming, and diffing CSV, Feather, and compiled parquet datasets.
6
+
7
+ ### Commands
8
+
9
+ - `tabcaddy summary <source>`
10
+ - `tabcaddy schema <source>`
11
+ - `tabcaddy compile <folder> [--schema N]`
12
+ - `tabcaddy transform <input> <transform.py> [output]`
13
+ - `tabcaddy scaffold-transform <source> [--output transform_template.py]`
14
+ - `tabcaddy diff <left> <right> [--level metadata|statistics|full]`
15
+
16
+ ### Profiles
17
+
18
+ - `quick`: metadata and schema counts
19
+ - `standard`: metadata, schema overview, lightweight statistics
20
+ - `deep`: full statistics, uniqueness estimates, histograms, and column hashes
21
+
22
+ Run with the local virtual environment:
23
+
24
+ ```powershell
25
+ .\.venv\Scripts\python -m tabcaddy --help
26
+ ```
27
+
28
+ ### Development Checks
29
+
30
+ Install the dev tools and register the hooks:
31
+
32
+ ```powershell
33
+ uv sync --group dev
34
+ uv run pre-commit install
35
+ ```
36
+
37
+ Run the same checks locally that GitHub Actions runs:
38
+
39
+ ```powershell
40
+ uv run pre-commit run --all-files
41
+ ```
42
+
43
+ Run the test suite:
44
+
45
+ ```powershell
46
+ .\.venv\Scripts\python -m pytest
47
+ ```