tablesleuth 0.4.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tablesleuth-0.4.2/.gitattributes +24 -0
- tablesleuth-0.4.2/.github/workflows/ci.yml +80 -0
- tablesleuth-0.4.2/.github/workflows/publish.yml +43 -0
- tablesleuth-0.4.2/.gitignore +88 -0
- tablesleuth-0.4.2/.pre-commit-config.yaml +58 -0
- tablesleuth-0.4.2/CHANGELOG.md +179 -0
- tablesleuth-0.4.2/CONTRIBUTING.md +550 -0
- tablesleuth-0.4.2/DEVELOPMENT_SETUP.md +333 -0
- tablesleuth-0.4.2/LICENSE +19 -0
- tablesleuth-0.4.2/Makefile +98 -0
- tablesleuth-0.4.2/PKG-INFO +410 -0
- tablesleuth-0.4.2/QUICKSTART.md +438 -0
- tablesleuth-0.4.2/QUICK_REFERENCE.md +277 -0
- tablesleuth-0.4.2/README.md +347 -0
- tablesleuth-0.4.2/TABLESLEUTH_SETUP.md +443 -0
- tablesleuth-0.4.2/docs/ARCHITECTURE.md +1445 -0
- tablesleuth-0.4.2/docs/DEVELOPER_GUIDE.md +1251 -0
- tablesleuth-0.4.2/docs/EC2_DEPLOYMENT_GUIDE.md +765 -0
- tablesleuth-0.4.2/docs/GIZMOSQL_DEPLOYMENT_GUIDE.md +694 -0
- tablesleuth-0.4.2/docs/PERFORMANCE_PROFILING.md +578 -0
- tablesleuth-0.4.2/docs/USER_GUIDE.md +832 -0
- tablesleuth-0.4.2/docs/images/iceberg_compare.png +0 -0
- tablesleuth-0.4.2/docs/images/iceberg_data_sample.png +0 -0
- tablesleuth-0.4.2/docs/images/iceberg_deletes.png +0 -0
- tablesleuth-0.4.2/docs/images/iceberg_files.png +0 -0
- tablesleuth-0.4.2/docs/images/iceberg_overview.png +0 -0
- tablesleuth-0.4.2/docs/images/iceberg_performance_dropdown.png +0 -0
- tablesleuth-0.4.2/docs/images/iceberg_performance_filtered.png +0 -0
- tablesleuth-0.4.2/docs/images/iceberg_performance_sample.png +0 -0
- tablesleuth-0.4.2/docs/images/iceberg_properties.png +0 -0
- tablesleuth-0.4.2/docs/images/iceberg_schema.png +0 -0
- tablesleuth-0.4.2/docs/images/parquet_data_sample.png +0 -0
- tablesleuth-0.4.2/docs/images/parquet_file_detail.png +0 -0
- tablesleuth-0.4.2/docs/images/parquet_profile.png +0 -0
- tablesleuth-0.4.2/docs/images/parquet_row_groups.png +0 -0
- tablesleuth-0.4.2/docs/images/parquet_schema.png +0 -0
- tablesleuth-0.4.2/docs/images/parquet_structure.png +0 -0
- tablesleuth-0.4.2/examples/inspect_s3_tables.py +146 -0
- tablesleuth-0.4.2/pyproject.toml +224 -0
- tablesleuth-0.4.2/resources/README.md +106 -0
- tablesleuth-0.4.2/resources/config.json.template +10 -0
- tablesleuth-0.4.2/resources/tablesleuth_create_env.py +1042 -0
- tablesleuth-0.4.2/resources/tablesleuth_teardown_env.py +260 -0
- tablesleuth-0.4.2/src/tablesleuth/__init__.py +4 -0
- tablesleuth-0.4.2/src/tablesleuth/cli.py +829 -0
- tablesleuth-0.4.2/src/tablesleuth/config.py +122 -0
- tablesleuth-0.4.2/src/tablesleuth/exceptions.py +91 -0
- tablesleuth-0.4.2/src/tablesleuth/models/__init__.py +44 -0
- tablesleuth-0.4.2/src/tablesleuth/models/file_ref.py +34 -0
- tablesleuth-0.4.2/src/tablesleuth/models/iceberg.py +401 -0
- tablesleuth-0.4.2/src/tablesleuth/models/parquet.py +83 -0
- tablesleuth-0.4.2/src/tablesleuth/models/performance.py +61 -0
- tablesleuth-0.4.2/src/tablesleuth/models/profiling.py +26 -0
- tablesleuth-0.4.2/src/tablesleuth/models/snapshot.py +17 -0
- tablesleuth-0.4.2/src/tablesleuth/models/table.py +10 -0
- tablesleuth-0.4.2/src/tablesleuth/services/__init__.py +1 -0
- tablesleuth-0.4.2/src/tablesleuth/services/file_discovery.py +171 -0
- tablesleuth-0.4.2/src/tablesleuth/services/filesystem.py +127 -0
- tablesleuth-0.4.2/src/tablesleuth/services/formats/__init__.py +4 -0
- tablesleuth-0.4.2/src/tablesleuth/services/formats/base.py +20 -0
- tablesleuth-0.4.2/src/tablesleuth/services/formats/iceberg.py +275 -0
- tablesleuth-0.4.2/src/tablesleuth/services/iceberg_metadata_service.py +473 -0
- tablesleuth-0.4.2/src/tablesleuth/services/mor_service.py +76 -0
- tablesleuth-0.4.2/src/tablesleuth/services/parquet_service.py +422 -0
- tablesleuth-0.4.2/src/tablesleuth/services/profiling/__init__.py +5 -0
- tablesleuth-0.4.2/src/tablesleuth/services/profiling/backend_base.py +96 -0
- tablesleuth-0.4.2/src/tablesleuth/services/profiling/fake_backend.py +45 -0
- tablesleuth-0.4.2/src/tablesleuth/services/profiling/gizmo_duckdb.py +781 -0
- tablesleuth-0.4.2/src/tablesleuth/services/snapshot_performance_analyzer.py +173 -0
- tablesleuth-0.4.2/src/tablesleuth/services/snapshot_test_manager.py +209 -0
- tablesleuth-0.4.2/src/tablesleuth/tui/__init__.py +3 -0
- tablesleuth-0.4.2/src/tablesleuth/tui/app.py +606 -0
- tablesleuth-0.4.2/src/tablesleuth/tui/views/__init__.py +22 -0
- tablesleuth-0.4.2/src/tablesleuth/tui/views/data_sample_view.py +597 -0
- tablesleuth-0.4.2/src/tablesleuth/tui/views/file_detail_view.py +163 -0
- tablesleuth-0.4.2/src/tablesleuth/tui/views/file_list_view.py +149 -0
- tablesleuth-0.4.2/src/tablesleuth/tui/views/iceberg_view.py +819 -0
- tablesleuth-0.4.2/src/tablesleuth/tui/views/profile_view.py +470 -0
- tablesleuth-0.4.2/src/tablesleuth/tui/views/row_groups_view.py +275 -0
- tablesleuth-0.4.2/src/tablesleuth/tui/views/schema_view.py +614 -0
- tablesleuth-0.4.2/src/tablesleuth/tui/views/snapshot_comparison_view.py +310 -0
- tablesleuth-0.4.2/src/tablesleuth/tui/views/snapshot_detail_view.py +489 -0
- tablesleuth-0.4.2/src/tablesleuth/tui/views/structure_view.py +354 -0
- tablesleuth-0.4.2/src/tablesleuth/tui/widgets/__init__.py +6 -0
- tablesleuth-0.4.2/src/tablesleuth/tui/widgets/loading.py +78 -0
- tablesleuth-0.4.2/src/tablesleuth/tui/widgets/notification.py +164 -0
- tablesleuth-0.4.2/src/tablesleuth/utils/__init__.py +1 -0
- tablesleuth-0.4.2/src/tablesleuth/utils/config_templates.py +253 -0
- tablesleuth-0.4.2/tablesleuth.toml +37 -0
- tablesleuth-0.4.2/tests/__init__.py +0 -0
- tablesleuth-0.4.2/tests/conftest.py +106 -0
- tablesleuth-0.4.2/tests/test_app_advanced.py +1259 -0
- tablesleuth-0.4.2/tests/test_app_caching.py +327 -0
- tablesleuth-0.4.2/tests/test_app_file_selection.py +269 -0
- tablesleuth-0.4.2/tests/test_app_layout.py +243 -0
- tablesleuth-0.4.2/tests/test_app_profiling.py +335 -0
- tablesleuth-0.4.2/tests/test_cli.py +106 -0
- tablesleuth-0.4.2/tests/test_cli_commands.py +562 -0
- tablesleuth-0.4.2/tests/test_cli_config_commands.py +473 -0
- tablesleuth-0.4.2/tests/test_config.py +221 -0
- tablesleuth-0.4.2/tests/test_data_sample_view.py +839 -0
- tablesleuth-0.4.2/tests/test_end_to_end.py +366 -0
- tablesleuth-0.4.2/tests/test_error_handling.py +183 -0
- tablesleuth-0.4.2/tests/test_fake_profiler.py +38 -0
- tablesleuth-0.4.2/tests/test_file_discovery.py +280 -0
- tablesleuth-0.4.2/tests/test_filesystem.py +210 -0
- tablesleuth-0.4.2/tests/test_filter_validation.py +89 -0
- tablesleuth-0.4.2/tests/test_gizmo_duckdb_sanitization.py +122 -0
- tablesleuth-0.4.2/tests/test_gizmo_duckdb_unit.py +567 -0
- tablesleuth-0.4.2/tests/test_gizmo_profiler_config.py +167 -0
- tablesleuth-0.4.2/tests/test_gizmosql_integration.py +217 -0
- tablesleuth-0.4.2/tests/test_iceberg_adapter.py +6 -0
- tablesleuth-0.4.2/tests/test_iceberg_adapter_helpers.py +175 -0
- tablesleuth-0.4.2/tests/test_iceberg_adapter_unit.py +605 -0
- tablesleuth-0.4.2/tests/test_iceberg_metadata_service.py +161 -0
- tablesleuth-0.4.2/tests/test_iceberg_metadata_service_unit.py +520 -0
- tablesleuth-0.4.2/tests/test_iceberg_models.py +518 -0
- tablesleuth-0.4.2/tests/test_iceberg_view.py +881 -0
- tablesleuth-0.4.2/tests/test_keybindings.py +345 -0
- tablesleuth-0.4.2/tests/test_mor_service.py +465 -0
- tablesleuth-0.4.2/tests/test_parquet_inspector.py +392 -0
- tablesleuth-0.4.2/tests/test_parquet_profiling_integration.py +125 -0
- tablesleuth-0.4.2/tests/test_performance_models.py +211 -0
- tablesleuth-0.4.2/tests/test_profile_view.py +430 -0
- tablesleuth-0.4.2/tests/test_profiling_backend.py +325 -0
- tablesleuth-0.4.2/tests/test_row_groups_view.py +349 -0
- tablesleuth-0.4.2/tests/test_s3_tables_arn.py +131 -0
- tablesleuth-0.4.2/tests/test_snapshot_comparison_view.py +425 -0
- tablesleuth-0.4.2/tests/test_snapshot_detail_view.py +533 -0
- tablesleuth-0.4.2/tests/test_snapshot_performance_analyzer.py +209 -0
- tablesleuth-0.4.2/tests/test_snapshot_performance_analyzer_unit.py +227 -0
- tablesleuth-0.4.2/tests/test_snapshot_test_manager.py +261 -0
- tablesleuth-0.4.2/tests/test_sql_injection_prevention.py +167 -0
- tablesleuth-0.4.2/tests/test_structure_integration.py +536 -0
- tablesleuth-0.4.2/tests/test_structure_view.py +439 -0
- tablesleuth-0.4.2/tests/test_tui_smoke.py +12 -0
- tablesleuth-0.4.2/uv.lock +2226 -0
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# Auto detect text files and normalize line endings to LF in the repository
|
|
2
|
+
* text=auto eol=lf
|
|
3
|
+
|
|
4
|
+
# Explicitly declare text files you want to always be normalized and converted
|
|
5
|
+
# to LF line endings on checkout (every rule below sets eol=lf)
|
|
6
|
+
*.py text eol=lf
|
|
7
|
+
*.md text eol=lf
|
|
8
|
+
*.yml text eol=lf
|
|
9
|
+
*.yaml text eol=lf
|
|
10
|
+
*.toml text eol=lf
|
|
11
|
+
*.json text eol=lf
|
|
12
|
+
*.txt text eol=lf
|
|
13
|
+
*.sh text eol=lf
|
|
14
|
+
Makefile text eol=lf
|
|
15
|
+
|
|
16
|
+
# Denote all files that are truly binary and should not be modified
|
|
17
|
+
*.png binary
|
|
18
|
+
*.jpg binary
|
|
19
|
+
*.jpeg binary
|
|
20
|
+
*.gif binary
|
|
21
|
+
*.ico binary
|
|
22
|
+
*.whl binary
|
|
23
|
+
*.gz binary
|
|
24
|
+
*.zip binary
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [ main, develop ]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [ main, develop ]
|
|
8
|
+
|
|
9
|
+
jobs:
|
|
10
|
+
test:
|
|
11
|
+
runs-on: ${{ matrix.os }}
|
|
12
|
+
strategy:
|
|
13
|
+
matrix:
|
|
14
|
+
os: [ubuntu-latest, macos-latest, windows-latest]
|
|
15
|
+
python-version: ['3.13', '3.14']
|
|
16
|
+
|
|
17
|
+
steps:
|
|
18
|
+
- uses: actions/checkout@v4
|
|
19
|
+
|
|
20
|
+
- name: Set up Python ${{ matrix.python-version }}
|
|
21
|
+
uses: actions/setup-python@v5
|
|
22
|
+
with:
|
|
23
|
+
python-version: ${{ matrix.python-version }}
|
|
24
|
+
|
|
25
|
+
- name: Install uv
|
|
26
|
+
run: pip install uv
|
|
27
|
+
|
|
28
|
+
- name: Install dependencies
|
|
29
|
+
run: uv sync --extra dev
|
|
30
|
+
|
|
31
|
+
- name: Run linter
|
|
32
|
+
run: uv run ruff check .
|
|
33
|
+
|
|
34
|
+
- name: Run formatter check
|
|
35
|
+
run: uv run ruff format --check .
|
|
36
|
+
|
|
37
|
+
- name: Run type checker
|
|
38
|
+
run: uv run mypy src
|
|
39
|
+
|
|
40
|
+
- name: Run security scan
|
|
41
|
+
run: uv run bandit -c pyproject.toml -r src/
|
|
42
|
+
|
|
43
|
+
- name: Run tests
|
|
44
|
+
run: uv run pytest --cov=tablesleuth --cov-report=xml --cov-report=term
|
|
45
|
+
|
|
46
|
+
- name: Upload coverage to Codecov
|
|
47
|
+
if: matrix.os == 'ubuntu-latest' && matrix.python-version == '3.13'
|
|
48
|
+
uses: codecov/codecov-action@v4
|
|
49
|
+
with:
|
|
50
|
+
file: ./coverage.xml
|
|
51
|
+
fail_ci_if_error: false
|
|
52
|
+
|
|
53
|
+
build:
|
|
54
|
+
runs-on: ubuntu-latest
|
|
55
|
+
needs: test
|
|
56
|
+
|
|
57
|
+
steps:
|
|
58
|
+
- uses: actions/checkout@v4
|
|
59
|
+
|
|
60
|
+
- name: Set up Python
|
|
61
|
+
uses: actions/setup-python@v5
|
|
62
|
+
with:
|
|
63
|
+
python-version: '3.13'
|
|
64
|
+
|
|
65
|
+
- name: Install uv
|
|
66
|
+
run: pip install uv
|
|
67
|
+
|
|
68
|
+
- name: Build package
|
|
69
|
+
run: uv build
|
|
70
|
+
|
|
71
|
+
- name: Check package
|
|
72
|
+
run: |
|
|
73
|
+
pip install twine
|
|
74
|
+
twine check dist/*
|
|
75
|
+
|
|
76
|
+
- name: Upload artifacts
|
|
77
|
+
uses: actions/upload-artifact@v4
|
|
78
|
+
with:
|
|
79
|
+
name: dist
|
|
80
|
+
path: dist/
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
name: Publish to PyPI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
release:
|
|
5
|
+
types: [published]
|
|
6
|
+
|
|
7
|
+
permissions:
|
|
8
|
+
contents: read
|
|
9
|
+
|
|
10
|
+
jobs:
|
|
11
|
+
publish:
|
|
12
|
+
runs-on: ubuntu-latest
|
|
13
|
+
environment:
|
|
14
|
+
name: pypi
|
|
15
|
+
url: https://pypi.org/p/TableSleuth
|
|
16
|
+
permissions:
|
|
17
|
+
id-token: write # Required for trusted publishing
|
|
18
|
+
|
|
19
|
+
steps:
|
|
20
|
+
- uses: actions/checkout@v4
|
|
21
|
+
|
|
22
|
+
- name: Set up Python
|
|
23
|
+
uses: actions/setup-python@v5
|
|
24
|
+
with:
|
|
25
|
+
python-version: '3.13'
|
|
26
|
+
|
|
27
|
+
- name: Install uv
|
|
28
|
+
run: pip install uv
|
|
29
|
+
|
|
30
|
+
- name: Build package
|
|
31
|
+
run: uv build
|
|
32
|
+
|
|
33
|
+
- name: Verify version matches tag
|
|
34
|
+
run: |
|
|
35
|
+
TAG_VERSION=${GITHUB_REF#refs/tags/v}
|
|
36
|
+
PKG_VERSION=$(grep '^version = ' pyproject.toml | cut -d'"' -f2)
|
|
37
|
+
if [ "$TAG_VERSION" != "$PKG_VERSION" ]; then
|
|
38
|
+
echo "Tag version ($TAG_VERSION) does not match package version ($PKG_VERSION)"
|
|
39
|
+
exit 1
|
|
40
|
+
fi
|
|
41
|
+
|
|
42
|
+
- name: Publish to PyPI
|
|
43
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
# Environment variables
|
|
2
|
+
.env
|
|
3
|
+
.env.local
|
|
4
|
+
|
|
5
|
+
# Python
|
|
6
|
+
__pycache__/
|
|
7
|
+
*.py[cod]
|
|
8
|
+
*$py.class
|
|
9
|
+
*.so
|
|
10
|
+
.Python
|
|
11
|
+
build/
|
|
12
|
+
develop-eggs/
|
|
13
|
+
dist/
|
|
14
|
+
downloads/
|
|
15
|
+
eggs/
|
|
16
|
+
.eggs/
|
|
17
|
+
lib/
|
|
18
|
+
lib64/
|
|
19
|
+
parts/
|
|
20
|
+
sdist/
|
|
21
|
+
var/
|
|
22
|
+
wheels/
|
|
23
|
+
*.egg-info/
|
|
24
|
+
.installed.cfg
|
|
25
|
+
*.egg
|
|
26
|
+
|
|
27
|
+
# Virtual environments
|
|
28
|
+
venv/
|
|
29
|
+
env/
|
|
30
|
+
ENV/
|
|
31
|
+
.venv
|
|
32
|
+
|
|
33
|
+
# IDEs
|
|
34
|
+
.vscode/
|
|
35
|
+
.idea/
|
|
36
|
+
*.swp
|
|
37
|
+
*.swo
|
|
38
|
+
*~
|
|
39
|
+
|
|
40
|
+
# Logs
|
|
41
|
+
*.log
|
|
42
|
+
logs/
|
|
43
|
+
|
|
44
|
+
# Testing
|
|
45
|
+
.pytest_cache/
|
|
46
|
+
.coverage
|
|
47
|
+
htmlcov/
|
|
48
|
+
.tox/
|
|
49
|
+
coverage.xml
|
|
50
|
+
|
|
51
|
+
# Type checking
|
|
52
|
+
.mypy_cache/
|
|
53
|
+
.dmypy.json
|
|
54
|
+
dmypy.json
|
|
55
|
+
|
|
56
|
+
# Ruff
|
|
57
|
+
.ruff_cache/
|
|
58
|
+
|
|
59
|
+
# OS
|
|
60
|
+
.DS_Store
|
|
61
|
+
Thumbs.db
|
|
62
|
+
|
|
63
|
+
# Project specific
|
|
64
|
+
resources/config.json
|
|
65
|
+
*.pem
|
|
66
|
+
input/
|
|
67
|
+
.kiro/
|
|
68
|
+
.chroma/
|
|
69
|
+
.ruff_cache/
|
|
70
|
+
.mypy_cache/
|
|
71
|
+
*.zip
|
|
72
|
+
data/
|
|
73
|
+
scripts/
|
|
74
|
+
docs/archive/
|
|
75
|
+
resources/debug/
|
|
76
|
+
resources/docs/
|
|
77
|
+
debug/
|
|
78
|
+
.pyiceberg.yaml
|
|
79
|
+
|
|
80
|
+
# Gradio
|
|
81
|
+
gradio_cached_examples/
|
|
82
|
+
flagged/
|
|
83
|
+
|
|
84
|
+
# Docker
|
|
85
|
+
docker-compose.override.yml
|
|
86
|
+
*.tar.gz
|
|
87
|
+
backups/
|
|
88
|
+
ssl/
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
# Pre-commit hooks for tablesleuth
|
|
2
|
+
# Install: uv run pre-commit install
|
|
3
|
+
# Run manually: uv run pre-commit run --all-files
|
|
4
|
+
|
|
5
|
+
repos:
|
|
6
|
+
# Ruff - replaces black, isort, flake8
|
|
7
|
+
- repo: https://github.com/astral-sh/ruff-pre-commit
|
|
8
|
+
rev: v0.7.4
|
|
9
|
+
hooks:
|
|
10
|
+
# Run the linter
|
|
11
|
+
- id: ruff
|
|
12
|
+
args: [--fix]
|
|
13
|
+
# Run the formatter
|
|
14
|
+
- id: ruff-format
|
|
15
|
+
|
|
16
|
+
# mypy - type checking
|
|
17
|
+
- repo: https://github.com/pre-commit/mirrors-mypy
|
|
18
|
+
rev: v1.13.0
|
|
19
|
+
hooks:
|
|
20
|
+
- id: mypy
|
|
21
|
+
additional_dependencies:
|
|
22
|
+
- types-toml
|
|
23
|
+
- types-pyyaml
|
|
24
|
+
- pydantic>=2.0
|
|
25
|
+
- textual>=0.47.0
|
|
26
|
+
- rich>=13.0.0
|
|
27
|
+
- pyarrow>=14.0.0
|
|
28
|
+
- pyiceberg>=0.6.0
|
|
29
|
+
- adbc-driver-flightsql>=0.8.0
|
|
30
|
+
- boto3>=1.34.0
|
|
31
|
+
- botocore>=1.34.0
|
|
32
|
+
- fsspec>=2023.0.0
|
|
33
|
+
- s3fs>=2023.0.0
|
|
34
|
+
args: [--config-file=pyproject.toml, src/]
|
|
35
|
+
pass_filenames: false
|
|
36
|
+
|
|
37
|
+
# Standard pre-commit hooks
|
|
38
|
+
- repo: https://github.com/pre-commit/pre-commit-hooks
|
|
39
|
+
rev: v5.0.0
|
|
40
|
+
hooks:
|
|
41
|
+
- id: trailing-whitespace
|
|
42
|
+
- id: end-of-file-fixer
|
|
43
|
+
- id: check-yaml
|
|
44
|
+
- id: check-toml
|
|
45
|
+
- id: check-added-large-files
|
|
46
|
+
args: [--maxkb=1000]
|
|
47
|
+
- id: check-merge-conflict
|
|
48
|
+
- id: check-case-conflict
|
|
49
|
+
- id: mixed-line-ending
|
|
50
|
+
args: [--fix=lf]
|
|
51
|
+
|
|
52
|
+
# Security scanning with bandit
|
|
53
|
+
- repo: https://github.com/PyCQA/bandit
|
|
54
|
+
rev: 1.7.10
|
|
55
|
+
hooks:
|
|
56
|
+
- id: bandit
|
|
57
|
+
args: [-c, pyproject.toml, -r, src/]
|
|
58
|
+
additional_dependencies: ["bandit[toml]"]
|
|
@@ -0,0 +1,179 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to this project will be documented in this file.
|
|
4
|
+
|
|
5
|
+
## [0.4.2] - 2026-01-17
|
|
6
|
+
|
|
7
|
+
### Added
|
|
8
|
+
- **Configuration Management Commands**
|
|
9
|
+
- `tablesleuth init` - Interactive configuration file initialization
|
|
10
|
+
- Creates `tablesleuth.toml` and `.pyiceberg.yaml` with comprehensive templates
|
|
11
|
+
- Prompts for home directory (~/) or current directory (./) placement
|
|
12
|
+
- Includes `--force` flag to overwrite existing files
|
|
13
|
+
- Generates well-commented templates with multiple catalog examples
|
|
14
|
+
- `tablesleuth config-check` - Configuration validation and testing
|
|
15
|
+
- Validates all configuration files and syntax
|
|
16
|
+
- Tests GizmoSQL connection
|
|
17
|
+
- Checks PyIceberg catalog configuration
|
|
18
|
+
- Shows configuration precedence and active values
|
|
19
|
+
- Supports `-v/--verbose` flag for detailed output
|
|
20
|
+
|
|
21
|
+
### Changed
|
|
22
|
+
- **Configuration File Locations** - Simplified configuration paths
|
|
23
|
+
- Removed `~/.config/tablesleuth/` directory approach
|
|
24
|
+
- Now supports: `./tablesleuth.toml` (local) and `~/tablesleuth.toml` (home)
|
|
25
|
+
- PyIceberg config: `./.pyiceberg.yaml` (local) and `~/.pyiceberg.yaml` (home)
|
|
26
|
+
- Respects `PYICEBERG_HOME` environment variable for PyIceberg config location
|
|
27
|
+
|
|
28
|
+
- **Configuration Priority** - Clear precedence order
|
|
29
|
+
1. Environment variables (`TABLESLEUTH_*`, `PYICEBERG_*`)
|
|
30
|
+
2. Local config files (current directory)
|
|
31
|
+
3. Home config files (home directory)
|
|
32
|
+
4. Built-in defaults
|
|
33
|
+
|
|
34
|
+
- **Environment Variable Support**
|
|
35
|
+
- `TABLESLEUTH_CONFIG` - Override config file path
|
|
36
|
+
- Existing: `TABLESLEUTH_CATALOG_NAME`, `TABLESLEUTH_GIZMO_*`
|
|
37
|
+
- PyIceberg native: `PYICEBERG_HOME`
|
|
38
|
+
|
|
39
|
+
- **Configuration File Renamed** - Consistency with package name
|
|
40
|
+
- `table_sleuth.toml` → `tablesleuth.toml`
|
|
41
|
+
- Updated all documentation and code references
|
|
42
|
+
|
|
43
|
+
### Fixed
|
|
44
|
+
- **Configuration Error Handling** - Improved error messages and handling
|
|
45
|
+
- Fixed unhandled `FileNotFoundError` in `inspect` and `iceberg` commands when `TABLESLEUTH_CONFIG` points to a non-existent file
|
|
46
|
+
- Fixed unhandled exception in `config-check` command with an invalid `TABLESLEUTH_CONFIG` environment variable
|
|
47
|
+
- All affected commands now show helpful error messages suggesting `tablesleuth init` instead of tracebacks
|
|
48
|
+
- Added proper try-except blocks around `load_config()` calls in main CLI commands
|
|
49
|
+
|
|
50
|
+
- **Configuration Template TOML Syntax** - Fixed invalid TOML in generated config
|
|
51
|
+
- Changed `default = null` to commented `# default = ""` (TOML doesn't support null type)
|
|
52
|
+
- Generated config files now parse correctly without `TOMLDecodeError`
|
|
53
|
+
- Affects `tablesleuth init` command output
|
|
54
|
+
|
|
55
|
+
- **S3 Tables Catalog Configuration** - Fixed incorrect catalog type and improved flexibility
|
|
56
|
+
- Changed S3 Tables catalog from `type: glue` to `type: rest` with proper REST API settings
|
|
57
|
+
- Added required REST API configuration: `uri`, `rest.sigv4-enabled`, `rest.signing-name`, `rest.signing-region`
|
|
58
|
+
- Fixed hardcoded catalog name - now supports multiple S3 Tables catalogs
|
|
59
|
+
- Users can specify which S3 Tables catalog to use with `--catalog` flag when using ARNs
|
|
60
|
+
- Default catalog name "s3tables" is used when an ARN is provided without the `--catalog` flag
|
|
61
|
+
- Added clear documentation and usage examples in template showing multiple S3 Tables catalogs
|
|
62
|
+
- Clarified difference between Glue catalog and S3 Tables catalog
|
|
63
|
+
|
|
64
|
+
- **GizmoSQL Optional Component Handling** - Made GizmoSQL truly optional
|
|
65
|
+
- `config-check` command no longer fails when GizmoSQL connection fails
|
|
66
|
+
- Added `--with-gizmosql` flag to explicitly test GizmoSQL connection
|
|
67
|
+
- GizmoSQL test is now skipped by default (shown as "⊘ Skipped")
|
|
68
|
+
- Exit code 0 (success) when only optional components fail
|
|
69
|
+
- Consistent with other optional checks like missing PyIceberg config
|
|
70
|
+
|
|
71
|
+
### Dependencies
|
|
72
|
+
- Added `pyyaml>=6.0.0` for PyIceberg config validation
|
|
73
|
+
|
|
74
|
+
## [0.4.1] - 2026-01-17
|
|
75
|
+
|
|
76
|
+
### Changed
|
|
77
|
+
- **Python Module Renamed to `tablesleuth`** - Complete consistency across package
|
|
78
|
+
- Module directory renamed from `table_sleuth` to `tablesleuth`
|
|
79
|
+
- All imports now use `from tablesleuth import ...`
|
|
80
|
+
- Eliminates confusion between package name and import name
|
|
81
|
+
- **Breaking Change:** Update all imports from `table_sleuth` to `tablesleuth`
|
|
82
|
+
|
|
83
|
+
### Migration
|
|
84
|
+
If upgrading from v0.4.0 (unreleased), update your imports:
|
|
85
|
+
```python
|
|
86
|
+
# Old
|
|
87
|
+
from table_sleuth import __version__
|
|
88
|
+
from table_sleuth.services import ParquetInspector
|
|
89
|
+
|
|
90
|
+
# New
|
|
91
|
+
from tablesleuth import __version__
|
|
92
|
+
from tablesleuth.services import ParquetInspector
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
## [0.4.0] - 2026-01-16 (Unreleased)
|
|
96
|
+
|
|
97
|
+
### Changed
|
|
98
|
+
- **Package Renamed to `tablesleuth`** - Unified package name for PyPI distribution
|
|
99
|
+
- CLI command changed from `table-sleuth` to `tablesleuth`
|
|
100
|
+
- Package name now matches tablesleuth.com domain
|
|
101
|
+
- Improved discoverability on PyPI
|
|
102
|
+
- **Version Management** - Consolidated version to single source of truth in `__init__.py`
|
|
103
|
+
- Removed hardcoded version from CLI
|
|
104
|
+
- Version now imported from package
|
|
105
|
+
- **Enhanced PyPI Metadata**
|
|
106
|
+
- Upgraded development status from Alpha to Beta
|
|
107
|
+
- Added comprehensive classifiers for better discoverability
|
|
108
|
+
- Added project URLs including homepage, documentation, and changelog
|
|
109
|
+
- Added publishing tools (twine, build) to dev dependencies
|
|
110
|
+
|
|
111
|
+
### Added
|
|
112
|
+
- **GitHub Actions CI/CD** - Automated testing and publishing workflows
|
|
113
|
+
- Multi-platform testing (Ubuntu, macOS, Windows)
|
|
114
|
+
- Multi-version Python testing (3.13, 3.14)
|
|
115
|
+
- Automated quality checks (ruff, mypy, bandit)
|
|
116
|
+
- Automated PyPI publishing on release
|
|
117
|
+
- Support for PyPI Trusted Publishing
|
|
118
|
+
- **PyPI Publishing Guide** - Comprehensive documentation for package publishing
|
|
119
|
+
- Step-by-step publishing instructions
|
|
120
|
+
- TestPyPI testing workflow
|
|
121
|
+
- Automated release process documentation
|
|
122
|
+
- Troubleshooting guide
|
|
123
|
+
|
|
124
|
+
## [0.3.0] - 2025-11-29
|
|
125
|
+
|
|
126
|
+
### Added
|
|
127
|
+
- **Strict MyPy Type Checking** - Comprehensive type annotations across the codebase
|
|
128
|
+
- Enabled strict mypy configuration with `disallow_untyped_defs`, `disallow_incomplete_defs`, and `warn_return_any`
|
|
129
|
+
- Added proper type annotations to all service classes and methods
|
|
130
|
+
- Configured per-module overrides for third-party libraries without type stubs
|
|
131
|
+
- Integrated mypy into pre-commit hooks with all required dependencies
|
|
132
|
+
- Zero type errors in production code (only expected import-untyped warnings for PyArrow)
|
|
133
|
+
|
|
134
|
+
- **Enhanced Documentation**
|
|
135
|
+
- Streamlined README.md with high-level feature overview and screenshot galleries
|
|
136
|
+
- Organized documentation with clear navigation to detailed guides
|
|
137
|
+
- Added visual comparison tables for Parquet and Iceberg interfaces
|
|
138
|
+
- Improved quick start examples and configuration guidance
|
|
139
|
+
|
|
140
|
+
- **UI Improvements**
|
|
141
|
+
- Removed subtitle from TUI header for cleaner interface
|
|
142
|
+
- Updated application title to "Table Sleuth - Parquet Analysis"
|
|
143
|
+
|
|
144
|
+
### Changed
|
|
145
|
+
- **Code Quality Improvements**
|
|
146
|
+
- Fixed import paths for IcebergAdapter (moved to `formats.iceberg`)
|
|
147
|
+
- Removed unreachable backwards compatibility code in gizmo_duckdb.py
|
|
148
|
+
- Added explicit type casts where needed for type safety
|
|
149
|
+
- Improved error handling with proper type annotations
|
|
150
|
+
|
|
151
|
+
- **Pre-commit Configuration**
|
|
152
|
+
- Added all required dependencies to mypy pre-commit hook
|
|
153
|
+
- Configured proper module overrides for untyped libraries (pyarrow, fsspec, s3fs, etc.)
|
|
154
|
+
- All pre-commit hooks now pass cleanly
|
|
155
|
+
|
|
156
|
+
### Fixed
|
|
157
|
+
- Type annotation issues in FileDiscoveryService, ParquetInspector, and GizmoDuckDbProfiler
|
|
158
|
+
- Missing return type annotations across multiple service classes
|
|
159
|
+
- Unused type ignore comments after fixing import paths
|
|
160
|
+
- Event handler type annotations in TUI views
|
|
161
|
+
|
|
162
|
+
## [Unreleased]
|
|
163
|
+
|
|
164
|
+
### Added
|
|
165
|
+
|
|
166
|
+
#### Performance Profiling for Merge-on-Read
|
|
167
|
+
- **Added performance profiling models** (`QueryPerformanceProfile`, `MergeOnReadPerformance`)
|
|
168
|
+
- Measures query execution time with and without delete file application
|
|
169
|
+
- Calculates merge-on-read overhead in milliseconds and percentage
|
|
170
|
+
- Tracks rows scanned, rows returned, and rows deleted
|
|
171
|
+
- Provides timing breakdown for data file scan, delete file scan, and merge operations
|
|
172
|
+
- **Extended ProfilingBackend protocol** with `profile_query_performance()` method
|
|
173
|
+
- Allows backends to implement performance profiling
|
|
174
|
+
- Optional method - backends can raise `NotImplementedError` if not supported
|
|
175
|
+
- **Comprehensive test suite** for performance profiling models
|
|
176
|
+
- Tests overhead calculation, edge cases, and zero-division handling
|
|
177
|
+
- **Updated product specification** with performance profiling user story
|
|
178
|
+
- Story 6: Performance profiling for merge-on-read queries
|
|
179
|
+
- Helps engineers make data-driven decisions about table compaction
|