tablesleuth 0.4.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (137) hide show
  1. tablesleuth-0.4.2/.gitattributes +24 -0
  2. tablesleuth-0.4.2/.github/workflows/ci.yml +80 -0
  3. tablesleuth-0.4.2/.github/workflows/publish.yml +43 -0
  4. tablesleuth-0.4.2/.gitignore +88 -0
  5. tablesleuth-0.4.2/.pre-commit-config.yaml +58 -0
  6. tablesleuth-0.4.2/CHANGELOG.md +179 -0
  7. tablesleuth-0.4.2/CONTRIBUTING.md +550 -0
  8. tablesleuth-0.4.2/DEVELOPMENT_SETUP.md +333 -0
  9. tablesleuth-0.4.2/LICENSE +19 -0
  10. tablesleuth-0.4.2/Makefile +98 -0
  11. tablesleuth-0.4.2/PKG-INFO +410 -0
  12. tablesleuth-0.4.2/QUICKSTART.md +438 -0
  13. tablesleuth-0.4.2/QUICK_REFERENCE.md +277 -0
  14. tablesleuth-0.4.2/README.md +347 -0
  15. tablesleuth-0.4.2/TABLESLEUTH_SETUP.md +443 -0
  16. tablesleuth-0.4.2/docs/ARCHITECTURE.md +1445 -0
  17. tablesleuth-0.4.2/docs/DEVELOPER_GUIDE.md +1251 -0
  18. tablesleuth-0.4.2/docs/EC2_DEPLOYMENT_GUIDE.md +765 -0
  19. tablesleuth-0.4.2/docs/GIZMOSQL_DEPLOYMENT_GUIDE.md +694 -0
  20. tablesleuth-0.4.2/docs/PERFORMANCE_PROFILING.md +578 -0
  21. tablesleuth-0.4.2/docs/USER_GUIDE.md +832 -0
  22. tablesleuth-0.4.2/docs/images/iceberg_compare.png +0 -0
  23. tablesleuth-0.4.2/docs/images/iceberg_data_sample.png +0 -0
  24. tablesleuth-0.4.2/docs/images/iceberg_deletes.png +0 -0
  25. tablesleuth-0.4.2/docs/images/iceberg_files.png +0 -0
  26. tablesleuth-0.4.2/docs/images/iceberg_overview.png +0 -0
  27. tablesleuth-0.4.2/docs/images/iceberg_performance_dropdown.png +0 -0
  28. tablesleuth-0.4.2/docs/images/iceberg_performance_filtered.png +0 -0
  29. tablesleuth-0.4.2/docs/images/iceberg_performance_sample.png +0 -0
  30. tablesleuth-0.4.2/docs/images/iceberg_properties.png +0 -0
  31. tablesleuth-0.4.2/docs/images/iceberg_schema.png +0 -0
  32. tablesleuth-0.4.2/docs/images/parquet_data_sample.png +0 -0
  33. tablesleuth-0.4.2/docs/images/parquet_file_detail.png +0 -0
  34. tablesleuth-0.4.2/docs/images/parquet_profile.png +0 -0
  35. tablesleuth-0.4.2/docs/images/parquet_row_groups.png +0 -0
  36. tablesleuth-0.4.2/docs/images/parquet_schema.png +0 -0
  37. tablesleuth-0.4.2/docs/images/parquet_structure.png +0 -0
  38. tablesleuth-0.4.2/examples/inspect_s3_tables.py +146 -0
  39. tablesleuth-0.4.2/pyproject.toml +224 -0
  40. tablesleuth-0.4.2/resources/README.md +106 -0
  41. tablesleuth-0.4.2/resources/config.json.template +10 -0
  42. tablesleuth-0.4.2/resources/tablesleuth_create_env.py +1042 -0
  43. tablesleuth-0.4.2/resources/tablesleuth_teardown_env.py +260 -0
  44. tablesleuth-0.4.2/src/tablesleuth/__init__.py +4 -0
  45. tablesleuth-0.4.2/src/tablesleuth/cli.py +829 -0
  46. tablesleuth-0.4.2/src/tablesleuth/config.py +122 -0
  47. tablesleuth-0.4.2/src/tablesleuth/exceptions.py +91 -0
  48. tablesleuth-0.4.2/src/tablesleuth/models/__init__.py +44 -0
  49. tablesleuth-0.4.2/src/tablesleuth/models/file_ref.py +34 -0
  50. tablesleuth-0.4.2/src/tablesleuth/models/iceberg.py +401 -0
  51. tablesleuth-0.4.2/src/tablesleuth/models/parquet.py +83 -0
  52. tablesleuth-0.4.2/src/tablesleuth/models/performance.py +61 -0
  53. tablesleuth-0.4.2/src/tablesleuth/models/profiling.py +26 -0
  54. tablesleuth-0.4.2/src/tablesleuth/models/snapshot.py +17 -0
  55. tablesleuth-0.4.2/src/tablesleuth/models/table.py +10 -0
  56. tablesleuth-0.4.2/src/tablesleuth/services/__init__.py +1 -0
  57. tablesleuth-0.4.2/src/tablesleuth/services/file_discovery.py +171 -0
  58. tablesleuth-0.4.2/src/tablesleuth/services/filesystem.py +127 -0
  59. tablesleuth-0.4.2/src/tablesleuth/services/formats/__init__.py +4 -0
  60. tablesleuth-0.4.2/src/tablesleuth/services/formats/base.py +20 -0
  61. tablesleuth-0.4.2/src/tablesleuth/services/formats/iceberg.py +275 -0
  62. tablesleuth-0.4.2/src/tablesleuth/services/iceberg_metadata_service.py +473 -0
  63. tablesleuth-0.4.2/src/tablesleuth/services/mor_service.py +76 -0
  64. tablesleuth-0.4.2/src/tablesleuth/services/parquet_service.py +422 -0
  65. tablesleuth-0.4.2/src/tablesleuth/services/profiling/__init__.py +5 -0
  66. tablesleuth-0.4.2/src/tablesleuth/services/profiling/backend_base.py +96 -0
  67. tablesleuth-0.4.2/src/tablesleuth/services/profiling/fake_backend.py +45 -0
  68. tablesleuth-0.4.2/src/tablesleuth/services/profiling/gizmo_duckdb.py +781 -0
  69. tablesleuth-0.4.2/src/tablesleuth/services/snapshot_performance_analyzer.py +173 -0
  70. tablesleuth-0.4.2/src/tablesleuth/services/snapshot_test_manager.py +209 -0
  71. tablesleuth-0.4.2/src/tablesleuth/tui/__init__.py +3 -0
  72. tablesleuth-0.4.2/src/tablesleuth/tui/app.py +606 -0
  73. tablesleuth-0.4.2/src/tablesleuth/tui/views/__init__.py +22 -0
  74. tablesleuth-0.4.2/src/tablesleuth/tui/views/data_sample_view.py +597 -0
  75. tablesleuth-0.4.2/src/tablesleuth/tui/views/file_detail_view.py +163 -0
  76. tablesleuth-0.4.2/src/tablesleuth/tui/views/file_list_view.py +149 -0
  77. tablesleuth-0.4.2/src/tablesleuth/tui/views/iceberg_view.py +819 -0
  78. tablesleuth-0.4.2/src/tablesleuth/tui/views/profile_view.py +470 -0
  79. tablesleuth-0.4.2/src/tablesleuth/tui/views/row_groups_view.py +275 -0
  80. tablesleuth-0.4.2/src/tablesleuth/tui/views/schema_view.py +614 -0
  81. tablesleuth-0.4.2/src/tablesleuth/tui/views/snapshot_comparison_view.py +310 -0
  82. tablesleuth-0.4.2/src/tablesleuth/tui/views/snapshot_detail_view.py +489 -0
  83. tablesleuth-0.4.2/src/tablesleuth/tui/views/structure_view.py +354 -0
  84. tablesleuth-0.4.2/src/tablesleuth/tui/widgets/__init__.py +6 -0
  85. tablesleuth-0.4.2/src/tablesleuth/tui/widgets/loading.py +78 -0
  86. tablesleuth-0.4.2/src/tablesleuth/tui/widgets/notification.py +164 -0
  87. tablesleuth-0.4.2/src/tablesleuth/utils/__init__.py +1 -0
  88. tablesleuth-0.4.2/src/tablesleuth/utils/config_templates.py +253 -0
  89. tablesleuth-0.4.2/tablesleuth.toml +37 -0
  90. tablesleuth-0.4.2/tests/__init__.py +0 -0
  91. tablesleuth-0.4.2/tests/conftest.py +106 -0
  92. tablesleuth-0.4.2/tests/test_app_advanced.py +1259 -0
  93. tablesleuth-0.4.2/tests/test_app_caching.py +327 -0
  94. tablesleuth-0.4.2/tests/test_app_file_selection.py +269 -0
  95. tablesleuth-0.4.2/tests/test_app_layout.py +243 -0
  96. tablesleuth-0.4.2/tests/test_app_profiling.py +335 -0
  97. tablesleuth-0.4.2/tests/test_cli.py +106 -0
  98. tablesleuth-0.4.2/tests/test_cli_commands.py +562 -0
  99. tablesleuth-0.4.2/tests/test_cli_config_commands.py +473 -0
  100. tablesleuth-0.4.2/tests/test_config.py +221 -0
  101. tablesleuth-0.4.2/tests/test_data_sample_view.py +839 -0
  102. tablesleuth-0.4.2/tests/test_end_to_end.py +366 -0
  103. tablesleuth-0.4.2/tests/test_error_handling.py +183 -0
  104. tablesleuth-0.4.2/tests/test_fake_profiler.py +38 -0
  105. tablesleuth-0.4.2/tests/test_file_discovery.py +280 -0
  106. tablesleuth-0.4.2/tests/test_filesystem.py +210 -0
  107. tablesleuth-0.4.2/tests/test_filter_validation.py +89 -0
  108. tablesleuth-0.4.2/tests/test_gizmo_duckdb_sanitization.py +122 -0
  109. tablesleuth-0.4.2/tests/test_gizmo_duckdb_unit.py +567 -0
  110. tablesleuth-0.4.2/tests/test_gizmo_profiler_config.py +167 -0
  111. tablesleuth-0.4.2/tests/test_gizmosql_integration.py +217 -0
  112. tablesleuth-0.4.2/tests/test_iceberg_adapter.py +6 -0
  113. tablesleuth-0.4.2/tests/test_iceberg_adapter_helpers.py +175 -0
  114. tablesleuth-0.4.2/tests/test_iceberg_adapter_unit.py +605 -0
  115. tablesleuth-0.4.2/tests/test_iceberg_metadata_service.py +161 -0
  116. tablesleuth-0.4.2/tests/test_iceberg_metadata_service_unit.py +520 -0
  117. tablesleuth-0.4.2/tests/test_iceberg_models.py +518 -0
  118. tablesleuth-0.4.2/tests/test_iceberg_view.py +881 -0
  119. tablesleuth-0.4.2/tests/test_keybindings.py +345 -0
  120. tablesleuth-0.4.2/tests/test_mor_service.py +465 -0
  121. tablesleuth-0.4.2/tests/test_parquet_inspector.py +392 -0
  122. tablesleuth-0.4.2/tests/test_parquet_profiling_integration.py +125 -0
  123. tablesleuth-0.4.2/tests/test_performance_models.py +211 -0
  124. tablesleuth-0.4.2/tests/test_profile_view.py +430 -0
  125. tablesleuth-0.4.2/tests/test_profiling_backend.py +325 -0
  126. tablesleuth-0.4.2/tests/test_row_groups_view.py +349 -0
  127. tablesleuth-0.4.2/tests/test_s3_tables_arn.py +131 -0
  128. tablesleuth-0.4.2/tests/test_snapshot_comparison_view.py +425 -0
  129. tablesleuth-0.4.2/tests/test_snapshot_detail_view.py +533 -0
  130. tablesleuth-0.4.2/tests/test_snapshot_performance_analyzer.py +209 -0
  131. tablesleuth-0.4.2/tests/test_snapshot_performance_analyzer_unit.py +227 -0
  132. tablesleuth-0.4.2/tests/test_snapshot_test_manager.py +261 -0
  133. tablesleuth-0.4.2/tests/test_sql_injection_prevention.py +167 -0
  134. tablesleuth-0.4.2/tests/test_structure_integration.py +536 -0
  135. tablesleuth-0.4.2/tests/test_structure_view.py +439 -0
  136. tablesleuth-0.4.2/tests/test_tui_smoke.py +12 -0
  137. tablesleuth-0.4.2/uv.lock +2226 -0
@@ -0,0 +1,24 @@
1
+ # Auto detect text files and normalize line endings to LF in the repository
2
+ * text=auto eol=lf
3
+
4
+ # Explicitly declare text files that must always be normalized and
5
+ # checked out with LF line endings on every platform
6
+ *.py text eol=lf
7
+ *.md text eol=lf
8
+ *.yml text eol=lf
9
+ *.yaml text eol=lf
10
+ *.toml text eol=lf
11
+ *.json text eol=lf
12
+ *.txt text eol=lf
13
+ *.sh text eol=lf
14
+ Makefile text eol=lf
15
+
16
+ # Denote all files that are truly binary and should not be modified
17
+ *.png binary
18
+ *.jpg binary
19
+ *.jpeg binary
20
+ *.gif binary
21
+ *.ico binary
22
+ *.whl binary
23
+ *.gz binary
24
+ *.zip binary
@@ -0,0 +1,80 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [ main, develop ]
6
+ pull_request:
7
+ branches: [ main, develop ]
8
+
9
+ jobs:
10
+ test:
11
+ runs-on: ${{ matrix.os }}
12
+ strategy:
13
+ matrix:
14
+ os: [ubuntu-latest, macos-latest, windows-latest]
15
+ python-version: ['3.13', '3.14']
16
+
17
+ steps:
18
+ - uses: actions/checkout@v4
19
+
20
+ - name: Set up Python ${{ matrix.python-version }}
21
+ uses: actions/setup-python@v5
22
+ with:
23
+ python-version: ${{ matrix.python-version }}
24
+
25
+ - name: Install uv
26
+ run: pip install uv
27
+
28
+ - name: Install dependencies
29
+ run: uv sync --extra dev
30
+
31
+ - name: Run linter
32
+ run: uv run ruff check .
33
+
34
+ - name: Run formatter check
35
+ run: uv run ruff format --check .
36
+
37
+ - name: Run type checker
38
+ run: uv run mypy src
39
+
40
+ - name: Run security scan
41
+ run: uv run bandit -c pyproject.toml -r src/
42
+
43
+ - name: Run tests
44
+ run: uv run pytest --cov=tablesleuth --cov-report=xml --cov-report=term
45
+
46
+ - name: Upload coverage to Codecov
47
+ if: matrix.os == 'ubuntu-latest' && matrix.python-version == '3.13'
48
+ uses: codecov/codecov-action@v4
49
+ with:
50
+ file: ./coverage.xml
51
+ fail_ci_if_error: false
52
+
53
+ build:
54
+ runs-on: ubuntu-latest
55
+ needs: test
56
+
57
+ steps:
58
+ - uses: actions/checkout@v4
59
+
60
+ - name: Set up Python
61
+ uses: actions/setup-python@v5
62
+ with:
63
+ python-version: '3.13'
64
+
65
+ - name: Install uv
66
+ run: pip install uv
67
+
68
+ - name: Build package
69
+ run: uv build
70
+
71
+ - name: Check package
72
+ run: |
73
+ pip install twine
74
+ twine check dist/*
75
+
76
+ - name: Upload artifacts
77
+ uses: actions/upload-artifact@v4
78
+ with:
79
+ name: dist
80
+ path: dist/
@@ -0,0 +1,43 @@
1
+ name: Publish to PyPI
2
+
3
+ on:
4
+ release:
5
+ types: [published]
6
+
7
+ permissions:
8
+ contents: read
9
+
10
+ jobs:
11
+ publish:
12
+ runs-on: ubuntu-latest
13
+ environment:
14
+ name: pypi
15
+ url: https://pypi.org/p/TableSleuth
16
+ permissions:
17
+ id-token: write # Required for trusted publishing
18
+
19
+ steps:
20
+ - uses: actions/checkout@v4
21
+
22
+ - name: Set up Python
23
+ uses: actions/setup-python@v5
24
+ with:
25
+ python-version: '3.13'
26
+
27
+ - name: Install uv
28
+ run: pip install uv
29
+
30
+ - name: Build package
31
+ run: uv build
32
+
33
+ - name: Verify version matches tag
34
+ run: |
35
+ TAG_VERSION=${GITHUB_REF#refs/tags/v}
36
+ PKG_VERSION=$(grep '^version = ' pyproject.toml | cut -d'"' -f2)
37
+ if [ "$TAG_VERSION" != "$PKG_VERSION" ]; then
38
+ echo "Tag version ($TAG_VERSION) does not match package version ($PKG_VERSION)"
39
+ exit 1
40
+ fi
41
+
42
+ - name: Publish to PyPI
43
+ uses: pypa/gh-action-pypi-publish@release/v1
@@ -0,0 +1,88 @@
1
+ # Environment variables
2
+ .env
3
+ .env.local
4
+
5
+ # Python
6
+ __pycache__/
7
+ *.py[cod]
8
+ *$py.class
9
+ *.so
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ *.egg-info/
24
+ .installed.cfg
25
+ *.egg
26
+
27
+ # Virtual environments
28
+ venv/
29
+ env/
30
+ ENV/
31
+ .venv
32
+
33
+ # IDEs
34
+ .vscode/
35
+ .idea/
36
+ *.swp
37
+ *.swo
38
+ *~
39
+
40
+ # Logs
41
+ *.log
42
+ logs/
43
+
44
+ # Testing
45
+ .pytest_cache/
46
+ .coverage
47
+ htmlcov/
48
+ .tox/
49
+ coverage.xml
50
+
51
+ # Type checking
52
+ .mypy_cache/
53
+ .dmypy.json
54
+ dmypy.json
55
+
56
+ # Ruff
57
+ .ruff_cache/
58
+
59
+ # OS
60
+ .DS_Store
61
+ Thumbs.db
62
+
63
+ # Project specific
64
+ resources/config.json
65
+ *.pem
66
+ input/
67
+ .kiro/
68
+ .chroma/
69
+ .ruff_cache/
70
+ .mypy_cache/
71
+ *.zip
72
+ data/
73
+ scripts/
74
+ docs/archive/
75
+ resources/debug/
76
+ resources/docs/
77
+ debug/
78
+ .pyiceberg.yaml
79
+
80
+ # Gradio
81
+ gradio_cached_examples/
82
+ flagged/
83
+
84
+ # Docker
85
+ docker-compose.override.yml
86
+ *.tar.gz
87
+ backups/
88
+ ssl/
@@ -0,0 +1,58 @@
1
+ # Pre-commit hooks for tablesleuth
2
+ # Install: uv run pre-commit install
3
+ # Run manually: uv run pre-commit run --all-files
4
+
5
+ repos:
6
+ # Ruff - replaces black, isort, flake8
7
+ - repo: https://github.com/astral-sh/ruff-pre-commit
8
+ rev: v0.7.4
9
+ hooks:
10
+ # Run the linter
11
+ - id: ruff
12
+ args: [--fix]
13
+ # Run the formatter
14
+ - id: ruff-format
15
+
16
+ # mypy - type checking
17
+ - repo: https://github.com/pre-commit/mirrors-mypy
18
+ rev: v1.13.0
19
+ hooks:
20
+ - id: mypy
21
+ additional_dependencies:
22
+ - types-toml
23
+ - types-pyyaml
24
+ - pydantic>=2.0
25
+ - textual>=0.47.0
26
+ - rich>=13.0.0
27
+ - pyarrow>=14.0.0
28
+ - pyiceberg>=0.6.0
29
+ - adbc-driver-flightsql>=0.8.0
30
+ - boto3>=1.34.0
31
+ - botocore>=1.34.0
32
+ - fsspec>=2023.0.0
33
+ - s3fs>=2023.0.0
34
+ args: [--config-file=pyproject.toml, src/]
35
+ pass_filenames: false
36
+
37
+ # Standard pre-commit hooks
38
+ - repo: https://github.com/pre-commit/pre-commit-hooks
39
+ rev: v5.0.0
40
+ hooks:
41
+ - id: trailing-whitespace
42
+ - id: end-of-file-fixer
43
+ - id: check-yaml
44
+ - id: check-toml
45
+ - id: check-added-large-files
46
+ args: [--maxkb=1000]
47
+ - id: check-merge-conflict
48
+ - id: check-case-conflict
49
+ - id: mixed-line-ending
50
+ args: [--fix=lf]
51
+
52
+ # Security scanning with bandit
53
+ - repo: https://github.com/PyCQA/bandit
54
+ rev: 1.7.10
55
+ hooks:
56
+ - id: bandit
57
+ args: [-c, pyproject.toml, -r, src/]
58
+ additional_dependencies: ["bandit[toml]"]
@@ -0,0 +1,179 @@
1
+ # Changelog
2
+
3
+ All notable changes to this project will be documented in this file.
4
+
5
+ ## [0.4.2] - 2026-01-17
6
+
7
+ ### Added
8
+ - **Configuration Management Commands**
9
+ - `tablesleuth init` - Interactive configuration file initialization
10
+ - Creates `tablesleuth.toml` and `.pyiceberg.yaml` with comprehensive templates
11
+ - Prompts for home directory (~/) or current directory (./) placement
12
+ - Includes `--force` flag to overwrite existing files
13
+ - Generates well-commented templates with multiple catalog examples
14
+ - `tablesleuth config-check` - Configuration validation and testing
15
+ - Validates all configuration files and syntax
16
+ - Tests GizmoSQL connection
17
+ - Checks PyIceberg catalog configuration
18
+ - Shows configuration precedence and active values
19
+ - Supports `-v/--verbose` flag for detailed output
20
+
21
+ ### Changed
22
+ - **Configuration File Locations** - Simplified configuration paths
23
+ - Removed `~/.config/tablesleuth/` directory approach
24
+ - Now supports: `./tablesleuth.toml` (local) and `~/tablesleuth.toml` (home)
25
+ - PyIceberg config: `./.pyiceberg.yaml` (local) and `~/.pyiceberg.yaml` (home)
26
+ - Respects `PYICEBERG_HOME` environment variable for PyIceberg config location
27
+
28
+ - **Configuration Priority** - Clear precedence order
29
+ 1. Environment variables (`TABLESLEUTH_*`, `PYICEBERG_*`)
30
+ 2. Local config files (current directory)
31
+ 3. Home config files (home directory)
32
+ 4. Built-in defaults
33
+
34
+ - **Environment Variable Support**
35
+ - `TABLESLEUTH_CONFIG` - Override config file path
36
+ - Existing: `TABLESLEUTH_CATALOG_NAME`, `TABLESLEUTH_GIZMO_*`
37
+ - PyIceberg native: `PYICEBERG_HOME`
38
+
39
+ - **Configuration File Renamed** - Consistency with package name
40
+ - `table_sleuth.toml` → `tablesleuth.toml`
41
+ - Updated all documentation and code references
42
+
43
+ ### Fixed
44
+ - **Configuration Error Handling** - Improved error messages and handling
45
+ - Fixed unhandled `FileNotFoundError` in `inspect` and `iceberg` commands when `TABLESLEUTH_CONFIG` points to a non-existent file
46
+ - Fixed unhandled exception in `config-check` command when the `TABLESLEUTH_CONFIG` environment variable is invalid
47
+ - The affected commands (`inspect`, `iceberg`, and `config-check`) now show helpful error messages suggesting `tablesleuth init` instead of tracebacks
48
+ - Added proper try-except blocks around `load_config()` calls in main CLI commands
49
+
50
+ - **Configuration Template TOML Syntax** - Fixed invalid TOML in generated config
51
+ - Changed `default = null` to commented `# default = ""` (TOML doesn't support null type)
52
+ - Generated config files now parse correctly without `TOMLDecodeError`
53
+ - Affects `tablesleuth init` command output
54
+
55
+ - **S3 Tables Catalog Configuration** - Fixed incorrect catalog type and improved flexibility
56
+ - Changed S3 Tables catalog from `type: glue` to `type: rest` with proper REST API settings
57
+ - Added required REST API configuration: `uri`, `rest.sigv4-enabled`, `rest.signing-name`, `rest.signing-region`
58
+ - Fixed hardcoded catalog name - now supports multiple S3 Tables catalogs
59
+ - Users can specify which S3 Tables catalog to use with `--catalog` flag when using ARNs
60
+ - Default catalog name "s3tables" is used when ARN is provided without `--catalog` flag
61
+ - Added clear documentation and usage examples in template showing multiple S3 Tables catalogs
62
+ - Clarified difference between Glue catalog and S3 Tables catalog
63
+
64
+ - **GizmoSQL Optional Component Handling** - Made GizmoSQL truly optional
65
+ - `config-check` command no longer fails when GizmoSQL connection fails
66
+ - Added `--with-gizmosql` flag to explicitly test GizmoSQL connection
67
+ - GizmoSQL test is now skipped by default (shown as "⊘ Skipped")
68
+ - Exit code 0 (success) when only optional components fail
69
+ - Consistent with other optional checks like missing PyIceberg config
70
+
71
+ ### Dependencies
72
+ - Added `pyyaml>=6.0.0` for PyIceberg config validation
73
+
74
+ ## [0.4.1] - 2026-01-17
75
+
76
+ ### Changed
77
+ - **Python Module Renamed to `tablesleuth`** - Complete consistency across package
78
+ - Module directory renamed from `table_sleuth` to `tablesleuth`
79
+ - All imports now use `from tablesleuth import ...`
80
+ - Eliminates confusion between package name and import name
81
+ - **Breaking Change:** Update all imports from `table_sleuth` to `tablesleuth`
82
+
83
+ ### Migration
84
+ If upgrading from v0.4.0 (unreleased), update your imports:
85
+ ```python
86
+ # Old
87
+ from table_sleuth import __version__
88
+ from table_sleuth.services import ParquetInspector
89
+
90
+ # New
91
+ from tablesleuth import __version__
92
+ from tablesleuth.services import ParquetInspector
93
+ ```
94
+
95
+ ## [0.4.0] - 2026-01-16 (Unreleased)
96
+
97
+ ### Changed
98
+ - **Package Renamed to `tablesleuth`** - Unified package name for PyPI distribution
99
+ - CLI command changed from `table-sleuth` to `tablesleuth`
100
+ - Package name now matches tablesleuth.com domain
101
+ - Improved discoverability on PyPI
102
+ - **Version Management** - Consolidated version to single source of truth in `__init__.py`
103
+ - Removed hardcoded version from CLI
104
+ - Version now imported from package
105
+ - **Enhanced PyPI Metadata**
106
+ - Upgraded development status from Alpha to Beta
107
+ - Added comprehensive classifiers for better discoverability
108
+ - Added project URLs including homepage, documentation, and changelog
109
+ - Added publishing tools (twine, build) to dev dependencies
110
+
111
+ ### Added
112
+ - **GitHub Actions CI/CD** - Automated testing and publishing workflows
113
+ - Multi-platform testing (Ubuntu, macOS, Windows)
114
+ - Multi-version Python testing (3.13, 3.14)
115
+ - Automated quality checks (ruff, mypy, bandit)
116
+ - Automated PyPI publishing on release
117
+ - Support for PyPI Trusted Publishing
118
+ - **PyPI Publishing Guide** - Comprehensive documentation for package publishing
119
+ - Step-by-step publishing instructions
120
+ - TestPyPI testing workflow
121
+ - Automated release process documentation
122
+ - Troubleshooting guide
123
+
124
+ ## [0.3.0] - 2025-11-29
125
+
126
+ ### Added
127
+ - **Strict MyPy Type Checking** - Comprehensive type annotations across the codebase
128
+ - Enabled strict mypy configuration with `disallow_untyped_defs`, `disallow_incomplete_defs`, and `warn_return_any`
129
+ - Added proper type annotations to all service classes and methods
130
+ - Configured per-module overrides for third-party libraries without type stubs
131
+ - Integrated mypy into pre-commit hooks with all required dependencies
132
+ - Zero type errors in production code (only expected import-untyped warnings for PyArrow)
133
+
134
+ - **Enhanced Documentation**
135
+ - Streamlined README.md with high-level feature overview and screenshot galleries
136
+ - Organized documentation with clear navigation to detailed guides
137
+ - Added visual comparison tables for Parquet and Iceberg interfaces
138
+ - Improved quick start examples and configuration guidance
139
+
140
+ - **UI Improvements**
141
+ - Removed subtitle from TUI header for cleaner interface
142
+ - Updated application title to "Table Sleuth - Parquet Analysis"
143
+
144
+ ### Changed
145
+ - **Code Quality Improvements**
146
+ - Fixed import paths for IcebergAdapter (moved to `formats.iceberg`)
147
+ - Removed unreachable backwards compatibility code in gizmo_duckdb.py
148
+ - Added explicit type casts where needed for type safety
149
+ - Improved error handling with proper type annotations
150
+
151
+ - **Pre-commit Configuration**
152
+ - Added all required dependencies to mypy pre-commit hook
153
+ - Configured proper module overrides for untyped libraries (pyarrow, fsspec, s3fs, etc.)
154
+ - All pre-commit hooks now pass cleanly
155
+
156
+ ### Fixed
157
+ - Type annotation issues in FileDiscoveryService, ParquetInspector, and GizmoDuckDbProfiler
158
+ - Missing return type annotations across multiple service classes
159
+ - Unused type ignore comments after fixing import paths
160
+ - Event handler type annotations in TUI views
161
+
162
+ ## [Unreleased]
163
+
164
+ ### Added
165
+
166
+ #### Performance Profiling for Merge-on-Read
167
+ - **Added performance profiling models** (`QueryPerformanceProfile`, `MergeOnReadPerformance`)
168
+ - Measures query execution time with and without delete file application
169
+ - Calculates merge-on-read overhead in milliseconds and percentage
170
+ - Tracks rows scanned, rows returned, and rows deleted
171
+ - Provides timing breakdown for data file scan, delete file scan, and merge operations
172
+ - **Extended ProfilingBackend protocol** with `profile_query_performance()` method
173
+ - Allows backends to implement performance profiling
174
+ - Optional method - backends can raise `NotImplementedError` if not supported
175
+ - **Comprehensive test suite** for performance profiling models
176
+ - Tests overhead calculation, edge cases, and zero-division handling
177
+ - **Updated product specification** with performance profiling user story
178
+ - Story 6: Performance profiling for merge-on-read queries
179
+ - Helps engineers make data-driven decisions about table compaction