wherewolf 0.2.2__tar.gz → 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. {wherewolf-0.2.2 → wherewolf-0.3.0}/GEMINI.md +4 -0
  2. {wherewolf-0.2.2 → wherewolf-0.3.0}/PKG-INFO +1 -1
  3. {wherewolf-0.2.2 → wherewolf-0.3.0}/README.md +5 -5
  4. wherewolf-0.3.0/docs/agent_conversations/2026-04-21_fix_infinite_loop_regression.json +13 -0
  5. wherewolf-0.3.0/docs/agent_conversations/2026-04-21_schema_hud.json +22 -0
  6. wherewolf-0.3.0/docs/plans/code_review_fixes.md +55 -0
  7. wherewolf-0.3.0/docs/plans/excel_support.md +37 -0
  8. wherewolf-0.3.0/docs/plans/schema_hud.md +72 -0
  9. {wherewolf-0.2.2 → wherewolf-0.3.0}/pyproject.toml +1 -1
  10. {wherewolf-0.2.2 → wherewolf-0.3.0}/src/wherewolf/app.py +136 -56
  11. {wherewolf-0.2.2 → wherewolf-0.3.0}/src/wherewolf/execution/duckdb_engine.py +39 -22
  12. {wherewolf-0.2.2 → wherewolf-0.3.0}/src/wherewolf/execution/spark_engine.py +51 -28
  13. {wherewolf-0.2.2 → wherewolf-0.3.0}/src/wherewolf/ui/file_browser.py +1 -1
  14. {wherewolf-0.2.2 → wherewolf-0.3.0}/tests/test_app.py +0 -1
  15. {wherewolf-0.2.2 → wherewolf-0.3.0}/tests/test_app_cancel.py +1 -1
  16. {wherewolf-0.2.2 → wherewolf-0.3.0}/tests/test_app_flow.py +1 -1
  17. {wherewolf-0.2.2 → wherewolf-0.3.0}/tests/test_duckdb_engine.py +11 -0
  18. wherewolf-0.3.0/tests/test_spark_engine.py +27 -0
  19. {wherewolf-0.2.2 → wherewolf-0.3.0}/uv.lock +1 -1
  20. wherewolf-0.2.2/tests/test_spark_engine.py +0 -6
  21. {wherewolf-0.2.2 → wherewolf-0.3.0}/.envrc +0 -0
  22. {wherewolf-0.2.2 → wherewolf-0.3.0}/.github/workflows/ci.yml +0 -0
  23. {wherewolf-0.2.2 → wherewolf-0.3.0}/.github/workflows/release.yml +0 -0
  24. {wherewolf-0.2.2 → wherewolf-0.3.0}/.gitignore +0 -0
  25. {wherewolf-0.2.2 → wherewolf-0.3.0}/.protocol +0 -0
  26. {wherewolf-0.2.2 → wherewolf-0.3.0}/.streamlit/config.toml +0 -0
  27. {wherewolf-0.2.2 → wherewolf-0.3.0}/AGENTS.md +0 -0
  28. {wherewolf-0.2.2 → wherewolf-0.3.0}/LICENSE +0 -0
  29. {wherewolf-0.2.2 → wherewolf-0.3.0}/docs/RELEASING.md +0 -0
  30. {wherewolf-0.2.2 → wherewolf-0.3.0}/docs/agent_conversations/2026-03-09_file_browsing.json +0 -0
  31. {wherewolf-0.2.2 → wherewolf-0.3.0}/docs/agent_conversations/2026-03-09_initial_implementation.json +0 -0
  32. {wherewolf-0.2.2 → wherewolf-0.3.0}/docs/agent_conversations/2026-04-19_github_setup.json +0 -0
  33. {wherewolf-0.2.2 → wherewolf-0.3.0}/docs/agent_conversations/2026-04-19_tag_v0.1.0.json +0 -0
  34. {wherewolf-0.2.2 → wherewolf-0.3.0}/docs/agent_protocol.md +0 -0
  35. {wherewolf-0.2.2 → wherewolf-0.3.0}/docs/execution_ledger.json +0 -0
  36. {wherewolf-0.2.2 → wherewolf-0.3.0}/docs/plans/core_system_design.md +0 -0
  37. {wherewolf-0.2.2 → wherewolf-0.3.0}/docs/plans/execution_engines.md +0 -0
  38. {wherewolf-0.2.2 → wherewolf-0.3.0}/docs/plans/export_formats.md +0 -0
  39. {wherewolf-0.2.2 → wherewolf-0.3.0}/docs/plans/file_browsing.md +0 -0
  40. {wherewolf-0.2.2 → wherewolf-0.3.0}/docs/plans/file_browsing_v2.md +0 -0
  41. {wherewolf-0.2.2 → wherewolf-0.3.0}/docs/plans/github_automation.md +0 -0
  42. {wherewolf-0.2.2 → wherewolf-0.3.0}/docs/plans/initial_prompt.md +0 -0
  43. {wherewolf-0.2.2 → wherewolf-0.3.0}/docs/plans/storage_and_history.md +0 -0
  44. {wherewolf-0.2.2 → wherewolf-0.3.0}/docs/plans/streamlit_ui.md +0 -0
  45. {wherewolf-0.2.2 → wherewolf-0.3.0}/run.sh +0 -0
  46. {wherewolf-0.2.2 → wherewolf-0.3.0}/scripts/check_tdd.sh +0 -0
  47. {wherewolf-0.2.2 → wherewolf-0.3.0}/scripts/take_screenshot.py +0 -0
  48. {wherewolf-0.2.2 → wherewolf-0.3.0}/src/wherewolf/__init__.py +0 -0
  49. {wherewolf-0.2.2 → wherewolf-0.3.0}/src/wherewolf/assets/img/screenshot.png +0 -0
  50. {wherewolf-0.2.2 → wherewolf-0.3.0}/src/wherewolf/assets/img/wherewolf_banner.png +0 -0
  51. {wherewolf-0.2.2 → wherewolf-0.3.0}/src/wherewolf/assets/img/wherewolf_logo.png +0 -0
  52. {wherewolf-0.2.2 → wherewolf-0.3.0}/src/wherewolf/cli.py +0 -0
  53. {wherewolf-0.2.2 → wherewolf-0.3.0}/src/wherewolf/execution/__init__.py +0 -0
  54. {wherewolf-0.2.2 → wherewolf-0.3.0}/src/wherewolf/execution/models.py +0 -0
  55. {wherewolf-0.2.2 → wherewolf-0.3.0}/src/wherewolf/export/__init__.py +0 -0
  56. {wherewolf-0.2.2 → wherewolf-0.3.0}/src/wherewolf/export/exporter.py +0 -0
  57. {wherewolf-0.2.2 → wherewolf-0.3.0}/src/wherewolf/storage/__init__.py +0 -0
  58. {wherewolf-0.2.2 → wherewolf-0.3.0}/src/wherewolf/storage/history.py +0 -0
  59. {wherewolf-0.2.2 → wherewolf-0.3.0}/src/wherewolf/translation/__init__.py +0 -0
  60. {wherewolf-0.2.2 → wherewolf-0.3.0}/src/wherewolf/translation/translator.py +0 -0
  61. {wherewolf-0.2.2 → wherewolf-0.3.0}/src/wherewolf/ui/__init__.py +0 -0
  62. {wherewolf-0.2.2 → wherewolf-0.3.0}/tests/reproduce_issue_symlink.py +0 -0
  63. {wherewolf-0.2.2 → wherewolf-0.3.0}/tests/reproduce_issue_symlink_v2.py +0 -0
  64. {wherewolf-0.2.2 → wherewolf-0.3.0}/tests/sample_queries.md +0 -0
  65. {wherewolf-0.2.2 → wherewolf-0.3.0}/tests/test___init__.py +0 -0
  66. {wherewolf-0.2.2 → wherewolf-0.3.0}/tests/test_cli.py +0 -0
  67. {wherewolf-0.2.2 → wherewolf-0.3.0}/tests/test_config_toml.py +0 -0
  68. {wherewolf-0.2.2 → wherewolf-0.3.0}/tests/test_data.csv +0 -0
  69. {wherewolf-0.2.2 → wherewolf-0.3.0}/tests/test_duckdb_sql_injection.py +0 -0
  70. {wherewolf-0.2.2 → wherewolf-0.3.0}/tests/test_excel_support.py +0 -0
  71. {wherewolf-0.2.2 → wherewolf-0.3.0}/tests/test_execution___init__.py +0 -0
  72. {wherewolf-0.2.2 → wherewolf-0.3.0}/tests/test_export___init__.py +0 -0
  73. {wherewolf-0.2.2 → wherewolf-0.3.0}/tests/test_exporter.py +0 -0
  74. {wherewolf-0.2.2 → wherewolf-0.3.0}/tests/test_file_browser.py +0 -0
  75. {wherewolf-0.2.2 → wherewolf-0.3.0}/tests/test_file_browser_errors.py +0 -0
  76. {wherewolf-0.2.2 → wherewolf-0.3.0}/tests/test_file_browser_navigation.py +0 -0
  77. {wherewolf-0.2.2 → wherewolf-0.3.0}/tests/test_file_browser_v2.py +0 -0
  78. {wherewolf-0.2.2 → wherewolf-0.3.0}/tests/test_history.py +0 -0
  79. {wherewolf-0.2.2 → wherewolf-0.3.0}/tests/test_history_atomicity.py +0 -0
  80. {wherewolf-0.2.2 → wherewolf-0.3.0}/tests/test_models.py +0 -0
  81. {wherewolf-0.2.2 → wherewolf-0.3.0}/tests/test_protocol.py +0 -0
  82. {wherewolf-0.2.2 → wherewolf-0.3.0}/tests/test_spark_engine_logic.py +0 -0
  83. {wherewolf-0.2.2 → wherewolf-0.3.0}/tests/test_spark_engine_optimization.py +0 -0
  84. {wherewolf-0.2.2 → wherewolf-0.3.0}/tests/test_storage___init__.py +0 -0
  85. {wherewolf-0.2.2 → wherewolf-0.3.0}/tests/test_translation___init__.py +0 -0
  86. {wherewolf-0.2.2 → wherewolf-0.3.0}/tests/test_translation_integration.py +0 -0
  87. {wherewolf-0.2.2 → wherewolf-0.3.0}/tests/test_translator.py +0 -0
  88. {wherewolf-0.2.2 → wherewolf-0.3.0}/tests/test_ui___init__.py +0 -0
  89. {wherewolf-0.2.2 → wherewolf-0.3.0}/tests/test_ui_branding.py +0 -0
  90. {wherewolf-0.2.2 → wherewolf-0.3.0}/tests/test_wherewolf___init__.py +0 -0
@@ -379,6 +379,7 @@ Before any commit, agents MUST execute:
379
379
 
380
380
  ruff check . --fix
381
381
  ruff format .
382
+ ty check .
382
383
  uv run pytest
383
384
 
384
385
  ```
@@ -474,3 +475,6 @@ If execution fails:
474
475
  5. Re-run validation suite
475
476
 
476
477
  Blind retries are forbidden.
478
+
479
+ ## Gemini Added Memories
480
+ - When creating a new release tag for this project, I MUST always increment the cacheBuster parameter in all URLs within the README.md (e.g., badges, banners, screenshots) to ensure GitHub Camo refreshes the images immediately.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: wherewolf
3
- Version: 0.2.2
3
+ Version: 0.3.0
4
4
  License-File: LICENSE
5
5
  Requires-Python: >=3.11
6
6
  Requires-Dist: duckdb>=1.5.0
@@ -1,10 +1,10 @@
1
1
  # Wherewolf
2
2
 
3
- <img src="https://raw.githubusercontent.com/beallio/wherewolf/main/src/wherewolf/assets/img/wherewolf_banner.png?cacheBuster=4" width="100%">
3
+ <img src="https://raw.githubusercontent.com/beallio/wherewolf/main/src/wherewolf/assets/img/wherewolf_banner.png?cacheBuster=5" width="100%">
4
4
 
5
- [![CI](https://github.com/beallio/wherewolf/actions/workflows/ci.yml/badge.svg?cacheBuster=4)](https://github.com/beallio/wherewolf/actions/workflows/ci.yml)
6
- [![PyPI version](https://img.shields.io/pypi/v/wherewolf.svg?cacheBuster=4)](https://pypi.org/project/wherewolf/)
7
- [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg?cacheBuster=4)](https://opensource.org/licenses/MIT)
5
+ [![CI](https://github.com/beallio/wherewolf/actions/workflows/ci.yml/badge.svg?cacheBuster=5)](https://github.com/beallio/wherewolf/actions/workflows/ci.yml)
6
+ [![PyPI version](https://img.shields.io/pypi/v/wherewolf.svg?cacheBuster=5)](https://pypi.org/project/wherewolf/)
7
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg?cacheBuster=5)](https://opensource.org/licenses/MIT)
8
8
 
9
9
  A production-grade, local SQL workbench for querying files (CSV, Parquet, JSON) using DuckDB or Spark.
10
10
 
@@ -16,7 +16,7 @@ A production-grade, local SQL workbench for querying files (CSV, Parquet, JSON)
16
16
  - **Export:** Download query results as CSV, Excel, or Parquet.
17
17
  - **Execution Metrics:** Tracks row count and execution time.
18
18
 
19
- ![Wherewolf Screenshot](https://raw.githubusercontent.com/beallio/wherewolf/main/src/wherewolf/assets/img/screenshot.png?cacheBuster=4)
19
+ ![Wherewolf Screenshot](https://raw.githubusercontent.com/beallio/wherewolf/main/src/wherewolf/assets/img/screenshot.png?cacheBuster=5)
20
20
 
21
21
  ## Installation
22
22
 
@@ -0,0 +1,13 @@
1
+ {
2
+ "date": "2026-04-21",
3
+ "task_objective": "Fix data loading regression (infinite loop in app.py)",
4
+ "files_modified": [
5
+ "src/wherewolf/app.py"
6
+ ],
7
+ "tests_added": [],
8
+ "design_decisions": [
9
+ "Moved background query completion check above the autorefresh block.",
10
+ "Moved the autorefresh block to the end of the script to ensure UI components are rendered before rerunning."
11
+ ],
12
+ "results": "Fixed the regression where the app would enter an infinite loop during query execution, preventing results from loading and UI components from appearing. Verified via existing test suite."
13
+ }
@@ -0,0 +1,22 @@
1
+ {
2
+ "date": "2026-04-21",
3
+ "task_objective": "Implement Schema & Metadata HUD",
4
+ "files_modified": [
5
+ "src/wherewolf/execution/duckdb_engine.py",
6
+ "src/wherewolf/execution/spark_engine.py",
7
+ "src/wherewolf/app.py",
8
+ "tests/test_duckdb_engine.py",
9
+ "tests/test_spark_engine.py"
10
+ ],
11
+ "tests_added": [
12
+ "tests/test_duckdb_engine.py:test_duckdb_get_schema",
13
+ "tests/test_spark_engine.py:test_spark_get_schema"
14
+ ],
15
+ "design_decisions": [
16
+ "Extracted _register_view in engines for reusability.",
17
+ "Added get_schema method to engines returning a normalized pd.DataFrame.",
18
+ "Integrated schema HUD into Streamlit sidebar with automatic refreshing.",
19
+ "Used dict-based DataFrame initialization to satisfy strict 'ty' type checks."
20
+ ],
21
+ "results": "Successfully implemented and verified Schema HUD for both DuckDB and Spark engines. All tests and type checks pass."
22
+ }
@@ -0,0 +1,55 @@
1
+ # Plan: Code Review Fixes Implementation
2
+
3
+ ## Problem Definition
4
+ The code review identified several critical and medium-priority issues:
5
+ 1. SQL Injection vulnerability in `DuckDBEngine`.
6
+ 2. Non-functional query cancellation in the Streamlit UI.
7
+ 3. Performance bottleneck in `SparkEngine` due to redundant actions.
8
+ 4. Data loss risk in `HistoryManager` due to non-atomic writes.
9
+ 5. Inconsistent translation panel state in the UI.
10
+
11
+ ## Architecture Overview
12
+ - **Infrastructure:** Update `HistoryManager` to use atomic filesystem operations.
13
+ - **Core Logic:** Parametrize SQL in `DuckDBEngine` and optimize `SparkEngine` preview logic.
14
+ - **UI:** Integrate `ThreadPoolExecutor` for background execution and fix state tracking for translations.
15
+
16
+ ## Dependency Requirements
17
+ - `concurrent.futures.ThreadPoolExecutor` (stdlib)
18
+ - `tempfile` (stdlib)
19
+ - `pathlib` (stdlib)
20
+
21
+ ## Git Strategy
22
+ - Branch: `feat/code-review-fixes`
23
+ - Commit Frequency: Atomic commit per task.
24
+ - Protocol: `run.sh uv run pytest` and `run.sh ruff` before each commit.
25
+
26
+ ## Phased Approach
27
+
28
+ ### Phase 1: DuckDB SQL Injection Fix
29
+ - **Task 1.1:** Create `tests/test_duckdb_sql_injection.py` reproducing the issue.
30
+ - **Task 1.2:** Implement parametrized query in `src/wherewolf/execution/duckdb_engine.py`.
31
+ - **Task 1.3:** Verify with tests.
32
+
33
+ ### Phase 2: History Atomic Writes
34
+ - **Task 2.1:** Create `tests/test_history_atomicity.py`.
35
+ - **Task 2.2:** Update `src/wherewolf/storage/history.py` to use `tempfile` and `os.replace`.
36
+ - **Task 2.3:** Verify with tests.
37
+
38
+ ### Phase 3: Spark Engine Optimization
39
+ - **Task 3.1:** Create `tests/test_spark_engine_optimization.py`.
40
+ - **Task 3.2:** Optimize `src/wherewolf/execution/spark_engine.py`.
41
+ - **Task 3.3:** Verify with tests and benchmark.
42
+
43
+ ### Phase 4: UI Cancellation
44
+ - **Task 4.1:** Create `tests/test_app_cancel.py` (using mocks).
45
+ - **Task 4.2:** Refactor `src/wherewolf/app.py` to use `ThreadPoolExecutor`.
46
+ - **Task 4.3:** Verify with tests.
47
+
48
+ ### Phase 5: UI Translation State
49
+ - **Task 5.1:** Create `tests/test_app_translation_state.py`.
50
+ - **Task 5.2:** Update `src/wherewolf/app.py` to track executed query state.
51
+ - **Task 5.3:** Verify with tests.
52
+
53
+ ### Phase 6: Final Validation
54
+ - **Task 6.1:** Run full test suite.
55
+ - **Task 6.2:** Run Principal Engineer Code Review.
@@ -0,0 +1,37 @@
1
+ # Plan: Excel File Support
2
+
3
+ ## Problem Definition
4
+ Users want to query Excel files (`.xlsx`, `.xls`) directly within Wherewolf, similar to how they query CSV, Parquet, and JSON files.
5
+
6
+ ## Architecture Overview
7
+ - **DuckDBEngine:** Utilize the DuckDB `excel` extension. This requires running `INSTALL excel; LOAD excel;` and reading the file with `read_xlsx(?)`.
8
+ - **SparkEngine:** Since native Spark Excel support requires external JARs (e.g., `spark-excel`), and Wherewolf runs in a local environment, we will use `pandas` to read the Excel file and then convert it to a Spark DataFrame as a fallback for this engine.
9
+ - **FileBrowser:** Update the allowed extensions to include `.xlsx` and `.xls`.
10
+
11
+ ## Core Data Structures
12
+ No changes to `QueryResult`.
13
+
14
+ ## Public Interfaces
15
+ - No API changes.
16
+ - UI: `FileBrowser.render_explorer` will now permit loading of Excel files.
17
+
18
+ ## Dependency Requirements
19
+ - `openpyxl` (already present for export)
20
+ - `duckdb` (already present, requires extension install at runtime)
21
+ - `pandas` (already present)
22
+
23
+ ## Testing Strategy
24
+ - Create `tests/test_excel_support.py`.
25
+ - Verify DuckDB can read a sample `.xlsx`.
26
+ - Verify Spark can read a sample `.xlsx`.
27
+ - Verify UI logic allows the extensions.
28
+
29
+ ## Task Decomposition
30
+ - **Task 1: DuckDB Excel Logic**
31
+ - Implement extension loading and `read_xlsx` in `src/wherewolf/execution/duckdb_engine.py`.
32
+ - **Task 2: Spark Excel Logic**
33
+ - Implement pandas-based bridge for Excel in `src/wherewolf/execution/spark_engine.py`.
34
+ - **Task 3: UI Extension Update**
35
+ - Update `src/wherewolf/ui/file_browser.py` to include `.xlsx` and `.xls`.
36
+ - **Task 4: Verification**
37
+ - Add and run tests.
@@ -0,0 +1,72 @@
1
+ # Plan: Schema & Metadata HUD
2
+
3
+ ## Problem Definition
4
+ Users currently have no visual way to see the column names and data types of a loaded dataset without running a manual query like `SELECT * FROM dataset LIMIT 1`. This leads to friction when writing queries, especially for datasets with many or complex column names.
5
+
6
+ ## Architecture Overview
7
+ The Schema HUD will be integrated into the existing engine-based architecture:
8
+ 1. **Engines (`DuckDBEngine`, `SparkEngine`)**: Will be extended with a `get_schema(path: str)` method that returns a summary of the dataset's structure.
9
+ 2. **UI (`app.py`)**: Will trigger a schema fetch when a new file is loaded and display the results in a "Schema Preview" section in the sidebar.
10
+ 3. **State Management**: The schema will be cached in `st.session_state` to avoid redundant engine calls.
11
+
12
+ ## Core Data Structures
13
+ The schema will be represented as a `pandas.DataFrame` with the following columns:
14
+ - `Column`: The name of the field.
15
+ - `Type`: The detected data type (e.g., `VARCHAR`, `INTEGER`, `struct`).
16
+
17
+ ## Public Interfaces
18
+ - `DuckDBEngine.get_schema(path: str) -> pd.DataFrame`: Uses `DESCRIBE dataset` or native relation metadata.
19
+ - `SparkEngine.get_schema(path: str) -> pd.DataFrame`: Uses `df.schema` or `DESCRIBE dataset`.
20
+
21
+ ## Dependency Requirements
22
+ - Existing dependencies (`duckdb`, `pyspark`, `pandas`, `streamlit`) are sufficient.
23
+
24
+ ## Implementation Plan
25
+
26
+ ### Phase 1: Engine Enhancements
27
+ - [ ] **Refactor `DuckDBEngine`**:
28
+ - Extract file-to-view registration logic into a private `_register_view(path)` method.
29
+ - Implement `get_schema(path)`: registers the view and runs `DESCRIBE dataset`.
30
+ - [ ] **Refactor `SparkEngine`**:
31
+ - Extract file-to-view registration logic into a private `_register_view(path)` method.
32
+ - Implement `get_schema(path)`: registers the view and runs `DESCRIBE dataset`.
33
+
34
+ ### Phase 2: UI Integration
35
+ - [ ] **Update `app.py` Session State**:
36
+ - Initialize `st.session_state.schema = None`.
37
+ - [ ] **Update Path Processing**:
38
+ - When `pending_path` is detected, trigger `get_schema` using the currently selected engine.
39
+ - Store the result in `st.session_state.schema`.
40
+ - [ ] **Add Sidebar HUD**:
41
+ - Add an `st.expander("📊 Schema Preview")` in the sidebar below the "Active Path" info.
42
+ - Display the schema DataFrame if available.
43
+
44
+ ### Phase 3: Robustness & Polishing
45
+ - [ ] Handle edge cases (e.g., empty files, unsupported formats) gracefully within `get_schema`.
46
+ - [ ] Ensure `get_schema` is non-blocking or fast enough for the UI (metadata-only operations are typically very fast).
47
+
48
+ ## Testing Strategy
49
+ - **Unit Tests**:
50
+ - `tests/test_duckdb_engine.py`: Add `test_get_schema` verifying column names/types for CSV and Parquet.
51
+ - `tests/test_spark_engine.py`: Add `test_get_schema` verifying column names/types for CSV and Parquet.
52
+ - **Integration Tests**:
53
+ - Verify that switching engines refreshes the schema HUD correctly.
54
+ - Verify that the HUD persists across query executions.
55
+
56
+ ## Git & Workflow
57
+ - **Feature Branch**: Create a new branch `feat/schema-hud`.
58
+ - **Commits**: Use imperative style (e.g., "Add get_schema to DuckDBEngine").
59
+ - **Finalization**: Merge to `main` (if requested) or leave for review.
60
+
61
+ ## Verification
62
+ - [ ] `uv run pytest` passes.
63
+ - [ ] `ruff check . --fix` and `ruff format .` pass.
64
+ - [ ] `ty check .` (or `uv run ty check .`) passes if applicable.
65
+ - [ ] Manual verification: Load a Parquet file and confirm columns appear in the sidebar.
66
+
67
+ ## Definition of Done
68
+ - [ ] Tests pass.
69
+ - [ ] Linter/Formatter pass.
70
+ - [ ] `ty` check passes.
71
+ - [ ] Session log recorded in `docs/agent_conversations/`.
72
+ - [ ] README updated if necessary (not needed for this internal UI feature).
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "wherewolf"
3
- version = "0.2.2"
3
+ version = "0.3.0"
4
4
  requires-python = ">=3.11"
5
5
  dependencies = [
6
6
  "duckdb>=1.5.0",
@@ -7,6 +7,13 @@ from wherewolf.storage import HistoryManager
7
7
  from wherewolf.export import Exporter
8
8
  from wherewolf.ui import FileBrowser
9
9
  from streamlit_ace import st_ace
10
+ import importlib.metadata
11
+
12
+ # Get version from metadata
13
+ try:
14
+ __version__ = importlib.metadata.version("wherewolf")
15
+ except importlib.metadata.PackageNotFoundError:
16
+ __version__ = "0.3.0" # Fallback for dev runs
10
17
 
11
18
  # --- Configuration ---
12
19
  st.set_page_config(
@@ -24,11 +31,43 @@ hide_st_style = """
24
31
  footer {visibility: hidden;}
25
32
  /* Hide the Deploy button specifically */
26
33
  .stAppDeployButton {display: none;}
27
-
34
+
28
35
  /* Darken the sidebar */
29
36
  [data-testid="stSidebar"] {
30
37
  background-color: #000000;
31
38
  }
39
+
40
+ /* Add back some top padding for main content */
41
+ .main .block-container, .block-container {
42
+ padding-top: 4rem !important;
43
+ margin-top: 0rem !important;
44
+ }
45
+
46
+ /* Aggressively remove top padding for sidebar */
47
+ [data-testid="stSidebar"] section {
48
+ padding-top: 0rem !important;
49
+ }
50
+ [data-testid="stSidebar"] [data-testid="stVerticalBlock"] {
51
+ padding-top: 0rem !important;
52
+ }
53
+ /* Specific fix for sidebar header whitespace */
54
+ [data-testid="stSidebarHeader"], .st-emotion-cache-10p9htt {
55
+ height: 3rem !important;
56
+ min-height: 3rem !important;
57
+ margin-bottom: 0rem !important;
58
+ padding-top: 0rem !important;
59
+ }
60
+
61
+ /* Make primary buttons green */
62
+ button[kind="primary"] {
63
+ background-color: #28a745 !important;
64
+ border-color: #28a745 !important;
65
+ color: white !important;
66
+ }
67
+ button[kind="primary"]:hover {
68
+ background-color: #218838 !important;
69
+ border-color: #1e7e34 !important;
70
+ }
32
71
  </style>
33
72
  """
34
73
  st.markdown(hide_st_style, unsafe_allow_html=True)
@@ -64,6 +103,12 @@ if "active_engine" not in st.session_state:
64
103
  st.session_state.active_engine = None
65
104
  if "query_future" not in st.session_state:
66
105
  st.session_state.query_future = None
106
+ if "schema" not in st.session_state:
107
+ st.session_state.schema = None
108
+ if "last_schema_path" not in st.session_state:
109
+ st.session_state.last_schema_path = ""
110
+ if "last_schema_engine" not in st.session_state:
111
+ st.session_state.last_schema_engine = ""
67
112
 
68
113
  # --- Early State Update Pattern ---
69
114
  # This avoids StreamlitAPIException by updating state BEFORE widgets are instantiated.
@@ -91,9 +136,12 @@ with st.sidebar:
91
136
 
92
137
  st.markdown(
93
138
  f"""
94
- <div style="display: flex; align-items: center; gap: 12px; margin-bottom: 20px;">
139
+ <div style="display: flex; align-items: center; gap: 12px; margin-bottom: 20px; position: relative;">
95
140
  <img src="data:image/png;base64,{logo_b64}" width="60">
96
- <h1 style="margin: 0; white-space: nowrap; font-size: 2.2rem;">Wherewolf</h1>
141
+ <div>
142
+ <h1 style="margin: 0; white-space: nowrap; font-size: 2.2rem;">Wherewolf</h1>
143
+ <p style="margin: 0; font-size: 0.8rem; color: #666; position: absolute; bottom: -12px; left: 72px;">v{__version__}</p>
144
+ </div>
97
145
  </div>
98
146
  """,
99
147
  unsafe_allow_html=True,
@@ -117,6 +165,36 @@ with st.sidebar:
117
165
 
118
166
  engine_name = st.selectbox("Execution Engine", ["DuckDB", "Spark"])
119
167
 
168
+ # --- Schema HUD Logic ---
169
+ if st.session_state.path_input:
170
+ # Refresh schema if path or engine changed
171
+ if (
172
+ st.session_state.path_input != st.session_state.last_schema_path
173
+ or engine_name != st.session_state.last_schema_engine
174
+ ):
175
+ try:
176
+ if engine_name == "DuckDB":
177
+ temp_engine = DuckDBEngine()
178
+ else:
179
+ temp_engine = SparkEngine()
180
+ st.session_state.schema = temp_engine.get_schema(st.session_state.path_input)
181
+ st.session_state.last_schema_path = st.session_state.path_input
182
+ st.session_state.last_schema_engine = engine_name
183
+ except Exception as e:
184
+ st.session_state.schema = None
185
+ st.sidebar.error(f"Failed to fetch schema: {e}")
186
+
187
+ if st.session_state.schema is not None and not st.session_state.schema.empty:
188
+ with st.expander("📊 Schema Preview", expanded=True):
189
+ st.dataframe(
190
+ st.session_state.schema,
191
+ hide_index=True,
192
+ width="stretch",
193
+ height=200,
194
+ )
195
+ elif st.session_state.schema is not None:
196
+ st.caption("No columns detected.")
197
+
120
198
  # Auto-align input dialect if engine changes
121
199
  if st.session_state.last_engine_name != engine_name:
122
200
  st.session_state.input_dialect_ui = engine_name
@@ -178,61 +256,36 @@ with st.sidebar:
178
256
  index=themes.index("dracula"),
179
257
  )
180
258
 
181
- # --- Main Area ---
182
- col_h1, col_h2 = st.columns([0.7, 0.3])
183
- with col_h1:
184
- st.header("SQL Editor")
185
- with col_h2:
186
- input_dialect_ui = st.selectbox(
187
- "Input Dialect", options=["DuckDB", "Spark", "Azure SQL"], key="input_dialect_ui"
188
- )
189
-
190
- # --- Autorefresh while running ---
191
- if st.session_state.is_running and "PYTEST_CURRENT_TEST" not in os.environ:
192
- import time
193
-
194
- time.sleep(0.1)
195
- st.rerun()
196
-
197
- # Use st_ace for syntax highlighting
198
- query_text = st_ace(
199
- value=st.session_state.selected_query,
200
- language="sql",
201
- theme=ace_theme,
202
- height=300,
203
- key="sql_editor",
204
- auto_update=True,
205
- )
259
+ # Use a container-like column to force alignment of editor and buttons
260
+ main_col, _ = st.columns([0.99, 0.01])
261
+ with main_col:
262
+ # Dialect selector right-aligned within the main column
263
+ _, col_h2 = st.columns([0.7, 0.3])
264
+ with col_h2:
265
+ input_dialect_ui = st.selectbox(
266
+ "Input Dialect", options=["DuckDB", "Spark", "Azure SQL"], key="input_dialect_ui"
267
+ )
206
268
 
207
- col1, col2 = st.columns([0.1, 0.9])
208
- with col1:
209
- run_button = st.button(
210
- "🚀 Run",
211
- type="primary",
212
- disabled=st.session_state.is_running or not st.session_state.path_input,
269
+ # Use st_ace for syntax highlighting
270
+ query_text = st_ace(
271
+ value=st.session_state.selected_query,
272
+ language="sql",
273
+ theme=ace_theme,
274
+ height=300,
275
+ key="sql_editor",
276
+ auto_update=True,
213
277
  )
214
- with col2:
215
- cancel_button = st.button("🛑 Cancel", disabled=not st.session_state.is_running)
216
278
 
217
- # --- Execution Logic ---
218
- # Handle completion of background query
219
- if st.session_state.query_future and st.session_state.query_future.done():
220
- try:
221
- result = st.session_state.query_future.result()
222
- st.session_state.query_result = result
223
- if result.success:
224
- history_manager.add_entry(
225
- st.session_state.last_engine_name.lower(),
226
- st.session_state.executed_query,
227
- st.session_state.path_input,
228
- )
229
- except Exception as e:
230
- st.session_state.query_result = QueryResult(success=False, error_message=str(e))
231
-
232
- st.session_state.is_running = False
233
- st.session_state.query_future = None
234
- st.session_state.active_engine = None
235
- st.rerun()
279
+ # Button row inside the same alignment context
280
+ col_b1, col_b2, col_b3 = st.columns([0.12, 0.12, 0.76])
281
+ with col_b1:
282
+ run_button = st.button(
283
+ "Run",
284
+ type="primary",
285
+ disabled=st.session_state.is_running or not st.session_state.path_input,
286
+ )
287
+ with col_b2:
288
+ cancel_button = st.button("Cancel", disabled=not st.session_state.is_running)
236
289
 
237
290
  if run_button and st.session_state.path_input:
238
291
  if engine_name == "DuckDB":
@@ -287,6 +340,26 @@ if cancel_button and st.session_state.active_engine:
287
340
  st.warning("Query cancelled.")
288
341
  st.rerun()
289
342
 
343
+ # --- Execution Logic ---
344
+ # Handle completion of background query
345
+ if st.session_state.query_future and st.session_state.query_future.done():
346
+ try:
347
+ result = st.session_state.query_future.result()
348
+ st.session_state.query_result = result
349
+ if result.success:
350
+ history_manager.add_entry(
351
+ st.session_state.last_engine_name.lower(),
352
+ st.session_state.executed_query,
353
+ st.session_state.path_input,
354
+ )
355
+ except Exception as e:
356
+ st.session_state.query_result = QueryResult(success=False, error_message=str(e))
357
+
358
+ st.session_state.is_running = False
359
+ st.session_state.query_future = None
360
+ st.session_state.active_engine = None
361
+ st.rerun()
362
+
290
363
  # --- Results Display ---
291
364
  if st.session_state.query_result:
292
365
  result: QueryResult = st.session_state.query_result
@@ -323,7 +396,7 @@ if st.session_state.query_result:
323
396
  from_dialect=executed_input_key,
324
397
  to_dialect=target_dialect,
325
398
  )
326
- with st.expander(f"Translated SQL ({selected_target_ui})", expanded=True):
399
+ with st.expander(f"Translated SQL ({selected_target_ui})", expanded=True):
327
400
  st.code(translated_sql, language="sql")
328
401
  except Exception as e:
329
402
  st.warning(f"Translation failed: {str(e)}")
@@ -374,3 +447,10 @@ if st.session_state.query_result:
374
447
 
375
448
  elif not st.session_state.path_input:
376
449
  st.info("👈 Please provide a dataset path in the sidebar to begin.")
450
+
451
+ # --- Autorefresh while running ---
452
+ if st.session_state.is_running and "PYTEST_CURRENT_TEST" not in os.environ:
453
+ import time
454
+
455
+ time.sleep(0.1)
456
+ st.rerun()
@@ -1,5 +1,6 @@
1
1
  import duckdb
2
2
  import time
3
+ import pandas as pd
3
4
  from .models import QueryResult
4
5
 
5
6
 
@@ -9,6 +10,43 @@ class DuckDBEngine:
9
10
  def __init__(self):
10
11
  self.con = duckdb.connect(database=":memory:")
11
12
 
13
+ def _register_view(self, path: str):
14
+ """Registers the dataset as a view named 'dataset'."""
15
+ from pathlib import Path
16
+
17
+ abs_path = Path(path).expanduser().resolve()
18
+ suffix = abs_path.suffix.lower()
19
+ if suffix == ".csv":
20
+ rel_source = self.con.from_csv_auto(str(abs_path))
21
+ elif suffix == ".parquet":
22
+ rel_source = self.con.from_parquet(str(abs_path))
23
+ elif suffix == ".json":
24
+ rel_source = self.con.sql("SELECT * FROM read_json_auto(?)", params=[str(abs_path)])
25
+ elif suffix in [".xlsx", ".xls"]:
26
+ self.con.execute("INSTALL excel; LOAD excel;")
27
+ rel_source = self.con.sql("SELECT * FROM read_xlsx(?)", params=[str(abs_path)])
28
+ else:
29
+ rel_source = self.con.from_csv_auto(str(abs_path))
30
+
31
+ rel_source.create_view("dataset", replace=True)
32
+
33
+ def get_schema(self, path: str) -> pd.DataFrame:
34
+ """Returns the schema of the dataset.
35
+
36
+ Returns:
37
+ A DataFrame with 'Column' and 'Type' columns.
38
+ """
39
+ try:
40
+ self._register_view(path)
41
+ # DESCRIBE dataset returns: column_name, column_type, null, key, default, extra
42
+ df = self.con.sql("DESCRIBE dataset").df()
43
+ # Normalize to Column/Type
44
+ return df[["column_name", "column_type"]].rename(
45
+ columns={"column_name": "Column", "column_type": "Type"}
46
+ )
47
+ except Exception:
48
+ return pd.DataFrame({"Column": [], "Type": []})
49
+
12
50
  def execute(self, query: str, path: str, limit: int = 1000) -> QueryResult:
13
51
  """Executes a SQL query against a local file using DuckDB.
14
52
 
@@ -20,31 +58,10 @@ class DuckDBEngine:
20
58
  Returns:
21
59
  A QueryResult object.
22
60
  """
23
- from pathlib import Path
24
-
25
- abs_path = Path(path).expanduser().resolve()
26
61
  start_time = time.time()
27
62
  try:
28
63
  # 1. Register the dataset view
29
- # DuckDB automatically detects CSV, Parquet, JSON based on extension or content
30
- # Using Relation API to safely handle paths with special characters
31
- suffix = abs_path.suffix.lower()
32
- if suffix == ".csv":
33
- rel_source = self.con.from_csv_auto(str(abs_path))
34
- elif suffix == ".parquet":
35
- rel_source = self.con.from_parquet(str(abs_path))
36
- elif suffix == ".json":
37
- # Use SQL with read_json_auto to avoid ty check warning about missing attribute
38
- rel_source = self.con.sql("SELECT * FROM read_json_auto(?)", params=[str(abs_path)])
39
- elif suffix in [".xlsx", ".xls"]:
40
- # Official DuckDB excel extension
41
- self.con.execute("INSTALL excel; LOAD excel;")
42
- rel_source = self.con.sql("SELECT * FROM read_xlsx(?)", params=[str(abs_path)])
43
- else:
44
- # Fallback to auto-detection
45
- rel_source = self.con.from_csv_auto(str(abs_path))
46
-
47
- rel_source.create_view("dataset", replace=True)
64
+ self._register_view(path)
48
65
 
49
66
  # 2. Execute the user query
50
67
  # We wrap the user query to handle limits for the preview
@@ -1,4 +1,5 @@
1
1
  import time
2
+ import pandas as pd
2
3
  from .models import QueryResult
3
4
 
4
5
  try:
@@ -23,42 +24,64 @@ class SparkEngine:
23
24
  self.spark = SparkSession.builder.appName("Wherewolf").master("local[*]").getOrCreate()
24
25
  return self.spark
25
26
 
27
+ def _register_view(self, path: str):
28
+ """Registers the dataset as a view named 'dataset'."""
29
+ import os
30
+
31
+ spark = self._get_session()
32
+ abs_path = os.path.abspath(path)
33
+
34
+ # Determine format by extension (basic detection)
35
+ if abs_path.endswith(".csv"):
36
+ df_spark = (
37
+ spark.read.option("header", "true").option("inferSchema", "true").csv(abs_path)
38
+ )
39
+ elif abs_path.endswith(".parquet"):
40
+ df_spark = spark.read.parquet(abs_path)
41
+ elif abs_path.endswith(".json"):
42
+ df_spark = spark.read.json(abs_path)
43
+ elif abs_path.endswith(".xlsx") or abs_path.endswith(".xls"):
44
+ # Use pandas as a bridge for Excel in local Spark
45
+ df_pd = pd.read_excel(abs_path)
46
+ df_spark = spark.createDataFrame(df_pd)
47
+ else:
48
+ raise ValueError(f"Unsupported file format for path: {abs_path}")
49
+
50
+ # 2. Register temp view
51
+ df_spark.createOrReplaceTempView("dataset")
52
+ return df_spark
53
+
54
+ def get_schema(self, path: str) -> pd.DataFrame:
55
+ """Returns the schema of the dataset.
56
+
57
+ Returns:
58
+ A DataFrame with 'Column' and 'Type' columns.
59
+ """
60
+ if not SPARK_AVAILABLE:
61
+ return pd.DataFrame({"Column": [], "Type": []})
62
+
63
+ try:
64
+ df_spark = self._register_view(path)
65
+ # Spark schema to pandas
66
+ schema_data = []
67
+ for field in df_spark.schema:
68
+ schema_data.append({"Column": field.name, "Type": field.dataType.simpleString()})
69
+ return pd.DataFrame(schema_data)
70
+ except Exception:
71
+ return pd.DataFrame({"Column": [], "Type": []})
72
+
26
73
  def execute(self, query: str, path: str, limit: int = 1000) -> QueryResult:
27
74
  if not SPARK_AVAILABLE:
28
75
  return QueryResult(success=False, error_message="PySpark not installed")
29
76
 
30
- import os
31
-
32
- abs_path = os.path.abspath(path)
33
77
  start_time = time.time()
34
78
  try:
35
79
  spark = self._get_session()
36
80
 
37
- # 1. Read the dataset
38
- # Determine format by extension (basic detection)
39
- if abs_path.endswith(".csv"):
40
- df_spark = (
41
- spark.read.option("header", "true").option("inferSchema", "true").csv(abs_path)
42
- )
43
- elif abs_path.endswith(".parquet"):
44
- df_spark = spark.read.parquet(abs_path)
45
- elif abs_path.endswith(".json"):
46
- df_spark = spark.read.json(abs_path)
47
- elif abs_path.endswith(".xlsx") or abs_path.endswith(".xls"):
48
- # Use pandas as a bridge for Excel in local Spark
49
- import pandas as pd
50
-
51
- df_pd = pd.read_excel(abs_path)
52
- df_spark = spark.createDataFrame(df_pd)
53
- else:
54
- # Default to automatic detection if supported,
55
- # but Spark is less automatic than DuckDB
56
- raise ValueError(f"Unsupported file format for path: {abs_path}")
57
-
58
- # 2. Register temp view
59
- df_spark.createOrReplaceTempView("dataset")
60
-
61
- # 3. Execute query
81
+ # 1. Register the dataset view
82
+ self._register_view(path)
83
+
84
+ # 2. Execute query
62
85
  res_spark = spark.sql(query)
63
86
 
64
87
  # 4. Fetch the preview + 1 extra row to see if there's more
@@ -93,7 +93,7 @@ class FileBrowser:
93
93
 
94
94
  if is_valid:
95
95
  st.success(f"📄 Ready to load: `{selected_file}`")
96
- if st.button("🚀 Load This File", use_container_width=True, type="primary"):
96
+ if st.button("Load This File", width="stretch", type="primary"):
97
97
  return full_path
98
98
  else:
99
99
  st.warning(f"⚠️ `{selected_file}` is not a supported data format.")
@@ -9,5 +9,4 @@ def test_app_initialization():
9
9
 
10
10
  # Assert basic UI elements exist
11
11
  assert any("Wherewolf" in m.value for m in at.sidebar.markdown)
12
- assert at.header[0].value == "SQL Editor"
13
12
  assert not at.exception
@@ -17,7 +17,7 @@ def test_app_cancel_logic_mocked():
17
17
  at.run()
18
18
 
19
19
  # Find Cancel button
20
- cancel_btn = next(b for b in at.button if b.label == "🛑 Cancel")
20
+ cancel_btn = next(b for b in at.button if b.label == "Cancel")
21
21
  assert not cancel_btn.disabled
22
22
 
23
23
  cancel_btn.click().run()
@@ -17,7 +17,7 @@ def test_app_query_execution_flow(tmp_path):
17
17
  at.run()
18
18
 
19
19
  # 3. Trigger 'Run' button
20
- run_btn = next(b for b in at.button if b.label == "🚀 Run")
20
+ run_btn = next(b for b in at.button if b.label == "Run")
21
21
  run_btn.click().run()
22
22
 
23
23
  # 3.5 Run again to process the completed future
@@ -34,6 +34,17 @@ def test_duckdb_engine_failure(csv_path):
34
34
  assert result.error_message != ""
35
35
 
36
36
 
37
+ def test_duckdb_get_schema(csv_path):
38
+ engine = DuckDBEngine()
39
+ schema_df = engine.get_schema(csv_path)
40
+
41
+ assert isinstance(schema_df, pd.DataFrame)
42
+ # DuckDB's DESCRIBE returns many columns, but our HUD should normalize to ["Column", "Type"]
43
+ assert list(schema_df.columns) == ["Column", "Type"]
44
+ assert "name" in schema_df["Column"].values
45
+ assert "value" in schema_df["Column"].values
46
+
47
+
37
48
  @pytest.mark.skip(reason="Spark requires complex setup for CI, focus on DuckDB first")
38
49
  def test_spark_engine_success(csv_path):
39
50
  engine = SparkEngine()
@@ -0,0 +1,27 @@
1
+ import pandas as pd
2
+ import pytest
3
+ from wherewolf.execution.spark_engine import SparkEngine, SPARK_AVAILABLE
4
+
5
+
6
+ @pytest.fixture
7
+ def csv_path(tmp_path):
8
+ path = tmp_path / "test.csv"
9
+ df = pd.DataFrame({"name": ["alice", "bob", "charlie"], "value": [100, 200, 300]})
10
+ df.to_csv(path, index=False)
11
+ return str(path)
12
+
13
+
14
+ @pytest.mark.skipif(not SPARK_AVAILABLE, reason="PySpark not installed")
15
+ def test_spark_get_schema(csv_path):
16
+ engine = SparkEngine()
17
+ schema_df = engine.get_schema(csv_path)
18
+
19
+ assert isinstance(schema_df, pd.DataFrame)
20
+ assert list(schema_df.columns) == ["Column", "Type"]
21
+ assert "name" in schema_df["Column"].values
22
+ assert "value" in schema_df["Column"].values
23
+
24
+
25
+ def test_spark_engine_init():
26
+ engine = SparkEngine()
27
+ assert engine is not None
@@ -2008,7 +2008,7 @@ wheels = [
2008
2008
 
2009
2009
  [[package]]
2010
2010
  name = "wherewolf"
2011
- version = "0.2.1"
2011
+ version = "0.2.2"
2012
2012
  source = { editable = "." }
2013
2013
  dependencies = [
2014
2014
  { name = "duckdb" },
@@ -1,6 +0,0 @@
1
- from wherewolf.execution.spark_engine import SparkEngine
2
-
3
-
4
- def test_spark_engine_init():
5
- engine = SparkEngine()
6
- assert engine is not None
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes