wherewolf 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- wherewolf/__init__.py +0 -0
- wherewolf/app.py +305 -0
- wherewolf/execution/__init__.py +5 -0
- wherewolf/execution/duckdb_engine.py +56 -0
- wherewolf/execution/models.py +13 -0
- wherewolf/execution/spark_engine.py +78 -0
- wherewolf/export/__init__.py +3 -0
- wherewolf/export/exporter.py +26 -0
- wherewolf/storage/__init__.py +3 -0
- wherewolf/storage/history.py +63 -0
- wherewolf/translation/__init__.py +3 -0
- wherewolf/translation/translator.py +35 -0
- wherewolf/ui/__init__.py +3 -0
- wherewolf/ui/file_browser.py +101 -0
- wherewolf-0.1.0.dist-info/METADATA +14 -0
- wherewolf-0.1.0.dist-info/RECORD +18 -0
- wherewolf-0.1.0.dist-info/WHEEL +4 -0
- wherewolf-0.1.0.dist-info/licenses/LICENSE +21 -0
wherewolf/__init__.py
ADDED
|
File without changes
|
wherewolf/app.py
ADDED
|
@@ -0,0 +1,305 @@
|
|
|
1
|
+
import streamlit as st
|
|
2
|
+
from wherewolf.execution import DuckDBEngine, SparkEngine, QueryResult
|
|
3
|
+
from wherewolf.translation import Translator
|
|
4
|
+
from wherewolf.storage import HistoryManager
|
|
5
|
+
from wherewolf.export import Exporter
|
|
6
|
+
from wherewolf.ui import FileBrowser
|
|
7
|
+
from streamlit_ace import st_ace
|
|
8
|
+
|
|
9
|
+
# --- Configuration ---
import os  # used below to derive the export file name from the dataset path

st.set_page_config(
    page_title="Wherewolf SQL Workbench",
    layout="wide",
    initial_sidebar_state="expanded",
)

# UI label -> SQLGlot dialect identifier. Shared by execution and translation.
DIALECT_KEYS = {"DuckDB": "duckdb", "Spark": "spark", "Azure SQL": "tsql"}

# Sentinel option of the history selectbox meaning "nothing chosen".
HISTORY_PLACEHOLDER = -1

# Hide Streamlit options (hamburger menu and footer)
hide_st_style = """
<style>
/* Hide the Streamlit main menu (hamburger) */
#MainMenu {visibility: hidden;}
/* Hide the "Made with Streamlit" footer */
footer {visibility: hidden;}
/* Hide the Deploy button specifically */
.stAppDeployButton {display: none;}
/*
We do NOT hide 'stToolbar' or 'header' entirely
because they contain the sidebar toggle button.
*/
</style>
"""
st.markdown(hide_st_style, unsafe_allow_html=True)

# --- Initialize Session State ---
# The dict literal is rebuilt on every script run, so the list default is
# never shared between sessions.
_session_defaults = {
    "path_input": "",
    "query_result": None,
    "is_running": False,
    "history": [],
    "selected_query": "SELECT * FROM dataset LIMIT 10",
    "executed_input_dialect_key": "duckdb",
    "last_engine_name": "DuckDB",
    "input_dialect_ui": "DuckDB",
}
for _state_key, _state_default in _session_defaults.items():
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _state_default

# --- Early State Update Pattern ---
# This avoids StreamlitAPIException by updating state BEFORE widgets are instantiated.
if "pending_path" in st.session_state:
    st.session_state.path_input = st.session_state.pending_path
    del st.session_state.pending_path

if "pending_query" in st.session_state:
    st.session_state.selected_query = st.session_state.pending_query
    del st.session_state.pending_query


def _queue_history_entry(entries):
    """Queue the history entry chosen in the sidebar for loading.

    Runs as the ``on_change`` callback of the history selectbox. The chosen
    query and path are stored as ``pending_*`` values, which the early state
    update above applies on the run that follows the callback.

    Args:
        entries: The history list that was used to build the selectbox options.
    """
    position = st.session_state["history_choice"]
    if position == HISTORY_PLACEHOLDER or not 0 <= position < len(entries):
        return
    chosen = entries[position]
    st.session_state.pending_query = chosen["query"]
    st.session_state.pending_path = chosen["path"]
    # Reset the widget so the selection is applied exactly once; otherwise the
    # retained widget value would re-apply the entry on every later run.
    st.session_state["history_choice"] = HISTORY_PLACEHOLDER


# --- Instances ---
history_manager = HistoryManager()
translator = Translator()

# --- Sidebar ---
with st.sidebar:
    st.title("🐺 Wherewolf")

    # 1. BROWSE LOGIC
    # The browser is now the primary path selection tool.
    with st.expander("📁 Browse Local Files", expanded=True):
        show_hidden = st.checkbox("Show Hidden Files", value=False)
        selected_path = FileBrowser.render_explorer(show_hidden=show_hidden)
        if selected_path:
            # Set PENDING path and rerun
            st.session_state.pending_path = selected_path
            st.rerun()

    # Display the active path clearly in the sidebar
    if st.session_state.path_input:
        st.info(f"📄 Active: `{st.session_state.path_input}`")
    else:
        st.warning("⚠️ No dataset loaded.")

    engine_name = st.selectbox("Execution Engine", ["DuckDB", "Spark"])

    # Auto-align input dialect if engine changes
    if st.session_state.last_engine_name != engine_name:
        st.session_state.input_dialect_ui = engine_name
        st.session_state.last_engine_name = engine_name

    preview_limit = st.slider("Preview Size", 10, 1000, 100)
    export_format = st.selectbox("Export Format", ["CSV", "Excel", "Parquet"])

    st.divider()
    st.subheader("Query History")
    history = history_manager.get_all()
    if history:
        history_labels = [f"{h['timestamp'][:16]} - {h['query'][:30]}..." for h in history]
        # Options are positions rather than labels: two entries can share the
        # same truncated label, and a label lookup would pick the wrong one.
        st.selectbox(
            "Select from History",
            options=[HISTORY_PLACEHOLDER, *range(len(history))],
            format_func=lambda pos: (
                "Select..." if pos == HISTORY_PLACEHOLDER else history_labels[pos]
            ),
            key="history_choice",
            on_change=_queue_history_entry,
            args=(history,),
        )
    else:
        st.write("No history yet.")

    if st.button("Clear History"):
        history_manager.clear()
        st.rerun()

    st.divider()
    st.subheader("Editor Settings")
    themes = sorted(
        [
            "tomorrow_night_eighties",
            "monokai",
            "twilight",
            "ambiance",
            "chaos",
            "clouds_midnight",
            "dracula",
            "gob",
            "solarized_dark",
            "terminal",
            "vibrant_ink",
            "chrome",
            "clouds",
            "crimson_editor",
            "dawn",
            "dreamweaver",
            "eclipse",
            "github",
            "solarized_light",
            "textmate",
            "tomorrow",
            "xcode",
        ]
    )
    ace_theme = st.selectbox(
        "Editor Theme",
        themes,
        index=themes.index("dracula"),
    )

# --- Main Area ---
col_h1, col_h2 = st.columns([0.7, 0.3])
with col_h1:
    st.header("SQL Editor")
with col_h2:
    input_dialect_ui = st.selectbox(
        "Input Dialect", options=list(DIALECT_KEYS), key="input_dialect_ui"
    )

# Use st_ace for syntax highlighting
query_text = st_ace(
    value=st.session_state.selected_query,
    language="sql",
    theme=ace_theme,
    height=300,
    key="sql_editor",
    auto_update=True,
)

col1, col2 = st.columns([0.1, 0.9])
with col1:
    run_button = st.button("🚀 Run", type="primary", disabled=st.session_state.is_running)
with col2:
    # NOTE(review): the Cancel button is rendered but nothing reads its value;
    # execution below is synchronous, so it cannot interrupt a running query.
    st.button("🛑 Cancel", disabled=not st.session_state.is_running)

# --- Execution Logic ---
if run_button and st.session_state.path_input:
    st.session_state.is_running = True
    st.session_state.query_result = None
    try:
        engine = DuckDBEngine() if engine_name == "DuckDB" else SparkEngine()

        # Map dialects
        input_dialect_key = DIALECT_KEYS[input_dialect_ui]
        engine_dialect_key = DIALECT_KEYS[engine_name]

        # Save the executed input dialect so the Translation section knows
        # where to translate from
        st.session_state.executed_input_dialect_key = input_dialect_key
        # Keep the editor contents across the rerun triggered below.
        st.session_state.selected_query = query_text

        query_to_run = query_text
        translation_error = None

        # Translate query if the input dialect is different from the execution engine
        if input_dialect_key != engine_dialect_key:
            try:
                query_to_run = translator.translate(
                    query_text, from_dialect=input_dialect_key, to_dialect=engine_dialect_key
                )
            except Exception as e:
                translation_error = str(e)

        if translation_error:
            st.session_state.query_result = QueryResult(
                success=False,
                error_message=f"Failed to translate input query from {input_dialect_ui} to {engine_name}:\n{translation_error}",
            )
        else:
            with st.spinner(f"Running query on {engine_name}..."):
                result = engine.execute(
                    query_to_run, st.session_state.path_input, limit=preview_limit
                )
            st.session_state.query_result = result

            if result.success:
                history_manager.add_entry(
                    engine_name.lower(), query_text, st.session_state.path_input
                )
    finally:
        # Always release the flag; otherwise an unexpected exception would
        # leave the Run button disabled for the rest of the session.
        st.session_state.is_running = False
    st.rerun()

# --- Results Display ---
if st.session_state.query_result:
    result: QueryResult = st.session_state.query_result

    if result.success:
        # --- Translation Section ---
        st.divider()
        col_t1, col_t2 = st.columns([0.7, 0.3])
        with col_t1:
            st.subheader("SQL Translation")
        with col_t2:
            # Map CURRENT UI selection to key for live translation logic
            current_input_key = DIALECT_KEYS[input_dialect_ui]

            # Determine target options: everything except the CURRENT input dialect
            target_options = [
                ui_name for ui_name, key in DIALECT_KEYS.items() if key != current_input_key
            ]

            selected_target_ui = st.selectbox(
                "Target Dialect", options=target_options, label_visibility="collapsed"
            )

        # Map UI selection to SQLGlot dialect identifiers
        target_dialect = DIALECT_KEYS[selected_target_ui]

        try:
            # Translate from the CURRENTLY SELECTED input dialect, not just the last executed one
            translated_sql = translator.translate(
                query_text,
                from_dialect=current_input_key,
                to_dialect=target_dialect,
            )
            with st.expander(f"✨ Translated SQL ({selected_target_ui})", expanded=True):
                st.code(translated_sql, language="sql")
        except Exception as e:
            st.warning(f"Translation failed: {str(e)}")

        m1, m2 = st.columns(2)
        m1.metric("Rows Returned", f"{result.row_count:,}")
        m2.metric("Execution Time", f"{result.execution_time:.4f}s")

        st.subheader("Preview")
        st.dataframe(result.df, width="stretch")

        # --- Export Section ---
        st.divider()
        st.subheader("Export Results")

        export_label = f"Download as {export_format}"
        if export_format == "CSV":
            data = Exporter.to_csv(result.df)
            mime = "text/csv"
            ext = ".csv"
        elif export_format == "Excel":
            data = Exporter.to_excel(result.df)
            mime = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
            ext = ".xlsx"
        else:
            data = Exporter.to_parquet(result.df)
            mime = "application/octet-stream"
            ext = ".parquet"

        # Derive original filename for the export
        orig_filename = os.path.basename(st.session_state.path_input)
        # Strip extension from original if present
        base_name = os.path.splitext(orig_filename)[0] or "wherewolf"
        download_name = f"{base_name}_export{ext}"

        st.download_button(label=export_label, data=data, file_name=download_name, mime=mime)

    else:
        st.error("Query Failed")
        with st.expander("Show Details"):
            st.text(result.error_message)

elif not st.session_state.path_input:
    st.info("👈 Please provide a dataset path in the sidebar to begin.")
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
import duckdb
|
|
2
|
+
import time
|
|
3
|
+
from .models import QueryResult
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class DuckDBEngine:
    """Execution engine that runs SQL against a local data file using DuckDB.

    Each instance owns its own in-memory DuckDB connection. The data file is
    exposed to user queries as a view named ``dataset``.
    """

    def __init__(self):
        self.con = duckdb.connect(database=":memory:")

    @staticmethod
    def _sql_string_literal(value: str) -> str:
        """Return ``value`` as a single-quoted SQL string literal.

        Embedded single quotes are doubled so a path such as ``it's.csv``
        cannot terminate the literal early.
        """
        return "'" + value.replace("'", "''") + "'"

    def execute(self, query: str, path: str, limit: int = 1000) -> QueryResult:
        """Executes a SQL query against a local file using DuckDB.

        Args:
            query: The SQL query to execute. It must refer to the data as
                ``dataset``.
            path: The filesystem path to the data file.
            limit: Maximum number of rows to return in the preview.

        Returns:
            A QueryResult object. Failures are reported through
            ``success=False`` and ``error_message`` rather than raised.
        """
        import os

        abs_path = os.path.abspath(path)
        start_time = time.time()
        try:
            # 1. Register the dataset view.
            # DuckDB detects the file format from the path; the path is
            # quoted as a literal because it is interpolated into SQL.
            source = self._sql_string_literal(abs_path)
            self.con.execute(f"CREATE OR REPLACE VIEW dataset AS SELECT * FROM {source}")

            # 2. Execute the user query.
            rel = self.con.sql(query)
            if rel is None:
                # No relation came back (e.g. empty input or a statement that
                # yields no rows to preview); report it explicitly instead of
                # failing below with an opaque attribute error.
                return QueryResult(
                    success=False,
                    error_message="Query did not return a result set.",
                    execution_time=time.time() - start_time,
                )

            # 3. Get total row count (might be expensive for large datasets,
            #    but usually okay for local DuckDB).
            row_count = rel.count("*").fetchone()[0]

            # 4. Fetch only the preview rows as a DataFrame.
            df = rel.limit(limit).df()

            execution_time = time.time() - start_time
            return QueryResult(
                df=df, execution_time=execution_time, row_count=row_count, success=True
            )
        except Exception as e:
            # Engine errors are surfaced to the UI instead of crashing the app.
            return QueryResult(
                success=False, error_message=str(e), execution_time=time.time() - start_time
            )

    def interrupt(self):
        """Interrupts the current query execution."""
        if self.con:
            self.con.interrupt()
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
from dataclasses import dataclass, field
|
|
2
|
+
import pandas as pd
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
@dataclass
class QueryResult:
    """Outcome of running one SQL query through an execution engine.

    Every field has a default, so a failure can be built from just
    ``success=False`` and an ``error_message``.
    """

    # Result rows; a fresh empty DataFrame is created for each instance.
    df: pd.DataFrame = field(default_factory=pd.DataFrame)
    # Elapsed time reported by the engine.
    execution_time: float = 0.0
    # Row count reported by the engine.
    row_count: int = 0
    # False when the query (or its translation) failed.
    success: bool = True
    # Error text accompanying a failure; empty by default.
    error_message: str = ""
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
import time
|
|
2
|
+
from .models import QueryResult
|
|
3
|
+
|
|
4
|
+
try:
|
|
5
|
+
from pyspark.sql import SparkSession
|
|
6
|
+
|
|
7
|
+
SPARK_AVAILABLE = True
|
|
8
|
+
except ImportError:
|
|
9
|
+
SPARK_AVAILABLE = False
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class SparkEngine:
    """Execution engine that runs SQL against a local data file using PySpark.

    The Spark session is created lazily on the first ``execute`` call. The
    data file is exposed to user queries as a temp view named ``dataset``.
    """

    def __init__(self):
        # Created on demand by _get_session().
        self.spark = None

    def _get_session(self):
        """Return the Spark session, creating a local one on first use."""
        if not self.spark:
            self.spark = SparkSession.builder.appName("Wherewolf").master("local[*]").getOrCreate()
        return self.spark

    def execute(self, query: str, path: str, limit: int = 1000) -> QueryResult:
        """Executes a SQL query against a local file using Spark.

        Args:
            query: The SQL query to execute. It must refer to the data as
                ``dataset``.
            path: The filesystem path to a ``.csv``, ``.parquet`` or ``.json``
                file (extension matched case-insensitively).
            limit: Maximum number of rows to return in the preview.

        Returns:
            A QueryResult object. Failures, including a missing PySpark
            installation or an unsupported extension, are reported through
            ``success=False`` and ``error_message`` rather than raised.
        """
        if not SPARK_AVAILABLE:
            return QueryResult(success=False, error_message="PySpark not installed")

        import os

        abs_path = os.path.abspath(path)
        start_time = time.time()
        try:
            spark = self._get_session()

            # 1. Read the dataset.
            # Format is chosen from the extension. The comparison ignores
            # case so files such as DATA.CSV, which the file browser accepts,
            # are readable here too.
            lowered_path = abs_path.lower()
            if lowered_path.endswith(".csv"):
                df_spark = (
                    spark.read.option("header", "true").option("inferSchema", "true").csv(abs_path)
                )
            elif lowered_path.endswith(".parquet"):
                df_spark = spark.read.parquet(abs_path)
            elif lowered_path.endswith(".json"):
                df_spark = spark.read.json(abs_path)
            else:
                raise ValueError(f"Unsupported file format for path: {abs_path}")

            # 2. Register temp view
            df_spark.createOrReplaceTempView("dataset")

            # 3. Execute query
            res_spark = spark.sql(query)

            # 4. Get count
            row_count = res_spark.count()

            # 5. Preview: limit before converting so only the preview rows
            #    are collected into pandas.
            df_preview = res_spark.limit(limit).toPandas()

            execution_time = time.time() - start_time
            return QueryResult(
                df=df_preview, execution_time=execution_time, row_count=row_count, success=True
            )
        except Exception as e:
            # Engine errors are surfaced to the UI instead of crashing the app.
            return QueryResult(
                success=False, error_message=str(e), execution_time=time.time() - start_time
            )

    def interrupt(self):
        """Interrupts current Spark job."""
        if self.spark:
            # Spark context interrupt
            self.spark.sparkContext.cancelAllJobs()
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
import pandas as pd
|
|
2
|
+
import io
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class Exporter:
    """Serializes DataFrames into downloadable byte payloads."""

    @staticmethod
    def to_csv(df: pd.DataFrame) -> bytes:
        """Return ``df`` as UTF-8 encoded CSV text without the index column."""
        csv_text = df.to_csv(index=False)
        return csv_text.encode("utf-8")

    @staticmethod
    def to_excel(df: pd.DataFrame) -> bytes:
        """Return ``df`` as an Excel workbook written with openpyxl, without the index."""
        buffer = io.BytesIO()
        # The writer must be closed (end of the ``with``) before reading the
        # buffer, otherwise the workbook is not yet written out.
        with pd.ExcelWriter(buffer, engine="openpyxl") as workbook:
            df.to_excel(workbook, index=False)
        return buffer.getvalue()

    @staticmethod
    def to_parquet(df: pd.DataFrame) -> bytes:
        """Return ``df`` as Parquet bytes without the index."""
        buffer = io.BytesIO()
        df.to_parquet(buffer, index=False)
        return buffer.getvalue()
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
import json
|
|
2
|
+
from datetime import datetime
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import List, Dict, Optional
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class HistoryManager:
    """Manages local query history persistence.

    History is stored as a JSON list of entry dictionaries, newest first.
    """

    DEFAULT_PATH = Path.home() / ".wherewolf" / "history.json"
    # Maximum number of entries kept on disk; older ones are dropped.
    MAX_ENTRIES = 100

    def __init__(self, storage_path: Optional[Path] = None):
        """Create a manager backed by ``storage_path`` (or ``DEFAULT_PATH``).

        Args:
            storage_path: Location of the history file. A plain string is
                accepted as well and converted to a ``Path``.
        """
        self.storage_path = Path(storage_path) if storage_path else self.DEFAULT_PATH
        self._ensure_storage()

    def _write(self, entries: List[Dict]) -> None:
        """Overwrite the history file with ``entries``."""
        with open(self.storage_path, "w", encoding="utf-8") as f:
            json.dump(entries, f, indent=2)

    def _ensure_storage(self):
        """Ensures the storage directory and an initial empty history file exist."""
        self.storage_path.parent.mkdir(parents=True, exist_ok=True)
        if not self.storage_path.exists():
            self._write([])

    def add_entry(self, engine: str, query: str, path: str):
        """Adds a new query to the front of the history.

        Args:
            engine: The execution engine used (e.g., 'duckdb').
            query: The SQL query string.
            path: The dataset path used.
        """
        entry = {
            "timestamp": datetime.now().isoformat(),
            "engine": engine,
            "query": query,
            "path": path,
        }
        # Newest first, capped at MAX_ENTRIES.
        history = [entry, *self.get_all()][: self.MAX_ENTRIES]
        self._write(history)

    def get_all(self) -> List[Dict]:
        """Returns all history entries.

        Returns:
            A list of history entry dictionaries, newest first. An empty list
            is returned when the file is missing, unreadable, not valid JSON,
            or does not contain a JSON list; non-dictionary items are skipped.
        """
        try:
            with open(self.storage_path, "r", encoding="utf-8") as f:
                data = json.load(f)
        except (OSError, ValueError):
            # ValueError covers both malformed JSON and undecodable bytes.
            return []
        if not isinstance(data, list):
            # A hand-edited or foreign file must not crash callers that
            # expect a list of entries.
            return []
        return [item for item in data if isinstance(item, dict)]

    def clear(self):
        """Clears the query history."""
        self._write([])
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
import sqlglot
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class Translator:
    """Handles SQL dialect translation between DuckDB, Spark SQL and T-SQL."""

    VALID_DIALECTS = {"duckdb", "spark", "tsql"}

    def translate(self, query: str, from_dialect: str, to_dialect: str) -> str:
        """Translates a SQL query from one dialect to another.

        Args:
            query: The SQL query string. It may contain several statements.
            from_dialect: The source dialect (e.g., 'duckdb').
            to_dialect: The target dialect (e.g., 'spark').

        Returns:
            The translated SQL. Multiple statements are joined with ``;`` and
            a newline; an input with no statements yields an empty string.

        Raises:
            ValueError: If a dialect is not supported or the translation
                itself fails.
        """
        if from_dialect not in self.VALID_DIALECTS:
            raise ValueError(f"Unsupported source dialect: {from_dialect}")
        if to_dialect not in self.VALID_DIALECTS:
            raise ValueError(f"Unsupported target dialect: {to_dialect}")

        try:
            # sqlglot.transpile returns one string per input statement.
            statements = sqlglot.transpile(
                query, read=from_dialect, write=to_dialect, pretty=True
            )
        except Exception as e:
            # Wrap every library failure in the ValueError callers handle.
            raise ValueError(f"Translation failed: {str(e)}") from e

        # Keep every statement instead of silently dropping all but the first;
        # empty strings (from stray semicolons) are skipped.
        return ";\n".join(statement for statement in statements if statement)
|
wherewolf/ui/__init__.py
ADDED
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import streamlit as st
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import Optional
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class FileBrowser:
    """A selectbox-based file explorer for picking a local data file.

    The current directory is kept in Streamlit session state; picking a
    directory (or '..') in the selectbox navigates, picking a supported file
    offers a load button.
    """

    # First selectbox option, meaning "nothing chosen yet".
    PLACEHOLDER = "Select file/folder..."
    # File extensions (lower-case) that may be loaded.
    VALID_EXTENSIONS = frozenset({".csv", ".parquet", ".json"})

    @staticmethod
    def _is_root(path: str) -> bool:
        """Return True when ``path`` has no parent directory of its own."""
        normalized = os.path.normpath(path)
        # A root is its own parent; unlike comparing against the root of the
        # current drive, this also holds for other drive roots.
        return os.path.dirname(normalized) == normalized

    @staticmethod
    def _update_dir(key: str):
        """Callback to update the current directory based on selection."""
        # Use session_state directly to avoid stale variable issues
        choice = st.session_state[key]
        curr_dir_key = f"{key}_curr_dir"
        curr_dir = st.session_state[curr_dir_key]

        if choice == FileBrowser.PLACEHOLDER:
            return

        # Resolve the new path
        if choice == "..":
            new_path = os.path.dirname(curr_dir)
        else:
            new_path = os.path.join(curr_dir, choice)
        new_path = os.path.normpath(new_path)

        # Only navigate if the selection is actually a directory
        if os.path.isdir(new_path):
            st.session_state[curr_dir_key] = new_path
            # Reset the selection after navigating; a keyed widget keeps its
            # value otherwise, so '..' could not be chosen twice in a row.
            st.session_state[key] = FileBrowser.PLACEHOLDER

    @staticmethod
    def render_explorer(show_hidden: bool = False) -> Optional[str]:
        """Renders the selectbox-based file explorer.

        Args:
            show_hidden: Whether to show files/folders starting with '.'.

        Returns:
            The selected file path if its load button was clicked, else None.
        """
        key = "wherewolf_fs"
        curr_dir_key = f"{key}_curr_dir"
        files_key = f"{key}_files"

        # Initialization: start in the user's home directory.
        if curr_dir_key not in st.session_state:
            st.session_state[curr_dir_key] = str(Path.home())

        current_path = st.session_state[curr_dir_key]

        # '..' is only offered when there is a parent to go to.
        leading = [FileBrowser.PLACEHOLDER]
        if not FileBrowser._is_root(current_path):
            leading.append("..")

        # Re-build files list on every render to respect show_hidden toggle
        try:
            raw_items = sorted(os.listdir(current_path))
        except OSError as e:
            st.error(f"Error reading directory {current_path}: {e}")
            raw_items = []
        if not show_hidden:
            raw_items = [name for name in raw_items if not name.startswith(".")]
        st.session_state[files_key] = leading + raw_items

        # --- UI Navigation ---
        st.write(f"📂 `{current_path}`")

        selected_file = st.selectbox(
            label="Select file or directory",
            options=st.session_state[files_key],
            key=key,
            on_change=FileBrowser._update_dir,
            args=(key,),
            help="Select a directory to enter it, or a file to load it.",
            index=0,  # Placeholder is the initial selection
        )

        if selected_file == FileBrowser.PLACEHOLDER:
            return None

        full_path = os.path.normpath(os.path.join(current_path, selected_file))

        # --- Contextual Actions ---
        if os.path.isdir(full_path):
            st.caption("📁 *Directory selected. Change selection to enter.*")
        else:
            # Display file info
            is_valid = Path(full_path).suffix.lower() in FileBrowser.VALID_EXTENSIONS

            if is_valid:
                st.success(f"📄 Ready to load: `{selected_file}`")
                if st.button("🚀 Load This File", use_container_width=True, type="primary"):
                    return full_path
            else:
                st.warning(f"⚠️ `{selected_file}` is not a supported data format.")

        return None
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: wherewolf
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
License-File: LICENSE
|
|
5
|
+
Requires-Python: >=3.11
|
|
6
|
+
Requires-Dist: duckdb>=1.5.0
|
|
7
|
+
Requires-Dist: ibis-framework>=12.0.0
|
|
8
|
+
Requires-Dist: openpyxl>=3.1.5
|
|
9
|
+
Requires-Dist: pandas>=2.3.3
|
|
10
|
+
Requires-Dist: pyarrow>=23.0.1
|
|
11
|
+
Requires-Dist: pyspark>=4.1.1
|
|
12
|
+
Requires-Dist: sqlglot>=29.0.1
|
|
13
|
+
Requires-Dist: streamlit-ace>=0.1.1
|
|
14
|
+
Requires-Dist: streamlit>=1.55.0
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
wherewolf/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
+
wherewolf/app.py,sha256=vAPn8FHkMMGZGzcngihIpqQxjreIvPjRYmbs7gWQMpk,10733
|
|
3
|
+
wherewolf/execution/__init__.py,sha256=3yjgaA0_y8UlkTCxCy5EIYYp9Qd-kBVxZtk1mSm4xQo,168
|
|
4
|
+
wherewolf/execution/duckdb_engine.py,sha256=0pe5sTj8HEZw46-VWMwKl4DJ7RhY8wDqTNw1u5iF_7Q,1898
|
|
5
|
+
wherewolf/execution/models.py,sha256=N1k9X7dcYc4_CHBUHve_whb3KAVpZUFYIdxMca_Nqe0,320
|
|
6
|
+
wherewolf/execution/spark_engine.py,sha256=9TQsG0MJiW93yHo0Ex3KJw0OUEv0vQYCLrDuRKRhmEc,2568
|
|
7
|
+
wherewolf/export/__init__.py,sha256=JTvltZqrSobKnqnKkmsG2L8qVRKza9Vwthjv4eBzXIM,55
|
|
8
|
+
wherewolf/export/exporter.py,sha256=966v3jpHq-r1PdQUPbXJ8H701OIOPwFifn46QpKeWy0,799
|
|
9
|
+
wherewolf/storage/__init__.py,sha256=BBK7GxkyH3AcfZuIXw2nrzrkNn1H4itArhuaRvy7sNw,66
|
|
10
|
+
wherewolf/storage/history.py,sha256=XE0EX-IwB7_I7F6uAVqkucTpJvUO02fgPEtw6zPD2dc,1921
|
|
11
|
+
wherewolf/translation/__init__.py,sha256=wOpBIn18rnlW9afDlX_eDJlYcEv4DBWq591H_dC6b-M,61
|
|
12
|
+
wherewolf/translation/translator.py,sha256=lJPTNa5HLqqi9YbMQSYCiY7jZ5N3PKurquWFompboyc,1357
|
|
13
|
+
wherewolf/ui/__init__.py,sha256=A7GBoutYnNExEPLuEh1XzInpLhGeJc3WeVDrfp6Rj5I,65
|
|
14
|
+
wherewolf/ui/file_browser.py,sha256=a6oS5ZIw3JzMIs9KuXORC0vIHRJ0bU4O09Jg5Y-N0eA,3548
|
|
15
|
+
wherewolf-0.1.0.dist-info/METADATA,sha256=CAX72dFM-wXefn7Q2ezBVfeI6P2BLhnLcBWkZ1JQa8U,387
|
|
16
|
+
wherewolf-0.1.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
|
|
17
|
+
wherewolf-0.1.0.dist-info/licenses/LICENSE,sha256=NFbGGRQm1k66gWimBacIxXUB0W1yMChaohH6rZLGtiw,1068
|
|
18
|
+
wherewolf-0.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 David Beall
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|