zaturn-0.1.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- zaturn/__init__.py +14 -0
- zaturn/config.py +88 -0
- zaturn/core.py +101 -0
- zaturn/query_utils.py +80 -0
- zaturn/visualizations.py +155 -0
- zaturn-0.1.0.dist-info/METADATA +153 -0
- zaturn-0.1.0.dist-info/RECORD +11 -0
- zaturn-0.1.0.dist-info/WHEEL +5 -0
- zaturn-0.1.0.dist-info/entry_points.txt +2 -0
- zaturn-0.1.0.dist-info/licenses/LICENSE +21 -0
- zaturn-0.1.0.dist-info/top_level.txt +1 -0
zaturn/__init__.py
ADDED
@@ -0,0 +1,14 @@
from fastmcp import FastMCP
from zaturn import core, visualizations

# Mount modules and make MCP
mcp = FastMCP("Zaturn MCP")
mcp.mount("core", core.mcp)
mcp.mount("visualizations", visualizations.mcp)


def main():
    mcp.run()


if __name__ == "__main__":
    main()
zaturn/config.py
ADDED
@@ -0,0 +1,88 @@
import argparse
import os
import platformdirs
import sys

# Basic Setup
USER_DATA_DIR = platformdirs.user_data_dir('zaturn', 'zaturn')
USER_CONFIG_DIR = platformdirs.user_config_dir('zaturn', 'zaturn')
QUERIES_DIR = os.path.join(USER_DATA_DIR, 'queries')
VISUALS_DIR = os.path.join(USER_DATA_DIR, 'visuals')
SOURCES_FILE = os.path.join(USER_CONFIG_DIR, 'sources.txt')

os.makedirs(USER_CONFIG_DIR, exist_ok=True)
os.makedirs(QUERIES_DIR, exist_ok=True)
os.makedirs(VISUALS_DIR, exist_ok=True)

# Parse command line args
parser = argparse.ArgumentParser(
    description="Zaturn: A read-only BI tool for analyzing various data sources"
)
parser.add_argument('--noimg', action='store_const',
    const=True, default=False,
    help='Return image file paths instead of images for visuals. Use when MCP client cannot render images.',
)
parser.add_argument('sources', nargs=argparse.REMAINDER, default=[],
    help='Data source (can be specified multiple times). Can be SQLite, MySQL, PostgreSQL connection string, or a path to CSV, Parquet, or DuckDB file.'
)
args = parser.parse_args()

# Read and parse sources
source_list = []
if os.path.exists(SOURCES_FILE):
    with open(SOURCES_FILE) as f:
        source_list = [line.strip('\n') for line in f.readlines() if line.strip('\n')]

if not source_list:
    source_list = args.sources

if not source_list:
    source_list = [os.path.join(os.getcwd(), 'example_data', 'all_pokemon_data.csv')]
    print("No data sources provided. Loading example dataset for demonstration.")
    print(f"\nTo load your datasets, add them to {SOURCES_FILE} (one source URL or full file path per line)")
    print("\nOr use command line args to specify data sources:")
    print("uv run --directory /path/to/zaturn mcp_server.py sqlite:///path/to/mydata.db")
    print(f"\nNOTE: Sources in command line args will be ignored if sources are found in {SOURCES_FILE}")

SOURCES = {}
for s in source_list:
    source = s.lower()
    if source.startswith('sqlite://'):
        source_type = 'sqlite'
        source_name = source.split('/')[-1].split('?')[0].split('.db')[0]
    elif source.startswith('postgresql://'):
        source_type = 'postgresql'
        source_name = source.split('/')[-1].split('?')[0]
    elif source.startswith("mysql+pymysql://"):
        source_type = 'mysql'
        source_name = source.split('/')[-1].split('?')[0]
    elif source.endswith(".duckdb"):
        source_type = "duckdb"
        source_name = source.split('/')[-1].split('.')[0]
    elif source.endswith(".csv"):
        source_type = "csv"
        source_name = source.split('/')[-1].split('.')[0]
    elif source.endswith(".parquet"):
        source_type = "parquet"
        source_name = source.split('/')[-1].split('.')[0]
    else:
        continue

    source_id = f'{source_name}-{source_type}'
    if source_id in SOURCES:
        i = 2
        while True:
            source_id = f'{source_name}{i}-{source_type}'
            if source_id not in SOURCES:
                break
            i += 1

    SOURCES[source_id] = {'url': s, 'type': source_type}


# Other Settings
RETURN_IMAGES = not args.noimg
print(SOURCES)
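For clarity, here is a small sketch of how the loop above maps source strings to source IDs. The helper `_source_id` and the example connection strings and file paths are illustrative only and are not part of the package:

```python
# Illustration only: mirrors the naming logic in config.py above.
# The connection strings and file paths below are hypothetical examples.
def _source_id(s: str) -> str:
    source = s.lower()
    if source.startswith('sqlite://'):
        return source.split('/')[-1].split('?')[0].split('.db')[0] + '-sqlite'
    if source.startswith('postgresql://'):
        return source.split('/')[-1].split('?')[0] + '-postgresql'
    if source.endswith('.csv'):
        return source.split('/')[-1].split('.')[0] + '-csv'
    raise ValueError('source type not covered in this sketch')

print(_source_id('sqlite:///home/me/northwind.db'))         # -> northwind-sqlite
print(_source_id('postgresql://user:pw@host:5432/shopdb'))  # -> shopdb-postgresql
print(_source_id('/data/sales_2024.csv'))                   # -> sales_2024-csv
```

Duplicate IDs get a numeric suffix (e.g. `northwind2-sqlite`) via the `while` loop in the module.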
zaturn/core.py
ADDED
@@ -0,0 +1,101 @@
from fastmcp import FastMCP
import os
from typing import Any, List, Union
from zaturn import config, query_utils

mcp = FastMCP("Zaturn Core")


@mcp.tool()
def list_sources() -> str:
    """
    List all available data sources.
    Returns a list of unique source_ids to be used for other queries.
    Source type is included in the source_id string.
    While drafting SQL queries use appropriate syntax as per source type.
    """
    try:
        if not config.SOURCES:
            return "No data sources available. Add sources using the command line parameters."

        result = "Available data sources:\n\n"
        for source in config.SOURCES:
            result += f"- {source}\n"

        return result
    except Exception as e:
        return str(e)


@mcp.tool()
def list_tables(source_id: str):
    """
    Lists names of all tables/datasets in a given data source.
    Use run_query with appropriate SQL query to determine table structure

    Args:
        source_id: The data source to list tables from
    """
    try:
        source = config.SOURCES.get(source_id)
        if not source:
            return f"Source {source_id} Not Found"

        match source['type']:
            case "sqlite":
                result = query_utils.execute_query(source,
                    "SELECT name FROM sqlite_schema WHERE type ='table' AND name NOT LIKE 'sqlite_%';"
                )
                return result['name'].to_list()

            case "postgresql":
                result = query_utils.execute_query(source,
                    "SELECT tablename FROM pg_catalog.pg_tables WHERE schemaname != 'pg_catalog' AND schemaname != 'information_schema';"
                )
                return result['tablename'].to_list()

            case "mysql":
                result = query_utils.execute_query(source, "SHOW TABLES")
                for col in list(result):
                    if col.startswith("Tables_in_"):
                        return result[col].to_list()

            case "duckdb" | "csv" | "parquet":
                result = query_utils.execute_query(source, "SHOW TABLES")
                return result['name'].to_list()

    except Exception as e:
        return str(e)


@mcp.tool()
def run_query(source_id: str, query: str) -> str:
    """
    Run query against specified source
    For both csv and parquet sources, use DuckDB SQL syntax
    Use 'CSV' as the table name for csv sources.
    Use 'PARQUET' as the table name for parquet sources.

    This will return a query_id, which can be referenced while calling other Zaturn tools.
    Args:
        source_id: The data source to run the query on
        query: SQL query to run on the data source
    """
    try:
        source = config.SOURCES.get(source_id)
        if not source:
            return f"Source {source_id} Not Found"

        df = query_utils.execute_query(source, query)
        query_id = query_utils.save_query(df)
        return query_id
    except Exception as e:
        return str(e)


@mcp.tool()
def show_query_result(query_id) -> str:
    """
    Show stored result for query_id in markdown table format
    """
    df = query_utils.load_query(query_id)
    return df.to_markdown(index=False)
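To make the tool flow concrete, here is a minimal sketch of what `run_query` followed by `show_query_result` amounts to, bypassing the MCP layer and calling `query_utils` directly. The CSV path is a hypothetical example; `config.py` would normally build the `source` dict:

```python
# Sketch only: equivalent of run_query + show_query_result without the MCP layer.
# '/tmp/example.csv' is a hypothetical file path.
from zaturn import query_utils

source = {'url': '/tmp/example.csv', 'type': 'csv'}
df = query_utils.execute_query(source, "SELECT COUNT(*) AS n FROM CSV")
query_id = query_utils.save_query(df)   # returns an id like 'q1712345678'
print(query_utils.load_query(query_id).to_markdown(index=False))
```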
zaturn/query_utils.py
ADDED
@@ -0,0 +1,80 @@
import duckdb
import numpy as np
import os
import pandas as pd
import sqlalchemy
from sqlalchemy.orm import Session
import time
from typing import List
from zaturn import config


def execute_query(source: dict, query: str):
    """Run the query using the appropriate engine and read only config"""
    url = source['url']

    match source['type']:
        case "sqlite":
            if "mode=ro" in url:
                pass
            elif '?' in url:
                url += '&mode=ro'
            else:
                url += '?mode=ro'
            with sqlalchemy.create_engine(url).connect() as conn:
                result = conn.execute(sqlalchemy.text(query))
                return pd.DataFrame(result)

        case "mysql":
            engine = sqlalchemy.create_engine(url)
            with Session(engine) as session:
                session.autoflush = False
                session.autocommit = False
                session.flush = lambda *args: None
                result = session.execute(sqlalchemy.text(query))
                return pd.DataFrame(result)

        case "postgresql":
            engine = sqlalchemy.create_engine(url)
            with engine.connect() as conn:
                conn = conn.execution_options(
                    isolation_level="SERIALIZABLE",
                    postgresql_readonly=True,
                    postgresql_deferrable=True,
                )
                with conn.begin():
                    result = conn.execute(sqlalchemy.text(query))
                    return pd.DataFrame(result)

        case "duckdb":
            conn = duckdb.connect(url, read_only=True)
            return conn.execute(query).df()

        case "csv":
            conn = duckdb.connect(database=':memory:')
            conn.execute(f"CREATE VIEW CSV AS SELECT * FROM read_csv('{url}')")
            return conn.execute(query).df()

        case "parquet":
            conn = duckdb.connect(database=':memory:')
            conn.execute(f"CREATE VIEW PARQUET AS SELECT * FROM read_parquet('{url}')")
            return conn.execute(query).df()

        case _:
            raise Exception("Unsupported Source")


def save_query(df: pd.DataFrame):
    """Save query results to disk and return a unique reference id"""
    query_id = 'q' + str(int(time.time()))
    filepath = os.path.join(config.QUERIES_DIR, f'{query_id}.parquet')
    df.replace({np.nan: None}).to_parquet(filepath, engine='pyarrow', index=False)
    return query_id


def load_query(query_id: str):
    """Load query results from disk using unique reference id"""
    filepath = os.path.join(config.QUERIES_DIR, f'{query_id}.parquet')
    df = pd.read_parquet(filepath, engine='pyarrow').replace({np.nan: None})
    df.reset_index(drop=True, inplace=True)
    return df
zaturn/visualizations.py
ADDED
@@ -0,0 +1,155 @@
from fastmcp import FastMCP, Image
import matplotlib.pyplot as plt
import os
import seaborn as sns
import time
from typing import Any, Union, Optional
from zaturn import config, query_utils

sns.set_theme()

mcp = FastMCP("Zaturn Visualizations")


def _plot_to_image(plot) -> Union[str, Image]:
    figure = plot.get_figure()
    filepath = os.path.join(config.VISUALS_DIR, str(int(time.time())) + '.png')
    figure.savefig(filepath)
    plt.clf()
    if config.RETURN_IMAGES:
        return Image(path=filepath)
    else:
        return filepath


# Relationships

@mcp.tool()
def scatter_plot(
    query_id: str,
    x: str,
    y: str,
    hue: str = None
):
    """
    Make a scatter plot with the dataframe obtained from running SQL Query against source
    If this returns an image, display it. If it returns a file path, mention it.
    Args:
        query_id: Previously run query to use for plotting
        x: Column name from SQL result to use for x-axis
        y: Column name from SQL result to use for y-axis
        hue: Optional String; Column name from SQL result to use for coloring the points
    """
    df = query_utils.load_query(query_id)
    plot = sns.scatterplot(df, x=x, y=y, hue=hue)
    return _plot_to_image(plot)


@mcp.tool()
def line_plot(
    query_id: str,
    x: str,
    y: str,
    hue: str = None
):
    """
    Make a line plot with the dataframe obtained from running SQL Query against source
    Args:
        query_id: Previously run query to use for plotting
        x: Column name from SQL result to use for x-axis
        y: Column name from SQL result to use for y-axis
        hue: Optional; column name from SQL result to use for drawing multiple colored lines
    """
    df = query_utils.load_query(query_id)
    plot = sns.lineplot(df, x=x, y=y, hue=hue)
    return _plot_to_image(plot)


# Distributions

@mcp.tool()
def histogram(
    query_id: str,
    column: str,
    hue: str = None,
    bins: int = None
):
    """
    Make a histogram with a column of the dataframe obtained from running SQL Query against source
    Args:
        query_id: Previously run query to use for plotting
        column: Column name from SQL result to use for the histogram
        hue: Optional; column name from SQL result to use for drawing multiple colored histograms
        bins: Optional; number of bins
    """
    df = query_utils.load_query(query_id)
    plot = sns.histplot(df, x=column, hue=hue, bins=bins)
    return _plot_to_image(plot)


# Categorical

@mcp.tool()
def strip_plot(
    query_id: str,
    x: str,
    y: str = None,
    hue: str = None,
    legend: bool = False
):
    """
    Make a strip plot with the dataframe obtained from running SQL Query against source
    Args:
        query_id: Previously run query to use for plotting
        x: Column name from SQL result to use for x axis
        y: Optional; column name from SQL result to use for y axis
        hue: Optional; column name from SQL result to use for coloring the points
        legend: Whether to draw a legend for the hue
    """
    df = query_utils.load_query(query_id)
    plot = sns.stripplot(df, x=x, y=y, hue=hue, legend=legend)
    return _plot_to_image(plot)


@mcp.tool()
def box_plot(
    query_id: str,
    x: str,
    y: str = None,
    hue: str = None
):
    """
    Make a box plot with the dataframe obtained from running SQL Query against source
    Args:
        query_id: Previously run query to use for plotting
        x: Column name from SQL result to use for x axis
        y: Optional; column name from SQL result to use for y axis
        hue: Optional column name from SQL result to use for coloring the points
    """
    df = query_utils.load_query(query_id)
    plot = sns.boxplot(df, x=x, y=y, hue=hue)
    return _plot_to_image(plot)


@mcp.tool()
def bar_plot(
    query_id: str,
    x: str,
    y: str = None,
    hue: str = None,
    orient: str = 'v'
):
    """
    Make a bar plot with the dataframe obtained from running SQL Query against source
    Args:
        query_id: Previously run query to use for plotting
        x: Column name from SQL result to use for x axis
        y: Optional; column name from SQL result to use for y axis
        hue: Optional column name from SQL result to use for coloring the bars
        orient: Orientation of the bar plot, use 'v' for vertical and 'h' for horizontal
    """
    df = query_utils.load_query(query_id)
    plot = sns.barplot(df, x=x, y=y, hue=hue, orient=orient)
    return _plot_to_image(plot)
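As a rough illustration of the query-to-visual chain, the sketch below plots a toy DataFrame the same way `scatter_plot` does after `load_query`; the column names and output path are made up for the example:

```python
# Sketch only: what scatter_plot does once the saved query result is loaded.
import pandas as pd
import seaborn as sns

df = pd.DataFrame({'height_m': [0.7, 1.7, 2.0], 'weight_kg': [6.9, 90.5, 130.0]})
axes = sns.scatterplot(df, x='height_m', y='weight_kg')
# _plot_to_image saves to VISUALS_DIR and returns an Image or a file path instead.
axes.get_figure().savefig('/tmp/example_scatter.png')
```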
zaturn-0.1.0.dist-info/METADATA
ADDED
@@ -0,0 +1,153 @@
Metadata-Version: 2.4
Name: zaturn
Version: 0.1.0
Summary: AI Data Analysis MCP
Author-email: Karthik Devan <krtdvn@gmail.com>
Maintainer-email: Karthik Devan <krtdvn@gmail.com>
Project-URL: Homepage, https://github.com/kdqed/zaturn
Project-URL: Changelog, https://github.com/kdqed/zaturn/releases
Project-URL: Issues, https://github.com/kdqed/zaturn/issues
Project-URL: CI, https://github.com/kdqed/zaturn/actions
Requires-Python: >=3.11
Description-Content-Type: text/markdown
License-File: LICENSE
Requires-Dist: cryptography>=44.0.2
Requires-Dist: duckdb>=1.2.1
Requires-Dist: fastmcp>=0.4.1
Requires-Dist: pandas>=2.2.3
Requires-Dist: platformdirs>=4.3.7
Requires-Dist: psycopg2-binary>=2.9.10
Requires-Dist: pyarrow>=19.0.1
Requires-Dist: pymysql>=1.1.1
Requires-Dist: python-lsp-server>=1.12.2
Requires-Dist: seaborn>=0.13.2
Requires-Dist: sqlalchemy>=2.0.40
Requires-Dist: tabulate>=0.9.0
Dynamic: license-file

<p align="center">
  <img src="brand/logo.png" width="128" height="128">
</p>

# Zaturn: Your Co-Pilot For Data Analytics & BI

https://github.com/user-attachments/assets/d42dc433-e5ec-4b3e-bef0-5cfc097396ab

Zaturn is an open-source, AI-powered data analysis/BI tool that connects to your data sources, runs SQL queries on them, and gives you useful insights. Think of it as vibe data analysis, in the spirit of vibe coding. Zaturn is currently available as an MCP (Model Context Protocol) server that can be integrated into your favorite MCP client (Claude, Cursor, etc.). A full-fledged notebook interface is on the roadmap.

## Features

### Multiple Data Sources
Zaturn can currently connect to the following data sources:
- SQL Databases: PostgreSQL, SQLite, DuckDB, MySQL
- Files: CSV, Parquet

Connectors for more data sources are being added.

### Visualizations
In addition to providing tabular and textual summaries, Zaturn can also generate the following image visualizations:

- Scatter and Line Plots
- Histograms
- Strip and Box Plots
- Bar Plots

> NOTE: The visuals will be shown only if your MCP client supports image rendering (e.g. Claude Desktop).
>
> If your MCP client does not support images (e.g. Cursor), add the `--noimg` argument in the MCP config. The plots will then be stored as files and the file paths will be returned; you can view the plots with your file browser.

More visualization capabilities are being added.


## How Does Zaturn Work?

The naive way to have an LLM analyze your data is to upload a dataset along with a prompt. That won't get you far: context window limits mean the model can go through only a few thousand rows at best, and LLMs are not great at math.

An MCP server like Zaturn keeps your data where it is and lets the AI draft and run SQL queries against it. The LLM then processes only the queries and their results instead of your entire dataset.

## Installation & Setup

1. Install [uv](https://docs.astral.sh/uv/getting-started/installation/#installation-methods)

2. Clone/download this repository:
```bash
$ git clone https://github.com/kdqed/zaturn.git
```

3. Add Zaturn to your MCP config, with data sources:
```json
"mcpServers": {
  "zaturn": {
    "command": "uv",
    "args": [
      "run",
      "--directory",
      "/path/to/downloaded/folder",
      "mcp_server.py",
      "mysql+pymysql://username:password@host:3306/dbname",
      "postgresql://username:password@host:port/dbname",
      "sqlite:////full/path/to/sample_dbs/northwind.db",
      "/full/path/to/sample_dbs/titanic.parquet",
      "/full/path/to/sample_dbs/ny_aq.csv",
      "/full/path/to/sample_dbs/duckdb_sample.duckdb"
    ]
  },
}
```

If your MCP client does not support images, add the `--noimg` flag after `mcp_server.py`:
```json
...
"mcp_server.py",
"--noimg",
"mysql+pymysql://username:password@host:3306/dbname",
...
```

4. Set a system prompt if your LLM/IDE allows you to:
```
You are a helpful data analysis assistant. Use only the tool-provided data sources to process user inputs. Do not use external sources or your own knowledge base.
```

5. Ask a question and watch the magic:
```
User: List the top 5 customers by revenue for Northwind
AI:
[04/08/25 15:16:47] INFO  Processing request of type ListToolsRequest  server.py:534
[04/08/25 15:16:51] INFO  Processing request of type CallToolRequest   server.py:534
[04/08/25 15:16:53] INFO  Processing request of type CallToolRequest   server.py:534
[04/08/25 15:16:55] INFO  Processing request of type CallToolRequest   server.py:534
The top 5 customers by revenue for Northwind are:

1. B's Beverages with a revenue of $6,154,115.34
2. Hungry Coyote Import Store with a revenue of $5,698,023.67
3. Rancho grande with a revenue of $5,559,110.08
4. Gourmet Lanchonetes with a revenue of $5,552,597.90
5. Ana Trujillo Emparedados y helados with a revenue of $5,534,356.6
```

## Roadmap

- Support for more data source types
- More data visualizations
- Predictive analysis and forecasting, e.g.:
```
Based on the revenue of the last 3 months, forecast next month's revenue.
```
- Generate Presentations & PDFs
```
Manager:
I need a presentation to show the boss. Can you do it by EOD?
Analyst:
EOD?! Are you still in the 2010s?
I can get it done right now. Actually, you can do it right now.
You know what? The boss can do it right now.
```
- A native notebook interface

If you have any specific requirements, please feel free to raise an issue.


## Example Dataset Credits

The [Pokémon dataset compiled by Sarah Taha and PokéAPI](https://www.kaggle.com/datasets/sarahtaha/1025-pokemon) has been included under the [CC BY-NC-SA 4.0](https://creativecommons.org/licenses/by-nc-sa/4.0/) license.
zaturn-0.1.0.dist-info/RECORD
ADDED
@@ -0,0 +1,11 @@
zaturn/__init__.py,sha256=v4t5fkRuIJFE-SBxCa5pBjZv0EoC0eWK75nU9iaa7Rg,267
zaturn/config.py,sha256=t11_sJxTsliTGpImveqmgjXWTDeijK8VMqqTPygEY7E,3154
zaturn/core.py,sha256=sn2oliWVDqhYSAwSe5VbhNamaKAsnlrT3oc6uzffzs4,3267
zaturn/query_utils.py,sha256=zyQjcRnPKGHZdf0XHzQeMxHw9vieZIwXhBbVGP87ml4,2801
zaturn/visualizations.py,sha256=Dj08msgZ_DIMEVvJIlybT3cIA3GNkcXVOa9TJsHK2yo,4551
zaturn-0.1.0.dist-info/licenses/LICENSE,sha256=mZSuFlbEBZGl0-8ULRMLdRDbhau5hrWRNQOjytYeaug,1070
zaturn-0.1.0.dist-info/METADATA,sha256=_NI82YYMcGMc1Jz2OH6XmGwsoLUVpPB5zxvu0d1mCjM,5831
zaturn-0.1.0.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
zaturn-0.1.0.dist-info/entry_points.txt,sha256=N1UZC2zvod92_Brs4A2xZiAnt-iGLBNryglXfwhxfj4,43
zaturn-0.1.0.dist-info/top_level.txt,sha256=KLUnwQwVZkfd5YCnnqR35MOOs8KLhanPGelvmRo2MVA,7
zaturn-0.1.0.dist-info/RECORD,,
zaturn-0.1.0.dist-info/licenses/LICENSE
ADDED
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2025 Karthik Devan

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
zaturn-0.1.0.dist-info/top_level.txt
ADDED
@@ -0,0 +1 @@
zaturn