zaturn 0.1.4__py3-none-any.whl → 0.1.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- zaturn/config.py +3 -0
- zaturn/core.py +2 -2
- zaturn/query_utils.py +6 -0
- zaturn/visualizations.py +42 -90
- {zaturn-0.1.4.dist-info → zaturn-0.1.6.dist-info}/METADATA +16 -3
- zaturn-0.1.6.dist-info/RECORD +12 -0
- {zaturn-0.1.4.dist-info → zaturn-0.1.6.dist-info}/WHEEL +1 -1
- zaturn-0.1.4.dist-info/RECORD +0 -12
- {zaturn-0.1.4.dist-info → zaturn-0.1.6.dist-info}/entry_points.txt +0 -0
- {zaturn-0.1.4.dist-info → zaturn-0.1.6.dist-info}/licenses/LICENSE +0 -0
- {zaturn-0.1.4.dist-info → zaturn-0.1.6.dist-info}/top_level.txt +0 -0
zaturn/config.py
CHANGED
@@ -62,6 +62,9 @@ for s in source_list:
|
|
62
62
|
elif source.startswith("mysql+pymysql://"):
|
63
63
|
source_type = 'mysql'
|
64
64
|
source_name = source.split('/')[-1].split('?')[0]
|
65
|
+
elif source.startswith('clickhouse://'):
|
66
|
+
source_type = 'clickhouse'
|
67
|
+
source_name = source.split('/')[-1].split('?')[0]
|
65
68
|
elif source.endswith(".duckdb"):
|
66
69
|
source_type = "duckdb"
|
67
70
|
source_name = source.split('/')[-1].split('.')[0]
|
zaturn/core.py
CHANGED
@@ -62,7 +62,7 @@ def _list_tables(source_id: str):
|
|
62
62
|
if col.startswith("Tables_in_"):
|
63
63
|
return result[col].to_list()
|
64
64
|
|
65
|
-
case "duckdb" | "csv" | "parquet":
|
65
|
+
case "duckdb" | "csv" | "parquet" | "clickhouse":
|
66
66
|
result = query_utils.execute_query(source, "SHOW TABLES")
|
67
67
|
return result['name'].to_list()
|
68
68
|
|
@@ -96,7 +96,7 @@ def describe_table(source_id: str, table_name: str) -> str:
|
|
96
96
|
)
|
97
97
|
return result.to_markdown(index=False)
|
98
98
|
|
99
|
-
case "mysql" | "duckdb" | "csv" | "parquet":
|
99
|
+
case "mysql" | "duckdb" | "csv" | "parquet" | "clickhouse":
|
100
100
|
result = query_utils.execute_query(source,
|
101
101
|
f"DESCRIBE {table_name};"
|
102
102
|
)
|
zaturn/query_utils.py
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
import clickhouse_connect
|
1
2
|
import duckdb
|
2
3
|
import numpy as np
|
3
4
|
import os
|
@@ -46,6 +47,11 @@ def execute_query(source: dict, query: str):
|
|
46
47
|
result = conn.execute(sqlalchemy.text(query))
|
47
48
|
return pd.DataFrame(result)
|
48
49
|
|
50
|
+
case "clickhouse":
|
51
|
+
client = clickhouse_connect.get_client(dsn=url)
|
52
|
+
client.query('SET readonly=1;')
|
53
|
+
return client.query_df(query, use_extended_dtypes=False)
|
54
|
+
|
49
55
|
case "duckdb":
|
50
56
|
conn = duckdb.connect(url, read_only=True)
|
51
57
|
return conn.execute(query).df()
|
zaturn/visualizations.py
CHANGED
@@ -1,69 +1,23 @@
|
|
1
1
|
from fastmcp import FastMCP, Image
|
2
2
|
import math
|
3
|
-
import matplotlib.pyplot as plt
|
4
3
|
import os
|
5
|
-
import seaborn as sns
|
4
|
+
import plotly.express as px
|
6
5
|
import time
|
7
6
|
from typing import Any, Union, Optional
|
8
7
|
from zaturn import config, query_utils
|
9
8
|
|
10
|
-
sns.set_theme()
|
11
|
-
sns.set_style('ticks')
|
12
9
|
|
13
10
|
mcp = FastMCP("Zaturn Visualizations")
|
14
11
|
|
15
12
|
|
16
|
-
def
|
17
|
-
figure = plot.get_figure()
|
13
|
+
def _fig_to_image(fig) -> Union[str, Image]:
|
18
14
|
filepath = os.path.join(config.VISUALS_DIR, str(int(time.time())) + '.png')
|
19
|
-
|
20
|
-
plt.clf()
|
15
|
+
fig.write_image(filepath)
|
21
16
|
if config.RETURN_IMAGES:
|
22
17
|
return Image(path=filepath)
|
23
18
|
else:
|
24
19
|
return filepath
|
25
|
-
|
26
|
-
|
27
|
-
def _fix_x_labels(plot, labels):
|
28
|
-
max_label_length = max(list(labels.map(lambda x: len(str(x)))))
|
29
|
-
|
30
|
-
LABEL_HIDE_FACTOR = 1
|
31
|
-
if len(labels) > 20:
|
32
|
-
LABEL_HIDE_FACTOR = math.ceil(len(labels)/20)
|
33
|
-
|
34
|
-
labels_to_show = list(labels)
|
35
|
-
ticks = list(plot.get_xticks())
|
36
|
-
if LABEL_HIDE_FACTOR > 1:
|
37
|
-
ticks = ticks[::LABEL_HIDE_FACTOR]
|
38
|
-
labels_to_show = labels_to_show[::LABEL_HIDE_FACTOR]
|
39
|
-
|
40
|
-
plot.set_xticks(ticks, labels_to_show)
|
41
|
-
cutoff = 2 # for rotation
|
42
|
-
|
43
|
-
if max_label_length >= 12:
|
44
|
-
cutoff = 3
|
45
|
-
elif max_label_length >= 10:
|
46
|
-
cutoff = 4
|
47
|
-
elif max_label_length >= 8:
|
48
|
-
cutoff = 5
|
49
|
-
elif max_label_length >= 7:
|
50
|
-
cutoff = 5
|
51
|
-
elif max_label_length >= 6:
|
52
|
-
cutoff = 6
|
53
|
-
elif max_label_length >= 5:
|
54
|
-
cutoff = 7
|
55
|
-
elif max_label_length >= 4:
|
56
|
-
cutoff = 9
|
57
|
-
elif max_label_length >= 3:
|
58
|
-
cutoff = 13
|
59
|
-
else:
|
60
|
-
cutoff = 15
|
61
|
-
|
62
|
-
if len(labels)>cutoff:
|
63
|
-
plot.set_xticklabels(plot.get_xticklabels(), rotation=-45, ha='left', va='top')
|
64
|
-
|
65
|
-
return plot
|
66
|
-
|
20
|
+
|
67
21
|
|
68
22
|
# Relationships
|
69
23
|
|
@@ -72,7 +26,7 @@ def scatter_plot(
|
|
72
26
|
query_id: str,
|
73
27
|
x: str,
|
74
28
|
y: str,
|
75
|
-
|
29
|
+
color: str = None
|
76
30
|
):
|
77
31
|
"""
|
78
32
|
Make a scatter plot with the dataframe obtained from running SQL Query against source
|
@@ -81,12 +35,12 @@ def scatter_plot(
|
|
81
35
|
query_id: Previously run query to use for plotting
|
82
36
|
x: Column name from SQL result to use for x-axis
|
83
37
|
y: Column name from SQL result to use for y-axis
|
84
|
-
|
38
|
+
color: Optional; column name from SQL result to use for coloring the points, with color representing another dimension
|
85
39
|
"""
|
86
40
|
df = query_utils.load_query(query_id)
|
87
|
-
|
88
|
-
|
89
|
-
return
|
41
|
+
fig = px.scatter(df, x=x, y=y, color=color)
|
42
|
+
fig.update_xaxes(autotickangles=[0, 45, 60, 90])
|
43
|
+
return _fig_to_image(fig)
|
90
44
|
|
91
45
|
|
92
46
|
@mcp.tool()
|
@@ -94,7 +48,7 @@ def line_plot(
|
|
94
48
|
query_id: str,
|
95
49
|
x: str,
|
96
50
|
y: str,
|
97
|
-
|
51
|
+
color: str = None
|
98
52
|
):
|
99
53
|
"""
|
100
54
|
Make a line plot with the dataframe obtained from running SQL Query against source
|
@@ -102,12 +56,12 @@ def line_plot(
|
|
102
56
|
query_id: Previously run query to use for plotting
|
103
57
|
x: Column name from SQL result to use for x-axis
|
104
58
|
y: Column name from SQL result to use for y-axis
|
105
|
-
|
59
|
+
color: Optional; column name from SQL result to use for drawing multiple colored lines representing another dimension
|
106
60
|
"""
|
107
61
|
df = query_utils.load_query(query_id)
|
108
|
-
|
109
|
-
|
110
|
-
return
|
62
|
+
fig = px.line(df, x=x, y=y, color=color)
|
63
|
+
fig.update_xaxes(autotickangles=[0, 45, 60, 90])
|
64
|
+
return _fig_to_image(fig)
|
111
65
|
|
112
66
|
|
113
67
|
# Distributions
|
@@ -116,21 +70,21 @@ def line_plot(
|
|
116
70
|
def histogram(
|
117
71
|
query_id: str,
|
118
72
|
column: str,
|
119
|
-
|
120
|
-
|
73
|
+
color: str = None,
|
74
|
+
nbins: int = None
|
121
75
|
):
|
122
76
|
"""
|
123
77
|
Make a histogram with a column of the dataframe obtained from running SQL Query against source
|
124
78
|
Args:
|
125
79
|
query_id: Previously run query to use for plotting
|
126
80
|
column: Column name from SQL result to use for the histogram
|
127
|
-
|
128
|
-
|
81
|
+
color: Optional; column name from SQL result to use for drawing multiple colored histograms representing another dimension
|
82
|
+
nbins: Optional; number of bins
|
129
83
|
"""
|
130
84
|
df = query_utils.load_query(query_id)
|
131
|
-
|
132
|
-
|
133
|
-
|
85
|
+
fig = px.histogram(df, x=column, color=color, nbins=nbins)
|
86
|
+
fig.update_xaxes(autotickangles=[0, 45, 60, 90])
|
87
|
+
return _fig_to_image(fig)
|
134
88
|
|
135
89
|
# Categorical
|
136
90
|
|
@@ -139,8 +93,7 @@ def strip_plot(
|
|
139
93
|
query_id: str,
|
140
94
|
x: str,
|
141
95
|
y: str = None,
|
142
|
-
|
143
|
-
legend: bool = False
|
96
|
+
color: str = None
|
144
97
|
):
|
145
98
|
"""
|
146
99
|
Make a strip plot with the dataframe obtained from running SQL Query against source
|
@@ -148,34 +101,33 @@ def strip_plot(
|
|
148
101
|
query_id: Previously run query to use for plotting
|
149
102
|
x: Column name from SQL result to use for x axis
|
150
103
|
y: Optional; column name from SQL result to use for y axis
|
151
|
-
|
152
|
-
legend: Whether to draw a legend for the hue
|
104
|
+
color: Optional column name from SQL result to show multiple colored strips representing another dimension
|
153
105
|
"""
|
154
106
|
df = query_utils.load_query(query_id)
|
155
|
-
|
156
|
-
|
157
|
-
return
|
107
|
+
fig = px.strip(df, x=x, y=y, color=color)
|
108
|
+
fig.update_xaxes(autotickangles=[0, 45, 60, 90])
|
109
|
+
return _fig_to_image(fig)
|
158
110
|
|
159
111
|
|
160
112
|
@mcp.tool()
|
161
113
|
def box_plot(
|
162
114
|
query_id: str,
|
163
|
-
|
164
|
-
|
165
|
-
|
115
|
+
y: str,
|
116
|
+
x: str = None,
|
117
|
+
color: str = None
|
166
118
|
):
|
167
119
|
"""
|
168
120
|
Make a box plot with the dataframe obtained from running SQL Query against source
|
169
121
|
Args:
|
170
122
|
query_id: Previously run query to use for plotting
|
171
|
-
|
172
|
-
|
173
|
-
|
123
|
+
y: Column name from SQL result to use for y axis
|
124
|
+
x: Optional; Column name from SQL result to use for x axis
|
125
|
+
color: Optional column name from SQL result to show multiple colored bars representing another dimension
|
174
126
|
"""
|
175
127
|
df = query_utils.load_query(query_id)
|
176
|
-
|
177
|
-
|
178
|
-
return
|
128
|
+
fig = px.box(df, x=x, y=y, color=color)
|
129
|
+
fig.update_xaxes(autotickangles=[0, 45, 60, 90])
|
130
|
+
return _fig_to_image(fig)
|
179
131
|
|
180
132
|
|
181
133
|
@mcp.tool()
|
@@ -183,8 +135,8 @@ def bar_plot(
|
|
183
135
|
query_id: str,
|
184
136
|
x: str,
|
185
137
|
y: str = None,
|
186
|
-
|
187
|
-
|
138
|
+
color: str = None,
|
139
|
+
orientation: str = 'v'
|
188
140
|
):
|
189
141
|
"""
|
190
142
|
Make a bar plot with the dataframe obtained from running SQL Query against source
|
@@ -192,12 +144,12 @@ def bar_plot(
|
|
192
144
|
query_id: Previously run query to use for plotting
|
193
145
|
x: Column name from SQL result to use for x axis
|
194
146
|
y: Optional; column name from SQL result to use for y axis
|
195
|
-
|
196
|
-
|
147
|
+
color: Optional column name from SQL result to use as a 3rd dimension by splitting each bar into colored sections
|
148
|
+
orientation: Orientation of the box plot, use 'v' for vertical (default) and 'h' for horizontal. Be mindful of choosing the correct X and Y columns as per orientation
|
197
149
|
"""
|
198
150
|
df = query_utils.load_query(query_id)
|
199
|
-
|
200
|
-
|
201
|
-
return
|
151
|
+
fig = px.bar(df, x=x, y=y, color=color, orientation=orientation)
|
152
|
+
fig.update_xaxes(autotickangles=[0, 45, 60, 90])
|
153
|
+
return _fig_to_image(fig)
|
202
154
|
|
203
155
|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: zaturn
|
3
|
-
Version: 0.1.4
|
3
|
+
Version: 0.1.6
|
4
4
|
Summary: AI Data Analysis MCP
|
5
5
|
Author-email: Karthik Devan <krtdvn@gmail.com>
|
6
6
|
Maintainer-email: Karthik Devan <krtdvn@gmail.com>
|
@@ -9,11 +9,14 @@ Project-URL: Issues, https://github.com/kdqed/zaturn/issues
|
|
9
9
|
Requires-Python: >=3.11
|
10
10
|
Description-Content-Type: text/markdown
|
11
11
|
License-File: LICENSE
|
12
|
+
Requires-Dist: clickhouse-connect>=0.8.17
|
12
13
|
Requires-Dist: cryptography>=44.0.2
|
13
14
|
Requires-Dist: duckdb>=1.2.1
|
14
15
|
Requires-Dist: fastmcp>=0.4.1
|
16
|
+
Requires-Dist: kaleido==0.2.1
|
15
17
|
Requires-Dist: pandas>=2.2.3
|
16
18
|
Requires-Dist: platformdirs>=4.3.7
|
19
|
+
Requires-Dist: plotly[express]>=6.0.1
|
17
20
|
Requires-Dist: psycopg2-binary>=2.9.10
|
18
21
|
Requires-Dist: pyarrow>=19.0.1
|
19
22
|
Requires-Dist: pymysql>=1.1.1
|
@@ -54,7 +57,7 @@ https://github.com/user-attachments/assets/d42dc433-e5ec-4b3e-bef0-5cfc097396ab
|
|
54
57
|
|
55
58
|
### Multiple Data Sources
|
56
59
|
Zaturn can currently connect to the following data sources:
|
57
|
-
- SQL Databases: PostgreSQL, SQLite, DuckDB, MySQL
|
60
|
+
- SQL Databases: PostgreSQL, SQLite, DuckDB, MySQL, ClickHouse
|
58
61
|
- Files: CSV, Parquet
|
59
62
|
|
60
63
|
Connectors for more data sources are being added.
|
@@ -103,6 +106,7 @@ OR add a `sources.txt` to the Zaturn config directory:
|
|
103
106
|
```
|
104
107
|
postgresql://username:password@host:port/dbname
|
105
108
|
mysql+pymysql://username:password@host:3306/dbname
|
109
|
+
clickhouse://username:password@host:port/dbname
|
106
110
|
sqlite:////full/path/to/sample_dbs/northwind.db
|
107
111
|
/full/path/to/sample_dbs/titanic.parquet
|
108
112
|
/full/path/to/sample_dbs/ny_aq.csv
|
@@ -158,11 +162,20 @@ Analyst:
|
|
158
162
|
```
|
159
163
|
- A native notebook interface
|
160
164
|
|
161
|
-
##
|
165
|
+
## Help And Feedback
|
162
166
|
|
163
167
|
[Raise an issue](https://github.com/kdqed/zaturn/issues) or [join the Discord](https://discord.gg/K8mECeVzpQ).
|
164
168
|
|
165
169
|
|
170
|
+
## Support The Project
|
171
|
+
|
172
|
+
If you find Zaturn useful, please support this project by:
|
173
|
+
- Starring the Project
|
174
|
+
- Spreading the word
|
175
|
+
- [Pledging $9/month on Patreon](https://www.patreon.com/kdqed?utm_medium=github&utm_source=join_link&utm_campaign=creatorshare_creator&utm_content=copyLink)
|
176
|
+
|
177
|
+
Your support will enable me to dedicate more of my time to Zaturn.
|
178
|
+
|
166
179
|
## Example Dataset Credits
|
167
180
|
|
168
181
|
The [pokemon dataset compiled by Sarah Taha and PokéAPI](https://www.kaggle.com/datasets/sarahtaha/1025-pokemon) has been included under the [CC BY-NC-SA 4.0](https://creativecommons.org/licenses/by-nc-sa/4.0/) license for demonstration purposes.
|
@@ -0,0 +1,12 @@
|
|
1
|
+
zaturn/__init__.py,sha256=v4t5fkRuIJFE-SBxCa5pBjZv0EoC0eWK75nU9iaa7Rg,267
|
2
|
+
zaturn/config.py,sha256=V0sqe72zCLyHVJyH27vOR401hA7NnRhkfaJlIeyNoXo,3359
|
3
|
+
zaturn/core.py,sha256=hASmXF6iMQA5M62tf94YjE5D3Rja9Vj5-8zMVKQ_tOY,4671
|
4
|
+
zaturn/query_utils.py,sha256=zXy8-eDpqpehgdHBs0zjLPfpLDsbF4xtiCEU1dE0xms,3028
|
5
|
+
zaturn/visualizations.py,sha256=0ON70D_mK4o0oyfEKqAhr2jkKWz_5-kNKkD6_TGBR9k,5014
|
6
|
+
zaturn/example_data/all_pokemon_data.csv,sha256=SUlGHHWbehuLg-ch1YUrQ6-xBtqHGw6rIkyn70fAgCk,130893
|
7
|
+
zaturn-0.1.6.dist-info/licenses/LICENSE,sha256=mZSuFlbEBZGl0-8ULRMLdRDbhau5hrWRNQOjytYeaug,1070
|
8
|
+
zaturn-0.1.6.dist-info/METADATA,sha256=tO7Fc7dIV_YumwTavFxKWQMGEitbDBE82zDv1J_vEVw,7179
|
9
|
+
zaturn-0.1.6.dist-info/WHEEL,sha256=SmOxYU7pzNKBqASvQJ7DjX3XGUF92lrGhMb3R6_iiqI,91
|
10
|
+
zaturn-0.1.6.dist-info/entry_points.txt,sha256=N1UZC2zvod92_Brs4A2xZiAnt-iGLBNryglXfwhxfj4,43
|
11
|
+
zaturn-0.1.6.dist-info/top_level.txt,sha256=KLUnwQwVZkfd5YCnnqR35MOOs8KLhanPGelvmRo2MVA,7
|
12
|
+
zaturn-0.1.6.dist-info/RECORD,,
|
zaturn-0.1.4.dist-info/RECORD
DELETED
@@ -1,12 +0,0 @@
|
|
1
|
-
zaturn/__init__.py,sha256=v4t5fkRuIJFE-SBxCa5pBjZv0EoC0eWK75nU9iaa7Rg,267
|
2
|
-
zaturn/config.py,sha256=gF5M6Agmixw2A4vpWqIF3ICVnClPeASA51dhp1bkk04,3221
|
3
|
-
zaturn/core.py,sha256=9zcKb0FbkgGkDtWfBA6_O5NQf6GVKG69HrqOr5nhsLU,4641
|
4
|
-
zaturn/query_utils.py,sha256=zyQjcRnPKGHZdf0XHzQeMxHw9vieZIwXhBbVGP87ml4,2801
|
5
|
-
zaturn/visualizations.py,sha256=4RA4FytYNPiBdMyIqlw3dfjH7flT2Nz6ncVoVoIqPEA,5883
|
6
|
-
zaturn/example_data/all_pokemon_data.csv,sha256=SUlGHHWbehuLg-ch1YUrQ6-xBtqHGw6rIkyn70fAgCk,130893
|
7
|
-
zaturn-0.1.4.dist-info/licenses/LICENSE,sha256=mZSuFlbEBZGl0-8ULRMLdRDbhau5hrWRNQOjytYeaug,1070
|
8
|
-
zaturn-0.1.4.dist-info/METADATA,sha256=VPtw2YcLYUz54oLKX72sCjzImF7xiqrM65e2u1j2-EE,6658
|
9
|
-
zaturn-0.1.4.dist-info/WHEEL,sha256=pxyMxgL8-pra_rKaQ4drOZAegBVuX-G_4nRHjjgWbmo,91
|
10
|
-
zaturn-0.1.4.dist-info/entry_points.txt,sha256=N1UZC2zvod92_Brs4A2xZiAnt-iGLBNryglXfwhxfj4,43
|
11
|
-
zaturn-0.1.4.dist-info/top_level.txt,sha256=KLUnwQwVZkfd5YCnnqR35MOOs8KLhanPGelvmRo2MVA,7
|
12
|
-
zaturn-0.1.4.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|