zaturn 0.1.4__py3-none-any.whl → 0.1.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
zaturn/config.py CHANGED
@@ -62,6 +62,9 @@ for s in source_list:
62
62
  elif source.startswith("mysql+pymysql://"):
63
63
  source_type = 'mysql'
64
64
  source_name = source.split('/')[-1].split('?')[0]
65
+ elif source.startswith('clickhouse://'):
66
+ source_type = 'clickhouse'
67
+ source_name = source.split('/')[-1].split('?')[0]
65
68
  elif source.endswith(".duckdb"):
66
69
  source_type = "duckdb"
67
70
  source_name = source.split('/')[-1].split('.')[0]
zaturn/core.py CHANGED
@@ -62,7 +62,7 @@ def _list_tables(source_id: str):
62
62
  if col.startswith("Tables_in_"):
63
63
  return result[col].to_list()
64
64
 
65
- case "duckdb" | "csv" | "parquet":
65
+ case "duckdb" | "csv" | "parquet" | "clickhouse":
66
66
  result = query_utils.execute_query(source, "SHOW TABLES")
67
67
  return result['name'].to_list()
68
68
 
@@ -96,7 +96,7 @@ def describe_table(source_id: str, table_name: str) -> str:
96
96
  )
97
97
  return result.to_markdown(index=False)
98
98
 
99
- case "mysql" | "duckdb" | "csv" | "parquet":
99
+ case "mysql" | "duckdb" | "csv" | "parquet" | "clickhouse":
100
100
  result = query_utils.execute_query(source,
101
101
  f"DESCRIBE {table_name};"
102
102
  )
zaturn/query_utils.py CHANGED
@@ -1,3 +1,4 @@
1
+ import clickhouse_connect
1
2
  import duckdb
2
3
  import numpy as np
3
4
  import os
@@ -46,6 +47,11 @@ def execute_query(source: dict, query: str):
46
47
  result = conn.execute(sqlalchemy.text(query))
47
48
  return pd.DataFrame(result)
48
49
 
50
+ case "clickhouse":
51
+ client = clickhouse_connect.get_client(dsn=url)
52
+ client.query('SET readonly=1;')
53
+ return client.query_df(query, use_extended_dtypes=False)
54
+
49
55
  case "duckdb":
50
56
  conn = duckdb.connect(url, read_only=True)
51
57
  return conn.execute(query).df()
zaturn/visualizations.py CHANGED
@@ -1,69 +1,23 @@
1
1
  from fastmcp import FastMCP, Image
2
2
  import math
3
- import matplotlib.pyplot as plt
4
3
  import os
5
- import seaborn as sns
4
+ import plotly.express as px
6
5
  import time
7
6
  from typing import Any, Union, Optional
8
7
  from zaturn import config, query_utils
9
8
 
10
- sns.set_theme()
11
- sns.set_style('ticks')
12
9
 
13
10
  mcp = FastMCP("Zaturn Visualizations")
14
11
 
15
12
 
16
- def _plot_to_image(plot) -> Union[str, Image]:
17
- figure = plot.get_figure()
13
+ def _fig_to_image(fig) -> Union[str, Image]:
18
14
  filepath = os.path.join(config.VISUALS_DIR, str(int(time.time())) + '.png')
19
- figure.savefig(filepath, bbox_inches='tight')
20
- plt.clf()
15
+ fig.write_image(filepath)
21
16
  if config.RETURN_IMAGES:
22
17
  return Image(path=filepath)
23
18
  else:
24
19
  return filepath
25
-
26
-
27
- def _fix_x_labels(plot, labels):
28
- max_label_length = max(list(labels.map(lambda x: len(str(x)))))
29
-
30
- LABEL_HIDE_FACTOR = 1
31
- if len(labels) > 20:
32
- LABEL_HIDE_FACTOR = math.ceil(len(labels)/20)
33
-
34
- labels_to_show = list(labels)
35
- ticks = list(plot.get_xticks())
36
- if LABEL_HIDE_FACTOR > 1:
37
- ticks = ticks[::LABEL_HIDE_FACTOR]
38
- labels_to_show = labels_to_show[::LABEL_HIDE_FACTOR]
39
-
40
- plot.set_xticks(ticks, labels_to_show)
41
- cutoff = 2 # for rotation
42
-
43
- if max_label_length >= 12:
44
- cutoff = 3
45
- elif max_label_length >= 10:
46
- cutoff = 4
47
- elif max_label_length >= 8:
48
- cutoff = 5
49
- elif max_label_length >= 7:
50
- cutoff = 5
51
- elif max_label_length >= 6:
52
- cutoff = 6
53
- elif max_label_length >= 5:
54
- cutoff = 7
55
- elif max_label_length >= 4:
56
- cutoff = 9
57
- elif max_label_length >= 3:
58
- cutoff = 13
59
- else:
60
- cutoff = 15
61
-
62
- if len(labels)>cutoff:
63
- plot.set_xticklabels(plot.get_xticklabels(), rotation=-45, ha='left', va='top')
64
-
65
- return plot
66
-
20
+
67
21
 
68
22
  # Relationships
69
23
 
@@ -72,7 +26,7 @@ def scatter_plot(
72
26
  query_id: str,
73
27
  x: str,
74
28
  y: str,
75
- hue: str = None
29
+ color: str = None
76
30
  ):
77
31
  """
78
32
  Make a scatter plot with the dataframe obtained from running SQL Query against source
@@ -81,12 +35,12 @@ def scatter_plot(
81
35
  query_id: Previously run query to use for plotting
82
36
  x: Column name from SQL result to use for x-axis
83
37
  y: Column name from SQL result to use for y-axis
84
- hue: Optional String; Column name from SQL result to use for coloring the points
38
+ color: Optional; column name from SQL result to use for coloring the points, with color representing another dimension
85
39
  """
86
40
  df = query_utils.load_query(query_id)
87
- plot = sns.scatterplot(df, x=x, y=y, hue=hue)
88
- plot = _fix_x_labels(plot, df[x])
89
- return _plot_to_image(plot)
41
+ fig = px.scatter(df, x=x, y=y, color=color)
42
+ fig.update_xaxes(autotickangles=[0, 45, 60, 90])
43
+ return _fig_to_image(fig)
90
44
 
91
45
 
92
46
  @mcp.tool()
@@ -94,7 +48,7 @@ def line_plot(
94
48
  query_id: str,
95
49
  x: str,
96
50
  y: str,
97
- hue: str = None
51
+ color: str = None
98
52
  ):
99
53
  """
100
54
  Make a line plot with the dataframe obtained from running SQL Query against source
@@ -102,12 +56,12 @@ def line_plot(
102
56
  query_id: Previously run query to use for plotting
103
57
  x: Column name from SQL result to use for x-axis
104
58
  y: Column name from SQL result to use for y-axis
105
- hue: Optional; column name from SQL result to use for drawing multiple colored lines
59
+ color: Optional; column name from SQL result to use for drawing multiple colored lines representing another dimension
106
60
  """
107
61
  df = query_utils.load_query(query_id)
108
- plot = sns.lineplot(df, x=x, y=y, hue=hue)
109
- plot = _fix_x_labels(plot, df[x])
110
- return _plot_to_image(plot)
62
+ fig = px.line(df, x=x, y=y, color=color)
63
+ fig.update_xaxes(autotickangles=[0, 45, 60, 90])
64
+ return _fig_to_image(fig)
111
65
 
112
66
 
113
67
  # Distributions
@@ -116,21 +70,21 @@ def line_plot(
116
70
  def histogram(
117
71
  query_id: str,
118
72
  column: str,
119
- hue: str = None,
120
- bins: int = None
73
+ color: str = None,
74
+ nbins: int = None
121
75
  ):
122
76
  """
123
77
  Make a histogram with a column of the dataframe obtained from running SQL Query against source
124
78
  Args:
125
79
  query_id: Previously run query to use for plotting
126
80
  column: Column name from SQL result to use for the histogram
127
- hue: Optional; column name from SQL result to use for drawing multiple colored histograms
128
- bins: Optional; number of bins
81
+ color: Optional; column name from SQL result to use for drawing multiple colored histograms representing another dimension
82
+ nbins: Optional; number of bins
129
83
  """
130
84
  df = query_utils.load_query(query_id)
131
- plot = sns.histplot(df, x=column, hue=hue, bins=bins)
132
- return _plot_to_image(plot)
133
-
85
+ fig = px.histogram(df, x=column, color=color, nbins=nbins)
86
+ fig.update_xaxes(autotickangles=[0, 45, 60, 90])
87
+ return _fig_to_image(fig)
134
88
 
135
89
  # Categorical
136
90
 
@@ -139,8 +93,7 @@ def strip_plot(
139
93
  query_id: str,
140
94
  x: str,
141
95
  y: str = None,
142
- hue: str = None,
143
- legend: bool = False
96
+ color: str = None
144
97
  ):
145
98
  """
146
99
  Make a strip plot with the dataframe obtained from running SQL Query against source
@@ -148,34 +101,33 @@ def strip_plot(
148
101
  query_id: Previously run query to use for plotting
149
102
  x: Column name from SQL result to use for x axis
150
103
  y: Optional; column name from SQL result to use for y axis
151
- hue: Optional; column name from SQL result to use for coloring the points
152
- legend: Whether to draw a legend for the hue
104
+ color: Optional; column name from SQL result to show multiple colored strips representing another dimension
153
105
  """
154
106
  df = query_utils.load_query(query_id)
155
- plot = sns.stripplot(df, x=x, y=y, hue=hue, legend=legend)
156
- plot = _fix_x_labels(plot, df[x])
157
- return _plot_to_image(plot)
107
+ fig = px.strip(df, x=x, y=y, color=color)
108
+ fig.update_xaxes(autotickangles=[0, 45, 60, 90])
109
+ return _fig_to_image(fig)
158
110
 
159
111
 
160
112
  @mcp.tool()
161
113
  def box_plot(
162
114
  query_id: str,
163
- x: str,
164
- y: str = None,
165
- hue: str = None
115
+ y: str,
116
+ x: str = None,
117
+ color: str = None
166
118
  ):
167
119
  """
168
120
  Make a box plot with the dataframe obtained from running SQL Query against source
169
121
  Args:
170
122
  query_id: Previously run query to use for plotting
171
- x: Column name from SQL result to use for x axis
172
- y: Optional; column name from SQL result to use for y axis
173
- hue: Optional column name from SQL result to use for coloring the points
123
+ y: Column name from SQL result to use for y axis
124
+ x: Optional; column name from SQL result to use for x axis
125
+ color: Optional; column name from SQL result to show multiple colored boxes representing another dimension
174
126
  """
175
127
  df = query_utils.load_query(query_id)
176
- plot = sns.boxplot(df, x=x, y=y, hue=hue)
177
- plot = _fix_x_labels(plot, df[x])
178
- return _plot_to_image(plot)
128
+ fig = px.box(df, x=x, y=y, color=color)
129
+ fig.update_xaxes(autotickangles=[0, 45, 60, 90])
130
+ return _fig_to_image(fig)
179
131
 
180
132
 
181
133
  @mcp.tool()
@@ -183,8 +135,8 @@ def bar_plot(
183
135
  query_id: str,
184
136
  x: str,
185
137
  y: str = None,
186
- hue: str = None,
187
- orient: str = 'v'
138
+ color: str = None,
139
+ orientation: str = 'v'
188
140
  ):
189
141
  """
190
142
  Make a bar plot with the dataframe obtained from running SQL Query against source
@@ -192,12 +144,12 @@ def bar_plot(
192
144
  query_id: Previously run query to use for plotting
193
145
  x: Column name from SQL result to use for x axis
194
146
  y: Optional; column name from SQL result to use for y axis
195
- hue: Optional column name from SQL result to use for coloring the bars
196
- orient: Orientation of the box plot, use 'v' for vertical and 'h' for horizontal
147
+ color: Optional; column name from SQL result to use as a 3rd dimension by splitting each bar into colored sections
148
+ orientation: Orientation of the bar plot, use 'v' for vertical (default) and 'h' for horizontal. Be mindful of choosing the correct X and Y columns as per orientation
197
149
  """
198
150
  df = query_utils.load_query(query_id)
199
- plot = sns.barplot(df, x=x, y=y, hue=hue, orient=orient)
200
- plot = _fix_x_labels(plot, df[x])
201
- return _plot_to_image(plot)
151
+ fig = px.bar(df, x=x, y=y, color=color, orientation=orientation)
152
+ fig.update_xaxes(autotickangles=[0, 45, 60, 90])
153
+ return _fig_to_image(fig)
202
154
 
203
155
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: zaturn
3
- Version: 0.1.4
3
+ Version: 0.1.6
4
4
  Summary: AI Data Analysis MCP
5
5
  Author-email: Karthik Devan <krtdvn@gmail.com>
6
6
  Maintainer-email: Karthik Devan <krtdvn@gmail.com>
@@ -9,11 +9,14 @@ Project-URL: Issues, https://github.com/kdqed/zaturn/issues
9
9
  Requires-Python: >=3.11
10
10
  Description-Content-Type: text/markdown
11
11
  License-File: LICENSE
12
+ Requires-Dist: clickhouse-connect>=0.8.17
12
13
  Requires-Dist: cryptography>=44.0.2
13
14
  Requires-Dist: duckdb>=1.2.1
14
15
  Requires-Dist: fastmcp>=0.4.1
16
+ Requires-Dist: kaleido==0.2.1
15
17
  Requires-Dist: pandas>=2.2.3
16
18
  Requires-Dist: platformdirs>=4.3.7
19
+ Requires-Dist: plotly[express]>=6.0.1
17
20
  Requires-Dist: psycopg2-binary>=2.9.10
18
21
  Requires-Dist: pyarrow>=19.0.1
19
22
  Requires-Dist: pymysql>=1.1.1
@@ -54,7 +57,7 @@ https://github.com/user-attachments/assets/d42dc433-e5ec-4b3e-bef0-5cfc097396ab
54
57
 
55
58
  ### Multiple Data Sources
56
59
  Zaturn can currently connect to the following data sources:
57
- - SQL Databases: PostgreSQL, SQLite, DuckDB, MySQL
60
+ - SQL Databases: PostgreSQL, SQLite, DuckDB, MySQL, ClickHouse
58
61
  - Files: CSV, Parquet
59
62
 
60
63
  Connectors for more data sources are being added.
@@ -103,6 +106,7 @@ OR add a `sources.txt` to the Zaturn config directory:
103
106
  ```
104
107
  postgresql://username:password@host:port/dbname
105
108
  mysql+pymysql://username:password@host:3306/dbname
109
+ clickhouse://username:password@host:port/dbname
106
110
  sqlite:////full/path/to/sample_dbs/northwind.db
107
111
  /full/path/to/sample_dbs/titanic.parquet
108
112
  /full/path/to/sample_dbs/ny_aq.csv
@@ -158,11 +162,20 @@ Analyst:
158
162
  ```
159
163
  - A native notebook interface
160
164
 
161
- ## Support And Feedback
165
+ ## Help And Feedback
162
166
 
163
167
  [Raise an issue](https://github.com/kdqed/zaturn/issues) or [join the Discord](https://discord.gg/K8mECeVzpQ).
164
168
 
165
169
 
170
+ ## Support The Project
171
+
172
+ If you find Zaturn useful, please support this project by:
173
+ - Starring the Project
174
+ - Spreading the word
175
+ - [Pledging $9/month on Patreon](https://www.patreon.com/kdqed?utm_medium=github&utm_source=join_link&utm_campaign=creatorshare_creator&utm_content=copyLink)
176
+
177
+ Your support will enable me to dedicate more of my time to Zaturn.
178
+
166
179
  ## Example Dataset Credits
167
180
 
168
181
  The [pokemon dataset compiled by Sarah Taha and PokéAPI](https://www.kaggle.com/datasets/sarahtaha/1025-pokemon) has been included under the [CC BY-NC-SA 4.0](https://creativecommons.org/licenses/by-nc-sa/4.0/) license for demonstration purposes.
@@ -0,0 +1,12 @@
1
+ zaturn/__init__.py,sha256=v4t5fkRuIJFE-SBxCa5pBjZv0EoC0eWK75nU9iaa7Rg,267
2
+ zaturn/config.py,sha256=V0sqe72zCLyHVJyH27vOR401hA7NnRhkfaJlIeyNoXo,3359
3
+ zaturn/core.py,sha256=hASmXF6iMQA5M62tf94YjE5D3Rja9Vj5-8zMVKQ_tOY,4671
4
+ zaturn/query_utils.py,sha256=zXy8-eDpqpehgdHBs0zjLPfpLDsbF4xtiCEU1dE0xms,3028
5
+ zaturn/visualizations.py,sha256=0ON70D_mK4o0oyfEKqAhr2jkKWz_5-kNKkD6_TGBR9k,5014
6
+ zaturn/example_data/all_pokemon_data.csv,sha256=SUlGHHWbehuLg-ch1YUrQ6-xBtqHGw6rIkyn70fAgCk,130893
7
+ zaturn-0.1.6.dist-info/licenses/LICENSE,sha256=mZSuFlbEBZGl0-8ULRMLdRDbhau5hrWRNQOjytYeaug,1070
8
+ zaturn-0.1.6.dist-info/METADATA,sha256=tO7Fc7dIV_YumwTavFxKWQMGEitbDBE82zDv1J_vEVw,7179
9
+ zaturn-0.1.6.dist-info/WHEEL,sha256=SmOxYU7pzNKBqASvQJ7DjX3XGUF92lrGhMb3R6_iiqI,91
10
+ zaturn-0.1.6.dist-info/entry_points.txt,sha256=N1UZC2zvod92_Brs4A2xZiAnt-iGLBNryglXfwhxfj4,43
11
+ zaturn-0.1.6.dist-info/top_level.txt,sha256=KLUnwQwVZkfd5YCnnqR35MOOs8KLhanPGelvmRo2MVA,7
12
+ zaturn-0.1.6.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (79.0.0)
2
+ Generator: setuptools (79.0.1)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,12 +0,0 @@
1
- zaturn/__init__.py,sha256=v4t5fkRuIJFE-SBxCa5pBjZv0EoC0eWK75nU9iaa7Rg,267
2
- zaturn/config.py,sha256=gF5M6Agmixw2A4vpWqIF3ICVnClPeASA51dhp1bkk04,3221
3
- zaturn/core.py,sha256=9zcKb0FbkgGkDtWfBA6_O5NQf6GVKG69HrqOr5nhsLU,4641
4
- zaturn/query_utils.py,sha256=zyQjcRnPKGHZdf0XHzQeMxHw9vieZIwXhBbVGP87ml4,2801
5
- zaturn/visualizations.py,sha256=4RA4FytYNPiBdMyIqlw3dfjH7flT2Nz6ncVoVoIqPEA,5883
6
- zaturn/example_data/all_pokemon_data.csv,sha256=SUlGHHWbehuLg-ch1YUrQ6-xBtqHGw6rIkyn70fAgCk,130893
7
- zaturn-0.1.4.dist-info/licenses/LICENSE,sha256=mZSuFlbEBZGl0-8ULRMLdRDbhau5hrWRNQOjytYeaug,1070
8
- zaturn-0.1.4.dist-info/METADATA,sha256=VPtw2YcLYUz54oLKX72sCjzImF7xiqrM65e2u1j2-EE,6658
9
- zaturn-0.1.4.dist-info/WHEEL,sha256=pxyMxgL8-pra_rKaQ4drOZAegBVuX-G_4nRHjjgWbmo,91
10
- zaturn-0.1.4.dist-info/entry_points.txt,sha256=N1UZC2zvod92_Brs4A2xZiAnt-iGLBNryglXfwhxfj4,43
11
- zaturn-0.1.4.dist-info/top_level.txt,sha256=KLUnwQwVZkfd5YCnnqR35MOOs8KLhanPGelvmRo2MVA,7
12
- zaturn-0.1.4.dist-info/RECORD,,