PyPI - sqlsaber-viz - Versions diffs - 0.1.1__py3-none-any.whl - Mend

sqlsaber-viz 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

sqlsaber_viz/__init__.py +19 -0
sqlsaber_viz/data_loader.py +143 -0
sqlsaber_viz/prompts.py +31 -0
sqlsaber_viz/renderers/__init__.py +6 -0
sqlsaber_viz/renderers/base.py +13 -0
sqlsaber_viz/renderers/html_renderer.py +17 -0
sqlsaber_viz/renderers/plotext_renderer.py +385 -0
sqlsaber_viz/spec.py +130 -0
sqlsaber_viz/spec_agent.py +144 -0
sqlsaber_viz/templates.py +175 -0
sqlsaber_viz/tools.py +234 -0
sqlsaber_viz/transforms.py +155 -0
sqlsaber_viz-0.1.1.dist-info/METADATA +12 -0
sqlsaber_viz-0.1.1.dist-info/RECORD +16 -0
sqlsaber_viz-0.1.1.dist-info/WHEEL +4 -0
sqlsaber_viz-0.1.1.dist-info/entry_points.txt +2 -0

sqlsaber_viz/__init__.py ADDED Viewed

@@ -0,0 +1,19 @@
+"""SQLSaber visualization plugin."""
+from typing import TYPE_CHECKING
+if TYPE_CHECKING:
+    from sqlsaber.tools.registry import ToolRegistry
+def register_tools(registry: "ToolRegistry | None" = None):
+    """Return the tool classes this plugin provides.
+    Args:
+        registry: Optional tool registry. This function does not read it.
+    Returns:
+        A one-element list containing the ``VizTool`` class (the class itself,
+        not an instance).
+    """
+    # Imported here rather than at module level, so `.tools` is only loaded on call.
+    from .tools import VizTool
+    return [VizTool]
+__all__ = ["register_tools"]

sqlsaber_viz/data_loader.py ADDED Viewed

@@ -0,0 +1,143 @@
+"""Helpers for loading SQL result payloads and extracting summaries."""
+from __future__ import annotations
+import json
+from datetime import date, datetime, time
+from typing import TYPE_CHECKING
+if TYPE_CHECKING:
+    from pydantic_ai import RunContext
+def find_tool_output_payload(ctx: "RunContext", tool_call_id: str) -> dict | None:
+    """Find the output of a tool call in a RunContext's message history.
+    Thin wrapper: passes ``ctx.messages`` to ``find_tool_output_in_messages``
+    and returns its result unchanged.
+    Args:
+        ctx: Run context whose ``messages`` attribute is searched.
+        tool_call_id: Identifier of the tool call whose output is wanted.
+    Returns:
+        Whatever ``find_tool_output_in_messages`` returns for those messages.
+    """
+    return find_tool_output_in_messages(ctx.messages, tool_call_id)
+def find_tool_output_in_messages(messages: list, tool_call_id: str) -> dict | None:
+    """Find tool output from a list of ModelMessage objects."""
+    for message in reversed(messages):
+        for part in getattr(message, "parts", []):
+            if getattr(part, "part_kind", "") not in (
+                "tool-return",
+                "builtin-tool-return",
+            ):
+                continue
+            if getattr(part, "tool_call_id", None) != tool_call_id:
+                continue
+            content = getattr(part, "content", None)
+            if isinstance(content, dict):
+                return content
+            if isinstance(content, str):
+                try:
+                    parsed = json.loads(content)
+                except json.JSONDecodeError:
+                    return {"result": content}
+                if isinstance(parsed, dict):
+                    return parsed
+                return {"result": parsed}
+    return None
+def extract_data_summary(payload: dict) -> dict:
+    """Extract column info and samples from SQL result payload.
+    Returns:
+        {
+            "columns": [
+                {"name": "col1", "type": "string", "sample": ["a", "b", "c"]},
+                {"name": "col2", "type": "number", "sample": [1, 2, 3]},
+            ],
+            "row_count": 150,
+            "rows": [...]  # Full rows for rendering
+        }
+    """
+    results = payload.get("results")
+    rows = _coerce_rows(results) if isinstance(results, list) else []
+    row_count = payload.get("row_count")
+    if not isinstance(row_count, int):
+        row_count = len(rows)
+    columns = _extract_columns(rows)
+    return {"columns": columns, "row_count": row_count, "rows": rows}
+def infer_column_type(values: list[object]) -> str:
+    """Infer column type from sample values.
+    Returns: "number", "string", "time", "boolean", or "null"
+    """
+    cleaned = [value for value in values if value is not None]
+    if not cleaned:
+        return "null"
+    if all(isinstance(value, bool) for value in cleaned):
+        return "boolean"
+    if all(isinstance(value, (int, float)) for value in cleaned):
+        return "number"
+    if all(_is_time_value(value) for value in cleaned):
+        return "time"
+    return "string"
+def _extract_columns(rows: list[dict[str, object]]) -> list[dict[str, object]]:
+    if not rows:
+        return []
+    # Use the union of keys from the first 50 rows to avoid missing sparse columns.
+    keys: list[str] = []
+    seen: set[str] = set()
+    for row in rows[:50]:
+        for key in row.keys():
+            if key not in seen:
+                seen.add(key)
+                keys.append(key)
+    columns: list[dict[str, object]] = []
+    for key in keys:
+        sample_values = [row.get(key) for row in rows[:20] if key in row]
+        column_type = infer_column_type(sample_values)
+        columns.append(
+            {
+                "name": key,
+                "type": column_type,
+                "sample": sample_values[:5],
+            }
+        )
+    return columns
+def _coerce_rows(rows: list[object]) -> list[dict[str, object]]:
+    coerced: list[dict[str, object]] = []
+    for row in rows:
+        if isinstance(row, dict):
+            coerced.append({str(key): value for key, value in row.items()})
+        else:
+            coerced.append({"value": row})
+    return coerced
+def _is_time_value(value: object) -> bool:
+    if isinstance(value, (datetime, date, time)):
+        return True
+    if isinstance(value, str):
+        normalized = value
+        if value.endswith("Z"):
+            normalized = value[:-1] + "+00:00"
+        try:
+            datetime.fromisoformat(normalized)
+            return True
+        except ValueError:
+            try:
+                time.fromisoformat(normalized)
+                return True
+            except ValueError:
+                return False
+    return False

sqlsaber_viz/prompts.py ADDED Viewed

@@ -0,0 +1,31 @@
+"""Prompt definitions for viz spec generation."""
+# System prompt text for the viz spec generator. The text refers to a tool named
+# `get_vizspec_template`; where this constant is consumed is not visible in this module.
+VIZ_SYSTEM_PROMPT = """You are a visualization spec generator. Given a user's request and data summary, generate a valid JSON visualization spec.
+## Workflow
+1. Decide the appropriate chart type based on the request and data
+2. Call `get_vizspec_template` with the chart type and file to get the correct spec structure
+3. Fill in the template with actual column names from the provided data summary
+4. Return ONLY the final JSON spec (no explanations, no markdown code blocks)
+## Chart Type Selection
+- Comparing categories → bar
+- Comparing categories across series → bar with encoding.series
+- Trend over time → line
+- Correlation between two numbers → scatter
+- Distribution of one variable → histogram
+- Distribution comparison across groups → boxplot
+## Transform Operations (optional, add to "transform" array)
+- {"sort": [{"field": "col", "dir": "desc"}]} - Sort data
+- {"limit": 20} - Limit rows (recommended for bar charts with many categories)
+- {"filter": {"field": "col", "op": "!=", "value": null}} - Filter rows
+## Rules
+1. ALWAYS call `get_vizspec_template` first to get the correct structure
+2. Use ONLY columns that exist in the provided data summary
+3. Match field types: category columns for x in bar charts, numeric columns for y
+4. Add limit transform for bar charts to avoid overcrowding (10-20 bars max)
+5. Sort bar charts by y value descending for better readability
+6. Title should describe what the chart shows
+"""

sqlsaber_viz/renderers/__init__.py ADDED Viewed

@@ -0,0 +1,6 @@
+"""Renderer exports for SQLSaber viz."""
+from .base import RendererProtocol
+from .plotext_renderer import PlotextRenderer
+__all__ = ["RendererProtocol", "PlotextRenderer"]

sqlsaber_viz/renderers/base.py ADDED Viewed

@@ -0,0 +1,13 @@
+"""Renderer protocol for visualization outputs."""
+from __future__ import annotations
+from typing import Protocol
+from ..spec import VizSpec
+class RendererProtocol(Protocol):
+    """Structural interface for renderers: any object with a matching ``render`` method conforms."""
+    def render(self, spec: VizSpec, rows: list[dict]) -> str:
+        """Render a visualization spec with data rows.
+        Args:
+            spec: The visualization spec to draw.
+            rows: Data rows, one dict per row.
+        Returns:
+            The rendered output as a string.
+        """
+        ...

sqlsaber_viz/renderers/html_renderer.py ADDED Viewed

@@ -0,0 +1,17 @@
+"""Placeholder HTML renderer for future web UI support."""
+from __future__ import annotations
+from ..spec import VizSpec
+class HtmlRenderer:
+    """Render VizSpec to HTML.
+    Placeholder implementation; currently returns an empty string.
+    """
+    def render(self, spec: VizSpec, rows: list[dict]) -> str:
+        _ = spec
+        _ = rows
+        return ""

sqlsaber_viz/renderers/plotext_renderer.py ADDED Viewed

@@ -0,0 +1,385 @@
+"""Plotext renderer for terminal charts."""
+from __future__ import annotations
+import re
+from collections import defaultdict
+from datetime import datetime, time
+from typing import Iterable
+from ..spec import (
+    BarChart,
+    BoxplotChart,
+    HistogramChart,
+    LineChart,
+    ScatterChart,
+    VizSpec,
+)
+class PlotextRenderer:
+    """Render VizSpec to terminal using plotext."""
+    # Colors handed to series by position; the line and scatter renderers index
+    # this list modulo its length, so colors repeat once it is exhausted.
+    _series_colors = [
+        "cyan+",
+        "yellow+",
+        "red+",
+        "green+",
+        "blue+",
+        "magenta+",
+        "white+",
+    ]
+    # Plot size used by render() when the chart options give no (or a falsy) width/height.
+    _default_width = 80
+    _default_height = 25
+    def render(self, spec: VizSpec, rows: list[dict]) -> str:
+        """Render spec with data to ASCII chart string.
+        Returns:
+            ASCII chart string from plt.build(), or error message if rendering fails.
+        """
+        import plotext as plt
+        plt.clf()
+        plt.clear_figure()
+        chart = spec.chart
+        options = chart.options
+        width = options.width or self._default_width
+        height = options.height or self._default_height
+        plt.plot_size(width=width, height=height)
+        if spec.title:
+            plt.title(spec.title)
+        error_msg: str | None = None
+        try:
+            if isinstance(chart, BarChart):
+                error_msg = self._render_bar(chart, rows, plt)
+            elif isinstance(chart, LineChart):
+                error_msg = self._render_line(chart, rows, plt)
+            elif isinstance(chart, ScatterChart):
+                error_msg = self._render_scatter(chart, rows, plt)
+            elif isinstance(chart, BoxplotChart):
+                error_msg = self._render_boxplot(chart, rows, plt)
+            elif isinstance(chart, HistogramChart):
+                error_msg = self._render_histogram(chart, rows, plt)
+            else:
+                return f"[Unsupported chart type: {type(chart).__name__}]"
+        except Exception as e:
+            return f"[Chart rendering error: {e}]"
+        if error_msg:
+            return error_msg
+        if options.x_label:
+            plt.xlabel(options.x_label)
+        if options.y_label:
+            plt.ylabel(options.y_label)
+        return plt.build()
+    def _render_bar(self, chart: BarChart, rows: list[dict], plt) -> str | None:
+        x_field = chart.encoding.x.field
+        y_field = chart.encoding.y.field
+        series_field = chart.encoding.series.field if chart.encoding.series else None
+        orientation = "h" if chart.orientation == "horizontal" else "v"
+        if series_field:
+            categories, series_names, series_values = self._build_series_matrix(
+                rows, x_field, y_field, series_field
+            )
+            if not categories or not series_names:
+                return f"[No data: no valid values for '{x_field}' / '{y_field}']"
+            if chart.mode == "stacked":
+                plt.stacked_bar(
+                    categories,
+                    series_values,
+                    labels=series_names,
+                    orientation=orientation,
+                )
+            else:
+                plt.multiple_bar(
+                    categories,
+                    series_values,
+                    labels=series_names,
+                    orientation=orientation,
+                )
+            return None
+        # Aggregate by category (sum) for consistency with series path
+        aggregated: dict[str, float] = {}
+        for row in rows:
+            category = str(row.get(x_field, ""))
+            value = self._to_number(row.get(y_field))
+            if value is None:
+                continue
+            aggregated[category] = aggregated.get(category, 0.0) + value
+        if not aggregated:
+            return f"[No data: no valid numeric values for '{y_field}']"
+        categories = list(aggregated.keys())
+        values = list(aggregated.values())
+        color = self._safe_color(chart.options.color, "blue+")
+        plt.bar(categories, values, color=color, orientation=orientation)
+        return None
+    def _render_line(self, chart: LineChart, rows: list[dict], plt) -> str | None:
+        x_field = chart.encoding.x.field
+        y_field = chart.encoding.y.field
+        series_field = chart.encoding.series.field if chart.encoding.series else None
+        marker = self._safe_marker(chart.options.marker, "braille")
+        if series_field:
+            series_map = self._group_series(rows, series_field)
+            any_plotted = False
+            for idx, (series_name, series_rows) in enumerate(series_map.items()):
+                x, y = self._extract_xy_sorted(series_rows, x_field, y_field)
+                if not x or not y:
+                    continue
+                any_plotted = True
+                color = self._series_colors[idx % len(self._series_colors)]
+                plt.plot(x, y, color=color, marker=marker, label=series_name)
+            if not any_plotted:
+                return f"[No data: no valid values for '{x_field}' / '{y_field}']"
+            return None
+        x, y = self._extract_xy_sorted(rows, x_field, y_field)
+        if not x or not y:
+            return f"[No data: no valid values for '{x_field}' / '{y_field}']"
+        color = self._safe_color(chart.options.color, "cyan+")
+        plt.plot(x, y, color=color, marker=marker)
+        return None
+    def _render_scatter(self, chart: ScatterChart, rows: list[dict], plt) -> str | None:
+        x_field = chart.encoding.x.field
+        y_field = chart.encoding.y.field
+        series_field = chart.encoding.series.field if chart.encoding.series else None
+        marker = self._safe_marker(chart.options.marker, "dot")
+        if series_field:
+            series_map = self._group_series(rows, series_field)
+            any_plotted = False
+            for idx, (series_name, series_rows) in enumerate(series_map.items()):
+                x, y = self._extract_xy(series_rows, x_field, y_field)
+                if not x or not y:
+                    continue
+                any_plotted = True
+                color = self._series_colors[idx % len(self._series_colors)]
+                plt.scatter(x, y, color=color, marker=marker, label=series_name)
+            if not any_plotted:
+                return f"[No data: no valid values for '{x_field}' / '{y_field}']"
+            return None
+        x, y = self._extract_xy(rows, x_field, y_field)
+        if not x or not y:
+            return f"[No data: no valid values for '{x_field}' / '{y_field}']"
+        color = self._safe_color(chart.options.color, "red+")
+        plt.scatter(x, y, color=color, marker=marker)
+        return None
+    def _render_boxplot(self, chart: BoxplotChart, rows: list[dict], plt) -> str | None:
+        label_field = chart.boxplot.label_field
+        value_field = chart.boxplot.value_field
+        groups: dict[str, list[float]] = {}
+        for row in rows:
+            label = str(row.get(label_field, ""))
+            value = self._to_number(row.get(value_field))
+            if value is None:
+                continue
+            groups.setdefault(label, []).append(value)
+        if not groups:
+            return f"[No data: no valid numeric values for '{value_field}']"
+        labels = list(groups.keys())
+        data = [groups[label] for label in labels]
+        plt.box(labels, data)
+        return None
+    def _render_histogram(self, chart: HistogramChart, rows: list[dict], plt) -> str | None:
+        field = chart.histogram.field
+        bins = chart.histogram.bins
+        values: list[float] = []
+        for row in rows:
+            val = self._to_number(row.get(field))
+            if val is not None:
+                values.append(val)
+        if not values:
+            return f"[No data: no valid numeric values for '{field}']"
+        color = self._safe_color(chart.options.color, "green+")
+        plt.hist(values, bins=bins, color=color)
+        return None
+    def _extract_xy(
+        self, rows: Iterable[dict], x_field: str, y_field: str
+    ) -> tuple[list[float], list[float]]:
+        x: list[float] = []
+        y: list[float] = []
+        for row in rows:
+            x_val = self._to_number(row.get(x_field))
+            y_val = self._to_number(row.get(y_field))
+            if x_val is None or y_val is None:
+                continue
+            x.append(x_val)
+            y.append(y_val)
+        return x, y
+    def _extract_xy_sorted(
+        self, rows: Iterable[dict], x_field: str, y_field: str
+    ) -> tuple[list[float], list[float]]:
+        """Extract x/y pairs and sort by x for proper line chart rendering."""
+        pairs: list[tuple[float, float]] = []
+        for row in rows:
+            x_val = self._to_number(row.get(x_field))
+            y_val = self._to_number(row.get(y_field))
+            if x_val is None or y_val is None:
+                continue
+            pairs.append((x_val, y_val))
+        pairs.sort(key=lambda p: p[0])
+        x = [p[0] for p in pairs]
+        y = [p[1] for p in pairs]
+        return x, y
+    def _group_series(
+        self, rows: Iterable[dict], series_field: str
+    ) -> dict[str, list[dict]]:
+        groups: dict[str, list[dict]] = defaultdict(list)
+        for row in rows:
+            key = str(row.get(series_field, ""))
+            groups[key].append(row)
+        return dict(groups)
+    def _build_series_matrix(
+        self,
+        rows: Iterable[dict],
+        x_field: str,
+        y_field: str,
+        series_field: str,
+    ) -> tuple[list[str], list[str], list[list[float]]]:
+        categories: list[str] = []
+        series_names: list[str] = []
+        data: dict[str, dict[str, float]] = {}
+        for row in rows:
+            category = str(row.get(x_field, ""))
+            series_name = str(row.get(series_field, ""))
+            value = self._to_number(row.get(y_field))
+            if value is None:
+                continue
+            if category not in categories:
+                categories.append(category)
+            if series_name not in series_names:
+                series_names.append(series_name)
+            data.setdefault(series_name, {})
+            data[series_name][category] = data[series_name].get(category, 0.0) + value
+        series_values: list[list[float]] = []
+        for series_name in series_names:
+            values = [
+                data.get(series_name, {}).get(category, 0.0) for category in categories
+            ]
+            series_values.append(values)
+        return categories, series_names, series_values
+    def _to_number(self, value: object) -> float | None:
+        if value is None:
+            return None
+        if isinstance(value, bool):
+            return None
+        if isinstance(value, (int, float)):
+            return float(value)
+        if isinstance(value, datetime):
+            return value.timestamp()
+        if isinstance(value, time):
+            return self._time_to_seconds(value)
+        if isinstance(value, str):
+            try:
+                return float(value)
+            except ValueError:
+                # Handle Z suffix (e.g., "2024-01-01T00:00:00Z")
+                normalized = value
+                if value.endswith("Z"):
+                    normalized = value[:-1] + "+00:00"
+                try:
+                    return datetime.fromisoformat(normalized).timestamp()
+                except ValueError:
+                    pass
+                try:
+                    return self._time_to_seconds(time.fromisoformat(normalized))
+                except ValueError:
+                    pass
+                # Try YYYY-MM format (e.g., "2023-06")
+                if re.match(r"^\d{4}-\d{2}$", value):
+                    try:
+                        return datetime.fromisoformat(f"{value}-01").timestamp()
+                    except ValueError:
+                        pass
+                return None
+        return None
+    def _time_to_seconds(self, value: time) -> float:
+        """Convert time-only values to seconds since midnight."""
+        return (
+            value.hour * 3600
+            + value.minute * 60
+            + value.second
+            + value.microsecond / 1_000_000
+        )
+    def _safe_color(self, color: str | None, default: str) -> str:
+        """Return validated color or default if invalid."""
+        if not color:
+            return default
+        # plotext accepts color names like "red+", "blue", etc.
+        # If an invalid color is used, plotext may throw; keep known-good defaults
+        valid_colors = {
+            "black",
+            "red",
+            "green",
+            "yellow",
+            "blue",
+            "magenta",
+            "cyan",
+            "white",
+            "black+",
+            "red+",
+            "green+",
+            "yellow+",
+            "blue+",
+            "magenta+",
+            "cyan+",
+            "white+",
+        }
+        return color if color in valid_colors else default
+    def _safe_marker(self, marker: str | None, default: str) -> str:
+        """Return validated marker or default if invalid."""
+        if not marker:
+            return default
+        # plotext marker options
+        valid_markers = {
+            "sd",
+            "dot",
+            "hd",
+            "fhd",
+            "braille",
+            "heart",
+            "point",
+        }
+        return marker if marker in valid_markers else default