PyPI - sqlsaber-viz - Versions diffs - 0.1.1__py3-none-any.whl - Mend

sqlsaber-viz 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

sqlsaber_viz/__init__.py +19 -0
sqlsaber_viz/data_loader.py +143 -0
sqlsaber_viz/prompts.py +31 -0
sqlsaber_viz/renderers/__init__.py +6 -0
sqlsaber_viz/renderers/base.py +13 -0
sqlsaber_viz/renderers/html_renderer.py +17 -0
sqlsaber_viz/renderers/plotext_renderer.py +385 -0
sqlsaber_viz/spec.py +130 -0
sqlsaber_viz/spec_agent.py +144 -0
sqlsaber_viz/templates.py +175 -0
sqlsaber_viz/tools.py +234 -0
sqlsaber_viz/transforms.py +155 -0
sqlsaber_viz-0.1.1.dist-info/METADATA +12 -0
sqlsaber_viz-0.1.1.dist-info/RECORD +16 -0
sqlsaber_viz-0.1.1.dist-info/WHEEL +4 -0
sqlsaber_viz-0.1.1.dist-info/entry_points.txt +2 -0

sqlsaber_viz/spec.py ADDED Viewed

@@ -0,0 +1,130 @@
+"""Pydantic models for visualization specs."""
+from __future__ import annotations
+from typing import Annotated, Literal
+from pydantic import BaseModel, Field
+class FieldEncoding(BaseModel):
+    # Binds one chart channel to a data field and says how its values are interpreted.
+    field: str  # field (column) name to read; required
+    type: Literal["category", "number", "time"] = "number"  # defaults to "number"
+class ChartOptions(BaseModel):
+    # Optional presentation settings attached to every chart model; every field defaults to None.
+    # NOTE(review): units of width/height are not visible here (presumably terminal cells) - confirm in the renderer.
+    width: int | None = Field(default=None, ge=20, le=200)  # when given, validated to 20..200
+    height: int | None = Field(default=None, ge=10, le=100)  # when given, validated to 10..100
+    x_label: str | None = None
+    y_label: str | None = None
+    color: str | None = None
+    marker: str | None = None
+class BarEncoding(BaseModel):
+    # Channel bindings for a bar chart: x and y are required, series is optional.
+    x: FieldEncoding
+    y: FieldEncoding
+    series: FieldEncoding | None = None  # omitted/None means no series field
+class BarChart(BaseModel):
+    # Bar chart spec; the literal "bar" in `type` is the tag used by the ChartSpec union.
+    type: Literal["bar"]
+    encoding: BarEncoding
+    orientation: Literal["vertical", "horizontal"] = "vertical"  # default: vertical
+    mode: Literal["grouped", "stacked"] = "grouped"  # default: grouped
+    options: ChartOptions = Field(default_factory=ChartOptions)  # fresh all-None options per instance
+class LineEncoding(BaseModel):
+    # Channel bindings for a line chart: x and y are required, series is optional.
+    x: FieldEncoding
+    y: FieldEncoding
+    series: FieldEncoding | None = None  # omitted/None means no series field
+class LineChart(BaseModel):
+    # Line chart spec; the literal "line" in `type` is the tag used by the ChartSpec union.
+    type: Literal["line"]
+    encoding: LineEncoding
+    options: ChartOptions = Field(default_factory=ChartOptions)  # fresh all-None options per instance
+class ScatterEncoding(BaseModel):
+    # Channel bindings for a scatter chart: x and y are required, series is optional.
+    x: FieldEncoding
+    y: FieldEncoding
+    series: FieldEncoding | None = None  # omitted/None means no series field
+class ScatterChart(BaseModel):
+    # Scatter chart spec; the literal "scatter" in `type` is the tag used by the ChartSpec union.
+    type: Literal["scatter"]
+    encoding: ScatterEncoding
+    options: ChartOptions = Field(default_factory=ChartOptions)  # fresh all-None options per instance
+class BoxplotConfig(BaseModel):
+    # Names the two fields a boxplot reads; both are required strings.
+    label_field: str
+    value_field: str
+class BoxplotChart(BaseModel):
+    # Boxplot spec; configured through `boxplot` rather than an `encoding` block.
+    type: Literal["boxplot"]
+    boxplot: BoxplotConfig
+    options: ChartOptions = Field(default_factory=ChartOptions)  # fresh all-None options per instance
+class HistogramConfig(BaseModel):
+    # Names the field to bin and how many bins to use.
+    field: str
+    bins: int = Field(default=20, ge=2, le=100)  # defaults to 20; validated to 2..100
+class HistogramChart(BaseModel):
+    # Histogram spec; configured through `histogram` rather than an `encoding` block.
+    type: Literal["histogram"]
+    histogram: HistogramConfig
+    options: ChartOptions = Field(default_factory=ChartOptions)  # fresh all-None options per instance
+# Union of all chart models, discriminated on each model's literal `type` field.
+ChartSpec = Annotated[
+    BarChart | LineChart | ScatterChart | BoxplotChart | HistogramChart,
+    Field(discriminator="type"),
+]
+class SortItem(BaseModel):
+    # One sort key: the field to order by and the direction.
+    field: str
+    dir: Literal["asc", "desc"] = "asc"  # ascending unless stated otherwise
+class SortTransform(BaseModel):
+    # Transform step carrying a list of sort keys under the single key "sort".
+    sort: list[SortItem]
+class LimitTransform(BaseModel):
+    # Transform step carrying a row limit under the single key "limit".
+    limit: int = Field(ge=1)  # required; must be at least 1
+class FilterConfig(BaseModel):
+    # One comparison: `field` <op> `value`.
+    field: str
+    op: Literal["==", "!=", ">", "<", ">=", "<="]
+    value: str | int | float | bool | None  # no default, so the key is required even when the value is null
+class FilterTransform(BaseModel):
+    # Transform step carrying one comparison under the single key "filter".
+    filter: FilterConfig
+# Plain (non-discriminated) union of the three transform step models.
+Transform = SortTransform | LimitTransform | FilterTransform
+class DataSource(BaseModel):
+    # Identifies the result file a spec draws its rows from.
+    # The key must match "result_<chars>.json", where <chars> is one or more of A-Z a-z 0-9 . _ -
+    file: str = Field(pattern=r"^result_[A-Za-z0-9._-]+\.json$")
+class DataConfig(BaseModel):
+    # Wrapper around the data source; `source` is required.
+    source: DataSource
+class VizSpec(BaseModel):
+    # Top-level visualization spec: which data to read, which chart to draw, which transforms to apply.
+    version: Literal["1"] = "1"  # only the string "1" is accepted
+    title: str | None = None
+    description: str | None = None
+    data: DataConfig  # required
+    chart: ChartSpec  # required; concrete model chosen by the chart's `type` value
+    transform: list[Transform] = Field(default_factory=list)  # defaults to a new empty list

sqlsaber_viz/spec_agent.py ADDED Viewed

@@ -0,0 +1,144 @@
+"""Internal agent for generating visualization specs."""
+from __future__ import annotations
+import json
+from typing import Any
+from sqlsaber.agents.provider_factory import ProviderFactory
+from sqlsaber.config import providers
+from sqlsaber.config.settings import Config
+from .prompts import VIZ_SYSTEM_PROMPT
+from .spec import VizSpec
+from .templates import ChartType, list_chart_types, vizspec_template
+class SpecAgent:
+    """Internal agent for generating visualization specs."""
+    def __init__(self, model_name: str | None = None, api_key: str | None = None):
+        self.config = Config()
+        self._model_name_override = model_name
+        self._api_key_override = api_key
+        self.agent = self._build_agent()
+    def _build_agent(self):
+        model_name = self._model_name_override or self.config.model.name
+        model_name_only = (
+            model_name.split(":", 1)[1] if ":" in model_name else model_name
+        )
+        if not (self._model_name_override and self._api_key_override):
+            self.config.auth.validate(model_name)
+        provider = providers.provider_from_model(model_name) or ""
+        api_key = self._api_key_override or self.config.auth.get_api_key(model_name)
+        factory = ProviderFactory()
+        agent = factory.create_agent(
+            provider=provider,
+            model_name=model_name_only,
+            full_model_str=model_name,
+            api_key=api_key,
+            thinking_enabled=False,
+        )
+        @agent.system_prompt
+        def viz_system_prompt() -> str:
+            return VIZ_SYSTEM_PROMPT
+        self._register_tools(agent)
+        return agent
+    def _register_tools(self, agent) -> None:
+        """Register visualization helper tools on the agent."""
+        @agent.tool_plain
+        def get_vizspec_template(chart_type: ChartType, file: str) -> dict:
+            """Get the complete VizSpec template for a chart type.
+            Call this FIRST to get the correct JSON structure, then fill in
+            the placeholder field names with actual column names from your data.
+            Args:
+                chart_type: One of "bar", "line", "scatter", "boxplot", "histogram"
+                file: The result file key (e.g., "result_abc123.json")
+            Returns:
+                A complete VizSpec template with placeholders for field names.
+            """
+            return vizspec_template(chart_type, file)
+        @agent.tool_plain
+        def get_available_chart_types() -> list[dict]:
+            """List available chart types with descriptions.
+            Call this if you're unsure which chart type to use for the data.
+            Returns:
+                List of chart types with descriptions and use cases.
+            """
+            return list_chart_types()
+    async def generate_spec(
+        self,
+        request: str,
+        columns: list[dict],
+        row_count: int,
+        file: str,
+        chart_type_hint: str | None = None,
+    ) -> VizSpec:
+        """Generate a VizSpec from user request and data summary."""
+        prompt = self._build_prompt(
+            request=request,
+            columns=columns,
+            row_count=row_count,
+            file=file,
+            chart_type_hint=chart_type_hint,
+        )
+        result = await self.agent.run(prompt)
+        output = str(result.output).strip()
+        parsed = _parse_json(output)
+        return VizSpec.model_validate(parsed)
+    def _build_prompt(
+        self,
+        request: str,
+        columns: list[dict],
+        row_count: int,
+        file: str,
+        chart_type_hint: str | None,
+    ) -> str:
+        columns_json = json.dumps(columns, ensure_ascii=False, indent=2)
+        hint_text = f"Chart type hint: {chart_type_hint}" if chart_type_hint else ""
+        return (
+            "## User Request\n"
+            f"{request.strip()}\n\n"
+            "## Data Summary\n"
+            f"Row count: {row_count}\n"
+            f"File: {file}\n"
+            f"Columns:\n{columns_json}\n\n"
+            f"{hint_text}\n\n"
+            "Use `get_vizspec_template` to get the correct spec structure, "
+            "then fill in the placeholders with actual column names.\n"
+            "Return ONLY the final JSON."
+        ).strip()
+def _parse_json(text: str) -> dict[str, Any]:
+    try:
+        parsed = json.loads(text)
+    except json.JSONDecodeError:
+        start = text.find("{")
+        end = text.rfind("}")
+        if start == -1 or end == -1 or end <= start:
+            raise
+        parsed = json.loads(text[start : end + 1])
+    if not isinstance(parsed, dict):
+        raise json.JSONDecodeError("Expected JSON object", text, 0)
+    return parsed

sqlsaber_viz/templates.py ADDED Viewed

@@ -0,0 +1,175 @@
+"""Template builders for visualization specs.
+These functions generate minimal valid templates from Pydantic models,
+ensuring they stay in sync with the schema definitions.
+"""
+from __future__ import annotations
+from collections.abc import Callable
+from typing import Literal
+from .spec import (
+    BarChart,
+    BarEncoding,
+    BoxplotChart,
+    BoxplotConfig,
+    ChartSpec,
+    ChartOptions,
+    DataConfig,
+    DataSource,
+    FieldEncoding,
+    HistogramChart,
+    HistogramConfig,
+    LineChart,
+    LineEncoding,
+    ScatterChart,
+    ScatterEncoding,
+    VizSpec,
+)
+# Closed set of chart type names; these match the literal `type` values of the chart models.
+ChartType = Literal["bar", "line", "scatter", "boxplot", "histogram"]
+# Placeholder values for template fields
+# Each is a stand-in field name that the template consumer is expected to replace
+# with a real column name.
+_CATEGORY_PLACEHOLDER = "<category_column>"
+_NUMBER_PLACEHOLDER = "<number_column>"
+_TIME_PLACEHOLDER = "<time_column>"
+_LABEL_PLACEHOLDER = "<label_column>"
+_VALUE_PLACEHOLDER = "<value_column>"
+def _build_bar_chart() -> BarChart:
+    return BarChart(
+        type="bar",
+        encoding=BarEncoding(
+            x=FieldEncoding(field=_CATEGORY_PLACEHOLDER, type="category"),
+            y=FieldEncoding(field=_NUMBER_PLACEHOLDER, type="number"),
+            series=None,
+        ),
+        orientation="vertical",
+        mode="grouped",
+        options=ChartOptions(),
+    )
+def _build_line_chart() -> LineChart:
+    return LineChart(
+        type="line",
+        encoding=LineEncoding(
+            x=FieldEncoding(field=_TIME_PLACEHOLDER, type="time"),
+            y=FieldEncoding(field=_NUMBER_PLACEHOLDER, type="number"),
+            series=None,
+        ),
+        options=ChartOptions(),
+    )
+def _build_scatter_chart() -> ScatterChart:
+    return ScatterChart(
+        type="scatter",
+        encoding=ScatterEncoding(
+            x=FieldEncoding(field=_NUMBER_PLACEHOLDER, type="number"),
+            y=FieldEncoding(field=_NUMBER_PLACEHOLDER, type="number"),
+            series=None,
+        ),
+        options=ChartOptions(),
+    )
+def _build_boxplot_chart() -> BoxplotChart:
+    return BoxplotChart(
+        type="boxplot",
+        boxplot=BoxplotConfig(
+            label_field=_LABEL_PLACEHOLDER,
+            value_field=_VALUE_PLACEHOLDER,
+        ),
+        options=ChartOptions(),
+    )
+def _build_histogram_chart() -> HistogramChart:
+    return HistogramChart(
+        type="histogram",
+        histogram=HistogramConfig(
+            field=_NUMBER_PLACEHOLDER,
+            bins=20,
+        ),
+        options=ChartOptions(),
+    )
+_CHART_BUILDERS: dict[ChartType, callable] = {
+    "bar": _build_bar_chart,
+    "line": _build_line_chart,
+    "scatter": _build_scatter_chart,
+    "boxplot": _build_boxplot_chart,
+    "histogram": _build_histogram_chart,
+}
+def _build_chart(chart_type: ChartType) -> ChartSpec:
+    """Build a chart object for the given type."""
+    builder = _CHART_BUILDERS.get(chart_type)
+    if builder is None:
+        raise ValueError(f"Unknown chart type: {chart_type}")
+    return builder()
+def chart_template(chart_type: ChartType) -> dict:
+    """Return a minimal valid chart template for the given chart type.
+    The template uses placeholder field names that the model should replace
+    with actual column names from the data.
+    """
+    return _build_chart(chart_type).model_dump(exclude_none=True)
+def vizspec_template(chart_type: ChartType, file: str) -> dict:
+    """Return a complete VizSpec template with data source pre-filled.
+    The template includes the chart structure for the specified type
+    and has placeholders for field names.
+    """
+    spec = VizSpec(
+        version="1",
+        title=None,
+        description=None,
+        data=DataConfig(source=DataSource(file=file)),
+        chart=_build_chart(chart_type),
+        transform=[],
+    )
+    return spec.model_dump(exclude_none=True)
+def list_chart_types() -> list[dict]:
+    """Return available chart types with descriptions.
+    Helps the model choose the appropriate chart type for the data.
+    """
+    return [
+        {
+            "type": "bar",
+            "description": "Compare categories. Use x for category, y for numeric value.",
+            "use_when": "Comparing values across categories (e.g., sales by region)",
+        },
+        {
+            "type": "line",
+            "description": "Show trends over time/sequence. Use x for time/sequence, y for value.",
+            "use_when": "Showing change over time (e.g., monthly revenue)",
+        },
+        {
+            "type": "scatter",
+            "description": "Show correlation between two numeric variables.",
+            "use_when": "Exploring relationship between two numbers (e.g., age vs income)",
+        },
+        {
+            "type": "boxplot",
+            "description": "Show distribution of values across groups.",
+            "use_when": "Comparing distributions (e.g., salary by department)",
+        },
+        {
+            "type": "histogram",
+            "description": "Show distribution of a single numeric variable.",
+            "use_when": "Understanding value distribution (e.g., age distribution)",
+        },
+    ]

sqlsaber_viz/tools.py ADDED Viewed

@@ -0,0 +1,234 @@
+"""Visualization tool implementation."""
+from __future__ import annotations
+import asyncio
+import json
+import re
+from html import escape
+from pydantic import ValidationError
+from pydantic_ai import RunContext
+from rich.console import Console
+from rich.text import Text
+from sqlsaber.tools.base import Tool
+from sqlsaber.utils.json_utils import json_dumps
+from .data_loader import (
+    extract_data_summary,
+    find_tool_output_in_messages,
+    find_tool_output_payload,
+)
+from .renderers.plotext_renderer import PlotextRenderer
+from .spec import BarChart, LimitTransform, SortItem, SortTransform, VizSpec
+from .transforms import apply_transforms
+# Accepted shape of a result file key: "result_" + one or more of [A-Za-z0-9._-] + ".json".
+TOOL_OUTPUT_FILE_PATTERN = re.compile(r"^result_[A-Za-z0-9._-]+\.json$")
+# Timeout in seconds handed to asyncio.wait_for around spec generation.
+SPEC_TIMEOUT_SECONDS = 300
+class VizTool(Tool):
+    """Terminal visualization tool for SQL results."""
+    requires_ctx = True
+    def __init__(self):
+        super().__init__()
+        self._last_ctx: RunContext | None = None
+        self._last_rows: list[dict] | None = None
+        self._last_file: str | None = None
+        self._replay_messages: list | None = None
+    def set_replay_messages(self, messages: list) -> None:
+        """Set message history for replay scenarios (e.g., threads show)."""
+        self._replay_messages = messages
+    @property
+    def name(self) -> str:
+        return "viz"
+    def render_executing(self, console: Console, args: dict) -> bool:
+        """Suppress default JSON rendering during execution."""
+        return True
+    async def execute(
+        self,
+        ctx: RunContext,
+        request: str,
+        file: str,
+        chart_type: str | None = None,
+    ) -> str:
+        """Generate a visualization spec for SQL results.
+        Args:
+            request: Natural language description of the desired visualization.
+            file: Result file key from execute_sql (e.g., "result_abc123.json").
+            chart_type: Optional hint for chart type (bar, line, scatter, boxplot, histogram).
+        Returns:
+            JSON string containing the visualization spec.
+        """
+        self._last_ctx = ctx
+        if not file or not TOOL_OUTPUT_FILE_PATTERN.match(file):
+            return json_dumps({"error": "Invalid result file key format."})
+        tool_call_id = file.removeprefix("result_").removesuffix(".json")
+        payload = find_tool_output_payload(ctx, tool_call_id)
+        if payload is None:
+            return json_dumps({"error": "Tool output not found in message history."})
+        summary = extract_data_summary(payload)
+        columns = summary.get("columns", [])
+        row_count = summary.get("row_count", 0)
+        rows = summary.get("rows", [])
+        self._last_rows = rows
+        self._last_file = file
+        agent = _get_spec_agent_cls()()
+        try:
+            spec = await asyncio.wait_for(
+                agent.generate_spec(
+                    request=request,
+                    columns=columns,
+                    row_count=row_count,
+                    file=file,
+                    chart_type_hint=chart_type,
+                ),
+                timeout=SPEC_TIMEOUT_SECONDS,
+            )
+            spec = self._ensure_bar_defaults(spec, row_count)
+            return json_dumps(spec.model_dump())
+        except asyncio.TimeoutError:
+            return json_dumps(
+                {
+                    "error": "Spec generation timed out.",
+                    "details": f"Timed out after {SPEC_TIMEOUT_SECONDS} seconds.",
+                }
+            )
+        except (ValidationError, json.JSONDecodeError, ValueError) as exc:
+            return json_dumps(
+                {
+                    "error": "Failed to generate a valid visualization spec.",
+                    "details": str(exc),
+                }
+            )
+    def render_result(self, console: Console, result: object) -> bool:
+        """Render the spec as a terminal chart using plotext."""
+        spec = self._parse_spec(result)
+        if spec is None:
+            return False
+        rows = self._resolve_rows(spec)
+        if rows is None:
+            if console.is_terminal:
+                console.print("[warning]No data available for visualization.[/warning]")
+            else:
+                console.print("*No data available for visualization.*\n")
+            return True
+        rows = apply_transforms(rows, spec.transform)
+        renderer = PlotextRenderer()
+        chart = renderer.render(spec, rows)
+        if console.is_terminal:
+            console.print(Text.from_ansi(chart))
+        else:
+            console.print(f"```\n{self._strip_ansi(chart)}\n```\n", markup=False)
+        return True
+    def render_result_html(self, result: object) -> str | None:
+        """Render the spec as an HTML chart."""
+        spec = self._parse_spec(result)
+        if spec is None:
+            return None
+        rows = self._resolve_rows(spec)
+        if rows is None:
+            return '<div class="viz-error">No data available for visualization.</div>'
+        rows = apply_transforms(rows, spec.transform)
+        from .renderers.plotext_renderer import PlotextRenderer
+        renderer = PlotextRenderer()
+        chart = renderer.render(spec, rows)
+        return f'<pre class="viz-chart">{escape(self._strip_ansi(chart))}</pre>'
+    def _parse_spec(self, result: object) -> VizSpec | None:
+        data = self._parse_result(result)
+        if not isinstance(data, dict):
+            return None
+        if "error" in data and data["error"]:
+            return None
+        try:
+            return VizSpec.model_validate(data)
+        except ValidationError:
+            return None
+    def _parse_result(self, result: object) -> object:
+        if isinstance(result, dict):
+            return result
+        if isinstance(result, str):
+            try:
+                return json.loads(result)
+            except json.JSONDecodeError:
+                return {"error": result}
+        return {"error": str(result)}
+    def _strip_ansi(self, text: str) -> str:
+        return re.sub(r"\x1b\[[0-9;]*m", "", text)
+    def _resolve_rows(self, spec: VizSpec) -> list[dict] | None:
+        if self._last_rows is not None and self._last_file == spec.data.source.file:
+            return self._last_rows
+        tool_call_id = spec.data.source.file.removeprefix("result_").removesuffix(
+            ".json"
+        )
+        payload: dict | None = None
+        if self._last_ctx is not None:
+            payload = find_tool_output_payload(self._last_ctx, tool_call_id)
+        elif self._replay_messages is not None:
+            payload = find_tool_output_in_messages(self._replay_messages, tool_call_id)
+        if payload is None:
+            return None
+        summary = extract_data_summary(payload)
+        rows = summary.get("rows")
+        if isinstance(rows, list):
+            return rows
+        return None
+    def _ensure_bar_defaults(self, spec: VizSpec, row_count: int) -> VizSpec:
+        if not isinstance(spec.chart, BarChart):
+            return spec
+        transforms = list(spec.transform)
+        has_limit = any(isinstance(t, LimitTransform) for t in transforms)
+        has_sort = any(isinstance(t, SortTransform) for t in transforms)
+        if not has_sort:
+            transforms.append(
+                SortTransform(
+                    sort=[SortItem(field=spec.chart.encoding.y.field, dir="desc")]
+                )
+            )
+        if not has_limit and row_count > 20:
+            transforms.append(LimitTransform(limit=20))
+        if transforms != spec.transform:
+            return spec.model_copy(update={"transform": transforms})
+        return spec
+def _get_spec_agent_cls():
+    from .spec_agent import SpecAgent
+    return SpecAgent