stacklet-mcp 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,8 @@
1
+ # LICENSE HEADER MANAGED BY add-license-header
2
+ #
3
+ # Copyright (c) 2025 Stacklet, Inc.
4
+ #
5
+
6
+ """Stacklet MCP server."""
7
+
8
+ __version__ = "0.1.0"
@@ -0,0 +1,11 @@
1
+ # LICENSE HEADER MANAGED BY add-license-header
2
+ #
3
+ # Copyright (c) 2025 Stacklet, Inc.
4
+ #
5
+
6
+ """Entry point for running the server from the package."""
7
+
8
from .mcp import main

# Guard the call so that merely importing this module does not start the
# server; `python -m <package>` still executes it, since __main__.py runs
# with __name__ == "__main__" in that case.
if __name__ == "__main__":
    main()
@@ -0,0 +1,4 @@
1
+ # LICENSE HEADER MANAGED BY add-license-header
2
+ #
3
+ # Copyright (c) 2025 Stacklet, Inc.
4
+ #
@@ -0,0 +1,268 @@
1
+ # LICENSE HEADER MANAGED BY add-license-header
2
+ #
3
+ # Copyright (c) 2025 Stacklet, Inc.
4
+ #
5
+
6
+ import copy
7
+
8
+ from datetime import datetime
9
+ from enum import IntEnum, StrEnum
10
+ from typing import Any
11
+
12
+ from pydantic import BaseModel, ConfigDict, Field, model_validator
13
+
14
+
15
class ExportFormat(StrEnum):
    """Format for query result export.

    Member values double as the file-extension suffix used when building
    Redash result download URLs (``.../results/<id>.<format>``).
    """

    CSV = "csv"
    JSON = "json"
    TSV = "tsv"
    XLSX = "xlsx"
22
+
23
+
24
class JobStatus(IntEnum):
    """Status values for AssetDB query execution jobs.

    Integer values match the job states reported by the Redash jobs API.
    """

    QUEUED = 1
    STARTED = 2
    FINISHED = 3
    FAILED = 4
    CANCELED = 5
    DEFERRED = 6
    SCHEDULED = 7

    @property
    def is_terminal(self) -> bool:
        """Whether this job status represents a completed state (finished, failed, or canceled)."""
        terminal = {JobStatus.FINISHED, JobStatus.FAILED, JobStatus.CANCELED}
        return self in terminal
39
+
40
+
41
class Job(BaseModel):
    """Redash job object for async query execution.

    Returned by query execution endpoints when no cached result is
    available; polled via ``api/jobs/{id}`` until ``query_result_id`` is
    populated or ``status`` becomes terminal.
    """

    # Redash job IDs are strings, unlike the integer IDs of other objects.
    id: str
    status: JobStatus
    # Error message when the job failed; None otherwise.
    error: str | None
    # Set once the job has produced a result; None while still running.
    query_result_id: int | None
48
+
49
+
50
class QueryArchiveResult(BaseModel):
    """Result of archiving/deleting a query."""

    # Whether the archive operation completed successfully.
    success: bool
    # Human-readable outcome description.
    message: str
    # ID of the query that was archived.
    query_id: int
56
+
57
+
58
class User(BaseModel):
    """Redash user object model.

    ``name`` and ``email`` are optional because some API payloads carry only
    a bare user ID, from which a minimal User is constructed (see
    ``Query.transform_user_fields``).
    """

    model_config = ConfigDict(extra="ignore")

    id: int = Field(..., description="Unique user ID in the Redash system")
    name: str | None = Field(None, description="User's display name")
    email: str | None = Field(None, description="User's email address")
66
+
67
+
68
class Query(BaseModel):
    """Redash query object model based on serialize_query output."""

    model_config = ConfigDict(extra="ignore")

    id: int = Field(..., description="Unique query ID in the Redash system")
    latest_query_data_id: int | None = Field(
        None, description="ID of the most recent query result data"
    )
    name: str = Field(..., description="Query display name")
    description: str | None = Field(None, description="Query description or documentation")
    query: str = Field(..., description="SQL query text")
    api_key: str = Field(..., description="API key for accessing this query")
    is_draft: bool = Field(..., description="Whether the query is in draft status")
    updated_at: datetime = Field(..., description="Timestamp of last modification")
    created_at: datetime = Field(..., description="Timestamp when query was created")
    data_source_id: int = Field(..., description="ID of the data source this query runs against")
    options: dict[str, Any] = Field(
        ..., description="Query configuration options including parameters"
    )
    tags: list[str] = Field(..., description="List of tags for categorizing the query")
    is_safe: bool = Field(..., description="Whether the query is considered safe to run")
    user: User = Field(..., description="User who created the query")
    last_modified_by: User | None = Field(None, description="User who last modified the query")
    retrieved_at: datetime | None = Field(
        None, description="Timestamp when query data was last retrieved"
    )
    runtime: float | None = Field(None, description="Last execution runtime in seconds")
    is_favorite: bool = Field(..., description="Whether the query is marked as favorite")

    @model_validator(mode="before")
    @classmethod
    def transform_user_fields(cls, data: Any) -> Any:
        """Normalize alternate API payload shapes before validation.

        Some Redash payloads serialize users as bare ``user_id`` /
        ``last_modified_by_id`` integers rather than nested user objects;
        wrap those into minimal ``User``-shaped dicts so validation passes.
        """
        if not isinstance(data, dict):
            return data

        # A shallow copy is sufficient to avoid mutating the caller's dict:
        # only top-level keys are assigned below, never nested values.
        # (A full deepcopy of the payload would be O(payload size) for no
        # additional safety.)
        data = dict(data)

        # Handle user field - convert user_id to User object if needed
        if "user_id" in data and "user" not in data:
            data["user"] = {"id": data["user_id"]}

        # Handle last_modified_by - convert last_modified_by_id to User object
        # if needed; the ID itself may legitimately be None.
        if "last_modified_by_id" in data and "last_modified_by" not in data:
            if data["last_modified_by_id"] is not None:
                data["last_modified_by"] = {"id": data["last_modified_by_id"]}
            else:
                data["last_modified_by"] = None

        return data
119
+
120
+
121
class QueryListResponse(BaseModel):
    """Raw response model for query list endpoint (internal use).

    Mirrors the paginated envelope returned by Redash's ``GET api/queries``.
    """

    model_config = ConfigDict(extra="ignore")

    count: int = Field(..., description="Total number of queries matching the search criteria")
    page: int = Field(..., description="Current page number (1-based)")
    page_size: int = Field(..., description="Number of queries per page")
    results: list[Query] = Field(..., description="List of queries on the current page")
130
+
131
+
132
class QueryUpsert(BaseModel):
    """Query data for create/update operations.

    All fields default to None; ``payload()`` sends only the fields that
    were actually set, so updates can be partial.
    """

    name: str | None = Field(None, description="Query display name (required for new queries)")
    query: str | None = Field(None, description="SQL query text (required for new queries)")
    description: str | None = Field(None, description="Query description or documentation")
    tags: list[str] | None = Field(None, description="List of tags for categorizing the query")
    options: dict[str, Any] | None = Field(
        None, description="Query configuration options including parameters"
    )
    is_draft: bool | None = Field(None, description="Whether the query should be in draft status")

    def payload(self, data_source_id: int | None = None) -> dict[str, Any]:
        """
        Build API payload for query create/update.

        Args:
            data_source_id: Data source ID to attach (required when creating
                a new query; omit for updates)

        Returns:
            Payload dictionary with non-None values
        """
        payload = self.model_dump(exclude_none=True)
        # Compare against None explicitly so a data source ID of 0 is not
        # silently dropped by truthiness.
        if data_source_id is not None:
            payload["data_source_id"] = data_source_id

        return payload
159
+
160
+
161
class ToolQueryListPagination(BaseModel):
    """Pagination metadata for query list responses."""

    page: int = Field(..., description="Current page number (1-based)")
    page_size: int = Field(..., description="Number of queries per page")
    has_next_page: bool = Field(..., description="Whether there are more pages available")
    total_count: int = Field(
        ..., description="Total number of queries matching the search criteria"
    )
170
+
171
+
172
class ToolQueryListItem(BaseModel):
    """Simplified query information for list responses.

    A trimmed-down view of ``Query`` (no SQL text, API key, or timestamps)
    suitable for returning in list results.
    """

    id: int = Field(..., description="Unique query ID")
    name: str = Field(..., description="Query display name")
    # Required but nullable: must be supplied explicitly, may be None.
    description: str | None = Field(..., description="Query description or documentation")
    has_parameters: bool = Field(..., description="Whether the query accepts parameters")
    data_source_id: int = Field(..., description="ID of the data source this query runs against")
    is_draft: bool = Field(..., description="Whether the query is in draft status")
    is_favorite: bool = Field(..., description="Whether the query is marked as favorite")
    tags: list[str] = Field(..., description="List of tags for categorizing the query")
    user: User = Field(..., description="User who created the query")
184
+
185
+
186
class ToolQueryList(BaseModel):
    """Complete response for query list operations."""

    queries: list[ToolQueryListItem] = Field(..., description="List of queries on the current page")
    pagination: ToolQueryListPagination = Field(..., description="Pagination information")
191
+
192
+
193
class QueryResultColumn(BaseModel):
    """Column definition in a query result."""

    name: str = Field(..., description="Column name")
    # Type/friendly_name may be absent in some result payloads, hence optional.
    type: str | None = Field(None, description="Column data type")
    friendly_name: str | None = Field(None, description="Human-friendly column name")

    model_config = ConfigDict(extra="ignore")
201
+
202
+
203
class QueryResultData(BaseModel):
    """The data structure within a query result containing columns and rows.

    Rows are dicts keyed on column name; keys may be sparse, so consult
    ``columns`` for the full column set.
    """

    columns: list[QueryResultColumn] = Field(
        ..., description="Column definitions for the query result"
    )
    rows: list[dict[str, Any]] = Field(
        ..., description="Query result rows as key-value dictionaries"
    )

    model_config = ConfigDict(extra="ignore")
214
+
215
+
216
class QueryResult(BaseModel):
    """Query result object as returned by Redash QueryResult.to_dict()."""

    id: int = Field(..., description="Query result ID")
    query: str = Field(..., description="The SQL query text that was executed")
    data: QueryResultData = Field(..., description="Query result data with columns and rows")
    data_source_id: int = Field(..., description="ID of the data source used")
    runtime: float = Field(..., description="Query execution time in seconds")
    retrieved_at: datetime = Field(..., description="When the query result was retrieved")

    model_config = ConfigDict(extra="ignore")
227
+
228
+
229
class ToolQueryResultArtifact(BaseModel):
    """Query download details for a data format."""

    format: ExportFormat = Field(..., description="Export format for the query result download")
    download_from: str = Field(..., description="URL to download the data in the specified format")
234
+
235
+
236
class ToolQueryResult(BaseModel):
    """
    Truncated query results suitable for LLMs, along with ways to get the full
    result set for analysis with tools suited to that task.
    """

    result_id: int = Field(..., description="Query result id")
    # None for ad-hoc executions, which have no saved query behind them.
    query_id: int | None = Field(None, description="Query id, if applicable")

    # These fields come directly from the redash QueryResult.
    query_text: str = Field(..., description="The SQL query text that was executed")
    query_runtime: float = Field(..., description="Query execution duration in seconds")
    query_timestamp: datetime = Field(..., description="Query execution finish timestamp")
    columns: list[QueryResultColumn] = Field(
        ..., description="Column definitions; sparse row dicts are keyed on column name"
    )

    # These fields are derived from the QueryResult rows.
    row_count: int = Field(..., description="Total rows in the full query result")
    some_rows: list[dict[str, Any]] = Field(
        ..., description="Sample of up to 20 rows from the query result for preview"
    )

    # Complete result data is always saved locally for further analysis. (We don't *have*
    # to do this on every path, but we do on *some*, so we choose consistency.)
    full_results_saved_to: str = Field(
        ..., description="Local path where complete result data was saved as JSON"
    )

    # Available only for saved queries (not ad-hoc queries) that have API keys.
    alternate_formats: list[ToolQueryResultArtifact] | None = Field(
        None, description="Download URLs for different formats, None for ad-hoc queries"
    )
@@ -0,0 +1,268 @@
1
+ # LICENSE HEADER MANAGED BY add-license-header
2
+ #
3
+ # Copyright (c) 2025 Stacklet, Inc.
4
+ #
5
+
6
+ """
7
+ AssetDB client using Redash API with Stacklet authentication.
8
+ """
9
+
10
+ import asyncio
11
+ import time
12
+
13
+ from typing import Any, Self, cast
14
+ from urllib.parse import urljoin
15
+
16
+ import httpx
17
+
18
+ from fastmcp import Context
19
+
20
+ from ..lifespan import server_cached
21
+ from ..settings import SETTINGS
22
+ from ..stacklet_auth import StackletCredentials
23
+ from .models import ExportFormat, Job, Query, QueryListResponse, QueryResult, QueryUpsert
24
+
25
+
26
class AssetDBClient:
    """Client for AssetDB interface via Redash API using Stacklet authentication."""

    def __init__(self, credentials: StackletCredentials, data_source_id: int = 1) -> None:
        """
        Initialize AssetDB client with Stacklet credentials.

        Args:
            credentials: StackletCredentials object containing endpoint and id_token
            data_source_id: ID of the Redash data source (default 1 for main AssetDB)
        """
        self.credentials = credentials
        self.data_source_id = data_source_id

        self.redash_url = self.credentials.service_endpoint("redash")
        # Authentication is cookie-based: the identity token is sent as the
        # "stacklet-auth" cookie on every request made through this session.
        self.session = httpx.AsyncClient(
            cookies={"stacklet-auth": credentials.identity_token}, timeout=60.0
        )

    @classmethod
    def get(cls, ctx: Context) -> Self:
        """Return the server-cached client for this context, constructing it on first use."""

        def construct() -> AssetDBClient:
            return cls(StackletCredentials.get(ctx), SETTINGS.assetdb_datasource)

        return cast(Self, server_cached(ctx, "ASSETDB_CLIENT", construct))

    async def aclose(self) -> None:
        """
        Close the underlying HTTP client and release its connections.

        The cached client lives for the server's lifetime; call this during
        shutdown so sockets are released cleanly.
        """
        await self.session.aclose()

    async def _make_request(self, method: str, endpoint: str, **kwargs: Any) -> Any:
        """
        Make a request to the Redash API with Stacklet authentication.

        Args:
            method: HTTP method (GET, POST, etc.)
            endpoint: API endpoint path, relative to the Redash base URL
            **kwargs: Additional arguments for httpx

        Returns:
            Decoded response JSON

        Raises:
            httpx.HTTPStatusError: If the response has a 4xx/5xx status code.
        """
        # NOTE(review): urljoin drops the last path segment of a base URL
        # without a trailing slash — assumes service_endpoint() returns a
        # base ending in "/". TODO confirm.
        url = urljoin(self.redash_url, endpoint)
        response = await self.session.request(method, url, **kwargs)
        response.raise_for_status()
        return response.json()

    async def list_queries(
        self,
        page: int = 1,
        page_size: int = 25,
        search: str | None = None,
        tags: list[str] | None = None,
    ) -> QueryListResponse:
        """
        Get list of queries with search and sorting support.

        Args:
            page: Page number (1-based)
            page_size: Number of queries per page
            search: Search query names, descriptions, and SQL content
            tags: Filter out queries not matching all tags

        Returns:
            Structured response with queries and pagination metadata
        """
        params: dict[str, Any] = {"page": page, "page_size": page_size}

        if search:
            params["q"] = search
        if tags:
            params["tags"] = tags

        result = await self._make_request("GET", "api/queries", params=params)
        return QueryListResponse(**result)

    async def get_query(self, query_id: int) -> Query:
        """
        Get detailed information about a specific saved query.

        Args:
            query_id: ID of the query to retrieve

        Returns:
            Complete query object with SQL and parameters
        """
        result = await self._make_request("GET", f"api/queries/{query_id}")
        return Query(**result)

    async def execute_saved_query(
        self,
        query_id: int,
        parameters: dict[str, Any] | None,
        max_age: int,
        timeout: int,
    ) -> QueryResult:
        """
        Execute a saved query by ID, with caching control.

        Args:
            query_id: ID of the query
            parameters: Optional parameters for the query
            max_age: Maximum age of cached results in seconds (-1=any cached result, 0=always fresh)
            timeout: Timeout in seconds for query execution (if not cached)

        Returns:
            Complete query result with data, columns, and metadata
        """
        payload = {"max_age": max_age, "parameters": parameters or {}}
        return await self._execute_results(f"api/queries/{query_id}/results", payload, timeout)

    async def execute_adhoc_query(self, query: str, max_age: int, timeout: int) -> QueryResult:
        """
        Execute an ad-hoc SQL query without saving it.

        Args:
            query: SQL query string to execute
            max_age: Maximum age of cached results in seconds (-1=any cached result, 0=always fresh)
            timeout: Timeout in seconds for query execution

        Returns:
            Complete query result with data, columns, and metadata
        """
        payload = {
            "query": query,
            "data_source_id": self.data_source_id,
            "max_age": max_age,
            "parameters": {},
            "apply_auto_limit": True,
        }
        return await self._execute_results("api/query_results", payload, timeout)

    async def _execute_results(
        self, endpoint: str, payload: dict[str, Any], timeout: int
    ) -> QueryResult:
        """
        Execute query request and handle both sync and async results.

        Args:
            endpoint: API endpoint to POST the query to
            payload: Query parameters and options
            timeout: Maximum time to wait for async job completion

        Returns:
            Complete query result with data, columns, and metadata

        Raises:
            RuntimeError: If async execution fails or times out.
        """
        # This will contain either a "job" or a full "query_result". Since we're
        # sometimes stuck grabbing a whole result set any way, we may as well do
        # it every time; this also lets us always return a preview of the result
        # data even when it's large.
        response = await self._make_request("POST", endpoint, json=payload)
        if "query_result" in response:
            return QueryResult(**response["query_result"])

        job = Job(**response["job"])
        result_id = await self._poll_job(job, timeout)
        qr_response = await self._make_request("GET", f"api/query_results/{result_id}")
        return QueryResult(**qr_response["query_result"])

    async def _poll_job(self, job: Job, timeout: int) -> int:
        """
        Poll an async job until completion using exponential backoff.

        Polling starts at a 2-second interval and doubles each attempt,
        capped by the time remaining before the deadline.

        Args:
            job: Initial job object from query execution
            timeout: Maximum time to wait before timing out

        Returns:
            Query result ID when job completes successfully

        Raises:
            RuntimeError: If the job fails, is canceled, or the timeout elapses.
        """
        cutoff = time.monotonic() + timeout
        interval_s = 2
        while True:
            job_result = await self._make_request("GET", f"api/jobs/{job.id}")
            job = Job(**job_result["job"])
            if job.query_result_id:
                return job.query_result_id
            elif job.status.is_terminal:
                raise RuntimeError(f"Query execution failed: {job.error or 'Unknown error.'}")

            remaining_s = cutoff - time.monotonic()
            if remaining_s <= 0:
                raise RuntimeError(f"Query execution timed out after {timeout} seconds")
            await asyncio.sleep(min(interval_s, remaining_s))
            interval_s *= 2

    def get_query_result_urls(
        self, query: Query, query_result: QueryResult
    ) -> dict[ExportFormat, str]:
        """
        Return download URLs for a query result.

        Args:
            query: Saved query the result belongs to; supplies the query ID
                and the API key embedded in each URL
            query_result: Query result to build download URLs for

        Returns:
            Dictionary mapping download formats to their URLs
        """
        # One URL per supported export format, authenticated via the query's
        # own API key passed as a query parameter.
        return {
            fmt: urljoin(
                self.redash_url,
                f"api/queries/{query.id}/results/{query_result.id}.{fmt}?api_key={query.api_key}",
            )
            for fmt in ExportFormat
        }

    async def create_query(self, upsert: QueryUpsert) -> Query:
        """
        Create a new saved query.

        Args:
            upsert: QueryUpsert object with query data

        Returns:
            Complete query object with ID, timestamps, and metadata
        """
        payload = upsert.payload(data_source_id=self.data_source_id)
        result = await self._make_request("POST", "api/queries", json=payload)
        return Query(**result)

    async def update_query(self, query_id: int, upsert: QueryUpsert) -> Query:
        """
        Update an existing saved query.

        Args:
            query_id: ID of the query to update
            upsert: QueryUpsert object with query data to update

        Returns:
            Complete updated query object with ID, timestamps, and metadata
        """
        payload = upsert.payload()
        result = await self._make_request("POST", f"api/queries/{query_id}", json=payload)
        return Query(**result)

    async def delete_query(self, query_id: int) -> None:
        """
        Archive a saved query.

        This sets the query's is_archived flag to True and removes associated
        visualizations and alerts, but preserves the query in the database.

        Args:
            query_id: ID of the query to archive
        """
        await self._make_request("DELETE", f"api/queries/{query_id}")