PyPI - vaultkit - Versions diffs - 0.1.0__py3-none-any.whl - Mend

vaultkit 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28) hide show

vaultkit/__init__.py +85 -0
vaultkit/client.py +441 -0
vaultkit/core/__init__.py +1 -0
vaultkit/core/http.py +190 -0
vaultkit/core/polling.py +140 -0
vaultkit/errors/__init__.py +36 -0
vaultkit/errors/base.py +30 -0
vaultkit/errors/exceptions.py +211 -0
vaultkit/models/__init__.py +11 -0
vaultkit/models/dataset_info.py +25 -0
vaultkit/models/dataset_schema.py +75 -0
vaultkit/models/fetch_result.py +53 -0
vaultkit/models/query_result.py +73 -0
vaultkit/tools/__init__.py +5 -0
vaultkit/tools/adapters/__init__.py +12 -0
vaultkit/tools/adapters/anthropic.py +17 -0
vaultkit/tools/adapters/openai.py +23 -0
vaultkit/tools/builder.py +128 -0
vaultkit/tools/definitions.py +177 -0
vaultkit/tools/executor.py +199 -0
vaultkit/tools/schemas.py +39 -0
vaultkit/utils/__init__.py +1 -0
vaultkit/utils/retry.py +81 -0
vaultkit/utils/validation.py +87 -0
vaultkit-0.1.0.dist-info/METADATA +207 -0
vaultkit-0.1.0.dist-info/RECORD +28 -0
vaultkit-0.1.0.dist-info/WHEEL +5 -0
vaultkit-0.1.0.dist-info/top_level.txt +1 -0

vaultkit/models/dataset_schema.py ADDED Viewed

@@ -0,0 +1,75 @@
+from __future__ import annotations
+from dataclasses import dataclass
+from typing import Any, Dict, List, Optional
+@dataclass(frozen=True)
+class DatasetSchema:
+    """
+    Immutable view of a dataset schema payload.
+    Attributes:
+        dataset: Value of the payload's "dataset" key.
+        datasource: Value of the payload's "datasource" key.
+        fields: Raw field descriptors from the payload. Entries that are not
+            dicts, or that have no truthy "name", are ignored by every
+            derived property below.
+        correlation_id: Value of the payload's "correlation_id" key, if any.
+    """
+    dataset: str
+    datasource: str
+    fields: List[Dict[str, Any]]
+    correlation_id: Optional[str] = None
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> "DatasetSchema":
+        """
+        Build a schema from a decoded response payload.
+        A missing or non-list "fields" value is treated as an empty list.
+        Raises:
+            ValueError: if "dataset" or "datasource" is missing or empty.
+        """
+        dataset = data.get("dataset")
+        datasource = data.get("datasource")
+        if not dataset or not datasource:
+            raise ValueError("DatasetSchema requires 'dataset' and 'datasource'")
+        raw_fields = data.get("fields")
+        return cls(
+            dataset=dataset,
+            datasource=datasource,
+            fields=raw_fields if isinstance(raw_fields, list) else [],
+            correlation_id=data.get("correlation_id"),
+        )
+    def _named_fields(self) -> List[Dict[str, Any]]:
+        """Return the field descriptors that are dicts with a truthy "name"."""
+        # Single source of truth so field_names, field_map and field_summaries
+        # always agree on which entries count as usable fields.
+        return [f for f in self.fields if isinstance(f, dict) and f.get("name")]
+    @property
+    def field_names(self) -> List[str]:
+        """Names of all usable fields, in payload order."""
+        return [f["name"] for f in self._named_fields()]
+    @property
+    def field_map(self) -> Dict[str, Dict[str, Any]]:
+        """
+        Mapping of field name -> field descriptor.
+        Entries with a missing, None or empty name are skipped (previously
+        they produced None / "" keys). On duplicate names the last one wins.
+        """
+        return {f["name"]: f for f in self._named_fields()}
+    @property
+    def field_summaries(self) -> List[str]:
+        """
+        Human-readable summaries for LLM grounding.
+        Each summary is the field name followed by optional markers:
+        "(masked)", "(restricted)" when visibility is "deny", and
+        "(sensitivity: <value>)".
+        """
+        summaries = []
+        for f in self._named_fields():
+            parts = [f["name"]]
+            if f.get("masked"):
+                parts.append("(masked)")
+            if f.get("visibility") == "deny":
+                parts.append("(restricted)")
+            if f.get("sensitivity"):
+                parts.append(f"(sensitivity: {f['sensitivity']})")
+            summaries.append(" ".join(parts))
+        return summaries

vaultkit/models/fetch_result.py ADDED Viewed

@@ -0,0 +1,53 @@
+from __future__ import annotations
+from dataclasses import dataclass
+from typing import Any, Dict, List, Optional
+@dataclass(frozen=True)
+class FetchResult:
+    """Immutable container for fetched rows plus optional response metadata."""
+    rows: List[Dict[str, Any]]
+    meta: Optional[Dict[str, Any]] = None
+    correlation_id: Optional[str] = None
+    @staticmethod
+    def from_dict(data: Dict[str, Any]) -> "FetchResult":
+        """
+        Build a result from a decoded response payload.
+        "rows" falls back to an empty list unless it is a non-empty list;
+        "meta" falls back to None unless it is a dict.
+        """
+        raw_rows = data.get("rows")
+        raw_meta = data.get("meta")
+        return FetchResult(
+            rows=raw_rows if raw_rows and isinstance(raw_rows, list) else [],
+            meta=raw_meta if isinstance(raw_meta, dict) else None,
+            correlation_id=data.get("correlation_id"),
+        )
+    # Compatibility accessors: the client and executor read results through
+    # these names, so they are kept as thin views over rows / meta.
+    @property
+    def data(self) -> List[Dict[str, Any]]:
+        """Same list as ``rows`` under the name the executor and client use."""
+        return self.rows
+    @property
+    def row_count(self) -> int:
+        """Number of entries in ``rows``."""
+        return len(self.rows)
+    @property
+    def masked_fields(self) -> List[str]:
+        """
+        The "masked_fields" list stored in ``meta``.
+        Returns [] when meta is absent or empty, has no such key, or the
+        stored value is not a list.
+        """
+        if not self.meta:
+            return []
+        candidate = self.meta.get("masked_fields")
+        if isinstance(candidate, list):
+            return candidate
+        return []
+    @property
+    def is_empty(self) -> bool:
+        """True when there are no rows."""
+        return False if self.rows else True

vaultkit/models/query_result.py ADDED Viewed

@@ -0,0 +1,73 @@
+from __future__ import annotations
+from dataclasses import dataclass, field
+from typing import Any, Dict, List, Optional
+@dataclass(frozen=True)
+class QueryResult:
+    """
+    Immutable outcome of a query request.
+    ``status`` is stored lower-cased and stripped. The remaining attributes
+    mirror the same-named keys of the response payload ("grant_ref" also
+    accepts the "grant_id" key as a fallback).
+    """
+    status: str
+    grant_ref: Optional[str] = None
+    expires_at: Optional[str] = None
+    masked_fields: List[str] = field(default_factory=list)
+    rows: List[Dict[str, Any]] = field(default_factory=list)
+    request_id: Optional[str] = None
+    reason: Optional[str] = None
+    meta: Optional[Dict[str, Any]] = None
+    policy_id: Optional[str] = None
+    approver_role: Optional[str] = None
+    correlation_id: Optional[str] = None  # propagated from HTTP layer
+    @staticmethod
+    def from_dict(data: Dict[str, Any]) -> "QueryResult":
+        """
+        Build a result from a decoded response payload.
+        "masked_fields" and "rows" are copied when they are lists (or tuples)
+        and replaced by [] otherwise; "meta" is kept only when it is a dict.
+        Raises:
+            ValueError: if "status" is missing/empty, or if the status is one
+                of queued / granted / ok / pending_approval and the payload
+                has no "request_id".
+        """
+        status = str(data.get("status") or "").lower().strip()
+        if not status:
+            raise ValueError("Missing 'status' in QueryResult response")
+        request_id = data.get("request_id")
+        if status in ("queued", "granted", "ok", "pending_approval") and not request_id:
+            raise ValueError(f"Missing request_id for status '{status}'")
+        # Guard the container types before copying: list("email") would
+        # explode a string into characters and list({...}) would keep only
+        # the dict's keys.
+        raw_masked = data.get("masked_fields")
+        raw_rows = data.get("rows")
+        raw_meta = data.get("meta")
+        return QueryResult(
+            status=status,
+            grant_ref=data.get("grant_ref") or data.get("grant_id"),
+            expires_at=data.get("expires_at"),
+            masked_fields=list(raw_masked) if isinstance(raw_masked, (list, tuple)) else [],
+            rows=list(raw_rows) if isinstance(raw_rows, (list, tuple)) else [],
+            request_id=request_id,
+            reason=data.get("reason"),
+            meta=raw_meta if isinstance(raw_meta, dict) else None,
+            policy_id=data.get("policy_id"),
+            approver_role=data.get("approver_role"),
+            correlation_id=data.get("correlation_id"),
+        )
+    @property
+    def is_granted(self) -> bool:
+        """True when status is "granted"."""
+        return self.status == "granted"
+    @property
+    def is_denied(self) -> bool:
+        """True when status is "denied"."""
+        return self.status == "denied"
+    @property
+    def needs_approval(self) -> bool:
+        """True when status is "queued" or "pending_approval"."""
+        return self.status in ("queued", "pending_approval")
+    @property
+    def is_pending(self) -> bool:
+        """Same condition as ``needs_approval``."""
+        return self.status in ("queued", "pending_approval")
+    @property
+    def is_terminal(self) -> bool:
+        """True when status is "granted" or "denied"."""
+        return self.status in ("granted", "denied")
+    @property
+    def has_data(self) -> bool:
+        """True when at least one row is present."""
+        return bool(self.rows)

vaultkit/tools/__init__.py ADDED Viewed

@@ -0,0 +1,5 @@
+from .builder import ToolBuilder
+from .executor import ToolExecutor
+from .adapters import ToolProvider
+__all__ = ["ToolBuilder", "ToolExecutor", "ToolProvider"]

vaultkit/tools/adapters/__init__.py ADDED Viewed

@@ -0,0 +1,12 @@
+from __future__ import annotations
+from enum import Enum
+class ToolProvider(str, Enum):
+    """
+    Target format for generated tool definitions.
+    Mixing in ``str`` makes each member compare equal to its plain string
+    value (e.g. ``ToolProvider.OPENAI == "openai"``).
+    """
+    OPENAI = "openai"  # OpenAI function-tool format
+    ANTHROPIC = "anthropic"  # Anthropic tools format
+    RAW = "raw"  # canonical definitions, no provider conversion
+__all__ = ["ToolProvider"]

vaultkit/tools/adapters/anthropic.py ADDED Viewed

@@ -0,0 +1,17 @@
+from __future__ import annotations
+from typing import Any, Dict
+def to_anthropic_tool(defn: Dict[str, Any]) -> Dict[str, Any]:
+    """
+    Render a canonical tool definition in Anthropic's 'tools' shape.
+    Output keys:
+      { "name": str, "description": str, "input_schema": {...} }
+    "name" is mandatory (KeyError if absent). A missing "description"
+    becomes "" and a missing "input_schema" becomes an empty object schema.
+    """
+    tool: Dict[str, Any] = {"name": defn["name"]}
+    tool["description"] = defn.get("description", "")
+    tool["input_schema"] = defn.get(
+        "input_schema", {"type": "object", "properties": {}}
+    )
+    return tool

vaultkit/tools/adapters/openai.py ADDED Viewed

@@ -0,0 +1,23 @@
+from __future__ import annotations
+from typing import Any, Dict
+def to_openai_tool(defn: Dict[str, Any]) -> Dict[str, Any]:
+    """
+    Render a canonical tool definition in OpenAI's 'tools' shape.
+    Input (canonical):
+      { "name": str, "description": str, "input_schema": {...} }
+    Output (OpenAI):
+      { "type": "function", "function": { "name": ..., "description": ..., "parameters": ... } }
+    "name" is mandatory (KeyError if absent). A missing "description"
+    becomes "" and a missing "input_schema" becomes an empty object schema.
+    """
+    function_spec: Dict[str, Any] = {
+        "name": defn["name"],
+        "description": defn.get("description", ""),
+        "parameters": defn.get("input_schema", {"type": "object", "properties": {}}),
+    }
+    return {"type": "function", "function": function_spec}

vaultkit/tools/builder.py ADDED Viewed

@@ -0,0 +1,128 @@
+from __future__ import annotations
+from typing import TYPE_CHECKING, Any, Dict, List, Optional
+from .adapters import ToolProvider
+from .adapters.anthropic import to_anthropic_tool
+from .adapters.openai import to_openai_tool
+from .definitions import (
+    check_approval_tool_def,
+    discover_tool_def,
+    query_tool_def,
+)
+if TYPE_CHECKING:
+    from vaultkit.client import VaultKitClient
+class ToolBuilder:
+    """
+    Generates provider-specific tool schemas scoped to what the agent
+    is authorized to access.
+    Provider formats:
+      - OpenAI: tools=[{type:"function", function:{name,description,parameters}}]
+      - Anthropic: tools=[{name,description,input_schema}]
+      - Raw: canonical tool defs=[{name,description,input_schema}]
+    """
+    MAX_SCHEMA_HINTS = 10  # prevent N+1 explosion
+    def __init__(self, client: "VaultKitClient") -> None:
+        """Store the client used for dataset and schema lookups."""
+        self._client = client
+    def build(
+        self,
+        *,
+        provider: ToolProvider = ToolProvider.OPENAI,
+        environment: str = "production",
+        include_discover: bool = True,
+        include_query: bool = True,
+        include_check_approval: bool = False,
+        datasets: Optional[List[str]] = None,
+        fetch_schema_hints: bool = True,
+        requester_region: Optional[str] = None,
+    ) -> List[Dict[str, Any]]:
+        """
+        Build the tool list in the requested provider format.
+        Args:
+            provider: Output format; anything other than RAW or ANTHROPIC is
+                rendered in OpenAI format.
+            environment: Passed through to the client lookups.
+            include_discover: Include the discover tool.
+            include_query: Include the query tool.
+            include_check_approval: Include the check-approval tool.
+            datasets: Dataset names to scope the tools to. When None or empty
+                the names are looked up through the client.
+            fetch_schema_hints: Look up per-dataset field hints for the query
+                tool (at most MAX_SCHEMA_HINTS datasets).
+            requester_region: Passed through to the client lookups.
+        Returns:
+            Tool definitions ordered discover, query, check-approval.
+        """
+        # Dataset names only feed the discover and query tools, and schema
+        # hints only feed the query tool, so skip client calls whose results
+        # would be thrown away.
+        resolved_datasets: List[str] = []
+        if include_discover or include_query:
+            resolved_datasets = datasets or self._fetch_dataset_names(
+                environment=environment,
+                requester_region=requester_region,
+            )
+        schema_hints: Optional[Dict[str, List[str]]] = None
+        if include_query and fetch_schema_hints and resolved_datasets:
+            schema_hints = self._fetch_schema_hints(
+                resolved_datasets,
+                environment=environment,
+                requester_region=requester_region,
+            )
+        canonical: List[Dict[str, Any]] = []
+        if include_discover:
+            canonical.append(discover_tool_def(dataset_names=resolved_datasets))
+        if include_query:
+            canonical.append(
+                query_tool_def(dataset_names=resolved_datasets, schema_hints=schema_hints)
+            )
+        if include_check_approval:
+            canonical.append(check_approval_tool_def())
+        return self._to_provider_format(canonical, provider)
+    def build_minimal(self, *, provider: ToolProvider = ToolProvider.OPENAI) -> List[Dict[str, Any]]:
+        """Fast startup: just vaultkit_query, no registry calls."""
+        return self._to_provider_format([query_tool_def()], provider)
+    # private
+    @staticmethod
+    def _to_provider_format(
+        canonical: List[Dict[str, Any]],
+        provider: ToolProvider,
+    ) -> List[Dict[str, Any]]:
+        """Convert canonical tool defs to the provider's format (OpenAI by default)."""
+        if provider == ToolProvider.RAW:
+            return canonical
+        if provider == ToolProvider.ANTHROPIC:
+            return [to_anthropic_tool(t) for t in canonical]
+        # default OPENAI
+        return [to_openai_tool(t) for t in canonical]
+    @staticmethod
+    def _logger() -> Any:
+        """Return this module's logger."""
+        # Imported locally so the module's import block stays untouched.
+        import logging
+        return logging.getLogger(__name__)
+    def _fetch_dataset_names(
+        self,
+        *,
+        environment: str,
+        requester_region: Optional[str],
+    ) -> List[str]:
+        """Return dataset names from the client, or [] if the lookup fails."""
+        try:
+            infos = self._client.datasets(
+                environment=environment,
+                requester_region=requester_region,
+            )
+            return [d.dataset for d in infos]
+        except Exception as exc:
+            # Deliberately best-effort: tool building must not fail because
+            # discovery did. Log it so the resulting unscoped tools are explainable.
+            self._logger().warning(
+                "VaultKit dataset lookup failed; building tools without dataset scoping: %s",
+                exc,
+            )
+            return []
+    def _fetch_schema_hints(
+        self,
+        dataset_names: List[str],
+        *,
+        environment: str,
+        requester_region: Optional[str],
+    ) -> Dict[str, List[str]]:
+        """
+        Return {dataset: field hints} for the first MAX_SCHEMA_HINTS datasets.
+        Uses the schema's ``field_summaries`` when it is a list, otherwise its
+        ``field_names``. Datasets whose lookup fails are left out.
+        """
+        hints: Dict[str, List[str]] = {}
+        for dataset in dataset_names[: self.MAX_SCHEMA_HINTS]:
+            try:
+                schema = self._client.schema(
+                    dataset,
+                    environment=environment,
+                    requester_region=requester_region,
+                )
+                summaries = getattr(schema, "field_summaries", None)
+                hints[dataset] = summaries if isinstance(summaries, list) else schema.field_names
+            except Exception as exc:
+                # Best-effort per dataset: one failing schema must not drop
+                # the hints for the others.
+                self._logger().debug(
+                    "VaultKit schema lookup failed for dataset %r: %s", dataset, exc
+                )
+                continue
+        return hints

vaultkit/tools/definitions.py ADDED Viewed

@@ -0,0 +1,177 @@
+from __future__ import annotations
+from typing import Any, Dict, List, Optional
+def discover_tool_def(
+    *,
+    dataset_names: Optional[List[str]] = None,
+) -> Dict[str, Any]:
+    """
+    Return the canonical definition of the ``vaultkit_discover`` tool.
+    When ``dataset_names`` is non-empty, the names are appended to the
+    description as a comma-separated list.
+    """
+    sentences = [
+        "Discover datasets available in VaultKit that you are authorized to query.",
+        "Returns dataset names, data sources, and access level (allow/require_approval/deny).",
+        "Call this before vaultkit_query if you are unsure which datasets exist.",
+    ]
+    if dataset_names:
+        sentences.append(f"Available datasets include: {', '.join(dataset_names)}.")
+    properties: Dict[str, Any] = {
+        "environment": {
+            "type": "string",
+            "enum": ["production", "staging", "development"],
+            "default": "production",
+            "description": "Environment to discover datasets in.",
+        },
+        "requester_region": {
+            "type": "string",
+            "description": "Optional requester region context for policy-aware discovery.",
+        },
+        "dataset_region": {
+            "type": "string",
+            "description": "Optional dataset region context for policy-aware discovery.",
+        },
+    }
+    input_schema: Dict[str, Any] = {
+        "type": "object",
+        "properties": properties,
+        "required": [],
+    }
+    return {
+        "name": "vaultkit_discover",
+        "description": " ".join(sentences),
+        "input_schema": input_schema,
+    }
+def query_tool_def(
+    *,
+    dataset_names: Optional[List[str]] = None,
+    schema_hints: Optional[Dict[str, List[str]]] = None,
+) -> Dict[str, Any]:
+    """
+    Return the canonical definition of the ``vaultkit_query`` tool.
+    Args:
+        dataset_names: When non-empty, the "dataset" parameter is restricted
+            to these names via an enum (the list is copied, not aliased).
+        schema_hints: Optional {dataset: field names/summaries}. Up to two
+            datasets that actually have fields are shown, five fields each,
+            as examples in the "fields" parameter description.
+    """
+    # Dataset guidance
+    dataset_prop: Dict[str, Any] = {"type": "string"}
+    if dataset_names:
+        # Copy so later mutation of the caller's list cannot change the schema.
+        dataset_prop["enum"] = list(dataset_names)
+        dataset_prop["description"] = (
+            "The dataset to query. Must be one of the authorized datasets. "
+            "Use vaultkit_discover if unsure."
+        )
+    else:
+        dataset_prop["description"] = (
+            "The dataset to query (use vaultkit_discover to list options)."
+        )
+    # Field guidance
+    fields_desc = "Columns to retrieve. Omit to return all accessible columns."
+    examples: List[str] = []
+    for ds, fields in (schema_hints or {}).items():
+        if not fields:
+            # A dataset without fields would render as "name: []", which is
+            # noise and would use up one of the two example slots.
+            continue
+        # str() keeps the join from raising on non-string field names.
+        examples.append(f"{ds}: [{', '.join(str(f) for f in fields[:5])}]")
+        if len(examples) == 2:
+            break
+    if examples:
+        fields_desc += f" Example fields — {'; '.join(examples)}."
+    filter_condition = {
+        "type": "object",
+        "properties": {
+            "field": {"type": "string"},
+            "operator": {
+                "type": "string",
+                "enum": [
+                    "eq",
+                    "neq",
+                    "gt",
+                    "lt",
+                    "gte",
+                    "lte",
+                    "like",
+                    "in",
+                    "is_null",
+                    "is_not_null",
+                ],
+            },
+            "value": {},
+        },
+        "required": ["field", "operator"],
+    }
+    # Groups nest recursively through the $defs reference registered below.
+    filter_group = {
+        "type": "object",
+        "properties": {
+            "logic": {"type": "string", "enum": ["AND", "OR"]},
+            "conditions": {
+                "type": "array",
+                "items": {"oneOf": [filter_condition, {"$ref": "#/$defs/filter_group"}]},
+            },
+        },
+        "required": ["logic", "conditions"],
+    }
+    return {
+        "name": "vaultkit_query",
+        "description": (
+            "Query a governed dataset through VaultKit. Policies are enforced automatically: "
+            "sensitive fields may be masked and some datasets require approval before data is returned. "
+            "Returns data or a pending status if approval is required."
+        ),
+        "input_schema": {
+            "$defs": {"filter_group": filter_group},
+            "type": "object",
+            "properties": {
+                "dataset": dataset_prop,
+                "fields": {
+                    "type": "array",
+                    "items": {"type": "string"},
+                    "description": fields_desc,
+                },
+                "filters": {
+                    "type": "array",
+                    "items": {"oneOf": [filter_condition, filter_group]},
+                    "description": (
+                        "Filter conditions using AQL-style predicates. Supports nested AND/OR groups.\n\n"
+                        "Examples:\n"
+                        "- Simple: {field: 'age', operator: 'gt', value: 30}\n"
+                        "- Nested: {logic: 'OR', conditions: [{...}, {...}]}"
+                    ),
+                },
+                "limit": {
+                    "type": "integer",
+                    "minimum": 1,
+                    "maximum": 10000,
+                    "description": "Maximum number of rows to return.",
+                },
+                "purpose": {
+                    "type": "string",
+                    "description": (
+                        "Human-readable reason for accessing this data. Required for audit logging and "
+                        "approval workflows. Be specific."
+                    ),
+                },
+                "requester_region": {
+                    "type": "string",
+                    "description": "Optional requester region context for policy evaluation.",
+                },
+            },
+            "required": ["dataset"],
+        },
+    }
+def check_approval_tool_def() -> Dict[str, Any]:
+    """
+    Return the canonical definition of the ``vaultkit_check_approval`` tool.
+    The tool takes a single required string parameter, ``request_id``.
+    """
+    request_id_prop = {
+        "type": "string",
+        "description": "The request_id returned by vaultkit_query.",
+    }
+    description = (
+        "Check the approval status of a previously submitted VaultKit query that is pending human approval. "
+        "Returns 'pending', 'approved', or 'denied'. If approved, returns the data."
+    )
+    input_schema: Dict[str, Any] = {
+        "type": "object",
+        "properties": {"request_id": request_id_prop},
+        "required": ["request_id"],
+    }
+    return {
+        "name": "vaultkit_check_approval",
+        "description": description,
+        "input_schema": input_schema,
+    }