vaultkit 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,75 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+ from typing import Any, Dict, List, Optional
5
+
6
@dataclass(frozen=True)
class DatasetSchema:
    """Immutable view of one dataset's schema payload.

    ``fields`` holds the raw per-field entries as received. The accessor
    properties only consider entries that are dicts carrying a non-empty
    string ``"name"``, so all three accessors agree on which fields exist.
    """

    dataset: str
    datasource: str
    fields: List[Dict[str, Any]]
    correlation_id: Optional[str] = None

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "DatasetSchema":
        """Build a schema from a response dictionary.

        Args:
            data: Mapping with ``dataset``, ``datasource`` and optionally
                ``fields`` and ``correlation_id``.

        Raises:
            ValueError: If ``dataset`` or ``datasource`` is missing or empty.
        """
        dataset = data.get("dataset")
        datasource = data.get("datasource")

        if not dataset or not datasource:
            raise ValueError("DatasetSchema requires 'dataset' and 'datasource'")

        raw_fields = data.get("fields")
        # Anything that is not a list (missing, None, dict, ...) means "no fields".
        fields = raw_fields if isinstance(raw_fields, list) else []

        return cls(
            dataset=dataset,
            datasource=datasource,
            fields=fields,
            correlation_id=data.get("correlation_id"),
        )

    def _named_fields(self) -> List[Dict[str, Any]]:
        """Return the field entries that are dicts with a non-empty string name."""
        named = []
        for entry in self.fields:
            if not isinstance(entry, dict):
                continue
            name = entry.get("name")
            # Non-string names cannot be joined into summaries or reliably
            # used as mapping keys, so they are skipped everywhere.
            if isinstance(name, str) and name:
                named.append(entry)
        return named

    @property
    def field_names(self) -> List[str]:
        """Names of all usable fields, in their original order."""
        return [entry["name"] for entry in self._named_fields()]

    @property
    def field_map(self) -> Dict[str, Dict[str, Any]]:
        """Mapping of field name to its raw entry (last entry wins on duplicates)."""
        return {entry["name"]: entry for entry in self._named_fields()}

    @property
    def field_summaries(self) -> List[str]:
        """
        Human-readable summaries for LLM grounding.

        Each summary is the field name followed by optional markers:
        ``(masked)``, ``(restricted)`` when visibility is ``"deny"``, and
        ``(sensitivity: ...)`` when a sensitivity value is present.
        """
        summaries = []

        for entry in self._named_fields():
            parts = [entry["name"]]

            if entry.get("masked"):
                parts.append("(masked)")

            if entry.get("visibility") == "deny":
                parts.append("(restricted)")

            if entry.get("sensitivity"):
                parts.append(f"(sensitivity: {entry['sensitivity']})")

            summaries.append(" ".join(parts))

        return summaries
@@ -0,0 +1,53 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+ from typing import Any, Dict, List, Optional
5
+
6
+
7
@dataclass(frozen=True)
class FetchResult:
    """Immutable container for the rows returned by a fetch, plus metadata."""

    rows: List[Dict[str, Any]]
    meta: Optional[Dict[str, Any]] = None
    correlation_id: Optional[str] = None

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "FetchResult":
        """Build a result from a response dictionary.

        ``rows`` falls back to ``[]`` and ``meta`` to ``None`` when the
        payload carries a value of the wrong type. Being a classmethod,
        this returns an instance of whichever class it is called on.
        """
        rows = data.get("rows") or []
        if not isinstance(rows, list):
            rows = []

        meta = data.get("meta")
        if not isinstance(meta, dict):
            meta = None

        return cls(
            rows=rows,
            meta=meta,
            correlation_id=data.get("correlation_id"),
        )

    # Compatibility properties: derived views of ``rows`` / ``meta`` exposed
    # under the names documented on each property below.

    @property
    def data(self) -> List[Dict[str, Any]]:
        """Alias for rows — used by executor and client."""
        return self.rows

    @property
    def row_count(self) -> int:
        """Row count derived from rows length."""
        return len(self.rows)

    @property
    def masked_fields(self) -> List[str]:
        """
        Masked field names from meta, if your API returns them there.
        Returns [] safely if meta is absent or has no masked_fields key.
        """
        value = (self.meta or {}).get("masked_fields")
        return value if isinstance(value, list) else []

    @property
    def is_empty(self) -> bool:
        """True when the result contains no rows."""
        return not self.rows
@@ -0,0 +1,73 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass, field
4
+ from typing import Any, Dict, List, Optional
5
+
6
+
7
@dataclass(frozen=True)
class QueryResult:
    """Immutable outcome of a query request, normalised from a response dict."""

    status: str
    grant_ref: Optional[str] = None
    expires_at: Optional[str] = None

    masked_fields: List[str] = field(default_factory=list)
    rows: List[Dict[str, Any]] = field(default_factory=list)

    request_id: Optional[str] = None
    reason: Optional[str] = None
    meta: Optional[Dict[str, Any]] = None

    policy_id: Optional[str] = None
    approver_role: Optional[str] = None

    correlation_id: Optional[str] = None  # propagated from HTTP layer

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "QueryResult":
        """Build a result from a response dictionary.

        The status is lower-cased and stripped. ``rows`` and
        ``masked_fields`` are copied when they are a list or tuple and
        replaced by ``[]`` otherwise; ``meta`` is kept only when it is a dict.

        Raises:
            ValueError: If ``status`` is missing/empty, or if ``request_id``
                is missing for a status that requires one.
        """
        status = str(data.get("status") or "").lower().strip()

        if not status:
            raise ValueError("Missing 'status' in QueryResult response")

        request_id = data.get("request_id")

        if status in ("queued", "granted", "ok", "pending_approval") and not request_id:
            raise ValueError(f"Missing request_id for status '{status}'")

        # Only genuine sequences are copied: list("abc") would silently turn
        # a malformed string payload into a list of characters.
        raw_masked = data.get("masked_fields")
        masked_fields = list(raw_masked) if isinstance(raw_masked, (list, tuple)) else []

        raw_rows = data.get("rows")
        rows = list(raw_rows) if isinstance(raw_rows, (list, tuple)) else []

        meta = data.get("meta")
        if not isinstance(meta, dict):
            meta = None

        return cls(
            status=status,
            grant_ref=data.get("grant_ref") or data.get("grant_id"),
            expires_at=data.get("expires_at"),
            masked_fields=masked_fields,
            rows=rows,
            request_id=request_id,
            reason=data.get("reason"),
            meta=meta,
            policy_id=data.get("policy_id"),
            approver_role=data.get("approver_role"),
            correlation_id=data.get("correlation_id"),
        )

    @property
    def is_granted(self) -> bool:
        """True when the status is ``"granted"``."""
        return self.status == "granted"

    @property
    def is_denied(self) -> bool:
        """True when the status is ``"denied"``."""
        return self.status == "denied"

    @property
    def needs_approval(self) -> bool:
        """True when the status is ``"queued"`` or ``"pending_approval"``."""
        return self.status in ("queued", "pending_approval")

    @property
    def is_pending(self) -> bool:
        """Same condition as :attr:`needs_approval`."""
        return self.status in ("queued", "pending_approval")

    @property
    def is_terminal(self) -> bool:
        """True when the status is ``"granted"`` or ``"denied"``."""
        return self.status in ("granted", "denied")

    @property
    def has_data(self) -> bool:
        """True when at least one row is present."""
        return bool(self.rows)
@@ -0,0 +1,5 @@
1
+ from .builder import ToolBuilder
2
+ from .executor import ToolExecutor
3
+ from .adapters import ToolProvider
4
+
5
+ __all__ = ["ToolBuilder", "ToolExecutor", "ToolProvider"]
@@ -0,0 +1,12 @@
1
+ from __future__ import annotations
2
+
3
+ from enum import Enum
4
+
5
+
6
class ToolProvider(str, Enum):
    """Identifiers for the supported tool-schema output formats.

    Members subclass ``str``, so each one compares equal to its plain
    string value (for example ``ToolProvider.RAW == "raw"``).
    """

    OPENAI = "openai"
    ANTHROPIC = "anthropic"
    RAW = "raw"
10
+
11
+
12
+ __all__ = ["ToolProvider"]
@@ -0,0 +1,17 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Any, Dict
4
+
5
+
6
def to_anthropic_tool(defn: Dict[str, Any]) -> Dict[str, Any]:
    """
    Translate a canonical tool definition into Anthropic's 'tools' entry.

    Output shape:
        { "name": str, "description": str, "input_schema": {...} }

    ``name`` is mandatory (a missing key raises ``KeyError``). A missing
    ``description`` becomes ``""`` and a missing ``input_schema`` becomes an
    empty object schema.
    """
    tool: Dict[str, Any] = {"name": defn["name"]}
    tool["description"] = defn.get("description", "")
    tool["input_schema"] = defn.get(
        "input_schema",
        {"type": "object", "properties": {}},
    )
    return tool
@@ -0,0 +1,23 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Any, Dict
4
+
5
+
6
def to_openai_tool(defn: Dict[str, Any]) -> Dict[str, Any]:
    """
    Translate a canonical tool definition into OpenAI's 'tools' entry.

    Canonical input:
        { "name": str, "description": str, "input_schema": {...} }

    OpenAI output:
        { "type": "function", "function": { "name": ..., "description": ..., "parameters": ... } }

    ``name`` is mandatory (a missing key raises ``KeyError``). A missing
    ``description`` becomes ``""`` and a missing ``input_schema`` becomes an
    empty object schema.
    """
    function_spec: Dict[str, Any] = {"name": defn["name"]}
    function_spec["description"] = defn.get("description", "")
    function_spec["parameters"] = defn.get(
        "input_schema",
        {"type": "object", "properties": {}},
    )
    return {"type": "function", "function": function_spec}
@@ -0,0 +1,128 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import TYPE_CHECKING, Any, Dict, List, Optional
4
+
5
+ from .adapters import ToolProvider
6
+ from .adapters.anthropic import to_anthropic_tool
7
+ from .adapters.openai import to_openai_tool
8
+ from .definitions import (
9
+ check_approval_tool_def,
10
+ discover_tool_def,
11
+ query_tool_def,
12
+ )
13
+
14
+ if TYPE_CHECKING:
15
+ from vaultkit.client import VaultKitClient
16
+
17
+
18
class ToolBuilder:
    """
    Generates provider-specific tool schemas scoped to what the agent
    is authorized to access.

    Provider formats:
    - OpenAI: tools=[{type:"function", function:{name,description,parameters}}]
    - Anthropic: tools=[{name,description,input_schema}]
    - Raw: canonical tool defs=[{name,description,input_schema}]
    """

    MAX_SCHEMA_HINTS = 10  # prevent N+1 explosion: one schema call per dataset

    def __init__(self, client: "VaultKitClient") -> None:
        self._client = client

    def build(
        self,
        *,
        provider: ToolProvider = ToolProvider.OPENAI,
        environment: str = "production",
        include_discover: bool = True,
        include_query: bool = True,
        include_check_approval: bool = False,
        datasets: Optional[List[str]] = None,
        fetch_schema_hints: bool = True,
        requester_region: Optional[str] = None,
    ) -> List[Dict[str, Any]]:
        """Build the tool list in the requested provider format.

        Args:
            provider: Output format; anything other than RAW or ANTHROPIC
                yields the OpenAI format.
            environment: Passed to the client's dataset and schema lookups.
            include_discover: Include the discover tool.
            include_query: Include the query tool.
            include_check_approval: Include the approval-status tool.
            datasets: Dataset names to use. When ``None`` or empty, names
                are fetched through the client.
            fetch_schema_hints: Look up field hints for the first
                ``MAX_SCHEMA_HINTS`` datasets.
            requester_region: Passed to the client's lookups.

        Returns:
            A list of tool definitions. Lookup failures degrade the result
            (no dataset names / no hints) instead of raising.
        """
        # Build canonical tool defs
        resolved_datasets = datasets or self._fetch_dataset_names(
            environment=environment,
            requester_region=requester_region,
        )

        schema_hints: Optional[Dict[str, List[str]]] = None
        if fetch_schema_hints and resolved_datasets:
            schema_hints = self._fetch_schema_hints(
                resolved_datasets,
                environment=environment,
                requester_region=requester_region,
            )

        canonical: List[Dict[str, Any]] = []

        if include_discover:
            canonical.append(discover_tool_def(dataset_names=resolved_datasets))

        if include_query:
            canonical.append(
                query_tool_def(dataset_names=resolved_datasets, schema_hints=schema_hints)
            )

        if include_check_approval:
            canonical.append(check_approval_tool_def())

        return self._to_provider_format(canonical, provider)

    def build_minimal(self, *, provider: ToolProvider = ToolProvider.OPENAI) -> List[Dict[str, Any]]:
        """Fast startup: just vaultkit_query, no registry calls."""
        return self._to_provider_format([query_tool_def()], provider)

    # private

    @staticmethod
    def _to_provider_format(
        canonical: List[Dict[str, Any]],
        provider: ToolProvider,
    ) -> List[Dict[str, Any]]:
        """Convert canonical tool defs to the given provider's format."""
        if provider == ToolProvider.RAW:
            return canonical
        if provider == ToolProvider.ANTHROPIC:
            return [to_anthropic_tool(t) for t in canonical]
        # default OPENAI
        return [to_openai_tool(t) for t in canonical]

    @staticmethod
    def _warn(message: str, *args: Any) -> None:
        """Log a best-effort failure without interrupting tool building."""
        import logging  # local import keeps this class free of new module-level deps

        logging.getLogger(__name__).warning(message, *args)

    def _fetch_dataset_names(
        self,
        *,
        environment: str,
        requester_region: Optional[str],
    ) -> List[str]:
        """Return dataset names from the client, or ``[]`` if the lookup fails."""
        try:
            infos = self._client.datasets(
                environment=environment,
                requester_region=requester_region,
            )
            return [d.dataset for d in infos]
        except Exception as exc:
            # Deliberately best-effort: tools are still usable without names.
            self._warn(
                "Dataset lookup failed; building tools without dataset names: %s",
                exc,
            )
            return []

    def _fetch_schema_hints(
        self,
        dataset_names: List[str],
        *,
        environment: str,
        requester_region: Optional[str],
    ) -> Dict[str, List[str]]:
        """Return per-dataset field hints for at most ``MAX_SCHEMA_HINTS`` datasets.

        Datasets whose schema lookup fails are omitted from the result.
        """
        hints: Dict[str, List[str]] = {}

        for dataset in dataset_names[: self.MAX_SCHEMA_HINTS]:
            try:
                schema = self._client.schema(
                    dataset,
                    environment=environment,
                    requester_region=requester_region,
                )
                # Prefer the richer summaries; fall back to plain field names.
                summaries = getattr(schema, "field_summaries", None)
                hints[dataset] = summaries if isinstance(summaries, list) else schema.field_names
            except Exception as exc:
                self._warn("Schema lookup failed for dataset %r; skipping hints: %s", dataset, exc)
                continue

        return hints
@@ -0,0 +1,177 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Any, Dict, List, Optional
4
+
5
+
6
def discover_tool_def(
    *,
    dataset_names: Optional[List[str]] = None,
) -> Dict[str, Any]:
    """Return the canonical definition of the ``vaultkit_discover`` tool.

    When ``dataset_names`` is non-empty, the names are appended to the tool
    description as a comma-separated list.
    """
    summary = (
        "Discover datasets available in VaultKit that you are authorized to query. "
        "Returns dataset names, data sources, and access level (allow/require_approval/deny). "
        "Call this before vaultkit_query if you are unsure which datasets exist."
    )
    if dataset_names:
        listing = ", ".join(dataset_names)
        summary = f"{summary} Available datasets include: {listing}."

    environment_prop = {
        "type": "string",
        "enum": ["production", "staging", "development"],
        "default": "production",
        "description": "Environment to discover datasets in.",
    }
    requester_region_prop = {
        "type": "string",
        "description": "Optional requester region context for policy-aware discovery.",
    }
    dataset_region_prop = {
        "type": "string",
        "description": "Optional dataset region context for policy-aware discovery.",
    }

    input_schema = {
        "type": "object",
        "properties": {
            "environment": environment_prop,
            "requester_region": requester_region_prop,
            "dataset_region": dataset_region_prop,
        },
        "required": [],
    }

    return {
        "name": "vaultkit_discover",
        "description": summary,
        "input_schema": input_schema,
    }
42
+
43
+
44
def query_tool_def(
    *,
    dataset_names: Optional[List[str]] = None,
    schema_hints: Optional[Dict[str, List[str]]] = None,
) -> Dict[str, Any]:
    """Return the canonical definition of the ``vaultkit_query`` tool.

    Args:
        dataset_names: When non-empty, the ``dataset`` property is
            restricted to these names via ``enum``.
        schema_hints: Mapping of dataset name to field strings. Up to five
            fields from each of the first two entries are quoted as
            examples in the ``fields`` description.
    """
    # Dataset guidance: constrain to known names when we have them.
    dataset_prop: Dict[str, Any] = {"type": "string"}
    if dataset_names:
        dataset_prop["enum"] = dataset_names
        dataset_prop["description"] = (
            "The dataset to query. Must be one of the authorized datasets. "
            "Use vaultkit_discover if unsure."
        )
    else:
        dataset_prop["description"] = (
            "The dataset to query (use vaultkit_discover to list options)."
        )

    # Field guidance: optionally append a few example field lists.
    fields_desc = "Columns to retrieve. Omit to return all accessible columns."
    if schema_hints:
        examples = [
            f"{ds}: [{', '.join(cols[:5])}]"
            for ds, cols in list(schema_hints.items())[:2]
        ]
        if examples:
            fields_desc += f" Example fields — {'; '.join(examples)}."

    operators = [
        "eq",
        "neq",
        "gt",
        "lt",
        "gte",
        "lte",
        "like",
        "in",
        "is_null",
        "is_not_null",
    ]

    # A single predicate; "value" is left unconstrained and optional.
    filter_condition = {
        "type": "object",
        "properties": {
            "field": {"type": "string"},
            "operator": {"type": "string", "enum": operators},
            "value": {},
        },
        "required": ["field", "operator"],
    }

    # A group nests conditions or further groups (via $ref into $defs).
    nested_item = {"oneOf": [filter_condition, {"$ref": "#/$defs/filter_group"}]}
    filter_group = {
        "type": "object",
        "properties": {
            "logic": {"type": "string", "enum": ["AND", "OR"]},
            "conditions": {"type": "array", "items": nested_item},
        },
        "required": ["logic", "conditions"],
    }

    properties = {
        "dataset": dataset_prop,
        "fields": {
            "type": "array",
            "items": {"type": "string"},
            "description": fields_desc,
        },
        "filters": {
            "type": "array",
            "items": {"oneOf": [filter_condition, filter_group]},
            "description": (
                "Filter conditions using AQL-style predicates. Supports nested AND/OR groups.\n\n"
                "Examples:\n"
                "- Simple: {field: 'age', operator: 'gt', value: 30}\n"
                "- Nested: {logic: 'OR', conditions: [{...}, {...}]}"
            ),
        },
        "limit": {
            "type": "integer",
            "minimum": 1,
            "maximum": 10000,
            "description": "Maximum number of rows to return.",
        },
        "purpose": {
            "type": "string",
            "description": (
                "Human-readable reason for accessing this data. Required for audit logging and "
                "approval workflows. Be specific."
            ),
        },
        "requester_region": {
            "type": "string",
            "description": "Optional requester region context for policy evaluation.",
        },
    }

    description = (
        "Query a governed dataset through VaultKit. Policies are enforced automatically: "
        "sensitive fields may be masked and some datasets require approval before data is returned. "
        "Returns data or a pending status if approval is required."
    )

    return {
        "name": "vaultkit_query",
        "description": description,
        "input_schema": {
            "$defs": {"filter_group": filter_group},
            "type": "object",
            "properties": properties,
            "required": ["dataset"],
        },
    }
158
+
159
+
160
def check_approval_tool_def() -> Dict[str, Any]:
    """Return the canonical definition of the ``vaultkit_check_approval`` tool.

    The tool takes a single required string argument, ``request_id``.
    """
    request_id_prop = {
        "type": "string",
        "description": "The request_id returned by vaultkit_query.",
    }
    input_schema = {
        "type": "object",
        "properties": {"request_id": request_id_prop},
        "required": ["request_id"],
    }
    description = (
        "Check the approval status of a previously submitted VaultKit query that is pending human approval. "
        "Returns 'pending', 'approved', or 'denied'. If approved, returns the data."
    )
    return {
        "name": "vaultkit_check_approval",
        "description": description,
        "input_schema": input_schema,
    }