tobeverified 0.5.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: tobeverified
|
|
3
|
+
Version: 0.5.0
|
|
4
|
+
Summary: Runtime verification for non-deterministic LLM outputs — Python SDK
|
|
5
|
+
Project-URL: Homepage, https://tobeverified.com
|
|
6
|
+
Project-URL: Documentation, https://docs.tobeverified.com
|
|
7
|
+
Project-URL: Repository, https://github.com/pvinjamuri/gitproduct
|
|
8
|
+
Project-URL: Issues, https://github.com/pvinjamuri/gitproduct/issues
|
|
9
|
+
Author-email: Prasanna Vinjamuri <pvinjamuri@gmail.com>
|
|
10
|
+
License-Expression: MIT
|
|
11
|
+
Keywords: ai-safety,evals,llm,model-validation,tbv,tobeverified,verification
|
|
12
|
+
Classifier: Development Status :: 4 - Beta
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
15
|
+
Classifier: Operating System :: OS Independent
|
|
16
|
+
Classifier: Programming Language :: Python :: 3
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
20
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
21
|
+
Requires-Python: >=3.10
|
|
22
|
+
Requires-Dist: httpx>=0.24
|
|
23
|
+
Description-Content-Type: text/markdown
|
|
24
|
+
|
|
25
|
+
# tobeverified — Python SDK
|
|
26
|
+
|
|
27
|
+
Runtime verification for non-deterministic LLM outputs.
|
|
28
|
+
|
|
29
|
+
`tobeverified` submits each AI-generated decision to an independent verifier (LLM judge with policy + low-confidence routing to a human reviewer). Use it where unit tests don't reach: refund classifications, fraud screens, PII redaction, intent routing, tool-call validation.
|
|
30
|
+
|
|
31
|
+
## Install
|
|
32
|
+
|
|
33
|
+
```bash
|
|
34
|
+
pip install tobeverified
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
## Use
|
|
38
|
+
|
|
39
|
+
```python
|
|
40
|
+
from tobeverified import TBV
|
|
41
|
+
|
|
42
|
+
tbv = TBV() # reads TBV_API_KEY and TBV_BASE_URL from env
|
|
43
|
+
|
|
44
|
+
result = tbv.verify(
|
|
45
|
+
prompt="Is this customer message a refund request?",
|
|
46
|
+
context={"message": "I'd like my money back."},
|
|
47
|
+
allowed_verdicts=["yes", "no"],
|
|
48
|
+
confidence_threshold=0.85,
|
|
49
|
+
)
|
|
50
|
+
|
|
51
|
+
print(result["final_verdict"], result["final_tier"])
|
|
52
|
+
# → 'yes' 'agent' (or 'human' if confidence was below threshold)
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
`verify()` is fire-and-wait: it submits the task, then polls until the task is completed. For non-blocking flows, call `submit()` yourself and later use `get()` for a single status check or `wait()` to block until completion.
|
|
56
|
+
|
|
57
|
+
## Decorator pattern
|
|
58
|
+
|
|
59
|
+
```python
|
|
60
|
+
from tobeverified import verified
|
|
61
|
+
|
|
62
|
+
@verified(
|
|
63
|
+
allowed_verdicts=["safe", "unsafe", "needs_review"],
|
|
64
|
+
confidence_threshold=0.9,
|
|
65
|
+
source="my-app/comment-mod",
|
|
66
|
+
)
|
|
67
|
+
def moderate(text: str) -> dict:
|
|
68
|
+
return {
|
|
69
|
+
"prompt": "Is this comment safe to publish?",
|
|
70
|
+
"context": {"text": text},
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
result = moderate("user-supplied content")
|
|
74
|
+
# result["verdict"], result["tier"], result["task_id"]
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
## Get an API key
|
|
78
|
+
|
|
79
|
+
Sign in at [app.tobeverified.com](https://app.tobeverified.com) → **API keys** → create. Plaintext is shown exactly once.
|
|
80
|
+
|
|
81
|
+
```bash
|
|
82
|
+
export TBV_API_KEY=tbv_live_...
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
## Reviewer queue
|
|
86
|
+
|
|
87
|
+
Tasks routed to `human_needed` (confidence below threshold) appear at [review.tobeverified.com](https://review.tobeverified.com) for the account that submitted them. Anyone signed in to the account can resolve. The agent's first-pass verdict, confidence, and rationale are shown alongside the verdict form.
|
|
88
|
+
|
|
89
|
+
## Reference
|
|
90
|
+
|
|
91
|
+
| Method | Purpose |
|
|
92
|
+
|---|---|
|
|
93
|
+
| `TBV(base_url=None, api_key=None, timeout=30.0)` | Construct a client. Defaults read `TBV_BASE_URL` and `TBV_API_KEY` from env. |
|
|
94
|
+
| `submit(prompt, allowed_verdicts, ...)` | Enqueue a verification task; returns `{id, status, ...}`. |
|
|
95
|
+
| `get(task_id)` | Fetch current state. |
|
|
96
|
+
| `wait(task_id, timeout=300.0, poll_interval=1.5)` | Block until terminal state. |
|
|
97
|
+
| `verify(prompt, allowed_verdicts, ...)` | `submit()` + `wait()` in one call. |
|
|
98
|
+
|
|
99
|
+
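HTTP error responses (status 400 and above) raise `TBVError`; `wait()` timeouts raise `TBVTimeoutError`, which is a subclass of `TBVError` (itself an `Exception`). Network-level failures propagate as `httpx` exceptions.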
|
|
100
|
+
|
|
101
|
+
## Status
|
|
102
|
+
|
|
103
|
+
Developer preview. Versioned `0.x` until the API contract is frozen; expect minor surface changes. Production deployments tracked via [docs.tobeverified.com](https://docs.tobeverified.com).
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "tobeverified"
|
|
7
|
+
version = "0.5.0"
|
|
8
|
+
description = "Runtime verification for non-deterministic LLM outputs — Python SDK"
|
|
9
|
+
readme = "tobeverified/README.md"
|
|
10
|
+
requires-python = ">=3.10"
|
|
11
|
+
license = "MIT"
|
|
12
|
+
authors = [
|
|
13
|
+
{ name = "Prasanna Vinjamuri", email = "pvinjamuri@gmail.com" }
|
|
14
|
+
]
|
|
15
|
+
keywords = [
|
|
16
|
+
"llm",
|
|
17
|
+
"verification",
|
|
18
|
+
"evals",
|
|
19
|
+
"ai-safety",
|
|
20
|
+
"model-validation",
|
|
21
|
+
"tobeverified",
|
|
22
|
+
"tbv",
|
|
23
|
+
]
|
|
24
|
+
classifiers = [
|
|
25
|
+
"Development Status :: 4 - Beta",
|
|
26
|
+
"Intended Audience :: Developers",
|
|
27
|
+
"License :: OSI Approved :: MIT License",
|
|
28
|
+
"Operating System :: OS Independent",
|
|
29
|
+
"Programming Language :: Python :: 3",
|
|
30
|
+
"Programming Language :: Python :: 3.10",
|
|
31
|
+
"Programming Language :: Python :: 3.11",
|
|
32
|
+
"Programming Language :: Python :: 3.12",
|
|
33
|
+
"Topic :: Software Development :: Libraries :: Python Modules",
|
|
34
|
+
]
|
|
35
|
+
dependencies = [
|
|
36
|
+
"httpx>=0.24",
|
|
37
|
+
]
|
|
38
|
+
|
|
39
|
+
[project.urls]
|
|
40
|
+
Homepage = "https://tobeverified.com"
|
|
41
|
+
Documentation = "https://docs.tobeverified.com"
|
|
42
|
+
Repository = "https://github.com/pvinjamuri/gitproduct"
|
|
43
|
+
Issues = "https://github.com/pvinjamuri/gitproduct/issues"
|
|
44
|
+
|
|
45
|
+
[tool.hatch.build.targets.wheel]
|
|
46
|
+
packages = ["tobeverified"]
|
|
47
|
+
|
|
48
|
+
[tool.hatch.build.targets.sdist]
|
|
49
|
+
include = [
|
|
50
|
+
"tobeverified/**",
|
|
51
|
+
"pyproject.toml",
|
|
52
|
+
]
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
# tobeverified — Python SDK
|
|
2
|
+
|
|
3
|
+
Runtime verification for non-deterministic LLM outputs.
|
|
4
|
+
|
|
5
|
+
`tobeverified` submits each AI-generated decision to an independent verifier (LLM judge with policy + low-confidence routing to a human reviewer). Use it where unit tests don't reach: refund classifications, fraud screens, PII redaction, intent routing, tool-call validation.
|
|
6
|
+
|
|
7
|
+
## Install
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
pip install tobeverified
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
## Use
|
|
14
|
+
|
|
15
|
+
```python
|
|
16
|
+
from tobeverified import TBV
|
|
17
|
+
|
|
18
|
+
tbv = TBV() # reads TBV_API_KEY and TBV_BASE_URL from env
|
|
19
|
+
|
|
20
|
+
result = tbv.verify(
|
|
21
|
+
prompt="Is this customer message a refund request?",
|
|
22
|
+
context={"message": "I'd like my money back."},
|
|
23
|
+
allowed_verdicts=["yes", "no"],
|
|
24
|
+
confidence_threshold=0.85,
|
|
25
|
+
)
|
|
26
|
+
|
|
27
|
+
print(result["final_verdict"], result["final_tier"])
|
|
28
|
+
# → 'yes' 'agent' (or 'human' if confidence was below threshold)
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
`verify()` is fire-and-wait: it submits the task, then polls until the task is completed. For non-blocking flows, call `submit()` yourself and later use `get()` for a single status check or `wait()` to block until completion.
|
|
32
|
+
|
|
33
|
+
## Decorator pattern
|
|
34
|
+
|
|
35
|
+
```python
|
|
36
|
+
from tobeverified import verified
|
|
37
|
+
|
|
38
|
+
@verified(
|
|
39
|
+
allowed_verdicts=["safe", "unsafe", "needs_review"],
|
|
40
|
+
confidence_threshold=0.9,
|
|
41
|
+
source="my-app/comment-mod",
|
|
42
|
+
)
|
|
43
|
+
def moderate(text: str) -> dict:
|
|
44
|
+
return {
|
|
45
|
+
"prompt": "Is this comment safe to publish?",
|
|
46
|
+
"context": {"text": text},
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
result = moderate("user-supplied content")
|
|
50
|
+
# result["verdict"], result["tier"], result["task_id"]
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
## Get an API key
|
|
54
|
+
|
|
55
|
+
Sign in at [app.tobeverified.com](https://app.tobeverified.com) → **API keys** → create. Plaintext is shown exactly once.
|
|
56
|
+
|
|
57
|
+
```bash
|
|
58
|
+
export TBV_API_KEY=tbv_live_...
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
## Reviewer queue
|
|
62
|
+
|
|
63
|
+
Tasks routed to `human_needed` (confidence below threshold) appear at [review.tobeverified.com](https://review.tobeverified.com) for the account that submitted them. Anyone signed in to the account can resolve. The agent's first-pass verdict, confidence, and rationale are shown alongside the verdict form.
|
|
64
|
+
|
|
65
|
+
## Reference
|
|
66
|
+
|
|
67
|
+
| Method | Purpose |
|
|
68
|
+
|---|---|
|
|
69
|
+
| `TBV(base_url=None, api_key=None, timeout=30.0)` | Construct a client. Defaults read `TBV_BASE_URL` and `TBV_API_KEY` from env. |
|
|
70
|
+
| `submit(prompt, allowed_verdicts, ...)` | Enqueue a verification task; returns `{id, status, ...}`. |
|
|
71
|
+
| `get(task_id)` | Fetch current state. |
|
|
72
|
+
| `wait(task_id, timeout=300.0, poll_interval=1.5)` | Block until terminal state. |
|
|
73
|
+
| `verify(prompt, allowed_verdicts, ...)` | `submit()` + `wait()` in one call. |
|
|
74
|
+
|
|
75
|
+
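HTTP error responses (status 400 and above) raise `TBVError`; `wait()` timeouts raise `TBVTimeoutError`, which is a subclass of `TBVError` (itself an `Exception`). Network-level failures propagate as `httpx` exceptions.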
|
|
76
|
+
|
|
77
|
+
## Status
|
|
78
|
+
|
|
79
|
+
Developer preview. Versioned `0.x` until the API contract is frozen; expect minor surface changes. Production deployments tracked via [docs.tobeverified.com](https://docs.tobeverified.com).
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import time
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
import httpx
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class TBVError(Exception):
    """Base exception for failures reported by the tobeverified client."""
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class TBVTimeoutError(TBVError):
    """Raised when waiting for a task exceeds the allowed time."""
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class TBV:
|
|
17
|
+
def __init__(
|
|
18
|
+
self,
|
|
19
|
+
base_url: str | None = None,
|
|
20
|
+
api_key: str | None = None,
|
|
21
|
+
timeout: float = 30.0,
|
|
22
|
+
) -> None:
|
|
23
|
+
self.base_url = (
|
|
24
|
+
base_url or os.getenv("TBV_BASE_URL", "https://api.tobeverified.com")
|
|
25
|
+
).rstrip("/")
|
|
26
|
+
self.api_key = api_key or os.getenv("TBV_API_KEY")
|
|
27
|
+
self._http = httpx.Client(timeout=timeout)
|
|
28
|
+
|
|
29
|
+
def _headers(self) -> dict[str, str]:
|
|
30
|
+
h: dict[str, str] = {}
|
|
31
|
+
if self.api_key:
|
|
32
|
+
h["authorization"] = f"Bearer {self.api_key}"
|
|
33
|
+
return h
|
|
34
|
+
|
|
35
|
+
def submit(
|
|
36
|
+
self,
|
|
37
|
+
prompt: str,
|
|
38
|
+
allowed_verdicts: list[str],
|
|
39
|
+
context: dict[str, Any] | None = None,
|
|
40
|
+
task_type: str = "decide",
|
|
41
|
+
confidence_threshold: float = 0.8,
|
|
42
|
+
touchpoint: str = "chat",
|
|
43
|
+
source: str = "sdk",
|
|
44
|
+
) -> dict[str, Any]:
|
|
45
|
+
resp = self._http.post(
|
|
46
|
+
f"{self.base_url}/v1/tasks",
|
|
47
|
+
headers=self._headers(),
|
|
48
|
+
json={
|
|
49
|
+
"task_type": task_type,
|
|
50
|
+
"prompt": prompt,
|
|
51
|
+
"context": context or {},
|
|
52
|
+
"allowed_verdicts": allowed_verdicts,
|
|
53
|
+
"confidence_threshold": confidence_threshold,
|
|
54
|
+
"touchpoint": touchpoint,
|
|
55
|
+
"source": source,
|
|
56
|
+
},
|
|
57
|
+
)
|
|
58
|
+
if resp.status_code >= 400:
|
|
59
|
+
raise TBVError(f"submit failed {resp.status_code}: {resp.text}")
|
|
60
|
+
return resp.json()
|
|
61
|
+
|
|
62
|
+
def get(self, task_id: str) -> dict[str, Any]:
|
|
63
|
+
resp = self._http.get(
|
|
64
|
+
f"{self.base_url}/v1/tasks/{task_id}",
|
|
65
|
+
headers=self._headers(),
|
|
66
|
+
)
|
|
67
|
+
if resp.status_code >= 400:
|
|
68
|
+
raise TBVError(f"get failed {resp.status_code}: {resp.text}")
|
|
69
|
+
return resp.json()
|
|
70
|
+
|
|
71
|
+
def wait(
|
|
72
|
+
self,
|
|
73
|
+
task_id: str,
|
|
74
|
+
timeout: float = 300.0,
|
|
75
|
+
poll_interval: float = 1.5,
|
|
76
|
+
) -> dict[str, Any]:
|
|
77
|
+
deadline = time.time() + timeout
|
|
78
|
+
while time.time() < deadline:
|
|
79
|
+
task = self.get(task_id)
|
|
80
|
+
if task["status"] == "completed":
|
|
81
|
+
return task
|
|
82
|
+
time.sleep(poll_interval)
|
|
83
|
+
raise TBVTimeoutError(f"task {task_id} did not complete within {timeout}s")
|
|
84
|
+
|
|
85
|
+
def verify(
|
|
86
|
+
self,
|
|
87
|
+
prompt: str,
|
|
88
|
+
allowed_verdicts: list[str],
|
|
89
|
+
context: dict[str, Any] | None = None,
|
|
90
|
+
confidence_threshold: float = 0.8,
|
|
91
|
+
touchpoint: str = "chat",
|
|
92
|
+
source: str = "sdk",
|
|
93
|
+
timeout: float = 300.0,
|
|
94
|
+
) -> dict[str, Any]:
|
|
95
|
+
submitted = self.submit(
|
|
96
|
+
prompt=prompt,
|
|
97
|
+
allowed_verdicts=allowed_verdicts,
|
|
98
|
+
context=context,
|
|
99
|
+
confidence_threshold=confidence_threshold,
|
|
100
|
+
touchpoint=touchpoint,
|
|
101
|
+
source=source,
|
|
102
|
+
)
|
|
103
|
+
return self.wait(submitted["id"], timeout=timeout)
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
import functools
|
|
2
|
+
from typing import Any, Callable
|
|
3
|
+
|
|
4
|
+
from .client import TBV
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def verified(
    allowed_verdicts: list[str],
    confidence_threshold: float = 0.8,
    task_type: str = "decide",
    touchpoint: str = "chat",
    source: str | None = None,
    base_url: str | None = None,
    timeout: float = 300.0,
) -> Callable[..., Any]:
    """Wrap a function so its output is verified by TBV before being returned.

    The wrapped function must return a dict containing at least a "prompt"
    key. Every other key is forwarded verbatim as the task context (a nested
    "context" key is not unwrapped). The dict returned by the wrapped function
    is not modified.

    The decorator submits a verification task, waits for completion, and
    returns a dict with three keys: "verdict" (the task's ``final_verdict``),
    "tier" (its ``final_tier``) and "task_id" (its ``id``).

    Args:
        allowed_verdicts: Verdict strings sent with the task.
        confidence_threshold: Threshold sent with the task.
        task_type: Task type sent with the task.
        touchpoint: Sent with the task; routes the event in the Control Tower
            stream together with ``source``.
        source: Sent with the task; defaults to the wrapped function's
            qualified name.
        base_url: API root passed to the ``TBV`` client.
        timeout: Seconds to wait for the task to complete.

    Raises:
        TypeError: At call time, when the wrapped function does not return a
            dict with a "prompt" key.
    """

    def decorator(fn: Callable[..., dict[str, Any]]) -> Callable[..., dict[str, Any]]:
        # One client per decorated function, created at decoration time.
        client = TBV(base_url=base_url)
        resolved_source = source or fn.__qualname__

        @functools.wraps(fn)
        def wrapper(*args: Any, **kwargs: Any) -> dict[str, Any]:
            payload = fn(*args, **kwargs)
            if not isinstance(payload, dict) or "prompt" not in payload:
                raise TypeError(
                    "@verified function must return a dict containing a 'prompt' key"
                )
            # Build the context from a filtered copy rather than popping, so a
            # shared or cached dict returned by `fn` is left intact.
            context = {
                key: value for key, value in payload.items() if key != "prompt"
            }
            # submit() + wait() instead of verify() so `task_type` actually
            # reaches the API.
            submitted = client.submit(
                prompt=payload["prompt"],
                allowed_verdicts=allowed_verdicts,
                context=context,
                task_type=task_type,
                confidence_threshold=confidence_threshold,
                touchpoint=touchpoint,
                source=resolved_source,
            )
            task = client.wait(submitted["id"], timeout=timeout)
            return {
                "verdict": task["final_verdict"],
                "tier": task["final_tier"],
                "task_id": task["id"],
            }

        return wrapper

    return decorator
|