vectora 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vectora-0.1.0/LICENSE +21 -0
- vectora-0.1.0/PKG-INFO +107 -0
- vectora-0.1.0/README.md +86 -0
- vectora-0.1.0/pyproject.toml +29 -0
- vectora-0.1.0/setup.cfg +4 -0
- vectora-0.1.0/tests/test_client.py +47 -0
- vectora-0.1.0/vectora/__init__.py +13 -0
- vectora-0.1.0/vectora/_version.py +1 -0
- vectora-0.1.0/vectora/agent/__init__.py +7 -0
- vectora-0.1.0/vectora/client.py +108 -0
- vectora-0.1.0/vectora/compliance/__init__.py +3 -0
- vectora-0.1.0/vectora/compliance/trace.py +20 -0
- vectora-0.1.0/vectora/exceptions.py +30 -0
- vectora-0.1.0/vectora/explain/__init__.py +3 -0
- vectora-0.1.0/vectora/explain/shap.py +78 -0
- vectora-0.1.0/vectora/llm/__init__.py +7 -0
- vectora-0.1.0/vectora/models/__init__.py +3 -0
- vectora-0.1.0/vectora/models/sklearn.py +146 -0
- vectora-0.1.0/vectora.egg-info/PKG-INFO +107 -0
- vectora-0.1.0/vectora.egg-info/SOURCES.txt +21 -0
- vectora-0.1.0/vectora.egg-info/dependency_links.txt +1 -0
- vectora-0.1.0/vectora.egg-info/requires.txt +8 -0
- vectora-0.1.0/vectora.egg-info/top_level.txt +1 -0
vectora-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Vectora
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
vectora-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: vectora
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: AI Assurance Platform - Monitor, Explain, Govern every AI system in production
|
|
5
|
+
Author-email: Vectora <hello@vectora.ai>
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://vectora.ai
|
|
8
|
+
Project-URL: Repository, https://github.com/vectora-dev/vectora-sdk
|
|
9
|
+
Keywords: machine learning,monitoring,explainability,drift detection,SHAP,MLOps
|
|
10
|
+
Requires-Python: >=3.9
|
|
11
|
+
Description-Content-Type: text/markdown
|
|
12
|
+
License-File: LICENSE
|
|
13
|
+
Requires-Dist: numpy>=1.21
|
|
14
|
+
Requires-Dist: requests>=2.28
|
|
15
|
+
Requires-Dist: scikit-learn>=1.0
|
|
16
|
+
Requires-Dist: shap>=0.42
|
|
17
|
+
Provides-Extra: dev
|
|
18
|
+
Requires-Dist: build>=1.2.0; extra == "dev"
|
|
19
|
+
Requires-Dist: pytest>=8.0; extra == "dev"
|
|
20
|
+
Dynamic: license-file
|
|
21
|
+
|
|
22
|
+
# Vectora Python SDK
|
|
23
|
+
|
|
24
|
+
Vectora helps you monitor sklearn models in production without adding latency to your prediction path.
|
|
25
|
+
|
|
26
|
+
The SDK captures:
|
|
27
|
+
- prediction metrics like accuracy, F1, precision, and recall
|
|
28
|
+
- feature distribution summaries for drift detection
|
|
29
|
+
- SHAP-based feature importance when `shap` is available
|
|
30
|
+
- trace IDs that connect predictions back to the Vectora dashboard
|
|
31
|
+
|
|
32
|
+
## Install
|
|
33
|
+
|
|
34
|
+
```bash
|
|
35
|
+
pip install vectora
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
## Quickstart
|
|
39
|
+
|
|
40
|
+
```python
|
|
41
|
+
from vectora import VectoraClient
|
|
42
|
+
from vectora.models import SklearnConnector
|
|
43
|
+
|
|
44
|
+
client = VectoraClient(api_key="vct_live_xxx")
|
|
45
|
+
connector = SklearnConnector(
|
|
46
|
+
client=client,
|
|
47
|
+
model=your_sklearn_model,
|
|
48
|
+
model_id="11111111-1111-1111-1111-111111111111",
|
|
49
|
+
)
|
|
50
|
+
|
|
51
|
+
predictions = connector.predict(X_test, y_true=y_test)
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
`predict()` returns the model's predictions immediately. Vectora sends the monitoring payload in a background thread so your production inference path stays fast.
|
|
55
|
+
|
|
56
|
+
## What gets sent
|
|
57
|
+
|
|
58
|
+
Each prediction call sends a payload to `/api/ingest/metrics` with:
|
|
59
|
+
- `trace_id`
|
|
60
|
+
- `model_id`
|
|
61
|
+
- `metrics`
|
|
62
|
+
- `feature_distributions`
|
|
63
|
+
- `shap_values`
|
|
64
|
+
- `sample_count`
|
|
65
|
+
- `timestamp`
|
|
66
|
+
|
|
67
|
+
If SHAP is not installed, the SDK logs a warning once and continues without SHAP values.
|
|
68
|
+
|
|
69
|
+
If the network call fails, the SDK logs the error to stderr and never raises it back to your prediction path.
|
|
70
|
+
|
|
71
|
+
## API
|
|
72
|
+
|
|
73
|
+
### `VectoraClient`
|
|
74
|
+
|
|
75
|
+
```python
|
|
76
|
+
client = VectoraClient(
|
|
77
|
+
api_key="vct_live_xxx",
|
|
78
|
+
base_url="https://vectora.ai",
|
|
79
|
+
timeout=5.0,
|
|
80
|
+
max_retries=2,
|
|
81
|
+
)
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
### `SklearnConnector`
|
|
85
|
+
|
|
86
|
+
```python
|
|
87
|
+
connector = SklearnConnector(client, model_id="11111111-1111-1111-1111-111111111111", model=trained_model)
|
|
88
|
+
predictions = connector.predict(X, y_true=y_true)
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
## Coming Soon
|
|
92
|
+
|
|
93
|
+
`vectora.llm` and `vectora.agent` are reserved for future releases and raise `ComingSoonError` when accessed.
|
|
94
|
+
|
|
95
|
+
## Release
|
|
96
|
+
|
|
97
|
+
Tagging the repository with `v*` triggers the GitHub Actions publish workflow in [`.github/workflows/publish.yml`](.github/workflows/publish.yml).
|
|
98
|
+
|
|
99
|
+
```bash
|
|
100
|
+
git tag v0.1.0
|
|
101
|
+
git push origin v0.1.0
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
The workflow:
|
|
105
|
+
- runs the SDK test suite
|
|
106
|
+
- builds the source and wheel distributions
|
|
107
|
+
- publishes to PyPI using GitHub Actions trusted publishing
|
vectora-0.1.0/README.md
ADDED
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
# Vectora Python SDK
|
|
2
|
+
|
|
3
|
+
Vectora helps you monitor sklearn models in production without adding latency to your prediction path.
|
|
4
|
+
|
|
5
|
+
The SDK captures:
|
|
6
|
+
- prediction metrics like accuracy, F1, precision, and recall
|
|
7
|
+
- feature distribution summaries for drift detection
|
|
8
|
+
- SHAP-based feature importance when `shap` is available
|
|
9
|
+
- trace IDs that connect predictions back to the Vectora dashboard
|
|
10
|
+
|
|
11
|
+
## Install
|
|
12
|
+
|
|
13
|
+
```bash
|
|
14
|
+
pip install vectora
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
## Quickstart
|
|
18
|
+
|
|
19
|
+
```python
|
|
20
|
+
from vectora import VectoraClient
|
|
21
|
+
from vectora.models import SklearnConnector
|
|
22
|
+
|
|
23
|
+
client = VectoraClient(api_key="vct_live_xxx")
|
|
24
|
+
connector = SklearnConnector(
|
|
25
|
+
client=client,
|
|
26
|
+
model=your_sklearn_model,
|
|
27
|
+
model_id="11111111-1111-1111-1111-111111111111",
|
|
28
|
+
)
|
|
29
|
+
|
|
30
|
+
predictions = connector.predict(X_test, y_true=y_test)
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
`predict()` returns the model's predictions immediately. Vectora sends the monitoring payload in a background thread so your production inference path stays fast.
|
|
34
|
+
|
|
35
|
+
## What gets sent
|
|
36
|
+
|
|
37
|
+
Each prediction call sends a payload to `/api/ingest/metrics` with:
|
|
38
|
+
- `trace_id`
|
|
39
|
+
- `model_id`
|
|
40
|
+
- `metrics`
|
|
41
|
+
- `feature_distributions`
|
|
42
|
+
- `shap_values`
|
|
43
|
+
- `sample_count`
|
|
44
|
+
- `timestamp`
|
|
45
|
+
|
|
46
|
+
If SHAP is not installed, the SDK logs a warning once and continues without SHAP values.
|
|
47
|
+
|
|
48
|
+
If the network call fails, the SDK logs the error to stderr and never raises it back to your prediction path.
|
|
49
|
+
|
|
50
|
+
## API
|
|
51
|
+
|
|
52
|
+
### `VectoraClient`
|
|
53
|
+
|
|
54
|
+
```python
|
|
55
|
+
client = VectoraClient(
|
|
56
|
+
api_key="vct_live_xxx",
|
|
57
|
+
base_url="https://vectora.ai",
|
|
58
|
+
timeout=5.0,
|
|
59
|
+
max_retries=2,
|
|
60
|
+
)
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
### `SklearnConnector`
|
|
64
|
+
|
|
65
|
+
```python
|
|
66
|
+
connector = SklearnConnector(client, model_id="11111111-1111-1111-1111-111111111111", model=trained_model)
|
|
67
|
+
predictions = connector.predict(X, y_true=y_true)
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
## Coming Soon
|
|
71
|
+
|
|
72
|
+
`vectora.llm` and `vectora.agent` are reserved for future releases and raise `ComingSoonError` when accessed.
|
|
73
|
+
|
|
74
|
+
## Release
|
|
75
|
+
|
|
76
|
+
Tagging the repository with `v*` triggers the GitHub Actions publish workflow in [`.github/workflows/publish.yml`](.github/workflows/publish.yml).
|
|
77
|
+
|
|
78
|
+
```bash
|
|
79
|
+
git tag v0.1.0
|
|
80
|
+
git push origin v0.1.0
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
The workflow:
|
|
84
|
+
- runs the SDK test suite
|
|
85
|
+
- builds the source and wheel distributions
|
|
86
|
+
- publishes to PyPI using GitHub Actions trusted publishing
|
|
vectora-0.1.0/pyproject.toml
ADDED
@@ -0,0 +1,29 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "vectora"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "AI Assurance Platform - Monitor, Explain, Govern every AI system in production"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = "MIT"
|
|
11
|
+
requires-python = ">=3.9"
|
|
12
|
+
authors = [{ name = "Vectora", email = "hello@vectora.ai" }]
|
|
13
|
+
keywords = ["machine learning", "monitoring", "explainability", "drift detection", "SHAP", "MLOps"]
|
|
14
|
+
dependencies = [
|
|
15
|
+
"numpy>=1.21",
|
|
16
|
+
"requests>=2.28",
|
|
17
|
+
"scikit-learn>=1.0",
|
|
18
|
+
"shap>=0.42",
|
|
19
|
+
]
|
|
20
|
+
|
|
21
|
+
[project.optional-dependencies]
|
|
22
|
+
dev = ["build>=1.2.0", "pytest>=8.0"]
|
|
23
|
+
|
|
24
|
+
[project.urls]
|
|
25
|
+
Homepage = "https://vectora.ai"
|
|
26
|
+
Repository = "https://github.com/vectora-dev/vectora-sdk"
|
|
27
|
+
|
|
28
|
+
[tool.setuptools.packages.find]
|
|
29
|
+
include = ["vectora*"]
|
vectora-0.1.0/tests/test_client.py
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
from __future__ import annotations

import re
from uuid import uuid4

import numpy as np
import pytest

from vectora import VectoraClient
from vectora.compliance.trace import generate_trace_id
from vectora.exceptions import VectoraConfigError
from vectora.models import SklearnConnector


class FakeModel:
    # Minimal sklearn-style stand-in: classifies on the first feature only.
    def predict(self, X):
        array = np.asarray(X)
        return np.where(array[:, 0] > 0.5, 1, 0)


def test_client_requires_vectora_key_prefix():
    # Keys must start with 'vct_'; anything else is a configuration error.
    with pytest.raises(VectoraConfigError):
        VectoraClient(api_key="invalid_key")


def test_generate_trace_id_matches_expected_format():
    # Expected shape: vct_<YYYYMMDD>_<4 lowercase alphanumerics>.
    trace_id = generate_trace_id()
    assert re.fullmatch(r"vct_\d{8}_[a-z0-9]{4}", trace_id)


def test_sklearn_connector_requires_uuid_model_id():
    # model_id must parse as a UUID; human-readable slugs are rejected.
    with pytest.raises(VectoraConfigError):
        SklearnConnector(client=VectoraClient(api_key="vct_live_12345678901234567890"), model_id="fraud-detector-v1")


def test_predict_returns_underlying_model_predictions(monkeypatch):
    client = VectoraClient(api_key="vct_live_12345678901234567890")
    connector = SklearnConnector(client=client, model=FakeModel(), model_id=str(uuid4()))

    # Stub out the background network send so the test never touches the wire.
    monkeypatch.setattr(SklearnConnector, "_send_payload", lambda self, payload: None)

    X = np.array([[0.1, 0.2], [0.8, 0.4], [0.7, 0.9]])
    expected = FakeModel().predict(X)

    predictions = connector.predict(X)

    # predict() must be a transparent pass-through of the wrapped model.
    assert np.array_equal(predictions, expected)
|
|
vectora-0.1.0/vectora/__init__.py
ADDED
@@ -0,0 +1,13 @@
|
|
|
1
|
+
# Public package surface for the Vectora SDK.
from vectora._version import __version__
from vectora.client import VectoraClient
# isValidTraceId is a camelCase alias of is_valid_trace_id, exported alongside it.
from vectora.compliance import generate_trace_id, is_valid_trace_id, isValidTraceId
from vectora.models import SklearnConnector

__all__ = [
    "__version__",
    "VectoraClient",
    "SklearnConnector",
    "generate_trace_id",
    "is_valid_trace_id",
    "isValidTraceId",
]
|
|
vectora-0.1.0/vectora/_version.py
ADDED
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.1.0"
|
|
vectora-0.1.0/vectora/client.py
ADDED
@@ -0,0 +1,108 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import time
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
import requests
|
|
7
|
+
|
|
8
|
+
from vectora.exceptions import (
|
|
9
|
+
VectoraAuthError,
|
|
10
|
+
VectoraConfigError,
|
|
11
|
+
VectoraConnectionError,
|
|
12
|
+
VectoraNotFoundError,
|
|
13
|
+
VectoraRateLimitError,
|
|
14
|
+
VectoraServerError,
|
|
15
|
+
)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class VectoraClient:
    """Thin HTTP client for the Vectora ingest API.

    Validates the API key shape up front, then exposes ``_post`` with
    bounded retries for connection-level failures. HTTP-level failures
    (401/404/429/5xx) are mapped to typed exceptions and are never retried.
    """

    def __init__(
        self,
        api_key: str,
        base_url: str = "https://vectora.ai",
        timeout: float = 5.0,
        max_retries: int = 2,
    ) -> None:
        """Configure the client.

        Args:
            api_key: Vectora API key; must start with ``vct_`` and be at
                least 20 characters long.
            base_url: API root; a trailing slash is stripped.
            timeout: per-request timeout in seconds.
            max_retries: extra attempts after the first, for connection
                failures only.

        Raises:
            VectoraConfigError: if the API key fails the shape checks.
        """
        if not isinstance(api_key, str) or not api_key.startswith("vct_"):
            raise VectoraConfigError("Vectora API keys must start with 'vct_'.")

        if len(api_key.strip()) < 20:
            raise VectoraConfigError("Vectora API keys must be at least 20 characters long.")

        self.api_key = api_key
        # Normalize so f"{base_url}{path}" never produces a double slash.
        self.base_url = base_url.rstrip("/")
        self.timeout = timeout
        self.max_retries = max_retries
        # One session per client: connection pooling across calls.
        self._session = requests.Session()

    def _post(self, path: str, json_payload: dict[str, Any]) -> dict[str, Any]:
        """POST ``json_payload`` to ``path`` and return the decoded JSON body.

        Connection failures (timeouts, DNS, refused) are retried up to
        ``max_retries`` times with a short linear backoff; HTTP error
        statuses raise immediately.

        Returns:
            The response JSON as a dict ({} for an empty body; non-dict
            JSON is wrapped as ``{"data": ...}``).

        Raises:
            VectoraConfigError: if ``path`` does not start with '/'.
            VectoraAuthError: on HTTP 401.
            VectoraNotFoundError: on HTTP 404.
            VectoraRateLimitError: on HTTP 429.
            VectoraServerError: on 5xx or any other non-2xx status.
            VectoraConnectionError: when every attempt failed to connect.
        """
        if not path.startswith("/"):
            raise VectoraConfigError("Vectora client paths must start with '/'.")

        url = f"{self.base_url}{path}"
        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
            "User-Agent": "vectora-python-sdk/0.1.0",
        }

        last_error: Exception | None = None

        for attempt in range(self.max_retries + 1):
            try:
                response = self._session.post(
                    url,
                    json=json_payload,
                    headers=headers,
                    timeout=self.timeout,
                )
            except requests.Timeout as exc:
                # Timeout is a RequestException subclass; it is handled first
                # for the friendlier message. Keep the original exception as
                # __cause__ so the chain survives the deferred raise below.
                last_error = VectoraConnectionError(
                    "Timed out while reaching the Vectora API. Check your network or increase the timeout."
                )
                last_error.__cause__ = exc
            except requests.RequestException as exc:
                last_error = VectoraConnectionError(
                    f"Couldn't reach the Vectora API: {exc}"
                )
                last_error.__cause__ = exc
            else:
                if response.status_code == 401:
                    raise VectoraAuthError(
                        "Vectora rejected the API key. Verify that your key is current and starts with 'vct_'."
                    )

                if response.status_code == 404:
                    raise VectoraNotFoundError(
                        "The requested Vectora endpoint or resource was not found."
                    )

                if response.status_code == 429:
                    # Surface the server's Retry-After hint when it is a plain
                    # number of seconds (the header may also be an HTTP date).
                    retry_after = response.headers.get("Retry-After")
                    detail = (
                        f" Retry after {retry_after} seconds."
                        if retry_after and retry_after.isdigit()
                        else ""
                    )
                    raise VectoraRateLimitError(
                        f"Vectora rate-limited this request.{detail}"
                    )

                if 500 <= response.status_code:
                    raise VectoraServerError(
                        f"Vectora returned a server error ({response.status_code}). Try again shortly."
                    )

                if not response.ok:
                    raise VectoraServerError(
                        f"Vectora rejected the request with status {response.status_code}: {response.text}"
                    )

                if not response.content:
                    return {}

                data = response.json()
                return data if isinstance(data, dict) else {"data": data}

            if attempt < self.max_retries:
                # Simple linear backoff: 0.25s, 0.5s, ...
                time.sleep(0.25 * (attempt + 1))

        raise last_error or VectoraConnectionError("Couldn't reach the Vectora API.")
|
|
vectora-0.1.0/vectora/compliance/trace.py
ADDED
@@ -0,0 +1,20 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from datetime import datetime, timezone
|
|
4
|
+
from secrets import token_hex
|
|
5
|
+
import re
|
|
6
|
+
|
|
7
|
+
TRACE_ID_PATTERN = re.compile(r"^vct_\d{8}_[a-z0-9]{4,}$", re.IGNORECASE)
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def generate_trace_id() -> str:
|
|
11
|
+
date_prefix = datetime.now(timezone.utc).strftime("%Y%m%d")
|
|
12
|
+
return f"vct_{date_prefix}_{token_hex(2)}"
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def is_valid_trace_id(value: str) -> bool:
|
|
16
|
+
return bool(TRACE_ID_PATTERN.fullmatch(value))
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def isValidTraceId(value: str) -> bool:
|
|
20
|
+
return is_valid_trace_id(value)
|
|
vectora-0.1.0/vectora/exceptions.py
ADDED
@@ -0,0 +1,30 @@
|
|
|
1
|
+
class VectoraError(Exception):
    """Root of the Vectora SDK exception hierarchy; catch this for any SDK failure."""


class VectoraConfigError(VectoraError):
    """The SDK was given invalid configuration (e.g. a malformed key or id)."""


class VectoraAuthError(VectoraError):
    """The Vectora API rejected the supplied API key."""


class VectoraConnectionError(VectoraError):
    """The Vectora API could not be reached (timeout, DNS, refused, ...)."""


class VectoraRateLimitError(VectoraError):
    """The Vectora API throttled the request (HTTP 429)."""


class VectoraNotFoundError(VectoraError):
    """The requested Vectora endpoint or resource does not exist (HTTP 404)."""


class VectoraServerError(VectoraError):
    """The Vectora API answered with an unexpected server-side error."""


class ComingSoonError(VectoraError):
    """A reserved, not-yet-shipped SDK surface was accessed."""
|
|
vectora-0.1.0/vectora/explain/shap.py
ADDED
@@ -0,0 +1,78 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import warnings
|
|
4
|
+
from importlib import import_module
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
import numpy as np
|
|
8
|
+
|
|
9
|
+
_SHAP_WARNING_EMITTED = False
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class SHAPWrapper:
|
|
13
|
+
def compute(self, model: Any, X: Any, feature_names: list[str]) -> dict[str, float] | None:
|
|
14
|
+
shap_module = self._import_shap()
|
|
15
|
+
if shap_module is None:
|
|
16
|
+
return None
|
|
17
|
+
|
|
18
|
+
array = np.asarray(X)
|
|
19
|
+
if array.ndim == 1:
|
|
20
|
+
array = array.reshape(1, -1)
|
|
21
|
+
|
|
22
|
+
explainer = self._build_explainer(shap_module, model, array)
|
|
23
|
+
if explainer is None:
|
|
24
|
+
return None
|
|
25
|
+
|
|
26
|
+
try:
|
|
27
|
+
shap_values = explainer(array)
|
|
28
|
+
values = getattr(shap_values, "values", shap_values)
|
|
29
|
+
except Exception:
|
|
30
|
+
return None
|
|
31
|
+
|
|
32
|
+
normalized = self._normalize_values(values)
|
|
33
|
+
if normalized is None or normalized.shape[-1] != len(feature_names):
|
|
34
|
+
return None
|
|
35
|
+
|
|
36
|
+
mean_abs = np.mean(np.abs(normalized), axis=0)
|
|
37
|
+
return {
|
|
38
|
+
feature_name: float(value)
|
|
39
|
+
for feature_name, value in zip(feature_names, mean_abs)
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
def _build_explainer(self, shap_module: Any, model: Any, background: np.ndarray) -> Any:
|
|
43
|
+
try:
|
|
44
|
+
return shap_module.Explainer(model, background)
|
|
45
|
+
except Exception:
|
|
46
|
+
try:
|
|
47
|
+
return shap_module.Explainer(model.predict, background)
|
|
48
|
+
except Exception:
|
|
49
|
+
return None
|
|
50
|
+
|
|
51
|
+
def _import_shap(self) -> Any | None:
|
|
52
|
+
global _SHAP_WARNING_EMITTED
|
|
53
|
+
|
|
54
|
+
try:
|
|
55
|
+
return import_module("shap")
|
|
56
|
+
except Exception:
|
|
57
|
+
if not _SHAP_WARNING_EMITTED:
|
|
58
|
+
warnings.warn(
|
|
59
|
+
"The 'shap' package is not installed. Vectora will skip SHAP values until it is available.",
|
|
60
|
+
RuntimeWarning,
|
|
61
|
+
stacklevel=2,
|
|
62
|
+
)
|
|
63
|
+
_SHAP_WARNING_EMITTED = True
|
|
64
|
+
return None
|
|
65
|
+
|
|
66
|
+
def _normalize_values(self, values: Any) -> np.ndarray | None:
|
|
67
|
+
array = np.asarray(values)
|
|
68
|
+
|
|
69
|
+
if array.ndim == 3:
|
|
70
|
+
array = array[..., 0]
|
|
71
|
+
|
|
72
|
+
if array.ndim == 1:
|
|
73
|
+
array = array.reshape(1, -1)
|
|
74
|
+
|
|
75
|
+
if array.ndim != 2:
|
|
76
|
+
return None
|
|
77
|
+
|
|
78
|
+
return array
|
|
vectora-0.1.0/vectora/models/sklearn.py
ADDED
@@ -0,0 +1,146 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from datetime import datetime, timezone
|
|
4
|
+
import sys
|
|
5
|
+
import threading
|
|
6
|
+
from typing import Any
|
|
7
|
+
from uuid import UUID
|
|
8
|
+
|
|
9
|
+
import numpy as np
|
|
10
|
+
|
|
11
|
+
from vectora.compliance.trace import generate_trace_id
|
|
12
|
+
from vectora.exceptions import VectoraConfigError
|
|
13
|
+
from vectora.explain.shap import SHAPWrapper
|
|
14
|
+
|
|
15
|
+
# scikit-learn is a declared dependency, but guard the import so a broken
# install degrades to "no metrics" (see _compute_metrics) rather than
# crashing this module at import time.
try:
    from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
except Exception:  # pragma: no cover - dependency contract handles this at install time
    accuracy_score = None
    f1_score = None
    precision_score = None
    recall_score = None
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class SklearnConnector:
    """Wrap a fitted sklearn-compatible model with Vectora monitoring.

    ``predict`` returns the model's predictions immediately and ships the
    monitoring payload to the Vectora API from a background daemon thread,
    so the caller's inference path gains no network latency.
    """

    def __init__(self, client: Any, model_id: str, model: Any | None = None) -> None:
        """Bind a client and model id; the model itself may be supplied later.

        Raises:
            VectoraConfigError: if ``model_id`` is not a valid UUID string.
        """
        self.client = client
        self.model = model
        self.model_id = self._validate_model_id(model_id)
        self._shap = SHAPWrapper()

    def predict(self, X: Any, y_true: Any | None = None, model: Any | None = None) -> Any:
        """Run the model on ``X`` and enqueue a monitoring payload.

        Args:
            X: 2D array-like of samples (a 1D input is treated as one row).
            y_true: optional ground-truth labels; enables metric computation.
            model: optional per-call override of the model bound at init.

        Returns:
            Whatever the underlying model's ``predict`` returns, unchanged.

        Raises:
            VectoraConfigError: if no model is available.
        """
        # Explicit None check: sklearn objects can define __len__ (e.g. an
        # empty Pipeline), so `model or self.model` could silently discard a
        # falsy-but-valid model passed by the caller.
        model_to_use = self.model if model is None else model
        if model_to_use is None:
            raise VectoraConfigError(
                "Provide a fitted sklearn-compatible model when creating SklearnConnector or calling predict()."
            )

        predictions = model_to_use.predict(X)
        payload = self._build_payload(model_to_use, X, predictions, y_true=y_true)

        # Daemon thread: monitoring must never block process exit or the
        # caller's prediction path.
        thread = threading.Thread(
            target=self._send_payload,
            args=(payload,),
            daemon=True,
        )
        thread.start()

        return predictions

    def _build_payload(
        self,
        model: Any,
        X: Any,
        predictions: Any,
        y_true: Any | None = None,
    ) -> dict[str, Any]:
        """Assemble the full ingest payload for one predict() call."""
        feature_names = self._feature_names(X)
        metrics = self._compute_metrics(y_true, predictions)
        feature_distributions = self._compute_distributions(X, feature_names)
        # Best-effort: None when shap is unavailable or computation fails.
        shap_values = self._shap.compute(model, X, feature_names)

        return {
            "trace_id": generate_trace_id(),
            "model_id": self.model_id,
            "metrics": metrics,
            "feature_distributions": feature_distributions,
            "shap_values": shap_values,
            "sample_count": int(self._row_count(X)),
            # RFC 3339 timestamp with a literal 'Z' suffix for UTC.
            "timestamp": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"),
        }

    def _compute_metrics(self, y_true: Any | None, predictions: Any) -> dict[str, float]:
        """Return accuracy/f1/precision/recall, or {} when labels or sklearn are absent."""
        if y_true is None:
            return {}

        if accuracy_score is None or f1_score is None or precision_score is None or recall_score is None:
            return {}

        y_true_array = np.asarray(y_true)
        predictions_array = np.asarray(predictions)
        # Binary averaging for <=2 observed classes, weighted otherwise.
        average = "binary" if len(np.unique(y_true_array)) <= 2 else "weighted"

        return {
            "accuracy": float(accuracy_score(y_true_array, predictions_array)),
            "f1": float(f1_score(y_true_array, predictions_array, average=average, zero_division=0)),
            "precision": float(
                precision_score(y_true_array, predictions_array, average=average, zero_division=0)
            ),
            "recall": float(recall_score(y_true_array, predictions_array, average=average, zero_division=0)),
        }

    def _compute_distributions(self, X: Any, feature_names: list[str]) -> dict[str, dict[str, float]]:
        """Summarize each feature column (mean/std/quartiles) for drift detection."""
        array = self._as_2d_array(X)
        distributions: dict[str, dict[str, float]] = {}

        for index, feature_name in enumerate(feature_names):
            column = array[:, index].astype(float)
            distributions[feature_name] = {
                "mean": float(np.mean(column)),
                "std": float(np.std(column)),
                "p25": float(np.percentile(column, 25)),
                "p50": float(np.percentile(column, 50)),
                "p75": float(np.percentile(column, 75)),
            }

        return distributions

    def _send_payload(self, payload: dict[str, Any]) -> None:
        """Ship one payload; log failures to stderr, never raise to the caller."""
        trace_id = payload.get("trace_id", "unknown-trace")
        try:
            self.client._post("/api/ingest/metrics", payload)
        except Exception as exc:
            print(
                f"[vectora] failed to send metrics for trace_id={trace_id}: {exc}",
                file=sys.stderr,
            )

    def _feature_names(self, X: Any) -> list[str]:
        """Use DataFrame column names when present; otherwise feature_<i>."""
        if hasattr(X, "columns"):
            return [str(column) for column in X.columns]

        array = self._as_2d_array(X)
        return [f"feature_{index}" for index in range(array.shape[1])]

    def _as_2d_array(self, X: Any) -> np.ndarray:
        """Coerce X to a 2D ndarray (1D becomes a single row); reject other ranks."""
        if hasattr(X, "to_numpy"):
            array = X.to_numpy()
        else:
            array = np.asarray(X)

        if array.ndim == 1:
            array = array.reshape(1, -1)

        if array.ndim != 2:
            raise VectoraConfigError("X must be a 2D array-like object for Vectora monitoring.")

        return array

    def _row_count(self, X: Any) -> int:
        """Number of samples in X after 2D coercion."""
        return int(self._as_2d_array(X).shape[0])

    def _validate_model_id(self, model_id: str) -> str:
        """Return the canonical UUID string form of model_id, or raise."""
        try:
            return str(UUID(model_id))
        except (ValueError, TypeError) as exc:
            raise VectoraConfigError("model_id must be a valid UUID string.") from exc
|
|
vectora-0.1.0/vectora.egg-info/PKG-INFO
ADDED
@@ -0,0 +1,107 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: vectora
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: AI Assurance Platform - Monitor, Explain, Govern every AI system in production
|
|
5
|
+
Author-email: Vectora <hello@vectora.ai>
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://vectora.ai
|
|
8
|
+
Project-URL: Repository, https://github.com/vectora-dev/vectora-sdk
|
|
9
|
+
Keywords: machine learning,monitoring,explainability,drift detection,SHAP,MLOps
|
|
10
|
+
Requires-Python: >=3.9
|
|
11
|
+
Description-Content-Type: text/markdown
|
|
12
|
+
License-File: LICENSE
|
|
13
|
+
Requires-Dist: numpy>=1.21
|
|
14
|
+
Requires-Dist: requests>=2.28
|
|
15
|
+
Requires-Dist: scikit-learn>=1.0
|
|
16
|
+
Requires-Dist: shap>=0.42
|
|
17
|
+
Provides-Extra: dev
|
|
18
|
+
Requires-Dist: build>=1.2.0; extra == "dev"
|
|
19
|
+
Requires-Dist: pytest>=8.0; extra == "dev"
|
|
20
|
+
Dynamic: license-file
|
|
21
|
+
|
|
22
|
+
# Vectora Python SDK
|
|
23
|
+
|
|
24
|
+
Vectora helps you monitor sklearn models in production without adding latency to your prediction path.
|
|
25
|
+
|
|
26
|
+
The SDK captures:
|
|
27
|
+
- prediction metrics like accuracy, F1, precision, and recall
|
|
28
|
+
- feature distribution summaries for drift detection
|
|
29
|
+
- SHAP-based feature importance when `shap` is available
|
|
30
|
+
- trace IDs that connect predictions back to the Vectora dashboard
|
|
31
|
+
|
|
32
|
+
## Install
|
|
33
|
+
|
|
34
|
+
```bash
|
|
35
|
+
pip install vectora
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
## Quickstart
|
|
39
|
+
|
|
40
|
+
```python
|
|
41
|
+
from vectora import VectoraClient
|
|
42
|
+
from vectora.models import SklearnConnector
|
|
43
|
+
|
|
44
|
+
client = VectoraClient(api_key="vct_live_xxx")
|
|
45
|
+
connector = SklearnConnector(
|
|
46
|
+
client=client,
|
|
47
|
+
model=your_sklearn_model,
|
|
48
|
+
model_id="11111111-1111-1111-1111-111111111111",
|
|
49
|
+
)
|
|
50
|
+
|
|
51
|
+
predictions = connector.predict(X_test, y_true=y_test)
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
`predict()` returns the model's predictions immediately. Vectora sends the monitoring payload in a background thread so your production inference path stays fast.
|
|
55
|
+
|
|
56
|
+
## What gets sent
|
|
57
|
+
|
|
58
|
+
Each prediction call sends a payload to `/api/ingest/metrics` with:
|
|
59
|
+
- `trace_id`
|
|
60
|
+
- `model_id`
|
|
61
|
+
- `metrics`
|
|
62
|
+
- `feature_distributions`
|
|
63
|
+
- `shap_values`
|
|
64
|
+
- `sample_count`
|
|
65
|
+
- `timestamp`
|
|
66
|
+
|
|
67
|
+
If SHAP is not installed, the SDK logs a warning once and continues without SHAP values.
|
|
68
|
+
|
|
69
|
+
If the network call fails, the SDK logs the error to stderr and never raises it back to your prediction path.
|
|
70
|
+
|
|
71
|
+
## API
|
|
72
|
+
|
|
73
|
+
### `VectoraClient`
|
|
74
|
+
|
|
75
|
+
```python
|
|
76
|
+
client = VectoraClient(
|
|
77
|
+
api_key="vct_live_xxx",
|
|
78
|
+
base_url="https://vectora.ai",
|
|
79
|
+
timeout=5.0,
|
|
80
|
+
max_retries=2,
|
|
81
|
+
)
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
### `SklearnConnector`
|
|
85
|
+
|
|
86
|
+
```python
|
|
87
|
+
connector = SklearnConnector(client, model_id="11111111-1111-1111-1111-111111111111", model=trained_model)
|
|
88
|
+
predictions = connector.predict(X, y_true=y_true)
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
## Coming Soon
|
|
92
|
+
|
|
93
|
+
`vectora.llm` and `vectora.agent` are reserved for future releases and raise `ComingSoonError` when accessed.
|
|
94
|
+
|
|
95
|
+
## Release
|
|
96
|
+
|
|
97
|
+
Tagging the repository with `v*` triggers the GitHub Actions publish workflow in [`.github/workflows/publish.yml`](.github/workflows/publish.yml).
|
|
98
|
+
|
|
99
|
+
```bash
|
|
100
|
+
git tag v0.1.0
|
|
101
|
+
git push origin v0.1.0
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
The workflow:
|
|
105
|
+
- runs the SDK test suite
|
|
106
|
+
- builds the source and wheel distributions
|
|
107
|
+
- publishes to PyPI using GitHub Actions trusted publishing
|
|
vectora-0.1.0/vectora.egg-info/SOURCES.txt
ADDED
@@ -0,0 +1,21 @@
|
|
|
1
|
+
LICENSE
|
|
2
|
+
README.md
|
|
3
|
+
pyproject.toml
|
|
4
|
+
tests/test_client.py
|
|
5
|
+
vectora/__init__.py
|
|
6
|
+
vectora/_version.py
|
|
7
|
+
vectora/client.py
|
|
8
|
+
vectora/exceptions.py
|
|
9
|
+
vectora.egg-info/PKG-INFO
|
|
10
|
+
vectora.egg-info/SOURCES.txt
|
|
11
|
+
vectora.egg-info/dependency_links.txt
|
|
12
|
+
vectora.egg-info/requires.txt
|
|
13
|
+
vectora.egg-info/top_level.txt
|
|
14
|
+
vectora/agent/__init__.py
|
|
15
|
+
vectora/compliance/__init__.py
|
|
16
|
+
vectora/compliance/trace.py
|
|
17
|
+
vectora/explain/__init__.py
|
|
18
|
+
vectora/explain/shap.py
|
|
19
|
+
vectora/llm/__init__.py
|
|
20
|
+
vectora/models/__init__.py
|
|
21
|
+
vectora/models/sklearn.py
|
|
vectora-0.1.0/vectora.egg-info/dependency_links.txt
ADDED
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
vectora-0.1.0/vectora.egg-info/top_level.txt
ADDED
@@ -0,0 +1 @@
|
|
|
1
|
+
vectora
|