turingpulse-sdk 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- turingpulse_sdk-1.0.0/PKG-INFO +14 -0
- turingpulse_sdk-1.0.0/README.md +215 -0
- turingpulse_sdk-1.0.0/pyproject.toml +23 -0
- turingpulse_sdk-1.0.0/setup.cfg +4 -0
- turingpulse_sdk-1.0.0/tests/test_cross_framework_fingerprint.py +378 -0
- turingpulse_sdk-1.0.0/tests/test_double_instrumentation.py +48 -0
- turingpulse_sdk-1.0.0/tests/test_fingerprint.py +336 -0
- turingpulse_sdk-1.0.0/tests/test_fingerprint_categorization.py +483 -0
- turingpulse_sdk-1.0.0/tests/test_governance_precheck.py +200 -0
- turingpulse_sdk-1.0.0/tests/test_robustness.py +99 -0
- turingpulse_sdk-1.0.0/turingpulse_sdk/__init__.py +44 -0
- turingpulse_sdk-1.0.0/turingpulse_sdk/attachments.py +176 -0
- turingpulse_sdk-1.0.0/turingpulse_sdk/client.py +435 -0
- turingpulse_sdk-1.0.0/turingpulse_sdk/config.py +234 -0
- turingpulse_sdk-1.0.0/turingpulse_sdk/context.py +220 -0
- turingpulse_sdk-1.0.0/turingpulse_sdk/decorators.py +143 -0
- turingpulse_sdk-1.0.0/turingpulse_sdk/deploy.py +249 -0
- turingpulse_sdk-1.0.0/turingpulse_sdk/event_builder.py +223 -0
- turingpulse_sdk-1.0.0/turingpulse_sdk/exceptions.py +38 -0
- turingpulse_sdk-1.0.0/turingpulse_sdk/fingerprint.py +388 -0
- turingpulse_sdk-1.0.0/turingpulse_sdk/governance.py +147 -0
- turingpulse_sdk-1.0.0/turingpulse_sdk/instrumentation.py +86 -0
- turingpulse_sdk-1.0.0/turingpulse_sdk/integrations/__init__.py +52 -0
- turingpulse_sdk-1.0.0/turingpulse_sdk/integrations/autogen.py +335 -0
- turingpulse_sdk-1.0.0/turingpulse_sdk/integrations/base.py +224 -0
- turingpulse_sdk-1.0.0/turingpulse_sdk/integrations/crewai.py +262 -0
- turingpulse_sdk-1.0.0/turingpulse_sdk/integrations/langgraph.py +872 -0
- turingpulse_sdk-1.0.0/turingpulse_sdk/integrations/llamaindex.py +268 -0
- turingpulse_sdk-1.0.0/turingpulse_sdk/kpi.py +104 -0
- turingpulse_sdk-1.0.0/turingpulse_sdk/llm_detector.py +348 -0
- turingpulse_sdk-1.0.0/turingpulse_sdk/plugin.py +554 -0
- turingpulse_sdk-1.0.0/turingpulse_sdk/registry.py +36 -0
- turingpulse_sdk-1.0.0/turingpulse_sdk/tracing.py +92 -0
- turingpulse_sdk-1.0.0/turingpulse_sdk/transcripts.py +69 -0
- turingpulse_sdk-1.0.0/turingpulse_sdk/trigger_state.py +30 -0
- turingpulse_sdk-1.0.0/turingpulse_sdk.egg-info/PKG-INFO +14 -0
- turingpulse_sdk-1.0.0/turingpulse_sdk.egg-info/SOURCES.txt +38 -0
- turingpulse_sdk-1.0.0/turingpulse_sdk.egg-info/dependency_links.txt +1 -0
- turingpulse_sdk-1.0.0/turingpulse_sdk.egg-info/requires.txt +9 -0
- turingpulse_sdk-1.0.0/turingpulse_sdk.egg-info/top_level.txt +1 -0
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: turingpulse-sdk
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: Python plugin decorators for TuringPulse observability, governance, and KPI mapping.
|
|
5
|
+
Author: TuringPulse
|
|
6
|
+
Requires-Python: >=3.11
|
|
7
|
+
Requires-Dist: pydantic<3.0.0,>=2.7.0
|
|
8
|
+
Requires-Dist: httpx>=0.26.0
|
|
9
|
+
Requires-Dist: opentelemetry-api<2.0.0,>=1.21.0
|
|
10
|
+
Requires-Dist: typing_extensions>=4.9.0
|
|
11
|
+
Requires-Dist: turingpulse-interfaces>=1.0.0
|
|
12
|
+
Provides-Extra: dev
|
|
13
|
+
Requires-Dist: pytest>=8.2.0; extra == "dev"
|
|
14
|
+
Requires-Dist: anyio>=4.3.0; extra == "dev"
|
|
@@ -0,0 +1,215 @@
|
|
|
1
|
+
# TuringPulse Python SDK
|
|
2
|
+
|
|
3
|
+
End-to-end instrumentation helpers that let any Python service publish telemetry to the Agent Observability Service, open governance tasks in the Agent PM Service, and register agent entrypoints without touching the host application's core configuration.
|
|
4
|
+
|
|
5
|
+
## Framework Compatibility
|
|
6
|
+
|
|
7
|
+
<!-- COMPAT-BADGES:START -->
|
|
8
|
+
 
|
|
9
|
+
<!-- COMPAT-BADGES:END -->
|
|
10
|
+
|
|
11
|
+
<details>
|
|
12
|
+
<summary>Full Compatibility Matrix</summary>
|
|
13
|
+
|
|
14
|
+
<!-- COMPAT-MATRIX:START -->
|
|
15
|
+
| Framework | Version | Python 3.11 | Python 3.12 | Python 3.13 |
|
|
16
|
+
| --- | --- | --- | --- | --- |
|
|
17
|
+
| AWS Bedrock | 1.42.66 | -- | -- | pass |
|
|
18
|
+
| Mistral | 0.4.0 | pass | -- | -- |
|
|
19
|
+
| Mistral | 2.0.1 | -- | pass | -- |
|
|
20
|
+
<!-- COMPAT-MATRIX:END -->
|
|
21
|
+
|
|
22
|
+
</details>
|
|
23
|
+
|
|
24
|
+
## Highlights
|
|
25
|
+
|
|
26
|
+
- `TuringPulsePlugin.init(...)` bootstraps connectivity (base URLs, tenant headers, API keys, tracing defaults).
|
|
27
|
+
- `@turingpulse.instrument(...)` decorator captures latency, status, errors, KPI mappings, and governance directives (HITL/HATL/HOTL) for both sync and async functions.
|
|
28
|
+
- Automatic OpenTelemetry span management with optional custom labels and KPI-derived attributes.
|
|
29
|
+
- Governance helper automatically creates PM tasks and HITL actions with reviewer/escalation metadata.
|
|
30
|
+
- Trigger registry lets you invoke decorated agent entrypoints that are not exposed via HTTP/UI endpoints.
|
|
31
|
+
|
|
32
|
+
## Quickstart
|
|
33
|
+
|
|
34
|
+
```bash
|
|
35
|
+
pip install -e packages/interfaces-py # shared models
|
|
36
|
+
pip install -e packages/turingpulse-sdk
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
```python
|
|
40
|
+
from turingpulse_sdk import TuringPulseConfig, init, instrument, GovernanceDirective, KPIConfig
|
|
41
|
+
|
|
42
|
+
# Minimal setup — only API key and workflow name (URLs and tenant/project resolved automatically)
|
|
43
|
+
init(
|
|
44
|
+
TuringPulseConfig(
|
|
45
|
+
api_key="sk_...",
|
|
46
|
+
workflow_name="My Workflow",
|
|
47
|
+
use_ingestion_service=True,
|
|
48
|
+
)
|
|
49
|
+
)
|
|
50
|
+
|
|
51
|
+
# Or with explicit URLs/tenant (e.g. self-hosted or overrides)
|
|
52
|
+
init(
|
|
53
|
+
TuringPulseConfig(
|
|
54
|
+
api_key="sk_...",
|
|
55
|
+
workflow_name="My Workflow",
|
|
56
|
+
pm_url="http://localhost:8002",
|
|
57
|
+
ingestion_url="http://localhost:8004",
|
|
58
|
+
use_ingestion_service=True,
|
|
59
|
+
)
|
|
60
|
+
)
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
@instrument(
|
|
64
|
+
agent_id="agent-research",
|
|
65
|
+
operation="market_scan",
|
|
66
|
+
labels={"surface": "slack"},
|
|
67
|
+
governance=GovernanceDirective(hitl=True, reviewers=["audit@org.com"]),
|
|
68
|
+
kpis=[
|
|
69
|
+
KPIConfig(kpi_id="latency_ms", use_duration=True, alert_threshold=8000, comparator="gt"),
|
|
70
|
+
],
|
|
71
|
+
trigger_key="hidden.market_scan",
|
|
72
|
+
)
|
|
73
|
+
def run_market_scan(query: str) -> dict:
|
|
74
|
+
...
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
## Decorator Options
|
|
78
|
+
|
|
79
|
+
| Option | Description |
|
|
80
|
+
| --- | --- |
|
|
81
|
+
| `agent_id` | Required Agent identifier used by observability + governance |
|
|
82
|
+
| `operation` | Logical operation name; defaults to the function's `__name__` |
|
|
83
|
+
| `labels` | Dict merged with global defaults and exposed to traces/custom metrics |
|
|
84
|
+
| `trace` | Enable/disable tracing per function |
|
|
85
|
+
| `trigger_key` | Registers callable for manual triggering via `TuringPulsePlugin.trigger_agent` |
|
|
86
|
+
| `governance` | `GovernanceDirective` describing HITL/HATL/HOTL expectations |
|
|
87
|
+
| `kpis` | List of `KPIConfig` definitions to emit custom metrics + alerts |
|
|
88
|
+
| `trigger_hidden_agent` | When True, plugin calls the agent even if it's hidden from UI |
|
|
89
|
+
|
|
90
|
+
## Manual Triggers
|
|
91
|
+
|
|
92
|
+
```python
|
|
93
|
+
from turingpulse_sdk import get_plugin
|
|
94
|
+
|
|
95
|
+
get_plugin().trigger_agent("hidden.market_scan", query="pricing updates")
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
## KPI & Alerting
|
|
99
|
+
|
|
100
|
+
- KPIs can derive values from runtime context via callables (`lambda ctx: len(ctx.result["items"])`).
|
|
101
|
+
- When `alert_threshold` is crossed, the plugin attaches `metric.alert=true` metadata so the observability service can raise KPI alerts downstream.
|
|
102
|
+
|
|
103
|
+
## Governance
|
|
104
|
+
|
|
105
|
+
`GovernanceDirective` supports HITL (human-in-the-loop), HATL (human-after-the-loop), and HOTL (human-on-the-loop) patterns. The directive controls:
|
|
106
|
+
|
|
107
|
+
- which reviewer queues/tasks to open in the PM service,
|
|
108
|
+
- escalation channels & metadata,
|
|
109
|
+
- auto-escalation timers,
|
|
110
|
+
- whether the invocation should block on HITL.
|
|
111
|
+
|
|
112
|
+
## Security
|
|
113
|
+
|
|
114
|
+
The SDK includes built-in protections to ensure safe operation in customer environments:
|
|
115
|
+
|
|
116
|
+
### Endpoint Validation
|
|
117
|
+
- **SSRF protection**: Private/internal IP ranges (`127.0.0.0/8`, `10.0.0.0/8`, `172.16.0.0/12`, `192.168.0.0/16`, `169.254.0.0/16`, `localhost`, `[::1]`) are blocked as endpoints.
|
|
118
|
+
- **CRLF injection prevention**: Newline and null characters are stripped from API keys and endpoint URLs.
|
|
119
|
+
- **TLS enforcement**: The SDK emits a warning if an `http://` endpoint is used instead of `https://`.
|
|
120
|
+
|
|
121
|
+
### Data Protection
|
|
122
|
+
- **Sensitive field redaction**: Use `redact_fields` in `TuringPulseConfig` to mask sensitive keys before telemetry leaves your environment.
|
|
123
|
+
- **Error sanitization**: Error messages in telemetry do not include stack traces or internal paths.
|
|
124
|
+
- **No content truncation**: The SDK does not truncate or limit content size — backend handles that.
|
|
125
|
+
|
|
126
|
+
```python
|
|
127
|
+
init(TuringPulseConfig(
|
|
128
|
+
api_key="sk_...",
|
|
129
|
+
workflow_name="My Workflow",
|
|
130
|
+
redact_fields=["password", "api_key", "secret", "token", "authorization"],
|
|
131
|
+
))
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
### File Attachments
|
|
135
|
+
- **Path traversal prevention**: `attach_document()` normalises file paths and rejects symlinks, device files, and directory traversal patterns.
|
|
136
|
+
- **Filename sanitisation**: Control characters and path components are stripped from filenames.
|
|
137
|
+
|
|
138
|
+
### Network Safety
|
|
139
|
+
- **Retry-After compliance**: The SDK respects `Retry-After` headers from 429 responses.
|
|
140
|
+
- **No retry on terminal errors**: 401, 402, 403, 404 responses stop retries immediately.
|
|
141
|
+
- **Timeout cap**: HTTP timeout is capped at 120 seconds.
|
|
142
|
+
|
|
143
|
+
## Fingerprinting & Change Detection
|
|
144
|
+
|
|
145
|
+
The SDK automatically captures workflow fingerprints to enable root cause analysis when anomalies or drifts are detected. This works out-of-the-box with zero configuration.
|
|
146
|
+
|
|
147
|
+
### How It Works
|
|
148
|
+
|
|
149
|
+
1. The SDK automatically detects LLM calls (OpenAI, Anthropic, LangChain, etc.)
|
|
150
|
+
2. Prompts and configurations are hashed for change detection
|
|
151
|
+
3. Workflow structure (nodes and edges) is tracked
|
|
152
|
+
4. Fingerprints are sent to the backend after each run
|
|
153
|
+
5. When anomalies occur, changes are correlated for root cause attribution
|
|
154
|
+
|
|
155
|
+
### Configuration
|
|
156
|
+
|
|
157
|
+
```python
|
|
158
|
+
from turingpulse_sdk import init, FingerprintConfig
|
|
159
|
+
|
|
160
|
+
init(
|
|
161
|
+
observability_url="http://localhost:8001/v1",
|
|
162
|
+
tenant_id="tenant-42",
|
|
163
|
+
fingerprint=FingerprintConfig(
|
|
164
|
+
enabled=True, # Master switch (default: True)
|
|
165
|
+
capture_prompts=True, # Hash prompts for change detection
|
|
166
|
+
capture_configs=True, # Hash configs for change detection
|
|
167
|
+
capture_structure=True, # Track DAG structure
|
|
168
|
+
sensitive_config_keys=[ # Keys to redact before hashing
|
|
169
|
+
"api_key", "password", "secret", "token"
|
|
170
|
+
],
|
|
171
|
+
send_async=True, # Send fingerprints asynchronously
|
|
172
|
+
send_on_failure=True, # Send even if run fails
|
|
173
|
+
)
|
|
174
|
+
)
|
|
175
|
+
```
|
|
176
|
+
|
|
177
|
+
### Deploy Tracking
|
|
178
|
+
|
|
179
|
+
Register deployments to correlate anomalies with code changes:
|
|
180
|
+
|
|
181
|
+
```python
|
|
182
|
+
from turingpulse_sdk import register_deploy
|
|
183
|
+
import os
|
|
184
|
+
|
|
185
|
+
# Auto-detect from CI/CD environment (GitHub Actions, GitLab CI, etc.)
|
|
186
|
+
register_deploy(
|
|
187
|
+
workflow_id="chat-assistant",
|
|
188
|
+
auto_detect=True,
|
|
189
|
+
)
|
|
190
|
+
|
|
191
|
+
# Or provide explicit values
|
|
192
|
+
register_deploy(
|
|
193
|
+
workflow_id="chat-assistant",
|
|
194
|
+
version="v1.2.3",
|
|
195
|
+
git_sha=os.getenv("GIT_SHA"),
|
|
196
|
+
commit_message=os.getenv("GIT_COMMIT_MSG"),
|
|
197
|
+
)
|
|
198
|
+
```
|
|
199
|
+
|
|
200
|
+
Supported CI/CD environments for auto-detection:
|
|
201
|
+
- GitHub Actions
|
|
202
|
+
- GitLab CI
|
|
203
|
+
- CircleCI
|
|
204
|
+
- Jenkins
|
|
205
|
+
- Azure DevOps
|
|
206
|
+
- Bitbucket Pipelines
|
|
207
|
+
- Travis CI
|
|
208
|
+
|
|
209
|
+
## Roadmap
|
|
210
|
+
|
|
211
|
+
- Typed FastAPI/Flask middleware shims
|
|
212
|
+
- Async batch exporter for very high volume workloads
|
|
213
|
+
- Richer KPI authoring toolkit and alert subscriptions
|
|
214
|
+
|
|
215
|
+
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "turingpulse-sdk"
|
|
3
|
+
version = "1.0.0"
|
|
4
|
+
description = "Python plugin decorators for TuringPulse observability, governance, and KPI mapping."
|
|
5
|
+
authors = [{name = "TuringPulse"}]
|
|
6
|
+
requires-python = ">=3.11"
|
|
7
|
+
dependencies = [
|
|
8
|
+
"pydantic>=2.7.0,<3.0.0",
|
|
9
|
+
"httpx>=0.26.0",
|
|
10
|
+
"opentelemetry-api>=1.21.0,<2.0.0",
|
|
11
|
+
"typing_extensions>=4.9.0",
|
|
12
|
+
"turingpulse-interfaces>=1.0.0",
|
|
13
|
+
]
|
|
14
|
+
|
|
15
|
+
[project.optional-dependencies]
|
|
16
|
+
dev = ["pytest>=8.2.0", "anyio>=4.3.0"]
|
|
17
|
+
|
|
18
|
+
[tool.setuptools.packages.find]
|
|
19
|
+
include = ["turingpulse_sdk*"]
|
|
20
|
+
|
|
21
|
+
[build-system]
|
|
22
|
+
requires = ["setuptools>=68", "wheel"]
|
|
23
|
+
build-backend = "setuptools.build_meta"
|
|
@@ -0,0 +1,378 @@
|
|
|
1
|
+
"""Cross-framework fingerprint categorization tests.
|
|
2
|
+
|
|
3
|
+
Validates that config keys extracted by every supported LLM provider are
|
|
4
|
+
correctly classified into the right hash categories (model, eval, policy,
|
|
5
|
+
general). Uses the actual CONFIG_KEYS lists from llm_detector.py to
|
|
6
|
+
ensure there is no drift between what the SDK extracts and what the
|
|
7
|
+
fingerprint builder categorises.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import pytest
|
|
11
|
+
from turingpulse_sdk.fingerprint import (
|
|
12
|
+
FingerprintBuilder,
|
|
13
|
+
FingerprintConfig,
|
|
14
|
+
classify_config_key,
|
|
15
|
+
split_config,
|
|
16
|
+
)
|
|
17
|
+
from turingpulse_sdk.llm_detector import PROVIDERS
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def _provider_keys(name: str) -> list:
    """Return the ``config_keys`` list registered for provider *name*.

    Scans the PROVIDERS registry from ``llm_detector`` and raises
    ``ValueError`` when no provider matches.
    """
    found = next(
        (spec.config_keys for spec in PROVIDERS if spec.name == name),
        None,
    )
    if found is None:
        raise ValueError(f"Unknown provider: {name}")
    return found
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
# ---------------------------------------------------------------------------
|
|
29
|
+
# Provider config key definitions (from PROVIDERS registry in llm_detector)
|
|
30
|
+
# ---------------------------------------------------------------------------
|
|
31
|
+
|
|
32
|
+
# Per-provider fixtures: the extracted keys, a realistic sample config, and
# the category each key is expected to land in. Missing "expected_eval_keys" /
# "expected_policy_keys" entries default to the empty set in the tests.
PROVIDER_CONFIGS = {
    "openai": {
        "keys": _provider_keys("openai"),
        "sample": {
            "model": "gpt-4o",
            "temperature": 0.7,
            "max_tokens": 2000,
            "top_p": 0.9,
            "frequency_penalty": 0.1,
            "presence_penalty": 0.2,
            "stop": ["\n"],
        },
        "expected_model_keys": {
            "model", "temperature", "max_tokens", "top_p",
            "frequency_penalty", "presence_penalty", "stop",
        },
        "expected_general_keys": set(),
    },
    "anthropic": {
        "keys": _provider_keys("anthropic"),
        "sample": {
            "model": "claude-3-opus",
            "temperature": 0.5,
            "max_tokens": 4096,
            "top_p": 0.95,
            "stop_sequences": ["</answer>"],
        },
        "expected_model_keys": {
            "model", "temperature", "max_tokens", "top_p", "stop_sequences",
        },
        "expected_general_keys": set(),
    },
    "google": {
        "keys": _provider_keys("google"),
        "sample": {
            "model": "gemini-pro",
            "temperature": 0.8,
            "max_output_tokens": 1024,
            "top_p": 0.9,
            "top_k": 40,
        },
        "expected_model_keys": {
            "model", "temperature", "max_output_tokens", "top_p", "top_k",
        },
        "expected_general_keys": set(),
    },
    "vertexai": {
        "keys": _provider_keys("vertexai"),
        "sample": {
            "model": "gemini-1.5-pro",
            "temperature": 0.4,
            "max_output_tokens": 2048,
            "top_p": 0.8,
            "top_k": 32,
            "candidate_count": 1,
        },
        "expected_model_keys": {
            "model", "temperature", "max_output_tokens", "top_p", "top_k",
            "candidate_count",
        },
        "expected_general_keys": set(),
    },
    "langchain": {
        "keys": _provider_keys("langchain"),
        "sample": {
            "model_name": "gpt-4",
            "temperature": 0.7,
            "max_tokens": 1000,
            "model_kwargs": {"seed": 42},
        },
        "expected_model_keys": {
            "model_name", "temperature", "max_tokens", "model_kwargs",
        },
        "expected_general_keys": set(),
    },
    "langgraph": {
        "keys": _provider_keys("langgraph"),
        "sample": {
            "model_name": "gpt-4o",
            "temperature": 0.5,
            "max_tokens": 1500,
            "recursion_limit": 25,
            "configurable": {"thread_id": "abc"},
        },
        "expected_model_keys": {"model_name", "temperature", "max_tokens"},
        "expected_general_keys": {"recursion_limit", "configurable"},
    },
    "crewai": {
        "keys": _provider_keys("crewai"),
        "sample": {
            "model": "gpt-4o",
            "temperature": 0.3,
            "max_tokens": 500,
            "verbose": True,
            "memory": True,
        },
        "expected_model_keys": {"model", "temperature", "max_tokens"},
        "expected_general_keys": {"verbose", "memory"},
    },
    "autogen": {
        "keys": _provider_keys("autogen"),
        "sample": {
            "model": "gpt-4",
            "temperature": 0.0,
            "max_tokens": 2000,
            "timeout": 120,
            "cache_seed": 42,
        },
        "expected_model_keys": {"model", "temperature", "max_tokens"},
        "expected_general_keys": {"cache_seed"},
        "expected_policy_keys": {"timeout"},
    },
    "llamaindex": {
        "keys": _provider_keys("llamaindex"),
        "sample": {
            "model": "gpt-4",
            "temperature": 0.1,
            "max_tokens": 3000,
            "context_window": 128000,
        },
        "expected_model_keys": {
            "model", "temperature", "max_tokens", "context_window",
        },
        "expected_general_keys": set(),
    },
    "bedrock": {
        "keys": _provider_keys("bedrock"),
        "sample": {
            "modelId": "anthropic.claude-3-sonnet",
            "temperature": 0.7,
            "maxTokens": 4096,
            "topP": 0.9,
            "stopSequences": ["</result>"],
        },
        "expected_model_keys": {
            "modelId", "temperature", "maxTokens", "topP", "stopSequences",
        },
        "expected_general_keys": set(),
    },
    "cohere": {
        "keys": _provider_keys("cohere"),
        "sample": {
            "model": "command-r-plus",
            "temperature": 0.3,
            "max_tokens": 1000,
            "p": 0.9,
            "k": 40,
        },
        "expected_model_keys": {"model", "temperature", "max_tokens", "p", "k"},
        "expected_general_keys": set(),
    },
    "mistral": {
        "keys": _provider_keys("mistral"),
        "sample": {
            "model": "mistral-large-latest",
            "temperature": 0.5,
            "max_tokens": 2000,
            "top_p": 0.95,
        },
        "expected_model_keys": {"model", "temperature", "max_tokens", "top_p"},
        "expected_general_keys": set(),
    },
}
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
# ---------------------------------------------------------------------------
|
|
123
|
+
# Per-key classification tests
|
|
124
|
+
# ---------------------------------------------------------------------------
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
class TestPerProviderKeyClassification:
    """Verify every extracted config key is classified into the correct category."""

    @pytest.mark.parametrize("provider,spec", list(PROVIDER_CONFIGS.items()))
    def test_all_keys_classified_correctly(self, provider: str, spec: dict):
        """Every key from a provider should be classified as model or general (not unknown)."""
        # Ordered expectation table: first matching set wins, mirroring the
        # original if/elif precedence (model > eval > policy > general).
        expectations = (
            ("model", spec["expected_model_keys"]),
            ("eval", spec.get("expected_eval_keys", set())),
            ("policy", spec.get("expected_policy_keys", set())),
            ("general", spec["expected_general_keys"]),
        )

        for key in spec["keys"]:
            category = classify_config_key(key)
            for want, expected_keys in expectations:
                if key in expected_keys:
                    assert category == want, \
                        f"[{provider}] Key '{key}' expected {want}, got {category}"
                    break
            else:
                # Any key not explicitly expected should be model or general
                assert category in ("model", "general", "eval", "policy"), \
                    f"[{provider}] Key '{key}' got unexpected category {category}"
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
# ---------------------------------------------------------------------------
|
|
159
|
+
# Full fingerprint generation per provider
|
|
160
|
+
# ---------------------------------------------------------------------------
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
class TestPerProviderFingerprint:
    """Verify that each provider's typical config produces the right hash categories."""

    @pytest.mark.parametrize("provider,spec", list(PROVIDER_CONFIGS.items()))
    def test_provider_fingerprint_has_model_hash(self, provider: str, spec: dict):
        """Every provider config should produce a model hash (all have 'model' or equivalent)."""
        node = f"{provider}_llm"
        builder = FingerprintBuilder(FingerprintConfig())
        builder.record_node(node, "llm", spec["sample"], "system prompt")
        fp = builder.get_fingerprint()

        # Every provider should have a model hash
        assert node in fp.node_model_hashes, \
            f"[{provider}] Expected model hash but got none. Config: {spec['sample']}"

        # Combined hash should always be present (backward compat)
        assert node in fp.node_config_hashes, \
            f"[{provider}] Missing combined config hash"

        # Prompt hash should be present
        assert node in fp.node_prompt_hashes, \
            f"[{provider}] Missing prompt hash"

    @pytest.mark.parametrize("provider,spec", list(PROVIDER_CONFIGS.items()))
    def test_provider_no_spurious_eval_or_policy(self, provider: str, spec: dict):
        """Standard provider configs should NOT produce eval or policy hashes."""
        node = f"{provider}_llm"
        builder = FingerprintBuilder(FingerprintConfig())
        builder.record_node(node, "llm", spec["sample"])
        fp = builder.get_fingerprint()

        if not spec.get("expected_eval_keys", set()):
            assert node not in fp.node_eval_config_hashes, \
                f"[{provider}] Unexpected eval hash for standard config"
        if not spec.get("expected_policy_keys", set()):
            assert node not in fp.node_policy_config_hashes, \
                f"[{provider}] Unexpected policy hash for standard config"
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
# ---------------------------------------------------------------------------
|
|
207
|
+
# Cross-provider change detection isolation
|
|
208
|
+
# ---------------------------------------------------------------------------
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
class TestCrossProviderChangeIsolation:
    """Verify that model changes are detected correctly across different providers."""

    @staticmethod
    def _fingerprint(config: dict, *prompt_args):
        # Build a fresh fingerprint for a single "llm" node with this config.
        builder = FingerprintBuilder(FingerprintConfig())
        builder.record_node("llm", "llm", config, *prompt_args)
        return builder.get_fingerprint()

    @pytest.mark.parametrize("provider,spec", list(PROVIDER_CONFIGS.items()))
    def test_model_swap_detected_as_model_change(self, provider: str, spec: dict):
        """Swapping the model name should change the model hash but not other hashes."""
        # Find the model key for this provider
        model_key = next(
            (k for k in spec["sample"]
             if classify_config_key(k) == "model" and "model" in k.lower()),
            None,
        )
        if model_key is None:
            pytest.skip(f"No model key found for {provider}")

        before = dict(spec["sample"])
        after = dict(spec["sample"], **{model_key: "different-model-v2"})

        fp1 = self._fingerprint(before, "same prompt")
        fp2 = self._fingerprint(after, "same prompt")

        # Model hash MUST change
        assert fp1.node_model_hashes["llm"] != fp2.node_model_hashes["llm"], \
            f"[{provider}] Model hash should change when model is swapped"

        # Prompt hash MUST NOT change
        assert fp1.node_prompt_hashes["llm"] == fp2.node_prompt_hashes["llm"], \
            f"[{provider}] Prompt hash should NOT change when only model swapped"

    @pytest.mark.parametrize("provider,spec", list(PROVIDER_CONFIGS.items()))
    def test_temperature_change_detected_as_model_change(self, provider: str, spec: dict):
        """Changing temperature should change the model hash."""
        if "temperature" not in spec["sample"]:
            pytest.skip(f"No temperature for {provider}")

        baseline = dict(spec["sample"])
        hotter = dict(spec["sample"], temperature=0.99)  # changed

        fp1 = self._fingerprint(baseline)
        fp2 = self._fingerprint(hotter)

        assert fp1.node_model_hashes["llm"] != fp2.node_model_hashes["llm"], \
            f"[{provider}] Model hash should change when temperature changes"
|
|
268
|
+
|
|
269
|
+
|
|
270
|
+
# ---------------------------------------------------------------------------
|
|
271
|
+
# camelCase variant validation (critical for JS SDK / Bedrock / Google)
|
|
272
|
+
# ---------------------------------------------------------------------------
|
|
273
|
+
|
|
274
|
+
|
|
275
|
+
class TestCamelCaseKeyClassification:
    """Ensure camelCase config keys from TypeScript SDKs classify correctly."""

    @pytest.mark.parametrize("key,expected", [
        # Google / Vertex AI (TS SDK uses camelCase)
        ("maxOutputTokens", "model"),
        ("topP", "model"),
        ("topK", "model"),
        ("candidateCount", "model"),
        # LangChain / LangGraph (TS SDK)
        ("modelName", "model"),
        ("maxTokens", "model"),
        ("modelKwargs", "model"),
        # Bedrock (camelCase native)
        ("modelId", "model"),
        ("stopSequences", "model"),
        # AutoGen TS
        ("cacheSeed", "general"),
        # LangGraph TS
        ("recursionLimit", "general"),
        # CrewAI / LlamaIndex TS
        ("contextWindow", "model"),
    ])
    def test_camelcase_key(self, key: str, expected: str):
        """classify_config_key must handle camelCase spellings identically to snake_case."""
        actual = classify_config_key(key)
        assert actual == expected, \
            f"Key '{key}' should classify as '{expected}' but got '{actual}'"
|
|
301
|
+
|
|
302
|
+
|
|
303
|
+
# ---------------------------------------------------------------------------
|
|
304
|
+
# Eval and Policy augmented configs
|
|
305
|
+
# ---------------------------------------------------------------------------
|
|
306
|
+
|
|
307
|
+
|
|
308
|
+
class TestAugmentedConfigs:
    """Test provider configs augmented with eval/policy keys.

    In real workflows, users may add eval or policy config to their LLM nodes.
    These should be detected separately from model changes.
    """

    @staticmethod
    def _fingerprint_for(config: dict):
        # Record a single "llm" node and return the resulting fingerprint.
        builder = FingerprintBuilder(FingerprintConfig())
        builder.record_node("llm", "llm", config)
        return builder.get_fingerprint()

    def test_openai_with_eval_config(self):
        fp = self._fingerprint_for({
            "model": "gpt-4o",
            "temperature": 0.7,
            "eval_criteria": "helpfulness",
            "eval_rubric": "1-5 scale",
        })

        assert "llm" in fp.node_model_hashes
        assert "llm" in fp.node_eval_config_hashes

    def test_anthropic_with_guardrail(self):
        fp = self._fingerprint_for({
            "model": "claude-3-opus",
            "temperature": 0.5,
            "guardrail": "pii_filter",
            "safety_threshold": 0.95,
        })

        assert "llm" in fp.node_model_hashes
        assert "llm" in fp.node_policy_config_hashes

    def test_langchain_with_eval_and_policy(self):
        fp = self._fingerprint_for({
            "model_name": "gpt-4",
            "temperature": 0.3,
            "eval_model": "gpt-4o-mini",
            "eval_criteria": "accuracy",
            "guardrail_config": {"toxicity": True},
        })

        assert "llm" in fp.node_model_hashes
        assert "llm" in fp.node_eval_config_hashes
        assert "llm" in fp.node_policy_config_hashes

    def test_changing_eval_does_not_affect_model_hash(self):
        fp1 = self._fingerprint_for({
            "model": "gpt-4o",
            "eval_criteria": "helpfulness",
        })
        fp2 = self._fingerprint_for({
            "model": "gpt-4o",
            "eval_criteria": "accuracy",  # changed
        })

        assert fp1.node_model_hashes["llm"] == fp2.node_model_hashes["llm"]
        assert fp1.node_eval_config_hashes["llm"] != fp2.node_eval_config_hashes["llm"]
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
"""Tests for double-instrumentation prevention via _INSTRUMENTING context var."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from contextvars import ContextVar
|
|
6
|
+
from unittest.mock import MagicMock
|
|
7
|
+
|
|
8
|
+
import pytest
|
|
9
|
+
|
|
10
|
+
from turingpulse_sdk import init
|
|
11
|
+
import turingpulse_sdk.plugin as pm
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@pytest.fixture(autouse=True)
def setup():
    """Reset the global plugin, install a fresh one with a mocked client, and
    tear the global back down after each test."""
    pm._PLUGIN = None
    plugin = init(api_key="dedup-key", workflow_name="dedup-test")

    # Replace the real client so no network traffic happens during tests.
    mock_client = MagicMock()
    mock_client.emit_serialized_payload = MagicMock()
    mock_client.policy_check = MagicMock(
        return_value=MagicMock(action="allow", blocked=False)
    )
    plugin.client = mock_client

    yield plugin
    pm._PLUGIN = None
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class TestInstrumentingGuard:
    """Verify _INSTRUMENTING context var prevents nested double spans."""

    def test_instrumenting_var_prevents_recursion(self):
        """A ContextVar guard set before recursing makes the inner call short-circuit.

        Models the SDK's double-instrumentation guard: the outer call sets the
        flag, so the recursive inner call sees it and returns "skipped" instead
        of doing the work again. Exactly two invocations must be observed.
        """
        # NOTE: ContextVar is already imported at module level; the previous
        # redundant function-local re-import has been removed.
        guard: ContextVar[bool] = ContextVar("test_guard", default=False)
        call_count = 0

        def inner():
            nonlocal call_count
            if guard.get(False):
                # Guard already set by the outer frame: skip the nested work.
                call_count += 1
                return "skipped"
            token = guard.set(True)
            try:
                call_count += 1
                return inner()  # Recursive call should skip
            finally:
                # Always restore, even if the nested call raises.
                guard.reset(token)

        result = inner()
        assert call_count == 2
        assert result == "skipped"
|