turingpulse-sdk 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. turingpulse_sdk-1.0.0/PKG-INFO +14 -0
  2. turingpulse_sdk-1.0.0/README.md +215 -0
  3. turingpulse_sdk-1.0.0/pyproject.toml +23 -0
  4. turingpulse_sdk-1.0.0/setup.cfg +4 -0
  5. turingpulse_sdk-1.0.0/tests/test_cross_framework_fingerprint.py +378 -0
  6. turingpulse_sdk-1.0.0/tests/test_double_instrumentation.py +48 -0
  7. turingpulse_sdk-1.0.0/tests/test_fingerprint.py +336 -0
  8. turingpulse_sdk-1.0.0/tests/test_fingerprint_categorization.py +483 -0
  9. turingpulse_sdk-1.0.0/tests/test_governance_precheck.py +200 -0
  10. turingpulse_sdk-1.0.0/tests/test_robustness.py +99 -0
  11. turingpulse_sdk-1.0.0/turingpulse_sdk/__init__.py +44 -0
  12. turingpulse_sdk-1.0.0/turingpulse_sdk/attachments.py +176 -0
  13. turingpulse_sdk-1.0.0/turingpulse_sdk/client.py +435 -0
  14. turingpulse_sdk-1.0.0/turingpulse_sdk/config.py +234 -0
  15. turingpulse_sdk-1.0.0/turingpulse_sdk/context.py +220 -0
  16. turingpulse_sdk-1.0.0/turingpulse_sdk/decorators.py +143 -0
  17. turingpulse_sdk-1.0.0/turingpulse_sdk/deploy.py +249 -0
  18. turingpulse_sdk-1.0.0/turingpulse_sdk/event_builder.py +223 -0
  19. turingpulse_sdk-1.0.0/turingpulse_sdk/exceptions.py +38 -0
  20. turingpulse_sdk-1.0.0/turingpulse_sdk/fingerprint.py +388 -0
  21. turingpulse_sdk-1.0.0/turingpulse_sdk/governance.py +147 -0
  22. turingpulse_sdk-1.0.0/turingpulse_sdk/instrumentation.py +86 -0
  23. turingpulse_sdk-1.0.0/turingpulse_sdk/integrations/__init__.py +52 -0
  24. turingpulse_sdk-1.0.0/turingpulse_sdk/integrations/autogen.py +335 -0
  25. turingpulse_sdk-1.0.0/turingpulse_sdk/integrations/base.py +224 -0
  26. turingpulse_sdk-1.0.0/turingpulse_sdk/integrations/crewai.py +262 -0
  27. turingpulse_sdk-1.0.0/turingpulse_sdk/integrations/langgraph.py +872 -0
  28. turingpulse_sdk-1.0.0/turingpulse_sdk/integrations/llamaindex.py +268 -0
  29. turingpulse_sdk-1.0.0/turingpulse_sdk/kpi.py +104 -0
  30. turingpulse_sdk-1.0.0/turingpulse_sdk/llm_detector.py +348 -0
  31. turingpulse_sdk-1.0.0/turingpulse_sdk/plugin.py +554 -0
  32. turingpulse_sdk-1.0.0/turingpulse_sdk/registry.py +36 -0
  33. turingpulse_sdk-1.0.0/turingpulse_sdk/tracing.py +92 -0
  34. turingpulse_sdk-1.0.0/turingpulse_sdk/transcripts.py +69 -0
  35. turingpulse_sdk-1.0.0/turingpulse_sdk/trigger_state.py +30 -0
  36. turingpulse_sdk-1.0.0/turingpulse_sdk.egg-info/PKG-INFO +14 -0
  37. turingpulse_sdk-1.0.0/turingpulse_sdk.egg-info/SOURCES.txt +38 -0
  38. turingpulse_sdk-1.0.0/turingpulse_sdk.egg-info/dependency_links.txt +1 -0
  39. turingpulse_sdk-1.0.0/turingpulse_sdk.egg-info/requires.txt +9 -0
  40. turingpulse_sdk-1.0.0/turingpulse_sdk.egg-info/top_level.txt +1 -0
@@ -0,0 +1,14 @@
1
+ Metadata-Version: 2.4
2
+ Name: turingpulse-sdk
3
+ Version: 1.0.0
4
+ Summary: Python plugin decorators for TuringPulse observability, governance, and KPI mapping.
5
+ Author: TuringPulse
6
+ Requires-Python: >=3.11
7
+ Requires-Dist: pydantic<3.0.0,>=2.7.0
8
+ Requires-Dist: httpx>=0.26.0
9
+ Requires-Dist: opentelemetry-api<2.0.0,>=1.21.0
10
+ Requires-Dist: typing_extensions>=4.9.0
11
+ Requires-Dist: turingpulse-interfaces>=1.0.0
12
+ Provides-Extra: dev
13
+ Requires-Dist: pytest>=8.2.0; extra == "dev"
14
+ Requires-Dist: anyio>=4.3.0; extra == "dev"
@@ -0,0 +1,215 @@
1
+ # TuringPulse Python SDK
2
+
3
+ End-to-end instrumentation helpers that let any Python service publish telemetry to the Agent Observability Service, open governance tasks in the Agent PM Service, and register agent entrypoints without touching the host application's core configuration.
4
+
5
+ ## Framework Compatibility
6
+
7
+ <!-- COMPAT-BADGES:START -->
8
+ ![AWS Bedrock](https://img.shields.io/badge/AWS%20Bedrock-passing-brightgreen) ![Mistral](https://img.shields.io/badge/Mistral-passing-brightgreen)
9
+ <!-- COMPAT-BADGES:END -->
10
+
11
+ <details>
12
+ <summary>Full Compatibility Matrix</summary>
13
+
14
+ <!-- COMPAT-MATRIX:START -->
15
+ | Framework | Version | Python 3.11 | Python 3.12 | Python 3.13 |
16
+ |---|---| ---| ---| --- |
17
+ | AWS Bedrock | 1.42.66 | -- | -- | pass |
18
+ | Mistral | 0.4.0 | pass | -- | -- |
19
+ | Mistral | 2.0.1 | -- | pass | -- |
20
+ <!-- COMPAT-MATRIX:END -->
21
+
22
+ </details>
23
+
24
+ ## Highlights
25
+
26
+ - `TuringPulsePlugin.init(...)` bootstraps connectivity (base URLs, tenant headers, API keys, tracing defaults).
27
+ - `@turingpulse.instrument(...)` decorator captures latency, status, errors, KPI mappings, and governance directives (HITL/HATL/HOTL) for both sync and async functions.
28
+ - Automatic OpenTelemetry span management with optional custom labels and KPI-derived attributes.
29
+ - Governance helper automatically creates PM tasks and HITL actions with reviewer/escalation metadata.
30
+ - Trigger registry lets you invoke decorated agent entrypoints that are not exposed via HTTP/UI endpoints.
31
+
32
+ ## Quickstart
33
+
34
+ ```bash
35
+ pip install -e packages/interfaces-py # shared models
36
+ pip install -e packages/turingpulse-sdk
37
+ ```
38
+
39
+ ```python
40
+ from turingpulse_sdk import TuringPulseConfig, init, instrument, GovernanceDirective, KPIConfig
41
+
42
+ # Minimal setup — only API key and workflow name (URLs and tenant/project resolved automatically)
43
+ init(
44
+ TuringPulseConfig(
45
+ api_key="sk_...",
46
+ workflow_name="My Workflow",
47
+ use_ingestion_service=True,
48
+ )
49
+ )
50
+
51
+ # Or with explicit URLs/tenant (e.g. self-hosted or overrides)
52
+ init(
53
+ TuringPulseConfig(
54
+ api_key="sk_...",
55
+ workflow_name="My Workflow",
56
+ pm_url="http://localhost:8002",
57
+ ingestion_url="http://localhost:8004",
58
+ use_ingestion_service=True,
59
+ )
60
+ )
61
+
62
+
63
+ @instrument(
64
+ agent_id="agent-research",
65
+ operation="market_scan",
66
+ labels={"surface": "slack"},
67
+ governance=GovernanceDirective(hitl=True, reviewers=["audit@org.com"]),
68
+ kpis=[
69
+ KPIConfig(kpi_id="latency_ms", use_duration=True, alert_threshold=8000, comparator="gt"),
70
+ ],
71
+ trigger_key="hidden.market_scan",
72
+ )
73
+ def run_market_scan(query: str) -> dict:
74
+ ...
75
+ ```
76
+
77
+ ## Decorator Options
78
+
79
+ | Option | Description |
80
+ | --- | --- |
81
+ | `agent_id` | Required Agent identifier used by observability + governance |
82
+ | `operation` | Logical operation name; defaults to the function's `__name__` |
83
+ | `labels` | Dict merged with global defaults and exposed to traces/custom metrics |
84
+ | `trace` | Enable/disable tracing per function |
85
+ | `trigger_key` | Registers callable for manual triggering via `TuringPulsePlugin.trigger_agent` |
86
+ | `governance` | `GovernanceDirective` describing HITL/HATL/HOTL expectations |
87
+ | `kpis` | List of `KPIConfig` definitions to emit custom metrics + alerts |
88
+ | `trigger_hidden_agent` | When True, plugin calls the agent even if it's hidden from UI |
89
+
90
+ ## Manual Triggers
91
+
92
+ ```python
93
+ from turingpulse_sdk import get_plugin
94
+
95
+ get_plugin().trigger_agent("hidden.market_scan", query="pricing updates")
96
+ ```
97
+
98
+ ## KPI & Alerting
99
+
100
+ - KPIs can derive values from runtime context via callables (`lambda ctx: len(ctx.result["items"])`).
101
+ - When `alert_threshold` is crossed, the plugin attaches `metric.alert=true` metadata so the observability service can raise KPI alerts downstream.
102
+
103
+ ## Governance
104
+
105
+ `GovernanceDirective` supports HITL (human-in-the-loop), HATL (human-after-the-loop), and HOTL (human-on-the-loop) patterns. The directive controls:
106
+
107
+ - which reviewer queues/tasks to open in the PM service,
108
+ - escalation channels & metadata,
109
+ - auto-escalation timers,
110
+ - whether the invocation should block on HITL.
111
+
112
+ ## Security
113
+
114
+ The SDK includes built-in protections to ensure safe operation in customer environments:
115
+
116
+ ### Endpoint Validation
117
+ - **SSRF protection**: Private/internal IP ranges (`127.0.0.0/8`, `10.0.0.0/8`, `172.16.0.0/12`, `192.168.0.0/16`, `169.254.0.0/16`, `localhost`, `[::1]`) are blocked as endpoints.
118
+ - **CRLF injection prevention**: Newline and null characters are stripped from API keys and endpoint URLs.
119
+ - **TLS enforcement**: The SDK emits a warning if an `http://` endpoint is used instead of `https://`.
120
+
121
+ ### Data Protection
122
+ - **Sensitive field redaction**: Use `redact_fields` in `TuringPulseConfig` to mask sensitive keys before telemetry leaves your environment.
123
+ - **Error sanitization**: Error messages in telemetry do not include stack traces or internal paths.
124
+ - **No content truncation**: The SDK does not truncate or limit content size — backend handles that.
125
+
126
+ ```python
127
+ init(TuringPulseConfig(
128
+ api_key="sk_...",
129
+ workflow_name="My Workflow",
130
+ redact_fields=["password", "api_key", "secret", "token", "authorization"],
131
+ ))
132
+ ```
133
+
134
+ ### File Attachments
135
+ - **Path traversal prevention**: `attach_document()` normalizes file paths and rejects symlinks, device files, and directory traversal patterns.
136
+ - **Filename sanitization**: Control characters and path components are stripped from filenames.
137
+
138
+ ### Network Safety
139
+ - **Retry-After compliance**: The SDK respects `Retry-After` headers from 429 responses.
140
+ - **No retry on terminal errors**: 401, 402, 403, 404 responses stop retries immediately.
141
+ - **Timeout cap**: HTTP timeout is capped at 120 seconds.
142
+
143
+ ## Fingerprinting & Change Detection
144
+
145
+ The SDK automatically captures workflow fingerprints to enable root cause analysis when anomalies or drifts are detected. This works out-of-the-box with zero configuration.
146
+
147
+ ### How It Works
148
+
149
+ 1. The SDK automatically detects LLM calls (OpenAI, Anthropic, LangChain, etc.)
150
+ 2. Prompts and configurations are hashed for change detection
151
+ 3. Workflow structure (nodes and edges) is tracked
152
+ 4. Fingerprints are sent to the backend after each run
153
+ 5. When anomalies occur, changes are correlated for root cause attribution
154
+
155
+ ### Configuration
156
+
157
+ ```python
158
+ from turingpulse_sdk import init, FingerprintConfig
159
+
160
+ init(
161
+ observability_url="http://localhost:8001/v1",
162
+ tenant_id="tenant-42",
163
+ fingerprint=FingerprintConfig(
164
+ enabled=True, # Master switch (default: True)
165
+ capture_prompts=True, # Hash prompts for change detection
166
+ capture_configs=True, # Hash configs for change detection
167
+ capture_structure=True, # Track DAG structure
168
+ sensitive_config_keys=[ # Keys to redact before hashing
169
+ "api_key", "password", "secret", "token"
170
+ ],
171
+ send_async=True, # Send fingerprints asynchronously
172
+ send_on_failure=True, # Send even if run fails
173
+ )
174
+ )
175
+ ```
176
+
177
+ ### Deploy Tracking
178
+
179
+ Register deployments to correlate anomalies with code changes:
180
+
181
+ ```python
182
+ from turingpulse_sdk import register_deploy
183
+ import os
184
+
185
+ # Auto-detect from CI/CD environment (GitHub Actions, GitLab CI, etc.)
186
+ register_deploy(
187
+ workflow_id="chat-assistant",
188
+ auto_detect=True,
189
+ )
190
+
191
+ # Or provide explicit values
192
+ register_deploy(
193
+ workflow_id="chat-assistant",
194
+ version="v1.2.3",
195
+ git_sha=os.getenv("GIT_SHA"),
196
+ commit_message=os.getenv("GIT_COMMIT_MSG"),
197
+ )
198
+ ```
199
+
200
+ Supported CI/CD environments for auto-detection:
201
+ - GitHub Actions
202
+ - GitLab CI
203
+ - CircleCI
204
+ - Jenkins
205
+ - Azure DevOps
206
+ - Bitbucket Pipelines
207
+ - Travis CI
208
+
209
+ ## Roadmap
210
+
211
+ - Typed FastAPI/Flask middleware shims
212
+ - Async batch exporter for very high volume workloads
213
+ - Richer KPI authoring toolkit and alert subscriptions
214
+
215
+
@@ -0,0 +1,23 @@
1
+ [project]
2
+ name = "turingpulse-sdk"
3
+ version = "1.0.0"
4
+ description = "Python plugin decorators for TuringPulse observability, governance, and KPI mapping."
5
+ authors = [{name = "TuringPulse"}]
6
+ requires-python = ">=3.11"
7
+ dependencies = [
8
+ "pydantic>=2.7.0,<3.0.0",
9
+ "httpx>=0.26.0",
10
+ "opentelemetry-api>=1.21.0,<2.0.0",
11
+ "typing_extensions>=4.9.0",
12
+ "turingpulse-interfaces>=1.0.0",
13
+ ]
14
+
15
+ [project.optional-dependencies]
16
+ dev = ["pytest>=8.2.0", "anyio>=4.3.0"]
17
+
18
+ [tool.setuptools.packages.find]
19
+ include = ["turingpulse_sdk*"]
20
+
21
+ [build-system]
22
+ requires = ["setuptools>=68", "wheel"]
23
+ build-backend = "setuptools.build_meta"
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,378 @@
1
+ """Cross-framework fingerprint categorization tests.
2
+
3
+ Validates that config keys extracted by every supported LLM provider are
4
+ correctly classified into the right hash categories (model, eval, policy,
5
+ general). Uses the actual CONFIG_KEYS lists from llm_detector.py to
6
+ ensure there is no drift between what the SDK extracts and what the
7
+ fingerprint builder categorises.
8
+ """
9
+
10
+ import pytest
11
+ from turingpulse_sdk.fingerprint import (
12
+ FingerprintBuilder,
13
+ FingerprintConfig,
14
+ classify_config_key,
15
+ split_config,
16
+ )
17
+ from turingpulse_sdk.llm_detector import PROVIDERS
18
+
19
+
20
def _provider_keys(name: str) -> list:
    """Return the config_keys list registered for the named provider.

    Raises:
        ValueError: if *name* does not appear in the PROVIDERS registry.
    """
    spec = next((s for s in PROVIDERS if s.name == name), None)
    if spec is None:
        raise ValueError(f"Unknown provider: {name}")
    return spec.config_keys
26
+
27
+
28
+ # ---------------------------------------------------------------------------
29
+ # Provider config key definitions (from PROVIDERS registry in llm_detector)
30
+ # ---------------------------------------------------------------------------
31
+
32
# Per-provider test fixtures. Each entry carries:
#   keys                  -- the provider's config_keys from the PROVIDERS registry
#   sample                -- a realistic config dict for that provider
#   expected_model_keys   -- keys that must classify as "model"
#   expected_general_keys -- keys that must classify as "general"
#   expected_policy_keys / expected_eval_keys -- optional, default empty
PROVIDER_CONFIGS = {
    "openai": {
        "keys": _provider_keys("openai"),
        "sample": {
            "model": "gpt-4o",
            "temperature": 0.7,
            "max_tokens": 2000,
            "top_p": 0.9,
            "frequency_penalty": 0.1,
            "presence_penalty": 0.2,
            "stop": ["\n"],
        },
        "expected_model_keys": {
            "model", "temperature", "max_tokens", "top_p",
            "frequency_penalty", "presence_penalty", "stop",
        },
        "expected_general_keys": set(),
    },
    "anthropic": {
        "keys": _provider_keys("anthropic"),
        "sample": {
            "model": "claude-3-opus",
            "temperature": 0.5,
            "max_tokens": 4096,
            "top_p": 0.95,
            "stop_sequences": ["</answer>"],
        },
        "expected_model_keys": {"model", "temperature", "max_tokens", "top_p", "stop_sequences"},
        "expected_general_keys": set(),
    },
    "google": {
        "keys": _provider_keys("google"),
        "sample": {
            "model": "gemini-pro",
            "temperature": 0.8,
            "max_output_tokens": 1024,
            "top_p": 0.9,
            "top_k": 40,
        },
        "expected_model_keys": {"model", "temperature", "max_output_tokens", "top_p", "top_k"},
        "expected_general_keys": set(),
    },
    "vertexai": {
        "keys": _provider_keys("vertexai"),
        "sample": {
            "model": "gemini-1.5-pro",
            "temperature": 0.4,
            "max_output_tokens": 2048,
            "top_p": 0.8,
            "top_k": 32,
            "candidate_count": 1,
        },
        "expected_model_keys": {
            "model", "temperature", "max_output_tokens", "top_p", "top_k", "candidate_count",
        },
        "expected_general_keys": set(),
    },
    "langchain": {
        "keys": _provider_keys("langchain"),
        "sample": {
            "model_name": "gpt-4",
            "temperature": 0.7,
            "max_tokens": 1000,
            "model_kwargs": {"seed": 42},
        },
        "expected_model_keys": {"model_name", "temperature", "max_tokens", "model_kwargs"},
        "expected_general_keys": set(),
    },
    "langgraph": {
        "keys": _provider_keys("langgraph"),
        "sample": {
            "model_name": "gpt-4o",
            "temperature": 0.5,
            "max_tokens": 1500,
            "recursion_limit": 25,
            "configurable": {"thread_id": "abc"},
        },
        "expected_model_keys": {"model_name", "temperature", "max_tokens"},
        "expected_general_keys": {"recursion_limit", "configurable"},
    },
    "crewai": {
        "keys": _provider_keys("crewai"),
        "sample": {
            "model": "gpt-4o",
            "temperature": 0.3,
            "max_tokens": 500,
            "verbose": True,
            "memory": True,
        },
        "expected_model_keys": {"model", "temperature", "max_tokens"},
        "expected_general_keys": {"verbose", "memory"},
    },
    "autogen": {
        "keys": _provider_keys("autogen"),
        "sample": {
            "model": "gpt-4",
            "temperature": 0.0,
            "max_tokens": 2000,
            "timeout": 120,
            "cache_seed": 42,
        },
        "expected_model_keys": {"model", "temperature", "max_tokens"},
        "expected_general_keys": {"cache_seed"},
        "expected_policy_keys": {"timeout"},
    },
    "llamaindex": {
        "keys": _provider_keys("llamaindex"),
        "sample": {
            "model": "gpt-4",
            "temperature": 0.1,
            "max_tokens": 3000,
            "context_window": 128000,
        },
        "expected_model_keys": {"model", "temperature", "max_tokens", "context_window"},
        "expected_general_keys": set(),
    },
    "bedrock": {
        "keys": _provider_keys("bedrock"),
        "sample": {
            "modelId": "anthropic.claude-3-sonnet",
            "temperature": 0.7,
            "maxTokens": 4096,
            "topP": 0.9,
            "stopSequences": ["</result>"],
        },
        "expected_model_keys": {"modelId", "temperature", "maxTokens", "topP", "stopSequences"},
        "expected_general_keys": set(),
    },
    "cohere": {
        "keys": _provider_keys("cohere"),
        "sample": {
            "model": "command-r-plus",
            "temperature": 0.3,
            "max_tokens": 1000,
            "p": 0.9,
            "k": 40,
        },
        "expected_model_keys": {"model", "temperature", "max_tokens", "p", "k"},
        "expected_general_keys": set(),
    },
    "mistral": {
        "keys": _provider_keys("mistral"),
        "sample": {
            "model": "mistral-large-latest",
            "temperature": 0.5,
            "max_tokens": 2000,
            "top_p": 0.95,
        },
        "expected_model_keys": {"model", "temperature", "max_tokens", "top_p"},
        "expected_general_keys": set(),
    },
}
120
+
121
+
122
+ # ---------------------------------------------------------------------------
123
+ # Per-key classification tests
124
+ # ---------------------------------------------------------------------------
125
+
126
+
127
class TestPerProviderKeyClassification:
    """Verify every extracted config key is classified into the correct category."""

    @pytest.mark.parametrize("provider,spec", list(PROVIDER_CONFIGS.items()))
    def test_all_keys_classified_correctly(self, provider: str, spec: dict):
        """Every key from a provider should be classified as model or general (not unknown)."""
        # Expected category per key, checked in the same priority order as before:
        # model, then eval, then policy, then general.
        expectations = (
            ("model", spec["expected_model_keys"]),
            ("eval", spec.get("expected_eval_keys", set())),
            ("policy", spec.get("expected_policy_keys", set())),
            ("general", spec["expected_general_keys"]),
        )

        for key in spec["keys"]:
            category = classify_config_key(key)
            for expected_cat, members in expectations:
                if key in members:
                    assert category == expected_cat, \
                        f"[{provider}] Key '{key}' expected {expected_cat}, got {category}"
                    break
            else:
                # Any key not explicitly expected should still land in a known category.
                assert category in ("model", "general", "eval", "policy"), \
                    f"[{provider}] Key '{key}' got unexpected category {category}"
156
+
157
+
158
+ # ---------------------------------------------------------------------------
159
+ # Full fingerprint generation per provider
160
+ # ---------------------------------------------------------------------------
161
+
162
+
163
class TestPerProviderFingerprint:
    """Verify that each provider's typical config produces the right hash categories."""

    @pytest.mark.parametrize("provider,spec", list(PROVIDER_CONFIGS.items()))
    def test_provider_fingerprint_has_model_hash(self, provider: str, spec: dict):
        """Every provider config should produce a model hash (all have 'model' or equivalent)."""
        node = f"{provider}_llm"

        fb = FingerprintBuilder(FingerprintConfig())
        fb.record_node(node, "llm", spec["sample"], "system prompt")
        fingerprint = fb.get_fingerprint()

        # Every provider should have a model hash.
        assert node in fingerprint.node_model_hashes, \
            f"[{provider}] Expected model hash but got none. Config: {spec['sample']}"
        # Combined hash should always be present (backward compat).
        assert node in fingerprint.node_config_hashes, \
            f"[{provider}] Missing combined config hash"
        # Prompt hash should be present.
        assert node in fingerprint.node_prompt_hashes, \
            f"[{provider}] Missing prompt hash"

    @pytest.mark.parametrize("provider,spec", list(PROVIDER_CONFIGS.items()))
    def test_provider_no_spurious_eval_or_policy(self, provider: str, spec: dict):
        """Standard provider configs should NOT produce eval or policy hashes."""
        node = f"{provider}_llm"

        fb = FingerprintBuilder(FingerprintConfig())
        fb.record_node(node, "llm", spec["sample"])
        fingerprint = fb.get_fingerprint()

        if not spec.get("expected_eval_keys", set()):
            assert node not in fingerprint.node_eval_config_hashes, \
                f"[{provider}] Unexpected eval hash for standard config"
        if not spec.get("expected_policy_keys", set()):
            assert node not in fingerprint.node_policy_config_hashes, \
                f"[{provider}] Unexpected policy hash for standard config"
204
+
205
+
206
+ # ---------------------------------------------------------------------------
207
+ # Cross-provider change detection isolation
208
+ # ---------------------------------------------------------------------------
209
+
210
+
211
class TestCrossProviderChangeIsolation:
    """Verify that model changes are detected correctly across different providers."""

    @pytest.mark.parametrize("provider,spec", list(PROVIDER_CONFIGS.items()))
    def test_model_swap_detected_as_model_change(self, provider: str, spec: dict):
        """Swapping the model name should change the model hash but not other hashes."""
        # Locate this provider's model-identifying key (first match, same order as dict).
        model_key = next(
            (k for k in spec["sample"]
             if classify_config_key(k) == "model" and "model" in k.lower()),
            None,
        )
        if model_key is None:
            pytest.skip(f"No model key found for {provider}")

        before = dict(spec["sample"])
        after = dict(spec["sample"])
        after[model_key] = "different-model-v2"

        builder_a = FingerprintBuilder(FingerprintConfig())
        builder_a.record_node("llm", "llm", before, "same prompt")
        fp_a = builder_a.get_fingerprint()

        builder_b = FingerprintBuilder(FingerprintConfig())
        builder_b.record_node("llm", "llm", after, "same prompt")
        fp_b = builder_b.get_fingerprint()

        # Model hash MUST change.
        assert fp_a.node_model_hashes["llm"] != fp_b.node_model_hashes["llm"], \
            f"[{provider}] Model hash should change when model is swapped"
        # Prompt hash MUST NOT change.
        assert fp_a.node_prompt_hashes["llm"] == fp_b.node_prompt_hashes["llm"], \
            f"[{provider}] Prompt hash should NOT change when only model swapped"

    @pytest.mark.parametrize("provider,spec", list(PROVIDER_CONFIGS.items()))
    def test_temperature_change_detected_as_model_change(self, provider: str, spec: dict):
        """Changing temperature should change the model hash."""
        if "temperature" not in spec["sample"]:
            pytest.skip(f"No temperature for {provider}")

        before = dict(spec["sample"])
        after = {**spec["sample"], "temperature": 0.99}  # changed

        builder_a = FingerprintBuilder(FingerprintConfig())
        builder_a.record_node("llm", "llm", before)
        fp_a = builder_a.get_fingerprint()

        builder_b = FingerprintBuilder(FingerprintConfig())
        builder_b.record_node("llm", "llm", after)
        fp_b = builder_b.get_fingerprint()

        assert fp_a.node_model_hashes["llm"] != fp_b.node_model_hashes["llm"], \
            f"[{provider}] Model hash should change when temperature changes"
268
+
269
+
270
+ # ---------------------------------------------------------------------------
271
+ # camelCase variant validation (critical for JS SDK / Bedrock / Google)
272
+ # ---------------------------------------------------------------------------
273
+
274
+
275
class TestCamelCaseKeyClassification:
    """Ensure camelCase config keys from TypeScript SDKs classify correctly."""

    @pytest.mark.parametrize("key,expected", [
        # Google / Vertex AI (TS SDK uses camelCase)
        ("maxOutputTokens", "model"),
        ("topP", "model"),
        ("topK", "model"),
        ("candidateCount", "model"),
        # LangChain / LangGraph (TS SDK)
        ("modelName", "model"),
        ("maxTokens", "model"),
        ("modelKwargs", "model"),
        # Bedrock (camelCase native)
        ("modelId", "model"),
        ("stopSequences", "model"),
        # AutoGen TS
        ("cacheSeed", "general"),
        # LangGraph TS
        ("recursionLimit", "general"),
        # CrewAI / LlamaIndex TS
        ("contextWindow", "model"),
    ])
    def test_camelcase_key(self, key: str, expected: str):
        actual = classify_config_key(key)
        assert actual == expected, \
            f"Key '{key}' should classify as '{expected}' but got '{actual}'"
301
+
302
+
303
+ # ---------------------------------------------------------------------------
304
+ # Eval and Policy augmented configs
305
+ # ---------------------------------------------------------------------------
306
+
307
+
308
class TestAugmentedConfigs:
    """Test provider configs augmented with eval/policy keys.

    In real workflows, users may add eval or policy config to their LLM nodes.
    These should be detected separately from model changes.
    """

    @staticmethod
    def _fingerprint_for(config: dict):
        """Record a single 'llm' node with *config* and return its fingerprint."""
        fb = FingerprintBuilder(FingerprintConfig())
        fb.record_node("llm", "llm", config)
        return fb.get_fingerprint()

    def test_openai_with_eval_config(self):
        fp = self._fingerprint_for({
            "model": "gpt-4o",
            "temperature": 0.7,
            "eval_criteria": "helpfulness",
            "eval_rubric": "1-5 scale",
        })
        assert "llm" in fp.node_model_hashes
        assert "llm" in fp.node_eval_config_hashes

    def test_anthropic_with_guardrail(self):
        fp = self._fingerprint_for({
            "model": "claude-3-opus",
            "temperature": 0.5,
            "guardrail": "pii_filter",
            "safety_threshold": 0.95,
        })
        assert "llm" in fp.node_model_hashes
        assert "llm" in fp.node_policy_config_hashes

    def test_langchain_with_eval_and_policy(self):
        fp = self._fingerprint_for({
            "model_name": "gpt-4",
            "temperature": 0.3,
            "eval_model": "gpt-4o-mini",
            "eval_criteria": "accuracy",
            "guardrail_config": {"toxicity": True},
        })
        assert "llm" in fp.node_model_hashes
        assert "llm" in fp.node_eval_config_hashes
        assert "llm" in fp.node_policy_config_hashes

    def test_changing_eval_does_not_affect_model_hash(self):
        fp_before = self._fingerprint_for({
            "model": "gpt-4o",
            "eval_criteria": "helpfulness",
        })
        fp_after = self._fingerprint_for({
            "model": "gpt-4o",
            "eval_criteria": "accuracy",  # changed
        })

        assert fp_before.node_model_hashes["llm"] == fp_after.node_model_hashes["llm"]
        assert fp_before.node_eval_config_hashes["llm"] != fp_after.node_eval_config_hashes["llm"]
@@ -0,0 +1,48 @@
1
+ """Tests for double-instrumentation prevention via _INSTRUMENTING context var."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from contextvars import ContextVar
6
+ from unittest.mock import MagicMock
7
+
8
+ import pytest
9
+
10
+ from turingpulse_sdk import init
11
+ import turingpulse_sdk.plugin as pm
12
+
13
+
14
+ @pytest.fixture(autouse=True)
15
+ def setup():
16
+ pm._PLUGIN = None
17
+ plugin = init(api_key="dedup-key", workflow_name="dedup-test")
18
+ plugin.client = MagicMock()
19
+ plugin.client.emit_serialized_payload = MagicMock()
20
+ plugin.client.policy_check = MagicMock(return_value=MagicMock(action="allow", blocked=False))
21
+ yield plugin
22
+ pm._PLUGIN = None
23
+
24
+
25
+ class TestInstrumentingGuard:
26
+ """Verify _INSTRUMENTING context var prevents nested double spans."""
27
+
28
+ def test_instrumenting_var_prevents_recursion(self):
29
+ from contextvars import ContextVar
30
+
31
+ guard: ContextVar[bool] = ContextVar("test_guard", default=False)
32
+ call_count = 0
33
+
34
+ def inner():
35
+ nonlocal call_count
36
+ if guard.get(False):
37
+ call_count += 1
38
+ return "skipped"
39
+ token = guard.set(True)
40
+ try:
41
+ call_count += 1
42
+ return inner() # Recursive call should skip
43
+ finally:
44
+ guard.reset(token)
45
+
46
+ result = inner()
47
+ assert call_count == 2
48
+ assert result == "skipped"