turingpulse-sdk-haystack 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
# Byte-compiled / optimized / DLL files
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*$py.class
|
|
5
|
+
|
|
6
|
+
# Virtual environments
|
|
7
|
+
.venv/
|
|
8
|
+
venv/
|
|
9
|
+
ENV/
|
|
10
|
+
|
|
11
|
+
# Distribution / packaging
|
|
12
|
+
dist/
|
|
13
|
+
build/
|
|
14
|
+
*.egg-info/
|
|
15
|
+
|
|
16
|
+
# Database files
|
|
17
|
+
*.db
|
|
18
|
+
*.sqlite3
|
|
19
|
+
|
|
20
|
+
# Environment variables
|
|
21
|
+
.env
|
|
22
|
+
.env.local
|
|
23
|
+
|
|
24
|
+
# IDE
|
|
25
|
+
.idea/
|
|
26
|
+
.vscode/
|
|
27
|
+
*.swp
|
|
28
|
+
*.swo
|
|
29
|
+
|
|
30
|
+
# Testing
|
|
31
|
+
.pytest_cache/
|
|
32
|
+
.coverage
|
|
33
|
+
htmlcov/
|
|
34
|
+
.tox/
|
|
35
|
+
|
|
36
|
+
# Logs
|
|
37
|
+
*.log
|
|
38
|
+
logs/
|
|
39
|
+
|
|
40
|
+
# OS files
|
|
41
|
+
.DS_Store
|
|
42
|
+
Thumbs.db
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: turingpulse-sdk-haystack
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: TuringPulse SDK integration for Haystack
|
|
5
|
+
License-Expression: Apache-2.0
|
|
6
|
+
Requires-Python: >=3.11
|
|
7
|
+
Requires-Dist: haystack-ai>=2.24.1
|
|
8
|
+
Requires-Dist: turingpulse-sdk>=1.0.0
|
|
9
|
+
Provides-Extra: dev
|
|
10
|
+
Requires-Dist: pytest-asyncio>=0.23; extra == 'dev'
|
|
11
|
+
Requires-Dist: pytest>=8.0; extra == 'dev'
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "turingpulse-sdk-haystack"
|
|
7
|
+
version = "1.0.0"
|
|
8
|
+
description = "TuringPulse SDK integration for Haystack"
|
|
9
|
+
requires-python = ">=3.11"
|
|
10
|
+
license = "Apache-2.0"
|
|
11
|
+
dependencies = [
|
|
12
|
+
"turingpulse-sdk>=1.0.0",
|
|
13
|
+
"haystack-ai>=2.24.1",
|
|
14
|
+
]
|
|
15
|
+
|
|
16
|
+
[project.optional-dependencies]
|
|
17
|
+
dev = ["pytest>=8.0", "pytest-asyncio>=0.23"]
|
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
"""Haystack instrumentation for TuringPulse.
|
|
2
|
+
|
|
3
|
+
Wraps ``Pipeline.run()`` to capture component executions,
|
|
4
|
+
model calls, retriever operations, and pipeline structure.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import logging
|
|
10
|
+
from contextvars import ContextVar
|
|
11
|
+
from typing import Any, Dict, Optional, Sequence
|
|
12
|
+
|
|
13
|
+
from turingpulse_sdk import instrument, GovernanceDirective
|
|
14
|
+
from turingpulse_sdk.config import MAX_FIELD_SIZE
|
|
15
|
+
from turingpulse_sdk.context import current_context
|
|
16
|
+
from turingpulse_sdk.integrations.base import emit_child_spans
|
|
17
|
+
|
|
18
|
+
logger = logging.getLogger("turingpulse.sdk.haystack")
|
|
19
|
+
|
|
20
|
+
_INSTRUMENTING: ContextVar[bool] = ContextVar("_tp_haystack_instrumenting", default=False)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def instrument_haystack(
    pipeline,
    *,
    name: str,
    governance: Optional[GovernanceDirective] = None,
    model: Optional[str] = None,
    provider: str = "openai",
    kpis: Optional[Sequence["KPIConfig"]] = None,
    metadata: Optional[Dict[str, str]] = None,
):
    """Wrap a Haystack Pipeline for TuringPulse observability.

    Returns a callable wrapping ``pipeline.run()``.

    Args:
        pipeline: A ``haystack.Pipeline`` instance.
        name: Workflow display name.
        governance: Optional governance directive.
        model: LLM model name override.
        provider: LLM provider name.
        kpis: Optional KPI configurations forwarded to ``instrument``.
        metadata: Optional string key/value metadata attached to the workflow.

    Returns:
        A callable wrapping the pipeline's run method.
    """
    # NOTE(review): ``KPIConfig`` is referenced only as a lazy annotation
    # (``from __future__ import annotations``) and is never imported in this
    # module -- confirm the intended import path before exposing it further.

    @instrument(name=name, governance=governance, kpis=kpis, metadata=metadata or {})
    def _run(data: Dict[str, Any], **kwargs: Any) -> Dict[str, Any]:
        # Flag that instrumentation is active for this context; always
        # restored in ``finally`` even when the pipeline raises.
        token = _INSTRUMENTING.set(True)
        try:
            result = pipeline.run(data, **kwargs)

            ctx = current_context()
            if ctx:
                ctx.framework = "haystack"
                ctx.node_type = "workflow"

                # Truncate IO payloads so oversized pipeline data cannot
                # inflate the emitted record beyond the SDK's field limit.
                ctx.set_io(
                    input_data=str(data)[:MAX_FIELD_SIZE],
                    output_data=str(result)[:MAX_FIELD_SIZE],
                )

                if model:
                    ctx.set_model(model, provider)

                # Only record token counts when at least one is non-zero.
                total_prompt, total_completion = _extract_pipeline_tokens(result)
                if total_prompt or total_completion:
                    ctx.set_tokens(total_prompt, total_completion)

                spans = _build_component_spans(pipeline, result)
                if spans:
                    emit_child_spans(
                        spans,
                        run_id=ctx.run_id,
                        parent_span_id=ctx.span_id,
                        workflow_name=name,
                        framework="haystack",
                    )

            return result
        finally:
            _INSTRUMENTING.reset(token)

    return _run
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def _extract_pipeline_tokens(result: Dict[str, Any]) -> tuple:
|
|
89
|
+
"""Walk component outputs looking for token usage metadata.
|
|
90
|
+
|
|
91
|
+
Haystack generators embed usage in each reply's ``.meta`` dict,
|
|
92
|
+
e.g. ``result["llm"]["replies"][i].meta["usage"]``.
|
|
93
|
+
"""
|
|
94
|
+
total_prompt = 0
|
|
95
|
+
total_completion = 0
|
|
96
|
+
if not isinstance(result, dict):
|
|
97
|
+
return 0, 0
|
|
98
|
+
for output in result.values():
|
|
99
|
+
if not isinstance(output, dict):
|
|
100
|
+
continue
|
|
101
|
+
for val in output.values():
|
|
102
|
+
items = val if isinstance(val, list) else [val]
|
|
103
|
+
for item in items:
|
|
104
|
+
meta = getattr(item, "meta", None)
|
|
105
|
+
if not isinstance(meta, dict):
|
|
106
|
+
if isinstance(item, dict):
|
|
107
|
+
meta = item.get("meta")
|
|
108
|
+
if not isinstance(meta, dict):
|
|
109
|
+
continue
|
|
110
|
+
usage = meta.get("usage", {})
|
|
111
|
+
if isinstance(usage, dict):
|
|
112
|
+
total_prompt += usage.get("prompt_tokens", 0) or 0
|
|
113
|
+
total_completion += usage.get("completion_tokens", 0) or 0
|
|
114
|
+
return total_prompt, total_completion
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def _build_component_spans(pipeline, result: Dict[str, Any]) -> list:
    """Extract per-component spans from the pipeline result.

    Args:
        pipeline: A ``haystack.Pipeline``; its ``graph`` attribute supplies
            component names and (optionally) component instances under the
            ``"instance"`` node key.
        result: The mapping returned by ``Pipeline.run()``.

    Returns:
        A list of span dicts, one per graph component that produced output,
        each carrying the node name, an inferred node type, a success
        status, and a truncated output preview.
    """
    spans = []
    if not isinstance(result, dict):
        return spans

    graph = getattr(pipeline, "graph", None)
    if graph is None:
        return spans

    for component_name in graph.nodes:
        # Only components that actually produced output appear in the result.
        if component_name not in result:
            continue

        component_output = result[component_name]
        component = graph.nodes[component_name].get("instance")

        # Infer the node type from the component's class name; anything
        # unrecognized falls back to a generic "function".
        node_type = "function"
        if component:
            cls_name = type(component).__name__.lower()
            # "generator" also matches chat generators (e.g.
            # OpenAIChatGenerator), so no separate check is needed.
            if "llm" in cls_name or "generator" in cls_name:
                node_type = "llm"
            elif "retriever" in cls_name:
                node_type = "retriever"
            elif "embedder" in cls_name:
                node_type = "embedder"

        spans.append({
            "node": component_name,
            "node_type": node_type,
            "status": "success",
            # Per-component timing is unknown post-hoc; components are not
            # individually timed by this wrapper.
            "duration_ms": 0,
            "output": str(component_output)[:MAX_FIELD_SIZE],
        })

    return spans
|