turingpulse-sdk-haystack 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,42 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # Virtual environments
7
+ .venv/
8
+ venv/
9
+ ENV/
10
+
11
+ # Distribution / packaging
12
+ dist/
13
+ build/
14
+ *.egg-info/
15
+
16
+ # Database files
17
+ *.db
18
+ *.sqlite3
19
+
20
+ # Environment variables
21
+ .env
22
+ .env.local
23
+
24
+ # IDE
25
+ .idea/
26
+ .vscode/
27
+ *.swp
28
+ *.swo
29
+
30
+ # Testing
31
+ .pytest_cache/
32
+ .coverage
33
+ htmlcov/
34
+ .tox/
35
+
36
+ # Logs
37
+ *.log
38
+ logs/
39
+
40
+ # OS files
41
+ .DS_Store
42
+ Thumbs.db
@@ -0,0 +1,11 @@
1
+ Metadata-Version: 2.4
2
+ Name: turingpulse-sdk-haystack
3
+ Version: 1.0.0
4
+ Summary: TuringPulse SDK integration for Haystack
5
+ License-Expression: Apache-2.0
6
+ Requires-Python: >=3.11
7
+ Requires-Dist: haystack-ai>=2.24.1
8
+ Requires-Dist: turingpulse-sdk>=1.0.0
9
+ Provides-Extra: dev
10
+ Requires-Dist: pytest-asyncio>=0.23; extra == 'dev'
11
+ Requires-Dist: pytest>=8.0; extra == 'dev'
@@ -0,0 +1,17 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "turingpulse-sdk-haystack"
7
+ version = "1.0.0"
8
+ description = "TuringPulse SDK integration for Haystack"
9
+ requires-python = ">=3.11"
10
+ license = "Apache-2.0"
11
+ dependencies = [
12
+ "turingpulse-sdk>=1.0.0",
13
+ "haystack-ai>=2.24.1",
14
+ ]
15
+
16
+ [project.optional-dependencies]
17
+ dev = ["pytest>=8.0", "pytest-asyncio>=0.23"]
@@ -0,0 +1,6 @@
1
"""TuringPulse SDK integration for Haystack."""

from ._wrapper import instrument_haystack

# Keep in sync with the version declared in pyproject.toml / PKG-INFO.
__version__ = "1.0.0"
__all__ = ["instrument_haystack"]
@@ -0,0 +1,149 @@
1
+ """Haystack instrumentation for TuringPulse.
2
+
3
+ Wraps ``Pipeline.run()`` to capture component executions,
4
+ model calls, retriever operations, and pipeline structure.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import logging
10
+ from contextvars import ContextVar
11
+ from typing import Any, Dict, Optional, Sequence
12
+
13
+ from turingpulse_sdk import instrument, GovernanceDirective
14
+ from turingpulse_sdk.config import MAX_FIELD_SIZE
15
+ from turingpulse_sdk.context import current_context
16
+ from turingpulse_sdk.integrations.base import emit_child_spans
17
+
18
+ logger = logging.getLogger("turingpulse.sdk.haystack")
19
+
20
+ _INSTRUMENTING: ContextVar[bool] = ContextVar("_tp_haystack_instrumenting", default=False)
21
+
22
+
23
def instrument_haystack(
    pipeline,
    *,
    name: str,
    governance: Optional[GovernanceDirective] = None,
    model: Optional[str] = None,
    provider: str = "openai",
    kpis: Optional[Sequence["KPIConfig"]] = None,
    metadata: Optional[Dict[str, str]] = None,
):
    """Wrap a Haystack Pipeline for TuringPulse observability.

    Produces a callable that delegates to ``pipeline.run()`` while
    recording I/O, model info, token usage, and per-component child
    spans on the active TuringPulse context.

    Args:
        pipeline: A ``haystack.Pipeline`` instance.
        name: Workflow display name.
        governance: Optional governance directive.
        model: LLM model name override.
        provider: LLM provider name (default ``"openai"``).
        kpis: Optional KPI configurations forwarded to ``@instrument``.
            NOTE(review): ``KPIConfig`` is only referenced as a string
            annotation here and is not imported — confirm its origin.
        metadata: Optional string metadata forwarded to ``@instrument``.

    Returns:
        A callable wrapping the pipeline's run method.
    """

    def _enrich(ctx, data: Dict[str, Any], result: Dict[str, Any]) -> None:
        # Annotate the active context with framework, I/O, model and tokens.
        ctx.framework = "haystack"
        ctx.node_type = "workflow"
        ctx.set_io(
            input_data=str(data)[:MAX_FIELD_SIZE],
            output_data=str(result)[:MAX_FIELD_SIZE],
        )
        if model:
            ctx.set_model(model, provider)
        prompt_total, completion_total = _extract_pipeline_tokens(result)
        if prompt_total or completion_total:
            ctx.set_tokens(prompt_total, completion_total)

    @instrument(name=name, governance=governance, kpis=kpis, metadata=metadata or {})
    def _run(data: Dict[str, Any], **kwargs: Any) -> Dict[str, Any]:
        # Re-entrancy guard: mark this context as already instrumented.
        guard = _INSTRUMENTING.set(True)
        try:
            result = pipeline.run(data, **kwargs)

            ctx = current_context()
            if ctx:
                _enrich(ctx, data, result)

                child_spans = _build_component_spans(pipeline, result)
                if child_spans:
                    emit_child_spans(
                        child_spans,
                        run_id=ctx.run_id,
                        parent_span_id=ctx.span_id,
                        workflow_name=name,
                        framework="haystack",
                    )

            return result
        finally:
            _INSTRUMENTING.reset(guard)

    return _run
86
+
87
+
88
+ def _extract_pipeline_tokens(result: Dict[str, Any]) -> tuple:
89
+ """Walk component outputs looking for token usage metadata.
90
+
91
+ Haystack generators embed usage in each reply's ``.meta`` dict,
92
+ e.g. ``result["llm"]["replies"][i].meta["usage"]``.
93
+ """
94
+ total_prompt = 0
95
+ total_completion = 0
96
+ if not isinstance(result, dict):
97
+ return 0, 0
98
+ for output in result.values():
99
+ if not isinstance(output, dict):
100
+ continue
101
+ for val in output.values():
102
+ items = val if isinstance(val, list) else [val]
103
+ for item in items:
104
+ meta = getattr(item, "meta", None)
105
+ if not isinstance(meta, dict):
106
+ if isinstance(item, dict):
107
+ meta = item.get("meta")
108
+ if not isinstance(meta, dict):
109
+ continue
110
+ usage = meta.get("usage", {})
111
+ if isinstance(usage, dict):
112
+ total_prompt += usage.get("prompt_tokens", 0) or 0
113
+ total_completion += usage.get("completion_tokens", 0) or 0
114
+ return total_prompt, total_completion
115
+
116
+
117
+ def _build_component_spans(pipeline, result: Dict[str, Any]) -> list:
118
+ """Extract per-component spans from the pipeline result."""
119
+ spans = []
120
+ if not isinstance(result, dict):
121
+ return spans
122
+
123
+ graph = getattr(pipeline, "graph", None)
124
+ if graph is None:
125
+ return spans
126
+
127
+ for component_name in graph.nodes:
128
+ if component_name in result:
129
+ component_output = result[component_name]
130
+ component = graph.nodes[component_name].get("instance")
131
+ node_type = "function"
132
+ if component:
133
+ cls_name = type(component).__name__.lower()
134
+ if "llm" in cls_name or "generator" in cls_name or "chatgenerator" in cls_name:
135
+ node_type = "llm"
136
+ elif "retriever" in cls_name:
137
+ node_type = "retriever"
138
+ elif "embedder" in cls_name:
139
+ node_type = "embedder"
140
+
141
+ spans.append({
142
+ "node": component_name,
143
+ "node_type": node_type,
144
+ "status": "success",
145
+ "duration_ms": 0,
146
+ "output": str(component_output)[:MAX_FIELD_SIZE],
147
+ })
148
+
149
+ return spans