xray-sdk 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- xray_sdk-0.1.0/PKG-INFO +190 -0
- xray_sdk-0.1.0/README.md +180 -0
- xray_sdk-0.1.0/pyproject.toml +18 -0
- xray_sdk-0.1.0/setup.cfg +4 -0
- xray_sdk-0.1.0/xray_sdk/__init__.py +10 -0
- xray_sdk-0.1.0/xray_sdk/client.py +198 -0
- xray_sdk-0.1.0/xray_sdk/run.py +128 -0
- xray_sdk-0.1.0/xray_sdk/step.py +36 -0
- xray_sdk-0.1.0/xray_sdk.egg-info/PKG-INFO +190 -0
- xray_sdk-0.1.0/xray_sdk.egg-info/SOURCES.txt +11 -0
- xray_sdk-0.1.0/xray_sdk.egg-info/dependency_links.txt +1 -0
- xray_sdk-0.1.0/xray_sdk.egg-info/requires.txt +1 -0
- xray_sdk-0.1.0/xray_sdk.egg-info/top_level.txt +1 -0
xray_sdk-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,190 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: xray-sdk
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Lightweight debugging SDK for multi-step AI pipelines.
|
|
5
|
+
Author: Equal Collective
|
|
6
|
+
License: Proprietary
|
|
7
|
+
Requires-Python: >=3.9
|
|
8
|
+
Description-Content-Type: text/markdown
|
|
9
|
+
Requires-Dist: requests>=2.31.0
|
|
10
|
+
|
|
11
|
+
# X-Ray SDK and API
|
|
12
|
+
|
|
13
|
+
A lightweight debugging system for multi-step AI pipelines that captures execution data and uses AI to identify faulty steps.
|
|
14
|
+
|
|
15
|
+
## Quick Start
|
|
16
|
+
|
|
17
|
+
### 1. Install Dependencies
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
pip3 install flask flask-sqlalchemy flask-cors psycopg2-binary openai python-dotenv requests
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
### 2. Configure Environment
|
|
24
|
+
|
|
25
|
+
Create a `.env` file:
|
|
26
|
+
```env
|
|
27
|
+
DATABASE_URL=postgresql://user:pass@host/dbname
|
|
28
|
+
CEREBRAS_API_KEY=your-api-key
|
|
29
|
+
CEREBRAS_BASE_URL=https://api.cerebras.ai/v1
|
|
30
|
+
CEREBRAS_MODEL=llama3.1-8b
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
### 3. Initialize Database
|
|
34
|
+
|
|
35
|
+
```bash
|
|
36
|
+
python3 -c "
|
|
37
|
+
from dotenv import load_dotenv
|
|
38
|
+
load_dotenv()
|
|
39
|
+
from xray_api.app import create_app
|
|
40
|
+
from xray_api.models import db
|
|
41
|
+
|
|
42
|
+
app = create_app()
|
|
43
|
+
with app.app_context():
|
|
44
|
+
db.create_all()
|
|
45
|
+
print('Database tables created!')
|
|
46
|
+
"
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
### 4. Start the API Server
|
|
50
|
+
|
|
51
|
+
```bash
|
|
52
|
+
python3 -m xray_api.app
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
### 5. Run Example
|
|
56
|
+
|
|
57
|
+
```bash
|
|
58
|
+
python3 examples/amazon_competitor.py
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
## SDK Usage
|
|
62
|
+
|
|
63
|
+
```python
|
|
64
|
+
from xray_sdk import XRayClient, XRayRun, XRayStep
|
|
65
|
+
|
|
66
|
+
# Create a run
|
|
67
|
+
run = XRayRun("my_pipeline", metadata={"context": "test"}, sample_size=50)
|
|
68
|
+
|
|
69
|
+
# Add steps (after your pipeline executes)
|
|
70
|
+
run.add_step(XRayStep(
|
|
71
|
+
name="keyword_generation",
|
|
72
|
+
order=1,
|
|
73
|
+
inputs={"title": "Phone Case"},
|
|
74
|
+
outputs={"keywords": ["phone case", "iphone"]},
|
|
75
|
+
description="Generate search keywords from the title."  # explain what this step does
|
|
76
|
+
))
|
|
77
|
+
|
|
78
|
+
run.add_step(XRayStep(
|
|
79
|
+
name="search",
|
|
80
|
+
order=2,
|
|
81
|
+
inputs={"keywords": ["phone case", "iphone"]},
|
|
82
|
+
outputs={"candidates_count": 100},
|
|
83
|
+
description="Search the catalog for items matching the keywords."
|
|
84
|
+
))
|
|
85
|
+
|
|
86
|
+
run.add_step(XRayStep(
|
|
87
|
+
name="filter",
|
|
88
|
+
order=3,
|
|
89
|
+
inputs={"candidates_count": 100},
|
|
90
|
+
outputs={"filtered_count": 5},
|
|
91
|
+
description="Filter candidates by rating.",
|
|
92
|
+
reasons={"dropped_items": [{"id": 123, "reason": "low rating"}]},
|
|
93
|
+
metrics={"elimination_rate": 0.95}
|
|
94
|
+
))
|
|
95
|
+
|
|
96
|
+
# Send for analysis
|
|
97
|
+
client = XRayClient("http://localhost:5000")
|
|
98
|
+
result = client.send(run)
|
|
99
|
+
|
|
100
|
+
print(result["analysis"])
|
|
101
|
+
# {
|
|
102
|
+
# "faulty_step": "keyword_generation",
|
|
103
|
+
# "reason": "...",
|
|
104
|
+
# "suggestion": "..."
|
|
105
|
+
# }
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
## SDK Client Methods
|
|
109
|
+
|
|
110
|
+
| Method | Description |
|
|
111
|
+
|--------|-------------|
|
|
112
|
+
| `send(run, analyze=True)` | Send run to API; spools locally if unavailable |
|
|
113
|
+
| `spool(run)` | Manually save run to `.xray_spool/` |
|
|
114
|
+
| `flush_spool()` | Send newest spooled run and delete all spool files |
|
|
115
|
+
| `list_pipelines()` | List all pipelines |
|
|
116
|
+
| `list_runs(pipeline, status, limit)` | List runs with filters |
|
|
117
|
+
| `get_run(run_id)` | Get run with all steps |
|
|
118
|
+
| `get_analysis(run_id)` | Get analysis result only |
|
|
119
|
+
| `search_steps(step_name, pipeline, limit)` | Search steps across runs |
|
|
120
|
+
|
|
121
|
+
## API Endpoints
|
|
122
|
+
|
|
123
|
+
POST
|
|
124
|
+
- `/api/ingest`: Store a run and, by default, trigger analysis (`analyze=false` to skip)
|
|
125
|
+
- `/api/analyze/<id>`: Re-trigger analysis for an existing run
|
|
126
|
+
|
|
127
|
+
GET
|
|
128
|
+
- `/api/runs`: List runs (filter by pipeline/status)
|
|
129
|
+
- `/api/runs/<id>`: Get a run with all steps
|
|
130
|
+
- `/api/runs/<id>/analysis`: Get analysis only for a run
|
|
131
|
+
- `/api/pipelines`: List pipelines
|
|
132
|
+
- `/api/search/steps`: Search steps by name/pipeline
|
|
133
|
+
- `/health`: Health check
|
|
134
|
+
|
|
135
|
+
## Project Structure
|
|
136
|
+
|
|
137
|
+
```
|
|
138
|
+
├── xray_sdk/ # Python SDK
|
|
139
|
+
│ ├── step.py # XRayStep dataclass
|
|
140
|
+
│ ├── run.py # XRayRun with auto-summarization
|
|
141
|
+
│ └── client.py # HTTP client with spool fallback
|
|
142
|
+
├── xray_api/ # Flask API
|
|
143
|
+
│ ├── app.py # Flask entry point
|
|
144
|
+
│ ├── models.py # Database models
|
|
145
|
+
│ ├── routes/ # API endpoints
|
|
146
|
+
│ └── agents/ # Cerebras AI analyzer
|
|
147
|
+
├── examples/ # Example scripts
|
|
148
|
+
├── ARCHITECTURE.md # Detailed architecture doc
|
|
149
|
+
└── requirements.txt # Dependencies
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
## Features
|
|
153
|
+
|
|
154
|
+
- **End-of-pipeline integration**: Add steps as your pipeline runs, send at the end
|
|
155
|
+
- **Deterministic summarization**: Large outputs are summarized with head/tail sampling for reproducible debugging
|
|
156
|
+
- **Spool fallback**: If API is down, saves to `.xray_spool/` for later submission
|
|
157
|
+
- **Step intent hints**: Optional one-line descriptions per step improve semantic analysis
|
|
158
|
+
- **Server-side safety net**: The API summarizes oversized inputs/outputs if a client skips SDK summarization
|
|
159
|
+
- **AI-powered analysis**: Uses Cerebras LLM with a 2-step sliding window when needed to identify semantic mismatches and faulty steps
|
|
160
|
+
|
|
161
|
+
## Approach
|
|
162
|
+
|
|
163
|
+
The system is designed around these key principles:
|
|
164
|
+
1. **Minimal Integration Burden**: The SDK requires only wrapping each step's inputs/outputs after execution. Users can enrich this data with optional descriptions for both the pipeline and individual steps, making the system extensible and allowing the AI to understand the intent behind any domain-specific logic without requiring code changes.
|
|
165
|
+
|
|
166
|
+
2. **Sliding Window Analysis**: Instead of sending entire pipelines to the LLM (which can exceed token limits), we analyze 2 consecutive steps at a time. This keeps prompts under 65K tokens while still detecting data flow issues between adjacent steps.
|
|
167
|
+
|
|
168
|
+
3. **Semantic Context via Descriptions**: Pipeline and step descriptions tell the LLM what *type* of pipeline (e-commerce, document processing, etc.) and what each step *should* do. This helps detect semantic mismatches beyond just structural data flow.
|
|
169
|
+
|
|
170
|
+
4. **Deterministic Summarization**: Large outputs (500+ items) are summarized using head/tail sampling (first N + last N items). This is deterministic and preserves edge cases that often reveal bugs.
|
|
171
|
+
|
|
172
|
+
5. **Graceful Degradation**: If the API is unavailable, runs are spooled locally and can be flushed later with `client.flush_spool()`.
|
|
173
|
+
|
|
174
|
+
## Known Limitations
|
|
175
|
+
|
|
176
|
+
- **No cross-window context**: When analyzing step 3→4, the LLM doesn't see steps 1→2. Issues that span multiple transitions may be missed unless they were already detected in an earlier window.
|
|
177
|
+
|
|
178
|
+
- **Single LLM provider**: Currently only supports Cerebras API. Other providers require code changes.
|
|
179
|
+
|
|
180
|
+
- **Summarization loses detail**: Very large payloads are aggressively trimmed. Some bugs may be hidden in truncated data.
|
|
181
|
+
|
|
182
|
+
## Future Improvements
|
|
183
|
+
|
|
184
|
+
- **Docker image**: Pre-built container for one-command local setup - developers just run `docker compose up` instead of installing packages, databases, etc.
|
|
185
|
+
- **Local LLM support**: Run lightweight local models (e.g., Ollama, llama.cpp) to eliminate third-party API dependency and reduce costs
|
|
186
|
+
- **Multi-LLM support**: Add OpenAI, Anthropic, and other cloud providers via configurable adapters
|
|
187
|
+
- **Pipeline-level summary**: Generate a one-pass summary of the entire pipeline before window analysis
|
|
188
|
+
- **Streaming results**: Return partial analysis as each window completes
|
|
189
|
+
- **Web dashboard**: Visual timeline of pipeline runs with highlighted faulty steps
|
|
190
|
+
- **Comparison mode**: Compare two runs of the same pipeline to spot regressions
|
xray_sdk-0.1.0/README.md
ADDED
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
# X-Ray SDK and API
|
|
2
|
+
|
|
3
|
+
A lightweight debugging system for multi-step AI pipelines that captures execution data and uses AI to identify faulty steps.
|
|
4
|
+
|
|
5
|
+
## Quick Start
|
|
6
|
+
|
|
7
|
+
### 1. Install Dependencies
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
pip3 install flask flask-sqlalchemy flask-cors psycopg2-binary openai python-dotenv requests
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
### 2. Configure Environment
|
|
14
|
+
|
|
15
|
+
Create a `.env` file:
|
|
16
|
+
```env
|
|
17
|
+
DATABASE_URL=postgresql://user:pass@host/dbname
|
|
18
|
+
CEREBRAS_API_KEY=your-api-key
|
|
19
|
+
CEREBRAS_BASE_URL=https://api.cerebras.ai/v1
|
|
20
|
+
CEREBRAS_MODEL=llama3.1-8b
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
### 3. Initialize Database
|
|
24
|
+
|
|
25
|
+
```bash
|
|
26
|
+
python3 -c "
|
|
27
|
+
from dotenv import load_dotenv
|
|
28
|
+
load_dotenv()
|
|
29
|
+
from xray_api.app import create_app
|
|
30
|
+
from xray_api.models import db
|
|
31
|
+
|
|
32
|
+
app = create_app()
|
|
33
|
+
with app.app_context():
|
|
34
|
+
db.create_all()
|
|
35
|
+
print('Database tables created!')
|
|
36
|
+
"
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
### 4. Start the API Server
|
|
40
|
+
|
|
41
|
+
```bash
|
|
42
|
+
python3 -m xray_api.app
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
### 5. Run Example
|
|
46
|
+
|
|
47
|
+
```bash
|
|
48
|
+
python3 examples/amazon_competitor.py
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
## SDK Usage
|
|
52
|
+
|
|
53
|
+
```python
|
|
54
|
+
from xray_sdk import XRayClient, XRayRun, XRayStep
|
|
55
|
+
|
|
56
|
+
# Create a run
|
|
57
|
+
run = XRayRun("my_pipeline", metadata={"context": "test"}, sample_size=50)
|
|
58
|
+
|
|
59
|
+
# Add steps (after your pipeline executes)
|
|
60
|
+
run.add_step(XRayStep(
|
|
61
|
+
name="keyword_generation",
|
|
62
|
+
order=1,
|
|
63
|
+
inputs={"title": "Phone Case"},
|
|
64
|
+
outputs={"keywords": ["phone case", "iphone"]},
|
|
65
|
+
description="Generate search keywords from the title."  # explain what this step does
|
|
66
|
+
))
|
|
67
|
+
|
|
68
|
+
run.add_step(XRayStep(
|
|
69
|
+
name="search",
|
|
70
|
+
order=2,
|
|
71
|
+
inputs={"keywords": ["phone case", "iphone"]},
|
|
72
|
+
outputs={"candidates_count": 100},
|
|
73
|
+
description="Search the catalog for items matching the keywords."
|
|
74
|
+
))
|
|
75
|
+
|
|
76
|
+
run.add_step(XRayStep(
|
|
77
|
+
name="filter",
|
|
78
|
+
order=3,
|
|
79
|
+
inputs={"candidates_count": 100},
|
|
80
|
+
outputs={"filtered_count": 5},
|
|
81
|
+
description="Filter candidates by rating.",
|
|
82
|
+
reasons={"dropped_items": [{"id": 123, "reason": "low rating"}]},
|
|
83
|
+
metrics={"elimination_rate": 0.95}
|
|
84
|
+
))
|
|
85
|
+
|
|
86
|
+
# Send for analysis
|
|
87
|
+
client = XRayClient("http://localhost:5000")
|
|
88
|
+
result = client.send(run)
|
|
89
|
+
|
|
90
|
+
print(result["analysis"])
|
|
91
|
+
# {
|
|
92
|
+
# "faulty_step": "keyword_generation",
|
|
93
|
+
# "reason": "...",
|
|
94
|
+
# "suggestion": "..."
|
|
95
|
+
# }
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
## SDK Client Methods
|
|
99
|
+
|
|
100
|
+
| Method | Description |
|
|
101
|
+
|--------|-------------|
|
|
102
|
+
| `send(run, analyze=True)` | Send run to API; spools locally if unavailable |
|
|
103
|
+
| `spool(run)` | Manually save run to `.xray_spool/` |
|
|
104
|
+
| `flush_spool()` | Send newest spooled run and delete all spool files |
|
|
105
|
+
| `list_pipelines()` | List all pipelines |
|
|
106
|
+
| `list_runs(pipeline, status, limit)` | List runs with filters |
|
|
107
|
+
| `get_run(run_id)` | Get run with all steps |
|
|
108
|
+
| `get_analysis(run_id)` | Get analysis result only |
|
|
109
|
+
| `search_steps(step_name, pipeline, limit)` | Search steps across runs |
|
|
110
|
+
|
|
111
|
+
## API Endpoints
|
|
112
|
+
|
|
113
|
+
POST
|
|
114
|
+
- `/api/ingest`: Store a run and, by default, trigger analysis (`analyze=false` to skip)
|
|
115
|
+
- `/api/analyze/<id>`: Re-trigger analysis for an existing run
|
|
116
|
+
|
|
117
|
+
GET
|
|
118
|
+
- `/api/runs`: List runs (filter by pipeline/status)
|
|
119
|
+
- `/api/runs/<id>`: Get a run with all steps
|
|
120
|
+
- `/api/runs/<id>/analysis`: Get analysis only for a run
|
|
121
|
+
- `/api/pipelines`: List pipelines
|
|
122
|
+
- `/api/search/steps`: Search steps by name/pipeline
|
|
123
|
+
- `/health`: Health check
|
|
124
|
+
|
|
125
|
+
## Project Structure
|
|
126
|
+
|
|
127
|
+
```
|
|
128
|
+
├── xray_sdk/ # Python SDK
|
|
129
|
+
│ ├── step.py # XRayStep dataclass
|
|
130
|
+
│ ├── run.py # XRayRun with auto-summarization
|
|
131
|
+
│ └── client.py # HTTP client with spool fallback
|
|
132
|
+
├── xray_api/ # Flask API
|
|
133
|
+
│ ├── app.py # Flask entry point
|
|
134
|
+
│ ├── models.py # Database models
|
|
135
|
+
│ ├── routes/ # API endpoints
|
|
136
|
+
│ └── agents/ # Cerebras AI analyzer
|
|
137
|
+
├── examples/ # Example scripts
|
|
138
|
+
├── ARCHITECTURE.md # Detailed architecture doc
|
|
139
|
+
└── requirements.txt # Dependencies
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
## Features
|
|
143
|
+
|
|
144
|
+
- **End-of-pipeline integration**: Add steps as your pipeline runs, send at the end
|
|
145
|
+
- **Deterministic summarization**: Large outputs are summarized with head/tail sampling for reproducible debugging
|
|
146
|
+
- **Spool fallback**: If API is down, saves to `.xray_spool/` for later submission
|
|
147
|
+
- **Step intent hints**: Optional one-line descriptions per step improve semantic analysis
|
|
148
|
+
- **Server-side safety net**: The API summarizes oversized inputs/outputs if a client skips SDK summarization
|
|
149
|
+
- **AI-powered analysis**: Uses Cerebras LLM with a 2-step sliding window when needed to identify semantic mismatches and faulty steps
|
|
150
|
+
|
|
151
|
+
## Approach
|
|
152
|
+
|
|
153
|
+
The system is designed around these key principles:
|
|
154
|
+
1. **Minimal Integration Burden**: The SDK requires only wrapping each step's inputs/outputs after execution. Users can enrich this data with optional descriptions for both the pipeline and individual steps, making the system extensible and allowing the AI to understand the intent behind any domain-specific logic without requiring code changes.
|
|
155
|
+
|
|
156
|
+
2. **Sliding Window Analysis**: Instead of sending entire pipelines to the LLM (which can exceed token limits), we analyze 2 consecutive steps at a time. This keeps prompts under 65K tokens while still detecting data flow issues between adjacent steps.
|
|
157
|
+
|
|
158
|
+
3. **Semantic Context via Descriptions**: Pipeline and step descriptions tell the LLM what *type* of pipeline (e-commerce, document processing, etc.) and what each step *should* do. This helps detect semantic mismatches beyond just structural data flow.
|
|
159
|
+
|
|
160
|
+
4. **Deterministic Summarization**: Large outputs (500+ items) are summarized using head/tail sampling (first N + last N items). This is deterministic and preserves edge cases that often reveal bugs.
|
|
161
|
+
|
|
162
|
+
5. **Graceful Degradation**: If the API is unavailable, runs are spooled locally and can be flushed later with `client.flush_spool()`.
|
|
163
|
+
|
|
164
|
+
## Known Limitations
|
|
165
|
+
|
|
166
|
+
- **No cross-window context**: When analyzing step 3→4, the LLM doesn't see steps 1→2. Issues that span multiple transitions may be missed unless they were already detected in an earlier window.
|
|
167
|
+
|
|
168
|
+
- **Single LLM provider**: Currently only supports Cerebras API. Other providers require code changes.
|
|
169
|
+
|
|
170
|
+
- **Summarization loses detail**: Very large payloads are aggressively trimmed. Some bugs may be hidden in truncated data.
|
|
171
|
+
|
|
172
|
+
## Future Improvements
|
|
173
|
+
|
|
174
|
+
- **Docker image**: Pre-built container for one-command local setup - developers just run `docker compose up` instead of installing packages, databases, etc.
|
|
175
|
+
- **Local LLM support**: Run lightweight local models (e.g., Ollama, llama.cpp) to eliminate third-party API dependency and reduce costs
|
|
176
|
+
- **Multi-LLM support**: Add OpenAI, Anthropic, and other cloud providers via configurable adapters
|
|
177
|
+
- **Pipeline-level summary**: Generate a one-pass summary of the entire pipeline before window analysis
|
|
178
|
+
- **Streaming results**: Return partial analysis as each window completes
|
|
179
|
+
- **Web dashboard**: Visual timeline of pipeline runs with highlighted faulty steps
|
|
180
|
+
- **Comparison mode**: Compare two runs of the same pipeline to spot regressions
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=69.0", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "xray-sdk"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Lightweight debugging SDK for multi-step AI pipelines."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.9"
|
|
11
|
+
license = { text = "Proprietary" }
|
|
12
|
+
authors = [{ name = "Equal Collective" }]
|
|
13
|
+
dependencies = [
|
|
14
|
+
"requests>=2.31.0",
|
|
15
|
+
]
|
|
16
|
+
|
|
17
|
+
[tool.setuptools.packages.find]
|
|
18
|
+
include = ["xray_sdk"]
|
xray_sdk-0.1.0/setup.cfg
ADDED
|
@@ -0,0 +1,198 @@
|
|
|
1
|
+
"""
|
|
2
|
+
XRayClient - Sends run data to the X-Ray API for analysis
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
import json
|
|
7
|
+
import requests
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import Dict, Any, Optional
|
|
10
|
+
from .run import XRayRun
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class XRayClient:
    """
    Client for sending pipeline runs to the X-Ray API.

    Features:
    - Sends run data to API for AI-powered analysis
    - Spools to local file if API is unavailable
    - Supports API key authentication
    """

    DEFAULT_SPOOL_DIR = ".xray_spool"

    def __init__(self, api_url: str, api_key: Optional[str] = None, timeout: int = 180):
        """
        Initialize the X-Ray client.

        Args:
            api_url: Base URL of the X-Ray API (e.g., "http://localhost:5000").
                A trailing slash is stripped.
            api_key: Optional API key; when set it is sent as the ``X-API-Key`` header.
            timeout: Request timeout in seconds (default: 180 for LLM analysis)
        """
        self.api_url = api_url.rstrip("/")
        self.api_key = api_key
        self.timeout = timeout

    def _headers(self) -> Dict[str, str]:
        """Build headers for API requests."""
        headers = {"Content-Type": "application/json"}
        if self.api_key:
            headers["X-API-Key"] = self.api_key
        return headers

    def _get(self, path: str, params: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
        """GET ``path`` relative to the API base URL and return the decoded JSON body.

        Raises:
            requests.exceptions.RequestException: On connection errors, timeouts
                or non-2xx responses (via ``raise_for_status``).
        """
        response = requests.get(
            f"{self.api_url}{path}",
            params=params,
            headers=self._headers(),
            timeout=self.timeout,
        )
        response.raise_for_status()
        return response.json()

    def send(self, run: XRayRun, analyze: bool = True) -> Dict[str, Any]:
        """
        Send a run to the X-Ray API for storage and optional analysis.

        Request failures are NOT raised to the caller: any
        ``requests.exceptions.RequestException`` (connection error, timeout,
        non-2xx status) causes the run to be spooled to disk instead.

        Args:
            run: The XRayRun to send
            analyze: Whether to trigger AI analysis (default: True)

        Returns:
            On success, the decoded JSON response of ``POST /api/ingest``.
            On request failure, a dict with the keys ``error`` (message),
            ``spooled`` (always True) and ``spool_path`` (file that was written).
        """
        payload = run.to_dict()
        payload["analyze"] = analyze

        try:
            response = requests.post(
                f"{self.api_url}/api/ingest",
                json=payload,
                headers=self._headers(),
                timeout=self.timeout,
            )
            response.raise_for_status()
            return response.json()
        except requests.exceptions.RequestException as e:
            # Spool locally if the API call failed. Note the spooled file
            # holds run.to_dict() only, i.e. without the "analyze" flag.
            spool_path = self.spool(run)
            return {
                "error": str(e),
                "spooled": True,
                "spool_path": str(spool_path),
            }

    def spool(self, run: XRayRun, spool_dir: Optional[str] = None) -> Path:
        """
        Save run data to local file for later submission.

        The file is named ``<pipeline_name>_<timestamp>.json``. Path separators
        and other characters that are unsafe in file names are replaced with
        ``_`` so that a pipeline name such as ``"team/ranking"`` cannot point
        outside (or into a missing subdirectory of) the spool directory.

        Args:
            run: The XRayRun to spool
            spool_dir: Directory to save files (default: .xray_spool)

        Returns:
            Path to the spooled file
        """
        import datetime
        import re

        target_dir = Path(spool_dir or self.DEFAULT_SPOOL_DIR)
        target_dir.mkdir(parents=True, exist_ok=True)

        # Keep the name recognisable but strip anything that could change
        # the directory or is rejected by common filesystems.
        safe_name = re.sub(r'[\\/:*?"<>|\x00-\x1f]+', "_", str(run.pipeline_name)) or "run"
        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S_%f")
        filepath = target_dir / f"{safe_name}_{timestamp}.json"

        with open(filepath, "w", encoding="utf-8") as f:
            json.dump(run.to_dict(), f, indent=2, default=str)

        return filepath

    def list_pipelines(self) -> Dict[str, Any]:
        """List all pipelines."""
        return self._get("/api/pipelines")

    def list_runs(
        self,
        pipeline: Optional[str] = None,
        status: Optional[str] = None,
        limit: int = 50,
    ) -> Dict[str, Any]:
        """List runs with optional filters."""
        params: Dict[str, Any] = {"limit": limit}
        if pipeline:
            params["pipeline"] = pipeline
        if status:
            params["status"] = status
        return self._get("/api/runs", params=params)

    def get_run(self, run_id: str) -> Dict[str, Any]:
        """Get a single run with all its steps."""
        return self._get(f"/api/runs/{run_id}")

    def get_analysis(self, run_id: str) -> Dict[str, Any]:
        """Get analysis result for a run."""
        return self._get(f"/api/runs/{run_id}/analysis")

    def search_steps(
        self,
        step_name: Optional[str] = None,
        pipeline: Optional[str] = None,
        limit: int = 50,
    ) -> Dict[str, Any]:
        """Search steps across runs."""
        params: Dict[str, Any] = {"limit": limit}
        if step_name:
            params["step_name"] = step_name
        if pipeline:
            params["pipeline"] = pipeline
        return self._get("/api/search/steps", params=params)

    def flush_spool(self, spool_dir: Optional[str] = None) -> Dict[str, Any]:
        """
        Send the newest spooled run to the API and delete all spooled files.

        WARNING: only the most recently modified ``*.json`` file is uploaded.
        Once that upload succeeds, every spooled file (including older runs
        that were never sent) is deleted.

        Args:
            spool_dir: Directory containing spooled files

        Returns:
            Dict that always contains ``flushed``, ``failed`` and ``errors``.
            When a file was attempted it also contains ``sent_file`` and, on
            success, ``response``. A failure to delete a file after a
            successful upload is listed in ``errors`` but does not count as a
            failed send.
        """
        spool_path = Path(spool_dir or self.DEFAULT_SPOOL_DIR)
        if not spool_path.exists():
            return {"flushed": 0, "failed": 0, "errors": []}

        files = list(spool_path.glob("*.json"))
        if not files:
            return {"flushed": 0, "failed": 0, "errors": []}

        newest = max(files, key=lambda p: p.stat().st_mtime)
        results: Dict[str, Any] = {
            "flushed": 0,
            "failed": 0,
            "errors": [],
            "sent_file": str(newest),
        }

        try:
            with open(newest, encoding="utf-8") as f:
                data = json.load(f)

            response = requests.post(
                f"{self.api_url}/api/ingest",
                json=data,
                headers=self._headers(),
                timeout=self.timeout,
            )
            response.raise_for_status()
            results["response"] = response.json()
        except Exception as e:
            # Deliberate best-effort boundary: unreadable file, bad JSON and
            # any request failure are all reported instead of raised.
            results["failed"] = 1
            results["errors"].append({"file": str(newest), "error": str(e)})
            return results

        results["flushed"] = 1

        # Delete all spooled files after successful send of newest. One file
        # that cannot be removed must not stop the rest from being cleaned up.
        for filepath in files:
            try:
                filepath.unlink()
            except OSError as e:
                results["errors"].append({"file": str(filepath), "error": str(e)})

        return results
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
"""
|
|
2
|
+
XRayRun - Represents a complete pipeline execution with multiple steps
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
from typing import List, Dict, Any, Optional
|
|
7
|
+
from .step import XRayStep
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class XRayRun:
    """
    A complete run of a pipeline, containing multiple steps.

    Automatically summarizes large inputs/outputs to prevent token limit issues.
    """

    MAX_PAYLOAD_SIZE = 80000  # chars per step side (~20K tokens) - 2 steps = ~40K tokens, safely under 65K limit
    SAMPLE_SIZE = 100  # initial sample size per large list
    MIN_SAMPLE_SIZE = 10  # floor for aggressive trimming when still oversized
    STRING_TRUNCATE = 2000  # truncate very long strings to this many chars

    def __init__(
        self,
        pipeline_name: str,
        description: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None,
        sample_size: Optional[int] = None,
    ):
        """
        Initialize a new run.

        Args:
            pipeline_name: Name of the pipeline (e.g., "competitor_selection")
            description: Optional description of what this pipeline does (helps AI analysis)
            metadata: Optional metadata about this run (e.g., {"product_id": "123"})
            sample_size: Optional override for summarization sample size;
                values below 1 are clamped to 1
        """
        self.pipeline_name = pipeline_name
        self.description = description or ""
        self.metadata = metadata or {}
        if sample_size is None:
            self.sample_size = self.SAMPLE_SIZE
        else:
            self.sample_size = max(1, sample_size)
        self.steps: List[XRayStep] = []

    def add_step(self, step: XRayStep) -> None:
        """
        Add a step to this run. Auto-summarizes large inputs and outputs.

        The step is modified in place: its ``inputs`` and ``outputs``
        attributes are replaced by their (possibly summarized) versions.

        Args:
            step: The XRayStep to add
        """
        step.inputs = self._ensure_within_budget(step.inputs)
        step.outputs = self._ensure_within_budget(step.outputs)

        self.steps.append(step)

    def _ensure_within_budget(self, data: Any) -> Any:
        """Summarize data if its JSON form exceeds MAX_PAYLOAD_SIZE.

        Returns ``{}`` for ``None``, the same object when it already fits,
        and a summarized copy otherwise.
        """
        if data is None:
            return {}
        try:
            size = len(json.dumps(data, default=str))
        except Exception:
            size = self.MAX_PAYLOAD_SIZE + 1  # force summarization if not serializable
        if size <= self.MAX_PAYLOAD_SIZE:
            return data
        # Log summarization
        print(f"  [SDK] Summarizing large payload: {size} chars -> MAX {self.MAX_PAYLOAD_SIZE} chars")
        summarized = self._summarize_with_budget(data)
        new_size = len(json.dumps(summarized, default=str))
        print(f"  [SDK] Summarization complete: {size} -> {new_size} chars")
        return summarized

    def _summarize_with_budget(self, data: Any) -> Any:
        """Summarize with a shrinking sample size until the payload fits.

        The sample size starts at ``self.sample_size`` and is halved (never
        below MIN_SAMPLE_SIZE) after each pass that is still too large. The
        result of the last pass is returned even if it is still over budget.
        """
        sample_size = self.sample_size
        while True:
            # Always summarize the ORIGINAL data. Feeding the previous pass
            # back in would re-truncate already truncated strings (their
            # "[truncated N chars]" marker pushes them over STRING_TRUNCATE
            # again) and report a wrong N.
            summarized = self._summarize_once(data, sample_size)
            size = len(json.dumps(summarized, default=str))
            if size <= self.MAX_PAYLOAD_SIZE or sample_size <= self.MIN_SAMPLE_SIZE:
                return summarized
            sample_size = max(self.MIN_SAMPLE_SIZE, sample_size // 2)

    def _summarize_once(self, data: Any, sample_size: int) -> Any:
        """One-pass summarization with recursion and string truncation.

        Dicts are rebuilt key by key; a list value that had to be sampled
        gets a sibling ``<key>_total_count`` entry holding its original
        length. Strings longer than STRING_TRUNCATE are cut and suffixed with
        the number of dropped characters. Other values are returned as is.
        """
        if isinstance(data, dict):
            summarized = {}
            for key, value in data.items():
                if isinstance(value, list):
                    summarized_list, total_count = self._summarize_list(value, sample_size)
                    summarized[key] = summarized_list
                    if total_count is not None:
                        summarized[f"{key}_total_count"] = total_count
                else:
                    summarized[key] = self._summarize_once(value, sample_size)
            return summarized
        if isinstance(data, list):
            # No parent dict to hang a total count on, so it is dropped here.
            summarized_list, _ = self._summarize_list(data, sample_size)
            return summarized_list
        if isinstance(data, str) and len(data) > self.STRING_TRUNCATE:
            overflow = len(data) - self.STRING_TRUNCATE
            return f"{data[:self.STRING_TRUNCATE]}...[truncated {overflow} chars]"
        return data

    def _summarize_list(self, items: List[Any], sample_size: int):
        """Summarize a list: sample if large, recurse into elements.

        Returns:
            A ``(summarized_items, total_count)`` tuple. ``total_count`` is
            the original length when the list was longer than ``sample_size``
            and therefore sampled (first half from the head, the remainder
            from the tail), otherwise ``None``.
        """
        total_count = None
        if len(items) > sample_size:
            total_count = len(items)
            head_count = sample_size // 2
            tail_count = sample_size - head_count
            items = items[:head_count] + items[-tail_count:]
        return [self._summarize_once(item, sample_size) for item in items], total_count

    def to_dict(self) -> Dict[str, Any]:
        """Convert run to dictionary for JSON serialization"""
        return {
            "pipeline_name": self.pipeline_name,
            "pipeline_description": self.description,
            "metadata": self.metadata,
            "steps": [step.to_dict() for step in self.steps],
        }

    def __repr__(self) -> str:
        return f"XRayRun(pipeline='{self.pipeline_name}', steps={len(self.steps)})"
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
"""
|
|
2
|
+
XRayStep - Represents a single step in a pipeline execution
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass, field, asdict
|
|
6
|
+
from typing import Dict, Any
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@dataclass
class XRayStep:
    """A single step in a pipeline execution.

    Attributes:
        name: Step identifier (e.g., "keyword_generation", "filter", "rank").
        order: Step sequence number (1, 2, 3, ...).
        inputs: What was fed to this step (any JSON-serializable dict).
        outputs: What this step produced (any JSON-serializable dict).
        description: Optional one-line summary of the step's intent.
        reasons: Optional dict for rejections or drops.
        metrics: Optional dict for step-level metrics.
    """

    name: str
    order: int
    # default_factory gives every instance its own dict instead of sharing one.
    inputs: Dict[str, Any] = field(default_factory=dict)
    outputs: Dict[str, Any] = field(default_factory=dict)
    description: str = ""
    reasons: Dict[str, Any] = field(default_factory=dict)
    metrics: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Convert the step to a plain dictionary for JSON serialization.

        Returns:
            The result of ``dataclasses.asdict`` on this step: one entry per
            field, with nested dicts and lists copied recursively.
        """
        return asdict(self)

    def __repr__(self) -> str:
        """Return a short debug string with the step's name and order."""
        # ``!r`` quotes and escapes the name correctly even when it contains
        # quote characters; ordinary names render exactly as before.
        return f"XRayStep(name={self.name!r}, order={self.order})"
|
|
@@ -0,0 +1,190 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: xray-sdk
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Lightweight debugging SDK for multi-step AI pipelines.
|
|
5
|
+
Author: Equal Collective
|
|
6
|
+
License: Proprietary
|
|
7
|
+
Requires-Python: >=3.9
|
|
8
|
+
Description-Content-Type: text/markdown
|
|
9
|
+
Requires-Dist: requests>=2.31.0
|
|
10
|
+
|
|
11
|
+
# X-Ray SDK and API
|
|
12
|
+
|
|
13
|
+
A lightweight debugging system for multi-step AI pipelines that captures execution data and uses AI to identify faulty steps.
|
|
14
|
+
|
|
15
|
+
## Quick Start
|
|
16
|
+
|
|
17
|
+
### 1. Install Dependencies
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
pip3 install flask flask-sqlalchemy flask-cors psycopg2-binary openai python-dotenv requests
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
### 2. Configure Environment
|
|
24
|
+
|
|
25
|
+
Create a `.env` file:
|
|
26
|
+
```env
|
|
27
|
+
DATABASE_URL=postgresql://user:pass@host/dbname
|
|
28
|
+
CEREBRAS_API_KEY=your-api-key
|
|
29
|
+
CEREBRAS_BASE_URL=https://api.cerebras.ai/v1
|
|
30
|
+
CEREBRAS_MODEL=llama3.1-8b
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
### 3. Initialize Database
|
|
34
|
+
|
|
35
|
+
```bash
|
|
36
|
+
python3 -c "
|
|
37
|
+
from dotenv import load_dotenv
|
|
38
|
+
load_dotenv()
|
|
39
|
+
from xray_api.app import create_app
|
|
40
|
+
from xray_api.models import db
|
|
41
|
+
|
|
42
|
+
app = create_app()
|
|
43
|
+
with app.app_context():
|
|
44
|
+
db.create_all()
|
|
45
|
+
print('Database tables created!')
|
|
46
|
+
"
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
### 4. Start the API Server
|
|
50
|
+
|
|
51
|
+
```bash
|
|
52
|
+
python3 -m xray_api.app
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
### 5. Run Example
|
|
56
|
+
|
|
57
|
+
```bash
|
|
58
|
+
python3 examples/amazon_competitor.py
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
## SDK Usage
|
|
62
|
+
|
|
63
|
+
```python
|
|
64
|
+
from xray_sdk import XRayClient, XRayRun, XRayStep
|
|
65
|
+
|
|
66
|
+
# Create a run
|
|
67
|
+
run = XRayRun("my_pipeline", metadata={"context": "test"}, sample_size=50)
|
|
68
|
+
|
|
69
|
+
# Add steps (after your pipeline executes)
|
|
70
|
+
run.add_step(XRayStep(
|
|
71
|
+
name="keyword_generation",
|
|
72
|
+
order=1,
|
|
73
|
+
inputs={"title": "Phone Case"},
|
|
74
|
+
outputs={"keywords": ["phone case", "iphone"]},
|
|
75
|
+
description="Generate search keywords from the title."  # explain what this step does
|
|
76
|
+
))
|
|
77
|
+
|
|
78
|
+
run.add_step(XRayStep(
|
|
79
|
+
name="search",
|
|
80
|
+
order=2,
|
|
81
|
+
inputs={"keywords": ["phone case", "iphone"]},
|
|
82
|
+
outputs={"candidates_count": 100},
|
|
83
|
+
description="Search the catalog for items matching the keywords."
|
|
84
|
+
))
|
|
85
|
+
|
|
86
|
+
run.add_step(XRayStep(
|
|
87
|
+
name="filter",
|
|
88
|
+
order=3,
|
|
89
|
+
inputs={"candidates_count": 100},
|
|
90
|
+
outputs={"filtered_count": 5},
|
|
91
|
+
description="Filter candidates by rating.",
|
|
92
|
+
reasons={"dropped_items": [{"id": 123, "reason": "low rating"}]},
|
|
93
|
+
metrics={"elimination_rate": 0.95}
|
|
94
|
+
))
|
|
95
|
+
|
|
96
|
+
# Send for analysis
|
|
97
|
+
client = XRayClient("http://localhost:5000")
|
|
98
|
+
result = client.send(run)
|
|
99
|
+
|
|
100
|
+
print(result["analysis"])
|
|
101
|
+
# {
|
|
102
|
+
# "faulty_step": "keyword_generation",
|
|
103
|
+
# "reason": "...",
|
|
104
|
+
# "suggestion": "..."
|
|
105
|
+
# }
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
## SDK Client Methods
|
|
109
|
+
|
|
110
|
+
| Method | Description |
|
|
111
|
+
|--------|-------------|
|
|
112
|
+
| `send(run, analyze=True)` | Send run to API; spools locally if unavailable |
|
|
113
|
+
| `spool(run)` | Manually save run to `.xray_spool/` |
|
|
114
|
+
| `flush_spool()` | Send newest spooled run and delete all spool files |
|
|
115
|
+
| `list_pipelines()` | List all pipelines |
|
|
116
|
+
| `list_runs(pipeline, status, limit)` | List runs with filters |
|
|
117
|
+
| `get_run(run_id)` | Get run with all steps |
|
|
118
|
+
| `get_analysis(run_id)` | Get analysis result only |
|
|
119
|
+
| `search_steps(step_name, pipeline, limit)` | Search steps across runs |
|
|
120
|
+
|
|
121
|
+
## API Endpoints
|
|
122
|
+
|
|
123
|
+
POST
|
|
124
|
+
- `/api/ingest`: Store a run and, by default, trigger analysis (`analyze=false` to skip)
|
|
125
|
+
- `/api/analyze/<id>`: Re-trigger analysis for an existing run
|
|
126
|
+
|
|
127
|
+
GET
|
|
128
|
+
- `/api/runs`: List runs (filter by pipeline/status)
|
|
129
|
+
- `/api/runs/<id>`: Get a run with all steps
|
|
130
|
+
- `/api/runs/<id>/analysis`: Get analysis only for a run
|
|
131
|
+
- `/api/pipelines`: List pipelines
|
|
132
|
+
- `/api/search/steps`: Search steps by name/pipeline
|
|
133
|
+
- `/health`: Health check
|
|
134
|
+
|
|
135
|
+
## Project Structure
|
|
136
|
+
|
|
137
|
+
```
|
|
138
|
+
├── xray_sdk/ # Python SDK
|
|
139
|
+
│ ├── step.py # XRayStep dataclass
|
|
140
|
+
│ ├── run.py # XRayRun with auto-summarization
|
|
141
|
+
│ └── client.py # HTTP client with spool fallback
|
|
142
|
+
├── xray_api/ # Flask API
|
|
143
|
+
│ ├── app.py # Flask entry point
|
|
144
|
+
│ ├── models.py # Database models
|
|
145
|
+
│ ├── routes/ # API endpoints
|
|
146
|
+
│ └── agents/ # Cerebras AI analyzer
|
|
147
|
+
├── examples/ # Example scripts
|
|
148
|
+
├── ARCHITECTURE.md # Detailed architecture doc
|
|
149
|
+
└── requirements.txt # Dependencies
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
## Features
|
|
153
|
+
|
|
154
|
+
- **End-of-pipeline integration**: Add steps as your pipeline runs, send at the end
|
|
155
|
+
- **Deterministic summarization**: Large outputs are summarized with head/tail sampling for reproducible debugging
|
|
156
|
+
- **Spool fallback**: If API is down, saves to `.xray_spool/` for later submission
|
|
157
|
+
- **Step intent hints**: Optional one-line descriptions per step improve semantic analysis
|
|
158
|
+
- **Server-side safety net**: The API summarizes oversized inputs/outputs if a client skips SDK summarization
|
|
159
|
+
- **AI-powered analysis**: Uses a Cerebras LLM, with a 2-step sliding window when needed, to identify semantic mismatches and faulty steps
|
|
160
|
+
|
|
161
|
+
## Approach
|
|
162
|
+
|
|
163
|
+
The system is designed around these key principles:
|
|
164
|
+
1. **Minimal Integration Burden**: The SDK requires only wrapping each step's inputs/outputs after execution. Users can enrich this data with optional descriptions for both the pipeline and individual steps, making the system extensible and allowing the AI to understand the intent behind any domain-specific logic without requiring code changes.
|
|
165
|
+
|
|
166
|
+
2. **Sliding Window Analysis**: Instead of sending entire pipelines to the LLM (which can exceed token limits), we analyze 2 consecutive steps at a time. This keeps prompts under 65K tokens while still detecting data flow issues between adjacent steps.
|
|
167
|
+
|
|
168
|
+
3. **Semantic Context via Descriptions**: Pipeline and step descriptions tell the LLM what *type* of pipeline (e-commerce, document processing, etc.) and what each step *should* do. This helps detect semantic mismatches beyond just structural data flow.
|
|
169
|
+
|
|
170
|
+
4. **Deterministic Summarization**: Large outputs (500+ items) are summarized using head/tail sampling (first N + last N items). This is deterministic and preserves edge cases that often reveal bugs.
|
|
171
|
+
|
|
172
|
+
5. **Graceful Degradation**: If the API is unavailable, runs are spooled locally and can be flushed later with `client.flush_spool()`.
|
|
173
|
+
|
|
174
|
+
## Known Limitations
|
|
175
|
+
|
|
176
|
+
- **No cross-window context**: When analyzing step 3→4, the LLM doesn't see steps 1→2. Issues that span multiple transitions may be missed unless they were already detected in an earlier window.
|
|
177
|
+
|
|
178
|
+
- **Single LLM provider**: Currently only supports Cerebras API. Other providers require code changes.
|
|
179
|
+
|
|
180
|
+
- **Summarization loses detail**: Very large payloads are aggressively trimmed. Some bugs may be hidden in truncated data.
|
|
181
|
+
|
|
182
|
+
## Future Improvements
|
|
183
|
+
|
|
184
|
+
- **Docker image**: Pre-built container for one-command local setup - developers just run `docker compose up` instead of installing packages, databases, etc.
|
|
185
|
+
- **Local LLM support**: Run lightweight local models (e.g., Ollama, llama.cpp) to eliminate third-party API dependency and reduce costs
|
|
186
|
+
- **Multi-LLM support**: Add OpenAI, Anthropic, and other cloud providers via configurable adapters
|
|
187
|
+
- **Pipeline-level summary**: Generate a one-pass summary of the entire pipeline before window analysis
|
|
188
|
+
- **Streaming results**: Return partial analysis as each window completes
|
|
189
|
+
- **Web dashboard**: Visual timeline of pipeline runs with highlighted faulty steps
|
|
190
|
+
- **Comparison mode**: Compare two runs of the same pipeline to spot regressions
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
README.md
|
|
2
|
+
pyproject.toml
|
|
3
|
+
xray_sdk/__init__.py
|
|
4
|
+
xray_sdk/client.py
|
|
5
|
+
xray_sdk/run.py
|
|
6
|
+
xray_sdk/step.py
|
|
7
|
+
xray_sdk.egg-info/PKG-INFO
|
|
8
|
+
xray_sdk.egg-info/SOURCES.txt
|
|
9
|
+
xray_sdk.egg-info/dependency_links.txt
|
|
10
|
+
xray_sdk.egg-info/requires.txt
|
|
11
|
+
xray_sdk.egg-info/top_level.txt
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
requests>=2.31.0
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
xray_sdk
|