vde 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
VDE/__init__.py ADDED
@@ -0,0 +1,11 @@
1
+ from VDE.sdk import VDECollector, vde_trace
2
+ from VDE.schema import IntentEvent, ExecutionEvent, StateTransitionEvent, VDEEvent
3
+
4
+ __all__ = [
5
+ "VDECollector",
6
+ "vde_trace",
7
+ "IntentEvent",
8
+ "ExecutionEvent",
9
+ "StateTransitionEvent",
10
+ "VDEEvent",
11
+ ]
VDE/api.py ADDED
@@ -0,0 +1,308 @@
1
+ from fastapi import FastAPI, HTTPException, BackgroundTasks
2
+ from fastapi.responses import JSONResponse
3
+ from pydantic import BaseModel, Field
4
+ from typing import List, Optional, Dict, Any, Union
5
+ from datetime import datetime
6
+ import asyncio
7
+ import os
8
+
9
+ from sqlalchemy import create_engine, Column, String, DateTime, JSON
10
+ from sqlalchemy.ext.declarative import declarative_base
11
+ from sqlalchemy.orm import sessionmaker, Session
12
+ from sqlalchemy.dialects.postgresql import JSONB
13
+
14
+ from .schema import VDEEvent, IntentEvent, ExecutionEvent, StateTransitionEvent
15
+
16
+
17
# Connection string for the event store. The DATABASE_URL environment
# variable wins; the literal below is only the fallback value.
DATABASE_URL = os.environ.get(
    "DATABASE_URL",
    "postgresql://vde:vde_password@localhost:5432/vde_db",
)

# Engine, session factory and declarative base shared by the rest of
# this module.
engine = create_engine(DATABASE_URL)
SessionLocal = sessionmaker(bind=engine, autoflush=False, autocommit=False)
Base = declarative_base()
27
+
28
+
29
class EventRecord(Base):
    """ORM mapping for a single row of the ``events`` table."""

    __tablename__ = "events"

    # String primary key supplied by the code that creates the record.
    id = Column(String, primary_key=True, index=True)
    trace_id = Column(String, nullable=False, index=True)
    step_id = Column(String, nullable=True, index=True)
    event_type = Column(String, nullable=False, index=True)
    # Event body, stored in a PostgreSQL JSONB column.
    payload = Column(JSONB, nullable=False)
    timestamp = Column(DateTime, nullable=False, index=True)
    # Defaults to datetime.utcnow() when no value is given at insert time.
    created_at = Column(DateTime, nullable=False, default=datetime.utcnow)


# Create the tables for every model registered on Base. This runs when
# the module is imported.
Base.metadata.create_all(engine)
45
+
46
+
47
# Request/response bodies used by the HTTP endpoints.
class IngestRequest(BaseModel):
    """Body of a POST to ``/ingest``: a required list of raw event dicts."""

    events: List[Dict[str, Any]] = Field(
        ...,
        description="List of event dictionaries",
    )
51
+
52
+
53
class IngestResponse(BaseModel):
    """Body returned by ``/ingest`` summarising what happened to the batch."""

    # True only when no event in the request was rejected.
    success: bool
    # Human-readable summary of the two counters below.
    message: str
    # Number of events that validated and were queued.
    processed_count: int
    # Number of events that were rejected.
    failed_count: int
59
+
60
+
61
# ASGI application; the handlers defined further down register on it.
app = FastAPI(
    version="1.0.0",
    title="VDE Ingestion API",
    description="API for ingesting VDE events into PostgreSQL",
)


# Batch-writer tuning, overridable through environment variables.
BATCH_SIZE = int(os.environ.get("VDE_BATCH_SIZE", "50"))
BATCH_INTERVAL = float(os.environ.get("VDE_BATCH_INTERVAL", "2.0"))


# In-memory hand-off between the /ingest handler and the batch writer,
# plus a handle on the writer task so shutdown can cancel it.
event_queue: asyncio.Queue = asyncio.Queue()
batch_writer_task: Optional[asyncio.Task] = None
77
+
78
+
79
def validate_event(event_dict: Dict[str, Any]) -> Union[IntentEvent, ExecutionEvent, StateTransitionEvent]:
    """
    Build the Pydantic model that matches the event's ``type`` field.

    Args:
        event_dict: Raw event; its ``"type"`` entry selects the model and
            the whole dict is passed to that model as keyword arguments.

    Returns:
        The constructed IntentEvent, ExecutionEvent or StateTransitionEvent.

    Raises:
        ValueError: If ``"type"`` is missing or not one of the known values.
            Errors raised by the model constructor propagate unchanged.
    """
    kind = event_dict.get("type")

    # One guard per known type; anything that falls through is rejected.
    if kind == "intent":
        return IntentEvent(**event_dict)
    if kind == "execution":
        return ExecutionEvent(**event_dict)
    if kind == "state_change":
        return StateTransitionEvent(**event_dict)

    raise ValueError(f"Unknown event type: {kind}")
102
+
103
+
104
def save_events_to_db(events: List[Dict[str, Any]]) -> tuple[int, int]:
    """
    Save a batch of events to PostgreSQL in a single transaction.

    Each event is re-validated with ``validate_event``; events that fail
    validation are skipped and counted as failed. All valid events are
    committed together, and if the commit fails the transaction is rolled
    back and the whole batch is reported as failed.

    Args:
        events: List of event dictionaries

    Returns:
        Tuple of (success_count, failed_count)
    """
    import uuid  # local import: only the record-id generation needs it

    db: Session = SessionLocal()
    success_count = 0
    failed_count = 0

    try:
        for event_dict in events:
            try:
                # Validate event
                validated_event = validate_event(event_dict)

                # Keep the readable "<trace>_<step>_" prefix but finish with
                # a random suffix. A whole-second timestamp collides when
                # two events for the same step land in the same second, and
                # one primary-key clash rolls back the entire batch. Empty
                # parts are dropped so a missing step_id (the column is
                # nullable) no longer breaks the concatenation.
                id_parts = (
                    validated_event.trace_id,
                    validated_event.step_id,
                    uuid.uuid4().hex,
                )
                record_id = "_".join(str(part) for part in id_parts if part)

                event_record = EventRecord(
                    id=record_id,
                    trace_id=validated_event.trace_id,
                    step_id=validated_event.step_id,
                    event_type=validated_event.type,
                    # mode="json" renders non-JSON-native values as JSON
                    # types so the JSONB column can serialise the payload.
                    # NOTE(review): assumes the schema's timestamp is a
                    # datetime (it feeds a DateTime column) - confirm.
                    payload=validated_event.model_dump(mode="json"),
                    timestamp=validated_event.timestamp,
                )

                db.add(event_record)
                success_count += 1

            except Exception as e:
                print(f"Failed to validate or create event record: {e}")
                failed_count += 1
                continue

        db.commit()

    except Exception as e:
        db.rollback()
        print(f"Database error during batch save: {e}")
        # Nothing was persisted, so count all as failed on transaction error
        failed_count = len(events)
        success_count = 0

    finally:
        db.close()

    return success_count, failed_count
155
+
156
+
157
def _drain_event_queue(limit: Optional[int] = None) -> List[Dict[str, Any]]:
    """Pop up to *limit* queued events (all of them when None) without waiting."""
    drained: List[Dict[str, Any]] = []
    while limit is None or len(drained) < limit:
        try:
            drained.append(event_queue.get_nowait())
        except asyncio.QueueEmpty:
            break
    return drained


async def batch_writer():
    """
    Background task that periodically flushes events from queue to database.

    Every BATCH_INTERVAL seconds the queue is emptied in chunks of at most
    BATCH_SIZE events, each chunk being written by ``save_events_to_db`` on
    the default executor. On cancellation, whatever is still queued is
    written as one final batch before the task exits.
    """
    loop = asyncio.get_running_loop()

    while True:
        try:
            await asyncio.sleep(BATCH_INTERVAL)

            # Keep writing chunks until the backlog is gone. Flushing only
            # one chunk per interval would cap throughput at
            # BATCH_SIZE / BATCH_INTERVAL and let the queue grow unbounded.
            while True:
                batch = _drain_event_queue(BATCH_SIZE)
                if not batch:
                    break

                # Run database save in thread pool to avoid blocking
                success, failed = await loop.run_in_executor(
                    None,
                    save_events_to_db,
                    batch,
                )
                print(f"Batch saved: {success} succeeded, {failed} failed")

        except asyncio.CancelledError:
            # Flush remaining events before shutdown
            batch = _drain_event_queue()
            if batch:
                success, failed = await loop.run_in_executor(
                    None,
                    save_events_to_db,
                    batch,
                )
                print(f"Final batch saved: {success} succeeded, {failed} failed")
            break

        except Exception as e:
            # Keep the writer alive; the next interval retries the loop.
            print(f"Error in batch writer: {e}")
207
+
208
+
209
@app.on_event("startup")
async def startup_event():
    """Launch the background batch writer and remember its task handle."""
    global batch_writer_task
    writer_coro = batch_writer()
    batch_writer_task = asyncio.create_task(writer_coro)
    print("VDE API started - batch writer initialized")
215
+
216
+
217
@app.on_event("shutdown")
async def shutdown_event():
    """Cancel the batch writer, wait for it to finish, then log the stop."""
    writer = batch_writer_task
    if writer is not None:
        writer.cancel()
        try:
            # Waiting here gives the writer time to finish its own cleanup.
            await writer
        except asyncio.CancelledError:
            pass
    print("VDE API stopped - batch writer shutdown")
228
+
229
+
230
@app.post("/ingest", response_model=IngestResponse)
async def ingest_events(request: IngestRequest, background_tasks: BackgroundTasks):
    """
    Ingest VDE events into the system.

    Each event is validated with ``validate_event``; valid events are put
    on the in-memory queue for the batch writer, invalid ones are counted
    and the reason is logged.

    Args:
        request: IngestRequest containing list of events
        background_tasks: Accepted as part of the handler signature; this
            handler does not schedule anything on it.

    Returns:
        IngestResponse with processing statistics
    """
    processed_count = 0
    failed_count = 0

    for event_dict in request.events:
        try:
            # Validate event against Pydantic models
            validated_event = validate_event(event_dict)

            # Queue event for batch writing
            await event_queue.put(validated_event.model_dump())
            processed_count += 1

        except Exception as e:
            # Log why the event was rejected so the reason is not lost.
            failed_count += 1
            print(f"Rejected event during ingest: {e}")

    return IngestResponse(
        success=failed_count == 0,
        message=f"Processed {processed_count} events, {failed_count} failed",
        processed_count=processed_count,
        failed_count=failed_count,
    )
271
+
272
+
273
@app.get("/health")
async def health_check():
    """
    Health check endpoint.

    Runs ``SELECT 1`` against the database. Returns a 200 body with the
    current queue size on success, or a 503 JSON body carrying the error
    text when the probe fails.
    """
    # Local import keeps this fix self-contained within the handler.
    from sqlalchemy import text

    try:
        # Test database connection
        db: Session = SessionLocal()
        try:
            # SQLAlchemy 2.x rejects bare SQL strings; wrap them in text().
            db.execute(text("SELECT 1"))
        finally:
            # Always return the connection, even when the probe fails.
            db.close()

        return {
            "status": "healthy",
            "database": "connected",
            "queue_size": event_queue.qsize(),
        }
    except Exception as e:
        return JSONResponse(
            status_code=503,
            content={
                "status": "unhealthy",
                "database": "disconnected",
                "error": str(e),
            },
        )
296
+
297
+
298
@app.get("/")
async def root():
    """Describe the service: its name, version and available endpoints."""
    endpoints = {
        "ingest": "/ingest",
        "health": "/health",
    }
    info = {
        "name": "VDE Ingestion API",
        "version": "1.0.0",
        "endpoints": endpoints,
    }
    return info
VDE/dashboard.py ADDED
@@ -0,0 +1,342 @@
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import plotly.graph_objects as go
4
+ import plotly.express as px
5
+ from datetime import datetime
6
+ import os
7
+ import sys
8
+
9
+ # Add parent directory to path to import VDE modules
10
+ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
11
+
12
+ from VDE.engine import DriftDetectionEngine
13
+
14
+
15
# Streamlit page setup: title, icon and a wide layout.
st.set_page_config(
    layout="wide",
    page_title="VDE Drift Dashboard",
    page_icon="📊",
)

# Stylesheet for the header, metric cards and the per-type anomaly boxes.
st.markdown("""
<style>
    .main-header {
        font-size: 2.5rem;
        font-weight: bold;
        color: #1f77b4;
    }
    .metric-card {
        background-color: #f0f2f6;
        padding: 1rem;
        border-radius: 0.5rem;
        margin: 0.5rem 0;
    }
    .anomaly-box {
        padding: 0.5rem;
        margin: 0.25rem 0;
        border-radius: 0.25rem;
        border-left: 4px solid;
    }
    .missing-step { border-color: #dc3545; background-color: #f8d7da; }
    .extra-step { border-color: #ffc107; background-color: #fff3cd; }
    .reordering { border-color: #17a2b8; background-color: #d1ecf1; }
    .status-mismatch { border-color: #6c757d; background-color: #e2e3e5; }
</style>
""", unsafe_allow_html=True)
48
+
49
+
50
def _add_step_markers(fig, sequence, *, row, label, color, symbol, text_position):
    """Add one labelled marker per step of *sequence* on the horizontal line y=row."""
    for position, step_id in enumerate(sequence):
        fig.add_trace(go.Scatter(
            x=[position],
            y=[row],
            mode='markers+text',
            name=label,
            text=[step_id],
            textposition=text_position,
            marker=dict(size=20, color=color, symbol=symbol),
            hovertemplate=f'Step: {step_id}<br>Position: {position}<br>Type: {label}<extra></extra>',
            showlegend=False
        ))


def create_timeline_comparison(intent_sequence, execution_sequence):
    """
    Create a timeline comparison visualization for intent vs execution.

    Intent steps are drawn on the top row (y=1) and execution steps on the
    bottom row (y=0). A dotted connector joins each intent step to the
    first execution position that carries the same step ID.

    Args:
        intent_sequence: List of step IDs in intent order
        execution_sequence: List of step IDs in execution order
            (step IDs must be hashable, e.g. strings)

    Returns:
        Plotly figure object
    """
    fig = go.Figure()

    # Intent timeline (top)
    _add_step_markers(
        fig, intent_sequence,
        row=1, label='Intent', color='#1f77b4',
        symbol='circle', text_position='top center',
    )

    # Execution timeline (bottom)
    _add_step_markers(
        fig, execution_sequence,
        row=0, label='Execution', color='#ff7f0e',
        symbol='square', text_position='bottom center',
    )

    # Index the first execution position of every step once, rather than
    # scanning the list again for each intent step.
    first_exec_position = {}
    for position, step_id in enumerate(execution_sequence):
        first_exec_position.setdefault(step_id, position)

    # Draw lines between matching steps. enumerate() supplies the real
    # intent position, so a repeated step ID gets a connector from each of
    # its occurrences rather than always from the first.
    for intent_pos, step_id in enumerate(intent_sequence):
        exec_pos = first_exec_position.get(step_id)
        if exec_pos is None:
            continue

        fig.add_trace(go.Scatter(
            x=[intent_pos, exec_pos],
            y=[1, 0],
            mode='lines',
            line=dict(color='gray', width=1, dash='dot'),
            hoverinfo='skip',
            showlegend=False
        ))

    fig.update_layout(
        title='Intent vs Execution Timeline',
        xaxis_title='Position',
        yaxis=dict(
            tickvals=[0, 1],
            ticktext=['Execution', 'Intent'],
            range=[-0.2, 1.2]
        ),
        height=400,
        hovermode='closest'
    )

    return fig
119
+
120
+
121
def create_drift_score_chart(executions):
    """
    Build a bar chart with one bar per trace, sized and coloured by drift score.

    Args:
        executions: List of execution dictionaries; the ``trace_id`` and
            ``drift_score`` entries are plotted

    Returns:
        Plotly figure object
    """
    frame = pd.DataFrame(executions)
    axis_labels = {'trace_id': 'Trace ID', 'drift_score': 'Drift Score (0-100)'}

    chart = px.bar(
        frame,
        x='trace_id',
        y='drift_score',
        color='drift_score',
        labels=axis_labels,
        title='Drift Score by Trace ID',
        # Colour scale pinned to the 0-100 range.
        range_color=[0, 100],
        color_continuous_scale='RdYlGn_r',
    )
    chart.update_layout(height=400, xaxis_tickangle=-45)

    return chart
150
+
151
+
152
def create_anomaly_summary_chart(executions):
    """
    Build a stacked bar chart of anomaly counts per trace.

    Args:
        executions: List of execution dictionaries; counts are read from
            the ``summary`` entry of each ``summary_json``, with missing
            counters treated as 0

    Returns:
        Plotly figure object
    """
    counters = ('missing_steps', 'extra_steps', 'reorderings', 'status_mismatches')

    rows = []
    for record in executions:
        counts = record['summary_json'].get('summary', {})
        row = {'trace_id': record['trace_id']}
        for counter in counters:
            row[counter] = counts.get(counter, 0)
        rows.append(row)

    frame = pd.DataFrame(rows)

    # (legend name, column, bar colour) for each stacked series, in the
    # order in which the series are added to the figure.
    series = (
        ('Missing Steps', 'missing_steps', '#dc3545'),
        ('Extra Steps', 'extra_steps', '#ffc107'),
        ('Reorderings', 'reorderings', '#17a2b8'),
        ('Status Mismatches', 'status_mismatches', '#6c757d'),
    )

    chart = go.Figure()
    for legend_name, column, colour in series:
        chart.add_trace(go.Bar(
            name=legend_name,
            x=frame['trace_id'],
            y=frame[column],
            marker_color=colour,
        ))

    chart.update_layout(
        barmode='stack',
        title='Anomaly Types by Trace ID',
        xaxis_title='Trace ID',
        yaxis_title='Count',
        xaxis_tickangle=-45,
        height=400,
    )

    return chart
215
+
216
+
217
def main():
    """
    Main Streamlit application.

    Loads every reconstructed execution through DriftDetectionEngine, shows
    overview metrics, a per-trace drill-down (drift score, timeline,
    anomalies, summary counters) and two global charts. Returns early with
    an on-page message when the engine cannot be created, the fetch fails,
    or there is nothing to show.
    """
    # Local import: used only to escape stored text before it is rendered
    # as raw HTML below.
    import html

    st.markdown('<h1 class="main-header">📊 VDE Drift Dashboard</h1>', unsafe_allow_html=True)

    # Initialize engine
    try:
        engine = DriftDetectionEngine()
    except Exception as e:
        st.error(f"Failed to connect to database: {e}")
        st.info("Please ensure PostgreSQL is running and DATABASE_URL is configured.")
        return

    # Sidebar
    st.sidebar.header("Controls")

    # Refresh button
    if st.sidebar.button("🔄 Refresh Data"):
        st.rerun()

    # Get all reconstructed executions
    try:
        executions = engine.get_all_reconstructed_executions()
    except Exception as e:
        st.error(f"Failed to fetch executions: {e}")
        return

    if not executions:
        st.warning("No reconstructed executions found in the database.")
        st.info("Use the DriftDetectionEngine to analyze traces first.")
        return

    # Overview metrics
    st.subheader("Overview")
    col1, col2, col3, col4 = st.columns(4)

    total_traces = len(executions)
    avg_drift = sum(e['drift_score'] for e in executions) / total_traces
    high_drift_count = sum(1 for e in executions if e['drift_score'] > 50)
    total_anomalies = sum(
        e['summary_json'].get('summary', {}).get('total_anomalies', 0)
        for e in executions
    )

    col1.metric("Total Traces", total_traces)
    col2.metric("Avg Drift Score", f"{avg_drift:.2f}")
    col3.metric("High Drift (>50)", high_drift_count)
    col4.metric("Total Anomalies", total_anomalies)

    # Trace selection
    st.subheader("Trace Analysis")
    trace_ids = [e['trace_id'] for e in executions]
    selected_trace = st.selectbox("Select Trace ID", trace_ids)

    # Get selected execution data
    selected_exec = next(e for e in executions if e['trace_id'] == selected_trace)
    summary_json = selected_exec['summary_json']

    # Display drift score prominently. Fall back to the execution-level
    # score (the one the overview metrics use) when the summary has none.
    drift_score = summary_json.get('drift_score', selected_exec['drift_score'])

    # Color code drift score
    if drift_score < 20:
        score_color = "🟢"
    elif drift_score < 50:
        score_color = "🟡"
    else:
        score_color = "🔴"

    st.markdown(f"### Drift Score: {score_color} {drift_score}/100")

    # Timeline visualization
    st.subheader("Intent vs Execution Timeline")
    intent_sequence = summary_json.get('intent_sequence', [])
    execution_sequence = summary_json.get('execution_sequence', [])

    if intent_sequence and execution_sequence:
        timeline_fig = create_timeline_comparison(intent_sequence, execution_sequence)
        st.plotly_chart(timeline_fig, use_container_width=True)
    else:
        st.warning("No sequence data available for this trace.")

    # Anomalies detail
    st.subheader("Detected Anomalies")
    anomalies = summary_json.get('anomalies', [])

    if anomalies:
        for anomaly in anomalies:
            anomaly_type = str(anomaly['type'])
            # Escape everything that comes from stored data before it is
            # interpolated into markup rendered with unsafe_allow_html.
            css_class = html.escape(anomaly_type.replace('_', '-'), quote=True)
            heading = html.escape(anomaly_type.replace('_', ' ').title())
            description = html.escape(str(anomaly.get('description', '')))

            st.markdown(
                f'<div class="anomaly-box {css_class}">'
                f'<strong>{heading}</strong>: {description}'
                '</div>',
                unsafe_allow_html=True,
            )
    else:
        st.success("No anomalies detected for this trace.")

    # Summary statistics
    st.subheader("Summary Statistics")
    summary = summary_json.get('summary', {})

    col1, col2, col3, col4, col5 = st.columns(5)
    col1.metric("Total Anomalies", summary.get('total_anomalies', 0))
    col2.metric("Missing Steps", summary.get('missing_steps', 0))
    col3.metric("Extra Steps", summary.get('extra_steps', 0))
    col4.metric("Reorderings", summary.get('reorderings', 0))
    col5.metric("Status Mismatches", summary.get('status_mismatches', 0))

    # Global visualizations
    st.subheader("Global Analysis")

    tab1, tab2 = st.tabs(["Drift Scores", "Anomaly Distribution"])

    with tab1:
        drift_fig = create_drift_score_chart(executions)
        st.plotly_chart(drift_fig, use_container_width=True)

    with tab2:
        anomaly_fig = create_anomaly_summary_chart(executions)
        st.plotly_chart(anomaly_fig, use_container_width=True)

    # Raw data expander
    with st.expander("View Raw Data"):
        st.json(summary_json)


if __name__ == "__main__":
    main()