PyPI - vde - Versions diffs - 0.1.0__py3-none-any.whl - Mend

vde 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

VDE/__init__.py +11 -0
VDE/api.py +308 -0
VDE/dashboard.py +342 -0
VDE/engine.py +494 -0
VDE/schema.py +25 -0
VDE/sdk.py +375 -0
vde-0.1.0.dist-info/METADATA +311 -0
vde-0.1.0.dist-info/RECORD +10 -0
vde-0.1.0.dist-info/WHEEL +4 -0
vde-0.1.0.dist-info/licenses/LICENSE +190 -0

VDE/__init__.py ADDED Viewed

@@ -0,0 +1,11 @@
+from VDE.sdk import VDECollector, vde_trace
+from VDE.schema import IntentEvent, ExecutionEvent, StateTransitionEvent, VDEEvent
+# Names re-exported at package level; each one is imported above from
+# VDE.sdk or VDE.schema.
+__all__ = [
+    "VDECollector",
+    "vde_trace",
+    "IntentEvent",
+    "ExecutionEvent",
+    "StateTransitionEvent",
+    "VDEEvent",
+]

VDE/api.py ADDED Viewed

@@ -0,0 +1,308 @@
+from fastapi import FastAPI, HTTPException, BackgroundTasks
+from fastapi.responses import JSONResponse
+from pydantic import BaseModel, Field
+from typing import List, Optional, Dict, Any, Union
+from datetime import datetime
+import asyncio
+import os
+from sqlalchemy import create_engine, Column, String, DateTime, JSON
+from sqlalchemy.ext.declarative import declarative_base
+from sqlalchemy.orm import sessionmaker, Session
+from sqlalchemy.dialects.postgresql import JSONB
+from .schema import VDEEvent, IntentEvent, ExecutionEvent, StateTransitionEvent
+# Database configuration
+# The connection string is read from the DATABASE_URL environment variable.
+# NOTE(review): the fallback below embeds a username and password; confirm it
+# is only meant for local development and is never used in a deployment.
+DATABASE_URL = os.getenv(
+    "DATABASE_URL",
+    "postgresql://vde:vde_password@localhost:5432/vde_db"
+)
+# SQLAlchemy setup
+# One module-level engine and session factory are shared by the whole API.
+# Sessions neither autocommit nor autoflush, so callers commit explicitly.
+engine = create_engine(DATABASE_URL)
+SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
+Base = declarative_base()
+# SQLAlchemy Event model
+class EventRecord(Base):
+    """SQLAlchemy model for storing events in PostgreSQL."""
+    # One row per stored event, kept in the "events" table.
+    __tablename__ = "events"
+    # String primary key supplied by the code that creates the record.
+    id = Column(String, primary_key=True, index=True)
+    # Indexed identifiers of the trace and step; step_id may be NULL.
+    trace_id = Column(String, index=True, nullable=False)
+    step_id = Column(String, index=True, nullable=True)
+    # Event discriminator string (indexed).
+    event_type = Column(String, index=True, nullable=False)
+    # Event body, stored with the PostgreSQL-specific JSONB column type.
+    payload = Column(JSONB, nullable=False)
+    # Timestamp taken from the event itself.
+    timestamp = Column(DateTime, index=True, nullable=False)
+    # Filled from datetime.utcnow (naive UTC) when no value is given on insert.
+    created_at = Column(DateTime, default=datetime.utcnow, nullable=False)
+# Create tables
+# Executed at import time against the module-level engine, so importing this
+# module needs a reachable database.
+Base.metadata.create_all(bind=engine)
+# Pydantic models for API requests
+class IngestRequest(BaseModel):
+    """Request model for /ingest endpoint."""
+    # Required field (no default). Items are plain dictionaries here; they are
+    # matched against the typed event models later by validate_event.
+    events: List[Dict[str, Any]] = Field(..., description="List of event dictionaries")
+class IngestResponse(BaseModel):
+    """Response model for /ingest endpoint."""
+    # True only when no event in the request failed.
+    success: bool
+    # Human-readable summary of the two counters below.
+    message: str
+    # Events that passed validation and were queued for writing.
+    processed_count: int
+    # Events rejected while handling the request.
+    failed_count: int
+# FastAPI app
+app = FastAPI(
+    title="VDE Ingestion API",
+    description="API for ingesting VDE events into PostgreSQL",
+    version="1.0.0"
+)
+# Batch writing configuration
+# Both values are parsed from the environment at import time; a non-numeric
+# value makes int()/float() raise ValueError while the module is imported.
+# BATCH_SIZE is the maximum number of events taken from the queue per batch.
+BATCH_SIZE = int(os.getenv("VDE_BATCH_SIZE", "50"))
+# BATCH_INTERVAL is the pause, in seconds, between batch writer wake-ups.
+BATCH_INTERVAL = float(os.getenv("VDE_BATCH_INTERVAL", "2.0"))
+# Event queue for batch writing
+# Created without a maxsize, so putting an event never waits for free space.
+event_queue: asyncio.Queue = asyncio.Queue()
+# Handle of the running batch_writer task; stays None until startup_event runs.
+batch_writer_task: Optional[asyncio.Task] = None
+def validate_event(event_dict: Dict[str, Any]) -> Union[IntentEvent, ExecutionEvent, StateTransitionEvent]:
+    """
+    Validate an event dictionary against Pydantic models.
+    Args:
+        event_dict: Event dictionary to validate
+    Returns:
+        Validated Pydantic model instance
+    Raises:
+        ValueError: If event type is invalid or validation fails
+    """
+    event_type = event_dict.get("type")
+    if event_type == "intent":
+        return IntentEvent(**event_dict)
+    elif event_type == "execution":
+        return ExecutionEvent(**event_dict)
+    elif event_type == "state_change":
+        return StateTransitionEvent(**event_dict)
+    else:
+        raise ValueError(f"Unknown event type: {event_type}")
+def save_events_to_db(events: List[Dict[str, Any]]) -> tuple[int, int]:
+    """
+    Save a batch of events to PostgreSQL.
+    Args:
+        events: List of event dictionaries
+    Returns:
+        Tuple of (success_count, failed_count)
+    """
+    db: Session = SessionLocal()
+    success_count = 0
+    failed_count = 0
+    try:
+        for event_dict in events:
+            try:
+                # Validate event
+                validated_event = validate_event(event_dict)
+                # Create database record
+                event_record = EventRecord(
+                    id=str(validated_event.trace_id + "_" + validated_event.step_id + "_" + str(int(datetime.utcnow().timestamp()))),
+                    trace_id=validated_event.trace_id,
+                    step_id=validated_event.step_id,
+                    event_type=validated_event.type,
+                    payload=validated_event.model_dump(),
+                    timestamp=validated_event.timestamp
+                )
+                db.add(event_record)
+                success_count += 1
+            except Exception as e:
+                print(f"Failed to validate or create event record: {e}")
+                failed_count += 1
+                continue
+        db.commit()
+    except Exception as e:
+        db.rollback()
+        print(f"Database error during batch save: {e}")
+        # Count all as failed on transaction error
+        failed_count = len(events)
+        success_count = 0
+    finally:
+        db.close()
+    return success_count, failed_count
+async def batch_writer():
+    """
+    Background task that periodically flushes events from queue to database.
+    """
+    while True:
+        try:
+            await asyncio.sleep(BATCH_INTERVAL)
+            # Collect batch of events
+            batch = []
+            while len(batch) < BATCH_SIZE and not event_queue.empty():
+                try:
+                    event = event_queue.get_nowait()
+                    batch.append(event)
+                except asyncio.QueueEmpty:
+                    break
+            if batch:
+                # Run database save in thread pool to avoid blocking
+                loop = asyncio.get_event_loop()
+                success, failed = await loop.run_in_executor(
+                    None,
+                    save_events_to_db,
+                    batch
+                )
+                print(f"Batch saved: {success} succeeded, {failed} failed")
+        except asyncio.CancelledError:
+            # Flush remaining events before shutdown
+            if not event_queue.empty():
+                batch = []
+                while not event_queue.empty():
+                    try:
+                        event = event_queue.get_nowait()
+                        batch.append(event)
+                    except asyncio.QueueEmpty:
+                        break
+                if batch:
+                    loop = asyncio.get_event_loop()
+                    success, failed = await loop.run_in_executor(
+                        None,
+                        save_events_to_db,
+                        batch
+                    )
+                    print(f"Final batch saved: {success} succeeded, {failed} failed")
+            break
+        except Exception as e:
+            print(f"Error in batch writer: {e}")
+@app.on_event("startup")
+async def startup_event():
+    """Start the batch writer task on application startup."""
+    global batch_writer_task
+    batch_writer_task = asyncio.create_task(batch_writer())
+    print("VDE API started - batch writer initialized")
+@app.on_event("shutdown")
+async def shutdown_event():
+    """Cancel the batch writer task on application shutdown."""
+    global batch_writer_task
+    if batch_writer_task:
+        batch_writer_task.cancel()
+        try:
+            await batch_writer_task
+        except asyncio.CancelledError:
+            pass
+    print("VDE API stopped - batch writer shutdown")
+@app.post("/ingest", response_model=IngestResponse)
+async def ingest_events(request: IngestRequest, background_tasks: BackgroundTasks):
+    """
+    Ingest VDE events into the system.
+    This endpoint accepts a batch of events, validates them against Pydantic models,
+    and queues them for batch writing to PostgreSQL.
+    Args:
+        request: IngestRequest containing list of events
+        background_tasks: FastAPI BackgroundTasks for async processing
+    Returns:
+        IngestResponse with processing statistics
+    """
+    processed_count = 0
+    failed_count = 0
+    validation_errors = []
+    for event_dict in request.events:
+        try:
+            # Validate event against Pydantic models
+            validated_event = validate_event(event_dict)
+            # Queue event for batch writing
+            await event_queue.put(validated_event.model_dump())
+            processed_count += 1
+        except Exception as e:
+            failed_count += 1
+            validation_errors.append({
+                "event": event_dict,
+                "error": str(e)
+            })
+    return IngestResponse(
+        success=failed_count == 0,
+        message=f"Processed {processed_count} events, {failed_count} failed",
+        processed_count=processed_count,
+        failed_count=failed_count
+    )
+@app.get("/health")
+async def health_check():
+    """Health check endpoint."""
+    try:
+        # Test database connection
+        db: Session = SessionLocal()
+        db.execute("SELECT 1")
+        db.close()
+        return {
+            "status": "healthy",
+            "database": "connected",
+            "queue_size": event_queue.qsize()
+        }
+    except Exception as e:
+        return JSONResponse(
+            status_code=503,
+            content={
+                "status": "unhealthy",
+                "database": "disconnected",
+                "error": str(e)
+            }
+        )
+@app.get("/")
+async def root():
+    """Root endpoint with API information."""
+    return {
+        "name": "VDE Ingestion API",
+        "version": "1.0.0",
+        "endpoints": {
+            "ingest": "/ingest",
+            "health": "/health"
+        }
+    }

VDE/dashboard.py ADDED Viewed

@@ -0,0 +1,342 @@
+import streamlit as st
+import pandas as pd
+import plotly.graph_objects as go
+import plotly.express as px
+from datetime import datetime
+import os
+import sys
+# Add parent directory to path to import VDE modules
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+from VDE.engine import DriftDetectionEngine
+# Page configuration
+# NOTE(review): Streamlit presumably requires set_page_config to run before
+# any other st.* call, which would explain why it sits at module level -
+# confirm against the Streamlit version in use.
+st.set_page_config(
+    page_title="VDE Drift Dashboard",
+    page_icon="📊",
+    layout="wide"
+)
+# Custom CSS
+# The .missing-step / .extra-step / .reordering / .status-mismatch classes
+# pair with the anomaly boxes rendered in main(), which derives the class name
+# from the anomaly type by replacing underscores with hyphens.
+st.markdown("""
+<style>
+    .main-header {
+        font-size: 2.5rem;
+        font-weight: bold;
+        color: #1f77b4;
+    }
+    .metric-card {
+        background-color: #f0f2f6;
+        padding: 1rem;
+        border-radius: 0.5rem;
+        margin: 0.5rem 0;
+    }
+    .anomaly-box {
+        padding: 0.5rem;
+        margin: 0.25rem 0;
+        border-radius: 0.25rem;
+        border-left: 4px solid;
+    }
+    .missing-step { border-color: #dc3545; background-color: #f8d7da; }
+    .extra-step { border-color: #ffc107; background-color: #fff3cd; }
+    .reordering { border-color: #17a2b8; background-color: #d1ecf1; }
+    .status-mismatch { border-color: #6c757d; background-color: #e2e3e5; }
+</style>
+""", unsafe_allow_html=True)
+def create_timeline_comparison(intent_sequence, execution_sequence):
+    """
+    Create a timeline comparison visualization for intent vs execution.
+    Args:
+        intent_sequence: List of step IDs in intent order
+        execution_sequence: List of step IDs in execution order
+    Returns:
+        Plotly figure object
+    """
+    fig = go.Figure()
+    # Intent timeline (top)
+    for i, step_id in enumerate(intent_sequence):
+        fig.add_trace(go.Scatter(
+            x=[i],
+            y=[1],
+            mode='markers+text',
+            name='Intent',
+            text=[step_id],
+            textposition='top center',
+            marker=dict(size=20, color='#1f77b4', symbol='circle'),
+            hovertemplate=f'Step: {step_id}<br>Position: {i}<br>Type: Intent<extra></extra>',
+            showlegend=False
+        ))
+    # Execution timeline (bottom)
+    for i, step_id in enumerate(execution_sequence):
+        fig.add_trace(go.Scatter(
+            x=[i],
+            y=[0],
+            mode='markers+text',
+            name='Execution',
+            text=[step_id],
+            textposition='bottom center',
+            marker=dict(size=20, color='#ff7f0e', symbol='square'),
+            hovertemplate=f'Step: {step_id}<br>Position: {i}<br>Type: Execution<extra></extra>',
+            showlegend=False
+        ))
+    # Draw lines between matching steps
+    for step_id in intent_sequence:
+        if step_id in execution_sequence:
+            intent_pos = intent_sequence.index(step_id)
+            exec_pos = execution_sequence.index(step_id)
+            fig.add_trace(go.Scatter(
+                x=[intent_pos, exec_pos],
+                y=[1, 0],
+                mode='lines',
+                line=dict(color='gray', width=1, dash='dot'),
+                hoverinfo='skip',
+                showlegend=False
+            ))
+    fig.update_layout(
+        title='Intent vs Execution Timeline',
+        xaxis_title='Position',
+        yaxis=dict(
+            tickvals=[0, 1],
+            ticktext=['Execution', 'Intent'],
+            range=[-0.2, 1.2]
+        ),
+        height=400,
+        hovermode='closest'
+    )
+    return fig
+def create_drift_score_chart(executions):
+    """
+    Create a bar chart showing drift scores across executions.
+    Args:
+        executions: List of execution dictionaries with drift scores
+    Returns:
+        Plotly figure object
+    """
+    df = pd.DataFrame(executions)
+    fig = px.bar(
+        df,
+        x='trace_id',
+        y='drift_score',
+        title='Drift Score by Trace ID',
+        labels={'trace_id': 'Trace ID', 'drift_score': 'Drift Score (0-100)'},
+        color='drift_score',
+        color_continuous_scale='RdYlGn_r',
+        range_color=[0, 100]
+    )
+    fig.update_layout(
+        xaxis_tickangle=-45,
+        height=400
+    )
+    return fig
+def create_anomaly_summary_chart(executions):
+    """
+    Create a stacked bar chart showing anomaly types across executions.
+    Args:
+        executions: List of execution dictionaries with summary_json
+    Returns:
+        Plotly figure object
+    """
+    data = []
+    for exec_data in executions:
+        summary = exec_data['summary_json'].get('summary', {})
+        data.append({
+            'trace_id': exec_data['trace_id'],
+            'missing_steps': summary.get('missing_steps', 0),
+            'extra_steps': summary.get('extra_steps', 0),
+            'reorderings': summary.get('reorderings', 0),
+            'status_mismatches': summary.get('status_mismatches', 0)
+        })
+    df = pd.DataFrame(data)
+    fig = go.Figure()
+    fig.add_trace(go.Bar(
+        name='Missing Steps',
+        x=df['trace_id'],
+        y=df['missing_steps'],
+        marker_color='#dc3545'
+    ))
+    fig.add_trace(go.Bar(
+        name='Extra Steps',
+        x=df['trace_id'],
+        y=df['extra_steps'],
+        marker_color='#ffc107'
+    ))
+    fig.add_trace(go.Bar(
+        name='Reorderings',
+        x=df['trace_id'],
+        y=df['reorderings'],
+        marker_color='#17a2b8'
+    ))
+    fig.add_trace(go.Bar(
+        name='Status Mismatches',
+        x=df['trace_id'],
+        y=df['status_mismatches'],
+        marker_color='#6c757d'
+    ))
+    fig.update_layout(
+        barmode='stack',
+        title='Anomaly Types by Trace ID',
+        xaxis_title='Trace ID',
+        yaxis_title='Count',
+        xaxis_tickangle=-45,
+        height=400
+    )
+    return fig
+def main():
+    """Main Streamlit application."""
+    st.markdown('<h1 class="main-header">📊 VDE Drift Dashboard</h1>', unsafe_allow_html=True)
+    # Initialize engine
+    try:
+        engine = DriftDetectionEngine()
+    except Exception as e:
+        st.error(f"Failed to connect to database: {e}")
+        st.info("Please ensure PostgreSQL is running and DATABASE_URL is configured.")
+        return
+    # Sidebar
+    st.sidebar.header("Controls")
+    # Refresh button
+    if st.sidebar.button("🔄 Refresh Data"):
+        st.rerun()
+    # Get all reconstructed executions
+    try:
+        executions = engine.get_all_reconstructed_executions()
+    except Exception as e:
+        st.error(f"Failed to fetch executions: {e}")
+        return
+    if not executions:
+        st.warning("No reconstructed executions found in the database.")
+        st.info("Use the DriftDetectionEngine to analyze traces first.")
+        return
+    # Overview metrics
+    st.subheader("Overview")
+    col1, col2, col3, col4 = st.columns(4)
+    total_traces = len(executions)
+    avg_drift = sum(e['drift_score'] for e in executions) / total_traces
+    high_drift_count = sum(1 for e in executions if e['drift_score'] > 50)
+    total_anomalies = sum(e['summary_json'].get('summary', {}).get('total_anomalies', 0) for e in executions)
+    col1.metric("Total Traces", total_traces)
+    col2.metric("Avg Drift Score", f"{avg_drift:.2f}")
+    col3.metric("High Drift (>50)", high_drift_count)
+    col4.metric("Total Anomalies", total_anomalies)
+    # Trace selection
+    st.subheader("Trace Analysis")
+    trace_ids = [e['trace_id'] for e in executions]
+    selected_trace = st.selectbox("Select Trace ID", trace_ids)
+    # Get selected execution data
+    selected_exec = next(e for e in executions if e['trace_id'] == selected_trace)
+    summary_json = selected_exec['summary_json']
+    # Display drift score prominently
+    drift_score = summary_json['drift_score']
+    # Color code drift score
+    if drift_score < 20:
+        score_color = "🟢"
+    elif drift_score < 50:
+        score_color = "🟡"
+    else:
+        score_color = "🔴"
+    st.markdown(f"### Drift Score: {score_color} {drift_score}/100")
+    # Timeline visualization
+    st.subheader("Intent vs Execution Timeline")
+    intent_sequence = summary_json.get('intent_sequence', [])
+    execution_sequence = summary_json.get('execution_sequence', [])
+    if intent_sequence and execution_sequence:
+        timeline_fig = create_timeline_comparison(intent_sequence, execution_sequence)
+        st.plotly_chart(timeline_fig, use_container_width=True)
+    else:
+        st.warning("No sequence data available for this trace.")
+    # Anomalies detail
+    st.subheader("Detected Anomalies")
+    anomalies = summary_json.get('anomalies', [])
+    if anomalies:
+        for anomaly in anomalies:
+            anomaly_type = anomaly['type']
+            css_class = anomaly_type.replace('_', '-')
+            st.markdown(f"""
+            <div class="anomaly-box {css_class}">
+                <strong>{anomaly_type.replace('_', ' ').title()}</strong>: {anomaly.get('description', '')}
+            </div>
+            """, unsafe_allow_html=True)
+    else:
+        st.success("No anomalies detected for this trace.")
+    # Summary statistics
+    st.subheader("Summary Statistics")
+    summary = summary_json.get('summary', {})
+    col1, col2, col3, col4, col5 = st.columns(5)
+    col1.metric("Total Anomalies", summary.get('total_anomalies', 0))
+    col2.metric("Missing Steps", summary.get('missing_steps', 0))
+    col3.metric("Extra Steps", summary.get('extra_steps', 0))
+    col4.metric("Reorderings", summary.get('reorderings', 0))
+    col5.metric("Status Mismatches", summary.get('status_mismatches', 0))
+    # Global visualizations
+    st.subheader("Global Analysis")
+    tab1, tab2 = st.tabs(["Drift Scores", "Anomaly Distribution"])
+    with tab1:
+        drift_fig = create_drift_score_chart(executions)
+        st.plotly_chart(drift_fig, use_container_width=True)
+    with tab2:
+        anomaly_fig = create_anomaly_summary_chart(executions)
+        st.plotly_chart(anomaly_fig, use_container_width=True)
+    # Raw data expander
+    with st.expander("View Raw Data"):
+        st.json(summary_json)
+# Render the dashboard only when this file is executed as a script, not when
+# it is imported as a module.
+if __name__ == "__main__":
+    main()