PyPI - validibot-shared - Versions diffs - 0.1.0__py3-none-any.whl - Mend

validibot-shared 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15) hide show

validibot_shared/__init__.py +16 -0
validibot_shared/energyplus/__init__.py +25 -0
validibot_shared/energyplus/envelopes.py +300 -0
validibot_shared/energyplus/models.py +140 -0
validibot_shared/fmi/__init__.py +6 -0
validibot_shared/fmi/envelopes.py +190 -0
validibot_shared/fmi/models.py +70 -0
validibot_shared/py.typed +0 -0
validibot_shared/validations/__init__.py +0 -0
validibot_shared/validations/envelopes.py +607 -0
validibot_shared-0.1.0.dist-info/METADATA +168 -0
validibot_shared-0.1.0.dist-info/RECORD +15 -0
validibot_shared-0.1.0.dist-info/WHEEL +4 -0
validibot_shared-0.1.0.dist-info/licenses/LICENSE +21 -0
validibot_shared-0.1.0.dist-info/licenses/NOTICE +23 -0

validibot_shared/fmi/models.py ADDED Viewed

@@ -0,0 +1,70 @@
+"""
+FMI probe result models.
+These models define the contract for FMU probing operations - extracting
+metadata from modelDescription.xml to populate validator catalog entries.
+Probing is done in-process in the Django worker (not in containers) since
+it's just XML parsing with no FMU execution.
+"""
+from __future__ import annotations
+from typing import Literal
+from pydantic import BaseModel, ConfigDict, Field
+class FMIVariableMeta(BaseModel):
+    """Metadata extracted from modelDescription.xml."""
+    # Unknown keys are rejected rather than silently dropped.
+    model_config = ConfigDict(extra="forbid")
+    name: str
+    # causality / variability / value_type are plain strings: this model does
+    # not restrict them to a fixed vocabulary.
+    causality: str
+    variability: str | None = None
+    # NOTE(review): an omitted value_reference becomes 0, which cannot be told
+    # apart from an explicit reference of 0 - confirm this default is intended.
+    value_reference: int = 0
+    value_type: str
+    unit: str | None = None
+class FMIProbeResult(BaseModel):
+    """Result of a short probe run to vet an FMU before approval."""
+    model_config = ConfigDict(extra="forbid")
+    status: Literal["success", "error"] = "error"
+    variables: list[FMIVariableMeta] = Field(default_factory=list)
+    errors: list[str] = Field(default_factory=list)
+    messages: list[str] = Field(default_factory=list)
+    execution_seconds: float | None = None
+    @classmethod
+    def success(
+        cls,
+        *,
+        variables: list[FMIVariableMeta],
+        execution_seconds: float | None = None,
+        messages: list[str] | None = None,
+    ) -> FMIProbeResult:
+        return cls(
+            status="success",
+            variables=variables,
+            execution_seconds=execution_seconds,
+            messages=messages or [],
+            errors=[],
+        )
+    @classmethod
+    def failure(
+        cls,
+        *,
+        errors: list[str],
+        messages: list[str] | None = None,
+    ) -> FMIProbeResult:
+        return cls(
+            status="error",
+            variables=[],
+            errors=errors,
+            messages=messages or [],
+        )

validibot_shared/py.typed ADDED Viewed

File without changes

validibot_shared/validations/__init__.py ADDED Viewed

File without changes

validibot_shared/validations/envelopes.py ADDED Viewed

@@ -0,0 +1,607 @@
+"""
+Pydantic schemas for Advanced validator job execution envelopes.
+These schemas define the contract between Django and Advanced validator
+containers/services. Advanced validators are validations that run in separate
+containers or remote services (e.g., EnergyPlus, FMI), as opposed to Simple
+validators that run directly within Django.
+This library is used by both the Django app and Advanced validator services to ensure
+type safety and contract consistency across the network boundary.
+## Deployment Modes
+This module supports multiple deployment modes:
+1. **GCP deployment**: Uses gs:// URIs for Google Cloud Storage
+2. **Self-hosted Docker** (default): Uses file:// URIs for local filesystem
+The envelope schemas are storage-agnostic - they accept any valid URI. The
+actual storage handling is done by the validators' storage_client module.
+## Architecture Overview
+This module provides a type-safe interface for communication between the Django
+app and Advanced validator containers:
+**GCP Mode (async with callbacks):**
+1. Django creates input.json with files, config, callback URL
+2. Django uploads to GCS and triggers validator container
+3. Validator downloads inputs, runs validation, uploads outputs
+4. Validator POSTs minimal callback to Django when complete
+5. Django receives callback and loads full output.json from GCS
+**Self-hosted Mode (sync execution):**
+1. Django creates input.json with files, config
+2. Django uploads to local storage and runs Docker container synchronously
+3. Validator reads inputs, runs validation, writes outputs to local storage
+4. Docker container exits, Django reads output.json directly
+## Why Two Separate Fields: input_files vs inputs?
+- **input_files**: Generic list of storage file URIs, gs:// or file:// (IDF, EPW, FMU, XML, etc.)
+  - NEVER changes in subclasses - all validators need file inputs
+  - Role field distinguishes file purposes (e.g., 'primary-model' vs 'weather')
+- **inputs**: Domain-specific configuration parameters
+  - Base class uses dict[str, Any] for flexibility
+  - Subclasses override with typed Pydantic models (e.g., EnergyPlusInputs)
+  - Examples: timestep settings, output variables, simulation options
+This separation keeps file handling generic while allowing type-safe config.
+## Why Separate outputs Field?
+- **messages/metrics/artifacts**: Generic outputs (errors, warnings, values)
+  - Available on all validators for consistent reporting
+- **outputs**: Domain-specific detailed results
+  - Base class uses dict[str, Any] | None for flexibility
+  - Subclasses override with typed models (e.g., EnergyPlusOutputs
+    with returncode, logs, file paths)
+  - Optional because not all validators produce detailed domain-specific data
+## Subclassing Pattern
+Domain-specific validators (energyplus, fmi, xml) create typed subclasses:
+```python
+# In energyplus/envelopes.py
+class EnergyPlusInputs(BaseModel):
+    timestep_per_hour: int = 4
+    invocation_mode: Literal["python_api", "cli"] = "cli"
+    # ... other EnergyPlus-specific config
+class EnergyPlusInputEnvelope(ValidationInputEnvelope):
+    inputs: EnergyPlusInputs  # Override dict[str, Any] with typed version!
+class EnergyPlusOutputs(BaseModel):
+    energyplus_returncode: int
+    execution_seconds: float
+    # ... other EnergyPlus-specific results
+class EnergyPlusOutputEnvelope(ValidationOutputEnvelope):
+    outputs: EnergyPlusOutputs  # Override dict[str, Any] with typed version!
+```
+Django deserializes using the correct subclass based on validator.type.
+## Why ValidationCallback?
+The callback is a minimal async notification sent from the validator container
+back to the Django app when work completes. It contains only:
+- run_id: Which job finished
+- status: success/failed_validation/failed_runtime/cancelled
+- result_uri: Storage path to full output.json
+This avoids redundant data transfer since the full output.json is already in storage.
+The callback enables async execution (validator POSTs when done) instead of the
+Django app having to poll job status repeatedly.
+See docs/adr/2025-12-04-validator-job-interface.md in the validibot repository
+for the full specification.
+"""
+from __future__ import annotations
+from datetime import datetime  # noqa: TC003
+from enum import Enum
+from typing import Any, Literal
+from pydantic import BaseModel, Field, HttpUrl
+# ==============================================================================
+# Shared Enums
+# ==============================================================================
+class Severity(str, Enum):
+    """Severity level for validation messages."""
+    # str mixin: each member is also a str equal to its value.
+    INFO = "INFO"
+    WARNING = "WARNING"
+    ERROR = "ERROR"
+class ValidatorType(str, Enum):
+    """
+    Canonical validator types used across Django and validator containers.
+    These values align with Django's ValidationType TextChoices and should be
+    used anywhere we serialize validator identifiers in envelopes.
+    """
+    # str mixin: each member is also a str equal to its value. Every member's
+    # value is spelled the same as its name.
+    BASIC = "BASIC"
+    JSON_SCHEMA = "JSON_SCHEMA"
+    XML_SCHEMA = "XML_SCHEMA"
+    ENERGYPLUS = "ENERGYPLUS"
+    FMI = "FMI"
+    CUSTOM_VALIDATOR = "CUSTOM_VALIDATOR"
+    AI_ASSIST = "AI_ASSIST"
+# ==============================================================================
+# Input Envelope (validibot.input.v1)
+# ==============================================================================
+class SupportedMimeType(str, Enum):
+    """
+    Supported MIME types for file inputs.
+    We only accept specific file types that our validators know how to process.
+    """
+    # str mixin: each member is also a str equal to its MIME string.
+    # XML documents
+    APPLICATION_XML = "application/xml"
+    TEXT_XML = "text/xml"
+    # EnergyPlus files
+    ENERGYPLUS_IDF = "application/vnd.energyplus.idf"  # IDF text format
+    ENERGYPLUS_EPJSON = "application/vnd.energyplus.epjson"  # epJSON format
+    ENERGYPLUS_EPW = "application/vnd.energyplus.epw"  # Weather data
+    # FMU files
+    FMU = "application/vnd.fmi.fmu"  # Functional Mock-up Unit
+class InputFileItem(BaseModel):
+    """
+    A file input for the validator.
+    Files live in storage and are referenced by URI (gs:// or file://). The
+    'role' field allows validators to understand what each file is for (e.g.,
+    'primary-model' vs 'weather' for EnergyPlus).
+    """
+    name: str = Field(description="Human-readable name of the file")
+    # Restricted to the SupportedMimeType enum, unlike the free-form role below.
+    mime_type: SupportedMimeType = Field(description="MIME type of the file")
+    role: str | None = Field(
+        default=None,
+        description=(
+            "Validator-specific role (e.g., 'primary-model', 'weather', 'config')"
+        ),
+    )
+    # Typed as a plain str: the URI scheme is not validated by this model.
+    uri: str = Field(
+        description="Storage URI to the file (gs:// or file:// for self-hosted)"
+    )
+    # Unknown keys are rejected rather than silently dropped.
+    model_config = {"extra": "forbid"}
+class ValidatorInfo(BaseModel):
+    """
+    Information about the validator being executed.
+    This identifies which validator container to run and which version.
+    The 'type' field determines:
+    1. Which validator container to run (e.g., 'validibot-validator-energyplus')
+    2. Which envelope subclass Django uses for deserialization
+       (EnergyPlusInputEnvelope, FMIInputEnvelope, etc.)
+    This class appears in both input and output envelopes to maintain traceability
+    of which validator version produced which results.
+    """
+    # "id" and "type" are serialized field names; they shadow the builtins
+    # only inside this class body.
+    id: str = Field(description="Validator UUID from Django database")
+    type: ValidatorType = Field(
+        description="Validator type (e.g., 'ENERGYPLUS', 'FMI', 'JSON_SCHEMA')"
+    )
+    # Plain str: no version format is enforced by this model.
+    version: str = Field(description="Validator version (e.g., '1.0.0')")
+    # Unknown keys are rejected rather than silently dropped.
+    model_config = {"extra": "forbid"}
+class OrganizationInfo(BaseModel):
+    """
+    Information about the organization running the validation.
+    Included for logging, debugging, and future multi-tenancy features.
+    The name is redundant with ID but helpful for human-readable logs.
+    """
+    # Both fields are required plain strings; the UUID format is not validated.
+    id: str = Field(description="Organization UUID from Django database")
+    name: str = Field(description="Organization name (for human-readable logs)")
+    # Unknown keys are rejected rather than silently dropped.
+    model_config = {"extra": "forbid"}
+class WorkflowInfo(BaseModel):
+    """
+    Information about the workflow and step being executed.
+    Validators are executed as steps within larger workflows. This metadata
+    enables tracing validation results back to their workflow context for:
+    - Debugging (which workflow triggered this validation?)
+    - Auditing (track all validations in a workflow)
+    - UI display (show validation status within workflow visualization)
+    """
+    # id and step_id are required; only the display name may be omitted.
+    id: str = Field(description="Workflow UUID")
+    step_id: str = Field(description="Workflow step UUID")
+    step_name: str | None = Field(default=None, description="Human-readable step name")
+    # Unknown keys are rejected rather than silently dropped.
+    model_config = {"extra": "forbid"}
+class ExecutionContext(BaseModel):
+    """
+    Execution context and callback information.
+    This provides the validator container with everything it needs to:
+    1. Download input files from storage (execution_bundle_uri)
+    2. Upload output files to storage (execution_bundle_uri)
+    3. Notify Django when complete (callback_url)
+    4. Respect timeout constraints (timeout_seconds)
+    """
+    # execution_bundle_uri is the only required field; all others have defaults.
+    callback_id: str | None = Field(
+        default=None,
+        description=(
+            "Unique identifier for this callback, used for idempotency. "
+            "Generated at job launch and echoed back in the callback payload. "
+            "The callback handler uses this to detect and ignore duplicate deliveries."
+        ),
+    )
+    # NOTE(review): nothing in this model requires callback_url to be set when
+    # skip_callback is False - confirm consumers handle a None URL.
+    callback_url: HttpUrl | None = Field(
+        default=None,
+        description="URL to POST callback when validation completes",
+    )
+    skip_callback: bool = Field(
+        default=False,
+        description=(
+            "If True, skip POSTing callback to callback_url after completion. "
+            "Useful for testing where polling GCS for output.json is preferred."
+        ),
+    )
+    # Plain str: the URI scheme is not validated by this model.
+    execution_bundle_uri: str = Field(
+        description="Storage URI to the execution bundle directory (gs:// or file://)"
+    )
+    # NOTE(review): no lower bound is declared, so zero or negative values
+    # pass validation - confirm whether that is acceptable.
+    timeout_seconds: int = Field(
+        default=3600, description="Maximum execution time in seconds"
+    )
+    tags: list[str] = Field(default_factory=list, description="Execution tags")
+    # Unknown keys are rejected rather than silently dropped.
+    model_config = {"extra": "forbid"}
+class ValidationInputEnvelope(BaseModel):
+    """
+    Base input envelope for validator jobs (validibot.input.v1).
+    This is written to storage as input.json by Django before triggering
+    the validator container.
+    ## How Subclassing Works
+    Domain-specific validators create subclasses that override the 'inputs' field
+    with a typed Pydantic model. The base class uses dict[str, Any] for flexibility,
+    but subclasses provide type safety:
+    ```python
+    class EnergyPlusInputEnvelope(ValidationInputEnvelope):
+        inputs: EnergyPlusInputs  # Typed override!
+    ```
+    This pattern allows:
+    - Generic base class for all validators (this class)
+    - Type-safe domain-specific subclasses (EnergyPlusInputEnvelope,
+      FMIInputEnvelope, etc.)
+    - Django to serialize/deserialize using the correct subclass based on
+      validator.type
+    ## Fields That Never Change in Subclasses
+    - input_files: All validators receive files the same way
+    - context: All validators use the same execution context
+    - validator/org/workflow: All validators need this metadata
+    ## Fields That Subclasses Override
+    - inputs: Domain-specific configuration (dict[str, Any] → EnergyPlusInputs, etc.)
+    """
+    # Single-value Literal: only "validibot.input.v1" is accepted for this field.
+    schema_version: Literal["validibot.input.v1"] = "validibot.input.v1"
+    run_id: str = Field(description="Unique run identifier (UUID)")
+    # validator, org, workflow and context have no defaults and must be supplied.
+    validator: ValidatorInfo
+    org: OrganizationInfo
+    workflow: WorkflowInfo
+    # input_files and inputs fall back to fresh empty containers when omitted.
+    input_files: list[InputFileItem] = Field(
+        default_factory=list,
+        description="File inputs for the validator (GCS URIs with roles)",
+    )
+    inputs: dict[str, Any] = Field(
+        default_factory=dict,
+        description=("Domain-specific inputs (subclasses override with typed model)"),
+    )
+    context: ExecutionContext
+    # Unknown top-level keys are rejected rather than silently dropped.
+    model_config = {"extra": "forbid"}
+# ==============================================================================
+# Output Envelope (validibot.output.v1)
+# ==============================================================================
+class ValidationStatus(str, Enum):
+    """Validation output status."""
+    # str mixin: each member is also a str equal to its value. Values are
+    # lowercase, unlike the uppercase member names.
+    SUCCESS = "success"  # Validation completed successfully, no errors
+    FAILED_VALIDATION = "failed_validation"  # Validation found errors (user's fault)
+    FAILED_RUNTIME = "failed_runtime"  # Runtime error in validator (system fault)
+    CANCELLED = "cancelled"  # User or system cancelled the job
+class MessageLocation(BaseModel):
+    """Location information for a validation message."""
+    # Every field is optional, so an empty MessageLocation() is valid.
+    file_role: str | None = Field(
+        default=None, description="Input file role (references InputFileItem.role)"
+    )
+    # NOTE(review): whether line/column are 0- or 1-based is not stated here.
+    line: int | None = Field(default=None, description="Line number")
+    column: int | None = Field(default=None, description="Column number")
+    path: str | None = Field(
+        default=None, description="Object path or XPath-like identifier"
+    )
+    # Unknown keys are rejected rather than silently dropped.
+    model_config = {"extra": "forbid"}
+class ValidationMessage(BaseModel):
+    """A validation finding, warning, or error."""
+    # severity and text are required; code, location and tags are optional.
+    severity: Severity
+    code: str | None = Field(
+        default=None, description="Error code (e.g., 'EP001', 'FMU_INIT_ERROR')"
+    )
+    text: str = Field(description="Human-readable message")
+    location: MessageLocation | None = Field(
+        default=None, description="Location of the issue"
+    )
+    tags: list[str] = Field(default_factory=list, description="Message tags/categories")
+    # Unknown keys are rejected rather than silently dropped.
+    model_config = {"extra": "forbid"}
+class ValidationMetric(BaseModel):
+    """A computed metric from the validation/simulation."""
+    name: str = Field(description="Metric name (e.g., 'zone_temp_max')")
+    # NOTE(review): which union member an input ends up as (for example a
+    # numeric string) depends on pydantic's union resolution - confirm the
+    # round-trip behaviour consumers expect.
+    value: float | int | str = Field(description="Metric value")
+    unit: str | None = Field(default=None, description="Unit (e.g., 'C', 'kWh', 'm2')")
+    category: str | None = Field(
+        default=None, description="Category (e.g., 'comfort', 'energy', 'performance')"
+    )
+    tags: list[str] = Field(default_factory=list, description="Metric tags")
+    # Unknown keys are rejected rather than silently dropped.
+    model_config = {"extra": "forbid"}
+class ValidationArtifact(BaseModel):
+    """A file artifact produced by the validator."""
+    name: str = Field(description="Artifact name")
+    # "type" is a free-form string and shadows the builtin only inside this
+    # class body.
+    type: str = Field(
+        description=(
+            "Artifact type (e.g., 'simulation-db', 'report-html', 'timeseries-csv')"
+        )
+    )
+    # Plain optional str here, not the SupportedMimeType enum used for inputs.
+    mime_type: str | None = Field(
+        default=None, description="MIME type (e.g., 'application/x-sqlite3')"
+    )
+    uri: str = Field(
+        description="Storage URI to the artifact (gs:// or file:// for self-hosted)"
+    )
+    size_bytes: int | None = Field(default=None, description="File size in bytes")
+    # Unknown keys are rejected rather than silently dropped.
+    model_config = {"extra": "forbid"}
+class RawOutputs(BaseModel):
+    """Information about raw output files."""
+    # Both fields are required. "format" accepts only the two literals below
+    # and shadows the builtin only inside this class body.
+    format: Literal["directory", "archive"] = Field(
+        description="Format of raw outputs (directory or archive)"
+    )
+    manifest_uri: str = Field(
+        description="Storage URI to the manifest file (gs:// or file://)"
+    )
+    # Unknown keys are rejected rather than silently dropped.
+    model_config = {"extra": "forbid"}
+class ValidationTiming(BaseModel):
+    """Timing information for the validation run."""
+    # All three timestamps are optional, so an empty ValidationTiming() is valid.
+    # NOTE(review): the fields are plain datetime; timezone awareness and
+    # ordering (queued <= started <= finished) are not enforced - confirm
+    # producers emit consistent, timezone-aware values.
+    queued_at: datetime | None = Field(
+        default=None, description="When the job was queued (ISO8601)"
+    )
+    started_at: datetime | None = Field(
+        default=None, description="When execution started (ISO8601)"
+    )
+    finished_at: datetime | None = Field(
+        default=None, description="When execution finished (ISO8601)"
+    )
+    # Unknown keys are rejected rather than silently dropped.
+    model_config = {"extra": "forbid"}
+class ValidationOutputEnvelope(BaseModel):
+    """
+    Base output envelope for validator jobs (validibot.output.v1).
+    This is written to storage as output.json by the validator container
+    after completion.
+    ## How Subclassing Works
+    Domain-specific validators create subclasses that override the 'outputs' field
+    with a typed Pydantic model containing detailed domain-specific results:
+    ```python
+    class EnergyPlusOutputEnvelope(ValidationOutputEnvelope):
+        outputs: EnergyPlusOutputs  # Typed override!
+    ```
+    This pattern allows:
+    - Generic base class for all validators (this class)
+    - Type-safe domain-specific subclasses (EnergyPlusOutputEnvelope,
+      FMIOutputEnvelope, etc.)
+    - Django to deserialize using the correct subclass based on validator.type
+    ## Generic vs Domain-Specific Outputs
+    All validators populate these **generic** fields:
+    - status: SUCCESS/FAILED_VALIDATION/FAILED_RUNTIME/CANCELLED
+    - messages: Errors, warnings, info (consistent across all validators)
+    - metrics: Computed values like energy use, temperature ranges
+    - artifacts: Storage URIs to output files (SQL, CSV reports, HTML visualizations)
+    - timing: When the job was queued/started/finished
+    Some validators also populate **domain-specific** outputs:
+    - outputs: Detailed execution data (returncode, logs, file paths, etc.)
+    - Optional because not all validators produce this level of detail
+    ## Why Both metrics and outputs?
+    - **metrics**: High-level computed values for UI display
+      (energy use, peak temperature, etc.)
+      - Always a flat list of name/value/unit tuples
+      - Consistent format across all validators
+      - Used for dashboards, comparisons, search
+    - **outputs**: Detailed domain-specific execution data
+      - Nested structure with validator-specific fields
+      - Examples: EnergyPlus returncode, FMU logs, XML schema validation
+      - Used for debugging, detailed analysis, reproducing results
+    ## Why raw_outputs?
+    Some validators produce many output files (EnergyPlus can create dozens).
+    Instead of creating a ValidationArtifact for each one, we:
+    1. Upload all files to storage
+    2. Create a manifest.json listing all files
+    3. Set raw_outputs.manifest_uri to point to the manifest
+    This keeps the envelope small while preserving access to all files.
+    """
+    # Single-value Literal: only "validibot.output.v1" is accepted for this field.
+    schema_version: Literal["validibot.output.v1"] = "validibot.output.v1"
+    run_id: str = Field(description="Unique run identifier (matches input)")
+    validator: ValidatorInfo
+    status: ValidationStatus
+    # Required even though every field inside ValidationTiming is optional.
+    timing: ValidationTiming
+    # The three list fields fall back to fresh empty lists when omitted.
+    messages: list[ValidationMessage] = Field(
+        default_factory=list, description="Validation findings, warnings, errors"
+    )
+    metrics: list[ValidationMetric] = Field(
+        default_factory=list, description="Computed metrics from the validation"
+    )
+    artifacts: list[ValidationArtifact] = Field(
+        default_factory=list, description="Output files produced by the validator"
+    )
+    raw_outputs: RawOutputs | None = Field(
+        default=None, description="Raw output files information"
+    )
+    # Unlike the input envelope's "inputs" (empty dict), this defaults to None.
+    outputs: dict[str, Any] | None = Field(
+        default=None,
+        description=("Domain-specific outputs (subclasses override with typed model)"),
+    )
+    # Unknown top-level keys are rejected rather than silently dropped.
+    model_config = {"extra": "forbid"}
+# ==============================================================================
+# Callback Payload
+# ==============================================================================
+class ValidationCallback(BaseModel):
+    """
+    Callback payload POSTed from validator container to Django.
+    Minimal payload to avoid duplication - Django loads the full output.json
+    from storage after receiving this callback.
+    """
+    run_id: str = Field(description="Run identifier")
+    # Optional, mirroring ExecutionContext.callback_id which also defaults to None.
+    callback_id: str | None = Field(
+        default=None,
+        description=(
+            "Idempotency key echoed from the input envelope's context.callback_id. "
+            "Used by the callback handler to detect and ignore duplicate deliveries."
+        ),
+    )
+    # Same enum the output envelope uses for its status field.
+    status: ValidationStatus
+    # Plain str: the URI scheme is not validated by this model.
+    result_uri: str = Field(
+        description="Storage URI to output.json (gs:// or file:// for self-hosted)"
+    )
+    # Unknown keys are rejected rather than silently dropped.
+    model_config = {"extra": "forbid"}