wt-runner 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
wt_runner/__init__.py ADDED
@@ -0,0 +1,36 @@
1
+ """wt-runner: FastAPI application for workflow execution.
2
+
3
+ This package provides a FastAPI web service for executing workflows using
4
+ wt-invokers. It includes endpoints for:
5
+ - Running workflows with various configurations
6
+ - Processing Pub/Sub messages
7
+ - Retrieving workflow metadata and schemas
8
+ - Converting between parameter formats
9
+ """
10
+
11
+ from wt_runner.app import app
12
+ from wt_runner.testing import Case, CaseRunner
13
+ from wt_runner.tracing import (
14
+ TraceContextHeaders,
15
+ attach_context,
16
+ build_context_headers,
17
+ configure_tracer,
18
+ )
19
+
20
+ try:
21
+ from wt_runner._version import __version__, __version_tuple__
22
+ except ImportError:
23
+ __version__ = "unknown"
24
+ __version_tuple__ = (0, 0, 0)
25
+
26
+ __all__ = [
27
+ "app",
28
+ "Case",
29
+ "CaseRunner",
30
+ "configure_tracer",
31
+ "attach_context",
32
+ "build_context_headers",
33
+ "TraceContextHeaders",
34
+ "__version__",
35
+ "__version_tuple__",
36
+ ]
wt_runner/_version.py ADDED
@@ -0,0 +1,34 @@
1
+ # file generated by setuptools-scm
2
+ # don't change, don't track in version control
3
+
4
+ __all__ = [
5
+ "__version__",
6
+ "__version_tuple__",
7
+ "version",
8
+ "version_tuple",
9
+ "__commit_id__",
10
+ "commit_id",
11
+ ]
12
+
13
+ TYPE_CHECKING = False
14
+ if TYPE_CHECKING:
15
+ from typing import Tuple
16
+ from typing import Union
17
+
18
+ VERSION_TUPLE = Tuple[Union[int, str], ...]
19
+ COMMIT_ID = Union[str, None]
20
+ else:
21
+ VERSION_TUPLE = object
22
+ COMMIT_ID = object
23
+
24
+ version: str
25
+ __version__: str
26
+ __version_tuple__: VERSION_TUPLE
27
+ version_tuple: VERSION_TUPLE
28
+ commit_id: COMMIT_ID
29
+ __commit_id__: COMMIT_ID
30
+
31
+ __version__ = version = '0.1.3'
32
+ __version_tuple__ = version_tuple = (0, 1, 3)
33
+
34
+ __commit_id__ = commit_id = None
wt_runner/app.py ADDED
@@ -0,0 +1,758 @@
1
+ """FastAPI application for workflow execution."""
2
+
3
+ import base64
4
+ import binascii
5
+ import json
6
+ import logging
7
+ import os
8
+ import traceback
9
+ from collections.abc import AsyncIterator
10
+ from contextlib import asynccontextmanager
11
+ from dataclasses import asdict, dataclass
12
+ from importlib.metadata import PackageNotFoundError, version
13
+ from io import StringIO
14
+ from pathlib import Path
15
+ from typing import Any, Literal
16
+ from urllib.parse import urlparse
17
+
18
+ import ruamel.yaml
19
+ from fastapi import (
20
+ Depends,
21
+ FastAPI,
22
+ Header,
23
+ HTTPException,
24
+ Query,
25
+ Request,
26
+ Response,
27
+ status,
28
+ )
29
+ from fastapi.middleware.cors import CORSMiddleware
30
+ from fastapi.middleware.gzip import GZipMiddleware
31
+ from fastapi.responses import JSONResponse
32
+ from opentelemetry import trace as otel_trace
33
+ from pydantic import BaseModel, Field, SecretStr
34
+ from rattler import MatchSpec
35
+ from wt_invokers import (
36
+ AbstractInvoker,
37
+ CloudBatchInvoker,
38
+ LocalSubprocessInvoker,
39
+ )
40
+
41
+ from wt_runner.tracing import (
42
+ TraceContextHeaders,
43
+ attach_context,
44
+ build_context_headers,
45
+ configure_tracer,
46
+ make_otel_console_exporter_file_dst_kws,
47
+ )
48
+
49
+ # Optional imports for ecoscope integration
50
+ try:
51
+ from ecoscope_eda_core.messages.commands import ( # type: ignore[import-untyped,import-not-found,unused-ignore]
52
+ InvokerType as EcoscopeInvokerType,
53
+ )
54
+ from ecoscope_eda_core.messages.commands import (
55
+ RunWorkflow,
56
+ RunWorkflowParams,
57
+ )
58
+ from ecoscope_eda_core.workflows import ( # type: ignore[import-untyped,import-not-found,unused-ignore]
59
+ get_results_json as ecoscope_get_results_json,
60
+ )
61
+
62
+ HAS_ECOSCOPE = True
63
+ InvokerType = EcoscopeInvokerType
64
+ except ImportError:
65
+ HAS_ECOSCOPE = False
66
+ # Define InvokerType locally when ecoscope_eda_core is not available
67
+ InvokerType = Literal[
68
+ "BlockingLocalSubprocessInvoker",
69
+ "AsyncLocalSubprocessInvoker",
70
+ "CloudBatchInvoker",
71
+ ]
72
+ RunWorkflow = None
73
+ RunWorkflowParams = None
74
+ ecoscope_get_results_json = None
75
+
76
+ import obstore
77
+
78
# Invoker registry mapping invoker names to classes. Both "local" entries map
# to the same class; the distinct names only signal the caller's intended
# blocking/async usage of the invoker.
INVOKERS: dict[str, type[AbstractInvoker]] = {
    "BlockingLocalSubprocessInvoker": LocalSubprocessInvoker,
    "AsyncLocalSubprocessInvoker": LocalSubprocessInvoker,
    "CloudBatchInvoker": CloudBatchInvoker,
}

# Service title; also used to look up the installed package version.
TITLE = "wt-runner"
# User-facing message attached to invoker waits that exceed their timeout.
TIMEOUT_EXPIRED_ERROR_MSG = (
    "The workflow timed out. Consider reducing the amount of data being processed."
)
# Slightly under Pub/Sub's push acknowledgement deadline (10 minutes), leaving
# headroom to cancel and report errors before the message would be redelivered.
PUBSUB_ACK_MAX_TIMEOUT = 570  # seconds
91
+
92
async def get_results_json(results_url: str) -> dict[str, Any]:
    """Fetch the workflow result document stored at ``results_url``.

    Delegates to ecoscope_eda_core's implementation when that package is
    installed; otherwise reads ``result.json`` from the object store directly.

    Args:
        results_url: URL or path to results

    Returns:
        Results dictionary

    Raises:
        RuntimeError: If results cannot be retrieved
    """
    if HAS_ECOSCOPE and ecoscope_get_results_json is not None:
        via_ecoscope: dict[str, Any] = await ecoscope_get_results_json(results_url)
        return via_ecoscope

    # Fallback path: read `result.json` straight out of the object store.
    store = obstore.store.from_url(results_url)
    response = await store.get_async("result.json")
    payload = bytes(await response.bytes_async())
    parsed: dict[str, Any] = json.loads(payload)
    return parsed
114
+
115
+
116
+ def get_otel_exporter() -> Literal["console", "gcp"] | None:
117
+ """Get OpenTelemetry exporter type from environment.
118
+
119
+ Returns:
120
+ Exporter type or None
121
+ """
122
+ value = os.environ.get("ECOSCOPE_WORKFLOWS_OTEL_EXPORTER")
123
+ if value == "console":
124
+ return "console"
125
+ if value == "gcp":
126
+ return "gcp"
127
+ return None
128
+
129
+
130
def get_otel_console_exporter_dst() -> Literal["stdout", "file"]:
    """Read the console exporter destination from the environment.

    Returns:
        ``"stdout"`` when explicitly requested; ``"file"`` for any other
        value (including when the env var is unset).
    """
    configured = os.environ.get("ECOSCOPE_WORKFLOWS_OTEL_CONSOLE_EXPORTER_DST", "file")
    return "stdout" if configured == "stdout" else "file"
140
+
141
+
142
+ def get_otel_console_exporter_file_dst_target_dir() -> str | None:
143
+ """Get console exporter file destination directory from environment.
144
+
145
+ Returns:
146
+ Target directory path or None
147
+ """
148
+ return os.environ.get("ECOSCOPE_WORKFLOWS_OTEL_CONSOLE_EXPORTER_FILE_DST_TARGET_DIR")
149
+
150
+
151
@dataclass
class SpanAttributes:
    """Key/value attributes attached to tracing spans."""

    # Unique identifier of the workflow run being traced.
    workflow_run_id: str
    # Class name of the invoker executing the workflow.
    invoker_type: str
157
+
158
+
159
@asynccontextmanager
async def lifespan(app: FastAPI) -> AsyncIterator[None]:
    """FastAPI lifespan context manager for startup/shutdown.

    On startup, configures the OpenTelemetry tracer from environment
    settings. No teardown work is currently required on shutdown.

    Args:
        app: FastAPI application instance

    Raises:
        RuntimeError: If the console/file exporter is selected but no target
            directory is configured.
    """
    exporter = get_otel_exporter()
    exporter_kws: dict[str, Any] = {}
    if exporter == "console" and get_otel_console_exporter_dst() == "file":
        target_dir = get_otel_console_exporter_file_dst_target_dir()
        if not target_dir:
            raise RuntimeError(
                "If OTEL_EXPORTER is 'console' with the destination as 'file', "
                "then OTEL_CONSOLE_EXPORTER_FILE_DST_TARGET_DIR must be set via the "
                "env var 'ECOSCOPE_WORKFLOWS_OTEL_CONSOLE_EXPORTER_FILE_DST_TARGET_DIR'."
            )
        exporter_kws |= make_otel_console_exporter_file_dst_kws(Path(target_dir))
    configure_tracer(
        name=app.title,
        version=app.version,
        exporter=exporter,
        exporter_kws=exporter_kws,
    )
    yield
    # Nothing to clean up on shutdown.
187
+
188
+
189
# Resolve the installed package version for the OpenAPI metadata; fall back
# to "unknown" when running from a source tree without installed metadata.
try:
    _version = version(TITLE)
except PackageNotFoundError:
    _version = "unknown"

app = FastAPI(title=TITLE, version=_version, lifespan=lifespan)
# Allow cross-origin POSTs from any origin (the only CORS-exposed method);
# NOTE(review): allow_origins=["*"] with allow_credentials=True is permissive —
# confirm this is intentional for the deployment environment.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["POST"],
    allow_headers=["*"],
)
# Compress responses larger than 1 KB (result payloads can be sizeable).
app.add_middleware(GZipMiddleware, minimum_size=1000)
203
+
204
+
205
class Lithops(BaseModel):
    """Top-level Lithops executor settings."""

    # Compute backend: local processes or GCP Cloud Run.
    backend: Literal["localhost", "gcp_cloudrun"] = "localhost"
    # Storage backend: local filesystem or Google Cloud Storage.
    storage: Literal["localhost", "gcp_storage"] = "localhost"
    log_level: str = "DEBUG"
    data_limit: int = 256
212
+
213
+
214
class GCP(BaseModel):
    """Google Cloud Platform configuration."""

    region: str = "us-central1"
    # Placeholder default; real value comes from GOOGLE_APPLICATION_CREDENTIALS.
    credentials_path: str = "placeholder"
219
+
220
+
221
class GCPCloudRun(BaseModel):
    """Google Cloud Run runtime settings."""

    # Placeholder default; real value comes from LITHOPS_GCP_CLOUDRUN_RUNTIME.
    runtime: str = "placeholder"
    runtime_cpu: int = 2
    runtime_memory: int = 1000
227
+
228
+
229
class LithopsConfig(BaseModel):
    """Complete Lithops configuration document, serialized to YAML for invokers."""

    lithops: Lithops = Field(default_factory=Lithops)
    gcp: GCP | None = None
    gcp_cloudrun: GCPCloudRun | None = None
235
+
236
+
237
class ResponseModel(BaseModel):
    """Envelope returned by the run endpoint: a result, or error details."""

    result: dict[str, Any] | None = None
    # Error message when the workflow failed.
    error: str | None = None
    # Traceback text accompanying `error`, for debugging.
    trace: str | None = None
243
+
244
+
245
+ @app.get("/", status_code=200)
246
+ def health_check() -> dict[str, str]:
247
+ """Health check endpoint.
248
+
249
+ Returns:
250
+ Status dictionary
251
+ """
252
+ return {"status": "ok"}
253
+
254
+
255
def resolve_matchspec(
    matchspec: str | None = Query(None, description="Matchspec for the workflow."),
) -> MatchSpec:
    """Resolve the workflow matchspec from the query param or env override.

    The env var ``ECOSCOPE_WORKFLOWS_MATCHSPEC_OVERRIDE`` takes precedence
    over the query parameter when set.

    Args:
        matchspec: Rattler matchspec string

    Returns:
        Parsed MatchSpec object

    Raises:
        ValueError: If no matchspec is provided by either source
    """
    override = os.environ.get("ECOSCOPE_WORKFLOWS_MATCHSPEC_OVERRIDE")
    effective = override if override else matchspec
    if not effective:
        raise ValueError("Query param `matchspec` is required.")
    return MatchSpec(effective)
274
+
275
+
276
async def resolve_invoker(
    invoker_type: str = Query("BlockingLocalSubprocessInvoker"),
    matchspec: MatchSpec = Depends(resolve_matchspec),
) -> AbstractInvoker:
    """Instantiate (and install, if needed) the requested invoker.

    Args:
        invoker_type: Registry key identifying the invoker class
        matchspec: Workflow matchspec

    Returns:
        Configured and installed invoker instance

    Raises:
        ValueError: If unknown invoker type specified
    """
    if invoker_type not in INVOKERS:
        raise ValueError(f"Unknown invoker name: {invoker_type}")

    instance = INVOKERS[invoker_type](matchspec=matchspec)
    # Ensure the underlying workflow environment is present before use.
    if not await instance.is_installed():
        await instance.install()
    return instance
301
+
302
+
303
def resolve_results_url(
    results_url: str = Query(..., description="Results URL for the workflow."),
) -> str:
    """Normalize the results location to a URL.

    Absolute local paths are converted to ``file://`` URIs; anything that
    already has a scheme is passed through unchanged.

    Args:
        results_url: URL or local path for results

    Returns:
        Normalized results URL

    Raises:
        ValueError: If given a relative local path
    """
    if urlparse(results_url).scheme:
        return results_url
    local_path = Path(results_url)
    if not local_path.is_absolute():
        raise ValueError("Results URL must be an absolute local path or a URL with scheme.")
    return local_path.as_uri()
323
+
324
+
325
+ @app.post("/", status_code=200, response_model=ResponseModel)
326
+ async def run(
327
+ # service response
328
+ response: Response,
329
+ # user (http) inputs
330
+ params: dict[str, Any],
331
+ execution_mode: Literal["async", "sequential"],
332
+ mock_io: bool,
333
+ results_url: str = Depends(resolve_results_url),
334
+ data_connections_env_vars: dict[str, SecretStr] | None = None,
335
+ lithops_config: LithopsConfig | None = None,
336
+ invoker: AbstractInvoker = Depends(resolve_invoker),
337
+ workflow_run_id: str = Query("", description="Unique ID for the workflow run."),
338
+ timeout: float | None = Query(
339
+ None,
340
+ description="Timeout for the workflow in seconds. Defaults to null; i.e., no timeout.",
341
+ ),
342
+ docker_image_uri: str | None = Query(None, description="Docker image URI for the workflow."),
343
+ traceparent: str | None = Header(
344
+ None,
345
+ description="Traceparent header; Cf. https://www.w3.org/TR/trace-context/.",
346
+ ),
347
+ tracestate: str | None = Header(
348
+ None, description="Tracestate header; Cf. https://www.w3.org/TR/trace-context/."
349
+ ),
350
+ ) -> dict[str, Any] | JSONResponse:
351
+ """Run a workflow with the specified parameters.
352
+
353
+ Args:
354
+ response: FastAPI response object
355
+ params: Workflow parameters
356
+ execution_mode: Execution mode (async or sequential)
357
+ mock_io: Whether to mock I/O operations
358
+ results_url: URL for storing results
359
+ data_connections_env_vars: Environment variables for data connections
360
+ lithops_config: Lithops configuration for async execution
361
+ invoker: Workflow invoker instance
362
+ workflow_run_id: Unique run identifier
363
+ timeout: Timeout in seconds
364
+ docker_image_uri: Docker image URI
365
+ traceparent: W3C traceparent header
366
+ tracestate: W3C tracestate header
367
+
368
+ Returns:
369
+ Workflow execution result
370
+ """
371
+ tracer = otel_trace.get_tracer(__name__)
372
+ if traceparent:
373
+ attach_context(traceparent, tracestate)
374
+ span_attributes = SpanAttributes(
375
+ workflow_run_id=workflow_run_id,
376
+ invoker_type=type(invoker).__name__,
377
+ )
378
+ with tracer.start_as_current_span(
379
+ "run-endpoint",
380
+ attributes=asdict(span_attributes),
381
+ ):
382
+ yaml = ruamel.yaml.YAML(typ="safe")
383
+ extra_env: dict[str, str] = {}
384
+ if data_connections_env_vars:
385
+ extra_env |= {k: v.get_secret_value() for k, v in data_connections_env_vars.items()}
386
+ trace_context = build_context_headers()
387
+ for k, v in trace_context.items():
388
+ extra_env[k.upper()] = str(v)
389
+ config_text_stream = StringIO()
390
+ yaml.dump(params, config_text_stream)
391
+ lithops_kws = {}
392
+ if execution_mode == "async":
393
+ lithops_config = LithopsConfig() if not lithops_config else lithops_config
394
+ lithops_text_stream = StringIO()
395
+ yaml.dump(lithops_config.model_dump(), lithops_text_stream)
396
+ lithops_kws = {"lithops_config_text": lithops_text_stream.getvalue()}
397
+ try:
398
+ await invoker.run(
399
+ workflow_run_id=workflow_run_id,
400
+ config_text=config_text_stream.getvalue(),
401
+ results_url=results_url,
402
+ execution_mode=execution_mode,
403
+ mock_io=mock_io,
404
+ extra_env=extra_env,
405
+ otel_exporter=get_otel_exporter(),
406
+ otel_console_exporter_dst=get_otel_console_exporter_dst(),
407
+ **lithops_kws,
408
+ docker_image_uri=docker_image_uri,
409
+ )
410
+ if invoker.is_waitable:
411
+ await invoker.wait(timeout=timeout, error_msg=TIMEOUT_EXPIRED_ERROR_MSG)
412
+ result = await get_results_json(results_url)
413
+ else:
414
+ result = {"result": {}, "error": None, "trace": None}
415
+ return JSONResponse(content=result, status_code=status.HTTP_202_ACCEPTED)
416
+ except Exception as e:
417
+ trace = traceback.format_exc()
418
+ result = {"error": str(e), "trace": trace}
419
+
420
+ if not isinstance(result, dict):
421
+ raise RuntimeError(f"Unexpected {result = }. Expected dict.")
422
+
423
+ if result.get("result") is None and result.get("error") is not None:
424
+ response.status_code = status.HTTP_500_INTERNAL_SERVER_ERROR
425
+
426
+ return result
427
+
428
+
429
@app.post(
    "/run-from-pubsub",
    summary="Processes RunWorkflow messages from Pub/Sub",
    status_code=200,
)
async def run_from_pubsub(
    request: Request,
) -> dict[str, Any]:
    """Process RunWorkflow messages from Google Cloud Pub/Sub.

    Note: Requires ecoscope_eda_core to be installed.

    Errors are returned as 200-status payloads (and uploaded to the results
    store) rather than raised, so Pub/Sub does not retry unrecoverable
    messages.

    Args:
        request: FastAPI request object containing Pub/Sub message

    Returns:
        Status dictionary

    Raises:
        HTTPException: If ecoscope_eda_core is not available
    """
    if not HAS_ECOSCOPE:
        raise HTTPException(
            status_code=501,
            detail="Pub/Sub endpoint requires ecoscope_eda_core to be installed",
        )

    try:  # Extract the payload from the PubSub message
        command_payload = await extract_payload_from_pubsub_request(request)
        invoker_params, trace_context = prepare_invoker_parameters(command_payload)
    except (binascii.Error, json.JSONDecodeError, ValueError) as e:
        # handle invalid payload errors to avoid 500 errors,
        # since it doesn't make sense to let GCP retry those
        trace = traceback.format_exc()
        error_msg = f"Error extracting data from PubSub message: {type(e).__name__}: {e}"
        logging.exception(error_msg)
        return {
            "status": "error",
            "error": error_msg,
            "trace": trace,
        }  # Error details are returned for local debugging

    tracer = otel_trace.get_tracer(__name__)
    # Continue the producer's trace when a traceparent was forwarded.
    if trace_context and (traceparent := trace_context.get("traceparent")) is not None:
        attach_context(traceparent, tracestate=trace_context.get("tracestate"))
    span_attributes = SpanAttributes(
        workflow_run_id=invoker_params.get("workflow_run_id", ""),
        invoker_type=command_payload.invoker_type,
    )
    with tracer.start_as_current_span(
        "run-from-pubsub-endpoint",
        attributes=asdict(span_attributes),
    ):
        # Propagate the (now-current) trace context to the invoked process.
        trace_context = build_context_headers()
        invoker_params["extra_env"] |= {k.upper(): v for k, v in trace_context.items()}
        invoker_params["otel_exporter"] = get_otel_exporter()
        invoker_params["otel_console_exporter_dst"] = get_otel_console_exporter_dst()
        try:  # Resolve the invoker
            match_spec_obj = resolve_matchspec(matchspec=command_payload.match_spec)
            invoker = await resolve_invoker(
                invoker_type=command_payload.invoker_type, matchspec=match_spec_obj
            )
        except ValueError as e:
            trace = traceback.format_exc()
            error = {"error": str(e), "trace": trace}
            await upload_error_to_gcs(
                error_details=error, results_url=invoker_params["results_url"]
            )
            return {"status": "error", **error}

        try:
            await invoker.run(**invoker_params)
            if invoker.is_waitable:
                timeout = command_payload.invoker_kwargs.get("timeout", PUBSUB_ACK_MAX_TIMEOUT)
                # Maximum timeout when running from PubSub is 10 minutes.
                # It's set to a little bit less to have time to cancel and handle the error.
                # BUGFIX: previously `min(timeout, max(timeout, PUBSUB_ACK_MAX_TIMEOUT))`,
                # which always evaluated to `timeout` and never enforced the cap.
                timeout = min(timeout, PUBSUB_ACK_MAX_TIMEOUT)
                exit_code = await invoker.wait(timeout=timeout, error_msg=TIMEOUT_EXPIRED_ERROR_MSG)
                if exit_code != 0:
                    raise RuntimeError(f"Workflow invoker failed with exit code {exit_code}.")
        except Exception as e:
            trace = traceback.format_exc()
            error = {"error": f"{type(e).__name__}: {e}", "trace": trace}
            await upload_error_to_gcs(
                error_details=error, results_url=invoker_params["results_url"]
            )
            return {"status": "error", **error}

        return {"status": "processed"}
518
+
519
+
520
async def extract_payload_from_pubsub_request(
    request: Request,
) -> RunWorkflowParams:
    """Decode a Pub/Sub push request into workflow parameters.

    The push envelope carries a base64-encoded JSON ``RunWorkflow`` command
    under ``message.data``.

    Args:
        request: FastAPI request object

    Returns:
        Parsed workflow parameters

    Raises:
        json.JSONDecodeError: If JSON is invalid
        binascii.Error: If base64 decoding fails
    """
    envelope = await request.json()
    message = envelope.get("message", {})
    raw = base64.b64decode(message.get("data", "{}").encode("utf-8"))
    command = RunWorkflow.model_validate(json.loads(raw))
    return command.payload
541
+
542
+
543
def prepare_invoker_parameters(
    command_payload: RunWorkflowParams,
) -> tuple[dict[str, Any], TraceContextHeaders]:
    """Build invoker keyword arguments from a Pub/Sub command payload.

    Pops the well-known keys out of ``invoker_kwargs``; whatever remains is
    forwarded to the invoker untouched.

    Args:
        command_payload: Workflow parameters from Pub/Sub message

    Returns:
        Tuple of (invoker_params, trace_context)
    """
    kwargs = command_payload.invoker_kwargs
    run_id = kwargs.pop("workflow_run_id", "")
    results_url = kwargs.pop("results_url", None)
    workflow_params = kwargs.pop("params", {})
    data_connections_env_vars = kwargs.pop("data_connections_env_vars", {})
    # at minimum, should contain `traceparent`, optionally `tracestate`
    trace_context = kwargs.pop("trace_context", None)
    execution_mode = kwargs.pop("execution_mode", "sequential")
    mock_io = kwargs.pop("mock_io", False)

    # Serialize the workflow params to YAML config text for the invoker CLI.
    yaml = ruamel.yaml.YAML(typ="safe")
    config_stream = StringIO()
    yaml.dump(workflow_params, config_stream)

    lithops_kws: dict[str, Any] = {}
    if execution_mode == "async":
        # Async execution needs a Lithops config; use defaults.
        lithops_stream = StringIO()
        yaml.dump(LithopsConfig().model_dump(), lithops_stream)
        lithops_kws = {"lithops_config_text": lithops_stream.getvalue()}

    invoker_params = {
        "workflow_run_id": run_id,
        "config_text": config_stream.getvalue(),
        "results_url": results_url,
        "execution_mode": execution_mode,
        "mock_io": mock_io,
        "extra_env": data_connections_env_vars,
    }
    # Remaining kwargs are forwarded to the invoker as-is.
    return invoker_params | lithops_kws | kwargs, trace_context
586
+
587
+
588
async def upload_error_to_gcs(error_details: dict[str, Any], results_url: str) -> None:
    """Persist error details as ``result.json`` at the results location.

    Args:
        error_details: Error information dictionary
        results_url: URL for storing results
    """
    store = obstore.store.from_url(results_url)
    payload = json.dumps(error_details).encode("utf-8")
    await store.put_async("result.json", payload)
599
+
600
+
601
+ async def _get_metadata_attribute(
602
+ attr: str,
603
+ invoker: AbstractInvoker,
604
+ ) -> dict[str, Any]:
605
+ """Get a metadata attribute for the workflow.
606
+
607
+ Args:
608
+ attr: Attribute name to retrieve
609
+ invoker: Invoker instance
610
+
611
+ Returns:
612
+ Metadata as dictionary
613
+
614
+ Raises:
615
+ RuntimeError: If attribute retrieval or parsing fails
616
+ """
617
+ out = await invoker.check_output(f"get {attr}".split())
618
+ if not out:
619
+ raise RuntimeError(f"Failed to get {attr}.")
620
+ try:
621
+ as_json: dict[str, Any] = json.loads(out)
622
+ except json.JSONDecodeError as e:
623
+ raise RuntimeError(f"Failed to parse rjsf from str: {out}") from e
624
+ return as_json
625
+
626
+
627
+ @app.get("/rjsf", status_code=200)
628
+ async def rjsf(invoker: AbstractInvoker = Depends(resolve_invoker)) -> dict[str, Any]:
629
+ """Get the React JSON Schema Form schema for the workflow.
630
+
631
+ Args:
632
+ invoker: Invoker instance
633
+
634
+ Returns:
635
+ RJSF schema dictionary
636
+ """
637
+ return await _get_metadata_attribute("rjsf", invoker)
638
+
639
+
640
+ @app.get("/data-connection-property-names", status_code=200)
641
+ async def data_connection_property_names(
642
+ invoker: AbstractInvoker = Depends(resolve_invoker),
643
+ ) -> dict[str, Any]:
644
+ """Get the data connection property names for the workflow.
645
+
646
+ Args:
647
+ invoker: Invoker instance
648
+
649
+ Returns:
650
+ Data connection property names
651
+ """
652
+ return await _get_metadata_attribute("data-connection-property-names", invoker)
653
+
654
+
655
+ async def _convert(
656
+ from_: str,
657
+ to: str,
658
+ json_: str,
659
+ invoker: AbstractInvoker,
660
+ ) -> dict[str, Any] | list[dict[str, Any]]:
661
+ """Convert between params and formdata, and visa-versa.
662
+
663
+ Args:
664
+ from_: Source format
665
+ to: Target format
666
+ json_: JSON string to convert
667
+ invoker: Invoker instance
668
+
669
+ Returns:
670
+ Converted data as dictionary, or list of dicts for validation errors
671
+
672
+ Raises:
673
+ RuntimeError: If conversion or parsing fails
674
+ """
675
+ cmd = f"convert --from {from_} --to {to}"
676
+ out = await invoker.check_output(cmd.split(), stdin=json_)
677
+ if not out:
678
+ raise RuntimeError(f"Failed to convert {from_} to {to} for '{json_}'.")
679
+ try:
680
+ as_json: dict[str, Any] | list[dict[str, Any]] = json.loads(out)
681
+ except json.JSONDecodeError as e:
682
+ raise RuntimeError(f"Failed to parse rjsf from str: {out}") from e
683
+ return as_json
684
+
685
+
686
+ def _is_422(json_: dict[str, Any] | list[dict[str, Any]]) -> bool:
687
+ """Check if the json is a 422 validation error.
688
+
689
+ Args:
690
+ json_: JSON data to check
691
+
692
+ Returns:
693
+ True if data represents a 422 error
694
+ """
695
+ return (
696
+ isinstance(json_, list)
697
+ and len(json_) > 0
698
+ and all(isinstance(e, dict) for e in json_)
699
+ and all(set(e) == {"type", "loc", "msg", "input", "url"} for e in json_)
700
+ )
701
+
702
+
703
+ @app.post("/formdata-to-params", status_code=200)
704
+ async def validate_formdata(
705
+ formdata: dict[str, Any], invoker: AbstractInvoker = Depends(resolve_invoker)
706
+ ) -> dict[str, Any]:
707
+ """Convert and validate form data to workflow parameters.
708
+
709
+ Args:
710
+ formdata: Form data dictionary
711
+ invoker: Invoker instance
712
+
713
+ Returns:
714
+ Validated parameters dictionary
715
+
716
+ Raises:
717
+ HTTPException: If validation fails (422 error)
718
+ """
719
+ outjson = await _convert(
720
+ from_="formdata",
721
+ to="params",
722
+ json_=json.dumps(formdata),
723
+ invoker=invoker,
724
+ )
725
+ if _is_422(outjson):
726
+ raise HTTPException(status_code=422, detail=outjson)
727
+ # At this point, outjson is not a 422 error list, so it's a dict
728
+ assert isinstance(outjson, dict)
729
+ return outjson
730
+
731
+
732
+ @app.post("/params-to-formdata", status_code=200)
733
+ async def generate_nested_params(
734
+ params: dict[str, Any], invoker: AbstractInvoker = Depends(resolve_invoker)
735
+ ) -> dict[str, Any]:
736
+ """Convert workflow parameters to form data format.
737
+
738
+ Args:
739
+ params: Parameters dictionary
740
+ invoker: Invoker instance
741
+
742
+ Returns:
743
+ Form data dictionary
744
+
745
+ Raises:
746
+ HTTPException: If conversion fails (422 error)
747
+ """
748
+ outjson = await _convert(
749
+ from_="params",
750
+ to="formdata",
751
+ json_=json.dumps(params),
752
+ invoker=invoker,
753
+ )
754
+ if _is_422(outjson):
755
+ raise HTTPException(status_code=422, detail=outjson)
756
+ # At this point, outjson is not a 422 error list, so it's a dict
757
+ assert isinstance(outjson, dict)
758
+ return outjson
wt_runner/py.typed ADDED
File without changes
wt_runner/testing.py ADDED
@@ -0,0 +1,140 @@
1
+ """Testing utilities for workflow test cases.
2
+
3
+ Provides Case (Pydantic model) and CaseRunner (dataclass) for running
4
+ workflow test cases via either the FastAPI application or CLI.
5
+ """
6
+
7
+ import asyncio
8
+ import os
9
+ import traceback
10
+ import uuid
11
+ from dataclasses import dataclass
12
+ from io import StringIO
13
+ from pathlib import Path
14
+ from typing import Any, Literal
15
+
16
+ import ruamel.yaml
17
+ from fastapi.testclient import TestClient
18
+ from pydantic import BaseModel
19
+ from rattler import MatchSpec
20
+ from wt_invokers.local import LocalSubprocessInvoker
21
+
22
+ from .app import get_results_json
23
+ from .tracing import OTelConsoleExporterDst, OtelExporterChoice
24
+
25
+
26
class Case(BaseModel):
    """A single workflow test case, typically loaded from `test-cases.yaml`.

    Args:
        name: Human-readable name of the test case.
        description: Description of what the test case covers.
        params: Workflow parameters to pass.
        raises: Whether the test case is expected to raise an error.
        expected_status_code: Expected HTTP status code (default 200).
    """

    name: str
    description: str
    params: dict[str, Any]
    raises: bool = False
    expected_status_code: int = 200
42
+
43
+
44
+ ExecutionMode = Literal["async", "sequential"] # TODO: move to executors module
45
+
46
+
47
@dataclass
class CaseRunner:
    """Run a single test case for a workflow via either the FastAPI application or CLI.

    Args:
        execution_mode: The execution mode to test. One of "async" or "sequential".
        mock_io: Whether or not to mock IO with 3rd party services.
        case: The test case to run. Test cases are defined by the `test-cases.yaml` file.
        results_subdir: The temporary directory to use for the test.
        traceparent: The traceparent header to propagate tracing context. Optional.
        otel_exporter: The OpenTelemetry exporter to use. Optional. One of "console", or "gcp".
        otel_console_exporter_dst: The destination for the console exporter.
            One of "stdout" or "file".
    """

    execution_mode: ExecutionMode
    mock_io: bool
    case: Case
    results_subdir: Path
    traceparent: str | None = None
    otel_exporter: OtelExporterChoice | None = "console"
    otel_console_exporter_dst: OTelConsoleExporterDst = "file"

    def run_app(
        self, app: Any, data_connections_env_vars: dict[str, Any] | None = None
    ) -> dict[str, Any]:
        """Run a single test case for a workflow via the FastAPI application.

        Asserts that the response status matches the case's expected status
        before returning the body.

        Args:
            app: The fastapi.App instance.
            data_connections_env_vars: Optional environment variables for data connections.

        Returns:
            Response JSON as a dictionary.
        """
        json_ = {
            "params": self.case.params,
            "data_connections_env_vars": data_connections_env_vars or {},
        }
        query_params = {
            "execution_mode": self.execution_mode,
            "mock_io": self.mock_io,
            # Passed as a plain absolute path; the app's `resolve_results_url`
            # dependency converts it to a file:// URI server-side.
            "results_url": self.results_subdir.absolute().as_posix(),
        }
        headers = {"Content-Type": "application/json"}
        if self.traceparent:
            headers["traceparent"] = self.traceparent
        # Use TestClient as a context manager so the app lifespan (tracer
        # configuration) runs before the request.
        with TestClient(app) as client:
            response = client.post("/", json=json_, params=query_params, headers=headers)
            assert response.status_code == self.case.expected_status_code, (
                f"Test failed with {response.status_code = }, "
                f"which differs from {self.case.expected_status_code = }; "
                f"{response.text =}"
            )
            result: dict[str, Any] = response.json()
            return result

    def run_cli(self, matchspec: MatchSpec) -> dict[str, Any]:
        """Run a single test case for a workflow via the CLI.

        Drives a LocalSubprocessInvoker directly (bypassing HTTP) and reads
        the result document back from the results directory.

        Args:
            matchspec: The matchspec of the workflow to run.

        Returns:
            Results dictionary.
        """
        invoker = LocalSubprocessInvoker(matchspec=matchspec, cwd=os.getcwd())
        # Serialize the case params to YAML config text for the invoker CLI.
        yaml = ruamel.yaml.YAML(typ="safe")
        config_text_stream = StringIO()
        yaml.dump(self.case.params, config_text_stream)

        async def _run() -> dict[str, Any]:
            try:
                await invoker.run(
                    workflow_run_id=uuid.uuid4().hex,
                    config_text=config_text_stream.getvalue(),
                    results_url=self.results_subdir.as_uri(),
                    execution_mode=self.execution_mode,
                    mock_io=self.mock_io,
                    extra_env=({"TRACEPARENT": self.traceparent} if self.traceparent else None),
                    otel_exporter=self.otel_exporter,
                    otel_console_exporter_dst=self.otel_console_exporter_dst,
                )
                # Hard 5-minute cap for test runs.
                await invoker.wait(timeout=300)
                result = await get_results_json(self.results_subdir.as_uri())
            except Exception as e:
                # Mirror the app's behavior: surface the error in-band.
                trace = traceback.format_exc()
                result = {"error": str(e), "trace": trace}

            if not isinstance(result, dict):
                raise RuntimeError(f"Unexpected {result = }. Expected dict.")
            return result

        return asyncio.run(_run())
wt_runner/tracing.py ADDED
@@ -0,0 +1,164 @@
1
+ """Basic OpenTelemetry tracing setup for Google Cloud Trace.
2
+
3
+ Note this is adapted from https://github.com/PADAS/cdip-routing.
4
+ """
5
+
6
+ import os
7
+ from pathlib import Path
8
+ from typing import Any, Literal, TypedDict
9
+
10
+ from opentelemetry import context, propagate, trace
11
+ from opentelemetry.propagate import set_global_textmap
12
+ from opentelemetry.sdk.resources import Resource
13
+ from opentelemetry.sdk.trace import ReadableSpan, TracerProvider
14
+ from opentelemetry.sdk.trace.export import (
15
+ BatchSpanProcessor,
16
+ ConsoleSpanExporter,
17
+ SpanExporter,
18
+ )
19
+ from opentelemetry.trace.propagation.tracecontext import TraceContextTextMapPropagator
20
+
21
+ # Optional GCP exporter
22
+ try:
23
+ from opentelemetry.exporter.cloud_trace import (
24
+ CloudTraceSpanExporter, # type: ignore[import-not-found,unused-ignore]
25
+ )
26
+
27
+ HAS_GCP_EXPORTER = True
28
+ except ImportError:
29
+ CloudTraceSpanExporter = None # type: ignore[misc,assignment,unused-ignore]
30
+ HAS_GCP_EXPORTER = False
31
+
32
# Span exporter backends accepted by configure_tracer().
OtelExporterChoice = Literal["console", "gcp"]
# Destination choices for the console exporter; presumably consumed by the
# invoker/exporter setup elsewhere — confirm against callers.
OTelConsoleExporterDst = Literal["stdout", "file"]
34
+
35
+
36
def otel_span_formatter(span: ReadableSpan) -> str:
    """Format an OTEL span as an unindented JSON line.

    Args:
        span: The span to format

    Returns:
        Formatted span as compact JSON, terminated by a single "\\n"
    """
    # Use "\n" rather than os.linesep: the console exporter writes through a
    # text-mode file handle (see make_otel_console_exporter_file_dst_kws),
    # and text-mode newline translation would turn os.linesep ("\r\n" on
    # Windows) into "\r\r\n", corrupting the JSONL output.
    result: str = span.to_json(indent=None) + "\n"
    return result
47
+
48
+
49
def make_otel_console_exporter_file_dst_kws(target_dir: Path) -> dict[str, Any]:
    """Build ConsoleSpanExporter kwargs that append traces to a file.

    This opinionated configuration:
    1. Ensures the target directory exists (creating if necessary)
    2. Opens a file `otel_traces.jsonl` in the target directory for appending
    3. Uses line buffering for immediate writes
    4. Uses unindented JSON formatter for easier parsing

    Note the returned dict holds an open file handle; the exporter (or caller)
    owns closing it.

    Args:
        target_dir: Directory to write traces to

    Returns:
        Dictionary of kwargs for ConsoleSpanExporter

    Raises:
        ValueError: If target_dir exists but is not a directory
    """
    if target_dir.exists():
        if not target_dir.is_dir():
            raise ValueError(f"Target dir {target_dir} exists but is not a directory")
    else:
        target_dir.mkdir(parents=True, exist_ok=True)
    # buffering=1 -> line-buffered, so each span line is flushed immediately.
    trace_file = (target_dir / "otel_traces.jsonl").open("a", buffering=1)
    return {"out": trace_file, "formatter": otel_span_formatter}
76
+
77
+
78
def configure_tracer(
    name: str,
    version: str = "",
    exporter: OtelExporterChoice | None = None,
    exporter_kws: dict[str, Any] | None = None,
) -> None:
    """Configure OpenTelemetry tracer with specified exporter.

    Installs a global TracerProvider tagged with the given service name and
    version, optionally attaching a span exporter behind a batch processor.

    Args:
        name: Service name for the tracer
        version: Service version (optional)
        exporter: Type of exporter to use (console or gcp), None for no exporter
        exporter_kws: Additional kwargs for the exporter

    Raises:
        ValueError: If unknown exporter type specified
        RuntimeError: If GCP exporter is requested but not available
    """
    provider = TracerProvider(
        resource=Resource.create(
            {
                "service.name": name,
                "service.version": version,
            }
        )
    )
    if exporter:
        kws = exporter_kws or {}
        span_exporter: SpanExporter
        if exporter == "console":
            span_exporter = ConsoleSpanExporter(**kws)
        elif exporter == "gcp":
            if not HAS_GCP_EXPORTER:
                raise RuntimeError(
                    "GCP exporter requested but opentelemetry-exporter-gcp-trace "
                    "is not installed. Install with: pip install wt-runner[tracing]"
                )
            span_exporter = CloudTraceSpanExporter(**kws)  # type: ignore[no-untyped-call,unused-ignore]
        else:
            raise ValueError(f"Unknown exporter: {exporter}")

        # BatchSpanProcessor buffers spans and sends them in batches in a
        # background thread. The default parameters are sensible, but can be
        # tweaked to optimize your performance
        provider.add_span_processor(BatchSpanProcessor(span_exporter))
    trace.set_tracer_provider(provider)
126
+
127
+
128
class TraceContextHeaders(TypedDict, total=False):
    """W3C Trace Context headers.

    Both keys are optional (``total=False``); either may be absent when there
    is nothing to propagate.

    See: https://www.w3.org/TR/trace-context/
    """

    # W3C "traceparent" header value identifying the parent span.
    traceparent: str
    # W3C "tracestate" header: vendor-specific state carried with the trace.
    tracestate: str
136
+
137
+
138
def build_context_headers() -> TraceContextHeaders:
    """Serialize the active OpenTelemetry context into W3C trace headers.

    Returns:
        Mapping with ``traceparent`` (and optionally ``tracestate``); may be
        empty when there is no active context to propagate.
    """
    carrier: TraceContextHeaders = {}
    propagate.inject(carrier)
    return carrier
147
+
148
+
149
def attach_context(traceparent: str, tracestate: str | None = None) -> None:
    """Make the given W3C trace headers the current OpenTelemetry context.

    Args:
        traceparent: W3C traceparent header value
        tracestate: W3C tracestate header value (optional)
    """
    carrier: dict[str, str] = {"traceparent": traceparent}
    if tracestate:
        carrier["tracestate"] = tracestate
    context.attach(propagate.extract(carrier=carrier))
161
+
162
+
163
# Module-import side effect: install the default W3C Trace Context propagator
# (i.e. `traceparent` header) as the global textmap propagator, so the
# propagate.inject/extract calls above use it.
set_global_textmap(TraceContextTextMapPropagator())
@@ -0,0 +1,25 @@
1
+ Metadata-Version: 2.4
2
+ Name: wt-runner
3
+ Version: 0.1.3
4
+ Summary: FastAPI application for workflow execution using wt-invokers
5
+ License: BSD-3-Clause
6
+ Classifier: Development Status :: 3 - Alpha
7
+ Classifier: Intended Audience :: Developers
8
+ Classifier: License :: OSI Approved :: BSD License
9
+ Classifier: Programming Language :: Python :: 3
10
+ Classifier: Programming Language :: Python :: 3.13
11
+ Requires-Python: <3.16,>=3.13
12
+ Requires-Dist: wt-contracts<1.0.0,>=0.1.0
13
+ Requires-Dist: wt-invokers<1.0.0,>=0.1.0
14
+ Requires-Dist: fastapi>=0.100.0
15
+ Requires-Dist: uvicorn>=0.20.0
16
+ Requires-Dist: pydantic<3.0.0,>=2.0.0
17
+ Requires-Dist: py-rattler>=0.8.0
18
+ Requires-Dist: ruamel.yaml>=0.18.0
19
+ Requires-Dist: opentelemetry-api>=1.0.0
20
+ Requires-Dist: opentelemetry-sdk>=1.0.0
21
+ Requires-Dist: obstore>=0.6.0
22
+ Provides-Extra: gcp
23
+ Requires-Dist: opentelemetry-sdk<2,>=1.37.0; extra == "gcp"
24
+ Requires-Dist: opentelemetry-exporter-gcp-trace<2,>=1.9.0; extra == "gcp"
25
+ Requires-Dist: gcloud-aio-pubsub<7,>=6.1.0; extra == "gcp"
@@ -0,0 +1,10 @@
1
+ wt_runner/__init__.py,sha256=NKQBohYUXiv4kheM3Cyy0zn9exUJaLVRTgdEP-MZamY,899
2
+ wt_runner/_version.py,sha256=q5nF98G8SoVeJqaknL0xdyxtv0egsqb0fK06_84Izu8,704
3
+ wt_runner/app.py,sha256=d_8qZkbZoKE-5ye00LURGkwrY386Z3uTH9KaFt6tI0Y,24467
4
+ wt_runner/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
+ wt_runner/testing.py,sha256=ec5__H9QFJtzvtO4pkSvAV0giUur6_Xrl9UiMJ6MMSY,5131
6
+ wt_runner/tracing.py,sha256=-mt8MMGo9S74O2tCTXhdCPCw8BcvWSmS4u2RwnzLnAs,5354
7
+ wt_runner-0.1.3.dist-info/METADATA,sha256=EEfkH_8xhcaaGLH2vGmXwnsd_5JWO0F3IHr3MKzMFLE,988
8
+ wt_runner-0.1.3.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
9
+ wt_runner-0.1.3.dist-info/top_level.txt,sha256=ujeMrgee-Be9X1QZegBuCKNWZ2NgYnsHI-VzrxXW70c,10
10
+ wt_runner-0.1.3.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1 @@
1
+ wt_runner