spanforge 2.0.2__tar.gz → 2.0.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {spanforge-2.0.2 → spanforge-2.0.3}/PKG-INFO +1 -1
- spanforge-2.0.3/docs/api/config.md +119 -0
- spanforge-2.0.3/docs/api/eval.md +211 -0
- spanforge-2.0.3/docs/api/http.md +108 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/docs/api/index.md +17 -0
- spanforge-2.0.3/docs/api/io.md +128 -0
- spanforge-2.0.3/docs/api/plugins.md +70 -0
- spanforge-2.0.3/docs/api/regression.md +125 -0
- spanforge-2.0.3/docs/api/schema.md +110 -0
- spanforge-2.0.3/docs/api/stats.md +77 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/docs/changelog.md +102 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/pyproject.toml +1 -1
- {spanforge-2.0.2 → spanforge-2.0.3}/src/spanforge/__init__.py +39 -0
- spanforge-2.0.3/src/spanforge/_ansi.py +93 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/src/spanforge/config.py +63 -1
- {spanforge-2.0.2 → spanforge-2.0.3}/src/spanforge/eval.py +68 -0
- spanforge-2.0.3/src/spanforge/http.py +187 -0
- spanforge-2.0.3/src/spanforge/io.py +210 -0
- spanforge-2.0.3/src/spanforge/plugins.py +79 -0
- spanforge-2.0.3/src/spanforge/regression.py +192 -0
- spanforge-2.0.3/src/spanforge/schema.py +187 -0
- spanforge-2.0.3/src/spanforge/stats.py +100 -0
- spanforge-2.0.3/tests/test_ansi.py +100 -0
- spanforge-2.0.3/tests/test_config_interpolate.py +125 -0
- spanforge-2.0.3/tests/test_eval_behaviour_scorer.py +111 -0
- spanforge-2.0.3/tests/test_http.py +297 -0
- spanforge-2.0.3/tests/test_io.py +207 -0
- spanforge-2.0.3/tests/test_plugins.py +83 -0
- spanforge-2.0.3/tests/test_regression.py +181 -0
- spanforge-2.0.3/tests/test_schema.py +217 -0
- spanforge-2.0.3/tests/test_stats.py +126 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/.gitattributes +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/.github/CODEOWNERS +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/.github/ISSUE_TEMPLATE/config.yml +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/.github/ISSUE_TEMPLATE/rfc.yml +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/.github/pull_request_template.md +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/.github/workflows/ci.yml +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/.github/workflows/release.yml +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/.gitignore +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/CNAME +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/CODE_OF_CONDUCT.md +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/CONFORMANCE.md +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/LICENSE +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/MAINTAINERS.md +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/PRICING.md +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/README.md +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/README.md.bak +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/RELEASE.md +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/SECURITY.md +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/docs/Makefile +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/docs/_static/.gitkeep +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/docs/api/auto.md +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/docs/api/cache.md +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/docs/api/compliance.md +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/docs/api/consumer.md +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/docs/api/debug.md +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/docs/api/deprecations.md +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/docs/api/event.md +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/docs/api/exceptions.md +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/docs/api/export.md +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/docs/api/governance.md +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/docs/api/hooks.md +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/docs/api/integrations.md +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/docs/api/lint.md +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/docs/api/metrics.md +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/docs/api/migrate.md +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/docs/api/models.md +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/docs/api/normalizer.md +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/docs/api/redact.md +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/docs/api/signing.md +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/docs/api/store.md +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/docs/api/stream.md +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/docs/api/testing.md +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/docs/api/trace.md +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/docs/api/types.md +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/docs/api/ulid.md +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/docs/api/validate.md +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/docs/cli.md +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/docs/conf.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/docs/configuration.md +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/docs/contributing.md +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/docs/deployment/air-gapped.md +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/docs/deployment/kubernetes.md +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/docs/index.md +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/docs/installation.md +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/docs/integrations/crewai.md +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/docs/make.bat +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/docs/migrations/from-langfuse.md +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/docs/migrations/from-langsmith.md +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/docs/migrations/from-openllmetry.md +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/docs/namespaces/audit.md +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/docs/namespaces/cache.md +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/docs/namespaces/consent.md +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/docs/namespaces/cost.md +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/docs/namespaces/diff.md +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/docs/namespaces/eval.md +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/docs/namespaces/explanation.md +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/docs/namespaces/fence.md +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/docs/namespaces/guard.md +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/docs/namespaces/hitl.md +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/docs/namespaces/index.md +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/docs/namespaces/model_registry.md +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/docs/namespaces/prompt.md +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/docs/namespaces/redact_ns.md +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/docs/namespaces/template.md +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/docs/namespaces/trace.md +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/docs/quickstart.md +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/docs/rfc/rfc-0001.md +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/docs/runbook.md +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/docs/schema/README.md +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/docs/schema/envelope.schema.json +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/docs/schema/payloads/agent-run.schema.json +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/docs/schema/payloads/agent-step.schema.json +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/docs/schema/payloads/audit.schema.json +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/docs/schema/payloads/cache.schema.json +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/docs/schema/payloads/consent.schema.json +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/docs/schema/payloads/cost.schema.json +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/docs/schema/payloads/diff.schema.json +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/docs/schema/payloads/eval.schema.json +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/docs/schema/payloads/explanation.schema.json +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/docs/schema/payloads/fence.schema.json +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/docs/schema/payloads/guard.schema.json +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/docs/schema/payloads/hitl.schema.json +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/docs/schema/payloads/model-registry.schema.json +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/docs/schema/payloads/prompt.schema.json +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/docs/schema/payloads/redact.schema.json +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/docs/schema/payloads/span.schema.json +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/docs/schema/payloads/template.schema.json +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/docs/schema/types/common.schema.json +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/docs/schema-versioning.md +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/docs/user_guide/cache.md +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/docs/user_guide/compliance.md +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/docs/user_guide/custom_exporters.md +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/docs/user_guide/debugging.md +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/docs/user_guide/events.md +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/docs/user_guide/export.md +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/docs/user_guide/governance.md +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/docs/user_guide/index.md +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/docs/user_guide/linting.md +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/docs/user_guide/metrics.md +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/docs/user_guide/migration.md +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/docs/user_guide/redaction.md +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/docs/user_guide/signing.md +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/docs/user_guide/tracing.md +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/examples/agent_workflow.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/examples/budget_alert.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/examples/docker/Dockerfile +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/examples/docker/docker-compose.yml +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/examples/docker/otel-config.yaml +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/examples/langchain_chain.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/examples/multi_agent_rag.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/examples/multi_tenant.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/examples/openai_chat.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/examples/otlp_grafana.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/examples/production_multi_agent.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/examples/secure_pipeline.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/examples/streaming_response.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/sonar-project.properties +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/src/spanforge/_batch_exporter.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/src/spanforge/_cli.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/src/spanforge/_hooks.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/src/spanforge/_server.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/src/spanforge/_span.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/src/spanforge/_store.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/src/spanforge/_stream.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/src/spanforge/_trace.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/src/spanforge/_tracer.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/src/spanforge/actor.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/src/spanforge/alerts.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/src/spanforge/auto.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/src/spanforge/baseline.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/src/spanforge/consent.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/src/spanforge/consumer.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/src/spanforge/core/__init__.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/src/spanforge/core/compliance_mapping.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/src/spanforge/cost.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/src/spanforge/debug.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/src/spanforge/drift.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/src/spanforge/egress.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/src/spanforge/event.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/src/spanforge/exceptions.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/src/spanforge/explain.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/src/spanforge/export/__init__.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/src/spanforge/export/append_only.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/src/spanforge/export/cloud.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/src/spanforge/export/datadog.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/src/spanforge/export/grafana.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/src/spanforge/export/jsonl.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/src/spanforge/export/otel_bridge.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/src/spanforge/export/otlp.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/src/spanforge/export/otlp_bridge.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/src/spanforge/export/redis_backend.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/src/spanforge/export/webhook.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/src/spanforge/exporters/__init__.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/src/spanforge/exporters/console.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/src/spanforge/exporters/jsonl.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/src/spanforge/hitl.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/src/spanforge/inspect.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/src/spanforge/integrations/__init__.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/src/spanforge/integrations/_pricing.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/src/spanforge/integrations/anthropic.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/src/spanforge/integrations/bedrock.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/src/spanforge/integrations/crewai.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/src/spanforge/integrations/gemini.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/src/spanforge/integrations/groq.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/src/spanforge/integrations/langchain.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/src/spanforge/integrations/llamaindex.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/src/spanforge/integrations/ollama.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/src/spanforge/integrations/openai.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/src/spanforge/integrations/together.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/src/spanforge/metrics.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/src/spanforge/metrics_export.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/src/spanforge/migrate.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/src/spanforge/model_registry.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/src/spanforge/models.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/src/spanforge/namespaces/__init__.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/src/spanforge/namespaces/audit.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/src/spanforge/namespaces/cache.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/src/spanforge/namespaces/chain.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/src/spanforge/namespaces/confidence.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/src/spanforge/namespaces/consent.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/src/spanforge/namespaces/cost.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/src/spanforge/namespaces/decision.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/src/spanforge/namespaces/diff.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/src/spanforge/namespaces/drift.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/src/spanforge/namespaces/eval_.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/src/spanforge/namespaces/fence.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/src/spanforge/namespaces/guard.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/src/spanforge/namespaces/hitl.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/src/spanforge/namespaces/latency.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/src/spanforge/namespaces/prompt.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/src/spanforge/namespaces/redact.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/src/spanforge/namespaces/template.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/src/spanforge/namespaces/tool_call.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/src/spanforge/namespaces/trace.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/src/spanforge/normalizer.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/src/spanforge/presidio_backend.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/src/spanforge/processor.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/src/spanforge/prompt_registry.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/src/spanforge/py.typed +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/src/spanforge/redact.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/src/spanforge/sampling.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/src/spanforge/schemas/v1.0/schema.json +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/src/spanforge/schemas/v2.0/schema.json +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/src/spanforge/signing.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/src/spanforge/stream.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/src/spanforge/testing.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/src/spanforge/trace.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/src/spanforge/types.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/src/spanforge/ulid.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/src/spanforge/validate.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/test_agent.jsonl +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/test_events.jsonl +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/tests/__init__.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/tests/conformance/__init__.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/tests/conformance/fixtures/chain.json +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/tests/conformance/fixtures/compliance.json +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/tests/conformance/fixtures/key_security.json +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/tests/conformance/fixtures/migration.json +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/tests/conformance/fixtures/pii.json +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/tests/conformance/fixtures/signing.json +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/tests/conformance/fixtures.json +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/tests/conformance/run_conformance.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/tests/conformance/test_conformance.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/tests/conftest.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/tests/test_actor.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/tests/test_alerts.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/tests/test_auto.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/tests/test_baseline.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/tests/test_benchmarks.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/tests/test_budget_alert.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/tests/test_cli.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/tests/test_compliance_mapping.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/tests/test_consent.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/tests/test_consumer.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/tests/test_cost_event_emission.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/tests/test_cost_tracker.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/tests/test_costguard_gaps.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/tests/test_drift.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/tests/test_event.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/tests/test_exceptions.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/tests/test_explain.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/tests/test_export_cloud.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/tests/test_export_datadog.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/tests/test_export_grafana.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/tests/test_export_jsonl.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/tests/test_export_otel_bridge.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/tests/test_export_otlp.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/tests/test_export_redis_backend.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/tests/test_export_webhook.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/tests/test_hitl.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/tests/test_inspect.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/tests/test_integration.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/tests/test_integrations.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/tests/test_migrate.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/tests/test_model_registry.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/tests/test_models.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/tests/test_namespaces.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/tests/test_otlp_bridge.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/tests/test_phase10_features.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/tests/test_phase11_security.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/tests/test_phase1_context_trace.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/tests/test_phase2_observability.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/tests/test_phase3_debug_sampling.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/tests/test_phase4_agent_instrumentation.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/tests/test_phase4_metrics_store.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/tests/test_phase5_console_exporter.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/tests/test_phase5_coverage.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/tests/test_phase5_hooks_crewai.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/tests/test_phase6_openai_integration.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/tests/test_processor_coverage.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/tests/test_properties.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/tests/test_redact.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/tests/test_rfc_namespaces.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/tests/test_sampling_coverage.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/tests/test_sdk_config.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/tests/test_sdk_coverage_boost.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/tests/test_sdk_exporters.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/tests/test_sdk_final_coverage.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/tests/test_sdk_gap_filler.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/tests/test_sdk_openai_integration.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/tests/test_sdk_phase7_integrations.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/tests/test_sdk_precision_coverage.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/tests/test_sdk_span.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/tests/test_sdk_stream.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/tests/test_sdk_tracer.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/tests/test_sdk_validation_coverage.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/tests/test_server.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/tests/test_sf11.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/tests/test_sf12.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/tests/test_sf13.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/tests/test_sf14.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/tests/test_sf15.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/tests/test_sf16.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/tests/test_signing.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/tests/test_stream.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/tests/test_trace_decorator.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/tests/test_trace_pytest_fixtures.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/tests/test_types.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/tests/test_ulid.py +0 -0
- {spanforge-2.0.2 → spanforge-2.0.3}/tests/test_validate.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: spanforge
|
|
3
|
-
Version: 2.0.2
|
|
3
|
+
Version: 2.0.3
|
|
4
4
|
Summary: SpanForge — AI lifecycle and governance platform (RFC-0001 SPANFORGE)
|
|
5
5
|
Project-URL: Homepage, https://github.com/veerarag1973/spanforge
|
|
6
6
|
Project-URL: Documentation, https://github.com/veerarag1973/spanforge/blob/main/docs/index.md
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
# spanforge.config — Global configuration
|
|
2
|
+
|
|
3
|
+
> **Module:** `spanforge.config`
|
|
4
|
+
|
|
5
|
+
`spanforge.config` provides the global configuration singleton, the
|
|
6
|
+
`configure()` entry point, and environment-variable interpolation utilities.
|
|
7
|
+
|
|
8
|
+
---
|
|
9
|
+
|
|
10
|
+
## Quick example
|
|
11
|
+
|
|
12
|
+
```python
|
|
13
|
+
from spanforge import configure
|
|
14
|
+
from spanforge.config import get_config
|
|
15
|
+
|
|
16
|
+
configure(
|
|
17
|
+
    preset="production",
|
|
18
|
+
    exporter="otlp",
|
|
19
|
+
    endpoint="http://collector:4318",
|
|
20
|
+
)
|
|
21
|
+
cfg = get_config()
|
|
22
|
+
print(cfg.exporter) # "otlp"
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
---
|
|
26
|
+
|
|
27
|
+
## API
|
|
28
|
+
|
|
29
|
+
### `SpanForgeConfig`
|
|
30
|
+
|
|
31
|
+
Mutable dataclass holding all SDK configuration fields. Key fields:
|
|
32
|
+
|
|
33
|
+
| Field | Default | Env var | Description |
|
|
34
|
+
|-------|---------|---------|-------------|
|
|
35
|
+
| `exporter` | `"console"` | `SPANFORGE_EXPORTER` | Backend: `console`, `jsonl`, `otlp`, `webhook`, `datadog`, `grafana_loki` |
|
|
36
|
+
| `endpoint` | `None` | `SPANFORGE_ENDPOINT` | Exporter destination (file path or URL) |
|
|
37
|
+
| `org_id` | `None` | `SPANFORGE_ORG_ID` | Organisation identifier |
|
|
38
|
+
| `service_name` | `"unknown-service"` | `SPANFORGE_SERVICE_NAME` | Human-readable service name |
|
|
39
|
+
| `env` | `"production"` | `SPANFORGE_ENV` | Deployment environment tag |
|
|
40
|
+
| `service_version` | `"0.0.0"` | `SPANFORGE_SERVICE_VERSION` | SemVer string |
|
|
41
|
+
| `signing_key` | `None` | `SPANFORGE_SIGNING_KEY` | Base64-encoded HMAC key |
|
|
42
|
+
| `sample_rate` | `1.0` | `SPANFORGE_SAMPLE_RATE` | Fraction of traces to emit (0.0–1.0) |
|
|
43
|
+
| `on_export_error` | `"warn"` | `SPANFORGE_ON_EXPORT_ERROR` | `"warn"` / `"raise"` / `"drop"` |
|
|
44
|
+
| `enable_trace_store` | `False` | `SPANFORGE_ENABLE_TRACE_STORE` | In-process ring buffer |
|
|
45
|
+
| `no_egress` | `False` | `SPANFORGE_NO_EGRESS` | Block all network exporters |
|
|
46
|
+
| `compliance_sampling` | `True` | `SPANFORGE_COMPLIANCE_SAMPLING` | Always-record compliance events |
|
|
47
|
+
| `consent_enforcement` | `False` | `SPANFORGE_CONSENT_ENFORCEMENT` | T.R.U.S.T. consent checks |
|
|
48
|
+
| `hitl_enabled` | `False` | `SPANFORGE_HITL_ENABLED` | Human-in-the-loop review queue |
|
|
49
|
+
|
|
50
|
+
See the source docstring for the full list of fields.
|
|
51
|
+
|
|
52
|
+
---
|
|
53
|
+
|
|
54
|
+
### `configure(**kwargs)`
|
|
55
|
+
|
|
56
|
+
```python
|
|
57
|
+
def configure(**kwargs: Any) -> None: ...
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
Mutate the global `SpanForgeConfig` singleton. Accepts any field name as a
|
|
61
|
+
keyword argument. Unknown keys raise `ValueError`.
|
|
62
|
+
|
|
63
|
+
**Presets:** Pass `preset="<name>"` to apply a bundle of sensible defaults
|
|
64
|
+
before other kwargs:
|
|
65
|
+
|
|
66
|
+
| Preset | Exporter | Sample rate | Notes |
|
|
67
|
+
|--------|----------|-------------|-------|
|
|
68
|
+
| `development` | `console` | 1.0 | Trace store on, private endpoints allowed |
|
|
69
|
+
| `testing` | `console` | 1.0 | `on_export_error="raise"` |
|
|
70
|
+
| `staging` | `console` | 0.5 | Always-sample errors |
|
|
71
|
+
| `production` | `otlp` | 0.1 | Batch 512, flush 5 s |
|
|
72
|
+
| `otel_passthrough` | `otel_bridge` | 1.0 | Compliance sampling on |
|
|
73
|
+
|
|
74
|
+
---
|
|
75
|
+
|
|
76
|
+
### `get_config()`
|
|
77
|
+
|
|
78
|
+
```python
|
|
79
|
+
def get_config() -> SpanForgeConfig: ...
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
Return the live configuration singleton. Modifications to the returned
|
|
83
|
+
object affect all subsequent SDK operations.
|
|
84
|
+
|
|
85
|
+
---
|
|
86
|
+
|
|
87
|
+
### `interpolate_env()`
|
|
88
|
+
|
|
89
|
+
> **Added in:** 2.0.3
|
|
90
|
+
|
|
91
|
+
```python
|
|
92
|
+
def interpolate_env(data: Any) -> Any: ...
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
Recursively replace `${VAR}` and `${VAR:default}` patterns in *data*.
|
|
96
|
+
|
|
97
|
+
Walks strings, dicts (values only), and lists depth-first. Non-string
|
|
98
|
+
leaves are returned unchanged.
|
|
99
|
+
|
|
100
|
+
| Pattern | Behaviour |
|
|
101
|
+
|---------|-----------|
|
|
102
|
+
| `${FOO}` | Replaced with `os.environ["FOO"]`; left as-is if unset. |
|
|
103
|
+
| `${FOO:bar}` | Replaced with `os.environ["FOO"]` when set, `"bar"` otherwise. |
|
|
104
|
+
|
|
105
|
+
**Example:**
|
|
106
|
+
|
|
107
|
+
```python
|
|
108
|
+
import os
|
|
109
|
+
from spanforge.config import interpolate_env
|
|
110
|
+
|
|
111
|
+
os.environ["MODEL"] = "gpt-4o"
|
|
112
|
+
result = interpolate_env({
|
|
113
|
+
    "model": "${MODEL}",
|
|
114
|
+
    "endpoint": "${ENDPOINT:https://api.openai.com/v1}",
|
|
115
|
+
})
|
|
116
|
+
# {"model": "gpt-4o", "endpoint": "https://api.openai.com/v1"}  (when ENDPOINT is unset)
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
> **Note:** Dict keys are **not** interpolated — only values.
|
|
@@ -0,0 +1,211 @@
|
|
|
1
|
+
# spanforge.eval — Evaluation framework
|
|
2
|
+
|
|
3
|
+
> **Module:** `spanforge.eval`
|
|
4
|
+
|
|
5
|
+
`spanforge.eval` provides lightweight instrumentation for attaching quality
|
|
6
|
+
scores to active spans and emitting them as RFC-0001 `llm.eval.*` events.
|
|
7
|
+
It ships built-in scorers, a batch runner, a mean-based regression detector,
|
|
8
|
+
and a plug-in scorer ABC for the entry-point ecosystem.
|
|
9
|
+
|
|
10
|
+
---
|
|
11
|
+
|
|
12
|
+
## Quick example
|
|
13
|
+
|
|
14
|
+
```python
|
|
15
|
+
from spanforge.eval import record_eval_score, EvalScore
|
|
16
|
+
|
|
17
|
+
score = record_eval_score(
|
|
18
|
+
    metric="faithfulness",
|
|
19
|
+
    value=0.87,
|
|
20
|
+
    span_id="abcdef0123456789",
|
|
21
|
+
    trace_id="abcdef0123456789abcdef0123456789",
|
|
22
|
+
    label="pass",
|
|
23
|
+
    metadata={"evaluator": "ragas"},
|
|
24
|
+
)
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
---
|
|
28
|
+
|
|
29
|
+
## API
|
|
30
|
+
|
|
31
|
+
### `record_eval_score()`
|
|
32
|
+
|
|
33
|
+
```python
|
|
34
|
+
def record_eval_score(
|
|
35
|
+
    metric: str,
|
|
36
|
+
    value: float,
|
|
37
|
+
    *,
|
|
38
|
+
    span_id: str | None = None,
|
|
39
|
+
    trace_id: str | None = None,
|
|
40
|
+
    label: str | None = None,
|
|
41
|
+
    metadata: dict[str, Any] | None = None,
|
|
42
|
+
) -> EvalScore: ...
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
Record a score and emit it as an `llm.eval.score.recorded` event via the
|
|
46
|
+
configured SpanForge exporter.
|
|
47
|
+
|
|
48
|
+
| Parameter | Description |
|
|
49
|
+
|-----------|-------------|
|
|
50
|
+
| `metric` | Name of the quality metric (e.g. `"faithfulness"`). |
|
|
51
|
+
| `value` | Numeric score value (any float). |
|
|
52
|
+
| `span_id` | Optional parent span ID (16 lowercase hex chars). |
|
|
53
|
+
| `trace_id` | Optional trace ID (32 lowercase hex chars). |
|
|
54
|
+
| `label` | Optional human-readable label (`"pass"` / `"fail"` / etc.). |
|
|
55
|
+
| `metadata` | Optional free-form dict with evaluator details. |
|
|
56
|
+
|
|
57
|
+
---
|
|
58
|
+
|
|
59
|
+
### `EvalScore`
|
|
60
|
+
|
|
61
|
+
```python
|
|
62
|
+
@dataclass
|
|
63
|
+
class EvalScore:
|
|
64
|
+
metric: str
|
|
65
|
+
value: float
|
|
66
|
+
span_id: str | None = None
|
|
67
|
+
trace_id: str | None = None
|
|
68
|
+
label: str | None = None
|
|
69
|
+
metadata: dict[str, Any] | None = None
|
|
70
|
+
timestamp: float = ... # auto-set
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
A single quality measurement attached to a span or agent run.
|
|
74
|
+
|
|
75
|
+
| Method | Description |
|
|
76
|
+
|--------|-------------|
|
|
77
|
+
| `to_dict()` | Serialise to a plain dict. |
|
|
78
|
+
| `from_dict(data)` | Class method — deserialise from a dict. |
|
|
79
|
+
|
|
80
|
+
---
|
|
81
|
+
|
|
82
|
+
### `EvalScorer` (Protocol)
|
|
83
|
+
|
|
84
|
+
```python
|
|
85
|
+
@runtime_checkable
|
|
86
|
+
class EvalScorer(Protocol):
|
|
87
|
+
@property
|
|
88
|
+
def metric_name(self) -> str: ...
|
|
89
|
+
def score(self, example: dict[str, Any]) -> EvalScore: ...
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
Protocol for scorers compatible with `EvalRunner`. Each scorer receives a
|
|
93
|
+
single example dict (with at least an `"output"` key) and returns an
|
|
94
|
+
`EvalScore`.
|
|
95
|
+
|
|
96
|
+
---
|
|
97
|
+
|
|
98
|
+
### `EvalRunner`
|
|
99
|
+
|
|
100
|
+
```python
|
|
101
|
+
class EvalRunner:
|
|
102
|
+
def __init__(
|
|
103
|
+
self,
|
|
104
|
+
scorers: list[EvalScorer] | None = None,
|
|
105
|
+
*,
|
|
106
|
+
emit: bool = True,
|
|
107
|
+
) -> None: ...
|
|
108
|
+
|
|
109
|
+
def add_scorer(self, scorer: EvalScorer) -> None: ...
|
|
110
|
+
def run(self, dataset: list[dict[str, Any]]) -> EvalReport: ...
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
Run one or more scorers over a dataset. When `emit=True` (default) each
|
|
114
|
+
score is also emitted via `record_eval_score()`.
|
|
115
|
+
|
|
116
|
+
---
|
|
117
|
+
|
|
118
|
+
### `EvalReport`
|
|
119
|
+
|
|
120
|
+
```python
|
|
121
|
+
@dataclass
|
|
122
|
+
class EvalReport:
|
|
123
|
+
scores: list[EvalScore]
|
|
124
|
+
dataset: list[dict[str, Any]]
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
| Method | Description |
|
|
128
|
+
|--------|-------------|
|
|
129
|
+
| `summary()` | Return `{metric: mean_value}` dict. |
|
|
130
|
+
| `print_summary()` | Print a human-readable table to stdout. |
|
|
131
|
+
|
|
132
|
+
---
|
|
133
|
+
|
|
134
|
+
### `RegressionDetector`
|
|
135
|
+
|
|
136
|
+
```python
|
|
137
|
+
class RegressionDetector:
|
|
138
|
+
def __init__(
|
|
139
|
+
self,
|
|
140
|
+
baseline: dict[str, float] | None = None,
|
|
141
|
+
*,
|
|
142
|
+
threshold_pct: float = 5.0,
|
|
143
|
+
emit: bool = True,
|
|
144
|
+
) -> None: ...
|
|
145
|
+
|
|
146
|
+
def set_baseline(self, metric: str, value: float) -> None: ...
|
|
147
|
+
def check(self, report: EvalReport) -> list[dict[str, Any]]: ...
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
Mean-based regression detection. When the mean score for a metric drops
|
|
151
|
+
below `baseline_mean * (1 - threshold_pct / 100)`, an
|
|
152
|
+
`llm.eval.regression.detected` event is emitted.
|
|
153
|
+
|
|
154
|
+
> **Note:** For per-case pass/fail regression detection, see
|
|
155
|
+
> [`spanforge.regression`](regression.md).
|
|
156
|
+
|
|
157
|
+
---
|
|
158
|
+
|
|
159
|
+
## Built-in scorers
|
|
160
|
+
|
|
161
|
+
| Scorer | `metric_name` | Description |
|
|
162
|
+
|--------|---------------|-------------|
|
|
163
|
+
| `FaithfulnessScorer` | `faithfulness` | Token-overlap proxy between `output` and `context`. |
|
|
164
|
+
| `RefusalDetectionScorer` | `refusal_detection` | Matches common refusal phrases (returns 1.0 on refusal). |
|
|
165
|
+
| `PIILeakageScorer` | `pii_leakage` | Delegates to `spanforge.redact.scan_payload()`; returns 1.0 on PII detection. |
|
|
166
|
+
|
|
167
|
+
---
|
|
168
|
+
|
|
169
|
+
### `BehaviourScorer` (ABC)
|
|
170
|
+
|
|
171
|
+
> **Added in:** 2.0.3
|
|
172
|
+
|
|
173
|
+
```python
|
|
174
|
+
class BehaviourScorer(ABC):
|
|
175
|
+
name: str = "base"
|
|
176
|
+
|
|
177
|
+
@abstractmethod
|
|
178
|
+
def score(self, case: Any, response: str) -> tuple[float, str]: ...
|
|
179
|
+
```
|
|
180
|
+
|
|
181
|
+
Abstract base class for plug-in behaviour scorers registered via the
|
|
182
|
+
`spanforge.scorers` entry-point group. Unlike `EvalScorer` (which scores
|
|
183
|
+
full `dict` examples), `BehaviourScorer` targets named test-case workflows
|
|
184
|
+
where the scorer receives a structured test case object and the raw model
|
|
185
|
+
response.
|
|
186
|
+
|
|
187
|
+
| Attribute / Method | Description |
|
|
188
|
+
|--------------------|-------------|
|
|
189
|
+
| `name` | Unique identifier for the scorer (override in subclasses). |
|
|
190
|
+
| `score(case, response)` | Return `(score, reason)` where score ∈ [0.0, 1.0]. |
|
|
191
|
+
|
|
192
|
+
**Registration:**
|
|
193
|
+
|
|
194
|
+
```toml
|
|
195
|
+
[project.entry-points."spanforge.scorers"]
|
|
196
|
+
toxicity = "my_package.scorers:ToxicityScorer"
|
|
197
|
+
```
|
|
198
|
+
|
|
199
|
+
**Example:**
|
|
200
|
+
|
|
201
|
+
```python
|
|
202
|
+
from spanforge.eval import BehaviourScorer
|
|
203
|
+
|
|
204
|
+
class ToxicityScorer(BehaviourScorer):
|
|
205
|
+
name = "toxicity"
|
|
206
|
+
|
|
207
|
+
def score(self, case, response: str) -> tuple[float, str]:
|
|
208
|
+
if any(w in response.lower() for w in ("hate", "kill")):
|
|
209
|
+
return 0.0, "toxic content detected"
|
|
210
|
+
return 1.0, "no toxicity detected"
|
|
211
|
+
```
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
# spanforge.http — OpenAI-compatible HTTP client
|
|
2
|
+
|
|
3
|
+
> **Module:** `spanforge.http`
|
|
4
|
+
> **Added in:** 2.0.3
|
|
5
|
+
|
|
6
|
+
`spanforge.http` provides a zero-dependency, synchronous HTTP client for
|
|
7
|
+
OpenAI-compatible chat-completion APIs. It is intentionally minimal — no
|
|
8
|
+
`httpx`, no `requests`, no `aiohttp` — just the stdlib `urllib.request`.
|
|
9
|
+
|
|
10
|
+
---
|
|
11
|
+
|
|
12
|
+
## Quick example
|
|
13
|
+
|
|
14
|
+
```python
|
|
15
|
+
from spanforge.http import chat_completion
|
|
16
|
+
|
|
17
|
+
resp = chat_completion(
|
|
18
|
+
endpoint="https://api.openai.com/v1",
|
|
19
|
+
model="gpt-4o",
|
|
20
|
+
messages=[{"role": "user", "content": "What is the capital of France?"}],
|
|
21
|
+
api_key="sk-...",
|
|
22
|
+
)
|
|
23
|
+
|
|
24
|
+
if resp.ok:
|
|
25
|
+
print(resp.text) # "Paris"
|
|
26
|
+
print(resp.total_tokens) # 23
|
|
27
|
+
else:
|
|
28
|
+
print("Error:", resp.error)
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
---
|
|
32
|
+
|
|
33
|
+
## API
|
|
34
|
+
|
|
35
|
+
### `chat_completion()`
|
|
36
|
+
|
|
37
|
+
```python
|
|
38
|
+
def chat_completion(
|
|
39
|
+
endpoint: str,
|
|
40
|
+
model: str,
|
|
41
|
+
messages: list[dict],
|
|
42
|
+
*,
|
|
43
|
+
api_key: str | None = None,
|
|
44
|
+
timeout: float = 30.0,
|
|
45
|
+
max_retries: int = 3,
|
|
46
|
+
extra_body: dict | None = None,
|
|
47
|
+
) -> ChatCompletionResponse: ...
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
Send a single chat-completion request and return a `ChatCompletionResponse`.
|
|
51
|
+
|
|
52
|
+
| Parameter | Description |
|
|
53
|
+
|-----------|-------------|
|
|
54
|
+
| `endpoint` | Base URL, e.g. `"https://api.openai.com/v1"`. The path `/chat/completions` is appended automatically. |
|
|
55
|
+
| `model` | Model name, e.g. `"gpt-4o"`. |
|
|
56
|
+
| `messages` | List of chat message dicts (`role` + `content`). |
|
|
57
|
+
| `api_key` | Bearer token. Falls back to the `OPENAI_API_KEY` environment variable. |
|
|
58
|
+
| `timeout` | Socket timeout in seconds (default 30). |
|
|
59
|
+
| `max_retries` | Maximum number of retry attempts (default 3) on HTTP `429`, `500`, `502`, `503`, `504` or network errors. Exponential back-off between attempts: `min(2**attempt, 8)` seconds. |
|
|
60
|
+
| `extra_body` | Additional fields merged into the request JSON body (e.g. `temperature`, `stream`). |
|
|
61
|
+
|
|
62
|
+
**Returns:** `ChatCompletionResponse`
|
|
63
|
+
|
|
64
|
+
---
|
|
65
|
+
|
|
66
|
+
### `ChatCompletionResponse`
|
|
67
|
+
|
|
68
|
+
```python
|
|
69
|
+
@dataclass(frozen=True)
|
|
70
|
+
class ChatCompletionResponse:
|
|
71
|
+
text: str
|
|
72
|
+
latency_ms: float
|
|
73
|
+
error: str | None = None
|
|
74
|
+
prompt_tokens: int = 0
|
|
75
|
+
completion_tokens: int = 0
|
|
76
|
+
total_tokens: int = 0
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
| Field | Description |
|
|
80
|
+
|-------|-------------|
|
|
81
|
+
| `text` | The generated text from the first choice, or `""` on error. |
|
|
82
|
+
| `latency_ms` | Wall-clock round-trip time in milliseconds. |
|
|
83
|
+
| `error` | Error description string, or `None` on success. |
|
|
84
|
+
| `prompt_tokens` | Tokens used in the prompt (from the `usage` field). |
|
|
85
|
+
| `completion_tokens` | Tokens generated in the completion. |
|
|
86
|
+
| `total_tokens` | `prompt_tokens + completion_tokens`. |
|
|
87
|
+
| `ok` *(property)* | `True` when `error is None`. |
|
|
88
|
+
|
|
89
|
+
---
|
|
90
|
+
|
|
91
|
+
## Retry behaviour
|
|
92
|
+
|
|
93
|
+
Retries are performed on:
|
|
94
|
+
- HTTP status codes `429`, `500`, `502`, `503`, `504`
|
|
95
|
+
- `urllib.error.URLError` (network-level failures)
|
|
96
|
+
- `OSError` (connection refused, timeouts)
|
|
97
|
+
|
|
98
|
+
Back-off delay: `min(2**attempt, 8)` seconds (max 8 s). Non-retryable
|
|
99
|
+
status codes (e.g. `400`, `401`, `404`) are returned immediately as an
|
|
100
|
+
error response.
|
|
101
|
+
|
|
102
|
+
---
|
|
103
|
+
|
|
104
|
+
## Environment variables
|
|
105
|
+
|
|
106
|
+
| Variable | Effect |
|
|
107
|
+
|----------|--------|
|
|
108
|
+
| `OPENAI_API_KEY` | Default API key when `api_key` is not supplied. |
|
|
@@ -31,6 +31,14 @@ exported at the top-level package under `spanforge`.
|
|
|
31
31
|
- [models](models.md)
|
|
32
32
|
- [cache](cache.md)
|
|
33
33
|
- [lint](lint.md)
|
|
34
|
+
- [eval](eval.md)
|
|
35
|
+
- [config](config.md)
|
|
36
|
+
- [http](http.md)
|
|
37
|
+
- [io](io.md)
|
|
38
|
+
- [plugins](plugins.md)
|
|
39
|
+
- [schema](schema.md)
|
|
40
|
+
- [regression](regression.md)
|
|
41
|
+
- [stats](stats.md)
|
|
34
42
|
|
|
35
43
|
## Module summary
|
|
36
44
|
|
|
@@ -66,3 +74,12 @@ exported at the top-level package under `spanforge`.
|
|
|
66
74
|
| `spanforge.model_registry` | `ModelRegistryEntry`, model governance lifecycle (`registered` / `deprecated` / `retired`), attestation integration |
|
|
67
75
|
| `spanforge.explain` | `ExplainabilityRecord`, decision explainability (`generated`), EU AI Act Art. 13 / NIST MAP 1.1 mapping |
|
|
68
76
|
| `spanforge.lint` | `run_checks()`, `LintError`, AO001–AO005 checks, `SpanForgeChecker` flake8 plugin, `python -m spanforge.lint` CLI |
|
|
77
|
+
| `spanforge.eval` | `record_eval_score()`, `EvalScore`, `EvalRunner`, `EvalReport`, `RegressionDetector` (mean-based), `BehaviourScorer` ABC, built-in scorers |
|
|
78
|
+
| `spanforge.config` | `SpanForgeConfig`, `configure()`, `get_config()`, `interpolate_env()` — global configuration and env-var interpolation |
|
|
79
|
+
| `spanforge.http` | `chat_completion()`, `ChatCompletionResponse` — zero-dependency OpenAI-compatible HTTP client with exponential-backoff retry |
|
|
80
|
+
| `spanforge.io` | `write_jsonl()`, `read_jsonl()`, `append_jsonl()`, `write_events()`, `read_events()` — synchronous JSONL read/write utilities |
|
|
81
|
+
| `spanforge.plugins` | `discover(group)` — Python-version-aware entry-point plugin discovery (3.9 / 3.10 / 3.12+) |
|
|
82
|
+
| `spanforge.schema` | `validate()`, `validate_strict()`, `SchemaValidationError` — lightweight zero-dependency JSON Schema validator |
|
|
83
|
+
| `spanforge.regression` | `RegressionDetector`, `RegressionReport`, `compare()` — pass/fail and score-drop regression detection |
|
|
84
|
+
| `spanforge.stats` | `percentile()`, `latency_summary()` — latency statistics with linear-interpolation percentiles |
|
|
85
|
+
| `spanforge._ansi` | `color()`, `strip_ansi()`, ANSI colour constants — terminal colour helpers with `NO_COLOR` / non-TTY support |
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
# spanforge.io — Synchronous JSONL utilities
|
|
2
|
+
|
|
3
|
+
> **Module:** `spanforge.io`
|
|
4
|
+
> **Added in:** 2.0.3
|
|
5
|
+
|
|
6
|
+
`spanforge.io` provides reliable, synchronous JSONL read/write helpers.
|
|
7
|
+
They are a simpler, dependency-free alternative to the async `JSONLExporter`
|
|
8
|
+
and `EventStream.from_file()` pattern — ideal for scripts, tests, and
|
|
9
|
+
offline pipelines.
|
|
10
|
+
|
|
11
|
+
---
|
|
12
|
+
|
|
13
|
+
## Quick example
|
|
14
|
+
|
|
15
|
+
```python
|
|
16
|
+
from spanforge.io import write_events, read_events
|
|
17
|
+
|
|
18
|
+
# Write eval results as event envelopes
|
|
19
|
+
write_events(
|
|
20
|
+
[{"case_id": "tc-001", "score": 0.95}, {"case_id": "tc-002", "score": 0.72}],
|
|
21
|
+
"results.jsonl",
|
|
22
|
+
event_type="llm.eval.done",
|
|
23
|
+
source="my-eval-runner@1.0",
|
|
24
|
+
)
|
|
25
|
+
|
|
26
|
+
# Read them back, filtered by event type
|
|
27
|
+
payloads = read_events("results.jsonl", event_type="llm.eval.done")
|
|
28
|
+
# → [{"case_id": "tc-001", "score": 0.95}, ...]
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
---
|
|
32
|
+
|
|
33
|
+
## API
|
|
34
|
+
|
|
35
|
+
### `write_jsonl()`
|
|
36
|
+
|
|
37
|
+
```python
|
|
38
|
+
def write_jsonl(
|
|
39
|
+
records: Iterable[dict],
|
|
40
|
+
path: str | Path,
|
|
41
|
+
*,
|
|
42
|
+
mode: str = "w",
|
|
43
|
+
) -> int: ...
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
Serialise each record as a JSON line and write to *path*. Parent directories
|
|
47
|
+
are created automatically.
|
|
48
|
+
|
|
49
|
+
| Parameter | Description |
|
|
50
|
+
|-----------|-------------|
|
|
51
|
+
| `records` | Any iterable of `dict` objects (including generators). |
|
|
52
|
+
| `path` | Destination file path. |
|
|
53
|
+
| `mode` | `"w"` (overwrite, default) or `"a"` (append). |
|
|
54
|
+
|
|
55
|
+
**Returns:** Number of records written.
|
|
56
|
+
|
|
57
|
+
---
|
|
58
|
+
|
|
59
|
+
### `read_jsonl()`
|
|
60
|
+
|
|
61
|
+
```python
|
|
62
|
+
def read_jsonl(
|
|
63
|
+
path: str | Path,
|
|
64
|
+
*,
|
|
65
|
+
event_type: str | None = None,
|
|
66
|
+
skip_errors: bool = True,
|
|
67
|
+
) -> list[dict]: ...
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
Read a JSONL file and return a list of `dict` records.
|
|
71
|
+
|
|
72
|
+
| Parameter | Description |
|
|
73
|
+
|-----------|-------------|
|
|
74
|
+
| `path` | Source file path. Raises `FileNotFoundError` if absent. |
|
|
75
|
+
| `event_type` | If set, only records whose `"event_type"` field equals this value are returned. |
|
|
76
|
+
| `skip_errors` | When `True` (default), malformed lines are silently skipped. Set to `False` to raise `json.JSONDecodeError` on the first bad line. |
|
|
77
|
+
|
|
78
|
+
Non-`dict` JSON values (arrays, scalars) are always skipped.
|
|
79
|
+
|
|
80
|
+
---
|
|
81
|
+
|
|
82
|
+
### `append_jsonl()`
|
|
83
|
+
|
|
84
|
+
```python
|
|
85
|
+
def append_jsonl(record: dict, path: str | Path) -> None: ...
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
Append a single record to *path*, creating the file if it does not exist.
|
|
89
|
+
Equivalent to `write_jsonl([record], path, mode="a")`.
|
|
90
|
+
|
|
91
|
+
---
|
|
92
|
+
|
|
93
|
+
### `write_events()`
|
|
94
|
+
|
|
95
|
+
```python
|
|
96
|
+
def write_events(
|
|
97
|
+
payloads: Iterable[dict],
|
|
98
|
+
path: str | Path,
|
|
99
|
+
*,
|
|
100
|
+
event_type: str,
|
|
101
|
+
source: str = "spanforge",
|
|
102
|
+
mode: str = "w",
|
|
103
|
+
) -> int: ...
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
Wrap each payload in a spanforge event envelope and write to *path*.
|
|
107
|
+
|
|
108
|
+
Envelope format:
|
|
109
|
+
|
|
110
|
+
```json
|
|
111
|
+
{"event_type": "<event_type>", "source": "<source>", "payload": { ... }}
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
---
|
|
115
|
+
|
|
116
|
+
### `read_events()`
|
|
117
|
+
|
|
118
|
+
```python
|
|
119
|
+
def read_events(
|
|
120
|
+
path: str | Path,
|
|
121
|
+
*,
|
|
122
|
+
event_type: str,
|
|
123
|
+
) -> list[dict]: ...
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
Read event envelopes from *path* and return the unwrapped `payload` objects
|
|
127
|
+
from envelopes whose `"event_type"` equals *event_type*. Lines that do not carry a `"payload"` field
|
|
128
|
+
are silently skipped.
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
# spanforge.plugins — Entry-point plugin discovery
|
|
2
|
+
|
|
3
|
+
> **Module:** `spanforge.plugins`
|
|
4
|
+
> **Added in:** 2.0.3
|
|
5
|
+
|
|
6
|
+
`spanforge.plugins` provides a single `discover()` helper that loads
|
|
7
|
+
third-party plugins registered via Python packaging entry-points. It
|
|
8
|
+
handles the API split across Python 3.9, 3.10, and 3.12+ transparently
|
|
9
|
+
and silently skips broken plugins.
|
|
10
|
+
|
|
11
|
+
---
|
|
12
|
+
|
|
13
|
+
## Quick example
|
|
14
|
+
|
|
15
|
+
```python
|
|
16
|
+
from spanforge.plugins import discover
|
|
17
|
+
|
|
18
|
+
# Load all scorers registered under the "spanforge.scorers" group
|
|
19
|
+
scorers = discover("spanforge.scorers")
|
|
20
|
+
|
|
21
|
+
for scorer_cls in scorers:
|
|
22
|
+
scorer = scorer_cls()
|
|
23
|
+
print(scorer.name)
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
---
|
|
27
|
+
|
|
28
|
+
## API
|
|
29
|
+
|
|
30
|
+
### `discover()`
|
|
31
|
+
|
|
32
|
+
```python
|
|
33
|
+
def discover(group: str) -> list[Any]: ...
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
Return a list of objects (classes, functions, or instances) loaded from all
|
|
37
|
+
entry points registered under *group*.
|
|
38
|
+
|
|
39
|
+
| Parameter | Description |
|
|
40
|
+
|-----------|-------------|
|
|
41
|
+
| `group` | The entry-point group name, e.g. `"spanforge.scorers"`. |
|
|
42
|
+
|
|
43
|
+
**Returns:** A list of loaded objects. Broken entry points (those whose
|
|
44
|
+
`ep.load()` raises) are silently omitted. An empty list is returned if the
|
|
45
|
+
group has no registered entry points or if `importlib.metadata` is
|
|
46
|
+
unavailable.
|
|
47
|
+
|
|
48
|
+
---
|
|
49
|
+
|
|
50
|
+
## Registering a plugin
|
|
51
|
+
|
|
52
|
+
Add an entry point to your package's `pyproject.toml`:
|
|
53
|
+
|
|
54
|
+
```toml
|
|
55
|
+
[project.entry-points."spanforge.scorers"]
|
|
56
|
+
my_scorer = "my_package.scorers:MyScorer"
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
After `pip install`-ing your package, `discover("spanforge.scorers")` will
|
|
60
|
+
include `MyScorer` in the returned list.
|
|
61
|
+
|
|
62
|
+
---
|
|
63
|
+
|
|
64
|
+
## Python version compatibility
|
|
65
|
+
|
|
66
|
+
| Python | `entry_points()` API used |
|
|
67
|
+
|--------|--------------------------|
|
|
68
|
+
| 3.12+ | `entry_points(group=group)` |
|
|
69
|
+
| 3.10–3.11 | `entry_points().select(group=group)` |
|
|
70
|
+
| 3.9 | `entry_points().get(group, [])` (dict style) |
|