PyPI - swecc-mesocosm - Versions diffs - 0.1.0__tar.gz - Mend

swecc-mesocosm 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

swecc_mesocosm-0.1.0/.gitignore +10 -0
swecc_mesocosm-0.1.0/LICENSE +21 -0
swecc_mesocosm-0.1.0/PKG-INFO +204 -0
swecc_mesocosm-0.1.0/README.md +141 -0
swecc_mesocosm-0.1.0/pyproject.toml +108 -0
swecc_mesocosm-0.1.0/swecc_mesocosm/__init__.py +12 -0
swecc_mesocosm-0.1.0/swecc_mesocosm/artifacts.py +37 -0
swecc_mesocosm-0.1.0/swecc_mesocosm/cli.py +691 -0
swecc_mesocosm-0.1.0/swecc_mesocosm/client.py +107 -0
swecc_mesocosm-0.1.0/swecc_mesocosm/infer.py +191 -0
swecc_mesocosm-0.1.0/swecc_mesocosm/policy/constraints.json +22 -0
swecc_mesocosm-0.1.0/swecc_mesocosm/settings.py +28 -0
swecc_mesocosm-0.1.0/swecc_mesocosm/validation.py +89 -0

swecc_mesocosm-0.1.0/.gitignore ADDED Viewed

@@ -0,0 +1,10 @@
+dist/
+build/
+*.egg-info/
+__pycache__/
+*.pyc
+.pytest_cache/
+.mypy_cache/
+.ruff_cache/
+.coverage
+htmlcov/

swecc_mesocosm-0.1.0/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2026 SWECC Labs
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

swecc_mesocosm-0.1.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,204 @@
+Metadata-Version: 2.4
+Name: swecc-mesocosm
+Version: 0.1.0
+Summary: CLI and Python client for SWECC's benchmark and eval platform.
+Project-URL: Homepage, https://github.com/swecc-uw/swecc-core
+Project-URL: Repository, https://github.com/swecc-uw/swecc-core
+Project-URL: Issues, https://github.com/swecc-uw/swecc-core/issues
+Author-email: SWECC Labs <swecc@uw.edu>
+License: MIT License
+        Copyright (c) 2026 SWECC Labs
+        Permission is hereby granted, free of charge, to any person obtaining a copy
+        of this software and associated documentation files (the "Software"), to deal
+        in the Software without restriction, including without limitation the rights
+        to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+        copies of the Software, and to permit persons to whom the Software is
+        furnished to do so, subject to the following conditions:
+        The above copyright notice and this permission notice shall be included in all
+        copies or substantial portions of the Software.
+        THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+        IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+        FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+        AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+        LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+        OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+        SOFTWARE.
+License-File: LICENSE
+Keywords: benchmark,evaluation,llm,swecc
+Classifier: Development Status :: 3 - Alpha
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
+Classifier: Topic :: Software Development :: Testing
+Requires-Python: >=3.11
+Requires-Dist: httpx>=0.27
+Requires-Dist: pydantic-settings>=2.5
+Requires-Dist: pydantic>=2.9
+Requires-Dist: rich>=13.7
+Requires-Dist: typer>=0.12
+Provides-Extra: dev
+Requires-Dist: black; extra == 'dev'
+Requires-Dist: build; extra == 'dev'
+Requires-Dist: mypy; extra == 'dev'
+Requires-Dist: pre-commit; extra == 'dev'
+Requires-Dist: pytest; extra == 'dev'
+Requires-Dist: pytest-asyncio; extra == 'dev'
+Requires-Dist: ruff; extra == 'dev'
+Requires-Dist: twine; extra == 'dev'
+Provides-Extra: lint
+Requires-Dist: black; extra == 'lint'
+Requires-Dist: mypy; extra == 'lint'
+Requires-Dist: ruff; extra == 'lint'
+Provides-Extra: test
+Requires-Dist: pytest; extra == 'test'
+Requires-Dist: pytest-asyncio; extra == 'test'
+Description-Content-Type: text/markdown
+# swecc-mesocosm
+CLI and Python client for SWECC's benchmark and eval platform.
+A *mesocosm* is a small, enclosed environment used for controlled experiments — which is exactly what this tool helps you build, register, and run evals against.
+## Install
+```bash
+pip install swecc-mesocosm
+# or, with uv:
+uv tool install swecc-mesocosm
+# or, with pipx:
+pipx install swecc-mesocosm
+```
+For local development against this monorepo:
+```bash
+pip install -e ./packages/swecc-mesocosm
+```
+## Configure
+The CLI reads `MESOCOSM_BASE_URL` from the environment (default: `http://127.0.0.1:8010`, matching `BENCH_API_PORT` in docker compose). You can also pass `--base-url` to any command.
+Production:
+```bash
+export MESOCOSM_BASE_URL=https://api.swecc.org/bench
+mesocosm doctor   # verify health + openapi
+```
+See `infra/mesocosm.env.example` in the monorepo root.
+```bash
+export MESOCOSM_BASE_URL=http://127.0.0.1:8010   # docker compose
+# or
+export MESOCOSM_BASE_URL=https://api.swecc.org/bench
+```
+## Commands
+```bash
+mesocosm --help
+# connectivity check (bench-api health + openapi)
+mesocosm doctor
+mesocosm doctor --base-url https://api.swecc.org/bench
+# inference + validation (no network)
+mesocosm suggest "Wordle clone where the agent gets 6 guesses."
+mesocosm validate ./my-domain.json
+# domain CRUD
+mesocosm register --id my-bench --name "My Bench" --owner-id me \
+  --description "Trivia about Python." --env-url https://envs.example.com/mybench
+mesocosm publish my-bench
+mesocosm get my-bench --artifacts
+mesocosm list --status published
+# evals
+mesocosm eval test --domain-id my-bench --vow-version 1.0.0 --model openai/gpt-4o-mini
+mesocosm eval run  --domain-id my-bench --vow-version 1.0.0 --model openai/gpt-4o-mini \
+  --num-episodes 20 --seed-set '[1,2,3]'
+# results
+mesocosm run get <run-id>
+mesocosm run episodes <run-id> --traces
+```
+All commands print JSON to stdout (pretty when stdout is a TTY, compact otherwise), so they pipe cleanly into `jq`:
+```bash
+mesocosm list --status published | jq '.[].id'
+```
+## Local vs bench-api commands
+**Local** means the CLI does not call bench-api at `MESOCOSM_BASE_URL` (no HTTP to `/v1/...`). That is not the same as “no LLM”: model calls happen on the **server** when you use `eval` commands.
+**Bench-api** means the command needs a reachable bench-api (`MESOCOSM_BASE_URL` or `--base-url` on the command).
+### Local (no bench-api)
+| Command | What it does |
+| --------| -------------|
+| `mesocosm --version` / `-V` | Print the installed package version. |
+| `mesocosm suggest <description>` | Regex heuristics on your text → JSON defaults (`benchmark_kind`, `scoring_source`, `max_steps`, `primary_metric`, `reasoning`, `tags`). Preview only; does not register. |
+| `mesocosm validate <path>` | Check a domain JSON payload against shipped `policy/constraints.json` (`-` = stdin). Exit 0 if `ok`, else 1. |
+These work without bench-api running.
+### Bench-api (HTTP)
+| Command | API | What it does |
+| --------|-----| -------------|
+| `mesocosm register` | `POST /v1/domains` (409 → `PATCH`) | Build or load a payload, optionally run local `validate`, then upsert a draft domain. |
+| `mesocosm publish <id>` | `POST /v1/domains/{id}/publish` | Publish a domain; print artifact SHA-256 digests. |
+| `mesocosm get <id>` | `GET /v1/domains/{id}` | Fetch a domain; `--artifacts` adds synthesized contract files locally. |
+| `mesocosm list` | `GET /v1/domains` | List domains (`--status`, `--json` for raw output). |
+| `mesocosm eval test` | `POST /v1/test/episode` | One test episode (model + env on the server). |
+| `mesocosm eval run` | `GET` domain + `POST /v1/runs` | Full eval run with aggregated scores. |
+| `mesocosm run get <run-id>` | `GET /v1/runs/{id}` (+ episodes) | Run status and aggregate scores. |
+| `mesocosm run episodes <run-id>` | `GET /v1/runs/{id}/episodes` | Episode list; `--traces` fetches traces too. |
+`register` is hybrid: inference and `validate` run locally; the upsert step needs bench-api.
+```text
+LOCAL                          BENCH-API
+────────────────────────────   ─────────────────────────────────────
+mesocosm --version             mesocosm register
+mesocosm suggest "<desc>"      mesocosm publish <id>
+mesocosm validate <file>       mesocosm get <id> [--artifacts]
+                               mesocosm list [--status ...] [--json]
+                               mesocosm eval test ...
+                               mesocosm eval run ...
+                               mesocosm run get <run-id>
+                               mesocosm run episodes <run-id> [--traces]
+```
+## Python client
+```python
+import asyncio
+from swecc_mesocosm import BenchClient
+async def main():
+    c = BenchClient(base_url="http://127.0.0.1:8010")
+    try:
+        domains = await c.list_domains(published_only=True)
+        print(len(domains), "published")
+    finally:
+        await c.aclose()
+asyncio.run(main())
+```
+## Policy / constraints
+`mesocosm validate` reads `swecc_mesocosm/policy/constraints.json` shipped with the package — required register fields, allowed model prefixes, etc. Edit that file (or fork the package) to tune for your event.

swecc_mesocosm-0.1.0/README.md ADDED Viewed

@@ -0,0 +1,141 @@
+# swecc-mesocosm
+CLI and Python client for SWECC's benchmark and eval platform.
+A *mesocosm* is a small, enclosed environment used for controlled experiments — which is exactly what this tool helps you build, register, and run evals against.
+## Install
+```bash
+pip install swecc-mesocosm
+# or, with uv:
+uv tool install swecc-mesocosm
+# or, with pipx:
+pipx install swecc-mesocosm
+```
+For local development against this monorepo:
+```bash
+pip install -e ./packages/swecc-mesocosm
+```
+## Configure
+The CLI reads `MESOCOSM_BASE_URL` from the environment (default: `http://127.0.0.1:8010`, matching `BENCH_API_PORT` in docker compose). You can also pass `--base-url` to any command.
+Production:
+```bash
+export MESOCOSM_BASE_URL=https://api.swecc.org/bench
+mesocosm doctor   # verify health + openapi
+```
+See `infra/mesocosm.env.example` in the monorepo root.
+```bash
+export MESOCOSM_BASE_URL=http://127.0.0.1:8010   # docker compose
+# or
+export MESOCOSM_BASE_URL=https://api.swecc.org/bench
+```
+## Commands
+```bash
+mesocosm --help
+# connectivity check (bench-api health + openapi)
+mesocosm doctor
+mesocosm doctor --base-url https://api.swecc.org/bench
+# inference + validation (no network)
+mesocosm suggest "Wordle clone where the agent gets 6 guesses."
+mesocosm validate ./my-domain.json
+# domain CRUD
+mesocosm register --id my-bench --name "My Bench" --owner-id me \
+  --description "Trivia about Python." --env-url https://envs.example.com/mybench
+mesocosm publish my-bench
+mesocosm get my-bench --artifacts
+mesocosm list --status published
+# evals
+mesocosm eval test --domain-id my-bench --vow-version 1.0.0 --model openai/gpt-4o-mini
+mesocosm eval run  --domain-id my-bench --vow-version 1.0.0 --model openai/gpt-4o-mini \
+  --num-episodes 20 --seed-set '[1,2,3]'
+# results
+mesocosm run get <run-id>
+mesocosm run episodes <run-id> --traces
+```
+All commands print JSON to stdout (pretty when stdout is a TTY, compact otherwise), so they pipe cleanly into `jq`:
+```bash
+mesocosm list --status published | jq '.[].id'
+```
+## Local vs bench-api commands
+**Local** means the CLI does not call bench-api at `MESOCOSM_BASE_URL` (no HTTP to `/v1/...`). That is not the same as “no LLM”: model calls happen on the **server** when you use `eval` commands.
+**Bench-api** means the command needs a reachable bench-api (`MESOCOSM_BASE_URL` or `--base-url` on the command).
+### Local (no bench-api)
+| Command | What it does |
+| --------| -------------|
+| `mesocosm --version` / `-V` | Print the installed package version. |
+| `mesocosm suggest <description>` | Regex heuristics on your text → JSON defaults (`benchmark_kind`, `scoring_source`, `max_steps`, `primary_metric`, `reasoning`, `tags`). Preview only; does not register. |
+| `mesocosm validate <path>` | Check a domain JSON payload against shipped `policy/constraints.json` (`-` = stdin). Exit 0 if `ok`, else 1. |
+These work without bench-api running.
+### Bench-api (HTTP)
+| Command | API | What it does |
+| --------|-----| -------------|
+| `mesocosm register` | `POST /v1/domains` (409 → `PATCH`) | Build or load a payload, optionally run local `validate`, then upsert a draft domain. |
+| `mesocosm publish <id>` | `POST /v1/domains/{id}/publish` | Publish a domain; print artifact SHA-256 digests. |
+| `mesocosm get <id>` | `GET /v1/domains/{id}` | Fetch a domain; `--artifacts` adds synthesized contract files locally. |
+| `mesocosm list` | `GET /v1/domains` | List domains (`--status`, `--json` for raw output). |
+| `mesocosm eval test` | `POST /v1/test/episode` | One test episode (model + env on the server). |
+| `mesocosm eval run` | `GET` domain + `POST /v1/runs` | Full eval run with aggregated scores. |
+| `mesocosm run get <run-id>` | `GET /v1/runs/{id}` (+ episodes) | Run status and aggregate scores. |
+| `mesocosm run episodes <run-id>` | `GET /v1/runs/{id}/episodes` | Episode list; `--traces` fetches traces too. |
+`register` is hybrid: inference and `validate` run locally; the upsert step needs bench-api.
+```text
+LOCAL                          BENCH-API
+────────────────────────────   ─────────────────────────────────────
+mesocosm --version             mesocosm register
+mesocosm suggest "<desc>"      mesocosm publish <id>
+mesocosm validate <file>       mesocosm get <id> [--artifacts]
+                               mesocosm list [--status ...] [--json]
+                               mesocosm eval test ...
+                               mesocosm eval run ...
+                               mesocosm run get <run-id>
+                               mesocosm run episodes <run-id> [--traces]
+```
+## Python client
+```python
+import asyncio
+from swecc_mesocosm import BenchClient
+async def main():
+    c = BenchClient(base_url="http://127.0.0.1:8010")
+    try:
+        domains = await c.list_domains(published_only=True)
+        print(len(domains), "published")
+    finally:
+        await c.aclose()
+asyncio.run(main())
+```
+## Policy / constraints
+`mesocosm validate` reads `swecc_mesocosm/policy/constraints.json` shipped with the package — required register fields, allowed model prefixes, etc. Edit that file (or fork the package) to tune for your event.

swecc_mesocosm-0.1.0/pyproject.toml ADDED Viewed

@@ -0,0 +1,108 @@
+[build-system]
+requires = ["hatchling>=1.21.0"]
+build-backend = "hatchling.build"
+[project]
+name = "swecc-mesocosm"
+version = "0.1.0"
+description = "CLI and Python client for SWECC's benchmark and eval platform."
+readme = "README.md"
+requires-python = ">=3.11"
+license = { file = "LICENSE" }
+keywords = ["benchmark", "evaluation", "llm", "swecc"]
+authors = [
+    { name = "SWECC Labs", email = "swecc@uw.edu" },
+]
+classifiers = [
+    "Development Status :: 3 - Alpha",
+    "Intended Audience :: Developers",
+    "License :: OSI Approved :: MIT License",
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
+    "Programming Language :: Python :: 3.13",
+    "Topic :: Software Development :: Testing",
+]
+dependencies = [
+    "httpx>=0.27",
+    "pydantic>=2.9",
+    "pydantic-settings>=2.5",
+    "typer>=0.12",
+    "rich>=13.7",
+]
+[project.optional-dependencies]
+dev = [
+    "pytest",
+    "pytest-asyncio",
+    "mypy",
+    "ruff",
+    "black",
+    "pre-commit",
+    "build",
+    "twine",
+]
+lint = [
+    "ruff",
+    "black",
+    "mypy",
+]
+test = [
+    "pytest",
+    "pytest-asyncio",
+]
+[project.urls]
+Homepage = "https://github.com/swecc-uw/swecc-core"
+Repository = "https://github.com/swecc-uw/swecc-core"
+Issues = "https://github.com/swecc-uw/swecc-core/issues"
+[project.scripts]
+mesocosm = "swecc_mesocosm.cli:main"
+[tool.hatch.build]
+exclude = [
+    "tests/*",
+    "venv/*",
+    ".git/*",
+    "*.pyc",
+    "__pycache__/*",
+    ".pytest_cache/*",
+    ".mypy_cache/*",
+    ".ruff_cache/*",
+    "dist/*",
+    "build/*",
+]
+[tool.hatch.build.targets.wheel]
+packages = ["swecc_mesocosm"]
+[tool.ruff]
+line-length = 100
+target-version = "py311"
+fix = true
+unsafe-fixes = false
+lint.select = ["E", "F", "I", "N", "W", "B", "UP", "RUF"]
+lint.ignore = []
+[tool.ruff.lint.isort]
+known-first-party = ["swecc_mesocosm"]
+[tool.mypy]
+python_version = "3.11"
+strict = true
+ignore_missing_imports = true
+disallow_untyped_defs = true
+disallow_incomplete_defs = true
+warn_redundant_casts = true
+warn_unused_ignores = true
+no_implicit_optional = true
+show_error_codes = true
+[tool.pytest.ini_options]
+testpaths = ["tests"]
+python_files = ["test_*.py"]
+filterwarnings = [
+    "ignore::DeprecationWarning",
+    "ignore::UserWarning",
+]

swecc_mesocosm-0.1.0/swecc_mesocosm/__init__.py ADDED Viewed

@@ -0,0 +1,12 @@
+"""swecc-mesocosm — CLI and Python client for SWECC's bench platform."""
+from importlib.metadata import PackageNotFoundError, version
+from swecc_mesocosm.client import BenchClient
+# Resolve the version from the installed distribution's metadata; when the
+# distribution "swecc-mesocosm" cannot be found, fall back to a placeholder
+# version string instead of failing at import time.
+try:
+    __version__ = version("swecc-mesocosm")
+except PackageNotFoundError:
+    __version__ = "0.0.0.dev"
+__all__ = ["BenchClient", "__version__"]

swecc_mesocosm-0.1.0/swecc_mesocosm/artifacts.py ADDED Viewed

@@ -0,0 +1,37 @@
+from __future__ import annotations
+import hashlib
+import json
+from typing import Any
+def _canonical_json(obj: Any) -> bytes:
+    """Serialize ``obj`` to compact JSON with sorted keys, encoded as UTF-8 bytes.
+    Sorting keys and fixing the separators makes the output independent of dict
+    insertion order and free of optional whitespace; ``ensure_ascii=False`` keeps
+    non-ASCII characters unescaped in the encoded bytes.
+    """
+    return json.dumps(obj, sort_keys=True, ensure_ascii=False, separators=(",", ":")).encode(
+        "utf-8"
+    )
+def sha256_digest(obj: Any) -> str:
+    """Return the SHA-256 of ``obj``'s canonical JSON as ``sha256:<hex digest>``."""
+    # Hash the canonical byte form so the digest does not depend on dict key order.
+    h = hashlib.sha256(_canonical_json(obj)).hexdigest()
+    return f"sha256:{h}"
+def compile_benchmark_artifacts(domain: dict[str, Any]) -> dict[str, Any]:
+    """Synthesize stable artifact views from a Domain object returned by the API.
+    Returns a mapping from artifact file name (``contract.json``,
+    ``eval_profile.json``, ``dataset.lock.json``) to that artifact's content.
+    Fields are read with ``dict.get``, so a key missing from ``domain`` shows up
+    as ``None`` in the result instead of raising ``KeyError``.
+    """
+    # The contract artifact is the domain's ``binding_vow`` value, passed through unchanged.
+    contract = domain.get("binding_vow")
+    # Evaluation view: the scoring value plus the id and status of the domain.
+    eval_profile: dict[str, Any] = {
+        "scoring": domain.get("scoring"),
+        "domain_id": domain.get("id"),
+        "status": domain.get("status"),
+    }
+    # Placeholder only: it carries an explanatory note and the domain id, no dataset data.
+    dataset_lock: dict[str, Any] = {
+        "note": (
+            "No dataset lock is stored on the server yet. Pin seeds and env version in your repo."
+        ),
+        "domain_id": domain.get("id"),
+    }
+    return {
+        "contract.json": contract,
+        "eval_profile.json": eval_profile,
+        "dataset.lock.json": dataset_lock,
+    }