synth-ai 0.2.12__py3-none-any.whl → 0.2.13.dev2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of synth-ai might be problematic. Click here for more details.
- examples/multi_step/configs/crafter_rl_outcome.toml +74 -0
- examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +186 -0
- examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +83 -0
- examples/multi_step/configs/crafter_rl_stepwise_simple.toml +78 -0
- examples/multi_step/crafter_rl_lora.md +51 -10
- examples/multi_step/sse_metrics_streaming_notes.md +357 -0
- examples/multi_step/task_app_config_notes.md +7 -1
- examples/swe/task_app/grpo_swe_mini.py +55 -26
- examples/swe/task_app/hosted/rollout.py +40 -0
- examples/swe/task_app/hosted/test_service.py +5 -6
- examples/task_apps/TESTING.md +275 -0
- examples/task_apps/__init__.py +0 -0
- examples/task_apps/crafter/__init__.py +0 -0
- examples/task_apps/crafter/task_app/__init__.py +2 -0
- examples/{warming_up_to_rl → task_apps/crafter}/task_app/grpo_crafter.py +21 -46
- examples/{warming_up_to_rl → task_apps/crafter}/task_app/grpo_crafter_task_app.py +1 -1
- examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/policy.py +60 -4
- examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/inference/openai_client.py +109 -45
- examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/policy_routes.py +67 -49
- examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/rollout.py +242 -193
- examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/test_service.py +5 -6
- examples/task_apps/dev/pokemon_emerald/__init__.py +2 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/README.md +811 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/__init__.py +120 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/action.py +160 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/memory.py +155 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/perception.py +69 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/planning.py +96 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/simple.py +1502 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/system_prompt.py +4 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/grab_map.py +68 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/manual.py +216 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/__init__.py +35 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emerald_utils.py +631 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emulator.py +1544 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/enums.py +1428 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/memory_reader.py +4848 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/types.py +41 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/utils.py +298 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pyproject.toml +95 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/run.py +204 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/__init__.py +0 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/app.py +2152 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/client.py +429 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/frame_server.py +155 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/README.md +78 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/__init__.py +0 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/run_tests.py +122 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_direct.py +76 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_prompts.py +413 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_battle_state_formatting.py +204 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection.py +133 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection_comprehensive.py +229 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_direct_agent_emulator.py +300 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_fps_adjustment_pytest.py +205 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_direct.py +200 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_transition.py +284 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_map_ground_truth_comparison.py +468 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_memory_map.py +575 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_server_map_validation.py +311 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_torchic_state.py +259 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/__init__.py +0 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/anticheat.py +372 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/checkpoint.py +296 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/error_handler.py +275 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/get_local_ip.py +22 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/helpers.py +44 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/llm_logger.py +514 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_formatter.py +415 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher.py +1763 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher_singleton.py +33 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_trimmer.py +106 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_visualizer.py +334 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/ocr_dialogue.py +1020 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/recording.py +188 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/state_formatter.py +1481 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/vlm.py +862 -0
- examples/task_apps/dev/pokemon_emerald/modal_app.py +114 -0
- examples/task_apps/dev/pokemon_emerald/task_app/README.md +81 -0
- examples/task_apps/dev/pokemon_emerald/task_app/__init__.py +6 -0
- examples/task_apps/dev/pokemon_emerald/task_app/pokemon_emerald.py +685 -0
- examples/task_apps/enron/__init__.py +1 -0
- examples/task_apps/enron/eval_groq_qwen32.toml +16 -0
- examples/task_apps/enron/task_app/README.md +14 -0
- examples/task_apps/enron/task_app/__init__.py +1 -0
- examples/task_apps/enron/task_app/grpo_enron.py +906 -0
- examples/task_apps/enron/task_app/grpo_enron_task_app.py +146 -0
- examples/task_apps/enron/tests/__init__.py +2 -0
- examples/task_apps/enron/tests/conftest.py +115 -0
- examples/task_apps/enron/tests/integration/__init__.py +2 -0
- examples/task_apps/enron/tests/integration/test_enron_eval.py +177 -0
- examples/task_apps/enron/tests/integration/test_enron_rollout.py +135 -0
- examples/task_apps/enron/tests/unit/__init__.py +2 -0
- examples/task_apps/enron/tests/unit/test_enron_environment.py +126 -0
- examples/task_apps/math/__init__.py +0 -0
- examples/{rl/task_app → task_apps/math}/math_single_step.py +19 -10
- examples/task_apps/pokemon_battle/__init__.py +2 -0
- examples/task_apps/pokemon_battle/modal_app.py +104 -0
- examples/task_apps/pokemon_battle/task_app/README.md +68 -0
- examples/task_apps/pokemon_battle/task_app/__init__.py +6 -0
- examples/task_apps/pokemon_battle/task_app/pokemon_showdown.py +932 -0
- examples/task_apps/pokemon_red/README.md +357 -0
- examples/task_apps/pokemon_red/__init__.py +3 -0
- examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +225 -0
- examples/task_apps/pokemon_red/pallet_town_rl_config.toml +73 -0
- examples/task_apps/pokemon_red/task_app.py +606 -0
- examples/task_apps/pokemon_red/test_pallet_town_rewards.py +191 -0
- examples/task_apps/sokoban/README.md +307 -0
- examples/task_apps/sokoban/__init__.py +3 -0
- examples/task_apps/sokoban/eval_groq_qwen32.toml +16 -0
- examples/task_apps/sokoban/eval_openai_gpt5.toml +16 -0
- examples/task_apps/sokoban/task_app.py +1058 -0
- examples/task_apps/sokoban/tests/__init__.py +2 -0
- examples/task_apps/sokoban/tests/conftest.py +113 -0
- examples/task_apps/sokoban/tests/integration/__init__.py +2 -0
- examples/task_apps/sokoban/tests/integration/test_sokoban_eval.py +57 -0
- examples/task_apps/sokoban/tests/integration/test_sokoban_rollout.py +198 -0
- examples/task_apps/sokoban/tests/unit/__init__.py +2 -0
- examples/task_apps/sokoban/tests/unit/test_sokoban_environment.py +114 -0
- examples/task_apps/verilog/__init__.py +1 -0
- examples/task_apps/verilog/eval_groq_qwen32b.toml +20 -0
- examples/task_apps/verilog/task_app/README.md +12 -0
- examples/task_apps/verilog/task_app/__init__.py +1 -0
- examples/task_apps/verilog/task_app/grpo_verilog.py +931 -0
- examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +145 -0
- examples/task_apps/verilog/tests/__init__.py +2 -0
- examples/task_apps/verilog/tests/conftest.py +115 -0
- examples/task_apps/verilog/tests/integration/__init__.py +2 -0
- examples/task_apps/verilog/tests/integration/test_verilog_eval.py +179 -0
- examples/task_apps/verilog/tests/integration/test_verilog_rollout.py +55 -0
- examples/task_apps/verilog/tests/unit/__init__.py +2 -0
- examples/task_apps/verilog/tests/unit/test_verilog_scoring.py +118 -0
- examples/vlm/crafter_openai_vlm_agent.py +4 -4
- examples/vlm/run_crafter_vlm_benchmark.py +4 -4
- examples/warming_up_to_rl/configs/eval_stepwise_complex.toml +4 -2
- examples/warming_up_to_rl/configs/eval_stepwise_simple.toml +4 -2
- examples/warming_up_to_rl/run_eval.py +127 -18
- examples/workflows/__init__.py +0 -0
- examples/workflows/math_rl/__init__.py +0 -0
- examples/workflows/math_rl/download_dataset.py +80 -0
- synth_ai/__init__.py +41 -1
- synth_ai/api/train/builders.py +73 -29
- synth_ai/api/train/cli.py +12 -6
- synth_ai/api/train/configs/__init__.py +44 -0
- synth_ai/api/train/configs/rl.py +134 -0
- synth_ai/api/train/configs/sft.py +95 -0
- synth_ai/api/train/configs/shared.py +24 -0
- synth_ai/api/train/env_resolver.py +5 -2
- synth_ai/api/train/supported_algos.py +10 -5
- synth_ai/api/train/utils.py +7 -4
- synth_ai/cli/__init__.py +7 -51
- synth_ai/cli/_storage.py +4 -3
- synth_ai/cli/_validate_task_app.py +11 -0
- synth_ai/cli/balance.py +4 -3
- synth_ai/cli/calc.py +2 -2
- synth_ai/cli/demo.py +49 -43
- synth_ai/cli/legacy_root_backup.py +1 -1
- synth_ai/cli/rl_demo.py +86 -106
- synth_ai/cli/root.py +0 -97
- synth_ai/cli/task_apps.py +1710 -186
- synth_ai/demos/core/cli.py +121 -159
- synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +28 -16
- synth_ai/environments/examples/crafter_classic/environment.py +16 -0
- synth_ai/environments/examples/enron/engine.py +7 -2
- synth_ai/environments/examples/enron/environment.py +68 -0
- synth_ai/environments/examples/red/engine.py +27 -0
- synth_ai/environments/examples/red/engine_helpers/memory_map.py +7 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_progression.py +477 -0
- synth_ai/environments/examples/red/engine_helpers/state_extraction.py +32 -0
- synth_ai/environments/examples/red/environment.py +60 -0
- synth_ai/environments/examples/sokoban/taskset.py +116 -0
- synth_ai/environments/examples/verilog/engine.py +30 -4
- synth_ai/evals/__init__.py +15 -0
- synth_ai/evals/client.py +82 -0
- synth_ai/evals/types.py +42 -0
- synth_ai/jobs/client.py +16 -4
- synth_ai/judge_schemas.py +127 -0
- synth_ai/py.typed +0 -0
- synth_ai/task/__init__.py +14 -5
- synth_ai/task/contracts.py +124 -38
- synth_ai/task/proxy.py +48 -56
- synth_ai/task/rubrics/__init__.py +53 -0
- synth_ai/task/rubrics/loaders.py +133 -0
- synth_ai/task/rubrics/models.py +57 -0
- synth_ai/task/rubrics/scoring.py +113 -0
- synth_ai/task/rubrics/strict.py +149 -0
- synth_ai/task/server.py +8 -7
- synth_ai/task/validators.py +269 -6
- synth_ai/tracing_v3/decorators.py +7 -3
- synth_ai/tracing_v3/replica_sync.py +4 -4
- synth_ai/tracing_v3/serialization.py +130 -0
- synth_ai/tracing_v3/trace_utils.py +317 -0
- synth_ai/tracing_v3/turso/native_manager.py +3 -3
- {synth_ai-0.2.12.dist-info → synth_ai-0.2.13.dev2.dist-info}/METADATA +4 -1
- {synth_ai-0.2.12.dist-info → synth_ai-0.2.13.dev2.dist-info}/RECORD +228 -89
- {synth_ai-0.2.12.dist-info → synth_ai-0.2.13.dev2.dist-info}/entry_points.txt +0 -1
- synth_ai/task/rubrics.py +0 -219
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/README.md +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/README.md +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/__init__.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/branching.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/environment_routes.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/__init__.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/__init__.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/app.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/environment.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/react_agent.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/shared.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/tools.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/hosted_app.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/inference/__init__.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/main.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/registry.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/storage/__init__.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/storage/volume.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/test_agents.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/utils.py +0 -0
- /examples/{rl/task_app → task_apps/math}/README.md +0 -0
- /examples/{rl/task_app → task_apps/math}/math_task_app.py +0 -0
- /examples/{rl → workflows/math_rl}/configs/eval_base_qwen.toml +0 -0
- /examples/{rl → workflows/math_rl}/configs/eval_rl_qwen.toml +0 -0
- /examples/{rl → workflows/math_rl}/configs/rl_from_base_qwen.toml +0 -0
- /examples/{rl → workflows/math_rl}/configs/rl_from_base_qwen17.toml +0 -0
- /examples/{rl → workflows/math_rl}/configs/rl_from_ft_qwen.toml +0 -0
- /examples/{rl → workflows/math_rl}/run_eval.py +0 -0
- /examples/{rl → workflows/math_rl}/run_rl_and_save.py +0 -0
- {synth_ai-0.2.12.dist-info → synth_ai-0.2.13.dev2.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.12.dist-info → synth_ai-0.2.13.dev2.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.2.12.dist-info → synth_ai-0.2.13.dev2.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
"""HTTP-safe serialization helpers for tracing v3.
|
|
2
|
+
|
|
3
|
+
These utilities normalize tracing structures (including dataclasses) into
|
|
4
|
+
JSON-serializable forms and provide a compact JSON encoder suitable for
|
|
5
|
+
HTTP transmission to backend services.
|
|
6
|
+
|
|
7
|
+
Design goals:
|
|
8
|
+
- Preserve structure while ensuring standard-compliant JSON (no NaN/Infinity)
|
|
9
|
+
- Handle common non-JSON types: datetime, Decimal, bytes, set/tuple, numpy scalars
|
|
10
|
+
- Keep output compact (no unnecessary whitespace) while readable if needed
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import base64
|
|
16
|
+
import json
|
|
17
|
+
from dataclasses import asdict, is_dataclass
|
|
18
|
+
from datetime import date, datetime
|
|
19
|
+
from decimal import Decimal
|
|
20
|
+
from enum import Enum
|
|
21
|
+
from typing import Any
|
|
22
|
+
|
|
23
|
+
try:
|
|
24
|
+
import numpy as _np # type: ignore
|
|
25
|
+
except Exception: # pragma: no cover - numpy optional at runtime
|
|
26
|
+
_np = None # type: ignore
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def normalize_for_json(value: Any) -> Any:
|
|
30
|
+
"""Return a JSON-serializable version of ``value``.
|
|
31
|
+
|
|
32
|
+
Rules:
|
|
33
|
+
- dataclass → dict (recursively normalized)
|
|
34
|
+
- datetime/date → ISO-8601 string (UTC-aware datetimes preserve tzinfo)
|
|
35
|
+
- Decimal → float (fallback to string if not finite)
|
|
36
|
+
- bytes/bytearray → base64 string (RFC 4648)
|
|
37
|
+
- set/tuple → list
|
|
38
|
+
- Enum → enum.value (normalized)
|
|
39
|
+
- numpy scalar → corresponding Python scalar
|
|
40
|
+
- float NaN/Inf/−Inf → None (to keep JSON standard compliant)
|
|
41
|
+
- dict / list → recursively normalized
|
|
42
|
+
- other primitives (str, int, bool, None, float) passed through
|
|
43
|
+
"""
|
|
44
|
+
|
|
45
|
+
# Dataclasses
|
|
46
|
+
if is_dataclass(value) and not isinstance(value, type):
|
|
47
|
+
try:
|
|
48
|
+
return normalize_for_json(asdict(value))
|
|
49
|
+
except Exception:
|
|
50
|
+
# Fallback: best-effort conversion via __dict__
|
|
51
|
+
return normalize_for_json(getattr(value, "__dict__", {}))
|
|
52
|
+
|
|
53
|
+
# Mapping
|
|
54
|
+
if isinstance(value, dict):
|
|
55
|
+
return {str(k): normalize_for_json(v) for k, v in value.items()}
|
|
56
|
+
|
|
57
|
+
# Sequences
|
|
58
|
+
if isinstance(value, list | tuple | set):
|
|
59
|
+
return [normalize_for_json(v) for v in value]
|
|
60
|
+
|
|
61
|
+
# Datetime / Date
|
|
62
|
+
if isinstance(value, datetime | date):
|
|
63
|
+
return value.isoformat()
|
|
64
|
+
|
|
65
|
+
# Decimal
|
|
66
|
+
if isinstance(value, Decimal):
|
|
67
|
+
try:
|
|
68
|
+
f = float(value)
|
|
69
|
+
if f != f or f in (float("inf"), float("-inf")):
|
|
70
|
+
return str(value)
|
|
71
|
+
return f
|
|
72
|
+
except Exception:
|
|
73
|
+
return str(value)
|
|
74
|
+
|
|
75
|
+
# Bytes-like
|
|
76
|
+
if isinstance(value, bytes | bytearray):
|
|
77
|
+
return base64.b64encode(bytes(value)).decode("ascii")
|
|
78
|
+
|
|
79
|
+
# Enum
|
|
80
|
+
if isinstance(value, Enum):
|
|
81
|
+
return normalize_for_json(value.value)
|
|
82
|
+
|
|
83
|
+
# Numpy scalars / arrays
|
|
84
|
+
if _np is not None:
|
|
85
|
+
if isinstance(value, _np.generic): # type: ignore[attr-defined]
|
|
86
|
+
return normalize_for_json(value.item())
|
|
87
|
+
if isinstance(value, _np.ndarray):
|
|
88
|
+
return normalize_for_json(value.tolist())
|
|
89
|
+
|
|
90
|
+
# Floats: sanitize NaN / Infinity to None
|
|
91
|
+
if isinstance(value, float):
|
|
92
|
+
if value != value or value in (float("inf"), float("-inf")):
|
|
93
|
+
return None
|
|
94
|
+
return value
|
|
95
|
+
|
|
96
|
+
return value
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def dumps_http_json(payload: Any) -> str:
    """Encode ``payload`` as a compact JSON string suitable for HTTP bodies.

    The payload is first passed through ``normalize_for_json``. The encoder
    then runs with:

    - ``allow_nan=False`` so any NaN/Infinity that survived normalization
      raises instead of producing non-standard JSON
    - ``ensure_ascii=False`` so non-ASCII characters are emitted as-is
    - ``separators=(",", ":")`` so no padding whitespace is written
    """

    return json.dumps(
        normalize_for_json(payload),
        separators=(",", ":"),
        allow_nan=False,
        ensure_ascii=False,
    )
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def serialize_trace_for_http(trace: Any) -> str:
    """Turn a tracing v3 session (or any dict/list-like) into HTTP-safe JSON.

    Dataclass instances are expanded with ``asdict`` before encoding; if that
    path raises, the instance ``__dict__`` (or an empty dict) is encoded
    instead. Everything else, including dataclass *classes*, is handed to
    ``dumps_http_json`` unchanged.
    """

    # Guard clause: anything that is not a dataclass instance goes straight through.
    if isinstance(trace, type) or not is_dataclass(trace):
        return dumps_http_json(trace)

    try:
        return dumps_http_json(asdict(trace))
    except Exception:
        # Best-effort fallback when asdict() or the encoding of its result fails.
        return dumps_http_json(getattr(trace, "__dict__", {}))
|
|
129
|
+
|
|
130
|
+
|
|
@@ -0,0 +1,317 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import sqlite3
|
|
5
|
+
from collections.abc import Sequence
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
Row = sqlite3.Row
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def connect(db_path: str | bytes | int) -> sqlite3.Connection:
|
|
13
|
+
conn = sqlite3.connect(db_path)
|
|
14
|
+
conn.row_factory = sqlite3.Row
|
|
15
|
+
return conn
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def _json_load(value: Any) -> Any:
|
|
19
|
+
if value is None:
|
|
20
|
+
return None
|
|
21
|
+
if isinstance(value, dict | list):
|
|
22
|
+
return value
|
|
23
|
+
if isinstance(value, bytes | bytearray):
|
|
24
|
+
value = value.decode("utf-8", errors="ignore")
|
|
25
|
+
try:
|
|
26
|
+
return json.loads(value)
|
|
27
|
+
except Exception:
|
|
28
|
+
return value
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def fetch_crafter_sessions(
|
|
32
|
+
conn: sqlite3.Connection,
|
|
33
|
+
*,
|
|
34
|
+
limit: int,
|
|
35
|
+
metadata_filter: str | None = None,
|
|
36
|
+
session_ids: Sequence[str] | None = None,
|
|
37
|
+
min_event_count: int = 0,
|
|
38
|
+
) -> list[str]:
|
|
39
|
+
if session_ids:
|
|
40
|
+
placeholders = ",".join("?" for _ in session_ids)
|
|
41
|
+
rows = conn.execute(
|
|
42
|
+
f"""
|
|
43
|
+
SELECT session_id
|
|
44
|
+
FROM session_traces
|
|
45
|
+
WHERE session_id IN ({placeholders})
|
|
46
|
+
ORDER BY created_at DESC
|
|
47
|
+
""",
|
|
48
|
+
tuple(session_ids),
|
|
49
|
+
).fetchall()
|
|
50
|
+
return [row["session_id"] for row in rows]
|
|
51
|
+
|
|
52
|
+
params: list[Any] = []
|
|
53
|
+
where_clauses: list[str] = []
|
|
54
|
+
if metadata_filter:
|
|
55
|
+
where_clauses.append("session_traces.metadata LIKE ?")
|
|
56
|
+
params.append(f"%{metadata_filter}%")
|
|
57
|
+
where_sql = ""
|
|
58
|
+
if where_clauses:
|
|
59
|
+
where_sql = "WHERE " + " AND ".join(where_clauses)
|
|
60
|
+
|
|
61
|
+
having_sql = ""
|
|
62
|
+
if min_event_count > 0:
|
|
63
|
+
having_sql = "HAVING COUNT(events.id) >= ?"
|
|
64
|
+
params.append(min_event_count)
|
|
65
|
+
|
|
66
|
+
query = f"""
|
|
67
|
+
SELECT session_traces.session_id
|
|
68
|
+
FROM session_traces
|
|
69
|
+
LEFT JOIN events ON session_traces.session_id = events.session_id
|
|
70
|
+
{where_sql}
|
|
71
|
+
GROUP BY session_traces.session_id
|
|
72
|
+
{having_sql}
|
|
73
|
+
ORDER BY session_traces.created_at DESC
|
|
74
|
+
LIMIT ?
|
|
75
|
+
"""
|
|
76
|
+
rows = conn.execute(query, (*params, limit)).fetchall()
|
|
77
|
+
return [row["session_id"] for row in rows]
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def load_session_trace(conn: sqlite3.Connection, session_id: str) -> dict[str, Any]:
    """Assemble everything stored for one session into a plain dictionary.

    Reads the ``session_traces`` row plus the matching rows of
    ``session_timesteps``, ``events``, ``messages``, ``event_rewards`` and
    ``outcome_rewards``. JSON text columns are decoded with ``_json_load``.

    The result exposes events under both ``"event_history"`` and ``"events"``
    and messages under both ``"markov_blanket_message_history"`` and
    ``"messages"``; each pair refers to the same list object.

    Rows are read by column name, so ``conn`` needs a name-addressable row
    factory such as ``sqlite3.Row``.

    Raises:
        ValueError: If no ``session_traces`` row has this ``session_id``.
    """
    key = (session_id,)

    header = conn.execute(
        """
        SELECT session_id, created_at, metadata
        FROM session_traces
        WHERE session_id = ?
        """,
        key,
    ).fetchone()
    if not header:
        raise ValueError(f"Session {session_id} not found")

    def rows_for(sql: str) -> list[Any]:
        # Every per-session query binds exactly one parameter: the session id.
        return conn.execute(sql, key).fetchall()

    step_rows = rows_for(
        """
        SELECT step_id,
        step_index,
        turn_number,
        started_at,
        completed_at,
        step_metadata
        FROM session_timesteps
        WHERE session_id = ?
        ORDER BY step_index ASC
        """
    )
    event_records = rows_for(
        """
        SELECT *
        FROM events
        WHERE session_id = ?
        ORDER BY event_time ASC, id ASC
        """
    )
    message_records = rows_for(
        """
        SELECT *
        FROM messages
        WHERE session_id = ?
        ORDER BY event_time ASC, id ASC
        """
    )
    event_reward_records = rows_for(
        """
        SELECT *
        FROM event_rewards
        WHERE session_id = ?
        ORDER BY turn_number ASC, id ASC
        """
    )
    outcome_reward_records = rows_for(
        """
        SELECT *
        FROM outcome_rewards
        WHERE session_id = ?
        ORDER BY created_at ASC
        """
    )

    metadata = _json_load(header["metadata"]) or {}
    if isinstance(metadata, dict):
        # A non-string episode id is stored back as its string form.
        raw_episode = metadata.get("episode_id")
        if not (raw_episode is None or isinstance(raw_episode, str)):
            metadata["episode_id"] = str(raw_episode)

    events_payload: list[dict[str, Any]] = []
    for ev in event_records:
        events_payload.append(
            {
                "id": ev["id"],
                "event_type": ev["event_type"],
                "system_instance_id": ev["system_instance_id"],
                "time_record": {
                    "event_time": ev["event_time"],
                    "message_time": ev["message_time"],
                    "created_at": ev["created_at"],
                },
                "model_name": ev["model_name"],
                "provider": ev["provider"],
                "input_tokens": ev["input_tokens"],
                "output_tokens": ev["output_tokens"],
                "total_tokens": ev["total_tokens"],
                "cost_usd": ev["cost_usd"],
                "latency_ms": ev["latency_ms"],
                "span_id": ev["span_id"],
                "trace_id": ev["trace_id"],
                "call_records": _json_load(ev["call_records"]) or [],
                "reward": ev["reward"],
                "terminated": ev["terminated"],
                "truncated": ev["truncated"],
                "system_state_before": _json_load(ev["system_state_before"]),
                "system_state_after": _json_load(ev["system_state_after"]),
                "metadata": _json_load(ev["metadata"]) or {},
                "event_metadata": _json_load(ev["event_metadata"]),
            }
        )

    messages_payload: list[dict[str, Any]] = []
    for msg in message_records:
        messages_payload.append(
            {
                "id": msg["id"],
                "message_type": msg["message_type"],
                "content": msg["content"],
                "time_record": {
                    "event_time": msg["event_time"],
                    "message_time": msg["message_time"],
                    "timestamp": msg["timestamp"],
                },
                "metadata": _json_load(msg["metadata"]) or {},
            }
        )

    steps_payload = [
        {
            "step_id": step["step_id"],
            "step_index": step["step_index"],
            "turn_number": step["turn_number"],
            "started_at": step["started_at"],
            "completed_at": step["completed_at"],
            "metadata": _json_load(step["step_metadata"]) or {},
        }
        for step in step_rows
    ]

    event_rewards_payload = [
        {
            "id": reward["id"],
            "event_id": reward["event_id"],
            "turn_number": reward["turn_number"],
            "reward_value": reward["reward_value"],
            "reward_type": reward["reward_type"],
            "key": reward["key"],
            "annotation": _json_load(reward["annotation"]) or {},
            "source": reward["source"],
            "created_at": reward["created_at"],
        }
        for reward in event_reward_records
    ]

    outcome_rewards_payload = [
        {
            "id": outcome["id"],
            "total_reward": outcome["total_reward"],
            "reward_metadata": _json_load(outcome["reward_metadata"]) or {},
            "created_at": outcome["created_at"],
        }
        for outcome in outcome_reward_records
    ]

    trace: dict[str, Any] = {
        "session_id": header["session_id"],
        "created_at": header["created_at"],
        "metadata": metadata,
        "session_time_steps": steps_payload,
        # Both event keys (and both message keys) alias the same list.
        "event_history": events_payload,
        "events": events_payload,
        "markov_blanket_message_history": messages_payload,
        "messages": messages_payload,
        "event_rewards": event_rewards_payload,
        "outcome_rewards": outcome_rewards_payload,
    }
    return trace
|
|
243
|
+
|
|
244
|
+
|
|
245
|
+
@dataclass
class DeterministicMetrics:
    """Reward totals and achievement counts derived for a single session."""

    # Session the figures belong to.
    session_id: str
    # Reward totals (floats).
    unique_achievement_reward: float
    achievement_reward: float
    outcome_total_reward: float
    # Distinct-achievement counts (ints).
    unique_achievement_count: int
    final_achievement_count: int
|
|
253
|
+
|
|
254
|
+
|
|
255
|
+
def _string_members(container: Any, key: str) -> set[str]:
    """Return the ``str`` items of the collection stored at ``container[key]``.

    Yields an empty set unless ``container`` is a dict whose ``key`` entry is
    a list/tuple/set/frozenset. This keeps malformed JSON columns (a bare
    string, a top-level list, a scalar) from raising, and stops a plain string
    from being counted character by character.
    """
    if not isinstance(container, dict):
        return set()
    items = container.get(key)
    if not isinstance(items, list | tuple | set | frozenset):
        return set()
    return {item for item in items if isinstance(item, str)}


def compute_deterministic_metrics(conn: sqlite3.Connection, session_id: str) -> DeterministicMetrics:
    """Aggregate stored reward rows for one session into ``DeterministicMetrics``.

    From ``event_rewards``:
      * rows with ``reward_type == "unique_achievement_delta"`` add their
        ``reward_value`` to ``unique_achievement_reward``; string names listed
        under ``"new_unique"`` in the row's JSON ``annotation`` are collected
        into a set whose size becomes ``unique_achievement_count``.
      * rows with ``reward_type == "achievement_delta"`` add their
        ``reward_value`` to ``achievement_reward``.
      * other reward types are ignored.

    From ``outcome_rewards``:
      * every row's ``total_reward`` is summed into ``outcome_total_reward``;
        string names listed under ``"achievements"`` in the row's JSON
        ``reward_metadata`` are collected into a set whose size becomes
        ``final_achievement_count``.

    NULL/zero reward values count as ``0.0``. Annotation or metadata payloads
    that do not decode to a dict holding a list under the expected key
    contribute no names instead of raising.

    Rows are read by column name, so ``conn`` needs a name-addressable row
    factory such as ``sqlite3.Row``.
    """
    event_rows = conn.execute(
        """
        SELECT reward_type, reward_value, annotation
        FROM event_rewards
        WHERE session_id = ?
        """,
        (session_id,),
    ).fetchall()

    unique_total = 0.0
    all_total = 0.0
    unique_achievements: set[str] = set()

    for row in event_rows:
        reward_type = row["reward_type"]
        value = float(row["reward_value"] or 0.0)
        if reward_type == "unique_achievement_delta":
            unique_total += value
            unique_achievements |= _string_members(_json_load(row["annotation"]), "new_unique")
        elif reward_type == "achievement_delta":
            all_total += value

    outcome_rows = conn.execute(
        """
        SELECT total_reward, reward_metadata
        FROM outcome_rewards
        WHERE session_id = ?
        """,
        (session_id,),
    ).fetchall()

    outcome_total = 0.0
    final_achievements: set[str] = set()
    for row in outcome_rows:
        outcome_total += float(row["total_reward"] or 0.0)
        final_achievements |= _string_members(_json_load(row["reward_metadata"]), "achievements")

    return DeterministicMetrics(
        session_id=session_id,
        unique_achievement_reward=unique_total,
        achievement_reward=all_total,
        outcome_total_reward=outcome_total,
        unique_achievement_count=len(unique_achievements),
        final_achievement_count=len(final_achievements),
    )
|
|
307
|
+
|
|
308
|
+
|
|
309
|
+
__all__ = [
|
|
310
|
+
"DeterministicMetrics",
|
|
311
|
+
"compute_deterministic_metrics",
|
|
312
|
+
"connect",
|
|
313
|
+
"fetch_crafter_sessions",
|
|
314
|
+
"load_session_trace",
|
|
315
|
+
]
|
|
316
|
+
|
|
317
|
+
|
|
@@ -117,7 +117,7 @@ def _maybe_datetime(value: Any) -> Any:
|
|
|
117
117
|
|
|
118
118
|
|
|
119
119
|
def _load_json(value: Any) -> Any:
|
|
120
|
-
if value is None or isinstance(value, (dict, list)):
|
|
120
|
+
if value is None or isinstance(value, dict | list):
|
|
121
121
|
return value or {}
|
|
122
122
|
if isinstance(value, str):
|
|
123
123
|
try:
|
|
@@ -584,7 +584,7 @@ class NativeLibsqlTraceManager(TraceStorage):
|
|
|
584
584
|
raise ValueError("No named parameters found in query for provided mapping")
|
|
585
585
|
values = tuple(params[key] for key in keys)
|
|
586
586
|
return new_query, values
|
|
587
|
-
if isinstance(params, (list, tuple)):
|
|
587
|
+
if isinstance(params, list | tuple):
|
|
588
588
|
return query, tuple(params)
|
|
589
589
|
raise TypeError("Unsupported parameter type for query execution")
|
|
590
590
|
|
|
@@ -881,7 +881,7 @@ class NativeLibsqlTraceManager(TraceStorage):
|
|
|
881
881
|
) -> int:
|
|
882
882
|
await self.initialize()
|
|
883
883
|
|
|
884
|
-
if not isinstance(event, (EnvironmentEvent, LMCAISEvent, RuntimeEvent)):
|
|
884
|
+
if not isinstance(event, EnvironmentEvent | LMCAISEvent | RuntimeEvent):
|
|
885
885
|
raise TypeError(f"Unsupported event type for native manager: {type(event)!r}")
|
|
886
886
|
|
|
887
887
|
metadata_json = metadata_override or event.metadata or {}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: synth-ai
|
|
3
|
-
Version: 0.2.12
|
|
3
|
+
Version: 0.2.13.dev2
|
|
4
4
|
Summary: RL as a service SDK - Core AI functionality and tracing
|
|
5
5
|
Author-email: Synth AI <josh@usesynth.ai>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -50,9 +50,12 @@ Requires-Dist: textual>=1.1.0
|
|
|
50
50
|
Requires-Dist: openai-harmony>=0.0.1
|
|
51
51
|
Requires-Dist: asyncpg>=0.30.0
|
|
52
52
|
Requires-Dist: aiohttp>=3.8.0
|
|
53
|
+
Requires-Dist: httpx>=0.28.1
|
|
53
54
|
Requires-Dist: datasets>=4.0.0
|
|
54
55
|
Requires-Dist: transformers>=4.56.1
|
|
55
56
|
Requires-Dist: modal==1.1.4
|
|
57
|
+
Requires-Dist: pyboy>=2.6.0
|
|
58
|
+
Requires-Dist: setuptools>=80.9.0
|
|
56
59
|
Provides-Extra: dev
|
|
57
60
|
Requires-Dist: build>=1.2.2.post1; extra == "dev"
|
|
58
61
|
Requires-Dist: twine>=4.0.0; extra == "dev"
|