synth-ai 0.2.16__py3-none-any.whl → 0.2.19__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of synth-ai might be problematic. Click here for more details.
- examples/analyze_semantic_words.sh +2 -2
- examples/baseline/banking77_baseline.py +204 -0
- examples/baseline/crafter_baseline.py +407 -0
- examples/baseline/pokemon_red_baseline.py +326 -0
- examples/baseline/simple_baseline.py +56 -0
- examples/baseline/warming_up_to_rl_baseline.py +239 -0
- examples/blog_posts/gepa/README.md +355 -0
- examples/blog_posts/gepa/configs/banking77_gepa_local.toml +95 -0
- examples/blog_posts/gepa/configs/banking77_gepa_test.toml +82 -0
- examples/blog_posts/gepa/configs/banking77_mipro_local.toml +52 -0
- examples/blog_posts/gepa/configs/hotpotqa_gepa_local.toml +59 -0
- examples/blog_posts/gepa/configs/hotpotqa_gepa_qwen.toml +36 -0
- examples/blog_posts/gepa/configs/hotpotqa_mipro_local.toml +53 -0
- examples/blog_posts/gepa/configs/hover_gepa_local.toml +59 -0
- examples/blog_posts/gepa/configs/hover_gepa_qwen.toml +36 -0
- examples/blog_posts/gepa/configs/hover_mipro_local.toml +53 -0
- examples/blog_posts/gepa/configs/ifbench_gepa_local.toml +59 -0
- examples/blog_posts/gepa/configs/ifbench_gepa_qwen.toml +36 -0
- examples/blog_posts/gepa/configs/ifbench_mipro_local.toml +53 -0
- examples/blog_posts/gepa/configs/pupa_gepa_local.toml +60 -0
- examples/blog_posts/gepa/configs/pupa_mipro_local.toml +54 -0
- examples/blog_posts/gepa/deploy_banking77_task_app.sh +41 -0
- examples/blog_posts/gepa/gepa_baseline.py +204 -0
- examples/blog_posts/gepa/query_prompts_example.py +97 -0
- examples/blog_posts/gepa/run_gepa_banking77.sh +87 -0
- examples/blog_posts/gepa/task_apps.py +105 -0
- examples/blog_posts/gepa/test_gepa_local.sh +67 -0
- examples/blog_posts/gepa/verify_banking77_setup.sh +123 -0
- examples/blog_posts/pokemon_vl/README.md +98 -0
- examples/blog_posts/pokemon_vl/configs/eval_gpt5nano.toml +26 -0
- examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +27 -0
- examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml +24 -0
- examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml +10 -0
- examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +43 -0
- examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml +40 -0
- examples/blog_posts/pokemon_vl/extract_images.py +239 -0
- examples/blog_posts/pokemon_vl/pokemon_vl_baseline.py +326 -0
- examples/blog_posts/pokemon_vl/run_eval_extract_images.py +209 -0
- examples/blog_posts/pokemon_vl/run_qwen_eval_extract_images.py +212 -0
- examples/blog_posts/pokemon_vl/text_box_analysis.md +106 -0
- examples/blog_posts/warming_up_to_rl/ARCHITECTURE.md +195 -0
- examples/blog_posts/warming_up_to_rl/FINAL_TEST_RESULTS.md +127 -0
- examples/blog_posts/warming_up_to_rl/INFERENCE_SUCCESS.md +132 -0
- examples/blog_posts/warming_up_to_rl/README.md +158 -0
- examples/blog_posts/warming_up_to_rl/SMOKE_TESTING.md +164 -0
- examples/blog_posts/warming_up_to_rl/SMOKE_TEST_COMPLETE.md +253 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_baseline_qwen32b_10x20.toml +25 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml +25 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b_10x20.toml +26 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml +25 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml +29 -0
- examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +10 -0
- examples/blog_posts/warming_up_to_rl/configs/smoke_test.toml +75 -0
- examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +91 -0
- examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +40 -0
- examples/blog_posts/warming_up_to_rl/warming_up_to_rl_baseline.py +187 -0
- examples/dev/qwen3_32b_qlora_4xh100.toml +5 -0
- examples/multi_step/configs/VERILOG_REWARDS.md +4 -0
- examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +4 -0
- examples/multi_step/configs/crafter_rl_outcome.toml +2 -1
- examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +65 -107
- examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +2 -1
- examples/multi_step/configs/crafter_rl_stepwise_simple.toml +2 -1
- examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml +105 -0
- examples/multi_step/configs/verilog_rl_lora.toml +80 -123
- examples/qwen_coder/configs/coder_lora_30b.toml +1 -3
- examples/qwen_coder/configs/coder_lora_4b.toml +4 -1
- examples/qwen_coder/configs/coder_lora_small.toml +1 -3
- examples/qwen_vl/README.md +10 -12
- examples/qwen_vl/SETUP_COMPLETE.md +7 -8
- examples/qwen_vl/VISION_TESTS_COMPLETE.md +2 -3
- examples/qwen_vl/collect_data_via_cli.md +76 -84
- examples/qwen_vl/collect_vision_traces.py +4 -4
- examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +40 -57
- examples/qwen_vl/configs/crafter_vlm_sft_example.toml +1 -2
- examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +20 -37
- examples/qwen_vl/configs/eval_gpt5nano_vision.toml +21 -40
- examples/qwen_vl/configs/eval_qwen3vl_vision.toml +26 -0
- examples/qwen_vl/configs/{filter_qwen2vl_sft.toml → filter_qwen3vl_sft.toml} +4 -5
- examples/qwen_vl/configs/filter_vision_sft.toml +2 -3
- examples/qwen_vl/crafter_qwen_vl_agent.py +5 -5
- examples/qwen_vl/run_vision_comparison.sh +6 -7
- examples/rl/README.md +5 -5
- examples/rl/configs/rl_from_base_qwen.toml +26 -1
- examples/rl/configs/rl_from_base_qwen17.toml +6 -2
- examples/rl/task_app/README.md +1 -2
- examples/rl/task_app/math_single_step.py +2 -2
- examples/run_crafter_demo.sh +2 -2
- examples/sft/README.md +1 -1
- examples/sft/configs/crafter_fft_qwen0p6b.toml +4 -1
- examples/sft/configs/crafter_lora_qwen0p6b.toml +4 -1
- examples/swe/task_app/README.md +32 -2
- examples/swe/task_app/grpo_swe_mini.py +4 -0
- examples/swe/task_app/hosted/envs/crafter/react_agent.py +1 -1
- examples/swe/task_app/hosted/envs/mini_swe/environment.py +37 -10
- examples/swe/task_app/hosted/inference/openai_client.py +4 -38
- examples/swe/task_app/hosted/policy_routes.py +17 -0
- examples/swe/task_app/hosted/rollout.py +4 -2
- examples/swe/task_app/morph_backend.py +178 -0
- examples/task_apps/banking77/__init__.py +6 -0
- examples/task_apps/banking77/banking77_task_app.py +841 -0
- examples/task_apps/banking77/deploy_wrapper.py +46 -0
- examples/task_apps/crafter/CREATE_SFT_DATASET.md +4 -0
- examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +4 -0
- examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +4 -0
- examples/task_apps/crafter/task_app/README.md +1 -1
- examples/task_apps/crafter/task_app/grpo_crafter.py +90 -5
- examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +1 -1
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +4 -26
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -2
- examples/task_apps/crafter/task_app/synth_envs_hosted/hosted_app.py +49 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +372 -107
- examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +81 -12
- examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +82 -11
- examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +194 -1
- examples/task_apps/enron/task_app/grpo_enron_task_app.py +1 -1
- examples/task_apps/gepa_benchmarks/__init__.py +7 -0
- examples/task_apps/gepa_benchmarks/common.py +260 -0
- examples/task_apps/gepa_benchmarks/hotpotqa_task_app.py +507 -0
- examples/task_apps/gepa_benchmarks/hover_task_app.py +436 -0
- examples/task_apps/gepa_benchmarks/ifbench_task_app.py +563 -0
- examples/task_apps/gepa_benchmarks/pupa_task_app.py +460 -0
- examples/task_apps/math/README.md +1 -2
- examples/task_apps/pokemon_red/README.md +3 -4
- examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +4 -0
- examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +6 -5
- examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +1 -2
- examples/task_apps/pokemon_red/task_app.py +288 -39
- examples/task_apps/sokoban/README.md +2 -3
- examples/task_apps/verilog/eval_groq_qwen32b.toml +12 -14
- examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +1 -1
- examples/vlm/configs/crafter_vlm_gpt4o.toml +4 -1
- examples/warming_up_to_rl/configs/crafter_fft.toml +4 -1
- examples/warming_up_to_rl/configs/crafter_fft_4b.toml +0 -2
- examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +3 -2
- examples/warming_up_to_rl/run_local_rollout_traced.py +1 -1
- examples/warming_up_to_rl/task_app/README.md +1 -1
- examples/warming_up_to_rl/task_app/grpo_crafter.py +185 -5
- examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +3 -27
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +49 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +156 -45
- examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +37 -4
- examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +33 -3
- examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +67 -0
- examples/workflows/math_rl/configs/rl_from_base_qwen.toml +27 -0
- examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +6 -0
- synth_ai/api/train/builders.py +99 -4
- synth_ai/api/train/cli.py +516 -26
- synth_ai/api/train/config_finder.py +13 -2
- synth_ai/api/train/configs/__init__.py +23 -2
- synth_ai/api/train/configs/prompt_learning.py +442 -0
- synth_ai/api/train/configs/rl.py +61 -7
- synth_ai/api/train/configs/sft.py +6 -2
- synth_ai/api/train/configs/shared.py +59 -2
- synth_ai/api/train/task_app.py +1 -1
- synth_ai/api/train/validators.py +277 -0
- synth_ai/auth/credentials.py +119 -0
- synth_ai/baseline/__init__.py +25 -0
- synth_ai/baseline/config.py +209 -0
- synth_ai/baseline/discovery.py +214 -0
- synth_ai/baseline/execution.py +146 -0
- synth_ai/cli/__init__.py +94 -18
- synth_ai/cli/__main__.py +0 -0
- synth_ai/cli/claude.py +70 -0
- synth_ai/cli/codex.py +84 -0
- synth_ai/cli/commands/__init__.py +18 -0
- synth_ai/cli/commands/baseline/__init__.py +12 -0
- synth_ai/cli/commands/baseline/core.py +637 -0
- synth_ai/cli/commands/baseline/list.py +93 -0
- synth_ai/cli/commands/demo/__init__.py +6 -0
- synth_ai/cli/commands/demo/core.py +163 -0
- synth_ai/cli/commands/eval/__init__.py +19 -0
- synth_ai/cli/commands/eval/core.py +1112 -0
- synth_ai/cli/commands/eval/errors.py +81 -0
- synth_ai/cli/commands/eval/validation.py +133 -0
- synth_ai/cli/commands/filter/__init__.py +12 -0
- synth_ai/cli/commands/filter/core.py +424 -0
- synth_ai/cli/commands/filter/errors.py +55 -0
- synth_ai/cli/commands/filter/validation.py +77 -0
- synth_ai/cli/commands/help/__init__.py +177 -0
- synth_ai/cli/commands/help/core.py +72 -0
- synth_ai/cli/commands/smoke/__init__.py +7 -0
- synth_ai/cli/commands/smoke/core.py +1436 -0
- synth_ai/cli/commands/status/__init__.py +64 -0
- synth_ai/cli/commands/status/client.py +192 -0
- synth_ai/cli/commands/status/config.py +92 -0
- synth_ai/cli/commands/status/errors.py +20 -0
- synth_ai/cli/commands/status/formatters.py +164 -0
- synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
- synth_ai/cli/commands/status/subcommands/files.py +79 -0
- synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
- synth_ai/cli/commands/status/subcommands/models.py +79 -0
- synth_ai/cli/commands/status/subcommands/pricing.py +22 -0
- synth_ai/cli/commands/status/subcommands/runs.py +81 -0
- synth_ai/cli/commands/status/subcommands/summary.py +47 -0
- synth_ai/cli/commands/status/subcommands/usage.py +203 -0
- synth_ai/cli/commands/status/utils.py +114 -0
- synth_ai/cli/commands/train/__init__.py +53 -0
- synth_ai/cli/commands/train/core.py +21 -0
- synth_ai/cli/commands/train/errors.py +117 -0
- synth_ai/cli/commands/train/judge_schemas.py +200 -0
- synth_ai/cli/commands/train/judge_validation.py +305 -0
- synth_ai/cli/commands/train/validation.py +386 -0
- synth_ai/cli/demo.py +30 -158
- synth_ai/cli/deploy/__init__.py +43 -0
- synth_ai/cli/deploy.py +162 -0
- synth_ai/cli/eval/__init__.py +36 -0
- synth_ai/cli/eval/core.py +5 -0
- synth_ai/cli/eval/errors.py +31 -0
- synth_ai/cli/eval/validation.py +5 -0
- synth_ai/cli/filter/__init__.py +28 -0
- synth_ai/cli/filter/core.py +5 -0
- synth_ai/cli/filter/errors.py +23 -0
- synth_ai/cli/filter/validation.py +5 -0
- synth_ai/cli/legacy_root_backup.py +14 -8
- synth_ai/cli/modal_serve/__init__.py +12 -0
- synth_ai/cli/modal_serve/core.py +14 -0
- synth_ai/cli/modal_serve/errors.py +8 -0
- synth_ai/cli/modal_serve/validation.py +11 -0
- synth_ai/cli/opencode.py +107 -0
- synth_ai/cli/root.py +9 -5
- synth_ai/cli/serve/__init__.py +12 -0
- synth_ai/cli/serve/core.py +14 -0
- synth_ai/cli/serve/errors.py +8 -0
- synth_ai/cli/serve/validation.py +11 -0
- synth_ai/cli/setup.py +20 -265
- synth_ai/cli/status.py +7 -126
- synth_ai/cli/task_app_deploy.py +1 -10
- synth_ai/cli/task_app_modal_serve.py +4 -9
- synth_ai/cli/task_app_serve.py +4 -11
- synth_ai/cli/task_apps.py +51 -1480
- synth_ai/cli/train/__init__.py +12 -0
- synth_ai/cli/train/core.py +21 -0
- synth_ai/cli/train/errors.py +8 -0
- synth_ai/cli/train/validation.py +24 -0
- synth_ai/cli/train.py +1 -14
- synth_ai/demos/crafter/grpo_crafter_task_app.py +1 -1
- synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +1 -1
- synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +7 -4
- synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +9 -5
- synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +4 -3
- synth_ai/environments/examples/red/engine.py +33 -12
- synth_ai/environments/examples/red/engine_helpers/reward_components.py +151 -179
- synth_ai/environments/examples/red/environment.py +26 -0
- synth_ai/environments/examples/red/trace_hooks_v3.py +168 -0
- synth_ai/http.py +12 -0
- synth_ai/judge_schemas.py +10 -10
- synth_ai/learning/__init__.py +10 -0
- synth_ai/learning/prompt_learning_client.py +276 -0
- synth_ai/learning/prompt_learning_types.py +184 -0
- synth_ai/learning/rl/client.py +3 -1
- synth_ai/pricing/__init__.py +2 -0
- synth_ai/pricing/model_pricing.py +57 -0
- synth_ai/streaming/__init__.py +29 -0
- synth_ai/streaming/config.py +94 -0
- synth_ai/streaming/handlers.py +518 -0
- synth_ai/streaming/streamer.py +320 -0
- synth_ai/streaming/types.py +95 -0
- synth_ai/task/apps/__init__.py +1 -0
- synth_ai/task/config.py +2 -0
- synth_ai/task/tracing_utils.py +25 -25
- synth_ai/task/validators.py +45 -9
- synth_ai/task_app_cfgs.py +21 -0
- synth_ai/tracing_v3/config.py +162 -19
- synth_ai/tracing_v3/constants.py +1 -1
- synth_ai/tracing_v3/db_config.py +24 -38
- synth_ai/tracing_v3/migration_helper.py +1 -2
- synth_ai/tracing_v3/storage/config.py +47 -13
- synth_ai/tracing_v3/storage/factory.py +3 -3
- synth_ai/tracing_v3/turso/daemon.py +113 -11
- synth_ai/tracing_v3/turso/native_manager.py +92 -16
- synth_ai/types.py +8 -0
- synth_ai/urls.py +11 -0
- synth_ai/utils/__init__.py +30 -1
- synth_ai/utils/agents.py +74 -0
- synth_ai/utils/bin.py +39 -0
- synth_ai/utils/cli.py +149 -5
- synth_ai/utils/env.py +40 -33
- synth_ai/utils/http.py +4 -1
- synth_ai/utils/json.py +72 -0
- synth_ai/utils/modal.py +285 -3
- synth_ai/utils/paths.py +48 -0
- synth_ai/utils/uvicorn.py +113 -0
- {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/METADATA +109 -6
- {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/RECORD +291 -142
- examples/qwen_vl/configs/eval_qwen2vl_vision.toml +0 -44
- synth_ai/cli/tui.py +0 -62
- synth_ai/tui/__init__.py +0 -5
- synth_ai/tui/__main__.py +0 -13
- synth_ai/tui/cli/__init__.py +0 -1
- synth_ai/tui/cli/query_experiments.py +0 -164
- synth_ai/tui/cli/query_experiments_v3.py +0 -164
- synth_ai/tui/dashboard.py +0 -911
- {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/entry_points.txt +0 -0
- {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from .core import register, train_command
|
|
4
|
+
from .errors import TrainCliError
|
|
5
|
+
from .validation import validate_train_environment
|
|
6
|
+
|
|
7
|
+
__all__ = [
|
|
8
|
+
"register",
|
|
9
|
+
"train_command",
|
|
10
|
+
"TrainCliError",
|
|
11
|
+
"validate_train_environment",
|
|
12
|
+
]
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import click
|
|
4
|
+
from synth_ai.api.train.cli import (
|
|
5
|
+
register as _register_with_cli,
|
|
6
|
+
)
|
|
7
|
+
from synth_ai.api.train.cli import (
|
|
8
|
+
train_command as _train_command,
|
|
9
|
+
)
|
|
10
|
+
|
|
11
|
+
__all__ = ["register", "train_command"]
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def register(cli: click.Group) -> None:
|
|
15
|
+
"""Attach the train command to the root CLI."""
|
|
16
|
+
_register_with_cli(cli)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def train_command(*args, **kwargs):
|
|
20
|
+
"""Entrypoint used by the train CLI command."""
|
|
21
|
+
return _train_command(*args, **kwargs)
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from collections.abc import Iterable
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Dict, Tuple
|
|
6
|
+
|
|
7
|
+
from synth_ai.api.train.env_resolver import KeySpec, resolve_env
|
|
8
|
+
|
|
9
|
+
__all__ = ["validate_train_environment"]
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def validate_train_environment(
|
|
13
|
+
*,
|
|
14
|
+
config_path: Path | None,
|
|
15
|
+
explicit_env_paths: Iterable[str],
|
|
16
|
+
required_keys: list[KeySpec],
|
|
17
|
+
) -> Tuple[Path, Dict[str, str]]:
|
|
18
|
+
"""Validate and resolve environment secrets used by the train command."""
|
|
19
|
+
resolved_path, resolved_keys = resolve_env(
|
|
20
|
+
config_path=config_path,
|
|
21
|
+
explicit_env_paths=explicit_env_paths,
|
|
22
|
+
required_keys=required_keys,
|
|
23
|
+
)
|
|
24
|
+
return resolved_path, resolved_keys
|
synth_ai/cli/train.py
CHANGED
|
@@ -1,18 +1,5 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from
|
|
4
|
-
|
|
5
|
-
from synth_ai.api.train.cli import register as _register
|
|
6
|
-
from synth_ai.api.train.cli import train_command as _train_command
|
|
3
|
+
from synth_ai.cli.commands.train.core import register, train_command
|
|
7
4
|
|
|
8
5
|
__all__ = ["register", "train_command"]
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
def register(cli: Any) -> None:
|
|
12
|
-
"""Compatibility wrapper for the legacy train CLI location."""
|
|
13
|
-
|
|
14
|
-
_register(cli)
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
def train_command(*args: Any, **kwargs: Any) -> Any:
|
|
18
|
-
return _train_command(*args, **kwargs)
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
This module now delegates to the TaskAppConfig defined in the local example at
|
|
4
4
|
`examples/warming_up_to_rl/task_app/grpo_crafter.py`. It is kept for legacy usage
|
|
5
5
|
(running the file directly or targeting `fastapi_app` from external tooling).
|
|
6
|
-
Prefer using `uvx synth-ai
|
|
6
|
+
Prefer using `uvx synth-ai deploy --runtime uvicorn grpo-crafter` for local development and testing.
|
|
7
7
|
"""
|
|
8
8
|
|
|
9
9
|
from __future__ import annotations
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
This module now delegates to the TaskAppConfig defined in the local example at
|
|
4
4
|
`examples/task_apps/crafter/task_app/grpo_crafter.py`. It is kept for legacy usage
|
|
5
5
|
(running the file directly or targeting `fastapi_app` from external tooling).
|
|
6
|
-
Prefer using `uvx synth-ai
|
|
6
|
+
Prefer using `uvx synth-ai deploy --runtime uvicorn grpo-crafter` for local development and testing.
|
|
7
7
|
"""
|
|
8
8
|
|
|
9
9
|
from __future__ import annotations
|
|
@@ -6,16 +6,18 @@ the hash-based set-iteration nondeterminism that caused the drift.
|
|
|
6
6
|
"""
|
|
7
7
|
|
|
8
8
|
import collections
|
|
9
|
+
import os
|
|
9
10
|
|
|
10
11
|
import crafter
|
|
11
12
|
|
|
12
|
-
|
|
13
|
+
# Patch messages permanently disabled to reduce noise
|
|
14
|
+
# print("[PATCH] Attempting to apply Crafter deterministic patch...")
|
|
13
15
|
|
|
14
16
|
# -----------------------------------------------------------------------------
|
|
15
17
|
# 1. Make per–chunk object order stable
|
|
16
18
|
# -----------------------------------------------------------------------------
|
|
17
19
|
if not hasattr(crafter.Env, "_orig_balance_object"):
|
|
18
|
-
print("[PATCH] Patching crafter.Env._balance_object...")
|
|
20
|
+
# print("[PATCH] Patching crafter.Env._balance_object...")
|
|
19
21
|
crafter.Env._orig_balance_object = crafter.Env._balance_object
|
|
20
22
|
|
|
21
23
|
def _balance_object_det(self, chunk, objs, *args, **kwargs):
|
|
@@ -25,9 +27,10 @@ if not hasattr(crafter.Env, "_orig_balance_object"):
|
|
|
25
27
|
return crafter.Env._orig_balance_object(self, chunk, objs, *args, **kwargs)
|
|
26
28
|
|
|
27
29
|
crafter.Env._balance_object = _balance_object_det
|
|
28
|
-
print("[PATCH] crafter.Env._balance_object patched.")
|
|
30
|
+
# print("[PATCH] crafter.Env._balance_object patched.")
|
|
29
31
|
else:
|
|
30
|
-
|
|
32
|
+
pass
|
|
33
|
+
# print("[PATCH] crafter.Env._balance_object already patched or _orig_balance_object exists.")
|
|
31
34
|
|
|
32
35
|
# -----------------------------------------------------------------------------
|
|
33
36
|
# 2. Make *chunk* iteration order stable
|
|
@@ -4,6 +4,7 @@ This version handles player references for Zombie and Skeleton objects.
|
|
|
4
4
|
"""
|
|
5
5
|
|
|
6
6
|
import collections
|
|
7
|
+
import os
|
|
7
8
|
import pickle
|
|
8
9
|
from typing import Any, Dict, Optional, Set
|
|
9
10
|
|
|
@@ -11,11 +12,12 @@ import crafter
|
|
|
11
12
|
import numpy as np
|
|
12
13
|
from crafter import objects
|
|
13
14
|
|
|
14
|
-
|
|
15
|
+
# Patch messages permanently disabled
|
|
16
|
+
# print("[PATCH] Attempting to apply Crafter serialization patch v3...")
|
|
15
17
|
|
|
16
18
|
# Check if already patched
|
|
17
19
|
if not hasattr(crafter.Env, "save"):
|
|
18
|
-
print("[PATCH] Adding enhanced save/load methods to crafter.Env...")
|
|
20
|
+
# print("[PATCH] Adding enhanced save/load methods to crafter.Env...")
|
|
19
21
|
|
|
20
22
|
def _save(self) -> Dict[str, Any]:
|
|
21
23
|
"""Save complete environment state including all details."""
|
|
@@ -260,8 +262,10 @@ if not hasattr(crafter.Env, "save"):
|
|
|
260
262
|
crafter.Env.save = _save
|
|
261
263
|
crafter.Env.load = _load
|
|
262
264
|
|
|
263
|
-
|
|
265
|
+
pass
|
|
266
|
+
# print("[PATCH] crafter.Env.save() and load() methods added (v3).")
|
|
264
267
|
else:
|
|
265
|
-
|
|
268
|
+
pass
|
|
269
|
+
# print("[PATCH] crafter.Env already has save/load methods.")
|
|
266
270
|
|
|
267
|
-
print("[PATCH] Crafter serialization patch v3 complete.")
|
|
271
|
+
# print("[PATCH] Crafter serialization patch v3 complete.")
|
|
@@ -9,7 +9,8 @@ from typing import Any, Dict, Optional
|
|
|
9
9
|
|
|
10
10
|
import crafter
|
|
11
11
|
|
|
12
|
-
|
|
12
|
+
# Patch messages permanently disabled
|
|
13
|
+
# print("[PATCH] Attempting to apply simplified Crafter world configuration patch...")
|
|
13
14
|
|
|
14
15
|
# World configuration presets
|
|
15
16
|
WORLD_CONFIGS = {
|
|
@@ -279,8 +280,8 @@ def patched_env_init(
|
|
|
279
280
|
|
|
280
281
|
crafter.Env.__init__ = patched_env_init
|
|
281
282
|
|
|
282
|
-
print("[PATCH] Simplified Crafter world configuration patch complete.")
|
|
283
|
-
print("[PATCH] Available configs: easy, normal, hard, peaceful")
|
|
283
|
+
# print("[PATCH] Simplified Crafter world configuration patch complete.")
|
|
284
|
+
# print("[PATCH] Available configs: easy, normal, hard, peaceful")
|
|
284
285
|
|
|
285
286
|
# Example custom config
|
|
286
287
|
EXAMPLE_CUSTOM_CONFIG = {
|
|
@@ -14,12 +14,15 @@ from synth_ai.environments.stateful.engine import StatefulEngine, StatefulEngine
|
|
|
14
14
|
from synth_ai.environments.tasks.core import TaskInstance
|
|
15
15
|
|
|
16
16
|
from .engine_helpers.reward_components import (
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
17
|
+
RouteExplorationReward,
|
|
18
|
+
StrategicTrainingReward,
|
|
19
|
+
BattleProgressionReward,
|
|
20
|
+
GymPreparationReward,
|
|
21
|
+
ItemCollectionReward,
|
|
22
|
+
HealingManagementReward,
|
|
23
|
+
EfficientExplorationReward,
|
|
24
|
+
BadgeVictoryReward,
|
|
21
25
|
StepPenaltyComponent,
|
|
22
|
-
XPGainComponent,
|
|
23
26
|
)
|
|
24
27
|
from .engine_helpers.state_extraction import extract_game_state
|
|
25
28
|
|
|
@@ -268,15 +271,27 @@ class PokemonRedEngine(StatefulEngine, IReproducibleEngine):
|
|
|
268
271
|
# For testing purposes, use None emulator
|
|
269
272
|
self.emulator = None
|
|
270
273
|
|
|
271
|
-
# Initialize reward stack with
|
|
274
|
+
# Initialize reward stack with comprehensive progress-based components
|
|
272
275
|
self.reward_stack = RewardStack(
|
|
273
276
|
components=[
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
277
|
+
# Major progress rewards
|
|
278
|
+
BadgeVictoryReward(), # +50.0 for Boulder Badge (main goal)
|
|
279
|
+
RouteExplorationReward(), # +1.0-5.0 for reaching key areas
|
|
280
|
+
GymPreparationReward(), # +3.0 for being gym-ready
|
|
281
|
+
|
|
282
|
+
# Training and battle rewards
|
|
283
|
+
StrategicTrainingReward(), # +0.2-3.0 for level ups and milestones
|
|
284
|
+
BattleProgressionReward(), # +0.1-1.0 for battles
|
|
285
|
+
|
|
286
|
+
# Resource management rewards
|
|
287
|
+
ItemCollectionReward(), # +0.1-0.5 for collecting items
|
|
288
|
+
HealingManagementReward(), # +0.05-0.8 for healing Pokemon
|
|
289
|
+
|
|
290
|
+
# Exploration efficiency
|
|
291
|
+
EfficientExplorationReward(), # +0.02 for discovering new positions
|
|
292
|
+
|
|
293
|
+
# No penalty for unproductive actions
|
|
294
|
+
StepPenaltyComponent(penalty=0.0), # 0.0 per step
|
|
280
295
|
]
|
|
281
296
|
)
|
|
282
297
|
|
|
@@ -640,6 +655,12 @@ class PokemonRedEngine(StatefulEngine, IReproducibleEngine):
|
|
|
640
655
|
"prev_text_box_active": bool(prev_state.get("text_box_active", False)),
|
|
641
656
|
"prev_enemy_hp_current": int(prev_state.get("enemy_hp_current", 0)),
|
|
642
657
|
"prev_enemy_hp_percentage": float(prev_state.get("enemy_hp_percentage", 0.0)),
|
|
658
|
+
"prev_player_x": int(prev_state.get("player_x", 0)),
|
|
659
|
+
"prev_player_y": int(prev_state.get("player_y", 0)),
|
|
660
|
+
"prev_party": prev_state.get("party", []),
|
|
661
|
+
"prev_inventory": prev_state.get("inventory", []),
|
|
662
|
+
"prev_party_hp_current": int(prev_state.get("party_hp_current", 0)),
|
|
663
|
+
"prev_party_hp_max": int(prev_state.get("party_hp_max", 0)),
|
|
643
664
|
},
|
|
644
665
|
)
|
|
645
666
|
except Exception as e:
|