synth-ai 0.2.9.dev5__py3-none-any.whl → 0.2.9.dev6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of synth-ai might be problematic. Click here for more details.
- examples/__init__.py +16 -0
- examples/crafter_debug_render.py +23 -17
- examples/qwen_coder/README.md +102 -0
- examples/qwen_coder/_shared.py +113 -0
- examples/qwen_coder/configs/coder_lora_30b.toml +61 -0
- examples/qwen_coder/configs/coder_lora_4b.toml +57 -0
- examples/qwen_coder/configs/coder_lora_small.toml +58 -0
- examples/qwen_coder/generate_dataset.py +98 -0
- examples/qwen_coder/infer_ft_smoke.py +64 -0
- examples/qwen_coder/infer_prod_proxy.py +73 -0
- examples/qwen_coder/infer_via_synth.py +87 -0
- examples/qwen_coder/scripts/infer_coder.sh +18 -0
- examples/qwen_coder/scripts/train_coder_30b.sh +21 -0
- examples/qwen_coder/sft_full_17b.py +103 -0
- examples/qwen_coder/sft_lora_30b.py +110 -0
- examples/qwen_coder/subset_jsonl.py +38 -0
- examples/qwen_coder/validate_jsonl.py +59 -0
- examples/rl/configs/eval_base_qwen.toml +1 -1
- examples/rl/configs/rl_from_base_qwen17.toml +1 -1
- examples/rl/download_dataset.py +26 -10
- examples/rl/run_eval.py +53 -52
- examples/rl/run_rl_and_save.py +29 -12
- examples/rl/task_app/math_single_step.py +180 -41
- examples/rl/task_app/math_task_app.py +14 -6
- examples/sft/README.md +139 -0
- examples/sft/configs/crafter_fft_qwen0p6b.toml +44 -0
- examples/sft/configs/crafter_lora_qwen0p6b.toml +45 -0
- examples/sft/evaluate.py +117 -0
- examples/sft/export_dataset.py +117 -0
- examples/sft/generate_traces.py +162 -0
- examples/swe/__init__.py +12 -0
- examples/swe/task_app/README.md +105 -0
- examples/swe/task_app/__init__.py +2 -0
- examples/swe/task_app/grpo_swe_mini.py +571 -0
- examples/swe/task_app/grpo_swe_mini_task_app.py +136 -0
- examples/swe/task_app/hosted/README.md +173 -0
- examples/swe/task_app/hosted/__init__.py +5 -0
- examples/swe/task_app/hosted/branching.py +143 -0
- examples/swe/task_app/hosted/environment_routes.py +1289 -0
- examples/swe/task_app/hosted/envs/__init__.py +1 -0
- examples/swe/task_app/hosted/envs/crafter/__init__.py +6 -0
- examples/swe/task_app/hosted/envs/crafter/app.py +1 -0
- examples/swe/task_app/hosted/envs/crafter/environment.py +522 -0
- examples/swe/task_app/hosted/envs/crafter/policy.py +478 -0
- examples/swe/task_app/hosted/envs/crafter/react_agent.py +108 -0
- examples/swe/task_app/hosted/envs/crafter/shared.py +305 -0
- examples/swe/task_app/hosted/envs/crafter/tools.py +47 -0
- examples/swe/task_app/hosted/envs/mini_swe/__init__.py +8 -0
- examples/swe/task_app/hosted/envs/mini_swe/environment.py +1164 -0
- examples/swe/task_app/hosted/envs/mini_swe/policy.py +355 -0
- examples/swe/task_app/hosted/envs/mini_swe/shared.py +83 -0
- examples/swe/task_app/hosted/envs/mini_swe/tools.py +96 -0
- examples/swe/task_app/hosted/hosted_app.py +204 -0
- examples/swe/task_app/hosted/inference/__init__.py +5 -0
- examples/swe/task_app/hosted/inference/openai_client.py +618 -0
- examples/swe/task_app/hosted/main.py +100 -0
- examples/swe/task_app/hosted/policy_routes.py +1079 -0
- examples/swe/task_app/hosted/registry.py +195 -0
- examples/swe/task_app/hosted/rollout.py +1869 -0
- examples/swe/task_app/hosted/storage/__init__.py +5 -0
- examples/swe/task_app/hosted/storage/volume.py +211 -0
- examples/swe/task_app/hosted/test_agents.py +161 -0
- examples/swe/task_app/hosted/test_service.py +137 -0
- examples/swe/task_app/hosted/utils.py +62 -0
- examples/vlm/README.md +68 -0
- examples/vlm/configs/crafter_vlm_gpt4o.toml +44 -0
- examples/vlm/crafter_image_only_agent.py +207 -0
- examples/vlm/crafter_openai_vlm_agent.py +277 -0
- examples/vlm/filter_image_rows.py +63 -0
- examples/vlm/run_crafter_vlm_benchmark.py +316 -0
- examples/warming_up_to_rl/analyze_trace_db.py +12 -10
- examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +11 -1
- examples/warming_up_to_rl/export_trace_sft.py +218 -36
- examples/warming_up_to_rl/groq_test.py +15 -8
- examples/warming_up_to_rl/manage_secrets.py +29 -25
- examples/warming_up_to_rl/readme.md +9 -2
- examples/warming_up_to_rl/run_eval.py +137 -61
- examples/warming_up_to_rl/run_fft_and_save.py +131 -60
- examples/warming_up_to_rl/run_local_rollout.py +88 -39
- examples/warming_up_to_rl/run_local_rollout_modal.py +114 -28
- examples/warming_up_to_rl/run_local_rollout_parallel.py +81 -20
- examples/warming_up_to_rl/run_local_rollout_traced.py +126 -23
- examples/warming_up_to_rl/run_rl_and_save.py +35 -12
- examples/warming_up_to_rl/run_rollout_remote.py +44 -19
- examples/warming_up_to_rl/task_app/README.md +6 -2
- examples/warming_up_to_rl/task_app/grpo_crafter.py +319 -57
- examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +11 -30
- examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +9 -11
- examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +137 -182
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +150 -57
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +105 -69
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +19 -7
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +45 -42
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +47 -45
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +198 -92
- examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +0 -2
- examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +361 -263
- examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +21 -23
- examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +394 -274
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +56 -62
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +1 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +6 -15
- examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +4 -3
- synth/__init__.py +14 -0
- synth_ai/__init__.py +20 -4
- synth_ai/api/models/supported.py +376 -0
- synth_ai/api/train/builders.py +157 -26
- synth_ai/api/train/cli.py +213 -57
- synth_ai/api/train/config_finder.py +65 -5
- synth_ai/api/train/env_resolver.py +33 -15
- synth_ai/api/train/pollers.py +13 -4
- synth_ai/api/train/supported_algos.py +139 -0
- synth_ai/api/train/task_app.py +5 -3
- synth_ai/api/train/utils.py +33 -48
- synth_ai/cli/__init__.py +19 -4
- synth_ai/cli/_modal_wrapper.py +28 -0
- synth_ai/cli/_typer_patch.py +49 -0
- synth_ai/cli/balance.py +2 -3
- synth_ai/cli/calc.py +1 -1
- synth_ai/cli/demo.py +21 -6
- synth_ai/cli/recent.py +2 -2
- synth_ai/cli/rl_demo.py +77 -17
- synth_ai/cli/root.py +116 -39
- synth_ai/cli/status.py +2 -2
- synth_ai/cli/task_apps.py +1699 -259
- synth_ai/cli/traces.py +7 -4
- synth_ai/cli/turso.py +73 -0
- synth_ai/cli/watch.py +12 -18
- synth_ai/core/experiment.py +0 -2
- synth_ai/demo_registry.py +68 -31
- synth_ai/demos/core/cli.py +516 -194
- synth_ai/demos/demo_task_apps/__init__.py +3 -3
- synth_ai/demos/demo_task_apps/core.py +64 -28
- synth_ai/demos/demo_task_apps/crafter/configs/crafter_fft_4b.toml +2 -3
- synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +37 -30
- synth_ai/demos/demo_task_apps/math/_common.py +1 -2
- synth_ai/demos/demo_task_apps/math/app.py +2 -1
- synth_ai/demos/demo_task_apps/math/deploy_modal.py +3 -6
- synth_ai/demos/demo_task_apps/math/modal_task_app.py +183 -82
- synth_ai/demos/demo_task_apps/math/task_app_entry.py +0 -2
- synth_ai/environments/examples/bandit/engine.py +12 -4
- synth_ai/environments/examples/bandit/taskset.py +4 -4
- synth_ai/environments/examples/crafter_classic/environment.py +76 -1
- synth_ai/environments/reproducibility/tree.py +5 -6
- synth_ai/environments/service/app.py +11 -12
- synth_ai/environments/service/core_routes.py +10 -9
- synth_ai/environments/stateful/engine.py +1 -1
- synth_ai/environments/tasks/core.py +1 -0
- synth_ai/environments/tasks/filters.py +5 -6
- synth_ai/environments/tasks/utils.py +4 -5
- synth_ai/evals/base.py +0 -2
- synth_ai/handshake.py +11 -9
- synth_ai/http.py +1 -1
- synth_ai/http_client.py +43 -11
- synth_ai/inference/__init__.py +0 -2
- synth_ai/inference/client.py +20 -6
- synth_ai/jobs/client.py +103 -78
- synth_ai/learning/__init__.py +41 -6
- synth_ai/learning/algorithms.py +14 -0
- synth_ai/learning/client.py +121 -29
- synth_ai/learning/config.py +2 -40
- synth_ai/learning/constants.py +0 -2
- synth_ai/learning/ft_client.py +4 -56
- synth_ai/learning/health.py +13 -7
- synth_ai/learning/jobs.py +43 -47
- synth_ai/{rl → learning/rl}/__init__.py +14 -5
- synth_ai/learning/rl/client.py +267 -0
- synth_ai/learning/rl/config.py +31 -0
- synth_ai/{rl → learning/rl}/contracts.py +5 -10
- synth_ai/{rl → learning/rl}/env_keys.py +45 -16
- synth_ai/learning/rl/secrets.py +13 -0
- synth_ai/learning/rl_client.py +2 -253
- synth_ai/learning/sft/__init__.py +29 -0
- synth_ai/learning/sft/client.py +68 -0
- synth_ai/learning/sft/config.py +270 -0
- synth_ai/learning/sft/data.py +295 -0
- synth_ai/learning/sse.py +25 -26
- synth_ai/learning/validators.py +25 -24
- synth_ai/lm/__init__.py +21 -47
- synth_ai/task/__init__.py +26 -27
- synth_ai/task/apps/__init__.py +18 -19
- synth_ai/task/auth.py +35 -23
- synth_ai/task/client.py +15 -13
- synth_ai/task/contracts.py +37 -35
- synth_ai/task/datasets.py +9 -6
- synth_ai/task/errors.py +11 -10
- synth_ai/task/health.py +17 -11
- synth_ai/task/json.py +58 -24
- synth_ai/task/proxy.py +15 -14
- synth_ai/task/rubrics.py +22 -15
- synth_ai/task/server.py +43 -17
- synth_ai/task/tracing_utils.py +12 -7
- synth_ai/task/validators.py +0 -1
- synth_ai/task/vendors.py +5 -7
- synth_ai/tracing_v3/__init__.py +2 -0
- synth_ai/tracing_v3/abstractions.py +21 -4
- synth_ai/tracing_v3/db_config.py +26 -1
- synth_ai/tracing_v3/decorators.py +18 -15
- synth_ai/tracing_v3/examples/basic_usage.py +3 -2
- synth_ai/tracing_v3/hooks.py +6 -4
- synth_ai/tracing_v3/llm_call_record_helpers.py +6 -6
- synth_ai/tracing_v3/replica_sync.py +1 -0
- synth_ai/tracing_v3/session_tracer.py +63 -16
- synth_ai/tracing_v3/storage/base.py +89 -1
- synth_ai/tracing_v3/storage/config.py +21 -8
- synth_ai/tracing_v3/storage/factory.py +10 -8
- synth_ai/tracing_v3/storage/utils.py +4 -2
- synth_ai/tracing_v3/turso/daemon.py +7 -2
- synth_ai/tracing_v3/turso/models.py +5 -2
- synth_ai/tracing_v3/turso/native_manager.py +1173 -0
- synth_ai/tracing_v3/utils.py +4 -3
- synth_ai/v0/api/__init__.py +8 -0
- synth_ai/v0/api/models/__init__.py +8 -0
- synth_ai/v0/api/models/supported.py +8 -0
- synth_ai/v0/config/__init__.py +15 -0
- synth_ai/v0/config/base_url.py +12 -0
- synth_ai/v0/lm/__init__.py +51 -0
- synth_ai/{lm → v0/lm}/caching/ephemeral.py +3 -5
- synth_ai/{lm → v0/lm}/caching/handler.py +4 -4
- synth_ai/{lm → v0/lm}/caching/initialize.py +1 -1
- synth_ai/{lm → v0/lm}/caching/persistent.py +1 -1
- synth_ai/{lm → v0/lm}/config.py +6 -1
- synth_ai/{lm → v0/lm}/core/all.py +9 -9
- synth_ai/{lm → v0/lm}/core/exceptions.py +0 -2
- synth_ai/{lm → v0/lm}/core/main.py +19 -7
- synth_ai/{lm → v0/lm}/core/main_v3.py +10 -10
- synth_ai/{lm → v0/lm}/core/synth_models.py +2 -15
- synth_ai/{lm → v0/lm}/core/vendor_clients.py +6 -4
- synth_ai/{lm → v0/lm}/overrides.py +4 -4
- synth_ai/{lm → v0/lm}/provider_support/anthropic.py +4 -4
- synth_ai/{lm → v0/lm}/provider_support/openai.py +5 -5
- synth_ai/{lm → v0/lm}/structured_outputs/handler.py +5 -5
- synth_ai/{lm → v0/lm}/structured_outputs/rehabilitate.py +1 -1
- synth_ai/{lm → v0/lm}/vendors/core/anthropic_api.py +16 -16
- synth_ai/{lm → v0/lm}/vendors/core/gemini_api.py +5 -5
- synth_ai/{lm → v0/lm}/vendors/core/mistral_api.py +5 -5
- synth_ai/{lm → v0/lm}/vendors/core/openai_api.py +12 -10
- synth_ai/{lm → v0/lm}/vendors/openai_standard.py +11 -9
- synth_ai/{lm → v0/lm}/vendors/openai_standard_responses.py +8 -5
- synth_ai/{lm → v0/lm}/vendors/supported/custom_endpoint.py +4 -6
- synth_ai/{lm → v0/lm}/vendors/supported/deepseek.py +2 -2
- synth_ai/{lm → v0/lm}/vendors/supported/grok.py +2 -2
- synth_ai/{lm → v0/lm}/vendors/supported/groq.py +1 -1
- synth_ai/{lm → v0/lm}/vendors/supported/ollama.py +1 -1
- synth_ai/{lm → v0/lm}/vendors/supported/openrouter.py +3 -3
- synth_ai/{lm → v0/lm}/vendors/supported/together.py +1 -1
- synth_ai/{lm → v0/lm}/vendors/synth_client.py +38 -11
- synth_ai/v0/tracing/upload.py +32 -135
- synth_ai/v0/tracing_v3/__init__.py +10 -0
- synth_ai/v0/tracing_v3/abstractions.py +3 -0
- synth_ai/v0/tracing_v3/decorators.py +3 -0
- synth_ai/v0/tracing_v3/llm_call_record_helpers.py +3 -0
- synth_ai/v0/tracing_v3/session_tracer.py +3 -0
- synth_ai-0.2.9.dev6.dist-info/METADATA +191 -0
- {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.9.dev6.dist-info}/RECORD +291 -262
- {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.9.dev6.dist-info}/top_level.txt +1 -0
- examples/common_old/backend.py +0 -21
- examples/evals_old/README.md +0 -98
- examples/evals_old/__init__.py +0 -6
- examples/evals_old/compare_models.py +0 -1037
- examples/evals_old/example_log.md +0 -145
- examples/evals_old/run_demo.sh +0 -126
- examples/evals_old/trace_analysis.py +0 -270
- examples/finetuning_old/_backup_synth_qwen/config.toml +0 -29
- examples/finetuning_old/_backup_synth_qwen/example_log.md +0 -324
- examples/finetuning_old/_backup_synth_qwen/filter_traces.py +0 -60
- examples/finetuning_old/_backup_synth_qwen/filter_traces_achievements.py +0 -239
- examples/finetuning_old/_backup_synth_qwen/purge_v3_traces.py +0 -109
- examples/finetuning_old/_backup_synth_qwen/react_agent_lm.py +0 -1924
- examples/finetuning_old/_backup_synth_qwen/readme.md +0 -49
- examples/finetuning_old/_backup_synth_qwen/run_crafter_qwen4b.py +0 -114
- examples/finetuning_old/_backup_synth_qwen/run_demo.sh +0 -195
- examples/finetuning_old/_backup_synth_qwen/sft_kickoff.py +0 -118
- examples/finetuning_old/synth_qwen_v1/README.md +0 -68
- examples/finetuning_old/synth_qwen_v1/filter_traces.py +0 -60
- examples/finetuning_old/synth_qwen_v1/filter_traces_achievements.py +0 -239
- examples/finetuning_old/synth_qwen_v1/finetune.py +0 -46
- examples/finetuning_old/synth_qwen_v1/hello_ft_model.py +0 -71
- examples/finetuning_old/synth_qwen_v1/infer.py +0 -37
- examples/finetuning_old/synth_qwen_v1/poll.py +0 -44
- examples/finetuning_old/synth_qwen_v1/prepare_data.py +0 -35
- examples/finetuning_old/synth_qwen_v1/purge_v3_traces.py +0 -109
- examples/finetuning_old/synth_qwen_v1/react_agent_lm.py +0 -1932
- examples/finetuning_old/synth_qwen_v1/run_crafter_sft_job.py +0 -207
- examples/finetuning_old/synth_qwen_v1/run_ft_job.py +0 -232
- examples/finetuning_old/synth_qwen_v1/upload_data.py +0 -34
- examples/finetuning_old/synth_qwen_v1/util.py +0 -147
- examples/rl_old/task_app.py +0 -962
- examples/warming_up_to_rl/old/event_rewards.md +0 -234
- examples/warming_up_to_rl/old/notes.md +0 -73
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_traces_sft_turso.py +0 -738
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/filter_traces_sft_turso.py +0 -580
- synth_ai/experimental/synth_oss.py +0 -446
- synth_ai/install_sqld.sh +0 -40
- synth_ai/learning/filtering.py +0 -0
- synth_ai/learning/offline/dpo.py +0 -0
- synth_ai/learning/offline/providers.py +0 -7
- synth_ai/learning/offline/sft.py +0 -0
- synth_ai/learning/offline/shared.py +0 -0
- synth_ai/learning/online/grpo.py +0 -0
- synth_ai/learning/online/irft.py +0 -0
- synth_ai/learning/prompts/banking77_injection_eval.py +0 -168
- synth_ai/learning/prompts/gepa.py +0 -0
- synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +0 -213
- synth_ai/learning/prompts/mipro.py +0 -289
- synth_ai/learning/prompts/random_search.py +0 -246
- synth_ai/learning/prompts/run_mipro_banking77.py +0 -172
- synth_ai/learning/prompts/run_random_search_banking77.py +0 -324
- synth_ai/rl/secrets.py +0 -19
- synth_ai/scripts/verify_rewards.py +0 -100
- synth_ai/tracing/__init__.py +0 -30
- synth_ai/tracing_v1/__init__.py +0 -33
- synth_ai/tracing_v3/turso/__init__.py +0 -25
- synth_ai/tracing_v3/turso/manager.py +0 -774
- synth_ai/zyk/__init__.py +0 -30
- synth_ai-0.2.9.dev5.dist-info/METADATA +0 -131
- /synth_ai/{lm → v0/lm}/caching/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/caching/constants.py +0 -0
- /synth_ai/{lm → v0/lm}/caching/dbs.py +0 -0
- /synth_ai/{lm → v0/lm}/constants.py +0 -0
- /synth_ai/{lm → v0/lm}/core/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/cost/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/cost/monitor.py +0 -0
- /synth_ai/{lm → v0/lm}/cost/statefulness.py +0 -0
- /synth_ai/{lm → v0/lm}/injection.py +0 -0
- /synth_ai/{lm → v0/lm}/provider_support/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/provider_support/suppress_logging.py +0 -0
- /synth_ai/{lm → v0/lm}/structured_outputs/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/structured_outputs/inject.py +0 -0
- /synth_ai/{lm → v0/lm}/tools/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/tools/base.py +0 -0
- /synth_ai/{lm → v0/lm}/unified_interface.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/base.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/core/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/core/synth_dev_api.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/local/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/local/ollama.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/retries.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/supported/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/warmup.py +0 -0
- {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.9.dev6.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.9.dev6.dist-info}/entry_points.txt +0 -0
- {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.9.dev6.dist-info}/licenses/LICENSE +0 -0
|
@@ -8,8 +8,9 @@ import json
|
|
|
8
8
|
import sqlite3
|
|
9
9
|
import sys
|
|
10
10
|
from collections import Counter, defaultdict
|
|
11
|
+
from collections.abc import Iterable
|
|
11
12
|
from pathlib import Path
|
|
12
|
-
from typing import Any
|
|
13
|
+
from typing import Any
|
|
13
14
|
|
|
14
15
|
Row = sqlite3.Row
|
|
15
16
|
|
|
@@ -23,7 +24,7 @@ def connect(db_path: Path) -> sqlite3.Connection:
|
|
|
23
24
|
def _parse_json(value: Any) -> Any:
|
|
24
25
|
if value is None:
|
|
25
26
|
return None
|
|
26
|
-
if isinstance(value,
|
|
27
|
+
if isinstance(value, dict | list):
|
|
27
28
|
return value
|
|
28
29
|
try:
|
|
29
30
|
return json.loads(value)
|
|
@@ -31,7 +32,7 @@ def _parse_json(value: Any) -> Any:
|
|
|
31
32
|
return None
|
|
32
33
|
|
|
33
34
|
|
|
34
|
-
AchievementMap = dict[
|
|
35
|
+
AchievementMap = dict[tuple[str, int], dict[str, list[str]]]
|
|
35
36
|
|
|
36
37
|
|
|
37
38
|
def fetch_achievement_data(
|
|
@@ -116,7 +117,7 @@ def fetch_achievement_data(
|
|
|
116
117
|
achievement_name_counts.update(achievement_set)
|
|
117
118
|
|
|
118
119
|
achievement_size_counts: Counter = Counter()
|
|
119
|
-
for
|
|
120
|
+
for _session_id, count in unique_counts_per_session.items():
|
|
120
121
|
achievement_size_counts[count] += 1
|
|
121
122
|
|
|
122
123
|
return (
|
|
@@ -203,25 +204,71 @@ def parse_event_filters(specs: list[str] | None) -> list[tuple[str, float]]:
|
|
|
203
204
|
if min_val_str:
|
|
204
205
|
try:
|
|
205
206
|
min_val = float(min_val_str)
|
|
206
|
-
except ValueError:
|
|
207
|
+
except ValueError as e:
|
|
207
208
|
print(f"Invalid event reward specification '{spec}'", file=sys.stderr)
|
|
208
|
-
raise SystemExit(1)
|
|
209
|
+
raise SystemExit(1) from e
|
|
209
210
|
filters.append((reward_type, min_val))
|
|
210
211
|
return filters
|
|
211
212
|
|
|
212
213
|
|
|
213
|
-
def
|
|
214
|
-
|
|
214
|
+
def _collect_content(
|
|
215
|
+
parts: Iterable[dict[str, Any]] | None,
|
|
216
|
+
) -> tuple[Any, bool]:
|
|
217
|
+
"""Normalise multimodal content parts into OpenAI-style segments."""
|
|
218
|
+
|
|
215
219
|
if not parts:
|
|
216
|
-
return ""
|
|
220
|
+
return "", False
|
|
221
|
+
|
|
222
|
+
segments: list[dict[str, Any]] = []
|
|
223
|
+
has_image = False
|
|
224
|
+
|
|
217
225
|
for part in parts:
|
|
218
226
|
if not isinstance(part, dict):
|
|
219
227
|
continue
|
|
220
|
-
|
|
228
|
+
ptype = part.get("type")
|
|
229
|
+
if ptype == "text":
|
|
221
230
|
text = part.get("text")
|
|
222
|
-
if isinstance(text, str)
|
|
223
|
-
|
|
224
|
-
|
|
231
|
+
if isinstance(text, str):
|
|
232
|
+
segments.append({"type": "text", "text": text})
|
|
233
|
+
elif ptype == "image":
|
|
234
|
+
uri = part.get("uri")
|
|
235
|
+
mime_type = part.get("mime_type") or "image/png"
|
|
236
|
+
data_url = None
|
|
237
|
+
if isinstance(uri, str) and uri.startswith("data:"):
|
|
238
|
+
data_url = uri
|
|
239
|
+
else:
|
|
240
|
+
source = part.get("data") or part.get("source")
|
|
241
|
+
if isinstance(source, dict):
|
|
242
|
+
base64_data = source.get("data")
|
|
243
|
+
media_type = source.get("media_type") or mime_type
|
|
244
|
+
if isinstance(base64_data, str) and base64_data:
|
|
245
|
+
data_url = f"data:{media_type};base64,{base64_data}"
|
|
246
|
+
if data_url:
|
|
247
|
+
has_image = True
|
|
248
|
+
segments.append({"type": "image_url", "image_url": {"url": data_url}})
|
|
249
|
+
elif ptype == "image_url":
|
|
250
|
+
image_url = part.get("image_url", {})
|
|
251
|
+
if isinstance(image_url, dict):
|
|
252
|
+
url = image_url.get("url")
|
|
253
|
+
if isinstance(url, str) and url:
|
|
254
|
+
has_image = True
|
|
255
|
+
segments.append({"type": "image_url", "image_url": {"url": url}})
|
|
256
|
+
|
|
257
|
+
if not segments:
|
|
258
|
+
return "", False
|
|
259
|
+
if not has_image and len(segments) == 1 and segments[0]["type"] == "text":
|
|
260
|
+
return segments[0]["text"], False
|
|
261
|
+
return segments, has_image
|
|
262
|
+
|
|
263
|
+
|
|
264
|
+
def _normalise_output_content(content: Any) -> tuple[Any, bool]:
|
|
265
|
+
if isinstance(content, list):
|
|
266
|
+
return _collect_content(content)
|
|
267
|
+
if isinstance(content, str):
|
|
268
|
+
return content, False
|
|
269
|
+
if content is None:
|
|
270
|
+
return "", False
|
|
271
|
+
return str(content), False
|
|
225
272
|
|
|
226
273
|
|
|
227
274
|
def _normalise_tool_calls(tool_calls: list[dict[str, Any]] | None) -> list[dict[str, Any]]:
|
|
@@ -233,7 +280,9 @@ def _normalise_tool_calls(tool_calls: list[dict[str, Any]] | None) -> list[dict[
|
|
|
233
280
|
continue
|
|
234
281
|
entry = dict(call)
|
|
235
282
|
|
|
236
|
-
func_payload: dict[str, Any] | None =
|
|
283
|
+
func_payload: dict[str, Any] | None = (
|
|
284
|
+
entry.get("function") if isinstance(entry.get("function"), dict) else None
|
|
285
|
+
)
|
|
237
286
|
name = entry.get("name") or (func_payload.get("name") if func_payload else None) or "tool"
|
|
238
287
|
|
|
239
288
|
args = None
|
|
@@ -249,7 +298,7 @@ def _normalise_tool_calls(tool_calls: list[dict[str, Any]] | None) -> list[dict[
|
|
|
249
298
|
except Exception:
|
|
250
299
|
args = raw
|
|
251
300
|
|
|
252
|
-
if isinstance(args,
|
|
301
|
+
if isinstance(args, dict | list):
|
|
253
302
|
args_str = json.dumps(args, ensure_ascii=False)
|
|
254
303
|
elif isinstance(args, str):
|
|
255
304
|
args_str = args
|
|
@@ -277,7 +326,7 @@ def _normalise_tool_calls(tool_calls: list[dict[str, Any]] | None) -> list[dict[
|
|
|
277
326
|
def build_sft_dataset(
|
|
278
327
|
conn: sqlite3.Connection,
|
|
279
328
|
achievements_map: AchievementMap,
|
|
280
|
-
sessions_filter:
|
|
329
|
+
sessions_filter: set[str],
|
|
281
330
|
*,
|
|
282
331
|
allowed_models: set[str] | None = None,
|
|
283
332
|
limit: int | None = None,
|
|
@@ -327,14 +376,18 @@ def build_sft_dataset(
|
|
|
327
376
|
|
|
328
377
|
for record in call_records:
|
|
329
378
|
messages: list[dict[str, Any]] = []
|
|
379
|
+
input_has_image = False
|
|
330
380
|
for message in record.get("input_messages", []):
|
|
331
381
|
role = message.get("role", "unknown")
|
|
332
|
-
content =
|
|
333
|
-
if not
|
|
382
|
+
content, has_image = _collect_content(message.get("parts"))
|
|
383
|
+
if (content == "" or content is None) and not has_image:
|
|
334
384
|
continue
|
|
385
|
+
if has_image and role == "user":
|
|
386
|
+
input_has_image = True
|
|
335
387
|
messages.append({"role": role, "content": content})
|
|
336
388
|
|
|
337
|
-
|
|
389
|
+
assistant_content_value: Any = ""
|
|
390
|
+
assistant_has_image = False
|
|
338
391
|
assistant_tool_calls: list[dict[str, Any]] = []
|
|
339
392
|
|
|
340
393
|
output_text = record.get("output_text")
|
|
@@ -349,17 +402,23 @@ def build_sft_dataset(
|
|
|
349
402
|
choices = parsed_response.get("choices") or []
|
|
350
403
|
if choices:
|
|
351
404
|
message = choices[0].get("message") or {}
|
|
352
|
-
|
|
405
|
+
assistant_content_value, assistant_has_image = _normalise_output_content(
|
|
406
|
+
message.get("content")
|
|
407
|
+
)
|
|
353
408
|
assistant_tool_calls = _normalise_tool_calls(message.get("tool_calls"))
|
|
354
409
|
|
|
355
410
|
if not assistant_tool_calls:
|
|
356
411
|
assistant_tool_calls = _normalise_tool_calls(record.get("output_tool_calls"))
|
|
357
412
|
|
|
358
|
-
assistant_message: dict[str, Any] = {
|
|
413
|
+
assistant_message: dict[str, Any] = {
|
|
414
|
+
"role": "assistant",
|
|
415
|
+
"content": assistant_content_value,
|
|
416
|
+
}
|
|
359
417
|
if assistant_tool_calls:
|
|
360
418
|
assistant_message["tool_calls"] = assistant_tool_calls
|
|
361
419
|
|
|
362
|
-
|
|
420
|
+
content_empty = assistant_message.get("content") in ("", None)
|
|
421
|
+
if content_empty and not assistant_message.get("tool_calls"):
|
|
363
422
|
continue
|
|
364
423
|
|
|
365
424
|
messages.append(assistant_message)
|
|
@@ -380,6 +439,9 @@ def build_sft_dataset(
|
|
|
380
439
|
"turned_true": achievements.get("all", []),
|
|
381
440
|
"cumulative_unique": cumulative_unique[session_id],
|
|
382
441
|
},
|
|
442
|
+
"user_has_image": input_has_image,
|
|
443
|
+
"assistant_has_image": assistant_has_image,
|
|
444
|
+
"has_image": input_has_image or assistant_has_image,
|
|
383
445
|
}
|
|
384
446
|
|
|
385
447
|
dataset.append({"messages": messages, "metadata": metadata})
|
|
@@ -426,27 +488,141 @@ def _validate_dataset(records: list[dict[str, Any]]) -> None:
|
|
|
426
488
|
raise SystemExit(f"Validation error while exporting dataset:\n - {summary}")
|
|
427
489
|
|
|
428
490
|
|
|
491
|
+
def _find_trace_database() -> Path | None:
|
|
492
|
+
"""Automatically discover the trace database in common locations."""
|
|
493
|
+
|
|
494
|
+
# Check for demo directory from state
|
|
495
|
+
try:
|
|
496
|
+
state_path = Path.home() / ".synth-ai" / "demo.json"
|
|
497
|
+
if state_path.exists():
|
|
498
|
+
import json
|
|
499
|
+
|
|
500
|
+
with state_path.open() as f:
|
|
501
|
+
data = json.load(f)
|
|
502
|
+
demo_dir = data.get("DEMO_DIR")
|
|
503
|
+
if demo_dir:
|
|
504
|
+
candidate = Path(demo_dir) / "traces" / "v3" / "synth_ai.db"
|
|
505
|
+
if candidate.exists():
|
|
506
|
+
return candidate
|
|
507
|
+
except Exception:
|
|
508
|
+
pass
|
|
509
|
+
|
|
510
|
+
# Search upward from current directory
|
|
511
|
+
cwd = Path.cwd()
|
|
512
|
+
for parent in [cwd] + list(cwd.parents):
|
|
513
|
+
candidate = parent / "traces" / "v3" / "synth_ai.db"
|
|
514
|
+
if candidate.exists():
|
|
515
|
+
return candidate
|
|
516
|
+
|
|
517
|
+
# Check standard locations
|
|
518
|
+
standard_locations = [
|
|
519
|
+
Path("traces/v3/synth_ai.db"),
|
|
520
|
+
Path("../traces/v3/synth_ai.db"),
|
|
521
|
+
Path.home() / "synth-ai" / "traces" / "v3" / "synth_ai.db",
|
|
522
|
+
]
|
|
523
|
+
|
|
524
|
+
for location in standard_locations:
|
|
525
|
+
try:
|
|
526
|
+
if location.exists():
|
|
527
|
+
return location.resolve()
|
|
528
|
+
except Exception:
|
|
529
|
+
continue
|
|
530
|
+
|
|
531
|
+
return None
|
|
532
|
+
|
|
533
|
+
|
|
429
534
|
def main() -> None:
|
|
430
535
|
parser = argparse.ArgumentParser(description=__doc__)
|
|
431
|
-
parser.add_argument("--db", type=Path, default=
|
|
432
|
-
parser.add_argument(
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
536
|
+
parser.add_argument("--db", type=Path, default=None, help="Path to tracing_v3 SQLite DB")
|
|
537
|
+
parser.add_argument(
|
|
538
|
+
"--output",
|
|
539
|
+
type=Path,
|
|
540
|
+
required=False,
|
|
541
|
+
help="Destination JSONL path for the exported dataset",
|
|
542
|
+
)
|
|
543
|
+
parser.add_argument(
|
|
544
|
+
"--model",
|
|
545
|
+
action="append",
|
|
546
|
+
dest="models",
|
|
547
|
+
help="Restrict to sessions whose dominant model matches (repeatable)",
|
|
548
|
+
)
|
|
549
|
+
parser.add_argument(
|
|
550
|
+
"--provider",
|
|
551
|
+
action="append",
|
|
552
|
+
dest="providers",
|
|
553
|
+
help="Restrict to sessions whose dominant provider matches (repeatable)",
|
|
554
|
+
)
|
|
555
|
+
parser.add_argument(
|
|
556
|
+
"--min-unique", type=int, default=None, help="Minimum unique achievements per session"
|
|
557
|
+
)
|
|
558
|
+
parser.add_argument(
|
|
559
|
+
"--max-unique", type=int, default=None, help="Maximum unique achievements per session"
|
|
560
|
+
)
|
|
437
561
|
parser.add_argument(
|
|
438
562
|
"--exclude-achievement",
|
|
439
563
|
action="append",
|
|
440
564
|
dest="exclude_achievements",
|
|
441
565
|
help="Achievements to ignore when evaluating --min-unique/--max-unique (repeatable)",
|
|
442
566
|
)
|
|
443
|
-
parser.add_argument(
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
567
|
+
parser.add_argument(
|
|
568
|
+
"--require-achievement",
|
|
569
|
+
action="append",
|
|
570
|
+
dest="required_achievements",
|
|
571
|
+
help="Require these outcome achievements (repeatable)",
|
|
572
|
+
)
|
|
573
|
+
parser.add_argument(
|
|
574
|
+
"--min-outcome-reward",
|
|
575
|
+
type=float,
|
|
576
|
+
default=None,
|
|
577
|
+
help="Minimum total outcome reward per session",
|
|
578
|
+
)
|
|
579
|
+
parser.add_argument(
|
|
580
|
+
"--max-outcome-reward",
|
|
581
|
+
type=float,
|
|
582
|
+
default=None,
|
|
583
|
+
help="Maximum total outcome reward per session",
|
|
584
|
+
)
|
|
585
|
+
parser.add_argument(
|
|
586
|
+
"--event-reward",
|
|
587
|
+
action="append",
|
|
588
|
+
dest="event_reward_filters",
|
|
589
|
+
help="Require reward_type[:min_total] in event_rewards (repeatable)",
|
|
590
|
+
)
|
|
591
|
+
parser.add_argument(
|
|
592
|
+
"--limit", type=int, default=None, help="Maximum number of examples to emit"
|
|
593
|
+
)
|
|
448
594
|
args = parser.parse_args()
|
|
449
595
|
|
|
596
|
+
# Auto-discover database if not specified
|
|
597
|
+
db_path = args.db
|
|
598
|
+
if db_path is None:
|
|
599
|
+
db_path = _find_trace_database()
|
|
600
|
+
if db_path:
|
|
601
|
+
print(f"Found trace database: {db_path}")
|
|
602
|
+
else:
|
|
603
|
+
print("\nTrace database configuration:")
|
|
604
|
+
db_input = input("Trace database path [traces/v3/synth_ai.db]: ").strip()
|
|
605
|
+
db_path = Path(db_input) if db_input else Path("traces/v3/synth_ai.db")
|
|
606
|
+
|
|
607
|
+
if not db_path.exists():
|
|
608
|
+
print(f"Database not found: {db_path}", file=sys.stderr)
|
|
609
|
+
raise SystemExit(1)
|
|
610
|
+
|
|
611
|
+
output_path = args.output
|
|
612
|
+
if not output_path:
|
|
613
|
+
output_path = Path("ft_data/crafter_traces.jsonl")
|
|
614
|
+
print(f"Output will be written to: {output_path.resolve()}")
|
|
615
|
+
|
|
616
|
+
min_unique = args.min_unique
|
|
617
|
+
if min_unique is None:
|
|
618
|
+
min_unique = 0 # Default to including all traces
|
|
619
|
+
print(f"Minimum unique achievements filter: {min_unique} (all traces)")
|
|
620
|
+
|
|
621
|
+
# Override args with prompted values
|
|
622
|
+
args.db = db_path
|
|
623
|
+
args.output = output_path
|
|
624
|
+
args.min_unique = min_unique
|
|
625
|
+
|
|
450
626
|
if not args.db.exists():
|
|
451
627
|
print(f"Database not found: {args.db}", file=sys.stderr)
|
|
452
628
|
raise SystemExit(1)
|
|
@@ -488,7 +664,11 @@ def main() -> None:
|
|
|
488
664
|
|
|
489
665
|
outcome = outcome_data.get(session_id)
|
|
490
666
|
total_reward = outcome["total_reward"] if outcome else 0.0
|
|
491
|
-
final_achievements =
|
|
667
|
+
final_achievements = (
|
|
668
|
+
outcome["achievements"]
|
|
669
|
+
if outcome
|
|
670
|
+
else session_final_achievements.get(session_id, set())
|
|
671
|
+
)
|
|
492
672
|
|
|
493
673
|
if args.min_outcome_reward is not None and total_reward < args.min_outcome_reward:
|
|
494
674
|
continue
|
|
@@ -522,7 +702,9 @@ def main() -> None:
|
|
|
522
702
|
)
|
|
523
703
|
|
|
524
704
|
if not dataset:
|
|
525
|
-
print(
|
|
705
|
+
print(
|
|
706
|
+
"No rollout steps matched the filters (after session selection).", file=sys.stderr
|
|
707
|
+
)
|
|
526
708
|
raise SystemExit(1)
|
|
527
709
|
|
|
528
710
|
_validate_dataset(dataset)
|
|
@@ -530,7 +712,7 @@ def main() -> None:
|
|
|
530
712
|
session_ids = {item.get("metadata", {}).get("session_id") for item in dataset}
|
|
531
713
|
session_ids.discard(None)
|
|
532
714
|
print(
|
|
533
|
-
f"Wrote {len(dataset)} examples from {len(session_ids)} session(s) -> {args.output}",
|
|
715
|
+
f"Wrote {len(dataset)} examples from {len(session_ids)} session(s) -> {args.output.resolve()}",
|
|
534
716
|
file=sys.stderr,
|
|
535
717
|
)
|
|
536
718
|
finally:
|
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
1
|
"""Quick smoke test that drives a rollout through the Groq proxy-backed Crafter Task App."""
|
|
4
2
|
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
5
|
import argparse
|
|
6
6
|
import asyncio
|
|
7
7
|
import os
|
|
@@ -29,8 +29,8 @@ def _build_policy_payload(seed: int, model: str) -> dict[str, Any]:
|
|
|
29
29
|
{
|
|
30
30
|
"role": "user",
|
|
31
31
|
"content": (
|
|
32
|
-
"Environment seed {seed}. Plan initial survival/crafting steps and then call interact with concrete actions."
|
|
33
|
-
)
|
|
32
|
+
f"Environment seed {seed}. Plan initial survival/crafting steps and then call interact with concrete actions."
|
|
33
|
+
),
|
|
34
34
|
},
|
|
35
35
|
],
|
|
36
36
|
}
|
|
@@ -63,13 +63,21 @@ async def run(args: argparse.Namespace) -> None:
|
|
|
63
63
|
response = await client.rollout(request)
|
|
64
64
|
print("rollout.metrics →", to_jsonable(response.metrics.model_dump()))
|
|
65
65
|
for idx, step in enumerate(response.trajectories[0].steps, start=1):
|
|
66
|
-
print(
|
|
66
|
+
print(
|
|
67
|
+
f"step[{idx}] tool_calls={step.tool_calls} reward={step.reward} info={to_jsonable(step.info)}"
|
|
68
|
+
)
|
|
67
69
|
|
|
68
70
|
|
|
69
71
|
def _parse_args() -> argparse.Namespace:
|
|
70
72
|
parser = argparse.ArgumentParser(description=__doc__)
|
|
71
|
-
parser.add_argument(
|
|
72
|
-
|
|
73
|
+
parser.add_argument(
|
|
74
|
+
"--base-url", default=os.getenv("TASK_APP_BASE_URL", "http://localhost:8000")
|
|
75
|
+
)
|
|
76
|
+
parser.add_argument(
|
|
77
|
+
"--api-key",
|
|
78
|
+
default=os.getenv("TASK_APP_API_KEY"),
|
|
79
|
+
required=os.getenv("TASK_APP_API_KEY") is None,
|
|
80
|
+
)
|
|
73
81
|
parser.add_argument("--model", default=os.getenv("GROQ_MODEL", "groq/mixtral-8x7b"))
|
|
74
82
|
parser.add_argument("--inference-url", default=os.getenv("TASK_APP_INFERENCE_URL"))
|
|
75
83
|
parser.add_argument("--seed", type=int, default=int(os.getenv("CRAFTER_TEST_SEED", "42")))
|
|
@@ -85,4 +93,3 @@ def main() -> None:
|
|
|
85
93
|
|
|
86
94
|
if __name__ == "__main__":
|
|
87
95
|
main()
|
|
88
|
-
|
|
@@ -8,11 +8,10 @@ import subprocess
|
|
|
8
8
|
import sys
|
|
9
9
|
import tempfile
|
|
10
10
|
from pathlib import Path
|
|
11
|
-
from typing import Dict, Tuple
|
|
12
11
|
|
|
13
12
|
|
|
14
|
-
def load_env_file(path: Path) -> Dict[str, str]:
|
|
15
|
-
env: Dict[str, str] = {}
|
|
13
|
+
def load_env_file(path: Path) -> dict[str, str]:
|
|
14
|
+
env: dict[str, str] = {}
|
|
16
15
|
if not path.exists():
|
|
17
16
|
raise FileNotFoundError(f".env not found at {path}")
|
|
18
17
|
for line in path.read_text(encoding="utf-8").splitlines():
|
|
@@ -24,7 +23,7 @@ def load_env_file(path: Path) -> Dict[str, str]:
|
|
|
24
23
|
return env
|
|
25
24
|
|
|
26
25
|
|
|
27
|
-
def write_temp_env(kv: Dict[str, str]) -> Path:
|
|
26
|
+
def write_temp_env(kv: dict[str, str]) -> Path:
|
|
28
27
|
fd, p = tempfile.mkstemp(prefix="modal_secret_", suffix=".env")
|
|
29
28
|
path = Path(p)
|
|
30
29
|
with os.fdopen(fd, "w", encoding="utf-8") as fh:
|
|
@@ -33,22 +32,27 @@ def write_temp_env(kv: Dict[str, str]) -> Path:
|
|
|
33
32
|
return path
|
|
34
33
|
|
|
35
34
|
|
|
36
|
-
def run(cmd: str) -> Tuple[int, str]:
|
|
37
|
-
proc = subprocess.run(
|
|
35
|
+
def run(cmd: str) -> tuple[int, str]:
|
|
36
|
+
proc = subprocess.run(
|
|
37
|
+
cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True
|
|
38
|
+
)
|
|
38
39
|
return proc.returncode, proc.stdout
|
|
39
40
|
|
|
40
41
|
|
|
41
|
-
def ensure_secret(secret_name: str, kv: Dict[str, str]) -> None:
|
|
42
|
+
def ensure_secret(secret_name: str, kv: dict[str, str]) -> None:
|
|
42
43
|
if not kv:
|
|
43
44
|
print(f"[skip] {secret_name}: no values provided")
|
|
44
45
|
return
|
|
45
46
|
# Prefer passing KEY=VALUE pairs to avoid Typer --env-file bug under some shells
|
|
46
47
|
kv_args = " ".join([f"{shlex.quote(k)}={shlex.quote(v)}" for k, v in kv.items()])
|
|
48
|
+
|
|
47
49
|
# Try plain modal first; fallback to uv run modal
|
|
48
|
-
def _create() -> Tuple[int, str]:
|
|
50
|
+
def _create() -> tuple[int, str]:
|
|
49
51
|
return run(f"modal secret create {shlex.quote(secret_name)} {kv_args}")
|
|
50
|
-
|
|
52
|
+
|
|
53
|
+
def _delete() -> tuple[int, str]:
|
|
51
54
|
return run(f"printf 'y\n' | modal secret delete {shlex.quote(secret_name)}")
|
|
55
|
+
|
|
52
56
|
rc, out = _create()
|
|
53
57
|
if rc != 0:
|
|
54
58
|
# Fallback: use uv run modal
|
|
@@ -70,22 +74,17 @@ def ensure_secret(secret_name: str, kv: Dict[str, str]) -> None:
|
|
|
70
74
|
|
|
71
75
|
|
|
72
76
|
def main() -> None:
|
|
73
|
-
ap = argparse.ArgumentParser(
|
|
74
|
-
|
|
77
|
+
ap = argparse.ArgumentParser(
|
|
78
|
+
description="Sync .env keys into Modal secret bundles for the task app"
|
|
79
|
+
)
|
|
80
|
+
ap.add_argument(
|
|
81
|
+
"--env-path", default=str(Path(__file__).parent / ".env"), help="Path to .env with keys"
|
|
82
|
+
)
|
|
75
83
|
args = ap.parse_args()
|
|
76
84
|
|
|
77
85
|
env = load_env_file(Path(args.env_path))
|
|
78
86
|
|
|
79
87
|
# Secrets used by the task app
|
|
80
|
-
env_secret = {
|
|
81
|
-
k: v
|
|
82
|
-
for k, v in {
|
|
83
|
-
"ENVIRONMENT_API_KEY": env.get("ENVIRONMENT_API_KEY", ""),
|
|
84
|
-
"dev_environment_api_key": env.get("ENVIRONMENT_API_KEY", ""),
|
|
85
|
-
}.items()
|
|
86
|
-
if v
|
|
87
|
-
}
|
|
88
|
-
|
|
89
88
|
groq_secret = {
|
|
90
89
|
k: v
|
|
91
90
|
for k, v in {
|
|
@@ -105,9 +104,16 @@ def main() -> None:
|
|
|
105
104
|
}
|
|
106
105
|
|
|
107
106
|
# Optional: backend key (not mounted by task app today, but useful to keep consistent)
|
|
108
|
-
synth_secret =
|
|
109
|
-
|
|
110
|
-
|
|
107
|
+
synth_secret = (
|
|
108
|
+
{"SYNTH_API_KEY": env.get("SYNTH_API_KEY", "")} if env.get("SYNTH_API_KEY") else {}
|
|
109
|
+
)
|
|
110
|
+
|
|
111
|
+
env_key = env.get("ENVIRONMENT_API_KEY", "")
|
|
112
|
+
if env_key:
|
|
113
|
+
print(
|
|
114
|
+
"Skipping Modal secret 'crafter-environment-sdk'; the task app now expects "
|
|
115
|
+
"ENVIRONMENT_API_KEY via --env-file so the CLI-minted value stays in sync."
|
|
116
|
+
)
|
|
111
117
|
ensure_secret("groq-api-key", groq_secret)
|
|
112
118
|
ensure_secret("openai-api-key", openai_secret)
|
|
113
119
|
if synth_secret:
|
|
@@ -123,5 +129,3 @@ if __name__ == "__main__":
|
|
|
123
129
|
except Exception as e:
|
|
124
130
|
print(f"[error] {type(e).__name__}: {e}")
|
|
125
131
|
sys.exit(1)
|
|
126
|
-
|
|
127
|
-
|
|
@@ -87,9 +87,16 @@ Evaluation scripts auto-load `.env` values. Update TOMLs under `configs/` with t
|
|
|
87
87
|
|
|
88
88
|
## 4. Tracing and SFT Dataset Export
|
|
89
89
|
|
|
90
|
-
1. Serve the task app with tracing enabled (see Section 2)
|
|
90
|
+
1. Serve the task app with tracing enabled (see Section 2). Optionally, run the traced rollout helper against the running server:
|
|
91
91
|
```bash
|
|
92
|
-
uv run python examples/warming_up_to_rl/run_local_rollout_traced.py
|
|
92
|
+
uv run python examples/warming_up_to_rl/run_local_rollout_traced.py \
|
|
93
|
+
--base-url http://localhost:8001 \
|
|
94
|
+
--api-key "$ENVIRONMENT_API_KEY" \
|
|
95
|
+
--inference-api-key "$GROQ_API_KEY" \
|
|
96
|
+
--model qwen/qwen3-32b \
|
|
97
|
+
--inference-url https://api.groq.com/openai \
|
|
98
|
+
--max-llm-calls 3 \
|
|
99
|
+
--run-id local-trace
|
|
93
100
|
```
|
|
94
101
|
2. Inspect local trace databases:
|
|
95
102
|
```bash
|