synth-ai 0.2.9.dev5__py3-none-any.whl → 0.2.9.dev6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of synth-ai might be problematic. Click here for more details.
- examples/__init__.py +16 -0
- examples/crafter_debug_render.py +23 -17
- examples/qwen_coder/README.md +102 -0
- examples/qwen_coder/_shared.py +113 -0
- examples/qwen_coder/configs/coder_lora_30b.toml +61 -0
- examples/qwen_coder/configs/coder_lora_4b.toml +57 -0
- examples/qwen_coder/configs/coder_lora_small.toml +58 -0
- examples/qwen_coder/generate_dataset.py +98 -0
- examples/qwen_coder/infer_ft_smoke.py +64 -0
- examples/qwen_coder/infer_prod_proxy.py +73 -0
- examples/qwen_coder/infer_via_synth.py +87 -0
- examples/qwen_coder/scripts/infer_coder.sh +18 -0
- examples/qwen_coder/scripts/train_coder_30b.sh +21 -0
- examples/qwen_coder/sft_full_17b.py +103 -0
- examples/qwen_coder/sft_lora_30b.py +110 -0
- examples/qwen_coder/subset_jsonl.py +38 -0
- examples/qwen_coder/validate_jsonl.py +59 -0
- examples/rl/configs/eval_base_qwen.toml +1 -1
- examples/rl/configs/rl_from_base_qwen17.toml +1 -1
- examples/rl/download_dataset.py +26 -10
- examples/rl/run_eval.py +53 -52
- examples/rl/run_rl_and_save.py +29 -12
- examples/rl/task_app/math_single_step.py +180 -41
- examples/rl/task_app/math_task_app.py +14 -6
- examples/sft/README.md +139 -0
- examples/sft/configs/crafter_fft_qwen0p6b.toml +44 -0
- examples/sft/configs/crafter_lora_qwen0p6b.toml +45 -0
- examples/sft/evaluate.py +117 -0
- examples/sft/export_dataset.py +117 -0
- examples/sft/generate_traces.py +162 -0
- examples/swe/__init__.py +12 -0
- examples/swe/task_app/README.md +105 -0
- examples/swe/task_app/__init__.py +2 -0
- examples/swe/task_app/grpo_swe_mini.py +571 -0
- examples/swe/task_app/grpo_swe_mini_task_app.py +136 -0
- examples/swe/task_app/hosted/README.md +173 -0
- examples/swe/task_app/hosted/__init__.py +5 -0
- examples/swe/task_app/hosted/branching.py +143 -0
- examples/swe/task_app/hosted/environment_routes.py +1289 -0
- examples/swe/task_app/hosted/envs/__init__.py +1 -0
- examples/swe/task_app/hosted/envs/crafter/__init__.py +6 -0
- examples/swe/task_app/hosted/envs/crafter/app.py +1 -0
- examples/swe/task_app/hosted/envs/crafter/environment.py +522 -0
- examples/swe/task_app/hosted/envs/crafter/policy.py +478 -0
- examples/swe/task_app/hosted/envs/crafter/react_agent.py +108 -0
- examples/swe/task_app/hosted/envs/crafter/shared.py +305 -0
- examples/swe/task_app/hosted/envs/crafter/tools.py +47 -0
- examples/swe/task_app/hosted/envs/mini_swe/__init__.py +8 -0
- examples/swe/task_app/hosted/envs/mini_swe/environment.py +1164 -0
- examples/swe/task_app/hosted/envs/mini_swe/policy.py +355 -0
- examples/swe/task_app/hosted/envs/mini_swe/shared.py +83 -0
- examples/swe/task_app/hosted/envs/mini_swe/tools.py +96 -0
- examples/swe/task_app/hosted/hosted_app.py +204 -0
- examples/swe/task_app/hosted/inference/__init__.py +5 -0
- examples/swe/task_app/hosted/inference/openai_client.py +618 -0
- examples/swe/task_app/hosted/main.py +100 -0
- examples/swe/task_app/hosted/policy_routes.py +1079 -0
- examples/swe/task_app/hosted/registry.py +195 -0
- examples/swe/task_app/hosted/rollout.py +1869 -0
- examples/swe/task_app/hosted/storage/__init__.py +5 -0
- examples/swe/task_app/hosted/storage/volume.py +211 -0
- examples/swe/task_app/hosted/test_agents.py +161 -0
- examples/swe/task_app/hosted/test_service.py +137 -0
- examples/swe/task_app/hosted/utils.py +62 -0
- examples/vlm/README.md +68 -0
- examples/vlm/configs/crafter_vlm_gpt4o.toml +44 -0
- examples/vlm/crafter_image_only_agent.py +207 -0
- examples/vlm/crafter_openai_vlm_agent.py +277 -0
- examples/vlm/filter_image_rows.py +63 -0
- examples/vlm/run_crafter_vlm_benchmark.py +316 -0
- examples/warming_up_to_rl/analyze_trace_db.py +12 -10
- examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +11 -1
- examples/warming_up_to_rl/export_trace_sft.py +218 -36
- examples/warming_up_to_rl/groq_test.py +15 -8
- examples/warming_up_to_rl/manage_secrets.py +29 -25
- examples/warming_up_to_rl/readme.md +9 -2
- examples/warming_up_to_rl/run_eval.py +137 -61
- examples/warming_up_to_rl/run_fft_and_save.py +131 -60
- examples/warming_up_to_rl/run_local_rollout.py +88 -39
- examples/warming_up_to_rl/run_local_rollout_modal.py +114 -28
- examples/warming_up_to_rl/run_local_rollout_parallel.py +81 -20
- examples/warming_up_to_rl/run_local_rollout_traced.py +126 -23
- examples/warming_up_to_rl/run_rl_and_save.py +35 -12
- examples/warming_up_to_rl/run_rollout_remote.py +44 -19
- examples/warming_up_to_rl/task_app/README.md +6 -2
- examples/warming_up_to_rl/task_app/grpo_crafter.py +319 -57
- examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +11 -30
- examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +9 -11
- examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +137 -182
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +150 -57
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +105 -69
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +19 -7
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +45 -42
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +47 -45
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +198 -92
- examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +0 -2
- examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +361 -263
- examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +21 -23
- examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +394 -274
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +56 -62
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +1 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +6 -15
- examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +4 -3
- synth/__init__.py +14 -0
- synth_ai/__init__.py +20 -4
- synth_ai/api/models/supported.py +376 -0
- synth_ai/api/train/builders.py +157 -26
- synth_ai/api/train/cli.py +213 -57
- synth_ai/api/train/config_finder.py +65 -5
- synth_ai/api/train/env_resolver.py +33 -15
- synth_ai/api/train/pollers.py +13 -4
- synth_ai/api/train/supported_algos.py +139 -0
- synth_ai/api/train/task_app.py +5 -3
- synth_ai/api/train/utils.py +33 -48
- synth_ai/cli/__init__.py +19 -4
- synth_ai/cli/_modal_wrapper.py +28 -0
- synth_ai/cli/_typer_patch.py +49 -0
- synth_ai/cli/balance.py +2 -3
- synth_ai/cli/calc.py +1 -1
- synth_ai/cli/demo.py +21 -6
- synth_ai/cli/recent.py +2 -2
- synth_ai/cli/rl_demo.py +77 -17
- synth_ai/cli/root.py +116 -39
- synth_ai/cli/status.py +2 -2
- synth_ai/cli/task_apps.py +1699 -259
- synth_ai/cli/traces.py +7 -4
- synth_ai/cli/turso.py +73 -0
- synth_ai/cli/watch.py +12 -18
- synth_ai/core/experiment.py +0 -2
- synth_ai/demo_registry.py +68 -31
- synth_ai/demos/core/cli.py +516 -194
- synth_ai/demos/demo_task_apps/__init__.py +3 -3
- synth_ai/demos/demo_task_apps/core.py +64 -28
- synth_ai/demos/demo_task_apps/crafter/configs/crafter_fft_4b.toml +2 -3
- synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +37 -30
- synth_ai/demos/demo_task_apps/math/_common.py +1 -2
- synth_ai/demos/demo_task_apps/math/app.py +2 -1
- synth_ai/demos/demo_task_apps/math/deploy_modal.py +3 -6
- synth_ai/demos/demo_task_apps/math/modal_task_app.py +183 -82
- synth_ai/demos/demo_task_apps/math/task_app_entry.py +0 -2
- synth_ai/environments/examples/bandit/engine.py +12 -4
- synth_ai/environments/examples/bandit/taskset.py +4 -4
- synth_ai/environments/examples/crafter_classic/environment.py +76 -1
- synth_ai/environments/reproducibility/tree.py +5 -6
- synth_ai/environments/service/app.py +11 -12
- synth_ai/environments/service/core_routes.py +10 -9
- synth_ai/environments/stateful/engine.py +1 -1
- synth_ai/environments/tasks/core.py +1 -0
- synth_ai/environments/tasks/filters.py +5 -6
- synth_ai/environments/tasks/utils.py +4 -5
- synth_ai/evals/base.py +0 -2
- synth_ai/handshake.py +11 -9
- synth_ai/http.py +1 -1
- synth_ai/http_client.py +43 -11
- synth_ai/inference/__init__.py +0 -2
- synth_ai/inference/client.py +20 -6
- synth_ai/jobs/client.py +103 -78
- synth_ai/learning/__init__.py +41 -6
- synth_ai/learning/algorithms.py +14 -0
- synth_ai/learning/client.py +121 -29
- synth_ai/learning/config.py +2 -40
- synth_ai/learning/constants.py +0 -2
- synth_ai/learning/ft_client.py +4 -56
- synth_ai/learning/health.py +13 -7
- synth_ai/learning/jobs.py +43 -47
- synth_ai/{rl → learning/rl}/__init__.py +14 -5
- synth_ai/learning/rl/client.py +267 -0
- synth_ai/learning/rl/config.py +31 -0
- synth_ai/{rl → learning/rl}/contracts.py +5 -10
- synth_ai/{rl → learning/rl}/env_keys.py +45 -16
- synth_ai/learning/rl/secrets.py +13 -0
- synth_ai/learning/rl_client.py +2 -253
- synth_ai/learning/sft/__init__.py +29 -0
- synth_ai/learning/sft/client.py +68 -0
- synth_ai/learning/sft/config.py +270 -0
- synth_ai/learning/sft/data.py +295 -0
- synth_ai/learning/sse.py +25 -26
- synth_ai/learning/validators.py +25 -24
- synth_ai/lm/__init__.py +21 -47
- synth_ai/task/__init__.py +26 -27
- synth_ai/task/apps/__init__.py +18 -19
- synth_ai/task/auth.py +35 -23
- synth_ai/task/client.py +15 -13
- synth_ai/task/contracts.py +37 -35
- synth_ai/task/datasets.py +9 -6
- synth_ai/task/errors.py +11 -10
- synth_ai/task/health.py +17 -11
- synth_ai/task/json.py +58 -24
- synth_ai/task/proxy.py +15 -14
- synth_ai/task/rubrics.py +22 -15
- synth_ai/task/server.py +43 -17
- synth_ai/task/tracing_utils.py +12 -7
- synth_ai/task/validators.py +0 -1
- synth_ai/task/vendors.py +5 -7
- synth_ai/tracing_v3/__init__.py +2 -0
- synth_ai/tracing_v3/abstractions.py +21 -4
- synth_ai/tracing_v3/db_config.py +26 -1
- synth_ai/tracing_v3/decorators.py +18 -15
- synth_ai/tracing_v3/examples/basic_usage.py +3 -2
- synth_ai/tracing_v3/hooks.py +6 -4
- synth_ai/tracing_v3/llm_call_record_helpers.py +6 -6
- synth_ai/tracing_v3/replica_sync.py +1 -0
- synth_ai/tracing_v3/session_tracer.py +63 -16
- synth_ai/tracing_v3/storage/base.py +89 -1
- synth_ai/tracing_v3/storage/config.py +21 -8
- synth_ai/tracing_v3/storage/factory.py +10 -8
- synth_ai/tracing_v3/storage/utils.py +4 -2
- synth_ai/tracing_v3/turso/daemon.py +7 -2
- synth_ai/tracing_v3/turso/models.py +5 -2
- synth_ai/tracing_v3/turso/native_manager.py +1173 -0
- synth_ai/tracing_v3/utils.py +4 -3
- synth_ai/v0/api/__init__.py +8 -0
- synth_ai/v0/api/models/__init__.py +8 -0
- synth_ai/v0/api/models/supported.py +8 -0
- synth_ai/v0/config/__init__.py +15 -0
- synth_ai/v0/config/base_url.py +12 -0
- synth_ai/v0/lm/__init__.py +51 -0
- synth_ai/{lm → v0/lm}/caching/ephemeral.py +3 -5
- synth_ai/{lm → v0/lm}/caching/handler.py +4 -4
- synth_ai/{lm → v0/lm}/caching/initialize.py +1 -1
- synth_ai/{lm → v0/lm}/caching/persistent.py +1 -1
- synth_ai/{lm → v0/lm}/config.py +6 -1
- synth_ai/{lm → v0/lm}/core/all.py +9 -9
- synth_ai/{lm → v0/lm}/core/exceptions.py +0 -2
- synth_ai/{lm → v0/lm}/core/main.py +19 -7
- synth_ai/{lm → v0/lm}/core/main_v3.py +10 -10
- synth_ai/{lm → v0/lm}/core/synth_models.py +2 -15
- synth_ai/{lm → v0/lm}/core/vendor_clients.py +6 -4
- synth_ai/{lm → v0/lm}/overrides.py +4 -4
- synth_ai/{lm → v0/lm}/provider_support/anthropic.py +4 -4
- synth_ai/{lm → v0/lm}/provider_support/openai.py +5 -5
- synth_ai/{lm → v0/lm}/structured_outputs/handler.py +5 -5
- synth_ai/{lm → v0/lm}/structured_outputs/rehabilitate.py +1 -1
- synth_ai/{lm → v0/lm}/vendors/core/anthropic_api.py +16 -16
- synth_ai/{lm → v0/lm}/vendors/core/gemini_api.py +5 -5
- synth_ai/{lm → v0/lm}/vendors/core/mistral_api.py +5 -5
- synth_ai/{lm → v0/lm}/vendors/core/openai_api.py +12 -10
- synth_ai/{lm → v0/lm}/vendors/openai_standard.py +11 -9
- synth_ai/{lm → v0/lm}/vendors/openai_standard_responses.py +8 -5
- synth_ai/{lm → v0/lm}/vendors/supported/custom_endpoint.py +4 -6
- synth_ai/{lm → v0/lm}/vendors/supported/deepseek.py +2 -2
- synth_ai/{lm → v0/lm}/vendors/supported/grok.py +2 -2
- synth_ai/{lm → v0/lm}/vendors/supported/groq.py +1 -1
- synth_ai/{lm → v0/lm}/vendors/supported/ollama.py +1 -1
- synth_ai/{lm → v0/lm}/vendors/supported/openrouter.py +3 -3
- synth_ai/{lm → v0/lm}/vendors/supported/together.py +1 -1
- synth_ai/{lm → v0/lm}/vendors/synth_client.py +38 -11
- synth_ai/v0/tracing/upload.py +32 -135
- synth_ai/v0/tracing_v3/__init__.py +10 -0
- synth_ai/v0/tracing_v3/abstractions.py +3 -0
- synth_ai/v0/tracing_v3/decorators.py +3 -0
- synth_ai/v0/tracing_v3/llm_call_record_helpers.py +3 -0
- synth_ai/v0/tracing_v3/session_tracer.py +3 -0
- synth_ai-0.2.9.dev6.dist-info/METADATA +191 -0
- {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.9.dev6.dist-info}/RECORD +291 -262
- {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.9.dev6.dist-info}/top_level.txt +1 -0
- examples/common_old/backend.py +0 -21
- examples/evals_old/README.md +0 -98
- examples/evals_old/__init__.py +0 -6
- examples/evals_old/compare_models.py +0 -1037
- examples/evals_old/example_log.md +0 -145
- examples/evals_old/run_demo.sh +0 -126
- examples/evals_old/trace_analysis.py +0 -270
- examples/finetuning_old/_backup_synth_qwen/config.toml +0 -29
- examples/finetuning_old/_backup_synth_qwen/example_log.md +0 -324
- examples/finetuning_old/_backup_synth_qwen/filter_traces.py +0 -60
- examples/finetuning_old/_backup_synth_qwen/filter_traces_achievements.py +0 -239
- examples/finetuning_old/_backup_synth_qwen/purge_v3_traces.py +0 -109
- examples/finetuning_old/_backup_synth_qwen/react_agent_lm.py +0 -1924
- examples/finetuning_old/_backup_synth_qwen/readme.md +0 -49
- examples/finetuning_old/_backup_synth_qwen/run_crafter_qwen4b.py +0 -114
- examples/finetuning_old/_backup_synth_qwen/run_demo.sh +0 -195
- examples/finetuning_old/_backup_synth_qwen/sft_kickoff.py +0 -118
- examples/finetuning_old/synth_qwen_v1/README.md +0 -68
- examples/finetuning_old/synth_qwen_v1/filter_traces.py +0 -60
- examples/finetuning_old/synth_qwen_v1/filter_traces_achievements.py +0 -239
- examples/finetuning_old/synth_qwen_v1/finetune.py +0 -46
- examples/finetuning_old/synth_qwen_v1/hello_ft_model.py +0 -71
- examples/finetuning_old/synth_qwen_v1/infer.py +0 -37
- examples/finetuning_old/synth_qwen_v1/poll.py +0 -44
- examples/finetuning_old/synth_qwen_v1/prepare_data.py +0 -35
- examples/finetuning_old/synth_qwen_v1/purge_v3_traces.py +0 -109
- examples/finetuning_old/synth_qwen_v1/react_agent_lm.py +0 -1932
- examples/finetuning_old/synth_qwen_v1/run_crafter_sft_job.py +0 -207
- examples/finetuning_old/synth_qwen_v1/run_ft_job.py +0 -232
- examples/finetuning_old/synth_qwen_v1/upload_data.py +0 -34
- examples/finetuning_old/synth_qwen_v1/util.py +0 -147
- examples/rl_old/task_app.py +0 -962
- examples/warming_up_to_rl/old/event_rewards.md +0 -234
- examples/warming_up_to_rl/old/notes.md +0 -73
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_traces_sft_turso.py +0 -738
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/filter_traces_sft_turso.py +0 -580
- synth_ai/experimental/synth_oss.py +0 -446
- synth_ai/install_sqld.sh +0 -40
- synth_ai/learning/filtering.py +0 -0
- synth_ai/learning/offline/dpo.py +0 -0
- synth_ai/learning/offline/providers.py +0 -7
- synth_ai/learning/offline/sft.py +0 -0
- synth_ai/learning/offline/shared.py +0 -0
- synth_ai/learning/online/grpo.py +0 -0
- synth_ai/learning/online/irft.py +0 -0
- synth_ai/learning/prompts/banking77_injection_eval.py +0 -168
- synth_ai/learning/prompts/gepa.py +0 -0
- synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +0 -213
- synth_ai/learning/prompts/mipro.py +0 -289
- synth_ai/learning/prompts/random_search.py +0 -246
- synth_ai/learning/prompts/run_mipro_banking77.py +0 -172
- synth_ai/learning/prompts/run_random_search_banking77.py +0 -324
- synth_ai/rl/secrets.py +0 -19
- synth_ai/scripts/verify_rewards.py +0 -100
- synth_ai/tracing/__init__.py +0 -30
- synth_ai/tracing_v1/__init__.py +0 -33
- synth_ai/tracing_v3/turso/__init__.py +0 -25
- synth_ai/tracing_v3/turso/manager.py +0 -774
- synth_ai/zyk/__init__.py +0 -30
- synth_ai-0.2.9.dev5.dist-info/METADATA +0 -131
- /synth_ai/{lm → v0/lm}/caching/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/caching/constants.py +0 -0
- /synth_ai/{lm → v0/lm}/caching/dbs.py +0 -0
- /synth_ai/{lm → v0/lm}/constants.py +0 -0
- /synth_ai/{lm → v0/lm}/core/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/cost/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/cost/monitor.py +0 -0
- /synth_ai/{lm → v0/lm}/cost/statefulness.py +0 -0
- /synth_ai/{lm → v0/lm}/injection.py +0 -0
- /synth_ai/{lm → v0/lm}/provider_support/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/provider_support/suppress_logging.py +0 -0
- /synth_ai/{lm → v0/lm}/structured_outputs/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/structured_outputs/inject.py +0 -0
- /synth_ai/{lm → v0/lm}/tools/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/tools/base.py +0 -0
- /synth_ai/{lm → v0/lm}/unified_interface.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/base.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/core/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/core/synth_dev_api.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/local/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/local/ollama.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/retries.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/supported/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/warmup.py +0 -0
- {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.9.dev6.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.9.dev6.dist-info}/entry_points.txt +0 -0
- {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.9.dev6.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,239 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env python3
|
|
2
|
-
"""
|
|
3
|
-
Filter v3 Crafter traces into SFT JSONL requiring specific achievements.
|
|
4
|
-
|
|
5
|
-
Environment:
|
|
6
|
-
- CRAFTER_DB_URL (default: [filter].db_path or [traces].sqld_db_path from the config, else <repo>/traces/v3/synth_ai.db/dbs/default/data)
|
|
7
|
-
- OUTPUT_JSONL (default: ft_data/qwen4b_crafter_sft_ach.jsonl)
|
|
8
|
-
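- REQUIRED_ACHIEVEMENTS (space-separated; default: [filter].required_achievements from the config, else none, in which case MIN_ACHIEVEMENTS (default: 2) is enforced instead)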
|
|
9
|
-
- MIN_TOTAL_REWARD (float, default: 0.0)
|
|
10
|
-
- MAX_COST (float, default: inf)
|
|
11
|
-
- MAX_TOKENS (int, default: 1000000000)
|
|
12
|
-
- MODELS (optional, space-separated model names; default empty = all)
|
|
13
|
-
- WINDOW_MODE=1 to emit per-turn user→assistant examples
|
|
14
|
-
"""
|
|
15
|
-
|
|
16
|
-
import asyncio
|
|
17
|
-
import json
|
|
18
|
-
import math
|
|
19
|
-
import os
|
|
20
|
-
import tomllib
|
|
21
|
-
from pathlib import Path
|
|
22
|
-
from typing import Any
|
|
23
|
-
|
|
24
|
-
try:
|
|
25
|
-
# Preferred path (modal-specific)
|
|
26
|
-
from synth_ai.environments.examples.crafter_classic.agent_demos.crafter_modal_ft.filter_traces_sft_turso import ( # type: ignore
|
|
27
|
-
FinetuningDataExtractorV3,
|
|
28
|
-
)
|
|
29
|
-
except Exception: # pragma: no cover
|
|
30
|
-
try:
|
|
31
|
-
# Fallback path used in some dist builds
|
|
32
|
-
from synth_ai.environments.examples.crafter_classic.agent_demos.crafter_openai_ft.filter_traces_sft_turso import ( # type: ignore
|
|
33
|
-
FinetuningDataExtractorV3,
|
|
34
|
-
)
|
|
35
|
-
except Exception as _import_err: # pragma: no cover
|
|
36
|
-
raise ImportError(
|
|
37
|
-
"Could not import FinetuningDataExtractorV3 from synth_ai.") from _import_err
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
def env_list(name: str) -> list[str]:
    """Return the whitespace-separated tokens of environment variable *name*.

    An unset, empty, or whitespace-only variable yields an empty list.
    """
    raw = os.getenv(name, "")
    # str.split() without a separator ignores leading/trailing whitespace and
    # returns [] for an empty or all-whitespace string, so no extra guard is
    # needed.
    return raw.split()
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
def normalize_db_url(raw: str) -> str:
    """Return *raw* as a database URL that uses the async SQLite driver.

    Accepted inputs:
      * a bare ``*.db`` file path -> ``sqlite+aiosqlite:///<path>``
      * a ``sqlite:///...`` URL   -> ``sqlite+aiosqlite:///...``
      * an ``sqlite+aiosqlite:///...`` URL -> returned unchanged

    Any other value is returned unchanged.
    """
    async_prefix = "sqlite+aiosqlite:///"
    sync_prefix = "sqlite:///"

    if raw.startswith(async_prefix):
        return raw
    # The previous checks compared against "sqlite///" (missing the colon),
    # so genuine sqlite URLs were never rewritten to the async driver.
    if raw.startswith(sync_prefix):
        return async_prefix + raw[len(sync_prefix):]
    # A plain file path: wrap it in an async sqlite URL.
    if raw.endswith(".db") and not raw.startswith("sqlite"):
        return f"{async_prefix}{raw}"
    return raw
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
def build_filters() -> dict[str, Any]:
    """Assemble the session filters from environment variables and config.

    The TOML file named by ``CRAFTER_CONFIG`` (default: ``config.toml`` next
    to this script) is read if it exists; its ``[filter]`` table supplies
    fallbacks. Environment variables take precedence over the config.

    Returns:
        A dict with keys ``required_achievements`` (set), ``models`` (list),
        ``min_total_reward`` (float), ``max_cost`` (float) and ``max_tokens``
        (int).
    """
    config_file = os.getenv(
        "CRAFTER_CONFIG", str(Path(__file__).with_name("config.toml"))
    )
    loaded: dict[str, Any] = {}
    if os.path.exists(config_file):
        with open(config_file, "rb") as handle:
            loaded = tomllib.load(handle)
    filter_cfg = loaded.get("filter", {})

    def _setting(env_name: str, cfg_key: str, fallback: Any) -> str:
        # Env var wins; otherwise the config value (or fallback) as a string.
        return os.getenv(env_name, str(filter_cfg.get(cfg_key, fallback)))

    # No achievement gating unless provided via env or config.
    required_names = env_list("REQUIRED_ACHIEVEMENTS")
    if not required_names:
        required_names = filter_cfg.get("required_achievements", [])

    result: dict[str, Any] = {}
    result["required_achievements"] = set(required_names)
    result["models"] = env_list("MODELS")
    # Zero reward is allowed unless overridden.
    result["min_total_reward"] = float(
        _setting("MIN_TOTAL_REWARD", "min_total_reward", 0.0)
    )
    result["max_cost"] = float(_setting("MAX_COST", "max_cost", math.inf))
    result["max_tokens"] = int(_setting("MAX_TOKENS", "max_tokens", 1_000_000_000))
    return result
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
async def main() -> None:
    """Filter Crafter v3 trace sessions and write the survivors as SFT JSONL.

    Steps:
      1. Load the optional TOML config (``CRAFTER_CONFIG``, default
         ``config.toml`` next to this script).
      2. Resolve the trace database URL from ``CRAFTER_DB_URL``, the config,
         or a repo-local default path.
      3. Build the filters; when no required achievements are configured,
         fall back to a minimum achievement count (``MIN_ACHIEVEMENTS``,
         default 2).
      4. Print a best-effort preview of achievement counts and reward stats.
      5. Keep sessions passing the reward/cost/token/model/achievement
         filters and write their examples to ``OUTPUT_JSONL``.
    """
    cfg_default = Path(__file__).with_name("config.toml")
    cfg_path = os.getenv("CRAFTER_CONFIG", str(cfg_default))
    cfg: dict[str, Any] = {}
    if os.path.exists(cfg_path):
        with open(cfg_path, "rb") as f:
            cfg = tomllib.load(f)
    fcfg = cfg.get("filter", {})
    tcfg = cfg.get("traces", {})

    # Prefer env; else derive from config or repo-local v3 path.
    raw_db_url = os.getenv("CRAFTER_DB_URL", "")
    if not raw_db_url:
        db_path = fcfg.get("db_path")
        if not db_path and tcfg.get("sqld_db_path"):
            # Derive the internal data file path from the sqld dir.
            db_path = str(Path(tcfg["sqld_db_path"]) / "dbs" / "default" / "data")
        if not db_path:
            # Repo-local default: traces/v3/synth_ai.db/dbs/default/data
            repo_root = Path(__file__).resolve().parents[3]
            db_path = repo_root / "traces" / "v3" / "synth_ai.db" / "dbs" / "default" / "data"
        raw_db_url = f"sqlite+aiosqlite:///{db_path}"
    db_url = normalize_db_url(raw_db_url)
    output_path = os.getenv(
        "OUTPUT_JSONL", fcfg.get("output_jsonl", "ft_data/qwen4b_crafter_sft_ach.jsonl")
    )
    filters = build_filters()
    # If the caller set REQUIRED_ACHIEVEMENTS (env or config) we do not
    # override it. Otherwise enforce a minimum achievement count.
    if not filters.get("required_achievements"):
        try:
            min_ach = int(os.getenv("MIN_ACHIEVEMENTS", str(fcfg.get("min_achievements", 2))))
        except (TypeError, ValueError):
            # Unparseable value: fall back to the documented default.
            min_ach = 2
        filters["min_achievements"] = min_ach

    window_mode = os.getenv("WINDOW_MODE", "0") == "1"

    print("🤖 Modal/Synth FT Filter (achievements)")
    print("Using database:", db_url)
    print("Output file:", output_path)
    print(
        "Filters:",
        json.dumps(
            {k: (list(v) if isinstance(v, set) else v) for k, v in filters.items()}, indent=2
        ),
    )
    print("Window mode:", window_mode)

    # Print distributions (achievements and rewards) before filtering for
    # visibility. This is best-effort: a failure here must not abort the run,
    # but it is reported instead of being silently swallowed.
    try:
        import numpy as _np
        from collections import Counter as _Counter

        async with FinetuningDataExtractorV3(db_url) as _ex:
            _sessions = await _ex.get_all_sessions()
            _ach_counts: _Counter[str] = _Counter()
            _rewards: list[float] = []
            for _, _row in _sessions.iterrows():
                _sid = _row["session_id"]
                _ach = await _ex.get_session_achievements(_sid) or []
                _ach_counts.update(_ach)
                _met = await _ex.get_session_metrics(_sid)
                try:
                    _rewards.append(float(_met.get("total_reward", 0.0) or 0.0))
                except (AttributeError, TypeError, ValueError):
                    # Missing or non-numeric reward: leave it out of the stats.
                    continue
        print(f"\nTotal sessions: {len(_sessions)}")
        if _ach_counts:
            print("\nAchievements by session (count):")
            for _name, _c in sorted(_ach_counts.items(), key=lambda x: (-x[1], x[0])):
                print(f"  {_name}: {_c}")
        if _rewards:
            _r = _np.array(_rewards, dtype=float)
            print("\nReward stats:")
            print(
                f"  min={_r.min():.2f} median={_np.median(_r):.2f} "
                f"mean={_r.mean():.2f} max={_r.max():.2f}"
            )
    except Exception as _preview_err:
        print(f"Warning: skipping distribution preview ({type(_preview_err).__name__}: {_preview_err})")

    required: set[str] = filters["required_achievements"]
    models: list[str] = filters["models"]
    min_reward: float = filters["min_total_reward"]
    max_cost: float = filters["max_cost"]
    max_tokens: int = filters["max_tokens"]
    # Respect either explicit required achievements OR the min_achievements
    # fallback; the value does not change per session, so read it once.
    min_ach = int(filters.get("min_achievements", 0))

    def _metric(metrics: Any, key: str) -> float:
        # Treat a missing or null metric as 0, matching the preview pass above.
        return float(metrics.get(key, 0.0) or 0.0)

    model_query = """
        SELECT DISTINCT model_name
        FROM events
        WHERE session_id = :session_id
        AND event_type = 'cais'
        AND model_name IS NOT NULL
    """

    stats: dict[str, Any] = {
        "total_sessions": 0,
        "kept_sessions": 0,
        "total_examples": 0,
    }

    async with FinetuningDataExtractorV3(db_url) as extractor:
        all_sessions = await extractor.get_all_sessions()
        stats["total_sessions"] = len(all_sessions)

        kept: list[str] = []
        for _, row in all_sessions.iterrows():
            session_id = row["session_id"]
            metrics = await extractor.get_session_metrics(session_id)

            if _metric(metrics, "total_reward") < min_reward:
                continue
            if _metric(metrics, "total_cost") > max_cost:
                continue
            if _metric(metrics, "total_tokens") > max_tokens:
                continue

            if models:
                model_df = await extractor.db_manager.query_traces(
                    model_query, {"session_id": session_id}
                )
                session_models = model_df["model_name"].tolist() if not model_df.empty else []
                # Keep the session only if it used at least one allowed model.
                if not any(m in models for m in session_models):
                    continue

            if required or min_ach > 0:
                achievements = await extractor.get_session_achievements(session_id)
                if not achievements:
                    continue
                if required:
                    # Any overlap with the required set is sufficient.
                    if not (required & set(achievements)):
                        continue
                elif len(achievements) < min_ach:
                    continue

            kept.append(session_id)

        stats["kept_sessions"] = len(kept)

        if window_mode:
            training_data = await extractor.extract_openai_window_format(kept)
        else:
            training_data = await extractor.extract_openai_format(kept)

        Path(output_path).parent.mkdir(parents=True, exist_ok=True)
        with open(output_path, "w", encoding="utf-8") as f:
            for ex in training_data:
                f.write(json.dumps(ex) + "\n")
        stats["total_examples"] = len(training_data)

    print(
        "\n✅ Wrote", stats["total_examples"], "examples from", stats["kept_sessions"], "sessions"
    )
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
if __name__ == "__main__":
|
|
239
|
-
asyncio.run(main())
|
|
@@ -1,46 +0,0 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
import asyncio
|
|
4
|
-
|
|
5
|
-
import sys
|
|
6
|
-
import os
|
|
7
|
-
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__)))))
|
|
8
|
-
|
|
9
|
-
from synth_ai.learning import FtClient
|
|
10
|
-
from examples.finetuning.synth_qwen_v1.util import load_env, load_state, save_state, parse_args
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
async def _run(mode: str | None) -> None:
    """Create and start an SFT job for the file id recorded in the saved state.

    Args:
        mode: Passed straight to ``load_env`` to pick the backend URL and key.

    Raises:
        RuntimeError: If the saved state has no ``file_id`` or the create
            response has no ``job_id``.
    """
    backend_url, api_key = load_env(mode)
    ft_client = FtClient(base_url=backend_url, api_key=api_key)

    training_file = load_state().get("file_id")
    if not training_file:
        raise RuntimeError("state.json missing file_id; run upload_data.py first")

    # Qwen3 0.6B default
    job_request = {
        "model": "Qwen/Qwen3-0.6B",
        "training_file_id": training_file,
        "hyperparameters": {"n_epochs": 1, "batch_size": 4},
        "metadata": {"upload_to_wasabi": True},
    }
    created = await ft_client.create_sft_job(**job_request)

    job_id = created.get("job_id")
    if not job_id:
        raise RuntimeError(f"create_job missing job_id: {created}")
    print(f"job_id={job_id}")
    save_state({"job_id": job_id})

    started = await ft_client.start_job(job_id)
    print(f"start={started}")
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
def main() -> None:
    """Parse the command-line arguments and run ``_run`` to completion."""
    mode = parse_args().mode
    asyncio.run(_run(mode))
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
if __name__ == "__main__":
|
|
45
|
-
main()
|
|
46
|
-
|
|
@@ -1,71 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env python3
|
|
2
|
-
from __future__ import annotations
|
|
3
|
-
|
|
4
|
-
import asyncio
|
|
5
|
-
import json
|
|
6
|
-
import os
|
|
7
|
-
from pathlib import Path
|
|
8
|
-
|
|
9
|
-
import sys
|
|
10
|
-
|
|
11
|
-
# Ensure repo root on path
|
|
12
|
-
ROOT = Path(__file__).parents[3]
|
|
13
|
-
if str(ROOT) not in sys.path:
|
|
14
|
-
sys.path.insert(0, str(ROOT))
|
|
15
|
-
|
|
16
|
-
from examples.finetuning.synth_qwen_v1.util import load_env, load_state # type: ignore
|
|
17
|
-
from synth_ai.inference import InferenceClient # type: ignore
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
async def main() -> None:
    """Send one "Hello world!" chat request to the fine-tuned model.

    The model name is read from ``state.json`` next to this script. A failed
    request is not re-raised: its type, repr and traceback (plus HTTP details
    when available) are printed instead.

    Raises:
        FileNotFoundError: If ``state.json`` does not exist.
        RuntimeError: If ``state.json`` has no ``fine_tuned_model`` entry.
    """
    mode = os.getenv("MODE") or os.getenv("ENV") or "local"
    base_url, api_key = load_env(mode)

    state_file = Path(__file__).parent / "state.json"
    if not state_file.exists():
        raise FileNotFoundError(f"state.json not found at {state_file}")
    model = json.loads(state_file.read_text()).get("fine_tuned_model")
    if not model:
        raise RuntimeError("fine_tuned_model missing in state.json")

    print(f"Backend: {base_url}")
    print(f"Model: {model}")

    client = InferenceClient(base_url=base_url, api_key=api_key)
    try:
        completion = await client.create_chat_completion(
            model=model,
            messages=[{"role": "user", "content": "Hello world!"}],
            max_tokens=64,
            temperature=0.2,
            stream=False,
        )
        print("\n===== Response =====")
        print(json.dumps(completion, indent=2))
        print("===== End Response =====\n")
    except Exception as err:  # always print full failure context
        import traceback

        print("\n===== Inference Error =====")
        print(f"Type: {type(err).__name__}")
        print(f"Repr: {err!r}")
        print("Traceback:")
        print(traceback.format_exc())
        try:
            from synth_ai.http import HTTPError  # type: ignore

            if isinstance(err, HTTPError):
                print("HTTPError details:")
                for field in ("status", "url", "message"):
                    print(f"  {field}={getattr(err, field)}")
                detail = getattr(err, "detail", None)
                if detail is not None:
                    print(f"  detail={detail}")
                snippet = getattr(err, "body_snippet", None)
                if snippet:
                    print(f"  body_snippet={snippet}")
        except Exception:
            # Reporting the HTTP details is optional; never mask the original error.
            pass
        print("===== End Inference Error =====\n")
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
if __name__ == "__main__":
|
|
69
|
-
asyncio.run(main())
|
|
70
|
-
|
|
71
|
-
|
|
@@ -1,37 +0,0 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
import asyncio
|
|
4
|
-
|
|
5
|
-
import sys
|
|
6
|
-
import os
|
|
7
|
-
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__)))))
|
|
8
|
-
|
|
9
|
-
from synth_ai.inference import InferenceClient
|
|
10
|
-
from examples.finetuning.synth_qwen_v1.util import load_env, load_state, parse_args
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
async def _run(mode: str | None) -> None:
    """Request one chat completion and print the raw response.

    The model is the ``fine_tuned_model`` value returned by ``load_state``;
    when that value is missing or falsy, ``"Qwen/Qwen2.5-0.5B"`` is used.

    Args:
        mode: Passed straight to ``load_env`` to obtain the base URL and key.
    """
    base_url, api_key = load_env(mode)
    inference = InferenceClient(base_url=base_url, api_key=api_key)

    saved = load_state()
    model_id = saved.get("fine_tuned_model")
    if not model_id:
        model_id = "Qwen/Qwen2.5-0.5B"
    print(f"model={model_id}")

    request = {
        "model": model_id,
        "messages": [{"role": "user", "content": "Give me a cheerful two-line greeting."}],
        "max_tokens": 128,
        "temperature": 0.7,
        "stream": False,
    }
    completion = await inference.create_chat_completion(**request)
    print(completion)
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
def main() -> None:
    """Script entry point.

    Takes the ``mode`` attribute of the object returned by ``parse_args`` and
    runs the ``_run`` coroutine with it via ``asyncio.run``.
    """
    options = parse_args()
    asyncio.run(_run(options.mode))
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
if __name__ == "__main__":
|
|
35
|
-
main()
|
|
36
|
-
|
|
37
|
-
|
|
@@ -1,44 +0,0 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
import asyncio
|
|
4
|
-
from typing import Dict
|
|
5
|
-
|
|
6
|
-
import sys
|
|
7
|
-
import os
|
|
8
|
-
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__)))))
|
|
9
|
-
|
|
10
|
-
from synth_ai.learning import JobHandle
|
|
11
|
-
from examples.finetuning.synth_qwen_v1.util import load_env, load_state, save_state, parse_args
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
def _print_event(e: Dict) -> None:
    """Print a one-line summary of a job event.

    The line shows the event's ``seq``, ``type`` and ``message`` entries
    (``None`` for absent keys). If building or printing that line raises, the
    plain ``str()`` of the event is printed instead.
    """
    try:
        seq, kind, text = (e.get(field) for field in ("seq", "type", "message"))
        print(f"event seq={seq} type={kind} msg={text}")
    except Exception:
        # Fallback for objects that do not behave like a mapping.
        print(str(e))
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
async def _run(mode: str | None) -> None:
    """Poll the saved job until it is terminal and record the resulting model.

    The job id is taken from the ``job_id`` entry returned by ``load_state``.
    Events are reported through ``_print_event`` while polling. When the final
    payload carries a ``fine_tuned_model`` value, it is handed to
    ``save_state``.

    Args:
        mode: Passed straight to ``load_env`` to obtain the base URL and key.

    Raises:
        RuntimeError: if the loaded state has no ``job_id``.
    """
    base_url, api_key = load_env(mode)
    saved = load_state()
    job_id = saved.get("job_id")
    if not job_id:
        raise RuntimeError("state.json missing job_id; run finetune.py first")

    # Use shared JobHandle poller abstraction (strict=True for FT)
    poller = JobHandle(base_url, api_key, job_id, strict=True)
    outcome = await poller.poll_until_terminal(
        interval_seconds=2.0,
        max_seconds=1800,
        on_event=_print_event,
    )
    print(f"final_status={outcome.get('status')}")

    tuned_model = outcome.get("fine_tuned_model")
    if not tuned_model:
        return
    save_state({"fine_tuned_model": tuned_model})
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
def main() -> None:
    """Script entry point.

    Reads the ``mode`` attribute from the result of ``parse_args`` and runs
    the ``_run`` coroutine with it via ``asyncio.run``.
    """
    parsed = parse_args()
    run_mode = parsed.mode
    asyncio.run(_run(run_mode))
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
if __name__ == "__main__":
|
|
43
|
-
main()
|
|
44
|
-
|
|
@@ -1,35 +0,0 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
import json
|
|
4
|
-
import sys
|
|
5
|
-
import os
|
|
6
|
-
from pathlib import Path
|
|
7
|
-
|
|
8
|
-
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__)))))
|
|
9
|
-
|
|
10
|
-
from examples.finetuning.synth_qwen_v1.util import validate_jsonl
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
def main() -> None:
    """Write a one-example training JSONL file and validate it.

    The file is ``data/training.jsonl`` next to this script; the ``data``
    directory is created when missing. The written file is passed to
    ``validate_jsonl`` and its path is printed afterwards.
    """
    data_dir = Path(__file__).parent / "data"
    data_dir.mkdir(parents=True, exist_ok=True)
    target = data_dir / "training.jsonl"

    # Minimal single-example JSONL
    records = [
        {
            "messages": [
                {"role": "user", "content": "Write a short greeting."},
                {"role": "assistant", "content": "Hello there!"},
            ]
        },
    ]
    # One JSON document per line, each terminated by a newline.
    payload = "".join(f"{json.dumps(record)}\n" for record in records)
    target.write_text(payload)

    validate_jsonl(target)
    print(f"Wrote {target}")
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
if __name__ == "__main__":
|
|
33
|
-
main()
|
|
34
|
-
|
|
35
|
-
|
|
@@ -1,109 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env python3
|
|
2
|
-
"""
|
|
3
|
-
Purge v3 trace databases:
|
|
4
|
-
- Find all paths matching **/traces_v3_lm_synth/traces.db under the repo
|
|
5
|
-
- If the DB is inside an `old/` path → delete the DB (and -wal/-shm) outright
|
|
6
|
-
- Else → delete records older than 24 hours and VACUUM to reclaim space
|
|
7
|
-
|
|
8
|
-
Run with: uvpm examples.finetuning.synth_qwen.purge_v3_traces
|
|
9
|
-
"""
|
|
10
|
-
|
|
11
|
-
import contextlib
|
|
12
|
-
import datetime
|
|
13
|
-
import os
|
|
14
|
-
import shutil
|
|
15
|
-
import sqlite3
|
|
16
|
-
from pathlib import Path
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
def find_trace_dbs(repo_root: Path) -> list[Path]:
    """Return every ``traces_v3_lm_synth/traces.db`` found below ``repo_root``.

    The search is recursive; an empty list is returned when nothing matches.
    """
    pattern = "traces_v3_lm_synth/traces.db"
    return [*repo_root.rglob(pattern)]
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
def delete_db_files(db_path: Path) -> None:
    """Remove a database file together with its ``-wal`` and ``-shm`` siblings.

    The sibling names are derived by replacing the path's suffix with
    ``.db-wal`` and ``.db-shm``. Files that do not exist are skipped.
    """
    candidates = (
        db_path,
        db_path.with_suffix(".db-wal"),
        db_path.with_suffix(".db-shm"),
    )
    for candidate in candidates:
        if candidate.exists():
            os.remove(candidate)
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
def purge_older_than_24h(db_path: Path) -> None:
    """Delete sessions older than 24 hours from a trace DB, then compact it.

    Rows of ``session_traces`` whose ``created_at`` sorts before the cutoff
    (current UTC time minus 24 hours, formatted ``%Y-%m-%d %H:%M:%S``) are
    removed, together with the rows of ``events``, ``messages`` and
    ``session_timesteps`` that share their ``session_id``. All deletions run
    in a single transaction.

    Afterwards the file is compacted with ``VACUUM``. If that raises
    ``sqlite3.OperationalError``, the database is instead written with
    ``VACUUM INTO`` to a fresh temporary directory and moved over the
    original file.

    Args:
        db_path: Path of the SQLite database to purge.

    Raises:
        sqlite3.Error: if the deletions or the fallback compaction fail.
    """
    import tempfile  # local import: only needed by the fallback path

    cutoff = (
        datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(hours=24)
    ).strftime("%Y-%m-%d %H:%M:%S")

    # Selecting the expired ids inside each DELETE avoids binding one SQL
    # variable per session, which hits SQLite's variable limit on big purges.
    expired_ids = "SELECT session_id FROM session_traces WHERE created_at < ?"

    with contextlib.closing(sqlite3.connect(str(db_path))) as con:
        # `with con` commits on success and rolls back if a statement raises.
        with con:
            # Child tables first: the subquery needs the parent rows.
            for table in ("events", "messages", "session_timesteps"):
                con.execute(
                    f"DELETE FROM {table} WHERE session_id IN ({expired_ids})",
                    (cutoff,),
                )
            con.execute("DELETE FROM session_traces WHERE created_at < ?", (cutoff,))

    # Attempt VACUUM on a fresh connection, after the deletions are committed.
    try:
        with contextlib.closing(sqlite3.connect(str(db_path))) as con:
            con.execute("VACUUM")
    except sqlite3.OperationalError:
        # Fallback: VACUUM INTO a unique temporary directory, then replace the
        # original. The directory is removed even if the move fails.
        with tempfile.TemporaryDirectory() as tmp_dir:
            tmp_target = Path(tmp_dir) / f"{db_path.stem}_compacted.db"
            with contextlib.closing(sqlite3.connect(str(db_path))) as con:
                # Bind the file name instead of interpolating it into SQL.
                con.execute("VACUUM INTO ?", (str(tmp_target),))

            # Replace original DB with compacted copy
            delete_db_files(db_path)
            shutil.move(str(tmp_target), str(db_path))
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
def main() -> None:
    """Purge every v3 trace database found under the repository root.

    The root is taken as three directory levels above this file. A database
    that has a directory named ``old`` anywhere in its path is deleted
    outright (with its ``-wal``/``-shm`` files); every other database has its
    records older than 24 hours removed and is then compacted.
    """
    repo_root = Path(__file__).resolve().parents[3]
    dbs = find_trace_dbs(repo_root)
    print(f"🔎 Found {len(dbs)} v3 trace DB(s)")

    for db in dbs:
        db_str = str(db)
        # Compare path components rather than searching for "/old/" so the
        # check does not depend on the platform's path separator.
        if "old" in db.parts:
            print(f"🗑️ Deleting DB under old/: {db_str}")
            delete_db_files(db)
            continue
        print(f"🧹 Purging records older than 24h: {db_str}")
        purge_older_than_24h(db)
        print(f"✅ Purged and compacted: {db_str}")
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
if __name__ == "__main__":
|
|
109
|
-
main()
|