synth-ai 0.2.4.dev4__tar.gz → 0.2.4.dev6__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (411)
  1. synth_ai-0.2.4.dev6/MANIFEST.in +31 -0
  2. {synth_ai-0.2.4.dev4/synth_ai.egg-info → synth_ai-0.2.4.dev6}/PKG-INFO +2 -1
  3. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/pyproject.toml +2 -1
  4. synth_ai-0.2.4.dev6/synth_ai/environments/examples/__init__.py +1 -0
  5. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_classic/__init__.py +8 -0
  6. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_classic/agent_demos/analyze_semantic_words_markdown.py +252 -0
  7. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_classic/agent_demos/crafter_comprehensive_evaluation.py +58 -0
  8. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_browser.py +152 -0
  9. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_config.toml +24 -0
  10. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_framework.py +1195 -0
  11. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/crafter_synth_config.toml +56 -0
  12. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_config_modal.toml +32 -0
  13. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_traces_sft_duckdb_v2_backup.py +413 -0
  14. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_traces_sft_turso.py +760 -0
  15. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/kick_off_ft_modal.py +383 -0
  16. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/kick_off_ft_synth.py +34 -0
  17. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_action_results.py +53 -0
  18. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_agent_actions.py +178 -0
  19. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_latest_run.py +222 -0
  20. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_lm_traces.py +183 -0
  21. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_no_rewards.py +210 -0
  22. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_trace_issue.py +206 -0
  23. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/check_db_schema.py +49 -0
  24. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/check_latest_results.py +64 -0
  25. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/debug_agent_responses.py +88 -0
  26. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/quick_trace_check.py +77 -0
  27. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/compare_experiments.py +323 -0
  28. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/filter_traces_sft_duckdb_v2_backup.py +386 -0
  29. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/filter_traces_sft_turso.py +580 -0
  30. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/kick_off_ft_oai.py +362 -0
  31. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/multi_model_config.toml +49 -0
  32. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_enhanced_hooks.py +332 -0
  33. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_hook_events.py +97 -0
  34. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_hook_results.py +217 -0
  35. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/check_hook_storage.py +87 -0
  36. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/check_seeds.py +88 -0
  37. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/compare_seed_performance.py +195 -0
  38. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/custom_eval_pipelines.py +400 -0
  39. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/plot_hook_frequency.py +195 -0
  40. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/seed_analysis_summary.py +56 -0
  41. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/run_rollouts_for_models_and_compare_v2_backup.py +1352 -0
  42. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/run_rollouts_for_models_and_compare_v3.py +852 -0
  43. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_classic/agent_demos/crafter_quick_evaluation.py +51 -0
  44. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_classic/agent_demos/crafter_react_agent.py +872 -0
  45. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_classic/agent_demos/crafter_trace_evaluation.py +1412 -0
  46. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_classic/agent_demos/example_v3_usage.py +214 -0
  47. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_classic/agent_demos/old/compare_traces.py +296 -0
  48. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_comprehensive_evaluation.py +58 -0
  49. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_config.toml +4 -0
  50. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_env_serialization.py +464 -0
  51. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_evaluation_browser.py +152 -0
  52. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_lm_config.toml +25 -0
  53. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_quick_evaluation.py +51 -0
  54. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_trace_evaluation.py +1412 -0
  55. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_classic/agent_demos/old/debug_player_loss.py +112 -0
  56. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_classic/agent_demos/old/diagnose_service.py +203 -0
  57. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_classic/agent_demos/old/diagnose_slowness.py +305 -0
  58. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_classic/agent_demos/old/duckdb_filter_config.toml +33 -0
  59. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_classic/agent_demos/old/eval_by_difficulty.py +126 -0
  60. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_classic/agent_demos/old/eval_example.py +94 -0
  61. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_classic/agent_demos/old/explore_saved_states.py +142 -0
  62. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_classic/agent_demos/old/filter_config_strict.toml +42 -0
  63. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_classic/agent_demos/old/filter_traces_sft.py +26 -0
  64. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_classic/agent_demos/old/filter_traces_sft_OLD.py +984 -0
  65. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_classic/agent_demos/old/gemini_ft_config.toml +31 -0
  66. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_data_gemini.py +724 -0
  67. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_data_modal.py +386 -0
  68. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_metadata.py +205 -0
  69. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_classic/agent_demos/old/kick_off_ft_gemini.py +150 -0
  70. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_classic/agent_demos/old/kick_off_ft_modal.py +283 -0
  71. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_classic/agent_demos/old/practical_filter_config.toml +33 -0
  72. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_classic/agent_demos/old/prepare_vertex_ft.py +280 -0
  73. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_classic/agent_demos/old/profile_env_slowness.py +456 -0
  74. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_classic/agent_demos/old/replicate_issue.py +166 -0
  75. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_classic/agent_demos/old/run_and_eval.py +102 -0
  76. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_classic/agent_demos/old/run_comparison.py +128 -0
  77. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_classic/agent_demos/old/run_qwen_rollouts.py +655 -0
  78. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_classic/agent_demos/old/test_comparison_config.toml +20 -0
  79. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_classic/agent_demos/old/test_filter_config.toml +28 -0
  80. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_classic/agent_demos/old/trace_eval_OLD.py +202 -0
  81. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_classic/agent_demos/old/validate_openai_format.py +166 -0
  82. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_classic/config_logging.py +111 -0
  83. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_classic/engine.py +579 -0
  84. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +63 -0
  85. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_classic/engine_helpers/action_map.py +5 -0
  86. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_classic/engine_helpers/serialization.py +74 -0
  87. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +266 -0
  88. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_classic/environment.py +364 -0
  89. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_classic/old/engine_serialization_patch.py +141 -0
  90. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_classic/old/engine_serialization_patch_v2.py +243 -0
  91. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_classic/old/environment_v3.py +260 -0
  92. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_classic/old/trace_hooks.py +377 -0
  93. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_classic/old/world_config_patch.py +533 -0
  94. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_classic/taskset.py +233 -0
  95. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_classic/trace_hooks_v3.py +229 -0
  96. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +298 -0
  97. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_custom/__init__.py +4 -0
  98. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_custom/agent_demos/__init__.py +1 -0
  99. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_custom/agent_demos/trace_eval.py +202 -0
  100. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_custom/crafter/__init__.py +7 -0
  101. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_custom/crafter/config.py +182 -0
  102. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_custom/crafter/constants.py +8 -0
  103. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_custom/crafter/engine.py +269 -0
  104. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_custom/crafter/env.py +266 -0
  105. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_custom/crafter/objects.py +418 -0
  106. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_custom/crafter/recorder.py +187 -0
  107. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_custom/crafter/worldgen.py +119 -0
  108. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_custom/dataset_builder.py +373 -0
  109. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_custom/environment.py +312 -0
  110. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_custom/old/analyze_diamond_issue.py +159 -0
  111. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_custom/old/analyze_diamond_spawning.py +158 -0
  112. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_custom/old/compare_worlds.py +71 -0
  113. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_custom/old/dataset_stats.py +105 -0
  114. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_custom/old/diamond_spawning_summary.py +119 -0
  115. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_custom/old/example_dataset_usage.py +52 -0
  116. synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_custom/run_dataset.py +305 -0
  117. synth_ai-0.2.4.dev6/synth_ai/environments/examples/enron/art_helpers/email_search_tools.py +156 -0
  118. synth_ai-0.2.4.dev6/synth_ai/environments/examples/enron/art_helpers/local_email_db.py +280 -0
  119. synth_ai-0.2.4.dev6/synth_ai/environments/examples/enron/art_helpers/types_enron.py +24 -0
  120. synth_ai-0.2.4.dev6/synth_ai/environments/examples/enron/engine.py +291 -0
  121. synth_ai-0.2.4.dev6/synth_ai/environments/examples/enron/environment.py +165 -0
  122. synth_ai-0.2.4.dev6/synth_ai/environments/examples/enron/taskset.py +112 -0
  123. synth_ai-0.2.4.dev6/synth_ai/environments/examples/enron/units/keyword_stats.py +111 -0
  124. synth_ai-0.2.4.dev6/synth_ai/environments/examples/minigrid/__init__.py +48 -0
  125. synth_ai-0.2.4.dev6/synth_ai/environments/examples/minigrid/agent_demos/minigrid_evaluation_framework.py +1188 -0
  126. synth_ai-0.2.4.dev6/synth_ai/environments/examples/minigrid/agent_demos/minigrid_quick_evaluation.py +47 -0
  127. synth_ai-0.2.4.dev6/synth_ai/environments/examples/minigrid/agent_demos/minigrid_react_agent.py +562 -0
  128. synth_ai-0.2.4.dev6/synth_ai/environments/examples/minigrid/agent_demos/minigrid_trace_evaluation.py +220 -0
  129. synth_ai-0.2.4.dev6/synth_ai/environments/examples/minigrid/engine.py +589 -0
  130. synth_ai-0.2.4.dev6/synth_ai/environments/examples/minigrid/environment.py +274 -0
  131. synth_ai-0.2.4.dev6/synth_ai/environments/examples/minigrid/environment_mapping.py +242 -0
  132. synth_ai-0.2.4.dev6/synth_ai/environments/examples/minigrid/puzzle_loader.py +416 -0
  133. synth_ai-0.2.4.dev6/synth_ai/environments/examples/minigrid/taskset.py +583 -0
  134. synth_ai-0.2.4.dev6/synth_ai/environments/examples/nethack/__init__.py +7 -0
  135. synth_ai-0.2.4.dev6/synth_ai/environments/examples/nethack/achievements.py +337 -0
  136. synth_ai-0.2.4.dev6/synth_ai/environments/examples/nethack/agent_demos/nethack_evaluation_framework.py +981 -0
  137. synth_ai-0.2.4.dev6/synth_ai/environments/examples/nethack/agent_demos/nethack_quick_evaluation.py +74 -0
  138. synth_ai-0.2.4.dev6/synth_ai/environments/examples/nethack/agent_demos/nethack_react_agent.py +832 -0
  139. synth_ai-0.2.4.dev6/synth_ai/environments/examples/nethack/engine.py +738 -0
  140. synth_ai-0.2.4.dev6/synth_ai/environments/examples/nethack/environment.py +255 -0
  141. synth_ai-0.2.4.dev6/synth_ai/environments/examples/nethack/helpers/__init__.py +42 -0
  142. synth_ai-0.2.4.dev6/synth_ai/environments/examples/nethack/helpers/action_mapping.py +301 -0
  143. synth_ai-0.2.4.dev6/synth_ai/environments/examples/nethack/helpers/nle_wrapper.py +401 -0
  144. synth_ai-0.2.4.dev6/synth_ai/environments/examples/nethack/helpers/observation_utils.py +433 -0
  145. synth_ai-0.2.4.dev6/synth_ai/environments/examples/nethack/helpers/recording_wrapper.py +201 -0
  146. synth_ai-0.2.4.dev6/synth_ai/environments/examples/nethack/helpers/trajectory_recorder.py +268 -0
  147. synth_ai-0.2.4.dev6/synth_ai/environments/examples/nethack/helpers/visualization/replay_viewer.py +308 -0
  148. synth_ai-0.2.4.dev6/synth_ai/environments/examples/nethack/helpers/visualization/visualizer.py +430 -0
  149. synth_ai-0.2.4.dev6/synth_ai/environments/examples/nethack/taskset.py +323 -0
  150. synth_ai-0.2.4.dev6/synth_ai/environments/examples/red/__init__.py +7 -0
  151. synth_ai-0.2.4.dev6/synth_ai/environments/examples/red/agent_demos/__init__.py +1 -0
  152. synth_ai-0.2.4.dev6/synth_ai/environments/examples/red/config_logging.py +110 -0
  153. synth_ai-0.2.4.dev6/synth_ai/environments/examples/red/engine.py +693 -0
  154. synth_ai-0.2.4.dev6/synth_ai/environments/examples/red/engine_helpers/__init__.py +1 -0
  155. synth_ai-0.2.4.dev6/synth_ai/environments/examples/red/engine_helpers/memory_map.py +28 -0
  156. synth_ai-0.2.4.dev6/synth_ai/environments/examples/red/engine_helpers/reward_components.py +275 -0
  157. synth_ai-0.2.4.dev6/synth_ai/environments/examples/red/engine_helpers/reward_library/__init__.py +142 -0
  158. synth_ai-0.2.4.dev6/synth_ai/environments/examples/red/engine_helpers/reward_library/adaptive_rewards.py +56 -0
  159. synth_ai-0.2.4.dev6/synth_ai/environments/examples/red/engine_helpers/reward_library/battle_rewards.py +283 -0
  160. synth_ai-0.2.4.dev6/synth_ai/environments/examples/red/engine_helpers/reward_library/composite_rewards.py +149 -0
  161. synth_ai-0.2.4.dev6/synth_ai/environments/examples/red/engine_helpers/reward_library/economy_rewards.py +137 -0
  162. synth_ai-0.2.4.dev6/synth_ai/environments/examples/red/engine_helpers/reward_library/efficiency_rewards.py +56 -0
  163. synth_ai-0.2.4.dev6/synth_ai/environments/examples/red/engine_helpers/reward_library/exploration_rewards.py +330 -0
  164. synth_ai-0.2.4.dev6/synth_ai/environments/examples/red/engine_helpers/reward_library/novelty_rewards.py +120 -0
  165. synth_ai-0.2.4.dev6/synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_rewards.py +558 -0
  166. synth_ai-0.2.4.dev6/synth_ai/environments/examples/red/engine_helpers/reward_library/pokemon_rewards.py +312 -0
  167. synth_ai-0.2.4.dev6/synth_ai/environments/examples/red/engine_helpers/reward_library/social_rewards.py +147 -0
  168. synth_ai-0.2.4.dev6/synth_ai/environments/examples/red/engine_helpers/reward_library/story_rewards.py +246 -0
  169. synth_ai-0.2.4.dev6/synth_ai/environments/examples/red/engine_helpers/screen_analysis.py +367 -0
  170. synth_ai-0.2.4.dev6/synth_ai/environments/examples/red/engine_helpers/state_extraction.py +139 -0
  171. synth_ai-0.2.4.dev6/synth_ai/environments/examples/red/environment.py +235 -0
  172. synth_ai-0.2.4.dev6/synth_ai/environments/examples/red/taskset.py +77 -0
  173. synth_ai-0.2.4.dev6/synth_ai/environments/examples/red/units/__init__.py +1 -0
  174. synth_ai-0.2.4.dev6/synth_ai/environments/examples/sokoban/__init__.py +1 -0
  175. synth_ai-0.2.4.dev6/synth_ai/environments/examples/sokoban/agent_demos/sokoban_full_eval.py +900 -0
  176. synth_ai-0.2.4.dev6/synth_ai/environments/examples/sokoban/engine.py +675 -0
  177. synth_ai-0.2.4.dev6/synth_ai/environments/examples/sokoban/engine_helpers/__init__.py +1 -0
  178. synth_ai-0.2.4.dev6/synth_ai/environments/examples/sokoban/engine_helpers/room_utils.py +656 -0
  179. synth_ai-0.2.4.dev6/synth_ai/environments/examples/sokoban/engine_helpers/vendored/__init__.py +17 -0
  180. synth_ai-0.2.4.dev6/synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/__init__.py +3 -0
  181. synth_ai-0.2.4.dev6/synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/boxoban_env.py +129 -0
  182. synth_ai-0.2.4.dev6/synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/render_utils.py +370 -0
  183. synth_ai-0.2.4.dev6/synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/room_utils.py +331 -0
  184. synth_ai-0.2.4.dev6/synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env.py +305 -0
  185. synth_ai-0.2.4.dev6/synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_fixed_targets.py +66 -0
  186. synth_ai-0.2.4.dev6/synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_pull.py +114 -0
  187. synth_ai-0.2.4.dev6/synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_two_player.py +122 -0
  188. synth_ai-0.2.4.dev6/synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_variations.py +394 -0
  189. synth_ai-0.2.4.dev6/synth_ai/environments/examples/sokoban/environment.py +228 -0
  190. synth_ai-0.2.4.dev6/synth_ai/environments/examples/sokoban/generate_verified_puzzles.py +438 -0
  191. synth_ai-0.2.4.dev6/synth_ai/environments/examples/sokoban/puzzle_loader.py +311 -0
  192. synth_ai-0.2.4.dev6/synth_ai/environments/examples/sokoban/taskset.py +425 -0
  193. synth_ai-0.2.4.dev6/synth_ai/environments/examples/sokoban/units/astar_common.py +94 -0
  194. synth_ai-0.2.4.dev6/synth_ai/environments/examples/tictactoe/__init__.py +1 -0
  195. synth_ai-0.2.4.dev6/synth_ai/environments/examples/tictactoe/engine.py +368 -0
  196. synth_ai-0.2.4.dev6/synth_ai/environments/examples/tictactoe/environment.py +239 -0
  197. synth_ai-0.2.4.dev6/synth_ai/environments/examples/tictactoe/taskset.py +214 -0
  198. synth_ai-0.2.4.dev6/synth_ai/environments/examples/verilog/__init__.py +10 -0
  199. synth_ai-0.2.4.dev6/synth_ai/environments/examples/verilog/engine.py +328 -0
  200. synth_ai-0.2.4.dev6/synth_ai/environments/examples/verilog/environment.py +349 -0
  201. synth_ai-0.2.4.dev6/synth_ai/environments/examples/verilog/taskset.py +418 -0
  202. synth_ai-0.2.4.dev6/synth_ai/environments/examples/wordle/__init__.py +29 -0
  203. synth_ai-0.2.4.dev6/synth_ai/environments/examples/wordle/engine.py +391 -0
  204. synth_ai-0.2.4.dev6/synth_ai/environments/examples/wordle/environment.py +154 -0
  205. synth_ai-0.2.4.dev6/synth_ai/environments/examples/wordle/helpers/generate_instances_wordfreq.py +75 -0
  206. synth_ai-0.2.4.dev6/synth_ai/environments/examples/wordle/taskset.py +222 -0
  207. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/environments/service/app.py +8 -0
  208. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/environments/service/core_routes.py +38 -0
  209. synth_ai-0.2.4.dev6/synth_ai/learning/prompts/banking77_injection_eval.py +163 -0
  210. synth_ai-0.2.4.dev6/synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +201 -0
  211. synth_ai-0.2.4.dev6/synth_ai/learning/prompts/mipro.py +280 -0
  212. synth_ai-0.2.4.dev6/synth_ai/learning/prompts/random_search.py +247 -0
  213. synth_ai-0.2.4.dev6/synth_ai/learning/prompts/run_mipro_banking77.py +160 -0
  214. synth_ai-0.2.4.dev6/synth_ai/learning/prompts/run_random_search_banking77.py +305 -0
  215. synth_ai-0.2.4.dev6/synth_ai/lm/injection.py +81 -0
  216. synth_ai-0.2.4.dev6/synth_ai/lm/overrides.py +204 -0
  217. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/lm/provider_support/anthropic.py +39 -12
  218. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/lm/provider_support/openai.py +31 -4
  219. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/lm/vendors/core/anthropic_api.py +16 -0
  220. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/lm/vendors/openai_standard.py +35 -5
  221. synth_ai-0.2.4.dev6/synth_ai/v0/tracing/events/__init__.py +0 -0
  222. synth_ai-0.2.4.dev6/synth_ai/v0/tracing_v1/events/__init__.py +0 -0
  223. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6/synth_ai.egg-info}/PKG-INFO +2 -1
  224. synth_ai-0.2.4.dev6/synth_ai.egg-info/SOURCES.txt +406 -0
  225. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai.egg-info/requires.txt +1 -0
  226. synth_ai-0.2.4.dev4/MANIFEST.in +0 -22
  227. synth_ai-0.2.4.dev4/synth_ai/learning/prompts/mipro.py +0 -8
  228. synth_ai-0.2.4.dev4/synth_ai.egg-info/SOURCES.txt +0 -194
  229. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/LICENSE +0 -0
  230. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/README.md +0 -0
  231. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/setup.cfg +0 -0
  232. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/__init__.py +0 -0
  233. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/__main__.py +0 -0
  234. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/cli/__init__.py +0 -0
  235. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/cli/balance.py +0 -0
  236. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/cli/calc.py +0 -0
  237. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/cli/demo.py +0 -0
  238. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/cli/legacy_root_backup.py +0 -0
  239. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/cli/man.py +0 -0
  240. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/cli/recent.py +0 -0
  241. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/cli/root.py +0 -0
  242. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/cli/status.py +0 -0
  243. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/cli/traces.py +0 -0
  244. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/cli/watch.py +0 -0
  245. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/compound/cais.py +0 -0
  246. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/config/base_url.py +0 -0
  247. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/core/experiment.py +0 -0
  248. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/core/system.py +0 -0
  249. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/environments/__init__.py +0 -0
  250. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/environments/environment/__init__.py +0 -0
  251. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/environments/environment/artifacts/__init__.py +0 -0
  252. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/environments/environment/artifacts/base.py +0 -0
  253. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/environments/environment/core.py +0 -0
  254. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/environments/environment/db/__init__.py +0 -0
  255. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/environments/environment/db/sqlite.py +0 -0
  256. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/environments/environment/registry.py +0 -0
  257. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/environments/environment/resources/sqlite.py +0 -0
  258. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/environments/environment/results.py +0 -0
  259. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/environments/environment/rewards/__init__.py +0 -0
  260. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/environments/environment/rewards/core.py +0 -0
  261. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/environments/environment/shared_engine.py +0 -0
  262. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/environments/environment/tools/__init__.py +0 -0
  263. /synth_ai-0.2.4.dev4/synth_ai/environments/reproducibility/helpers.py → /synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_classic/debug_translation.py +0 -0
  264. /synth_ai-0.2.4.dev4/synth_ai/learning/filtering.py → /synth_ai-0.2.4.dev6/synth_ai/environments/examples/crafter_classic/old/filter_traces_fbc.py +0 -0
  265. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/environments/reproducibility/core.py +0 -0
  266. /synth_ai-0.2.4.dev4/synth_ai/learning/offline/dpo.py → /synth_ai-0.2.4.dev6/synth_ai/environments/reproducibility/helpers.py +0 -0
  267. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/environments/reproducibility/tree.py +0 -0
  268. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/environments/service/external_registry.py +0 -0
  269. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/environments/service/registry.py +0 -0
  270. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/environments/stateful/__init__.py +0 -0
  271. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/environments/stateful/core.py +0 -0
  272. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/environments/stateful/engine.py +0 -0
  273. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/environments/stateful/state.py +0 -0
  274. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/environments/tasks/api.py +0 -0
  275. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/environments/tasks/core.py +0 -0
  276. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/environments/tasks/filters.py +0 -0
  277. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/environments/tasks/utils.py +0 -0
  278. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/environments/v0_observability/history.py +0 -0
  279. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/environments/v0_observability/log.py +0 -0
  280. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/evals/base.py +0 -0
  281. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/experimental/synth_oss.py +0 -0
  282. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/install_sqld.sh +0 -0
  283. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/learning/core.py +0 -0
  284. /synth_ai-0.2.4.dev4/synth_ai/learning/offline/sft.py → /synth_ai-0.2.4.dev6/synth_ai/learning/filtering.py +0 -0
  285. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/learning/gateway.py +0 -0
  286. /synth_ai-0.2.4.dev4/synth_ai/learning/offline/shared.py → /synth_ai-0.2.4.dev6/synth_ai/learning/offline/dpo.py +0 -0
  287. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/learning/offline/providers.py +0 -0
  288. /synth_ai-0.2.4.dev4/synth_ai/learning/online/grpo.py → /synth_ai-0.2.4.dev6/synth_ai/learning/offline/sft.py +0 -0
  289. /synth_ai-0.2.4.dev4/synth_ai/learning/online/irft.py → /synth_ai-0.2.4.dev6/synth_ai/learning/offline/shared.py +0 -0
  290. /synth_ai-0.2.4.dev4/synth_ai/learning/prompts/gepa.py → /synth_ai-0.2.4.dev6/synth_ai/learning/online/grpo.py +0 -0
  291. /synth_ai-0.2.4.dev4/synth_ai/lm/caching/__init__.py → /synth_ai-0.2.4.dev6/synth_ai/learning/online/irft.py +0 -0
  292. /synth_ai-0.2.4.dev4/synth_ai/lm/caching/dbs.py → /synth_ai-0.2.4.dev6/synth_ai/learning/prompts/gepa.py +0 -0
  293. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/lm/__init__.py +0 -0
  294. {synth_ai-0.2.4.dev4/synth_ai/lm/cost → synth_ai-0.2.4.dev6/synth_ai/lm/caching}/__init__.py +0 -0
  295. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/lm/caching/constants.py +0 -0
  296. /synth_ai-0.2.4.dev4/synth_ai/lm/structured_outputs/__init__.py → /synth_ai-0.2.4.dev6/synth_ai/lm/caching/dbs.py +0 -0
  297. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/lm/caching/ephemeral.py +0 -0
  298. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/lm/caching/handler.py +0 -0
  299. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/lm/caching/initialize.py +0 -0
  300. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/lm/caching/persistent.py +0 -0
  301. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/lm/config.py +0 -0
  302. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/lm/constants.py +0 -0
  303. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/lm/core/__init__.py +0 -0
  304. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/lm/core/all.py +0 -0
  305. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/lm/core/exceptions.py +0 -0
  306. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/lm/core/main.py +0 -0
  307. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/lm/core/main_v3.py +0 -0
  308. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/lm/core/vendor_clients.py +0 -0
  309. {synth_ai-0.2.4.dev4/synth_ai/lm/vendors → synth_ai-0.2.4.dev6/synth_ai/lm/cost}/__init__.py +0 -0
  310. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/lm/cost/monitor.py +0 -0
  311. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/lm/cost/statefulness.py +0 -0
  312. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/lm/provider_support/__init__.py +0 -0
  313. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/lm/provider_support/suppress_logging.py +0 -0
  314. {synth_ai-0.2.4.dev4/synth_ai/lm/vendors/core → synth_ai-0.2.4.dev6/synth_ai/lm/structured_outputs}/__init__.py +0 -0
  315. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/lm/structured_outputs/handler.py +0 -0
  316. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/lm/structured_outputs/inject.py +0 -0
  317. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/lm/structured_outputs/rehabilitate.py +0 -0
  318. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/lm/tools/__init__.py +0 -0
  319. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/lm/tools/base.py +0 -0
  320. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/lm/unified_interface.py +0 -0
  321. {synth_ai-0.2.4.dev4/synth_ai/lm/vendors/local → synth_ai-0.2.4.dev6/synth_ai/lm/vendors}/__init__.py +0 -0
  322. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/lm/vendors/base.py +0 -0
  323. {synth_ai-0.2.4.dev4/synth_ai/lm/vendors/supported → synth_ai-0.2.4.dev6/synth_ai/lm/vendors/core}/__init__.py +0 -0
  324. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/lm/vendors/core/gemini_api.py +0 -0
  325. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/lm/vendors/core/mistral_api.py +0 -0
  326. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/lm/vendors/core/openai_api.py +0 -0
  327. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/lm/vendors/core/synth_dev_api.py +0 -0
  328. {synth_ai-0.2.4.dev4/synth_ai/v0/tracing → synth_ai-0.2.4.dev6/synth_ai/lm/vendors/local}/__init__.py +0 -0
  329. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/lm/vendors/local/ollama.py +0 -0
  330. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/lm/vendors/openai_standard_responses.py +0 -0
  331. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/lm/vendors/retries.py +0 -0
  332. {synth_ai-0.2.4.dev4/synth_ai/v0/tracing/events → synth_ai-0.2.4.dev6/synth_ai/lm/vendors/supported}/__init__.py +0 -0
  333. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/lm/vendors/supported/custom_endpoint.py +0 -0
  334. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/lm/vendors/supported/deepseek.py +0 -0
  335. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/lm/vendors/supported/grok.py +0 -0
  336. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/lm/vendors/supported/groq.py +0 -0
  337. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/lm/vendors/supported/ollama.py +0 -0
  338. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/lm/vendors/supported/openrouter.py +0 -0
  339. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/lm/vendors/supported/together.py +0 -0
  340. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/lm/vendors/synth_client.py +0 -0
  341. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/lm/warmup.py +0 -0
  342. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/tracing/__init__.py +0 -0
  343. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/tracing_v1/__init__.py +0 -0
  344. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/tracing_v3/__init__.py +0 -0
  345. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/tracing_v3/abstractions.py +0 -0
  346. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/tracing_v3/config.py +0 -0
  347. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/tracing_v3/db_config.py +0 -0
  348. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/tracing_v3/decorators.py +0 -0
  349. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/tracing_v3/examples/basic_usage.py +0 -0
  350. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/tracing_v3/hooks.py +0 -0
  351. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/tracing_v3/llm_call_record_helpers.py +0 -0
  352. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/tracing_v3/lm_call_record_abstractions.py +0 -0
  353. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/tracing_v3/migration_helper.py +0 -0
  354. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/tracing_v3/replica_sync.py +0 -0
  355. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/tracing_v3/session_tracer.py +0 -0
  356. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/tracing_v3/storage/__init__.py +0 -0
  357. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/tracing_v3/storage/base.py +0 -0
  358. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/tracing_v3/storage/config.py +0 -0
  359. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/tracing_v3/storage/exceptions.py +0 -0
  360. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/tracing_v3/storage/factory.py +0 -0
  361. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/tracing_v3/storage/types.py +0 -0
  362. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/tracing_v3/storage/utils.py +0 -0
  363. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/tracing_v3/turso/__init__.py +0 -0
  364. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/tracing_v3/turso/daemon.py +0 -0
  365. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/tracing_v3/turso/manager.py +0 -0
  366. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/tracing_v3/turso/models.py +0 -0
  367. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/tracing_v3/utils.py +0 -0
  368. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/tui/__init__.py +0 -0
  369. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/tui/__main__.py +0 -0
  370. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/tui/cli/__init__.py +0 -0
  371. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/tui/cli/query_experiments.py +0 -0
  372. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/tui/cli/query_experiments_v3.py +0 -0
  373. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/tui/dashboard.py +0 -0
  374. {synth_ai-0.2.4.dev4/synth_ai/v0/tracing_v1/events → synth_ai-0.2.4.dev6/synth_ai/v0/tracing}/__init__.py +0 -0
  375. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/v0/tracing/abstractions.py +0 -0
  376. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/v0/tracing/base_client.py +0 -0
  377. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/v0/tracing/client_manager.py +0 -0
  378. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/v0/tracing/config.py +0 -0
  379. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/v0/tracing/context.py +0 -0
  380. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/v0/tracing/decorators.py +0 -0
  381. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/v0/tracing/events/manage.py +0 -0
  382. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/v0/tracing/events/scope.py +0 -0
  383. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/v0/tracing/events/store.py +0 -0
  384. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/v0/tracing/immediate_client.py +0 -0
  385. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/v0/tracing/local.py +0 -0
  386. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/v0/tracing/log_client_base.py +0 -0
  387. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/v0/tracing/retry_queue.py +0 -0
  388. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/v0/tracing/trackers.py +0 -0
  389. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/v0/tracing/upload.py +0 -0
  390. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/v0/tracing/utils.py +0 -0
  391. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/v0/tracing_v1/__init__.py +0 -0
  392. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/v0/tracing_v1/abstractions.py +0 -0
  393. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/v0/tracing_v1/base_client.py +0 -0
  394. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/v0/tracing_v1/client_manager.py +0 -0
  395. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/v0/tracing_v1/config.py +0 -0
  396. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/v0/tracing_v1/context.py +0 -0
  397. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/v0/tracing_v1/decorators.py +0 -0
  398. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/v0/tracing_v1/events/manage.py +0 -0
  399. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/v0/tracing_v1/events/scope.py +0 -0
  400. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/v0/tracing_v1/events/store.py +0 -0
  401. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/v0/tracing_v1/immediate_client.py +0 -0
  402. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/v0/tracing_v1/local.py +0 -0
  403. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/v0/tracing_v1/log_client_base.py +0 -0
  404. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/v0/tracing_v1/retry_queue.py +0 -0
  405. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/v0/tracing_v1/trackers.py +0 -0
  406. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/v0/tracing_v1/upload.py +0 -0
  407. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/v0/tracing_v1/utils.py +0 -0
  408. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai/zyk/__init__.py +0 -0
  409. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai.egg-info/dependency_links.txt +0 -0
  410. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai.egg-info/entry_points.txt +0 -0
  411. {synth_ai-0.2.4.dev4 → synth_ai-0.2.4.dev6}/synth_ai.egg-info/top_level.txt +0 -0
@@ -0,0 +1,31 @@
1
+ include README.md
2
+ include LICENSE
3
+ include synth_ai/install_sqld.sh
4
+ recursive-include synth_ai *.py
5
+ # Only include lightweight config/data files from core packages
6
+ recursive-include synth_ai *.toml
7
+
8
+ # Prune test trees from the sdist (package code and *.toml configs are kept)
9
+ prune tests
10
+ prune private_tests
11
+ prune synth_ai/tracing_v3/tests
12
+ recursive-exclude synth_ai **/test_*.py
13
+ recursive-exclude synth_ai **/tests/*
14
+
15
+ # Remove large data files from package
16
+ recursive-exclude synth_ai *.json
17
+ recursive-exclude synth_ai *.db
18
+ recursive-exclude synth_ai *.duckdb
19
+ recursive-exclude synth_ai *.sqlite
20
+ recursive-exclude synth_ai *.parquet
21
+ recursive-exclude synth_ai *.csv
22
+ recursive-exclude synth_ai *.npz
23
+ recursive-exclude synth_ai *.npy
24
+ recursive-exclude synth_ai *.pt
25
+ recursive-exclude synth_ai *.pth
26
+ recursive-exclude synth_ai *.bin
27
+ recursive-exclude synth_ai *.zip
28
+ recursive-exclude synth_ai *.tar
29
+ global-exclude *.pyc
30
+ global-exclude __pycache__
31
+ global-exclude .DS_Store
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: synth-ai
3
- Version: 0.2.4.dev4
3
+ Version: 0.2.4.dev6
4
4
  Summary: Software for aiding the best and multiplying the will - Core AI functionality and tracing
5
5
  Author-email: Synth AI <josh@usesynth.ai>
6
6
  License-Expression: MIT
@@ -49,6 +49,7 @@ Requires-Dist: textual>=1.1.0
49
49
  Requires-Dist: openai-harmony>=0.0.1
50
50
  Requires-Dist: asyncpg>=0.30.0
51
51
  Requires-Dist: aiohttp>=3.8.0
52
+ Requires-Dist: datasets>=4.0.0
52
53
  Provides-Extra: dev
53
54
  Requires-Dist: build>=1.2.2.post1; extra == "dev"
54
55
  Requires-Dist: twine>=4.0.0; extra == "dev"
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "synth-ai"
3
- version = "0.2.4.dev4"
3
+ version = "0.2.4.dev6"
4
4
  description = "Software for aiding the best and multiplying the will - Core AI functionality and tracing"
5
5
  authors = [{name = "Synth AI", email = "josh@usesynth.ai"}]
6
6
  license = "MIT"
@@ -51,6 +51,7 @@ dependencies = [
51
51
  "openai-harmony>=0.0.1", # For OSS-GPT Harmony encoding support
52
52
  "asyncpg>=0.30.0",
53
53
  "aiohttp>=3.8.0", # For async HTTP requests in Harmony integration
54
+ "datasets>=4.0.0",
54
55
  ]
55
56
 
56
57
  [project.scripts]
@@ -0,0 +1 @@
1
+ """Environment examples and demos."""
@@ -0,0 +1,8 @@
1
+ from .config_logging import configure_logging
2
+ from .environment import CrafterClassicEnvironment
3
+ from .engine import CrafterEngine
4
+
5
+ # Configure logging when crafter_classic module is imported
6
+ configure_logging()
7
+
8
+ __all__ = ["CrafterClassicEnvironment", "CrafterEngine"]
@@ -0,0 +1,252 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Run Crafter agent and analyze semantic map words - output as markdown tables only.
4
+
5
+ This script:
6
+ 1. Runs a Crafter agent for multiple episodes
7
+ 2. Extracts all unique words from the semantic map observations
8
+ 3. Outputs analysis as markdown tables (no plotting dependencies)
9
+
10
+ Usage:
11
+ python analyze_semantic_words_markdown.py --model gemini-1.5-flash --episodes 3
12
+ """
13
+
14
+ import asyncio
15
+ import argparse
16
+ import json
17
+ import re
18
+ from collections import Counter
19
+ from pathlib import Path
20
+ from typing import Dict, List, Set
21
+ from datetime import datetime
22
+
23
+ # Import the Crafter agent
24
+ import sys
25
+ sys.path.append(str(Path(__file__).parent))
26
+ from test_crafter_react_agent import run_crafter_episodes
27
+
28
# Vocabulary of Crafter-related terms recognised in observations.  Built once
# at import time instead of on every call.
_CRAFTER_WORDS = frozenset({
    # Resources
    'wood', 'stone', 'coal', 'iron', 'diamond', 'water',
    # Animals
    'cow', 'pig', 'skeleton', 'zombie',
    # Structures/Objects
    'tree', 'grass', 'furnace', 'table', 'bed', 'chest',
    'house', 'fence', 'door', 'wall',
    # Tools
    'axe', 'pickaxe', 'sword', 'shovel',
    # Food
    'bread', 'meat', 'apple',
    # Environment
    'mountain', 'river', 'forest', 'desert', 'cave',
    'lava', 'sand', 'dirt', 'path',
    # Actions/States
    'crafting', 'mining', 'building', 'farming',
    'health', 'hunger', 'energy',
})

# Maximal runs of three or more ASCII letters (the input is lower-cased
# before matching).  A plain letter run is used instead of ``\b...\b``
# because ``\b`` treats ``_`` and digits as word characters, which made
# snake_case tokens such as ``wood_pickaxe`` invisible to the extractor.
_WORD_RUN_RE = re.compile(r'[a-z]{3,}')


def extract_words_from_semantic_map(observation: str) -> Set[str]:
    """Extract Crafter-related words from a semantic map observation string.

    Args:
        observation: Text of an observation.  It is only scanned when it
            contains the marker ``semantic_map`` (case-insensitive).

    Returns:
        The set of lower-cased alphabetic tokens (three letters or more) that
        either are a known Crafter term or contain one as a substring
        (e.g. ``treeline`` contains ``tree``).  An empty set is returned for
        empty/``None`` input or when the marker is absent.
    """
    if not observation or "semantic_map" not in observation.lower():
        return set()

    return {
        token
        for token in _WORD_RUN_RE.findall(observation.lower())
        # Exact hit first (O(1)); fall back to the substring scan that
        # catches compound words built around a known term.
        if token in _CRAFTER_WORDS or any(term in token for term in _CRAFTER_WORDS)
    }
68
+
69
def analyze_episode_traces(traces_data: List[Dict]) -> Dict[str, int]:
    """Count semantic-map words across all observations of all episodes.

    Args:
        traces_data: Iterable of per-episode mappings.  Each episode may carry
            an ``'observations'`` entry holding dict or str observations.
            ``None`` is treated as "no episodes".

    Returns:
        Mapping of word -> number of observations in which the word appeared
        (each observation contributes a word at most once, because the
        extractor returns a set).  Episodes without observations, and
        observations that are neither dict nor str, are ignored.
    """
    word_counter = Counter()

    for episode_data in traces_data or ():
        try:
            observations = episode_data['observations']
        except (KeyError, TypeError, IndexError):
            # Episode has no observations or is not subscriptable by key.
            continue

        for obs in observations or ():
            # Dict observations are scanned through their string form; str
            # observations are scanned directly (str() is a no-op for them).
            if isinstance(obs, (dict, str)):
                word_counter.update(extract_words_from_semantic_map(str(obs)))

    return dict(word_counter)
86
+
87
def generate_markdown_report(word_counts: Dict[str, int], model: str, episodes: int) -> str:
    """Render the semantic-map word statistics as a markdown document.

    Args:
        word_counts: Mapping of word -> occurrence count.
        model: Model name shown in the report header.
        episodes: Number of episodes shown in the report header.

    Returns:
        A markdown string with a summary, a top-15 table, per-category
        tables, a frequency distribution and an alphabetical listing.  When
        ``word_counts`` is empty a short "no words found" document is
        returned instead.
    """
    if not word_counts:
        return "# Semantic Map Analysis\n\n**No words found in semantic maps!**\n"

    total_words = sum(word_counts.values())
    unique_words = len(word_counts)
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    # Most frequent first; ties keep the mapping's insertion order.
    sorted_words = sorted(word_counts.items(), key=lambda x: x[1], reverse=True)

    # Collect chunks and join once instead of repeated ``+=`` on a string.
    parts = [
        "# Semantic Map Word Analysis\n\n",
        # Two trailing spaces are markdown hard line breaks, so the three
        # metadata fields do not collapse into a single paragraph.
        f"**Model:** {model}  \n",
        f"**Episodes:** {episodes}  \n",
        f"**Generated:** {timestamp}\n\n",
        "## Summary\n\n",
        f"- **Total word occurrences:** {total_words}\n",
        f"- **Unique words discovered:** {unique_words}\n",
        f"- **Average occurrences per word:** {total_words/unique_words:.1f}\n\n",
        "## Top Words by Frequency\n\n",
        "| Rank | Word | Count | Percentage |\n",
        "|------|------|-------|------------|\n",
    ]

    # Top 15 words table
    for i, (word, count) in enumerate(sorted_words[:15], 1):
        # Guard against a mapping whose counts are all zero.
        percentage = (count / total_words) * 100 if total_words else 0.0
        parts.append(f"| {i:2d} | {word} | {count} | {percentage:.1f}% |\n")

    # Word categories
    categories = {
        "Resources": ['wood', 'stone', 'coal', 'iron', 'diamond', 'water'],
        "Animals": ['cow', 'pig', 'skeleton', 'zombie'],
        "Structures": ['tree', 'furnace', 'table', 'house', 'chest', 'fence', 'door'],
        "Tools": ['axe', 'pickaxe', 'sword', 'shovel'],
        "Environment": ['mountain', 'river', 'forest', 'desert', 'cave', 'lava', 'grass'],
        "Food": ['bread', 'meat', 'apple'],
    }

    parts.append("\n## Words by Category\n\n")

    for category, words in categories.items():
        found_words = [(w, word_counts[w]) for w in words if w in word_counts]
        if not found_words:
            continue  # Categories without any hit are omitted entirely.
        parts.append(f"### {category}\n\n")
        parts.append("| Word | Count |\n|------|-------|\n")
        parts.extend(
            f"| {word} | {count} |\n"
            for word, count in sorted(found_words, key=lambda x: x[1], reverse=True)
        )
        parts.append("\n")

    # Frequency distribution: how many words share each occurrence count.
    freq_counts = Counter(word_counts.values())
    parts.append("## Frequency Distribution\n\n")
    parts.append("| Frequency | Number of Words |\n|-----------|----------------|\n")
    parts.extend(
        f"| {freq} | {freq_counts[freq]} |\n"
        for freq in sorted(freq_counts, reverse=True)
    )

    # All words alphabetically
    parts.append("\n## All Words (Alphabetical)\n\n")
    parts.append("| Word | Count |\n|------|-------|\n")
    parts.extend(f"| {word} | {word_counts[word]} |\n" for word in sorted(word_counts))

    return "".join(parts)
158
+
159
async def main():
    """Run Crafter episodes, analyze semantic-map words and write the reports.

    Parses ``--model``, ``--episodes``, ``--max-turns`` and ``--output-dir``,
    runs the episodes through ``run_crafter_episodes``, then writes a markdown
    report and a JSON dump of the raw counts into the output directory and
    prints a short summary.  Any exception is reported and re-raised.
    """
    parser = argparse.ArgumentParser(description="Analyze semantic map words - markdown output only")
    parser.add_argument("--model", default="gemini-1.5-flash",
                        help="Model to use for agent (default: gemini-1.5-flash)")
    parser.add_argument("--episodes", type=int, default=3,
                        help="Number of episodes to run (default: 3)")
    parser.add_argument("--max-turns", type=int, default=50,
                        help="Maximum turns per episode (default: 50)")
    parser.add_argument("--output-dir", default="semantic_analysis",
                        help="Directory to save analysis results")

    args = parser.parse_args()

    print(f"🚀 Running {args.episodes} episodes with {args.model}")
    print("📊 Will analyze semantic map words and generate markdown report")

    # Create output directory (including missing parents, so nested
    # --output-dir values work).
    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # Run the agent episodes
    try:
        print("\n🎮 Starting Crafter episodes...")
        traces_result = await run_crafter_episodes(
            model_name=args.model,
            num_episodes=args.episodes,
            max_turns=args.max_turns,
            difficulty="easy",
            base_seed=1000,
        )

        print(f"✅ Completed {args.episodes} episodes")

        # Analyze semantic map words
        print("\n🔍 Analyzing semantic map words...")
        word_counts = analyze_episode_traces(traces_result)

        # Generate markdown report
        print("\n📝 Generating markdown report...")
        markdown_report = generate_markdown_report(word_counts, args.model, args.episodes)

        # Model names may contain path separators or other characters that
        # are unsafe in file names (e.g. "openai/gpt-4o"); without this the
        # write below would fail only after all episodes have already run.
        model_tag = re.sub(r"[^A-Za-z0-9._-]+", "_", args.model)

        # Save markdown report
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        report_file = output_dir / f"semantic_analysis_{model_tag}_{timestamp}.md"

        with open(report_file, 'w', encoding="utf-8") as f:
            f.write(markdown_report)

        print(f"💾 Markdown report saved to: {report_file}")

        # Also save raw data as JSON
        analysis_data = {
            "model": args.model,
            "episodes": args.episodes,
            "timestamp": timestamp,
            "word_counts": word_counts,
            "total_unique_words": len(word_counts),
            "total_word_occurrences": sum(word_counts.values()),
        }

        json_file = output_dir / f"word_data_{model_tag}_{timestamp}.json"
        with open(json_file, 'w', encoding="utf-8") as f:
            json.dump(analysis_data, f, indent=2)

        print(f"💾 Raw data saved to: {json_file}")

        # Print summary to console
        print("\n" + "="*60)
        print("SEMANTIC MAP WORD ANALYSIS SUMMARY")
        print("="*60)

        if word_counts:
            total_words = sum(word_counts.values())
            unique_words = len(word_counts)
            print(f"Total word occurrences: {total_words}")
            print(f"Unique words discovered: {unique_words}")

            # Top 10 most common words
            sorted_words = sorted(word_counts.items(), key=lambda x: x[1], reverse=True)
            print("\nTop 10 most frequent words:")
            for i, (word, count) in enumerate(sorted_words[:10], 1):
                print(f"{i:2d}. {word:<12} ({count} times)")
        else:
            print("No semantic map words found!")

        print(f"\n📄 Full analysis available in: {report_file}")
        print("\n🎉 Analysis complete!")

    except Exception as e:
        # Top-level boundary: report, then let the failure propagate.
        print(f"❌ Error during analysis: {e}")
        raise
250
+
251
+ if __name__ == "__main__":
252
+ asyncio.run(main())
@@ -0,0 +1,58 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Run script for Full Enchilada Crafter Evaluation
4
+ """
5
+
6
+ import asyncio
7
+ import argparse
8
+ from src.synth_env.examples.crafter_classic.agent_demos.full_enchilada import (
9
+ run_full_enchilada_eval,
10
+ )
11
+
12
+
13
async def main():
    """Parse the command line and launch the Full Enchilada Crafter evaluation.

    The parsed options are forwarded as keyword arguments to
    ``run_full_enchilada_eval``; the two ``--no-*`` switches are inverted
    into the positive ``capture_images`` / ``launch_viewer`` flags.
    """
    cli = argparse.ArgumentParser(description="Run Full Enchilada Crafter Evaluation")

    # (flag, add_argument keyword options), registered in declaration order.
    option_specs = (
        ("--models", dict(nargs="+", default=["gpt-4o-mini"], help="Model names to evaluate")),
        (
            "--difficulties",
            dict(nargs="+", default=["easy", "hard"], help="Difficulty levels to test"),
        ),
        (
            "--num-trajectories",
            dict(type=int, default=3, help="Number of trajectories per condition"),
        ),
        ("--max-turns", dict(type=int, default=30, help="Maximum turns per trajectory")),
        ("--no-images", dict(action="store_true", help="Disable image capture")),
        (
            "--no-viewer",
            dict(action="store_true", help="Don't launch the viewer after evaluation"),
        ),
        (
            "--output-dir",
            dict(
                type=str,
                default=None,
                help="Output directory (default: src/evals/crafter/run_TIMESTAMP)",
            ),
        ),
    )
    for flag, options in option_specs:
        cli.add_argument(flag, **options)

    opts = cli.parse_args()

    eval_kwargs = {
        "model_names": opts.models,
        "difficulties": opts.difficulties,
        "num_trajectories": opts.num_trajectories,
        "max_turns": opts.max_turns,
        "capture_images": not opts.no_images,
        "launch_viewer": not opts.no_viewer,
        "output_dir": opts.output_dir,
    }
    await run_full_enchilada_eval(**eval_kwargs)
55
+
56
+
57
+ if __name__ == "__main__":
58
+ asyncio.run(main())
@@ -0,0 +1,152 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Browse existing Crafter evaluations and launch viewer for a selected run.
4
+ """
5
+
6
+ import argparse
7
+ import json
8
+ from pathlib import Path
9
+ from datetime import datetime
10
+ import asyncio
11
+ from tabulate import tabulate
12
+
13
+ from src.synth_env.examples.crafter_classic.agent_demos.full_enchilada import (
14
+ set_current_eval_dir,
15
+ app,
16
+ )
17
+ from fastapi.staticfiles import StaticFiles
18
+ import uvicorn
19
+
20
+
21
def list_evaluations(evals_dir: Path = Path("src/evals/crafter")):
    """List all available evaluations with summary info.

    Args:
        evals_dir: Base directory that contains ``run_*`` sub-directories.

    Returns:
        A list of dicts (``run_id``, ``timestamp``, ``models``,
        ``difficulties``, ``num_trajectories``, ``path``), sorted by run
        directory name in descending order.  Runs without an
        ``evaluation_summary.json`` are omitted; runs whose summary cannot be
        read, parsed or lacks an expected field are skipped with a message
        instead of aborting the whole listing.  Returns ``[]`` when
        ``evals_dir`` does not exist.
    """
    if not evals_dir.exists():
        print(f"No evaluations found at {evals_dir}")
        return []

    evaluations = []
    for run_dir in sorted(evals_dir.glob("run_*"), reverse=True):
        summary_file = run_dir / "evaluation_summary.json"
        if not (run_dir.is_dir() and summary_file.exists()):
            continue

        try:
            with open(summary_file, "r", encoding="utf-8") as f:
                summary = json.load(f)

            metadata = summary["evaluation_metadata"]
            eval_info = {
                "run_id": run_dir.name,
                "timestamp": metadata["timestamp"],
                "models": ", ".join(summary["models_evaluated"]),
                "difficulties": ", ".join(summary["difficulties_evaluated"]),
                "num_trajectories": metadata["num_trajectories"],
                "path": run_dir,
            }
        except (OSError, ValueError, KeyError, TypeError) as exc:
            # ValueError covers json.JSONDecodeError and decoding errors;
            # KeyError/TypeError cover summaries with an unexpected shape.
            print(f"Skipping {run_dir.name}: unreadable evaluation summary ({exc})")
            continue

        evaluations.append(eval_info)

    return evaluations
46
+
47
+
48
async def view_evaluation(eval_dir: Path):
    """Serve the viewer of one evaluation run until the server stops.

    Prints a message and returns early when ``eval_dir`` or its ``viewer``
    sub-directory does not exist.  Otherwise registers ``eval_dir`` through
    ``set_current_eval_dir``, mounts the viewer files at ``/`` on the shared
    ``app`` and runs a uvicorn server on port 8000.
    """
    if not eval_dir.exists():
        print(f"Evaluation directory not found: {eval_dir}")
        return

    static_root = eval_dir / "viewer"
    if not static_root.exists():
        print(f"Viewer files not found in {eval_dir}")
        return

    banner = (
        f"\n📁 Viewing evaluation: {eval_dir}",
        "🌐 Launching viewer at http://localhost:8000",
        " Press Ctrl+C to stop the viewer",
    )
    for banner_line in banner:
        print(banner_line)

    # Tell the viewer app which run it should serve data from.
    set_current_eval_dir(eval_dir)

    # Expose the run's static viewer files at the site root.
    app.mount("/", StaticFiles(directory=str(static_root), html=True), name="viewer")

    # NOTE(review): the server binds to 0.0.0.0 (all interfaces) although the
    # banner advertises localhost — confirm that network exposure is intended.
    server_config = uvicorn.Config(app, host="0.0.0.0", port=8000, log_level="error")
    await uvicorn.Server(server_config).serve()
73
+
74
+
75
async def main():
    """Browse Crafter evaluations and open the viewer for the selected run.

    Without ``--run-id`` or ``--latest`` a table of the available runs is
    printed and the user is prompted for a selection.  ``--run-id`` opens the
    named run below ``--eval-dir``; ``--latest`` opens the first entry
    returned by ``list_evaluations``.  Returns silently when no evaluations
    are listed.
    """
    parser = argparse.ArgumentParser(description="Browse Crafter evaluations")
    parser.add_argument(
        "--eval-dir",
        type=str,
        default="src/evals/crafter",
        help="Base directory for evaluations",
    )
    parser.add_argument(
        "--run-id", type=str, help="Specific run ID to view (e.g., run_20240115_143022)"
    )
    parser.add_argument("--latest", action="store_true", help="View the latest evaluation")

    args = parser.parse_args()
    evals_dir = Path(args.eval_dir)

    # List evaluations
    evaluations = list_evaluations(evals_dir)

    if not evaluations:
        return

    # Display table of evaluations
    if not args.run_id and not args.latest:
        print("\n📊 Available Crafter Evaluations:")
        table_data = []
        for number, eval_info in enumerate(evaluations, 1):
            # Parse timestamp for cleaner display; fall back to the raw value
            # when it is not an ISO-8601 string.  Only the parsing errors are
            # caught so that Ctrl+C and real bugs are not swallowed.
            try:
                ts = datetime.fromisoformat(eval_info["timestamp"])
                ts_str = ts.strftime("%Y-%m-%d %H:%M:%S")
            except (ValueError, TypeError):
                ts_str = eval_info["timestamp"]

            table_data.append(
                [
                    number,
                    eval_info["run_id"],
                    ts_str,
                    eval_info["models"],
                    eval_info["difficulties"],
                    eval_info["num_trajectories"],
                ]
            )

        headers = ["#", "Run ID", "Timestamp", "Models", "Difficulties", "Trajectories"]
        print(tabulate(table_data, headers=headers, tablefmt="grid"))

        # Ask user to select
        print("\nEnter the number of the evaluation to view (or 'q' to quit): ", end="")
        choice = input().strip()

        if choice.lower() == "q":
            return

        try:
            idx = int(choice) - 1
            if 0 <= idx < len(evaluations):
                selected_eval = evaluations[idx]
                await view_evaluation(selected_eval["path"])
            else:
                print("Invalid selection")
        except ValueError:
            print("Invalid input")

    # View specific run
    elif args.run_id:
        eval_path = evals_dir / args.run_id
        await view_evaluation(eval_path)

    # View latest
    elif args.latest and evaluations:
        latest_eval = evaluations[0]
        await view_evaluation(latest_eval["path"])
149
+
150
+
151
+ if __name__ == "__main__":
152
+ asyncio.run(main())
@@ -0,0 +1,24 @@
1
+ [evaluation]
2
+ # Maximum number of turns per agent
3
+ max_turns = 100
4
+
5
+ # Number of trajectories per model-difficulty combination
6
+ trajectories_per_condition = 10
7
+
8
+ # Difficulty modes to test
9
+ difficulties = ["easy"]
10
+
11
+ # Models to evaluate
12
+ models = [
13
+ "gpt-4.1-nano",
14
+ "gpt-4o-mini"
15
+ ]
16
+
17
+ # Parallel execution settings
18
+ parallel_episodes = true
19
+ timeout_seconds = 300
20
+
21
+ # Output settings
22
+ show_progress_bars = true
23
+ show_detailed_logging = false
24
+ show_final_table = true