synth-ai 0.2.4.dev7__py3-none-any.whl → 0.2.4.dev9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (154) hide show
  1. synth_ai/__init__.py +1 -1
  2. synth_ai/cli/__init__.py +6 -0
  3. synth_ai/cli/balance.py +3 -15
  4. synth_ai/cli/demo.py +68 -9
  5. synth_ai/cli/rl_demo.py +137 -0
  6. synth_ai/cli/root.py +65 -0
  7. synth_ai/config/base_url.py +47 -0
  8. synth_ai/demos/core/__init__.py +1 -0
  9. synth_ai/demos/core/cli.py +621 -0
  10. synth_ai/demos/demo_task_apps/__init__.py +1 -0
  11. synth_ai/demos/demo_task_apps/core.py +374 -0
  12. synth_ai/demos/demo_task_apps/math/__init__.py +1 -0
  13. synth_ai/demos/demo_task_apps/math/app.py +37 -0
  14. synth_ai/demos/demo_task_apps/math/config.toml +44 -0
  15. synth_ai/demos/demo_task_apps/math/deploy_modal.py +60 -0
  16. synth_ai/demos/demo_task_apps/math/deploy_task_app.sh +22 -0
  17. synth_ai/environments/examples/bandit/__init__.py +33 -0
  18. synth_ai/environments/examples/bandit/engine.py +294 -0
  19. synth_ai/environments/examples/bandit/environment.py +194 -0
  20. synth_ai/environments/examples/bandit/taskset.py +200 -0
  21. synth_ai/environments/examples/crafter_classic/agent_demos/analyze_semantic_words_markdown.py +250 -0
  22. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_comprehensive_evaluation.py +59 -0
  23. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_browser.py +152 -0
  24. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_config.toml +24 -0
  25. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_framework.py +1194 -0
  26. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/crafter_synth_config.toml +56 -0
  27. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_config_modal.toml +32 -0
  28. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_traces_sft_turso.py +724 -0
  29. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/kick_off_ft_modal.py +384 -0
  30. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_action_results.py +53 -0
  31. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_agent_actions.py +178 -0
  32. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_latest_run.py +222 -0
  33. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_lm_traces.py +183 -0
  34. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_no_rewards.py +210 -0
  35. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_trace_issue.py +206 -0
  36. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/check_db_schema.py +49 -0
  37. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/check_latest_results.py +64 -0
  38. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/debug_agent_responses.py +88 -0
  39. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/quick_trace_check.py +77 -0
  40. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/compare_experiments.py +324 -0
  41. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/filter_traces_sft_turso.py +580 -0
  42. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/kick_off_ft_oai.py +362 -0
  43. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/multi_model_config.toml +49 -0
  44. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_enhanced_hooks.py +332 -0
  45. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_hook_events.py +97 -0
  46. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_hook_results.py +217 -0
  47. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/check_hook_storage.py +87 -0
  48. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/check_seeds.py +88 -0
  49. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/compare_seed_performance.py +195 -0
  50. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/custom_eval_pipelines.py +400 -0
  51. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/plot_hook_frequency.py +195 -0
  52. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/seed_analysis_summary.py +56 -0
  53. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/run_rollouts_for_models_and_compare_v3.py +858 -0
  54. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_quick_evaluation.py +52 -0
  55. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_react_agent.py +874 -0
  56. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_trace_evaluation.py +1412 -0
  57. synth_ai/environments/examples/crafter_classic/agent_demos/example_v3_usage.py +216 -0
  58. synth_ai/environments/examples/crafter_classic/agent_demos/old/compare_traces.py +296 -0
  59. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_comprehensive_evaluation.py +58 -0
  60. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_env_serialization.py +464 -0
  61. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_evaluation_browser.py +152 -0
  62. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_quick_evaluation.py +51 -0
  63. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_trace_evaluation.py +1412 -0
  64. synth_ai/environments/examples/crafter_classic/agent_demos/old/debug_player_loss.py +112 -0
  65. synth_ai/environments/examples/crafter_classic/agent_demos/old/diagnose_service.py +203 -0
  66. synth_ai/environments/examples/crafter_classic/agent_demos/old/diagnose_slowness.py +305 -0
  67. synth_ai/environments/examples/crafter_classic/agent_demos/old/eval_by_difficulty.py +126 -0
  68. synth_ai/environments/examples/crafter_classic/agent_demos/old/eval_example.py +94 -0
  69. synth_ai/environments/examples/crafter_classic/agent_demos/old/explore_saved_states.py +142 -0
  70. synth_ai/environments/examples/crafter_classic/agent_demos/old/filter_traces_sft.py +26 -0
  71. synth_ai/environments/examples/crafter_classic/agent_demos/old/filter_traces_sft_OLD.py +984 -0
  72. synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_data_gemini.py +724 -0
  73. synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_data_modal.py +386 -0
  74. synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_metadata.py +205 -0
  75. synth_ai/environments/examples/crafter_classic/agent_demos/old/kick_off_ft_gemini.py +150 -0
  76. synth_ai/environments/examples/crafter_classic/agent_demos/old/kick_off_ft_modal.py +283 -0
  77. synth_ai/environments/examples/crafter_classic/agent_demos/old/prepare_vertex_ft.py +280 -0
  78. synth_ai/environments/examples/crafter_classic/agent_demos/old/profile_env_slowness.py +456 -0
  79. synth_ai/environments/examples/crafter_classic/agent_demos/old/replicate_issue.py +166 -0
  80. synth_ai/environments/examples/crafter_classic/agent_demos/old/run_and_eval.py +102 -0
  81. synth_ai/environments/examples/crafter_classic/agent_demos/old/run_comparison.py +128 -0
  82. synth_ai/environments/examples/crafter_classic/agent_demos/old/run_qwen_rollouts.py +655 -0
  83. synth_ai/environments/examples/crafter_classic/agent_demos/old/trace_eval_OLD.py +202 -0
  84. synth_ai/environments/examples/crafter_classic/agent_demos/old/validate_openai_format.py +166 -0
  85. synth_ai/environments/examples/crafter_classic/environment.py +41 -2
  86. synth_ai/environments/examples/crafter_custom/agent_demos/__init__.py +1 -0
  87. synth_ai/environments/examples/crafter_custom/agent_demos/trace_eval.py +202 -0
  88. synth_ai/environments/examples/crafter_custom/old/analyze_diamond_issue.py +159 -0
  89. synth_ai/environments/examples/crafter_custom/old/analyze_diamond_spawning.py +158 -0
  90. synth_ai/environments/examples/crafter_custom/old/compare_worlds.py +71 -0
  91. synth_ai/environments/examples/crafter_custom/old/dataset_stats.py +105 -0
  92. synth_ai/environments/examples/crafter_custom/old/diamond_spawning_summary.py +119 -0
  93. synth_ai/environments/examples/crafter_custom/old/example_dataset_usage.py +52 -0
  94. synth_ai/environments/examples/enron/units/keyword_stats.py +112 -0
  95. synth_ai/environments/examples/minigrid/agent_demos/minigrid_evaluation_framework.py +1188 -0
  96. synth_ai/environments/examples/minigrid/agent_demos/minigrid_quick_evaluation.py +48 -0
  97. synth_ai/environments/examples/minigrid/agent_demos/minigrid_react_agent.py +562 -0
  98. synth_ai/environments/examples/minigrid/agent_demos/minigrid_trace_evaluation.py +221 -0
  99. synth_ai/environments/examples/nethack/agent_demos/nethack_evaluation_framework.py +981 -0
  100. synth_ai/environments/examples/nethack/agent_demos/nethack_quick_evaluation.py +74 -0
  101. synth_ai/environments/examples/nethack/agent_demos/nethack_react_agent.py +831 -0
  102. synth_ai/environments/examples/red/agent_demos/__init__.py +1 -0
  103. synth_ai/environments/examples/red/units/__init__.py +1 -0
  104. synth_ai/environments/examples/sokoban/agent_demos/sokoban_full_eval.py +899 -0
  105. synth_ai/environments/examples/sokoban/units/astar_common.py +95 -0
  106. synth_ai/environments/service/app.py +8 -0
  107. synth_ai/http.py +102 -0
  108. synth_ai/inference/__init__.py +7 -0
  109. synth_ai/inference/client.py +20 -0
  110. synth_ai/install_sqld.sh +40 -0
  111. synth_ai/jobs/client.py +246 -0
  112. synth_ai/learning/__init__.py +24 -0
  113. synth_ai/learning/client.py +149 -0
  114. synth_ai/learning/config.py +43 -0
  115. synth_ai/learning/constants.py +29 -0
  116. synth_ai/learning/ft_client.py +59 -0
  117. synth_ai/learning/health.py +43 -0
  118. synth_ai/learning/jobs.py +205 -0
  119. synth_ai/learning/rl_client.py +256 -0
  120. synth_ai/learning/sse.py +58 -0
  121. synth_ai/learning/validators.py +48 -0
  122. synth_ai/lm/core/main_v3.py +13 -0
  123. synth_ai/lm/core/synth_models.py +48 -0
  124. synth_ai/lm/core/vendor_clients.py +9 -6
  125. synth_ai/lm/vendors/core/openai_api.py +31 -3
  126. synth_ai/lm/vendors/openai_standard.py +45 -14
  127. synth_ai/lm/vendors/supported/custom_endpoint.py +12 -2
  128. synth_ai/lm/vendors/synth_client.py +372 -28
  129. synth_ai/rl/__init__.py +30 -0
  130. synth_ai/rl/contracts.py +32 -0
  131. synth_ai/rl/env_keys.py +137 -0
  132. synth_ai/rl/secrets.py +19 -0
  133. synth_ai/scripts/verify_rewards.py +100 -0
  134. synth_ai/task/__init__.py +10 -0
  135. synth_ai/task/contracts.py +120 -0
  136. synth_ai/task/health.py +28 -0
  137. synth_ai/task/validators.py +12 -0
  138. synth_ai/tracing_v3/hooks.py +3 -1
  139. synth_ai/tracing_v3/session_tracer.py +123 -2
  140. synth_ai/tracing_v3/turso/manager.py +218 -0
  141. synth_ai/tracing_v3/turso/models.py +53 -0
  142. synth_ai-0.2.4.dev9.dist-info/METADATA +91 -0
  143. {synth_ai-0.2.4.dev7.dist-info → synth_ai-0.2.4.dev9.dist-info}/RECORD +147 -30
  144. {synth_ai-0.2.4.dev7.dist-info → synth_ai-0.2.4.dev9.dist-info}/entry_points.txt +1 -0
  145. synth_ai/tui/__init__.py +0 -1
  146. synth_ai/tui/__main__.py +0 -13
  147. synth_ai/tui/cli/__init__.py +0 -1
  148. synth_ai/tui/cli/query_experiments.py +0 -164
  149. synth_ai/tui/cli/query_experiments_v3.py +0 -164
  150. synth_ai/tui/dashboard.py +0 -340
  151. synth_ai-0.2.4.dev7.dist-info/METADATA +0 -193
  152. {synth_ai-0.2.4.dev7.dist-info → synth_ai-0.2.4.dev9.dist-info}/WHEEL +0 -0
  153. {synth_ai-0.2.4.dev7.dist-info → synth_ai-0.2.4.dev9.dist-info}/licenses/LICENSE +0 -0
  154. {synth_ai-0.2.4.dev7.dist-info → synth_ai-0.2.4.dev9.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,74 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Runner script for NetHack evaluation framework.
4
+ """
5
+
6
+ import argparse
7
+ import asyncio
8
+ import os
9
+ import time
10
+
11
+ from eval_framework import run_nethack_eval
12
+
13
+
14
+ async def main():
15
+ """Main evaluation runner."""
16
+ parser = argparse.ArgumentParser(description="Run NetHack evaluation framework")
17
+ parser.add_argument(
18
+ "--models",
19
+ nargs="+",
20
+ default=["gemini-1.5-flash-latest"],
21
+ help="Model names to evaluate",
22
+ )
23
+ parser.add_argument("--difficulties", nargs="+", default=["easy"], help="Difficulty levels")
24
+ parser.add_argument(
25
+ "--num-trajectories",
26
+ type=int,
27
+ default=3,
28
+ help="Number of trajectories per condition",
29
+ )
30
+ parser.add_argument("--max-turns", type=int, default=50, help="Maximum turns per trajectory")
31
+
32
+ args = parser.parse_args()
33
+
34
+ print(f"Starting NetHack evaluation...")
35
+ print(f"Models: {args.models}")
36
+ print(f"Difficulties: {args.difficulties}")
37
+ print(f"Trajectories per condition: {args.num_trajectories}")
38
+ print(f"Max turns: {args.max_turns}")
39
+
40
+ start_time = time.time()
41
+
42
+ try:
43
+ report = await run_nethack_eval(
44
+ model_names=args.models,
45
+ difficulties=args.difficulties,
46
+ num_trajectories=args.num_trajectories,
47
+ max_turns=args.max_turns,
48
+ )
49
+
50
+ end_time = time.time()
51
+ duration = end_time - start_time
52
+
53
+ print(f"\n✅ Evaluation completed in {duration:.1f} seconds")
54
+
55
+ # Save report to file
56
+ import json
57
+
58
+ output_file = f"nethack_eval_results_{int(time.time())}.json"
59
+ with open(output_file, "w") as f:
60
+ json.dump(report, f, indent=2, default=str)
61
+ print(f"📁 Full report saved to {output_file}")
62
+
63
+ except Exception as e:
64
+ print(f"❌ Evaluation failed: {e}")
65
+ import traceback
66
+
67
+ traceback.print_exc()
68
+ return 1
69
+
70
+ return 0
71
+
72
+
73
+ if __name__ == "__main__":
74
+ exit(asyncio.run(main()))