synth-ai 0.2.9.dev3__py3-none-any.whl → 0.2.9.dev4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (107)
  1. examples/analyze_semantic_words.sh +17 -0
  2. examples/common_old/backend.py +21 -0
  3. examples/crafter_debug_render.py +180 -0
  4. examples/evals_old/README.md +98 -0
  5. examples/evals_old/__init__.py +6 -0
  6. examples/evals_old/compare_models.py +1037 -0
  7. examples/evals_old/example_log.md +145 -0
  8. examples/evals_old/run_demo.sh +126 -0
  9. examples/evals_old/trace_analysis.py +270 -0
  10. examples/finetuning_old/_backup_synth_qwen/config.toml +29 -0
  11. examples/finetuning_old/_backup_synth_qwen/example_log.md +324 -0
  12. examples/finetuning_old/_backup_synth_qwen/filter_traces.py +60 -0
  13. examples/finetuning_old/_backup_synth_qwen/filter_traces_achievements.py +239 -0
  14. examples/finetuning_old/_backup_synth_qwen/purge_v3_traces.py +109 -0
  15. examples/finetuning_old/_backup_synth_qwen/react_agent_lm.py +1924 -0
  16. examples/finetuning_old/_backup_synth_qwen/readme.md +49 -0
  17. examples/finetuning_old/_backup_synth_qwen/run_crafter_qwen4b.py +114 -0
  18. examples/finetuning_old/_backup_synth_qwen/run_demo.sh +195 -0
  19. examples/finetuning_old/_backup_synth_qwen/sft_kickoff.py +118 -0
  20. examples/finetuning_old/synth_qwen_v1/README.md +68 -0
  21. examples/finetuning_old/synth_qwen_v1/filter_traces.py +60 -0
  22. examples/finetuning_old/synth_qwen_v1/filter_traces_achievements.py +239 -0
  23. examples/finetuning_old/synth_qwen_v1/finetune.py +46 -0
  24. examples/finetuning_old/synth_qwen_v1/hello_ft_model.py +71 -0
  25. examples/finetuning_old/synth_qwen_v1/infer.py +37 -0
  26. examples/finetuning_old/synth_qwen_v1/poll.py +44 -0
  27. examples/finetuning_old/synth_qwen_v1/prepare_data.py +35 -0
  28. examples/finetuning_old/synth_qwen_v1/purge_v3_traces.py +109 -0
  29. examples/finetuning_old/synth_qwen_v1/react_agent_lm.py +1932 -0
  30. examples/finetuning_old/synth_qwen_v1/run_crafter_sft_job.py +207 -0
  31. examples/finetuning_old/synth_qwen_v1/run_ft_job.py +232 -0
  32. examples/finetuning_old/synth_qwen_v1/upload_data.py +34 -0
  33. examples/finetuning_old/synth_qwen_v1/util.py +147 -0
  34. examples/rl/README.md +169 -0
  35. examples/rl/configs/eval_base_qwen.toml +15 -0
  36. examples/rl/configs/eval_rl_qwen.toml +11 -0
  37. examples/rl/configs/rl_from_base_qwen.toml +35 -0
  38. examples/rl/configs/rl_from_base_qwen17.toml +74 -0
  39. examples/rl/configs/rl_from_ft_qwen.toml +35 -0
  40. examples/rl/download_dataset.py +64 -0
  41. examples/rl/run_eval.py +435 -0
  42. examples/rl/run_rl_and_save.py +94 -0
  43. examples/rl/task_app/README.md +22 -0
  44. {synth_ai/task/apps → examples/rl/task_app}/math_single_step.py +8 -8
  45. examples/rl/task_app/math_task_app.py +107 -0
  46. examples/rl_old/task_app.py +962 -0
  47. examples/run_crafter_demo.sh +10 -0
  48. examples/warming_up_to_rl/analyze_trace_db.py +420 -0
  49. examples/warming_up_to_rl/configs/crafter_fft.toml +48 -0
  50. examples/warming_up_to_rl/configs/crafter_fft_4b.toml +54 -0
  51. examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +20 -0
  52. examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +13 -0
  53. examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +23 -0
  54. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +73 -0
  55. examples/warming_up_to_rl/configs/rl_from_ft.toml +56 -0
  56. examples/warming_up_to_rl/export_trace_sft.py +541 -0
  57. examples/warming_up_to_rl/groq_test.py +88 -0
  58. examples/warming_up_to_rl/manage_secrets.py +127 -0
  59. examples/warming_up_to_rl/old/event_rewards.md +234 -0
  60. examples/warming_up_to_rl/old/notes.md +73 -0
  61. examples/warming_up_to_rl/readme.md +172 -0
  62. examples/warming_up_to_rl/run_eval.py +434 -0
  63. examples/warming_up_to_rl/run_fft_and_save.py +309 -0
  64. examples/warming_up_to_rl/run_local_rollout.py +188 -0
  65. examples/warming_up_to_rl/run_local_rollout_modal.py +160 -0
  66. examples/warming_up_to_rl/run_local_rollout_parallel.py +342 -0
  67. examples/warming_up_to_rl/run_local_rollout_traced.py +372 -0
  68. examples/warming_up_to_rl/run_rl_and_save.py +101 -0
  69. examples/warming_up_to_rl/run_rollout_remote.py +129 -0
  70. examples/warming_up_to_rl/task_app/README.md +38 -0
  71. {synth_ai/task/apps → examples/warming_up_to_rl/task_app}/grpo_crafter.py +7 -7
  72. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +165 -0
  73. examples/warming_up_to_rl/task_app/synth_envs_hosted/README.md +173 -0
  74. examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +5 -0
  75. examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +145 -0
  76. examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +1271 -0
  77. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -0
  78. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
  79. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
  80. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +429 -0
  81. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +442 -0
  82. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +96 -0
  83. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +302 -0
  84. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
  85. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +202 -0
  86. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +5 -0
  87. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +512 -0
  88. examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +102 -0
  89. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +985 -0
  90. examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +197 -0
  91. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +1749 -0
  92. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +5 -0
  93. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +217 -0
  94. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +160 -0
  95. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +146 -0
  96. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_stepwise_rewards.py +58 -0
  97. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +61 -0
  98. synth_ai/api/train/config_finder.py +18 -18
  99. synth_ai/api/train/env_resolver.py +28 -1
  100. synth_ai/cli/task_apps.py +264 -55
  101. synth_ai/task/apps/__init__.py +54 -13
  102. {synth_ai-0.2.9.dev3.dist-info → synth_ai-0.2.9.dev4.dist-info}/METADATA +1 -1
  103. {synth_ai-0.2.9.dev3.dist-info → synth_ai-0.2.9.dev4.dist-info}/RECORD +107 -12
  104. {synth_ai-0.2.9.dev3.dist-info → synth_ai-0.2.9.dev4.dist-info}/top_level.txt +1 -0
  105. {synth_ai-0.2.9.dev3.dist-info → synth_ai-0.2.9.dev4.dist-info}/WHEEL +0 -0
  106. {synth_ai-0.2.9.dev3.dist-info → synth_ai-0.2.9.dev4.dist-info}/entry_points.txt +0 -0
  107. {synth_ai-0.2.9.dev3.dist-info → synth_ai-0.2.9.dev4.dist-info}/licenses/LICENSE +0 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py
@@ -0,0 +1,197 @@
1
+ from __future__ import annotations
2
+
3
+ import uuid
4
+ from dataclasses import dataclass, field
5
+ from datetime import datetime
6
+ from typing import Any, Dict, Optional
7
+
8
+ from synth_ai.environments.stateful.core import StatefulEnvironment
9
+
10
+
11
@dataclass
class EnvHandle:
    """Record describing one live environment held in memory by the service.

    This is a plain data holder. ``created_at`` is filled in with
    ``datetime.utcnow()`` (a naive UTC timestamp) when the handle is
    constructed, unless the caller supplies a value explicitly.
    """

    # Identifier under which the environment is stored.
    env_id: str
    # The environment object itself: a StatefulEnvironment or a wrapper.
    env: Any
    # Latest observation recorded for this environment, or None.
    last_observation: Optional[Dict[str, Any]]
    # Latest info dict recorded for this environment, or None.
    last_info: Optional[Dict[str, Any]]
    # Step counter (presumably steps taken so far -- confirm with callers).
    step_idx: int
    # Seed associated with the environment, or None when not supplied.
    seed: Optional[int]
    # Identifier of the RL run this environment is associated with.
    rl_run_id: str
    # Creation time; defaults to the moment the handle object is built.
    created_at: datetime = field(default_factory=datetime.utcnow)
23
+
24
+
25
@dataclass
class PolicyHandle:
    """Record describing one live policy held in memory by the service.

    ``created_at`` defaults to ``datetime.utcnow()`` (naive UTC) at
    construction time unless a value is passed explicitly.
    """

    # Identifier under which the policy is stored.
    policy_id: str
    # The policy object itself (type not constrained here).
    policy: Any
    # ID of the environment this policy is bound to, or None if unbound.
    bound_env_id: Optional[str]
    # Identifier of the RL run this policy is associated with.
    rl_run_id: str
    # Creation time; defaults to the moment the handle object is built.
    created_at: datetime = field(default_factory=datetime.utcnow)
34
+
35
+
36
@dataclass
class RunHandle:
    """Status record for a run, kept so that runs can be aborted."""

    # Identifier of the run.
    run_id: str
    # Lifecycle state: "running", "aborted" or "completed".
    status: str
    # When the run was started.
    started_at: datetime
    # When the run ended; None until it has been finished.
    finished_at: Optional[datetime] = None
44
+
45
+
46
@dataclass
class SnapshotMeta:
    """Descriptive record for a snapshot that has been stored."""

    # Identifier of the snapshot.
    snapshot_id: str
    # What was snapshotted: "env" or "policy".
    kind: str
    # Identifier of the RL run the snapshot is associated with.
    rl_run_id: str
    # ID of the snapshot this one derives from, or None if it has no parent.
    parent_snapshot_id: Optional[str]
    # Size of the snapshot (presumably in bytes -- confirm with the writer).
    size: int
    # When the snapshot record was created.
    created_at: datetime
    # Location of the stored snapshot.
    path: str
57
+
58
+
59
class Registry:
    """Dictionary-backed bookkeeping for envs, policies, runs and snapshots.

    Each kind of object lives in its own dict keyed by a string ID. Nothing
    is persisted, and no locking is performed by this class.
    """

    def __init__(self) -> None:
        """Start with four empty tables."""
        self.envs: Dict[str, EnvHandle] = {}
        self.policies: Dict[str, PolicyHandle] = {}
        self.runs: Dict[str, RunHandle] = {}
        self.snapshots: Dict[str, SnapshotMeta] = {}

    def generate_id(self) -> str:
        """Return a fresh random UUID (version 4) rendered as a string."""
        fresh = uuid.uuid4()
        return str(fresh)

    def register_env(
        self,
        env: Any,
        seed: Optional[int],
        rl_run_id: str,
        last_observation: Optional[Dict[str, Any]] = None,
        last_info: Optional[Dict[str, Any]] = None,
    ) -> str:
        """Store ``env`` under a newly generated ID and return that ID.

        The handle's step counter starts at 0; ``last_observation`` and
        ``last_info`` are stored as given.
        """
        new_id = self.generate_id()
        self.envs[new_id] = EnvHandle(
            env_id=new_id,
            env=env,
            last_observation=last_observation,
            last_info=last_info,
            step_idx=0,
            seed=seed,
            rl_run_id=rl_run_id,
        )
        return new_id

    def register_policy(
        self,
        policy: Any,
        rl_run_id: str,
        bound_env_id: Optional[str] = None,
    ) -> str:
        """Store ``policy`` under a newly generated ID and return that ID."""
        new_id = self.generate_id()
        self.policies[new_id] = PolicyHandle(
            policy_id=new_id,
            policy=policy,
            bound_env_id=bound_env_id,
            rl_run_id=rl_run_id,
        )
        return new_id

    def register_run(self, run_id: Optional[str] = None) -> str:
        """Record a run in the "running" state and return its ID.

        When ``run_id`` is None a new ID is generated. An existing entry
        with the same ID is replaced.
        """
        chosen = self.generate_id() if run_id is None else run_id
        self.runs[chosen] = RunHandle(
            run_id=chosen,
            status="running",
            started_at=datetime.utcnow(),
        )
        return chosen

    def _finish_run(self, run_id: str, status: str) -> bool:
        """Set ``status`` and a finish timestamp on a known run.

        Returns False, changing nothing, when ``run_id`` is not registered.
        """
        if run_id not in self.runs:
            return False
        self.runs[run_id].status = status
        self.runs[run_id].finished_at = datetime.utcnow()
        return True

    def abort_run(self, run_id: str) -> bool:
        """Flag a run as "aborted"; return whether the run was known."""
        return self._finish_run(run_id, "aborted")

    def complete_run(self, run_id: str) -> bool:
        """Flag a run as "completed"; return whether the run was known."""
        return self._finish_run(run_id, "completed")

    def is_run_aborted(self, run_id: str) -> bool:
        """Tell whether ``run_id`` is registered and currently "aborted"."""
        if run_id not in self.runs:
            return False
        return self.runs[run_id].status == "aborted"

    def register_snapshot(
        self,
        kind: str,
        rl_run_id: str,
        size: int,
        path: str,
        parent_snapshot_id: Optional[str] = None,
    ) -> str:
        """Record snapshot metadata under a new ID and return that ID.

        The creation time is stamped with ``datetime.utcnow()``.
        """
        new_id = self.generate_id()
        self.snapshots[new_id] = SnapshotMeta(
            snapshot_id=new_id,
            kind=kind,
            rl_run_id=rl_run_id,
            parent_snapshot_id=parent_snapshot_id,
            size=size,
            created_at=datetime.utcnow(),
            path=path,
        )
        return new_id

    def get_env(self, env_id: str) -> Optional[EnvHandle]:
        """Look up an environment handle; None when the ID is unknown."""
        return self.envs.get(env_id)

    def get_policy(self, policy_id: str) -> Optional[PolicyHandle]:
        """Look up a policy handle; None when the ID is unknown."""
        return self.policies.get(policy_id)

    def get_run(self, run_id: str) -> Optional[RunHandle]:
        """Look up a run handle; None when the ID is unknown."""
        return self.runs.get(run_id)

    def get_snapshot(self, snapshot_id: str) -> Optional[SnapshotMeta]:
        """Look up snapshot metadata; None when the ID is unknown."""
        return self.snapshots.get(snapshot_id)

    def remove_env(self, env_id: str) -> bool:
        """Drop an environment entry; return whether one was removed."""
        if env_id not in self.envs:
            return False
        del self.envs[env_id]
        return True

    def remove_policy(self, policy_id: str) -> bool:
        """Drop a policy entry; return whether one was removed."""
        if policy_id not in self.policies:
            return False
        del self.policies[policy_id]
        return True
194
+
195
+
196
# Single module-level Registry instance, created when this module is imported.
registry = Registry()