synth-ai 0.2.9.dev3__py3-none-any.whl → 0.2.9.dev5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of synth-ai might be problematic. Click here for more details.
- examples/analyze_semantic_words.sh +17 -0
- examples/common_old/backend.py +21 -0
- examples/crafter_debug_render.py +180 -0
- examples/evals_old/README.md +98 -0
- examples/evals_old/__init__.py +6 -0
- examples/evals_old/compare_models.py +1037 -0
- examples/evals_old/example_log.md +145 -0
- examples/evals_old/run_demo.sh +126 -0
- examples/evals_old/trace_analysis.py +270 -0
- examples/finetuning_old/_backup_synth_qwen/config.toml +29 -0
- examples/finetuning_old/_backup_synth_qwen/example_log.md +324 -0
- examples/finetuning_old/_backup_synth_qwen/filter_traces.py +60 -0
- examples/finetuning_old/_backup_synth_qwen/filter_traces_achievements.py +239 -0
- examples/finetuning_old/_backup_synth_qwen/purge_v3_traces.py +109 -0
- examples/finetuning_old/_backup_synth_qwen/react_agent_lm.py +1924 -0
- examples/finetuning_old/_backup_synth_qwen/readme.md +49 -0
- examples/finetuning_old/_backup_synth_qwen/run_crafter_qwen4b.py +114 -0
- examples/finetuning_old/_backup_synth_qwen/run_demo.sh +195 -0
- examples/finetuning_old/_backup_synth_qwen/sft_kickoff.py +118 -0
- examples/finetuning_old/synth_qwen_v1/README.md +68 -0
- examples/finetuning_old/synth_qwen_v1/filter_traces.py +60 -0
- examples/finetuning_old/synth_qwen_v1/filter_traces_achievements.py +239 -0
- examples/finetuning_old/synth_qwen_v1/finetune.py +46 -0
- examples/finetuning_old/synth_qwen_v1/hello_ft_model.py +71 -0
- examples/finetuning_old/synth_qwen_v1/infer.py +37 -0
- examples/finetuning_old/synth_qwen_v1/poll.py +44 -0
- examples/finetuning_old/synth_qwen_v1/prepare_data.py +35 -0
- examples/finetuning_old/synth_qwen_v1/purge_v3_traces.py +109 -0
- examples/finetuning_old/synth_qwen_v1/react_agent_lm.py +1932 -0
- examples/finetuning_old/synth_qwen_v1/run_crafter_sft_job.py +207 -0
- examples/finetuning_old/synth_qwen_v1/run_ft_job.py +232 -0
- examples/finetuning_old/synth_qwen_v1/upload_data.py +34 -0
- examples/finetuning_old/synth_qwen_v1/util.py +147 -0
- examples/rl/README.md +169 -0
- examples/rl/configs/eval_base_qwen.toml +15 -0
- examples/rl/configs/eval_rl_qwen.toml +11 -0
- examples/rl/configs/rl_from_base_qwen.toml +35 -0
- examples/rl/configs/rl_from_base_qwen17.toml +74 -0
- examples/rl/configs/rl_from_ft_qwen.toml +35 -0
- examples/rl/download_dataset.py +64 -0
- examples/rl/run_eval.py +435 -0
- examples/rl/run_rl_and_save.py +94 -0
- examples/rl/task_app/README.md +22 -0
- {synth_ai/task/apps → examples/rl/task_app}/math_single_step.py +8 -8
- examples/rl/task_app/math_task_app.py +107 -0
- examples/rl_old/task_app.py +962 -0
- examples/run_crafter_demo.sh +10 -0
- examples/warming_up_to_rl/analyze_trace_db.py +420 -0
- examples/warming_up_to_rl/configs/crafter_fft.toml +48 -0
- examples/warming_up_to_rl/configs/crafter_fft_4b.toml +54 -0
- examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +20 -0
- examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +13 -0
- examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +23 -0
- examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +73 -0
- examples/warming_up_to_rl/configs/rl_from_ft.toml +56 -0
- examples/warming_up_to_rl/export_trace_sft.py +541 -0
- examples/warming_up_to_rl/groq_test.py +88 -0
- examples/warming_up_to_rl/manage_secrets.py +127 -0
- examples/warming_up_to_rl/old/event_rewards.md +234 -0
- examples/warming_up_to_rl/old/notes.md +73 -0
- examples/warming_up_to_rl/readme.md +172 -0
- examples/warming_up_to_rl/run_eval.py +434 -0
- examples/warming_up_to_rl/run_fft_and_save.py +309 -0
- examples/warming_up_to_rl/run_local_rollout.py +188 -0
- examples/warming_up_to_rl/run_local_rollout_modal.py +160 -0
- examples/warming_up_to_rl/run_local_rollout_parallel.py +342 -0
- examples/warming_up_to_rl/run_local_rollout_traced.py +372 -0
- examples/warming_up_to_rl/run_rl_and_save.py +101 -0
- examples/warming_up_to_rl/run_rollout_remote.py +129 -0
- examples/warming_up_to_rl/task_app/README.md +38 -0
- {synth_ai/task/apps → examples/warming_up_to_rl/task_app}/grpo_crafter.py +7 -7
- examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +165 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/README.md +173 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +145 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +1271 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +429 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +442 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +96 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +302 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +202 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +512 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +102 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +985 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +197 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +1749 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +217 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +160 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +146 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +61 -0
- synth_ai/api/train/config_finder.py +18 -18
- synth_ai/api/train/env_resolver.py +28 -1
- synth_ai/cli/task_apps.py +291 -56
- synth_ai/task/apps/__init__.py +54 -13
- {synth_ai-0.2.9.dev3.dist-info → synth_ai-0.2.9.dev5.dist-info}/METADATA +1 -1
- {synth_ai-0.2.9.dev3.dist-info → synth_ai-0.2.9.dev5.dist-info}/RECORD +106 -13
- {synth_ai-0.2.9.dev3.dist-info → synth_ai-0.2.9.dev5.dist-info}/top_level.txt +1 -0
- synth_ai/environments/examples/sokoban/units/astar_common.py +0 -95
- {synth_ai-0.2.9.dev3.dist-info → synth_ai-0.2.9.dev5.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.9.dev3.dist-info → synth_ai-0.2.9.dev5.dist-info}/entry_points.txt +0 -0
- {synth_ai-0.2.9.dev3.dist-info → synth_ai-0.2.9.dev5.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,197 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import uuid
|
|
4
|
+
from dataclasses import dataclass, field
|
|
5
|
+
from datetime import datetime
|
|
6
|
+
from typing import Any, Dict, Optional
|
|
7
|
+
|
|
8
|
+
from synth_ai.environments.stateful.core import StatefulEnvironment
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@dataclass
class EnvHandle:
    """Record describing one live environment held in memory.

    All fields except ``created_at`` must be supplied by the caller;
    ``created_at`` is filled in with ``datetime.utcnow()`` (naive UTC) when
    the handle is constructed.
    """

    # Identifier of this environment.
    env_id: str
    # The environment object itself: a StatefulEnvironment or a wrapper.
    env: Any
    # Presumably the latest observation from the env; may be None.
    last_observation: Optional[Dict[str, Any]]
    # Presumably the latest info dict from the env; may be None.
    last_info: Optional[Dict[str, Any]]
    # Step counter for this environment.
    step_idx: int
    # Seed associated with the environment, or None.
    seed: Optional[int]
    # Identifier of the RL run this environment is associated with.
    rl_run_id: str
    # Construction timestamp (naive, taken from datetime.utcnow).
    created_at: datetime = field(default_factory=datetime.utcnow)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
@dataclass
class PolicyHandle:
    """Record describing one live policy held in memory.

    ``created_at`` defaults to ``datetime.utcnow()`` (naive UTC) at
    construction time; every other field is required.
    """

    # Identifier of this policy.
    policy_id: str
    # The policy instance itself.
    policy: Any
    # ID of the environment the policy is bound to, or None if unbound.
    bound_env_id: Optional[str]
    # Identifier of the RL run this policy is associated with.
    rl_run_id: str
    # Construction timestamp (naive, taken from datetime.utcnow).
    created_at: datetime = field(default_factory=datetime.utcnow)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
@dataclass
class RunHandle:
    """Status record for a run, kept so that runs can be aborted."""

    # Identifier of the run.
    run_id: str
    # One of "running", "aborted" or "completed".
    status: str
    # When the run started.
    started_at: datetime
    # When the run ended; stays None until it is set.
    finished_at: Optional[datetime] = None
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
@dataclass
class SnapshotMeta:
    """Descriptive record kept for each stored snapshot."""

    # Identifier of the snapshot.
    snapshot_id: str
    # What was snapshotted: "env" or "policy".
    kind: str
    # Identifier of the RL run the snapshot is associated with.
    rl_run_id: str
    # ID of the snapshot this one derives from, or None.
    parent_snapshot_id: Optional[str]
    # Size of the snapshot (presumably bytes; confirm with the writer).
    size: int
    # When the snapshot record was created.
    created_at: datetime
    # Location of the stored snapshot.
    path: str
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
class Registry:
    """Dictionary-backed lookup tables for envs, policies, runs and snapshots.

    Every table is a plain in-process ``dict`` keyed by a string ID.
    """

    def __init__(self) -> None:
        # One table per kind of tracked object; all start out empty.
        self.envs: Dict[str, EnvHandle] = {}
        self.policies: Dict[str, PolicyHandle] = {}
        self.runs: Dict[str, RunHandle] = {}
        self.snapshots: Dict[str, SnapshotMeta] = {}

    def generate_id(self) -> str:
        """Return a freshly generated UUID4 as a string."""
        fresh = uuid.uuid4()
        return str(fresh)

    def register_env(
        self,
        env: Any,
        seed: Optional[int],
        rl_run_id: str,
        last_observation: Optional[Dict[str, Any]] = None,
        last_info: Optional[Dict[str, Any]] = None,
    ) -> str:
        """Store ``env`` under a newly generated ID and return that ID.

        The stored handle always starts with ``step_idx`` at 0.
        """
        new_id = self.generate_id()
        self.envs[new_id] = EnvHandle(
            env_id=new_id,
            env=env,
            last_observation=last_observation,
            last_info=last_info,
            step_idx=0,
            seed=seed,
            rl_run_id=rl_run_id,
        )
        return new_id

    def register_policy(
        self,
        policy: Any,
        rl_run_id: str,
        bound_env_id: Optional[str] = None,
    ) -> str:
        """Store ``policy`` under a newly generated ID and return that ID."""
        new_id = self.generate_id()
        self.policies[new_id] = PolicyHandle(
            policy_id=new_id,
            policy=policy,
            bound_env_id=bound_env_id,
            rl_run_id=rl_run_id,
        )
        return new_id

    def register_run(self, run_id: Optional[str] = None) -> str:
        """Record a run in the "running" state and return its ID.

        When ``run_id`` is None a new ID is generated. An entry already
        stored under the same ID is replaced by the new handle.
        """
        chosen_id = self.generate_id() if run_id is None else run_id
        self.runs[chosen_id] = RunHandle(
            run_id=chosen_id,
            status="running",
            started_at=datetime.utcnow(),
        )
        return chosen_id

    def _finish_run(self, run_id: str, status: str) -> bool:
        """Set ``status`` and stamp ``finished_at`` on a known run.

        Returns False, changing nothing, when ``run_id`` is not registered.
        """
        try:
            handle = self.runs[run_id]
        except KeyError:
            return False
        handle.status = status
        handle.finished_at = datetime.utcnow()
        return True

    def abort_run(self, run_id: str) -> bool:
        """Flag the run as "aborted"; return whether the run was found."""
        return self._finish_run(run_id, "aborted")

    def complete_run(self, run_id: str) -> bool:
        """Flag the run as "completed"; return whether the run was found."""
        return self._finish_run(run_id, "completed")

    def is_run_aborted(self, run_id: str) -> bool:
        """Return True only for a registered run whose status is "aborted"."""
        try:
            handle = self.runs[run_id]
        except KeyError:
            return False
        return handle.status == "aborted"

    def register_snapshot(
        self,
        kind: str,
        rl_run_id: str,
        size: int,
        path: str,
        parent_snapshot_id: Optional[str] = None,
    ) -> str:
        """Store metadata for a snapshot under a new ID and return that ID.

        ``created_at`` is stamped with ``datetime.utcnow()`` at call time.
        """
        new_id = self.generate_id()
        self.snapshots[new_id] = SnapshotMeta(
            snapshot_id=new_id,
            kind=kind,
            rl_run_id=rl_run_id,
            parent_snapshot_id=parent_snapshot_id,
            size=size,
            created_at=datetime.utcnow(),
            path=path,
        )
        return new_id

    def get_env(self, env_id: str) -> Optional[EnvHandle]:
        """Look up an environment handle; None when the ID is unknown."""
        return self.envs.get(env_id)

    def get_policy(self, policy_id: str) -> Optional[PolicyHandle]:
        """Look up a policy handle; None when the ID is unknown."""
        return self.policies.get(policy_id)

    def get_run(self, run_id: str) -> Optional[RunHandle]:
        """Look up a run handle; None when the ID is unknown."""
        return self.runs.get(run_id)

    def get_snapshot(self, snapshot_id: str) -> Optional[SnapshotMeta]:
        """Look up snapshot metadata; None when the ID is unknown."""
        return self.snapshots.get(snapshot_id)

    def remove_env(self, env_id: str) -> bool:
        """Drop an environment entry; return whether one was present."""
        try:
            del self.envs[env_id]
        except KeyError:
            return False
        return True

    def remove_policy(self, policy_id: str) -> bool:
        """Drop a policy entry; return whether one was present."""
        try:
            del self.policies[policy_id]
        except KeyError:
            return False
        return True
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
# Global registry instance, created once when this module is imported.
registry = Registry()
|