synth-ai 0.2.9.dev5__py3-none-any.whl → 0.2.9.dev7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (155)
  1. examples/common_old/backend.py +0 -1
  2. examples/crafter_debug_render.py +15 -6
  3. examples/evals_old/compare_models.py +1 -0
  4. examples/finetuning_old/_backup_synth_qwen/filter_traces_achievements.py +6 -2
  5. examples/finetuning_old/_backup_synth_qwen/react_agent_lm.py +4 -4
  6. examples/finetuning_old/_backup_synth_qwen/sft_kickoff.py +4 -3
  7. examples/finetuning_old/synth_qwen_v1/filter_traces_achievements.py +6 -2
  8. examples/finetuning_old/synth_qwen_v1/finetune.py +1 -1
  9. examples/finetuning_old/synth_qwen_v1/hello_ft_model.py +4 -4
  10. examples/finetuning_old/synth_qwen_v1/infer.py +1 -2
  11. examples/finetuning_old/synth_qwen_v1/poll.py +4 -2
  12. examples/finetuning_old/synth_qwen_v1/prepare_data.py +8 -8
  13. examples/finetuning_old/synth_qwen_v1/react_agent_lm.py +5 -4
  14. examples/finetuning_old/synth_qwen_v1/run_crafter_sft_job.py +11 -8
  15. examples/finetuning_old/synth_qwen_v1/run_ft_job.py +17 -12
  16. examples/finetuning_old/synth_qwen_v1/upload_data.py +1 -1
  17. examples/finetuning_old/synth_qwen_v1/util.py +7 -2
  18. examples/rl/configs/eval_base_qwen.toml +1 -1
  19. examples/rl/configs/rl_from_base_qwen17.toml +1 -1
  20. examples/rl/download_dataset.py +26 -10
  21. examples/rl/run_eval.py +17 -15
  22. examples/rl/run_rl_and_save.py +24 -7
  23. examples/rl/task_app/math_single_step.py +128 -11
  24. examples/rl/task_app/math_task_app.py +11 -3
  25. examples/rl_old/task_app.py +222 -53
  26. examples/warming_up_to_rl/analyze_trace_db.py +7 -5
  27. examples/warming_up_to_rl/export_trace_sft.py +141 -16
  28. examples/warming_up_to_rl/groq_test.py +11 -4
  29. examples/warming_up_to_rl/manage_secrets.py +15 -6
  30. examples/warming_up_to_rl/readme.md +9 -2
  31. examples/warming_up_to_rl/run_eval.py +108 -30
  32. examples/warming_up_to_rl/run_fft_and_save.py +128 -52
  33. examples/warming_up_to_rl/run_local_rollout.py +87 -36
  34. examples/warming_up_to_rl/run_local_rollout_modal.py +113 -25
  35. examples/warming_up_to_rl/run_local_rollout_parallel.py +80 -16
  36. examples/warming_up_to_rl/run_local_rollout_traced.py +125 -20
  37. examples/warming_up_to_rl/run_rl_and_save.py +31 -7
  38. examples/warming_up_to_rl/run_rollout_remote.py +37 -10
  39. examples/warming_up_to_rl/task_app/grpo_crafter.py +90 -27
  40. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +9 -27
  41. examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +46 -108
  42. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -1
  43. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +1 -1
  44. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -1
  45. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +50 -17
  46. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +35 -21
  47. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +8 -4
  48. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +29 -26
  49. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +1 -1
  50. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +17 -13
  51. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +1 -1
  52. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +106 -63
  53. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +82 -84
  54. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +76 -59
  55. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +1 -1
  56. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +43 -49
  57. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +5 -15
  58. synth_ai/__init__.py +1 -0
  59. synth_ai/api/train/builders.py +34 -10
  60. synth_ai/api/train/cli.py +172 -32
  61. synth_ai/api/train/config_finder.py +59 -4
  62. synth_ai/api/train/env_resolver.py +32 -14
  63. synth_ai/api/train/pollers.py +11 -3
  64. synth_ai/api/train/task_app.py +4 -1
  65. synth_ai/api/train/utils.py +20 -4
  66. synth_ai/cli/__init__.py +11 -4
  67. synth_ai/cli/balance.py +1 -1
  68. synth_ai/cli/demo.py +19 -5
  69. synth_ai/cli/rl_demo.py +75 -16
  70. synth_ai/cli/root.py +116 -37
  71. synth_ai/cli/task_apps.py +1276 -186
  72. synth_ai/cli/traces.py +1 -0
  73. synth_ai/cli/turso.py +73 -0
  74. synth_ai/core/experiment.py +0 -2
  75. synth_ai/demo_registry.py +67 -30
  76. synth_ai/demos/core/cli.py +493 -164
  77. synth_ai/demos/demo_task_apps/core.py +50 -6
  78. synth_ai/demos/demo_task_apps/crafter/configs/crafter_fft_4b.toml +2 -3
  79. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +36 -28
  80. synth_ai/demos/demo_task_apps/math/_common.py +1 -2
  81. synth_ai/demos/demo_task_apps/math/deploy_modal.py +0 -2
  82. synth_ai/demos/demo_task_apps/math/modal_task_app.py +168 -65
  83. synth_ai/demos/demo_task_apps/math/task_app_entry.py +0 -1
  84. synth_ai/environments/examples/bandit/engine.py +12 -4
  85. synth_ai/environments/examples/bandit/taskset.py +4 -4
  86. synth_ai/environments/reproducibility/tree.py +3 -1
  87. synth_ai/environments/service/core_routes.py +6 -2
  88. synth_ai/evals/base.py +0 -2
  89. synth_ai/experimental/synth_oss.py +11 -12
  90. synth_ai/handshake.py +3 -1
  91. synth_ai/http_client.py +31 -7
  92. synth_ai/inference/__init__.py +0 -2
  93. synth_ai/inference/client.py +8 -4
  94. synth_ai/jobs/client.py +40 -10
  95. synth_ai/learning/client.py +33 -8
  96. synth_ai/learning/config.py +0 -2
  97. synth_ai/learning/constants.py +0 -2
  98. synth_ai/learning/ft_client.py +6 -3
  99. synth_ai/learning/health.py +9 -2
  100. synth_ai/learning/jobs.py +17 -5
  101. synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +1 -3
  102. synth_ai/learning/prompts/random_search.py +4 -1
  103. synth_ai/learning/prompts/run_random_search_banking77.py +6 -1
  104. synth_ai/learning/rl_client.py +42 -14
  105. synth_ai/learning/sse.py +0 -2
  106. synth_ai/learning/validators.py +6 -2
  107. synth_ai/lm/caching/ephemeral.py +1 -3
  108. synth_ai/lm/core/exceptions.py +0 -2
  109. synth_ai/lm/core/main.py +13 -1
  110. synth_ai/lm/core/synth_models.py +0 -1
  111. synth_ai/lm/core/vendor_clients.py +4 -2
  112. synth_ai/lm/overrides.py +2 -2
  113. synth_ai/lm/vendors/core/anthropic_api.py +7 -7
  114. synth_ai/lm/vendors/core/openai_api.py +2 -0
  115. synth_ai/lm/vendors/openai_standard.py +3 -1
  116. synth_ai/lm/vendors/openai_standard_responses.py +6 -3
  117. synth_ai/lm/vendors/supported/custom_endpoint.py +1 -3
  118. synth_ai/lm/vendors/synth_client.py +37 -10
  119. synth_ai/rl/__init__.py +0 -1
  120. synth_ai/rl/contracts.py +0 -2
  121. synth_ai/rl/env_keys.py +6 -1
  122. synth_ai/task/__init__.py +1 -0
  123. synth_ai/task/apps/__init__.py +11 -11
  124. synth_ai/task/auth.py +29 -17
  125. synth_ai/task/client.py +3 -1
  126. synth_ai/task/contracts.py +1 -0
  127. synth_ai/task/datasets.py +3 -1
  128. synth_ai/task/errors.py +3 -2
  129. synth_ai/task/health.py +0 -2
  130. synth_ai/task/json.py +0 -1
  131. synth_ai/task/proxy.py +2 -5
  132. synth_ai/task/rubrics.py +9 -3
  133. synth_ai/task/server.py +31 -5
  134. synth_ai/task/tracing_utils.py +8 -3
  135. synth_ai/task/validators.py +0 -1
  136. synth_ai/task/vendors.py +0 -1
  137. synth_ai/tracing_v3/db_config.py +26 -1
  138. synth_ai/tracing_v3/decorators.py +1 -0
  139. synth_ai/tracing_v3/examples/basic_usage.py +3 -2
  140. synth_ai/tracing_v3/hooks.py +2 -0
  141. synth_ai/tracing_v3/replica_sync.py +1 -0
  142. synth_ai/tracing_v3/session_tracer.py +24 -3
  143. synth_ai/tracing_v3/storage/base.py +4 -1
  144. synth_ai/tracing_v3/storage/factory.py +0 -1
  145. synth_ai/tracing_v3/turso/manager.py +102 -38
  146. synth_ai/tracing_v3/turso/models.py +4 -1
  147. synth_ai/tracing_v3/utils.py +1 -0
  148. synth_ai/v0/tracing/upload.py +32 -135
  149. {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.9.dev7.dist-info}/METADATA +1 -1
  150. {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.9.dev7.dist-info}/RECORD +154 -154
  151. synth_ai/install_sqld.sh +0 -40
  152. {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.9.dev7.dist-info}/WHEEL +0 -0
  153. {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.9.dev7.dist-info}/entry_points.txt +0 -0
  154. {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.9.dev7.dist-info}/licenses/LICENSE +0 -0
  155. {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.9.dev7.dist-info}/top_level.txt +0 -0
@@ -8,7 +8,7 @@ from dataclasses import dataclass
8
8
  from pathlib import Path
9
9
  from typing import Any, Dict, Iterable, List, Sequence
10
10
 
11
- from synth_ai.task.contracts import RolloutRequest, RolloutResponse, TaskInfo
11
+ from synth_ai.task.contracts import RolloutRequest, RolloutResponse, TaskInfo, RolloutMetrics
12
12
  from synth_ai.task.datasets import TaskDatasetRegistry, TaskDatasetSpec
13
13
  from synth_ai.task.rubrics import load_rubric
14
14
  from synth_ai.task.server import ProxyConfig, RubricBundle, TaskAppConfig
@@ -33,15 +33,16 @@ for path in [REPO_ROOT, TASK_APP_ROOT, SYNTH_ENVS_HOSTED_ROOT]:
33
33
  if path_str not in sys.path:
34
34
  sys.path.insert(0, path_str)
35
35
 
36
+ HAS_HOSTED = True
36
37
  try:
37
38
  import crafter # type: ignore
38
39
  import crafter.constants as C # type: ignore
39
- from synth_ai.environments.examples.crafter_classic.taskset import TRAIT_BOUNDS, world_traits
40
- from synth_envs_hosted.branching import router as branching_router
41
- from synth_envs_hosted.environment_routes import router as environment_router
42
- from synth_envs_hosted.hosted_app import TaskApp as HostedTaskApp
43
- from synth_envs_hosted.policy_routes import router as policy_router
44
- from synth_envs_hosted.rollout import (
40
+ from synth_ai.environments.examples.crafter_classic.taskset import TRAIT_BOUNDS
41
+ from synth_envs_hosted.branching import router as branching_router # type: ignore
42
+ from synth_envs_hosted.environment_routes import router as environment_router # type: ignore
43
+ from synth_envs_hosted.hosted_app import TaskApp as HostedTaskApp # type: ignore
44
+ from synth_envs_hosted.policy_routes import router as policy_router # type: ignore
45
+ from synth_envs_hosted.rollout import ( # type: ignore
45
46
  RolloutEnvSpec as LegacyRolloutEnvSpec,
46
47
  RolloutPolicySpec as LegacyRolloutPolicySpec,
47
48
  RolloutRecordConfig as LegacyRolloutRecordConfig,
@@ -54,7 +55,9 @@ except Exception as exc: # pragma: no cover - import-time validation
54
55
  # Provide a more actionable error with the missing module and fix hints
55
56
  missing_mod = None
56
57
  if isinstance(exc, ModuleNotFoundError):
57
- missing_mod = getattr(exc, "name", None) or str(exc).split("'")[1] if "'" in str(exc) else None
58
+ missing_mod = (
59
+ getattr(exc, "name", None) or str(exc).split("'")[1] if "'" in str(exc) else None
60
+ )
58
61
  fix_hint = None
59
62
  if missing_mod:
60
63
  mapping = {
@@ -74,12 +77,16 @@ except Exception as exc: # pragma: no cover - import-time validation
74
77
  f"For Modal: add '{pkg}' to ModalDeploymentConfig.pip_packages in synth_ai/task/apps/grpo_crafter.py.\n"
75
78
  f"Locally: pip install {pkg}"
76
79
  )
77
- detailed = (
78
- "grpo_crafter task app requires example dependencies and runtime libs.\n"
79
- + (fix_hint + "\n" if fix_hint else "")
80
- + f"Original error: {exc}"
81
- )
82
- raise RuntimeError(detailed) from exc
80
+ # Allow running without synth_envs_hosted; gate hosted features off
81
+ if missing_mod == "synth_envs_hosted":
82
+ HAS_HOSTED = False
83
+ else:
84
+ detailed = (
85
+ "grpo_crafter task app requires example dependencies and runtime libs.\n"
86
+ + (fix_hint + "\n" if fix_hint else "")
87
+ + f"Original error: {exc}"
88
+ )
89
+ raise RuntimeError(detailed) from exc
83
90
 
84
91
 
85
92
  CRAFTING_RULES_SYSTEM_HINT = (
@@ -130,7 +137,7 @@ class CrafterDataset:
130
137
  env = crafter.Env(area=self.area, length=self.length, seed=seed)
131
138
  try:
132
139
  env.reset()
133
- traits = world_traits(env)
140
+ traits = _compute_world_traits(env)
134
141
  player = getattr(env, "_player", None)
135
142
  inventory = dict(getattr(player, "inventory", {})) if player else {}
136
143
  position = getattr(player, "pos", None)
@@ -151,10 +158,9 @@ class CrafterDataset:
151
158
 
152
159
  def _difficulty(self, traits: Dict[str, int]) -> str:
153
160
  for difficulty, bounds in TRAIT_BOUNDS.items():
154
- if (
155
- traits.get("trees", 0) >= bounds.get("min_trees", 0)
156
- and traits.get("hostiles", 0) <= bounds.get("max_hostiles", 0)
157
- ):
161
+ if traits.get("trees", 0) >= bounds.get("min_trees", 0) and traits.get(
162
+ "hostiles", 0
163
+ ) <= bounds.get("max_hostiles", 0):
158
164
  return difficulty
159
165
  return "custom"
160
166
 
@@ -163,6 +169,35 @@ class CrafterDataset:
163
169
  return [self.seed_min, self.seed_max]
164
170
 
165
171
 
172
+ def _compute_world_traits(env: "crafter.Env", radius: int = 10) -> Dict[str, int]:
173
+ # Local copy to avoid import-time issues; mirrors synth_ai.environments.examples.crafter_classic.taskset.world_traits
174
+ from crafter import objects as _objects # type: ignore
175
+ import numpy as _np # type: ignore
176
+
177
+ player = getattr(env, "_player", None)
178
+ if player is None:
179
+ return {"trees": 0, "cows": 0, "hostiles": 0}
180
+ pos = _np.array(getattr(player, "pos", [0, 0]))
181
+ counts = {"trees": 0, "cows": 0, "hostiles": 0}
182
+ world = getattr(env, "_world", None)
183
+ objects = getattr(world, "_objects", []) if world is not None else []
184
+ for obj in objects:
185
+ if obj is None or obj is player:
186
+ continue
187
+ try:
188
+ if _np.abs(getattr(obj, "pos") - pos).sum() > radius:
189
+ continue
190
+ except Exception:
191
+ continue
192
+ if isinstance(obj, _objects.Plant) and getattr(obj, "kind", "") == "tree":
193
+ counts["trees"] += 1
194
+ elif isinstance(obj, _objects.Cow):
195
+ counts["cows"] += 1
196
+ elif isinstance(obj, (_objects.Zombie, _objects.Skeleton)):
197
+ counts["hostiles"] += 1
198
+ return counts
199
+
200
+
166
201
  def env_value(key: str, default: Any) -> Any:
167
202
  import os
168
203
 
@@ -266,7 +301,9 @@ def describe_taskset(dataset: CrafterDataset) -> Dict[str, Any]:
266
301
  }
267
302
 
268
303
 
269
- def provide_task_instances(dataset: CrafterDataset, base_info: TaskInfo, seeds: Sequence[int]) -> Iterable[TaskInfo]:
304
+ def provide_task_instances(
305
+ dataset: CrafterDataset, base_info: TaskInfo, seeds: Sequence[int]
306
+ ) -> Iterable[TaskInfo]:
270
307
  infos: list[TaskInfo] = []
271
308
  for seed_value in seeds:
272
309
  summary = dataset.describe_seed(seed_value)
@@ -315,6 +352,24 @@ def _normalise_op(op_value: Any, index: int) -> str:
315
352
 
316
353
 
317
354
  async def rollout_executor(request: RolloutRequest, fastapi_request) -> RolloutResponse:
355
+ # If hosted env service code is not bundled, return a no-op rollout response compatible with contracts
356
+ if not HAS_HOSTED:
357
+ return RolloutResponse(
358
+ run_id=request.run_id,
359
+ trajectories=[],
360
+ branches={},
361
+ metrics=RolloutMetrics(
362
+ episode_returns=[],
363
+ mean_return=0.0,
364
+ num_steps=0,
365
+ num_episodes=0,
366
+ details={},
367
+ ),
368
+ aborted=False,
369
+ ops_executed=0,
370
+ trace=None,
371
+ )
372
+
318
373
  converted_ops: List[str] = [_normalise_op(op, idx) for idx, op in enumerate(request.ops)]
319
374
  legacy_request = LegacyRolloutRequest(
320
375
  run_id=request.run_id,
@@ -338,7 +393,9 @@ async def rollout_executor(request: RolloutRequest, fastapi_request) -> RolloutR
338
393
  synth_base_url=request.synth_base_url,
339
394
  )
340
395
 
341
- legacy_response: LegacyRolloutResponse = await legacy_execute_rollout(legacy_request, fastapi_request)
396
+ legacy_response: LegacyRolloutResponse = await legacy_execute_rollout(
397
+ legacy_request, fastapi_request
398
+ )
342
399
  data = legacy_response.model_dump()
343
400
  metrics = data.get("metrics", {}) or {}
344
401
  metrics.setdefault("outcome_score", None)
@@ -352,11 +409,13 @@ def build_config() -> TaskAppConfig:
352
409
  registry, dataset = build_dataset()
353
410
  base_info = _base_task_info(dataset)
354
411
 
355
- hosted_task_app = HostedTaskApp()
412
+ hosted_task_app = HostedTaskApp() if HAS_HOSTED else None
356
413
 
357
414
  tracing_enabled = tracing_env_enabled()
358
415
  tracing_db_url = resolve_tracing_db_url()
359
- tracer_factory = build_tracer_factory(SessionTracer, enabled=tracing_enabled, db_url=tracing_db_url)
416
+ tracer_factory = build_tracer_factory(
417
+ SessionTracer, enabled=tracing_enabled, db_url=tracing_db_url
418
+ )
360
419
  sft_output_dir = resolve_sft_output_dir()
361
420
 
362
421
  app_state: Dict[str, Any] = {
@@ -383,6 +442,8 @@ def build_config() -> TaskAppConfig:
383
442
  def _provide_instances(seeds: Sequence[int]):
384
443
  return provide_task_instances(dataset, base_info, seeds)
385
444
 
445
+ routers: tuple = (environment_router, policy_router, branching_router) if HAS_HOSTED else ()
446
+
386
447
  config = TaskAppConfig(
387
448
  app_id="grpo-crafter",
388
449
  name="GRPO Crafter Task App",
@@ -393,8 +454,10 @@ def build_config() -> TaskAppConfig:
393
454
  rollout=rollout_executor,
394
455
  dataset_registry=registry,
395
456
  rubrics=RubricBundle(outcome=OUTCOME_RUBRIC, events=EVENTS_RUBRIC),
396
- proxy=ProxyConfig(enable_openai=True, enable_groq=True, system_hint=CRAFTING_RULES_SYSTEM_HINT),
397
- routers=(environment_router, policy_router, branching_router),
457
+ proxy=ProxyConfig(
458
+ enable_openai=True, enable_groq=True, system_hint=CRAFTING_RULES_SYSTEM_HINT
459
+ ),
460
+ routers=routers,
398
461
  app_state=app_state,
399
462
  cors_origins=["*"],
400
463
  )
@@ -426,8 +489,8 @@ register_task_app(
426
489
  "crafter",
427
490
  ),
428
491
  extra_local_dirs=(
429
- (str(REPO_ROOT / 'synth_ai'), '/opt/synth_ai_repo/synth_ai'),
430
- (str(TASK_APP_ROOT), '/opt/synth_ai_repo/examples/warming_up_to_rl/task_app'),
492
+ (str(REPO_ROOT / "synth_ai"), "/opt/synth_ai_repo/synth_ai"),
493
+ (str(TASK_APP_ROOT), "/opt/synth_ai_repo/examples/warming_up_to_rl/task_app"),
431
494
  ),
432
495
  secret_names=("crafter-environment-sdk", "groq-api-key", "openai-api-key"),
433
496
  memory=16384,
@@ -1,9 +1,8 @@
1
-
2
1
  """Compatibility wrapper for the GRPO Crafter task app.
3
2
 
4
- This module now delegates to the shared TaskAppConfig defined in
5
- `synth_ai.task.apps.grpo_crafter`. It is kept for legacy usage (running the
6
- file directly or targeting `fastapi_app` from external tooling). Prefer using
3
+ This module now delegates to the TaskAppConfig defined in the colocated example at
4
+ `examples/warming_up_to_rl/task_app/grpo_crafter.py`. It is kept for legacy usage
5
+ (running the file directly or targeting `fastapi_app` from external tooling). Prefer using
7
6
  `uvx synth-ai serve grpo-crafter` for local development and testing.
8
7
  """
9
8
 
@@ -25,27 +24,10 @@ from synth_ai.task.server import TaskAppConfig, create_task_app, run_task_app
25
24
  APP_ID = "grpo-crafter"
26
25
 
27
26
 
28
- _BASE_CONFIG = build_config()
29
- TASK_APP_CONFIG = TaskAppConfig(
30
- app_id="grpo-crafter",
31
- name=_BASE_CONFIG.name,
32
- description=_BASE_CONFIG.description,
33
- base_task_info=_BASE_CONFIG.base_task_info,
34
- describe_taskset=_BASE_CONFIG.describe_taskset,
35
- provide_task_instances=_BASE_CONFIG.provide_task_instances,
36
- rollout=_BASE_CONFIG.rollout,
37
- dataset_registry=_BASE_CONFIG.dataset_registry,
38
- rubrics=_BASE_CONFIG.rubrics,
39
- proxy=_BASE_CONFIG.proxy,
40
- routers=_BASE_CONFIG.routers,
41
- middleware=_BASE_CONFIG.middleware,
42
- app_state=_BASE_CONFIG.app_state,
43
- require_api_key=_BASE_CONFIG.require_api_key,
44
- expose_debug_env=_BASE_CONFIG.expose_debug_env,
45
- cors_origins=_BASE_CONFIG.cors_origins,
46
- startup_hooks=_BASE_CONFIG.startup_hooks,
47
- shutdown_hooks=_BASE_CONFIG.shutdown_hooks,
48
- )
27
+ def _build_base_config() -> TaskAppConfig:
28
+ # Lazily construct the base config to avoid heavy work at import time
29
+ return build_config()
30
+
49
31
 
50
32
  try:
51
33
  _REGISTERED_ENTRY = registry.get(APP_ID)
@@ -59,8 +41,8 @@ else:
59
41
 
60
42
  def build_task_app_config() -> TaskAppConfig:
61
43
  """Return a fresh TaskAppConfig for this wrapper."""
62
-
63
- return TASK_APP_CONFIG.clone()
44
+ base = _build_base_config()
45
+ return base.clone()
64
46
 
65
47
 
66
48
  def fastapi_app():
@@ -58,9 +58,7 @@ async def validate_environment_observation(observation: Any, context: str) -> No
58
58
  "terminated",
59
59
  }
60
60
  if wordle_keys.issubset(set(observation.keys())):
61
- logger.info(
62
- f"🔍 ENV_ROUTES: Validating Wordle observation structure in {context}"
63
- )
61
+ logger.info(f"🔍 ENV_ROUTES: Validating Wordle observation structure in {context}")
64
62
  logger.info(f"🔍 ENV_ROUTES: Observation keys: {list(observation.keys())}")
65
63
 
66
64
  missing_keys = wordle_keys - set(observation.keys())
@@ -278,9 +276,7 @@ async def create_environment(request: EnvCreateRequest) -> EnvCreateResponse:
278
276
  WordleEnvironment,
279
277
  )
280
278
  except Exception as e:
281
- raise HTTPException(
282
- status_code=500, detail=f"Wordle modules unavailable: {e}"
283
- )
279
+ raise HTTPException(status_code=500, detail=f"Wordle modules unavailable: {e}")
284
280
 
285
281
  # Lazy import of wrapper within branch
286
282
  try:
@@ -288,9 +284,7 @@ async def create_environment(request: EnvCreateRequest) -> EnvCreateResponse:
288
284
  WordleEnvironmentWrapper as _WordleWrapper,
289
285
  )
290
286
  except Exception as e:
291
- raise HTTPException(
292
- status_code=500, detail=f"Wordle wrapper unavailable: {e}"
293
- )
287
+ raise HTTPException(status_code=500, detail=f"Wordle wrapper unavailable: {e}")
294
288
 
295
289
  cfg = request.config or {}
296
290
  word_length = int(cfg.get("word_length", 5))
@@ -307,12 +301,8 @@ async def create_environment(request: EnvCreateRequest) -> EnvCreateResponse:
307
301
  )
308
302
  instance = WordleTaskInstance(
309
303
  id=uuid4(),
310
- impetus=Impetus(
311
- instructions="Play Wordle. Submit one 5-letter word per turn."
312
- ),
313
- intent=Intent(
314
- rubric="guess the word", gold_trajectories=None, gold_state_diff={}
315
- ),
304
+ impetus=Impetus(instructions="Play Wordle. Submit one 5-letter word per turn."),
305
+ intent=Intent(rubric="guess the word", gold_trajectories=None, gold_state_diff={}),
316
306
  metadata=md,
317
307
  is_reproducible=True,
318
308
  initial_engine_snapshot=None,
@@ -345,9 +335,7 @@ async def create_environment(request: EnvCreateRequest) -> EnvCreateResponse:
345
335
  if key in observation_for_registry:
346
336
  del observation_for_registry[key]
347
337
 
348
- await validate_environment_observation(
349
- observation_for_registry, "initialize"
350
- )
338
+ await validate_environment_observation(observation_for_registry, "initialize")
351
339
 
352
340
  env_id = registry.register_env(
353
341
  env=wrapper,
@@ -376,9 +364,7 @@ async def create_environment(request: EnvCreateRequest) -> EnvCreateResponse:
376
364
  SokobanEnvironment,
377
365
  )
378
366
  except Exception as e:
379
- raise HTTPException(
380
- status_code=500, detail=f"Sokoban modules unavailable: {e}"
381
- )
367
+ raise HTTPException(status_code=500, detail=f"Sokoban modules unavailable: {e}")
382
368
 
383
369
  # Lazy import of wrapper within branch
384
370
  try:
@@ -386,9 +372,7 @@ async def create_environment(request: EnvCreateRequest) -> EnvCreateResponse:
386
372
  SokobanEnvironmentWrapper as _SokobanWrapper,
387
373
  )
388
374
  except Exception as e:
389
- raise HTTPException(
390
- status_code=500, detail=f"Sokoban wrapper unavailable: {e}"
391
- )
375
+ raise HTTPException(status_code=500, detail=f"Sokoban wrapper unavailable: {e}")
392
376
 
393
377
  cfg = request.config or {}
394
378
  difficulty = cfg.get("difficulty", "easy")
@@ -411,9 +395,7 @@ async def create_environment(request: EnvCreateRequest) -> EnvCreateResponse:
411
395
  )
412
396
  base_env = SokobanEnvironment(task_instance=instance)
413
397
 
414
- wrapper = _SokobanWrapper(
415
- env=base_env, seed=request.seed, config=cfg
416
- )
398
+ wrapper = _SokobanWrapper(env=base_env, seed=request.seed, config=cfg)
417
399
  result = await wrapper.initialize()
418
400
 
419
401
  # Handle the observation structure consistently for Sokoban
@@ -453,9 +435,7 @@ async def create_environment(request: EnvCreateRequest) -> EnvCreateResponse:
453
435
  MathEnvironmentWrapper as _MathWrapper,
454
436
  )
455
437
  except Exception as e:
456
- raise HTTPException(
457
- status_code=500, detail=f"Math wrapper unavailable: {e}"
458
- )
438
+ raise HTTPException(status_code=500, detail=f"Math wrapper unavailable: {e}")
459
439
 
460
440
  wrapper = _MathWrapper(
461
441
  seed=request.seed,
@@ -464,7 +444,11 @@ async def create_environment(request: EnvCreateRequest) -> EnvCreateResponse:
464
444
  )
465
445
  result = await wrapper.initialize()
466
446
 
467
- observation_for_registry = result["observation"].copy() if isinstance(result, dict) and "observation" in result else result.copy()
447
+ observation_for_registry = (
448
+ result["observation"].copy()
449
+ if isinstance(result, dict) and "observation" in result
450
+ else result.copy()
451
+ )
468
452
  for key in ["step_idx", "info"]:
469
453
  if key in observation_for_registry:
470
454
  del observation_for_registry[key]
@@ -509,7 +493,9 @@ async def compat_initialize(payload: dict) -> EnvCreateResponse:
509
493
  difficulty = str(wc.get("difficulty"))
510
494
  elif isinstance(cfg, dict) and cfg.get("difficulty"):
511
495
  difficulty = str(cfg.get("difficulty"))
512
- req = EnvCreateRequest(env_name="crafter", config={"difficulty": difficulty}, seed=seed, rl_run_id="eval")
496
+ req = EnvCreateRequest(
497
+ env_name="crafter", config={"difficulty": difficulty}, seed=seed, rl_run_id="eval"
498
+ )
513
499
  return await create_environment(req)
514
500
 
515
501
 
@@ -525,10 +511,12 @@ async def compat_step(payload: dict) -> EnvStepResponse:
525
511
  actions_list = action.get("actions") if isinstance(action, dict) else None
526
512
  if isinstance(actions_list, list) and actions_list:
527
513
  for a in actions_list:
528
- tool_calls.append({
529
- "tool": "interact",
530
- "args": {"action": a},
531
- })
514
+ tool_calls.append(
515
+ {
516
+ "tool": "interact",
517
+ "args": {"action": a},
518
+ }
519
+ )
532
520
  req = EnvStepRequest(env_id=env_id, tool_calls=tool_calls)
533
521
  return await step_environment(req)
534
522
 
@@ -545,9 +533,7 @@ async def reset_environment(request: EnvResetRequest) -> EnvResetResponse:
545
533
  """Reset an environment to its initial state."""
546
534
  handle = registry.get_env(request.env_id)
547
535
  if not handle:
548
- raise HTTPException(
549
- status_code=404, detail=f"Environment {request.env_id} not found"
550
- )
536
+ raise HTTPException(status_code=404, detail=f"Environment {request.env_id} not found")
551
537
 
552
538
  try:
553
539
  # Determine wrapper type and rebuild base env if a new seed is provided
@@ -606,9 +592,7 @@ async def reset_environment(request: EnvResetRequest) -> EnvResetResponse:
606
592
  WordleEnvironment,
607
593
  )
608
594
  except Exception as e:
609
- raise HTTPException(
610
- status_code=500, detail=f"Wordle modules unavailable: {e}"
611
- )
595
+ raise HTTPException(status_code=500, detail=f"Wordle modules unavailable: {e}")
612
596
 
613
597
  init_snap = getattr(wrapper, "initial_engine_snapshot", None)
614
598
  if init_snap is not None:
@@ -655,9 +639,7 @@ async def reset_environment(request: EnvResetRequest) -> EnvResetResponse:
655
639
  WordleEnvironment,
656
640
  )
657
641
  except Exception as e:
658
- raise HTTPException(
659
- status_code=500, detail=f"Wordle modules unavailable: {e}"
660
- )
642
+ raise HTTPException(status_code=500, detail=f"Wordle modules unavailable: {e}")
661
643
 
662
644
  init_snap = getattr(wrapper, "initial_engine_snapshot", None)
663
645
  if init_snap is not None:
@@ -711,9 +693,7 @@ async def reset_environment(request: EnvResetRequest) -> EnvResetResponse:
711
693
  SokobanEnvironment,
712
694
  )
713
695
  except Exception as e:
714
- raise HTTPException(
715
- status_code=500, detail=f"Sokoban modules unavailable: {e}"
716
- )
696
+ raise HTTPException(status_code=500, detail=f"Sokoban modules unavailable: {e}")
717
697
 
718
698
  cfg = dict(wrapper.config or {})
719
699
  metadata = SokobanTaskInstanceMetadata(
@@ -746,9 +726,7 @@ async def reset_environment(request: EnvResetRequest) -> EnvResetResponse:
746
726
  SokobanEnvironment,
747
727
  )
748
728
  except Exception as e:
749
- raise HTTPException(
750
- status_code=500, detail=f"Sokoban modules unavailable: {e}"
751
- )
729
+ raise HTTPException(status_code=500, detail=f"Sokoban modules unavailable: {e}")
752
730
 
753
731
  cfg = dict(wrapper.config or {})
754
732
  metadata = SokobanTaskInstanceMetadata(
@@ -757,9 +735,7 @@ async def reset_environment(request: EnvResetRequest) -> EnvResetResponse:
757
735
  instance = SokobanTaskInstance(
758
736
  id=uuid4(),
759
737
  impetus=Impetus(instructions="Reset"),
760
- intent=Intent(
761
- rubric={"goal": "Reset"}, gold_trajectories=None, gold_state_diff={}
762
- ),
738
+ intent=Intent(rubric={"goal": "Reset"}, gold_trajectories=None, gold_state_diff={}),
763
739
  metadata=metadata,
764
740
  is_reproducible=True,
765
741
  initial_engine_snapshot=cfg.get("initial_state"),
@@ -818,9 +794,7 @@ async def step_environment(request: EnvStepRequest) -> EnvStepResponse:
818
794
  """Execute a step in the environment."""
819
795
  handle = registry.get_env(request.env_id)
820
796
  if not handle:
821
- raise HTTPException(
822
- status_code=404, detail=f"Environment {request.env_id} not found"
823
- )
797
+ raise HTTPException(status_code=404, detail=f"Environment {request.env_id} not found")
824
798
 
825
799
  try:
826
800
  # Execute the step, pre-normalizing invalid Wordle guesses to avoid hard failures
@@ -836,12 +810,7 @@ async def step_environment(request: EnvStepRequest) -> EnvStepResponse:
836
810
  expected_len = int(getattr(wrapper, "word_length", 5))
837
811
  normalized: List[Dict[str, Any]] = []
838
812
  for tc in request.tool_calls or []:
839
- tool = (
840
- tc.get("tool")
841
- or tc.get("tool_name")
842
- or tc.get("name")
843
- or "interact"
844
- )
813
+ tool = tc.get("tool") or tc.get("tool_name") or tc.get("name") or "interact"
845
814
  args = tc.get("arguments") or tc.get("args") or {}
846
815
  if isinstance(args, str):
847
816
  try:
@@ -861,9 +830,7 @@ async def step_environment(request: EnvStepRequest) -> EnvStepResponse:
861
830
  # Preserve the original tool name (interact or submit) for the environment to handle
862
831
  normalized.append({"tool": tool, "args": {"guess": g}})
863
832
  else:
864
- normalized.append(
865
- {"tool": "invalid_guess", "args": {"original_guess": guess}}
866
- )
833
+ normalized.append({"tool": "invalid_guess", "args": {"original_guess": guess}})
867
834
  result = await wrapper.step(normalized)
868
835
  else:
869
836
  result = await handle.env.step(request.tool_calls)
@@ -922,12 +889,7 @@ async def step_environment(request: EnvStepRequest) -> EnvStepResponse:
922
889
  expected_len = int(getattr(wrapper, "word_length", 5))
923
890
  normalized: List[Dict[str, Any]] = []
924
891
  for tc in request.tool_calls or []:
925
- tool = (
926
- tc.get("tool")
927
- or tc.get("tool_name")
928
- or tc.get("name")
929
- or "interact"
930
- )
892
+ tool = tc.get("tool") or tc.get("tool_name") or tc.get("name") or "interact"
931
893
  args = tc.get("arguments") or tc.get("args") or {}
932
894
  if isinstance(args, str):
933
895
  try:
@@ -947,9 +909,7 @@ async def step_environment(request: EnvStepRequest) -> EnvStepResponse:
947
909
  }
948
910
  )
949
911
  else:
950
- normalized.append(
951
- {"tool": "interact", "args": {"guess": g}}
952
- )
912
+ normalized.append({"tool": "interact", "args": {"guess": g}})
953
913
  else:
954
914
  normalized.append(
955
915
  {"tool": "invalid_guess", "args": {"original_guess": guess}}
@@ -989,9 +949,7 @@ async def snapshot_environment(request: EnvSnapshotRequest) -> EnvSnapshotRespon
989
949
  """Create a snapshot of the environment state."""
990
950
  handle = registry.get_env(request.env_id)
991
951
  if not handle:
992
- raise HTTPException(
993
- status_code=404, detail=f"Environment {request.env_id} not found"
994
- )
952
+ raise HTTPException(status_code=404, detail=f"Environment {request.env_id} not found")
995
953
 
996
954
  try:
997
955
  # Serialize environment state
@@ -1030,9 +988,7 @@ async def restore_environment(request: EnvRestoreRequest) -> EnvRestoreResponse:
1030
988
  """Restore an environment from a snapshot."""
1031
989
  snapshot = registry.get_snapshot(request.snapshot_id)
1032
990
  if not snapshot:
1033
- raise HTTPException(
1034
- status_code=404, detail=f"Snapshot {request.snapshot_id} not found"
1035
- )
991
+ raise HTTPException(status_code=404, detail=f"Snapshot {request.snapshot_id} not found")
1036
992
 
1037
993
  if snapshot.kind != "env":
1038
994
  raise HTTPException(
@@ -1113,9 +1069,7 @@ async def restore_environment(request: EnvRestoreRequest) -> EnvRestoreResponse:
1113
1069
  WordleEnvironment,
1114
1070
  )
1115
1071
  except Exception as e:
1116
- raise HTTPException(
1117
- status_code=500, detail=f"Wordle modules unavailable: {e}"
1118
- )
1072
+ raise HTTPException(status_code=500, detail=f"Wordle modules unavailable: {e}")
1119
1073
 
1120
1074
  cfg = state_dict.get("config", {}) or {}
1121
1075
  word_length = int(cfg.get("word_length", 5))
@@ -1150,12 +1104,8 @@ async def restore_environment(request: EnvRestoreRequest) -> EnvRestoreResponse:
1150
1104
  WordleEnvironmentWrapper as _WordleWrapper,
1151
1105
  )
1152
1106
  except Exception as e:
1153
- raise HTTPException(
1154
- status_code=500, detail=f"Wordle wrapper unavailable: {e}"
1155
- )
1156
- wrapper = await _WordleWrapper.deserialize(
1157
- payload=state_dict, env=base_env
1158
- )
1107
+ raise HTTPException(status_code=500, detail=f"Wordle wrapper unavailable: {e}")
1108
+ wrapper = await _WordleWrapper.deserialize(payload=state_dict, env=base_env)
1159
1109
 
1160
1110
  env_id = registry.register_env(
1161
1111
  env=wrapper,
@@ -1184,14 +1134,10 @@ async def restore_environment(request: EnvRestoreRequest) -> EnvRestoreResponse:
1184
1134
  SokobanEnvironment,
1185
1135
  )
1186
1136
  except Exception as e:
1187
- raise HTTPException(
1188
- status_code=500, detail=f"Sokoban modules unavailable: {e}"
1189
- )
1137
+ raise HTTPException(status_code=500, detail=f"Sokoban modules unavailable: {e}")
1190
1138
 
1191
1139
  cfg = state_dict.get("config", {}) or {}
1192
- metadata = SokobanTaskInstanceMetadata(
1193
- difficulty=cfg.get("difficulty", "easy")
1194
- )
1140
+ metadata = SokobanTaskInstanceMetadata(difficulty=cfg.get("difficulty", "easy"))
1195
1141
  instance = SokobanTaskInstance(
1196
1142
  id=uuid4(),
1197
1143
  impetus=Impetus(instructions="Restore"),
@@ -1211,12 +1157,8 @@ async def restore_environment(request: EnvRestoreRequest) -> EnvRestoreResponse:
1211
1157
  SokobanEnvironmentWrapper as _SokobanWrapper,
1212
1158
  )
1213
1159
  except Exception as e:
1214
- raise HTTPException(
1215
- status_code=500, detail=f"Sokoban wrapper unavailable: {e}"
1216
- )
1217
- wrapper = await _SokobanWrapper.deserialize(
1218
- payload=state_dict, env=base_env
1219
- )
1160
+ raise HTTPException(status_code=500, detail=f"Sokoban wrapper unavailable: {e}")
1161
+ wrapper = await _SokobanWrapper.deserialize(payload=state_dict, env=base_env)
1220
1162
 
1221
1163
  env_id = registry.register_env(
1222
1164
  env=wrapper,
@@ -1242,9 +1184,7 @@ async def restore_environment(request: EnvRestoreRequest) -> EnvRestoreResponse:
1242
1184
  )
1243
1185
 
1244
1186
  except Exception as e:
1245
- logger.error(
1246
- f"Failed to restore environment from snapshot {request.snapshot_id}: {e}"
1247
- )
1187
+ logger.error(f"Failed to restore environment from snapshot {request.snapshot_id}: {e}")
1248
1188
  raise HTTPException(status_code=500, detail=str(e))
1249
1189
 
1250
1190
 
@@ -1253,9 +1193,7 @@ async def terminate_environment(request: EnvTerminateRequest) -> EnvTerminateRes
1253
1193
  """Terminate an environment and clean up resources."""
1254
1194
  handle = registry.get_env(request.env_id)
1255
1195
  if not handle:
1256
- raise HTTPException(
1257
- status_code=404, detail=f"Environment {request.env_id} not found"
1258
- )
1196
+ raise HTTPException(status_code=404, detail=f"Environment {request.env_id} not found")
1259
1197
 
1260
1198
  try:
1261
1199
  # Call terminate on the environment
@@ -1 +1 @@
1
- """Environment implementations."""
1
+ """Environment implementations."""
@@ -3,4 +3,4 @@
3
3
  from .environment import CrafterEnvironmentWrapper
4
4
  from .policy import CrafterPolicy
5
5
 
6
- __all__ = ["CrafterEnvironmentWrapper", "CrafterPolicy"]
6
+ __all__ = ["CrafterEnvironmentWrapper", "CrafterPolicy"]
@@ -1 +1 @@
1
- # wraps hosted app
1
+ # wraps hosted app