synth-ai 0.2.0__py3-none-any.whl → 0.2.1.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- synth_ai/__init__.py +28 -2
- synth_ai/core/system.py +4 -0
- synth_ai/environments/__init__.py +35 -0
- synth_ai/environments/environment/__init__.py +1 -0
- synth_ai/environments/environment/artifacts/__init__.py +1 -0
- synth_ai/environments/environment/artifacts/base.py +50 -0
- synth_ai/environments/environment/core.py +22 -0
- synth_ai/environments/environment/db/__init__.py +1 -0
- synth_ai/environments/environment/db/sqlite.py +45 -0
- synth_ai/environments/environment/registry.py +24 -0
- synth_ai/environments/environment/resources/sqlite.py +46 -0
- synth_ai/environments/environment/results.py +1 -0
- synth_ai/environments/environment/rewards/__init__.py +1 -0
- synth_ai/environments/environment/rewards/core.py +28 -0
- synth_ai/environments/environment/shared_engine.py +26 -0
- synth_ai/environments/environment/tools/__init__.py +34 -0
- synth_ai/environments/examples/__init__.py +1 -0
- synth_ai/environments/examples/crafter_classic/__init__.py +8 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_comprehensive_evaluation.py +58 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_browser.py +152 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_framework.py +1194 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_quick_evaluation.py +51 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_react_agent.py +872 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_trace_evaluation.py +1412 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/test_crafter_react_agent.py +1110 -0
- synth_ai/environments/examples/crafter_classic/config_logging.py +111 -0
- synth_ai/environments/examples/crafter_classic/engine.py +502 -0
- synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +63 -0
- synth_ai/environments/examples/crafter_classic/engine_helpers/action_map.py +5 -0
- synth_ai/environments/examples/crafter_classic/engine_helpers/serialization.py +74 -0
- synth_ai/environments/examples/crafter_classic/environment.py +255 -0
- synth_ai/environments/examples/crafter_classic/taskset.py +228 -0
- synth_ai/environments/examples/enron/agent_demos/test_synth_react.py +535 -0
- synth_ai/environments/examples/enron/art_helpers/email_search_tools.py +156 -0
- synth_ai/environments/examples/enron/art_helpers/local_email_db.py +280 -0
- synth_ai/environments/examples/enron/art_helpers/types_enron.py +24 -0
- synth_ai/environments/examples/enron/engine.py +291 -0
- synth_ai/environments/examples/enron/environment.py +165 -0
- synth_ai/environments/examples/enron/taskset.py +112 -0
- synth_ai/environments/examples/enron/units/keyword_stats.py +111 -0
- synth_ai/environments/examples/enron/units/test_email_index.py +8 -0
- synth_ai/environments/examples/minigrid/__init__.py +48 -0
- synth_ai/environments/examples/minigrid/agent_demos/minigrid_evaluation_framework.py +1188 -0
- synth_ai/environments/examples/minigrid/agent_demos/minigrid_quick_evaluation.py +47 -0
- synth_ai/environments/examples/minigrid/agent_demos/minigrid_react_agent.py +562 -0
- synth_ai/environments/examples/minigrid/agent_demos/minigrid_trace_evaluation.py +220 -0
- synth_ai/environments/examples/minigrid/agent_demos/test_minigrid_react_agent.py +393 -0
- synth_ai/environments/examples/minigrid/engine.py +589 -0
- synth_ai/environments/examples/minigrid/environment.py +274 -0
- synth_ai/environments/examples/minigrid/environment_mapping.py +242 -0
- synth_ai/environments/examples/minigrid/puzzle_loader.py +416 -0
- synth_ai/environments/examples/minigrid/taskset.py +583 -0
- synth_ai/environments/examples/minigrid/units/test_action_behavior.py +226 -0
- synth_ai/environments/examples/minigrid/units/test_debug_messages.py +83 -0
- synth_ai/environments/examples/minigrid/units/test_exploration.py +120 -0
- synth_ai/environments/examples/minigrid/units/test_minigrid_engine.py +214 -0
- synth_ai/environments/examples/minigrid/units/test_minigrid_environment.py +238 -0
- synth_ai/environments/examples/minigrid/units/test_minigrid_environment_mapping.py +301 -0
- synth_ai/environments/examples/minigrid/units/test_minigrid_taskset.py +210 -0
- synth_ai/environments/examples/nethack/__init__.py +7 -0
- synth_ai/environments/examples/nethack/achievements.py +337 -0
- synth_ai/environments/examples/nethack/agent_demos/nethack_evaluation_framework.py +981 -0
- synth_ai/environments/examples/nethack/agent_demos/nethack_quick_evaluation.py +74 -0
- synth_ai/environments/examples/nethack/agent_demos/nethack_react_agent.py +832 -0
- synth_ai/environments/examples/nethack/agent_demos/test_nethack_react_agent.py +1112 -0
- synth_ai/environments/examples/nethack/engine.py +738 -0
- synth_ai/environments/examples/nethack/environment.py +255 -0
- synth_ai/environments/examples/nethack/helpers/__init__.py +42 -0
- synth_ai/environments/examples/nethack/helpers/action_mapping.py +301 -0
- synth_ai/environments/examples/nethack/helpers/nle_wrapper.py +401 -0
- synth_ai/environments/examples/nethack/helpers/observation_utils.py +433 -0
- synth_ai/environments/examples/nethack/helpers/recording_wrapper.py +201 -0
- synth_ai/environments/examples/nethack/helpers/trajectory_recorder.py +268 -0
- synth_ai/environments/examples/nethack/helpers/visualization/replay_viewer.py +308 -0
- synth_ai/environments/examples/nethack/helpers/visualization/visualizer.py +430 -0
- synth_ai/environments/examples/nethack/taskset.py +323 -0
- synth_ai/environments/examples/nethack/units/test_nethack_engine.py +277 -0
- synth_ai/environments/examples/nethack/units/test_nethack_environment.py +281 -0
- synth_ai/environments/examples/nethack/units/test_nethack_taskset.py +213 -0
- synth_ai/environments/examples/nethack/units/test_recording.py +307 -0
- synth_ai/environments/examples/red/__init__.py +7 -0
- synth_ai/environments/examples/red/agent_demos/__init__.py +1 -0
- synth_ai/environments/examples/red/agent_demos/test_synth_react.py +1471 -0
- synth_ai/environments/examples/red/config_logging.py +110 -0
- synth_ai/environments/examples/red/engine.py +693 -0
- synth_ai/environments/examples/red/engine_helpers/__init__.py +1 -0
- synth_ai/environments/examples/red/engine_helpers/memory_map.py +28 -0
- synth_ai/environments/examples/red/engine_helpers/reward_components.py +275 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/__init__.py +142 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/adaptive_rewards.py +56 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/battle_rewards.py +283 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/composite_rewards.py +149 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/economy_rewards.py +137 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/efficiency_rewards.py +56 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/exploration_rewards.py +330 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/novelty_rewards.py +120 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_rewards.py +558 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/pokemon_rewards.py +312 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/social_rewards.py +147 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/story_rewards.py +246 -0
- synth_ai/environments/examples/red/engine_helpers/screen_analysis.py +367 -0
- synth_ai/environments/examples/red/engine_helpers/state_extraction.py +139 -0
- synth_ai/environments/examples/red/environment.py +235 -0
- synth_ai/environments/examples/red/taskset.py +77 -0
- synth_ai/environments/examples/red/test_fixes.py +125 -0
- synth_ai/environments/examples/red/test_fixes_mock.py +148 -0
- synth_ai/environments/examples/red/units/__init__.py +1 -0
- synth_ai/environments/examples/red/units/test_basic_functionality.py +97 -0
- synth_ai/environments/examples/red/units/test_button_press_requirements.py +217 -0
- synth_ai/environments/examples/red/units/test_engine.py +192 -0
- synth_ai/environments/examples/red/units/test_environment.py +455 -0
- synth_ai/environments/examples/red/units/test_exploration_strategy.py +227 -0
- synth_ai/environments/examples/red/units/test_integration.py +217 -0
- synth_ai/environments/examples/red/units/test_memory_extraction.py +111 -0
- synth_ai/environments/examples/red/units/test_menu_bug_reproduction.py +1100 -0
- synth_ai/environments/examples/red/units/test_movement_debug.py +255 -0
- synth_ai/environments/examples/red/units/test_pokemon_mcts_debug.py +163 -0
- synth_ai/environments/examples/red/units/test_pokemon_mcts_verbose.py +117 -0
- synth_ai/environments/examples/red/units/test_red_basic.py +145 -0
- synth_ai/environments/examples/red/units/test_red_comprehensive.py +323 -0
- synth_ai/environments/examples/red/units/test_retry_movement.py +195 -0
- synth_ai/environments/examples/red/units/test_reward_components.py +186 -0
- synth_ai/environments/examples/red/units/test_rom_integration.py +260 -0
- synth_ai/environments/examples/red/units/test_taskset.py +116 -0
- synth_ai/environments/examples/red/units/test_tree.py +448 -0
- synth_ai/environments/examples/sokoban/__init__.py +1 -0
- synth_ai/environments/examples/sokoban/agent_demos/sokoban_full_eval.py +900 -0
- synth_ai/environments/examples/sokoban/agent_demos/test_dspy_react.py +1 -0
- synth_ai/environments/examples/sokoban/agent_demos/test_sokoban_react_agent.py +498 -0
- synth_ai/environments/examples/sokoban/agent_demos/test_synth_lats.py +1 -0
- synth_ai/environments/examples/sokoban/agent_demos/test_synth_react_locally.py +748 -0
- synth_ai/environments/examples/sokoban/agent_demos/test_synth_react_service.py +296 -0
- synth_ai/environments/examples/sokoban/engine.py +675 -0
- synth_ai/environments/examples/sokoban/engine_helpers/__init__.py +1 -0
- synth_ai/environments/examples/sokoban/engine_helpers/room_utils.py +656 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/__init__.py +17 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/__init__.py +3 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/boxoban_env.py +129 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/render_utils.py +370 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/room_utils.py +331 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env.py +305 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_fixed_targets.py +66 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_pull.py +114 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_two_player.py +122 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_variations.py +394 -0
- synth_ai/environments/examples/sokoban/environment.py +228 -0
- synth_ai/environments/examples/sokoban/generate_verified_puzzles.py +438 -0
- synth_ai/environments/examples/sokoban/puzzle_loader.py +311 -0
- synth_ai/environments/examples/sokoban/taskset.py +425 -0
- synth_ai/environments/examples/sokoban/units/astar_common.py +94 -0
- synth_ai/environments/examples/sokoban/units/test_building_task_set.py +49 -0
- synth_ai/environments/examples/sokoban/units/test_false_positive.py +120 -0
- synth_ai/environments/examples/sokoban/units/test_simple_run_through_environment.py +119 -0
- synth_ai/environments/examples/sokoban/units/test_sokoban_environment.py +98 -0
- synth_ai/environments/examples/sokoban/units/test_tree.py +364 -0
- synth_ai/environments/examples/tictactoe/__init__.py +1 -0
- synth_ai/environments/examples/tictactoe/agent_demos/test_synth_react.py +266 -0
- synth_ai/environments/examples/tictactoe/agent_demos/test_tictactoe_react_agent.py +470 -0
- synth_ai/environments/examples/tictactoe/engine.py +368 -0
- synth_ai/environments/examples/tictactoe/environment.py +239 -0
- synth_ai/environments/examples/tictactoe/taskset.py +214 -0
- synth_ai/environments/examples/tictactoe/units/test_tictactoe_engine.py +393 -0
- synth_ai/environments/examples/tictactoe/units/test_tictactoe_environment.py +493 -0
- synth_ai/environments/examples/tictactoe/units/test_tictactoe_taskset.py +191 -0
- synth_ai/environments/examples/verilog/__init__.py +10 -0
- synth_ai/environments/examples/verilog/agent_demos/test_synth_react.py +520 -0
- synth_ai/environments/examples/verilog/engine.py +328 -0
- synth_ai/environments/examples/verilog/environment.py +349 -0
- synth_ai/environments/examples/verilog/taskset.py +418 -0
- synth_ai/environments/examples/verilog/units/test_verilog_engine.py +466 -0
- synth_ai/environments/examples/verilog/units/test_verilog_environment.py +585 -0
- synth_ai/environments/examples/verilog/units/test_verilog_integration.py +383 -0
- synth_ai/environments/examples/verilog/units/test_verilog_taskset.py +457 -0
- synth_ai/environments/reproducibility/core.py +42 -0
- synth_ai/environments/reproducibility/tree.py +364 -0
- synth_ai/environments/service/app.py +78 -0
- synth_ai/environments/service/core_routes.py +775 -0
- synth_ai/environments/service/external_registry.py +57 -0
- synth_ai/environments/service/registry.py +9 -0
- synth_ai/environments/stateful/__init__.py +1 -0
- synth_ai/environments/stateful/core.py +28 -0
- synth_ai/environments/stateful/engine.py +21 -0
- synth_ai/environments/stateful/state.py +7 -0
- synth_ai/environments/tasks/api.py +19 -0
- synth_ai/environments/tasks/core.py +78 -0
- synth_ai/environments/tasks/filters.py +39 -0
- synth_ai/environments/tasks/utils.py +89 -0
- synth_ai/environments/v0_observability/history.py +3 -0
- synth_ai/environments/v0_observability/log.py +2 -0
- synth_ai/lm/caching/constants.py +1 -0
- synth_ai/{zyk/lms → lm}/caching/ephemeral.py +4 -8
- synth_ai/{zyk/lms → lm}/caching/handler.py +15 -15
- synth_ai/{zyk/lms → lm}/caching/initialize.py +2 -4
- synth_ai/{zyk/lms → lm}/caching/persistent.py +4 -10
- synth_ai/{zyk/lms → lm}/config.py +2 -1
- synth_ai/{zyk/lms → lm}/constants.py +2 -2
- synth_ai/{zyk/lms → lm}/core/all.py +10 -10
- synth_ai/{zyk/lms → lm}/core/main.py +57 -33
- synth_ai/{zyk/lms → lm}/core/vendor_clients.py +12 -10
- synth_ai/lm/cost/monitor.py +1 -0
- synth_ai/lm/cost/statefulness.py +1 -0
- synth_ai/lm/provider_support/__init__.py +8 -0
- synth_ai/lm/provider_support/anthropic.py +945 -0
- synth_ai/lm/provider_support/openai.py +1115 -0
- synth_ai/lm/provider_support/suppress_logging.py +31 -0
- synth_ai/{zyk/lms → lm}/structured_outputs/handler.py +58 -80
- synth_ai/{zyk/lms → lm}/structured_outputs/inject.py +6 -20
- synth_ai/{zyk/lms → lm}/structured_outputs/rehabilitate.py +6 -12
- synth_ai/{zyk/lms → lm}/vendors/core/anthropic_api.py +21 -30
- synth_ai/{zyk/lms → lm}/vendors/core/gemini_api.py +35 -32
- synth_ai/{zyk/lms → lm}/vendors/core/mistral_api.py +19 -28
- synth_ai/{zyk/lms → lm}/vendors/core/openai_api.py +26 -36
- synth_ai/{zyk/lms → lm}/vendors/openai_standard.py +29 -33
- synth_ai/{zyk/lms → lm}/vendors/retries.py +1 -1
- synth_ai/lm/vendors/supported/__init__.py +0 -0
- synth_ai/{zyk/lms → lm}/vendors/supported/custom_endpoint.py +131 -118
- synth_ai/{zyk/lms → lm}/vendors/supported/deepseek.py +4 -8
- synth_ai/{zyk/lms → lm}/vendors/supported/grok.py +6 -8
- synth_ai/{zyk/lms → lm}/vendors/supported/groq.py +1 -1
- synth_ai/{zyk/lms → lm}/vendors/supported/ollama.py +2 -2
- synth_ai/{zyk/lms → lm}/vendors/supported/openrouter.py +18 -16
- synth_ai/{zyk/lms → lm}/vendors/supported/together.py +1 -1
- synth_ai/tracing/__init__.py +0 -0
- synth_ai/tracing/abstractions.py +224 -0
- synth_ai/tracing/base_client.py +91 -0
- synth_ai/tracing/client_manager.py +131 -0
- synth_ai/tracing/config.py +140 -0
- synth_ai/tracing/context.py +146 -0
- synth_ai/tracing/decorators.py +679 -0
- synth_ai/tracing/events/__init__.py +0 -0
- synth_ai/tracing/events/manage.py +147 -0
- synth_ai/tracing/events/scope.py +86 -0
- synth_ai/tracing/events/store.py +227 -0
- synth_ai/tracing/immediate_client.py +152 -0
- synth_ai/tracing/local.py +18 -0
- synth_ai/tracing/log_client_base.py +74 -0
- synth_ai/tracing/retry_queue.py +187 -0
- synth_ai/tracing/trackers.py +515 -0
- synth_ai/tracing/upload.py +504 -0
- synth_ai/tracing/utils.py +9 -0
- synth_ai/zyk/__init__.py +28 -2
- synth_ai-0.2.1.dev0.dist-info/METADATA +349 -0
- synth_ai-0.2.1.dev0.dist-info/RECORD +261 -0
- {synth_ai-0.2.0.dist-info → synth_ai-0.2.1.dev0.dist-info}/WHEEL +1 -1
- synth_ai/zyk/lms/caching/constants.py +0 -1
- synth_ai/zyk/lms/cost/monitor.py +0 -1
- synth_ai/zyk/lms/cost/statefulness.py +0 -1
- synth_ai-0.2.0.dist-info/METADATA +0 -36
- synth_ai-0.2.0.dist-info/RECORD +0 -50
- /synth_ai/{zyk/lms/__init__.py → environments/reproducibility/helpers.py} +0 -0
- /synth_ai/{zyk/lms/caching → lm}/__init__.py +0 -0
- /synth_ai/{zyk/lms/core → lm/caching}/__init__.py +0 -0
- /synth_ai/{zyk/lms → lm}/caching/dbs.py +0 -0
- /synth_ai/{zyk/lms/cost → lm/core}/__init__.py +0 -0
- /synth_ai/{zyk/lms → lm}/core/exceptions.py +0 -0
- /synth_ai/{zyk/lms/structured_outputs → lm/cost}/__init__.py +0 -0
- /synth_ai/{zyk/lms/vendors → lm/structured_outputs}/__init__.py +0 -0
- /synth_ai/{zyk/lms → lm}/tools/__init__.py +0 -0
- /synth_ai/{zyk/lms → lm}/tools/base.py +0 -0
- /synth_ai/{zyk/lms/vendors/core → lm/vendors}/__init__.py +0 -0
- /synth_ai/{zyk/lms → lm}/vendors/base.py +0 -0
- /synth_ai/{zyk/lms/vendors/local → lm/vendors/core}/__init__.py +0 -0
- /synth_ai/{zyk/lms/vendors/supported → lm/vendors/local}/__init__.py +0 -0
- /synth_ai/{zyk/lms → lm}/vendors/local/ollama.py +0 -0
- {synth_ai-0.2.0.dist-info → synth_ai-0.2.1.dev0.dist-info/licenses}/LICENSE +0 -0
- {synth_ai-0.2.0.dist-info → synth_ai-0.2.1.dev0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,57 @@
|
|
1
|
+
"""
|
2
|
+
External environment registry support.
|
3
|
+
|
4
|
+
This module provides functionality to register environments from external packages.
|
5
|
+
"""
|
6
|
+
|
7
|
+
import importlib
|
8
|
+
import logging
|
9
|
+
from typing import List, Dict
|
10
|
+
|
11
|
+
logger = logging.getLogger(__name__)
|
12
|
+
|
13
|
+
|
14
|
+
class ExternalRegistryConfig:
    """Holds the list of external environment sources to register.

    Each entry is a string-to-string mapping. ``load_external_environments``
    reads the ``"module"`` key and the optional ``"function"`` key of every
    entry.
    """

    def __init__(self, external_environments: List[Dict[str, str]] = None):
        # A missing or empty argument is replaced by a fresh empty list so the
        # attribute is always iterable.
        if external_environments:
            self.external_environments = external_environments
        else:
            self.external_environments = []
|
19
|
+
|
20
|
+
|
21
|
+
def load_external_environments(config: ExternalRegistryConfig):
    """
    Import each configured external module and call its registration hook.

    Entries without a ``"module"`` value are skipped with a warning. A module
    that lacks the requested function is reported with a warning. Import
    failures and any exception raised by the hook are logged as errors, and
    processing continues with the next entry.

    Args:
        config: Configuration specifying external environment sources
    """
    default_hook = "integrate_with_environments_service"

    for env_config in config.external_environments:
        module_name = env_config.get("module")
        function_name = env_config.get("function", default_hook)

        if not module_name:
            logger.warning("External environment config missing 'module' field")
            continue

        try:
            module = importlib.import_module(module_name)

            # Guard clause: nothing to call if the hook is absent.
            if not hasattr(module, function_name):
                logger.warning(f"Module {module_name} does not have function {function_name}")
                continue

            # The hook is invoked with no arguments.
            getattr(module, function_name)()
            logger.info(f"Successfully loaded environments from {module_name}")
        except ImportError as e:
            logger.error(f"Failed to import module {module_name}: {e}")
        except Exception as e:
            # Best-effort loading: one failing package must not stop the rest.
            logger.error(f"Error loading environments from {module_name}: {e}")
|
52
|
+
|
53
|
+
|
54
|
+
__all__ = [
|
55
|
+
"ExternalRegistryConfig",
|
56
|
+
"load_external_environments",
|
57
|
+
]
|
@@ -0,0 +1,9 @@
|
|
1
|
+
# This file re-exports the actual registry functions from synth_ai.environments.environment.registry
|
2
|
+
# to be used by the service layer, maintaining a clean separation if needed.
|
3
|
+
from synth_ai.environments.environment.registry import (
|
4
|
+
register_environment,
|
5
|
+
get_environment_cls,
|
6
|
+
list_supported_env_types,
|
7
|
+
)
|
8
|
+
|
9
|
+
__all__ = ["register_environment", "get_environment_cls", "list_supported_env_types"]
|
@@ -0,0 +1 @@
|
|
1
|
+
"""Stateful environment components."""
|
@@ -0,0 +1,28 @@
|
|
1
|
+
from abc import abstractmethod
|
2
|
+
from typing import List
|
3
|
+
|
4
|
+
from synth_ai.environments.environment.shared_engine import Engine, InternalObservation
|
5
|
+
from synth_ai.environments.environment.tools import EnvToolCall
|
6
|
+
|
7
|
+
|
8
|
+
class StatefulEnvironment(Engine):
    """Abstract environment interface layered on ``Engine``.

    Every method below is an abstract placeholder with an empty body;
    concrete environments are expected to supply the implementations.
    """

    @abstractmethod
    async def initialize(self) -> InternalObservation:
        """Abstract coroutine; annotated to return an ``InternalObservation``."""

    @abstractmethod
    async def terminate(self) -> InternalObservation:
        """Abstract coroutine; annotated to return an ``InternalObservation``."""

    # main external api
    @abstractmethod
    def validate_tool_calls(self, tool_calls: EnvToolCall):
        """Abstract synchronous hook taking the tool calls to be checked.

        NOTE(review): annotated with a single ``EnvToolCall`` while ``step``
        accepts a list -- confirm which shape is intended.
        """

    @abstractmethod
    async def step(self, tool_calls: List[EnvToolCall]) -> InternalObservation:
        """Abstract coroutine taking a list of tool calls.

        Annotated to return an ``InternalObservation``.
        """

    @abstractmethod
    async def checkpoint(self) -> InternalObservation:
        """Abstract coroutine; annotated to return an ``InternalObservation``."""
|
@@ -0,0 +1,21 @@
|
|
1
|
+
from synth_ai.environments.environment.shared_engine import Engine
|
2
|
+
from typing import TypeVar
|
3
|
+
|
4
|
+
|
5
|
+
SnapshotType = TypeVar("SnapshotType", bound="StatefulEngineSnapshot")
|
6
|
+
|
7
|
+
|
8
|
+
class StatefulEngineSnapshot:
    """Empty base type for engine snapshots; declares no fields or methods."""
|
10
|
+
|
11
|
+
|
12
|
+
class StatefulEngine(Engine):
    """Engine base class exposing snapshot (de)serialization hooks.

    All three coroutines are empty placeholders that return ``None``;
    subclasses are expected to override them.
    """

    async def serialize(self):
        """Placeholder coroutine; the base implementation returns ``None``."""
        pass

    @classmethod
    async def deserialize(cls, engine_snapshot: StatefulEngineSnapshot):
        """Placeholder alternate constructor taking an engine snapshot.

        The first parameter is named ``cls`` because this is a classmethod and
        receives the class, not an instance. The base implementation returns
        ``None``.

        Args:
            engine_snapshot: Snapshot object to rebuild the engine from.
        """
        pass

    async def _step_engine(self):
        """Placeholder coroutine; the base implementation returns ``None``."""
        pass
|
@@ -0,0 +1,19 @@
|
|
1
|
+
from synth_ai.environments.tasks.core import (
|
2
|
+
Task,
|
3
|
+
TaskInstance,
|
4
|
+
TaskInstanceSet,
|
5
|
+
TaskInstanceMetadata,
|
6
|
+
SplitInfo,
|
7
|
+
Impetus,
|
8
|
+
Intent,
|
9
|
+
)
|
10
|
+
|
11
|
+
__all__ = [
|
12
|
+
"Task",
|
13
|
+
"TaskInstance",
|
14
|
+
"TaskInstanceSet",
|
15
|
+
"TaskInstanceMetadata",
|
16
|
+
"SplitInfo",
|
17
|
+
"Impetus",
|
18
|
+
"Intent",
|
19
|
+
]
|
@@ -0,0 +1,78 @@
|
|
1
|
+
from typing import Optional, Dict, List, Callable, Set, Any
|
2
|
+
from synth_ai.environments.v0_observability.history import SynthGlobalTrajectory
|
3
|
+
from uuid import UUID
|
4
|
+
from abc import abstractmethod
|
5
|
+
from dataclasses import dataclass, field
|
6
|
+
|
7
|
+
|
8
|
+
@dataclass
class Task:
    """Task-level text fields plus optional shared environment parameters."""

    # Free-form text fields; all three are required.
    global_premises: str
    global_constraints: str
    global_objectives: str

    # Required field with no default; callers may pass None.
    shared_env_params: Optional[Dict]
|
15
|
+
|
16
|
+
|
17
|
+
@dataclass
class TaskInstanceMetadata:
    """Field-less base dataclass for per-instance metadata."""
|
20
|
+
|
21
|
+
|
22
|
+
@dataclass
class Intent:
    """Evaluation-side data for a task instance: rubric, gold references, evaluators."""

    rubric: Dict[str, Any]
    # NOTE(review): plural name but typed as a single optional trajectory -- confirm.
    gold_trajectories: Optional[SynthGlobalTrajectory]
    gold_state_diff: Dict
    # default_factory gives every instance its own list rather than a shared one.
    deterministic_eval_functions: List[Callable] = field(default_factory=list)
|
28
|
+
|
29
|
+
|
30
|
+
@dataclass
class Impetus:
    """Wraps the instruction text attached to a task instance."""

    instructions: str
|
33
|
+
|
34
|
+
# ?
|
35
|
+
|
36
|
+
|
37
|
+
@dataclass
class TaskInstance:
    """One concrete task: identity, impetus, intent, metadata and optional snapshot.

    NOTE(review): the class declares no ABC base or ABCMeta metaclass, so the
    ``@abstractmethod`` markers below are not enforced at instantiation.
    """

    id: UUID
    impetus: Impetus
    intent: Intent
    metadata: TaskInstanceMetadata
    is_reproducible: bool
    # String forward reference; the name is not resolved when the class is created.
    initial_engine_snapshot: Optional["StatefulEngineSnapshot"]

    @abstractmethod
    async def serialize(self) -> Dict:
        """Abstract coroutine; annotated to return a ``Dict``."""

    @abstractmethod
    async def deserialize(self) -> "TaskInstance":
        """Abstract coroutine; annotated to return a ``TaskInstance``."""
|
53
|
+
|
54
|
+
|
55
|
+
@dataclass
class TaskInstanceMetadataFilter:
    """Base class for callable predicates over ``TaskInstance`` objects."""

    @abstractmethod
    def __call__(self, instance: TaskInstance) -> bool:
        # Subclasses implement the actual predicate; this base body contains
        # only the docstring and therefore returns None if called directly.
        """Return True if the instance passes the filter."""
|
64
|
+
|
65
|
+
|
66
|
+
@dataclass
class SplitInfo:
    """Instance-id sets for the validation and test splits, plus a defined flag."""

    val_instance_ids: Set[str]
    test_instance_ids: Set[str]
    # Required positional field despite the leading underscore.
    _is_split_defined: bool
|
71
|
+
|
72
|
+
|
73
|
+
@dataclass
class TaskInstanceSet:
    """Named collection of task instances together with its split information."""

    name: str
    description: str
    instances: List[TaskInstance]
    split_info: SplitInfo
|
@@ -0,0 +1,39 @@
|
|
1
|
+
from typing import Any, Collection, Optional
|
2
|
+
from dataclasses import dataclass
|
3
|
+
from synth_ai.environments.tasks.core import TaskInstanceMetadataFilter, TaskInstance
|
4
|
+
|
5
|
+
|
6
|
+
@dataclass
class ValueFilter(TaskInstanceMetadataFilter):
    """Pass instances whose metadata attribute ``key`` is one of ``values``.

    An attribute that is missing or ``None`` never passes.
    """

    key: str
    values: Collection[Any]

    def __call__(self, instance: TaskInstance) -> bool:
        candidate = getattr(instance.metadata, self.key, None)
        # Short-circuit: a missing/None attribute is rejected before the
        # membership test runs.
        return candidate is not None and candidate in self.values
|
16
|
+
|
17
|
+
|
18
|
+
@dataclass
class RangeFilter(TaskInstanceMetadataFilter):
    """Pass instances whose numeric metadata attribute lies in [min_val, max_val].

    A bound left as ``None`` is not checked. Attributes that are missing,
    ``None`` or not ``int``/``float`` never pass.
    """

    key: str
    min_val: Optional[float] = None
    max_val: Optional[float] = None

    def __call__(self, instance: TaskInstance) -> bool:
        candidate = getattr(instance.metadata, self.key, None)

        # One check covers both "attribute absent" (None) and "not a number".
        if not isinstance(candidate, (int, float)):
            return False

        if self.min_val is not None and candidate < self.min_val:
            return False

        # Written as "not >" rather than "<=" so a NaN candidate still passes,
        # exactly as it would with an explicit greater-than rejection.
        if self.max_val is None:
            return True
        return not (candidate > self.max_val)
|
@@ -0,0 +1,89 @@
|
|
1
|
+
"""
|
2
|
+
Utility functions and generic filters for taskset creation.
|
3
|
+
"""
|
4
|
+
|
5
|
+
from typing import Any, Collection, Optional, List, Set
|
6
|
+
from uuid import UUID, uuid4
|
7
|
+
from synth_ai.environments.tasks.core import (
|
8
|
+
TaskInstanceMetadataFilter,
|
9
|
+
TaskInstanceSet,
|
10
|
+
SplitInfo,
|
11
|
+
TaskInstance,
|
12
|
+
)
|
13
|
+
|
14
|
+
|
15
|
+
def parse_or_new_uuid(raw_id: Optional[str]) -> UUID:
    """
    Parse a raw ID string into a UUID, or generate a new one if invalid or missing.

    Args:
        raw_id: Candidate UUID text, or ``None`` when no ID is available.

    Returns:
        The parsed ``UUID`` when ``raw_id`` is well-formed, otherwise a fresh
        random ``uuid4()``.
    """
    # Handle the "missing" case explicitly instead of relying on UUID(None) raising.
    if raw_id is None:
        return uuid4()
    try:
        return UUID(raw_id)
    except (ValueError, TypeError, AttributeError):
        # ValueError: badly formed hex string.
        # TypeError / AttributeError: a non-string value was passed.
        return uuid4()
|
23
|
+
|
24
|
+
|
25
|
+
class ValueFilter(TaskInstanceMetadataFilter):
    """
    Filter TaskInstances whose metadata attribute equals one of the allowed values.
    """

    def __init__(self, key: str, values: Collection[Any]):
        # Stored as a set, so the supplied values must be hashable.
        self.values = set(values)
        self.key = key

    def __call__(self, instance: TaskInstance) -> bool:
        # A missing attribute is looked up as None, so it matches only when
        # None itself is among the allowed values.
        observed = getattr(instance.metadata, self.key, None)
        return observed in self.values
|
36
|
+
|
37
|
+
|
38
|
+
class RangeFilter(TaskInstanceMetadataFilter):
    """
    Filter TaskInstances where a numeric metadata attribute falls within [min_value, max_value].

    A bound left as ``None`` is not enforced.
    """

    def __init__(
        self,
        key: str,
        min_value: Optional[float] = None,
        max_value: Optional[float] = None,
    ):
        self.key = key
        self.min_value = min_value
        self.max_value = max_value

    def __call__(self, instance: TaskInstance) -> bool:
        value = getattr(instance.metadata, self.key, None)
        lower, upper = self.min_value, self.max_value

        if value is None:
            # A missing attribute passes only when no bound is configured.
            return lower is None and upper is None

        if lower is not None and value < lower:
            return False
        if upper is not None and value > upper:
            return False
        return True
|
60
|
+
|
61
|
+
|
62
|
+
def make_taskset(
    name: str,
    description: str,
    instances: List[TaskInstance],
    val_filter: Optional[TaskInstanceMetadataFilter] = None,
    test_filter: Optional[TaskInstanceMetadataFilter] = None,
) -> TaskInstanceSet:
    """
    Build a TaskInstanceSet, deriving validation/test id sets from optional filters.

    Args:
        name: Name stored on the resulting set.
        description: Description stored on the resulting set.
        instances: Task instances to include; passed through unchanged.
        val_filter: Predicate selecting validation instances, if any.
        test_filter: Predicate selecting test instances, if any.

    Returns:
        A TaskInstanceSet whose SplitInfo holds the ids (``inst.id``) of the
        instances accepted by each filter. The split is flagged as defined
        when at least one filter was supplied.
    """

    def _matching_ids(predicate: Optional[TaskInstanceMetadataFilter]) -> Set[Any]:
        # No (or falsy) filter selects nothing.
        if not predicate:
            return set()
        return {inst.id for inst in instances if predicate(inst)}

    split_info = SplitInfo(
        val_instance_ids=_matching_ids(val_filter),
        test_instance_ids=_matching_ids(test_filter),
        _is_split_defined=not (val_filter is None and test_filter is None),
    )
    return TaskInstanceSet(
        name=name,
        description=description,
        instances=instances,
        split_info=split_info,
    )
|
@@ -0,0 +1 @@
|
|
1
|
+
# Size limit in bytes: 10 * 1024**3 (10 GiB).
DISKCACHE_SIZE_LIMIT = 10 * 1024**3
|
@@ -5,8 +5,8 @@ from typing import Optional, Union
|
|
5
5
|
from diskcache import Cache
|
6
6
|
from pydantic import BaseModel
|
7
7
|
|
8
|
-
from synth_ai.
|
9
|
-
from synth_ai.
|
8
|
+
from synth_ai.lm.caching.constants import DISKCACHE_SIZE_LIMIT
|
9
|
+
from synth_ai.lm.vendors.base import BaseLMResponse
|
10
10
|
|
11
11
|
|
12
12
|
@dataclass
|
@@ -27,9 +27,7 @@ class EphemeralCache:
|
|
27
27
|
return None
|
28
28
|
|
29
29
|
if not isinstance(cache_data, dict):
|
30
|
-
return BaseLMResponse(
|
31
|
-
raw_response=cache_data, structured_output=None, tool_calls=None
|
32
|
-
)
|
30
|
+
return BaseLMResponse(raw_response=cache_data, structured_output=None, tool_calls=None)
|
33
31
|
|
34
32
|
raw_response = cache_data.get("raw_response")
|
35
33
|
tool_calls = cache_data.get("tool_calls")
|
@@ -54,9 +52,7 @@ class EphemeralCache:
|
|
54
52
|
"raw_response": response.raw_response
|
55
53
|
if response.raw_response is not None
|
56
54
|
else None,
|
57
|
-
"tool_calls": response.tool_calls
|
58
|
-
if response.tool_calls is not None
|
59
|
-
else None,
|
55
|
+
"tool_calls": response.tool_calls if response.tool_calls is not None else None,
|
60
56
|
"structured_output": (
|
61
57
|
response.structured_output.model_dump()
|
62
58
|
if response.structured_output is not None
|
@@ -3,17 +3,19 @@ from typing import Any, Dict, List, Optional, Type
|
|
3
3
|
|
4
4
|
from pydantic import BaseModel
|
5
5
|
|
6
|
-
from synth_ai.
|
7
|
-
from synth_ai.
|
8
|
-
from synth_ai.
|
9
|
-
from synth_ai.
|
6
|
+
from synth_ai.lm.caching.ephemeral import EphemeralCache
|
7
|
+
from synth_ai.lm.caching.persistent import PersistentCache
|
8
|
+
from synth_ai.lm.tools.base import BaseTool
|
9
|
+
from synth_ai.lm.vendors.base import BaseLMResponse
|
10
10
|
|
11
11
|
persistent_cache = PersistentCache()
|
12
12
|
ephemeral_cache = EphemeralCache()
|
13
13
|
|
14
14
|
import logging
|
15
|
+
|
15
16
|
logger = logging.getLogger(__name__)
|
16
17
|
|
18
|
+
|
17
19
|
def map_params_to_key(
|
18
20
|
messages: List[Dict],
|
19
21
|
model: str,
|
@@ -30,7 +32,7 @@ def map_params_to_key(
|
|
30
32
|
normalized_messages = "".join([msg["content"] for msg in messages])
|
31
33
|
normalized_model = model
|
32
34
|
normalized_temperature = f"{temperature:.2f}"[:4]
|
33
|
-
normalized_response_model = str(response_model.
|
35
|
+
normalized_response_model = str(response_model.model_json_schema()) if response_model else ""
|
34
36
|
normalized_reasoning_effort = reasoning_effort if reasoning_effort else ""
|
35
37
|
|
36
38
|
# Normalize tools if present
|
@@ -44,10 +46,10 @@ def map_params_to_key(
|
|
44
46
|
"arguments": tool.arguments.schema(),
|
45
47
|
}
|
46
48
|
tool_schemas.append(str(tool_schema))
|
47
|
-
#logger.error(f"Tool schemas: {tool_schemas}")
|
49
|
+
# logger.error(f"Tool schemas: {tool_schemas}")
|
48
50
|
normalized_tools = "".join(tool_schemas)
|
49
51
|
elif tools:
|
50
|
-
#logger.error(f"Tools: {tools}")
|
52
|
+
# logger.error(f"Tools: {tools}")
|
51
53
|
normalized_tools = "".join([str(tool) for tool in tools])
|
52
54
|
|
53
55
|
key_str = ""
|
@@ -57,30 +59,28 @@ def map_params_to_key(
|
|
57
59
|
normalized_temperature,
|
58
60
|
normalized_response_model,
|
59
61
|
normalized_tools,
|
60
|
-
normalized_reasoning_effort
|
62
|
+
normalized_reasoning_effort,
|
61
63
|
]
|
62
64
|
for component in components:
|
63
65
|
if component:
|
64
66
|
key_str += str(component)
|
65
67
|
|
66
|
-
return hashlib.sha256(key_str.encode()).hexdigest()
|
68
|
+
return hashlib.sha256(key_str.encode()).hexdigest()
|
67
69
|
|
68
70
|
|
69
71
|
class CacheHandler:
|
70
72
|
use_persistent_store: bool = False
|
71
73
|
use_ephemeral_store: bool = True
|
72
74
|
|
73
|
-
def __init__(
|
74
|
-
self, use_persistent_store: bool = False, use_ephemeral_store: bool = True
|
75
|
-
):
|
75
|
+
def __init__(self, use_persistent_store: bool = False, use_ephemeral_store: bool = True):
|
76
76
|
self.use_persistent_store = use_persistent_store
|
77
77
|
self.use_ephemeral_store = use_ephemeral_store
|
78
78
|
|
79
79
|
def _validate_messages(self, messages: List[Dict[str, Any]]) -> None:
|
80
80
|
"""Validate that messages are in the correct format."""
|
81
|
-
assert all(
|
82
|
-
|
83
|
-
)
|
81
|
+
assert all([type(msg["content"]) == str for msg in messages]), (
|
82
|
+
"All message contents must be strings"
|
83
|
+
)
|
84
84
|
|
85
85
|
def hit_managed_cache(
|
86
86
|
self,
|
@@ -1,9 +1,7 @@
|
|
1
|
-
from synth_ai.
|
1
|
+
from synth_ai.lm.caching.handler import CacheHandler
|
2
2
|
|
3
3
|
cache_handler = CacheHandler(use_ephemeral_store=True, use_persistent_store=True)
|
4
|
-
ephemeral_cache_handler = CacheHandler(
|
5
|
-
use_ephemeral_store=True, use_persistent_store=False
|
6
|
-
)
|
4
|
+
ephemeral_cache_handler = CacheHandler(use_ephemeral_store=True, use_persistent_store=False)
|
7
5
|
|
8
6
|
|
9
7
|
def get_cache_handler(use_ephemeral_cache_only: bool = False):
|
@@ -6,7 +6,7 @@ from typing import Optional, Type, Union
|
|
6
6
|
|
7
7
|
from pydantic import BaseModel
|
8
8
|
|
9
|
-
from synth_ai.
|
9
|
+
from synth_ai.lm.vendors.base import BaseLMResponse
|
10
10
|
|
11
11
|
|
12
12
|
@dataclass
|
@@ -31,14 +31,10 @@ class PersistentCache:
|
|
31
31
|
cache_data = json.loads(result[0])
|
32
32
|
except json.JSONDecodeError:
|
33
33
|
# Handle legacy string responses
|
34
|
-
return BaseLMResponse(
|
35
|
-
raw_response=result[0], structured_output=None, tool_calls=None
|
36
|
-
)
|
34
|
+
return BaseLMResponse(raw_response=result[0], structured_output=None, tool_calls=None)
|
37
35
|
|
38
36
|
if not isinstance(cache_data, dict):
|
39
|
-
return BaseLMResponse(
|
40
|
-
raw_response=cache_data, structured_output=None, tool_calls=None
|
41
|
-
)
|
37
|
+
return BaseLMResponse(raw_response=cache_data, structured_output=None, tool_calls=None)
|
42
38
|
|
43
39
|
raw_response = cache_data.get("raw_response")
|
44
40
|
tool_calls = cache_data.get("tool_calls")
|
@@ -61,9 +57,7 @@ class PersistentCache:
|
|
61
57
|
"raw_response": response.raw_response
|
62
58
|
if response.raw_response is not None
|
63
59
|
else None,
|
64
|
-
"tool_calls": response.tool_calls
|
65
|
-
if response.tool_calls is not None
|
66
|
-
else None,
|
60
|
+
"tool_calls": response.tool_calls if response.tool_calls is not None else None,
|
67
61
|
"structured_output": (
|
68
62
|
response.structured_output.model_dump()
|
69
63
|
if response.structured_output is not None
|
@@ -7,4 +7,5 @@ def should_use_cache() -> bool:
|
|
7
7
|
cache_env = os.getenv("USE_ZYK_CACHE", "true").lower()
|
8
8
|
return cache_env not in ("false", "0", "no")
|
9
9
|
|
10
|
-
|
10
|
+
|
11
|
+
reasoning_models = ["o1", "o3-mini", "o3", "o4-mini", "claude-3-7-sonnet-latest"]
|
@@ -1,4 +1,4 @@
|
|
1
|
-
OPENAI_REASONING_MODELS = ["o4", "o4-mini", "o3","o3-mini", "o1-mini", "o1"]
|
1
|
+
OPENAI_REASONING_MODELS = ["o4", "o4-mini", "o3", "o3-mini", "o1-mini", "o1"]
|
2
2
|
CLAUDE_REASONING_MODELS = ["claude-3-7-sonnet-latest"]
|
3
3
|
GEMINI_REASONING_MODELS = ["gemini-2.5-flash", "gemini-2.5-pro"]
|
4
4
|
|
@@ -19,4 +19,4 @@ SONNET_37_BUDGETS = {
|
|
19
19
|
|
20
20
|
REASONING_MODELS = OPENAI_REASONING_MODELS + CLAUDE_REASONING_MODELS + GEMINI_REASONING_MODELS
|
21
21
|
|
22
|
-
SPECIAL_BASE_TEMPS = {model: 1 for model in REASONING_MODELS}
|
22
|
+
SPECIAL_BASE_TEMPS = {model: 1 for model in REASONING_MODELS}
|
@@ -1,16 +1,16 @@
|
|
1
|
-
from synth_ai.
|
2
|
-
from synth_ai.
|
3
|
-
from synth_ai.
|
1
|
+
from synth_ai.lm.vendors.core.anthropic_api import AnthropicAPI
|
2
|
+
from synth_ai.lm.vendors.core.gemini_api import GeminiAPI
|
3
|
+
from synth_ai.lm.vendors.core.openai_api import (
|
4
4
|
OpenAIPrivate,
|
5
5
|
OpenAIStructuredOutputClient,
|
6
6
|
)
|
7
|
-
from synth_ai.
|
8
|
-
from synth_ai.
|
9
|
-
from synth_ai.
|
10
|
-
from synth_ai.
|
11
|
-
from synth_ai.
|
12
|
-
from synth_ai.
|
13
|
-
from synth_ai.
|
7
|
+
from synth_ai.lm.vendors.supported.deepseek import DeepSeekAPI
|
8
|
+
from synth_ai.lm.vendors.supported.together import TogetherAPI
|
9
|
+
from synth_ai.lm.vendors.supported.groq import GroqAPI
|
10
|
+
from synth_ai.lm.vendors.supported.grok import GrokAPI
|
11
|
+
from synth_ai.lm.vendors.core.mistral_api import MistralAPI
|
12
|
+
from synth_ai.lm.vendors.supported.custom_endpoint import CustomEndpointAPI
|
13
|
+
from synth_ai.lm.vendors.supported.openrouter import OpenRouterAPI
|
14
14
|
|
15
15
|
|
16
16
|
class OpenAIClient(OpenAIPrivate):
|