synth-ai 0.2.4.dev7__py3-none-any.whl → 0.2.4.dev9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- synth_ai/__init__.py +1 -1
- synth_ai/cli/__init__.py +6 -0
- synth_ai/cli/balance.py +3 -15
- synth_ai/cli/demo.py +68 -9
- synth_ai/cli/rl_demo.py +137 -0
- synth_ai/cli/root.py +65 -0
- synth_ai/config/base_url.py +47 -0
- synth_ai/demos/core/__init__.py +1 -0
- synth_ai/demos/core/cli.py +621 -0
- synth_ai/demos/demo_task_apps/__init__.py +1 -0
- synth_ai/demos/demo_task_apps/core.py +374 -0
- synth_ai/demos/demo_task_apps/math/__init__.py +1 -0
- synth_ai/demos/demo_task_apps/math/app.py +37 -0
- synth_ai/demos/demo_task_apps/math/config.toml +44 -0
- synth_ai/demos/demo_task_apps/math/deploy_modal.py +60 -0
- synth_ai/demos/demo_task_apps/math/deploy_task_app.sh +22 -0
- synth_ai/environments/examples/bandit/__init__.py +33 -0
- synth_ai/environments/examples/bandit/engine.py +294 -0
- synth_ai/environments/examples/bandit/environment.py +194 -0
- synth_ai/environments/examples/bandit/taskset.py +200 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/analyze_semantic_words_markdown.py +250 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_comprehensive_evaluation.py +59 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_browser.py +152 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_config.toml +24 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_framework.py +1194 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/crafter_synth_config.toml +56 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_config_modal.toml +32 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_traces_sft_turso.py +724 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/kick_off_ft_modal.py +384 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_action_results.py +53 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_agent_actions.py +178 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_latest_run.py +222 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_lm_traces.py +183 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_no_rewards.py +210 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_trace_issue.py +206 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/check_db_schema.py +49 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/check_latest_results.py +64 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/debug_agent_responses.py +88 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/quick_trace_check.py +77 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/compare_experiments.py +324 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/filter_traces_sft_turso.py +580 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/kick_off_ft_oai.py +362 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/multi_model_config.toml +49 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_enhanced_hooks.py +332 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_hook_events.py +97 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_hook_results.py +217 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/check_hook_storage.py +87 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/check_seeds.py +88 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/compare_seed_performance.py +195 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/custom_eval_pipelines.py +400 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/plot_hook_frequency.py +195 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/seed_analysis_summary.py +56 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/run_rollouts_for_models_and_compare_v3.py +858 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_quick_evaluation.py +52 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_react_agent.py +874 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_trace_evaluation.py +1412 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/example_v3_usage.py +216 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/compare_traces.py +296 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_comprehensive_evaluation.py +58 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_env_serialization.py +464 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_evaluation_browser.py +152 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_quick_evaluation.py +51 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_trace_evaluation.py +1412 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/debug_player_loss.py +112 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/diagnose_service.py +203 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/diagnose_slowness.py +305 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/eval_by_difficulty.py +126 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/eval_example.py +94 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/explore_saved_states.py +142 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/filter_traces_sft.py +26 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/filter_traces_sft_OLD.py +984 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_data_gemini.py +724 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_data_modal.py +386 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_metadata.py +205 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/kick_off_ft_gemini.py +150 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/kick_off_ft_modal.py +283 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/prepare_vertex_ft.py +280 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/profile_env_slowness.py +456 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/replicate_issue.py +166 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/run_and_eval.py +102 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/run_comparison.py +128 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/run_qwen_rollouts.py +655 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/trace_eval_OLD.py +202 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/validate_openai_format.py +166 -0
- synth_ai/environments/examples/crafter_classic/environment.py +41 -2
- synth_ai/environments/examples/crafter_custom/agent_demos/__init__.py +1 -0
- synth_ai/environments/examples/crafter_custom/agent_demos/trace_eval.py +202 -0
- synth_ai/environments/examples/crafter_custom/old/analyze_diamond_issue.py +159 -0
- synth_ai/environments/examples/crafter_custom/old/analyze_diamond_spawning.py +158 -0
- synth_ai/environments/examples/crafter_custom/old/compare_worlds.py +71 -0
- synth_ai/environments/examples/crafter_custom/old/dataset_stats.py +105 -0
- synth_ai/environments/examples/crafter_custom/old/diamond_spawning_summary.py +119 -0
- synth_ai/environments/examples/crafter_custom/old/example_dataset_usage.py +52 -0
- synth_ai/environments/examples/enron/units/keyword_stats.py +112 -0
- synth_ai/environments/examples/minigrid/agent_demos/minigrid_evaluation_framework.py +1188 -0
- synth_ai/environments/examples/minigrid/agent_demos/minigrid_quick_evaluation.py +48 -0
- synth_ai/environments/examples/minigrid/agent_demos/minigrid_react_agent.py +562 -0
- synth_ai/environments/examples/minigrid/agent_demos/minigrid_trace_evaluation.py +221 -0
- synth_ai/environments/examples/nethack/agent_demos/nethack_evaluation_framework.py +981 -0
- synth_ai/environments/examples/nethack/agent_demos/nethack_quick_evaluation.py +74 -0
- synth_ai/environments/examples/nethack/agent_demos/nethack_react_agent.py +831 -0
- synth_ai/environments/examples/red/agent_demos/__init__.py +1 -0
- synth_ai/environments/examples/red/units/__init__.py +1 -0
- synth_ai/environments/examples/sokoban/agent_demos/sokoban_full_eval.py +899 -0
- synth_ai/environments/examples/sokoban/units/astar_common.py +95 -0
- synth_ai/environments/service/app.py +8 -0
- synth_ai/http.py +102 -0
- synth_ai/inference/__init__.py +7 -0
- synth_ai/inference/client.py +20 -0
- synth_ai/install_sqld.sh +40 -0
- synth_ai/jobs/client.py +246 -0
- synth_ai/learning/__init__.py +24 -0
- synth_ai/learning/client.py +149 -0
- synth_ai/learning/config.py +43 -0
- synth_ai/learning/constants.py +29 -0
- synth_ai/learning/ft_client.py +59 -0
- synth_ai/learning/health.py +43 -0
- synth_ai/learning/jobs.py +205 -0
- synth_ai/learning/rl_client.py +256 -0
- synth_ai/learning/sse.py +58 -0
- synth_ai/learning/validators.py +48 -0
- synth_ai/lm/core/main_v3.py +13 -0
- synth_ai/lm/core/synth_models.py +48 -0
- synth_ai/lm/core/vendor_clients.py +9 -6
- synth_ai/lm/vendors/core/openai_api.py +31 -3
- synth_ai/lm/vendors/openai_standard.py +45 -14
- synth_ai/lm/vendors/supported/custom_endpoint.py +12 -2
- synth_ai/lm/vendors/synth_client.py +372 -28
- synth_ai/rl/__init__.py +30 -0
- synth_ai/rl/contracts.py +32 -0
- synth_ai/rl/env_keys.py +137 -0
- synth_ai/rl/secrets.py +19 -0
- synth_ai/scripts/verify_rewards.py +100 -0
- synth_ai/task/__init__.py +10 -0
- synth_ai/task/contracts.py +120 -0
- synth_ai/task/health.py +28 -0
- synth_ai/task/validators.py +12 -0
- synth_ai/tracing_v3/hooks.py +3 -1
- synth_ai/tracing_v3/session_tracer.py +123 -2
- synth_ai/tracing_v3/turso/manager.py +218 -0
- synth_ai/tracing_v3/turso/models.py +53 -0
- synth_ai-0.2.4.dev9.dist-info/METADATA +91 -0
- {synth_ai-0.2.4.dev7.dist-info → synth_ai-0.2.4.dev9.dist-info}/RECORD +147 -30
- {synth_ai-0.2.4.dev7.dist-info → synth_ai-0.2.4.dev9.dist-info}/entry_points.txt +1 -0
- synth_ai/tui/__init__.py +0 -1
- synth_ai/tui/__main__.py +0 -13
- synth_ai/tui/cli/__init__.py +0 -1
- synth_ai/tui/cli/query_experiments.py +0 -164
- synth_ai/tui/cli/query_experiments_v3.py +0 -164
- synth_ai/tui/dashboard.py +0 -340
- synth_ai-0.2.4.dev7.dist-info/METADATA +0 -193
- {synth_ai-0.2.4.dev7.dist-info → synth_ai-0.2.4.dev9.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.4.dev7.dist-info → synth_ai-0.2.4.dev9.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.2.4.dev7.dist-info → synth_ai-0.2.4.dev9.dist-info}/top_level.txt +0 -0
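
The file-level changes listed above can be reproduced locally by comparing the two wheels directly; the short Python sketch below shows one way to do it. It assumes both wheel files have already been downloaded into the working directory (the exact filenames are assumptions based on the version strings above, not part of this diff).

import zipfile

def wheel_files(path: str) -> set[str]:
    # A wheel is a zip archive; namelist() returns every packaged path.
    with zipfile.ZipFile(path) as zf:
        return set(zf.namelist())

old = wheel_files("synth_ai-0.2.4.dev7-py3-none-any.whl")
new = wheel_files("synth_ai-0.2.4.dev9-py3-none-any.whl")

print("added:", sorted(new - old))
print("removed:", sorted(old - new))
print("present in both (possibly modified):", len(old & new))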
synth_ai/environments/examples/crafter_classic/agent_demos/old/profile_env_slowness.py (new file)
@@ -0,0 +1,456 @@
#!/usr/bin/env python3
"""Deep profiling of environment service slowness."""

import asyncio
import time
import httpx
import json
import cProfile
import pstats
import io
from typing import Dict, List, Any, Tuple
import statistics
import logging

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)


class EnvironmentProfiler:
    """Profile different layers of the environment service stack."""

    def __init__(self, service_url: str = "http://localhost:8901"):
        self.service_url = service_url
        self.timings: Dict[str, List[float]] = {
            # Network layer
            "http_request_total": [],
            "http_request_only": [],
            "json_serialization": [],
            "json_deserialization": [],

            # Service layer
            "service_processing": [],
            "env_initialization": [],
            "env_step": [],

            # Data transfer
            "request_size_bytes": [],
            "response_size_bytes": [],

            # Connection
            "connection_setup": [],
            "dns_lookup": [],
        }

    async def profile_single_step(self, client: httpx.AsyncClient, env_id: str) -> Dict[str, Any]:
        """Profile a single environment step with detailed timing."""

        # Prepare request payload
        payload = {
            "env_id": env_id,
            "action": {
                "tool_calls": [{
                    "tool": "interact",
                    "args": {"action": 0}
                }]
            }
        }

        # Time JSON serialization
        json_start = time.time()
        json_data = json.dumps(payload)
        json_time = time.time() - json_start
        self.timings["json_serialization"].append(json_time)
        self.timings["request_size_bytes"].append(len(json_data))

        # Time the full HTTP request
        total_start = time.time()

        # Make request with detailed timing
        response = await client.post(
            f"{self.service_url}/env/CrafterClassic/step",
            content=json_data,
            headers={"Content-Type": "application/json"},
            timeout=30.0
        )

        total_time = time.time() - total_start
        self.timings["http_request_total"].append(total_time)

        # Time JSON deserialization
        response_text = response.text
        self.timings["response_size_bytes"].append(len(response_text))

        deser_start = time.time()
        response_data = json.loads(response_text)
        deser_time = time.time() - deser_start
        self.timings["json_deserialization"].append(deser_time)

        # Calculate network-only time (excluding serialization)
        network_only = total_time - json_time - deser_time
        self.timings["http_request_only"].append(network_only)

        return {
            "total_time": total_time,
            "network_time": network_only,
            "json_serialize": json_time,
            "json_deserialize": deser_time,
            "request_size": len(json_data),
            "response_size": len(response_text),
            "response_data": response_data
        }

    async def profile_with_connection_reuse(self):
        """Test performance with connection reuse vs new connections."""
        logger.info("\n" + "="*60)
        logger.info("TESTING CONNECTION REUSE IMPACT")
        logger.info("="*60)

        # Test 1: Reusing connection
        logger.info("\n1. With connection reuse:")
        async with httpx.AsyncClient() as client:
            # Initialize environment
            init_response = await client.post(
                f"{self.service_url}/env/CrafterClassic/initialize",
                json={"initial_state": {}, "config": {"area": [64, 64], "length": 100}}
            )
            env_id = init_response.json()["env_id"]

            # Run 10 steps with same connection
            reuse_times = []
            for i in range(10):
                result = await self.profile_single_step(client, env_id)
                reuse_times.append(result["total_time"])
                logger.debug(f" Step {i+1}: {result['total_time']:.3f}s")

            # Cleanup
            await client.post(f"{self.service_url}/env/CrafterClassic/terminate", json={"env_id": env_id})

        logger.info(f" Mean time with reuse: {statistics.mean(reuse_times):.3f}s")

        # Test 2: New connection each time
        logger.info("\n2. With new connection each time:")

        # Initialize environment first
        async with httpx.AsyncClient() as client:
            init_response = await client.post(
                f"{self.service_url}/env/CrafterClassic/initialize",
                json={"initial_state": {}, "config": {"area": [64, 64], "length": 100}}
            )
            env_id = init_response.json()["env_id"]

        new_conn_times = []
        for i in range(10):
            # New client each time
            async with httpx.AsyncClient() as client:
                result = await self.profile_single_step(client, env_id)
                new_conn_times.append(result["total_time"])
                logger.debug(f" Step {i+1}: {result['total_time']:.3f}s")

        # Cleanup
        async with httpx.AsyncClient() as client:
            await client.post(f"{self.service_url}/env/CrafterClassic/terminate", json={"env_id": env_id})

        logger.info(f" Mean time with new connections: {statistics.mean(new_conn_times):.3f}s")
        logger.info(f" Overhead from new connections: {statistics.mean(new_conn_times) - statistics.mean(reuse_times):.3f}s")

    async def profile_payload_size_impact(self):
        """Test if large payloads are causing slowness."""
        logger.info("\n" + "="*60)
        logger.info("TESTING PAYLOAD SIZE IMPACT")
        logger.info("="*60)

        async with httpx.AsyncClient() as client:
            # Initialize environment
            init_response = await client.post(
                f"{self.service_url}/env/CrafterClassic/initialize",
                json={"initial_state": {}, "config": {"area": [64, 64], "length": 100}}
            )
            env_data = init_response.json()
            env_id = env_data["env_id"]

            # Check observation size
            obs_json = json.dumps(env_data["observation"])
            logger.info(f"\nObservation size: {len(obs_json)} bytes ({len(obs_json)/1024:.1f} KB)")

            # Analyze observation structure
            obs = env_data["observation"]
            field_sizes = {}
            for key, value in obs.items():
                if isinstance(value, (list, dict)):
                    field_sizes[key] = len(json.dumps(value))
                else:
                    field_sizes[key] = len(str(value))

            # Sort by size
            sorted_fields = sorted(field_sizes.items(), key=lambda x: x[1], reverse=True)
            logger.info("\nLargest observation fields:")
            for field, size in sorted_fields[:5]:
                logger.info(f" {field}: {size} bytes ({size/1024:.1f} KB)")

            # Test step timing
            step_times = []
            for i in range(5):
                result = await self.profile_single_step(client, env_id)
                step_times.append(result)

            # Analyze
            logger.info("\nTiming breakdown (average of 5 steps):")
            logger.info(f" Total time: {statistics.mean([r['total_time'] for r in step_times]):.3f}s")
            logger.info(f" Network only: {statistics.mean([r['network_time'] for r in step_times]):.3f}s")
            logger.info(f" JSON serialize: {statistics.mean([r['json_serialize'] for r in step_times]):.6f}s")
            logger.info(f" JSON deserialize: {statistics.mean([r['json_deserialize'] for r in step_times]):.6f}s")
            logger.info(f" Response size: {statistics.mean([r['response_size'] for r in step_times]):.0f} bytes")

            # Cleanup
            await client.post(f"{self.service_url}/env/CrafterClassic/terminate", json={"env_id": env_id})

    async def test_concurrent_environments(self):
        """Test if multiple environments interfere with each other."""
        logger.info("\n" + "="*60)
        logger.info("TESTING CONCURRENT ENVIRONMENT INTERFERENCE")
        logger.info("="*60)

        async with httpx.AsyncClient() as client:
            # Create 5 environments
            env_ids = []
            for i in range(5):
                init_response = await client.post(
                    f"{self.service_url}/env/CrafterClassic/initialize",
                    json={"initial_state": {}, "config": {"area": [64, 64], "length": 100}}
                )
                env_ids.append(init_response.json()["env_id"])

            logger.info(f"Created {len(env_ids)} environments")

            # Test 1: Sequential steps across different environments
            logger.info("\n1. Sequential steps across environments:")
            seq_times = []
            for i in range(10):
                env_id = env_ids[i % len(env_ids)]
                result = await self.profile_single_step(client, env_id)
                seq_times.append(result["total_time"])

            logger.info(f" Mean time: {statistics.mean(seq_times):.3f}s")

            # Test 2: Concurrent steps
            logger.info("\n2. Concurrent steps:")

            async def concurrent_step(env_id: str) -> float:
                result = await self.profile_single_step(client, env_id)
                return result["total_time"]

            # Run 5 concurrent steps
            start = time.time()
            concurrent_results = await asyncio.gather(*[
                concurrent_step(env_id) for env_id in env_ids
            ])
            concurrent_time = time.time() - start

            logger.info(f" Total time for 5 concurrent steps: {concurrent_time:.3f}s")
            logger.info(f" Mean individual step time: {statistics.mean(concurrent_results):.3f}s")

            # Cleanup
            for env_id in env_ids:
                await client.post(f"{self.service_url}/env/CrafterClassic/terminate", json={"env_id": env_id})

    async def profile_service_internals(self):
        """Try to understand what the service is doing internally."""
        logger.info("\n" + "="*60)
        logger.info("ANALYZING SERVICE BEHAVIOR")
        logger.info("="*60)

        async with httpx.AsyncClient() as client:
            # Test with minimal config
            logger.info("\n1. Testing with minimal world size:")

            # Small world
            small_response = await client.post(
                f"{self.service_url}/env/CrafterClassic/initialize",
                json={"initial_state": {}, "config": {"area": [32, 32], "length": 10}}
            )
            small_env_id = small_response.json()["env_id"]

            # Time steps
            small_times = []
            for i in range(5):
                result = await self.profile_single_step(client, small_env_id)
                small_times.append(result["total_time"])

            logger.info(f" 32x32 world mean step time: {statistics.mean(small_times):.3f}s")

            # Normal world
            logger.info("\n2. Testing with normal world size:")
            normal_response = await client.post(
                f"{self.service_url}/env/CrafterClassic/initialize",
                json={"initial_state": {}, "config": {"area": [64, 64], "length": 100}}
            )
            normal_env_id = normal_response.json()["env_id"]

            normal_times = []
            for i in range(5):
                result = await self.profile_single_step(client, normal_env_id)
                normal_times.append(result["total_time"])

            logger.info(f" 64x64 world mean step time: {statistics.mean(normal_times):.3f}s")

            # Cleanup
            await client.post(f"{self.service_url}/env/CrafterClassic/terminate", json={"env_id": small_env_id})
            await client.post(f"{self.service_url}/env/CrafterClassic/terminate", json={"env_id": normal_env_id})

    def print_summary(self):
        """Print timing summary."""
        logger.info("\n" + "="*60)
        logger.info("TIMING SUMMARY")
        logger.info("="*60)

        for category, times in self.timings.items():
            if times and category not in ["request_size_bytes", "response_size_bytes"]:
                logger.info(f"\n{category}:")
                logger.info(f" Samples: {len(times)}")
                logger.info(f" Mean: {statistics.mean(times):.3f}s")
                logger.info(f" Median: {statistics.median(times):.3f}s")
                logger.info(f" Min: {min(times):.3f}s")
                logger.info(f" Max: {max(times):.3f}s")

        # Print size statistics
        if self.timings["response_size_bytes"]:
            logger.info(f"\nResponse sizes:")
            logger.info(f" Mean: {statistics.mean(self.timings['response_size_bytes'])/1024:.1f} KB")
            logger.info(f" Max: {max(self.timings['response_size_bytes'])/1024:.1f} KB")


async def trace_single_request():
    """Trace a single request in detail to see where time goes."""
    logger.info("\n" + "="*60)
    logger.info("TRACING SINGLE REQUEST IN DETAIL")
    logger.info("="*60)

    # Use httpx events to trace request lifecycle
    async def log_request_start(request):
        logger.info(f" [REQUEST START] {request.method} {request.url}")

    async def log_request_end(request):
        logger.info(f" [REQUEST END] {request.method} {request.url}")

    async def log_response_start(response):
        logger.info(f" [RESPONSE START] Status: {response.status_code}")

    async def log_response_end(response):
        logger.info(f" [RESPONSE END] Status: {response.status_code}")

    event_hooks = {
        "request": [log_request_start, log_request_end],
        "response": [log_response_start, log_response_end]
    }

    async with httpx.AsyncClient(event_hooks=event_hooks) as client:
        # Initialize
        logger.info("\nInitializing environment...")
        init_start = time.time()

        init_response = await client.post(
            "http://localhost:8901/env/CrafterClassic/initialize",
            json={"initial_state": {}, "config": {"area": [64, 64], "length": 100}},
            timeout=30.0
        )

        init_time = time.time() - init_start
        env_id = init_response.json()["env_id"]
        logger.info(f" Initialization took: {init_time:.3f}s")

        # Single step with detailed timing
        logger.info("\nExecuting single step...")

        payload = {
            "env_id": env_id,
            "action": {"tool_calls": [{"tool": "interact", "args": {"action": 0}}]}
        }

        # Time each phase
        phases = {}

        # Phase 1: Serialize
        phase_start = time.time()
        json_data = json.dumps(payload)
        phases["serialize"] = time.time() - phase_start

        # Phase 2: Send request and wait for response
        phase_start = time.time()
        response = await client.post(
            "http://localhost:8901/env/CrafterClassic/step",
            content=json_data,
            headers={"Content-Type": "application/json"},
            timeout=30.0
        )
        phases["network"] = time.time() - phase_start

        # Phase 3: Read response
        phase_start = time.time()
        response_text = response.text
        phases["read_response"] = time.time() - phase_start

        # Phase 4: Parse JSON
        phase_start = time.time()
        response_data = json.loads(response_text)
        phases["deserialize"] = time.time() - phase_start

        # Print breakdown
        total_time = sum(phases.values())
        logger.info(f"\n Total step time: {total_time:.3f}s")
        logger.info(" Breakdown:")
        for phase, duration in phases.items():
            percentage = (duration / total_time) * 100
            logger.info(f" {phase}: {duration:.3f}s ({percentage:.1f}%)")

        # Cleanup
        await client.post(
            "http://localhost:8901/env/CrafterClassic/terminate",
            json={"env_id": env_id}
        )


async def main():
    """Run all profiling tests."""
    profiler = EnvironmentProfiler()

    # First trace a single request
    await trace_single_request()

    # Run profiling tests
    await profiler.profile_with_connection_reuse()
    await profiler.profile_payload_size_impact()
    await profiler.test_concurrent_environments()
    await profiler.profile_service_internals()

    # Print summary
    profiler.print_summary()

    # Final analysis
    logger.info("\n" + "="*60)
    logger.info("ANALYSIS & RECOMMENDATIONS")
    logger.info("="*60)

    if profiler.timings["http_request_only"]:
        mean_network = statistics.mean(profiler.timings["http_request_only"])
        if mean_network > 1.0:
            logger.info("\n⚠️ Network latency is high (>1s). Possible causes:")
            logger.info(" - Service is overloaded")
            logger.info(" - Python GIL blocking with concurrent requests")
            logger.info(" - Inefficient service implementation")

    if profiler.timings["response_size_bytes"]:
        mean_size = statistics.mean(profiler.timings["response_size_bytes"])
        if mean_size > 50000:  # 50KB
            logger.info("\n⚠️ Large response payloads. Consider:")
            logger.info(" - Compressing responses")
            logger.info(" - Removing unnecessary fields from observations")
            logger.info(" - Using binary protocols instead of JSON")


if __name__ == "__main__":
    asyncio.run(main())
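
As a rough usage sketch: the profiler above collects raw samples in EnvironmentProfiler.timings, so tail-latency figures such as p95 can be computed from the same data. The snippet below assumes the environment service is already running on localhost:8901 and that the script is importable as profile_env_slowness; both are assumptions, not something this diff establishes.

import asyncio
import statistics

from profile_env_slowness import EnvironmentProfiler  # hypothetical module name

async def run_profiler() -> None:
    profiler = EnvironmentProfiler(service_url="http://localhost:8901")
    await profiler.profile_with_connection_reuse()
    samples = profiler.timings["http_request_total"]
    if len(samples) >= 2:
        # statistics.quantiles with n=20 yields 19 cut points; the last is ~p95
        p95 = statistics.quantiles(samples, n=20)[-1]
        print(f"p95 http_request_total: {p95:.3f}s over {len(samples)} samples")

asyncio.run(run_profiler())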
synth_ai/environments/examples/crafter_classic/agent_demos/old/replicate_issue.py (new file)
@@ -0,0 +1,166 @@
#!/usr/bin/env python3
"""
Script to replicate the issue with Qwen 7B model inference
"""

import httpx
import asyncio
import json
from datetime import datetime

# Configuration
BASE_URL = "http://localhost:8000/api/v1/learning"
API_KEY = "test-api-key"  # Replace with your actual API key
MODEL = "Qwen/Qwen2.5-7B-Instruct"

async def test_base_model_inference():
    """Test base model inference through the local learning service."""

    print(f"Testing Base Model Inference")
    print(f"Time: {datetime.now()}")
    print(f"Endpoint: {BASE_URL}/v1/chat/completions")
    print(f"Model: {MODEL}")
    print("=" * 60)

    async with httpx.AsyncClient(timeout=30.0) as client:
        headers = {
            "Authorization": f"Bearer {API_KEY}",
            "Content-Type": "application/json"
        }

        payload = {
            "model": MODEL,
            "messages": [
                {"role": "user", "content": "Hello, can you help me play Crafter?"}
            ],
            "temperature": 0.7,
            "max_tokens": 100
        }

        print("\n📤 Request:")
        print(f"Headers: {json.dumps(headers, indent=2)}")
        print(f"Payload: {json.dumps(payload, indent=2)}")

        try:
            print("\n⏳ Sending request...")
            response = await client.post(
                f"{BASE_URL}/v1/chat/completions",
                headers=headers,
                json=payload
            )

            print(f"\n📥 Response Status: {response.status_code}")
            print(f"Response Headers: {dict(response.headers)}")

            if response.status_code == 200:
                data = response.json()
                print(f"\n✅ Success!")
                print(f"Response: {json.dumps(data, indent=2)}")

                # Extract the assistant's message
                if "choices" in data and data["choices"]:
                    content = data["choices"][0]["message"]["content"]
                    print(f"\n🤖 Assistant says: {content}")

                    # Check if it's the "not implemented" message
                    if "Base model inference not implemented" in content:
                        print("\n⚠️ WARNING: The service returned a 'not implemented' message!")
                        print("This means the service is responding but base models aren't supported yet.")
            else:
                print(f"\n❌ Error Response:")
                print(f"Body: {response.text}")

        except httpx.ConnectError as e:
            print(f"\n❌ Connection Error: Could not connect to {BASE_URL}")
            print(f"Error: {e}")
            print("\nPossible issues:")
            print("1. The backend service is not running on port 8000")
            print("2. The learning service is not properly configured")
            print("3. The proxy route /api/v1/learning is not set up")

        except httpx.TimeoutException as e:
            print(f"\n❌ Timeout Error: Request timed out after 30 seconds")
            print(f"Error: {e}")

        except Exception as e:
            print(f"\n❌ Unexpected Error: {type(e).__name__}")
            print(f"Error: {e}")


async def test_crafter_integration():
    """Test the full Crafter + LLM integration."""

    print("\n\nTesting Crafter Integration")
    print("=" * 60)

    # First check if Crafter service is running
    crafter_url = "http://localhost:8901"

    async with httpx.AsyncClient(timeout=5.0) as client:
        try:
            print(f"\n1️⃣ Checking Crafter service at {crafter_url}...")
            response = await client.get(f"{crafter_url}/health")
            if response.status_code == 200:
                print("✅ Crafter service is running!")
            else:
                print(f"⚠️ Crafter service returned status {response.status_code}")
        except Exception as e:
            print(f"❌ Crafter service is not running on port 8901")
            print(f" Error: {e}")
            print("\n To start Crafter service:")
            print(" uv run python -m uvicorn synth_ai.environments.service.app:app --host 0.0.0.0 --port 8901")

    # Test creating an environment
    if True:  # You can set to False to skip environment creation
        print(f"\n2️⃣ Testing environment creation...")
        async with httpx.AsyncClient(timeout=10.0) as client:
            try:
                response = await client.post(
                    f"{crafter_url}/create_env",
                    json={
                        "instance_id": "test_instance",
                        "render_mode": "rgb_array",
                        "difficulty": "easy",
                        "seed": 42
                    }
                )
                if response.status_code == 200:
                    print("✅ Environment created successfully!")
                else:
                    print(f"❌ Failed to create environment: {response.text}")
            except Exception as e:
                print(f"❌ Error creating environment: {e}")


def main():
    """Run all tests."""
    print("Replicating Qwen 7B Model Inference Issue")
    print("=" * 60)

    # Run async tests
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)

    try:
        # Test base model inference
        loop.run_until_complete(test_base_model_inference())

        # Test Crafter integration
        loop.run_until_complete(test_crafter_integration())

    finally:
        loop.close()

    print("\n\nSummary:")
    print("=" * 60)
    print("This script tested:")
    print("1. Base model inference through the learning service proxy")
    print("2. Crafter environment service availability")
    print("\nTo fix the issues:")
    print("1. Ensure the backend is running: cd backend && uvicorn app.main:app --reload")
    print("2. Ensure the learning service is configured and running")
    print("3. Start Crafter service: uv run python -m uvicorn synth_ai.environments.service.app:app --host 0.0.0.0 --port 8901")


if __name__ == "__main__":
    main()
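
Since the request above targets an OpenAI-style /v1/chat/completions route, the same call can also be expressed with the OpenAI Python client, shown below as a sketch only. It assumes the learning proxy is OpenAI-compatible for this route and that the openai package is installed; neither is confirmed by this diff.

from openai import OpenAI

# Point the client at the local learning proxy (base URL and key taken from the
# script above; adjust both for a real deployment).
client = OpenAI(
    base_url="http://localhost:8000/api/v1/learning/v1",
    api_key="test-api-key",
)

resp = client.chat.completions.create(
    model="Qwen/Qwen2.5-7B-Instruct",
    messages=[{"role": "user", "content": "Hello, can you help me play Crafter?"}],
    temperature=0.7,
    max_tokens=100,
)
print(resp.choices[0].message.content)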