synth-ai 0.2.4.dev7__py3-none-any.whl → 0.2.4.dev9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (154) hide show
  1. synth_ai/__init__.py +1 -1
  2. synth_ai/cli/__init__.py +6 -0
  3. synth_ai/cli/balance.py +3 -15
  4. synth_ai/cli/demo.py +68 -9
  5. synth_ai/cli/rl_demo.py +137 -0
  6. synth_ai/cli/root.py +65 -0
  7. synth_ai/config/base_url.py +47 -0
  8. synth_ai/demos/core/__init__.py +1 -0
  9. synth_ai/demos/core/cli.py +621 -0
  10. synth_ai/demos/demo_task_apps/__init__.py +1 -0
  11. synth_ai/demos/demo_task_apps/core.py +374 -0
  12. synth_ai/demos/demo_task_apps/math/__init__.py +1 -0
  13. synth_ai/demos/demo_task_apps/math/app.py +37 -0
  14. synth_ai/demos/demo_task_apps/math/config.toml +44 -0
  15. synth_ai/demos/demo_task_apps/math/deploy_modal.py +60 -0
  16. synth_ai/demos/demo_task_apps/math/deploy_task_app.sh +22 -0
  17. synth_ai/environments/examples/bandit/__init__.py +33 -0
  18. synth_ai/environments/examples/bandit/engine.py +294 -0
  19. synth_ai/environments/examples/bandit/environment.py +194 -0
  20. synth_ai/environments/examples/bandit/taskset.py +200 -0
  21. synth_ai/environments/examples/crafter_classic/agent_demos/analyze_semantic_words_markdown.py +250 -0
  22. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_comprehensive_evaluation.py +59 -0
  23. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_browser.py +152 -0
  24. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_config.toml +24 -0
  25. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_framework.py +1194 -0
  26. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/crafter_synth_config.toml +56 -0
  27. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_config_modal.toml +32 -0
  28. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_traces_sft_turso.py +724 -0
  29. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/kick_off_ft_modal.py +384 -0
  30. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_action_results.py +53 -0
  31. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_agent_actions.py +178 -0
  32. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_latest_run.py +222 -0
  33. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_lm_traces.py +183 -0
  34. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_no_rewards.py +210 -0
  35. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_trace_issue.py +206 -0
  36. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/check_db_schema.py +49 -0
  37. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/check_latest_results.py +64 -0
  38. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/debug_agent_responses.py +88 -0
  39. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/quick_trace_check.py +77 -0
  40. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/compare_experiments.py +324 -0
  41. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/filter_traces_sft_turso.py +580 -0
  42. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/kick_off_ft_oai.py +362 -0
  43. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/multi_model_config.toml +49 -0
  44. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_enhanced_hooks.py +332 -0
  45. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_hook_events.py +97 -0
  46. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_hook_results.py +217 -0
  47. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/check_hook_storage.py +87 -0
  48. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/check_seeds.py +88 -0
  49. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/compare_seed_performance.py +195 -0
  50. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/custom_eval_pipelines.py +400 -0
  51. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/plot_hook_frequency.py +195 -0
  52. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/seed_analysis_summary.py +56 -0
  53. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/run_rollouts_for_models_and_compare_v3.py +858 -0
  54. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_quick_evaluation.py +52 -0
  55. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_react_agent.py +874 -0
  56. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_trace_evaluation.py +1412 -0
  57. synth_ai/environments/examples/crafter_classic/agent_demos/example_v3_usage.py +216 -0
  58. synth_ai/environments/examples/crafter_classic/agent_demos/old/compare_traces.py +296 -0
  59. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_comprehensive_evaluation.py +58 -0
  60. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_env_serialization.py +464 -0
  61. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_evaluation_browser.py +152 -0
  62. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_quick_evaluation.py +51 -0
  63. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_trace_evaluation.py +1412 -0
  64. synth_ai/environments/examples/crafter_classic/agent_demos/old/debug_player_loss.py +112 -0
  65. synth_ai/environments/examples/crafter_classic/agent_demos/old/diagnose_service.py +203 -0
  66. synth_ai/environments/examples/crafter_classic/agent_demos/old/diagnose_slowness.py +305 -0
  67. synth_ai/environments/examples/crafter_classic/agent_demos/old/eval_by_difficulty.py +126 -0
  68. synth_ai/environments/examples/crafter_classic/agent_demos/old/eval_example.py +94 -0
  69. synth_ai/environments/examples/crafter_classic/agent_demos/old/explore_saved_states.py +142 -0
  70. synth_ai/environments/examples/crafter_classic/agent_demos/old/filter_traces_sft.py +26 -0
  71. synth_ai/environments/examples/crafter_classic/agent_demos/old/filter_traces_sft_OLD.py +984 -0
  72. synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_data_gemini.py +724 -0
  73. synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_data_modal.py +386 -0
  74. synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_metadata.py +205 -0
  75. synth_ai/environments/examples/crafter_classic/agent_demos/old/kick_off_ft_gemini.py +150 -0
  76. synth_ai/environments/examples/crafter_classic/agent_demos/old/kick_off_ft_modal.py +283 -0
  77. synth_ai/environments/examples/crafter_classic/agent_demos/old/prepare_vertex_ft.py +280 -0
  78. synth_ai/environments/examples/crafter_classic/agent_demos/old/profile_env_slowness.py +456 -0
  79. synth_ai/environments/examples/crafter_classic/agent_demos/old/replicate_issue.py +166 -0
  80. synth_ai/environments/examples/crafter_classic/agent_demos/old/run_and_eval.py +102 -0
  81. synth_ai/environments/examples/crafter_classic/agent_demos/old/run_comparison.py +128 -0
  82. synth_ai/environments/examples/crafter_classic/agent_demos/old/run_qwen_rollouts.py +655 -0
  83. synth_ai/environments/examples/crafter_classic/agent_demos/old/trace_eval_OLD.py +202 -0
  84. synth_ai/environments/examples/crafter_classic/agent_demos/old/validate_openai_format.py +166 -0
  85. synth_ai/environments/examples/crafter_classic/environment.py +41 -2
  86. synth_ai/environments/examples/crafter_custom/agent_demos/__init__.py +1 -0
  87. synth_ai/environments/examples/crafter_custom/agent_demos/trace_eval.py +202 -0
  88. synth_ai/environments/examples/crafter_custom/old/analyze_diamond_issue.py +159 -0
  89. synth_ai/environments/examples/crafter_custom/old/analyze_diamond_spawning.py +158 -0
  90. synth_ai/environments/examples/crafter_custom/old/compare_worlds.py +71 -0
  91. synth_ai/environments/examples/crafter_custom/old/dataset_stats.py +105 -0
  92. synth_ai/environments/examples/crafter_custom/old/diamond_spawning_summary.py +119 -0
  93. synth_ai/environments/examples/crafter_custom/old/example_dataset_usage.py +52 -0
  94. synth_ai/environments/examples/enron/units/keyword_stats.py +112 -0
  95. synth_ai/environments/examples/minigrid/agent_demos/minigrid_evaluation_framework.py +1188 -0
  96. synth_ai/environments/examples/minigrid/agent_demos/minigrid_quick_evaluation.py +48 -0
  97. synth_ai/environments/examples/minigrid/agent_demos/minigrid_react_agent.py +562 -0
  98. synth_ai/environments/examples/minigrid/agent_demos/minigrid_trace_evaluation.py +221 -0
  99. synth_ai/environments/examples/nethack/agent_demos/nethack_evaluation_framework.py +981 -0
  100. synth_ai/environments/examples/nethack/agent_demos/nethack_quick_evaluation.py +74 -0
  101. synth_ai/environments/examples/nethack/agent_demos/nethack_react_agent.py +831 -0
  102. synth_ai/environments/examples/red/agent_demos/__init__.py +1 -0
  103. synth_ai/environments/examples/red/units/__init__.py +1 -0
  104. synth_ai/environments/examples/sokoban/agent_demos/sokoban_full_eval.py +899 -0
  105. synth_ai/environments/examples/sokoban/units/astar_common.py +95 -0
  106. synth_ai/environments/service/app.py +8 -0
  107. synth_ai/http.py +102 -0
  108. synth_ai/inference/__init__.py +7 -0
  109. synth_ai/inference/client.py +20 -0
  110. synth_ai/install_sqld.sh +40 -0
  111. synth_ai/jobs/client.py +246 -0
  112. synth_ai/learning/__init__.py +24 -0
  113. synth_ai/learning/client.py +149 -0
  114. synth_ai/learning/config.py +43 -0
  115. synth_ai/learning/constants.py +29 -0
  116. synth_ai/learning/ft_client.py +59 -0
  117. synth_ai/learning/health.py +43 -0
  118. synth_ai/learning/jobs.py +205 -0
  119. synth_ai/learning/rl_client.py +256 -0
  120. synth_ai/learning/sse.py +58 -0
  121. synth_ai/learning/validators.py +48 -0
  122. synth_ai/lm/core/main_v3.py +13 -0
  123. synth_ai/lm/core/synth_models.py +48 -0
  124. synth_ai/lm/core/vendor_clients.py +9 -6
  125. synth_ai/lm/vendors/core/openai_api.py +31 -3
  126. synth_ai/lm/vendors/openai_standard.py +45 -14
  127. synth_ai/lm/vendors/supported/custom_endpoint.py +12 -2
  128. synth_ai/lm/vendors/synth_client.py +372 -28
  129. synth_ai/rl/__init__.py +30 -0
  130. synth_ai/rl/contracts.py +32 -0
  131. synth_ai/rl/env_keys.py +137 -0
  132. synth_ai/rl/secrets.py +19 -0
  133. synth_ai/scripts/verify_rewards.py +100 -0
  134. synth_ai/task/__init__.py +10 -0
  135. synth_ai/task/contracts.py +120 -0
  136. synth_ai/task/health.py +28 -0
  137. synth_ai/task/validators.py +12 -0
  138. synth_ai/tracing_v3/hooks.py +3 -1
  139. synth_ai/tracing_v3/session_tracer.py +123 -2
  140. synth_ai/tracing_v3/turso/manager.py +218 -0
  141. synth_ai/tracing_v3/turso/models.py +53 -0
  142. synth_ai-0.2.4.dev9.dist-info/METADATA +91 -0
  143. {synth_ai-0.2.4.dev7.dist-info → synth_ai-0.2.4.dev9.dist-info}/RECORD +147 -30
  144. {synth_ai-0.2.4.dev7.dist-info → synth_ai-0.2.4.dev9.dist-info}/entry_points.txt +1 -0
  145. synth_ai/tui/__init__.py +0 -1
  146. synth_ai/tui/__main__.py +0 -13
  147. synth_ai/tui/cli/__init__.py +0 -1
  148. synth_ai/tui/cli/query_experiments.py +0 -164
  149. synth_ai/tui/cli/query_experiments_v3.py +0 -164
  150. synth_ai/tui/dashboard.py +0 -340
  151. synth_ai-0.2.4.dev7.dist-info/METADATA +0 -193
  152. {synth_ai-0.2.4.dev7.dist-info → synth_ai-0.2.4.dev9.dist-info}/WHEEL +0 -0
  153. {synth_ai-0.2.4.dev7.dist-info → synth_ai-0.2.4.dev9.dist-info}/licenses/LICENSE +0 -0
  154. {synth_ai-0.2.4.dev7.dist-info → synth_ai-0.2.4.dev9.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,456 @@
1
+ #!/usr/bin/env python3
2
+ """Deep profiling of environment service slowness."""
3
+
4
+ import asyncio
5
+ import time
6
+ import httpx
7
+ import json
8
+ import cProfile
9
+ import pstats
10
+ import io
11
+ from typing import Dict, List, Any, Tuple
12
+ import statistics
13
+ import logging
14
+
15
# Send timestamped, level-tagged records at INFO and above to the root
# handler; every message this script emits goes through ``logger`` below.
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
logger = logging.getLogger(__name__)
21
+
22
+
23
class EnvironmentProfiler:
    """Profile different layers of the environment service stack.

    Each ``profile_*``/``test_*`` coroutine creates one or more
    CrafterClassic environments on the service at ``service_url``, drives a
    few no-op steps through :meth:`profile_single_step`, logs its findings
    and terminates the environments again (also when a step fails).
    Per-step measurements accumulate in ``self.timings`` and are reported
    by :meth:`print_summary`.
    """

    def __init__(self, service_url: str = "http://localhost:8901"):
        self.service_url = service_url
        # Samples per category; durations are in seconds, sizes in bytes.
        # Only the categories written by profile_single_step are populated
        # by this class; the rest stay empty and are skipped in the summary.
        self.timings: Dict[str, List[float]] = {
            # Network layer
            "http_request_total": [],
            "http_request_only": [],
            "json_serialization": [],
            "json_deserialization": [],

            # Service layer
            "service_processing": [],
            "env_initialization": [],
            "env_step": [],

            # Data transfer
            "request_size_bytes": [],
            "response_size_bytes": [],

            # Connection
            "connection_setup": [],
            "dns_lookup": [],
        }

    async def _initialize_env(self, client, config=None) -> Dict[str, Any]:
        """Create an environment and return the decoded initialize response.

        ``config`` defaults to the 64x64 / length-100 world used throughout
        this script. Raises ``httpx.HTTPStatusError`` on a non-2xx reply so
        a failed initialize is not reported as a missing ``env_id`` key.
        """
        if config is None:
            config = {"area": [64, 64], "length": 100}
        response = await client.post(
            f"{self.service_url}/env/CrafterClassic/initialize",
            json={"initial_state": {}, "config": config},
        )
        response.raise_for_status()
        return response.json()

    async def _terminate_env(self, client, env_id: str) -> None:
        """Ask the service to terminate the environment ``env_id``."""
        await client.post(
            f"{self.service_url}/env/CrafterClassic/terminate",
            json={"env_id": env_id},
        )

    async def profile_single_step(self, client: "httpx.AsyncClient", env_id: str) -> Dict[str, Any]:
        """Profile a single environment step with detailed timing.

        Sends one ``interact`` tool call with action ``0`` to ``env_id`` and
        records every measurement in ``self.timings``.

        Returns a dict with:
            ``json_serialize`` / ``json_deserialize``: seconds spent in
                ``json.dumps`` / ``json.loads``;
            ``network_time``: seconds spent awaiting the HTTP round trip;
            ``total_time``: the sum of the three durations above;
            ``request_size`` / ``response_size``: body sizes in bytes;
            ``response_data``: the decoded JSON response.
        """
        payload = {
            "env_id": env_id,
            "action": {
                "tool_calls": [{
                    "tool": "interact",
                    "args": {"action": 0}
                }]
            }
        }

        # perf_counter is monotonic and high resolution, which matters for
        # the sub-millisecond JSON phases.
        serialize_start = time.perf_counter()
        json_data = json.dumps(payload)
        json_time = time.perf_counter() - serialize_start
        # Measure the encoded body so the *_bytes categories really hold bytes.
        request_size = len(json_data.encode("utf-8"))
        self.timings["json_serialization"].append(json_time)
        self.timings["request_size_bytes"].append(request_size)

        # Only the HTTP round trip is inside this window; serialization
        # happened before it and deserialization happens after it.
        request_start = time.perf_counter()
        response = await client.post(
            f"{self.service_url}/env/CrafterClassic/step",
            content=json_data,
            headers={"Content-Type": "application/json"},
            timeout=30.0
        )
        network_only = time.perf_counter() - request_start
        self.timings["http_request_only"].append(network_only)

        response_text = response.text
        response_size = len(response.content)
        self.timings["response_size_bytes"].append(response_size)

        deser_start = time.perf_counter()
        response_data = json.loads(response_text)
        deser_time = time.perf_counter() - deser_start
        self.timings["json_deserialization"].append(deser_time)

        # The total is built from its parts so that
        # total == network + serialize + deserialize always holds.
        total_time = json_time + network_only + deser_time
        self.timings["http_request_total"].append(total_time)

        return {
            "total_time": total_time,
            "network_time": network_only,
            "json_serialize": json_time,
            "json_deserialize": deser_time,
            "request_size": request_size,
            "response_size": response_size,
            "response_data": response_data
        }

    async def profile_with_connection_reuse(self):
        """Test performance with connection reuse vs new connections."""
        logger.info("\n" + "="*60)
        logger.info("TESTING CONNECTION REUSE IMPACT")
        logger.info("="*60)

        # Test 1: ten steps over one client (one connection pool).
        logger.info("\n1. With connection reuse:")
        reuse_times = []
        async with httpx.AsyncClient() as client:
            env_id = (await self._initialize_env(client))["env_id"]
            try:
                for i in range(10):
                    result = await self.profile_single_step(client, env_id)
                    reuse_times.append(result["total_time"])
                    logger.debug(f" Step {i+1}: {result['total_time']:.3f}s")
            finally:
                await self._terminate_env(client, env_id)

        logger.info(f" Mean time with reuse: {statistics.mean(reuse_times):.3f}s")

        # Test 2: a brand-new client (and therefore connection) per step.
        logger.info("\n2. With new connection each time:")
        async with httpx.AsyncClient() as client:
            env_id = (await self._initialize_env(client))["env_id"]

        new_conn_times = []
        try:
            for i in range(10):
                async with httpx.AsyncClient() as client:
                    result = await self.profile_single_step(client, env_id)
                new_conn_times.append(result["total_time"])
                logger.debug(f" Step {i+1}: {result['total_time']:.3f}s")
        finally:
            async with httpx.AsyncClient() as client:
                await self._terminate_env(client, env_id)

        logger.info(f" Mean time with new connections: {statistics.mean(new_conn_times):.3f}s")
        logger.info(f" Overhead from new connections: {statistics.mean(new_conn_times) - statistics.mean(reuse_times):.3f}s")

    async def profile_payload_size_impact(self):
        """Test if large payloads are causing slowness."""
        logger.info("\n" + "="*60)
        logger.info("TESTING PAYLOAD SIZE IMPACT")
        logger.info("="*60)

        async with httpx.AsyncClient() as client:
            env_data = await self._initialize_env(client)
            env_id = env_data["env_id"]
            try:
                # Size of the initial observation as re-serialized JSON text.
                obs = env_data["observation"]
                obs_json = json.dumps(obs)
                logger.info(f"\nObservation size: {len(obs_json)} bytes ({len(obs_json)/1024:.1f} KB)")

                # Per-field size: JSON length for containers, str() length otherwise.
                field_sizes = {
                    key: len(json.dumps(value)) if isinstance(value, (list, dict)) else len(str(value))
                    for key, value in obs.items()
                }

                sorted_fields = sorted(field_sizes.items(), key=lambda x: x[1], reverse=True)
                logger.info("\nLargest observation fields:")
                for field, size in sorted_fields[:5]:
                    logger.info(f" {field}: {size} bytes ({size/1024:.1f} KB)")

                step_times = [
                    await self.profile_single_step(client, env_id)
                    for _ in range(5)
                ]

                logger.info("\nTiming breakdown (average of 5 steps):")
                logger.info(f" Total time: {statistics.mean([r['total_time'] for r in step_times]):.3f}s")
                logger.info(f" Network only: {statistics.mean([r['network_time'] for r in step_times]):.3f}s")
                logger.info(f" JSON serialize: {statistics.mean([r['json_serialize'] for r in step_times]):.6f}s")
                logger.info(f" JSON deserialize: {statistics.mean([r['json_deserialize'] for r in step_times]):.6f}s")
                logger.info(f" Response size: {statistics.mean([r['response_size'] for r in step_times]):.0f} bytes")
            finally:
                await self._terminate_env(client, env_id)

    async def test_concurrent_environments(self):
        """Test if multiple environments interfere with each other."""
        logger.info("\n" + "="*60)
        logger.info("TESTING CONCURRENT ENVIRONMENT INTERFERENCE")
        logger.info("="*60)

        async with httpx.AsyncClient() as client:
            env_ids: List[str] = []
            try:
                # Created inside the try so a failure part-way through still
                # terminates the environments that already exist.
                for _ in range(5):
                    env_ids.append((await self._initialize_env(client))["env_id"])

                logger.info(f"Created {len(env_ids)} environments")

                # Test 1: round-robin over the environments, one step at a time.
                logger.info("\n1. Sequential steps across environments:")
                seq_times = []
                for i in range(10):
                    result = await self.profile_single_step(client, env_ids[i % len(env_ids)])
                    seq_times.append(result["total_time"])

                logger.info(f" Mean time: {statistics.mean(seq_times):.3f}s")

                # Test 2: one step per environment, all in flight at once.
                logger.info("\n2. Concurrent steps:")

                async def concurrent_step(env_id: str) -> float:
                    result = await self.profile_single_step(client, env_id)
                    return result["total_time"]

                start = time.perf_counter()
                concurrent_results = await asyncio.gather(*[
                    concurrent_step(env_id) for env_id in env_ids
                ])
                concurrent_time = time.perf_counter() - start

                logger.info(f" Total time for 5 concurrent steps: {concurrent_time:.3f}s")
                logger.info(f" Mean individual step time: {statistics.mean(concurrent_results):.3f}s")
            finally:
                for env_id in env_ids:
                    await self._terminate_env(client, env_id)

    async def profile_service_internals(self):
        """Try to understand what the service is doing internally.

        Compares mean step time between a 32x32 and a 64x64 world.
        """
        logger.info("\n" + "="*60)
        logger.info("ANALYZING SERVICE BEHAVIOR")
        logger.info("="*60)

        async with httpx.AsyncClient() as client:
            created: List[str] = []
            try:
                logger.info("\n1. Testing with minimal world size:")
                small_env = await self._initialize_env(
                    client, {"area": [32, 32], "length": 10}
                )
                small_env_id = small_env["env_id"]
                created.append(small_env_id)

                small_times = []
                for _ in range(5):
                    result = await self.profile_single_step(client, small_env_id)
                    small_times.append(result["total_time"])

                logger.info(f" 32x32 world mean step time: {statistics.mean(small_times):.3f}s")

                logger.info("\n2. Testing with normal world size:")
                normal_env_id = (await self._initialize_env(client))["env_id"]
                created.append(normal_env_id)

                normal_times = []
                for _ in range(5):
                    result = await self.profile_single_step(client, normal_env_id)
                    normal_times.append(result["total_time"])

                logger.info(f" 64x64 world mean step time: {statistics.mean(normal_times):.3f}s")
            finally:
                for env_id in created:
                    await self._terminate_env(client, env_id)

    def print_summary(self):
        """Print timing summary.

        Logs sample count, mean, median, min and max for every non-empty
        duration category, then the mean and max response size in KB.
        """
        logger.info("\n" + "="*60)
        logger.info("TIMING SUMMARY")
        logger.info("="*60)

        for category, times in self.timings.items():
            # Size categories are not durations; they are reported below.
            if times and category not in ["request_size_bytes", "response_size_bytes"]:
                logger.info(f"\n{category}:")
                logger.info(f" Samples: {len(times)}")
                logger.info(f" Mean: {statistics.mean(times):.3f}s")
                logger.info(f" Median: {statistics.median(times):.3f}s")
                logger.info(f" Min: {min(times):.3f}s")
                logger.info(f" Max: {max(times):.3f}s")

        response_sizes = self.timings["response_size_bytes"]
        if response_sizes:
            logger.info("\nResponse sizes:")
            logger.info(f" Mean: {statistics.mean(response_sizes)/1024:.1f} KB")
            logger.info(f" Max: {max(response_sizes)/1024:.1f} KB")
325
+
326
+
327
async def trace_single_request(service_url: str = "http://localhost:8901"):
    """Trace a single request in detail to see where time goes.

    Initializes one CrafterClassic environment on ``service_url``, executes
    a single no-op step while timing the serialize / network / read /
    deserialize phases separately, logs the breakdown and terminates the
    environment (also when the step fails).

    Args:
        service_url: Base URL of the environment service.
    """
    logger.info("\n" + "="*60)
    logger.info("TRACING SINGLE REQUEST IN DETAIL")
    logger.info("="*60)

    # httpx runs "request" hooks once the request is prepared but before it
    # is sent, and "response" hooks once the response is fetched but before
    # its body is read. There is no "request finished" hook, so only the
    # start of the request is logged. The end of the response is logged
    # after the hook has explicitly read the body.
    async def log_request_start(request):
        logger.info(f" [REQUEST START] {request.method} {request.url}")

    async def log_response(response):
        logger.info(f" [RESPONSE START] Status: {response.status_code}")
        await response.aread()
        logger.info(f" [RESPONSE END] Status: {response.status_code}")

    event_hooks = {
        "request": [log_request_start],
        "response": [log_response]
    }

    async with httpx.AsyncClient(event_hooks=event_hooks) as client:
        logger.info("\nInitializing environment...")
        init_start = time.perf_counter()

        init_response = await client.post(
            f"{service_url}/env/CrafterClassic/initialize",
            json={"initial_state": {}, "config": {"area": [64, 64], "length": 100}},
            timeout=30.0
        )

        init_time = time.perf_counter() - init_start
        # Fail with the HTTP status instead of a KeyError on "env_id".
        init_response.raise_for_status()
        env_id = init_response.json()["env_id"]
        logger.info(f" Initialization took: {init_time:.3f}s")

        try:
            logger.info("\nExecuting single step...")

            payload = {
                "env_id": env_id,
                "action": {"tool_calls": [{"tool": "interact", "args": {"action": 0}}]}
            }

            # Phase name -> duration in seconds, in execution order.
            phases = {}

            # Phase 1: Serialize
            phase_start = time.perf_counter()
            json_data = json.dumps(payload)
            phases["serialize"] = time.perf_counter() - phase_start

            # Phase 2: Send request and wait for response
            phase_start = time.perf_counter()
            response = await client.post(
                f"{service_url}/env/CrafterClassic/step",
                content=json_data,
                headers={"Content-Type": "application/json"},
                timeout=30.0
            )
            phases["network"] = time.perf_counter() - phase_start

            # Phase 3: Decode the already-received body to text
            phase_start = time.perf_counter()
            response_text = response.text
            phases["read_response"] = time.perf_counter() - phase_start

            # Phase 4: Parse JSON (the result is only needed for timing)
            phase_start = time.perf_counter()
            json.loads(response_text)
            phases["deserialize"] = time.perf_counter() - phase_start

            total_time = sum(phases.values())
            logger.info(f"\n Total step time: {total_time:.3f}s")
            logger.info(" Breakdown:")
            for phase, duration in phases.items():
                # Guard against a zero total on very coarse clocks.
                percentage = (duration / total_time) * 100 if total_time > 0 else 0.0
                logger.info(f" {phase}: {duration:.3f}s ({percentage:.1f}%)")
        finally:
            await client.post(
                f"{service_url}/env/CrafterClassic/terminate",
                json={"env_id": env_id}
            )
415
+
416
+
417
async def main():
    """Run all profiling tests.

    Traces one request in detail, runs every ``EnvironmentProfiler``
    scenario against the default service URL, prints the timing summary and
    finally logs recommendations when the mean round-trip time exceeds one
    second or the mean response exceeds 50 000 bytes.
    """
    profiler = EnvironmentProfiler()

    # First trace a single request
    await trace_single_request()

    # Run profiling tests
    await profiler.profile_with_connection_reuse()
    await profiler.profile_payload_size_impact()
    await profiler.test_concurrent_environments()
    await profiler.profile_service_internals()

    profiler.print_summary()

    logger.info("\n" + "="*60)
    logger.info("ANALYSIS & RECOMMENDATIONS")
    logger.info("="*60)

    network_samples = profiler.timings["http_request_only"]
    if network_samples and statistics.mean(network_samples) > 1.0:
        logger.info("\n⚠️ Network latency is high (>1s). Possible causes:")
        logger.info(" - Service is overloaded")
        logger.info(" - Python GIL blocking with concurrent requests")
        logger.info(" - Inefficient service implementation")

    size_samples = profiler.timings["response_size_bytes"]
    if size_samples and statistics.mean(size_samples) > 50000:  # 50KB
        logger.info("\n⚠️ Large response payloads. Consider:")
        logger.info(" - Compressing responses")
        logger.info(" - Removing unnecessary fields from observations")
        logger.info(" - Using binary protocols instead of JSON")


if __name__ == "__main__":
    asyncio.run(main())
@@ -0,0 +1,166 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Script to replicate the issue with Qwen 7B model inference
4
+ """
5
+
6
+ import httpx
7
+ import asyncio
8
+ import json
9
+ from datetime import datetime
10
+
11
# --- Configuration -------------------------------------------------------
# Learning-service proxy that the chat-completions request is sent to.
BASE_URL = "http://localhost:8000/api/v1/learning"
# Placeholder credential; replace with your actual API key.
API_KEY = "test-api-key"
# Base model whose inference is being reproduced.
MODEL = "Qwen/Qwen2.5-7B-Instruct"
15
+
16
async def test_base_model_inference():
    """Test base model inference through the local learning service.

    Posts one chat-completions request for ``MODEL`` to
    ``{BASE_URL}/v1/chat/completions`` and prints the request, the response
    status and headers and, on HTTP 200, the decoded body and the
    assistant's message. Connection errors, timeouts and any other
    exception are reported on stdout rather than raised, so the script can
    carry on with its remaining checks.
    """
    print("🔍 Testing Base Model Inference")
    print(f"Time: {datetime.now()}")
    print(f"Endpoint: {BASE_URL}/v1/chat/completions")
    print(f"Model: {MODEL}")
    print("=" * 60)

    async with httpx.AsyncClient(timeout=30.0) as client:
        headers = {
            "Authorization": f"Bearer {API_KEY}",
            "Content-Type": "application/json"
        }

        payload = {
            "model": MODEL,
            "messages": [
                {"role": "user", "content": "Hello, can you help me play Crafter?"}
            ],
            "temperature": 0.7,
            "max_tokens": 100
        }

        # The output of this script tends to be pasted into bug reports, so
        # the bearer token is masked in the echoed headers.
        printable_headers = {**headers, "Authorization": "Bearer ***"}
        print("\n📤 Request:")
        print(f"Headers: {json.dumps(printable_headers, indent=2)}")
        print(f"Payload: {json.dumps(payload, indent=2)}")

        try:
            print("\n⏳ Sending request...")
            response = await client.post(
                f"{BASE_URL}/v1/chat/completions",
                headers=headers,
                json=payload
            )

            print(f"\n📥 Response Status: {response.status_code}")
            print(f"Response Headers: {dict(response.headers)}")

            if response.status_code == 200:
                data = response.json()
                print("\n✅ Success!")
                print(f"Response: {json.dumps(data, indent=2)}")

                # Extract the assistant's message
                if "choices" in data and data["choices"]:
                    content = data["choices"][0]["message"]["content"]
                    print(f"\n🤖 Assistant says: {content}")

                    # content may be None (e.g. a tool-call-only reply), so
                    # check it before the substring test.
                    if content and "Base model inference not implemented" in content:
                        print("\n⚠️ WARNING: The service returned a 'not implemented' message!")
                        print("This means the service is responding but base models aren't supported yet.")
            else:
                print("\n❌ Error Response:")
                print(f"Body: {response.text}")

        except httpx.ConnectError as e:
            print(f"\n❌ Connection Error: Could not connect to {BASE_URL}")
            print(f"Error: {e}")
            print("\nPossible issues:")
            print("1. The backend service is not running on port 8000")
            print("2. The learning service is not properly configured")
            print("3. The proxy route /api/v1/learning is not set up")

        except httpx.TimeoutException as e:
            print("\n❌ Timeout Error: Request timed out after 30 seconds")
            print(f"Error: {e}")

        except Exception as e:
            # Diagnostic boundary: report anything unexpected and carry on.
            print(f"\n❌ Unexpected Error: {type(e).__name__}")
            print(f"Error: {e}")
88
+
89
+
90
async def test_crafter_integration(create_env: bool = True):
    """Test the full Crafter + LLM integration.

    Step 1 requests ``/health`` on the Crafter service at
    ``http://localhost:8901`` and prints whether it answered with HTTP 200.
    Step 2 (optional) posts to ``/create_env`` and prints the outcome.
    Failures in either step are printed rather than raised.

    Args:
        create_env: Set to ``False`` to skip the environment-creation step.
    """
    print("\n\n🎮 Testing Crafter Integration")
    print("=" * 60)

    crafter_url = "http://localhost:8901"

    # Step 1: is the service up at all?
    async with httpx.AsyncClient(timeout=5.0) as client:
        try:
            print(f"\n1️⃣ Checking Crafter service at {crafter_url}...")
            response = await client.get(f"{crafter_url}/health")
            if response.status_code == 200:
                print("✅ Crafter service is running!")
            else:
                print(f"⚠️ Crafter service returned status {response.status_code}")
        except Exception as e:
            # Diagnostic boundary: any failure means the service is unusable.
            print("❌ Crafter service is not running on port 8901")
            print(f" Error: {e}")
            print("\n To start Crafter service:")
            print(" uv run python -m uvicorn synth_ai.environments.service.app:app --host 0.0.0.0 --port 8901")

    if not create_env:
        return

    # Step 2: try to create an environment.
    print("\n2️⃣ Testing environment creation...")
    async with httpx.AsyncClient(timeout=10.0) as client:
        try:
            response = await client.post(
                f"{crafter_url}/create_env",
                json={
                    "instance_id": "test_instance",
                    "render_mode": "rgb_array",
                    "difficulty": "easy",
                    "seed": 42
                }
            )
            if response.status_code == 200:
                print("✅ Environment created successfully!")
            else:
                print(f"❌ Failed to create environment: {response.text}")
        except Exception as e:
            print(f"❌ Error creating environment: {e}")
133
+
134
+
135
def main():
    """Run all tests.

    Executes the base-model inference check followed by the Crafter
    integration check on one event loop, then prints a summary with
    remediation hints.
    """
    print("🚀 Replicating Qwen 7B Model Inference Issue")
    print("=" * 60)

    async def _run_all():
        # Test base model inference
        await test_base_model_inference()
        # Test Crafter integration
        await test_crafter_integration()

    # asyncio.run creates the loop, finalizes async generators and closes
    # the loop again, so no closed loop is left installed as the current one.
    asyncio.run(_run_all())

    print("\n\n📋 Summary:")
    print("=" * 60)
    print("This script tested:")
    print("1. Base model inference through the learning service proxy")
    print("2. Crafter environment service availability")
    print("\nTo fix the issues:")
    print("1. Ensure the backend is running: cd backend && uvicorn app.main:app --reload")
    print("2. Ensure the learning service is configured and running")
    print("3. Start Crafter service: uv run python -m uvicorn synth_ai.environments.service.app:app --host 0.0.0.0 --port 8901")


if __name__ == "__main__":
    main()