synth-ai 0.2.16__py3-none-any.whl → 0.2.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (299) hide show
  1. examples/analyze_semantic_words.sh +2 -2
  2. examples/baseline/banking77_baseline.py +204 -0
  3. examples/baseline/crafter_baseline.py +407 -0
  4. examples/baseline/pokemon_red_baseline.py +326 -0
  5. examples/baseline/simple_baseline.py +56 -0
  6. examples/baseline/warming_up_to_rl_baseline.py +239 -0
  7. examples/blog_posts/gepa/README.md +355 -0
  8. examples/blog_posts/gepa/configs/banking77_gepa_local.toml +95 -0
  9. examples/blog_posts/gepa/configs/banking77_gepa_test.toml +82 -0
  10. examples/blog_posts/gepa/configs/banking77_mipro_local.toml +52 -0
  11. examples/blog_posts/gepa/configs/hotpotqa_gepa_local.toml +59 -0
  12. examples/blog_posts/gepa/configs/hotpotqa_gepa_qwen.toml +36 -0
  13. examples/blog_posts/gepa/configs/hotpotqa_mipro_local.toml +53 -0
  14. examples/blog_posts/gepa/configs/hover_gepa_local.toml +59 -0
  15. examples/blog_posts/gepa/configs/hover_gepa_qwen.toml +36 -0
  16. examples/blog_posts/gepa/configs/hover_mipro_local.toml +53 -0
  17. examples/blog_posts/gepa/configs/ifbench_gepa_local.toml +59 -0
  18. examples/blog_posts/gepa/configs/ifbench_gepa_qwen.toml +36 -0
  19. examples/blog_posts/gepa/configs/ifbench_mipro_local.toml +53 -0
  20. examples/blog_posts/gepa/configs/pupa_gepa_local.toml +60 -0
  21. examples/blog_posts/gepa/configs/pupa_mipro_local.toml +54 -0
  22. examples/blog_posts/gepa/deploy_banking77_task_app.sh +41 -0
  23. examples/blog_posts/gepa/gepa_baseline.py +204 -0
  24. examples/blog_posts/gepa/query_prompts_example.py +97 -0
  25. examples/blog_posts/gepa/run_gepa_banking77.sh +87 -0
  26. examples/blog_posts/gepa/task_apps.py +105 -0
  27. examples/blog_posts/gepa/test_gepa_local.sh +67 -0
  28. examples/blog_posts/gepa/verify_banking77_setup.sh +123 -0
  29. examples/blog_posts/pokemon_vl/README.md +98 -0
  30. examples/blog_posts/pokemon_vl/configs/eval_gpt5nano.toml +26 -0
  31. examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +27 -0
  32. examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml +24 -0
  33. examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml +10 -0
  34. examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +43 -0
  35. examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml +40 -0
  36. examples/blog_posts/pokemon_vl/extract_images.py +239 -0
  37. examples/blog_posts/pokemon_vl/pokemon_vl_baseline.py +326 -0
  38. examples/blog_posts/pokemon_vl/run_eval_extract_images.py +209 -0
  39. examples/blog_posts/pokemon_vl/run_qwen_eval_extract_images.py +212 -0
  40. examples/blog_posts/pokemon_vl/text_box_analysis.md +106 -0
  41. examples/blog_posts/warming_up_to_rl/ARCHITECTURE.md +195 -0
  42. examples/blog_posts/warming_up_to_rl/FINAL_TEST_RESULTS.md +127 -0
  43. examples/blog_posts/warming_up_to_rl/INFERENCE_SUCCESS.md +132 -0
  44. examples/blog_posts/warming_up_to_rl/README.md +158 -0
  45. examples/blog_posts/warming_up_to_rl/SMOKE_TESTING.md +164 -0
  46. examples/blog_posts/warming_up_to_rl/SMOKE_TEST_COMPLETE.md +253 -0
  47. examples/blog_posts/warming_up_to_rl/configs/eval_baseline_qwen32b_10x20.toml +25 -0
  48. examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml +25 -0
  49. examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b_10x20.toml +26 -0
  50. examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml +25 -0
  51. examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml +29 -0
  52. examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +10 -0
  53. examples/blog_posts/warming_up_to_rl/configs/smoke_test.toml +75 -0
  54. examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +91 -0
  55. examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +40 -0
  56. examples/blog_posts/warming_up_to_rl/warming_up_to_rl_baseline.py +187 -0
  57. examples/dev/qwen3_32b_qlora_4xh100.toml +5 -0
  58. examples/multi_step/configs/VERILOG_REWARDS.md +4 -0
  59. examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +4 -0
  60. examples/multi_step/configs/crafter_rl_outcome.toml +2 -1
  61. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +65 -107
  62. examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +2 -1
  63. examples/multi_step/configs/crafter_rl_stepwise_simple.toml +2 -1
  64. examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml +105 -0
  65. examples/multi_step/configs/verilog_rl_lora.toml +80 -123
  66. examples/qwen_coder/configs/coder_lora_30b.toml +1 -3
  67. examples/qwen_coder/configs/coder_lora_4b.toml +4 -1
  68. examples/qwen_coder/configs/coder_lora_small.toml +1 -3
  69. examples/qwen_vl/README.md +10 -12
  70. examples/qwen_vl/SETUP_COMPLETE.md +7 -8
  71. examples/qwen_vl/VISION_TESTS_COMPLETE.md +2 -3
  72. examples/qwen_vl/collect_data_via_cli.md +76 -84
  73. examples/qwen_vl/collect_vision_traces.py +4 -4
  74. examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +40 -57
  75. examples/qwen_vl/configs/crafter_vlm_sft_example.toml +1 -2
  76. examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +20 -37
  77. examples/qwen_vl/configs/eval_gpt5nano_vision.toml +21 -40
  78. examples/qwen_vl/configs/eval_qwen3vl_vision.toml +26 -0
  79. examples/qwen_vl/configs/{filter_qwen2vl_sft.toml → filter_qwen3vl_sft.toml} +4 -5
  80. examples/qwen_vl/configs/filter_vision_sft.toml +2 -3
  81. examples/qwen_vl/crafter_qwen_vl_agent.py +5 -5
  82. examples/qwen_vl/run_vision_comparison.sh +6 -7
  83. examples/rl/README.md +5 -5
  84. examples/rl/configs/rl_from_base_qwen.toml +26 -1
  85. examples/rl/configs/rl_from_base_qwen17.toml +6 -2
  86. examples/rl/task_app/README.md +1 -2
  87. examples/rl/task_app/math_single_step.py +2 -2
  88. examples/run_crafter_demo.sh +2 -2
  89. examples/sft/README.md +1 -1
  90. examples/sft/configs/crafter_fft_qwen0p6b.toml +4 -1
  91. examples/sft/configs/crafter_lora_qwen0p6b.toml +4 -1
  92. examples/swe/task_app/README.md +32 -2
  93. examples/swe/task_app/grpo_swe_mini.py +4 -0
  94. examples/swe/task_app/hosted/envs/crafter/react_agent.py +1 -1
  95. examples/swe/task_app/hosted/envs/mini_swe/environment.py +37 -10
  96. examples/swe/task_app/hosted/inference/openai_client.py +4 -38
  97. examples/swe/task_app/hosted/policy_routes.py +17 -0
  98. examples/swe/task_app/hosted/rollout.py +4 -2
  99. examples/swe/task_app/morph_backend.py +178 -0
  100. examples/task_apps/banking77/__init__.py +6 -0
  101. examples/task_apps/banking77/banking77_task_app.py +841 -0
  102. examples/task_apps/banking77/deploy_wrapper.py +46 -0
  103. examples/task_apps/crafter/CREATE_SFT_DATASET.md +4 -0
  104. examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +4 -0
  105. examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +4 -0
  106. examples/task_apps/crafter/task_app/README.md +1 -1
  107. examples/task_apps/crafter/task_app/grpo_crafter.py +90 -5
  108. examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +1 -1
  109. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +4 -26
  110. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -2
  111. examples/task_apps/crafter/task_app/synth_envs_hosted/hosted_app.py +49 -0
  112. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +372 -107
  113. examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +81 -12
  114. examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +82 -11
  115. examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +194 -1
  116. examples/task_apps/enron/task_app/grpo_enron_task_app.py +1 -1
  117. examples/task_apps/gepa_benchmarks/__init__.py +7 -0
  118. examples/task_apps/gepa_benchmarks/common.py +260 -0
  119. examples/task_apps/gepa_benchmarks/hotpotqa_task_app.py +507 -0
  120. examples/task_apps/gepa_benchmarks/hover_task_app.py +436 -0
  121. examples/task_apps/gepa_benchmarks/ifbench_task_app.py +563 -0
  122. examples/task_apps/gepa_benchmarks/pupa_task_app.py +460 -0
  123. examples/task_apps/math/README.md +1 -2
  124. examples/task_apps/pokemon_red/README.md +3 -4
  125. examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +4 -0
  126. examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +6 -5
  127. examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +1 -2
  128. examples/task_apps/pokemon_red/task_app.py +288 -39
  129. examples/task_apps/sokoban/README.md +2 -3
  130. examples/task_apps/verilog/eval_groq_qwen32b.toml +12 -14
  131. examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +1 -1
  132. examples/vlm/configs/crafter_vlm_gpt4o.toml +4 -1
  133. examples/warming_up_to_rl/configs/crafter_fft.toml +4 -1
  134. examples/warming_up_to_rl/configs/crafter_fft_4b.toml +0 -2
  135. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +3 -2
  136. examples/warming_up_to_rl/run_local_rollout_traced.py +1 -1
  137. examples/warming_up_to_rl/task_app/README.md +1 -1
  138. examples/warming_up_to_rl/task_app/grpo_crafter.py +185 -5
  139. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +1 -1
  140. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +3 -27
  141. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -1
  142. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +49 -0
  143. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +156 -45
  144. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +37 -4
  145. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +33 -3
  146. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +67 -0
  147. examples/workflows/math_rl/configs/rl_from_base_qwen.toml +27 -0
  148. examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +6 -0
  149. synth_ai/api/train/builders.py +99 -4
  150. synth_ai/api/train/cli.py +516 -26
  151. synth_ai/api/train/config_finder.py +13 -2
  152. synth_ai/api/train/configs/__init__.py +23 -2
  153. synth_ai/api/train/configs/prompt_learning.py +442 -0
  154. synth_ai/api/train/configs/rl.py +61 -7
  155. synth_ai/api/train/configs/sft.py +6 -2
  156. synth_ai/api/train/configs/shared.py +59 -2
  157. synth_ai/api/train/task_app.py +1 -1
  158. synth_ai/api/train/validators.py +277 -0
  159. synth_ai/auth/credentials.py +119 -0
  160. synth_ai/baseline/__init__.py +25 -0
  161. synth_ai/baseline/config.py +209 -0
  162. synth_ai/baseline/discovery.py +214 -0
  163. synth_ai/baseline/execution.py +146 -0
  164. synth_ai/cli/__init__.py +94 -18
  165. synth_ai/cli/__main__.py +0 -0
  166. synth_ai/cli/claude.py +70 -0
  167. synth_ai/cli/codex.py +84 -0
  168. synth_ai/cli/commands/__init__.py +18 -0
  169. synth_ai/cli/commands/baseline/__init__.py +12 -0
  170. synth_ai/cli/commands/baseline/core.py +637 -0
  171. synth_ai/cli/commands/baseline/list.py +93 -0
  172. synth_ai/cli/commands/demo/__init__.py +6 -0
  173. synth_ai/cli/commands/demo/core.py +163 -0
  174. synth_ai/cli/commands/eval/__init__.py +19 -0
  175. synth_ai/cli/commands/eval/core.py +1112 -0
  176. synth_ai/cli/commands/eval/errors.py +81 -0
  177. synth_ai/cli/commands/eval/validation.py +133 -0
  178. synth_ai/cli/commands/filter/__init__.py +12 -0
  179. synth_ai/cli/commands/filter/core.py +424 -0
  180. synth_ai/cli/commands/filter/errors.py +55 -0
  181. synth_ai/cli/commands/filter/validation.py +77 -0
  182. synth_ai/cli/commands/help/__init__.py +177 -0
  183. synth_ai/cli/commands/help/core.py +72 -0
  184. synth_ai/cli/commands/smoke/__init__.py +7 -0
  185. synth_ai/cli/commands/smoke/core.py +1436 -0
  186. synth_ai/cli/commands/status/__init__.py +64 -0
  187. synth_ai/cli/commands/status/client.py +192 -0
  188. synth_ai/cli/commands/status/config.py +92 -0
  189. synth_ai/cli/commands/status/errors.py +20 -0
  190. synth_ai/cli/commands/status/formatters.py +164 -0
  191. synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
  192. synth_ai/cli/commands/status/subcommands/files.py +79 -0
  193. synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
  194. synth_ai/cli/commands/status/subcommands/models.py +79 -0
  195. synth_ai/cli/commands/status/subcommands/pricing.py +22 -0
  196. synth_ai/cli/commands/status/subcommands/runs.py +81 -0
  197. synth_ai/cli/commands/status/subcommands/summary.py +47 -0
  198. synth_ai/cli/commands/status/subcommands/usage.py +203 -0
  199. synth_ai/cli/commands/status/utils.py +114 -0
  200. synth_ai/cli/commands/train/__init__.py +53 -0
  201. synth_ai/cli/commands/train/core.py +21 -0
  202. synth_ai/cli/commands/train/errors.py +117 -0
  203. synth_ai/cli/commands/train/judge_schemas.py +200 -0
  204. synth_ai/cli/commands/train/judge_validation.py +305 -0
  205. synth_ai/cli/commands/train/validation.py +386 -0
  206. synth_ai/cli/demo.py +30 -158
  207. synth_ai/cli/deploy/__init__.py +43 -0
  208. synth_ai/cli/deploy.py +162 -0
  209. synth_ai/cli/eval/__init__.py +36 -0
  210. synth_ai/cli/eval/core.py +5 -0
  211. synth_ai/cli/eval/errors.py +31 -0
  212. synth_ai/cli/eval/validation.py +5 -0
  213. synth_ai/cli/filter/__init__.py +28 -0
  214. synth_ai/cli/filter/core.py +5 -0
  215. synth_ai/cli/filter/errors.py +23 -0
  216. synth_ai/cli/filter/validation.py +5 -0
  217. synth_ai/cli/legacy_root_backup.py +14 -8
  218. synth_ai/cli/modal_serve/__init__.py +12 -0
  219. synth_ai/cli/modal_serve/core.py +14 -0
  220. synth_ai/cli/modal_serve/errors.py +8 -0
  221. synth_ai/cli/modal_serve/validation.py +11 -0
  222. synth_ai/cli/opencode.py +107 -0
  223. synth_ai/cli/root.py +9 -5
  224. synth_ai/cli/serve/__init__.py +12 -0
  225. synth_ai/cli/serve/core.py +14 -0
  226. synth_ai/cli/serve/errors.py +8 -0
  227. synth_ai/cli/serve/validation.py +11 -0
  228. synth_ai/cli/setup.py +20 -265
  229. synth_ai/cli/status.py +7 -126
  230. synth_ai/cli/task_app_deploy.py +1 -10
  231. synth_ai/cli/task_app_modal_serve.py +4 -9
  232. synth_ai/cli/task_app_serve.py +4 -11
  233. synth_ai/cli/task_apps.py +51 -1480
  234. synth_ai/cli/train/__init__.py +12 -0
  235. synth_ai/cli/train/core.py +21 -0
  236. synth_ai/cli/train/errors.py +8 -0
  237. synth_ai/cli/train/validation.py +24 -0
  238. synth_ai/cli/train.py +1 -14
  239. synth_ai/demos/crafter/grpo_crafter_task_app.py +1 -1
  240. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +1 -1
  241. synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +7 -4
  242. synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +9 -5
  243. synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +4 -3
  244. synth_ai/environments/examples/red/engine.py +33 -12
  245. synth_ai/environments/examples/red/engine_helpers/reward_components.py +151 -179
  246. synth_ai/environments/examples/red/environment.py +26 -0
  247. synth_ai/environments/examples/red/trace_hooks_v3.py +168 -0
  248. synth_ai/http.py +12 -0
  249. synth_ai/judge_schemas.py +10 -10
  250. synth_ai/learning/__init__.py +10 -0
  251. synth_ai/learning/prompt_learning_client.py +276 -0
  252. synth_ai/learning/prompt_learning_types.py +184 -0
  253. synth_ai/learning/rl/client.py +3 -1
  254. synth_ai/pricing/__init__.py +2 -0
  255. synth_ai/pricing/model_pricing.py +57 -0
  256. synth_ai/streaming/__init__.py +29 -0
  257. synth_ai/streaming/config.py +94 -0
  258. synth_ai/streaming/handlers.py +518 -0
  259. synth_ai/streaming/streamer.py +320 -0
  260. synth_ai/streaming/types.py +95 -0
  261. synth_ai/task/apps/__init__.py +1 -0
  262. synth_ai/task/config.py +2 -0
  263. synth_ai/task/tracing_utils.py +25 -25
  264. synth_ai/task/validators.py +45 -9
  265. synth_ai/task_app_cfgs.py +21 -0
  266. synth_ai/tracing_v3/config.py +162 -19
  267. synth_ai/tracing_v3/constants.py +1 -1
  268. synth_ai/tracing_v3/db_config.py +24 -38
  269. synth_ai/tracing_v3/migration_helper.py +1 -2
  270. synth_ai/tracing_v3/storage/config.py +47 -13
  271. synth_ai/tracing_v3/storage/factory.py +3 -3
  272. synth_ai/tracing_v3/turso/daemon.py +113 -11
  273. synth_ai/tracing_v3/turso/native_manager.py +92 -16
  274. synth_ai/types.py +8 -0
  275. synth_ai/urls.py +11 -0
  276. synth_ai/utils/__init__.py +30 -1
  277. synth_ai/utils/agents.py +74 -0
  278. synth_ai/utils/bin.py +39 -0
  279. synth_ai/utils/cli.py +149 -5
  280. synth_ai/utils/env.py +40 -33
  281. synth_ai/utils/http.py +4 -1
  282. synth_ai/utils/json.py +72 -0
  283. synth_ai/utils/modal.py +285 -3
  284. synth_ai/utils/paths.py +48 -0
  285. synth_ai/utils/uvicorn.py +113 -0
  286. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/METADATA +109 -6
  287. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/RECORD +291 -142
  288. examples/qwen_vl/configs/eval_qwen2vl_vision.toml +0 -44
  289. synth_ai/cli/tui.py +0 -62
  290. synth_ai/tui/__init__.py +0 -5
  291. synth_ai/tui/__main__.py +0 -13
  292. synth_ai/tui/cli/__init__.py +0 -1
  293. synth_ai/tui/cli/query_experiments.py +0 -164
  294. synth_ai/tui/cli/query_experiments_v3.py +0 -164
  295. synth_ai/tui/dashboard.py +0 -911
  296. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/WHEEL +0 -0
  297. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/entry_points.txt +0 -0
  298. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/licenses/LICENSE +0 -0
  299. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,204 @@
1
+ """Banking77 baseline file for intent classification evaluation."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any, Dict
6
+
7
+ from datasets import load_dataset
8
+
9
+ from synth_ai.baseline import BaselineConfig, BaselineTaskRunner, DataSplit, TaskResult
10
+ from synth_ai.inference import InferenceClient
11
+ import os
12
+ import httpx
13
+
14
+
15
# Module-level cache: the dataset is loaded at most once per process.
_dataset = None
_label_names = None


def _load_dataset():
    """Return the cached Banking77 dataset together with its label names.

    The first call loads "PolyAI/banking77"; if that load raises, the
    un-prefixed id "banking77" is tried instead.  Label names are read from
    the "label" feature of the train split.  Subsequent calls return the
    cached pair without loading again.
    """
    global _dataset, _label_names
    if _dataset is not None:
        return _dataset, _label_names

    try:
        loaded = load_dataset("PolyAI/banking77")
    except Exception:
        # Fallback: try without org prefix
        loaded = load_dataset("banking77")

    _dataset = loaded
    _label_names = loaded["train"].features["label"].names
    return _dataset, _label_names
31
+
32
+
33
class Banking77TaskRunner(BaselineTaskRunner):
    """Task runner for Banking77 intent classification.

    Each task sends one dataset example to the model, forces a
    ``banking77_classify`` tool call, and scores the returned label against
    the example's label (reward 1.0 on an exact match, otherwise 0.0).
    """

    # Name of the single tool the model is forced to call.
    _TOOL_NAME = "banking77_classify"

    def __init__(self, policy_config: Dict[str, Any], env_config: Dict[str, Any]):
        """Load the dataset and capture inference settings.

        Args:
            policy_config: Must contain "model"; may contain "temperature"
                (default 0.0), "max_tokens" (default 128) and "inference_url".
            env_config: Passed to the base class; ``run_task`` reads its
                optional "split" key.
        """
        super().__init__(policy_config, env_config)

        # Load dataset
        self.dataset, self.label_names = _load_dataset()

        # Store config for inference
        self.model = policy_config["model"]
        self.temperature = policy_config.get("temperature", 0.0)
        self.max_tokens = policy_config.get("max_tokens", 128)
        self.inference_url = policy_config.get("inference_url")

        # Tool definition: the label is constrained to the dataset's label names.
        self.tool = {
            "type": "function",
            "function": {
                "name": self._TOOL_NAME,
                "description": "Classify a banking query into an intent",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "label": {
                            "type": "string",
                            "enum": self.label_names,
                            "description": "The intent label",
                        }
                    },
                    "required": ["label"],
                },
            },
        }

    async def run_task(self, seed: int) -> TaskResult:
        """Run a single Banking77 classification task.

        Args:
            seed: Index of the example inside the selected split.

        Returns:
            A ``TaskResult`` whose ``outcome_reward`` is 1.0 when the predicted
            label equals the expected label and 0.0 otherwise.

        Raises:
            httpx.HTTPStatusError: When the direct OpenAI/Groq endpoint answers
                with a 4xx/5xx status (previously such replies were silently
                scored as wrong answers).
        """
        split = self.env_config.get("split", "train")
        example = self.dataset[self._resolve_split(split)][seed]

        messages = self._build_messages(example["text"])

        # Use InferenceClient if URL provided, otherwise use OpenAI-compatible API
        if self.inference_url and self.inference_url.startswith("http"):
            response = await self._complete_via_inference_client(messages)
        else:
            response = await self._complete_via_provider(messages)

        predicted_label = self._extract_label(response)

        # Evaluate
        expected_label = self.label_names[example["label"]]
        correct = predicted_label == expected_label

        return TaskResult(
            seed=seed,
            success=True,
            outcome_reward=1.0 if correct else 0.0,
            total_steps=1,
            metadata={
                "query": example["text"],
                "expected": expected_label,
                "predicted": predicted_label,
                "correct": correct,
                "split": split,
            },
        )

    def _resolve_split(self, split: str) -> str:
        """Map a requested split name onto a split the dataset actually has.

        A requested "val" split falls back to "test" when the dataset has no
        "val" split of its own; every other name is returned unchanged.
        """
        if split == "val" and split not in self.dataset and "test" in self.dataset:
            return "test"
        return split

    def _build_messages(self, query: str) -> list[Dict[str, str]]:
        """Build the system + user chat messages for one customer query."""
        system_prompt = (
            "You are an expert banking assistant that classifies customer queries.\n"
            "Given a customer message, respond with exactly one intent label using the tool call.\n"
            "\n"
            f"Valid intents: {', '.join(self.label_names)}"
        )
        user_prompt = f"Customer Query: {query}\n\nClassify this query."
        return [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ]

    def _forced_tool_choice(self) -> Dict[str, Any]:
        """Return the ``tool_choice`` payload that forces the classify tool."""
        return {"type": "function", "function": {"name": self._TOOL_NAME}}

    async def _complete_via_inference_client(self, messages: list[Dict[str, str]]) -> Any:
        """Request a completion through ``InferenceClient`` at ``inference_url``."""
        api_key = os.getenv("SYNTH_API_KEY") or os.getenv("OPENAI_API_KEY") or ""
        base_url = self.inference_url.rstrip("/")
        # Append "/api" only when the URL does not already contain it.
        if "/api" not in base_url:
            base_url = f"{base_url}/api"
        client = InferenceClient(base_url=base_url, api_key=api_key)
        return await client.create_chat_completion(
            model=self.model,
            messages=messages,
            tools=[self.tool],
            tool_choice=self._forced_tool_choice(),
            temperature=self.temperature,
            max_tokens=self.max_tokens,
        )

    async def _complete_via_provider(self, messages: list[Dict[str, str]]) -> Any:
        """Request a completion directly from OpenAI, or Groq for "groq:" models."""
        if self.model.startswith("groq:"):
            model_name = self.model[len("groq:"):]  # Remove "groq:" prefix
            key_env = "GROQ_API_KEY"
            base_url = "https://api.groq.com/openai/v1"
        else:
            model_name = self.model
            key_env = "OPENAI_API_KEY"
            base_url = "https://api.openai.com/v1"

        api_key = os.getenv(key_env) or ""
        headers = {"Authorization": f"Bearer {api_key}"} if api_key else {}

        async with httpx.AsyncClient() as http_client:
            resp = await http_client.post(
                f"{base_url}/chat/completions",
                json={
                    "model": model_name,
                    "messages": messages,
                    "tools": [self.tool],
                    "tool_choice": self._forced_tool_choice(),
                    "temperature": self.temperature,
                    "max_tokens": self.max_tokens,
                },
                headers=headers,
            )
            # Surface auth / rate-limit / server errors instead of letting an
            # error body be scored as an empty (wrong) prediction.
            resp.raise_for_status()
            return resp.json()

    @staticmethod
    def _extract_label(response: Any) -> str:
        """Pull the predicted label out of a chat-completion style response.

        Returns an empty string when the response has no tool call, when the
        tool-call arguments are not valid JSON (for example truncated output),
        or when the arguments carry no "label".
        """
        tool_calls = []
        if "choices" in response and len(response["choices"]) > 0:
            message = response["choices"][0].get("message") or {}
            tool_calls = message.get("tool_calls") or []
        elif "tool_calls" in response:
            tool_calls = response["tool_calls"] or []

        if not tool_calls:
            return ""

        # Handle both string and dict arguments
        args = tool_calls[0]["function"].get("arguments", "")
        if isinstance(args, str):
            import json

            try:
                args = json.loads(args)
            except json.JSONDecodeError:
                # Malformed arguments count as "no prediction".
                return ""
        return args.get("label", "") if isinstance(args, dict) else ""
165
+
166
+
167
# Define baseline config
# The dataset is loaded at import time; the split sizes below are read from it.
_load_dataset()


def _seed_list(split_name: str, cap: int) -> list:
    """Return seeds 0..k-1 where k is ``cap`` bounded by the split's length.

    When no dataset is loaded, the full ``range(cap)`` is returned.
    """
    if _dataset:
        return list(range(min(cap, len(_dataset[split_name]))))
    return list(range(cap))


# NOTE(review): "val" and "test" are both sized from the dataset's "test"
# split and both start at seed 0, so their seed lists overlap - confirm this
# is intended.
gepa_baseline = BaselineConfig(
    baseline_id="gepa",
    name="GEPA - Banking77 Intent Classification",
    description="Banking77 intent classification baseline for prompt optimization experiments",
    task_runner=Banking77TaskRunner,
    splits={
        "train": DataSplit(name="train", seeds=_seed_list("train", 10000)),
        "val": DataSplit(name="val", seeds=_seed_list("test", 1000)),
        "test": DataSplit(name="test", seeds=_seed_list("test", 3000)),
    },
    default_policy_config={
        "model": "groq:llama-3.1-70b-versatile",
        "temperature": 0.0,
        "max_tokens": 128,
    },
    default_env_config={
        "split": "train",
    },
    metadata={
        "dataset": "PolyAI/banking77",
        "num_classes": 77,
        "task_type": "classification",
    },
    tags=["classification", "nlp", "intent", "blog-post"],
)
204
+
@@ -0,0 +1,97 @@
1
+ """
2
+ Example script showing how to query prompt learning job results.
3
+
4
+ Usage:
5
+ python query_prompts_example.py pl_9c58b711c2644083
6
+ """
7
+
8
+ import os
9
+ import sys
10
+ from pprint import pprint
11
+
12
+ from synth_ai.learning import get_prompts, get_prompt_text, get_scoring_summary
13
+
14
+
15
+ def main():
16
+ if len(sys.argv) < 2:
17
+ print("Usage: python query_prompts_example.py <job_id>")
18
+ print("Example: python query_prompts_example.py pl_9c58b711c2644083")
19
+ sys.exit(1)
20
+
21
+ job_id = sys.argv[1]
22
+
23
+ # Get credentials from environment
24
+ base_url = os.getenv("BACKEND_BASE_URL", "http://localhost:8000")
25
+ api_key = os.getenv("SYNTH_API_KEY")
26
+
27
+ if not api_key:
28
+ print("Error: SYNTH_API_KEY environment variable not set")
29
+ sys.exit(1)
30
+
31
+ print(f"Querying job: {job_id}")
32
+ print(f"Backend: {base_url}")
33
+ print("=" * 80)
34
+
35
+ # Get all prompts and metadata
36
+ print("\n📊 Fetching prompt results...")
37
+ results = get_prompts(job_id, base_url, api_key)
38
+
39
+ # Print best score
40
+ if results.best_score is not None:
41
+ print(f"\n🏆 Best Score: {results.best_score:.3f} ({results.best_score * 100:.1f}%)")
42
+
43
+ # Print top-K prompts with scores
44
+ top_prompts = results.top_prompts
45
+ if top_prompts:
46
+ print(f"\n📝 Top {len(top_prompts)} Prompts:")
47
+ print("=" * 80)
48
+ for prompt_info in sorted(top_prompts, key=lambda p: p.get("rank", 999)):
49
+ rank = prompt_info["rank"]
50
+ train_accuracy = prompt_info.get("train_accuracy")
51
+ val_accuracy = prompt_info.get("val_accuracy")
52
+
53
+ print(f"\nRank #{rank}:")
54
+ if train_accuracy is not None:
55
+ print(f" Train Accuracy: {train_accuracy:.3f} ({train_accuracy * 100:.1f}%)")
56
+ if val_accuracy is not None:
57
+ print(f" Val Accuracy: {val_accuracy:.3f} ({val_accuracy * 100:.1f}%)")
58
+ print(f" Prompt Text:")
59
+ print(" " + "-" * 76)
60
+ full_text = prompt_info.get("full_text", "")
61
+ for line in full_text.split("\n"):
62
+ print(f" {line}")
63
+ print(" " + "-" * 76)
64
+
65
+ # Get scoring summary
66
+ print("\n📈 Scoring Summary:")
67
+ print("=" * 80)
68
+ summary = get_scoring_summary(job_id, base_url, api_key)
69
+
70
+ print(f"Best Train Accuracy: {summary['best_train_accuracy']:.3f} ({summary['best_train_accuracy'] * 100:.1f}%)")
71
+ if summary['best_val_accuracy']:
72
+ print(f"Best Val Accuracy: {summary['best_val_accuracy']:.3f} ({summary['best_val_accuracy'] * 100:.1f}%)")
73
+ print(f"Mean Train Accuracy: {summary['mean_train_accuracy']:.3f} ({summary['mean_train_accuracy'] * 100:.1f}%)")
74
+ print(f"Candidates Tried: {summary['num_candidates_tried']}")
75
+ print(f"Frontier Candidates: {summary['num_frontier_candidates']}")
76
+
77
+ print(f"\nScore Distribution:")
78
+ for bin_range, count in summary['score_distribution'].items():
79
+ bar = "█" * count
80
+ print(f" {bin_range}: {count:3d} {bar}")
81
+
82
+ # Quick access to best prompt text only
83
+ print("\n💡 Quick access to best prompt:")
84
+ print("=" * 80)
85
+ best_text = get_prompt_text(job_id, base_url, api_key, rank=1)
86
+ if best_text:
87
+ print(best_text)
88
+ else:
89
+ print("Best prompt text not available yet (job may still be running)")
90
+
91
+ print("\n" + "=" * 80)
92
+ print("✅ Query complete!")
93
+
94
+
95
+ if __name__ == "__main__":
96
+ main()
97
+
@@ -0,0 +1,87 @@
1
#!/bin/bash
# Run GEPA optimization for Banking77 against the backend
#
# Required environment: SYNTH_API_KEY, ENVIRONMENT_API_KEY, GROQ_API_KEY.
# Optional environment: BACKEND_BASE_URL (defaults to http://localhost:8000).

set -e

# Local task app that must already be serving before training starts.
TASK_APP_URL="http://127.0.0.1:8102"
# Upper bound for each health probe so an unresponsive host cannot hang the script.
HEALTH_TIMEOUT_SECS=10

# Exit with a hint when the named environment variable is empty or unset.
#   $1 = variable name, $2 = explanation line printed before the export hint
require_env() {
    local name="$1"
    if [ -z "${!name}" ]; then
        echo "❌ Error: $name not set"
        echo "$2"
        echo " export $name=your_key"
        exit 1
    fi
}

# Confirm a secret is set while printing only a 4-character prefix, so the
# terminal / CI log never contains a substantial part of the key.
#   $1 = variable name
show_key() {
    local name="$1"
    local value="${!name}"
    echo "✅ $name: ${value:0:4}..."
}

echo "🧬 Running GEPA on Banking77"
echo "============================="

# Check for required environment variables
require_env SYNTH_API_KEY "Please get your API key from the backend and set it:"
require_env ENVIRONMENT_API_KEY "Please set the same key used when deploying the task app:"
require_env GROQ_API_KEY "Please set your Groq API key:"

# Default to localhost backend if not specified
BACKEND_URL="${BACKEND_BASE_URL:-http://localhost:8000}"

show_key SYNTH_API_KEY
show_key ENVIRONMENT_API_KEY
show_key GROQ_API_KEY
echo "✅ Backend URL: $BACKEND_URL"
echo ""

# Navigate to repo root
cd "$(dirname "$0")/../../.."

# Check if task app is running
echo "🔍 Checking if Banking77 task app is running on $TASK_APP_URL..."
if ! curl -s -f --max-time "$HEALTH_TIMEOUT_SECS" \
        -H "X-API-Key: $ENVIRONMENT_API_KEY" "$TASK_APP_URL/health" > /dev/null 2>&1; then
    echo "❌ Error: Banking77 task app is not running on $TASK_APP_URL"
    echo ""
    echo "Please start it first:"
    echo " ./examples/blog_posts/gepa/deploy_banking77_task_app.sh"
    echo ""
    echo "Or in another terminal:"
    echo " cd $(pwd)"
    echo " uvx synth-ai deploy banking77 --runtime uvicorn --port 8102"
    exit 1
fi
echo "✅ Task app is healthy"
echo ""

# Check backend connection
echo "🔍 Checking backend connection to $BACKEND_URL..."
if ! curl -s -f --max-time "$HEALTH_TIMEOUT_SECS" "$BACKEND_URL/api/health" > /dev/null 2>&1; then
    echo "⚠️ Warning: Cannot connect to backend at $BACKEND_URL"
    echo "Make sure the backend is running."
    # On EOF (non-interactive run) treat the answer as "no" rather than
    # letting `set -e` abort without an explicit decision.
    read -p "Continue anyway? (y/N) " -n 1 -r || REPLY=""
    echo
    if [[ ! $REPLY =~ ^[Yy]$ ]]; then
        exit 1
    fi
else
    echo "✅ Backend is healthy"
fi
echo ""

echo "🚀 Starting GEPA training..."
echo "Config: examples/blog_posts/gepa/configs/banking77_gepa_local.toml"
echo ""

# Run the training
uvx synth-ai train \
    --type prompt_learning \
    --config examples/blog_posts/gepa/configs/banking77_gepa_local.toml \
    --backend "$BACKEND_URL" \
    --poll

echo ""
echo "✅ GEPA training complete!"
87
+
@@ -0,0 +1,105 @@
1
+ """Metadata for GEPA blog task app coverage.
2
+
3
+ This module centralises the set of task apps that the GEPA blog post
4
+ references so that configuration files and documentation can import the
5
+ same canonical definitions. Each entry mirrors a task app that is
6
+ available via Synth's prompt-learning backend, making it easier to keep
7
+ configs, docs, and evaluation notebooks in sync.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ from dataclasses import dataclass
13
+ from typing import Iterable, Sequence
14
+
15
+
16
+ @dataclass(frozen=True, slots=True)
17
+ class TaskAppSupport:
18
+ """Describes a task app that the GEPA blog supports."""
19
+
20
+ app_id: str
21
+ display_name: str
22
+ dataset_id: str
23
+ description: str
24
+ default_port: int
25
+ tags: Sequence[str]
26
+ metrics: Sequence[str]
27
+ sources: Sequence[str]
28
+
29
+
30
# Canonical registry of the task apps referenced by the GEPA blog post.
# One TaskAppSupport entry per app; every entry uses a distinct default_port
# (8102 for banking77, 8110-8113 for the remaining four).
SUPPORTED_TASK_APPS: tuple[TaskAppSupport, ...] = (
    TaskAppSupport(
        app_id="banking77",
        display_name="Banking77 Intent Classification",
        dataset_id="PolyAI/banking77",
        description="Classify banking customer support queries into 77 intents.",
        default_port=8102,
        tags=("classification", "intent", "nlp"),
        metrics=("accuracy",),
        sources=(
            "GEPA blog quickstart",
            "PolyAI Banking77 dataset card",
        ),
    ),
    TaskAppSupport(
        app_id="hotpotqa",
        display_name="HotpotQA Multi-Hop QA",
        dataset_id="hotpot_qa",
        description="Answer multi-hop questions with supporting facts sourced from Wikipedia passages.",
        default_port=8110,
        tags=("qa", "multi-hop", "reasoning"),
        metrics=("answer_em", "supporting_fact_f1"),
        sources=(
            "GEPA Table 1",
            "HotpotQA (Yang et al., 2018)",
        ),
    ),
    TaskAppSupport(
        app_id="ifbench",
        display_name="IFBench Instruction Following",
        dataset_id="Muennighoff/IFBench",
        description="Follow natural language instructions focusing on faithful adherence.",
        default_port=8111,
        tags=("instruction-following", "nlp"),
        metrics=("compliance", "accuracy"),
        sources=(
            "GEPA Table 1",
            "IFBench benchmark release",
        ),
    ),
    TaskAppSupport(
        app_id="hover",
        display_name="HoVer Claim Verification",
        dataset_id="hover",
        description="Determine whether Wikipedia claims are supported, refuted, or not enough info given retrieved evidence.",
        default_port=8112,
        tags=("fact-checking", "classification"),
        metrics=("label_accuracy", "evidence_f1"),
        sources=(
            "GEPA Table 1",
            "HoVer benchmark (Jiang et al., 2020)",
        ),
    ),
    TaskAppSupport(
        app_id="pupa",
        display_name="PUPA Privacy-Aware Delegation",
        dataset_id="microsoft/PUPA",
        description="Delegate actions while respecting privacy policies and extracting structured responses.",
        default_port=8113,
        tags=("delegation", "privacy", "structured-output"),
        metrics=("privacy_compliance", "task_success"),
        sources=(
            "GEPA Table 1",
            "PUPA benchmark release",
        ),
    ),
)
97
+
98
+
99
def list_supported_task_apps() -> Iterable[TaskAppSupport]:
    """Expose the registered GEPA blog task apps as an iterable.

    Returns:
        The module-level ``SUPPORTED_TASK_APPS`` tuple itself (not a copy).
    """
    supported_apps = SUPPORTED_TASK_APPS
    return supported_apps
103
+
104
+
105
# Names exported by a star-import of this module.
__all__ = ["TaskAppSupport", "SUPPORTED_TASK_APPS", "list_supported_task_apps"]
@@ -0,0 +1,67 @@
1
#!/bin/bash
# Smoke test for GEPA prompt learning on Banking77.
# Talks to a backend at http://localhost:8000 unless BACKEND_BASE_URL is set,
# and to a task app at http://127.0.0.1:8102 unless TASK_APP_URL is set.

set -e

# Abort the script when the environment variable named by $1 is empty/unset.
require_env() {
    local var_name="$1"
    if [ -z "${!var_name}" ]; then
        echo "❌ ERROR: $var_name not set"
        exit 1
    fi
}

# Quiet health probe: succeeds only when curl (with -f, so HTTP errors count
# as failures) completes successfully for the given arguments.
probe() {
    curl -s -f "$@" > /dev/null 2>&1
}

echo "🚀 Testing GEPA Prompt Learning for Banking77"
echo "=============================================="

# Both keys are mandatory; fail fast before touching the network.
require_env SYNTH_API_KEY
require_env ENVIRONMENT_API_KEY

# Backend location, defaulting to the local development backend.
BACKEND_URL="${BACKEND_BASE_URL:-http://localhost:8000}"
echo "📍 Backend URL: $BACKEND_URL"

# The backend must be reachable; otherwise there is nothing to train against.
echo "🔍 Checking backend health..."
if ! probe "$BACKEND_URL/api/health"; then
    echo "❌ ERROR: Backend not accessible at $BACKEND_URL"
    echo " Make sure backend is running on port 8000"
    exit 1
fi
echo "✅ Backend is accessible"

# The task app check only warns: the run continues even when it is down.
TASK_APP_URL="${TASK_APP_URL:-http://127.0.0.1:8102}"
echo "🔍 Checking task app health..."
if probe -H "X-API-Key: $ENVIRONMENT_API_KEY" "$TASK_APP_URL/health"; then
    echo "✅ Task app is accessible"
else
    echo "⚠️ WARNING: Task app not accessible at $TASK_APP_URL"
    echo " You may need to deploy it first:"
    echo " uvx synth-ai deploy banking77 --runtime uvicorn --port 8102"
fi

echo ""
echo "🎯 Starting GEPA prompt optimization..."
echo ""

# The config path is relative to the current working directory.
CONFIG_FILE="examples/blog_posts/gepa/configs/banking77_gepa_local.toml"
if [ ! -f "$CONFIG_FILE" ]; then
    echo "❌ ERROR: Config file not found: $CONFIG_FILE"
    exit 1
fi

# Launch training and poll it, with a poll timeout of 3600.
train_args=(
    --type prompt_learning
    --config "$CONFIG_FILE"
    --backend "$BACKEND_URL"
    --poll
    --poll-timeout 3600
)
uvx synth-ai train "${train_args[@]}"

echo ""
echo "✅ GEPA training completed!"
67
+
@@ -0,0 +1,123 @@
1
#!/bin/bash
# Verify Banking77 setup is working.
#
# Runs six checks (Python import, CLI registration, helper scripts, config
# file, environment variables, running services) and then prints a summary of
# the steps needed to run Banking77 GEPA. Only the Python import check aborts
# the script; every other check merely reports its result.
#
# Optional overrides, matching the sibling GEPA scripts:
#   BACKEND_BASE_URL  backend to probe (default http://localhost:8000)
#   TASK_APP_URL      task app to probe (default http://127.0.0.1:8102)

set -e

echo "🔍 Verifying Banking77 Setup"
echo "============================="
echo ""

# All relative paths below assume the repository root, three levels above
# the directory containing this script.
cd "$(dirname "$0")/../../.."

# Service locations; defaults are the original hard-coded addresses.
BACKEND_URL="${BACKEND_BASE_URL:-http://localhost:8000}"
TASK_APP_URL="${TASK_APP_URL:-http://127.0.0.1:8102}"

echo "1️⃣ Checking Python import..."
# A non-zero exit from this snippet stops the script because of `set -e`.
python3 -c "
try:
    from examples.task_apps.banking77.banking77_task_app import build_config
    print(' ✅ Task app imports successfully')
    config = build_config()
    print(f' ✅ Config built: app_id={config.app_id}')
    print(f' ✅ Task name: {config.name}')
except ImportError as e:
    print(f' ❌ Import error: {e}')
    print(' 💡 Run: uv pip install -e .')
    exit(1)
except Exception as e:
    print(f' ❌ Error: {e}')
    exit(1)
"

echo ""
echo "2️⃣ Checking CLI registration..."
if uvx synth-ai task-app list 2>/dev/null | grep -q "banking77"; then
    echo " ✅ Banking77 registered with CLI"
else
    echo " ⚠️ Banking77 not found in task-app list"
    echo " 💡 This is OK if you haven't run 'uv pip install -e .' yet"
fi

echo ""
echo "3️⃣ Checking helper scripts..."
if [ -x "./examples/blog_posts/gepa/deploy_banking77_task_app.sh" ]; then
    echo " ✅ deploy_banking77_task_app.sh is executable"
else
    echo " ❌ deploy_banking77_task_app.sh is not executable"
    echo " 💡 Run: chmod +x ./examples/blog_posts/gepa/deploy_banking77_task_app.sh"
fi

if [ -x "./examples/blog_posts/gepa/run_gepa_banking77.sh" ]; then
    echo " ✅ run_gepa_banking77.sh is executable"
else
    echo " ❌ run_gepa_banking77.sh is not executable"
    echo " 💡 Run: chmod +x ./examples/blog_posts/gepa/run_gepa_banking77.sh"
fi

echo ""
echo "4️⃣ Checking configuration files..."
if [ -f "./examples/blog_posts/gepa/configs/banking77_gepa_local.toml" ]; then
    echo " ✅ banking77_gepa_local.toml exists"
else
    echo " ❌ banking77_gepa_local.toml not found"
fi

echo ""
echo "5️⃣ Checking environment variables..."
# Only the first 10 characters of each key are echoed.
if [ -n "$GROQ_API_KEY" ]; then
    echo " ✅ GROQ_API_KEY is set (${GROQ_API_KEY:0:10}...)"
else
    echo " ⚠️ GROQ_API_KEY not set"
    echo " 💡 Run: export GROQ_API_KEY='gsk_...'"
fi

if [ -n "$ENVIRONMENT_API_KEY" ]; then
    echo " ✅ ENVIRONMENT_API_KEY is set (${ENVIRONMENT_API_KEY:0:10}...)"
else
    echo " ⚠️ ENVIRONMENT_API_KEY not set"
    echo " 💡 Run: export ENVIRONMENT_API_KEY=\$(python3 -c 'import secrets; print(secrets.token_urlsafe(32))')"
fi

if [ -n "$SYNTH_API_KEY" ]; then
    echo " ✅ SYNTH_API_KEY is set (${SYNTH_API_KEY:0:10}...)"
else
    echo " ⚠️ SYNTH_API_KEY not set"
    echo " 💡 Get from backend admin or .env.dev file"
fi

echo ""
echo "6️⃣ Checking services..."
if curl -s -f "$BACKEND_URL/api/health" > /dev/null 2>&1; then
    echo " ✅ Backend is running on $BACKEND_URL"
else
    echo " ⚠️ Backend not reachable at $BACKEND_URL"
    echo " 💡 Start the backend before running GEPA"
fi

# Send the environment API key with the probe, as the run and quick-test
# scripts do, so a task app that authenticates /health is not misreported
# as down.
if curl -s -f -H "X-API-Key: ${ENVIRONMENT_API_KEY:-}" "$TASK_APP_URL/health" > /dev/null 2>&1; then
    echo " ✅ Task app is running on $TASK_APP_URL"
else
    echo " ⚠️ Task app not running on $TASK_APP_URL"
    echo " 💡 Run: ./examples/blog_posts/gepa/deploy_banking77_task_app.sh"
fi

echo ""
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
echo "Summary"
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
echo ""
echo "To run Banking77 GEPA:"
echo ""
echo " 1. Install dependencies:"
echo " uv pip install -e ."
echo ""
echo " 2. Set environment variables:"
echo " export GROQ_API_KEY='gsk_...'"
echo " export SYNTH_API_KEY='your-backend-key'"
echo " export ENVIRONMENT_API_KEY=\$(python3 -c 'import secrets; print(secrets.token_urlsafe(32))')"
echo ""
echo " 3. Start task app (Terminal 1):"
echo " ./examples/blog_posts/gepa/deploy_banking77_task_app.sh"
echo ""
echo " 4. Run GEPA (Terminal 2):"
echo " ./examples/blog_posts/gepa/run_gepa_banking77.sh"
echo ""
echo "✅ Setup verification complete!"
123
+