synth-ai 0.1.0.dev4__py3-none-any.whl → 0.1.0.dev6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of synth-ai might be problematic.

Files changed (46)
  1. public_tests/synth_sdk.py +389 -0
  2. public_tests/test_agent.py +538 -0
  3. public_tests/test_recursive_structured_outputs.py +180 -0
  4. public_tests/test_structured_outputs.py +100 -0
  5. synth_ai/zyk/lms/__init__.py +0 -0
  6. synth_ai/zyk/lms/caching/__init__.py +0 -0
  7. synth_ai/zyk/lms/caching/constants.py +1 -0
  8. synth_ai/zyk/lms/caching/dbs.py +0 -0
  9. synth_ai/zyk/lms/caching/ephemeral.py +50 -0
  10. synth_ai/zyk/lms/caching/handler.py +92 -0
  11. synth_ai/zyk/lms/caching/initialize.py +13 -0
  12. synth_ai/zyk/lms/caching/persistent.py +55 -0
  13. synth_ai/zyk/lms/config.py +8 -0
  14. synth_ai/zyk/lms/core/__init__.py +0 -0
  15. synth_ai/zyk/lms/core/all.py +35 -0
  16. synth_ai/zyk/lms/core/exceptions.py +9 -0
  17. synth_ai/zyk/lms/core/main.py +245 -0
  18. synth_ai/zyk/lms/core/vendor_clients.py +60 -0
  19. synth_ai/zyk/lms/cost/__init__.py +0 -0
  20. synth_ai/zyk/lms/cost/monitor.py +1 -0
  21. synth_ai/zyk/lms/cost/statefulness.py +1 -0
  22. synth_ai/zyk/lms/structured_outputs/__init__.py +0 -0
  23. synth_ai/zyk/lms/structured_outputs/handler.py +388 -0
  24. synth_ai/zyk/lms/structured_outputs/inject.py +185 -0
  25. synth_ai/zyk/lms/structured_outputs/rehabilitate.py +186 -0
  26. synth_ai/zyk/lms/vendors/__init__.py +0 -0
  27. synth_ai/zyk/lms/vendors/base.py +15 -0
  28. synth_ai/zyk/lms/vendors/constants.py +5 -0
  29. synth_ai/zyk/lms/vendors/core/__init__.py +0 -0
  30. synth_ai/zyk/lms/vendors/core/anthropic_api.py +191 -0
  31. synth_ai/zyk/lms/vendors/core/gemini_api.py +146 -0
  32. synth_ai/zyk/lms/vendors/core/openai_api.py +145 -0
  33. synth_ai/zyk/lms/vendors/local/__init__.py +0 -0
  34. synth_ai/zyk/lms/vendors/local/ollama.py +0 -0
  35. synth_ai/zyk/lms/vendors/openai_standard.py +141 -0
  36. synth_ai/zyk/lms/vendors/retries.py +3 -0
  37. synth_ai/zyk/lms/vendors/supported/__init__.py +0 -0
  38. synth_ai/zyk/lms/vendors/supported/deepseek.py +18 -0
  39. synth_ai/zyk/lms/vendors/supported/together.py +11 -0
  40. {synth_ai-0.1.0.dev4.dist-info → synth_ai-0.1.0.dev6.dist-info}/METADATA +1 -1
  41. synth_ai-0.1.0.dev6.dist-info/RECORD +46 -0
  42. synth_ai-0.1.0.dev6.dist-info/top_level.txt +2 -0
  43. synth_ai-0.1.0.dev4.dist-info/RECORD +0 -7
  44. synth_ai-0.1.0.dev4.dist-info/top_level.txt +0 -1
  45. {synth_ai-0.1.0.dev4.dist-info → synth_ai-0.1.0.dev6.dist-info}/LICENSE +0 -0
  46. {synth_ai-0.1.0.dev4.dist-info → synth_ai-0.1.0.dev6.dist-info}/WHEEL +0 -0
public_tests/synth_sdk.py
@@ -0,0 +1,389 @@
+ # from dev.testing.hendryks import HendryksMathBenchmark, TrivialHendryksMathAgent
+ import asyncio
+ import json
+ import logging
+ import os
+ import re
+ import sys
+ import time
+ import uuid
+ from typing import Dict, List
+
+ import pytest
+ from datasets import load_dataset
+ from dotenv import load_dotenv
+ from pydantic import BaseModel
+ from synth_sdk.tracing.abstractions import (
+     Dataset,
+     RewardSignal,
+     TrainingQuestion,
+ )
+ from synth_sdk.tracing.client_manager import ClientManager
+ from synth_sdk.tracing.decorators import get_tracing_config, trace_system_async
+ from synth_sdk.tracing.upload import upload
+ from synth_sdk.tracing.utils import get_system_id
+
+ from synth_ai.zyk import LM
+
+
+ class HendryksMathBenchmark:
+     def __init__(self):
+         self.name = "hendryks_math"
+         self.temp_dir = "temp"
+         os.makedirs(self.temp_dir, exist_ok=True)
+         os.makedirs("datasets/competition_math", exist_ok=True)
+
+     def load_data(self):
+         cache_path = "datasets/competition_math/dataset.json"
+
+         # Try to load from cache first
+         if os.path.exists(cache_path):
+             with open(cache_path, "r") as f:
+                 dataset = json.load(f)
+             problems = []
+             for item in dataset["train"]:  # Using train split for consistency
+                 problem = {
+                     "question": item["problem"],
+                     "answer": item["solution"],
+                     "subject": item.get("type", "unknown"),
+                     "level": "competition",  # All problems are competition level
+                 }
+                 problems.append(problem)
+             return problems
+
+         # If not cached, load from HF and cache
+         dataset = load_dataset("competition_math", "main")
+         with open(cache_path, "w") as f:
+             json.dump(
+                 {"train": list(dataset["train"]), "test": list(dataset["test"])}, f
+             )
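+         # Both the train and test splits are written to the cache file,
+         # but only the train split is converted and returned below.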
+
+         # Convert to our format
+         problems = []
+         for item in dataset["train"]:
+             problem = {
+                 "question": item["problem"],
+                 "answer": item["solution"],
+                 "subject": item.get("type", "unknown"),
+                 "level": "competition",
+             }
+             problems.append(problem)
+
+         return problems
+
+     def get_problems(self):
+         temp_path = os.path.join(self.temp_dir, "hendryks_math.json")
+
+         # Load from temp file if it exists
+         if os.path.exists(temp_path):
+             with open(temp_path, "r") as f:
+                 return json.load(f)
+
+         # Otherwise load from dataset and save
+         problems = self.load_data()
+         with open(temp_path, "w") as f:
+             json.dump(problems, f)
+         return problems
+
+     def score_answer(self, question: str, proposed_answer: str) -> bool:
+         """Score a proposed answer against the correct answer for a given question."""
+         # Find the problem that matches the question
+         problems = self.get_problems()
+         matching_problem = next(
+             (p for p in problems if p["question"] == question), None
+         )
+
+         if not matching_problem:
+             raise ValueError("Question not found in benchmark")
+
+         # Extract answer from proposed solution's \boxed{} format
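+         # The regex captures the contents of \boxed{...}, allowing one level of
+         # nested braces; answers are then compared as exact strings after stripping.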
+         proposed_match = re.search(r"\\boxed{((?:[^{}]|{[^{}]*})*)}", proposed_answer)
+         if not proposed_match:
+             return False
+
+         # Extract answer from correct solution's \boxed{} format
+         correct_match = re.search(
+             r"\\boxed{((?:[^{}]|{[^{}]*})*)}", matching_problem["answer"]
+         )
+         if not correct_match:
+             return False
+
+         extracted_proposed = proposed_match.group(1).strip()
+         extracted_correct = correct_match.group(1).strip()
+
+         # print(f"Proposed answer: {extracted_proposed}")
+         # print(f"Correct answer: {extracted_correct}")
+
+         return extracted_proposed == extracted_correct
+
+
+ class TrivialHendryksMathAgent:
+     def __init__(self):
+         self.lm = LM(  # gemini-1.5-flash
+             model_name="gpt-4o-mini",
+             formatting_model_name="gpt-4o-mini",
+             temperature=0.1,
+             synth_logging=True,
+         )
+         self.system_name = "HendryksMathAgent"
+         self.system_id = get_system_id(self.system_name)
+         self.system_instance_id = str(uuid.uuid4())
+
+     @trace_system_async(
+         origin="agent",
+         event_type="plan",
+         manage_event="create_and_end",
+         increment_partition=True,
+         verbose=True,
+     )
+     async def plan(self, math_question: str) -> str:
+         logger.debug("Starting plan method with trace decorator")
+         try:
+
+             class Plan(BaseModel):
+                 content: str
+
+             response = await self.lm.respond_async(
+                 system_message="""You are an AI assisting a colleague in completing a mathematics problem.
+ You will be given a mathematics problem statement. Your task is to create a detailed plan to solve the problem,
+ breaking it down into clear, logical steps.""",
+                 user_message=f"""Please provide a detailed, step-by-step plan to solve this math problem:
+ {math_question}
+
+ Your plan should include:
+ 1. A clear statement of the given information and problem to be solved
+ 2. Identification of relevant mathematical concepts and techniques
+ 3. Definition of variables and known relationships
+ 4. A step-by-step approach to solving the problem
+ 5. Explanation of the reasoning behind each step""",
+                 response_model=Plan,
+             )
+             logger.debug("Successfully got response from LM in plan method")
+             return response.content
+         except Exception as e:
+             logger.error(f"Error in plan method: {str(e)}", exc_info=True)
+             raise
+
+     @trace_system_async(
+         origin="agent",
+         event_type="execute",
+         manage_event="create_and_end",
+         increment_partition=True,
+         verbose=True,
+     )
+     async def execute(self, plan: str) -> str:
+         logger.debug("Starting execute method with trace decorator")
+         try:
+
+             class Solution(BaseModel):
+                 content: str
+
+             response = await self.lm.respond_async(
+                 system_message="""You are an AI mathematical problem-solving assistant.
+ You will be given a solution plan. Your task is to implement this plan,
+ showing all work and verifying correctness at each step.""",
+                 user_message=f"""
+ Plan:
+ {plan}
+
+ Please solve this problem by carefully following the provided plan. Show all your work and calculations.
+ Leave your final answer at the very end in the format \\boxed{{answer}}.""",
+                 response_model=Solution,
+             )
+             logger.debug("Successfully got response from LM in execute method")
+             return response.content
+         except Exception as e:
+             logger.error(f"Error in execute method: {str(e)}", exc_info=True)
+             raise
+
+     async def run(self, math_question: str) -> str:
+         logger.debug("Starting run method")
+         plan = await self.plan(math_question)
+         logger.debug("Completed plan method")
+         solution = await self.execute(plan)
+         logger.debug("Completed execute method")
+         return solution
+
+
+ # Configure logging
+ logging.basicConfig(
+     level=logging.ERROR, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
+ )
+ logger = logging.getLogger(__name__)
+
+ # Add logging for trace decorator
+ trace_logger = logging.getLogger("synth_sdk.tracing.decorators")
+ trace_logger.setLevel(logging.ERROR)
+
+ # Add logging for client manager
+ client_logger = logging.getLogger("synth_sdk.tracing.client_manager")
+ client_logger.setLevel(logging.ERROR)
+
+ load_dotenv()
+
+
+ async def setup_synth_config():
+     """Setup synth configuration for deferred logging."""
+     logger.info("Setting up synth configuration for deferred logging")
+     os.environ["SYNTH_LOGGING_MODE"] = "deferred"
+     os.environ["SYNTH_ENDPOINT_OVERRIDE"] = "https://agent-learning.onrender.com"
+     config = get_tracing_config()
+     ClientManager.initialize(config)
+     logger.info("Synth config:")
+     logger.info(f" Mode: {config.mode}")
+     logger.info(f" API Key present: {bool(config.api_key)}")
+     logger.info(f" Base URL: {config.base_url}")
+
+
+ @pytest.mark.asyncio
+ async def test_deferred_logging():
+     """Test deferred logging with both pytest and regular assertions."""
+     logger.info("=== STARTING DEFERRED LOGGING TEST ===")
+     start_time = time.time()
+     logger.info(f"Test start time: {start_time}")
+
+     # Determine if running under pytest
+     is_pytest = "pytest" in sys.modules
+
+     try:
+         await setup_synth_config()
+
+         # Initialize and run agent
+         benchmark = HendryksMathBenchmark()
+         agent = TrivialHendryksMathAgent()
+         logger.info(f"Agent system ID: {agent.system_id}")
+         logger.info(f"Agent system instance ID: {agent.system_instance_id}")
+
+         problems = benchmark.get_problems()
+         test_problem = problems[0]["question"]
+         logger.info(f"Using test problem: {test_problem}")
+
+         # Run the agent
+         logger.info("Running agent...")
+         solution = await agent.run(test_problem)
+         logger.info(f"Agent solution: {solution}")
+
+         # Create dataset and upload results
+         logger.info("Creating dataset and uploading results...")
+         dataset = Dataset(
+             questions=[
+                 TrainingQuestion(
+                     id="q0",
+                     intent="Test math problem",
+                     criteria="Testing deferred tracing and upload functionality",
+                 )
+             ],
+             reward_signals=[
+                 RewardSignal(
+                     question_id="q0",
+                     system_instance_id=agent.system_instance_id,
+                     reward=1.0,
+                     annotation="Test reward",
+                 )
+             ],
+         )
+
+         # Upload the dataset and traces
+         logger.info("Starting upload process...")
+         upload_id, questions_json, reward_signals_json, traces_json = upload(
+             dataset=dataset
+         )
+
+         logger.info(f"Upload completed with ID: {upload_id}")
+         logger.debug(f"Number of traces: {len(traces_json)}")
+         print(traces_json)
+
+         # Verify upload results
+         if is_pytest:
+             assert upload_id
+             assert questions_json
+             assert reward_signals_json
+             assert traces_json
+         else:
+             assert upload_id, "Upload ID should not be empty"
+             assert questions_json, "Questions JSON should not be empty"
+             assert reward_signals_json, "Reward signals JSON should not be empty"
+             assert traces_json, "Traces JSON should not be empty"
+
+         # Verify trace content
+         for i, trace in enumerate(traces_json):
+             logger.debug(f"Verifying trace {i}:")
+             verify_trace_content(trace, is_pytest)
+
+         logger.info("All traces verified successfully!")
+         return True
+
+     except AssertionError as e:
+         logger.error(f"Test failed: {str(e)}")
+         if is_pytest:
+             raise
+         return False
+     except Exception as e:
+         logger.error(f"Unexpected error: {str(e)}", exc_info=True)
+         if is_pytest:
+             raise
+         return False
+
+
+ def verify_trace_content(trace: dict, is_pytest: bool = False) -> None:
+     """Verify the content of a trace."""
+     if is_pytest:
+         assert trace["system_instance_id"]
+     else:
+         assert trace["system_instance_id"], "Trace missing system_instance_id"
+
+     # Verify events were captured
+     has_events = False
+     for partition in trace["partition"]:
+         if len(partition["events"]) > 0:
+             has_events = True
+             for event in partition["events"]:
+                 logger.debug(f"Checking event: {json.dumps(event, indent=2)}")
+                 if "agent_compute_step" in event:
+                     step = event["agent_compute_step"]
+                     logger.debug(f"Checking compute step: {json.dumps(step, indent=2)}")
+                     if is_pytest:
+                         assert step.get("model_name") is not None
+                         assert step.get("model_name") != ""
+                     else:
+                         assert (
+                             step.get("model_name") is not None
+                         ), "Model name is missing"
+                         assert step.get("model_name") != "", "Model name is empty"
+
+                     if step.get("compute_input"):
+                         for input_item in step["compute_input"]:
+                             if is_pytest:
+                                 assert "messages" in input_item, input_item.keys()
+                             else:
+                                 assert "messages" in input_item, (
+                                     f"Input must have 'messages' key, but found keys: {list(input_item.keys())}"
+                                     f"\nFull input: {json.dumps(input_item, indent=2)}"
+                                 )
+                             messages = input_item["messages"]
+                             if is_pytest:
+                                 assert isinstance(messages, list)
+                                 assert len(messages) == 2
+                             else:
+                                 assert isinstance(
+                                     messages, list
+                                 ), "Messages must be a list"
+                                 assert len(messages) == 2, (
+                                     f"Expected exactly 2 messages (system and user), but found {len(messages)}"
+                                     f"\nMessages: {json.dumps(messages, indent=2)}"
+                                 )
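+             # Only the first partition that contains events is inspected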
+             break
+
+     if is_pytest:
+         assert has_events
+     else:
+         assert (
+             has_events
+         ), f"At least one partition should contain events - {trace['partition']}"
+
+
+ if __name__ == "__main__":
+     # Remove the pytest check so the test always runs
+     success = asyncio.run(test_deferred_logging())
+     print("✅ All tests passed!" if success else "❌ Tests failed!")
+     exit(0 if success else 1)