synth-ai 0.1.0.dev39__py3-none-any.whl → 0.1.0.dev49__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- synth_ai/__init__.py +3 -1
- {synth_ai-0.1.0.dev39.dist-info → synth_ai-0.1.0.dev49.dist-info}/METADATA +12 -11
- synth_ai-0.1.0.dev49.dist-info/RECORD +6 -0
- {synth_ai-0.1.0.dev39.dist-info → synth_ai-0.1.0.dev49.dist-info}/WHEEL +1 -1
- synth_ai-0.1.0.dev49.dist-info/top_level.txt +1 -0
- private_tests/try_synth_sdk.py +0 -1
- public_tests/test_agent.py +0 -538
- public_tests/test_all_structured_outputs.py +0 -196
- public_tests/test_anthropic_structured_outputs.py +0 -0
- public_tests/test_deepseek_structured_outputs.py +0 -0
- public_tests/test_deepseek_tools.py +0 -64
- public_tests/test_gemini_output.py +0 -188
- public_tests/test_gemini_structured_outputs.py +0 -106
- public_tests/test_models.py +0 -183
- public_tests/test_openai_structured_outputs.py +0 -106
- public_tests/test_reasoning_effort.py +0 -75
- public_tests/test_reasoning_models.py +0 -92
- public_tests/test_recursive_structured_outputs.py +0 -180
- public_tests/test_structured.py +0 -137
- public_tests/test_structured_outputs.py +0 -109
- public_tests/test_synth_sdk.py +0 -384
- public_tests/test_text.py +0 -160
- public_tests/test_tools.py +0 -319
- synth_ai/zyk/__init__.py +0 -3
- synth_ai/zyk/lms/__init__.py +0 -0
- synth_ai/zyk/lms/caching/__init__.py +0 -0
- synth_ai/zyk/lms/caching/constants.py +0 -1
- synth_ai/zyk/lms/caching/dbs.py +0 -0
- synth_ai/zyk/lms/caching/ephemeral.py +0 -72
- synth_ai/zyk/lms/caching/handler.py +0 -142
- synth_ai/zyk/lms/caching/initialize.py +0 -13
- synth_ai/zyk/lms/caching/persistent.py +0 -83
- synth_ai/zyk/lms/config.py +0 -8
- synth_ai/zyk/lms/core/__init__.py +0 -0
- synth_ai/zyk/lms/core/all.py +0 -47
- synth_ai/zyk/lms/core/exceptions.py +0 -9
- synth_ai/zyk/lms/core/main.py +0 -314
- synth_ai/zyk/lms/core/vendor_clients.py +0 -85
- synth_ai/zyk/lms/cost/__init__.py +0 -0
- synth_ai/zyk/lms/cost/monitor.py +0 -1
- synth_ai/zyk/lms/cost/statefulness.py +0 -1
- synth_ai/zyk/lms/structured_outputs/__init__.py +0 -0
- synth_ai/zyk/lms/structured_outputs/handler.py +0 -442
- synth_ai/zyk/lms/structured_outputs/inject.py +0 -314
- synth_ai/zyk/lms/structured_outputs/rehabilitate.py +0 -187
- synth_ai/zyk/lms/tools/base.py +0 -104
- synth_ai/zyk/lms/vendors/__init__.py +0 -0
- synth_ai/zyk/lms/vendors/base.py +0 -31
- synth_ai/zyk/lms/vendors/constants.py +0 -22
- synth_ai/zyk/lms/vendors/core/__init__.py +0 -0
- synth_ai/zyk/lms/vendors/core/anthropic_api.py +0 -413
- synth_ai/zyk/lms/vendors/core/gemini_api.py +0 -306
- synth_ai/zyk/lms/vendors/core/mistral_api.py +0 -327
- synth_ai/zyk/lms/vendors/core/openai_api.py +0 -185
- synth_ai/zyk/lms/vendors/local/__init__.py +0 -0
- synth_ai/zyk/lms/vendors/local/ollama.py +0 -0
- synth_ai/zyk/lms/vendors/openai_standard.py +0 -374
- synth_ai/zyk/lms/vendors/retries.py +0 -3
- synth_ai/zyk/lms/vendors/supported/__init__.py +0 -0
- synth_ai/zyk/lms/vendors/supported/deepseek.py +0 -73
- synth_ai/zyk/lms/vendors/supported/groq.py +0 -16
- synth_ai/zyk/lms/vendors/supported/ollama.py +0 -14
- synth_ai/zyk/lms/vendors/supported/together.py +0 -11
- synth_ai-0.1.0.dev39.dist-info/RECORD +0 -67
- synth_ai-0.1.0.dev39.dist-info/top_level.txt +0 -4
- tests/test_agent.py +0 -538
- tests/test_recursive_structured_outputs.py +0 -180
- tests/test_structured_outputs.py +0 -100
- {synth_ai-0.1.0.dev39.dist-info → synth_ai-0.1.0.dev49.dist-info}/licenses/LICENSE +0 -0
public_tests/test_structured.py
DELETED
@@ -1,137 +0,0 @@
-from typing import List
-
-import openai
-from pydantic import BaseModel
-
-from synth_ai.zyk import LM
-
-class Person(BaseModel):
-    name: str
-    age: int
-    hobbies: List[str]
-
-
-TEST_PROMPT = "Extract information about a person from this text: John is 30 years old and enjoys reading, hiking, and photography."
-
-
-def test_openai_structured_lm():
-    lm = LM(
-        model_name="gpt-4o-mini",
-        formatting_model_name="gpt-4o-mini",
-        temperature=0,
-    )
-
-    response = lm.respond_sync(
-        system_message="",
-        user_message=TEST_PROMPT,
-        response_model=Person,
-    )
-
-    assert isinstance(response.structured_output, Person)
-    assert response.structured_output.name == "John"
-    assert response.structured_output.age == 30
-    assert set(response.structured_output.hobbies) == {
-        "reading",
-        "hiking",
-        "photography",
-    }
-
-def test_anthropic_structured_lm():
-    lm = LM(
-        model_name="claude-3-haiku-20240307",
-        formatting_model_name="claude-3-haiku-20240307",
-        temperature=0,
-    )
-
-    response = lm.respond_sync(
-        system_message="You are a helpful assistant that extracts structured information.",
-        user_message=TEST_PROMPT,
-        response_model=Person,
-    )
-
-    assert isinstance(response.structured_output, Person)
-    assert response.structured_output.name == "John"
-    assert response.structured_output.age == 30
-    assert set(response.structured_output.hobbies) == {
-        "reading",
-        "hiking",
-        "photography",
-    }
-
-
-# def test_gemini_structured():
-#     client = GeminiAPI(
-#         used_for_structured_outputs=True,
-#         exceptions_to_retry=[],
-#     )
-
-#     response = client._hit_api_sync(
-#         model="gemini-2.0-flash",
-#         messages=[
-#             {
-#                 "role": "system",
-#                 "content": "You are a helpful assistant that extracts structured information.",
-#             },
-#             {"role": "user", "content": TEST_PROMPT},
-#         ],
-#         response_model=Person,
-#         #temperature=0,
-#     )
-
-#     assert isinstance(response.structured_output, Person)
-#     assert response.structured_output.name == "John"
-#     assert response.structured_output.age == 30
-#     assert set(response.structured_output.hobbies) == {
-#         "reading",
-#         "hiking",
-#         "photography",
-#     }
-
-
-def test_gemini_structured_lm():
-    lm = LM(
-        model_name="gemini-2.0-flash",
-        formatting_model_name="gemini-2.0-flash",
-        temperature=0,
-    )
-
-    response = lm.respond_sync(
-        system_message="You are a helpful assistant that extracts structured information.",
-        user_message=TEST_PROMPT,
-        response_model=Person,
-    )
-
-    assert isinstance(response.structured_output, Person)
-    assert response.structured_output.name == "John"
-    assert response.structured_output.age == 30
-    assert set(response.structured_output.hobbies) == {
-        "reading",
-        "hiking",
-        "photography",
-    }
-
-def test_mistral_structured_lm():
-    lm = LM(
-        model_name="mistral-small-latest",
-        formatting_model_name="mistral-small-latest",
-        temperature=0,
-    )
-
-    response = lm.respond_sync(
-        system_message="You are a helpful assistant that extracts structured information.",
-        user_message=TEST_PROMPT,
-        response_model=Person,
-    )
-
-    assert isinstance(response.structured_output, Person)
-    assert response.structured_output.name == "John"
-    assert response.structured_output.age == 30
-    assert set(response.structured_output.hobbies) == {
-        "reading",
-        "hiking",
-        "photography",
-    }
-
-
-if __name__ == "__main__":
-    test_openai_structured_lm()

public_tests/test_structured_outputs.py
DELETED
@@ -1,109 +0,0 @@
-import asyncio
-import unittest
-from typing import List
-
-from pydantic import BaseModel, Field
-
-from synth_ai.zyk.lms.core.main import LM
-from synth_ai.zyk import BaseLMResponse
-
-
-# Define example structured output models
-class SimpleResponse(BaseModel):
-    message: str
-    confidence_between_zero_one: float = Field(
-        ..., description="Confidence level between 0 and 1"
-    )
-
-
-class ComplexResponse(BaseModel):
-    title: str
-    tags: List[str]
-    content: str
-
-
-class NestedResponse(BaseModel):
-    main_category: str
-    subcategories: List[str]
-    details: SimpleResponse
-
-
-class TestLMStructuredOutputs(unittest.TestCase):
-    @classmethod
-    def setUpClass(cls):
-        # Initialize LMs for both forced_json and stringified_json modes
-        cls.lm_forced_json = LM(
-            model_name="gpt-4o-mini",
-            formatting_model_name="gpt-4o-mini",
-            temperature=0.7,
-            max_retries="Few",
-            structured_output_mode="forced_json",
-        )
-        cls.lm_stringified_json = LM(
-            model_name="gemma3-27b-it",
-            formatting_model_name="gpt-4o-mini",
-            temperature=0.7,
-            max_retries="Few",
-            structured_output_mode="stringified_json",
-        )
-
-    def test_sync_simple_response(self):
-        for lm in [self.lm_forced_json, self.lm_stringified_json]:
-            with self.subTest(
-                mode=lm.structured_output_handler.handler.structured_output_mode
-            ):
-                result = lm.respond_sync(
-                    system_message="You are a helpful assistant.",
-                    user_message="Give me a short greeting and your confidence level.",
-                    response_model=SimpleResponse,
-                )
-                self.assertIsInstance(result.structured_output, SimpleResponse)
-                self.assertIsInstance(result.structured_output.message, str)
-                self.assertIsInstance(
-                    result.structured_output.confidence_between_zero_one, float
-                )
-                self.assertGreaterEqual(
-                    result.structured_output.confidence_between_zero_one, 0
-                )
-                self.assertLessEqual(
-                    result.structured_output.confidence_between_zero_one, 1
-                )
-
-    def test_sync_complex_response(self):
-        for lm in [self.lm_forced_json, self.lm_stringified_json]:
-            with self.subTest(
-                mode=lm.structured_output_handler.handler.structured_output_mode
-            ):
-                result = lm.respond_sync(
-                    system_message="You are a content creator.",
-                    user_message="Create a short blog post about AI.",
-                    response_model=ComplexResponse,
-                )
-                self.assertIsInstance(result.structured_output, ComplexResponse)
-                self.assertIsInstance(result.structured_output.title, str)
-                self.assertIsInstance(result.structured_output.tags, list)
-                self.assertIsInstance(result.structured_output.content, str)
-
-    async def async_nested_response(self, lm):
-        result = await lm.respond_async(
-            system_message="You are a categorization expert.",
-            user_message="Categorize 'Python' and provide a brief description.",
-            response_model=NestedResponse,
-        )
-        print("Result:")
-        assert not isinstance(result.structured_output, BaseLMResponse), "Structured output must be a Pydantic model or None - got BaseLMResponse"
-        self.assertIsInstance(result.structured_output, NestedResponse)
-        self.assertIsInstance(result.structured_output.main_category, str)
-        self.assertIsInstance(result.structured_output.subcategories, list)
-        self.assertIsInstance(result.structured_output.details, SimpleResponse)
-
-    def test_async_nested_response(self):
-        for lm in [self.lm_forced_json, self.lm_stringified_json]: #
-            with self.subTest(
-                mode=lm.structured_output_handler.handler.structured_output_mode
-            ):
-                asyncio.run(self.async_nested_response(lm))
-
-
-if __name__ == "__main__":
-    unittest.main()

public_tests/test_synth_sdk.py
DELETED
@@ -1,384 +0,0 @@
-# # from dev.testing.hendryks import HendryksMathBenchmark, TrivialHendryksMathAgent
-# import asyncio
-# import json
-# import logging
-# import os
-# import re
-# import sys
-# import time
-# import uuid
-# from typing import List, Dict
-# from pydantic import BaseModel
-# import pytest
-# from dotenv import load_dotenv
-# from synth_sdk.tracing.abstractions import (
-#     Dataset,
-#     RewardSignal,
-#     TrainingQuestion,
-# )
-# from synth_sdk.tracing.client_manager import ClientManager
-# from synth_sdk.tracing.decorators import get_tracing_config, trace_system_async
-# from synth_sdk.tracing.upload import upload
-# from synth_sdk.tracing.utils import get_system_id
-
-# from datasets import load_dataset
-# from zyk import LM
-
-
-# class HendryksMathBenchmark:
-#     def __init__(self):
-#         self.name = "hendryks_math"
-#         self.temp_dir = "temp"
-#         os.makedirs(self.temp_dir, exist_ok=True)
-#         os.makedirs("datasets/competition_math", exist_ok=True)
-
-#     def load_data(self):
-#         cache_path = "datasets/competition_math/dataset.json"
-
-#         # Try to load from cache first
-#         if os.path.exists(cache_path):
-#             with open(cache_path, "r") as f:
-#                 dataset = json.load(f)
-#             problems = []
-#             for item in dataset["train"]: # Using train split for consistency
-#                 problem = {
-#                     "question": item["problem"],
-#                     "answer": item["solution"],
-#                     "subject": item.get("type", "unknown"),
-#                     "level": "competition", # All problems are competition level
-#                 }
-#                 problems.append(problem)
-#             return problems
-
-#         # If not cached, load from HF and cache
-#         dataset = load_dataset("competition_math", "main")
-#         with open(cache_path, "w") as f:
-#             json.dump(
-#                 {"train": list(dataset["train"]), "test": list(dataset["test"])}, f
-#             )
-
-#         # Convert to our format
-#         problems = []
-#         for item in dataset["train"]:
-#             problem = {
-#                 "question": item["problem"],
-#                 "answer": item["solution"],
-#                 "subject": item.get("type", "unknown"),
-#                 "level": "competition",
-#             }
-#             problems.append(problem)
-
-#         return problems
-
-#     def get_problems(self):
-#         temp_path = os.path.join(self.temp_dir, "hendryks_math.json")
-
-#         # Load from temp file if it exists
-#         if os.path.exists(temp_path):
-#             with open(temp_path, "r") as f:
-#                 return json.load(f)
-
-#         # Otherwise load from dataset and save
-#         problems = self.load_data()
-#         with open(temp_path, "w") as f:
-#             json.dump(problems, f)
-#         return problems
-
-#     def score_answer(self, question: str, proposed_answer: str) -> bool:
-#         """Score a proposed answer against the correct answer for a given question."""
-#         # Find the problem that matches the question
-#         problems = self.get_problems()
-#         matching_problem = next(
-#             (p for p in problems if p["question"] == question), None
-#         )
-
-#         if not matching_problem:
-#             raise ValueError("Question not found in benchmark")
-
-#         # Extract answer from proposed solution's \boxed{} format
-#         proposed_match = re.search(r"\\boxed{((?:[^{}]|{[^{}]*})*)}", proposed_answer)
-#         if not proposed_match:
-#             return False
-
-#         # Extract answer from correct solution's \boxed{} format
-#         correct_match = re.search(
-#             r"\\boxed{((?:[^{}]|{[^{}]*})*)}", matching_problem["answer"]
-#         )
-#         if not correct_match:
-#             return False
-
-#         extracted_proposed = proposed_match.group(1).strip()
-#         extracted_correct = correct_match.group(1).strip()
-
-#         # print(f"Proposed answer: {extracted_proposed}")
-#         # print(f"Correct answer: {extracted_correct}")
-
-#         return extracted_proposed == extracted_correct
-
-
-# class TrivialHendryksMathAgent:
-#     def __init__(self):
-#         self.lm = LM( # gemini-1.5-flash
-#             model_name="gpt-4o-mini",
-#             formatting_model_name="gpt-4o-mini",
-#             temperature=0.1,
-#             synth_logging=True,
-#         )
-#         self.system_name = "HendryksMathAgent"
-#         self.system_id = get_system_id(self.system_name)
-#         self.system_instance_id = str(uuid.uuid4())
-
-#     @trace_system_async(
-#         origin="agent",
-#         event_type="plan",
-#         manage_event="create_and_end",
-#         increment_partition=True,
-#         verbose=True,
-#     )
-#     async def plan(self, math_question: str) -> str:
-#         logger.debug("Starting plan method with trace decorator")
-#         try:
-#             class Plan(BaseModel):
-#                 content: str
-#             response = await self.lm.respond_async(
-#                 system_message="""You are an AI assisting a colleague in completing a mathematics problem.
-#                 You will be given a mathematics problem statement. Your task is to create a detailed plan to solve the problem,
-#                 breaking it down into clear, logical steps.""",
-#                 user_message=f"""Please provide a detailed, step-by-step plan to solve this math problem:
-#                 {math_question}
-
-#                 Your plan should include:
-#                 1. A clear statement of the given information and problem to be solved
-#                 2. Identification of relevant mathematical concepts and techniques
-#                 3. Definition of variables and known relationships
-#                 4. A step-by-step approach to solving the problem
-#                 5. Explanation of the reasoning behind each step""",
-#                 response_model=Plan
-#             )
-#             logger.debug("Successfully got response from LM in plan method")
-#             return response.content
-#         except Exception as e:
-#             logger.error(f"Error in plan method: {str(e)}", exc_info=True)
-#             raise
-
-#     @trace_system_async(
-#         origin="agent",
-#         event_type="execute",
-#         manage_event="create_and_end",
-#         increment_partition=True,
-#         verbose=True,
-#     )
-#     async def execute(self, plan: str) -> str:
-#         logger.debug("Starting execute method with trace decorator")
-#         try:
-#             class Solution(BaseModel):
-#                 content: str
-#             response = await self.lm.respond_async(
-#                 system_message="""You are an AI mathematical problem-solving assistant.
-#                 You will be given a solution plan. Your task is to implement this plan,
-#                 showing all work and verifying correctness at each step.""",
-#                 user_message=f"""
-#                 Plan:
-#                 {plan}
-
-#                 Please solve this problem by carefully following the provided plan. Show all your work and calculations.
-#                 Leave your final answer at the very end in the format \\boxed{{answer}}.""",
-#                 response_model=Solution,
-#             )
-#             logger.debug("Successfully got response from LM in execute method")
-#             return response.content
-#         except Exception as e:
-#             logger.error(f"Error in execute method: {str(e)}", exc_info=True)
-#             raise
-
-#     async def run(self, math_question: str) -> str:
-#         logger.debug("Starting run method")
-#         plan = await self.plan(math_question)
-#         logger.debug("Completed plan method")
-#         solution = await self.execute(plan)
-#         logger.debug("Completed execute method")
-#         return solution
-
-
-# # Configure logging
-# logging.basicConfig(
-#     level=logging.ERROR, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
-# )
-# logger = logging.getLogger(__name__)
-
-# # Add logging for trace decorator
-# trace_logger = logging.getLogger("synth_sdk.tracing.decorators")
-# trace_logger.setLevel(logging.ERROR)
-
-# # Add logging for client manager
-# client_logger = logging.getLogger("synth_sdk.tracing.client_manager")
-# client_logger.setLevel(logging.ERROR)
-
-# load_dotenv()
-
-
-# async def setup_synth_config():
-#     """Setup synth configuration for deferred logging."""
-#     logger.info("Setting up synth configuration for deferred logging")
-#     os.environ["SYNTH_LOGGING_MODE"] = "deferred"
-#     os.environ["SYNTH_ENDPOINT_OVERRIDE"] = "https://agent-learning.onrender.com"
-#     config = get_tracing_config()
-#     ClientManager.initialize(config)
-#     logger.info("Synth config:")
-#     logger.info(f" Mode: {config.mode}")
-#     logger.info(f" API Key present: {bool(config.api_key)}")
-#     logger.info(f" Base URL: {config.base_url}")
-
-
-# @pytest.mark.asyncio
-# async def test_deferred_logging():
-#     """Test deferred logging with both pytest and regular assertions."""
-#     logger.info("=== STARTING DEFERRED LOGGING TEST ===")
-#     start_time = time.time()
-#     logger.info(f"Test start time: {start_time}")
-
-#     # Determine if running under pytest
-#     is_pytest = "pytest" in sys.modules
-
-#     try:
-#         await setup_synth_config()
-
-#         # Initialize and run agent
-#         benchmark = HendryksMathBenchmark()
-#         agent = TrivialHendryksMathAgent()
-#         logger.info(f"Agent system ID: {agent.system_id}")
-#         logger.info(f"Agent system instance ID: {agent.system_instance_id}")
-
-#         problems = benchmark.get_problems()
-#         test_problem = problems[0]["question"]
-#         logger.info(f"Using test problem: {test_problem}")
-
-#         # Run the agent
-#         logger.info("Running agent...")
-#         solution = await agent.run(test_problem)
-#         logger.info(f"Agent solution: {solution}")
-
-#         # Create dataset and upload results
-#         logger.info("Creating dataset and uploading results...")
-#         dataset = Dataset(
-#             questions=[
-#                 TrainingQuestion(
-#                     id="q0",
-#                     intent="Test math problem",
-#                     criteria="Testing deferred tracing and upload functionality",
-#                 )
-#             ],
-#             reward_signals=[
-#                 RewardSignal(
-#                     question_id="q0",
-#                     system_instance_id=agent.system_instance_id,
-#                     reward=1.0,
-#                     annotation="Test reward",
-#                 )
-#             ],
-#         )
-
-#         # Upload the dataset and traces
-#         logger.info("Starting upload process...")
-#         upload_id, questions_json, reward_signals_json, traces_json = upload(
-#             dataset=dataset
-#         )
-
-#         logger.info(f"Upload completed with ID: {upload_id}")
-#         logger.debug(f"Number of traces: {len(traces_json)}")
-#         print(traces_json)
-
-#         # Verify upload results
-#         if is_pytest:
-#             assert upload_id
-#             assert questions_json
-#             assert reward_signals_json
-#             assert traces_json
-#         else:
-#             assert upload_id, "Upload ID should not be empty"
-#             assert questions_json, "Questions JSON should not be empty"
-#             assert reward_signals_json, "Reward signals JSON should not be empty"
-#             assert traces_json, "Traces JSON should not be empty"
-
-#         # Verify trace content
-#         for i, trace in enumerate(traces_json):
-#             logger.debug(f"Verifying trace {i}:")
-#             verify_trace_content(trace, is_pytest)
-
-#         logger.info("All traces verified successfully!")
-#         return True
-
-#     except AssertionError as e:
-#         logger.error(f"Test failed: {str(e)}")
-#         if is_pytest:
-#             raise
-#         return False
-#     except Exception as e:
-#         logger.error(f"Unexpected error: {str(e)}", exc_info=True)
-#         if is_pytest:
-#             raise
-#         return False
-
-
-# def verify_trace_content(trace: dict, is_pytest: bool = False) -> None:
-#     """Verify the content of a trace."""
-#     if is_pytest:
-#         assert trace["system_instance_id"]
-#     else:
-#         assert trace["system_instance_id"], "Trace missing system_instance_id"
-
-#     # Verify events were captured
-#     has_events = False
-#     for partition in trace["partition"]:
-#         if len(partition["events"]) > 0:
-#             has_events = True
-#             for event in partition["events"]:
-#                 logger.debug(f"Checking event: {json.dumps(event, indent=2)}")
-#                 if "agent_compute_step" in event:
-#                     step = event["agent_compute_step"]
-#                     logger.debug(f"Checking compute step: {json.dumps(step, indent=2)}")
-#                     if is_pytest:
-#                         assert step.get("model_name") is not None
-#                         assert step.get("model_name") != ""
-#                     else:
-#                         assert (
-#                             step.get("model_name") is not None
-#                         ), "Model name is missing"
-#                         assert step.get("model_name") != "", "Model name is empty"
-
-#                     if step.get("compute_input"):
-#                         for input_item in step["compute_input"]:
-#                             if is_pytest:
-#                                 assert "messages" in input_item, input_item.keys()
-#                             else:
-#                                 assert "messages" in input_item, (
-#                                     f"Input must have 'messages' key, but found keys: {list(input_item.keys())}"
-#                                     f"\nFull input: {json.dumps(input_item, indent=2)}"
-#                                 )
-#                             messages = input_item["messages"]
-#                             if is_pytest:
-#                                 assert isinstance(messages, list)
-#                                 assert len(messages) == 2
-#                             else:
-#                                 assert isinstance(
-#                                     messages, list
-#                                 ), "Messages must be a list"
-#                                 assert len(messages) == 2, (
-#                                     f"Expected exactly 2 messages (system and user), but found {len(messages)}"
-#                                     f"\nMessages: {json.dumps(messages, indent=2)}"
-#                                 )
-#             break
-
-#     if is_pytest:
-#         assert has_events
-#     else:
-#         assert (
-#             has_events
-#         ), f"At least one partition should contain events - {trace['partition']}"
-
-
-# if __name__ == "__main__":
-#     # Remove the pytest check so the test always runs
-#     success = asyncio.run(test_deferred_logging())
-#     print("✅ All tests passed!" if success else "❌ Tests failed!")
-#     exit(0 if success else 1)