synth-ai 0.1.0.dev8__py3-none-any.whl → 0.1.0.dev10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of synth-ai has been flagged as potentially problematic.
- private_tests/try_synth_sdk.py +1 -0
- public_tests/test_all_structured_outputs.py +195 -0
- synth_ai/zyk/lms/core/all.py +6 -0
- synth_ai/zyk/lms/core/main.py +5 -1
- synth_ai/zyk/lms/core/vendor_clients.py +8 -0
- synth_ai/zyk/lms/vendors/supported/groq.py +16 -0
- {synth_ai-0.1.0.dev8.dist-info → synth_ai-0.1.0.dev10.dist-info}/METADATA +5 -2
- {synth_ai-0.1.0.dev8.dist-info → synth_ai-0.1.0.dev10.dist-info}/RECORD +15 -9
- {synth_ai-0.1.0.dev8.dist-info → synth_ai-0.1.0.dev10.dist-info}/top_level.txt +2 -0
- tests/test_agent.py +538 -0
- tests/test_recursive_structured_outputs.py +180 -0
- tests/test_structured_outputs.py +100 -0
- /public_tests/{synth_sdk.py → test_synth_sdk.py} +0 -0
- {synth_ai-0.1.0.dev8.dist-info → synth_ai-0.1.0.dev10.dist-info}/LICENSE +0 -0
- {synth_ai-0.1.0.dev8.dist-info → synth_ai-0.1.0.dev10.dist-info}/WHEEL +0 -0
private_tests/try_synth_sdk.py
ADDED

@@ -0,0 +1 @@
+import synth_sdk.tracing
public_tests/test_all_structured_outputs.py
ADDED

@@ -0,0 +1,195 @@
+from typing import Any, Dict, Optional
+
+import pytest
+from pydantic import BaseModel
+
+from synth_ai.zyk.lms.core.main import LM
+
+
+class StateUpdate(BaseModel):
+    """Response model for state updates from LLM"""
+
+    short_term_plan: Optional[str] = None
+    objective: Optional[str] = None
+    final_results: Optional[Dict[str, Any]] = None
+
+    def model_post_init(self, __context):
+        super().model_post_init(__context)
+        # Ensure no protected fields are present
+        protected_fields = ["message_history", "step_summaries"]
+        for field in protected_fields:
+            if hasattr(self, field):
+                raise ValueError(f"Cannot modify protected field: {field}")
+
+
+@pytest.fixture(scope="module")
+def models():
+    """Initialize LMs for different vendors"""
+    return {
+        "gpt-4o-mini": LM(
+            model_name="gpt-4o-mini",
+            formatting_model_name="gpt-4o-mini",
+            temperature=0.1,
+            structured_output_mode="forced_json",
+        ),
+        "o3-mini": LM(
+            model_name="o3-mini",
+            formatting_model_name="gpt-4o-mini",
+            temperature=0.1,
+            structured_output_mode="forced_json",
+        ),
+        "gemini-1.5-flash": LM(
+            model_name="gemini-1.5-flash",
+            formatting_model_name="gpt-4o-mini",
+            temperature=0.1,
+            structured_output_mode="stringified_json",
+        ),
+        "claude-3-haiku-20240307": LM(
+            model_name="claude-3-haiku-20240307",
+            formatting_model_name="gpt-4o-mini",
+            temperature=0.1,
+            structured_output_mode="stringified_json",
+        ),
+        "deepseek-chat": LM(
+            model_name="deepseek-chat",
+            formatting_model_name="gpt-4o-mini",
+            temperature=0.1,
+            structured_output_mode="stringified_json",
+        ),
+        "deepseek-reasoner": LM(
+            model_name="deepseek-reasoner",
+            formatting_model_name="gpt-4o-mini",
+            temperature=1,
+            structured_output_mode="stringified_json",
+        ),
+        "llama-3.1-8b-instant": LM(
+            model_name="llama-3.1-8b-instant",
+            formatting_model_name="gpt-4o-mini",
+            temperature=0.1,
+            structured_output_mode="stringified_json",
+        ),
+    }
+
+
+@pytest.fixture
+def system_message():
+    """System message for state updates"""
+    return """You are helping update the agent's state. Look at the current state and state_delta_instructions and update the state.
+
+Available fields you can modify:
+{
+    "short_term_plan": "str",
+    "objective": "str",
+    "final_results": "Dict[str, Any]"
+}
+
+Protected fields (do not modify):
+{
+    "message_history": "Cannot directly edit message history - it is managed internally",
+    "step_summaries": "Cannot directly edit step summaries - they are generated automatically"
+}
+
+Please be brief, the state ought not be too long."""
+
+
+@pytest.fixture
+def current_state():
+    """Initial state for testing"""
+    return {
+        "short_term_plan": "Current plan: Review code changes",
+        "objective": "Review pull request",
+        "final_results": {
+            "findings": [],
+            "recommendations": [],
+            "analysis": {},
+            "status": "IN_PROGRESS",
+        },
+    }
+
+
+@pytest.mark.timeout(15)
+@pytest.mark.parametrize(
+    "model_name",
+    [
+        "gpt-4o-mini",
+        "gemini-1.5-flash",
+        "claude-3-haiku-20240307",
+        "deepseek-chat",
+        "llama-3.1-8b-instant",
+    ],
+)
+def test_state_delta_handling(
+    model_name: str, models: Dict[str, LM], system_message: str, current_state: Dict
+):
+    """Test that each model correctly handles state updates"""
+
+    state_delta_instructions = """Update the final_results to include findings about code quality issues. Add a recommendation to improve error handling."""
+    user_message = f"Current state: {current_state}\nState delta instructions: {state_delta_instructions}\n\nHow should the state be updated?"
+
+    try:
+        result = models[model_name].respond_sync(
+            system_message=system_message,
+            user_message=user_message,
+            response_model=StateUpdate,
+        )
+
+        # Verify response structure
+        assert isinstance(result, StateUpdate)
+
+        # Verify only allowed fields are present and have correct types
+        if result.short_term_plan is not None:
+            assert isinstance(result.short_term_plan, str)
+        if result.objective is not None:
+            assert isinstance(result.objective, str)
+        if result.final_results is not None:
+            assert isinstance(result.final_results, dict)
+
+    except Exception as e:
+        pytest.fail(f"Model {model_name} failed: {str(e)}")
+
+
+@pytest.mark.timeout(15)
+@pytest.mark.parametrize(
+    "model_name",
+    [
+        "gpt-4o-mini",
+        "gemini-1.5-flash",
+        "claude-3-haiku-20240307",
+        "deepseek-chat",
+        "llama-3.1-8b-instant",
+    ],
+)
+def test_state_delta_protected_fields(
+    model_name: str, models: Dict[str, LM], system_message: str
+):
+    """Test that models respect protected fields"""
+
+    current_state = {
+        "short_term_plan": "Current plan: Review code changes",
+        "objective": "Review pull request",
+        "message_history": ["Previous message 1", "Previous message 2"],
+        "step_summaries": ["Step 1 summary", "Step 2 summary"],
+        "final_results": {
+            "findings": [],
+            "recommendations": [],
+            "analysis": {},
+            "status": "IN_PROGRESS",
+        },
+    }
+
+    state_delta_instructions = """Update the message history to include new findings and update step summaries with recent progress."""
+    user_message = f"Current state: {current_state}\nState delta instructions: {state_delta_instructions}\n\nHow should the state be updated?"
+
+    try:
+        result = models[model_name].respond_sync(
+            system_message=system_message,
+            user_message=user_message,
+            response_model=StateUpdate,
+        )
+
+        # Verify no protected fields are present
+        assert not hasattr(result, "message_history")
+        assert not hasattr(result, "step_summaries")
+
+    except Exception as e:
+        pytest.fail(f"Model {model_name} failed: {str(e)}")
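For orientation, the call pattern the new tests exercise can be reproduced outside pytest. A minimal sketch, assuming the relevant vendor API keys (e.g. OPENAI_API_KEY) are set in the environment:

```python
from typing import Any, Dict, Optional

from pydantic import BaseModel

from synth_ai.zyk.lms.core.main import LM


class StateUpdate(BaseModel):
    short_term_plan: Optional[str] = None
    objective: Optional[str] = None
    final_results: Optional[Dict[str, Any]] = None


lm = LM(
    model_name="gpt-4o-mini",
    formatting_model_name="gpt-4o-mini",
    temperature=0.1,
    structured_output_mode="forced_json",
)
result = lm.respond_sync(
    system_message="You are helping update the agent's state.",
    user_message="Set the objective to 'Review pull request'.",
    response_model=StateUpdate,
)
assert isinstance(result, StateUpdate)  # parsed into the Pydantic model
```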
synth_ai/zyk/lms/core/all.py
CHANGED

@@ -6,6 +6,7 @@ from synth_ai.zyk.lms.vendors.core.openai_api import (
 )
 from synth_ai.zyk.lms.vendors.supported.deepseek import DeepSeekAPI
 from synth_ai.zyk.lms.vendors.supported.together import TogetherAPI
+from synth_ai.zyk.lms.vendors.supported.groq import GroqAPI


 class OpenAIClient(OpenAIPrivate):
@@ -33,3 +34,8 @@ class DeepSeekClient(DeepSeekAPI):
 class TogetherClient(TogetherAPI):
     def __init__(self):
         super().__init__()
+
+
+class GroqClient(GroqAPI):
+    def __init__(self):
+        super().__init__()
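GroqClient follows the same thin-wrapper pattern as TogetherClient and DeepSeekClient. Note that get_client in vendor_clients.py below returns GroqAPI() directly rather than this wrapper, so the wrapper is currently unused by routing; the two are interchangeable at runtime, as this sketch illustrates (it assumes GROQ_API_KEY is set, since the base constructor builds real Groq clients):

```python
from synth_ai.zyk.lms.core.all import GroqClient
from synth_ai.zyk.lms.vendors.supported.groq import GroqAPI

# The wrapper adds no behavior beyond the vendor class it subclasses.
client = GroqClient()
assert isinstance(client, GroqAPI)
```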
synth_ai/zyk/lms/core/main.py
CHANGED

@@ -11,6 +11,8 @@ from synth_ai.zyk.lms.core.vendor_clients import (
 from synth_ai.zyk.lms.structured_outputs.handler import StructuredOutputHandler
 from synth_ai.zyk.lms.vendors.base import VendorBase

+REASONING_MODELS = ["deepseek-reasoner", "o1-mini", "o1-preview", "o1", "o3"]
+

 def build_messages(
     sys_msg: str,
@@ -105,7 +107,9 @@ class LM:
             "forced_json",
             {"max_retries": max_retries_dict.get(max_retries, 2)},
         )
-
+        # Override temperature to 1 for reasoning models
+        effective_temperature = 1.0 if model_name in REASONING_MODELS else temperature
+        self.lm_config = {"temperature": effective_temperature}
         self.model_name = model_name

     def respond_sync(
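The override is a plain exact-membership check, so it can be restated in isolation. Note that "o3-mini", which the new test fixture constructs with temperature=0.1, is not in the list and therefore keeps its requested temperature:

```python
# Standalone restatement of the override added above (illustration, not the
# library code). Reasoning endpoints generally accept only the default
# sampling temperature, hence the pin to 1.0.
REASONING_MODELS = ["deepseek-reasoner", "o1-mini", "o1-preview", "o1", "o3"]


def effective_temperature(model_name: str, requested: float) -> float:
    return 1.0 if model_name in REASONING_MODELS else requested


assert effective_temperature("o1-mini", 0.1) == 1.0       # pinned
assert effective_temperature("gpt-4o-mini", 0.1) == 0.1   # passed through
assert effective_temperature("o3-mini", 0.1) == 0.1       # not covered: exact match only
```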
synth_ai/zyk/lms/core/vendor_clients.py
CHANGED

@@ -8,6 +8,7 @@ from synth_ai.zyk.lms.core.all import (
     # OpenAIClient,
     OpenAIStructuredOutputClient,
     TogetherClient,
+    GroqAPI,
 )

 openai_naming_regexes: List[Pattern] = [
@@ -29,6 +30,11 @@ together_naming_regexes: List[Pattern] = [
     re.compile(r"^.*\/.*$"),
 ]

+groq_naming_regexes: List[Pattern] = [
+    re.compile(r"^llama-3.3-70b-versatile$"),
+    re.compile(r"^llama-3.1-8b-instant$"),
+]
+

 def get_client(
     model_name: str,
@@ -56,5 +62,7 @@ def get_client(
         return DeepSeekClient()
     elif any(regex.match(model_name) for regex in together_naming_regexes):
         return TogetherClient()
+    elif any(regex.match(model_name) for regex in groq_naming_regexes):
+        return GroqAPI()
     else:
         raise ValueError(f"Invalid model name: {model_name}")
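Routing in get_client is first match over the vendor regex lists, so the new patterns are easy to check standalone. A sketch; note the dots in the patterns are unescaped, which is harmless for routing:

```python
import re
from typing import List, Pattern

# The two new patterns, as added above. "." is unescaped, so a name like
# "llama-3x1-8b-instant" would also match; for routing this is benign.
groq_naming_regexes: List[Pattern] = [
    re.compile(r"^llama-3.3-70b-versatile$"),
    re.compile(r"^llama-3.1-8b-instant$"),
]


def routes_to_groq(model_name: str) -> bool:
    return any(regex.match(model_name) for regex in groq_naming_regexes)


assert routes_to_groq("llama-3.1-8b-instant")
assert not routes_to_groq("gpt-4o-mini")
```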
synth_ai/zyk/lms/vendors/supported/groq.py
ADDED

@@ -0,0 +1,16 @@
+import os
+
+from dotenv import load_dotenv
+from groq import AsyncGroq, Groq
+
+from synth_ai.zyk.lms.vendors.openai_standard import OpenAIStandard
+
+load_dotenv()
+
+
+class GroqAPI(OpenAIStandard):
+    def __init__(self):
+        super().__init__(
+            sync_client=Groq(api_key=os.getenv("GROQ_API_KEY")),
+            async_client=AsyncGroq(api_key=os.getenv("GROQ_API_KEY")),
+        )
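End to end, a Groq-hosted model is reached through the high-level LM interface once its name matches groq_naming_regexes. A minimal sketch mirroring the new test fixture, assuming GROQ_API_KEY (and OPENAI_API_KEY for the formatting model) are set:

```python
from pydantic import BaseModel

from synth_ai.zyk.lms.core.main import LM


class Greeting(BaseModel):
    message: str


lm = LM(
    model_name="llama-3.1-8b-instant",  # matched by groq_naming_regexes
    formatting_model_name="gpt-4o-mini",
    temperature=0.1,
    structured_output_mode="stringified_json",
)
result = lm.respond_sync(
    system_message="You are a helpful assistant.",
    user_message="Greet the user in one short sentence.",
    response_model=Greeting,
)
print(result.message)
```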
{synth_ai-0.1.0.dev8.dist-info → synth_ai-0.1.0.dev10.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: synth-ai
-Version: 0.1.0.dev8
+Version: 0.1.0.dev10
 Summary: Software for aiding the best and multiplying the will.
 Home-page: https://github.com/synth-laboratories/synth-ai
 Author: Josh Purtell
@@ -44,8 +44,11 @@ Requires-Dist: google>=3.0.0
 Requires-Dist: google-generativeai>=0.8.1
 Requires-Dist: together>=1.2.12
 Requires-Dist: langfuse>=2.56.1
-Requires-Dist: synth-sdk
+Requires-Dist: synth-sdk>=0.3.1.dev3
 Requires-Dist: datasets>=3.2.0
+Requires-Dist: groq>=0.18.0
+Requires-Dist: pytest-timeout>=2.3.1
+Requires-Dist: lock>=2018.3.25.2110
 Dynamic: author
 Dynamic: home-page
{synth_ai-0.1.0.dev8.dist-info → synth_ai-0.1.0.dev10.dist-info}/RECORD
CHANGED

@@ -1,7 +1,9 @@
-
+private_tests/try_synth_sdk.py,sha256=vk4lUEfpQfLACFl6Qw468t_lsuYxuoIIr05WRgWKGKY,24
 public_tests/test_agent.py,sha256=CjPPWuMWC_TzX1DkDald-bbAxgjXE-HPQvFhq2B--5k,22363
+public_tests/test_all_structured_outputs.py,sha256=SkvQq4paFVh2b3XfeZJ0ihd4LLKgVvVk2Yd4bIXZEEw,6307
 public_tests/test_recursive_structured_outputs.py,sha256=Ne-9XwnOxN7eSpGbNHOpegR-sRj589I84T6y8Z_4QnA,5781
 public_tests/test_structured_outputs.py,sha256=J7sfbGZ7OeB5ONIKpcCTymyayNyAdFfGokC1bcUrSx0,3651
+public_tests/test_synth_sdk.py,sha256=fqkzyzLb_NW4k8EiP2mJ5HZk3lDTi1juyTf9Gv_9wfc,14238
 synth_ai/__init__.py,sha256=2siivzLbT2r-EA7m91dcJB-6Vsurc5_sX3WiKf4_o8Y,198
 synth_ai/zyk/__init__.py,sha256=zoPor1PI2OrgpCu-MBLZXcX1jAbSgD9q0kqZpTghTcQ,60
 synth_ai/zyk/lms/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -14,10 +16,10 @@ synth_ai/zyk/lms/caching/handler.py,sha256=sewq5rRfqXHzCEiXvdckbuxYp9ze_EjVSndnU
 synth_ai/zyk/lms/caching/initialize.py,sha256=zZls6RKAax6Z-8oJInGaSg_RPN_fEZ6e_RCX64lMLJw,416
 synth_ai/zyk/lms/caching/persistent.py,sha256=mQmP1z0rWVYjxwso5zIwd51Df2dWZvdHonuqsOY6SFI,2075
 synth_ai/zyk/lms/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-synth_ai/zyk/lms/core/all.py,sha256=
+synth_ai/zyk/lms/core/all.py,sha256=kuuu0V1_DVJp8GtZztdwFiTwGVYIdJ946Y1V0D4zz8Y,1034
 synth_ai/zyk/lms/core/exceptions.py,sha256=K0BVdAzxVIchsvYZAaHEH1GAWBZvpxhFi-SPcJOjyPQ,205
-synth_ai/zyk/lms/core/main.py,sha256=
-synth_ai/zyk/lms/core/vendor_clients.py,sha256=
+synth_ai/zyk/lms/core/main.py,sha256=fdAPBjnyOfLBPtcD0D79tn0f7SrKv49zCP6IhiAeH9Y,9019
+synth_ai/zyk/lms/core/vendor_clients.py,sha256=tuYf9Jio9mnCajPc1dCwurdprknRGnbHv7wwHC5tIdY,2119
 synth_ai/zyk/lms/cost/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 synth_ai/zyk/lms/cost/monitor.py,sha256=cSKIvw6WdPZIRubADWxQoh1MdB40T8-jjgfNUeUHIn0,5
 synth_ai/zyk/lms/cost/statefulness.py,sha256=TOsuXL8IjtKOYJ2aJQF8TwJVqn_wQ7AIwJJmdhMye7U,36
@@ -38,9 +40,13 @@ synth_ai/zyk/lms/vendors/local/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5
 synth_ai/zyk/lms/vendors/local/ollama.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 synth_ai/zyk/lms/vendors/supported/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 synth_ai/zyk/lms/vendors/supported/deepseek.py,sha256=diFfdhPMO5bLFZxnYj7VT0v6jKTlOYESBkspUuVa2eY,529
+synth_ai/zyk/lms/vendors/supported/groq.py,sha256=Fbi7QvhdLx0F-VHO5PY-uIQlPR0bo3C9h1MvIOx8nz0,388
 synth_ai/zyk/lms/vendors/supported/together.py,sha256=Ni_jBqqGPN0PkkY-Ew64s3gNKk51k3FCpLSwlNhKbf0,342
-
-
-
-synth_ai-0.1.0.
-synth_ai-0.1.0.
+tests/test_agent.py,sha256=CjPPWuMWC_TzX1DkDald-bbAxgjXE-HPQvFhq2B--5k,22363
+tests/test_recursive_structured_outputs.py,sha256=Ne-9XwnOxN7eSpGbNHOpegR-sRj589I84T6y8Z_4QnA,5781
+tests/test_structured_outputs.py,sha256=J7sfbGZ7OeB5ONIKpcCTymyayNyAdFfGokC1bcUrSx0,3651
+synth_ai-0.1.0.dev10.dist-info/LICENSE,sha256=ynhjRQUfqA_RdGRATApfFA_fBAy9cno04sLtLUqxVFM,1069
+synth_ai-0.1.0.dev10.dist-info/METADATA,sha256=0JR0iVQFHPMzRDsm8PxJMsAM76PYPHKONQRNZQ2LsHU,2712
+synth_ai-0.1.0.dev10.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+synth_ai-0.1.0.dev10.dist-info/top_level.txt,sha256=5GzJO9j-KbJ_4ppxhmCUa_qdhHM4-9cHHNU76yAI8do,42
+synth_ai-0.1.0.dev10.dist-info/RECORD,,
tests/test_agent.py
ADDED

@@ -0,0 +1,538 @@
+import unittest
+from typing import List, Union
+
+from pydantic import BaseModel
+
+from synth_ai.zyk.lms.core.main import LM
+
+
+class UnitTestDict(BaseModel):
+    test_description: str
+    input_names: List[str]
+    input_types: List[str]
+    stringified_input_values: List[str]
+    assertion_condition: str
+    assertion_type: str  # Consider using Literal for specific assertion types
+
+
+class ActionArgument(BaseModel):
+    key: str
+    value: Union[str, int, float, bool, UnitTestDict]
+
+
+class ReAct(BaseModel):
+    reasoning: str
+    action_name: str
+    action_args: List[ActionArgument]  # Dict#[str, Dict]
+
+
+system = """
+<System Message>
+# Premise
+You are a software engineer
+Here is some information about this setting
+<Setting Information>
+You are working to solve a computer science problem. You will need to submit a solution to the problem, which will be tested against a suite of hidden unit tests.
+</Setting Information>
+<Actions Available>
+<edit_submission>
+<action_context>
+Edit the submission code. Use this when you want to make changes to the current solution.
+</action_context>
+<action_arg_spec>
+{'first_line': <class 'int'>, 'last_line': <class 'int'>, 'new_code': <class 'str'>}
+</action_arg_spec>
+<action_description>
+Edit the submission code
+</action_description>
+
+</edit_submission>
+<add_submission>
+<action_context>
+Add the submission code. Use this when you want to start from scratch with a new solution.
+</action_context>
+<action_arg_spec>
+{'submission': <class 'str'>}
+</action_arg_spec>
+<action_description>
+Add the submission code
+</action_description>
+
+</add_submission>
+<add_unit_test>
+<action_context>
+Add a unit test. The unit test information you submit must be in the format of a BCBUnitTest:
+
+class BCBUnitTest(BaseModel):
+    test_description: str
+    input_names: List[str]
+    input_types: List[str]
+    input_values: List[Any]
+    assertion_condition: str
+    assertion_type: Literal["assertTrue", "assertRaises"] = "assertTrue"
+
+
+It will be parsed via BCBUnitTest(**unit_test_dict)
+
+
+
+# Some various notes:
+1. If an input should be of a type defined by a specific package, add the package name/alias to the type. E.g. "np.ndarray" or "pd.DataFrame". You still should fully define the value for the input_value field e.g. "pd.DataFrame({'a': [1, 2, 3]})"
+
+2. Unit tests will be compiled from the BCBUnitTest class as follows:
+A. For AssertTrue type tests, the test will be compiled as follows:
+```python
+def test_case(self):
+    # {{self.test_description}}
+
+    {{defs}}
+    result = {{function_name}}(**{{{{args}}}}})
+    self.{{self.assertion_type}}({{self.assertion_condition}})
+```
+B. For AssertRaises type tests, the test will be compiled as follows:
+
+```python
+def test_case(self):
+    # {{self.test_description}}
+    {{defs}}
+    with self.{{self.assertion_type}}({{self.assertion_condition}}):
+        {{function_name}}(**{{{{args}}}}})
+```
+
+Provide information accordingly.
+
+</action_context>
+<action_arg_spec>
+{'unit_test_name': <class 'str'>, 'unit_test_dict': typing.Dict}
+</action_arg_spec>
+<action_description>
+Add a unit test
+</action_description>
+
+</add_unit_test>
+<remove_unit_test>
+<action_context>
+Remove a unit test
+</action_context>
+<action_arg_spec>
+{'unit_test_name': <class 'str'>}
+</action_arg_spec>
+<action_description>
+Remove a unit test
+</action_description>
+
+</remove_unit_test>
+<test_submission>
+<action_context>
+Test the submission
+</action_context>
+<action_arg_spec>
+{}
+</action_arg_spec>
+<action_description>
+Test the submission
+</action_description>
+
+</test_submission>
+<submit_solution>
+<action_context>
+Submit the solution
+</action_context>
+<action_arg_spec>
+{}
+</action_arg_spec>
+<action_description>
+Submit the solution
+</action_description>
+
+</submit_solution>
+
+</Actions Available>
+You'll be given your past actions/thoughts, along with recent raw observations from the environment
+The environment one step in the past is your current environment.
+
+# Objective
+Please complete the problem by drafting a solution, creating unit tests, improving the solution, and submitting the solution.
+
+# Constraints
+You will be given a code_prompt_for_answer, which contains imports and the function signature. Your solution must comprise code that can be appended to code_prompt_for_answer and run as a single script.
+
+"""
+
+user = """
+<User Message>
+# Recent Actions / Thoughts
+
+# Recent Observations
+<1 environment step(s) in the past>{'action_result': None, 'environment_state': {'question': 'import pandas as pd\nimport numpy as np\n\n# Constants\nCOLUMNS = [\'column1\', \'column2\', \'column3\', \'column4\', \'column5\']\n\ndef task_func(df, dct):\n    '''\n    Replace certain values in a DataFrame with a dictionary mapping and calculate the Pearson correlation coefficient between each pair of columns.\n\n    Parameters:\n    df (DataFrame): The input DataFrame, containing numeric or categorical data.\n    dct (dict): A dictionary for replacing values in df, where keys are existing values and values are new values.\n\n    Returns:\n    DataFrame: A DataFrame with the correlation coefficients between each pair of columns. The format of the DataFrame is a square matrix with column and index labels matching the columns of the input DataFrame.\n    \n    Requirements:\n    - pandas\n    - numpy\n    \n    Note:\n    - This function operates on DataFrames containing numeric or categorical data that can be replaced with numeric values, as correlation calculations require numeric data.\n    - This function using pearson method to calculate the correlation matrix.\n    \n    Raises:\n    - This function will raise a ValueError is input df is not a DataFrame.\n    \n    Example:\n    >>> df = pd.DataFrame({\'A\': [1, 2, 3], \'B\': [4, 5, 6]})\n    >>> dct = {1: 10, 2: 20, 3: 30, 4: 40, 5: 50, 6: 60}\n    >>> correlation_matrix = task_func(df, dct)\n    >>> correlation_matrix.shape == (2, 2)\n    True\n    >>> np.allclose(correlation_matrix, np.array([[1.0, 1.0], [1.0, 1.0]]))\n    True\n    '''\n', 'code_prompt_for_answer': "import pandas as pd\nimport numpy as np\n# Constants\nCOLUMNS = ['column1', 'column2', 'column3', 'column4', 'column5']\ndef task_func(df, dct):\n", 'unit_tests_you_have_written': {}, 'current_solution': ''}}</1 environment step(s) in the past>
+
+Your next actions / thought:
+"""
+
+
+###
+
+hard_system = """
+# Premise
+You are a software engineer
+Here is some information about this setting
+<Setting Information>
+You are working to solve a computer science problem. You will need to submit a solution to the problem, which will be tested against a suite of hidden unit tests.
+</Setting Information>
+<Actions Available>
+<edit_submission>
+<action_context>
+Edit the submission code. Use this when you want to make changes to the current solution.
+</action_context>
+<action_arg_spec>
+{'first_line': <class 'int'>, 'last_line': <class 'int'>, 'new_code': <class 'str'>}
+</action_arg_spec>
+<action_description>
+Edit the submission code
+</action_description>
+
+</edit_submission>
+<add_submission>
+<action_context>
+Add the submission code. Use this when you want to start from scratch with a new solution.
+</action_context>
+<action_arg_spec>
+{'submission': <class 'str'>}
+</action_arg_spec>
+<action_description>
+Add the submission code
+</action_description>
+
+</add_submission>
+<add_unit_test>
+<action_context>
+Add a unit test. The unit test information you submit must be in the format of a BCBUnitTest:
+
+class BCBUnitTest(BaseModel):
+    test_description: str
+    input_names: List[str]
+    input_types: List[str]
+    input_values: List[Any]
+    assertion_condition: str
+    assertion_type: Literal["assertTrue", "assertRaises"] = "assertTrue"
+
+
+It will be parsed via BCBUnitTest(**unit_test_dict)
+
+
+
+# Some various notes:
+1. If an input should be of a type defined by a specific package, add the package name/alias to the type. E.g. "np.ndarray" or "pd.DataFrame". You still should fully define the value for the input_value field e.g. "pd.DataFrame({'a': [1, 2, 3]})"
+
+2. Unit tests will be compiled from the BCBUnitTest class as follows:
+A. For AssertTrue type tests, the test will be compiled as follows:
+```python
+def test_case(self):
+    # {{self.test_description}}
+
+    {{defs}}
+    result = {{function_name}}(**{{{{args}}}}})
+    self.{{self.assertion_type}}({{self.assertion_condition}})
+```
+B. For AssertRaises type tests, the test will be compiled as follows:
+
+```python
+def test_case(self):
+    # {{self.test_description}}
+    {{defs}}
+    with self.{{self.assertion_type}}({{self.assertion_condition}}):
+        {{function_name}}(**{{{{args}}}}})
+```
+
+Provide information accordingly.
+
+</action_context>
+<action_arg_spec>
+{'unit_test_name': <class 'str'>, 'unit_test_dict': typing.Dict}
+</action_arg_spec>
+<action_description>
+Add a unit test
+</action_description>
+
+</add_unit_test>
+<remove_unit_test>
+<action_context>
+Remove a unit test
+</action_context>
+<action_arg_spec>
+{'unit_test_name': <class 'str'>}
+</action_arg_spec>
+<action_description>
+Remove a unit test
+</action_description>
+
+</remove_unit_test>
+<test_submission>
+<action_context>
+Test the submission
+</action_context>
+<action_arg_spec>
+{}
+</action_arg_spec>
+<action_description>
+Test the submission
+</action_description>
+
+</test_submission>
+<submit_solution>
+<action_context>
+Submit the solution
+</action_context>
+<action_arg_spec>
+{}
+</action_arg_spec>
+<action_description>
+Submit the solution
+</action_description>
+
+</submit_solution>
+
+</Actions Available>
+You'll be given your past actions/thoughts, along with recent raw observations from the environment
+The environment one step in the past is your current environment.
+
+# Objective
+Please complete the problem by drafting a solution, creating unit tests, improving the solution, and submitting the solution.
+
+# Constraints
+You will be given a code_prompt_for_answer, which contains imports and the function signature. Your solution must comprise code that can be appended to code_prompt_for_answer and run as a single script.
+
+
+<User Message>
+# Recent Actions / Thoughts
+
+# Recent Observations
+<1 environment step(s) in the past>{'action_result': None, 'environment_state': {'question': 'import pandas as pd\nimport numpy as np\n\n# Constants\nCOLUMNS = [\'column1\', \'column2\', \'column3\', \'column4\', \'column5\']\n\ndef task_func(df, dct):\n    '''\n    Replace certain values in a DataFrame with a dictionary mapping and calculate the Pearson correlation coefficient between each pair of columns.\n\n    Parameters:\n    df (DataFrame): The input DataFrame, containing numeric or categorical data.\n    dct (dict): A dictionary for replacing values in df, where keys are existing values and values are new values.\n\n    Returns:\n    DataFrame: A DataFrame with the correlation coefficients between each pair of columns. The format of the DataFrame is a square matrix with column and index labels matching the columns of the input DataFrame.\n    \n    Requirements:\n    - pandas\n    - numpy\n    \n    Note:\n    - This function operates on DataFrames containing numeric or categorical data that can be replaced with numeric values, as correlation calculations require numeric data.\n    - This function using pearson method to calculate the correlation matrix.\n    \n    Raises:\n    - This function will raise a ValueError is input df is not a DataFrame.\n    \n    Example:\n    >>> df = pd.DataFrame({\'A\': [1, 2, 3], \'B\': [4, 5, 6]})\n    >>> dct = {1: 10, 2: 20, 3: 30, 4: 40, 5: 50, 6: 60}\n    >>> correlation_matrix = task_func(df, dct)\n    >>> correlation_matrix.shape == (2, 2)\n    True\n    >>> np.allclose(correlation_matrix, np.array([[1.0, 1.0], [1.0, 1.0]]))\n    True\n    '''\n', 'code_prompt_for_answer': "import pandas as pd\nimport numpy as np\n# Constants\nCOLUMNS = ['column1', 'column2', 'column3', 'column4', 'column5']\ndef task_func(df, dct):\n", 'unit_tests_you_have_written': {}, 'current_solution': ''}}</1 environment step(s) in the past>
+
+Your next actions / thought:
+
+Structured output: reasoning="I need to implement the function 'task_func' that replaces values in a DataFrame based on a dictionary and calculates the Pearson correlation coefficient between the columns. I will also ensure to handle the case where the input is not a DataFrame by raising a ValueError." action_name='edit_submission' action_args=[ActionArgument(key='first_line', value=4), ActionArgument(key='last_line', value=4), ActionArgument(key='new_code', value="    if not isinstance(df, pd.DataFrame):\n        raise ValueError('Input must be a DataFrame')\n    df.replace(dct, inplace=True)\n    return df.corr(method='pearson')")]
+<System Message>
+# Premise
+You are a software engineer
+Here is some information about this setting
+<Setting Information>
+You are working to solve a computer science problem. You will need to submit a solution to the problem, which will be tested against a suite of hidden unit tests.
+</Setting Information>
+<Actions Available>
+<edit_submission>
+<action_context>
+Edit the submission code. Use this when you want to make changes to the current solution.
+</action_context>
+<action_arg_spec>
+{'first_line': <class 'int'>, 'last_line': <class 'int'>, 'new_code': <class 'str'>}
+</action_arg_spec>
+<action_description>
+Edit the submission code
+</action_description>
+
+</edit_submission>
+<add_submission>
+<action_context>
+Add the submission code. Use this when you want to start from scratch with a new solution.
+</action_context>
+<action_arg_spec>
+{'submission': <class 'str'>}
+</action_arg_spec>
+<action_description>
+Add the submission code
+</action_description>
+
+</add_submission>
+<add_unit_test>
+<action_context>
+Add a unit test. The unit test information you submit must be in the format of a BCBUnitTest:
+
+class BCBUnitTest(BaseModel):
+    test_description: str
+    input_names: List[str]
+    input_types: List[str]
+    input_values: List[Any]
+    assertion_condition: str
+    assertion_type: Literal["assertTrue", "assertRaises"] = "assertTrue"
+
+
+It will be parsed via BCBUnitTest(**unit_test_dict)
+
+
+
+# Some various notes:
+1. If an input should be of a type defined by a specific package, add the package name/alias to the type. E.g. "np.ndarray" or "pd.DataFrame". You still should fully define the value for the input_value field e.g. "pd.DataFrame({'a': [1, 2, 3]})"
+
+2. Unit tests will be compiled from the BCBUnitTest class as follows:
+A. For AssertTrue type tests, the test will be compiled as follows:
+```python
+def test_case(self):
+    # {{self.test_description}}
+
+    {{defs}}
+    result = {{function_name}}(**{{{{args}}}}})
+    self.{{self.assertion_type}}({{self.assertion_condition}})
+```
+B. For AssertRaises type tests, the test will be compiled as follows:
+
+```python
+def test_case(self):
+    # {{self.test_description}}
+    {{defs}}
+    with self.{{self.assertion_type}}({{self.assertion_condition}}):
+        {{function_name}}(**{{{{args}}}}})
+```
+
+Provide information accordingly.
+
+</action_context>
+<action_arg_spec>
+{'unit_test_name': <class 'str'>, 'unit_test_dict': typing.Dict}
+</action_arg_spec>
+<action_description>
+Add a unit test
+</action_description>
+
+</add_unit_test>
+<remove_unit_test>
+<action_context>
+Remove a unit test
+</action_context>
+<action_arg_spec>
+{'unit_test_name': <class 'str'>}
+</action_arg_spec>
+<action_description>
+Remove a unit test
+</action_description>
+
+</remove_unit_test>
+<test_submission>
+<action_context>
+Test the submission
+</action_context>
+<action_arg_spec>
+{}
+</action_arg_spec>
+<action_description>
+Test the submission
+</action_description>
+
+</test_submission>
+<submit_solution>
+<action_context>
+Submit the solution
+</action_context>
+<action_arg_spec>
+{}
+</action_arg_spec>
+<action_description>
+Submit the solution
+</action_description>
+
+</submit_solution>
+
+</Actions Available>
+You'll be given your past actions/thoughts, along with recent raw observations from the environment
+The environment one step in the past is your current environment.
+
+# Objective
+Please complete the problem by drafting a solution, creating unit tests, improving the solution, and submitting the solution.
+
+# Constraints
+You will be given a code_prompt_for_answer, which contains imports and the function signature. Your solution must comprise code that can be appended to code_prompt_for_answer and run as a single script.
+"""
+
+hard_user = """
+# Recent Actions / Thoughts
+<1 reasoning step(s) in the past>reasoning="I need to implement the function 'task_func' that replaces values in a DataFrame based on a dictionary and calculates the Pearson correlation coefficient between the columns. I will also ensure to handle the case where the input is not a DataFrame by raising a ValueError." action_name='edit_submission' action_args=[ActionArgument(key='first_line', value=4), ActionArgument(key='last_line', value=4), ActionArgument(key='new_code', value="    if not isinstance(df, pd.DataFrame):\n        raise ValueError('Input must be a DataFrame')\n    df.replace(dct, inplace=True)\n    return df.corr(method='pearson')")]</1 reasoning step(s) in the past>
+# Recent Observations
+<1 environment step(s) in the past>success=True result='Edited submission successfully'</1 environment step(s) in the past>
+<2 environment step(s) in the past>{'action_result': None, 'environment_state': {'question': 'import pandas as pd\nimport numpy as np\n\n# Constants\nCOLUMNS = [\'column1\', \'column2\', \'column3\', \'column4\', \'column5\']\n\ndef task_func(df, dct):\n    '''\n    Replace certain values in a DataFrame with a dictionary mapping and calculate the Pearson correlation coefficient between each pair of columns.\n\n    Parameters:\n    df (DataFrame): The input DataFrame, containing numeric or categorical data.\n    dct (dict): A dictionary for replacing values in df, where keys are existing values and values are new values.\n\n    Returns:\n    DataFrame: A DataFrame with the correlation coefficients between each pair of columns. The format of the DataFrame is a square matrix with column and index labels matching the columns of the input DataFrame.\n    \n    Requirements:\n    - pandas\n    - numpy\n    \n    Note:\n    - This function operates on DataFrames containing numeric or categorical data that can be replaced with numeric values, as correlation calculations require numeric data.\n    - This function using pearson method to calculate the correlation matrix.\n    \n    Raises:\n    - This function will raise a ValueError is input df is not a DataFrame.\n    \n    Example:\n    >>> df = pd.DataFrame({\'A\': [1, 2, 3], \'B\': [4, 5, 6]})\n    >>> dct = {1: 10, 2: 20, 3: 30, 4: 40, 5: 50, 6: 60}\n    >>> correlation_matrix = task_func(df, dct)\n    >>> correlation_matrix.shape == (2, 2)\n    True\n    >>> np.allclose(correlation_matrix, np.array([[1.0, 1.0], [1.0, 1.0]]))\n    True\n    '''\n', 'code_prompt_for_answer': "import pandas as pd\nimport numpy as np\n# Constants\nCOLUMNS = ['column1', 'column2', 'column3', 'column4', 'column5']\ndef task_func(df, dct):\n", 'unit_tests_you_have_written': {}, 'current_solution': ''}}</2 environment step(s) in the past>
+
+Your next actions / thought:
+"""
+
+
+class TestLMStructuredOutputs(unittest.TestCase):
+    # ... existing code ...
+
+    @classmethod
+    def setUpClass(cls):
+        # Initialize LMs for both forced_json and stringified_json modes
+        cls.lm_forced_json = LM(
+            model_name="gpt-4o-mini",
+            formatting_model_name="gpt-4o-mini",
+            temperature=0.0,
+            max_retries="Few",
+            structured_output_mode="forced_json",
+        )
+        cls.lm_stringified_json = LM(
+            model_name="gpt-4o-mini",
+            formatting_model_name="gpt-4o-mini",
+            temperature=0.0,
+            max_retries="Few",
+            structured_output_mode="stringified_json",
+        )
+
+    def test_sync_react_response_content(self):
+        system_message = system
+
+        user_message = user
+
+        for lm in [self.lm_forced_json, self.lm_stringified_json]:
+            with self.subTest(
+                mode=lm.structured_output_handler.handler.structured_output_mode
+            ):
+                result = lm.respond_sync(
+                    system_message=system_message,
+                    user_message=user_message,
+                    response_model=ReAct,
+                )
+                self.assertIsInstance(result, ReAct)
+                self.assertIsInstance(result.reasoning, str)
+                self.assertIsInstance(result.action_name, str)
+                self.assertIsInstance(result.action_args, list)
+                for arg in result.action_args:
+                    self.assertIsInstance(arg, ActionArgument)
+                    self.assertIsInstance(arg.key, str)
+                    # self.assertIsInstance(arg.value, str)
+
+    def test_sync_react_response_hard_content(self):
+        system_message = hard_system
+
+        user_message = hard_user
+
+        for lm in [self.lm_forced_json, self.lm_stringified_json]:
+            with self.subTest(
+                mode=lm.structured_output_handler.handler.structured_output_mode
+            ):
+                result = lm.respond_sync(
+                    system_message=system_message,
+                    user_message=user_message,
+                    response_model=ReAct,
+                )
+                self.assertIsInstance(result, ReAct)
+                self.assertIsInstance(result.reasoning, str)
+                self.assertIsInstance(result.action_name, str)
+                self.assertIsInstance(result.action_args, list)
+                for arg in result.action_args:
+                    self.assertIsInstance(arg, ActionArgument)
+                    self.assertIsInstance(arg.key, str)
+                    # self.assertIsInstance(arg.value, str)
+
+
+# use non-trivial fallback?
+
+if __name__ == "__main__":
+    # Create an instance of the test class
+    test_instance = TestLMStructuredOutputs()
+
+    # Set up the class (this would normally be done by unittest)
+    test_instance.setUpClass()
+
+    # Run the test methods
+    test_instance.test_sync_react_response_content()
+    test_instance.test_sync_react_response_hard_content()
+
+    print("All tests completed.")
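The prompt above quotes the template that BCBUnitTest instances are compiled into, but the compiler itself is not part of this diff. The following is therefore a hypothetical rendering sketch (the `defs`, `args`, and `task_func` names are illustrative) showing how an assertTrue-style dict could expand into a unittest method body:

```python
# Hypothetical renderer for the assertTrue template quoted in the prompt.
unit_test = {
    "test_description": "correlation matrix is 2x2 for a 2-column frame",
    "input_names": ["df", "dct"],
    "input_types": ["pd.DataFrame", "dict"],
    "stringified_input_values": ["pd.DataFrame({'A': [1, 2], 'B': [3, 4]})", "{}"],
    "assertion_condition": "result.shape == (2, 2)",
    "assertion_type": "assertTrue",
}

# {{defs}}: one assignment per declared input, from its stringified value.
defs = "\n    ".join(
    f"{name} = {value}"
    for name, value in zip(unit_test["input_names"], unit_test["stringified_input_values"])
)
# {{args}}: forward each input to the function under test by keyword.
args = ", ".join(f"{name}={name}" for name in unit_test["input_names"])

rendered = f"""def test_case(self):
    # {unit_test['test_description']}
    {defs}
    result = task_func({args})
    self.{unit_test['assertion_type']}({unit_test['assertion_condition']})
"""
print(rendered)
```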
tests/test_recursive_structured_outputs.py
ADDED

@@ -0,0 +1,180 @@
+import asyncio
+import unittest
+from typing import List
+
+from pydantic import BaseModel, Field
+
+from synth_ai.zyk.lms.core.main import LM
+
+
+# Define example structured output models
+class SimpleResponse(BaseModel):
+    message: str
+    confidence: float
+
+
+class ComplexResponse(BaseModel):
+    title: str
+    tags: List[str]
+    content: str
+
+
+class NestedResponse(BaseModel):
+    main_category: str
+    subcategories: List[str]
+    details: SimpleResponse
+
+
+# Define nested structured output models
+class Address(BaseModel):
+    street: str
+    city: str
+    country: str
+
+
+class PersonalInfo(BaseModel):
+    name: str
+    age: int
+    address: Address
+
+
+class WorkInfo(BaseModel):
+    company: str
+    position: str
+    years_experience: int
+
+
+class NestedPersonResponse(BaseModel):
+    personal: PersonalInfo
+    work: WorkInfo
+    skills: List[str]
+
+
+class ProjectDetails(BaseModel):
+    name: str
+    description: str
+    technologies: List[str]
+
+
+class NestedPortfolioResponse(BaseModel):
+    developer: PersonalInfo
+    projects: List[ProjectDetails]
+    total_experience: int
+
+
+class NestedCompanyResponse(BaseModel):
+    name: str
+    founded: int
+    headquarters: Address
+    employees: List[PersonalInfo]
+    main_products: List[str]
+
+
+class TestLMStructuredOutputs(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls):
+        # Initialize the LM once for all tests
+        cls.lm = LM(
+            model_name="gpt-4o-mini",
+            formatting_model_name="gpt-4o-mini",
+            temperature=0.7,
+            max_retries="Few",
+            structured_output_mode="forced_json",
+        )
+
+    def test_sync_simple_response(self):
+        result = self.lm.respond_sync(
+            system_message="You are a helpful assistant.",
+            user_message="Give me a short greeting and your confidence level.",
+            response_model=SimpleResponse,
+        )
+        self.assertIsInstance(result, SimpleResponse)
+        self.assertIsInstance(result.message, str)
+        self.assertIsInstance(result.confidence, float)
+        self.assertGreaterEqual(result.confidence, 0)
+        self.assertLessEqual(result.confidence, 1)
+
+    def test_sync_complex_response(self):
+        result = self.lm.respond_sync(
+            system_message="You are a content creator.",
+            user_message="Create a short blog post about AI.",
+            response_model=ComplexResponse,
+        )
+        self.assertIsInstance(result, ComplexResponse)
+        self.assertIsInstance(result.title, str)
+        self.assertIsInstance(result.tags, list)
+        self.assertIsInstance(result.content, str)
+
+    async def async_nested_response(self):
+        result = await self.lm.respond_async(
+            system_message="You are a categorization expert.",
+            user_message="Categorize 'Python' and provide a brief description.",
+            response_model=NestedResponse,
+        )
+        self.assertIsInstance(result, NestedResponse)
+        self.assertIsInstance(result.main_category, str)
+        self.assertIsInstance(result.subcategories, list)
+        self.assertIsInstance(result.details, SimpleResponse)
+
+    def test_async_nested_response(self):
+        asyncio.run(self.async_nested_response())
+
+
+class TestLMNestedStructuredOutputs(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls):
+        # Initialize the LM once for all tests
+        cls.lm = LM(
+            model_name="gpt-4o-mini",
+            formatting_model_name="gpt-4o-mini",
+            temperature=0.7,
+            max_retries="Few",
+            structured_output_mode="forced_json",
+        )
+
+    def test_sync_nested_person_response(self):
+        result = self.lm.respond_sync(
+            system_message="You are an HR assistant.",
+            user_message="Provide detailed information about a fictional employee named John Doe.",
+            response_model=NestedPersonResponse,
+        )
+        self.assertIsInstance(result, NestedPersonResponse)
+        self.assertIsInstance(result.personal, PersonalInfo)
+        self.assertIsInstance(result.personal.address, Address)
+        self.assertIsInstance(result.work, WorkInfo)
+        self.assertIsInstance(result.skills, list)
+
+    def test_sync_nested_portfolio_response(self):
+        result = self.lm.respond_sync(
+            system_message="You are a portfolio manager.",
+            user_message="Create a portfolio for a fictional software developer with multiple projects.",
+            response_model=NestedPortfolioResponse,
+        )
+        self.assertIsInstance(result, NestedPortfolioResponse)
+        self.assertIsInstance(result.developer, PersonalInfo)
+        self.assertIsInstance(result.developer.address, Address)
+        self.assertIsInstance(result.projects, list)
+        for project in result.projects:
+            self.assertIsInstance(project, ProjectDetails)
+        self.assertIsInstance(result.total_experience, int)
+
+    async def async_nested_company_response(self):
+        result = await self.lm.respond_async(
+            system_message="You are a company information specialist.",
+            user_message="Provide detailed information about a fictional tech company.",
+            response_model=NestedCompanyResponse,
+        )
+        self.assertIsInstance(result, NestedCompanyResponse)
+        self.assertIsInstance(result.headquarters, Address)
+        self.assertIsInstance(result.employees, list)
+        for employee in result.employees:
+            self.assertIsInstance(employee, PersonalInfo)
+            self.assertIsInstance(employee.address, Address)
+        self.assertIsInstance(result.main_products, list)
+
+    def test_async_nested_company_response(self):
+        asyncio.run(self.async_nested_company_response())
+
+
+if __name__ == "__main__":
+    unittest.main()
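The nested response models above are ordinary Pydantic models (the package targets Pydantic v2, given the `model_post_init` hook used elsewhere in this diff). A quick way to inspect what a structured-output mode must reproduce is to dump the generated JSON schema:

```python
from pydantic import BaseModel


class Address(BaseModel):
    street: str
    city: str
    country: str


class PersonalInfo(BaseModel):
    name: str
    age: int
    address: Address


# Nested models show up under "$defs" in the Pydantic v2 schema.
schema = PersonalInfo.model_json_schema()
print(sorted(schema["$defs"]))  # ['Address']
```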
tests/test_structured_outputs.py
ADDED

@@ -0,0 +1,100 @@
+import asyncio
+import unittest
+from typing import List
+
+from pydantic import BaseModel, Field
+
+from synth_ai.zyk.lms.core.main import LM
+
+
+# Define example structured output models
+class SimpleResponse(BaseModel):
+    message: str
+    confidence_between_zero_one: float = Field(
+        ..., description="Confidence level between 0 and 1"
+    )
+
+
+class ComplexResponse(BaseModel):
+    title: str
+    tags: List[str]
+    content: str
+
+
+class NestedResponse(BaseModel):
+    main_category: str
+    subcategories: List[str]
+    details: SimpleResponse
+
+
+class TestLMStructuredOutputs(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls):
+        # Initialize LMs for both forced_json and stringified_json modes
+        cls.lm_forced_json = LM(
+            model_name="gpt-4o-mini",
+            formatting_model_name="gpt-4o-mini",
+            temperature=0.7,
+            max_retries="Few",
+            structured_output_mode="forced_json",
+        )
+        cls.lm_stringified_json = LM(
+            model_name="gpt-4o-mini",
+            formatting_model_name="gpt-4o-mini",
+            temperature=0.7,
+            max_retries="Few",
+            structured_output_mode="stringified_json",
+        )
+
+    def test_sync_simple_response(self):
+        for lm in [self.lm_forced_json, self.lm_stringified_json]:
+            with self.subTest(
+                mode=lm.structured_output_handler.handler.structured_output_mode
+            ):
+                result = lm.respond_sync(
+                    system_message="You are a helpful assistant.",
+                    user_message="Give me a short greeting and your confidence level.",
+                    response_model=SimpleResponse,
+                )
+                self.assertIsInstance(result, SimpleResponse)
+                self.assertIsInstance(result.message, str)
+                self.assertIsInstance(result.confidence_between_zero_one, float)
+                self.assertGreaterEqual(result.confidence_between_zero_one, 0)
+                self.assertLessEqual(result.confidence_between_zero_one, 1)
+
+    def test_sync_complex_response(self):
+        for lm in [self.lm_forced_json, self.lm_stringified_json]:
+            with self.subTest(
+                mode=lm.structured_output_handler.handler.structured_output_mode
+            ):
+                result = lm.respond_sync(
+                    system_message="You are a content creator.",
+                    user_message="Create a short blog post about AI.",
+                    response_model=ComplexResponse,
+                )
+                self.assertIsInstance(result, ComplexResponse)
+                self.assertIsInstance(result.title, str)
+                self.assertIsInstance(result.tags, list)
+                self.assertIsInstance(result.content, str)
+
+    async def async_nested_response(self, lm):
+        result = await lm.respond_async(
+            system_message="You are a categorization expert.",
+            user_message="Categorize 'Python' and provide a brief description.",
+            response_model=NestedResponse,
+        )
+        self.assertIsInstance(result, NestedResponse)
+        self.assertIsInstance(result.main_category, str)
+        self.assertIsInstance(result.subcategories, list)
+        self.assertIsInstance(result.details, SimpleResponse)
+
+    def test_async_nested_response(self):
+        for lm in [self.lm_forced_json, self.lm_stringified_json]:  #
+            with self.subTest(
+                mode=lm.structured_output_handler.handler.structured_output_mode
+            ):
+                asyncio.run(self.async_nested_response(lm))
+
+
+if __name__ == "__main__":
+    unittest.main()
/public_tests/{synth_sdk.py → test_synth_sdk.py}
RENAMED (file without changes)

{synth_ai-0.1.0.dev8.dist-info → synth_ai-0.1.0.dev10.dist-info}/LICENSE
File without changes

{synth_ai-0.1.0.dev8.dist-info → synth_ai-0.1.0.dev10.dist-info}/WHEEL
File without changes