synth-ai 0.1.0.dev9__py3-none-any.whl → 0.1.0.dev10__py3-none-any.whl

This diff shows the content of publicly available package versions as published to their public registries. It is provided for informational purposes only and reflects the changes between the two released versions.

Potentially problematic release.


This version of synth-ai might be problematic.

private_tests/try_synth_sdk.py ADDED
@@ -0,0 +1 @@
1
+ import synth_sdk.tracing
public_tests/test_all_structured_outputs.py ADDED
@@ -0,0 +1,195 @@
1
+ from typing import Any, Dict, Optional
2
+
3
+ import pytest
4
+ from pydantic import BaseModel
5
+
6
+ from synth_ai.zyk.lms.core.main import LM
7
+
8
+
9
+ class StateUpdate(BaseModel):
10
+ """Response model for state updates from LLM"""
11
+
12
+ short_term_plan: Optional[str] = None
13
+ objective: Optional[str] = None
14
+ final_results: Optional[Dict[str, Any]] = None
15
+
16
+ def model_post_init(self, __context):
17
+ super().model_post_init(__context)
18
+ # Ensure no protected fields are present
19
+ protected_fields = ["message_history", "step_summaries"]
20
+ for field in protected_fields:
21
+ if hasattr(self, field):
22
+ raise ValueError(f"Cannot modify protected field: {field}")
23
+
24
+
25
+ @pytest.fixture(scope="module")
26
+ def models():
27
+ """Initialize LMs for different vendors"""
28
+ return {
29
+ "gpt-4o-mini": LM(
30
+ model_name="gpt-4o-mini",
31
+ formatting_model_name="gpt-4o-mini",
32
+ temperature=0.1,
33
+ structured_output_mode="forced_json",
34
+ ),
35
+ "o3-mini": LM(
36
+ model_name="o3-mini",
37
+ formatting_model_name="gpt-4o-mini",
38
+ temperature=0.1,
39
+ structured_output_mode="forced_json",
40
+ ),
41
+ "gemini-1.5-flash": LM(
42
+ model_name="gemini-1.5-flash",
43
+ formatting_model_name="gpt-4o-mini",
44
+ temperature=0.1,
45
+ structured_output_mode="stringified_json",
46
+ ),
47
+ "claude-3-haiku-20240307": LM(
48
+ model_name="claude-3-haiku-20240307",
49
+ formatting_model_name="gpt-4o-mini",
50
+ temperature=0.1,
51
+ structured_output_mode="stringified_json",
52
+ ),
53
+ "deepseek-chat": LM(
54
+ model_name="deepseek-chat",
55
+ formatting_model_name="gpt-4o-mini",
56
+ temperature=0.1,
57
+ structured_output_mode="stringified_json",
58
+ ),
59
+ "deepseek-reasoner": LM(
60
+ model_name="deepseek-reasoner",
61
+ formatting_model_name="gpt-4o-mini",
62
+ temperature=1,
63
+ structured_output_mode="stringified_json",
64
+ ),
65
+ "llama-3.1-8b-instant": LM(
66
+ model_name="llama-3.1-8b-instant",
67
+ formatting_model_name="gpt-4o-mini",
68
+ temperature=0.1,
69
+ structured_output_mode="stringified_json",
70
+ ),
71
+ }
72
+
73
+
74
+ @pytest.fixture
75
+ def system_message():
76
+ """System message for state updates"""
77
+ return """You are helping update the agent's state. Look at the current state and state_delta_instructions and update the state.
78
+
79
+ Available fields you can modify:
80
+ {
81
+ "short_term_plan": "str",
82
+ "objective": "str",
83
+ "final_results": "Dict[str, Any]"
84
+ }
85
+
86
+ Protected fields (do not modify):
87
+ {
88
+ "message_history": "Cannot directly edit message history - it is managed internally",
89
+ "step_summaries": "Cannot directly edit step summaries - they are generated automatically"
90
+ }
91
+
92
+ Please be brief, the state ought not be too long."""
93
+
94
+
95
+ @pytest.fixture
96
+ def current_state():
97
+ """Initial state for testing"""
98
+ return {
99
+ "short_term_plan": "Current plan: Review code changes",
100
+ "objective": "Review pull request",
101
+ "final_results": {
102
+ "findings": [],
103
+ "recommendations": [],
104
+ "analysis": {},
105
+ "status": "IN_PROGRESS",
106
+ },
107
+ }
108
+
109
+
110
+ @pytest.mark.timeout(15)
111
+ @pytest.mark.parametrize(
112
+ "model_name",
113
+ [
114
+ "gpt-4o-mini",
115
+ "gemini-1.5-flash",
116
+ "claude-3-haiku-20240307",
117
+ "deepseek-chat",
118
+ "llama-3.1-8b-instant",
119
+ ],
120
+ )
121
+ def test_state_delta_handling(
122
+ model_name: str, models: Dict[str, LM], system_message: str, current_state: Dict
123
+ ):
124
+ """Test that each model correctly handles state updates"""
125
+
126
+ state_delta_instructions = """Update the final_results to include findings about code quality issues. Add a recommendation to improve error handling."""
127
+ user_message = f"Current state: {current_state}\nState delta instructions: {state_delta_instructions}\n\nHow should the state be updated?"
128
+
129
+ try:
130
+ result = models[model_name].respond_sync(
131
+ system_message=system_message,
132
+ user_message=user_message,
133
+ response_model=StateUpdate,
134
+ )
135
+
136
+ # Verify response structure
137
+ assert isinstance(result, StateUpdate)
138
+
139
+ # Verify only allowed fields are present and have correct types
140
+ if result.short_term_plan is not None:
141
+ assert isinstance(result.short_term_plan, str)
142
+ if result.objective is not None:
143
+ assert isinstance(result.objective, str)
144
+ if result.final_results is not None:
145
+ assert isinstance(result.final_results, dict)
146
+
147
+ except Exception as e:
148
+ pytest.fail(f"Model {model_name} failed: {str(e)}")
149
+
150
+
151
+ @pytest.mark.timeout(15)
152
+ @pytest.mark.parametrize(
153
+ "model_name",
154
+ [
155
+ "gpt-4o-mini",
156
+ "gemini-1.5-flash",
157
+ "claude-3-haiku-20240307",
158
+ "deepseek-chat",
159
+ "llama-3.1-8b-instant",
160
+ ],
161
+ )
162
+ def test_state_delta_protected_fields(
163
+ model_name: str, models: Dict[str, LM], system_message: str
164
+ ):
165
+ """Test that models respect protected fields"""
166
+
167
+ current_state = {
168
+ "short_term_plan": "Current plan: Review code changes",
169
+ "objective": "Review pull request",
170
+ "message_history": ["Previous message 1", "Previous message 2"],
171
+ "step_summaries": ["Step 1 summary", "Step 2 summary"],
172
+ "final_results": {
173
+ "findings": [],
174
+ "recommendations": [],
175
+ "analysis": {},
176
+ "status": "IN_PROGRESS",
177
+ },
178
+ }
179
+
180
+ state_delta_instructions = """Update the message history to include new findings and update step summaries with recent progress."""
181
+ user_message = f"Current state: {current_state}\nState delta instructions: {state_delta_instructions}\n\nHow should the state be updated?"
182
+
183
+ try:
184
+ result = models[model_name].respond_sync(
185
+ system_message=system_message,
186
+ user_message=user_message,
187
+ response_model=StateUpdate,
188
+ )
189
+
190
+ # Verify no protected fields are present
191
+ assert not hasattr(result, "message_history")
192
+ assert not hasattr(result, "step_summaries")
193
+
194
+ except Exception as e:
195
+ pytest.fail(f"Model {model_name} failed: {str(e)}")
synth_ai/zyk/lms/core/all.py
@@ -6,6 +6,7 @@ from synth_ai.zyk.lms.vendors.core.openai_api import (
6
6
  )
7
7
  from synth_ai.zyk.lms.vendors.supported.deepseek import DeepSeekAPI
8
8
  from synth_ai.zyk.lms.vendors.supported.together import TogetherAPI
9
+ from synth_ai.zyk.lms.vendors.supported.groq import GroqAPI
9
10
 
10
11
 
11
12
  class OpenAIClient(OpenAIPrivate):
@@ -33,3 +34,8 @@ class DeepSeekClient(DeepSeekAPI):
33
34
  class TogetherClient(TogetherAPI):
34
35
  def __init__(self):
35
36
  super().__init__()
37
+
38
+
39
+ class GroqClient(GroqAPI):
40
+ def __init__(self):
41
+ super().__init__()
synth_ai/zyk/lms/core/main.py
@@ -11,6 +11,8 @@ from synth_ai.zyk.lms.core.vendor_clients import (
11
11
  from synth_ai.zyk.lms.structured_outputs.handler import StructuredOutputHandler
12
12
  from synth_ai.zyk.lms.vendors.base import VendorBase
13
13
 
14
+ REASONING_MODELS = ["deepseek-reasoner", "o1-mini", "o1-preview", "o1", "o3"]
15
+
14
16
 
15
17
  def build_messages(
16
18
  sys_msg: str,
@@ -105,7 +107,9 @@ class LM:
105
107
  "forced_json",
106
108
  {"max_retries": max_retries_dict.get(max_retries, 2)},
107
109
  )
108
- self.lm_config = {"temperature": temperature}
110
+ # Override temperature to 1 for reasoning models
111
+ effective_temperature = 1.0 if model_name in REASONING_MODELS else temperature
112
+ self.lm_config = {"temperature": effective_temperature}
109
113
  self.model_name = model_name
110
114
 
111
115
  def respond_sync(
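
For reference, the temperature override added in this hunk can be read as a small standalone rule. The sketch below is an illustration of that logic only, not code from the package:

```python
# Illustrative sketch of the override added in main.py above:
# reasoning models are pinned to temperature 1.0, all others keep the requested value.
REASONING_MODELS = ["deepseek-reasoner", "o1-mini", "o1-preview", "o1", "o3"]

def effective_temperature(model_name: str, requested: float) -> float:
    return 1.0 if model_name in REASONING_MODELS else requested

assert effective_temperature("o1-mini", 0.1) == 1.0
assert effective_temperature("gpt-4o-mini", 0.1) == 0.1
```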
synth_ai/zyk/lms/core/vendor_clients.py
@@ -8,6 +8,7 @@ from synth_ai.zyk.lms.core.all import (
8
8
  # OpenAIClient,
9
9
  OpenAIStructuredOutputClient,
10
10
  TogetherClient,
11
+ GroqAPI,
11
12
  )
12
13
 
13
14
  openai_naming_regexes: List[Pattern] = [
@@ -29,6 +30,11 @@ together_naming_regexes: List[Pattern] = [
29
30
  re.compile(r"^.*\/.*$"),
30
31
  ]
31
32
 
33
+ groq_naming_regexes: List[Pattern] = [
34
+ re.compile(r"^llama-3.3-70b-versatile$"),
35
+ re.compile(r"^llama-3.1-8b-instant$"),
36
+ ]
37
+
32
38
 
33
39
  def get_client(
34
40
  model_name: str,
@@ -56,5 +62,7 @@ def get_client(
56
62
  return DeepSeekClient()
57
63
  elif any(regex.match(model_name) for regex in together_naming_regexes):
58
64
  return TogetherClient()
65
+ elif any(regex.match(model_name) for regex in groq_naming_regexes):
66
+ return GroqAPI()
59
67
  else:
60
68
  raise ValueError(f"Invalid model name: {model_name}")
synth_ai/zyk/lms/vendors/supported/groq.py ADDED
@@ -0,0 +1,16 @@
1
+ import os
2
+
3
+ from dotenv import load_dotenv
4
+ from groq import AsyncGroq, Groq
5
+
6
+ from synth_ai.zyk.lms.vendors.openai_standard import OpenAIStandard
7
+
8
+ load_dotenv()
9
+
10
+
11
+ class GroqAPI(OpenAIStandard):
12
+ def __init__(self):
13
+ super().__init__(
14
+ sync_client=Groq(api_key=os.getenv("GROQ_API_KEY")),
15
+ async_client=AsyncGroq(api_key=os.getenv("GROQ_API_KEY")),
16
+ )
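
A brief instantiation sketch for the new vendor wrapper (illustrative, not from the package), assuming `GROQ_API_KEY` is available in the environment:

```python
# Sketch: construct the Groq vendor wrapper added above.
# GroqAPI builds sync and async Groq clients from GROQ_API_KEY (loaded via dotenv).
import os

from synth_ai.zyk.lms.vendors.supported.groq import GroqAPI

assert os.getenv("GROQ_API_KEY"), "GROQ_API_KEY must be set for the clients to authenticate"
groq = GroqAPI()
```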
synth_ai-0.1.0.dev9.dist-info/METADATA → synth_ai-0.1.0.dev10.dist-info/METADATA
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: synth-ai
3
- Version: 0.1.0.dev9
3
+ Version: 0.1.0.dev10
4
4
  Summary: Software for aiding the best and multiplying the will.
5
5
  Home-page: https://github.com/synth-laboratories/synth-ai
6
6
  Author: Josh Purtell
@@ -44,8 +44,11 @@ Requires-Dist: google>=3.0.0
44
44
  Requires-Dist: google-generativeai>=0.8.1
45
45
  Requires-Dist: together>=1.2.12
46
46
  Requires-Dist: langfuse>=2.56.1
47
- Requires-Dist: synth-sdk==0.3.1.dev3
47
+ Requires-Dist: synth-sdk>=0.3.1.dev3
48
48
  Requires-Dist: datasets>=3.2.0
49
+ Requires-Dist: groq>=0.18.0
50
+ Requires-Dist: pytest-timeout>=2.3.1
51
+ Requires-Dist: lock>=2018.3.25.2110
49
52
  Dynamic: author
50
53
  Dynamic: home-page
51
54
 
synth_ai-0.1.0.dev9.dist-info/RECORD → synth_ai-0.1.0.dev10.dist-info/RECORD
@@ -1,7 +1,9 @@
1
- public_tests/synth_sdk.py,sha256=fqkzyzLb_NW4k8EiP2mJ5HZk3lDTi1juyTf9Gv_9wfc,14238
1
+ private_tests/try_synth_sdk.py,sha256=vk4lUEfpQfLACFl6Qw468t_lsuYxuoIIr05WRgWKGKY,24
2
2
  public_tests/test_agent.py,sha256=CjPPWuMWC_TzX1DkDald-bbAxgjXE-HPQvFhq2B--5k,22363
3
+ public_tests/test_all_structured_outputs.py,sha256=SkvQq4paFVh2b3XfeZJ0ihd4LLKgVvVk2Yd4bIXZEEw,6307
3
4
  public_tests/test_recursive_structured_outputs.py,sha256=Ne-9XwnOxN7eSpGbNHOpegR-sRj589I84T6y8Z_4QnA,5781
4
5
  public_tests/test_structured_outputs.py,sha256=J7sfbGZ7OeB5ONIKpcCTymyayNyAdFfGokC1bcUrSx0,3651
6
+ public_tests/test_synth_sdk.py,sha256=fqkzyzLb_NW4k8EiP2mJ5HZk3lDTi1juyTf9Gv_9wfc,14238
5
7
  synth_ai/__init__.py,sha256=2siivzLbT2r-EA7m91dcJB-6Vsurc5_sX3WiKf4_o8Y,198
6
8
  synth_ai/zyk/__init__.py,sha256=zoPor1PI2OrgpCu-MBLZXcX1jAbSgD9q0kqZpTghTcQ,60
7
9
  synth_ai/zyk/lms/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -14,10 +16,10 @@ synth_ai/zyk/lms/caching/handler.py,sha256=sewq5rRfqXHzCEiXvdckbuxYp9ze_EjVSndnU
14
16
  synth_ai/zyk/lms/caching/initialize.py,sha256=zZls6RKAax6Z-8oJInGaSg_RPN_fEZ6e_RCX64lMLJw,416
15
17
  synth_ai/zyk/lms/caching/persistent.py,sha256=mQmP1z0rWVYjxwso5zIwd51Df2dWZvdHonuqsOY6SFI,2075
16
18
  synth_ai/zyk/lms/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
17
- synth_ai/zyk/lms/core/all.py,sha256=oUplKT1AsTv9Uvx2SqVjGjOsa0ml5G_GJFdKp6rPiGs,894
19
+ synth_ai/zyk/lms/core/all.py,sha256=kuuu0V1_DVJp8GtZztdwFiTwGVYIdJ946Y1V0D4zz8Y,1034
18
20
  synth_ai/zyk/lms/core/exceptions.py,sha256=K0BVdAzxVIchsvYZAaHEH1GAWBZvpxhFi-SPcJOjyPQ,205
19
- synth_ai/zyk/lms/core/main.py,sha256=LMPsr8fF93kRk5sts5Q9-acKyLsX4tmJEoifj7DQHvo,8786
20
- synth_ai/zyk/lms/core/vendor_clients.py,sha256=O2KWCFJ0XHWhd4-_UE-T0WQKKgz_SNT7X4nV9j1YEPM,1878
21
+ synth_ai/zyk/lms/core/main.py,sha256=fdAPBjnyOfLBPtcD0D79tn0f7SrKv49zCP6IhiAeH9Y,9019
22
+ synth_ai/zyk/lms/core/vendor_clients.py,sha256=tuYf9Jio9mnCajPc1dCwurdprknRGnbHv7wwHC5tIdY,2119
21
23
  synth_ai/zyk/lms/cost/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
22
24
  synth_ai/zyk/lms/cost/monitor.py,sha256=cSKIvw6WdPZIRubADWxQoh1MdB40T8-jjgfNUeUHIn0,5
23
25
  synth_ai/zyk/lms/cost/statefulness.py,sha256=TOsuXL8IjtKOYJ2aJQF8TwJVqn_wQ7AIwJJmdhMye7U,36
@@ -38,9 +40,13 @@ synth_ai/zyk/lms/vendors/local/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5
38
40
  synth_ai/zyk/lms/vendors/local/ollama.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
39
41
  synth_ai/zyk/lms/vendors/supported/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
40
42
  synth_ai/zyk/lms/vendors/supported/deepseek.py,sha256=diFfdhPMO5bLFZxnYj7VT0v6jKTlOYESBkspUuVa2eY,529
43
+ synth_ai/zyk/lms/vendors/supported/groq.py,sha256=Fbi7QvhdLx0F-VHO5PY-uIQlPR0bo3C9h1MvIOx8nz0,388
41
44
  synth_ai/zyk/lms/vendors/supported/together.py,sha256=Ni_jBqqGPN0PkkY-Ew64s3gNKk51k3FCpLSwlNhKbf0,342
42
- synth_ai-0.1.0.dev9.dist-info/LICENSE,sha256=ynhjRQUfqA_RdGRATApfFA_fBAy9cno04sLtLUqxVFM,1069
43
- synth_ai-0.1.0.dev9.dist-info/METADATA,sha256=aMcnA1M3zSQ1ZE8RzMgtmIdPSzRxor8gDhtwVzU9M4Q,2610
44
- synth_ai-0.1.0.dev9.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
45
- synth_ai-0.1.0.dev9.dist-info/top_level.txt,sha256=MKoWqlbnW0ZKcm_eLzeCpgAihgL59ZrZZ8Q1HnZwHdg,22
46
- synth_ai-0.1.0.dev9.dist-info/RECORD,,
45
+ tests/test_agent.py,sha256=CjPPWuMWC_TzX1DkDald-bbAxgjXE-HPQvFhq2B--5k,22363
46
+ tests/test_recursive_structured_outputs.py,sha256=Ne-9XwnOxN7eSpGbNHOpegR-sRj589I84T6y8Z_4QnA,5781
47
+ tests/test_structured_outputs.py,sha256=J7sfbGZ7OeB5ONIKpcCTymyayNyAdFfGokC1bcUrSx0,3651
48
+ synth_ai-0.1.0.dev10.dist-info/LICENSE,sha256=ynhjRQUfqA_RdGRATApfFA_fBAy9cno04sLtLUqxVFM,1069
49
+ synth_ai-0.1.0.dev10.dist-info/METADATA,sha256=0JR0iVQFHPMzRDsm8PxJMsAM76PYPHKONQRNZQ2LsHU,2712
50
+ synth_ai-0.1.0.dev10.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
51
+ synth_ai-0.1.0.dev10.dist-info/top_level.txt,sha256=5GzJO9j-KbJ_4ppxhmCUa_qdhHM4-9cHHNU76yAI8do,42
52
+ synth_ai-0.1.0.dev10.dist-info/RECORD,,
synth_ai-0.1.0.dev9.dist-info/top_level.txt → synth_ai-0.1.0.dev10.dist-info/top_level.txt
@@ -1,2 +1,4 @@
1
+ private_tests
1
2
  public_tests
2
3
  synth_ai
4
+ tests
tests/test_agent.py ADDED
@@ -0,0 +1,538 @@
1
+ import unittest
2
+ from typing import List, Union
3
+
4
+ from pydantic import BaseModel
5
+
6
+ from synth_ai.zyk.lms.core.main import LM
7
+
8
+
9
+ class UnitTestDict(BaseModel):
10
+ test_description: str
11
+ input_names: List[str]
12
+ input_types: List[str]
13
+ stringified_input_values: List[str]
14
+ assertion_condition: str
15
+ assertion_type: str # Consider using Literal for specific assertion types
16
+
17
+
18
+ class ActionArgument(BaseModel):
19
+ key: str
20
+ value: Union[str, int, float, bool, UnitTestDict]
21
+
22
+
23
+ class ReAct(BaseModel):
24
+ reasoning: str
25
+ action_name: str
26
+ action_args: List[ActionArgument] # Dict#[str, Dict]
27
+
28
+
29
+ system = """
30
+ <System Message>
31
+ # Premise
32
+ You are a software engineer
33
+ Here is some information about this setting
34
+ <Setting Information>
35
+ You are working to solve a computer science problem. You will need to submit a solution to the problem, which will be tested against a suite of hidden unit tests.
36
+ </Setting Information>
37
+ <Actions Available>
38
+ <edit_submission>
39
+ <action_context>
40
+ Edit the submission code. Use this when you want to make changes to the current solution.
41
+ </action_context>
42
+ <action_arg_spec>
43
+ {'first_line': <class 'int'>, 'last_line': <class 'int'>, 'new_code': <class 'str'>}
44
+ </action_arg_spec>
45
+ <action_description>
46
+ Edit the submission code
47
+ </action_description>
48
+
49
+ </edit_submission>
50
+ <add_submission>
51
+ <action_context>
52
+ Add the submission code. Use this when you want to start from scratch with a new solution.
53
+ </action_context>
54
+ <action_arg_spec>
55
+ {'submission': <class 'str'>}
56
+ </action_arg_spec>
57
+ <action_description>
58
+ Add the submission code
59
+ </action_description>
60
+
61
+ </add_submission>
62
+ <add_unit_test>
63
+ <action_context>
64
+ Add a unit test. The unit test information you submit must be in the format of a BCBUnitTest:
65
+
66
+ class BCBUnitTest(BaseModel):
67
+ test_description: str
68
+ input_names: List[str]
69
+ input_types: List[str]
70
+ input_values: List[Any]
71
+ assertion_condition: str
72
+ assertion_type: Literal["assertTrue", "assertRaises"] = "assertTrue"
73
+
74
+
75
+ It will be parsed via BCBUnitTest(**unit_test_dict)
76
+
77
+
78
+
79
+ # Some various notes:
80
+ 1. If an input should be of a type defined by a specific package, add the package name/alias to the type. E.g. "np.ndarray" or "pd.DataFrame". You still should fully define the value for the input_value field e.g. "pd.DataFrame({'a': [1, 2, 3]})"
81
+
82
+ 2. Unit tests will be compiled from the BCBUnitTest class as follows:
83
+ A. For AssertTrue type tests, the test will be compiled as follows:
84
+ ```python
85
+ def test_case(self):
86
+ # {{self.test_description}}
87
+
88
+ {{defs}}
89
+ result = {{function_name}}(**{{{{args}}}}})
90
+ self.{{self.assertion_type}}({{self.assertion_condition}})
91
+ ```
92
+ B. For AssertRaises type tests, the test will be compiled as follows:
93
+
94
+ ```python
95
+ def test_case(self):
96
+ # {{self.test_description}}
97
+ {{defs}}
98
+ with self.{{self.assertion_type}}({{self.assertion_condition}}):
99
+ {{function_name}}(**{{{{args}}}}})
100
+ ```
101
+
102
+ Provide information accordingly.
103
+
104
+ </action_context>
105
+ <action_arg_spec>
106
+ {'unit_test_name': <class 'str'>, 'unit_test_dict': typing.Dict}
107
+ </action_arg_spec>
108
+ <action_description>
109
+ Add a unit test
110
+ </action_description>
111
+
112
+ </add_unit_test>
113
+ <remove_unit_test>
114
+ <action_context>
115
+ Remove a unit test
116
+ </action_context>
117
+ <action_arg_spec>
118
+ {'unit_test_name': <class 'str'>}
119
+ </action_arg_spec>
120
+ <action_description>
121
+ Remove a unit test
122
+ </action_description>
123
+
124
+ </remove_unit_test>
125
+ <test_submission>
126
+ <action_context>
127
+ Test the submission
128
+ </action_context>
129
+ <action_arg_spec>
130
+ {}
131
+ </action_arg_spec>
132
+ <action_description>
133
+ Test the submission
134
+ </action_description>
135
+
136
+ </test_submission>
137
+ <submit_solution>
138
+ <action_context>
139
+ Submit the solution
140
+ </action_context>
141
+ <action_arg_spec>
142
+ {}
143
+ </action_arg_spec>
144
+ <action_description>
145
+ Submit the solution
146
+ </action_description>
147
+
148
+ </submit_solution>
149
+
150
+ </Actions Available>
151
+ You'll be given your past actions/thoughts, along with recent raw observations from the environment
152
+ The environment one step in the past is your current environment.
153
+
154
+ # Objective
155
+ Please complete the problem by drafting a solution, creating unit tests, improving the solution, and submitting the solution.
156
+
157
+ # Constraints
158
+ You will be given a code_prompt_for_answer, which contains imports and the function signature. Your solution must comprise code that can be appended to code_prompt_for_answer and run as a single script.
159
+
160
+ """
161
+
162
+ user = """
163
+ <User Message>
164
+ # Recent Actions / Thoughts
165
+
166
+ # Recent Observations
167
+ <1 environment step(s) in the past>{'action_result': None, 'environment_state': {'question': 'import pandas as pd\nimport numpy as np\n\n# Constants\nCOLUMNS = [\'column1\', \'column2\', \'column3\', \'column4\', \'column5\']\n\ndef task_func(df, dct):\n '''\n Replace certain values in a DataFrame with a dictionary mapping and calculate the Pearson correlation coefficient between each pair of columns.\n\n Parameters:\n df (DataFrame): The input DataFrame, containing numeric or categorical data.\n dct (dict): A dictionary for replacing values in df, where keys are existing values and values are new values.\n\n Returns:\n DataFrame: A DataFrame with the correlation coefficients between each pair of columns. The format of the DataFrame is a square matrix with column and index labels matching the columns of the input DataFrame.\n \n Requirements:\n - pandas\n - numpy\n \n Note:\n - This function operates on DataFrames containing numeric or categorical data that can be replaced with numeric values, as correlation calculations require numeric data.\n - This function using pearson method to calculate the correlation matrix.\n \n Raises:\n - This function will raise a ValueError is input df is not a DataFrame.\n \n Example:\n >>> df = pd.DataFrame({\'A\': [1, 2, 3], \'B\': [4, 5, 6]})\n >>> dct = {1: 10, 2: 20, 3: 30, 4: 40, 5: 50, 6: 60}\n >>> correlation_matrix = task_func(df, dct)\n >>> correlation_matrix.shape == (2, 2)\n True\n >>> np.allclose(correlation_matrix, np.array([[1.0, 1.0], [1.0, 1.0]]))\n True\n '''\n', 'code_prompt_for_answer': "import pandas as pd\nimport numpy as np\n# Constants\nCOLUMNS = ['column1', 'column2', 'column3', 'column4', 'column5']\ndef task_func(df, dct):\n", 'unit_tests_you_have_written': {}, 'current_solution': ''}}</1 environment step(s) in the past>
168
+
169
+ Your next actions / thought:
170
+ """
171
+
172
+
173
+ ###
174
+
175
+ hard_system = """
176
+ # Premise
177
+ You are a software engineer
178
+ Here is some information about this setting
179
+ <Setting Information>
180
+ You are working to solve a computer science problem. You will need to submit a solution to the problem, which will be tested against a suite of hidden unit tests.
181
+ </Setting Information>
182
+ <Actions Available>
183
+ <edit_submission>
184
+ <action_context>
185
+ Edit the submission code. Use this when you want to make changes to the current solution.
186
+ </action_context>
187
+ <action_arg_spec>
188
+ {'first_line': <class 'int'>, 'last_line': <class 'int'>, 'new_code': <class 'str'>}
189
+ </action_arg_spec>
190
+ <action_description>
191
+ Edit the submission code
192
+ </action_description>
193
+
194
+ </edit_submission>
195
+ <add_submission>
196
+ <action_context>
197
+ Add the submission code. Use this when you want to start from scratch with a new solution.
198
+ </action_context>
199
+ <action_arg_spec>
200
+ {'submission': <class 'str'>}
201
+ </action_arg_spec>
202
+ <action_description>
203
+ Add the submission code
204
+ </action_description>
205
+
206
+ </add_submission>
207
+ <add_unit_test>
208
+ <action_context>
209
+ Add a unit test. The unit test information you submit must be in the format of a BCBUnitTest:
210
+
211
+ class BCBUnitTest(BaseModel):
212
+ test_description: str
213
+ input_names: List[str]
214
+ input_types: List[str]
215
+ input_values: List[Any]
216
+ assertion_condition: str
217
+ assertion_type: Literal["assertTrue", "assertRaises"] = "assertTrue"
218
+
219
+
220
+ It will be parsed via BCBUnitTest(**unit_test_dict)
221
+
222
+
223
+
224
+ # Some various notes:
225
+ 1. If an input should be of a type defined by a specific package, add the package name/alias to the type. E.g. "np.ndarray" or "pd.DataFrame". You still should fully define the value for the input_value field e.g. "pd.DataFrame({'a': [1, 2, 3]})"
226
+
227
+ 2. Unit tests will be compiled from the BCBUnitTest class as follows:
228
+ A. For AssertTrue type tests, the test will be compiled as follows:
229
+ ```python
230
+ def test_case(self):
231
+ # {{self.test_description}}
232
+
233
+ {{defs}}
234
+ result = {{function_name}}(**{{{{args}}}}})
235
+ self.{{self.assertion_type}}({{self.assertion_condition}})
236
+ ```
237
+ B. For AssertRaises type tests, the test will be compiled as follows:
238
+
239
+ ```python
240
+ def test_case(self):
241
+ # {{self.test_description}}
242
+ {{defs}}
243
+ with self.{{self.assertion_type}}({{self.assertion_condition}}):
244
+ {{function_name}}(**{{{{args}}}}})
245
+ ```
246
+
247
+ Provide information accordingly.
248
+
249
+ </action_context>
250
+ <action_arg_spec>
251
+ {'unit_test_name': <class 'str'>, 'unit_test_dict': typing.Dict}
252
+ </action_arg_spec>
253
+ <action_description>
254
+ Add a unit test
255
+ </action_description>
256
+
257
+ </add_unit_test>
258
+ <remove_unit_test>
259
+ <action_context>
260
+ Remove a unit test
261
+ </action_context>
262
+ <action_arg_spec>
263
+ {'unit_test_name': <class 'str'>}
264
+ </action_arg_spec>
265
+ <action_description>
266
+ Remove a unit test
267
+ </action_description>
268
+
269
+ </remove_unit_test>
270
+ <test_submission>
271
+ <action_context>
272
+ Test the submission
273
+ </action_context>
274
+ <action_arg_spec>
275
+ {}
276
+ </action_arg_spec>
277
+ <action_description>
278
+ Test the submission
279
+ </action_description>
280
+
281
+ </test_submission>
282
+ <submit_solution>
283
+ <action_context>
284
+ Submit the solution
285
+ </action_context>
286
+ <action_arg_spec>
287
+ {}
288
+ </action_arg_spec>
289
+ <action_description>
290
+ Submit the solution
291
+ </action_description>
292
+
293
+ </submit_solution>
294
+
295
+ </Actions Available>
296
+ You'll be given your past actions/thoughts, along with recent raw observations from the environment
297
+ The environment one step in the past is your current environment.
298
+
299
+ # Objective
300
+ Please complete the problem by drafting a solution, creating unit tests, improving the solution, and submitting the solution.
301
+
302
+ # Constraints
303
+ You will be given a code_prompt_for_answer, which contains imports and the function signature. Your solution must comprise code that can be appended to code_prompt_for_answer and run as a single script.
304
+
305
+
306
+ <User Message>
307
+ # Recent Actions / Thoughts
308
+
309
+ # Recent Observations
310
+ <1 environment step(s) in the past>{'action_result': None, 'environment_state': {'question': 'import pandas as pd\nimport numpy as np\n\n# Constants\nCOLUMNS = [\'column1\', \'column2\', \'column3\', \'column4\', \'column5\']\n\ndef task_func(df, dct):\n '''\n Replace certain values in a DataFrame with a dictionary mapping and calculate the Pearson correlation coefficient between each pair of columns.\n\n Parameters:\n df (DataFrame): The input DataFrame, containing numeric or categorical data.\n dct (dict): A dictionary for replacing values in df, where keys are existing values and values are new values.\n\n Returns:\n DataFrame: A DataFrame with the correlation coefficients between each pair of columns. The format of the DataFrame is a square matrix with column and index labels matching the columns of the input DataFrame.\n \n Requirements:\n - pandas\n - numpy\n \n Note:\n - This function operates on DataFrames containing numeric or categorical data that can be replaced with numeric values, as correlation calculations require numeric data.\n - This function using pearson method to calculate the correlation matrix.\n \n Raises:\n - This function will raise a ValueError is input df is not a DataFrame.\n \n Example:\n >>> df = pd.DataFrame({\'A\': [1, 2, 3], \'B\': [4, 5, 6]})\n >>> dct = {1: 10, 2: 20, 3: 30, 4: 40, 5: 50, 6: 60}\n >>> correlation_matrix = task_func(df, dct)\n >>> correlation_matrix.shape == (2, 2)\n True\n >>> np.allclose(correlation_matrix, np.array([[1.0, 1.0], [1.0, 1.0]]))\n True\n '''\n', 'code_prompt_for_answer': "import pandas as pd\nimport numpy as np\n# Constants\nCOLUMNS = ['column1', 'column2', 'column3', 'column4', 'column5']\ndef task_func(df, dct):\n", 'unit_tests_you_have_written': {}, 'current_solution': ''}}</1 environment step(s) in the past>
311
+
312
+ Your next actions / thought:
313
+
314
+ Structured output: reasoning="I need to implement the function 'task_func' that replaces values in a DataFrame based on a dictionary and calculates the Pearson correlation coefficient between the columns. I will also ensure to handle the case where the input is not a DataFrame by raising a ValueError." action_name='edit_submission' action_args=[ActionArgument(key='first_line', value=4), ActionArgument(key='last_line', value=4), ActionArgument(key='new_code', value=" if not isinstance(df, pd.DataFrame):\n raise ValueError('Input must be a DataFrame')\n df.replace(dct, inplace=True)\n return df.corr(method='pearson')")]
315
+ <System Message>
316
+ # Premise
317
+ You are a software engineer
318
+ Here is some information about this setting
319
+ <Setting Information>
320
+ You are working to solve a computer science problem. You will need to submit a solution to the problem, which will be tested against a suite of hidden unit tests.
321
+ </Setting Information>
322
+ <Actions Available>
323
+ <edit_submission>
324
+ <action_context>
325
+ Edit the submission code. Use this when you want to make changes to the current solution.
326
+ </action_context>
327
+ <action_arg_spec>
328
+ {'first_line': <class 'int'>, 'last_line': <class 'int'>, 'new_code': <class 'str'>}
329
+ </action_arg_spec>
330
+ <action_description>
331
+ Edit the submission code
332
+ </action_description>
333
+
334
+ </edit_submission>
335
+ <add_submission>
336
+ <action_context>
337
+ Add the submission code. Use this when you want to start from scratch with a new solution.
338
+ </action_context>
339
+ <action_arg_spec>
340
+ {'submission': <class 'str'>}
341
+ </action_arg_spec>
342
+ <action_description>
343
+ Add the submission code
344
+ </action_description>
345
+
346
+ </add_submission>
347
+ <add_unit_test>
348
+ <action_context>
349
+ Add a unit test. The unit test information you submit must be in the format of a BCBUnitTest:
350
+
351
+ class BCBUnitTest(BaseModel):
352
+ test_description: str
353
+ input_names: List[str]
354
+ input_types: List[str]
355
+ input_values: List[Any]
356
+ assertion_condition: str
357
+ assertion_type: Literal["assertTrue", "assertRaises"] = "assertTrue"
358
+
359
+
360
+ It will be parsed via BCBUnitTest(**unit_test_dict)
361
+
362
+
363
+
364
+ # Some various notes:
365
+ 1. If an input should be of a type defined by a specific package, add the package name/alias to the type. E.g. "np.ndarray" or "pd.DataFrame". You still should fully define the value for the input_value field e.g. "pd.DataFrame({'a': [1, 2, 3]})"
366
+
367
+ 2. Unit tests will be compiled from the BCBUnitTest class as follows:
368
+ A. For AssertTrue type tests, the test will be compiled as follows:
369
+ ```python
370
+ def test_case(self):
371
+ # {{self.test_description}}
372
+
373
+ {{defs}}
374
+ result = {{function_name}}(**{{{{args}}}}})
375
+ self.{{self.assertion_type}}({{self.assertion_condition}})
376
+ ```
377
+ B. For AssertRaises type tests, the test will be compiled as follows:
378
+
379
+ ```python
380
+ def test_case(self):
381
+ # {{self.test_description}}
382
+ {{defs}}
383
+ with self.{{self.assertion_type}}({{self.assertion_condition}}):
384
+ {{function_name}}(**{{{{args}}}}})
385
+ ```
386
+
387
+ Provide information accordingly.
388
+
389
+ </action_context>
390
+ <action_arg_spec>
391
+ {'unit_test_name': <class 'str'>, 'unit_test_dict': typing.Dict}
392
+ </action_arg_spec>
393
+ <action_description>
394
+ Add a unit test
395
+ </action_description>
396
+
397
+ </add_unit_test>
398
+ <remove_unit_test>
399
+ <action_context>
400
+ Remove a unit test
401
+ </action_context>
402
+ <action_arg_spec>
403
+ {'unit_test_name': <class 'str'>}
404
+ </action_arg_spec>
405
+ <action_description>
406
+ Remove a unit test
407
+ </action_description>
408
+
409
+ </remove_unit_test>
410
+ <test_submission>
411
+ <action_context>
412
+ Test the submission
413
+ </action_context>
414
+ <action_arg_spec>
415
+ {}
416
+ </action_arg_spec>
417
+ <action_description>
418
+ Test the submission
419
+ </action_description>
420
+
421
+ </test_submission>
422
+ <submit_solution>
423
+ <action_context>
424
+ Submit the solution
425
+ </action_context>
426
+ <action_arg_spec>
427
+ {}
428
+ </action_arg_spec>
429
+ <action_description>
430
+ Submit the solution
431
+ </action_description>
432
+
433
+ </submit_solution>
434
+
435
+ </Actions Available>
436
+ You'll be given your past actions/thoughts, along with recent raw observations from the environment
437
+ The environment one step in the past is your current environment.
438
+
439
+ # Objective
440
+ Please complete the problem by drafting a solution, creating unit tests, improving the solution, and submitting the solution.
441
+
442
+ # Constraints
443
+ You will be given a code_prompt_for_answer, which contains imports and the function signature. Your solution must comprise code that can be appended to code_prompt_for_answer and run as a single script.
444
+ """
445
+
446
+ hard_user = """
447
+ # Recent Actions / Thoughts
448
+ <1 reasoning step(s) in the past>reasoning="I need to implement the function 'task_func' that replaces values in a DataFrame based on a dictionary and calculates the Pearson correlation coefficient between the columns. I will also ensure to handle the case where the input is not a DataFrame by raising a ValueError." action_name='edit_submission' action_args=[ActionArgument(key='first_line', value=4), ActionArgument(key='last_line', value=4), ActionArgument(key='new_code', value=" if not isinstance(df, pd.DataFrame):\n raise ValueError('Input must be a DataFrame')\n df.replace(dct, inplace=True)\n return df.corr(method='pearson')")]</1 reasoning step(s) in the past>
449
+ # Recent Observations
450
+ <1 environment step(s) in the past>success=True result='Edited submission successfully'</1 environment step(s) in the past>
451
+ <2 environment step(s) in the past>{'action_result': None, 'environment_state': {'question': 'import pandas as pd\nimport numpy as np\n\n# Constants\nCOLUMNS = [\'column1\', \'column2\', \'column3\', \'column4\', \'column5\']\n\ndef task_func(df, dct):\n '''\n Replace certain values in a DataFrame with a dictionary mapping and calculate the Pearson correlation coefficient between each pair of columns.\n\n Parameters:\n df (DataFrame): The input DataFrame, containing numeric or categorical data.\n dct (dict): A dictionary for replacing values in df, where keys are existing values and values are new values.\n\n Returns:\n DataFrame: A DataFrame with the correlation coefficients between each pair of columns. The format of the DataFrame is a square matrix with column and index labels matching the columns of the input DataFrame.\n \n Requirements:\n - pandas\n - numpy\n \n Note:\n - This function operates on DataFrames containing numeric or categorical data that can be replaced with numeric values, as correlation calculations require numeric data.\n - This function using pearson method to calculate the correlation matrix.\n \n Raises:\n - This function will raise a ValueError is input df is not a DataFrame.\n \n Example:\n >>> df = pd.DataFrame({\'A\': [1, 2, 3], \'B\': [4, 5, 6]})\n >>> dct = {1: 10, 2: 20, 3: 30, 4: 40, 5: 50, 6: 60}\n >>> correlation_matrix = task_func(df, dct)\n >>> correlation_matrix.shape == (2, 2)\n True\n >>> np.allclose(correlation_matrix, np.array([[1.0, 1.0], [1.0, 1.0]]))\n True\n '''\n', 'code_prompt_for_answer': "import pandas as pd\nimport numpy as np\n# Constants\nCOLUMNS = ['column1', 'column2', 'column3', 'column4', 'column5']\ndef task_func(df, dct):\n", 'unit_tests_you_have_written': {}, 'current_solution': ''}}</2 environment step(s) in the past>
452
+
453
+ Your next actions / thought:
454
+ """
455
+
456
+
457
+ class TestLMStructuredOutputs(unittest.TestCase):
458
+ # ... existing code ...
459
+
460
+ @classmethod
461
+ def setUpClass(cls):
462
+ # Initialize LMs for both forced_json and stringified_json modes
463
+ cls.lm_forced_json = LM(
464
+ model_name="gpt-4o-mini",
465
+ formatting_model_name="gpt-4o-mini",
466
+ temperature=0.0,
467
+ max_retries="Few",
468
+ structured_output_mode="forced_json",
469
+ )
470
+ cls.lm_stringified_json = LM(
471
+ model_name="gpt-4o-mini",
472
+ formatting_model_name="gpt-4o-mini",
473
+ temperature=0.0,
474
+ max_retries="Few",
475
+ structured_output_mode="stringified_json",
476
+ )
477
+
478
+ def test_sync_react_response_content(self):
479
+ system_message = system
480
+
481
+ user_message = user
482
+
483
+ for lm in [self.lm_forced_json, self.lm_stringified_json]:
484
+ with self.subTest(
485
+ mode=lm.structured_output_handler.handler.structured_output_mode
486
+ ):
487
+ result = lm.respond_sync(
488
+ system_message=system_message,
489
+ user_message=user_message,
490
+ response_model=ReAct,
491
+ )
492
+ self.assertIsInstance(result, ReAct)
493
+ self.assertIsInstance(result.reasoning, str)
494
+ self.assertIsInstance(result.action_name, str)
495
+ self.assertIsInstance(result.action_args, list)
496
+ for arg in result.action_args:
497
+ self.assertIsInstance(arg, ActionArgument)
498
+ self.assertIsInstance(arg.key, str)
499
+ # self.assertIsInstance(arg.value, str)
500
+
501
+ def test_sync_react_response_hard_content(self):
502
+ system_message = hard_system
503
+
504
+ user_message = hard_user
505
+
506
+ for lm in [self.lm_forced_json, self.lm_stringified_json]:
507
+ with self.subTest(
508
+ mode=lm.structured_output_handler.handler.structured_output_mode
509
+ ):
510
+ result = lm.respond_sync(
511
+ system_message=system_message,
512
+ user_message=user_message,
513
+ response_model=ReAct,
514
+ )
515
+ self.assertIsInstance(result, ReAct)
516
+ self.assertIsInstance(result.reasoning, str)
517
+ self.assertIsInstance(result.action_name, str)
518
+ self.assertIsInstance(result.action_args, list)
519
+ for arg in result.action_args:
520
+ self.assertIsInstance(arg, ActionArgument)
521
+ self.assertIsInstance(arg.key, str)
522
+ # self.assertIsInstance(arg.value, str)
523
+
524
+
525
+ # use non-trivial fallback?
526
+
527
+ if __name__ == "__main__":
528
+ # Create an instance of the test class
529
+ test_instance = TestLMStructuredOutputs()
530
+
531
+ # Set up the class (this would normally be done by unittest)
532
+ test_instance.setUpClass()
533
+
534
+ # Run the test methods
535
+ test_instance.test_sync_react_response_content()
536
+ test_instance.test_sync_react_response_hard_content()
537
+
538
+ print("All tests completed.")
tests/test_recursive_structured_outputs.py ADDED
@@ -0,0 +1,180 @@
1
+ import asyncio
2
+ import unittest
3
+ from typing import List
4
+
5
+ from pydantic import BaseModel, Field
6
+
7
+ from synth_ai.zyk.lms.core.main import LM
8
+
9
+
10
+ # Define example structured output models
11
+ class SimpleResponse(BaseModel):
12
+ message: str
13
+ confidence: float
14
+
15
+
16
+ class ComplexResponse(BaseModel):
17
+ title: str
18
+ tags: List[str]
19
+ content: str
20
+
21
+
22
+ class NestedResponse(BaseModel):
23
+ main_category: str
24
+ subcategories: List[str]
25
+ details: SimpleResponse
26
+
27
+
28
+ # Define nested structured output models
29
+ class Address(BaseModel):
30
+ street: str
31
+ city: str
32
+ country: str
33
+
34
+
35
+ class PersonalInfo(BaseModel):
36
+ name: str
37
+ age: int
38
+ address: Address
39
+
40
+
41
+ class WorkInfo(BaseModel):
42
+ company: str
43
+ position: str
44
+ years_experience: int
45
+
46
+
47
+ class NestedPersonResponse(BaseModel):
48
+ personal: PersonalInfo
49
+ work: WorkInfo
50
+ skills: List[str]
51
+
52
+
53
+ class ProjectDetails(BaseModel):
54
+ name: str
55
+ description: str
56
+ technologies: List[str]
57
+
58
+
59
+ class NestedPortfolioResponse(BaseModel):
60
+ developer: PersonalInfo
61
+ projects: List[ProjectDetails]
62
+ total_experience: int
63
+
64
+
65
+ class NestedCompanyResponse(BaseModel):
66
+ name: str
67
+ founded: int
68
+ headquarters: Address
69
+ employees: List[PersonalInfo]
70
+ main_products: List[str]
71
+
72
+
73
+ class TestLMStructuredOutputs(unittest.TestCase):
74
+ @classmethod
75
+ def setUpClass(cls):
76
+ # Initialize the LM once for all tests
77
+ cls.lm = LM(
78
+ model_name="gpt-4o-mini",
79
+ formatting_model_name="gpt-4o-mini",
80
+ temperature=0.7,
81
+ max_retries="Few",
82
+ structured_output_mode="forced_json",
83
+ )
84
+
85
+ def test_sync_simple_response(self):
86
+ result = self.lm.respond_sync(
87
+ system_message="You are a helpful assistant.",
88
+ user_message="Give me a short greeting and your confidence level.",
89
+ response_model=SimpleResponse,
90
+ )
91
+ self.assertIsInstance(result, SimpleResponse)
92
+ self.assertIsInstance(result.message, str)
93
+ self.assertIsInstance(result.confidence, float)
94
+ self.assertGreaterEqual(result.confidence, 0)
95
+ self.assertLessEqual(result.confidence, 1)
96
+
97
+ def test_sync_complex_response(self):
98
+ result = self.lm.respond_sync(
99
+ system_message="You are a content creator.",
100
+ user_message="Create a short blog post about AI.",
101
+ response_model=ComplexResponse,
102
+ )
103
+ self.assertIsInstance(result, ComplexResponse)
104
+ self.assertIsInstance(result.title, str)
105
+ self.assertIsInstance(result.tags, list)
106
+ self.assertIsInstance(result.content, str)
107
+
108
+ async def async_nested_response(self):
109
+ result = await self.lm.respond_async(
110
+ system_message="You are a categorization expert.",
111
+ user_message="Categorize 'Python' and provide a brief description.",
112
+ response_model=NestedResponse,
113
+ )
114
+ self.assertIsInstance(result, NestedResponse)
115
+ self.assertIsInstance(result.main_category, str)
116
+ self.assertIsInstance(result.subcategories, list)
117
+ self.assertIsInstance(result.details, SimpleResponse)
118
+
119
+ def test_async_nested_response(self):
120
+ asyncio.run(self.async_nested_response())
121
+
122
+
123
+ class TestLMNestedStructuredOutputs(unittest.TestCase):
124
+ @classmethod
125
+ def setUpClass(cls):
126
+ # Initialize the LM once for all tests
127
+ cls.lm = LM(
128
+ model_name="gpt-4o-mini",
129
+ formatting_model_name="gpt-4o-mini",
130
+ temperature=0.7,
131
+ max_retries="Few",
132
+ structured_output_mode="forced_json",
133
+ )
134
+
135
+ def test_sync_nested_person_response(self):
136
+ result = self.lm.respond_sync(
137
+ system_message="You are an HR assistant.",
138
+ user_message="Provide detailed information about a fictional employee named John Doe.",
139
+ response_model=NestedPersonResponse,
140
+ )
141
+ self.assertIsInstance(result, NestedPersonResponse)
142
+ self.assertIsInstance(result.personal, PersonalInfo)
143
+ self.assertIsInstance(result.personal.address, Address)
144
+ self.assertIsInstance(result.work, WorkInfo)
145
+ self.assertIsInstance(result.skills, list)
146
+
147
+ def test_sync_nested_portfolio_response(self):
148
+ result = self.lm.respond_sync(
149
+ system_message="You are a portfolio manager.",
150
+ user_message="Create a portfolio for a fictional software developer with multiple projects.",
151
+ response_model=NestedPortfolioResponse,
152
+ )
153
+ self.assertIsInstance(result, NestedPortfolioResponse)
154
+ self.assertIsInstance(result.developer, PersonalInfo)
155
+ self.assertIsInstance(result.developer.address, Address)
156
+ self.assertIsInstance(result.projects, list)
157
+ for project in result.projects:
158
+ self.assertIsInstance(project, ProjectDetails)
159
+ self.assertIsInstance(result.total_experience, int)
160
+
161
+ async def async_nested_company_response(self):
162
+ result = await self.lm.respond_async(
163
+ system_message="You are a company information specialist.",
164
+ user_message="Provide detailed information about a fictional tech company.",
165
+ response_model=NestedCompanyResponse,
166
+ )
167
+ self.assertIsInstance(result, NestedCompanyResponse)
168
+ self.assertIsInstance(result.headquarters, Address)
169
+ self.assertIsInstance(result.employees, list)
170
+ for employee in result.employees:
171
+ self.assertIsInstance(employee, PersonalInfo)
172
+ self.assertIsInstance(employee.address, Address)
173
+ self.assertIsInstance(result.main_products, list)
174
+
175
+ def test_async_nested_company_response(self):
176
+ asyncio.run(self.async_nested_company_response())
177
+
178
+
179
+ if __name__ == "__main__":
180
+ unittest.main()
tests/test_structured_outputs.py ADDED
@@ -0,0 +1,100 @@
1
+ import asyncio
2
+ import unittest
3
+ from typing import List
4
+
5
+ from pydantic import BaseModel, Field
6
+
7
+ from synth_ai.zyk.lms.core.main import LM
8
+
9
+
10
+ # Define example structured output models
11
+ class SimpleResponse(BaseModel):
12
+ message: str
13
+ confidence_between_zero_one: float = Field(
14
+ ..., description="Confidence level between 0 and 1"
15
+ )
16
+
17
+
18
+ class ComplexResponse(BaseModel):
19
+ title: str
20
+ tags: List[str]
21
+ content: str
22
+
23
+
24
+ class NestedResponse(BaseModel):
25
+ main_category: str
26
+ subcategories: List[str]
27
+ details: SimpleResponse
28
+
29
+
30
+ class TestLMStructuredOutputs(unittest.TestCase):
31
+ @classmethod
32
+ def setUpClass(cls):
33
+ # Initialize LMs for both forced_json and stringified_json modes
34
+ cls.lm_forced_json = LM(
35
+ model_name="gpt-4o-mini",
36
+ formatting_model_name="gpt-4o-mini",
37
+ temperature=0.7,
38
+ max_retries="Few",
39
+ structured_output_mode="forced_json",
40
+ )
41
+ cls.lm_stringified_json = LM(
42
+ model_name="gpt-4o-mini",
43
+ formatting_model_name="gpt-4o-mini",
44
+ temperature=0.7,
45
+ max_retries="Few",
46
+ structured_output_mode="stringified_json",
47
+ )
48
+
49
+ def test_sync_simple_response(self):
50
+ for lm in [self.lm_forced_json, self.lm_stringified_json]:
51
+ with self.subTest(
52
+ mode=lm.structured_output_handler.handler.structured_output_mode
53
+ ):
54
+ result = lm.respond_sync(
55
+ system_message="You are a helpful assistant.",
56
+ user_message="Give me a short greeting and your confidence level.",
57
+ response_model=SimpleResponse,
58
+ )
59
+ self.assertIsInstance(result, SimpleResponse)
60
+ self.assertIsInstance(result.message, str)
61
+ self.assertIsInstance(result.confidence_between_zero_one, float)
62
+ self.assertGreaterEqual(result.confidence_between_zero_one, 0)
63
+ self.assertLessEqual(result.confidence_between_zero_one, 1)
64
+
65
+ def test_sync_complex_response(self):
66
+ for lm in [self.lm_forced_json, self.lm_stringified_json]:
67
+ with self.subTest(
68
+ mode=lm.structured_output_handler.handler.structured_output_mode
69
+ ):
70
+ result = lm.respond_sync(
71
+ system_message="You are a content creator.",
72
+ user_message="Create a short blog post about AI.",
73
+ response_model=ComplexResponse,
74
+ )
75
+ self.assertIsInstance(result, ComplexResponse)
76
+ self.assertIsInstance(result.title, str)
77
+ self.assertIsInstance(result.tags, list)
78
+ self.assertIsInstance(result.content, str)
79
+
80
+ async def async_nested_response(self, lm):
81
+ result = await lm.respond_async(
82
+ system_message="You are a categorization expert.",
83
+ user_message="Categorize 'Python' and provide a brief description.",
84
+ response_model=NestedResponse,
85
+ )
86
+ self.assertIsInstance(result, NestedResponse)
87
+ self.assertIsInstance(result.main_category, str)
88
+ self.assertIsInstance(result.subcategories, list)
89
+ self.assertIsInstance(result.details, SimpleResponse)
90
+
91
+ def test_async_nested_response(self):
92
+ for lm in [self.lm_forced_json, self.lm_stringified_json]: #
93
+ with self.subTest(
94
+ mode=lm.structured_output_handler.handler.structured_output_mode
95
+ ):
96
+ asyncio.run(self.async_nested_response(lm))
97
+
98
+
99
+ if __name__ == "__main__":
100
+ unittest.main()
File without changes