vision-agent 0.2.228__py3-none-any.whl → 0.2.230__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -18,30 +18,15 @@ from vision_agent.agent.types import (
18
18
  )
19
19
  from vision_agent.agent.vision_agent_coder_v2 import format_code_context
20
20
  from vision_agent.agent.vision_agent_prompts_v2 import CONVERSATION
21
- from vision_agent.lmm import LMM, AnthropicLMM
21
+ from vision_agent.configs import Config
22
+ from vision_agent.lmm import LMM
22
23
  from vision_agent.lmm.types import Message
23
24
  from vision_agent.utils.execute import CodeInterpreter, CodeInterpreterFactory
24
25
 
25
-
26
- def run_conversation(agent: LMM, chat: List[AgentMessage]) -> str:
27
- # only keep last 10 messages
28
- conv = format_conversation(chat[-10:])
29
- prompt = CONVERSATION.format(
30
- conversation=conv,
31
- )
32
- response = agent([{"role": "user", "content": prompt}], stream=False)
33
- return cast(str, response)
26
+ CONFIG = Config()
34
27
 
35
28
 
36
- def check_for_interaction(chat: List[AgentMessage]) -> bool:
37
- return (
38
- len(chat) > 2
39
- and chat[-2].role == "interaction"
40
- and chat[-1].role == "interaction_response"
41
- )
42
-
43
-
44
- def extract_conversation_for_generate_code(
29
+ def extract_conversation(
45
30
  chat: List[AgentMessage],
46
31
  ) -> Tuple[List[AgentMessage], Optional[str]]:
47
32
  chat = copy.deepcopy(chat)
@@ -75,7 +60,27 @@ def extract_conversation_for_generate_code(
75
60
 
76
61
  extracted_chat_strip_code = [chat_i] + extracted_chat_strip_code
77
62
 
78
- return extracted_chat_strip_code[-5:], final_code
63
+ return extracted_chat_strip_code, final_code
64
+
65
+
66
def run_conversation(agent: LMM, chat: List[AgentMessage]) -> str:
    """Ask ``agent`` for the next conversational reply to ``chat``.

    The history is first passed through ``extract_conversation`` and trimmed
    to its 10 most recent messages, then rendered into the ``CONVERSATION``
    prompt template and sent to the model as a single user message.

    Args:
        agent: The LMM that is called with the rendered prompt.
        chat: The full message history of the session.

    Returns:
        The model's response, cast to ``str`` (the call is made with
        ``stream=False``).
    """
    extracted_chat, _ = extract_conversation(chat)
    # Only keep the last 10 messages so the prompt stays bounded.
    extracted_chat = extracted_chat[-10:]

    # Format the extracted, trimmed history. Formatting the raw `chat` here
    # would leave `extracted_chat` unused and silently drop the 10-message cap.
    conv = format_conversation(extracted_chat)
    prompt = CONVERSATION.format(conversation=conv)
    response = agent([{"role": "user", "content": prompt}], stream=False)
    return cast(str, response)
76
+
77
+
78
def check_for_interaction(chat: List[AgentMessage]) -> bool:
    """Report whether ``chat`` ends with an interaction/response pair.

    Args:
        chat: The message history to inspect.

    Returns:
        ``True`` only when the history holds more than two messages, the
        second-to-last has role ``"interaction"`` and the last has role
        ``"interaction_response"``; ``False`` otherwise.
    """
    # The pair must be preceded by at least one other message.
    if len(chat) <= 2:
        return False

    question, answer = chat[-2], chat[-1]
    if question.role != "interaction":
        return False
    return answer.role == "interaction_response"
79
84
 
80
85
 
81
86
  def maybe_run_action(
@@ -84,8 +89,10 @@ def maybe_run_action(
84
89
  chat: List[AgentMessage],
85
90
  code_interpreter: Optional[CodeInterpreter] = None,
86
91
  ) -> Optional[List[AgentMessage]]:
92
+ extracted_chat, final_code = extract_conversation(chat)
93
+ # only keep last 5 messages to keep context recent and not overwhelm LLM
94
+ extracted_chat = extracted_chat[-5:]
87
95
  if action == "generate_or_edit_vision_code":
88
- extracted_chat, _ = extract_conversation_for_generate_code(chat)
89
96
  # there's an issue here because coder.generate_code will send its code_context
90
97
  # to the outside user via its update_callback, but we don't necessarily have
91
98
  # access to that update_callback here, so we re-create the message using
@@ -105,7 +112,6 @@ def maybe_run_action(
105
112
  )
106
113
  ]
107
114
  elif action == "edit_code":
108
- extracted_chat, final_code = extract_conversation_for_generate_code(chat)
109
115
  plan_context = PlanContext(
110
116
  plan="Edit the latest code observed in the fewest steps possible according to the user's feedback.",
111
117
  instructions=[
@@ -158,14 +164,7 @@ class VisionAgentV2(Agent):
158
164
  that will send back intermediate conversation messages.
159
165
  """
160
166
 
161
- self.agent = (
162
- agent
163
- if agent is not None
164
- else AnthropicLMM(
165
- model_name="claude-3-5-sonnet-20241022",
166
- temperature=0.0,
167
- )
168
- )
167
+ self.agent = agent if agent is not None else CONFIG.create_agent()
169
168
  self.coder = (
170
169
  coder
171
170
  if coder is not None
@@ -0,0 +1 @@
1
+ from .config import Config
@@ -0,0 +1,150 @@
1
+ from typing import Type
2
+
3
+ from pydantic import BaseModel, Field
4
+
5
+ from vision_agent.lmm import LMM, AnthropicLMM
6
+
7
+
8
def _claude_sonnet_kwargs(temperature: float) -> dict:
    """Return a fresh kwargs dict for the default Claude 3.5 Sonnet model."""
    return {
        "model_name": "claude-3-5-sonnet-20241022",
        "temperature": temperature,
        "image_size": 768,
    }


class Config(BaseModel):
    """Per-role LMM configuration.

    Each role is described by two fields: ``<role>``, the LMM class to
    instantiate, and ``<role>_kwargs``, the keyword arguments passed to that
    class. The matching ``create_<role>()`` method builds the instance.
    In this configuration every role defaults to ``AnthropicLMM``.
    """

    # for vision_agent_v2
    agent: Type[LMM] = Field(default=AnthropicLMM)
    agent_kwargs: dict = Field(default_factory=lambda: _claude_sonnet_kwargs(0.0))

    # for vision_agent_planner_v2
    planner: Type[LMM] = Field(default=AnthropicLMM)
    planner_kwargs: dict = Field(default_factory=lambda: _claude_sonnet_kwargs(0.0))

    # for vision_agent_planner_v2
    summarizer: Type[LMM] = Field(default=AnthropicLMM)
    summarizer_kwargs: dict = Field(
        default_factory=lambda: _claude_sonnet_kwargs(0.0)
    )

    # for vision_agent_planner_v2
    critic: Type[LMM] = Field(default=AnthropicLMM)
    critic_kwargs: dict = Field(default_factory=lambda: _claude_sonnet_kwargs(0.0))

    # for vision_agent_coder_v2
    coder: Type[LMM] = Field(default=AnthropicLMM)
    coder_kwargs: dict = Field(default_factory=lambda: _claude_sonnet_kwargs(0.0))

    # for vision_agent_coder_v2
    tester: Type[LMM] = Field(default=AnthropicLMM)
    tester_kwargs: dict = Field(default_factory=lambda: _claude_sonnet_kwargs(0.0))

    # for vision_agent_coder_v2
    debugger: Type[LMM] = Field(default=AnthropicLMM)
    debugger_kwargs: dict = Field(default_factory=lambda: _claude_sonnet_kwargs(0.0))

    # for get_tool_for_task
    tool_tester: Type[LMM] = Field(default=AnthropicLMM)
    tool_tester_kwargs: dict = Field(
        default_factory=lambda: _claude_sonnet_kwargs(1.0)
    )

    # for get_tool_for_task
    tool_chooser: Type[LMM] = Field(default=AnthropicLMM)
    tool_chooser_kwargs: dict = Field(
        default_factory=lambda: _claude_sonnet_kwargs(1.0)
    )

    # for suggestions module
    suggester: Type[LMM] = Field(default=AnthropicLMM)
    suggester_kwargs: dict = Field(
        default_factory=lambda: _claude_sonnet_kwargs(1.0)
    )

    # for vqa module
    vqa: Type[LMM] = Field(default=AnthropicLMM)
    vqa_kwargs: dict = Field(default_factory=lambda: _claude_sonnet_kwargs(1.0))

    def _instantiate(self, role: str) -> LMM:
        """Build the LMM for ``role`` from ``<role>`` and ``<role>_kwargs``."""
        model_cls = getattr(self, role)
        model_kwargs = getattr(self, f"{role}_kwargs")
        return model_cls(**model_kwargs)

    def create_agent(self) -> LMM:
        """Instantiate ``agent`` with ``agent_kwargs``."""
        return self._instantiate("agent")

    def create_planner(self) -> LMM:
        """Instantiate ``planner`` with ``planner_kwargs``."""
        return self._instantiate("planner")

    def create_summarizer(self) -> LMM:
        """Instantiate ``summarizer`` with ``summarizer_kwargs``."""
        return self._instantiate("summarizer")

    def create_critic(self) -> LMM:
        """Instantiate ``critic`` with ``critic_kwargs``."""
        return self._instantiate("critic")

    def create_coder(self) -> LMM:
        """Instantiate ``coder`` with ``coder_kwargs``."""
        return self._instantiate("coder")

    def create_tester(self) -> LMM:
        """Instantiate ``tester`` with ``tester_kwargs``."""
        return self._instantiate("tester")

    def create_debugger(self) -> LMM:
        """Instantiate ``debugger`` with ``debugger_kwargs``."""
        return self._instantiate("debugger")

    def create_tool_tester(self) -> LMM:
        """Instantiate ``tool_tester`` with ``tool_tester_kwargs``."""
        return self._instantiate("tool_tester")

    def create_tool_chooser(self) -> LMM:
        """Instantiate ``tool_chooser`` with ``tool_chooser_kwargs``."""
        return self._instantiate("tool_chooser")

    def create_suggester(self) -> LMM:
        """Instantiate ``suggester`` with ``suggester_kwargs``."""
        return self._instantiate("suggester")

    def create_vqa(self) -> LMM:
        """Instantiate ``vqa`` with ``vqa_kwargs``."""
        return self._instantiate("vqa")
@@ -0,0 +1,150 @@
1
+ from typing import Type
2
+
3
+ from pydantic import BaseModel, Field
4
+
5
+ from vision_agent.lmm import LMM, AnthropicLMM, OpenAILMM
6
+
7
+
8
def _claude_sonnet_kwargs(temperature: float) -> dict:
    """Return a fresh kwargs dict for the default Claude 3.5 Sonnet model."""
    return {
        "model_name": "claude-3-5-sonnet-20241022",
        "temperature": temperature,
        "image_size": 768,
    }


class Config(BaseModel):
    """Per-role LMM configuration.

    Each role is described by two fields: ``<role>``, the LMM class to
    instantiate, and ``<role>_kwargs``, the keyword arguments passed to that
    class. The matching ``create_<role>()`` method builds the instance.
    In this configuration the summarizer defaults to ``OpenAILMM`` and every
    other role defaults to ``AnthropicLMM``.
    """

    # for vision_agent_v2
    agent: Type[LMM] = Field(default=AnthropicLMM)
    agent_kwargs: dict = Field(default_factory=lambda: _claude_sonnet_kwargs(0.0))

    # for vision_agent_planner_v2
    planner: Type[LMM] = Field(default=AnthropicLMM)
    planner_kwargs: dict = Field(default_factory=lambda: _claude_sonnet_kwargs(0.0))

    # for vision_agent_planner_v2
    summarizer: Type[LMM] = Field(default=OpenAILMM)
    summarizer_kwargs: dict = Field(
        default_factory=lambda: {
            "model_name": "o1",
            "temperature": 1.0,  # o1 has fixed temperature
            "image_size": 768,
        }
    )

    # for vision_agent_planner_v2
    critic: Type[LMM] = Field(default=AnthropicLMM)
    critic_kwargs: dict = Field(default_factory=lambda: _claude_sonnet_kwargs(0.0))

    # for vision_agent_coder_v2
    coder: Type[LMM] = Field(default=AnthropicLMM)
    coder_kwargs: dict = Field(default_factory=lambda: _claude_sonnet_kwargs(0.0))

    # for vision_agent_coder_v2
    tester: Type[LMM] = Field(default=AnthropicLMM)
    tester_kwargs: dict = Field(default_factory=lambda: _claude_sonnet_kwargs(0.0))

    # for vision_agent_coder_v2
    debugger: Type[LMM] = Field(default=AnthropicLMM)
    debugger_kwargs: dict = Field(default_factory=lambda: _claude_sonnet_kwargs(0.0))

    # for get_tool_for_task
    tool_tester: Type[LMM] = Field(default=AnthropicLMM)
    tool_tester_kwargs: dict = Field(
        default_factory=lambda: _claude_sonnet_kwargs(0.0)
    )

    # for get_tool_for_task
    tool_chooser: Type[LMM] = Field(default=AnthropicLMM)
    tool_chooser_kwargs: dict = Field(
        default_factory=lambda: _claude_sonnet_kwargs(1.0)
    )

    # for suggestions module
    suggester: Type[LMM] = Field(default=AnthropicLMM)
    suggester_kwargs: dict = Field(
        default_factory=lambda: _claude_sonnet_kwargs(1.0)
    )

    # for vqa module
    vqa: Type[LMM] = Field(default=AnthropicLMM)
    vqa_kwargs: dict = Field(default_factory=lambda: _claude_sonnet_kwargs(0.0))

    def _instantiate(self, role: str) -> LMM:
        """Build the LMM for ``role`` from ``<role>`` and ``<role>_kwargs``."""
        model_cls = getattr(self, role)
        model_kwargs = getattr(self, f"{role}_kwargs")
        return model_cls(**model_kwargs)

    def create_agent(self) -> LMM:
        """Instantiate ``agent`` with ``agent_kwargs``."""
        return self._instantiate("agent")

    def create_planner(self) -> LMM:
        """Instantiate ``planner`` with ``planner_kwargs``."""
        return self._instantiate("planner")

    def create_summarizer(self) -> LMM:
        """Instantiate ``summarizer`` with ``summarizer_kwargs``."""
        return self._instantiate("summarizer")

    def create_critic(self) -> LMM:
        """Instantiate ``critic`` with ``critic_kwargs``."""
        return self._instantiate("critic")

    def create_coder(self) -> LMM:
        """Instantiate ``coder`` with ``coder_kwargs``."""
        return self._instantiate("coder")

    def create_tester(self) -> LMM:
        """Instantiate ``tester`` with ``tester_kwargs``."""
        return self._instantiate("tester")

    def create_debugger(self) -> LMM:
        """Instantiate ``debugger`` with ``debugger_kwargs``."""
        return self._instantiate("debugger")

    def create_tool_tester(self) -> LMM:
        """Instantiate ``tool_tester`` with ``tool_tester_kwargs``."""
        return self._instantiate("tool_tester")

    def create_tool_chooser(self) -> LMM:
        """Instantiate ``tool_chooser`` with ``tool_chooser_kwargs``."""
        return self._instantiate("tool_chooser")

    def create_suggester(self) -> LMM:
        """Instantiate ``suggester`` with ``suggester_kwargs``."""
        return self._instantiate("suggester")

    def create_vqa(self) -> LMM:
        """Instantiate ``vqa`` with ``vqa_kwargs``."""
        return self._instantiate("vqa")
@@ -0,0 +1,150 @@
1
+ from typing import Type
2
+
3
+ from pydantic import BaseModel, Field
4
+
5
+ from vision_agent.lmm import LMM, AnthropicLMM, OpenAILMM
6
+
7
+
8
def _claude_sonnet_kwargs(temperature: float) -> dict:
    """Return a fresh kwargs dict for the default Claude 3.5 Sonnet model."""
    return {
        "model_name": "claude-3-5-sonnet-20241022",
        "temperature": temperature,
        "image_size": 768,
    }


class Config(BaseModel):
    """Per-role LMM configuration.

    Each role is described by two fields: ``<role>``, the LMM class to
    instantiate, and ``<role>_kwargs``, the keyword arguments passed to that
    class. The matching ``create_<role>()`` method builds the instance.
    In this configuration the summarizer defaults to ``OpenAILMM`` and every
    other role defaults to ``AnthropicLMM``.
    """

    # for vision_agent_v2
    agent: Type[LMM] = Field(default=AnthropicLMM)
    agent_kwargs: dict = Field(default_factory=lambda: _claude_sonnet_kwargs(0.0))

    # for vision_agent_planner_v2
    planner: Type[LMM] = Field(default=AnthropicLMM)
    planner_kwargs: dict = Field(default_factory=lambda: _claude_sonnet_kwargs(0.0))

    # for vision_agent_planner_v2
    summarizer: Type[LMM] = Field(default=OpenAILMM)
    summarizer_kwargs: dict = Field(
        default_factory=lambda: {
            "model_name": "o1",
            "temperature": 1.0,  # o1 has fixed temperature
            "image_size": 768,
        }
    )

    # for vision_agent_planner_v2
    critic: Type[LMM] = Field(default=AnthropicLMM)
    critic_kwargs: dict = Field(default_factory=lambda: _claude_sonnet_kwargs(0.0))

    # for vision_agent_coder_v2
    coder: Type[LMM] = Field(default=AnthropicLMM)
    coder_kwargs: dict = Field(default_factory=lambda: _claude_sonnet_kwargs(0.0))

    # for vision_agent_coder_v2
    tester: Type[LMM] = Field(default=AnthropicLMM)
    tester_kwargs: dict = Field(default_factory=lambda: _claude_sonnet_kwargs(0.0))

    # for vision_agent_coder_v2
    debugger: Type[LMM] = Field(default=AnthropicLMM)
    debugger_kwargs: dict = Field(default_factory=lambda: _claude_sonnet_kwargs(0.0))

    # for get_tool_for_task
    tool_tester: Type[LMM] = Field(default=AnthropicLMM)
    tool_tester_kwargs: dict = Field(
        default_factory=lambda: _claude_sonnet_kwargs(0.0)
    )

    # for get_tool_for_task
    tool_chooser: Type[LMM] = Field(default=AnthropicLMM)
    tool_chooser_kwargs: dict = Field(
        default_factory=lambda: _claude_sonnet_kwargs(1.0)
    )

    # for suggestions module
    suggester: Type[LMM] = Field(default=AnthropicLMM)
    suggester_kwargs: dict = Field(
        default_factory=lambda: _claude_sonnet_kwargs(1.0)
    )

    # for vqa module
    vqa: Type[LMM] = Field(default=AnthropicLMM)
    vqa_kwargs: dict = Field(default_factory=lambda: _claude_sonnet_kwargs(0.0))

    def _instantiate(self, role: str) -> LMM:
        """Build the LMM for ``role`` from ``<role>`` and ``<role>_kwargs``."""
        model_cls = getattr(self, role)
        model_kwargs = getattr(self, f"{role}_kwargs")
        return model_cls(**model_kwargs)

    def create_agent(self) -> LMM:
        """Instantiate ``agent`` with ``agent_kwargs``."""
        return self._instantiate("agent")

    def create_planner(self) -> LMM:
        """Instantiate ``planner`` with ``planner_kwargs``."""
        return self._instantiate("planner")

    def create_summarizer(self) -> LMM:
        """Instantiate ``summarizer`` with ``summarizer_kwargs``."""
        return self._instantiate("summarizer")

    def create_critic(self) -> LMM:
        """Instantiate ``critic`` with ``critic_kwargs``."""
        return self._instantiate("critic")

    def create_coder(self) -> LMM:
        """Instantiate ``coder`` with ``coder_kwargs``."""
        return self._instantiate("coder")

    def create_tester(self) -> LMM:
        """Instantiate ``tester`` with ``tester_kwargs``."""
        return self._instantiate("tester")

    def create_debugger(self) -> LMM:
        """Instantiate ``debugger`` with ``debugger_kwargs``."""
        return self._instantiate("debugger")

    def create_tool_tester(self) -> LMM:
        """Instantiate ``tool_tester`` with ``tool_tester_kwargs``."""
        return self._instantiate("tool_tester")

    def create_tool_chooser(self) -> LMM:
        """Instantiate ``tool_chooser`` with ``tool_chooser_kwargs``."""
        return self._instantiate("tool_chooser")

    def create_suggester(self) -> LMM:
        """Instantiate ``suggester`` with ``suggester_kwargs``."""
        return self._instantiate("suggester")

    def create_vqa(self) -> LMM:
        """Instantiate ``vqa`` with ``vqa_kwargs``."""
        return self._instantiate("vqa")