vision-agent 1.0.4__py3-none-any.whl → 1.0.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,7 +2,7 @@ from typing import Type
2
2
 
3
3
  from pydantic import BaseModel, Field
4
4
 
5
- from vision_agent.lmm import LMM, AnthropicLMM
5
+ from vision_agent.lmm import LMM, AnthropicLMM, OpenAILMM
6
6
 
7
7
 
8
8
  class Config(BaseModel):
@@ -10,7 +10,7 @@ class Config(BaseModel):
10
10
  agent: Type[LMM] = Field(default=AnthropicLMM)
11
11
  agent_kwargs: dict = Field(
12
12
  default_factory=lambda: {
13
- "model_name": "claude-3-5-sonnet-20241022",
13
+ "model_name": "claude-3-7-sonnet-20250219",
14
14
  "temperature": 0.0,
15
15
  "image_size": 768,
16
16
  }
@@ -20,18 +20,17 @@ class Config(BaseModel):
20
20
  planner: Type[LMM] = Field(default=AnthropicLMM)
21
21
  planner_kwargs: dict = Field(
22
22
  default_factory=lambda: {
23
- "model_name": "claude-3-5-sonnet-20241022",
23
+ "model_name": "claude-3-7-sonnet-20250219",
24
24
  "temperature": 0.0,
25
25
  "image_size": 768,
26
26
  }
27
27
  )
28
28
 
29
- # for vision_agent_planner_v2
30
29
  summarizer: Type[LMM] = Field(default=AnthropicLMM)
31
30
  summarizer_kwargs: dict = Field(
32
31
  default_factory=lambda: {
33
- "model_name": "claude-3-5-sonnet-20241022",
34
- "temperature": 0.0,
32
+ "model_name": "claude-3-7-sonnet-20250219",
33
+ "temperature": 1.0, # o1 has fixed temperature
35
34
  "image_size": 768,
36
35
  }
37
36
  )
@@ -40,7 +39,7 @@ class Config(BaseModel):
40
39
  critic: Type[LMM] = Field(default=AnthropicLMM)
41
40
  critic_kwargs: dict = Field(
42
41
  default_factory=lambda: {
43
- "model_name": "claude-3-5-sonnet-20241022",
42
+ "model_name": "claude-3-7-sonnet-20250219",
44
43
  "temperature": 0.0,
45
44
  "image_size": 768,
46
45
  }
@@ -50,7 +49,7 @@ class Config(BaseModel):
50
49
  coder: Type[LMM] = Field(default=AnthropicLMM)
51
50
  coder_kwargs: dict = Field(
52
51
  default_factory=lambda: {
53
- "model_name": "claude-3-5-sonnet-20241022",
52
+ "model_name": "claude-3-7-sonnet-20250219",
54
53
  "temperature": 0.0,
55
54
  "image_size": 768,
56
55
  }
@@ -60,7 +59,7 @@ class Config(BaseModel):
60
59
  tester: Type[LMM] = Field(default=AnthropicLMM)
61
60
  tester_kwargs: dict = Field(
62
61
  default_factory=lambda: {
63
- "model_name": "claude-3-5-sonnet-20241022",
62
+ "model_name": "claude-3-7-sonnet-20250219",
64
63
  "temperature": 0.0,
65
64
  "image_size": 768,
66
65
  }
@@ -70,7 +69,7 @@ class Config(BaseModel):
70
69
  debugger: Type[LMM] = Field(default=AnthropicLMM)
71
70
  debugger_kwargs: dict = Field(
72
71
  default_factory=lambda: {
73
- "model_name": "claude-3-5-sonnet-20241022",
72
+ "model_name": "claude-3-7-sonnet-20250219",
74
73
  "temperature": 0.0,
75
74
  "image_size": 768,
76
75
  }
@@ -80,7 +79,7 @@ class Config(BaseModel):
80
79
  tool_tester: Type[LMM] = Field(default=AnthropicLMM)
81
80
  tool_tester_kwargs: dict = Field(
82
81
  default_factory=lambda: {
83
- "model_name": "claude-3-5-sonnet-20241022",
82
+ "model_name": "claude-3-7-sonnet-20250219",
84
83
  "temperature": 0.0,
85
84
  "image_size": 768,
86
85
  }
@@ -90,19 +89,30 @@ class Config(BaseModel):
90
89
  tool_chooser: Type[LMM] = Field(default=AnthropicLMM)
91
90
  tool_chooser_kwargs: dict = Field(
92
91
  default_factory=lambda: {
93
- "model_name": "claude-3-5-sonnet-20241022",
92
+ "model_name": "claude-3-7-sonnet-20250219",
94
93
  "temperature": 1.0,
95
94
  "image_size": 768,
96
95
  }
97
96
  )
98
97
 
98
+ # for get_tool_for_task
99
+ od_judge: Type[LMM] = Field(default=AnthropicLMM)
100
+ od_judge_kwargs: dict = Field(
101
+ default_factory=lambda: {
102
+ "model_name": "claude-3-7-sonnet-20250219",
103
+ "temperature": 0.0,
104
+ "image_size": 512,
105
+ }
106
+ )
107
+
99
108
  # for suggestions module
100
- suggester: Type[LMM] = Field(default=AnthropicLMM)
109
+ suggester: Type[LMM] = Field(default=OpenAILMM)
101
110
  suggester_kwargs: dict = Field(
102
111
  default_factory=lambda: {
103
- "model_name": "claude-3-5-sonnet-20241022",
112
+ "model_name": "o1",
104
113
  "temperature": 1.0,
105
- "image_size": 768,
114
+ "image_detail": "high",
115
+ "image_size": 1024,
106
116
  }
107
117
  )
108
118
 
@@ -110,7 +120,7 @@ class Config(BaseModel):
110
120
  vqa: Type[LMM] = Field(default=AnthropicLMM)
111
121
  vqa_kwargs: dict = Field(
112
122
  default_factory=lambda: {
113
- "model_name": "claude-3-5-sonnet-20241022",
123
+ "model_name": "claude-3-7-sonnet-20250219",
114
124
  "temperature": 0.0,
115
125
  "image_size": 768,
116
126
  }
@@ -143,6 +153,9 @@ class Config(BaseModel):
143
153
  def create_tool_chooser(self) -> LMM:
144
154
  return self.tool_chooser(**self.tool_chooser_kwargs)
145
155
 
156
+ def create_od_judge(self) -> LMM:
157
+ return self.od_judge(**self.od_judge_kwargs)
158
+
146
159
  def create_suggester(self) -> LMM:
147
160
  return self.suggester(**self.suggester_kwargs)
148
161
 
@@ -2,7 +2,7 @@ from typing import Type
2
2
 
3
3
  from pydantic import BaseModel, Field
4
4
 
5
- from vision_agent.lmm import LMM, AnthropicLMM, OpenAILMM
5
+ from vision_agent.lmm import LMM, AnthropicLMM, OpenAILMM, GoogleLMM
6
6
 
7
7
 
8
8
  class Config(BaseModel):
@@ -10,7 +10,7 @@ class Config(BaseModel):
10
10
  agent: Type[LMM] = Field(default=AnthropicLMM)
11
11
  agent_kwargs: dict = Field(
12
12
  default_factory=lambda: {
13
- "model_name": "claude-3-5-sonnet-20241022",
13
+ "model_name": "claude-3-7-sonnet-20250219",
14
14
  "temperature": 0.0,
15
15
  "image_size": 768,
16
16
  }
@@ -20,17 +20,16 @@ class Config(BaseModel):
20
20
  planner: Type[LMM] = Field(default=AnthropicLMM)
21
21
  planner_kwargs: dict = Field(
22
22
  default_factory=lambda: {
23
- "model_name": "claude-3-5-sonnet-20241022",
23
+ "model_name": "claude-3-7-sonnet-20250219",
24
24
  "temperature": 0.0,
25
25
  "image_size": 768,
26
26
  }
27
27
  )
28
28
 
29
- # for vision_agent_planner_v2
30
- summarizer: Type[LMM] = Field(default=OpenAILMM)
29
+ summarizer: Type[LMM] = Field(default=AnthropicLMM)
31
30
  summarizer_kwargs: dict = Field(
32
31
  default_factory=lambda: {
33
- "model_name": "o1",
32
+ "model_name": "claude-3-7-sonnet-20250219",
34
33
  "temperature": 1.0, # o1 has fixed temperature
35
34
  "image_size": 768,
36
35
  }
@@ -40,7 +39,7 @@ class Config(BaseModel):
40
39
  critic: Type[LMM] = Field(default=AnthropicLMM)
41
40
  critic_kwargs: dict = Field(
42
41
  default_factory=lambda: {
43
- "model_name": "claude-3-5-sonnet-20241022",
42
+ "model_name": "claude-3-7-sonnet-20250219",
44
43
  "temperature": 0.0,
45
44
  "image_size": 768,
46
45
  }
@@ -50,7 +49,7 @@ class Config(BaseModel):
50
49
  coder: Type[LMM] = Field(default=AnthropicLMM)
51
50
  coder_kwargs: dict = Field(
52
51
  default_factory=lambda: {
53
- "model_name": "claude-3-5-sonnet-20241022",
52
+ "model_name": "claude-3-7-sonnet-20250219",
54
53
  "temperature": 0.0,
55
54
  "image_size": 768,
56
55
  }
@@ -60,7 +59,7 @@ class Config(BaseModel):
60
59
  tester: Type[LMM] = Field(default=AnthropicLMM)
61
60
  tester_kwargs: dict = Field(
62
61
  default_factory=lambda: {
63
- "model_name": "claude-3-5-sonnet-20241022",
62
+ "model_name": "claude-3-7-sonnet-20250219",
64
63
  "temperature": 0.0,
65
64
  "image_size": 768,
66
65
  }
@@ -70,7 +69,7 @@ class Config(BaseModel):
70
69
  debugger: Type[LMM] = Field(default=AnthropicLMM)
71
70
  debugger_kwargs: dict = Field(
72
71
  default_factory=lambda: {
73
- "model_name": "claude-3-5-sonnet-20241022",
72
+ "model_name": "claude-3-7-sonnet-20250219",
74
73
  "temperature": 0.0,
75
74
  "image_size": 768,
76
75
  }
@@ -80,7 +79,7 @@ class Config(BaseModel):
80
79
  tool_tester: Type[LMM] = Field(default=AnthropicLMM)
81
80
  tool_tester_kwargs: dict = Field(
82
81
  default_factory=lambda: {
83
- "model_name": "claude-3-5-sonnet-20241022",
82
+ "model_name": "claude-3-7-sonnet-20250219",
84
83
  "temperature": 0.0,
85
84
  "image_size": 768,
86
85
  }
@@ -90,7 +89,7 @@ class Config(BaseModel):
90
89
  tool_chooser: Type[LMM] = Field(default=AnthropicLMM)
91
90
  tool_chooser_kwargs: dict = Field(
92
91
  default_factory=lambda: {
93
- "model_name": "claude-3-5-sonnet-20241022",
92
+ "model_name": "claude-3-7-sonnet-20250219",
94
93
  "temperature": 1.0,
95
94
  "image_size": 768,
96
95
  }
@@ -100,7 +99,7 @@ class Config(BaseModel):
100
99
  od_judge: Type[LMM] = Field(default=AnthropicLMM)
101
100
  od_judge_kwargs: dict = Field(
102
101
  default_factory=lambda: {
103
- "model_name": "claude-3-5-sonnet-20241022",
102
+ "model_name": "claude-3-7-sonnet-20250219",
104
103
  "temperature": 0.0,
105
104
  "image_size": 512,
106
105
  }
@@ -118,10 +117,10 @@ class Config(BaseModel):
118
117
  )
119
118
 
120
119
  # for vqa module
121
- vqa: Type[LMM] = Field(default=AnthropicLMM)
120
+ vqa: Type[LMM] = Field(default=GoogleLMM)
122
121
  vqa_kwargs: dict = Field(
123
122
  default_factory=lambda: {
124
- "model_name": "claude-3-5-sonnet-20241022",
123
+ "model_name": "gemini-2.0-flash-exp",
125
124
  "temperature": 0.0,
126
125
  "image_size": 768,
127
126
  }
@@ -10,7 +10,7 @@ class Config(BaseModel):
10
10
  agent: Type[LMM] = Field(default=OpenAILMM)
11
11
  agent_kwargs: dict = Field(
12
12
  default_factory=lambda: {
13
- "model_name": "gpt-4o-2024-08-06",
13
+ "model_name": "gpt-4o-2024-11-20",
14
14
  "temperature": 0.0,
15
15
  "image_size": 768,
16
16
  "image_detail": "low",
@@ -21,7 +21,7 @@ class Config(BaseModel):
21
21
  planner: Type[LMM] = Field(default=OpenAILMM)
22
22
  planner_kwargs: dict = Field(
23
23
  default_factory=lambda: {
24
- "model_name": "gpt-4o-2024-08-06",
24
+ "model_name": "gpt-4o-2024-11-20",
25
25
  "temperature": 0.0,
26
26
  "image_size": 768,
27
27
  "image_detail": "low",
@@ -42,7 +42,7 @@ class Config(BaseModel):
42
42
  critic: Type[LMM] = Field(default=OpenAILMM)
43
43
  critic_kwargs: dict = Field(
44
44
  default_factory=lambda: {
45
- "model_name": "gpt-4o-2024-08-06",
45
+ "model_name": "gpt-4o-2024-11-20",
46
46
  "temperature": 0.0,
47
47
  "image_size": 768,
48
48
  "image_detail": "low",
@@ -53,7 +53,7 @@ class Config(BaseModel):
53
53
  coder: Type[LMM] = Field(default=OpenAILMM)
54
54
  coder_kwargs: dict = Field(
55
55
  default_factory=lambda: {
56
- "model_name": "gpt-4o-2024-08-06",
56
+ "model_name": "gpt-4o-2024-11-20",
57
57
  "temperature": 0.0,
58
58
  "image_size": 768,
59
59
  "image_detail": "low",
@@ -64,7 +64,7 @@ class Config(BaseModel):
64
64
  tester: Type[LMM] = Field(default=OpenAILMM)
65
65
  tester_kwargs: dict = Field(
66
66
  default_factory=lambda: {
67
- "model_name": "gpt-4o-2024-08-06",
67
+ "model_name": "gpt-4o-2024-11-20",
68
68
  "temperature": 0.0,
69
69
  "image_size": 768,
70
70
  "image_detail": "low",
@@ -75,7 +75,7 @@ class Config(BaseModel):
75
75
  debugger: Type[LMM] = Field(default=OpenAILMM)
76
76
  debugger_kwargs: dict = Field(
77
77
  default_factory=lambda: {
78
- "model_name": "gpt-4o-2024-08-06",
78
+ "model_name": "gpt-4o-2024-11-20",
79
79
  "temperature": 0.0,
80
80
  "image_size": 768,
81
81
  "image_detail": "low",
@@ -86,7 +86,7 @@ class Config(BaseModel):
86
86
  tool_tester: Type[LMM] = Field(default=OpenAILMM)
87
87
  tool_tester_kwargs: dict = Field(
88
88
  default_factory=lambda: {
89
- "model_name": "gpt-4o-2024-08-06",
89
+ "model_name": "gpt-4o-2024-11-20",
90
90
  "temperature": 0.0,
91
91
  "image_size": 768,
92
92
  "image_detail": "low",
@@ -97,7 +97,7 @@ class Config(BaseModel):
97
97
  tool_chooser: Type[LMM] = Field(default=OpenAILMM)
98
98
  tool_chooser_kwargs: dict = Field(
99
99
  default_factory=lambda: {
100
- "model_name": "gpt-4o-2024-08-06",
100
+ "model_name": "gpt-4o-2024-11-20",
101
101
  "temperature": 1.0,
102
102
  "image_size": 768,
103
103
  "image_detail": "low",
@@ -108,7 +108,7 @@ class Config(BaseModel):
108
108
  suggester: Type[LMM] = Field(default=OpenAILMM)
109
109
  suggester_kwargs: dict = Field(
110
110
  default_factory=lambda: {
111
- "model_name": "gpt-4o-2024-08-06",
111
+ "model_name": "gpt-4o-2024-11-20",
112
112
  "temperature": 1.0,
113
113
  "image_size": 768,
114
114
  "image_detail": "low",
@@ -119,7 +119,7 @@ class Config(BaseModel):
119
119
  vqa: Type[LMM] = Field(default=OpenAILMM)
120
120
  vqa_kwargs: dict = Field(
121
121
  default_factory=lambda: {
122
- "model_name": "gpt-4o-2024-08-06",
122
+ "model_name": "gpt-4o-2024-11-20",
123
123
  "temperature": 0.0,
124
124
  "image_size": 768,
125
125
  "image_detail": "low",
vision_agent/lmm/lmm.py CHANGED
@@ -98,7 +98,7 @@ class OpenAILMM(LMM):
98
98
  for c in chat:
99
99
  fixed_c = {"role": c["role"]}
100
100
  fixed_c["content"] = [{"type": "text", "text": c["content"]}] # type: ignore
101
- if "media" in c:
101
+ if "media" in c and self.model_name != "o3-mini":
102
102
  for media in c["media"]:
103
103
  resize = kwargs["resize"] if "resize" in kwargs else self.image_size
104
104
  image_detail = (
@@ -154,7 +154,7 @@ class OpenAILMM(LMM):
154
154
  ],
155
155
  }
156
156
  ]
157
- if media and len(media) > 0:
157
+ if media and len(media) > 0 and self.model_name != "o3-mini":
158
158
  for m in media:
159
159
  resize = kwargs["resize"] if "resize" in kwargs else None
160
160
  image_detail = (
@@ -1,13 +1,7 @@
1
1
  from typing import Callable, List, Optional
2
2
 
3
3
  from .meta_tools import (
4
- create_code_artifact,
5
- edit_code_artifact,
6
- edit_vision_code,
7
- generate_vision_code,
8
4
  get_tool_descriptions,
9
- list_artifacts,
10
- open_code_artifact,
11
5
  view_media_artifact,
12
6
  )
13
7
  from .planner_tools import judge_od_results