vision-agent 0.2.166__py3-none-any.whl → 0.2.168__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- vision_agent/agent/vision_agent.py +71 -5
- vision_agent/tools/meta_tools.py +3 -2
- {vision_agent-0.2.166.dist-info → vision_agent-0.2.168.dist-info}/METADATA +1 -1
- {vision_agent-0.2.166.dist-info → vision_agent-0.2.168.dist-info}/RECORD +6 -6
- {vision_agent-0.2.166.dist-info → vision_agent-0.2.168.dist-info}/LICENSE +0 -0
- {vision_agent-0.2.166.dist-info → vision_agent-0.2.168.dist-info}/WHEEL +0 -0
@@ -85,6 +85,15 @@ def format_agent_message(agent_message: str) -> str:
|
|
85
85
|
return output
|
86
86
|
|
87
87
|
|
88
|
+
def _clean_response(response: str) -> str:
    """Drop any text that follows the first complete <execute_python> block.

    Sometimes the LLM will hallucinate a response to an <execute_python> tag
    as if the code had already been executed. Everything after the matching
    closing tag is removed so that hallucinated output is discarded.

    Args:
        response: Raw text returned by the model.

    Returns:
        The text truncated right after the first ``</execute_python>`` that
        follows an ``<execute_python>``. The input is returned unchanged when
        there is no opening tag or no closing tag after it.
    """
    open_tag = "<execute_python>"
    close_tag = "</execute_python>"

    start = response.find(open_tag)
    if start == -1:
        return response

    # Look for the closing tag only after the opening one so a stray closing
    # tag earlier in the text cannot cut the code block off.
    end = response.find(close_tag, start + len(open_tag))
    if end == -1:
        # Unterminated block: str.find returned -1, so slicing with it would
        # keep only a meaningless fixed-length prefix. Leave the text intact.
        return response

    return response[: end + len(close_tag)]
|
95
|
+
|
96
|
+
|
88
97
|
def run_conversation(orch: LMM, chat: List[Message]) -> Dict[str, Any]:
|
89
98
|
chat = copy.deepcopy(chat)
|
90
99
|
|
@@ -114,6 +123,10 @@ def run_conversation(orch: LMM, chat: List[Message]) -> Dict[str, Any]:
|
|
114
123
|
message["media"] = chat[-1]["media"]
|
115
124
|
conv_resp = cast(str, orch([message], stream=False))
|
116
125
|
|
126
|
+
# clean the response first, if we are executing code, do not respond or end
|
127
|
+
# conversation before the code has been executed.
|
128
|
+
conv_resp = _clean_response(conv_resp)
|
129
|
+
|
117
130
|
let_user_respond_str = extract_tag(conv_resp, "let_user_respond")
|
118
131
|
let_user_respond = (
|
119
132
|
"true" in let_user_respond_str.lower() if let_user_respond_str else False
|
@@ -197,6 +210,51 @@ def add_step_descriptions(response: Dict[str, Any]) -> Dict[str, Any]:
|
|
197
210
|
return response
|
198
211
|
|
199
212
|
|
213
|
+
def new_format_to_old_format(new_format: Dict[str, Any]) -> Dict[str, Any]:
    """Convert a new-format agent response into the old-format dictionary.

    The new format keeps the code in its own ``execute_python`` field; the old
    format embeds it in ``response`` wrapped in ``<execute_python>`` tags.

    Args:
        new_format: Mapping with ``let_user_respond`` and, optionally,
            ``thinking``, ``response`` and ``execute_python``. A missing
            optional key is treated the same as an explicit ``None``.

    Returns:
        A dictionary with the keys ``thoughts``, ``response`` and
        ``let_user_respond``.

    Raises:
        KeyError: If ``let_user_respond`` is missing.
    """
    thinking = new_format.get("thinking")
    text = new_format.get("response")
    code = new_format.get("execute_python")

    thoughts = "" if thinking is None else thinking
    response = "" if text is None else text
    if code is not None:
        # Re-embed the code the way the old format carried it.
        response += f"\n<execute_python>\n{code}\n</execute_python>"

    return {
        "thoughts": thoughts,
        "response": response,
        "let_user_respond": new_format["let_user_respond"],
    }
|
225
|
+
|
226
|
+
|
227
|
+
def old_format_to_new_format(old_format_str: str) -> str:
    """Convert an old-format JSON message into the new-format JSON string.

    The old format is a JSON object with ``thoughts``, ``response`` and
    ``let_user_respond``, where any code is embedded in ``response`` inside
    ``<execute_python>`` tags. The new format carries ``thinking``,
    ``response``, ``execute_python`` and ``let_user_respond``.

    Args:
        old_format_str: Message content. Anything that is not an old-format
            JSON object is passed through untouched, since this function is
            applied to arbitrary chat content.

    Returns:
        The new-format JSON string, or ``old_format_str`` unchanged when it
        cannot be parsed as JSON or is not an old-format object.
    """
    try:
        old_format = json.loads(old_format_str)
    except (json.JSONDecodeError, TypeError):
        # TypeError covers content that is not str/bytes at all.
        return old_format_str

    # Valid JSON is not necessarily an old-format message (e.g. "42", a list,
    # or an unrelated object); leave those as they are instead of raising.
    required_keys = ("thoughts", "response", "let_user_respond")
    if not isinstance(old_format, dict) or any(
        key not in old_format for key in required_keys
    ):
        return old_format_str

    thoughts = old_format["thoughts"]
    thinking = thoughts if isinstance(thoughts, str) and thoughts.strip() != "" else None
    let_user_respond = old_format["let_user_respond"]

    response = old_format["response"]
    execute_python = None
    if isinstance(response, str) and "<execute_python>" in response:
        execute_python = extract_tag(response, "execute_python")
        # NOTE(review): extract_tag is defined elsewhere; guarded here in case
        # it yields None for an unterminated tag -- confirm against its
        # implementation. On failure the response text is kept intact.
        if execute_python is not None:
            response = (
                response.replace(execute_python, "")
                .replace("<execute_python>", "")
                .replace("</execute_python>", "")
                .strip()
            )

    return json.dumps(
        {
            "thinking": thinking,
            "response": response,
            "execute_python": execute_python,
            "let_user_respond": let_user_respond,
        }
    )
|
256
|
+
|
257
|
+
|
200
258
|
class VisionAgent(Agent):
|
201
259
|
"""Vision Agent is an agent that can chat with the user and call tools or other
|
202
260
|
agents to generate code for it. Vision Agent uses python code to execute actions
|
@@ -361,11 +419,11 @@ class VisionAgent(Agent):
|
|
361
419
|
(
|
362
420
|
{
|
363
421
|
"role": c["role"],
|
364
|
-
"content": c["content"],
|
422
|
+
"content": old_format_to_new_format(c["content"]), # type: ignore
|
365
423
|
"media": c["media"],
|
366
424
|
}
|
367
425
|
if "media" in c
|
368
|
-
else {"role": c["role"], "content": c["content"]}
|
426
|
+
else {"role": c["role"], "content": old_format_to_new_format(c["content"])} # type: ignore
|
369
427
|
)
|
370
428
|
for c in int_chat
|
371
429
|
],
|
@@ -419,13 +477,17 @@ class VisionAgent(Agent):
|
|
419
477
|
int_chat.append(
|
420
478
|
{
|
421
479
|
"role": "assistant",
|
422
|
-
"content": json.dumps(
|
480
|
+
"content": json.dumps(
|
481
|
+
new_format_to_old_format(add_step_descriptions(response))
|
482
|
+
),
|
423
483
|
}
|
424
484
|
)
|
425
485
|
orig_chat.append(
|
426
486
|
{
|
427
487
|
"role": "assistant",
|
428
|
-
"content": json.dumps(
|
488
|
+
"content": json.dumps(
|
489
|
+
new_format_to_old_format(add_step_descriptions(response))
|
490
|
+
),
|
429
491
|
}
|
430
492
|
)
|
431
493
|
|
@@ -458,7 +520,11 @@ class VisionAgent(Agent):
|
|
458
520
|
self.streaming_message(
|
459
521
|
{
|
460
522
|
"role": "assistant",
|
461
|
-
"content": json.dumps(
|
523
|
+
"content": json.dumps(
|
524
|
+
new_format_to_old_format(
|
525
|
+
add_step_descriptions(response)
|
526
|
+
)
|
527
|
+
),
|
462
528
|
"finished": finished and code_action is None,
|
463
529
|
}
|
464
530
|
)
|
vision_agent/tools/meta_tools.py
CHANGED
@@ -676,12 +676,13 @@ def use_extra_vision_agent_args(
|
|
676
676
|
for node in red:
|
677
677
|
# seems to always be atomtrailers not call type
|
678
678
|
if node.type == "atomtrailers":
|
679
|
+
if node.name.value == "generate_vision_code":
|
680
|
+
node.value[1].value.append(f"test_multi_plan={test_multi_plan}")
|
681
|
+
|
679
682
|
if (
|
680
683
|
node.name.value == "generate_vision_code"
|
681
684
|
or node.name.value == "edit_vision_code"
|
682
685
|
):
|
683
|
-
node.value[1].value.append(f"test_multi_plan={test_multi_plan}")
|
684
|
-
|
685
686
|
if custom_tool_names is not None:
|
686
687
|
node.value[1].value.append(f"custom_tool_names={custom_tool_names}")
|
687
688
|
cleaned_code = red.dumps().strip()
|
@@ -2,7 +2,7 @@ vision_agent/__init__.py,sha256=EAb4-f9iyuEYkBrX4ag1syM8Syx8118_t0R6_C34M9w,57
|
|
2
2
|
vision_agent/agent/__init__.py,sha256=RRMPhH8mgm_pCtEKiVFSjJyDi4lCr4F7k05AhK01xlM,436
|
3
3
|
vision_agent/agent/agent.py,sha256=2cjIOxEuSJrqbfPXYoV0qER5ihXsPFCoEFJa4jpqan0,597
|
4
4
|
vision_agent/agent/agent_utils.py,sha256=eSgg8CwWylX_erLTqTg2pVhEEgVkMLRrQfYRyJzI3so,5443
|
5
|
-
vision_agent/agent/vision_agent.py,sha256=
|
5
|
+
vision_agent/agent/vision_agent.py,sha256=mZpfOGIhPwo96Cr8y9sN6iG9m4npmC_bHeSxtoxtkt8,26217
|
6
6
|
vision_agent/agent/vision_agent_coder.py,sha256=aVkl0b9LKvy-auuHGYSag-ixYnue0iRQqD1PYLPBR-s,29312
|
7
7
|
vision_agent/agent/vision_agent_coder_prompts.py,sha256=gPLVXQMNSzYnQYpNm0wlH_5FPkOTaFDV24bqzK3jQ40,12221
|
8
8
|
vision_agent/agent/vision_agent_planner.py,sha256=mjmnXG9CvYf_ZA7ZJ3ri4H-2U_Km55gF1sZYRSOlxpY,19027
|
@@ -17,7 +17,7 @@ vision_agent/lmm/__init__.py,sha256=jyY1sJb_tYKg5-Wzs3p1lvwFkc-aUNZfMcLy3TOC4Zg,
|
|
17
17
|
vision_agent/lmm/lmm.py,sha256=B5ClgwvbybVCWkf9opDMLjTtJZemUU4KUkQoRxGh43I,16787
|
18
18
|
vision_agent/lmm/types.py,sha256=ZEXR_ptBL0ZwDMTDYkgxUCmSZFmBYPQd2jreNzr_8UY,221
|
19
19
|
vision_agent/tools/__init__.py,sha256=u-vS5iORB4ccvxoAjbtpvhTALDhXGilcATIq1_eZhKo,2332
|
20
|
-
vision_agent/tools/meta_tools.py,sha256=
|
20
|
+
vision_agent/tools/meta_tools.py,sha256=7XM3VP4EW4Dtg_Hvoov_laOAEaZLdSGOeA-iPb7CimU,28315
|
21
21
|
vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
|
22
22
|
vision_agent/tools/tool_utils.py,sha256=VPGqGJ2ZYEJA6AW7K9X7hQv6vRlMtAQcybE4izdToCw,8196
|
23
23
|
vision_agent/tools/tools.py,sha256=iKsBZxJ5--xWK-mqgZ1jbX_bfGS5HmAp-VRZ69m9yPg,77921
|
@@ -29,7 +29,7 @@ vision_agent/utils/image_utils.py,sha256=rm9GfXvD4JrjnqKrP_f2gfq4SzmqYC0IdC1kKwd
|
|
29
29
|
vision_agent/utils/sim.py,sha256=ZuSS07TUXFGjipmiQoY8TKRmSes7XXCdtU9PI8PC1sw,5609
|
30
30
|
vision_agent/utils/type_defs.py,sha256=BE12s3JNQy36QvauXHjwyeffVh5enfcvd4vTzSwvEZI,1384
|
31
31
|
vision_agent/utils/video.py,sha256=fOPR48-SuwMbE5eB5rc2F7lVo6k1mVHn26eEJ0QCslc,5602
|
32
|
-
vision_agent-0.2.
|
33
|
-
vision_agent-0.2.
|
34
|
-
vision_agent-0.2.
|
35
|
-
vision_agent-0.2.
|
32
|
+
vision_agent-0.2.168.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
33
|
+
vision_agent-0.2.168.dist-info/METADATA,sha256=R1REF7QKrhmMCsbDPr9NyeCV7oKFLrc7W90u850y-Rg,18034
|
34
|
+
vision_agent-0.2.168.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
|
35
|
+
vision_agent-0.2.168.dist-info/RECORD,,
|
File without changes
|
File without changes
|