vision-agent 0.2.166__py3-none-any.whl → 0.2.168__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vision_agent/agent/vision_agent.py +71 -5
- vision_agent/tools/meta_tools.py +3 -2
- {vision_agent-0.2.166.dist-info → vision_agent-0.2.168.dist-info}/METADATA +1 -1
- {vision_agent-0.2.166.dist-info → vision_agent-0.2.168.dist-info}/RECORD +6 -6
- {vision_agent-0.2.166.dist-info → vision_agent-0.2.168.dist-info}/LICENSE +0 -0
- {vision_agent-0.2.166.dist-info → vision_agent-0.2.168.dist-info}/WHEEL +0 -0
@@ -85,6 +85,15 @@ def format_agent_message(agent_message: str) -> str:
|
|
85
85
|
return output
|
86
86
|
|
87
87
|
|
88
|
+
def _clean_response(response: str) -> str:
    """Drop anything the LLM wrote after its first complete <execute_python> block.

    Sometimes the LLM will hallucinate a response to an <execute_python> tag as
    if it had already executed the code. Everything after the closing tag is
    removed so that only the real code request remains.

    Args:
        response: Raw text returned by the LLM.

    Returns:
        ``response`` truncated just after the first ``</execute_python>`` that
        follows the first ``<execute_python>``. The input is returned unchanged
        when there is no opening tag or no closing tag after it.
    """
    open_tag = "<execute_python>"
    close_tag = "</execute_python>"

    start = response.find(open_tag)
    if start == -1:
        return response

    # Search only after the opening tag so a stray closing tag earlier in the
    # text cannot cut the real code block off.
    end = response.find(close_tag, start + len(open_tag))
    if end == -1:
        # str.find returns -1 when the closing tag is missing; slicing with
        # that value would keep only the first few characters of the response.
        return response

    return response[: end + len(close_tag)]
|
95
|
+
|
96
|
+
|
88
97
|
def run_conversation(orch: LMM, chat: List[Message]) -> Dict[str, Any]:
|
89
98
|
chat = copy.deepcopy(chat)
|
90
99
|
|
@@ -114,6 +123,10 @@ def run_conversation(orch: LMM, chat: List[Message]) -> Dict[str, Any]:
|
|
114
123
|
message["media"] = chat[-1]["media"]
|
115
124
|
conv_resp = cast(str, orch([message], stream=False))
|
116
125
|
|
126
|
+
# clean the response first, if we are executing code, do not respond or end
|
127
|
+
# conversation before the code has been executed.
|
128
|
+
conv_resp = _clean_response(conv_resp)
|
129
|
+
|
117
130
|
let_user_respond_str = extract_tag(conv_resp, "let_user_respond")
|
118
131
|
let_user_respond = (
|
119
132
|
"true" in let_user_respond_str.lower() if let_user_respond_str else False
|
@@ -197,6 +210,51 @@ def add_step_descriptions(response: Dict[str, Any]) -> Dict[str, Any]:
|
|
197
210
|
return response
|
198
211
|
|
199
212
|
|
213
|
+
def new_format_to_old_format(new_format: Dict[str, Any]) -> Dict[str, Any]:
    """Convert a new-style agent response dict into the legacy layout.

    The new layout keeps ``thinking``, ``response`` and ``execute_python`` in
    separate fields. The legacy layout has ``thoughts`` and a single
    ``response`` string in which any code is embedded between
    ``<execute_python>`` tags.

    Args:
        new_format: Dict with the keys ``thinking``, ``response``,
            ``execute_python`` and ``let_user_respond``.

    Returns:
        Dict with the keys ``thoughts``, ``response`` and ``let_user_respond``.
        ``None`` values for thinking/response become empty strings.
    """
    reasoning = new_format["thinking"]
    if reasoning is None:
        reasoning = ""

    text = new_format["response"]
    if text is None:
        text = ""

    code = new_format["execute_python"]
    if code is not None:
        # Legacy consumers expect the code inline, wrapped in tags.
        code_block = f"\n<execute_python>\n{code}\n</execute_python>"
        text += code_block

    legacy: Dict[str, Any] = {"thoughts": reasoning, "response": text}
    legacy["let_user_respond"] = new_format["let_user_respond"]
    return legacy
|
225
|
+
|
226
|
+
|
227
|
+
def old_format_to_new_format(old_format_str: str) -> str:
    """Convert a legacy JSON-encoded agent message into the new JSON layout.

    The legacy layout is a JSON object with the keys ``thoughts``,
    ``response`` and ``let_user_respond``, where code is embedded in
    ``response`` between ``<execute_python>`` tags. The new layout is a JSON
    object with ``thinking``, ``response``, ``execute_python`` and
    ``let_user_respond``.

    This function is applied to every chat message's content, so anything that
    is not a legacy agent message is passed through untouched instead of
    raising.

    Args:
        old_format_str: Message content; expected to be a string.

    Returns:
        The new-layout JSON string, or ``old_format_str`` unchanged when it is
        not valid JSON, is not a string, or does not decode to a legacy
        message object.
    """
    try:
        old_format = json.loads(old_format_str)
    except (json.JSONDecodeError, TypeError):
        # JSONDecodeError: plain text. TypeError: content that is not
        # str/bytes (json.loads rejects it before parsing).
        return old_format_str

    # Valid JSON is not necessarily a legacy message: "123", "true" or an
    # unrelated object must be left alone rather than indexed into.
    if not isinstance(old_format, dict):
        return old_format_str
    if not all(
        key in old_format for key in ("thoughts", "response", "let_user_respond")
    ):
        return old_format_str

    response = old_format["response"]
    if not isinstance(response, str):
        return old_format_str

    thoughts = old_format["thoughts"]
    # Blank (or non-string) thoughts are represented as None in the new layout.
    thinking = thoughts if isinstance(thoughts, str) and thoughts.strip() != "" else None
    let_user_respond = old_format["let_user_respond"]

    execute_python = None
    if "<execute_python>" in response:
        # extract_tag is defined outside this block; guard against it yielding
        # None (presumably when the tag cannot be parsed -- TODO confirm).
        execute_python = extract_tag(response, "execute_python")
        if execute_python is not None:
            response = (
                response.replace(execute_python, "")
                .replace("<execute_python>", "")
                .replace("</execute_python>", "")
                .strip()
            )

    return json.dumps(
        {
            "thinking": thinking,
            "response": response,
            "execute_python": execute_python,
            "let_user_respond": let_user_respond,
        }
    )
|
256
|
+
|
257
|
+
|
200
258
|
class VisionAgent(Agent):
|
201
259
|
"""Vision Agent is an agent that can chat with the user and call tools or other
|
202
260
|
agents to generate code for it. Vision Agent uses python code to execute actions
|
@@ -361,11 +419,11 @@ class VisionAgent(Agent):
|
|
361
419
|
(
|
362
420
|
{
|
363
421
|
"role": c["role"],
|
364
|
-
"content": c["content"],
|
422
|
+
"content": old_format_to_new_format(c["content"]), # type: ignore
|
365
423
|
"media": c["media"],
|
366
424
|
}
|
367
425
|
if "media" in c
|
368
|
-
else {"role": c["role"], "content": c["content"]}
|
426
|
+
else {"role": c["role"], "content": old_format_to_new_format(c["content"])} # type: ignore
|
369
427
|
)
|
370
428
|
for c in int_chat
|
371
429
|
],
|
@@ -419,13 +477,17 @@ class VisionAgent(Agent):
|
|
419
477
|
int_chat.append(
|
420
478
|
{
|
421
479
|
"role": "assistant",
|
422
|
-
"content": json.dumps(
|
480
|
+
"content": json.dumps(
|
481
|
+
new_format_to_old_format(add_step_descriptions(response))
|
482
|
+
),
|
423
483
|
}
|
424
484
|
)
|
425
485
|
orig_chat.append(
|
426
486
|
{
|
427
487
|
"role": "assistant",
|
428
|
-
"content": json.dumps(
|
488
|
+
"content": json.dumps(
|
489
|
+
new_format_to_old_format(add_step_descriptions(response))
|
490
|
+
),
|
429
491
|
}
|
430
492
|
)
|
431
493
|
|
@@ -458,7 +520,11 @@ class VisionAgent(Agent):
|
|
458
520
|
self.streaming_message(
|
459
521
|
{
|
460
522
|
"role": "assistant",
|
461
|
-
"content": json.dumps(
|
523
|
+
"content": json.dumps(
|
524
|
+
new_format_to_old_format(
|
525
|
+
add_step_descriptions(response)
|
526
|
+
)
|
527
|
+
),
|
462
528
|
"finished": finished and code_action is None,
|
463
529
|
}
|
464
530
|
)
|
vision_agent/tools/meta_tools.py
CHANGED
@@ -676,12 +676,13 @@ def use_extra_vision_agent_args(
|
|
676
676
|
for node in red:
|
677
677
|
# seems to always be atomtrailers not call type
|
678
678
|
if node.type == "atomtrailers":
|
679
|
+
if node.name.value == "generate_vision_code":
|
680
|
+
node.value[1].value.append(f"test_multi_plan={test_multi_plan}")
|
681
|
+
|
679
682
|
if (
|
680
683
|
node.name.value == "generate_vision_code"
|
681
684
|
or node.name.value == "edit_vision_code"
|
682
685
|
):
|
683
|
-
node.value[1].value.append(f"test_multi_plan={test_multi_plan}")
|
684
|
-
|
685
686
|
if custom_tool_names is not None:
|
686
687
|
node.value[1].value.append(f"custom_tool_names={custom_tool_names}")
|
687
688
|
cleaned_code = red.dumps().strip()
|
@@ -2,7 +2,7 @@ vision_agent/__init__.py,sha256=EAb4-f9iyuEYkBrX4ag1syM8Syx8118_t0R6_C34M9w,57
|
|
2
2
|
vision_agent/agent/__init__.py,sha256=RRMPhH8mgm_pCtEKiVFSjJyDi4lCr4F7k05AhK01xlM,436
|
3
3
|
vision_agent/agent/agent.py,sha256=2cjIOxEuSJrqbfPXYoV0qER5ihXsPFCoEFJa4jpqan0,597
|
4
4
|
vision_agent/agent/agent_utils.py,sha256=eSgg8CwWylX_erLTqTg2pVhEEgVkMLRrQfYRyJzI3so,5443
|
5
|
-
vision_agent/agent/vision_agent.py,sha256=
|
5
|
+
vision_agent/agent/vision_agent.py,sha256=mZpfOGIhPwo96Cr8y9sN6iG9m4npmC_bHeSxtoxtkt8,26217
|
6
6
|
vision_agent/agent/vision_agent_coder.py,sha256=aVkl0b9LKvy-auuHGYSag-ixYnue0iRQqD1PYLPBR-s,29312
|
7
7
|
vision_agent/agent/vision_agent_coder_prompts.py,sha256=gPLVXQMNSzYnQYpNm0wlH_5FPkOTaFDV24bqzK3jQ40,12221
|
8
8
|
vision_agent/agent/vision_agent_planner.py,sha256=mjmnXG9CvYf_ZA7ZJ3ri4H-2U_Km55gF1sZYRSOlxpY,19027
|
@@ -17,7 +17,7 @@ vision_agent/lmm/__init__.py,sha256=jyY1sJb_tYKg5-Wzs3p1lvwFkc-aUNZfMcLy3TOC4Zg,
|
|
17
17
|
vision_agent/lmm/lmm.py,sha256=B5ClgwvbybVCWkf9opDMLjTtJZemUU4KUkQoRxGh43I,16787
|
18
18
|
vision_agent/lmm/types.py,sha256=ZEXR_ptBL0ZwDMTDYkgxUCmSZFmBYPQd2jreNzr_8UY,221
|
19
19
|
vision_agent/tools/__init__.py,sha256=u-vS5iORB4ccvxoAjbtpvhTALDhXGilcATIq1_eZhKo,2332
|
20
|
-
vision_agent/tools/meta_tools.py,sha256=
|
20
|
+
vision_agent/tools/meta_tools.py,sha256=7XM3VP4EW4Dtg_Hvoov_laOAEaZLdSGOeA-iPb7CimU,28315
|
21
21
|
vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
|
22
22
|
vision_agent/tools/tool_utils.py,sha256=VPGqGJ2ZYEJA6AW7K9X7hQv6vRlMtAQcybE4izdToCw,8196
|
23
23
|
vision_agent/tools/tools.py,sha256=iKsBZxJ5--xWK-mqgZ1jbX_bfGS5HmAp-VRZ69m9yPg,77921
|
@@ -29,7 +29,7 @@ vision_agent/utils/image_utils.py,sha256=rm9GfXvD4JrjnqKrP_f2gfq4SzmqYC0IdC1kKwd
|
|
29
29
|
vision_agent/utils/sim.py,sha256=ZuSS07TUXFGjipmiQoY8TKRmSes7XXCdtU9PI8PC1sw,5609
|
30
30
|
vision_agent/utils/type_defs.py,sha256=BE12s3JNQy36QvauXHjwyeffVh5enfcvd4vTzSwvEZI,1384
|
31
31
|
vision_agent/utils/video.py,sha256=fOPR48-SuwMbE5eB5rc2F7lVo6k1mVHn26eEJ0QCslc,5602
|
32
|
-
vision_agent-0.2.
|
33
|
-
vision_agent-0.2.
|
34
|
-
vision_agent-0.2.
|
35
|
-
vision_agent-0.2.
|
32
|
+
vision_agent-0.2.168.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
33
|
+
vision_agent-0.2.168.dist-info/METADATA,sha256=R1REF7QKrhmMCsbDPr9NyeCV7oKFLrc7W90u850y-Rg,18034
|
34
|
+
vision_agent-0.2.168.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
|
35
|
+
vision_agent-0.2.168.dist-info/RECORD,,
|
File without changes
|
File without changes
|