vision-agent 0.2.166__tar.gz → 0.2.168__tar.gz
Sign up to get free protection for your applications and to get access to all the features.
- {vision_agent-0.2.166 → vision_agent-0.2.168}/PKG-INFO +1 -1
- {vision_agent-0.2.166 → vision_agent-0.2.168}/pyproject.toml +1 -1
- {vision_agent-0.2.166 → vision_agent-0.2.168}/vision_agent/agent/vision_agent.py +71 -5
- {vision_agent-0.2.166 → vision_agent-0.2.168}/vision_agent/tools/meta_tools.py +3 -2
- {vision_agent-0.2.166 → vision_agent-0.2.168}/LICENSE +0 -0
- {vision_agent-0.2.166 → vision_agent-0.2.168}/README.md +0 -0
- {vision_agent-0.2.166 → vision_agent-0.2.168}/vision_agent/__init__.py +0 -0
- {vision_agent-0.2.166 → vision_agent-0.2.168}/vision_agent/agent/__init__.py +0 -0
- {vision_agent-0.2.166 → vision_agent-0.2.168}/vision_agent/agent/agent.py +0 -0
- {vision_agent-0.2.166 → vision_agent-0.2.168}/vision_agent/agent/agent_utils.py +0 -0
- {vision_agent-0.2.166 → vision_agent-0.2.168}/vision_agent/agent/vision_agent_coder.py +0 -0
- {vision_agent-0.2.166 → vision_agent-0.2.168}/vision_agent/agent/vision_agent_coder_prompts.py +0 -0
- {vision_agent-0.2.166 → vision_agent-0.2.168}/vision_agent/agent/vision_agent_planner.py +0 -0
- {vision_agent-0.2.166 → vision_agent-0.2.168}/vision_agent/agent/vision_agent_planner_prompts.py +0 -0
- {vision_agent-0.2.166 → vision_agent-0.2.168}/vision_agent/agent/vision_agent_prompts.py +0 -0
- {vision_agent-0.2.166 → vision_agent-0.2.168}/vision_agent/clients/__init__.py +0 -0
- {vision_agent-0.2.166 → vision_agent-0.2.168}/vision_agent/clients/http.py +0 -0
- {vision_agent-0.2.166 → vision_agent-0.2.168}/vision_agent/clients/landing_public_api.py +0 -0
- {vision_agent-0.2.166 → vision_agent-0.2.168}/vision_agent/fonts/__init__.py +0 -0
- {vision_agent-0.2.166 → vision_agent-0.2.168}/vision_agent/fonts/default_font_ch_en.ttf +0 -0
- {vision_agent-0.2.166 → vision_agent-0.2.168}/vision_agent/lmm/__init__.py +0 -0
- {vision_agent-0.2.166 → vision_agent-0.2.168}/vision_agent/lmm/lmm.py +0 -0
- {vision_agent-0.2.166 → vision_agent-0.2.168}/vision_agent/lmm/types.py +0 -0
- {vision_agent-0.2.166 → vision_agent-0.2.168}/vision_agent/tools/__init__.py +0 -0
- {vision_agent-0.2.166 → vision_agent-0.2.168}/vision_agent/tools/prompts.py +0 -0
- {vision_agent-0.2.166 → vision_agent-0.2.168}/vision_agent/tools/tool_utils.py +0 -0
- {vision_agent-0.2.166 → vision_agent-0.2.168}/vision_agent/tools/tools.py +0 -0
- {vision_agent-0.2.166 → vision_agent-0.2.168}/vision_agent/tools/tools_types.py +0 -0
- {vision_agent-0.2.166 → vision_agent-0.2.168}/vision_agent/utils/__init__.py +0 -0
- {vision_agent-0.2.166 → vision_agent-0.2.168}/vision_agent/utils/exceptions.py +0 -0
- {vision_agent-0.2.166 → vision_agent-0.2.168}/vision_agent/utils/execute.py +0 -0
- {vision_agent-0.2.166 → vision_agent-0.2.168}/vision_agent/utils/image_utils.py +0 -0
- {vision_agent-0.2.166 → vision_agent-0.2.168}/vision_agent/utils/sim.py +0 -0
- {vision_agent-0.2.166 → vision_agent-0.2.168}/vision_agent/utils/type_defs.py +0 -0
- {vision_agent-0.2.166 → vision_agent-0.2.168}/vision_agent/utils/video.py +0 -0
@@ -85,6 +85,15 @@ def format_agent_message(agent_message: str) -> str:
|
|
85
85
|
return output
|
86
86
|
|
87
87
|
|
88
|
+
def _clean_response(response: str) -> str:
|
89
|
+
# Sometimes the LLM will hallucinate responses to an <execute_python> tag as if it
|
90
|
+
# had already executed the code. This function removes the hallucinated response.
|
91
|
+
if "<execute_python>" in response:
|
92
|
+
end_execute_python = response.find("</execute_python>")
|
93
|
+
response = response[: end_execute_python + len("</execute_python>")]
|
94
|
+
return response
|
95
|
+
|
96
|
+
|
88
97
|
def run_conversation(orch: LMM, chat: List[Message]) -> Dict[str, Any]:
|
89
98
|
chat = copy.deepcopy(chat)
|
90
99
|
|
@@ -114,6 +123,10 @@ def run_conversation(orch: LMM, chat: List[Message]) -> Dict[str, Any]:
|
|
114
123
|
message["media"] = chat[-1]["media"]
|
115
124
|
conv_resp = cast(str, orch([message], stream=False))
|
116
125
|
|
126
|
+
# clean the response first, if we are executing code, do not respond or end
|
127
|
+
# conversation before the code has been executed.
|
128
|
+
conv_resp = _clean_response(conv_resp)
|
129
|
+
|
117
130
|
let_user_respond_str = extract_tag(conv_resp, "let_user_respond")
|
118
131
|
let_user_respond = (
|
119
132
|
"true" in let_user_respond_str.lower() if let_user_respond_str else False
|
@@ -197,6 +210,51 @@ def add_step_descriptions(response: Dict[str, Any]) -> Dict[str, Any]:
|
|
197
210
|
return response
|
198
211
|
|
199
212
|
|
213
|
+
def new_format_to_old_format(new_format: Dict[str, Any]) -> Dict[str, Any]:
    """Convert a new-style response dict into the legacy message layout.

    Args:
        new_format: Mapping with the keys ``"thinking"``, ``"response"``,
            ``"execute_python"`` and ``"let_user_respond"``. The first three
            may be ``None``.

    Returns:
        A dict with the keys ``"thoughts"``, ``"response"`` and
        ``"let_user_respond"``. ``None`` thinking/response become empty
        strings, and any code is appended to the response wrapped in
        ``<execute_python>`` tags.

    Raises:
        KeyError: If one of the four expected keys is missing.
    """
    thinking = new_format["thinking"]
    text = new_format["response"]
    code = new_format["execute_python"]

    old_format: Dict[str, Any] = {
        "thoughts": "" if thinking is None else thinking,
        "response": "" if text is None else text,
    }
    if code is not None:
        # The legacy format carries code inline, inside the response text.
        old_format["response"] += f"\n<execute_python>\n{code}\n</execute_python>"
    old_format["let_user_respond"] = new_format["let_user_respond"]
    return old_format
|
225
|
+
|
226
|
+
|
227
|
+
def old_format_to_new_format(old_format_str: str) -> str:
    """Convert a legacy JSON chat message into the new JSON layout.

    The legacy layout is a JSON object with the keys ``"thoughts"``,
    ``"response"`` and ``"let_user_respond"``, where code is embedded in the
    response between ``<execute_python>`` tags. The new layout has the keys
    ``"thinking"``, ``"response"``, ``"execute_python"`` and
    ``"let_user_respond"``.

    Args:
        old_format_str: Message content. It may be a legacy JSON message or
            any other text (for example a plain user message).

    Returns:
        The re-encoded JSON string when the input is a legacy message.
        Otherwise the input is returned unchanged. That covers text that is
        not JSON, JSON that is not an object (``"42"``, ``"true"``, a list)
        and objects that lack the legacy keys.
    """
    try:
        old_format = json.loads(old_format_str)
    except (json.JSONDecodeError, TypeError):
        # Not JSON at all, or not a string/bytes value: pass it through.
        return old_format_str

    # ``json.loads`` also succeeds on scalars and lists, and a user may paste
    # arbitrary JSON objects. Only genuine legacy messages are converted.
    legacy_keys = ("thoughts", "response", "let_user_respond")
    if not isinstance(old_format, dict) or any(
        key not in old_format for key in legacy_keys
    ):
        return old_format_str

    thoughts = old_format["thoughts"]
    # Blank (or non-string) thoughts are represented as ``None``.
    thinking = thoughts if isinstance(thoughts, str) and thoughts.strip() else None

    response = old_format["response"]
    execute_python = None
    if isinstance(response, str) and "<execute_python>" in response:
        # ``extract_tag`` is defined elsewhere in this module; presumably it
        # returns the text between the tags -- confirm how it treats an
        # unterminated tag.
        execute_python = extract_tag(response, "execute_python")
        if execute_python:
            response = response.replace(execute_python, "")
        response = (
            response.replace("<execute_python>", "")
            .replace("</execute_python>", "")
            .strip()
        )

    return json.dumps(
        {
            "thinking": thinking,
            "response": response,
            "execute_python": execute_python,
            "let_user_respond": old_format["let_user_respond"],
        }
    )
|
256
|
+
|
257
|
+
|
200
258
|
class VisionAgent(Agent):
|
201
259
|
"""Vision Agent is an agent that can chat with the user and call tools or other
|
202
260
|
agents to generate code for it. Vision Agent uses python code to execute actions
|
@@ -361,11 +419,11 @@ class VisionAgent(Agent):
|
|
361
419
|
(
|
362
420
|
{
|
363
421
|
"role": c["role"],
|
364
|
-
"content": c["content"],
|
422
|
+
"content": old_format_to_new_format(c["content"]), # type: ignore
|
365
423
|
"media": c["media"],
|
366
424
|
}
|
367
425
|
if "media" in c
|
368
|
-
else {"role": c["role"], "content": c["content"]}
|
426
|
+
else {"role": c["role"], "content": old_format_to_new_format(c["content"])} # type: ignore
|
369
427
|
)
|
370
428
|
for c in int_chat
|
371
429
|
],
|
@@ -419,13 +477,17 @@ class VisionAgent(Agent):
|
|
419
477
|
int_chat.append(
|
420
478
|
{
|
421
479
|
"role": "assistant",
|
422
|
-
"content": json.dumps(
|
480
|
+
"content": json.dumps(
|
481
|
+
new_format_to_old_format(add_step_descriptions(response))
|
482
|
+
),
|
423
483
|
}
|
424
484
|
)
|
425
485
|
orig_chat.append(
|
426
486
|
{
|
427
487
|
"role": "assistant",
|
428
|
-
"content": json.dumps(
|
488
|
+
"content": json.dumps(
|
489
|
+
new_format_to_old_format(add_step_descriptions(response))
|
490
|
+
),
|
429
491
|
}
|
430
492
|
)
|
431
493
|
|
@@ -458,7 +520,11 @@ class VisionAgent(Agent):
|
|
458
520
|
self.streaming_message(
|
459
521
|
{
|
460
522
|
"role": "assistant",
|
461
|
-
"content": json.dumps(
|
523
|
+
"content": json.dumps(
|
524
|
+
new_format_to_old_format(
|
525
|
+
add_step_descriptions(response)
|
526
|
+
)
|
527
|
+
),
|
462
528
|
"finished": finished and code_action is None,
|
463
529
|
}
|
464
530
|
)
|
@@ -676,12 +676,13 @@ def use_extra_vision_agent_args(
|
|
676
676
|
for node in red:
|
677
677
|
# seems to always be atomtrailers not call type
|
678
678
|
if node.type == "atomtrailers":
|
679
|
+
if node.name.value == "generate_vision_code":
|
680
|
+
node.value[1].value.append(f"test_multi_plan={test_multi_plan}")
|
681
|
+
|
679
682
|
if (
|
680
683
|
node.name.value == "generate_vision_code"
|
681
684
|
or node.name.value == "edit_vision_code"
|
682
685
|
):
|
683
|
-
node.value[1].value.append(f"test_multi_plan={test_multi_plan}")
|
684
|
-
|
685
686
|
if custom_tool_names is not None:
|
686
687
|
node.value[1].value.append(f"custom_tool_names={custom_tool_names}")
|
687
688
|
cleaned_code = red.dumps().strip()
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{vision_agent-0.2.166 → vision_agent-0.2.168}/vision_agent/agent/vision_agent_coder_prompts.py
RENAMED
File without changes
|
File without changes
|
{vision_agent-0.2.166 → vision_agent-0.2.168}/vision_agent/agent/vision_agent_planner_prompts.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|