vision-agent 0.2.167__py3-none-any.whl → 0.2.169__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vision_agent/agent/vision_agent.py +56 -5
- {vision_agent-0.2.167.dist-info → vision_agent-0.2.169.dist-info}/METADATA +1 -1
- {vision_agent-0.2.167.dist-info → vision_agent-0.2.169.dist-info}/RECORD +5 -5
- {vision_agent-0.2.167.dist-info → vision_agent-0.2.169.dist-info}/LICENSE +0 -0
- {vision_agent-0.2.167.dist-info → vision_agent-0.2.169.dist-info}/WHEEL +0 -0
@@ -210,6 +210,51 @@ def add_step_descriptions(response: Dict[str, Any]) -> Dict[str, Any]:
|
|
210
210
|
return response
|
211
211
|
|
212
212
|
|
213
|
+
def new_format_to_old_format(new_format: Dict[str, Any]) -> Dict[str, Any]:
    """Translate a new-style reply dict into the legacy reply dict.

    The new style keeps ``thinking``, ``response`` and ``execute_python`` as
    separate entries; the legacy style folds the code into ``response`` wrapped
    in ``<execute_python>`` tags and calls the reasoning field ``thoughts``.

    Args:
        new_format: Dict with the keys ``thinking``, ``response``,
            ``execute_python`` and ``let_user_respond``. The first three may
            be ``None``.

    Returns:
        Dict with the keys ``thoughts``, ``response`` and ``let_user_respond``.

    Raises:
        KeyError: If any of the four expected keys is absent.
    """
    # A missing (None) thinking/response collapses to the empty string.
    thoughts = new_format["thinking"]
    if thoughts is None:
        thoughts = ""

    response = new_format["response"]
    if response is None:
        response = ""

    # Code, when present, is appended to the response inside its tag pair.
    code = new_format["execute_python"]
    if code is not None:
        response += f"\n<execute_python>\n{code}\n</execute_python>"

    legacy: Dict[str, Any] = {
        "thoughts": thoughts,
        "response": response,
        "let_user_respond": new_format["let_user_respond"],
    }
    return legacy
|
225
|
+
|
226
|
+
|
227
|
+
def old_format_to_new_format(old_format_str: str) -> str:
    """Re-encode a legacy JSON reply string in the new reply layout.

    A legacy reply is a JSON object with the keys ``thoughts``, ``response``
    and ``let_user_respond``, where any code is embedded in ``response``
    between ``<execute_python>`` tags. The new layout is a JSON object with
    the keys ``thinking``, ``response``, ``execute_python`` and
    ``let_user_respond``.

    Anything that is not a legacy reply is returned untouched, so the function
    can be applied to every chat message regardless of who wrote it. This
    covers non-JSON text, non-string content, JSON that is not an object, an
    object missing one of the three legacy keys, and an object whose
    ``response`` is not a string.

    Args:
        old_format_str: The message content to convert.

    Returns:
        The JSON-encoded new-layout reply, or ``old_format_str`` unchanged when
        it is not a legacy reply.
    """
    try:
        old_format = json.loads(old_format_str)
    except (TypeError, json.JSONDecodeError):
        # TypeError: content was not str/bytes at all; JSONDecodeError: plain
        # text. Either way there is nothing to convert.
        return old_format_str

    # Valid JSON is not necessarily a legacy reply (e.g. a user typing "[1, 2]"
    # or an unrelated object); pass those through instead of crashing on
    # subscripting.
    legacy_keys = ("thoughts", "response", "let_user_respond")
    if not isinstance(old_format, dict) or any(
        key not in old_format for key in legacy_keys
    ):
        return old_format_str

    raw_response = old_format["response"]
    if not isinstance(raw_response, str):
        return old_format_str

    # Blank or non-string (e.g. null) thoughts are represented as None.
    thoughts = old_format["thoughts"]
    thinking = (
        thoughts if isinstance(thoughts, str) and thoughts.strip() != "" else None
    )

    execute_python = None
    response = raw_response
    if "<execute_python>" in raw_response:
        execute_python = extract_tag(raw_response, "execute_python")
        # NOTE(review): extract_tag is defined outside this block; it is
        # assumed it may return None (e.g. unclosed tag) - confirm. In that
        # case the response text is kept as-is rather than failing in replace().
        if execute_python is not None:
            response = (
                raw_response.replace(execute_python, "")
                .replace("<execute_python>", "")
                .replace("</execute_python>", "")
                .strip()
            )

    return json.dumps(
        {
            "thinking": thinking,
            "response": response,
            "execute_python": execute_python,
            "let_user_respond": old_format["let_user_respond"],
        }
    )
|
256
|
+
|
257
|
+
|
213
258
|
class VisionAgent(Agent):
|
214
259
|
"""Vision Agent is an agent that can chat with the user and call tools or other
|
215
260
|
agents to generate code for it. Vision Agent uses python code to execute actions
|
@@ -374,11 +419,11 @@ class VisionAgent(Agent):
|
|
374
419
|
(
|
375
420
|
{
|
376
421
|
"role": c["role"],
|
377
|
-
"content": c["content"],
|
422
|
+
"content": old_format_to_new_format(c["content"]), # type: ignore
|
378
423
|
"media": c["media"],
|
379
424
|
}
|
380
425
|
if "media" in c
|
381
|
-
else {"role": c["role"], "content": c["content"]}
|
426
|
+
else {"role": c["role"], "content": old_format_to_new_format(c["content"])} # type: ignore
|
382
427
|
)
|
383
428
|
for c in int_chat
|
384
429
|
],
|
@@ -432,13 +477,17 @@ class VisionAgent(Agent):
|
|
432
477
|
int_chat.append(
|
433
478
|
{
|
434
479
|
"role": "assistant",
|
435
|
-
"content": json.dumps(
|
480
|
+
"content": json.dumps(
|
481
|
+
new_format_to_old_format(add_step_descriptions(response))
|
482
|
+
),
|
436
483
|
}
|
437
484
|
)
|
438
485
|
orig_chat.append(
|
439
486
|
{
|
440
487
|
"role": "assistant",
|
441
|
-
"content": json.dumps(
|
488
|
+
"content": json.dumps(
|
489
|
+
new_format_to_old_format(add_step_descriptions(response))
|
490
|
+
),
|
442
491
|
}
|
443
492
|
)
|
444
493
|
|
@@ -471,7 +520,9 @@ class VisionAgent(Agent):
|
|
471
520
|
self.streaming_message(
|
472
521
|
{
|
473
522
|
"role": "assistant",
|
474
|
-
"content":
|
523
|
+
"content": new_format_to_old_format(
|
524
|
+
add_step_descriptions(response)
|
525
|
+
),
|
475
526
|
"finished": finished and code_action is None,
|
476
527
|
}
|
477
528
|
)
|
@@ -2,7 +2,7 @@ vision_agent/__init__.py,sha256=EAb4-f9iyuEYkBrX4ag1syM8Syx8118_t0R6_C34M9w,57
|
|
2
2
|
vision_agent/agent/__init__.py,sha256=RRMPhH8mgm_pCtEKiVFSjJyDi4lCr4F7k05AhK01xlM,436
|
3
3
|
vision_agent/agent/agent.py,sha256=2cjIOxEuSJrqbfPXYoV0qER5ihXsPFCoEFJa4jpqan0,597
|
4
4
|
vision_agent/agent/agent_utils.py,sha256=eSgg8CwWylX_erLTqTg2pVhEEgVkMLRrQfYRyJzI3so,5443
|
5
|
-
vision_agent/agent/vision_agent.py,sha256=
|
5
|
+
vision_agent/agent/vision_agent.py,sha256=OBwj1RuJDXAFmnN6RRiG8mxZC6Nqfa7LddymvbI958w,26135
|
6
6
|
vision_agent/agent/vision_agent_coder.py,sha256=aVkl0b9LKvy-auuHGYSag-ixYnue0iRQqD1PYLPBR-s,29312
|
7
7
|
vision_agent/agent/vision_agent_coder_prompts.py,sha256=gPLVXQMNSzYnQYpNm0wlH_5FPkOTaFDV24bqzK3jQ40,12221
|
8
8
|
vision_agent/agent/vision_agent_planner.py,sha256=mjmnXG9CvYf_ZA7ZJ3ri4H-2U_Km55gF1sZYRSOlxpY,19027
|
@@ -29,7 +29,7 @@ vision_agent/utils/image_utils.py,sha256=rm9GfXvD4JrjnqKrP_f2gfq4SzmqYC0IdC1kKwd
|
|
29
29
|
vision_agent/utils/sim.py,sha256=ZuSS07TUXFGjipmiQoY8TKRmSes7XXCdtU9PI8PC1sw,5609
|
30
30
|
vision_agent/utils/type_defs.py,sha256=BE12s3JNQy36QvauXHjwyeffVh5enfcvd4vTzSwvEZI,1384
|
31
31
|
vision_agent/utils/video.py,sha256=fOPR48-SuwMbE5eB5rc2F7lVo6k1mVHn26eEJ0QCslc,5602
|
32
|
-
vision_agent-0.2.
|
33
|
-
vision_agent-0.2.
|
34
|
-
vision_agent-0.2.
|
35
|
-
vision_agent-0.2.
|
32
|
+
vision_agent-0.2.169.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
33
|
+
vision_agent-0.2.169.dist-info/METADATA,sha256=VkqOSaDlrvOBxu6m6CsC_OjRPOjxWHbIEgIxKvFAjYc,18034
|
34
|
+
vision_agent-0.2.169.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
|
35
|
+
vision_agent-0.2.169.dist-info/RECORD,,
|
File without changes
|
File without changes
|