vision-agent 0.2.143__py3-none-any.whl → 0.2.145__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- vision_agent/agent/vision_agent.py +55 -32
- vision_agent/tools/meta_tools.py +1 -0
- vision_agent/tools/tools.py +6 -1
- {vision_agent-0.2.143.dist-info → vision_agent-0.2.145.dist-info}/METADATA +1 -1
- {vision_agent-0.2.143.dist-info → vision_agent-0.2.145.dist-info}/RECORD +7 -7
- {vision_agent-0.2.143.dist-info → vision_agent-0.2.145.dist-info}/LICENSE +0 -0
- {vision_agent-0.2.143.dist-info → vision_agent-0.2.145.dist-info}/WHEEL +0 -0
@@ -229,7 +229,7 @@ class VisionAgent(Agent):
|
|
229
229
|
) as code_interpreter:
|
230
230
|
orig_chat = copy.deepcopy(chat)
|
231
231
|
int_chat = copy.deepcopy(chat)
|
232
|
-
|
232
|
+
last_user_message = chat[-1]
|
233
233
|
media_list = []
|
234
234
|
for chat_i in int_chat:
|
235
235
|
if "media" in chat_i:
|
@@ -278,32 +278,9 @@ class VisionAgent(Agent):
|
|
278
278
|
orig_chat.append({"role": "observation", "content": artifacts_loaded})
|
279
279
|
self.streaming_message({"role": "observation", "content": artifacts_loaded})
|
280
280
|
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
if user_code_action is not None:
|
285
|
-
user_result, user_obs = run_code_action(
|
286
|
-
user_code_action, code_interpreter, str(remote_artifacts_path)
|
287
|
-
)
|
288
|
-
if self.verbosity >= 1:
|
289
|
-
_LOGGER.info(user_obs)
|
290
|
-
int_chat.append({"role": "observation", "content": user_obs})
|
291
|
-
orig_chat.append(
|
292
|
-
{
|
293
|
-
"role": "observation",
|
294
|
-
"content": user_obs,
|
295
|
-
"execution": user_result,
|
296
|
-
}
|
297
|
-
)
|
298
|
-
self.streaming_message(
|
299
|
-
{
|
300
|
-
"role": "observation",
|
301
|
-
"content": user_obs,
|
302
|
-
"execution": user_result,
|
303
|
-
"finished": True,
|
304
|
-
}
|
305
|
-
)
|
306
|
-
finished = True
|
281
|
+
finished = self.execute_user_code_action(
|
282
|
+
last_user_message, code_interpreter, remote_artifacts_path
|
283
|
+
)
|
307
284
|
|
308
285
|
while not finished and iterations < self.max_iterations:
|
309
286
|
response = run_conversation(self.agent, int_chat)
|
@@ -315,11 +292,6 @@ class VisionAgent(Agent):
|
|
315
292
|
# sometimes it gets stuck in a loop, so we force it to exit
|
316
293
|
if last_response == response:
|
317
294
|
response["let_user_respond"] = True
|
318
|
-
self.streaming_message(
|
319
|
-
{"role": "assistant", "error": "Stuck in loop"}
|
320
|
-
)
|
321
|
-
else:
|
322
|
-
self.streaming_message({"role": "assistant", "content": response})
|
323
295
|
|
324
296
|
finished = response["let_user_respond"]
|
325
297
|
|
@@ -327,6 +299,28 @@ class VisionAgent(Agent):
|
|
327
299
|
response["response"], test_multi_plan, customized_tool_names
|
328
300
|
)
|
329
301
|
|
302
|
+
if last_response == response:
|
303
|
+
self.streaming_message(
|
304
|
+
{
|
305
|
+
"role": "assistant",
|
306
|
+
"content": "{}",
|
307
|
+
"error": {
|
308
|
+
"name": "Error when running conversation agent",
|
309
|
+
"value": "Agent is stuck in conversation loop, exited",
|
310
|
+
"traceback_raw": [],
|
311
|
+
},
|
312
|
+
"finished": finished and code_action is None,
|
313
|
+
}
|
314
|
+
)
|
315
|
+
else:
|
316
|
+
self.streaming_message(
|
317
|
+
{
|
318
|
+
"role": "assistant",
|
319
|
+
"content": response,
|
320
|
+
"finished": finished and code_action is None,
|
321
|
+
}
|
322
|
+
)
|
323
|
+
|
330
324
|
if code_action is not None:
|
331
325
|
result, obs = run_code_action(
|
332
326
|
code_action, code_interpreter, str(remote_artifacts_path)
|
@@ -353,6 +347,7 @@ class VisionAgent(Agent):
|
|
353
347
|
"role": "observation",
|
354
348
|
"content": obs,
|
355
349
|
"execution": result,
|
350
|
+
"finished": finished,
|
356
351
|
}
|
357
352
|
)
|
358
353
|
|
@@ -367,6 +362,34 @@ class VisionAgent(Agent):
|
|
367
362
|
artifacts.save()
|
368
363
|
return orig_chat, artifacts
|
369
364
|
|
365
|
+
def execute_user_code_action(
|
366
|
+
self,
|
367
|
+
last_user_message: Message,
|
368
|
+
code_interpreter: CodeInterpreter,
|
369
|
+
remote_artifacts_path: Path,
|
370
|
+
) -> bool:
|
371
|
+
if last_user_message["role"] != "user":
|
372
|
+
return False
|
373
|
+
user_code_action = parse_execution(
|
374
|
+
cast(str, last_user_message.get("content", "")), False
|
375
|
+
)
|
376
|
+
if user_code_action is not None:
|
377
|
+
user_result, user_obs = run_code_action(
|
378
|
+
user_code_action, code_interpreter, str(remote_artifacts_path)
|
379
|
+
)
|
380
|
+
if self.verbosity >= 1:
|
381
|
+
_LOGGER.info(user_obs)
|
382
|
+
self.streaming_message(
|
383
|
+
{
|
384
|
+
"role": "observation",
|
385
|
+
"content": user_obs,
|
386
|
+
"execution": user_result,
|
387
|
+
"finished": True,
|
388
|
+
}
|
389
|
+
)
|
390
|
+
return True
|
391
|
+
return False
|
392
|
+
|
370
393
|
def streaming_message(self, message: Dict[str, Any]) -> None:
|
371
394
|
if self.callback_message:
|
372
395
|
self.callback_message(message)
|
vision_agent/tools/meta_tools.py
CHANGED
vision_agent/tools/tools.py
CHANGED
@@ -1181,7 +1181,12 @@ def florence2_phrase_grounding(
|
|
1181
1181
|
fine_tuning=FineTuning(job_id=UUID(fine_tune_id)),
|
1182
1182
|
)
|
1183
1183
|
data = data_obj.model_dump(by_alias=True)
|
1184
|
-
detections = send_inference_request(
|
1184
|
+
detections = send_inference_request(
|
1185
|
+
data,
|
1186
|
+
"tools",
|
1187
|
+
v2=False,
|
1188
|
+
metadata_payload={"function_name": "florence2_phrase_grounding"},
|
1189
|
+
)
|
1185
1190
|
else:
|
1186
1191
|
data = {
|
1187
1192
|
"image": image_b64,
|
@@ -2,7 +2,7 @@ vision_agent/__init__.py,sha256=EAb4-f9iyuEYkBrX4ag1syM8Syx8118_t0R6_C34M9w,57
|
|
2
2
|
vision_agent/agent/__init__.py,sha256=NF2LABqHixLvbsOIO-fe-VKZ7awvShLtcT0oQT4eWtI,235
|
3
3
|
vision_agent/agent/agent.py,sha256=2cjIOxEuSJrqbfPXYoV0qER5ihXsPFCoEFJa4jpqan0,597
|
4
4
|
vision_agent/agent/agent_utils.py,sha256=PEUHqvnHmFL4np_TeFmKMwr5s_dWfdfJz6TF_ogd1dU,2353
|
5
|
-
vision_agent/agent/vision_agent.py,sha256=
|
5
|
+
vision_agent/agent/vision_agent.py,sha256=Fp2uSbroRzGrxEwbb9srGdl0h31awkzDFm2tTfn28GI,17587
|
6
6
|
vision_agent/agent/vision_agent_coder.py,sha256=4bbebV1sKE10vsxcZR-R8P54X2HjLeU9lDt7ylIZAT4,38429
|
7
7
|
vision_agent/agent/vision_agent_coder_prompts.py,sha256=YWK4C--YRS1Kuab11Gn-AXBzar1j_GNnTnxi_nnaPRY,14901
|
8
8
|
vision_agent/agent/vision_agent_prompts.py,sha256=e_ASPeRFU1yZsQhCkK_bIBG-eyIWyWXmN64lFk-r7e0,10897
|
@@ -15,10 +15,10 @@ vision_agent/lmm/__init__.py,sha256=jyY1sJb_tYKg5-Wzs3p1lvwFkc-aUNZfMcLy3TOC4Zg,
|
|
15
15
|
vision_agent/lmm/lmm.py,sha256=B5ClgwvbybVCWkf9opDMLjTtJZemUU4KUkQoRxGh43I,16787
|
16
16
|
vision_agent/lmm/types.py,sha256=ZEXR_ptBL0ZwDMTDYkgxUCmSZFmBYPQd2jreNzr_8UY,221
|
17
17
|
vision_agent/tools/__init__.py,sha256=zUv3aVPN1MXfyQiQi5To4rkQGtG7mxLQ1NjLI3pxM80,2412
|
18
|
-
vision_agent/tools/meta_tools.py,sha256=
|
18
|
+
vision_agent/tools/meta_tools.py,sha256=iHvMeBktWcVi-0DOrSMak1gsZrM_VKJlAq1mAFbBemE,23477
|
19
19
|
vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
|
20
20
|
vision_agent/tools/tool_utils.py,sha256=5ukuDMxbEH4iKetYR9I7twzsA8ECyP4tVwYXQq54mxI,8020
|
21
|
-
vision_agent/tools/tools.py,sha256=
|
21
|
+
vision_agent/tools/tools.py,sha256=lLGZ-GgORSbFCTaBWI07Om1L8gnO00d8MN600YC7s_o,73773
|
22
22
|
vision_agent/tools/tools_types.py,sha256=JUOZWGW2q-dlJ85CHr9gvo9KQk_rXyjJhi-iwPNn4eM,2397
|
23
23
|
vision_agent/utils/__init__.py,sha256=7fMgbZiEwbNS0fBOS_hJI5PuEYBblw36zLi_UjUzvj4,244
|
24
24
|
vision_agent/utils/exceptions.py,sha256=booSPSuoULF7OXRr_YbC4dtKt6gM_HyiFQHBuaW86C4,2052
|
@@ -27,7 +27,7 @@ vision_agent/utils/image_utils.py,sha256=rm9GfXvD4JrjnqKrP_f2gfq4SzmqYC0IdC1kKwd
|
|
27
27
|
vision_agent/utils/sim.py,sha256=ebE9Cs00pVEDI1HMjAzUBk88tQQmc2U-yAzIDinnekU,5572
|
28
28
|
vision_agent/utils/type_defs.py,sha256=BE12s3JNQy36QvauXHjwyeffVh5enfcvd4vTzSwvEZI,1384
|
29
29
|
vision_agent/utils/video.py,sha256=xbMEoRk13l4fHeQlbvMQhLCn8RNndYmsDhUf01TUeR8,4781
|
30
|
-
vision_agent-0.2.
|
31
|
-
vision_agent-0.2.
|
32
|
-
vision_agent-0.2.
|
33
|
-
vision_agent-0.2.
|
30
|
+
vision_agent-0.2.145.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
31
|
+
vision_agent-0.2.145.dist-info/METADATA,sha256=Z7gVyhZAtrpb3zTrFl2LZli9hw6f2VW_0p5_MQtpURM,13758
|
32
|
+
vision_agent-0.2.145.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
|
33
|
+
vision_agent-0.2.145.dist-info/RECORD,,
|
File without changes
|
File without changes
|