vision-agent 0.2.157__py3-none-any.whl → 0.2.159__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vision_agent/agent/vision_agent.py +52 -3
- vision_agent/tools/meta_tools.py +13 -6
- vision_agent/tools/tools.py +4 -2
- {vision_agent-0.2.157.dist-info → vision_agent-0.2.159.dist-info}/METADATA +1 -1
- {vision_agent-0.2.157.dist-info → vision_agent-0.2.159.dist-info}/RECORD +7 -7
- {vision_agent-0.2.157.dist-info → vision_agent-0.2.159.dist-info}/LICENSE +0 -0
- {vision_agent-0.2.157.dist-info → vision_agent-0.2.159.dist-info}/WHEEL +0 -0
@@ -149,6 +149,32 @@ def execute_user_code_action(
|
|
149
149
|
return user_result, user_obs
|
150
150
|
|
151
151
|
|
152
|
+
def add_step_descriptions(response: Dict[str, str]) -> Dict[str, str]:
    """Return a copy of ``response`` with a short description before its code step.

    When ``response["response"]`` contains an ``<execute_python>`` tag, all text
    preceding the tag is dropped and replaced by a human-readable description of
    the tools referenced anywhere in the response text. The input dictionary is
    never modified; a deep copy is returned.

    Parameters:
        response (Dict[str, str]): The agent response; only the "response" key is
            inspected.

    Returns:
        Dict[str, str]: A deep copy of ``response``, with the "response" entry
            rewritten when it contains an ``<execute_python>`` tag.
    """
    updated = copy.deepcopy(response)
    if "response" not in updated:
        return updated

    text = updated["response"]
    if "<execute_python>" not in text:
        return updated

    # Only these tool calls get a dedicated description; anything else falls
    # back to the generic "Executing code." label.
    tool_descriptions = (
        ("open_code_artifact", "Reading file."),
        ("create_code_artifact", "Creating file."),
        ("edit_code_artifact", "Editing file."),
        ("generate_vision_code", "Generating vision code."),
        ("edit_vision_code", "Editing vision code."),
    )
    # Each matched label is followed by a single space, in table order.
    prefix = "".join(
        label + " " for tool_name, label in tool_descriptions if tool_name in text
    )
    if not prefix:
        prefix = "Executing code."

    # Keep only the code step itself (from the opening tag onwards).
    code_step = text[text.find("<execute_python>") :]
    updated["response"] = prefix + code_step
    return updated
|
176
|
+
|
177
|
+
|
152
178
|
class VisionAgent(Agent):
|
153
179
|
"""Vision Agent is an agent that can chat with the user and call tools or other
|
154
180
|
agents to generate code for it. Vision Agent uses python code to execute actions
|
@@ -335,8 +361,18 @@ class VisionAgent(Agent):
|
|
335
361
|
response = run_conversation(self.agent, int_chat)
|
336
362
|
if self.verbosity >= 1:
|
337
363
|
_LOGGER.info(response)
|
338
|
-
int_chat.append(
|
339
|
-
|
364
|
+
int_chat.append(
|
365
|
+
{
|
366
|
+
"role": "assistant",
|
367
|
+
"content": str(add_step_descriptions(response)),
|
368
|
+
}
|
369
|
+
)
|
370
|
+
orig_chat.append(
|
371
|
+
{
|
372
|
+
"role": "assistant",
|
373
|
+
"content": str(add_step_descriptions(response)),
|
374
|
+
}
|
375
|
+
)
|
340
376
|
|
341
377
|
# sometimes it gets stuck in a loop, so we force it to exit
|
342
378
|
if last_response == response:
|
@@ -382,8 +418,18 @@ class VisionAgent(Agent):
|
|
382
418
|
|
383
419
|
obs_chat_elt: Message = {"role": "observation", "content": obs}
|
384
420
|
if media_obs and result.success:
|
421
|
+
# for view_media_artifact, we need to ensure the media is loaded
|
422
|
+
# locally so the conversation agent can actually see it
|
423
|
+
code_interpreter.download_file(
|
424
|
+
str(remote_artifacts_path.name),
|
425
|
+
str(self.local_artifacts_path),
|
426
|
+
)
|
427
|
+
artifacts.load(
|
428
|
+
self.local_artifacts_path,
|
429
|
+
Path(self.local_artifacts_path).parent,
|
430
|
+
)
|
385
431
|
obs_chat_elt["media"] = [
|
386
|
-
Path(
|
432
|
+
Path(self.local_artifacts_path).parent / media_ob
|
387
433
|
for media_ob in media_obs
|
388
434
|
]
|
389
435
|
|
@@ -407,6 +453,9 @@ class VisionAgent(Agent):
|
|
407
453
|
code_interpreter.download_file(
|
408
454
|
str(remote_artifacts_path.name), str(self.local_artifacts_path)
|
409
455
|
)
|
456
|
+
artifacts.load(
|
457
|
+
self.local_artifacts_path, Path(self.local_artifacts_path).parent
|
458
|
+
)
|
410
459
|
return orig_chat, artifacts
|
411
460
|
|
412
461
|
def streaming_message(self, message: Dict[str, Any]) -> None:
|
vision_agent/tools/meta_tools.py
CHANGED
@@ -92,19 +92,26 @@ class Artifacts:
|
|
92
92
|
|
93
93
|
self.code_sandbox_runtime = None
|
94
94
|
|
95
|
-
def load(
|
96
|
-
|
97
|
-
|
95
|
+
def load(
    self,
    artifacts_path: Union[str, Path],
    load_to: Optional[Union[str, Path]] = None,
) -> None:
    """Load pickled artifacts and write each one into the ``load_to`` directory.

    The pickle file at ``artifacts_path`` is read into ``self.artifacts`` and
    every artifact whose value is not None is written to ``load_to / <name>``.
    String values are written in text mode, everything else in binary mode.
    If ``load_to`` is None, the artifacts are written into
    ``self.remote_save_path.parent``.

    Parameters:
        artifacts_path (Union[str, Path]): The file path to load the artifacts from
        load_to (Optional[Union[str, Path]]): The directory to write the loaded
            artifacts into. It is created if it does not exist yet. Defaults to
            the parent directory of ``remote_save_path``.
    """
    # NOTE(review): unpickling can execute arbitrary code; only load artifact
    # files that come from a trusted source.
    with open(artifacts_path, "rb") as f:
        self.artifacts = pkl.load(f)

    target_dir = self.remote_save_path.parent if load_to is None else Path(load_to)
    # Make sure the destination exists so writing the first artifact does not
    # fail with FileNotFoundError.
    Path(target_dir).mkdir(parents=True, exist_ok=True)

    for name, value in self.artifacts.items():
        if value is None:
            # Placeholder entries without content are not materialized on disk.
            continue
        mode = "w" if isinstance(value, str) else "wb"
        with open(target_dir / name, mode) as out_file:
            out_file.write(value)
|
109
116
|
|
110
117
|
def show(self, uploaded_file_path: Optional[Union[str, Path]] = None) -> str:
|
vision_agent/tools/tools.py
CHANGED
@@ -700,6 +700,7 @@ def countgd_counting(
|
|
700
700
|
{'score': 0.98, 'label': 'flower', 'bbox': [0.44, 0.24, 0.49, 0.58},
|
701
701
|
]
|
702
702
|
"""
|
703
|
+
image_size = image.shape[:2]
|
703
704
|
buffer_bytes = numpy_to_bytes(image)
|
704
705
|
files = [("image", buffer_bytes)]
|
705
706
|
prompt = prompt.replace(", ", " .")
|
@@ -712,7 +713,7 @@ def countgd_counting(
|
|
712
713
|
bboxes_formatted = [
|
713
714
|
ODResponseData(
|
714
715
|
label=bbox["label"],
|
715
|
-
bbox=
|
716
|
+
bbox=normalize_bbox(bbox["bounding_box"], image_size),
|
716
717
|
score=round(bbox["score"], 2),
|
717
718
|
)
|
718
719
|
for bbox in bboxes_per_frame
|
@@ -757,6 +758,7 @@ def countgd_example_based_counting(
|
|
757
758
|
{'score': 0.98, 'label': 'object', 'bounding_box': [0.44, 0.24, 0.49, 0.58},
|
758
759
|
]
|
759
760
|
"""
|
761
|
+
image_size = image.shape[:2]
|
760
762
|
buffer_bytes = numpy_to_bytes(image)
|
761
763
|
files = [("image", buffer_bytes)]
|
762
764
|
visual_prompts = [
|
@@ -771,7 +773,7 @@ def countgd_example_based_counting(
|
|
771
773
|
bboxes_formatted = [
|
772
774
|
ODResponseData(
|
773
775
|
label=bbox["label"],
|
774
|
-
bbox=
|
776
|
+
bbox=normalize_bbox(bbox["bounding_box"], image_size),
|
775
777
|
score=round(bbox["score"], 2),
|
776
778
|
)
|
777
779
|
for bbox in bboxes_per_frame
|
@@ -2,7 +2,7 @@ vision_agent/__init__.py,sha256=EAb4-f9iyuEYkBrX4ag1syM8Syx8118_t0R6_C34M9w,57
|
|
2
2
|
vision_agent/agent/__init__.py,sha256=NF2LABqHixLvbsOIO-fe-VKZ7awvShLtcT0oQT4eWtI,235
|
3
3
|
vision_agent/agent/agent.py,sha256=2cjIOxEuSJrqbfPXYoV0qER5ihXsPFCoEFJa4jpqan0,597
|
4
4
|
vision_agent/agent/agent_utils.py,sha256=eIpLz2NunEqEsBBrECJaD34-2uY0bsFNnW-XKfqqohs,2518
|
5
|
-
vision_agent/agent/vision_agent.py,sha256=
|
5
|
+
vision_agent/agent/vision_agent.py,sha256=etqyLMZHJJz_A6tkonoYGlYvFvEW0uUHs5D1gsYwkSs,20412
|
6
6
|
vision_agent/agent/vision_agent_coder.py,sha256=2ZoGikn2nakGDfs20XRshZjQUyvbw6l47UhExJAYkqI,38515
|
7
7
|
vision_agent/agent/vision_agent_coder_prompts.py,sha256=BmbTMhth4v1qLexuoSeyo47QQ0kPQvL1pLbCJHMsWDw,18910
|
8
8
|
vision_agent/agent/vision_agent_prompts.py,sha256=LZ9Bnx7ZFkqbNOMqwfdiWZU4niND9Z1ArcFHNSn_jzA,11187
|
@@ -15,10 +15,10 @@ vision_agent/lmm/__init__.py,sha256=jyY1sJb_tYKg5-Wzs3p1lvwFkc-aUNZfMcLy3TOC4Zg,
|
|
15
15
|
vision_agent/lmm/lmm.py,sha256=B5ClgwvbybVCWkf9opDMLjTtJZemUU4KUkQoRxGh43I,16787
|
16
16
|
vision_agent/lmm/types.py,sha256=ZEXR_ptBL0ZwDMTDYkgxUCmSZFmBYPQd2jreNzr_8UY,221
|
17
17
|
vision_agent/tools/__init__.py,sha256=PLVbfTMjKxQlHIRWnq9b785W9a52AXQS_tOa0tkQ0ZY,2420
|
18
|
-
vision_agent/tools/meta_tools.py,sha256=
|
18
|
+
vision_agent/tools/meta_tools.py,sha256=VKvrGgd_uvB8nEGTfouz8ij9MKJJh9G5bOg4mVMSrqY,25418
|
19
19
|
vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
|
20
20
|
vision_agent/tools/tool_utils.py,sha256=VPGqGJ2ZYEJA6AW7K9X7hQv6vRlMtAQcybE4izdToCw,8196
|
21
|
-
vision_agent/tools/tools.py,sha256=
|
21
|
+
vision_agent/tools/tools.py,sha256=vS1yCk3Fza9eYOTHPFwwroo_ULdw2ztMQMb81x1U5f8,78524
|
22
22
|
vision_agent/tools/tools_types.py,sha256=8hYf2OZhI58gvf65KGaeGkt4EQ56nwLFqIQDPHioOBc,2339
|
23
23
|
vision_agent/utils/__init__.py,sha256=7fMgbZiEwbNS0fBOS_hJI5PuEYBblw36zLi_UjUzvj4,244
|
24
24
|
vision_agent/utils/exceptions.py,sha256=booSPSuoULF7OXRr_YbC4dtKt6gM_HyiFQHBuaW86C4,2052
|
@@ -27,7 +27,7 @@ vision_agent/utils/image_utils.py,sha256=rm9GfXvD4JrjnqKrP_f2gfq4SzmqYC0IdC1kKwd
|
|
27
27
|
vision_agent/utils/sim.py,sha256=ZuSS07TUXFGjipmiQoY8TKRmSes7XXCdtU9PI8PC1sw,5609
|
28
28
|
vision_agent/utils/type_defs.py,sha256=BE12s3JNQy36QvauXHjwyeffVh5enfcvd4vTzSwvEZI,1384
|
29
29
|
vision_agent/utils/video.py,sha256=xbMEoRk13l4fHeQlbvMQhLCn8RNndYmsDhUf01TUeR8,4781
|
30
|
-
vision_agent-0.2.
|
31
|
-
vision_agent-0.2.
|
32
|
-
vision_agent-0.2.
|
33
|
-
vision_agent-0.2.
|
30
|
+
vision_agent-0.2.159.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
31
|
+
vision_agent-0.2.159.dist-info/METADATA,sha256=iKyw9w-VOAaZ2EqPJmRozZ8J8QP0DR87gog3HeJ3mcc,17753
|
32
|
+
vision_agent-0.2.159.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
|
33
|
+
vision_agent-0.2.159.dist-info/RECORD,,
|
File without changes
|
File without changes
|