vision-agent 0.2.158__tar.gz → 0.2.159__tar.gz
Sign up to get free protection for your applications and to get access to all the features.
- {vision_agent-0.2.158 → vision_agent-0.2.159}/PKG-INFO +1 -1
- {vision_agent-0.2.158 → vision_agent-0.2.159}/pyproject.toml +1 -1
- {vision_agent-0.2.158 → vision_agent-0.2.159}/vision_agent/agent/vision_agent.py +51 -4
- {vision_agent-0.2.158 → vision_agent-0.2.159}/vision_agent/tools/meta_tools.py +13 -6
- {vision_agent-0.2.158 → vision_agent-0.2.159}/vision_agent/tools/tools.py +4 -2
- {vision_agent-0.2.158 → vision_agent-0.2.159}/LICENSE +0 -0
- {vision_agent-0.2.158 → vision_agent-0.2.159}/README.md +0 -0
- {vision_agent-0.2.158 → vision_agent-0.2.159}/vision_agent/__init__.py +0 -0
- {vision_agent-0.2.158 → vision_agent-0.2.159}/vision_agent/agent/__init__.py +0 -0
- {vision_agent-0.2.158 → vision_agent-0.2.159}/vision_agent/agent/agent.py +0 -0
- {vision_agent-0.2.158 → vision_agent-0.2.159}/vision_agent/agent/agent_utils.py +0 -0
- {vision_agent-0.2.158 → vision_agent-0.2.159}/vision_agent/agent/vision_agent_coder.py +0 -0
- {vision_agent-0.2.158 → vision_agent-0.2.159}/vision_agent/agent/vision_agent_coder_prompts.py +0 -0
- {vision_agent-0.2.158 → vision_agent-0.2.159}/vision_agent/agent/vision_agent_prompts.py +0 -0
- {vision_agent-0.2.158 → vision_agent-0.2.159}/vision_agent/clients/__init__.py +0 -0
- {vision_agent-0.2.158 → vision_agent-0.2.159}/vision_agent/clients/http.py +0 -0
- {vision_agent-0.2.158 → vision_agent-0.2.159}/vision_agent/clients/landing_public_api.py +0 -0
- {vision_agent-0.2.158 → vision_agent-0.2.159}/vision_agent/fonts/__init__.py +0 -0
- {vision_agent-0.2.158 → vision_agent-0.2.159}/vision_agent/fonts/default_font_ch_en.ttf +0 -0
- {vision_agent-0.2.158 → vision_agent-0.2.159}/vision_agent/lmm/__init__.py +0 -0
- {vision_agent-0.2.158 → vision_agent-0.2.159}/vision_agent/lmm/lmm.py +0 -0
- {vision_agent-0.2.158 → vision_agent-0.2.159}/vision_agent/lmm/types.py +0 -0
- {vision_agent-0.2.158 → vision_agent-0.2.159}/vision_agent/tools/__init__.py +0 -0
- {vision_agent-0.2.158 → vision_agent-0.2.159}/vision_agent/tools/prompts.py +0 -0
- {vision_agent-0.2.158 → vision_agent-0.2.159}/vision_agent/tools/tool_utils.py +0 -0
- {vision_agent-0.2.158 → vision_agent-0.2.159}/vision_agent/tools/tools_types.py +0 -0
- {vision_agent-0.2.158 → vision_agent-0.2.159}/vision_agent/utils/__init__.py +0 -0
- {vision_agent-0.2.158 → vision_agent-0.2.159}/vision_agent/utils/exceptions.py +0 -0
- {vision_agent-0.2.158 → vision_agent-0.2.159}/vision_agent/utils/execute.py +0 -0
- {vision_agent-0.2.158 → vision_agent-0.2.159}/vision_agent/utils/image_utils.py +0 -0
- {vision_agent-0.2.158 → vision_agent-0.2.159}/vision_agent/utils/sim.py +0 -0
- {vision_agent-0.2.158 → vision_agent-0.2.159}/vision_agent/utils/type_defs.py +0 -0
- {vision_agent-0.2.158 → vision_agent-0.2.159}/vision_agent/utils/video.py +0 -0
@@ -149,6 +149,32 @@ def execute_user_code_action(
|
|
149
149
|
return user_result, user_obs
|
150
150
|
|
151
151
|
|
152
|
+
def add_step_descriptions(response: Dict[str, str]) -> Dict[str, str]:
    """Return a copy of ``response`` with its code step prefixed by a description.

    When ``response["response"]`` contains an ``<execute_python>`` tag, all text
    before the tag is dropped and replaced with a short description of the
    tools the code calls. Responses without the key, with a non-string value,
    or without the tag are returned as an unmodified copy.

    Parameters:
        response (Dict[str, str]): The agent response; it is never mutated.

    Returns:
        Dict[str, str]: A deep copy of ``response``, with ``"response"``
            rewritten when it contains an ``<execute_python>`` tag.
    """
    response = copy.deepcopy(response)

    resp_str = response.get("response")
    if not isinstance(resp_str, str):
        return response

    tag_start = resp_str.find("<execute_python>")
    if tag_start == -1:
        return response

    # Only the text from the tag onward is kept, so look for tool names there.
    # Matching the discarded prose would describe tools that are merely
    # mentioned but never called.
    code_part = resp_str[tag_start:]

    # Only these tools get a specific description; anything else falls back to
    # the generic "Executing code." message.
    description_map = {
        "open_code_artifact": "Reading file.",
        "create_code_artifact": "Creating file.",
        "edit_code_artifact": "Editing file.",
        "generate_vision_code": "Generating vision code.",
        "edit_vision_code": "Editing vision code.",
    }
    description = "".join(
        text + " " for tool, text in description_map.items() if tool in code_part
    )
    if not description:
        description = "Executing code."

    response["response"] = description + code_part
    return response
|
176
|
+
|
177
|
+
|
152
178
|
class VisionAgent(Agent):
|
153
179
|
"""Vision Agent is an agent that can chat with the user and call tools or other
|
154
180
|
agents to generate code for it. Vision Agent uses python code to execute actions
|
@@ -335,8 +361,18 @@ class VisionAgent(Agent):
|
|
335
361
|
response = run_conversation(self.agent, int_chat)
|
336
362
|
if self.verbosity >= 1:
|
337
363
|
_LOGGER.info(response)
|
338
|
-
int_chat.append(
|
339
|
-
|
364
|
+
int_chat.append(
|
365
|
+
{
|
366
|
+
"role": "assistant",
|
367
|
+
"content": str(add_step_descriptions(response)),
|
368
|
+
}
|
369
|
+
)
|
370
|
+
orig_chat.append(
|
371
|
+
{
|
372
|
+
"role": "assistant",
|
373
|
+
"content": str(add_step_descriptions(response)),
|
374
|
+
}
|
375
|
+
)
|
340
376
|
|
341
377
|
# sometimes it gets stuck in a loop, so we force it to exit
|
342
378
|
if last_response == response:
|
@@ -382,6 +418,16 @@ class VisionAgent(Agent):
|
|
382
418
|
|
383
419
|
obs_chat_elt: Message = {"role": "observation", "content": obs}
|
384
420
|
if media_obs and result.success:
|
421
|
+
# for view_media_artifact, we need to ensure the media is loaded
|
422
|
+
# locally so the conversation agent can actually see it
|
423
|
+
code_interpreter.download_file(
|
424
|
+
str(remote_artifacts_path.name),
|
425
|
+
str(self.local_artifacts_path),
|
426
|
+
)
|
427
|
+
artifacts.load(
|
428
|
+
self.local_artifacts_path,
|
429
|
+
Path(self.local_artifacts_path).parent,
|
430
|
+
)
|
385
431
|
obs_chat_elt["media"] = [
|
386
432
|
Path(self.local_artifacts_path).parent / media_ob
|
387
433
|
for media_ob in media_obs
|
@@ -407,8 +453,9 @@ class VisionAgent(Agent):
|
|
407
453
|
code_interpreter.download_file(
|
408
454
|
str(remote_artifacts_path.name), str(self.local_artifacts_path)
|
409
455
|
)
|
410
|
-
artifacts.load(
|
411
|
-
|
456
|
+
artifacts.load(
|
457
|
+
self.local_artifacts_path, Path(self.local_artifacts_path).parent
|
458
|
+
)
|
412
459
|
return orig_chat, artifacts
|
413
460
|
|
414
461
|
def streaming_message(self, message: Dict[str, Any]) -> None:
|
@@ -92,19 +92,26 @@ class Artifacts:
|
|
92
92
|
|
93
93
|
self.code_sandbox_runtime = None
|
94
94
|
|
95
|
-
def load(
|
96
|
-
|
97
|
-
|
95
|
+
def load(
|
96
|
+
self,
|
97
|
+
artifacts_path: Union[str, Path],
|
98
|
+
load_to: Optional[Union[str, Path]] = None,
|
99
|
+
) -> None:
|
100
|
+
"""Loads all artifacts into the load_to path. If load_to is None, it will load
|
101
|
+
into the parent directory of remote_save_path. If an artifact value is None it will skip loading it.
|
98
102
|
|
99
103
|
Parameters:
|
100
|
-
|
104
|
+
artifacts_path (Union[str, Path]): The file path to load the artifacts from
|
101
105
|
"""
|
102
|
-
with open(
|
106
|
+
with open(artifacts_path, "rb") as f:
|
103
107
|
self.artifacts = pkl.load(f)
|
108
|
+
|
109
|
+
load_to = self.remote_save_path.parent if load_to is None else Path(load_to)
|
110
|
+
|
104
111
|
for k, v in self.artifacts.items():
|
105
112
|
if v is not None:
|
106
113
|
mode = "w" if isinstance(v, str) else "wb"
|
107
|
-
with open(
|
114
|
+
with open(load_to / k, mode) as f:
|
108
115
|
f.write(v)
|
109
116
|
|
110
117
|
def show(self, uploaded_file_path: Optional[Union[str, Path]] = None) -> str:
|
@@ -700,6 +700,7 @@ def countgd_counting(
|
|
700
700
|
{'score': 0.98, 'label': 'flower', 'bbox': [0.44, 0.24, 0.49, 0.58]},
|
701
701
|
]
|
702
702
|
"""
|
703
|
+
image_size = image.shape[:2]
|
703
704
|
buffer_bytes = numpy_to_bytes(image)
|
704
705
|
files = [("image", buffer_bytes)]
|
705
706
|
prompt = prompt.replace(", ", " .")
|
@@ -712,7 +713,7 @@ def countgd_counting(
|
|
712
713
|
bboxes_formatted = [
|
713
714
|
ODResponseData(
|
714
715
|
label=bbox["label"],
|
715
|
-
bbox=
|
716
|
+
bbox=normalize_bbox(bbox["bounding_box"], image_size),
|
716
717
|
score=round(bbox["score"], 2),
|
717
718
|
)
|
718
719
|
for bbox in bboxes_per_frame
|
@@ -757,6 +758,7 @@ def countgd_example_based_counting(
|
|
757
758
|
{'score': 0.98, 'label': 'object', 'bounding_box': [0.44, 0.24, 0.49, 0.58]},
|
758
759
|
]
|
759
760
|
"""
|
761
|
+
image_size = image.shape[:2]
|
760
762
|
buffer_bytes = numpy_to_bytes(image)
|
761
763
|
files = [("image", buffer_bytes)]
|
762
764
|
visual_prompts = [
|
@@ -771,7 +773,7 @@ def countgd_example_based_counting(
|
|
771
773
|
bboxes_formatted = [
|
772
774
|
ODResponseData(
|
773
775
|
label=bbox["label"],
|
774
|
-
bbox=
|
776
|
+
bbox=normalize_bbox(bbox["bounding_box"], image_size),
|
775
777
|
score=round(bbox["score"], 2),
|
776
778
|
)
|
777
779
|
for bbox in bboxes_per_frame
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{vision_agent-0.2.158 → vision_agent-0.2.159}/vision_agent/agent/vision_agent_coder_prompts.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|