vision-agent 0.2.158__tar.gz → 0.2.159__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
Files changed (33) hide show
  1. {vision_agent-0.2.158 → vision_agent-0.2.159}/PKG-INFO +1 -1
  2. {vision_agent-0.2.158 → vision_agent-0.2.159}/pyproject.toml +1 -1
  3. {vision_agent-0.2.158 → vision_agent-0.2.159}/vision_agent/agent/vision_agent.py +51 -4
  4. {vision_agent-0.2.158 → vision_agent-0.2.159}/vision_agent/tools/meta_tools.py +13 -6
  5. {vision_agent-0.2.158 → vision_agent-0.2.159}/vision_agent/tools/tools.py +4 -2
  6. {vision_agent-0.2.158 → vision_agent-0.2.159}/LICENSE +0 -0
  7. {vision_agent-0.2.158 → vision_agent-0.2.159}/README.md +0 -0
  8. {vision_agent-0.2.158 → vision_agent-0.2.159}/vision_agent/__init__.py +0 -0
  9. {vision_agent-0.2.158 → vision_agent-0.2.159}/vision_agent/agent/__init__.py +0 -0
  10. {vision_agent-0.2.158 → vision_agent-0.2.159}/vision_agent/agent/agent.py +0 -0
  11. {vision_agent-0.2.158 → vision_agent-0.2.159}/vision_agent/agent/agent_utils.py +0 -0
  12. {vision_agent-0.2.158 → vision_agent-0.2.159}/vision_agent/agent/vision_agent_coder.py +0 -0
  13. {vision_agent-0.2.158 → vision_agent-0.2.159}/vision_agent/agent/vision_agent_coder_prompts.py +0 -0
  14. {vision_agent-0.2.158 → vision_agent-0.2.159}/vision_agent/agent/vision_agent_prompts.py +0 -0
  15. {vision_agent-0.2.158 → vision_agent-0.2.159}/vision_agent/clients/__init__.py +0 -0
  16. {vision_agent-0.2.158 → vision_agent-0.2.159}/vision_agent/clients/http.py +0 -0
  17. {vision_agent-0.2.158 → vision_agent-0.2.159}/vision_agent/clients/landing_public_api.py +0 -0
  18. {vision_agent-0.2.158 → vision_agent-0.2.159}/vision_agent/fonts/__init__.py +0 -0
  19. {vision_agent-0.2.158 → vision_agent-0.2.159}/vision_agent/fonts/default_font_ch_en.ttf +0 -0
  20. {vision_agent-0.2.158 → vision_agent-0.2.159}/vision_agent/lmm/__init__.py +0 -0
  21. {vision_agent-0.2.158 → vision_agent-0.2.159}/vision_agent/lmm/lmm.py +0 -0
  22. {vision_agent-0.2.158 → vision_agent-0.2.159}/vision_agent/lmm/types.py +0 -0
  23. {vision_agent-0.2.158 → vision_agent-0.2.159}/vision_agent/tools/__init__.py +0 -0
  24. {vision_agent-0.2.158 → vision_agent-0.2.159}/vision_agent/tools/prompts.py +0 -0
  25. {vision_agent-0.2.158 → vision_agent-0.2.159}/vision_agent/tools/tool_utils.py +0 -0
  26. {vision_agent-0.2.158 → vision_agent-0.2.159}/vision_agent/tools/tools_types.py +0 -0
  27. {vision_agent-0.2.158 → vision_agent-0.2.159}/vision_agent/utils/__init__.py +0 -0
  28. {vision_agent-0.2.158 → vision_agent-0.2.159}/vision_agent/utils/exceptions.py +0 -0
  29. {vision_agent-0.2.158 → vision_agent-0.2.159}/vision_agent/utils/execute.py +0 -0
  30. {vision_agent-0.2.158 → vision_agent-0.2.159}/vision_agent/utils/image_utils.py +0 -0
  31. {vision_agent-0.2.158 → vision_agent-0.2.159}/vision_agent/utils/sim.py +0 -0
  32. {vision_agent-0.2.158 → vision_agent-0.2.159}/vision_agent/utils/type_defs.py +0 -0
  33. {vision_agent-0.2.158 → vision_agent-0.2.159}/vision_agent/utils/video.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: vision-agent
3
- Version: 0.2.158
3
+ Version: 0.2.159
4
4
  Summary: Toolset for Vision Agent
5
5
  Author: Landing AI
6
6
  Author-email: dev@landing.ai
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
4
4
 
5
5
  [tool.poetry]
6
6
  name = "vision-agent"
7
- version = "0.2.158"
7
+ version = "0.2.159"
8
8
  description = "Toolset for Vision Agent"
9
9
  authors = ["Landing AI <dev@landing.ai>"]
10
10
  readme = "README.md"
@@ -149,6 +149,32 @@ def execute_user_code_action(
149
149
  return user_result, user_obs
150
150
 
151
151
 
152
+ def add_step_descriptions(response: Dict[str, str]) -> Dict[str, str]:
153
+ response = copy.deepcopy(response)
154
+ if "response" in response:
155
+ resp_str = response["response"]
156
+ if "<execute_python>" in resp_str:
157
+ # only include descriptions for these, the rest will just have executing
158
+ # code
159
+ description_map = {
160
+ "open_code_artifact": "Reading file.",
161
+ "create_code_artifact": "Creating file.",
162
+ "edit_code_artifact": "Editing file.",
163
+ "generate_vision_code": "Generating vision code.",
164
+ "edit_vision_code": "Editing vision code.",
165
+ }
166
+ description = ""
167
+ for k, v in description_map.items():
168
+ if k in resp_str:
169
+ description += v + " "
170
+ if description == "":
171
+ description = "Executing code."
172
+ resp_str = resp_str[resp_str.find("<execute_python>") :]
173
+ resp_str = description + resp_str
174
+ response["response"] = resp_str
175
+ return response
176
+
177
+
152
178
  class VisionAgent(Agent):
153
179
  """Vision Agent is an agent that can chat with the user and call tools or other
154
180
  agents to generate code for it. Vision Agent uses python code to execute actions
@@ -335,8 +361,18 @@ class VisionAgent(Agent):
335
361
  response = run_conversation(self.agent, int_chat)
336
362
  if self.verbosity >= 1:
337
363
  _LOGGER.info(response)
338
- int_chat.append({"role": "assistant", "content": str(response)})
339
- orig_chat.append({"role": "assistant", "content": str(response)})
364
+ int_chat.append(
365
+ {
366
+ "role": "assistant",
367
+ "content": str(add_step_descriptions(response)),
368
+ }
369
+ )
370
+ orig_chat.append(
371
+ {
372
+ "role": "assistant",
373
+ "content": str(add_step_descriptions(response)),
374
+ }
375
+ )
340
376
 
341
377
  # sometimes it gets stuck in a loop, so we force it to exit
342
378
  if last_response == response:
@@ -382,6 +418,16 @@ class VisionAgent(Agent):
382
418
 
383
419
  obs_chat_elt: Message = {"role": "observation", "content": obs}
384
420
  if media_obs and result.success:
421
+ # for view_media_artifact, we need to ensure the media is loaded
422
+ # locally so the conversation agent can actually see it
423
+ code_interpreter.download_file(
424
+ str(remote_artifacts_path.name),
425
+ str(self.local_artifacts_path),
426
+ )
427
+ artifacts.load(
428
+ self.local_artifacts_path,
429
+ Path(self.local_artifacts_path).parent,
430
+ )
385
431
  obs_chat_elt["media"] = [
386
432
  Path(self.local_artifacts_path).parent / media_ob
387
433
  for media_ob in media_obs
@@ -407,8 +453,9 @@ class VisionAgent(Agent):
407
453
  code_interpreter.download_file(
408
454
  str(remote_artifacts_path.name), str(self.local_artifacts_path)
409
455
  )
410
- artifacts.load(self.local_artifacts_path)
411
- artifacts.save()
456
+ artifacts.load(
457
+ self.local_artifacts_path, Path(self.local_artifacts_path).parent
458
+ )
412
459
  return orig_chat, artifacts
413
460
 
414
461
  def streaming_message(self, message: Dict[str, Any]) -> None:
@@ -92,19 +92,26 @@ class Artifacts:
92
92
 
93
93
  self.code_sandbox_runtime = None
94
94
 
95
- def load(self, file_path: Union[str, Path]) -> None:
96
- """Loads are artifacts into the remote environment. If an artifact value is None
97
- it will skip loading it.
95
+ def load(
96
+ self,
97
+ artifacts_path: Union[str, Path],
98
+ load_to: Optional[Union[str, Path]] = None,
99
+ ) -> None:
100
+ """Loads all artifacts into the load_to path. If load_to is None, it will load
101
+ into remote_save_path. If an artifact value is None it will skip loading it.
98
102
 
99
103
  Parameters:
100
- file_path (Union[str, Path]): The file path to load the artifacts from
104
+ artifacts_path (Union[str, Path]): The file path to load the artifacts from
101
105
  """
102
- with open(file_path, "rb") as f:
106
+ with open(artifacts_path, "rb") as f:
103
107
  self.artifacts = pkl.load(f)
108
+
109
+ load_to = self.remote_save_path.parent if load_to is None else Path(load_to)
110
+
104
111
  for k, v in self.artifacts.items():
105
112
  if v is not None:
106
113
  mode = "w" if isinstance(v, str) else "wb"
107
- with open(self.remote_save_path.parent / k, mode) as f:
114
+ with open(load_to / k, mode) as f:
108
115
  f.write(v)
109
116
 
110
117
  def show(self, uploaded_file_path: Optional[Union[str, Path]] = None) -> str:
@@ -700,6 +700,7 @@ def countgd_counting(
700
700
  {'score': 0.98, 'label': 'flower', 'bbox': [0.44, 0.24, 0.49, 0.58]},
701
701
  ]
702
702
  """
703
+ image_size = image.shape[:2]
703
704
  buffer_bytes = numpy_to_bytes(image)
704
705
  files = [("image", buffer_bytes)]
705
706
  prompt = prompt.replace(", ", " .")
@@ -712,7 +713,7 @@ def countgd_counting(
712
713
  bboxes_formatted = [
713
714
  ODResponseData(
714
715
  label=bbox["label"],
715
- bbox=list(map(lambda x: round(x, 2), bbox["bounding_box"])),
716
+ bbox=normalize_bbox(bbox["bounding_box"], image_size),
716
717
  score=round(bbox["score"], 2),
717
718
  )
718
719
  for bbox in bboxes_per_frame
@@ -757,6 +758,7 @@ def countgd_example_based_counting(
757
758
  {'score': 0.98, 'label': 'object', 'bounding_box': [0.44, 0.24, 0.49, 0.58]},
758
759
  ]
759
760
  """
761
+ image_size = image.shape[:2]
760
762
  buffer_bytes = numpy_to_bytes(image)
761
763
  files = [("image", buffer_bytes)]
762
764
  visual_prompts = [
@@ -771,7 +773,7 @@ def countgd_example_based_counting(
771
773
  bboxes_formatted = [
772
774
  ODResponseData(
773
775
  label=bbox["label"],
774
- bbox=list(map(lambda x: round(x, 2), bbox["bounding_box"])),
776
+ bbox=normalize_bbox(bbox["bounding_box"], image_size),
775
777
  score=round(bbox["score"], 2),
776
778
  )
777
779
  for bbox in bboxes_per_frame
File without changes
File without changes