vision-agent 0.2.165__tar.gz → 0.2.167__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
Files changed (35) hide show
  1. {vision_agent-0.2.165 → vision_agent-0.2.167}/PKG-INFO +1 -1
  2. {vision_agent-0.2.165 → vision_agent-0.2.167}/pyproject.toml +1 -1
  3. {vision_agent-0.2.165 → vision_agent-0.2.167}/vision_agent/agent/vision_agent.py +14 -1
  4. {vision_agent-0.2.165 → vision_agent-0.2.167}/vision_agent/tools/meta_tools.py +3 -2
  5. {vision_agent-0.2.165 → vision_agent-0.2.167}/vision_agent/utils/video.py +23 -3
  6. {vision_agent-0.2.165 → vision_agent-0.2.167}/LICENSE +0 -0
  7. {vision_agent-0.2.165 → vision_agent-0.2.167}/README.md +0 -0
  8. {vision_agent-0.2.165 → vision_agent-0.2.167}/vision_agent/__init__.py +0 -0
  9. {vision_agent-0.2.165 → vision_agent-0.2.167}/vision_agent/agent/__init__.py +0 -0
  10. {vision_agent-0.2.165 → vision_agent-0.2.167}/vision_agent/agent/agent.py +0 -0
  11. {vision_agent-0.2.165 → vision_agent-0.2.167}/vision_agent/agent/agent_utils.py +0 -0
  12. {vision_agent-0.2.165 → vision_agent-0.2.167}/vision_agent/agent/vision_agent_coder.py +0 -0
  13. {vision_agent-0.2.165 → vision_agent-0.2.167}/vision_agent/agent/vision_agent_coder_prompts.py +0 -0
  14. {vision_agent-0.2.165 → vision_agent-0.2.167}/vision_agent/agent/vision_agent_planner.py +0 -0
  15. {vision_agent-0.2.165 → vision_agent-0.2.167}/vision_agent/agent/vision_agent_planner_prompts.py +0 -0
  16. {vision_agent-0.2.165 → vision_agent-0.2.167}/vision_agent/agent/vision_agent_prompts.py +0 -0
  17. {vision_agent-0.2.165 → vision_agent-0.2.167}/vision_agent/clients/__init__.py +0 -0
  18. {vision_agent-0.2.165 → vision_agent-0.2.167}/vision_agent/clients/http.py +0 -0
  19. {vision_agent-0.2.165 → vision_agent-0.2.167}/vision_agent/clients/landing_public_api.py +0 -0
  20. {vision_agent-0.2.165 → vision_agent-0.2.167}/vision_agent/fonts/__init__.py +0 -0
  21. {vision_agent-0.2.165 → vision_agent-0.2.167}/vision_agent/fonts/default_font_ch_en.ttf +0 -0
  22. {vision_agent-0.2.165 → vision_agent-0.2.167}/vision_agent/lmm/__init__.py +0 -0
  23. {vision_agent-0.2.165 → vision_agent-0.2.167}/vision_agent/lmm/lmm.py +0 -0
  24. {vision_agent-0.2.165 → vision_agent-0.2.167}/vision_agent/lmm/types.py +0 -0
  25. {vision_agent-0.2.165 → vision_agent-0.2.167}/vision_agent/tools/__init__.py +0 -0
  26. {vision_agent-0.2.165 → vision_agent-0.2.167}/vision_agent/tools/prompts.py +0 -0
  27. {vision_agent-0.2.165 → vision_agent-0.2.167}/vision_agent/tools/tool_utils.py +0 -0
  28. {vision_agent-0.2.165 → vision_agent-0.2.167}/vision_agent/tools/tools.py +0 -0
  29. {vision_agent-0.2.165 → vision_agent-0.2.167}/vision_agent/tools/tools_types.py +0 -0
  30. {vision_agent-0.2.165 → vision_agent-0.2.167}/vision_agent/utils/__init__.py +0 -0
  31. {vision_agent-0.2.165 → vision_agent-0.2.167}/vision_agent/utils/exceptions.py +0 -0
  32. {vision_agent-0.2.165 → vision_agent-0.2.167}/vision_agent/utils/execute.py +0 -0
  33. {vision_agent-0.2.165 → vision_agent-0.2.167}/vision_agent/utils/image_utils.py +0 -0
  34. {vision_agent-0.2.165 → vision_agent-0.2.167}/vision_agent/utils/sim.py +0 -0
  35. {vision_agent-0.2.165 → vision_agent-0.2.167}/vision_agent/utils/type_defs.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: vision-agent
3
- Version: 0.2.165
3
+ Version: 0.2.167
4
4
  Summary: Toolset for Vision Agent
5
5
  Author: Landing AI
6
6
  Author-email: dev@landing.ai
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
4
4
 
5
5
  [tool.poetry]
6
6
  name = "vision-agent"
7
- version = "0.2.165"
7
+ version = "0.2.167"
8
8
  description = "Toolset for Vision Agent"
9
9
  authors = ["Landing AI <dev@landing.ai>"]
10
10
  readme = "README.md"
@@ -85,6 +85,15 @@ def format_agent_message(agent_message: str) -> str:
85
85
  return output
86
86
 
87
87
 
88
+ def _clean_response(response: str) -> str:
89
+ # Sometimes the LLM will hallucinate responses to an <execute_python> tag as if it
90
+ # had already executed the code. This function removes the hallucinated response.
91
+ if "<execute_python>" in response:
92
+ end_execute_python = response.find("</execute_python>")
93
+ response = response[: end_execute_python + len("</execute_python>")]
94
+ return response
95
+
96
+
88
97
  def run_conversation(orch: LMM, chat: List[Message]) -> Dict[str, Any]:
89
98
  chat = copy.deepcopy(chat)
90
99
 
@@ -114,6 +123,10 @@ def run_conversation(orch: LMM, chat: List[Message]) -> Dict[str, Any]:
114
123
  message["media"] = chat[-1]["media"]
115
124
  conv_resp = cast(str, orch([message], stream=False))
116
125
 
126
+ # clean the response first, if we are executing code, do not respond or end
127
+ # conversation before the code has been executed.
128
+ conv_resp = _clean_response(conv_resp)
129
+
117
130
  let_user_respond_str = extract_tag(conv_resp, "let_user_respond")
118
131
  let_user_respond = (
119
132
  "true" in let_user_respond_str.lower() if let_user_respond_str else False
@@ -458,7 +471,7 @@ class VisionAgent(Agent):
458
471
  self.streaming_message(
459
472
  {
460
473
  "role": "assistant",
461
- "content": json.dumps(response),
474
+ "content": json.dumps(add_step_descriptions(response)),
462
475
  "finished": finished and code_action is None,
463
476
  }
464
477
  )
@@ -676,12 +676,13 @@ def use_extra_vision_agent_args(
676
676
  for node in red:
677
677
  # seems to always be atomtrailers not call type
678
678
  if node.type == "atomtrailers":
679
+ if node.name.value == "generate_vision_code":
680
+ node.value[1].value.append(f"test_multi_plan={test_multi_plan}")
681
+
679
682
  if (
680
683
  node.name.value == "generate_vision_code"
681
684
  or node.name.value == "edit_vision_code"
682
685
  ):
683
- node.value[1].value.append(f"test_multi_plan={test_multi_plan}")
684
-
685
686
  if custom_tool_names is not None:
686
687
  node.value[1].value.append(f"custom_tool_names={custom_tool_names}")
687
688
  cleaned_code = red.dumps().strip()
@@ -11,6 +11,9 @@ import numpy as np
11
11
  _LOGGER = logging.getLogger(__name__)
12
12
  # The maximum length of the clip to extract frames from, in seconds
13
13
 
14
+ _DEFAULT_VIDEO_FPS = 24
15
+ _DEFAULT_INPUT_FPS = 1.0
16
+
14
17
 
15
18
  def play_video(video_base64: str) -> None:
16
19
  """Play a video file"""
@@ -51,7 +54,9 @@ def _resize_frame(frame: np.ndarray) -> np.ndarray:
51
54
 
52
55
 
53
56
  def video_writer(
54
- frames: List[np.ndarray], fps: float = 1.0, filename: Optional[str] = None
57
+ frames: List[np.ndarray],
58
+ fps: float = _DEFAULT_INPUT_FPS,
59
+ filename: Optional[str] = None,
55
60
  ) -> str:
56
61
  if filename is None:
57
62
  filename = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4").name
@@ -78,7 +83,7 @@ def video_writer(
78
83
 
79
84
 
80
85
  def frames_to_bytes(
81
- frames: List[np.ndarray], fps: float = 1.0, file_ext: str = ".mp4"
86
+ frames: List[np.ndarray], fps: float = _DEFAULT_INPUT_FPS, file_ext: str = ".mp4"
82
87
  ) -> bytes:
83
88
  r"""Convert a list of frames to a video file encoded into a byte string.
84
89
 
@@ -101,7 +106,7 @@ def frames_to_bytes(
101
106
  # same file name and the time savings are very large.
102
107
  @lru_cache(maxsize=8)
103
108
  def extract_frames_from_video(
104
- video_uri: str, fps: float = 1.0
109
+ video_uri: str, fps: float = _DEFAULT_INPUT_FPS
105
110
  ) -> List[Tuple[np.ndarray, float]]:
106
111
  """Extract frames from a video along with the timestamp in seconds.
107
112
 
@@ -118,6 +123,16 @@ def extract_frames_from_video(
118
123
 
119
124
  cap = cv2.VideoCapture(video_uri)
120
125
  orig_fps = cap.get(cv2.CAP_PROP_FPS)
126
+ if not orig_fps or orig_fps <= 0:
127
+ _LOGGER.warning(
128
+ f"Input video, {video_uri}, has no fps, using the default value {_DEFAULT_VIDEO_FPS}"
129
+ )
130
+ orig_fps = _DEFAULT_VIDEO_FPS
131
+ if not fps or fps <= 0:
132
+ _LOGGER.warning(
133
+ f"Input fps, {fps}, is illegal, using the default value: {_DEFAULT_INPUT_FPS}"
134
+ )
135
+ fps = _DEFAULT_INPUT_FPS
121
136
  orig_frame_time = 1 / orig_fps
122
137
  targ_frame_time = 1 / fps
123
138
  frames: List[Tuple[np.ndarray, float]] = []
@@ -129,10 +144,15 @@ def extract_frames_from_video(
129
144
  break
130
145
 
131
146
  elapsed_time += orig_frame_time
147
+ # This is to prevent floating-point precision loss, which can cause
148
+ # the elapsed time to be slightly less than the target frame time, which
149
+ # causes the last frame to be skipped
150
+ elapsed_time = round(elapsed_time, 8)
132
151
  if elapsed_time >= targ_frame_time:
133
152
  frames.append((cv2.cvtColor(frame, cv2.COLOR_BGR2RGB), i / orig_fps))
134
153
  elapsed_time -= targ_frame_time
135
154
 
136
155
  i += 1
137
156
  cap.release()
157
+ _LOGGER.info(f"Extracted {len(frames)} frames from {video_uri}")
138
158
  return frames
File without changes
File without changes