vision-agent 0.2.203__tar.gz → 0.2.207__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
Files changed (46)
  1. {vision_agent-0.2.203 → vision_agent-0.2.207}/PKG-INFO +1 -1
  2. {vision_agent-0.2.203 → vision_agent-0.2.207}/pyproject.toml +1 -1
  3. {vision_agent-0.2.203 → vision_agent-0.2.207}/vision_agent/agent/vision_agent_coder_v2.py +2 -2
  4. {vision_agent-0.2.203 → vision_agent-0.2.207}/vision_agent/agent/vision_agent_planner_v2.py +3 -1
  5. {vision_agent-0.2.203 → vision_agent-0.2.207}/vision_agent/agent/vision_agent_prompts.py +1 -1
  6. {vision_agent-0.2.203 → vision_agent-0.2.207}/vision_agent/tools/__init__.py +1 -0
  7. {vision_agent-0.2.203 → vision_agent-0.2.207}/vision_agent/tools/planner_tools.py +64 -36
  8. {vision_agent-0.2.203 → vision_agent-0.2.207}/vision_agent/tools/tools.py +7 -0
  9. {vision_agent-0.2.203 → vision_agent-0.2.207}/LICENSE +0 -0
  10. {vision_agent-0.2.203 → vision_agent-0.2.207}/README.md +0 -0
  11. {vision_agent-0.2.203 → vision_agent-0.2.207}/vision_agent/.sim_tools/df.csv +0 -0
  12. {vision_agent-0.2.203 → vision_agent-0.2.207}/vision_agent/.sim_tools/embs.npy +0 -0
  13. {vision_agent-0.2.203 → vision_agent-0.2.207}/vision_agent/__init__.py +0 -0
  14. {vision_agent-0.2.203 → vision_agent-0.2.207}/vision_agent/agent/README.md +0 -0
  15. {vision_agent-0.2.203 → vision_agent-0.2.207}/vision_agent/agent/__init__.py +0 -0
  16. {vision_agent-0.2.203 → vision_agent-0.2.207}/vision_agent/agent/agent.py +0 -0
  17. {vision_agent-0.2.203 → vision_agent-0.2.207}/vision_agent/agent/agent_utils.py +0 -0
  18. {vision_agent-0.2.203 → vision_agent-0.2.207}/vision_agent/agent/types.py +0 -0
  19. {vision_agent-0.2.203 → vision_agent-0.2.207}/vision_agent/agent/vision_agent.py +0 -0
  20. {vision_agent-0.2.203 → vision_agent-0.2.207}/vision_agent/agent/vision_agent_coder.py +0 -0
  21. {vision_agent-0.2.203 → vision_agent-0.2.207}/vision_agent/agent/vision_agent_coder_prompts.py +0 -0
  22. {vision_agent-0.2.203 → vision_agent-0.2.207}/vision_agent/agent/vision_agent_coder_prompts_v2.py +0 -0
  23. {vision_agent-0.2.203 → vision_agent-0.2.207}/vision_agent/agent/vision_agent_planner.py +0 -0
  24. {vision_agent-0.2.203 → vision_agent-0.2.207}/vision_agent/agent/vision_agent_planner_prompts.py +0 -0
  25. {vision_agent-0.2.203 → vision_agent-0.2.207}/vision_agent/agent/vision_agent_planner_prompts_v2.py +0 -0
  26. {vision_agent-0.2.203 → vision_agent-0.2.207}/vision_agent/agent/vision_agent_prompts_v2.py +0 -0
  27. {vision_agent-0.2.203 → vision_agent-0.2.207}/vision_agent/agent/vision_agent_v2.py +0 -0
  28. {vision_agent-0.2.203 → vision_agent-0.2.207}/vision_agent/clients/__init__.py +0 -0
  29. {vision_agent-0.2.203 → vision_agent-0.2.207}/vision_agent/clients/http.py +0 -0
  30. {vision_agent-0.2.203 → vision_agent-0.2.207}/vision_agent/clients/landing_public_api.py +0 -0
  31. {vision_agent-0.2.203 → vision_agent-0.2.207}/vision_agent/fonts/__init__.py +0 -0
  32. {vision_agent-0.2.203 → vision_agent-0.2.207}/vision_agent/fonts/default_font_ch_en.ttf +0 -0
  33. {vision_agent-0.2.203 → vision_agent-0.2.207}/vision_agent/lmm/__init__.py +0 -0
  34. {vision_agent-0.2.203 → vision_agent-0.2.207}/vision_agent/lmm/lmm.py +0 -0
  35. {vision_agent-0.2.203 → vision_agent-0.2.207}/vision_agent/lmm/types.py +0 -0
  36. {vision_agent-0.2.203 → vision_agent-0.2.207}/vision_agent/tools/meta_tools.py +0 -0
  37. {vision_agent-0.2.203 → vision_agent-0.2.207}/vision_agent/tools/prompts.py +0 -0
  38. {vision_agent-0.2.203 → vision_agent-0.2.207}/vision_agent/tools/tool_utils.py +0 -0
  39. {vision_agent-0.2.203 → vision_agent-0.2.207}/vision_agent/tools/tools_types.py +0 -0
  40. {vision_agent-0.2.203 → vision_agent-0.2.207}/vision_agent/utils/__init__.py +0 -0
  41. {vision_agent-0.2.203 → vision_agent-0.2.207}/vision_agent/utils/exceptions.py +0 -0
  42. {vision_agent-0.2.203 → vision_agent-0.2.207}/vision_agent/utils/execute.py +0 -0
  43. {vision_agent-0.2.203 → vision_agent-0.2.207}/vision_agent/utils/image_utils.py +0 -0
  44. {vision_agent-0.2.203 → vision_agent-0.2.207}/vision_agent/utils/sim.py +0 -0
  45. {vision_agent-0.2.203 → vision_agent-0.2.207}/vision_agent/utils/type_defs.py +0 -0
  46. {vision_agent-0.2.203 → vision_agent-0.2.207}/vision_agent/utils/video.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: vision-agent
3
- Version: 0.2.203
3
+ Version: 0.2.207
4
4
  Summary: Toolset for Vision Agent
5
5
  Author: Landing AI
6
6
  Author-email: dev@landing.ai
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
4
4
 
5
5
  [tool.poetry]
6
6
  name = "vision-agent"
7
- version = "0.2.203"
7
+ version = "0.2.207"
8
8
  description = "Toolset for Vision Agent"
9
9
  authors = ["Landing AI <dev@landing.ai>"]
10
10
  readme = "README.md"
@@ -34,7 +34,7 @@ from vision_agent.utils.execute import (
34
34
  CodeInterpreterFactory,
35
35
  Execution,
36
36
  )
37
- from vision_agent.utils.sim import Sim, load_cached_sim
37
+ from vision_agent.utils.sim import Sim
38
38
 
39
39
  _CONSOLE = Console()
40
40
 
@@ -316,7 +316,7 @@ class VisionAgentCoderV2(AgentCoder):
316
316
  elif isinstance(tool_recommender, Sim):
317
317
  self.tool_recommender = tool_recommender
318
318
  else:
319
- self.tool_recommender = load_cached_sim(T.TOOLS_DF)
319
+ self.tool_recommender = T.get_tool_recommender()
320
320
 
321
321
  self.verbose = verbose
322
322
  self.code_sandbox_runtime = code_sandbox_runtime
@@ -367,8 +367,10 @@ def replace_interaction_with_obs(chat: List[AgentMessage]) -> List[AgentMessage]
367
367
  response = json.loads(chat[i + 1].content)
368
368
  function_name = response["function_name"]
369
369
  tool_doc = get_tool_documentation(function_name)
370
+ if "box_threshold" in response:
371
+ tool_doc = f"Use the following function with box_threshold={response['box_threshold']}\n\n{tool_doc}"
370
372
  new_chat.append(AgentMessage(role="observation", content=tool_doc))
371
- except json.JSONDecodeError:
373
+ except (json.JSONDecodeError, KeyError):
372
374
  raise ValueError(f"Invalid JSON in interaction response: {chat_i}")
373
375
  else:
374
376
  new_chat.append(chat_i)
@@ -280,7 +280,7 @@ def main():
280
280
  if __name__ == "__main__":
281
281
  main()
282
282
  '''
283
- edit_code_artifact(artifacts, 'streamlit_app.py', CODE, 0, 0)
283
+ edit_code_artifact(artifacts, 'streamlit_app.py', 0, 0, CODE)
284
284
  </execute_python>
285
285
 
286
286
  OBSERVATION:
@@ -45,6 +45,7 @@ from .tools import (
45
45
  generate_pose_image,
46
46
  generate_soft_edge_image,
47
47
  get_tool_documentation,
48
+ get_tool_recommender,
48
49
  git_vqa_v2,
49
50
  gpt4o_image_vqa,
50
51
  gpt4o_video_vqa,
@@ -1,3 +1,4 @@
1
+ import inspect
1
2
  import logging
2
3
  import shutil
3
4
  import tempfile
@@ -31,10 +32,8 @@ from vision_agent.utils.execute import (
31
32
  MimeType,
32
33
  )
33
34
  from vision_agent.utils.image_utils import convert_to_b64
34
- from vision_agent.utils.sim import load_cached_sim
35
35
 
36
36
  TOOL_FUNCTIONS = {tool.__name__: tool for tool in T.TOOLS}
37
- TOOL_RECOMMENDER = load_cached_sim(T.TOOLS_DF)
38
37
 
39
38
  _LOGGER = logging.getLogger(__name__)
40
39
  EXAMPLES = f"\n{TEST_TOOLS_EXAMPLE1}\n{TEST_TOOLS_EXAMPLE2}\n"
@@ -51,7 +50,7 @@ def format_tool_output(tool_thoughts: str, tool_docstring: str) -> str:
51
50
 
52
51
 
53
52
  def extract_tool_info(
54
- tool_choice_context: Dict[str, Any]
53
+ tool_choice_context: Dict[str, Any],
55
54
  ) -> Tuple[Optional[Callable], str, str, str]:
56
55
  tool_thoughts = tool_choice_context.get("thoughts", "")
57
56
  tool_docstring = ""
@@ -63,12 +62,55 @@ def extract_tool_info(
63
62
  return tool, tool_thoughts, tool_docstring, ""
64
63
 
65
64
 
65
+ def replace_box_threshold(code: str, functions: List[str], box_threshold: float) -> str:
66
+ class ReplaceBoxThresholdTransformer(cst.CSTTransformer):
67
+ def leave_Call(
68
+ self, original_node: cst.Call, updated_node: cst.Call
69
+ ) -> cst.Call:
70
+ if (
71
+ isinstance(updated_node.func, cst.Name)
72
+ and updated_node.func.value in functions
73
+ ) or (
74
+ isinstance(updated_node.func, cst.Attribute)
75
+ and updated_node.func.attr.value in functions
76
+ ):
77
+ new_args = []
78
+ found = False
79
+ for arg in updated_node.args:
80
+ if arg.keyword and arg.keyword.value == "box_threshold":
81
+ new_arg = arg.with_changes(value=cst.Float(str(box_threshold)))
82
+ new_args.append(new_arg)
83
+ found = True
84
+ else:
85
+ new_args.append(arg)
86
+
87
+ if not found:
88
+ new_args.append(
89
+ cst.Arg(
90
+ keyword=cst.Name("box_threshold"),
91
+ value=cst.Float(str(box_threshold)),
92
+ equal=cst.AssignEqual(
93
+ whitespace_before=cst.SimpleWhitespace(""),
94
+ whitespace_after=cst.SimpleWhitespace(""),
95
+ ),
96
+ )
97
+ )
98
+ return updated_node.with_changes(args=new_args)
99
+ return updated_node
100
+
101
+ tree = cst.parse_module(code)
102
+ transformer = ReplaceBoxThresholdTransformer()
103
+ new_tree = tree.visit(transformer)
104
+ return new_tree.code
105
+
106
+
66
107
  def run_tool_testing(
67
108
  task: str,
68
109
  image_paths: List[str],
69
110
  lmm: LMM,
70
111
  exclude_tools: Optional[List[str]],
71
112
  code_interpreter: CodeInterpreter,
113
+ process_code: Callable[[str], str] = lambda x: x,
72
114
  ) -> tuple[str, str, Execution]:
73
115
  """Helper function to generate and run tool testing code."""
74
116
  query = lmm.generate(CATEGORIZE_TOOL_REQUEST.format(task=task))
@@ -80,7 +122,7 @@ def run_tool_testing(
80
122
  f"I need models from the {category.strip()} category of tools. {task}"
81
123
  )
82
124
 
83
- tool_docs = TOOL_RECOMMENDER.top_k(category, k=10, thresh=0.2)
125
+ tool_docs = T.get_tool_recommender().top_k(category, k=10, thresh=0.2)
84
126
  if exclude_tools is not None and len(exclude_tools) > 0:
85
127
  cleaned_tool_docs = []
86
128
  for tool_doc in tool_docs:
@@ -101,6 +143,7 @@ def run_tool_testing(
101
143
  code = extract_tag(response, "code") # type: ignore
102
144
  if code is None:
103
145
  raise ValueError(f"Could not extract code from response: {response}")
146
+ code = process_code(code)
104
147
  tool_output = code_interpreter.exec_isolation(DefaultImports.prepend_imports(code))
105
148
  tool_output_str = tool_output.text(include_results=False).strip()
106
149
 
@@ -119,6 +162,7 @@ def run_tool_testing(
119
162
  media=str(image_paths),
120
163
  )
121
164
  code = extract_code(lmm.generate(prompt, media=image_paths)) # type: ignore
165
+ code = process_code(code)
122
166
  tool_output = code_interpreter.exec_isolation(
123
167
  DefaultImports.prepend_imports(code)
124
168
  )
@@ -200,7 +244,9 @@ def get_tool_for_task(
200
244
  context=f"<code>\n{code}\n</code>\n<tool_output>\n{tool_output_str}\n</tool_output>",
201
245
  previous_attempts=error_message,
202
246
  )
203
- tool_choice_context_dict = extract_json(lmm.generate(prompt, media=image_paths)) # type: ignore
247
+ tool_choice_context_dict = extract_json(
248
+ lmm.generate(prompt, media=image_paths) # type: ignore
249
+ )
204
250
  tool, tool_thoughts, tool_docstring, error_message = extract_tool_info(
205
251
  tool_choice_context_dict
206
252
  )
@@ -221,36 +267,7 @@ def get_tool_documentation(tool_name: str) -> str:
221
267
  def get_tool_for_task_human_reviewer(
222
268
  task: str, images: List[np.ndarray], exclude_tools: Optional[List[str]] = None
223
269
  ) -> None:
224
- # NOTE: this should be the same documentation as get_tool_for_task
225
- """Given a task and one or more images this function will find a tool to accomplish
226
- the jobs. It prints the tool documentation and thoughts on why it chose the tool.
227
-
228
- It can produce tools for the following types of tasks:
229
- - Object detection and counting
230
- - Classification
231
- - Segmentation
232
- - OCR
233
- - VQA
234
- - Depth and pose estimation
235
- - Video object tracking
236
-
237
- Wait until the documentation is printed to use the function so you know what the
238
- input and output signatures are.
239
-
240
- Parameters:
241
- task: str: The task to accomplish.
242
- images: List[np.ndarray]: The images to use for the task.
243
- exclude_tools: Optional[List[str]]: A list of tool names to exclude from the
244
- recommendations. This is helpful if you are calling get_tool_for_task twice
245
- and do not want the same tool recommended.
246
-
247
- Returns:
248
- The tool to use for the task is printed to stdout
249
-
250
- Examples
251
- --------
252
- >>> get_tool_for_task("Give me an OCR model that can find 'hot chocolate' in the image", [image])
253
- """
270
+ # NOTE: this will have the same documentation as get_tool_for_task
254
271
  lmm = AnthropicLMM()
255
272
 
256
273
  with (
@@ -263,8 +280,19 @@ def get_tool_for_task_human_reviewer(
263
280
  Image.fromarray(image).save(image_path)
264
281
  image_paths.append(image_path)
265
282
 
283
+ tools = [
284
+ t.__name__
285
+ for t in T.TOOLS
286
+ if inspect.signature(t).parameters.get("box_threshold") # type: ignore
287
+ ]
288
+
266
289
  _, _, tool_output = run_tool_testing(
267
- task, image_paths, lmm, exclude_tools, code_interpreter
290
+ task,
291
+ image_paths,
292
+ lmm,
293
+ exclude_tools,
294
+ code_interpreter,
295
+ process_code=lambda x: replace_box_threshold(x, tools, 0.05),
268
296
  )
269
297
 
270
298
  # need to re-display results for the outer notebook to see them
@@ -4,6 +4,7 @@ import logging
4
4
  import os
5
5
  import tempfile
6
6
  import urllib.request
7
+ from functools import lru_cache
7
8
  from importlib import resources
8
9
  from pathlib import Path
9
10
  from typing import Any, Dict, List, Optional, Tuple, Union, cast
@@ -45,6 +46,7 @@ from vision_agent.utils.image_utils import (
45
46
  rle_decode,
46
47
  rle_decode_array,
47
48
  )
49
+ from vision_agent.utils.sim import Sim, load_cached_sim
48
50
  from vision_agent.utils.video import (
49
51
  extract_frames_from_video,
50
52
  frames_to_bytes,
@@ -80,6 +82,11 @@ _OCR_URL = "https://app.landing.ai/ocr/v1/detect-text"
80
82
  _LOGGER = logging.getLogger(__name__)
81
83
 
82
84
 
85
+ @lru_cache(maxsize=1)
86
+ def get_tool_recommender() -> Sim:
87
+ return load_cached_sim(TOOLS_DF)
88
+
89
+
83
90
  def grounding_dino(
84
91
  prompt: str,
85
92
  image: np.ndarray,
File without changes
File without changes