vision-agent 0.2.185__tar.gz → 0.2.186__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
Files changed (35)
  1. {vision_agent-0.2.185 → vision_agent-0.2.186}/PKG-INFO +3 -3
  2. {vision_agent-0.2.185 → vision_agent-0.2.186}/README.md +2 -2
  3. {vision_agent-0.2.185 → vision_agent-0.2.186}/pyproject.toml +1 -1
  4. {vision_agent-0.2.185 → vision_agent-0.2.186}/vision_agent/tools/tools.py +13 -15
  5. {vision_agent-0.2.185 → vision_agent-0.2.186}/LICENSE +0 -0
  6. {vision_agent-0.2.185 → vision_agent-0.2.186}/vision_agent/__init__.py +0 -0
  7. {vision_agent-0.2.185 → vision_agent-0.2.186}/vision_agent/agent/__init__.py +0 -0
  8. {vision_agent-0.2.185 → vision_agent-0.2.186}/vision_agent/agent/agent.py +0 -0
  9. {vision_agent-0.2.185 → vision_agent-0.2.186}/vision_agent/agent/agent_utils.py +0 -0
  10. {vision_agent-0.2.185 → vision_agent-0.2.186}/vision_agent/agent/vision_agent.py +0 -0
  11. {vision_agent-0.2.185 → vision_agent-0.2.186}/vision_agent/agent/vision_agent_coder.py +0 -0
  12. {vision_agent-0.2.185 → vision_agent-0.2.186}/vision_agent/agent/vision_agent_coder_prompts.py +0 -0
  13. {vision_agent-0.2.185 → vision_agent-0.2.186}/vision_agent/agent/vision_agent_planner.py +0 -0
  14. {vision_agent-0.2.185 → vision_agent-0.2.186}/vision_agent/agent/vision_agent_planner_prompts.py +0 -0
  15. {vision_agent-0.2.185 → vision_agent-0.2.186}/vision_agent/agent/vision_agent_prompts.py +0 -0
  16. {vision_agent-0.2.185 → vision_agent-0.2.186}/vision_agent/clients/__init__.py +0 -0
  17. {vision_agent-0.2.185 → vision_agent-0.2.186}/vision_agent/clients/http.py +0 -0
  18. {vision_agent-0.2.185 → vision_agent-0.2.186}/vision_agent/clients/landing_public_api.py +0 -0
  19. {vision_agent-0.2.185 → vision_agent-0.2.186}/vision_agent/fonts/__init__.py +0 -0
  20. {vision_agent-0.2.185 → vision_agent-0.2.186}/vision_agent/fonts/default_font_ch_en.ttf +0 -0
  21. {vision_agent-0.2.185 → vision_agent-0.2.186}/vision_agent/lmm/__init__.py +0 -0
  22. {vision_agent-0.2.185 → vision_agent-0.2.186}/vision_agent/lmm/lmm.py +0 -0
  23. {vision_agent-0.2.185 → vision_agent-0.2.186}/vision_agent/lmm/types.py +0 -0
  24. {vision_agent-0.2.185 → vision_agent-0.2.186}/vision_agent/tools/__init__.py +0 -0
  25. {vision_agent-0.2.185 → vision_agent-0.2.186}/vision_agent/tools/meta_tools.py +0 -0
  26. {vision_agent-0.2.185 → vision_agent-0.2.186}/vision_agent/tools/prompts.py +0 -0
  27. {vision_agent-0.2.185 → vision_agent-0.2.186}/vision_agent/tools/tool_utils.py +0 -0
  28. {vision_agent-0.2.185 → vision_agent-0.2.186}/vision_agent/tools/tools_types.py +0 -0
  29. {vision_agent-0.2.185 → vision_agent-0.2.186}/vision_agent/utils/__init__.py +0 -0
  30. {vision_agent-0.2.185 → vision_agent-0.2.186}/vision_agent/utils/exceptions.py +0 -0
  31. {vision_agent-0.2.185 → vision_agent-0.2.186}/vision_agent/utils/execute.py +0 -0
  32. {vision_agent-0.2.185 → vision_agent-0.2.186}/vision_agent/utils/image_utils.py +0 -0
  33. {vision_agent-0.2.185 → vision_agent-0.2.186}/vision_agent/utils/sim.py +0 -0
  34. {vision_agent-0.2.185 → vision_agent-0.2.186}/vision_agent/utils/type_defs.py +0 -0
  35. {vision_agent-0.2.185 → vision_agent-0.2.186}/vision_agent/utils/video.py +0 -0
{vision_agent-0.2.185 → vision_agent-0.2.186}/PKG-INFO
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: vision-agent
3
- Version: 0.2.185
3
+ Version: 0.2.186
4
4
  Summary: Toolset for Vision Agent
5
5
  Author: Landing AI
6
6
  Author-email: dev@landing.ai
@@ -63,10 +63,10 @@ code to solve the task for them. Check out our discord for updates and roadmaps!
63
63
  ## Table of Contents
64
64
  - [🚀Quick Start](#quick-start)
65
65
  - [📚Documentation](#documentation)
66
- - [🔍🤖VisionAgent](#vision-agent-basic-usage)
66
+ - [🔍🤖VisionAgent](#visionagent-basic-usage)
67
67
  - [🛠️Tools](#tools)
68
68
  - [🤖LMMs](#lmms)
69
- - [💻🤖VisionAgent Coder](#vision-agent-coder)
69
+ - [💻🤖VisionAgent Coder](#visionagent-coder)
70
70
  - [🏗️Additional Backends](#additional-backends)
71
71
 
72
72
  ## Quick Start
{vision_agent-0.2.185 → vision_agent-0.2.186}/README.md
@@ -21,10 +21,10 @@ code to solve the task for them. Check out our discord for updates and roadmaps!
21
21
  ## Table of Contents
22
22
  - [🚀Quick Start](#quick-start)
23
23
  - [📚Documentation](#documentation)
24
- - [🔍🤖VisionAgent](#vision-agent-basic-usage)
24
+ - [🔍🤖VisionAgent](#visionagent-basic-usage)
25
25
  - [🛠️Tools](#tools)
26
26
  - [🤖LMMs](#lmms)
27
- - [💻🤖VisionAgent Coder](#vision-agent-coder)
27
+ - [💻🤖VisionAgent Coder](#visionagent-coder)
28
28
  - [🏗️Additional Backends](#additional-backends)
29
29
 
30
30
  ## Quick Start
{vision_agent-0.2.185 → vision_agent-0.2.186}/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
4
4
 
5
5
  [tool.poetry]
6
6
  name = "vision-agent"
7
- version = "0.2.185"
7
+ version = "0.2.186"
8
8
  description = "Toolset for Vision Agent"
9
9
  authors = ["Landing AI <dev@landing.ai>"]
10
10
  readme = "README.md"
{vision_agent-0.2.185 → vision_agent-0.2.186}/vision_agent/tools/tools.py
@@ -1057,23 +1057,25 @@ def video_temporal_localization(
1057
1057
  prompt: str,
1058
1058
  frames: List[np.ndarray],
1059
1059
  model: str = "qwen2vl",
1060
- chunk_length: Optional[float] = None,
1061
- chunk_length_seconds: Optional[float] = None,
1062
1060
  chunk_length_frames: Optional[int] = 2,
1063
1061
  ) -> List[float]:
1064
- """'video_temporal_localization' is a tool that can find objects in a video given a question about it.
1065
- It returns a list of floats with a value of 1.0 if the object to be found is present in the chunk of video being analyzed.
1062
+ """'video_temporal_localization' will run qwen2vl on each chunk_length_frames
1063
+ value selected for the video. It can detect multiple objects independently per
1064
+ chunk_length_frames given a text prompt such as a referring expression
1065
+ but does not track objects across frames.
1066
+ It returns a list of floats with a value of 1.0 if the objects are found in a given
1067
+ chunk_length_frames of the video.
1066
1068
 
1067
1069
  Parameters:
1068
1070
  prompt (str): The question about the video
1069
1071
  frames (List[np.ndarray]): The reference frames used for the question
1070
- model (str): The model to use for the inference. Valid values are 'qwen2vl', 'gpt4o', 'internlm-xcomposer'
1071
- chunk_length (Optional[float]): length of each chunk in seconds
1072
- chunk_length_seconds (Optional[float]): alternative length for chunk in seconds
1072
+ model (str): The model to use for the inference. Valid values are
1073
+ 'qwen2vl', 'gpt4o', 'internlm-xcomposer'
1073
1074
  chunk_length_frames (Optional[int]): length of each chunk in frames
1074
1075
 
1075
1076
  Returns:
1076
- List[float]: A list of floats with a value of 1.0 if the object to be found is present in the chunk of video
1077
+ List[float]: A list of floats with a value of 1.0 if the objects to be found
1078
+ are present in the chunk_length_frames of the video.
1077
1079
 
1078
1080
  Example
1079
1081
  -------
@@ -1088,10 +1090,6 @@ def video_temporal_localization(
1088
1090
  "model": model,
1089
1091
  "function_name": "video_temporal_localization",
1090
1092
  }
1091
- if chunk_length is not None:
1092
- payload["chunk_length"] = chunk_length
1093
- if chunk_length_seconds is not None:
1094
- payload["chunk_length_seconds"] = chunk_length_seconds
1095
1093
  if chunk_length_frames is not None:
1096
1094
  payload["chunk_length_frames"] = chunk_length_frames
1097
1095
 
@@ -1790,9 +1788,8 @@ def flux_image_inpainting(
1790
1788
  where 1 indicates areas to be inpainted and 0 indicates areas to be preserved.
1791
1789
 
1792
1790
  Returns:
1793
- np.ndarray:
1794
- The generated image(s) as a numpy array in RGB format
1795
- with values ranging from 0 to 255.
1791
+ np.ndarray: The generated image(s) as a numpy array in RGB format with values
1792
+ ranging from 0 to 255.
1796
1793
 
1797
1794
  -------
1798
1795
  Example:
@@ -2351,6 +2348,7 @@ FUNCTION_TOOLS = [
2351
2348
  closest_box_distance,
2352
2349
  qwen2_vl_images_vqa,
2353
2350
  qwen2_vl_video_vqa,
2351
+ video_temporal_localization,
2354
2352
  ]
2355
2353
 
2356
2354
  UTIL_TOOLS = [
File without changes