vision-agent 0.2.190__tar.gz → 0.2.192__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
Files changed (35) hide show
  1. {vision_agent-0.2.190 → vision_agent-0.2.192}/PKG-INFO +7 -11
  2. {vision_agent-0.2.190 → vision_agent-0.2.192}/README.md +6 -10
  3. {vision_agent-0.2.190 → vision_agent-0.2.192}/pyproject.toml +1 -1
  4. {vision_agent-0.2.190 → vision_agent-0.2.192}/vision_agent/agent/vision_agent_coder.py +0 -3
  5. {vision_agent-0.2.190 → vision_agent-0.2.192}/vision_agent/agent/vision_agent_planner.py +2 -10
  6. {vision_agent-0.2.190 → vision_agent-0.2.192}/vision_agent/tools/tools.py +13 -10
  7. {vision_agent-0.2.190 → vision_agent-0.2.192}/LICENSE +0 -0
  8. {vision_agent-0.2.190 → vision_agent-0.2.192}/vision_agent/__init__.py +0 -0
  9. {vision_agent-0.2.190 → vision_agent-0.2.192}/vision_agent/agent/__init__.py +0 -0
  10. {vision_agent-0.2.190 → vision_agent-0.2.192}/vision_agent/agent/agent.py +0 -0
  11. {vision_agent-0.2.190 → vision_agent-0.2.192}/vision_agent/agent/agent_utils.py +0 -0
  12. {vision_agent-0.2.190 → vision_agent-0.2.192}/vision_agent/agent/vision_agent.py +0 -0
  13. {vision_agent-0.2.190 → vision_agent-0.2.192}/vision_agent/agent/vision_agent_coder_prompts.py +0 -0
  14. {vision_agent-0.2.190 → vision_agent-0.2.192}/vision_agent/agent/vision_agent_planner_prompts.py +0 -0
  15. {vision_agent-0.2.190 → vision_agent-0.2.192}/vision_agent/agent/vision_agent_prompts.py +0 -0
  16. {vision_agent-0.2.190 → vision_agent-0.2.192}/vision_agent/clients/__init__.py +0 -0
  17. {vision_agent-0.2.190 → vision_agent-0.2.192}/vision_agent/clients/http.py +0 -0
  18. {vision_agent-0.2.190 → vision_agent-0.2.192}/vision_agent/clients/landing_public_api.py +0 -0
  19. {vision_agent-0.2.190 → vision_agent-0.2.192}/vision_agent/fonts/__init__.py +0 -0
  20. {vision_agent-0.2.190 → vision_agent-0.2.192}/vision_agent/fonts/default_font_ch_en.ttf +0 -0
  21. {vision_agent-0.2.190 → vision_agent-0.2.192}/vision_agent/lmm/__init__.py +0 -0
  22. {vision_agent-0.2.190 → vision_agent-0.2.192}/vision_agent/lmm/lmm.py +0 -0
  23. {vision_agent-0.2.190 → vision_agent-0.2.192}/vision_agent/lmm/types.py +0 -0
  24. {vision_agent-0.2.190 → vision_agent-0.2.192}/vision_agent/tools/__init__.py +5 -5
  25. {vision_agent-0.2.190 → vision_agent-0.2.192}/vision_agent/tools/meta_tools.py +0 -0
  26. {vision_agent-0.2.190 → vision_agent-0.2.192}/vision_agent/tools/prompts.py +0 -0
  27. {vision_agent-0.2.190 → vision_agent-0.2.192}/vision_agent/tools/tool_utils.py +0 -0
  28. {vision_agent-0.2.190 → vision_agent-0.2.192}/vision_agent/tools/tools_types.py +0 -0
  29. {vision_agent-0.2.190 → vision_agent-0.2.192}/vision_agent/utils/__init__.py +0 -0
  30. {vision_agent-0.2.190 → vision_agent-0.2.192}/vision_agent/utils/exceptions.py +0 -0
  31. {vision_agent-0.2.190 → vision_agent-0.2.192}/vision_agent/utils/execute.py +0 -0
  32. {vision_agent-0.2.190 → vision_agent-0.2.192}/vision_agent/utils/image_utils.py +0 -0
  33. {vision_agent-0.2.190 → vision_agent-0.2.192}/vision_agent/utils/sim.py +0 -0
  34. {vision_agent-0.2.190 → vision_agent-0.2.192}/vision_agent/utils/type_defs.py +0 -0
  35. {vision_agent-0.2.190 → vision_agent-0.2.192}/vision_agent/utils/video.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: vision-agent
3
- Version: 0.2.190
3
+ Version: 0.2.192
4
4
  Summary: Toolset for Vision Agent
5
5
  Author: Landing AI
6
6
  Author-email: dev@landing.ai
@@ -54,11 +54,7 @@ Description-Content-Type: text/markdown
54
54
  </div>
55
55
 
56
56
  VisionAgent is a library that helps you utilize agent frameworks to generate code to
57
- solve your vision task. Many current vision problems can easily take hours or days to
58
- solve, you need to find the right model, figure out how to use it and program it to
59
- accomplish the task you want. VisionAgent aims to provide an in-seconds experience by
60
- allowing users to describe their problem in text and have the agent framework generate
61
- code to solve the task for them. Check out our discord for updates and roadmaps!
57
+ solve your vision task. Check out our discord for updates and roadmaps!
62
58
 
63
59
  ## Table of Contents
64
60
  - [🚀Quick Start](#quick-start)
@@ -82,19 +78,19 @@ To get started with the python library, you can install it using pip:
82
78
  pip install vision-agent
83
79
  ```
84
80
 
85
- Ensure you have an Anthropic key and an OpenAI API key and set in your environment
81
+ Ensure you have both an Anthropic key and an OpenAI API key set in your environment
86
82
  variables (if you are using Azure OpenAI please see the Azure setup section):
87
83
 
88
84
  ```bash
89
- export ANTHROPIC_API_KEY="your-api-key"
90
- export OPENAI_API_KEY="your-api-key"
85
+ export ANTHROPIC_API_KEY="your-api-key" # needed for VisionAgent and VisionAgentCoder
86
+ export OPENAI_API_KEY="your-api-key" # needed for ToolRecommender
91
87
  ```
92
88
 
93
89
  ### Basic Usage
94
90
  To get started you can just import the `VisionAgent` and start chatting with it:
95
91
  ```python
96
92
  >>> from vision_agent.agent import VisionAgent
97
- >>> agent = VisionAgent()
93
+ >>> agent = VisionAgent(verbosity=2)
98
94
  >>> resp = agent("Hello")
99
95
  >>> print(resp)
100
96
  [{"role": "user", "content": "Hello"}, {"role": "assistant", "content": "{'thoughts': 'The user has greeted me. I will respond with a greeting and ask how I can assist them.', 'response': 'Hello! How can I assist you today?', 'let_user_respond': True}"}]
@@ -103,7 +99,7 @@ To get started you can just import the `VisionAgent` and start chatting with it:
103
99
  ```
104
100
 
105
101
  The chat messages are similar to `OpenAI`'s format with `role` and `content` keys but
106
- in addition to those you can add `medai` which is a list of media files that can either
102
+ in addition to those you can add `media` which is a list of media files that can either
107
103
  be images or video files.
108
104
 
109
105
  ## Documentation
@@ -12,11 +12,7 @@
12
12
  </div>
13
13
 
14
14
  VisionAgent is a library that helps you utilize agent frameworks to generate code to
15
- solve your vision task. Many current vision problems can easily take hours or days to
16
- solve, you need to find the right model, figure out how to use it and program it to
17
- accomplish the task you want. VisionAgent aims to provide an in-seconds experience by
18
- allowing users to describe their problem in text and have the agent framework generate
19
- code to solve the task for them. Check out our discord for updates and roadmaps!
15
+ solve your vision task. Check out our discord for updates and roadmaps!
20
16
 
21
17
  ## Table of Contents
22
18
  - [🚀Quick Start](#quick-start)
@@ -40,19 +36,19 @@ To get started with the python library, you can install it using pip:
40
36
  pip install vision-agent
41
37
  ```
42
38
 
43
- Ensure you have an Anthropic key and an OpenAI API key and set in your environment
39
+ Ensure you have both an Anthropic key and an OpenAI API key set in your environment
44
40
  variables (if you are using Azure OpenAI please see the Azure setup section):
45
41
 
46
42
  ```bash
47
- export ANTHROPIC_API_KEY="your-api-key"
48
- export OPENAI_API_KEY="your-api-key"
43
+ export ANTHROPIC_API_KEY="your-api-key" # needed for VisionAgent and VisionAgentCoder
44
+ export OPENAI_API_KEY="your-api-key" # needed for ToolRecommender
49
45
  ```
50
46
 
51
47
  ### Basic Usage
52
48
  To get started you can just import the `VisionAgent` and start chatting with it:
53
49
  ```python
54
50
  >>> from vision_agent.agent import VisionAgent
55
- >>> agent = VisionAgent()
51
+ >>> agent = VisionAgent(verbosity=2)
56
52
  >>> resp = agent("Hello")
57
53
  >>> print(resp)
58
54
  [{"role": "user", "content": "Hello"}, {"role": "assistant", "content": "{'thoughts': 'The user has greeted me. I will respond with a greeting and ask how I can assist them.', 'response': 'Hello! How can I assist you today?', 'let_user_respond': True}"}]
@@ -61,7 +57,7 @@ To get started you can just import the `VisionAgent` and start chatting with it:
61
57
  ```
62
58
 
63
59
  The chat messages are similar to `OpenAI`'s format with `role` and `content` keys but
64
- in addition to those you can add `medai` which is a list of media files that can either
60
+ in addition to those you can add `media` which is a list of media files that can either
65
61
  be images or video files.
66
62
 
67
63
  ## Documentation
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
4
4
 
5
5
  [tool.poetry]
6
6
  name = "vision-agent"
7
- version = "0.2.190"
7
+ version = "0.2.192"
8
8
  description = "Toolset for Vision Agent"
9
9
  authors = ["Landing AI <dev@landing.ai>"]
10
10
  readme = "README.md"
@@ -527,9 +527,6 @@ class VisionAgentCoder(Agent):
527
527
  [{"role": "user", "content": "describe your task here..."}].
528
528
  plan_context (PlanContext): The context of the plan, including the plans,
529
529
  best_plan, plan_thoughts, tool_doc, and tool_output.
530
- test_multi_plan (bool): Whether to test multiple plans or just the best plan.
531
- custom_tool_names (Optional[List[str]]): A list of custom tool names to use
532
- for the planner.
533
530
 
534
531
  Returns:
535
532
  Dict[str, Any]: A dictionary containing the code output by the
@@ -519,11 +519,7 @@ class OpenAIVisionAgentPlanner(VisionAgentPlanner):
519
519
  code_interpreter: Optional[Union[str, CodeInterpreter]] = None,
520
520
  ) -> None:
521
521
  super().__init__(
522
- planner=(
523
- OpenAILMM(temperature=0.0, json_mode=True)
524
- if planner is None
525
- else planner
526
- ),
522
+ planner=(OpenAILMM(temperature=0.0) if planner is None else planner),
527
523
  tool_recommender=tool_recommender,
528
524
  verbosity=verbosity,
529
525
  report_progress_callback=report_progress_callback,
@@ -567,11 +563,7 @@ class AzureVisionAgentPlanner(VisionAgentPlanner):
567
563
  code_interpreter: Optional[Union[str, CodeInterpreter]] = None,
568
564
  ) -> None:
569
565
  super().__init__(
570
- planner=(
571
- AzureOpenAILMM(temperature=0.0, json_mode=True)
572
- if planner is None
573
- else planner
574
- ),
566
+ planner=(AzureOpenAILMM(temperature=0.0) if planner is None else planner),
575
567
  tool_recommender=(
576
568
  AzureSim(T.TOOLS_DF, sim_key="desc")
577
569
  if tool_recommender is None
@@ -27,10 +27,7 @@ from vision_agent.tools.tool_utils import (
27
27
  send_inference_request,
28
28
  send_task_inference_request,
29
29
  )
30
- from vision_agent.tools.tools_types import (
31
- JobStatus,
32
- ODResponseData,
33
- )
30
+ from vision_agent.tools.tools_types import JobStatus, ODResponseData
34
31
  from vision_agent.utils.exceptions import FineTuneModelIsNotReady
35
32
  from vision_agent.utils.execute import FileSerializer, MimeType
36
33
  from vision_agent.utils.image_utils import (
@@ -641,8 +638,8 @@ def loca_visual_prompt_counting(
641
638
 
642
639
  Parameters:
643
640
  image (np.ndarray): The image that contains lot of instances of a single object
644
- visual_prompt (Dict[str, List[float]]): Bounding box of the object in format
645
- [xmin, ymin, xmax, ymax]. Only 1 bounding box can be provided.
641
+ visual_prompt (Dict[str, List[float]]): Bounding box of the object in
642
+ format [xmin, ymin, xmax, ymax]. Only 1 bounding box can be provided.
646
643
 
647
644
  Returns:
648
645
  Dict[str, Any]: A dictionary containing the key 'count' and the count as a
@@ -750,10 +747,10 @@ def countgd_example_based_counting(
750
747
 
751
748
  Parameters:
752
749
  visual_prompts (List[List[float]]): Bounding boxes of the object in format
753
- [xmin, ymin, xmax, ymax]. Upto 3 bounding boxes can be provided.
754
- image (np.ndarray): The image that contains multiple instances of the object.
755
- box_threshold (float, optional): The threshold for detection. Defaults
756
- to 0.23.
750
+ [xmin, ymin, xmax, ymax]. Up to 3 bounding boxes can be provided.
751
+ image (np.ndarray): The image that contains multiple instances of the object.
752
+ box_threshold (float, optional): The threshold for detection. Defaults to
753
+ 0.23.
757
754
 
758
755
  Returns:
759
756
  List[Dict[str, Any]]: A list of dictionaries containing the score, label, and
@@ -1809,6 +1806,12 @@ def flux_image_inpainting(
1809
1806
  ):
1810
1807
  raise ValueError("The image or mask does not have enough size for inpainting")
1811
1808
 
1809
+ if image.shape[0] % 8 != 0 or image.shape[1] % 8 != 0:
1810
+ new_height = (image.shape[0] // 8) * 8
1811
+ new_width = (image.shape[1] // 8) * 8
1812
+ image = cv2.resize(image, (new_width, new_height))
1813
+ mask = cv2.resize(mask, (new_width, new_height))
1814
+
1812
1815
  if np.array_equal(mask, mask.astype(bool).astype(int)):
1813
1816
  mask = np.where(mask > 0, 255, 0).astype(np.uint8)
1814
1817
  else:
File without changes
@@ -40,6 +40,7 @@ from .tools import (
40
40
  florence2_roberta_vqa,
41
41
  florence2_sam2_image,
42
42
  florence2_sam2_video_tracking,
43
+ flux_image_inpainting,
43
44
  generate_pose_image,
44
45
  generate_soft_edge_image,
45
46
  get_tool_documentation,
@@ -59,17 +60,16 @@ from .tools import (
59
60
  overlay_segmentation_masks,
60
61
  owl_v2_image,
61
62
  owl_v2_video,
63
+ qwen2_vl_images_vqa,
64
+ qwen2_vl_video_vqa,
62
65
  save_image,
63
66
  save_json,
64
67
  save_video,
68
+ siglip_classification,
65
69
  template_match,
70
+ video_temporal_localization,
66
71
  vit_image_classification,
67
72
  vit_nsfw_classification,
68
- qwen2_vl_images_vqa,
69
- qwen2_vl_video_vqa,
70
- video_temporal_localization,
71
- flux_image_inpainting,
72
- siglip_classification,
73
73
  )
74
74
 
75
75
  __new_tools__ = [