vision-agent 0.2.54__tar.gz → 0.2.56__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. {vision_agent-0.2.54 → vision_agent-0.2.56}/PKG-INFO +14 -16
  2. {vision_agent-0.2.54 → vision_agent-0.2.56}/README.md +13 -15
  3. {vision_agent-0.2.54 → vision_agent-0.2.56}/pyproject.toml +1 -1
  4. vision_agent-0.2.56/vision_agent/agent/__init__.py +2 -0
  5. {vision_agent-0.2.54 → vision_agent-0.2.56}/vision_agent/agent/vision_agent.py +65 -1
  6. vision_agent-0.2.54/vision_agent/agent/__init__.py +0 -2
  7. {vision_agent-0.2.54 → vision_agent-0.2.56}/LICENSE +0 -0
  8. {vision_agent-0.2.54 → vision_agent-0.2.56}/vision_agent/__init__.py +0 -0
  9. {vision_agent-0.2.54 → vision_agent-0.2.56}/vision_agent/agent/agent.py +0 -0
  10. {vision_agent-0.2.54 → vision_agent-0.2.56}/vision_agent/agent/agent_coder.py +0 -0
  11. {vision_agent-0.2.54 → vision_agent-0.2.56}/vision_agent/agent/agent_coder_prompts.py +0 -0
  12. {vision_agent-0.2.54 → vision_agent-0.2.56}/vision_agent/agent/data_interpreter.py +0 -0
  13. {vision_agent-0.2.54 → vision_agent-0.2.56}/vision_agent/agent/data_interpreter_prompts.py +0 -0
  14. {vision_agent-0.2.54 → vision_agent-0.2.56}/vision_agent/agent/easytool.py +0 -0
  15. {vision_agent-0.2.54 → vision_agent-0.2.56}/vision_agent/agent/easytool_prompts.py +0 -0
  16. {vision_agent-0.2.54 → vision_agent-0.2.56}/vision_agent/agent/easytool_v2.py +0 -0
  17. {vision_agent-0.2.54 → vision_agent-0.2.56}/vision_agent/agent/easytool_v2_prompts.py +0 -0
  18. {vision_agent-0.2.54 → vision_agent-0.2.56}/vision_agent/agent/reflexion.py +0 -0
  19. {vision_agent-0.2.54 → vision_agent-0.2.56}/vision_agent/agent/reflexion_prompts.py +0 -0
  20. {vision_agent-0.2.54 → vision_agent-0.2.56}/vision_agent/agent/vision_agent_prompts.py +0 -0
  21. {vision_agent-0.2.54 → vision_agent-0.2.56}/vision_agent/fonts/__init__.py +0 -0
  22. {vision_agent-0.2.54 → vision_agent-0.2.56}/vision_agent/fonts/default_font_ch_en.ttf +0 -0
  23. {vision_agent-0.2.54 → vision_agent-0.2.56}/vision_agent/llm/__init__.py +0 -0
  24. {vision_agent-0.2.54 → vision_agent-0.2.56}/vision_agent/llm/llm.py +0 -0
  25. {vision_agent-0.2.54 → vision_agent-0.2.56}/vision_agent/lmm/__init__.py +0 -0
  26. {vision_agent-0.2.54 → vision_agent-0.2.56}/vision_agent/lmm/lmm.py +0 -0
  27. {vision_agent-0.2.54 → vision_agent-0.2.56}/vision_agent/tools/__init__.py +0 -0
  28. {vision_agent-0.2.54 → vision_agent-0.2.56}/vision_agent/tools/easytool_tools.py +0 -0
  29. {vision_agent-0.2.54 → vision_agent-0.2.56}/vision_agent/tools/prompts.py +0 -0
  30. {vision_agent-0.2.54 → vision_agent-0.2.56}/vision_agent/tools/tool_utils.py +0 -0
  31. {vision_agent-0.2.54 → vision_agent-0.2.56}/vision_agent/tools/tools.py +0 -0
  32. {vision_agent-0.2.54 → vision_agent-0.2.56}/vision_agent/utils/__init__.py +0 -0
  33. {vision_agent-0.2.54 → vision_agent-0.2.56}/vision_agent/utils/execute.py +0 -0
  34. {vision_agent-0.2.54 → vision_agent-0.2.56}/vision_agent/utils/image_utils.py +0 -0
  35. {vision_agent-0.2.54 → vision_agent-0.2.56}/vision_agent/utils/sim.py +0 -0
  36. {vision_agent-0.2.54 → vision_agent-0.2.56}/vision_agent/utils/type_defs.py +0 -0
  37. {vision_agent-0.2.54 → vision_agent-0.2.56}/vision_agent/utils/video.py +0 -0
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: vision-agent
- Version: 0.2.54
+ Version: 0.2.56
  Summary: Toolset for Vision Agent
  Author: Landing AI
  Author-email: dev@landing.ai
@@ -38,7 +38,6 @@ Description-Content-Type: text/markdown
  <img alt="vision_agent" height="200px" src="https://github.com/landing-ai/vision-agent/blob/main/assets/logo.jpg?raw=true">

  # 🔍🤖 Vision Agent
-
  [![](https://dcbadge.vercel.app/api/server/wPdN8RCYew?compact=true&style=flat)](https://discord.gg/wPdN8RCYew)
  ![ci_status](https://github.com/landing-ai/vision-agent/actions/workflows/ci_cd.yml/badge.svg)
  [![PyPI version](https://badge.fury.io/py/vision-agent.svg)](https://badge.fury.io/py/vision-agent)
@@ -52,9 +51,14 @@ accomplish the task you want. Vision Agent aims to provide an in-seconds experie
  allowing users to describe their problem in text and have the agent framework generate
  code to solve the task for them. Check out our discord for updates and roadmaps!

+
+ ## Web Application
+
+ Try Vision Agent live on [va.landing.ai](https://va.landing.ai/)
+
  ## Documentation

- - [Vision Agent Library Docs](https://landing-ai.github.io/vision-agent/)
+ [Vision Agent Library Docs](https://landing-ai.github.io/vision-agent/)


  ## Getting Started
@@ -88,28 +92,28 @@ from vision_agent.tools import load_image, grounding_sam
  def calculate_filled_percentage(image_path: str) -> float:
      # Step 1: Load the image
      image = load_image(image_path)
-
+
      # Step 2: Segment the jar
      jar_segments = grounding_sam(prompt="jar", image=image)
-
+
      # Step 3: Segment the coffee beans
      coffee_beans_segments = grounding_sam(prompt="coffee beans", image=image)
-
+
      # Step 4: Calculate the area of the segmented jar
      jar_area = 0
      for segment in jar_segments:
          jar_area += segment['mask'].sum()
-
+
      # Step 5: Calculate the area of the segmented coffee beans
      coffee_beans_area = 0
      for segment in coffee_beans_segments:
          coffee_beans_area += segment['mask'].sum()
-
+
      # Step 6: Compute the percentage of the jar area that is filled with coffee beans
      if jar_area == 0:
          return 0.0  # To avoid division by zero
      filled_percentage = (coffee_beans_area / jar_area) * 100
-
+
      # Step 7: Return the computed percentage
      return filled_percentage
  ```
@@ -197,12 +201,6 @@ You can then run Vision Agent using the Azure OpenAI models:
  ```python
  import vision_agent as va
  import vision_agent.tools as T
- agent = va.agent.VisionAgent(
-     planner=va.llm.AzureOpenAILLM(),
-     coder=va.lmm.AzureOpenAILLM(),
-     tester=va.lmm.AzureOpenAILLM(),
-     debugger=va.lmm.AzureOpenAILLM(),
-     tool_recommender=va.utils.AzureSim(T.TOOLS_DF, sim_key="desc"),
- )
+ agent = va.agent.AzureVisionAgent()
  ```

@@ -2,7 +2,6 @@
  <img alt="vision_agent" height="200px" src="https://github.com/landing-ai/vision-agent/blob/main/assets/logo.jpg?raw=true">

  # 🔍🤖 Vision Agent
-
  [![](https://dcbadge.vercel.app/api/server/wPdN8RCYew?compact=true&style=flat)](https://discord.gg/wPdN8RCYew)
  ![ci_status](https://github.com/landing-ai/vision-agent/actions/workflows/ci_cd.yml/badge.svg)
  [![PyPI version](https://badge.fury.io/py/vision-agent.svg)](https://badge.fury.io/py/vision-agent)
@@ -16,9 +15,14 @@ accomplish the task you want. Vision Agent aims to provide an in-seconds experie
  allowing users to describe their problem in text and have the agent framework generate
  code to solve the task for them. Check out our discord for updates and roadmaps!

+
+ ## Web Application
+
+ Try Vision Agent live on [va.landing.ai](https://va.landing.ai/)
+
  ## Documentation

- - [Vision Agent Library Docs](https://landing-ai.github.io/vision-agent/)
+ [Vision Agent Library Docs](https://landing-ai.github.io/vision-agent/)


  ## Getting Started
@@ -52,28 +56,28 @@ from vision_agent.tools import load_image, grounding_sam
  def calculate_filled_percentage(image_path: str) -> float:
      # Step 1: Load the image
      image = load_image(image_path)
-
+
      # Step 2: Segment the jar
      jar_segments = grounding_sam(prompt="jar", image=image)
-
+
      # Step 3: Segment the coffee beans
      coffee_beans_segments = grounding_sam(prompt="coffee beans", image=image)
-
+
      # Step 4: Calculate the area of the segmented jar
      jar_area = 0
      for segment in jar_segments:
          jar_area += segment['mask'].sum()
-
+
      # Step 5: Calculate the area of the segmented coffee beans
      coffee_beans_area = 0
      for segment in coffee_beans_segments:
          coffee_beans_area += segment['mask'].sum()
-
+
      # Step 6: Compute the percentage of the jar area that is filled with coffee beans
      if jar_area == 0:
          return 0.0  # To avoid division by zero
      filled_percentage = (coffee_beans_area / jar_area) * 100
-
+
      # Step 7: Return the computed percentage
      return filled_percentage
  ```
@@ -161,11 +165,5 @@ You can then run Vision Agent using the Azure OpenAI models:
  ```python
  import vision_agent as va
  import vision_agent.tools as T
- agent = va.agent.VisionAgent(
-     planner=va.llm.AzureOpenAILLM(),
-     coder=va.lmm.AzureOpenAILLM(),
-     tester=va.lmm.AzureOpenAILLM(),
-     debugger=va.lmm.AzureOpenAILLM(),
-     tool_recommender=va.utils.AzureSim(T.TOOLS_DF, sim_key="desc"),
- )
+ agent = va.agent.AzureVisionAgent()
  ```
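
Both the PKG-INFO and README hunks above collapse the hand-wired Azure construction into the new one-line `AzureVisionAgent()` call. A minimal migration sketch, assuming the environment variables named in the `AzureVisionAgent` docstring further down (the key and endpoint values here are placeholders):

```python
import os

# Assumed pre-requisites, per the AzureVisionAgent docstring in
# vision_agent/agent/vision_agent.py: both variables must be set
# before the agent is constructed. Values below are placeholders.
os.environ["AZURE_OPENAI_API_KEY"] = "<your-azure-openai-key>"
os.environ["AZURE_OPENAI_ENDPOINT"] = "<your-azure-openai-endpoint>"

import vision_agent as va

# 0.2.54 required spelling out planner/coder/tester/debugger plus a tool
# recommender; 0.2.56 bundles those Azure defaults into one constructor.
agent = va.agent.AzureVisionAgent()
```

The defaults this call wires up are exactly the components the old README snippet listed, as the `super().__init__` body in the new class shows.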
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"

  [tool.poetry]
  name = "vision-agent"
- version = "0.2.54"
+ version = "0.2.56"
  description = "Toolset for Vision Agent"
  authors = ["Landing AI <dev@landing.ai>"]
  readme = "README.md"
@@ -0,0 +1,2 @@
+ from .agent import Agent
+ from .vision_agent import AzureVisionAgent, VisionAgent
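
The re-created `vision_agent/agent/__init__.py` exports both agents. A quick sanity check, a sketch assuming an installed 0.2.56:

```python
from vision_agent.agent import Agent, AzureVisionAgent, VisionAgent

# AzureVisionAgent subclasses VisionAgent (see the vision_agent.py diff
# below), and VisionAgent implements the Agent interface.
assert issubclass(AzureVisionAgent, VisionAgent)
assert issubclass(VisionAgent, Agent)
```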
@@ -13,6 +13,7 @@ from rich.style import Style
  from rich.syntax import Syntax
  from tabulate import tabulate

+ from vision_agent.llm.llm import AzureOpenAILLM
  import vision_agent.tools as T
  from vision_agent.agent import Agent
  from vision_agent.agent.vision_agent_prompts import (
@@ -29,7 +30,7 @@ from vision_agent.lmm import LMM, OpenAILMM
  from vision_agent.utils import CodeInterpreterFactory, Execution
  from vision_agent.utils.execute import CodeInterpreter
  from vision_agent.utils.image_utils import b64_to_pil
- from vision_agent.utils.sim import Sim
+ from vision_agent.utils.sim import AzureSim, Sim
  from vision_agent.utils.video import play_video

  logging.basicConfig(stream=sys.stdout)
@@ -615,3 +616,66 @@ class VisionAgent(Agent):
      def log_progress(self, data: Dict[str, Any]) -> None:
          if self.report_progress_callback is not None:
              self.report_progress_callback(data)
+
+
+ class AzureVisionAgent(VisionAgent):
+     """Vision Agent that uses Azure OpenAI APIs for planning, coding, testing.
+
+     Pre-requisites:
+     1. Set the environment variable AZURE_OPENAI_API_KEY to your Azure OpenAI API key.
+     2. Set the environment variable AZURE_OPENAI_ENDPOINT to your Azure OpenAI endpoint.
+
+     Example
+     -------
+         >>> from vision_agent import AzureVisionAgent
+         >>> agent = AzureVisionAgent()
+         >>> code = agent("What percentage of the area of the jar is filled with coffee beans?", media="jar.jpg")
+     """
+
+     def __init__(
+         self,
+         planner: Optional[Union[LLM, LMM]] = None,
+         coder: Optional[LLM] = None,
+         tester: Optional[LLM] = None,
+         debugger: Optional[LLM] = None,
+         tool_recommender: Optional[Sim] = None,
+         verbosity: int = 0,
+         report_progress_callback: Optional[Callable[[Dict[str, Any]], None]] = None,
+     ) -> None:
+         """Initialize the Vision Agent.
+
+         Parameters:
+             planner (Optional[LLM]): The planner model to use. Defaults to OpenAILLM.
+             coder (Optional[LLM]): The coder model to use. Defaults to OpenAILLM.
+             tester (Optional[LLM]): The tester model to use. Defaults to OpenAILLM.
+             debugger (Optional[LLM]): The debugger model to use. Defaults to OpenAILLM.
+             tool_recommender (Optional[Sim]): The tool recommender model to use.
+             verbosity (int): The verbosity level of the agent. Defaults to 0. 2 is the
+                 highest verbosity level which will output all intermediate debugging
+                 code.
+             report_progress_callback: a callback to report the progress of the agent.
+                 This is useful for streaming logs in a web application where multiple
+                 VisionAgent instances are running in parallel. This callback ensures
+                 that the progress logs are not mixed up.
+         """
+         super().__init__(
+             planner=(
+                 AzureOpenAILLM(temperature=0.0, json_mode=True)
+                 if planner is None
+                 else planner
+             ),
+             coder=AzureOpenAILLM(temperature=0.0) if coder is None else coder,
+             tester=AzureOpenAILLM(temperature=0.0) if tester is None else tester,
+             debugger=(
+                 AzureOpenAILLM(temperature=0.0, json_mode=True)
+                 if debugger is None
+                 else debugger
+             ),
+             tool_recommender=(
+                 AzureSim(T.TOOLS_DF, sim_key="desc")
+                 if tool_recommender is None
+                 else tool_recommender
+             ),
+             verbosity=verbosity,
+             report_progress_callback=report_progress_callback,
+         )
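
Because every constructor argument defaults to `None` and only then falls back to an Azure component, individual pieces can still be overridden. A hedged usage sketch, assuming only the imports shown in this diff (`jar.jpg` is a placeholder path):

```python
import vision_agent as va
import vision_agent.tools as T
from vision_agent.llm.llm import AzureOpenAILLM
from vision_agent.utils.sim import AzureSim

# All-default construction: planner, coder, tester, debugger, and the
# tool recommender fall back to the Azure-backed components above.
agent = va.agent.AzureVisionAgent(verbosity=2)

# Passing an argument skips its Azure default, so single components
# can be swapped while the rest keep the defaults:
custom = va.agent.AzureVisionAgent(
    coder=AzureOpenAILLM(temperature=0.0),
    tool_recommender=AzureSim(T.TOOLS_DF, sim_key="desc"),
)

code = agent(
    "What percentage of the area of the jar is filled with coffee beans?",
    media="jar.jpg",  # placeholder image path
)
```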
@@ -1,2 +0,0 @@
- from .agent import Agent
- from .vision_agent import VisionAgent