vision-agent 0.2.54__py3-none-any.whl → 0.2.56__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vision_agent/agent/__init__.py +1 -1
- vision_agent/agent/vision_agent.py +65 -1
- {vision_agent-0.2.54.dist-info → vision_agent-0.2.56.dist-info}/METADATA +14 -16
- {vision_agent-0.2.54.dist-info → vision_agent-0.2.56.dist-info}/RECORD +6 -6
- {vision_agent-0.2.54.dist-info → vision_agent-0.2.56.dist-info}/LICENSE +0 -0
- {vision_agent-0.2.54.dist-info → vision_agent-0.2.56.dist-info}/WHEEL +0 -0
vision_agent/agent/__init__.py
CHANGED
@@ -1,2 +1,2 @@
|
|
1
1
|
from .agent import Agent
|
2
|
-
from .vision_agent import VisionAgent
|
2
|
+
from .vision_agent import AzureVisionAgent, VisionAgent
|
@@ -13,6 +13,7 @@ from rich.style import Style
|
|
13
13
|
from rich.syntax import Syntax
|
14
14
|
from tabulate import tabulate
|
15
15
|
|
16
|
+
from vision_agent.llm.llm import AzureOpenAILLM
|
16
17
|
import vision_agent.tools as T
|
17
18
|
from vision_agent.agent import Agent
|
18
19
|
from vision_agent.agent.vision_agent_prompts import (
|
@@ -29,7 +30,7 @@ from vision_agent.lmm import LMM, OpenAILMM
|
|
29
30
|
from vision_agent.utils import CodeInterpreterFactory, Execution
|
30
31
|
from vision_agent.utils.execute import CodeInterpreter
|
31
32
|
from vision_agent.utils.image_utils import b64_to_pil
|
32
|
-
from vision_agent.utils.sim import Sim
|
33
|
+
from vision_agent.utils.sim import AzureSim, Sim
|
33
34
|
from vision_agent.utils.video import play_video
|
34
35
|
|
35
36
|
logging.basicConfig(stream=sys.stdout)
|
@@ -615,3 +616,66 @@ class VisionAgent(Agent):
|
|
615
616
|
def log_progress(self, data: Dict[str, Any]) -> None:
|
616
617
|
if self.report_progress_callback is not None:
|
617
618
|
self.report_progress_callback(data)
|
619
|
+
|
620
|
+
|
621
|
+
class AzureVisionAgent(VisionAgent):
|
622
|
+
"""Vision Agent that uses Azure OpenAI APIs for planning, coding, testing.
|
623
|
+
|
624
|
+
Pre-requisites:
|
625
|
+
1. Set the environment variable AZURE_OPENAI_API_KEY to your Azure OpenAI API key.
|
626
|
+
2. Set the environment variable AZURE_OPENAI_ENDPOINT to your Azure OpenAI endpoint.
|
627
|
+
|
628
|
+
Example
|
629
|
+
-------
|
630
|
+
>>> from vision_agent.agent import AzureVisionAgent
|
631
|
+
>>> agent = AzureVisionAgent()
|
632
|
+
>>> code = agent("What percentage of the area of the jar is filled with coffee beans?", media="jar.jpg")
|
633
|
+
"""
|
634
|
+
|
635
|
+
def __init__(
|
636
|
+
self,
|
637
|
+
planner: Optional[Union[LLM, LMM]] = None,
|
638
|
+
coder: Optional[LLM] = None,
|
639
|
+
tester: Optional[LLM] = None,
|
640
|
+
debugger: Optional[LLM] = None,
|
641
|
+
tool_recommender: Optional[Sim] = None,
|
642
|
+
verbosity: int = 0,
|
643
|
+
report_progress_callback: Optional[Callable[[Dict[str, Any]], None]] = None,
|
644
|
+
) -> None:
|
645
|
+
"""Initialize the Vision Agent.
|
646
|
+
|
647
|
+
Parameters:
|
648
|
+
planner (Optional[Union[LLM, LMM]]): The planner model to use. Defaults to AzureOpenAILLM.
|
649
|
+
coder (Optional[LLM]): The coder model to use. Defaults to AzureOpenAILLM.
|
650
|
+
tester (Optional[LLM]): The tester model to use. Defaults to AzureOpenAILLM.
|
651
|
+
debugger (Optional[LLM]): The debugger model to use. Defaults to AzureOpenAILLM.
|
652
|
+
tool_recommender (Optional[Sim]): The tool recommender model to use. Defaults to AzureSim.
|
653
|
+
verbosity (int): The verbosity level of the agent. Defaults to 0. 2 is the
|
654
|
+
highest verbosity level which will output all intermediate debugging
|
655
|
+
code.
|
656
|
+
report_progress_callback: a callback to report the progress of the agent.
|
657
|
+
This is useful for streaming logs in a web application where multiple
|
658
|
+
VisionAgent instances are running in parallel. This callback ensures
|
659
|
+
that the progress reports are not mixed up.
|
660
|
+
"""
|
661
|
+
super().__init__(
|
662
|
+
planner=(
|
663
|
+
AzureOpenAILLM(temperature=0.0, json_mode=True)
|
664
|
+
if planner is None
|
665
|
+
else planner
|
666
|
+
),
|
667
|
+
coder=AzureOpenAILLM(temperature=0.0) if coder is None else coder,
|
668
|
+
tester=AzureOpenAILLM(temperature=0.0) if tester is None else tester,
|
669
|
+
debugger=(
|
670
|
+
AzureOpenAILLM(temperature=0.0, json_mode=True)
|
671
|
+
if debugger is None
|
672
|
+
else debugger
|
673
|
+
),
|
674
|
+
tool_recommender=(
|
675
|
+
AzureSim(T.TOOLS_DF, sim_key="desc")
|
676
|
+
if tool_recommender is None
|
677
|
+
else tool_recommender
|
678
|
+
),
|
679
|
+
verbosity=verbosity,
|
680
|
+
report_progress_callback=report_progress_callback,
|
681
|
+
)
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: vision-agent
|
3
|
-
Version: 0.2.54
|
3
|
+
Version: 0.2.56
|
4
4
|
Summary: Toolset for Vision Agent
|
5
5
|
Author: Landing AI
|
6
6
|
Author-email: dev@landing.ai
|
@@ -38,7 +38,6 @@ Description-Content-Type: text/markdown
|
|
38
38
|
<img alt="vision_agent" height="200px" src="https://github.com/landing-ai/vision-agent/blob/main/assets/logo.jpg?raw=true">
|
39
39
|
|
40
40
|
# 🔍🤖 Vision Agent
|
41
|
-
|
42
41
|
[](https://discord.gg/wPdN8RCYew)
|
43
42
|

|
44
43
|
[](https://badge.fury.io/py/vision-agent)
|
@@ -52,9 +51,14 @@ accomplish the task you want. Vision Agent aims to provide an in-seconds experie
|
|
52
51
|
allowing users to describe their problem in text and have the agent framework generate
|
53
52
|
code to solve the task for them. Check out our discord for updates and roadmaps!
|
54
53
|
|
54
|
+
|
55
|
+
## Web Application
|
56
|
+
|
57
|
+
Try Vision Agent live on [va.landing.ai](https://va.landing.ai/)
|
58
|
+
|
55
59
|
## Documentation
|
56
60
|
|
57
|
-
|
61
|
+
[Vision Agent Library Docs](https://landing-ai.github.io/vision-agent/)
|
58
62
|
|
59
63
|
|
60
64
|
## Getting Started
|
@@ -88,28 +92,28 @@ from vision_agent.tools import load_image, grounding_sam
|
|
88
92
|
def calculate_filled_percentage(image_path: str) -> float:
|
89
93
|
# Step 1: Load the image
|
90
94
|
image = load_image(image_path)
|
91
|
-
|
95
|
+
|
92
96
|
# Step 2: Segment the jar
|
93
97
|
jar_segments = grounding_sam(prompt="jar", image=image)
|
94
|
-
|
98
|
+
|
95
99
|
# Step 3: Segment the coffee beans
|
96
100
|
coffee_beans_segments = grounding_sam(prompt="coffee beans", image=image)
|
97
|
-
|
101
|
+
|
98
102
|
# Step 4: Calculate the area of the segmented jar
|
99
103
|
jar_area = 0
|
100
104
|
for segment in jar_segments:
|
101
105
|
jar_area += segment['mask'].sum()
|
102
|
-
|
106
|
+
|
103
107
|
# Step 5: Calculate the area of the segmented coffee beans
|
104
108
|
coffee_beans_area = 0
|
105
109
|
for segment in coffee_beans_segments:
|
106
110
|
coffee_beans_area += segment['mask'].sum()
|
107
|
-
|
111
|
+
|
108
112
|
# Step 6: Compute the percentage of the jar area that is filled with coffee beans
|
109
113
|
if jar_area == 0:
|
110
114
|
return 0.0 # To avoid division by zero
|
111
115
|
filled_percentage = (coffee_beans_area / jar_area) * 100
|
112
|
-
|
116
|
+
|
113
117
|
# Step 7: Return the computed percentage
|
114
118
|
return filled_percentage
|
115
119
|
```
|
@@ -197,12 +201,6 @@ You can then run Vision Agent using the Azure OpenAI models:
|
|
197
201
|
```python
|
198
202
|
import vision_agent as va
|
199
203
|
import vision_agent.tools as T
|
200
|
-
agent = va.agent.VisionAgent(
|
201
|
-
planner=va.llm.AzureOpenAILLM(),
|
202
|
-
coder=va.lmm.AzureOpenAILLM(),
|
203
|
-
tester=va.lmm.AzureOpenAILLM(),
|
204
|
-
debugger=va.lmm.AzureOpenAILLM(),
|
205
|
-
tool_recommender=va.utils.AzureSim(T.TOOLS_DF, sim_key="desc"),
|
206
|
-
)
|
204
|
+
agent = va.agent.AzureVisionAgent()
|
207
205
|
```
|
208
206
|
|
@@ -1,5 +1,5 @@
|
|
1
1
|
vision_agent/__init__.py,sha256=GVLHCeK_R-zgldpbcPmOzJat-BkadvkuRCMxDvTIcXs,108
|
2
|
-
vision_agent/agent/__init__.py,sha256=
|
2
|
+
vision_agent/agent/__init__.py,sha256=IUwfbPMcT8X_rnXMLmI8gJ4ltsHy_XSs9eLiKURJxeY,81
|
3
3
|
vision_agent/agent/agent.py,sha256=TXh93MOwmArNRieOkYrhliq1rf7wIkhxvCdTiGhTqFs,538
|
4
4
|
vision_agent/agent/agent_coder.py,sha256=IBGQYw_XT08GIZYi9Q9i9GJU_icneGaQ9r1EIxP-G4g,7228
|
5
5
|
vision_agent/agent/agent_coder_prompts.py,sha256=CJe3v7xvHQ32u3RQAXQga_Tk_4UgU64RBAMHZ3S70KY,5538
|
@@ -11,7 +11,7 @@ vision_agent/agent/easytool_v2.py,sha256=LY2cqzjVHBr7QMn4WsrZ7AfpWrDN0LjJIrd5tMo
|
|
11
11
|
vision_agent/agent/easytool_v2_prompts.py,sha256=MZSIwovYgB-f-kdJ6btaNDVXptJn47bfOL3-Zn6NiC0,8573
|
12
12
|
vision_agent/agent/reflexion.py,sha256=scck3YcME6DhX5Vs4Wr1rYb8S4wkBUkN9UksyazfrZg,10506
|
13
13
|
vision_agent/agent/reflexion_prompts.py,sha256=G7UAeNz_g2qCb2yN6OaIC7bQVUkda4m3z42EG8wAyfE,9342
|
14
|
-
vision_agent/agent/vision_agent.py,sha256=
|
14
|
+
vision_agent/agent/vision_agent.py,sha256=fM_-0qIHHGPwa4S0dtcY7ikzXUaht0BjHo4wVRwwWsE,23720
|
15
15
|
vision_agent/agent/vision_agent_prompts.py,sha256=9QVQA-YTSHhYHYbxiqCWFVBHIa6uV4WF0z6599mV_Oc,8470
|
16
16
|
vision_agent/fonts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
17
17
|
vision_agent/fonts/default_font_ch_en.ttf,sha256=1YM0Z3XqLDjSNbF7ihQFSAIUdjF9m1rtHiNC_6QosTE,1594400
|
@@ -30,7 +30,7 @@ vision_agent/utils/image_utils.py,sha256=_cdiS5YrLzqkq_ZgFUO897m5M4_SCIThwUy4lOk
|
|
30
30
|
vision_agent/utils/sim.py,sha256=rGRGnjsy91IOn8qzt7k04PIRj5jyiaQyYAQl7ossPt8,4195
|
31
31
|
vision_agent/utils/type_defs.py,sha256=BlI8ywWHAplC7kYWLvt4AOdnKpEW3qWEFm-GEOSkrFQ,1792
|
32
32
|
vision_agent/utils/video.py,sha256=rNmU9KEIkZB5-EztZNlUiKYN0mm_55A_2VGUM0QpqLA,8779
|
33
|
-
vision_agent-0.2.54.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
34
|
-
vision_agent-0.2.
|
35
|
-
vision_agent-0.2.54.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
|
36
|
-
vision_agent-0.2.54.dist-info/RECORD,,
|
33
|
+
vision_agent-0.2.56.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
34
|
+
vision_agent-0.2.56.dist-info/METADATA,sha256=5U-ScwukTYixyLsP6Cvhysq54ulCs4txG1pmxHzIjdA,6737
|
35
|
+
vision_agent-0.2.56.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
|
36
|
+
vision_agent-0.2.56.dist-info/RECORD,,
|
File without changes
|
File without changes
|