vision-agent 0.2.51__py3-none-any.whl → 0.2.53__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vision_agent/agent/vision_agent_prompts.py +1 -1
- vision_agent/llm/llm.py +1 -1
- vision_agent/lmm/lmm.py +7 -2
- vision_agent/tools/tools.py +5 -10
- vision_agent/utils/__init__.py +1 -1
- vision_agent/utils/sim.py +39 -3
- {vision_agent-0.2.51.dist-info → vision_agent-0.2.53.dist-info}/METADATA +10 -9
- {vision_agent-0.2.51.dist-info → vision_agent-0.2.53.dist-info}/RECORD +10 -10
- {vision_agent-0.2.51.dist-info → vision_agent-0.2.53.dist-info}/LICENSE +0 -0
- {vision_agent-0.2.51.dist-info → vision_agent-0.2.53.dist-info}/WHEEL +0 -0
@@ -71,7 +71,7 @@ This is the documentation for the functions you have access to. You may call any
|
|
71
71
|
1. **Understand and Clarify**: Make sure you understand the task.
|
72
72
|
2. **Algorithm/Method Selection**: Decide on the most efficient way.
|
73
73
|
3. **Pseudocode Creation**: Write down the steps you will follow in pseudocode.
|
74
|
-
4. **Code Generation**: Translate your pseudocode into executable Python code. Ensure you use correct arguments, remember coordinates are always returned normalized from `vision_agent.tools`.
|
74
|
+
4. **Code Generation**: Translate your pseudocode into executable Python code. Ensure you use correct arguments, remember coordinates are always returned normalized from `vision_agent.tools`. All images from `vision_agent.tools` are in RGB format, red is (255, 0, 0) and blue is (0, 0, 255).
|
75
75
|
5. **Logging**: Log the output of the custom functions that were provided to you from `from vision_agent.tools import *`. Use a debug flag in the function parameters to toggle logging on and off.
|
76
76
|
"""
|
77
77
|
|
vision_agent/llm/llm.py
CHANGED
@@ -148,7 +148,7 @@ class OpenAILLM(LLM):
|
|
148
148
|
class AzureOpenAILLM(OpenAILLM):
|
149
149
|
def __init__(
|
150
150
|
self,
|
151
|
-
model_name: str = "gpt-
|
151
|
+
model_name: str = "gpt-4o",
|
152
152
|
api_key: Optional[str] = None,
|
153
153
|
api_version: str = "2024-02-01",
|
154
154
|
azure_endpoint: Optional[str] = None,
|
vision_agent/lmm/lmm.py
CHANGED
@@ -286,11 +286,12 @@ class OpenAILMM(LMM):
|
|
286
286
|
class AzureOpenAILMM(OpenAILMM):
|
287
287
|
def __init__(
|
288
288
|
self,
|
289
|
-
model_name: str = "gpt-
|
289
|
+
model_name: str = "gpt-4o",
|
290
290
|
api_key: Optional[str] = None,
|
291
291
|
api_version: str = "2024-02-01",
|
292
292
|
azure_endpoint: Optional[str] = None,
|
293
293
|
max_tokens: int = 1024,
|
294
|
+
json_mode: bool = False,
|
294
295
|
**kwargs: Any,
|
295
296
|
):
|
296
297
|
if not api_key:
|
@@ -307,7 +308,11 @@ class AzureOpenAILMM(OpenAILMM):
|
|
307
308
|
api_key=api_key, api_version=api_version, azure_endpoint=azure_endpoint
|
308
309
|
)
|
309
310
|
self.model_name = model_name
|
310
|
-
|
311
|
+
|
312
|
+
if "max_tokens" not in kwargs:
|
313
|
+
kwargs["max_tokens"] = max_tokens
|
314
|
+
if json_mode:
|
315
|
+
kwargs["response_format"] = {"type": "json_object"}
|
311
316
|
self.kwargs = kwargs
|
312
317
|
|
313
318
|
|
vision_agent/tools/tools.py
CHANGED
@@ -179,7 +179,7 @@ def extract_frames(
|
|
179
179
|
) -> List[Tuple[np.ndarray, float]]:
|
180
180
|
"""'extract_frames' extracts frames from a video, returns a list of tuples (frame,
|
181
181
|
timestamp), where timestamp is the relative time in seconds where the frame was
|
182
|
-
captured. The frame is a
|
182
|
+
captured. The frame is a numpy array.
|
183
183
|
|
184
184
|
Parameters:
|
185
185
|
video_uri (Union[str, Path]): The path to the video file.
|
@@ -530,27 +530,22 @@ def load_image(image_path: str) -> np.ndarray:
|
|
530
530
|
return np.array(image)
|
531
531
|
|
532
532
|
|
533
|
-
def save_image(image: np.ndarray) -> str:
|
534
|
-
"""'save_image' is a utility function that saves an image
|
533
|
+
def save_image(image: np.ndarray, file_path: str) -> None:
|
534
|
+
"""'save_image' is a utility function that saves an image to a file path.
|
535
535
|
|
536
536
|
Parameters:
|
537
537
|
image (np.ndarray): The image to save.
|
538
|
-
|
539
|
-
Returns:
|
540
|
-
str: The path to the saved image.
|
538
|
+
file_path (str): The path to save the image file.
|
541
539
|
|
542
540
|
Example
|
543
541
|
-------
|
544
542
|
>>> save_image(image, "/tmp/image.png")
|
545
|
-
"/tmp/tmpabc123.png"
|
546
543
|
"""
|
547
544
|
from IPython.display import display
|
548
545
|
|
549
546
|
pil_image = Image.fromarray(image.astype(np.uint8))
|
550
547
|
display(pil_image)
|
551
|
-
|
552
|
-
pil_image.save(f, "PNG")
|
553
|
-
return f.name
|
548
|
+
pil_image.save(file_path)
|
554
549
|
|
555
550
|
|
556
551
|
def save_video(
|
vision_agent/utils/__init__.py
CHANGED
vision_agent/utils/sim.py
CHANGED
@@ -1,9 +1,10 @@
|
|
1
|
+
import os
|
1
2
|
from pathlib import Path
|
2
3
|
from typing import Dict, List, Optional, Sequence, Union
|
3
4
|
|
4
5
|
import numpy as np
|
5
6
|
import pandas as pd
|
6
|
-
from openai import Client
|
7
|
+
from openai import AzureOpenAI, Client, OpenAI
|
7
8
|
from scipy.spatial.distance import cosine # type: ignore
|
8
9
|
|
9
10
|
|
@@ -33,9 +34,9 @@ class Sim:
|
|
33
34
|
"""
|
34
35
|
self.df = df
|
35
36
|
if not api_key:
|
36
|
-
self.client =
|
37
|
+
self.client = OpenAI()
|
37
38
|
else:
|
38
|
-
self.client =
|
39
|
+
self.client = OpenAI(api_key=api_key)
|
39
40
|
|
40
41
|
self.model = model
|
41
42
|
if "embs" not in df.columns and sim_key is None:
|
@@ -78,6 +79,41 @@ class Sim:
|
|
78
79
|
return res[[c for c in res.columns if c != "embs"]].to_dict(orient="records")
|
79
80
|
|
80
81
|
|
82
|
+
class AzureSim(Sim):
|
83
|
+
def __init__(
|
84
|
+
self,
|
85
|
+
df: pd.DataFrame,
|
86
|
+
sim_key: Optional[str] = None,
|
87
|
+
api_key: Optional[str] = None,
|
88
|
+
api_version: str = "2024-02-01",
|
89
|
+
azure_endpoint: Optional[str] = None,
|
90
|
+
model: str = "text-embedding-3-small",
|
91
|
+
) -> None:
|
92
|
+
if not api_key:
|
93
|
+
api_key = os.getenv("AZURE_OPENAI_API_KEY")
|
94
|
+
if not azure_endpoint:
|
95
|
+
azure_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT")
|
96
|
+
|
97
|
+
if not api_key:
|
98
|
+
raise ValueError("Azure OpenAI API key is required.")
|
99
|
+
if not azure_endpoint:
|
100
|
+
raise ValueError("Azure OpenAI endpoint is required.")
|
101
|
+
|
102
|
+
self.df = df
|
103
|
+
self.client = AzureOpenAI(
|
104
|
+
api_key=api_key, api_version=api_version, azure_endpoint=azure_endpoint
|
105
|
+
)
|
106
|
+
|
107
|
+
self.model = model
|
108
|
+
if "embs" not in df.columns and sim_key is None:
|
109
|
+
raise ValueError("key is required if no column 'embs' is present.")
|
110
|
+
|
111
|
+
if sim_key is not None:
|
112
|
+
self.df["embs"] = self.df[sim_key].apply(
|
113
|
+
lambda x: get_embedding(self.client, x, model=self.model)
|
114
|
+
)
|
115
|
+
|
116
|
+
|
81
117
|
def merge_sim(sim1: Sim, sim2: Sim) -> Sim:
|
82
118
|
return Sim(pd.concat([sim1.df, sim2.df], ignore_index=True))
|
83
119
|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: vision-agent
|
3
|
-
Version: 0.2.51
|
3
|
+
Version: 0.2.53
|
4
4
|
Summary: Toolset for Vision Agent
|
5
5
|
Author: Landing AI
|
6
6
|
Author-email: dev@landing.ai
|
@@ -195,13 +195,14 @@ export AZURE_OPENAI_ENDPOINT="your-endpoint"
|
|
195
195
|
You can then run Vision Agent using the Azure OpenAI models:
|
196
196
|
|
197
197
|
```python
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
198
|
+
import vision_agent as va
|
199
|
+
import vision_agent.tools as T
|
200
|
+
agent = va.agent.VisionAgent(
|
201
|
+
planner=va.llm.AzureOpenAILLM(),
|
202
|
+
coder=va.lmm.AzureOpenAILMM(),
|
203
|
+
tester=va.lmm.AzureOpenAILMM(),
|
204
|
+
debugger=va.lmm.AzureOpenAILMM(),
|
205
|
+
tool_recommender=va.utils.AzureSim(T.TOOLS_DF, sim_key="desc"),
|
206
|
+
)
|
205
207
|
```
|
206
208
|
|
207
|
-
|
@@ -12,25 +12,25 @@ vision_agent/agent/easytool_v2_prompts.py,sha256=MZSIwovYgB-f-kdJ6btaNDVXptJn47b
|
|
12
12
|
vision_agent/agent/reflexion.py,sha256=scck3YcME6DhX5Vs4Wr1rYb8S4wkBUkN9UksyazfrZg,10506
|
13
13
|
vision_agent/agent/reflexion_prompts.py,sha256=G7UAeNz_g2qCb2yN6OaIC7bQVUkda4m3z42EG8wAyfE,9342
|
14
14
|
vision_agent/agent/vision_agent.py,sha256=wGGISg6pDVNseF2fIAN1jH66OX2qZk2nDhuobeSNGHk,20957
|
15
|
-
vision_agent/agent/vision_agent_prompts.py,sha256=
|
15
|
+
vision_agent/agent/vision_agent_prompts.py,sha256=9QVQA-YTSHhYHYbxiqCWFVBHIa6uV4WF0z6599mV_Oc,8470
|
16
16
|
vision_agent/fonts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
17
17
|
vision_agent/fonts/default_font_ch_en.ttf,sha256=1YM0Z3XqLDjSNbF7ihQFSAIUdjF9m1rtHiNC_6QosTE,1594400
|
18
18
|
vision_agent/llm/__init__.py,sha256=BoUm_zSAKnLlE8s-gKTSQugXDqVZKPqYlWwlTLdhcz4,48
|
19
|
-
vision_agent/llm/llm.py,sha256=
|
19
|
+
vision_agent/llm/llm.py,sha256=jElloDcvNp00e4LJp3jelwa34CwVFzQ_SfOcGr0omK8,5938
|
20
20
|
vision_agent/lmm/__init__.py,sha256=nnNeKD1k7q_4vLb1x51O_EUTYaBgGfeiCx5F433gr3M,67
|
21
|
-
vision_agent/lmm/lmm.py,sha256=
|
21
|
+
vision_agent/lmm/lmm.py,sha256=tvBkG3Ot9l4O7lysRLlh7PyYg_p5ufMcEC0bm_UrnUM,10776
|
22
22
|
vision_agent/tools/__init__.py,sha256=Sng6dChynJJCYWjraXXM0tep_VPdnYl3L9vb0HMy_Pc,1528
|
23
23
|
vision_agent/tools/easytool_tools.py,sha256=pZc5dQlYINlV4nYbbzsDi3-wauA-fCeD2iGmJUMoUfE,47373
|
24
24
|
vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
|
25
25
|
vision_agent/tools/tool_utils.py,sha256=wzRacbUpqk9hhfX_Y08rL8qP0XCN2w-8IZoYLi3Upn4,869
|
26
|
-
vision_agent/tools/tools.py,sha256=
|
27
|
-
vision_agent/utils/__init__.py,sha256=
|
26
|
+
vision_agent/tools/tools.py,sha256=aEph9ikQklqKzz18jgjO7eC77VqmkJCYEZ8DqtpihYg,26944
|
27
|
+
vision_agent/utils/__init__.py,sha256=CW84HnhqI6XQVuxf2KifkLnSuO7EOhmuL09-gAymAak,219
|
28
28
|
vision_agent/utils/execute.py,sha256=GqoAodxtwTPBr1nujPTsWiZO2rBGvWVXTe8lgxY4d_g,20603
|
29
29
|
vision_agent/utils/image_utils.py,sha256=_cdiS5YrLzqkq_ZgFUO897m5M4_SCIThwUy4lOklfB8,7700
|
30
|
-
vision_agent/utils/sim.py,sha256=
|
30
|
+
vision_agent/utils/sim.py,sha256=rGRGnjsy91IOn8qzt7k04PIRj5jyiaQyYAQl7ossPt8,4195
|
31
31
|
vision_agent/utils/type_defs.py,sha256=BlI8ywWHAplC7kYWLvt4AOdnKpEW3qWEFm-GEOSkrFQ,1792
|
32
32
|
vision_agent/utils/video.py,sha256=rNmU9KEIkZB5-EztZNlUiKYN0mm_55A_2VGUM0QpqLA,8779
|
33
|
-
vision_agent-0.2.
|
34
|
-
vision_agent-0.2.
|
35
|
-
vision_agent-0.2.
|
36
|
-
vision_agent-0.2.
|
33
|
+
vision_agent-0.2.53.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
34
|
+
vision_agent-0.2.53.dist-info/METADATA,sha256=0p9P33esnEQAWlqOsXC9YLxcR1R5E0oKURtWZ40hf_U,6887
|
35
|
+
vision_agent-0.2.53.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
|
36
|
+
vision_agent-0.2.53.dist-info/RECORD,,
|
File without changes
|
File without changes
|