vision-agent 0.0.53__py3-none-any.whl → 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vision_agent/agent/vision_agent.py +1 -1
- vision_agent/tools/tools.py +27 -63
- vision_agent/type_defs.py +48 -0
- {vision_agent-0.0.53.dist-info → vision_agent-0.1.1.dist-info}/METADATA +2 -1
- {vision_agent-0.0.53.dist-info → vision_agent-0.1.1.dist-info}/RECORD +7 -6
- {vision_agent-0.0.53.dist-info → vision_agent-0.1.1.dist-info}/LICENSE +0 -0
- {vision_agent-0.0.53.dist-info → vision_agent-0.1.1.dist-info}/WHEEL +0 -0
@@ -476,7 +476,7 @@ class VisionAgent(Agent):
|
|
476
476
|
reflections += "\n" + reflection
|
477
477
|
# '<END>' is a symbol to indicate the end of the chat, which is useful for streaming logs.
|
478
478
|
self.log_progress(
|
479
|
-
f"The Vision Agent has concluded this chat. <ANSWER>{final_answer}
|
479
|
+
f"The Vision Agent has concluded this chat. <ANSWER>{final_answer}</ANSWER>"
|
480
480
|
)
|
481
481
|
|
482
482
|
if visualize_output:
|
vision_agent/tools/tools.py
CHANGED
@@ -12,8 +12,11 @@ from PIL.Image import Image as ImageType
|
|
12
12
|
|
13
13
|
from vision_agent.image_utils import convert_to_b64, get_image_size
|
14
14
|
from vision_agent.tools.video import extract_frames_from_video
|
15
|
+
from vision_agent.type_defs import LandingaiAPIKey
|
15
16
|
|
16
17
|
_LOGGER = logging.getLogger(__name__)
|
18
|
+
_LND_API_KEY = LandingaiAPIKey().api_key
|
19
|
+
_LND_API_URL = "https://api.dev.landing.ai/v1/agent"
|
17
20
|
|
18
21
|
|
19
22
|
def normalize_bbox(
|
@@ -80,8 +83,6 @@ class CLIP(Tool):
|
|
80
83
|
[{"labels": ["red line", "yellow dot"], "scores": [0.98, 0.02]}]
|
81
84
|
"""
|
82
85
|
|
83
|
-
_ENDPOINT = "https://soi4ewr6fjqqdf5vuss6rrilee0kumxq.lambda-url.us-east-2.on.aws"
|
84
|
-
|
85
86
|
name = "clip_"
|
86
87
|
description = "'clip_' is a tool that can classify any image given a set of input names or tags. It returns a list of the input names along with their probability scores."
|
87
88
|
usage = {
|
@@ -125,23 +126,9 @@ class CLIP(Tool):
|
|
125
126
|
"image": image_b64,
|
126
127
|
"tool": "closed_set_image_classification",
|
127
128
|
}
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
json=data,
|
132
|
-
)
|
133
|
-
resp_json: Dict[str, Any] = res.json()
|
134
|
-
if (
|
135
|
-
"statusCode" in resp_json and resp_json["statusCode"] != 200
|
136
|
-
) or "statusCode" not in resp_json:
|
137
|
-
_LOGGER.error(f"Request failed: {resp_json}")
|
138
|
-
raise ValueError(f"Request failed: {resp_json}")
|
139
|
-
|
140
|
-
resp_json["data"]["scores"] = [
|
141
|
-
round(prob, 4) for prob in resp_json["data"]["scores"]
|
142
|
-
]
|
143
|
-
|
144
|
-
return resp_json["data"] # type: ignore
|
129
|
+
resp_data = _send_inference_request(data, "tools")
|
130
|
+
resp_data["scores"] = [round(prob, 4) for prob in resp_data["scores"]]
|
131
|
+
return resp_data
|
145
132
|
|
146
133
|
|
147
134
|
class ImageCaption(Tool):
|
@@ -156,8 +143,6 @@ class ImageCaption(Tool):
|
|
156
143
|
{'text': ['a box of orange and white socks']}
|
157
144
|
"""
|
158
145
|
|
159
|
-
_ENDPOINT = "https://soi4ewr6fjqqdf5vuss6rrilee0kumxq.lambda-url.us-east-2.on.aws"
|
160
|
-
|
161
146
|
name = "image_caption_"
|
162
147
|
description = "'image_caption_' is a tool that can caption an image based on its contents or tags. It returns a text describing the image"
|
163
148
|
usage = {
|
@@ -197,19 +182,7 @@ class ImageCaption(Tool):
|
|
197
182
|
"image": image_b64,
|
198
183
|
"tool": "image_captioning",
|
199
184
|
}
|
200
|
-
|
201
|
-
self._ENDPOINT,
|
202
|
-
headers={"Content-Type": "application/json"},
|
203
|
-
json=data,
|
204
|
-
)
|
205
|
-
resp_json: Dict[str, Any] = res.json()
|
206
|
-
if (
|
207
|
-
"statusCode" in resp_json and resp_json["statusCode"] != 200
|
208
|
-
) or "statusCode" not in resp_json:
|
209
|
-
_LOGGER.error(f"Request failed: {resp_json}")
|
210
|
-
raise ValueError(f"Request failed: {resp_json}")
|
211
|
-
|
212
|
-
return resp_json["data"] # type: ignore
|
185
|
+
return _send_inference_request(data, "tools")
|
213
186
|
|
214
187
|
|
215
188
|
class GroundingDINO(Tool):
|
@@ -226,8 +199,6 @@ class GroundingDINO(Tool):
|
|
226
199
|
'scores': [0.98, 0.02]}]
|
227
200
|
"""
|
228
201
|
|
229
|
-
_ENDPOINT = "https://soi4ewr6fjqqdf5vuss6rrilee0kumxq.lambda-url.us-east-2.on.aws"
|
230
|
-
|
231
202
|
name = "grounding_dino_"
|
232
203
|
description = "'grounding_dino_' is a tool that can detect arbitrary objects with inputs such as category names or referring expressions. It returns a list of bounding boxes, label names and associated probability scores."
|
233
204
|
usage = {
|
@@ -290,24 +261,13 @@ class GroundingDINO(Tool):
|
|
290
261
|
"tool": "visual_grounding",
|
291
262
|
"kwargs": {"box_threshold": box_threshold, "iou_threshold": iou_threshold},
|
292
263
|
}
|
293
|
-
|
294
|
-
self._ENDPOINT,
|
295
|
-
headers={"Content-Type": "application/json"},
|
296
|
-
json=request_data,
|
297
|
-
)
|
298
|
-
resp_json: Dict[str, Any] = res.json()
|
299
|
-
if (
|
300
|
-
"statusCode" in resp_json and resp_json["statusCode"] != 200
|
301
|
-
) or "statusCode" not in resp_json:
|
302
|
-
_LOGGER.error(f"Request failed: {resp_json}")
|
303
|
-
raise ValueError(f"Request failed: {resp_json}")
|
304
|
-
data: Dict[str, Any] = resp_json["data"]
|
264
|
+
data: Dict[str, Any] = _send_inference_request(request_data, "tools")
|
305
265
|
if "bboxes" in data:
|
306
266
|
data["bboxes"] = [normalize_bbox(box, image_size) for box in data["bboxes"]]
|
307
267
|
if "scores" in data:
|
308
268
|
data["scores"] = [round(score, 2) for score in data["scores"]]
|
309
269
|
if "labels" in data:
|
310
|
-
data["labels"] =
|
270
|
+
data["labels"] = list(data["labels"])
|
311
271
|
data["size"] = (image_size[1], image_size[0])
|
312
272
|
return data
|
313
273
|
|
@@ -335,8 +295,6 @@ class GroundingSAM(Tool):
|
|
335
295
|
[1, 1, 1, ..., 1, 1, 1]], dtype=uint8)]}]
|
336
296
|
"""
|
337
297
|
|
338
|
-
_ENDPOINT = "https://soi4ewr6fjqqdf5vuss6rrilee0kumxq.lambda-url.us-east-2.on.aws"
|
339
|
-
|
340
298
|
name = "grounding_sam_"
|
341
299
|
description = "'grounding_sam_' is a tool that can detect arbitrary objects with inputs such as category names or referring expressions. It returns a list of bounding boxes, label names and masks file names and associated probability scores."
|
342
300
|
usage = {
|
@@ -399,18 +357,7 @@ class GroundingSAM(Tool):
|
|
399
357
|
"tool": "visual_grounding_segment",
|
400
358
|
"kwargs": {"box_threshold": box_threshold, "iou_threshold": iou_threshold},
|
401
359
|
}
|
402
|
-
|
403
|
-
self._ENDPOINT,
|
404
|
-
headers={"Content-Type": "application/json"},
|
405
|
-
json=request_data,
|
406
|
-
)
|
407
|
-
resp_json: Dict[str, Any] = res.json()
|
408
|
-
if (
|
409
|
-
"statusCode" in resp_json and resp_json["statusCode"] != 200
|
410
|
-
) or "statusCode" not in resp_json:
|
411
|
-
_LOGGER.error(f"Request failed: {resp_json}")
|
412
|
-
raise ValueError(f"Request failed: {resp_json}")
|
413
|
-
data: Dict[str, Any] = resp_json["data"]
|
360
|
+
data: Dict[str, Any] = _send_inference_request(request_data, "tools")
|
414
361
|
ret_pred: Dict[str, List] = {"labels": [], "bboxes": [], "masks": []}
|
415
362
|
if "bboxes" in data:
|
416
363
|
ret_pred["bboxes"] = [
|
@@ -714,3 +661,20 @@ TOOLS = {
|
|
714
661
|
)
|
715
662
|
if (hasattr(c, "name") and hasattr(c, "description") and hasattr(c, "usage"))
|
716
663
|
}
|
664
|
+
|
665
|
+
|
666
|
+
def _send_inference_request(
|
667
|
+
payload: Dict[str, Any], endpoint_name: str
|
668
|
+
) -> Dict[str, Any]:
|
669
|
+
res = requests.post(
|
670
|
+
f"{_LND_API_URL}/model/{endpoint_name}",
|
671
|
+
headers={
|
672
|
+
"Content-Type": "application/json",
|
673
|
+
"apikey": _LND_API_KEY,
|
674
|
+
},
|
675
|
+
json=payload,
|
676
|
+
)
|
677
|
+
if res.status_code != 200:
|
678
|
+
_LOGGER.error(f"Request failed: {res.text}")
|
679
|
+
raise ValueError(f"Request failed: {res.text}")
|
680
|
+
return res.json()["data"] # type: ignore
|
@@ -0,0 +1,48 @@
|
|
1
|
+
from pydantic import Field, field_validator
|
2
|
+
from pydantic_settings import BaseSettings
|
3
|
+
|
4
|
+
|
5
|
+
class LandingaiAPIKey(BaseSettings):
|
6
|
+
"""The API key of a user in a particular organization in LandingLens.
|
7
|
+
It supports loading from environment variables or .env files.
|
8
|
+
The supported name of the environment variables are (case-insensitive):
|
9
|
+
- LANDINGAI_API_KEY
|
10
|
+
|
11
|
+
Environment variables will always take priority over values loaded from a dotenv file.
|
12
|
+
"""
|
13
|
+
|
14
|
+
api_key: str = Field(
|
15
|
+
default="land_sk_hw34v3tyEc35OAhP8F7hnGnrDv2C8hD2ycMyq0aMkVS1H40D22",
|
16
|
+
alias="LANDINGAI_API_KEY",
|
17
|
+
description="The API key of LandingAI.",
|
18
|
+
)
|
19
|
+
|
20
|
+
@field_validator("api_key")
|
21
|
+
@classmethod
|
22
|
+
def is_api_key_valid(cls, key: str) -> str:
|
23
|
+
"""Check if the API key is a v2 key."""
|
24
|
+
if not key:
|
25
|
+
raise InvalidApiKeyError(f"LandingAI API key is required, but it's {key}")
|
26
|
+
if not key.startswith("land_sk_"):
|
27
|
+
raise InvalidApiKeyError(
|
28
|
+
f"LandingAI API key (v2) must start with 'land_sk_' prefix, but it's {key}. See https://support.landing.ai/docs/api-key for more information."
|
29
|
+
)
|
30
|
+
return key
|
31
|
+
|
32
|
+
class Config:
|
33
|
+
env_file = ".env"
|
34
|
+
env_prefix = "landingai_"
|
35
|
+
case_sensitive = False
|
36
|
+
extra = "ignore"
|
37
|
+
|
38
|
+
|
39
|
+
class InvalidApiKeyError(Exception):
|
40
|
+
"""Exception raised when the an invalid API key is provided. This error could be raised from any SDK code, not limited to a HTTP client."""
|
41
|
+
|
42
|
+
def __init__(self, message: str):
|
43
|
+
self.message = f"""{message}
|
44
|
+
For more information, see https://landing-ai.github.io/landingai-python/landingai.html#manage-api-credentials"""
|
45
|
+
super().__init__(self.message)
|
46
|
+
|
47
|
+
def __str__(self) -> str:
|
48
|
+
return self.message
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: vision-agent
|
3
|
-
Version: 0.0.53
|
3
|
+
Version: 0.1.1
|
4
4
|
Summary: Toolset for Vision Agent
|
5
5
|
Author: Landing AI
|
6
6
|
Author-email: dev@landing.ai
|
@@ -16,6 +16,7 @@ Requires-Dist: openai (>=1.0.0,<2.0.0)
|
|
16
16
|
Requires-Dist: opencv-python-headless (>=4.0.0,<5.0.0)
|
17
17
|
Requires-Dist: pandas (>=2.0.0,<3.0.0)
|
18
18
|
Requires-Dist: pillow (>=10.0.0,<11.0.0)
|
19
|
+
Requires-Dist: pydantic-settings (>=2.2.1,<3.0.0)
|
19
20
|
Requires-Dist: requests (>=2.0.0,<3.0.0)
|
20
21
|
Requires-Dist: sentence-transformers (>=2.0.0,<3.0.0)
|
21
22
|
Requires-Dist: tabulate (>=0.9.0,<0.10.0)
|
@@ -5,7 +5,7 @@ vision_agent/agent/easytool.py,sha256=oMHnBg7YBtIPgqQUNcZgq7uMgpPThs99_UnO7ERkMV
|
|
5
5
|
vision_agent/agent/easytool_prompts.py,sha256=dYzWa_RaiaFSQ-CowoQOcFmjZtBTTljRyA809bLgrvU,4519
|
6
6
|
vision_agent/agent/reflexion.py,sha256=wzpptfALNZIh9Q5jgkK3imGL5LWjTW_n_Ypsvxdh07Q,10101
|
7
7
|
vision_agent/agent/reflexion_prompts.py,sha256=G7UAeNz_g2qCb2yN6OaIC7bQVUkda4m3z42EG8wAyfE,9342
|
8
|
-
vision_agent/agent/vision_agent.py,sha256=
|
8
|
+
vision_agent/agent/vision_agent.py,sha256=nHmfr-OuMfdH0N8gECXLzTAgRmTx9cYe5_pnQj-HnBE,19764
|
9
9
|
vision_agent/agent/vision_agent_prompts.py,sha256=dPg0mLVK_fGJpYK2xXGhm-zuXX1KVZW_zFXyYsspUz8,6567
|
10
10
|
vision_agent/data/__init__.py,sha256=YU-5g3LbEQ6a4drz0RLGTagXMVU2Z4Xr3RlfWE-R0jU,46
|
11
11
|
vision_agent/data/data.py,sha256=pgtSGZdAnbQ8oGsuapLtFTMPajnCGDGekEXTnFuBwsY,5122
|
@@ -20,9 +20,10 @@ vision_agent/lmm/__init__.py,sha256=nnNeKD1k7q_4vLb1x51O_EUTYaBgGfeiCx5F433gr3M,
|
|
20
20
|
vision_agent/lmm/lmm.py,sha256=LxwxCArp7DfnPbjf_Gl55xBxPwo2Qx8eDp1gCnGYSO0,9535
|
21
21
|
vision_agent/tools/__init__.py,sha256=OEqEysxm5wnnOD73NKNCUggALB72GEmVg9FNsEkSBtA,253
|
22
22
|
vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
|
23
|
-
vision_agent/tools/tools.py,sha256=
|
23
|
+
vision_agent/tools/tools.py,sha256=Qsqe8X6VjB0EMWhyKJ5EMPyLIc_d5Vtlw4ugV2FB_Ks,25589
|
24
24
|
vision_agent/tools/video.py,sha256=40rscP8YvKN3lhZ4PDcOK4XbdFX2duCRpHY_krmBYKU,7476
|
25
|
-
vision_agent-0.0.53.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
26
|
-
vision_agent-0.
|
27
|
-
vision_agent-0.0.53.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
|
28
|
-
vision_agent-0.0.53.dist-info/RECORD,,
|
25
|
+
vision_agent/type_defs.py,sha256=4LTnTL4HNsfYqCrDn9Ppjg9bSG2ZGcoKSSd9YeQf4Bw,1792
|
26
|
+
vision_agent-0.1.1.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
27
|
+
vision_agent-0.1.1.dist-info/METADATA,sha256=rWMocnnZwuRhd3xIGyQUzDbsndVASBSu2jvAqt-3Odc,6233
|
28
|
+
vision_agent-0.1.1.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
|
29
|
+
vision_agent-0.1.1.dist-info/RECORD,,
|
File without changes
|
File without changes
|