vision-agent 0.0.53__py3-none-any.whl → 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -476,7 +476,7 @@ class VisionAgent(Agent):
476
476
  reflections += "\n" + reflection
477
477
  # '<END>' is a symbol to indicate the end of the chat, which is useful for streaming logs.
478
478
  self.log_progress(
479
- f"The Vision Agent has concluded this chat. <ANSWER>{final_answer}</<ANSWER>"
479
+ f"The Vision Agent has concluded this chat. <ANSWER>{final_answer}</ANSWER>"
480
480
  )
481
481
 
482
482
  if visualize_output:
@@ -12,8 +12,11 @@ from PIL.Image import Image as ImageType
12
12
 
13
13
  from vision_agent.image_utils import convert_to_b64, get_image_size
14
14
  from vision_agent.tools.video import extract_frames_from_video
15
+ from vision_agent.type_defs import LandingaiAPIKey
15
16
 
16
17
  _LOGGER = logging.getLogger(__name__)
18
+ _LND_API_KEY = LandingaiAPIKey().api_key
19
+ _LND_API_URL = "https://api.dev.landing.ai/v1/agent"
17
20
 
18
21
 
19
22
  def normalize_bbox(
@@ -80,8 +83,6 @@ class CLIP(Tool):
80
83
  [{"labels": ["red line", "yellow dot"], "scores": [0.98, 0.02]}]
81
84
  """
82
85
 
83
- _ENDPOINT = "https://soi4ewr6fjqqdf5vuss6rrilee0kumxq.lambda-url.us-east-2.on.aws"
84
-
85
86
  name = "clip_"
86
87
  description = "'clip_' is a tool that can classify any image given a set of input names or tags. It returns a list of the input names along with their probability scores."
87
88
  usage = {
@@ -125,23 +126,9 @@ class CLIP(Tool):
125
126
  "image": image_b64,
126
127
  "tool": "closed_set_image_classification",
127
128
  }
128
- res = requests.post(
129
- self._ENDPOINT,
130
- headers={"Content-Type": "application/json"},
131
- json=data,
132
- )
133
- resp_json: Dict[str, Any] = res.json()
134
- if (
135
- "statusCode" in resp_json and resp_json["statusCode"] != 200
136
- ) or "statusCode" not in resp_json:
137
- _LOGGER.error(f"Request failed: {resp_json}")
138
- raise ValueError(f"Request failed: {resp_json}")
139
-
140
- resp_json["data"]["scores"] = [
141
- round(prob, 4) for prob in resp_json["data"]["scores"]
142
- ]
143
-
144
- return resp_json["data"] # type: ignore
129
+ resp_data = _send_inference_request(data, "tools")
130
+ resp_data["scores"] = [round(prob, 4) for prob in resp_data["scores"]]
131
+ return resp_data
145
132
 
146
133
 
147
134
  class ImageCaption(Tool):
@@ -156,8 +143,6 @@ class ImageCaption(Tool):
156
143
  {'text': ['a box of orange and white socks']}
157
144
  """
158
145
 
159
- _ENDPOINT = "https://soi4ewr6fjqqdf5vuss6rrilee0kumxq.lambda-url.us-east-2.on.aws"
160
-
161
146
  name = "image_caption_"
162
147
  description = "'image_caption_' is a tool that can caption an image based on its contents or tags. It returns a text describing the image"
163
148
  usage = {
@@ -197,19 +182,7 @@ class ImageCaption(Tool):
197
182
  "image": image_b64,
198
183
  "tool": "image_captioning",
199
184
  }
200
- res = requests.post(
201
- self._ENDPOINT,
202
- headers={"Content-Type": "application/json"},
203
- json=data,
204
- )
205
- resp_json: Dict[str, Any] = res.json()
206
- if (
207
- "statusCode" in resp_json and resp_json["statusCode"] != 200
208
- ) or "statusCode" not in resp_json:
209
- _LOGGER.error(f"Request failed: {resp_json}")
210
- raise ValueError(f"Request failed: {resp_json}")
211
-
212
- return resp_json["data"] # type: ignore
185
+ return _send_inference_request(data, "tools")
213
186
 
214
187
 
215
188
  class GroundingDINO(Tool):
@@ -226,8 +199,6 @@ class GroundingDINO(Tool):
226
199
  'scores': [0.98, 0.02]}]
227
200
  """
228
201
 
229
- _ENDPOINT = "https://soi4ewr6fjqqdf5vuss6rrilee0kumxq.lambda-url.us-east-2.on.aws"
230
-
231
202
  name = "grounding_dino_"
232
203
  description = "'grounding_dino_' is a tool that can detect arbitrary objects with inputs such as category names or referring expressions. It returns a list of bounding boxes, label names and associated probability scores."
233
204
  usage = {
@@ -290,24 +261,13 @@ class GroundingDINO(Tool):
290
261
  "tool": "visual_grounding",
291
262
  "kwargs": {"box_threshold": box_threshold, "iou_threshold": iou_threshold},
292
263
  }
293
- res = requests.post(
294
- self._ENDPOINT,
295
- headers={"Content-Type": "application/json"},
296
- json=request_data,
297
- )
298
- resp_json: Dict[str, Any] = res.json()
299
- if (
300
- "statusCode" in resp_json and resp_json["statusCode"] != 200
301
- ) or "statusCode" not in resp_json:
302
- _LOGGER.error(f"Request failed: {resp_json}")
303
- raise ValueError(f"Request failed: {resp_json}")
304
- data: Dict[str, Any] = resp_json["data"]
264
+ data: Dict[str, Any] = _send_inference_request(request_data, "tools")
305
265
  if "bboxes" in data:
306
266
  data["bboxes"] = [normalize_bbox(box, image_size) for box in data["bboxes"]]
307
267
  if "scores" in data:
308
268
  data["scores"] = [round(score, 2) for score in data["scores"]]
309
269
  if "labels" in data:
310
- data["labels"] = [label for label in data["labels"]]
270
+ data["labels"] = list(data["labels"])
311
271
  data["size"] = (image_size[1], image_size[0])
312
272
  return data
313
273
 
@@ -335,8 +295,6 @@ class GroundingSAM(Tool):
335
295
  [1, 1, 1, ..., 1, 1, 1]], dtype=uint8)]}]
336
296
  """
337
297
 
338
- _ENDPOINT = "https://soi4ewr6fjqqdf5vuss6rrilee0kumxq.lambda-url.us-east-2.on.aws"
339
-
340
298
  name = "grounding_sam_"
341
299
  description = "'grounding_sam_' is a tool that can detect arbitrary objects with inputs such as category names or referring expressions. It returns a list of bounding boxes, label names and masks file names and associated probability scores."
342
300
  usage = {
@@ -399,18 +357,7 @@ class GroundingSAM(Tool):
399
357
  "tool": "visual_grounding_segment",
400
358
  "kwargs": {"box_threshold": box_threshold, "iou_threshold": iou_threshold},
401
359
  }
402
- res = requests.post(
403
- self._ENDPOINT,
404
- headers={"Content-Type": "application/json"},
405
- json=request_data,
406
- )
407
- resp_json: Dict[str, Any] = res.json()
408
- if (
409
- "statusCode" in resp_json and resp_json["statusCode"] != 200
410
- ) or "statusCode" not in resp_json:
411
- _LOGGER.error(f"Request failed: {resp_json}")
412
- raise ValueError(f"Request failed: {resp_json}")
413
- data: Dict[str, Any] = resp_json["data"]
360
+ data: Dict[str, Any] = _send_inference_request(request_data, "tools")
414
361
  ret_pred: Dict[str, List] = {"labels": [], "bboxes": [], "masks": []}
415
362
  if "bboxes" in data:
416
363
  ret_pred["bboxes"] = [
@@ -714,3 +661,20 @@ TOOLS = {
714
661
  )
715
662
  if (hasattr(c, "name") and hasattr(c, "description") and hasattr(c, "usage"))
716
663
  }
664
+
665
+
666
def _send_inference_request(
    payload: Dict[str, Any], endpoint_name: str, timeout: float = 120.0
) -> Dict[str, Any]:
    """POST ``payload`` as JSON to a LandingAI model endpoint and return its data.

    The request goes to ``{_LND_API_URL}/model/{endpoint_name}`` and carries the
    module-level API key in the ``apikey`` header.

    Args:
        payload: JSON-serializable request body.
        endpoint_name: Final path segment of the model endpoint (e.g. "tools").
        timeout: Seconds ``requests`` waits for the server to connect/respond
            before giving up. Without a timeout a stalled server would block
            the caller indefinitely.

    Returns:
        The value stored under the ``"data"`` key of the JSON response.

    Raises:
        ValueError: If the HTTP status code of the response is not 200.
        requests.exceptions.Timeout: If the server does not answer within
            ``timeout`` seconds.
    """
    res = requests.post(
        f"{_LND_API_URL}/model/{endpoint_name}",
        headers={
            "Content-Type": "application/json",
            "apikey": _LND_API_KEY,
        },
        json=payload,
        timeout=timeout,
    )
    if res.status_code != 200:
        # Lazy %-formatting: the message is only built if the record is emitted.
        _LOGGER.error("Request failed: %s", res.text)
        raise ValueError(f"Request failed: {res.text}")
    return res.json()["data"]  # type: ignore
@@ -0,0 +1,48 @@
1
+ from pydantic import Field, field_validator
2
+ from pydantic_settings import BaseSettings
3
+
4
+
5
class LandingaiAPIKey(BaseSettings):
    """The API key of a user in a particular organization in LandingLens.

    It supports loading from environment variables or .env files.
    The supported name of the environment variables are (case-insensitive):
    - LANDINGAI_API_KEY

    Environment variables will always take priority over values loaded from a dotenv file.
    """

    # NOTE(review): a credential is hard-coded as the default value below, so it
    # ships with every copy of the package. It is kept here only because the
    # class is constructed with no arguments elsewhere; consider removing the
    # default and rotating the key.
    api_key: str = Field(
        default="land_sk_hw34v3tyEc35OAhP8F7hnGnrDv2C8hD2ycMyq0aMkVS1H40D22",
        alias="LANDINGAI_API_KEY",
        description="The API key of LandingAI.",
    )

    @field_validator("api_key")
    @classmethod
    def is_api_key_valid(cls, key: str) -> str:
        """Check if the API key is a v2 key.

        Args:
            key: The candidate API key.

        Returns:
            The key unchanged when it is non-empty and starts with ``land_sk_``.

        Raises:
            InvalidApiKeyError: If the key is empty or lacks the ``land_sk_`` prefix.
        """
        if not key:
            raise InvalidApiKeyError(f"LandingAI API key is required, but it's {key}")
        if not key.startswith("land_sk_"):
            # Echo only a short prefix: the rejected value may still be a real
            # secret and exception text commonly ends up in logs and tracebacks.
            redacted = f"{key[:4]}***"
            raise InvalidApiKeyError(
                f"LandingAI API key (v2) must start with 'land_sk_' prefix, but it's {redacted}. See https://support.landing.ai/docs/api-key for more information."
            )
        return key

    class Config:
        env_file = ".env"
        env_prefix = "landingai_"
        case_sensitive = False
        extra = "ignore"
37
+
38
+
39
class InvalidApiKeyError(Exception):
    """Raised when an invalid API key is provided.

    This error could be raised from any SDK code, not limited to a HTTP client.
    The caller's message is extended with a pointer to the credential
    documentation and kept on ``self.message``.
    """

    def __init__(self, message: str):
        docs_hint = "For more information, see https://landing-ai.github.io/landingai-python/landingai.html#manage-api-credentials"
        # Caller's message first, documentation pointer on the following line.
        self.message = f"{message}\n{docs_hint}"
        super().__init__(self.message)

    def __str__(self) -> str:
        return self.message
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: vision-agent
3
- Version: 0.0.53
3
+ Version: 0.1.1
4
4
  Summary: Toolset for Vision Agent
5
5
  Author: Landing AI
6
6
  Author-email: dev@landing.ai
@@ -16,6 +16,7 @@ Requires-Dist: openai (>=1.0.0,<2.0.0)
16
16
  Requires-Dist: opencv-python-headless (>=4.0.0,<5.0.0)
17
17
  Requires-Dist: pandas (>=2.0.0,<3.0.0)
18
18
  Requires-Dist: pillow (>=10.0.0,<11.0.0)
19
+ Requires-Dist: pydantic-settings (>=2.2.1,<3.0.0)
19
20
  Requires-Dist: requests (>=2.0.0,<3.0.0)
20
21
  Requires-Dist: sentence-transformers (>=2.0.0,<3.0.0)
21
22
  Requires-Dist: tabulate (>=0.9.0,<0.10.0)
@@ -5,7 +5,7 @@ vision_agent/agent/easytool.py,sha256=oMHnBg7YBtIPgqQUNcZgq7uMgpPThs99_UnO7ERkMV
5
5
  vision_agent/agent/easytool_prompts.py,sha256=dYzWa_RaiaFSQ-CowoQOcFmjZtBTTljRyA809bLgrvU,4519
6
6
  vision_agent/agent/reflexion.py,sha256=wzpptfALNZIh9Q5jgkK3imGL5LWjTW_n_Ypsvxdh07Q,10101
7
7
  vision_agent/agent/reflexion_prompts.py,sha256=G7UAeNz_g2qCb2yN6OaIC7bQVUkda4m3z42EG8wAyfE,9342
8
- vision_agent/agent/vision_agent.py,sha256=UV7_mqejfF4B-AqqmETqWvfiPvRcjfq-0nlNfeo_RxM,19765
8
+ vision_agent/agent/vision_agent.py,sha256=nHmfr-OuMfdH0N8gECXLzTAgRmTx9cYe5_pnQj-HnBE,19764
9
9
  vision_agent/agent/vision_agent_prompts.py,sha256=dPg0mLVK_fGJpYK2xXGhm-zuXX1KVZW_zFXyYsspUz8,6567
10
10
  vision_agent/data/__init__.py,sha256=YU-5g3LbEQ6a4drz0RLGTagXMVU2Z4Xr3RlfWE-R0jU,46
11
11
  vision_agent/data/data.py,sha256=pgtSGZdAnbQ8oGsuapLtFTMPajnCGDGekEXTnFuBwsY,5122
@@ -20,9 +20,10 @@ vision_agent/lmm/__init__.py,sha256=nnNeKD1k7q_4vLb1x51O_EUTYaBgGfeiCx5F433gr3M,
20
20
  vision_agent/lmm/lmm.py,sha256=LxwxCArp7DfnPbjf_Gl55xBxPwo2Qx8eDp1gCnGYSO0,9535
21
21
  vision_agent/tools/__init__.py,sha256=OEqEysxm5wnnOD73NKNCUggALB72GEmVg9FNsEkSBtA,253
22
22
  vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
23
- vision_agent/tools/tools.py,sha256=WPqLHw8D0tkaP2LFYo6cBithP4q0vb6Bve4Nv577Prk,27045
23
+ vision_agent/tools/tools.py,sha256=Qsqe8X6VjB0EMWhyKJ5EMPyLIc_d5Vtlw4ugV2FB_Ks,25589
24
24
  vision_agent/tools/video.py,sha256=40rscP8YvKN3lhZ4PDcOK4XbdFX2duCRpHY_krmBYKU,7476
25
- vision_agent-0.0.53.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
26
- vision_agent-0.0.53.dist-info/METADATA,sha256=ybezBW-LYFhlCovdbKNq6iC93mb0wZNOQ29HD30OPz4,6184
27
- vision_agent-0.0.53.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
28
- vision_agent-0.0.53.dist-info/RECORD,,
25
+ vision_agent/type_defs.py,sha256=4LTnTL4HNsfYqCrDn9Ppjg9bSG2ZGcoKSSd9YeQf4Bw,1792
26
+ vision_agent-0.1.1.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
27
+ vision_agent-0.1.1.dist-info/METADATA,sha256=rWMocnnZwuRhd3xIGyQUzDbsndVASBSu2jvAqt-3Odc,6233
28
+ vision_agent-0.1.1.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
29
+ vision_agent-0.1.1.dist-info/RECORD,,