vision-agent 0.2.70__py3-none-any.whl → 0.2.71__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vision_agent/tools/tool_utils.py +38 -5
- vision_agent/tools/tools.py +11 -11
- {vision_agent-0.2.70.dist-info → vision_agent-0.2.71.dist-info}/METADATA +1 -1
- {vision_agent-0.2.70.dist-info → vision_agent-0.2.71.dist-info}/RECORD +6 -6
- {vision_agent-0.2.70.dist-info → vision_agent-0.2.71.dist-info}/LICENSE +0 -0
- {vision_agent-0.2.70.dist-info → vision_agent-0.2.71.dist-info}/WHEEL +0 -0
vision_agent/tools/tool_utils.py
CHANGED
@@ -3,6 +3,9 @@ import os
|
|
3
3
|
from typing import Any, Dict
|
4
4
|
|
5
5
|
import requests
|
6
|
+
from requests import Session
|
7
|
+
from requests.adapters import HTTPAdapter
|
8
|
+
from urllib3.util.retry import Retry
|
6
9
|
|
7
10
|
from vision_agent.utils.type_defs import LandingaiAPIKey
|
8
11
|
|
@@ -11,20 +14,50 @@ _LND_API_KEY = LandingaiAPIKey().api_key
|
|
11
14
|
_LND_API_URL = "https://api.staging.landing.ai/v1/agent"
|
12
15
|
|
13
16
|
|
14
|
-
def
|
17
|
+
def send_inference_request(
|
15
18
|
payload: Dict[str, Any], endpoint_name: str
|
16
19
|
) -> Dict[str, Any]:
|
17
20
|
if runtime_tag := os.environ.get("RUNTIME_TAG", ""):
|
18
21
|
payload["runtime_tag"] = runtime_tag
|
19
|
-
|
20
|
-
|
22
|
+
url = f"{_LND_API_URL}/model/{endpoint_name}"
|
23
|
+
session = _create_requests_session(
|
24
|
+
url=url,
|
25
|
+
num_retry=3,
|
21
26
|
headers={
|
22
27
|
"Content-Type": "application/json",
|
23
28
|
"apikey": _LND_API_KEY,
|
24
29
|
},
|
30
|
+
)
|
31
|
+
res = requests.post(
|
32
|
+
f"{_LND_API_URL}/model/{endpoint_name}",
|
25
33
|
json=payload,
|
26
34
|
)
|
35
|
+
res = session.post(url, json=payload)
|
27
36
|
if res.status_code != 200:
|
28
|
-
_LOGGER.error(f"Request failed: {res.text}")
|
29
|
-
raise ValueError(f"Request failed: {res.text}")
|
37
|
+
_LOGGER.error(f"Request failed: {res.status_code} {res.text}")
|
38
|
+
raise ValueError(f"Request failed: {res.status_code} {res.text}")
|
30
39
|
return res.json()["data"] # type: ignore
|
40
|
+
|
41
|
+
|
42
|
+
def _create_requests_session(
|
43
|
+
url: str, num_retry: int, headers: Dict[str, str]
|
44
|
+
) -> Session:
|
45
|
+
"""Create a requests session with retry"""
|
46
|
+
session = Session()
|
47
|
+
retries = Retry(
|
48
|
+
total=num_retry,
|
49
|
+
backoff_factor=2,
|
50
|
+
raise_on_redirect=True,
|
51
|
+
raise_on_status=False,
|
52
|
+
allowed_methods=["GET", "POST", "PUT"],
|
53
|
+
status_forcelist=[
|
54
|
+
408, # Request Timeout
|
55
|
+
429, # Too Many Requests (ie. rate limiter).
|
56
|
+
502, # Bad Gateway
|
57
|
+
503, # Service Unavailable (include cloud circuit breaker)
|
58
|
+
504, # Gateway Timeout
|
59
|
+
],
|
60
|
+
)
|
61
|
+
session.mount(url, HTTPAdapter(max_retries=retries if num_retry > 0 else 0))
|
62
|
+
session.headers.update(headers)
|
63
|
+
return session
|
vision_agent/tools/tools.py
CHANGED
@@ -15,7 +15,7 @@ from moviepy.editor import ImageSequenceClip
|
|
15
15
|
from PIL import Image, ImageDraw, ImageFont
|
16
16
|
from pillow_heif import register_heif_opener # type: ignore
|
17
17
|
|
18
|
-
from vision_agent.tools.tool_utils import
|
18
|
+
from vision_agent.tools.tool_utils import send_inference_request
|
19
19
|
from vision_agent.utils import extract_frames_from_video
|
20
20
|
from vision_agent.utils.execute import FileSerializer, MimeType
|
21
21
|
from vision_agent.utils.image_utils import (
|
@@ -105,7 +105,7 @@ def grounding_dino(
|
|
105
105
|
),
|
106
106
|
"kwargs": {"box_threshold": box_threshold, "iou_threshold": iou_threshold},
|
107
107
|
}
|
108
|
-
data: Dict[str, Any] =
|
108
|
+
data: Dict[str, Any] = send_inference_request(request_data, "tools")
|
109
109
|
return_data = []
|
110
110
|
for i in range(len(data["bboxes"])):
|
111
111
|
return_data.append(
|
@@ -161,7 +161,7 @@ def owl_v2(
|
|
161
161
|
"tool": "open_vocab_detection",
|
162
162
|
"kwargs": {"box_threshold": box_threshold, "iou_threshold": iou_threshold},
|
163
163
|
}
|
164
|
-
data: Dict[str, Any] =
|
164
|
+
data: Dict[str, Any] = send_inference_request(request_data, "tools")
|
165
165
|
return_data = []
|
166
166
|
for i in range(len(data["bboxes"])):
|
167
167
|
return_data.append(
|
@@ -225,7 +225,7 @@ def grounding_sam(
|
|
225
225
|
"tool": "visual_grounding_segment",
|
226
226
|
"kwargs": {"box_threshold": box_threshold, "iou_threshold": iou_threshold},
|
227
227
|
}
|
228
|
-
data: Dict[str, Any] =
|
228
|
+
data: Dict[str, Any] = send_inference_request(request_data, "tools")
|
229
229
|
return_data = []
|
230
230
|
for i in range(len(data["bboxes"])):
|
231
231
|
return_data.append(
|
@@ -341,7 +341,7 @@ def loca_zero_shot_counting(image: np.ndarray) -> Dict[str, Any]:
|
|
341
341
|
"image": image_b64,
|
342
342
|
"tool": "zero_shot_counting",
|
343
343
|
}
|
344
|
-
resp_data =
|
344
|
+
resp_data = send_inference_request(data, "tools")
|
345
345
|
resp_data["heat_map"] = np.array(b64_to_pil(resp_data["heat_map"][0]))
|
346
346
|
return resp_data
|
347
347
|
|
@@ -376,7 +376,7 @@ def loca_visual_prompt_counting(
|
|
376
376
|
"prompt": bbox_str,
|
377
377
|
"tool": "few_shot_counting",
|
378
378
|
}
|
379
|
-
resp_data =
|
379
|
+
resp_data = send_inference_request(data, "tools")
|
380
380
|
resp_data["heat_map"] = np.array(b64_to_pil(resp_data["heat_map"][0]))
|
381
381
|
return resp_data
|
382
382
|
|
@@ -407,7 +407,7 @@ def git_vqa_v2(prompt: str, image: np.ndarray) -> str:
|
|
407
407
|
"tool": "image_question_answering",
|
408
408
|
}
|
409
409
|
|
410
|
-
answer =
|
410
|
+
answer = send_inference_request(data, "tools")
|
411
411
|
return answer["text"][0] # type: ignore
|
412
412
|
|
413
413
|
|
@@ -436,7 +436,7 @@ def clip(image: np.ndarray, classes: List[str]) -> Dict[str, Any]:
|
|
436
436
|
"image": image_b64,
|
437
437
|
"tool": "closed_set_image_classification",
|
438
438
|
}
|
439
|
-
resp_data =
|
439
|
+
resp_data = send_inference_request(data, "tools")
|
440
440
|
resp_data["scores"] = [round(prob, 4) for prob in resp_data["scores"]]
|
441
441
|
return resp_data
|
442
442
|
|
@@ -463,7 +463,7 @@ def vit_image_classification(image: np.ndarray) -> Dict[str, Any]:
|
|
463
463
|
"image": image_b64,
|
464
464
|
"tool": "image_classification",
|
465
465
|
}
|
466
|
-
resp_data =
|
466
|
+
resp_data = send_inference_request(data, "tools")
|
467
467
|
resp_data["scores"] = [round(prob, 4) for prob in resp_data["scores"]]
|
468
468
|
return resp_data
|
469
469
|
|
@@ -490,7 +490,7 @@ def vit_nsfw_classification(image: np.ndarray) -> Dict[str, Any]:
|
|
490
490
|
"image": image_b64,
|
491
491
|
"tool": "nsfw_image_classification",
|
492
492
|
}
|
493
|
-
resp_data =
|
493
|
+
resp_data = send_inference_request(data, "tools")
|
494
494
|
resp_data["scores"] = round(resp_data["scores"], 4)
|
495
495
|
return resp_data
|
496
496
|
|
@@ -517,7 +517,7 @@ def blip_image_caption(image: np.ndarray) -> str:
|
|
517
517
|
"tool": "image_captioning",
|
518
518
|
}
|
519
519
|
|
520
|
-
answer =
|
520
|
+
answer = send_inference_request(data, "tools")
|
521
521
|
return answer["text"][0] # type: ignore
|
522
522
|
|
523
523
|
|
@@ -9,15 +9,15 @@ vision_agent/lmm/__init__.py,sha256=bw24xyQJHGzmph5e-bKCiTh9AX6tRFI2OUd0mofxjZI,
|
|
9
9
|
vision_agent/lmm/lmm.py,sha256=V7jfU94HwA-SiQLY14USHrSGtagVKCNGjZhW5MyKipo,11547
|
10
10
|
vision_agent/tools/__init__.py,sha256=aE1O8cMeLDPO50Sc-CuAQ_Akh0viz7vBxDcVeZNqsA0,1604
|
11
11
|
vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
|
12
|
-
vision_agent/tools/tool_utils.py,sha256=
|
13
|
-
vision_agent/tools/tools.py,sha256=
|
12
|
+
vision_agent/tools/tool_utils.py,sha256=ELcCD05xmER9KRN1lt5VbI31Kyvp-8G-ERSwSleT3Ic,1956
|
13
|
+
vision_agent/tools/tools.py,sha256=TaDZIvYsYNleqDsETfoZiPWPBZjyimXhudLdFZ5NsLE,31386
|
14
14
|
vision_agent/utils/__init__.py,sha256=CW84HnhqI6XQVuxf2KifkLnSuO7EOhmuL09-gAymAak,219
|
15
15
|
vision_agent/utils/execute.py,sha256=QImS69SN00logF-E68aNpT7YsJVRQOhZYlNLmCNEfro,21337
|
16
16
|
vision_agent/utils/image_utils.py,sha256=_cdiS5YrLzqkq_ZgFUO897m5M4_SCIThwUy4lOklfB8,7700
|
17
17
|
vision_agent/utils/sim.py,sha256=ci6Eta73dDgLP1Ajtknbgmf1g8aAvBHqlVQvBuLMKXQ,4427
|
18
18
|
vision_agent/utils/type_defs.py,sha256=BlI8ywWHAplC7kYWLvt4AOdnKpEW3qWEFm-GEOSkrFQ,1792
|
19
19
|
vision_agent/utils/video.py,sha256=rNmU9KEIkZB5-EztZNlUiKYN0mm_55A_2VGUM0QpqLA,8779
|
20
|
-
vision_agent-0.2.
|
21
|
-
vision_agent-0.2.
|
22
|
-
vision_agent-0.2.
|
23
|
-
vision_agent-0.2.
|
20
|
+
vision_agent-0.2.71.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
21
|
+
vision_agent-0.2.71.dist-info/METADATA,sha256=rC9uL4uJOBKkZf2l3RdAGm2vjEKPEeYYYKBBP03nPdY,9394
|
22
|
+
vision_agent-0.2.71.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
|
23
|
+
vision_agent-0.2.71.dist-info/RECORD,,
|
File without changes
|
File without changes
|