vision-agent 0.2.70__tar.gz → 0.2.72__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (24)
  1. {vision_agent-0.2.70 → vision_agent-0.2.72}/PKG-INFO +2 -2
  2. {vision_agent-0.2.70 → vision_agent-0.2.72}/README.md +1 -1
  3. {vision_agent-0.2.70 → vision_agent-0.2.72}/pyproject.toml +1 -1
  4. vision_agent-0.2.72/vision_agent/tools/tool_utils.py +63 -0
  5. {vision_agent-0.2.70 → vision_agent-0.2.72}/vision_agent/tools/tools.py +11 -11
  6. vision_agent-0.2.70/vision_agent/tools/tool_utils.py +0 -30
  7. {vision_agent-0.2.70 → vision_agent-0.2.72}/LICENSE +0 -0
  8. {vision_agent-0.2.70 → vision_agent-0.2.72}/vision_agent/__init__.py +0 -0
  9. {vision_agent-0.2.70 → vision_agent-0.2.72}/vision_agent/agent/__init__.py +0 -0
  10. {vision_agent-0.2.70 → vision_agent-0.2.72}/vision_agent/agent/agent.py +0 -0
  11. {vision_agent-0.2.70 → vision_agent-0.2.72}/vision_agent/agent/vision_agent.py +0 -0
  12. {vision_agent-0.2.70 → vision_agent-0.2.72}/vision_agent/agent/vision_agent_prompts.py +0 -0
  13. {vision_agent-0.2.70 → vision_agent-0.2.72}/vision_agent/fonts/__init__.py +0 -0
  14. {vision_agent-0.2.70 → vision_agent-0.2.72}/vision_agent/fonts/default_font_ch_en.ttf +0 -0
  15. {vision_agent-0.2.70 → vision_agent-0.2.72}/vision_agent/lmm/__init__.py +0 -0
  16. {vision_agent-0.2.70 → vision_agent-0.2.72}/vision_agent/lmm/lmm.py +0 -0
  17. {vision_agent-0.2.70 → vision_agent-0.2.72}/vision_agent/tools/__init__.py +0 -0
  18. {vision_agent-0.2.70 → vision_agent-0.2.72}/vision_agent/tools/prompts.py +0 -0
  19. {vision_agent-0.2.70 → vision_agent-0.2.72}/vision_agent/utils/__init__.py +0 -0
  20. {vision_agent-0.2.70 → vision_agent-0.2.72}/vision_agent/utils/execute.py +0 -0
  21. {vision_agent-0.2.70 → vision_agent-0.2.72}/vision_agent/utils/image_utils.py +0 -0
  22. {vision_agent-0.2.70 → vision_agent-0.2.72}/vision_agent/utils/sim.py +0 -0
  23. {vision_agent-0.2.70 → vision_agent-0.2.72}/vision_agent/utils/type_defs.py +0 -0
  24. {vision_agent-0.2.70 → vision_agent-0.2.72}/vision_agent/utils/video.py +0 -0
{vision_agent-0.2.70 → vision_agent-0.2.72}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: vision-agent
- Version: 0.2.70
+ Version: 0.2.72
  Summary: Toolset for Vision Agent
  Author: Landing AI
  Author-email: dev@landing.ai
@@ -78,7 +78,7 @@ export OPENAI_API_KEY="your-api-key"
  ```

  ### Important Note on API Usage
- Please be aware that using the API in this project requires you to have API credits (minimum of five US dollars). This is different from the OpenAI subscription used in this chatbot. If you don't have credit, further information can be found [here](https://github.com/landing-ai/vision-agent?tab=readme-ov-file#to-get-started-with-api-credits)
+ Please be aware that using the API in this project requires you to have API credits (minimum of five US dollars). This is different from the OpenAI subscription used in this chatbot. If you don't have credit, further information can be found [here](https://github.com/landing-ai/vision-agent?tab=readme-ov-file#how-to-get-started-with-openai-api-credits)

  ### Vision Agent
  #### Basic Usage
{vision_agent-0.2.70 → vision_agent-0.2.72}/README.md
@@ -41,7 +41,7 @@ export OPENAI_API_KEY="your-api-key"
  ```

  ### Important Note on API Usage
- Please be aware that using the API in this project requires you to have API credits (minimum of five US dollars). This is different from the OpenAI subscription used in this chatbot. If you don't have credit, further information can be found [here](https://github.com/landing-ai/vision-agent?tab=readme-ov-file#to-get-started-with-api-credits)
+ Please be aware that using the API in this project requires you to have API credits (minimum of five US dollars). This is different from the OpenAI subscription used in this chatbot. If you don't have credit, further information can be found [here](https://github.com/landing-ai/vision-agent?tab=readme-ov-file#how-to-get-started-with-openai-api-credits)

  ### Vision Agent
  #### Basic Usage
{vision_agent-0.2.70 → vision_agent-0.2.72}/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"

  [tool.poetry]
  name = "vision-agent"
- version = "0.2.70"
+ version = "0.2.72"
  description = "Toolset for Vision Agent"
  authors = ["Landing AI <dev@landing.ai>"]
  readme = "README.md"
vision_agent-0.2.72/vision_agent/tools/tool_utils.py (new file)
@@ -0,0 +1,63 @@
+ import logging
+ import os
+ from typing import Any, Dict
+
+ import requests
+ from requests import Session
+ from requests.adapters import HTTPAdapter
+ from urllib3.util.retry import Retry
+
+ from vision_agent.utils.type_defs import LandingaiAPIKey
+
+ _LOGGER = logging.getLogger(__name__)
+ _LND_API_KEY = LandingaiAPIKey().api_key
+ _LND_API_URL = "https://api.staging.landing.ai/v1/agent"
+
+
+ def send_inference_request(
+     payload: Dict[str, Any], endpoint_name: str
+ ) -> Dict[str, Any]:
+     if runtime_tag := os.environ.get("RUNTIME_TAG", ""):
+         payload["runtime_tag"] = runtime_tag
+     url = f"{_LND_API_URL}/model/{endpoint_name}"
+     session = _create_requests_session(
+         url=url,
+         num_retry=3,
+         headers={
+             "Content-Type": "application/json",
+             "apikey": _LND_API_KEY,
+         },
+     )
+     res = requests.post(
+         f"{_LND_API_URL}/model/{endpoint_name}",
+         json=payload,
+     )
+     res = session.post(url, json=payload)
+     if res.status_code != 200:
+         _LOGGER.error(f"Request failed: {res.status_code} {res.text}")
+         raise ValueError(f"Request failed: {res.status_code} {res.text}")
+     return res.json()["data"]  # type: ignore
+
+
+ def _create_requests_session(
+     url: str, num_retry: int, headers: Dict[str, str]
+ ) -> Session:
+     """Create a requests session with retry"""
+     session = Session()
+     retries = Retry(
+         total=num_retry,
+         backoff_factor=2,
+         raise_on_redirect=True,
+         raise_on_status=False,
+         allowed_methods=["GET", "POST", "PUT"],
+         status_forcelist=[
+             408,  # Request Timeout
+             429,  # Too Many Requests (ie. rate limiter).
+             502,  # Bad Gateway
+             503,  # Service Unavailable (include cloud circuit breaker)
+             504,  # Gateway Timeout
+         ],
+     )
+     session.mount(url, HTTPAdapter(max_retries=retries if num_retry > 0 else 0))
+     session.headers.update(headers)
+     return session
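For orientation only (not part of the package contents): a minimal sketch of how the new send_inference_request helper could be exercised directly. The payload keys and the "tools" endpoint name are taken from the call sites in tools.py below; the image path is hypothetical, convert_to_b64 is assumed to be importable from vision_agent.utils.image_utils as it is in tools.py, and a LandingAI API key must be resolvable by LandingaiAPIKey for the module import to succeed.

```python
# Usage sketch under the assumptions stated above.
import numpy as np
from PIL import Image

from vision_agent.tools.tool_utils import send_inference_request
from vision_agent.utils.image_utils import convert_to_b64  # assumed helper, used by tools.py

# "example.jpg" is a hypothetical local image path.
image = np.array(Image.open("example.jpg"))

# Payload shape mirrors the vit_image_classification call site in tools.py.
payload = {
    "image": convert_to_b64(image),
    "tool": "image_classification",
}

# POSTs to f"{_LND_API_URL}/model/tools" through a session that retries on
# 408/429/502/503/504 responses, then returns the "data" field of the JSON body.
result = send_inference_request(payload, "tools")
print(result)  # per tools.py, this dict includes a "scores" list for this tool
```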
{vision_agent-0.2.70 → vision_agent-0.2.72}/vision_agent/tools/tools.py
@@ -15,7 +15,7 @@ from moviepy.editor import ImageSequenceClip
  from PIL import Image, ImageDraw, ImageFont
  from pillow_heif import register_heif_opener  # type: ignore

- from vision_agent.tools.tool_utils import _send_inference_request
+ from vision_agent.tools.tool_utils import send_inference_request
  from vision_agent.utils import extract_frames_from_video
  from vision_agent.utils.execute import FileSerializer, MimeType
  from vision_agent.utils.image_utils import (
@@ -105,7 +105,7 @@ def grounding_dino(
          ),
          "kwargs": {"box_threshold": box_threshold, "iou_threshold": iou_threshold},
      }
-     data: Dict[str, Any] = _send_inference_request(request_data, "tools")
+     data: Dict[str, Any] = send_inference_request(request_data, "tools")
      return_data = []
      for i in range(len(data["bboxes"])):
          return_data.append(
@@ -161,7 +161,7 @@ def owl_v2(
          "tool": "open_vocab_detection",
          "kwargs": {"box_threshold": box_threshold, "iou_threshold": iou_threshold},
      }
-     data: Dict[str, Any] = _send_inference_request(request_data, "tools")
+     data: Dict[str, Any] = send_inference_request(request_data, "tools")
      return_data = []
      for i in range(len(data["bboxes"])):
          return_data.append(
@@ -225,7 +225,7 @@ def grounding_sam(
          "tool": "visual_grounding_segment",
          "kwargs": {"box_threshold": box_threshold, "iou_threshold": iou_threshold},
      }
-     data: Dict[str, Any] = _send_inference_request(request_data, "tools")
+     data: Dict[str, Any] = send_inference_request(request_data, "tools")
      return_data = []
      for i in range(len(data["bboxes"])):
          return_data.append(
@@ -341,7 +341,7 @@ def loca_zero_shot_counting(image: np.ndarray) -> Dict[str, Any]:
          "image": image_b64,
          "tool": "zero_shot_counting",
      }
-     resp_data = _send_inference_request(data, "tools")
+     resp_data = send_inference_request(data, "tools")
      resp_data["heat_map"] = np.array(b64_to_pil(resp_data["heat_map"][0]))
      return resp_data

@@ -376,7 +376,7 @@ def loca_visual_prompt_counting(
          "prompt": bbox_str,
          "tool": "few_shot_counting",
      }
-     resp_data = _send_inference_request(data, "tools")
+     resp_data = send_inference_request(data, "tools")
      resp_data["heat_map"] = np.array(b64_to_pil(resp_data["heat_map"][0]))
      return resp_data

@@ -407,7 +407,7 @@ def git_vqa_v2(prompt: str, image: np.ndarray) -> str:
          "tool": "image_question_answering",
      }

-     answer = _send_inference_request(data, "tools")
+     answer = send_inference_request(data, "tools")
      return answer["text"][0]  # type: ignore


@@ -436,7 +436,7 @@ def clip(image: np.ndarray, classes: List[str]) -> Dict[str, Any]:
          "image": image_b64,
          "tool": "closed_set_image_classification",
      }
-     resp_data = _send_inference_request(data, "tools")
+     resp_data = send_inference_request(data, "tools")
      resp_data["scores"] = [round(prob, 4) for prob in resp_data["scores"]]
      return resp_data

@@ -463,7 +463,7 @@ def vit_image_classification(image: np.ndarray) -> Dict[str, Any]:
          "image": image_b64,
          "tool": "image_classification",
      }
-     resp_data = _send_inference_request(data, "tools")
+     resp_data = send_inference_request(data, "tools")
      resp_data["scores"] = [round(prob, 4) for prob in resp_data["scores"]]
      return resp_data

@@ -490,7 +490,7 @@ def vit_nsfw_classification(image: np.ndarray) -> Dict[str, Any]:
          "image": image_b64,
          "tool": "nsfw_image_classification",
      }
-     resp_data = _send_inference_request(data, "tools")
+     resp_data = send_inference_request(data, "tools")
      resp_data["scores"] = round(resp_data["scores"], 4)
      return resp_data

@@ -517,7 +517,7 @@ def blip_image_caption(image: np.ndarray) -> str:
          "tool": "image_captioning",
      }

-     answer = _send_inference_request(data, "tools")
+     answer = send_inference_request(data, "tools")
      return answer["text"][0]  # type: ignore

vision_agent-0.2.70/vision_agent/tools/tool_utils.py (deleted)
@@ -1,30 +0,0 @@
- import logging
- import os
- from typing import Any, Dict
-
- import requests
-
- from vision_agent.utils.type_defs import LandingaiAPIKey
-
- _LOGGER = logging.getLogger(__name__)
- _LND_API_KEY = LandingaiAPIKey().api_key
- _LND_API_URL = "https://api.staging.landing.ai/v1/agent"
-
-
- def _send_inference_request(
-     payload: Dict[str, Any], endpoint_name: str
- ) -> Dict[str, Any]:
-     if runtime_tag := os.environ.get("RUNTIME_TAG", ""):
-         payload["runtime_tag"] = runtime_tag
-     res = requests.post(
-         f"{_LND_API_URL}/model/{endpoint_name}",
-         headers={
-             "Content-Type": "application/json",
-             "apikey": _LND_API_KEY,
-         },
-         json=payload,
-     )
-     if res.status_code != 200:
-         _LOGGER.error(f"Request failed: {res.text}")
-         raise ValueError(f"Request failed: {res.text}")
-     return res.json()["data"]  # type: ignore
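As a companion to the sketch above, here is a small example exercising two of the tools.py functions touched in this diff, which now route their requests through send_inference_request. The function names and signatures are taken from the hunk headers; the image path is hypothetical, and importing straight from vision_agent.tools.tools rather than a package-level re-export is an assumption.

```python
# Usage sketch under the assumptions stated above; requires a LandingAI API key
# (sent as the "apikey" header by tool_utils.py) and an image on disk.
import numpy as np
from PIL import Image

from vision_agent.tools.tools import blip_image_caption, clip  # import path assumed

image = np.array(Image.open("example.jpg"))  # hypothetical path

# clip() builds a "closed_set_image_classification" payload and sends it via
# send_inference_request(..., "tools"), returning a dict with rounded "scores".
result = clip(image, ["cat", "dog"])
print(result)

# blip_image_caption() uses the "image_captioning" tool and returns the first caption string.
print(blip_image_caption(image))
```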