vision-agent 0.2.226__tar.gz → 0.2.227__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
Files changed (47) hide show
  1. {vision_agent-0.2.226 → vision_agent-0.2.227}/PKG-INFO +1 -1
  2. {vision_agent-0.2.226 → vision_agent-0.2.227}/pyproject.toml +1 -1
  3. {vision_agent-0.2.226 → vision_agent-0.2.227}/vision_agent/tools/__init__.py +1 -0
  4. {vision_agent-0.2.226 → vision_agent-0.2.227}/vision_agent/tools/tools.py +140 -0
  5. {vision_agent-0.2.226 → vision_agent-0.2.227}/vision_agent/utils/video_tracking.py +1 -0
  6. {vision_agent-0.2.226 → vision_agent-0.2.227}/LICENSE +0 -0
  7. {vision_agent-0.2.226 → vision_agent-0.2.227}/README.md +0 -0
  8. {vision_agent-0.2.226 → vision_agent-0.2.227}/vision_agent/.sim_tools/df.csv +0 -0
  9. {vision_agent-0.2.226 → vision_agent-0.2.227}/vision_agent/.sim_tools/embs.npy +0 -0
  10. {vision_agent-0.2.226 → vision_agent-0.2.227}/vision_agent/__init__.py +0 -0
  11. {vision_agent-0.2.226 → vision_agent-0.2.227}/vision_agent/agent/README.md +0 -0
  12. {vision_agent-0.2.226 → vision_agent-0.2.227}/vision_agent/agent/__init__.py +0 -0
  13. {vision_agent-0.2.226 → vision_agent-0.2.227}/vision_agent/agent/agent.py +0 -0
  14. {vision_agent-0.2.226 → vision_agent-0.2.227}/vision_agent/agent/agent_utils.py +0 -0
  15. {vision_agent-0.2.226 → vision_agent-0.2.227}/vision_agent/agent/types.py +0 -0
  16. {vision_agent-0.2.226 → vision_agent-0.2.227}/vision_agent/agent/vision_agent.py +0 -0
  17. {vision_agent-0.2.226 → vision_agent-0.2.227}/vision_agent/agent/vision_agent_coder.py +0 -0
  18. {vision_agent-0.2.226 → vision_agent-0.2.227}/vision_agent/agent/vision_agent_coder_prompts.py +0 -0
  19. {vision_agent-0.2.226 → vision_agent-0.2.227}/vision_agent/agent/vision_agent_coder_prompts_v2.py +0 -0
  20. {vision_agent-0.2.226 → vision_agent-0.2.227}/vision_agent/agent/vision_agent_coder_v2.py +0 -0
  21. {vision_agent-0.2.226 → vision_agent-0.2.227}/vision_agent/agent/vision_agent_planner.py +0 -0
  22. {vision_agent-0.2.226 → vision_agent-0.2.227}/vision_agent/agent/vision_agent_planner_prompts.py +0 -0
  23. {vision_agent-0.2.226 → vision_agent-0.2.227}/vision_agent/agent/vision_agent_planner_prompts_v2.py +0 -0
  24. {vision_agent-0.2.226 → vision_agent-0.2.227}/vision_agent/agent/vision_agent_planner_v2.py +0 -0
  25. {vision_agent-0.2.226 → vision_agent-0.2.227}/vision_agent/agent/vision_agent_prompts.py +0 -0
  26. {vision_agent-0.2.226 → vision_agent-0.2.227}/vision_agent/agent/vision_agent_prompts_v2.py +0 -0
  27. {vision_agent-0.2.226 → vision_agent-0.2.227}/vision_agent/agent/vision_agent_v2.py +0 -0
  28. {vision_agent-0.2.226 → vision_agent-0.2.227}/vision_agent/clients/__init__.py +0 -0
  29. {vision_agent-0.2.226 → vision_agent-0.2.227}/vision_agent/clients/http.py +0 -0
  30. {vision_agent-0.2.226 → vision_agent-0.2.227}/vision_agent/clients/landing_public_api.py +0 -0
  31. {vision_agent-0.2.226 → vision_agent-0.2.227}/vision_agent/fonts/__init__.py +0 -0
  32. {vision_agent-0.2.226 → vision_agent-0.2.227}/vision_agent/fonts/default_font_ch_en.ttf +0 -0
  33. {vision_agent-0.2.226 → vision_agent-0.2.227}/vision_agent/lmm/__init__.py +0 -0
  34. {vision_agent-0.2.226 → vision_agent-0.2.227}/vision_agent/lmm/lmm.py +0 -0
  35. {vision_agent-0.2.226 → vision_agent-0.2.227}/vision_agent/lmm/types.py +0 -0
  36. {vision_agent-0.2.226 → vision_agent-0.2.227}/vision_agent/tools/meta_tools.py +0 -0
  37. {vision_agent-0.2.226 → vision_agent-0.2.227}/vision_agent/tools/planner_tools.py +0 -0
  38. {vision_agent-0.2.226 → vision_agent-0.2.227}/vision_agent/tools/prompts.py +0 -0
  39. {vision_agent-0.2.226 → vision_agent-0.2.227}/vision_agent/tools/tool_utils.py +0 -0
  40. {vision_agent-0.2.226 → vision_agent-0.2.227}/vision_agent/tools/tools_types.py +0 -0
  41. {vision_agent-0.2.226 → vision_agent-0.2.227}/vision_agent/utils/__init__.py +0 -0
  42. {vision_agent-0.2.226 → vision_agent-0.2.227}/vision_agent/utils/exceptions.py +0 -0
  43. {vision_agent-0.2.226 → vision_agent-0.2.227}/vision_agent/utils/execute.py +0 -0
  44. {vision_agent-0.2.226 → vision_agent-0.2.227}/vision_agent/utils/image_utils.py +0 -0
  45. {vision_agent-0.2.226 → vision_agent-0.2.227}/vision_agent/utils/sim.py +0 -0
  46. {vision_agent-0.2.226 → vision_agent-0.2.227}/vision_agent/utils/type_defs.py +0 -0
  47. {vision_agent-0.2.226 → vision_agent-0.2.227}/vision_agent/utils/video.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: vision-agent
3
- Version: 0.2.226
3
+ Version: 0.2.227
4
4
  Summary: Toolset for Vision Agent
5
5
  Author: Landing AI
6
6
  Author-email: dev@landing.ai
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
4
4
 
5
5
  [tool.poetry]
6
6
  name = "vision-agent"
7
- version = "0.2.226"
7
+ version = "0.2.227"
8
8
  description = "Toolset for Vision Agent"
9
9
  authors = ["Landing AI <dev@landing.ai>"]
10
10
  readme = "README.md"
@@ -63,6 +63,7 @@ from .tools import (
63
63
  video_temporal_localization,
64
64
  vit_image_classification,
65
65
  vit_nsfw_classification,
66
+ custom_object_detection,
66
67
  )
67
68
 
68
69
  __new_tools__ = [
@@ -290,6 +290,13 @@ def od_sam2_video_tracking(
290
290
  )
291
291
  function_name = "florence2_object_detection"
292
292
 
293
+ elif od_model == ODModels.CUSTOM:
294
+ segment_results = custom_object_detection(
295
+ deployment_id=fine_tune_id,
296
+ image=segment_frames[frame_number],
297
+ )
298
+ function_name = "custom_object_detection"
299
+
293
300
  else:
294
301
  raise NotImplementedError(
295
302
  f"Object detection model '{od_model}' is not implemented."
@@ -1217,6 +1224,139 @@ def countgd_visual_prompt_object_detection(
1217
1224
  return bboxes_formatted
1218
1225
 
1219
1226
 
1227
def custom_object_detection(
    deployment_id: str,
    image: np.ndarray,
    box_threshold: float = 0.1,
) -> List[Dict[str, Any]]:
    """'custom_object_detection' is a tool that can detect instances of an
    object given a deployment_id of a previously finetuned object detection model.
    It is particularly useful when trying to detect objects that are not well detected by generalist models.
    It returns a list of bounding boxes with normalized
    coordinates, label names and associated confidence scores.

    Parameters:
        deployment_id (str): The id of the finetuned model.
        image (np.ndarray): The image that contains instances of the object.
        box_threshold (float, optional): The threshold for detection. Defaults
            to 0.1.

    Returns:
        List[Dict[str, Any]]: A list of dictionaries containing the score, label, and
            bounding box of the detected objects with normalized coordinates between 0
            and 1 (xmin, ymin, xmax, ymax). xmin and ymin are the coordinates of the
            top-left and xmax and ymax are the coordinates of the bottom-right of the
            bounding box.

    Example
    -------
        >>> custom_object_detection("abcd1234-5678efg", image)
        [
            {'score': 0.49, 'label': 'flower', 'bbox': [0.1, 0.11, 0.35, 0.4]},
            {'score': 0.68, 'label': 'flower', 'bbox': [0.2, 0.21, 0.45, 0.5]},
            {'score': 0.78, 'label': 'flower', 'bbox': [0.3, 0.35, 0.48, 0.52]},
            {'score': 0.98, 'label': 'flower', 'bbox': [0.44, 0.24, 0.49, 0.58]},
        ]
    """
    image_size = image.shape[:2]
    # A zero-height or zero-width image cannot contain any detections.
    if image_size[0] < 1 or image_size[1] < 1:
        return []

    request_files = [("image", numpy_to_bytes(image))]
    request_payload = {
        "deployment_id": deployment_id,
        "confidence": box_threshold,
    }
    detections: List[List[Dict[str, Any]]] = send_inference_request(
        request_payload, "custom-object-detection", files=request_files, v2=True
    )

    # Single image in, so only the first entry of the batched response matters.
    raw_boxes = detections[0]
    formatted_boxes: List[Dict[str, Any]] = []
    display_data: List[Dict[str, Any]] = []
    for det in raw_boxes:
        # Returned boxes are normalized to [0, 1]; the trace keeps pixel coords.
        formatted_boxes.append(
            {
                "label": det["label"],
                "bbox": normalize_bbox(det["bounding_box"], image_size),
                "score": det["score"],
            }
        )
        display_data.append(
            {
                "label": det["label"],
                "bbox": det["bounding_box"],
                "score": det["score"],
            }
        )

    _display_tool_trace(
        custom_object_detection.__name__,
        request_payload,
        display_data,
        request_files,
    )
    return formatted_boxes
1299
+
1300
+
1301
def custom_od_sam2_video_tracking(
    deployment_id: str,
    frames: List[np.ndarray],
    chunk_length: Optional[int] = 10,
) -> List[List[Dict[str, Any]]]:
    """'custom_od_sam2_video_tracking' is a tool that can segment multiple objects given a
    custom model with predefined category names.
    It returns a list of bounding boxes, label names,
    mask file names and associated probability scores.

    Parameters:
        deployment_id (str): The id of the deployed custom model.
        frames (List[np.ndarray]): The list of video frames to track objects across.
        chunk_length (Optional[int]): The number of frames after which the custom
            detection model is re-run to find new objects.

    Returns:
        List[List[Dict[str, Any]]]: A list with one entry per frame, each a list of
            dictionaries containing the score, label, bounding box, and mask of the
            detected objects with normalized coordinates (xmin, ymin, xmax, ymax).
            xmin and ymin are the coordinates of the top-left and xmax and ymax are
            the coordinates of the bottom-right of the bounding box. The mask is a
            binary 2D numpy array where 1 indicates the object and 0 indicates the
            background.

    Example
    -------
        >>> custom_od_sam2_video_tracking("abcd1234-5678efg", frames)
        [
            [
                {
                    'label': '0: dinosaur',
                    'bbox': [0.1, 0.11, 0.35, 0.4],
                    'mask': array([[0, 0, 0, ..., 0, 0, 0],
                        [0, 0, 0, ..., 0, 0, 0],
                        ...,
                        [0, 0, 0, ..., 0, 0, 0],
                        [0, 0, 0, ..., 0, 0, 0]], dtype=uint8),
                },
            ],
            ...
        ]
    """

    # The custom model's categories are fixed at deployment time, so no text
    # prompt is needed; an empty prompt is passed through to the tracker.
    ret = od_sam2_video_tracking(
        ODModels.CUSTOM,
        prompt="",
        frames=frames,
        chunk_length=chunk_length,
        fine_tune_id=deployment_id,
    )
    _display_tool_trace(
        custom_od_sam2_video_tracking.__name__,
        {},
        ret["display_data"],
        ret["files"],
    )
    return ret["return_data"]  # type: ignore
1358
+
1359
+
1220
1360
  def qwen2_vl_images_vqa(prompt: str, images: List[np.ndarray]) -> str:
1221
1361
  """'qwen2_vl_images_vqa' is a tool that can answer any questions about arbitrary
1222
1362
  images including regular images or images of documents or presentations. It can be
@@ -17,6 +17,7 @@ class ODModels(str, Enum):
17
17
  COUNTGD = "countgd"
18
18
  FLORENCE2 = "florence2"
19
19
  OWLV2 = "owlv2"
20
+ CUSTOM = "custom"
20
21
 
21
22
 
22
23
  def split_frames_into_segments(
File without changes
File without changes