vision-agent 0.2.226__tar.gz → 0.2.227__tar.gz
Sign up to get free protection for your applications and to get access to all the features.
- {vision_agent-0.2.226 → vision_agent-0.2.227}/PKG-INFO +1 -1
- {vision_agent-0.2.226 → vision_agent-0.2.227}/pyproject.toml +1 -1
- {vision_agent-0.2.226 → vision_agent-0.2.227}/vision_agent/tools/__init__.py +1 -0
- {vision_agent-0.2.226 → vision_agent-0.2.227}/vision_agent/tools/tools.py +140 -0
- {vision_agent-0.2.226 → vision_agent-0.2.227}/vision_agent/utils/video_tracking.py +1 -0
- {vision_agent-0.2.226 → vision_agent-0.2.227}/LICENSE +0 -0
- {vision_agent-0.2.226 → vision_agent-0.2.227}/README.md +0 -0
- {vision_agent-0.2.226 → vision_agent-0.2.227}/vision_agent/.sim_tools/df.csv +0 -0
- {vision_agent-0.2.226 → vision_agent-0.2.227}/vision_agent/.sim_tools/embs.npy +0 -0
- {vision_agent-0.2.226 → vision_agent-0.2.227}/vision_agent/__init__.py +0 -0
- {vision_agent-0.2.226 → vision_agent-0.2.227}/vision_agent/agent/README.md +0 -0
- {vision_agent-0.2.226 → vision_agent-0.2.227}/vision_agent/agent/__init__.py +0 -0
- {vision_agent-0.2.226 → vision_agent-0.2.227}/vision_agent/agent/agent.py +0 -0
- {vision_agent-0.2.226 → vision_agent-0.2.227}/vision_agent/agent/agent_utils.py +0 -0
- {vision_agent-0.2.226 → vision_agent-0.2.227}/vision_agent/agent/types.py +0 -0
- {vision_agent-0.2.226 → vision_agent-0.2.227}/vision_agent/agent/vision_agent.py +0 -0
- {vision_agent-0.2.226 → vision_agent-0.2.227}/vision_agent/agent/vision_agent_coder.py +0 -0
- {vision_agent-0.2.226 → vision_agent-0.2.227}/vision_agent/agent/vision_agent_coder_prompts.py +0 -0
- {vision_agent-0.2.226 → vision_agent-0.2.227}/vision_agent/agent/vision_agent_coder_prompts_v2.py +0 -0
- {vision_agent-0.2.226 → vision_agent-0.2.227}/vision_agent/agent/vision_agent_coder_v2.py +0 -0
- {vision_agent-0.2.226 → vision_agent-0.2.227}/vision_agent/agent/vision_agent_planner.py +0 -0
- {vision_agent-0.2.226 → vision_agent-0.2.227}/vision_agent/agent/vision_agent_planner_prompts.py +0 -0
- {vision_agent-0.2.226 → vision_agent-0.2.227}/vision_agent/agent/vision_agent_planner_prompts_v2.py +0 -0
- {vision_agent-0.2.226 → vision_agent-0.2.227}/vision_agent/agent/vision_agent_planner_v2.py +0 -0
- {vision_agent-0.2.226 → vision_agent-0.2.227}/vision_agent/agent/vision_agent_prompts.py +0 -0
- {vision_agent-0.2.226 → vision_agent-0.2.227}/vision_agent/agent/vision_agent_prompts_v2.py +0 -0
- {vision_agent-0.2.226 → vision_agent-0.2.227}/vision_agent/agent/vision_agent_v2.py +0 -0
- {vision_agent-0.2.226 → vision_agent-0.2.227}/vision_agent/clients/__init__.py +0 -0
- {vision_agent-0.2.226 → vision_agent-0.2.227}/vision_agent/clients/http.py +0 -0
- {vision_agent-0.2.226 → vision_agent-0.2.227}/vision_agent/clients/landing_public_api.py +0 -0
- {vision_agent-0.2.226 → vision_agent-0.2.227}/vision_agent/fonts/__init__.py +0 -0
- {vision_agent-0.2.226 → vision_agent-0.2.227}/vision_agent/fonts/default_font_ch_en.ttf +0 -0
- {vision_agent-0.2.226 → vision_agent-0.2.227}/vision_agent/lmm/__init__.py +0 -0
- {vision_agent-0.2.226 → vision_agent-0.2.227}/vision_agent/lmm/lmm.py +0 -0
- {vision_agent-0.2.226 → vision_agent-0.2.227}/vision_agent/lmm/types.py +0 -0
- {vision_agent-0.2.226 → vision_agent-0.2.227}/vision_agent/tools/meta_tools.py +0 -0
- {vision_agent-0.2.226 → vision_agent-0.2.227}/vision_agent/tools/planner_tools.py +0 -0
- {vision_agent-0.2.226 → vision_agent-0.2.227}/vision_agent/tools/prompts.py +0 -0
- {vision_agent-0.2.226 → vision_agent-0.2.227}/vision_agent/tools/tool_utils.py +0 -0
- {vision_agent-0.2.226 → vision_agent-0.2.227}/vision_agent/tools/tools_types.py +0 -0
- {vision_agent-0.2.226 → vision_agent-0.2.227}/vision_agent/utils/__init__.py +0 -0
- {vision_agent-0.2.226 → vision_agent-0.2.227}/vision_agent/utils/exceptions.py +0 -0
- {vision_agent-0.2.226 → vision_agent-0.2.227}/vision_agent/utils/execute.py +0 -0
- {vision_agent-0.2.226 → vision_agent-0.2.227}/vision_agent/utils/image_utils.py +0 -0
- {vision_agent-0.2.226 → vision_agent-0.2.227}/vision_agent/utils/sim.py +0 -0
- {vision_agent-0.2.226 → vision_agent-0.2.227}/vision_agent/utils/type_defs.py +0 -0
- {vision_agent-0.2.226 → vision_agent-0.2.227}/vision_agent/utils/video.py +0 -0
@@ -290,6 +290,13 @@ def od_sam2_video_tracking(
|
|
290
290
|
)
|
291
291
|
function_name = "florence2_object_detection"
|
292
292
|
|
293
|
+
elif od_model == ODModels.CUSTOM:
|
294
|
+
segment_results = custom_object_detection(
|
295
|
+
deployment_id=fine_tune_id,
|
296
|
+
image=segment_frames[frame_number],
|
297
|
+
)
|
298
|
+
function_name = "custom_object_detection"
|
299
|
+
|
293
300
|
else:
|
294
301
|
raise NotImplementedError(
|
295
302
|
f"Object detection model '{od_model}' is not implemented."
|
@@ -1217,6 +1224,139 @@ def countgd_visual_prompt_object_detection(
|
|
1217
1224
|
return bboxes_formatted
|
1218
1225
|
|
1219
1226
|
|
1227
|
+
def custom_object_detection(
|
1228
|
+
deployment_id: str,
|
1229
|
+
image: np.ndarray,
|
1230
|
+
box_threshold: float = 0.1,
|
1231
|
+
) -> List[Dict[str, Any]]:
|
1232
|
+
"""'custom_object_detection' is a tool that can detect instances of an
|
1233
|
+
object given a deployment_id of a previously finetuned object detection model.
|
1234
|
+
It is particularly useful when trying to detect objects that are not well detected by generalist models.
|
1235
|
+
It returns a list of bounding boxes with normalized
|
1236
|
+
coordinates, label names and associated confidence scores.
|
1237
|
+
|
1238
|
+
Parameters:
|
1239
|
+
deployment_id (str): The id of the finetuned model.
|
1240
|
+
image (np.ndarray): The image that contains instances of the object.
|
1241
|
+
box_threshold (float, optional): The threshold for detection. Defaults
|
1242
|
+
to 0.1.
|
1243
|
+
|
1244
|
+
Returns:
|
1245
|
+
List[Dict[str, Any]]: A list of dictionaries containing the score, label, and
|
1246
|
+
bounding box of the detected objects with normalized coordinates between 0
|
1247
|
+
and 1 (xmin, ymin, xmax, ymax). xmin and ymin are the coordinates of the
|
1248
|
+
top-left and xmax and ymax are the coordinates of the bottom-right of the
|
1249
|
+
bounding box.
|
1250
|
+
|
1251
|
+
Example
|
1252
|
+
-------
|
1253
|
+
>>> custom_object_detection("abcd1234-5678efg", image)
|
1254
|
+
[
|
1255
|
+
{'score': 0.49, 'label': 'flower', 'bbox': [0.1, 0.11, 0.35, 0.4]},
|
1256
|
+
{'score': 0.68, 'label': 'flower', 'bbox': [0.2, 0.21, 0.45, 0.5]},
|
1257
|
+
{'score': 0.78, 'label': 'flower', 'bbox': [0.3, 0.35, 0.48, 0.52]},
|
1258
|
+
{'score': 0.98, 'label': 'flower', 'bbox': [0.44, 0.24, 0.49, 0.58]},
|
1259
|
+
]
|
1260
|
+
"""
|
1261
|
+
image_size = image.shape[:2]
|
1262
|
+
if image_size[0] < 1 or image_size[1] < 1:
|
1263
|
+
return []
|
1264
|
+
|
1265
|
+
files = [("image", numpy_to_bytes(image))]
|
1266
|
+
payload = {
|
1267
|
+
"deployment_id": deployment_id,
|
1268
|
+
"confidence": box_threshold,
|
1269
|
+
}
|
1270
|
+
detections: List[List[Dict[str, Any]]] = send_inference_request(
|
1271
|
+
payload, "custom-object-detection", files=files, v2=True
|
1272
|
+
)
|
1273
|
+
|
1274
|
+
bboxes = detections[0]
|
1275
|
+
bboxes_formatted = [
|
1276
|
+
{
|
1277
|
+
"label": bbox["label"],
|
1278
|
+
"bbox": normalize_bbox(bbox["bounding_box"], image_size),
|
1279
|
+
"score": bbox["score"],
|
1280
|
+
}
|
1281
|
+
for bbox in bboxes
|
1282
|
+
]
|
1283
|
+
display_data = [
|
1284
|
+
{
|
1285
|
+
"label": bbox["label"],
|
1286
|
+
"bbox": bbox["bounding_box"],
|
1287
|
+
"score": bbox["score"],
|
1288
|
+
}
|
1289
|
+
for bbox in bboxes
|
1290
|
+
]
|
1291
|
+
|
1292
|
+
_display_tool_trace(
|
1293
|
+
custom_object_detection.__name__,
|
1294
|
+
payload,
|
1295
|
+
display_data,
|
1296
|
+
files,
|
1297
|
+
)
|
1298
|
+
return bboxes_formatted
|
1299
|
+
|
1300
|
+
|
1301
|
+
def custom_od_sam2_video_tracking(
|
1302
|
+
deployment_id: str,
|
1303
|
+
frames: List[np.ndarray],
|
1304
|
+
chunk_length: Optional[int] = 10,
|
1305
|
+
) -> List[List[Dict[str, Any]]]:
|
1306
|
+
"""'custom_od_sam2_video_tracking' is a tool that can segment multiple objects given a
|
1307
|
+
custom model with predefined category names.
|
1308
|
+
It returns a list of bounding boxes, label names,
|
1309
|
+
masks and associated probability scores.
|
1310
|
+
|
1311
|
+
Parameters:
|
1312
|
+
deployment_id (str): The id of the deployed custom model.
|
1313
|
+
frames (List[np.ndarray]): The list of video frames to detect and track objects in.
|
1314
|
+
chunk_length (Optional[int]): The number of frames to re-run the custom model to find
|
1315
|
+
new objects.
|
1316
|
+
|
1317
|
+
Returns:
|
1318
|
+
List[List[Dict[str, Any]]]: A list of lists of dictionaries containing the score, label,
|
1319
|
+
bounding box, and mask of the detected objects with normalized coordinates
|
1320
|
+
(xmin, ymin, xmax, ymax). xmin and ymin are the coordinates of the top-left
|
1321
|
+
and xmax and ymax are the coordinates of the bottom-right of the bounding box.
|
1322
|
+
The mask is binary 2D numpy array where 1 indicates the object and 0 indicates
|
1323
|
+
the background.
|
1324
|
+
|
1325
|
+
Example
|
1326
|
+
-------
|
1327
|
+
>>> custom_od_sam2_video_tracking("abcd1234-5678efg", frames)
|
1328
|
+
[
|
1329
|
+
[
|
1330
|
+
{
|
1331
|
+
'label': '0: dinosaur',
|
1332
|
+
'bbox': [0.1, 0.11, 0.35, 0.4],
|
1333
|
+
'mask': array([[0, 0, 0, ..., 0, 0, 0],
|
1334
|
+
[0, 0, 0, ..., 0, 0, 0],
|
1335
|
+
...,
|
1336
|
+
[0, 0, 0, ..., 0, 0, 0],
|
1337
|
+
[0, 0, 0, ..., 0, 0, 0]], dtype=uint8),
|
1338
|
+
},
|
1339
|
+
],
|
1340
|
+
...
|
1341
|
+
]
|
1342
|
+
"""
|
1343
|
+
|
1344
|
+
ret = od_sam2_video_tracking(
|
1345
|
+
ODModels.CUSTOM,
|
1346
|
+
prompt="",
|
1347
|
+
frames=frames,
|
1348
|
+
chunk_length=chunk_length,
|
1349
|
+
fine_tune_id=deployment_id,
|
1350
|
+
)
|
1351
|
+
_display_tool_trace(
|
1352
|
+
custom_od_sam2_video_tracking.__name__,
|
1353
|
+
{},
|
1354
|
+
ret["display_data"],
|
1355
|
+
ret["files"],
|
1356
|
+
)
|
1357
|
+
return ret["return_data"] # type: ignore
|
1358
|
+
|
1359
|
+
|
1220
1360
|
def qwen2_vl_images_vqa(prompt: str, images: List[np.ndarray]) -> str:
|
1221
1361
|
"""'qwen2_vl_images_vqa' is a tool that can answer any questions about arbitrary
|
1222
1362
|
images including regular images or images of documents or presentations. It can be
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{vision_agent-0.2.226 → vision_agent-0.2.227}/vision_agent/agent/vision_agent_coder_prompts.py
RENAMED
File without changes
|
{vision_agent-0.2.226 → vision_agent-0.2.227}/vision_agent/agent/vision_agent_coder_prompts_v2.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
{vision_agent-0.2.226 → vision_agent-0.2.227}/vision_agent/agent/vision_agent_planner_prompts.py
RENAMED
File without changes
|
{vision_agent-0.2.226 → vision_agent-0.2.227}/vision_agent/agent/vision_agent_planner_prompts_v2.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|