vision-agent 0.2.30__py3-none-any.whl → 0.2.31__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,685 +0,0 @@
1
- import inspect
2
- import io
3
- import json
4
- import logging
5
- import tempfile
6
- from importlib import resources
7
- from pathlib import Path
8
- from typing import Any, Callable, Dict, List, Tuple, Union, cast
9
-
10
- import numpy as np
11
- import pandas as pd
12
- import requests
13
- from PIL import Image, ImageDraw, ImageFont
14
- from scipy.spatial import distance # type: ignore
15
-
16
- from vision_agent.tools.tool_utils import _send_inference_request
17
- from vision_agent.utils import extract_frames_from_video
18
- from vision_agent.utils.image_utils import (
19
- b64_to_pil,
20
- convert_to_b64,
21
- denormalize_bbox,
22
- get_image_size,
23
- normalize_bbox,
24
- rle_decode,
25
- )
26
-
27
- COLORS = [
28
- (158, 218, 229),
29
- (219, 219, 141),
30
- (23, 190, 207),
31
- (188, 189, 34),
32
- (199, 199, 199),
33
- (247, 182, 210),
34
- (127, 127, 127),
35
- (227, 119, 194),
36
- (196, 156, 148),
37
- (197, 176, 213),
38
- (140, 86, 75),
39
- (148, 103, 189),
40
- (255, 152, 150),
41
- (152, 223, 138),
42
- (214, 39, 40),
43
- (44, 160, 44),
44
- (255, 187, 120),
45
- (174, 199, 232),
46
- (255, 127, 14),
47
- (31, 119, 180),
48
- ]
49
- _API_KEY = "land_sk_WVYwP00xA3iXely2vuar6YUDZ3MJT9yLX6oW5noUkwICzYLiDV"
50
- _OCR_URL = "https://app.landing.ai/ocr/v1/detect-text"
51
- logging.basicConfig(level=logging.INFO)
52
- _LOGGER = logging.getLogger(__name__)
53
-
54
-
55
- def grounding_dino(
56
- prompt: str,
57
- image: np.ndarray,
58
- box_threshold: float = 0.20,
59
- iou_threshold: float = 0.20,
60
- ) -> List[Dict[str, Any]]:
61
- """'grounding_dino' is a tool that can detect and count objects given a text prompt
62
- such as category names or referring expressions. It returns a list and count of
63
- bounding boxes, label names and associated probability scores.
64
-
65
- Parameters:
66
- prompt (str): The prompt to ground to the image.
67
- image (np.ndarray): The image to ground the prompt to.
68
- box_threshold (float, optional): The threshold for the box detection. Defaults
69
- to 0.20.
70
- iou_threshold (float, optional): The threshold for the Intersection over Union
71
- (IoU). Defaults to 0.20.
72
-
73
- Returns:
74
- List[Dict[str, Any]]: A list of dictionaries containing the score, label, and
75
- bounding box of the detected objects with normalized coordinates
76
- (xmin, ymin, xmax, ymax). xmin and ymin are the coordinates of the top-left and
77
- xmax and ymax are the coordinates of the bottom-right of the bounding box.
78
-
79
- Example
80
- -------
81
- >>> grounding_dino("car. dinosaur", image)
82
- [
83
- {'score': 0.99, 'label': 'dinosaur', 'bbox': [0.1, 0.11, 0.35, 0.4]},
84
- {'score': 0.98, 'label': 'car', 'bbox': [0.2, 0.21, 0.45, 0.5]},
85
- ]
86
- """
87
- image_size = image.shape[:2]
88
- image_b64 = convert_to_b64(image)
89
- request_data = {
90
- "prompt": prompt,
91
- "image": image_b64,
92
- "tool": "visual_grounding",
93
- "kwargs": {"box_threshold": box_threshold, "iou_threshold": iou_threshold},
94
- }
95
- data: Dict[str, Any] = _send_inference_request(request_data, "tools")
96
- return_data = []
97
- for i in range(len(data["bboxes"])):
98
- return_data.append(
99
- {
100
- "score": round(data["scores"][i], 2),
101
- "label": data["labels"][i],
102
- "bbox": normalize_bbox(data["bboxes"][i], image_size),
103
- }
104
- )
105
- return return_data
106
-
107
-
108
- def grounding_sam(
109
- prompt: str,
110
- image: np.ndarray,
111
- box_threshold: float = 0.20,
112
- iou_threshold: float = 0.20,
113
- ) -> List[Dict[str, Any]]:
114
- """'grounding_sam' is a tool that can detect and segment objects given a text
115
- prompt such as category names or referring expressions. It returns a list of
116
- bounding boxes, label names, masks and associated probability scores.
117
-
118
- Parameters:
119
- prompt (str): The prompt to ground to the image.
120
- image (np.ndarray): The image to ground the prompt to.
121
- box_threshold (float, optional): The threshold for the box detection. Defaults
122
- to 0.20.
123
- iou_threshold (float, optional): The threshold for the Intersection over Union
124
- (IoU). Defaults to 0.20.
125
-
126
- Returns:
127
- List[Dict[str, Any]]: A list of dictionaries containing the score, label,
128
- bounding box, and mask of the detected objects with normalized coordinates
129
- (xmin, ymin, xmax, ymax). xmin and ymin are the coordinates of the top-left and
130
- xmax and ymax are the coordinates of the bottom-right of the bounding box.
131
- The mask is a binary 2D numpy array where 1 indicates the object and 0 indicates
132
- the background.
133
-
134
- Example
135
- -------
136
- >>> grounding_sam("car. dinosaur", image)
137
- [
138
- {
139
- 'score': 0.99,
140
- 'label': 'dinosaur',
141
- 'bbox': [0.1, 0.11, 0.35, 0.4],
142
- 'mask': array([[0, 0, 0, ..., 0, 0, 0],
143
- [0, 0, 0, ..., 0, 0, 0],
144
- ...,
145
- [0, 0, 0, ..., 0, 0, 0],
146
- [0, 0, 0, ..., 0, 0, 0]], dtype=uint8),
147
- },
148
- ]
149
- """
150
- image_size = image.shape[:2]
151
- image_b64 = convert_to_b64(image)
152
- request_data = {
153
- "prompt": prompt,
154
- "image": image_b64,
155
- "tool": "visual_grounding_segment",
156
- "kwargs": {"box_threshold": box_threshold, "iou_threshold": iou_threshold},
157
- }
158
- data: Dict[str, Any] = _send_inference_request(request_data, "tools")
159
- return_data = []
160
- for i in range(len(data["bboxes"])):
161
- return_data.append(
162
- {
163
- "score": round(data["scores"][i], 2),
164
- "label": data["labels"][i],
165
- "bbox": normalize_bbox(data["bboxes"][i], image_size),
166
- "mask": rle_decode(mask_rle=data["masks"][i], shape=data["mask_shape"]),
167
- }
168
- )
169
- return return_data
170
-
171
-
172
- def extract_frames(
173
- video_uri: Union[str, Path], fps: float = 0.5
174
- ) -> List[Tuple[np.ndarray, float]]:
175
- """'extract_frames' extracts frames from a video, returns a list of tuples (frame,
176
- timestamp), where timestamp is the relative time in seconds where the frame was
177
- captured. The frame is a numpy array.
178
-
179
- Parameters:
180
- video_uri (Union[str, Path]): The path to the video file.
181
- fps (float, optional): The number of frames per second to extract. Defaults
182
- to 0.5.
183
-
184
- Returns:
185
- List[Tuple[np.ndarray, float]]: A list of tuples containing the extracted frame
186
- and the timestamp in seconds.
187
-
188
- Example
189
- -------
190
- >>> extract_frames("path/to/video.mp4")
191
- [(frame1, 0.0), (frame2, 0.5), ...]
192
- """
193
-
194
- return extract_frames_from_video(str(video_uri), fps)
195
-
196
-
197
- def ocr(image: np.ndarray) -> List[Dict[str, Any]]:
198
- """'ocr' extracts text from an image. It returns a list of detected text, bounding
199
- boxes, and confidence scores.
200
-
201
- Parameters:
202
- image (np.ndarray): The image to extract text from.
203
-
204
- Returns:
205
- List[Dict[str, Any]]: A list of dictionaries containing the detected text, bbox,
206
- and confidence score.
207
-
208
- Example
209
- -------
210
- >>> ocr(image)
211
- [
212
- {'label': 'some text', 'bbox': [0.1, 0.11, 0.35, 0.4], 'score': 0.99},
213
- ]
214
- """
215
-
216
- pil_image = Image.fromarray(image).convert("RGB")
217
- image_size = pil_image.size[::-1]
218
- image_buffer = io.BytesIO()
219
- pil_image.save(image_buffer, format="PNG")
220
- buffer_bytes = image_buffer.getvalue()
221
- image_buffer.close()
222
-
223
- res = requests.post(
224
- _OCR_URL,
225
- files={"images": buffer_bytes},
226
- data={"language": "en"},
227
- headers={"contentType": "multipart/form-data", "apikey": _API_KEY},
228
- )
229
-
230
- if res.status_code != 200:
231
- raise ValueError(f"OCR request failed with status code {res.status_code}")
232
-
233
- data = res.json()
234
- output = []
235
- for det in data[0]:
236
- label = det["text"]
237
- box = [
238
- det["location"][0]["x"],
239
- det["location"][0]["y"],
240
- det["location"][2]["x"],
241
- det["location"][2]["y"],
242
- ]
243
- box = normalize_bbox(box, image_size)
244
- output.append({"label": label, "bbox": box, "score": round(det["score"], 2)})
245
-
246
- return output
247
-
248
-
249
- def zero_shot_counting(image: np.ndarray) -> Dict[str, Any]:
250
- """'zero_shot_counting' is a tool that counts the dominant foreground object given an image and no other information about the content.
251
- It returns only the count of the objects in the image.
252
-
253
- Parameters:
254
- image (np.ndarray): The image that contains many instances of a single object
255
-
256
- Returns:
257
- Dict[str, Any]: A dictionary containing the key 'count' and the count as a value. E.g. {'count': 12}.
258
-
259
- Example
260
- -------
261
- >>> zero_shot_counting(image)
262
- {'count': 45},
263
-
264
- """
265
-
266
- image_b64 = convert_to_b64(image)
267
- data = {
268
- "image": image_b64,
269
- "tool": "zero_shot_counting",
270
- }
271
- resp_data = _send_inference_request(data, "tools")
272
- resp_data["heat_map"] = np.array(b64_to_pil(resp_data["heat_map"][0]))
273
- return resp_data
274
-
275
-
276
- def visual_prompt_counting(
277
- image: np.ndarray, visual_prompt: Dict[str, List[float]]
278
- ) -> Dict[str, Any]:
279
- """'visual_prompt_counting' is a tool that counts the dominant foreground object given an image and a visual prompt which is a bounding box describing the object.
280
- It returns only the count of the objects in the image.
281
-
282
- Parameters:
283
- image (np.ndarray): The image that contains many instances of a single object
284
-
285
- Returns:
286
- Dict[str, Any]: A dictionary containing the key 'count' and the count as a value. E.g. {'count': 12}.
287
-
288
- Example
289
- -------
290
- >>> visual_prompt_counting(image, {"bbox": [0.1, 0.1, 0.4, 0.42]})
291
- {'count': 45},
292
-
293
- """
294
-
295
- image_size = get_image_size(image)
296
- bbox = visual_prompt["bbox"]
297
- bbox_str = ", ".join(map(str, denormalize_bbox(bbox, image_size)))
298
- image_b64 = convert_to_b64(image)
299
-
300
- data = {
301
- "image": image_b64,
302
- "prompt": bbox_str,
303
- "tool": "few_shot_counting",
304
- }
305
- resp_data = _send_inference_request(data, "tools")
306
- resp_data["heat_map"] = np.array(b64_to_pil(resp_data["heat_map"][0]))
307
- return resp_data
308
-
309
-
310
- def image_question_answering(image: np.ndarray, prompt: str) -> str:
311
- """'image_question_answering_' is a tool that can answer questions about the visual contents of an image given a question and an image.
312
- It returns an answer to the question
313
-
314
- Parameters:
315
- image (np.ndarray): The reference image used for the question
316
- prompt (str): The question about the image
317
-
318
- Returns:
319
- str: A string which is the answer to the given prompt. E.g. 'This image contains a cat sitting on a table with a bowl of milk.'
320
-
321
- Example
322
- -------
323
- >>> image_question_answering(image, 'What is the cat doing ?')
324
- 'drinking milk'
325
-
326
- """
327
-
328
- image_b64 = convert_to_b64(image)
329
- data = {
330
- "image": image_b64,
331
- "prompt": prompt,
332
- "tool": "image_question_answering",
333
- }
334
-
335
- answer = _send_inference_request(data, "tools")
336
- return answer["text"][0] # type: ignore
337
-
338
-
339
- def clip(image: np.ndarray, classes: List[str]) -> Dict[str, Any]:
340
- """'clip' is a tool that can classify an image given a list of input classes or tags.
341
- It returns the same list of the input classes along with their probability scores based on image content.
342
-
343
- Parameters:
344
- image (np.ndarray): The image to classify or tag
345
- classes (List[str]): The list of classes or tags that are associated with the image
346
-
347
- Returns:
348
- Dict[str, Any]: A dictionary containing the labels and scores. The 'labels' key holds the list of given labels and the 'scores' key holds the corresponding probability scores.
349
-
350
- Example
351
- -------
352
- >>> clip(image, ['dog', 'cat', 'bird'])
353
- {"labels": ["dog", "cat", "bird"], "scores": [0.68, 0.30, 0.02]},
354
-
355
- """
356
-
357
- image_b64 = convert_to_b64(image)
358
- data = {
359
- "prompt": ",".join(classes),
360
- "image": image_b64,
361
- "tool": "closed_set_image_classification",
362
- }
363
- resp_data = _send_inference_request(data, "tools")
364
- resp_data["scores"] = [round(prob, 4) for prob in resp_data["scores"]]
365
- return resp_data
366
-
367
-
368
- def image_caption(image: np.ndarray) -> str:
369
- """'image_caption' is a tool that can caption an image based on its contents.
370
- It returns a text describing the image.
371
-
372
- Parameters:
373
- image (np.ndarray): The image to caption
374
-
375
- Returns:
376
- str: A string which is the caption for the given image.
377
-
378
- Example
379
- -------
380
- >>> image_caption(image)
381
- 'This image contains a cat sitting on a table with a bowl of milk.'
382
-
383
- """
384
-
385
- image_b64 = convert_to_b64(image)
386
- data = {
387
- "image": image_b64,
388
- "tool": "image_captioning",
389
- }
390
-
391
- answer = _send_inference_request(data, "tools")
392
- return answer["text"][0] # type: ignore
393
-
394
-
395
- def closest_mask_distance(mask1: np.ndarray, mask2: np.ndarray) -> float:
396
- """'closest_mask_distance' calculates the closest distance between two masks.
397
-
398
- Parameters:
399
- mask1 (np.ndarray): The first mask.
400
- mask2 (np.ndarray): The second mask.
401
-
402
- Returns:
403
- float: The closest distance between the two masks.
404
-
405
- Example
406
- -------
407
- >>> closest_mask_distance(mask1, mask2)
408
- 0.5
409
- """
410
-
411
- mask1 = np.clip(mask1, 0, 1)
412
- mask2 = np.clip(mask2, 0, 1)
413
- mask1_points = np.transpose(np.nonzero(mask1))
414
- mask2_points = np.transpose(np.nonzero(mask2))
415
- dist_matrix = distance.cdist(mask1_points, mask2_points, "euclidean")
416
- return cast(float, np.min(dist_matrix))
417
-
418
-
419
- def closest_box_distance(
420
- box1: List[float], box2: List[float], image_size: Tuple[int, int]
421
- ) -> float:
422
- """'closest_box_distance' calculates the closest distance between two bounding boxes.
423
-
424
- Parameters:
425
- box1 (List[float]): The first bounding box.
426
- box2 (List[float]): The second bounding box.
427
- image_size (Tuple[int, int]): The size of the image given as (height, width).
428
-
429
- Returns:
430
- float: The closest distance between the two bounding boxes.
431
-
432
- Example
433
- -------
434
- >>> closest_box_distance([100, 100, 200, 200], [300, 300, 400, 400], (1000, 1000))
435
- 141.42
436
- """
437
-
438
- x11, y11, x12, y12 = denormalize_bbox(box1, image_size)
439
- x21, y21, x22, y22 = denormalize_bbox(box2, image_size)
440
-
441
- horizontal_distance = np.max([0, x21 - x12, x11 - x22])
442
- vertical_distance = np.max([0, y21 - y12, y11 - y22])
443
- return cast(float, np.sqrt(horizontal_distance**2 + vertical_distance**2))
444
-
445
-
446
- # Utility and visualization functions
447
-
448
-
449
- def save_json(data: Any, file_path: str) -> None:
450
- """'save_json' is a utility function that saves data as a JSON file. It is helpful
451
- for saving data that contains NumPy arrays which are not JSON serializable.
452
-
453
- Parameters:
454
- data (Any): The data to save.
455
- file_path (str): The path to save the JSON file.
456
-
457
- Example
458
- -------
459
- >>> save_json(data, "path/to/file.json")
460
- """
461
-
462
- class NumpyEncoder(json.JSONEncoder):
463
- def default(self, obj: Any): # type: ignore
464
- if isinstance(obj, np.ndarray):
465
- return obj.tolist()
466
- elif isinstance(obj, np.bool_):
467
- return bool(obj)
468
- return json.JSONEncoder.default(self, obj)
469
-
470
- with open(file_path, "w") as f:
471
- json.dump(data, f, cls=NumpyEncoder)
472
-
473
-
474
- def load_image(image_path: str) -> np.ndarray:
475
- """'load_image' is a utility function that loads an image from the given path.
476
-
477
- Parameters:
478
- image_path (str): The path to the image.
479
-
480
- Returns:
481
- np.ndarray: The image as a NumPy array.
482
-
483
- Example
484
- -------
485
- >>> load_image("path/to/image.jpg")
486
- """
487
-
488
- image = Image.open(image_path).convert("RGB")
489
- return np.array(image)
490
-
491
-
492
- def save_image(image: np.ndarray) -> str:
493
- """'save_image' is a utility function that saves an image as a temporary file.
494
-
495
- Parameters:
496
- image (np.ndarray): The image to save.
497
-
498
- Returns:
499
- str: The path to the saved image.
500
-
501
- Example
502
- -------
503
- >>> save_image(image)
504
- "/tmp/tmpabc123.png"
505
- """
506
-
507
- with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as f:
508
- pil_image = Image.fromarray(image.astype(np.uint8))
509
- pil_image.save(f, "PNG")
510
- return f.name
511
-
512
-
513
- def overlay_bounding_boxes(
514
- image: np.ndarray, bboxes: List[Dict[str, Any]]
515
- ) -> np.ndarray:
516
- """'display_bounding_boxes' is a utility function that displays bounding boxes on
517
- an image.
518
-
519
- Parameters:
520
- image (np.ndarray): The image to display the bounding boxes on.
521
- bboxes (List[Dict[str, Any]]): A list of dictionaries containing the bounding
522
- boxes.
523
-
524
- Returns:
525
- np.ndarray: The image with the bounding boxes, labels and scores displayed.
526
-
527
- Example
528
- -------
529
- >>> image_with_bboxes = overlay_bounding_boxes(
530
- image, [{'score': 0.99, 'label': 'dinosaur', 'bbox': [0.1, 0.11, 0.35, 0.4]}],
531
- )
532
- """
533
- pil_image = Image.fromarray(image.astype(np.uint8))
534
-
535
- if len(set([box["label"] for box in bboxes])) > len(COLORS):
536
- _LOGGER.warning(
537
- "Number of unique labels exceeds the number of available colors. Some labels may have the same color."
538
- )
539
-
540
- color = {
541
- label: COLORS[i % len(COLORS)]
542
- for i, label in enumerate(set([box["label"] for box in bboxes]))
543
- }
544
-
545
- width, height = pil_image.size
546
- fontsize = max(12, int(min(width, height) / 40))
547
- draw = ImageDraw.Draw(pil_image)
548
- font = ImageFont.truetype(
549
- str(resources.files("vision_agent.fonts").joinpath("default_font_ch_en.ttf")),
550
- fontsize,
551
- )
552
-
553
- for elt in bboxes:
554
- label = elt["label"]
555
- box = elt["bbox"]
556
- scores = elt["score"]
557
-
558
- box = [
559
- int(box[0] * width),
560
- int(box[1] * height),
561
- int(box[2] * width),
562
- int(box[3] * height),
563
- ]
564
- draw.rectangle(box, outline=color[label], width=4)
565
- text = f"{label}: {scores:.2f}"
566
- text_box = draw.textbbox((box[0], box[1]), text=text, font=font)
567
- draw.rectangle((box[0], box[1], text_box[2], text_box[3]), fill=color[label])
568
- draw.text((box[0], box[1]), text, fill="black", font=font)
569
- return np.array(pil_image.convert("RGB"))
570
-
571
-
572
- def overlay_segmentation_masks(
573
- image: np.ndarray, masks: List[Dict[str, Any]]
574
- ) -> np.ndarray:
575
- """'display_segmentation_masks' is a utility function that displays segmentation
576
- masks.
577
-
578
- Parameters:
579
- image (np.ndarray): The image to display the masks on.
580
- masks (List[Dict[str, Any]]): A list of dictionaries containing the masks.
581
-
582
- Returns:
583
- np.ndarray: The image with the masks displayed.
584
-
585
- Example
586
- -------
587
- >>> image_with_masks = overlay_segmentation_masks(
588
- image,
589
- [{
590
- 'score': 0.99,
591
- 'label': 'dinosaur',
592
- 'mask': array([[0, 0, 0, ..., 0, 0, 0],
593
- [0, 0, 0, ..., 0, 0, 0],
594
- ...,
595
- [0, 0, 0, ..., 0, 0, 0],
596
- [0, 0, 0, ..., 0, 0, 0]], dtype=uint8),
597
- }],
598
- )
599
- """
600
- pil_image = Image.fromarray(image.astype(np.uint8)).convert("RGBA")
601
-
602
- if len(set([mask["label"] for mask in masks])) > len(COLORS):
603
- _LOGGER.warning(
604
- "Number of unique labels exceeds the number of available colors. Some labels may have the same color."
605
- )
606
-
607
- color = {
608
- label: COLORS[i % len(COLORS)]
609
- for i, label in enumerate(set([mask["label"] for mask in masks]))
610
- }
611
-
612
- for elt in masks:
613
- mask = elt["mask"]
614
- label = elt["label"]
615
- np_mask = np.zeros((pil_image.size[1], pil_image.size[0], 4))
616
- np_mask[mask > 0, :] = color[label] + (255 * 0.5,)
617
- mask_img = Image.fromarray(np_mask.astype(np.uint8))
618
- pil_image = Image.alpha_composite(pil_image, mask_img)
619
- return np.array(pil_image.convert("RGB"))
620
-
621
-
622
- def get_tool_documentation(funcs: List[Callable[..., Any]]) -> str:
623
- docstrings = ""
624
- for func in funcs:
625
- docstrings += f"{func.__name__}{inspect.signature(func)}:\n{func.__doc__}\n\n"
626
-
627
- return docstrings
628
-
629
-
630
- def get_tool_descriptions(funcs: List[Callable[..., Any]]) -> str:
631
- descriptions = ""
632
- for func in funcs:
633
- description = func.__doc__
634
- if description is None:
635
- description = ""
636
-
637
- description = (
638
- description[: description.find("Parameters:")].replace("\n", " ").strip()
639
- )
640
- description = " ".join(description.split())
641
- descriptions += f"- {func.__name__}{inspect.signature(func)}: {description}\n"
642
- return descriptions
643
-
644
-
645
- def get_tools_df(funcs: List[Callable[..., Any]]) -> pd.DataFrame:
646
- data: Dict[str, List[str]] = {"desc": [], "doc": []}
647
-
648
- for func in funcs:
649
- desc = func.__doc__
650
- if desc is None:
651
- desc = ""
652
- desc = desc[: desc.find("Parameters:")].replace("\n", " ").strip()
653
- desc = " ".join(desc.split())
654
-
655
- doc = f"{func.__name__}{inspect.signature(func)}:\n{func.__doc__}"
656
- data["desc"].append(desc)
657
- data["doc"].append(doc)
658
-
659
- return pd.DataFrame(data) # type: ignore
660
-
661
-
662
- TOOLS = [
663
- grounding_dino,
664
- grounding_sam,
665
- extract_frames,
666
- ocr,
667
- clip,
668
- zero_shot_counting,
669
- visual_prompt_counting,
670
- image_question_answering,
671
- image_caption,
672
- closest_mask_distance,
673
- closest_box_distance,
674
- save_json,
675
- load_image,
676
- save_image,
677
- overlay_bounding_boxes,
678
- overlay_segmentation_masks,
679
- ]
680
- TOOLS_DF = get_tools_df(TOOLS) # type: ignore
681
- TOOL_DESCRIPTIONS = get_tool_descriptions(TOOLS) # type: ignore
682
- TOOL_DOCSTRING = get_tool_documentation(TOOLS) # type: ignore
683
- UTILITIES_DOCSTRING = get_tool_documentation(
684
- [save_json, load_image, save_image, overlay_bounding_boxes]
685
- )
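
For orientation, the functions removed above form the public tool set of the 0.2.30 release. The following is a minimal usage sketch, assuming these tools were importable from vision_agent.tools in 0.2.30 (verify the import path against your installed version); it chains the detection, visualization and persistence helpers as their docstrings describe.

import numpy as np
from vision_agent.tools import (  # assumed 0.2.30 import path
    grounding_dino,
    load_image,
    overlay_bounding_boxes,
    save_image,
    save_json,
)

image = load_image("path/to/image.jpg")  # RGB image as a numpy array

# Text-prompted detection; boxes come back with normalized xyxy coordinates.
detections = grounding_dino("car. dinosaur", image, box_threshold=0.20)

# Draw boxes, labels and scores, then persist both the image and the raw results.
annotated = overlay_bounding_boxes(image, detections)
print(save_image(annotated))              # path to a temporary PNG
save_json(detections, "detections.json")  # NumPy-aware JSON dump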
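
The closest_box_distance helper reduces to simple gap arithmetic: the horizontal and vertical gaps between the boxes (zero when they overlap on that axis) are combined with the Euclidean norm. A standalone recomputation of the docstring example, written here only to make the 141.42 figure concrete:

import numpy as np

box1 = [100, 100, 200, 200]  # xmin, ymin, xmax, ymax, pixel coordinates
box2 = [300, 300, 400, 400]

x11, y11, x12, y12 = box1
x21, y21, x22, y22 = box2

horizontal = max(0, x21 - x12, x11 - x22)  # 100: gap along x, 0 if overlapping
vertical = max(0, y21 - y12, y11 - y22)    # 100: gap along y, 0 if overlapping
print(round(float(np.hypot(horizontal, vertical)), 2))  # 141.42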