vision-agent 0.2.50__py3-none-any.whl → 0.2.52__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vision_agent/agent/easytool_v2.py +17 -14
- vision_agent/agent/reflexion.py +1 -1
- vision_agent/agent/vision_agent.py +5 -4
- vision_agent/agent/vision_agent_prompts.py +1 -1
- vision_agent/tools/tools.py +83 -92
- vision_agent/utils/video.py +3 -3
- {vision_agent-0.2.50.dist-info → vision_agent-0.2.52.dist-info}/METADATA +1 -1
- {vision_agent-0.2.50.dist-info → vision_agent-0.2.52.dist-info}/RECORD +10 -10
- {vision_agent-0.2.50.dist-info → vision_agent-0.2.52.dist-info}/LICENSE +0 -0
- {vision_agent-0.2.50.dist-info → vision_agent-0.2.52.dist-info}/WHEEL +0 -0
@@ -428,12 +428,12 @@ def visualize_result(all_tool_results: List[Dict]) -> Sequence[Union[str, Path]]
|
|
428
428
|
|
429
429
|
|
430
430
|
class EasyToolV2(Agent):
|
431
|
-
|
432
|
-
|
433
|
-
|
434
|
-
|
435
|
-
|
436
|
-
|
431
|
+
"""EasyToolV2 is an agent framework that utilizes tools as well as self reflection
|
432
|
+
to accomplish tasks, in particular vision tasks. EasyToolV2 is based off of EasyTool
|
433
|
+
https://arxiv.org/abs/2401.06201 and Reflexion https://arxiv.org/abs/2303.11366
|
434
|
+
where it will attempt to complete a task and then reflect on whether or not it was
|
435
|
+
able to accomplish the task based off of the plan and final results, if not it will
|
436
|
+
redo the task with this newly added reflection.
|
437
437
|
|
438
438
|
Example
|
439
439
|
-------
|
@@ -461,7 +461,10 @@ class EasyToolV2(Agent):
|
|
461
461
|
reflect_model: the model to use for self reflection.
|
462
462
|
max_retries: maximum number of retries to attempt to complete the task.
|
463
463
|
verbose: whether to print more logs.
|
464
|
-
report_progress_callback: a callback to report the progress of the agent.
|
464
|
+
report_progress_callback: a callback to report the progress of the agent.
|
465
|
+
This is useful for streaming logs in a web application where multiple
|
466
|
+
EasyToolV2 instances are running in parallel. This callback ensures
|
467
|
+
that the progress updates are not mixed up.
|
465
468
|
"""
|
466
469
|
self.task_model = (
|
467
470
|
OpenAILLM(model_name="gpt-4-turbo", json_mode=True, temperature=0.0)
|
@@ -495,9 +498,10 @@ class EasyToolV2(Agent):
|
|
495
498
|
"""Invoke the vision agent.
|
496
499
|
|
497
500
|
Parameters:
|
498
|
-
|
499
|
-
[{"role": "user", "content": "describe your task here..."}]
|
500
|
-
|
501
|
+
input: A conversation in the format of
|
502
|
+
[{"role": "user", "content": "describe your task here..."}] or a string
|
503
|
+
containing just the content.
|
504
|
+
media: The input media referenced in the input parameter.
|
501
505
|
reference_data: A dictionary containing the reference image, mask or bounding
|
502
506
|
box in the format of:
|
503
507
|
{"image": "image.jpg", "mask": "mask.jpg", "bbox": [0.1, 0.2, 0.1, 0.2]}
|
@@ -549,7 +553,7 @@ class EasyToolV2(Agent):
|
|
549
553
|
Parameters:
|
550
554
|
chat: A conversation in the format of
|
551
555
|
[{"role": "user", "content": "describe your task here..."}].
|
552
|
-
|
556
|
+
media: The media image referenced in the chat parameter.
|
553
557
|
reference_data: A dictionary containing the reference image, mask or bounding
|
554
558
|
box in the format of:
|
555
559
|
{"image": "image.jpg", "mask": "mask.jpg", "bbox": [0.1, 0.2, 0.1, 0.2]}
|
@@ -558,9 +562,8 @@ class EasyToolV2(Agent):
|
|
558
562
|
self_reflection: boolean to enable and disable self reflection.
|
559
563
|
|
560
564
|
Returns:
|
561
|
-
A tuple where the first item is the final answer
|
562
|
-
|
563
|
-
contains the visualized output.
|
565
|
+
Tuple[str, List[Dict]]: A tuple where the first item is the final answer
|
566
|
+
and the second item is a list of all the tool results.
|
564
567
|
"""
|
565
568
|
if len(chat) == 0:
|
566
569
|
raise ValueError("Input cannot be empty.")
|
vision_agent/agent/reflexion.py
CHANGED
@@ -144,7 +144,7 @@ class Reflexion(Agent):
|
|
144
144
|
|
145
145
|
Parameters:
|
146
146
|
input: a prompt that describes the task or a conversation in the format of [{"role": "user", "content": "describe your task here..."}].
|
147
|
-
|
147
|
+
media: the input media referenced in the prompt parameter.
|
148
148
|
|
149
149
|
Returns:
|
150
150
|
A text response.
|
@@ -442,10 +442,10 @@ class VisionAgent(Agent):
|
|
442
442
|
"""Chat with Vision Agent and return intermediate information regarding the task.
|
443
443
|
|
444
444
|
Parameters:
|
445
|
-
|
446
|
-
[{"role": "user", "content": "describe your task here..."}]
|
445
|
+
input (Union[List[Dict[str, str]], str]): A conversation in the format of
|
446
|
+
[{"role": "user", "content": "describe your task here..."}] or a string
|
447
|
+
of just the contents.
|
447
448
|
media (Optional[Union[str, Path]]): The media file to be used in the task.
|
448
|
-
self_reflection (bool): Whether to reflect on the task and debug the code.
|
449
449
|
|
450
450
|
Returns:
|
451
451
|
str: The code output by the Vision Agent.
|
@@ -471,7 +471,8 @@ class VisionAgent(Agent):
|
|
471
471
|
[{"role": "user", "content": "describe your task here..."}].
|
472
472
|
media (Optional[Union[str, Path]]): The media file to be used in the task.
|
473
473
|
self_reflection (bool): Whether to reflect on the task and debug the code.
|
474
|
-
|
474
|
+
display_visualization (bool): If True, it opens a new window locally to
|
475
|
+
show the image(s) created by visualization code (if there is any).
|
475
476
|
|
476
477
|
Returns:
|
477
478
|
Dict[str, Any]: A dictionary containing the code, test, test result, plan,
|
@@ -71,7 +71,7 @@ This is the documentation for the functions you have access to. You may call any
|
|
71
71
|
1. **Understand and Clarify**: Make sure you understand the task.
|
72
72
|
2. **Algorithm/Method Selection**: Decide on the most efficient way.
|
73
73
|
3. **Pseudocode Creation**: Write down the steps you will follow in pseudocode.
|
74
|
-
4. **Code Generation**: Translate your pseudocode into executable Python code. Ensure you use correct arguments, remember coordinates are always returned normalized from `vision_agent.tools`.
|
74
|
+
4. **Code Generation**: Translate your pseudocode into executable Python code. Ensure you use correct arguments, remember coordinates are always returned normalized from `vision_agent.tools`. All images from `vision_agent.tools` are in RGB format, red is (255, 0, 0) and blue is (0, 0, 255).
|
75
75
|
5. **Logging**: Log the output of the custom functions that were provided to you from `from vision_agent.tools import *`. Use a debug flag in the function parameters to toggle logging on and off.
|
76
76
|
"""
|
77
77
|
|
vision_agent/tools/tools.py
CHANGED
@@ -75,17 +75,18 @@ def grounding_dino(
|
|
75
75
|
|
76
76
|
Returns:
|
77
77
|
List[Dict[str, Any]]: A list of dictionaries containing the score, label, and
|
78
|
-
|
79
|
-
|
80
|
-
|
78
|
+
bounding box of the detected objects with normalized coordinates between 0
|
79
|
+
and 1 (xmin, ymin, xmax, ymax). xmin and ymin are the coordinates of the
|
80
|
+
top-left and xmax and ymax are the coordinates of the bottom-right of the
|
81
|
+
bounding box.
|
81
82
|
|
82
83
|
Example
|
83
84
|
-------
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
85
|
+
>>> grounding_dino("car. dinosaur", image)
|
86
|
+
[
|
87
|
+
{'score': 0.99, 'label': 'dinosaur', 'bbox': [0.1, 0.11, 0.35, 0.4]},
|
88
|
+
{'score': 0.98, 'label': 'car', 'bbox': [0.2, 0.21, 0.45, 0.5]},
|
89
|
+
]
|
89
90
|
"""
|
90
91
|
image_size = image.shape[:2]
|
91
92
|
image_b64 = convert_to_b64(image)
|
@@ -129,27 +130,27 @@ def grounding_sam(
|
|
129
130
|
|
130
131
|
Returns:
|
131
132
|
List[Dict[str, Any]]: A list of dictionaries containing the score, label,
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
133
|
+
bounding box, and mask of the detected objects with normalized coordinates
|
134
|
+
(xmin, ymin, xmax, ymax). xmin and ymin are the coordinates of the top-left
|
135
|
+
and xmax and ymax are the coordinates of the bottom-right of the bounding box.
|
136
|
+
The mask is a binary 2D numpy array where 1 indicates the object and 0 indicates
|
137
|
+
the background.
|
137
138
|
|
138
139
|
Example
|
139
140
|
-------
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
141
|
+
>>> grounding_sam("car. dinosaur", image)
|
142
|
+
[
|
143
|
+
{
|
144
|
+
'score': 0.99,
|
145
|
+
'label': 'dinosaur',
|
146
|
+
'bbox': [0.1, 0.11, 0.35, 0.4],
|
147
|
+
'mask': array([[0, 0, 0, ..., 0, 0, 0],
|
148
|
+
[0, 0, 0, ..., 0, 0, 0],
|
149
|
+
...,
|
150
|
+
[0, 0, 0, ..., 0, 0, 0],
|
151
|
+
[0, 0, 0, ..., 0, 0, 0]], dtype=uint8),
|
152
|
+
},
|
153
|
+
]
|
153
154
|
"""
|
154
155
|
image_size = image.shape[:2]
|
155
156
|
image_b64 = convert_to_b64(image)
|
@@ -178,7 +179,7 @@ def extract_frames(
|
|
178
179
|
) -> List[Tuple[np.ndarray, float]]:
|
179
180
|
"""'extract_frames' extracts frames from a video, returns a list of tuples (frame,
|
180
181
|
timestamp), where timestamp is the relative time in seconds where the frame was
|
181
|
-
captured. The frame is a
|
182
|
+
captured. The frame is a numpy array.
|
182
183
|
|
183
184
|
Parameters:
|
184
185
|
video_uri (Union[str, Path]): The path to the video file.
|
@@ -187,12 +188,12 @@ def extract_frames(
|
|
187
188
|
|
188
189
|
Returns:
|
189
190
|
List[Tuple[np.ndarray, float]]: A list of tuples containing the extracted frame
|
190
|
-
|
191
|
+
as a numpy array and the timestamp in seconds.
|
191
192
|
|
192
193
|
Example
|
193
194
|
-------
|
194
|
-
|
195
|
-
|
195
|
+
>>> extract_frames("path/to/video.mp4")
|
196
|
+
[(frame1, 0.0), (frame2, 0.5), ...]
|
196
197
|
"""
|
197
198
|
|
198
199
|
return extract_frames_from_video(str(video_uri), fps)
|
@@ -212,10 +213,10 @@ def ocr(image: np.ndarray) -> List[Dict[str, Any]]:
|
|
212
213
|
|
213
214
|
Example
|
214
215
|
-------
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
216
|
+
>>> ocr(image)
|
217
|
+
[
|
218
|
+
{'label': 'hello world', 'bbox': [0.1, 0.11, 0.35, 0.4], 'score': 0.99},
|
219
|
+
]
|
219
220
|
"""
|
220
221
|
|
221
222
|
pil_image = Image.fromarray(image).convert("RGB")
|
@@ -266,9 +267,8 @@ def zero_shot_counting(image: np.ndarray) -> Dict[str, Any]:
|
|
266
267
|
|
267
268
|
Example
|
268
269
|
-------
|
269
|
-
|
270
|
-
|
271
|
-
|
270
|
+
>>> zero_shot_counting(image)
|
271
|
+
{'count': 45},
|
272
272
|
"""
|
273
273
|
|
274
274
|
image_b64 = convert_to_b64(image)
|
@@ -297,9 +297,8 @@ def visual_prompt_counting(
|
|
297
297
|
|
298
298
|
Example
|
299
299
|
-------
|
300
|
-
|
301
|
-
|
302
|
-
|
300
|
+
>>> visual_prompt_counting(image, {"bbox": [0.1, 0.1, 0.4, 0.42]})
|
301
|
+
{'count': 45},
|
303
302
|
"""
|
304
303
|
|
305
304
|
image_size = get_image_size(image)
|
@@ -332,9 +331,8 @@ def image_question_answering(image: np.ndarray, prompt: str) -> str:
|
|
332
331
|
|
333
332
|
Example
|
334
333
|
-------
|
335
|
-
|
336
|
-
|
337
|
-
|
334
|
+
>>> image_question_answering(image, 'What is the cat doing ?')
|
335
|
+
'drinking milk'
|
338
336
|
"""
|
339
337
|
|
340
338
|
image_b64 = convert_to_b64(image)
|
@@ -363,9 +361,8 @@ def clip(image: np.ndarray, classes: List[str]) -> Dict[str, Any]:
|
|
363
361
|
|
364
362
|
Example
|
365
363
|
-------
|
366
|
-
|
367
|
-
|
368
|
-
|
364
|
+
>>> clip(image, ['dog', 'cat', 'bird'])
|
365
|
+
{"labels": ["dog", "cat", "bird"], "scores": [0.68, 0.30, 0.02]},
|
369
366
|
"""
|
370
367
|
|
371
368
|
image_b64 = convert_to_b64(image)
|
@@ -391,9 +388,8 @@ def image_caption(image: np.ndarray) -> str:
|
|
391
388
|
|
392
389
|
Example
|
393
390
|
-------
|
394
|
-
|
395
|
-
|
396
|
-
|
391
|
+
>>> image_caption(image)
|
392
|
+
'This image contains a cat sitting on a table with a bowl of milk.'
|
397
393
|
"""
|
398
394
|
|
399
395
|
image_b64 = convert_to_b64(image)
|
@@ -418,8 +414,8 @@ def closest_mask_distance(mask1: np.ndarray, mask2: np.ndarray) -> float:
|
|
418
414
|
|
419
415
|
Example
|
420
416
|
-------
|
421
|
-
|
422
|
-
|
417
|
+
>>> closest_mask_distance(mask1, mask2)
|
418
|
+
0.5
|
423
419
|
"""
|
424
420
|
|
425
421
|
mask1 = np.clip(mask1, 0, 1)
|
@@ -474,8 +470,8 @@ def closest_box_distance(
|
|
474
470
|
|
475
471
|
Example
|
476
472
|
-------
|
477
|
-
|
478
|
-
|
473
|
+
>>> closest_box_distance([100, 100, 200, 200], [300, 300, 400, 400])
|
474
|
+
141.42
|
479
475
|
"""
|
480
476
|
|
481
477
|
x11, y11, x12, y12 = denormalize_bbox(box1, image_size)
|
@@ -499,7 +495,7 @@ def save_json(data: Any, file_path: str) -> None:
|
|
499
495
|
|
500
496
|
Example
|
501
497
|
-------
|
502
|
-
|
498
|
+
>>> save_json(data, "path/to/file.json")
|
503
499
|
"""
|
504
500
|
|
505
501
|
class NumpyEncoder(json.JSONEncoder):
|
@@ -525,7 +521,7 @@ def load_image(image_path: str) -> np.ndarray:
|
|
525
521
|
|
526
522
|
Example
|
527
523
|
-------
|
528
|
-
|
524
|
+
>>> load_image("path/to/image.jpg")
|
529
525
|
"""
|
530
526
|
# NOTE: sometimes the generated code passes in a NumPy array
|
531
527
|
if isinstance(image_path, np.ndarray):
|
@@ -534,27 +530,22 @@ def load_image(image_path: str) -> np.ndarray:
|
|
534
530
|
return np.array(image)
|
535
531
|
|
536
532
|
|
537
|
-
def save_image(image: np.ndarray) ->
|
538
|
-
"""'save_image' is a utility function that saves an image
|
533
|
+
def save_image(image: np.ndarray, file_path: str) -> None:
|
534
|
+
"""'save_image' is a utility function that saves an image to a file path.
|
539
535
|
|
540
536
|
Parameters:
|
541
537
|
image (np.ndarray): The image to save.
|
542
|
-
|
543
|
-
Returns:
|
544
|
-
str: The path to the saved image.
|
538
|
+
file_path (str): The path to save the image file.
|
545
539
|
|
546
540
|
Example
|
547
541
|
-------
|
548
|
-
|
549
|
-
"/tmp/tmpabc123.png"
|
542
|
+
>>> save_image(image, "path/to/image.png")
|
550
543
|
"""
|
551
544
|
from IPython.display import display
|
552
545
|
|
553
546
|
pil_image = Image.fromarray(image.astype(np.uint8))
|
554
547
|
display(pil_image)
|
555
|
-
|
556
|
-
pil_image.save(f, "PNG")
|
557
|
-
return f.name
|
548
|
+
pil_image.save(file_path)
|
558
549
|
|
559
550
|
|
560
551
|
def save_video(
|
@@ -572,8 +563,8 @@ def save_video(
|
|
572
563
|
|
573
564
|
Example
|
574
565
|
-------
|
575
|
-
|
576
|
-
|
566
|
+
>>> save_video(frames)
|
567
|
+
"/tmp/tmpvideo123.mp4"
|
577
568
|
"""
|
578
569
|
if fps <= 0:
|
579
570
|
_LOGGER.warning(f"Invalid fps value: {fps}. Setting fps to 4 (default value).")
|
@@ -619,9 +610,9 @@ def overlay_bounding_boxes(
|
|
619
610
|
|
620
611
|
Example
|
621
612
|
-------
|
622
|
-
|
623
|
-
|
624
|
-
|
613
|
+
>>> image_with_bboxes = overlay_bounding_boxes(
|
614
|
+
image, [{'score': 0.99, 'label': 'dinosaur', 'bbox': [0.1, 0.11, 0.35, 0.4]}],
|
615
|
+
)
|
625
616
|
"""
|
626
617
|
pil_image = Image.fromarray(image.astype(np.uint8))
|
627
618
|
|
@@ -675,18 +666,18 @@ def overlay_segmentation_masks(
|
|
675
666
|
|
676
667
|
Example
|
677
668
|
-------
|
678
|
-
|
679
|
-
|
680
|
-
|
681
|
-
|
682
|
-
|
683
|
-
|
684
|
-
|
685
|
-
|
686
|
-
|
687
|
-
|
688
|
-
|
689
|
-
|
669
|
+
>>> image_with_masks = overlay_segmentation_masks(
|
670
|
+
image,
|
671
|
+
[{
|
672
|
+
'score': 0.99,
|
673
|
+
'label': 'dinosaur',
|
674
|
+
'mask': array([[0, 0, 0, ..., 0, 0, 0],
|
675
|
+
[0, 0, 0, ..., 0, 0, 0],
|
676
|
+
...,
|
677
|
+
[0, 0, 0, ..., 0, 0, 0],
|
678
|
+
[0, 0, 0, ..., 0, 0, 0]], dtype=uint8),
|
679
|
+
}],
|
680
|
+
)
|
690
681
|
"""
|
691
682
|
pil_image = Image.fromarray(image.astype(np.uint8)).convert("RGBA")
|
692
683
|
|
@@ -727,16 +718,16 @@ def overlay_heat_map(
|
|
727
718
|
|
728
719
|
Example
|
729
720
|
-------
|
730
|
-
|
731
|
-
|
732
|
-
|
733
|
-
|
734
|
-
|
735
|
-
|
736
|
-
|
737
|
-
|
738
|
-
|
739
|
-
|
721
|
+
>>> image_with_heat_map = overlay_heat_map(
|
722
|
+
image,
|
723
|
+
{
|
724
|
+
'heat_map': array([[0, 0, 0, ..., 0, 0, 0],
|
725
|
+
[0, 0, 0, ..., 0, 0, 0],
|
726
|
+
...,
|
727
|
+
[0, 0, 0, ..., 0, 0, 0],
|
728
|
+
[0, 0, 0, ..., 125, 125, 125]], dtype=uint8),
|
729
|
+
},
|
730
|
+
)
|
740
731
|
"""
|
741
732
|
pil_image = Image.fromarray(image.astype(np.uint8)).convert("RGB")
|
742
733
|
|
vision_agent/utils/video.py
CHANGED
@@ -63,9 +63,9 @@ def extract_frames_from_video(
|
|
63
63
|
|
64
64
|
Returns:
|
65
65
|
a list of tuples containing the extracted frame and the timestamp in seconds.
|
66
|
-
|
67
|
-
|
68
|
-
|
66
|
+
E.g. [(frame1, 0.0), (frame2, 0.5), ...]. The timestamp is the time in seconds
|
67
|
+
from the start of the video. E.g. 12.125 means 12.125 seconds from the start of
|
68
|
+
the video. The frames are sorted by the timestamp in ascending order.
|
69
69
|
"""
|
70
70
|
with VideoFileClip(video_uri) as video:
|
71
71
|
video_duration: float = video.duration
|
@@ -7,12 +7,12 @@ vision_agent/agent/data_interpreter.py,sha256=YlCm3DVyhCM9T6wpccWxC5XHoIj9smsEsk
|
|
7
7
|
vision_agent/agent/data_interpreter_prompts.py,sha256=RDJggOfXwGaEoIcTYGX41ZEayCgYei1AootDOc_SN2g,6134
|
8
8
|
vision_agent/agent/easytool.py,sha256=wMa9-tpAaiC4E2ONbidxmMM9YvAOw4_Sypf5mGKco_w,11526
|
9
9
|
vision_agent/agent/easytool_prompts.py,sha256=Bikw-PPLkm78dwywTlnv32Y1Tw6JMeC-R7oCnXWLcTk,4656
|
10
|
-
vision_agent/agent/easytool_v2.py,sha256=
|
10
|
+
vision_agent/agent/easytool_v2.py,sha256=LY2cqzjVHBr7QMn4WsrZ7AfpWrDN0LjJIrd5tMo2-PI,27323
|
11
11
|
vision_agent/agent/easytool_v2_prompts.py,sha256=MZSIwovYgB-f-kdJ6btaNDVXptJn47bfOL3-Zn6NiC0,8573
|
12
|
-
vision_agent/agent/reflexion.py,sha256=
|
12
|
+
vision_agent/agent/reflexion.py,sha256=scck3YcME6DhX5Vs4Wr1rYb8S4wkBUkN9UksyazfrZg,10506
|
13
13
|
vision_agent/agent/reflexion_prompts.py,sha256=G7UAeNz_g2qCb2yN6OaIC7bQVUkda4m3z42EG8wAyfE,9342
|
14
|
-
vision_agent/agent/vision_agent.py,sha256=
|
15
|
-
vision_agent/agent/vision_agent_prompts.py,sha256=
|
14
|
+
vision_agent/agent/vision_agent.py,sha256=wGGISg6pDVNseF2fIAN1jH66OX2qZk2nDhuobeSNGHk,20957
|
15
|
+
vision_agent/agent/vision_agent_prompts.py,sha256=9QVQA-YTSHhYHYbxiqCWFVBHIa6uV4WF0z6599mV_Oc,8470
|
16
16
|
vision_agent/fonts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
17
17
|
vision_agent/fonts/default_font_ch_en.ttf,sha256=1YM0Z3XqLDjSNbF7ihQFSAIUdjF9m1rtHiNC_6QosTE,1594400
|
18
18
|
vision_agent/llm/__init__.py,sha256=BoUm_zSAKnLlE8s-gKTSQugXDqVZKPqYlWwlTLdhcz4,48
|
@@ -23,14 +23,14 @@ vision_agent/tools/__init__.py,sha256=Sng6dChynJJCYWjraXXM0tep_VPdnYl3L9vb0HMy_P
|
|
23
23
|
vision_agent/tools/easytool_tools.py,sha256=pZc5dQlYINlV4nYbbzsDi3-wauA-fCeD2iGmJUMoUfE,47373
|
24
24
|
vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
|
25
25
|
vision_agent/tools/tool_utils.py,sha256=wzRacbUpqk9hhfX_Y08rL8qP0XCN2w-8IZoYLi3Upn4,869
|
26
|
-
vision_agent/tools/tools.py,sha256=
|
26
|
+
vision_agent/tools/tools.py,sha256=aEph9ikQklqKzz18jgjO7eC77VqmkJCYEZ8DqtpihYg,26944
|
27
27
|
vision_agent/utils/__init__.py,sha256=Ce4yPhoWanRsnTy3X7YzZNBYYRJsrJeT7N59WUf8GZM,209
|
28
28
|
vision_agent/utils/execute.py,sha256=GqoAodxtwTPBr1nujPTsWiZO2rBGvWVXTe8lgxY4d_g,20603
|
29
29
|
vision_agent/utils/image_utils.py,sha256=_cdiS5YrLzqkq_ZgFUO897m5M4_SCIThwUy4lOklfB8,7700
|
30
30
|
vision_agent/utils/sim.py,sha256=oUZ-6eu8Io-UNt9GXJ0XRKtP-Wc0sPWVzYGVpB2yDFk,3001
|
31
31
|
vision_agent/utils/type_defs.py,sha256=BlI8ywWHAplC7kYWLvt4AOdnKpEW3qWEFm-GEOSkrFQ,1792
|
32
|
-
vision_agent/utils/video.py,sha256=
|
33
|
-
vision_agent-0.2.
|
34
|
-
vision_agent-0.2.
|
35
|
-
vision_agent-0.2.
|
36
|
-
vision_agent-0.2.
|
32
|
+
vision_agent/utils/video.py,sha256=rNmU9KEIkZB5-EztZNlUiKYN0mm_55A_2VGUM0QpqLA,8779
|
33
|
+
vision_agent-0.2.52.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
34
|
+
vision_agent-0.2.52.dist-info/METADATA,sha256=DsrommMwXdLuHvzMkducIQB8TbYndfwJZH1ZeMope7E,6817
|
35
|
+
vision_agent-0.2.52.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
|
36
|
+
vision_agent-0.2.52.dist-info/RECORD,,
|
File without changes
|
File without changes
|