ultralytics 8.1.28__py3-none-any.whl → 8.3.62__py3-none-any.whl
This diff compares the contents of two publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
- tests/__init__.py +22 -0
- tests/conftest.py +83 -0
- tests/test_cli.py +122 -0
- tests/test_cuda.py +155 -0
- tests/test_engine.py +131 -0
- tests/test_exports.py +216 -0
- tests/test_integrations.py +150 -0
- tests/test_python.py +615 -0
- tests/test_solutions.py +94 -0
- ultralytics/__init__.py +11 -8
- ultralytics/cfg/__init__.py +569 -131
- ultralytics/cfg/datasets/Argoverse.yaml +2 -1
- ultralytics/cfg/datasets/DOTAv1.5.yaml +3 -2
- ultralytics/cfg/datasets/DOTAv1.yaml +3 -2
- ultralytics/cfg/datasets/GlobalWheat2020.yaml +3 -2
- ultralytics/cfg/datasets/ImageNet.yaml +2 -1
- ultralytics/cfg/datasets/Objects365.yaml +5 -4
- ultralytics/cfg/datasets/SKU-110K.yaml +2 -1
- ultralytics/cfg/datasets/VOC.yaml +3 -2
- ultralytics/cfg/datasets/VisDrone.yaml +6 -5
- ultralytics/cfg/datasets/african-wildlife.yaml +25 -0
- ultralytics/cfg/datasets/brain-tumor.yaml +23 -0
- ultralytics/cfg/datasets/carparts-seg.yaml +3 -2
- ultralytics/cfg/datasets/coco-pose.yaml +7 -6
- ultralytics/cfg/datasets/coco.yaml +3 -2
- ultralytics/cfg/datasets/coco128-seg.yaml +4 -3
- ultralytics/cfg/datasets/coco128.yaml +4 -3
- ultralytics/cfg/datasets/coco8-pose.yaml +3 -2
- ultralytics/cfg/datasets/coco8-seg.yaml +3 -2
- ultralytics/cfg/datasets/coco8.yaml +3 -2
- ultralytics/cfg/datasets/crack-seg.yaml +3 -2
- ultralytics/cfg/datasets/dog-pose.yaml +24 -0
- ultralytics/cfg/datasets/dota8.yaml +3 -2
- ultralytics/cfg/datasets/hand-keypoints.yaml +26 -0
- ultralytics/cfg/datasets/lvis.yaml +1236 -0
- ultralytics/cfg/datasets/medical-pills.yaml +22 -0
- ultralytics/cfg/datasets/open-images-v7.yaml +2 -1
- ultralytics/cfg/datasets/package-seg.yaml +5 -4
- ultralytics/cfg/datasets/signature.yaml +21 -0
- ultralytics/cfg/datasets/tiger-pose.yaml +3 -2
- ultralytics/cfg/datasets/xView.yaml +2 -1
- ultralytics/cfg/default.yaml +14 -11
- ultralytics/cfg/models/11/yolo11-cls-resnet18.yaml +24 -0
- ultralytics/cfg/models/11/yolo11-cls.yaml +33 -0
- ultralytics/cfg/models/11/yolo11-obb.yaml +50 -0
- ultralytics/cfg/models/11/yolo11-pose.yaml +51 -0
- ultralytics/cfg/models/11/yolo11-seg.yaml +50 -0
- ultralytics/cfg/models/11/yolo11.yaml +50 -0
- ultralytics/cfg/models/rt-detr/rtdetr-l.yaml +5 -2
- ultralytics/cfg/models/rt-detr/rtdetr-resnet101.yaml +5 -2
- ultralytics/cfg/models/rt-detr/rtdetr-resnet50.yaml +5 -2
- ultralytics/cfg/models/rt-detr/rtdetr-x.yaml +5 -2
- ultralytics/cfg/models/v10/yolov10b.yaml +45 -0
- ultralytics/cfg/models/v10/yolov10l.yaml +45 -0
- ultralytics/cfg/models/v10/yolov10m.yaml +45 -0
- ultralytics/cfg/models/v10/yolov10n.yaml +45 -0
- ultralytics/cfg/models/v10/yolov10s.yaml +45 -0
- ultralytics/cfg/models/v10/yolov10x.yaml +45 -0
- ultralytics/cfg/models/v3/yolov3-spp.yaml +5 -2
- ultralytics/cfg/models/v3/yolov3-tiny.yaml +5 -2
- ultralytics/cfg/models/v3/yolov3.yaml +5 -2
- ultralytics/cfg/models/v5/yolov5-p6.yaml +5 -2
- ultralytics/cfg/models/v5/yolov5.yaml +5 -2
- ultralytics/cfg/models/v6/yolov6.yaml +5 -2
- ultralytics/cfg/models/v8/yolov8-cls-resnet101.yaml +5 -2
- ultralytics/cfg/models/v8/yolov8-cls-resnet50.yaml +5 -2
- ultralytics/cfg/models/v8/yolov8-cls.yaml +5 -2
- ultralytics/cfg/models/v8/yolov8-ghost-p2.yaml +6 -2
- ultralytics/cfg/models/v8/yolov8-ghost-p6.yaml +6 -2
- ultralytics/cfg/models/v8/yolov8-ghost.yaml +5 -2
- ultralytics/cfg/models/v8/yolov8-obb.yaml +5 -2
- ultralytics/cfg/models/v8/yolov8-p2.yaml +5 -2
- ultralytics/cfg/models/v8/yolov8-p6.yaml +10 -7
- ultralytics/cfg/models/v8/yolov8-pose-p6.yaml +5 -2
- ultralytics/cfg/models/v8/yolov8-pose.yaml +5 -2
- ultralytics/cfg/models/v8/yolov8-rtdetr.yaml +5 -2
- ultralytics/cfg/models/v8/yolov8-seg-p6.yaml +5 -2
- ultralytics/cfg/models/v8/yolov8-seg.yaml +5 -2
- ultralytics/cfg/models/v8/yolov8-world.yaml +5 -2
- ultralytics/cfg/models/v8/yolov8-worldv2.yaml +5 -2
- ultralytics/cfg/models/v8/yolov8.yaml +5 -2
- ultralytics/cfg/models/v9/yolov9c-seg.yaml +41 -0
- ultralytics/cfg/models/v9/yolov9c.yaml +30 -25
- ultralytics/cfg/models/v9/yolov9e-seg.yaml +64 -0
- ultralytics/cfg/models/v9/yolov9e.yaml +46 -42
- ultralytics/cfg/models/v9/yolov9m.yaml +41 -0
- ultralytics/cfg/models/v9/yolov9s.yaml +41 -0
- ultralytics/cfg/models/v9/yolov9t.yaml +41 -0
- ultralytics/cfg/solutions/default.yaml +24 -0
- ultralytics/cfg/trackers/botsort.yaml +8 -5
- ultralytics/cfg/trackers/bytetrack.yaml +8 -5
- ultralytics/data/__init__.py +14 -3
- ultralytics/data/annotator.py +37 -15
- ultralytics/data/augment.py +1783 -289
- ultralytics/data/base.py +62 -27
- ultralytics/data/build.py +36 -8
- ultralytics/data/converter.py +196 -36
- ultralytics/data/dataset.py +233 -94
- ultralytics/data/loaders.py +199 -96
- ultralytics/data/split_dota.py +39 -29
- ultralytics/data/utils.py +110 -40
- ultralytics/engine/__init__.py +1 -1
- ultralytics/engine/exporter.py +569 -242
- ultralytics/engine/model.py +604 -252
- ultralytics/engine/predictor.py +22 -11
- ultralytics/engine/results.py +1228 -218
- ultralytics/engine/trainer.py +190 -129
- ultralytics/engine/tuner.py +18 -18
- ultralytics/engine/validator.py +18 -15
- ultralytics/hub/__init__.py +31 -13
- ultralytics/hub/auth.py +11 -7
- ultralytics/hub/google/__init__.py +159 -0
- ultralytics/hub/session.py +128 -94
- ultralytics/hub/utils.py +20 -21
- ultralytics/models/__init__.py +4 -2
- ultralytics/models/fastsam/__init__.py +2 -3
- ultralytics/models/fastsam/model.py +26 -4
- ultralytics/models/fastsam/predict.py +127 -63
- ultralytics/models/fastsam/utils.py +1 -44
- ultralytics/models/fastsam/val.py +1 -1
- ultralytics/models/nas/__init__.py +1 -1
- ultralytics/models/nas/model.py +21 -10
- ultralytics/models/nas/predict.py +3 -6
- ultralytics/models/nas/val.py +4 -4
- ultralytics/models/rtdetr/__init__.py +1 -1
- ultralytics/models/rtdetr/model.py +1 -1
- ultralytics/models/rtdetr/predict.py +6 -8
- ultralytics/models/rtdetr/train.py +6 -2
- ultralytics/models/rtdetr/val.py +3 -3
- ultralytics/models/sam/__init__.py +3 -3
- ultralytics/models/sam/amg.py +29 -23
- ultralytics/models/sam/build.py +211 -13
- ultralytics/models/sam/model.py +91 -30
- ultralytics/models/sam/modules/__init__.py +1 -1
- ultralytics/models/sam/modules/blocks.py +1129 -0
- ultralytics/models/sam/modules/decoders.py +381 -53
- ultralytics/models/sam/modules/encoders.py +515 -324
- ultralytics/models/sam/modules/memory_attention.py +237 -0
- ultralytics/models/sam/modules/sam.py +969 -21
- ultralytics/models/sam/modules/tiny_encoder.py +425 -154
- ultralytics/models/sam/modules/transformer.py +159 -60
- ultralytics/models/sam/modules/utils.py +293 -0
- ultralytics/models/sam/predict.py +1263 -132
- ultralytics/models/utils/__init__.py +1 -1
- ultralytics/models/utils/loss.py +36 -24
- ultralytics/models/utils/ops.py +3 -7
- ultralytics/models/yolo/__init__.py +3 -3
- ultralytics/models/yolo/classify/__init__.py +1 -1
- ultralytics/models/yolo/classify/predict.py +7 -8
- ultralytics/models/yolo/classify/train.py +17 -22
- ultralytics/models/yolo/classify/val.py +8 -4
- ultralytics/models/yolo/detect/__init__.py +1 -1
- ultralytics/models/yolo/detect/predict.py +3 -5
- ultralytics/models/yolo/detect/train.py +11 -4
- ultralytics/models/yolo/detect/val.py +90 -52
- ultralytics/models/yolo/model.py +14 -9
- ultralytics/models/yolo/obb/__init__.py +1 -1
- ultralytics/models/yolo/obb/predict.py +2 -2
- ultralytics/models/yolo/obb/train.py +5 -3
- ultralytics/models/yolo/obb/val.py +41 -23
- ultralytics/models/yolo/pose/__init__.py +1 -1
- ultralytics/models/yolo/pose/predict.py +3 -5
- ultralytics/models/yolo/pose/train.py +2 -2
- ultralytics/models/yolo/pose/val.py +51 -17
- ultralytics/models/yolo/segment/__init__.py +1 -1
- ultralytics/models/yolo/segment/predict.py +3 -5
- ultralytics/models/yolo/segment/train.py +2 -2
- ultralytics/models/yolo/segment/val.py +60 -19
- ultralytics/models/yolo/world/__init__.py +5 -0
- ultralytics/models/yolo/world/train.py +92 -0
- ultralytics/models/yolo/world/train_world.py +109 -0
- ultralytics/nn/__init__.py +1 -1
- ultralytics/nn/autobackend.py +228 -93
- ultralytics/nn/modules/__init__.py +39 -14
- ultralytics/nn/modules/activation.py +21 -0
- ultralytics/nn/modules/block.py +527 -67
- ultralytics/nn/modules/conv.py +24 -7
- ultralytics/nn/modules/head.py +177 -34
- ultralytics/nn/modules/transformer.py +6 -5
- ultralytics/nn/modules/utils.py +1 -2
- ultralytics/nn/tasks.py +225 -77
- ultralytics/solutions/__init__.py +30 -1
- ultralytics/solutions/ai_gym.py +96 -143
- ultralytics/solutions/analytics.py +247 -0
- ultralytics/solutions/distance_calculation.py +78 -135
- ultralytics/solutions/heatmap.py +93 -247
- ultralytics/solutions/object_counter.py +184 -259
- ultralytics/solutions/parking_management.py +246 -0
- ultralytics/solutions/queue_management.py +112 -0
- ultralytics/solutions/region_counter.py +116 -0
- ultralytics/solutions/security_alarm.py +144 -0
- ultralytics/solutions/solutions.py +178 -0
- ultralytics/solutions/speed_estimation.py +86 -174
- ultralytics/solutions/streamlit_inference.py +190 -0
- ultralytics/solutions/trackzone.py +68 -0
- ultralytics/trackers/__init__.py +1 -1
- ultralytics/trackers/basetrack.py +32 -13
- ultralytics/trackers/bot_sort.py +61 -28
- ultralytics/trackers/byte_tracker.py +83 -51
- ultralytics/trackers/track.py +21 -6
- ultralytics/trackers/utils/__init__.py +1 -1
- ultralytics/trackers/utils/gmc.py +62 -48
- ultralytics/trackers/utils/kalman_filter.py +166 -35
- ultralytics/trackers/utils/matching.py +40 -21
- ultralytics/utils/__init__.py +511 -239
- ultralytics/utils/autobatch.py +40 -22
- ultralytics/utils/benchmarks.py +266 -85
- ultralytics/utils/callbacks/__init__.py +1 -1
- ultralytics/utils/callbacks/base.py +1 -3
- ultralytics/utils/callbacks/clearml.py +7 -6
- ultralytics/utils/callbacks/comet.py +39 -17
- ultralytics/utils/callbacks/dvc.py +1 -1
- ultralytics/utils/callbacks/hub.py +16 -16
- ultralytics/utils/callbacks/mlflow.py +28 -24
- ultralytics/utils/callbacks/neptune.py +6 -2
- ultralytics/utils/callbacks/raytune.py +3 -4
- ultralytics/utils/callbacks/tensorboard.py +18 -18
- ultralytics/utils/callbacks/wb.py +27 -20
- ultralytics/utils/checks.py +160 -100
- ultralytics/utils/dist.py +2 -1
- ultralytics/utils/downloads.py +44 -37
- ultralytics/utils/errors.py +1 -1
- ultralytics/utils/files.py +72 -38
- ultralytics/utils/instance.py +41 -19
- ultralytics/utils/loss.py +84 -56
- ultralytics/utils/metrics.py +61 -56
- ultralytics/utils/ops.py +94 -89
- ultralytics/utils/patches.py +30 -14
- ultralytics/utils/plotting.py +600 -269
- ultralytics/utils/tal.py +67 -26
- ultralytics/utils/torch_utils.py +302 -102
- ultralytics/utils/triton.py +2 -1
- ultralytics/utils/tuner.py +21 -12
- ultralytics-8.3.62.dist-info/METADATA +370 -0
- ultralytics-8.3.62.dist-info/RECORD +241 -0
- {ultralytics-8.1.28.dist-info → ultralytics-8.3.62.dist-info}/WHEEL +1 -1
- ultralytics/data/explorer/__init__.py +0 -5
- ultralytics/data/explorer/explorer.py +0 -472
- ultralytics/data/explorer/gui/__init__.py +0 -1
- ultralytics/data/explorer/gui/dash.py +0 -268
- ultralytics/data/explorer/utils.py +0 -166
- ultralytics/models/fastsam/prompt.py +0 -357
- ultralytics-8.1.28.dist-info/METADATA +0 -373
- ultralytics-8.1.28.dist-info/RECORD +0 -197
- {ultralytics-8.1.28.dist-info → ultralytics-8.3.62.dist-info}/LICENSE +0 -0
- {ultralytics-8.1.28.dist-info → ultralytics-8.3.62.dist-info}/entry_points.txt +0 -0
- {ultralytics-8.1.28.dist-info → ultralytics-8.3.62.dist-info}/top_level.txt +0 -0
ultralytics/data/loaders.py
CHANGED
@@ -1,4 +1,4 @@
-# Ultralytics
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 
 import glob
 import math
@@ -15,14 +15,32 @@ import requests
 import torch
 from PIL import Image
 
-from ultralytics.data.utils import IMG_FORMATS, VID_FORMATS
-from ultralytics.utils import
+from ultralytics.data.utils import FORMATS_HELP_MSG, IMG_FORMATS, VID_FORMATS
+from ultralytics.utils import IS_COLAB, IS_KAGGLE, LOGGER, ops
 from ultralytics.utils.checks import check_requirements
+from ultralytics.utils.patches import imread
 
 
 @dataclass
 class SourceTypes:
-    """
+    """
+    Class to represent various types of input sources for predictions.
+
+    This class uses dataclass to define boolean flags for different types of input sources that can be used for
+    making predictions with YOLO models.
+
+    Attributes:
+        stream (bool): Flag indicating if the input source is a video stream.
+        screenshot (bool): Flag indicating if the input source is a screenshot.
+        from_img (bool): Flag indicating if the input source is an image file.
+
+    Examples:
+        >>> source_types = SourceTypes(stream=True, screenshot=False, from_img=False)
+        >>> print(source_types.stream)
+        True
+        >>> print(source_types.from_img)
+        False
+    """
 
     stream: bool = False
     screenshot: bool = False
@@ -32,38 +50,47 @@ class SourceTypes:
 
 class LoadStreams:
     """
-    Stream Loader for various types of video streams
+    Stream Loader for various types of video streams.
+
+    Supports RTSP, RTMP, HTTP, and TCP streams. This class handles the loading and processing of multiple video
+    streams simultaneously, making it suitable for real-time video analysis tasks.
 
     Attributes:
-        sources (str): The source input paths or URLs for the video streams.
-        vid_stride (int): Video frame-rate stride
-        buffer (bool): Whether to buffer input streams
+        sources (List[str]): The source input paths or URLs for the video streams.
+        vid_stride (int): Video frame-rate stride.
+        buffer (bool): Whether to buffer input streams.
         running (bool): Flag to indicate if the streaming thread is running.
         mode (str): Set to 'stream' indicating real-time capture.
-        imgs (
-        fps (
-        frames (
-        threads (
-        shape (
-        caps (
+        imgs (List[List[np.ndarray]]): List of image frames for each stream.
+        fps (List[float]): List of FPS for each stream.
+        frames (List[int]): List of total frames for each stream.
+        threads (List[Thread]): List of threads for each stream.
+        shape (List[Tuple[int, int, int]]): List of shapes for each stream.
+        caps (List[cv2.VideoCapture]): List of cv2.VideoCapture objects for each stream.
        bs (int): Batch size for processing.
 
     Methods:
-        __init__: Initialize the stream loader.
         update: Read stream frames in daemon thread.
         close: Close stream loader and release resources.
         __iter__: Returns an iterator object for the class.
         __next__: Returns source paths, transformed, and original images for processing.
         __len__: Return the length of the sources object.
 
-
-
-
-
+    Examples:
+        >>> stream_loader = LoadStreams("rtsp://example.com/stream1.mp4")
+        >>> for sources, imgs, _ in stream_loader:
+        ...     # Process the images
+        ...     pass
+        >>> stream_loader.close()
+
+    Notes:
+        - The class uses threading to efficiently load frames from multiple streams simultaneously.
+        - It automatically handles YouTube links, converting them to the best available stream URL.
+        - The class implements a buffer system to manage frame storage and retrieval.
     """
 
     def __init__(self, sources="file.streams", vid_stride=1, buffer=False):
-        """Initialize
+        """Initialize stream loader for multiple video sources, supporting various stream types."""
         torch.backends.cudnn.benchmark = True  # faster for fixed-size inference
         self.buffer = buffer  # buffer input streams
         self.running = True  # running flag for Thread
@@ -83,11 +110,11 @@ class LoadStreams:
         for i, s in enumerate(sources):  # index, source
             # Start thread to read frames from video stream
             st = f"{i + 1}/{n}: {s}... "
-            if urlparse(s).hostname in
-                # YouTube format i.e. 'https://www.youtube.com/watch?v=
+            if urlparse(s).hostname in {"www.youtube.com", "youtube.com", "youtu.be"}:  # if source is YouTube video
+                # YouTube format i.e. 'https://www.youtube.com/watch?v=Jsn8D3aC840' or 'https://youtu.be/Jsn8D3aC840'
                 s = get_best_youtube_url(s)
             s = eval(s) if s.isnumeric() else s  # i.e. s = '0' local webcam
-            if s == 0 and (
+            if s == 0 and (IS_COLAB or IS_KAGGLE):
                 raise NotImplementedError(
                     "'source=0' webcam not supported in Colab and Kaggle notebooks. "
                     "Try running 'source=0' in a local environment."
@@ -114,7 +141,7 @@ class LoadStreams:
         LOGGER.info("")  # newline
 
     def update(self, i, cap, stream):
-        """Read stream
+        """Read stream frames in daemon thread and update image buffer."""
         n, f = 0, self.frames[i]  # frame number, frame array
         while self.running and cap.isOpened() and n < (f - 1):
             if len(self.imgs[i]) < 30:  # keep a <=30-image buffer
@@ -134,7 +161,7 @@ class LoadStreams:
                 time.sleep(0.01)  # wait until the buffer is empty
 
     def close(self):
-        """
+        """Terminates stream loader, stops threads, and releases video capture resources."""
         self.running = False  # stop flag for Thread
         for thread in self.threads:
             if thread.is_alive():
@@ -152,7 +179,7 @@ class LoadStreams:
         return self
 
     def __next__(self):
-        """Returns
+        """Returns the next batch of frames from multiple video streams for processing."""
         self.count += 1
 
         images = []
@@ -179,16 +206,16 @@ class LoadStreams:
         return self.sources, images, [""] * self.bs
 
     def __len__(self):
-        """Return the
+        """Return the number of video streams in the LoadStreams object."""
         return self.bs  # 1E12 frames = 32 streams at 30 FPS for 30 years
 
 
 class LoadScreenshots:
     """
-
+    Ultralytics screenshot dataloader for capturing and processing screen images.
 
-    This class manages the loading of screenshot images for processing with
-
+    This class manages the loading of screenshot images for processing with YOLO. It is suitable for use with
+    `yolo predict source=screen`.
 
     Attributes:
         source (str): The source input indicating which screen to capture.
@@ -201,15 +228,21 @@ class LoadScreenshots:
         frame (int): Counter for captured frames.
         sct (mss.mss): Screen capture object from `mss` library.
         bs (int): Batch size, set to 1.
-
+        fps (int): Frames per second, set to 30.
+        monitor (Dict[str, int]): Monitor configuration details.
 
     Methods:
         __iter__: Returns an iterator object.
         __next__: Captures the next screenshot and returns it.
+
+    Examples:
+        >>> loader = LoadScreenshots("0 100 100 640 480")  # screen 0, top-left (100,100), 640x480
+        >>> for source, im, im0s, vid_cap, s in loader:
+        ...     print(f"Captured frame: {im.shape}")
     """
 
     def __init__(self, source):
-        """
+        """Initialize screenshot capture with specified screen and region parameters."""
         check_requirements("mss")
         import mss  # noqa
 
@@ -236,11 +269,11 @@ class LoadScreenshots:
         self.monitor = {"left": self.left, "top": self.top, "width": self.width, "height": self.height}
 
     def __iter__(self):
-        """
+        """Yields the next screenshot image from the specified screen or region for processing."""
         return self
 
     def __next__(self):
-        """
+        """Captures and returns the next screenshot as a numpy array using the mss library."""
         im0 = np.asarray(self.sct.grab(self.monitor))[:, :, :3]  # BGRA to BGR
         s = f"screen {self.screen} (LTWH): {self.left},{self.top},{self.width},{self.height}: "
 
@@ -250,29 +283,45 @@ class LoadScreenshots:
 
 class LoadImagesAndVideos:
     """
-
+    A class for loading and processing images and videos for YOLO object detection.
 
-    This class manages the loading and pre-processing of image and video data
-
+    This class manages the loading and pre-processing of image and video data from various sources, including
+    single image files, video files, and lists of image and video paths.
 
     Attributes:
-        files (
+        files (List[str]): List of image and video file paths.
         nf (int): Total number of files (images and videos).
-        video_flag (
+        video_flag (List[bool]): Flags indicating whether a file is a video (True) or an image (False).
         mode (str): Current mode, 'image' or 'video'.
-        vid_stride (int): Stride for video frame-rate
-        bs (int): Batch size
+        vid_stride (int): Stride for video frame-rate.
+        bs (int): Batch size.
         cap (cv2.VideoCapture): Video capture object for OpenCV.
         frame (int): Frame counter for video.
         frames (int): Total number of frames in the video.
-        count (int): Counter for iteration, initialized at 0 during
+        count (int): Counter for iteration, initialized at 0 during __iter__().
+        ni (int): Number of images.
 
     Methods:
-
+        __init__: Initialize the LoadImagesAndVideos object.
+        __iter__: Returns an iterator object for VideoStream or ImageFolder.
+        __next__: Returns the next batch of images or video frames along with their paths and metadata.
+        _new_video: Creates a new video capture object for the given path.
+        __len__: Returns the number of batches in the object.
+
+    Examples:
+        >>> loader = LoadImagesAndVideos("path/to/data", batch=32, vid_stride=1)
+        >>> for paths, imgs, info in loader:
+        ...     # Process batch of images or video frames
+        ...     pass
+
+    Notes:
+        - Supports various image formats including HEIC.
+        - Handles both local files and directories.
+        - Can read from a text file containing paths to images and videos.
     """
 
     def __init__(self, path, batch=1, vid_stride=1):
-        """Initialize
+        """Initialize dataloader for images and videos, supporting various input formats."""
         parent = None
         if isinstance(path, str) and Path(path).suffix == ".txt":  # *.txt file with img/vid/dir on each line
             parent = Path(path).parent
@@ -291,15 +340,21 @@ class LoadImagesAndVideos:
             else:
                 raise FileNotFoundError(f"{p} does not exist")
 
-
-        videos = [
+        # Define files as images or videos
+        images, videos = [], []
+        for f in files:
+            suffix = f.split(".")[-1].lower()  # Get file extension without the dot and lowercase
+            if suffix in IMG_FORMATS:
+                images.append(f)
+            elif suffix in VID_FORMATS:
+                videos.append(f)
         ni, nv = len(images), len(videos)
 
         self.files = images + videos
         self.nf = ni + nv  # number of files
         self.ni = ni  # number of images
         self.video_flag = [False] * ni + [True] * nv
-        self.mode = "image"
+        self.mode = "video" if ni == 0 else "image"  # default to video if no images
         self.vid_stride = vid_stride  # video frame-rate stride
         self.bs = batch
         if any(videos):
@@ -307,22 +362,19 @@ class LoadImagesAndVideos:
         else:
             self.cap = None
         if self.nf == 0:
-            raise FileNotFoundError(
-                f"No images or videos found in {p}. "
-                f"Supported formats are:\nimages: {IMG_FORMATS}\nvideos: {VID_FORMATS}"
-            )
+            raise FileNotFoundError(f"No images or videos found in {p}. {FORMATS_HELP_MSG}")
 
     def __iter__(self):
-        """
+        """Iterates through image/video files, yielding source paths, images, and metadata."""
         self.count = 0
         return self
 
     def __next__(self):
-        """Returns the next batch of images or video frames
+        """Returns the next batch of images or video frames with their paths and metadata."""
         paths, imgs, info = [], [], []
         while len(imgs) < self.bs:
             if self.count >= self.nf:  # end of file list
-                if
+                if imgs:
                     return paths, imgs, info  # return last partial batch
                 else:
                     raise StopIteration
@@ -333,6 +385,7 @@ class LoadImagesAndVideos:
                 if not self.cap or not self.cap.isOpened():
                     self._new_video(path)
 
+                success = False
                 for _ in range(self.vid_stride):
                     success = self.cap.grab()
                     if not success:
@@ -356,13 +409,25 @@ class LoadImagesAndVideos:
                    if self.count < self.nf:
                        self._new_video(self.files[self.count])
            else:
+                # Handle image files (including HEIC)
                self.mode = "image"
-
+                if path.split(".")[-1].lower() == "heic":
+                    # Load HEIC image using Pillow with pillow-heif
+                    check_requirements("pillow-heif")
+
+                    from pillow_heif import register_heif_opener
+
+                    register_heif_opener()  # Register HEIF opener with Pillow
+                    with Image.open(path) as img:
+                        im0 = cv2.cvtColor(np.asarray(img), cv2.COLOR_RGB2BGR)  # convert image to BGR nparray
+                else:
+                    im0 = imread(path)  # BGR
                if im0 is None:
-
-
-
-
+                    LOGGER.warning(f"WARNING ⚠️ Image Read Error {path}")
+                else:
+                    paths.append(path)
+                    imgs.append(im0)
+                    info.append(f"image {self.count + 1}/{self.nf} {path}: ")
                self.count += 1  # move to the next file
                if self.count >= self.ni:  # end of image list
                    break
@@ -370,7 +435,7 @@ class LoadImagesAndVideos:
         return paths, imgs, info
 
     def _new_video(self, path):
-        """Creates a new video capture object for the given path."""
+        """Creates a new video capture object for the given path and initializes video-related attributes."""
         self.frame = 0
         self.cap = cv2.VideoCapture(path)
         self.fps = int(self.cap.get(cv2.CAP_PROP_FPS))
@@ -379,40 +444,50 @@ class LoadImagesAndVideos:
         self.frames = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT) / self.vid_stride)
 
     def __len__(self):
-        """Returns the number of
-        return math.ceil(self.nf / self.bs)  # number of
+        """Returns the number of files (images and videos) in the dataset."""
+        return math.ceil(self.nf / self.bs)  # number of batches
 
 
 class LoadPilAndNumpy:
     """
     Load images from PIL and Numpy arrays for batch processing.
 
-    This class
-
-    downstream processing.
+    This class manages loading and pre-processing of image data from both PIL and Numpy formats. It performs basic
+    validation and format conversion to ensure that the images are in the required format for downstream processing.
 
     Attributes:
-        paths (
-        im0 (
-        mode (str): Type of data being processed,
+        paths (List[str]): List of image paths or autogenerated filenames.
+        im0 (List[np.ndarray]): List of images stored as Numpy arrays.
+        mode (str): Type of data being processed, set to 'image'.
         bs (int): Batch size, equivalent to the length of `im0`.
 
     Methods:
-        _single_check
+        _single_check: Validate and format a single image to a Numpy array.
+
+    Examples:
+        >>> from PIL import Image
+        >>> import numpy as np
+        >>> pil_img = Image.new("RGB", (100, 100))
+        >>> np_img = np.random.randint(0, 255, (100, 100, 3), dtype=np.uint8)
+        >>> loader = LoadPilAndNumpy([pil_img, np_img])
+        >>> paths, images, _ = next(iter(loader))
+        >>> print(f"Loaded {len(images)} images")
+        Loaded 2 images
     """
 
     def __init__(self, im0):
-        """
+        """Initializes a loader for PIL and Numpy images, converting inputs to a standardized format."""
         if not isinstance(im0, list):
             im0 = [im0]
-
+        # use `image{i}.jpg` when Image.filename returns an empty path.
+        self.paths = [getattr(im, "filename", "") or f"image{i}.jpg" for i, im in enumerate(im0)]
         self.im0 = [self._single_check(im) for im in im0]
         self.mode = "image"
         self.bs = len(self.im0)
 
     @staticmethod
     def _single_check(im):
-        """Validate and format an image to numpy array."""
+        """Validate and format an image to numpy array, ensuring RGB order and contiguous memory."""
         assert isinstance(im, (Image.Image, np.ndarray)), f"Expected PIL/np.ndarray image type, but got {type(im)}"
         if isinstance(im, Image.Image):
             if im.mode != "RGB":
@@ -422,41 +497,48 @@ class LoadPilAndNumpy:
             return im
 
     def __len__(self):
-        """Returns the length of the 'im0' attribute."""
+        """Returns the length of the 'im0' attribute, representing the number of loaded images."""
         return len(self.im0)
 
     def __next__(self):
-        """Returns
+        """Returns the next batch of images, paths, and metadata for processing."""
         if self.count == 1:  # loop only once as it's batch inference
             raise StopIteration
         self.count += 1
         return self.paths, self.im0, [""] * self.bs
 
     def __iter__(self):
-        """
+        """Iterates through PIL/numpy images, yielding paths, raw images, and metadata for processing."""
         self.count = 0
         return self
 
 
 class LoadTensor:
     """
-
+    A class for loading and processing tensor data for object detection tasks.
 
-    This class
+    This class handles the loading and pre-processing of image data from PyTorch tensors, preparing them for
+    further processing in object detection pipelines.
 
     Attributes:
-        im0 (torch.Tensor): The input tensor containing the image(s).
+        im0 (torch.Tensor): The input tensor containing the image(s) with shape (B, C, H, W).
         bs (int): Batch size, inferred from the shape of `im0`.
-        mode (str): Current mode, set to 'image'.
-        paths (
-        count (int): Counter for iteration, initialized at 0 during `__iter__()`.
+        mode (str): Current processing mode, set to 'image'.
+        paths (List[str]): List of image paths or auto-generated filenames.
 
     Methods:
-        _single_check
+        _single_check: Validates and formats an input tensor.
+
+    Examples:
+        >>> import torch
+        >>> tensor = torch.rand(1, 3, 640, 640)
+        >>> loader = LoadTensor(tensor)
+        >>> paths, images, info = next(iter(loader))
+        >>> print(f"Processed {len(images)} images")
    """
 
     def __init__(self, im0) -> None:
-        """Initialize Tensor
+        """Initialize LoadTensor object for processing torch.Tensor image data."""
         self.im0 = self._single_check(im0)
         self.bs = self.im0.shape[0]
         self.mode = "image"
@@ -464,7 +546,7 @@ class LoadTensor:
 
     @staticmethod
     def _single_check(im, stride=32):
-        """
+        """Validates and formats a single image tensor, ensuring correct shape and normalization."""
         s = (
             f"WARNING ⚠️ torch.Tensor inputs should be BCHW i.e. shape(1, 3, 640, 640) "
             f"divisible by stride {stride}. Input shape{tuple(im.shape)} is incompatible."
@@ -486,24 +568,24 @@ class LoadTensor:
         return im
 
     def __iter__(self):
-        """
+        """Yields an iterator object for iterating through tensor image data."""
         self.count = 0
         return self
 
     def __next__(self):
-        """
+        """Yields the next batch of tensor images and metadata for processing."""
         if self.count == 1:
             raise StopIteration
         self.count += 1
         return self.paths, self.im0, [""] * self.bs
 
     def __len__(self):
-        """Returns the batch size."""
+        """Returns the batch size of the tensor input."""
         return self.bs
 
 
 def autocast_list(source):
-    """Merges a list of
+    """Merges a list of sources into a list of numpy arrays or PIL images for Ultralytics prediction."""
     files = []
     for im in source:
         if isinstance(im, (str, Path)):  # filename or uri
@@ -519,26 +601,47 @@ def autocast_list(source):
     return files
 
 
-def get_best_youtube_url(url,
+def get_best_youtube_url(url, method="pytube"):
     """
     Retrieves the URL of the best quality MP4 video stream from a given YouTube video.
 
-    This function uses the pafy or yt_dlp library to extract the video info from YouTube. It then finds the highest
-    quality MP4 format that has video codec but no audio codec, and returns the URL of this video stream.
-
     Args:
         url (str): The URL of the YouTube video.
-
+        method (str): The method to use for extracting video info. Options are "pytube", "pafy", and "yt-dlp".
+            Defaults to "pytube".
 
     Returns:
-        (str): The URL of the best quality MP4 video stream, or None if no suitable stream is found.
+        (str | None): The URL of the best quality MP4 video stream, or None if no suitable stream is found.
+
+    Examples:
+        >>> url = "https://www.youtube.com/watch?v=dQw4w9WgXcQ"
+        >>> best_url = get_best_youtube_url(url)
+        >>> print(best_url)
+        https://rr4---sn-q4flrnek.googlevideo.com/videoplayback?expire=...
+
+    Notes:
+        - Requires additional libraries based on the chosen method: pytubefix, pafy, or yt-dlp.
+        - The function prioritizes streams with at least 1080p resolution when available.
+        - For the "yt-dlp" method, it looks for formats with video codec, no audio, and *.mp4 extension.
     """
-    if
+    if method == "pytube":
+        # Switched from pytube to pytubefix to resolve https://github.com/pytube/pytube/issues/1954
+        check_requirements("pytubefix>=6.5.2")
+        from pytubefix import YouTube
+
+        streams = YouTube(url).streams.filter(file_extension="mp4", only_video=True)
+        streams = sorted(streams, key=lambda s: s.resolution, reverse=True)  # sort streams by resolution
+        for stream in streams:
+            if stream.resolution and int(stream.resolution[:-1]) >= 1080:  # check if resolution is at least 1080p
+                return stream.url
+
+    elif method == "pafy":
         check_requirements(("pafy", "youtube_dl==2020.12.2"))
         import pafy  # noqa
 
         return pafy.new(url).getbestvideo(preftype="mp4").url
-
+
+    elif method == "yt-dlp":
         check_requirements("yt-dlp")
         import yt_dlp
 
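For reference, the HEIC branch added to `LoadImagesAndVideos.__next__` above can be exercised on its own with the sketch below. This is an illustrative snippet rather than part of the package: it assumes `pillow-heif`, `opencv-python`, and `numpy` are installed, and `photo.heic` is a placeholder path.

```python
import cv2
import numpy as np
from PIL import Image
from pillow_heif import register_heif_opener  # pip install pillow-heif

register_heif_opener()  # allow Pillow to open .heic files

# Mirrors the new code path: Pillow decodes the HEIC file, then the RGB array
# is converted to BGR so it matches what imread() returns for other formats.
with Image.open("photo.heic") as img:  # placeholder path
    im0 = cv2.cvtColor(np.asarray(img), cv2.COLOR_RGB2BGR)

print(im0.shape)  # (H, W, 3) BGR array, ready for the usual prediction pipeline
```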