ultralytics 8.1.29__py3-none-any.whl → 8.3.63__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (247)
  1. tests/__init__.py +22 -0
  2. tests/conftest.py +83 -0
  3. tests/test_cli.py +122 -0
  4. tests/test_cuda.py +155 -0
  5. tests/test_engine.py +131 -0
  6. tests/test_exports.py +216 -0
  7. tests/test_integrations.py +150 -0
  8. tests/test_python.py +615 -0
  9. tests/test_solutions.py +94 -0
  10. ultralytics/__init__.py +11 -8
  11. ultralytics/cfg/__init__.py +569 -131
  12. ultralytics/cfg/datasets/Argoverse.yaml +2 -1
  13. ultralytics/cfg/datasets/DOTAv1.5.yaml +3 -2
  14. ultralytics/cfg/datasets/DOTAv1.yaml +3 -2
  15. ultralytics/cfg/datasets/GlobalWheat2020.yaml +3 -2
  16. ultralytics/cfg/datasets/ImageNet.yaml +2 -1
  17. ultralytics/cfg/datasets/Objects365.yaml +5 -4
  18. ultralytics/cfg/datasets/SKU-110K.yaml +2 -1
  19. ultralytics/cfg/datasets/VOC.yaml +3 -2
  20. ultralytics/cfg/datasets/VisDrone.yaml +6 -5
  21. ultralytics/cfg/datasets/african-wildlife.yaml +25 -0
  22. ultralytics/cfg/datasets/brain-tumor.yaml +23 -0
  23. ultralytics/cfg/datasets/carparts-seg.yaml +3 -2
  24. ultralytics/cfg/datasets/coco-pose.yaml +7 -6
  25. ultralytics/cfg/datasets/coco.yaml +3 -2
  26. ultralytics/cfg/datasets/coco128-seg.yaml +4 -3
  27. ultralytics/cfg/datasets/coco128.yaml +4 -3
  28. ultralytics/cfg/datasets/coco8-pose.yaml +3 -2
  29. ultralytics/cfg/datasets/coco8-seg.yaml +3 -2
  30. ultralytics/cfg/datasets/coco8.yaml +3 -2
  31. ultralytics/cfg/datasets/crack-seg.yaml +3 -2
  32. ultralytics/cfg/datasets/dog-pose.yaml +24 -0
  33. ultralytics/cfg/datasets/dota8.yaml +3 -2
  34. ultralytics/cfg/datasets/hand-keypoints.yaml +26 -0
  35. ultralytics/cfg/datasets/lvis.yaml +1236 -0
  36. ultralytics/cfg/datasets/medical-pills.yaml +22 -0
  37. ultralytics/cfg/datasets/open-images-v7.yaml +2 -1
  38. ultralytics/cfg/datasets/package-seg.yaml +5 -4
  39. ultralytics/cfg/datasets/signature.yaml +21 -0
  40. ultralytics/cfg/datasets/tiger-pose.yaml +3 -2
  41. ultralytics/cfg/datasets/xView.yaml +2 -1
  42. ultralytics/cfg/default.yaml +14 -11
  43. ultralytics/cfg/models/11/yolo11-cls-resnet18.yaml +24 -0
  44. ultralytics/cfg/models/11/yolo11-cls.yaml +33 -0
  45. ultralytics/cfg/models/11/yolo11-obb.yaml +50 -0
  46. ultralytics/cfg/models/11/yolo11-pose.yaml +51 -0
  47. ultralytics/cfg/models/11/yolo11-seg.yaml +50 -0
  48. ultralytics/cfg/models/11/yolo11.yaml +50 -0
  49. ultralytics/cfg/models/rt-detr/rtdetr-l.yaml +5 -2
  50. ultralytics/cfg/models/rt-detr/rtdetr-resnet101.yaml +5 -2
  51. ultralytics/cfg/models/rt-detr/rtdetr-resnet50.yaml +5 -2
  52. ultralytics/cfg/models/rt-detr/rtdetr-x.yaml +5 -2
  53. ultralytics/cfg/models/v10/yolov10b.yaml +45 -0
  54. ultralytics/cfg/models/v10/yolov10l.yaml +45 -0
  55. ultralytics/cfg/models/v10/yolov10m.yaml +45 -0
  56. ultralytics/cfg/models/v10/yolov10n.yaml +45 -0
  57. ultralytics/cfg/models/v10/yolov10s.yaml +45 -0
  58. ultralytics/cfg/models/v10/yolov10x.yaml +45 -0
  59. ultralytics/cfg/models/v3/yolov3-spp.yaml +5 -2
  60. ultralytics/cfg/models/v3/yolov3-tiny.yaml +5 -2
  61. ultralytics/cfg/models/v3/yolov3.yaml +5 -2
  62. ultralytics/cfg/models/v5/yolov5-p6.yaml +5 -2
  63. ultralytics/cfg/models/v5/yolov5.yaml +5 -2
  64. ultralytics/cfg/models/v6/yolov6.yaml +5 -2
  65. ultralytics/cfg/models/v8/yolov8-cls-resnet101.yaml +5 -2
  66. ultralytics/cfg/models/v8/yolov8-cls-resnet50.yaml +5 -2
  67. ultralytics/cfg/models/v8/yolov8-cls.yaml +5 -2
  68. ultralytics/cfg/models/v8/yolov8-ghost-p2.yaml +6 -2
  69. ultralytics/cfg/models/v8/yolov8-ghost-p6.yaml +6 -2
  70. ultralytics/cfg/models/v8/yolov8-ghost.yaml +5 -2
  71. ultralytics/cfg/models/v8/yolov8-obb.yaml +5 -2
  72. ultralytics/cfg/models/v8/yolov8-p2.yaml +5 -2
  73. ultralytics/cfg/models/v8/yolov8-p6.yaml +10 -7
  74. ultralytics/cfg/models/v8/yolov8-pose-p6.yaml +5 -2
  75. ultralytics/cfg/models/v8/yolov8-pose.yaml +5 -2
  76. ultralytics/cfg/models/v8/yolov8-rtdetr.yaml +5 -2
  77. ultralytics/cfg/models/v8/yolov8-seg-p6.yaml +5 -2
  78. ultralytics/cfg/models/v8/yolov8-seg.yaml +5 -2
  79. ultralytics/cfg/models/v8/yolov8-world.yaml +5 -2
  80. ultralytics/cfg/models/v8/yolov8-worldv2.yaml +5 -2
  81. ultralytics/cfg/models/v8/yolov8.yaml +5 -2
  82. ultralytics/cfg/models/v9/yolov9c-seg.yaml +41 -0
  83. ultralytics/cfg/models/v9/yolov9c.yaml +30 -25
  84. ultralytics/cfg/models/v9/yolov9e-seg.yaml +64 -0
  85. ultralytics/cfg/models/v9/yolov9e.yaml +46 -42
  86. ultralytics/cfg/models/v9/yolov9m.yaml +41 -0
  87. ultralytics/cfg/models/v9/yolov9s.yaml +41 -0
  88. ultralytics/cfg/models/v9/yolov9t.yaml +41 -0
  89. ultralytics/cfg/solutions/default.yaml +24 -0
  90. ultralytics/cfg/trackers/botsort.yaml +8 -5
  91. ultralytics/cfg/trackers/bytetrack.yaml +8 -5
  92. ultralytics/data/__init__.py +14 -3
  93. ultralytics/data/annotator.py +37 -15
  94. ultralytics/data/augment.py +1783 -289
  95. ultralytics/data/base.py +62 -27
  96. ultralytics/data/build.py +37 -8
  97. ultralytics/data/converter.py +196 -36
  98. ultralytics/data/dataset.py +233 -94
  99. ultralytics/data/loaders.py +199 -96
  100. ultralytics/data/split_dota.py +39 -29
  101. ultralytics/data/utils.py +111 -41
  102. ultralytics/engine/__init__.py +1 -1
  103. ultralytics/engine/exporter.py +579 -244
  104. ultralytics/engine/model.py +604 -252
  105. ultralytics/engine/predictor.py +22 -11
  106. ultralytics/engine/results.py +1228 -218
  107. ultralytics/engine/trainer.py +191 -129
  108. ultralytics/engine/tuner.py +18 -18
  109. ultralytics/engine/validator.py +18 -15
  110. ultralytics/hub/__init__.py +31 -13
  111. ultralytics/hub/auth.py +11 -7
  112. ultralytics/hub/google/__init__.py +159 -0
  113. ultralytics/hub/session.py +128 -94
  114. ultralytics/hub/utils.py +20 -21
  115. ultralytics/models/__init__.py +4 -2
  116. ultralytics/models/fastsam/__init__.py +2 -3
  117. ultralytics/models/fastsam/model.py +26 -4
  118. ultralytics/models/fastsam/predict.py +127 -63
  119. ultralytics/models/fastsam/utils.py +1 -44
  120. ultralytics/models/fastsam/val.py +1 -1
  121. ultralytics/models/nas/__init__.py +1 -1
  122. ultralytics/models/nas/model.py +21 -10
  123. ultralytics/models/nas/predict.py +3 -6
  124. ultralytics/models/nas/val.py +4 -4
  125. ultralytics/models/rtdetr/__init__.py +1 -1
  126. ultralytics/models/rtdetr/model.py +1 -1
  127. ultralytics/models/rtdetr/predict.py +6 -8
  128. ultralytics/models/rtdetr/train.py +6 -2
  129. ultralytics/models/rtdetr/val.py +3 -3
  130. ultralytics/models/sam/__init__.py +3 -3
  131. ultralytics/models/sam/amg.py +29 -23
  132. ultralytics/models/sam/build.py +211 -13
  133. ultralytics/models/sam/model.py +91 -30
  134. ultralytics/models/sam/modules/__init__.py +1 -1
  135. ultralytics/models/sam/modules/blocks.py +1129 -0
  136. ultralytics/models/sam/modules/decoders.py +381 -53
  137. ultralytics/models/sam/modules/encoders.py +515 -324
  138. ultralytics/models/sam/modules/memory_attention.py +237 -0
  139. ultralytics/models/sam/modules/sam.py +969 -21
  140. ultralytics/models/sam/modules/tiny_encoder.py +425 -154
  141. ultralytics/models/sam/modules/transformer.py +159 -60
  142. ultralytics/models/sam/modules/utils.py +293 -0
  143. ultralytics/models/sam/predict.py +1263 -132
  144. ultralytics/models/utils/__init__.py +1 -1
  145. ultralytics/models/utils/loss.py +36 -24
  146. ultralytics/models/utils/ops.py +3 -7
  147. ultralytics/models/yolo/__init__.py +3 -3
  148. ultralytics/models/yolo/classify/__init__.py +1 -1
  149. ultralytics/models/yolo/classify/predict.py +7 -8
  150. ultralytics/models/yolo/classify/train.py +17 -22
  151. ultralytics/models/yolo/classify/val.py +8 -4
  152. ultralytics/models/yolo/detect/__init__.py +1 -1
  153. ultralytics/models/yolo/detect/predict.py +3 -5
  154. ultralytics/models/yolo/detect/train.py +11 -4
  155. ultralytics/models/yolo/detect/val.py +90 -52
  156. ultralytics/models/yolo/model.py +14 -9
  157. ultralytics/models/yolo/obb/__init__.py +1 -1
  158. ultralytics/models/yolo/obb/predict.py +2 -2
  159. ultralytics/models/yolo/obb/train.py +5 -3
  160. ultralytics/models/yolo/obb/val.py +41 -23
  161. ultralytics/models/yolo/pose/__init__.py +1 -1
  162. ultralytics/models/yolo/pose/predict.py +3 -5
  163. ultralytics/models/yolo/pose/train.py +2 -2
  164. ultralytics/models/yolo/pose/val.py +51 -17
  165. ultralytics/models/yolo/segment/__init__.py +1 -1
  166. ultralytics/models/yolo/segment/predict.py +3 -5
  167. ultralytics/models/yolo/segment/train.py +2 -2
  168. ultralytics/models/yolo/segment/val.py +60 -19
  169. ultralytics/models/yolo/world/__init__.py +5 -0
  170. ultralytics/models/yolo/world/train.py +92 -0
  171. ultralytics/models/yolo/world/train_world.py +109 -0
  172. ultralytics/nn/__init__.py +1 -1
  173. ultralytics/nn/autobackend.py +228 -93
  174. ultralytics/nn/modules/__init__.py +39 -14
  175. ultralytics/nn/modules/activation.py +21 -0
  176. ultralytics/nn/modules/block.py +526 -66
  177. ultralytics/nn/modules/conv.py +24 -7
  178. ultralytics/nn/modules/head.py +177 -34
  179. ultralytics/nn/modules/transformer.py +6 -5
  180. ultralytics/nn/modules/utils.py +1 -2
  181. ultralytics/nn/tasks.py +226 -82
  182. ultralytics/solutions/__init__.py +30 -1
  183. ultralytics/solutions/ai_gym.py +96 -143
  184. ultralytics/solutions/analytics.py +247 -0
  185. ultralytics/solutions/distance_calculation.py +78 -135
  186. ultralytics/solutions/heatmap.py +93 -247
  187. ultralytics/solutions/object_counter.py +184 -259
  188. ultralytics/solutions/parking_management.py +246 -0
  189. ultralytics/solutions/queue_management.py +112 -0
  190. ultralytics/solutions/region_counter.py +116 -0
  191. ultralytics/solutions/security_alarm.py +144 -0
  192. ultralytics/solutions/solutions.py +178 -0
  193. ultralytics/solutions/speed_estimation.py +86 -174
  194. ultralytics/solutions/streamlit_inference.py +190 -0
  195. ultralytics/solutions/trackzone.py +68 -0
  196. ultralytics/trackers/__init__.py +1 -1
  197. ultralytics/trackers/basetrack.py +32 -13
  198. ultralytics/trackers/bot_sort.py +61 -28
  199. ultralytics/trackers/byte_tracker.py +83 -51
  200. ultralytics/trackers/track.py +21 -6
  201. ultralytics/trackers/utils/__init__.py +1 -1
  202. ultralytics/trackers/utils/gmc.py +62 -48
  203. ultralytics/trackers/utils/kalman_filter.py +166 -35
  204. ultralytics/trackers/utils/matching.py +40 -21
  205. ultralytics/utils/__init__.py +511 -239
  206. ultralytics/utils/autobatch.py +40 -22
  207. ultralytics/utils/benchmarks.py +266 -85
  208. ultralytics/utils/callbacks/__init__.py +1 -1
  209. ultralytics/utils/callbacks/base.py +1 -3
  210. ultralytics/utils/callbacks/clearml.py +7 -6
  211. ultralytics/utils/callbacks/comet.py +39 -17
  212. ultralytics/utils/callbacks/dvc.py +1 -1
  213. ultralytics/utils/callbacks/hub.py +16 -16
  214. ultralytics/utils/callbacks/mlflow.py +28 -24
  215. ultralytics/utils/callbacks/neptune.py +6 -2
  216. ultralytics/utils/callbacks/raytune.py +3 -4
  217. ultralytics/utils/callbacks/tensorboard.py +18 -18
  218. ultralytics/utils/callbacks/wb.py +27 -20
  219. ultralytics/utils/checks.py +172 -100
  220. ultralytics/utils/dist.py +2 -1
  221. ultralytics/utils/downloads.py +40 -34
  222. ultralytics/utils/errors.py +1 -1
  223. ultralytics/utils/files.py +72 -38
  224. ultralytics/utils/instance.py +41 -19
  225. ultralytics/utils/loss.py +83 -55
  226. ultralytics/utils/metrics.py +61 -56
  227. ultralytics/utils/ops.py +94 -89
  228. ultralytics/utils/patches.py +30 -14
  229. ultralytics/utils/plotting.py +600 -269
  230. ultralytics/utils/tal.py +67 -26
  231. ultralytics/utils/torch_utils.py +305 -112
  232. ultralytics/utils/triton.py +2 -1
  233. ultralytics/utils/tuner.py +21 -12
  234. ultralytics-8.3.63.dist-info/METADATA +370 -0
  235. ultralytics-8.3.63.dist-info/RECORD +241 -0
  236. {ultralytics-8.1.29.dist-info → ultralytics-8.3.63.dist-info}/WHEEL +1 -1
  237. ultralytics/data/explorer/__init__.py +0 -5
  238. ultralytics/data/explorer/explorer.py +0 -472
  239. ultralytics/data/explorer/gui/__init__.py +0 -1
  240. ultralytics/data/explorer/gui/dash.py +0 -268
  241. ultralytics/data/explorer/utils.py +0 -166
  242. ultralytics/models/fastsam/prompt.py +0 -357
  243. ultralytics-8.1.29.dist-info/METADATA +0 -373
  244. ultralytics-8.1.29.dist-info/RECORD +0 -197
  245. {ultralytics-8.1.29.dist-info → ultralytics-8.3.63.dist-info}/LICENSE +0 -0
  246. {ultralytics-8.1.29.dist-info → ultralytics-8.3.63.dist-info}/entry_points.txt +0 -0
  247. {ultralytics-8.1.29.dist-info → ultralytics-8.3.63.dist-info}/top_level.txt +0 -0
@@ -1,4 +1,4 @@
1
- # Ultralytics YOLO 🚀, AGPL-3.0 license
1
+ # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
2
2
 
3
3
  import glob
4
4
  import math
@@ -15,14 +15,32 @@ import requests
15
15
  import torch
16
16
  from PIL import Image
17
17
 
18
- from ultralytics.data.utils import IMG_FORMATS, VID_FORMATS
19
- from ultralytics.utils import LOGGER, is_colab, is_kaggle, ops
18
+ from ultralytics.data.utils import FORMATS_HELP_MSG, IMG_FORMATS, VID_FORMATS
19
+ from ultralytics.utils import IS_COLAB, IS_KAGGLE, LOGGER, ops
20
20
  from ultralytics.utils.checks import check_requirements
21
+ from ultralytics.utils.patches import imread
21
22
 
22
23
 
23
24
  @dataclass
24
25
  class SourceTypes:
25
- """Class to represent various types of input sources for predictions."""
26
+ """
27
+ Class to represent various types of input sources for predictions.
28
+
29
+ This class uses dataclass to define boolean flags for different types of input sources that can be used for
30
+ making predictions with YOLO models.
31
+
32
+ Attributes:
33
+ stream (bool): Flag indicating if the input source is a video stream.
34
+ screenshot (bool): Flag indicating if the input source is a screenshot.
35
+ from_img (bool): Flag indicating if the input source is an image file.
36
+
37
+ Examples:
38
+ >>> source_types = SourceTypes(stream=True, screenshot=False, from_img=False)
39
+ >>> print(source_types.stream)
40
+ True
41
+ >>> print(source_types.from_img)
42
+ False
43
+ """
26
44
 
27
45
  stream: bool = False
28
46
  screenshot: bool = False
@@ -32,38 +50,47 @@ class SourceTypes:
32
50
 
33
51
  class LoadStreams:
34
52
  """
35
- Stream Loader for various types of video streams, Supports RTSP, RTMP, HTTP, and TCP streams.
53
+ Stream Loader for various types of video streams.
54
+
55
+ Supports RTSP, RTMP, HTTP, and TCP streams. This class handles the loading and processing of multiple video
56
+ streams simultaneously, making it suitable for real-time video analysis tasks.
36
57
 
37
58
  Attributes:
38
- sources (str): The source input paths or URLs for the video streams.
39
- vid_stride (int): Video frame-rate stride, defaults to 1.
40
- buffer (bool): Whether to buffer input streams, defaults to False.
59
+ sources (List[str]): The source input paths or URLs for the video streams.
60
+ vid_stride (int): Video frame-rate stride.
61
+ buffer (bool): Whether to buffer input streams.
41
62
  running (bool): Flag to indicate if the streaming thread is running.
42
63
  mode (str): Set to 'stream' indicating real-time capture.
43
- imgs (list): List of image frames for each stream.
44
- fps (list): List of FPS for each stream.
45
- frames (list): List of total frames for each stream.
46
- threads (list): List of threads for each stream.
47
- shape (list): List of shapes for each stream.
48
- caps (list): List of cv2.VideoCapture objects for each stream.
64
+ imgs (List[List[np.ndarray]]): List of image frames for each stream.
65
+ fps (List[float]): List of FPS for each stream.
66
+ frames (List[int]): List of total frames for each stream.
67
+ threads (List[Thread]): List of threads for each stream.
68
+ shape (List[Tuple[int, int, int]]): List of shapes for each stream.
69
+ caps (List[cv2.VideoCapture]): List of cv2.VideoCapture objects for each stream.
49
70
  bs (int): Batch size for processing.
50
71
 
51
72
  Methods:
52
- __init__: Initialize the stream loader.
53
73
  update: Read stream frames in daemon thread.
54
74
  close: Close stream loader and release resources.
55
75
  __iter__: Returns an iterator object for the class.
56
76
  __next__: Returns source paths, transformed, and original images for processing.
57
77
  __len__: Return the length of the sources object.
58
78
 
59
- Example:
60
- ```bash
61
- yolo predict source='rtsp://example.com/media.mp4'
62
- ```
79
+ Examples:
80
+ >>> stream_loader = LoadStreams("rtsp://example.com/stream1.mp4")
81
+ >>> for sources, imgs, _ in stream_loader:
82
+ ... # Process the images
83
+ ... pass
84
+ >>> stream_loader.close()
85
+
86
+ Notes:
87
+ - The class uses threading to efficiently load frames from multiple streams simultaneously.
88
+ - It automatically handles YouTube links, converting them to the best available stream URL.
89
+ - The class implements a buffer system to manage frame storage and retrieval.
63
90
  """
64
91
 
65
92
  def __init__(self, sources="file.streams", vid_stride=1, buffer=False):
66
- """Initialize instance variables and check for consistent input stream shapes."""
93
+ """Initialize stream loader for multiple video sources, supporting various stream types."""
67
94
  torch.backends.cudnn.benchmark = True # faster for fixed-size inference
68
95
  self.buffer = buffer # buffer input streams
69
96
  self.running = True # running flag for Thread
@@ -83,11 +110,11 @@ class LoadStreams:
83
110
  for i, s in enumerate(sources): # index, source
84
111
  # Start thread to read frames from video stream
85
112
  st = f"{i + 1}/{n}: {s}... "
86
- if urlparse(s).hostname in ("www.youtube.com", "youtube.com", "youtu.be"): # if source is YouTube video
87
- # YouTube format i.e. 'https://www.youtube.com/watch?v=Zgi9g1ksQHc' or 'https://youtu.be/LNwODJXcvt4'
113
+ if urlparse(s).hostname in {"www.youtube.com", "youtube.com", "youtu.be"}: # if source is YouTube video
114
+ # YouTube format i.e. 'https://www.youtube.com/watch?v=Jsn8D3aC840' or 'https://youtu.be/Jsn8D3aC840'
88
115
  s = get_best_youtube_url(s)
89
116
  s = eval(s) if s.isnumeric() else s # i.e. s = '0' local webcam
90
- if s == 0 and (is_colab() or is_kaggle()):
117
+ if s == 0 and (IS_COLAB or IS_KAGGLE):
91
118
  raise NotImplementedError(
92
119
  "'source=0' webcam not supported in Colab and Kaggle notebooks. "
93
120
  "Try running 'source=0' in a local environment."
@@ -114,7 +141,7 @@ class LoadStreams:
114
141
  LOGGER.info("") # newline
115
142
 
116
143
  def update(self, i, cap, stream):
117
- """Read stream `i` frames in daemon thread."""
144
+ """Read stream frames in daemon thread and update image buffer."""
118
145
  n, f = 0, self.frames[i] # frame number, frame array
119
146
  while self.running and cap.isOpened() and n < (f - 1):
120
147
  if len(self.imgs[i]) < 30: # keep a <=30-image buffer
@@ -134,7 +161,7 @@ class LoadStreams:
134
161
  time.sleep(0.01) # wait until the buffer is empty
135
162
 
136
163
  def close(self):
137
- """Close stream loader and release resources."""
164
+ """Terminates stream loader, stops threads, and releases video capture resources."""
138
165
  self.running = False # stop flag for Thread
139
166
  for thread in self.threads:
140
167
  if thread.is_alive():
@@ -152,7 +179,7 @@ class LoadStreams:
152
179
  return self
153
180
 
154
181
  def __next__(self):
155
- """Returns source paths, transformed and original images for processing."""
182
+ """Returns the next batch of frames from multiple video streams for processing."""
156
183
  self.count += 1
157
184
 
158
185
  images = []
@@ -179,16 +206,16 @@ class LoadStreams:
179
206
  return self.sources, images, [""] * self.bs
180
207
 
181
208
  def __len__(self):
182
- """Return the length of the sources object."""
209
+ """Return the number of video streams in the LoadStreams object."""
183
210
  return self.bs # 1E12 frames = 32 streams at 30 FPS for 30 years
184
211
 
185
212
 
186
213
  class LoadScreenshots:
187
214
  """
188
- YOLOv8 screenshot dataloader.
215
+ Ultralytics screenshot dataloader for capturing and processing screen images.
189
216
 
190
- This class manages the loading of screenshot images for processing with YOLOv8.
191
- Suitable for use with `yolo predict source=screen`.
217
+ This class manages the loading of screenshot images for processing with YOLO. It is suitable for use with
218
+ `yolo predict source=screen`.
192
219
 
193
220
  Attributes:
194
221
  source (str): The source input indicating which screen to capture.
@@ -201,15 +228,21 @@ class LoadScreenshots:
201
228
  frame (int): Counter for captured frames.
202
229
  sct (mss.mss): Screen capture object from `mss` library.
203
230
  bs (int): Batch size, set to 1.
204
- monitor (dict): Monitor configuration details.
231
+ fps (int): Frames per second, set to 30.
232
+ monitor (Dict[str, int]): Monitor configuration details.
205
233
 
206
234
  Methods:
207
235
  __iter__: Returns an iterator object.
208
236
  __next__: Captures the next screenshot and returns it.
237
+
238
+ Examples:
239
+ >>> loader = LoadScreenshots("0 100 100 640 480") # screen 0, top-left (100,100), 640x480
240
+ >>> for source, im, im0s, vid_cap, s in loader:
241
+ ... print(f"Captured frame: {im.shape}")
209
242
  """
210
243
 
211
244
  def __init__(self, source):
212
- """Source = [screen_number left top width height] (pixels)."""
245
+ """Initialize screenshot capture with specified screen and region parameters."""
213
246
  check_requirements("mss")
214
247
  import mss # noqa
215
248
 
@@ -236,11 +269,11 @@ class LoadScreenshots:
236
269
  self.monitor = {"left": self.left, "top": self.top, "width": self.width, "height": self.height}
237
270
 
238
271
  def __iter__(self):
239
- """Returns an iterator of the object."""
272
+ """Yields the next screenshot image from the specified screen or region for processing."""
240
273
  return self
241
274
 
242
275
  def __next__(self):
243
- """mss screen capture: get raw pixels from the screen as np array."""
276
+ """Captures and returns the next screenshot as a numpy array using the mss library."""
244
277
  im0 = np.asarray(self.sct.grab(self.monitor))[:, :, :3] # BGRA to BGR
245
278
  s = f"screen {self.screen} (LTWH): {self.left},{self.top},{self.width},{self.height}: "
246
279
 
@@ -250,29 +283,45 @@ class LoadScreenshots:
250
283
 
251
284
  class LoadImagesAndVideos:
252
285
  """
253
- YOLOv8 image/video dataloader.
286
+ A class for loading and processing images and videos for YOLO object detection.
254
287
 
255
- This class manages the loading and pre-processing of image and video data for YOLOv8. It supports loading from
256
- various formats, including single image files, video files, and lists of image and video paths.
288
+ This class manages the loading and pre-processing of image and video data from various sources, including
289
+ single image files, video files, and lists of image and video paths.
257
290
 
258
291
  Attributes:
259
- files (list): List of image and video file paths.
292
+ files (List[str]): List of image and video file paths.
260
293
  nf (int): Total number of files (images and videos).
261
- video_flag (list): Flags indicating whether a file is a video (True) or an image (False).
294
+ video_flag (List[bool]): Flags indicating whether a file is a video (True) or an image (False).
262
295
  mode (str): Current mode, 'image' or 'video'.
263
- vid_stride (int): Stride for video frame-rate, defaults to 1.
264
- bs (int): Batch size, set to 1 for this class.
296
+ vid_stride (int): Stride for video frame-rate.
297
+ bs (int): Batch size.
265
298
  cap (cv2.VideoCapture): Video capture object for OpenCV.
266
299
  frame (int): Frame counter for video.
267
300
  frames (int): Total number of frames in the video.
268
- count (int): Counter for iteration, initialized at 0 during `__iter__()`.
301
+ count (int): Counter for iteration, initialized at 0 during __iter__().
302
+ ni (int): Number of images.
269
303
 
270
304
  Methods:
271
- _new_video(path): Create a new cv2.VideoCapture object for a given video path.
305
+ __init__: Initialize the LoadImagesAndVideos object.
306
+ __iter__: Returns an iterator object for VideoStream or ImageFolder.
307
+ __next__: Returns the next batch of images or video frames along with their paths and metadata.
308
+ _new_video: Creates a new video capture object for the given path.
309
+ __len__: Returns the number of batches in the object.
310
+
311
+ Examples:
312
+ >>> loader = LoadImagesAndVideos("path/to/data", batch=32, vid_stride=1)
313
+ >>> for paths, imgs, info in loader:
314
+ ... # Process batch of images or video frames
315
+ ... pass
316
+
317
+ Notes:
318
+ - Supports various image formats including HEIC.
319
+ - Handles both local files and directories.
320
+ - Can read from a text file containing paths to images and videos.
272
321
  """
273
322
 
274
323
  def __init__(self, path, batch=1, vid_stride=1):
275
- """Initialize the Dataloader and raise FileNotFoundError if file not found."""
324
+ """Initialize dataloader for images and videos, supporting various input formats."""
276
325
  parent = None
277
326
  if isinstance(path, str) and Path(path).suffix == ".txt": # *.txt file with img/vid/dir on each line
278
327
  parent = Path(path).parent
@@ -291,15 +340,21 @@ class LoadImagesAndVideos:
291
340
  else:
292
341
  raise FileNotFoundError(f"{p} does not exist")
293
342
 
294
- images = [x for x in files if x.split(".")[-1].lower() in IMG_FORMATS]
295
- videos = [x for x in files if x.split(".")[-1].lower() in VID_FORMATS]
343
+ # Define files as images or videos
344
+ images, videos = [], []
345
+ for f in files:
346
+ suffix = f.split(".")[-1].lower() # Get file extension without the dot and lowercase
347
+ if suffix in IMG_FORMATS:
348
+ images.append(f)
349
+ elif suffix in VID_FORMATS:
350
+ videos.append(f)
296
351
  ni, nv = len(images), len(videos)
297
352
 
298
353
  self.files = images + videos
299
354
  self.nf = ni + nv # number of files
300
355
  self.ni = ni # number of images
301
356
  self.video_flag = [False] * ni + [True] * nv
302
- self.mode = "image"
357
+ self.mode = "video" if ni == 0 else "image" # default to video if no images
303
358
  self.vid_stride = vid_stride # video frame-rate stride
304
359
  self.bs = batch
305
360
  if any(videos):
@@ -307,22 +362,19 @@ class LoadImagesAndVideos:
307
362
  else:
308
363
  self.cap = None
309
364
  if self.nf == 0:
310
- raise FileNotFoundError(
311
- f"No images or videos found in {p}. "
312
- f"Supported formats are:\nimages: {IMG_FORMATS}\nvideos: {VID_FORMATS}"
313
- )
365
+ raise FileNotFoundError(f"No images or videos found in {p}. {FORMATS_HELP_MSG}")
314
366
 
315
367
  def __iter__(self):
316
- """Returns an iterator object for VideoStream or ImageFolder."""
368
+ """Iterates through image/video files, yielding source paths, images, and metadata."""
317
369
  self.count = 0
318
370
  return self
319
371
 
320
372
  def __next__(self):
321
- """Returns the next batch of images or video frames along with their paths and metadata."""
373
+ """Returns the next batch of images or video frames with their paths and metadata."""
322
374
  paths, imgs, info = [], [], []
323
375
  while len(imgs) < self.bs:
324
376
  if self.count >= self.nf: # end of file list
325
- if len(imgs) > 0:
377
+ if imgs:
326
378
  return paths, imgs, info # return last partial batch
327
379
  else:
328
380
  raise StopIteration
@@ -333,6 +385,7 @@ class LoadImagesAndVideos:
333
385
  if not self.cap or not self.cap.isOpened():
334
386
  self._new_video(path)
335
387
 
388
+ success = False
336
389
  for _ in range(self.vid_stride):
337
390
  success = self.cap.grab()
338
391
  if not success:
@@ -356,13 +409,25 @@ class LoadImagesAndVideos:
356
409
  if self.count < self.nf:
357
410
  self._new_video(self.files[self.count])
358
411
  else:
412
+ # Handle image files (including HEIC)
359
413
  self.mode = "image"
360
- im0 = cv2.imread(path) # BGR
414
+ if path.split(".")[-1].lower() == "heic":
415
+ # Load HEIC image using Pillow with pillow-heif
416
+ check_requirements("pillow-heif")
417
+
418
+ from pillow_heif import register_heif_opener
419
+
420
+ register_heif_opener() # Register HEIF opener with Pillow
421
+ with Image.open(path) as img:
422
+ im0 = cv2.cvtColor(np.asarray(img), cv2.COLOR_RGB2BGR) # convert image to BGR nparray
423
+ else:
424
+ im0 = imread(path) # BGR
361
425
  if im0 is None:
362
- raise FileNotFoundError(f"Image Not Found {path}")
363
- paths.append(path)
364
- imgs.append(im0)
365
- info.append(f"image {self.count + 1}/{self.nf} {path}: ")
426
+ LOGGER.warning(f"WARNING ⚠️ Image Read Error {path}")
427
+ else:
428
+ paths.append(path)
429
+ imgs.append(im0)
430
+ info.append(f"image {self.count + 1}/{self.nf} {path}: ")
366
431
  self.count += 1 # move to the next file
367
432
  if self.count >= self.ni: # end of image list
368
433
  break
@@ -370,7 +435,7 @@ class LoadImagesAndVideos:
370
435
  return paths, imgs, info
371
436
 
372
437
  def _new_video(self, path):
373
- """Creates a new video capture object for the given path."""
438
+ """Creates a new video capture object for the given path and initializes video-related attributes."""
374
439
  self.frame = 0
375
440
  self.cap = cv2.VideoCapture(path)
376
441
  self.fps = int(self.cap.get(cv2.CAP_PROP_FPS))
@@ -379,40 +444,50 @@ class LoadImagesAndVideos:
379
444
  self.frames = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT) / self.vid_stride)
380
445
 
381
446
  def __len__(self):
382
- """Returns the number of batches in the object."""
383
- return math.ceil(self.nf / self.bs) # number of files
447
+ """Returns the number of files (images and videos) in the dataset."""
448
+ return math.ceil(self.nf / self.bs) # number of batches
384
449
 
385
450
 
386
451
  class LoadPilAndNumpy:
387
452
  """
388
453
  Load images from PIL and Numpy arrays for batch processing.
389
454
 
390
- This class is designed to manage loading and pre-processing of image data from both PIL and Numpy formats.
391
- It performs basic validation and format conversion to ensure that the images are in the required format for
392
- downstream processing.
455
+ This class manages loading and pre-processing of image data from both PIL and Numpy formats. It performs basic
456
+ validation and format conversion to ensure that the images are in the required format for downstream processing.
393
457
 
394
458
  Attributes:
395
- paths (list): List of image paths or autogenerated filenames.
396
- im0 (list): List of images stored as Numpy arrays.
397
- mode (str): Type of data being processed, defaults to 'image'.
459
+ paths (List[str]): List of image paths or autogenerated filenames.
460
+ im0 (List[np.ndarray]): List of images stored as Numpy arrays.
461
+ mode (str): Type of data being processed, set to 'image'.
398
462
  bs (int): Batch size, equivalent to the length of `im0`.
399
463
 
400
464
  Methods:
401
- _single_check(im): Validate and format a single image to a Numpy array.
465
+ _single_check: Validate and format a single image to a Numpy array.
466
+
467
+ Examples:
468
+ >>> from PIL import Image
469
+ >>> import numpy as np
470
+ >>> pil_img = Image.new("RGB", (100, 100))
471
+ >>> np_img = np.random.randint(0, 255, (100, 100, 3), dtype=np.uint8)
472
+ >>> loader = LoadPilAndNumpy([pil_img, np_img])
473
+ >>> paths, images, _ = next(iter(loader))
474
+ >>> print(f"Loaded {len(images)} images")
475
+ Loaded 2 images
402
476
  """
403
477
 
404
478
  def __init__(self, im0):
405
- """Initialize PIL and Numpy Dataloader."""
479
+ """Initializes a loader for PIL and Numpy images, converting inputs to a standardized format."""
406
480
  if not isinstance(im0, list):
407
481
  im0 = [im0]
408
- self.paths = [getattr(im, "filename", f"image{i}.jpg") for i, im in enumerate(im0)]
482
+ # use `image{i}.jpg` when Image.filename returns an empty path.
483
+ self.paths = [getattr(im, "filename", "") or f"image{i}.jpg" for i, im in enumerate(im0)]
409
484
  self.im0 = [self._single_check(im) for im in im0]
410
485
  self.mode = "image"
411
486
  self.bs = len(self.im0)
412
487
 
413
488
  @staticmethod
414
489
  def _single_check(im):
415
- """Validate and format an image to numpy array."""
490
+ """Validate and format an image to numpy array, ensuring RGB order and contiguous memory."""
416
491
  assert isinstance(im, (Image.Image, np.ndarray)), f"Expected PIL/np.ndarray image type, but got {type(im)}"
417
492
  if isinstance(im, Image.Image):
418
493
  if im.mode != "RGB":
@@ -422,41 +497,48 @@ class LoadPilAndNumpy:
422
497
  return im
423
498
 
424
499
  def __len__(self):
425
- """Returns the length of the 'im0' attribute."""
500
+ """Returns the length of the 'im0' attribute, representing the number of loaded images."""
426
501
  return len(self.im0)
427
502
 
428
503
  def __next__(self):
429
- """Returns batch paths, images, processed images, None, ''."""
504
+ """Returns the batch of paths, images, and empty info strings once, then raises StopIteration."""
430
505
  if self.count == 1: # loop only once as it's batch inference
431
506
  raise StopIteration
432
507
  self.count += 1
433
508
  return self.paths, self.im0, [""] * self.bs
434
509
 
435
510
  def __iter__(self):
436
- """Enables iteration for class LoadPilAndNumpy."""
511
+ """Resets the iteration counter and returns the loader itself as an iterator over PIL/numpy images."""
437
512
  self.count = 0
438
513
  return self
439
514
 
440
515
 
441
516
  class LoadTensor:
442
517
  """
443
- Load images from torch.Tensor data.
518
+ A class for loading and processing tensor data for object detection tasks.
444
519
 
445
- This class manages the loading and pre-processing of image data from PyTorch tensors for further processing.
520
+ This class handles the loading and pre-processing of image data from PyTorch tensors, preparing them for
521
+ further processing in object detection pipelines.
446
522
 
447
523
  Attributes:
448
- im0 (torch.Tensor): The input tensor containing the image(s).
524
+ im0 (torch.Tensor): The input tensor containing the image(s) with shape (B, C, H, W).
449
525
  bs (int): Batch size, inferred from the shape of `im0`.
450
- mode (str): Current mode, set to 'image'.
451
- paths (list): List of image paths or filenames.
452
- count (int): Counter for iteration, initialized at 0 during `__iter__()`.
526
+ mode (str): Current processing mode, set to 'image'.
527
+ paths (List[str]): List of image paths or auto-generated filenames.
453
528
 
454
529
  Methods:
455
- _single_check(im, stride): Validate and possibly modify the input tensor.
530
+ _single_check: Validates and formats an input tensor.
531
+
532
+ Examples:
533
+ >>> import torch
534
+ >>> tensor = torch.rand(1, 3, 640, 640)
535
+ >>> loader = LoadTensor(tensor)
536
+ >>> paths, images, info = next(iter(loader))
537
+ >>> print(f"Processed {len(images)} images")
456
538
  """
457
539
 
458
540
  def __init__(self, im0) -> None:
459
- """Initialize Tensor Dataloader."""
541
+ """Initialize LoadTensor object for processing torch.Tensor image data."""
460
542
  self.im0 = self._single_check(im0)
461
543
  self.bs = self.im0.shape[0]
462
544
  self.mode = "image"
@@ -464,7 +546,7 @@ class LoadTensor:
464
546
 
465
547
  @staticmethod
466
548
  def _single_check(im, stride=32):
467
- """Validate and format an image to torch.Tensor."""
549
+ """Validates and formats a single image tensor, ensuring correct shape and normalization."""
468
550
  s = (
469
551
  f"WARNING ⚠️ torch.Tensor inputs should be BCHW i.e. shape(1, 3, 640, 640) "
470
552
  f"divisible by stride {stride}. Input shape{tuple(im.shape)} is incompatible."
@@ -486,24 +568,24 @@ class LoadTensor:
486
568
  return im
487
569
 
488
570
  def __iter__(self):
489
- """Returns an iterator object."""
571
+ """Resets the iteration counter and returns the loader itself as an iterator over tensor image data."""
490
572
  self.count = 0
491
573
  return self
492
574
 
493
575
  def __next__(self):
494
- """Return next item in the iterator."""
576
+ """Returns the batch of paths, tensor images, and empty info strings once, then raises StopIteration."""
495
577
  if self.count == 1:
496
578
  raise StopIteration
497
579
  self.count += 1
498
580
  return self.paths, self.im0, [""] * self.bs
499
581
 
500
582
  def __len__(self):
501
- """Returns the batch size."""
583
+ """Returns the batch size of the tensor input."""
502
584
  return self.bs
503
585
 
504
586
 
505
587
  def autocast_list(source):
506
- """Merges a list of source of different types into a list of numpy arrays or PIL images."""
588
+ """Merges a list of sources into a list of numpy arrays or PIL images for Ultralytics prediction."""
507
589
  files = []
508
590
  for im in source:
509
591
  if isinstance(im, (str, Path)): # filename or uri
@@ -519,26 +601,47 @@ def autocast_list(source):
519
601
  return files
520
602
 
521
603
 
522
- def get_best_youtube_url(url, use_pafy=True):
604
+ def get_best_youtube_url(url, method="pytube"):
523
605
  """
524
606
  Retrieves the URL of the best quality MP4 video stream from a given YouTube video.
525
607
 
526
- This function uses the pafy or yt_dlp library to extract the video info from YouTube. It then finds the highest
527
- quality MP4 format that has video codec but no audio codec, and returns the URL of this video stream.
528
-
529
608
  Args:
530
609
  url (str): The URL of the YouTube video.
531
- use_pafy (bool): Use the pafy package, default=True, otherwise use yt_dlp package.
610
+ method (str): The method to use for extracting video info. Options are "pytube", "pafy", and "yt-dlp".
611
+ Defaults to "pytube".
532
612
 
533
613
  Returns:
534
- (str): The URL of the best quality MP4 video stream, or None if no suitable stream is found.
614
+ (str | None): The URL of the best quality MP4 video stream, or None if no suitable stream is found.
615
+
616
+ Examples:
617
+ >>> url = "https://www.youtube.com/watch?v=dQw4w9WgXcQ"
618
+ >>> best_url = get_best_youtube_url(url)
619
+ >>> print(best_url)
620
+ https://rr4---sn-q4flrnek.googlevideo.com/videoplayback?expire=...
621
+
622
+ Notes:
623
+ - Requires additional libraries based on the chosen method: pytubefix, pafy, or yt-dlp.
624
+ - The function prioritizes streams with at least 1080p resolution when available.
625
+ - For the "yt-dlp" method, it looks for formats with video codec, no audio, and *.mp4 extension.
535
626
  """
536
- if use_pafy:
627
+ if method == "pytube":
628
+ # Switched from pytube to pytubefix to resolve https://github.com/pytube/pytube/issues/1954
629
+ check_requirements("pytubefix>=6.5.2")
630
+ from pytubefix import YouTube
631
+
632
+ streams = YouTube(url).streams.filter(file_extension="mp4", only_video=True)
633
+ streams = sorted(streams, key=lambda s: s.resolution, reverse=True) # sort streams by resolution
634
+ for stream in streams:
635
+ if stream.resolution and int(stream.resolution[:-1]) >= 1080: # check if resolution is at least 1080p
636
+ return stream.url
637
+
638
+ elif method == "pafy":
537
639
  check_requirements(("pafy", "youtube_dl==2020.12.2"))
538
640
  import pafy # noqa
539
641
 
540
642
  return pafy.new(url).getbestvideo(preftype="mp4").url
541
- else:
643
+
644
+ elif method == "yt-dlp":
542
645
  check_requirements("yt-dlp")
543
646
  import yt_dlp
544
647