ultralytics 8.0.237__py3-none-any.whl → 8.0.239__py3-none-any.whl

This diff represents the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in that registry.

This release of ultralytics has been flagged as potentially problematic.

Files changed (137)
  1. ultralytics/__init__.py +2 -2
  2. ultralytics/cfg/__init__.py +241 -138
  3. ultralytics/cfg/datasets/DOTAv1.5.yaml +1 -1
  4. ultralytics/cfg/datasets/DOTAv1.yaml +1 -1
  5. ultralytics/cfg/datasets/dota8.yaml +34 -0
  6. ultralytics/data/__init__.py +9 -2
  7. ultralytics/data/annotator.py +4 -4
  8. ultralytics/data/augment.py +186 -169
  9. ultralytics/data/base.py +54 -48
  10. ultralytics/data/build.py +34 -23
  11. ultralytics/data/converter.py +242 -70
  12. ultralytics/data/dataset.py +117 -95
  13. ultralytics/data/explorer/__init__.py +5 -0
  14. ultralytics/data/explorer/explorer.py +170 -97
  15. ultralytics/data/explorer/gui/__init__.py +1 -0
  16. ultralytics/data/explorer/gui/dash.py +146 -76
  17. ultralytics/data/explorer/utils.py +87 -25
  18. ultralytics/data/loaders.py +75 -62
  19. ultralytics/data/split_dota.py +44 -36
  20. ultralytics/data/utils.py +160 -142
  21. ultralytics/engine/exporter.py +348 -292
  22. ultralytics/engine/model.py +102 -66
  23. ultralytics/engine/predictor.py +74 -55
  24. ultralytics/engine/results.py +63 -40
  25. ultralytics/engine/trainer.py +192 -144
  26. ultralytics/engine/tuner.py +66 -59
  27. ultralytics/engine/validator.py +31 -26
  28. ultralytics/hub/__init__.py +54 -31
  29. ultralytics/hub/auth.py +28 -25
  30. ultralytics/hub/session.py +282 -133
  31. ultralytics/hub/utils.py +64 -42
  32. ultralytics/models/__init__.py +1 -1
  33. ultralytics/models/fastsam/__init__.py +1 -1
  34. ultralytics/models/fastsam/model.py +6 -6
  35. ultralytics/models/fastsam/predict.py +3 -2
  36. ultralytics/models/fastsam/prompt.py +55 -48
  37. ultralytics/models/fastsam/val.py +1 -1
  38. ultralytics/models/nas/__init__.py +1 -1
  39. ultralytics/models/nas/model.py +9 -8
  40. ultralytics/models/nas/predict.py +8 -6
  41. ultralytics/models/nas/val.py +11 -9
  42. ultralytics/models/rtdetr/__init__.py +1 -1
  43. ultralytics/models/rtdetr/model.py +11 -9
  44. ultralytics/models/rtdetr/train.py +18 -16
  45. ultralytics/models/rtdetr/val.py +25 -19
  46. ultralytics/models/sam/__init__.py +1 -1
  47. ultralytics/models/sam/amg.py +13 -14
  48. ultralytics/models/sam/build.py +44 -42
  49. ultralytics/models/sam/model.py +6 -6
  50. ultralytics/models/sam/modules/decoders.py +6 -4
  51. ultralytics/models/sam/modules/encoders.py +37 -35
  52. ultralytics/models/sam/modules/sam.py +5 -4
  53. ultralytics/models/sam/modules/tiny_encoder.py +95 -73
  54. ultralytics/models/sam/modules/transformer.py +3 -2
  55. ultralytics/models/sam/predict.py +39 -27
  56. ultralytics/models/utils/loss.py +99 -95
  57. ultralytics/models/utils/ops.py +34 -31
  58. ultralytics/models/yolo/__init__.py +1 -1
  59. ultralytics/models/yolo/classify/__init__.py +1 -1
  60. ultralytics/models/yolo/classify/predict.py +8 -6
  61. ultralytics/models/yolo/classify/train.py +37 -31
  62. ultralytics/models/yolo/classify/val.py +26 -24
  63. ultralytics/models/yolo/detect/__init__.py +1 -1
  64. ultralytics/models/yolo/detect/predict.py +8 -6
  65. ultralytics/models/yolo/detect/train.py +47 -37
  66. ultralytics/models/yolo/detect/val.py +100 -82
  67. ultralytics/models/yolo/model.py +31 -25
  68. ultralytics/models/yolo/obb/__init__.py +1 -1
  69. ultralytics/models/yolo/obb/predict.py +13 -12
  70. ultralytics/models/yolo/obb/train.py +3 -3
  71. ultralytics/models/yolo/obb/val.py +80 -58
  72. ultralytics/models/yolo/pose/__init__.py +1 -1
  73. ultralytics/models/yolo/pose/predict.py +17 -12
  74. ultralytics/models/yolo/pose/train.py +28 -25
  75. ultralytics/models/yolo/pose/val.py +91 -64
  76. ultralytics/models/yolo/segment/__init__.py +1 -1
  77. ultralytics/models/yolo/segment/predict.py +10 -8
  78. ultralytics/models/yolo/segment/train.py +16 -15
  79. ultralytics/models/yolo/segment/val.py +90 -68
  80. ultralytics/nn/__init__.py +26 -6
  81. ultralytics/nn/autobackend.py +144 -112
  82. ultralytics/nn/modules/__init__.py +96 -13
  83. ultralytics/nn/modules/block.py +28 -7
  84. ultralytics/nn/modules/conv.py +41 -23
  85. ultralytics/nn/modules/head.py +67 -59
  86. ultralytics/nn/modules/transformer.py +49 -32
  87. ultralytics/nn/modules/utils.py +20 -15
  88. ultralytics/nn/tasks.py +215 -141
  89. ultralytics/solutions/ai_gym.py +59 -47
  90. ultralytics/solutions/distance_calculation.py +22 -15
  91. ultralytics/solutions/heatmap.py +76 -54
  92. ultralytics/solutions/object_counter.py +46 -39
  93. ultralytics/solutions/speed_estimation.py +13 -16
  94. ultralytics/trackers/__init__.py +1 -1
  95. ultralytics/trackers/basetrack.py +1 -0
  96. ultralytics/trackers/bot_sort.py +2 -1
  97. ultralytics/trackers/byte_tracker.py +10 -7
  98. ultralytics/trackers/track.py +7 -7
  99. ultralytics/trackers/utils/gmc.py +25 -25
  100. ultralytics/trackers/utils/kalman_filter.py +85 -42
  101. ultralytics/trackers/utils/matching.py +8 -7
  102. ultralytics/utils/__init__.py +173 -151
  103. ultralytics/utils/autobatch.py +10 -10
  104. ultralytics/utils/benchmarks.py +76 -86
  105. ultralytics/utils/callbacks/__init__.py +1 -1
  106. ultralytics/utils/callbacks/base.py +29 -29
  107. ultralytics/utils/callbacks/clearml.py +51 -43
  108. ultralytics/utils/callbacks/comet.py +81 -66
  109. ultralytics/utils/callbacks/dvc.py +33 -26
  110. ultralytics/utils/callbacks/hub.py +44 -26
  111. ultralytics/utils/callbacks/mlflow.py +31 -24
  112. ultralytics/utils/callbacks/neptune.py +35 -25
  113. ultralytics/utils/callbacks/raytune.py +9 -4
  114. ultralytics/utils/callbacks/tensorboard.py +16 -11
  115. ultralytics/utils/callbacks/wb.py +39 -33
  116. ultralytics/utils/checks.py +189 -141
  117. ultralytics/utils/dist.py +15 -12
  118. ultralytics/utils/downloads.py +112 -96
  119. ultralytics/utils/errors.py +1 -1
  120. ultralytics/utils/files.py +11 -11
  121. ultralytics/utils/instance.py +22 -22
  122. ultralytics/utils/loss.py +117 -67
  123. ultralytics/utils/metrics.py +224 -158
  124. ultralytics/utils/ops.py +39 -29
  125. ultralytics/utils/patches.py +3 -3
  126. ultralytics/utils/plotting.py +217 -120
  127. ultralytics/utils/tal.py +19 -13
  128. ultralytics/utils/torch_utils.py +138 -109
  129. ultralytics/utils/triton.py +12 -10
  130. ultralytics/utils/tuner.py +49 -47
  131. {ultralytics-8.0.237.dist-info → ultralytics-8.0.239.dist-info}/METADATA +5 -4
  132. ultralytics-8.0.239.dist-info/RECORD +188 -0
  133. ultralytics-8.0.237.dist-info/RECORD +0 -187
  134. {ultralytics-8.0.237.dist-info → ultralytics-8.0.239.dist-info}/LICENSE +0 -0
  135. {ultralytics-8.0.237.dist-info → ultralytics-8.0.239.dist-info}/WHEEL +0 -0
  136. {ultralytics-8.0.237.dist-info → ultralytics-8.0.239.dist-info}/entry_points.txt +0 -0
  137. {ultralytics-8.0.237.dist-info → ultralytics-8.0.239.dist-info}/top_level.txt +0 -0
ultralytics/data/loaders.py
@@ -23,6 +23,7 @@ from ultralytics.utils.checks import check_requirements
  @dataclass
  class SourceTypes:
  """Class to represent various types of input sources for predictions."""
+
  webcam: bool = False
  screenshot: bool = False
  from_img: bool = False
@@ -59,12 +60,12 @@ class LoadStreams:
  __len__: Return the length of the sources object.
  """

- def __init__(self, sources='file.streams', imgsz=640, vid_stride=1, buffer=False):
+ def __init__(self, sources="file.streams", imgsz=640, vid_stride=1, buffer=False):
  """Initialize instance variables and check for consistent input stream shapes."""
  torch.backends.cudnn.benchmark = True # faster for fixed-size inference
  self.buffer = buffer # buffer input streams
  self.running = True # running flag for Thread
- self.mode = 'stream'
+ self.mode = "stream"
  self.imgsz = imgsz
  self.vid_stride = vid_stride # video frame-rate stride

@@ -79,33 +80,36 @@ class LoadStreams:
  self.sources = [ops.clean_str(x) for x in sources] # clean source names for later
  for i, s in enumerate(sources): # index, source
  # Start thread to read frames from video stream
- st = f'{i + 1}/{n}: {s}... '
- if urlparse(s).hostname in ('www.youtube.com', 'youtube.com', 'youtu.be'): # if source is YouTube video
+ st = f"{i + 1}/{n}: {s}... "
+ if urlparse(s).hostname in ("www.youtube.com", "youtube.com", "youtu.be"): # if source is YouTube video
  # YouTube format i.e. 'https://www.youtube.com/watch?v=Zgi9g1ksQHc' or 'https://youtu.be/LNwODJXcvt4'
  s = get_best_youtube_url(s)
  s = eval(s) if s.isnumeric() else s # i.e. s = '0' local webcam
  if s == 0 and (is_colab() or is_kaggle()):
- raise NotImplementedError("'source=0' webcam not supported in Colab and Kaggle notebooks. "
- "Try running 'source=0' in a local environment.")
+ raise NotImplementedError(
+ "'source=0' webcam not supported in Colab and Kaggle notebooks. "
+ "Try running 'source=0' in a local environment."
+ )
  self.caps[i] = cv2.VideoCapture(s) # store video capture object
  if not self.caps[i].isOpened():
- raise ConnectionError(f'{st}Failed to open {s}')
+ raise ConnectionError(f"{st}Failed to open {s}")
  w = int(self.caps[i].get(cv2.CAP_PROP_FRAME_WIDTH))
  h = int(self.caps[i].get(cv2.CAP_PROP_FRAME_HEIGHT))
  fps = self.caps[i].get(cv2.CAP_PROP_FPS) # warning: may return 0 or nan
  self.frames[i] = max(int(self.caps[i].get(cv2.CAP_PROP_FRAME_COUNT)), 0) or float(
- 'inf') # infinite stream fallback
+ "inf"
+ ) # infinite stream fallback
  self.fps[i] = max((fps if math.isfinite(fps) else 0) % 100, 0) or 30 # 30 FPS fallback

  success, im = self.caps[i].read() # guarantee first frame
  if not success or im is None:
- raise ConnectionError(f'{st}Failed to read images from {s}')
+ raise ConnectionError(f"{st}Failed to read images from {s}")
  self.imgs[i].append(im)
  self.shape[i] = im.shape
  self.threads[i] = Thread(target=self.update, args=([i, self.caps[i], s]), daemon=True)
- LOGGER.info(f'{st}Success ✅ ({self.frames[i]} frames of shape {w}x{h} at {self.fps[i]:.2f} FPS)')
+ LOGGER.info(f"{st}Success ✅ ({self.frames[i]} frames of shape {w}x{h} at {self.fps[i]:.2f} FPS)")
  self.threads[i].start()
- LOGGER.info('') # newline
+ LOGGER.info("") # newline

  # Check for common shapes
  self.bs = self.__len__()
@@ -121,7 +125,7 @@ class LoadStreams:
  success, im = cap.retrieve()
  if not success:
  im = np.zeros(self.shape[i], dtype=np.uint8)
- LOGGER.warning('WARNING ⚠️ Video stream unresponsive, please check your IP camera connection.')
+ LOGGER.warning("WARNING ⚠️ Video stream unresponsive, please check your IP camera connection.")
  cap.open(stream) # re-open stream if signal was lost
  if self.buffer:
  self.imgs[i].append(im)
@@ -140,7 +144,7 @@ class LoadStreams:
  try:
  cap.release() # release video capture
  except Exception as e:
- LOGGER.warning(f'WARNING ⚠️ Could not release VideoCapture object: {e}')
+ LOGGER.warning(f"WARNING ⚠️ Could not release VideoCapture object: {e}")
  cv2.destroyAllWindows()

  def __iter__(self):
@@ -154,16 +158,15 @@ class LoadStreams:

  images = []
  for i, x in enumerate(self.imgs):
-
  # Wait until a frame is available in each buffer
  while not x:
- if not self.threads[i].is_alive() or cv2.waitKey(1) == ord('q'): # q to quit
+ if not self.threads[i].is_alive() or cv2.waitKey(1) == ord("q"): # q to quit
  self.close()
  raise StopIteration
  time.sleep(1 / min(self.fps))
  x = self.imgs[i]
  if not x:
- LOGGER.warning(f'WARNING ⚠️ Waiting for stream {i}')
+ LOGGER.warning(f"WARNING ⚠️ Waiting for stream {i}")

  # Get and remove the first frame from imgs buffer
  if self.buffer:
@@ -174,7 +177,7 @@ class LoadStreams:
  images.append(x.pop(-1) if x else np.zeros(self.shape[i], dtype=np.uint8))
  x.clear()

- return self.sources, images, None, ''
+ return self.sources, images, None, ""

  def __len__(self):
  """Return the length of the sources object."""
@@ -209,7 +212,7 @@ class LoadScreenshots:

  def __init__(self, source, imgsz=640):
  """Source = [screen_number left top width height] (pixels)."""
- check_requirements('mss')
+ check_requirements("mss")
  import mss # noqa

  source, *params = source.split()
@@ -221,18 +224,18 @@ class LoadScreenshots:
  elif len(params) == 5:
  self.screen, left, top, width, height = (int(x) for x in params)
  self.imgsz = imgsz
- self.mode = 'stream'
+ self.mode = "stream"
  self.frame = 0
  self.sct = mss.mss()
  self.bs = 1

  # Parse monitor shape
  monitor = self.sct.monitors[self.screen]
- self.top = monitor['top'] if top is None else (monitor['top'] + top)
- self.left = monitor['left'] if left is None else (monitor['left'] + left)
- self.width = width or monitor['width']
- self.height = height or monitor['height']
- self.monitor = {'left': self.left, 'top': self.top, 'width': self.width, 'height': self.height}
+ self.top = monitor["top"] if top is None else (monitor["top"] + top)
+ self.left = monitor["left"] if left is None else (monitor["left"] + left)
+ self.width = width or monitor["width"]
+ self.height = height or monitor["height"]
+ self.monitor = {"left": self.left, "top": self.top, "width": self.width, "height": self.height}

  def __iter__(self):
  """Returns an iterator of the object."""
@@ -241,7 +244,7 @@ class LoadScreenshots:
  def __next__(self):
  """mss screen capture: get raw pixels from the screen as np array."""
  im0 = np.asarray(self.sct.grab(self.monitor))[:, :, :3] # BGRA to BGR
- s = f'screen {self.screen} (LTWH): {self.left},{self.top},{self.width},{self.height}: '
+ s = f"screen {self.screen} (LTWH): {self.left},{self.top},{self.width},{self.height}: "

  self.frame += 1
  return [str(self.screen)], [im0], None, s # screen, img, vid_cap, string
@@ -274,32 +277,32 @@ class LoadImages:

  def __init__(self, path, imgsz=640, vid_stride=1):
  """Initialize the Dataloader and raise FileNotFoundError if file not found."""
  parent = None
- if isinstance(path, str) and Path(path).suffix == '.txt': # *.txt file with img/vid/dir on each line
+ if isinstance(path, str) and Path(path).suffix == ".txt": # *.txt file with img/vid/dir on each line
  parent = Path(path).parent
  path = Path(path).read_text().splitlines() # list of sources
  files = []
  for p in sorted(path) if isinstance(path, (list, tuple)) else [path]:
  a = str(Path(p).absolute()) # do not use .resolve() https://github.com/ultralytics/ultralytics/issues/2912
- if '*' in a:
+ if "*" in a:
  files.extend(sorted(glob.glob(a, recursive=True))) # glob
  elif os.path.isdir(a):
- files.extend(sorted(glob.glob(os.path.join(a, '*.*')))) # dir
+ files.extend(sorted(glob.glob(os.path.join(a, "*.*")))) # dir
  elif os.path.isfile(a):
  files.append(a) # files (absolute or relative to CWD)
  elif parent and (parent / p).is_file():
  files.append(str((parent / p).absolute())) # files (relative to *.txt file parent)
  else:
- raise FileNotFoundError(f'{p} does not exist')
+ raise FileNotFoundError(f"{p} does not exist")

- images = [x for x in files if x.split('.')[-1].lower() in IMG_FORMATS]
- videos = [x for x in files if x.split('.')[-1].lower() in VID_FORMATS]
+ images = [x for x in files if x.split(".")[-1].lower() in IMG_FORMATS]
+ videos = [x for x in files if x.split(".")[-1].lower() in VID_FORMATS]
  ni, nv = len(images), len(videos)

  self.imgsz = imgsz
  self.files = images + videos
  self.nf = ni + nv # number of files
  self.video_flag = [False] * ni + [True] * nv
- self.mode = 'image'
+ self.mode = "image"
  self.vid_stride = vid_stride # video frame-rate stride
  self.bs = 1
  if any(videos):
@@ -307,8 +310,10 @@ class LoadImages:
  else:
  self.cap = None
  if self.nf == 0:
- raise FileNotFoundError(f'No images or videos found in {p}. '
- f'Supported formats are:\nimages: {IMG_FORMATS}\nvideos: {VID_FORMATS}')
+ raise FileNotFoundError(
+ f"No images or videos found in {p}. "
+ f"Supported formats are:\nimages: {IMG_FORMATS}\nvideos: {VID_FORMATS}"
+ )

  def __iter__(self):
  """Returns an iterator object for VideoStream or ImageFolder."""
@@ -323,7 +328,7 @@ class LoadImages:

  if self.video_flag[self.count]:
  # Read video
- self.mode = 'video'
+ self.mode = "video"
  for _ in range(self.vid_stride):
  self.cap.grab()
  success, im0 = self.cap.retrieve()
@@ -338,15 +343,15 @@ class LoadImages:

  self.frame += 1
  # im0 = self._cv2_rotate(im0) # for use if cv2 autorotation is False
- s = f'video {self.count + 1}/{self.nf} ({self.frame}/{self.frames}) {path}: '
+ s = f"video {self.count + 1}/{self.nf} ({self.frame}/{self.frames}) {path}: "

  else:
  # Read image
  self.count += 1
  im0 = cv2.imread(path) # BGR
  if im0 is None:
- raise FileNotFoundError(f'Image Not Found {path}')
- s = f'image {self.count}/{self.nf} {path}: '
+ raise FileNotFoundError(f"Image Not Found {path}")
+ s = f"image {self.count}/{self.nf} {path}: "

  return [path], [im0], self.cap, s

@@ -385,20 +390,20 @@ class LoadPilAndNumpy:
  """Initialize PIL and Numpy Dataloader."""
  if not isinstance(im0, list):
  im0 = [im0]
- self.paths = [getattr(im, 'filename', f'image{i}.jpg') for i, im in enumerate(im0)]
+ self.paths = [getattr(im, "filename", f"image{i}.jpg") for i, im in enumerate(im0)]
  self.im0 = [self._single_check(im) for im in im0]
  self.imgsz = imgsz
- self.mode = 'image'
+ self.mode = "image"
  # Generate fake paths
  self.bs = len(self.im0)

  @staticmethod
  def _single_check(im):
  """Validate and format an image to numpy array."""
- assert isinstance(im, (Image.Image, np.ndarray)), f'Expected PIL/np.ndarray image type, but got {type(im)}'
+ assert isinstance(im, (Image.Image, np.ndarray)), f"Expected PIL/np.ndarray image type, but got {type(im)}"
  if isinstance(im, Image.Image):
- if im.mode != 'RGB':
- im = im.convert('RGB')
+ if im.mode != "RGB":
+ im = im.convert("RGB")
  im = np.asarray(im)[:, :, ::-1]
  im = np.ascontiguousarray(im) # contiguous
  return im
@@ -412,7 +417,7 @@ class LoadPilAndNumpy:
  if self.count == 1: # loop only once as it's batch inference
  raise StopIteration
  self.count += 1
- return self.paths, self.im0, None, ''
+ return self.paths, self.im0, None, ""

  def __iter__(self):
  """Enables iteration for class LoadPilAndNumpy."""
@@ -441,14 +446,16 @@ class LoadTensor:
  """Initialize Tensor Dataloader."""
  self.im0 = self._single_check(im0)
  self.bs = self.im0.shape[0]
- self.mode = 'image'
- self.paths = [getattr(im, 'filename', f'image{i}.jpg') for i, im in enumerate(im0)]
+ self.mode = "image"
+ self.paths = [getattr(im, "filename", f"image{i}.jpg") for i, im in enumerate(im0)]

  @staticmethod
  def _single_check(im, stride=32):
  """Validate and format an image to torch.Tensor."""
- s = f'WARNING ⚠️ torch.Tensor inputs should be BCHW i.e. shape(1, 3, 640, 640) ' \
- f'divisible by stride {stride}. Input shape{tuple(im.shape)} is incompatible.'
+ s = (
+ f"WARNING ⚠️ torch.Tensor inputs should be BCHW i.e. shape(1, 3, 640, 640) "
+ f"divisible by stride {stride}. Input shape{tuple(im.shape)} is incompatible."
+ )
  if len(im.shape) != 4:
  if len(im.shape) != 3:
  raise ValueError(s)
@@ -457,8 +464,10 @@ class LoadTensor:
  if im.shape[2] % stride or im.shape[3] % stride:
  raise ValueError(s)
  if im.max() > 1.0 + torch.finfo(im.dtype).eps: # torch.float32 eps is 1.2e-07
- LOGGER.warning(f'WARNING ⚠️ torch.Tensor inputs should be normalized 0.0-1.0 but max value is {im.max()}. '
- f'Dividing input by 255.')
+ LOGGER.warning(
+ f"WARNING ⚠️ torch.Tensor inputs should be normalized 0.0-1.0 but max value is {im.max()}. "
+ f"Dividing input by 255."
+ )
  im = im.float() / 255.0

  return im
@@ -473,7 +482,7 @@ class LoadTensor:
  if self.count == 1:
  raise StopIteration
  self.count += 1
- return self.paths, self.im0, None, ''
+ return self.paths, self.im0, None, ""

  def __len__(self):
  """Returns the batch size."""
@@ -485,12 +494,14 @@ def autocast_list(source):
  files = []
  for im in source:
  if isinstance(im, (str, Path)): # filename or uri
- files.append(Image.open(requests.get(im, stream=True).raw if str(im).startswith('http') else im))
+ files.append(Image.open(requests.get(im, stream=True).raw if str(im).startswith("http") else im))
  elif isinstance(im, (Image.Image, np.ndarray)): # PIL or np Image
  files.append(im)
  else:
- raise TypeError(f'type {type(im).__name__} is not a supported Ultralytics prediction source type. \n'
- f'See https://docs.ultralytics.com/modes/predict for supported source types.')
+ raise TypeError(
+ f"type {type(im).__name__} is not a supported Ultralytics prediction source type. \n"
+ f"See https://docs.ultralytics.com/modes/predict for supported source types."
+ )

  return files

@@ -513,16 +524,18 @@ def get_best_youtube_url(url, use_pafy=True):
  (str): The URL of the best quality MP4 video stream, or None if no suitable stream is found.
  """
  if use_pafy:
- check_requirements(('pafy', 'youtube_dl==2020.12.2'))
+ check_requirements(("pafy", "youtube_dl==2020.12.2"))
  import pafy # noqa
- return pafy.new(url).getbestvideo(preftype='mp4').url
+
+ return pafy.new(url).getbestvideo(preftype="mp4").url
  else:
- check_requirements('yt-dlp')
+ check_requirements("yt-dlp")
  import yt_dlp
- with yt_dlp.YoutubeDL({'quiet': True}) as ydl:
+
+ with yt_dlp.YoutubeDL({"quiet": True}) as ydl:
  info_dict = ydl.extract_info(url, download=False) # extract info
- for f in reversed(info_dict.get('formats', [])): # reversed because best is usually last
+ for f in reversed(info_dict.get("formats", [])): # reversed because best is usually last
  # Find a format with video codec, no audio, *.mp4 extension at least 1920x1080 size
- good_size = (f.get('width') or 0) >= 1920 or (f.get('height') or 0) >= 1080
- if good_size and f['vcodec'] != 'none' and f['acodec'] == 'none' and f['ext'] == 'mp4':
- return f.get('url')
+ good_size = (f.get("width") or 0) >= 1920 or (f.get("height") or 0) >= 1080
+ if good_size and f["vcodec"] != "none" and f["acodec"] == "none" and f["ext"] == "mp4":
+ return f.get("url")
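For orientation, a minimal usage sketch (not part of the diff) of the stream loader reworked in the loaders.py hunks above. It assumes ultralytics 8.0.239 is installed and that the placeholder RTSP URL is replaced with a reachable stream; the constructor defaults and the (sources, images, None, "") return tuple follow the signatures shown in the diff.

from ultralytics.data.loaders import LoadStreams

# Hypothetical stream URL; swap in a reachable source before running.
loader = LoadStreams(sources="rtsp://example.com/stream", imgsz=640, vid_stride=1, buffer=False)
for paths, images, _, info in loader:  # __next__ yields (sources, images, None, "")
    print(info or f"{len(images)} frame(s) from {paths}")
    break  # one batch is enough for this sketch
loader.close()  # stop reader threads and release the capture objects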
ultralytics/data/split_dota.py
@@ -1,3 +1,5 @@
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
+
  import itertools
  import os
  from glob import glob
@@ -12,7 +14,7 @@ from tqdm import tqdm
  from ultralytics.data.utils import exif_size, img2label_paths
  from ultralytics.utils.checks import check_requirements

- check_requirements('shapely')
+ check_requirements("shapely")
  from shapely.geometry import Polygon


@@ -52,11 +54,14 @@ def bbox_iof(polygon1, bbox2, eps=1e-6):
  return outputs


- def load_yolo_dota(data_root, split='train'):
- """Load DOTA dataset.
+ def load_yolo_dota(data_root, split="train"):
+ """
+ Load DOTA dataset.
+
  Args:
  data_root (str): Data root.
  split (str): The split data set, could be train or val.
+
  Notes:
  The directory structure assumed for the DOTA dataset:
  - data_root
@@ -67,10 +72,10 @@ def load_yolo_dota(data_root, split='train'):
  - train
  - val
  """
- assert split in ['train', 'val']
- im_dir = os.path.join(data_root, f'images/{split}')
+ assert split in ["train", "val"]
+ im_dir = os.path.join(data_root, f"images/{split}")
  assert Path(im_dir).exists(), f"Can't find {im_dir}, please check your data root."
- im_files = glob(os.path.join(data_root, f'images/{split}/*'))
+ im_files = glob(os.path.join(data_root, f"images/{split}/*"))
  lb_files = img2label_paths(im_files)
  annos = []
  for im_file, lb_file in zip(im_files, lb_files):
@@ -95,7 +100,7 @@ def get_windows(im_size, crop_sizes=[1024], gaps=[200], im_rate_thr=0.6, eps=0.0
  h, w = im_size
  windows = []
  for crop_size, gap in zip(crop_sizes, gaps):
- assert crop_size > gap, f'invaild crop_size gap pair [{crop_size} {gap}]'
+ assert crop_size > gap, f"invalid crop_size gap pair [{crop_size} {gap}]"
  step = crop_size - gap

  xn = 1 if w <= crop_size else ceil((w - crop_size) / step + 1)
@@ -127,13 +132,13 @@ def get_windows(im_size, crop_sizes=[1024], gaps=[200], im_rate_thr=0.6, eps=0.0

  def get_window_obj(anno, windows, iof_thr=0.7):
  """Get objects for each window."""
- h, w = anno['ori_size']
- label = anno['label']
+ h, w = anno["ori_size"]
+ label = anno["label"]
  if len(label):
  label[:, 1::2] *= w
  label[:, 2::2] *= h
  iofs = bbox_iof(label[:, 1:], windows)
- # unnormalized and misaligned coordinates
+ # Unnormalized and misaligned coordinates
  window_anns = [(label[iofs[:, i] >= iof_thr]) for i in range(len(windows))]
  else:
  window_anns = [np.zeros((0, 9), dtype=np.float32) for _ in range(len(windows))]
@@ -141,13 +146,16 @@ def get_window_obj(anno, windows, iof_thr=0.7):


  def crop_and_save(anno, windows, window_objs, im_dir, lb_dir):
- """Crop images and save new labels.
+ """
+ Crop images and save new labels.
+
  Args:
  anno (dict): Annotation dict, including `filepath`, `label`, `ori_size` as its keys.
  windows (list): A list of windows coordinates.
  window_objs (list): A list of labels inside each window.
  im_dir (str): The output directory path of images.
  lb_dir (str): The output directory path of labels.
+
  Notes:
  The directory structure assumed for the DOTA dataset:
  - data_root
@@ -158,15 +166,15 @@ def crop_and_save(anno, windows, window_objs, im_dir, lb_dir):
  - train
  - val
  """
- im = cv2.imread(anno['filepath'])
- name = Path(anno['filepath']).stem
+ im = cv2.imread(anno["filepath"])
+ name = Path(anno["filepath"]).stem
  for i, window in enumerate(windows):
  x_start, y_start, x_stop, y_stop = window.tolist()
- new_name = name + '__' + str(x_stop - x_start) + '__' + str(x_start) + '___' + str(y_start)
+ new_name = name + "__" + str(x_stop - x_start) + "__" + str(x_start) + "___" + str(y_start)
  patch_im = im[y_start:y_stop, x_start:x_stop]
  ph, pw = patch_im.shape[:2]

- cv2.imwrite(os.path.join(im_dir, f'{new_name}.jpg'), patch_im)
+ cv2.imwrite(os.path.join(im_dir, f"{new_name}.jpg"), patch_im)
  label = window_objs[i]
  if len(label) == 0:
  continue
@@ -175,17 +183,17 @@ def crop_and_save(anno, windows, window_objs, im_dir, lb_dir):
  label[:, 1::2] /= pw
  label[:, 2::2] /= ph

- with open(os.path.join(lb_dir, f'{new_name}.txt'), 'w') as f:
+ with open(os.path.join(lb_dir, f"{new_name}.txt"), "w") as f:
  for lb in label:
- formatted_coords = ['{:.6g}'.format(coord) for coord in lb[1:]]
+ formatted_coords = ["{:.6g}".format(coord) for coord in lb[1:]]
  f.write(f"{int(lb[0])} {' '.join(formatted_coords)}\n")


- def split_images_and_labels(data_root, save_dir, split='train', crop_sizes=[1024], gaps=[200]):
+ def split_images_and_labels(data_root, save_dir, split="train", crop_sizes=[1024], gaps=[200]):
  """
  Split both images and labels.

- NOTES:
+ Notes:
  The directory structure assumed for the DOTA dataset:
  - data_root
  - images
@@ -199,14 +207,14 @@ def split_images_and_labels(data_root, save_dir, split='train', crop_sizes=[1024
  - labels
  - split
  """
- im_dir = Path(save_dir) / 'images' / split
+ im_dir = Path(save_dir) / "images" / split
  im_dir.mkdir(parents=True, exist_ok=True)
- lb_dir = Path(save_dir) / 'labels' / split
+ lb_dir = Path(save_dir) / "labels" / split
  lb_dir.mkdir(parents=True, exist_ok=True)

  annos = load_yolo_dota(data_root, split=split)
  for anno in tqdm(annos, total=len(annos), desc=split):
- windows = get_windows(anno['ori_size'], crop_sizes, gaps)
+ windows = get_windows(anno["ori_size"], crop_sizes, gaps)
  window_objs = get_window_obj(anno, windows)
  crop_and_save(anno, windows, window_objs, str(im_dir), str(lb_dir))

@@ -215,7 +223,7 @@ def split_trainval(data_root, save_dir, crop_size=1024, gap=200, rates=[1.0]):
  """
  Split train and val set of DOTA.

- NOTES:
+ Notes:
  The directory structure assumed for the DOTA dataset:
  - data_root
  - images
@@ -237,7 +245,7 @@ def split_trainval(data_root, save_dir, crop_size=1024, gap=200, rates=[1.0]):
  for r in rates:
  crop_sizes.append(int(crop_size / r))
  gaps.append(int(gap / r))
- for split in ['train', 'val']:
+ for split in ["train", "val"]:
  split_images_and_labels(data_root, save_dir, split, crop_sizes, gaps)


@@ -245,7 +253,7 @@ def split_test(data_root, save_dir, crop_size=1024, gap=200, rates=[1.0]):
  """
  Split test set of DOTA, labels are not included within this set.

- NOTES:
+ Notes:
  The directory structure assumed for the DOTA dataset:
  - data_root
  - images
@@ -259,30 +267,30 @@ def split_test(data_root, save_dir, crop_size=1024, gap=200, rates=[1.0]):
  for r in rates:
  crop_sizes.append(int(crop_size / r))
  gaps.append(int(gap / r))
- save_dir = Path(save_dir) / 'images' / 'test'
+ save_dir = Path(save_dir) / "images" / "test"
  save_dir.mkdir(parents=True, exist_ok=True)

- im_dir = Path(os.path.join(data_root, 'images/test'))
+ im_dir = Path(os.path.join(data_root, "images/test"))
  assert im_dir.exists(), f"Can't find {str(im_dir)}, please check your data root."
- im_files = glob(str(im_dir / '*'))
- for im_file in tqdm(im_files, total=len(im_files), desc='test'):
+ im_files = glob(str(im_dir / "*"))
+ for im_file in tqdm(im_files, total=len(im_files), desc="test"):
  w, h = exif_size(Image.open(im_file))
  windows = get_windows((h, w), crop_sizes=crop_sizes, gaps=gaps)
  im = cv2.imread(im_file)
  name = Path(im_file).stem
  for window in windows:
  x_start, y_start, x_stop, y_stop = window.tolist()
- new_name = (name + '__' + str(x_stop - x_start) + '__' + str(x_start) + '___' + str(y_start))
+ new_name = name + "__" + str(x_stop - x_start) + "__" + str(x_start) + "___" + str(y_start)
  patch_im = im[y_start:y_stop, x_start:x_stop]
- cv2.imwrite(os.path.join(str(save_dir), f'{new_name}.jpg'), patch_im)
+ cv2.imwrite(os.path.join(str(save_dir), f"{new_name}.jpg"), patch_im)


- if __name__ == '__main__':
+ if __name__ == "__main__":
  split_trainval(
- data_root='DOTAv2',
- save_dir='DOTAv2-split',
+ data_root="DOTAv2",
+ save_dir="DOTAv2-split",
  )
  split_test(
- data_root='DOTAv2',
- save_dir='DOTAv2-split',
+ data_root="DOTAv2",
+ save_dir="DOTAv2-split",
  )
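As a rough usage sketch (illustrative, not part of the release), the DOTA splitting helpers changed above can be driven the same way the module's __main__ block does. This assumes a local DOTAv2/ folder laid out as described in the docstrings; crops are written to DOTAv2-split/ using the default 1024-pixel crop size and 200-pixel gap.

from ultralytics.data.split_dota import split_test, split_trainval

split_trainval(data_root="DOTAv2", save_dir="DOTAv2-split")  # window the train and val images + labels
split_test(data_root="DOTAv2", save_dir="DOTAv2-split")      # window the test images (no labels)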