ultralytics 8.0.238__py3-none-any.whl → 8.0.239__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ultralytics might be problematic. Click here for more details.
- ultralytics/__init__.py +2 -2
- ultralytics/cfg/__init__.py +241 -138
- ultralytics/data/__init__.py +9 -2
- ultralytics/data/annotator.py +4 -4
- ultralytics/data/augment.py +186 -169
- ultralytics/data/base.py +54 -48
- ultralytics/data/build.py +34 -23
- ultralytics/data/converter.py +242 -70
- ultralytics/data/dataset.py +117 -95
- ultralytics/data/explorer/__init__.py +3 -1
- ultralytics/data/explorer/explorer.py +120 -100
- ultralytics/data/explorer/gui/__init__.py +1 -0
- ultralytics/data/explorer/gui/dash.py +123 -89
- ultralytics/data/explorer/utils.py +37 -39
- ultralytics/data/loaders.py +75 -62
- ultralytics/data/split_dota.py +44 -36
- ultralytics/data/utils.py +160 -142
- ultralytics/engine/exporter.py +348 -292
- ultralytics/engine/model.py +102 -66
- ultralytics/engine/predictor.py +74 -55
- ultralytics/engine/results.py +61 -41
- ultralytics/engine/trainer.py +192 -144
- ultralytics/engine/tuner.py +66 -59
- ultralytics/engine/validator.py +31 -26
- ultralytics/hub/__init__.py +54 -31
- ultralytics/hub/auth.py +28 -25
- ultralytics/hub/session.py +282 -133
- ultralytics/hub/utils.py +64 -42
- ultralytics/models/__init__.py +1 -1
- ultralytics/models/fastsam/__init__.py +1 -1
- ultralytics/models/fastsam/model.py +6 -6
- ultralytics/models/fastsam/predict.py +3 -2
- ultralytics/models/fastsam/prompt.py +55 -48
- ultralytics/models/fastsam/val.py +1 -1
- ultralytics/models/nas/__init__.py +1 -1
- ultralytics/models/nas/model.py +9 -8
- ultralytics/models/nas/predict.py +8 -6
- ultralytics/models/nas/val.py +11 -9
- ultralytics/models/rtdetr/__init__.py +1 -1
- ultralytics/models/rtdetr/model.py +11 -9
- ultralytics/models/rtdetr/train.py +18 -16
- ultralytics/models/rtdetr/val.py +25 -19
- ultralytics/models/sam/__init__.py +1 -1
- ultralytics/models/sam/amg.py +13 -14
- ultralytics/models/sam/build.py +44 -42
- ultralytics/models/sam/model.py +6 -6
- ultralytics/models/sam/modules/decoders.py +6 -4
- ultralytics/models/sam/modules/encoders.py +37 -35
- ultralytics/models/sam/modules/sam.py +5 -4
- ultralytics/models/sam/modules/tiny_encoder.py +95 -73
- ultralytics/models/sam/modules/transformer.py +3 -2
- ultralytics/models/sam/predict.py +39 -27
- ultralytics/models/utils/loss.py +99 -95
- ultralytics/models/utils/ops.py +34 -31
- ultralytics/models/yolo/__init__.py +1 -1
- ultralytics/models/yolo/classify/__init__.py +1 -1
- ultralytics/models/yolo/classify/predict.py +8 -6
- ultralytics/models/yolo/classify/train.py +37 -31
- ultralytics/models/yolo/classify/val.py +26 -24
- ultralytics/models/yolo/detect/__init__.py +1 -1
- ultralytics/models/yolo/detect/predict.py +8 -6
- ultralytics/models/yolo/detect/train.py +47 -37
- ultralytics/models/yolo/detect/val.py +100 -82
- ultralytics/models/yolo/model.py +31 -25
- ultralytics/models/yolo/obb/__init__.py +1 -1
- ultralytics/models/yolo/obb/predict.py +13 -11
- ultralytics/models/yolo/obb/train.py +3 -3
- ultralytics/models/yolo/obb/val.py +70 -59
- ultralytics/models/yolo/pose/__init__.py +1 -1
- ultralytics/models/yolo/pose/predict.py +17 -12
- ultralytics/models/yolo/pose/train.py +28 -25
- ultralytics/models/yolo/pose/val.py +91 -64
- ultralytics/models/yolo/segment/__init__.py +1 -1
- ultralytics/models/yolo/segment/predict.py +10 -8
- ultralytics/models/yolo/segment/train.py +16 -15
- ultralytics/models/yolo/segment/val.py +90 -68
- ultralytics/nn/__init__.py +26 -6
- ultralytics/nn/autobackend.py +144 -112
- ultralytics/nn/modules/__init__.py +96 -13
- ultralytics/nn/modules/block.py +28 -7
- ultralytics/nn/modules/conv.py +41 -23
- ultralytics/nn/modules/head.py +60 -52
- ultralytics/nn/modules/transformer.py +49 -32
- ultralytics/nn/modules/utils.py +20 -15
- ultralytics/nn/tasks.py +215 -141
- ultralytics/solutions/ai_gym.py +59 -47
- ultralytics/solutions/distance_calculation.py +17 -14
- ultralytics/solutions/heatmap.py +57 -55
- ultralytics/solutions/object_counter.py +46 -39
- ultralytics/solutions/speed_estimation.py +13 -16
- ultralytics/trackers/__init__.py +1 -1
- ultralytics/trackers/basetrack.py +1 -0
- ultralytics/trackers/bot_sort.py +2 -1
- ultralytics/trackers/byte_tracker.py +10 -7
- ultralytics/trackers/track.py +7 -7
- ultralytics/trackers/utils/gmc.py +25 -25
- ultralytics/trackers/utils/kalman_filter.py +85 -42
- ultralytics/trackers/utils/matching.py +8 -7
- ultralytics/utils/__init__.py +173 -152
- ultralytics/utils/autobatch.py +10 -10
- ultralytics/utils/benchmarks.py +76 -86
- ultralytics/utils/callbacks/__init__.py +1 -1
- ultralytics/utils/callbacks/base.py +29 -29
- ultralytics/utils/callbacks/clearml.py +51 -43
- ultralytics/utils/callbacks/comet.py +81 -66
- ultralytics/utils/callbacks/dvc.py +33 -26
- ultralytics/utils/callbacks/hub.py +44 -26
- ultralytics/utils/callbacks/mlflow.py +31 -24
- ultralytics/utils/callbacks/neptune.py +35 -25
- ultralytics/utils/callbacks/raytune.py +9 -4
- ultralytics/utils/callbacks/tensorboard.py +16 -11
- ultralytics/utils/callbacks/wb.py +39 -33
- ultralytics/utils/checks.py +189 -141
- ultralytics/utils/dist.py +15 -12
- ultralytics/utils/downloads.py +112 -96
- ultralytics/utils/errors.py +1 -1
- ultralytics/utils/files.py +11 -11
- ultralytics/utils/instance.py +22 -22
- ultralytics/utils/loss.py +117 -67
- ultralytics/utils/metrics.py +224 -158
- ultralytics/utils/ops.py +38 -28
- ultralytics/utils/patches.py +3 -3
- ultralytics/utils/plotting.py +217 -120
- ultralytics/utils/tal.py +19 -13
- ultralytics/utils/torch_utils.py +138 -109
- ultralytics/utils/triton.py +12 -10
- ultralytics/utils/tuner.py +49 -47
- {ultralytics-8.0.238.dist-info → ultralytics-8.0.239.dist-info}/METADATA +2 -1
- ultralytics-8.0.239.dist-info/RECORD +188 -0
- ultralytics-8.0.238.dist-info/RECORD +0 -188
- {ultralytics-8.0.238.dist-info → ultralytics-8.0.239.dist-info}/LICENSE +0 -0
- {ultralytics-8.0.238.dist-info → ultralytics-8.0.239.dist-info}/WHEEL +0 -0
- {ultralytics-8.0.238.dist-info → ultralytics-8.0.239.dist-info}/entry_points.txt +0 -0
- {ultralytics-8.0.238.dist-info → ultralytics-8.0.239.dist-info}/top_level.txt +0 -0
ultralytics/data/loaders.py
CHANGED
|
@@ -23,6 +23,7 @@ from ultralytics.utils.checks import check_requirements
|
|
|
23
23
|
@dataclass
|
|
24
24
|
class SourceTypes:
|
|
25
25
|
"""Class to represent various types of input sources for predictions."""
|
|
26
|
+
|
|
26
27
|
webcam: bool = False
|
|
27
28
|
screenshot: bool = False
|
|
28
29
|
from_img: bool = False
|
|
@@ -59,12 +60,12 @@ class LoadStreams:
|
|
|
59
60
|
__len__: Return the length of the sources object.
|
|
60
61
|
"""
|
|
61
62
|
|
|
62
|
-
def __init__(self, sources=
|
|
63
|
+
def __init__(self, sources="file.streams", imgsz=640, vid_stride=1, buffer=False):
|
|
63
64
|
"""Initialize instance variables and check for consistent input stream shapes."""
|
|
64
65
|
torch.backends.cudnn.benchmark = True # faster for fixed-size inference
|
|
65
66
|
self.buffer = buffer # buffer input streams
|
|
66
67
|
self.running = True # running flag for Thread
|
|
67
|
-
self.mode =
|
|
68
|
+
self.mode = "stream"
|
|
68
69
|
self.imgsz = imgsz
|
|
69
70
|
self.vid_stride = vid_stride # video frame-rate stride
|
|
70
71
|
|
|
@@ -79,33 +80,36 @@ class LoadStreams:
|
|
|
79
80
|
self.sources = [ops.clean_str(x) for x in sources] # clean source names for later
|
|
80
81
|
for i, s in enumerate(sources): # index, source
|
|
81
82
|
# Start thread to read frames from video stream
|
|
82
|
-
st = f
|
|
83
|
-
if urlparse(s).hostname in (
|
|
83
|
+
st = f"{i + 1}/{n}: {s}... "
|
|
84
|
+
if urlparse(s).hostname in ("www.youtube.com", "youtube.com", "youtu.be"): # if source is YouTube video
|
|
84
85
|
# YouTube format i.e. 'https://www.youtube.com/watch?v=Zgi9g1ksQHc' or 'https://youtu.be/LNwODJXcvt4'
|
|
85
86
|
s = get_best_youtube_url(s)
|
|
86
87
|
s = eval(s) if s.isnumeric() else s # i.e. s = '0' local webcam
|
|
87
88
|
if s == 0 and (is_colab() or is_kaggle()):
|
|
88
|
-
raise NotImplementedError(
|
|
89
|
-
|
|
89
|
+
raise NotImplementedError(
|
|
90
|
+
"'source=0' webcam not supported in Colab and Kaggle notebooks. "
|
|
91
|
+
"Try running 'source=0' in a local environment."
|
|
92
|
+
)
|
|
90
93
|
self.caps[i] = cv2.VideoCapture(s) # store video capture object
|
|
91
94
|
if not self.caps[i].isOpened():
|
|
92
|
-
raise ConnectionError(f
|
|
95
|
+
raise ConnectionError(f"{st}Failed to open {s}")
|
|
93
96
|
w = int(self.caps[i].get(cv2.CAP_PROP_FRAME_WIDTH))
|
|
94
97
|
h = int(self.caps[i].get(cv2.CAP_PROP_FRAME_HEIGHT))
|
|
95
98
|
fps = self.caps[i].get(cv2.CAP_PROP_FPS) # warning: may return 0 or nan
|
|
96
99
|
self.frames[i] = max(int(self.caps[i].get(cv2.CAP_PROP_FRAME_COUNT)), 0) or float(
|
|
97
|
-
|
|
100
|
+
"inf"
|
|
101
|
+
) # infinite stream fallback
|
|
98
102
|
self.fps[i] = max((fps if math.isfinite(fps) else 0) % 100, 0) or 30 # 30 FPS fallback
|
|
99
103
|
|
|
100
104
|
success, im = self.caps[i].read() # guarantee first frame
|
|
101
105
|
if not success or im is None:
|
|
102
|
-
raise ConnectionError(f
|
|
106
|
+
raise ConnectionError(f"{st}Failed to read images from {s}")
|
|
103
107
|
self.imgs[i].append(im)
|
|
104
108
|
self.shape[i] = im.shape
|
|
105
109
|
self.threads[i] = Thread(target=self.update, args=([i, self.caps[i], s]), daemon=True)
|
|
106
|
-
LOGGER.info(f
|
|
110
|
+
LOGGER.info(f"{st}Success ✅ ({self.frames[i]} frames of shape {w}x{h} at {self.fps[i]:.2f} FPS)")
|
|
107
111
|
self.threads[i].start()
|
|
108
|
-
LOGGER.info(
|
|
112
|
+
LOGGER.info("") # newline
|
|
109
113
|
|
|
110
114
|
# Check for common shapes
|
|
111
115
|
self.bs = self.__len__()
|
|
@@ -121,7 +125,7 @@ class LoadStreams:
|
|
|
121
125
|
success, im = cap.retrieve()
|
|
122
126
|
if not success:
|
|
123
127
|
im = np.zeros(self.shape[i], dtype=np.uint8)
|
|
124
|
-
LOGGER.warning(
|
|
128
|
+
LOGGER.warning("WARNING ⚠️ Video stream unresponsive, please check your IP camera connection.")
|
|
125
129
|
cap.open(stream) # re-open stream if signal was lost
|
|
126
130
|
if self.buffer:
|
|
127
131
|
self.imgs[i].append(im)
|
|
@@ -140,7 +144,7 @@ class LoadStreams:
|
|
|
140
144
|
try:
|
|
141
145
|
cap.release() # release video capture
|
|
142
146
|
except Exception as e:
|
|
143
|
-
LOGGER.warning(f
|
|
147
|
+
LOGGER.warning(f"WARNING ⚠️ Could not release VideoCapture object: {e}")
|
|
144
148
|
cv2.destroyAllWindows()
|
|
145
149
|
|
|
146
150
|
def __iter__(self):
|
|
@@ -154,16 +158,15 @@ class LoadStreams:
|
|
|
154
158
|
|
|
155
159
|
images = []
|
|
156
160
|
for i, x in enumerate(self.imgs):
|
|
157
|
-
|
|
158
161
|
# Wait until a frame is available in each buffer
|
|
159
162
|
while not x:
|
|
160
|
-
if not self.threads[i].is_alive() or cv2.waitKey(1) == ord(
|
|
163
|
+
if not self.threads[i].is_alive() or cv2.waitKey(1) == ord("q"): # q to quit
|
|
161
164
|
self.close()
|
|
162
165
|
raise StopIteration
|
|
163
166
|
time.sleep(1 / min(self.fps))
|
|
164
167
|
x = self.imgs[i]
|
|
165
168
|
if not x:
|
|
166
|
-
LOGGER.warning(f
|
|
169
|
+
LOGGER.warning(f"WARNING ⚠️ Waiting for stream {i}")
|
|
167
170
|
|
|
168
171
|
# Get and remove the first frame from imgs buffer
|
|
169
172
|
if self.buffer:
|
|
@@ -174,7 +177,7 @@ class LoadStreams:
|
|
|
174
177
|
images.append(x.pop(-1) if x else np.zeros(self.shape[i], dtype=np.uint8))
|
|
175
178
|
x.clear()
|
|
176
179
|
|
|
177
|
-
return self.sources, images, None,
|
|
180
|
+
return self.sources, images, None, ""
|
|
178
181
|
|
|
179
182
|
def __len__(self):
|
|
180
183
|
"""Return the length of the sources object."""
|
|
@@ -209,7 +212,7 @@ class LoadScreenshots:
|
|
|
209
212
|
|
|
210
213
|
def __init__(self, source, imgsz=640):
|
|
211
214
|
"""Source = [screen_number left top width height] (pixels)."""
|
|
212
|
-
check_requirements(
|
|
215
|
+
check_requirements("mss")
|
|
213
216
|
import mss # noqa
|
|
214
217
|
|
|
215
218
|
source, *params = source.split()
|
|
@@ -221,18 +224,18 @@ class LoadScreenshots:
|
|
|
221
224
|
elif len(params) == 5:
|
|
222
225
|
self.screen, left, top, width, height = (int(x) for x in params)
|
|
223
226
|
self.imgsz = imgsz
|
|
224
|
-
self.mode =
|
|
227
|
+
self.mode = "stream"
|
|
225
228
|
self.frame = 0
|
|
226
229
|
self.sct = mss.mss()
|
|
227
230
|
self.bs = 1
|
|
228
231
|
|
|
229
232
|
# Parse monitor shape
|
|
230
233
|
monitor = self.sct.monitors[self.screen]
|
|
231
|
-
self.top = monitor[
|
|
232
|
-
self.left = monitor[
|
|
233
|
-
self.width = width or monitor[
|
|
234
|
-
self.height = height or monitor[
|
|
235
|
-
self.monitor = {
|
|
234
|
+
self.top = monitor["top"] if top is None else (monitor["top"] + top)
|
|
235
|
+
self.left = monitor["left"] if left is None else (monitor["left"] + left)
|
|
236
|
+
self.width = width or monitor["width"]
|
|
237
|
+
self.height = height or monitor["height"]
|
|
238
|
+
self.monitor = {"left": self.left, "top": self.top, "width": self.width, "height": self.height}
|
|
236
239
|
|
|
237
240
|
def __iter__(self):
|
|
238
241
|
"""Returns an iterator of the object."""
|
|
@@ -241,7 +244,7 @@ class LoadScreenshots:
|
|
|
241
244
|
def __next__(self):
|
|
242
245
|
"""mss screen capture: get raw pixels from the screen as np array."""
|
|
243
246
|
im0 = np.asarray(self.sct.grab(self.monitor))[:, :, :3] # BGRA to BGR
|
|
244
|
-
s = f
|
|
247
|
+
s = f"screen {self.screen} (LTWH): {self.left},{self.top},{self.width},{self.height}: "
|
|
245
248
|
|
|
246
249
|
self.frame += 1
|
|
247
250
|
return [str(self.screen)], [im0], None, s # screen, img, vid_cap, string
|
|
@@ -274,32 +277,32 @@ class LoadImages:
|
|
|
274
277
|
def __init__(self, path, imgsz=640, vid_stride=1):
|
|
275
278
|
"""Initialize the Dataloader and raise FileNotFoundError if file not found."""
|
|
276
279
|
parent = None
|
|
277
|
-
if isinstance(path, str) and Path(path).suffix ==
|
|
280
|
+
if isinstance(path, str) and Path(path).suffix == ".txt": # *.txt file with img/vid/dir on each line
|
|
278
281
|
parent = Path(path).parent
|
|
279
282
|
path = Path(path).read_text().splitlines() # list of sources
|
|
280
283
|
files = []
|
|
281
284
|
for p in sorted(path) if isinstance(path, (list, tuple)) else [path]:
|
|
282
285
|
a = str(Path(p).absolute()) # do not use .resolve() https://github.com/ultralytics/ultralytics/issues/2912
|
|
283
|
-
if
|
|
286
|
+
if "*" in a:
|
|
284
287
|
files.extend(sorted(glob.glob(a, recursive=True))) # glob
|
|
285
288
|
elif os.path.isdir(a):
|
|
286
|
-
files.extend(sorted(glob.glob(os.path.join(a,
|
|
289
|
+
files.extend(sorted(glob.glob(os.path.join(a, "*.*")))) # dir
|
|
287
290
|
elif os.path.isfile(a):
|
|
288
291
|
files.append(a) # files (absolute or relative to CWD)
|
|
289
292
|
elif parent and (parent / p).is_file():
|
|
290
293
|
files.append(str((parent / p).absolute())) # files (relative to *.txt file parent)
|
|
291
294
|
else:
|
|
292
|
-
raise FileNotFoundError(f
|
|
295
|
+
raise FileNotFoundError(f"{p} does not exist")
|
|
293
296
|
|
|
294
|
-
images = [x for x in files if x.split(
|
|
295
|
-
videos = [x for x in files if x.split(
|
|
297
|
+
images = [x for x in files if x.split(".")[-1].lower() in IMG_FORMATS]
|
|
298
|
+
videos = [x for x in files if x.split(".")[-1].lower() in VID_FORMATS]
|
|
296
299
|
ni, nv = len(images), len(videos)
|
|
297
300
|
|
|
298
301
|
self.imgsz = imgsz
|
|
299
302
|
self.files = images + videos
|
|
300
303
|
self.nf = ni + nv # number of files
|
|
301
304
|
self.video_flag = [False] * ni + [True] * nv
|
|
302
|
-
self.mode =
|
|
305
|
+
self.mode = "image"
|
|
303
306
|
self.vid_stride = vid_stride # video frame-rate stride
|
|
304
307
|
self.bs = 1
|
|
305
308
|
if any(videos):
|
|
@@ -307,8 +310,10 @@ class LoadImages:
|
|
|
307
310
|
else:
|
|
308
311
|
self.cap = None
|
|
309
312
|
if self.nf == 0:
|
|
310
|
-
raise FileNotFoundError(
|
|
311
|
-
|
|
313
|
+
raise FileNotFoundError(
|
|
314
|
+
f"No images or videos found in {p}. "
|
|
315
|
+
f"Supported formats are:\nimages: {IMG_FORMATS}\nvideos: {VID_FORMATS}"
|
|
316
|
+
)
|
|
312
317
|
|
|
313
318
|
def __iter__(self):
|
|
314
319
|
"""Returns an iterator object for VideoStream or ImageFolder."""
|
|
@@ -323,7 +328,7 @@ class LoadImages:
|
|
|
323
328
|
|
|
324
329
|
if self.video_flag[self.count]:
|
|
325
330
|
# Read video
|
|
326
|
-
self.mode =
|
|
331
|
+
self.mode = "video"
|
|
327
332
|
for _ in range(self.vid_stride):
|
|
328
333
|
self.cap.grab()
|
|
329
334
|
success, im0 = self.cap.retrieve()
|
|
@@ -338,15 +343,15 @@ class LoadImages:
|
|
|
338
343
|
|
|
339
344
|
self.frame += 1
|
|
340
345
|
# im0 = self._cv2_rotate(im0) # for use if cv2 autorotation is False
|
|
341
|
-
s = f
|
|
346
|
+
s = f"video {self.count + 1}/{self.nf} ({self.frame}/{self.frames}) {path}: "
|
|
342
347
|
|
|
343
348
|
else:
|
|
344
349
|
# Read image
|
|
345
350
|
self.count += 1
|
|
346
351
|
im0 = cv2.imread(path) # BGR
|
|
347
352
|
if im0 is None:
|
|
348
|
-
raise FileNotFoundError(f
|
|
349
|
-
s = f
|
|
353
|
+
raise FileNotFoundError(f"Image Not Found {path}")
|
|
354
|
+
s = f"image {self.count}/{self.nf} {path}: "
|
|
350
355
|
|
|
351
356
|
return [path], [im0], self.cap, s
|
|
352
357
|
|
|
@@ -385,20 +390,20 @@ class LoadPilAndNumpy:
|
|
|
385
390
|
"""Initialize PIL and Numpy Dataloader."""
|
|
386
391
|
if not isinstance(im0, list):
|
|
387
392
|
im0 = [im0]
|
|
388
|
-
self.paths = [getattr(im,
|
|
393
|
+
self.paths = [getattr(im, "filename", f"image{i}.jpg") for i, im in enumerate(im0)]
|
|
389
394
|
self.im0 = [self._single_check(im) for im in im0]
|
|
390
395
|
self.imgsz = imgsz
|
|
391
|
-
self.mode =
|
|
396
|
+
self.mode = "image"
|
|
392
397
|
# Generate fake paths
|
|
393
398
|
self.bs = len(self.im0)
|
|
394
399
|
|
|
395
400
|
@staticmethod
|
|
396
401
|
def _single_check(im):
|
|
397
402
|
"""Validate and format an image to numpy array."""
|
|
398
|
-
assert isinstance(im, (Image.Image, np.ndarray)), f
|
|
403
|
+
assert isinstance(im, (Image.Image, np.ndarray)), f"Expected PIL/np.ndarray image type, but got {type(im)}"
|
|
399
404
|
if isinstance(im, Image.Image):
|
|
400
|
-
if im.mode !=
|
|
401
|
-
im = im.convert(
|
|
405
|
+
if im.mode != "RGB":
|
|
406
|
+
im = im.convert("RGB")
|
|
402
407
|
im = np.asarray(im)[:, :, ::-1]
|
|
403
408
|
im = np.ascontiguousarray(im) # contiguous
|
|
404
409
|
return im
|
|
@@ -412,7 +417,7 @@ class LoadPilAndNumpy:
|
|
|
412
417
|
if self.count == 1: # loop only once as it's batch inference
|
|
413
418
|
raise StopIteration
|
|
414
419
|
self.count += 1
|
|
415
|
-
return self.paths, self.im0, None,
|
|
420
|
+
return self.paths, self.im0, None, ""
|
|
416
421
|
|
|
417
422
|
def __iter__(self):
|
|
418
423
|
"""Enables iteration for class LoadPilAndNumpy."""
|
|
@@ -441,14 +446,16 @@ class LoadTensor:
|
|
|
441
446
|
"""Initialize Tensor Dataloader."""
|
|
442
447
|
self.im0 = self._single_check(im0)
|
|
443
448
|
self.bs = self.im0.shape[0]
|
|
444
|
-
self.mode =
|
|
445
|
-
self.paths = [getattr(im,
|
|
449
|
+
self.mode = "image"
|
|
450
|
+
self.paths = [getattr(im, "filename", f"image{i}.jpg") for i, im in enumerate(im0)]
|
|
446
451
|
|
|
447
452
|
@staticmethod
|
|
448
453
|
def _single_check(im, stride=32):
|
|
449
454
|
"""Validate and format an image to torch.Tensor."""
|
|
450
|
-
s =
|
|
451
|
-
f
|
|
455
|
+
s = (
|
|
456
|
+
f"WARNING ⚠️ torch.Tensor inputs should be BCHW i.e. shape(1, 3, 640, 640) "
|
|
457
|
+
f"divisible by stride {stride}. Input shape{tuple(im.shape)} is incompatible."
|
|
458
|
+
)
|
|
452
459
|
if len(im.shape) != 4:
|
|
453
460
|
if len(im.shape) != 3:
|
|
454
461
|
raise ValueError(s)
|
|
@@ -457,8 +464,10 @@ class LoadTensor:
|
|
|
457
464
|
if im.shape[2] % stride or im.shape[3] % stride:
|
|
458
465
|
raise ValueError(s)
|
|
459
466
|
if im.max() > 1.0 + torch.finfo(im.dtype).eps: # torch.float32 eps is 1.2e-07
|
|
460
|
-
LOGGER.warning(
|
|
461
|
-
|
|
467
|
+
LOGGER.warning(
|
|
468
|
+
f"WARNING ⚠️ torch.Tensor inputs should be normalized 0.0-1.0 but max value is {im.max()}. "
|
|
469
|
+
f"Dividing input by 255."
|
|
470
|
+
)
|
|
462
471
|
im = im.float() / 255.0
|
|
463
472
|
|
|
464
473
|
return im
|
|
@@ -473,7 +482,7 @@ class LoadTensor:
|
|
|
473
482
|
if self.count == 1:
|
|
474
483
|
raise StopIteration
|
|
475
484
|
self.count += 1
|
|
476
|
-
return self.paths, self.im0, None,
|
|
485
|
+
return self.paths, self.im0, None, ""
|
|
477
486
|
|
|
478
487
|
def __len__(self):
|
|
479
488
|
"""Returns the batch size."""
|
|
@@ -485,12 +494,14 @@ def autocast_list(source):
|
|
|
485
494
|
files = []
|
|
486
495
|
for im in source:
|
|
487
496
|
if isinstance(im, (str, Path)): # filename or uri
|
|
488
|
-
files.append(Image.open(requests.get(im, stream=True).raw if str(im).startswith(
|
|
497
|
+
files.append(Image.open(requests.get(im, stream=True).raw if str(im).startswith("http") else im))
|
|
489
498
|
elif isinstance(im, (Image.Image, np.ndarray)): # PIL or np Image
|
|
490
499
|
files.append(im)
|
|
491
500
|
else:
|
|
492
|
-
raise TypeError(
|
|
493
|
-
|
|
501
|
+
raise TypeError(
|
|
502
|
+
f"type {type(im).__name__} is not a supported Ultralytics prediction source type. \n"
|
|
503
|
+
f"See https://docs.ultralytics.com/modes/predict for supported source types."
|
|
504
|
+
)
|
|
494
505
|
|
|
495
506
|
return files
|
|
496
507
|
|
|
@@ -513,16 +524,18 @@ def get_best_youtube_url(url, use_pafy=True):
|
|
|
513
524
|
(str): The URL of the best quality MP4 video stream, or None if no suitable stream is found.
|
|
514
525
|
"""
|
|
515
526
|
if use_pafy:
|
|
516
|
-
check_requirements((
|
|
527
|
+
check_requirements(("pafy", "youtube_dl==2020.12.2"))
|
|
517
528
|
import pafy # noqa
|
|
518
|
-
|
|
529
|
+
|
|
530
|
+
return pafy.new(url).getbestvideo(preftype="mp4").url
|
|
519
531
|
else:
|
|
520
|
-
check_requirements(
|
|
532
|
+
check_requirements("yt-dlp")
|
|
521
533
|
import yt_dlp
|
|
522
|
-
|
|
534
|
+
|
|
535
|
+
with yt_dlp.YoutubeDL({"quiet": True}) as ydl:
|
|
523
536
|
info_dict = ydl.extract_info(url, download=False) # extract info
|
|
524
|
-
for f in reversed(info_dict.get(
|
|
537
|
+
for f in reversed(info_dict.get("formats", [])): # reversed because best is usually last
|
|
525
538
|
# Find a format with video codec, no audio, *.mp4 extension at least 1920x1080 size
|
|
526
|
-
good_size = (f.get(
|
|
527
|
-
if good_size and f[
|
|
528
|
-
return f.get(
|
|
539
|
+
good_size = (f.get("width") or 0) >= 1920 or (f.get("height") or 0) >= 1080
|
|
540
|
+
if good_size and f["vcodec"] != "none" and f["acodec"] == "none" and f["ext"] == "mp4":
|
|
541
|
+
return f.get("url")
|
ultralytics/data/split_dota.py
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
# Ultralytics YOLO 🚀, AGPL-3.0 license
|
|
2
|
+
|
|
1
3
|
import itertools
|
|
2
4
|
import os
|
|
3
5
|
from glob import glob
|
|
@@ -12,7 +14,7 @@ from tqdm import tqdm
|
|
|
12
14
|
from ultralytics.data.utils import exif_size, img2label_paths
|
|
13
15
|
from ultralytics.utils.checks import check_requirements
|
|
14
16
|
|
|
15
|
-
check_requirements(
|
|
17
|
+
check_requirements("shapely")
|
|
16
18
|
from shapely.geometry import Polygon
|
|
17
19
|
|
|
18
20
|
|
|
@@ -52,11 +54,14 @@ def bbox_iof(polygon1, bbox2, eps=1e-6):
|
|
|
52
54
|
return outputs
|
|
53
55
|
|
|
54
56
|
|
|
55
|
-
def load_yolo_dota(data_root, split=
|
|
56
|
-
"""
|
|
57
|
+
def load_yolo_dota(data_root, split="train"):
|
|
58
|
+
"""
|
|
59
|
+
Load DOTA dataset.
|
|
60
|
+
|
|
57
61
|
Args:
|
|
58
62
|
data_root (str): Data root.
|
|
59
63
|
split (str): The split data set, could be train or val.
|
|
64
|
+
|
|
60
65
|
Notes:
|
|
61
66
|
The directory structure assumed for the DOTA dataset:
|
|
62
67
|
- data_root
|
|
@@ -67,10 +72,10 @@ def load_yolo_dota(data_root, split='train'):
|
|
|
67
72
|
- train
|
|
68
73
|
- val
|
|
69
74
|
"""
|
|
70
|
-
assert split in [
|
|
71
|
-
im_dir = os.path.join(data_root, f
|
|
75
|
+
assert split in ["train", "val"]
|
|
76
|
+
im_dir = os.path.join(data_root, f"images/{split}")
|
|
72
77
|
assert Path(im_dir).exists(), f"Can't find {im_dir}, please check your data root."
|
|
73
|
-
im_files = glob(os.path.join(data_root, f
|
|
78
|
+
im_files = glob(os.path.join(data_root, f"images/{split}/*"))
|
|
74
79
|
lb_files = img2label_paths(im_files)
|
|
75
80
|
annos = []
|
|
76
81
|
for im_file, lb_file in zip(im_files, lb_files):
|
|
@@ -95,7 +100,7 @@ def get_windows(im_size, crop_sizes=[1024], gaps=[200], im_rate_thr=0.6, eps=0.0
|
|
|
95
100
|
h, w = im_size
|
|
96
101
|
windows = []
|
|
97
102
|
for crop_size, gap in zip(crop_sizes, gaps):
|
|
98
|
-
assert crop_size > gap, f
|
|
103
|
+
assert crop_size > gap, f"invalid crop_size gap pair [{crop_size} {gap}]"
|
|
99
104
|
step = crop_size - gap
|
|
100
105
|
|
|
101
106
|
xn = 1 if w <= crop_size else ceil((w - crop_size) / step + 1)
|
|
@@ -127,13 +132,13 @@ def get_windows(im_size, crop_sizes=[1024], gaps=[200], im_rate_thr=0.6, eps=0.0
|
|
|
127
132
|
|
|
128
133
|
def get_window_obj(anno, windows, iof_thr=0.7):
|
|
129
134
|
"""Get objects for each window."""
|
|
130
|
-
h, w = anno[
|
|
131
|
-
label = anno[
|
|
135
|
+
h, w = anno["ori_size"]
|
|
136
|
+
label = anno["label"]
|
|
132
137
|
if len(label):
|
|
133
138
|
label[:, 1::2] *= w
|
|
134
139
|
label[:, 2::2] *= h
|
|
135
140
|
iofs = bbox_iof(label[:, 1:], windows)
|
|
136
|
-
#
|
|
141
|
+
# Unnormalized and misaligned coordinates
|
|
137
142
|
window_anns = [(label[iofs[:, i] >= iof_thr]) for i in range(len(windows))]
|
|
138
143
|
else:
|
|
139
144
|
window_anns = [np.zeros((0, 9), dtype=np.float32) for _ in range(len(windows))]
|
|
@@ -141,13 +146,16 @@ def get_window_obj(anno, windows, iof_thr=0.7):
|
|
|
141
146
|
|
|
142
147
|
|
|
143
148
|
def crop_and_save(anno, windows, window_objs, im_dir, lb_dir):
|
|
144
|
-
"""
|
|
149
|
+
"""
|
|
150
|
+
Crop images and save new labels.
|
|
151
|
+
|
|
145
152
|
Args:
|
|
146
153
|
anno (dict): Annotation dict, including `filepath`, `label`, `ori_size` as its keys.
|
|
147
154
|
windows (list): A list of windows coordinates.
|
|
148
155
|
window_objs (list): A list of labels inside each window.
|
|
149
156
|
im_dir (str): The output directory path of images.
|
|
150
157
|
lb_dir (str): The output directory path of labels.
|
|
158
|
+
|
|
151
159
|
Notes:
|
|
152
160
|
The directory structure assumed for the DOTA dataset:
|
|
153
161
|
- data_root
|
|
@@ -158,15 +166,15 @@ def crop_and_save(anno, windows, window_objs, im_dir, lb_dir):
|
|
|
158
166
|
- train
|
|
159
167
|
- val
|
|
160
168
|
"""
|
|
161
|
-
im = cv2.imread(anno[
|
|
162
|
-
name = Path(anno[
|
|
169
|
+
im = cv2.imread(anno["filepath"])
|
|
170
|
+
name = Path(anno["filepath"]).stem
|
|
163
171
|
for i, window in enumerate(windows):
|
|
164
172
|
x_start, y_start, x_stop, y_stop = window.tolist()
|
|
165
|
-
new_name = name +
|
|
173
|
+
new_name = name + "__" + str(x_stop - x_start) + "__" + str(x_start) + "___" + str(y_start)
|
|
166
174
|
patch_im = im[y_start:y_stop, x_start:x_stop]
|
|
167
175
|
ph, pw = patch_im.shape[:2]
|
|
168
176
|
|
|
169
|
-
cv2.imwrite(os.path.join(im_dir, f
|
|
177
|
+
cv2.imwrite(os.path.join(im_dir, f"{new_name}.jpg"), patch_im)
|
|
170
178
|
label = window_objs[i]
|
|
171
179
|
if len(label) == 0:
|
|
172
180
|
continue
|
|
@@ -175,17 +183,17 @@ def crop_and_save(anno, windows, window_objs, im_dir, lb_dir):
|
|
|
175
183
|
label[:, 1::2] /= pw
|
|
176
184
|
label[:, 2::2] /= ph
|
|
177
185
|
|
|
178
|
-
with open(os.path.join(lb_dir, f
|
|
186
|
+
with open(os.path.join(lb_dir, f"{new_name}.txt"), "w") as f:
|
|
179
187
|
for lb in label:
|
|
180
|
-
formatted_coords = [
|
|
188
|
+
formatted_coords = ["{:.6g}".format(coord) for coord in lb[1:]]
|
|
181
189
|
f.write(f"{int(lb[0])} {' '.join(formatted_coords)}\n")
|
|
182
190
|
|
|
183
191
|
|
|
184
|
-
def split_images_and_labels(data_root, save_dir, split=
|
|
192
|
+
def split_images_and_labels(data_root, save_dir, split="train", crop_sizes=[1024], gaps=[200]):
|
|
185
193
|
"""
|
|
186
194
|
Split both images and labels.
|
|
187
195
|
|
|
188
|
-
|
|
196
|
+
Notes:
|
|
189
197
|
The directory structure assumed for the DOTA dataset:
|
|
190
198
|
- data_root
|
|
191
199
|
- images
|
|
@@ -199,14 +207,14 @@ def split_images_and_labels(data_root, save_dir, split='train', crop_sizes=[1024
|
|
|
199
207
|
- labels
|
|
200
208
|
- split
|
|
201
209
|
"""
|
|
202
|
-
im_dir = Path(save_dir) /
|
|
210
|
+
im_dir = Path(save_dir) / "images" / split
|
|
203
211
|
im_dir.mkdir(parents=True, exist_ok=True)
|
|
204
|
-
lb_dir = Path(save_dir) /
|
|
212
|
+
lb_dir = Path(save_dir) / "labels" / split
|
|
205
213
|
lb_dir.mkdir(parents=True, exist_ok=True)
|
|
206
214
|
|
|
207
215
|
annos = load_yolo_dota(data_root, split=split)
|
|
208
216
|
for anno in tqdm(annos, total=len(annos), desc=split):
|
|
209
|
-
windows = get_windows(anno[
|
|
217
|
+
windows = get_windows(anno["ori_size"], crop_sizes, gaps)
|
|
210
218
|
window_objs = get_window_obj(anno, windows)
|
|
211
219
|
crop_and_save(anno, windows, window_objs, str(im_dir), str(lb_dir))
|
|
212
220
|
|
|
@@ -215,7 +223,7 @@ def split_trainval(data_root, save_dir, crop_size=1024, gap=200, rates=[1.0]):
|
|
|
215
223
|
"""
|
|
216
224
|
Split train and val set of DOTA.
|
|
217
225
|
|
|
218
|
-
|
|
226
|
+
Notes:
|
|
219
227
|
The directory structure assumed for the DOTA dataset:
|
|
220
228
|
- data_root
|
|
221
229
|
- images
|
|
@@ -237,7 +245,7 @@ def split_trainval(data_root, save_dir, crop_size=1024, gap=200, rates=[1.0]):
|
|
|
237
245
|
for r in rates:
|
|
238
246
|
crop_sizes.append(int(crop_size / r))
|
|
239
247
|
gaps.append(int(gap / r))
|
|
240
|
-
for split in [
|
|
248
|
+
for split in ["train", "val"]:
|
|
241
249
|
split_images_and_labels(data_root, save_dir, split, crop_sizes, gaps)
|
|
242
250
|
|
|
243
251
|
|
|
@@ -245,7 +253,7 @@ def split_test(data_root, save_dir, crop_size=1024, gap=200, rates=[1.0]):
|
|
|
245
253
|
"""
|
|
246
254
|
Split test set of DOTA, labels are not included within this set.
|
|
247
255
|
|
|
248
|
-
|
|
256
|
+
Notes:
|
|
249
257
|
The directory structure assumed for the DOTA dataset:
|
|
250
258
|
- data_root
|
|
251
259
|
- images
|
|
@@ -259,30 +267,30 @@ def split_test(data_root, save_dir, crop_size=1024, gap=200, rates=[1.0]):
|
|
|
259
267
|
for r in rates:
|
|
260
268
|
crop_sizes.append(int(crop_size / r))
|
|
261
269
|
gaps.append(int(gap / r))
|
|
262
|
-
save_dir = Path(save_dir) /
|
|
270
|
+
save_dir = Path(save_dir) / "images" / "test"
|
|
263
271
|
save_dir.mkdir(parents=True, exist_ok=True)
|
|
264
272
|
|
|
265
|
-
im_dir = Path(os.path.join(data_root,
|
|
273
|
+
im_dir = Path(os.path.join(data_root, "images/test"))
|
|
266
274
|
assert im_dir.exists(), f"Can't find {str(im_dir)}, please check your data root."
|
|
267
|
-
im_files = glob(str(im_dir /
|
|
268
|
-
for im_file in tqdm(im_files, total=len(im_files), desc=
|
|
275
|
+
im_files = glob(str(im_dir / "*"))
|
|
276
|
+
for im_file in tqdm(im_files, total=len(im_files), desc="test"):
|
|
269
277
|
w, h = exif_size(Image.open(im_file))
|
|
270
278
|
windows = get_windows((h, w), crop_sizes=crop_sizes, gaps=gaps)
|
|
271
279
|
im = cv2.imread(im_file)
|
|
272
280
|
name = Path(im_file).stem
|
|
273
281
|
for window in windows:
|
|
274
282
|
x_start, y_start, x_stop, y_stop = window.tolist()
|
|
275
|
-
new_name =
|
|
283
|
+
new_name = name + "__" + str(x_stop - x_start) + "__" + str(x_start) + "___" + str(y_start)
|
|
276
284
|
patch_im = im[y_start:y_stop, x_start:x_stop]
|
|
277
|
-
cv2.imwrite(os.path.join(str(save_dir), f
|
|
285
|
+
cv2.imwrite(os.path.join(str(save_dir), f"{new_name}.jpg"), patch_im)
|
|
278
286
|
|
|
279
287
|
|
|
280
|
-
if __name__ ==
|
|
288
|
+
if __name__ == "__main__":
|
|
281
289
|
split_trainval(
|
|
282
|
-
data_root=
|
|
283
|
-
save_dir=
|
|
290
|
+
data_root="DOTAv2",
|
|
291
|
+
save_dir="DOTAv2-split",
|
|
284
292
|
)
|
|
285
293
|
split_test(
|
|
286
|
-
data_root=
|
|
287
|
-
save_dir=
|
|
294
|
+
data_root="DOTAv2",
|
|
295
|
+
save_dir="DOTAv2-split",
|
|
288
296
|
)
|