ultralytics 8.3.77__py3-none-any.whl → 8.3.79__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ultralytics/__init__.py CHANGED
@@ -1,6 +1,6 @@
1
1
  # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
2
2
 
3
- __version__ = "8.3.77"
3
+ __version__ = "8.3.79"
4
4
 
5
5
  import os
6
6
 
@@ -0,0 +1,32 @@
1
+ # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
2
+
3
+ # YOLO12-cls image classification model
4
+ # Model docs: https://docs.ultralytics.com/models/yolo12
5
+ # Task docs: https://docs.ultralytics.com/tasks/classify
6
+
7
+ # Parameters
8
+ nc: 80 # number of classes
9
+ scales: # model compound scaling constants, i.e. 'model=yolo12n-cls.yaml' will call yolo12-cls.yaml with scale 'n'
10
+ # [depth, width, max_channels]
11
+ n: [0.50, 0.25, 1024] # summary: 152 layers, 1,820,976 parameters, 1,820,976 gradients, 3.7 GFLOPs
12
+ s: [0.50, 0.50, 1024] # summary: 152 layers, 6,206,992 parameters, 6,206,992 gradients, 13.6 GFLOPs
13
+ m: [0.50, 1.00, 512] # summary: 172 layers, 12,083,088 parameters, 12,083,088 gradients, 44.2 GFLOPs
14
+ l: [1.00, 1.00, 512] # summary: 312 layers, 15,558,640 parameters, 15,558,640 gradients, 56.9 GFLOPs
15
+ x: [1.00, 1.50, 512] # summary: 312 layers, 34,172,592 parameters, 34,172,592 gradients, 126.5 GFLOPs
16
+
17
+ # YOLO12n backbone
18
+ backbone:
19
+ # [from, repeats, module, args]
20
+ - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
21
+ - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
22
+ - [-1, 2, C3k2, [256, False, 0.25]]
23
+ - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
24
+ - [-1, 2, C3k2, [512, False, 0.25]]
25
+ - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
26
+ - [-1, 4, A2C2f, [512, True, 4]]
27
+ - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
28
+ - [-1, 4, A2C2f, [1024, True, 1]] # 8
29
+
30
+ # YOLO12n head
31
+ head:
32
+ - [-1, 1, Classify, [nc]] # Classify
@@ -0,0 +1,48 @@
1
+ # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
2
+
3
+ # YOLO12-obb Oriented Bounding Boxes (OBB) model with P3/8 - P5/32 outputs
4
+ # Model docs: https://docs.ultralytics.com/models/yolo12
5
+ # Task docs: https://docs.ultralytics.com/tasks/obb
6
+
7
+ # Parameters
8
+ nc: 80 # number of classes
9
+ scales: # model compound scaling constants, i.e. 'model=yolo12n-obb.yaml' will call yolo12-obb.yaml with scale 'n'
10
+ # [depth, width, max_channels]
11
+ n: [0.50, 0.25, 1024] # summary: 287 layers, 2,673,955 parameters, 2,673,939 gradients, 6.9 GFLOPs
12
+ s: [0.50, 0.50, 1024] # summary: 287 layers, 9,570,275 parameters, 9,570,259 gradients, 22.7 GFLOPs
13
+ m: [0.50, 1.00, 512] # summary: 307 layers, 21,048,003 parameters, 21,047,987 gradients, 71.8 GFLOPs
14
+ l: [1.00, 1.00, 512] # summary: 503 layers, 27,299,619 parameters, 27,299,603 gradients, 93.4 GFLOPs
15
+ x: [1.00, 1.50, 512] # summary: 503 layers, 61,119,939 parameters, 61,119,923 gradients, 208.6 GFLOPs
16
+
17
+ # YOLO12n backbone
18
+ backbone:
19
+ # [from, repeats, module, args]
20
+ - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
21
+ - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
22
+ - [-1, 2, C3k2, [256, False, 0.25]]
23
+ - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
24
+ - [-1, 2, C3k2, [512, False, 0.25]]
25
+ - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
26
+ - [-1, 4, A2C2f, [512, True, 4]]
27
+ - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
28
+ - [-1, 4, A2C2f, [1024, True, 1]] # 8
29
+
30
+ # YOLO12n head
31
+ head:
32
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
33
+ - [[-1, 6], 1, Concat, [1]] # cat backbone P4
34
+ - [-1, 2, A2C2f, [512, False, -1]] # 11
35
+
36
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
37
+ - [[-1, 4], 1, Concat, [1]] # cat backbone P3
38
+ - [-1, 2, A2C2f, [256, False, -1]] # 14
39
+
40
+ - [-1, 1, Conv, [256, 3, 2]]
41
+ - [[-1, 11], 1, Concat, [1]] # cat head P4
42
+ - [-1, 2, A2C2f, [512, False, -1]] # 17
43
+
44
+ - [-1, 1, Conv, [512, 3, 2]]
45
+ - [[-1, 8], 1, Concat, [1]] # cat head P5
46
+ - [-1, 2, C3k2, [1024, True]] # 20 (P5/32-large)
47
+
48
+ - [[14, 17, 20], 1, OBB, [nc, 1]] # OBB(P3, P4, P5)
@@ -0,0 +1,49 @@
1
+ # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
2
+
3
+ # YOLO12-pose keypoints/pose estimation model with P3/8 - P5/32 outputs
4
+ # Model docs: https://docs.ultralytics.com/models/yolo12
5
+ # Task docs: https://docs.ultralytics.com/tasks/pose
6
+
7
+ # Parameters
8
+ nc: 80 # number of classes
9
+ kpt_shape: [17, 3] # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible)
10
+ scales: # model compound scaling constants, i.e. 'model=yolo12n-pose.yaml' will call yolo12-pose.yaml with scale 'n'
11
+ # [depth, width, max_channels]
12
+ n: [0.50, 0.25, 1024] # summary: 287 layers, 2,886,715 parameters, 2,886,699 gradients, 7.8 GFLOPs
13
+ s: [0.50, 0.50, 1024] # summary: 287 layers, 9,774,155 parameters, 9,774,139 gradients, 23.5 GFLOPs
14
+ m: [0.50, 1.00, 512] # summary: 307 layers, 21,057,753 parameters, 21,057,737 gradients, 71.8 GFLOPs
15
+ l: [1.00, 1.00, 512] # summary: 503 layers, 27,309,369 parameters, 27,309,353 gradients, 93.5 GFLOPs
16
+ x: [1.00, 1.50, 512] # summary: 503 layers, 61,134,489 parameters, 61,134,473 gradients, 208.7 GFLOPs
17
+
18
+ # YOLO12n backbone
19
+ backbone:
20
+ # [from, repeats, module, args]
21
+ - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
22
+ - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
23
+ - [-1, 2, C3k2, [256, False, 0.25]]
24
+ - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
25
+ - [-1, 2, C3k2, [512, False, 0.25]]
26
+ - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
27
+ - [-1, 4, A2C2f, [512, True, 4]]
28
+ - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
29
+ - [-1, 4, A2C2f, [1024, True, 1]] # 8
30
+
31
+ # YOLO12n head
32
+ head:
33
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
34
+ - [[-1, 6], 1, Concat, [1]] # cat backbone P4
35
+ - [-1, 2, A2C2f, [512, False, -1]] # 11
36
+
37
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
38
+ - [[-1, 4], 1, Concat, [1]] # cat backbone P3
39
+ - [-1, 2, A2C2f, [256, False, -1]] # 14
40
+
41
+ - [-1, 1, Conv, [256, 3, 2]]
42
+ - [[-1, 11], 1, Concat, [1]] # cat head P4
43
+ - [-1, 2, A2C2f, [512, False, -1]] # 17
44
+
45
+ - [-1, 1, Conv, [512, 3, 2]]
46
+ - [[-1, 8], 1, Concat, [1]] # cat head P5
47
+ - [-1, 2, C3k2, [1024, True]] # 20 (P5/32-large)
48
+
49
+ - [[14, 17, 20], 1, Pose, [nc, kpt_shape]] # Pose(P3, P4, P5)
@@ -0,0 +1,48 @@
1
+ # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
2
+
3
+ # YOLO12-seg instance segmentation model with P3/8 - P5/32 outputs
4
+ # Model docs: https://docs.ultralytics.com/models/yolo12
5
+ # Task docs: https://docs.ultralytics.com/tasks/segment
6
+
7
+ # Parameters
8
+ nc: 80 # number of classes
9
+ scales: # model compound scaling constants, i.e. 'model=yolo12n-seg.yaml' will call yolo12-seg.yaml with scale 'n'
10
+ # [depth, width, max_channels]
11
+ n: [0.50, 0.25, 1024] # summary: 294 layers, 2,855,056 parameters, 2,855,040 gradients, 10.6 GFLOPs
12
+ s: [0.50, 0.50, 1024] # summary: 294 layers, 9,938,592 parameters, 9,938,576 gradients, 35.7 GFLOPs
13
+ m: [0.50, 1.00, 512] # summary: 314 layers, 22,505,376 parameters, 22,505,360 gradients, 123.5 GFLOPs
14
+ l: [1.00, 1.00, 512] # summary: 510 layers, 28,756,992 parameters, 28,756,976 gradients, 145.1 GFLOPs
15
+ x: [1.00, 1.50, 512] # summary: 510 layers, 64,387,264 parameters, 64,387,248 gradients, 324.6 GFLOPs
16
+
17
+ # YOLO12n backbone
18
+ backbone:
19
+ # [from, repeats, module, args]
20
+ - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
21
+ - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
22
+ - [-1, 2, C3k2, [256, False, 0.25]]
23
+ - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
24
+ - [-1, 2, C3k2, [512, False, 0.25]]
25
+ - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
26
+ - [-1, 4, A2C2f, [512, True, 4]]
27
+ - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
28
+ - [-1, 4, A2C2f, [1024, True, 1]] # 8
29
+
30
+ # YOLO12n head
31
+ head:
32
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
33
+ - [[-1, 6], 1, Concat, [1]] # cat backbone P4
34
+ - [-1, 2, A2C2f, [512, False, -1]] # 11
35
+
36
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
37
+ - [[-1, 4], 1, Concat, [1]] # cat backbone P3
38
+ - [-1, 2, A2C2f, [256, False, -1]] # 14
39
+
40
+ - [-1, 1, Conv, [256, 3, 2]]
41
+ - [[-1, 11], 1, Concat, [1]] # cat head P4
42
+ - [-1, 2, A2C2f, [512, False, -1]] # 17
43
+
44
+ - [-1, 1, Conv, [512, 3, 2]]
45
+ - [[-1, 8], 1, Concat, [1]] # cat head P5
46
+ - [-1, 2, C3k2, [1024, True]] # 20 (P5/32-large)
47
+
48
+ - [[14, 17, 20], 1, Segment, [nc, 32, 256]] # Segment(P3, P4, P5)
@@ -0,0 +1,48 @@
1
+ # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
2
+
3
+ # YOLO12 object detection model with P3/8 - P5/32 outputs
4
+ # Model docs: https://docs.ultralytics.com/models/yolo12
5
+ # Task docs: https://docs.ultralytics.com/tasks/detect
6
+
7
+ # Parameters
8
+ nc: 80 # number of classes
9
+ scales: # model compound scaling constants, i.e. 'model=yolo12n.yaml' will call yolo12.yaml with scale 'n'
10
+ # [depth, width, max_channels]
11
+ n: [0.50, 0.25, 1024] # summary: 272 layers, 2,602,288 parameters, 2,602,272 gradients, 6.7 GFLOPs
12
+ s: [0.50, 0.50, 1024] # summary: 272 layers, 9,284,096 parameters, 9,284,080 gradients, 21.7 GFLOPs
13
+ m: [0.50, 1.00, 512] # summary: 292 layers, 20,199,168 parameters, 20,199,152 gradients, 68.1 GFLOPs
14
+ l: [1.00, 1.00, 512] # summary: 488 layers, 26,450,784 parameters, 26,450,768 gradients, 89.7 GFLOPs
15
+ x: [1.00, 1.50, 512] # summary: 488 layers, 59,210,784 parameters, 59,210,768 gradients, 200.3 GFLOPs
16
+
17
+ # YOLO12n backbone
18
+ backbone:
19
+ # [from, repeats, module, args]
20
+ - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
21
+ - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
22
+ - [-1, 2, C3k2, [256, False, 0.25]]
23
+ - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
24
+ - [-1, 2, C3k2, [512, False, 0.25]]
25
+ - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
26
+ - [-1, 4, A2C2f, [512, True, 4]]
27
+ - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
28
+ - [-1, 4, A2C2f, [1024, True, 1]] # 8
29
+
30
+ # YOLO12n head
31
+ head:
32
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
33
+ - [[-1, 6], 1, Concat, [1]] # cat backbone P4
34
+ - [-1, 2, A2C2f, [512, False, -1]] # 11
35
+
36
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
37
+ - [[-1, 4], 1, Concat, [1]] # cat backbone P3
38
+ - [-1, 2, A2C2f, [256, False, -1]] # 14
39
+
40
+ - [-1, 1, Conv, [256, 3, 2]]
41
+ - [[-1, 11], 1, Concat, [1]] # cat head P4
42
+ - [-1, 2, A2C2f, [512, False, -1]] # 17
43
+
44
+ - [-1, 1, Conv, [512, 3, 2]]
45
+ - [[-1, 8], 1, Concat, [1]] # cat head P5
46
+ - [-1, 2, C3k2, [1024, True]] # 20 (P5/32-large)
47
+
48
+ - [[14, 17, 20], 1, Detect, [nc]] # Detect(P3, P4, P5)
@@ -58,15 +58,13 @@ def auto_annotate(
58
58
 
59
59
  for result in det_results:
60
60
  class_ids = result.boxes.cls.int().tolist() # noqa
61
- if len(class_ids):
61
+ if class_ids:
62
62
  boxes = result.boxes.xyxy # Boxes object for bbox outputs
63
63
  sam_results = sam_model(result.orig_img, bboxes=boxes, verbose=False, save=False, device=device)
64
64
  segments = sam_results[0].masks.xyn # noqa
65
65
 
66
66
  with open(f"{Path(output_dir) / Path(result.path).stem}.txt", "w") as f:
67
- for i in range(len(segments)):
68
- s = segments[i]
69
- if len(s) == 0:
70
- continue
71
- segment = map(str, segments[i].reshape(-1).tolist())
72
- f.write(f"{class_ids[i]} " + " ".join(segment) + "\n")
67
+ for i, s in enumerate(segments):
68
+ if s.any():
69
+ segment = map(str, s.reshape(-1).tolist())
70
+ f.write(f"{class_ids[i]} " + " ".join(segment) + "\n")
@@ -1364,17 +1364,26 @@ class RandomHSV:
1364
1364
  >>> hsv_augmenter(labels)
1365
1365
  >>> augmented_img = labels["img"]
1366
1366
  """
1367
- img = labels["img"]
1368
1367
  if self.hgain or self.sgain or self.vgain:
1369
- r = np.random.uniform(-1, 1, 3) * [self.hgain, self.sgain, self.vgain] + 1 # random gains
1370
- hue, sat, val = cv2.split(cv2.cvtColor(img, cv2.COLOR_BGR2HSV))
1368
+ img = labels["img"]
1371
1369
  dtype = img.dtype # uint8
1372
1370
 
1371
+ # Original implementation (bug) from ultralytics<=8.3.78
1372
+ # r = np.random.uniform(-1, 1, 3) * [self.hgain, self.sgain, self.vgain] + 1 # random gains
1373
+ # x = np.arange(0, 256, dtype=r.dtype)
1374
+ # lut_hue = ((x * r[0]) % 180).astype(dtype)
1375
+ # lut_sat = np.clip(x * r[1], 0, 255).astype(dtype)
1376
+ # lut_val = np.clip(x * r[2], 0, 255).astype(dtype)
1377
+
1378
+ # Fixed implementation in https://github.com/ultralytics/ultralytics/pull/19311
1379
+ r = np.random.uniform(-1, 1, 3) * (self.hgain, self.sgain, self.vgain) * (180, 255, 255) # random gains
1373
1380
  x = np.arange(0, 256, dtype=r.dtype)
1374
- lut_hue = ((x * r[0]) % 180).astype(dtype)
1375
- lut_sat = np.clip(x * r[1], 0, 255).astype(dtype)
1376
- lut_val = np.clip(x * r[2], 0, 255).astype(dtype)
1381
+ lut_hue = ((x + r[0]) % 180).astype(dtype)
1382
+ lut_sat = np.clip(x + r[1], 0, 255).astype(dtype)
1383
+ lut_val = np.clip(x + r[2], 0, 255).astype(dtype)
1384
+ lut_sat[0] = 0 # prevent pure white changing color
1377
1385
 
1386
+ hue, sat, val = cv2.split(cv2.cvtColor(img, cv2.COLOR_BGR2HSV))
1378
1387
  im_hsv = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val)))
1379
1388
  cv2.cvtColor(im_hsv, cv2.COLOR_HSV2BGR, dst=img) # no return needed
1380
1389
  return labels
@@ -1484,7 +1493,7 @@ class LetterBox:
1484
1493
  Attributes:
1485
1494
  new_shape (tuple): Target shape (height, width) for resizing.
1486
1495
  auto (bool): Whether to use minimum rectangle.
1487
- scaleFill (bool): Whether to stretch the image to new_shape.
1496
+ scale_fill (bool): Whether to stretch the image to new_shape.
1488
1497
  scaleup (bool): Whether to allow scaling up. If False, only scale down.
1489
1498
  stride (int): Stride for rounding padding.
1490
1499
  center (bool): Whether to center the image or align to top-left.
@@ -1499,7 +1508,7 @@ class LetterBox:
1499
1508
  >>> updated_instances = result["instances"]
1500
1509
  """
1501
1510
 
1502
- def __init__(self, new_shape=(640, 640), auto=False, scaleFill=False, scaleup=True, center=True, stride=32):
1511
+ def __init__(self, new_shape=(640, 640), auto=False, scale_fill=False, scaleup=True, center=True, stride=32):
1503
1512
  """
1504
1513
  Initialize LetterBox object for resizing and padding images.
1505
1514
 
@@ -1509,7 +1518,7 @@ class LetterBox:
1509
1518
  Args:
1510
1519
  new_shape (Tuple[int, int]): Target size (height, width) for the resized image.
1511
1520
  auto (bool): If True, use minimum rectangle to resize. If False, use new_shape directly.
1512
- scaleFill (bool): If True, stretch the image to new_shape without padding.
1521
+ scale_fill (bool): If True, stretch the image to new_shape without padding.
1513
1522
  scaleup (bool): If True, allow scaling up. If False, only scale down.
1514
1523
  center (bool): If True, center the placed image. If False, place image in top-left corner.
1515
1524
  stride (int): Stride of the model (e.g., 32 for YOLOv5).
@@ -1517,17 +1526,17 @@ class LetterBox:
1517
1526
  Attributes:
1518
1527
  new_shape (Tuple[int, int]): Target size for the resized image.
1519
1528
  auto (bool): Flag for using minimum rectangle resizing.
1520
- scaleFill (bool): Flag for stretching image without padding.
1529
+ scale_fill (bool): Flag for stretching image without padding.
1521
1530
  scaleup (bool): Flag for allowing upscaling.
1522
1531
  stride (int): Stride value for ensuring image size is divisible by stride.
1523
1532
 
1524
1533
  Examples:
1525
- >>> letterbox = LetterBox(new_shape=(640, 640), auto=False, scaleFill=False, scaleup=True, stride=32)
1534
+ >>> letterbox = LetterBox(new_shape=(640, 640), auto=False, scale_fill=False, scaleup=True, stride=32)
1526
1535
  >>> resized_img = letterbox(original_img)
1527
1536
  """
1528
1537
  self.new_shape = new_shape
1529
1538
  self.auto = auto
1530
- self.scaleFill = scaleFill
1539
+ self.scale_fill = scale_fill
1531
1540
  self.scaleup = scaleup
1532
1541
  self.stride = stride
1533
1542
  self.center = center # Put the image in the middle or top-left
@@ -1573,7 +1582,7 @@ class LetterBox:
1573
1582
  dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding
1574
1583
  if self.auto: # minimum rectangle
1575
1584
  dw, dh = np.mod(dw, self.stride), np.mod(dh, self.stride) # wh padding
1576
- elif self.scaleFill: # stretch
1585
+ elif self.scale_fill: # stretch
1577
1586
  dw, dh = 0.0, 0.0
1578
1587
  new_unpad = (new_shape[1], new_shape[0])
1579
1588
  ratio = new_shape[1] / shape[1], new_shape[0] / shape[0] # width, height ratios
@@ -274,13 +274,13 @@ def convert_coco(
274
274
  # Create image dict
275
275
  images = {f"{x['id']:d}": x for x in data["images"]}
276
276
  # Create image-annotations dict
277
- imgToAnns = defaultdict(list)
277
+ annotations = defaultdict(list)
278
278
  for ann in data["annotations"]:
279
- imgToAnns[ann["image_id"]].append(ann)
279
+ annotations[ann["image_id"]].append(ann)
280
280
 
281
281
  image_txt = []
282
282
  # Write labels file
283
- for img_id, anns in TQDM(imgToAnns.items(), desc=f"Annotations {json_file}"):
283
+ for img_id, anns in TQDM(annotations.items(), desc=f"Annotations {json_file}"):
284
284
  img = images[f"{img_id:d}"]
285
285
  h, w = img["height"], img["width"]
286
286
  f = str(Path(img["coco_url"]).relative_to("http://images.cocodataset.org")) if lvis else img["file_name"]
@@ -170,6 +170,7 @@ def try_export(inner_func):
170
170
  def outer_func(*args, **kwargs):
171
171
  """Export a model."""
172
172
  prefix = inner_args["prefix"]
173
+ dt = 0.0
173
174
  try:
174
175
  with Profile() as dt:
175
176
  f, model = inner_func(*args, **kwargs)
@@ -309,9 +310,8 @@ class Exporter:
309
310
  "WARNING ⚠️ INT8 export requires a missing 'data' arg for calibration. "
310
311
  f"Using default 'data={self.args.data}'."
311
312
  )
312
- if tfjs:
313
- if ARM64 and LINUX:
314
- raise SystemError("TensorFlow.js export not supported on ARM64 Linux")
313
+ if tfjs and (ARM64 and LINUX):
314
+ raise SystemError("TensorFlow.js export not supported on ARM64 Linux")
315
315
 
316
316
  # Input
317
317
  im = torch.zeros(self.args.batch, 3, *self.imgsz).to(self.device)
@@ -419,7 +419,7 @@ class Exporter:
419
419
  if pb or tfjs: # pb prerequisite to tfjs
420
420
  f[6], _ = self.export_pb(keras_model=keras_model)
421
421
  if tflite:
422
- f[7], _ = self.export_tflite(keras_model=keras_model, nms=False, agnostic_nms=self.args.agnostic_nms)
422
+ f[7], _ = self.export_tflite()
423
423
  if edgetpu:
424
424
  f[8], _ = self.export_edgetpu(tflite_model=Path(f[5]) / f"{self.file.stem}_full_integer_quant.tflite")
425
425
  if tfjs:
@@ -1077,7 +1077,7 @@ class Exporter:
1077
1077
  return f, None
1078
1078
 
1079
1079
  @try_export
1080
- def export_tflite(self, keras_model, nms, agnostic_nms, prefix=colorstr("TensorFlow Lite:")):
1080
+ def export_tflite(self, prefix=colorstr("TensorFlow Lite:")):
1081
1081
  """YOLO TensorFlow Lite export."""
1082
1082
  # BUG https://github.com/ultralytics/ultralytics/issues/13436
1083
1083
  import tensorflow as tf # noqa
@@ -567,6 +567,10 @@ class BaseTrainer:
567
567
  except Exception as e:
568
568
  raise RuntimeError(emojis(f"Dataset '{clean_url(self.args.data)}' error ❌ {e}")) from e
569
569
  self.data = data
570
+ if self.args.single_cls:
571
+ LOGGER.info("Overriding class names with single class.")
572
+ self.data["names"] = {0: "item"}
573
+ self.data["nc"] = 1
570
574
  return data["train"], data.get("val") or data.get("test")
571
575
 
572
576
  def setup_model(self):
@@ -72,7 +72,7 @@ class RTDETRPredictor(BasePredictor):
72
72
  def pre_transform(self, im):
73
73
  """
74
74
  Pre-transforms the input images before feeding them into the model for inference. The input images are
75
- letterboxed to ensure a square aspect ratio and scale-filled. The size must be square(640) and scaleFilled.
75
+ letterboxed to ensure a square aspect ratio and scale-filled. The size must be square(640) and scale_filled.
76
76
 
77
77
  Args:
78
78
  im (list[np.ndarray] |torch.Tensor): Input images of shape (N,3,h,w) for tensor, [(h,w,3) x N] for list.
@@ -80,5 +80,5 @@ class RTDETRPredictor(BasePredictor):
80
80
  Returns:
81
81
  (list): List of pre-transformed images ready for model inference.
82
82
  """
83
- letterbox = LetterBox(self.imgsz, auto=False, scaleFill=True)
83
+ letterbox = LetterBox(self.imgsz, auto=False, scale_fill=True)
84
84
  return [letterbox(image=x) for x in im]
@@ -34,7 +34,7 @@ class RTDETRDataset(YOLODataset):
34
34
  hyp.mixup = hyp.mixup if self.augment and not self.rect else 0.0
35
35
  transforms = v8_transforms(self, self.imgsz, hyp, stretch=True)
36
36
  else:
37
- # transforms = Compose([LetterBox(new_shape=(self.imgsz, self.imgsz), auto=False, scaleFill=True)])
37
+ # transforms = Compose([LetterBox(new_shape=(self.imgsz, self.imgsz), auto=False, scale_fill=True)])
38
38
  transforms = Compose([])
39
39
  transforms.append(
40
40
  Format(
@@ -186,7 +186,7 @@ class DetectionValidator(BaseValidator):
186
186
  self.nt_per_class = np.bincount(stats["target_cls"].astype(int), minlength=self.nc)
187
187
  self.nt_per_image = np.bincount(stats["target_img"].astype(int), minlength=self.nc)
188
188
  stats.pop("target_img", None)
189
- if len(stats) and stats["tp"].any():
189
+ if len(stats):
190
190
  self.metrics.process(**stats)
191
191
  return self.metrics.results_dict
192
192
 
@@ -197,12 +197,13 @@ class AutoBackend(nn.Module):
197
197
  import onnxruntime
198
198
 
199
199
  providers = ["CPUExecutionProvider"]
200
- if cuda and "CUDAExecutionProvider" in onnxruntime.get_available_providers():
201
- providers.insert(0, "CUDAExecutionProvider")
202
- elif cuda: # Only log warning if CUDA was requested but unavailable
203
- LOGGER.warning("WARNING ⚠️ Failed to start ONNX Runtime with CUDA. Using CPU...")
204
- device = torch.device("cpu")
205
- cuda = False
200
+ if cuda:
201
+ if "CUDAExecutionProvider" in onnxruntime.get_available_providers():
202
+ providers.insert(0, "CUDAExecutionProvider")
203
+ else: # Only log warning if CUDA was requested but unavailable
204
+ LOGGER.warning("WARNING ⚠️ Failed to start ONNX Runtime with CUDA. Using CPU...")
205
+ device = torch.device("cpu")
206
+ cuda = False
206
207
  LOGGER.info(f"Using ONNX Runtime {providers[0]}")
207
208
  if onnx:
208
209
  session = onnxruntime.InferenceSession(w, providers=providers)
@@ -223,7 +224,7 @@ class AutoBackend(nn.Module):
223
224
  output_names = [x.name for x in session.get_outputs()]
224
225
  metadata = session.get_modelmeta().custom_metadata_map
225
226
  dynamic = isinstance(session.get_outputs()[0].shape[0], str)
226
- fp16 = True if "float16" in session.get_inputs()[0].type else False
227
+ fp16 = "float16" in session.get_inputs()[0].type
227
228
  if not dynamic:
228
229
  io = session.io_binding()
229
230
  bindings = []
@@ -30,6 +30,7 @@ from .block import (
30
30
  SPP,
31
31
  SPPELAN,
32
32
  SPPF,
33
+ A2C2f,
33
34
  AConv,
34
35
  ADown,
35
36
  Attention,
@@ -160,4 +161,5 @@ __all__ = (
160
161
  "PSA",
161
162
  "TorchVision",
162
163
  "Index",
164
+ "A2C2f",
163
165
  )
@@ -1154,3 +1154,205 @@ class TorchVision(nn.Module):
1154
1154
  else:
1155
1155
  y = self.m(x)
1156
1156
  return y
1157
+
1158
+
1159
class AAttn(nn.Module):
    """
    Area-attention module for YOLO models, providing efficient attention mechanisms.

    The flattened feature map (H * W tokens) is split into `area` equal, consecutive chunks and multi-head
    self-attention is computed independently inside each chunk. With `area=1` attention spans the whole map.

    Attributes:
        area (int): Number of areas the feature map is divided into.
        num_heads (int): Number of heads into which the attention mechanism is divided.
        head_dim (int): Dimension of each attention head (`dim // num_heads`).
        qkv (Conv): Convolution layer for computing query, key and value tensors.
        proj (Conv): Projection convolution layer.
        pe (Conv): Position encoding convolution layer.

    Methods:
        forward: Applies area-attention to input tensor.

    Examples:
        >>> attn = AAttn(dim=256, num_heads=8, area=4)
        >>> x = torch.randn(1, 256, 32, 32)
        >>> output = attn(x)
        >>> print(output.shape)
        torch.Size([1, 256, 32, 32])
    """

    def __init__(self, dim, num_heads, area=1):
        """
        Initializes an Area-attention module for YOLO models.

        Args:
            dim (int): Number of hidden channels.
            num_heads (int): Number of heads into which the attention mechanism is divided.
            area (int): Number of areas the feature map is divided into, default is 1.
        """
        super().__init__()
        self.area = area

        self.num_heads = num_heads
        self.head_dim = head_dim = dim // num_heads
        all_head_dim = head_dim * self.num_heads

        # NOTE(review): forward() reshapes with the input channel count C, so it presumes all_head_dim == dim,
        # i.e. dim is an exact multiple of num_heads -- confirm against callers.
        self.qkv = Conv(dim, all_head_dim * 3, 1, act=False)
        self.proj = Conv(all_head_dim, dim, 1, act=False)
        self.pe = Conv(all_head_dim, dim, 7, 1, 3, g=dim, act=False)

    def forward(self, x):
        """
        Processes the input tensor 'x' through the area-attention.

        Args:
            x (torch.Tensor): Input feature map, unpacked as (B, C, H, W). When `self.area > 1`, H * W must be
                divisible by `self.area`, otherwise the area reshape raises.

        Returns:
            (torch.Tensor): Result of `self.proj` applied to the attention output plus the positional term.
        """
        B, C, H, W = x.shape
        N = H * W

        # Tokens-first layout: (B, N, channels of qkv)
        qkv = self.qkv(x).flatten(2).transpose(1, 2)
        if self.area > 1:
            # Fold the areas into the batch axis so each area attends only to its own tokens
            qkv = qkv.reshape(B * self.area, N // self.area, C * 3)
            B, N, _ = qkv.shape
        # Each of q, k, v ends up as (B, num_heads, head_dim, N)
        q, k, v = (
            qkv.view(B, N, self.num_heads, self.head_dim * 3)
            .permute(0, 2, 3, 1)
            .split([self.head_dim, self.head_dim, self.head_dim], dim=2)
        )
        attn = (q.transpose(-2, -1) @ k) * (self.head_dim**-0.5)  # (B, num_heads, N, N) scaled scores
        attn = attn.softmax(dim=-1)
        x = v @ attn.transpose(-2, -1)
        x = x.permute(0, 3, 1, 2)  # back to tokens-first: (B, N, num_heads, head_dim)
        v = v.permute(0, 3, 1, 2)

        if self.area > 1:
            # Undo the area folding: merge the areas back into one token sequence per image
            x = x.reshape(B // self.area, N * self.area, C)
            v = v.reshape(B // self.area, N * self.area, C)
            B, N, _ = x.shape

        x = x.reshape(B, H, W, C).permute(0, 3, 1, 2).contiguous()
        v = v.reshape(B, H, W, C).permute(0, 3, 1, 2).contiguous()

        # Positional term is computed from the value map and added to the attention output
        x = x + self.pe(v)
        return self.proj(x)
1235
+
1236
+
1237
class ABlock(nn.Module):
    """
    Area-attention block module for efficient feature extraction in YOLO models.

    This module implements an area-attention mechanism combined with a feed-forward network for processing feature maps.
    Both sub-layers are wrapped in residual connections.

    Attributes:
        attn (AAttn): Area-attention module for processing spatial features.
        mlp (nn.Sequential): Multi-layer perceptron for feature transformation.

    Methods:
        _init_weights: Initializes module weights using truncated normal distribution.
        forward: Applies area-attention and feed-forward processing to input tensor.

    Examples:
        >>> block = ABlock(dim=256, num_heads=8, mlp_ratio=1.2, area=1)
        >>> x = torch.randn(1, 256, 32, 32)
        >>> output = block(x)
        >>> print(output.shape)
        torch.Size([1, 256, 32, 32])
    """

    def __init__(self, dim, num_heads, mlp_ratio=1.2, area=1):
        """
        Initializes an Area-attention block module for efficient feature extraction in YOLO models.

        Args:
            dim (int): Number of input channels.
            num_heads (int): Number of heads into which the attention mechanism is divided.
            mlp_ratio (float): Expansion ratio for MLP hidden dimension.
            area (int): Number of areas the feature map is divided into.
        """
        super().__init__()

        self.attn = AAttn(dim, num_heads=num_heads, area=area)
        mlp_hidden_dim = int(dim * mlp_ratio)
        self.mlp = nn.Sequential(Conv(dim, mlp_hidden_dim, 1), Conv(mlp_hidden_dim, dim, 1, act=False))

        # Re-initialize every nn.Conv2d found among the submodules created above
        self.apply(self._init_weights)

    def _init_weights(self, m):
        """
        Initialize weights using a truncated normal distribution.

        Only `nn.Conv2d` modules are touched: weights are drawn with std=0.02 and any bias is zeroed.

        Args:
            m (nn.Module): Module visited by `nn.Module.apply`.
        """
        if isinstance(m, nn.Conv2d):
            nn.init.trunc_normal_(m.weight, std=0.02)
            if m.bias is not None:
                nn.init.constant_(m.bias, 0)

    def forward(self, x):
        """
        Forward pass through ABlock, applying area-attention and feed-forward layers to the input tensor.

        Args:
            x (torch.Tensor): Input feature map.

        Returns:
            (torch.Tensor): `x + attn(x)` followed by a second residual around the MLP.
        """
        x = x + self.attn(x)
        return x + self.mlp(x)
1294
+
1295
+
1296
class A2C2f(nn.Module):
    """
    Area-Attention C2f module for enhanced feature extraction with area-based attention mechanisms.

    This module extends the C2f architecture by incorporating area-attention and ABlock layers for improved feature
    processing. It supports both area-attention and standard convolution modes.

    Attributes:
        cv1 (Conv): Initial 1x1 convolution layer that reduces input channels to hidden channels.
        cv2 (Conv): Final 1x1 convolution layer that processes concatenated features.
        gamma (nn.Parameter | None): Learnable parameter for residual scaling when using area attention.
        m (nn.ModuleList): List of either ABlock or C3k modules for feature processing.

    Methods:
        forward: Processes input through area-attention or standard convolution pathway.

    Examples:
        >>> m = A2C2f(512, 512, n=1, a2=True, area=1)
        >>> x = torch.randn(1, 512, 32, 32)
        >>> output = m(x)
        >>> print(output.shape)
        torch.Size([1, 512, 32, 32])
    """

    def __init__(self, c1, c2, n=1, a2=True, area=1, residual=False, mlp_ratio=2.0, e=0.5, g=1, shortcut=True):
        """
        Area-Attention C2f module for enhanced feature extraction with area-based attention mechanisms.

        Args:
            c1 (int): Number of input channels.
            c2 (int): Number of output channels.
            n (int): Number of ABlock or C3k modules to stack.
            a2 (bool): Whether to use area attention blocks. If False, uses C3k blocks instead.
            area (int): Number of areas the feature map is divided into.
            residual (bool): Whether to use residual connections with learnable gamma parameter.
            mlp_ratio (float): Expansion ratio for MLP hidden dimension.
            e (float): Channel expansion ratio for hidden channels.
            g (int): Number of groups for grouped convolutions.
            shortcut (bool): Whether to use shortcut connections in C3k blocks.

        Raises:
            AssertionError: If the hidden channel count `int(c2 * e)` is not a multiple of 32. The check runs
                regardless of `a2`.
        """
        super().__init__()
        c_ = int(c2 * e)  # hidden channels
        assert c_ % 32 == 0, "Dimension of ABlock be a multiple of 32."

        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = Conv((1 + n) * c_, c2, 1)  # consumes cv1's output plus one output per stacked stage

        # gamma only exists on the area-attention path with residual enabled; it starts at 0.01 per channel
        self.gamma = nn.Parameter(0.01 * torch.ones(c2), requires_grad=True) if a2 and residual else None
        # Each stage is a pair of ABlocks (c_ // 32 heads each) when a2 is set, otherwise a single C3k
        self.m = nn.ModuleList(
            nn.Sequential(*(ABlock(c_, c_ // 32, mlp_ratio, area) for _ in range(2)))
            if a2
            else C3k(c_, c_, 2, shortcut, g)
            for _ in range(n)
        )

    def forward(self, x):
        """
        Forward pass through R-ELAN layer.

        Args:
            x (torch.Tensor): Input feature map.

        Returns:
            (torch.Tensor): `cv2` applied to the concatenation of every stage output; when `gamma` is set the
                result is scaled per channel by `gamma` and added to `x`, which requires matching shapes.
        """
        y = [self.cv1(x)]
        y.extend(m(y[-1]) for m in self.m)  # stages are chained: each one consumes the previous output
        y = self.cv2(torch.cat(y, 1))
        if self.gamma is not None:
            return x + self.gamma.view(-1, len(self.gamma), 1, 1) * y
        return y
ultralytics/nn/tasks.py CHANGED
@@ -22,6 +22,7 @@ from ultralytics.nn.modules import (
22
22
  SPP,
23
23
  SPPELAN,
24
24
  SPPF,
25
+ A2C2f,
25
26
  AConv,
26
27
  ADown,
27
28
  Bottleneck,
@@ -637,8 +638,8 @@ class WorldModel(DetectionModel):
637
638
  (torch.Tensor): Model's output tensor.
638
639
  """
639
640
  txt_feats = (self.txt_feats if txt_feats is None else txt_feats).to(device=x.device, dtype=x.dtype)
640
- if len(txt_feats) != len(x):
641
- txt_feats = txt_feats.repeat(len(x), 1, 1)
641
+ if len(txt_feats) != len(x) or self.model[-1].export:
642
+ txt_feats = txt_feats.expand(x.shape[0], -1, -1)
642
643
  ori_txt_feats = txt_feats.clone()
643
644
  y, dt, embeddings = [], [], [] # outputs
644
645
  for m in self.model: # except the head part
@@ -985,6 +986,7 @@ def parse_model(d, ch, verbose=True): # model_dict, input_channels(3)
985
986
  PSA,
986
987
  SCDown,
987
988
  C2fCIB,
989
+ A2C2f,
988
990
  }
989
991
  )
990
992
  repeat_modules = frozenset( # modules with 'repeat' arguments
@@ -1003,6 +1005,7 @@ def parse_model(d, ch, verbose=True): # model_dict, input_channels(3)
1003
1005
  C2fPSA,
1004
1006
  C2fCIB,
1005
1007
  C2PSA,
1008
+ A2C2f,
1006
1009
  }
1007
1010
  )
1008
1011
  for i, (f, n, m, args) in enumerate(d["backbone"] + d["head"]): # from, number, module, args
@@ -1034,6 +1037,10 @@ def parse_model(d, ch, verbose=True): # model_dict, input_channels(3)
1034
1037
  legacy = False
1035
1038
  if scale in "mlx":
1036
1039
  args[3] = True
1040
+ if m is A2C2f:
1041
+ legacy = False
1042
+ if scale in "lx": # for L/X sizes
1043
+ args.extend((True, 1.2))
1037
1044
  elif m is AIFI:
1038
1045
  args = [ch[f], *args]
1039
1046
  elif m in frozenset({HGStem, HGBlock}):
@@ -177,7 +177,7 @@ def _format_ground_truth_annotations_for_detection(img_idx, image_path, batch, c
177
177
  return {"name": "ground_truth", "data": data}
178
178
 
179
179
 
180
- def _format_prediction_annotations_for_detection(image_path, metadata, class_label_map=None):
180
+ def _format_prediction_annotations_for_detection(image_path, metadata, class_label_map=None, class_map=None):
181
181
  """Format YOLO predictions for object detection visualization."""
182
182
  stem = image_path.stem
183
183
  image_id = int(stem) if stem.isnumeric() else stem
@@ -187,26 +187,32 @@ def _format_prediction_annotations_for_detection(image_path, metadata, class_lab
187
187
  LOGGER.debug(f"COMET WARNING: Image: {image_path} has no bounding boxes predictions")
188
188
  return None
189
189
 
190
+ label_index_offset = 0
191
+ if class_map is not None:
192
+ # offset to align indices of class labels (starting from zero)
193
+ # with prediction's category ID indices (can start from one)
194
+ label_index_offset = sorted(class_map)[0]
195
+
190
196
  data = []
191
197
  for prediction in predictions:
192
198
  boxes = prediction["bbox"]
193
199
  score = _scale_confidence_score(prediction["score"])
194
200
  cls_label = prediction["category_id"]
195
201
  if class_label_map:
196
- cls_label = str(class_label_map[cls_label])
202
+ cls_label = str(class_label_map[cls_label - label_index_offset])
197
203
 
198
204
  data.append({"boxes": [boxes], "label": cls_label, "score": score})
199
205
 
200
206
  return {"name": "prediction", "data": data}
201
207
 
202
208
 
203
- def _fetch_annotations(img_idx, image_path, batch, prediction_metadata_map, class_label_map):
209
+ def _fetch_annotations(img_idx, image_path, batch, prediction_metadata_map, class_label_map, class_map):
204
210
  """Join the ground truth and prediction annotations if they exist."""
205
211
  ground_truth_annotations = _format_ground_truth_annotations_for_detection(
206
212
  img_idx, image_path, batch, class_label_map
207
213
  )
208
214
  prediction_annotations = _format_prediction_annotations_for_detection(
209
- image_path, prediction_metadata_map, class_label_map
215
+ image_path, prediction_metadata_map, class_label_map, class_map
210
216
  )
211
217
 
212
218
  annotations = [
@@ -260,6 +266,7 @@ def _log_image_predictions(experiment, validator, curr_step):
260
266
  predictions_metadata_map = _create_prediction_metadata_map(jdict)
261
267
  dataloader = validator.dataloader
262
268
  class_label_map = validator.names
269
+ class_map = getattr(validator, "class_map", None)
263
270
 
264
271
  batch_logging_interval = _get_eval_batch_logging_interval()
265
272
  max_image_predictions = _get_max_image_predictions_to_log()
@@ -280,6 +287,7 @@ def _log_image_predictions(experiment, validator, curr_step):
280
287
  batch,
281
288
  predictions_metadata_map,
282
289
  class_label_map,
290
+ class_map=class_map,
283
291
  )
284
292
  _log_images(
285
293
  experiment,
@@ -18,6 +18,7 @@ GITHUB_ASSETS_REPO = "ultralytics/assets"
18
18
  GITHUB_ASSETS_NAMES = (
19
19
  [f"yolov8{k}{suffix}.pt" for k in "nsmlx" for suffix in ("", "-cls", "-seg", "-pose", "-obb", "-oiv7")]
20
20
  + [f"yolo11{k}{suffix}.pt" for k in "nsmlx" for suffix in ("", "-cls", "-seg", "-pose", "-obb")]
21
+ + [f"yolo12{k}{suffix}.pt" for k in "nsmlx" for suffix in ("",)] # detect models only currently
21
22
  + [f"yolov5{k}{resolution}u.pt" for k in "nsmlx" for resolution in ("", "6")]
22
23
  + [f"yolov3{k}u.pt" for k in ("", "-spp", "-tiny")]
23
24
  + [f"yolov8{k}-world.pt" for k in "smlx"]
@@ -604,7 +604,7 @@ def ap_per_class(
604
604
  if j == 0:
605
605
  prec_values.append(np.interp(x, mrec, mpre)) # precision at mAP@0.5
606
606
 
607
- prec_values = np.array(prec_values) # (nc, 1000)
607
+ prec_values = np.array(prec_values) if prec_values else np.zeros((1, 1000)) # (nc, 1000)
608
608
 
609
609
  # Compute F1 (harmonic mean of precision and recall)
610
610
  f1_curve = 2 * p_curve * r_curve / (p_curve + r_curve + eps)
@@ -317,8 +317,7 @@ def model_info(model, detailed=False, verbose=True, imgsz=640):
317
317
  if len(m._parameters):
318
318
  for pn, p in m.named_parameters():
319
319
  LOGGER.info(
320
- f"{i:>5g}{mn + '.' + pn:>40}{mt:>20}{p.requires_grad!r:>10}{p.numel():>12g}"
321
- f"{str(list(p.shape)):>20}{p.mean():>10.3g}{p.std():>10.3g}{str(p.dtype).replace('torch.', ''):>15}"
320
+ f"{i:>5g}{f'{mn}.{pn}':>40}{mt:>20}{p.requires_grad!r:>10}{p.numel():>12g}{str(list(p.shape)):>20}{p.mean():>10.3g}{p.std():>10.3g}{str(p.dtype).replace('torch.', ''):>15}"
322
321
  )
323
322
  else: # layers with no learnable params
324
323
  LOGGER.info(f"{i:>5g}{mn:>40}{mt:>20}{False!r:>10}{0:>12g}{str([]):>20}{'-':>10}{'-':>10}{'-':>15}")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: ultralytics
3
- Version: 8.3.77
3
+ Version: 8.3.79
4
4
  Summary: Ultralytics YOLO 🚀 for SOTA object detection, multi-object tracking, instance segmentation, pose estimation and image classification.
5
5
  Author-email: Glenn Jocher <glenn.jocher@ultralytics.com>, Jing Qiu <jing.qiu@ultralytics.com>
6
6
  Maintainer-email: Ultralytics <hello@ultralytics.com>
@@ -7,7 +7,7 @@ tests/test_exports.py,sha256=T_z_NUS9URQXv83k5XNLHTuksJ8srtzbZnWuiiQWM98,9260
7
7
  tests/test_integrations.py,sha256=p3DMnnPMKsV0Qm82JVJUIY1UZ67xRgF9E8AaL76TEHE,6154
8
8
  tests/test_python.py,sha256=tW-EFJC2rjl_DvAa8khXGWYdypseQjrLjGHhe2p9r9A,23238
9
9
  tests/test_solutions.py,sha256=aY0G3vNzXGCENG9FD76MfUp7jgzeESPsUvbvQYBUvH0,4205
10
- ultralytics/__init__.py,sha256=HQtHuvQM5kuWiBsRP3s4mhhbcYU2HsTkCBcXce66zkY,709
10
+ ultralytics/__init__.py,sha256=moo1vmbr78zE56S4YpmOEngnomz8U-tJpyjDtGNeJxA,709
11
11
  ultralytics/assets/bus.jpg,sha256=wCAZxJecGR63Od3ZRERe9Aja1Weayrb9Ug751DS_vGM,137419
12
12
  ultralytics/assets/zidane.jpg,sha256=Ftc4aeMmen1O0A3o6GCDO9FlfBslLpTAw0gnetx7bts,50427
13
13
  ultralytics/cfg/__init__.py,sha256=qP44HnFP4QcC5FQz29A-EGTuwdtxXAzPvw_IvCVmiqA,39771
@@ -48,6 +48,11 @@ ultralytics/cfg/models/11/yolo11-obb.yaml,sha256=x8XDI2WvbBDre79eslYafBDvu6AmdGb
48
48
  ultralytics/cfg/models/11/yolo11-pose.yaml,sha256=RUe-8rIrrYWItv0GMo_VaO9JfrK2NJSXfbhv0NOq9dk,2128
49
49
  ultralytics/cfg/models/11/yolo11-seg.yaml,sha256=ozw5daUucWFCnJNVApK8TIijSe2qAlFmq_VoPyVu9Oo,2045
50
50
  ultralytics/cfg/models/11/yolo11.yaml,sha256=5XryDSNt5MYaIhTnGOJYnFV8xRiZPsGcsayYt0RCSJM,2012
51
+ ultralytics/cfg/models/12/yolo12-cls.yaml,sha256=BLv578ZuU-QKx6GTNWX6lXdutzf_0rGhRrC3HrpxaNM,1405
52
+ ultralytics/cfg/models/12/yolo12-obb.yaml,sha256=JMviFAOmDbW0aMNzZNqispP0wxWw3mtKn2iUwedf4WM,1975
53
+ ultralytics/cfg/models/12/yolo12-pose.yaml,sha256=Mr9xjYclLQzxYhMqjIKQTdiTvtqZvEXBtclADFggaMA,2074
54
+ ultralytics/cfg/models/12/yolo12-seg.yaml,sha256=RBFFz4b95Dupfg0fmqCkZ4i1Zzai_QyJrI6Y2oLsocM,1984
55
+ ultralytics/cfg/models/12/yolo12.yaml,sha256=ZeA8LuymJXPNjZ5xkxkZHkcktDaKDzUBb2Kc3gCLC1w,1953
51
56
  ultralytics/cfg/models/rt-detr/rtdetr-l.yaml,sha256=_jGu4rotBnmjS29MkSvPx_4dNTWku68ie8-BIvf_p6Q,2041
52
57
  ultralytics/cfg/models/rt-detr/rtdetr-resnet101.yaml,sha256=BGWp61olKkgD_CzikeVSglWfat3L9hDIK6KDkjwzlxc,1678
53
58
  ultralytics/cfg/models/rt-detr/rtdetr-resnet50.yaml,sha256=hrRmoL2w-Rchd7obEcSYPeyDNG32QxXftbRH_4vVeZQ,1676
@@ -92,21 +97,21 @@ ultralytics/cfg/solutions/default.yaml,sha256=c-9thwI7y7VmIoIM6AW70Z0r825SToH2h7
92
97
  ultralytics/cfg/trackers/botsort.yaml,sha256=D9doE5GQUe6HrAFzr7OfQFIGPFk0M_vJ0B_n7VjxH6Q,1080
93
98
  ultralytics/cfg/trackers/bytetrack.yaml,sha256=6u-tiZlk16EqEwkNXaMrza6PAQmWj_ypgv26LGCtPDg,886
94
99
  ultralytics/data/__init__.py,sha256=nAXaL1puCc7z_NjzQNlJnhbVhT9Fla2u7Dsqo7q1dAc,644
95
- ultralytics/data/annotator.py,sha256=jbKHB5l5IYOG1YOgCxA6czU_ivb3NPAACrtPe6-bVn4,3145
96
- ultralytics/data/augment.py,sha256=sQDtIPD0P2pm_t-dI87hZt9KTB2PDN0JT_7AekHctRw,120726
100
+ ultralytics/data/annotator.py,sha256=whx_3sdKGRsECYLKyJMNGQ-d9g-f8020O6kvl5M1c_I,3067
101
+ ultralytics/data/augment.py,sha256=XFrIPNI7un1aLdL3ZU24mwKLRd1AvUSPgie7s3Pb6OE,121319
97
102
  ultralytics/data/base.py,sha256=NTNdn-Emgx3Z2vats8i8oEe-9yosPmHd53v1A0xz0EU,15196
98
103
  ultralytics/data/build.py,sha256=gOU5SNABBNxwo5012N--WhjEnLK2ewycXIryMpbHg6U,7685
99
- ultralytics/data/converter.py,sha256=89E44LBCpbn5hMF03Kdts6DaTP8Oei5iCra5enFCt5I,24467
104
+ ultralytics/data/converter.py,sha256=M7LvBpdYiDA_YEuef3oCXhGPFTjtyJjSbSwqn-F6d7I,24473
100
105
  ultralytics/data/dataset.py,sha256=lxtH3JytNu6nsiPAIhe0uGuGGpkZ4ZRqvXM6eJw9rXU,23244
101
106
  ultralytics/data/loaders.py,sha256=JOwXbz-dxgG2bx0_cQHp-olz5FleoCX8EzrUvZ77vvg,28534
102
107
  ultralytics/data/split_dota.py,sha256=YI-i2MqdiBt06W67TJnBXQHJrqTnkJDJ3zzoL0UZVro,10733
103
108
  ultralytics/data/utils.py,sha256=5YMU5396oAFPwTy2y0MCU2WipF6Rt-7xNtmHKRCA4fI,33838
104
109
  ultralytics/engine/__init__.py,sha256=lm6MckFYCPTbqIoX7w0s_daxdjNeBeKW6DXppv1-QUM,70
105
- ultralytics/engine/exporter.py,sha256=nrO0UdRMzhb_hp-IXAstBmMHJoax-P7Fqe7a8u2954A,77113
110
+ ultralytics/engine/exporter.py,sha256=qeEV-w8cdYuVJP4RaKbq46xJmsYGUFJnkplfEUCIm_c,77013
106
111
  ultralytics/engine/model.py,sha256=s8HsSBvdRgSbnKGULr7YW-ZWJKJsQpOoHd9Aih_nMt0,53427
107
112
  ultralytics/engine/predictor.py,sha256=jiYDAjupOlRUpPvw9tu7or9PjXtLm-YCRiawANtWxj0,17881
108
113
  ultralytics/engine/results.py,sha256=hWlO2e58BPUJ5R4Jl4iirBPaZ8BypcNu_cNQ2NHpUqM,78111
109
- ultralytics/engine/trainer.py,sha256=hrqaO3cgf0VPVLHKv0tHk4W3gcasgqq_KF-DkgnIjus,37421
114
+ ultralytics/engine/trainer.py,sha256=pV8sztWxFH5rMNYW0wXHlk-YrVZsEUYAKFvfcA22PnY,37600
110
115
  ultralytics/engine/tuner.py,sha256=EUlTs7KJQ2RVABm8pihr_14M_Z2kGSzJaWH-Y9TJYDw,11976
111
116
  ultralytics/engine/validator.py,sha256=r27X8HGeDEwq7V5sFjEQH_3EnP1CyG-HcOLpFABUisU,15034
112
117
  ultralytics/hub/__init__.py,sha256=1ifzSYV0PIT4ZWOm2V7HnpGyY3G3hCz0malw3AXHFlY,5660
@@ -126,9 +131,9 @@ ultralytics/models/nas/predict.py,sha256=nzVGTdUb0E_IjmWksX_T61q80hbrjEovihTzTJ1
126
131
  ultralytics/models/nas/val.py,sha256=CSqmcuAcuJ5SQ7mo364RdXLGeu2XATyRY8Z84VGGX5o,1497
127
132
  ultralytics/models/rtdetr/__init__.py,sha256=_jEHmOjI_QP_nT3XJXLgYHQ6bXG4EL8Gnvn1y_eev1g,225
128
133
  ultralytics/models/rtdetr/model.py,sha256=KFUlxMo2NTxVvK9D5x9p0WhXogK_QL5Wao8KxcZcT7s,2016
129
- ultralytics/models/rtdetr/predict.py,sha256=ymZS4ocUuec7zEOOnKFr2xaAr48NwljibO8DE_VrTwY,3596
134
+ ultralytics/models/rtdetr/predict.py,sha256=zT4rc2M0drf1ge1FhWc6RG7tg6xgRdCroXlnl2tJJCI,3598
130
135
  ultralytics/models/rtdetr/train.py,sha256=TGawTiBD0SkNaCS8mWc3KbhfiviPuA7GWkvpZ8xVpGM,3875
131
- ultralytics/models/rtdetr/val.py,sha256=A2Um_J7GE6EHDOHCABpcy4ApyBKT_r9IquguCeROQ7I,5594
136
+ ultralytics/models/rtdetr/val.py,sha256=cve1HdfLI-hGM2dkTCDT_cOFfDuzAe2ROkmlJOZC4qw,5595
132
137
  ultralytics/models/sam/__init__.py,sha256=qZwyxJf34UuE5Lu9qfblVXUAvK1fVd66Xyut_ZcTdyc,246
133
138
  ultralytics/models/sam/amg.py,sha256=MsTflp_oyTjQkfgYZCyn_HVpGOw4f-XH7vDSbM9mRRI,8736
134
139
  ultralytics/models/sam/build.py,sha256=Vhml3zBGDcRO-efauNdM0ZlKTV10ADAj_aT823lPJv8,12515
@@ -155,7 +160,7 @@ ultralytics/models/yolo/classify/val.py,sha256=VUYkqGtKnZPig1XE5Qrtqoqm-Y9dDgr5Y
155
160
  ultralytics/models/yolo/detect/__init__.py,sha256=GIRsLYR-kT4JJx7lh4ZZAFGBZj0aebokuU0A7JbjDVA,257
156
161
  ultralytics/models/yolo/detect/predict.py,sha256=_RrKS3h-tRR4uJyTOPSIp4HapxXC-c8Ao9yDeAM835I,2852
157
162
  ultralytics/models/yolo/detect/train.py,sha256=Y2SYjywenBLg8j-r4bC_sWqle1DJGQtDL5O6koeqm9U,6738
158
- ultralytics/models/yolo/detect/val.py,sha256=ZzJ2mEKoiUI8yfgE5nx1zUV-51_78z5s8REUbBr7wU8,15253
163
+ ultralytics/models/yolo/detect/val.py,sha256=V06zB_CSKCdVu8r7e_0mi9h749qe32FdZI7VssZPoDk,15231
159
164
  ultralytics/models/yolo/obb/__init__.py,sha256=tQmpG8wVHsajWkZdmD6cjGohJ4ki64iSXQT8JY_dydo,221
160
165
  ultralytics/models/yolo/obb/predict.py,sha256=SUgLzsxg1O77KxIeCj9IlSiqB9SfIwcoRtNZViqPS2E,1880
161
166
  ultralytics/models/yolo/obb/train.py,sha256=7LJ04dYENfjdt1Jet0Cxh0nyIpmgIUtmz425ZEuZSn8,1550
@@ -172,11 +177,11 @@ ultralytics/models/yolo/world/__init__.py,sha256=nlh8I6t8hMGz_vZg8QSlsUW1R-2eKvn
172
177
  ultralytics/models/yolo/world/train.py,sha256=6PVmQ0G-22OOPPwP_rqSobe2LM6e2b_lC7lJCdW3UIk,3714
173
178
  ultralytics/models/yolo/world/train_world.py,sha256=sCtg4Hnq9Y7amYjlQsdvTHXH8cKSooipvcXu_1Iyb2k,4885
174
179
  ultralytics/nn/__init__.py,sha256=rjociYD9lo_K-d-1s6TbdWklPLjTcEHk7OIlRDJstIE,615
175
- ultralytics/nn/autobackend.py,sha256=Sixewlem0qeGCD18Zihli1H25j1q71957L33kpVfrVE,37365
176
- ultralytics/nn/tasks.py,sha256=VxtdV1OVpIatZBHmHgIRHuJ9FY4sLAN96VSKeeOhNg8,48796
177
- ultralytics/nn/modules/__init__.py,sha256=02dPoAMtpPNQdHXHmvJeWZvJ_WG6eqwH8atLdFWgcuY,2713
180
+ ultralytics/nn/autobackend.py,sha256=uVLr4GymWtz4D86QXzmqs7LwoLjv3VycFLfdKq9rG1U,37377
181
+ ultralytics/nn/tasks.py,sha256=rKyTShwk1RtWBnbHObcSamxXoCUiwzl0K5QFYaw56Hw,49030
182
+ ultralytics/nn/modules/__init__.py,sha256=pVV5SSu6ktOusdVFr1kHK_WOkVLjCLO2W5XaLH-NF8w,2737
178
183
  ultralytics/nn/modules/activation.py,sha256=oRkhMdqlNpIxQb35pTSUeHV-h0VyLl96GOqvIZ4OvT8,923
179
- ultralytics/nn/modules/block.py,sha256=vQqfKIXPmEnxupdzcLDGC5FkjCNIqURfqt4CEEseuXE,43940
184
+ ultralytics/nn/modules/block.py,sha256=z0F0YD07C31VyMdYCeT5KoTgTpazIYW34xH7xgy02J4,52166
180
185
  ultralytics/nn/modules/conv.py,sha256=Wx_tZ56M7iMiNqz3v03oi86C2fatdmdBBDpkrUyzEIU,13132
181
186
  ultralytics/nn/modules/head.py,sha256=RYT31wplr64yDSHLpEZy3fyqg9W8HWlXWKrltwpqGiQ,27962
182
187
  ultralytics/nn/modules/transformer.py,sha256=fdc4xam82Dk8etahkhlc5RHW6dfY00klKj2od4QpdQo,18097
@@ -209,23 +214,23 @@ ultralytics/utils/autobatch.py,sha256=zc81HlAMArPASEbExty0E_zpITF8PVwin7w-xBFFZ5
209
214
  ultralytics/utils/benchmarks.py,sha256=enf8emMQ7OcZa6RokvwrNm4ZfW-XS7SBKp57staqGRM,26751
210
215
  ultralytics/utils/checks.py,sha256=Hz7yLxQHqzYJkL3HmGy6nhHLG2eYjwH5B0BK5GXV9a4,31011
211
216
  ultralytics/utils/dist.py,sha256=fuiJQEnyyL-SighlI3hUlZPaaSreUl4Q39snF6OhQtI,2386
212
- ultralytics/utils/downloads.py,sha256=aUESyJOE2d7mJwbGECHWLR3RF8HVQPSwNH0cfmLGgdI,21999
217
+ ultralytics/utils/downloads.py,sha256=5B1uwRr6Urb5ShZAAni5_tq9a-3o0fSAH3xNCULktFY,22100
213
218
  ultralytics/utils/errors.py,sha256=sXKDEd8ws3L-yIfG_-P_h86axbm37sJNha7kFBJbQMQ,844
214
219
  ultralytics/utils/files.py,sha256=c85NRofjGPMcpkV-yUo1Cwk8ZVquBGCEKlzbSVtXkQA,8252
215
220
  ultralytics/utils/instance.py,sha256=z1oyyvz7wnCSUW_bvi0TbgAL0VxJtAWWXV9KWCoyJ_k,16887
216
221
  ultralytics/utils/loss.py,sha256=paRY8K7R4pcUGJfApVzZx-m_iFzzMbHm5GgiaixfDuU,34179
217
- ultralytics/utils/metrics.py,sha256=6RBMTBbTYa-5nRwTPlbPBX8w9xhpqryZ9tjXsvlRmmM,54184
222
+ ultralytics/utils/metrics.py,sha256=M15LVYzTGgmahkALKwKU3iYDoJIZ3M4824FLqsJ9qeU,54224
218
223
  ultralytics/utils/ops.py,sha256=izQr5GvgzmaD-GXaqxIjLE525JnvgLetOtuq_EOaxM8,34584
219
224
  ultralytics/utils/patches.py,sha256=ARR89dP4YKq7Dd3g2eU-ukbnc2lo3BELukL_1c_d854,3298
220
225
  ultralytics/utils/plotting.py,sha256=hKji4TyxAmCXdSL264VX6dsC2AZYiL9StShI02dcAOM,62990
221
226
  ultralytics/utils/tal.py,sha256=DO-c006HEI62pcrNRpmt4lpqJPC5yu3veRDOvUuExno,18498
222
- ultralytics/utils/torch_utils.py,sha256=wmV-mlv-vQzAwk66TILC_nEFhlGx5udrGlQqTenqnBw,34068
227
+ ultralytics/utils/torch_utils.py,sha256=h1aWTJ71NX5Q_L5ZAj-4Yljht5S_6YEhE2XUm2Apt2M,34039
223
228
  ultralytics/utils/triton.py,sha256=2L1_rZ8xCJEjexRVj75g9YU-u4tQln_DJ5N1Yr_0bSs,4071
224
229
  ultralytics/utils/tuner.py,sha256=gySDBzTlq_klTOq6CGEyUN58HXzPCulObaMBHacXzHo,6294
225
230
  ultralytics/utils/callbacks/__init__.py,sha256=hzL63Rce6VkZhP4Lcim9LKjadixaQG86nKqPhk7IkS0,242
226
231
  ultralytics/utils/callbacks/base.py,sha256=nbeSPjPCOb0M6FsFQ5-uFxXVzUYwmFyE4wFoA66Jpug,5803
227
232
  ultralytics/utils/callbacks/clearml.py,sha256=JH70T1OLPd9GSvC6HnaKkZHTr8fyE9RRcz3ukL62QPw,5961
228
- ultralytics/utils/callbacks/comet.py,sha256=RfijX3oKLdI3zXyleATLmkGfaV--3sBU-V-zyX8-TWU,15607
233
+ ultralytics/utils/callbacks/comet.py,sha256=2fO79Lvl3DqJmM6zX5COU1Xt3IN_GTkzrDQWr9a80Ag,16005
229
234
  ultralytics/utils/callbacks/dvc.py,sha256=4ln4wqU3ZZTK5JfvUmbKfQuIdO6QohDSnFVV4v5Pl8E,5073
230
235
  ultralytics/utils/callbacks/hub.py,sha256=bqU83kBnNZ0U9qjm0I9xvM4DWA0VMxSLxQDgjuTZbKM,3977
231
236
  ultralytics/utils/callbacks/mlflow.py,sha256=3y4xOPLZe1bES0ETWGJYywulTEUGv8I849e2TNms8yI,5420
@@ -233,9 +238,9 @@ ultralytics/utils/callbacks/neptune.py,sha256=waZ_bRu0-qBKujTLuqonC2gx2DkgBuVnfq
233
238
  ultralytics/utils/callbacks/raytune.py,sha256=A_NVWjyPNf2m6iB-mbW7SMpyqM9QBvpbPa-MCMFMtdk,727
234
239
  ultralytics/utils/callbacks/tensorboard.py,sha256=JHOEVlNQ5dYJPd4Z-EvqbXowuK5uA0p8wPgyyaIUQs0,4194
235
240
  ultralytics/utils/callbacks/wb.py,sha256=ayhT2y62AcSOacnawshATU0rWrlSFQ77mrGgBdRl3W4,7086
236
- ultralytics-8.3.77.dist-info/LICENSE,sha256=DZak_2itbUtvHzD3E7GNUYSRK6jdOJ-GqncQ2weavLA,34523
237
- ultralytics-8.3.77.dist-info/METADATA,sha256=QxzCR22FPcNMdABaAuLci--YfzTs5Y_7Nz-2vywjMfE,35158
238
- ultralytics-8.3.77.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
239
- ultralytics-8.3.77.dist-info/entry_points.txt,sha256=YM_wiKyTe9yRrsEfqvYolNO5ngwfoL4-NwgKzc8_7sI,93
240
- ultralytics-8.3.77.dist-info/top_level.txt,sha256=XP49TwiMw4QGsvTLSYiJhz1xF_k7ev5mQ8jJXaXi45Q,12
241
- ultralytics-8.3.77.dist-info/RECORD,,
241
+ ultralytics-8.3.79.dist-info/LICENSE,sha256=DZak_2itbUtvHzD3E7GNUYSRK6jdOJ-GqncQ2weavLA,34523
242
+ ultralytics-8.3.79.dist-info/METADATA,sha256=Qg8FyAGAg92474miDCJhNUdGM-siQ8ypdixaKpzYyko,35158
243
+ ultralytics-8.3.79.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
244
+ ultralytics-8.3.79.dist-info/entry_points.txt,sha256=YM_wiKyTe9yRrsEfqvYolNO5ngwfoL4-NwgKzc8_7sI,93
245
+ ultralytics-8.3.79.dist-info/top_level.txt,sha256=XP49TwiMw4QGsvTLSYiJhz1xF_k7ev5mQ8jJXaXi45Q,12
246
+ ultralytics-8.3.79.dist-info/RECORD,,