ultralytics 8.3.143__py3-none-any.whl → 8.3.144__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tests/conftest.py +7 -24
- tests/test_cli.py +1 -1
- tests/test_cuda.py +7 -2
- tests/test_engine.py +7 -8
- tests/test_exports.py +16 -16
- tests/test_integrations.py +1 -1
- tests/test_solutions.py +11 -11
- ultralytics/__init__.py +1 -1
- ultralytics/cfg/__init__.py +16 -13
- ultralytics/data/annotator.py +6 -5
- ultralytics/data/augment.py +127 -126
- ultralytics/data/base.py +54 -51
- ultralytics/data/build.py +47 -23
- ultralytics/data/converter.py +47 -43
- ultralytics/data/dataset.py +51 -50
- ultralytics/data/loaders.py +77 -44
- ultralytics/data/split.py +22 -9
- ultralytics/data/split_dota.py +63 -39
- ultralytics/data/utils.py +59 -39
- ultralytics/engine/exporter.py +79 -27
- ultralytics/engine/model.py +39 -39
- ultralytics/engine/predictor.py +37 -28
- ultralytics/engine/results.py +187 -157
- ultralytics/engine/trainer.py +36 -19
- ultralytics/engine/tuner.py +12 -9
- ultralytics/engine/validator.py +7 -9
- ultralytics/hub/__init__.py +11 -13
- ultralytics/hub/auth.py +22 -2
- ultralytics/hub/google/__init__.py +19 -19
- ultralytics/hub/session.py +37 -51
- ultralytics/hub/utils.py +19 -5
- ultralytics/models/fastsam/model.py +30 -12
- ultralytics/models/fastsam/predict.py +5 -6
- ultralytics/models/fastsam/utils.py +3 -3
- ultralytics/models/fastsam/val.py +10 -6
- ultralytics/models/nas/model.py +9 -5
- ultralytics/models/nas/predict.py +6 -6
- ultralytics/models/nas/val.py +3 -3
- ultralytics/models/rtdetr/model.py +7 -6
- ultralytics/models/rtdetr/predict.py +14 -7
- ultralytics/models/rtdetr/train.py +10 -4
- ultralytics/models/rtdetr/val.py +36 -9
- ultralytics/models/sam/amg.py +30 -12
- ultralytics/models/sam/build.py +22 -22
- ultralytics/models/sam/model.py +10 -9
- ultralytics/models/sam/modules/blocks.py +76 -80
- ultralytics/models/sam/modules/decoders.py +6 -8
- ultralytics/models/sam/modules/encoders.py +23 -26
- ultralytics/models/sam/modules/memory_attention.py +13 -1
- ultralytics/models/sam/modules/sam.py +57 -26
- ultralytics/models/sam/modules/tiny_encoder.py +232 -237
- ultralytics/models/sam/modules/transformer.py +13 -13
- ultralytics/models/sam/modules/utils.py +11 -19
- ultralytics/models/sam/predict.py +114 -101
- ultralytics/models/utils/loss.py +98 -77
- ultralytics/models/utils/ops.py +116 -67
- ultralytics/models/yolo/classify/predict.py +5 -5
- ultralytics/models/yolo/classify/train.py +32 -28
- ultralytics/models/yolo/classify/val.py +7 -8
- ultralytics/models/yolo/detect/predict.py +1 -0
- ultralytics/models/yolo/detect/train.py +15 -14
- ultralytics/models/yolo/detect/val.py +37 -36
- ultralytics/models/yolo/model.py +106 -23
- ultralytics/models/yolo/obb/predict.py +3 -4
- ultralytics/models/yolo/obb/train.py +14 -6
- ultralytics/models/yolo/obb/val.py +29 -23
- ultralytics/models/yolo/pose/predict.py +9 -8
- ultralytics/models/yolo/pose/train.py +24 -16
- ultralytics/models/yolo/pose/val.py +44 -26
- ultralytics/models/yolo/segment/predict.py +5 -5
- ultralytics/models/yolo/segment/train.py +11 -7
- ultralytics/models/yolo/segment/val.py +2 -2
- ultralytics/models/yolo/world/train.py +33 -23
- ultralytics/models/yolo/world/train_world.py +11 -3
- ultralytics/models/yolo/yoloe/predict.py +11 -11
- ultralytics/models/yolo/yoloe/train.py +73 -21
- ultralytics/models/yolo/yoloe/train_seg.py +10 -7
- ultralytics/models/yolo/yoloe/val.py +42 -18
- ultralytics/nn/autobackend.py +59 -15
- ultralytics/nn/modules/__init__.py +4 -4
- ultralytics/nn/modules/activation.py +4 -1
- ultralytics/nn/modules/block.py +178 -111
- ultralytics/nn/modules/conv.py +6 -5
- ultralytics/nn/modules/head.py +469 -121
- ultralytics/nn/modules/transformer.py +147 -58
- ultralytics/nn/tasks.py +227 -20
- ultralytics/nn/text_model.py +30 -33
- ultralytics/solutions/ai_gym.py +1 -1
- ultralytics/solutions/analytics.py +7 -4
- ultralytics/solutions/config.py +10 -10
- ultralytics/solutions/distance_calculation.py +11 -10
- ultralytics/solutions/heatmap.py +1 -1
- ultralytics/solutions/instance_segmentation.py +6 -3
- ultralytics/solutions/object_blurrer.py +3 -3
- ultralytics/solutions/object_counter.py +15 -7
- ultralytics/solutions/object_cropper.py +3 -2
- ultralytics/solutions/parking_management.py +29 -28
- ultralytics/solutions/queue_management.py +6 -6
- ultralytics/solutions/region_counter.py +10 -3
- ultralytics/solutions/security_alarm.py +3 -3
- ultralytics/solutions/similarity_search.py +85 -24
- ultralytics/solutions/solutions.py +184 -75
- ultralytics/solutions/speed_estimation.py +28 -22
- ultralytics/solutions/streamlit_inference.py +17 -12
- ultralytics/solutions/trackzone.py +4 -4
- ultralytics/trackers/basetrack.py +16 -23
- ultralytics/trackers/bot_sort.py +30 -20
- ultralytics/trackers/byte_tracker.py +70 -64
- ultralytics/trackers/track.py +4 -8
- ultralytics/trackers/utils/gmc.py +31 -58
- ultralytics/trackers/utils/kalman_filter.py +37 -37
- ultralytics/trackers/utils/matching.py +1 -1
- ultralytics/utils/__init__.py +105 -89
- ultralytics/utils/autobatch.py +16 -3
- ultralytics/utils/autodevice.py +54 -24
- ultralytics/utils/benchmarks.py +42 -28
- ultralytics/utils/callbacks/base.py +3 -3
- ultralytics/utils/callbacks/clearml.py +9 -9
- ultralytics/utils/callbacks/comet.py +67 -25
- ultralytics/utils/callbacks/dvc.py +7 -10
- ultralytics/utils/callbacks/mlflow.py +2 -5
- ultralytics/utils/callbacks/neptune.py +7 -13
- ultralytics/utils/callbacks/raytune.py +1 -1
- ultralytics/utils/callbacks/tensorboard.py +5 -6
- ultralytics/utils/callbacks/wb.py +14 -14
- ultralytics/utils/checks.py +14 -13
- ultralytics/utils/dist.py +5 -5
- ultralytics/utils/downloads.py +94 -67
- ultralytics/utils/errors.py +5 -5
- ultralytics/utils/export.py +61 -47
- ultralytics/utils/files.py +23 -22
- ultralytics/utils/instance.py +48 -52
- ultralytics/utils/loss.py +78 -40
- ultralytics/utils/metrics.py +186 -130
- ultralytics/utils/ops.py +186 -190
- ultralytics/utils/patches.py +15 -17
- ultralytics/utils/plotting.py +71 -27
- ultralytics/utils/tal.py +21 -15
- ultralytics/utils/torch_utils.py +53 -50
- ultralytics/utils/triton.py +5 -4
- ultralytics/utils/tuner.py +5 -5
- {ultralytics-8.3.143.dist-info → ultralytics-8.3.144.dist-info}/METADATA +1 -1
- ultralytics-8.3.144.dist-info/RECORD +272 -0
- ultralytics-8.3.143.dist-info/RECORD +0 -272
- {ultralytics-8.3.143.dist-info → ultralytics-8.3.144.dist-info}/WHEEL +0 -0
- {ultralytics-8.3.143.dist-info → ultralytics-8.3.144.dist-info}/entry_points.txt +0 -0
- {ultralytics-8.3.143.dist-info → ultralytics-8.3.144.dist-info}/licenses/LICENSE +0 -0
- {ultralytics-8.3.143.dist-info → ultralytics-8.3.144.dist-info}/top_level.txt +0 -0
ultralytics/nn/modules/block.py
CHANGED
@@ -1,6 +1,8 @@
|
|
1
1
|
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
|
2
2
|
"""Block modules."""
|
3
3
|
|
4
|
+
from typing import List, Optional, Tuple
|
5
|
+
|
4
6
|
import torch
|
5
7
|
import torch.nn as nn
|
6
8
|
import torch.nn.functional as F
|
@@ -60,15 +62,20 @@ class DFL(nn.Module):
|
|
60
62
|
Proposed in Generalized Focal Loss https://ieeexplore.ieee.org/document/9792391
|
61
63
|
"""
|
62
64
|
|
63
|
-
def __init__(self, c1=16):
|
64
|
-
"""
|
65
|
+
def __init__(self, c1: int = 16):
|
66
|
+
"""
|
67
|
+
Initialize a convolutional layer with a given number of input channels.
|
68
|
+
|
69
|
+
Args:
|
70
|
+
c1 (int): Number of input channels.
|
71
|
+
"""
|
65
72
|
super().__init__()
|
66
73
|
self.conv = nn.Conv2d(c1, 1, 1, bias=False).requires_grad_(False)
|
67
74
|
x = torch.arange(c1, dtype=torch.float)
|
68
75
|
self.conv.weight.data[:] = nn.Parameter(x.view(1, c1, 1, 1))
|
69
76
|
self.c1 = c1
|
70
77
|
|
71
|
-
def forward(self, x):
|
78
|
+
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
72
79
|
"""Apply the DFL module to input tensor and return transformed output."""
|
73
80
|
b, _, a = x.shape # batch, channels, anchors
|
74
81
|
return self.conv(x.view(b, 4, self.c1, a).transpose(2, 1).softmax(1)).view(b, 4, a)
|
@@ -78,7 +85,7 @@ class DFL(nn.Module):
|
|
78
85
|
class Proto(nn.Module):
|
79
86
|
"""Ultralytics YOLO models mask Proto module for segmentation models."""
|
80
87
|
|
81
|
-
def __init__(self, c1, c_=256, c2=32):
|
88
|
+
def __init__(self, c1: int, c_: int = 256, c2: int = 32):
|
82
89
|
"""
|
83
90
|
Initialize the Ultralytics YOLO models mask Proto module with specified number of protos and masks.
|
84
91
|
|
@@ -93,7 +100,7 @@ class Proto(nn.Module):
|
|
93
100
|
self.cv2 = Conv(c_, c_, k=3)
|
94
101
|
self.cv3 = Conv(c_, c2)
|
95
102
|
|
96
|
-
def forward(self, x):
|
103
|
+
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
97
104
|
"""Perform a forward pass through layers using an upsampled input image."""
|
98
105
|
return self.cv3(self.cv2(self.upsample(self.cv1(x))))
|
99
106
|
|
@@ -105,7 +112,7 @@ class HGStem(nn.Module):
|
|
105
112
|
https://github.com/PaddlePaddle/PaddleDetection/blob/develop/ppdet/modeling/backbones/hgnet_v2.py
|
106
113
|
"""
|
107
114
|
|
108
|
-
def __init__(self, c1, cm, c2):
|
115
|
+
def __init__(self, c1: int, cm: int, c2: int):
|
109
116
|
"""
|
110
117
|
Initialize the StemBlock of PPHGNetV2.
|
111
118
|
|
@@ -122,7 +129,7 @@ class HGStem(nn.Module):
|
|
122
129
|
self.stem4 = Conv(cm, c2, 1, 1, act=nn.ReLU())
|
123
130
|
self.pool = nn.MaxPool2d(kernel_size=2, stride=1, padding=0, ceil_mode=True)
|
124
131
|
|
125
|
-
def forward(self, x):
|
132
|
+
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
126
133
|
"""Forward pass of a PPHGNetV2 backbone layer."""
|
127
134
|
x = self.stem1(x)
|
128
135
|
x = F.pad(x, [0, 1, 0, 1])
|
@@ -143,7 +150,17 @@ class HGBlock(nn.Module):
|
|
143
150
|
https://github.com/PaddlePaddle/PaddleDetection/blob/develop/ppdet/modeling/backbones/hgnet_v2.py
|
144
151
|
"""
|
145
152
|
|
146
|
-
def __init__(
|
153
|
+
def __init__(
|
154
|
+
self,
|
155
|
+
c1: int,
|
156
|
+
cm: int,
|
157
|
+
c2: int,
|
158
|
+
k: int = 3,
|
159
|
+
n: int = 6,
|
160
|
+
lightconv: bool = False,
|
161
|
+
shortcut: bool = False,
|
162
|
+
act: nn.Module = nn.ReLU(),
|
163
|
+
):
|
147
164
|
"""
|
148
165
|
Initialize HGBlock with specified parameters.
|
149
166
|
|
@@ -164,7 +181,7 @@ class HGBlock(nn.Module):
|
|
164
181
|
self.ec = Conv(c2 // 2, c2, 1, 1, act=act) # excitation conv
|
165
182
|
self.add = shortcut and c1 == c2
|
166
183
|
|
167
|
-
def forward(self, x):
|
184
|
+
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
168
185
|
"""Forward pass of a PPHGNetV2 backbone layer."""
|
169
186
|
y = [x]
|
170
187
|
y.extend(m(y[-1]) for m in self.m)
|
@@ -175,14 +192,14 @@ class HGBlock(nn.Module):
|
|
175
192
|
class SPP(nn.Module):
|
176
193
|
"""Spatial Pyramid Pooling (SPP) layer https://arxiv.org/abs/1406.4729."""
|
177
194
|
|
178
|
-
def __init__(self, c1, c2, k=(5, 9, 13)):
|
195
|
+
def __init__(self, c1: int, c2: int, k: Tuple[int, ...] = (5, 9, 13)):
|
179
196
|
"""
|
180
197
|
Initialize the SPP layer with input/output channels and pooling kernel sizes.
|
181
198
|
|
182
199
|
Args:
|
183
200
|
c1 (int): Input channels.
|
184
201
|
c2 (int): Output channels.
|
185
|
-
k (Tuple[int, int, int]): Kernel sizes for max pooling.
|
202
|
+
k (tuple): Kernel sizes for max pooling.
|
186
203
|
"""
|
187
204
|
super().__init__()
|
188
205
|
c_ = c1 // 2 # hidden channels
|
@@ -190,7 +207,7 @@ class SPP(nn.Module):
|
|
190
207
|
self.cv2 = Conv(c_ * (len(k) + 1), c2, 1, 1)
|
191
208
|
self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k])
|
192
209
|
|
193
|
-
def forward(self, x):
|
210
|
+
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
194
211
|
"""Forward pass of the SPP layer, performing spatial pyramid pooling."""
|
195
212
|
x = self.cv1(x)
|
196
213
|
return self.cv2(torch.cat([x] + [m(x) for m in self.m], 1))
|
@@ -199,7 +216,7 @@ class SPP(nn.Module):
|
|
199
216
|
class SPPF(nn.Module):
|
200
217
|
"""Spatial Pyramid Pooling - Fast (SPPF) layer for YOLOv5 by Glenn Jocher."""
|
201
218
|
|
202
|
-
def __init__(self, c1, c2, k=5):
|
219
|
+
def __init__(self, c1: int, c2: int, k: int = 5):
|
203
220
|
"""
|
204
221
|
Initialize the SPPF layer with given input/output channels and kernel size.
|
205
222
|
|
@@ -217,7 +234,7 @@ class SPPF(nn.Module):
|
|
217
234
|
self.cv2 = Conv(c_ * 4, c2, 1, 1)
|
218
235
|
self.m = nn.MaxPool2d(kernel_size=k, stride=1, padding=k // 2)
|
219
236
|
|
220
|
-
def forward(self, x):
|
237
|
+
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
221
238
|
"""Apply sequential pooling operations to input and return concatenated feature maps."""
|
222
239
|
y = [self.cv1(x)]
|
223
240
|
y.extend(self.m(y[-1]) for _ in range(3))
|
@@ -227,7 +244,7 @@ class SPPF(nn.Module):
|
|
227
244
|
class C1(nn.Module):
|
228
245
|
"""CSP Bottleneck with 1 convolution."""
|
229
246
|
|
230
|
-
def __init__(self, c1, c2, n=1):
|
247
|
+
def __init__(self, c1: int, c2: int, n: int = 1):
|
231
248
|
"""
|
232
249
|
Initialize the CSP Bottleneck with 1 convolution.
|
233
250
|
|
@@ -240,7 +257,7 @@ class C1(nn.Module):
|
|
240
257
|
self.cv1 = Conv(c1, c2, 1, 1)
|
241
258
|
self.m = nn.Sequential(*(Conv(c2, c2, 3) for _ in range(n)))
|
242
259
|
|
243
|
-
def forward(self, x):
|
260
|
+
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
244
261
|
"""Apply convolution and residual connection to input tensor."""
|
245
262
|
y = self.cv1(x)
|
246
263
|
return self.m(y) + y
|
@@ -249,7 +266,7 @@ class C1(nn.Module):
|
|
249
266
|
class C2(nn.Module):
|
250
267
|
"""CSP Bottleneck with 2 convolutions."""
|
251
268
|
|
252
|
-
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
|
269
|
+
def __init__(self, c1: int, c2: int, n: int = 1, shortcut: bool = True, g: int = 1, e: float = 0.5):
|
253
270
|
"""
|
254
271
|
Initialize a CSP Bottleneck with 2 convolutions.
|
255
272
|
|
@@ -268,7 +285,7 @@ class C2(nn.Module):
|
|
268
285
|
# self.attention = ChannelAttention(2 * self.c) # or SpatialAttention()
|
269
286
|
self.m = nn.Sequential(*(Bottleneck(self.c, self.c, shortcut, g, k=((3, 3), (3, 3)), e=1.0) for _ in range(n)))
|
270
287
|
|
271
|
-
def forward(self, x):
|
288
|
+
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
272
289
|
"""Forward pass through the CSP bottleneck with 2 convolutions."""
|
273
290
|
a, b = self.cv1(x).chunk(2, 1)
|
274
291
|
return self.cv2(torch.cat((self.m(a), b), 1))
|
@@ -277,7 +294,7 @@ class C2(nn.Module):
|
|
277
294
|
class C2f(nn.Module):
|
278
295
|
"""Faster Implementation of CSP Bottleneck with 2 convolutions."""
|
279
296
|
|
280
|
-
def __init__(self, c1, c2, n=1, shortcut=False, g=1, e=0.5):
|
297
|
+
def __init__(self, c1: int, c2: int, n: int = 1, shortcut: bool = False, g: int = 1, e: float = 0.5):
|
281
298
|
"""
|
282
299
|
Initialize a CSP bottleneck with 2 convolutions.
|
283
300
|
|
@@ -295,13 +312,13 @@ class C2f(nn.Module):
|
|
295
312
|
self.cv2 = Conv((2 + n) * self.c, c2, 1) # optional act=FReLU(c2)
|
296
313
|
self.m = nn.ModuleList(Bottleneck(self.c, self.c, shortcut, g, k=((3, 3), (3, 3)), e=1.0) for _ in range(n))
|
297
314
|
|
298
|
-
def forward(self, x):
|
315
|
+
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
299
316
|
"""Forward pass through C2f layer."""
|
300
317
|
y = list(self.cv1(x).chunk(2, 1))
|
301
318
|
y.extend(m(y[-1]) for m in self.m)
|
302
319
|
return self.cv2(torch.cat(y, 1))
|
303
320
|
|
304
|
-
def forward_split(self, x):
|
321
|
+
def forward_split(self, x: torch.Tensor) -> torch.Tensor:
|
305
322
|
"""Forward pass using split() instead of chunk()."""
|
306
323
|
y = self.cv1(x).split((self.c, self.c), 1)
|
307
324
|
y = [y[0], y[1]]
|
@@ -312,7 +329,7 @@ class C2f(nn.Module):
|
|
312
329
|
class C3(nn.Module):
|
313
330
|
"""CSP Bottleneck with 3 convolutions."""
|
314
331
|
|
315
|
-
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
|
332
|
+
def __init__(self, c1: int, c2: int, n: int = 1, shortcut: bool = True, g: int = 1, e: float = 0.5):
|
316
333
|
"""
|
317
334
|
Initialize the CSP Bottleneck with 3 convolutions.
|
318
335
|
|
@@ -331,7 +348,7 @@ class C3(nn.Module):
|
|
331
348
|
self.cv3 = Conv(2 * c_, c2, 1) # optional act=FReLU(c2)
|
332
349
|
self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, k=((1, 1), (3, 3)), e=1.0) for _ in range(n)))
|
333
350
|
|
334
|
-
def forward(self, x):
|
351
|
+
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
335
352
|
"""Forward pass through the CSP bottleneck with 3 convolutions."""
|
336
353
|
return self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), 1))
|
337
354
|
|
@@ -339,7 +356,7 @@ class C3(nn.Module):
|
|
339
356
|
class C3x(C3):
|
340
357
|
"""C3 module with cross-convolutions."""
|
341
358
|
|
342
|
-
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
|
359
|
+
def __init__(self, c1: int, c2: int, n: int = 1, shortcut: bool = True, g: int = 1, e: float = 0.5):
|
343
360
|
"""
|
344
361
|
Initialize C3 module with cross-convolutions.
|
345
362
|
|
@@ -359,7 +376,7 @@ class C3x(C3):
|
|
359
376
|
class RepC3(nn.Module):
|
360
377
|
"""Rep C3."""
|
361
378
|
|
362
|
-
def __init__(self, c1, c2, n=3, e=1.0):
|
379
|
+
def __init__(self, c1: int, c2: int, n: int = 3, e: float = 1.0):
|
363
380
|
"""
|
364
381
|
Initialize CSP Bottleneck with a single convolution.
|
365
382
|
|
@@ -376,7 +393,7 @@ class RepC3(nn.Module):
|
|
376
393
|
self.m = nn.Sequential(*[RepConv(c_, c_) for _ in range(n)])
|
377
394
|
self.cv3 = Conv(c_, c2, 1, 1) if c_ != c2 else nn.Identity()
|
378
395
|
|
379
|
-
def forward(self, x):
|
396
|
+
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
380
397
|
"""Forward pass of RepC3 module."""
|
381
398
|
return self.cv3(self.m(self.cv1(x)) + self.cv2(x))
|
382
399
|
|
@@ -384,7 +401,7 @@ class RepC3(nn.Module):
|
|
384
401
|
class C3TR(C3):
|
385
402
|
"""C3 module with TransformerBlock()."""
|
386
403
|
|
387
|
-
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
|
404
|
+
def __init__(self, c1: int, c2: int, n: int = 1, shortcut: bool = True, g: int = 1, e: float = 0.5):
|
388
405
|
"""
|
389
406
|
Initialize C3 module with TransformerBlock.
|
390
407
|
|
@@ -404,7 +421,7 @@ class C3TR(C3):
|
|
404
421
|
class C3Ghost(C3):
|
405
422
|
"""C3 module with GhostBottleneck()."""
|
406
423
|
|
407
|
-
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
|
424
|
+
def __init__(self, c1: int, c2: int, n: int = 1, shortcut: bool = True, g: int = 1, e: float = 0.5):
|
408
425
|
"""
|
409
426
|
Initialize C3 module with GhostBottleneck.
|
410
427
|
|
@@ -424,7 +441,7 @@ class C3Ghost(C3):
|
|
424
441
|
class GhostBottleneck(nn.Module):
|
425
442
|
"""Ghost Bottleneck https://github.com/huawei-noah/Efficient-AI-Backbones."""
|
426
443
|
|
427
|
-
def __init__(self, c1, c2, k=3, s=1):
|
444
|
+
def __init__(self, c1: int, c2: int, k: int = 3, s: int = 1):
|
428
445
|
"""
|
429
446
|
Initialize Ghost Bottleneck module.
|
430
447
|
|
@@ -445,7 +462,7 @@ class GhostBottleneck(nn.Module):
|
|
445
462
|
nn.Sequential(DWConv(c1, c1, k, s, act=False), Conv(c1, c2, 1, 1, act=False)) if s == 2 else nn.Identity()
|
446
463
|
)
|
447
464
|
|
448
|
-
def forward(self, x):
|
465
|
+
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
449
466
|
"""Apply skip connection and concatenation to input tensor."""
|
450
467
|
return self.conv(x) + self.shortcut(x)
|
451
468
|
|
@@ -453,7 +470,9 @@ class GhostBottleneck(nn.Module):
|
|
453
470
|
class Bottleneck(nn.Module):
|
454
471
|
"""Standard bottleneck."""
|
455
472
|
|
456
|
-
def __init__(
|
473
|
+
def __init__(
|
474
|
+
self, c1: int, c2: int, shortcut: bool = True, g: int = 1, k: Tuple[int, int] = (3, 3), e: float = 0.5
|
475
|
+
):
|
457
476
|
"""
|
458
477
|
Initialize a standard bottleneck module.
|
459
478
|
|
@@ -462,7 +481,7 @@ class Bottleneck(nn.Module):
|
|
462
481
|
c2 (int): Output channels.
|
463
482
|
shortcut (bool): Whether to use shortcut connection.
|
464
483
|
g (int): Groups for convolutions.
|
465
|
-
k (Tuple[int, int]): Kernel sizes for convolutions.
|
484
|
+
k (tuple): Kernel sizes for convolutions.
|
466
485
|
e (float): Expansion ratio.
|
467
486
|
"""
|
468
487
|
super().__init__()
|
@@ -471,7 +490,7 @@ class Bottleneck(nn.Module):
|
|
471
490
|
self.cv2 = Conv(c_, c2, k[1], 1, g=g)
|
472
491
|
self.add = shortcut and c1 == c2
|
473
492
|
|
474
|
-
def forward(self, x):
|
493
|
+
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
475
494
|
"""Apply bottleneck with optional shortcut connection."""
|
476
495
|
return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))
|
477
496
|
|
@@ -479,7 +498,7 @@ class Bottleneck(nn.Module):
|
|
479
498
|
class BottleneckCSP(nn.Module):
|
480
499
|
"""CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks."""
|
481
500
|
|
482
|
-
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
|
501
|
+
def __init__(self, c1: int, c2: int, n: int = 1, shortcut: bool = True, g: int = 1, e: float = 0.5):
|
483
502
|
"""
|
484
503
|
Initialize CSP Bottleneck.
|
485
504
|
|
@@ -501,7 +520,7 @@ class BottleneckCSP(nn.Module):
|
|
501
520
|
self.act = nn.SiLU()
|
502
521
|
self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)))
|
503
522
|
|
504
|
-
def forward(self, x):
|
523
|
+
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
505
524
|
"""Apply CSP bottleneck with 3 convolutions."""
|
506
525
|
y1 = self.cv3(self.m(self.cv1(x)))
|
507
526
|
y2 = self.cv2(x)
|
@@ -511,7 +530,7 @@ class BottleneckCSP(nn.Module):
|
|
511
530
|
class ResNetBlock(nn.Module):
|
512
531
|
"""ResNet block with standard convolution layers."""
|
513
532
|
|
514
|
-
def __init__(self, c1, c2, s=1, e=4):
|
533
|
+
def __init__(self, c1: int, c2: int, s: int = 1, e: int = 4):
|
515
534
|
"""
|
516
535
|
Initialize ResNet block.
|
517
536
|
|
@@ -528,7 +547,7 @@ class ResNetBlock(nn.Module):
|
|
528
547
|
self.cv3 = Conv(c2, c3, k=1, act=False)
|
529
548
|
self.shortcut = nn.Sequential(Conv(c1, c3, k=1, s=s, act=False)) if s != 1 or c1 != c3 else nn.Identity()
|
530
549
|
|
531
|
-
def forward(self, x):
|
550
|
+
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
532
551
|
"""Forward pass through the ResNet block."""
|
533
552
|
return F.relu(self.cv3(self.cv2(self.cv1(x))) + self.shortcut(x))
|
534
553
|
|
@@ -536,7 +555,7 @@ class ResNetBlock(nn.Module):
|
|
536
555
|
class ResNetLayer(nn.Module):
|
537
556
|
"""ResNet layer with multiple ResNet blocks."""
|
538
557
|
|
539
|
-
def __init__(self, c1, c2, s=1, is_first=False, n=1, e=4):
|
558
|
+
def __init__(self, c1: int, c2: int, s: int = 1, is_first: bool = False, n: int = 1, e: int = 4):
|
540
559
|
"""
|
541
560
|
Initialize ResNet layer.
|
542
561
|
|
@@ -560,7 +579,7 @@ class ResNetLayer(nn.Module):
|
|
560
579
|
blocks.extend([ResNetBlock(e * c2, c2, 1, e=e) for _ in range(n - 1)])
|
561
580
|
self.layer = nn.Sequential(*blocks)
|
562
581
|
|
563
|
-
def forward(self, x):
|
582
|
+
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
564
583
|
"""Forward pass through the ResNet layer."""
|
565
584
|
return self.layer(x)
|
566
585
|
|
@@ -568,7 +587,7 @@ class ResNetLayer(nn.Module):
|
|
568
587
|
class MaxSigmoidAttnBlock(nn.Module):
|
569
588
|
"""Max Sigmoid attention block."""
|
570
589
|
|
571
|
-
def __init__(self, c1, c2, nh=1, ec=128, gc=512, scale=False):
|
590
|
+
def __init__(self, c1: int, c2: int, nh: int = 1, ec: int = 128, gc: int = 512, scale: bool = False):
|
572
591
|
"""
|
573
592
|
Initialize MaxSigmoidAttnBlock.
|
574
593
|
|
@@ -589,7 +608,7 @@ class MaxSigmoidAttnBlock(nn.Module):
|
|
589
608
|
self.proj_conv = Conv(c1, c2, k=3, s=1, act=False)
|
590
609
|
self.scale = nn.Parameter(torch.ones(1, nh, 1, 1)) if scale else 1.0
|
591
610
|
|
592
|
-
def forward(self, x, guide):
|
611
|
+
def forward(self, x: torch.Tensor, guide: torch.Tensor) -> torch.Tensor:
|
593
612
|
"""
|
594
613
|
Forward pass of MaxSigmoidAttnBlock.
|
595
614
|
|
@@ -622,7 +641,18 @@ class MaxSigmoidAttnBlock(nn.Module):
|
|
622
641
|
class C2fAttn(nn.Module):
|
623
642
|
"""C2f module with an additional attn module."""
|
624
643
|
|
625
|
-
def __init__(
|
644
|
+
def __init__(
|
645
|
+
self,
|
646
|
+
c1: int,
|
647
|
+
c2: int,
|
648
|
+
n: int = 1,
|
649
|
+
ec: int = 128,
|
650
|
+
nh: int = 1,
|
651
|
+
gc: int = 512,
|
652
|
+
shortcut: bool = False,
|
653
|
+
g: int = 1,
|
654
|
+
e: float = 0.5,
|
655
|
+
):
|
626
656
|
"""
|
627
657
|
Initialize C2f module with attention mechanism.
|
628
658
|
|
@@ -644,7 +674,7 @@ class C2fAttn(nn.Module):
|
|
644
674
|
self.m = nn.ModuleList(Bottleneck(self.c, self.c, shortcut, g, k=((3, 3), (3, 3)), e=1.0) for _ in range(n))
|
645
675
|
self.attn = MaxSigmoidAttnBlock(self.c, self.c, gc=gc, ec=ec, nh=nh)
|
646
676
|
|
647
|
-
def forward(self, x, guide):
|
677
|
+
def forward(self, x: torch.Tensor, guide: torch.Tensor) -> torch.Tensor:
|
648
678
|
"""
|
649
679
|
Forward pass through C2f layer with attention.
|
650
680
|
|
@@ -660,7 +690,7 @@ class C2fAttn(nn.Module):
|
|
660
690
|
y.append(self.attn(y[-1], guide))
|
661
691
|
return self.cv2(torch.cat(y, 1))
|
662
692
|
|
663
|
-
def forward_split(self, x, guide):
|
693
|
+
def forward_split(self, x: torch.Tensor, guide: torch.Tensor) -> torch.Tensor:
|
664
694
|
"""
|
665
695
|
Forward pass using split() instead of chunk().
|
666
696
|
|
@@ -680,7 +710,9 @@ class C2fAttn(nn.Module):
|
|
680
710
|
class ImagePoolingAttn(nn.Module):
|
681
711
|
"""ImagePoolingAttn: Enhance the text embeddings with image-aware information."""
|
682
712
|
|
683
|
-
def __init__(
|
713
|
+
def __init__(
|
714
|
+
self, ec: int = 256, ch: Tuple[int, ...] = (), ct: int = 512, nh: int = 8, k: int = 3, scale: bool = False
|
715
|
+
):
|
684
716
|
"""
|
685
717
|
Initialize ImagePoolingAttn module.
|
686
718
|
|
@@ -708,7 +740,7 @@ class ImagePoolingAttn(nn.Module):
|
|
708
740
|
self.hc = ec // nh
|
709
741
|
self.k = k
|
710
742
|
|
711
|
-
def forward(self, x, text):
|
743
|
+
def forward(self, x: List[torch.Tensor], text: torch.Tensor) -> torch.Tensor:
|
712
744
|
"""
|
713
745
|
Forward pass of ImagePoolingAttn.
|
714
746
|
|
@@ -752,7 +784,7 @@ class ContrastiveHead(nn.Module):
|
|
752
784
|
self.bias = nn.Parameter(torch.tensor([-10.0]))
|
753
785
|
self.logit_scale = nn.Parameter(torch.ones([]) * torch.tensor(1 / 0.07).log())
|
754
786
|
|
755
|
-
def forward(self, x, w):
|
787
|
+
def forward(self, x: torch.Tensor, w: torch.Tensor) -> torch.Tensor:
|
756
788
|
"""
|
757
789
|
Forward function of contrastive learning.
|
758
790
|
|
@@ -798,15 +830,11 @@ class BNContrastiveHead(nn.Module):
|
|
798
830
|
del self.logit_scale
|
799
831
|
self.forward = self.forward_fuse
|
800
832
|
|
801
|
-
def forward_fuse(self, x, w):
|
802
|
-
"""
|
803
|
-
Passes input out unchanged.
|
804
|
-
|
805
|
-
TODO: Update or remove?
|
806
|
-
"""
|
833
|
+
def forward_fuse(self, x: torch.Tensor, w: torch.Tensor) -> torch.Tensor:
|
834
|
+
"""Passes input out unchanged."""
|
807
835
|
return x
|
808
836
|
|
809
|
-
def forward(self, x, w):
|
837
|
+
def forward(self, x: torch.Tensor, w: torch.Tensor) -> torch.Tensor:
|
810
838
|
"""
|
811
839
|
Forward function of contrastive learning with batch normalization.
|
812
840
|
|
@@ -827,7 +855,9 @@ class BNContrastiveHead(nn.Module):
|
|
827
855
|
class RepBottleneck(Bottleneck):
|
828
856
|
"""Rep bottleneck."""
|
829
857
|
|
830
|
-
def __init__(
|
858
|
+
def __init__(
|
859
|
+
self, c1: int, c2: int, shortcut: bool = True, g: int = 1, k: Tuple[int, int] = (3, 3), e: float = 0.5
|
860
|
+
):
|
831
861
|
"""
|
832
862
|
Initialize RepBottleneck.
|
833
863
|
|
@@ -836,7 +866,7 @@ class RepBottleneck(Bottleneck):
|
|
836
866
|
c2 (int): Output channels.
|
837
867
|
shortcut (bool): Whether to use shortcut connection.
|
838
868
|
g (int): Groups for convolutions.
|
839
|
-
k (Tuple[int, int]): Kernel sizes for convolutions.
|
869
|
+
k (tuple): Kernel sizes for convolutions.
|
840
870
|
e (float): Expansion ratio.
|
841
871
|
"""
|
842
872
|
super().__init__(c1, c2, shortcut, g, k, e)
|
@@ -847,7 +877,7 @@ class RepBottleneck(Bottleneck):
|
|
847
877
|
class RepCSP(C3):
|
848
878
|
"""Repeatable Cross Stage Partial Network (RepCSP) module for efficient feature extraction."""
|
849
879
|
|
850
|
-
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
|
880
|
+
def __init__(self, c1: int, c2: int, n: int = 1, shortcut: bool = True, g: int = 1, e: float = 0.5):
|
851
881
|
"""
|
852
882
|
Initialize RepCSP layer.
|
853
883
|
|
@@ -867,7 +897,7 @@ class RepCSP(C3):
|
|
867
897
|
class RepNCSPELAN4(nn.Module):
|
868
898
|
"""CSP-ELAN."""
|
869
899
|
|
870
|
-
def __init__(self, c1, c2, c3, c4, n=1):
|
900
|
+
def __init__(self, c1: int, c2: int, c3: int, c4: int, n: int = 1):
|
871
901
|
"""
|
872
902
|
Initialize CSP-ELAN layer.
|
873
903
|
|
@@ -885,13 +915,13 @@ class RepNCSPELAN4(nn.Module):
|
|
885
915
|
self.cv3 = nn.Sequential(RepCSP(c4, c4, n), Conv(c4, c4, 3, 1))
|
886
916
|
self.cv4 = Conv(c3 + (2 * c4), c2, 1, 1)
|
887
917
|
|
888
|
-
def forward(self, x):
|
918
|
+
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
889
919
|
"""Forward pass through RepNCSPELAN4 layer."""
|
890
920
|
y = list(self.cv1(x).chunk(2, 1))
|
891
921
|
y.extend((m(y[-1])) for m in [self.cv2, self.cv3])
|
892
922
|
return self.cv4(torch.cat(y, 1))
|
893
923
|
|
894
|
-
def forward_split(self, x):
|
924
|
+
def forward_split(self, x: torch.Tensor) -> torch.Tensor:
|
895
925
|
"""Forward pass using split() instead of chunk()."""
|
896
926
|
y = list(self.cv1(x).split((self.c, self.c), 1))
|
897
927
|
y.extend(m(y[-1]) for m in [self.cv2, self.cv3])
|
@@ -901,7 +931,7 @@ class RepNCSPELAN4(nn.Module):
|
|
901
931
|
class ELAN1(RepNCSPELAN4):
|
902
932
|
"""ELAN1 module with 4 convolutions."""
|
903
933
|
|
904
|
-
def __init__(self, c1, c2, c3, c4):
|
934
|
+
def __init__(self, c1: int, c2: int, c3: int, c4: int):
|
905
935
|
"""
|
906
936
|
Initialize ELAN1 layer.
|
907
937
|
|
@@ -922,7 +952,7 @@ class ELAN1(RepNCSPELAN4):
|
|
922
952
|
class AConv(nn.Module):
|
923
953
|
"""AConv."""
|
924
954
|
|
925
|
-
def __init__(self, c1, c2):
|
955
|
+
def __init__(self, c1: int, c2: int):
|
926
956
|
"""
|
927
957
|
Initialize AConv module.
|
928
958
|
|
@@ -933,7 +963,7 @@ class AConv(nn.Module):
|
|
933
963
|
super().__init__()
|
934
964
|
self.cv1 = Conv(c1, c2, 3, 2, 1)
|
935
965
|
|
936
|
-
def forward(self, x):
|
966
|
+
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
937
967
|
"""Forward pass through AConv layer."""
|
938
968
|
x = torch.nn.functional.avg_pool2d(x, 2, 1, 0, False, True)
|
939
969
|
return self.cv1(x)
|
@@ -942,7 +972,7 @@ class AConv(nn.Module):
|
|
942
972
|
class ADown(nn.Module):
|
943
973
|
"""ADown."""
|
944
974
|
|
945
|
-
def __init__(self, c1, c2):
|
975
|
+
def __init__(self, c1: int, c2: int):
|
946
976
|
"""
|
947
977
|
Initialize ADown module.
|
948
978
|
|
@@ -955,7 +985,7 @@ class ADown(nn.Module):
|
|
955
985
|
self.cv1 = Conv(c1 // 2, self.c, 3, 2, 1)
|
956
986
|
self.cv2 = Conv(c1 // 2, self.c, 1, 1, 0)
|
957
987
|
|
958
|
-
def forward(self, x):
|
988
|
+
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
959
989
|
"""Forward pass through ADown layer."""
|
960
990
|
x = torch.nn.functional.avg_pool2d(x, 2, 1, 0, False, True)
|
961
991
|
x1, x2 = x.chunk(2, 1)
|
@@ -968,7 +998,7 @@ class ADown(nn.Module):
|
|
968
998
|
class SPPELAN(nn.Module):
|
969
999
|
"""SPP-ELAN."""
|
970
1000
|
|
971
|
-
def __init__(self, c1, c2, c3, k=5):
|
1001
|
+
def __init__(self, c1: int, c2: int, c3: int, k: int = 5):
|
972
1002
|
"""
|
973
1003
|
Initialize SPP-ELAN block.
|
974
1004
|
|
@@ -986,7 +1016,7 @@ class SPPELAN(nn.Module):
|
|
986
1016
|
self.cv4 = nn.MaxPool2d(kernel_size=k, stride=1, padding=k // 2)
|
987
1017
|
self.cv5 = Conv(4 * c3, c2, 1, 1)
|
988
1018
|
|
989
|
-
def forward(self, x):
|
1019
|
+
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
990
1020
|
"""Forward pass through SPPELAN layer."""
|
991
1021
|
y = [self.cv1(x)]
|
992
1022
|
y.extend(m(y[-1]) for m in [self.cv2, self.cv3, self.cv4])
|
@@ -996,7 +1026,7 @@ class SPPELAN(nn.Module):
|
|
996
1026
|
class CBLinear(nn.Module):
|
997
1027
|
"""CBLinear."""
|
998
1028
|
|
999
|
-
def __init__(self, c1, c2s, k=1, s=1, p=None, g=1):
|
1029
|
+
def __init__(self, c1: int, c2s: List[int], k: int = 1, s: int = 1, p: Optional[int] = None, g: int = 1):
|
1000
1030
|
"""
|
1001
1031
|
Initialize CBLinear module.
|
1002
1032
|
|
@@ -1012,7 +1042,7 @@ class CBLinear(nn.Module):
|
|
1012
1042
|
self.c2s = c2s
|
1013
1043
|
self.conv = nn.Conv2d(c1, sum(c2s), k, s, autopad(k, p), groups=g, bias=True)
|
1014
1044
|
|
1015
|
-
def forward(self, x):
|
1045
|
+
def forward(self, x: torch.Tensor) -> List[torch.Tensor]:
|
1016
1046
|
"""Forward pass through CBLinear layer."""
|
1017
1047
|
return self.conv(x).split(self.c2s, dim=1)
|
1018
1048
|
|
@@ -1020,7 +1050,7 @@ class CBLinear(nn.Module):
|
|
1020
1050
|
class CBFuse(nn.Module):
|
1021
1051
|
"""CBFuse."""
|
1022
1052
|
|
1023
|
-
def __init__(self, idx):
|
1053
|
+
def __init__(self, idx: List[int]):
|
1024
1054
|
"""
|
1025
1055
|
Initialize CBFuse module.
|
1026
1056
|
|
@@ -1030,7 +1060,7 @@ class CBFuse(nn.Module):
|
|
1030
1060
|
super().__init__()
|
1031
1061
|
self.idx = idx
|
1032
1062
|
|
1033
|
-
def forward(self, xs):
|
1063
|
+
def forward(self, xs: List[torch.Tensor]) -> torch.Tensor:
|
1034
1064
|
"""
|
1035
1065
|
Forward pass through CBFuse layer.
|
1036
1066
|
|
@@ -1048,7 +1078,7 @@ class CBFuse(nn.Module):
|
|
1048
1078
|
class C3f(nn.Module):
|
1049
1079
|
"""Faster Implementation of CSP Bottleneck with 2 convolutions."""
|
1050
1080
|
|
1051
|
-
def __init__(self, c1, c2, n=1, shortcut=False, g=1, e=0.5):
|
1081
|
+
def __init__(self, c1: int, c2: int, n: int = 1, shortcut: bool = False, g: int = 1, e: float = 0.5):
|
1052
1082
|
"""
|
1053
1083
|
Initialize CSP bottleneck layer with two convolutions.
|
1054
1084
|
|
@@ -1067,7 +1097,7 @@ class C3f(nn.Module):
|
|
1067
1097
|
self.cv3 = Conv((2 + n) * c_, c2, 1) # optional act=FReLU(c2)
|
1068
1098
|
self.m = nn.ModuleList(Bottleneck(c_, c_, shortcut, g, k=((3, 3), (3, 3)), e=1.0) for _ in range(n))
|
1069
1099
|
|
1070
|
-
def forward(self, x):
|
1100
|
+
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
1071
1101
|
"""Forward pass through C3f layer."""
|
1072
1102
|
y = [self.cv2(x), self.cv1(x)]
|
1073
1103
|
y.extend(m(y[-1]) for m in self.m)
|
@@ -1077,7 +1107,9 @@ class C3f(nn.Module):
|
|
1077
1107
|
class C3k2(C2f):
|
1078
1108
|
"""Faster Implementation of CSP Bottleneck with 2 convolutions."""
|
1079
1109
|
|
1080
|
-
def __init__(
|
1110
|
+
def __init__(
|
1111
|
+
self, c1: int, c2: int, n: int = 1, c3k: bool = False, e: float = 0.5, g: int = 1, shortcut: bool = True
|
1112
|
+
):
|
1081
1113
|
"""
|
1082
1114
|
Initialize C3k2 module.
|
1083
1115
|
|
@@ -1099,7 +1131,7 @@ class C3k2(C2f):
|
|
1099
1131
|
class C3k(C3):
|
1100
1132
|
"""C3k is a CSP bottleneck module with customizable kernel sizes for feature extraction in neural networks."""
|
1101
1133
|
|
1102
|
-
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5, k=3):
|
1134
|
+
def __init__(self, c1: int, c2: int, n: int = 1, shortcut: bool = True, g: int = 1, e: float = 0.5, k: int = 3):
|
1103
1135
|
"""
|
1104
1136
|
Initialize C3k module.
|
1105
1137
|
|
@@ -1121,7 +1153,7 @@ class C3k(C3):
|
|
1121
1153
|
class RepVGGDW(torch.nn.Module):
|
1122
1154
|
"""RepVGGDW is a class that represents a depth wise separable convolutional block in RepVGG architecture."""
|
1123
1155
|
|
1124
|
-
def __init__(self, ed) -> None:
|
1156
|
+
def __init__(self, ed: int) -> None:
|
1125
1157
|
"""
|
1126
1158
|
Initialize RepVGGDW module.
|
1127
1159
|
|
@@ -1134,7 +1166,7 @@ class RepVGGDW(torch.nn.Module):
|
|
1134
1166
|
self.dim = ed
|
1135
1167
|
self.act = nn.SiLU()
|
1136
1168
|
|
1137
|
-
def forward(self, x):
|
1169
|
+
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
1138
1170
|
"""
|
1139
1171
|
Perform a forward pass of the RepVGGDW block.
|
1140
1172
|
|
@@ -1146,7 +1178,7 @@ class RepVGGDW(torch.nn.Module):
|
|
1146
1178
|
"""
|
1147
1179
|
return self.act(self.conv(x) + self.conv1(x))
|
1148
1180
|
|
1149
|
-
def forward_fuse(self, x):
|
1181
|
+
def forward_fuse(self, x: torch.Tensor) -> torch.Tensor:
|
1150
1182
|
"""
|
1151
1183
|
Perform a forward pass of the RepVGGDW block without fusing the convolutions.
|
1152
1184
|
|
@@ -1197,7 +1229,7 @@ class CIB(nn.Module):
|
|
1197
1229
|
lk (bool, optional): Whether to use RepVGGDW for the third convolutional layer. Defaults to False.
|
1198
1230
|
"""
|
1199
1231
|
|
1200
|
-
def __init__(self, c1, c2, shortcut=True, e=0.5, lk=False):
|
1232
|
+
def __init__(self, c1: int, c2: int, shortcut: bool = True, e: float = 0.5, lk: bool = False):
|
1201
1233
|
"""
|
1202
1234
|
Initialize the CIB module.
|
1203
1235
|
|
@@ -1220,7 +1252,7 @@ class CIB(nn.Module):
|
|
1220
1252
|
|
1221
1253
|
self.add = shortcut and c1 == c2
|
1222
1254
|
|
1223
|
-
def forward(self, x):
|
1255
|
+
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
1224
1256
|
"""
|
1225
1257
|
Forward pass of the CIB module.
|
1226
1258
|
|
@@ -1247,7 +1279,9 @@ class C2fCIB(C2f):
|
|
1247
1279
|
e (float, optional): Expansion ratio for CIB modules. Defaults to 0.5.
|
1248
1280
|
"""
|
1249
1281
|
|
1250
|
-
def __init__(
|
1282
|
+
def __init__(
|
1283
|
+
self, c1: int, c2: int, n: int = 1, shortcut: bool = False, lk: bool = False, g: int = 1, e: float = 0.5
|
1284
|
+
):
|
1251
1285
|
"""
|
1252
1286
|
Initialize C2fCIB module.
|
1253
1287
|
|
@@ -1283,7 +1317,7 @@ class Attention(nn.Module):
|
|
1283
1317
|
pe (Conv): Convolutional layer for positional encoding.
|
1284
1318
|
"""
|
1285
1319
|
|
1286
|
-
def __init__(self, dim, num_heads=8, attn_ratio=0.5):
|
1320
|
+
def __init__(self, dim: int, num_heads: int = 8, attn_ratio: float = 0.5):
|
1287
1321
|
"""
|
1288
1322
|
Initialize multi-head attention module.
|
1289
1323
|
|
@@ -1303,7 +1337,7 @@ class Attention(nn.Module):
|
|
1303
1337
|
self.proj = Conv(dim, dim, 1, act=False)
|
1304
1338
|
self.pe = Conv(dim, dim, 3, 1, g=dim, act=False)
|
1305
1339
|
|
1306
|
-
def forward(self, x):
|
1340
|
+
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
1307
1341
|
"""
|
1308
1342
|
Forward pass of the Attention module.
|
1309
1343
|
|
@@ -1349,7 +1383,7 @@ class PSABlock(nn.Module):
|
|
1349
1383
|
>>> output_tensor = psablock(input_tensor)
|
1350
1384
|
"""
|
1351
1385
|
|
1352
|
-
def __init__(self, c, attn_ratio=0.5, num_heads=4, shortcut=True) -> None:
|
1386
|
+
def __init__(self, c: int, attn_ratio: float = 0.5, num_heads: int = 4, shortcut: bool = True) -> None:
|
1353
1387
|
"""
|
1354
1388
|
Initialize the PSABlock.
|
1355
1389
|
|
@@ -1365,7 +1399,7 @@ class PSABlock(nn.Module):
|
|
1365
1399
|
self.ffn = nn.Sequential(Conv(c, c * 2, 1), Conv(c * 2, c, 1, act=False))
|
1366
1400
|
self.add = shortcut
|
1367
1401
|
|
1368
|
-
def forward(self, x):
|
1402
|
+
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
1369
1403
|
"""
|
1370
1404
|
Execute a forward pass through PSABlock.
|
1371
1405
|
|
@@ -1404,7 +1438,7 @@ class PSA(nn.Module):
|
|
1404
1438
|
>>> output_tensor = psa.forward(input_tensor)
|
1405
1439
|
"""
|
1406
1440
|
|
1407
|
-
def __init__(self, c1, c2, e=0.5):
|
1441
|
+
def __init__(self, c1: int, c2: int, e: float = 0.5):
|
1408
1442
|
"""
|
1409
1443
|
Initialize PSA module.
|
1410
1444
|
|
@@ -1422,7 +1456,7 @@ class PSA(nn.Module):
|
|
1422
1456
|
self.attn = Attention(self.c, attn_ratio=0.5, num_heads=self.c // 64)
|
1423
1457
|
self.ffn = nn.Sequential(Conv(self.c, self.c * 2, 1), Conv(self.c * 2, self.c, 1, act=False))
|
1424
1458
|
|
1425
|
-
def forward(self, x):
|
1459
|
+
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
1426
1460
|
"""
|
1427
1461
|
Execute forward pass in PSA module.
|
1428
1462
|
|
@@ -1463,7 +1497,7 @@ class C2PSA(nn.Module):
|
|
1463
1497
|
>>> output_tensor = c2psa(input_tensor)
|
1464
1498
|
"""
|
1465
1499
|
|
1466
|
-
def __init__(self, c1, c2, n=1, e=0.5):
|
1500
|
+
def __init__(self, c1: int, c2: int, n: int = 1, e: float = 0.5):
|
1467
1501
|
"""
|
1468
1502
|
Initialize C2PSA module.
|
1469
1503
|
|
@@ -1481,7 +1515,7 @@ class C2PSA(nn.Module):
|
|
1481
1515
|
|
1482
1516
|
self.m = nn.Sequential(*(PSABlock(self.c, attn_ratio=0.5, num_heads=self.c // 64) for _ in range(n)))
|
1483
1517
|
|
1484
|
-
def forward(self, x):
|
1518
|
+
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
1485
1519
|
"""
|
1486
1520
|
Process the input tensor through a series of PSA blocks.
|
1487
1521
|
|
@@ -1521,7 +1555,7 @@ class C2fPSA(C2f):
|
|
1521
1555
|
>>> print(output.shape)
|
1522
1556
|
"""
|
1523
1557
|
|
1524
|
-
def __init__(self, c1, c2, n=1, e=0.5):
|
1558
|
+
def __init__(self, c1: int, c2: int, n: int = 1, e: float = 0.5):
|
1525
1559
|
"""
|
1526
1560
|
Initialize C2fPSA module.
|
1527
1561
|
|
@@ -1560,7 +1594,7 @@ class SCDown(nn.Module):
|
|
1560
1594
|
torch.Size([1, 128, 64, 64])
|
1561
1595
|
"""
|
1562
1596
|
|
1563
|
-
def __init__(self, c1, c2, k, s):
|
1597
|
+
def __init__(self, c1: int, c2: int, k: int, s: int):
|
1564
1598
|
"""
|
1565
1599
|
Initialize SCDown module.
|
1566
1600
|
|
@@ -1574,7 +1608,7 @@ class SCDown(nn.Module):
|
|
1574
1608
|
self.cv1 = Conv(c1, c2, 1, 1)
|
1575
1609
|
self.cv2 = Conv(c2, c2, k=k, s=s, g=c2, act=False)
|
1576
1610
|
|
1577
|
-
def forward(self, x):
|
1611
|
+
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
1578
1612
|
"""
|
1579
1613
|
Apply convolution and downsampling to the input tensor.
|
1580
1614
|
|
@@ -1604,7 +1638,9 @@ class TorchVision(nn.Module):
|
|
1604
1638
|
split (bool, optional): Returns output from intermediate child modules as list. Default is False.
|
1605
1639
|
"""
|
1606
1640
|
|
1607
|
-
def __init__(
|
1641
|
+
def __init__(
|
1642
|
+
self, model: str, weights: str = "DEFAULT", unwrap: bool = True, truncate: int = 2, split: bool = False
|
1643
|
+
):
|
1608
1644
|
"""
|
1609
1645
|
Load the model and weights from torchvision.
|
1610
1646
|
|
@@ -1632,7 +1668,7 @@ class TorchVision(nn.Module):
|
|
1632
1668
|
self.split = False
|
1633
1669
|
self.m.head = self.m.heads = nn.Identity()
|
1634
1670
|
|
1635
|
-
def forward(self, x):
|
1671
|
+
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
1636
1672
|
"""
|
1637
1673
|
Forward pass through the model.
|
1638
1674
|
|
@@ -1676,14 +1712,14 @@ class AAttn(nn.Module):
|
|
1676
1712
|
torch.Size([1, 256, 32, 32])
|
1677
1713
|
"""
|
1678
1714
|
|
1679
|
-
def __init__(self, dim, num_heads, area=1):
|
1715
|
+
def __init__(self, dim: int, num_heads: int, area: int = 1):
|
1680
1716
|
"""
|
1681
1717
|
Initialize an Area-attention module for YOLO models.
|
1682
1718
|
|
1683
1719
|
Args:
|
1684
1720
|
dim (int): Number of hidden channels.
|
1685
1721
|
num_heads (int): Number of heads into which the attention mechanism is divided.
|
1686
|
-
area (int): Number of areas the feature map is divided
|
1722
|
+
area (int): Number of areas the feature map is divided.
|
1687
1723
|
"""
|
1688
1724
|
super().__init__()
|
1689
1725
|
self.area = area
|
@@ -1696,7 +1732,7 @@ class AAttn(nn.Module):
|
|
1696
1732
|
self.proj = Conv(all_head_dim, dim, 1, act=False)
|
1697
1733
|
self.pe = Conv(all_head_dim, dim, 7, 1, 3, g=dim, act=False)
|
1698
1734
|
|
1699
|
-
def forward(self, x):
|
1735
|
+
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
1700
1736
|
"""
|
1701
1737
|
Process the input tensor through the area-attention.
|
1702
1738
|
|
@@ -1760,7 +1796,7 @@ class ABlock(nn.Module):
|
|
1760
1796
|
torch.Size([1, 256, 32, 32])
|
1761
1797
|
"""
|
1762
1798
|
|
1763
|
-
def __init__(self, dim, num_heads, mlp_ratio=1.2, area=1):
|
1799
|
+
def __init__(self, dim: int, num_heads: int, mlp_ratio: float = 1.2, area: int = 1):
|
1764
1800
|
"""
|
1765
1801
|
Initialize an Area-attention block module.
|
1766
1802
|
|
@@ -1778,7 +1814,7 @@ class ABlock(nn.Module):
|
|
1778
1814
|
|
1779
1815
|
self.apply(self._init_weights)
|
1780
1816
|
|
1781
|
-
def _init_weights(self, m):
|
1817
|
+
def _init_weights(self, m: nn.Module):
|
1782
1818
|
"""
|
1783
1819
|
Initialize weights using a truncated normal distribution.
|
1784
1820
|
|
@@ -1790,7 +1826,7 @@ class ABlock(nn.Module):
|
|
1790
1826
|
if m.bias is not None:
|
1791
1827
|
nn.init.constant_(m.bias, 0)
|
1792
1828
|
|
1793
|
-
def forward(self, x):
|
1829
|
+
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
1794
1830
|
"""
|
1795
1831
|
Forward pass through ABlock.
|
1796
1832
|
|
@@ -1828,7 +1864,19 @@ class A2C2f(nn.Module):
|
|
1828
1864
|
torch.Size([1, 512, 32, 32])
|
1829
1865
|
"""
|
1830
1866
|
|
1831
|
-
def __init__(
|
1867
|
+
def __init__(
|
1868
|
+
self,
|
1869
|
+
c1: int,
|
1870
|
+
c2: int,
|
1871
|
+
n: int = 1,
|
1872
|
+
a2: bool = True,
|
1873
|
+
area: int = 1,
|
1874
|
+
residual: bool = False,
|
1875
|
+
mlp_ratio: float = 2.0,
|
1876
|
+
e: float = 0.5,
|
1877
|
+
g: int = 1,
|
1878
|
+
shortcut: bool = True,
|
1879
|
+
):
|
1832
1880
|
"""
|
1833
1881
|
Initialize Area-Attention C2f module.
|
1834
1882
|
|
@@ -1859,7 +1907,7 @@ class A2C2f(nn.Module):
|
|
1859
1907
|
for _ in range(n)
|
1860
1908
|
)
|
1861
1909
|
|
1862
|
-
def forward(self, x):
|
1910
|
+
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
1863
1911
|
"""
|
1864
1912
|
Forward pass through A2C2f layer.
|
1865
1913
|
|
@@ -1880,13 +1928,20 @@ class A2C2f(nn.Module):
|
|
1880
1928
|
class SwiGLUFFN(nn.Module):
|
1881
1929
|
"""SwiGLU Feed-Forward Network for transformer-based architectures."""
|
1882
1930
|
|
1883
|
-
def __init__(self, gc, ec, e=4) -> None:
|
1884
|
-
"""
|
1931
|
+
def __init__(self, gc: int, ec: int, e: int = 4) -> None:
|
1932
|
+
"""
|
1933
|
+
Initialize SwiGLU FFN with input dimension, output dimension, and expansion factor.
|
1934
|
+
|
1935
|
+
Args:
|
1936
|
+
gc (int): Guide channels.
|
1937
|
+
ec (int): Embedding channels.
|
1938
|
+
e (int): Expansion factor.
|
1939
|
+
"""
|
1885
1940
|
super().__init__()
|
1886
1941
|
self.w12 = nn.Linear(gc, e * ec)
|
1887
1942
|
self.w3 = nn.Linear(e * ec // 2, ec)
|
1888
1943
|
|
1889
|
-
def forward(self, x):
|
1944
|
+
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
1890
1945
|
"""Apply SwiGLU transformation to input features."""
|
1891
1946
|
x12 = self.w12(x)
|
1892
1947
|
x1, x2 = x12.chunk(2, dim=-1)
|
@@ -1897,8 +1952,13 @@ class SwiGLUFFN(nn.Module):
|
|
1897
1952
|
class Residual(nn.Module):
|
1898
1953
|
"""Residual connection wrapper for neural network modules."""
|
1899
1954
|
|
1900
|
-
def __init__(self, m) -> None:
|
1901
|
-
"""
|
1955
|
+
def __init__(self, m: nn.Module) -> None:
|
1956
|
+
"""
|
1957
|
+
Initialize residual module with the wrapped module.
|
1958
|
+
|
1959
|
+
Args:
|
1960
|
+
m (nn.Module): Module to wrap with residual connection.
|
1961
|
+
"""
|
1902
1962
|
super().__init__()
|
1903
1963
|
self.m = m
|
1904
1964
|
nn.init.zeros_(self.m.w3.bias)
|
@@ -1906,7 +1966,7 @@ class Residual(nn.Module):
|
|
1906
1966
|
# nn.init.constant_(self.m.w3.weight, 1e-6)
|
1907
1967
|
nn.init.zeros_(self.m.w3.weight)
|
1908
1968
|
|
1909
|
-
def forward(self, x):
|
1969
|
+
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
1910
1970
|
"""Apply residual connection to input features."""
|
1911
1971
|
return x + self.m(x)
|
1912
1972
|
|
@@ -1914,8 +1974,15 @@ class Residual(nn.Module):
|
|
1914
1974
|
class SAVPE(nn.Module):
|
1915
1975
|
"""Spatial-Aware Visual Prompt Embedding module for feature enhancement."""
|
1916
1976
|
|
1917
|
-
def __init__(self, ch, c3, embed):
|
1918
|
-
"""
|
1977
|
+
def __init__(self, ch: List[int], c3: int, embed: int):
|
1978
|
+
"""
|
1979
|
+
Initialize SAVPE module with channels, intermediate channels, and embedding dimension.
|
1980
|
+
|
1981
|
+
Args:
|
1982
|
+
ch (List[int]): List of input channel dimensions.
|
1983
|
+
c3 (int): Intermediate channels.
|
1984
|
+
embed (int): Embedding dimension.
|
1985
|
+
"""
|
1919
1986
|
super().__init__()
|
1920
1987
|
self.cv1 = nn.ModuleList(
|
1921
1988
|
nn.Sequential(
|
@@ -1935,7 +2002,7 @@ class SAVPE(nn.Module):
|
|
1935
2002
|
self.cv5 = nn.Conv2d(1, self.c, 3, padding=1)
|
1936
2003
|
self.cv6 = nn.Sequential(Conv(2 * self.c, self.c, 3), nn.Conv2d(self.c, self.c, 3, padding=1))
|
1937
2004
|
|
1938
|
-
def forward(self, x, vp):
|
2005
|
+
def forward(self, x: List[torch.Tensor], vp: torch.Tensor) -> torch.Tensor:
|
1939
2006
|
"""Process input features and visual prompts to generate enhanced embeddings."""
|
1940
2007
|
y = [self.cv2[i](xi) for i, xi in enumerate(x)]
|
1941
2008
|
y = self.cv4(torch.cat(y, dim=1))
|