ultralytics 8.2.69__py3-none-any.whl → 8.2.70__py3-none-any.whl

This diff compares two publicly released versions of the package as they appear in their public registry. The information is provided for informational purposes only.

Potentially problematic release: this version of ultralytics might be problematic.

Files changed (30)
  1. ultralytics/__init__.py +3 -2
  2. ultralytics/cfg/__init__.py +4 -0
  3. ultralytics/models/__init__.py +2 -1
  4. ultralytics/models/fastsam/predict.py +1 -0
  5. ultralytics/models/sam/build.py +2 -2
  6. ultralytics/models/sam/model.py +10 -2
  7. ultralytics/models/sam/modules/decoders.py +1 -42
  8. ultralytics/models/sam/modules/encoders.py +3 -1
  9. ultralytics/models/sam/modules/sam.py +5 -7
  10. ultralytics/models/sam/modules/transformer.py +4 -3
  11. ultralytics/models/sam/predict.py +12 -6
  12. ultralytics/models/sam2/__init__.py +6 -0
  13. ultralytics/models/sam2/build.py +156 -0
  14. ultralytics/models/sam2/model.py +97 -0
  15. ultralytics/models/sam2/modules/__init__.py +1 -0
  16. ultralytics/models/sam2/modules/decoders.py +305 -0
  17. ultralytics/models/sam2/modules/encoders.py +332 -0
  18. ultralytics/models/sam2/modules/memory_attention.py +170 -0
  19. ultralytics/models/sam2/modules/sam2.py +804 -0
  20. ultralytics/models/sam2/modules/sam2_blocks.py +715 -0
  21. ultralytics/models/sam2/modules/utils.py +191 -0
  22. ultralytics/models/sam2/predict.py +182 -0
  23. ultralytics/nn/modules/transformer.py +5 -3
  24. ultralytics/utils/torch_utils.py +9 -6
  25. {ultralytics-8.2.69.dist-info → ultralytics-8.2.70.dist-info}/METADATA +1 -1
  26. {ultralytics-8.2.69.dist-info → ultralytics-8.2.70.dist-info}/RECORD +30 -19
  27. {ultralytics-8.2.69.dist-info → ultralytics-8.2.70.dist-info}/LICENSE +0 -0
  28. {ultralytics-8.2.69.dist-info → ultralytics-8.2.70.dist-info}/WHEEL +0 -0
  29. {ultralytics-8.2.69.dist-info → ultralytics-8.2.70.dist-info}/entry_points.txt +0 -0
  30. {ultralytics-8.2.69.dist-info → ultralytics-8.2.70.dist-info}/top_level.txt +0 -0
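The headline change is the new ultralytics/models/sam2 package (build, model, predictor, and modules), which the edits to ultralytics/__init__.py and ultralytics/models/__init__.py appear to wire into the public API. A minimal usage sketch, assuming the new SAM2 class is exported at the top level as those __init__ changes suggest and that a sam2_b.pt checkpoint name is accepted (both are assumptions, not confirmed by this diff):

from ultralytics import SAM2  # assumption: SAM2 is re-exported by the updated ultralytics/__init__.py

# Load a SAM 2 checkpoint (hypothetical weight name) and inspect the model
model = SAM2("sam2_b.pt")
model.info()

# Prompted segmentation; bboxes/points/labels mirror the existing SAM predictor's prompt keywords
results = model("path/to/image.jpg", bboxes=[100, 100, 200, 200])
results = model("path/to/image.jpg", points=[150, 150], labels=[1])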
ultralytics/models/sam2/modules/memory_attention.py (new file)
@@ -0,0 +1,170 @@
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
+
+ import copy
+ from typing import Optional
+
+ import torch
+ from torch import Tensor, nn
+
+ from .sam2_blocks import RoPEAttention
+
+
+ class MemoryAttentionLayer(nn.Module):
+     """Implements a memory attention layer with self-attention and cross-attention mechanisms for neural networks."""
+
+     def __init__(
+         self,
+         d_model: int = 256,
+         dim_feedforward: int = 2048,
+         dropout: float = 0.1,
+         pos_enc_at_attn: bool = False,
+         pos_enc_at_cross_attn_keys: bool = True,
+         pos_enc_at_cross_attn_queries: bool = False,
+     ):
+         """Initializes a MemoryAttentionLayer with self-attention, cross-attention, and feedforward components."""
+         super().__init__()
+         self.d_model = d_model
+         self.dim_feedforward = dim_feedforward
+         self.dropout_value = dropout
+         self.self_attn = RoPEAttention(embedding_dim=256, num_heads=1, downsample_rate=1)
+         self.cross_attn_image = RoPEAttention(
+             rope_k_repeat=True,
+             embedding_dim=256,
+             num_heads=1,
+             downsample_rate=1,
+             kv_in_dim=64,
+         )
+
+         # Implementation of Feedforward model
+         self.linear1 = nn.Linear(d_model, dim_feedforward)
+         self.dropout = nn.Dropout(dropout)
+         self.linear2 = nn.Linear(dim_feedforward, d_model)
+
+         self.norm1 = nn.LayerNorm(d_model)
+         self.norm2 = nn.LayerNorm(d_model)
+         self.norm3 = nn.LayerNorm(d_model)
+         self.dropout1 = nn.Dropout(dropout)
+         self.dropout2 = nn.Dropout(dropout)
+         self.dropout3 = nn.Dropout(dropout)
+
+         self.activation = nn.ReLU()
+
+         # Where to add pos enc
+         self.pos_enc_at_attn = pos_enc_at_attn
+         self.pos_enc_at_cross_attn_queries = pos_enc_at_cross_attn_queries
+         self.pos_enc_at_cross_attn_keys = pos_enc_at_cross_attn_keys
+
+     def _forward_sa(self, tgt, query_pos):
+         """Performs self-attention on input tensor using positional encoding and RoPE attention mechanism."""
+         tgt2 = self.norm1(tgt)
+         q = k = tgt2 + query_pos if self.pos_enc_at_attn else tgt2
+         tgt2 = self.self_attn(q, k, v=tgt2)
+         tgt = tgt + self.dropout1(tgt2)
+         return tgt
+
+     def _forward_ca(self, tgt, memory, query_pos, pos, num_k_exclude_rope=0):
+         """Performs cross-attention between target and memory tensors using RoPEAttention mechanism."""
+         kwds = {}
+         if num_k_exclude_rope > 0:
+             assert isinstance(self.cross_attn_image, RoPEAttention)
+             kwds = {"num_k_exclude_rope": num_k_exclude_rope}
+
+         # Cross-Attention
+         tgt2 = self.norm2(tgt)
+         tgt2 = self.cross_attn_image(
+             q=tgt2 + query_pos if self.pos_enc_at_cross_attn_queries else tgt2,
+             k=memory + pos if self.pos_enc_at_cross_attn_keys else memory,
+             v=memory,
+             **kwds,
+         )
+         tgt = tgt + self.dropout2(tgt2)
+         return tgt
+
+     def forward(
+         self,
+         tgt,
+         memory,
+         pos: Optional[Tensor] = None,
+         query_pos: Optional[Tensor] = None,
+         num_k_exclude_rope: int = 0,
+     ) -> torch.Tensor:
+         """Performs self-attention, cross-attention, and MLP operations on input tensors for memory-based attention."""
+         tgt = self._forward_sa(tgt, query_pos)
+         tgt = self._forward_ca(tgt, memory, query_pos, pos, num_k_exclude_rope)
+         # MLP
+         tgt2 = self.norm3(tgt)
+         tgt2 = self.linear2(self.dropout(self.activation(self.linear1(tgt2))))
+         tgt = tgt + self.dropout3(tgt2)
+         return tgt
+
+
+ class MemoryAttention(nn.Module):
+     """Memory attention module for processing sequential data with self and cross-attention mechanisms."""
+
+     def __init__(
+         self,
+         d_model: int,
+         pos_enc_at_input: bool,
+         layer: nn.Module,
+         num_layers: int,
+         batch_first: bool = True,  # Do layers expect batch first input?
+     ):
+         """Initializes MemoryAttention module with layers and normalization for attention processing."""
+         super().__init__()
+         self.d_model = d_model
+         self.layers = nn.ModuleList([copy.deepcopy(layer) for _ in range(num_layers)])
+         self.num_layers = num_layers
+         self.norm = nn.LayerNorm(d_model)
+         self.pos_enc_at_input = pos_enc_at_input
+         self.batch_first = batch_first
+
+     def forward(
+         self,
+         curr: torch.Tensor,  # self-attention inputs
+         memory: torch.Tensor,  # cross-attention inputs
+         curr_pos: Optional[Tensor] = None,  # pos_enc for self-attention inputs
+         memory_pos: Optional[Tensor] = None,  # pos_enc for cross-attention inputs
+         num_obj_ptr_tokens: int = 0,  # number of object pointer *tokens*
+     ):
+         """Applies self-attention and cross-attention to input tensors, processing through multiple layers."""
+         if isinstance(curr, list):
+             assert isinstance(curr_pos, list)
+             assert len(curr) == len(curr_pos) == 1
+             curr, curr_pos = (
+                 curr[0],
+                 curr_pos[0],
+             )
+
+         assert curr.shape[1] == memory.shape[1], "Batch size must be the same for curr and memory"
+
+         output = curr
+         if self.pos_enc_at_input and curr_pos is not None:
+             output = output + 0.1 * curr_pos
+
+         if self.batch_first:
+             # Convert to batch first
+             output = output.transpose(0, 1)
+             curr_pos = curr_pos.transpose(0, 1)
+             memory = memory.transpose(0, 1)
+             memory_pos = memory_pos.transpose(0, 1)
+
+         for layer in self.layers:
+             kwds = {}
+             if isinstance(layer.cross_attn_image, RoPEAttention):
+                 kwds = {"num_k_exclude_rope": num_obj_ptr_tokens}
+
+             output = layer(
+                 tgt=output,
+                 memory=memory,
+                 pos=memory_pos,
+                 query_pos=curr_pos,
+                 **kwds,
+             )
+         normed_output = self.norm(output)
+
+         if self.batch_first:
+             # Convert back to seq first
+             normed_output = normed_output.transpose(0, 1)
+             curr_pos = curr_pos.transpose(0, 1)
+
+         return normed_output
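For orientation, a minimal sketch of how the new MemoryAttention stack can be exercised in isolation. The shapes are illustrative assumptions (a 64x64 grid of 256-dim query tokens and 64-dim memory tokens, matching the layer's kv_in_dim=64), and it presumes the RoPEAttention imported from sam2_blocks handles square token grids of this size:

import torch

from ultralytics.models.sam2.modules.memory_attention import MemoryAttention, MemoryAttentionLayer

layer = MemoryAttentionLayer(d_model=256, dim_feedforward=2048, dropout=0.1)
attn = MemoryAttention(d_model=256, pos_enc_at_input=True, layer=layer, num_layers=4).eval()

# Sequence-first layout: (tokens, batch, channels); batch_first=True transposes internally
n_q, n_mem, batch = 64 * 64, 64 * 64, 1
curr = torch.randn(n_q, batch, 256)         # current-frame image tokens (queries)
curr_pos = torch.randn(n_q, batch, 256)     # positional encoding for the queries
memory = torch.randn(n_mem, batch, 64)      # memory tokens (64 channels, matching kv_in_dim)
memory_pos = torch.randn(n_mem, batch, 64)  # positional encoding for the memory tokens

with torch.no_grad():
    out = attn(curr, memory, curr_pos=curr_pos, memory_pos=memory_pos, num_obj_ptr_tokens=0)
print(out.shape)  # torch.Size([4096, 1, 256])

In SAM 2 itself these inputs come from the image encoder and the memory encoder; random tensors are used here only to show the expected layout.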