ultralytics 8.1.38__py3-none-any.whl → 8.1.40__py3-none-any.whl
This diff shows the content of publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
Potentially problematic release.
This version of ultralytics might be problematic.
- ultralytics/__init__.py +1 -1
- ultralytics/cfg/__init__.py +3 -3
- ultralytics/cfg/datasets/lvis.yaml +1239 -0
- ultralytics/data/__init__.py +18 -2
- ultralytics/data/augment.py +124 -3
- ultralytics/data/base.py +2 -2
- ultralytics/data/build.py +25 -3
- ultralytics/data/converter.py +24 -6
- ultralytics/data/dataset.py +142 -27
- ultralytics/data/loaders.py +11 -8
- ultralytics/data/split_dota.py +1 -1
- ultralytics/data/utils.py +33 -8
- ultralytics/engine/exporter.py +3 -3
- ultralytics/engine/model.py +6 -3
- ultralytics/engine/results.py +2 -2
- ultralytics/engine/trainer.py +59 -55
- ultralytics/engine/validator.py +2 -2
- ultralytics/hub/utils.py +1 -1
- ultralytics/models/fastsam/model.py +1 -1
- ultralytics/models/fastsam/prompt.py +4 -5
- ultralytics/models/nas/model.py +1 -1
- ultralytics/models/sam/model.py +1 -1
- ultralytics/models/sam/modules/tiny_encoder.py +1 -1
- ultralytics/models/yolo/__init__.py +2 -2
- ultralytics/models/yolo/classify/train.py +1 -1
- ultralytics/models/yolo/detect/train.py +1 -1
- ultralytics/models/yolo/detect/val.py +36 -17
- ultralytics/models/yolo/model.py +1 -0
- ultralytics/models/yolo/world/__init__.py +5 -0
- ultralytics/models/yolo/world/train.py +92 -0
- ultralytics/models/yolo/world/train_world.py +108 -0
- ultralytics/nn/autobackend.py +5 -5
- ultralytics/nn/modules/block.py +4 -2
- ultralytics/nn/modules/conv.py +1 -1
- ultralytics/nn/modules/head.py +13 -4
- ultralytics/nn/tasks.py +30 -14
- ultralytics/solutions/ai_gym.py +1 -1
- ultralytics/solutions/heatmap.py +85 -47
- ultralytics/solutions/object_counter.py +79 -64
- ultralytics/trackers/byte_tracker.py +1 -1
- ultralytics/trackers/track.py +1 -1
- ultralytics/trackers/utils/gmc.py +1 -1
- ultralytics/utils/__init__.py +4 -4
- ultralytics/utils/benchmarks.py +2 -2
- ultralytics/utils/callbacks/comet.py +1 -1
- ultralytics/utils/callbacks/mlflow.py +1 -1
- ultralytics/utils/checks.py +3 -3
- ultralytics/utils/downloads.py +2 -2
- ultralytics/utils/loss.py +1 -1
- ultralytics/utils/metrics.py +1 -1
- ultralytics/utils/plotting.py +36 -22
- ultralytics/utils/torch_utils.py +17 -3
- {ultralytics-8.1.38.dist-info → ultralytics-8.1.40.dist-info}/METADATA +1 -1
- {ultralytics-8.1.38.dist-info → ultralytics-8.1.40.dist-info}/RECORD +58 -54
- {ultralytics-8.1.38.dist-info → ultralytics-8.1.40.dist-info}/LICENSE +0 -0
- {ultralytics-8.1.38.dist-info → ultralytics-8.1.40.dist-info}/WHEEL +0 -0
- {ultralytics-8.1.38.dist-info → ultralytics-8.1.40.dist-info}/entry_points.txt +0 -0
- {ultralytics-8.1.38.dist-info → ultralytics-8.1.40.dist-info}/top_level.txt +0 -0
ultralytics/models/yolo/world/train.py
ADDED
@@ -0,0 +1,92 @@
+# Ultralytics YOLO 🚀, AGPL-3.0 license
+
+import itertools
+
+from ultralytics.data import build_yolo_dataset
+from ultralytics.models import yolo
+from ultralytics.nn.tasks import WorldModel
+from ultralytics.utils import DEFAULT_CFG, RANK, checks
+from ultralytics.utils.torch_utils import de_parallel
+
+
+def on_pretrain_routine_end(trainer):
+    """Callback."""
+    if RANK in {-1, 0}:
+        # NOTE: for evaluation
+        names = [name.split("/")[0] for name in list(trainer.test_loader.dataset.data["names"].values())]
+        de_parallel(trainer.ema.ema).set_classes(names, cache_clip_model=False)
+    device = next(trainer.model.parameters()).device
+    trainer.text_model, _ = trainer.clip.load("ViT-B/32", device=device)
+    for p in trainer.text_model.parameters():
+        p.requires_grad_(False)
+
+
+class WorldTrainer(yolo.detect.DetectionTrainer):
+    """
+    A class to fine-tune a world model on a close-set dataset.
+
+    Example:
+        ```python
+        from ultralytics.models.yolo.world import WorldModel
+
+        args = dict(model='yolov8s-world.pt', data='coco8.yaml', epochs=3)
+        trainer = WorldTrainer(overrides=args)
+        trainer.train()
+        ```
+    """
+
+    def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
+        """Initialize a WorldTrainer object with given arguments."""
+        if overrides is None:
+            overrides = {}
+        super().__init__(cfg, overrides, _callbacks)
+
+        # Import and assign clip
+        try:
+            import clip
+        except ImportError:
+            checks.check_requirements("git+https://github.com/ultralytics/CLIP.git")
+            import clip
+        self.clip = clip
+
+    def get_model(self, cfg=None, weights=None, verbose=True):
+        """Return WorldModel initialized with specified config and weights."""
+        # NOTE: This `nc` here is the max number of different text samples in one image, rather than the actual `nc`.
+        # NOTE: Following the official config, nc hard-coded to 80 for now.
+        model = WorldModel(
+            cfg["yaml_file"] if isinstance(cfg, dict) else cfg,
+            ch=3,
+            nc=min(self.data["nc"], 80),
+            verbose=verbose and RANK == -1,
+        )
+        if weights:
+            model.load(weights)
+        self.add_callback("on_pretrain_routine_end", on_pretrain_routine_end)
+
+        return model
+
+    def build_dataset(self, img_path, mode="train", batch=None):
+        """
+        Build YOLO Dataset.
+
+        Args:
+            img_path (str): Path to the folder containing images.
+            mode (str): `train` mode or `val` mode, users are able to customize different augmentations for each mode.
+            batch (int, optional): Size of batches, this is for `rect`. Defaults to None.
+        """
+        gs = max(int(de_parallel(self.model).stride.max() if self.model else 0), 32)
+        return build_yolo_dataset(
+            self.args, img_path, batch, self.data, mode=mode, rect=mode == "val", stride=gs, multi_modal=mode == "train"
+        )
+
+    def preprocess_batch(self, batch):
+        """Preprocesses a batch of images for YOLOWorld training, adjusting formatting and dimensions as needed."""
+        batch = super().preprocess_batch(batch)
+
+        # NOTE: add text features
+        texts = list(itertools.chain(*batch["texts"]))
+        text_token = self.clip.tokenize(texts).to(batch["img"].device)
+        txt_feats = self.text_model.encode_text(text_token).to(dtype=batch["img"].dtype)  # torch.float32
+        txt_feats = txt_feats / txt_feats.norm(p=2, dim=-1, keepdim=True)
+        batch["txt_feats"] = txt_feats.reshape(len(batch["texts"]), -1, txt_feats.shape[-1])
+        return batch
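The new WorldTrainer wires YOLO-World fine-tuning into the standard detection training loop: CLIP is imported lazily in `__init__`, and the `on_pretrain_routine_end` callback loads a frozen CLIP text encoder for `preprocess_batch`. A minimal sketch of how this trainer is reached from the public API, assuming the YOLOWorld task map now routes training to it (consistent with the `ultralytics/models/yolo/model.py +1 -0` entry above); the checkpoint and dataset names are illustrative:

```python
from ultralytics import YOLOWorld

# Fine-tune a pretrained YOLO-World checkpoint on a closed-set dataset.
# "yolov8s-world.pt" and "coco8.yaml" are placeholders; any detection dataset YAML works.
model = YOLOWorld("yolov8s-world.pt")
model.train(data="coco8.yaml", epochs=3, imgsz=640)
```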
ultralytics/models/yolo/world/train_world.py
ADDED
@@ -0,0 +1,108 @@
+from ultralytics.data import build_yolo_dataset, build_grounding, YOLOConcatDataset
+from ultralytics.data.utils import check_det_dataset
+from ultralytics.models.yolo.world import WorldTrainer
+from ultralytics.utils.torch_utils import de_parallel
+from ultralytics.utils import DEFAULT_CFG
+
+
+class WorldTrainerFromScratch(WorldTrainer):
+    """
+    A class extending the WorldTrainer class for training a world model from scratch on open-set dataset.
+
+    Example:
+        ```python
+        from ultralytics.models.yolo.world.train_world import WorldTrainerFromScratch
+        from ultralytics import YOLOWorld
+
+        data = dict(
+            train=dict(
+                yolo_data=["Objects365.yaml"],
+                grounding_data=[
+                    dict(
+                        img_path="../datasets/flickr30k/images",
+                        json_file="../datasets/flickr30k/final_flickr_separateGT_train.json",
+                    ),
+                    dict(
+                        img_path="../datasets/GQA/images",
+                        json_file="../datasets/GQA/final_mixed_train_no_coco.json",
+                    ),
+                ],
+            ),
+            val=dict(yolo_data=["lvis.yaml"]),
+        )
+
+        model = YOLOWorld("yolov8s-worldv2.yaml")
+        model.train(data=data, trainer=WorldTrainerFromScratch)
+        ```
+    """
+
+    def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
+        """Initialize a WorldTrainer object with given arguments."""
+        if overrides is None:
+            overrides = {}
+        super().__init__(cfg, overrides, _callbacks)
+
+    def build_dataset(self, img_path, mode="train", batch=None):
+        """
+        Build YOLO Dataset.
+
+        Args:
+            img_path (List[str] | str): Path to the folder containing images.
+            mode (str): `train` mode or `val` mode, users are able to customize different augmentations for each mode.
+            batch (int, optional): Size of batches, this is for `rect`. Defaults to None.
+        """
+        gs = max(int(de_parallel(self.model).stride.max() if self.model else 0), 32)
+        if mode == "train":
+            dataset = [
+                build_yolo_dataset(self.args, im_path, batch, self.data, stride=gs, multi_modal=True)
+                if isinstance(im_path, str)
+                else build_grounding(self.args, im_path["img_path"], im_path["json_file"], batch, stride=gs)
+                for im_path in img_path
+            ]
+            return YOLOConcatDataset(dataset) if len(dataset) > 1 else dataset[0]
+        else:
+            return build_yolo_dataset(self.args, img_path, batch, self.data, mode=mode, rect=mode == "val", stride=gs)
+
+    def get_dataset(self):
+        """
+        Get train, val path from data dict if it exists.
+
+        Returns None if data format is not recognized.
+        """
+        final_data = dict()
+        data_yaml = self.args.data
+        assert data_yaml.get("train", False)  # object365.yaml
+        assert data_yaml.get("val", False)  # lvis.yaml
+        data = {k: [check_det_dataset(d) for d in v.get("yolo_data", [])] for k, v in data_yaml.items()}
+        assert len(data["val"]) == 1, f"Only support validating on 1 dataset for now, but got {len(data['val'])}."
+        val_split = "minival" if "lvis" in data["val"][0]["val"] else "val"
+        for d in data["val"]:
+            if d.get("minival") is None:  # for lvis dataset
+                continue
+            d["minival"] = str(d["path"] / d["minival"])
+        for s in ["train", "val"]:
+            final_data[s] = [d["train" if s == "train" else val_split] for d in data[s]]
+            # save grounding data if there's one
+            grounding_data = data_yaml[s].get("grounding_data")
+            if grounding_data is None:
+                continue
+            grounding_data = [grounding_data] if not isinstance(grounding_data, list) else grounding_data
+            for g in grounding_data:
+                assert isinstance(g, dict), f"Grounding data should be provided in dict format, but got {type(g)}"
+            final_data[s] += grounding_data
+        # NOTE: to make training work properly, set `nc` and `names`
+        final_data["nc"] = data["val"][0]["nc"]
+        final_data["names"] = data["val"][0]["names"]
+        self.data = final_data
+        return final_data["train"], final_data["val"][0]
+
+    def plot_training_labels(self):
+        """DO NOT plot labels."""
+        pass
+
+    def final_eval(self):
+        """Performs final evaluation and validation for object detection YOLO-World model."""
+        val = self.args.data["val"]["yolo_data"][0]
+        self.validator.args.data = val
+        self.validator.args.split = "minival" if isinstance(val, str) and "lvis" in val else "val"
+        return super().final_eval()
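`WorldTrainerFromScratch.get_dataset()` enforces a specific shape for the `data` dict: `train` and `val` keys must both exist, `val` may list exactly one `yolo_data` dataset, and `grounding_data` entries are optional dicts with `img_path`/`json_file` keys. A minimal sketch of a dict that passes those checks without any grounding data (dataset names are illustrative; see the class docstring above for the full variant with grounding datasets):

```python
# Assumed minimal config for WorldTrainerFromScratch.
data = dict(
    train=dict(yolo_data=["Objects365.yaml"]),  # list of YOLO-format dataset YAMLs
    val=dict(yolo_data=["lvis.yaml"]),          # exactly one validation dataset is supported
)
```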
ultralytics/nn/autobackend.py
CHANGED
@@ -374,9 +374,9 @@ class AutoBackend(nn.Module):
             metadata = yaml_load(metadata)
         if metadata:
             for k, v in metadata.items():
-                if k in
+                if k in {"stride", "batch"}:
                     metadata[k] = int(v)
-                elif k in
+                elif k in {"imgsz", "names", "kpt_shape"} and isinstance(v, str):
                     metadata[k] = eval(v)
             stride = metadata["stride"]
             task = metadata["task"]
@@ -531,8 +531,8 @@ class AutoBackend(nn.Module):
                 self.names = {i: f"class{i}" for i in range(nc)}
         else:  # Lite or Edge TPU
             details = self.input_details[0]
-
-            if
+            is_int = details["dtype"] in {np.int8, np.int16}  # is TFLite quantized int8 or int16 model
+            if is_int:
                 scale, zero_point = details["quantization"]
                 im = (im / scale + zero_point).astype(details["dtype"])  # de-scale
             self.interpreter.set_tensor(details["index"], im)
@@ -540,7 +540,7 @@ class AutoBackend(nn.Module):
             y = []
             for output in self.output_details:
                 x = self.interpreter.get_tensor(output["index"])
-                if
+                if is_int:
                     scale, zero_point = output["quantization"]
                     x = (x.astype(np.float32) - zero_point) * scale  # re-scale
                 if x.ndim == 3:  # if task is not classification, excluding masks (ndim=4) as well
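The recurring edit in this release, visible in the truncated removed lines above and in several files below, is replacing tuple membership tests with set literals (and naming the TFLite quantization flag `is_int`). A general Python note, not code from the package: for a literal of hashable constants, CPython folds the set into a frozenset constant, so the new form is at least as fast and reads as a plain membership test.

```python
# Equivalent membership checks; the set-literal form is what 8.1.40 standardizes on.
fmt = "tflite"
if fmt in ("tflite", "edgetpu"):   # older tuple style
    print("quantized path")
if fmt in {"tflite", "edgetpu"}:   # new set-literal style, same result
    print("quantized path")
```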
ultralytics/nn/modules/block.py
CHANGED
@@ -519,7 +519,8 @@ class ContrastiveHead(nn.Module):
     def __init__(self):
         """Initializes ContrastiveHead with specified region-text similarity parameters."""
         super().__init__()
-
+        # NOTE: use -10.0 to keep the init cls loss consistency with other losses
+        self.bias = nn.Parameter(torch.tensor([-10.0]))
         self.logit_scale = nn.Parameter(torch.ones([]) * torch.tensor(1 / 0.07).log())

     def forward(self, x, w):
@@ -542,7 +543,8 @@ class BNContrastiveHead(nn.Module):
         """Initialize ContrastiveHead with region-text similarity parameters."""
         super().__init__()
         self.norm = nn.BatchNorm2d(embed_dims)
-
+        # NOTE: use -10.0 to keep the init cls loss consistency with other losses
+        self.bias = nn.Parameter(torch.tensor([-10.0]))
         # use -1.0 is more stable
         self.logit_scale = nn.Parameter(-1.0 * torch.ones([]))

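The -10.0 bias shifts the region-text similarity logits so that, at initialization, every class probability is tiny, matching the near-zero classification prior used elsewhere in the detection losses. A quick check of the implied prior (plain PyTorch, not package code):

```python
import torch

bias = torch.tensor([-10.0])       # value added to the contrastive logits at init
print(torch.sigmoid(bias).item())  # ~4.54e-05: near-zero initial class probability
```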
ultralytics/nn/modules/conv.py
CHANGED
@@ -296,7 +296,7 @@ class SpatialAttention(nn.Module):
     def __init__(self, kernel_size=7):
         """Initialize Spatial-attention module with kernel size argument."""
         super().__init__()
-        assert kernel_size in
+        assert kernel_size in {3, 7}, "kernel size must be 3 or 7"
         padding = 3 if kernel_size == 7 else 1
         self.cv1 = nn.Conv2d(2, 1, kernel_size, padding=padding, bias=False)
         self.act = nn.Sigmoid()
ultralytics/nn/modules/head.py
CHANGED
@@ -54,13 +54,13 @@ class Detect(nn.Module):
             self.anchors, self.strides = (x.transpose(0, 1) for x in make_anchors(x, self.stride, 0.5))
             self.shape = shape

-        if self.export and self.format in
+        if self.export and self.format in {"saved_model", "pb", "tflite", "edgetpu", "tfjs"}:  # avoid TF FlexSplitV ops
             box = x_cat[:, : self.reg_max * 4]
             cls = x_cat[:, self.reg_max * 4 :]
         else:
             box, cls = x_cat.split((self.reg_max * 4, self.nc), 1)

-        if self.export and self.format in
+        if self.export and self.format in {"tflite", "edgetpu"}:
             # Precompute normalization factor to increase numerical stability
             # See https://github.com/ultralytics/ultralytics/issues/7371
             grid_h = shape[2]
@@ -230,13 +230,13 @@ class WorldDetect(Detect):
             self.anchors, self.strides = (x.transpose(0, 1) for x in make_anchors(x, self.stride, 0.5))
             self.shape = shape

-        if self.export and self.format in
+        if self.export and self.format in {"saved_model", "pb", "tflite", "edgetpu", "tfjs"}:  # avoid TF FlexSplitV ops
             box = x_cat[:, : self.reg_max * 4]
             cls = x_cat[:, self.reg_max * 4 :]
         else:
             box, cls = x_cat.split((self.reg_max * 4, self.nc), 1)

-        if self.export and self.format in
+        if self.export and self.format in {"tflite", "edgetpu"}:
             # Precompute normalization factor to increase numerical stability
             # See https://github.com/ultralytics/ultralytics/issues/7371
             grid_h = shape[2]
@@ -250,6 +250,15 @@ class WorldDetect(Detect):
         y = torch.cat((dbox, cls.sigmoid()), 1)
         return y if self.export else (y, x)

+    def bias_init(self):
+        """Initialize Detect() biases, WARNING: requires stride availability."""
+        m = self  # self.model[-1]  # Detect() module
+        # cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1
+        # ncf = math.log(0.6 / (m.nc - 0.999999)) if cf is None else torch.log(cf / cf.sum())  # nominal class frequency
+        for a, b, s in zip(m.cv2, m.cv3, m.stride):  # from
+            a[-1].bias.data[:] = 1.0  # box
+            # b[-1].bias.data[:] = math.log(5 / m.nc / (640 / s) ** 2)  # cls (.01 objects, 80 classes, 640 img)
+

 class RTDETRDecoder(nn.Module):
     """
ultralytics/nn/tasks.py
CHANGED
@@ -564,28 +564,28 @@ class WorldModel(DetectionModel):
         self.clip_model = None  # CLIP model placeholder
         super().__init__(cfg=cfg, ch=ch, nc=nc, verbose=verbose)

-    def set_classes(self, text):
-        """
+    def set_classes(self, text, batch=80, cache_clip_model=True):
+        """Set classes in advance so that model could do offline-inference without clip model."""
         try:
             import clip
         except ImportError:
-            check_requirements("git+https://github.com/
+            check_requirements("git+https://github.com/ultralytics/CLIP.git")
             import clip

-        if
+        if (
+            not getattr(self, "clip_model", None) and cache_clip_model
+        ):  # for backwards compatibility of models lacking clip_model attribute
             self.clip_model = clip.load("ViT-B/32")[0]
-
+        model = self.clip_model if cache_clip_model else clip.load("ViT-B/32")[0]
+        device = next(model.parameters()).device
         text_token = clip.tokenize(text).to(device)
-        txt_feats =
+        txt_feats = [model.encode_text(token).detach() for token in text_token.split(batch)]
+        txt_feats = txt_feats[0] if len(txt_feats) == 1 else torch.cat(txt_feats, dim=0)
         txt_feats = txt_feats / txt_feats.norm(p=2, dim=-1, keepdim=True)
-        self.txt_feats = txt_feats.reshape(-1, len(text), txt_feats.shape[-1])
+        self.txt_feats = txt_feats.reshape(-1, len(text), txt_feats.shape[-1])
         self.model[-1].nc = len(text)

-    def
-        """Initialize the loss criterion for the model."""
-        raise NotImplementedError
-
-    def predict(self, x, profile=False, visualize=False, augment=False, embed=None):
+    def predict(self, x, profile=False, visualize=False, txt_feats=None, augment=False, embed=None):
         """
         Perform a forward pass through the model.

@@ -593,13 +593,14 @@ class WorldModel(DetectionModel):
             x (torch.Tensor): The input tensor.
             profile (bool, optional): If True, profile the computation time for each layer. Defaults to False.
             visualize (bool, optional): If True, save feature maps for visualization. Defaults to False.
+            txt_feats (torch.Tensor): The text features, use it if it's given. Defaults to None.
             augment (bool, optional): If True, perform data augmentation during inference. Defaults to False.
             embed (list, optional): A list of feature vectors/embeddings to return.

         Returns:
             (torch.Tensor): Model's output tensor.
         """
-        txt_feats = self.txt_feats.to(device=x.device, dtype=x.dtype)
+        txt_feats = (self.txt_feats if txt_feats is None else txt_feats).to(device=x.device, dtype=x.dtype)
         if len(txt_feats) != len(x):
             txt_feats = txt_feats.repeat(len(x), 1, 1)
         ori_txt_feats = txt_feats.clone()
@@ -627,6 +628,21 @@ class WorldModel(DetectionModel):
             return torch.unbind(torch.cat(embeddings, 1), dim=0)
         return x

+    def loss(self, batch, preds=None):
+        """
+        Compute loss.
+
+        Args:
+            batch (dict): Batch to compute loss on.
+            preds (torch.Tensor | List[torch.Tensor]): Predictions.
+        """
+        if not hasattr(self, "criterion"):
+            self.criterion = self.init_criterion()
+
+        if preds is None:
+            preds = self.forward(batch["img"], txt_feats=batch["txt_feats"])
+        return self.criterion(preds, batch)
+

 class Ensemble(nn.ModuleList):
     """Ensemble of models."""
@@ -880,7 +896,7 @@ def parse_model(d, ch, verbose=True):  # model_dict, input_channels(3)
             )  # num heads

             args = [c1, c2, *args[1:]]
-            if m in
+            if m in {BottleneckCSP, C1, C2, C2f, C2fAttn, C3, C3TR, C3Ghost, C3x, RepC3}:
                 args.insert(2, n)  # number of repeats
                 n = 1
         elif m is AIFI:
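`set_classes()` now encodes the prompt list in batches (`batch=80` by default) and can skip caching the CLIP model, which is what the new WorldTrainer callback relies on. The practical effect is the offline-vocabulary workflow described in its docstring; a short sketch, assuming the usual YOLOWorld wrapper API (model file, image URL, and class list are illustrative):

```python
from ultralytics import YOLOWorld

model = YOLOWorld("yolov8s-world.pt")
model.set_classes(["person", "bus"])   # prompts are tokenized and encoded by CLIP once, stored as txt_feats
results = model.predict("https://ultralytics.com/images/bus.jpg")  # inference no longer needs CLIP
model.save("yolov8s-world-custom.pt")  # saved weights keep the custom vocabulary
```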
ultralytics/solutions/ai_gym.py
CHANGED
@@ -81,7 +81,7 @@ class AIGym:
         self.annotator = Annotator(im0, line_width=2)

         for ind, k in enumerate(reversed(self.keypoints)):
-            if self.pose_type in
+            if self.pose_type in {"pushup", "pullup"}:
                 self.angle[ind] = self.annotator.estimate_pose_angle(
                     k[int(self.kpts_to_check[0])].cpu(),
                     k[int(self.kpts_to_check[1])].cpu(),
ultralytics/solutions/heatmap.py
CHANGED
@@ -24,6 +24,8 @@ class Heatmap:
         self.view_img = False
         self.shape = "circle"

+        self.names = None  # Classes names
+
         # Image information
         self.imw = None
         self.imh = None
@@ -52,10 +54,13 @@ class Heatmap:
         # Object Counting Information
         self.in_counts = 0
         self.out_counts = 0
-        self.
+        self.count_ids = []
+        self.class_wise_count = {}
         self.count_txt_thickness = 0
-        self.count_txt_color = (
-        self.
+        self.count_txt_color = (255, 255, 255)
+        self.line_color = (255, 255, 255)
+        self.cls_txtdisplay_gap = 50
+        self.fontsize = 0.6

         # Decay factor
         self.decay_factor = 0.99
@@ -67,6 +72,7 @@ class Heatmap:
         self,
         imw,
         imh,
+        classes_names=None,
         colormap=cv2.COLORMAP_JET,
         heatmap_alpha=0.5,
         view_img=False,
@@ -74,13 +80,15 @@ class Heatmap:
         view_out_counts=True,
         count_reg_pts=None,
         count_txt_thickness=2,
-        count_txt_color=(
-
+        count_txt_color=(255, 255, 255),
+        fontsize=0.8,
+        line_color=(255, 255, 255),
         count_reg_color=(255, 0, 255),
         region_thickness=5,
         line_dist_thresh=15,
         decay_factor=0.99,
         shape="circle",
+        cls_txtdisplay_gap=50,
     ):
         """
         Configures the heatmap colormap, width, height and display parameters.
@@ -89,6 +97,7 @@ class Heatmap:
            colormap (cv2.COLORMAP): The colormap to be set.
            imw (int): The width of the frame.
            imh (int): The height of the frame.
+           classes_names (dict): Classes names
            heatmap_alpha (float): alpha value for heatmap display
            view_img (bool): Flag indicating frame display
            view_in_counts (bool): Flag to control whether to display the incounts on video stream.
@@ -96,13 +105,16 @@ class Heatmap:
            count_reg_pts (list): Object counting region points
            count_txt_thickness (int): Text thickness for object counting display
            count_txt_color (RGB color): count text color value
-
+           fontsize (float): Text display font size
+           line_color (RGB color): count highlighter line color
            count_reg_color (RGB color): Color of object counting region
            region_thickness (int): Object counting Region thickness
            line_dist_thresh (int): Euclidean Distance threshold for line counter
            decay_factor (float): value for removing heatmap area after object passed
            shape (str): Heatmap shape, rect or circle shape supported
+           cls_txtdisplay_gap (int): Display gap between each class count
         """
+        self.names = classes_names
         self.imw = imw
         self.imh = imh
         self.heatmap_alpha = heatmap_alpha
@@ -116,32 +128,32 @@ class Heatmap:
             if len(count_reg_pts) == 2:
                 print("Line Counter Initiated.")
                 self.count_reg_pts = count_reg_pts
-                self.counting_region = LineString(count_reg_pts)
-
-
-                print("Region Counter Initiated.")
+                self.counting_region = LineString(self.count_reg_pts)
+            elif len(count_reg_pts) >= 3:
+                print("Polygon Counter Initiated.")
                 self.count_reg_pts = count_reg_pts
                 self.counting_region = Polygon(self.count_reg_pts)
-
             else:
-                print("Region
+                print("Invalid Region points provided, region_points must be 2 for lines or >= 3 for polygons.")
                 print("Using Line Counter Now")
-                self.counting_region =
+                self.counting_region = LineString(self.count_reg_pts)

         # Heatmap new frame
         self.heatmap = np.zeros((int(self.imh), int(self.imw)), dtype=np.float32)

         self.count_txt_thickness = count_txt_thickness
         self.count_txt_color = count_txt_color
-        self.
+        self.fontsize = fontsize
+        self.line_color = line_color
         self.region_color = count_reg_color
         self.region_thickness = region_thickness
         self.decay_factor = decay_factor
         self.line_dist_thresh = line_dist_thresh
         self.shape = shape
+        self.cls_txtdisplay_gap = cls_txtdisplay_gap

         # shape of heatmap, if not selected
-        if self.shape not in
+        if self.shape not in {"circle", "rect"}:
             print("Unknown shape value provided, 'circle' & 'rect' supported")
             print("Using Circular shape now")
             self.shape = "circle"
@@ -183,6 +195,12 @@ class Heatmap:
                )

            for box, cls, track_id in zip(self.boxes, self.clss, self.track_ids):
+               # Store class info
+               if self.names[cls] not in self.class_wise_count:
+                   if len(self.names[cls]) > 5:
+                       self.names[cls] = self.names[cls][:5]
+                   self.class_wise_count[self.names[cls]] = {"in": 0, "out": 0}
+
                if self.shape == "circle":
                    center = (int((box[0] + box[2]) // 2), int((box[1] + box[3]) // 2))
                    radius = min(int(box[2]) - int(box[0]), int(box[3]) - int(box[1])) // 2
@@ -203,23 +221,39 @@ class Heatmap:
                if len(track_line) > 30:
                    track_line.pop(0)

-
-
-
-
-
-
-
+               prev_position = self.track_history[track_id][-2] if len(self.track_history[track_id]) > 1 else None
+
+               # Count objects in any polygon
+               if len(self.count_reg_pts) >= 3:
+                   is_inside = self.counting_region.contains(Point(track_line[-1]))
+
+                   if prev_position is not None and is_inside and track_id not in self.count_ids:
+                       self.count_ids.append(track_id)
+
+                       if (box[0] - prev_position[0]) * (self.counting_region.centroid.x - prev_position[0]) > 0:
                            self.in_counts += 1
+                           self.class_wise_count[self.names[cls]]["in"] += 1
+                       else:
+                           self.out_counts += 1
+                           self.class_wise_count[self.names[cls]]["out"] += 1

+               # Count objects using line
                elif len(self.count_reg_pts) == 2:
-
-
-
-
-
-
-                   self.
+                   is_inside = (box[0] - prev_position[0]) * (self.counting_region.centroid.x - prev_position[0]) > 0
+
+                   if prev_position is not None and is_inside and track_id not in self.count_ids:
+                       distance = Point(track_line[-1]).distance(self.counting_region)
+
+                       if distance < self.line_dist_thresh and track_id not in self.count_ids:
+                           self.count_ids.append(track_id)
+
+                           if (box[0] - prev_position[0]) * (self.counting_region.centroid.x - prev_position[0]) > 0:
+                               self.in_counts += 1
+                               self.class_wise_count[self.names[cls]]["in"] += 1
+                           else:
+                               self.out_counts += 1
+                               self.class_wise_count[self.names[cls]]["out"] += 1
+
        else:
            for box, cls in zip(self.boxes, self.clss):
                if self.shape == "circle":
@@ -240,26 +274,30 @@ class Heatmap:
        heatmap_normalized = cv2.normalize(self.heatmap, None, 0, 255, cv2.NORM_MINMAX)
        heatmap_colored = cv2.applyColorMap(heatmap_normalized.astype(np.uint8), self.colormap)

-
-
-
-
-
-
-
-
-
-
-
-
-
+       label = "Ultralytics Analytics \t"
+
+       for key, value in self.class_wise_count.items():
+           if value["in"] != 0 or value["out"] != 0:
+               if not self.view_in_counts and not self.view_out_counts:
+                   label = None
+               elif not self.view_in_counts:
+                   label += f"{str.capitalize(key)}: IN {value['in']} \t"
+               elif not self.view_out_counts:
+                   label += f"{str.capitalize(key)}: OUT {value['out']} \t"
+               else:
+                   label += f"{str.capitalize(key)}: IN {value['in']} OUT {value['out']} \t"
+
+       label = label.rstrip()
+       label = label.split("\t")

-       if self.count_reg_pts is not None and
-           self.annotator.
-               counts=
-
+       if self.count_reg_pts is not None and label is not None:
+           self.annotator.display_counts(
+               counts=label,
+               tf=self.count_txt_thickness,
+               fontScale=self.fontsize,
                txt_color=self.count_txt_color,
-
+               line_color=self.line_color,
+               classwise_txtgap=self.cls_txtdisplay_gap,
           )

       self.im0 = cv2.addWeighted(self.im0, 1 - self.heatmap_alpha, heatmap_colored, self.heatmap_alpha, 0)
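Heatmap now tracks per-class IN/OUT counts (`class_wise_count`) and therefore needs the class-name mapping via the new `classes_names` argument to `set_args()`. A usage sketch under the assumption that the rest of the solutions API (`Heatmap()`, `set_args()`, `generate_heatmap()`) is unchanged from 8.1.38; file paths and the counting line are placeholders:

```python
import cv2
from ultralytics import YOLO
from ultralytics.solutions import heatmap

model = YOLO("yolov8n.pt")
cap = cv2.VideoCapture("path/to/video.mp4")
line_points = [(20, 400), (1080, 400)]  # counting line (2 points); >= 3 points defines a polygon

heatmap_obj = heatmap.Heatmap()
heatmap_obj.set_args(
    imw=int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
    imh=int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)),
    classes_names=model.names,    # new in 8.1.40: needed for class-wise counting
    colormap=cv2.COLORMAP_PARULA,
    count_reg_pts=line_points,
    view_img=True,
)

while cap.isOpened():
    success, frame = cap.read()
    if not success:
        break
    tracks = model.track(frame, persist=True, show=False)
    frame = heatmap_obj.generate_heatmap(frame, tracks)

cap.release()
cv2.destroyAllWindows()
```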