ultralytics 8.2.68__tar.gz → 8.2.70__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ultralytics might be problematic. Click here for more details.
- {ultralytics-8.2.68/ultralytics.egg-info → ultralytics-8.2.70}/PKG-INFO +1 -1
- {ultralytics-8.2.68 → ultralytics-8.2.70}/tests/test_cli.py +4 -16
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/__init__.py +3 -2
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/cfg/__init__.py +4 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/data/augment.py +1 -1
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/hub/google/__init__.py +3 -3
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/models/__init__.py +2 -1
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/models/fastsam/__init__.py +1 -2
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/models/fastsam/model.py +18 -0
- ultralytics-8.2.70/ultralytics/models/fastsam/predict.py +146 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/models/sam/build.py +2 -2
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/models/sam/model.py +10 -2
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/models/sam/modules/decoders.py +1 -42
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/models/sam/modules/encoders.py +3 -1
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/models/sam/modules/sam.py +5 -7
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/models/sam/modules/transformer.py +4 -3
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/models/sam/predict.py +12 -6
- ultralytics-8.2.70/ultralytics/models/sam2/__init__.py +6 -0
- ultralytics-8.2.70/ultralytics/models/sam2/build.py +156 -0
- ultralytics-8.2.70/ultralytics/models/sam2/model.py +97 -0
- ultralytics-8.2.70/ultralytics/models/sam2/modules/decoders.py +305 -0
- ultralytics-8.2.70/ultralytics/models/sam2/modules/encoders.py +332 -0
- ultralytics-8.2.70/ultralytics/models/sam2/modules/memory_attention.py +170 -0
- ultralytics-8.2.70/ultralytics/models/sam2/modules/sam2.py +804 -0
- ultralytics-8.2.70/ultralytics/models/sam2/modules/sam2_blocks.py +715 -0
- ultralytics-8.2.70/ultralytics/models/sam2/modules/utils.py +191 -0
- ultralytics-8.2.70/ultralytics/models/sam2/predict.py +182 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/nn/modules/transformer.py +5 -3
- ultralytics-8.2.70/ultralytics/trackers/utils/__init__.py +1 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/utils/ops.py +1 -1
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/utils/torch_utils.py +9 -6
- {ultralytics-8.2.68 → ultralytics-8.2.70/ultralytics.egg-info}/PKG-INFO +1 -1
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics.egg-info/SOURCES.txt +11 -1
- ultralytics-8.2.68/ultralytics/models/fastsam/predict.py +0 -31
- ultralytics-8.2.68/ultralytics/models/fastsam/prompt.py +0 -352
- {ultralytics-8.2.68 → ultralytics-8.2.70}/LICENSE +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/README.md +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/pyproject.toml +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/setup.cfg +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/tests/__init__.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/tests/conftest.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/tests/test_cuda.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/tests/test_engine.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/tests/test_explorer.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/tests/test_exports.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/tests/test_integrations.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/tests/test_python.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/tests/test_solutions.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/assets/bus.jpg +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/assets/zidane.jpg +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/cfg/datasets/Argoverse.yaml +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/cfg/datasets/DOTAv1.5.yaml +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/cfg/datasets/DOTAv1.yaml +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/cfg/datasets/GlobalWheat2020.yaml +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/cfg/datasets/ImageNet.yaml +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/cfg/datasets/Objects365.yaml +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/cfg/datasets/SKU-110K.yaml +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/cfg/datasets/VOC.yaml +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/cfg/datasets/VisDrone.yaml +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/cfg/datasets/african-wildlife.yaml +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/cfg/datasets/brain-tumor.yaml +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/cfg/datasets/carparts-seg.yaml +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/cfg/datasets/coco-pose.yaml +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/cfg/datasets/coco.yaml +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/cfg/datasets/coco128-seg.yaml +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/cfg/datasets/coco128.yaml +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/cfg/datasets/coco8-pose.yaml +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/cfg/datasets/coco8-seg.yaml +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/cfg/datasets/coco8.yaml +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/cfg/datasets/crack-seg.yaml +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/cfg/datasets/dota8.yaml +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/cfg/datasets/lvis.yaml +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/cfg/datasets/open-images-v7.yaml +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/cfg/datasets/package-seg.yaml +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/cfg/datasets/signature.yaml +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/cfg/datasets/tiger-pose.yaml +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/cfg/datasets/xView.yaml +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/cfg/default.yaml +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/cfg/models/rt-detr/rtdetr-l.yaml +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/cfg/models/rt-detr/rtdetr-resnet101.yaml +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/cfg/models/rt-detr/rtdetr-resnet50.yaml +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/cfg/models/rt-detr/rtdetr-x.yaml +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/cfg/models/v10/yolov10b.yaml +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/cfg/models/v10/yolov10l.yaml +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/cfg/models/v10/yolov10m.yaml +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/cfg/models/v10/yolov10n.yaml +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/cfg/models/v10/yolov10s.yaml +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/cfg/models/v10/yolov10x.yaml +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/cfg/models/v3/yolov3-spp.yaml +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/cfg/models/v3/yolov3-tiny.yaml +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/cfg/models/v3/yolov3.yaml +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/cfg/models/v5/yolov5-p6.yaml +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/cfg/models/v5/yolov5.yaml +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/cfg/models/v6/yolov6.yaml +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/cfg/models/v8/yolov8-cls-resnet101.yaml +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/cfg/models/v8/yolov8-cls-resnet50.yaml +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/cfg/models/v8/yolov8-cls.yaml +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/cfg/models/v8/yolov8-ghost-p2.yaml +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/cfg/models/v8/yolov8-ghost-p6.yaml +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/cfg/models/v8/yolov8-ghost.yaml +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/cfg/models/v8/yolov8-obb.yaml +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/cfg/models/v8/yolov8-p2.yaml +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/cfg/models/v8/yolov8-p6.yaml +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/cfg/models/v8/yolov8-pose-p6.yaml +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/cfg/models/v8/yolov8-pose.yaml +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/cfg/models/v8/yolov8-rtdetr.yaml +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/cfg/models/v8/yolov8-seg-p6.yaml +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/cfg/models/v8/yolov8-seg.yaml +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/cfg/models/v8/yolov8-world.yaml +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/cfg/models/v8/yolov8-worldv2.yaml +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/cfg/models/v8/yolov8.yaml +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/cfg/models/v9/yolov9c-seg.yaml +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/cfg/models/v9/yolov9c.yaml +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/cfg/models/v9/yolov9e-seg.yaml +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/cfg/models/v9/yolov9e.yaml +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/cfg/models/v9/yolov9m.yaml +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/cfg/models/v9/yolov9s.yaml +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/cfg/models/v9/yolov9t.yaml +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/cfg/trackers/botsort.yaml +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/cfg/trackers/bytetrack.yaml +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/data/__init__.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/data/annotator.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/data/base.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/data/build.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/data/converter.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/data/dataset.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/data/explorer/__init__.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/data/explorer/explorer.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/data/explorer/gui/__init__.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/data/explorer/gui/dash.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/data/explorer/utils.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/data/loaders.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/data/split_dota.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/data/utils.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/engine/__init__.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/engine/exporter.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/engine/model.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/engine/predictor.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/engine/results.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/engine/trainer.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/engine/tuner.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/engine/validator.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/hub/__init__.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/hub/auth.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/hub/session.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/hub/utils.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/models/fastsam/utils.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/models/fastsam/val.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/models/nas/__init__.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/models/nas/model.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/models/nas/predict.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/models/nas/val.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/models/rtdetr/__init__.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/models/rtdetr/model.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/models/rtdetr/predict.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/models/rtdetr/train.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/models/rtdetr/val.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/models/sam/__init__.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/models/sam/amg.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/models/sam/modules/__init__.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/models/sam/modules/tiny_encoder.py +0 -0
- {ultralytics-8.2.68/ultralytics/models/utils → ultralytics-8.2.70/ultralytics/models/sam2/modules}/__init__.py +0 -0
- {ultralytics-8.2.68/ultralytics/trackers → ultralytics-8.2.70/ultralytics/models}/utils/__init__.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/models/utils/loss.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/models/utils/ops.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/models/yolo/__init__.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/models/yolo/classify/__init__.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/models/yolo/classify/predict.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/models/yolo/classify/train.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/models/yolo/classify/val.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/models/yolo/detect/__init__.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/models/yolo/detect/predict.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/models/yolo/detect/train.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/models/yolo/detect/val.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/models/yolo/model.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/models/yolo/obb/__init__.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/models/yolo/obb/predict.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/models/yolo/obb/train.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/models/yolo/obb/val.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/models/yolo/pose/__init__.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/models/yolo/pose/predict.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/models/yolo/pose/train.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/models/yolo/pose/val.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/models/yolo/segment/__init__.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/models/yolo/segment/predict.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/models/yolo/segment/train.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/models/yolo/segment/val.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/models/yolo/world/__init__.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/models/yolo/world/train.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/models/yolo/world/train_world.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/nn/__init__.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/nn/autobackend.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/nn/modules/__init__.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/nn/modules/activation.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/nn/modules/block.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/nn/modules/conv.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/nn/modules/head.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/nn/modules/utils.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/nn/tasks.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/solutions/__init__.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/solutions/ai_gym.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/solutions/analytics.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/solutions/distance_calculation.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/solutions/heatmap.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/solutions/object_counter.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/solutions/parking_management.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/solutions/queue_management.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/solutions/speed_estimation.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/solutions/streamlit_inference.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/trackers/__init__.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/trackers/basetrack.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/trackers/bot_sort.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/trackers/byte_tracker.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/trackers/track.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/trackers/utils/gmc.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/trackers/utils/kalman_filter.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/trackers/utils/matching.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/utils/__init__.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/utils/autobatch.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/utils/benchmarks.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/utils/callbacks/__init__.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/utils/callbacks/base.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/utils/callbacks/clearml.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/utils/callbacks/comet.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/utils/callbacks/dvc.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/utils/callbacks/hub.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/utils/callbacks/mlflow.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/utils/callbacks/neptune.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/utils/callbacks/raytune.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/utils/callbacks/tensorboard.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/utils/callbacks/wb.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/utils/checks.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/utils/dist.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/utils/downloads.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/utils/errors.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/utils/files.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/utils/instance.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/utils/loss.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/utils/metrics.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/utils/patches.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/utils/plotting.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/utils/tal.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/utils/triton.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics/utils/tuner.py +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics.egg-info/dependency_links.txt +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics.egg-info/entry_points.txt +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics.egg-info/requires.txt +0 -0
- {ultralytics-8.2.68 → ultralytics-8.2.70}/ultralytics.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: ultralytics
|
|
3
|
-
Version: 8.2.68
|
|
3
|
+
Version: 8.2.70
|
|
4
4
|
Summary: Ultralytics YOLOv8 for SOTA object detection, multi-object tracking, instance segmentation, pose estimation and image classification.
|
|
5
5
|
Author: Glenn Jocher, Ayush Chaurasia, Jing Qiu
|
|
6
6
|
Maintainer: Glenn Jocher, Ayush Chaurasia, Jing Qiu
|
|
@@ -68,7 +68,6 @@ def test_fastsam(task="segment", model=WEIGHTS_DIR / "FastSAM-s.pt", data="coco8
|
|
|
68
68
|
run(f"yolo segment predict model={model} source={source} imgsz=32 save save_crop save_txt")
|
|
69
69
|
|
|
70
70
|
from ultralytics import FastSAM
|
|
71
|
-
from ultralytics.models.fastsam import FastSAMPrompt
|
|
72
71
|
from ultralytics.models.sam import Predictor
|
|
73
72
|
|
|
74
73
|
# Create a FastSAM model
|
|
@@ -81,21 +80,10 @@ def test_fastsam(task="segment", model=WEIGHTS_DIR / "FastSAM-s.pt", data="coco8
|
|
|
81
80
|
# Remove small regions
|
|
82
81
|
new_masks, _ = Predictor.remove_small_regions(everything_results[0].masks.data, min_area=20)
|
|
83
82
|
|
|
84
|
-
#
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
# Bbox default shape [0,0,0,0] -> [x1,y1,x2,y2]
|
|
89
|
-
ann = prompt_process.box_prompt(bbox=[200, 200, 300, 300])
|
|
90
|
-
|
|
91
|
-
# Text prompt
|
|
92
|
-
ann = prompt_process.text_prompt(text="a photo of a dog")
|
|
93
|
-
|
|
94
|
-
# Point prompt
|
|
95
|
-
# Points default [[0,0]] [[x1,y1],[x2,y2]]
|
|
96
|
-
# Point_label default [0] [1,0] 0:background, 1:foreground
|
|
97
|
-
ann = prompt_process.point_prompt(points=[[200, 200]], pointlabel=[1])
|
|
98
|
-
prompt_process.plot(annotations=ann, output="./")
|
|
83
|
+
# Run inference with bboxes and points and texts prompt at the same time
|
|
84
|
+
results = sam_model(
|
|
85
|
+
source, bboxes=[439, 437, 524, 709], points=[[200, 200]], labels=[1], texts="a photo of a dog"
|
|
86
|
+
)
|
|
99
87
|
|
|
100
88
|
|
|
101
89
|
def test_mobilesam():
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# Ultralytics YOLO 🚀, AGPL-3.0 license
|
|
2
2
|
|
|
3
|
-
__version__ = "8.2.68"
|
|
3
|
+
__version__ = "8.2.70"
|
|
4
4
|
|
|
5
5
|
import os
|
|
6
6
|
|
|
@@ -8,7 +8,7 @@ import os
|
|
|
8
8
|
os.environ["OMP_NUM_THREADS"] = "1" # reduce CPU utilization during training
|
|
9
9
|
|
|
10
10
|
from ultralytics.data.explorer.explorer import Explorer
|
|
11
|
-
from ultralytics.models import NAS, RTDETR, SAM, YOLO, FastSAM, YOLOWorld
|
|
11
|
+
from ultralytics.models import NAS, RTDETR, SAM, SAM2, YOLO, FastSAM, YOLOWorld
|
|
12
12
|
from ultralytics.utils import ASSETS, SETTINGS
|
|
13
13
|
from ultralytics.utils.checks import check_yolo as checks
|
|
14
14
|
from ultralytics.utils.downloads import download
|
|
@@ -21,6 +21,7 @@ __all__ = (
|
|
|
21
21
|
"YOLOWorld",
|
|
22
22
|
"NAS",
|
|
23
23
|
"SAM",
|
|
24
|
+
"SAM2",
|
|
24
25
|
"FastSAM",
|
|
25
26
|
"RTDETR",
|
|
26
27
|
"checks",
|
|
@@ -2221,7 +2221,7 @@ class RandomLoadText:
|
|
|
2221
2221
|
pos_labels = np.unique(cls).tolist()
|
|
2222
2222
|
|
|
2223
2223
|
if len(pos_labels) > self.max_samples:
|
|
2224
|
-
pos_labels =
|
|
2224
|
+
pos_labels = random.sample(pos_labels, k=self.max_samples)
|
|
2225
2225
|
|
|
2226
2226
|
neg_samples = min(min(num_classes, self.max_samples) - len(pos_labels), random.randint(*self.neg_samples))
|
|
2227
2227
|
neg_labels = [i for i in range(num_classes) if i not in pos_labels]
|
|
@@ -136,14 +136,14 @@ class GCPRegions:
|
|
|
136
136
|
sorted_results = sorted(results, key=lambda x: x[1])
|
|
137
137
|
|
|
138
138
|
if verbose:
|
|
139
|
-
print(f"{'Region':<
|
|
139
|
+
print(f"{'Region':<25} {'Location':<35} {'Tier':<5} {'Latency (ms)'}")
|
|
140
140
|
for region, mean, std, min_, max_ in sorted_results:
|
|
141
141
|
tier, city, country = self.regions[region]
|
|
142
142
|
location = f"{city}, {country}"
|
|
143
143
|
if mean == float("inf"):
|
|
144
|
-
print(f"{region:<
|
|
144
|
+
print(f"{region:<25} {location:<35} {tier:<5} {'Timeout'}")
|
|
145
145
|
else:
|
|
146
|
-
print(f"{region:<
|
|
146
|
+
print(f"{region:<25} {location:<35} {tier:<5} {mean:.0f} ± {std:.0f} ({min_:.0f} - {max_:.0f})")
|
|
147
147
|
print(f"\nLowest latency region{'s' if top > 1 else ''}:")
|
|
148
148
|
for region, mean, std, min_, max_ in sorted_results[:top]:
|
|
149
149
|
tier, city, country = self.regions[region]
|
|
@@ -4,6 +4,7 @@ from .fastsam import FastSAM
|
|
|
4
4
|
from .nas import NAS
|
|
5
5
|
from .rtdetr import RTDETR
|
|
6
6
|
from .sam import SAM
|
|
7
|
+
from .sam2 import SAM2
|
|
7
8
|
from .yolo import YOLO, YOLOWorld
|
|
8
9
|
|
|
9
|
-
__all__ = "YOLO", "RTDETR", "SAM", "FastSAM", "NAS", "YOLOWorld" # allow simpler import
|
|
10
|
+
__all__ = "YOLO", "RTDETR", "SAM", "FastSAM", "NAS", "YOLOWorld", "SAM2" # allow simpler import
|
|
@@ -2,7 +2,6 @@
|
|
|
2
2
|
|
|
3
3
|
from .model import FastSAM
|
|
4
4
|
from .predict import FastSAMPredictor
|
|
5
|
-
from .prompt import FastSAMPrompt
|
|
6
5
|
from .val import FastSAMValidator
|
|
7
6
|
|
|
8
|
-
__all__ = "FastSAMPredictor", "FastSAM", "FastSAMPrompt", "FastSAMValidator"
|
|
7
|
+
__all__ = "FastSAMPredictor", "FastSAM", "FastSAMValidator"
|
|
@@ -28,6 +28,24 @@ class FastSAM(Model):
|
|
|
28
28
|
assert Path(model).suffix not in {".yaml", ".yml"}, "FastSAM models only support pre-trained models."
|
|
29
29
|
super().__init__(model=model, task="segment")
|
|
30
30
|
|
|
31
|
+
def predict(self, source, stream=False, bboxes=None, points=None, labels=None, texts=None, **kwargs):
|
|
32
|
+
"""
|
|
33
|
+
Performs segmentation prediction on the given image or video source.
|
|
34
|
+
|
|
35
|
+
Args:
|
|
36
|
+
source (str): Path to the image or video file, or a PIL.Image object, or a numpy.ndarray object.
|
|
37
|
+
stream (bool, optional): If True, enables real-time streaming. Defaults to False.
|
|
38
|
+
bboxes (list, optional): List of bounding box coordinates for prompted segmentation. Defaults to None.
|
|
39
|
+
points (list, optional): List of points for prompted segmentation. Defaults to None.
|
|
40
|
+
labels (list, optional): List of labels for prompted segmentation. Defaults to None.
|
|
41
|
+
texts (list, optional): List of texts for prompted segmentation. Defaults to None.
|
|
42
|
+
|
|
43
|
+
Returns:
|
|
44
|
+
(list): The model predictions.
|
|
45
|
+
"""
|
|
46
|
+
prompts = dict(bboxes=bboxes, points=points, labels=labels, texts=texts)
|
|
47
|
+
return super().predict(source, stream, prompts=prompts, **kwargs)
|
|
48
|
+
|
|
31
49
|
@property
|
|
32
50
|
def task_map(self):
|
|
33
51
|
"""Returns a dictionary mapping segment task to corresponding predictor and validator classes."""
|
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
# Ultralytics YOLO 🚀, AGPL-3.0 license
|
|
2
|
+
import torch
|
|
3
|
+
from PIL import Image
|
|
4
|
+
|
|
5
|
+
from ultralytics.models.yolo.segment import SegmentationPredictor
|
|
6
|
+
from ultralytics.utils import DEFAULT_CFG, checks
|
|
7
|
+
from ultralytics.utils.metrics import box_iou
|
|
8
|
+
from ultralytics.utils.ops import scale_masks
|
|
9
|
+
|
|
10
|
+
from .utils import adjust_bboxes_to_image_border
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class FastSAMPredictor(SegmentationPredictor):
    """
    FastSAMPredictor is specialized for fast SAM (Segment Anything Model) segmentation prediction tasks in Ultralytics
    YOLO framework.

    This class extends the SegmentationPredictor. After the parent post-processing it snaps boxes that cover almost the
    whole image to the exact image rectangle, and then optionally filters the detections with box, point and/or text
    prompts (see `prompt`).

    Attributes:
        prompts (dict): Pending prompts ("bboxes", "points", "labels", "texts"). They are consumed (popped) by the
            next call to `postprocess`.
    """

    def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
        """Initializes a FastSAMPredictor for fast SAM segmentation tasks in Ultralytics YOLO framework."""
        super().__init__(cfg, overrides, _callbacks)
        self.prompts = {}

    def postprocess(self, preds, img, orig_imgs):
        """
        Post-processes FastSAM predictions and applies any prompts registered via `set_prompts`.

        Args:
            preds: Raw model outputs, forwarded unchanged to `SegmentationPredictor.postprocess`.
            img: Preprocessed input batch, forwarded unchanged to the parent implementation.
            orig_imgs: Original images, forwarded unchanged to the parent implementation.

        Returns:
            (List[Results]): Results filtered by the pending prompts, or all results when no prompt is pending.
        """
        # Prompts are single-use: pop them so they do not leak into the next call.
        bboxes = self.prompts.pop("bboxes", None)
        points = self.prompts.pop("points", None)
        labels = self.prompts.pop("labels", None)
        texts = self.prompts.pop("texts", None)
        results = super().postprocess(preds, img, orig_imgs)
        for result in results:
            # Box covering the whole original image, in XYXY order (orig_shape is (height, width)).
            full_box = torch.tensor(
                [0, 0, result.orig_shape[1], result.orig_shape[0]], device=preds[0].device, dtype=torch.float32
            )
            boxes = adjust_bboxes_to_image_border(result.boxes.xyxy, result.orig_shape)
            # Detections whose border-adjusted box overlaps the full image with IoU > 0.9 are set to the full image box.
            idx = torch.nonzero(box_iou(full_box[None], boxes) > 0.9).flatten()
            if idx.numel() != 0:
                result.boxes.xyxy[idx] = full_box

        return self.prompt(results, bboxes=bboxes, points=points, labels=labels, texts=texts)

    def prompt(self, results, bboxes=None, points=None, labels=None, texts=None):
        """
        Internal function for image segmentation inference based on cues like bounding boxes, points, and texts.

        For every result the selected detections are the union of: the best-matching mask of each box prompt, the
        masks chosen by the point prompts, and the best-matching mask of each text prompt (scored with CLIP).

        Args:
            results (Results | List[Results]): The original inference results from FastSAM models without any prompts.
            bboxes (np.ndarray | List, optional): Bounding boxes with shape (N, 4), in XYXY format.
            points (np.ndarray | List, optional): Points indicating object locations with shape (N, 2), in pixels.
            labels (np.ndarray | List, optional): Labels for point prompts, shape (N, ). 1 = foreground, 0 = background.
            texts (str | List[str], optional): Textual prompts, a list contains string objects.

        Returns:
            (List[Results]): The output results determined by prompts. When no prompt is given, `results` is returned
                unchanged.
        """
        if bboxes is None and points is None and texts is None:
            return results
        if not isinstance(results, list):
            results = [results]

        # Prompts are identical for every result, so convert and validate them once, outside the loop.
        bbox_areas = None
        if bboxes is not None:
            bboxes = torch.as_tensor(bboxes, dtype=torch.int32, device=self.device)
            bboxes = bboxes[None] if bboxes.ndim == 1 else bboxes
            bbox_areas = (bboxes[:, 3] - bboxes[:, 1]) * (bboxes[:, 2] - bboxes[:, 0])
        if points is not None:
            points = torch.as_tensor(points, dtype=torch.int32, device=self.device)
            points = points[None] if points.ndim == 1 else points
            if labels is None:
                labels = torch.ones(points.shape[0])  # every point is foreground by default
            # reshape(-1) also accepts a scalar label given together with a single point
            labels = torch.as_tensor(labels, dtype=torch.int32, device=self.device).reshape(-1)
            assert len(labels) == len(
                points
            ), f"Expected `labels` to have the same size as `points`, but got {len(labels)} and {len(points)}"
        if isinstance(texts, str):
            texts = [texts]

        prompt_results = []
        for result in results:
            # Nothing to select from: keep the empty result instead of failing on `result.masks`.
            if len(result) == 0 or result.masks is None:
                prompt_results.append(result)
                continue

            masks = result.masks.data
            if masks.shape[1:] != result.orig_shape:
                masks = scale_masks(masks[None], result.orig_shape)[0]
            idx = torch.zeros(len(result), dtype=torch.bool, device=self.device)

            # bboxes prompt: for each box keep the mask with the highest IoU between the box and the mask
            if bboxes is not None:
                mask_areas = torch.stack([masks[:, b[1] : b[3], b[0] : b[2]].sum(dim=(1, 2)) for b in bboxes])
                full_mask_areas = torch.sum(masks, dim=(1, 2))
                union = bbox_areas[:, None] + full_mask_areas - mask_areas
                idx[torch.argmax(mask_areas / union, dim=1)] = True

            # points prompt: foreground points add the masks they hit, background points remove them
            if points is not None:
                # With only background points, start from "everything selected" and carve masks out.
                all_negative = bool(labels.sum() == 0)
                point_idx = torch.full((len(result),), all_negative, dtype=torch.bool, device=self.device)
                for point, label in zip(points, labels):
                    hit = torch.nonzero(masks[:, point[1], point[0]], as_tuple=True)[0]
                    point_idx[hit] = bool(label)
                idx |= point_idx

            # texts prompt: for each text keep the detection whose crop is most similar according to CLIP
            if texts is not None:
                crop_ims, keep_idx = [], []
                for i, box in enumerate(result.boxes.xyxy.tolist()):
                    if masks[i].sum() <= 100:  # skip masks with at most 100 foreground pixels
                        continue
                    x1, y1, x2, y2 = (int(v) for v in box)
                    # `::-1` reverses the channel axis (BGR -> RGB expected by `_clip_inference`)
                    crop_ims.append(Image.fromarray(result.orig_img[y1:y2, x1:x2, ::-1]))
                    keep_idx.append(i)  # remember which detection each crop came from
                if crop_ims:  # CLIP cannot be run on an empty batch
                    similarity = self._clip_inference(crop_ims, texts)  # (M, N)
                    best_crop = torch.argmax(similarity, dim=-1)  # (M, ), indices into `crop_ims`
                    # Translate crop indices back to detection indices through the recorded mapping; this is exact
                    # for any number of texts and any number of skipped masks.
                    keep = torch.tensor(keep_idx, device=self.device)
                    idx[keep[best_crop]] = True

            prompt_results.append(result[idx])

        return prompt_results

    def _clip_inference(self, images, texts):
        """
        CLIP Inference process.

        Args:
            images (List[PIL.Image]): A list of source images and each of them should be PIL.Image type with RGB channel order.
            texts (List[str]): A list of prompt texts and each of them should be string object.

        Returns:
            (torch.Tensor): The similarity between given images and texts, shape (M, N) for M texts and N images.
        """
        try:
            import clip
        except ImportError:
            checks.check_requirements("git+https://github.com/ultralytics/CLIP.git")
            import clip
        # Load the CLIP model lazily and cache it on the predictor for subsequent calls.
        if (not hasattr(self, "clip_model")) or (not hasattr(self, "clip_preprocess")):
            self.clip_model, self.clip_preprocess = clip.load("ViT-B/32", device=self.device)
        images = torch.stack([self.clip_preprocess(image).to(self.device) for image in images])
        tokenized_text = clip.tokenize(texts).to(self.device)
        image_features = self.clip_model.encode_image(images)
        text_features = self.clip_model.encode_text(tokenized_text)
        # L2-normalise so the dot product below is a cosine similarity.
        image_features /= image_features.norm(dim=-1, keepdim=True)  # (N, 512)
        text_features /= text_features.norm(dim=-1, keepdim=True)  # (M, 512)
        return (image_features * text_features[:, None]).sum(-1)  # (M, N)

    def set_prompts(self, prompts):
        """Set prompts in advance; they are consumed by the next `postprocess` call."""
        self.prompts = prompts
|
|
@@ -14,7 +14,7 @@ from ultralytics.utils.downloads import attempt_download_asset
|
|
|
14
14
|
|
|
15
15
|
from .modules.decoders import MaskDecoder
|
|
16
16
|
from .modules.encoders import ImageEncoderViT, PromptEncoder
|
|
17
|
-
from .modules.sam import
|
|
17
|
+
from .modules.sam import SAMModel
|
|
18
18
|
from .modules.tiny_encoder import TinyViT
|
|
19
19
|
from .modules.transformer import TwoWayTransformer
|
|
20
20
|
|
|
@@ -105,7 +105,7 @@ def _build_sam(
|
|
|
105
105
|
out_chans=prompt_embed_dim,
|
|
106
106
|
)
|
|
107
107
|
)
|
|
108
|
-
sam =
|
|
108
|
+
sam = SAMModel(
|
|
109
109
|
image_encoder=image_encoder,
|
|
110
110
|
prompt_encoder=PromptEncoder(
|
|
111
111
|
embed_dim=prompt_embed_dim,
|
|
@@ -44,6 +44,7 @@ class SAM(Model):
|
|
|
44
44
|
"""
|
|
45
45
|
if model and Path(model).suffix not in {".pt", ".pth"}:
|
|
46
46
|
raise NotImplementedError("SAM prediction requires pre-trained *.pt or *.pth model.")
|
|
47
|
+
self.is_sam2 = "sam2" in Path(model).stem
|
|
47
48
|
super().__init__(model=model, task="segment")
|
|
48
49
|
|
|
49
50
|
def _load(self, weights: str, task=None):
|
|
@@ -54,7 +55,12 @@ class SAM(Model):
|
|
|
54
55
|
weights (str): Path to the weights file.
|
|
55
56
|
task (str, optional): Task name. Defaults to None.
|
|
56
57
|
"""
|
|
57
|
-
self.
|
|
58
|
+
if self.is_sam2:
|
|
59
|
+
from ..sam2.build import build_sam2
|
|
60
|
+
|
|
61
|
+
self.model = build_sam2(weights)
|
|
62
|
+
else:
|
|
63
|
+
self.model = build_sam(weights)
|
|
58
64
|
|
|
59
65
|
def predict(self, source, stream=False, bboxes=None, points=None, labels=None, **kwargs):
|
|
60
66
|
"""
|
|
@@ -112,4 +118,6 @@ class SAM(Model):
|
|
|
112
118
|
Returns:
|
|
113
119
|
(dict): A dictionary mapping the 'segment' task to its corresponding 'Predictor'.
|
|
114
120
|
"""
|
|
115
|
-
|
|
121
|
+
from ..sam2.predict import SAM2Predictor
|
|
122
|
+
|
|
123
|
+
return {"segment": {"predictor": SAM2Predictor if self.is_sam2 else Predictor}}
|
|
@@ -4,9 +4,8 @@ from typing import List, Tuple, Type
|
|
|
4
4
|
|
|
5
5
|
import torch
|
|
6
6
|
from torch import nn
|
|
7
|
-
from torch.nn import functional as F
|
|
8
7
|
|
|
9
|
-
from ultralytics.nn.modules import LayerNorm2d
|
|
8
|
+
from ultralytics.nn.modules import MLP, LayerNorm2d
|
|
10
9
|
|
|
11
10
|
|
|
12
11
|
class MaskDecoder(nn.Module):
|
|
@@ -28,7 +27,6 @@ class MaskDecoder(nn.Module):
|
|
|
28
27
|
|
|
29
28
|
def __init__(
|
|
30
29
|
self,
|
|
31
|
-
*,
|
|
32
30
|
transformer_dim: int,
|
|
33
31
|
transformer: nn.Module,
|
|
34
32
|
num_multimask_outputs: int = 3,
|
|
@@ -149,42 +147,3 @@ class MaskDecoder(nn.Module):
|
|
|
149
147
|
iou_pred = self.iou_prediction_head(iou_token_out)
|
|
150
148
|
|
|
151
149
|
return masks, iou_pred
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
class MLP(nn.Module):
    """
    Feed-forward network made of stacked linear layers with ReLU in between, lightly adapted from
    https://github.com/facebookresearch/MaskFormer/blob/main/mask_former/modeling/transformer/transformer_predictor.py
    """

    def __init__(
        self,
        input_dim: int,
        hidden_dim: int,
        output_dim: int,
        num_layers: int,
        sigmoid_output: bool = False,
    ) -> None:
        """
        Builds the stack of linear layers.

        Args:
            input_dim (int): Size of the input features.
            hidden_dim (int): Size of every intermediate layer.
            output_dim (int): Size of the final output.
            num_layers (int): Total number of linear layers (`num_layers - 1` of them produce `hidden_dim`).
            sigmoid_output (bool, optional): Whether to pass the final output through a sigmoid. Defaults to False.
        """
        super().__init__()
        self.num_layers = num_layers
        self.sigmoid_output = sigmoid_output
        hidden_sizes = [hidden_dim] * (num_layers - 1)
        in_sizes = [input_dim, *hidden_sizes]
        out_sizes = [*hidden_sizes, output_dim]
        self.layers = nn.ModuleList(nn.Linear(n_in, n_out) for n_in, n_out in zip(in_sizes, out_sizes))

    def forward(self, x):
        """Runs the input through every layer, applying ReLU after all but the last one."""
        last = self.num_layers - 1
        for position, layer in enumerate(self.layers):
            x = layer(x)
            if position < last:
                x = F.relu(x)
        return torch.sigmoid(x) if self.sigmoid_output else x
|
|
@@ -211,6 +211,8 @@ class PromptEncoder(nn.Module):
|
|
|
211
211
|
point_embedding[labels == -1] += self.not_a_point_embed.weight
|
|
212
212
|
point_embedding[labels == 0] += self.point_embeddings[0].weight
|
|
213
213
|
point_embedding[labels == 1] += self.point_embeddings[1].weight
|
|
214
|
+
point_embedding[labels == 2] += self.point_embeddings[2].weight
|
|
215
|
+
point_embedding[labels == 3] += self.point_embeddings[3].weight
|
|
214
216
|
return point_embedding
|
|
215
217
|
|
|
216
218
|
def _embed_boxes(self, boxes: torch.Tensor) -> torch.Tensor:
|
|
@@ -226,8 +228,8 @@ class PromptEncoder(nn.Module):
|
|
|
226
228
|
"""Embeds mask inputs."""
|
|
227
229
|
return self.mask_downscaling(masks)
|
|
228
230
|
|
|
231
|
+
@staticmethod
|
|
229
232
|
def _get_batch_size(
|
|
230
|
-
self,
|
|
231
233
|
points: Optional[Tuple[torch.Tensor, torch.Tensor]],
|
|
232
234
|
boxes: Optional[torch.Tensor],
|
|
233
235
|
masks: Optional[torch.Tensor],
|
|
@@ -15,15 +15,14 @@ from .decoders import MaskDecoder
|
|
|
15
15
|
from .encoders import ImageEncoderViT, PromptEncoder
|
|
16
16
|
|
|
17
17
|
|
|
18
|
-
class
|
|
18
|
+
class SAMModel(nn.Module):
|
|
19
19
|
"""
|
|
20
|
-
|
|
21
|
-
embeddings, and prompt encoders to encode various types of input prompts. These embeddings are then used by
|
|
22
|
-
decoder to predict object masks.
|
|
20
|
+
SAMModel (Segment Anything Model) is designed for object segmentation tasks. It uses image encoders to generate
|
|
21
|
+
image embeddings, and prompt encoders to encode various types of input prompts. These embeddings are then used by
|
|
22
|
+
the mask decoder to predict object masks.
|
|
23
23
|
|
|
24
24
|
Attributes:
|
|
25
25
|
mask_threshold (float): Threshold value for mask prediction.
|
|
26
|
-
image_format (str): Format of the input image, default is 'RGB'.
|
|
27
26
|
image_encoder (ImageEncoderViT): The backbone used to encode the image into embeddings.
|
|
28
27
|
prompt_encoder (PromptEncoder): Encodes various types of input prompts.
|
|
29
28
|
mask_decoder (MaskDecoder): Predicts object masks from the image and prompt embeddings.
|
|
@@ -32,7 +31,6 @@ class Sam(nn.Module):
|
|
|
32
31
|
"""
|
|
33
32
|
|
|
34
33
|
mask_threshold: float = 0.0
|
|
35
|
-
image_format: str = "RGB"
|
|
36
34
|
|
|
37
35
|
def __init__(
|
|
38
36
|
self,
|
|
@@ -43,7 +41,7 @@ class Sam(nn.Module):
|
|
|
43
41
|
pixel_std: List[float] = (58.395, 57.12, 57.375),
|
|
44
42
|
) -> None:
|
|
45
43
|
"""
|
|
46
|
-
Initialize the
|
|
44
|
+
Initialize the SAMModel class to predict object masks from an image and input prompts.
|
|
47
45
|
|
|
48
46
|
Note:
|
|
49
47
|
All forward() operations moved to SAMPredictor.
|
|
@@ -86,7 +86,6 @@ class TwoWayTransformer(nn.Module):
|
|
|
86
86
|
(torch.Tensor): the processed image_embedding
|
|
87
87
|
"""
|
|
88
88
|
# BxCxHxW -> BxHWxC == B x N_image_tokens x C
|
|
89
|
-
bs, c, h, w = image_embedding.shape
|
|
90
89
|
image_embedding = image_embedding.flatten(2).permute(0, 2, 1)
|
|
91
90
|
image_pe = image_pe.flatten(2).permute(0, 2, 1)
|
|
92
91
|
|
|
@@ -212,6 +211,7 @@ class Attention(nn.Module):
|
|
|
212
211
|
embedding_dim: int,
|
|
213
212
|
num_heads: int,
|
|
214
213
|
downsample_rate: int = 1,
|
|
214
|
+
kv_in_dim: int = None,
|
|
215
215
|
) -> None:
|
|
216
216
|
"""
|
|
217
217
|
Initializes the Attention model with the given dimensions and settings.
|
|
@@ -226,13 +226,14 @@ class Attention(nn.Module):
|
|
|
226
226
|
"""
|
|
227
227
|
super().__init__()
|
|
228
228
|
self.embedding_dim = embedding_dim
|
|
229
|
+
self.kv_in_dim = kv_in_dim if kv_in_dim is not None else embedding_dim
|
|
229
230
|
self.internal_dim = embedding_dim // downsample_rate
|
|
230
231
|
self.num_heads = num_heads
|
|
231
232
|
assert self.internal_dim % num_heads == 0, "num_heads must divide embedding_dim."
|
|
232
233
|
|
|
233
234
|
self.q_proj = nn.Linear(embedding_dim, self.internal_dim)
|
|
234
|
-
self.k_proj = nn.Linear(
|
|
235
|
-
self.v_proj = nn.Linear(
|
|
235
|
+
self.k_proj = nn.Linear(self.kv_in_dim, self.internal_dim)
|
|
236
|
+
self.v_proj = nn.Linear(self.kv_in_dim, self.internal_dim)
|
|
236
237
|
self.out_proj = nn.Linear(self.internal_dim, embedding_dim)
|
|
237
238
|
|
|
238
239
|
@staticmethod
|
|
@@ -168,7 +168,7 @@ class Predictor(BasePredictor):
|
|
|
168
168
|
- np.ndarray: An array of length C containing quality scores predicted by the model for each mask.
|
|
169
169
|
- np.ndarray: Low-resolution logits of shape CxHxW for subsequent inference, where H=W=256.
|
|
170
170
|
"""
|
|
171
|
-
features = self.
|
|
171
|
+
features = self.get_im_features(im) if self.features is None else self.features
|
|
172
172
|
|
|
173
173
|
src_shape, dst_shape = self.batch[1][0].shape[:2], im.shape[2:]
|
|
174
174
|
r = 1.0 if self.segment_all else min(dst_shape[0] / src_shape[0], dst_shape[1] / src_shape[1])
|
|
@@ -334,7 +334,7 @@ class Predictor(BasePredictor):
|
|
|
334
334
|
"""
|
|
335
335
|
device = select_device(self.args.device, verbose=verbose)
|
|
336
336
|
if model is None:
|
|
337
|
-
model =
|
|
337
|
+
model = self.get_model()
|
|
338
338
|
model.eval()
|
|
339
339
|
self.model = model.to(device)
|
|
340
340
|
self.device = device
|
|
@@ -348,6 +348,10 @@ class Predictor(BasePredictor):
|
|
|
348
348
|
self.model.fp16 = False
|
|
349
349
|
self.done_warmup = True
|
|
350
350
|
|
|
351
|
+
def get_model(self):
    """Build the Segment Anything Model (SAM) from the weights configured in `self.args.model`."""
    weights = self.args.model
    return build_sam(weights)
|
|
354
|
+
|
|
351
355
|
def postprocess(self, preds, img, orig_imgs):
|
|
352
356
|
"""
|
|
353
357
|
Post-processes SAM's inference outputs to generate object detection masks and bounding boxes.
|
|
@@ -412,16 +416,18 @@ class Predictor(BasePredictor):
|
|
|
412
416
|
AssertionError: If more than one image is set.
|
|
413
417
|
"""
|
|
414
418
|
if self.model is None:
|
|
415
|
-
|
|
416
|
-
self.setup_model(model)
|
|
419
|
+
self.setup_model(model=None)
|
|
417
420
|
self.setup_source(image)
|
|
418
421
|
assert len(self.dataset) == 1, "`set_image` only supports setting one image!"
|
|
419
422
|
for batch in self.dataset:
|
|
420
423
|
im = self.preprocess(batch[1])
|
|
421
|
-
self.features = self.
|
|
422
|
-
self.im = im
|
|
424
|
+
self.features = self.get_im_features(im)
|
|
423
425
|
break
|
|
424
426
|
|
|
427
|
+
def get_im_features(self, im):
    """Pass `im` through the model's image encoder and return the encoder output."""
    encoder = self.model.image_encoder
    return encoder(im)
|
|
430
|
+
|
|
425
431
|
def set_prompts(self, prompts):
|
|
426
432
|
"""Set prompts in advance."""
|
|
427
433
|
self.prompts = prompts
|