vision-agents-plugins-moondream 0.1.13__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vision_agents_plugins_moondream-0.1.13/.gitignore +87 -0
- vision_agents_plugins_moondream-0.1.13/PKG-INFO +191 -0
- vision_agents_plugins_moondream-0.1.13/README.md +172 -0
- vision_agents_plugins_moondream-0.1.13/py.typed +0 -0
- vision_agents_plugins_moondream-0.1.13/pyproject.toml +43 -0
- vision_agents_plugins_moondream-0.1.13/tests/test_moondream.py +466 -0
- vision_agents_plugins_moondream-0.1.13/tests/test_moondream_local.py +290 -0
- vision_agents_plugins_moondream-0.1.13/vision_agents/plugins/moondream/__init__.py +25 -0
- vision_agents_plugins_moondream-0.1.13/vision_agents/plugins/moondream/moondream_cloud_processor.py +243 -0
- vision_agents_plugins_moondream-0.1.13/vision_agents/plugins/moondream/moondream_local_processor.py +360 -0
- vision_agents_plugins_moondream-0.1.13/vision_agents/plugins/moondream/moondream_utils.py +104 -0
- vision_agents_plugins_moondream-0.1.13/vision_agents/plugins/moondream/moondream_video_track.py +76 -0
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
# Byte-compiled / optimized / DLL files
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*$py.class
|
|
5
|
+
*.so
|
|
6
|
+
|
|
7
|
+
# Distribution / packaging
|
|
8
|
+
.Python
|
|
9
|
+
build/
|
|
10
|
+
dist/
|
|
11
|
+
downloads/
|
|
12
|
+
develop-eggs/
|
|
13
|
+
eggs/
|
|
14
|
+
.eggs/
|
|
15
|
+
lib64/
|
|
16
|
+
parts/
|
|
17
|
+
sdist/
|
|
18
|
+
var/
|
|
19
|
+
wheels/
|
|
20
|
+
share/python-wheels/
|
|
21
|
+
pip-wheel-metadata/
|
|
22
|
+
MANIFEST
|
|
23
|
+
*.egg-info/
|
|
24
|
+
*.egg
|
|
25
|
+
|
|
26
|
+
# Installer logs
|
|
27
|
+
pip-log.txt
|
|
28
|
+
pip-delete-this-directory.txt
|
|
29
|
+
|
|
30
|
+
# Unit test / coverage reports
|
|
31
|
+
htmlcov/
|
|
32
|
+
.tox/
|
|
33
|
+
.nox/
|
|
34
|
+
.coverage
|
|
35
|
+
.coverage.*
|
|
36
|
+
.cache
|
|
37
|
+
coverage.xml
|
|
38
|
+
nosetests.xml
|
|
39
|
+
*.cover
|
|
40
|
+
*.py,cover
|
|
41
|
+
.hypothesis/
|
|
42
|
+
.pytest_cache/
|
|
43
|
+
|
|
44
|
+
# Type checker / lint caches
|
|
45
|
+
.mypy_cache/
|
|
46
|
+
.dmypy.json
|
|
47
|
+
dmypy.json
|
|
48
|
+
.pytype/
|
|
49
|
+
.pyre/
|
|
50
|
+
.ruff_cache/
|
|
51
|
+
|
|
52
|
+
# Environments
|
|
53
|
+
.venv
|
|
54
|
+
env/
|
|
55
|
+
venv/
|
|
56
|
+
ENV/
|
|
57
|
+
env.bak/
|
|
58
|
+
venv.bak/
|
|
59
|
+
.env
|
|
60
|
+
.env.local
|
|
61
|
+
.env.*.local
|
|
62
|
+
.env.bak
|
|
63
|
+
pyvenv.cfg
|
|
64
|
+
.python-version
|
|
65
|
+
|
|
66
|
+
# Editors / IDEs
|
|
67
|
+
.vscode/
|
|
68
|
+
.idea/
|
|
69
|
+
|
|
70
|
+
# Jupyter Notebook
|
|
71
|
+
.ipynb_checkpoints/
|
|
72
|
+
|
|
73
|
+
# OS / Misc
|
|
74
|
+
.DS_Store
|
|
75
|
+
*.log
|
|
76
|
+
|
|
77
|
+
# Tooling & repo-specific
|
|
78
|
+
pyrightconfig.json
|
|
79
|
+
shell.nix
|
|
80
|
+
bin/*
|
|
81
|
+
lib/*
|
|
82
|
+
stream-py/
|
|
83
|
+
|
|
84
|
+
# Artifacts / assets
|
|
85
|
+
*.pt
|
|
86
|
+
*.kef
|
|
87
|
+
*.onnx
|
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: vision-agents-plugins-moondream
|
|
3
|
+
Version: 0.1.13
|
|
4
|
+
Summary: Moondream 3 vision processor plugin for Vision Agents
|
|
5
|
+
Project-URL: Documentation, https://visionagents.ai/
|
|
6
|
+
Project-URL: Website, https://visionagents.ai/
|
|
7
|
+
Project-URL: Source, https://github.com/GetStream/Vision-Agents
|
|
8
|
+
License-Expression: MIT
|
|
9
|
+
Requires-Python: >=3.10
|
|
10
|
+
Requires-Dist: accelerate>=0.20.0
|
|
11
|
+
Requires-Dist: moondream>=0.1.1
|
|
12
|
+
Requires-Dist: numpy>=2.0.0
|
|
13
|
+
Requires-Dist: opencv-python>=4.8.0
|
|
14
|
+
Requires-Dist: pillow>=10.4.0
|
|
15
|
+
Requires-Dist: torch>=2.0.0
|
|
16
|
+
Requires-Dist: transformers>=4.40.0
|
|
17
|
+
Requires-Dist: vision-agents
|
|
18
|
+
Description-Content-Type: text/markdown
|
|
19
|
+
|
|
20
|
+
# Moondream Plugin
|
|
21
|
+
|
|
22
|
+
This plugin provides Moondream 3 detection capabilities for vision-agents, enabling real-time zero-shot object detection on video streams. Choose between cloud-hosted or local processing depending on your needs.
|
|
23
|
+
|
|
24
|
+
## Installation
|
|
25
|
+
|
|
26
|
+
```bash
|
|
27
|
+
uv add vision-agents-plugins-moondream
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
## Choosing the Right Processor
|
|
31
|
+
|
|
32
|
+
### CloudDetectionProcessor (Recommended for Most Users)
|
|
33
|
+
- **Use when:** You want a simple setup with no infrastructure management
|
|
34
|
+
- **Pros:** No model download, no GPU required, automatic updates
|
|
35
|
+
- **Cons:** Requires API key, 2 RPS rate limit by default (can be increased)
|
|
36
|
+
- **Best for:** Development, testing, low-to-medium volume applications
|
|
37
|
+
|
|
38
|
+
### LocalDetectionProcessor (For Advanced Users)
|
|
39
|
+
- **Use when:** You need higher throughput, have your own GPU infrastructure, or want to avoid rate limits
|
|
40
|
+
- **Pros:** No rate limits, no API costs, full control over hardware
|
|
41
|
+
- **Cons:** Requires GPU for best performance, model download on first use, infrastructure management
|
|
42
|
+
- **Best for:** Production deployments, high-volume applications, Digital Ocean Gradient AI GPUs, or custom infrastructure
|
|
43
|
+
|
|
44
|
+
## Quick Start
|
|
45
|
+
|
|
46
|
+
### Using CloudDetectionProcessor (Hosted)
|
|
47
|
+
|
|
48
|
+
The `CloudDetectionProcessor` uses Moondream's hosted API. By default it has a 2 RPS (requests per second) rate limit and requires an API key. The rate limit can be adjusted by contacting the Moondream team to request a higher limit.
|
|
49
|
+
|
|
50
|
+
```python
|
|
51
|
+
from vision_agents.plugins import moondream
|
|
52
|
+
from vision_agents.core import Agent
|
|
53
|
+
|
|
54
|
+
# Create a cloud processor with detection
|
|
55
|
+
processor = moondream.CloudDetectionProcessor(
|
|
56
|
+
api_key="your-api-key", # or set MOONDREAM_API_KEY env var
|
|
57
|
+
detect_objects="person", # or ["person", "car", "dog"] for multiple
|
|
58
|
+
fps=30
|
|
59
|
+
)
|
|
60
|
+
|
|
61
|
+
# Use in an agent
|
|
62
|
+
agent = Agent(
|
|
63
|
+
processors=[processor],
|
|
64
|
+
llm=your_llm,
|
|
65
|
+
# ... other components
|
|
66
|
+
)
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
### Using LocalDetectionProcessor (On-Device)
|
|
70
|
+
|
|
71
|
+
If you are running on your own infrastructure or using a service like Digital Ocean's Gradient AI GPUs, you can use the `LocalDetectionProcessor` which downloads the model from HuggingFace and runs on device. By default it will use CUDA for best performance. Performance will vary depending on your specific hardware configuration.
|
|
72
|
+
|
|
73
|
+
**Note:** The moondream3-preview model is gated and requires HuggingFace authentication:
|
|
74
|
+
- Request access at https://huggingface.co/moondream/moondream3-preview
|
|
75
|
+
- Set `HF_TOKEN` environment variable: `export HF_TOKEN=your_token_here`
|
|
76
|
+
- Or run: `huggingface-cli login`
|
|
77
|
+
|
|
78
|
+
```python
|
|
79
|
+
from vision_agents.plugins import moondream
|
|
80
|
+
from vision_agents.core import Agent
|
|
81
|
+
|
|
82
|
+
# Create a local processor (no API key needed)
|
|
83
|
+
processor = moondream.LocalDetectionProcessor(
|
|
84
|
+
detect_objects=["person", "car", "dog"],
|
|
85
|
+
conf_threshold=0.3,
|
|
86
|
+
device="cuda", # Optional; omit to auto-detect CUDA, MPS, or CPU
|
|
87
|
+
fps=30
|
|
88
|
+
)
|
|
89
|
+
|
|
90
|
+
# Use in an agent
|
|
91
|
+
agent = Agent(
|
|
92
|
+
processors=[processor],
|
|
93
|
+
llm=your_llm,
|
|
94
|
+
# ... other components
|
|
95
|
+
)
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
### Detect Multiple Objects
|
|
99
|
+
|
|
100
|
+
```python
|
|
101
|
+
# Detect multiple object types with zero-shot detection
|
|
102
|
+
processor = moondream.CloudDetectionProcessor(
|
|
103
|
+
api_key="your-api-key",
|
|
104
|
+
detect_objects=["person", "car", "dog", "basketball"],
|
|
105
|
+
conf_threshold=0.3
|
|
106
|
+
)
|
|
107
|
+
|
|
108
|
+
# Access results for LLM
|
|
109
|
+
state = processor.state()
|
|
110
|
+
print(state["detections_summary"]) # "Detected: 2 persons, 1 car"
|
|
111
|
+
print(state["detections_count"]) # Total number of detections
|
|
112
|
+
print(state["last_image"]) # PIL Image for vision models
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
## Configuration
|
|
116
|
+
|
|
117
|
+
### CloudDetectionProcessor Parameters
|
|
118
|
+
|
|
119
|
+
- `api_key`: str - API key for Moondream Cloud API. If not provided, will attempt to read from `MOONDREAM_API_KEY` environment variable.
|
|
120
|
+
- `detect_objects`: str | List[str] - Object(s) to detect using zero-shot detection. Can be any object name like "person", "car", "basketball". Default: `"person"`
|
|
121
|
+
- `conf_threshold`: float - Confidence threshold for detections (default: 0.3)
|
|
122
|
+
- `fps`: int - Frame processing rate (default: 30)
|
|
123
|
+
- `interval`: int - Processing interval in seconds (default: 0)
|
|
124
|
+
- `max_workers`: int - Thread pool size for CPU-intensive operations (default: 10)
|
|
125
|
+
|
|
126
|
+
**Rate Limits:** By default, the Moondream Cloud API has a 2 RPS (requests per second) rate limit. Contact the Moondream team to request a higher limit.
|
|
127
|
+
|
|
128
|
+
### LocalDetectionProcessor Parameters
|
|
129
|
+
|
|
130
|
+
- `detect_objects`: str | List[str] - Object(s) to detect using zero-shot detection. Can be any object name like "person", "car", "basketball". Default: `"person"`
|
|
131
|
+
- `conf_threshold`: float - Confidence threshold for detections (default: 0.3)
|
|
132
|
+
- `fps`: int - Frame processing rate (default: 30)
|
|
133
|
+
- `interval`: int - Processing interval in seconds (default: 0)
|
|
134
|
+
- `max_workers`: int - Thread pool size for CPU-intensive operations (default: 10)
|
|
135
|
+
- `device`: str - Device to run inference on ('cuda', 'mps', or 'cpu'). Auto-detects CUDA, then MPS (Apple Silicon), then defaults to CPU. Default: `None` (auto-detect)
|
|
136
|
+
- `model_name`: str - Hugging Face model identifier (default: "moondream/moondream3-preview")
|
|
137
|
+
- `options`: AgentOptions - Model directory configuration. If not provided, uses default which defaults to tempfile.gettempdir()
|
|
138
|
+
|
|
139
|
+
**Performance:** Performance will vary depending on your hardware configuration. CUDA is recommended for best performance on NVIDIA GPUs. The model will be downloaded from HuggingFace on first use.
|
|
140
|
+
|
|
141
|
+
## Video Publishing
|
|
142
|
+
|
|
143
|
+
The processor publishes annotated video frames with bounding boxes drawn on detected objects:
|
|
144
|
+
|
|
145
|
+
```python
|
|
146
|
+
processor = moondream.CloudDetectionProcessor(
|
|
147
|
+
api_key="your-api-key",
|
|
148
|
+
detect_objects=["person", "car"]
|
|
149
|
+
)
|
|
150
|
+
|
|
151
|
+
# The track will show:
|
|
152
|
+
# - Green bounding boxes around detected objects
|
|
153
|
+
# - Labels with confidence scores
|
|
154
|
+
# - Real-time annotation overlay
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
## Testing
|
|
158
|
+
|
|
159
|
+
The plugin includes comprehensive tests:
|
|
160
|
+
|
|
161
|
+
```bash
|
|
162
|
+
# Run all tests
|
|
163
|
+
pytest plugins/moondream/tests/ -v
|
|
164
|
+
|
|
165
|
+
# Run specific test categories
|
|
166
|
+
pytest plugins/moondream/tests/ -k "inference" -v
|
|
167
|
+
pytest plugins/moondream/tests/ -k "annotation" -v
|
|
168
|
+
pytest plugins/moondream/tests/ -k "state" -v
|
|
169
|
+
```
|
|
170
|
+
|
|
171
|
+
## Dependencies
|
|
172
|
+
|
|
173
|
+
### Required
|
|
174
|
+
- `vision-agents` - Core framework
|
|
175
|
+
- `moondream` - Moondream SDK for cloud API (CloudDetectionProcessor only)
|
|
176
|
+
- `numpy>=2.0.0` - Array operations
|
|
177
|
+
- `pillow>=10.4.0` - Image processing
|
|
178
|
+
- `opencv-python>=4.8.0` - Video annotation
|
|
179
|
+
- `aiortc` - WebRTC support
|
|
180
|
+
|
|
181
|
+
### LocalDetectionProcessor Additional Dependencies
|
|
182
|
+
- `torch` - PyTorch for model inference
|
|
183
|
+
- `transformers` - HuggingFace transformers library for model loading
|
|
184
|
+
|
|
185
|
+
## Links
|
|
186
|
+
|
|
187
|
+
- [Moondream Documentation](https://docs.moondream.ai/)
|
|
188
|
+
- [Vision Agents Documentation](https://visionagents.ai/)
|
|
189
|
+
- [GitHub Repository](https://github.com/GetStream/Vision-Agents)
|
|
190
|
+
|
|
191
|
+
|
|
@@ -0,0 +1,172 @@
|
|
|
1
|
+
# Moondream Plugin
|
|
2
|
+
|
|
3
|
+
This plugin provides Moondream 3 detection capabilities for vision-agents, enabling real-time zero-shot object detection on video streams. Choose between cloud-hosted or local processing depending on your needs.
|
|
4
|
+
|
|
5
|
+
## Installation
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
uv add vision-agents-plugins-moondream
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## Choosing the Right Processor
|
|
12
|
+
|
|
13
|
+
### CloudDetectionProcessor (Recommended for Most Users)
|
|
14
|
+
- **Use when:** You want a simple setup with no infrastructure management
|
|
15
|
+
- **Pros:** No model download, no GPU required, automatic updates
|
|
16
|
+
- **Cons:** Requires API key, 2 RPS rate limit by default (can be increased)
|
|
17
|
+
- **Best for:** Development, testing, low-to-medium volume applications
|
|
18
|
+
|
|
19
|
+
### LocalDetectionProcessor (For Advanced Users)
|
|
20
|
+
- **Use when:** You need higher throughput, have your own GPU infrastructure, or want to avoid rate limits
|
|
21
|
+
- **Pros:** No rate limits, no API costs, full control over hardware
|
|
22
|
+
- **Cons:** Requires GPU for best performance, model download on first use, infrastructure management
|
|
23
|
+
- **Best for:** Production deployments, high-volume applications, Digital Ocean Gradient AI GPUs, or custom infrastructure
|
|
24
|
+
|
|
25
|
+
## Quick Start
|
|
26
|
+
|
|
27
|
+
### Using CloudDetectionProcessor (Hosted)
|
|
28
|
+
|
|
29
|
+
The `CloudDetectionProcessor` uses Moondream's hosted API. By default it has a 2 RPS (requests per second) rate limit and requires an API key. The rate limit can be adjusted by contacting the Moondream team to request a higher limit.
|
|
30
|
+
|
|
31
|
+
```python
|
|
32
|
+
from vision_agents.plugins import moondream
|
|
33
|
+
from vision_agents.core import Agent
|
|
34
|
+
|
|
35
|
+
# Create a cloud processor with detection
|
|
36
|
+
processor = moondream.CloudDetectionProcessor(
|
|
37
|
+
api_key="your-api-key", # or set MOONDREAM_API_KEY env var
|
|
38
|
+
detect_objects="person", # or ["person", "car", "dog"] for multiple
|
|
39
|
+
fps=30
|
|
40
|
+
)
|
|
41
|
+
|
|
42
|
+
# Use in an agent
|
|
43
|
+
agent = Agent(
|
|
44
|
+
processors=[processor],
|
|
45
|
+
llm=your_llm,
|
|
46
|
+
# ... other components
|
|
47
|
+
)
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
### Using LocalDetectionProcessor (On-Device)
|
|
51
|
+
|
|
52
|
+
If you are running on your own infrastructure or using a service like Digital Ocean's Gradient AI GPUs, you can use the `LocalDetectionProcessor` which downloads the model from HuggingFace and runs on device. By default it will use CUDA for best performance. Performance will vary depending on your specific hardware configuration.
|
|
53
|
+
|
|
54
|
+
**Note:** The moondream3-preview model is gated and requires HuggingFace authentication:
|
|
55
|
+
- Request access at https://huggingface.co/moondream/moondream3-preview
|
|
56
|
+
- Set `HF_TOKEN` environment variable: `export HF_TOKEN=your_token_here`
|
|
57
|
+
- Or run: `huggingface-cli login`
|
|
58
|
+
|
|
59
|
+
```python
|
|
60
|
+
from vision_agents.plugins import moondream
|
|
61
|
+
from vision_agents.core import Agent
|
|
62
|
+
|
|
63
|
+
# Create a local processor (no API key needed)
|
|
64
|
+
processor = moondream.LocalDetectionProcessor(
|
|
65
|
+
detect_objects=["person", "car", "dog"],
|
|
66
|
+
conf_threshold=0.3,
|
|
67
|
+
device="cuda", # Optional; omit to auto-detect CUDA, MPS, or CPU
|
|
68
|
+
fps=30
|
|
69
|
+
)
|
|
70
|
+
|
|
71
|
+
# Use in an agent
|
|
72
|
+
agent = Agent(
|
|
73
|
+
processors=[processor],
|
|
74
|
+
llm=your_llm,
|
|
75
|
+
# ... other components
|
|
76
|
+
)
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
### Detect Multiple Objects
|
|
80
|
+
|
|
81
|
+
```python
|
|
82
|
+
# Detect multiple object types with zero-shot detection
|
|
83
|
+
processor = moondream.CloudDetectionProcessor(
|
|
84
|
+
api_key="your-api-key",
|
|
85
|
+
detect_objects=["person", "car", "dog", "basketball"],
|
|
86
|
+
conf_threshold=0.3
|
|
87
|
+
)
|
|
88
|
+
|
|
89
|
+
# Access results for LLM
|
|
90
|
+
state = processor.state()
|
|
91
|
+
print(state["detections_summary"]) # "Detected: 2 persons, 1 car"
|
|
92
|
+
print(state["detections_count"]) # Total number of detections
|
|
93
|
+
print(state["last_image"]) # PIL Image for vision models
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
## Configuration
|
|
97
|
+
|
|
98
|
+
### CloudDetectionProcessor Parameters
|
|
99
|
+
|
|
100
|
+
- `api_key`: str - API key for Moondream Cloud API. If not provided, will attempt to read from `MOONDREAM_API_KEY` environment variable.
|
|
101
|
+
- `detect_objects`: str | List[str] - Object(s) to detect using zero-shot detection. Can be any object name like "person", "car", "basketball". Default: `"person"`
|
|
102
|
+
- `conf_threshold`: float - Confidence threshold for detections (default: 0.3)
|
|
103
|
+
- `fps`: int - Frame processing rate (default: 30)
|
|
104
|
+
- `interval`: int - Processing interval in seconds (default: 0)
|
|
105
|
+
- `max_workers`: int - Thread pool size for CPU-intensive operations (default: 10)
|
|
106
|
+
|
|
107
|
+
**Rate Limits:** By default, the Moondream Cloud API has a 2 RPS (requests per second) rate limit. Contact the Moondream team to request a higher limit.
|
|
108
|
+
|
|
109
|
+
### LocalDetectionProcessor Parameters
|
|
110
|
+
|
|
111
|
+
- `detect_objects`: str | List[str] - Object(s) to detect using zero-shot detection. Can be any object name like "person", "car", "basketball". Default: `"person"`
|
|
112
|
+
- `conf_threshold`: float - Confidence threshold for detections (default: 0.3)
|
|
113
|
+
- `fps`: int - Frame processing rate (default: 30)
|
|
114
|
+
- `interval`: int - Processing interval in seconds (default: 0)
|
|
115
|
+
- `max_workers`: int - Thread pool size for CPU-intensive operations (default: 10)
|
|
116
|
+
- `device`: str - Device to run inference on ('cuda', 'mps', or 'cpu'). Auto-detects CUDA, then MPS (Apple Silicon), then defaults to CPU. Default: `None` (auto-detect)
|
|
117
|
+
- `model_name`: str - Hugging Face model identifier (default: "moondream/moondream3-preview")
|
|
118
|
+
- `options`: AgentOptions - Model directory configuration. If not provided, uses default which defaults to tempfile.gettempdir()
|
|
119
|
+
|
|
120
|
+
**Performance:** Performance will vary depending on your hardware configuration. CUDA is recommended for best performance on NVIDIA GPUs. The model will be downloaded from HuggingFace on first use.
|
|
121
|
+
|
|
122
|
+
## Video Publishing
|
|
123
|
+
|
|
124
|
+
The processor publishes annotated video frames with bounding boxes drawn on detected objects:
|
|
125
|
+
|
|
126
|
+
```python
|
|
127
|
+
processor = moondream.CloudDetectionProcessor(
|
|
128
|
+
api_key="your-api-key",
|
|
129
|
+
detect_objects=["person", "car"]
|
|
130
|
+
)
|
|
131
|
+
|
|
132
|
+
# The track will show:
|
|
133
|
+
# - Green bounding boxes around detected objects
|
|
134
|
+
# - Labels with confidence scores
|
|
135
|
+
# - Real-time annotation overlay
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
## Testing
|
|
139
|
+
|
|
140
|
+
The plugin includes comprehensive tests:
|
|
141
|
+
|
|
142
|
+
```bash
|
|
143
|
+
# Run all tests
|
|
144
|
+
pytest plugins/moondream/tests/ -v
|
|
145
|
+
|
|
146
|
+
# Run specific test categories
|
|
147
|
+
pytest plugins/moondream/tests/ -k "inference" -v
|
|
148
|
+
pytest plugins/moondream/tests/ -k "annotation" -v
|
|
149
|
+
pytest plugins/moondream/tests/ -k "state" -v
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
## Dependencies
|
|
153
|
+
|
|
154
|
+
### Required
|
|
155
|
+
- `vision-agents` - Core framework
|
|
156
|
+
- `moondream` - Moondream SDK for cloud API (CloudDetectionProcessor only)
|
|
157
|
+
- `numpy>=2.0.0` - Array operations
|
|
158
|
+
- `pillow>=10.4.0` - Image processing
|
|
159
|
+
- `opencv-python>=4.8.0` - Video annotation
|
|
160
|
+
- `aiortc` - WebRTC support
|
|
161
|
+
|
|
162
|
+
### LocalDetectionProcessor Additional Dependencies
|
|
163
|
+
- `torch` - PyTorch for model inference
|
|
164
|
+
- `transformers` - HuggingFace transformers library for model loading
|
|
165
|
+
|
|
166
|
+
## Links
|
|
167
|
+
|
|
168
|
+
- [Moondream Documentation](https://docs.moondream.ai/)
|
|
169
|
+
- [Vision Agents Documentation](https://visionagents.ai/)
|
|
170
|
+
- [GitHub Repository](https://github.com/GetStream/Vision-Agents)
|
|
171
|
+
|
|
172
|
+
|
|
File without changes
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling", "hatch-vcs"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "vision-agents-plugins-moondream"
|
|
7
|
+
dynamic = ["version"]
|
|
8
|
+
description = "Moondream 3 vision processor plugin for Vision Agents"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.10"
|
|
11
|
+
license = "MIT"
|
|
12
|
+
dependencies = [
|
|
13
|
+
"vision-agents",
|
|
14
|
+
"numpy>=2.0.0",
|
|
15
|
+
"pillow>=10.4.0",
|
|
16
|
+
"opencv-python>=4.8.0",
|
|
17
|
+
"moondream>=0.1.1", # Now compatible with vision-agents pillow>=10.4.0
|
|
18
|
+
"transformers>=4.40.0", # For local model loading
|
|
19
|
+
"torch>=2.0.0", # PyTorch for model inference
|
|
20
|
+
"accelerate>=0.20.0", # Required for device_map and device management
|
|
21
|
+
]
|
|
22
|
+
|
|
23
|
+
[project.urls]
|
|
24
|
+
Documentation = "https://visionagents.ai/"
|
|
25
|
+
Website = "https://visionagents.ai/"
|
|
26
|
+
Source = "https://github.com/GetStream/Vision-Agents"
|
|
27
|
+
|
|
28
|
+
[tool.hatch.version]
|
|
29
|
+
source = "vcs"
|
|
30
|
+
raw-options = { root = "..", search_parent_directories = true, fallback_version = "0.0.0" }
|
|
31
|
+
|
|
32
|
+
[tool.hatch.build.targets.wheel]
|
|
33
|
+
packages = [".", "vision_agents"]
|
|
34
|
+
|
|
35
|
+
[tool.uv.sources]
|
|
36
|
+
vision-agents = { workspace = true }
|
|
37
|
+
|
|
38
|
+
[dependency-groups]
|
|
39
|
+
dev = [
|
|
40
|
+
"pytest>=8.4.1",
|
|
41
|
+
"pytest-asyncio>=1.0.0",
|
|
42
|
+
]
|
|
43
|
+
|