vision-agents-plugins-moondream 0.1.13__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,87 @@
+ # Byte-compiled / optimized / DLL files
+ __pycache__/
+ *.py[cod]
+ *$py.class
+ *.so
+
+ # Distribution / packaging
+ .Python
+ build/
+ dist/
+ downloads/
+ develop-eggs/
+ eggs/
+ .eggs/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ share/python-wheels/
+ pip-wheel-metadata/
+ MANIFEST
+ *.egg-info/
+ *.egg
+
+ # Installer logs
+ pip-log.txt
+ pip-delete-this-directory.txt
+
+ # Unit test / coverage reports
+ htmlcov/
+ .tox/
+ .nox/
+ .coverage
+ .coverage.*
+ .cache
+ coverage.xml
+ nosetests.xml
+ *.cover
+ *.py,cover
+ .hypothesis/
+ .pytest_cache/
+
+ # Type checker / lint caches
+ .mypy_cache/
+ .dmypy.json
+ dmypy.json
+ .pytype/
+ .pyre/
+ .ruff_cache/
+
+ # Environments
+ .venv
+ env/
+ venv/
+ ENV/
+ env.bak/
+ venv.bak/
+ .env
+ .env.local
+ .env.*.local
+ .env.bak
+ pyvenv.cfg
+ .python-version
+
+ # Editors / IDEs
+ .vscode/
+ .idea/
+
+ # Jupyter Notebook
+ .ipynb_checkpoints/
+
+ # OS / Misc
+ .DS_Store
+ *.log
+
+ # Tooling & repo-specific
+ pyrightconfig.json
+ shell.nix
+ bin/*
+ lib/*
+ stream-py/
+
+ # Artifacts / assets
+ *.pt
+ *.kef
+ *.onnx
@@ -0,0 +1,191 @@
+ Metadata-Version: 2.4
+ Name: vision-agents-plugins-moondream
+ Version: 0.1.13
+ Summary: Moondream 3 vision processor plugin for Vision Agents
+ Project-URL: Documentation, https://visionagents.ai/
+ Project-URL: Website, https://visionagents.ai/
+ Project-URL: Source, https://github.com/GetStream/Vision-Agents
+ License-Expression: MIT
+ Requires-Python: >=3.10
+ Requires-Dist: accelerate>=0.20.0
+ Requires-Dist: moondream>=0.1.1
+ Requires-Dist: numpy>=2.0.0
+ Requires-Dist: opencv-python>=4.8.0
+ Requires-Dist: pillow>=10.4.0
+ Requires-Dist: torch>=2.0.0
+ Requires-Dist: transformers>=4.40.0
+ Requires-Dist: vision-agents
+ Description-Content-Type: text/markdown
+
+ # Moondream Plugin
+
+ This plugin provides Moondream 3 detection capabilities for vision-agents, enabling real-time zero-shot object detection on video streams. Choose between cloud-hosted and local processing depending on your needs.
+
+ ## Installation
+
+ ```bash
+ uv add vision-agents-plugins-moondream
+ ```
+
+ ## Choosing the Right Processor
+
+ ### CloudDetectionProcessor (Recommended for Most Users)
+ - **Use when:** You want a simple setup with no infrastructure management
+ - **Pros:** No model download, no GPU required, automatic updates
+ - **Cons:** Requires an API key; 2 RPS rate limit by default (can be increased)
+ - **Best for:** Development, testing, low-to-medium volume applications
+
+ ### LocalDetectionProcessor (For Advanced Users)
+ - **Use when:** You need higher throughput, have your own GPU infrastructure, or want to avoid rate limits
+ - **Pros:** No rate limits, no API costs, full control over hardware
+ - **Cons:** Requires a GPU for best performance; model download on first use; you manage the infrastructure
+ - **Best for:** Production deployments, high-volume applications, Digital Ocean Gradient AI GPUs, or custom infrastructure
+
+ ## Quick Start
+
+ ### Using CloudDetectionProcessor (Hosted)
+
+ The `CloudDetectionProcessor` uses Moondream's hosted API. It requires an API key and is limited to 2 RPS (requests per second) by default; contact the Moondream team to request a higher limit.
+
+ ```python
+ from vision_agents.plugins import moondream
+ from vision_agents.core import Agent
+
+ # Create a cloud processor with detection
+ processor = moondream.CloudDetectionProcessor(
+     api_key="your-api-key",   # or set MOONDREAM_API_KEY env var
+     detect_objects="person",  # or ["person", "car", "dog"] for multiple
+     fps=30
+ )
+
+ # Use in an agent
+ agent = Agent(
+     processors=[processor],
+     llm=your_llm,
+     # ... other components
+ )
+ ```
+
+ ### Using LocalDetectionProcessor (On-Device)
+
+ If you run on your own infrastructure, or on a service like Digital Ocean's Gradient AI GPUs, use the `LocalDetectionProcessor`, which downloads the model from HuggingFace and runs it on-device. By default it uses CUDA when available; performance varies with your hardware configuration.
+
+ **Note:** The moondream3-preview model is gated and requires HuggingFace authentication:
+ - Request access at https://huggingface.co/moondream/moondream3-preview
+ - Set the `HF_TOKEN` environment variable: `export HF_TOKEN=your_token_here`
+ - Or run: `huggingface-cli login`
+
+ ```python
+ from vision_agents.plugins import moondream
+ from vision_agents.core import Agent
+
+ # Create a local processor (no API key needed)
+ processor = moondream.LocalDetectionProcessor(
+     detect_objects=["person", "car", "dog"],
+     conf_threshold=0.3,
+     device="cuda",  # omit to auto-detect CUDA, MPS, or CPU
+     fps=30
+ )
+
+ # Use in an agent
+ agent = Agent(
+     processors=[processor],
+     llm=your_llm,
+     # ... other components
+ )
+ ```
+
+ ### Detect Multiple Objects
+
+ ```python
+ # Detect multiple object types with zero-shot detection
+ processor = moondream.CloudDetectionProcessor(
+     api_key="your-api-key",
+     detect_objects=["person", "car", "dog", "basketball"],
+     conf_threshold=0.3
+ )
+
+ # Access results for the LLM
+ state = processor.state()
+ print(state["detections_summary"])  # "Detected: 2 persons, 1 car"
+ print(state["detections_count"])    # Total number of detections
+ print(state["last_image"])          # PIL Image for vision models
+ ```
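+
+ The `detections_summary` string lends itself to being dropped straight into an LLM prompt. A minimal sketch using only the `state()` keys shown above (the prompt wording is illustrative):
+
+ ```python
+ # `processor` is the CloudDetectionProcessor constructed above.
+ state = processor.state()
+ prompt = (
+     "You are watching a live video feed.\n"
+     f"Current detections: {state['detections_summary']}\n"
+     "Describe anything noteworthy to the user."
+ )
+ ```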
+
+ ## Configuration
+
+ ### CloudDetectionProcessor Parameters
+
+ - `api_key`: str - API key for the Moondream Cloud API. If not provided, it is read from the `MOONDREAM_API_KEY` environment variable.
+ - `detect_objects`: str | List[str] - Object(s) to detect using zero-shot detection. Can be any object name, such as "person", "car", or "basketball". Default: `"person"`
+ - `conf_threshold`: float - Confidence threshold for detections (default: 0.3)
+ - `fps`: int - Frame processing rate (default: 30)
+ - `interval`: int - Processing interval in seconds (default: 0)
+ - `max_workers`: int - Thread pool size for CPU-intensive operations (default: 10)
+
+ **Rate Limits:** By default, the Moondream Cloud API is limited to 2 RPS (requests per second). Contact the Moondream team to request a higher limit.
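+
+ The Quick Start examples leave `interval` and `max_workers` at their defaults. A minimal sketch with every documented parameter spelled out (the values are illustrative, not recommendations):
+
+ ```python
+ from vision_agents.plugins import moondream
+
+ processor = moondream.CloudDetectionProcessor(
+     api_key="your-api-key",            # falls back to MOONDREAM_API_KEY if omitted
+     detect_objects=["person", "car"],  # zero-shot labels, string or list
+     conf_threshold=0.3,                # discard detections below 30% confidence
+     fps=30,                            # frame processing rate
+     interval=2,                        # processing interval in seconds (0 = continuous)
+     max_workers=10,                    # thread pool size for CPU-bound work
+ )
+ ```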
127
+
128
+ ### LocalDetectionProcessor Parameters
129
+
130
+ - `detect_objects`: str | List[str] - Object(s) to detect using zero-shot detection. Can be any object name like "person", "car", "basketball". Default: `"person"`
131
+ - `conf_threshold`: float - Confidence threshold for detections (default: 0.3)
132
+ - `fps`: int - Frame processing rate (default: 30)
133
+ - `interval`: int - Processing interval in seconds (default: 0)
134
+ - `max_workers`: int - Thread pool size for CPU-intensive operations (default: 10)
135
+ - `device`: str - Device to run inference on ('cuda', 'mps', or 'cpu'). Auto-detects CUDA, then MPS (Apple Silicon), then defaults to CPU. Default: `None` (auto-detect)
136
+ - `model_name`: str - Hugging Face model identifier (default: "moondream/moondream3-preview")
137
+ - `options`: AgentOptions - Model directory configuration. If not provided, uses default which defaults to tempfile.gettempdir()
138
+
139
+ **Performance:** Performance will vary depending on your hardware configuration. CUDA is recommended for best performance on NVIDIA GPUs. The model will be downloaded from HuggingFace on first use.
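+
+ If you want to log or override the device choice yourself, the documented auto-detection order is straightforward to reproduce with plain PyTorch. A sketch under that assumption; `pick_device` is an illustrative helper, not part of the plugin's API:
+
+ ```python
+ import torch
+ from vision_agents.plugins import moondream
+
+ def pick_device(preferred: str | None = None) -> str:
+     """Mirror the documented order: explicit choice, then CUDA, then MPS, then CPU."""
+     if preferred is not None:
+         return preferred
+     if torch.cuda.is_available():
+         return "cuda"
+     if torch.backends.mps.is_available():
+         return "mps"
+     return "cpu"
+
+ # Passing the resolved device explicitly is equivalent to leaving device=None.
+ processor = moondream.LocalDetectionProcessor(
+     detect_objects="person",
+     device=pick_device(),
+ )
+ ```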
140
+
141
+ ## Video Publishing
142
+
143
+ The processor publishes annotated video frames with bounding boxes drawn on detected objects:
144
+
145
+ ```python
146
+ processor = moondream.CloudDetectionProcessor(
147
+ api_key="your-api-key",
148
+ detect_objects=["person", "car"]
149
+ )
150
+
151
+ # The track will show:
152
+ # - Green bounding boxes around detected objects
153
+ # - Labels with confidence scores
154
+ # - Real-time annotation overlay
155
+ ```
156
+
157
+ ## Testing
158
+
159
+ The plugin includes comprehensive tests:
160
+
161
+ ```bash
162
+ # Run all tests
163
+ pytest plugins/moondream/tests/ -v
164
+
165
+ # Run specific test categories
166
+ pytest plugins/moondream/tests/ -k "inference" -v
167
+ pytest plugins/moondream/tests/ -k "annotation" -v
168
+ pytest plugins/moondream/tests/ -k "state" -v
169
+ ```
170
+
171
+ ## Dependencies
172
+
173
+ ### Required
174
+ - `vision-agents` - Core framework
175
+ - `moondream` - Moondream SDK for cloud API (CloudDetectionProcessor only)
176
+ - `numpy>=2.0.0` - Array operations
177
+ - `pillow>=10.0.0` - Image processing
178
+ - `opencv-python>=4.8.0` - Video annotation
179
+ - `aiortc` - WebRTC support
180
+
181
+ ### LocalDetectionProcessor Additional Dependencies
182
+ - `torch` - PyTorch for model inference
183
+ - `transformers` - HuggingFace transformers library for model loading
184
+
185
+ ## Links
186
+
187
+ - [Moondream Documentation](https://docs.moondream.ai/)
188
+ - [Vision Agents Documentation](https://visionagents.ai/)
189
+ - [GitHub Repository](https://github.com/GetStream/Vision-Agents)
190
+
191
+
@@ -0,0 +1,43 @@
+ [build-system]
+ requires = ["hatchling", "hatch-vcs"]
+ build-backend = "hatchling.build"
+
+ [project]
+ name = "vision-agents-plugins-moondream"
+ dynamic = ["version"]
+ description = "Moondream 3 vision processor plugin for Vision Agents"
+ readme = "README.md"
+ requires-python = ">=3.10"
+ license = "MIT"
+ dependencies = [
+     "vision-agents",
+     "numpy>=2.0.0",
+     "pillow>=10.4.0",
+     "opencv-python>=4.8.0",
+     "moondream>=0.1.1",       # Now compatible with vision-agents pillow>=10.4.0
+     "transformers>=4.40.0",   # For local model loading
+     "torch>=2.0.0",           # PyTorch for model inference
+     "accelerate>=0.20.0",     # Required for device_map and device management
+ ]
+
+ [project.urls]
+ Documentation = "https://visionagents.ai/"
+ Website = "https://visionagents.ai/"
+ Source = "https://github.com/GetStream/Vision-Agents"
+
+ [tool.hatch.version]
+ source = "vcs"
+ raw-options = { root = "..", search_parent_directories = true, fallback_version = "0.0.0" }
+
+ [tool.hatch.build.targets.wheel]
+ packages = [".", "vision_agents"]
+
+ [tool.uv.sources]
+ vision-agents = { workspace = true }
+
+ [dependency-groups]
+ dev = [
+     "pytest>=8.4.1",
+     "pytest-asyncio>=1.0.0",
+ ]