visionserve 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- visionserve-0.1.0/PKG-INFO +154 -0
- visionserve-0.1.0/README.md +122 -0
- visionserve-0.1.0/pyproject.toml +46 -0
- visionserve-0.1.0/setup.cfg +4 -0
- visionserve-0.1.0/tests/test_client.py +304 -0
- visionserve-0.1.0/visionserve.egg-info/PKG-INFO +154 -0
- visionserve-0.1.0/visionserve.egg-info/SOURCES.txt +8 -0
- visionserve-0.1.0/visionserve.egg-info/dependency_links.txt +1 -0
- visionserve-0.1.0/visionserve.egg-info/requires.txt +9 -0
- visionserve-0.1.0/visionserve.egg-info/top_level.txt +1 -0
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: visionserve
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Python client SDK for the VisionServe HTTP server (talks to the Go runtime over HTTP).
|
|
5
|
+
Author: VisionServe
|
|
6
|
+
License: Apache-2.0
|
|
7
|
+
Project-URL: Homepage, https://github.com/visionserve/visionserve
|
|
8
|
+
Project-URL: Repository, https://github.com/visionserve/visionserve
|
|
9
|
+
Project-URL: Issues, https://github.com/visionserve/visionserve/issues
|
|
10
|
+
Keywords: visionserve,computer-vision,onnx,rf-detr,sam,grounding-dino,client
|
|
11
|
+
Classifier: Development Status :: 4 - Beta
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
14
|
+
Classifier: Operating System :: OS Independent
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
21
|
+
Classifier: Topic :: Scientific/Engineering :: Image Recognition
|
|
22
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
23
|
+
Requires-Python: >=3.8
|
|
24
|
+
Description-Content-Type: text/markdown
|
|
25
|
+
Provides-Extra: images
|
|
26
|
+
Requires-Dist: pillow>=8.0; extra == "images"
|
|
27
|
+
Requires-Dist: numpy>=1.20; extra == "images"
|
|
28
|
+
Provides-Extra: dev
|
|
29
|
+
Requires-Dist: pillow>=8.0; extra == "dev"
|
|
30
|
+
Requires-Dist: numpy>=1.20; extra == "dev"
|
|
31
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
32
|
+
|
|
33
|
+
# VisionServe Python Client
|
|
34
|
+
|
|
35
|
+
A lightweight Python **client** SDK for the [VisionServe](https://github.com/visionserve/visionserve) HTTP server. It talks
|
|
36
|
+
to the Go runtime over HTTP (default `http://localhost:11435`) — it is **not** the
|
|
37
|
+
inference runtime and pulls no inference engine into Python. Think of it like Ollama's
|
|
38
|
+
Python client.
|
|
39
|
+
|
|
40
|
+
The transport uses only the Python **standard library** (`urllib`), so the client has
|
|
41
|
+
no required third-party dependencies. `numpy` and `pillow` are **optional** and only
|
|
42
|
+
needed for:
|
|
43
|
+
- passing `numpy.ndarray` / `PIL.Image` images to `predict()`, and
|
|
44
|
+
- decoding masks with `Mask.to_ndarray()`.
|
|
45
|
+
|
|
46
|
+
## Install
|
|
47
|
+
|
|
48
|
+
From the repository root:
|
|
49
|
+
|
|
50
|
+
```bash
|
|
51
|
+
pip install -e clients/python
|
|
52
|
+
# optional extras for ndarray/PIL image inputs and mask decoding:
|
|
53
|
+
pip install -e 'clients/python[images]'
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
## Run the server first
|
|
57
|
+
|
|
58
|
+
The client needs a running VisionServe server (which in turn needs the ONNX Runtime
|
|
59
|
+
shared library at runtime). From the repo root:
|
|
60
|
+
|
|
61
|
+
```bash
|
|
62
|
+
make serve # starts the Go server on :11435
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
## Quickstart
|
|
66
|
+
|
|
67
|
+
```python
|
|
68
|
+
from visionserve import Client
|
|
69
|
+
|
|
70
|
+
c = Client() # http://localhost:11435, timeout=120s
|
|
71
|
+
print(c.health()) # {"status": "ok"}
|
|
72
|
+
|
|
73
|
+
for m in c.list_models():
|
|
74
|
+
print(m.name, m.task, m.license, m.state)
|
|
75
|
+
|
|
76
|
+
c.load("rf-detr")
|
|
77
|
+
res = c.predict("rf-detr", "cat.jpg")
|
|
78
|
+
print(res.task, res.duration_ms)
|
|
79
|
+
for d in res.detections:
|
|
80
|
+
print(d.cls, d.conf, d.bbox) # bbox = [x, y, w, h] in ORIGINAL image pixels
|
|
81
|
+
|
|
82
|
+
print([m.name for m in c.ps()]) # currently loaded models
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
## Public API
|
|
86
|
+
|
|
87
|
+
### `Client(host="http://localhost:11435", timeout=120)`
|
|
88
|
+
|
|
89
|
+
| Method | HTTP | Returns |
|
|
90
|
+
| --- | --- | --- |
|
|
91
|
+
| `health()` | `GET /api/health` | `{"status": "ok"}` |
|
|
92
|
+
| `list_models()` | `GET /api/models` | `list[ModelInfo]` |
|
|
93
|
+
| `load(model)` | `POST /api/load` | `{"model", "state"}` |
|
|
94
|
+
| `unload(model)` | `POST /api/unload` | `{"model", "state"}` |
|
|
95
|
+
| `ps()` | `GET /api/models` (filtered) | loaded `list[ModelInfo]` |
|
|
96
|
+
| `predict(model, image, *, prompt=None, box=None, point=None)` | `POST /api/predict` | `Result` |
|
|
97
|
+
|
|
98
|
+
`predict()` `image` accepts:
|
|
99
|
+
- `str` / `os.PathLike` — a path to an image file,
|
|
100
|
+
- `bytes` — already-encoded image (PNG/JPEG), sent verbatim,
|
|
101
|
+
- `PIL.Image.Image` — encoded to PNG client-side,
|
|
102
|
+
- `numpy.ndarray` — `HWC` uint8 (or float in `[0,1]` → scaled to uint8); grayscale
|
|
103
|
+
`(H, W)` is promoted to RGB. Encoded to PNG client-side.
|
|
104
|
+
|
|
105
|
+
Prompts (serialized to the server's string format):
|
|
106
|
+
- `box`: `[x, y, w, h]` or a list of boxes → `"x,y,w,h"` joined by `;`.
|
|
107
|
+
- `point`: `[x, y]` / `[x, y, label]` or a list (label 1=fg, 0=bg) → `"x,y[,label]"` joined by `;`.
|
|
108
|
+
- `prompt`: free text, e.g. `"cat. remote."`.
|
|
109
|
+
|
|
110
|
+
### Result types
|
|
111
|
+
|
|
112
|
+
```python
|
|
113
|
+
Detection(bbox: list[float], cls: str, conf: float) # bbox = [x, y, w, h], original px
|
|
114
|
+
Mask(rle: str, bbox: list[float], conf: float)
|
|
115
|
+
Result(task, model, detections: list[Detection], masks: list[Mask], duration_ms)
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
`Mask.to_ndarray(width, height) -> np.ndarray` decodes the COCO-style **column-major**
|
|
119
|
+
uncompressed RLE into a boolean `(height, width)` array (requires numpy). It is the
|
|
120
|
+
exact inverse of the server's encoder.
|
|
121
|
+
|
|
122
|
+
```python
|
|
123
|
+
res = c.predict("mobile-sam", "img.jpg", box=[50, 40, 120, 90])
|
|
124
|
+
from PIL import Image
|
|
125
|
+
w, h = Image.open("img.jpg").size
|
|
126
|
+
mask = res.masks[0].to_ndarray(width=w, height=h) # bool (h, w)
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
## Examples
|
|
130
|
+
|
|
131
|
+
```bash
|
|
132
|
+
# RF-DETR detection (optionally draw boxes):
|
|
133
|
+
python clients/python/examples/detect.py cat.jpg --model rf-detr --save out.png
|
|
134
|
+
|
|
135
|
+
# MobileSAM with a box prompt -> mask ndarray:
|
|
136
|
+
python clients/python/examples/segment.py img.jpg --box 50,40,120,90 --save mask.png
|
|
137
|
+
|
|
138
|
+
# Open-vocab (text prompt) — model must be available on the server:
|
|
139
|
+
python clients/python/examples/grounded.py img.jpg --prompt "cat. remote."
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
## Tests
|
|
143
|
+
|
|
144
|
+
The test suite is fully offline — it spins up a mock HTTP server in a thread and also
|
|
145
|
+
round-trips the RLE codec against a reference port of the Go encoder. No running Go
|
|
146
|
+
server is required.
|
|
147
|
+
|
|
148
|
+
```bash
|
|
149
|
+
# with pytest:
|
|
150
|
+
python -m pytest clients/python/tests -v
|
|
151
|
+
|
|
152
|
+
# or as a pytest-free self-test (the RLE tests still need numpy):
|
|
153
|
+
python clients/python/tests/test_client.py
|
|
154
|
+
```
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
# VisionServe Python Client
|
|
2
|
+
|
|
3
|
+
A lightweight Python **client** SDK for the [VisionServe](https://github.com/visionserve/visionserve) HTTP server. It talks
|
|
4
|
+
to the Go runtime over HTTP (default `http://localhost:11435`) — it is **not** the
|
|
5
|
+
inference runtime and pulls no inference engine into Python. Think of it like Ollama's
|
|
6
|
+
Python client.
|
|
7
|
+
|
|
8
|
+
The transport uses only the Python **standard library** (`urllib`), so the client has
|
|
9
|
+
no required third-party dependencies. `numpy` and `pillow` are **optional** and only
|
|
10
|
+
needed for:
|
|
11
|
+
- passing `numpy.ndarray` / `PIL.Image` images to `predict()`, and
|
|
12
|
+
- decoding masks with `Mask.to_ndarray()`.
|
|
13
|
+
|
|
14
|
+
## Install
|
|
15
|
+
|
|
16
|
+
From the repository root:
|
|
17
|
+
|
|
18
|
+
```bash
|
|
19
|
+
pip install -e clients/python
|
|
20
|
+
# optional extras for ndarray/PIL image inputs and mask decoding:
|
|
21
|
+
pip install -e 'clients/python[images]'
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
## Run the server first
|
|
25
|
+
|
|
26
|
+
The client needs a running VisionServe server (which in turn needs the ONNX Runtime
|
|
27
|
+
shared library at runtime). From the repo root:
|
|
28
|
+
|
|
29
|
+
```bash
|
|
30
|
+
make serve # starts the Go server on :11435
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
## Quickstart
|
|
34
|
+
|
|
35
|
+
```python
|
|
36
|
+
from visionserve import Client
|
|
37
|
+
|
|
38
|
+
c = Client() # http://localhost:11435, timeout=120s
|
|
39
|
+
print(c.health()) # {"status": "ok"}
|
|
40
|
+
|
|
41
|
+
for m in c.list_models():
|
|
42
|
+
print(m.name, m.task, m.license, m.state)
|
|
43
|
+
|
|
44
|
+
c.load("rf-detr")
|
|
45
|
+
res = c.predict("rf-detr", "cat.jpg")
|
|
46
|
+
print(res.task, res.duration_ms)
|
|
47
|
+
for d in res.detections:
|
|
48
|
+
print(d.cls, d.conf, d.bbox) # bbox = [x, y, w, h] in ORIGINAL image pixels
|
|
49
|
+
|
|
50
|
+
print([m.name for m in c.ps()]) # currently loaded models
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
## Public API
|
|
54
|
+
|
|
55
|
+
### `Client(host="http://localhost:11435", timeout=120)`
|
|
56
|
+
|
|
57
|
+
| Method | HTTP | Returns |
|
|
58
|
+
| --- | --- | --- |
|
|
59
|
+
| `health()` | `GET /api/health` | `{"status": "ok"}` |
|
|
60
|
+
| `list_models()` | `GET /api/models` | `list[ModelInfo]` |
|
|
61
|
+
| `load(model)` | `POST /api/load` | `{"model", "state"}` |
|
|
62
|
+
| `unload(model)` | `POST /api/unload` | `{"model", "state"}` |
|
|
63
|
+
| `ps()` | `GET /api/models` (filtered) | loaded `list[ModelInfo]` |
|
|
64
|
+
| `predict(model, image, *, prompt=None, box=None, point=None)` | `POST /api/predict` | `Result` |
|
|
65
|
+
|
|
66
|
+
`predict()` `image` accepts:
|
|
67
|
+
- `str` / `os.PathLike` — a path to an image file,
|
|
68
|
+
- `bytes` — already-encoded image (PNG/JPEG), sent verbatim,
|
|
69
|
+
- `PIL.Image.Image` — encoded to PNG client-side,
|
|
70
|
+
- `numpy.ndarray` — `HWC` uint8 (or float in `[0,1]` → scaled to uint8); grayscale
|
|
71
|
+
`(H, W)` is promoted to RGB. Encoded to PNG client-side.
|
|
72
|
+
|
|
73
|
+
Prompts (serialized to the server's string format):
|
|
74
|
+
- `box`: `[x, y, w, h]` or a list of boxes → `"x,y,w,h"` joined by `;`.
|
|
75
|
+
- `point`: `[x, y]` / `[x, y, label]` or a list (label 1=fg, 0=bg) → `"x,y[,label]"` joined by `;`.
|
|
76
|
+
- `prompt`: free text, e.g. `"cat. remote."`.
|
|
77
|
+
|
|
78
|
+
### Result types
|
|
79
|
+
|
|
80
|
+
```python
|
|
81
|
+
Detection(bbox: list[float], cls: str, conf: float) # bbox = [x, y, w, h], original px
|
|
82
|
+
Mask(rle: str, bbox: list[float], conf: float)
|
|
83
|
+
Result(task, model, detections: list[Detection], masks: list[Mask], duration_ms)
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
`Mask.to_ndarray(width, height) -> np.ndarray` decodes the COCO-style **column-major**
|
|
87
|
+
uncompressed RLE into a boolean `(height, width)` array (requires numpy). It is the
|
|
88
|
+
exact inverse of the server's encoder.
|
|
89
|
+
|
|
90
|
+
```python
|
|
91
|
+
res = c.predict("mobile-sam", "img.jpg", box=[50, 40, 120, 90])
|
|
92
|
+
from PIL import Image
|
|
93
|
+
w, h = Image.open("img.jpg").size
|
|
94
|
+
mask = res.masks[0].to_ndarray(width=w, height=h) # bool (h, w)
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
## Examples
|
|
98
|
+
|
|
99
|
+
```bash
|
|
100
|
+
# RF-DETR detection (optionally draw boxes):
|
|
101
|
+
python clients/python/examples/detect.py cat.jpg --model rf-detr --save out.png
|
|
102
|
+
|
|
103
|
+
# MobileSAM with a box prompt -> mask ndarray:
|
|
104
|
+
python clients/python/examples/segment.py img.jpg --box 50,40,120,90 --save mask.png
|
|
105
|
+
|
|
106
|
+
# Open-vocab (text prompt) — model must be available on the server:
|
|
107
|
+
python clients/python/examples/grounded.py img.jpg --prompt "cat. remote."
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
## Tests
|
|
111
|
+
|
|
112
|
+
The test suite is fully offline — it spins up a mock HTTP server in a thread and also
|
|
113
|
+
round-trips the RLE codec against a reference port of the Go encoder. No running Go
|
|
114
|
+
server is required.
|
|
115
|
+
|
|
116
|
+
```bash
|
|
117
|
+
# with pytest:
|
|
118
|
+
python -m pytest clients/python/tests -v
|
|
119
|
+
|
|
120
|
+
# or as a pytest-free self-test (the RLE tests still need numpy):
|
|
121
|
+
python clients/python/tests/test_client.py
|
|
122
|
+
```
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=61.0"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "visionserve"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Python client SDK for the VisionServe HTTP server (talks to the Go runtime over HTTP)."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.8"
|
|
11
|
+
license = { text = "Apache-2.0" }
|
|
12
|
+
authors = [{ name = "VisionServe" }]
|
|
13
|
+
keywords = ["visionserve", "computer-vision", "onnx", "rf-detr", "sam", "grounding-dino", "client"]
|
|
14
|
+
classifiers = [
|
|
15
|
+
"Development Status :: 4 - Beta",
|
|
16
|
+
"Intended Audience :: Developers",
|
|
17
|
+
"License :: OSI Approved :: Apache Software License",
|
|
18
|
+
"Operating System :: OS Independent",
|
|
19
|
+
"Programming Language :: Python :: 3",
|
|
20
|
+
"Programming Language :: Python :: 3.8",
|
|
21
|
+
"Programming Language :: Python :: 3.9",
|
|
22
|
+
"Programming Language :: Python :: 3.10",
|
|
23
|
+
"Programming Language :: Python :: 3.11",
|
|
24
|
+
"Programming Language :: Python :: 3.12",
|
|
25
|
+
"Topic :: Scientific/Engineering :: Image Recognition",
|
|
26
|
+
"Topic :: Software Development :: Libraries :: Python Modules",
|
|
27
|
+
]
|
|
28
|
+
|
|
29
|
+
# The client is dependency-light: it uses only the Python standard library
|
|
30
|
+
# (urllib, json, base64) for transport, so it works even in minimal environments.
|
|
31
|
+
# numpy and pillow are OPTIONAL — they unlock ndarray/PIL image inputs and
|
|
32
|
+
# Mask.to_ndarray() decoding, but the client degrades gracefully without them.
|
|
33
|
+
dependencies = []
|
|
34
|
+
|
|
35
|
+
[project.optional-dependencies]
|
|
36
|
+
images = ["pillow>=8.0", "numpy>=1.20"]
|
|
37
|
+
dev = ["pillow>=8.0", "numpy>=1.20", "pytest>=7.0"]
|
|
38
|
+
|
|
39
|
+
[project.urls]
|
|
40
|
+
Homepage = "https://github.com/visionserve/visionserve"
|
|
41
|
+
Repository = "https://github.com/visionserve/visionserve"
|
|
42
|
+
Issues = "https://github.com/visionserve/visionserve/issues"
|
|
43
|
+
|
|
44
|
+
[tool.setuptools.packages.find]
|
|
45
|
+
where = ["."]
|
|
46
|
+
include = ["visionserve*"]
|
|
@@ -0,0 +1,304 @@
|
|
|
1
|
+
"""Deterministic tests for the VisionServe Python client.
|
|
2
|
+
|
|
3
|
+
These DO NOT require the Go server: a tiny mock HTTP server (stdlib ``http.server``
|
|
4
|
+
in a background thread) returns canned JSON and records the requests it receives, so
|
|
5
|
+
we can validate both request building and response parsing offline.
|
|
6
|
+
|
|
7
|
+
Run with pytest:
|
|
8
|
+
python -m pytest clients/python/tests -v
|
|
9
|
+
or as a self-test:
|
|
10
|
+
python clients/python/tests/test_client.py
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
import json
|
|
14
|
+
import os
|
|
15
|
+
import sys
|
|
16
|
+
import threading
|
|
17
|
+
from http.server import BaseHTTPRequestHandler, HTTPServer
|
|
18
|
+
|
|
19
|
+
# Make the package importable when run directly (not just via pytest -e install).
|
|
20
|
+
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
|
|
21
|
+
|
|
22
|
+
from visionserve import Client, Mask, Result # noqa: E402
|
|
23
|
+
from visionserve.client import ( # noqa: E402
|
|
24
|
+
_serialize_boxes,
|
|
25
|
+
_serialize_points,
|
|
26
|
+
_build_multipart,
|
|
27
|
+
)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
# --------------------------------------------------------------------------- #
|
|
31
|
+
# Reference Go encoder (column-major, starts background) — ported from
|
|
32
|
+
# internal/models/mobilesam/postprocess.go : encodeRLEColumnMajor.
|
|
33
|
+
# Used to verify our decoder is a faithful inverse.
|
|
34
|
+
# --------------------------------------------------------------------------- #
|
|
35
|
+
def encode_rle_column_major(bin_rows, h, w):
    """Run-length encode a binary mask, scanning it column by column.

    Args:
        bin_rows: 2D sequence indexed as ``bin_rows[y][x]``; each entry is
            interpreted through ``bool()``.
        h: number of rows to scan.
        w: number of columns to scan.

    Returns:
        The run lengths joined by single spaces. The first run always counts
        background pixels, so a mask whose first pixel is foreground starts
        with ``0``.
    """
    # The leading entry is the (possibly empty) initial background run.
    counts = [0]
    current = False
    for col in range(w):
        for row in range(h):
            pixel = bool(bin_rows[row][col])
            if pixel != current:
                # Value flipped: open a fresh run for the new value.
                counts.append(0)
                current = pixel
            counts[-1] += 1
    return " ".join(map(str, counts))
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
# --------------------------------------------------------------------------- #
|
|
54
|
+
# Mock server
|
|
55
|
+
# --------------------------------------------------------------------------- #
|
|
56
|
+
class _MockState:
|
|
57
|
+
last_request = None # dict: {path, method, headers, body}
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def _make_handler(state):
|
|
61
|
+
class Handler(BaseHTTPRequestHandler):
|
|
62
|
+
def log_message(self, *a): # silence
|
|
63
|
+
pass
|
|
64
|
+
|
|
65
|
+
def _read_body(self):
|
|
66
|
+
length = int(self.headers.get("Content-Length", 0))
|
|
67
|
+
return self.rfile.read(length) if length else b""
|
|
68
|
+
|
|
69
|
+
def _send(self, code, obj):
|
|
70
|
+
body = json.dumps(obj).encode("utf-8")
|
|
71
|
+
self.send_response(code)
|
|
72
|
+
self.send_header("Content-Type", "application/json")
|
|
73
|
+
self.send_header("Content-Length", str(len(body)))
|
|
74
|
+
self.end_headers()
|
|
75
|
+
self.wfile.write(body)
|
|
76
|
+
|
|
77
|
+
def do_GET(self):
|
|
78
|
+
state.last_request = {"path": self.path, "method": "GET", "headers": dict(self.headers), "body": b""}
|
|
79
|
+
if self.path == "/api/health":
|
|
80
|
+
self._send(200, {"status": "ok"})
|
|
81
|
+
elif self.path == "/api/models":
|
|
82
|
+
self._send(200, [
|
|
83
|
+
{"name": "rf-detr", "task": "detection", "license": "Apache-2.0", "state": "loaded"},
|
|
84
|
+
{"name": "mobile-sam", "task": "segmentation", "license": "Apache-2.0", "state": "available"},
|
|
85
|
+
])
|
|
86
|
+
else:
|
|
87
|
+
self._send(404, {"error": "not found"})
|
|
88
|
+
|
|
89
|
+
def do_POST(self):
|
|
90
|
+
body = self._read_body()
|
|
91
|
+
state.last_request = {
|
|
92
|
+
"path": self.path,
|
|
93
|
+
"method": "POST",
|
|
94
|
+
"headers": dict(self.headers),
|
|
95
|
+
"body": body,
|
|
96
|
+
}
|
|
97
|
+
if self.path == "/api/load":
|
|
98
|
+
req = json.loads(body)
|
|
99
|
+
self._send(200, {"model": req["model"], "state": "loaded"})
|
|
100
|
+
elif self.path == "/api/unload":
|
|
101
|
+
req = json.loads(body)
|
|
102
|
+
self._send(200, {"model": req["model"], "state": "unloaded"})
|
|
103
|
+
elif self.path == "/api/predict":
|
|
104
|
+
self._send(200, {
|
|
105
|
+
"task": "detection",
|
|
106
|
+
"model": "rf-detr",
|
|
107
|
+
"detections": [
|
|
108
|
+
{"bbox": [10.0, 20.0, 30.0, 40.0], "class": "cat", "conf": 0.91},
|
|
109
|
+
],
|
|
110
|
+
"masks": [
|
|
111
|
+
{"rle": "1 2 3", "bbox": [0, 0, 2, 3], "conf": 0.8},
|
|
112
|
+
],
|
|
113
|
+
"duration_ms": 12.5,
|
|
114
|
+
})
|
|
115
|
+
else:
|
|
116
|
+
self._send(404, {"error": "not found"})
|
|
117
|
+
|
|
118
|
+
return Handler
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
class MockServer:
    """Context manager that serves the canned mock API on a local port.

    After entering, the instance exposes ``state`` (request recorder),
    ``httpd`` (the ``HTTPServer``), ``port``, ``thread`` (background serving
    thread) and ``host`` (base URL to hand to ``Client``).
    """

    def __enter__(self):
        recorder = _MockState()
        # Binding to port 0 and reading the port back from server_address
        # means each test gets whatever port the server actually bound.
        server = HTTPServer(("127.0.0.1", 0), _make_handler(recorder))
        port = server.server_address[1]
        worker = threading.Thread(target=server.serve_forever, daemon=True)

        self.state = recorder
        self.httpd = server
        self.port = port
        self.thread = worker
        self.host = "http://127.0.0.1:%d" % port

        worker.start()
        return self

    def __exit__(self, *exc):
        server = self.httpd
        # Stop the serve_forever loop first, then release the socket.
        server.shutdown()
        server.server_close()
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
# --------------------------------------------------------------------------- #
|
|
137
|
+
# Tests
|
|
138
|
+
# --------------------------------------------------------------------------- #
|
|
139
|
+
def test_health_and_models():
    """Check health, the full model listing, and the loaded-only ``ps()`` view."""
    with MockServer() as server:
        client = Client(host=server.host)

        health = client.health()
        assert health == {"status": "ok"}

        listed = client.list_models()
        listed_names = [info.name for info in listed]
        assert listed_names == ["rf-detr", "mobile-sam"]
        assert listed[0].state == "loaded"

        # The mock reports only rf-detr as loaded, so ps() must return just it.
        loaded_names = [info.name for info in client.ps()]
        assert loaded_names == ["rf-detr"]
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
def test_load_unload():
    """Check the load/unload replies and the JSON body sent for ``load``."""
    with MockServer() as server:
        client = Client(host=server.host)

        load_reply = client.load("rf-detr")
        assert load_reply == {"model": "rf-detr", "state": "loaded"}

        # Inspect what actually went over the wire for the load call.
        sent = json.loads(server.state.last_request["body"])
        assert sent == {"model": "rf-detr"}

        unload_reply = client.unload("rf-detr")
        assert unload_reply["state"] == "unloaded"
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
def test_predict_multipart_request_and_parse():
    """Check that ``predict`` parses the reply and sends a multipart request."""
    with MockServer() as server:
        client = Client(host=server.host)
        result = client.predict(
            "rf-detr",
            b"\x89PNG fake bytes",
            prompt="cat. remote.",
            box=[10, 20, 30, 40],
            point=[5, 6, 1],
        )

        # Response parsing.
        assert isinstance(result, Result)
        assert result.task == "detection"
        first_detection = result.detections[0]
        assert first_detection.cls == "cat"
        assert first_detection.bbox == [10.0, 20.0, 30.0, 40.0]
        assert abs(result.duration_ms - 12.5) < 1e-9

        # Request building: multipart content type plus every form field.
        recorded = server.state.last_request
        assert recorded["path"] == "/api/predict"
        content_type = recorded["headers"]["Content-Type"]
        assert content_type.startswith("multipart/form-data; boundary=")

        # The image part is binary, so decode leniently before searching.
        raw = recorded["body"].decode("utf-8", errors="replace")
        expected_fields = (
            ("model", "rf-detr"),
            ("prompt", "cat. remote."),
            ("box", "10,20,30,40"),
            ("point", "5,6,1"),
        )
        for field, value in expected_fields:
            assert ('name="%s"' % field) in raw and value in raw
        assert 'name="image"; filename=' in raw
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
def test_serialize_boxes_and_points():
    """Check the string wire format produced for box and point prompts."""
    box_cases = (
        # A single box and a list of boxes.
        ([1, 2, 3, 4], "1,2,3,4"),
        ([[1, 2, 3, 4], [5, 6, 7, 8]], "1,2,3,4;5,6,7,8"),
        # An integer-valued float is printed without a trailing ".0".
        ([1.0, 2.5, 3, 4], "1,2.5,3,4"),
    )
    for boxes, expected in box_cases:
        assert _serialize_boxes(boxes) == expected

    point_cases = (
        # Points may be 2-wide (x, y) or 3-wide (x, y, label).
        ([5, 6], "5,6"),
        ([[5, 6, 1], [7, 8, 0]], "5,6,1;7,8,0"),
    )
    for points, expected in point_cases:
        assert _serialize_points(points) == expected
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
def test_multipart_builder_shape():
    """Check the content type and the parts emitted by ``_build_multipart``."""
    payload, content_type = _build_multipart({"model": "x"}, b"IMG", "image.png")
    assert content_type.startswith("multipart/form-data; boundary=")

    required_fragments = (
        b'name="model"',
        b'name="image"; filename="image.png"',
        b"IMG",
    )
    for fragment in required_fragments:
        assert fragment in payload
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
def test_rle_roundtrip_known():
    """A small fixed mask round-trips through the encoder and Mask.to_ndarray()."""
    import numpy as np

    # H=3, W=2 mask laid out as rows[y][x]:
    #   column 0 reads 1,0,1 and column 1 reads 0,0,1,
    # so the column-major pixel order is 1,0,1,0,0,1. The counts are produced
    # by the reference encoder rather than written by hand.
    height, width = 3, 2
    grid = [
        [1, 0],
        [0, 0],
        [1, 1],
    ]
    encoded = encode_rle_column_major(grid, height, width)
    mask = Mask(rle=encoded, bbox=[0, 0, width, height], conf=1.0)

    decoded = mask.to_ndarray(width=width, height=height)
    wanted = np.array(grid, dtype=bool)

    assert decoded.shape == (height, width)
    assert np.array_equal(decoded, wanted), (decoded.tolist(), wanted.tolist(), encoded)
|
|
229
|
+
|
|
230
|
+
|
|
231
|
+
def test_rle_roundtrip_random():
    """Seeded random masks must survive encode (reference algorithm) then decode."""
    import numpy as np

    # Fixed seed keeps the 50 generated cases identical on every run.
    generator = np.random.default_rng(1234)
    for _ in range(50):
        height = int(generator.integers(1, 9))
        width = int(generator.integers(1, 9))
        original = generator.integers(0, 2, size=(height, width)).astype(bool)

        as_rows = original.astype(int).tolist()
        encoded = encode_rle_column_major(as_rows, height, width)
        mask = Mask(rle=encoded, bbox=[0, 0, width, height], conf=1.0)
        restored = mask.to_ndarray(width=width, height=height)

        assert np.array_equal(restored, original), (height, width, encoded)
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
def test_rle_all_background_and_all_foreground():
    """Decode the two degenerate encodings: one run, and an empty leading run."""
    import numpy as np

    h, w = 4, 3
    total = h * w
    full_box = [0, 0, w, h]

    # All background is a single run: "12".
    empty_mask = Mask(rle=str(total), bbox=full_box, conf=1.0)
    assert not empty_mask.to_ndarray(w, h).any()

    # All foreground is "0 12": a zero-length background run, then 12 pixels.
    solid_mask = Mask(rle="0 " + str(total), bbox=[0, 0, w, h], conf=1.0)
    assert solid_mask.to_ndarray(w, h).all()
|
|
256
|
+
|
|
257
|
+
|
|
258
|
+
def test_rle_bad_sum_raises():
    """Decoding must raise ValueError when the runs do not add up to width*height."""
    import pytest

    # Runs sum to 3, but a 5x5 target needs 25 pixels.
    short_mask = Mask(rle="1 2", bbox=[0, 0, 5, 5], conf=1.0)
    with pytest.raises(ValueError):
        short_mask.to_ndarray(5, 5)
|
|
264
|
+
|
|
265
|
+
|
|
266
|
+
# --------------------------------------------------------------------------- #
|
|
267
|
+
# Self-test entry point (works without pytest installed)
|
|
268
|
+
# --------------------------------------------------------------------------- #
|
|
269
|
+
def _run_self_test():
    """Run the module's tests without pytest, printing one PASS/FAIL line each.

    Exits the process with status 1 when anything failed; otherwise prints a
    final success banner and returns.
    """
    failures = []
    suite = (
        test_health_and_models,
        test_load_unload,
        test_predict_multipart_request_and_parse,
        test_serialize_boxes_and_points,
        test_multipart_builder_shape,
        test_rle_roundtrip_known,
        test_rle_roundtrip_random,
        test_rle_all_background_and_all_foreground,
    )
    for case in suite:
        label = case.__name__
        try:
            case()
            print("PASS %s" % label)
        except Exception as err:  # noqa: BLE001
            # Keep going so one failure does not hide the remaining results.
            failures.append((label, err))
            print("FAIL %s: %r" % (label, err))

    # test_rle_bad_sum_raises relies on pytest.raises, so the same check is
    # repeated here by hand to keep this entry point free of pytest.
    bad_sum_label = "test_rle_bad_sum_raises"
    try:
        Mask(rle="1 2", bbox=[0, 0, 5, 5], conf=1.0).to_ndarray(5, 5)
        failures.append((bad_sum_label, "did not raise"))
        print("FAIL test_rle_bad_sum_raises: did not raise")
    except ValueError:
        print("PASS test_rle_bad_sum_raises")

    if not failures:
        print("\nALL TESTS PASSED")
        return
    print("\n%d FAILURE(S)" % len(failures))
    sys.exit(1)
|
|
301
|
+
|
|
302
|
+
|
|
303
|
+
if __name__ == "__main__":
|
|
304
|
+
_run_self_test()
|
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: visionserve
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Python client SDK for the VisionServe HTTP server (talks to the Go runtime over HTTP).
|
|
5
|
+
Author: VisionServe
|
|
6
|
+
License: Apache-2.0
|
|
7
|
+
Project-URL: Homepage, https://github.com/visionserve/visionserve
|
|
8
|
+
Project-URL: Repository, https://github.com/visionserve/visionserve
|
|
9
|
+
Project-URL: Issues, https://github.com/visionserve/visionserve/issues
|
|
10
|
+
Keywords: visionserve,computer-vision,onnx,rf-detr,sam,grounding-dino,client
|
|
11
|
+
Classifier: Development Status :: 4 - Beta
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
14
|
+
Classifier: Operating System :: OS Independent
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
21
|
+
Classifier: Topic :: Scientific/Engineering :: Image Recognition
|
|
22
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
23
|
+
Requires-Python: >=3.8
|
|
24
|
+
Description-Content-Type: text/markdown
|
|
25
|
+
Provides-Extra: images
|
|
26
|
+
Requires-Dist: pillow>=8.0; extra == "images"
|
|
27
|
+
Requires-Dist: numpy>=1.20; extra == "images"
|
|
28
|
+
Provides-Extra: dev
|
|
29
|
+
Requires-Dist: pillow>=8.0; extra == "dev"
|
|
30
|
+
Requires-Dist: numpy>=1.20; extra == "dev"
|
|
31
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
32
|
+
|
|
33
|
+
# VisionServe Python Client
|
|
34
|
+
|
|
35
|
+
A lightweight Python **client** SDK for the [VisionServe](https://github.com/visionserve/visionserve) HTTP server. It talks
|
|
36
|
+
to the Go runtime over HTTP (default `http://localhost:11435`) — it is **not** the
|
|
37
|
+
inference runtime and pulls no inference engine into Python. Think of it like Ollama's
|
|
38
|
+
Python client.
|
|
39
|
+
|
|
40
|
+
The transport uses only the Python **standard library** (`urllib`), so the client has
|
|
41
|
+
no required third-party dependencies. `numpy` and `pillow` are **optional** and only
|
|
42
|
+
needed for:
|
|
43
|
+
- passing `numpy.ndarray` / `PIL.Image` images to `predict()`, and
|
|
44
|
+
- decoding masks with `Mask.to_ndarray()`.
|
|
45
|
+
|
|
46
|
+
## Install
|
|
47
|
+
|
|
48
|
+
From the repository root:
|
|
49
|
+
|
|
50
|
+
```bash
|
|
51
|
+
pip install -e clients/python
|
|
52
|
+
# optional extras for ndarray/PIL image inputs and mask decoding:
|
|
53
|
+
pip install -e 'clients/python[images]'
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
## Run the server first
|
|
57
|
+
|
|
58
|
+
The client needs a running VisionServe server (which in turn needs the ONNX Runtime
|
|
59
|
+
shared library at runtime). From the repo root:
|
|
60
|
+
|
|
61
|
+
```bash
|
|
62
|
+
make serve # starts the Go server on :11435
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
## Quickstart
|
|
66
|
+
|
|
67
|
+
```python
|
|
68
|
+
from visionserve import Client
|
|
69
|
+
|
|
70
|
+
c = Client() # http://localhost:11435, timeout=120s
|
|
71
|
+
print(c.health()) # {"status": "ok"}
|
|
72
|
+
|
|
73
|
+
for m in c.list_models():
|
|
74
|
+
print(m.name, m.task, m.license, m.state)
|
|
75
|
+
|
|
76
|
+
c.load("rf-detr")
|
|
77
|
+
res = c.predict("rf-detr", "cat.jpg")
|
|
78
|
+
print(res.task, res.duration_ms)
|
|
79
|
+
for d in res.detections:
|
|
80
|
+
print(d.cls, d.conf, d.bbox) # bbox = [x, y, w, h] in ORIGINAL image pixels
|
|
81
|
+
|
|
82
|
+
print([m.name for m in c.ps()]) # currently loaded models
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
## Public API
|
|
86
|
+
|
|
87
|
+
### `Client(host="http://localhost:11435", timeout=120)`
|
|
88
|
+
|
|
89
|
+
| Method | HTTP | Returns |
|
|
90
|
+
| --- | --- | --- |
|
|
91
|
+
| `health()` | `GET /api/health` | `{"status": "ok"}` |
|
|
92
|
+
| `list_models()` | `GET /api/models` | `list[ModelInfo]` |
|
|
93
|
+
| `load(model)` | `POST /api/load` | `{"model", "state"}` |
|
|
94
|
+
| `unload(model)` | `POST /api/unload` | `{"model", "state"}` |
|
|
95
|
+
| `ps()` | `GET /api/models` (filtered) | loaded `list[ModelInfo]` |
|
|
96
|
+
| `predict(model, image, *, prompt=None, box=None, point=None)` | `POST /api/predict` | `Result` |
|
|
97
|
+
|
|
98
|
+
`predict()` `image` accepts:
|
|
99
|
+
- `str` / `os.PathLike` — a path to an image file,
|
|
100
|
+
- `bytes` — already-encoded image (PNG/JPEG), sent verbatim,
|
|
101
|
+
- `PIL.Image.Image` — encoded to PNG client-side,
|
|
102
|
+
- `numpy.ndarray` — `HWC` uint8 (or float in `[0,1]` → scaled to uint8); grayscale
|
|
103
|
+
`(H, W)` is promoted to RGB. Encoded to PNG client-side.
|
|
104
|
+
|
|
105
|
+
Prompts (serialized to the server's string format):
|
|
106
|
+
- `box`: `[x, y, w, h]` or a list of boxes → `"x,y,w,h"` joined by `;`.
|
|
107
|
+
- `point`: `[x, y]` / `[x, y, label]` or a list (label 1=fg, 0=bg) → `"x,y[,label]"` joined by `;`.
|
|
108
|
+
- `prompt`: free text, e.g. `"cat. remote."`.
|
|
109
|
+
|
|
110
|
+
### Result types
|
|
111
|
+
|
|
112
|
+
```python
|
|
113
|
+
Detection(bbox: list[float], cls: str, conf: float) # bbox = [x, y, w, h], original px
|
|
114
|
+
Mask(rle: str, bbox: list[float], conf: float)
|
|
115
|
+
Result(task, model, detections: list[Detection], masks: list[Mask], duration_ms)
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
`Mask.to_ndarray(width, height) -> np.ndarray` decodes the COCO-style **column-major**
|
|
119
|
+
uncompressed RLE into a boolean `(height, width)` array (requires numpy). It is the
|
|
120
|
+
exact inverse of the server's encoder.
|
|
121
|
+
|
|
122
|
+
```python
|
|
123
|
+
res = c.predict("mobile-sam", "img.jpg", box=[50, 40, 120, 90])
|
|
124
|
+
from PIL import Image
|
|
125
|
+
w, h = Image.open("img.jpg").size
|
|
126
|
+
mask = res.masks[0].to_ndarray(width=w, height=h) # bool (h, w)
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
## Examples
|
|
130
|
+
|
|
131
|
+
```bash
|
|
132
|
+
# RF-DETR detection (optionally draw boxes):
|
|
133
|
+
python clients/python/examples/detect.py cat.jpg --model rf-detr --save out.png
|
|
134
|
+
|
|
135
|
+
# MobileSAM with a box prompt -> mask ndarray:
|
|
136
|
+
python clients/python/examples/segment.py img.jpg --box 50,40,120,90 --save mask.png
|
|
137
|
+
|
|
138
|
+
# Open-vocab (text prompt) — model must be available on the server:
|
|
139
|
+
python clients/python/examples/grounded.py img.jpg --prompt "cat. remote."
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
## Tests
|
|
143
|
+
|
|
144
|
+
The test suite is fully offline — it spins up a mock HTTP server in a thread and also
|
|
145
|
+
round-trips the RLE codec against a reference port of the Go encoder. No running Go
|
|
146
|
+
server is required.
|
|
147
|
+
|
|
148
|
+
```bash
|
|
149
|
+
# with pytest:
|
|
150
|
+
python -m pytest clients/python/tests -v
|
|
151
|
+
|
|
152
|
+
# or as a pytest-free self-test (the RLE tests still need numpy):
|
|
153
|
+
python clients/python/tests/test_client.py
|
|
154
|
+
```
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|