sparse-ot 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sparse_ot-0.1.0/.clangd +6 -0
- sparse_ot-0.1.0/.github/workflows/ci.yml +63 -0
- sparse_ot-0.1.0/.github/workflows/publish.yml +107 -0
- sparse_ot-0.1.0/.gitignore +25 -0
- sparse_ot-0.1.0/CMakeLists.txt +16 -0
- sparse_ot-0.1.0/LICENSE +21 -0
- sparse_ot-0.1.0/PKG-INFO +17 -0
- sparse_ot-0.1.0/README.md +256 -0
- sparse_ot-0.1.0/benchmarks/__init__.py +0 -0
- sparse_ot-0.1.0/benchmarks/bench_solvers.py +293 -0
- sparse_ot-0.1.0/benchmarks/generate_report.py +202 -0
- sparse_ot-0.1.0/benchmarks/problems.py +111 -0
- sparse_ot-0.1.0/benchmarks/results/accuracy_mid.json +360 -0
- sparse_ot-0.1.0/benchmarks/results/accuracy_quick.json +106 -0
- sparse_ot-0.1.0/benchmarks/results/efficiency_mid.json +366 -0
- sparse_ot-0.1.0/benchmarks/results/efficiency_quick.json +106 -0
- sparse_ot-0.1.0/benchmarks/results/figures/accuracy.pdf +0 -0
- sparse_ot-0.1.0/benchmarks/results/figures/accuracy.png +0 -0
- sparse_ot-0.1.0/benchmarks/results/figures/heatmap_lemon_vs_bonneel.pdf +0 -0
- sparse_ot-0.1.0/benchmarks/results/figures/heatmap_lemon_vs_bonneel.png +0 -0
- sparse_ot-0.1.0/benchmarks/results/figures/heatmap_ortools_vs_lemon.pdf +0 -0
- sparse_ot-0.1.0/benchmarks/results/figures/heatmap_ortools_vs_lemon.png +0 -0
- sparse_ot-0.1.0/benchmarks/results/figures/lineplot_k128.pdf +0 -0
- sparse_ot-0.1.0/benchmarks/results/figures/lineplot_k128.png +0 -0
- sparse_ot-0.1.0/benchmarks/results/figures/lineplot_k2.pdf +0 -0
- sparse_ot-0.1.0/benchmarks/results/figures/lineplot_k2.png +0 -0
- sparse_ot-0.1.0/benchmarks/results/figures/lineplot_k32.pdf +0 -0
- sparse_ot-0.1.0/benchmarks/results/figures/lineplot_k32.png +0 -0
- sparse_ot-0.1.0/benchmarks/results/figures/lineplot_k512.pdf +0 -0
- sparse_ot-0.1.0/benchmarks/results/figures/lineplot_k512.png +0 -0
- sparse_ot-0.1.0/benchmarks/results/figures/lineplot_k8.pdf +0 -0
- sparse_ot-0.1.0/benchmarks/results/figures/lineplot_k8.png +0 -0
- sparse_ot-0.1.0/docs/superpowers/plans/2026-05-15-plan-1-scaffold-bonneel.md +642 -0
- sparse_ot-0.1.0/docs/superpowers/plans/2026-05-16-plan-2-lemon-routing.md +1149 -0
- sparse_ot-0.1.0/docs/superpowers/plans/2026-05-16-plan-3-ortools-benchmarks.md +1445 -0
- sparse_ot-0.1.0/docs/superpowers/plans/2026-05-16-plan-4-feasibility-and-bench-fixes.md +1086 -0
- sparse_ot-0.1.0/docs/superpowers/plans/2026-05-18-sparse-bonneel.md +1020 -0
- sparse_ot-0.1.0/docs/superpowers/specs/2026-05-15-sparse-ot-design.md +336 -0
- sparse_ot-0.1.0/docs/superpowers/specs/2026-05-18-sparse-bonneel-design.md +215 -0
- sparse_ot-0.1.0/pyproject.toml +35 -0
- sparse_ot-0.1.0/src/cpp/bonneel/VENDORING.md +78 -0
- sparse_ot-0.1.0/src/cpp/bonneel/bipartite_sparse_digraph.h +132 -0
- sparse_ot-0.1.0/src/cpp/bonneel/full_bipartitegraph.h +238 -0
- sparse_ot-0.1.0/src/cpp/bonneel/network_simplex_simple.h +1600 -0
- sparse_ot-0.1.0/src/cpp/bonneel_solver.cpp +159 -0
- sparse_ot-0.1.0/src/sparse_ot/__init__.py +5 -0
- sparse_ot-0.1.0/src/sparse_ot/_ext/__init__.py +0 -0
- sparse_ot-0.1.0/src/sparse_ot/emd.py +126 -0
- sparse_ot-0.1.0/src/sparse_ot/feasibility.py +96 -0
- sparse_ot-0.1.0/src/sparse_ot/sparse_utils.py +72 -0
- sparse_ot-0.1.0/tests/test_benchmarks.py +121 -0
- sparse_ot-0.1.0/tests/test_bonneel_sparse.py +153 -0
- sparse_ot-0.1.0/tests/test_duals.py +78 -0
- sparse_ot-0.1.0/tests/test_emd.py +136 -0
- sparse_ot-0.1.0/tests/test_feasibility_check.py +62 -0
- sparse_ot-0.1.0/tests/test_sparse_utils.py +127 -0
- sparse_ot-0.1.0/uv.lock +1260 -0
sparse_ot-0.1.0/.clangd
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
CompileFlags:
|
|
2
|
+
Add:
|
|
3
|
+
- "-I/Users/jonatanbobrutsky-haim/Documents/Code/sparse-optimal-transport/.venv/lib/python3.12/site-packages/pybind11/include"
|
|
4
|
+
- "-I/Users/jonatanbobrutsky-haim/.local/share/uv/python/cpython-3.12.8-macos-aarch64-none/include/python3.12"
|
|
5
|
+
- "-Isrc/cpp"
|
|
6
|
+
- "-std=c++17"
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [main]
|
|
8
|
+
|
|
9
|
+
concurrency:
|
|
10
|
+
group: ci-${{ github.ref }}
|
|
11
|
+
cancel-in-progress: true
|
|
12
|
+
|
|
13
|
+
jobs:
|
|
14
|
+
test:
|
|
15
|
+
name: ${{ matrix.os }} / py${{ matrix.python-version }}
|
|
16
|
+
runs-on: ${{ matrix.os }}
|
|
17
|
+
strategy:
|
|
18
|
+
fail-fast: false
|
|
19
|
+
matrix:
|
|
20
|
+
os: [ubuntu-latest, macos-latest]
|
|
21
|
+
python-version: ["3.10", "3.11", "3.12", "3.13"]
|
|
22
|
+
|
|
23
|
+
steps:
|
|
24
|
+
- uses: actions/checkout@v4
|
|
25
|
+
|
|
26
|
+
- name: Set up Python
|
|
27
|
+
uses: actions/setup-python@v5
|
|
28
|
+
with:
|
|
29
|
+
python-version: ${{ matrix.python-version }}
|
|
30
|
+
cache: pip
|
|
31
|
+
|
|
32
|
+
- name: Install build deps (Linux)
|
|
33
|
+
if: runner.os == 'Linux'
|
|
34
|
+
run: sudo apt-get update && sudo apt-get install -y build-essential cmake
|
|
35
|
+
|
|
36
|
+
- name: Install build deps (macOS)
|
|
37
|
+
if: runner.os == 'macOS'
|
|
38
|
+
run: brew install cmake
|
|
39
|
+
|
|
40
|
+
- name: Install package + test extras
|
|
41
|
+
run: |
|
|
42
|
+
python -m pip install --upgrade pip
|
|
43
|
+
pip install -e ".[dev]"
|
|
44
|
+
|
|
45
|
+
- name: Run tests (skip slow)
|
|
46
|
+
run: pytest tests/ -v -m "not slow" --timeout=300
|
|
47
|
+
|
|
48
|
+
# The slow tests (memory smoke test on a 10k×10k problem) run only on Linux
|
|
49
|
+
# against a single Python version (3.12), to keep CI time reasonable.
|
|
50
|
+
slow-tests:
|
|
51
|
+
name: slow tests (ubuntu / py3.12)
|
|
52
|
+
runs-on: ubuntu-latest
|
|
53
|
+
steps:
|
|
54
|
+
- uses: actions/checkout@v4
|
|
55
|
+
- uses: actions/setup-python@v5
|
|
56
|
+
with:
|
|
57
|
+
python-version: "3.12"
|
|
58
|
+
cache: pip
|
|
59
|
+
- run: sudo apt-get update && sudo apt-get install -y build-essential cmake
|
|
60
|
+
- run: |
|
|
61
|
+
python -m pip install --upgrade pip
|
|
62
|
+
pip install -e ".[dev]"
|
|
63
|
+
- run: pytest tests/ -v -m "slow" --timeout=600
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
name: Publish to PyPI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
release:
|
|
5
|
+
types: [published]
|
|
6
|
+
# Manual trigger for dry runs against TestPyPI.
|
|
7
|
+
workflow_dispatch:
|
|
8
|
+
inputs:
|
|
9
|
+
target:
|
|
10
|
+
description: "Publish target"
|
|
11
|
+
required: true
|
|
12
|
+
default: "testpypi"
|
|
13
|
+
type: choice
|
|
14
|
+
options:
|
|
15
|
+
- testpypi
|
|
16
|
+
- pypi
|
|
17
|
+
|
|
18
|
+
permissions:
|
|
19
|
+
contents: read
|
|
20
|
+
|
|
21
|
+
jobs:
|
|
22
|
+
build_wheels:
|
|
23
|
+
name: Wheels on ${{ matrix.os }}
|
|
24
|
+
runs-on: ${{ matrix.os }}
|
|
25
|
+
strategy:
|
|
26
|
+
fail-fast: false
|
|
27
|
+
matrix:
|
|
28
|
+
# macos-13 (Intel) dropped: the GitHub-hosted runner pool is
|
|
29
|
+
# chronically oversubscribed and stalls publish jobs by 30+ min.
|
|
30
|
+
# Apple Silicon dominates the macOS Python user base on 3.10+;
|
|
31
|
+
# users on Intel Macs can build from the sdist.
|
|
32
|
+
os: [ubuntu-latest, ubuntu-24.04-arm, macos-14]
|
|
33
|
+
steps:
|
|
34
|
+
- uses: actions/checkout@v4
|
|
35
|
+
|
|
36
|
+
- name: Build wheels
|
|
37
|
+
uses: pypa/cibuildwheel@v2.21
|
|
38
|
+
env:
|
|
39
|
+
# PyPI-supported Python versions; skip 32-bit Linux, PyPy, musllinux
|
|
40
|
+
# (scipy wheels not universally available on musl).
|
|
41
|
+
CIBW_BUILD: "cp310-* cp311-* cp312-* cp313-*"
|
|
42
|
+
CIBW_SKIP: "*-musllinux_* *-win32 pp*"
|
|
43
|
+
# macOS targets: arm64 only, built on macos-14 (the Intel macos-13 runner was dropped; see the matrix comment above).
|
|
44
|
+
CIBW_ARCHS_MACOS: "auto64"
|
|
45
|
+
CIBW_ARCHS_LINUX: "auto64"
|
|
46
|
+
# Recent scipy releases publish only manylinux_2_28 wheels for x86_64
|
|
47
|
+
# and aarch64; the default manylinux2014 (glibc 2.17) container falls
|
|
48
|
+
# back to source-building scipy, which needs OpenBLAS and fails.
|
|
49
|
+
CIBW_MANYLINUX_X86_64_IMAGE: manylinux_2_28
|
|
50
|
+
CIBW_MANYLINUX_AARCH64_IMAGE: manylinux_2_28
|
|
51
|
+
# Minimal smoke test: import the package and run one tiny emd() call.
|
|
52
|
+
# The full pytest suite already runs on every supported (os, py) pair
|
|
53
|
+
# in ci.yml — re-running it inside each wheel container would force
|
|
54
|
+
# scipy + pot + matplotlib installs we don't need just to verify the
|
|
55
|
+
# binary loads.
|
|
56
|
+
CIBW_TEST_REQUIRES: "numpy scipy"
|
|
57
|
+
CIBW_TEST_COMMAND: >-
|
|
58
|
+
python -c "import numpy as np, sparse_ot;
|
|
59
|
+
G = sparse_ot.emd(np.array([0.4, 0.6]), np.array([0.5, 0.5]), np.array([[0.0, 1.0], [1.0, 0.0]]));
|
|
60
|
+
assert G.shape == (2, 2);
|
|
61
|
+
print('sparse_ot wheel smoke test ok')"
|
|
62
|
+
|
|
63
|
+
- uses: actions/upload-artifact@v4
|
|
64
|
+
with:
|
|
65
|
+
name: wheels-${{ matrix.os }}
|
|
66
|
+
path: ./wheelhouse/*.whl
|
|
67
|
+
|
|
68
|
+
build_sdist:
|
|
69
|
+
name: sdist
|
|
70
|
+
runs-on: ubuntu-latest
|
|
71
|
+
steps:
|
|
72
|
+
- uses: actions/checkout@v4
|
|
73
|
+
- uses: actions/setup-python@v5
|
|
74
|
+
with:
|
|
75
|
+
python-version: "3.12"
|
|
76
|
+
- run: python -m pip install --upgrade build
|
|
77
|
+
- run: python -m build --sdist
|
|
78
|
+
- uses: actions/upload-artifact@v4
|
|
79
|
+
with:
|
|
80
|
+
name: sdist
|
|
81
|
+
path: dist/*.tar.gz
|
|
82
|
+
|
|
83
|
+
publish:
|
|
84
|
+
name: Publish to ${{ (github.event_name == 'workflow_dispatch' && inputs.target) || 'pypi' }}
|
|
85
|
+
needs: [build_wheels, build_sdist]
|
|
86
|
+
runs-on: ubuntu-latest
|
|
87
|
+
# Trusted-publishing OIDC requires an environment + id-token write.
|
|
88
|
+
environment:
|
|
89
|
+
name: ${{ (github.event_name == 'workflow_dispatch' && inputs.target) || 'pypi' }}
|
|
90
|
+
url: https://${{ ((github.event_name == 'workflow_dispatch' && inputs.target) || 'pypi') == 'testpypi' && 'test.' || '' }}pypi.org/p/sparse-ot
|
|
91
|
+
permissions:
|
|
92
|
+
id-token: write
|
|
93
|
+
steps:
|
|
94
|
+
- uses: actions/download-artifact@v4
|
|
95
|
+
with:
|
|
96
|
+
path: dist
|
|
97
|
+
merge-multiple: true
|
|
98
|
+
|
|
99
|
+
- name: Publish to PyPI
|
|
100
|
+
if: (github.event_name == 'workflow_dispatch' && inputs.target == 'pypi') || github.event_name == 'release'
|
|
101
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
102
|
+
|
|
103
|
+
- name: Publish to TestPyPI
|
|
104
|
+
if: github.event_name == 'workflow_dispatch' && inputs.target == 'testpypi'
|
|
105
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
106
|
+
with:
|
|
107
|
+
repository-url: https://test.pypi.org/legacy/
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*.egg-info/
|
|
5
|
+
dist/
|
|
6
|
+
build/
|
|
7
|
+
|
|
8
|
+
# Virtual environment
|
|
9
|
+
.venv/
|
|
10
|
+
|
|
11
|
+
# scikit-build-core / CMake build artifacts
|
|
12
|
+
_skbuild/
|
|
13
|
+
CMakeFiles/
|
|
14
|
+
*.cmake
|
|
15
|
+
Makefile
|
|
16
|
+
|
|
17
|
+
# Compiled extensions
|
|
18
|
+
*.so
|
|
19
|
+
*.pyd
|
|
20
|
+
|
|
21
|
+
# pytest
|
|
22
|
+
.pytest_cache/
|
|
23
|
+
|
|
24
|
+
# macOS
|
|
25
|
+
.DS_Store
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
cmake_minimum_required(VERSION 3.18)
|
|
2
|
+
project(sparse_ot_ext LANGUAGES CXX)
|
|
3
|
+
|
|
4
|
+
set(CMAKE_CXX_STANDARD 17)
|
|
5
|
+
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
|
6
|
+
|
|
7
|
+
find_package(pybind11 CONFIG REQUIRED)
|
|
8
|
+
|
|
9
|
+
pybind11_add_module(_bonneel src/cpp/bonneel_solver.cpp)
|
|
10
|
+
target_include_directories(_bonneel PRIVATE src/cpp)
|
|
11
|
+
target_compile_options(_bonneel PRIVATE -O3)
|
|
12
|
+
# Disable OpenMP for the Bonneel network-simplex wrapper: OpenMP thread overhead
|
|
13
|
+
# causes 30-40x slowdown on the problem sizes we handle (n ≤ ~5 000). POT's
|
|
14
|
+
# emd_wrap is also built without OpenMP for the same reason.
|
|
15
|
+
target_compile_definitions(_bonneel PRIVATE NOOMP)
|
|
16
|
+
install(TARGETS _bonneel DESTINATION sparse_ot/_ext)
|
sparse_ot-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Jonatan Bobrutsky-Haim
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
sparse_ot-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
Metadata-Version: 2.2
|
|
2
|
+
Name: sparse-ot
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Sparse optimal transport with drop-in POT API
|
|
5
|
+
License: MIT
|
|
6
|
+
Requires-Python: >=3.10
|
|
7
|
+
Requires-Dist: numpy>=1.24
|
|
8
|
+
Requires-Dist: scipy>=1.10
|
|
9
|
+
Provides-Extra: torch
|
|
10
|
+
Requires-Dist: torch>=2.0; extra == "torch"
|
|
11
|
+
Provides-Extra: dev
|
|
12
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
13
|
+
Requires-Dist: pytest-timeout>=2.0; extra == "dev"
|
|
14
|
+
Requires-Dist: pot>=0.9; extra == "dev"
|
|
15
|
+
Requires-Dist: pybind11>=2.12; extra == "dev"
|
|
16
|
+
Requires-Dist: matplotlib>=3.7; extra == "dev"
|
|
17
|
+
|
|
@@ -0,0 +1,256 @@
|
|
|
1
|
+
# sparse-ot
|
|
2
|
+
|
|
3
|
+
[![CI](https://github.com/JBobrutsky/sparse-optimal-transport/actions/workflows/ci.yml/badge.svg)](https://github.com/JBobrutsky/sparse-optimal-transport/actions/workflows/ci.yml)
|
|
4
|
+
|
|
5
|
+
Drop-in replacement for [POT](https://github.com/PythonOT/POT)'s `emd` /
|
|
6
|
+
`emd2`, with native support for **sparse cost matrices**. One solver
|
|
7
|
+
([Bonneel's network simplex](https://github.com/nbonneel/network_simplex))
|
|
8
|
+
covers both regimes:
|
|
9
|
+
|
|
10
|
+
- **Dense** `numpy.ndarray` cost matrix → dense plan.
|
|
11
|
+
- **`scipy.sparse` CSR** cost matrix → sparse plan, with memory and per-pivot
|
|
12
|
+
work both scaling in the number of candidate edges `k` rather than `n × m`.
|
|
13
|
+
|
|
14
|
+
## Quickstart
|
|
15
|
+
|
|
16
|
+
```python
|
|
17
|
+
import numpy as np, scipy.sparse, sparse_ot as sot
|
|
18
|
+
|
|
19
|
+
# Dense: identical interface to ot.emd.
|
|
20
|
+
G = sot.emd(a, b, M)
|
|
21
|
+
cost = sot.emd2(a, b, M)
|
|
22
|
+
|
|
23
|
+
# Sparse: pass a CSR cost matrix. Absent entries are forbidden edges, not
|
|
24
|
+
# zero-cost shortcuts.
|
|
25
|
+
M_csr = scipy.sparse.csr_matrix(...)
|
|
26
|
+
G_csr = sot.emd(a, b, M_csr)
|
|
27
|
+
|
|
28
|
+
# POT-compatible log dict (cost, u, v, warning, result_code).
|
|
29
|
+
G, info = sot.emd(a, b, M, log=True)
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
The `u` and `v` returned in the log dict are the dual potentials with
|
|
33
|
+
POT's sign convention (`u[i] + v[j] ≤ M[i,j]` at the optimum). With
|
|
34
|
+
`center_dual=True` (default) `u` is shifted to zero mean, preserving
|
|
35
|
+
`u[i] + v[j]` on every edge.
|
|
36
|
+
|
|
37
|
+
## Why sparse?
|
|
38
|
+
|
|
39
|
+
A 10 000 × 10 000 problem with 10 candidate edges per row (k = 100 000):
|
|
40
|
+
|
|
41
|
+
| Solver path | Memory | Wall time |
|
|
42
|
+
|--------------------|---------------|-----------|
|
|
43
|
+
| Bonneel-dense | ≈ 800 MB (cost matrix) | (does not run; OOM at this scale on small machines) |
|
|
44
|
+
| **Bonneel-sparse** | **≈ 6 MB** | seconds |
|
|
45
|
+
|
|
46
|
+
Most real OT problems (k-NN, transformer attention masks, point-cloud
|
|
47
|
+
matching) are intrinsically sparse. Materialising them as dense cost
|
|
48
|
+
matrices is wasteful and can be infeasible. This package gives you
|
|
49
|
+
Bonneel's tight constants without the O(n·m) memory penalty.
|
|
50
|
+
|
|
51
|
+
## Feasibility on sparse supports
|
|
52
|
+
|
|
53
|
+
When you pass a sparse `M`, the transport plan is restricted to the
|
|
54
|
+
edges you provide. The package checks that the support is connected
|
|
55
|
+
and that supply totals match (`check_feasibility`), but **this does
|
|
56
|
+
not guarantee an LP-feasible plan exists**.
|
|
57
|
+
|
|
58
|
+
A small support can fail [Hall's condition](https://en.wikipedia.org/wiki/Hall%27s_marriage_theorem):
|
|
59
|
+
some local block of rows `S` may collectively need to move more mass
|
|
60
|
+
than the columns they reach can absorb. For example, a band-7 support
|
|
61
|
+
(each row connects only to its 7 nearest columns) cannot route generic
|
|
62
|
+
Dirichlet marginals at `n = 1000` — the corner rows have nowhere to
|
|
63
|
+
shed their excess.
|
|
64
|
+
|
|
65
|
+
When that happens we don't lie. The solver returns its best-effort
|
|
66
|
+
flow, `info["result_code"] == 0`, and a `RuntimeWarning` fires
|
|
67
|
+
explaining that the marginals weren't met. Compare to POT, which
|
|
68
|
+
silently routes mass through any zero-cost or penalty edge in the
|
|
69
|
+
densified representation and reports `success` with an arbitrary
|
|
70
|
+
cost.
|
|
71
|
+
|
|
72
|
+
In practice: build supports that are slightly denser than your
|
|
73
|
+
marginals strictly require (k-NN with k chosen by validation, plus a
|
|
74
|
+
small slack), or run with very dense support whenever you don't know
|
|
75
|
+
the marginal distribution ahead of time.
|
|
76
|
+
|
|
77
|
+
## Convergence and the `numItermax` knob
|
|
78
|
+
|
|
79
|
+
Bonneel's network simplex stops at `numItermax` pivots without raising.
|
|
80
|
+
If the iteration cap is hit before convergence the returned flow can
|
|
81
|
+
violate marginals by orders of magnitude more than machine epsilon. We
|
|
82
|
+
guard against this in two ways:
|
|
83
|
+
|
|
84
|
+
1. The default `numItermax` is **problem-size-aware**:
|
|
85
|
+
`min(50M, max(100k, 100·(n + m + k)))`.
|
|
86
|
+
2. After every solve we re-check the marginals. If `max(|G.sum(1) - a|,
|
|
87
|
+
|G.sum(0) - b|) > 1e-6`, we emit a `RuntimeWarning` and report
|
|
88
|
+
`result_code = 0` with a diagnostic in `info["warning"]`. No
|
|
89
|
+
exception is raised, matching POT's behavior.
|
|
90
|
+
|
|
91
|
+
You can pass `numItermax=…` to override.
|
|
92
|
+
|
|
93
|
+
## Build and install
|
|
94
|
+
|
|
95
|
+
```bash
|
|
96
|
+
pip install -e . --no-build-isolation
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
`pyproject.toml` sets `editable.rebuild = true`, so the pybind11
|
|
100
|
+
extension is rebuilt automatically the next time `sparse_ot` is imported
|
|
101
|
+
after a `src/cpp/` edit.
|
|
102
|
+
|
|
103
|
+
## Benchmarks
|
|
104
|
+
|
|
105
|
+
Two independent suites:
|
|
106
|
+
|
|
107
|
+
```bash
|
|
108
|
+
python benchmarks/bench_solvers.py --mid # ~15 minutes
|
|
109
|
+
python benchmarks/bench_solvers.py --quick # ~30 seconds (used by CI)
|
|
110
|
+
python benchmarks/bench_solvers.py # full sweep, hours
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
- **Dense suite** — fully-random `n × n` cost matrix. Compares
|
|
114
|
+
`bonneel_dense` against `pot_reference` (`ot.emd`).
|
|
115
|
+
- **Sparse suite** — feasible-by-construction kNN-grid (`benchmarks/problems.py`).
|
|
116
|
+
Runs `bonneel_sparse` only; there is no honest dense representation of
|
|
117
|
+
a kNN cost (absent edges must be +∞, which neither POT nor Bonneel's
|
|
118
|
+
dense path can express).
|
|
119
|
+
|
|
120
|
+
Results are written to `benchmarks/results/{efficiency,accuracy}_{tag}.json`
|
|
121
|
+
with the structure:
|
|
122
|
+
|
|
123
|
+
```jsonc
|
|
124
|
+
{
|
|
125
|
+
"dense": { "<n>": { "bonneel_dense": {...}, "pot_reference": {...} } },
|
|
126
|
+
"sparse": { "<n>": { "<k>": { "bonneel_sparse": {...} } } }
|
|
127
|
+
}
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
See "Benchmark results" below for the current numbers on the maintainer's
|
|
131
|
+
laptop.
|
|
132
|
+
|
|
133
|
+
## Benchmark results
|
|
134
|
+
|
|
135
|
+
Numbers below are from `python benchmarks/bench_solvers.py --mid` on an
|
|
136
|
+
Apple-Silicon laptop (Sonoma, 64 GB). Wall times are from a single run; the
|
|
137
|
+
sparse-suite peak memory is `tracemalloc` peak during `emd()`.
|
|
138
|
+
|
|
139
|
+
### Dense suite (fully random `n × n` cost)
|
|
140
|
+
|
|
141
|
+
| n | `bonneel_dense` | `pot_reference` | pot / dense |
|
|
142
|
+
|------:|----------------:|----------------:|------------:|
|
|
143
|
+
| 200 | 0.003s | 0.002s | 0.66× |
|
|
144
|
+
| 500 | 0.019s | 0.015s | 0.80× |
|
|
145
|
+
| 1000 | 0.085s | 0.072s | 0.85× |
|
|
146
|
+
| 2000 | 0.406s | 0.346s | 0.85× |
|
|
147
|
+
| 4000 | 2.166s | 1.414s | 0.65× |
|
|
148
|
+
|
|
149
|
+
POT and `bonneel_dense` share the same C++ engine (POT vendors Bonneel's
|
|
150
|
+
network simplex), so the wall-time ratio reflects pure wrapping overhead;
|
|
151
|
+
POT's Cython wrapper is marginally tighter than our pybind11 wrapper.
|
|
152
|
+
Costs agree to machine precision for n ≤ 2000. At n = 4000, POT's cost
|
|
153
|
+
is 1.1 % higher than ours — POT's default `numItermax = 100 000`
|
|
154
|
+
truncates before convergence, while our problem-size-aware default
|
|
155
|
+
finishes the pivots.
|
|
156
|
+
|
|
157
|
+
### Sparse suite (knn-grid)
|
|
158
|
+
|
|
159
|
+
The same knn problem is run through both Bonneel paths so the
|
|
160
|
+
speed/memory tradeoff is directly comparable. `bonneel_dense` runs only
|
|
161
|
+
where `n ≤ MAX_DENSE_N`; above that the cost matrix doesn't fit and the
|
|
162
|
+
cell is sparse-only. For the dense path we densify with a finite
|
|
163
|
+
penalty (`max(M.data) · (n·m + 1)`) on absent edges — with the
|
|
164
|
+
problem-size-aware `numItermax` the optimal basis never lands on a
|
|
165
|
+
penalty edge.
|
|
166
|
+
|
|
167
|
+
| n | k | nnz | sparse wall | sparse peak | dense wall | dense peak | winner |
|
|
168
|
+
|-------:|-----:|-----------:|------------:|------------:|-----------:|-----------:|:-------------|
|
|
169
|
+
| 200 | 2 | 400 | 0.006s | 0.04 MB | 0.003s | 0.32 MB | dense 2.4× |
|
|
170
|
+
| 200 | 32 | 6 400 | 0.051s | 0.14 MB | 0.004s | 0.32 MB | dense 14× |
|
|
171
|
+
| 200 | 128 | 25 600 | 0.204s | 0.52 MB | 0.003s | 0.32 MB | dense 71× |
|
|
172
|
+
| 1 000 | 2 | 2 000 | 0.033s | 0.15 MB | 0.075s | 7.69 MB | **sparse 2.3×** |
|
|
173
|
+
| 1 000 | 8 | 8 000 | 0.100s | 0.22 MB | 0.178s | 7.69 MB | **sparse 1.8×** |
|
|
174
|
+
| 1 000 | 32 | 32 000 | 0.315s | 0.67 MB | 0.182s | 7.69 MB | dense 1.7× |
|
|
175
|
+
| 1 000 | 128 | 128 000 | 1.103s | 2.59 MB | 0.256s | 7.69 MB | dense 4.3× |
|
|
176
|
+
| 4 000 | 2 | 8 000 | 0.134s | 0.60 MB | 1.652s | 122.3 MB | **sparse 12×** |
|
|
177
|
+
| 4 000 | 8 | 32 000 | 0.577s | 0.87 MB | 8.918s | 122.3 MB | **sparse 15×** |
|
|
178
|
+
| 4 000 | 32 | 128 000 | 1.572s | 2.66 MB | 8.574s | 122.3 MB | **sparse 5.5×** |
|
|
179
|
+
| 4 000 | 128 | 512 000 | 4.540s | 10.35 MB | 10.298s | 122.3 MB | **sparse 2.3×** |
|
|
180
|
+
| 4 000 | 400 | 1 600 000 | 14.473s | 32.14 MB | 8.393s | 122.3 MB | dense 1.7× |
|
|
181
|
+
| 4 000 | 512 | 2 048 000 | 17.956s | 41.11 MB | 9.535s | 122.3 MB | dense 1.9× |
|
|
182
|
+
| 16 000 | 2 | 32 000 | 0.531s | 2.39 MB | — | — | sparse only |
|
|
183
|
+
| 16 000 | 32 | 512 000 | 12.066s | 10.62 MB | — | — | sparse only |
|
|
184
|
+
| 16 000 | 128 | 2 048 000 | 32.955s | 41.38 MB | — | — | sparse only |
|
|
185
|
+
| 16 000 | 512 | 8 192 000 | 75.201s | 164.4 MB | — | — | sparse only |
|
|
186
|
+
| 16 000 | 1600 | 25 600 000 | 269.488s | 513.1 MB | — | — | sparse only |
|
|
187
|
+
|
|
188
|
+
Reading the table:
|
|
189
|
+
|
|
190
|
+
- At **n = 200** the dense path always wins because the n² cost matrix is
|
|
191
|
+
tiny and Bonneel's flat-array constants dominate over the sparse
|
|
192
|
+
digraph's per-arc indirection.
|
|
193
|
+
- At **n = 1 000** the crossover is around k ≈ n / 50: below that, sparse
|
|
194
|
+
wins; above, dense wins.
|
|
195
|
+
- At **n = 4 000** sparse wins by 2–15× up to k ≈ n / 20. Above that
|
|
196
|
+
density, dense again wins on time but its memory cost is fixed at
|
|
197
|
+
122 MB regardless of k.
|
|
198
|
+
- At **n = 16 000** the dense path is out of reach (cost matrix ≈ 2 GB);
|
|
199
|
+
only sparse runs.
|
|
200
|
+
|
|
201
|
+
### Accuracy
|
|
202
|
+
|
|
203
|
+
Marginals stay at machine precision (worst case `2.5 × 10⁻¹⁶`) across
|
|
204
|
+
every cell of both suites. On the knn problems, `bonneel_sparse` and
|
|
205
|
+
`bonneel_dense` agree on cost to ≈ machine precision in 22 of 24 cells.
|
|
206
|
+
Two exceptions:
|
|
207
|
+
|
|
208
|
+
| n | k | sparse cost | dense cost | relative diff |
|
|
209
|
+
|------:|-----:|------------:|-----------:|--------------:|
|
|
210
|
+
| 4 000 | 400 | 271.7297 | 272.2578 | 1.9 × 10⁻³ |
|
|
211
|
+
| 4 000 | 512 | 557.3260 | 557.3270 | 1.7 × 10⁻⁶ |
|
|
212
|
+
|
|
213
|
+
In both cases `bonneel_sparse` finds a strictly lower-cost plan. The
|
|
214
|
+
densified-with-penalty input introduces costs on the order of
|
|
215
|
+
`max(M) · n² ≈ 10¹²`, and floating-point reduced-cost computations on
|
|
216
|
+
that scale accumulate enough rounding noise to push the pivot rule off
|
|
217
|
+
the true optimum. The sparse path never sees those large numbers and is
|
|
218
|
+
the more accurate of the two when both can run.
|
|
219
|
+
|
|
220
|
+
## Memory cutoffs
|
|
221
|
+
|
|
222
|
+
`bench_solvers.py` skips cells beyond these defaults (16 GB target):
|
|
223
|
+
|
|
224
|
+
| Constant | Default | Effect |
|
|
225
|
+
|------------------|---------------|--------------------------------------------|
|
|
226
|
+
| `MAX_DENSE_N` | 8 192 | Dense suite skipped above this |
|
|
227
|
+
| `MAX_SPARSE_NNZ` | 200 000 000 | Sparse cell skipped above this nnz |
|
|
228
|
+
|
|
229
|
+
Raise the constants for larger hardware.
|
|
230
|
+
|
|
231
|
+
## Releasing
|
|
232
|
+
|
|
233
|
+
PyPI uploads are automated via GitHub Actions and PyPI's
|
|
234
|
+
[trusted-publishing OIDC](https://docs.pypi.org/trusted-publishers/). To
|
|
235
|
+
cut a release:
|
|
236
|
+
|
|
237
|
+
1. Bump `project.version` in `pyproject.toml`, commit, tag (`git tag vX.Y.Z`),
|
|
238
|
+
push (`git push --tags`).
|
|
239
|
+
2. Create a GitHub Release pointing at the tag.
|
|
240
|
+
|
|
241
|
+
The `.github/workflows/publish.yml` workflow then builds wheels via
|
|
242
|
+
`cibuildwheel` for Linux (x86_64, arm64) and macOS (arm64 only) across
|
|
243
|
+
Python 3.10–3.13, builds an sdist, and uploads everything to PyPI.
|
|
244
|
+
|
|
245
|
+
First-time setup (one-time, requires owner action on pypi.org):
|
|
246
|
+
|
|
247
|
+
- Add a trusted publisher for **sparse-ot** with owner = `JBobrutsky`,
|
|
248
|
+
repository = `sparse-optimal-transport`, workflow = `publish.yml`,
|
|
249
|
+
environment = `pypi`.
|
|
250
|
+
- For TestPyPI dry runs, register the same on test.pypi.org with
|
|
251
|
+
environment = `testpypi`. Then trigger `Publish to PyPI` via the
|
|
252
|
+
Actions UI (workflow_dispatch) with target = `testpypi`.
|
|
253
|
+
|
|
254
|
+
## License
|
|
255
|
+
|
|
256
|
+
[MIT](LICENSE).
|
|
File without changes
|