umapers 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. umapers-1.0.0/PKG-INFO +83 -0
  2. umapers-1.0.0/README.md +55 -0
  3. umapers-1.0.0/pyproject.toml +48 -0
  4. umapers-1.0.0/python/umapers/__init__.py +4 -0
  5. umapers-1.0.0/python/umapers/__init__.pyi +5 -0
  6. umapers-1.0.0/python/umapers/_api.py +450 -0
  7. umapers-1.0.0/python/umapers/_api.pyi +365 -0
  8. umapers-1.0.0/python/umapers/py.typed +1 -0
  9. umapers-1.0.0/rust_umap/Cargo.lock +360 -0
  10. umapers-1.0.0/rust_umap/Cargo.toml +18 -0
  11. umapers-1.0.0/rust_umap/README.md +217 -0
  12. umapers-1.0.0/rust_umap/benchmarks/eval_aligned_umap.py +861 -0
  13. umapers-1.0.0/rust_umap/benchmarks/eval_euclidean_fit_regression.py +210 -0
  14. umapers-1.0.0/rust_umap/benchmarks/eval_inverse_quality.py +265 -0
  15. umapers-1.0.0/rust_umap/benchmarks/eval_parametric_consistency.py +941 -0
  16. umapers-1.0.0/rust_umap/benchmarks/eval_sparse_csr_vs_umap_learn.py +814 -0
  17. umapers-1.0.0/rust_umap/examples/aligned_benchmark.rs +288 -0
  18. umapers-1.0.0/rust_umap/examples/aligned_demo.rs +112 -0
  19. umapers-1.0.0/rust_umap/examples/inverse_quality.rs +120 -0
  20. umapers-1.0.0/rust_umap/examples/parametric_eval.rs +219 -0
  21. umapers-1.0.0/rust_umap/src/aligned.rs +983 -0
  22. umapers-1.0.0/rust_umap/src/bin/bench_fit_csv.rs +252 -0
  23. umapers-1.0.0/rust_umap/src/bin/fit_csv.rs +106 -0
  24. umapers-1.0.0/rust_umap/src/cli_common.rs +583 -0
  25. umapers-1.0.0/rust_umap/src/lib.rs +5553 -0
  26. umapers-1.0.0/rust_umap/src/main.rs +64 -0
  27. umapers-1.0.0/rust_umap/src/parametric.rs +887 -0
  28. umapers-1.0.0/rust_umap/src/sparse.rs +648 -0
  29. umapers-1.0.0/rust_umap/tests/cli_e2e.rs +330 -0
  30. umapers-1.0.0/umap_rs/Cargo.lock +531 -0
  31. umapers-1.0.0/umap_rs/Cargo.toml +21 -0
  32. umapers-1.0.0/umap_rs/README.md +55 -0
  33. umapers-1.0.0/umap_rs/examples/manual_dense_workflow.py +62 -0
  34. umapers-1.0.0/umap_rs/examples/manual_help_surface.py +50 -0
  35. umapers-1.0.0/umap_rs/examples/manual_precomputed_knn.py +68 -0
  36. umapers-1.0.0/umap_rs/examples/manual_transform_inverse.py +57 -0
  37. umapers-1.0.0/umap_rs/src/lib.rs +913 -0
  38. umapers-1.0.0/umap_rs/tests/test_binding.py +727 -0
  39. umapers-1.0.0/umap_rs/uv.lock +223 -0
umapers-1.0.0/PKG-INFO ADDED
@@ -0,0 +1,83 @@
1
+ Metadata-Version: 2.4
2
+ Name: umapers
3
+ Version: 1.0.0
4
+ Classifier: Programming Language :: Rust
5
+ Classifier: Programming Language :: Python :: 3
6
+ Classifier: Programming Language :: Python :: 3 :: Only
7
+ Classifier: Programming Language :: Python :: 3.9
8
+ Classifier: Programming Language :: Python :: 3.10
9
+ Classifier: Programming Language :: Python :: 3.11
10
+ Classifier: Programming Language :: Python :: 3.12
11
+ Classifier: Programming Language :: Python :: 3.13
12
+ Classifier: Programming Language :: Python :: Implementation :: CPython
13
+ Classifier: License :: OSI Approved :: BSD License
14
+ Classifier: Typing :: Typed
15
+ Requires-Dist: numpy>=1.26,<3
16
+ Summary: Python bindings for rust_umap
17
+ Keywords: umap,dimensionality-reduction,manifold-learning,rust,pyo3
18
+ Home-Page: https://github.com/wenjiudaijiugui/umapers
19
+ License: BSD-3-Clause
20
+ Requires-Python: >=3.9
21
+ Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
22
+ Project-URL: Changelog, https://github.com/wenjiudaijiugui/umapers/blob/main/CHANGELOG.md
23
+ Project-URL: Documentation, https://github.com/wenjiudaijiugui/umapers/tree/main/umap_rs
24
+ Project-URL: Homepage, https://github.com/wenjiudaijiugui/umapers
25
+ Project-URL: Issues, https://github.com/wenjiudaijiugui/umapers/issues
26
+ Project-URL: Repository, https://github.com/wenjiudaijiugui/umapers
27
+
28
+ # umapers
29
+
30
+ Python package `umapers`, backed by `rust_umap` and built with PyO3 + maturin.
31
+
32
+ Version `1.0.0` focuses on IDE-help quality for the public Python API:
33
+
34
+ - the exported surface has useful type hints
35
+ - public methods carry docstrings that explain inputs and outputs
36
+ - `help(umapers.Umap)` and editor hover should be informative
37
+
38
+ The binding remains intentionally thin: Python normalizes arrays and CSR inputs,
39
+ while Rust owns validation and compute-heavy paths whenever practical.
40
+
41
+ ## Local build
42
+
43
+ ```bash
44
+ PYTHON_BIN="$(command -v python3 || command -v python)"
45
+ uv venv --python "$PYTHON_BIN" .venv
46
+ uv pip install --python .venv/bin/python --upgrade pip maturin
47
+ uv run --python .venv/bin/python maturin develop --manifest-path umap_rs/Cargo.toml
48
+ uv run --python .venv/bin/python python -I -m pytest -q umap_rs/tests/test_binding.py
49
+ ```
50
+
51
+ ## Quick usage
52
+
53
+ ```python
54
+ import numpy as np
55
+ from umapers import Umap
56
+
57
+ x = np.random.default_rng(42).normal(size=(200, 16)).astype(np.float32)
58
+ model = Umap(n_neighbors=15, n_components=2, n_epochs=120, random_seed=42, init="random")
59
+ emb = model.fit_transform(x)
60
+ ```
61
+
62
+ ## API layers
63
+
64
+ ### Main API
65
+
66
+ Most users should start here:
67
+
68
+ - `Umap`
69
+ - `fit_transform`
70
+ - `Umap.fit`
71
+ - `Umap.fit_transform`
72
+ - `Umap.transform`
73
+ - `Umap.inverse_transform`
74
+
75
+ These methods accept NumPy arrays by default and support the documented `out=`
76
+ buffers where available.
77
+
78
+ ### Advanced API
79
+
80
+ `Umap.fit_transform_with_knn(...)` is available for callers who already have a
81
+ precomputed exact or shared kNN graph. It is useful for benchmarks and
82
+ parameter sweeps, but it is not the recommended first-stop quickstart.
83
+
@@ -0,0 +1,55 @@
1
+ # umapers
2
+
3
+ Python package `umapers`, backed by `rust_umap` and built with PyO3 + maturin.
4
+
5
+ Version `1.0.0` focuses on IDE-help quality for the public Python API:
6
+
7
+ - the exported surface has useful type hints
8
+ - public methods carry docstrings that explain inputs and outputs
9
+ - `help(umapers.Umap)` and editor hover should be informative
10
+
11
+ The binding remains intentionally thin: Python normalizes arrays and CSR inputs,
12
+ while Rust owns validation and compute-heavy paths whenever practical.
13
+
14
+ ## Local build
15
+
16
+ ```bash
17
+ PYTHON_BIN="$(command -v python3 || command -v python)"
18
+ uv venv --python "$PYTHON_BIN" .venv
19
+ uv pip install --python .venv/bin/python --upgrade pip maturin
20
+ uv run --python .venv/bin/python maturin develop --manifest-path umap_rs/Cargo.toml
21
+ uv run --python .venv/bin/python python -I -m pytest -q umap_rs/tests/test_binding.py
22
+ ```
23
+
24
+ ## Quick usage
25
+
26
+ ```python
27
+ import numpy as np
28
+ from umapers import Umap
29
+
30
+ x = np.random.default_rng(42).normal(size=(200, 16)).astype(np.float32)
31
+ model = Umap(n_neighbors=15, n_components=2, n_epochs=120, random_seed=42, init="random")
32
+ emb = model.fit_transform(x)
33
+ ```
34
+
35
+ ## API layers
36
+
37
+ ### Main API
38
+
39
+ Most users should start here:
40
+
41
+ - `Umap`
42
+ - `fit_transform`
43
+ - `Umap.fit`
44
+ - `Umap.fit_transform`
45
+ - `Umap.transform`
46
+ - `Umap.inverse_transform`
47
+
48
+ These methods accept NumPy arrays by default and support the documented `out=`
49
+ buffers where available.
50
+
51
+ ### Advanced API
52
+
53
+ `Umap.fit_transform_with_knn(...)` is available for callers who already have a
54
+ precomputed exact or shared kNN graph. It is useful for benchmarks and
55
+ parameter sweeps, but it is not the recommended first-stop quickstart.
@@ -0,0 +1,48 @@
1
+ [build-system]
2
+ requires = ["maturin>=1.6,<2.0"]
3
+ build-backend = "maturin"
4
+
5
+ [project]
6
+ name = "umapers"
7
+ version = "1.0.0"
8
+ description = "Python bindings for rust_umap"
9
+ readme = "README.md"
10
+ requires-python = ">=3.9"
11
+ license = { text = "BSD-3-Clause" }
12
+ keywords = ["umap", "dimensionality-reduction", "manifold-learning", "rust", "pyo3"]
13
+ dependencies = [
14
+ "numpy>=1.26,<3",
15
+ ]
16
+ classifiers = [
17
+ "Programming Language :: Rust",
18
+ "Programming Language :: Python :: 3",
19
+ "Programming Language :: Python :: 3 :: Only",
20
+ "Programming Language :: Python :: 3.9",
21
+ "Programming Language :: Python :: 3.10",
22
+ "Programming Language :: Python :: 3.11",
23
+ "Programming Language :: Python :: 3.12",
24
+ "Programming Language :: Python :: 3.13",
25
+ "Programming Language :: Python :: Implementation :: CPython",
26
+ "License :: OSI Approved :: BSD License",
27
+ "Typing :: Typed",
28
+ ]
29
+
30
+ [project.urls]
31
+ Homepage = "https://github.com/wenjiudaijiugui/umapers"
32
+ Repository = "https://github.com/wenjiudaijiugui/umapers"
33
+ Issues = "https://github.com/wenjiudaijiugui/umapers/issues"
34
+ Changelog = "https://github.com/wenjiudaijiugui/umapers/blob/main/CHANGELOG.md"
35
+ Documentation = "https://github.com/wenjiudaijiugui/umapers/tree/main/umap_rs"
36
+
37
+ [tool.maturin]
38
+ module-name = "umapers._umapers"
39
+ exclude = [
40
+ "python/umapers/__pycache__/*",
41
+ ]
42
+ include = [
43
+ "python/umapers/__init__.pyi",
44
+ "python/umapers/_api.pyi",
45
+ "python/umapers/py.typed",
46
+ ]
47
+ manifest-path = "umap_rs/Cargo.toml"
48
+ python-source = "python"
@@ -0,0 +1,4 @@
1
+ from ._api import Umap, UmapKwargs, fit_transform
2
+ from ._umapers import __version__
3
+
4
+ __all__ = ["Umap", "UmapKwargs", "fit_transform", "__version__"]
@@ -0,0 +1,5 @@
1
+ from ._api import Umap, UmapKwargs, fit_transform
2
+
3
+ __version__: str
4
+
5
+ __all__: list[str]
@@ -0,0 +1,450 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Any, TypedDict
4
+
5
+ import numpy as np
6
+
7
+ from ._umapers import UmapCore
8
+
9
+
10
+ class UmapKwargs(TypedDict, total=False):
11
+ """Keyword arguments accepted by `Umap(...)` and `fit_transform(..., **kwargs)`."""
12
+
13
+ n_neighbors: int
14
+ n_components: int
15
+ n_epochs: int | None
16
+ metric: str
17
+ learning_rate: float
18
+ min_dist: float
19
+ spread: float
20
+ local_connectivity: float
21
+ set_op_mix_ratio: float
22
+ repulsion_strength: float
23
+ negative_sample_rate: int
24
+ random_seed: int
25
+ init: str
26
+ ann_mode: str
27
+ use_approximate_knn: bool
28
+ approx_knn_candidates: int
29
+ approx_knn_iters: int
30
+ approx_knn_threshold: int
31
+
32
+
33
def _as_f32_matrix(x: Any, name: str) -> np.ndarray:
    """Coerce `x` to a C-ordered `float32` array and require it to be 2D.

    Parameters
    ----------
    x:
        Array-like input handed to `np.asarray`.
    name:
        Argument name used in the error message.

    Returns
    -------
    numpy.ndarray
        The converted 2D array.

    Raises
    ------
    ValueError
        If the converted array is not two-dimensional.
    """
    matrix = np.asarray(x, dtype=np.float32, order="C")
    if matrix.ndim == 2:
        return matrix
    raise ValueError(f"{name} must be a 2D array, got ndim={matrix.ndim}")
38
+
39
+
40
def _as_knn_indices(x: Any, name: str) -> np.ndarray:
    """Coerce `x` to a C-ordered `int64` array and require it to be 2D.

    Parameters
    ----------
    x:
        Array-like neighbor indices handed to `np.asarray`.
    name:
        Argument name used in the error message.

    Returns
    -------
    numpy.ndarray
        The converted 2D index array.

    Raises
    ------
    ValueError
        If the converted array is not two-dimensional.
    """
    index_matrix = np.asarray(x, dtype=np.int64, order="C")
    if index_matrix.ndim == 2:
        return index_matrix
    raise ValueError(f"{name} must be a 2D array, got ndim={index_matrix.ndim}")
45
+
46
+
47
+ def _maybe_as_csr_parts(x: Any, name: str) -> tuple[np.ndarray, np.ndarray, np.ndarray, int, int] | None:
48
+ if getattr(x, "format", None) != "csr":
49
+ return None
50
+
51
+ shape = getattr(x, "shape", None)
52
+ if shape is None or len(shape) != 2:
53
+ raise ValueError(f"{name} must be a 2D CSR matrix")
54
+
55
+ n_rows, n_cols = int(shape[0]), int(shape[1])
56
+ if n_cols <= 0:
57
+ raise ValueError(f"{name} must have at least one column")
58
+
59
+ indptr = np.asarray(x.indptr, dtype=np.int64, order="C")
60
+ indices = np.asarray(x.indices, dtype=np.int64, order="C")
61
+ data = np.asarray(x.data, dtype=np.float32, order="C")
62
+ if indptr.ndim != 1 or indices.ndim != 1 or data.ndim != 1:
63
+ raise ValueError(f"{name} CSR arrays must be 1D")
64
+ if indices.shape[0] != data.shape[0]:
65
+ raise ValueError(f"{name} CSR indices/data length mismatch")
66
+ return indptr, indices, data, n_rows, n_cols
67
+
68
+
69
def _as_out_buffer(out: Any, shape: tuple[int, int]) -> np.ndarray:
    """Check that `out` can be used as an in-place result buffer.

    Parameters
    ----------
    out:
        Candidate output buffer supplied by the caller.
    shape:
        Shape the buffer is required to have.

    Returns
    -------
    numpy.ndarray
        `out` itself, unchanged, once every check has passed.

    Raises
    ------
    TypeError
        If `out` is not a NumPy ndarray or its dtype is not `float32`.
    ValueError
        If `out` is not 2D, not C-contiguous, not writeable, or its shape
        differs from `shape`.
    """
    if not isinstance(out, np.ndarray):
        raise TypeError("out must be a NumPy ndarray")

    # Ordered (passes, exception type, message) rows; the first failing row wins.
    layout_checks = (
        (out.dtype == np.float32, TypeError, f"out dtype must be float32, got {out.dtype}"),
        (out.ndim == 2, ValueError, f"out must be 2D, got ndim={out.ndim}"),
        (out.flags.c_contiguous, ValueError, "out must be C-contiguous"),
        (out.flags.writeable, ValueError, "out must be writeable"),
    )
    for passes, error_type, message in layout_checks:
        if not passes:
            raise error_type(message)

    # The shape comparison stays last so `shape` is only touched once the
    # buffer's dtype and layout are known to be acceptable.
    actual_shape = tuple(out.shape)
    if actual_shape != tuple(shape):
        raise ValueError(f"output buffer shape mismatch: expected {shape}, got {actual_shape}")
    return out
83
+
84
+
85
def _normalize_ann_mode(
    ann_mode: Any,
    use_approximate_knn: bool,
    approx_knn_threshold: int,
) -> tuple[str, bool, int]:
    """Resolve `ann_mode` into concrete approximate-kNN settings.

    The mode is compared case-insensitively after `str(...)` conversion:

    - `auto` keeps both caller-supplied settings as they are.
    - `exact` turns approximate kNN off and keeps the threshold.
    - `approximate` turns approximate kNN on and sets the threshold to 0.

    Returns
    -------
    tuple
        `(mode, use_approximate_knn, approx_knn_threshold)` where `mode` is
        the lower-cased mode string.

    Raises
    ------
    ValueError
        If the mode is none of `auto`, `exact`, `approximate`.
    """
    mode = str(ann_mode).lower()
    settings_by_mode = {
        "auto": (use_approximate_knn, approx_knn_threshold),
        "exact": (False, approx_knn_threshold),
        "approximate": (True, 0),
    }
    settings = settings_by_mode.get(mode)
    if settings is None:
        raise ValueError(f"unsupported ann_mode '{ann_mode}', expected auto|exact|approximate")
    return (mode, *settings)
98
+
99
+
100
class Umap:
    """Estimator-style Python front end for the Rust UMAP core.

    This layer only normalizes inputs (array-likes to NumPy arrays, CSR
    objects to their raw arrays) and checks optional `out=` buffers; fitting
    and inference are delegated to the wrapped `UmapCore` object.

    Example
    -------
    >>> import numpy as np
    >>> from umapers import Umap
    >>> x = np.random.default_rng(42).normal(size=(100, 8)).astype(np.float32)
    >>> emb = Umap(n_neighbors=15, n_components=2).fit_transform(x)
    """

    def __init__(
        self,
        *,
        n_neighbors: int = 15,
        n_components: int = 2,
        n_epochs: int | None = None,
        metric: str = "euclidean",
        learning_rate: float = 1.0,
        min_dist: float = 0.1,
        spread: float = 1.0,
        local_connectivity: float = 1.0,
        set_op_mix_ratio: float = 1.0,
        repulsion_strength: float = 1.0,
        negative_sample_rate: int = 5,
        random_seed: int = 42,
        init: str = "spectral",
        ann_mode: str = "auto",
        use_approximate_knn: bool = True,
        approx_knn_candidates: int = 30,
        approx_knn_iters: int = 10,
        approx_knn_threshold: int = 4096,
    ) -> None:
        """Configure a UMAP model; all arguments are keyword-only.

        Parameters
        ----------
        n_neighbors:
            Number of neighbors used to build the neighborhood graph.
        n_components:
            Output embedding dimension.
        n_epochs:
            Number of optimization epochs. If `None`, the Rust core uses its
            internal default.
        metric:
            Distance metric for dense input and query transforms.
        learning_rate, min_dist, spread, local_connectivity,
        set_op_mix_ratio, repulsion_strength, negative_sample_rate,
        random_seed, init:
            Standard UMAP hyperparameters forwarded to the Rust core.
        ann_mode:
            Python-side shortcut for approximate nearest-neighbor behavior.
            Supported values are `auto`, `exact`, and `approximate`.
        use_approximate_knn:
            Default approximate-kNN behavior when `ann_mode="auto"`.
        approx_knn_candidates, approx_knn_iters, approx_knn_threshold:
            Approximate-kNN tuning parameters forwarded to the Rust core.

        Raises
        ------
        ValueError
            If `ann_mode` is not one of the supported values.

        Examples
        --------
        >>> import numpy as np
        >>> from umapers import Umap
        >>> x = np.random.default_rng(42).normal(size=(200, 16)).astype(np.float32)
        >>> model = Umap(n_neighbors=15, n_components=2, init="random")
        >>> emb = model.fit_transform(x)
        >>> emb.shape
        (200, 2)
        """
        # Python-side copies; `n_components` is used to compute `out=` shapes.
        self.n_neighbors = int(n_neighbors)
        self.n_components = int(n_components)

        # `ann_mode` may override the two approximate-kNN settings.
        resolved_mode, resolved_use_approx, resolved_threshold = _normalize_ann_mode(
            ann_mode,
            use_approximate_knn,
            approx_knn_threshold,
        )
        self.ann_mode = resolved_mode

        # The core receives the caller's values as given, except for the two
        # approximate-kNN settings resolved above.
        core_options = {
            "n_neighbors": n_neighbors,
            "n_components": n_components,
            "n_epochs": n_epochs,
            "metric": metric,
            "learning_rate": learning_rate,
            "min_dist": min_dist,
            "spread": spread,
            "local_connectivity": local_connectivity,
            "set_op_mix_ratio": set_op_mix_ratio,
            "repulsion_strength": repulsion_strength,
            "negative_sample_rate": negative_sample_rate,
            "random_seed": random_seed,
            "init": init,
            "use_approximate_knn": resolved_use_approx,
            "approx_knn_candidates": approx_knn_candidates,
            "approx_knn_iters": approx_knn_iters,
            "approx_knn_threshold": resolved_threshold,
        }
        self._core = UmapCore(**core_options)

    def fit(self, data: Any) -> "Umap":
        """Fit the model on dense or CSR input and return the model itself.

        Parameters
        ----------
        data:
            CSR input (an object whose `format` is `"csr"`) is split into
            its raw arrays and passed to the core's sparse fit. Any other
            input is converted to a C-contiguous 2D `float32` matrix of
            shape `(n_samples, n_features)`.

        Returns
        -------
        Umap
            `self`, so calls can be chained.
        """
        sparse_parts = _maybe_as_csr_parts(data, "data")
        if sparse_parts is None:
            dense = _as_f32_matrix(data, "data")
            self._core.fit(dense)
        else:
            indptr, indices, values, _, n_cols = sparse_parts
            self._core.fit_sparse_csr(indptr, indices, values, n_cols)
        return self

    def fit_transform(self, data: Any, *, out: np.ndarray | None = None) -> np.ndarray:
        """Fit the model and return the embedding of the training data.

        Parameters
        ----------
        data:
            Dense input of shape `(n_samples, n_features)`, converted to
            `float32`, or CSR sparse input.
        out:
            Optional writable, C-contiguous `float32` buffer with shape
            `(n_samples, n_components)`. When given, the embedding is
            written into it and the same array is returned.

        Returns
        -------
        numpy.ndarray
            The fitted embedding with shape `(n_samples, n_components)`.
        """
        sparse_parts = _maybe_as_csr_parts(data, "data")

        if sparse_parts is not None:
            indptr, indices, values, n_rows, n_cols = sparse_parts
            if out is None:
                return self._core.fit_transform_sparse_csr(indptr, indices, values, n_cols)
            target = _as_out_buffer(out, (n_rows, self.n_components))
            self._core.fit_transform_sparse_csr_into(indptr, indices, values, n_cols, target)
            return target

        dense = _as_f32_matrix(data, "data")
        if out is None:
            return self._core.fit_transform(dense)
        target = _as_out_buffer(out, (dense.shape[0], self.n_components))
        self._core.fit_transform_into(dense, target)
        return target

    def fit_transform_with_knn(
        self,
        data: Any,
        knn_indices: Any,
        knn_dists: Any,
        *,
        knn_metric: str = "euclidean",
        validate_precomputed: bool = True,
        out: np.ndarray | None = None,
    ) -> np.ndarray:
        """Fit from a caller-supplied kNN graph and return the embedding.

        Advanced entry point for callers that already hold an exact or
        shared kNN graph, for example benchmarks or external neighbor-search
        pipelines. It is not the default quickstart path.

        Parameters
        ----------
        data:
            Dense training data with shape `(n_samples, n_features)`,
            converted to `float32`.
        knn_indices:
            Precomputed neighbor indices with shape `(n_samples, k)`;
            converted to `int64`.
        knn_dists:
            Precomputed neighbor distances with shape `(n_samples, k)`;
            converted to `float32`.
        knn_metric:
            Metric name for the precomputed graph. It must match the model
            metric.
        validate_precomputed:
            If `True`, the Rust core validates the precomputed graph before
            fitting. This method itself only normalizes dtypes and layouts.
        out:
            Optional writable `float32` buffer with shape
            `(n_samples, n_components)`.

        Returns
        -------
        numpy.ndarray
            The fitted embedding with shape `(n_samples, n_components)`.

        Example
        -------
        >>> import numpy as np
        >>> from sklearn.neighbors import NearestNeighbors
        >>> from umapers import Umap
        >>> x = np.random.default_rng(42).normal(size=(64, 8)).astype(np.float32)
        >>> nbrs = NearestNeighbors(n_neighbors=16, algorithm="brute", metric="euclidean")
        >>> nbrs.fit(x)
        >>> dists, idx = nbrs.kneighbors(x)
        >>> emb = Umap(n_neighbors=15, metric="euclidean").fit_transform_with_knn(
        ...     x,
        ...     idx[:, 1:16].astype(np.int64),
        ...     dists[:, 1:16].astype(np.float32),
        ... )
        """
        dense = _as_f32_matrix(data, "data")
        neighbor_ids = _as_knn_indices(knn_indices, "knn_indices")
        neighbor_dists = _as_f32_matrix(knn_dists, "knn_dists")

        if out is not None:
            target = _as_out_buffer(out, (dense.shape[0], self.n_components))
            self._core.fit_transform_with_knn_into(
                dense,
                neighbor_ids,
                neighbor_dists,
                target,
                knn_metric,
                validate_precomputed,
            )
            return target

        return self._core.fit_transform_with_knn(
            dense,
            neighbor_ids,
            neighbor_dists,
            knn_metric,
            validate_precomputed,
        )

    def transform(self, query: Any, *, out: np.ndarray | None = None) -> np.ndarray:
        """Project new dense samples into the learned embedding space.

        Parameters
        ----------
        query:
            Dense input of shape `(n_samples, n_features)`, converted to a
            C-contiguous `float32` matrix.
        out:
            Optional writable `float32` buffer with shape
            `(n_samples, n_components)`.

        Returns
        -------
        numpy.ndarray
            The projected embedding.

        Example
        -------
        >>> import numpy as np
        >>> from umapers import Umap
        >>> x = np.random.default_rng(42).normal(size=(100, 8)).astype(np.float32)
        >>> model = Umap(n_neighbors=15, n_components=2).fit(x)
        >>> query_emb = model.transform(x[:10])
        """
        points = _as_f32_matrix(query, "query")
        if out is not None:
            target = _as_out_buffer(out, (points.shape[0], self.n_components))
            self._core.transform_into(points, target)
            return target
        return self._core.transform(points)

    def inverse_transform(self, embedded_query: Any, *, out: np.ndarray | None = None) -> np.ndarray:
        """Map embedded samples back to the original feature space.

        Parameters
        ----------
        embedded_query:
            Dense embedding of shape `(n_samples, n_components)`, converted
            to `float32`.
        out:
            Optional writable `float32` buffer with shape
            `(n_samples, n_features)`. The model must already be fit before
            using `out=`.

        Returns
        -------
        numpy.ndarray
            Reconstructed samples in the original feature space.

        Raises
        ------
        RuntimeError
            If `out` is given while the core reports no `n_features`.

        Example
        -------
        >>> import numpy as np
        >>> from umapers import Umap
        >>> x = np.random.default_rng(42).normal(size=(100, 8)).astype(np.float32)
        >>> model = Umap(n_neighbors=15, n_components=2).fit(x)
        >>> emb = model.transform(x[:10])
        >>> x_rec = model.inverse_transform(emb)
        """
        embedded = _as_f32_matrix(embedded_query, "embedded_query")
        if out is None:
            return self._core.inverse_transform(embedded)

        # The expected buffer width comes from the core's `n_features`.
        feature_count = self._core.n_features
        if feature_count is None:
            raise RuntimeError("model must be fit before inverse_transform(out=...)")

        target = _as_out_buffer(out, (embedded.shape[0], feature_count))
        self._core.inverse_transform_into(embedded, target)
        return target
414
+
415
+
416
def fit_transform(data: Any, **kwargs: Any) -> np.ndarray:
    """Build a throwaway model from `kwargs` and embed `data` in one call.

    Parameters
    ----------
    data:
        Dense or CSR input accepted by ``Umap.fit_transform``.
    **kwargs:
        Hyperparameters forwarded to ``Umap(...)``. Common keys include
        ``n_neighbors``, ``n_components``, ``metric``, ``init``, and
        ``random_seed``.

    Returns
    -------
    numpy.ndarray
        Embedding with shape ``(n_samples, n_components)`` and dtype
        ``float32``.

    Examples
    --------
    >>> import numpy as np
    >>> from umapers import fit_transform
    >>> x = np.random.default_rng(42).normal(size=(200, 16)).astype(np.float32)
    >>> emb = fit_transform(x, n_neighbors=15, n_components=2, init="random")
    >>> emb.shape
    (200, 2)
    """
    model = Umap(**kwargs)
    sparse_parts = _maybe_as_csr_parts(data, "data")

    # Both paths call the core's `*_stateless` entry points directly.
    if sparse_parts is None:
        dense = _as_f32_matrix(data, "data")
        return model._core.fit_transform_stateless(dense)

    indptr, indices, values, _, n_cols = sparse_parts
    return model._core.fit_transform_sparse_csr_stateless(indptr, indices, values, n_cols)