spatial_memory_mcp-1.6.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of spatial-memory-mcp might be problematic.
- spatial_memory/__init__.py +97 -0
- spatial_memory/__main__.py +270 -0
- spatial_memory/adapters/__init__.py +7 -0
- spatial_memory/adapters/lancedb_repository.py +878 -0
- spatial_memory/config.py +728 -0
- spatial_memory/core/__init__.py +118 -0
- spatial_memory/core/cache.py +317 -0
- spatial_memory/core/circuit_breaker.py +297 -0
- spatial_memory/core/connection_pool.py +220 -0
- spatial_memory/core/consolidation_strategies.py +402 -0
- spatial_memory/core/database.py +3069 -0
- spatial_memory/core/db_idempotency.py +242 -0
- spatial_memory/core/db_indexes.py +575 -0
- spatial_memory/core/db_migrations.py +584 -0
- spatial_memory/core/db_search.py +509 -0
- spatial_memory/core/db_versioning.py +177 -0
- spatial_memory/core/embeddings.py +557 -0
- spatial_memory/core/errors.py +317 -0
- spatial_memory/core/file_security.py +702 -0
- spatial_memory/core/filesystem.py +178 -0
- spatial_memory/core/health.py +289 -0
- spatial_memory/core/helpers.py +79 -0
- spatial_memory/core/import_security.py +432 -0
- spatial_memory/core/lifecycle_ops.py +1067 -0
- spatial_memory/core/logging.py +194 -0
- spatial_memory/core/metrics.py +192 -0
- spatial_memory/core/models.py +628 -0
- spatial_memory/core/rate_limiter.py +326 -0
- spatial_memory/core/response_types.py +497 -0
- spatial_memory/core/security.py +588 -0
- spatial_memory/core/spatial_ops.py +426 -0
- spatial_memory/core/tracing.py +300 -0
- spatial_memory/core/utils.py +110 -0
- spatial_memory/core/validation.py +403 -0
- spatial_memory/factory.py +407 -0
- spatial_memory/migrations/__init__.py +40 -0
- spatial_memory/ports/__init__.py +11 -0
- spatial_memory/ports/repositories.py +631 -0
- spatial_memory/py.typed +0 -0
- spatial_memory/server.py +1141 -0
- spatial_memory/services/__init__.py +70 -0
- spatial_memory/services/export_import.py +1023 -0
- spatial_memory/services/lifecycle.py +1120 -0
- spatial_memory/services/memory.py +412 -0
- spatial_memory/services/spatial.py +1147 -0
- spatial_memory/services/utility.py +409 -0
- spatial_memory/tools/__init__.py +5 -0
- spatial_memory/tools/definitions.py +695 -0
- spatial_memory/verify.py +140 -0
- spatial_memory_mcp-1.6.1.dist-info/METADATA +499 -0
- spatial_memory_mcp-1.6.1.dist-info/RECORD +54 -0
- spatial_memory_mcp-1.6.1.dist-info/WHEEL +4 -0
- spatial_memory_mcp-1.6.1.dist-info/entry_points.txt +2 -0
- spatial_memory_mcp-1.6.1.dist-info/licenses/LICENSE +21 -0
spatial_memory/core/spatial_ops.py

@@ -0,0 +1,426 @@
"""Core spatial algorithms for memory navigation and exploration."""

from __future__ import annotations

import logging
from collections.abc import Sequence
from dataclasses import dataclass, field
from typing import TypeVar

import numpy as np
from numpy.typing import NDArray

logger = logging.getLogger(__name__)

T = TypeVar("T")
Vector = NDArray[np.float32]


# =============================================================================
# Vector Operations
# =============================================================================


def normalize(v: Vector) -> Vector:
    """
    Normalize a vector to unit length.

    Args:
        v: Input vector to normalize.

    Returns:
        Unit vector in same direction as v, or zero vector if input norm is negligible.
    """
    norm = np.linalg.norm(v)
    if norm < 1e-10:
        return np.zeros_like(v)
    return v / norm


def normalize_batch(vectors: Vector, copy: bool = True) -> Vector:
    """
    Normalize multiple vectors efficiently.

    Args:
        vectors: 2D array of shape (n_vectors, n_dimensions).
        copy: If True, creates a copy before modifying. If False, modifies in place.

    Returns:
        Array of unit vectors with same shape as input.
    """
    if copy:
        vectors = vectors.copy()
    norms = np.linalg.norm(vectors, axis=1, keepdims=True)
    norms = np.maximum(norms, 1e-10)
    vectors /= norms
    return vectors
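
# Editor's sketch (not part of spatial_ops.py): minimal use of the two
# normalizers above. copy=True leaves the input intact; copy=False mutates it.
_embs = np.random.default_rng(0).normal(size=(4, 8)).astype(np.float32)
_unit = normalize_batch(_embs)                 # _embs is untouched
assert np.allclose(np.linalg.norm(_unit, axis=1), 1.0, atol=1e-5)
normalize_batch(_embs, copy=False)             # now _embs itself is normalized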

# =============================================================================
# SLERP (Spherical Linear Interpolation)
# =============================================================================


def slerp(v0: Vector, v1: Vector, t: float) -> Vector:
    """
    Spherical linear interpolation between two unit vectors.

    SLERP produces a constant-speed path along the great circle connecting two
    points on the unit sphere. This is more geometrically correct than linear
    interpolation for normalized embedding vectors.

    Handles edge cases:
    - Parallel vectors (omega ~ 0): Falls back to linear interpolation
    - Antipodal vectors (omega ~ pi): Chooses arbitrary perpendicular path

    Args:
        v0: Starting unit vector.
        v1: Ending unit vector.
        t: Interpolation parameter in [0, 1].

    Returns:
        Interpolated unit vector at parameter t.

    Example:
        >>> v0 = np.array([1.0, 0.0, 0.0], dtype=np.float32)
        >>> v1 = np.array([0.0, 1.0, 0.0], dtype=np.float32)
        >>> mid = slerp(v0, v1, 0.5)
        >>> np.linalg.norm(mid)  # Always unit length
        1.0
    """
    # Work in float64 for numerical stability
    v0 = normalize(v0.astype(np.float64))
    v1 = normalize(v1.astype(np.float64))

    # Compute dot product, clamp to [-1, 1] for numerical stability
    dot = np.clip(np.dot(v0, v1), -1.0, 1.0)

    # Handle nearly parallel vectors (dot ~ 1.0)
    # Linear interpolation is a good approximation when angle is very small
    if dot > 0.9995:
        result = v0 + t * (v1 - v0)
        return normalize(result.astype(np.float32))

    # Handle nearly antipodal vectors (dot ~ -1.0)
    # Choose an arbitrary perpendicular path
    if dot < -0.9995:
        perp = _find_perpendicular(v0)
        half_angle = np.pi * t
        result = v0 * np.cos(half_angle) + perp * np.sin(half_angle)
        return result.astype(np.float32)

    # Standard SLERP formula
    omega = np.arccos(dot)
    sin_omega = np.sin(omega)
    s0 = np.sin((1.0 - t) * omega) / sin_omega
    s1 = np.sin(t * omega) / sin_omega

    return (s0 * v0 + s1 * v1).astype(np.float32)
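
# Editor's sketch (not part of spatial_ops.py): the docstring example run end
# to end. Midway between orthogonal unit vectors, SLERP yields the 45-degree
# point, and the result stays on the unit sphere.
_mid = slerp(np.array([1.0, 0.0, 0.0], dtype=np.float32),
             np.array([0.0, 1.0, 0.0], dtype=np.float32), 0.5)
assert np.allclose(_mid, np.array([1.0, 1.0, 0.0]) / np.sqrt(2), atol=1e-6)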

def _find_perpendicular(v: Vector) -> Vector:
    """
    Find a unit vector perpendicular to v.

    Uses the approach of creating a vector from the standard basis that differs
    most from v, then applying Gram-Schmidt orthogonalization.

    Args:
        v: Input unit vector.

    Returns:
        A unit vector orthogonal to v.
    """
    # Find the component with smallest absolute value
    min_idx = np.argmin(np.abs(v))

    # Create a basis vector that differs most from v
    basis = np.zeros_like(v)
    basis[min_idx] = 1.0

    # Gram-Schmidt: subtract projection of basis onto v
    perp = basis - np.dot(v, basis) * v
    return normalize(perp)
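
# Editor's sketch (not part of spatial_ops.py): the helper's output is
# orthogonal to its input, which the antipodal branch of slerp relies on.
_v = np.array([0.6, 0.8, 0.0], dtype=np.float32)
assert np.isclose(np.dot(_find_perpendicular(_v), _v), 0.0, atol=1e-6)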

def slerp_path(
    v0: Vector,
    v1: Vector,
    steps: int,
    include_endpoints: bool = True,
) -> list[Vector]:
    """
    Generate N interpolation steps between two vectors using SLERP.

    Creates a path of evenly-spaced points along the great circle connecting
    two embedding vectors. Useful for exploring the semantic space between
    two memories.

    Args:
        v0: Starting vector.
        v1: Ending vector.
        steps: Number of vectors to generate.
        include_endpoints: If True, path starts at v0 and ends at v1.
            If False, generates intermediate points only.

    Returns:
        List of interpolated unit vectors.

    Raises:
        ValueError: If steps < 1.

    Example:
        >>> v0 = np.array([1.0, 0.0, 0.0], dtype=np.float32)
        >>> v1 = np.array([0.0, 1.0, 0.0], dtype=np.float32)
        >>> path = slerp_path(v0, v1, steps=5)
        >>> len(path)
        5
    """
    if steps < 1:
        raise ValueError("steps must be at least 1")

    vectors: list[Vector] = []

    if include_endpoints:
        for i in range(steps):
            t = i / (steps - 1) if steps > 1 else 0.0
            vectors.append(slerp(v0, v1, t))
    else:
        for i in range(steps):
            t = (i + 1) / (steps + 1)
            vectors.append(slerp(v0, v1, t))

    return vectors
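
# Editor's sketch (not part of spatial_ops.py): with include_endpoints=False
# the same call yields only interior points, at t = 1/6 ... 5/6.
_path = slerp_path(np.array([1.0, 0.0, 0.0], dtype=np.float32),
                   np.array([0.0, 1.0, 0.0], dtype=np.float32),
                   steps=5, include_endpoints=False)
assert len(_path) == 5
assert all(np.isclose(np.linalg.norm(p), 1.0, atol=1e-6) for p in _path)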

# =============================================================================
# Temperature-based Selection (for Wander)
# =============================================================================


def softmax_with_temperature(
    scores: NDArray[np.float64],
    temperature: float = 1.0,
) -> NDArray[np.float64]:
    """
    Compute softmax probabilities with temperature scaling.

    Temperature controls the randomness of the resulting distribution:
    - T -> 0: Deterministic (all probability mass on highest score)
    - T = 1: Standard softmax
    - T -> inf: Uniform random selection

    Uses numerically stable computation by shifting scores before exponentiation.

    Args:
        scores: Array of raw scores (higher = better).
        temperature: Temperature parameter (must be >= 0).

    Returns:
        Probability distribution over scores (sums to 1).

    Raises:
        ValueError: If temperature is negative.

    Example:
        >>> scores = np.array([0.9, 0.7, 0.3])
        >>> probs = softmax_with_temperature(scores, temperature=1.0)
        >>> np.sum(probs)  # Always sums to 1
        1.0
    """
    if temperature < 0:
        raise ValueError("Temperature must be non-negative")

    scores = np.asarray(scores, dtype=np.float64)

    if len(scores) == 0:
        return np.array([], dtype=np.float64)

    if len(scores) == 1:
        return np.array([1.0], dtype=np.float64)

    # Handle temperature = 0 (greedy/deterministic selection)
    if temperature < 1e-10:
        result = np.zeros_like(scores)
        result[np.argmax(scores)] = 1.0
        return result

    # Scale scores by temperature
    scaled = scores / temperature

    # Subtract max for numerical stability (prevents overflow in exp)
    scaled_shifted = scaled - np.max(scaled)
    exp_scores = np.exp(scaled_shifted)

    return exp_scores / np.sum(exp_scores)
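
# Editor's sketch (not part of spatial_ops.py): how temperature reshapes one
# set of scores (comment values rounded to two decimals).
_s = np.array([0.9, 0.7, 0.3])
_p1 = softmax_with_temperature(_s, temperature=1.0)  # ~[0.42, 0.35, 0.23]
_p2 = softmax_with_temperature(_s, temperature=0.1)  # ~[0.88, 0.12, 0.00]
_p3 = softmax_with_temperature(_s, temperature=0.0)  # exactly [1.0, 0.0, 0.0]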

def temperature_select(
    items: Sequence[T],
    scores: NDArray[np.float64],
    temperature: float = 1.0,
    rng: np.random.Generator | None = None,
) -> T:
    """
    Select an item using temperature-scaled softmax probabilities.

    Combines softmax_with_temperature with random selection. Lower temperatures
    favor higher-scored items, while higher temperatures approach uniform random.

    Args:
        items: Sequence of items to choose from.
        scores: Score for each item (higher = more likely to be selected).
        temperature: Controls randomness (0.1=focused, 2.0=random).
        rng: Optional numpy random generator for reproducibility.

    Returns:
        Selected item from the sequence.

    Raises:
        ValueError: If items and scores have different lengths.

    Example:
        >>> items = ["a", "b", "c"]
        >>> scores = np.array([0.9, 0.7, 0.3])
        >>> # Low temperature: almost always picks "a"
        >>> temperature_select(items, scores, temperature=0.1)
        'a'
    """
    if len(items) != len(scores):
        raise ValueError("items and scores must have same length")

    if rng is None:
        rng = np.random.default_rng()

    probabilities = softmax_with_temperature(scores, temperature)
    idx = rng.choice(len(items), p=probabilities)
    return items[idx]
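
# Editor's sketch (not part of spatial_ops.py): passing a seeded generator
# makes the stochastic pick reproducible across runs.
_rng = np.random.default_rng(42)
_pick = temperature_select(["a", "b", "c"], np.array([0.9, 0.7, 0.3]),
                           temperature=0.1, rng=_rng)
assert _pick in ("a", "b", "c")  # "a" is drawn with ~88% probability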

# =============================================================================
# HDBSCAN Clustering (for Regions)
# =============================================================================


@dataclass
class ClusterInfo:
    """
    Information about a discovered cluster.

    Represents a semantic region in the memory space discovered by HDBSCAN
    clustering. Contains metadata about the cluster including its size,
    central tendency, and sample members.

    Attributes:
        cluster_id: Unique identifier for this cluster (-1 indicates noise).
        size: Number of memories in this cluster.
        centroid: Mean vector of all memories in the cluster (normalized).
        centroid_memory_id: ID of the memory closest to the centroid.
        sample_memory_ids: IDs of representative sample memories.
        coherence: Average pairwise similarity within the cluster (0-1).
        keywords: Extracted topic keywords for this cluster.
    """

    cluster_id: int
    size: int
    centroid: Vector
    centroid_memory_id: str
    sample_memory_ids: list[str]
    coherence: float
    keywords: list[str] = field(default_factory=list)
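
# Editor's sketch (not part of spatial_ops.py): a hand-built record with
# hypothetical memory IDs, showing the shape of the dataclass.
_info = ClusterInfo(
    cluster_id=0,
    size=3,
    centroid=normalize(np.ones(4, dtype=np.float32)),
    centroid_memory_id="mem-001",              # hypothetical ID
    sample_memory_ids=["mem-001", "mem-007"],  # hypothetical IDs
    coherence=0.82,
)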

def configure_hdbscan(
    n_samples: int,
    min_cluster_size: int | None = None,
    min_samples: int | None = None,
) -> dict:
    """
    Configure HDBSCAN parameters based on dataset characteristics.

    Provides sensible defaults for HDBSCAN clustering on embedding vectors.
    The min_cluster_size is computed adaptively based on dataset size if not
    provided explicitly.

    Args:
        n_samples: Number of samples in the dataset.
        min_cluster_size: Minimum number of points to form a cluster.
            If None, computed as sqrt(n_samples)/2, clamped to [3, 50].
        min_samples: Minimum samples in neighborhood for core point.
            If None, set to min_cluster_size // 2.

    Returns:
        Dictionary of HDBSCAN parameters ready to use with hdbscan.HDBSCAN().

    Example:
        >>> params = configure_hdbscan(1000)
        >>> params["min_cluster_size"]
        15
        >>> import hdbscan  # doctest: +SKIP
        >>> clusterer = hdbscan.HDBSCAN(**params)  # doctest: +SKIP
    """
    if min_cluster_size is None:
        # Adaptive min_cluster_size based on dataset size
        min_cluster_size = max(3, int(np.sqrt(n_samples) / 2))
        min_cluster_size = min(min_cluster_size, 50)

    if min_samples is None:
        min_samples = max(2, min_cluster_size // 2)

    return {
        "min_cluster_size": min_cluster_size,
        "min_samples": min_samples,
        "metric": "euclidean",  # Use with normalized vectors for cosine distance
        "cluster_selection_method": "eom",  # Excess of Mass for varied cluster sizes
        "core_dist_n_jobs": -1,  # Use all available cores
    }
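
# Editor's sketch (not part of spatial_ops.py): wiring the parameters into
# hdbscan, assuming the optional hdbscan package is installed and vectors are
# L2-normalized so euclidean distance is monotonic in cosine distance.
def _cluster_sketch(embeddings: Vector):
    import hdbscan  # optional dependency, assumed available

    vectors = normalize_batch(embeddings)
    clusterer = hdbscan.HDBSCAN(**configure_hdbscan(len(vectors)))
    return clusterer.fit_predict(vectors)  # array of labels; -1 marks noise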

# =============================================================================
# UMAP Projection (for Visualize)
# =============================================================================


def configure_umap(
    n_samples: int,
    n_components: int = 2,
    n_neighbors: int = 15,
    min_dist: float = 0.1,
    random_state: int = 42,
) -> dict:
    """
    Configure UMAP parameters for memory visualization.

    Provides sensible defaults for projecting high-dimensional embedding
    vectors to 2D or 3D for visualization. Parameters are adjusted based
    on the number of samples.

    Args:
        n_samples: Number of samples to project.
        n_components: Target dimensionality (2 for 2D, 3 for 3D visualization).
        n_neighbors: Size of local neighborhood for manifold approximation.
            Larger values capture more global structure.
        min_dist: Minimum distance between points in embedded space.
            Smaller values create tighter clusters.
        random_state: Random seed for reproducibility.

    Returns:
        Dictionary of UMAP parameters ready to use with umap.UMAP().

    Example:
        >>> params = configure_umap(500, n_components=2)
        >>> params["n_neighbors"]
        15
        >>> import umap  # doctest: +SKIP
        >>> reducer = umap.UMAP(**params)  # doctest: +SKIP
    """
    return {
        "n_components": n_components,
        # n_neighbors cannot exceed n_samples - 1
        "n_neighbors": min(n_neighbors, n_samples - 1),
        "min_dist": min_dist,
        "metric": "cosine",  # Natural metric for embeddings
        "random_state": random_state,
        # Enable low memory mode for large datasets
        "low_memory": n_samples > 5000,
    }
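
# Editor's sketch (not part of spatial_ops.py): projecting memories to 2D,
# assuming the optional umap-learn package is installed.
def _project_sketch(embeddings: Vector):
    import umap  # optional dependency, assumed available

    params = configure_umap(len(embeddings))
    return umap.UMAP(**params).fit_transform(embeddings)  # shape (n, 2)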