spatial-memory-mcp 1.9.1 (spatial_memory_mcp-1.9.1-py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. spatial_memory/__init__.py +97 -0
  2. spatial_memory/__main__.py +271 -0
  3. spatial_memory/adapters/__init__.py +7 -0
  4. spatial_memory/adapters/lancedb_repository.py +880 -0
  5. spatial_memory/config.py +769 -0
  6. spatial_memory/core/__init__.py +118 -0
  7. spatial_memory/core/cache.py +317 -0
  8. spatial_memory/core/circuit_breaker.py +297 -0
  9. spatial_memory/core/connection_pool.py +220 -0
  10. spatial_memory/core/consolidation_strategies.py +401 -0
  11. spatial_memory/core/database.py +3072 -0
  12. spatial_memory/core/db_idempotency.py +242 -0
  13. spatial_memory/core/db_indexes.py +576 -0
  14. spatial_memory/core/db_migrations.py +588 -0
  15. spatial_memory/core/db_search.py +512 -0
  16. spatial_memory/core/db_versioning.py +178 -0
  17. spatial_memory/core/embeddings.py +558 -0
  18. spatial_memory/core/errors.py +317 -0
  19. spatial_memory/core/file_security.py +701 -0
  20. spatial_memory/core/filesystem.py +178 -0
  21. spatial_memory/core/health.py +289 -0
  22. spatial_memory/core/helpers.py +79 -0
  23. spatial_memory/core/import_security.py +433 -0
  24. spatial_memory/core/lifecycle_ops.py +1067 -0
  25. spatial_memory/core/logging.py +194 -0
  26. spatial_memory/core/metrics.py +192 -0
  27. spatial_memory/core/models.py +660 -0
  28. spatial_memory/core/rate_limiter.py +326 -0
  29. spatial_memory/core/response_types.py +500 -0
  30. spatial_memory/core/security.py +588 -0
  31. spatial_memory/core/spatial_ops.py +430 -0
  32. spatial_memory/core/tracing.py +300 -0
  33. spatial_memory/core/utils.py +110 -0
  34. spatial_memory/core/validation.py +406 -0
  35. spatial_memory/factory.py +444 -0
  36. spatial_memory/migrations/__init__.py +40 -0
  37. spatial_memory/ports/__init__.py +11 -0
  38. spatial_memory/ports/repositories.py +630 -0
  39. spatial_memory/py.typed +0 -0
  40. spatial_memory/server.py +1214 -0
  41. spatial_memory/services/__init__.py +70 -0
  42. spatial_memory/services/decay_manager.py +411 -0
  43. spatial_memory/services/export_import.py +1031 -0
  44. spatial_memory/services/lifecycle.py +1139 -0
  45. spatial_memory/services/memory.py +412 -0
  46. spatial_memory/services/spatial.py +1152 -0
  47. spatial_memory/services/utility.py +429 -0
  48. spatial_memory/tools/__init__.py +5 -0
  49. spatial_memory/tools/definitions.py +695 -0
  50. spatial_memory/verify.py +140 -0
  51. spatial_memory_mcp-1.9.1.dist-info/METADATA +509 -0
  52. spatial_memory_mcp-1.9.1.dist-info/RECORD +55 -0
  53. spatial_memory_mcp-1.9.1.dist-info/WHEEL +4 -0
  54. spatial_memory_mcp-1.9.1.dist-info/entry_points.txt +2 -0
  55. spatial_memory_mcp-1.9.1.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,1152 @@
1
+ """Spatial service for exploration operations.
2
+
3
+ This service provides the spatial layer for memory exploration:
4
+ - journey: SLERP interpolation between two memories
5
+ - wander: Temperature-based random walk through memory space
6
+ - regions: HDBSCAN clustering to discover memory regions
7
+ - visualize: UMAP projection for 2D/3D visualization
8
+
9
+ The service uses dependency injection for repository and embedding services.
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ import json
15
+ import logging
16
+ import random
17
+ import re
18
+ from collections import Counter
19
+ from dataclasses import dataclass
20
+ from typing import TYPE_CHECKING, Any, Literal
21
+
22
+ import numpy as np
23
+
24
+ from spatial_memory.core.errors import (
25
+ ClusteringError,
26
+ InsufficientMemoriesError,
27
+ JourneyError,
28
+ MemoryNotFoundError,
29
+ ValidationError,
30
+ VisualizationError,
31
+ WanderError,
32
+ )
33
+ from spatial_memory.core.models import (
34
+ JourneyResult,
35
+ JourneyStep,
36
+ MemoryResult,
37
+ RegionCluster,
38
+ RegionsResult,
39
+ VisualizationEdge,
40
+ VisualizationNode,
41
+ VisualizationResult,
42
+ WanderResult,
43
+ WanderStep,
44
+ )
45
+ from spatial_memory.core.utils import utc_now
46
+ from spatial_memory.core.validation import validate_namespace, validate_uuid
47
+
48
+ logger = logging.getLogger(__name__)
49
+
50
+ # Check optional dependency availability at import time
51
+ try:
52
+ import hdbscan
53
+
54
+ HDBSCAN_AVAILABLE = True
55
+ except ImportError:
56
+ HDBSCAN_AVAILABLE = False
57
+ logger.debug("HDBSCAN not available - regions operation will be disabled")
58
+
59
+ try:
60
+ import umap
61
+
62
+ UMAP_AVAILABLE = True
63
+ except ImportError:
64
+ UMAP_AVAILABLE = False
65
+ logger.debug("UMAP not available - visualize operation will be disabled")
66
+
67
+ try:
68
+ from scipy.spatial.distance import cdist
69
+
70
+ SCIPY_AVAILABLE = True
71
+ except ImportError:
72
+ SCIPY_AVAILABLE = False
73
+ logger.debug("scipy not available - using fallback for similarity calculations")
74
+
75
+ # Common stop words for keyword extraction (module-level to avoid recreation)
76
+ _STOP_WORDS: frozenset[str] = frozenset({
77
+ "the", "a", "an", "is", "are", "was", "were", "be", "been", "being",
78
+ "have", "has", "had", "do", "does", "did", "will", "would", "could",
79
+ "should", "may", "might", "must", "can", "to", "of", "in", "for",
80
+ "on", "with", "at", "by", "from", "as", "into", "through", "during",
81
+ "before", "after", "above", "below", "between", "under", "again",
82
+ "further", "then", "once", "here", "there", "when", "where", "why",
83
+ "how", "all", "each", "few", "more", "most", "other", "some", "such",
84
+ "no", "nor", "not", "only", "own", "same", "so", "than", "too",
85
+ "very", "just", "also", "now", "and", "but", "or", "if", "it", "its",
86
+ "this", "that", "these", "those", "i", "you", "he", "she", "we", "they",
87
+ })
88
+
89
+ if TYPE_CHECKING:
90
+ from spatial_memory.ports.repositories import (
91
+ EmbeddingServiceProtocol,
92
+ MemoryRepositoryProtocol,
93
+ )
94
+
95
+
96
+ @dataclass
97
+ class SpatialConfig:
98
+ """Configuration for spatial operations.
99
+
100
+ Attributes:
101
+ journey_default_steps: Default number of interpolation steps for journey.
102
+ journey_max_steps: Maximum allowed steps for journey.
103
+ journey_neighbors_per_step: Number of neighbors to find per interpolation point.
104
+ wander_default_steps: Default number of steps for random walk.
105
+ wander_max_steps: Maximum allowed steps for wander.
106
+ wander_default_temperature: Default temperature (randomness) for wander.
107
+ wander_avoid_recent: Number of recent memories to avoid revisiting.
108
+ wander_candidates_per_step: Number of candidate neighbors per step.
109
+ regions_min_cluster_size: Minimum cluster size for HDBSCAN.
110
+ regions_max_memories: Maximum memories to consider for clustering.
111
+ visualize_n_neighbors: UMAP n_neighbors parameter.
112
+ visualize_min_dist: UMAP min_dist parameter.
113
+ visualize_max_memories: Maximum memories to include in visualization.
114
+ visualize_similarity_threshold: Minimum similarity for edge creation.
115
+ """
116
+
117
+ # Journey parameters
118
+ journey_default_steps: int = 10
119
+ journey_max_steps: int = 20
120
+ journey_neighbors_per_step: int = 3
121
+
122
+ # Wander parameters
123
+ wander_default_steps: int = 10
124
+ wander_max_steps: int = 20
125
+ wander_default_temperature: float = 0.5
126
+ wander_avoid_recent: int = 5
127
+ wander_candidates_per_step: int = 10
128
+
129
+ # Regions parameters
130
+ regions_min_cluster_size: int = 3
131
+ regions_max_memories: int = 10_000
132
+
133
+ # Visualize parameters
134
+ visualize_n_neighbors: int = 15
135
+ visualize_min_dist: float = 0.1
136
+ visualize_max_memories: int = 500
137
+ visualize_similarity_threshold: float = 0.7
138
+
139
+
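As a rough usage sketch (not taken from the package docs), the dataclass defaults above can be overridden field by field when wiring the service; `repo` and `embedder` below are placeholders for any objects satisfying MemoryRepositoryProtocol and EmbeddingServiceProtocol.

from spatial_memory.services.spatial import SpatialConfig, SpatialService

cfg = SpatialConfig(journey_default_steps=8, wander_default_temperature=0.3)
print(cfg.journey_max_steps)          # untouched fields keep their defaults (20)

# service = SpatialService(repository=repo, embeddings=embedder, config=cfg)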
140
+ # Color palette for cluster visualization
141
+ CLUSTER_COLORS = [
142
+ "#4285F4", # Blue
143
+ "#EA4335", # Red
144
+ "#FBBC04", # Yellow
145
+ "#34A853", # Green
146
+ "#FF6D01", # Orange
147
+ "#46BDC6", # Cyan
148
+ "#7B1FA2", # Purple
149
+ "#E91E63", # Pink
150
+ "#009688", # Teal
151
+ "#795548", # Brown
152
+ ]
153
+
154
+
155
+ class SpatialService:
156
+ """Service for spatial exploration of memory space.
157
+
158
+ Uses Clean Architecture - depends on protocol interfaces, not implementations.
159
+ """
160
+
161
+ def __init__(
162
+ self,
163
+ repository: MemoryRepositoryProtocol,
164
+ embeddings: EmbeddingServiceProtocol,
165
+ config: SpatialConfig | None = None,
166
+ ) -> None:
167
+ """Initialize the spatial service.
168
+
169
+ Args:
170
+ repository: Repository for memory storage.
171
+ embeddings: Service for generating embeddings.
172
+ config: Optional configuration (uses defaults if not provided).
173
+ """
174
+ self._repo = repository
175
+ self._embeddings = embeddings
176
+ self._config = config or SpatialConfig()
177
+
178
+ def journey(
179
+ self,
180
+ start_id: str,
181
+ end_id: str,
182
+ steps: int | None = None,
183
+ namespace: str | None = None,
184
+ ) -> JourneyResult:
185
+ """Find a path between two memories using SLERP interpolation.
186
+
187
+ Spherical Linear Interpolation (SLERP) creates smooth paths through
188
+ embedding space, finding actual memories closest to each interpolation
189
+ point.
190
+
191
+ Args:
192
+ start_id: Starting memory UUID.
193
+ end_id: Ending memory UUID.
194
+ steps: Number of interpolation steps (default from config).
195
+ namespace: Optional namespace filter for intermediate memories.
196
+
197
+ Returns:
198
+ JourneyResult with path steps.
199
+
200
+ Raises:
201
+ ValidationError: If input validation fails.
202
+ MemoryNotFoundError: If start or end memory not found.
203
+ JourneyError: If path cannot be computed.
204
+ """
205
+ # Validate inputs
206
+ start_id = validate_uuid(start_id)
207
+ end_id = validate_uuid(end_id)
208
+ if namespace is not None:
209
+ namespace = validate_namespace(namespace)
210
+
211
+ # Get step count
212
+ actual_steps = steps if steps is not None else self._config.journey_default_steps
213
+ if actual_steps < 2:
214
+ raise ValidationError("Journey requires at least 2 steps")
215
+ if actual_steps > self._config.journey_max_steps:
216
+ raise ValidationError(
217
+ f"Maximum journey steps is {self._config.journey_max_steps}"
218
+ )
219
+
220
+ # Get start and end memories with vectors
221
+ start_result = self._repo.get_with_vector(start_id)
222
+ if start_result is None:
223
+ raise MemoryNotFoundError(start_id)
224
+ start_memory, start_vector = start_result
225
+
226
+ end_result = self._repo.get_with_vector(end_id)
227
+ if end_result is None:
228
+ raise MemoryNotFoundError(end_id)
229
+ end_memory, end_vector = end_result
230
+
231
+ try:
232
+ # Generate interpolation points using SLERP
233
+ interpolated_vectors, t_values = self._slerp_interpolate(
234
+ start_vector, end_vector, actual_steps
235
+ )
236
+
237
+ # Find nearest memories for each interpolation point
238
+ # Use batch search for efficiency, include vectors to avoid N+1 queries
239
+ search_results = self._batch_vector_search(
240
+ interpolated_vectors,
241
+ limit_per_query=self._config.journey_neighbors_per_step,
242
+ namespace=namespace,
243
+ include_vector=True, # Include vectors to avoid follow-up queries
244
+ )
245
+
246
+ # Build journey steps
247
+ journey_steps: list[JourneyStep] = []
248
+ steps_with_memories = 0
249
+
250
+ for step_num, (interp_vec, t_val, neighbors) in enumerate(
251
+ zip(interpolated_vectors, t_values, search_results)
252
+ ):
253
+ # Calculate distance from interpolation point to nearest memory
254
+ distance_to_path = float("inf")
255
+ if neighbors:
256
+ for neighbor in neighbors:
257
+ # Use vector from search result (included via include_vector=True)
258
+ if neighbor.vector is not None:
259
+ neighbor_vec = np.array(neighbor.vector, dtype=np.float32)
260
+ dist = self._cosine_distance(interp_vec, neighbor_vec)
261
+ else:
262
+ # Fallback if vector not included (shouldn't happen)
263
+ dist = self._cosine_distance(
264
+ interp_vec, self._get_vector_for_memory(neighbor.id)
265
+ )
266
+ if dist < distance_to_path:
267
+ distance_to_path = dist
268
+ steps_with_memories += 1
269
+
270
+ # Use 0.0 if no memories found (inf means no distance calculated)
271
+ # Clamp to 0.0 to handle floating point precision errors
272
+ if distance_to_path == float("inf"):
273
+ final_distance = 0.0
274
+ else:
275
+ final_distance = max(0.0, distance_to_path)
276
+ journey_steps.append(
277
+ JourneyStep(
278
+ step=step_num,
279
+ t=t_val,
280
+ position=interp_vec.tolist(),
281
+ nearby_memories=neighbors,
282
+ distance_to_path=final_distance,
283
+ )
284
+ )
285
+
286
+ # Calculate path coverage
287
+ path_coverage = steps_with_memories / len(journey_steps) if journey_steps else 0.0
288
+
289
+ return JourneyResult(
290
+ start_id=start_id,
291
+ end_id=end_id,
292
+ steps=journey_steps,
293
+ path_coverage=path_coverage,
294
+ )
295
+
296
+ except Exception as e:
297
+ if isinstance(e, (ValidationError, MemoryNotFoundError)):
298
+ raise
299
+ raise JourneyError(f"Failed to compute journey: {e}") from e
300
+
301
+ def wander(
302
+ self,
303
+ start_id: str,
304
+ steps: int | None = None,
305
+ temperature: float | None = None,
306
+ namespace: str | None = None,
307
+ ) -> WanderResult:
308
+ """Perform a random walk through memory space.
309
+
310
+ Temperature controls randomness:
311
+ - 0.0 = Always pick the most similar (greedy)
312
+ - 0.5 = Balanced exploration
313
+ - 1.0 = Highly random selection
314
+
315
+ Args:
316
+ start_id: Starting memory UUID.
317
+ steps: Number of steps to wander (default from config).
318
+ temperature: Randomness factor 0.0-1.0 (default from config).
319
+ namespace: Optional namespace filter.
320
+
321
+ Returns:
322
+ WanderResult with path taken.
323
+
324
+ Raises:
325
+ ValidationError: If input validation fails.
326
+ MemoryNotFoundError: If start memory not found.
327
+ WanderError: If walk cannot continue.
328
+ """
329
+ # Validate inputs
330
+ start_id = validate_uuid(start_id)
331
+ if namespace is not None:
332
+ namespace = validate_namespace(namespace)
333
+
334
+ # Get parameters
335
+ actual_steps = steps if steps is not None else self._config.wander_default_steps
336
+ if actual_steps < 1:
337
+ raise ValidationError("Wander requires at least 1 step")
338
+ if actual_steps > self._config.wander_max_steps:
339
+ raise ValidationError(
340
+ f"Maximum wander steps is {self._config.wander_max_steps}"
341
+ )
342
+
343
+ actual_temp = (
344
+ temperature
345
+ if temperature is not None
346
+ else self._config.wander_default_temperature
347
+ )
348
+ if not 0.0 <= actual_temp <= 1.0:
349
+ raise ValidationError("Temperature must be between 0.0 and 1.0")
350
+
351
+ # Verify start memory exists
352
+ start_result = self._repo.get_with_vector(start_id)
353
+ if start_result is None:
354
+ raise MemoryNotFoundError(start_id)
355
+ current_memory, current_vector = start_result
356
+
357
+ try:
358
+ wander_steps: list[WanderStep] = []
359
+ visited_ids: set[str] = {start_id}
360
+ recent_ids: list[str] = [start_id]
361
+ total_distance = 0.0
362
+ prev_vector = current_vector
363
+
364
+ for step_num in range(actual_steps):
365
+ # Find candidates from current position
366
+ # Include vectors to avoid follow-up get_with_vector queries
367
+ neighbors = self._repo.search(
368
+ current_vector,
369
+ limit=self._config.wander_candidates_per_step + len(visited_ids),
370
+ namespace=namespace,
371
+ include_vector=True,
372
+ )
373
+
374
+ # Filter out recently visited
375
+ candidates = [
376
+ n
377
+ for n in neighbors
378
+ if n.id not in recent_ids[-self._config.wander_avoid_recent :]
379
+ ]
380
+
381
+ if not candidates:
382
+ # No non-recent candidates - relax the filter and allow revisiting anything except the current memory
383
+ candidates = [n for n in neighbors if n.id != recent_ids[-1]]
384
+
385
+ if not candidates:
386
+ logger.warning(
387
+ f"Wander ended early at step {step_num}: no candidates"
388
+ )
389
+ break
390
+
391
+ # Select next memory based on temperature
392
+ next_memory, selection_prob = self._temperature_select(
393
+ candidates, actual_temp
394
+ )
395
+
396
+ # Get vector from search result (included via include_vector=True)
397
+ if next_memory.vector is not None:
398
+ next_vector = np.array(next_memory.vector, dtype=np.float32)
399
+ else:
400
+ # Fallback if vector not included (shouldn't happen)
401
+ next_result = self._repo.get_with_vector(next_memory.id)
402
+ if next_result is None:
403
+ logger.warning(f"Memory {next_memory.id} disappeared during wander")
404
+ break
405
+ _, next_vector = next_result
406
+
407
+ step_distance = self._cosine_distance(prev_vector, next_vector)
408
+ total_distance += step_distance
409
+
410
+ wander_steps.append(
411
+ WanderStep(
412
+ step=step_num,
413
+ memory=next_memory,
414
+ similarity_to_previous=next_memory.similarity,
415
+ selection_probability=selection_prob,
416
+ )
417
+ )
418
+
419
+ visited_ids.add(next_memory.id)
420
+ recent_ids.append(next_memory.id)
421
+ current_vector = next_vector
422
+ prev_vector = next_vector
423
+
424
+ return WanderResult(
425
+ start_id=start_id,
426
+ steps=wander_steps,
427
+ total_distance=total_distance,
428
+ )
429
+
430
+ except Exception as e:
431
+ if isinstance(e, (ValidationError, MemoryNotFoundError)):
432
+ raise
433
+ raise WanderError(f"Wander failed: {e}") from e
434
+
435
+ def regions(
436
+ self,
437
+ namespace: str | None = None,
438
+ min_cluster_size: int | None = None,
439
+ max_clusters: int | None = None,
440
+ ) -> RegionsResult:
441
+ """Discover memory regions using HDBSCAN clustering.
442
+
443
+ HDBSCAN automatically determines the number of clusters and
444
+ identifies outliers (noise points).
445
+
446
+ Args:
447
+ namespace: Optional namespace filter.
448
+ min_cluster_size: Minimum points per cluster (default from config).
449
+ max_clusters: Maximum clusters to return (None = all).
450
+
451
+ Returns:
452
+ RegionsResult with discovered clusters.
453
+
454
+ Raises:
455
+ ValidationError: If input validation fails.
456
+ ClusteringError: If clustering fails or HDBSCAN unavailable.
457
+ InsufficientMemoriesError: If not enough memories for clustering.
458
+ """
459
+ if not HDBSCAN_AVAILABLE:
460
+ raise ClusteringError(
461
+ "HDBSCAN is not available. Install with: pip install hdbscan"
462
+ )
463
+
464
+ # Validate inputs
465
+ if namespace is not None:
466
+ namespace = validate_namespace(namespace)
467
+
468
+ actual_min_size = (
469
+ min_cluster_size
470
+ if min_cluster_size is not None
471
+ else self._config.regions_min_cluster_size
472
+ )
473
+ if actual_min_size < 2:
474
+ raise ValidationError("Minimum cluster size must be at least 2")
475
+
476
+ try:
477
+ # Fetch all vectors for clustering
478
+ all_memories = self._repo.get_all(
479
+ namespace=namespace, limit=self._config.regions_max_memories
480
+ )
481
+
482
+ if len(all_memories) < actual_min_size:
483
+ raise InsufficientMemoriesError(
484
+ required=actual_min_size,
485
+ available=len(all_memories),
486
+ operation="regions",
487
+ )
488
+
489
+ # Extract IDs and vectors
490
+ memory_map = {m.id: (m, v) for m, v in all_memories}
491
+ memory_ids = list(memory_map.keys())
492
+ vectors = np.array([v for _, v in all_memories], dtype=np.float32)
493
+
494
+ # Run HDBSCAN clustering
495
+ clusterer = hdbscan.HDBSCAN(
496
+ min_cluster_size=actual_min_size,
497
+ metric="euclidean", # Works well with normalized vectors
498
+ cluster_selection_method="eom", # Excess of Mass
499
+ )
500
+ labels = clusterer.fit_predict(vectors)
501
+
502
+ # Process clusters
503
+ clusters: list[RegionCluster] = []
504
+ unique_labels = set(labels)
505
+
506
+ # Remove noise label (-1) for cluster processing
507
+ cluster_labels = [label for label in unique_labels if label >= 0]
508
+
509
+ for cluster_id in cluster_labels:
510
+ # Get indices of memories in this cluster
511
+ cluster_indices = [
512
+ i for i, lbl in enumerate(labels) if lbl == cluster_id
513
+ ]
514
+ cluster_vectors = vectors[cluster_indices]
515
+ cluster_ids = [memory_ids[i] for i in cluster_indices]
516
+
517
+ # Find centroid and closest memory to centroid
518
+ centroid = cluster_vectors.mean(axis=0)
519
+ distances_to_centroid = np.linalg.norm(
520
+ cluster_vectors - centroid, axis=1
521
+ )
522
+ centroid_idx = int(np.argmin(distances_to_centroid))
523
+ centroid_memory_id = cluster_ids[centroid_idx]
524
+
525
+ # Calculate coherence (inverse of average intra-cluster distance)
526
+ avg_dist = float(distances_to_centroid.mean())
527
+ max_possible_dist = 2.0 # Max distance for normalized vectors
528
+ coherence = max(0.0, min(1.0, 1.0 - (avg_dist / max_possible_dist)))
529
+
530
+ # Get representative and sample memories
531
+ rep_memory, _ = memory_map[centroid_memory_id]
532
+ rep_result = self._memory_to_result(rep_memory, 1.0)
533
+
534
+ sample_results: list[MemoryResult] = []
535
+ for sid in cluster_ids[:5]:
536
+ mem, _ = memory_map[sid]
537
+ # Calculate similarity to centroid for the sample
538
+ mem_vec = memory_map[sid][1]
539
+ sim = 1.0 - self._cosine_distance(centroid, mem_vec)
540
+ sample_results.append(self._memory_to_result(mem, sim))
541
+
542
+ # Extract keywords from sample content
543
+ sample_contents = [m.content for m in sample_results]
544
+ keywords = self._extract_keywords(" ".join(sample_contents), n=5)
545
+
546
+ clusters.append(
547
+ RegionCluster(
548
+ cluster_id=cluster_id,
549
+ size=len(cluster_ids),
550
+ representative_memory=rep_result,
551
+ sample_memories=sample_results[:3],
552
+ coherence=coherence,
553
+ keywords=keywords,
554
+ )
555
+ )
556
+
557
+ # Sort by size (largest first)
558
+ clusters.sort(key=lambda c: c.size, reverse=True)
559
+
560
+ # Limit clusters if requested
561
+ if max_clusters is not None and len(clusters) > max_clusters:
562
+ clusters = clusters[:max_clusters]
563
+
564
+ # Count noise points
565
+ noise_count = sum(1 for lbl in labels if lbl == -1)
566
+
567
+ # Calculate silhouette score if possible
568
+ clustering_quality = 0.0
569
+ if len(cluster_labels) >= 2:
570
+ try:
571
+ from sklearn.metrics import silhouette_score
572
+ # Filter out noise points for silhouette calculation
573
+ mask = labels >= 0
574
+ if mask.sum() >= 2:
575
+ clustering_quality = float(
576
+ silhouette_score(vectors[mask], labels[mask])
577
+ )
578
+ except ImportError:
579
+ pass # sklearn not available, skip quality calculation
580
+
581
+ return RegionsResult(
582
+ clusters=clusters,
583
+ noise_count=noise_count,
584
+ total_memories=len(memory_ids),
585
+ clustering_quality=clustering_quality,
586
+ )
587
+
588
+ except (ValidationError, InsufficientMemoriesError, ClusteringError):
589
+ raise
590
+ except Exception as e:
591
+ raise ClusteringError(f"Clustering failed: {e}") from e
592
+
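A minimal standalone illustration of the HDBSCAN call pattern used above, run on synthetic blobs rather than stored memories (assumes numpy and the optional `hdbscan` package are installed):

import numpy as np
import hdbscan

rng = np.random.default_rng(0)
blob_a = rng.normal(0.0, 0.05, size=(20, 8))      # two tight synthetic clusters
blob_b = rng.normal(1.0, 0.05, size=(20, 8))
vectors = np.vstack([blob_a, blob_b]).astype(np.float32)

labels = hdbscan.HDBSCAN(min_cluster_size=3, metric="euclidean").fit_predict(vectors)
print(sorted(set(labels)))                        # typically [0, 1]; -1 marks noise points
print(sum(1 for lbl in labels if lbl == -1))      # noise count, as computed in regions()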
593
+ def visualize(
594
+ self,
595
+ memory_ids: list[str] | None = None,
596
+ namespace: str | None = None,
597
+ format: Literal["json", "mermaid", "svg"] = "json",
598
+ dimensions: Literal[2, 3] = 2,
599
+ include_edges: bool = True,
600
+ ) -> VisualizationResult:
601
+ """Generate a visualization of memory space using UMAP projection.
602
+
603
+ Args:
604
+ memory_ids: Specific memories to visualize (None = auto-select).
605
+ namespace: Namespace filter when auto-selecting.
606
+ format: Output format (json, mermaid, or svg).
607
+ dimensions: Number of dimensions (2 or 3).
608
+ include_edges: Include similarity edges between nodes.
609
+
610
+ Returns:
611
+ VisualizationResult with visualization data and formatted output.
612
+
613
+ Raises:
614
+ ValidationError: If input validation fails.
615
+ VisualizationError: If visualization fails or UMAP unavailable.
616
+ InsufficientMemoriesError: If not enough memories.
617
+ """
618
+ if not UMAP_AVAILABLE:
619
+ raise VisualizationError(
620
+ "UMAP is not available. Install with: pip install umap-learn"
621
+ )
622
+
623
+ # Validate inputs
624
+ if namespace is not None:
625
+ namespace = validate_namespace(namespace)
626
+
627
+ if memory_ids is not None:
628
+ memory_ids = [validate_uuid(mid) for mid in memory_ids]
629
+
630
+ if dimensions not in (2, 3):
631
+ raise ValidationError("Dimensions must be 2 or 3")
632
+
633
+ try:
634
+ # Get memories to visualize
635
+ if memory_ids:
636
+ memories_with_vectors: list[tuple[Any, np.ndarray]] = []
637
+ for mid in memory_ids[: self._config.visualize_max_memories]:
638
+ result = self._repo.get_with_vector(mid)
639
+ if result:
640
+ memories_with_vectors.append(result)
641
+ else:
642
+ memories_with_vectors = self._repo.get_all(
643
+ namespace=namespace, limit=self._config.visualize_max_memories
644
+ )
645
+
646
+ if len(memories_with_vectors) < 5:
647
+ raise InsufficientMemoriesError(
648
+ required=5,
649
+ available=len(memories_with_vectors),
650
+ operation="visualize",
651
+ )
652
+
653
+ # Extract vectors
654
+ vectors = np.array(
655
+ [v for _, v in memories_with_vectors], dtype=np.float32
656
+ )
657
+
658
+ # Run UMAP projection
659
+ n_neighbors = min(
660
+ self._config.visualize_n_neighbors, len(vectors) - 1
661
+ )
662
+ reducer = umap.UMAP(
663
+ n_components=dimensions,
664
+ n_neighbors=n_neighbors,
665
+ min_dist=self._config.visualize_min_dist,
666
+ metric="cosine",
667
+ random_state=42, # Reproducibility
668
+ )
669
+ embedding = reducer.fit_transform(vectors)
670
+
671
+ # Optionally run clustering for coloring
672
+ cluster_labels = [-1] * len(memories_with_vectors)
673
+
674
+ if HDBSCAN_AVAILABLE and len(memories_with_vectors) >= 10:
675
+ try:
676
+ clusterer = hdbscan.HDBSCAN(
677
+ min_cluster_size=3,
678
+ metric="euclidean",
679
+ )
680
+ cluster_labels = clusterer.fit_predict(vectors).tolist()
681
+ except Exception as e:
682
+ logger.debug(f"Clustering for visualization failed: {e}")
683
+
684
+ # Build visualization nodes
685
+ nodes: list[VisualizationNode] = []
686
+ for i, (memory, _) in enumerate(memories_with_vectors):
687
+ # Create short label from content
688
+ content = memory.content
689
+ label = content[:50] + "..." if len(content) > 50 else content
690
+ label = label.replace("\n", " ")
691
+
692
+ nodes.append(
693
+ VisualizationNode(
694
+ id=memory.id,
695
+ x=float(embedding[i, 0]),
696
+ y=float(embedding[i, 1]) if dimensions >= 2 else 0.0,
697
+ label=label,
698
+ cluster=cluster_labels[i],
699
+ importance=memory.importance,
700
+ highlighted=False,
701
+ )
702
+ )
703
+
704
+ # Build edges if requested
705
+ edges: list[VisualizationEdge] = []
706
+ if include_edges:
707
+ # Calculate pairwise similarities using vectorized operations
708
+ similarity_matrix = self._compute_pairwise_similarities(vectors)
709
+ threshold = self._config.visualize_similarity_threshold
710
+
711
+ # Extract upper triangle indices where similarity >= threshold
712
+ # (upper triangle avoids duplicate edges)
713
+ upper_tri_indices = np.triu_indices(len(vectors), k=1)
714
+ similarities = similarity_matrix[upper_tri_indices]
715
+
716
+ # Filter by threshold and create edges
717
+ mask = similarities >= threshold
718
+ for idx in np.where(mask)[0]:
719
+ i, j = upper_tri_indices[0][idx], upper_tri_indices[1][idx]
720
+ edges.append(
721
+ VisualizationEdge(
722
+ from_id=nodes[i].id,
723
+ to_id=nodes[j].id,
724
+ weight=float(similarities[idx]),
725
+ )
726
+ )
727
+
728
+ # Calculate bounds
729
+ x_coords = [n.x for n in nodes]
730
+ y_coords = [n.y for n in nodes]
731
+ bounds = {
732
+ "x_min": min(x_coords),
733
+ "x_max": max(x_coords),
734
+ "y_min": min(y_coords),
735
+ "y_max": max(y_coords),
736
+ }
737
+
738
+ # Format output
739
+ output = self._format_output(nodes, edges, format)
740
+
741
+ return VisualizationResult(
742
+ nodes=nodes,
743
+ edges=edges,
744
+ bounds=bounds,
745
+ format=format,
746
+ output=output,
747
+ )
748
+
749
+ except (ValidationError, InsufficientMemoriesError, VisualizationError):
750
+ raise
751
+ except Exception as e:
752
+ raise VisualizationError(f"Visualization failed: {e}") from e
753
+
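The projection step in isolation looks roughly like this; the random vectors stand in for stored embeddings and the parameters mirror the config defaults above (assumes `umap-learn` is installed):

import numpy as np
import umap

vectors = np.random.rand(50, 32).astype(np.float32)
reducer = umap.UMAP(n_components=2, n_neighbors=15, min_dist=0.1,
                    metric="cosine", random_state=42)
coords = reducer.fit_transform(vectors)
print(coords.shape)                               # (50, 2): one x/y position per memory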
754
+ # =========================================================================
755
+ # Helper Methods
756
+ # =========================================================================
757
+
758
+ def _memory_to_result(self, memory: Any, similarity: float) -> MemoryResult:
759
+ """Convert a Memory object to a MemoryResult.
760
+
761
+ Args:
762
+ memory: Memory object.
763
+ similarity: Similarity score.
764
+
765
+ Returns:
766
+ MemoryResult object.
767
+ """
768
+ return MemoryResult(
769
+ id=memory.id,
770
+ content=memory.content,
771
+ similarity=max(0.0, min(1.0, similarity)),
772
+ namespace=memory.namespace,
773
+ tags=memory.tags,
774
+ importance=memory.importance,
775
+ created_at=memory.created_at,
776
+ metadata=memory.metadata,
777
+ )
778
+
779
+ def _slerp_interpolate(
780
+ self,
781
+ start_vec: np.ndarray,
782
+ end_vec: np.ndarray,
783
+ num_steps: int,
784
+ ) -> tuple[list[np.ndarray], list[float]]:
785
+ """Spherical Linear Interpolation between two vectors.
786
+
787
+ SLERP maintains constant angular velocity along the geodesic path
788
+ between two points on a hypersphere, making it ideal for semantic
789
+ interpolation in embedding space.
790
+
791
+ Args:
792
+ start_vec: Starting vector.
793
+ end_vec: Ending vector.
794
+ num_steps: Number of interpolation points.
795
+
796
+ Returns:
797
+ Tuple of (interpolated vectors, t values).
798
+ """
799
+ # Normalize vectors
800
+ start_norm = start_vec / (np.linalg.norm(start_vec) + 1e-10)
801
+ end_norm = end_vec / (np.linalg.norm(end_vec) + 1e-10)
802
+
803
+ # Calculate angle between vectors
804
+ dot = np.clip(np.dot(start_norm, end_norm), -1.0, 1.0)
805
+ omega = np.arccos(dot)
806
+
807
+ t_values = list(np.linspace(0, 1, num_steps))
808
+
809
+ # Handle nearly parallel vectors (use linear interpolation)
810
+ if omega < 1e-6:
811
+ linear_interp = [
812
+ start_vec + t * (end_vec - start_vec)
813
+ for t in t_values
814
+ ]
815
+ return linear_interp, t_values
816
+
817
+ sin_omega = np.sin(omega)
818
+
819
+ interpolated: list[np.ndarray] = []
820
+ for t in t_values:
821
+ coef_start = np.sin((1 - t) * omega) / sin_omega
822
+ coef_end = np.sin(t * omega) / sin_omega
823
+ vec = coef_start * start_norm + coef_end * end_norm
824
+ interpolated.append(vec)
825
+
826
+ return interpolated, t_values
827
+
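A self-contained numeric check of the SLERP formula above; it shows that interpolated points keep unit length along the whole path, which plain linear interpolation would not:

import numpy as np

def slerp(a, b, t):
    a = a / np.linalg.norm(a)
    b = b / np.linalg.norm(b)
    omega = np.arccos(np.clip(np.dot(a, b), -1.0, 1.0))
    if omega < 1e-6:                              # nearly parallel: fall back to linear
        return a + t * (b - a)
    return (np.sin((1 - t) * omega) * a + np.sin(t * omega) * b) / np.sin(omega)

start, end = np.array([1.0, 0.0]), np.array([0.0, 1.0])
for t in np.linspace(0.0, 1.0, 5):
    point = slerp(start, end, t)
    print(round(float(t), 2), point.round(3), round(float(np.linalg.norm(point)), 3))  # norm stays 1.0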
828
+ def _batch_vector_search(
829
+ self,
830
+ vectors: list[np.ndarray],
831
+ limit_per_query: int,
832
+ namespace: str | None,
833
+ include_vector: bool = False,
834
+ ) -> list[list[MemoryResult]]:
835
+ """Perform batch vector search using repository's native batch capability.
836
+
837
+ Uses the repository's batch_vector_search for efficient multi-query
838
+ searches in a single database operation.
839
+
840
+ Args:
841
+ vectors: List of query vectors.
842
+ limit_per_query: Results per query.
843
+ namespace: Optional namespace filter.
844
+ include_vector: Whether to include embedding vectors in results.
845
+ Defaults to False to reduce response size.
846
+
847
+ Returns:
848
+ List of result lists. If include_vector=True, each MemoryResult
849
+ includes its embedding vector.
850
+ """
851
+ # Use native batch search for efficiency
852
+ raw_results = self._repo.batch_vector_search(
853
+ query_vectors=vectors,
854
+ limit_per_query=limit_per_query,
855
+ namespace=namespace,
856
+ include_vector=include_vector,
857
+ )
858
+
859
+ # Convert raw dict results to MemoryResult objects
860
+ results: list[list[MemoryResult]] = []
861
+ for query_results in raw_results:
862
+ memory_results: list[MemoryResult] = []
863
+ for record in query_results:
864
+ memory_result = MemoryResult(
865
+ id=record["id"],
866
+ content=record["content"],
867
+ similarity=record.get("similarity", 0.0),
868
+ namespace=record.get("namespace", "default"),
869
+ tags=record.get("tags", []),
870
+ importance=record.get("importance", 0.5),
871
+ created_at=record.get("created_at") or utc_now(),
872
+ metadata=record.get("metadata", {}),
873
+ vector=record.get("vector") if include_vector else None,
874
+ )
875
+ memory_results.append(memory_result)
876
+ results.append(memory_results)
877
+ return results
878
+
879
+ def _get_vector_for_memory(self, memory_id: str) -> np.ndarray:
880
+ """Get the vector for a memory.
881
+
882
+ Args:
883
+ memory_id: Memory UUID.
884
+
885
+ Returns:
886
+ The memory's vector.
887
+ """
888
+ result = self._repo.get_with_vector(memory_id)
889
+ if result is None:
890
+ # Return zero vector if memory not found (shouldn't happen in practice)
891
+ return np.zeros(self._embeddings.dimensions, dtype=np.float32)
892
+ _, vector = result
893
+ return vector
894
+
895
+ def _cosine_distance(self, vec1: np.ndarray, vec2: np.ndarray) -> float:
896
+ """Calculate cosine distance between two vectors.
897
+
898
+ Args:
899
+ vec1: First vector.
900
+ vec2: Second vector.
901
+
902
+ Returns:
903
+ Cosine distance (0 = identical, 2 = opposite).
904
+ """
905
+ norm1 = np.linalg.norm(vec1)
906
+ norm2 = np.linalg.norm(vec2)
907
+ if norm1 < 1e-10 or norm2 < 1e-10:
908
+ return 1.0 # Maximum distance for zero vectors
909
+
910
+ similarity = np.dot(vec1, vec2) / (norm1 * norm2)
911
+ return float(1.0 - similarity)
912
+
913
+ def _compute_pairwise_similarities(self, vectors: np.ndarray) -> np.ndarray:
914
+ """Compute pairwise cosine similarities using vectorized operations.
915
+
916
+ Uses scipy.cdist if available for optimal performance, otherwise
917
+ falls back to numpy matrix operations.
918
+
919
+ Args:
920
+ vectors: 2D array of shape (n_vectors, embedding_dim).
921
+
922
+ Returns:
923
+ Symmetric similarity matrix of shape (n_vectors, n_vectors).
924
+ Values range from -1 (opposite) to 1 (identical).
925
+ """
926
+ # Normalize vectors to unit length
927
+ norms = np.linalg.norm(vectors, axis=1, keepdims=True)
928
+ # Avoid division by zero for zero vectors
929
+ norms = np.where(norms < 1e-10, 1.0, norms)
930
+ normalized = vectors / norms
931
+
932
+ similarities: np.ndarray
933
+ if SCIPY_AVAILABLE:
934
+ # scipy.cdist with cosine metric returns distances (1 - similarity)
935
+ distances = cdist(normalized, normalized, metric="cosine")
936
+ similarities = 1.0 - distances
937
+ else:
938
+ # Fallback: use numpy dot product (A @ A.T for normalized vectors)
939
+ similarities = normalized @ normalized.T
940
+
941
+ return similarities
942
+
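For three toy vectors, the normalise-then-matrix-multiply fallback plus the upper-triangle filter used by visualize() reduces to:

import numpy as np

vecs = np.array([[1.0, 0.0], [0.9, 0.1], [0.0, 1.0]], dtype=np.float32)
normed = vecs / np.linalg.norm(vecs, axis=1, keepdims=True)
sims = normed @ normed.T                          # cosine similarity matrix
rows, cols = np.triu_indices(len(vecs), k=1)      # each unordered pair once
for i, j in zip(rows, cols):
    if sims[i, j] >= 0.7:                         # visualize_similarity_threshold default
        print(f"edge {i} -> {j}, weight {sims[i, j]:.2f}")   # only 0 -> 1 survives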
943
+ def _temperature_select(
944
+ self,
945
+ candidates: list[MemoryResult],
946
+ temperature: float,
947
+ ) -> tuple[MemoryResult, float]:
948
+ """Select a candidate using temperature-based sampling.
949
+
950
+ Args:
951
+ candidates: List of candidate memories with similarity scores.
952
+ temperature: Randomness factor (0 = greedy, 1 = uniform random).
953
+
954
+ Returns:
955
+ Tuple of (selected memory, selection probability).
956
+ """
957
+ if not candidates:
958
+ raise WanderError("No candidates for temperature selection")
959
+
960
+ if temperature == 0.0:
961
+ # Greedy: pick highest similarity
962
+ return max(candidates, key=lambda c: c.similarity), 1.0
963
+
964
+ if temperature >= 1.0:
965
+ # Random: uniform selection
966
+ prob = 1.0 / len(candidates)
967
+ return random.choice(candidates), prob
968
+
969
+ # Temperature-based softmax selection
970
+ similarities = np.array([c.similarity for c in candidates])
971
+
972
+ # Scale by inverse temperature (lower temp = sharper distribution)
973
+ scaled = similarities / (temperature + 1e-10)
974
+ scaled = scaled - scaled.max() # Numerical stability
975
+ exp_scaled = np.exp(scaled)
976
+ probs = exp_scaled / exp_scaled.sum()
977
+
978
+ # Sample according to probabilities
979
+ idx = np.random.choice(len(candidates), p=probs)
980
+ return candidates[idx], float(probs[idx])
981
+
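The effect of the temperature term is easiest to see on fixed similarities; lower values concentrate the softmax mass on the best candidate, higher values flatten it toward uniform:

import numpy as np

def softmax_probs(similarities, temperature):
    scaled = np.asarray(similarities, dtype=float) / (temperature + 1e-10)
    scaled -= scaled.max()                        # numerical stability, as above
    weights = np.exp(scaled)
    return weights / weights.sum()

sims = [0.9, 0.7, 0.4]
print(softmax_probs(sims, 0.1).round(3))          # ~[0.876, 0.118, 0.006] - near-greedy
print(softmax_probs(sims, 0.9).round(3))          # ~[0.421, 0.337, 0.242] - much flatter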
982
+ def _extract_keywords(self, text: str, n: int = 5) -> list[str]:
983
+ """Extract top keywords from text using simple frequency analysis.
984
+
985
+ Args:
986
+ text: Text to analyze.
987
+ n: Number of keywords to extract.
988
+
989
+ Returns:
990
+ List of top keywords.
991
+ """
992
+ # Simple keyword extraction using word frequency
993
+ # Tokenize and filter using module-level stop words
994
+ words = re.findall(r"\b[a-zA-Z]+\b", text.lower())
995
+ filtered = [w for w in words if w not in _STOP_WORDS and len(w) > 2]
996
+
997
+ # Count frequencies
998
+ counter = Counter(filtered)
999
+ return [word for word, _ in counter.most_common(n)]
1000
+
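On a short snippet, the frequency-based extraction above behaves like this (a sketch of the same tokenise/filter/count steps, with a trimmed stop-word set):

import re
from collections import Counter

text = "Vector search keeps vector lookups fast; the cache keeps results warm"
words = re.findall(r"\b[a-zA-Z]+\b", text.lower())
filtered = [w for w in words if len(w) > 2 and w not in {"the"}]
print(Counter(filtered).most_common(3))           # [('vector', 2), ('keeps', 2), ('search', 1)]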
1001
+ def _format_output(
1002
+ self,
1003
+ nodes: list[VisualizationNode],
1004
+ edges: list[VisualizationEdge],
1005
+ format: Literal["json", "mermaid", "svg"],
1006
+ ) -> str:
1007
+ """Format visualization data for output.
1008
+
1009
+ Args:
1010
+ nodes: Visualization nodes.
1011
+ edges: Visualization edges.
1012
+ format: Output format.
1013
+
1014
+ Returns:
1015
+ Formatted string output.
1016
+ """
1017
+ if format == "json":
1018
+ return json.dumps(
1019
+ {
1020
+ "nodes": [
1021
+ {
1022
+ "id": n.id,
1023
+ "x": n.x,
1024
+ "y": n.y,
1025
+ "label": n.label,
1026
+ "cluster": n.cluster,
1027
+ "importance": n.importance,
1028
+ }
1029
+ for n in nodes
1030
+ ],
1031
+ "edges": [
1032
+ {
1033
+ "from": e.from_id,
1034
+ "to": e.to_id,
1035
+ "weight": e.weight,
1036
+ }
1037
+ for e in edges
1038
+ ],
1039
+ },
1040
+ indent=2,
1041
+ )
1042
+
1043
+ elif format == "mermaid":
1044
+ lines = ["graph LR"]
1045
+
1046
+ # Add nodes with short IDs
1047
+ node_aliases = {n.id: f"N{i}" for i, n in enumerate(nodes)}
1048
+ for node in nodes:
1049
+ alias = node_aliases[node.id]
1050
+ # Escape special characters in label
1051
+ safe_label = node.label.replace('"', "'").replace("\n", " ")[:30]
1052
+ lines.append(f' {alias}["{safe_label}"]')
1053
+
1054
+ # Add edges
1055
+ for edge in edges:
1056
+ from_alias = node_aliases.get(edge.from_id)
1057
+ to_alias = node_aliases.get(edge.to_id)
1058
+ if from_alias and to_alias:
1059
+ lines.append(f" {from_alias} --> {to_alias}")
1060
+
1061
+ return "\n".join(lines)
1062
+
1063
+ elif format == "svg":
1064
+ return self._generate_svg(nodes, edges)
1065
+
1066
+ else:
1067
+ raise ValidationError(f"Unknown format: {format}")
1068
+
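For a two-node graph with a single qualifying edge, the mermaid branch above would produce output along these lines:

graph LR
  N0["first memory label"]
  N1["second memory label"]
  N0 --> N1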
1069
+ def _generate_svg(
1070
+ self,
1071
+ nodes: list[VisualizationNode],
1072
+ edges: list[VisualizationEdge],
1073
+ ) -> str:
1074
+ """Generate SVG visualization.
1075
+
1076
+ Args:
1077
+ nodes: Visualization nodes.
1078
+ edges: Visualization edges.
1079
+
1080
+ Returns:
1081
+ SVG string.
1082
+ """
1083
+ width, height = 800, 600
1084
+ padding = 50
1085
+
1086
+ # Calculate scale to fit nodes
1087
+ x_coords = [n.x for n in nodes]
1088
+ y_coords = [n.y for n in nodes]
1089
+ x_min, x_max = min(x_coords), max(x_coords)
1090
+ y_min, y_max = min(y_coords), max(y_coords)
1091
+
1092
+ x_range = x_max - x_min if x_max != x_min else 1
1093
+ y_range = y_max - y_min if y_max != y_min else 1
1094
+
1095
+ def scale_x(x: float) -> float:
1096
+ return padding + (x - x_min) / x_range * (width - 2 * padding)
1097
+
1098
+ def scale_y(y: float) -> float:
1099
+ return padding + (y - y_min) / y_range * (height - 2 * padding)
1100
+
1101
+ svg_lines = [
1102
+ f'<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 {width} {height}">',
1103
+ " <style>",
1104
+ " .node { cursor: pointer; }",
1105
+ " .node circle { stroke: #333; stroke-width: 1; }",
1106
+ " .node text { font-size: 10px; fill: #333; }",
1107
+ " .edge { stroke: #ccc; stroke-width: 1; opacity: 0.5; }",
1108
+ " </style>",
1109
+ ]
1110
+
1111
+ # Draw edges
1112
+ for edge in edges:
1113
+ from_node = next((n for n in nodes if n.id == edge.from_id), None)
1114
+ to_node = next((n for n in nodes if n.id == edge.to_id), None)
1115
+ if from_node and to_node:
1116
+ x1, y1 = scale_x(from_node.x), scale_y(from_node.y)
1117
+ x2, y2 = scale_x(to_node.x), scale_y(to_node.y)
1118
+ svg_lines.append(
1119
+ f' <line class="edge" x1="{x1:.1f}" y1="{y1:.1f}" '
1120
+ f'x2="{x2:.1f}" y2="{y2:.1f}" />'
1121
+ )
1122
+
1123
+ # Draw nodes
1124
+ for node in nodes:
1125
+ x, y = scale_x(node.x), scale_y(node.y)
1126
+ radius = 5 + node.importance * 5 # Scale by importance
1127
+ if node.cluster >= 0:
1128
+ color = CLUSTER_COLORS[node.cluster % len(CLUSTER_COLORS)]
1129
+ else:
1130
+ color = "#999"
1131
+
1132
+ svg_lines.append(' <g class="node">')
1133
+ svg_lines.append(
1134
+ f' <circle cx="{x:.1f}" cy="{y:.1f}" r="{radius:.1f}" '
1135
+ f'fill="{color}" />'
1136
+ )
1137
+ # Add truncated label
1138
+ short_label = node.label[:20] + "..." if len(node.label) > 20 else node.label
1139
+ # Escape XML special characters
1140
+ short_label = (
1141
+ short_label.replace("&", "&amp;")
1142
+ .replace("<", "&lt;")
1143
+ .replace(">", "&gt;")
1144
+ )
1145
+ svg_lines.append(
1146
+ f' <text x="{x:.1f}" y="{y + radius + 12:.1f}" '
1147
+ f'text-anchor="middle">{short_label}</text>'
1148
+ )
1149
+ svg_lines.append(" </g>")
1150
+
1151
+ svg_lines.append("</svg>")
1152
+ return "\n".join(svg_lines)
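Hypothetical call sites for the four public operations, assuming a `service` wired as sketched after SpatialConfig; the argument names follow the signatures above:

# journey_result = service.journey(start_id, end_id, steps=8, namespace="notes")
# walk = service.wander(start_id, temperature=0.7)
# region_result = service.regions(namespace="notes", min_cluster_size=4)
# viz = service.visualize(namespace="notes", format="mermaid", dimensions=2)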