spatial-memory-mcp 1.6.1 (spatial_memory_mcp-1.6.1-py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of spatial-memory-mcp might be problematic.

Files changed (54)
  1. spatial_memory/__init__.py +97 -0
  2. spatial_memory/__main__.py +270 -0
  3. spatial_memory/adapters/__init__.py +7 -0
  4. spatial_memory/adapters/lancedb_repository.py +878 -0
  5. spatial_memory/config.py +728 -0
  6. spatial_memory/core/__init__.py +118 -0
  7. spatial_memory/core/cache.py +317 -0
  8. spatial_memory/core/circuit_breaker.py +297 -0
  9. spatial_memory/core/connection_pool.py +220 -0
  10. spatial_memory/core/consolidation_strategies.py +402 -0
  11. spatial_memory/core/database.py +3069 -0
  12. spatial_memory/core/db_idempotency.py +242 -0
  13. spatial_memory/core/db_indexes.py +575 -0
  14. spatial_memory/core/db_migrations.py +584 -0
  15. spatial_memory/core/db_search.py +509 -0
  16. spatial_memory/core/db_versioning.py +177 -0
  17. spatial_memory/core/embeddings.py +557 -0
  18. spatial_memory/core/errors.py +317 -0
  19. spatial_memory/core/file_security.py +702 -0
  20. spatial_memory/core/filesystem.py +178 -0
  21. spatial_memory/core/health.py +289 -0
  22. spatial_memory/core/helpers.py +79 -0
  23. spatial_memory/core/import_security.py +432 -0
  24. spatial_memory/core/lifecycle_ops.py +1067 -0
  25. spatial_memory/core/logging.py +194 -0
  26. spatial_memory/core/metrics.py +192 -0
  27. spatial_memory/core/models.py +628 -0
  28. spatial_memory/core/rate_limiter.py +326 -0
  29. spatial_memory/core/response_types.py +497 -0
  30. spatial_memory/core/security.py +588 -0
  31. spatial_memory/core/spatial_ops.py +426 -0
  32. spatial_memory/core/tracing.py +300 -0
  33. spatial_memory/core/utils.py +110 -0
  34. spatial_memory/core/validation.py +403 -0
  35. spatial_memory/factory.py +407 -0
  36. spatial_memory/migrations/__init__.py +40 -0
  37. spatial_memory/ports/__init__.py +11 -0
  38. spatial_memory/ports/repositories.py +631 -0
  39. spatial_memory/py.typed +0 -0
  40. spatial_memory/server.py +1141 -0
  41. spatial_memory/services/__init__.py +70 -0
  42. spatial_memory/services/export_import.py +1023 -0
  43. spatial_memory/services/lifecycle.py +1120 -0
  44. spatial_memory/services/memory.py +412 -0
  45. spatial_memory/services/spatial.py +1147 -0
  46. spatial_memory/services/utility.py +409 -0
  47. spatial_memory/tools/__init__.py +5 -0
  48. spatial_memory/tools/definitions.py +695 -0
  49. spatial_memory/verify.py +140 -0
  50. spatial_memory_mcp-1.6.1.dist-info/METADATA +499 -0
  51. spatial_memory_mcp-1.6.1.dist-info/RECORD +54 -0
  52. spatial_memory_mcp-1.6.1.dist-info/WHEEL +4 -0
  53. spatial_memory_mcp-1.6.1.dist-info/entry_points.txt +2 -0
  54. spatial_memory_mcp-1.6.1.dist-info/licenses/LICENSE +21 -0
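
spatial_memory/services/spatial.py (new file)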
@@ -0,0 +1,1147 @@
"""Spatial service for exploration operations.

This service provides the spatial layer for memory exploration:
- journey: SLERP interpolation between two memories
- wander: Temperature-based random walk through memory space
- regions: HDBSCAN clustering to discover memory regions
- visualize: UMAP projection for 2D/3D visualization

The service uses dependency injection for repository and embedding services.
"""

from __future__ import annotations

import json
import logging
import random
import re
from collections import Counter
from dataclasses import dataclass
from typing import TYPE_CHECKING, Any, Literal

import numpy as np

from spatial_memory.core.errors import (
    ClusteringError,
    InsufficientMemoriesError,
    JourneyError,
    MemoryNotFoundError,
    ValidationError,
    VisualizationError,
    WanderError,
)
from spatial_memory.core.models import (
    JourneyResult,
    JourneyStep,
    MemoryResult,
    RegionCluster,
    RegionsResult,
    VisualizationEdge,
    VisualizationNode,
    VisualizationResult,
    WanderResult,
    WanderStep,
)
from spatial_memory.core.validation import validate_namespace, validate_uuid

logger = logging.getLogger(__name__)

# Check optional dependency availability at import time
try:
    import hdbscan

    HDBSCAN_AVAILABLE = True
except ImportError:
    HDBSCAN_AVAILABLE = False
    logger.debug("HDBSCAN not available - regions operation will be disabled")

try:
    import umap

    UMAP_AVAILABLE = True
except ImportError:
    UMAP_AVAILABLE = False
    logger.debug("UMAP not available - visualize operation will be disabled")

try:
    from scipy.spatial.distance import cdist

    SCIPY_AVAILABLE = True
except ImportError:
    SCIPY_AVAILABLE = False
    logger.debug("scipy not available - using fallback for similarity calculations")

# Common stop words for keyword extraction (module-level to avoid recreation)
_STOP_WORDS: frozenset[str] = frozenset({
    "the", "a", "an", "is", "are", "was", "were", "be", "been", "being",
    "have", "has", "had", "do", "does", "did", "will", "would", "could",
    "should", "may", "might", "must", "can", "to", "of", "in", "for",
    "on", "with", "at", "by", "from", "as", "into", "through", "during",
    "before", "after", "above", "below", "between", "under", "again",
    "further", "then", "once", "here", "there", "when", "where", "why",
    "how", "all", "each", "few", "more", "most", "other", "some", "such",
    "no", "nor", "not", "only", "own", "same", "so", "than", "too",
    "very", "just", "also", "now", "and", "but", "or", "if", "it", "its",
    "this", "that", "these", "those", "i", "you", "he", "she", "we", "they",
})

if TYPE_CHECKING:
    from spatial_memory.ports.repositories import (
        EmbeddingServiceProtocol,
        MemoryRepositoryProtocol,
    )


@dataclass
class SpatialConfig:
    """Configuration for spatial operations.

    Attributes:
        journey_default_steps: Default number of interpolation steps for journey.
        journey_max_steps: Maximum allowed steps for journey.
        journey_neighbors_per_step: Number of neighbors to find per interpolation point.
        wander_default_steps: Default number of steps for random walk.
        wander_max_steps: Maximum allowed steps for wander.
        wander_default_temperature: Default temperature (randomness) for wander.
        wander_avoid_recent: Number of recent memories to avoid revisiting.
        wander_candidates_per_step: Number of candidate neighbors per step.
        regions_min_cluster_size: Minimum cluster size for HDBSCAN.
        regions_max_memories: Maximum memories to consider for clustering.
        visualize_n_neighbors: UMAP n_neighbors parameter.
        visualize_min_dist: UMAP min_dist parameter.
        visualize_max_memories: Maximum memories to include in visualization.
        visualize_similarity_threshold: Minimum similarity for edge creation.
    """

    # Journey parameters
    journey_default_steps: int = 10
    journey_max_steps: int = 20
    journey_neighbors_per_step: int = 3

    # Wander parameters
    wander_default_steps: int = 10
    wander_max_steps: int = 20
    wander_default_temperature: float = 0.5
    wander_avoid_recent: int = 5
    wander_candidates_per_step: int = 10

    # Regions parameters
    regions_min_cluster_size: int = 3
    regions_max_memories: int = 10_000

    # Visualize parameters
    visualize_n_neighbors: int = 15
    visualize_min_dist: float = 0.1
    visualize_max_memories: int = 500
    visualize_similarity_threshold: float = 0.7


# Color palette for cluster visualization
CLUSTER_COLORS = [
    "#4285F4",  # Blue
    "#EA4335",  # Red
    "#FBBC04",  # Yellow
    "#34A853",  # Green
    "#FF6D01",  # Orange
    "#46BDC6",  # Cyan
    "#7B1FA2",  # Purple
    "#E91E63",  # Pink
    "#009688",  # Teal
    "#795548",  # Brown
]


class SpatialService:
    """Service for spatial exploration of memory space.

    Uses Clean Architecture - depends on protocol interfaces, not implementations.
    """

    def __init__(
        self,
        repository: MemoryRepositoryProtocol,
        embeddings: EmbeddingServiceProtocol,
        config: SpatialConfig | None = None,
    ) -> None:
        """Initialize the spatial service.

        Args:
            repository: Repository for memory storage.
            embeddings: Service for generating embeddings.
            config: Optional configuration (uses defaults if not provided).
        """
        self._repo = repository
        self._embeddings = embeddings
        self._config = config or SpatialConfig()

    def journey(
        self,
        start_id: str,
        end_id: str,
        steps: int | None = None,
        namespace: str | None = None,
    ) -> JourneyResult:
        """Find a path between two memories using SLERP interpolation.

        Spherical Linear Interpolation (SLERP) creates smooth paths through
        embedding space, finding actual memories closest to each interpolation
        point.

        Args:
            start_id: Starting memory UUID.
            end_id: Ending memory UUID.
            steps: Number of interpolation steps (default from config).
            namespace: Optional namespace filter for intermediate memories.

        Returns:
            JourneyResult with path steps.

        Raises:
            ValidationError: If input validation fails.
            MemoryNotFoundError: If start or end memory not found.
            JourneyError: If path cannot be computed.
        """
        # Validate inputs
        start_id = validate_uuid(start_id)
        end_id = validate_uuid(end_id)
        if namespace is not None:
            namespace = validate_namespace(namespace)

        # Get step count
        actual_steps = steps if steps is not None else self._config.journey_default_steps
        if actual_steps < 2:
            raise ValidationError("Journey requires at least 2 steps")
        if actual_steps > self._config.journey_max_steps:
            raise ValidationError(
                f"Maximum journey steps is {self._config.journey_max_steps}"
            )

        # Get start and end memories with vectors
        start_result = self._repo.get_with_vector(start_id)
        if start_result is None:
            raise MemoryNotFoundError(start_id)
        start_memory, start_vector = start_result

        end_result = self._repo.get_with_vector(end_id)
        if end_result is None:
            raise MemoryNotFoundError(end_id)
        end_memory, end_vector = end_result

        try:
            # Generate interpolation points using SLERP
            interpolated_vectors, t_values = self._slerp_interpolate(
                start_vector, end_vector, actual_steps
            )

            # Find nearest memories for each interpolation point
            # Use batch search for efficiency, include vectors to avoid N+1 queries
            search_results = self._batch_vector_search(
                interpolated_vectors,
                limit_per_query=self._config.journey_neighbors_per_step,
                namespace=namespace,
                include_vector=True,  # Include vectors to avoid follow-up queries
            )

            # Build journey steps
            journey_steps: list[JourneyStep] = []
            steps_with_memories = 0

            for step_num, (interp_vec, t_val, neighbors) in enumerate(
                zip(interpolated_vectors, t_values, search_results)
            ):
                # Calculate distance from interpolation point to nearest memory
                distance_to_path = float("inf")
                if neighbors:
                    for neighbor in neighbors:
                        # Use vector from search result (included via include_vector=True)
                        if neighbor.vector is not None:
                            neighbor_vec = np.array(neighbor.vector, dtype=np.float32)
                            dist = self._cosine_distance(interp_vec, neighbor_vec)
                        else:
                            # Fallback if vector not included (shouldn't happen)
                            dist = self._cosine_distance(
                                interp_vec, self._get_vector_for_memory(neighbor.id)
                            )
                        if dist < distance_to_path:
                            distance_to_path = dist
                    steps_with_memories += 1

                # Use 0.0 if no memories found (inf means no distance calculated)
                # Clamp to 0.0 to handle floating point precision errors (e.g., -4.89e-08)
                final_distance = 0.0 if distance_to_path == float("inf") else max(0.0, distance_to_path)
                journey_steps.append(
                    JourneyStep(
                        step=step_num,
                        t=t_val,
                        position=interp_vec.tolist(),
                        nearby_memories=neighbors,
                        distance_to_path=final_distance,
                    )
                )

            # Calculate path coverage
            path_coverage = steps_with_memories / len(journey_steps) if journey_steps else 0.0

            return JourneyResult(
                start_id=start_id,
                end_id=end_id,
                steps=journey_steps,
                path_coverage=path_coverage,
            )

        except Exception as e:
            if isinstance(e, (ValidationError, MemoryNotFoundError)):
                raise
            raise JourneyError(f"Failed to compute journey: {e}") from e

    def wander(
        self,
        start_id: str,
        steps: int | None = None,
        temperature: float | None = None,
        namespace: str | None = None,
    ) -> WanderResult:
        """Perform a random walk through memory space.

        Temperature controls randomness:
        - 0.0 = Always pick the most similar (greedy)
        - 0.5 = Balanced exploration
        - 1.0 = Highly random selection

        Args:
            start_id: Starting memory UUID.
            steps: Number of steps to wander (default from config).
            temperature: Randomness factor 0.0-1.0 (default from config).
            namespace: Optional namespace filter.

        Returns:
            WanderResult with path taken.

        Raises:
            ValidationError: If input validation fails.
            MemoryNotFoundError: If start memory not found.
            WanderError: If walk cannot continue.
        """
        # Validate inputs
        start_id = validate_uuid(start_id)
        if namespace is not None:
            namespace = validate_namespace(namespace)

        # Get parameters
        actual_steps = steps if steps is not None else self._config.wander_default_steps
        if actual_steps < 1:
            raise ValidationError("Wander requires at least 1 step")
        if actual_steps > self._config.wander_max_steps:
            raise ValidationError(
                f"Maximum wander steps is {self._config.wander_max_steps}"
            )

        actual_temp = (
            temperature
            if temperature is not None
            else self._config.wander_default_temperature
        )
        if not 0.0 <= actual_temp <= 1.0:
            raise ValidationError("Temperature must be between 0.0 and 1.0")

        # Verify start memory exists
        start_result = self._repo.get_with_vector(start_id)
        if start_result is None:
            raise MemoryNotFoundError(start_id)
        current_memory, current_vector = start_result

        try:
            wander_steps: list[WanderStep] = []
            visited_ids: set[str] = {start_id}
            recent_ids: list[str] = [start_id]
            total_distance = 0.0
            prev_vector = current_vector

            for step_num in range(actual_steps):
                # Find candidates from current position
                # Include vectors to avoid follow-up get_with_vector queries
                neighbors = self._repo.search(
                    current_vector,
                    limit=self._config.wander_candidates_per_step + len(visited_ids),
                    namespace=namespace,
                    include_vector=True,
                )

                # Filter out recently visited
                candidates = [
                    n
                    for n in neighbors
                    if n.id not in recent_ids[-self._config.wander_avoid_recent :]
                ]

                if not candidates:
                    # No unvisited candidates - allow revisiting older memories
                    candidates = [n for n in neighbors if n.id not in visited_ids]

                if not candidates:
                    logger.warning(
                        f"Wander ended early at step {step_num}: no candidates"
                    )
                    break

                # Select next memory based on temperature
                next_memory, selection_prob = self._temperature_select(
                    candidates, actual_temp
                )

                # Get vector from search result (included via include_vector=True)
                if next_memory.vector is not None:
                    next_vector = np.array(next_memory.vector, dtype=np.float32)
                else:
                    # Fallback if vector not included (shouldn't happen)
                    next_result = self._repo.get_with_vector(next_memory.id)
                    if next_result is None:
                        logger.warning(f"Memory {next_memory.id} disappeared during wander")
                        break
                    _, next_vector = next_result

                step_distance = self._cosine_distance(prev_vector, next_vector)
                total_distance += step_distance

                wander_steps.append(
                    WanderStep(
                        step=step_num,
                        memory=next_memory,
                        similarity_to_previous=next_memory.similarity,
                        selection_probability=selection_prob,
                    )
                )

                visited_ids.add(next_memory.id)
                recent_ids.append(next_memory.id)
                current_vector = next_vector
                prev_vector = next_vector

            return WanderResult(
                start_id=start_id,
                steps=wander_steps,
                total_distance=total_distance,
            )

        except Exception as e:
            if isinstance(e, (ValidationError, MemoryNotFoundError)):
                raise
            raise WanderError(f"Wander failed: {e}") from e

    def regions(
        self,
        namespace: str | None = None,
        min_cluster_size: int | None = None,
        max_clusters: int | None = None,
    ) -> RegionsResult:
        """Discover memory regions using HDBSCAN clustering.

        HDBSCAN automatically determines the number of clusters and
        identifies outliers (noise points).

        Args:
            namespace: Optional namespace filter.
            min_cluster_size: Minimum points per cluster (default from config).
            max_clusters: Maximum clusters to return (None = all).

        Returns:
            RegionsResult with discovered clusters.

        Raises:
            ValidationError: If input validation fails.
            ClusteringError: If clustering fails or HDBSCAN unavailable.
            InsufficientMemoriesError: If not enough memories for clustering.
        """
        if not HDBSCAN_AVAILABLE:
            raise ClusteringError(
                "HDBSCAN is not available. Install with: pip install hdbscan"
            )

        # Validate inputs
        if namespace is not None:
            namespace = validate_namespace(namespace)

        actual_min_size = (
            min_cluster_size
            if min_cluster_size is not None
            else self._config.regions_min_cluster_size
        )
        if actual_min_size < 2:
            raise ValidationError("Minimum cluster size must be at least 2")

        try:
            # Fetch all vectors for clustering
            all_memories = self._repo.get_all(
                namespace=namespace, limit=self._config.regions_max_memories
            )

            if len(all_memories) < actual_min_size:
                raise InsufficientMemoriesError(
                    required=actual_min_size,
                    available=len(all_memories),
                    operation="regions",
                )

            # Extract IDs and vectors
            memory_map = {m.id: (m, v) for m, v in all_memories}
            memory_ids = list(memory_map.keys())
            vectors = np.array([v for _, v in all_memories], dtype=np.float32)

            # Run HDBSCAN clustering
            clusterer = hdbscan.HDBSCAN(
                min_cluster_size=actual_min_size,
                metric="euclidean",  # Works well with normalized vectors
                cluster_selection_method="eom",  # Excess of Mass
            )
            labels = clusterer.fit_predict(vectors)

            # Process clusters
            clusters: list[RegionCluster] = []
            unique_labels = set(labels)

            # Remove noise label (-1) for cluster processing
            cluster_labels = [label for label in unique_labels if label >= 0]

            for cluster_id in cluster_labels:
                # Get indices of memories in this cluster
                cluster_indices = [
                    i for i, lbl in enumerate(labels) if lbl == cluster_id
                ]
                cluster_vectors = vectors[cluster_indices]
                cluster_ids = [memory_ids[i] for i in cluster_indices]

                # Find centroid and closest memory to centroid
                centroid = cluster_vectors.mean(axis=0)
                distances_to_centroid = np.linalg.norm(
                    cluster_vectors - centroid, axis=1
                )
                centroid_idx = int(np.argmin(distances_to_centroid))
                centroid_memory_id = cluster_ids[centroid_idx]

                # Calculate coherence (inverse of average intra-cluster distance)
                avg_dist = float(distances_to_centroid.mean())
                max_possible_dist = 2.0  # Max distance for normalized vectors
                coherence = max(0.0, min(1.0, 1.0 - (avg_dist / max_possible_dist)))

                # Get representative and sample memories
                rep_memory, _ = memory_map[centroid_memory_id]
                rep_result = self._memory_to_result(rep_memory, 1.0)

                sample_results: list[MemoryResult] = []
                for sid in cluster_ids[:5]:
                    mem, _ = memory_map[sid]
                    # Calculate similarity to centroid for the sample
                    mem_vec = memory_map[sid][1]
                    sim = 1.0 - self._cosine_distance(centroid, mem_vec)
                    sample_results.append(self._memory_to_result(mem, sim))

                # Extract keywords from sample content
                sample_contents = [m.content for m in sample_results]
                keywords = self._extract_keywords(" ".join(sample_contents), n=5)

                clusters.append(
                    RegionCluster(
                        cluster_id=cluster_id,
                        size=len(cluster_ids),
                        representative_memory=rep_result,
                        sample_memories=sample_results[:3],
                        coherence=coherence,
                        keywords=keywords,
                    )
                )

            # Sort by size (largest first)
            clusters.sort(key=lambda c: c.size, reverse=True)

            # Limit clusters if requested
            if max_clusters is not None and len(clusters) > max_clusters:
                clusters = clusters[:max_clusters]

            # Count noise points
            noise_count = sum(1 for lbl in labels if lbl == -1)

            # Calculate silhouette score if possible
            clustering_quality = 0.0
            if len(cluster_labels) >= 2:
                try:
                    from sklearn.metrics import silhouette_score
                    # Filter out noise points for silhouette calculation
                    mask = labels >= 0
                    if mask.sum() >= 2:
                        clustering_quality = float(
                            silhouette_score(vectors[mask], labels[mask])
                        )
                except ImportError:
                    pass  # sklearn not available, skip quality calculation

            return RegionsResult(
                clusters=clusters,
                noise_count=noise_count,
                total_memories=len(memory_ids),
                clustering_quality=clustering_quality,
            )

        except (ValidationError, InsufficientMemoriesError, ClusteringError):
            raise
        except Exception as e:
            raise ClusteringError(f"Clustering failed: {e}") from e

    def visualize(
        self,
        memory_ids: list[str] | None = None,
        namespace: str | None = None,
        format: Literal["json", "mermaid", "svg"] = "json",
        dimensions: Literal[2, 3] = 2,
        include_edges: bool = True,
    ) -> VisualizationResult:
        """Generate a visualization of memory space using UMAP projection.

        Args:
            memory_ids: Specific memories to visualize (None = auto-select).
            namespace: Namespace filter when auto-selecting.
            format: Output format (json, mermaid, or svg).
            dimensions: Number of dimensions (2 or 3).
            include_edges: Include similarity edges between nodes.

        Returns:
            VisualizationResult with visualization data and formatted output.

        Raises:
            ValidationError: If input validation fails.
            VisualizationError: If visualization fails or UMAP unavailable.
            InsufficientMemoriesError: If not enough memories.
        """
        if not UMAP_AVAILABLE:
            raise VisualizationError(
                "UMAP is not available. Install with: pip install umap-learn"
            )

        # Validate inputs
        if namespace is not None:
            namespace = validate_namespace(namespace)

        if memory_ids is not None:
            memory_ids = [validate_uuid(mid) for mid in memory_ids]

        if dimensions not in (2, 3):
            raise ValidationError("Dimensions must be 2 or 3")

        try:
            # Get memories to visualize
            if memory_ids:
                memories_with_vectors: list[tuple[Any, np.ndarray]] = []
                for mid in memory_ids[: self._config.visualize_max_memories]:
                    result = self._repo.get_with_vector(mid)
                    if result:
                        memories_with_vectors.append(result)
            else:
                memories_with_vectors = self._repo.get_all(
                    namespace=namespace, limit=self._config.visualize_max_memories
                )

            if len(memories_with_vectors) < 5:
                raise InsufficientMemoriesError(
                    required=5,
                    available=len(memories_with_vectors),
                    operation="visualize",
                )

            # Extract vectors
            vectors = np.array(
                [v for _, v in memories_with_vectors], dtype=np.float32
            )

            # Run UMAP projection
            n_neighbors = min(
                self._config.visualize_n_neighbors, len(vectors) - 1
            )
            reducer = umap.UMAP(
                n_components=dimensions,
                n_neighbors=n_neighbors,
                min_dist=self._config.visualize_min_dist,
                metric="cosine",
                random_state=42,  # Reproducibility
            )
            embedding = reducer.fit_transform(vectors)

            # Optionally run clustering for coloring
            cluster_labels = [-1] * len(memories_with_vectors)

            if HDBSCAN_AVAILABLE and len(memories_with_vectors) >= 10:
                try:
                    clusterer = hdbscan.HDBSCAN(
                        min_cluster_size=3,
                        metric="euclidean",
                    )
                    cluster_labels = clusterer.fit_predict(vectors).tolist()
                except Exception as e:
                    logger.debug(f"Clustering for visualization failed: {e}")

            # Build visualization nodes
            nodes: list[VisualizationNode] = []
            for i, (memory, _) in enumerate(memories_with_vectors):
                # Create short label from content
                content = memory.content
                label = content[:50] + "..." if len(content) > 50 else content
                label = label.replace("\n", " ")

                nodes.append(
                    VisualizationNode(
                        id=memory.id,
                        x=float(embedding[i, 0]),
                        y=float(embedding[i, 1]) if dimensions >= 2 else 0.0,
                        label=label,
                        cluster=cluster_labels[i],
                        importance=memory.importance,
                        highlighted=False,
                    )
                )

            # Build edges if requested
            edges: list[VisualizationEdge] = []
            if include_edges:
                # Calculate pairwise similarities using vectorized operations
                similarity_matrix = self._compute_pairwise_similarities(vectors)
                threshold = self._config.visualize_similarity_threshold

                # Extract upper triangle indices where similarity >= threshold
                # (upper triangle avoids duplicate edges)
                upper_tri_indices = np.triu_indices(len(vectors), k=1)
                similarities = similarity_matrix[upper_tri_indices]

                # Filter by threshold and create edges
                mask = similarities >= threshold
                for idx in np.where(mask)[0]:
                    i, j = upper_tri_indices[0][idx], upper_tri_indices[1][idx]
                    edges.append(
                        VisualizationEdge(
                            from_id=nodes[i].id,
                            to_id=nodes[j].id,
                            weight=float(similarities[idx]),
                        )
                    )

            # Calculate bounds
            x_coords = [n.x for n in nodes]
            y_coords = [n.y for n in nodes]
            bounds = {
                "x_min": min(x_coords),
                "x_max": max(x_coords),
                "y_min": min(y_coords),
                "y_max": max(y_coords),
            }

            # Format output
            output = self._format_output(nodes, edges, format)

            return VisualizationResult(
                nodes=nodes,
                edges=edges,
                bounds=bounds,
                format=format,
                output=output,
            )

        except (ValidationError, InsufficientMemoriesError, VisualizationError):
            raise
        except Exception as e:
            raise VisualizationError(f"Visualization failed: {e}") from e

    # =========================================================================
    # Helper Methods
    # =========================================================================

    def _memory_to_result(self, memory: Any, similarity: float) -> MemoryResult:
        """Convert a Memory object to a MemoryResult.

        Args:
            memory: Memory object.
            similarity: Similarity score.

        Returns:
            MemoryResult object.
        """
        return MemoryResult(
            id=memory.id,
            content=memory.content,
            similarity=max(0.0, min(1.0, similarity)),
            namespace=memory.namespace,
            tags=memory.tags,
            importance=memory.importance,
            created_at=memory.created_at,
            metadata=memory.metadata,
        )

    def _slerp_interpolate(
        self,
        start_vec: np.ndarray,
        end_vec: np.ndarray,
        num_steps: int,
    ) -> tuple[list[np.ndarray], list[float]]:
        """Spherical Linear Interpolation between two vectors.

        SLERP maintains constant angular velocity along the geodesic path
        between two points on a hypersphere, making it ideal for semantic
        interpolation in embedding space.

        Args:
            start_vec: Starting vector.
            end_vec: Ending vector.
            num_steps: Number of interpolation points.

        Returns:
            Tuple of (interpolated vectors, t values).
        """
        # Normalize vectors
        start_norm = start_vec / (np.linalg.norm(start_vec) + 1e-10)
        end_norm = end_vec / (np.linalg.norm(end_vec) + 1e-10)

        # Calculate angle between vectors
        dot = np.clip(np.dot(start_norm, end_norm), -1.0, 1.0)
        omega = np.arccos(dot)

        t_values = list(np.linspace(0, 1, num_steps))

        # Handle nearly parallel vectors (use linear interpolation)
        if omega < 1e-6:
            linear_interp = [
                start_vec + t * (end_vec - start_vec)
                for t in t_values
            ]
            return linear_interp, t_values

        sin_omega = np.sin(omega)

        interpolated: list[np.ndarray] = []
        for t in t_values:
            coef_start = np.sin((1 - t) * omega) / sin_omega
            coef_end = np.sin(t * omega) / sin_omega
            vec = coef_start * start_norm + coef_end * end_norm
            interpolated.append(vec)

        return interpolated, t_values

    def _batch_vector_search(
        self,
        vectors: list[np.ndarray],
        limit_per_query: int,
        namespace: str | None,
        include_vector: bool = False,
    ) -> list[list[MemoryResult]]:
        """Perform batch vector search using repository's native batch capability.

        Uses the repository's batch_vector_search for efficient multi-query
        searches in a single database operation.

        Args:
            vectors: List of query vectors.
            limit_per_query: Results per query.
            namespace: Optional namespace filter.
            include_vector: Whether to include embedding vectors in results.
                Defaults to False to reduce response size.

        Returns:
            List of result lists. If include_vector=True, each MemoryResult
            includes its embedding vector.
        """
        # Use native batch search for efficiency
        raw_results = self._repo.batch_vector_search(
            query_vectors=vectors,
            limit_per_query=limit_per_query,
            namespace=namespace,
            include_vector=include_vector,
        )

        # Convert raw dict results to MemoryResult objects
        results: list[list[MemoryResult]] = []
        for query_results in raw_results:
            memory_results: list[MemoryResult] = []
            for record in query_results:
                memory_result = MemoryResult(
                    id=record["id"],
                    content=record["content"],
                    similarity=record.get("similarity", 0.0),
                    namespace=record.get("namespace", "default"),
                    tags=record.get("tags", []),
                    importance=record.get("importance", 0.5),
                    created_at=record.get("created_at"),
                    metadata=record.get("metadata", {}),
                    vector=record.get("vector") if include_vector else None,
                )
                memory_results.append(memory_result)
            results.append(memory_results)
        return results

    def _get_vector_for_memory(self, memory_id: str) -> np.ndarray:
        """Get the vector for a memory.

        Args:
            memory_id: Memory UUID.

        Returns:
            The memory's vector.
        """
        result = self._repo.get_with_vector(memory_id)
        if result is None:
            # Return zero vector if memory not found (shouldn't happen in practice)
            return np.zeros(self._embeddings.dimensions, dtype=np.float32)
        _, vector = result
        return vector

    def _cosine_distance(self, vec1: np.ndarray, vec2: np.ndarray) -> float:
        """Calculate cosine distance between two vectors.

        Args:
            vec1: First vector.
            vec2: Second vector.

        Returns:
            Cosine distance (0 = identical, 2 = opposite).
        """
        norm1 = np.linalg.norm(vec1)
        norm2 = np.linalg.norm(vec2)
        if norm1 < 1e-10 or norm2 < 1e-10:
            return 1.0  # Maximum distance for zero vectors

        similarity = np.dot(vec1, vec2) / (norm1 * norm2)
        return float(1.0 - similarity)

    def _compute_pairwise_similarities(self, vectors: np.ndarray) -> np.ndarray:
        """Compute pairwise cosine similarities using vectorized operations.

        Uses scipy.cdist if available for optimal performance, otherwise
        falls back to numpy matrix operations.

        Args:
            vectors: 2D array of shape (n_vectors, embedding_dim).

        Returns:
            Symmetric similarity matrix of shape (n_vectors, n_vectors).
            Values range from -1 (opposite) to 1 (identical).
        """
        # Normalize vectors to unit length
        norms = np.linalg.norm(vectors, axis=1, keepdims=True)
        # Avoid division by zero for zero vectors
        norms = np.where(norms < 1e-10, 1.0, norms)
        normalized = vectors / norms

        if SCIPY_AVAILABLE:
            # scipy.cdist with cosine metric returns distances (1 - similarity)
            distances = cdist(normalized, normalized, metric="cosine")
            similarities = 1.0 - distances
        else:
            # Fallback: use numpy dot product (A @ A.T for normalized vectors)
            similarities = normalized @ normalized.T

        return similarities

    def _temperature_select(
        self,
        candidates: list[MemoryResult],
        temperature: float,
    ) -> tuple[MemoryResult, float]:
        """Select a candidate using temperature-based sampling.

        Args:
            candidates: List of candidate memories with similarity scores.
            temperature: Randomness factor (0 = greedy, 1 = uniform random).

        Returns:
            Tuple of (selected memory, selection probability).
        """
        if not candidates:
            raise WanderError("No candidates for temperature selection")

        if temperature == 0.0:
            # Greedy: pick highest similarity
            return max(candidates, key=lambda c: c.similarity), 1.0

        if temperature >= 1.0:
            # Random: uniform selection
            prob = 1.0 / len(candidates)
            return random.choice(candidates), prob

        # Temperature-based softmax selection
        similarities = np.array([c.similarity for c in candidates])

        # Scale by inverse temperature (lower temp = sharper distribution)
        scaled = similarities / (temperature + 1e-10)
        scaled = scaled - scaled.max()  # Numerical stability
        exp_scaled = np.exp(scaled)
        probs = exp_scaled / exp_scaled.sum()

        # Sample according to probabilities
        idx = np.random.choice(len(candidates), p=probs)
        return candidates[idx], float(probs[idx])

    def _extract_keywords(self, text: str, n: int = 5) -> list[str]:
        """Extract top keywords from text using simple frequency analysis.

        Args:
            text: Text to analyze.
            n: Number of keywords to extract.

        Returns:
            List of top keywords.
        """
        # Simple keyword extraction using word frequency
        # Tokenize and filter using module-level stop words
        words = re.findall(r"\b[a-zA-Z]+\b", text.lower())
        filtered = [w for w in words if w not in _STOP_WORDS and len(w) > 2]

        # Count frequencies
        counter = Counter(filtered)
        return [word for word, _ in counter.most_common(n)]

    def _format_output(
        self,
        nodes: list[VisualizationNode],
        edges: list[VisualizationEdge],
        format: Literal["json", "mermaid", "svg"],
    ) -> str:
        """Format visualization data for output.

        Args:
            nodes: Visualization nodes.
            edges: Visualization edges.
            format: Output format.

        Returns:
            Formatted string output.
        """
        if format == "json":
            return json.dumps(
                {
                    "nodes": [
                        {
                            "id": n.id,
                            "x": n.x,
                            "y": n.y,
                            "label": n.label,
                            "cluster": n.cluster,
                            "importance": n.importance,
                        }
                        for n in nodes
                    ],
                    "edges": [
                        {
                            "from": e.from_id,
                            "to": e.to_id,
                            "weight": e.weight,
                        }
                        for e in edges
                    ],
                },
                indent=2,
            )

        elif format == "mermaid":
            lines = ["graph LR"]

            # Add nodes with short IDs
            node_aliases = {n.id: f"N{i}" for i, n in enumerate(nodes)}
            for node in nodes:
                alias = node_aliases[node.id]
                # Escape special characters in label
                safe_label = node.label.replace('"', "'").replace("\n", " ")[:30]
                lines.append(f' {alias}["{safe_label}"]')

            # Add edges
            for edge in edges:
                from_alias = node_aliases.get(edge.from_id)
                to_alias = node_aliases.get(edge.to_id)
                if from_alias and to_alias:
                    lines.append(f" {from_alias} --> {to_alias}")

            return "\n".join(lines)

        elif format == "svg":
            return self._generate_svg(nodes, edges)

        else:
            raise ValidationError(f"Unknown format: {format}")

    def _generate_svg(
        self,
        nodes: list[VisualizationNode],
        edges: list[VisualizationEdge],
    ) -> str:
        """Generate SVG visualization.

        Args:
            nodes: Visualization nodes.
            edges: Visualization edges.

        Returns:
            SVG string.
        """
        width, height = 800, 600
        padding = 50

        # Calculate scale to fit nodes
        x_coords = [n.x for n in nodes]
        y_coords = [n.y for n in nodes]
        x_min, x_max = min(x_coords), max(x_coords)
        y_min, y_max = min(y_coords), max(y_coords)

        x_range = x_max - x_min if x_max != x_min else 1
        y_range = y_max - y_min if y_max != y_min else 1

        def scale_x(x: float) -> float:
            return padding + (x - x_min) / x_range * (width - 2 * padding)

        def scale_y(y: float) -> float:
            return padding + (y - y_min) / y_range * (height - 2 * padding)

        svg_lines = [
            f'<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 {width} {height}">',
            " <style>",
            " .node { cursor: pointer; }",
            " .node circle { stroke: #333; stroke-width: 1; }",
            " .node text { font-size: 10px; fill: #333; }",
            " .edge { stroke: #ccc; stroke-width: 1; opacity: 0.5; }",
            " </style>",
        ]

        # Draw edges
        for edge in edges:
            from_node = next((n for n in nodes if n.id == edge.from_id), None)
            to_node = next((n for n in nodes if n.id == edge.to_id), None)
            if from_node and to_node:
                x1, y1 = scale_x(from_node.x), scale_y(from_node.y)
                x2, y2 = scale_x(to_node.x), scale_y(to_node.y)
                svg_lines.append(
                    f' <line class="edge" x1="{x1:.1f}" y1="{y1:.1f}" '
                    f'x2="{x2:.1f}" y2="{y2:.1f}" />'
                )

        # Draw nodes
        for node in nodes:
            x, y = scale_x(node.x), scale_y(node.y)
            radius = 5 + node.importance * 5  # Scale by importance
            if node.cluster >= 0:
                color = CLUSTER_COLORS[node.cluster % len(CLUSTER_COLORS)]
            else:
                color = "#999"

            svg_lines.append(' <g class="node">')
            svg_lines.append(
                f' <circle cx="{x:.1f}" cy="{y:.1f}" r="{radius:.1f}" '
                f'fill="{color}" />'
            )
            # Add truncated label
            short_label = node.label[:20] + "..." if len(node.label) > 20 else node.label
            # Escape XML special characters
            short_label = (
                short_label.replace("&", "&amp;")
                .replace("<", "&lt;")
                .replace(">", "&gt;")
            )
            svg_lines.append(
                f' <text x="{x:.1f}" y="{y + radius + 12:.1f}" '
                f'text-anchor="middle">{short_label}</text>'
            )
            svg_lines.append(" </g>")

        svg_lines.append("</svg>")
        return "\n".join(svg_lines)