spatial-memory-mcp 1.0.3__py3-none-any.whl → 1.6.0__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as published to their public registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in the registry.

This version of spatial-memory-mcp has been flagged as a potentially problematic release.

Files changed (39)
  1. spatial_memory/__init__.py +97 -97
  2. spatial_memory/__main__.py +241 -2
  3. spatial_memory/adapters/lancedb_repository.py +74 -5
  4. spatial_memory/config.py +115 -2
  5. spatial_memory/core/__init__.py +35 -0
  6. spatial_memory/core/cache.py +317 -0
  7. spatial_memory/core/circuit_breaker.py +297 -0
  8. spatial_memory/core/connection_pool.py +41 -3
  9. spatial_memory/core/consolidation_strategies.py +402 -0
  10. spatial_memory/core/database.py +791 -769
  11. spatial_memory/core/db_idempotency.py +242 -0
  12. spatial_memory/core/db_indexes.py +575 -0
  13. spatial_memory/core/db_migrations.py +584 -0
  14. spatial_memory/core/db_search.py +509 -0
  15. spatial_memory/core/db_versioning.py +177 -0
  16. spatial_memory/core/embeddings.py +156 -19
  17. spatial_memory/core/errors.py +75 -3
  18. spatial_memory/core/filesystem.py +178 -0
  19. spatial_memory/core/logging.py +194 -103
  20. spatial_memory/core/models.py +4 -0
  21. spatial_memory/core/rate_limiter.py +326 -105
  22. spatial_memory/core/response_types.py +497 -0
  23. spatial_memory/core/tracing.py +300 -0
  24. spatial_memory/core/validation.py +403 -319
  25. spatial_memory/factory.py +407 -0
  26. spatial_memory/migrations/__init__.py +40 -0
  27. spatial_memory/ports/repositories.py +52 -2
  28. spatial_memory/server.py +329 -188
  29. spatial_memory/services/export_import.py +61 -43
  30. spatial_memory/services/lifecycle.py +397 -122
  31. spatial_memory/services/memory.py +81 -4
  32. spatial_memory/services/spatial.py +129 -46
  33. spatial_memory/tools/definitions.py +695 -671
  34. {spatial_memory_mcp-1.0.3.dist-info → spatial_memory_mcp-1.6.0.dist-info}/METADATA +83 -3
  35. spatial_memory_mcp-1.6.0.dist-info/RECORD +54 -0
  36. spatial_memory_mcp-1.0.3.dist-info/RECORD +0 -41
  37. {spatial_memory_mcp-1.0.3.dist-info → spatial_memory_mcp-1.6.0.dist-info}/WHEEL +0 -0
  38. {spatial_memory_mcp-1.0.3.dist-info → spatial_memory_mcp-1.6.0.dist-info}/entry_points.txt +0 -0
  39. {spatial_memory_mcp-1.0.3.dist-info → spatial_memory_mcp-1.6.0.dist-info}/licenses/LICENSE +0 -0
@@ -16,9 +16,10 @@ from __future__ import annotations
 
 import logging
 from dataclasses import dataclass
-from datetime import datetime, timezone
 from typing import TYPE_CHECKING, Any, Literal
 
+import numpy as np
+
 from spatial_memory.core.errors import (
     ConsolidationError,
     DecayError,
@@ -26,6 +27,10 @@ from spatial_memory.core.errors import (
     ReinforcementError,
     ValidationError,
 )
+from spatial_memory.core.consolidation_strategies import (
+    ConsolidationAction,
+    get_strategy,
+)
 from spatial_memory.core.lifecycle_ops import (
     apply_decay,
     calculate_decay_factor,
@@ -34,9 +39,6 @@ from spatial_memory.core.lifecycle_ops import (
     extract_candidates,
     find_duplicate_groups,
     jaccard_similarity,
-    merge_memory_content,
-    merge_memory_metadata,
-    select_representative,
 )
 from spatial_memory.core.models import (
     ConsolidateResult,
@@ -53,7 +55,7 @@ from spatial_memory.core.models import (
 
 # Alias for backward compatibility
 ConsolidationGroupResult = ConsolidationGroup
-from spatial_memory.core.utils import to_naive_utc, utc_now_naive
+from spatial_memory.core.utils import to_naive_utc, utc_now, utc_now_naive
 from spatial_memory.core.validation import validate_namespace
 
 logger = logging.getLogger(__name__)
@@ -110,6 +112,7 @@ class LifecycleConfig:
     consolidate_min_threshold: float = 0.7
     consolidate_content_weight: float = 0.3
     consolidate_max_batch: int = 1000
+    consolidate_chunk_size: int = 200  # Process in smaller chunks for memory efficiency
 
 
 # =============================================================================
@@ -271,16 +274,31 @@ class LifecycleService:
             total_decay_factor / len(all_memories) if all_memories else 1.0
         )
 
-        # Apply updates if not dry run
+        # Apply updates if not dry run - use batch update for efficiency
         failed_updates: list[str] = []
         if not dry_run and memories_to_update:
             logger.info(f"Applying decay to {len(memories_to_update)} memories")
-            for memory_id, new_importance in memories_to_update:
-                try:
-                    self._repo.update(memory_id, {"importance": new_importance})
-                except Exception as e:
-                    logger.warning(f"Failed to update {memory_id}: {e}")
-                    failed_updates.append(memory_id)
+            # Convert to batch update format: list of (memory_id, updates_dict)
+            batch_updates = [
+                (memory_id, {"importance": new_importance})
+                for memory_id, new_importance in memories_to_update
+            ]
+            try:
+                success_count, failed_ids = self._repo.update_batch(batch_updates)
+                failed_updates = failed_ids
+                logger.debug(
+                    f"Batch decay update: {success_count} succeeded, "
+                    f"{len(failed_ids)} failed"
+                )
+            except Exception as e:
+                logger.warning(f"Batch decay update failed: {e}")
+                # Fall back to individual updates on batch failure
+                for memory_id, new_importance in memories_to_update:
+                    try:
+                        self._repo.update(memory_id, {"importance": new_importance})
+                    except Exception as update_err:
+                        logger.warning(f"Failed to update {memory_id}: {update_err}")
+                        failed_updates.append(memory_id)
 
         return DecayResult(
             memories_analyzed=len(all_memories),
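The decay hunk above (and the reinforce hunks that follow) lean on repository batch operations whose definitions live in spatial_memory/ports/repositories.py and spatial_memory/adapters/lancedb_repository.py, which change elsewhere in this release but are not shown here. The following is a minimal sketch of the contract these call sites appear to assume; the protocol name and the exact typing are inferred from usage, not taken from the real port definition:

    from typing import Any, Protocol


    class LifecycleRepository(Protocol):
        """Sketch of the repository subset assumed by the batched lifecycle code."""

        def get_batch(self, memory_ids: list[str]) -> dict[str, Any]:
            """Return found memories keyed by id; ids that do not exist are simply absent."""
            ...

        def update_batch(
            self, updates: list[tuple[str, dict[str, Any]]]
        ) -> tuple[int, list[str]]:
            """Apply (memory_id, field_updates) pairs; return (success_count, failed_ids)."""
            ...

        def update(self, memory_id: str, updates: dict[str, Any]) -> None:
            """Per-row fallback used when the batch call raises."""
            ...

Note that the fallback path keeps the old per-row behaviour, so a failing batch call degrades to the 1.0.3 update loop rather than aborting the decay or reinforce run.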
@@ -345,14 +363,30 @@ class LifecycleService:
         failed_updates: list[str] = []
         total_boost = 0.0
 
-        for memory_id in memory_ids:
-            memory = self._repo.get(memory_id)
+        # Batch fetch all memories in a single query
+        memory_map = self._repo.get_batch(memory_ids)
 
-            if memory is None:
+        # Track which memories were not found
+        for memory_id in memory_ids:
+            if memory_id not in memory_map:
                 not_found.append(memory_id)
                 logger.warning(f"Memory not found for reinforcement: {memory_id}")
-                continue
 
+        if not memory_map:
+            return ReinforceResult(
+                memories_reinforced=0,
+                avg_boost=0.0,
+                reinforced=[],
+                not_found=not_found,
+                failed_updates=[],
+            )
+
+        # Calculate reinforcement for all found memories
+        now = utc_now()
+        batch_updates: list[tuple[str, dict[str, Any]]] = []
+        reinforcement_info: list[tuple[str, Memory, float, float]] = []  # id, memory, new_imp, boost
+
+        for memory_id, memory in memory_map.items():
             # Calculate new importance
             new_importance, actual_boost = calculate_reinforcement(
                 current_importance=memory.importance,
@@ -364,12 +398,36 @@ class LifecycleService:
             # Prepare update
             updates: dict[str, Any] = {"importance": new_importance}
             if update_access:
-                updates["last_accessed"] = datetime.now(timezone.utc)
+                updates["last_accessed"] = now
                 updates["access_count"] = memory.access_count + 1
 
-            # Apply update
-            try:
-                self._repo.update(memory_id, updates)
+            batch_updates.append((memory_id, updates))
+            reinforcement_info.append((memory_id, memory, new_importance, actual_boost))
+
+        # Apply all updates in a single batch operation
+        try:
+            success_count, batch_failed_ids = self._repo.update_batch(batch_updates)
+            failed_updates = batch_failed_ids
+            logger.debug(
+                f"Batch reinforce update: {success_count} succeeded, "
+                f"{len(batch_failed_ids)} failed"
+            )
+        except Exception as e:
+            logger.warning(f"Batch reinforce update failed: {e}, falling back to individual updates")
+            # Fall back to individual updates on batch failure
+            batch_failed_ids = []
+            for memory_id, updates in batch_updates:
+                try:
+                    self._repo.update(memory_id, updates)
+                except Exception as update_err:
+                    logger.warning(f"Failed to reinforce {memory_id}: {update_err}")
+                    batch_failed_ids.append(memory_id)
+            failed_updates = batch_failed_ids
+
+        # Build reinforced results for successful updates
+        failed_set = set(failed_updates)
+        for memory_id, memory, new_importance, actual_boost in reinforcement_info:
+            if memory_id not in failed_set:
                 reinforced.append(
                     ReinforcedMemory(
                         id=memory_id,
@@ -382,9 +440,6 @@ class LifecycleService:
                     )
                 )
                 total_boost += actual_boost
-            except Exception as e:
-                logger.warning(f"Failed to reinforce {memory_id}: {e}")
-                failed_updates.append(memory_id)
 
         avg_boost = total_boost / len(reinforced) if reinforced else 0.0
 
@@ -462,11 +517,17 @@ class LifecycleService:
                 extractions=[],
             )
 
+        # Generate embeddings for all candidates in a single batch
+        # This is much more efficient than generating one at a time
+        candidate_texts = [c.content for c in candidates]
+        candidate_vectors = self._embeddings.embed_batch(candidate_texts)
+        logger.debug(f"Generated {len(candidate_vectors)} embeddings in batch")
+
         extractions: list[ExtractedMemory] = []
         memories_created = 0
         deduplicated_count = 0
 
-        for candidate in candidates:
+        for candidate, vector in zip(candidates, candidate_vectors):
             extraction = ExtractedMemory(
                 content=candidate.content,
                 confidence=candidate.confidence,
@@ -477,22 +538,24 @@ class LifecycleService:
                 memory_id=None,
             )
 
-            # Check for duplicates if requested
+            # Check for duplicates if requested (use pre-computed vector)
             if deduplicate:
-                is_duplicate = self._check_duplicate(
-                    candidate.content,
-                    effective_namespace,
-                    dedup_threshold,
+                is_duplicate = self._check_duplicate_with_vector(
+                    content=candidate.content,
+                    vector=vector,
+                    namespace=effective_namespace,
+                    threshold=dedup_threshold,
                 )
                 if is_duplicate:
                     deduplicated_count += 1
                     extractions.append(extraction)
                     continue
 
-            # Store the extracted memory
+            # Store the extracted memory (use pre-computed vector)
             try:
-                memory_id = self._store_extracted_memory(
+                memory_id = self._store_extracted_memory_with_vector(
                     content=candidate.content,
+                    vector=vector,
                     namespace=effective_namespace,
                     confidence=candidate.confidence,
                     pattern_type=candidate.pattern_type,
@@ -557,25 +620,20 @@ class LifecycleService:
                 "similarity_threshold must be between 0.7 and 0.99"
             )
 
-        if strategy not in (
-            "keep_newest",
-            "keep_oldest",
-            "keep_highest_importance",
-            "merge_content",
-        ):
-            raise ValidationError(f"Invalid strategy: {strategy}")
+        # Validate strategy using the strategy registry
+        try:
+            strategy_impl = get_strategy(strategy)
+        except ValueError as e:
+            raise ValidationError(str(e)) from e
 
         if max_groups < 1:
             raise ValidationError("max_groups must be at least 1")
 
         try:
-            # Fetch all memories in namespace with vectors
-            all_memories = self._repo.get_all(
-                namespace=namespace,
-                limit=self._config.consolidate_max_batch,
-            )
+            # Get total count to decide processing strategy
+            total_count = self._repo.count(namespace=namespace)
 
-            if len(all_memories) < 2:
+            if total_count < 2:
                 logger.info("Not enough memories for consolidation")
                 return ConsolidateResult(
                     groups_found=0,
@@ -585,9 +643,34 @@ class LifecycleService:
                     dry_run=dry_run,
                 )
 
-            # Build lookup structures
-            import numpy as np
+            # Use chunked processing for large namespaces to reduce memory usage
+            chunk_size = min(
+                self._config.consolidate_chunk_size,
+                self._config.consolidate_max_batch,
+            )
+            use_chunked = total_count > chunk_size
 
+            if use_chunked:
+                logger.info(
+                    f"Using chunked consolidation: {total_count} memories in "
+                    f"chunks of {chunk_size}"
+                )
+                return self._consolidate_chunked(
+                    namespace=namespace,
+                    similarity_threshold=similarity_threshold,
+                    strategy=strategy,
+                    dry_run=dry_run,
+                    max_groups=max_groups,
+                    chunk_size=chunk_size,
+                )
+
+            # Standard single-pass processing for smaller namespaces
+            all_memories = self._repo.get_all(
+                namespace=namespace,
+                limit=self._config.consolidate_max_batch,
+            )
+
+            # Build lookup structures
             memories = [m for m, _ in all_memories]
             vectors_list = [v for _, v in all_memories]
             vectors_array = np.array(vectors_list, dtype=np.float32)
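The chunked-versus-single-pass decision above is driven entirely by the new consolidate_chunk_size field together with the existing consolidate_max_batch cap. A small illustration of that arithmetic using the defaults visible in this diff (plain numbers only, no project code involved):

    # Defaults in this release: consolidate_chunk_size=200, consolidate_max_batch=1000
    chunk_size = min(200, 1000)   # -> 200

    # A namespace with 150 memories stays on the single-pass path:
    print(150 > chunk_size)       # False: load all 150 and scan them once

    # A namespace with 5_000 memories takes the chunked path:
    print(5_000 > chunk_size)     # True: _consolidate_chunked walks up to ~25 windows of 200,
                                  # stopping early once max_groups groups have been handled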
@@ -636,12 +719,6 @@ class LifecycleService:
                 group_member_dicts = [memory_dicts[i] for i in member_indices]
                 group_member_ids = [str(d["id"]) for d in group_member_dicts]
 
-                # Select representative
-                rep_idx = select_representative(group_member_dicts, strategy)
-                rep_id = str(group_member_dicts[rep_idx]["id"])
-
-                action = "preview" if dry_run else "merged"
-
                 # Calculate average similarity for the group
                 total_sim = 0.0
                 pair_count = 0
@@ -665,81 +742,32 @@ class LifecycleService:
                         pair_count += 1
                 avg_similarity = total_sim / pair_count if pair_count > 0 else 0.0
 
-                if not dry_run:
-                    try:
-                        if strategy == "merge_content":
-                            # Create merged content in memory first (no DB write yet)
-                            group_contents = [str(d["content"]) for d in group_member_dicts]
-                            merged_content = merge_memory_content(group_contents)
-                            merged_meta = merge_memory_metadata(group_member_dicts)
-
-                            # Generate new embedding before any DB changes
-                            new_vector = self._embeddings.embed(merged_content)
-
-                            # Prepare merged memory object (not persisted yet)
-                            merged_memory = Memory(
-                                id="",  # Will be assigned
-                                content=merged_content,
-                                namespace=namespace,
-                                tags=merged_meta.get("tags", []),
-                                importance=merged_meta.get("importance", 0.5),
-                                source=MemorySource.CONSOLIDATED,
-                                metadata=merged_meta.get("metadata", {}),
-                            )
-
-                            # DELETE FIRST pattern: remove originals before adding merge
-                            # This prevents duplicates if add fails after delete
-                            deleted_ids: list[str] = []
-                            try:
-                                for mid in group_member_ids:
-                                    self._repo.delete(mid)
-                                    deleted_ids.append(mid)
-                                    memories_deleted += 1
-                            except Exception as del_err:
-                                # Partial delete - log for manual recovery
-                                logger.critical(
-                                    f"Partial consolidation failure: deleted {deleted_ids}, "
-                                    f"failed on {mid}: {del_err}. "
-                                    f"Remaining members may need manual cleanup: "
-                                    f"{[m for m in group_member_ids if m not in deleted_ids]}"
-                                )
-                                raise
-
-                            # Now add the merged memory
-                            try:
-                                new_id = self._repo.add(merged_memory, new_vector)
-                            except Exception as add_err:
-                                # CRITICAL: Originals deleted but merge failed
-                                # Log for manual recovery - data is in merged_content
-                                logger.critical(
-                                    f"Consolidation add failed after deleting originals. "
-                                    f"Deleted IDs: {deleted_ids}. "
-                                    f"Merged content (save for recovery): {merged_content[:500]}... "
-                                    f"Error: {add_err}"
-                                )
-                                raise
-
-                            rep_id = new_id
-                            memories_merged += 1
-                            action = "merged"
-                        else:
-                            # Keep representative, delete others
-                            for mid in group_member_ids:
-                                if mid != rep_id:
-                                    self._repo.delete(mid)
-                                    memories_deleted += 1
-                            memories_merged += 1
-                            action = "kept_representative"
-                    except Exception as e:
-                        logger.warning(f"Failed to consolidate group: {e}")
-                        action = "failed"
+                # Apply consolidation strategy
+                try:
+                    action_result: ConsolidationAction = strategy_impl.apply(
+                        members=group_member_dicts,
+                        member_ids=group_member_ids,
+                        namespace=namespace,
+                        repository=self._repo,
+                        embeddings=self._embeddings,
+                        dry_run=dry_run,
+                    )
+                    memories_merged += action_result.memories_merged
+                    memories_deleted += action_result.memories_deleted
+                except Exception as e:
+                    logger.warning(f"Failed to consolidate group: {e}")
+                    action_result = ConsolidationAction(
+                        representative_id=group_member_ids[0],
+                        deleted_ids=[],
+                        action="failed",
+                    )
 
                 result_groups.append(
                     ConsolidationGroupResult(
-                        representative_id=rep_id,
+                        representative_id=action_result.representative_id,
                         member_ids=group_member_ids,
                         avg_similarity=avg_similarity,
-                        action_taken=action,
+                        action_taken=action_result.action,
                     )
                 )
 
@@ -760,6 +788,171 @@ class LifecycleService:
     # Helper Methods
     # =========================================================================
 
+    def _consolidate_chunked(
+        self,
+        namespace: str,
+        similarity_threshold: float,
+        strategy: Literal[
+            "keep_newest", "keep_oldest", "keep_highest_importance", "merge_content"
+        ],
+        dry_run: bool,
+        max_groups: int,
+        chunk_size: int,
+    ) -> ConsolidateResult:
+        """Process consolidation in memory-efficient chunks.
+
+        Processes memories in smaller chunks to reduce peak memory usage.
+        Note: This may miss duplicates that span chunk boundaries.
+
+        Args:
+            namespace: Namespace to consolidate.
+            similarity_threshold: Minimum similarity for duplicates.
+            strategy: How to handle duplicates.
+            dry_run: Preview without changes.
+            max_groups: Maximum groups to process total.
+            chunk_size: Memories per chunk.
+
+        Returns:
+            Aggregated ConsolidateResult from all chunks.
+        """
+        all_groups: list[ConsolidationGroupResult] = []
+        total_merged = 0
+        total_deleted = 0
+        offset = 0
+        groups_remaining = max_groups
+
+        while groups_remaining > 0:
+            # Fetch chunk of memories
+            chunk_memories = self._repo.get_all(
+                namespace=namespace,
+                limit=chunk_size,
+            )
+
+            # Skip already processed memories by filtering by offset
+            # Note: This is a simplified approach - in production, you'd want
+            # to track processed IDs or use cursor-based pagination
+            if offset > 0:
+                # Re-fetch with offset simulation (get more and skip)
+                all_chunk = self._repo.get_all(
+                    namespace=namespace,
+                    limit=offset + chunk_size,
+                )
+                if len(all_chunk) <= offset:
+                    # No more memories to process
+                    break
+                chunk_memories = all_chunk[offset:offset + chunk_size]
+
+            if len(chunk_memories) < 2:
+                break
+
+            # Build lookup structures for this chunk
+            memories = [m for m, _ in chunk_memories]
+            vectors_list = [v for _, v in chunk_memories]
+            vectors_array = np.array(vectors_list, dtype=np.float32)
+            memory_ids = [m.id for m in memories]
+            contents = [m.content for m in memories]
+            memory_dicts: list[dict[str, Any]] = [
+                {
+                    "id": m.id,
+                    "content": m.content,
+                    "created_at": m.created_at,
+                    "last_accessed": m.last_accessed,
+                    "access_count": m.access_count,
+                    "importance": m.importance,
+                    "tags": list(m.tags),
+                }
+                for m in memories
+            ]
+
+            # Find duplicate groups in this chunk
+            group_indices = find_duplicate_groups(
+                memory_ids=memory_ids,
+                vectors=vectors_array,
+                contents=contents,
+                threshold=similarity_threshold,
+                content_weight=self._config.consolidate_content_weight,
+            )
+
+            # Limit groups for this chunk
+            group_indices = group_indices[:groups_remaining]
+
+            if not group_indices:
+                offset += len(chunk_memories)
+                continue
+
+            # Get strategy implementation
+            strategy_impl = get_strategy(strategy)
+
+            # Process groups in this chunk
+            for member_indices in group_indices:
+                group_member_dicts = [memory_dicts[i] for i in member_indices]
+                group_member_ids = [str(d["id"]) for d in group_member_dicts]
+
+                # Calculate average similarity
+                total_sim = 0.0
+                pair_count = 0
+                for i_idx, i in enumerate(member_indices):
+                    for j in member_indices[i_idx + 1:]:
+                        v1, v2 = vectors_array[i], vectors_array[j]
+                        dot = float(np.dot(v1, v2))
+                        norm1 = float(np.linalg.norm(v1))
+                        norm2 = float(np.linalg.norm(v2))
+                        if norm1 > 1e-10 and norm2 > 1e-10:
+                            v_sim = dot / (norm1 * norm2)
+                        else:
+                            v_sim = 0.0
+                        c_sim = jaccard_similarity(contents[i], contents[j])
+                        combined = combined_similarity(
+                            v_sim, c_sim, self._config.consolidate_content_weight
+                        )
+                        total_sim += combined
+                        pair_count += 1
+                avg_similarity = total_sim / pair_count if pair_count > 0 else 0.0
+
+                # Apply consolidation strategy
+                try:
+                    action_result: ConsolidationAction = strategy_impl.apply(
+                        members=group_member_dicts,
+                        member_ids=group_member_ids,
+                        namespace=namespace,
+                        repository=self._repo,
+                        embeddings=self._embeddings,
+                        dry_run=dry_run,
+                    )
+                    total_merged += action_result.memories_merged
+                    total_deleted += action_result.memories_deleted
+                except Exception as e:
+                    logger.warning(f"Failed to consolidate group: {e}")
+                    action_result = ConsolidationAction(
+                        representative_id=group_member_ids[0],
+                        deleted_ids=[],
+                        action="failed",
+                    )
+
+                all_groups.append(
+                    ConsolidationGroupResult(
+                        representative_id=action_result.representative_id,
+                        member_ids=group_member_ids,
+                        avg_similarity=avg_similarity,
+                        action_taken=action_result.action,
+                    )
+                )
+                groups_remaining -= 1
+
+            offset += len(chunk_memories)
+            logger.debug(
+                f"Processed chunk at offset {offset - len(chunk_memories)}, "
+                f"found {len(group_indices)} groups"
+            )
+
+        return ConsolidateResult(
+            groups_found=len(all_groups),
+            memories_merged=total_merged,
+            memories_deleted=total_deleted,
+            groups=all_groups,
+            dry_run=dry_run,
+        )
+
     def _check_duplicate(
         self,
         content: str,
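The strategy branching that was removed from the consolidation loop now lives in the new spatial_memory/core/consolidation_strategies.py (+402 lines, not included in this section). Judging only from how get_strategy and ConsolidationAction are used in the hunks above, the interface looks roughly like the sketch below; the protocol name, the keyword-only signature, and any fields beyond those actually accessed here are assumptions:

    from dataclasses import dataclass
    from typing import Any, Protocol


    @dataclass
    class ConsolidationAction:
        """Outcome of applying one strategy to one duplicate group (fields inferred from usage)."""
        representative_id: str
        deleted_ids: list[str]
        action: str  # e.g. "merged", "kept_representative", "preview", "failed"
        memories_merged: int = 0
        memories_deleted: int = 0


    class ConsolidationStrategy(Protocol):
        """Sketch of the strategy objects returned by get_strategy()."""

        def apply(
            self,
            *,
            members: list[dict[str, Any]],
            member_ids: list[str],
            namespace: str,
            repository: Any,
            embeddings: Any,
            dry_run: bool,
        ) -> ConsolidationAction: ...


    def get_strategy(name: str) -> ConsolidationStrategy:
        """Look up a registered strategy; raises ValueError for unknown names."""
        raise NotImplementedError  # registry internals are not shown in this diff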
@@ -843,3 +1036,85 @@ class LifecycleService:
         )
 
         return self._repo.add(memory, vector)
+
+    def _check_duplicate_with_vector(
+        self,
+        content: str,
+        vector: np.ndarray,
+        namespace: str,
+        threshold: float,
+    ) -> bool:
+        """Check if similar content already exists using pre-computed vector.
+
+        Args:
+            content: Content to check.
+            vector: Pre-computed embedding vector.
+            namespace: Namespace to search.
+            threshold: Similarity threshold.
+
+        Returns:
+            True if a similar memory exists.
+        """
+        try:
+            # Search for similar memories using pre-computed vector
+            results = self._repo.search(vector, limit=5, namespace=namespace)
+
+            for result in results:
+                # Check vector similarity
+                if result.similarity >= threshold:
+                    return True
+
+                # Also check content overlap
+                content_sim = jaccard_similarity(content, result.content)
+                combined = combined_similarity(
+                    result.similarity,
+                    content_sim,
+                    self._config.consolidate_content_weight,
+                )
+                if combined >= threshold:
+                    return True
+
+            return False
+
+        except Exception as e:
+            logger.warning(f"Duplicate check failed: {e}")
+            return False
+
+    def _store_extracted_memory_with_vector(
+        self,
+        content: str,
+        vector: np.ndarray,
+        namespace: str,
+        confidence: float,
+        pattern_type: str,
+    ) -> str:
+        """Store an extracted memory using pre-computed vector.
+
+        Args:
+            content: Memory content.
+            vector: Pre-computed embedding vector.
+            namespace: Target namespace.
+            confidence: Extraction confidence.
+            pattern_type: Type of pattern matched.
+
+        Returns:
+            The new memory's ID.
+        """
+        # Scale importance by confidence but keep lower than manual memories
+        importance = self._config.extract_default_importance * confidence
+
+        # Create memory
+        memory = Memory(
+            id="",  # Will be assigned
+            content=content,
+            namespace=namespace,
+            tags=[f"extracted-{pattern_type}"],
+            importance=importance,
+            source=MemorySource.EXTRACTED,
+            metadata={
+                "extraction_confidence": confidence,
+                "extraction_pattern": pattern_type,
+            },
+        )
+
+        return self._repo.add(memory, vector)
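The pre-computed-vector duplicate check above blends the store's vector similarity with a Jaccard similarity over the raw content, weighted by consolidate_content_weight (0.3 by default in this file). combined_similarity itself is defined in lifecycle_ops and is not part of this diff; assuming it is a simple linear blend, the check behaves roughly as in this sketch:

    def combined_similarity(vector_sim: float, content_sim: float, content_weight: float) -> float:
        # Assumed linear blend; the real implementation in lifecycle_ops is not shown in this diff.
        return (1.0 - content_weight) * vector_sim + content_weight * content_sim

    # With the default content weight of 0.3, a search hit with vector similarity 0.88
    # and Jaccard overlap 0.55 scores 0.7 * 0.88 + 0.3 * 0.55 = ~0.781, so it is treated
    # as a duplicate only when the threshold is at or below that value.
    print(combined_similarity(0.88, 0.55, 0.3))  # ~0.781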