vector-inspector 0.2.5__py3-none-any.whl → 0.2.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (26)
  1. vector_inspector/core/cache_manager.py +159 -0
  2. vector_inspector/core/connection_manager.py +277 -0
  3. vector_inspector/core/connections/chroma_connection.py +90 -5
  4. vector_inspector/core/connections/qdrant_connection.py +62 -8
  5. vector_inspector/core/embedding_utils.py +140 -0
  6. vector_inspector/services/backup_restore_service.py +3 -29
  7. vector_inspector/services/credential_service.py +130 -0
  8. vector_inspector/services/filter_service.py +1 -1
  9. vector_inspector/services/profile_service.py +409 -0
  10. vector_inspector/services/settings_service.py +20 -1
  11. vector_inspector/services/visualization_service.py +11 -7
  12. vector_inspector/ui/components/connection_manager_panel.py +320 -0
  13. vector_inspector/ui/components/profile_manager_panel.py +518 -0
  14. vector_inspector/ui/dialogs/__init__.py +5 -0
  15. vector_inspector/ui/dialogs/cross_db_migration.py +364 -0
  16. vector_inspector/ui/dialogs/embedding_config_dialog.py +176 -0
  17. vector_inspector/ui/main_window.py +429 -181
  18. vector_inspector/ui/views/connection_view.py +43 -8
  19. vector_inspector/ui/views/info_panel.py +226 -80
  20. vector_inspector/ui/views/metadata_view.py +136 -28
  21. vector_inspector/ui/views/search_view.py +43 -3
  22. {vector_inspector-0.2.5.dist-info → vector_inspector-0.2.7.dist-info}/METADATA +5 -3
  23. vector_inspector-0.2.7.dist-info/RECORD +45 -0
  24. vector_inspector-0.2.5.dist-info/RECORD +0 -35
  25. {vector_inspector-0.2.5.dist-info → vector_inspector-0.2.7.dist-info}/WHEEL +0 -0
  26. {vector_inspector-0.2.5.dist-info → vector_inspector-0.2.7.dist-info}/entry_points.txt +0 -0
@@ -50,16 +50,22 @@ class QdrantConnection(VectorDBConnection):
             True if connection successful, False otherwise
         """
         try:
+            # Common parameters for stability
+            common_params = {
+                'check_compatibility': False,
+                'timeout': 300,  # 5 minutes timeout for long operations
+            }
+
             if self.path:
                 # Local/embedded mode
-                self._client = QdrantClient(path=self.path, check_compatibility=False)
+                self._client = QdrantClient(path=self.path, **common_params)
             elif self.url:
                 # Full URL provided
                 self._client = QdrantClient(
                     url=self.url,
                     api_key=self.api_key,
                     prefer_grpc=self.prefer_grpc,
-                    check_compatibility=False
+                    **common_params
                 )
             elif self.host:
                 # Host and port provided
@@ -68,11 +74,11 @@ class QdrantConnection(VectorDBConnection):
                     port=self.port,
                     api_key=self.api_key,
                     prefer_grpc=self.prefer_grpc,
-                    check_compatibility=False
+                    **common_params
                 )
             else:
                 # Default to in-memory client
-                self._client = QdrantClient(":memory:", check_compatibility=False)
+                self._client = QdrantClient(":memory:", **common_params)
 
             # Test connection
             self._client.get_collections()
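Note: the connect() change above funnels every construction path through the same common_params, so each QdrantClient now gets a 300-second timeout and skips the client/server compatibility check. Roughly, the URL branch expands to the following sketch (the connection values here are illustrative, not taken from the package):

    client = QdrantClient(
        url="http://localhost:6333",    # illustrative value
        api_key=None,
        prefer_grpc=False,
        check_compatibility=False,      # skip the client/server version check
        timeout=300,                    # seconds; allows long scroll/migration calls
    )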
@@ -251,6 +257,14 @@ class QdrantConnection(VectorDBConnection):
                 "distance_metric": distance_metric,
             }
 
+            # Check for embedding model metadata (if collection creator stored it)
+            if hasattr(collection_info.config, 'metadata') and collection_info.config.metadata:
+                metadata = collection_info.config.metadata
+                if 'embedding_model' in metadata:
+                    result['embedding_model'] = metadata['embedding_model']
+                if 'embedding_model_type' in metadata:
+                    result['embedding_model_type'] = metadata['embedding_model_type']
+
             if config_details:
                 result['config'] = config_details
 
@@ -260,6 +274,46 @@ class QdrantConnection(VectorDBConnection):
             print(f"Failed to get collection info: {e}")
             return None
 
+    def _get_embedding_model_for_collection(self, collection_name: str):
+        """Get the appropriate embedding model for a collection based on stored metadata, settings, or dimension."""
+        from ..embedding_utils import get_model_for_dimension, load_embedding_model, DEFAULT_MODEL
+
+        # Get collection info to determine vector dimension and check metadata
+        collection_info = self.get_collection_info(collection_name)
+        if not collection_info:
+            # Default if we can't determine
+            print(f"Warning: Could not determine collection info for {collection_name}, using default")
+            model_name, model_type = DEFAULT_MODEL
+            model = load_embedding_model(model_name, model_type)
+            return (model, model_name, model_type)
+
+        # Priority 1: Check if collection metadata has embedding model info (most reliable)
+        if 'embedding_model' in collection_info:
+            model_name = collection_info['embedding_model']
+            model_type = collection_info.get('embedding_model_type', 'sentence-transformer')
+            print(f"Using stored embedding model '{model_name}' ({model_type}) for collection '{collection_name}'")
+            model = load_embedding_model(model_name, model_type)
+            return (model, model_name, model_type)
+
+        # Priority 2: Check user settings for manual override (skip in connection class)
+        # Settings lookup is done in the UI layer where connection_id is available
+
+        # Priority 3: Fall back to dimension-based guessing (least reliable)
+        vector_dim = collection_info.get("vector_dimension")
+        if not vector_dim or vector_dim == "Unknown":
+            print(f"Warning: No vector dimension in collection info, using default")
+            model_name, model_type = DEFAULT_MODEL
+            model = load_embedding_model(model_name, model_type)
+            return (model, model_name, model_type)
+
+        # Get the appropriate model for this dimension
+        model_name, model_type = get_model_for_dimension(vector_dim)
+        model = load_embedding_model(model_name, model_type)
+
+        print(f"⚠️ Guessing {model_type} model '{model_name}' based on dimension {vector_dim} for '{collection_name}'")
+        print(f"   To specify the correct model, use Settings > Configure Collection Embedding Models")
+        return (model, model_name, model_type)
+
     def _build_qdrant_filter(self, where: Optional[Dict[str, Any]] = None) -> Optional[Filter]:
         """
         Build Qdrant filter from ChromaDB-style where clause.
@@ -374,11 +428,11 @@ class QdrantConnection(VectorDBConnection):
         for query in queries:
             # Embed text queries if needed
             if isinstance(query, str):
-                # Generate embeddings for text query
+                # Generate embeddings for text query using appropriate model for this collection
                 try:
-                    from sentence_transformers import SentenceTransformer
-                    model = SentenceTransformer("all-MiniLM-L6-v2")
-                    query_vector = model.encode(query).tolist()
+                    model, model_name, model_type = self._get_embedding_model_for_collection(collection_name)
+                    from ..embedding_utils import encode_text
+                    query_vector = encode_text(query, model, model_type)
                 except Exception as e:
                     print(f"Failed to embed query text: {e}")
                     continue
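Note: previously every text query was encoded with a hard-coded all-MiniLM-L6-v2 (384 dimensions), which mismatches collections built with other models; the new path resolves the model per collection. A rough sketch of the flow, assuming a connected QdrantConnection `conn` and a hypothetical 768-dimensional collection named "docs":

    model, model_name, model_type = conn._get_embedding_model_for_collection("docs")
    # e.g. ("all-mpnet-base-v2", "sentence-transformer") via the dimension table below
    query_vector = encode_text("how do I reset a password?", model, model_type)  # length matches the collection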
@@ -0,0 +1,140 @@ vector_inspector/core/embedding_utils.py (new file)
+"""Utilities for managing embedding models and vector dimensions."""
+
+from typing import Optional, Union, Tuple
+from sentence_transformers import SentenceTransformer
+
+
+# Mapping of vector dimensions to appropriate models
+# Format: dimension -> list of (model_name, model_type, description)
+# Listed in order of preference for ambiguous cases
+DIMENSION_TO_MODEL = {
+    384: [
+        ("all-MiniLM-L6-v2", "sentence-transformer", "Fast text embeddings"),
+    ],
+    512: [
+        ("openai/clip-vit-base-patch32", "clip", "Multi-modal (text + images)"),
+        ("paraphrase-albert-small-v2", "sentence-transformer", "Text-only paraphrase"),
+    ],
+    768: [
+        ("all-mpnet-base-v2", "sentence-transformer", "High quality text embeddings"),
+    ],
+    1024: [
+        ("all-roberta-large-v1", "sentence-transformer", "Large text embeddings"),
+    ],
+    1536: [
+        ("gtr-t5-large", "sentence-transformer", "Very large text embeddings"),
+    ],
+}
+
+# Default model to use when dimension is unknown or not mapped
+DEFAULT_MODEL = ("all-MiniLM-L6-v2", "sentence-transformer")
+
+
+def get_model_for_dimension(dimension: int, prefer_multimodal: bool = True) -> Tuple[str, str]:
+    """
+    Get the appropriate embedding model name and type for a given vector dimension.
+
+    Args:
+        dimension: The vector dimension size
+        prefer_multimodal: If True and multiple models exist for this dimension,
+            prefer multi-modal (CLIP) over text-only models
+
+    Returns:
+        Tuple of (model_name, model_type) where model_type is "sentence-transformer" or "clip"
+    """
+    if dimension in DIMENSION_TO_MODEL:
+        models = DIMENSION_TO_MODEL[dimension]
+        if len(models) == 1:
+            return (models[0][0], models[0][1])
+
+        # Multiple models available - apply preference
+        if prefer_multimodal:
+            # Prefer CLIP/multimodal
+            for model_name, model_type, desc in models:
+                if model_type == "clip":
+                    return (model_name, model_type)
+
+        # Default to first option
+        return (models[0][0], models[0][1])
+
+    # Find the closest dimension if exact match not found
+    closest_dim = min(DIMENSION_TO_MODEL.keys(), key=lambda x: abs(x - dimension))
+    models = DIMENSION_TO_MODEL[closest_dim]
+    return (models[0][0], models[0][1])
+
+
+def get_available_models_for_dimension(dimension: int) -> list:
+    """
+    Get all available model options for a given dimension.
+
+    Args:
+        dimension: The vector dimension size
+
+    Returns:
+        List of tuples: [(model_name, model_type, description), ...]
+    """
+    if dimension in DIMENSION_TO_MODEL:
+        return DIMENSION_TO_MODEL[dimension]
+    return []
+
+
+def load_embedding_model(model_name: str, model_type: str) -> Union[SentenceTransformer, any]:
+    """
+    Load an embedding model (sentence-transformer or CLIP).
+
+    Args:
+        model_name: Name of the model to load
+        model_type: Type of model ("sentence-transformer" or "clip")
+
+    Returns:
+        Loaded model (SentenceTransformer or CLIP model)
+    """
+    if model_type == "clip":
+        from transformers import CLIPModel, CLIPProcessor
+        model = CLIPModel.from_pretrained(model_name)
+        processor = CLIPProcessor.from_pretrained(model_name)
+        return (model, processor)
+    else:
+        return SentenceTransformer(model_name)
+
+
+def encode_text(text: str, model: Union[SentenceTransformer, Tuple], model_type: str) -> list:
+    """
+    Encode text using the appropriate model.
+
+    Args:
+        text: Text to encode
+        model: The loaded model (SentenceTransformer or (CLIPModel, CLIPProcessor) tuple)
+        model_type: Type of model ("sentence-transformer" or "clip")
+
+    Returns:
+        Embedding vector as a list
+    """
+    if model_type == "clip":
+        import torch
+        clip_model, processor = model
+        inputs = processor(text=[text], return_tensors="pt", padding=True)
+        with torch.no_grad():
+            text_features = clip_model.get_text_features(**inputs)
+        # Normalize the features (CLIP embeddings are typically normalized)
+        text_features = text_features / text_features.norm(dim=-1, keepdim=True)
+        return text_features[0].cpu().numpy().tolist()
+    else:
+        # sentence-transformer
+        embedding = model.encode(text)
+        return embedding.tolist()
+
+
+def get_embedding_model_for_dimension(dimension: int) -> Tuple[Union[SentenceTransformer, Tuple], str, str]:
+    """
+    Get a loaded embedding model for a specific dimension.
+
+    Args:
+        dimension: The vector dimension size
+
+    Returns:
+        Tuple of (loaded_model, model_name, model_type)
+    """
+    model_name, model_type = get_model_for_dimension(dimension)
+    model = load_embedding_model(model_name, model_type)
+    return (model, model_name, model_type)
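For orientation, a minimal usage sketch of the new vector_inspector/core/embedding_utils.py module shown above (768 is just an example dimension):

    from vector_inspector.core.embedding_utils import (
        get_model_for_dimension, load_embedding_model, encode_text,
    )

    model_name, model_type = get_model_for_dimension(768)    # -> ("all-mpnet-base-v2", "sentence-transformer")
    model = load_embedding_model(model_name, model_type)
    vector = encode_text("example query", model, model_type)  # plain Python list of floats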
@@ -169,36 +169,10 @@ class BackupRestoreService:
             print(f"Failed to generate embeddings: {e}")
             return False
 
-        # Convert IDs to Qdrant-compatible format (integers or UUIDs)
-        # Store original IDs in metadata
+        # Keep IDs as strings - Qdrant's _to_uuid method handles conversion
+        # Just ensure all IDs are strings
         original_ids = data.get("ids", [])
-        qdrant_ids = []
-        metadatas = data.get("metadatas", [])
-
-        for i, orig_id in enumerate(original_ids):
-            # Try to convert to integer, otherwise use index
-            try:
-                # If it's like "doc_123", extract the number
-                if isinstance(orig_id, str) and "_" in orig_id:
-                    qdrant_id = int(orig_id.split("_")[-1])
-                else:
-                    qdrant_id = int(orig_id)
-            except (ValueError, AttributeError):
-                # Use index as ID if can't convert
-                qdrant_id = i
-
-            qdrant_ids.append(qdrant_id)
-
-            # Store original ID in metadata
-            if i < len(metadatas):
-                if metadatas[i] is None:
-                    metadatas[i] = {}
-                metadatas[i]["original_id"] = orig_id
-            else:
-                metadatas.append({"original_id": orig_id})
-
-        data["ids"] = qdrant_ids
-        data["metadatas"] = metadatas
+        data["ids"] = [str(id_val) for id_val in original_ids]
 
         # Add items to collection
        success = connection.add_items(
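Note: Qdrant point IDs must be unsigned integers or UUIDs, which is why the old code coerced string IDs to integers and stashed the originals in metadata. The new code keeps IDs as strings and relies on the connection layer to map them; the referenced _to_uuid helper is not part of this diff, but the usual approach is a deterministic UUID along these lines (a sketch, not the package's implementation):

    import uuid

    def _to_uuid(id_str: str) -> str:
        # uuid5 is deterministic: the same string always maps to the same UUID
        return str(uuid.uuid5(uuid.NAMESPACE_URL, id_str))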
@@ -0,0 +1,130 @@ vector_inspector/services/credential_service.py (new file)
+"""Service for secure credential storage using system keychains."""
+
+from typing import Optional
+import json
+
+
+class CredentialService:
+    """Handles secure storage and retrieval of credentials using system keychains.
+
+    Falls back to in-memory storage if keyring is not available (not recommended for production).
+    """
+
+    SERVICE_NAME = "vector-inspector"
+
+    def __init__(self):
+        """Initialize credential service with keyring if available."""
+        self._use_keyring = False
+        self._memory_store = {}  # Fallback in-memory storage
+
+        try:
+            import keyring
+            self._keyring = keyring
+            self._use_keyring = True
+        except ImportError:
+            print("Warning: keyring module not available. Credentials will not be persisted securely.")
+            self._keyring = None
+
+    def store_credentials(self, profile_id: str, credentials: dict) -> bool:
+        """
+        Store credentials for a profile.
+
+        Args:
+            profile_id: Unique profile identifier
+            credentials: Dictionary of credential data (api_key, password, etc.)
+
+        Returns:
+            True if successful, False otherwise
+        """
+        try:
+            credential_key = f"profile:{profile_id}"
+            credential_json = json.dumps(credentials)
+
+            if self._use_keyring:
+                self._keyring.set_password(
+                    self.SERVICE_NAME,
+                    credential_key,
+                    credential_json
+                )
+            else:
+                # Fallback to in-memory (not persistent)
+                self._memory_store[credential_key] = credential_json
+
+            return True
+        except Exception as e:
+            print(f"Failed to store credentials: {e}")
+            return False
+
+    def get_credentials(self, profile_id: str) -> Optional[dict]:
+        """
+        Retrieve credentials for a profile.
+
+        Args:
+            profile_id: Unique profile identifier
+
+        Returns:
+            Dictionary of credential data, or None if not found
+        """
+        try:
+            credential_key = f"profile:{profile_id}"
+
+            if self._use_keyring:
+                credential_json = self._keyring.get_password(
+                    self.SERVICE_NAME,
+                    credential_key
+                )
+            else:
+                # Fallback to in-memory
+                credential_json = self._memory_store.get(credential_key)
+
+            if credential_json:
+                return json.loads(credential_json)
+            return None
+        except Exception as e:
+            print(f"Failed to retrieve credentials: {e}")
+            return None
+
+    def delete_credentials(self, profile_id: str) -> bool:
+        """
+        Delete stored credentials for a profile.
+
+        Args:
+            profile_id: Unique profile identifier
+
+        Returns:
+            True if successful, False otherwise
+        """
+        try:
+            credential_key = f"profile:{profile_id}"
+
+            if self._use_keyring:
+                try:
+                    self._keyring.delete_password(
+                        self.SERVICE_NAME,
+                        credential_key
+                    )
+                except self._keyring.errors.PasswordDeleteError:
+                    # Credential doesn't exist, that's okay
+                    pass
+            else:
+                # Fallback to in-memory
+                self._memory_store.pop(credential_key, None)
+
+            return True
+        except Exception as e:
+            print(f"Failed to delete credentials: {e}")
+            return False
+
+    def is_keyring_available(self) -> bool:
+        """Check if system keyring is available."""
+        return self._use_keyring
+
+    def clear_all_credentials(self):
+        """Clear all stored credentials. Use with caution!"""
+        if not self._use_keyring:
+            self._memory_store.clear()
+        else:
+            # For keyring, we'd need to track all profile IDs
+            # This is typically not needed, but can be implemented if required
+            print("Warning: clear_all_credentials not implemented for keyring backend")
+
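A minimal usage sketch of the new CredentialService (the profile ID and secret are placeholders); with keyring installed the secret lands in the OS keychain, otherwise it stays in process memory only:

    from vector_inspector.services.credential_service import CredentialService

    service = CredentialService()
    service.store_credentials("my-profile", {"api_key": "..."})
    print(service.get_credentials("my-profile"))   # {'api_key': '...'}
    service.delete_credentials("my-profile")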
@@ -66,7 +66,7 @@ def apply_client_side_filters(data: Dict[str, Any], filters: List[Dict[str, Any]
         "metadatas": [metadatas[i] for i in keep_indices if i < len(metadatas)],
     }
 
-    if embeddings:
+    if embeddings is not None and len(embeddings) > 0:
         filtered_data["embeddings"] = [embeddings[i] for i in keep_indices if i < len(embeddings)]
 
     return filtered_data
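Note on the one-line filter_service.py fix: embeddings often come back as a NumPy array, and truth-testing a multi-element array raises rather than evaluating to False, so the explicit None/length check is needed. For example:

    import numpy as np

    embeddings = np.zeros((2, 384))
    # bool(embeddings)  -> ValueError: truth value of an array with more than one element is ambiguous
    if embeddings is not None and len(embeddings) > 0:
        print("embeddings present")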