PyPI - vector-inspector - Versions diffs - 0.3.2__py3-none-any.whl → 0.3.4__py3-none-any.whl - Mend

vector-inspector 0.3.2__py3-none-any.whl → 0.3.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (32) hide show

vector_inspector/core/connection_manager.py +55 -49
vector_inspector/core/connections/base_connection.py +41 -41
vector_inspector/core/connections/chroma_connection.py +110 -86
vector_inspector/core/connections/pinecone_connection.py +168 -182
vector_inspector/core/connections/qdrant_connection.py +109 -126
vector_inspector/core/connections/qdrant_helpers/__init__.py +4 -0
vector_inspector/core/connections/qdrant_helpers/qdrant_embedding_resolver.py +35 -0
vector_inspector/core/connections/qdrant_helpers/qdrant_filter_builder.py +51 -0
vector_inspector/core/connections/template_connection.py +55 -65
vector_inspector/core/embedding_utils.py +32 -32
vector_inspector/core/logging.py +27 -0
vector_inspector/core/model_registry.py +4 -3
vector_inspector/main.py +6 -2
vector_inspector/services/backup_helpers.py +63 -0
vector_inspector/services/backup_restore_service.py +73 -152
vector_inspector/services/credential_service.py +33 -40
vector_inspector/services/import_export_service.py +70 -67
vector_inspector/services/profile_service.py +92 -94
vector_inspector/services/settings_service.py +68 -48
vector_inspector/services/visualization_service.py +40 -39
vector_inspector/ui/components/splash_window.py +57 -0
vector_inspector/ui/dialogs/cross_db_migration.py +6 -5
vector_inspector/ui/main_window.py +200 -146
vector_inspector/ui/views/info_panel.py +208 -127
vector_inspector/ui/views/metadata_view.py +8 -7
vector_inspector/ui/views/search_view.py +97 -75
vector_inspector/ui/views/visualization_view.py +140 -97
vector_inspector/utils/version.py +5 -0
{vector_inspector-0.3.2.dist-info → vector_inspector-0.3.4.dist-info}/METADATA +10 -2
{vector_inspector-0.3.2.dist-info → vector_inspector-0.3.4.dist-info}/RECORD +32 -25
{vector_inspector-0.3.2.dist-info → vector_inspector-0.3.4.dist-info}/WHEEL +0 -0
{vector_inspector-0.3.2.dist-info → vector_inspector-0.3.4.dist-info}/entry_points.txt +0 -0

vector_inspector/core/connections/pinecone_connection.py CHANGED Viewed

@@ -6,20 +6,18 @@ from pinecone import Pinecone, ServerlessSpec
 from pinecone.exceptions import PineconeException
 from .base_connection import VectorDBConnection
+from vector_inspector.core.logging import log_error
 class PineconeConnection(VectorDBConnection):
     """Manages connection to Pinecone and provides query interface."""
     def __init__(
-        self,
-        api_key: str,
-        environment: Optional[str] = None,
-        index_host: Optional[str] = None
+        self, api_key: str, environment: Optional[str] = None, index_host: Optional[str] = None
     ):
         """
         Initialize Pinecone connection.
         Args:
             api_key: Pinecone API key
             environment: Pinecone environment (optional, auto-detected)
@@ -31,41 +29,41 @@ class PineconeConnection(VectorDBConnection):
         self._client: Optional[Pinecone] = None
         self._current_index = None
         self._current_index_name: Optional[str] = None
     def connect(self) -> bool:
         """
         Establish connection to Pinecone.
         Returns:
             True if connection successful, False otherwise
         """
         try:
             # Initialize Pinecone client
             self._client = Pinecone(api_key=self.api_key)
             # Test connection by listing indexes
             self._client.list_indexes()
             return True
         except Exception as e:
-            print(f"Connection failed: {e}")
+            log_error("Connection failed: %s", e)
             self._client = None  # Reset client on failure
             return False
     def disconnect(self):
         """Close connection to Pinecone."""
         self._client = None
         self._current_index = None
         self._current_index_name = None
     @property
     def is_connected(self) -> bool:
         """Check if connected to Pinecone."""
         return self._client is not None
     def list_collections(self) -> List[str]:
         """
         Get list of all indexes (collections in Pinecone terminology).
         Returns:
             List of index names
         """
@@ -75,14 +73,14 @@ class PineconeConnection(VectorDBConnection):
             indexes = self._client.list_indexes()
             return [str(idx.name) for idx in indexes]  # type: ignore
         except Exception as e:
-            print(f"Failed to list indexes: {e}")
+            log_error("Failed to list indexes: %s", e)
             return []
     def _get_index(self, name: str):
         """Get or create index reference."""
         if not self._client:
             return None
         try:
             # Cache the current index to avoid repeated lookups
             if self._current_index_name != name:
@@ -90,38 +88,38 @@ class PineconeConnection(VectorDBConnection):
                 self._current_index_name = name
             return self._current_index
         except Exception as e:
-            print(f"Failed to get index: {e}")
+            log_error("Failed to get index: %s", e)
             return None
     def get_collection_info(self, name: str) -> Optional[Dict[str, Any]]:
         """
         Get index metadata and statistics.
         Args:
             name: Index name
         Returns:
             Dictionary with index info
         """
         if not self._client:
             return None
         try:
             # Get index description
             index_description = self._client.describe_index(name)
             # Get index stats
             index = self._get_index(name)
             if not index:
                 return None
             stats = index.describe_index_stats()
             # Extract information
-            total_vector_count = stats.get('total_vector_count', 0)
+            total_vector_count = stats.get("total_vector_count", 0)
             dimension = index_description.dimension
             metric = index_description.metric
             # Get metadata fields from a sample query (if vectors exist)
             metadata_fields = []
             if total_vector_count > 0:
@@ -129,46 +127,50 @@ class PineconeConnection(VectorDBConnection):
                     # Query for a small sample to see metadata structure
                     dimension_val = int(dimension) if dimension else 0
                     sample_query = index.query(
-                        vector=[0.0] * dimension_val,
-                        top_k=1,
-                        include_metadata=True
+                        vector=[0.0] * dimension_val, top_k=1, include_metadata=True
                     )
-                    if hasattr(sample_query, 'matches') and sample_query.matches:  # type: ignore
+                    if hasattr(sample_query, "matches") and sample_query.matches:  # type: ignore
                         metadata = sample_query.matches[0].metadata  # type: ignore
                         if metadata:
                             metadata_fields = list(metadata.keys())
                 except Exception:
                     pass  # Metadata fields will remain empty
             return {
                 "name": name,
                 "count": total_vector_count,
                 "metadata_fields": metadata_fields,
                 "vector_dimension": dimension,
                 "distance_metric": str(metric).upper() if metric else "UNKNOWN",
-                "host": str(index_description.host) if hasattr(index_description, 'host') else "N/A",
-                "status": index_description.status.get('state', 'unknown') if hasattr(index_description.status, 'get') else str(index_description.status),  # type: ignore
-                "spec": str(index_description.spec) if hasattr(index_description, 'spec') else "N/A",
+                "host": str(index_description.host)
+                if hasattr(index_description, "host")
+                else "N/A",
+                "status": index_description.status.get("state", "unknown")
+                if hasattr(index_description.status, "get")
+                else str(index_description.status),  # type: ignore
+                "spec": str(index_description.spec)
+                if hasattr(index_description, "spec")
+                else "N/A",
             }
         except Exception as e:
-            print(f"Failed to get index info: {e}")
+            log_error("Failed to get index info: %s", e)
             return None
     def create_collection(self, name: str, vector_size: int, distance: str = "Cosine") -> bool:
         """
         Create a new index.
         Args:
             name: Index name
             vector_size: Dimension of vectors
             distance: Distance metric (Cosine, Euclidean, DotProduct)
         Returns:
             True if successful, False otherwise
         """
         if not self._client:
             return False
         try:
             # Map distance names to Pinecone metrics
             metric_map = {
@@ -178,33 +180,34 @@ class PineconeConnection(VectorDBConnection):
                 "dot": "dotproduct",
             }
             metric = metric_map.get(distance.lower(), "cosine")
             # Create serverless index (default configuration)
             self._client.create_index(
                 name=name,
                 dimension=vector_size,
                 metric=metric,
-                spec=ServerlessSpec(
-                    cloud='aws',
-                    region='us-east-1'
-                )
+                spec=ServerlessSpec(cloud="aws", region="us-east-1"),
             )
             # Wait for index to be ready
             max_wait = 60  # seconds
             start_time = time.time()
             while time.time() - start_time < max_wait:
                 desc = self._client.describe_index(name)
-                status = desc.status.get('state', 'unknown') if hasattr(desc.status, 'get') else str(desc.status)  # type: ignore
-                if status.lower() == 'ready':
+                status = (
+                    desc.status.get("state", "unknown")
+                    if hasattr(desc.status, "get")
+                    else str(desc.status)
+                )  # type: ignore
+                if status.lower() == "ready":
                     return True
                 time.sleep(2)
             return False
         except Exception as e:
-            print(f"Failed to create index: {e}")
+            log_error("Failed to create index: %s", e)
             return False
     def add_items(
         self,
         collection_name: str,
@@ -215,111 +218,107 @@ class PineconeConnection(VectorDBConnection):
     ) -> bool:
         """
         Add items to an index.
         Args:
             collection_name: Name of index
             documents: Document texts (stored in metadata)
             metadatas: Metadata for each vector
             ids: IDs for each vector
             embeddings: Pre-computed embeddings (required for Pinecone)
         Returns:
             True if successful, False otherwise
         """
         if not embeddings:
-            print("Embeddings are required for Pinecone")
+            log_error("Embeddings are required for Pinecone")
             return False
         index = self._get_index(collection_name)
         if not index:
             return False
         try:
             # Generate IDs if not provided
             if not ids:
                 ids = [f"vec_{i}" for i in range(len(embeddings))]
             # Prepare vectors for upsert
             vectors = []
             for i, embedding in enumerate(embeddings):
                 metadata = {}
                 if metadatas and i < len(metadatas):
                     metadata = metadatas[i].copy()
                 # Add document text to metadata
                 if documents and i < len(documents):
-                    metadata['document'] = documents[i]
-                vectors.append({
-                    'id': ids[i],
-                    'values': embedding,
-                    'metadata': metadata
-                })
+                    metadata["document"] = documents[i]
+                vectors.append({"id": ids[i], "values": embedding, "metadata": metadata})
             # Upsert in batches of 100 (Pinecone limit)
             batch_size = 100
             for i in range(0, len(vectors), batch_size):
-                batch = vectors[i:i + batch_size]
+                batch = vectors[i : i + batch_size]
                 index.upsert(vectors=batch)
             return True
         except Exception as e:
-            print(f"Failed to add items: {e}")
+            log_error("Failed to add items: %s", e)
             return False
     def get_items(self, name: str, ids: List[str]) -> Dict[str, Any]:
         """
         Retrieve items by IDs.
         Args:
             name: Index name
             ids: List of vector IDs
         Returns:
             Dictionary with documents and metadatas
         """
         index = self._get_index(name)
         if not index:
             return {"documents": [], "metadatas": []}
         try:
             # Fetch vectors
             result = index.fetch(ids=ids)
             documents = []
             metadatas = []
             for vid in ids:
                 if vid in result.vectors:
                     vector_data = result.vectors[vid]
                     metadata = vector_data.metadata or {}
                     # Extract document from metadata
-                    doc = metadata.pop('document', '')
+                    doc = metadata.pop("document", "")
                     documents.append(doc)
                     metadatas.append(metadata)
                 else:
-                    documents.append('')
+                    documents.append("")
                     metadatas.append({})
             return {"documents": documents, "metadatas": metadatas}
         except Exception as e:
-            print(f"Failed to get items: {e}")
+            log_error("Failed to get items: %s", e)
             return {"documents": [], "metadatas": []}
     def delete_collection(self, name: str) -> bool:
         """
         Delete an index.
         Args:
             name: Index name
         Returns:
             True if successful, False otherwise
         """
         if not self._client:
             return False
         try:
             self._client.delete_index(name)
             if self._current_index_name == name:
@@ -327,29 +326,29 @@ class PineconeConnection(VectorDBConnection):
                 self._current_index_name = None
             return True
         except Exception as e:
-            print(f"Failed to delete index: {e}")
+            log_error("Failed to delete index: %s", e)
             return False
     def count_collection(self, name: str) -> int:
         """
         Return the number of vectors in the index.
         Args:
             name: Index name
         Returns:
             Number of vectors
         """
         index = self._get_index(name)
         if not index:
             return 0
         try:
             stats = index.describe_index_stats()
-            return stats.get('total_vector_count', 0)
+            return stats.get("total_vector_count", 0)
         except Exception:
             return 0
     def _get_embedding_function_for_collection(self, collection_name: str):
         """
         Returns embedding function and model type for a given collection, matching ChromaDB/Qdrant API.
@@ -363,6 +362,7 @@ class PineconeConnection(VectorDBConnection):
         # Prefer user-configured model for this collection
         from vector_inspector.services.settings_service import SettingsService
         model = None
         model_type: str = "sentence-transformer"
         if hasattr(self, "connection_id") and collection_name:
@@ -370,12 +370,14 @@ class PineconeConnection(VectorDBConnection):
             cfg = settings.get_embedding_model(getattr(self, "connection_id", ""), collection_name)
             if cfg and cfg.get("model") and cfg.get("type"):
                 from vector_inspector.core.embedding_utils import load_embedding_model
                 model = load_embedding_model(cfg["model"], cfg["type"])
                 model_type = str(cfg["type"]) or "sentence-transformer"
         # Fallback to dimension-based model if none configured
         if model is None:
             from vector_inspector.core.embedding_utils import get_embedding_model_for_dimension
             if dim_int is None:
                 dim_int = 384  # default for MiniLM
             loaded_model, _, inferred_type = get_embedding_model_for_dimension(dim_int)
@@ -383,6 +385,7 @@ class PineconeConnection(VectorDBConnection):
             model_type = str(inferred_type) or "sentence-transformer"
         from vector_inspector.core.embedding_utils import encode_text
         def embedding_fn(text: str):
             return encode_text(text, model, model_type)
@@ -399,7 +402,7 @@ class PineconeConnection(VectorDBConnection):
     ) -> Optional[Dict[str, Any]]:
         """
         Query an index for similar vectors.
         Args:
             collection_name: Name of index
             query_texts: Text queries (will be embedded if provided)
@@ -418,13 +421,13 @@ class PineconeConnection(VectorDBConnection):
             query_texts = None
         if not query_embeddings:
-            print("Query embeddings are required for Pinecone")
+            log_error("Query embeddings are required for Pinecone")
             return None
         index = self._get_index(collection_name)
         if not index:
             return None
         try:
             # Pinecone queries one vector at a time
             all_ids = []
@@ -432,54 +435,54 @@ class PineconeConnection(VectorDBConnection):
             all_documents = []
             all_metadatas = []
             all_embeddings = []
             for query_vector in query_embeddings:
                 # Build filter if provided
                 filter_dict = None
                 if where:
                     filter_dict = self._convert_filter(where)
                 result = index.query(
                     vector=query_vector,
                     top_k=n_results,
                     include_metadata=True,
                     include_values=True,
-                    filter=filter_dict
+                    filter=filter_dict,
                 )
                 # Extract results
                 ids = []
                 distances = []
                 documents = []
                 metadatas = []
                 embeddings = []
-                if hasattr(result, 'matches'):
+                if hasattr(result, "matches"):
                     for match in result.matches:  # type: ignore
                         ids.append(match.id)  # type: ignore
                         # Convert similarity to distance for cosine metric
-                        score = getattr(match, 'score', None)
+                        score = getattr(match, "score", None)
                         if score is not None:
                             distances.append(1.0 - score)
                         else:
                             distances.append(None)
                         metadata = match.metadata or {}  # type: ignore
-                        doc = metadata.pop('document', '')
+                        doc = metadata.pop("document", "")
                         documents.append(doc)
                         metadatas.append(metadata)
-                        if hasattr(match, 'values') and match.values:  # type: ignore
+                        if hasattr(match, "values") and match.values:  # type: ignore
                             embeddings.append(match.values)  # type: ignore
                         else:
                             embeddings.append([])
                 all_ids.append(ids)
                 all_distances.append(distances)
                 all_documents.append(documents)
                 all_metadatas.append(metadatas)
                 all_embeddings.append(embeddings)
             return {
                 "ids": all_ids,
                 "distances": all_distances,
@@ -488,21 +491,21 @@ class PineconeConnection(VectorDBConnection):
                 "embeddings": all_embeddings,
             }
         except Exception as e:
-            print(f"Query failed: {e}")
             import traceback
-            traceback.print_exc()
+            log_error("Query failed: %s\n%s", e, traceback.format_exc())
             return None
     def _convert_filter(self, where: Dict[str, Any]) -> Dict[str, Any]:
         """
         Convert generic filter to Pinecone filter format.
         Pinecone supports: $eq, $ne, $gt, $gte, $lt, $lte, $in, $nin
         """
         # Simple conversion - map field equality
         # For more complex filters, this would need expansion
         pinecone_filter = {}
         for key, value in where.items():
             if isinstance(value, dict):
                 # Handle operator-based filters
@@ -510,9 +513,9 @@ class PineconeConnection(VectorDBConnection):
             else:
                 # Simple equality
                 pinecone_filter[key] = {"$eq": value}
         return pinecone_filter
     def get_all_items(
         self,
         collection_name: str,
@@ -522,100 +525,90 @@ class PineconeConnection(VectorDBConnection):
     ) -> Optional[Dict[str, Any]]:
         """
         Get all items from an index using pagination.
         Note: Uses Pinecone's list() method which returns a generator of ID lists.
         Offset-based pagination is simulated by skipping items.
         Args:
             collection_name: Name of index
             limit: Maximum number of items to return
             offset: Number of items to skip
             where: Metadata filter (not supported in list operation)
         Returns:
             Index items or None if failed
         """
         index = self._get_index(collection_name)
         if not index:
             return None
         try:
             ids_to_fetch = []
             items_collected = 0
             items_skipped = 0
             target_offset = offset or 0
             target_limit = limit or 100
             # list() returns a generator that yields lists of IDs
             for id_list in index.list():  # type: ignore
                 if not id_list:
                     continue
                 # Handle offset by skipping items
                 for vid in id_list:
                     if items_skipped < target_offset:
                         items_skipped += 1
                         continue
                     if items_collected < target_limit:
                         ids_to_fetch.append(vid)
                         items_collected += 1
                     else:
                         break
                 # Stop if we have enough
                 if items_collected >= target_limit:
                     break
             # If no IDs found, return empty result
             if not ids_to_fetch:
-                return {
-                    "ids": [],
-                    "documents": [],
-                    "metadatas": [],
-                    "embeddings": []
-                }
+                return {"ids": [], "documents": [], "metadatas": [], "embeddings": []}
             # Fetch the actual vector data in batches (Pinecone fetch limit is 1000)
             batch_size = 1000
             all_ids = []
             all_documents = []
             all_metadatas = []
             all_embeddings = []
             for i in range(0, len(ids_to_fetch), batch_size):
-                batch_ids = ids_to_fetch[i:i + batch_size]
+                batch_ids = ids_to_fetch[i : i + batch_size]
                 fetch_result = index.fetch(ids=batch_ids)
                 for vid in batch_ids:
                     if vid in fetch_result.vectors:
                         vector_data = fetch_result.vectors[vid]
                         all_ids.append(vid)
                         metadata = vector_data.metadata.copy() if vector_data.metadata else {}
-                        doc = metadata.pop('document', '')
+                        doc = metadata.pop("document", "")
                         all_documents.append(doc)
                         all_metadatas.append(metadata)
                         all_embeddings.append(vector_data.values)
             return {
                 "ids": all_ids,
                 "documents": all_documents,
                 "metadatas": all_metadatas,
-                "embeddings": all_embeddings
+                "embeddings": all_embeddings,
             }
         except Exception as e:
-            print(f"Failed to get all items: {e}")
             import traceback
-            traceback.print_exc()
-            return {
-                "ids": [],
-                "documents": [],
-                "metadatas": [],
-                "embeddings": []
-            }
+            log_error("Failed to get all items: %s\n%s", e, traceback.format_exc())
+            return {"ids": [], "documents": [], "metadatas": [], "embeddings": []}
     def update_items(
         self,
         collection_name: str,
@@ -626,27 +619,27 @@ class PineconeConnection(VectorDBConnection):
     ) -> bool:
         """
         Update items in an index.
         Note: Pinecone updates via upsert (add_items can be used)
         Args:
             collection_name: Name of index
             ids: IDs of items to update
             documents: New document texts
             metadatas: New metadata
             embeddings: New embeddings
         Returns:
             True if successful, False otherwise
         """
         index = self._get_index(collection_name)
         if not index:
             return False
         try:
             # Fetch existing vectors to preserve data not being updated
             existing = index.fetch(ids=ids)
             vectors = []
             for i, vid in enumerate(ids):
                 # Start with existing data
@@ -660,32 +653,28 @@ class PineconeConnection(VectorDBConnection):
                         continue
                     values = embeddings[i]
                     metadata = {}
                 # Update metadata
                 if metadatas and i < len(metadatas):
                     metadata.update(metadatas[i])
                 # Update document
                 if documents and i < len(documents):
-                    metadata['document'] = documents[i]
-                vectors.append({
-                    'id': vid,
-                    'values': values,
-                    'metadata': metadata
-                })
+                    metadata["document"] = documents[i]
+                vectors.append({"id": vid, "values": values, "metadata": metadata})
             # Upsert in batches
             batch_size = 100
             for i in range(0, len(vectors), batch_size):
-                batch = vectors[i:i + batch_size]
+                batch = vectors[i : i + batch_size]
                 index.upsert(vectors=batch)
             return True
         except Exception as e:
-            print(f"Failed to update items: {e}")
+            log_error("Failed to update items: %s", e)
             return False
     def delete_items(
         self,
         collection_name: str,
@@ -694,19 +683,19 @@ class PineconeConnection(VectorDBConnection):
     ) -> bool:
         """
         Delete items from an index.
         Args:
             collection_name: Name of index
             ids: IDs of items to delete
             where: Metadata filter for items to delete
         Returns:
             True if successful, False otherwise
         """
         index = self._get_index(collection_name)
         if not index:
             return False
         try:
             if ids:
                 # Delete by IDs
@@ -718,24 +707,21 @@ class PineconeConnection(VectorDBConnection):
             else:
                 # Delete all (use with caution)
                 index.delete(delete_all=True)
             return True
         except Exception as e:
-            print(f"Failed to delete items: {e}")
+            log_error("Failed to delete items: %s", e)
             return False
     def get_connection_info(self) -> Dict[str, Any]:
         """
         Get information about the current connection.
         Returns:
             Dictionary with connection details
         """
-        info = {
-            "provider": "Pinecone",
-            "connected": self.is_connected
-        }
+        info = {"provider": "Pinecone", "connected": self.is_connected}
         if self.is_connected and self._client:
             try:
                 # Get account/environment info if available
@@ -743,13 +729,13 @@ class PineconeConnection(VectorDBConnection):
                 info["index_count"] = len(indexes)
             except Exception:
                 pass
         return info
     def get_supported_filter_operators(self) -> List[Dict[str, Any]]:
         """
         Get filter operators supported by Pinecone.
         Returns:
             List of operator dictionaries
         """

vector-inspector 0.3.2__py3-none-any.whl → 0.3.4__py3-none-any.whl

vector-inspector 0.3.2__py3-none-any.whl → 0.3.4__py3-none-any.whl