vector-inspector 0.2.6__tar.gz → 0.3.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {vector_inspector-0.2.6 → vector_inspector-0.3.1}/PKG-INFO +3 -1
- {vector_inspector-0.2.6 → vector_inspector-0.3.1}/pyproject.toml +3 -1
- vector_inspector-0.3.1/src/vector_inspector/config/__init__.py +4 -0
- vector_inspector-0.3.1/src/vector_inspector/config/known_embedding_models.json +432 -0
- vector_inspector-0.3.1/src/vector_inspector/core/cache_manager.py +159 -0
- vector_inspector-0.3.1/src/vector_inspector/core/connection_manager.py +277 -0
- {vector_inspector-0.2.6 → vector_inspector-0.3.1}/src/vector_inspector/core/connections/__init__.py +2 -1
- {vector_inspector-0.2.6 → vector_inspector-0.3.1}/src/vector_inspector/core/connections/base_connection.py +42 -1
- {vector_inspector-0.2.6 → vector_inspector-0.3.1}/src/vector_inspector/core/connections/chroma_connection.py +137 -16
- vector_inspector-0.3.1/src/vector_inspector/core/connections/pinecone_connection.py +768 -0
- {vector_inspector-0.2.6 → vector_inspector-0.3.1}/src/vector_inspector/core/connections/qdrant_connection.py +62 -8
- vector_inspector-0.3.1/src/vector_inspector/core/embedding_providers/__init__.py +14 -0
- vector_inspector-0.3.1/src/vector_inspector/core/embedding_providers/base_provider.py +128 -0
- vector_inspector-0.3.1/src/vector_inspector/core/embedding_providers/clip_provider.py +260 -0
- vector_inspector-0.3.1/src/vector_inspector/core/embedding_providers/provider_factory.py +176 -0
- vector_inspector-0.3.1/src/vector_inspector/core/embedding_providers/sentence_transformer_provider.py +203 -0
- vector_inspector-0.3.1/src/vector_inspector/core/embedding_utils.py +167 -0
- vector_inspector-0.3.1/src/vector_inspector/core/model_registry.py +205 -0
- {vector_inspector-0.2.6 → vector_inspector-0.3.1}/src/vector_inspector/services/backup_restore_service.py +19 -29
- vector_inspector-0.3.1/src/vector_inspector/services/credential_service.py +130 -0
- {vector_inspector-0.2.6 → vector_inspector-0.3.1}/src/vector_inspector/services/filter_service.py +1 -1
- vector_inspector-0.3.1/src/vector_inspector/services/profile_service.py +409 -0
- vector_inspector-0.3.1/src/vector_inspector/services/settings_service.py +195 -0
- vector_inspector-0.3.1/src/vector_inspector/ui/components/connection_manager_panel.py +327 -0
- vector_inspector-0.3.1/src/vector_inspector/ui/components/profile_manager_panel.py +565 -0
- vector_inspector-0.3.1/src/vector_inspector/ui/dialogs/__init__.py +6 -0
- vector_inspector-0.3.1/src/vector_inspector/ui/dialogs/cross_db_migration.py +383 -0
- vector_inspector-0.3.1/src/vector_inspector/ui/dialogs/embedding_config_dialog.py +315 -0
- vector_inspector-0.3.1/src/vector_inspector/ui/dialogs/provider_type_dialog.py +189 -0
- vector_inspector-0.3.1/src/vector_inspector/ui/main_window.py +610 -0
- {vector_inspector-0.2.6 → vector_inspector-0.3.1}/src/vector_inspector/ui/views/connection_view.py +55 -10
- vector_inspector-0.3.1/src/vector_inspector/ui/views/info_panel.py +504 -0
- {vector_inspector-0.2.6 → vector_inspector-0.3.1}/src/vector_inspector/ui/views/metadata_view.py +71 -3
- {vector_inspector-0.2.6 → vector_inspector-0.3.1}/src/vector_inspector/ui/views/search_view.py +44 -4
- {vector_inspector-0.2.6 → vector_inspector-0.3.1}/src/vector_inspector/ui/views/visualization_view.py +19 -5
- {vector_inspector-0.2.6 → vector_inspector-0.3.1}/tests/test_connections.py +12 -1
- vector_inspector-0.3.1/tests/test_pinecone_connection.py +405 -0
- vector_inspector-0.2.6/src/vector_inspector/services/settings_service.py +0 -60
- vector_inspector-0.2.6/src/vector_inspector/ui/main_window.py +0 -344
- vector_inspector-0.2.6/src/vector_inspector/ui/views/info_panel.py +0 -287
- {vector_inspector-0.2.6 → vector_inspector-0.3.1}/README.md +0 -0
- {vector_inspector-0.2.6 → vector_inspector-0.3.1}/src/vector_inspector/__init__.py +0 -0
- {vector_inspector-0.2.6 → vector_inspector-0.3.1}/src/vector_inspector/__main__.py +0 -0
- {vector_inspector-0.2.6 → vector_inspector-0.3.1}/src/vector_inspector/core/__init__.py +0 -0
- {vector_inspector-0.2.6 → vector_inspector-0.3.1}/src/vector_inspector/core/connections/template_connection.py +0 -0
- {vector_inspector-0.2.6 → vector_inspector-0.3.1}/src/vector_inspector/main.py +0 -0
- {vector_inspector-0.2.6 → vector_inspector-0.3.1}/src/vector_inspector/services/__init__.py +0 -0
- {vector_inspector-0.2.6 → vector_inspector-0.3.1}/src/vector_inspector/services/import_export_service.py +0 -0
- {vector_inspector-0.2.6 → vector_inspector-0.3.1}/src/vector_inspector/services/visualization_service.py +0 -0
- {vector_inspector-0.2.6 → vector_inspector-0.3.1}/src/vector_inspector/ui/__init__.py +0 -0
- {vector_inspector-0.2.6 → vector_inspector-0.3.1}/src/vector_inspector/ui/components/__init__.py +0 -0
- {vector_inspector-0.2.6 → vector_inspector-0.3.1}/src/vector_inspector/ui/components/backup_restore_dialog.py +0 -0
- {vector_inspector-0.2.6 → vector_inspector-0.3.1}/src/vector_inspector/ui/components/filter_builder.py +0 -0
- {vector_inspector-0.2.6 → vector_inspector-0.3.1}/src/vector_inspector/ui/components/item_dialog.py +0 -0
- {vector_inspector-0.2.6 → vector_inspector-0.3.1}/src/vector_inspector/ui/components/loading_dialog.py +0 -0
- {vector_inspector-0.2.6 → vector_inspector-0.3.1}/src/vector_inspector/ui/views/__init__.py +0 -0
- {vector_inspector-0.2.6 → vector_inspector-0.3.1}/src/vector_inspector/ui/views/collection_browser.py +0 -0
- {vector_inspector-0.2.6 → vector_inspector-0.3.1}/src/vector_inspector/utils/__init__.py +0 -0
- {vector_inspector-0.2.6 → vector_inspector-0.3.1}/src/vector_inspector/utils/lazy_imports.py +0 -0
- {vector_inspector-0.2.6 → vector_inspector-0.3.1}/tests/test_filter_service.py +0 -0
- /vector_inspector-0.2.6/tests/vector_inspector.py → /vector_inspector-0.3.1/tests/test_runner.py +0 -0
- {vector_inspector-0.2.6 → vector_inspector-0.3.1}/tests/test_settings_service.py +0 -0
{vector_inspector-0.2.6 → vector_inspector-0.3.1}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: vector-inspector
-Version: 0.2.6
+Version: 0.3.1
 Summary: A comprehensive desktop application for visualizing, querying, and managing vector database data
 Author-Email: Anthony Dawson <anthonypdawson+github@gmail.com>
 License: MIT
@@ -22,6 +22,8 @@ Requires-Dist: sentence-transformers>=2.2.0
 Requires-Dist: fastembed>=0.7.4
 Requires-Dist: pyarrow>=14.0.0
 Requires-Dist: pinecone>=8.0.0
+Requires-Dist: keyring>=25.7.0
+Requires-Dist: hf-xet>=1.2.0
 Description-Content-Type: text/markdown


{vector_inspector-0.2.6 → vector_inspector-0.3.1}/pyproject.toml

@@ -1,6 +1,6 @@
 [project]
 name = "vector-inspector"
-version = "0.2.6"
+version = "0.3.1"
 description = "A comprehensive desktop application for visualizing, querying, and managing vector database data"
 authors = [
 { name = "Anthony Dawson", email = "anthonypdawson+github@gmail.com" },
@@ -19,6 +19,8 @@ dependencies = [
 "fastembed>=0.7.4",
 "pyarrow>=14.0.0",
 "pinecone>=8.0.0",
+"keyring>=25.7.0",
+"hf-xet>=1.2.0",
 ]
 requires-python = ">=3.10,<3.13"
 readme = "README.md"
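Both metadata files pick up the same two new runtime dependencies: keyring, which matches the new credential_service.py in this release and suggests connection secrets are now stored in the OS keychain, and hf-xet, the Hugging Face Xet download backend. The snippet below is only a minimal sketch of how a credential helper might wrap the keyring API; the service name and helper functions are assumptions, not the package's actual code.

```python
# Hypothetical sketch of a keyring-backed credential helper.
# The SERVICE name and function names are assumptions; only the
# keyring set/get/delete_password calls are the library's real API.
import keyring
from keyring.errors import PasswordDeleteError

SERVICE = "vector-inspector"  # assumed keychain namespace


def save_api_key(profile: str, api_key: str) -> None:
    """Store an API key in the OS keychain under the given profile name."""
    keyring.set_password(SERVICE, profile, api_key)


def load_api_key(profile: str) -> str | None:
    """Return the stored API key, or None if the profile has no secret."""
    return keyring.get_password(SERVICE, profile)


def delete_api_key(profile: str) -> None:
    """Remove a stored secret; ignore the error if it does not exist."""
    try:
        keyring.delete_password(SERVICE, profile)
    except PasswordDeleteError:
        pass


if __name__ == "__main__":
    save_api_key("pinecone-default", "pc-example-key")
    print(load_api_key("pinecone-default"))
    delete_api_key("pinecone-default")
```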
vector_inspector-0.3.1/src/vector_inspector/config/known_embedding_models.json (new file)

@@ -0,0 +1,432 @@
+{
+  "models": [
+    {
+      "name": "all-MiniLM-L6-v2",
+      "type": "sentence-transformer",
+      "dimension": 384,
+      "modality": "text",
+      "normalization": "l2",
+      "source": "huggingface",
+      "description": "Fast, small-footprint text embeddings (good default for text search)"
+    },
+    {
+      "name": "openai/clip-vit-base-patch32",
+      "type": "clip",
+      "dimension": 512,
+      "modality": "multimodal",
+      "normalization": "l2",
+      "source": "huggingface",
+      "description": "Standard CLIP ViT-B/32 model; supports matching text ↔ images"
+    },
+    {
+      "name": "paraphrase-albert-small-v2",
+      "type": "sentence-transformer",
+      "dimension": 768,
+      "modality": "text",
+      "normalization": "l2",
+      "source": "huggingface",
+      "description": "Smaller paraphrase-specialized model"
+    },
+    {
+      "name": "all-mpnet-base-v2",
+      "type": "sentence-transformer",
+      "dimension": 768,
+      "modality": "text",
+      "normalization": "l2",
+      "source": "huggingface",
+      "description": "High-quality text embeddings; recommended for semantic tasks"
+    },
+    {
+      "name": "all-roberta-large-v1",
+      "type": "sentence-transformer",
+      "dimension": 1024,
+      "modality": "text",
+      "normalization": "l2",
+      "source": "huggingface",
+      "description": "Large model — high quality, larger memory and compute"
+    },
+    {
+      "name": "gtr-t5-large",
+      "type": "sentence-transformer",
+      "dimension": 1536,
+      "modality": "text",
+      "normalization": "l2",
+      "source": "huggingface",
+      "description": "Very large embeddings useful for specialized high-recall tasks"
+    },
+    {
+      "name": "sentence-transformers/multi-qa-MiniLM-L6-cos-v1",
+      "type": "sentence-transformer",
+      "dimension": 384,
+      "modality": "text",
+      "normalization": "l2",
+      "source": "huggingface",
+      "description": "Optimized for semantic search and question-answering tasks"
+    },
+    {
+      "name": "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
+      "type": "sentence-transformer",
+      "dimension": 384,
+      "modality": "text",
+      "normalization": "l2",
+      "source": "huggingface",
+      "description": "Multilingual support for 50+ languages"
+    },
+    {
+      "name": "sentence-transformers/msmarco-distilbert-base-v4",
+      "type": "sentence-transformer",
+      "dimension": 768,
+      "modality": "text",
+      "normalization": "l2",
+      "source": "huggingface",
+      "description": "Trained on MS MARCO dataset, good for passage retrieval"
+    },
+    {
+      "name": "sentence-transformers/all-distilroberta-v1",
+      "type": "sentence-transformer",
+      "dimension": 768,
+      "modality": "text",
+      "normalization": "l2",
+      "source": "huggingface",
+      "description": "Distilled RoBERTa model, balance of speed and quality"
+    },
+    {
+      "name": "sentence-transformers/paraphrase-mpnet-base-v2",
+      "type": "sentence-transformer",
+      "dimension": 768,
+      "modality": "text",
+      "normalization": "l2",
+      "source": "huggingface",
+      "description": "High-quality paraphrase detection and semantic similarity"
+    },
+    {
+      "name": "BAAI/bge-small-en-v1.5",
+      "type": "sentence-transformer",
+      "dimension": 384,
+      "modality": "text",
+      "normalization": "l2",
+      "source": "huggingface",
+      "description": "Beijing Academy of AI model, strong performance for size"
+    },
+    {
+      "name": "BAAI/bge-base-en-v1.5",
+      "type": "sentence-transformer",
+      "dimension": 768,
+      "modality": "text",
+      "normalization": "l2",
+      "source": "huggingface",
+      "description": "High-quality English embeddings, MTEB benchmark leader"
+    },
+    {
+      "name": "BAAI/bge-large-en-v1.5",
+      "type": "sentence-transformer",
+      "dimension": 1024,
+      "modality": "text",
+      "normalization": "l2",
+      "source": "huggingface",
+      "description": "Large model with excellent retrieval performance"
+    },
+    {
+      "name": "thenlper/gte-small",
+      "type": "sentence-transformer",
+      "dimension": 384,
+      "modality": "text",
+      "normalization": "l2",
+      "source": "huggingface",
+      "description": "General Text Embeddings (GTE) small variant"
+    },
+    {
+      "name": "thenlper/gte-base",
+      "type": "sentence-transformer",
+      "dimension": 768,
+      "modality": "text",
+      "normalization": "l2",
+      "source": "huggingface",
+      "description": "General Text Embeddings (GTE) base model"
+    },
+    {
+      "name": "thenlper/gte-large",
+      "type": "sentence-transformer",
+      "dimension": 1024,
+      "modality": "text",
+      "normalization": "l2",
+      "source": "huggingface",
+      "description": "General Text Embeddings (GTE) large variant"
+    },
+    {
+      "name": "intfloat/e5-small-v2",
+      "type": "sentence-transformer",
+      "dimension": 384,
+      "modality": "text",
+      "normalization": "l2",
+      "source": "huggingface",
+      "description": "E5 family small model, prefix with 'query: ' or 'passage: '"
+    },
+    {
+      "name": "intfloat/e5-base-v2",
+      "type": "sentence-transformer",
+      "dimension": 768,
+      "modality": "text",
+      "normalization": "l2",
+      "source": "huggingface",
+      "description": "E5 family base model, strong asymmetric retrieval"
+    },
+    {
+      "name": "intfloat/e5-large-v2",
+      "type": "sentence-transformer",
+      "dimension": 1024,
+      "modality": "text",
+      "normalization": "l2",
+      "source": "huggingface",
+      "description": "E5 family large model, top MTEB performance"
+    },
+    {
+      "name": "intfloat/multilingual-e5-small",
+      "type": "sentence-transformer",
+      "dimension": 384,
+      "modality": "text",
+      "normalization": "l2",
+      "source": "huggingface",
+      "description": "Multilingual E5 model supporting 100+ languages"
+    },
+    {
+      "name": "intfloat/multilingual-e5-base",
+      "type": "sentence-transformer",
+      "dimension": 768,
+      "modality": "text",
+      "normalization": "l2",
+      "source": "huggingface",
+      "description": "Multilingual E5 base model, excellent cross-lingual retrieval"
+    },
+    {
+      "name": "intfloat/multilingual-e5-large",
+      "type": "sentence-transformer",
+      "dimension": 1024,
+      "modality": "text",
+      "normalization": "l2",
+      "source": "huggingface",
+      "description": "Multilingual E5 large model, best-in-class multilingual embeddings"
+    },
+    {
+      "name": "openai/clip-vit-large-patch14",
+      "type": "clip",
+      "dimension": 768,
+      "modality": "multimodal",
+      "normalization": "l2",
+      "source": "huggingface",
+      "description": "Larger CLIP ViT-L/14 model, better quality than base"
+    },
+    {
+      "name": "openai/clip-vit-large-patch14-336",
+      "type": "clip",
+      "dimension": 768,
+      "modality": "multimodal",
+      "normalization": "l2",
+      "source": "huggingface",
+      "description": "Higher resolution (336x336) variant of ViT-L/14"
+    },
+    {
+      "name": "laion/CLIP-ViT-B-32-laion2B-s34B-b79K",
+      "type": "clip",
+      "dimension": 512,
+      "modality": "multimodal",
+      "normalization": "l2",
+      "source": "huggingface",
+      "description": "LAION's CLIP trained on 2B image-text pairs"
+    },
+    {
+      "name": "laion/CLIP-ViT-H-14-laion2B-s32B-b79K",
+      "type": "clip",
+      "dimension": 1024,
+      "modality": "multimodal",
+      "normalization": "l2",
+      "source": "huggingface",
+      "description": "LAION's huge CLIP model, excellent quality"
+    },
+    {
+      "name": "text-embedding-ada-002",
+      "type": "openai",
+      "dimension": 1536,
+      "modality": "text",
+      "normalization": "l2",
+      "source": "openai-api",
+      "description": "OpenAI's production embedding model (legacy). Requires API key."
+    },
+    {
+      "name": "text-embedding-3-small",
+      "type": "openai",
+      "dimension": 1536,
+      "modality": "text",
+      "normalization": "l2",
+      "source": "openai-api",
+      "description": "OpenAI's newer small model, better than ada-002. Requires API key."
+    },
+    {
+      "name": "text-embedding-3-large",
+      "type": "openai",
+      "dimension": 3072,
+      "modality": "text",
+      "normalization": "l2",
+      "source": "openai-api",
+      "description": "OpenAI's large embedding model, highest quality. Requires API key."
+    },
+    {
+      "name": "embed-english-v3.0",
+      "type": "cohere",
+      "dimension": 1024,
+      "modality": "text",
+      "normalization": "none",
+      "source": "cohere-api",
+      "description": "Cohere's English embedding model. Requires API key."
+    },
+    {
+      "name": "embed-english-light-v3.0",
+      "type": "cohere",
+      "dimension": 384,
+      "modality": "text",
+      "normalization": "none",
+      "source": "cohere-api",
+      "description": "Cohere's lightweight English model. Requires API key."
+    },
+    {
+      "name": "embed-multilingual-v3.0",
+      "type": "cohere",
+      "dimension": 1024,
+      "modality": "text",
+      "normalization": "none",
+      "source": "cohere-api",
+      "description": "Cohere's multilingual model supporting 100+ languages. Requires API key."
+    },
+    {
+      "name": "embed-multilingual-light-v3.0",
+      "type": "cohere",
+      "dimension": 384,
+      "modality": "text",
+      "normalization": "none",
+      "source": "cohere-api",
+      "description": "Cohere's lightweight multilingual model. Requires API key."
+    },
+    {
+      "name": "textembedding-gecko@003",
+      "type": "vertex-ai",
+      "dimension": 768,
+      "modality": "text",
+      "normalization": "l2",
+      "source": "google-cloud",
+      "description": "Google's Gecko model for text embeddings. Requires Google Cloud credentials."
+    },
+    {
+      "name": "text-embedding-004",
+      "type": "vertex-ai",
+      "dimension": 768,
+      "modality": "text",
+      "normalization": "l2",
+      "source": "google-cloud",
+      "description": "Google's latest text embedding model. Requires Google Cloud credentials."
+    },
+    {
+      "name": "text-multilingual-embedding-002",
+      "type": "vertex-ai",
+      "dimension": 768,
+      "modality": "text",
+      "normalization": "l2",
+      "source": "google-cloud",
+      "description": "Google's multilingual embedding model. Requires Google Cloud credentials."
+    },
+    {
+      "name": "multimodalembedding@001",
+      "type": "vertex-ai",
+      "dimension": 1408,
+      "modality": "multimodal",
+      "normalization": "l2",
+      "source": "google-cloud",
+      "description": "Google's multimodal embedding model. Requires Google Cloud credentials."
+    },
+    {
+      "name": "voyage-large-2",
+      "type": "voyage",
+      "dimension": 1536,
+      "modality": "text",
+      "normalization": "l2",
+      "source": "voyage-api",
+      "description": "Voyage AI's large model. Requires API key."
+    },
+    {
+      "name": "voyage-code-2",
+      "type": "voyage",
+      "dimension": 1536,
+      "modality": "text",
+      "normalization": "l2",
+      "source": "voyage-api",
+      "description": "Voyage AI's code-optimized model. Requires API key."
+    },
+    {
+      "name": "voyage-2",
+      "type": "voyage",
+      "dimension": 1024,
+      "modality": "text",
+      "normalization": "l2",
+      "source": "voyage-api",
+      "description": "Voyage AI's general-purpose model. Requires API key."
+    },
+    {
+      "name": "jinaai/jina-embeddings-v2-base-en",
+      "type": "sentence-transformer",
+      "dimension": 768,
+      "modality": "text",
+      "normalization": "l2",
+      "source": "huggingface",
+      "description": "Jina AI's 8k context length model, good for long documents"
+    },
+    {
+      "name": "jinaai/jina-embeddings-v2-small-en",
+      "type": "sentence-transformer",
+      "dimension": 512,
+      "modality": "text",
+      "normalization": "l2",
+      "source": "huggingface",
+      "description": "Jina AI's small model with 8k context length"
+    },
+    {
+      "name": "nomic-ai/nomic-embed-text-v1",
+      "type": "sentence-transformer",
+      "dimension": 768,
+      "modality": "text",
+      "normalization": "l2",
+      "source": "huggingface",
+      "description": "Nomic's open-source text embedding model with 8k context"
+    },
+    {
+      "name": "nomic-ai/nomic-embed-text-v1.5",
+      "type": "sentence-transformer",
+      "dimension": 768,
+      "modality": "text",
+      "normalization": "l2",
+      "source": "huggingface",
+      "description": "Nomic's improved model with better performance"
+    },
+    {
+      "name": "Alibaba-NLP/gte-Qwen2-7B-instruct",
+      "type": "sentence-transformer",
+      "dimension": 3584,
+      "modality": "text",
+      "normalization": "l2",
+      "source": "huggingface",
+      "description": "Very large instruction-following embedding model, SOTA on many benchmarks"
+    },
+    {
+      "name": "nvidia/NV-Embed-v1",
+      "type": "sentence-transformer",
+      "dimension": 4096,
+      "modality": "text",
+      "normalization": "l2",
+      "source": "huggingface",
+      "description": "NVIDIA's embedding model, excellent for retrieval tasks"
+    }
+  ],
+  "metadata": {
+    "version": "1.0.0",
+    "last_updated": "2026-01-24",
+    "description": "Known embedding models registry for Vector Inspector"
+  }
+}
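Each entry in the registry above carries a name, type, dimension, modality, normalization, and source, which is enough to match a collection's vector dimension against candidate models. This hunk does not show how the new model_registry.py consumes the file, so the sketch below only illustrates loading the bundled JSON and filtering it by dimension; the helper names are assumptions, while the package path and JSON schema come from this release.

```python
# Hedged sketch: load the bundled known_embedding_models.json and list
# candidate models for a collection whose vectors have a given dimension.
# The helper names are assumptions; only the JSON schema comes from the diff.
import json
from importlib import resources
from typing import Any


def load_known_models() -> list[dict[str, Any]]:
    """Read the packaged registry (assumes it ships inside vector_inspector.config)."""
    cfg = resources.files("vector_inspector.config") / "known_embedding_models.json"
    with cfg.open("r", encoding="utf-8") as fh:
        return json.load(fh)["models"]


def candidates_for_dimension(dimension: int, modality: str | None = None) -> list[str]:
    """Return model names whose embedding size (and optionally modality) matches."""
    return [
        m["name"]
        for m in load_known_models()
        if m["dimension"] == dimension and (modality is None or m["modality"] == modality)
    ]


if __name__ == "__main__":
    # A 384-dim collection could have been built with any of these models.
    print(candidates_for_dimension(384, modality="text"))
```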
vector_inspector-0.3.1/src/vector_inspector/core/cache_manager.py (new file)

@@ -0,0 +1,159 @@
+"""
+Cache manager for storing databrowser and search panel state by database and collection.
+Provides fast switching between collections with automatic invalidation on refresh or settings changes.
+"""
+from typing import Dict, Any, Optional, Tuple
+from dataclasses import dataclass, field
+from datetime import datetime
+
+
+@dataclass
+class CacheEntry:
+    """Represents a cached state for a specific database and collection."""
+    data: Any
+    timestamp: datetime = field(default_factory=datetime.now)
+
+    # Browser state
+    scroll_position: int = 0
+    selected_indices: list = field(default_factory=list)
+
+    # Search panel state
+    search_query: str = ""
+    search_filters: Dict[str, Any] = field(default_factory=dict)
+    search_results: Optional[Any] = None
+
+    # User inputs
+    user_inputs: Dict[str, Any] = field(default_factory=dict)
+
+
+class CacheManager:
+    """
+    Manages cache for databrowser and search panel by (database, collection) key.
+    Supports invalidation on refresh or settings changes.
+    """
+
+    def __init__(self):
+        self._cache: Dict[Tuple[str, str], CacheEntry] = {}
+        self._enabled = True
+
+    def get(self, database: str, collection: str) -> Optional[CacheEntry]:
+        """Retrieve cached entry for a database and collection."""
+        if not self._enabled:
+            return None
+
+        key = (database, collection)
+        return self._cache.get(key)
+
+    def set(self, database: str, collection: str, entry: CacheEntry) -> None:
+        """Store a cache entry for a database and collection."""
+        if not self._enabled:
+            return
+
+        key = (database, collection)
+        entry.timestamp = datetime.now()
+        self._cache[key] = entry
+
+    def update(self, database: str, collection: str, **kwargs) -> None:
+        """Update specific fields in an existing cache entry."""
+        key = (database, collection)
+        if key in self._cache:
+            entry = self._cache[key]
+            for field_name, value in kwargs.items():
+                if hasattr(entry, field_name):
+                    setattr(entry, field_name, value)
+            entry.timestamp = datetime.now()
+        else:
+            # Create new entry with provided fields
+            entry = CacheEntry(data=None)
+            for field_name, value in kwargs.items():
+                if hasattr(entry, field_name):
+                    setattr(entry, field_name, value)
+            self._cache[key] = entry
+
+    def invalidate(self, database: Optional[str] = None, collection: Optional[str] = None) -> None:
+        """
+        Invalidate cache entries.
+        - If both database and collection are provided, invalidate that specific entry.
+        - If only database is provided, invalidate all collections in that database.
+        - If neither is provided, invalidate all entries (global refresh).
+        """
+        if database is None and collection is None:
+            # Clear all cache
+            self._cache.clear()
+        elif collection is None and database is not None:
+            # Clear all collections in the specified database
+            keys_to_remove = [key for key in self._cache.keys() if key[0] == database]
+            for key in keys_to_remove:
+                del self._cache[key]
+        elif database is not None and collection is not None:
+            # Clear specific database/collection combination
+            key = (database, collection)
+            if key in self._cache:
+                del self._cache[key]
+
+    def clear(self) -> None:
+        """Clear all cached entries."""
+        self._cache.clear()
+
+    def enable(self) -> None:
+        """Enable caching."""
+        self._enabled = True
+
+    def disable(self) -> None:
+        """Disable caching and clear all entries."""
+        self._enabled = False
+        self._cache.clear()
+
+    def is_enabled(self) -> bool:
+        """Check if caching is enabled."""
+        return self._enabled
+
+    def get_cache_info(self) -> Dict[str, Any]:
+        """Get information about the current cache state."""
+        return {
+            "enabled": self._enabled,
+            "entry_count": len(self._cache),
+            "entries": [
+                {
+                    "database": db,
+                    "collection": coll,
+                    "timestamp": entry.timestamp.isoformat(),
+                    "has_data": entry.data is not None,
+                    "has_search_results": entry.search_results is not None,
+                }
+                for (db, coll), entry in self._cache.items()
+            ]
+        }
+
+
+# Global cache manager instance
+_cache_manager: Optional[CacheManager] = None
+
+
+def get_cache_manager() -> CacheManager:
+    """Get or create the global cache manager instance."""
+    global _cache_manager
+    if _cache_manager is None:
+        _cache_manager = CacheManager()
+        # Initialize from settings
+        try:
+            from vector_inspector.services.settings_service import SettingsService
+            settings = SettingsService()
+            if not settings.get_cache_enabled():
+                _cache_manager.disable()
+        except Exception:
+            # If settings can't be loaded, default to enabled
+            pass
+    return _cache_manager
+
+
+def invalidate_cache_on_settings_change() -> None:
+    """Invalidate all cache when settings change."""
+    cache = get_cache_manager()
+    cache.invalidate()
+
+
+def invalidate_cache_on_refresh(database: Optional[str] = None, collection: Optional[str] = None) -> None:
+    """Invalidate cache on refresh action."""
+    cache = get_cache_manager()
+    cache.invalidate(database, collection)
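The module above keeps one process-wide CacheManager keyed by (database, collection), so views can stash and restore their state when the user switches collections. A short usage sketch using only the functions shown in the hunk; the database name, collection name, and cached values are made up:

```python
# Usage sketch for the cache_manager module shown above.
# The database/collection names and cached values are illustrative only.
from vector_inspector.core.cache_manager import (
    CacheEntry,
    get_cache_manager,
    invalidate_cache_on_refresh,
)

cache = get_cache_manager()

# Store browser + search state when leaving a collection...
cache.set("local-chroma", "documents", CacheEntry(
    data=None,
    scroll_position=120,
    search_query="invoice 2024",
))

# ...and restore it when the user comes back.
entry = cache.get("local-chroma", "documents")
if entry is not None:
    print(entry.scroll_position, entry.search_query)

# update() patches individual fields and bumps the timestamp;
# unknown keyword names are silently ignored (the hasattr check above).
cache.update("local-chroma", "documents", search_query="invoice 2025")

# A refresh of one collection drops just that entry; calling
# invalidate_cache_on_refresh() with no arguments clears everything.
invalidate_cache_on_refresh("local-chroma", "documents")
print(cache.get_cache_info()["entry_count"])
```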