vector-inspector 0.2.6__tar.gz → 0.2.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {vector_inspector-0.2.6 → vector_inspector-0.2.7}/PKG-INFO +3 -1
- {vector_inspector-0.2.6 → vector_inspector-0.2.7}/pyproject.toml +3 -1
- vector_inspector-0.2.7/src/vector_inspector/core/cache_manager.py +159 -0
- vector_inspector-0.2.7/src/vector_inspector/core/connection_manager.py +277 -0
- {vector_inspector-0.2.6 → vector_inspector-0.2.7}/src/vector_inspector/core/connections/chroma_connection.py +90 -5
- {vector_inspector-0.2.6 → vector_inspector-0.2.7}/src/vector_inspector/core/connections/qdrant_connection.py +62 -8
- vector_inspector-0.2.7/src/vector_inspector/core/embedding_utils.py +140 -0
- {vector_inspector-0.2.6 → vector_inspector-0.2.7}/src/vector_inspector/services/backup_restore_service.py +3 -29
- vector_inspector-0.2.7/src/vector_inspector/services/credential_service.py +130 -0
- {vector_inspector-0.2.6 → vector_inspector-0.2.7}/src/vector_inspector/services/filter_service.py +1 -1
- vector_inspector-0.2.7/src/vector_inspector/services/profile_service.py +409 -0
- {vector_inspector-0.2.6 → vector_inspector-0.2.7}/src/vector_inspector/services/settings_service.py +19 -0
- vector_inspector-0.2.7/src/vector_inspector/ui/components/connection_manager_panel.py +320 -0
- vector_inspector-0.2.7/src/vector_inspector/ui/components/profile_manager_panel.py +518 -0
- vector_inspector-0.2.7/src/vector_inspector/ui/dialogs/__init__.py +5 -0
- vector_inspector-0.2.7/src/vector_inspector/ui/dialogs/cross_db_migration.py +364 -0
- vector_inspector-0.2.7/src/vector_inspector/ui/dialogs/embedding_config_dialog.py +176 -0
- vector_inspector-0.2.7/src/vector_inspector/ui/main_window.py +579 -0
- {vector_inspector-0.2.6 → vector_inspector-0.2.7}/src/vector_inspector/ui/views/info_panel.py +225 -55
- {vector_inspector-0.2.6 → vector_inspector-0.2.7}/src/vector_inspector/ui/views/metadata_view.py +71 -3
- {vector_inspector-0.2.6 → vector_inspector-0.2.7}/src/vector_inspector/ui/views/search_view.py +43 -3
- vector_inspector-0.2.6/src/vector_inspector/ui/main_window.py +0 -344
- {vector_inspector-0.2.6 → vector_inspector-0.2.7}/README.md +0 -0
- {vector_inspector-0.2.6 → vector_inspector-0.2.7}/src/vector_inspector/__init__.py +0 -0
- {vector_inspector-0.2.6 → vector_inspector-0.2.7}/src/vector_inspector/__main__.py +0 -0
- {vector_inspector-0.2.6 → vector_inspector-0.2.7}/src/vector_inspector/core/__init__.py +0 -0
- {vector_inspector-0.2.6 → vector_inspector-0.2.7}/src/vector_inspector/core/connections/__init__.py +0 -0
- {vector_inspector-0.2.6 → vector_inspector-0.2.7}/src/vector_inspector/core/connections/base_connection.py +0 -0
- {vector_inspector-0.2.6 → vector_inspector-0.2.7}/src/vector_inspector/core/connections/template_connection.py +0 -0
- {vector_inspector-0.2.6 → vector_inspector-0.2.7}/src/vector_inspector/main.py +0 -0
- {vector_inspector-0.2.6 → vector_inspector-0.2.7}/src/vector_inspector/services/__init__.py +0 -0
- {vector_inspector-0.2.6 → vector_inspector-0.2.7}/src/vector_inspector/services/import_export_service.py +0 -0
- {vector_inspector-0.2.6 → vector_inspector-0.2.7}/src/vector_inspector/services/visualization_service.py +0 -0
- {vector_inspector-0.2.6 → vector_inspector-0.2.7}/src/vector_inspector/ui/__init__.py +0 -0
- {vector_inspector-0.2.6 → vector_inspector-0.2.7}/src/vector_inspector/ui/components/__init__.py +0 -0
- {vector_inspector-0.2.6 → vector_inspector-0.2.7}/src/vector_inspector/ui/components/backup_restore_dialog.py +0 -0
- {vector_inspector-0.2.6 → vector_inspector-0.2.7}/src/vector_inspector/ui/components/filter_builder.py +0 -0
- {vector_inspector-0.2.6 → vector_inspector-0.2.7}/src/vector_inspector/ui/components/item_dialog.py +0 -0
- {vector_inspector-0.2.6 → vector_inspector-0.2.7}/src/vector_inspector/ui/components/loading_dialog.py +0 -0
- {vector_inspector-0.2.6 → vector_inspector-0.2.7}/src/vector_inspector/ui/views/__init__.py +0 -0
- {vector_inspector-0.2.6 → vector_inspector-0.2.7}/src/vector_inspector/ui/views/collection_browser.py +0 -0
- {vector_inspector-0.2.6 → vector_inspector-0.2.7}/src/vector_inspector/ui/views/connection_view.py +0 -0
- {vector_inspector-0.2.6 → vector_inspector-0.2.7}/src/vector_inspector/ui/views/visualization_view.py +0 -0
- {vector_inspector-0.2.6 → vector_inspector-0.2.7}/src/vector_inspector/utils/__init__.py +0 -0
- {vector_inspector-0.2.6 → vector_inspector-0.2.7}/src/vector_inspector/utils/lazy_imports.py +0 -0
- {vector_inspector-0.2.6 → vector_inspector-0.2.7}/tests/test_connections.py +0 -0
- {vector_inspector-0.2.6 → vector_inspector-0.2.7}/tests/test_filter_service.py +0 -0
- {vector_inspector-0.2.6 → vector_inspector-0.2.7}/tests/test_settings_service.py +0 -0
- {vector_inspector-0.2.6 → vector_inspector-0.2.7}/tests/vector_inspector.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: vector-inspector
|
|
3
|
-
Version: 0.2.6
|
|
3
|
+
Version: 0.2.7
|
|
4
4
|
Summary: A comprehensive desktop application for visualizing, querying, and managing vector database data
|
|
5
5
|
Author-Email: Anthony Dawson <anthonypdawson+github@gmail.com>
|
|
6
6
|
License: MIT
|
|
@@ -22,6 +22,8 @@ Requires-Dist: sentence-transformers>=2.2.0
|
|
|
22
22
|
Requires-Dist: fastembed>=0.7.4
|
|
23
23
|
Requires-Dist: pyarrow>=14.0.0
|
|
24
24
|
Requires-Dist: pinecone>=8.0.0
|
|
25
|
+
Requires-Dist: keyring>=25.7.0
|
|
26
|
+
Requires-Dist: hf-xet>=1.2.0
|
|
25
27
|
Description-Content-Type: text/markdown
|
|
26
28
|
|
|
27
29
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "vector-inspector"
|
|
3
|
-
version = "0.2.6"
|
|
3
|
+
version = "0.2.7"
|
|
4
4
|
description = "A comprehensive desktop application for visualizing, querying, and managing vector database data"
|
|
5
5
|
authors = [
|
|
6
6
|
{ name = "Anthony Dawson", email = "anthonypdawson+github@gmail.com" },
|
|
@@ -19,6 +19,8 @@ dependencies = [
|
|
|
19
19
|
"fastembed>=0.7.4",
|
|
20
20
|
"pyarrow>=14.0.0",
|
|
21
21
|
"pinecone>=8.0.0",
|
|
22
|
+
"keyring>=25.7.0",
|
|
23
|
+
"hf-xet>=1.2.0",
|
|
22
24
|
]
|
|
23
25
|
requires-python = ">=3.10,<3.13"
|
|
24
26
|
readme = "README.md"
|
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Cache manager for storing databrowser and search panel state by database and collection.
|
|
3
|
+
Provides fast switching between collections with automatic invalidation on refresh or settings changes.
|
|
4
|
+
"""
|
|
5
|
+
from typing import Dict, Any, Optional, Tuple
|
|
6
|
+
from dataclasses import dataclass, field
|
|
7
|
+
from datetime import datetime
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@dataclass
|
|
11
|
+
class CacheEntry:
|
|
12
|
+
"""Represents a cached state for a specific database and collection."""
|
|
13
|
+
data: Any
|
|
14
|
+
timestamp: datetime = field(default_factory=datetime.now)
|
|
15
|
+
|
|
16
|
+
# Browser state
|
|
17
|
+
scroll_position: int = 0
|
|
18
|
+
selected_indices: list = field(default_factory=list)
|
|
19
|
+
|
|
20
|
+
# Search panel state
|
|
21
|
+
search_query: str = ""
|
|
22
|
+
search_filters: Dict[str, Any] = field(default_factory=dict)
|
|
23
|
+
search_results: Optional[Any] = None
|
|
24
|
+
|
|
25
|
+
# User inputs
|
|
26
|
+
user_inputs: Dict[str, Any] = field(default_factory=dict)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class CacheManager:
|
|
30
|
+
"""
|
|
31
|
+
Manages cache for databrowser and search panel by (database, collection) key.
|
|
32
|
+
Supports invalidation on refresh or settings changes.
|
|
33
|
+
"""
|
|
34
|
+
|
|
35
|
+
def __init__(self):
|
|
36
|
+
self._cache: Dict[Tuple[str, str], CacheEntry] = {}
|
|
37
|
+
self._enabled = True
|
|
38
|
+
|
|
39
|
+
def get(self, database: str, collection: str) -> Optional[CacheEntry]:
|
|
40
|
+
"""Retrieve cached entry for a database and collection."""
|
|
41
|
+
if not self._enabled:
|
|
42
|
+
return None
|
|
43
|
+
|
|
44
|
+
key = (database, collection)
|
|
45
|
+
return self._cache.get(key)
|
|
46
|
+
|
|
47
|
+
def set(self, database: str, collection: str, entry: CacheEntry) -> None:
|
|
48
|
+
"""Store a cache entry for a database and collection."""
|
|
49
|
+
if not self._enabled:
|
|
50
|
+
return
|
|
51
|
+
|
|
52
|
+
key = (database, collection)
|
|
53
|
+
entry.timestamp = datetime.now()
|
|
54
|
+
self._cache[key] = entry
|
|
55
|
+
|
|
56
|
+
def update(self, database: str, collection: str, **kwargs) -> None:
|
|
57
|
+
"""Update specific fields in an existing cache entry."""
|
|
58
|
+
key = (database, collection)
|
|
59
|
+
if key in self._cache:
|
|
60
|
+
entry = self._cache[key]
|
|
61
|
+
for field_name, value in kwargs.items():
|
|
62
|
+
if hasattr(entry, field_name):
|
|
63
|
+
setattr(entry, field_name, value)
|
|
64
|
+
entry.timestamp = datetime.now()
|
|
65
|
+
else:
|
|
66
|
+
# Create new entry with provided fields
|
|
67
|
+
entry = CacheEntry(data=None)
|
|
68
|
+
for field_name, value in kwargs.items():
|
|
69
|
+
if hasattr(entry, field_name):
|
|
70
|
+
setattr(entry, field_name, value)
|
|
71
|
+
self._cache[key] = entry
|
|
72
|
+
|
|
73
|
+
def invalidate(self, database: Optional[str] = None, collection: Optional[str] = None) -> None:
|
|
74
|
+
"""
|
|
75
|
+
Invalidate cache entries.
|
|
76
|
+
- If both database and collection are provided, invalidate that specific entry.
|
|
77
|
+
- If only database is provided, invalidate all collections in that database.
|
|
78
|
+
- If neither is provided, invalidate all entries (global refresh).
|
|
79
|
+
"""
|
|
80
|
+
if database is None and collection is None:
|
|
81
|
+
# Clear all cache
|
|
82
|
+
self._cache.clear()
|
|
83
|
+
elif collection is None and database is not None:
|
|
84
|
+
# Clear all collections in the specified database
|
|
85
|
+
keys_to_remove = [key for key in self._cache.keys() if key[0] == database]
|
|
86
|
+
for key in keys_to_remove:
|
|
87
|
+
del self._cache[key]
|
|
88
|
+
elif database is not None and collection is not None:
|
|
89
|
+
# Clear specific database/collection combination
|
|
90
|
+
key = (database, collection)
|
|
91
|
+
if key in self._cache:
|
|
92
|
+
del self._cache[key]
|
|
93
|
+
|
|
94
|
+
def clear(self) -> None:
|
|
95
|
+
"""Clear all cached entries."""
|
|
96
|
+
self._cache.clear()
|
|
97
|
+
|
|
98
|
+
def enable(self) -> None:
|
|
99
|
+
"""Enable caching."""
|
|
100
|
+
self._enabled = True
|
|
101
|
+
|
|
102
|
+
def disable(self) -> None:
|
|
103
|
+
"""Disable caching and clear all entries."""
|
|
104
|
+
self._enabled = False
|
|
105
|
+
self._cache.clear()
|
|
106
|
+
|
|
107
|
+
def is_enabled(self) -> bool:
|
|
108
|
+
"""Check if caching is enabled."""
|
|
109
|
+
return self._enabled
|
|
110
|
+
|
|
111
|
+
def get_cache_info(self) -> Dict[str, Any]:
|
|
112
|
+
"""Get information about the current cache state."""
|
|
113
|
+
return {
|
|
114
|
+
"enabled": self._enabled,
|
|
115
|
+
"entry_count": len(self._cache),
|
|
116
|
+
"entries": [
|
|
117
|
+
{
|
|
118
|
+
"database": db,
|
|
119
|
+
"collection": coll,
|
|
120
|
+
"timestamp": entry.timestamp.isoformat(),
|
|
121
|
+
"has_data": entry.data is not None,
|
|
122
|
+
"has_search_results": entry.search_results is not None,
|
|
123
|
+
}
|
|
124
|
+
for (db, coll), entry in self._cache.items()
|
|
125
|
+
]
|
|
126
|
+
}
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
# Global cache manager instance
|
|
130
|
+
_cache_manager: Optional[CacheManager] = None
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def get_cache_manager() -> CacheManager:
|
|
134
|
+
"""Get or create the global cache manager instance."""
|
|
135
|
+
global _cache_manager
|
|
136
|
+
if _cache_manager is None:
|
|
137
|
+
_cache_manager = CacheManager()
|
|
138
|
+
# Initialize from settings
|
|
139
|
+
try:
|
|
140
|
+
from vector_inspector.services.settings_service import SettingsService
|
|
141
|
+
settings = SettingsService()
|
|
142
|
+
if not settings.get_cache_enabled():
|
|
143
|
+
_cache_manager.disable()
|
|
144
|
+
except Exception:
|
|
145
|
+
# If settings can't be loaded, default to enabled
|
|
146
|
+
pass
|
|
147
|
+
return _cache_manager
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
def invalidate_cache_on_settings_change() -> None:
|
|
151
|
+
"""Invalidate all cache when settings change."""
|
|
152
|
+
cache = get_cache_manager()
|
|
153
|
+
cache.invalidate()
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
def invalidate_cache_on_refresh(database: Optional[str] = None, collection: Optional[str] = None) -> None:
|
|
157
|
+
"""Invalidate cache on refresh action."""
|
|
158
|
+
cache = get_cache_manager()
|
|
159
|
+
cache.invalidate(database, collection)
|
|
@@ -0,0 +1,277 @@
|
|
|
1
|
+
"""Connection manager for handling multiple vector database connections."""
|
|
2
|
+
|
|
3
|
+
import uuid
|
|
4
|
+
from typing import Dict, Optional, List, Any
|
|
5
|
+
from enum import Enum
|
|
6
|
+
from PySide6.QtCore import QObject, Signal
|
|
7
|
+
|
|
8
|
+
from .connections.base_connection import VectorDBConnection
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class ConnectionState(Enum):
|
|
12
|
+
"""Possible connection states."""
|
|
13
|
+
DISCONNECTED = "disconnected"
|
|
14
|
+
CONNECTING = "connecting"
|
|
15
|
+
CONNECTED = "connected"
|
|
16
|
+
ERROR = "error"
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class ConnectionInstance:
|
|
20
|
+
"""Represents a single active connection with its state and context."""
|
|
21
|
+
|
|
22
|
+
def __init__(
|
|
23
|
+
self,
|
|
24
|
+
connection_id: str,
|
|
25
|
+
name: str,
|
|
26
|
+
provider: str,
|
|
27
|
+
connection: VectorDBConnection,
|
|
28
|
+
config: Dict[str, Any]
|
|
29
|
+
):
|
|
30
|
+
"""
|
|
31
|
+
Initialize a connection instance.
|
|
32
|
+
|
|
33
|
+
Args:
|
|
34
|
+
connection_id: Unique connection identifier
|
|
35
|
+
name: User-friendly connection name
|
|
36
|
+
provider: Provider type (chromadb, qdrant, etc.)
|
|
37
|
+
connection: The actual connection object
|
|
38
|
+
config: Connection configuration dict
|
|
39
|
+
"""
|
|
40
|
+
self.id = connection_id
|
|
41
|
+
self.name = name
|
|
42
|
+
self.provider = provider
|
|
43
|
+
self.connection = connection
|
|
44
|
+
self.config = config
|
|
45
|
+
self.state = ConnectionState.DISCONNECTED
|
|
46
|
+
self.active_collection: Optional[str] = None
|
|
47
|
+
self.collections: List[str] = []
|
|
48
|
+
self.error_message: Optional[str] = None
|
|
49
|
+
|
|
50
|
+
def get_display_name(self) -> str:
|
|
51
|
+
"""Get a display-friendly connection name."""
|
|
52
|
+
return f"{self.name} ({self.provider})"
|
|
53
|
+
|
|
54
|
+
def get_breadcrumb(self) -> str:
|
|
55
|
+
"""Get breadcrumb showing connection > collection."""
|
|
56
|
+
if self.active_collection:
|
|
57
|
+
return f"{self.name} > {self.active_collection}"
|
|
58
|
+
return self.name
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
class ConnectionManager(QObject):
|
|
62
|
+
"""Manages multiple vector database connections and saved profiles.
|
|
63
|
+
|
|
64
|
+
Signals:
|
|
65
|
+
connection_opened: Emitted when a new connection is opened (connection_id)
|
|
66
|
+
connection_closed: Emitted when a connection is closed (connection_id)
|
|
67
|
+
connection_state_changed: Emitted when connection state changes (connection_id, state)
|
|
68
|
+
active_connection_changed: Emitted when active connection changes (connection_id or None)
|
|
69
|
+
active_collection_changed: Emitted when active collection changes (connection_id, collection_name or None)
|
|
70
|
+
collections_updated: Emitted when collections list is updated (connection_id, collections)
|
|
71
|
+
"""
|
|
72
|
+
|
|
73
|
+
# Signals
|
|
74
|
+
connection_opened = Signal(str) # connection_id
|
|
75
|
+
connection_closed = Signal(str) # connection_id
|
|
76
|
+
connection_state_changed = Signal(str, ConnectionState) # connection_id, state
|
|
77
|
+
active_connection_changed = Signal(object) # connection_id or None
|
|
78
|
+
active_collection_changed = Signal(str, object) # connection_id, collection_name or None
|
|
79
|
+
collections_updated = Signal(str, list) # connection_id, collections
|
|
80
|
+
|
|
81
|
+
MAX_CONNECTIONS = 10 # Limit to prevent resource exhaustion
|
|
82
|
+
|
|
83
|
+
def __init__(self):
|
|
84
|
+
"""Initialize the connection manager."""
|
|
85
|
+
super().__init__()
|
|
86
|
+
self._connections: Dict[str, ConnectionInstance] = {}
|
|
87
|
+
self._active_connection_id: Optional[str] = None
|
|
88
|
+
|
|
89
|
+
def create_connection(
|
|
90
|
+
self,
|
|
91
|
+
name: str,
|
|
92
|
+
provider: str,
|
|
93
|
+
connection: VectorDBConnection,
|
|
94
|
+
config: Dict[str, Any]
|
|
95
|
+
) -> str:
|
|
96
|
+
"""
|
|
97
|
+
Create a new connection instance (not yet connected).
|
|
98
|
+
|
|
99
|
+
Args:
|
|
100
|
+
name: User-friendly connection name
|
|
101
|
+
provider: Provider type
|
|
102
|
+
connection: The connection object
|
|
103
|
+
config: Connection configuration
|
|
104
|
+
|
|
105
|
+
Returns:
|
|
106
|
+
The connection ID
|
|
107
|
+
|
|
108
|
+
Raises:
|
|
109
|
+
RuntimeError: If maximum connections limit reached
|
|
110
|
+
"""
|
|
111
|
+
if len(self._connections) >= self.MAX_CONNECTIONS:
|
|
112
|
+
raise RuntimeError(f"Maximum number of connections ({self.MAX_CONNECTIONS}) reached")
|
|
113
|
+
|
|
114
|
+
connection_id = str(uuid.uuid4())
|
|
115
|
+
instance = ConnectionInstance(connection_id, name, provider, connection, config)
|
|
116
|
+
self._connections[connection_id] = instance
|
|
117
|
+
|
|
118
|
+
# Set as active if it's the first connection
|
|
119
|
+
if len(self._connections) == 1:
|
|
120
|
+
self._active_connection_id = connection_id
|
|
121
|
+
self.active_connection_changed.emit(connection_id)
|
|
122
|
+
|
|
123
|
+
# Don't emit connection_opened yet - wait until actually connected
|
|
124
|
+
return connection_id
|
|
125
|
+
|
|
126
|
+
def mark_connection_opened(self, connection_id: str):
|
|
127
|
+
"""
|
|
128
|
+
Mark a connection as opened (after successful connection).
|
|
129
|
+
|
|
130
|
+
Args:
|
|
131
|
+
connection_id: ID of connection that opened
|
|
132
|
+
"""
|
|
133
|
+
if connection_id in self._connections:
|
|
134
|
+
self.connection_opened.emit(connection_id)
|
|
135
|
+
|
|
136
|
+
def get_connection(self, connection_id: str) -> Optional[ConnectionInstance]:
|
|
137
|
+
"""Get a connection instance by ID."""
|
|
138
|
+
return self._connections.get(connection_id)
|
|
139
|
+
|
|
140
|
+
def get_active_connection(self) -> Optional[ConnectionInstance]:
|
|
141
|
+
"""Get the currently active connection instance."""
|
|
142
|
+
if self._active_connection_id:
|
|
143
|
+
return self._connections.get(self._active_connection_id)
|
|
144
|
+
return None
|
|
145
|
+
|
|
146
|
+
def get_active_connection_id(self) -> Optional[str]:
|
|
147
|
+
"""Get the currently active connection ID."""
|
|
148
|
+
return self._active_connection_id
|
|
149
|
+
|
|
150
|
+
def set_active_connection(self, connection_id: str) -> bool:
|
|
151
|
+
"""
|
|
152
|
+
Set the active connection.
|
|
153
|
+
|
|
154
|
+
Args:
|
|
155
|
+
connection_id: ID of connection to make active
|
|
156
|
+
|
|
157
|
+
Returns:
|
|
158
|
+
True if successful, False if connection not found
|
|
159
|
+
"""
|
|
160
|
+
if connection_id not in self._connections:
|
|
161
|
+
return False
|
|
162
|
+
|
|
163
|
+
self._active_connection_id = connection_id
|
|
164
|
+
self.active_connection_changed.emit(connection_id)
|
|
165
|
+
return True
|
|
166
|
+
|
|
167
|
+
def close_connection(self, connection_id: str) -> bool:
|
|
168
|
+
"""
|
|
169
|
+
Close and remove a connection.
|
|
170
|
+
|
|
171
|
+
Args:
|
|
172
|
+
connection_id: ID of connection to close
|
|
173
|
+
|
|
174
|
+
Returns:
|
|
175
|
+
True if successful, False if connection not found
|
|
176
|
+
"""
|
|
177
|
+
instance = self._connections.get(connection_id)
|
|
178
|
+
if not instance:
|
|
179
|
+
return False
|
|
180
|
+
|
|
181
|
+
# Disconnect the connection
|
|
182
|
+
try:
|
|
183
|
+
instance.connection.disconnect()
|
|
184
|
+
except Exception as e:
|
|
185
|
+
print(f"Error disconnecting: {e}")
|
|
186
|
+
|
|
187
|
+
# Remove from connections dict
|
|
188
|
+
del self._connections[connection_id]
|
|
189
|
+
|
|
190
|
+
# If this was the active connection, set a new one or None
|
|
191
|
+
if self._active_connection_id == connection_id:
|
|
192
|
+
if self._connections:
|
|
193
|
+
# Set first available connection as active
|
|
194
|
+
self._active_connection_id = next(iter(self._connections.keys()))
|
|
195
|
+
self.active_connection_changed.emit(self._active_connection_id)
|
|
196
|
+
else:
|
|
197
|
+
self._active_connection_id = None
|
|
198
|
+
self.active_connection_changed.emit(None)
|
|
199
|
+
|
|
200
|
+
self.connection_closed.emit(connection_id)
|
|
201
|
+
return True
|
|
202
|
+
|
|
203
|
+
def update_connection_state(self, connection_id: str, state: ConnectionState, error: Optional[str] = None):
|
|
204
|
+
"""
|
|
205
|
+
Update the state of a connection.
|
|
206
|
+
|
|
207
|
+
Args:
|
|
208
|
+
connection_id: ID of connection
|
|
209
|
+
state: New connection state
|
|
210
|
+
error: Optional error message if state is ERROR
|
|
211
|
+
"""
|
|
212
|
+
instance = self._connections.get(connection_id)
|
|
213
|
+
if instance:
|
|
214
|
+
instance.state = state
|
|
215
|
+
if error:
|
|
216
|
+
instance.error_message = error
|
|
217
|
+
else:
|
|
218
|
+
instance.error_message = None
|
|
219
|
+
self.connection_state_changed.emit(connection_id, state)
|
|
220
|
+
|
|
221
|
+
def update_collections(self, connection_id: str, collections: List[str]):
|
|
222
|
+
"""
|
|
223
|
+
Update the collections list for a connection.
|
|
224
|
+
|
|
225
|
+
Args:
|
|
226
|
+
connection_id: ID of connection
|
|
227
|
+
collections: List of collection names
|
|
228
|
+
"""
|
|
229
|
+
instance = self._connections.get(connection_id)
|
|
230
|
+
if instance:
|
|
231
|
+
instance.collections = collections
|
|
232
|
+
self.collections_updated.emit(connection_id, collections)
|
|
233
|
+
|
|
234
|
+
def set_active_collection(self, connection_id: str, collection_name: Optional[str]):
|
|
235
|
+
"""
|
|
236
|
+
Set the active collection for a connection.
|
|
237
|
+
|
|
238
|
+
Args:
|
|
239
|
+
connection_id: ID of connection
|
|
240
|
+
collection_name: Name of collection to make active, or None
|
|
241
|
+
"""
|
|
242
|
+
instance = self._connections.get(connection_id)
|
|
243
|
+
if instance:
|
|
244
|
+
instance.active_collection = collection_name
|
|
245
|
+
self.active_collection_changed.emit(connection_id, collection_name)
|
|
246
|
+
|
|
247
|
+
def get_all_connections(self) -> List[ConnectionInstance]:
|
|
248
|
+
"""Get list of all connection instances."""
|
|
249
|
+
return list(self._connections.values())
|
|
250
|
+
|
|
251
|
+
def get_connection_count(self) -> int:
|
|
252
|
+
"""Get the number of active connections."""
|
|
253
|
+
return len(self._connections)
|
|
254
|
+
|
|
255
|
+
def close_all_connections(self):
|
|
256
|
+
"""Close all connections. Typically called on application exit."""
|
|
257
|
+
connection_ids = list(self._connections.keys())
|
|
258
|
+
for conn_id in connection_ids:
|
|
259
|
+
self.close_connection(conn_id)
|
|
260
|
+
|
|
261
|
+
def rename_connection(self, connection_id: str, new_name: str) -> bool:
|
|
262
|
+
"""
|
|
263
|
+
Rename a connection.
|
|
264
|
+
|
|
265
|
+
Args:
|
|
266
|
+
connection_id: ID of connection
|
|
267
|
+
new_name: New name for the connection
|
|
268
|
+
|
|
269
|
+
Returns:
|
|
270
|
+
True if successful, False if connection not found
|
|
271
|
+
"""
|
|
272
|
+
instance = self._connections.get(connection_id)
|
|
273
|
+
if instance:
|
|
274
|
+
instance.name = new_name
|
|
275
|
+
return True
|
|
276
|
+
return False
|
|
277
|
+
|
|
@@ -6,10 +6,44 @@ from pathlib import Path
|
|
|
6
6
|
import chromadb
|
|
7
7
|
from chromadb.api import ClientAPI
|
|
8
8
|
from chromadb.api.models.Collection import Collection
|
|
9
|
+
from chromadb import Documents, EmbeddingFunction, Embeddings
|
|
9
10
|
|
|
10
11
|
from .base_connection import VectorDBConnection
|
|
11
12
|
|
|
12
13
|
|
|
14
|
+
class DimensionAwareEmbeddingFunction(EmbeddingFunction):
|
|
15
|
+
"""Embedding function that selects model based on collection's expected dimension."""
|
|
16
|
+
|
|
17
|
+
def __init__(self, expected_dimension: int):
|
|
18
|
+
"""Initialize with expected dimension (model loaded lazily on first use)."""
|
|
19
|
+
self.expected_dimension = expected_dimension
|
|
20
|
+
self.model = None
|
|
21
|
+
self.model_name = None
|
|
22
|
+
self.model_type = None
|
|
23
|
+
self._initialized = False
|
|
24
|
+
|
|
25
|
+
def _ensure_model_loaded(self):
|
|
26
|
+
"""Lazy load the embedding model on first use."""
|
|
27
|
+
if self._initialized:
|
|
28
|
+
return
|
|
29
|
+
|
|
30
|
+
from ..embedding_utils import get_embedding_model_for_dimension
|
|
31
|
+
print(f"[ChromaDB] Loading embedding model for {self.expected_dimension}d vectors...")
|
|
32
|
+
self.model, self.model_name, self.model_type = get_embedding_model_for_dimension(self.expected_dimension)
|
|
33
|
+
print(f"[ChromaDB] Using {self.model_type} model '{self.model_name}' for {self.expected_dimension}d embeddings")
|
|
34
|
+
self._initialized = True
|
|
35
|
+
|
|
36
|
+
def __call__(self, input: Documents) -> Embeddings:
|
|
37
|
+
"""Embed documents using the dimension-appropriate model."""
|
|
38
|
+
self._ensure_model_loaded()
|
|
39
|
+
from ..embedding_utils import encode_text
|
|
40
|
+
embeddings = []
|
|
41
|
+
for text in input:
|
|
42
|
+
embedding = encode_text(text, self.model, self.model_type)
|
|
43
|
+
embeddings.append(embedding)
|
|
44
|
+
return embeddings
|
|
45
|
+
|
|
46
|
+
|
|
13
47
|
class ChromaDBConnection(VectorDBConnection):
|
|
14
48
|
"""Manages connection to ChromaDB and provides query interface."""
|
|
15
49
|
|
|
@@ -90,12 +124,47 @@ class ChromaDBConnection(VectorDBConnection):
|
|
|
90
124
|
print(f"Failed to list collections: {e}")
|
|
91
125
|
return []
|
|
92
126
|
|
|
93
|
-
def
|
|
94
|
-
"""
|
|
95
|
-
|
|
127
|
+
def _get_collection_basic(self, name: str) -> Optional[Collection]:
|
|
128
|
+
"""Get collection without custom embedding function (for info lookup)."""
|
|
129
|
+
if not self._client:
|
|
130
|
+
return None
|
|
131
|
+
try:
|
|
132
|
+
return self._client.get_collection(name=name)
|
|
133
|
+
except Exception as e:
|
|
134
|
+
return None
|
|
135
|
+
|
|
136
|
+
def _get_embedding_function_for_collection(self, name: str) -> Optional[EmbeddingFunction]:
|
|
137
|
+
"""Get the appropriate embedding function for a collection based on its dimension."""
|
|
138
|
+
# Get basic collection to check dimension
|
|
139
|
+
basic_col = self._get_collection_basic(name)
|
|
140
|
+
if not basic_col:
|
|
141
|
+
return None
|
|
142
|
+
|
|
143
|
+
try:
|
|
144
|
+
# Get a sample to determine vector dimension
|
|
145
|
+
sample = basic_col.get(limit=1, include=["embeddings"])
|
|
146
|
+
embeddings = sample.get("embeddings") if sample else None
|
|
147
|
+
# Avoid numpy array truthiness issues - check is not None explicitly
|
|
148
|
+
if embeddings is not None and len(embeddings) > 0:
|
|
149
|
+
first_embedding = embeddings[0]
|
|
150
|
+
# Check if embedding exists and has content
|
|
151
|
+
if first_embedding is not None and len(first_embedding) > 0:
|
|
152
|
+
vector_dim = len(first_embedding)
|
|
153
|
+
print(f"[ChromaDB] Collection '{name}' has {vector_dim}d vectors")
|
|
154
|
+
return DimensionAwareEmbeddingFunction(vector_dim)
|
|
155
|
+
except Exception as e:
|
|
156
|
+
print(f"[ChromaDB] Failed to determine embedding function: {e}")
|
|
157
|
+
import traceback
|
|
158
|
+
traceback.print_exc()
|
|
159
|
+
|
|
160
|
+
return None
|
|
161
|
+
|
|
162
|
+
def get_collection(self, name: str, embedding_function: Optional[EmbeddingFunction] = None) -> Optional[Collection]:
|
|
163
|
+
"""Get a collection (without overriding existing embedding function).
|
|
96
164
|
|
|
97
165
|
Args:
|
|
98
166
|
name: Collection name
|
|
167
|
+
embedding_function: Optional custom embedding function (ignored if collection exists)
|
|
99
168
|
|
|
100
169
|
Returns:
|
|
101
170
|
Collection object or None if failed
|
|
@@ -103,7 +172,9 @@ class ChromaDBConnection(VectorDBConnection):
|
|
|
103
172
|
if not self._client:
|
|
104
173
|
return None
|
|
105
174
|
try:
|
|
106
|
-
|
|
175
|
+
# Just get the collection without trying to override embedding function
|
|
176
|
+
# This avoids conflicts with existing collections
|
|
177
|
+
self._current_collection = self._client.get_collection(name=name)
|
|
107
178
|
return self._current_collection
|
|
108
179
|
except Exception as e:
|
|
109
180
|
print(f"Failed to get collection: {e}")
|
|
@@ -119,7 +190,7 @@ class ChromaDBConnection(VectorDBConnection):
|
|
|
119
190
|
Returns:
|
|
120
191
|
Dictionary with collection info
|
|
121
192
|
"""
|
|
122
|
-
collection = self.get_collection(name)
|
|
193
|
+
collection = self._get_collection_basic(name)
|
|
123
194
|
if not collection:
|
|
124
195
|
return None
|
|
125
196
|
|
|
@@ -189,10 +260,22 @@ class ChromaDBConnection(VectorDBConnection):
|
|
|
189
260
|
Returns:
|
|
190
261
|
Query results or None if failed
|
|
191
262
|
"""
|
|
263
|
+
print(f"[ChromaDB] query_collection called for '{collection_name}'")
|
|
192
264
|
collection = self.get_collection(collection_name)
|
|
193
265
|
if not collection:
|
|
266
|
+
print(f"[ChromaDB] Failed to get collection '{collection_name}'")
|
|
194
267
|
return None
|
|
195
268
|
|
|
269
|
+
# If query_texts provided, we need to manually embed them with dimension-aware model
|
|
270
|
+
if query_texts and not query_embeddings:
|
|
271
|
+
embedding_function = self._get_embedding_function_for_collection(collection_name)
|
|
272
|
+
if embedding_function:
|
|
273
|
+
print(f"[ChromaDB] Manually embedding query texts with dimension-aware model")
|
|
274
|
+
query_embeddings = embedding_function(query_texts)
|
|
275
|
+
query_texts = None # Use embeddings instead of texts
|
|
276
|
+
else:
|
|
277
|
+
print(f"[ChromaDB] Warning: Could not determine embedding function, using collection's default")
|
|
278
|
+
|
|
196
279
|
try:
|
|
197
280
|
results = collection.query(
|
|
198
281
|
query_texts=query_texts,
|
|
@@ -205,6 +288,8 @@ class ChromaDBConnection(VectorDBConnection):
|
|
|
205
288
|
return cast(Dict[str, Any], results)
|
|
206
289
|
except Exception as e:
|
|
207
290
|
print(f"Query failed: {e}")
|
|
291
|
+
import traceback
|
|
292
|
+
traceback.print_exc()
|
|
208
293
|
return None
|
|
209
294
|
|
|
210
295
|
def get_all_items(
|