vector-inspector 0.2.6__tar.gz → 0.2.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. {vector_inspector-0.2.6 → vector_inspector-0.2.7}/PKG-INFO +3 -1
  2. {vector_inspector-0.2.6 → vector_inspector-0.2.7}/pyproject.toml +3 -1
  3. vector_inspector-0.2.7/src/vector_inspector/core/cache_manager.py +159 -0
  4. vector_inspector-0.2.7/src/vector_inspector/core/connection_manager.py +277 -0
  5. {vector_inspector-0.2.6 → vector_inspector-0.2.7}/src/vector_inspector/core/connections/chroma_connection.py +90 -5
  6. {vector_inspector-0.2.6 → vector_inspector-0.2.7}/src/vector_inspector/core/connections/qdrant_connection.py +62 -8
  7. vector_inspector-0.2.7/src/vector_inspector/core/embedding_utils.py +140 -0
  8. {vector_inspector-0.2.6 → vector_inspector-0.2.7}/src/vector_inspector/services/backup_restore_service.py +3 -29
  9. vector_inspector-0.2.7/src/vector_inspector/services/credential_service.py +130 -0
  10. {vector_inspector-0.2.6 → vector_inspector-0.2.7}/src/vector_inspector/services/filter_service.py +1 -1
  11. vector_inspector-0.2.7/src/vector_inspector/services/profile_service.py +409 -0
  12. {vector_inspector-0.2.6 → vector_inspector-0.2.7}/src/vector_inspector/services/settings_service.py +19 -0
  13. vector_inspector-0.2.7/src/vector_inspector/ui/components/connection_manager_panel.py +320 -0
  14. vector_inspector-0.2.7/src/vector_inspector/ui/components/profile_manager_panel.py +518 -0
  15. vector_inspector-0.2.7/src/vector_inspector/ui/dialogs/__init__.py +5 -0
  16. vector_inspector-0.2.7/src/vector_inspector/ui/dialogs/cross_db_migration.py +364 -0
  17. vector_inspector-0.2.7/src/vector_inspector/ui/dialogs/embedding_config_dialog.py +176 -0
  18. vector_inspector-0.2.7/src/vector_inspector/ui/main_window.py +579 -0
  19. {vector_inspector-0.2.6 → vector_inspector-0.2.7}/src/vector_inspector/ui/views/info_panel.py +225 -55
  20. {vector_inspector-0.2.6 → vector_inspector-0.2.7}/src/vector_inspector/ui/views/metadata_view.py +71 -3
  21. {vector_inspector-0.2.6 → vector_inspector-0.2.7}/src/vector_inspector/ui/views/search_view.py +43 -3
  22. vector_inspector-0.2.6/src/vector_inspector/ui/main_window.py +0 -344
  23. {vector_inspector-0.2.6 → vector_inspector-0.2.7}/README.md +0 -0
  24. {vector_inspector-0.2.6 → vector_inspector-0.2.7}/src/vector_inspector/__init__.py +0 -0
  25. {vector_inspector-0.2.6 → vector_inspector-0.2.7}/src/vector_inspector/__main__.py +0 -0
  26. {vector_inspector-0.2.6 → vector_inspector-0.2.7}/src/vector_inspector/core/__init__.py +0 -0
  27. {vector_inspector-0.2.6 → vector_inspector-0.2.7}/src/vector_inspector/core/connections/__init__.py +0 -0
  28. {vector_inspector-0.2.6 → vector_inspector-0.2.7}/src/vector_inspector/core/connections/base_connection.py +0 -0
  29. {vector_inspector-0.2.6 → vector_inspector-0.2.7}/src/vector_inspector/core/connections/template_connection.py +0 -0
  30. {vector_inspector-0.2.6 → vector_inspector-0.2.7}/src/vector_inspector/main.py +0 -0
  31. {vector_inspector-0.2.6 → vector_inspector-0.2.7}/src/vector_inspector/services/__init__.py +0 -0
  32. {vector_inspector-0.2.6 → vector_inspector-0.2.7}/src/vector_inspector/services/import_export_service.py +0 -0
  33. {vector_inspector-0.2.6 → vector_inspector-0.2.7}/src/vector_inspector/services/visualization_service.py +0 -0
  34. {vector_inspector-0.2.6 → vector_inspector-0.2.7}/src/vector_inspector/ui/__init__.py +0 -0
  35. {vector_inspector-0.2.6 → vector_inspector-0.2.7}/src/vector_inspector/ui/components/__init__.py +0 -0
  36. {vector_inspector-0.2.6 → vector_inspector-0.2.7}/src/vector_inspector/ui/components/backup_restore_dialog.py +0 -0
  37. {vector_inspector-0.2.6 → vector_inspector-0.2.7}/src/vector_inspector/ui/components/filter_builder.py +0 -0
  38. {vector_inspector-0.2.6 → vector_inspector-0.2.7}/src/vector_inspector/ui/components/item_dialog.py +0 -0
  39. {vector_inspector-0.2.6 → vector_inspector-0.2.7}/src/vector_inspector/ui/components/loading_dialog.py +0 -0
  40. {vector_inspector-0.2.6 → vector_inspector-0.2.7}/src/vector_inspector/ui/views/__init__.py +0 -0
  41. {vector_inspector-0.2.6 → vector_inspector-0.2.7}/src/vector_inspector/ui/views/collection_browser.py +0 -0
  42. {vector_inspector-0.2.6 → vector_inspector-0.2.7}/src/vector_inspector/ui/views/connection_view.py +0 -0
  43. {vector_inspector-0.2.6 → vector_inspector-0.2.7}/src/vector_inspector/ui/views/visualization_view.py +0 -0
  44. {vector_inspector-0.2.6 → vector_inspector-0.2.7}/src/vector_inspector/utils/__init__.py +0 -0
  45. {vector_inspector-0.2.6 → vector_inspector-0.2.7}/src/vector_inspector/utils/lazy_imports.py +0 -0
  46. {vector_inspector-0.2.6 → vector_inspector-0.2.7}/tests/test_connections.py +0 -0
  47. {vector_inspector-0.2.6 → vector_inspector-0.2.7}/tests/test_filter_service.py +0 -0
  48. {vector_inspector-0.2.6 → vector_inspector-0.2.7}/tests/test_settings_service.py +0 -0
  49. {vector_inspector-0.2.6 → vector_inspector-0.2.7}/tests/vector_inspector.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: vector-inspector
3
- Version: 0.2.6
3
+ Version: 0.2.7
4
4
  Summary: A comprehensive desktop application for visualizing, querying, and managing vector database data
5
5
  Author-Email: Anthony Dawson <anthonypdawson+github@gmail.com>
6
6
  License: MIT
@@ -22,6 +22,8 @@ Requires-Dist: sentence-transformers>=2.2.0
22
22
  Requires-Dist: fastembed>=0.7.4
23
23
  Requires-Dist: pyarrow>=14.0.0
24
24
  Requires-Dist: pinecone>=8.0.0
25
+ Requires-Dist: keyring>=25.7.0
26
+ Requires-Dist: hf-xet>=1.2.0
25
27
  Description-Content-Type: text/markdown
26
28
 
27
29
 
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "vector-inspector"
3
- version = "0.2.6"
3
+ version = "0.2.7"
4
4
  description = "A comprehensive desktop application for visualizing, querying, and managing vector database data"
5
5
  authors = [
6
6
  { name = "Anthony Dawson", email = "anthonypdawson+github@gmail.com" },
@@ -19,6 +19,8 @@ dependencies = [
19
19
  "fastembed>=0.7.4",
20
20
  "pyarrow>=14.0.0",
21
21
  "pinecone>=8.0.0",
22
+ "keyring>=25.7.0",
23
+ "hf-xet>=1.2.0",
22
24
  ]
23
25
  requires-python = ">=3.10,<3.13"
24
26
  readme = "README.md"
@@ -0,0 +1,159 @@
1
+ """
2
+ Cache manager for storing databrowser and search panel state by database and collection.
3
+ Provides fast switching between collections with automatic invalidation on refresh or settings changes.
4
+ """
5
+ from typing import Dict, Any, Optional, Tuple
6
+ from dataclasses import dataclass, field
7
+ from datetime import datetime
8
+
9
+
10
@dataclass
class CacheEntry:
    """Represents a cached state for a specific database and collection."""
    # Arbitrary payload cached for the collection (None when only UI state is stored)
    data: Any
    # Time the entry was created or last written through CacheManager
    timestamp: datetime = field(default_factory=datetime.now)

    # Browser state
    scroll_position: int = 0
    selected_indices: list = field(default_factory=list)

    # Search panel state
    search_query: str = ""
    search_filters: Dict[str, Any] = field(default_factory=dict)
    search_results: Optional[Any] = None

    # User inputs
    user_inputs: Dict[str, Any] = field(default_factory=dict)


class CacheManager:
    """
    Manages cache for databrowser and search panel by (database, collection) key.
    Supports invalidation on refresh or settings changes.

    While the manager is disabled, ``get`` returns ``None`` and ``set`` /
    ``update`` store nothing.
    """

    def __init__(self):
        self._cache: Dict[Tuple[str, str], CacheEntry] = {}
        self._enabled = True

    def get(self, database: str, collection: str) -> Optional[CacheEntry]:
        """Retrieve cached entry for a database and collection.

        Returns:
            The stored entry, or None when caching is disabled or nothing
            is cached under that key.
        """
        if not self._enabled:
            return None
        return self._cache.get((database, collection))

    def set(self, database: str, collection: str, entry: CacheEntry) -> None:
        """Store a cache entry for a database and collection.

        The entry's timestamp is refreshed to the time of storage. Does
        nothing while caching is disabled.
        """
        if not self._enabled:
            return
        entry.timestamp = datetime.now()
        self._cache[(database, collection)] = entry

    def update(self, database: str, collection: str, **kwargs) -> None:
        """Update specific fields in a cache entry, creating it if missing.

        Args:
            database: Database part of the cache key.
            collection: Collection part of the cache key.
            **kwargs: Field name / value pairs. Names that are not
                attributes of the entry are ignored.

        Does nothing while caching is disabled, matching ``set``; otherwise
        entries would accumulate after ``disable()`` cleared the cache.
        """
        if not self._enabled:
            return

        key = (database, collection)
        entry = self._cache.get(key)
        if entry is None:
            # No entry yet: start from an empty one and apply the fields to it
            entry = CacheEntry(data=None)
            self._cache[key] = entry

        for field_name, value in kwargs.items():
            if hasattr(entry, field_name):
                setattr(entry, field_name, value)
        entry.timestamp = datetime.now()

    def invalidate(self, database: Optional[str] = None, collection: Optional[str] = None) -> None:
        """
        Invalidate cache entries.
        - If both database and collection are provided, invalidate that specific entry.
        - If only database is provided, invalidate all collections in that database.
        - If only collection is provided, invalidate that collection in every database.
        - If neither is provided, invalidate all entries (global refresh).
        """
        if database is None and collection is None:
            self._cache.clear()
            return

        # A None component acts as a wildcard for that half of the key.
        # Collect first: the dict must not change size while being iterated.
        stale_keys = [
            key
            for key in self._cache
            if (database is None or key[0] == database)
            and (collection is None or key[1] == collection)
        ]
        for key in stale_keys:
            del self._cache[key]

    def clear(self) -> None:
        """Clear all cached entries."""
        self._cache.clear()

    def enable(self) -> None:
        """Enable caching."""
        self._enabled = True

    def disable(self) -> None:
        """Disable caching and clear all entries."""
        self._enabled = False
        self._cache.clear()

    def is_enabled(self) -> bool:
        """Check if caching is enabled."""
        return self._enabled

    def get_cache_info(self) -> Dict[str, Any]:
        """Get information about the current cache state.

        Returns:
            Dict with ``enabled``, ``entry_count`` and ``entries`` (one
            summary dict per cached key; the cached payloads themselves
            are not included).
        """
        return {
            "enabled": self._enabled,
            "entry_count": len(self._cache),
            "entries": [
                {
                    "database": db,
                    "collection": coll,
                    "timestamp": entry.timestamp.isoformat(),
                    "has_data": entry.data is not None,
                    "has_search_results": entry.search_results is not None,
                }
                for (db, coll), entry in self._cache.items()
            ],
        }
127
+
128
+
129
# Global cache manager instance (created on first call to get_cache_manager)
_cache_manager: Optional[CacheManager] = None


def get_cache_manager() -> CacheManager:
    """Get or create the global cache manager instance.

    On first use the manager is created and then disabled if
    ``SettingsService().get_cache_enabled()`` reports caching as off.
    If the settings cannot be loaded, caching stays enabled.

    Returns:
        The shared CacheManager instance.
    """
    global _cache_manager
    if _cache_manager is None:
        # Publish the instance before touching settings so that a nested
        # call made while settings load gets this same manager.
        _cache_manager = CacheManager()
        try:
            from vector_inspector.services.settings_service import SettingsService
            settings = SettingsService()
            if not settings.get_cache_enabled():
                _cache_manager.disable()
        except Exception:
            # Best effort: default to enabled, but leave a trace of the
            # failure instead of swallowing it silently.
            import logging
            logging.getLogger(__name__).debug(
                "Could not load cache settings; caching stays enabled",
                exc_info=True,
            )
    return _cache_manager
148
+
149
+
150
def invalidate_cache_on_settings_change() -> None:
    """Drop every cached entry from the global cache manager after a settings change."""
    get_cache_manager().invalidate()
154
+
155
+
156
def invalidate_cache_on_refresh(database: Optional[str] = None, collection: Optional[str] = None) -> None:
    """Forward a refresh to the global cache manager's ``invalidate``.

    Args:
        database: Database to invalidate, or None.
        collection: Collection to invalidate, or None.
    """
    manager = get_cache_manager()
    manager.invalidate(database, collection)
@@ -0,0 +1,277 @@
1
+ """Connection manager for handling multiple vector database connections."""
2
+
3
+ import uuid
4
+ from typing import Dict, Optional, List, Any
5
+ from enum import Enum
6
+ from PySide6.QtCore import QObject, Signal
7
+
8
+ from .connections.base_connection import VectorDBConnection
9
+
10
+
11
class ConnectionState(Enum):
    """States a connection can be in; each member's value is its lowercase name."""

    DISCONNECTED = "disconnected"
    CONNECTING = "connecting"
    CONNECTED = "connected"
    ERROR = "error"
17
+
18
+
19
class ConnectionInstance:
    """One connection together with its state and current collection context."""

    def __init__(
        self,
        connection_id: str,
        name: str,
        provider: str,
        connection: VectorDBConnection,
        config: Dict[str, Any]
    ):
        """
        Build a connection instance; it starts out DISCONNECTED.

        Args:
            connection_id: Unique connection identifier
            name: User-friendly connection name
            provider: Provider type (chromadb, qdrant, etc.)
            connection: The actual connection object
            config: Connection configuration dict
        """
        # Identity and backing objects supplied by the caller
        self.id = connection_id
        self.name = name
        self.provider = provider
        self.connection = connection
        self.config = config

        # Mutable runtime state, all starting empty
        self.state = ConnectionState.DISCONNECTED
        self.active_collection: Optional[str] = None
        self.collections: List[str] = []
        self.error_message: Optional[str] = None

    def get_display_name(self) -> str:
        """Return the name followed by the provider in parentheses."""
        return f"{self.name} ({self.provider})"

    def get_breadcrumb(self) -> str:
        """Return "name > collection", or just the name when no collection is active."""
        if not self.active_collection:
            return self.name
        return f"{self.name} > {self.active_collection}"
59
+
60
+
61
class ConnectionManager(QObject):
    """Keeps track of multiple vector database connections.

    Signals:
        connection_opened: Emitted when a new connection is opened (connection_id)
        connection_closed: Emitted when a connection is closed (connection_id)
        connection_state_changed: Emitted when connection state changes (connection_id, state)
        active_connection_changed: Emitted when active connection changes (connection_id or None)
        active_collection_changed: Emitted when active collection changes (connection_id, collection_name or None)
        collections_updated: Emitted when collections list is updated (connection_id, collections)
    """

    # Signals
    connection_opened = Signal(str)  # connection_id
    connection_closed = Signal(str)  # connection_id
    connection_state_changed = Signal(str, ConnectionState)  # connection_id, state
    active_connection_changed = Signal(object)  # connection_id or None
    active_collection_changed = Signal(str, object)  # connection_id, collection_name or None
    collections_updated = Signal(str, list)  # connection_id, collections

    MAX_CONNECTIONS = 10  # Limit to prevent resource exhaustion

    def __init__(self):
        """Start with no connections and no active connection."""
        super().__init__()
        self._connections: Dict[str, ConnectionInstance] = {}
        self._active_connection_id: Optional[str] = None

    def create_connection(
        self,
        name: str,
        provider: str,
        connection: VectorDBConnection,
        config: Dict[str, Any]
    ) -> str:
        """
        Register a new connection instance (not yet connected).

        The first registered connection becomes the active one.

        Args:
            name: User-friendly connection name
            provider: Provider type
            connection: The connection object
            config: Connection configuration

        Returns:
            The connection ID

        Raises:
            RuntimeError: If maximum connections limit reached
        """
        if len(self._connections) >= self.MAX_CONNECTIONS:
            raise RuntimeError(f"Maximum number of connections ({self.MAX_CONNECTIONS}) reached")

        connection_id = str(uuid.uuid4())
        self._connections[connection_id] = ConnectionInstance(
            connection_id, name, provider, connection, config
        )

        # The very first connection is made active automatically
        if len(self._connections) == 1:
            self._active_connection_id = connection_id
            self.active_connection_changed.emit(connection_id)

        # connection_opened is deliberately not emitted here; see
        # mark_connection_opened, which is called once actually connected.
        return connection_id

    def mark_connection_opened(self, connection_id: str):
        """
        Announce that a known connection has opened (after successful connection).

        Args:
            connection_id: ID of connection that opened
        """
        if connection_id not in self._connections:
            return
        self.connection_opened.emit(connection_id)

    def get_connection(self, connection_id: str) -> Optional[ConnectionInstance]:
        """Look up a connection instance by ID; None if unknown."""
        return self._connections.get(connection_id)

    def get_active_connection(self) -> Optional[ConnectionInstance]:
        """Return the currently active connection instance, if any."""
        active_id = self._active_connection_id
        return self._connections.get(active_id) if active_id else None

    def get_active_connection_id(self) -> Optional[str]:
        """Return the ID of the currently active connection, if any."""
        return self._active_connection_id

    def set_active_connection(self, connection_id: str) -> bool:
        """
        Make the given connection the active one.

        Args:
            connection_id: ID of connection to make active

        Returns:
            True if successful, False if connection not found
        """
        if connection_id in self._connections:
            self._active_connection_id = connection_id
            self.active_connection_changed.emit(connection_id)
            return True
        return False

    def close_connection(self, connection_id: str) -> bool:
        """
        Disconnect and forget a connection.

        Args:
            connection_id: ID of connection to close

        Returns:
            True if successful, False if connection not found
        """
        instance = self._connections.get(connection_id)
        if not instance:
            return False

        # A failing disconnect is reported but does not stop the removal
        try:
            instance.connection.disconnect()
        except Exception as e:
            print(f"Error disconnecting: {e}")

        self._connections.pop(connection_id)

        # If the closed connection was active, fall back to the first
        # remaining connection, or to None when nothing is left.
        if self._active_connection_id == connection_id:
            successor = next(iter(self._connections), None)
            self._active_connection_id = successor
            self.active_connection_changed.emit(successor)

        self.connection_closed.emit(connection_id)
        return True

    def update_connection_state(self, connection_id: str, state: ConnectionState, error: Optional[str] = None):
        """
        Record a new state for a connection and notify listeners.

        Args:
            connection_id: ID of connection
            state: New connection state
            error: Optional error message if state is ERROR
        """
        instance = self._connections.get(connection_id)
        if not instance:
            return
        instance.state = state
        # An empty or missing error clears any previous message
        instance.error_message = error if error else None
        self.connection_state_changed.emit(connection_id, state)

    def update_collections(self, connection_id: str, collections: List[str]):
        """
        Replace the collections list of a connection and notify listeners.

        Args:
            connection_id: ID of connection
            collections: List of collection names
        """
        instance = self._connections.get(connection_id)
        if not instance:
            return
        instance.collections = collections
        self.collections_updated.emit(connection_id, collections)

    def set_active_collection(self, connection_id: str, collection_name: Optional[str]):
        """
        Set the active collection of a connection and notify listeners.

        Args:
            connection_id: ID of connection
            collection_name: Name of collection to make active, or None
        """
        instance = self._connections.get(connection_id)
        if not instance:
            return
        instance.active_collection = collection_name
        self.active_collection_changed.emit(connection_id, collection_name)

    def get_all_connections(self) -> List[ConnectionInstance]:
        """Return a new list holding every connection instance."""
        return [*self._connections.values()]

    def get_connection_count(self) -> int:
        """Return how many connections are currently registered."""
        return len(self._connections)

    def close_all_connections(self):
        """Close all connections. Typically called on application exit."""
        # Snapshot the IDs first: close_connection mutates the dict
        for conn_id in list(self._connections):
            self.close_connection(conn_id)

    def rename_connection(self, connection_id: str, new_name: str) -> bool:
        """
        Give a connection a new name.

        Args:
            connection_id: ID of connection
            new_name: New name for the connection

        Returns:
            True if successful, False if connection not found
        """
        instance = self._connections.get(connection_id)
        if not instance:
            return False
        instance.name = new_name
        return True
277
+
@@ -6,10 +6,44 @@ from pathlib import Path
6
6
  import chromadb
7
7
  from chromadb.api import ClientAPI
8
8
  from chromadb.api.models.Collection import Collection
9
+ from chromadb import Documents, EmbeddingFunction, Embeddings
9
10
 
10
11
  from .base_connection import VectorDBConnection
11
12
 
12
13
 
14
class DimensionAwareEmbeddingFunction(EmbeddingFunction):
    """Embedding function whose model is chosen from the collection's expected dimension."""

    def __init__(self, expected_dimension: int):
        """Remember the expected dimension; the model itself is loaded on first use."""
        self.expected_dimension = expected_dimension
        self.model = None
        self.model_name = None
        self.model_type = None
        self._initialized = False

    def _ensure_model_loaded(self):
        """Load the embedding model the first time it is needed; later calls do nothing."""
        if not self._initialized:
            from ..embedding_utils import get_embedding_model_for_dimension
            print(f"[ChromaDB] Loading embedding model for {self.expected_dimension}d vectors...")
            loaded = get_embedding_model_for_dimension(self.expected_dimension)
            self.model, self.model_name, self.model_type = loaded
            print(f"[ChromaDB] Using {self.model_type} model '{self.model_name}' for {self.expected_dimension}d embeddings")
            self._initialized = True

    def __call__(self, input: Documents) -> Embeddings:
        """Embed each document, one at a time, with the dimension-appropriate model."""
        self._ensure_model_loaded()
        from ..embedding_utils import encode_text
        return [encode_text(text, self.model, self.model_type) for text in input]
45
+
46
+
13
47
  class ChromaDBConnection(VectorDBConnection):
14
48
  """Manages connection to ChromaDB and provides query interface."""
15
49
 
@@ -90,12 +124,47 @@ class ChromaDBConnection(VectorDBConnection):
90
124
  print(f"Failed to list collections: {e}")
91
125
  return []
92
126
 
93
- def get_collection(self, name: str) -> Optional[Collection]:
94
- """
95
- Get or create a collection.
127
def _get_collection_basic(self, name: str) -> Optional[Collection]:
    """Get collection without custom embedding function (for info lookup).

    Unlike get_collection, this does not touch ``self._current_collection``.

    Args:
        name: Collection name

    Returns:
        Collection object, or None if there is no client or the lookup failed
    """
    if not self._client:
        return None
    try:
        return self._client.get_collection(name=name)
    except Exception:
        # Deliberately best-effort: callers treat None as "not available"
        return None
135
+
136
def _get_embedding_function_for_collection(self, name: str) -> Optional[EmbeddingFunction]:
    """Pick an embedding function for a collection from the size of a stored vector.

    Args:
        name: Collection name

    Returns:
        A DimensionAwareEmbeddingFunction for the sampled vector length, or
        None when the collection is unavailable, has no usable embedding,
        or sampling raised.
    """
    basic_col = self._get_collection_basic(name)
    if not basic_col:
        return None

    try:
        # One stored item is enough to learn the vector dimension
        sample = basic_col.get(limit=1, include=["embeddings"])
        embeddings = sample.get("embeddings") if sample else None
        # Explicit None/len checks: numpy arrays have ambiguous truthiness
        if embeddings is None or len(embeddings) == 0:
            return None

        first_embedding = embeddings[0]
        if first_embedding is None or len(first_embedding) == 0:
            return None

        vector_dim = len(first_embedding)
        print(f"[ChromaDB] Collection '{name}' has {vector_dim}d vectors")
        return DimensionAwareEmbeddingFunction(vector_dim)
    except Exception as e:
        print(f"[ChromaDB] Failed to determine embedding function: {e}")
        import traceback
        traceback.print_exc()

    return None
161
+
162
+ def get_collection(self, name: str, embedding_function: Optional[EmbeddingFunction] = None) -> Optional[Collection]:
163
+ """Get a collection (without overriding existing embedding function).
96
164
 
97
165
  Args:
98
166
  name: Collection name
167
+ embedding_function: Optional custom embedding function (ignored if collection exists)
99
168
 
100
169
  Returns:
101
170
  Collection object or None if failed
@@ -103,7 +172,9 @@ class ChromaDBConnection(VectorDBConnection):
103
172
  if not self._client:
104
173
  return None
105
174
  try:
106
- self._current_collection = self._client.get_or_create_collection(name=name)
175
+ # Just get the collection without trying to override embedding function
176
+ # This avoids conflicts with existing collections
177
+ self._current_collection = self._client.get_collection(name=name)
107
178
  return self._current_collection
108
179
  except Exception as e:
109
180
  print(f"Failed to get collection: {e}")
@@ -119,7 +190,7 @@ class ChromaDBConnection(VectorDBConnection):
119
190
  Returns:
120
191
  Dictionary with collection info
121
192
  """
122
- collection = self.get_collection(name)
193
+ collection = self._get_collection_basic(name)
123
194
  if not collection:
124
195
  return None
125
196
 
@@ -189,10 +260,22 @@ class ChromaDBConnection(VectorDBConnection):
189
260
  Returns:
190
261
  Query results or None if failed
191
262
  """
263
+ print(f"[ChromaDB] query_collection called for '{collection_name}'")
192
264
  collection = self.get_collection(collection_name)
193
265
  if not collection:
266
+ print(f"[ChromaDB] Failed to get collection '{collection_name}'")
194
267
  return None
195
268
 
269
+ # If query_texts provided, we need to manually embed them with dimension-aware model
270
+ if query_texts and not query_embeddings:
271
+ embedding_function = self._get_embedding_function_for_collection(collection_name)
272
+ if embedding_function:
273
+ print(f"[ChromaDB] Manually embedding query texts with dimension-aware model")
274
+ query_embeddings = embedding_function(query_texts)
275
+ query_texts = None # Use embeddings instead of texts
276
+ else:
277
+ print(f"[ChromaDB] Warning: Could not determine embedding function, using collection's default")
278
+
196
279
  try:
197
280
  results = collection.query(
198
281
  query_texts=query_texts,
@@ -205,6 +288,8 @@ class ChromaDBConnection(VectorDBConnection):
205
288
  return cast(Dict[str, Any], results)
206
289
  except Exception as e:
207
290
  print(f"Query failed: {e}")
291
+ import traceback
292
+ traceback.print_exc()
208
293
  return None
209
294
 
210
295
  def get_all_items(