vector-inspector 0.2.0 (vector_inspector-0.2.0-py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. vector_inspector/__init__.py +3 -0
  2. vector_inspector/__main__.py +4 -0
  3. vector_inspector/core/__init__.py +1 -0
  4. vector_inspector/core/connections/__init__.py +7 -0
  5. vector_inspector/core/connections/base_connection.py +233 -0
  6. vector_inspector/core/connections/chroma_connection.py +384 -0
  7. vector_inspector/core/connections/qdrant_connection.py +723 -0
  8. vector_inspector/core/connections/template_connection.py +346 -0
  9. vector_inspector/main.py +21 -0
  10. vector_inspector/services/__init__.py +1 -0
  11. vector_inspector/services/backup_restore_service.py +286 -0
  12. vector_inspector/services/filter_service.py +72 -0
  13. vector_inspector/services/import_export_service.py +287 -0
  14. vector_inspector/services/settings_service.py +60 -0
  15. vector_inspector/services/visualization_service.py +116 -0
  16. vector_inspector/ui/__init__.py +1 -0
  17. vector_inspector/ui/components/__init__.py +1 -0
  18. vector_inspector/ui/components/backup_restore_dialog.py +350 -0
  19. vector_inspector/ui/components/filter_builder.py +370 -0
  20. vector_inspector/ui/components/item_dialog.py +118 -0
  21. vector_inspector/ui/components/loading_dialog.py +30 -0
  22. vector_inspector/ui/main_window.py +288 -0
  23. vector_inspector/ui/views/__init__.py +1 -0
  24. vector_inspector/ui/views/collection_browser.py +112 -0
  25. vector_inspector/ui/views/connection_view.py +423 -0
  26. vector_inspector/ui/views/metadata_view.py +555 -0
  27. vector_inspector/ui/views/search_view.py +268 -0
  28. vector_inspector/ui/views/visualization_view.py +245 -0
  29. vector_inspector-0.2.0.dist-info/METADATA +382 -0
  30. vector_inspector-0.2.0.dist-info/RECORD +32 -0
  31. vector_inspector-0.2.0.dist-info/WHEEL +4 -0
  32. vector_inspector-0.2.0.dist-info/entry_points.txt +5 -0
vector_inspector/services/filter_service.py
@@ -0,0 +1,72 @@
+ """Service for applying client-side filters to data."""
+
+ from typing import Dict, Any, List
+
+
+ def apply_client_side_filters(data: Dict[str, Any], filters: List[Dict[str, Any]]) -> Dict[str, Any]:
+     """
+     Apply client-side filters to fetched data.
+
+     Args:
+         data: Data dictionary with ids, documents, metadatas, etc.
+         filters: List of client-side filter dictionaries
+
+     Returns:
+         Filtered data dictionary
+     """
+     if not filters:
+         return data
+
+     ids = data.get("ids", [])
+     documents = data.get("documents") or []
+     metadatas = data.get("metadatas") or []
+     embeddings = data.get("embeddings") or []
+
+     # Track which indices to keep
+     keep_indices = []
+
+     for i in range(len(ids)):
+         # Check whether this item passes all client-side filters
+         passes = True
+
+         for filt in filters:
+             field = filt.get("field", "")
+             op = filt.get("op", "")
+             value = filt.get("value", "")
+
+             # Special handling for the document field
+             if field.lower() == "document":
+                 item_value = documents[i] if i < len(documents) else ""
+             else:
+                 # Get from metadata; individual entries may be None
+                 metadata = (metadatas[i] if i < len(metadatas) else None) or {}
+                 item_value = metadata.get(field, "")
+
+             # Convert both sides to lowercase strings for text operations
+             item_value_str = str(item_value).lower()
+             search_value = str(value).lower()
+
+             # Apply operator (only text operators run client-side)
+             if op == "contains":
+                 if search_value not in item_value_str:
+                     passes = False
+                     break
+             elif op == "not_contains":
+                 if search_value in item_value_str:
+                     passes = False
+                     break
+
+         if passes:
+             keep_indices.append(i)
+
+     # Rebuild the data, padding short columns so they stay aligned with ids
+     filtered_data = {
+         "ids": [ids[i] for i in keep_indices],
+         "documents": [documents[i] if i < len(documents) else None for i in keep_indices],
+         "metadatas": [metadatas[i] if i < len(metadatas) else {} for i in keep_indices],
+     }
+
+     if embeddings:
+         filtered_data["embeddings"] = [embeddings[i] if i < len(embeddings) else None for i in keep_indices]
+
+     return filtered_data
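
For context, a minimal usage sketch of the function above (the dict shape mirrors the Chroma-style parallel lists used throughout this package; the sample values are hypothetical):

    from vector_inspector.services.filter_service import apply_client_side_filters

    # Hypothetical page of fetched collection data
    data = {
        "ids": ["a", "b", "c"],
        "documents": ["alpha doc", "beta doc", "gamma doc"],
        "metadatas": [{"lang": "en"}, {"lang": "de"}, {"lang": "en"}],
    }

    # Keep only items whose document contains "beta"
    filters = [{"field": "document", "op": "contains", "value": "beta"}]
    print(apply_client_side_filters(data, filters)["ids"])  # ['b']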
vector_inspector/services/import_export_service.py
@@ -0,0 +1,287 @@
+ """Service for importing and exporting collection data."""
+
+ import json
+ from typing import Dict, Any, Optional
+
+ import pandas as pd
+
+
+ class ImportExportService:
+     """Handles import/export operations for vector database collections."""
+
+     @staticmethod
+     def export_to_json(data: Dict[str, Any], file_path: str) -> bool:
+         """
+         Export collection data to JSON format.
+
+         Args:
+             data: Collection data dictionary with ids, documents, metadatas, embeddings
+             file_path: Path to save JSON file
+
+         Returns:
+             True if successful, False otherwise
+         """
+         try:
+             # Structure data for export
+             export_data = []
+             ids = data.get("ids", [])
+             documents = data.get("documents") or []
+             metadatas = data.get("metadatas") or []
+             embeddings = data.get("embeddings") or []
+
+             for i, item_id in enumerate(ids):
+                 item = {
+                     "id": item_id,
+                     "document": documents[i] if i < len(documents) else None,
+                     "metadata": metadatas[i] if i < len(metadatas) else {},
+                 }
+                 # Optionally include embeddings
+                 if embeddings and i < len(embeddings):
+                     item["embedding"] = embeddings[i]
+                 export_data.append(item)
+
+             # Write to file
+             with open(file_path, 'w', encoding='utf-8') as f:
+                 json.dump(export_data, f, indent=2, ensure_ascii=False)
+
+             return True
+         except Exception as e:
+             print(f"Export to JSON failed: {e}")
+             return False
+
+     @staticmethod
+     def export_to_csv(data: Dict[str, Any], file_path: str, include_embeddings: bool = False) -> bool:
+         """
+         Export collection data to CSV format.
+
+         Args:
+             data: Collection data dictionary
+             file_path: Path to save CSV file
+             include_embeddings: Whether to include embedding vectors
+
+         Returns:
+             True if successful, False otherwise
+         """
+         try:
+             ids = data.get("ids", [])
+             documents = data.get("documents") or []
+             metadatas = data.get("metadatas") or []
+             embeddings = data.get("embeddings") or []
+
+             # Prepare rows
+             rows = []
+             for i, item_id in enumerate(ids):
+                 row = {
+                     "id": item_id,
+                     "document": documents[i] if i < len(documents) else "",
+                 }
+
+                 # Add metadata fields
+                 if i < len(metadatas) and metadatas[i]:
+                     for key, value in metadatas[i].items():
+                         row[f"metadata_{key}"] = value
+
+                 # Optionally add embeddings, serialized as JSON strings
+                 if include_embeddings and embeddings and i < len(embeddings):
+                     row["embedding"] = json.dumps(embeddings[i])
+
+                 rows.append(row)
+
+             # Convert to DataFrame and save
+             df = pd.DataFrame(rows)
+             df.to_csv(file_path, index=False, encoding='utf-8')
+
+             return True
+         except Exception as e:
+             print(f"Export to CSV failed: {e}")
+             return False
+
+     @staticmethod
+     def export_to_parquet(data: Dict[str, Any], file_path: str) -> bool:
+         """
+         Export collection data to Parquet format.
+
+         Args:
+             data: Collection data dictionary
+             file_path: Path to save Parquet file
+
+         Returns:
+             True if successful, False otherwise
+         """
+         try:
+             ids = data.get("ids", [])
+             documents = data.get("documents") or []
+             metadatas = data.get("metadatas") or []
+             embeddings = data.get("embeddings") or []
+
+             # Prepare data for DataFrame
+             df_data = {
+                 "id": ids,
+                 "document": documents if documents else [None] * len(ids),
+             }
+
+             # Add metadata fields as separate columns, using the union of
+             # keys across all rows rather than only the first row's keys
+             if metadatas:
+                 meta_keys = sorted({key for m in metadatas if m for key in m})
+                 for key in meta_keys:
+                     df_data[f"metadata_{key}"] = [m.get(key) if m else None for m in metadatas]
+
+             # Add embeddings as a column
+             if embeddings:
+                 df_data["embedding"] = embeddings
+
+             # Create DataFrame and save
+             df = pd.DataFrame(df_data)
+             df.to_parquet(file_path, index=False, engine='pyarrow')
+
+             return True
+         except Exception as e:
+             print(f"Export to Parquet failed: {e}")
+             return False
+
+     @staticmethod
+     def import_from_json(file_path: str) -> Optional[Dict[str, Any]]:
+         """
+         Import collection data from JSON format.
+
+         Args:
+             file_path: Path to JSON file
+
+         Returns:
+             Dictionary with ids, documents, metadatas, embeddings or None if failed
+         """
+         try:
+             with open(file_path, 'r', encoding='utf-8') as f:
+                 data = json.load(f)
+
+             # Parse data
+             ids = []
+             documents = []
+             metadatas = []
+             embeddings = []
+
+             for item in data:
+                 ids.append(item.get("id", ""))
+                 documents.append(item.get("document", ""))
+                 metadatas.append(item.get("metadata", {}))
+                 if "embedding" in item:
+                     embeddings.append(item["embedding"])
+
+             result = {
+                 "ids": ids,
+                 "documents": documents,
+                 "metadatas": metadatas,
+             }
+
+             if embeddings:
+                 result["embeddings"] = embeddings
+
+             return result
+
+         except Exception as e:
+             print(f"Import from JSON failed: {e}")
+             return None
+
+     @staticmethod
+     def import_from_csv(file_path: str) -> Optional[Dict[str, Any]]:
+         """
+         Import collection data from CSV format.
+
+         Args:
+             file_path: Path to CSV file
+
+         Returns:
+             Dictionary with ids, documents, metadatas, embeddings or None if failed
+         """
+         try:
+             df = pd.read_csv(file_path, encoding='utf-8')
+
+             # CSV loses type information, so normalize ids back to strings
+             ids = df["id"].astype(str).tolist()
+             documents = df["document"].fillna("").tolist() if "document" in df.columns else [""] * len(ids)
+
+             # Extract metadata columns
+             metadata_cols = [col for col in df.columns if col.startswith("metadata_")]
+             metadatas = []
+
+             for idx in range(len(ids)):
+                 metadata = {}
+                 for col in metadata_cols:
+                     key = col[len("metadata_"):]
+                     value = df.loc[idx, col]
+                     if pd.notna(value):
+                         metadata[key] = value
+                 metadatas.append(metadata)
+
+             # Extract embeddings if present (stored as JSON strings)
+             embeddings = []
+             if "embedding" in df.columns:
+                 for emb_str in df["embedding"]:
+                     if pd.notna(emb_str):
+                         embeddings.append(json.loads(emb_str))
+                     else:
+                         embeddings.append([])
+
+             result = {
+                 "ids": ids,
+                 "documents": documents,
+                 "metadatas": metadatas,
+             }
+
+             if embeddings:
+                 result["embeddings"] = embeddings
+
+             return result
+
+         except Exception as e:
+             print(f"Import from CSV failed: {e}")
+             return None
+
+     @staticmethod
+     def import_from_parquet(file_path: str) -> Optional[Dict[str, Any]]:
+         """
+         Import collection data from Parquet format.
+
+         Args:
+             file_path: Path to Parquet file
+
+         Returns:
+             Dictionary with ids, documents, metadatas, embeddings or None if failed
+         """
+         try:
+             df = pd.read_parquet(file_path, engine='pyarrow')
+
+             ids = df["id"].tolist()
+             documents = df["document"].tolist() if "document" in df.columns else [""] * len(ids)
+
+             # Extract metadata columns
+             metadata_cols = [col for col in df.columns if col.startswith("metadata_")]
+             metadatas = []
+
+             for idx in range(len(ids)):
+                 metadata = {}
+                 for col in metadata_cols:
+                     key = col[len("metadata_"):]
+                     value = df.loc[idx, col]
+                     if pd.notna(value):
+                         metadata[key] = value
+                 metadatas.append(metadata)
+
+             # Extract embeddings if present
+             embeddings = []
+             if "embedding" in df.columns:
+                 embeddings = df["embedding"].tolist()
+
+             result = {
+                 "ids": ids,
+                 "documents": documents,
+                 "metadatas": metadatas,
+             }
+
+             if embeddings:
+                 result["embeddings"] = embeddings
+
+             return result
+
+         except Exception as e:
+             print(f"Import from Parquet failed: {e}")
+             return None
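
A round-trip sketch using the service above (the file path and sample data are hypothetical; JSON is the lossless route, since CSV stringifies embeddings and coerces scalar types):

    from vector_inspector.services.import_export_service import ImportExportService

    data = {
        "ids": ["1", "2"],
        "documents": ["first", "second"],
        "metadatas": [{"source": "demo"}, {"source": "demo"}],
        "embeddings": [[0.1, 0.2], [0.3, 0.4]],
    }

    # Export, then re-import; both calls signal failure via their return value
    if ImportExportService.export_to_json(data, "collection.json"):
        restored = ImportExportService.import_from_json("collection.json")
        assert restored is not None and restored["ids"] == data["ids"]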
vector_inspector/services/settings_service.py
@@ -0,0 +1,60 @@
+ """Service for persisting application settings."""
+
+ import json
+ from pathlib import Path
+ from typing import Dict, Any, Optional
+
+
+ class SettingsService:
+     """Handles loading and saving application settings."""
+
+     def __init__(self):
+         """Initialize settings service."""
+         self.settings_dir = Path.home() / ".vector-viewer"
+         self.settings_file = self.settings_dir / "settings.json"
+         self.settings: Dict[str, Any] = {}
+         self._load_settings()
+
+     def _load_settings(self):
+         """Load settings from file."""
+         try:
+             if self.settings_file.exists():
+                 with open(self.settings_file, 'r', encoding='utf-8') as f:
+                     self.settings = json.load(f)
+         except Exception as e:
+             print(f"Failed to load settings: {e}")
+             self.settings = {}
+
+     def _save_settings(self):
+         """Save settings to file."""
+         try:
+             # Create settings directory if it doesn't exist
+             self.settings_dir.mkdir(parents=True, exist_ok=True)
+
+             with open(self.settings_file, 'w', encoding='utf-8') as f:
+                 json.dump(self.settings, f, indent=2, ensure_ascii=False)
+         except Exception as e:
+             print(f"Failed to save settings: {e}")
+
+     def get_last_connection(self) -> Optional[Dict[str, Any]]:
+         """Get the last connection configuration."""
+         return self.settings.get("last_connection")
+
+     def save_last_connection(self, config: Dict[str, Any]):
+         """Save the last connection configuration."""
+         self.settings["last_connection"] = config
+         self._save_settings()
+
+     def get(self, key: str, default: Any = None) -> Any:
+         """Get a setting value."""
+         return self.settings.get(key, default)
+
+     def set(self, key: str, value: Any):
+         """Set a setting value."""
+         self.settings[key] = value
+         self._save_settings()
+
+     def clear(self):
+         """Clear all settings."""
+         self.settings = {}
+         self._save_settings()
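
Typical use of the settings service (the setting key and connection fields shown are illustrative; only "last_connection" has dedicated accessors in the code above):

    from vector_inspector.services.settings_service import SettingsService

    settings = SettingsService()  # loads ~/.vector-viewer/settings.json if present

    # Persist an arbitrary key and the most recent connection config
    settings.set("theme", "dark")
    settings.save_last_connection({"type": "chroma", "host": "localhost", "port": 8000})  # hypothetical fields

    print(settings.get("theme", "light"))  # "dark"
    print(settings.get_last_connection())  # {"type": "chroma", ...}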
vector_inspector/services/visualization_service.py
@@ -0,0 +1,116 @@
+ """Visualization service for dimensionality reduction."""
+
+ from typing import Optional, List, Tuple
+ import numpy as np
+ from sklearn.decomposition import PCA
+ from sklearn.manifold import TSNE
+ import umap
+
+
+ class VisualizationService:
+     """Service for vector dimensionality reduction and visualization."""
+
+     @staticmethod
+     def reduce_dimensions(
+         embeddings: List[List[float]],
+         method: str = "pca",
+         n_components: int = 2,
+         **kwargs
+     ) -> Optional[np.ndarray]:
+         """
+         Reduce dimensionality of embeddings.
+
+         Args:
+             embeddings: List of embedding vectors
+             method: Reduction method ('pca', 'tsne', or 'umap')
+             n_components: Target number of dimensions (2 or 3)
+             **kwargs: Additional method-specific parameters
+
+         Returns:
+             Reduced embeddings as numpy array, or None if failed
+         """
+         if embeddings is None or len(embeddings) == 0:
+             return None
+
+         try:
+             X = np.array(embeddings)
+
+             if method == "pca":
+                 reducer = PCA(n_components=n_components)
+                 reduced = reducer.fit_transform(X)
+
+             elif method == "tsne":
+                 # Perplexity must be smaller than the number of samples
+                 perplexity = kwargs.get("perplexity", min(30, len(embeddings) - 1))
+                 reducer = TSNE(
+                     n_components=n_components,
+                     perplexity=perplexity,
+                     random_state=42
+                 )
+                 reduced = reducer.fit_transform(X)
+
+             elif method == "umap":
+                 n_neighbors = kwargs.get("n_neighbors", min(15, len(embeddings) - 1))
+                 reducer = umap.UMAP(
+                     n_components=n_components,
+                     n_neighbors=n_neighbors,
+                     random_state=42
+                 )
+                 reduced = reducer.fit_transform(X)
+
+             else:
+                 print(f"Unknown method: {method}")
+                 return None
+
+             return reduced
+
+         except Exception as e:
+             print(f"Dimensionality reduction failed: {e}")
+             return None
+
+     @staticmethod
+     def prepare_plot_data(
+         reduced_embeddings: np.ndarray,
+         labels: Optional[List[str]] = None,
+         metadata: Optional[List[dict]] = None,
+         color_by: Optional[str] = None
+     ) -> Tuple[np.ndarray, List[str], List[str]]:
+         """
+         Prepare data for plotting.
+
+         Args:
+             reduced_embeddings: Reduced dimension embeddings
+             labels: Text labels for each point
+             metadata: Metadata dictionaries for each point
+             color_by: Metadata field to use for coloring
+
+         Returns:
+             Tuple of (embeddings, labels, colors)
+         """
+         n_points = len(reduced_embeddings)
+
+         # Prepare labels
+         if labels is None:
+             labels = [f"Point {i}" for i in range(n_points)]
+
+         # Prepare colors (single default color unless color_by is given)
+         colors = ["blue"] * n_points
+         if color_by and metadata:
+             unique_values = set()
+             values = []
+             for meta in metadata:
+                 value = meta.get(color_by, "unknown")
+                 values.append(str(value))
+                 unique_values.add(str(value))
+
+             # Map each distinct value to a color, cycling through the palette
+             color_map = {}
+             color_palette = [
+                 "red", "blue", "green", "orange", "purple",
+                 "cyan", "magenta", "yellow", "pink", "brown"
+             ]
+             for i, val in enumerate(sorted(unique_values)):
+                 color_map[val] = color_palette[i % len(color_palette)]
+
+             colors = [color_map[val] for val in values]
+
+         return reduced_embeddings, labels, colors
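
A small end-to-end sketch of the two static methods above (random vectors stand in for real embeddings; the metadata field name is hypothetical):

    import numpy as np
    from vector_inspector.services.visualization_service import VisualizationService

    # 50 synthetic 128-dimensional embeddings with an alternating category
    embeddings = np.random.rand(50, 128).tolist()
    metadata = [{"category": "a" if i % 2 == 0 else "b"} for i in range(50)]

    reduced = VisualizationService.reduce_dimensions(embeddings, method="pca", n_components=2)
    if reduced is not None:
        points, labels, colors = VisualizationService.prepare_plot_data(
            reduced, metadata=metadata, color_by="category"
        )
        print(points.shape, colors[:4])  # (50, 2) ['red', 'blue', 'red', 'blue']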
vector_inspector/ui/__init__.py
@@ -0,0 +1 @@
+ """User interface components and views."""
vector_inspector/ui/components/__init__.py
@@ -0,0 +1 @@
+ """Reusable UI components."""