vector-inspector 0.3.1-py3-none-any.whl → 0.3.3-py3-none-any.whl
This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registry.
- vector_inspector/core/connection_manager.py +55 -49
- vector_inspector/core/connections/base_connection.py +41 -41
- vector_inspector/core/connections/chroma_connection.py +110 -86
- vector_inspector/core/connections/pinecone_connection.py +168 -182
- vector_inspector/core/connections/qdrant_connection.py +109 -126
- vector_inspector/core/connections/qdrant_helpers/__init__.py +4 -0
- vector_inspector/core/connections/qdrant_helpers/qdrant_embedding_resolver.py +35 -0
- vector_inspector/core/connections/qdrant_helpers/qdrant_filter_builder.py +51 -0
- vector_inspector/core/connections/template_connection.py +55 -65
- vector_inspector/core/embedding_utils.py +32 -32
- vector_inspector/core/logging.py +27 -0
- vector_inspector/core/model_registry.py +4 -3
- vector_inspector/main.py +6 -2
- vector_inspector/services/backup_helpers.py +63 -0
- vector_inspector/services/backup_restore_service.py +73 -152
- vector_inspector/services/credential_service.py +33 -40
- vector_inspector/services/import_export_service.py +70 -67
- vector_inspector/services/profile_service.py +92 -94
- vector_inspector/services/settings_service.py +68 -48
- vector_inspector/services/visualization_service.py +40 -39
- vector_inspector/ui/components/splash_window.py +57 -0
- vector_inspector/ui/dialogs/cross_db_migration.py +6 -5
- vector_inspector/ui/main_window.py +200 -146
- vector_inspector/ui/views/info_panel.py +208 -127
- vector_inspector/ui/views/metadata_view.py +8 -7
- vector_inspector/ui/views/search_view.py +97 -75
- vector_inspector/ui/views/visualization_view.py +140 -97
- vector_inspector/utils/version.py +5 -0
- {vector_inspector-0.3.1.dist-info → vector_inspector-0.3.3.dist-info}/METADATA +9 -2
- {vector_inspector-0.3.1.dist-info → vector_inspector-0.3.3.dist-info}/RECORD +32 -25
- {vector_inspector-0.3.1.dist-info → vector_inspector-0.3.3.dist-info}/WHEEL +0 -0
- {vector_inspector-0.3.1.dist-info → vector_inspector-0.3.3.dist-info}/entry_points.txt +0 -0
vector_inspector/services/import_export_service.py

```diff
@@ -6,20 +6,21 @@ from typing import Dict, Any, List, Optional
 from pathlib import Path
 import pandas as pd
 import numpy as np
+from vector_inspector.core.logging import log_error


 class ImportExportService:
     """Handles import/export operations for vector database collections."""
-
+
     @staticmethod
     def export_to_json(data: Dict[str, Any], file_path: str) -> bool:
         """
         Export collection data to JSON format.
-
+
         Args:
             data: Collection data dictionary with ids, documents, metadatas, embeddings
             file_path: Path to save JSON file
-
+
         Returns:
             True if successful, False otherwise
         """
@@ -30,7 +31,7 @@ class ImportExportService:
             documents = data.get("documents", [])
             metadatas = data.get("metadatas", [])
             embeddings = data.get("embeddings", [])
-
+
             for i, item_id in enumerate(ids):
                 item = {
                     "id": item_id,
```
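The one functional change in this hunk is the new `log_error` import. Its backing module, `vector_inspector/core/logging.py` (+27 lines, new in this release), is not part of the hunks shown here; the sketch below is a hypothetical reconstruction, and only the call shape `log_error("message: %s", e)` is confirmed by the call sites further down. The logger name and function body are assumptions.

```python
# Hypothetical sketch of vector_inspector/core/logging.py; the module is new
# in 0.3.3 but its body is not shown in this diff.
import logging

_logger = logging.getLogger("vector_inspector")


def log_error(msg: str, *args: object) -> None:
    # Delegates to logging.Logger.error, which applies printf-style
    # interpolation (msg % args) lazily, matching the call sites below.
    _logger.error(msg, *args)
```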
```diff
@@ -45,26 +46,28 @@ class ImportExportService:
                         embedding = embedding.tolist()
                     item["embedding"] = embedding
                 export_data.append(item)
-
+
             # Write to file
-            with open(file_path, 'w', encoding='utf-8') as f:
+            with open(file_path, "w", encoding="utf-8") as f:
                 json.dump(export_data, f, indent=2, ensure_ascii=False)
-
+
             return True
         except Exception as e:
-
+            log_error("Export to JSON failed: %s", e)
             return False
-
+
     @staticmethod
-    def export_to_csv(data: Dict[str, Any], file_path: str, include_embeddings: bool = False) -> bool:
+    def export_to_csv(
+        data: Dict[str, Any], file_path: str, include_embeddings: bool = False
+    ) -> bool:
         """
         Export collection data to CSV format.
-
+
         Args:
             data: Collection data dictionary
             file_path: Path to save CSV file
             include_embeddings: Whether to include embedding vectors
-
+
         Returns:
             True if successful, False otherwise
         """
```
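For reference, `export_to_json` writes a JSON array with one object per record, and failures now return `False` after a `log_error` call instead of passing silently. A hedged usage sketch follows; the sample data and file name are hypothetical, but the dictionary keys match what the method reads via `data.get(...)`, and the per-item `"metadata"` key is what `import_from_json` reads back later in this diff.

```python
# Hypothetical usage of the exporter shown above.
data = {
    "ids": ["doc-1", "doc-2"],
    "documents": ["first text", "second text"],
    "metadatas": [{"source": "web"}, {"source": "pdf"}],
    "embeddings": [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]],
}
ok = ImportExportService.export_to_json(data, "collection.json")
# On failure the method logs via log_error and returns False; it does not raise.
```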
```diff
@@ -73,7 +76,7 @@ class ImportExportService:
             documents = data.get("documents", [])
             metadatas = data.get("metadatas", [])
             embeddings = data.get("embeddings", [])
-
+
             # Prepare rows
             rows = []
             for i, item_id in enumerate(ids):
@@ -81,12 +84,12 @@ class ImportExportService:
                     "id": item_id,
                     "document": documents[i] if i < len(documents) else "",
                 }
-
+
                 # Add metadata fields
                 if i < len(metadatas) and metadatas[i]:
                     for key, value in metadatas[i].items():
                         row[f"metadata_{key}"] = value
-
+
                 # Optionally add embeddings (convert numpy arrays to lists)
                 if include_embeddings and len(embeddings) > 0 and i < len(embeddings):
                     embedding = embeddings[i]
```
```diff
@@ -94,27 +97,27 @@ class ImportExportService:
                     if isinstance(embedding, np.ndarray):
                         embedding = embedding.tolist()
                     row["embedding"] = json.dumps(embedding)
-
+
                 rows.append(row)
-
+
             # Convert to DataFrame and save
             df = pd.DataFrame(rows)
-            df.to_csv(file_path, index=False, encoding='utf-8')
-
+            df.to_csv(file_path, index=False, encoding="utf-8")
+
             return True
         except Exception as e:
-
+            log_error("Export to CSV failed: %s", e)
             return False
-
+
     @staticmethod
     def export_to_parquet(data: Dict[str, Any], file_path: str) -> bool:
         """
         Export collection data to Parquet format.
-
+
         Args:
             data: Collection data dictionary
             file_path: Path to save Parquet file
-
+
         Returns:
             True if successful, False otherwise
         """
```
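The CSV path flattens each record into one row: `id`, `document`, one `metadata_<key>` column per metadata key, and, when `include_embeddings=True`, an `embedding` column holding the vector as a JSON string (hence the `json.dumps` above and the matching `json.loads` in `import_from_csv` below). A sketch using the hypothetical `data` from the JSON example:

```python
# Hypothetical usage; for the sample data above, the header row would be:
#   id,document,metadata_source,embedding
ok = ImportExportService.export_to_csv(data, "collection.csv", include_embeddings=True)
# Each embedding cell is a JSON string such as "[0.1, 0.2, 0.3]", which is
# why the CSV importer must json.loads every cell, while the Parquet importer
# can call tolist() directly.
```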
```diff
@@ -123,18 +126,18 @@ class ImportExportService:
             documents = data.get("documents", [])
             metadatas = data.get("metadatas", [])
             embeddings = data.get("embeddings", [])
-
+
             # Prepare data for DataFrame
             df_data = {
                 "id": ids,
                 "document": documents if len(documents) > 0 else [None] * len(ids),
             }
-
+
             # Add metadata fields as separate columns
             if len(metadatas) > 0 and metadatas[0]:
                 for key in metadatas[0].keys():
                     df_data[f"metadata_{key}"] = [m.get(key) if m else None for m in metadatas]
-
+
             # Add embeddings as a column (convert numpy arrays to lists for compatibility)
             if len(embeddings) > 0:
                 # Convert numpy arrays to lists if needed
```
```diff
@@ -145,80 +148,80 @@ class ImportExportService:
                     else:
                         embedding_list.append(emb)
                 df_data["embedding"] = embedding_list
-
+
             # Create DataFrame and save
             df = pd.DataFrame(df_data)
-            df.to_parquet(file_path, index=False, engine='pyarrow')
-
+            df.to_parquet(file_path, index=False, engine="pyarrow")
+
             return True
         except Exception as e:
-
+            log_error("Export to Parquet failed: %s", e)
             return False
-
+
     @staticmethod
     def import_from_json(file_path: str) -> Optional[Dict[str, Any]]:
         """
         Import collection data from JSON format.
-
+
         Args:
             file_path: Path to JSON file
-
+
         Returns:
             Dictionary with ids, documents, metadatas, embeddings or None if failed
         """
         try:
-            with open(file_path, 'r', encoding='utf-8') as f:
+            with open(file_path, "r", encoding="utf-8") as f:
                 data = json.load(f)
-
+
             # Parse data
             ids = []
             documents = []
             metadatas = []
             embeddings = []
-
+
             for item in data:
                 ids.append(item.get("id", ""))
                 documents.append(item.get("document", ""))
                 metadatas.append(item.get("metadata", {}))
                 if "embedding" in item:
                     embeddings.append(item["embedding"])
-
+
             result = {
                 "ids": ids,
                 "documents": documents,
                 "metadatas": metadatas,
             }
-
+
             if len(embeddings) > 0:
                 result["embeddings"] = embeddings
-
+
             return result
-
+
         except Exception as e:
-
+            log_error("Import from JSON failed: %s", e)
             return None
-
+
     @staticmethod
     def import_from_csv(file_path: str) -> Optional[Dict[str, Any]]:
         """
         Import collection data from CSV format.
-
+
         Args:
             file_path: Path to CSV file
-
+
         Returns:
             Dictionary with ids, documents, metadatas, embeddings or None if failed
         """
         try:
-            df = pd.read_csv(file_path, encoding='utf-8')
-
+            df = pd.read_csv(file_path, encoding="utf-8")
+
             ids = df["id"].tolist()
             documents = df["document"].tolist() if "document" in df.columns else [""] * len(ids)
-
+
             # Extract metadata columns
             metadata_cols = [col for col in df.columns if col.startswith("metadata_")]
             metadatas = []
-
+
             for idx in range(len(ids)):
                 metadata = {}
                 for col in metadata_cols:
```
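Note the return contract the importers share, visible in `import_from_json` above: a dict with `"ids"`, `"documents"`, and `"metadatas"`, plus `"embeddings"` only when at least one record carried a vector, or `None` after a logged failure. A short consumer sketch (the file name is hypothetical):

```python
# Hypothetical consumer of the import contract shown above.
restored = ImportExportService.import_from_json("collection.json")
if restored is None:
    # The failure was already reported through log_error; nothing was raised.
    raise SystemExit("import failed")
ids = restored["ids"]
embeddings = restored.get("embeddings", [])  # key is absent if no vectors were stored
```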
```diff
@@ -227,7 +230,7 @@ class ImportExportService:
                     if pd.notna(value):
                         metadata[key] = value
                 metadatas.append(metadata)
-
+
             # Extract embeddings if present
             embeddings = []
             if "embedding" in df.columns:
```
```diff
@@ -236,43 +239,43 @@ class ImportExportService:
                         embeddings.append(json.loads(emb_str))
                     else:
                         embeddings.append([])
-
+
             result = {
                 "ids": ids,
                 "documents": documents,
                 "metadatas": metadatas,
             }
-
+
             if len(embeddings) > 0:
                 result["embeddings"] = embeddings
-
+
             return result
-
+
         except Exception as e:
-
+            log_error("Import from CSV failed: %s", e)
             return None
-
+
     @staticmethod
     def import_from_parquet(file_path: str) -> Optional[Dict[str, Any]]:
         """
         Import collection data from Parquet format.
-
+
         Args:
             file_path: Path to Parquet file
-
+
         Returns:
             Dictionary with ids, documents, metadatas, embeddings or None if failed
         """
         try:
-            df = pd.read_parquet(file_path, engine='pyarrow')
-
+            df = pd.read_parquet(file_path, engine="pyarrow")
+
             ids = df["id"].tolist()
             documents = df["document"].tolist() if "document" in df.columns else [""] * len(ids)
-
+
             # Extract metadata columns
             metadata_cols = [col for col in df.columns if col.startswith("metadata_")]
             metadatas = []
-
+
             for idx in range(len(ids)):
                 metadata = {}
                 for col in metadata_cols:
```
```diff
@@ -281,23 +284,23 @@ class ImportExportService:
                     if pd.notna(value):
                         metadata[key] = value
                 metadatas.append(metadata)
-
+
             # Extract embeddings if present
             embeddings = []
             if "embedding" in df.columns:
                 embeddings = df["embedding"].tolist()
-
+
             result = {
                 "ids": ids,
                 "documents": documents,
                 "metadatas": metadatas,
             }
-
+
             if len(embeddings) > 0:
                 result["embeddings"] = embeddings
-
+
             return result
-
+
         except Exception as e:
-
+            log_error("Import from Parquet failed: %s", e)
             return None
```
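Finally, the Parquet path: both sides pass `engine="pyarrow"`, so pyarrow must be installed, and because Parquet preserves list columns natively, `import_from_parquet` can take `df["embedding"].tolist()` directly with no JSON decoding step. A hedged round-trip sketch, reusing the hypothetical `data` from the earlier examples:

```python
# Hypothetical round trip; the file name is illustrative. Requires pyarrow,
# since both to_parquet and read_parquet are pinned to engine="pyarrow".
ImportExportService.export_to_parquet(data, "collection.parquet")
restored = ImportExportService.import_from_parquet("collection.parquet")
# Embeddings come back as a native list column, so no json.loads step is
# needed here, unlike the CSV round trip.
```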