vector_inspector-0.2.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vector_inspector/__init__.py +3 -0
- vector_inspector/__main__.py +4 -0
- vector_inspector/core/__init__.py +1 -0
- vector_inspector/core/connections/__init__.py +7 -0
- vector_inspector/core/connections/base_connection.py +233 -0
- vector_inspector/core/connections/chroma_connection.py +384 -0
- vector_inspector/core/connections/qdrant_connection.py +723 -0
- vector_inspector/core/connections/template_connection.py +346 -0
- vector_inspector/main.py +21 -0
- vector_inspector/services/__init__.py +1 -0
- vector_inspector/services/backup_restore_service.py +286 -0
- vector_inspector/services/filter_service.py +72 -0
- vector_inspector/services/import_export_service.py +287 -0
- vector_inspector/services/settings_service.py +60 -0
- vector_inspector/services/visualization_service.py +116 -0
- vector_inspector/ui/__init__.py +1 -0
- vector_inspector/ui/components/__init__.py +1 -0
- vector_inspector/ui/components/backup_restore_dialog.py +350 -0
- vector_inspector/ui/components/filter_builder.py +370 -0
- vector_inspector/ui/components/item_dialog.py +118 -0
- vector_inspector/ui/components/loading_dialog.py +30 -0
- vector_inspector/ui/main_window.py +288 -0
- vector_inspector/ui/views/__init__.py +1 -0
- vector_inspector/ui/views/collection_browser.py +112 -0
- vector_inspector/ui/views/connection_view.py +423 -0
- vector_inspector/ui/views/metadata_view.py +555 -0
- vector_inspector/ui/views/search_view.py +268 -0
- vector_inspector/ui/views/visualization_view.py +245 -0
- vector_inspector-0.2.0.dist-info/METADATA +382 -0
- vector_inspector-0.2.0.dist-info/RECORD +32 -0
- vector_inspector-0.2.0.dist-info/WHEEL +4 -0
- vector_inspector-0.2.0.dist-info/entry_points.txt +5 -0
vector_inspector/services/filter_service.py
@@ -0,0 +1,72 @@
+"""Service for applying client-side filters to data."""
+
+from typing import Dict, Any, List
+
+
+def apply_client_side_filters(data: Dict[str, Any], filters: List[Dict[str, Any]]) -> Dict[str, Any]:
+    """
+    Apply client-side filters to fetched data.
+
+    Args:
+        data: Data dictionary with ids, documents, metadatas, etc.
+        filters: List of client-side filter dictionaries
+
+    Returns:
+        Filtered data dictionary
+    """
+    if not filters:
+        return data
+
+    ids = data.get("ids", [])
+    documents = data.get("documents", [])
+    metadatas = data.get("metadatas", [])
+    embeddings = data.get("embeddings", [])
+
+    # Track which indices to keep
+    keep_indices = []
+
+    for i in range(len(ids)):
+        # Check if this item passes all client-side filters
+        passes = True
+
+        for filt in filters:
+            field = filt.get("field", "")
+            op = filt.get("op", "")
+            value = filt.get("value", "")
+
+            # Special handling for document field
+            if field.lower() == "document":
+                item_value = documents[i] if i < len(documents) else ""
+            else:
+                # Get from metadata
+                metadata = metadatas[i] if i < len(metadatas) else {}
+                item_value = metadata.get(field, "")
+
+            # Convert to string for text operations
+            item_value_str = str(item_value).lower()
+            search_value = str(value).lower()
+
+            # Apply operator
+            if op == "contains":
+                if search_value not in item_value_str:
+                    passes = False
+                    break
+            elif op == "not_contains":
+                if search_value in item_value_str:
+                    passes = False
+                    break
+
+        if passes:
+            keep_indices.append(i)
+
+    # Filter the data
+    filtered_data = {
+        "ids": [ids[i] for i in keep_indices],
+        "documents": [documents[i] for i in keep_indices if i < len(documents)],
+        "metadatas": [metadatas[i] for i in keep_indices if i < len(metadatas)],
+    }
+
+    if embeddings:
+        filtered_data["embeddings"] = [embeddings[i] for i in keep_indices if i < len(embeddings)]
+
+    return filtered_data
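A minimal usage sketch for apply_client_side_filters (the sample data and filter dicts below are hypothetical; this version of the package only implements the contains and not_contains operators shown above):

    from vector_inspector.services.filter_service import apply_client_side_filters

    # Hypothetical fetch result in the ids/documents/metadatas shape the service expects.
    data = {
        "ids": ["a", "b", "c"],
        "documents": ["red apple", "green pear", "red cherry"],
        "metadatas": [{"color": "red"}, {"color": "green"}, {"color": "red"}],
    }

    # Keep items whose document contains "red" and whose color metadata does not contain "green".
    filters = [
        {"field": "document", "op": "contains", "value": "red"},
        {"field": "color", "op": "not_contains", "value": "green"},
    ]

    filtered = apply_client_side_filters(data, filters)
    print(filtered["ids"])  # ['a', 'c']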
vector_inspector/services/import_export_service.py
@@ -0,0 +1,287 @@
+"""Service for importing and exporting collection data."""
+
+import json
+import csv
+from typing import Dict, Any, List, Optional
+from pathlib import Path
+import pandas as pd
+
+
+class ImportExportService:
+    """Handles import/export operations for vector database collections."""
+
+    @staticmethod
+    def export_to_json(data: Dict[str, Any], file_path: str) -> bool:
+        """
+        Export collection data to JSON format.
+
+        Args:
+            data: Collection data dictionary with ids, documents, metadatas, embeddings
+            file_path: Path to save JSON file
+
+        Returns:
+            True if successful, False otherwise
+        """
+        try:
+            # Structure data for export
+            export_data = []
+            ids = data.get("ids", [])
+            documents = data.get("documents", [])
+            metadatas = data.get("metadatas", [])
+            embeddings = data.get("embeddings", [])
+
+            for i, item_id in enumerate(ids):
+                item = {
+                    "id": item_id,
+                    "document": documents[i] if i < len(documents) else None,
+                    "metadata": metadatas[i] if i < len(metadatas) else {},
+                }
+                # Optionally include embeddings
+                if embeddings and i < len(embeddings):
+                    item["embedding"] = embeddings[i]
+                export_data.append(item)
+
+            # Write to file
+            with open(file_path, 'w', encoding='utf-8') as f:
+                json.dump(export_data, f, indent=2, ensure_ascii=False)
+
+            return True
+        except Exception as e:
+            print(f"Export to JSON failed: {e}")
+            return False
+
+    @staticmethod
+    def export_to_csv(data: Dict[str, Any], file_path: str, include_embeddings: bool = False) -> bool:
+        """
+        Export collection data to CSV format.
+
+        Args:
+            data: Collection data dictionary
+            file_path: Path to save CSV file
+            include_embeddings: Whether to include embedding vectors
+
+        Returns:
+            True if successful, False otherwise
+        """
+        try:
+            ids = data.get("ids", [])
+            documents = data.get("documents", [])
+            metadatas = data.get("metadatas", [])
+            embeddings = data.get("embeddings", [])
+
+            # Prepare rows
+            rows = []
+            for i, item_id in enumerate(ids):
+                row = {
+                    "id": item_id,
+                    "document": documents[i] if i < len(documents) else "",
+                }
+
+                # Add metadata fields
+                if i < len(metadatas) and metadatas[i]:
+                    for key, value in metadatas[i].items():
+                        row[f"metadata_{key}"] = value
+
+                # Optionally add embeddings
+                if include_embeddings and embeddings and i < len(embeddings):
+                    row["embedding"] = json.dumps(embeddings[i])
+
+                rows.append(row)
+
+            # Convert to DataFrame and save
+            df = pd.DataFrame(rows)
+            df.to_csv(file_path, index=False, encoding='utf-8')
+
+            return True
+        except Exception as e:
+            print(f"Export to CSV failed: {e}")
+            return False
+
+    @staticmethod
+    def export_to_parquet(data: Dict[str, Any], file_path: str) -> bool:
+        """
+        Export collection data to Parquet format.
+
+        Args:
+            data: Collection data dictionary
+            file_path: Path to save Parquet file
+
+        Returns:
+            True if successful, False otherwise
+        """
+        try:
+            ids = data.get("ids", [])
+            documents = data.get("documents", [])
+            metadatas = data.get("metadatas", [])
+            embeddings = data.get("embeddings", [])
+
+            # Prepare data for DataFrame
+            df_data = {
+                "id": ids,
+                "document": documents if documents else [None] * len(ids),
+            }
+
+            # Add metadata fields as separate columns
+            if metadatas and metadatas[0]:
+                for key in metadatas[0].keys():
+                    df_data[f"metadata_{key}"] = [m.get(key) if m else None for m in metadatas]
+
+            # Add embeddings as a column
+            if embeddings:
+                df_data["embedding"] = embeddings
+
+            # Create DataFrame and save
+            df = pd.DataFrame(df_data)
+            df.to_parquet(file_path, index=False, engine='pyarrow')
+
+            return True
+        except Exception as e:
+            print(f"Export to Parquet failed: {e}")
+            return False
+
+    @staticmethod
+    def import_from_json(file_path: str) -> Optional[Dict[str, Any]]:
+        """
+        Import collection data from JSON format.
+
+        Args:
+            file_path: Path to JSON file
+
+        Returns:
+            Dictionary with ids, documents, metadatas, embeddings or None if failed
+        """
+        try:
+            with open(file_path, 'r', encoding='utf-8') as f:
+                data = json.load(f)
+
+            # Parse data
+            ids = []
+            documents = []
+            metadatas = []
+            embeddings = []
+
+            for item in data:
+                ids.append(item.get("id", ""))
+                documents.append(item.get("document", ""))
+                metadatas.append(item.get("metadata", {}))
+                if "embedding" in item:
+                    embeddings.append(item["embedding"])
+
+            result = {
+                "ids": ids,
+                "documents": documents,
+                "metadatas": metadatas,
+            }
+
+            if embeddings:
+                result["embeddings"] = embeddings
+
+            return result
+
+        except Exception as e:
+            print(f"Import from JSON failed: {e}")
+            return None
+
+    @staticmethod
+    def import_from_csv(file_path: str) -> Optional[Dict[str, Any]]:
+        """
+        Import collection data from CSV format.
+
+        Args:
+            file_path: Path to CSV file
+
+        Returns:
+            Dictionary with ids, documents, metadatas, embeddings or None if failed
+        """
+        try:
+            df = pd.read_csv(file_path, encoding='utf-8')
+
+            ids = df["id"].tolist()
+            documents = df["document"].tolist() if "document" in df.columns else [""] * len(ids)
+
+            # Extract metadata columns
+            metadata_cols = [col for col in df.columns if col.startswith("metadata_")]
+            metadatas = []
+
+            for idx in range(len(ids)):
+                metadata = {}
+                for col in metadata_cols:
+                    key = col.replace("metadata_", "")
+                    value = df.loc[idx, col]
+                    if pd.notna(value):
+                        metadata[key] = value
+                metadatas.append(metadata)
+
+            # Extract embeddings if present
+            embeddings = []
+            if "embedding" in df.columns:
+                for emb_str in df["embedding"]:
+                    if pd.notna(emb_str):
+                        embeddings.append(json.loads(emb_str))
+                    else:
+                        embeddings.append([])
+
+            result = {
+                "ids": ids,
+                "documents": documents,
+                "metadatas": metadatas,
+            }
+
+            if embeddings:
+                result["embeddings"] = embeddings
+
+            return result
+
+        except Exception as e:
+            print(f"Import from CSV failed: {e}")
+            return None
+
+    @staticmethod
+    def import_from_parquet(file_path: str) -> Optional[Dict[str, Any]]:
+        """
+        Import collection data from Parquet format.
+
+        Args:
+            file_path: Path to Parquet file
+
+        Returns:
+            Dictionary with ids, documents, metadatas, embeddings or None if failed
+        """
+        try:
+            df = pd.read_parquet(file_path, engine='pyarrow')
+
+            ids = df["id"].tolist()
+            documents = df["document"].tolist() if "document" in df.columns else [""] * len(ids)
+
+            # Extract metadata columns
+            metadata_cols = [col for col in df.columns if col.startswith("metadata_")]
+            metadatas = []
+
+            for idx in range(len(ids)):
+                metadata = {}
+                for col in metadata_cols:
+                    key = col.replace("metadata_", "")
+                    value = df.loc[idx, col]
+                    if pd.notna(value):
+                        metadata[key] = value
+                metadatas.append(metadata)
+
+            # Extract embeddings if present
+            embeddings = []
+            if "embedding" in df.columns:
+                embeddings = df["embedding"].tolist()
+
+            result = {
+                "ids": ids,
+                "documents": documents,
+                "metadatas": metadatas,
+            }
+
+            if embeddings:
+                result["embeddings"] = embeddings
+
+            return result
+
+        except Exception as e:
+            print(f"Import from Parquet failed: {e}")
+            return None
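A round-trip sketch for ImportExportService, assuming a writable working directory (file names and sample data are illustrative):

    from vector_inspector.services.import_export_service import ImportExportService

    # Hypothetical collection snapshot; the embeddings key is optional throughout.
    data = {
        "ids": ["doc-1", "doc-2"],
        "documents": ["hello world", "vector databases"],
        "metadatas": [{"source": "a.txt"}, {"source": "b.txt"}],
        "embeddings": [[0.1, 0.2], [0.3, 0.4]],
    }

    if ImportExportService.export_to_json(data, "collection.json"):
        restored = ImportExportService.import_from_json("collection.json")
        assert restored is not None and restored["ids"] == data["ids"]

Note that export_to_csv stores each vector as a JSON string (via json.dumps) and import_from_csv decodes it back (via json.loads), so a CSV round trip preserves embeddings only when the export was made with include_embeddings=True.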
vector_inspector/services/settings_service.py
@@ -0,0 +1,60 @@
+"""Service for persisting application settings."""
+
+import json
+from pathlib import Path
+from typing import Dict, Any, Optional
+
+
+class SettingsService:
+    """Handles loading and saving application settings."""
+
+    def __init__(self):
+        """Initialize settings service."""
+        self.settings_dir = Path.home() / ".vector-viewer"
+        self.settings_file = self.settings_dir / "settings.json"
+        self.settings: Dict[str, Any] = {}
+        self._load_settings()
+
+    def _load_settings(self):
+        """Load settings from file."""
+        try:
+            if self.settings_file.exists():
+                with open(self.settings_file, 'r', encoding='utf-8') as f:
+                    self.settings = json.load(f)
+        except Exception as e:
+            print(f"Failed to load settings: {e}")
+            self.settings = {}
+
+    def _save_settings(self):
+        """Save settings to file."""
+        try:
+            # Create settings directory if it doesn't exist
+            self.settings_dir.mkdir(parents=True, exist_ok=True)
+
+            with open(self.settings_file, 'w', encoding='utf-8') as f:
+                json.dump(self.settings, f, indent=2, ensure_ascii=False)
+        except Exception as e:
+            print(f"Failed to save settings: {e}")
+
+    def get_last_connection(self) -> Optional[Dict[str, Any]]:
+        """Get the last connection configuration."""
+        return self.settings.get("last_connection")
+
+    def save_last_connection(self, config: Dict[str, Any]):
+        """Save the last connection configuration."""
+        self.settings["last_connection"] = config
+        self._save_settings()
+
+    def get(self, key: str, default: Any = None) -> Any:
+        """Get a setting value."""
+        return self.settings.get(key, default)
+
+    def set(self, key: str, value: Any):
+        """Set a setting value."""
+        self.settings[key] = value
+        self._save_settings()
+
+    def clear(self):
+        """Clear all settings."""
+        self.settings = {}
+        self._save_settings()
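A short usage sketch for SettingsService (the connection config shown is illustrative):

    from vector_inspector.services.settings_service import SettingsService

    settings = SettingsService()  # loads ~/.vector-viewer/settings.json if it exists

    # Persist the most recent connection for the next session.
    settings.save_last_connection({"type": "chroma", "host": "localhost", "port": 8000})

    last = settings.get_last_connection()
    theme = settings.get("theme", "dark")  # returns the supplied default when unset

Every set() and save_last_connection() call writes the full settings dict back to disk immediately, so there is no separate flush or close step.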
vector_inspector/services/visualization_service.py
@@ -0,0 +1,116 @@
+"""Visualization service for dimensionality reduction."""
+
+from typing import Optional, List, Tuple
+import numpy as np
+from sklearn.decomposition import PCA
+from sklearn.manifold import TSNE
+import umap
+
+
+class VisualizationService:
+    """Service for vector dimensionality reduction and visualization."""
+
+    @staticmethod
+    def reduce_dimensions(
+        embeddings: List[List[float]],
+        method: str = "pca",
+        n_components: int = 2,
+        **kwargs
+    ) -> Optional[np.ndarray]:
+        """
+        Reduce dimensionality of embeddings.
+
+        Args:
+            embeddings: List of embedding vectors
+            method: Reduction method ('pca', 'tsne', or 'umap')
+            n_components: Target number of dimensions (2 or 3)
+            **kwargs: Additional method-specific parameters
+
+        Returns:
+            Reduced embeddings as numpy array, or None if failed
+        """
+        if embeddings is None or len(embeddings) == 0:
+            return None
+
+        try:
+            X = np.array(embeddings)
+
+            if method == "pca":
+                reducer = PCA(n_components=n_components)
+                reduced = reducer.fit_transform(X)
+
+            elif method == "tsne":
+                perplexity = kwargs.get("perplexity", min(30, len(embeddings) - 1))
+                reducer = TSNE(
+                    n_components=n_components,
+                    perplexity=perplexity,
+                    random_state=42
+                )
+                reduced = reducer.fit_transform(X)
+
+            elif method == "umap":
+                n_neighbors = kwargs.get("n_neighbors", min(15, len(embeddings) - 1))
+                reducer = umap.UMAP(
+                    n_components=n_components,
+                    n_neighbors=n_neighbors,
+                    random_state=42
+                )
+                reduced = reducer.fit_transform(X)
+
+            else:
+                print(f"Unknown method: {method}")
+                return None
+
+            return reduced
+
+        except Exception as e:
+            print(f"Dimensionality reduction failed: {e}")
+            return None
+
+    @staticmethod
+    def prepare_plot_data(
+        reduced_embeddings: np.ndarray,
+        labels: Optional[List[str]] = None,
+        metadata: Optional[List[dict]] = None,
+        color_by: Optional[str] = None
+    ) -> Tuple[np.ndarray, List[str], List[str]]:
+        """
+        Prepare data for plotting.
+
+        Args:
+            reduced_embeddings: Reduced dimension embeddings
+            labels: Text labels for each point
+            metadata: Metadata dictionaries for each point
+            color_by: Metadata field to use for coloring
+
+        Returns:
+            Tuple of (embeddings, labels, colors)
+        """
+        n_points = len(reduced_embeddings)
+
+        # Prepare labels
+        if labels is None:
+            labels = [f"Point {i}" for i in range(n_points)]
+
+        # Prepare colors
+        colors = ["blue"] * n_points
+        if color_by and metadata:
+            unique_values = set()
+            values = []
+            for meta in metadata:
+                value = meta.get(color_by, "unknown")
+                values.append(str(value))
+                unique_values.add(str(value))
+
+            # Map values to colors
+            color_map = {}
+            color_palette = [
+                "red", "blue", "green", "orange", "purple",
+                "cyan", "magenta", "yellow", "pink", "brown"
+            ]
+            for i, val in enumerate(sorted(unique_values)):
+                color_map[val] = color_palette[i % len(color_palette)]
+
+            colors = [color_map[val] for val in values]
+
+        return reduced_embeddings, labels, colors
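An end-to-end sketch for VisualizationService, using placeholder embeddings and metadata:

    import numpy as np
    from vector_inspector.services.visualization_service import VisualizationService

    # Five hypothetical 8-dimensional vectors.
    embeddings = np.random.default_rng(0).normal(size=(5, 8)).tolist()
    metadata = [{"label": "a"}, {"label": "a"}, {"label": "b"}, {"label": "b"}, {"label": "a"}]

    reduced = VisualizationService.reduce_dimensions(embeddings, method="pca", n_components=2)
    if reduced is not None:
        points, labels, colors = VisualizationService.prepare_plot_data(
            reduced, metadata=metadata, color_by="label"
        )
        # points is an (n, 2) array; labels and colors are per-point lists for a scatter plot.

For t-SNE and UMAP the service defaults perplexity and n_neighbors to at most len(embeddings) - 1, so small collections stay within the limits of scikit-learn and umap-learn.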
vector_inspector/ui/__init__.py
@@ -0,0 +1 @@
+"""User interface components and views."""
vector_inspector/ui/components/__init__.py
@@ -0,0 +1 @@
+"""Reusable UI components."""