vector-inspector 0.2.0__tar.gz → 0.2.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {vector_inspector-0.2.0 → vector_inspector-0.2.2}/PKG-INFO +34 -167
- vector_inspector-0.2.2/README.md +224 -0
- {vector_inspector-0.2.0 → vector_inspector-0.2.2}/pyproject.toml +7 -1
- {vector_inspector-0.2.0 → vector_inspector-0.2.2}/src/vector_inspector/ui/main_window.py +1 -1
- {vector_inspector-0.2.0 → vector_inspector-0.2.2}/tests/test_connections.py +60 -60
- {vector_inspector-0.2.0 → vector_inspector-0.2.2}/tests/test_filter_service.py +101 -101
- {vector_inspector-0.2.0 → vector_inspector-0.2.2}/tests/test_settings_service.py +101 -101
- {vector_inspector-0.2.0 → vector_inspector-0.2.2}/tests/vector_inspector.py +35 -35
- vector_inspector-0.2.0/README.md +0 -361
- {vector_inspector-0.2.0 → vector_inspector-0.2.2}/src/vector_inspector/__init__.py +0 -0
- {vector_inspector-0.2.0 → vector_inspector-0.2.2}/src/vector_inspector/__main__.py +0 -0
- {vector_inspector-0.2.0 → vector_inspector-0.2.2}/src/vector_inspector/core/__init__.py +0 -0
- {vector_inspector-0.2.0 → vector_inspector-0.2.2}/src/vector_inspector/core/connections/__init__.py +0 -0
- {vector_inspector-0.2.0 → vector_inspector-0.2.2}/src/vector_inspector/core/connections/base_connection.py +0 -0
- {vector_inspector-0.2.0 → vector_inspector-0.2.2}/src/vector_inspector/core/connections/chroma_connection.py +0 -0
- {vector_inspector-0.2.0 → vector_inspector-0.2.2}/src/vector_inspector/core/connections/qdrant_connection.py +0 -0
- {vector_inspector-0.2.0 → vector_inspector-0.2.2}/src/vector_inspector/core/connections/template_connection.py +0 -0
- {vector_inspector-0.2.0 → vector_inspector-0.2.2}/src/vector_inspector/main.py +0 -0
- {vector_inspector-0.2.0 → vector_inspector-0.2.2}/src/vector_inspector/services/__init__.py +0 -0
- {vector_inspector-0.2.0 → vector_inspector-0.2.2}/src/vector_inspector/services/backup_restore_service.py +0 -0
- {vector_inspector-0.2.0 → vector_inspector-0.2.2}/src/vector_inspector/services/filter_service.py +0 -0
- {vector_inspector-0.2.0 → vector_inspector-0.2.2}/src/vector_inspector/services/import_export_service.py +0 -0
- {vector_inspector-0.2.0 → vector_inspector-0.2.2}/src/vector_inspector/services/settings_service.py +0 -0
- {vector_inspector-0.2.0 → vector_inspector-0.2.2}/src/vector_inspector/services/visualization_service.py +0 -0
- {vector_inspector-0.2.0 → vector_inspector-0.2.2}/src/vector_inspector/ui/__init__.py +0 -0
- {vector_inspector-0.2.0 → vector_inspector-0.2.2}/src/vector_inspector/ui/components/__init__.py +0 -0
- {vector_inspector-0.2.0 → vector_inspector-0.2.2}/src/vector_inspector/ui/components/backup_restore_dialog.py +0 -0
- {vector_inspector-0.2.0 → vector_inspector-0.2.2}/src/vector_inspector/ui/components/filter_builder.py +0 -0
- {vector_inspector-0.2.0 → vector_inspector-0.2.2}/src/vector_inspector/ui/components/item_dialog.py +0 -0
- {vector_inspector-0.2.0 → vector_inspector-0.2.2}/src/vector_inspector/ui/components/loading_dialog.py +0 -0
- {vector_inspector-0.2.0 → vector_inspector-0.2.2}/src/vector_inspector/ui/views/__init__.py +0 -0
- {vector_inspector-0.2.0 → vector_inspector-0.2.2}/src/vector_inspector/ui/views/collection_browser.py +0 -0
- {vector_inspector-0.2.0 → vector_inspector-0.2.2}/src/vector_inspector/ui/views/connection_view.py +0 -0
- {vector_inspector-0.2.0 → vector_inspector-0.2.2}/src/vector_inspector/ui/views/metadata_view.py +0 -0
- {vector_inspector-0.2.0 → vector_inspector-0.2.2}/src/vector_inspector/ui/views/search_view.py +0 -0
- {vector_inspector-0.2.0 → vector_inspector-0.2.2}/src/vector_inspector/ui/views/visualization_view.py +0 -0
|
@@ -1,9 +1,13 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: vector-inspector
|
|
3
|
-
Version: 0.2.0
|
|
3
|
+
Version: 0.2.2
|
|
4
4
|
Summary: A comprehensive desktop application for visualizing, querying, and managing vector database data
|
|
5
5
|
Author-Email: Anthony Dawson <anthonypdawson+github@gmail.com>
|
|
6
6
|
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/anthony-dawson/vector-inspector
|
|
8
|
+
Project-URL: Source, https://github.com/anthony-dawson/vector-inspector
|
|
9
|
+
Project-URL: Issues, https://github.com/anthony-dawson/vector-inspector/issues
|
|
10
|
+
Project-URL: Documentation, https://github.com/anthony-dawson/vector-inspector#readme
|
|
7
11
|
Requires-Python: ==3.12.*
|
|
8
12
|
Requires-Dist: chromadb>=0.4.22
|
|
9
13
|
Requires-Dist: qdrant-client>=1.7.0
|
|
@@ -26,24 +30,23 @@ A comprehensive desktop application for visualizing, querying, and managing vect
|
|
|
26
30
|
|
|
27
31
|
## Overview
|
|
28
32
|
|
|
33
|
+
Vector Inspector bridges the gap between vector databases and user-friendly data exploration tools. While vector databases are powerful for semantic search and AI applications, they often lack the intuitive inspection and management tools that traditional SQL databases have. This project aims to provide that missing layer.
|
|
34
|
+
|
|
29
35
|
## Table of Contents
|
|
30
36
|
|
|
31
37
|
- [Overview](#overview)
|
|
32
38
|
- [Key Features](#key-features)
|
|
33
39
|
- [Architecture](#architecture)
|
|
34
|
-
- [Application Structure](#application-structure)
|
|
35
40
|
- [Use Cases](#use-cases)
|
|
36
|
-
- [Feature Access
|
|
37
|
-
- [
|
|
38
|
-
- [Installation
|
|
41
|
+
- [Feature Access](#feature-access)
|
|
42
|
+
- [Roadmap](#roadmap)
|
|
43
|
+
- [Installation](#installation)
|
|
39
44
|
- [Configuration](#configuration)
|
|
40
45
|
- [Development Setup](#development-setup)
|
|
41
46
|
- [Contributing](#contributing)
|
|
42
47
|
- [License](#license)
|
|
43
48
|
- [Acknowledgments](#acknowledgments)
|
|
44
49
|
|
|
45
|
-
Vector Inspector bridges the gap between vector databases and user-friendly data exploration tools. While vector databases are powerful for semantic search and AI applications, they often lack the intuitive inspection and management tools that traditional SQL databases have. This project aims to provide that missing layer.
|
|
46
|
-
|
|
47
50
|
## Key Features
|
|
48
51
|
|
|
49
52
|
### 1. **Multi-Provider Support**
|
|
@@ -119,58 +122,9 @@ Vector Inspector bridges the gap between vector databases and user-friendly data
|
|
|
119
122
|
|
|
120
123
|
## Architecture
|
|
121
124
|
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
#### Frontend (GUI)
|
|
125
|
-
- **Framework**: PySide6 (Qt for Python) - native desktop application
|
|
126
|
-
- **UI Components**: Qt Widgets for forms, dialogs, and application structure
|
|
127
|
-
- **Visualization**:
|
|
128
|
-
- Plotly for interactive charts (embedded via QWebEngineView)
|
|
129
|
-
- matplotlib for static visualizations
|
|
130
|
-
- **Data Grid**: QTableView with custom models for high-performance data display
|
|
131
|
-
|
|
132
|
-
#### Backend
|
|
133
|
-
- **Language**: Python 3.12
|
|
134
|
-
- **Core Libraries**:
|
|
135
|
-
- Vector DB clients: `chromadb`, `qdrant-client` (implemented), `pinecone-client`, `weaviate-client`, `pymilvus` (planned)
|
|
136
|
-
- Embeddings: `sentence-transformers`, `fastembed` (implemented), `openai`, `cohere` (planned)
|
|
137
|
-
- Data processing: `pandas`, `numpy`
|
|
138
|
-
- Dimensionality reduction: `scikit-learn`, `umap-learn`
|
|
139
|
-
- **API Layer**: FastAPI (planned for programmatic access) or direct Python integration
|
|
140
|
-
|
|
141
|
-
#### Data Layer
|
|
142
|
-
- **Connection Management**: Provider-specific connection classes with unified interface
|
|
143
|
-
- **Query Abstraction**: Base connection interface that each provider implements
|
|
144
|
-
- **Storage Modes**:
|
|
145
|
-
- ChromaDB: Persistent local storage
|
|
146
|
-
- Qdrant Remote: Connect via host/port (e.g., localhost:6333)
|
|
147
|
-
- Qdrant Embedded: Local path storage without separate server
|
|
148
|
-
- **Caching**: Redis or in-memory cache for frequently accessed data (planned)
|
|
149
|
-
- **Settings Persistence**: User settings saved to ~/.vector-viewer/settings.json
|
|
150
|
-
|
|
151
|
-
### Application Structure
|
|
125
|
+
Vector Inspector is built with PySide6 (Qt for Python) for the GUI, providing a native desktop experience. The backend uses Python with support for multiple vector database providers through a unified interface.
|
|
152
126
|
|
|
153
|
-
|
|
154
|
-
vector-viewer/
|
|
155
|
-
├── src/
|
|
156
|
-
│ └── vector_viewer/
|
|
157
|
-
│ ├── core/
|
|
158
|
-
│ │ └── connections/ # Connection managers for each provider
|
|
159
|
-
│ ├── ui/
|
|
160
|
-
│ │ ├── components/ # Reusable UI components
|
|
161
|
-
│ │ └── views/ # Main application views
|
|
162
|
-
│ ├── services/ # Business logic services
|
|
163
|
-
│ └── main.py # Application entry point
|
|
164
|
-
├── tests/
|
|
165
|
-
├── docs/
|
|
166
|
-
├── data/ # Local database storage
|
|
167
|
-
│ ├── chroma_db/
|
|
168
|
-
│ └── qdrant/
|
|
169
|
-
├── run.sh / run.bat # Launch scripts
|
|
170
|
-
└── pyproject.toml
|
|
171
|
-
```
|
|
172
|
-
|
|
173
|
-
User settings are saved to `~/.vector-viewer/settings.json`
|
|
127
|
+
For detailed architecture information, see [docs/architecture.md](docs/architecture.md).
|
|
174
128
|
|
|
175
129
|
## Use Cases
|
|
176
130
|
|
|
@@ -181,120 +135,33 @@ User settings are saved to `~/.vector-viewer/settings.json`
|
|
|
181
135
|
5. **Data Migration**: Transfer data between vector database providers
|
|
182
136
|
6. **Education**: Learn and experiment with vector databases interactively
|
|
183
137
|
|
|
184
|
-
## Feature Access
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
| Import/export (CSV, JSON, Parquet) | Free |
|
|
196
|
-
| Provider abstraction layer | Free |
|
|
197
|
-
| Pinecone support | Free |
|
|
198
|
-
| Weaviate support | Free |
|
|
199
|
-
| Qdrant support (basic/experimental) | Free |
|
|
200
|
-
| Milvus support | Pro |
|
|
201
|
-
| ChromaDB advanced support | Pro |
|
|
202
|
-
| FAISS (local files) support | Pro |
|
|
203
|
-
| pgvector (PostgreSQL extension) support | Pro |
|
|
204
|
-
| Elasticsearch with vector search support | Pro |
|
|
205
|
-
| Advanced query builder | Free |
|
|
206
|
-
| 3D visualization | Free |
|
|
207
|
-
| Embedding model integration (basic) | Free |
|
|
208
|
-
| Query history and saved queries | Free |
|
|
209
|
-
| Model Comparison Mode | Pro |
|
|
210
|
-
| Cluster Explorer | Pro |
|
|
211
|
-
| Embedding Inspector | Pro |
|
|
212
|
-
| Embedding Provenance Graph | Pro |
|
|
213
|
-
| Semantic Drift Timeline | Pro |
|
|
214
|
-
| Cross-Collection Similarity | Pro |
|
|
215
|
-
| Vector Surgery | Pro |
|
|
216
|
-
| Custom plugin system | Pro |
|
|
217
|
-
| Team collaboration features | Pro |
|
|
218
|
-
|
|
219
|
-
> **Note:** Qdrant support is available for free users in the open source version (basic/experimental). Advanced Qdrant features (e.g., payload filtering, geo, cloud auth) may be reserved for Pro in the future.
|
|
220
|
-
|
|
221
|
-
## Planned Roadmap
|
|
222
|
-
|
|
223
|
-
### Phase 1: Foundation (MVP)
|
|
224
|
-
- [x] Connection to ChromaDB
|
|
225
|
-
- [x] Basic metadata browsing and filtering
|
|
226
|
-
- [x] Simple similarity search interface
|
|
227
|
-
- [x] 2D vector visualization (PCA/t-SNE)
|
|
228
|
-
- [x] Basic CRUD operations
|
|
229
|
-
|
|
230
|
-
### Phase 2: Core Features
|
|
231
|
-
- [x] Metadata filtering (advanced filtering, combine with search)
|
|
232
|
-
- [x] Item editing (update metadata and documents)
|
|
233
|
-
- [x] Import/export (CSV, JSON, Parquet, backup/restore)
|
|
234
|
-
- [x] Provider abstraction layer (unified interface for all supported vector DBs)
|
|
235
|
-
- [x] Qdrant support (basic/experimental, free)
|
|
236
|
-
|
|
237
|
-
### Phase 3: UX & Professional Polish
|
|
238
|
-
- [ ] **Unified Information Panel** (new "Info" tab as default view)
|
|
239
|
-
- [ ] Database and collection metadata display
|
|
240
|
-
- [ ] Connection health and version information
|
|
241
|
-
- [ ] Schema visualization and index configuration display
|
|
242
|
-
|
|
243
|
-
### Phase 4: Modular/Plugin System & Hybrid Model
|
|
244
|
-
- [ ] Implement modular/plugin system for feature extensions
|
|
245
|
-
- [ ] Migrate paid/advanced features to commercial modules
|
|
246
|
-
- [ ] Add licensing/access control for commercial features
|
|
247
|
-
|
|
248
|
-
### Phase 5: Provider Expansion (Incremental)
|
|
249
|
-
- [ ] Pinecone support (free)
|
|
250
|
-
- [ ] Weaviate support (free)
|
|
251
|
-
- [ ] Qdrant support (paid)
|
|
252
|
-
|
|
253
|
-
#### Future/Backlog Providers
|
|
254
|
-
- [ ] Milvus support (paid)
|
|
255
|
-
- [ ] ChromaDB advanced support (paid)
|
|
256
|
-
- [ ] FAISS (local files) support (paid)
|
|
257
|
-
- [ ] pgvector (PostgreSQL extension) support (paid)
|
|
258
|
-
- [ ] Elasticsearch with vector search support (paid)
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
### Phase 6A: Advanced Usability & Visualization
|
|
262
|
-
- [ ] Advanced query builder (free)
|
|
263
|
-
- [ ] 3D visualization (free)
|
|
264
|
-
- [ ] Embedding model integration (free)
|
|
265
|
-
- [ ] Query history and saved queries (free)
|
|
266
|
-
- [ ] Metadata Type Detection & Rich Media Preview (free)
|
|
267
|
-
|
|
268
|
-
### Phase 6B: Analytical & Comparison Tools
|
|
269
|
-
- [ ] Model Comparison Mode (paid)
|
|
270
|
-
- [ ] Cluster Explorer (paid)
|
|
271
|
-
- [ ] Embedding Inspector (paid)
|
|
272
|
-
- [ ] Embedding Provenance Graph (paid)
|
|
273
|
-
|
|
274
|
-
### Phase 6C: Temporal & Cross-Collection Analytics
|
|
275
|
-
- [ ] Semantic Drift Timeline (paid)
|
|
276
|
-
- [ ] Cross-Collection Similarity (paid)
|
|
277
|
-
|
|
278
|
-
### Phase 6D: Experimental & Power Features
|
|
279
|
-
- [ ] Vector Surgery (paid)
|
|
280
|
-
- [ ] Custom plugin system (paid)
|
|
281
|
-
- [ ] Team collaboration features (paid)
|
|
282
|
-
|
|
283
|
-
### Phase 7: Enterprise Features
|
|
284
|
-
- [ ] Multi-user support with auth
|
|
285
|
-
- [ ] Audit logging
|
|
286
|
-
- [ ] Advanced security features
|
|
287
|
-
- [ ] Custom reporting
|
|
288
|
-
- [ ] API for programmatic access (FastAPI backend)
|
|
289
|
-
- [ ] Caching layer (Redis/in-memory) for performance
|
|
290
|
-
- [ ] Connection pooling and optimization
|
|
138
|
+
## Feature Access
|
|
139
|
+
|
|
140
|
+
Vector Inspector is available in both free (open source) and Pro versions. The free version includes all core features for ChromaDB and basic Qdrant support, while Pro adds advanced analytics and additional providers.
|
|
141
|
+
|
|
142
|
+
See [FEATURES.md](FEATURES.md) for a complete feature comparison.
|
|
143
|
+
|
|
144
|
+
## Roadmap
|
|
145
|
+
|
|
146
|
+
**Current Status**: ✅ Phase 2 Complete
|
|
147
|
+
|
|
148
|
+
See [ROADMAP.md](ROADMAP.md) for the complete development roadmap and planned features.
|
|
291
149
|
|
|
292
150
|
## Installation
|
|
293
151
|
|
|
152
|
+
### From PyPI (Recommended)
|
|
153
|
+
|
|
154
|
+
```bash
|
|
155
|
+
pip install vector-inspector
|
|
156
|
+
vector-inspector
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
### From Source
|
|
160
|
+
|
|
294
161
|
```bash
|
|
295
162
|
# Clone the repository
|
|
296
|
-
git clone https://github.com/anthonypdawson/vector-viewer.git
|
|
297
|
-
cd vector-viewer
|
|
163
|
+
git clone https://github.com/anthonypdawson/vector-inspector.git
|
|
164
|
+
cd vector-inspector
|
|
298
165
|
|
|
299
166
|
# Install dependencies using PDM
|
|
300
167
|
pdm install
|
|
@@ -0,0 +1,224 @@
|
|
|
1
|
+
# Vector Inspector
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
A comprehensive desktop application for visualizing, querying, and managing vector database data. Similar to SQL database viewers, Vector Inspector provides an intuitive GUI for exploring vector embeddings, metadata, and performing similarity searches across multiple vector database providers.
|
|
5
|
+
|
|
6
|
+
## Overview
|
|
7
|
+
|
|
8
|
+
Vector Inspector bridges the gap between vector databases and user-friendly data exploration tools. While vector databases are powerful for semantic search and AI applications, they often lack the intuitive inspection and management tools that traditional SQL databases have. This project aims to provide that missing layer.
|
|
9
|
+
|
|
10
|
+
## Table of Contents
|
|
11
|
+
|
|
12
|
+
- [Overview](#overview)
|
|
13
|
+
- [Key Features](#key-features)
|
|
14
|
+
- [Architecture](#architecture)
|
|
15
|
+
- [Use Cases](#use-cases)
|
|
16
|
+
- [Feature Access](#feature-access)
|
|
17
|
+
- [Roadmap](#roadmap)
|
|
18
|
+
- [Installation](#installation)
|
|
19
|
+
- [Configuration](#configuration)
|
|
20
|
+
- [Development Setup](#development-setup)
|
|
21
|
+
- [Contributing](#contributing)
|
|
22
|
+
- [License](#license)
|
|
23
|
+
- [Acknowledgments](#acknowledgments)
|
|
24
|
+
|
|
25
|
+
## Key Features
|
|
26
|
+
|
|
27
|
+
### 1. **Multi-Provider Support**
|
|
28
|
+
- Connect to vector databases:
|
|
29
|
+
- ChromaDB (persistent local storage)
|
|
30
|
+
- Qdrant (remote server or embedded local)
|
|
31
|
+
- Unified interface regardless of backend provider
|
|
32
|
+
- Automatically saves last connection configuration
|
|
33
|
+
|
|
34
|
+
### 2. **Data Visualization**
|
|
35
|
+
- **Metadata Explorer**: Browse and filter vector entries by metadata fields
|
|
36
|
+
- **Vector Dimensionality Reduction**: Visualize high-dimensional vectors in 2D/3D using:
|
|
37
|
+
- t-SNE
|
|
38
|
+
- UMAP
|
|
39
|
+
- PCA
|
|
40
|
+
- **Cluster Visualization**: Color-code vectors by metadata categories or clustering results
|
|
41
|
+
- **Interactive Plots**: Zoom, pan, and select vectors for detailed inspection
|
|
42
|
+
- **Data Distribution Charts**: Histograms and statistics for metadata fields
|
|
43
|
+
|
|
44
|
+
### 3. **Search & Query Interface**
|
|
45
|
+
- **Similarity Search**:
|
|
46
|
+
- Text-to-vector search (with embedding model integration)
|
|
47
|
+
- Vector-to-vector search
|
|
48
|
+
- Find similar items to selected entries
|
|
49
|
+
- Adjustable top-k results and similarity thresholds
|
|
50
|
+
- **Metadata Filtering**:
|
|
51
|
+
- SQL-like query builder for metadata
|
|
52
|
+
- Combine vector similarity with metadata filters
|
|
53
|
+
- Advanced filtering: ranges, IN clauses, pattern matching
|
|
54
|
+
- **Hybrid Search**: Combine semantic search with keyword search
|
|
55
|
+
- **Query History**: Save and reuse frequent queries
|
|
56
|
+
|
|
57
|
+
### 4. **Data Management**
|
|
58
|
+
- **Browse Collections/Indexes**: View all available collections with statistics
|
|
59
|
+
- **CRUD Operations**:
|
|
60
|
+
- View individual vectors and their metadata
|
|
61
|
+
- Add new vectors (with auto-embedding options)
|
|
62
|
+
- Update metadata fields
|
|
63
|
+
- Delete vectors (single or batch)
|
|
64
|
+
- **Bulk Import/Export**:
|
|
65
|
+
- Import from CSV, JSON, Parquet
|
|
66
|
+
- Export query results to various formats
|
|
67
|
+
- Backup and restore collections
|
|
68
|
+
- **Schema Inspector**: View collection configuration, vector dimensions, metadata schema
|
|
69
|
+
|
|
70
|
+
### 5. **SQL-Like Experience**
|
|
71
|
+
- **Query Console**: Write queries in a familiar SQL-like syntax (where supported)
|
|
72
|
+
- **Results Grid**:
|
|
73
|
+
- Sortable, filterable table view
|
|
74
|
+
- Pagination for large result sets
|
|
75
|
+
- Column customization
|
|
76
|
+
- **Data Inspector**: Click any row to see full details including raw vector
|
|
77
|
+
- **Query Execution Plans**: Understand how queries are executed
|
|
78
|
+
- **Auto-completion**: Intelligent suggestions for collection names, fields, and operations
|
|
79
|
+
|
|
80
|
+
### 6. **Advanced Features**
|
|
81
|
+
- **Embedding Model Integration**:
|
|
82
|
+
- Use OpenAI, Cohere, HuggingFace models for text-to-vector conversion
|
|
83
|
+
- Local model support (sentence-transformers)
|
|
84
|
+
- Custom model integration
|
|
85
|
+
- **Vector Analysis**:
|
|
86
|
+
- Compute similarity matrices
|
|
87
|
+
- Identify outliers and anomalies
|
|
88
|
+
- Cluster analysis with k-means, DBSCAN
|
|
89
|
+
- **Embedding Inspector**:
|
|
90
|
+
- For similar collections or items, automatically identify which vector dimensions (activations) most contribute to the similarity
|
|
91
|
+
- Map key activations to interpretable concepts (e.g., 'humor', 'sadness', 'anger') using metadata or labels
|
|
92
|
+
- Generate human-readable explanations for why items are similar
|
|
93
|
+
- **Performance Monitoring**:
|
|
94
|
+
- Query latency tracking
|
|
95
|
+
- Index performance metrics
|
|
96
|
+
- Connection health monitoring
|
|
97
|
+
|
|
98
|
+
## Architecture
|
|
99
|
+
|
|
100
|
+
Vector Inspector is built with PySide6 (Qt for Python) for the GUI, providing a native desktop experience. The backend uses Python with support for multiple vector database providers through a unified interface.
|
|
101
|
+
|
|
102
|
+
For detailed architecture information, see [docs/architecture.md](docs/architecture.md).
|
|
103
|
+
|
|
104
|
+
## Use Cases
|
|
105
|
+
|
|
106
|
+
1. **AI/ML Development**: Inspect embeddings generated during model development
|
|
107
|
+
2. **RAG System Debugging**: Verify what documents are being retrieved
|
|
108
|
+
3. **Data Quality Assurance**: Identify poorly embedded or outlier vectors
|
|
109
|
+
4. **Production Monitoring**: Check vector database health and data consistency
|
|
110
|
+
5. **Data Migration**: Transfer data between vector database providers
|
|
111
|
+
6. **Education**: Learn and experiment with vector databases interactively
|
|
112
|
+
|
|
113
|
+
## Feature Access
|
|
114
|
+
|
|
115
|
+
Vector Inspector is available in both free (open source) and Pro versions. The free version includes all core features for ChromaDB and basic Qdrant support, while Pro adds advanced analytics and additional providers.
|
|
116
|
+
|
|
117
|
+
See [FEATURES.md](FEATURES.md) for a complete feature comparison.
|
|
118
|
+
|
|
119
|
+
## Roadmap
|
|
120
|
+
|
|
121
|
+
**Current Status**: ✅ Phase 2 Complete
|
|
122
|
+
|
|
123
|
+
See [ROADMAP.md](ROADMAP.md) for the complete development roadmap and planned features.
|
|
124
|
+
|
|
125
|
+
## Installation
|
|
126
|
+
|
|
127
|
+
### From PyPI (Recommended)
|
|
128
|
+
|
|
129
|
+
```bash
|
|
130
|
+
pip install vector-inspector
|
|
131
|
+
vector-inspector
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
### From Source
|
|
135
|
+
|
|
136
|
+
```bash
|
|
137
|
+
# Clone the repository
|
|
138
|
+
git clone https://github.com/anthonypdawson/vector-inspector.git
|
|
139
|
+
cd vector-inspector
|
|
140
|
+
|
|
141
|
+
# Install dependencies using PDM
|
|
142
|
+
pdm install
|
|
143
|
+
|
|
144
|
+
# Launch application
|
|
145
|
+
./run.sh # Linux/macOS
|
|
146
|
+
./run.bat # Windows
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
## Configuration
|
|
150
|
+
|
|
151
|
+
Paths are resolved relative to the project root (where `pyproject.toml` is). For example, entering `./data/chroma_db` will use the absolute path resolved from the project root.
|
|
152
|
+
|
|
153
|
+
The application automatically saves your last connection configuration to `~/.vector-viewer/settings.json`. The next time you launch the application, it will attempt to reconnect using the last saved settings.
|
|
154
|
+
|
|
155
|
+
Example settings structure:
|
|
156
|
+
```json
|
|
157
|
+
{
|
|
158
|
+
"last_connection": {
|
|
159
|
+
"provider": "chromadb",
|
|
160
|
+
"connection_type": "persistent",
|
|
161
|
+
"path": "./data/chroma_db"
|
|
162
|
+
}
|
|
163
|
+
}
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
## Development Setup
|
|
167
|
+
|
|
168
|
+
```bash
|
|
169
|
+
# Install PDM if you haven't already
|
|
170
|
+
pip install pdm
|
|
171
|
+
|
|
172
|
+
# Install dependencies with development tools (PDM will create venv automatically)
|
|
173
|
+
pdm install -d
|
|
174
|
+
|
|
175
|
+
# Run tests
|
|
176
|
+
pdm run pytest
|
|
177
|
+
|
|
178
|
+
# Run application in development mode
|
|
179
|
+
./run.sh # Linux/macOS
|
|
180
|
+
./run.bat # Windows
|
|
181
|
+
|
|
182
|
+
# Or use Python module directly from src directory:
|
|
183
|
+
cd src
|
|
184
|
+
pdm run python -m vector_viewer
|
|
185
|
+
```
|
|
186
|
+
|
|
187
|
+
## Contributing
|
|
188
|
+
|
|
189
|
+
Contributions are welcome! Areas where help is needed:
|
|
190
|
+
- Additional vector database provider integrations
|
|
191
|
+
- UI/UX improvements
|
|
192
|
+
- Performance optimizations
|
|
193
|
+
- Documentation
|
|
194
|
+
- Test coverage
|
|
195
|
+
|
|
196
|
+
Please see [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines.
|
|
197
|
+
|
|
198
|
+
## License
|
|
199
|
+
|
|
200
|
+
MIT License - See [LICENSE](LICENSE) file for details.
|
|
201
|
+
|
|
202
|
+
## Acknowledgments
|
|
203
|
+
|
|
204
|
+
This project draws inspiration from:
|
|
205
|
+
- DBeaver (SQL database viewer)
|
|
206
|
+
- MongoDB Compass (NoSQL database GUI)
|
|
207
|
+
- Pinecone Console
|
|
208
|
+
- Various vector database management tools
|
|
209
|
+
|
|
210
|
+
---
|
|
211
|
+
|
|
212
|
+
**Status**: ✅ Phase 2 Complete - Advanced Features Implemented!
|
|
213
|
+
|
|
214
|
+
**What's New in Phase 2:**
|
|
215
|
+
- 🔍 Advanced metadata filtering with customizable filter rules (AND/OR logic)
|
|
216
|
+
- ✏️ Double-click to edit items directly in the data browser
|
|
217
|
+
- 📥 Import data from CSV, JSON, and Parquet files
|
|
218
|
+
- 📤 Export filtered data to CSV, JSON, and Parquet formats
|
|
219
|
+
- 💾 Comprehensive backup and restore system for collections
|
|
220
|
+
- 🔄 Metadata filters integrated with search for powerful queries
|
|
221
|
+
|
|
222
|
+
See [GETTING_STARTED.md](GETTING_STARTED.md) for usage instructions and [IMPLEMENTATION_SUMMARY.md](IMPLEMENTATION_SUMMARY.md) for technical details.
|
|
223
|
+
|
|
224
|
+
**Contact**: Anthony Dawson
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "vector-inspector"
|
|
3
|
-
version = "0.2.0"
|
|
3
|
+
version = "0.2.2"
|
|
4
4
|
description = "A comprehensive desktop application for visualizing, querying, and managing vector database data"
|
|
5
5
|
authors = [
|
|
6
6
|
{ name = "Anthony Dawson", email = "anthonypdawson+github@gmail.com" },
|
|
@@ -25,6 +25,12 @@ readme = "README.md"
|
|
|
25
25
|
[project.license]
|
|
26
26
|
text = "MIT"
|
|
27
27
|
|
|
28
|
+
[project.urls]
|
|
29
|
+
Homepage = "https://github.com/anthony-dawson/vector-inspector"
|
|
30
|
+
Source = "https://github.com/anthony-dawson/vector-inspector"
|
|
31
|
+
Issues = "https://github.com/anthony-dawson/vector-inspector/issues"
|
|
32
|
+
Documentation = "https://github.com/anthony-dawson/vector-inspector#readme"
|
|
33
|
+
|
|
28
34
|
[project.scripts]
|
|
29
35
|
vector-inspector = "vector_inspector.main:main"
|
|
30
36
|
|
|
@@ -282,7 +282,7 @@ class MainWindow(QMainWindow):
|
|
|
282
282
|
"<h2>Vector Inspector 0.1.0</h2>"
|
|
283
283
|
"<p>A comprehensive desktop application for visualizing, "
|
|
284
284
|
"querying, and managing vector database data.</p>"
|
|
285
|
-
'<p><a href="https://github.com/anthonypdawson/vector-
|
|
285
|
+
'<p><a href="https://github.com/anthonypdawson/vector-inspector" style="color:#2980b9;">GitHub Project Page</a></p>'
|
|
286
286
|
"<hr />"
|
|
287
287
|
"<p>Built with PySide6 and ChromaDB</p>"
|
|
288
288
|
)
|
|
@@ -1,60 +1,60 @@
|
|
|
1
|
-
import pytest
|
|
2
|
-
from vector_inspector.core.connections.chroma_connection import ChromaDBConnection
|
|
3
|
-
from vector_inspector.core.connections.qdrant_connection import QdrantConnection
|
|
4
|
-
import uuid
|
|
5
|
-
|
|
6
|
-
@pytest.mark.parametrize("provider", ["chroma", "qdrant"])
|
|
7
|
-
def test_provider_integration(provider, tmp_path):
|
|
8
|
-
"""Test provider connection using standard add_items signature."""
|
|
9
|
-
collection_name = f"test_collection_{uuid.uuid4().hex[:8]}"
|
|
10
|
-
test_ids = ["id1", "id2"]
|
|
11
|
-
test_vectors = [[0.1, 0.2], [0.3, 0.4]]
|
|
12
|
-
test_docs = ["hello", "world"]
|
|
13
|
-
test_metadata = [{"type": "greeting"}, {"type": "noun"}]
|
|
14
|
-
|
|
15
|
-
if provider == "chroma":
|
|
16
|
-
conn = ChromaDBConnection()
|
|
17
|
-
assert conn.connect()
|
|
18
|
-
assert conn.create_collection(collection_name, vector_size=2)
|
|
19
|
-
# Use standard signature: collection_name, documents, metadatas, ids, embeddings
|
|
20
|
-
success = conn.add_items(
|
|
21
|
-
collection_name,
|
|
22
|
-
documents=test_docs,
|
|
23
|
-
metadatas=test_metadata,
|
|
24
|
-
ids=test_ids,
|
|
25
|
-
embeddings=test_vectors
|
|
26
|
-
)
|
|
27
|
-
assert success
|
|
28
|
-
assert collection_name in conn.list_collections()
|
|
29
|
-
# Verify items inserted
|
|
30
|
-
info = conn.get_collection_info(collection_name)
|
|
31
|
-
assert info["count"] == 2
|
|
32
|
-
res = conn.get_all_items(collection_name, limit=10)
|
|
33
|
-
assert len(res["documents"]) == 2
|
|
34
|
-
assert conn.delete_collection(collection_name)
|
|
35
|
-
assert collection_name not in conn.list_collections()
|
|
36
|
-
|
|
37
|
-
elif provider == "qdrant":
|
|
38
|
-
db_path = str(tmp_path / "qdrant_test")
|
|
39
|
-
conn = QdrantConnection(path=db_path)
|
|
40
|
-
assert conn.connect()
|
|
41
|
-
assert conn.create_collection(collection_name, vector_size=2, distance="Cosine")
|
|
42
|
-
# Use standard signature
|
|
43
|
-
success = conn.add_items(
|
|
44
|
-
collection_name,
|
|
45
|
-
documents=test_docs,
|
|
46
|
-
metadatas=test_metadata,
|
|
47
|
-
ids=test_ids,
|
|
48
|
-
embeddings=test_vectors
|
|
49
|
-
)
|
|
50
|
-
assert success
|
|
51
|
-
assert collection_name in conn.list_collections()
|
|
52
|
-
# Verify items inserted
|
|
53
|
-
info = conn.get_collection_info(collection_name)
|
|
54
|
-
if info["count"] == 0:
|
|
55
|
-
pytest.skip("Qdrant local upsert not supported in this environment")
|
|
56
|
-
assert info["count"] == 2
|
|
57
|
-
res = conn.get_all_items(collection_name, limit=10)
|
|
58
|
-
assert len(res["documents"]) == 2
|
|
59
|
-
assert conn.delete_collection(collection_name)
|
|
60
|
-
assert collection_name not in conn.list_collections()
|
|
1
|
+
import pytest
|
|
2
|
+
from vector_inspector.core.connections.chroma_connection import ChromaDBConnection
|
|
3
|
+
from vector_inspector.core.connections.qdrant_connection import QdrantConnection
|
|
4
|
+
import uuid
|
|
5
|
+
|
|
6
|
+
@pytest.mark.parametrize("provider", ["chroma", "qdrant"])
|
|
7
|
+
def test_provider_integration(provider, tmp_path):
|
|
8
|
+
"""Test provider connection using standard add_items signature."""
|
|
9
|
+
collection_name = f"test_collection_{uuid.uuid4().hex[:8]}"
|
|
10
|
+
test_ids = ["id1", "id2"]
|
|
11
|
+
test_vectors = [[0.1, 0.2], [0.3, 0.4]]
|
|
12
|
+
test_docs = ["hello", "world"]
|
|
13
|
+
test_metadata = [{"type": "greeting"}, {"type": "noun"}]
|
|
14
|
+
|
|
15
|
+
if provider == "chroma":
|
|
16
|
+
conn = ChromaDBConnection()
|
|
17
|
+
assert conn.connect()
|
|
18
|
+
assert conn.create_collection(collection_name, vector_size=2)
|
|
19
|
+
# Use standard signature: collection_name, documents, metadatas, ids, embeddings
|
|
20
|
+
success = conn.add_items(
|
|
21
|
+
collection_name,
|
|
22
|
+
documents=test_docs,
|
|
23
|
+
metadatas=test_metadata,
|
|
24
|
+
ids=test_ids,
|
|
25
|
+
embeddings=test_vectors
|
|
26
|
+
)
|
|
27
|
+
assert success
|
|
28
|
+
assert collection_name in conn.list_collections()
|
|
29
|
+
# Verify items inserted
|
|
30
|
+
info = conn.get_collection_info(collection_name)
|
|
31
|
+
assert info["count"] == 2
|
|
32
|
+
res = conn.get_all_items(collection_name, limit=10)
|
|
33
|
+
assert len(res["documents"]) == 2
|
|
34
|
+
assert conn.delete_collection(collection_name)
|
|
35
|
+
assert collection_name not in conn.list_collections()
|
|
36
|
+
|
|
37
|
+
elif provider == "qdrant":
|
|
38
|
+
db_path = str(tmp_path / "qdrant_test")
|
|
39
|
+
conn = QdrantConnection(path=db_path)
|
|
40
|
+
assert conn.connect()
|
|
41
|
+
assert conn.create_collection(collection_name, vector_size=2, distance="Cosine")
|
|
42
|
+
# Use standard signature
|
|
43
|
+
success = conn.add_items(
|
|
44
|
+
collection_name,
|
|
45
|
+
documents=test_docs,
|
|
46
|
+
metadatas=test_metadata,
|
|
47
|
+
ids=test_ids,
|
|
48
|
+
embeddings=test_vectors
|
|
49
|
+
)
|
|
50
|
+
assert success
|
|
51
|
+
assert collection_name in conn.list_collections()
|
|
52
|
+
# Verify items inserted
|
|
53
|
+
info = conn.get_collection_info(collection_name)
|
|
54
|
+
if info["count"] == 0:
|
|
55
|
+
pytest.skip("Qdrant local upsert not supported in this environment")
|
|
56
|
+
assert info["count"] == 2
|
|
57
|
+
res = conn.get_all_items(collection_name, limit=10)
|
|
58
|
+
assert len(res["documents"]) == 2
|
|
59
|
+
assert conn.delete_collection(collection_name)
|
|
60
|
+
assert collection_name not in conn.list_collections()
|