vector-inspector 0.2.7__tar.gz → 0.3.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. {vector_inspector-0.2.7 → vector_inspector-0.3.1}/PKG-INFO +1 -1
  2. {vector_inspector-0.2.7 → vector_inspector-0.3.1}/pyproject.toml +1 -1
  3. vector_inspector-0.3.1/src/vector_inspector/config/__init__.py +4 -0
  4. vector_inspector-0.3.1/src/vector_inspector/config/known_embedding_models.json +432 -0
  5. {vector_inspector-0.2.7 → vector_inspector-0.3.1}/src/vector_inspector/core/connections/__init__.py +2 -1
  6. {vector_inspector-0.2.7 → vector_inspector-0.3.1}/src/vector_inspector/core/connections/base_connection.py +42 -1
  7. {vector_inspector-0.2.7 → vector_inspector-0.3.1}/src/vector_inspector/core/connections/chroma_connection.py +47 -11
  8. vector_inspector-0.3.1/src/vector_inspector/core/connections/pinecone_connection.py +768 -0
  9. vector_inspector-0.3.1/src/vector_inspector/core/embedding_providers/__init__.py +14 -0
  10. vector_inspector-0.3.1/src/vector_inspector/core/embedding_providers/base_provider.py +128 -0
  11. vector_inspector-0.3.1/src/vector_inspector/core/embedding_providers/clip_provider.py +260 -0
  12. vector_inspector-0.3.1/src/vector_inspector/core/embedding_providers/provider_factory.py +176 -0
  13. vector_inspector-0.3.1/src/vector_inspector/core/embedding_providers/sentence_transformer_provider.py +203 -0
  14. {vector_inspector-0.2.7 → vector_inspector-0.3.1}/src/vector_inspector/core/embedding_utils.py +69 -42
  15. vector_inspector-0.3.1/src/vector_inspector/core/model_registry.py +205 -0
  16. {vector_inspector-0.2.7 → vector_inspector-0.3.1}/src/vector_inspector/services/backup_restore_service.py +16 -0
  17. vector_inspector-0.3.1/src/vector_inspector/services/settings_service.py +195 -0
  18. {vector_inspector-0.2.7 → vector_inspector-0.3.1}/src/vector_inspector/ui/components/connection_manager_panel.py +7 -0
  19. {vector_inspector-0.2.7 → vector_inspector-0.3.1}/src/vector_inspector/ui/components/profile_manager_panel.py +61 -14
  20. vector_inspector-0.3.1/src/vector_inspector/ui/dialogs/__init__.py +6 -0
  21. {vector_inspector-0.2.7 → vector_inspector-0.3.1}/src/vector_inspector/ui/dialogs/cross_db_migration.py +20 -1
  22. vector_inspector-0.3.1/src/vector_inspector/ui/dialogs/embedding_config_dialog.py +315 -0
  23. vector_inspector-0.3.1/src/vector_inspector/ui/dialogs/provider_type_dialog.py +189 -0
  24. {vector_inspector-0.2.7 → vector_inspector-0.3.1}/src/vector_inspector/ui/main_window.py +33 -2
  25. {vector_inspector-0.2.7 → vector_inspector-0.3.1}/src/vector_inspector/ui/views/connection_view.py +55 -10
  26. {vector_inspector-0.2.7 → vector_inspector-0.3.1}/src/vector_inspector/ui/views/info_panel.py +83 -36
  27. {vector_inspector-0.2.7 → vector_inspector-0.3.1}/src/vector_inspector/ui/views/search_view.py +1 -1
  28. {vector_inspector-0.2.7 → vector_inspector-0.3.1}/src/vector_inspector/ui/views/visualization_view.py +19 -5
  29. {vector_inspector-0.2.7 → vector_inspector-0.3.1}/tests/test_connections.py +12 -1
  30. vector_inspector-0.3.1/tests/test_pinecone_connection.py +405 -0
  31. vector_inspector-0.2.7/src/vector_inspector/services/settings_service.py +0 -79
  32. vector_inspector-0.2.7/src/vector_inspector/ui/dialogs/__init__.py +0 -5
  33. vector_inspector-0.2.7/src/vector_inspector/ui/dialogs/embedding_config_dialog.py +0 -176
  34. {vector_inspector-0.2.7 → vector_inspector-0.3.1}/README.md +0 -0
  35. {vector_inspector-0.2.7 → vector_inspector-0.3.1}/src/vector_inspector/__init__.py +0 -0
  36. {vector_inspector-0.2.7 → vector_inspector-0.3.1}/src/vector_inspector/__main__.py +0 -0
  37. {vector_inspector-0.2.7 → vector_inspector-0.3.1}/src/vector_inspector/core/__init__.py +0 -0
  38. {vector_inspector-0.2.7 → vector_inspector-0.3.1}/src/vector_inspector/core/cache_manager.py +0 -0
  39. {vector_inspector-0.2.7 → vector_inspector-0.3.1}/src/vector_inspector/core/connection_manager.py +0 -0
  40. {vector_inspector-0.2.7 → vector_inspector-0.3.1}/src/vector_inspector/core/connections/qdrant_connection.py +0 -0
  41. {vector_inspector-0.2.7 → vector_inspector-0.3.1}/src/vector_inspector/core/connections/template_connection.py +0 -0
  42. {vector_inspector-0.2.7 → vector_inspector-0.3.1}/src/vector_inspector/main.py +0 -0
  43. {vector_inspector-0.2.7 → vector_inspector-0.3.1}/src/vector_inspector/services/__init__.py +0 -0
  44. {vector_inspector-0.2.7 → vector_inspector-0.3.1}/src/vector_inspector/services/credential_service.py +0 -0
  45. {vector_inspector-0.2.7 → vector_inspector-0.3.1}/src/vector_inspector/services/filter_service.py +0 -0
  46. {vector_inspector-0.2.7 → vector_inspector-0.3.1}/src/vector_inspector/services/import_export_service.py +0 -0
  47. {vector_inspector-0.2.7 → vector_inspector-0.3.1}/src/vector_inspector/services/profile_service.py +0 -0
  48. {vector_inspector-0.2.7 → vector_inspector-0.3.1}/src/vector_inspector/services/visualization_service.py +0 -0
  49. {vector_inspector-0.2.7 → vector_inspector-0.3.1}/src/vector_inspector/ui/__init__.py +0 -0
  50. {vector_inspector-0.2.7 → vector_inspector-0.3.1}/src/vector_inspector/ui/components/__init__.py +0 -0
  51. {vector_inspector-0.2.7 → vector_inspector-0.3.1}/src/vector_inspector/ui/components/backup_restore_dialog.py +0 -0
  52. {vector_inspector-0.2.7 → vector_inspector-0.3.1}/src/vector_inspector/ui/components/filter_builder.py +0 -0
  53. {vector_inspector-0.2.7 → vector_inspector-0.3.1}/src/vector_inspector/ui/components/item_dialog.py +0 -0
  54. {vector_inspector-0.2.7 → vector_inspector-0.3.1}/src/vector_inspector/ui/components/loading_dialog.py +0 -0
  55. {vector_inspector-0.2.7 → vector_inspector-0.3.1}/src/vector_inspector/ui/views/__init__.py +0 -0
  56. {vector_inspector-0.2.7 → vector_inspector-0.3.1}/src/vector_inspector/ui/views/collection_browser.py +0 -0
  57. {vector_inspector-0.2.7 → vector_inspector-0.3.1}/src/vector_inspector/ui/views/metadata_view.py +0 -0
  58. {vector_inspector-0.2.7 → vector_inspector-0.3.1}/src/vector_inspector/utils/__init__.py +0 -0
  59. {vector_inspector-0.2.7 → vector_inspector-0.3.1}/src/vector_inspector/utils/lazy_imports.py +0 -0
  60. {vector_inspector-0.2.7 → vector_inspector-0.3.1}/tests/test_filter_service.py +0 -0
  61. /vector_inspector-0.2.7/tests/vector_inspector.py → /vector_inspector-0.3.1/tests/test_runner.py +0 -0
  62. {vector_inspector-0.2.7 → vector_inspector-0.3.1}/tests/test_settings_service.py +0 -0
{vector_inspector-0.2.7 → vector_inspector-0.3.1}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: vector-inspector
- Version: 0.2.7
+ Version: 0.3.1
  Summary: A comprehensive desktop application for visualizing, querying, and managing vector database data
  Author-Email: Anthony Dawson <anthonypdawson+github@gmail.com>
  License: MIT
{vector_inspector-0.2.7 → vector_inspector-0.3.1}/pyproject.toml
@@ -1,6 +1,6 @@
  [project]
  name = "vector-inspector"
- version = "0.2.7"
+ version = "0.3.1"
  description = "A comprehensive desktop application for visualizing, querying, and managing vector database data"
  authors = [
      { name = "Anthony Dawson", email = "anthonypdawson+github@gmail.com" },
vector_inspector-0.3.1/src/vector_inspector/config/__init__.py
@@ -0,0 +1,4 @@
+ """Data package for Vector Inspector.
+
+ Contains static data files like the model registry.
+ """
vector_inspector-0.3.1/src/vector_inspector/config/known_embedding_models.json
@@ -0,0 +1,432 @@
+ {
+   "models": [
+     {
+       "name": "all-MiniLM-L6-v2",
+       "type": "sentence-transformer",
+       "dimension": 384,
+       "modality": "text",
+       "normalization": "l2",
+       "source": "huggingface",
+       "description": "Fast, small-footprint text embeddings (good default for text search)"
+     },
+     {
+       "name": "openai/clip-vit-base-patch32",
+       "type": "clip",
+       "dimension": 512,
+       "modality": "multimodal",
+       "normalization": "l2",
+       "source": "huggingface",
+       "description": "Standard CLIP ViT-B/32 model; supports matching text ↔ images"
+     },
+     {
+       "name": "paraphrase-albert-small-v2",
+       "type": "sentence-transformer",
+       "dimension": 768,
+       "modality": "text",
+       "normalization": "l2",
+       "source": "huggingface",
+       "description": "Smaller paraphrase-specialized model"
+     },
+     {
+       "name": "all-mpnet-base-v2",
+       "type": "sentence-transformer",
+       "dimension": 768,
+       "modality": "text",
+       "normalization": "l2",
+       "source": "huggingface",
+       "description": "High-quality text embeddings; recommended for semantic tasks"
+     },
+     {
+       "name": "all-roberta-large-v1",
+       "type": "sentence-transformer",
+       "dimension": 1024,
+       "modality": "text",
+       "normalization": "l2",
+       "source": "huggingface",
+       "description": "Large model — high quality, larger memory and compute"
+     },
+     {
+       "name": "gtr-t5-large",
+       "type": "sentence-transformer",
+       "dimension": 1536,
+       "modality": "text",
+       "normalization": "l2",
+       "source": "huggingface",
+       "description": "Very large embeddings useful for specialized high-recall tasks"
+     },
+     {
+       "name": "sentence-transformers/multi-qa-MiniLM-L6-cos-v1",
+       "type": "sentence-transformer",
+       "dimension": 384,
+       "modality": "text",
+       "normalization": "l2",
+       "source": "huggingface",
+       "description": "Optimized for semantic search and question-answering tasks"
+     },
+     {
+       "name": "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
+       "type": "sentence-transformer",
+       "dimension": 384,
+       "modality": "text",
+       "normalization": "l2",
+       "source": "huggingface",
+       "description": "Multilingual support for 50+ languages"
+     },
+     {
+       "name": "sentence-transformers/msmarco-distilbert-base-v4",
+       "type": "sentence-transformer",
+       "dimension": 768,
+       "modality": "text",
+       "normalization": "l2",
+       "source": "huggingface",
+       "description": "Trained on MS MARCO dataset, good for passage retrieval"
+     },
+     {
+       "name": "sentence-transformers/all-distilroberta-v1",
+       "type": "sentence-transformer",
+       "dimension": 768,
+       "modality": "text",
+       "normalization": "l2",
+       "source": "huggingface",
+       "description": "Distilled RoBERTa model, balance of speed and quality"
+     },
+     {
+       "name": "sentence-transformers/paraphrase-mpnet-base-v2",
+       "type": "sentence-transformer",
+       "dimension": 768,
+       "modality": "text",
+       "normalization": "l2",
+       "source": "huggingface",
+       "description": "High-quality paraphrase detection and semantic similarity"
+     },
+     {
+       "name": "BAAI/bge-small-en-v1.5",
+       "type": "sentence-transformer",
+       "dimension": 384,
+       "modality": "text",
+       "normalization": "l2",
+       "source": "huggingface",
+       "description": "Beijing Academy of AI model, strong performance for size"
+     },
+     {
+       "name": "BAAI/bge-base-en-v1.5",
+       "type": "sentence-transformer",
+       "dimension": 768,
+       "modality": "text",
+       "normalization": "l2",
+       "source": "huggingface",
+       "description": "High-quality English embeddings, MTEB benchmark leader"
+     },
+     {
+       "name": "BAAI/bge-large-en-v1.5",
+       "type": "sentence-transformer",
+       "dimension": 1024,
+       "modality": "text",
+       "normalization": "l2",
+       "source": "huggingface",
+       "description": "Large model with excellent retrieval performance"
+     },
+     {
+       "name": "thenlper/gte-small",
+       "type": "sentence-transformer",
+       "dimension": 384,
+       "modality": "text",
+       "normalization": "l2",
+       "source": "huggingface",
+       "description": "General Text Embeddings (GTE) small variant"
+     },
+     {
+       "name": "thenlper/gte-base",
+       "type": "sentence-transformer",
+       "dimension": 768,
+       "modality": "text",
+       "normalization": "l2",
+       "source": "huggingface",
+       "description": "General Text Embeddings (GTE) base model"
+     },
+     {
+       "name": "thenlper/gte-large",
+       "type": "sentence-transformer",
+       "dimension": 1024,
+       "modality": "text",
+       "normalization": "l2",
+       "source": "huggingface",
+       "description": "General Text Embeddings (GTE) large variant"
+     },
+     {
+       "name": "intfloat/e5-small-v2",
+       "type": "sentence-transformer",
+       "dimension": 384,
+       "modality": "text",
+       "normalization": "l2",
+       "source": "huggingface",
+       "description": "E5 family small model, prefix with 'query: ' or 'passage: '"
+     },
+     {
+       "name": "intfloat/e5-base-v2",
+       "type": "sentence-transformer",
+       "dimension": 768,
+       "modality": "text",
+       "normalization": "l2",
+       "source": "huggingface",
+       "description": "E5 family base model, strong asymmetric retrieval"
+     },
+     {
+       "name": "intfloat/e5-large-v2",
+       "type": "sentence-transformer",
+       "dimension": 1024,
+       "modality": "text",
+       "normalization": "l2",
+       "source": "huggingface",
+       "description": "E5 family large model, top MTEB performance"
+     },
+     {
+       "name": "intfloat/multilingual-e5-small",
+       "type": "sentence-transformer",
+       "dimension": 384,
+       "modality": "text",
+       "normalization": "l2",
+       "source": "huggingface",
+       "description": "Multilingual E5 model supporting 100+ languages"
+     },
+     {
+       "name": "intfloat/multilingual-e5-base",
+       "type": "sentence-transformer",
+       "dimension": 768,
+       "modality": "text",
+       "normalization": "l2",
+       "source": "huggingface",
+       "description": "Multilingual E5 base model, excellent cross-lingual retrieval"
+     },
+     {
+       "name": "intfloat/multilingual-e5-large",
+       "type": "sentence-transformer",
+       "dimension": 1024,
+       "modality": "text",
+       "normalization": "l2",
+       "source": "huggingface",
+       "description": "Multilingual E5 large model, best-in-class multilingual embeddings"
+     },
+     {
+       "name": "openai/clip-vit-large-patch14",
+       "type": "clip",
+       "dimension": 768,
+       "modality": "multimodal",
+       "normalization": "l2",
+       "source": "huggingface",
+       "description": "Larger CLIP ViT-L/14 model, better quality than base"
+     },
+     {
+       "name": "openai/clip-vit-large-patch14-336",
+       "type": "clip",
+       "dimension": 768,
+       "modality": "multimodal",
+       "normalization": "l2",
+       "source": "huggingface",
+       "description": "Higher resolution (336x336) variant of ViT-L/14"
+     },
+     {
+       "name": "laion/CLIP-ViT-B-32-laion2B-s34B-b79K",
+       "type": "clip",
+       "dimension": 512,
+       "modality": "multimodal",
+       "normalization": "l2",
+       "source": "huggingface",
+       "description": "LAION's CLIP trained on 2B image-text pairs"
+     },
+     {
+       "name": "laion/CLIP-ViT-H-14-laion2B-s32B-b79K",
+       "type": "clip",
+       "dimension": 1024,
+       "modality": "multimodal",
+       "normalization": "l2",
+       "source": "huggingface",
+       "description": "LAION's huge CLIP model, excellent quality"
+     },
+     {
+       "name": "text-embedding-ada-002",
+       "type": "openai",
+       "dimension": 1536,
+       "modality": "text",
+       "normalization": "l2",
+       "source": "openai-api",
+       "description": "OpenAI's production embedding model (legacy). Requires API key."
+     },
+     {
+       "name": "text-embedding-3-small",
+       "type": "openai",
+       "dimension": 1536,
+       "modality": "text",
+       "normalization": "l2",
+       "source": "openai-api",
+       "description": "OpenAI's newer small model, better than ada-002. Requires API key."
+     },
+     {
+       "name": "text-embedding-3-large",
+       "type": "openai",
+       "dimension": 3072,
+       "modality": "text",
+       "normalization": "l2",
+       "source": "openai-api",
+       "description": "OpenAI's large embedding model, highest quality. Requires API key."
+     },
+     {
+       "name": "embed-english-v3.0",
+       "type": "cohere",
+       "dimension": 1024,
+       "modality": "text",
+       "normalization": "none",
+       "source": "cohere-api",
+       "description": "Cohere's English embedding model. Requires API key."
+     },
+     {
+       "name": "embed-english-light-v3.0",
+       "type": "cohere",
+       "dimension": 384,
+       "modality": "text",
+       "normalization": "none",
+       "source": "cohere-api",
+       "description": "Cohere's lightweight English model. Requires API key."
+     },
+     {
+       "name": "embed-multilingual-v3.0",
+       "type": "cohere",
+       "dimension": 1024,
+       "modality": "text",
+       "normalization": "none",
+       "source": "cohere-api",
+       "description": "Cohere's multilingual model supporting 100+ languages. Requires API key."
+     },
+     {
+       "name": "embed-multilingual-light-v3.0",
+       "type": "cohere",
+       "dimension": 384,
+       "modality": "text",
+       "normalization": "none",
+       "source": "cohere-api",
+       "description": "Cohere's lightweight multilingual model. Requires API key."
+     },
+     {
+       "name": "textembedding-gecko@003",
+       "type": "vertex-ai",
+       "dimension": 768,
+       "modality": "text",
+       "normalization": "l2",
+       "source": "google-cloud",
+       "description": "Google's Gecko model for text embeddings. Requires Google Cloud credentials."
+     },
+     {
+       "name": "text-embedding-004",
+       "type": "vertex-ai",
+       "dimension": 768,
+       "modality": "text",
+       "normalization": "l2",
+       "source": "google-cloud",
+       "description": "Google's latest text embedding model. Requires Google Cloud credentials."
+     },
+     {
+       "name": "text-multilingual-embedding-002",
+       "type": "vertex-ai",
+       "dimension": 768,
+       "modality": "text",
+       "normalization": "l2",
+       "source": "google-cloud",
+       "description": "Google's multilingual embedding model. Requires Google Cloud credentials."
+     },
+     {
+       "name": "multimodalembedding@001",
+       "type": "vertex-ai",
+       "dimension": 1408,
+       "modality": "multimodal",
+       "normalization": "l2",
+       "source": "google-cloud",
+       "description": "Google's multimodal embedding model. Requires Google Cloud credentials."
+     },
+     {
+       "name": "voyage-large-2",
+       "type": "voyage",
+       "dimension": 1536,
+       "modality": "text",
+       "normalization": "l2",
+       "source": "voyage-api",
+       "description": "Voyage AI's large model. Requires API key."
+     },
+     {
+       "name": "voyage-code-2",
+       "type": "voyage",
+       "dimension": 1536,
+       "modality": "text",
+       "normalization": "l2",
+       "source": "voyage-api",
+       "description": "Voyage AI's code-optimized model. Requires API key."
+     },
+     {
+       "name": "voyage-2",
+       "type": "voyage",
+       "dimension": 1024,
+       "modality": "text",
+       "normalization": "l2",
+       "source": "voyage-api",
+       "description": "Voyage AI's general-purpose model. Requires API key."
+     },
+     {
+       "name": "jinaai/jina-embeddings-v2-base-en",
+       "type": "sentence-transformer",
+       "dimension": 768,
+       "modality": "text",
+       "normalization": "l2",
+       "source": "huggingface",
+       "description": "Jina AI's 8k context length model, good for long documents"
+     },
+     {
+       "name": "jinaai/jina-embeddings-v2-small-en",
+       "type": "sentence-transformer",
+       "dimension": 512,
+       "modality": "text",
+       "normalization": "l2",
+       "source": "huggingface",
+       "description": "Jina AI's small model with 8k context length"
+     },
+     {
+       "name": "nomic-ai/nomic-embed-text-v1",
+       "type": "sentence-transformer",
+       "dimension": 768,
+       "modality": "text",
+       "normalization": "l2",
+       "source": "huggingface",
+       "description": "Nomic's open-source text embedding model with 8k context"
+     },
+     {
+       "name": "nomic-ai/nomic-embed-text-v1.5",
+       "type": "sentence-transformer",
+       "dimension": 768,
+       "modality": "text",
+       "normalization": "l2",
+       "source": "huggingface",
+       "description": "Nomic's improved model with better performance"
+     },
+     {
+       "name": "Alibaba-NLP/gte-Qwen2-7B-instruct",
+       "type": "sentence-transformer",
+       "dimension": 3584,
+       "modality": "text",
+       "normalization": "l2",
+       "source": "huggingface",
+       "description": "Very large instruction-following embedding model, SOTA on many benchmarks"
+     },
+     {
+       "name": "nvidia/NV-Embed-v1",
+       "type": "sentence-transformer",
+       "dimension": 4096,
+       "modality": "text",
+       "normalization": "l2",
+       "source": "huggingface",
+       "description": "NVIDIA's embedding model, excellent for retrieval tasks"
+     }
+   ],
+   "metadata": {
+     "version": "1.0.0",
+     "last_updated": "2026-01-24",
+     "description": "Known embedding models registry for Vector Inspector"
+   }
+ }
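
The registry above ships as static JSON inside the vector_inspector.config package; the new model_registry.py (+205 lines, not shown in this diff) is what actually consumes it. As a rough sketch of reading the file, assuming only the JSON layout shown above (load_registry and find_model are illustrative names, not the package's API):

    import json
    from importlib import resources
    from typing import Optional

    def load_registry() -> dict:
        # Read known_embedding_models.json out of the installed package
        path = resources.files("vector_inspector.config") / "known_embedding_models.json"
        return json.loads(path.read_text(encoding="utf-8"))

    def find_model(name: str) -> Optional[dict]:
        # A linear scan is fine here: the registry holds ~47 entries
        for model in load_registry()["models"]:
            if model["name"] == name:
                return model
        return None

    info = find_model("all-MiniLM-L6-v2")
    if info:
        print(info["dimension"], info["normalization"])  # 384 l2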
{vector_inspector-0.2.7 → vector_inspector-0.3.1}/src/vector_inspector/core/connections/__init__.py
@@ -3,5 +3,6 @@
  from .base_connection import VectorDBConnection
  from .chroma_connection import ChromaDBConnection
  from .qdrant_connection import QdrantConnection
+ from .pinecone_connection import PineconeConnection
 
- __all__ = ["VectorDBConnection", "ChromaDBConnection", "QdrantConnection"]
+ __all__ = ["VectorDBConnection", "ChromaDBConnection", "QdrantConnection", "PineconeConnection"]
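
With the export in place, the new Pinecone backend is importable alongside the existing ones; a minimal check (no connection is opened here):

    # All four public names from __all__ resolve after this release
    from vector_inspector.core.connections import (
        ChromaDBConnection,
        PineconeConnection,
        QdrantConnection,
        VectorDBConnection,
    )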
{vector_inspector-0.2.7 → vector_inspector-0.3.1}/src/vector_inspector/core/connections/base_connection.py
@@ -229,5 +229,46 @@ class VectorDBConnection(ABC):
          {"name": "in", "server_side": True},
          {"name": "not in", "server_side": True},
          {"name": "contains", "server_side": False},
-         {"name": "not contains", "server_side": False},
      ]
+
+     def get_embedding_model(self, collection_name: str, connection_id: Optional[str] = None) -> Optional[str]:
+         """
+         Get the embedding model used for a collection.
+
+         Retrieves the model name from:
+         1. Collection-level metadata (if supported)
+         2. Vector metadata (_embedding_model field)
+         3. User settings (for collections we can't modify)
+
+         Args:
+             collection_name: Name of collection
+             connection_id: Optional connection ID for settings lookup
+
+         Returns:
+             Model name string (e.g., "sentence-transformers/all-MiniLM-L6-v2") or None
+         """
+         try:
+             # First try to get from collection-level metadata
+             info = self.get_collection_info(collection_name)
+             if info and info.get("embedding_model"):
+                 return info["embedding_model"]
+
+             # Fall back to checking a sample vector's metadata
+             data = self.get_all_items(collection_name, limit=1, offset=0)
+             if data and data.get("metadatas") and len(data["metadatas"]) > 0:
+                 metadata = data["metadatas"][0]
+                 if "_embedding_model" in metadata:
+                     return metadata["_embedding_model"]
+
+             # Finally, check user settings (for collections we can't modify)
+             if connection_id:
+                 from ...services.settings_service import SettingsService
+                 settings = SettingsService()
+                 model_info = settings.get_embedding_model(connection_id, collection_name)
+                 if model_info:
+                     return model_info["model"]
+
+             return None
+         except Exception as e:
+             print(f"Failed to get embedding model: {e}")
+             return None
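
The three-tier lookup gives callers a single resolution point regardless of backend. A hedged usage sketch, assuming `conn` is an already-connected VectorDBConnection subclass (construction omitted) and the collection/connection IDs are made up:

    # "documents" and "local-chroma" are example identifiers
    model_name = conn.get_embedding_model("documents", connection_id="local-chroma")
    if model_name is None:
        # All three sources came up empty: no collection-level metadata, no
        # _embedding_model field on a sampled vector, no saved user setting
        model_name = "all-MiniLM-L6-v2"  # caller-chosen default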
{vector_inspector-0.2.7 → vector_inspector-0.3.1}/src/vector_inspector/core/connections/chroma_connection.py
@@ -212,27 +212,37 @@ class ChromaDBConnection(VectorDBConnection):
              # ChromaDB uses cosine distance by default (or can be configured)
              # Try to get metadata from collection if available
              distance_metric = "Cosine (default)"
+             embedding_model = None
              try:
                  # ChromaDB collections may have metadata about distance function
                  col_metadata = collection.metadata
-                 if col_metadata and "hnsw:space" in col_metadata:
-                     space = col_metadata["hnsw:space"]
-                     if space == "l2":
-                         distance_metric = "Euclidean (L2)"
-                     elif space == "ip":
-                         distance_metric = "Inner Product"
-                     elif space == "cosine":
-                         distance_metric = "Cosine"
+                 if col_metadata:
+                     if "hnsw:space" in col_metadata:
+                         space = col_metadata["hnsw:space"]
+                         if space == "l2":
+                             distance_metric = "Euclidean (L2)"
+                         elif space == "ip":
+                             distance_metric = "Inner Product"
+                         elif space == "cosine":
+                             distance_metric = "Cosine"
+                     # Get embedding model if stored
+                     if "embedding_model" in col_metadata:
+                         embedding_model = col_metadata["embedding_model"]
              except:
                  pass  # Use default if unable to determine
 
-             return {
+             result = {
                  "name": name,
                  "count": count,
                  "metadata_fields": metadata_fields,
                  "vector_dimension": vector_dimension,
                  "distance_metric": distance_metric,
              }
+
+             if embedding_model:
+                 result["embedding_model"] = embedding_model
+
+             return result
          except Exception as e:
              print(f"Failed to get collection info: {e}")
              return None
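
The `hnsw:space` key read here is Chroma's own collection-metadata convention, while `embedding_model` is Vector Inspector's addition in this release. A sketch of writing metadata this code path would then report, using the public chromadb client (the path and collection name are examples):

    import chromadb

    client = chromadb.PersistentClient(path="./chroma_data")
    collection = client.create_collection(
        name="documents",
        metadata={
            "hnsw:space": "ip",                     # reported as "Inner Product"
            "embedding_model": "all-MiniLM-L6-v2",  # surfaced by get_collection_info
        },
    )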
@@ -453,8 +463,34 @@ class ChromaDBConnection(VectorDBConnection):
 
      # Implement base connection uniform APIs
      def create_collection(self, name: str, vector_size: int, distance: str = "Cosine") -> bool:
-         """Create a collection. Chroma doesn't require vector size at creation."""
-         return self.get_collection(name) is not None
+         """Create a collection. If it doesn't exist, attempt to create it using Chroma client APIs."""
+         if not self._client:
+             return False
+
+         try:
+             # Prefer get_or_create_collection if available
+             if hasattr(self._client, "get_or_create_collection"):
+                 col = self._client.get_or_create_collection(name=name)
+                 self._current_collection = col
+                 return True
+
+             # Fallback to create_collection/create and then fetch
+             if hasattr(self._client, "create_collection"):
+                 try:
+                     self._client.create_collection(name=name)
+                 except Exception:
+                     # Some clients may raise if already exists; ignore
+                     pass
+                 col = self._client.get_collection(name=name)
+                 self._current_collection = col
+                 return col is not None
+
+             # As a last resort, check if collection exists
+             col = self.get_collection(name)
+             return col is not None
+         except Exception as e:
+             print(f"Failed to create collection: {e}")
+             return False
 
      def get_items(self, name: str, ids: List[str]) -> Dict[str, Any]:
          """Retrieve items by IDs."""