vector-inspector 0.3.4__py3-none-any.whl → 0.3.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,18 +4,18 @@ from typing import Optional, Dict, Type
4
4
  from .base_provider import EmbeddingProvider
5
5
  from .sentence_transformer_provider import SentenceTransformerProvider
6
6
  from .clip_provider import CLIPProvider
7
- from ..model_registry import get_model_registry
7
+ from vector_inspector.core.model_registry import get_model_registry
8
8
 
9
9
 
10
10
  class ProviderFactory:
11
11
  """Factory for creating appropriate embedding providers based on model type."""
12
-
12
+
13
13
  # Registry of provider classes by type
14
14
  _PROVIDER_REGISTRY: Dict[str, Type[EmbeddingProvider]] = {
15
15
  "sentence-transformer": SentenceTransformerProvider,
16
16
  "clip": CLIPProvider,
17
17
  }
18
-
18
+
19
19
  # Model name patterns to auto-detect provider type
20
20
  _MODEL_PATTERNS = {
21
21
  "clip": ["clip", "CLIP"],
@@ -33,40 +33,37 @@ class ProviderFactory:
33
33
  "e5-",
34
34
  "jina-",
35
35
  "nomic-",
36
- ]
36
+ ],
37
37
  }
38
-
38
+
39
39
  @classmethod
40
40
  def create(
41
- cls,
42
- model_name: str,
43
- model_type: Optional[str] = None,
44
- **kwargs
41
+ cls, model_name: str, model_type: Optional[str] = None, **kwargs
45
42
  ) -> EmbeddingProvider:
46
43
  """Create an embedding provider for the given model.
47
-
44
+
48
45
  Args:
49
46
  model_name: Model identifier (HF ID, path, or API name)
50
47
  model_type: Explicit provider type (sentence-transformer, clip, openai, etc.)
51
48
  If None, will attempt auto-detection based on model name
52
49
  **kwargs: Additional arguments passed to provider constructor
53
-
50
+
54
51
  Returns:
55
52
  Appropriate EmbeddingProvider instance
56
-
53
+
57
54
  Raises:
58
55
  ValueError: If model type is unknown or cannot be auto-detected
59
56
  """
60
57
  # Auto-detect provider type if not specified
61
58
  if model_type is None:
62
59
  model_type = cls._detect_provider_type(model_name)
63
-
60
+
64
61
  # Normalize model type
65
62
  model_type = model_type.lower()
66
-
63
+
67
64
  # Get provider class from registry
68
65
  provider_class = cls._PROVIDER_REGISTRY.get(model_type)
69
-
66
+
70
67
  if provider_class is None:
71
68
  # Check if it's a cloud provider (not yet implemented)
72
69
  if model_type in ["openai", "cohere", "vertex-ai", "voyage"]:
@@ -79,22 +76,22 @@ class ProviderFactory:
79
76
  f"Unknown provider type: {model_type}. "
80
77
  f"Supported types: {', '.join(cls._PROVIDER_REGISTRY.keys())}"
81
78
  )
82
-
79
+
83
80
  # Create and return provider instance
84
81
  return provider_class(model_name, **kwargs)
85
-
82
+
86
83
  @classmethod
87
84
  def _detect_provider_type(cls, model_name: str) -> str:
88
85
  """Auto-detect provider type based on model name patterns.
89
-
86
+
90
87
  First checks the known model registry, then falls back to pattern matching.
91
-
88
+
92
89
  Args:
93
90
  model_name: Model identifier
94
-
91
+
95
92
  Returns:
96
93
  Detected provider type
97
-
94
+
98
95
  Raises:
99
96
  ValueError: If provider type cannot be detected
100
97
  """
@@ -103,54 +100,54 @@ class ProviderFactory:
103
100
  model_info = registry.get_model_by_name(model_name)
104
101
  if model_info:
105
102
  return model_info.type
106
-
103
+
107
104
  # Fall back to pattern matching
108
105
  model_name_lower = model_name.lower()
109
-
106
+
110
107
  # Check each pattern category
111
108
  for provider_type, patterns in cls._MODEL_PATTERNS.items():
112
109
  for pattern in patterns:
113
110
  if pattern.lower() in model_name_lower:
114
111
  return provider_type
115
-
112
+
116
113
  # Default to sentence-transformer for HuggingFace models
117
114
  if "/" in model_name and not model_name.startswith("http"):
118
115
  return "sentence-transformer"
119
-
116
+
120
117
  raise ValueError(
121
118
  f"Cannot auto-detect provider type for model: {model_name}. "
122
119
  "Please specify model_type explicitly."
123
120
  )
124
-
121
+
125
122
  @classmethod
126
123
  def register_provider(cls, model_type: str, provider_class: Type[EmbeddingProvider]):
127
124
  """Register a new provider type.
128
-
125
+
129
126
  Args:
130
127
  model_type: Provider type identifier
131
128
  provider_class: Provider class (must inherit from EmbeddingProvider)
132
129
  """
133
130
  if not issubclass(provider_class, EmbeddingProvider):
134
131
  raise TypeError(f"{provider_class} must inherit from EmbeddingProvider")
135
-
132
+
136
133
  cls._PROVIDER_REGISTRY[model_type.lower()] = provider_class
137
-
134
+
138
135
  @classmethod
139
136
  def list_supported_types(cls) -> list:
140
137
  """Get list of supported provider types.
141
-
138
+
142
139
  Returns:
143
140
  List of registered provider type names
144
141
  """
145
142
  return list(cls._PROVIDER_REGISTRY.keys())
146
-
143
+
147
144
  @classmethod
148
145
  def supports_type(cls, model_type: str) -> bool:
149
146
  """Check if a provider type is supported.
150
-
147
+
151
148
  Args:
152
149
  model_type: Provider type to check
153
-
150
+
154
151
  Returns:
155
152
  True if supported, False otherwise
156
153
  """
@@ -159,17 +156,15 @@ class ProviderFactory:
159
156
 
160
157
  # Convenience function for creating providers
161
158
  def create_provider(
162
- model_name: str,
163
- model_type: Optional[str] = None,
164
- **kwargs
159
+ model_name: str, model_type: Optional[str] = None, **kwargs
165
160
  ) -> EmbeddingProvider:
166
161
  """Create an embedding provider (convenience wrapper around ProviderFactory).
167
-
162
+
168
163
  Args:
169
164
  model_name: Model identifier
170
165
  model_type: Optional explicit provider type
171
166
  **kwargs: Additional arguments for provider
172
-
167
+
173
168
  Returns:
174
169
  EmbeddingProvider instance
175
170
  """
@@ -4,7 +4,7 @@ from typing import Optional, Union, Tuple
4
4
  from sentence_transformers import SentenceTransformer
5
5
  from vector_inspector.core.logging import log_info
6
6
 
7
- from .model_registry import get_model_registry
7
+ from vector_inspector.core.model_registry import get_model_registry
8
8
 
9
9
 
10
10
  # Default model to use when dimension is unknown or not mapped
@@ -87,7 +87,7 @@ def get_available_models_for_dimension(dimension: int) -> list:
87
87
 
88
88
  # Add custom models from settings
89
89
  try:
90
- from ..services.settings_service import SettingsService
90
+ from vector_inspector.services.settings_service import SettingsService
91
91
 
92
92
  settings = SettingsService()
93
93
  custom_models = settings.get_custom_embedding_models(dimension)
@@ -6,29 +6,26 @@ from pathlib import Path
6
6
  from datetime import datetime
7
7
  import shutil
8
8
 
9
- from ..core.logging import log_info, log_error, log_debug
9
+ from vector_inspector.core.logging import log_info, log_error, log_debug
10
10
  from .backup_helpers import write_backup_zip, read_backup_zip, normalize_embeddings
11
11
 
12
12
 
13
13
  class BackupRestoreService:
14
14
  """Handles backup and restore operations for vector database collections."""
15
-
15
+
16
16
  @staticmethod
17
17
  def backup_collection(
18
- connection,
19
- collection_name: str,
20
- backup_dir: str,
21
- include_embeddings: bool = True
18
+ connection, collection_name: str, backup_dir: str, include_embeddings: bool = True
22
19
  ) -> Optional[str]:
23
20
  """
24
21
  Backup a collection to a directory.
25
-
22
+
26
23
  Args:
27
24
  connection: Vector database connection
28
25
  collection_name: Name of collection to backup
29
26
  backup_dir: Directory to store backups
30
27
  include_embeddings: Whether to include embedding vectors
31
-
28
+
32
29
  Returns:
33
30
  Path to backup file or None if failed
34
31
  """
@@ -69,23 +66,20 @@ class BackupRestoreService:
69
66
  except Exception as e:
70
67
  log_error("Backup failed: %s", e)
71
68
  return None
72
-
69
+
73
70
  @staticmethod
74
71
  def restore_collection(
75
- connection,
76
- backup_file: str,
77
- collection_name: Optional[str] = None,
78
- overwrite: bool = False
72
+ connection, backup_file: str, collection_name: Optional[str] = None, overwrite: bool = False
79
73
  ) -> bool:
80
74
  """
81
75
  Restore a collection from a backup file.
82
-
76
+
83
77
  Args:
84
78
  connection: Vector database connection
85
79
  backup_file: Path to backup zip file
86
80
  collection_name: Optional new name for restored collection
87
81
  overwrite: Whether to overwrite existing collection
88
-
82
+
89
83
  Returns:
90
84
  True if successful, False otherwise
91
85
  """
@@ -97,7 +91,10 @@ class BackupRestoreService:
97
91
  existing_collections = connection.list_collections()
98
92
  if restore_collection_name in existing_collections:
99
93
  if not overwrite:
100
- log_info("Collection %s already exists. Use overwrite=True to replace it.", restore_collection_name)
94
+ log_info(
95
+ "Collection %s already exists. Use overwrite=True to replace it.",
96
+ restore_collection_name,
97
+ )
101
98
  return False
102
99
  else:
103
100
  connection.delete_collection(restore_collection_name)
@@ -129,7 +126,10 @@ class BackupRestoreService:
129
126
  log_error("Failed to restore collection %s", restore_collection_name)
130
127
  try:
131
128
  if restore_collection_name in connection.list_collections():
132
- log_info("Cleaning up failed restore: deleting collection '%s'", restore_collection_name)
129
+ log_info(
130
+ "Cleaning up failed restore: deleting collection '%s'",
131
+ restore_collection_name,
132
+ )
133
133
  connection.delete_collection(restore_collection_name)
134
134
  except Exception as cleanup_error:
135
135
  log_error("Warning: Failed to clean up collection: %s", cleanup_error)
@@ -138,21 +138,27 @@ class BackupRestoreService:
138
138
  except Exception as e:
139
139
  log_error("Restore failed: %s", e)
140
140
  try:
141
- if restore_collection_name and restore_collection_name in connection.list_collections():
142
- log_info("Cleaning up failed restore: deleting collection '%s'", restore_collection_name)
141
+ if (
142
+ restore_collection_name
143
+ and restore_collection_name in connection.list_collections()
144
+ ):
145
+ log_info(
146
+ "Cleaning up failed restore: deleting collection '%s'",
147
+ restore_collection_name,
148
+ )
143
149
  connection.delete_collection(restore_collection_name)
144
150
  except Exception as cleanup_error:
145
151
  log_error("Warning: Failed to clean up collection: %s", cleanup_error)
146
152
  return False
147
-
153
+
148
154
  @staticmethod
149
155
  def list_backups(backup_dir: str) -> list:
150
156
  """
151
157
  List all backup files in a directory.
152
-
158
+
153
159
  Args:
154
160
  backup_dir: Directory containing backups
155
-
161
+
156
162
  Returns:
157
163
  List of backup file information dictionaries
158
164
  """
@@ -164,28 +170,30 @@ class BackupRestoreService:
164
170
  for backup_file in backup_path.glob("*_backup_*.zip"):
165
171
  try:
166
172
  metadata, _ = read_backup_zip(backup_file)
167
- backups.append({
168
- "file_path": str(backup_file),
169
- "file_name": backup_file.name,
170
- "collection_name": metadata.get("collection_name", "Unknown"),
171
- "timestamp": metadata.get("backup_timestamp", "Unknown"),
172
- "item_count": metadata.get("item_count", 0),
173
- "file_size": backup_file.stat().st_size,
174
- })
173
+ backups.append(
174
+ {
175
+ "file_path": str(backup_file),
176
+ "file_name": backup_file.name,
177
+ "collection_name": metadata.get("collection_name", "Unknown"),
178
+ "timestamp": metadata.get("backup_timestamp", "Unknown"),
179
+ "item_count": metadata.get("item_count", 0),
180
+ "file_size": backup_file.stat().st_size,
181
+ }
182
+ )
175
183
  except Exception:
176
184
  continue
177
185
 
178
186
  backups.sort(key=lambda x: x["timestamp"], reverse=True)
179
187
  return backups
180
-
188
+
181
189
  @staticmethod
182
190
  def delete_backup(backup_file: str) -> bool:
183
191
  """
184
192
  Delete a backup file.
185
-
193
+
186
194
  Args:
187
195
  backup_file: Path to backup file to delete
188
-
196
+
189
197
  Returns:
190
198
  True if successful, False otherwise
191
199
  """