thoth-dbmanager 0.5.1__tar.gz → 0.5.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. {thoth_dbmanager-0.5.1/thoth_dbmanager.egg-info → thoth_dbmanager-0.5.2}/PKG-INFO +6 -1
  2. {thoth_dbmanager-0.5.1 → thoth_dbmanager-0.5.2}/pyproject.toml +4 -1
  3. {thoth_dbmanager-0.5.1 → thoth_dbmanager-0.5.2}/thoth_dbmanager/core/interfaces.py +81 -0
  4. {thoth_dbmanager-0.5.1 → thoth_dbmanager-0.5.2}/thoth_dbmanager/plugins/sqlite.py +1 -0
  5. {thoth_dbmanager-0.5.1 → thoth_dbmanager-0.5.2/thoth_dbmanager.egg-info}/PKG-INFO +6 -1
  6. {thoth_dbmanager-0.5.1 → thoth_dbmanager-0.5.2}/thoth_dbmanager.egg-info/requires.txt +6 -0
  7. {thoth_dbmanager-0.5.1 → thoth_dbmanager-0.5.2}/LICENSE +0 -0
  8. {thoth_dbmanager-0.5.1 → thoth_dbmanager-0.5.2}/MANIFEST.in +0 -0
  9. {thoth_dbmanager-0.5.1 → thoth_dbmanager-0.5.2}/README.md +0 -0
  10. {thoth_dbmanager-0.5.1 → thoth_dbmanager-0.5.2}/setup.cfg +0 -0
  11. {thoth_dbmanager-0.5.1 → thoth_dbmanager-0.5.2}/tests/test_lsh_interactive.py +0 -0
  12. {thoth_dbmanager-0.5.1 → thoth_dbmanager-0.5.2}/tests/test_thoth_db_manager_base.py +0 -0
  13. {thoth_dbmanager-0.5.1 → thoth_dbmanager-0.5.2}/thoth_dbmanager/ThothDbManager.py +0 -0
  14. {thoth_dbmanager-0.5.1 → thoth_dbmanager-0.5.2}/thoth_dbmanager/__init__.py +0 -0
  15. {thoth_dbmanager-0.5.1 → thoth_dbmanager-0.5.2}/thoth_dbmanager/adapters/__init__.py +0 -0
  16. {thoth_dbmanager-0.5.1 → thoth_dbmanager-0.5.2}/thoth_dbmanager/adapters/mariadb.py +0 -0
  17. {thoth_dbmanager-0.5.1 → thoth_dbmanager-0.5.2}/thoth_dbmanager/adapters/postgresql.py +0 -0
  18. {thoth_dbmanager-0.5.1 → thoth_dbmanager-0.5.2}/thoth_dbmanager/adapters/sqlite.py +0 -0
  19. {thoth_dbmanager-0.5.1 → thoth_dbmanager-0.5.2}/thoth_dbmanager/adapters/sqlserver.py +0 -0
  20. {thoth_dbmanager-0.5.1 → thoth_dbmanager-0.5.2}/thoth_dbmanager/core/__init__.py +0 -0
  21. {thoth_dbmanager-0.5.1 → thoth_dbmanager-0.5.2}/thoth_dbmanager/core/factory.py +0 -0
  22. {thoth_dbmanager-0.5.1 → thoth_dbmanager-0.5.2}/thoth_dbmanager/core/registry.py +0 -0
  23. {thoth_dbmanager-0.5.1 → thoth_dbmanager-0.5.2}/thoth_dbmanager/documents.py +0 -0
  24. {thoth_dbmanager-0.5.1 → thoth_dbmanager-0.5.2}/thoth_dbmanager/dynamic_imports.py +0 -0
  25. {thoth_dbmanager-0.5.1 → thoth_dbmanager-0.5.2}/thoth_dbmanager/helpers/__init__.py +0 -0
  26. {thoth_dbmanager-0.5.1 → thoth_dbmanager-0.5.2}/thoth_dbmanager/helpers/multi_db_generator.py +0 -0
  27. {thoth_dbmanager-0.5.1 → thoth_dbmanager-0.5.2}/thoth_dbmanager/helpers/preprocess_values.py +0 -0
  28. {thoth_dbmanager-0.5.1 → thoth_dbmanager-0.5.2}/thoth_dbmanager/helpers/schema.py +0 -0
  29. {thoth_dbmanager-0.5.1 → thoth_dbmanager-0.5.2}/thoth_dbmanager/helpers/search.py +0 -0
  30. {thoth_dbmanager-0.5.1 → thoth_dbmanager-0.5.2}/thoth_dbmanager/lsh/__init__.py +0 -0
  31. {thoth_dbmanager-0.5.1 → thoth_dbmanager-0.5.2}/thoth_dbmanager/lsh/core.py +0 -0
  32. {thoth_dbmanager-0.5.1 → thoth_dbmanager-0.5.2}/thoth_dbmanager/lsh/factory.py +0 -0
  33. {thoth_dbmanager-0.5.1 → thoth_dbmanager-0.5.2}/thoth_dbmanager/lsh/manager.py +0 -0
  34. {thoth_dbmanager-0.5.1 → thoth_dbmanager-0.5.2}/thoth_dbmanager/lsh/storage.py +0 -0
  35. {thoth_dbmanager-0.5.1 → thoth_dbmanager-0.5.2}/thoth_dbmanager/plugins/__init__.py +0 -0
  36. {thoth_dbmanager-0.5.1 → thoth_dbmanager-0.5.2}/thoth_dbmanager/plugins/mariadb.py +0 -0
  37. {thoth_dbmanager-0.5.1 → thoth_dbmanager-0.5.2}/thoth_dbmanager/plugins/postgresql.py +0 -0
  38. {thoth_dbmanager-0.5.1 → thoth_dbmanager-0.5.2}/thoth_dbmanager/plugins/sqlserver.py +0 -0
  39. {thoth_dbmanager-0.5.1 → thoth_dbmanager-0.5.2}/thoth_dbmanager.egg-info/SOURCES.txt +0 -0
  40. {thoth_dbmanager-0.5.1 → thoth_dbmanager-0.5.2}/thoth_dbmanager.egg-info/dependency_links.txt +0 -0
  41. {thoth_dbmanager-0.5.1 → thoth_dbmanager-0.5.2}/thoth_dbmanager.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: thoth_dbmanager
3
- Version: 0.5.1
3
+ Version: 0.5.2
4
4
  Summary: A Python library for managing SQL databases with support for multiple database types, LSH-based similarity search, and a modern plugin architecture.
5
5
  Author-email: Marco Pancotti <mp@tylconsulting.it>
6
6
  Project-URL: Homepage, https://github.com/mptyl/thoth_dbmanager
@@ -35,10 +35,15 @@ Requires-Dist: mariadb>=1.1.0; extra == "mariadb"
35
35
  Provides-Extra: sqlserver
36
36
  Requires-Dist: pyodbc>=4.0.0; extra == "sqlserver"
37
37
  Provides-Extra: sqlite
38
+ Provides-Extra: embeddings
39
+ Requires-Dist: sentence-transformers>=2.0.0; extra == "embeddings"
40
+ Requires-Dist: numpy>=1.21.0; extra == "embeddings"
38
41
  Provides-Extra: all
39
42
  Requires-Dist: psycopg2-binary>=2.9.0; extra == "all"
40
43
  Requires-Dist: mariadb>=1.1.0; extra == "all"
41
44
  Requires-Dist: pyodbc>=4.0.0; extra == "all"
45
+ Requires-Dist: sentence-transformers>=2.0.0; extra == "all"
46
+ Requires-Dist: numpy>=1.21.0; extra == "all"
42
47
  Provides-Extra: dev
43
48
  Requires-Dist: pytest>=7.0.0; extra == "dev"
44
49
  Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "thoth_dbmanager"
7
- version = "0.5.1"
7
+ version = "0.5.2"
8
8
  authors = [
9
9
  { name="Marco Pancotti", email="mp@tylconsulting.it" },
10
10
  ]
@@ -39,12 +39,15 @@ postgresql = ["psycopg2-binary>=2.9.0"]
39
39
  mariadb = ["mariadb>=1.1.0"]
40
40
  sqlserver = ["pyodbc>=4.0.0"]
41
41
  sqlite = []
42
+ embeddings = ["sentence-transformers>=2.0.0", "numpy>=1.21.0"]
42
43
 
43
44
  # Convenience groups
44
45
  all = [
45
46
  "psycopg2-binary>=2.9.0",
46
47
  "mariadb>=1.1.0",
47
48
  "pyodbc>=4.0.0",
49
+ "sentence-transformers>=2.0.0",
50
+ "numpy>=1.21.0",
48
51
  ]
49
52
 
50
53
  # Development dependencies
@@ -279,3 +279,84 @@ class DbPlugin(ABC):
279
279
  if not self.adapter:
280
280
  raise RuntimeError("Plugin not initialized")
281
281
  return self.adapter.get_unique_values()
282
+
283
+ def get_embedding_function(self):
284
+ """
285
+ Get the embedding function for similarity computations.
286
+
287
+ Returns:
288
+ SafeSentenceTransformer: An embedding function with encode method
289
+ """
290
+ try:
291
+ # Import SafeSentenceTransformer
292
+ try:
293
+ from sentence_transformers import SentenceTransformer
294
+ import logging
295
+
296
+ logger = logging.getLogger(__name__)
297
+
298
+ class SafeSentenceTransformer:
299
+ """
300
+ Wrapper for SentenceTransformer that handles PyTorch meta tensor issues.
301
+ """
302
+ def __init__(self, model_name_or_path: str):
303
+ self.model_name_or_path = model_name_or_path
304
+ self._model = None
305
+
306
+ def _get_model(self):
307
+ """Lazy initialization of the SentenceTransformer model."""
308
+ if self._model is None:
309
+ try:
310
+ logger.info(f"Initializing SentenceTransformer with model: {self.model_name_or_path}")
311
+ self._model = SentenceTransformer(
312
+ model_name_or_path=self.model_name_or_path,
313
+ device='cpu' # Explicitly set device to CPU to avoid meta tensor issues
314
+ )
315
+ logger.info("SentenceTransformer initialized successfully")
316
+ except Exception as e:
317
+ logger.error(f"Failed to initialize SentenceTransformer: {e}")
318
+ # Try alternative initialization approach
319
+ try:
320
+ logger.info("Trying alternative initialization approach...")
321
+ self._model = SentenceTransformer(self.model_name_or_path)
322
+ # Move to CPU explicitly after initialization
323
+ self._model = self._model.to('cpu')
324
+ logger.info("Alternative initialization successful")
325
+ except Exception as e2:
326
+ logger.error(f"Alternative initialization also failed: {e2}")
327
+ raise e2
328
+ return self._model
329
+
330
+ def encode(self, sentences, **kwargs):
331
+ """Encode sentences using the underlying SentenceTransformer model."""
332
+ model = self._get_model()
333
+ return model.encode(sentences, **kwargs)
334
+
335
+ return SafeSentenceTransformer(
336
+ model_name_or_path="paraphrase-multilingual-MiniLM-L12-v2"
337
+ )
338
+
339
+ except ImportError:
340
+ import logging
341
+ logger = logging.getLogger(__name__)
342
+ logger.warning("sentence_transformers not available, creating dummy embedding function")
343
+ # Create a dummy embedding function for testing
344
+ class DummyEmbeddingFunction:
345
+ def encode(self, sentences, **kwargs):
346
+ import numpy as np
347
+ # Return dummy embeddings - same shape for all sentences
348
+ return np.random.rand(len(sentences), 384) # 384 is typical embedding size
349
+
350
+ return DummyEmbeddingFunction()
351
+
352
+ except Exception as e:
353
+ import logging
354
+ logger = logging.getLogger(__name__)
355
+ logger.error(f"Failed to create embedding function: {e}")
356
+ # Return a basic dummy function as fallback
357
+ class BasicDummyEmbeddingFunction:
358
+ def encode(self, sentences, **kwargs):
359
+ import numpy as np
360
+ return np.random.rand(len(sentences), 384)
361
+
362
+ return BasicDummyEmbeddingFunction()
@@ -225,3 +225,4 @@ class SQLitePlugin(DbPlugin):
225
225
  return self.adapter.health_check()
226
226
  else:
227
227
  return False
228
+
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: thoth_dbmanager
3
- Version: 0.5.1
3
+ Version: 0.5.2
4
4
  Summary: A Python library for managing SQL databases with support for multiple database types, LSH-based similarity search, and a modern plugin architecture.
5
5
  Author-email: Marco Pancotti <mp@tylconsulting.it>
6
6
  Project-URL: Homepage, https://github.com/mptyl/thoth_dbmanager
@@ -35,10 +35,15 @@ Requires-Dist: mariadb>=1.1.0; extra == "mariadb"
35
35
  Provides-Extra: sqlserver
36
36
  Requires-Dist: pyodbc>=4.0.0; extra == "sqlserver"
37
37
  Provides-Extra: sqlite
38
+ Provides-Extra: embeddings
39
+ Requires-Dist: sentence-transformers>=2.0.0; extra == "embeddings"
40
+ Requires-Dist: numpy>=1.21.0; extra == "embeddings"
38
41
  Provides-Extra: all
39
42
  Requires-Dist: psycopg2-binary>=2.9.0; extra == "all"
40
43
  Requires-Dist: mariadb>=1.1.0; extra == "all"
41
44
  Requires-Dist: pyodbc>=4.0.0; extra == "all"
45
+ Requires-Dist: sentence-transformers>=2.0.0; extra == "all"
46
+ Requires-Dist: numpy>=1.21.0; extra == "all"
42
47
  Provides-Extra: dev
43
48
  Requires-Dist: pytest>=7.0.0; extra == "dev"
44
49
  Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
@@ -9,6 +9,8 @@ requests>=2.25.0
9
9
  psycopg2-binary>=2.9.0
10
10
  mariadb>=1.1.0
11
11
  pyodbc>=4.0.0
12
+ sentence-transformers>=2.0.0
13
+ numpy>=1.21.0
12
14
 
13
15
  [dev]
14
16
  pytest>=7.0.0
@@ -22,6 +24,10 @@ twine>=4.0.0
22
24
  psutil>=5.8.0
23
25
  docker>=6.0.0
24
26
 
27
+ [embeddings]
28
+ sentence-transformers>=2.0.0
29
+ numpy>=1.21.0
30
+
25
31
  [mariadb]
26
32
  mariadb>=1.1.0
27
33
 
File without changes