thoth-dbmanager 0.4.0__py3-none-any.whl → 0.4.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. thoth_dbmanager/ThothDbManager.py +459 -0
  2. thoth_dbmanager/__init__.py +136 -0
  3. thoth_dbmanager/adapters/__init__.py +21 -0
  4. thoth_dbmanager/adapters/mariadb.py +165 -0
  5. thoth_dbmanager/adapters/mysql.py +165 -0
  6. thoth_dbmanager/adapters/oracle.py +554 -0
  7. thoth_dbmanager/adapters/postgresql.py +444 -0
  8. thoth_dbmanager/adapters/qdrant.py +189 -0
  9. thoth_dbmanager/adapters/sqlite.py +385 -0
  10. thoth_dbmanager/adapters/sqlserver.py +583 -0
  11. thoth_dbmanager/adapters/supabase.py +249 -0
  12. thoth_dbmanager/core/__init__.py +13 -0
  13. thoth_dbmanager/core/factory.py +272 -0
  14. thoth_dbmanager/core/interfaces.py +271 -0
  15. thoth_dbmanager/core/registry.py +220 -0
  16. thoth_dbmanager/documents.py +155 -0
  17. thoth_dbmanager/dynamic_imports.py +250 -0
  18. thoth_dbmanager/helpers/__init__.py +0 -0
  19. thoth_dbmanager/helpers/multi_db_generator.py +508 -0
  20. thoth_dbmanager/helpers/preprocess_values.py +159 -0
  21. thoth_dbmanager/helpers/schema.py +376 -0
  22. thoth_dbmanager/helpers/search.py +117 -0
  23. thoth_dbmanager/lsh/__init__.py +21 -0
  24. thoth_dbmanager/lsh/core.py +182 -0
  25. thoth_dbmanager/lsh/factory.py +76 -0
  26. thoth_dbmanager/lsh/manager.py +170 -0
  27. thoth_dbmanager/lsh/storage.py +96 -0
  28. thoth_dbmanager/plugins/__init__.py +23 -0
  29. thoth_dbmanager/plugins/mariadb.py +436 -0
  30. thoth_dbmanager/plugins/mysql.py +408 -0
  31. thoth_dbmanager/plugins/oracle.py +150 -0
  32. thoth_dbmanager/plugins/postgresql.py +145 -0
  33. thoth_dbmanager/plugins/qdrant.py +41 -0
  34. thoth_dbmanager/plugins/sqlite.py +170 -0
  35. thoth_dbmanager/plugins/sqlserver.py +149 -0
  36. thoth_dbmanager/plugins/supabase.py +224 -0
  37. {thoth_dbmanager-0.4.0.dist-info → thoth_dbmanager-0.4.2.dist-info}/METADATA +9 -6
  38. thoth_dbmanager-0.4.2.dist-info/RECORD +41 -0
  39. thoth_dbmanager-0.4.2.dist-info/top_level.txt +1 -0
  40. thoth_dbmanager-0.4.0.dist-info/RECORD +0 -5
  41. thoth_dbmanager-0.4.0.dist-info/top_level.txt +0 -1
  42. {thoth_dbmanager-0.4.0.dist-info → thoth_dbmanager-0.4.2.dist-info}/WHEEL +0 -0
  43. {thoth_dbmanager-0.4.0.dist-info → thoth_dbmanager-0.4.2.dist-info}/licenses/LICENSE +0 -0
--- /dev/null
+++ b/thoth_dbmanager/adapters/postgresql.py
@@ -0,0 +1,444 @@
+ """
+ PostgreSQL adapter implementation.
+ """
+ import logging
+ from typing import Any, Dict, List, Optional, Union
+ import psycopg2
+ from psycopg2.extras import RealDictCursor
+ from sqlalchemy import create_engine, text
+ from sqlalchemy.exc import SQLAlchemyError
+
+ from ..core.interfaces import DbAdapter
+ from ..documents import (
+     TableDocument,
+     ColumnDocument,
+     SchemaDocument,
+     ForeignKeyDocument,
+     IndexDocument
+ )
+
+ logger = logging.getLogger(__name__)
+
+
+ class PostgreSQLAdapter(DbAdapter):
+     """
+     PostgreSQL database adapter implementation.
+     """
+
+     def __init__(self, connection_params: Dict[str, Any]):
+         super().__init__(connection_params)
+         self.engine = None
+         self.raw_connection = None
+
+     def connect(self) -> None:
+         """Establish PostgreSQL connection"""
+         try:
+             # Create SQLAlchemy engine
+             connection_string = self._build_connection_string()
+             self.engine = create_engine(connection_string, echo=False)
+
+             # Test connection
+             with self.engine.connect() as conn:
+                 conn.execute(text("SELECT 1"))
+
+             # Also create raw psycopg2 connection for specific operations
+             self.raw_connection = psycopg2.connect(**self._get_psycopg2_params())
+
+             self._initialized = True
+             logger.info("PostgreSQL connection established successfully")
+
+         except Exception as e:
+             logger.error(f"Failed to connect to PostgreSQL: {e}")
+             raise
+
+     def disconnect(self) -> None:
+         """Close PostgreSQL connection"""
+         try:
+             if self.engine:
+                 self.engine.dispose()
+                 self.engine = None
+
+             if self.raw_connection:
+                 self.raw_connection.close()
+                 self.raw_connection = None
+
+             self._initialized = False
+             logger.info("PostgreSQL connection closed")
+
+         except Exception as e:
+             logger.error(f"Error closing PostgreSQL connection: {e}")
+
+     def _build_connection_string(self) -> str:
+         """Build SQLAlchemy connection string"""
+         params = self.connection_params
+         host = params.get('host', 'localhost')
+         port = params.get('port', 5432)
+         database = params.get('database')
+         user = params.get('user')
+         password = params.get('password')
+
+         if not all([database, user, password]):
+             raise ValueError("Missing required connection parameters: database, user, password")
+
+         return f"postgresql://{user}:{password}@{host}:{port}/{database}"
+
+     def _get_psycopg2_params(self) -> Dict[str, Any]:
+         """Get parameters for psycopg2 connection"""
+         return {
+             'host': self.connection_params.get('host', 'localhost'),
+             'port': self.connection_params.get('port', 5432),
+             'database': self.connection_params.get('database'),
+             'user': self.connection_params.get('user'),
+             'password': self.connection_params.get('password')
+         }
+
+     def execute_query(self, query: str, params: Optional[Dict] = None, fetch: Union[str, int] = "all", timeout: int = 60) -> Any:
+         """Execute SQL query"""
+         if not self.engine:
+             raise RuntimeError("Not connected to database")
+
+         try:
+             with self.engine.connect() as conn:
+                 # Set query timeout (PostgreSQL expects milliseconds)
+                 conn.execute(text(f"SET statement_timeout = {timeout * 1000}"))
+
+                 # Execute query
+                 if params:
+                     result = conn.execute(text(query), params)
+                 else:
+                     result = conn.execute(text(query))
+
+                 # Handle different fetch modes
+                 if query.strip().upper().startswith(('SELECT', 'WITH')):
+                     if fetch == "all":
+                         return result.fetchall()
+                     elif fetch == "one":
+                         return result.fetchone()
+                     elif isinstance(fetch, int):
+                         return result.fetchmany(fetch)
+                     else:
+                         return result.fetchall()
+                 else:
+                     # For non-SELECT queries, commit and return rowcount
+                     conn.commit()
+                     return result.rowcount
+
+         except SQLAlchemyError as e:
+             logger.error(f"PostgreSQL query error: {e}")
+             raise
+
+     def get_tables_as_documents(self) -> List[TableDocument]:
+         """Get tables as document objects"""
+         # Join pg_class through pg_namespace so a table name that exists in
+         # several schemas does not match the wrong relation
+         query = """
+             SELECT
+                 pt.schemaname as schema_name,
+                 pt.tablename as table_name,
+                 COALESCE(obj_description(c.oid, 'pg_class'), '') as comment
+             FROM pg_tables pt
+             LEFT JOIN pg_namespace n ON n.nspname = pt.schemaname
+             LEFT JOIN pg_class c ON c.relname = pt.tablename AND c.relnamespace = n.oid
+             WHERE pt.schemaname NOT IN ('information_schema', 'pg_catalog', 'pg_toast')
+             ORDER BY pt.schemaname, pt.tablename
+         """
+
+         results = self.execute_query(query)
+         documents = []
+
+         for row in results:
+             doc = TableDocument(
+                 table_name=row.table_name,
+                 schema_name=row.schema_name,
+                 comment=row.comment or ""
+             )
+             documents.append(doc)
+
+         return documents
+
+     def get_columns_as_documents(self, table_name: str) -> List[ColumnDocument]:
+         """Get columns as document objects"""
+         # pg_class is resolved via pg_namespace, and the PK subquery matches
+         # constraints on schema and table as well as name, to avoid duplicate
+         # rows when the same table or constraint name appears in several schemas
+         query = """
+             SELECT
+                 c.column_name,
+                 c.data_type,
+                 c.is_nullable,
+                 c.column_default,
+                 c.character_maximum_length,
+                 COALESCE(pgd.description, '') as comment,
+                 CASE WHEN pk.column_name IS NOT NULL THEN true ELSE false END as is_pk,
+                 c.table_schema as schema_name
+             FROM information_schema.columns c
+             LEFT JOIN pg_namespace pgn ON pgn.nspname = c.table_schema
+             LEFT JOIN pg_class pgc ON pgc.relname = c.table_name AND pgc.relnamespace = pgn.oid
+             LEFT JOIN pg_description pgd ON pgd.objoid = pgc.oid AND pgd.objsubid = c.ordinal_position
+             LEFT JOIN (
+                 SELECT ku.column_name, ku.table_name, ku.table_schema
+                 FROM information_schema.table_constraints tc
+                 JOIN information_schema.key_column_usage ku
+                     ON ku.constraint_name = tc.constraint_name
+                     AND ku.constraint_schema = tc.constraint_schema
+                     AND ku.table_name = tc.table_name
+                 WHERE tc.constraint_type = 'PRIMARY KEY'
+             ) pk ON pk.column_name = c.column_name AND pk.table_name = c.table_name AND pk.table_schema = c.table_schema
+             WHERE c.table_name = :table_name
+               AND c.table_schema NOT IN ('information_schema', 'pg_catalog')
+             ORDER BY c.ordinal_position
+         """
+
+         results = self.execute_query(query, {"table_name": table_name})
+         documents = []
+
+         for row in results:
+             doc = ColumnDocument(
+                 table_name=table_name,
+                 column_name=row.column_name,
+                 data_type=row.data_type,
+                 comment=row.comment or "",
+                 is_pk=row.is_pk,
+                 is_nullable=row.is_nullable == 'YES',
+                 default_value=row.column_default,
+                 max_length=row.character_maximum_length,
+                 schema_name=row.schema_name
+             )
+             documents.append(doc)
+
+         return documents
+
+     def get_foreign_keys_as_documents(self) -> List[ForeignKeyDocument]:
+         """Get foreign keys as document objects"""
+         # Constraint usage views are joined on constraint schema as well as
+         # name, since constraint names are not globally unique
+         query = """
+             SELECT
+                 tc.constraint_name,
+                 tc.table_schema as schema_name,
+                 tc.table_name as source_table,
+                 kcu.column_name as source_column,
+                 ccu.table_name as target_table,
+                 ccu.column_name as target_column
+             FROM information_schema.table_constraints tc
+             JOIN information_schema.key_column_usage kcu
+                 ON kcu.constraint_name = tc.constraint_name
+                 AND kcu.constraint_schema = tc.constraint_schema
+                 AND kcu.table_name = tc.table_name
+             JOIN information_schema.constraint_column_usage ccu
+                 ON ccu.constraint_name = tc.constraint_name
+                 AND ccu.constraint_schema = tc.constraint_schema
+             WHERE tc.constraint_type = 'FOREIGN KEY'
+               AND tc.table_schema NOT IN ('information_schema', 'pg_catalog')
+             ORDER BY tc.table_schema, tc.table_name, kcu.ordinal_position
+         """
+
+         results = self.execute_query(query)
+         documents = []
+
+         for row in results:
+             doc = ForeignKeyDocument(
+                 source_table_name=row.source_table,
+                 source_column_name=row.source_column,
+                 target_table_name=row.target_table,
+                 target_column_name=row.target_column,
+                 constraint_name=row.constraint_name,
+                 schema_name=row.schema_name
+             )
+             documents.append(doc)
+
+         return documents
+
+     def get_schemas_as_documents(self) -> List[SchemaDocument]:
+         """Get schemas as document objects"""
+         query = """
+             SELECT
+                 s.schema_name,
+                 s.schema_owner as owner,
+                 COALESCE(obj_description(n.oid, 'pg_namespace'), '') as description
+             FROM information_schema.schemata s
+             LEFT JOIN pg_namespace n ON n.nspname = s.schema_name
+             WHERE s.schema_name NOT IN ('information_schema', 'pg_catalog', 'pg_toast')
+             ORDER BY s.schema_name
+         """
+
+         results = self.execute_query(query)
+         documents = []
+
+         for row in results:
+             doc = SchemaDocument(
+                 schema_name=row.schema_name,
+                 description=row.description or "",
+                 owner=row.owner
+             )
+             documents.append(doc)
+
+         return documents
+
+     def get_indexes_as_documents(self, table_name: Optional[str] = None) -> List[IndexDocument]:
+         """Get indexes as document objects"""
+         base_query = """
+             SELECT
+                 i.relname as index_name,
+                 t.relname as table_name,
+                 n.nspname as schema_name,
+                 ix.indisunique as is_unique,
+                 ix.indisprimary as is_primary,
+                 am.amname as index_type,
+                 array_agg(a.attname ORDER BY a.attnum) as columns
+             FROM pg_index ix
+             JOIN pg_class i ON i.oid = ix.indexrelid
+             JOIN pg_class t ON t.oid = ix.indrelid
+             JOIN pg_namespace n ON n.oid = t.relnamespace
+             JOIN pg_am am ON am.oid = i.relam
+             JOIN pg_attribute a ON a.attrelid = t.oid AND a.attnum = ANY(ix.indkey)
+             WHERE n.nspname NOT IN ('information_schema', 'pg_catalog', 'pg_toast')
+         """
+
+         if table_name:
+             query = base_query + " AND t.relname = :table_name"
+             params = {"table_name": table_name}
+         else:
+             query = base_query
+             params = None
+
+         query += " GROUP BY i.relname, t.relname, n.nspname, ix.indisunique, ix.indisprimary, am.amname ORDER BY t.relname, i.relname"
+
+         results = self.execute_query(query, params)
+         documents = []
+
+         for row in results:
+             doc = IndexDocument(
+                 index_name=row.index_name,
+                 table_name=row.table_name,
+                 columns=row.columns,
+                 is_unique=row.is_unique,
+                 is_primary=row.is_primary,
+                 index_type=row.index_type,
+                 schema_name=row.schema_name
+             )
+             documents.append(doc)
+
+         return documents
+
+     def get_unique_values(self) -> Dict[str, Dict[str, List[str]]]:
+         """Get unique values from the database"""
+         result = {}
+
+         # Get all tables
+         tables = self.get_tables_as_documents()
+
+         for table_doc in tables:
+             table_name = table_doc.table_name
+             schema_name = table_doc.schema_name
+             full_table_name = f"{schema_name}.{table_name}"
+
+             # Get columns for this table
+             columns = self.get_columns_as_documents(table_name)
+
+             result[table_name] = {}
+
+             for column_doc in columns:
+                 column_name = column_doc.column_name
+
+                 # Only get unique values for text/varchar columns to avoid large datasets
+                 if column_doc.data_type in ['text', 'varchar', 'character varying', 'char', 'character']:
+                     try:
+                         query = f"""
+                             SELECT DISTINCT "{column_name}"
+                             FROM "{schema_name}"."{table_name}"
+                             WHERE "{column_name}" IS NOT NULL
+                               AND LENGTH("{column_name}") > 0
+                             ORDER BY "{column_name}"
+                             LIMIT 1000
+                         """
+
+                         values = self.execute_query(query)
+                         result[table_name][column_name] = [str(row[0]) for row in values if row[0]]
+
+                     except Exception as e:
+                         logger.warning(f"Could not get unique values for {full_table_name}.{column_name}: {e}")
+                         result[table_name][column_name] = []
+                 else:
+                     result[table_name][column_name] = []
+
+         return result
+
+     def get_example_data(self, table_name: str, number_of_rows: int = 30) -> Dict[str, List[Any]]:
+         """
+         Retrieves the most frequent values for each column in the specified table.
+
+         Args:
+             table_name (str): The name of the table.
+             number_of_rows (int, optional): Maximum number of example values to return per column. Defaults to 30.
+
+         Returns:
+             Dict[str, List[Any]]: A dictionary mapping column names to lists of example values.
+         """
+         # First, get the schema name for the table
+         schema_query = """
+             SELECT table_schema
+             FROM information_schema.tables
+             WHERE table_name = :table_name
+               AND table_schema NOT IN ('information_schema', 'pg_catalog')
+             LIMIT 1
+         """
+
+         try:
+             schema_result = self.execute_query(schema_query, {"table_name": table_name})
+             if not schema_result:
+                 logger.warning(f"Table {table_name} not found")
+                 return {}
+
+             schema_name = schema_result[0][0]
+         except Exception as e:
+             logger.error(f"Error getting schema for table {table_name}: {e}")
+             return {}
+
+         # Get column information
+         columns_query = """
+             SELECT column_name, data_type
+             FROM information_schema.columns
+             WHERE table_name = :table_name AND table_schema = :schema_name
+             ORDER BY ordinal_position
+         """
+
+         try:
+             columns_result = self.execute_query(columns_query, {"table_name": table_name, "schema_name": schema_name})
+         except Exception as e:
+             logger.error(f"Error getting columns for table {schema_name}.{table_name}: {e}")
+             return {}
+
+         if not columns_result:
+             logger.warning(f"No columns found for table {schema_name}.{table_name}")
+             return {}
+
+         most_frequent_values: Dict[str, List[Any]] = {}
+
+         for row in columns_result:
+             column_name = row[0]
+
+             # PostgreSQL uses double quotes for identifier quoting
+             quoted_column_name = f'"{column_name}"'
+             quoted_schema_name = f'"{schema_name}"'
+             quoted_table_name = f'"{table_name}"'
+
+             # Query to get most frequent values
+             query_str = f"""
+                 SELECT {quoted_column_name}
+                 FROM (
+                     SELECT {quoted_column_name}, COUNT(*) as _freq
+                     FROM {quoted_schema_name}.{quoted_table_name}
+                     WHERE {quoted_column_name} IS NOT NULL
+                     GROUP BY {quoted_column_name}
+                     ORDER BY _freq DESC
+                     LIMIT :num_rows
+                 ) as subquery
+             """
+
+             try:
+                 result = self.execute_query(query_str, {"num_rows": number_of_rows})
+                 values = [row[0] for row in result]
+                 most_frequent_values[column_name] = values
+             except Exception as e:
+                 logger.error(f"Error fetching frequent values for {column_name} in {schema_name}.{table_name}: {e}")
+                 most_frequent_values[column_name] = []
+
+         # Normalize list lengths so every column has the same number of examples
+         max_length = max((len(v) for v in most_frequent_values.values()), default=0)
+
+         for column_name in most_frequent_values:
+             current_len = len(most_frequent_values[column_name])
+             if current_len < max_length:
+                 most_frequent_values[column_name].extend([None] * (max_length - current_len))
+
+         return most_frequent_values
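
For reference, a minimal usage sketch of the adapter added above. The constructor and method signatures follow the diff; the connection values and table counts are placeholders, and a reachable PostgreSQL instance is assumed:

    from thoth_dbmanager.adapters.postgresql import PostgreSQLAdapter

    # Placeholder credentials - replace with real connection parameters
    adapter = PostgreSQLAdapter({
        "host": "localhost",
        "port": 5432,
        "database": "mydb",
        "user": "myuser",
        "password": "secret",
    })

    adapter.connect()
    try:
        # Schema metadata comes back as document objects
        for table in adapter.get_tables_as_documents():
            print(table.schema_name, table.table_name)

        # Parameterized query; fetch="one" returns a single row
        row = adapter.execute_query(
            "SELECT COUNT(*) FROM information_schema.tables WHERE table_schema = :s",
            {"s": "public"},
            fetch="one",
        )
        print(row[0])
    finally:
        adapter.disconnect()

Note that execute_query accepts "all", "one", or an integer for fetch, and applies statement_timeout per call, so long-running introspection queries fail fast rather than hanging.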
--- /dev/null
+++ b/thoth_dbmanager/adapters/qdrant.py
@@ -0,0 +1,189 @@
+ """
+ Qdrant adapter for Thoth SQL Database Manager.
+ """
+
+ import logging
+ from typing import Any, Dict, List, Optional, Union
+ from ..core.interfaces import DbAdapter
+
+ logger = logging.getLogger(__name__)
+
+
+ class QdrantAdapter(DbAdapter):
+     """
+     Qdrant vector database adapter implementation.
+     """
+
+     def __init__(self, **kwargs):
+         """Initialize Qdrant adapter with connection parameters."""
+         # Pass the parameters through to the base adapter, which expects them
+         super().__init__(kwargs)
+         self.host = kwargs.get('host', 'localhost')
+         self.port = kwargs.get('port', 6333)
+         self.api_key = kwargs.get('api_key')
+         self.collection_name = kwargs.get('collection_name', 'thoth_documents')
+         self._client = None
+
+     def connect(self) -> bool:
+         """Establish connection to Qdrant."""
+         try:
+             # Import qdrant_client here to avoid a hard dependency
+             from qdrant_client import QdrantClient
+
+             if self.api_key:
+                 self._client = QdrantClient(
+                     host=self.host,
+                     port=self.port,
+                     api_key=self.api_key
+                 )
+             else:
+                 self._client = QdrantClient(
+                     host=self.host,
+                     port=self.port
+                 )
+
+             # Test connection
+             self._client.get_collections()
+             return True
+         except Exception as e:
+             logger.error(f"Failed to connect to Qdrant: {e}")
+             return False
+
+     def disconnect(self) -> None:
+         """Disconnect from Qdrant."""
+         if self._client:
+             self._client.close()
+             self._client = None
+
+     def execute_query(self, query: str, params: Optional[Dict] = None,
+                       fetch: Union[str, int] = "all", timeout: int = 60) -> Any:
+         """
+         Execute a query against Qdrant.
+         Note: Qdrant doesn't use SQL, so this adapts the interface.
+         """
+         if not self._client:
+             raise RuntimeError("Not connected to Qdrant")
+
+         # This is a placeholder - adapt based on your specific needs
+         # Qdrant uses vector search, not SQL queries
+         return {"message": "Qdrant uses vector search, not SQL queries"}
+
+     def get_tables(self) -> List[Dict[str, str]]:
+         """Get collections (equivalent to tables in Qdrant)."""
+         if not self._client:
+             raise RuntimeError("Not connected to Qdrant")
+
+         try:
+             collections = self._client.get_collections()
+             return [
+                 {
+                     "table_name": collection.name,
+                     "table_type": "COLLECTION"
+                 }
+                 for collection in collections.collections
+             ]
+         except Exception as e:
+             logger.error(f"Error getting collections: {e}")
+             return []
+
+     def get_columns(self, table_name: str) -> List[Dict[str, Any]]:
+         """Get collection info (equivalent to columns in Qdrant)."""
+         if not self._client:
+             raise RuntimeError("Not connected to Qdrant")
+
+         try:
+             collection_info = self._client.get_collection(table_name)
+             return [
+                 {
+                     "column_name": "id",
+                     "data_type": "UUID",
+                     "is_nullable": False
+                 },
+                 {
+                     "column_name": "vector",
+                     "data_type": f"VECTOR({collection_info.config.params.vectors.size})",
+                     "is_nullable": False
+                 },
+                 {
+                     "column_name": "payload",
+                     "data_type": "JSON",
+                     "is_nullable": True
+                 }
+             ]
+         except Exception as e:
+             logger.error(f"Error getting collection info: {e}")
+             return []
+
+     def get_foreign_keys(self) -> List[Dict[str, str]]:
+         """Get foreign keys (not applicable for Qdrant)."""
+         return []
+
+     def get_unique_values(self) -> Dict[str, Dict[str, List[str]]]:
+         """Get unique values from collections."""
+         if not self._client:
+             raise RuntimeError("Not connected to Qdrant")
+
+         # This is a simplified implementation
+         # In practice, you'd need to scroll through points and extract unique payload values
+         return {}
+
+     def add_documentation(self, doc_type: str, content: Dict[str, Any]) -> str:
+         """Add documentation to Qdrant collection."""
+         if not self._client:
+             raise RuntimeError("Not connected to Qdrant")
+
+         try:
+             from qdrant_client.models import PointStruct
+             import uuid
+
+             # Generate a unique ID for the document
+             doc_id = str(uuid.uuid4())
+
+             # Create a point with the documentation content
+             point = PointStruct(
+                 id=doc_id,
+                 vector=content.get('vector', [0.0] * 384),  # Default vector size
+                 payload={
+                     "doc_type": doc_type,
+                     "content": content
+                 }
+             )
+
+             # Upsert the point
+             self._client.upsert(
+                 collection_name=self.collection_name,
+                 points=[point]
+             )
+
+             return doc_id
+         except Exception as e:
+             logger.error(f"Error adding documentation: {e}")
+             raise
+
+     def delete_collection(self, collection_name: str) -> bool:
+         """Delete a collection from Qdrant."""
+         if not self._client:
+             raise RuntimeError("Not connected to Qdrant")
+
+         try:
+             self._client.delete_collection(collection_name)
+             return True
+         except Exception as e:
+             logger.error(f"Error deleting collection: {e}")
+             return False
+
+     def create_collection(self, collection_name: str, vector_size: int = 384) -> bool:
+         """Create a new collection in Qdrant."""
+         if not self._client:
+             raise RuntimeError("Not connected to Qdrant")
+
+         try:
+             from qdrant_client.models import VectorParams, Distance
+
+             self._client.create_collection(
+                 collection_name=collection_name,
+                 vectors_config=VectorParams(
+                     size=vector_size,
+                     distance=Distance.COSINE
+                 )
+             )
+             return True
+         except Exception as e:
+             logger.error(f"Error creating collection: {e}")
+             return False
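
Similarly, a minimal sketch of the Qdrant adapter above, assuming a local Qdrant instance and the qdrant-client package; host, port, and collection name mirror the constructor defaults shown in the diff:

    from thoth_dbmanager.adapters.qdrant import QdrantAdapter

    adapter = QdrantAdapter(host="localhost", port=6333, collection_name="thoth_documents")

    if adapter.connect():
        try:
            # The documentation collection must exist before points can be upserted
            adapter.create_collection(adapter.collection_name, vector_size=384)

            # Collections are reported through the tables interface
            print(adapter.get_tables())

            # Store a documentation entry; a zero vector is used when no 'vector' key is supplied
            doc_id = adapter.add_documentation("table", {"name": "users", "comment": "demo"})
            print(doc_id)
        finally:
            adapter.disconnect()

Since connect() returns a bool rather than raising, callers are expected to check the return value before using the adapter, unlike the PostgreSQL adapter, whose connect() raises on failure.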