unstructured-ingest 0.2.0__py3-none-any.whl → 0.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


Files changed (44)
  1. test/integration/connectors/sql/test_singlestore.py +156 -0
  2. test/integration/connectors/test_confluence.py +113 -0
  3. test/integration/connectors/test_kafka.py +67 -0
  4. test/integration/connectors/test_onedrive.py +112 -0
  5. test/integration/connectors/test_qdrant.py +137 -0
  6. test/integration/connectors/test_s3.py +1 -1
  7. test/integration/connectors/utils/docker.py +2 -1
  8. test/integration/connectors/utils/docker_compose.py +23 -8
  9. test/integration/connectors/utils/validation.py +73 -22
  10. unstructured_ingest/__version__.py +1 -1
  11. unstructured_ingest/connector/kafka.py +0 -1
  12. unstructured_ingest/interfaces.py +7 -7
  13. unstructured_ingest/v2/interfaces/file_data.py +1 -0
  14. unstructured_ingest/v2/processes/chunker.py +2 -2
  15. unstructured_ingest/v2/processes/connectors/__init__.py +15 -7
  16. unstructured_ingest/v2/processes/connectors/astradb.py +278 -55
  17. unstructured_ingest/v2/processes/connectors/confluence.py +195 -0
  18. unstructured_ingest/v2/processes/connectors/databricks/volumes.py +5 -5
  19. unstructured_ingest/v2/processes/connectors/fsspec/fsspec.py +2 -10
  20. unstructured_ingest/v2/processes/connectors/gitlab.py +267 -0
  21. unstructured_ingest/v2/processes/connectors/kafka/__init__.py +13 -0
  22. unstructured_ingest/v2/processes/connectors/kafka/cloud.py +82 -0
  23. unstructured_ingest/v2/processes/connectors/kafka/kafka.py +196 -0
  24. unstructured_ingest/v2/processes/connectors/kafka/local.py +75 -0
  25. unstructured_ingest/v2/processes/connectors/onedrive.py +163 -2
  26. unstructured_ingest/v2/processes/connectors/qdrant/__init__.py +16 -0
  27. unstructured_ingest/v2/processes/connectors/qdrant/cloud.py +59 -0
  28. unstructured_ingest/v2/processes/connectors/qdrant/local.py +58 -0
  29. unstructured_ingest/v2/processes/connectors/qdrant/qdrant.py +168 -0
  30. unstructured_ingest/v2/processes/connectors/qdrant/server.py +60 -0
  31. unstructured_ingest/v2/processes/connectors/sql/__init__.py +5 -0
  32. unstructured_ingest/v2/processes/connectors/sql/postgres.py +1 -20
  33. unstructured_ingest/v2/processes/connectors/sql/singlestore.py +168 -0
  34. unstructured_ingest/v2/processes/connectors/sql/snowflake.py +5 -5
  35. unstructured_ingest/v2/processes/connectors/sql/sql.py +15 -6
  36. unstructured_ingest/v2/processes/partitioner.py +14 -3
  37. unstructured_ingest/v2/unstructured_api.py +25 -11
  38. {unstructured_ingest-0.2.0.dist-info → unstructured_ingest-0.2.2.dist-info}/METADATA +17 -17
  39. {unstructured_ingest-0.2.0.dist-info → unstructured_ingest-0.2.2.dist-info}/RECORD +43 -27
  40. unstructured_ingest/v2/processes/connectors/singlestore.py +0 -156
  41. {unstructured_ingest-0.2.0.dist-info → unstructured_ingest-0.2.2.dist-info}/LICENSE.md +0 -0
  42. {unstructured_ingest-0.2.0.dist-info → unstructured_ingest-0.2.2.dist-info}/WHEEL +0 -0
  43. {unstructured_ingest-0.2.0.dist-info → unstructured_ingest-0.2.2.dist-info}/entry_points.txt +0 -0
  44. {unstructured_ingest-0.2.0.dist-info → unstructured_ingest-0.2.2.dist-info}/top_level.txt +0 -0
unstructured_ingest/v2/processes/connectors/onedrive.py

@@ -9,7 +9,11 @@ from typing import TYPE_CHECKING, Any, Generator, Optional
 from dateutil import parser
 from pydantic import Field, Secret
 
-from unstructured_ingest.error import SourceConnectionError, SourceConnectionNetworkError
+from unstructured_ingest.error import (
+    DestinationConnectionError,
+    SourceConnectionError,
+    SourceConnectionNetworkError,
+)
 from unstructured_ingest.utils.dep_check import requires_dependencies
 from unstructured_ingest.v2.interfaces import (
     AccessConfig,
@@ -22,16 +26,20 @@ from unstructured_ingest.v2.interfaces import (
     Indexer,
     IndexerConfig,
     SourceIdentifiers,
+    Uploader,
+    UploaderConfig,
     download_responses,
 )
 from unstructured_ingest.v2.logger import logger
 from unstructured_ingest.v2.processes.connector_registry import (
+    DestinationRegistryEntry,
     SourceRegistryEntry,
 )
 
 if TYPE_CHECKING:
     from office365.graph_client import GraphClient
     from office365.onedrive.driveitems.driveItem import DriveItem
+    from office365.onedrive.drives.drive import Drive
 
 CONNECTOR_TYPE = "onedrive"
 MAX_MB_SIZE = 512_000_000
@@ -55,6 +63,11 @@ class OnedriveConnectionConfig(ConnectionConfig):
     )
     access_config: Secret[OnedriveAccessConfig]
 
+    def get_drive(self) -> "Drive":
+        client = self.get_client()
+        drive = client.users[self.user_pname].drive
+        return drive
+
     @requires_dependencies(["msal"], extras="onedrive")
     def get_token(self):
         from msal import ConfidentialClientApplication
@@ -100,7 +113,6 @@ class OnedriveIndexer(Indexer):
                 raise SourceConnectionError(
                     "{} ({})".format(error, token_resp.get("error_description"))
                 )
-            self.connection_config.get_client()
        except Exception as e:
             logger.error(f"failed to validate connection: {e}", exc_info=True)
             raise SourceConnectionError(f"failed to validate connection: {e}")
@@ -224,6 +236,149 @@ class OnedriveDownloader(Downloader):
         return DownloadResponse(file_data=file_data, path=download_path)
 
 
+class OnedriveUploaderConfig(UploaderConfig):
+    remote_url: str = Field(
+        description="URL of the destination in OneDrive, e.g., 'onedrive://Documents/Folder'"
+    )
+    prefix: str = "onedrive://"
+
+    @property
+    def root_folder(self) -> str:
+        url = (
+            self.remote_url.replace(self.prefix, "", 1)
+            if self.remote_url.startswith(self.prefix)
+            else self.remote_url
+        )
+        return url.split("/")[0]
+
+    @property
+    def url(self) -> str:
+        url = (
+            self.remote_url.replace(self.prefix, "", 1)
+            if self.remote_url.startswith(self.prefix)
+            else self.remote_url
+        )
+        return url
+
+
+@dataclass
+class OnedriveUploader(Uploader):
+    connection_config: OnedriveConnectionConfig
+    upload_config: OnedriveUploaderConfig
+    connector_type: str = CONNECTOR_TYPE
+
+    @requires_dependencies(["office365"], extras="onedrive")
+    def precheck(self) -> None:
+        from office365.runtime.client_request_exception import ClientRequestException
+
+        try:
+            token_resp: dict = self.connection_config.get_token()
+            if error := token_resp.get("error"):
+                raise SourceConnectionError(
+                    "{} ({})".format(error, token_resp.get("error_description"))
+                )
+            drive = self.connection_config.get_drive()
+            root = drive.root
+            root_folder = self.upload_config.root_folder
+            folder = root.get_by_path(root_folder)
+            try:
+                folder.get().execute_query()
+            except ClientRequestException as e:
+                if e.message != "The resource could not be found.":
+                    raise e
+                folder = root.create_folder(root_folder).execute_query()
+                logger.info(f"successfully created folder: {folder.name}")
+        except Exception as e:
+            logger.error(f"failed to validate connection: {e}", exc_info=True)
+            raise SourceConnectionError(f"failed to validate connection: {e}")
+
+    def run(self, path: Path, file_data: FileData, **kwargs: Any) -> None:
+        drive = self.connection_config.get_drive()
+
+        # Use the remote_url from upload_config as the base destination folder
+        base_destination_folder = self.upload_config.url
+
+        # Use the file's relative path to maintain directory structure, if needed
+        if file_data.source_identifiers and file_data.source_identifiers.rel_path:
+            # Combine the base destination folder with the file's relative path
+            destination_path = Path(base_destination_folder) / Path(
+                file_data.source_identifiers.rel_path
+            )
+        else:
+            # If no relative path is provided, upload directly to the base destination folder
+            destination_path = Path(base_destination_folder) / path.name
+
+        destination_folder = destination_path.parent
+        file_name = destination_path.name
+
+        # Convert destination folder to a string suitable for OneDrive API
+        destination_folder_str = str(destination_folder).replace("\\", "/")
+
+        # Resolve the destination folder in OneDrive, creating it if necessary
+        try:
+            # Attempt to get the folder
+            folder = drive.root.get_by_path(destination_folder_str)
+            folder.get().execute_query()
+        except Exception:
+            # Folder doesn't exist, create it recursively
+            current_folder = drive.root
+            for part in destination_folder.parts:
+                # Use filter to find the folder by name
+                folders = (
+                    current_folder.children.filter(f"name eq '{part}' and folder ne null")
+                    .get()
+                    .execute_query()
+                )
+                if folders:
+                    current_folder = folders[0]
+                else:
+                    # Folder doesn't exist, create it
+                    current_folder = current_folder.create_folder(part).execute_query()
+            folder = current_folder
+
+        # Check the size of the file
+        file_size = path.stat().st_size
+
+        if file_size < MAX_MB_SIZE:
+            # Use simple upload for small files
+            with path.open("rb") as local_file:
+                content = local_file.read()
+            logger.info(f"Uploading {path} to {destination_path} using simple upload")
+            try:
+                uploaded_file = folder.upload(file_name, content).execute_query()
+                if not uploaded_file or uploaded_file.name != file_name:
+                    raise DestinationConnectionError(f"Upload failed for file '{file_name}'")
+                # Log details about the uploaded file
+                logger.info(
+                    f"Uploaded file '{uploaded_file.name}' with ID '{uploaded_file.id}'"
+                )
+            except Exception as e:
+                logger.error(f"Failed to upload file '{file_name}': {e}", exc_info=True)
+                raise DestinationConnectionError(
+                    f"Failed to upload file '{file_name}': {e}"
+                ) from e
+        else:
+            # Use resumable upload for large files
+            destination_fullpath = f"{destination_folder_str}/{file_name}"
+            destination_drive_item = drive.root.item_with_path(destination_fullpath)
+
+            logger.info(f"Uploading {path} to {destination_fullpath} using resumable upload")
+            try:
+                uploaded_file = destination_drive_item.resumable_upload(
+                    source_path=str(path)
+                ).execute_query()
+                # Validate the upload
+                if not uploaded_file or uploaded_file.name != file_name:
+                    raise DestinationConnectionError(f"Upload failed for file '{file_name}'")
+                # Log details about the uploaded file
+                logger.info(f"Uploaded file {uploaded_file.name} with ID {uploaded_file.id}")
+            except Exception as e:
+                logger.error(f"Failed to upload file '{file_name}' using resumable upload: {e}")
+                raise DestinationConnectionError(
+                    f"Failed to upload file '{file_name}' using resumable upload: {e}"
+                ) from e
+
+
 onedrive_source_entry = SourceRegistryEntry(
     connection_config=OnedriveConnectionConfig,
     indexer_config=OnedriveIndexerConfig,
@@ -231,3 +386,9 @@ onedrive_source_entry = SourceRegistryEntry(
     downloader_config=OnedriveDownloaderConfig,
     downloader=OnedriveDownloader,
 )
+
+onedrive_destination_entry = DestinationRegistryEntry(
+    connection_config=OnedriveConnectionConfig,
+    uploader=OnedriveUploader,
+    uploader_config=OnedriveUploaderConfig,
+)
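
The net effect of the onedrive.py changes is a new OneDrive destination. A minimal sketch of wiring the uploader up by hand follows; only user_pname, access_config, and the uploader classes are visible in this diff, so the remaining connection field names (client_id, tenant, client_cred) and all values are assumptions:

# Hedged sketch, not part of the diff: exercising the new OneDrive destination.
from unstructured_ingest.v2.processes.connectors.onedrive import (
    OnedriveAccessConfig,
    OnedriveConnectionConfig,
    OnedriveUploader,
    OnedriveUploaderConfig,
)

uploader = OnedriveUploader(
    connection_config=OnedriveConnectionConfig(
        client_id="<app-client-id>",  # assumed field name, not shown in this diff
        tenant="<tenant-id>",  # assumed field name, not shown in this diff
        user_pname="user@example.com",  # referenced by get_drive() above
        access_config=OnedriveAccessConfig(client_cred="<client-secret>"),  # assumed field name
    ),
    upload_config=OnedriveUploaderConfig(remote_url="onedrive://Documents/ingest-output"),
)
uploader.precheck()  # validates the token and creates the Documents root folder if missing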
unstructured_ingest/v2/processes/connectors/qdrant/__init__.py

@@ -0,0 +1,16 @@
+from __future__ import annotations
+
+from unstructured_ingest.v2.processes.connector_registry import (
+    add_destination_entry,
+)
+
+from .cloud import CONNECTOR_TYPE as CLOUD_CONNECTOR_TYPE
+from .cloud import qdrant_cloud_destination_entry
+from .local import CONNECTOR_TYPE as LOCAL_CONNECTOR_TYPE
+from .local import qdrant_local_destination_entry
+from .server import CONNECTOR_TYPE as SERVER_CONNECTOR_TYPE
+from .server import qdrant_server_destination_entry
+
+add_destination_entry(destination_type=CLOUD_CONNECTOR_TYPE, entry=qdrant_cloud_destination_entry)
+add_destination_entry(destination_type=SERVER_CONNECTOR_TYPE, entry=qdrant_server_destination_entry)
+add_destination_entry(destination_type=LOCAL_CONNECTOR_TYPE, entry=qdrant_local_destination_entry)
unstructured_ingest/v2/processes/connectors/qdrant/cloud.py

@@ -0,0 +1,59 @@
+from dataclasses import dataclass
+
+from pydantic import Field, Secret
+
+from unstructured_ingest.v2.processes.connector_registry import DestinationRegistryEntry
+from unstructured_ingest.v2.processes.connectors.qdrant.qdrant import (
+    QdrantAccessConfig,
+    QdrantConnectionConfig,
+    QdrantUploader,
+    QdrantUploaderConfig,
+    QdrantUploadStager,
+    QdrantUploadStagerConfig,
+)
+
+CONNECTOR_TYPE = "qdrant-cloud"
+
+
+class CloudQdrantAccessConfig(QdrantAccessConfig):
+    api_key: str = Field(description="Qdrant API key")
+
+
+class CloudQdrantConnectionConfig(QdrantConnectionConfig):
+    url: str = Field(default=None, description="url of Qdrant Cloud")
+    access_config: Secret[CloudQdrantAccessConfig]
+
+    def get_client_kwargs(self) -> dict:
+        return {
+            "api_key": self.access_config.get_secret_value().api_key,
+            "url": self.url,
+        }
+
+
+class CloudQdrantUploadStagerConfig(QdrantUploadStagerConfig):
+    pass
+
+
+@dataclass
+class CloudQdrantUploadStager(QdrantUploadStager):
+    upload_stager_config: CloudQdrantUploadStagerConfig
+
+
+class CloudQdrantUploaderConfig(QdrantUploaderConfig):
+    pass
+
+
+@dataclass
+class CloudQdrantUploader(QdrantUploader):
+    connection_config: CloudQdrantConnectionConfig
+    upload_config: CloudQdrantUploaderConfig
+    connector_type: str = CONNECTOR_TYPE
+
+
+qdrant_cloud_destination_entry = DestinationRegistryEntry(
+    connection_config=CloudQdrantConnectionConfig,
+    uploader=CloudQdrantUploader,
+    uploader_config=CloudQdrantUploaderConfig,
+    upload_stager=CloudQdrantUploadStager,
+    upload_stager_config=CloudQdrantUploadStagerConfig,
+)
unstructured_ingest/v2/processes/connectors/qdrant/local.py

@@ -0,0 +1,58 @@
+from dataclasses import dataclass
+
+from pydantic import Field, Secret
+
+from unstructured_ingest.v2.processes.connector_registry import DestinationRegistryEntry
+from unstructured_ingest.v2.processes.connectors.qdrant.qdrant import (
+    QdrantAccessConfig,
+    QdrantConnectionConfig,
+    QdrantUploader,
+    QdrantUploaderConfig,
+    QdrantUploadStager,
+    QdrantUploadStagerConfig,
+)
+
+CONNECTOR_TYPE = "qdrant-local"
+
+
+class LocalQdrantAccessConfig(QdrantAccessConfig):
+    pass
+
+
+class LocalQdrantConnectionConfig(QdrantConnectionConfig):
+    path: str = Field(default=None, description="Persistence path for QdrantLocal.")
+    access_config: Secret[LocalQdrantAccessConfig] = Field(
+        default_factory=LocalQdrantAccessConfig, validate_default=True
+    )
+
+    def get_client_kwargs(self) -> dict:
+        return {"path": self.path}
+
+
+class LocalQdrantUploadStagerConfig(QdrantUploadStagerConfig):
+    pass
+
+
+@dataclass
+class LocalQdrantUploadStager(QdrantUploadStager):
+    upload_stager_config: LocalQdrantUploadStagerConfig
+
+
+class LocalQdrantUploaderConfig(QdrantUploaderConfig):
+    pass
+
+
+@dataclass
+class LocalQdrantUploader(QdrantUploader):
+    connection_config: LocalQdrantConnectionConfig
+    upload_config: LocalQdrantUploaderConfig
+    connector_type: str = CONNECTOR_TYPE
+
+
+qdrant_local_destination_entry = DestinationRegistryEntry(
+    connection_config=LocalQdrantConnectionConfig,
+    uploader=LocalQdrantUploader,
+    uploader_config=LocalQdrantUploaderConfig,
+    upload_stager=LocalQdrantUploadStager,
+    upload_stager_config=LocalQdrantUploadStagerConfig,
+)
unstructured_ingest/v2/processes/connectors/qdrant/qdrant.py

@@ -0,0 +1,168 @@
+import asyncio
+import json
+import uuid
+from abc import ABC, abstractmethod
+from contextlib import asynccontextmanager
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import TYPE_CHECKING, Any, AsyncGenerator, Optional
+
+from pydantic import Field, Secret
+
+from unstructured_ingest.error import DestinationConnectionError, WriteError
+from unstructured_ingest.utils.data_prep import batch_generator, flatten_dict
+from unstructured_ingest.utils.dep_check import requires_dependencies
+from unstructured_ingest.v2.interfaces import (
+    AccessConfig,
+    ConnectionConfig,
+    FileData,
+    Uploader,
+    UploaderConfig,
+    UploadStager,
+    UploadStagerConfig,
+)
+from unstructured_ingest.v2.logger import logger
+
+if TYPE_CHECKING:
+    from qdrant_client import AsyncQdrantClient
+
+
+class QdrantAccessConfig(AccessConfig, ABC):
+    pass
+
+
+class QdrantConnectionConfig(ConnectionConfig, ABC):
+    access_config: Secret[QdrantAccessConfig] = Field(
+        default_factory=QdrantAccessConfig, validate_default=True, description="Access Config"
+    )
+
+    @abstractmethod
+    def get_client_kwargs(self) -> dict:
+        pass
+
+    @requires_dependencies(["qdrant_client"], extras="qdrant")
+    @asynccontextmanager
+    async def get_client(self) -> AsyncGenerator["AsyncQdrantClient", None]:
+        from qdrant_client.async_qdrant_client import AsyncQdrantClient
+
+        client_kwargs = self.get_client_kwargs()
+        client = AsyncQdrantClient(**client_kwargs)
+        try:
+            yield client
+        finally:
+            await client.close()
+
+
+class QdrantUploadStagerConfig(UploadStagerConfig):
+    pass
+
+
+@dataclass
+class QdrantUploadStager(UploadStager, ABC):
+    upload_stager_config: QdrantUploadStagerConfig = field(
+        default_factory=lambda: QdrantUploadStagerConfig()
+    )
+
+    @staticmethod
+    def conform_dict(data: dict) -> dict:
+        """Prepares the element dictionary in the format that Qdrant requires."""
+        return {
+            "id": str(uuid.uuid4()),
+            "vector": data.pop("embeddings", {}),
+            "payload": {
+                "text": data.pop("text", None),
+                "element_serialized": json.dumps(data),
+                **flatten_dict(
+                    data,
+                    separator="-",
+                    flatten_lists=True,
+                ),
+            },
+        }
+
+    def run(
+        self,
+        elements_filepath: Path,
+        file_data: FileData,
+        output_dir: Path,
+        output_filename: str,
+        **kwargs: Any,
+    ) -> Path:
+        with open(elements_filepath) as elements_file:
+            elements_contents = json.load(elements_file)
+
+        conformed_elements = [self.conform_dict(data=element) for element in elements_contents]
+        output_path = Path(output_dir) / Path(f"{output_filename}.json")
+
+        with open(output_path, "w") as output_file:
+            json.dump(conformed_elements, output_file)
+        return output_path
+
+
+class QdrantUploaderConfig(UploaderConfig):
+    collection_name: str = Field(description="Name of the collection.")
+    batch_size: int = Field(default=50, description="Number of records per batch.")
+    num_processes: Optional[int] = Field(
+        default=1,
+        description="Optional limit on number of threads to use for upload.",
+        deprecated=True,
+    )
+
+
+@dataclass
+class QdrantUploader(Uploader, ABC):
+    upload_config: QdrantUploaderConfig
+    connection_config: QdrantConnectionConfig
+
+    @DestinationConnectionError.wrap
+    def precheck(self) -> None:
+        async def check_connection():
+            async with self.connection_config.get_client() as async_client:
+                await async_client.get_collections()
+
+        asyncio.run(check_connection())
+
+    def is_async(self):
+        return True
+
+    async def run_async(
+        self,
+        path: Path,
+        file_data: FileData,
+        **kwargs: Any,
+    ) -> None:
+        with path.open("r") as file:
+            elements: list[dict] = json.load(file)
+
+        logger.debug("Loaded %i elements from %s", len(elements), path)
+
+        batches = list(batch_generator(elements, batch_size=self.upload_config.batch_size))
+        logger.debug(
+            "Elements split into %i batches of size %i.",
+            len(batches),
+            self.upload_config.batch_size,
+        )
+        await asyncio.gather(*[self._upsert_batch(batch) for batch in batches])
+
+    async def _upsert_batch(self, batch: list[dict]) -> None:
+        from qdrant_client import models
+
+        points: list[models.PointStruct] = [models.PointStruct(**item) for item in batch]
+        try:
+            logger.debug(
+                "Upserting %i points to the '%s' collection.",
+                len(points),
+                self.upload_config.collection_name,
+            )
+            async with self.connection_config.get_client() as async_client:
+                await async_client.upsert(
+                    self.upload_config.collection_name, points=points, wait=True
+                )
+        except Exception as api_error:
+            logger.error(
+                "Failed to upsert points to the collection due to the following error: %s",
+                api_error,
+            )
+            raise WriteError(f"Qdrant error: {api_error}") from api_error
+
+        logger.debug("Successfully upserted points to the collection.")
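
The staging format is easiest to see on a concrete element. A small sketch of what conform_dict produces; the input element shape is an assumption based on typical partitioner output, and the exact flattened keys depend on flatten_dict's behavior:

# Hedged sketch, not part of the diff: conform_dict pops "embeddings" into the
# vector and "text" into the payload, then flattens whatever keys remain.
from unstructured_ingest.v2.processes.connectors.qdrant.qdrant import QdrantUploadStager

element = {"text": "Hello world", "embeddings": [0.1, 0.2, 0.3], "type": "NarrativeText"}
point = QdrantUploadStager.conform_dict(data=element)  # note: mutates the input dict

assert point["vector"] == [0.1, 0.2, 0.3]
assert point["payload"]["text"] == "Hello world"
assert point["payload"]["element_serialized"] == '{"type": "NarrativeText"}'
print(point["id"])  # a fresh uuid4 string, generated per element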
unstructured_ingest/v2/processes/connectors/qdrant/server.py

@@ -0,0 +1,60 @@
+from dataclasses import dataclass
+
+from pydantic import Field, Secret
+
+from unstructured_ingest.v2.processes.connector_registry import DestinationRegistryEntry
+from unstructured_ingest.v2.processes.connectors.qdrant.qdrant import (
+    QdrantAccessConfig,
+    QdrantConnectionConfig,
+    QdrantUploader,
+    QdrantUploaderConfig,
+    QdrantUploadStager,
+    QdrantUploadStagerConfig,
+)
+
+CONNECTOR_TYPE = "qdrant-server"
+
+
+class ServerQdrantAccessConfig(QdrantAccessConfig):
+    pass
+
+
+class ServerQdrantConnectionConfig(QdrantConnectionConfig):
+    url: str = Field(default=None, description="url of Qdrant server")
+    access_config: Secret[ServerQdrantAccessConfig] = Field(
+        default_factory=ServerQdrantAccessConfig, validate_default=True
+    )
+
+    def get_client_kwargs(self) -> dict:
+        return {
+            "url": self.url,
+        }
+
+
+class ServerQdrantUploadStagerConfig(QdrantUploadStagerConfig):
+    pass
+
+
+@dataclass
+class ServerQdrantUploadStager(QdrantUploadStager):
+    upload_stager_config: ServerQdrantUploadStagerConfig
+
+
+class ServerQdrantUploaderConfig(QdrantUploaderConfig):
+    pass
+
+
+@dataclass
+class ServerQdrantUploader(QdrantUploader):
+    connection_config: ServerQdrantConnectionConfig
+    upload_config: ServerQdrantUploaderConfig
+    connector_type: str = CONNECTOR_TYPE
+
+
+qdrant_server_destination_entry = DestinationRegistryEntry(
+    connection_config=ServerQdrantConnectionConfig,
+    uploader=ServerQdrantUploader,
+    uploader_config=ServerQdrantUploaderConfig,
+    upload_stager=ServerQdrantUploadStager,
+    upload_stager_config=ServerQdrantUploadStagerConfig,
+)
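
Seen side by side, the three qdrant variants differ only in the kwargs they hand to AsyncQdrantClient. A short sketch contrasting them; the URLs, path, and API key are placeholders:

# Hedged sketch, not part of the diff: the variants only change get_client_kwargs().
from unstructured_ingest.v2.processes.connectors.qdrant.cloud import (
    CloudQdrantAccessConfig,
    CloudQdrantConnectionConfig,
)
from unstructured_ingest.v2.processes.connectors.qdrant.local import LocalQdrantConnectionConfig
from unstructured_ingest.v2.processes.connectors.qdrant.server import ServerQdrantConnectionConfig

local = LocalQdrantConnectionConfig(path="/tmp/qdrant-local")  # embedded, on-disk client
server = ServerQdrantConnectionConfig(url="http://localhost:6333")  # self-hosted server
cloud = CloudQdrantConnectionConfig(
    url="https://<cluster-id>.cloud.qdrant.io:6333",
    access_config=CloudQdrantAccessConfig(api_key="<api-key>"),
)

for config in (local, server, cloud):
    print(config.get_client_kwargs())
# {'path': '/tmp/qdrant-local'}
# {'url': 'http://localhost:6333'}
# {'api_key': '<api-key>', 'url': 'https://<cluster-id>.cloud.qdrant.io:6333'}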
unstructured_ingest/v2/processes/connectors/sql/__init__.py

@@ -7,6 +7,8 @@ from unstructured_ingest.v2.processes.connector_registry import (
 
 from .postgres import CONNECTOR_TYPE as POSTGRES_CONNECTOR_TYPE
 from .postgres import postgres_destination_entry, postgres_source_entry
+from .singlestore import CONNECTOR_TYPE as SINGLESTORE_CONNECTOR_TYPE
+from .singlestore import singlestore_destination_entry
 from .snowflake import CONNECTOR_TYPE as SNOWFLAKE_CONNECTOR_TYPE
 from .snowflake import snowflake_destination_entry, snowflake_source_entry
 from .sqlite import CONNECTOR_TYPE as SQLITE_CONNECTOR_TYPE
@@ -19,3 +21,6 @@ add_source_entry(source_type=SNOWFLAKE_CONNECTOR_TYPE, entry=snowflake_source_en
 add_destination_entry(destination_type=SQLITE_CONNECTOR_TYPE, entry=sqlite_destination_entry)
 add_destination_entry(destination_type=POSTGRES_CONNECTOR_TYPE, entry=postgres_destination_entry)
 add_destination_entry(destination_type=SNOWFLAKE_CONNECTOR_TYPE, entry=snowflake_destination_entry)
+add_destination_entry(
+    destination_type=SINGLESTORE_CONNECTOR_TYPE, entry=singlestore_destination_entry
+)
unstructured_ingest/v2/processes/connectors/sql/postgres.py

@@ -1,6 +1,6 @@
 from contextlib import contextmanager
 from dataclasses import dataclass, field
-from typing import TYPE_CHECKING, Any, Generator, Optional
+from typing import TYPE_CHECKING, Generator, Optional
 
 from pydantic import Field, Secret
 
@@ -12,7 +12,6 @@ from unstructured_ingest.v2.processes.connector_registry import (
     SourceRegistryEntry,
 )
 from unstructured_ingest.v2.processes.connectors.sql.sql import (
-    _DATE_COLUMNS,
     SQLAccessConfig,
     SQLConnectionConfig,
     SQLDownloader,
@@ -23,7 +22,6 @@ from unstructured_ingest.v2.processes.connectors.sql.sql import (
     SQLUploaderConfig,
     SQLUploadStager,
     SQLUploadStagerConfig,
-    parse_date_string,
 )
 
 if TYPE_CHECKING:
@@ -138,23 +136,6 @@ class PostgresUploader(SQLUploader):
     connector_type: str = CONNECTOR_TYPE
     values_delimiter: str = "%s"
 
-    def prepare_data(
-        self, columns: list[str], data: tuple[tuple[Any, ...], ...]
-    ) -> list[tuple[Any, ...]]:
-        output = []
-        for row in data:
-            parsed = []
-            for column_name, value in zip(columns, row):
-                if column_name in _DATE_COLUMNS:
-                    if value is None:
-                        parsed.append(None)
-                    else:
-                        parsed.append(parse_date_string(value))
-                else:
-                    parsed.append(value)
-            output.append(tuple(parsed))
-        return output
-
 
 postgres_source_entry = SourceRegistryEntry(
     connection_config=PostgresConnectionConfig,