unstructured-ingest 0.1.1__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (39)
  1. test/integration/connectors/conftest.py +13 -0
  2. test/integration/connectors/databricks_tests/test_volumes_native.py +8 -4
  3. test/integration/connectors/sql/test_postgres.py +6 -10
  4. test/integration/connectors/sql/test_singlestore.py +156 -0
  5. test/integration/connectors/sql/test_snowflake.py +205 -0
  6. test/integration/connectors/sql/test_sqlite.py +6 -10
  7. test/integration/connectors/test_delta_table.py +138 -0
  8. test/integration/connectors/test_s3.py +1 -1
  9. test/integration/connectors/utils/docker.py +78 -0
  10. test/integration/connectors/utils/docker_compose.py +23 -8
  11. test/integration/connectors/utils/validation.py +93 -2
  12. unstructured_ingest/__version__.py +1 -1
  13. unstructured_ingest/v2/cli/utils/click.py +32 -1
  14. unstructured_ingest/v2/cli/utils/model_conversion.py +10 -3
  15. unstructured_ingest/v2/interfaces/file_data.py +1 -0
  16. unstructured_ingest/v2/interfaces/indexer.py +4 -1
  17. unstructured_ingest/v2/pipeline/pipeline.py +10 -2
  18. unstructured_ingest/v2/pipeline/steps/index.py +18 -1
  19. unstructured_ingest/v2/processes/connectors/__init__.py +13 -6
  20. unstructured_ingest/v2/processes/connectors/astradb.py +278 -55
  21. unstructured_ingest/v2/processes/connectors/databricks/volumes.py +3 -1
  22. unstructured_ingest/v2/processes/connectors/delta_table.py +185 -0
  23. unstructured_ingest/v2/processes/connectors/fsspec/fsspec.py +1 -0
  24. unstructured_ingest/v2/processes/connectors/slack.py +248 -0
  25. unstructured_ingest/v2/processes/connectors/sql/__init__.py +15 -2
  26. unstructured_ingest/v2/processes/connectors/sql/postgres.py +33 -56
  27. unstructured_ingest/v2/processes/connectors/sql/singlestore.py +168 -0
  28. unstructured_ingest/v2/processes/connectors/sql/snowflake.py +162 -0
  29. unstructured_ingest/v2/processes/connectors/sql/sql.py +51 -12
  30. unstructured_ingest/v2/processes/connectors/sql/sqlite.py +31 -32
  31. unstructured_ingest/v2/unstructured_api.py +1 -1
  32. {unstructured_ingest-0.1.1.dist-info → unstructured_ingest-0.2.1.dist-info}/METADATA +19 -17
  33. {unstructured_ingest-0.1.1.dist-info → unstructured_ingest-0.2.1.dist-info}/RECORD +37 -31
  34. unstructured_ingest/v2/processes/connectors/databricks_volumes.py +0 -250
  35. unstructured_ingest/v2/processes/connectors/singlestore.py +0 -156
  36. {unstructured_ingest-0.1.1.dist-info → unstructured_ingest-0.2.1.dist-info}/LICENSE.md +0 -0
  37. {unstructured_ingest-0.1.1.dist-info → unstructured_ingest-0.2.1.dist-info}/WHEEL +0 -0
  38. {unstructured_ingest-0.1.1.dist-info → unstructured_ingest-0.2.1.dist-info}/entry_points.txt +0 -0
  39. {unstructured_ingest-0.1.1.dist-info → unstructured_ingest-0.2.1.dist-info}/top_level.txt +0 -0
test/integration/connectors/utils/docker.py
@@ -0,0 +1,78 @@
+ import time
+ from contextlib import contextmanager
+ from typing import Optional
+
+ import docker
+ from docker.models.containers import Container
+
+
+ def get_container(
+     docker_client: docker.DockerClient,
+     image: str,
+     ports: dict,
+     environment: Optional[dict] = None,
+     volumes: Optional[dict] = None,
+     healthcheck: Optional[dict] = None,
+ ) -> Container:
+     run_kwargs = {
+         "image": image,
+         "detach": True,
+         "ports": ports,
+     }
+     if environment:
+         run_kwargs["environment"] = environment
+     if volumes:
+         run_kwargs["volumes"] = volumes
+     if healthcheck:
+         run_kwargs["healthcheck"] = healthcheck
+     container: Container = docker_client.containers.run(**run_kwargs)
+     return container
+
+
+ def has_healthcheck(container: Container) -> bool:
+     return container.attrs.get("Config", {}).get("Healthcheck", None) is not None
+
+
+ def healthcheck_wait(container: Container, timeout: int = 10) -> None:
+     health = container.health
+     start = time.time()
+     while health != "healthy" and time.time() - start < timeout:
+         time.sleep(1)
+         container.reload()
+         health = container.health
+     if health != "healthy":
+         health_dict = container.attrs.get("State", {}).get("Health", {})
+         raise TimeoutError(f"Docker container never came up healthy: {health_dict}")
+
+
+ @contextmanager
+ def container_context(
+     docker_client: docker.DockerClient,
+     image: str,
+     ports: dict,
+     environment: Optional[dict] = None,
+     volumes: Optional[dict] = None,
+     healthcheck: Optional[dict] = None,
+     healthcheck_timeout: int = 10,
+ ):
+     container: Optional[Container] = None
+     try:
+         container = get_container(
+             docker_client=docker_client,
+             image=image,
+             ports=ports,
+             environment=environment,
+             volumes=volumes,
+             healthcheck=healthcheck,
+         )
+         if has_healthcheck(container):
+             healthcheck_wait(container=container, timeout=healthcheck_timeout)
+         yield container
+     except AssertionError as e:
+         if container:
+             logs = container.logs()
+             print(logs.decode("utf-8"))
+         raise e
+     finally:
+         if container:
+             container.kill()
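
For orientation, a minimal usage sketch of the new container_context helper inside a pytest fixture follows. The image name, port mapping, credentials, and healthcheck values are illustrative placeholders, not values taken from this release.

    # Illustrative only: spin up a throwaway Postgres container for a test.
    import docker
    import pytest

    from test.integration.connectors.utils.docker import container_context


    @pytest.fixture
    def postgres_container():
        with container_context(
            docker_client=docker.from_env(),
            image="postgres:16",  # placeholder image
            ports={"5432/tcp": 5432},
            environment={"POSTGRES_PASSWORD": "test"},  # placeholder credentials
            healthcheck={
                "test": ["CMD-SHELL", "pg_isready -U postgres"],
                "interval": 1_000_000_000,  # the docker SDK expects nanoseconds
            },
            healthcheck_timeout=30,
        ) as container:
            yield container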
test/integration/connectors/utils/docker_compose.py
@@ -3,6 +3,23 @@ from contextlib import contextmanager
  from pathlib import Path


+ def docker_compose_down(docker_compose_path: Path):
+     cmd = f"docker compose -f {docker_compose_path.resolve()} down --remove-orphans -v --rmi all"
+     print(f"Running command: {cmd}")
+     final_resp = subprocess.run(
+         cmd,
+         shell=True,
+         capture_output=True,
+     )
+     if final_resp.returncode != 0:
+         print("STDOUT: {}".format(final_resp.stdout.decode("utf-8")))
+         print("STDERR: {}".format(final_resp.stderr.decode("utf-8")))
+
+
+ def run_cleanup(docker_compose_path: Path):
+     docker_compose_down(docker_compose_path=docker_compose_path)
+
+
  @contextmanager
  def docker_compose_context(docker_compose_path: Path):
      # Dynamically run a specific docker compose file and make sure it gets cleanup by
@@ -30,15 +47,13 @@ def docker_compose_context(docker_compose_path: Path):
          if resp:
              print("STDOUT: {}".format(resp.stdout.decode("utf-8")))
              print("STDERR: {}".format(resp.stderr.decode("utf-8")))
-         raise e
-     finally:
-         cmd = f"docker compose -f {docker_compose_path.resolve()} down --remove-orphans -v"
-         print(f"Running command: {cmd}")
-         final_resp = subprocess.run(
+         cmd = f"docker compose -f {docker_compose_path.resolve()} logs"
+         logs = subprocess.run(
              cmd,
              shell=True,
              capture_output=True,
          )
-         if final_resp.returncode != 0:
-             print("STDOUT: {}".format(final_resp.stdout.decode("utf-8")))
-             print("STDERR: {}".format(final_resp.stderr.decode("utf-8")))
+         print("DOCKER LOGS: {}".format(logs.stdout.decode("utf-8")))
+         raise e
+     finally:
+         run_cleanup(docker_compose_path=docker_compose_path)
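
A hedged sketch of how the compose helper might be used in a test; the compose file path is a placeholder, not one from this package.

    # Illustrative only: bring a compose stack up for the duration of a test.
    from pathlib import Path

    from test.integration.connectors.utils.docker_compose import docker_compose_context


    def test_against_compose_stack():
        compose_file = Path("docker-compose.yml")  # placeholder path
        with docker_compose_context(docker_compose_path=compose_file):
            ...  # run assertions; on failure the compose logs are printed before cleanup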
test/integration/connectors/utils/validation.py
@@ -1,3 +1,4 @@
+ import filecmp
  import json
  import os
  import shutil
@@ -5,15 +6,31 @@ from dataclasses import dataclass, field, replace
  from pathlib import Path
  from typing import Callable, Optional

+ import pandas as pd
  from deepdiff import DeepDiff

  from test.integration.connectors.utils.constants import expected_results_path
  from unstructured_ingest.v2.interfaces import Downloader, FileData, Indexer


+ def pandas_df_equality_check(expected_filepath: Path, current_filepath: Path) -> bool:
+     expected_df = pd.read_csv(expected_filepath)
+     current_df = pd.read_csv(current_filepath)
+     if expected_df.equals(current_df):
+         return True
+     # Print diff
+     diff = expected_df.merge(current_df, indicator=True, how="left").loc[
+         lambda x: x["_merge"] != "both"
+     ]
+     print("diff between expected and current df:")
+     print(diff)
+     return False
+
+
  @dataclass
  class ValidationConfigs:
      test_id: str
+     expected_number_indexed_file_data: Optional[int] = None
      expected_num_files: Optional[int] = None
      predownload_file_data_check: Optional[Callable[[FileData], None]] = None
      postdownload_file_data_check: Optional[Callable[[FileData], None]] = None
@@ -21,6 +38,8 @@ class ValidationConfigs:
          default_factory=lambda: ["local_download_path", "metadata.date_processed"]
      )
      exclude_fields_extend: list[str] = field(default_factory=list)
+     validate_downloaded_files: bool = False
+     downloaded_file_equality_check: Optional[Callable[[Path, Path], bool]] = None

      def get_exclude_fields(self) -> list[str]:
          exclude_fields = self.exclude_fields
@@ -78,6 +97,13 @@ def check_files(expected_output_dir: Path, all_file_data: list[FileData]):
      assert not diff, "diff in files that exist: {}".format(", ".join(diff))


+ def check_files_in_paths(expected_output_dir: Path, current_output_dir: Path):
+     expected_files = get_files(dir_path=expected_output_dir)
+     current_files = get_files(dir_path=current_output_dir)
+     diff = set(expected_files) ^ set(current_files)
+     assert not diff, "diff in files that exist: {}".format(", ".join(diff))
+
+
  def check_contents(
      expected_output_dir: Path, all_file_data: list[FileData], configs: ValidationConfigs
  ):
@@ -96,6 +122,32 @@ def check_contents(
      assert not found_diff, f"Diffs found between files: {found_diff}"


+ def check_raw_file_contents(
+     expected_output_dir: Path,
+     current_output_dir: Path,
+     configs: ValidationConfigs,
+ ):
+     current_files = get_files(dir_path=current_output_dir)
+     found_diff = False
+     files = []
+     for current_file in current_files:
+         current_file_path = current_output_dir / current_file
+         expected_file_path = expected_output_dir / current_file
+         if downloaded_file_equality_check := configs.downloaded_file_equality_check:
+             is_different = downloaded_file_equality_check(expected_file_path, current_file_path)
+         elif expected_file_path.suffix == ".csv" and current_file_path.suffix == ".csv":
+             is_different = not pandas_df_equality_check(
+                 expected_filepath=expected_file_path, current_filepath=current_file_path
+             )
+         else:
+             is_different = not filecmp.cmp(expected_file_path, current_file_path, shallow=False)
+         if is_different:
+             found_diff = True
+             files.append(str(expected_file_path))
+             print(f"diffs between files {expected_file_path} and {current_file_path}")
+     assert not found_diff, "Diffs found between files: {}".format(", ".join(files))
+
+
  def run_expected_results_validation(
      expected_output_dir: Path, all_file_data: list[FileData], configs: ValidationConfigs
  ):
@@ -105,6 +157,21 @@
      )


+ def run_expected_download_files_validation(
+     expected_output_dir: Path,
+     current_download_dir: Path,
+     configs: ValidationConfigs,
+ ):
+     check_files_in_paths(
+         expected_output_dir=expected_output_dir, current_output_dir=current_download_dir
+     )
+     check_raw_file_contents(
+         expected_output_dir=expected_output_dir,
+         current_output_dir=current_download_dir,
+         configs=configs,
+     )
+
+
  def run_directory_structure_validation(expected_output_dir: Path, download_files: list[str]):
      directory_record = expected_output_dir / "directory_structure.json"
      with directory_record.open("r") as directory_file:
@@ -113,13 +180,18 @@ def run_directory_structure_validation(expected_output_dir: Path, download_files
      assert directory_structure == download_files


- def update_fixtures(output_dir: Path, download_dir: Path, all_file_data: list[FileData]):
+ def update_fixtures(
+     output_dir: Path,
+     download_dir: Path,
+     all_file_data: list[FileData],
+     save_downloads: bool = False,
+ ):
      # Delete current files
      shutil.rmtree(path=output_dir, ignore_errors=True)
      output_dir.mkdir(parents=True)
      # Rewrite the current file data
      file_data_output_path = output_dir / "file_data"
-     file_data_output_path.mkdir(parents=True)
+     file_data_output_path.mkdir(parents=True, exist_ok=True)
      for file_data in all_file_data:
          file_data_path = file_data_output_path / f"{file_data.identifier}.json"
          with file_data_path.open(mode="w") as f:
@@ -132,6 +204,11 @@ def update_fixtures(output_dir: Path, download_dir: Path, all_file_data: list[Fi
      with download_dir_record.open(mode="w") as f:
          json.dump({"directory_structure": download_files}, f, indent=2)

+     # If applicable, save raw downloads
+     if save_downloads:
+         raw_download_output_path = output_dir / "downloads"
+         shutil.copytree(download_dir, raw_download_output_path)
+

  def run_all_validations(
      configs: ValidationConfigs,
@@ -140,6 +217,13 @@ def run_all_validations(
      download_dir: Path,
      test_output_dir: Path,
  ):
+     if expected_number_indexed_file_data := configs.expected_number_indexed_file_data:
+         assert (
+             len(predownload_file_data) == expected_number_indexed_file_data
+         ), f"expected {expected_number_indexed_file_data} but got {len(predownload_file_data)}"
+     if expected_num_files := configs.expected_num_files:
+         assert len(postdownload_file_data) == expected_num_files
+
      for pre_data, post_data in zip(predownload_file_data, postdownload_file_data):
          configs.run_file_data_validation(
              predownload_file_data=pre_data, postdownload_file_data=post_data
@@ -155,6 +239,12 @@ def run_all_validations(
      run_directory_structure_validation(
          expected_output_dir=configs.test_output_dir(), download_files=download_files
      )
+     if configs.validate_downloaded_files:
+         run_expected_download_files_validation(
+             expected_output_dir=test_output_dir / "downloads",
+             current_download_dir=download_dir,
+             configs=configs,
+         )


  async def source_connector_validation(
@@ -200,4 +290,5 @@ async def source_connector_validation(
              output_dir=test_output_dir,
              download_dir=download_dir,
              all_file_data=all_postdownload_file_data,
+             save_downloads=configs.validate_downloaded_files,
          )
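
To illustrate the new validation knobs (expected_number_indexed_file_data, validate_downloaded_files, downloaded_file_equality_check), here is a hedged sketch of a connector test configuration. The test id and counts are placeholders, and the indexer/downloader wiring that would accompany it is omitted.

    # Illustrative only: enable raw-download comparison against saved fixtures.
    from test.integration.connectors.utils.validation import ValidationConfigs

    configs = ValidationConfigs(
        test_id="example_connector",          # placeholder
        expected_number_indexed_file_data=1,  # assert how many FileData records the indexer yields
        expected_num_files=1,                 # assert how many files were downloaded
        validate_downloaded_files=True,       # also diff raw downloads against the "downloads" fixture dir
    )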
unstructured_ingest/__version__.py
@@ -1 +1 @@
- __version__ = "0.1.1" # pragma: no cover
+ __version__ = "0.2.1" # pragma: no cover
unstructured_ingest/v2/cli/utils/click.py
@@ -1,12 +1,13 @@
  import json
  import os.path
+ from datetime import date, datetime
  from gettext import gettext, ngettext
  from gettext import gettext as _
  from pathlib import Path
  from typing import Any, Optional, Type, TypeVar, Union

  import click
- from pydantic import BaseModel, ConfigDict, Secret
+ from pydantic import BaseModel, ConfigDict, Secret, TypeAdapter, ValidationError


  def conform_click_options(options: dict):
@@ -109,6 +110,36 @@ class DelimitedString(click.ParamType):
          return split


+ class PydanticDateTime(click.ParamType):
+     name = "datetime"
+
+     def convert(
+         self,
+         value: Any,
+         param: Optional[click.Parameter] = None,
+         ctx: Optional[click.Context] = None,
+     ) -> Any:
+         try:
+             return TypeAdapter(datetime).validate_strings(value)
+         except ValidationError:
+             self.fail(f"{value} is not a valid datetime", param, ctx)
+
+
+ class PydanticDate(click.ParamType):
+     name = "date"
+
+     def convert(
+         self,
+         value: Any,
+         param: Optional[click.Parameter] = None,
+         ctx: Optional[click.Context] = None,
+     ) -> Any:
+         try:
+             return TypeAdapter(date).validate_strings(value)
+         except ValidationError:
+             self.fail(f"{value} is not a valid date", param, ctx)
+
+
  BaseModelT = TypeVar("BaseModelT", bound=BaseModel)

unstructured_ingest/v2/cli/utils/model_conversion.py
@@ -25,7 +25,12 @@ from pydantic.fields import FieldInfo
  from pydantic.types import _SecretBase
  from pydantic_core import PydanticUndefined

- from unstructured_ingest.v2.cli.utils.click import DelimitedString, Dict
+ from unstructured_ingest.v2.cli.utils.click import (
+     DelimitedString,
+     Dict,
+     PydanticDate,
+     PydanticDateTime,
+ )

  NoneType = type(None)

@@ -135,8 +140,10 @@ def get_type_from_annotation(field_type: Any) -> click.ParamType:
          return click.UUID
      if field_type is Path:
          return click.Path(path_type=Path)
-     if field_type in (datetime.datetime, datetime.date):
-         return click.DateTime()
+     if field_type is datetime.datetime:
+         return PydanticDateTime()
+     if field_type is datetime.date:
+         return PydanticDate()
      if field_origin is Literal:
          return click.Choice(field_args)
      if isinstance(field_type, EnumMeta):
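
With the mapping above, CLI options annotated as datetime or date are now parsed by pydantic rather than click.DateTime. A quick illustration of what the new param types accept (the input strings below are assumed examples; pydantic's TypeAdapter parses ISO 8601 strings):

    # Illustrative only: the param types defer parsing to pydantic.
    from unstructured_ingest.v2.cli.utils.click import PydanticDate, PydanticDateTime

    print(PydanticDateTime().convert("2024-05-01T12:30:00"))  # -> datetime.datetime(2024, 5, 1, 12, 30)
    print(PydanticDate().convert("2024-05-01"))               # -> datetime.date(2024, 5, 1)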
unstructured_ingest/v2/interfaces/file_data.py
@@ -43,6 +43,7 @@ class FileData(DataClassJsonMixin):
      additional_metadata: dict[str, Any] = field(default_factory=dict)
      reprocess: bool = False
      local_download_path: Optional[str] = None
+     display_name: Optional[str] = None

      @classmethod
      def from_file(cls, path: str) -> "FileData":
unstructured_ingest/v2/interfaces/indexer.py
@@ -1,5 +1,5 @@
  from abc import ABC, abstractmethod
- from typing import Any, Generator, Optional, TypeVar
+ from typing import Any, AsyncGenerator, Generator, Optional, TypeVar

  from pydantic import BaseModel

@@ -25,3 +25,6 @@ class Indexer(BaseProcess, BaseConnector, ABC):
      @abstractmethod
      def run(self, **kwargs: Any) -> Generator[FileData, None, None]:
          pass
+
+     async def run_async(self, **kwargs: Any) -> AsyncGenerator[FileData, None]:
+         raise NotImplementedError()
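
Connectors can now opt into async indexing by overriding run_async. A hedged sketch of the override pattern follows; the subclass and its internals are hypothetical, not part of this release.

    # Illustrative only: shape of an Indexer subclass that yields asynchronously.
    from typing import Any, AsyncGenerator, Generator

    from unstructured_ingest.v2.interfaces import FileData, Indexer


    class ExampleIndexer(Indexer):
        def run(self, **kwargs: Any) -> Generator[FileData, None, None]:
            # connector-specific record listing elided in this sketch
            yield from ()

        async def run_async(self, **kwargs: Any) -> AsyncGenerator[FileData, None]:
            # A real connector would await an async client here; delegating to the
            # sync run() just shows the async-generator shape the pipeline expects.
            for file_data in self.run(**kwargs):
                yield file_data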
unstructured_ingest/v2/pipeline/pipeline.py
@@ -1,5 +1,6 @@
  from __future__ import annotations

+ import asyncio
  import logging
  import multiprocessing as mp
  import shutil
@@ -186,6 +187,14 @@ class Pipeline:
          filtered_records = [r for r in records if r["file_data_path"] in filtered_file_data_paths]
          return filtered_records

+     def get_indices(self) -> list[dict]:
+         if self.indexer_step.process.is_async():
+             indices = asyncio.run(self.indexer_step.run_async())
+         else:
+             indices = self.indexer_step.run()
+         indices_inputs = [{"file_data_path": i} for i in indices]
+         return indices_inputs
+
      def _run(self):
          logger.info(
              f"running local pipeline: {self} with configs: " f"{self.context.model_dump_json()}"
@@ -197,8 +206,7 @@
          self.context.status = {}

          # Index into data source
-         indices = self.indexer_step.run()
-         indices_inputs = [{"file_data_path": i} for i in indices]
+         indices_inputs = self.get_indices()
          if not indices_inputs:
              logger.info("No files to process after indexer, exiting")
              return
unstructured_ingest/v2/pipeline/steps/index.py
@@ -1,7 +1,7 @@
  import hashlib
  import json
  from dataclasses import dataclass
- from typing import Generator, Optional, TypeVar
+ from typing import AsyncGenerator, Generator, Optional, TypeVar

  from unstructured_ingest.v2.interfaces.indexer import Indexer
  from unstructured_ingest.v2.logger import logger
@@ -52,6 +52,23 @@ class IndexStep(PipelineStep):
                      raise e
                  continue

+     async def run_async(self) -> AsyncGenerator[str, None]:
+         async for file_data in self.process.run_async():
+             logger.debug(f"generated file data: {file_data.to_dict()}")
+             try:
+                 record_hash = self.get_hash(extras=[file_data.identifier])
+                 filename = f"{record_hash}.json"
+                 filepath = (self.cache_dir / filename).resolve()
+                 filepath.parent.mkdir(parents=True, exist_ok=True)
+                 with open(str(filepath), "w") as f:
+                     json.dump(file_data.to_dict(), f, indent=2)
+                 yield str(filepath)
+             except Exception as e:
+                 logger.error(f"failed to create index for file data: {file_data}", exc_info=True)
+                 if self.context.raise_on_error:
+                     raise e
+                 continue
+
      def get_hash(self, extras: Optional[list[str]]) -> str:
          index_config_dict = json.loads(
              serialize_base_model_json(model=self.process.index_config, sort_keys=True)
unstructured_ingest/v2/processes/connectors/__init__.py
@@ -11,13 +11,15 @@ from unstructured_ingest.v2.processes.connector_registry import (
  from .airtable import CONNECTOR_TYPE as AIRTABLE_CONNECTOR_TYPE
  from .airtable import airtable_source_entry
  from .astradb import CONNECTOR_TYPE as ASTRA_DB_CONNECTOR_TYPE
- from .astradb import astra_db_destination_entry
+ from .astradb import astra_db_destination_entry, astra_db_source_entry
  from .azure_cognitive_search import CONNECTOR_TYPE as AZURE_COGNTIVE_SEARCH_CONNECTOR_TYPE
  from .azure_cognitive_search import azure_cognitive_search_destination_entry
  from .chroma import CONNECTOR_TYPE as CHROMA_CONNECTOR_TYPE
  from .chroma import chroma_destination_entry
  from .couchbase import CONNECTOR_TYPE as COUCHBASE_CONNECTOR_TYPE
  from .couchbase import couchbase_destination_entry, couchbase_source_entry
+ from .delta_table import CONNECTOR_TYPE as DELTA_TABLE_CONNECTOR_TYPE
+ from .delta_table import delta_table_destination_entry
  from .elasticsearch import CONNECTOR_TYPE as ELASTICSEARCH_CONNECTOR_TYPE
  from .elasticsearch import elasticsearch_destination_entry, elasticsearch_source_entry
  from .google_drive import CONNECTOR_TYPE as GOOGLE_DRIVE_CONNECTOR_TYPE
@@ -42,11 +44,12 @@ from .salesforce import CONNECTOR_TYPE as SALESFORCE_CONNECTOR_TYPE
  from .salesforce import salesforce_source_entry
  from .sharepoint import CONNECTOR_TYPE as SHAREPOINT_CONNECTOR_TYPE
  from .sharepoint import sharepoint_source_entry
- from .singlestore import CONNECTOR_TYPE as SINGLESTORE_CONNECTOR_TYPE
- from .singlestore import singlestore_destination_entry
+ from .slack import CONNECTOR_TYPE as SLACK_CONNECTOR_TYPE
+ from .slack import slack_source_entry
  from .weaviate import CONNECTOR_TYPE as WEAVIATE_CONNECTOR_TYPE
  from .weaviate import weaviate_destination_entry

+ add_source_entry(source_type=ASTRA_DB_CONNECTOR_TYPE, entry=astra_db_source_entry)
  add_destination_entry(destination_type=ASTRA_DB_CONNECTOR_TYPE, entry=astra_db_destination_entry)

  add_destination_entry(destination_type=CHROMA_CONNECTOR_TYPE, entry=chroma_destination_entry)
@@ -54,6 +57,10 @@ add_destination_entry(destination_type=CHROMA_CONNECTOR_TYPE, entry=chroma_desti
  add_source_entry(source_type=COUCHBASE_CONNECTOR_TYPE, entry=couchbase_source_entry)
  add_destination_entry(destination_type=COUCHBASE_CONNECTOR_TYPE, entry=couchbase_destination_entry)

+ add_destination_entry(
+     destination_type=DELTA_TABLE_CONNECTOR_TYPE, entry=delta_table_destination_entry
+ )
+
  add_source_entry(source_type=ELASTICSEARCH_CONNECTOR_TYPE, entry=elasticsearch_source_entry)
  add_destination_entry(
      destination_type=ELASTICSEARCH_CONNECTOR_TYPE, entry=elasticsearch_destination_entry
@@ -80,9 +87,7 @@ add_source_entry(source_type=MONGODB_CONNECTOR_TYPE, entry=mongodb_source_entry)

  add_destination_entry(destination_type=PINECONE_CONNECTOR_TYPE, entry=pinecone_destination_entry)
  add_source_entry(source_type=SHAREPOINT_CONNECTOR_TYPE, entry=sharepoint_source_entry)
- add_destination_entry(
-     destination_type=SINGLESTORE_CONNECTOR_TYPE, entry=singlestore_destination_entry
- )
+
  add_destination_entry(destination_type=MILVUS_CONNECTOR_TYPE, entry=milvus_destination_entry)
  add_destination_entry(
      destination_type=AZURE_COGNTIVE_SEARCH_CONNECTOR_TYPE,
@@ -93,3 +98,5 @@ add_destination_entry(destination_type=KDBAI_CONNECTOR_TYPE, entry=kdbai_destina
  add_source_entry(source_type=AIRTABLE_CONNECTOR_TYPE, entry=airtable_source_entry)

  add_source_entry(source_type=OUTLOOK_CONNECTOR_TYPE, entry=outlook_source_entry)
+
+ add_source_entry(source_type=SLACK_CONNECTOR_TYPE, entry=slack_source_entry)