unstructured-ingest 1.2.32__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of unstructured-ingest might be problematic. Click here for more details.
- unstructured_ingest/__init__.py +1 -0
- unstructured_ingest/__version__.py +1 -0
- unstructured_ingest/cli/README.md +28 -0
- unstructured_ingest/cli/__init__.py +0 -0
- unstructured_ingest/cli/base/__init__.py +4 -0
- unstructured_ingest/cli/base/cmd.py +269 -0
- unstructured_ingest/cli/base/dest.py +84 -0
- unstructured_ingest/cli/base/importer.py +34 -0
- unstructured_ingest/cli/base/src.py +75 -0
- unstructured_ingest/cli/cli.py +24 -0
- unstructured_ingest/cli/cmds.py +14 -0
- unstructured_ingest/cli/utils/__init__.py +0 -0
- unstructured_ingest/cli/utils/click.py +237 -0
- unstructured_ingest/cli/utils/model_conversion.py +222 -0
- unstructured_ingest/data_types/__init__.py +0 -0
- unstructured_ingest/data_types/entities.py +17 -0
- unstructured_ingest/data_types/file_data.py +116 -0
- unstructured_ingest/embed/__init__.py +0 -0
- unstructured_ingest/embed/azure_openai.py +63 -0
- unstructured_ingest/embed/bedrock.py +323 -0
- unstructured_ingest/embed/huggingface.py +69 -0
- unstructured_ingest/embed/interfaces.py +146 -0
- unstructured_ingest/embed/mixedbreadai.py +134 -0
- unstructured_ingest/embed/octoai.py +133 -0
- unstructured_ingest/embed/openai.py +142 -0
- unstructured_ingest/embed/togetherai.py +116 -0
- unstructured_ingest/embed/vertexai.py +109 -0
- unstructured_ingest/embed/voyageai.py +130 -0
- unstructured_ingest/error.py +156 -0
- unstructured_ingest/errors_v2.py +156 -0
- unstructured_ingest/interfaces/__init__.py +27 -0
- unstructured_ingest/interfaces/connector.py +56 -0
- unstructured_ingest/interfaces/downloader.py +90 -0
- unstructured_ingest/interfaces/indexer.py +29 -0
- unstructured_ingest/interfaces/process.py +22 -0
- unstructured_ingest/interfaces/processor.py +88 -0
- unstructured_ingest/interfaces/upload_stager.py +89 -0
- unstructured_ingest/interfaces/uploader.py +67 -0
- unstructured_ingest/logger.py +39 -0
- unstructured_ingest/main.py +11 -0
- unstructured_ingest/otel.py +128 -0
- unstructured_ingest/pipeline/__init__.py +0 -0
- unstructured_ingest/pipeline/interfaces.py +211 -0
- unstructured_ingest/pipeline/otel.py +32 -0
- unstructured_ingest/pipeline/pipeline.py +408 -0
- unstructured_ingest/pipeline/steps/__init__.py +0 -0
- unstructured_ingest/pipeline/steps/chunk.py +78 -0
- unstructured_ingest/pipeline/steps/download.py +206 -0
- unstructured_ingest/pipeline/steps/embed.py +77 -0
- unstructured_ingest/pipeline/steps/filter.py +35 -0
- unstructured_ingest/pipeline/steps/index.py +86 -0
- unstructured_ingest/pipeline/steps/partition.py +77 -0
- unstructured_ingest/pipeline/steps/stage.py +65 -0
- unstructured_ingest/pipeline/steps/uncompress.py +50 -0
- unstructured_ingest/pipeline/steps/upload.py +58 -0
- unstructured_ingest/processes/__init__.py +18 -0
- unstructured_ingest/processes/chunker.py +131 -0
- unstructured_ingest/processes/connector_registry.py +69 -0
- unstructured_ingest/processes/connectors/__init__.py +129 -0
- unstructured_ingest/processes/connectors/airtable.py +238 -0
- unstructured_ingest/processes/connectors/assets/__init__.py +0 -0
- unstructured_ingest/processes/connectors/assets/databricks_delta_table_schema.sql +9 -0
- unstructured_ingest/processes/connectors/assets/weaviate_collection_config.json +23 -0
- unstructured_ingest/processes/connectors/astradb.py +592 -0
- unstructured_ingest/processes/connectors/azure_ai_search.py +275 -0
- unstructured_ingest/processes/connectors/chroma.py +193 -0
- unstructured_ingest/processes/connectors/confluence.py +527 -0
- unstructured_ingest/processes/connectors/couchbase.py +336 -0
- unstructured_ingest/processes/connectors/databricks/__init__.py +58 -0
- unstructured_ingest/processes/connectors/databricks/volumes.py +233 -0
- unstructured_ingest/processes/connectors/databricks/volumes_aws.py +93 -0
- unstructured_ingest/processes/connectors/databricks/volumes_azure.py +108 -0
- unstructured_ingest/processes/connectors/databricks/volumes_gcp.py +91 -0
- unstructured_ingest/processes/connectors/databricks/volumes_native.py +92 -0
- unstructured_ingest/processes/connectors/databricks/volumes_table.py +187 -0
- unstructured_ingest/processes/connectors/delta_table.py +310 -0
- unstructured_ingest/processes/connectors/discord.py +161 -0
- unstructured_ingest/processes/connectors/duckdb/__init__.py +15 -0
- unstructured_ingest/processes/connectors/duckdb/base.py +103 -0
- unstructured_ingest/processes/connectors/duckdb/duckdb.py +130 -0
- unstructured_ingest/processes/connectors/duckdb/motherduck.py +130 -0
- unstructured_ingest/processes/connectors/elasticsearch/__init__.py +19 -0
- unstructured_ingest/processes/connectors/elasticsearch/elasticsearch.py +478 -0
- unstructured_ingest/processes/connectors/elasticsearch/opensearch.py +523 -0
- unstructured_ingest/processes/connectors/fsspec/__init__.py +37 -0
- unstructured_ingest/processes/connectors/fsspec/azure.py +203 -0
- unstructured_ingest/processes/connectors/fsspec/box.py +176 -0
- unstructured_ingest/processes/connectors/fsspec/dropbox.py +238 -0
- unstructured_ingest/processes/connectors/fsspec/fsspec.py +475 -0
- unstructured_ingest/processes/connectors/fsspec/gcs.py +203 -0
- unstructured_ingest/processes/connectors/fsspec/s3.py +253 -0
- unstructured_ingest/processes/connectors/fsspec/sftp.py +177 -0
- unstructured_ingest/processes/connectors/fsspec/utils.py +17 -0
- unstructured_ingest/processes/connectors/github.py +226 -0
- unstructured_ingest/processes/connectors/gitlab.py +270 -0
- unstructured_ingest/processes/connectors/google_drive.py +848 -0
- unstructured_ingest/processes/connectors/ibm_watsonx/__init__.py +10 -0
- unstructured_ingest/processes/connectors/ibm_watsonx/ibm_watsonx_s3.py +367 -0
- unstructured_ingest/processes/connectors/jira.py +522 -0
- unstructured_ingest/processes/connectors/kafka/__init__.py +17 -0
- unstructured_ingest/processes/connectors/kafka/cloud.py +121 -0
- unstructured_ingest/processes/connectors/kafka/kafka.py +275 -0
- unstructured_ingest/processes/connectors/kafka/local.py +103 -0
- unstructured_ingest/processes/connectors/kdbai.py +156 -0
- unstructured_ingest/processes/connectors/lancedb/__init__.py +30 -0
- unstructured_ingest/processes/connectors/lancedb/aws.py +43 -0
- unstructured_ingest/processes/connectors/lancedb/azure.py +43 -0
- unstructured_ingest/processes/connectors/lancedb/cloud.py +42 -0
- unstructured_ingest/processes/connectors/lancedb/gcp.py +44 -0
- unstructured_ingest/processes/connectors/lancedb/lancedb.py +181 -0
- unstructured_ingest/processes/connectors/lancedb/local.py +44 -0
- unstructured_ingest/processes/connectors/local.py +227 -0
- unstructured_ingest/processes/connectors/milvus.py +311 -0
- unstructured_ingest/processes/connectors/mongodb.py +389 -0
- unstructured_ingest/processes/connectors/neo4j.py +534 -0
- unstructured_ingest/processes/connectors/notion/__init__.py +0 -0
- unstructured_ingest/processes/connectors/notion/client.py +349 -0
- unstructured_ingest/processes/connectors/notion/connector.py +350 -0
- unstructured_ingest/processes/connectors/notion/helpers.py +448 -0
- unstructured_ingest/processes/connectors/notion/ingest_backoff/__init__.py +3 -0
- unstructured_ingest/processes/connectors/notion/ingest_backoff/_common.py +102 -0
- unstructured_ingest/processes/connectors/notion/ingest_backoff/_wrapper.py +126 -0
- unstructured_ingest/processes/connectors/notion/ingest_backoff/types.py +24 -0
- unstructured_ingest/processes/connectors/notion/interfaces.py +32 -0
- unstructured_ingest/processes/connectors/notion/types/__init__.py +0 -0
- unstructured_ingest/processes/connectors/notion/types/block.py +96 -0
- unstructured_ingest/processes/connectors/notion/types/blocks/__init__.py +63 -0
- unstructured_ingest/processes/connectors/notion/types/blocks/bookmark.py +40 -0
- unstructured_ingest/processes/connectors/notion/types/blocks/breadcrumb.py +21 -0
- unstructured_ingest/processes/connectors/notion/types/blocks/bulleted_list_item.py +31 -0
- unstructured_ingest/processes/connectors/notion/types/blocks/callout.py +131 -0
- unstructured_ingest/processes/connectors/notion/types/blocks/child_database.py +23 -0
- unstructured_ingest/processes/connectors/notion/types/blocks/child_page.py +23 -0
- unstructured_ingest/processes/connectors/notion/types/blocks/code.py +43 -0
- unstructured_ingest/processes/connectors/notion/types/blocks/column_list.py +35 -0
- unstructured_ingest/processes/connectors/notion/types/blocks/divider.py +22 -0
- unstructured_ingest/processes/connectors/notion/types/blocks/embed.py +36 -0
- unstructured_ingest/processes/connectors/notion/types/blocks/equation.py +23 -0
- unstructured_ingest/processes/connectors/notion/types/blocks/file.py +49 -0
- unstructured_ingest/processes/connectors/notion/types/blocks/heading.py +37 -0
- unstructured_ingest/processes/connectors/notion/types/blocks/image.py +21 -0
- unstructured_ingest/processes/connectors/notion/types/blocks/link_preview.py +24 -0
- unstructured_ingest/processes/connectors/notion/types/blocks/link_to_page.py +29 -0
- unstructured_ingest/processes/connectors/notion/types/blocks/numbered_list.py +29 -0
- unstructured_ingest/processes/connectors/notion/types/blocks/paragraph.py +31 -0
- unstructured_ingest/processes/connectors/notion/types/blocks/pdf.py +49 -0
- unstructured_ingest/processes/connectors/notion/types/blocks/quote.py +37 -0
- unstructured_ingest/processes/connectors/notion/types/blocks/synced_block.py +109 -0
- unstructured_ingest/processes/connectors/notion/types/blocks/table.py +60 -0
- unstructured_ingest/processes/connectors/notion/types/blocks/table_of_contents.py +23 -0
- unstructured_ingest/processes/connectors/notion/types/blocks/template.py +30 -0
- unstructured_ingest/processes/connectors/notion/types/blocks/todo.py +42 -0
- unstructured_ingest/processes/connectors/notion/types/blocks/toggle.py +37 -0
- unstructured_ingest/processes/connectors/notion/types/blocks/unsupported.py +20 -0
- unstructured_ingest/processes/connectors/notion/types/blocks/video.py +22 -0
- unstructured_ingest/processes/connectors/notion/types/database.py +73 -0
- unstructured_ingest/processes/connectors/notion/types/database_properties/__init__.py +125 -0
- unstructured_ingest/processes/connectors/notion/types/database_properties/checkbox.py +39 -0
- unstructured_ingest/processes/connectors/notion/types/database_properties/created_by.py +36 -0
- unstructured_ingest/processes/connectors/notion/types/database_properties/created_time.py +35 -0
- unstructured_ingest/processes/connectors/notion/types/database_properties/date.py +42 -0
- unstructured_ingest/processes/connectors/notion/types/database_properties/email.py +37 -0
- unstructured_ingest/processes/connectors/notion/types/database_properties/files.py +38 -0
- unstructured_ingest/processes/connectors/notion/types/database_properties/formula.py +50 -0
- unstructured_ingest/processes/connectors/notion/types/database_properties/last_edited_by.py +34 -0
- unstructured_ingest/processes/connectors/notion/types/database_properties/last_edited_time.py +35 -0
- unstructured_ingest/processes/connectors/notion/types/database_properties/multiselect.py +74 -0
- unstructured_ingest/processes/connectors/notion/types/database_properties/number.py +50 -0
- unstructured_ingest/processes/connectors/notion/types/database_properties/people.py +42 -0
- unstructured_ingest/processes/connectors/notion/types/database_properties/phone_number.py +37 -0
- unstructured_ingest/processes/connectors/notion/types/database_properties/relation.py +68 -0
- unstructured_ingest/processes/connectors/notion/types/database_properties/rich_text.py +44 -0
- unstructured_ingest/processes/connectors/notion/types/database_properties/rollup.py +57 -0
- unstructured_ingest/processes/connectors/notion/types/database_properties/select.py +70 -0
- unstructured_ingest/processes/connectors/notion/types/database_properties/status.py +82 -0
- unstructured_ingest/processes/connectors/notion/types/database_properties/title.py +38 -0
- unstructured_ingest/processes/connectors/notion/types/database_properties/unique_id.py +51 -0
- unstructured_ingest/processes/connectors/notion/types/database_properties/url.py +38 -0
- unstructured_ingest/processes/connectors/notion/types/database_properties/verification.py +79 -0
- unstructured_ingest/processes/connectors/notion/types/date.py +29 -0
- unstructured_ingest/processes/connectors/notion/types/file.py +54 -0
- unstructured_ingest/processes/connectors/notion/types/page.py +52 -0
- unstructured_ingest/processes/connectors/notion/types/parent.py +66 -0
- unstructured_ingest/processes/connectors/notion/types/rich_text.py +189 -0
- unstructured_ingest/processes/connectors/notion/types/user.py +83 -0
- unstructured_ingest/processes/connectors/onedrive.py +485 -0
- unstructured_ingest/processes/connectors/outlook.py +242 -0
- unstructured_ingest/processes/connectors/pinecone.py +400 -0
- unstructured_ingest/processes/connectors/qdrant/__init__.py +16 -0
- unstructured_ingest/processes/connectors/qdrant/cloud.py +59 -0
- unstructured_ingest/processes/connectors/qdrant/local.py +58 -0
- unstructured_ingest/processes/connectors/qdrant/qdrant.py +163 -0
- unstructured_ingest/processes/connectors/qdrant/server.py +60 -0
- unstructured_ingest/processes/connectors/redisdb.py +214 -0
- unstructured_ingest/processes/connectors/salesforce.py +307 -0
- unstructured_ingest/processes/connectors/sharepoint.py +282 -0
- unstructured_ingest/processes/connectors/slack.py +249 -0
- unstructured_ingest/processes/connectors/sql/__init__.py +41 -0
- unstructured_ingest/processes/connectors/sql/databricks_delta_tables.py +228 -0
- unstructured_ingest/processes/connectors/sql/postgres.py +168 -0
- unstructured_ingest/processes/connectors/sql/singlestore.py +176 -0
- unstructured_ingest/processes/connectors/sql/snowflake.py +298 -0
- unstructured_ingest/processes/connectors/sql/sql.py +456 -0
- unstructured_ingest/processes/connectors/sql/sqlite.py +179 -0
- unstructured_ingest/processes/connectors/sql/teradata.py +254 -0
- unstructured_ingest/processes/connectors/sql/vastdb.py +263 -0
- unstructured_ingest/processes/connectors/utils.py +60 -0
- unstructured_ingest/processes/connectors/vectara.py +348 -0
- unstructured_ingest/processes/connectors/weaviate/__init__.py +22 -0
- unstructured_ingest/processes/connectors/weaviate/cloud.py +166 -0
- unstructured_ingest/processes/connectors/weaviate/embedded.py +90 -0
- unstructured_ingest/processes/connectors/weaviate/local.py +73 -0
- unstructured_ingest/processes/connectors/weaviate/weaviate.py +337 -0
- unstructured_ingest/processes/connectors/zendesk/__init__.py +0 -0
- unstructured_ingest/processes/connectors/zendesk/client.py +314 -0
- unstructured_ingest/processes/connectors/zendesk/zendesk.py +241 -0
- unstructured_ingest/processes/embedder.py +203 -0
- unstructured_ingest/processes/filter.py +60 -0
- unstructured_ingest/processes/partitioner.py +233 -0
- unstructured_ingest/processes/uncompress.py +61 -0
- unstructured_ingest/processes/utils/__init__.py +8 -0
- unstructured_ingest/processes/utils/blob_storage.py +32 -0
- unstructured_ingest/processes/utils/logging/connector.py +365 -0
- unstructured_ingest/processes/utils/logging/sanitizer.py +117 -0
- unstructured_ingest/unstructured_api.py +140 -0
- unstructured_ingest/utils/__init__.py +5 -0
- unstructured_ingest/utils/chunking.py +56 -0
- unstructured_ingest/utils/compression.py +72 -0
- unstructured_ingest/utils/constants.py +2 -0
- unstructured_ingest/utils/data_prep.py +216 -0
- unstructured_ingest/utils/dep_check.py +78 -0
- unstructured_ingest/utils/filesystem.py +27 -0
- unstructured_ingest/utils/html.py +174 -0
- unstructured_ingest/utils/ndjson.py +52 -0
- unstructured_ingest/utils/pydantic_models.py +52 -0
- unstructured_ingest/utils/string_and_date_utils.py +74 -0
- unstructured_ingest/utils/table.py +80 -0
- unstructured_ingest/utils/tls.py +15 -0
- unstructured_ingest-1.2.32.dist-info/METADATA +235 -0
- unstructured_ingest-1.2.32.dist-info/RECORD +243 -0
- unstructured_ingest-1.2.32.dist-info/WHEEL +4 -0
- unstructured_ingest-1.2.32.dist-info/entry_points.txt +2 -0
- unstructured_ingest-1.2.32.dist-info/licenses/LICENSE.md +201 -0
|
@@ -0,0 +1,523 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import re
|
|
3
|
+
from concurrent.futures import ThreadPoolExecutor
|
|
4
|
+
from dataclasses import dataclass, field
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from time import time
|
|
7
|
+
from typing import TYPE_CHECKING, Any, AsyncGenerator, Awaitable, Callable, Optional, Tuple
|
|
8
|
+
|
|
9
|
+
from pydantic import BaseModel, Field, Secret, field_validator
|
|
10
|
+
|
|
11
|
+
from unstructured_ingest.data_types.file_data import (
|
|
12
|
+
BatchFileData,
|
|
13
|
+
BatchItem,
|
|
14
|
+
FileData,
|
|
15
|
+
FileDataSourceMetadata,
|
|
16
|
+
)
|
|
17
|
+
from unstructured_ingest.error import (
|
|
18
|
+
DestinationConnectionError,
|
|
19
|
+
SourceConnectionError,
|
|
20
|
+
)
|
|
21
|
+
from unstructured_ingest.interfaces import (
|
|
22
|
+
AccessConfig,
|
|
23
|
+
ConnectionConfig,
|
|
24
|
+
)
|
|
25
|
+
from unstructured_ingest.interfaces.downloader import download_responses
|
|
26
|
+
from unstructured_ingest.logger import logger
|
|
27
|
+
from unstructured_ingest.processes.connector_registry import (
|
|
28
|
+
DestinationRegistryEntry,
|
|
29
|
+
SourceRegistryEntry,
|
|
30
|
+
)
|
|
31
|
+
from unstructured_ingest.processes.connectors.elasticsearch.elasticsearch import (
|
|
32
|
+
ElasticsearchBatchFileData,
|
|
33
|
+
ElasticsearchDownloader,
|
|
34
|
+
ElasticsearchDownloaderConfig,
|
|
35
|
+
ElasticsearchIndexer,
|
|
36
|
+
ElasticsearchIndexerConfig,
|
|
37
|
+
ElasticsearchUploader,
|
|
38
|
+
ElasticsearchUploaderConfig,
|
|
39
|
+
ElasticsearchUploadStager,
|
|
40
|
+
ElasticsearchUploadStagerConfig,
|
|
41
|
+
ElastisearchAdditionalMetadata,
|
|
42
|
+
)
|
|
43
|
+
from unstructured_ingest.utils.data_prep import batch_generator, generator_batching_wbytes
|
|
44
|
+
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
45
|
+
|
|
46
|
+
if TYPE_CHECKING:
|
|
47
|
+
from opensearchpy import OpenSearch
|
|
48
|
+
|
|
49
|
+
# Identifier stamped on the file data produced by this connector.
CONNECTOR_TYPE = "opensearch"

"""OpenSearch connector - inherits from Elasticsearch connector (OpenSearch is an ES fork)."""

# Precompiled regex patterns for AWS hostname detection (GovCloud, China, standard)
#
# In both patterns group 1 is the region: two lowercase letters, one or more
# "-word" parts and a trailing "-<digits>" (e.g. "us-east-1", "us-gov-west-1").
#
# _ES_PATTERN matches any hostname that *ends* with ".<region>.es.amazonaws.com".
# _AOSS_PATTERN matches a *whole* hostname of the form
# "<lowercase alphanumeric id>.<region>.aoss.amazonaws.com".
#
# NOTE(review): both patterns are anchored with "$" right after "amazonaws.com",
# so a hostname ending in "amazonaws.com.cn" does not match them as written.
_ES_PATTERN = re.compile(r"\.([a-z]{2}(?:-[a-z]+)+-\d+)\.es\.amazonaws\.com$")
_AOSS_PATTERN = re.compile(r"^[a-z0-9]+\.([a-z]{2}(?:-[a-z]+)+-\d+)\.aoss\.amazonaws\.com$")
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def _run_coroutine(fn: Callable[..., Awaitable[Any]], *args: Any, **kwargs: Any) -> Any:
|
|
59
|
+
"""Run an async function from sync context, handling existing event loops."""
|
|
60
|
+
try:
|
|
61
|
+
asyncio.get_running_loop()
|
|
62
|
+
except RuntimeError:
|
|
63
|
+
return asyncio.run(fn(*args, **kwargs))
|
|
64
|
+
|
|
65
|
+
with ThreadPoolExecutor(thread_name_prefix="opensearch") as pool:
|
|
66
|
+
return pool.submit(lambda: asyncio.run(fn(*args, **kwargs))).result()
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
# Credential fields for an OpenSearch connection. Every field is optional;
# OpenSearchConnectionConfig inspects which ones are set to pick an auth mode.
class OpenSearchAccessConfig(AccessConfig):
    # Password used together with the connection's `username` for basic auth.
    password: Optional[str] = Field(default=None, description="password when using basic auth")
    # AWS key pair: the connection config rejects the case where only one of
    # the two values below is provided.
    aws_access_key_id: Optional[str] = Field(
        default=None,
        description="AWS access key ID. When provided (with secret), IAM authentication is used. "
        "Region and service type are auto-detected from the host URL.",
    )
    aws_secret_access_key: Optional[str] = Field(
        default=None,
        description="AWS secret access key. Required when aws_access_key_id is provided.",
    )
    # Forwarded to boto3.Session alongside the key pair when building SigV4 auth.
    aws_session_token: Optional[str] = Field(
        default=None, description="AWS session token for temporary credentials (optional)"
    )
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def detect_aws_opensearch_config(host: str) -> Optional[Tuple[str, str]]:
    """Auto-detect AWS region and service from an OpenSearch host string.

    The host may be a bare hostname or a full URL; scheme, userinfo, port, path
    and letter case are ignored so that only the hostname itself is matched.

    Args:
        host: Host as configured by the user, e.g.
            ``https://search-domain-xxx.us-east-1.es.amazonaws.com:443/``.

    Returns:
        ``(region, "es")`` for an OpenSearch Service domain, ``(region, "aoss")``
        for an OpenSearch Serverless collection, or ``None`` when the hostname
        does not look like an AWS OpenSearch endpoint.
    """
    # Function-scope import so this block does not depend on a file-level import.
    from urllib.parse import urlsplit

    candidate = host.strip()
    if "://" not in candidate:
        # urlsplit only recognises a network location after "//"; without it a
        # bare "host:443" would be parsed as scheme "host" with path "443".
        candidate = f"//{candidate}"

    try:
        # .hostname is lower-cased and excludes userinfo, port and path.
        hostname = urlsplit(candidate).hostname or ""
    except ValueError:
        # Malformed netloc (e.g. unbalanced IPv6 brackets): not an AWS endpoint.
        return None

    # China-partition endpoints live under "amazonaws.com.cn", while the
    # module-level patterns are anchored at "amazonaws.com".
    hostname = hostname.removesuffix(".cn")

    match = _ES_PATTERN.search(hostname)
    if match:
        return (match.group(1), "es")

    match = _AOSS_PATTERN.search(hostname)
    if match:
        return (match.group(1), "aoss")

    return None
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
# Validated container for the keyword arguments that OpenSearchConnectionConfig
# dumps (via model_dump) and passes on to the AsyncOpenSearch client.
class OpenSearchClientInput(BaseModel):
    # (username, password) pair for basic auth; wrapped in Secret, so callers
    # must unwrap it with get_secret_value() before handing it to the client.
    http_auth: Secret[Optional[tuple[str, str]]] = None
    hosts: Optional[list[str]] = None
    use_ssl: bool = False
    verify_certs: bool = False
    ssl_show_warn: bool = False
    # Certificate paths are plain strings here; the connection config converts
    # its Path values with str() before populating them.
    ca_certs: Optional[str] = None
    client_cert: Optional[str] = None
    client_key: Optional[str] = None
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
class OpenSearchConnectionConfig(ConnectionConfig):
    # Connection settings for OpenSearch. The authentication mode is chosen in
    # get_async_client_kwargs from whichever credentials are present:
    # AWS IAM (SigV4) > basic auth > client certificate > no authentication.
    hosts: list[str] = Field(
        ...,
        min_length=1,
        description="List of the OpenSearch hosts to connect",
        examples=["http://localhost:9200"],
    )
    username: Optional[str] = Field(default=None, description="username when using basic auth")
    use_ssl: bool = Field(default=False, description="use ssl for the connection")
    verify_certs: bool = Field(default=False, description="whether to verify SSL certificates")
    ssl_show_warn: bool = Field(
        default=False, description="show warning when verify certs is disabled"
    )
    ca_certs: Optional[Path] = Field(default=None, description="path to CA bundle")
    client_cert: Optional[Path] = Field(
        default=None,
        description="path to the file containing the private key and the certificate,"
        " or cert only if using client_key",
    )
    client_key: Optional[Path] = Field(
        default=None,
        description="path to the file containing the private key"
        " if using separate cert and key files",
    )

    access_config: Secret[OpenSearchAccessConfig]

    @field_validator("hosts", mode="before")
    @classmethod
    def validate_hosts(cls, value):
        """Accept a single host string or a collection of hosts; reject empty entries."""
        hosts = [value] if isinstance(value, str) else value
        if not hosts:
            raise ValueError("At least one OpenSearch host must be provided. ")
        for entry in hosts:
            if not (entry and entry.strip()):
                raise ValueError("Host URL cannot be empty")
        return hosts

    def _has_aws_credentials(self) -> bool:
        """Return True when both AWS keys are set and False when neither is.

        Raises:
            ValueError: if exactly one of the two keys is provided.
        """
        secrets = self.access_config.get_secret_value()
        key_id_given = secrets.aws_access_key_id is not None
        secret_given = secrets.aws_secret_access_key is not None

        if key_id_given and secret_given:
            return True
        if not key_id_given and not secret_given:
            return False

        # Exactly one half of the key pair was supplied: report which one.
        key_state = "set" if key_id_given else "not set"
        secret_state = "set" if secret_given else "not set"
        raise ValueError(
            "AWS IAM authentication requires BOTH aws_access_key_id and aws_secret_access_key. "
            f"Currently provided: aws_access_key_id={key_state}, "
            f"aws_secret_access_key={secret_state}"
        )

    def _detect_and_validate_aws_config(self) -> Tuple[str, str]:
        """Derive ``(region, service)`` from the first configured host.

        Raises:
            ValueError: if no host is configured or the first host is not
                recognised as an AWS OpenSearch endpoint.
        """
        if not self.hosts:
            raise ValueError("Host is required for AWS OpenSearch connection")

        primary_host = self.hosts[0]
        detected = detect_aws_opensearch_config(primary_host)
        if not detected:
            raise ValueError(
                f"Could not auto-detect AWS region and service from host: {primary_host}. "
                f"Ensure your host URL follows AWS OpenSearch format: "
                f"https://search-domain-xxx.REGION.es.amazonaws.com (for OpenSearch Service) or "
                f"https://xxx.REGION.aoss.amazonaws.com (for OpenSearch Serverless)"
            )

        region, service = detected
        logger.debug(
            f"Auto-detected AWS configuration from host: region={region}, service={service}"
        )
        return region, service

    @requires_dependencies(["opensearchpy", "boto3"], extras="opensearch")
    async def _get_async_aws_auth(self):
        """Build the SigV4 signer that async clients use for AWS IAM authentication.

        Raises:
            ValueError: if boto3 yields no credentials, or region/service
                detection from the host fails.
        """
        import boto3
        from opensearchpy import AWSV4SignerAsyncAuth

        secrets = self.access_config.get_secret_value()
        credentials = boto3.Session(
            aws_access_key_id=secrets.aws_access_key_id,
            aws_secret_access_key=secrets.aws_secret_access_key,
            aws_session_token=secrets.aws_session_token,
        ).get_credentials()

        if not credentials:
            raise ValueError("Failed to obtain AWS credentials from provided keys")

        region, service = self._detect_and_validate_aws_config()
        return AWSV4SignerAsyncAuth(credentials, region, service)

    @requires_dependencies(["opensearchpy"], extras="opensearch")
    async def get_async_client_kwargs(self) -> dict:
        """Assemble the keyword arguments for ``AsyncOpenSearch``.

        Auth precedence: AWS IAM, then basic auth, then client certificate,
        then unauthenticated. Entries whose value is ``None`` are dropped
        from the returned dict.
        """
        secrets = self.access_config.get_secret_value()

        # Only truthy settings are forwarded; the rest fall back to the
        # defaults declared on OpenSearchClientInput.
        client_input_kwargs = {
            name: setting
            for name, setting in (
                ("hosts", self.hosts),
                ("use_ssl", self.use_ssl),
                ("verify_certs", self.verify_certs),
                ("ssl_show_warn", self.ssl_show_warn),
            )
            if setting
        }
        for name in ("ca_certs", "client_cert", "client_key"):
            path = getattr(self, name)
            if path:
                client_input_kwargs[name] = str(path)

        if self._has_aws_credentials():
            logger.debug("Using AWS IAM authentication")

            # Must use http_async.AsyncHttpConnection for IAM auth handlers
            from opensearchpy.connection.http_async import AsyncHttpConnection

            client_kwargs = OpenSearchClientInput(**client_input_kwargs).model_dump()
            client_kwargs["http_auth"] = await self._get_async_aws_auth()
            client_kwargs["connection_class"] = AsyncHttpConnection
        elif self.username and secrets.password:
            logger.debug("Using basic HTTP authentication")
            client_input_kwargs["http_auth"] = (self.username, secrets.password)

            client_input = OpenSearchClientInput(**client_input_kwargs)
            client_kwargs = client_input.model_dump()
            if client_input.http_auth:
                # Replace the Secret wrapper with the raw (username, password) pair.
                client_kwargs["http_auth"] = client_input.http_auth.get_secret_value()
        else:
            if self.client_cert:
                logger.debug("Using certificate-based authentication")
            else:
                logger.warning("No authentication configured - connecting without credentials")
            client_kwargs = OpenSearchClientInput(**client_input_kwargs).model_dump()

        # Retry and timeout configuration for resilience against transient errors
        client_kwargs.update(
            max_retries=3,
            retry_on_status=[429, 502, 503],
            retry_on_timeout=True,
            timeout=60,
        )

        return {key: val for key, val in client_kwargs.items() if val is not None}
|
|
266
|
+
|
|
267
|
+
|
|
268
|
+
# OpenSearch indexer settings: identical to the Elasticsearch indexer config,
# re-declared under an OpenSearch-specific name (no fields added or changed).
class OpenSearchIndexerConfig(ElasticsearchIndexerConfig):
    pass
|
|
270
|
+
|
|
271
|
+
|
|
272
|
+
@dataclass
class OpenSearchIndexer(ElasticsearchIndexer):
    """Source indexer that lists the document IDs of an OpenSearch index.

    All network access goes through ``AsyncOpenSearch`` clients built from
    ``connection_config.get_async_client_kwargs()``; a new client is opened and
    closed for each operation.
    """

    connection_config: OpenSearchConnectionConfig
    index_config: OpenSearchIndexerConfig
    client: "OpenSearch" = field(init=False)

    def is_async(self) -> bool:
        """Signal pipeline to use async execution."""
        return True

    @requires_dependencies(["opensearchpy"], extras="opensearch")
    def precheck(self) -> None:
        """Validate connection and index (sync wrapper required by pipeline framework).

        Raises:
            SourceConnectionError: if the client cannot be built or the index
                lookup fails; the original exception is attached as the cause.
        """

        async def _async_precheck():
            from opensearchpy import AsyncOpenSearch

            try:
                async with AsyncOpenSearch(
                    **await self.connection_config.get_async_client_kwargs()
                ) as client:
                    # Use get_alias (GET) instead of exists (HEAD) - HEAD has IAM signing issues
                    # Also respects AWS FGAC by checking only the specific index
                    await client.indices.get_alias(index=self.index_config.index_name)
            except Exception as e:
                logger.error(f"failed to validate connection: {e}", exc_info=True)
                # Chain explicitly so the underlying failure is kept as __cause__.
                raise SourceConnectionError(f"failed to validate connection: {e}") from e

        _run_coroutine(_async_precheck)

    @requires_dependencies(["opensearchpy"], extras="opensearch")
    async def run_async(self, **kwargs: Any) -> AsyncGenerator[ElasticsearchBatchFileData, None]:
        """Async indexing for all authentication types.

        Fetches every document ID in the index, then yields one
        ``ElasticsearchBatchFileData`` per group of ``index_config.batch_size`` IDs.
        """
        # IDs arrive as a set, so their order within and across batches is arbitrary.
        ids = list(await self._get_doc_ids_async())
        for batch in batch_generator(ids, self.index_config.batch_size):
            batch_items = [BatchItem(identifier=b) for b in batch]
            url = f"{self.connection_config.hosts[0]}/{self.index_config.index_name}"
            display_name = (
                f"url={url}, batch_size={len(batch_items)} "
                f"ids={batch_items[0].identifier}..{batch_items[-1].identifier}"
            )
            yield ElasticsearchBatchFileData(
                connector_type=CONNECTOR_TYPE,
                metadata=FileDataSourceMetadata(
                    url=url,
                    date_processed=str(time()),
                ),
                additional_metadata=ElastisearchAdditionalMetadata(
                    index_name=self.index_config.index_name,
                ),
                batch_items=batch_items,
                display_name=display_name,
            )

    @requires_dependencies(["opensearchpy"], extras="opensearch")
    async def _get_doc_ids_async(self) -> set[str]:
        """Fetch document IDs using async_scan.

        Returns:
            The ``_id`` of every hit returned by a ``match_all`` scan of the index.
        """
        from opensearchpy import AsyncOpenSearch
        from opensearchpy.helpers import async_scan

        # Empty "stored_fields": only hit metadata such as "_id" is read below.
        scan_query = {"stored_fields": [], "query": {"match_all": {}}}

        async with AsyncOpenSearch(
            **await self.connection_config.get_async_client_kwargs()
        ) as client:
            doc_ids = set()
            async for hit in async_scan(
                client,
                query=scan_query,
                scroll="1m",
                index=self.index_config.index_name,
            ):
                doc_ids.add(hit["_id"])
            return doc_ids
|
|
346
|
+
|
|
347
|
+
|
|
348
|
+
class OpenSearchDownloaderConfig(ElasticsearchDownloaderConfig):
    # OpenSearch-named subclass of the Elasticsearch downloader config; it adds
    # and overrides nothing.
    ...
|
|
350
|
+
|
|
351
|
+
|
|
352
|
+
@dataclass
class OpenSearchDownloader(ElasticsearchDownloader):
    # Downloader that reads one batch of documents (selected by ID) from an
    # OpenSearch index through the async client.
    connection_config: OpenSearchConnectionConfig
    download_config: OpenSearchDownloaderConfig
    connector_type: str = CONNECTOR_TYPE

    @requires_dependencies(["opensearchpy"], extras="opensearch")
    async def run_async(self, file_data: BatchFileData, **kwargs: Any) -> download_responses:
        """Download documents from OpenSearch.

        The batch in ``file_data`` names the index and the document IDs to
        fetch. Each hit from the scan is passed to
        ``generate_download_response`` and the results are returned as a list.
        """
        from opensearchpy import AsyncOpenSearch
        from opensearchpy.helpers import async_scan

        batch_file_data = ElasticsearchBatchFileData.cast(file_data=file_data)
        index_name: str = batch_file_data.additional_metadata.index_name
        doc_ids: list[str] = [entry.identifier for entry in batch_file_data.batch_items]

        query_body = {"version": True, "query": {"ids": {"values": doc_ids}}}

        # Only add _source if fields are explicitly specified (avoids AWS FGAC issues)
        selected_fields = self.download_config.fields
        if selected_fields:
            query_body["_source"] = selected_fields

        client_kwargs = await self.connection_config.get_async_client_kwargs()
        async with AsyncOpenSearch(**client_kwargs) as client:
            hits = async_scan(
                client,
                query=query_body,
                scroll="1m",
                index=index_name,
            )
            return [
                self.generate_download_response(
                    result=hit, index_name=index_name, file_data=batch_file_data
                )
                async for hit in hits
            ]
|
|
394
|
+
|
|
395
|
+
|
|
396
|
+
class OpenSearchUploaderConfig(ElasticsearchUploaderConfig):
    # Declares ``batch_size_bytes`` with a 5,000,000-byte default; per the
    # description this is deliberately lower than the Elasticsearch default.
    batch_size_bytes: int = Field(
        description=(
            "Size limit (in bytes) for each batch of items to be uploaded. "
            "Default is 5MB, lower than Elasticsearch default to accommodate "
            "AWS OpenSearch cluster rate limits."
        ),
        default=5_000_000,
    )
|
|
403
|
+
|
|
404
|
+
|
|
405
|
+
@dataclass
class OpenSearchUploader(ElasticsearchUploader):
    # Uploader that writes element dicts to an OpenSearch index with the async
    # client, retrying a batch when the cluster reports rate limiting.
    connection_config: OpenSearchConnectionConfig
    upload_config: OpenSearchUploaderConfig
    connector_type: str = CONNECTOR_TYPE

    def is_async(self) -> bool:
        """Signal pipeline to use async execution."""
        return True

    @requires_dependencies(["opensearchpy"], extras="opensearch")
    def precheck(self) -> None:
        """Validate connection and index (sync wrapper required by pipeline framework).

        Raises:
            DestinationConnectionError: if creating the client or the alias
                lookup for ``upload_config.index_name`` fails. The triggering
                exception is attached as ``__cause__``.
        """

        async def _async_precheck():
            from opensearchpy import AsyncOpenSearch

            try:
                async with AsyncOpenSearch(
                    **await self.connection_config.get_async_client_kwargs()
                ) as client:
                    # Use get_alias (GET) instead of exists (HEAD) - HEAD has IAM signing issues
                    # Also respects AWS FGAC by checking only the specific index
                    await client.indices.get_alias(index=self.upload_config.index_name)
            except Exception as e:
                logger.error(f"failed to validate connection: {e}", exc_info=True)
                # Chain explicitly so the root cause stays attached to the wrapper.
                raise DestinationConnectionError(f"failed to validate connection: {e}") from e

        _run_coroutine(_async_precheck)

    @staticmethod
    def _is_rate_limited(error: Exception) -> bool:
        """Return True when ``error`` looks like an HTTP 429 / rate-limit failure.

        Checks the precise case first (a ``TransportError`` whose status code is
        429), then falls back to searching the error text, since the exception
        may arrive as another type.
        """
        from opensearchpy.exceptions import TransportError

        if isinstance(error, TransportError) and error.status_code == 429:
            return True
        message = str(error)
        return "429" in message or "too many requests" in message.lower()

    @requires_dependencies(["opensearchpy"], extras="opensearch")
    async def run_data_async(self, data: list[dict], file_data: FileData, **kwargs: Any) -> None:
        """Upload data to OpenSearch using async_bulk.

        ``data`` is split into byte-bounded batches. Each batch is sent with a
        single ``async_bulk`` call and attempted up to three times, sleeping 5
        seconds between attempts when the failure looks like rate limiting.

        Args:
            data: Element dicts to index.
            file_data: Not read by this method.
            **kwargs: Not read by this method.

        Raises:
            DestinationConnectionError: if a bulk call fails with a non
                rate-limit error, if the attempts are exhausted, or if any
                document in a batch is reported as failed.
        """
        from opensearchpy import AsyncOpenSearch
        from opensearchpy.helpers import async_bulk

        logger.debug(
            f"writing {len(data)} elements to index {self.upload_config.index_name} "
            f"at {self.connection_config.hosts} "
            f"with batch size (bytes) {self.upload_config.batch_size_bytes}"
        )

        max_attempts = 3
        async with AsyncOpenSearch(
            **await self.connection_config.get_async_client_kwargs()
        ) as client:
            for batch in generator_batching_wbytes(
                data, batch_size_limit_bytes=self.upload_config.batch_size_bytes
            ):
                # Bound up front so the post-loop check never depends on the
                # try body having run to completion.
                failed: list = []
                # Retry with delay for rate limiting (429 errors)
                for attempt in range(max_attempts):
                    try:
                        # raise_on_error=False: per-document failures come back
                        # in ``failed`` instead of raising.
                        _, failed = await async_bulk(
                            client=client,
                            actions=batch,
                            chunk_size=len(batch),
                            max_chunk_bytes=self.upload_config.batch_size_bytes,
                            raise_on_error=False,
                        )
                        break
                    except Exception as e:
                        is_last_attempt = attempt >= max_attempts - 1
                        if is_last_attempt or not self._is_rate_limited(e):
                            logger.error(f"Batch upload failed: {e}")
                            raise DestinationConnectionError(str(e)) from e
                        logger.warning(
                            f"Rate limited (attempt {attempt + 1}/{max_attempts}), "
                            f"waiting 5s before retry: {e}"
                        )
                        await asyncio.sleep(5)

                # Check for document failures (outside try to avoid catching our own exception)
                if failed:
                    logger.error(
                        f"Batch upload had {len(failed)} failures out of {len(batch)}. "
                        f"Failed items: {failed[:5]}"
                    )
                    raise DestinationConnectionError(
                        f"Failed to upload {len(failed)} out of {len(batch)} documents"
                    )

                logger.debug(
                    f"uploaded batch of {len(batch)} elements to {self.upload_config.index_name}"
                )

        logger.info(f"Upload complete: {len(data)} elements to {self.upload_config.index_name}")
|
|
497
|
+
|
|
498
|
+
|
|
499
|
+
class OpenSearchUploadStagerConfig(ElasticsearchUploadStagerConfig):
    # OpenSearch-named subclass of the Elasticsearch upload-stager config; it
    # adds and overrides nothing.
    ...
|
|
501
|
+
|
|
502
|
+
|
|
503
|
+
@dataclass
class OpenSearchUploadStager(ElasticsearchUploadStager):
    # Same stager as the Elasticsearch one; only the config annotation is
    # narrowed to the OpenSearch config class.
    upload_stager_config: OpenSearchUploadStagerConfig
|
|
506
|
+
|
|
507
|
+
|
|
508
|
+
# Source side: connection config plus the indexer/downloader classes and their configs.
opensearch_source_entry = SourceRegistryEntry(
    connection_config=OpenSearchConnectionConfig,
    indexer_config=OpenSearchIndexerConfig,
    indexer=OpenSearchIndexer,
    downloader_config=OpenSearchDownloaderConfig,
    downloader=OpenSearchDownloader,
)


# Destination side: connection config plus the stager/uploader classes and their configs.
opensearch_destination_entry = DestinationRegistryEntry(
    connection_config=OpenSearchConnectionConfig,
    uploader_config=OpenSearchUploaderConfig,
    uploader=OpenSearchUploader,
    upload_stager_config=OpenSearchUploadStagerConfig,
    upload_stager=OpenSearchUploadStager,
)
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from unstructured_ingest.processes.connector_registry import (
|
|
4
|
+
add_destination_entry,
|
|
5
|
+
add_source_entry,
|
|
6
|
+
)
|
|
7
|
+
|
|
8
|
+
from .azure import CONNECTOR_TYPE as AZURE_CONNECTOR_TYPE
|
|
9
|
+
from .azure import azure_destination_entry, azure_source_entry
|
|
10
|
+
from .box import CONNECTOR_TYPE as BOX_CONNECTOR_TYPE
|
|
11
|
+
from .box import box_destination_entry, box_source_entry
|
|
12
|
+
from .dropbox import CONNECTOR_TYPE as DROPBOX_CONNECTOR_TYPE
|
|
13
|
+
from .dropbox import dropbox_destination_entry, dropbox_source_entry
|
|
14
|
+
from .gcs import CONNECTOR_TYPE as GCS_CONNECTOR_TYPE
|
|
15
|
+
from .gcs import gcs_destination_entry, gcs_source_entry
|
|
16
|
+
from .s3 import CONNECTOR_TYPE as S3_CONNECTOR_TYPE
|
|
17
|
+
from .s3 import s3_destination_entry, s3_source_entry
|
|
18
|
+
from .sftp import CONNECTOR_TYPE as SFTP_CONNECTOR_TYPE
|
|
19
|
+
from .sftp import sftp_destination_entry, sftp_source_entry
|
|
20
|
+
|
|
21
|
+
# Register the source and destination entry of each connector imported above,
# in the same order as before: source first, then destination, per connector.
for _connector_type, _source_entry, _destination_entry in (
    (AZURE_CONNECTOR_TYPE, azure_source_entry, azure_destination_entry),
    (BOX_CONNECTOR_TYPE, box_source_entry, box_destination_entry),
    (DROPBOX_CONNECTOR_TYPE, dropbox_source_entry, dropbox_destination_entry),
    (GCS_CONNECTOR_TYPE, gcs_source_entry, gcs_destination_entry),
    (S3_CONNECTOR_TYPE, s3_source_entry, s3_destination_entry),
    (SFTP_CONNECTOR_TYPE, sftp_source_entry, sftp_destination_entry),
):
    add_source_entry(source_type=_connector_type, entry=_source_entry)
    add_destination_entry(destination_type=_connector_type, entry=_destination_entry)

# Keep the loop temporaries out of the module namespace.
del _connector_type, _source_entry, _destination_entry
|