unstructured-ingest 0.3.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of unstructured-ingest might be problematic. Click here for more details.
- test/__init__.py +0 -0
- test/integration/__init__.py +0 -0
- test/integration/chunkers/__init__.py +0 -0
- test/integration/chunkers/test_chunkers.py +31 -0
- test/integration/connectors/__init__.py +0 -0
- test/integration/connectors/conftest.py +38 -0
- test/integration/connectors/databricks/__init__.py +0 -0
- test/integration/connectors/databricks/test_volumes_native.py +269 -0
- test/integration/connectors/discord/__init__.py +0 -0
- test/integration/connectors/discord/test_discord.py +90 -0
- test/integration/connectors/duckdb/__init__.py +0 -0
- test/integration/connectors/duckdb/conftest.py +14 -0
- test/integration/connectors/duckdb/test_duckdb.py +89 -0
- test/integration/connectors/duckdb/test_motherduck.py +95 -0
- test/integration/connectors/elasticsearch/__init__.py +0 -0
- test/integration/connectors/elasticsearch/conftest.py +34 -0
- test/integration/connectors/elasticsearch/test_elasticsearch.py +330 -0
- test/integration/connectors/elasticsearch/test_opensearch.py +325 -0
- test/integration/connectors/sql/__init__.py +0 -0
- test/integration/connectors/sql/test_postgres.py +195 -0
- test/integration/connectors/sql/test_singlestore.py +176 -0
- test/integration/connectors/sql/test_snowflake.py +238 -0
- test/integration/connectors/sql/test_sqlite.py +162 -0
- test/integration/connectors/test_astradb.py +217 -0
- test/integration/connectors/test_azure_ai_search.py +255 -0
- test/integration/connectors/test_chroma.py +120 -0
- test/integration/connectors/test_confluence.py +113 -0
- test/integration/connectors/test_delta_table.py +185 -0
- test/integration/connectors/test_lancedb.py +247 -0
- test/integration/connectors/test_milvus.py +203 -0
- test/integration/connectors/test_mongodb.py +335 -0
- test/integration/connectors/test_neo4j.py +236 -0
- test/integration/connectors/test_notion.py +145 -0
- test/integration/connectors/test_onedrive.py +118 -0
- test/integration/connectors/test_pinecone.py +288 -0
- test/integration/connectors/test_qdrant.py +215 -0
- test/integration/connectors/test_redis.py +119 -0
- test/integration/connectors/test_s3.py +183 -0
- test/integration/connectors/test_vectara.py +270 -0
- test/integration/connectors/utils/__init__.py +0 -0
- test/integration/connectors/utils/constants.py +7 -0
- test/integration/connectors/utils/docker.py +151 -0
- test/integration/connectors/utils/docker_compose.py +59 -0
- test/integration/connectors/utils/validation/__init__.py +0 -0
- test/integration/connectors/utils/validation/destination.py +75 -0
- test/integration/connectors/utils/validation/equality.py +75 -0
- test/integration/connectors/utils/validation/source.py +299 -0
- test/integration/connectors/utils/validation/utils.py +36 -0
- test/integration/connectors/weaviate/__init__.py +0 -0
- test/integration/connectors/weaviate/conftest.py +15 -0
- test/integration/connectors/weaviate/test_cloud.py +34 -0
- test/integration/connectors/weaviate/test_local.py +131 -0
- test/integration/embedders/__init__.py +0 -0
- test/integration/embedders/conftest.py +13 -0
- test/integration/embedders/test_azure_openai.py +59 -0
- test/integration/embedders/test_bedrock.py +103 -0
- test/integration/embedders/test_huggingface.py +26 -0
- test/integration/embedders/test_mixedbread.py +71 -0
- test/integration/embedders/test_octoai.py +77 -0
- test/integration/embedders/test_openai.py +76 -0
- test/integration/embedders/test_togetherai.py +71 -0
- test/integration/embedders/test_vertexai.py +65 -0
- test/integration/embedders/test_voyageai.py +65 -0
- test/integration/embedders/utils.py +68 -0
- test/integration/partitioners/__init__.py +0 -0
- test/integration/partitioners/test_partitioner.py +75 -0
- test/integration/utils.py +15 -0
- test/unit/__init__.py +0 -0
- test/unit/embed/__init__.py +0 -0
- test/unit/embed/test_mixedbreadai.py +42 -0
- test/unit/embed/test_octoai.py +27 -0
- test/unit/embed/test_openai.py +20 -0
- test/unit/embed/test_vertexai.py +25 -0
- test/unit/embed/test_voyageai.py +24 -0
- test/unit/test_error.py +27 -0
- test/unit/test_logger.py +78 -0
- test/unit/test_utils.py +184 -0
- test/unit/v2/__init__.py +0 -0
- test/unit/v2/chunkers/__init__.py +0 -0
- test/unit/v2/chunkers/test_chunkers.py +49 -0
- test/unit/v2/connectors/__init__.py +0 -0
- test/unit/v2/connectors/test_confluence.py +39 -0
- test/unit/v2/embedders/__init__.py +0 -0
- test/unit/v2/embedders/test_bedrock.py +36 -0
- test/unit/v2/embedders/test_huggingface.py +48 -0
- test/unit/v2/embedders/test_mixedbread.py +37 -0
- test/unit/v2/embedders/test_octoai.py +35 -0
- test/unit/v2/embedders/test_openai.py +35 -0
- test/unit/v2/embedders/test_togetherai.py +37 -0
- test/unit/v2/embedders/test_vertexai.py +37 -0
- test/unit/v2/embedders/test_voyageai.py +38 -0
- test/unit/v2/partitioners/__init__.py +0 -0
- test/unit/v2/partitioners/test_partitioner.py +63 -0
- test/unit/v2/test_interfaces.py +26 -0
- test/unit/v2/test_utils.py +82 -0
- test/unit/v2/utils/__init__.py +0 -0
- test/unit/v2/utils/data_generator.py +32 -0
- unstructured_ingest/__init__.py +1 -0
- unstructured_ingest/__version__.py +1 -0
- unstructured_ingest/cli/__init__.py +14 -0
- unstructured_ingest/cli/base/__init__.py +0 -0
- unstructured_ingest/cli/base/cmd.py +19 -0
- unstructured_ingest/cli/base/dest.py +87 -0
- unstructured_ingest/cli/base/src.py +57 -0
- unstructured_ingest/cli/cli.py +37 -0
- unstructured_ingest/cli/cmd_factory.py +12 -0
- unstructured_ingest/cli/cmds/__init__.py +145 -0
- unstructured_ingest/cli/cmds/airtable.py +69 -0
- unstructured_ingest/cli/cmds/astradb.py +99 -0
- unstructured_ingest/cli/cmds/azure_ai_search.py +65 -0
- unstructured_ingest/cli/cmds/biomed.py +52 -0
- unstructured_ingest/cli/cmds/chroma.py +104 -0
- unstructured_ingest/cli/cmds/clarifai.py +71 -0
- unstructured_ingest/cli/cmds/confluence.py +69 -0
- unstructured_ingest/cli/cmds/databricks_volumes.py +163 -0
- unstructured_ingest/cli/cmds/delta_table.py +94 -0
- unstructured_ingest/cli/cmds/discord.py +47 -0
- unstructured_ingest/cli/cmds/elasticsearch.py +133 -0
- unstructured_ingest/cli/cmds/fsspec/__init__.py +0 -0
- unstructured_ingest/cli/cmds/fsspec/azure.py +94 -0
- unstructured_ingest/cli/cmds/fsspec/box.py +48 -0
- unstructured_ingest/cli/cmds/fsspec/dropbox.py +51 -0
- unstructured_ingest/cli/cmds/fsspec/fsspec.py +15 -0
- unstructured_ingest/cli/cmds/fsspec/gcs.py +71 -0
- unstructured_ingest/cli/cmds/fsspec/s3.py +74 -0
- unstructured_ingest/cli/cmds/fsspec/sftp.py +58 -0
- unstructured_ingest/cli/cmds/github.py +54 -0
- unstructured_ingest/cli/cmds/gitlab.py +54 -0
- unstructured_ingest/cli/cmds/google_drive.py +49 -0
- unstructured_ingest/cli/cmds/hubspot.py +70 -0
- unstructured_ingest/cli/cmds/jira.py +71 -0
- unstructured_ingest/cli/cmds/kafka.py +102 -0
- unstructured_ingest/cli/cmds/local.py +43 -0
- unstructured_ingest/cli/cmds/mongodb.py +72 -0
- unstructured_ingest/cli/cmds/notion.py +48 -0
- unstructured_ingest/cli/cmds/onedrive.py +66 -0
- unstructured_ingest/cli/cmds/opensearch.py +117 -0
- unstructured_ingest/cli/cmds/outlook.py +67 -0
- unstructured_ingest/cli/cmds/pinecone.py +71 -0
- unstructured_ingest/cli/cmds/qdrant.py +124 -0
- unstructured_ingest/cli/cmds/reddit.py +67 -0
- unstructured_ingest/cli/cmds/salesforce.py +58 -0
- unstructured_ingest/cli/cmds/sharepoint.py +66 -0
- unstructured_ingest/cli/cmds/slack.py +56 -0
- unstructured_ingest/cli/cmds/sql.py +66 -0
- unstructured_ingest/cli/cmds/vectara.py +66 -0
- unstructured_ingest/cli/cmds/weaviate.py +98 -0
- unstructured_ingest/cli/cmds/wikipedia.py +40 -0
- unstructured_ingest/cli/common.py +7 -0
- unstructured_ingest/cli/interfaces.py +663 -0
- unstructured_ingest/cli/utils.py +205 -0
- unstructured_ingest/connector/__init__.py +0 -0
- unstructured_ingest/connector/airtable.py +309 -0
- unstructured_ingest/connector/astradb.py +267 -0
- unstructured_ingest/connector/azure_ai_search.py +144 -0
- unstructured_ingest/connector/biomed.py +320 -0
- unstructured_ingest/connector/chroma.py +158 -0
- unstructured_ingest/connector/clarifai.py +122 -0
- unstructured_ingest/connector/confluence.py +285 -0
- unstructured_ingest/connector/databricks_volumes.py +137 -0
- unstructured_ingest/connector/delta_table.py +203 -0
- unstructured_ingest/connector/discord.py +180 -0
- unstructured_ingest/connector/elasticsearch.py +396 -0
- unstructured_ingest/connector/fsspec/__init__.py +0 -0
- unstructured_ingest/connector/fsspec/azure.py +78 -0
- unstructured_ingest/connector/fsspec/box.py +109 -0
- unstructured_ingest/connector/fsspec/dropbox.py +160 -0
- unstructured_ingest/connector/fsspec/fsspec.py +359 -0
- unstructured_ingest/connector/fsspec/gcs.py +82 -0
- unstructured_ingest/connector/fsspec/s3.py +62 -0
- unstructured_ingest/connector/fsspec/sftp.py +81 -0
- unstructured_ingest/connector/git.py +124 -0
- unstructured_ingest/connector/github.py +174 -0
- unstructured_ingest/connector/gitlab.py +142 -0
- unstructured_ingest/connector/google_drive.py +348 -0
- unstructured_ingest/connector/hubspot.py +278 -0
- unstructured_ingest/connector/jira.py +469 -0
- unstructured_ingest/connector/kafka.py +293 -0
- unstructured_ingest/connector/local.py +139 -0
- unstructured_ingest/connector/mongodb.py +284 -0
- unstructured_ingest/connector/notion/__init__.py +0 -0
- unstructured_ingest/connector/notion/client.py +248 -0
- unstructured_ingest/connector/notion/connector.py +469 -0
- unstructured_ingest/connector/notion/helpers.py +584 -0
- unstructured_ingest/connector/notion/interfaces.py +32 -0
- unstructured_ingest/connector/notion/types/__init__.py +0 -0
- unstructured_ingest/connector/notion/types/block.py +96 -0
- unstructured_ingest/connector/notion/types/blocks/__init__.py +63 -0
- unstructured_ingest/connector/notion/types/blocks/bookmark.py +40 -0
- unstructured_ingest/connector/notion/types/blocks/breadcrumb.py +21 -0
- unstructured_ingest/connector/notion/types/blocks/bulleted_list_item.py +31 -0
- unstructured_ingest/connector/notion/types/blocks/callout.py +94 -0
- unstructured_ingest/connector/notion/types/blocks/child_database.py +23 -0
- unstructured_ingest/connector/notion/types/blocks/child_page.py +23 -0
- unstructured_ingest/connector/notion/types/blocks/code.py +43 -0
- unstructured_ingest/connector/notion/types/blocks/column_list.py +35 -0
- unstructured_ingest/connector/notion/types/blocks/divider.py +22 -0
- unstructured_ingest/connector/notion/types/blocks/embed.py +36 -0
- unstructured_ingest/connector/notion/types/blocks/equation.py +23 -0
- unstructured_ingest/connector/notion/types/blocks/file.py +49 -0
- unstructured_ingest/connector/notion/types/blocks/heading.py +37 -0
- unstructured_ingest/connector/notion/types/blocks/image.py +21 -0
- unstructured_ingest/connector/notion/types/blocks/link_preview.py +24 -0
- unstructured_ingest/connector/notion/types/blocks/link_to_page.py +29 -0
- unstructured_ingest/connector/notion/types/blocks/numbered_list.py +29 -0
- unstructured_ingest/connector/notion/types/blocks/paragraph.py +31 -0
- unstructured_ingest/connector/notion/types/blocks/pdf.py +49 -0
- unstructured_ingest/connector/notion/types/blocks/quote.py +37 -0
- unstructured_ingest/connector/notion/types/blocks/synced_block.py +57 -0
- unstructured_ingest/connector/notion/types/blocks/table.py +63 -0
- unstructured_ingest/connector/notion/types/blocks/table_of_contents.py +23 -0
- unstructured_ingest/connector/notion/types/blocks/template.py +30 -0
- unstructured_ingest/connector/notion/types/blocks/todo.py +42 -0
- unstructured_ingest/connector/notion/types/blocks/toggle.py +37 -0
- unstructured_ingest/connector/notion/types/blocks/unsupported.py +20 -0
- unstructured_ingest/connector/notion/types/blocks/video.py +22 -0
- unstructured_ingest/connector/notion/types/database.py +73 -0
- unstructured_ingest/connector/notion/types/database_properties/__init__.py +106 -0
- unstructured_ingest/connector/notion/types/database_properties/checkbox.py +38 -0
- unstructured_ingest/connector/notion/types/database_properties/created_by.py +35 -0
- unstructured_ingest/connector/notion/types/database_properties/created_time.py +34 -0
- unstructured_ingest/connector/notion/types/database_properties/date.py +41 -0
- unstructured_ingest/connector/notion/types/database_properties/email.py +36 -0
- unstructured_ingest/connector/notion/types/database_properties/files.py +37 -0
- unstructured_ingest/connector/notion/types/database_properties/formula.py +49 -0
- unstructured_ingest/connector/notion/types/database_properties/last_edited_by.py +34 -0
- unstructured_ingest/connector/notion/types/database_properties/last_edited_time.py +34 -0
- unstructured_ingest/connector/notion/types/database_properties/multiselect.py +73 -0
- unstructured_ingest/connector/notion/types/database_properties/number.py +49 -0
- unstructured_ingest/connector/notion/types/database_properties/people.py +40 -0
- unstructured_ingest/connector/notion/types/database_properties/phone_number.py +36 -0
- unstructured_ingest/connector/notion/types/database_properties/relation.py +67 -0
- unstructured_ingest/connector/notion/types/database_properties/rich_text.py +43 -0
- unstructured_ingest/connector/notion/types/database_properties/rollup.py +56 -0
- unstructured_ingest/connector/notion/types/database_properties/select.py +68 -0
- unstructured_ingest/connector/notion/types/database_properties/status.py +80 -0
- unstructured_ingest/connector/notion/types/database_properties/title.py +37 -0
- unstructured_ingest/connector/notion/types/database_properties/unique_id.py +50 -0
- unstructured_ingest/connector/notion/types/database_properties/url.py +37 -0
- unstructured_ingest/connector/notion/types/database_properties/verification.py +78 -0
- unstructured_ingest/connector/notion/types/date.py +26 -0
- unstructured_ingest/connector/notion/types/file.py +51 -0
- unstructured_ingest/connector/notion/types/page.py +45 -0
- unstructured_ingest/connector/notion/types/parent.py +66 -0
- unstructured_ingest/connector/notion/types/rich_text.py +189 -0
- unstructured_ingest/connector/notion/types/user.py +76 -0
- unstructured_ingest/connector/onedrive.py +232 -0
- unstructured_ingest/connector/opensearch.py +218 -0
- unstructured_ingest/connector/outlook.py +285 -0
- unstructured_ingest/connector/pinecone.py +140 -0
- unstructured_ingest/connector/qdrant.py +144 -0
- unstructured_ingest/connector/reddit.py +166 -0
- unstructured_ingest/connector/registry.py +109 -0
- unstructured_ingest/connector/salesforce.py +301 -0
- unstructured_ingest/connector/sharepoint.py +573 -0
- unstructured_ingest/connector/slack.py +224 -0
- unstructured_ingest/connector/sql.py +199 -0
- unstructured_ingest/connector/vectara.py +253 -0
- unstructured_ingest/connector/weaviate.py +190 -0
- unstructured_ingest/connector/wikipedia.py +208 -0
- unstructured_ingest/embed/__init__.py +0 -0
- unstructured_ingest/embed/azure_openai.py +31 -0
- unstructured_ingest/embed/bedrock.py +193 -0
- unstructured_ingest/embed/huggingface.py +52 -0
- unstructured_ingest/embed/interfaces.py +117 -0
- unstructured_ingest/embed/mixedbreadai.py +233 -0
- unstructured_ingest/embed/octoai.py +130 -0
- unstructured_ingest/embed/openai.py +116 -0
- unstructured_ingest/embed/togetherai.py +106 -0
- unstructured_ingest/embed/vertexai.py +126 -0
- unstructured_ingest/embed/voyageai.py +130 -0
- unstructured_ingest/enhanced_dataclass/__init__.py +4 -0
- unstructured_ingest/enhanced_dataclass/core.py +99 -0
- unstructured_ingest/enhanced_dataclass/dataclasses.py +54 -0
- unstructured_ingest/enhanced_dataclass/json_mixin.py +125 -0
- unstructured_ingest/error.py +49 -0
- unstructured_ingest/ingest_backoff/__init__.py +3 -0
- unstructured_ingest/ingest_backoff/_common.py +102 -0
- unstructured_ingest/ingest_backoff/_wrapper.py +122 -0
- unstructured_ingest/interfaces.py +852 -0
- unstructured_ingest/logger.py +130 -0
- unstructured_ingest/main.py +11 -0
- unstructured_ingest/pipeline/__init__.py +22 -0
- unstructured_ingest/pipeline/copy.py +19 -0
- unstructured_ingest/pipeline/doc_factory.py +12 -0
- unstructured_ingest/pipeline/interfaces.py +270 -0
- unstructured_ingest/pipeline/partition.py +60 -0
- unstructured_ingest/pipeline/permissions.py +12 -0
- unstructured_ingest/pipeline/pipeline.py +117 -0
- unstructured_ingest/pipeline/reformat/__init__.py +0 -0
- unstructured_ingest/pipeline/reformat/chunking.py +134 -0
- unstructured_ingest/pipeline/reformat/embedding.py +64 -0
- unstructured_ingest/pipeline/source.py +77 -0
- unstructured_ingest/pipeline/utils.py +6 -0
- unstructured_ingest/pipeline/write.py +18 -0
- unstructured_ingest/processor.py +93 -0
- unstructured_ingest/runner/__init__.py +104 -0
- unstructured_ingest/runner/airtable.py +35 -0
- unstructured_ingest/runner/astradb.py +34 -0
- unstructured_ingest/runner/base_runner.py +89 -0
- unstructured_ingest/runner/biomed.py +45 -0
- unstructured_ingest/runner/confluence.py +35 -0
- unstructured_ingest/runner/delta_table.py +34 -0
- unstructured_ingest/runner/discord.py +35 -0
- unstructured_ingest/runner/elasticsearch.py +40 -0
- unstructured_ingest/runner/fsspec/__init__.py +0 -0
- unstructured_ingest/runner/fsspec/azure.py +30 -0
- unstructured_ingest/runner/fsspec/box.py +28 -0
- unstructured_ingest/runner/fsspec/dropbox.py +30 -0
- unstructured_ingest/runner/fsspec/fsspec.py +40 -0
- unstructured_ingest/runner/fsspec/gcs.py +28 -0
- unstructured_ingest/runner/fsspec/s3.py +28 -0
- unstructured_ingest/runner/fsspec/sftp.py +28 -0
- unstructured_ingest/runner/github.py +37 -0
- unstructured_ingest/runner/gitlab.py +37 -0
- unstructured_ingest/runner/google_drive.py +35 -0
- unstructured_ingest/runner/hubspot.py +35 -0
- unstructured_ingest/runner/jira.py +35 -0
- unstructured_ingest/runner/kafka.py +34 -0
- unstructured_ingest/runner/local.py +23 -0
- unstructured_ingest/runner/mongodb.py +34 -0
- unstructured_ingest/runner/notion.py +61 -0
- unstructured_ingest/runner/onedrive.py +35 -0
- unstructured_ingest/runner/opensearch.py +40 -0
- unstructured_ingest/runner/outlook.py +33 -0
- unstructured_ingest/runner/reddit.py +35 -0
- unstructured_ingest/runner/salesforce.py +33 -0
- unstructured_ingest/runner/sharepoint.py +35 -0
- unstructured_ingest/runner/slack.py +33 -0
- unstructured_ingest/runner/utils.py +47 -0
- unstructured_ingest/runner/wikipedia.py +35 -0
- unstructured_ingest/runner/writers/__init__.py +48 -0
- unstructured_ingest/runner/writers/astradb.py +22 -0
- unstructured_ingest/runner/writers/azure_ai_search.py +24 -0
- unstructured_ingest/runner/writers/base_writer.py +26 -0
- unstructured_ingest/runner/writers/chroma.py +22 -0
- unstructured_ingest/runner/writers/clarifai.py +19 -0
- unstructured_ingest/runner/writers/databricks_volumes.py +25 -0
- unstructured_ingest/runner/writers/delta_table.py +24 -0
- unstructured_ingest/runner/writers/elasticsearch.py +24 -0
- unstructured_ingest/runner/writers/fsspec/__init__.py +0 -0
- unstructured_ingest/runner/writers/fsspec/azure.py +24 -0
- unstructured_ingest/runner/writers/fsspec/box.py +21 -0
- unstructured_ingest/runner/writers/fsspec/dropbox.py +21 -0
- unstructured_ingest/runner/writers/fsspec/gcs.py +19 -0
- unstructured_ingest/runner/writers/fsspec/s3.py +21 -0
- unstructured_ingest/runner/writers/kafka.py +21 -0
- unstructured_ingest/runner/writers/mongodb.py +21 -0
- unstructured_ingest/runner/writers/opensearch.py +26 -0
- unstructured_ingest/runner/writers/pinecone.py +21 -0
- unstructured_ingest/runner/writers/qdrant.py +19 -0
- unstructured_ingest/runner/writers/sql.py +22 -0
- unstructured_ingest/runner/writers/vectara.py +22 -0
- unstructured_ingest/runner/writers/weaviate.py +21 -0
- unstructured_ingest/utils/__init__.py +0 -0
- unstructured_ingest/utils/chunking.py +56 -0
- unstructured_ingest/utils/compression.py +118 -0
- unstructured_ingest/utils/data_prep.py +200 -0
- unstructured_ingest/utils/dep_check.py +78 -0
- unstructured_ingest/utils/google_filetype.py +9 -0
- unstructured_ingest/utils/string_and_date_utils.py +49 -0
- unstructured_ingest/utils/table.py +73 -0
- unstructured_ingest/v2/__init__.py +1 -0
- unstructured_ingest/v2/cli/__init__.py +0 -0
- unstructured_ingest/v2/cli/base/__init__.py +4 -0
- unstructured_ingest/v2/cli/base/cmd.py +269 -0
- unstructured_ingest/v2/cli/base/dest.py +85 -0
- unstructured_ingest/v2/cli/base/importer.py +34 -0
- unstructured_ingest/v2/cli/base/src.py +85 -0
- unstructured_ingest/v2/cli/cli.py +24 -0
- unstructured_ingest/v2/cli/cmds.py +14 -0
- unstructured_ingest/v2/cli/utils/__init__.py +0 -0
- unstructured_ingest/v2/cli/utils/click.py +237 -0
- unstructured_ingest/v2/cli/utils/model_conversion.py +222 -0
- unstructured_ingest/v2/constants.py +2 -0
- unstructured_ingest/v2/errors.py +18 -0
- unstructured_ingest/v2/interfaces/__init__.py +32 -0
- unstructured_ingest/v2/interfaces/connector.py +50 -0
- unstructured_ingest/v2/interfaces/downloader.py +89 -0
- unstructured_ingest/v2/interfaces/file_data.py +116 -0
- unstructured_ingest/v2/interfaces/indexer.py +30 -0
- unstructured_ingest/v2/interfaces/process.py +19 -0
- unstructured_ingest/v2/interfaces/processor.py +88 -0
- unstructured_ingest/v2/interfaces/upload_stager.py +102 -0
- unstructured_ingest/v2/interfaces/uploader.py +53 -0
- unstructured_ingest/v2/logger.py +126 -0
- unstructured_ingest/v2/main.py +11 -0
- unstructured_ingest/v2/otel.py +111 -0
- unstructured_ingest/v2/pipeline/__init__.py +0 -0
- unstructured_ingest/v2/pipeline/interfaces.py +211 -0
- unstructured_ingest/v2/pipeline/otel.py +32 -0
- unstructured_ingest/v2/pipeline/pipeline.py +384 -0
- unstructured_ingest/v2/pipeline/steps/__init__.py +0 -0
- unstructured_ingest/v2/pipeline/steps/chunk.py +80 -0
- unstructured_ingest/v2/pipeline/steps/download.py +207 -0
- unstructured_ingest/v2/pipeline/steps/embed.py +79 -0
- unstructured_ingest/v2/pipeline/steps/filter.py +35 -0
- unstructured_ingest/v2/pipeline/steps/index.py +86 -0
- unstructured_ingest/v2/pipeline/steps/partition.py +79 -0
- unstructured_ingest/v2/pipeline/steps/stage.py +65 -0
- unstructured_ingest/v2/pipeline/steps/uncompress.py +50 -0
- unstructured_ingest/v2/pipeline/steps/upload.py +58 -0
- unstructured_ingest/v2/processes/__init__.py +18 -0
- unstructured_ingest/v2/processes/chunker.py +124 -0
- unstructured_ingest/v2/processes/connector_registry.py +69 -0
- unstructured_ingest/v2/processes/connectors/__init__.py +117 -0
- unstructured_ingest/v2/processes/connectors/airtable.py +235 -0
- unstructured_ingest/v2/processes/connectors/astradb.py +402 -0
- unstructured_ingest/v2/processes/connectors/azure_ai_search.py +276 -0
- unstructured_ingest/v2/processes/connectors/chroma.py +190 -0
- unstructured_ingest/v2/processes/connectors/confluence.py +207 -0
- unstructured_ingest/v2/processes/connectors/couchbase.py +334 -0
- unstructured_ingest/v2/processes/connectors/databricks/__init__.py +52 -0
- unstructured_ingest/v2/processes/connectors/databricks/volumes.py +208 -0
- unstructured_ingest/v2/processes/connectors/databricks/volumes_aws.py +87 -0
- unstructured_ingest/v2/processes/connectors/databricks/volumes_azure.py +102 -0
- unstructured_ingest/v2/processes/connectors/databricks/volumes_gcp.py +85 -0
- unstructured_ingest/v2/processes/connectors/databricks/volumes_native.py +86 -0
- unstructured_ingest/v2/processes/connectors/delta_table.py +191 -0
- unstructured_ingest/v2/processes/connectors/discord.py +158 -0
- unstructured_ingest/v2/processes/connectors/duckdb/__init__.py +15 -0
- unstructured_ingest/v2/processes/connectors/duckdb/base.py +100 -0
- unstructured_ingest/v2/processes/connectors/duckdb/duckdb.py +127 -0
- unstructured_ingest/v2/processes/connectors/duckdb/motherduck.py +126 -0
- unstructured_ingest/v2/processes/connectors/elasticsearch/__init__.py +19 -0
- unstructured_ingest/v2/processes/connectors/elasticsearch/elasticsearch.py +470 -0
- unstructured_ingest/v2/processes/connectors/elasticsearch/opensearch.py +195 -0
- unstructured_ingest/v2/processes/connectors/fsspec/__init__.py +37 -0
- unstructured_ingest/v2/processes/connectors/fsspec/azure.py +197 -0
- unstructured_ingest/v2/processes/connectors/fsspec/box.py +170 -0
- unstructured_ingest/v2/processes/connectors/fsspec/dropbox.py +168 -0
- unstructured_ingest/v2/processes/connectors/fsspec/fsspec.py +332 -0
- unstructured_ingest/v2/processes/connectors/fsspec/gcs.py +197 -0
- unstructured_ingest/v2/processes/connectors/fsspec/s3.py +185 -0
- unstructured_ingest/v2/processes/connectors/fsspec/sftp.py +171 -0
- unstructured_ingest/v2/processes/connectors/fsspec/utils.py +17 -0
- unstructured_ingest/v2/processes/connectors/gitlab.py +268 -0
- unstructured_ingest/v2/processes/connectors/google_drive.py +348 -0
- unstructured_ingest/v2/processes/connectors/kafka/__init__.py +17 -0
- unstructured_ingest/v2/processes/connectors/kafka/cloud.py +121 -0
- unstructured_ingest/v2/processes/connectors/kafka/kafka.py +273 -0
- unstructured_ingest/v2/processes/connectors/kafka/local.py +103 -0
- unstructured_ingest/v2/processes/connectors/kdbai.py +148 -0
- unstructured_ingest/v2/processes/connectors/lancedb/__init__.py +30 -0
- unstructured_ingest/v2/processes/connectors/lancedb/aws.py +43 -0
- unstructured_ingest/v2/processes/connectors/lancedb/azure.py +43 -0
- unstructured_ingest/v2/processes/connectors/lancedb/cloud.py +42 -0
- unstructured_ingest/v2/processes/connectors/lancedb/gcp.py +44 -0
- unstructured_ingest/v2/processes/connectors/lancedb/lancedb.py +169 -0
- unstructured_ingest/v2/processes/connectors/lancedb/local.py +44 -0
- unstructured_ingest/v2/processes/connectors/local.py +217 -0
- unstructured_ingest/v2/processes/connectors/milvus.py +225 -0
- unstructured_ingest/v2/processes/connectors/mongodb.py +361 -0
- unstructured_ingest/v2/processes/connectors/neo4j.py +385 -0
- unstructured_ingest/v2/processes/connectors/notion/__init__.py +0 -0
- unstructured_ingest/v2/processes/connectors/notion/client.py +349 -0
- unstructured_ingest/v2/processes/connectors/notion/connector.py +346 -0
- unstructured_ingest/v2/processes/connectors/notion/helpers.py +448 -0
- unstructured_ingest/v2/processes/connectors/notion/interfaces.py +32 -0
- unstructured_ingest/v2/processes/connectors/notion/types/__init__.py +0 -0
- unstructured_ingest/v2/processes/connectors/notion/types/block.py +96 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/__init__.py +63 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/bookmark.py +40 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/breadcrumb.py +21 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/bulleted_list_item.py +31 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/callout.py +94 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/child_database.py +23 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/child_page.py +23 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/code.py +43 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/column_list.py +35 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/divider.py +22 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/embed.py +36 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/equation.py +23 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/file.py +49 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/heading.py +37 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/image.py +21 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/link_preview.py +24 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/link_to_page.py +29 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/numbered_list.py +29 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/paragraph.py +31 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/pdf.py +49 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/quote.py +37 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/synced_block.py +57 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/table.py +63 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/table_of_contents.py +23 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/template.py +30 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/todo.py +42 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/toggle.py +37 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/unsupported.py +20 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/video.py +22 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database.py +73 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/__init__.py +106 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/checkbox.py +38 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/created_by.py +35 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/created_time.py +34 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/date.py +41 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/email.py +36 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/files.py +37 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/formula.py +49 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/last_edited_by.py +34 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/last_edited_time.py +34 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/multiselect.py +73 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/number.py +49 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/people.py +41 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/phone_number.py +36 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/relation.py +67 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/rich_text.py +43 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/rollup.py +56 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/select.py +69 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/status.py +81 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/title.py +37 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/unique_id.py +50 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/url.py +37 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/verification.py +78 -0
- unstructured_ingest/v2/processes/connectors/notion/types/date.py +29 -0
- unstructured_ingest/v2/processes/connectors/notion/types/file.py +54 -0
- unstructured_ingest/v2/processes/connectors/notion/types/page.py +45 -0
- unstructured_ingest/v2/processes/connectors/notion/types/parent.py +66 -0
- unstructured_ingest/v2/processes/connectors/notion/types/rich_text.py +189 -0
- unstructured_ingest/v2/processes/connectors/notion/types/user.py +79 -0
- unstructured_ingest/v2/processes/connectors/onedrive.py +447 -0
- unstructured_ingest/v2/processes/connectors/outlook.py +239 -0
- unstructured_ingest/v2/processes/connectors/pinecone.py +277 -0
- unstructured_ingest/v2/processes/connectors/qdrant/__init__.py +16 -0
- unstructured_ingest/v2/processes/connectors/qdrant/cloud.py +59 -0
- unstructured_ingest/v2/processes/connectors/qdrant/local.py +58 -0
- unstructured_ingest/v2/processes/connectors/qdrant/qdrant.py +160 -0
- unstructured_ingest/v2/processes/connectors/qdrant/server.py +60 -0
- unstructured_ingest/v2/processes/connectors/redisdb.py +182 -0
- unstructured_ingest/v2/processes/connectors/salesforce.py +303 -0
- unstructured_ingest/v2/processes/connectors/sharepoint.py +448 -0
- unstructured_ingest/v2/processes/connectors/slack.py +248 -0
- unstructured_ingest/v2/processes/connectors/sql/__init__.py +27 -0
- unstructured_ingest/v2/processes/connectors/sql/postgres.py +162 -0
- unstructured_ingest/v2/processes/connectors/sql/singlestore.py +166 -0
- unstructured_ingest/v2/processes/connectors/sql/snowflake.py +210 -0
- unstructured_ingest/v2/processes/connectors/sql/sql.py +434 -0
- unstructured_ingest/v2/processes/connectors/sql/sqlite.py +168 -0
- unstructured_ingest/v2/processes/connectors/utils.py +29 -0
- unstructured_ingest/v2/processes/connectors/vectara.py +350 -0
- unstructured_ingest/v2/processes/connectors/weaviate/__init__.py +22 -0
- unstructured_ingest/v2/processes/connectors/weaviate/cloud.py +165 -0
- unstructured_ingest/v2/processes/connectors/weaviate/embedded.py +90 -0
- unstructured_ingest/v2/processes/connectors/weaviate/local.py +73 -0
- unstructured_ingest/v2/processes/connectors/weaviate/weaviate.py +267 -0
- unstructured_ingest/v2/processes/embedder.py +195 -0
- unstructured_ingest/v2/processes/filter.py +60 -0
- unstructured_ingest/v2/processes/partitioner.py +188 -0
- unstructured_ingest/v2/processes/uncompress.py +61 -0
- unstructured_ingest/v2/unstructured_api.py +128 -0
- unstructured_ingest/v2/utils.py +61 -0
- unstructured_ingest-0.3.13.dist-info/LICENSE.md +201 -0
- unstructured_ingest-0.3.13.dist-info/METADATA +205 -0
- unstructured_ingest-0.3.13.dist-info/RECORD +557 -0
- unstructured_ingest-0.3.13.dist-info/WHEEL +5 -0
- unstructured_ingest-0.3.13.dist-info/entry_points.txt +2 -0
- unstructured_ingest-0.3.13.dist-info/top_level.txt +2 -0
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
from typing import TYPE_CHECKING
|
|
2
|
+
|
|
3
|
+
import click
|
|
4
|
+
|
|
5
|
+
from unstructured_ingest.cli import dest, src
|
|
6
|
+
from unstructured_ingest.v2.cli.cmds import dest as dest_v2
|
|
7
|
+
from unstructured_ingest.v2.cli.cmds import src as src_v2
|
|
8
|
+
|
|
9
|
+
if TYPE_CHECKING:
|
|
10
|
+
from click import Command
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
# Root Click group that the source subcommands are attached to.
# No docstring is used here: Click takes a command's docstring as its help text,
# so adding one would change what the CLI prints.
@click.group()
def ingest():
    pass
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def get_cmd() -> "Command":
    """Build the main ``ingest`` Click command with all nested subcommands attached.

    Source and destination subcommands are collected by name from ``src``/``dest`` and
    then from ``src_v2``/``dest_v2``; an entry from the v2 collections replaces an
    earlier entry with the same name. Every destination subcommand is added to every
    source subcommand, and every source subcommand is added to ``ingest``.
    """
    # Index by command name, keeping the original iteration order of the inputs.
    sources = {command.name: command for command in src}
    destinations = {command.name: command for command in dest}
    # Later updates win on a name clash, so v2 commands override same-named ones.
    sources.update({command.name: command for command in src_v2})
    destinations.update({command.name: command for command in dest_v2})

    root = ingest
    for source_cmd in sources.values():
        # Each source gets the full set of destinations nested under it.
        for destination_cmd in destinations.values():
            source_cmd.add_command(destination_cmd)
        root.add_command(source_cmd)
    return root
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import typing as t
|
|
2
|
+
|
|
3
|
+
from unstructured_ingest.cli.base.src import BaseSrcCmd
|
|
4
|
+
from unstructured_ingest.cli.cmds import base_src_cmd_fns
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def get_src_cmd_map() -> t.Dict[str, t.Callable[[], BaseSrcCmd]]:
    """Map each source command's ``cmd_name_key`` to the factory that builds it.

    Every factory in ``base_src_cmd_fns`` is called once to read the key of the
    command it produces; the factory itself (not the built command) is stored.
    """
    factories_by_key = {}
    for factory in base_src_cmd_fns:
        factories_by_key[factory().cmd_name_key] = factory
    return factories_by_key
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def get_src_cmd(cmd_name: str) -> t.Callable[[], BaseSrcCmd]:
    """Return the source command factory registered under ``cmd_name``.

    Raises:
        KeyError: if no factory is registered under ``cmd_name``.
    """
    factories_by_key = get_src_cmd_map()
    return factories_by_key[cmd_name]
|
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import collections
|
|
4
|
+
import typing as t
|
|
5
|
+
|
|
6
|
+
from unstructured_ingest.cli.base.src import BaseSrcCmd
|
|
7
|
+
from unstructured_ingest.cli.cmds.fsspec.sftp import get_base_src_cmd as sftp_base_src_cmd
|
|
8
|
+
|
|
9
|
+
from .airtable import get_base_src_cmd as airtable_base_src_cmd
|
|
10
|
+
from .astradb import get_base_dest_cmd as astradb_base_dest_cmd
|
|
11
|
+
from .astradb import get_base_src_cmd as astradb_base_src_cmd
|
|
12
|
+
from .azure_ai_search import get_base_dest_cmd as azure_ai_search_base_dest_cmd
|
|
13
|
+
from .biomed import get_base_src_cmd as biomed_base_src_cmd
|
|
14
|
+
from .chroma import get_base_dest_cmd as chroma_base_dest_cmd
|
|
15
|
+
from .clarifai import get_base_dest_cmd as clarifai_base_dest_cmd
|
|
16
|
+
from .confluence import get_base_src_cmd as confluence_base_src_cmd
|
|
17
|
+
from .databricks_volumes import get_base_dest_cmd as databricks_volumes_dest_cmd
|
|
18
|
+
from .delta_table import get_base_dest_cmd as delta_table_dest_cmd
|
|
19
|
+
from .delta_table import get_base_src_cmd as delta_table_base_src_cmd
|
|
20
|
+
from .discord import get_base_src_cmd as discord_base_src_cmd
|
|
21
|
+
from .elasticsearch import get_base_dest_cmd as elasticsearch_base_dest_cmd
|
|
22
|
+
from .elasticsearch import get_base_src_cmd as elasticsearch_base_src_cmd
|
|
23
|
+
from .fsspec.azure import get_base_dest_cmd as azure_base_dest_cmd
|
|
24
|
+
from .fsspec.azure import get_base_src_cmd as azure_base_src_cmd
|
|
25
|
+
from .fsspec.box import get_base_dest_cmd as box_base_dest_cmd
|
|
26
|
+
from .fsspec.box import get_base_src_cmd as box_base_src_cmd
|
|
27
|
+
from .fsspec.dropbox import get_base_dest_cmd as dropbox_base_dest_cmd
|
|
28
|
+
from .fsspec.dropbox import get_base_src_cmd as dropbox_base_src_cmd
|
|
29
|
+
from .fsspec.fsspec import get_base_dest_cmd as fsspec_base_dest_cmd
|
|
30
|
+
from .fsspec.fsspec import get_base_src_cmd as fsspec_base_src_cmd
|
|
31
|
+
from .fsspec.gcs import get_base_dest_cmd as gcs_base_dest_cmd
|
|
32
|
+
from .fsspec.gcs import get_base_src_cmd as gcs_base_src_cmd
|
|
33
|
+
from .fsspec.s3 import get_base_dest_cmd as s3_base_dest_cmd
|
|
34
|
+
from .fsspec.s3 import get_base_src_cmd as s3_base_src_cmd
|
|
35
|
+
from .github import get_base_src_cmd as github_base_src_cmd
|
|
36
|
+
from .gitlab import get_base_src_cmd as gitlab_base_src_cmd
|
|
37
|
+
from .google_drive import get_base_src_cmd as google_drive_base_src_cmd
|
|
38
|
+
from .hubspot import get_base_src_cmd as hubspot_base_src_cmd
|
|
39
|
+
from .jira import get_base_src_cmd as jira_base_src_cmd
|
|
40
|
+
from .kafka import get_base_dest_cmd as kafka_base_dest_cmd
|
|
41
|
+
from .kafka import get_base_src_cmd as kafka_base_src_cmd
|
|
42
|
+
from .local import get_base_src_cmd as local_base_src_cmd
|
|
43
|
+
from .mongodb import get_base_dest_cmd as mongo_base_dest_cmd
|
|
44
|
+
from .mongodb import get_base_src_cmd as mongodb_base_src_cmd
|
|
45
|
+
from .notion import get_base_src_cmd as notion_base_src_cmd
|
|
46
|
+
from .onedrive import get_base_src_cmd as onedrive_base_src_cmd
|
|
47
|
+
from .opensearch import get_base_dest_cmd as opensearch_base_dest_cmd
|
|
48
|
+
from .opensearch import get_base_src_cmd as opensearch_base_src_cmd
|
|
49
|
+
from .outlook import get_base_src_cmd as outlook_base_src_cmd
|
|
50
|
+
from .pinecone import get_base_dest_cmd as pinecone_base_dest_cmd
|
|
51
|
+
from .qdrant import get_base_dest_cmd as qdrant_base_dest_cmd
|
|
52
|
+
from .reddit import get_base_src_cmd as reddit_base_src_cmd
|
|
53
|
+
from .salesforce import get_base_src_cmd as salesforce_base_src_cmd
|
|
54
|
+
from .sharepoint import get_base_src_cmd as sharepoint_base_src_cmd
|
|
55
|
+
from .slack import get_base_src_cmd as slack_base_src_cmd
|
|
56
|
+
from .sql import get_base_dest_cmd as sql_base_dest_cmd
|
|
57
|
+
from .vectara import get_base_dest_cmd as vectara_base_dest_cmd
|
|
58
|
+
from .weaviate import get_base_dest_cmd as weaviate_dest_cmd
|
|
59
|
+
from .wikipedia import get_base_src_cmd as wikipedia_base_src_cmd
|
|
60
|
+
|
|
61
|
+
if t.TYPE_CHECKING:
|
|
62
|
+
from unstructured_ingest.cli.base.dest import BaseDestCmd
|
|
63
|
+
|
|
64
|
+
# Zero-argument factories, one per source command; each returns a BaseSrcCmd.
base_src_cmd_fns: t.List[t.Callable[[], BaseSrcCmd]] = [
    airtable_base_src_cmd,
    astradb_base_src_cmd,
    azure_base_src_cmd,
    biomed_base_src_cmd,
    box_base_src_cmd,
    confluence_base_src_cmd,
    delta_table_base_src_cmd,
    discord_base_src_cmd,
    dropbox_base_src_cmd,
    elasticsearch_base_src_cmd,
    fsspec_base_src_cmd,
    gcs_base_src_cmd,
    github_base_src_cmd,
    gitlab_base_src_cmd,
    google_drive_base_src_cmd,
    hubspot_base_src_cmd,
    jira_base_src_cmd,
    kafka_base_src_cmd,
    local_base_src_cmd,
    mongodb_base_src_cmd,
    notion_base_src_cmd,
    onedrive_base_src_cmd,
    opensearch_base_src_cmd,
    outlook_base_src_cmd,
    reddit_base_src_cmd,
    salesforce_base_src_cmd,
    sftp_base_src_cmd,
    sharepoint_base_src_cmd,
    slack_base_src_cmd,
    s3_base_src_cmd,
    wikipedia_base_src_cmd,
]

# Import-time guard: every factory is invoked once and the resulting command names
# must be unique, otherwise the module refuses to load.
src_cmd_names = [factory().cmd_name for factory in base_src_cmd_fns]
src_duplicates = [
    name for name, occurrences in collections.Counter(src_cmd_names).items() if occurrences > 1
]
if src_duplicates:
    raise ValueError(
        "multiple base src commands defined with the same names: " + ", ".join(src_duplicates)
    )
|
|
107
|
+
|
|
108
|
+
# Zero-argument factories, one per destination command; each returns a BaseDestCmd.
base_dest_cmd_fns: t.List[t.Callable[[], "BaseDestCmd"]] = [
    astradb_base_dest_cmd,
    azure_base_dest_cmd,
    box_base_dest_cmd,
    chroma_base_dest_cmd,
    clarifai_base_dest_cmd,
    databricks_volumes_dest_cmd,
    dropbox_base_dest_cmd,
    elasticsearch_base_dest_cmd,
    fsspec_base_dest_cmd,
    gcs_base_dest_cmd,
    kafka_base_dest_cmd,
    s3_base_dest_cmd,
    azure_ai_search_base_dest_cmd,
    delta_table_dest_cmd,
    sql_base_dest_cmd,
    weaviate_dest_cmd,
    mongo_base_dest_cmd,
    pinecone_base_dest_cmd,
    qdrant_base_dest_cmd,
    opensearch_base_dest_cmd,
    vectara_base_dest_cmd,
]

# Import-time guard: every factory is invoked once and the resulting command names
# must be unique, otherwise the module refuses to load.
dest_cmd_names = [factory().cmd_name for factory in base_dest_cmd_fns]
dest_duplicates = [
    name for name, occurrences in collections.Counter(dest_cmd_names).items() if occurrences > 1
]
if dest_duplicates:
    raise ValueError(
        "multiple base dest commands defined with the same names: " + ", ".join(dest_duplicates)
    )

# Public names of this package module.
__all__ = ["base_src_cmd_fns", "base_dest_cmd_fns"]
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
import typing as t
|
|
2
|
+
from dataclasses import dataclass
|
|
3
|
+
|
|
4
|
+
import click
|
|
5
|
+
|
|
6
|
+
from unstructured_ingest.cli.base.src import BaseSrcCmd
|
|
7
|
+
from unstructured_ingest.cli.interfaces import (
|
|
8
|
+
CliConfig,
|
|
9
|
+
)
|
|
10
|
+
from unstructured_ingest.connector.airtable import SimpleAirtableConfig
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
# Airtable connector configuration (SimpleAirtableConfig) combined with the CliConfig
# interface; get_cli_options lists the Click options it exposes.
@dataclass
class AirtableCliConfig(SimpleAirtableConfig, CliConfig):
    @staticmethod
    def get_cli_options() -> t.List[click.Option]:
        """Return the Click options for the Airtable connector.

        Two options are defined, ``--personal-access-token`` and ``--list-of-paths``;
        both default to ``None``.
        """
        token_help = (
            "Personal access token to authenticate into Airtable. Check: "
            "https://support.airtable.com/docs/creating-and-using-api-keys-and-access-tokens "
            "for more info"
        )
        # NOTE(review): the indentation inside this help text is reconstructed —
        # confirm against the packaged file if exact whitespace matters.
        paths_help = """
        A list of paths that specify the locations to ingest data from within Airtable.

        If this argument is not set, the connector ingests all tables within each and every base.
        --list-of-paths: path1 path2 path3 ….
        path: base_id/table_id(optional)/view_id(optional)/

        To obtain (base, table, view) ids in bulk, check:
        https://airtable.com/developers/web/api/list-bases (base ids)
        https://airtable.com/developers/web/api/get-base-schema (table and view ids)
        https://pyairtable.readthedocs.io/en/latest/metadata.html (base, table and view ids)

        To obtain specific ids from Airtable UI, go to your workspace, and copy any
        relevant id from the URL structure:
        https://airtable.com/appAbcDeF1ghijKlm/tblABcdEfG1HIJkLm/viwABCDEfg6hijKLM
        appAbcDeF1ghijKlm -> base_id
        tblABcdEfG1HIJkLm -> table_id
        viwABCDEfg6hijKLM -> view_id

        You can also check: https://support.airtable.com/docs/finding-airtable-ids

        Here is an example for one --list-of-paths:
        base1/ → gets the entirety of all tables inside base1
        base1/table1 → gets all rows and columns within table1 in base1
        base1/table1/view1 → gets the rows and columns that are
        visible in view1 for the table1 in base1

        Examples to invalid airtable_paths:
        table1 → has to mention base to be valid
        base1/view1 → has to mention table to be valid
        """
        return [
            click.Option(["--personal-access-token"], default=None, help=token_help),
            click.Option(["--list-of-paths"], default=None, help=paths_help),
        ]
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def get_base_src_cmd() -> BaseSrcCmd:
    """Create the ``airtable`` source command, configured by ``AirtableCliConfig``."""
    return BaseSrcCmd(cmd_name="airtable", cli_config=AirtableCliConfig)
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
import typing as t
|
|
2
|
+
from dataclasses import dataclass
|
|
3
|
+
|
|
4
|
+
import click
|
|
5
|
+
|
|
6
|
+
from unstructured_ingest.cli.interfaces import CliConfig, Dict
|
|
7
|
+
from unstructured_ingest.connector.astradb import AstraDBWriteConfig, SimpleAstraDBConfig
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
# Astra DB connection configuration (SimpleAstraDBConfig) combined with the CliConfig
# interface; get_cli_options lists the Click options it exposes.
@dataclass
class AstraDBCliConfig(SimpleAstraDBConfig, CliConfig):
    @staticmethod
    def get_cli_options() -> t.List[click.Option]:
        """Return the Click options describing an Astra DB connection.

        ``--token`` and ``--api-endpoint`` are required and can also be supplied through
        the ``ASTRA_DB_APPLICATION_TOKEN`` and ``ASTRA_DB_API_ENDPOINT`` environment
        variables. ``--collection-name`` and ``--keyspace`` are optional.
        """
        return [
            click.Option(
                ["--token"],
                type=str,
                required=True,
                envvar="ASTRA_DB_APPLICATION_TOKEN",
                show_envvar=True,
                help="Astra DB Token with access to the database.",
            ),
            click.Option(
                ["--api-endpoint"],
                type=str,
                required=True,
                envvar="ASTRA_DB_API_ENDPOINT",
                show_envvar=True,
                help="The API endpoint for the Astra DB.",
            ),
            click.Option(
                ["--collection-name"],
                type=str,
                required=False,
                help=(
                    "The name of the Astra DB collection. Note that the collection name "
                    "must only include letters, numbers, and underscores."
                ),
            ),
            click.Option(
                ["--keyspace"],
                type=str,
                required=False,
                default=None,
                help="The Astra DB connection keyspace.",
            ),
        ]
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
# Astra DB write configuration (AstraDBWriteConfig) combined with the CliConfig
# interface; get_cli_options lists the write-specific Click options.
@dataclass
class AstraDBCliWriteConfig(AstraDBWriteConfig, CliConfig):
    @staticmethod
    def get_cli_options() -> t.List[click.Option]:
        """Return the Click options that control writing to Astra DB.

        Defines ``--embedding-dimension`` (int, default 384),
        ``--requested-indexing-policy`` (parsed with the ``Dict`` parameter type,
        default ``None``) and ``--batch-size`` (int, default 20).
        """
        options = [
            click.Option(
                ["--embedding-dimension"],
                # NOTE(review): required=True together with a default means Click
                # always has a value here — confirm which of the two is intended.
                required=True,
                default=384,
                type=int,
                help="The dimensionality of the embeddings",
            ),
            click.Option(
                ["--requested-indexing-policy"],
                required=False,
                default=None,
                type=Dict(),
                # The first literal ends with a space so the two adjacent literals
                # do not run together as "collection.example:" in the help output.
                help="The indexing policy to use for the collection. "
                'example: \'{"deny": ["metadata"]}\' ',
            ),
            click.Option(
                ["--batch-size"],
                default=20,
                type=int,
                help="Number of records per batch",
            ),
        ]
        return options
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def get_base_src_cmd():
    """Create the ``astradb`` source command, configured by ``AstraDBCliConfig``."""
    # Imported inside the function, as in the original, so the import is deferred
    # until the command is actually built.
    from unstructured_ingest.cli.base.src import BaseSrcCmd

    return BaseSrcCmd(cmd_name="astradb", cli_config=AstraDBCliConfig)
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def get_base_dest_cmd():
    """Create the ``astradb`` destination command.

    Connection options come from ``AstraDBCliConfig``, write options from
    ``AstraDBCliWriteConfig``, and ``AstraDBWriteConfig`` is passed as the write config.
    """
    # Imported inside the function, as in the original, so the import is deferred
    # until the command is actually built.
    from unstructured_ingest.cli.base.dest import BaseDestCmd

    return BaseDestCmd(
        cmd_name="astradb",
        cli_config=AstraDBCliConfig,
        additional_cli_options=[AstraDBCliWriteConfig],
        write_config=AstraDBWriteConfig,
    )
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
import typing as t
|
|
2
|
+
from dataclasses import dataclass
|
|
3
|
+
|
|
4
|
+
import click
|
|
5
|
+
|
|
6
|
+
from unstructured_ingest.cli.interfaces import (
|
|
7
|
+
CliConfig,
|
|
8
|
+
)
|
|
9
|
+
from unstructured_ingest.connector.azure_ai_search import (
|
|
10
|
+
AzureAISearchWriteConfig,
|
|
11
|
+
SimpleAzureAISearchStorageConfig,
|
|
12
|
+
)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
# Azure AI Search connection configuration (SimpleAzureAISearchStorageConfig) combined
# with the CliConfig interface; get_cli_options lists the Click options it exposes.
@dataclass
class AzureAISearchCliConfig(SimpleAzureAISearchStorageConfig, CliConfig):
    @staticmethod
    def get_cli_options() -> t.List[click.Option]:
        """Return the Click options describing an Azure AI Search connection.

        ``--key`` and ``--endpoint`` are both required strings and can also be supplied
        through the ``AZURE_SEARCH_API_KEY`` and ``AZURE_SEARCH_ENDPOINT`` environment
        variables.
        """
        options = [
            click.Option(
                ["--key"],
                required=True,
                type=str,
                help="Key credential used for authenticating to an Azure service.",
                envvar="AZURE_SEARCH_API_KEY",
                show_envvar=True,
            ),
            click.Option(
                ["--endpoint"],
                required=True,
                type=str,
                # Plain string (no .format / f-string), so single braces are used;
                # doubled braces would be printed literally in the help text.
                help="The URL endpoint of an Azure search service. "
                "In the form of https://{service_name}.search.windows.net",
                envvar="AZURE_SEARCH_ENDPOINT",
                show_envvar=True,
            ),
        ]
        return options
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
# Azure AI Search write configuration (AzureAISearchWriteConfig) combined with the
# CliConfig interface; get_cli_options lists the write-specific Click options.
@dataclass
class AzureAISearchCliWriteConfig(AzureAISearchWriteConfig, CliConfig):
    @staticmethod
    def get_cli_options() -> t.List[click.Option]:
        """Return the single required ``--index`` option naming the target index."""
        index_option = click.Option(
            ["--index"],
            type=str,
            required=True,
            help="The name of the index to connect to",
        )
        return [index_option]
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def get_base_dest_cmd():
    """Create the ``azure-ai-search`` destination command.

    Connection options come from ``AzureAISearchCliConfig`` and write options from
    ``AzureAISearchCliWriteConfig``, which is also passed as the write config.
    """
    # Imported inside the function, as in the original, so the import is deferred
    # until the command is actually built.
    from unstructured_ingest.cli.base.dest import BaseDestCmd

    # NOTE(review): write_config is the CLI subclass here, while the other visible dest
    # commands pass the plain connector write config — confirm this is intended.
    return BaseDestCmd(
        cmd_name="azure-ai-search",
        cli_config=AzureAISearchCliConfig,
        additional_cli_options=[AzureAISearchCliWriteConfig],
        write_config=AzureAISearchCliWriteConfig,
    )
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
import typing as t
|
|
2
|
+
from dataclasses import dataclass
|
|
3
|
+
|
|
4
|
+
import click
|
|
5
|
+
|
|
6
|
+
from unstructured_ingest.cli.base.src import BaseSrcCmd
|
|
7
|
+
from unstructured_ingest.cli.interfaces import (
|
|
8
|
+
CliConfig,
|
|
9
|
+
)
|
|
10
|
+
from unstructured_ingest.connector.biomed import SimpleBiomedConfig
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
# Biomed connector configuration (SimpleBiomedConfig) combined with the CliConfig
# interface; get_cli_options lists the Click options it exposes.
@dataclass
class BiomedCliConfig(SimpleBiomedConfig, CliConfig):
    @staticmethod
    def get_cli_options() -> t.List[click.Option]:
        """Return the Click options for the biomed source connector.

        Every option defaults to ``None`` except ``--max-request-time``, which
        defaults to 45.
        """
        api_id = click.Option(
            ["--api-id"],
            help="ID parameter for OA Web Service API.",
            default=None,
        )
        api_from = click.Option(
            ["--api-from"],
            help="From parameter for OA Web Service API.",
            default=None,
        )
        api_until = click.Option(
            ["--api-until"],
            help="Until parameter for OA Web Service API.",
            default=None,
        )
        ftp_path = click.Option(
            ["--path"],
            help="PMC Open Access FTP Directory Path.",
            default=None,
        )
        max_request_time = click.Option(
            ["--max-request-time"],
            help="(In seconds) Max request time to OA Web Service API.",
            default=45,
        )
        return [api_id, api_from, api_until, ftp_path, max_request_time]
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def get_base_src_cmd() -> BaseSrcCmd:
    """Create the ``biomed`` source command, configured by ``BiomedCliConfig``."""
    return BaseSrcCmd(cmd_name="biomed", cli_config=BiomedCliConfig)
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
import typing as t
|
|
2
|
+
from dataclasses import dataclass
|
|
3
|
+
|
|
4
|
+
import click
|
|
5
|
+
|
|
6
|
+
from unstructured_ingest.cli.interfaces import CliConfig, Dict
|
|
7
|
+
from unstructured_ingest.connector.chroma import ChromaWriteConfig, SimpleChromaConfig
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
# Chroma connection configuration (SimpleChromaConfig) combined with the CliConfig
# interface; get_cli_options lists the Click options it exposes.
@dataclass
class ChromaCliConfig(SimpleChromaConfig, CliConfig):
    @staticmethod
    def get_cli_options() -> t.List[click.Option]:
        """Return the Click options describing a Chroma connection.

        Only ``--collection-name`` is required. ``--tenant`` and ``--database`` default
        to ``default_tenant`` and ``default_database``; ``--ssl`` is a flag defaulting
        to ``False``; ``--settings`` and ``--headers`` are parsed with the ``Dict``
        parameter type.
        """
        # Multi-part help strings below end each fragment with a space: adjacent
        # literals are concatenated as-is, so a missing space fuses two sentences.
        options = [
            click.Option(
                ["--path"],
                required=False,
                type=str,
                help="Location where Chroma is persisted, if not connecting via http.",
            ),
            click.Option(
                ["--settings"],
                required=False,
                type=Dict(),
                help="A dictionary of settings to communicate with the chroma server. "
                'example: \'{"persist_directory":"./chroma-persist"}\' ',
            ),
            click.Option(
                ["--tenant"],
                required=False,
                default="default_tenant",
                type=str,
                help="The tenant to use for this client. Chroma defaults to 'default_tenant'.",
            ),
            click.Option(
                ["--database"],
                required=False,
                default="default_database",
                type=str,
                help="The database to use for this client. "
                "Chroma defaults to 'default_database'.",
            ),
            click.Option(
                ["--host"],
                required=False,
                type=str,
                help="The hostname of the Chroma server.",
            ),
            click.Option(
                ["--port"],
                required=False,
                type=int,
                help="The port of the Chroma server.",
            ),
            click.Option(
                ["--ssl"],
                required=False,
                default=False,
                is_flag=True,
                type=bool,
                help="Whether to use SSL to connect to the Chroma server.",
            ),
            click.Option(
                ["--headers"],
                required=False,
                type=Dict(),
                help="A dictionary of headers to send to the Chroma server. "
                'example: \'{"Authorization":"Basic()"}\' ',
            ),
            click.Option(
                ["--collection-name"],
                required=True,
                type=str,
                help="The name of the Chroma collection to write into.",
            ),
        ]
        return options
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
# Chroma write configuration (ChromaWriteConfig) combined with the CliConfig interface;
# get_cli_options lists the write-specific Click options.
@dataclass
class ChromaCliWriteConfig(ChromaWriteConfig, CliConfig):
    @staticmethod
    def get_cli_options() -> t.List[click.Option]:
        """Return the single ``--batch-size`` option (int, default 100)."""
        batch_size_option = click.Option(
            ["--batch-size"],
            type=int,
            default=100,
            help="Number of records per batch",
        )
        return [batch_size_option]
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def get_base_dest_cmd():
    """Create the ``chroma`` destination command.

    Connection options come from ``ChromaCliConfig``, write options from
    ``ChromaCliWriteConfig``, and ``ChromaWriteConfig`` is passed as the write config.
    """
    # Imported inside the function, as in the original, so the import is deferred
    # until the command is actually built.
    from unstructured_ingest.cli.base.dest import BaseDestCmd

    return BaseDestCmd(
        cmd_name="chroma",
        cli_config=ChromaCliConfig,
        additional_cli_options=[ChromaCliWriteConfig],
        write_config=ChromaWriteConfig,
    )
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
import typing as t
|
|
2
|
+
from dataclasses import dataclass
|
|
3
|
+
|
|
4
|
+
import click
|
|
5
|
+
|
|
6
|
+
from unstructured_ingest.cli.interfaces import CliConfig
|
|
7
|
+
from unstructured_ingest.connector.clarifai import (
|
|
8
|
+
ClarifaiWriteConfig,
|
|
9
|
+
SimpleClarifaiConfig,
|
|
10
|
+
)
|
|
11
|
+
|
|
12
|
+
CMD_NAME = "clarifai"
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
# Clarifai connection configuration (SimpleClarifaiConfig) combined with the CliConfig
# interface; get_cli_options lists the Click options it exposes.
@dataclass
class ClarifaiCliConfig(SimpleClarifaiConfig, CliConfig):
    @staticmethod
    def get_cli_options() -> t.List[click.Option]:
        """Return the Click options describing a Clarifai connection.

        ``--api-key`` (also read from the ``CLARIFAI_PAT`` environment variable),
        ``--app-id`` and ``--user-id`` are required; ``--dataset-id`` defaults to
        ``None``.
        """
        return [
            click.Option(
                ["--api-key"],
                type=str,
                required=True,
                envvar="CLARIFAI_PAT",
                show_envvar=True,
                help="The CLARIFAI_PAT of the user to access clarifai platform apps and models",
            ),
            click.Option(
                ["--app-id"],
                type=str,
                required=True,
                help="Clarifai app name/id",
            ),
            click.Option(
                ["--user-id"],
                type=str,
                required=True,
                help="Clarifai User name/ID",
            ),
            click.Option(
                ["--dataset-id"],
                type=str,
                default=None,
                help="Clarifai App Dataset ID (optional)",
            ),
        ]
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
# Clarifai write configuration (ClarifaiWriteConfig) combined with the CliConfig
# interface; get_cli_options lists the write-specific Click options.
@dataclass
class ClarifaiCliWriteConfig(ClarifaiWriteConfig, CliConfig):
    @staticmethod
    def get_cli_options() -> t.List[click.Option]:
        """Return the single ``--batch-size`` option (int, default 50).

        The return annotation names the ``click.Option`` class; lowercase
        ``click.option`` is the decorator function, not a type.
        """
        options = [
            click.Option(
                ["--batch-size"],
                type=int,
                default=50,
                help="No of inputs upload per batch",
            ),
        ]
        return options
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def get_base_dest_cmd():
    """Create the Clarifai destination command, named by the module's ``CMD_NAME``.

    Connection options come from ``ClarifaiCliConfig``, write options from
    ``ClarifaiCliWriteConfig``, and ``ClarifaiWriteConfig`` is passed as the write config.
    """
    # Imported inside the function, as in the original, so the import is deferred
    # until the command is actually built.
    from unstructured_ingest.cli.base.dest import BaseDestCmd

    return BaseDestCmd(
        cmd_name=CMD_NAME,
        cli_config=ClarifaiCliConfig,
        additional_cli_options=[ClarifaiCliWriteConfig],
        write_config=ClarifaiWriteConfig,
    )
|