unstructured-ingest 0.6.2__py3-none-any.whl → 0.7.0__py3-none-any.whl
This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Potentially problematic release.
This version of unstructured-ingest might be problematic. Click here for more details.
- examples/airtable.py +44 -0
- examples/azure_cognitive_search.py +55 -0
- examples/chroma.py +54 -0
- examples/couchbase.py +55 -0
- examples/databricks_volumes_dest.py +55 -0
- examples/databricks_volumes_source.py +53 -0
- examples/delta_table.py +45 -0
- examples/discord_example.py +36 -0
- examples/elasticsearch.py +49 -0
- examples/google_drive.py +45 -0
- examples/kdbai.py +54 -0
- examples/local.py +36 -0
- examples/milvus.py +44 -0
- examples/mongodb.py +53 -0
- examples/opensearch.py +50 -0
- examples/pinecone.py +57 -0
- examples/s3.py +38 -0
- examples/salesforce.py +44 -0
- examples/sharepoint.py +47 -0
- examples/singlestore.py +49 -0
- examples/sql.py +90 -0
- examples/vectara.py +54 -0
- examples/weaviate.py +44 -0
- test/integration/chunkers/test_chunkers.py +1 -1
- test/integration/connectors/conftest.py +1 -1
- test/integration/connectors/databricks/test_volumes_native.py +3 -3
- test/integration/connectors/discord/test_discord.py +1 -1
- test/integration/connectors/duckdb/test_duckdb.py +2 -2
- test/integration/connectors/duckdb/test_motherduck.py +2 -2
- test/integration/connectors/elasticsearch/test_elasticsearch.py +2 -2
- test/integration/connectors/elasticsearch/test_opensearch.py +2 -2
- test/integration/connectors/sql/test_databricks_delta_tables.py +3 -3
- test/integration/connectors/sql/test_postgres.py +2 -2
- test/integration/connectors/sql/test_singlestore.py +2 -2
- test/integration/connectors/sql/test_snowflake.py +2 -2
- test/integration/connectors/sql/test_sqlite.py +2 -2
- test/integration/connectors/sql/test_vastdb.py +1 -1
- test/integration/connectors/test_astradb.py +2 -2
- test/integration/connectors/test_azure_ai_search.py +2 -2
- test/integration/connectors/test_chroma.py +2 -2
- test/integration/connectors/test_confluence.py +1 -1
- test/integration/connectors/test_delta_table.py +2 -2
- test/integration/connectors/test_dropbox.py +2 -2
- test/integration/connectors/test_github.py +49 -0
- test/integration/connectors/test_google_drive.py +2 -2
- test/integration/connectors/test_jira.py +1 -1
- test/integration/connectors/test_lancedb.py +7 -7
- test/integration/connectors/test_milvus.py +2 -2
- test/integration/connectors/test_mongodb.py +2 -2
- test/integration/connectors/test_neo4j.py +7 -7
- test/integration/connectors/test_notion.py +2 -2
- test/integration/connectors/test_onedrive.py +2 -2
- test/integration/connectors/test_pinecone.py +3 -3
- test/integration/connectors/test_qdrant.py +6 -6
- test/integration/connectors/test_redis.py +3 -3
- test/integration/connectors/test_s3.py +3 -3
- test/integration/connectors/test_sharepoint.py +1 -1
- test/integration/connectors/test_vectara.py +4 -4
- test/integration/connectors/test_zendesk.py +2 -2
- test/integration/connectors/utils/validation/destination.py +2 -2
- test/integration/connectors/utils/validation/source.py +2 -2
- test/integration/connectors/weaviate/test_cloud.py +1 -1
- test/integration/connectors/weaviate/test_local.py +2 -2
- test/integration/embedders/test_azure_openai.py +1 -1
- test/integration/embedders/test_bedrock.py +2 -2
- test/integration/embedders/test_huggingface.py +1 -1
- test/integration/embedders/test_mixedbread.py +1 -1
- test/integration/embedders/test_octoai.py +2 -2
- test/integration/embedders/test_openai.py +2 -2
- test/integration/embedders/test_togetherai.py +2 -2
- test/integration/embedders/test_vertexai.py +1 -1
- test/integration/embedders/test_voyageai.py +1 -1
- test/integration/partitioners/test_partitioner.py +2 -2
- test/unit/{v2/chunkers → chunkers}/test_chunkers.py +1 -1
- test/unit/{v2/connectors → connectors}/ibm_watsonx/test_ibm_watsonx_s3.py +6 -6
- test/unit/{v2/connectors → connectors}/motherduck/test_base.py +5 -5
- test/unit/{v2/connectors → connectors}/sql/test_sql.py +4 -4
- test/unit/{v2/connectors → connectors}/test_confluence.py +1 -1
- test/unit/{v2/connectors → connectors}/test_jira.py +1 -1
- test/unit/{v2/embedders → embedders}/test_huggingface.py +1 -1
- test/unit/{v2/embedders → embedders}/test_vertexai.py +1 -1
- test/unit/{v2/partitioners → partitioners}/test_partitioner.py +2 -2
- test/unit/test_html.py +1 -1
- test/unit/{v2/test_interfaces.py → test_interfaces.py} +1 -1
- test/unit/test_utils.py +106 -97
- unstructured_ingest/__version__.py +1 -1
- unstructured_ingest/cli/__init__.py +0 -14
- unstructured_ingest/cli/base/__init__.py +4 -0
- unstructured_ingest/cli/base/cmd.py +259 -9
- unstructured_ingest/cli/base/dest.py +58 -61
- unstructured_ingest/cli/base/src.py +54 -36
- unstructured_ingest/cli/cli.py +4 -17
- unstructured_ingest/{v2/cli → cli}/cmds.py +2 -2
- unstructured_ingest/{v2/cli → cli}/utils/model_conversion.py +6 -6
- unstructured_ingest/{v2/types → data_types}/file_data.py +1 -1
- unstructured_ingest/embed/bedrock.py +3 -3
- unstructured_ingest/embed/octoai.py +3 -3
- unstructured_ingest/embed/openai.py +3 -3
- unstructured_ingest/embed/togetherai.py +4 -4
- unstructured_ingest/embed/vertexai.py +1 -1
- unstructured_ingest/embed/voyageai.py +4 -4
- unstructured_ingest/{v2/interfaces → interfaces}/downloader.py +3 -3
- unstructured_ingest/{v2/interfaces → interfaces}/indexer.py +3 -3
- unstructured_ingest/{v2/interfaces → interfaces}/upload_stager.py +2 -2
- unstructured_ingest/{v2/interfaces → interfaces}/uploader.py +2 -2
- unstructured_ingest/{v2/otel.py → otel.py} +1 -1
- unstructured_ingest/pipeline/__init__.py +0 -22
- unstructured_ingest/pipeline/interfaces.py +179 -238
- unstructured_ingest/{v2/pipeline → pipeline}/otel.py +2 -2
- unstructured_ingest/pipeline/pipeline.py +388 -97
- unstructured_ingest/{v2/pipeline → pipeline}/steps/chunk.py +5 -5
- unstructured_ingest/{v2/pipeline → pipeline}/steps/download.py +5 -5
- unstructured_ingest/{v2/pipeline → pipeline}/steps/embed.py +5 -5
- unstructured_ingest/{v2/pipeline → pipeline}/steps/filter.py +4 -4
- unstructured_ingest/{v2/pipeline → pipeline}/steps/index.py +5 -5
- unstructured_ingest/{v2/pipeline → pipeline}/steps/partition.py +5 -5
- unstructured_ingest/{v2/pipeline → pipeline}/steps/stage.py +5 -5
- unstructured_ingest/{v2/pipeline → pipeline}/steps/uncompress.py +4 -4
- unstructured_ingest/{v2/pipeline → pipeline}/steps/upload.py +5 -5
- unstructured_ingest/{v2/processes → processes}/chunker.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connector_registry.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/__init__.py +14 -11
- unstructured_ingest/{v2/processes → processes}/connectors/airtable.py +4 -4
- unstructured_ingest/{v2/processes → processes}/connectors/astradb.py +15 -15
- unstructured_ingest/{v2/processes → processes}/connectors/azure_ai_search.py +8 -9
- unstructured_ingest/{v2/processes → processes}/connectors/chroma.py +10 -7
- unstructured_ingest/{v2/processes → processes}/connectors/confluence.py +11 -11
- unstructured_ingest/{v2/processes → processes}/connectors/couchbase.py +12 -12
- unstructured_ingest/{v2/processes → processes}/connectors/databricks/__init__.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/databricks/volumes.py +12 -11
- unstructured_ingest/{v2/processes → processes}/connectors/databricks/volumes_aws.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/databricks/volumes_azure.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/databricks/volumes_gcp.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/databricks/volumes_native.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/databricks/volumes_table.py +8 -9
- unstructured_ingest/{v2/processes → processes}/connectors/delta_table.py +7 -7
- unstructured_ingest/{v2/processes → processes}/connectors/discord.py +9 -9
- unstructured_ingest/{v2/processes → processes}/connectors/duckdb/__init__.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/duckdb/base.py +3 -4
- unstructured_ingest/{v2/processes → processes}/connectors/duckdb/duckdb.py +7 -7
- unstructured_ingest/{v2/processes → processes}/connectors/duckdb/motherduck.py +7 -7
- unstructured_ingest/{v2/processes → processes}/connectors/elasticsearch/__init__.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/elasticsearch/elasticsearch.py +17 -17
- unstructured_ingest/{v2/processes → processes}/connectors/elasticsearch/opensearch.py +5 -5
- unstructured_ingest/{v2/processes → processes}/connectors/fsspec/__init__.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/fsspec/azure.py +8 -8
- unstructured_ingest/{v2/processes → processes}/connectors/fsspec/box.py +8 -8
- unstructured_ingest/{v2/processes → processes}/connectors/fsspec/dropbox.py +8 -8
- unstructured_ingest/{v2/processes → processes}/connectors/fsspec/fsspec.py +8 -8
- unstructured_ingest/{v2/processes → processes}/connectors/fsspec/gcs.py +8 -8
- unstructured_ingest/{v2/processes → processes}/connectors/fsspec/s3.py +9 -9
- unstructured_ingest/{v2/processes → processes}/connectors/fsspec/sftp.py +5 -5
- unstructured_ingest/processes/connectors/github.py +221 -0
- unstructured_ingest/{v2/processes → processes}/connectors/gitlab.py +9 -9
- unstructured_ingest/{v2/processes → processes}/connectors/google_drive.py +22 -13
- unstructured_ingest/{v2/processes → processes}/connectors/ibm_watsonx/__init__.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/ibm_watsonx/ibm_watsonx_s3.py +9 -9
- unstructured_ingest/{v2/processes → processes}/connectors/jira.py +9 -9
- unstructured_ingest/{v2/processes → processes}/connectors/kafka/__init__.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/kafka/cloud.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/kafka/kafka.py +9 -9
- unstructured_ingest/{v2/processes → processes}/connectors/kafka/local.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/kdbai.py +11 -7
- unstructured_ingest/{v2/processes → processes}/connectors/lancedb/__init__.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/lancedb/aws.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/lancedb/azure.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/lancedb/cloud.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/lancedb/gcp.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/lancedb/lancedb.py +6 -6
- unstructured_ingest/{v2/processes → processes}/connectors/lancedb/local.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/local.py +9 -9
- unstructured_ingest/{v2/processes → processes}/connectors/milvus.py +7 -7
- unstructured_ingest/{v2/processes → processes}/connectors/mongodb.py +13 -13
- unstructured_ingest/{v2/processes → processes}/connectors/neo4j.py +7 -7
- unstructured_ingest/{v2/processes → processes}/connectors/notion/client.py +6 -6
- unstructured_ingest/{v2/processes → processes}/connectors/notion/connector.py +15 -15
- unstructured_ingest/{v2/processes → processes}/connectors/notion/helpers.py +4 -4
- unstructured_ingest/{ingest_backoff → processes/connectors/notion/ingest_backoff}/_wrapper.py +5 -1
- unstructured_ingest/processes/connectors/notion/ingest_backoff/types.py +24 -0
- unstructured_ingest/{connector → processes/connectors}/notion/types/block.py +4 -4
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/bookmark.py +2 -2
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/breadcrumb.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/bulleted_list_item.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/callout.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/child_database.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/child_page.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/code.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/column_list.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/divider.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/embed.py +2 -2
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/equation.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/file.py +4 -4
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/heading.py +2 -2
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/image.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/link_preview.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/link_to_page.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/numbered_list.py +2 -2
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/paragraph.py +2 -2
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/pdf.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/quote.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/synced_block.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/table.py +2 -5
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/table_of_contents.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/template.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/todo.py +2 -2
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/toggle.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/unsupported.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/video.py +2 -2
- unstructured_ingest/{connector → processes/connectors}/notion/types/database.py +6 -6
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/__init__.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/checkbox.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/created_by.py +2 -2
- unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/created_time.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/date.py +2 -2
- unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/email.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/files.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/formula.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/last_edited_by.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/last_edited_time.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/multiselect.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/number.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/people.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/phone_number.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/relation.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/rich_text.py +2 -2
- unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/rollup.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/select.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/status.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/title.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/unique_id.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/url.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/verification.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/date.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/file.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/page.py +4 -4
- unstructured_ingest/{connector → processes/connectors}/notion/types/parent.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/rich_text.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/user.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/onedrive.py +10 -10
- unstructured_ingest/{v2/processes → processes}/connectors/outlook.py +9 -9
- unstructured_ingest/{v2/processes → processes}/connectors/pinecone.py +12 -9
- unstructured_ingest/{v2/processes → processes}/connectors/qdrant/__init__.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/qdrant/cloud.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/qdrant/local.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/qdrant/qdrant.py +9 -6
- unstructured_ingest/{v2/processes → processes}/connectors/qdrant/server.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/redisdb.py +6 -6
- unstructured_ingest/{v2/processes → processes}/connectors/salesforce.py +9 -9
- unstructured_ingest/{v2/processes → processes}/connectors/sharepoint.py +7 -7
- unstructured_ingest/{v2/processes → processes}/connectors/slack.py +9 -9
- unstructured_ingest/{v2/processes → processes}/connectors/sql/__init__.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/sql/databricks_delta_tables.py +11 -9
- unstructured_ingest/{v2/processes → processes}/connectors/sql/postgres.py +4 -4
- unstructured_ingest/{v2/processes → processes}/connectors/sql/singlestore.py +4 -4
- unstructured_ingest/{v2/processes → processes}/connectors/sql/snowflake.py +6 -6
- unstructured_ingest/{v2/processes → processes}/connectors/sql/sql.py +16 -11
- unstructured_ingest/{v2/processes → processes}/connectors/sql/sqlite.py +4 -4
- unstructured_ingest/{v2/processes → processes}/connectors/sql/vastdb.py +9 -10
- unstructured_ingest/{v2/processes → processes}/connectors/vectara.py +6 -6
- unstructured_ingest/{v2/processes → processes}/connectors/weaviate/__init__.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/weaviate/cloud.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/weaviate/embedded.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/weaviate/local.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/weaviate/weaviate.py +5 -5
- unstructured_ingest/{v2/processes → processes}/connectors/zendesk/client.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/zendesk/zendesk.py +10 -10
- unstructured_ingest/{v2/processes → processes}/embedder.py +1 -1
- unstructured_ingest/{v2/processes → processes}/filter.py +4 -4
- unstructured_ingest/{v2/processes → processes}/partitioner.py +6 -6
- unstructured_ingest/{v2/processes → processes}/uncompress.py +3 -3
- unstructured_ingest/{v2/processes → processes}/utils/blob_storage.py +2 -2
- unstructured_ingest/{v2/unstructured_api.py → unstructured_api.py} +2 -2
- unstructured_ingest/utils/compression.py +1 -48
- unstructured_ingest/utils/data_prep.py +9 -1
- unstructured_ingest/utils/html.py +3 -3
- unstructured_ingest/{v2/utils.py → utils/pydantic_models.py} +0 -9
- unstructured_ingest/utils/string_and_date_utils.py +1 -1
- {unstructured_ingest-0.6.2.dist-info → unstructured_ingest-0.7.0.dist-info}/METADATA +99 -99
- unstructured_ingest-0.7.0.dist-info/RECORD +370 -0
- {unstructured_ingest-0.6.2.dist-info → unstructured_ingest-0.7.0.dist-info}/top_level.txt +1 -0
- test/unit/v2/test_utils.py +0 -82
- unstructured_ingest/cli/cmd_factory.py +0 -12
- unstructured_ingest/cli/cmds/__init__.py +0 -145
- unstructured_ingest/cli/cmds/airtable.py +0 -69
- unstructured_ingest/cli/cmds/astradb.py +0 -99
- unstructured_ingest/cli/cmds/azure_ai_search.py +0 -65
- unstructured_ingest/cli/cmds/biomed.py +0 -52
- unstructured_ingest/cli/cmds/chroma.py +0 -104
- unstructured_ingest/cli/cmds/clarifai.py +0 -71
- unstructured_ingest/cli/cmds/confluence.py +0 -69
- unstructured_ingest/cli/cmds/databricks_volumes.py +0 -163
- unstructured_ingest/cli/cmds/delta_table.py +0 -94
- unstructured_ingest/cli/cmds/discord.py +0 -47
- unstructured_ingest/cli/cmds/elasticsearch.py +0 -133
- unstructured_ingest/cli/cmds/fsspec/azure.py +0 -94
- unstructured_ingest/cli/cmds/fsspec/box.py +0 -48
- unstructured_ingest/cli/cmds/fsspec/dropbox.py +0 -51
- unstructured_ingest/cli/cmds/fsspec/fsspec.py +0 -15
- unstructured_ingest/cli/cmds/fsspec/gcs.py +0 -71
- unstructured_ingest/cli/cmds/fsspec/s3.py +0 -74
- unstructured_ingest/cli/cmds/fsspec/sftp.py +0 -58
- unstructured_ingest/cli/cmds/github.py +0 -54
- unstructured_ingest/cli/cmds/gitlab.py +0 -54
- unstructured_ingest/cli/cmds/google_drive.py +0 -49
- unstructured_ingest/cli/cmds/hubspot.py +0 -70
- unstructured_ingest/cli/cmds/jira.py +0 -71
- unstructured_ingest/cli/cmds/kafka.py +0 -102
- unstructured_ingest/cli/cmds/local.py +0 -43
- unstructured_ingest/cli/cmds/mongodb.py +0 -72
- unstructured_ingest/cli/cmds/notion.py +0 -48
- unstructured_ingest/cli/cmds/onedrive.py +0 -66
- unstructured_ingest/cli/cmds/opensearch.py +0 -117
- unstructured_ingest/cli/cmds/outlook.py +0 -67
- unstructured_ingest/cli/cmds/pinecone.py +0 -71
- unstructured_ingest/cli/cmds/qdrant.py +0 -124
- unstructured_ingest/cli/cmds/reddit.py +0 -67
- unstructured_ingest/cli/cmds/salesforce.py +0 -58
- unstructured_ingest/cli/cmds/sharepoint.py +0 -66
- unstructured_ingest/cli/cmds/slack.py +0 -56
- unstructured_ingest/cli/cmds/sql.py +0 -66
- unstructured_ingest/cli/cmds/vectara.py +0 -66
- unstructured_ingest/cli/cmds/weaviate.py +0 -98
- unstructured_ingest/cli/cmds/wikipedia.py +0 -40
- unstructured_ingest/cli/common.py +0 -7
- unstructured_ingest/cli/interfaces.py +0 -663
- unstructured_ingest/cli/utils.py +0 -205
- unstructured_ingest/connector/airtable.py +0 -309
- unstructured_ingest/connector/astradb.py +0 -267
- unstructured_ingest/connector/azure_ai_search.py +0 -144
- unstructured_ingest/connector/biomed.py +0 -320
- unstructured_ingest/connector/chroma.py +0 -158
- unstructured_ingest/connector/clarifai.py +0 -122
- unstructured_ingest/connector/confluence.py +0 -285
- unstructured_ingest/connector/databricks_volumes.py +0 -137
- unstructured_ingest/connector/delta_table.py +0 -203
- unstructured_ingest/connector/discord.py +0 -180
- unstructured_ingest/connector/elasticsearch.py +0 -396
- unstructured_ingest/connector/fsspec/azure.py +0 -78
- unstructured_ingest/connector/fsspec/box.py +0 -109
- unstructured_ingest/connector/fsspec/dropbox.py +0 -160
- unstructured_ingest/connector/fsspec/fsspec.py +0 -359
- unstructured_ingest/connector/fsspec/gcs.py +0 -82
- unstructured_ingest/connector/fsspec/s3.py +0 -62
- unstructured_ingest/connector/fsspec/sftp.py +0 -81
- unstructured_ingest/connector/git.py +0 -124
- unstructured_ingest/connector/github.py +0 -174
- unstructured_ingest/connector/gitlab.py +0 -142
- unstructured_ingest/connector/google_drive.py +0 -348
- unstructured_ingest/connector/hubspot.py +0 -278
- unstructured_ingest/connector/jira.py +0 -469
- unstructured_ingest/connector/kafka.py +0 -293
- unstructured_ingest/connector/local.py +0 -139
- unstructured_ingest/connector/mongodb.py +0 -284
- unstructured_ingest/connector/notion/client.py +0 -248
- unstructured_ingest/connector/notion/connector.py +0 -469
- unstructured_ingest/connector/notion/helpers.py +0 -584
- unstructured_ingest/connector/notion/types/blocks/bookmark.py +0 -40
- unstructured_ingest/connector/notion/types/blocks/callout.py +0 -94
- unstructured_ingest/connector/notion/types/blocks/child_database.py +0 -23
- unstructured_ingest/connector/notion/types/blocks/code.py +0 -43
- unstructured_ingest/connector/notion/types/blocks/column_list.py +0 -35
- unstructured_ingest/connector/notion/types/blocks/divider.py +0 -22
- unstructured_ingest/connector/notion/types/blocks/heading.py +0 -37
- unstructured_ingest/connector/notion/types/blocks/link_preview.py +0 -24
- unstructured_ingest/connector/notion/types/blocks/numbered_list.py +0 -29
- unstructured_ingest/connector/notion/types/blocks/quote.py +0 -37
- unstructured_ingest/connector/notion/types/blocks/synced_block.py +0 -57
- unstructured_ingest/connector/notion/types/blocks/table.py +0 -63
- unstructured_ingest/connector/notion/types/blocks/table_of_contents.py +0 -23
- unstructured_ingest/connector/notion/types/blocks/template.py +0 -30
- unstructured_ingest/connector/notion/types/blocks/todo.py +0 -42
- unstructured_ingest/connector/notion/types/blocks/unsupported.py +0 -20
- unstructured_ingest/connector/notion/types/database_properties/__init__.py +0 -106
- unstructured_ingest/connector/notion/types/database_properties/checkbox.py +0 -38
- unstructured_ingest/connector/notion/types/database_properties/date.py +0 -41
- unstructured_ingest/connector/notion/types/database_properties/formula.py +0 -49
- unstructured_ingest/connector/notion/types/database_properties/last_edited_time.py +0 -34
- unstructured_ingest/connector/notion/types/database_properties/multiselect.py +0 -73
- unstructured_ingest/connector/notion/types/database_properties/people.py +0 -40
- unstructured_ingest/connector/notion/types/database_properties/phone_number.py +0 -36
- unstructured_ingest/connector/notion/types/database_properties/relation.py +0 -67
- unstructured_ingest/connector/notion/types/database_properties/select.py +0 -68
- unstructured_ingest/connector/notion/types/database_properties/status.py +0 -80
- unstructured_ingest/connector/notion/types/database_properties/unique_id.py +0 -50
- unstructured_ingest/connector/notion/types/date.py +0 -26
- unstructured_ingest/connector/notion/types/file.py +0 -51
- unstructured_ingest/connector/notion/types/user.py +0 -76
- unstructured_ingest/connector/onedrive.py +0 -232
- unstructured_ingest/connector/opensearch.py +0 -218
- unstructured_ingest/connector/outlook.py +0 -285
- unstructured_ingest/connector/pinecone.py +0 -150
- unstructured_ingest/connector/qdrant.py +0 -144
- unstructured_ingest/connector/reddit.py +0 -166
- unstructured_ingest/connector/registry.py +0 -109
- unstructured_ingest/connector/salesforce.py +0 -301
- unstructured_ingest/connector/sharepoint.py +0 -573
- unstructured_ingest/connector/slack.py +0 -224
- unstructured_ingest/connector/sql.py +0 -199
- unstructured_ingest/connector/vectara.py +0 -253
- unstructured_ingest/connector/weaviate.py +0 -190
- unstructured_ingest/connector/wikipedia.py +0 -208
- unstructured_ingest/enhanced_dataclass/__init__.py +0 -4
- unstructured_ingest/enhanced_dataclass/core.py +0 -99
- unstructured_ingest/enhanced_dataclass/dataclasses.py +0 -54
- unstructured_ingest/enhanced_dataclass/json_mixin.py +0 -125
- unstructured_ingest/interfaces.py +0 -852
- unstructured_ingest/pipeline/copy.py +0 -19
- unstructured_ingest/pipeline/doc_factory.py +0 -12
- unstructured_ingest/pipeline/partition.py +0 -60
- unstructured_ingest/pipeline/permissions.py +0 -12
- unstructured_ingest/pipeline/reformat/chunking.py +0 -134
- unstructured_ingest/pipeline/reformat/embedding.py +0 -64
- unstructured_ingest/pipeline/source.py +0 -77
- unstructured_ingest/pipeline/utils.py +0 -6
- unstructured_ingest/pipeline/write.py +0 -18
- unstructured_ingest/processor.py +0 -93
- unstructured_ingest/runner/__init__.py +0 -104
- unstructured_ingest/runner/airtable.py +0 -35
- unstructured_ingest/runner/astradb.py +0 -34
- unstructured_ingest/runner/base_runner.py +0 -89
- unstructured_ingest/runner/biomed.py +0 -45
- unstructured_ingest/runner/confluence.py +0 -35
- unstructured_ingest/runner/delta_table.py +0 -34
- unstructured_ingest/runner/discord.py +0 -35
- unstructured_ingest/runner/elasticsearch.py +0 -40
- unstructured_ingest/runner/fsspec/azure.py +0 -30
- unstructured_ingest/runner/fsspec/box.py +0 -28
- unstructured_ingest/runner/fsspec/dropbox.py +0 -30
- unstructured_ingest/runner/fsspec/fsspec.py +0 -40
- unstructured_ingest/runner/fsspec/gcs.py +0 -28
- unstructured_ingest/runner/fsspec/s3.py +0 -28
- unstructured_ingest/runner/fsspec/sftp.py +0 -28
- unstructured_ingest/runner/github.py +0 -37
- unstructured_ingest/runner/gitlab.py +0 -37
- unstructured_ingest/runner/google_drive.py +0 -35
- unstructured_ingest/runner/hubspot.py +0 -35
- unstructured_ingest/runner/jira.py +0 -35
- unstructured_ingest/runner/kafka.py +0 -34
- unstructured_ingest/runner/local.py +0 -23
- unstructured_ingest/runner/mongodb.py +0 -34
- unstructured_ingest/runner/notion.py +0 -61
- unstructured_ingest/runner/onedrive.py +0 -35
- unstructured_ingest/runner/opensearch.py +0 -40
- unstructured_ingest/runner/outlook.py +0 -33
- unstructured_ingest/runner/reddit.py +0 -35
- unstructured_ingest/runner/salesforce.py +0 -33
- unstructured_ingest/runner/sharepoint.py +0 -35
- unstructured_ingest/runner/slack.py +0 -33
- unstructured_ingest/runner/utils.py +0 -47
- unstructured_ingest/runner/wikipedia.py +0 -35
- unstructured_ingest/runner/writers/__init__.py +0 -48
- unstructured_ingest/runner/writers/astradb.py +0 -22
- unstructured_ingest/runner/writers/azure_ai_search.py +0 -24
- unstructured_ingest/runner/writers/base_writer.py +0 -26
- unstructured_ingest/runner/writers/chroma.py +0 -22
- unstructured_ingest/runner/writers/clarifai.py +0 -19
- unstructured_ingest/runner/writers/databricks_volumes.py +0 -25
- unstructured_ingest/runner/writers/delta_table.py +0 -24
- unstructured_ingest/runner/writers/elasticsearch.py +0 -24
- unstructured_ingest/runner/writers/fsspec/azure.py +0 -24
- unstructured_ingest/runner/writers/fsspec/box.py +0 -21
- unstructured_ingest/runner/writers/fsspec/dropbox.py +0 -21
- unstructured_ingest/runner/writers/fsspec/gcs.py +0 -19
- unstructured_ingest/runner/writers/fsspec/s3.py +0 -21
- unstructured_ingest/runner/writers/kafka.py +0 -21
- unstructured_ingest/runner/writers/mongodb.py +0 -21
- unstructured_ingest/runner/writers/opensearch.py +0 -26
- unstructured_ingest/runner/writers/pinecone.py +0 -21
- unstructured_ingest/runner/writers/qdrant.py +0 -19
- unstructured_ingest/runner/writers/sql.py +0 -22
- unstructured_ingest/runner/writers/vectara.py +0 -22
- unstructured_ingest/runner/writers/weaviate.py +0 -21
- unstructured_ingest/utils/google_filetype.py +0 -9
- unstructured_ingest/v2/__init__.py +0 -1
- unstructured_ingest/v2/cli/__init__.py +0 -0
- unstructured_ingest/v2/cli/base/__init__.py +0 -4
- unstructured_ingest/v2/cli/base/cmd.py +0 -269
- unstructured_ingest/v2/cli/base/dest.py +0 -85
- unstructured_ingest/v2/cli/base/src.py +0 -85
- unstructured_ingest/v2/cli/cli.py +0 -24
- unstructured_ingest/v2/cli/utils/__init__.py +0 -0
- unstructured_ingest/v2/logger.py +0 -126
- unstructured_ingest/v2/main.py +0 -11
- unstructured_ingest/v2/pipeline/__init__.py +0 -0
- unstructured_ingest/v2/pipeline/interfaces.py +0 -211
- unstructured_ingest/v2/pipeline/pipeline.py +0 -408
- unstructured_ingest/v2/pipeline/steps/__init__.py +0 -0
- unstructured_ingest/v2/processes/connectors/assets/__init__.py +0 -0
- unstructured_ingest/v2/processes/connectors/assets/databricks_delta_table_schema.sql +0 -10
- unstructured_ingest/v2/processes/connectors/assets/weaviate_collection_config.json +0 -23
- unstructured_ingest/v2/processes/connectors/notion/__init__.py +0 -0
- unstructured_ingest/v2/processes/connectors/notion/interfaces.py +0 -32
- unstructured_ingest/v2/processes/connectors/notion/types/__init__.py +0 -0
- unstructured_ingest/v2/processes/connectors/notion/types/block.py +0 -96
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/__init__.py +0 -63
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/breadcrumb.py +0 -21
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/bulleted_list_item.py +0 -31
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/child_page.py +0 -23
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/embed.py +0 -36
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/equation.py +0 -23
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/file.py +0 -49
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/image.py +0 -21
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/link_to_page.py +0 -29
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/paragraph.py +0 -31
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/pdf.py +0 -49
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/toggle.py +0 -37
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/video.py +0 -22
- unstructured_ingest/v2/processes/connectors/notion/types/database.py +0 -73
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/created_by.py +0 -35
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/created_time.py +0 -34
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/email.py +0 -36
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/files.py +0 -37
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/last_edited_by.py +0 -34
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/number.py +0 -49
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/rich_text.py +0 -43
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/rollup.py +0 -56
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/title.py +0 -37
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/url.py +0 -37
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/verification.py +0 -78
- unstructured_ingest/v2/processes/connectors/notion/types/page.py +0 -45
- unstructured_ingest/v2/processes/connectors/notion/types/parent.py +0 -66
- unstructured_ingest/v2/processes/connectors/notion/types/rich_text.py +0 -189
- unstructured_ingest/v2/processes/connectors/zendesk/__init__.py +0 -0
- unstructured_ingest/v2/processes/utils/__init__.py +0 -0
- unstructured_ingest/v2/types/__init__.py +0 -0
- unstructured_ingest-0.6.2.dist-info/RECORD +0 -589
- {test/unit/v2 → examples}/__init__.py +0 -0
- /test/unit/{v2/chunkers → chunkers}/__init__.py +0 -0
- /test/unit/{v2/connectors → connectors}/__init__.py +0 -0
- /test/unit/{v2/connectors → connectors}/ibm_watsonx/__init__.py +0 -0
- /test/unit/{v2/connectors → connectors}/motherduck/__init__.py +0 -0
- /test/unit/{v2/connectors → connectors}/sql/__init__.py +0 -0
- /test/unit/{v2/embedders → embedders}/__init__.py +0 -0
- /test/unit/{v2/embedders → embedders}/test_bedrock.py +0 -0
- /test/unit/{v2/embedders → embedders}/test_mixedbread.py +0 -0
- /test/unit/{v2/embedders → embedders}/test_octoai.py +0 -0
- /test/unit/{v2/embedders → embedders}/test_openai.py +0 -0
- /test/unit/{v2/embedders → embedders}/test_togetherai.py +0 -0
- /test/unit/{v2/embedders → embedders}/test_voyageai.py +0 -0
- /test/unit/{v2/partitioners → partitioners}/__init__.py +0 -0
- /test/unit/{v2/utils → utils}/__init__.py +0 -0
- /test/unit/{v2/utils → utils}/data_generator.py +0 -0
- /unstructured_ingest/{v2/cli → cli}/base/importer.py +0 -0
- /unstructured_ingest/cli/{cmds/fsspec → utils}/__init__.py +0 -0
- /unstructured_ingest/{v2/cli → cli}/utils/click.py +0 -0
- /unstructured_ingest/{connector → data_types}/__init__.py +0 -0
- /unstructured_ingest/{v2/errors.py → errors_v2.py} +0 -0
- /unstructured_ingest/{v2/interfaces → interfaces}/__init__.py +0 -0
- /unstructured_ingest/{v2/interfaces → interfaces}/connector.py +0 -0
- /unstructured_ingest/{v2/interfaces → interfaces}/process.py +0 -0
- /unstructured_ingest/{v2/interfaces → interfaces}/processor.py +0 -0
- /unstructured_ingest/{connector/fsspec → pipeline/steps}/__init__.py +0 -0
- /unstructured_ingest/{v2/processes → processes}/__init__.py +0 -0
- /unstructured_ingest/{connector/notion → processes/connectors/assets}/__init__.py +0 -0
- /unstructured_ingest/{v2/processes → processes}/connectors/fsspec/utils.py +0 -0
- /unstructured_ingest/{connector/notion/types → processes/connectors/notion}/__init__.py +0 -0
- /unstructured_ingest/{ingest_backoff → processes/connectors/notion/ingest_backoff}/__init__.py +0 -0
- /unstructured_ingest/{ingest_backoff → processes/connectors/notion/ingest_backoff}/_common.py +0 -0
- /unstructured_ingest/{connector → processes/connectors}/notion/interfaces.py +0 -0
- /unstructured_ingest/{pipeline/reformat → processes/connectors/notion/types}/__init__.py +0 -0
- /unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/__init__.py +0 -0
- /unstructured_ingest/{v2/processes → processes}/connectors/utils.py +0 -0
- /unstructured_ingest/{runner/fsspec → processes/connectors/zendesk}/__init__.py +0 -0
- /unstructured_ingest/{runner/writers/fsspec → processes/utils}/__init__.py +0 -0
- /unstructured_ingest/{v2 → utils}/constants.py +0 -0
- {unstructured_ingest-0.6.2.dist-info → unstructured_ingest-0.7.0.dist-info}/LICENSE.md +0 -0
- {unstructured_ingest-0.6.2.dist-info → unstructured_ingest-0.7.0.dist-info}/WHEEL +0 -0
- {unstructured_ingest-0.6.2.dist-info → unstructured_ingest-0.7.0.dist-info}/entry_points.txt +0 -0
|
@@ -1,94 +0,0 @@
|
|
|
1
|
-
import typing as t
|
|
2
|
-
from dataclasses import dataclass
|
|
3
|
-
|
|
4
|
-
import click
|
|
5
|
-
|
|
6
|
-
from unstructured_ingest.cli.base.src import BaseSrcCmd
|
|
7
|
-
from unstructured_ingest.cli.interfaces import CliConfig, Dict
|
|
8
|
-
from unstructured_ingest.connector.delta_table import DeltaTableWriteConfig, SimpleDeltaTableConfig
|
|
9
|
-
|
|
10
|
-
CMD_NAME = "delta-table"
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
@dataclass
|
|
14
|
-
class DeltaTableCliConfig(SimpleDeltaTableConfig, CliConfig):
|
|
15
|
-
@staticmethod
|
|
16
|
-
def get_cli_options() -> t.List[click.Option]:
|
|
17
|
-
options = [
|
|
18
|
-
click.Option(
|
|
19
|
-
["--table-uri"],
|
|
20
|
-
required=True,
|
|
21
|
-
help="the path of the DeltaTable",
|
|
22
|
-
),
|
|
23
|
-
click.Option(
|
|
24
|
-
["--version"],
|
|
25
|
-
default=None,
|
|
26
|
-
type=int,
|
|
27
|
-
help="version of the DeltaTable",
|
|
28
|
-
),
|
|
29
|
-
click.Option(
|
|
30
|
-
["--storage_options"],
|
|
31
|
-
required=False,
|
|
32
|
-
type=Dict(),
|
|
33
|
-
default=None,
|
|
34
|
-
help="a dictionary of the options to use for the storage backend, "
|
|
35
|
-
"passed in as a json string",
|
|
36
|
-
),
|
|
37
|
-
click.Option(
|
|
38
|
-
["--without-files"],
|
|
39
|
-
is_flag=True,
|
|
40
|
-
default=False,
|
|
41
|
-
help="If set, will load table without tracking files.",
|
|
42
|
-
),
|
|
43
|
-
]
|
|
44
|
-
return options
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
@dataclass
|
|
48
|
-
class DeltaTableCliWriteConfig(DeltaTableWriteConfig, CliConfig):
|
|
49
|
-
@staticmethod
|
|
50
|
-
def get_cli_options() -> t.List[click.Option]:
|
|
51
|
-
options = [
|
|
52
|
-
click.Option(
|
|
53
|
-
["--overwrite-schema"],
|
|
54
|
-
is_flag=True,
|
|
55
|
-
default=False,
|
|
56
|
-
help="Flag to overwrite schema of destination table",
|
|
57
|
-
),
|
|
58
|
-
click.Option(
|
|
59
|
-
["--drop-empty-cols"],
|
|
60
|
-
is_flag=True,
|
|
61
|
-
default=False,
|
|
62
|
-
help="Flag to drop any columns that have no content",
|
|
63
|
-
),
|
|
64
|
-
click.Option(
|
|
65
|
-
["--mode"],
|
|
66
|
-
default="error",
|
|
67
|
-
type=click.Choice(["error", "append", "overwrite", "ignore"]),
|
|
68
|
-
help="How to handle existing data. Default is to error if table already exists. "
|
|
69
|
-
"If 'append', will add new data. "
|
|
70
|
-
"If 'overwrite', will replace table with new data. "
|
|
71
|
-
"If 'ignore', will not write anything if table already exists.",
|
|
72
|
-
),
|
|
73
|
-
]
|
|
74
|
-
return options
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
def get_base_src_cmd() -> BaseSrcCmd:
|
|
78
|
-
cmd_cls = BaseSrcCmd(
|
|
79
|
-
cmd_name=CMD_NAME,
|
|
80
|
-
cli_config=DeltaTableCliConfig,
|
|
81
|
-
)
|
|
82
|
-
return cmd_cls
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
def get_base_dest_cmd():
|
|
86
|
-
from unstructured_ingest.cli.base.dest import BaseDestCmd
|
|
87
|
-
|
|
88
|
-
cmd_cls = BaseDestCmd(
|
|
89
|
-
cmd_name=CMD_NAME,
|
|
90
|
-
cli_config=DeltaTableCliConfig,
|
|
91
|
-
additional_cli_options=[DeltaTableCliWriteConfig],
|
|
92
|
-
write_config=DeltaTableWriteConfig,
|
|
93
|
-
)
|
|
94
|
-
return cmd_cls
|
|
@@ -1,47 +0,0 @@
|
|
|
1
|
-
import typing as t
|
|
2
|
-
from dataclasses import dataclass
|
|
3
|
-
|
|
4
|
-
import click
|
|
5
|
-
|
|
6
|
-
from unstructured_ingest.cli.base.src import BaseSrcCmd
|
|
7
|
-
from unstructured_ingest.cli.interfaces import (
|
|
8
|
-
CliConfig,
|
|
9
|
-
DelimitedString,
|
|
10
|
-
)
|
|
11
|
-
from unstructured_ingest.connector.discord import SimpleDiscordConfig
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
@dataclass
|
|
15
|
-
class DiscordCliConfig(SimpleDiscordConfig, CliConfig):
|
|
16
|
-
@staticmethod
|
|
17
|
-
def get_cli_options() -> t.List[click.Option]:
|
|
18
|
-
options = [
|
|
19
|
-
click.Option(
|
|
20
|
-
["--token"],
|
|
21
|
-
required=True,
|
|
22
|
-
help="Bot token used to access Discord API, must have "
|
|
23
|
-
"READ_MESSAGE_HISTORY scope for the bot user",
|
|
24
|
-
),
|
|
25
|
-
click.Option(
|
|
26
|
-
["--channels"],
|
|
27
|
-
required=True,
|
|
28
|
-
type=DelimitedString(),
|
|
29
|
-
help="Comma-delimited list of discord channel ids to ingest from.",
|
|
30
|
-
),
|
|
31
|
-
click.Option(
|
|
32
|
-
["--period"],
|
|
33
|
-
default=None,
|
|
34
|
-
type=click.IntRange(0),
|
|
35
|
-
help="Number of days to go back in the history of "
|
|
36
|
-
"discord channels, must be a number",
|
|
37
|
-
),
|
|
38
|
-
]
|
|
39
|
-
return options
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
def get_base_src_cmd() -> BaseSrcCmd:
|
|
43
|
-
cmd_cls = BaseSrcCmd(
|
|
44
|
-
cmd_name="discord",
|
|
45
|
-
cli_config=DiscordCliConfig,
|
|
46
|
-
)
|
|
47
|
-
return cmd_cls
|
|
@@ -1,133 +0,0 @@
|
|
|
1
|
-
import typing as t
|
|
2
|
-
from dataclasses import dataclass
|
|
3
|
-
|
|
4
|
-
import click
|
|
5
|
-
|
|
6
|
-
from unstructured_ingest.cli.base.src import BaseSrcCmd
|
|
7
|
-
from unstructured_ingest.cli.interfaces import CliConfig, DelimitedString
|
|
8
|
-
from unstructured_ingest.connector.elasticsearch import (
|
|
9
|
-
ElasticsearchWriteConfig,
|
|
10
|
-
SimpleElasticsearchConfig,
|
|
11
|
-
)
|
|
12
|
-
|
|
13
|
-
CMD_NAME = "elasticsearch"
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
@dataclass
|
|
17
|
-
class ElasticsearchCliConfig(SimpleElasticsearchConfig, CliConfig):
|
|
18
|
-
@staticmethod
|
|
19
|
-
def get_cli_options() -> t.List[click.Option]:
|
|
20
|
-
options = [
|
|
21
|
-
click.Option(
|
|
22
|
-
["--index-name"],
|
|
23
|
-
required=True,
|
|
24
|
-
type=str,
|
|
25
|
-
help="Name of the Elasticsearch index to pull data from, or upload data to.",
|
|
26
|
-
),
|
|
27
|
-
click.Option(
|
|
28
|
-
["--hosts"],
|
|
29
|
-
type=DelimitedString(),
|
|
30
|
-
help='List of the Elasticsearch hosts to connect to, e.g. "http://localhost:9200"',
|
|
31
|
-
),
|
|
32
|
-
click.Option(
|
|
33
|
-
["--fields"],
|
|
34
|
-
type=DelimitedString(),
|
|
35
|
-
default=[],
|
|
36
|
-
help="If provided, will limit the fields returned by Elasticsearch "
|
|
37
|
-
"to this comma-delimited list",
|
|
38
|
-
),
|
|
39
|
-
click.Option(
|
|
40
|
-
["--username"], type=str, default=None, help="username when using basic auth"
|
|
41
|
-
),
|
|
42
|
-
click.Option(
|
|
43
|
-
["--password"],
|
|
44
|
-
type=str,
|
|
45
|
-
default=None,
|
|
46
|
-
help="password when using basic auth or connecting to a cloud instance",
|
|
47
|
-
),
|
|
48
|
-
click.Option(
|
|
49
|
-
["--cloud-id"], type=str, default=None, help="id used to connect to Elastic Cloud"
|
|
50
|
-
),
|
|
51
|
-
click.Option(
|
|
52
|
-
["--es-api-key"], type=str, default=None, help="api key used for authentication"
|
|
53
|
-
),
|
|
54
|
-
click.Option(
|
|
55
|
-
["--api-key-id"],
|
|
56
|
-
type=str,
|
|
57
|
-
default=None,
|
|
58
|
-
help="id associated with api key used for authentication: "
|
|
59
|
-
"https://www.elastic.co/guide/en/elasticsearch/reference/current/security-api-create-api-key.html", # noqa: E501
|
|
60
|
-
),
|
|
61
|
-
click.Option(
|
|
62
|
-
["--bearer-auth"],
|
|
63
|
-
type=str,
|
|
64
|
-
default=None,
|
|
65
|
-
help="bearer token used for HTTP bearer authentication",
|
|
66
|
-
),
|
|
67
|
-
click.Option(
|
|
68
|
-
["--ca-certs"],
|
|
69
|
-
type=click.Path(),
|
|
70
|
-
default=None,
|
|
71
|
-
),
|
|
72
|
-
click.Option(
|
|
73
|
-
["--ssl-assert-fingerprint"],
|
|
74
|
-
type=str,
|
|
75
|
-
default=None,
|
|
76
|
-
help="SHA256 fingerprint value",
|
|
77
|
-
),
|
|
78
|
-
click.Option(
|
|
79
|
-
["--batch-size"],
|
|
80
|
-
default=100,
|
|
81
|
-
type=click.IntRange(0),
|
|
82
|
-
help="how many records to read at a time per process",
|
|
83
|
-
),
|
|
84
|
-
]
|
|
85
|
-
return options
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
@dataclass
|
|
89
|
-
class ElasticsearchCliWriteConfig(ElasticsearchWriteConfig, CliConfig):
|
|
90
|
-
@staticmethod
|
|
91
|
-
def get_cli_options() -> t.List[click.Option]:
|
|
92
|
-
options = [
|
|
93
|
-
click.Option(
|
|
94
|
-
["--batch-size-bytes"],
|
|
95
|
-
required=False,
|
|
96
|
-
default=15_000_000,
|
|
97
|
-
type=int,
|
|
98
|
-
help="Size limit (in bytes) for each batch of items to be uploaded. Check"
|
|
99
|
-
" https://www.elastic.co/guide/en/elasticsearch/guide/current/bulk.html"
|
|
100
|
-
"#_how_big_is_too_big for more information.",
|
|
101
|
-
),
|
|
102
|
-
click.Option(
|
|
103
|
-
["--num-processes"],
|
|
104
|
-
required=False,
|
|
105
|
-
default=1,
|
|
106
|
-
type=int,
|
|
107
|
-
help="Number of processes to be used while uploading content",
|
|
108
|
-
),
|
|
109
|
-
]
|
|
110
|
-
return options
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
def get_base_src_cmd() -> BaseSrcCmd:
|
|
114
|
-
cmd_cls = BaseSrcCmd(
|
|
115
|
-
cmd_name="elasticsearch",
|
|
116
|
-
cli_config=ElasticsearchCliConfig,
|
|
117
|
-
)
|
|
118
|
-
return cmd_cls
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
def get_base_dest_cmd():
|
|
122
|
-
from unstructured_ingest.cli.base.dest import BaseDestCmd
|
|
123
|
-
|
|
124
|
-
cmd_cls = BaseDestCmd(
|
|
125
|
-
cmd_name="elasticsearch",
|
|
126
|
-
cli_config=ElasticsearchCliConfig,
|
|
127
|
-
additional_cli_options=[ElasticsearchCliWriteConfig],
|
|
128
|
-
addition_configs={
|
|
129
|
-
"connector_config": SimpleElasticsearchConfig,
|
|
130
|
-
"write_config": ElasticsearchCliWriteConfig,
|
|
131
|
-
},
|
|
132
|
-
)
|
|
133
|
-
return cmd_cls
|
|
@@ -1,94 +0,0 @@
|
|
|
1
|
-
import typing as t
|
|
2
|
-
from dataclasses import dataclass
|
|
3
|
-
|
|
4
|
-
import click
|
|
5
|
-
|
|
6
|
-
from unstructured_ingest.cli.base.src import BaseSrcCmd
|
|
7
|
-
from unstructured_ingest.cli.interfaces import (
|
|
8
|
-
CliConfig,
|
|
9
|
-
)
|
|
10
|
-
from unstructured_ingest.connector.fsspec.azure import (
|
|
11
|
-
AzureWriteConfig,
|
|
12
|
-
SimpleAzureBlobStorageConfig,
|
|
13
|
-
)
|
|
14
|
-
|
|
15
|
-
CMD_NAME = "azure"
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
@dataclass
|
|
19
|
-
class AzureCliConfig(SimpleAzureBlobStorageConfig, CliConfig):
|
|
20
|
-
@staticmethod
|
|
21
|
-
def get_cli_options() -> t.List[click.Option]:
|
|
22
|
-
options = [
|
|
23
|
-
click.Option(
|
|
24
|
-
["--account-key"],
|
|
25
|
-
default=None,
|
|
26
|
-
help="The storage account key. This is used for shared key "
|
|
27
|
-
"authentication. If any of account key, sas token or "
|
|
28
|
-
"client_id are not specified, anonymous access will be used.",
|
|
29
|
-
),
|
|
30
|
-
click.Option(
|
|
31
|
-
["--account-name"],
|
|
32
|
-
default=None,
|
|
33
|
-
help="The storage account name. This is used to authenticate "
|
|
34
|
-
"requests signed with an account key and to construct "
|
|
35
|
-
"the storage endpoint. It is required unless a connection "
|
|
36
|
-
"string is given, or if a custom domain is used with "
|
|
37
|
-
"anonymous authentication.",
|
|
38
|
-
),
|
|
39
|
-
click.Option(
|
|
40
|
-
["--connection-string"],
|
|
41
|
-
default=None,
|
|
42
|
-
help="If specified, this will override all other parameters. See "
|
|
43
|
-
"http://azure.microsoft.com/en-us/documentation/articles/storage-configure-connection-string/ " # noqa: E501
|
|
44
|
-
"for the connection string format.",
|
|
45
|
-
),
|
|
46
|
-
click.Option(
|
|
47
|
-
["--sas_token"],
|
|
48
|
-
default=None,
|
|
49
|
-
help="A shared access signature token to use to authenticate "
|
|
50
|
-
"requests instead of the account key. If account key and "
|
|
51
|
-
"sas token are both specified, account key will be used "
|
|
52
|
-
"to sign. If any of account key, sas token or client_id "
|
|
53
|
-
"are not specified, anonymous access will be used.",
|
|
54
|
-
),
|
|
55
|
-
]
|
|
56
|
-
return options
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
@dataclass
|
|
60
|
-
class AzureCliWriteConfig(AzureWriteConfig, CliConfig):
|
|
61
|
-
@staticmethod
|
|
62
|
-
def get_cli_options() -> t.List[click.Option]:
|
|
63
|
-
options = [
|
|
64
|
-
click.Option(
|
|
65
|
-
["--overwrite"],
|
|
66
|
-
is_flag=True,
|
|
67
|
-
default=False,
|
|
68
|
-
show_default=True,
|
|
69
|
-
help="If set, will overwrite content if content already exists",
|
|
70
|
-
)
|
|
71
|
-
]
|
|
72
|
-
return options
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
def get_base_src_cmd() -> BaseSrcCmd:
|
|
76
|
-
cmd_cls = BaseSrcCmd(
|
|
77
|
-
cmd_name=CMD_NAME,
|
|
78
|
-
cli_config=AzureCliConfig,
|
|
79
|
-
is_fsspec=True,
|
|
80
|
-
)
|
|
81
|
-
return cmd_cls
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
def get_base_dest_cmd():
|
|
85
|
-
from unstructured_ingest.cli.base.dest import BaseDestCmd
|
|
86
|
-
|
|
87
|
-
cmd_cls = BaseDestCmd(
|
|
88
|
-
cmd_name=CMD_NAME,
|
|
89
|
-
cli_config=AzureCliConfig,
|
|
90
|
-
write_config=AzureCliWriteConfig,
|
|
91
|
-
is_fsspec=True,
|
|
92
|
-
additional_cli_options=[AzureCliWriteConfig],
|
|
93
|
-
)
|
|
94
|
-
return cmd_cls
|
|
@@ -1,48 +0,0 @@
|
|
|
1
|
-
import typing as t
|
|
2
|
-
from dataclasses import dataclass
|
|
3
|
-
|
|
4
|
-
import click
|
|
5
|
-
|
|
6
|
-
from unstructured_ingest.cli.base.src import BaseSrcCmd
|
|
7
|
-
from unstructured_ingest.cli.interfaces import (
|
|
8
|
-
CliConfig,
|
|
9
|
-
)
|
|
10
|
-
from unstructured_ingest.connector.fsspec.box import BoxWriteConfig, SimpleBoxConfig
|
|
11
|
-
|
|
12
|
-
CMD_NAME = "box"
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
@dataclass
|
|
16
|
-
class BoxCliConfig(SimpleBoxConfig, CliConfig):
|
|
17
|
-
@staticmethod
|
|
18
|
-
def get_cli_options() -> t.List[click.Option]:
|
|
19
|
-
options = [
|
|
20
|
-
click.Option(
|
|
21
|
-
["--box-app-config"],
|
|
22
|
-
default=None,
|
|
23
|
-
type=click.Path(),
|
|
24
|
-
help="Path to Box app credentials as json file.",
|
|
25
|
-
),
|
|
26
|
-
]
|
|
27
|
-
return options
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
def get_base_src_cmd() -> BaseSrcCmd:
|
|
31
|
-
cmd_cls = BaseSrcCmd(
|
|
32
|
-
cmd_name=CMD_NAME,
|
|
33
|
-
cli_config=BoxCliConfig,
|
|
34
|
-
is_fsspec=True,
|
|
35
|
-
)
|
|
36
|
-
return cmd_cls
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
def get_base_dest_cmd():
|
|
40
|
-
from unstructured_ingest.cli.base.dest import BaseDestCmd
|
|
41
|
-
|
|
42
|
-
cmd_cls = BaseDestCmd(
|
|
43
|
-
cmd_name=CMD_NAME,
|
|
44
|
-
cli_config=BoxCliConfig,
|
|
45
|
-
write_config=BoxWriteConfig,
|
|
46
|
-
is_fsspec=True,
|
|
47
|
-
)
|
|
48
|
-
return cmd_cls
|
|
@@ -1,51 +0,0 @@
|
|
|
1
|
-
import typing as t
|
|
2
|
-
from dataclasses import dataclass
|
|
3
|
-
|
|
4
|
-
import click
|
|
5
|
-
|
|
6
|
-
from unstructured_ingest.cli.base.src import BaseSrcCmd
|
|
7
|
-
from unstructured_ingest.cli.interfaces import (
|
|
8
|
-
CliConfig,
|
|
9
|
-
)
|
|
10
|
-
from unstructured_ingest.connector.fsspec.dropbox import (
|
|
11
|
-
DropboxWriteConfig,
|
|
12
|
-
SimpleDropboxConfig,
|
|
13
|
-
)
|
|
14
|
-
|
|
15
|
-
CMD_NAME = "dropbox"
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
@dataclass
|
|
19
|
-
class DropboxCliConfig(SimpleDropboxConfig, CliConfig):
|
|
20
|
-
@staticmethod
|
|
21
|
-
def get_cli_options() -> t.List[click.Option]:
|
|
22
|
-
options = [
|
|
23
|
-
click.Option(
|
|
24
|
-
["--token"],
|
|
25
|
-
required=True,
|
|
26
|
-
type=str,
|
|
27
|
-
help="Dropbox access token.",
|
|
28
|
-
),
|
|
29
|
-
]
|
|
30
|
-
return options
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
def get_base_src_cmd() -> BaseSrcCmd:
|
|
34
|
-
cmd_cls = BaseSrcCmd(
|
|
35
|
-
cmd_name=CMD_NAME,
|
|
36
|
-
cli_config=DropboxCliConfig,
|
|
37
|
-
is_fsspec=True,
|
|
38
|
-
)
|
|
39
|
-
return cmd_cls
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
def get_base_dest_cmd():
|
|
43
|
-
from unstructured_ingest.cli.base.dest import BaseDestCmd
|
|
44
|
-
|
|
45
|
-
cmd_cls = BaseDestCmd(
|
|
46
|
-
cmd_name=CMD_NAME,
|
|
47
|
-
cli_config=DropboxCliConfig,
|
|
48
|
-
write_config=DropboxWriteConfig,
|
|
49
|
-
is_fsspec=True,
|
|
50
|
-
)
|
|
51
|
-
return cmd_cls
|
|
@@ -1,15 +0,0 @@
|
|
|
1
|
-
from unstructured_ingest.cli.base.src import BaseSrcCmd
|
|
2
|
-
|
|
3
|
-
CMD_NAME = "fsspec"
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
def get_base_src_cmd() -> BaseSrcCmd:
|
|
7
|
-
cmd_cls = BaseSrcCmd(cmd_name=CMD_NAME, is_fsspec=True)
|
|
8
|
-
return cmd_cls
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
def get_base_dest_cmd():
|
|
12
|
-
from unstructured_ingest.cli.base.dest import BaseDestCmd
|
|
13
|
-
|
|
14
|
-
cmd_cls = BaseDestCmd(cmd_name=CMD_NAME, is_fsspec=True)
|
|
15
|
-
return cmd_cls
|
|
@@ -1,71 +0,0 @@
|
|
|
1
|
-
import typing as t
|
|
2
|
-
from dataclasses import dataclass
|
|
3
|
-
|
|
4
|
-
import click
|
|
5
|
-
|
|
6
|
-
from unstructured_ingest.cli.base.src import BaseSrcCmd
|
|
7
|
-
from unstructured_ingest.cli.interfaces import (
|
|
8
|
-
CliConfig,
|
|
9
|
-
FileOrJson,
|
|
10
|
-
)
|
|
11
|
-
from unstructured_ingest.connector.fsspec.gcs import GcsWriteConfig, SimpleGcsConfig
|
|
12
|
-
|
|
13
|
-
CMD_NAME = "gcs"
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
@dataclass
|
|
17
|
-
class GcsCliConfig(SimpleGcsConfig, CliConfig):
|
|
18
|
-
@staticmethod
|
|
19
|
-
def get_cli_options() -> t.List[click.Option]:
|
|
20
|
-
help_string = """
|
|
21
|
-
Options:
|
|
22
|
-
- ``None``, GCSFS will attempt to guess your credentials in the
|
|
23
|
-
following order: gcloud CLI default, gcsfs cached token, google compute
|
|
24
|
-
metadata service, anonymous.
|
|
25
|
-
- ``'google_default'``, your default gcloud credentials will be used,
|
|
26
|
-
which are typically established by doing ``gcloud login`` in a terminal.
|
|
27
|
-
- ``'cache'``, credentials from previously successful gcsfs
|
|
28
|
-
authentication will be used (use this after "browser" auth succeeded)
|
|
29
|
-
- ``'anon'``, no authentication is performed, and you can only
|
|
30
|
-
access data which is accessible to allUsers (in this case, the project and
|
|
31
|
-
access level parameters are meaningless)
|
|
32
|
-
- ``'browser'``, you get an access code with which you can
|
|
33
|
-
authenticate via a specially provided URL
|
|
34
|
-
- if ``'cloud'``, we assume we are running within google compute
|
|
35
|
-
or google container engine, and query the internal metadata directly for
|
|
36
|
-
a token.
|
|
37
|
-
- you may supply a token generated by the
|
|
38
|
-
[gcloud](https://cloud.google.com/sdk/docs/)
|
|
39
|
-
utility; this is either a python dictionary or the name of a file
|
|
40
|
-
containing the JSON returned by logging in with the gcloud CLI tool.
|
|
41
|
-
"""
|
|
42
|
-
options = [
|
|
43
|
-
click.Option(
|
|
44
|
-
["--service-account-key"],
|
|
45
|
-
default=None,
|
|
46
|
-
type=FileOrJson(allow_raw_str=True),
|
|
47
|
-
help=help_string,
|
|
48
|
-
),
|
|
49
|
-
]
|
|
50
|
-
return options
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
def get_base_src_cmd() -> BaseSrcCmd:
|
|
54
|
-
cmd_cls = BaseSrcCmd(
|
|
55
|
-
cmd_name=CMD_NAME,
|
|
56
|
-
cli_config=GcsCliConfig,
|
|
57
|
-
is_fsspec=True,
|
|
58
|
-
)
|
|
59
|
-
return cmd_cls
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
def get_base_dest_cmd():
|
|
63
|
-
from unstructured_ingest.cli.base.dest import BaseDestCmd
|
|
64
|
-
|
|
65
|
-
cmd_cls = BaseDestCmd(
|
|
66
|
-
cmd_name=CMD_NAME,
|
|
67
|
-
cli_config=GcsCliConfig,
|
|
68
|
-
write_config=GcsWriteConfig,
|
|
69
|
-
is_fsspec=True,
|
|
70
|
-
)
|
|
71
|
-
return cmd_cls
|
|
@@ -1,74 +0,0 @@
|
|
|
1
|
-
import typing as t
|
|
2
|
-
from dataclasses import dataclass
|
|
3
|
-
|
|
4
|
-
import click
|
|
5
|
-
|
|
6
|
-
from unstructured_ingest.cli.base.src import BaseSrcCmd
|
|
7
|
-
from unstructured_ingest.cli.interfaces import (
|
|
8
|
-
CliConfig,
|
|
9
|
-
)
|
|
10
|
-
from unstructured_ingest.connector.fsspec.s3 import S3WriteConfig, SimpleS3Config
|
|
11
|
-
|
|
12
|
-
CMD_NAME = "s3"
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
@dataclass
|
|
16
|
-
class S3CliConfig(SimpleS3Config, CliConfig):
|
|
17
|
-
@staticmethod
|
|
18
|
-
def get_cli_options() -> t.List[click.Option]:
|
|
19
|
-
options = [
|
|
20
|
-
click.Option(
|
|
21
|
-
["--anonymous"],
|
|
22
|
-
is_flag=True,
|
|
23
|
-
default=False,
|
|
24
|
-
help="Connect to s3 without local AWS credentials.",
|
|
25
|
-
),
|
|
26
|
-
click.Option(
|
|
27
|
-
["--endpoint-url"],
|
|
28
|
-
type=str,
|
|
29
|
-
default=None,
|
|
30
|
-
help="Use this endpoint_url, if specified. Needed for "
|
|
31
|
-
"connecting to non-AWS S3 buckets.",
|
|
32
|
-
),
|
|
33
|
-
click.Option(
|
|
34
|
-
["--key"],
|
|
35
|
-
type=str,
|
|
36
|
-
default=None,
|
|
37
|
-
help="If not anonymous, use this access key ID, if specified. Takes precedence "
|
|
38
|
-
"over `aws_access_key_id` in client_kwargs.",
|
|
39
|
-
),
|
|
40
|
-
click.Option(
|
|
41
|
-
["--secret"],
|
|
42
|
-
type=str,
|
|
43
|
-
default=None,
|
|
44
|
-
help="If not anonymous, use this secret access key, if specified.",
|
|
45
|
-
),
|
|
46
|
-
click.Option(
|
|
47
|
-
["--token"],
|
|
48
|
-
type=str,
|
|
49
|
-
default=None,
|
|
50
|
-
help="If not anonymous, use this security token, if specified.",
|
|
51
|
-
),
|
|
52
|
-
]
|
|
53
|
-
return options
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
def get_base_src_cmd():
|
|
57
|
-
cmd_cls = BaseSrcCmd(
|
|
58
|
-
cmd_name=CMD_NAME,
|
|
59
|
-
cli_config=S3CliConfig,
|
|
60
|
-
is_fsspec=True,
|
|
61
|
-
)
|
|
62
|
-
return cmd_cls
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
def get_base_dest_cmd():
|
|
66
|
-
from unstructured_ingest.cli.base.dest import BaseDestCmd
|
|
67
|
-
|
|
68
|
-
cmd_cls = BaseDestCmd(
|
|
69
|
-
cmd_name=CMD_NAME,
|
|
70
|
-
cli_config=S3CliConfig,
|
|
71
|
-
write_config=S3WriteConfig,
|
|
72
|
-
is_fsspec=True,
|
|
73
|
-
)
|
|
74
|
-
return cmd_cls
|
|
@@ -1,58 +0,0 @@
|
|
|
1
|
-
import typing as t
|
|
2
|
-
from dataclasses import dataclass
|
|
3
|
-
|
|
4
|
-
import click
|
|
5
|
-
|
|
6
|
-
from unstructured_ingest.cli.base.src import BaseSrcCmd
|
|
7
|
-
from unstructured_ingest.cli.interfaces import (
|
|
8
|
-
CliConfig,
|
|
9
|
-
)
|
|
10
|
-
from unstructured_ingest.connector.fsspec.sftp import SimpleSftpConfig
|
|
11
|
-
|
|
12
|
-
CMD_NAME = "sftp"
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
@dataclass
|
|
16
|
-
class SftpCliConfig(SimpleSftpConfig, CliConfig):
|
|
17
|
-
@staticmethod
|
|
18
|
-
def get_cli_options() -> t.List[click.Option]:
|
|
19
|
-
options = [
|
|
20
|
-
click.Option(
|
|
21
|
-
["--username"],
|
|
22
|
-
required=True,
|
|
23
|
-
type=str,
|
|
24
|
-
help="Username for sftp connection",
|
|
25
|
-
),
|
|
26
|
-
click.Option(
|
|
27
|
-
["--password"],
|
|
28
|
-
required=True,
|
|
29
|
-
type=str,
|
|
30
|
-
help="Password for sftp connection",
|
|
31
|
-
),
|
|
32
|
-
click.Option(
|
|
33
|
-
["--look-for-keys"],
|
|
34
|
-
required=False,
|
|
35
|
-
default=False,
|
|
36
|
-
is_flag=True,
|
|
37
|
-
type=bool,
|
|
38
|
-
help="Whether to search for private key files in ~/.ssh/",
|
|
39
|
-
),
|
|
40
|
-
click.Option(
|
|
41
|
-
["--allow-agent"],
|
|
42
|
-
required=False,
|
|
43
|
-
default=False,
|
|
44
|
-
is_flag=True,
|
|
45
|
-
type=bool,
|
|
46
|
-
help="Whether to connect to the SSH agent.",
|
|
47
|
-
),
|
|
48
|
-
]
|
|
49
|
-
return options
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
def get_base_src_cmd() -> BaseSrcCmd:
|
|
53
|
-
cmd_cls = BaseSrcCmd(
|
|
54
|
-
cmd_name=CMD_NAME,
|
|
55
|
-
cli_config=SftpCliConfig,
|
|
56
|
-
is_fsspec=True,
|
|
57
|
-
)
|
|
58
|
-
return cmd_cls
|