unstructured-ingest 0.6.4__py3-none-any.whl → 0.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of unstructured-ingest might be problematic. Click here for more details.
- examples/airtable.py +44 -0
- examples/azure_cognitive_search.py +55 -0
- examples/chroma.py +54 -0
- examples/couchbase.py +55 -0
- examples/databricks_volumes_dest.py +55 -0
- examples/databricks_volumes_source.py +53 -0
- examples/delta_table.py +45 -0
- examples/discord_example.py +36 -0
- examples/elasticsearch.py +49 -0
- examples/google_drive.py +45 -0
- examples/kdbai.py +54 -0
- examples/local.py +36 -0
- examples/milvus.py +44 -0
- examples/mongodb.py +53 -0
- examples/opensearch.py +50 -0
- examples/pinecone.py +57 -0
- examples/s3.py +38 -0
- examples/salesforce.py +44 -0
- examples/sharepoint.py +47 -0
- examples/singlestore.py +49 -0
- examples/sql.py +90 -0
- examples/vectara.py +54 -0
- examples/weaviate.py +44 -0
- test/integration/chunkers/test_chunkers.py +1 -1
- test/integration/connectors/conftest.py +1 -1
- test/integration/connectors/databricks/test_volumes_native.py +3 -3
- test/integration/connectors/discord/test_discord.py +1 -1
- test/integration/connectors/duckdb/test_duckdb.py +2 -2
- test/integration/connectors/duckdb/test_motherduck.py +2 -2
- test/integration/connectors/elasticsearch/test_elasticsearch.py +2 -2
- test/integration/connectors/elasticsearch/test_opensearch.py +2 -2
- test/integration/connectors/sql/test_databricks_delta_tables.py +3 -3
- test/integration/connectors/sql/test_postgres.py +2 -2
- test/integration/connectors/sql/test_singlestore.py +2 -2
- test/integration/connectors/sql/test_snowflake.py +2 -2
- test/integration/connectors/sql/test_sqlite.py +2 -2
- test/integration/connectors/sql/test_vastdb.py +1 -1
- test/integration/connectors/test_astradb.py +2 -2
- test/integration/connectors/test_azure_ai_search.py +2 -2
- test/integration/connectors/test_chroma.py +2 -2
- test/integration/connectors/test_confluence.py +1 -1
- test/integration/connectors/test_delta_table.py +2 -2
- test/integration/connectors/test_dropbox.py +2 -2
- test/integration/connectors/test_github.py +1 -1
- test/integration/connectors/test_google_drive.py +2 -2
- test/integration/connectors/test_jira.py +1 -1
- test/integration/connectors/test_lancedb.py +7 -7
- test/integration/connectors/test_milvus.py +2 -2
- test/integration/connectors/test_mongodb.py +2 -2
- test/integration/connectors/test_neo4j.py +7 -7
- test/integration/connectors/test_notion.py +2 -2
- test/integration/connectors/test_onedrive.py +2 -2
- test/integration/connectors/test_pinecone.py +3 -3
- test/integration/connectors/test_qdrant.py +6 -6
- test/integration/connectors/test_redis.py +3 -3
- test/integration/connectors/test_s3.py +3 -3
- test/integration/connectors/test_sharepoint.py +1 -1
- test/integration/connectors/test_vectara.py +4 -4
- test/integration/connectors/test_zendesk.py +2 -2
- test/integration/connectors/utils/validation/destination.py +2 -2
- test/integration/connectors/utils/validation/source.py +2 -2
- test/integration/connectors/weaviate/test_cloud.py +1 -1
- test/integration/connectors/weaviate/test_local.py +2 -2
- test/integration/embedders/test_azure_openai.py +1 -1
- test/integration/embedders/test_bedrock.py +2 -2
- test/integration/embedders/test_huggingface.py +1 -1
- test/integration/embedders/test_mixedbread.py +1 -1
- test/integration/embedders/test_octoai.py +2 -2
- test/integration/embedders/test_openai.py +2 -2
- test/integration/embedders/test_togetherai.py +2 -2
- test/integration/embedders/test_vertexai.py +1 -1
- test/integration/embedders/test_voyageai.py +1 -1
- test/integration/partitioners/test_partitioner.py +2 -2
- test/unit/{v2/chunkers → chunkers}/test_chunkers.py +1 -1
- test/unit/{v2/connectors → connectors}/ibm_watsonx/test_ibm_watsonx_s3.py +6 -6
- test/unit/{v2/connectors → connectors}/motherduck/test_base.py +5 -5
- test/unit/{v2/connectors → connectors}/sql/test_sql.py +4 -4
- test/unit/{v2/connectors → connectors}/test_confluence.py +1 -1
- test/unit/{v2/connectors → connectors}/test_jira.py +1 -1
- test/unit/{v2/embedders → embedders}/test_huggingface.py +1 -1
- test/unit/{v2/embedders → embedders}/test_vertexai.py +1 -1
- test/unit/{v2/partitioners → partitioners}/test_partitioner.py +2 -2
- test/unit/test_html.py +1 -1
- test/unit/{v2/test_interfaces.py → test_interfaces.py} +1 -1
- test/unit/test_utils.py +106 -97
- unstructured_ingest/__version__.py +1 -1
- unstructured_ingest/cli/__init__.py +0 -14
- unstructured_ingest/cli/base/__init__.py +4 -0
- unstructured_ingest/cli/base/cmd.py +259 -9
- unstructured_ingest/cli/base/dest.py +58 -61
- unstructured_ingest/cli/base/src.py +54 -36
- unstructured_ingest/cli/cli.py +4 -17
- unstructured_ingest/{v2/cli → cli}/cmds.py +2 -2
- unstructured_ingest/{v2/cli → cli}/utils/model_conversion.py +6 -6
- unstructured_ingest/{v2/types → data_types}/file_data.py +1 -1
- unstructured_ingest/embed/bedrock.py +3 -3
- unstructured_ingest/embed/octoai.py +3 -3
- unstructured_ingest/embed/openai.py +3 -3
- unstructured_ingest/embed/togetherai.py +4 -4
- unstructured_ingest/embed/vertexai.py +1 -1
- unstructured_ingest/embed/voyageai.py +4 -4
- unstructured_ingest/{v2/interfaces → interfaces}/downloader.py +3 -3
- unstructured_ingest/{v2/interfaces → interfaces}/indexer.py +3 -3
- unstructured_ingest/{v2/interfaces → interfaces}/upload_stager.py +2 -2
- unstructured_ingest/{v2/interfaces → interfaces}/uploader.py +2 -2
- unstructured_ingest/{v2/otel.py → otel.py} +1 -1
- unstructured_ingest/pipeline/__init__.py +0 -22
- unstructured_ingest/pipeline/interfaces.py +179 -238
- unstructured_ingest/{v2/pipeline → pipeline}/otel.py +2 -2
- unstructured_ingest/pipeline/pipeline.py +388 -97
- unstructured_ingest/{v2/pipeline → pipeline}/steps/chunk.py +5 -5
- unstructured_ingest/{v2/pipeline → pipeline}/steps/download.py +5 -5
- unstructured_ingest/{v2/pipeline → pipeline}/steps/embed.py +5 -5
- unstructured_ingest/{v2/pipeline → pipeline}/steps/filter.py +4 -4
- unstructured_ingest/{v2/pipeline → pipeline}/steps/index.py +5 -5
- unstructured_ingest/{v2/pipeline → pipeline}/steps/partition.py +5 -5
- unstructured_ingest/{v2/pipeline → pipeline}/steps/stage.py +5 -5
- unstructured_ingest/{v2/pipeline → pipeline}/steps/uncompress.py +4 -4
- unstructured_ingest/{v2/pipeline → pipeline}/steps/upload.py +5 -5
- unstructured_ingest/{v2/processes → processes}/chunker.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connector_registry.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/__init__.py +11 -11
- unstructured_ingest/{v2/processes → processes}/connectors/airtable.py +4 -4
- unstructured_ingest/{v2/processes → processes}/connectors/astradb.py +15 -15
- unstructured_ingest/{v2/processes → processes}/connectors/azure_ai_search.py +8 -9
- unstructured_ingest/{v2/processes → processes}/connectors/chroma.py +10 -7
- unstructured_ingest/{v2/processes → processes}/connectors/confluence.py +11 -11
- unstructured_ingest/{v2/processes → processes}/connectors/couchbase.py +12 -12
- unstructured_ingest/{v2/processes → processes}/connectors/databricks/__init__.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/databricks/volumes.py +9 -9
- unstructured_ingest/{v2/processes → processes}/connectors/databricks/volumes_aws.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/databricks/volumes_azure.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/databricks/volumes_gcp.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/databricks/volumes_native.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/databricks/volumes_table.py +8 -9
- unstructured_ingest/{v2/processes → processes}/connectors/delta_table.py +7 -7
- unstructured_ingest/{v2/processes → processes}/connectors/discord.py +9 -9
- unstructured_ingest/{v2/processes → processes}/connectors/duckdb/__init__.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/duckdb/base.py +3 -4
- unstructured_ingest/{v2/processes → processes}/connectors/duckdb/duckdb.py +7 -7
- unstructured_ingest/{v2/processes → processes}/connectors/duckdb/motherduck.py +7 -7
- unstructured_ingest/{v2/processes → processes}/connectors/elasticsearch/__init__.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/elasticsearch/elasticsearch.py +17 -17
- unstructured_ingest/{v2/processes → processes}/connectors/elasticsearch/opensearch.py +5 -5
- unstructured_ingest/{v2/processes → processes}/connectors/fsspec/__init__.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/fsspec/azure.py +8 -8
- unstructured_ingest/{v2/processes → processes}/connectors/fsspec/box.py +8 -8
- unstructured_ingest/{v2/processes → processes}/connectors/fsspec/dropbox.py +8 -8
- unstructured_ingest/{v2/processes → processes}/connectors/fsspec/fsspec.py +8 -8
- unstructured_ingest/{v2/processes → processes}/connectors/fsspec/gcs.py +8 -8
- unstructured_ingest/{v2/processes → processes}/connectors/fsspec/s3.py +9 -9
- unstructured_ingest/{v2/processes → processes}/connectors/fsspec/sftp.py +5 -5
- unstructured_ingest/{v2/processes → processes}/connectors/github.py +10 -10
- unstructured_ingest/{v2/processes → processes}/connectors/gitlab.py +9 -9
- unstructured_ingest/{v2/processes → processes}/connectors/google_drive.py +22 -13
- unstructured_ingest/{v2/processes → processes}/connectors/ibm_watsonx/__init__.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/ibm_watsonx/ibm_watsonx_s3.py +9 -9
- unstructured_ingest/{v2/processes → processes}/connectors/jira.py +9 -9
- unstructured_ingest/{v2/processes → processes}/connectors/kafka/__init__.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/kafka/cloud.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/kafka/kafka.py +9 -9
- unstructured_ingest/{v2/processes → processes}/connectors/kafka/local.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/kdbai.py +11 -7
- unstructured_ingest/{v2/processes → processes}/connectors/lancedb/__init__.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/lancedb/aws.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/lancedb/azure.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/lancedb/cloud.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/lancedb/gcp.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/lancedb/lancedb.py +6 -6
- unstructured_ingest/{v2/processes → processes}/connectors/lancedb/local.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/local.py +9 -9
- unstructured_ingest/{v2/processes → processes}/connectors/milvus.py +7 -7
- unstructured_ingest/{v2/processes → processes}/connectors/mongodb.py +13 -13
- unstructured_ingest/{v2/processes → processes}/connectors/neo4j.py +7 -7
- unstructured_ingest/{v2/processes → processes}/connectors/notion/client.py +6 -6
- unstructured_ingest/{v2/processes → processes}/connectors/notion/connector.py +15 -15
- unstructured_ingest/{v2/processes → processes}/connectors/notion/helpers.py +4 -4
- unstructured_ingest/{ingest_backoff → processes/connectors/notion/ingest_backoff}/_wrapper.py +5 -1
- unstructured_ingest/processes/connectors/notion/ingest_backoff/types.py +24 -0
- unstructured_ingest/{connector → processes/connectors}/notion/types/block.py +4 -4
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/bookmark.py +2 -2
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/breadcrumb.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/bulleted_list_item.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/callout.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/child_database.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/child_page.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/code.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/column_list.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/divider.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/embed.py +2 -2
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/equation.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/file.py +4 -4
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/heading.py +2 -2
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/image.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/link_preview.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/link_to_page.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/numbered_list.py +2 -2
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/paragraph.py +2 -2
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/pdf.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/quote.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/synced_block.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/table.py +2 -5
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/table_of_contents.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/template.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/todo.py +2 -2
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/toggle.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/unsupported.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/video.py +2 -2
- unstructured_ingest/{connector → processes/connectors}/notion/types/database.py +6 -6
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/__init__.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/checkbox.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/created_by.py +2 -2
- unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/created_time.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/date.py +2 -2
- unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/email.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/files.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/formula.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/last_edited_by.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/last_edited_time.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/multiselect.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/number.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/people.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/phone_number.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/relation.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/rich_text.py +2 -2
- unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/rollup.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/select.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/status.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/title.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/unique_id.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/url.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/verification.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/date.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/file.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/page.py +4 -4
- unstructured_ingest/{connector → processes/connectors}/notion/types/parent.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/rich_text.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/user.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/onedrive.py +10 -10
- unstructured_ingest/{v2/processes → processes}/connectors/outlook.py +9 -9
- unstructured_ingest/{v2/processes → processes}/connectors/pinecone.py +12 -9
- unstructured_ingest/{v2/processes → processes}/connectors/qdrant/__init__.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/qdrant/cloud.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/qdrant/local.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/qdrant/qdrant.py +9 -6
- unstructured_ingest/{v2/processes → processes}/connectors/qdrant/server.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/redisdb.py +6 -6
- unstructured_ingest/{v2/processes → processes}/connectors/salesforce.py +9 -9
- unstructured_ingest/{v2/processes → processes}/connectors/sharepoint.py +7 -7
- unstructured_ingest/{v2/processes → processes}/connectors/slack.py +9 -9
- unstructured_ingest/{v2/processes → processes}/connectors/sql/__init__.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/sql/databricks_delta_tables.py +7 -7
- unstructured_ingest/{v2/processes → processes}/connectors/sql/postgres.py +4 -4
- unstructured_ingest/{v2/processes → processes}/connectors/sql/singlestore.py +4 -4
- unstructured_ingest/{v2/processes → processes}/connectors/sql/snowflake.py +6 -6
- unstructured_ingest/{v2/processes → processes}/connectors/sql/sql.py +16 -11
- unstructured_ingest/{v2/processes → processes}/connectors/sql/sqlite.py +4 -4
- unstructured_ingest/{v2/processes → processes}/connectors/sql/vastdb.py +9 -10
- unstructured_ingest/{v2/processes → processes}/connectors/vectara.py +6 -6
- unstructured_ingest/{v2/processes → processes}/connectors/weaviate/__init__.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/weaviate/cloud.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/weaviate/embedded.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/weaviate/local.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/weaviate/weaviate.py +5 -5
- unstructured_ingest/{v2/processes → processes}/connectors/zendesk/client.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/zendesk/zendesk.py +10 -10
- unstructured_ingest/{v2/processes → processes}/embedder.py +1 -1
- unstructured_ingest/{v2/processes → processes}/filter.py +4 -4
- unstructured_ingest/{v2/processes → processes}/partitioner.py +6 -6
- unstructured_ingest/{v2/processes → processes}/uncompress.py +3 -3
- unstructured_ingest/{v2/processes → processes}/utils/blob_storage.py +2 -2
- unstructured_ingest/{v2/unstructured_api.py → unstructured_api.py} +2 -2
- unstructured_ingest/utils/compression.py +1 -48
- unstructured_ingest/utils/data_prep.py +9 -1
- unstructured_ingest/utils/html.py +3 -3
- unstructured_ingest/{v2/utils.py → utils/pydantic_models.py} +0 -9
- unstructured_ingest/utils/string_and_date_utils.py +1 -1
- {unstructured_ingest-0.6.4.dist-info → unstructured_ingest-0.7.0.dist-info}/METADATA +21 -21
- unstructured_ingest-0.7.0.dist-info/RECORD +370 -0
- {unstructured_ingest-0.6.4.dist-info → unstructured_ingest-0.7.0.dist-info}/top_level.txt +1 -0
- test/unit/v2/test_utils.py +0 -82
- unstructured_ingest/cli/cmd_factory.py +0 -12
- unstructured_ingest/cli/cmds/__init__.py +0 -145
- unstructured_ingest/cli/cmds/airtable.py +0 -69
- unstructured_ingest/cli/cmds/astradb.py +0 -99
- unstructured_ingest/cli/cmds/azure_ai_search.py +0 -65
- unstructured_ingest/cli/cmds/biomed.py +0 -52
- unstructured_ingest/cli/cmds/chroma.py +0 -104
- unstructured_ingest/cli/cmds/clarifai.py +0 -71
- unstructured_ingest/cli/cmds/confluence.py +0 -69
- unstructured_ingest/cli/cmds/databricks_volumes.py +0 -163
- unstructured_ingest/cli/cmds/delta_table.py +0 -94
- unstructured_ingest/cli/cmds/discord.py +0 -47
- unstructured_ingest/cli/cmds/elasticsearch.py +0 -133
- unstructured_ingest/cli/cmds/fsspec/azure.py +0 -94
- unstructured_ingest/cli/cmds/fsspec/box.py +0 -48
- unstructured_ingest/cli/cmds/fsspec/dropbox.py +0 -51
- unstructured_ingest/cli/cmds/fsspec/fsspec.py +0 -15
- unstructured_ingest/cli/cmds/fsspec/gcs.py +0 -71
- unstructured_ingest/cli/cmds/fsspec/s3.py +0 -74
- unstructured_ingest/cli/cmds/fsspec/sftp.py +0 -58
- unstructured_ingest/cli/cmds/github.py +0 -54
- unstructured_ingest/cli/cmds/gitlab.py +0 -54
- unstructured_ingest/cli/cmds/google_drive.py +0 -49
- unstructured_ingest/cli/cmds/hubspot.py +0 -70
- unstructured_ingest/cli/cmds/jira.py +0 -71
- unstructured_ingest/cli/cmds/kafka.py +0 -102
- unstructured_ingest/cli/cmds/local.py +0 -43
- unstructured_ingest/cli/cmds/mongodb.py +0 -72
- unstructured_ingest/cli/cmds/notion.py +0 -48
- unstructured_ingest/cli/cmds/onedrive.py +0 -66
- unstructured_ingest/cli/cmds/opensearch.py +0 -117
- unstructured_ingest/cli/cmds/outlook.py +0 -67
- unstructured_ingest/cli/cmds/pinecone.py +0 -71
- unstructured_ingest/cli/cmds/qdrant.py +0 -124
- unstructured_ingest/cli/cmds/reddit.py +0 -67
- unstructured_ingest/cli/cmds/salesforce.py +0 -58
- unstructured_ingest/cli/cmds/sharepoint.py +0 -66
- unstructured_ingest/cli/cmds/slack.py +0 -56
- unstructured_ingest/cli/cmds/sql.py +0 -66
- unstructured_ingest/cli/cmds/vectara.py +0 -66
- unstructured_ingest/cli/cmds/weaviate.py +0 -98
- unstructured_ingest/cli/cmds/wikipedia.py +0 -40
- unstructured_ingest/cli/common.py +0 -7
- unstructured_ingest/cli/interfaces.py +0 -663
- unstructured_ingest/cli/utils.py +0 -205
- unstructured_ingest/connector/airtable.py +0 -309
- unstructured_ingest/connector/astradb.py +0 -267
- unstructured_ingest/connector/azure_ai_search.py +0 -144
- unstructured_ingest/connector/biomed.py +0 -320
- unstructured_ingest/connector/chroma.py +0 -158
- unstructured_ingest/connector/clarifai.py +0 -122
- unstructured_ingest/connector/confluence.py +0 -285
- unstructured_ingest/connector/databricks_volumes.py +0 -137
- unstructured_ingest/connector/delta_table.py +0 -203
- unstructured_ingest/connector/discord.py +0 -180
- unstructured_ingest/connector/elasticsearch.py +0 -396
- unstructured_ingest/connector/fsspec/azure.py +0 -78
- unstructured_ingest/connector/fsspec/box.py +0 -109
- unstructured_ingest/connector/fsspec/dropbox.py +0 -160
- unstructured_ingest/connector/fsspec/fsspec.py +0 -359
- unstructured_ingest/connector/fsspec/gcs.py +0 -82
- unstructured_ingest/connector/fsspec/s3.py +0 -62
- unstructured_ingest/connector/fsspec/sftp.py +0 -81
- unstructured_ingest/connector/git.py +0 -124
- unstructured_ingest/connector/github.py +0 -174
- unstructured_ingest/connector/gitlab.py +0 -142
- unstructured_ingest/connector/google_drive.py +0 -348
- unstructured_ingest/connector/hubspot.py +0 -278
- unstructured_ingest/connector/jira.py +0 -469
- unstructured_ingest/connector/kafka.py +0 -293
- unstructured_ingest/connector/local.py +0 -139
- unstructured_ingest/connector/mongodb.py +0 -284
- unstructured_ingest/connector/notion/client.py +0 -248
- unstructured_ingest/connector/notion/connector.py +0 -469
- unstructured_ingest/connector/notion/helpers.py +0 -584
- unstructured_ingest/connector/notion/types/blocks/bookmark.py +0 -40
- unstructured_ingest/connector/notion/types/blocks/callout.py +0 -94
- unstructured_ingest/connector/notion/types/blocks/child_database.py +0 -23
- unstructured_ingest/connector/notion/types/blocks/code.py +0 -43
- unstructured_ingest/connector/notion/types/blocks/column_list.py +0 -35
- unstructured_ingest/connector/notion/types/blocks/divider.py +0 -22
- unstructured_ingest/connector/notion/types/blocks/heading.py +0 -37
- unstructured_ingest/connector/notion/types/blocks/link_preview.py +0 -24
- unstructured_ingest/connector/notion/types/blocks/numbered_list.py +0 -29
- unstructured_ingest/connector/notion/types/blocks/quote.py +0 -37
- unstructured_ingest/connector/notion/types/blocks/synced_block.py +0 -57
- unstructured_ingest/connector/notion/types/blocks/table.py +0 -63
- unstructured_ingest/connector/notion/types/blocks/table_of_contents.py +0 -23
- unstructured_ingest/connector/notion/types/blocks/template.py +0 -30
- unstructured_ingest/connector/notion/types/blocks/todo.py +0 -42
- unstructured_ingest/connector/notion/types/blocks/unsupported.py +0 -20
- unstructured_ingest/connector/notion/types/database_properties/__init__.py +0 -106
- unstructured_ingest/connector/notion/types/database_properties/checkbox.py +0 -38
- unstructured_ingest/connector/notion/types/database_properties/date.py +0 -41
- unstructured_ingest/connector/notion/types/database_properties/formula.py +0 -49
- unstructured_ingest/connector/notion/types/database_properties/last_edited_time.py +0 -34
- unstructured_ingest/connector/notion/types/database_properties/multiselect.py +0 -73
- unstructured_ingest/connector/notion/types/database_properties/people.py +0 -40
- unstructured_ingest/connector/notion/types/database_properties/phone_number.py +0 -36
- unstructured_ingest/connector/notion/types/database_properties/relation.py +0 -67
- unstructured_ingest/connector/notion/types/database_properties/select.py +0 -68
- unstructured_ingest/connector/notion/types/database_properties/status.py +0 -80
- unstructured_ingest/connector/notion/types/database_properties/unique_id.py +0 -50
- unstructured_ingest/connector/notion/types/date.py +0 -26
- unstructured_ingest/connector/notion/types/file.py +0 -51
- unstructured_ingest/connector/notion/types/user.py +0 -76
- unstructured_ingest/connector/onedrive.py +0 -232
- unstructured_ingest/connector/opensearch.py +0 -218
- unstructured_ingest/connector/outlook.py +0 -285
- unstructured_ingest/connector/pinecone.py +0 -150
- unstructured_ingest/connector/qdrant.py +0 -144
- unstructured_ingest/connector/reddit.py +0 -166
- unstructured_ingest/connector/registry.py +0 -109
- unstructured_ingest/connector/salesforce.py +0 -301
- unstructured_ingest/connector/sharepoint.py +0 -573
- unstructured_ingest/connector/slack.py +0 -224
- unstructured_ingest/connector/sql.py +0 -199
- unstructured_ingest/connector/vectara.py +0 -253
- unstructured_ingest/connector/weaviate.py +0 -190
- unstructured_ingest/connector/wikipedia.py +0 -208
- unstructured_ingest/enhanced_dataclass/__init__.py +0 -4
- unstructured_ingest/enhanced_dataclass/core.py +0 -99
- unstructured_ingest/enhanced_dataclass/dataclasses.py +0 -54
- unstructured_ingest/enhanced_dataclass/json_mixin.py +0 -125
- unstructured_ingest/interfaces.py +0 -852
- unstructured_ingest/pipeline/copy.py +0 -19
- unstructured_ingest/pipeline/doc_factory.py +0 -12
- unstructured_ingest/pipeline/partition.py +0 -60
- unstructured_ingest/pipeline/permissions.py +0 -12
- unstructured_ingest/pipeline/reformat/chunking.py +0 -134
- unstructured_ingest/pipeline/reformat/embedding.py +0 -64
- unstructured_ingest/pipeline/source.py +0 -77
- unstructured_ingest/pipeline/utils.py +0 -6
- unstructured_ingest/pipeline/write.py +0 -18
- unstructured_ingest/processor.py +0 -93
- unstructured_ingest/runner/__init__.py +0 -104
- unstructured_ingest/runner/airtable.py +0 -35
- unstructured_ingest/runner/astradb.py +0 -34
- unstructured_ingest/runner/base_runner.py +0 -89
- unstructured_ingest/runner/biomed.py +0 -45
- unstructured_ingest/runner/confluence.py +0 -35
- unstructured_ingest/runner/delta_table.py +0 -34
- unstructured_ingest/runner/discord.py +0 -35
- unstructured_ingest/runner/elasticsearch.py +0 -40
- unstructured_ingest/runner/fsspec/azure.py +0 -30
- unstructured_ingest/runner/fsspec/box.py +0 -28
- unstructured_ingest/runner/fsspec/dropbox.py +0 -30
- unstructured_ingest/runner/fsspec/fsspec.py +0 -40
- unstructured_ingest/runner/fsspec/gcs.py +0 -28
- unstructured_ingest/runner/fsspec/s3.py +0 -28
- unstructured_ingest/runner/fsspec/sftp.py +0 -28
- unstructured_ingest/runner/github.py +0 -37
- unstructured_ingest/runner/gitlab.py +0 -37
- unstructured_ingest/runner/google_drive.py +0 -35
- unstructured_ingest/runner/hubspot.py +0 -35
- unstructured_ingest/runner/jira.py +0 -35
- unstructured_ingest/runner/kafka.py +0 -34
- unstructured_ingest/runner/local.py +0 -23
- unstructured_ingest/runner/mongodb.py +0 -34
- unstructured_ingest/runner/notion.py +0 -61
- unstructured_ingest/runner/onedrive.py +0 -35
- unstructured_ingest/runner/opensearch.py +0 -40
- unstructured_ingest/runner/outlook.py +0 -33
- unstructured_ingest/runner/reddit.py +0 -35
- unstructured_ingest/runner/salesforce.py +0 -33
- unstructured_ingest/runner/sharepoint.py +0 -35
- unstructured_ingest/runner/slack.py +0 -33
- unstructured_ingest/runner/utils.py +0 -47
- unstructured_ingest/runner/wikipedia.py +0 -35
- unstructured_ingest/runner/writers/__init__.py +0 -48
- unstructured_ingest/runner/writers/astradb.py +0 -22
- unstructured_ingest/runner/writers/azure_ai_search.py +0 -24
- unstructured_ingest/runner/writers/base_writer.py +0 -26
- unstructured_ingest/runner/writers/chroma.py +0 -22
- unstructured_ingest/runner/writers/clarifai.py +0 -19
- unstructured_ingest/runner/writers/databricks_volumes.py +0 -25
- unstructured_ingest/runner/writers/delta_table.py +0 -24
- unstructured_ingest/runner/writers/elasticsearch.py +0 -24
- unstructured_ingest/runner/writers/fsspec/azure.py +0 -24
- unstructured_ingest/runner/writers/fsspec/box.py +0 -21
- unstructured_ingest/runner/writers/fsspec/dropbox.py +0 -21
- unstructured_ingest/runner/writers/fsspec/gcs.py +0 -19
- unstructured_ingest/runner/writers/fsspec/s3.py +0 -21
- unstructured_ingest/runner/writers/kafka.py +0 -21
- unstructured_ingest/runner/writers/mongodb.py +0 -21
- unstructured_ingest/runner/writers/opensearch.py +0 -26
- unstructured_ingest/runner/writers/pinecone.py +0 -21
- unstructured_ingest/runner/writers/qdrant.py +0 -19
- unstructured_ingest/runner/writers/sql.py +0 -22
- unstructured_ingest/runner/writers/vectara.py +0 -22
- unstructured_ingest/runner/writers/weaviate.py +0 -21
- unstructured_ingest/utils/google_filetype.py +0 -9
- unstructured_ingest/v2/__init__.py +0 -1
- unstructured_ingest/v2/cli/__init__.py +0 -0
- unstructured_ingest/v2/cli/base/__init__.py +0 -4
- unstructured_ingest/v2/cli/base/cmd.py +0 -269
- unstructured_ingest/v2/cli/base/dest.py +0 -85
- unstructured_ingest/v2/cli/base/src.py +0 -85
- unstructured_ingest/v2/cli/cli.py +0 -24
- unstructured_ingest/v2/cli/utils/__init__.py +0 -0
- unstructured_ingest/v2/logger.py +0 -126
- unstructured_ingest/v2/main.py +0 -11
- unstructured_ingest/v2/pipeline/__init__.py +0 -0
- unstructured_ingest/v2/pipeline/interfaces.py +0 -211
- unstructured_ingest/v2/pipeline/pipeline.py +0 -408
- unstructured_ingest/v2/pipeline/steps/__init__.py +0 -0
- unstructured_ingest/v2/processes/connectors/assets/__init__.py +0 -0
- unstructured_ingest/v2/processes/connectors/assets/databricks_delta_table_schema.sql +0 -10
- unstructured_ingest/v2/processes/connectors/assets/weaviate_collection_config.json +0 -23
- unstructured_ingest/v2/processes/connectors/notion/__init__.py +0 -0
- unstructured_ingest/v2/processes/connectors/notion/interfaces.py +0 -32
- unstructured_ingest/v2/processes/connectors/notion/types/__init__.py +0 -0
- unstructured_ingest/v2/processes/connectors/notion/types/block.py +0 -96
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/__init__.py +0 -63
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/breadcrumb.py +0 -21
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/bulleted_list_item.py +0 -31
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/child_page.py +0 -23
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/embed.py +0 -36
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/equation.py +0 -23
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/file.py +0 -49
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/image.py +0 -21
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/link_to_page.py +0 -29
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/paragraph.py +0 -31
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/pdf.py +0 -49
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/toggle.py +0 -37
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/video.py +0 -22
- unstructured_ingest/v2/processes/connectors/notion/types/database.py +0 -73
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/created_by.py +0 -35
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/created_time.py +0 -34
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/email.py +0 -36
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/files.py +0 -37
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/last_edited_by.py +0 -34
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/number.py +0 -49
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/rich_text.py +0 -43
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/rollup.py +0 -56
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/title.py +0 -37
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/url.py +0 -37
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/verification.py +0 -78
- unstructured_ingest/v2/processes/connectors/notion/types/page.py +0 -45
- unstructured_ingest/v2/processes/connectors/notion/types/parent.py +0 -66
- unstructured_ingest/v2/processes/connectors/notion/types/rich_text.py +0 -189
- unstructured_ingest/v2/processes/connectors/zendesk/__init__.py +0 -0
- unstructured_ingest/v2/processes/utils/__init__.py +0 -0
- unstructured_ingest/v2/types/__init__.py +0 -0
- unstructured_ingest-0.6.4.dist-info/RECORD +0 -591
- {test/unit/v2 → examples}/__init__.py +0 -0
- /test/unit/{v2/chunkers → chunkers}/__init__.py +0 -0
- /test/unit/{v2/connectors → connectors}/__init__.py +0 -0
- /test/unit/{v2/connectors → connectors}/ibm_watsonx/__init__.py +0 -0
- /test/unit/{v2/connectors → connectors}/motherduck/__init__.py +0 -0
- /test/unit/{v2/connectors → connectors}/sql/__init__.py +0 -0
- /test/unit/{v2/embedders → embedders}/__init__.py +0 -0
- /test/unit/{v2/embedders → embedders}/test_bedrock.py +0 -0
- /test/unit/{v2/embedders → embedders}/test_mixedbread.py +0 -0
- /test/unit/{v2/embedders → embedders}/test_octoai.py +0 -0
- /test/unit/{v2/embedders → embedders}/test_openai.py +0 -0
- /test/unit/{v2/embedders → embedders}/test_togetherai.py +0 -0
- /test/unit/{v2/embedders → embedders}/test_voyageai.py +0 -0
- /test/unit/{v2/partitioners → partitioners}/__init__.py +0 -0
- /test/unit/{v2/utils → utils}/__init__.py +0 -0
- /test/unit/{v2/utils → utils}/data_generator.py +0 -0
- /unstructured_ingest/{v2/cli → cli}/base/importer.py +0 -0
- /unstructured_ingest/cli/{cmds/fsspec → utils}/__init__.py +0 -0
- /unstructured_ingest/{v2/cli → cli}/utils/click.py +0 -0
- /unstructured_ingest/{connector → data_types}/__init__.py +0 -0
- /unstructured_ingest/{v2/errors.py → errors_v2.py} +0 -0
- /unstructured_ingest/{v2/interfaces → interfaces}/__init__.py +0 -0
- /unstructured_ingest/{v2/interfaces → interfaces}/connector.py +0 -0
- /unstructured_ingest/{v2/interfaces → interfaces}/process.py +0 -0
- /unstructured_ingest/{v2/interfaces → interfaces}/processor.py +0 -0
- /unstructured_ingest/{connector/fsspec → pipeline/steps}/__init__.py +0 -0
- /unstructured_ingest/{v2/processes → processes}/__init__.py +0 -0
- /unstructured_ingest/{connector/notion → processes/connectors/assets}/__init__.py +0 -0
- /unstructured_ingest/{v2/processes → processes}/connectors/fsspec/utils.py +0 -0
- /unstructured_ingest/{connector/notion/types → processes/connectors/notion}/__init__.py +0 -0
- /unstructured_ingest/{ingest_backoff → processes/connectors/notion/ingest_backoff}/__init__.py +0 -0
- /unstructured_ingest/{ingest_backoff → processes/connectors/notion/ingest_backoff}/_common.py +0 -0
- /unstructured_ingest/{connector → processes/connectors}/notion/interfaces.py +0 -0
- /unstructured_ingest/{pipeline/reformat → processes/connectors/notion/types}/__init__.py +0 -0
- /unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/__init__.py +0 -0
- /unstructured_ingest/{v2/processes → processes}/connectors/utils.py +0 -0
- /unstructured_ingest/{runner/fsspec → processes/connectors/zendesk}/__init__.py +0 -0
- /unstructured_ingest/{runner/writers/fsspec → processes/utils}/__init__.py +0 -0
- /unstructured_ingest/{v2 → utils}/constants.py +0 -0
- {unstructured_ingest-0.6.4.dist-info → unstructured_ingest-0.7.0.dist-info}/LICENSE.md +0 -0
- {unstructured_ingest-0.6.4.dist-info → unstructured_ingest-0.7.0.dist-info}/WHEEL +0 -0
- {unstructured_ingest-0.6.4.dist-info → unstructured_ingest-0.7.0.dist-info}/entry_points.txt +0 -0
|
@@ -1,89 +0,0 @@
|
|
|
1
|
-
import logging
|
|
2
|
-
import typing as t
|
|
3
|
-
from abc import ABC, abstractmethod
|
|
4
|
-
from dataclasses import dataclass
|
|
5
|
-
|
|
6
|
-
from unstructured_ingest.enhanced_dataclass import EnhancedDataClassJsonMixin
|
|
7
|
-
from unstructured_ingest.interfaces import (
|
|
8
|
-
BaseConnectorConfig,
|
|
9
|
-
BaseDestinationConnector,
|
|
10
|
-
BaseSourceConnector,
|
|
11
|
-
ChunkingConfig,
|
|
12
|
-
EmbeddingConfig,
|
|
13
|
-
PartitionConfig,
|
|
14
|
-
PermissionsConfig,
|
|
15
|
-
ProcessorConfig,
|
|
16
|
-
ReadConfig,
|
|
17
|
-
RetryStrategyConfig,
|
|
18
|
-
)
|
|
19
|
-
from unstructured_ingest.logger import ingest_log_streaming_init
|
|
20
|
-
from unstructured_ingest.processor import process_documents
|
|
21
|
-
from unstructured_ingest.runner.writers.base_writer import Writer
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
@dataclass
|
|
25
|
-
class Runner(EnhancedDataClassJsonMixin, ABC):
|
|
26
|
-
connector_config: BaseConnectorConfig
|
|
27
|
-
processor_config: ProcessorConfig
|
|
28
|
-
read_config: ReadConfig
|
|
29
|
-
partition_config: PartitionConfig
|
|
30
|
-
writer: t.Optional[Writer] = None
|
|
31
|
-
writer_kwargs: t.Optional[dict] = None
|
|
32
|
-
embedding_config: t.Optional[EmbeddingConfig] = None
|
|
33
|
-
chunking_config: t.Optional[ChunkingConfig] = None
|
|
34
|
-
permissions_config: t.Optional[PermissionsConfig] = None
|
|
35
|
-
retry_strategy_config: t.Optional[RetryStrategyConfig] = None
|
|
36
|
-
|
|
37
|
-
def run(self, *args, **kwargs):
|
|
38
|
-
ingest_log_streaming_init(logging.DEBUG if self.processor_config.verbose else logging.INFO)
|
|
39
|
-
self.update_read_config()
|
|
40
|
-
source_connector = self.get_source_connector()
|
|
41
|
-
self.process_documents(
|
|
42
|
-
source_doc_connector=source_connector,
|
|
43
|
-
)
|
|
44
|
-
|
|
45
|
-
@abstractmethod
|
|
46
|
-
def update_read_config(self):
|
|
47
|
-
pass
|
|
48
|
-
|
|
49
|
-
@abstractmethod
|
|
50
|
-
def get_source_connector_cls(self) -> t.Type[BaseSourceConnector]:
|
|
51
|
-
pass
|
|
52
|
-
|
|
53
|
-
def get_source_connector(self) -> BaseSourceConnector:
|
|
54
|
-
source_connector_cls = self.get_source_connector_cls()
|
|
55
|
-
return source_connector_cls(
|
|
56
|
-
processor_config=self.processor_config,
|
|
57
|
-
connector_config=self.connector_config,
|
|
58
|
-
read_config=self.read_config,
|
|
59
|
-
)
|
|
60
|
-
|
|
61
|
-
def get_dest_doc_connector(self) -> t.Optional[BaseDestinationConnector]:
|
|
62
|
-
writer_kwargs = self.writer_kwargs if self.writer_kwargs else {}
|
|
63
|
-
if self.writer:
|
|
64
|
-
return self.writer.get_connector(**writer_kwargs)
|
|
65
|
-
return None
|
|
66
|
-
|
|
67
|
-
def get_permissions_config(self) -> t.Optional[PermissionsConfig]:
|
|
68
|
-
if self.permissions_config is None:
|
|
69
|
-
return None
|
|
70
|
-
|
|
71
|
-
permissions_config_filled = bool(
|
|
72
|
-
self.permissions_config.application_id
|
|
73
|
-
and self.permissions_config.client_cred
|
|
74
|
-
and self.permissions_config.tenant,
|
|
75
|
-
)
|
|
76
|
-
|
|
77
|
-
return self.permissions_config if permissions_config_filled else None
|
|
78
|
-
|
|
79
|
-
def process_documents(self, source_doc_connector: BaseSourceConnector):
|
|
80
|
-
process_documents(
|
|
81
|
-
processor_config=self.processor_config,
|
|
82
|
-
source_doc_connector=source_doc_connector,
|
|
83
|
-
partition_config=self.partition_config,
|
|
84
|
-
dest_doc_connector=self.get_dest_doc_connector(),
|
|
85
|
-
embedder_config=self.embedding_config,
|
|
86
|
-
chunking_config=self.chunking_config,
|
|
87
|
-
permissions_config=self.get_permissions_config(),
|
|
88
|
-
retry_strategy_config=self.retry_strategy_config,
|
|
89
|
-
)
|
|
@@ -1,45 +0,0 @@
|
|
|
1
|
-
import hashlib
|
|
2
|
-
import typing as t
|
|
3
|
-
from dataclasses import dataclass
|
|
4
|
-
|
|
5
|
-
from unstructured_ingest.interfaces import BaseSourceConnector
|
|
6
|
-
from unstructured_ingest.logger import logger
|
|
7
|
-
from unstructured_ingest.runner.base_runner import Runner
|
|
8
|
-
from unstructured_ingest.runner.utils import update_download_dir_hash
|
|
9
|
-
|
|
10
|
-
if t.TYPE_CHECKING:
|
|
11
|
-
from unstructured_ingest.connector.biomed import SimpleBiomedConfig
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
@dataclass
|
|
15
|
-
class BiomedRunner(Runner):
|
|
16
|
-
connector_config: "SimpleBiomedConfig"
|
|
17
|
-
|
|
18
|
-
def update_read_config(self):
|
|
19
|
-
base_path = (
|
|
20
|
-
self.connector_config.path
|
|
21
|
-
if self.connector_config.path
|
|
22
|
-
else "{}-{}-{}".format(
|
|
23
|
-
self.connector_config.api_id if self.connector_config.api_id else "",
|
|
24
|
-
self.connector_config.api_from if self.connector_config.api_from else "",
|
|
25
|
-
self.connector_config.api_until if self.connector_config.api_until else "",
|
|
26
|
-
)
|
|
27
|
-
)
|
|
28
|
-
|
|
29
|
-
hashed_dir_name = hashlib.sha256(
|
|
30
|
-
base_path.encode("utf-8"),
|
|
31
|
-
)
|
|
32
|
-
|
|
33
|
-
self.read_config.download_dir = update_download_dir_hash(
|
|
34
|
-
connector_name="biomed",
|
|
35
|
-
read_config=self.read_config,
|
|
36
|
-
hashed_dir_name=hashed_dir_name,
|
|
37
|
-
logger=logger,
|
|
38
|
-
)
|
|
39
|
-
|
|
40
|
-
def get_source_connector_cls(self) -> t.Type[BaseSourceConnector]:
|
|
41
|
-
from unstructured_ingest.connector.biomed import (
|
|
42
|
-
BiomedSourceConnector,
|
|
43
|
-
)
|
|
44
|
-
|
|
45
|
-
return BiomedSourceConnector
|
|
@@ -1,35 +0,0 @@
|
|
|
1
|
-
import hashlib
|
|
2
|
-
import typing as t
|
|
3
|
-
from dataclasses import dataclass
|
|
4
|
-
|
|
5
|
-
from unstructured_ingest.interfaces import BaseSourceConnector
|
|
6
|
-
from unstructured_ingest.logger import logger
|
|
7
|
-
from unstructured_ingest.runner.base_runner import Runner
|
|
8
|
-
from unstructured_ingest.runner.utils import update_download_dir_hash
|
|
9
|
-
|
|
10
|
-
if t.TYPE_CHECKING:
|
|
11
|
-
from unstructured_ingest.connector.confluence import SimpleConfluenceConfig
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
@dataclass
|
|
15
|
-
class ConfluenceRunner(Runner):
|
|
16
|
-
connector_config: "SimpleConfluenceConfig"
|
|
17
|
-
|
|
18
|
-
def update_read_config(self):
|
|
19
|
-
hashed_dir_name = hashlib.sha256(
|
|
20
|
-
self.connector_config.url.encode("utf-8"),
|
|
21
|
-
)
|
|
22
|
-
|
|
23
|
-
self.read_config.download_dir = update_download_dir_hash(
|
|
24
|
-
connector_name="confluence",
|
|
25
|
-
read_config=self.read_config,
|
|
26
|
-
hashed_dir_name=hashed_dir_name,
|
|
27
|
-
logger=logger,
|
|
28
|
-
)
|
|
29
|
-
|
|
30
|
-
def get_source_connector_cls(self) -> t.Type[BaseSourceConnector]:
|
|
31
|
-
from unstructured_ingest.connector.confluence import (
|
|
32
|
-
ConfluenceSourceConnector,
|
|
33
|
-
)
|
|
34
|
-
|
|
35
|
-
return ConfluenceSourceConnector
|
|
@@ -1,34 +0,0 @@
|
|
|
1
|
-
import hashlib
|
|
2
|
-
import typing as t
|
|
3
|
-
from dataclasses import dataclass
|
|
4
|
-
|
|
5
|
-
from unstructured_ingest.interfaces import BaseSourceConnector
|
|
6
|
-
from unstructured_ingest.logger import logger
|
|
7
|
-
from unstructured_ingest.runner.base_runner import Runner
|
|
8
|
-
from unstructured_ingest.runner.utils import update_download_dir_hash
|
|
9
|
-
|
|
10
|
-
if t.TYPE_CHECKING:
|
|
11
|
-
from unstructured_ingest.connector.delta_table import SimpleDeltaTableConfig
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
@dataclass
|
|
15
|
-
class DeltaTableRunner(Runner):
|
|
16
|
-
connector_config: "SimpleDeltaTableConfig"
|
|
17
|
-
|
|
18
|
-
def update_read_config(self):
|
|
19
|
-
hashed_dir_name = hashlib.sha256(
|
|
20
|
-
str(self.connector_config.table_uri).encode("utf-8"),
|
|
21
|
-
)
|
|
22
|
-
self.read_config.download_dir = update_download_dir_hash(
|
|
23
|
-
connector_name="delta_table",
|
|
24
|
-
read_config=self.read_config,
|
|
25
|
-
hashed_dir_name=hashed_dir_name,
|
|
26
|
-
logger=logger,
|
|
27
|
-
)
|
|
28
|
-
|
|
29
|
-
def get_source_connector_cls(self) -> t.Type[BaseSourceConnector]:
|
|
30
|
-
from unstructured_ingest.connector.delta_table import (
|
|
31
|
-
DeltaTableSourceConnector,
|
|
32
|
-
)
|
|
33
|
-
|
|
34
|
-
return DeltaTableSourceConnector
|
|
@@ -1,35 +0,0 @@
|
|
|
1
|
-
import hashlib
|
|
2
|
-
import typing as t
|
|
3
|
-
from dataclasses import dataclass
|
|
4
|
-
|
|
5
|
-
from unstructured_ingest.interfaces import BaseSourceConnector
|
|
6
|
-
from unstructured_ingest.logger import logger
|
|
7
|
-
from unstructured_ingest.runner.base_runner import Runner
|
|
8
|
-
from unstructured_ingest.runner.utils import update_download_dir_hash
|
|
9
|
-
|
|
10
|
-
if t.TYPE_CHECKING:
|
|
11
|
-
from unstructured_ingest.connector.discord import SimpleDiscordConfig
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
@dataclass
|
|
15
|
-
class DiscordRunner(Runner):
|
|
16
|
-
connector_config: "SimpleDiscordConfig"
|
|
17
|
-
|
|
18
|
-
def update_read_config(self):
|
|
19
|
-
hashed_dir_name = hashlib.sha256(
|
|
20
|
-
",".join(self.connector_config.channels).encode("utf-8"),
|
|
21
|
-
)
|
|
22
|
-
|
|
23
|
-
self.read_config.download_dir = update_download_dir_hash(
|
|
24
|
-
connector_name="discord",
|
|
25
|
-
read_config=self.read_config,
|
|
26
|
-
hashed_dir_name=hashed_dir_name,
|
|
27
|
-
logger=logger,
|
|
28
|
-
)
|
|
29
|
-
|
|
30
|
-
def get_source_connector_cls(self) -> t.Type[BaseSourceConnector]:
|
|
31
|
-
from unstructured_ingest.connector.discord import (
|
|
32
|
-
DiscordSourceConnector,
|
|
33
|
-
)
|
|
34
|
-
|
|
35
|
-
return DiscordSourceConnector
|
|
@@ -1,40 +0,0 @@
|
|
|
1
|
-
import hashlib
|
|
2
|
-
import typing as t
|
|
3
|
-
from dataclasses import dataclass
|
|
4
|
-
|
|
5
|
-
from unstructured_ingest.interfaces import BaseSourceConnector
|
|
6
|
-
from unstructured_ingest.logger import logger
|
|
7
|
-
from unstructured_ingest.runner.base_runner import Runner
|
|
8
|
-
from unstructured_ingest.runner.utils import update_download_dir_hash
|
|
9
|
-
|
|
10
|
-
if t.TYPE_CHECKING:
|
|
11
|
-
from unstructured_ingest.connector.elasticsearch import SimpleElasticsearchConfig
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
@dataclass
|
|
15
|
-
class ElasticSearchRunner(Runner):
|
|
16
|
-
connector_config: "SimpleElasticsearchConfig"
|
|
17
|
-
|
|
18
|
-
def update_read_config(self):
|
|
19
|
-
hashed_dir_name = hashlib.sha256(
|
|
20
|
-
"{}_{}".format(
|
|
21
|
-
",".join(self.connector_config.access_config.hosts),
|
|
22
|
-
self.connector_config.index_name,
|
|
23
|
-
).encode(
|
|
24
|
-
"utf-8",
|
|
25
|
-
),
|
|
26
|
-
)
|
|
27
|
-
|
|
28
|
-
self.read_config.download_dir = update_download_dir_hash(
|
|
29
|
-
connector_name="elasticsearch",
|
|
30
|
-
read_config=self.read_config,
|
|
31
|
-
hashed_dir_name=hashed_dir_name,
|
|
32
|
-
logger=logger,
|
|
33
|
-
)
|
|
34
|
-
|
|
35
|
-
def get_source_connector_cls(self) -> t.Type[BaseSourceConnector]:
|
|
36
|
-
from unstructured_ingest.connector.elasticsearch import (
|
|
37
|
-
ElasticsearchSourceConnector,
|
|
38
|
-
)
|
|
39
|
-
|
|
40
|
-
return ElasticsearchSourceConnector
|
|
@@ -1,30 +0,0 @@
|
|
|
1
|
-
import typing as t
|
|
2
|
-
from dataclasses import dataclass
|
|
3
|
-
|
|
4
|
-
from unstructured_ingest.interfaces import BaseSourceConnector
|
|
5
|
-
from unstructured_ingest.logger import logger
|
|
6
|
-
from unstructured_ingest.runner.base_runner import Runner
|
|
7
|
-
from unstructured_ingest.runner.utils import update_download_dir_remote_url
|
|
8
|
-
|
|
9
|
-
if t.TYPE_CHECKING:
|
|
10
|
-
from unstructured_ingest.connector.fsspec.azure import SimpleAzureBlobStorageConfig
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
@dataclass
|
|
14
|
-
class AzureRunner(Runner):
|
|
15
|
-
connector_config: "SimpleAzureBlobStorageConfig"
|
|
16
|
-
|
|
17
|
-
def update_read_config(self):
|
|
18
|
-
self.read_config.download_dir = update_download_dir_remote_url(
|
|
19
|
-
connector_name="azure",
|
|
20
|
-
read_config=self.read_config,
|
|
21
|
-
remote_url=self.connector_config.remote_url, # type: ignore
|
|
22
|
-
logger=logger,
|
|
23
|
-
)
|
|
24
|
-
|
|
25
|
-
def get_source_connector_cls(self) -> t.Type[BaseSourceConnector]:
|
|
26
|
-
from unstructured_ingest.connector.fsspec.azure import (
|
|
27
|
-
AzureBlobStorageSourceConnector,
|
|
28
|
-
)
|
|
29
|
-
|
|
30
|
-
return AzureBlobStorageSourceConnector
|
|
@@ -1,28 +0,0 @@
|
|
|
1
|
-
import typing as t
|
|
2
|
-
from dataclasses import dataclass
|
|
3
|
-
|
|
4
|
-
from unstructured_ingest.interfaces import BaseSourceConnector
|
|
5
|
-
from unstructured_ingest.logger import logger
|
|
6
|
-
from unstructured_ingest.runner.base_runner import Runner
|
|
7
|
-
from unstructured_ingest.runner.utils import update_download_dir_remote_url
|
|
8
|
-
|
|
9
|
-
if t.TYPE_CHECKING:
|
|
10
|
-
from unstructured_ingest.connector.fsspec.box import SimpleBoxConfig
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
@dataclass
|
|
14
|
-
class BoxRunner(Runner):
|
|
15
|
-
connector_config: "SimpleBoxConfig"
|
|
16
|
-
|
|
17
|
-
def update_read_config(self):
|
|
18
|
-
self.read_config.download_dir = update_download_dir_remote_url(
|
|
19
|
-
connector_name="box",
|
|
20
|
-
read_config=self.read_config,
|
|
21
|
-
remote_url=self.connector_config.remote_url, # type: ignore
|
|
22
|
-
logger=logger,
|
|
23
|
-
)
|
|
24
|
-
|
|
25
|
-
def get_source_connector_cls(self) -> t.Type[BaseSourceConnector]:
|
|
26
|
-
from unstructured_ingest.connector.fsspec.box import BoxSourceConnector
|
|
27
|
-
|
|
28
|
-
return BoxSourceConnector
|
|
@@ -1,30 +0,0 @@
|
|
|
1
|
-
import typing as t
|
|
2
|
-
from dataclasses import dataclass
|
|
3
|
-
|
|
4
|
-
from unstructured_ingest.interfaces import BaseSourceConnector
|
|
5
|
-
from unstructured_ingest.logger import logger
|
|
6
|
-
from unstructured_ingest.runner.base_runner import Runner
|
|
7
|
-
from unstructured_ingest.runner.utils import update_download_dir_remote_url
|
|
8
|
-
|
|
9
|
-
if t.TYPE_CHECKING:
|
|
10
|
-
from unstructured_ingest.connector.fsspec.dropbox import SimpleDropboxConfig
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
@dataclass
|
|
14
|
-
class DropboxRunner(Runner):
|
|
15
|
-
connector_config: "SimpleDropboxConfig"
|
|
16
|
-
|
|
17
|
-
def update_read_config(self):
|
|
18
|
-
self.read_config.download_dir = update_download_dir_remote_url(
|
|
19
|
-
connector_name="dropbox",
|
|
20
|
-
read_config=self.read_config,
|
|
21
|
-
remote_url=self.connector_config.remote_url, # type: ignore
|
|
22
|
-
logger=logger,
|
|
23
|
-
)
|
|
24
|
-
|
|
25
|
-
def get_source_connector_cls(self) -> t.Type[BaseSourceConnector]:
|
|
26
|
-
from unstructured_ingest.connector.fsspec.dropbox import (
|
|
27
|
-
DropboxSourceConnector,
|
|
28
|
-
)
|
|
29
|
-
|
|
30
|
-
return DropboxSourceConnector
|
|
@@ -1,40 +0,0 @@
|
|
|
1
|
-
import typing as t
|
|
2
|
-
import warnings
|
|
3
|
-
from dataclasses import dataclass
|
|
4
|
-
from urllib.parse import urlparse
|
|
5
|
-
|
|
6
|
-
from unstructured_ingest.interfaces import BaseSourceConnector
|
|
7
|
-
from unstructured_ingest.logger import logger
|
|
8
|
-
from unstructured_ingest.runner.base_runner import Runner
|
|
9
|
-
from unstructured_ingest.runner.utils import update_download_dir_remote_url
|
|
10
|
-
|
|
11
|
-
if t.TYPE_CHECKING:
|
|
12
|
-
from unstructured_ingest.connector.fsspec.fsspec import SimpleFsspecConfig
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
@dataclass
|
|
16
|
-
class FsspecRunner(Runner):
|
|
17
|
-
connector_config: "SimpleFsspecConfig"
|
|
18
|
-
|
|
19
|
-
def update_read_config(self):
|
|
20
|
-
self.read_config.download_dir = update_download_dir_remote_url(
|
|
21
|
-
connector_name="fsspec",
|
|
22
|
-
read_config=self.read_config,
|
|
23
|
-
remote_url=self.fsspec_config.remote_url, # type: ignore
|
|
24
|
-
logger=logger,
|
|
25
|
-
)
|
|
26
|
-
|
|
27
|
-
protocol = urlparse(self.fsspec_config.remote_url).scheme # type: ignore
|
|
28
|
-
warnings.warn(
|
|
29
|
-
f"`fsspec` protocol {protocol} is not directly supported by `unstructured`,"
|
|
30
|
-
" so use it at your own risk. Supported protocols are `gcs`, `gs`, `s3`, `s3a`,"
|
|
31
|
-
"`dropbox`, `abfs`, `az` and `sftp`.",
|
|
32
|
-
UserWarning,
|
|
33
|
-
)
|
|
34
|
-
|
|
35
|
-
def get_source_connector_cls(self) -> t.Type[BaseSourceConnector]:
|
|
36
|
-
from unstructured_ingest.connector.fsspec.fsspec import (
|
|
37
|
-
FsspecSourceConnector,
|
|
38
|
-
)
|
|
39
|
-
|
|
40
|
-
return FsspecSourceConnector
|
|
@@ -1,28 +0,0 @@
|
|
|
1
|
-
import typing as t
|
|
2
|
-
from dataclasses import dataclass
|
|
3
|
-
|
|
4
|
-
from unstructured_ingest.interfaces import BaseSourceConnector
|
|
5
|
-
from unstructured_ingest.logger import logger
|
|
6
|
-
from unstructured_ingest.runner.base_runner import Runner
|
|
7
|
-
from unstructured_ingest.runner.utils import update_download_dir_remote_url
|
|
8
|
-
|
|
9
|
-
if t.TYPE_CHECKING:
|
|
10
|
-
from unstructured_ingest.connector.fsspec.gcs import SimpleGcsConfig
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
@dataclass
|
|
14
|
-
class GCSRunner(Runner):
|
|
15
|
-
connector_config: "SimpleGcsConfig"
|
|
16
|
-
|
|
17
|
-
def update_read_config(self):
|
|
18
|
-
self.read_config.download_dir = update_download_dir_remote_url(
|
|
19
|
-
connector_name="gcs",
|
|
20
|
-
read_config=self.read_config,
|
|
21
|
-
remote_url=self.connector_config.remote_url, # type: ignore
|
|
22
|
-
logger=logger,
|
|
23
|
-
)
|
|
24
|
-
|
|
25
|
-
def get_source_connector_cls(self) -> t.Type[BaseSourceConnector]:
|
|
26
|
-
from unstructured_ingest.connector.fsspec.gcs import GcsSourceConnector
|
|
27
|
-
|
|
28
|
-
return GcsSourceConnector
|
|
@@ -1,28 +0,0 @@
|
|
|
1
|
-
import typing as t
|
|
2
|
-
from dataclasses import dataclass
|
|
3
|
-
|
|
4
|
-
from unstructured_ingest.interfaces import BaseSourceConnector
|
|
5
|
-
from unstructured_ingest.logger import logger
|
|
6
|
-
from unstructured_ingest.runner.base_runner import Runner
|
|
7
|
-
from unstructured_ingest.runner.utils import update_download_dir_remote_url
|
|
8
|
-
|
|
9
|
-
if t.TYPE_CHECKING:
|
|
10
|
-
from unstructured_ingest.connector.fsspec.s3 import SimpleS3Config
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
@dataclass
|
|
14
|
-
class S3Runner(Runner):
|
|
15
|
-
connector_config: "SimpleS3Config"
|
|
16
|
-
|
|
17
|
-
def update_read_config(self):
|
|
18
|
-
self.read_config.download_dir = update_download_dir_remote_url(
|
|
19
|
-
connector_name="s3",
|
|
20
|
-
read_config=self.read_config,
|
|
21
|
-
remote_url=self.connector_config.remote_url, # type: ignore
|
|
22
|
-
logger=logger,
|
|
23
|
-
)
|
|
24
|
-
|
|
25
|
-
def get_source_connector_cls(self) -> t.Type[BaseSourceConnector]:
|
|
26
|
-
from unstructured_ingest.connector.fsspec.s3 import S3SourceConnector
|
|
27
|
-
|
|
28
|
-
return S3SourceConnector
|
|
@@ -1,28 +0,0 @@
|
|
|
1
|
-
import typing as t
|
|
2
|
-
from dataclasses import dataclass
|
|
3
|
-
|
|
4
|
-
from unstructured_ingest.interfaces import BaseSourceConnector
|
|
5
|
-
from unstructured_ingest.logger import logger
|
|
6
|
-
from unstructured_ingest.runner.base_runner import Runner
|
|
7
|
-
from unstructured_ingest.runner.utils import update_download_dir_remote_url
|
|
8
|
-
|
|
9
|
-
if t.TYPE_CHECKING:
|
|
10
|
-
from unstructured_ingest.connector.fsspec.sftp import SimpleSftpConfig
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
@dataclass
|
|
14
|
-
class SftpRunner(Runner):
|
|
15
|
-
connector_config: "SimpleSftpConfig"
|
|
16
|
-
|
|
17
|
-
def update_read_config(self):
|
|
18
|
-
self.read_config.download_dir = update_download_dir_remote_url(
|
|
19
|
-
connector_name="sftp",
|
|
20
|
-
read_config=self.read_config,
|
|
21
|
-
remote_url=self.connector_config.remote_url, # type: ignore
|
|
22
|
-
logger=logger,
|
|
23
|
-
)
|
|
24
|
-
|
|
25
|
-
def get_source_connector_cls(self) -> t.Type[BaseSourceConnector]:
|
|
26
|
-
from unstructured_ingest.connector.fsspec.sftp import SftpSourceConnector
|
|
27
|
-
|
|
28
|
-
return SftpSourceConnector
|
|
@@ -1,37 +0,0 @@
|
|
|
1
|
-
import hashlib
|
|
2
|
-
import typing as t
|
|
3
|
-
from dataclasses import dataclass
|
|
4
|
-
|
|
5
|
-
from unstructured_ingest.interfaces import BaseSourceConnector
|
|
6
|
-
from unstructured_ingest.logger import logger
|
|
7
|
-
from unstructured_ingest.runner.base_runner import Runner
|
|
8
|
-
from unstructured_ingest.runner.utils import update_download_dir_hash
|
|
9
|
-
|
|
10
|
-
if t.TYPE_CHECKING:
|
|
11
|
-
from unstructured_ingest.connector.github import SimpleGitHubConfig
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
@dataclass
|
|
15
|
-
class GithubRunner(Runner):
|
|
16
|
-
connector_config: "SimpleGitHubConfig"
|
|
17
|
-
|
|
18
|
-
def update_read_config(self):
|
|
19
|
-
hashed_dir_name = hashlib.sha256(
|
|
20
|
-
f"{self.connector_config.url}_{self.connector_config.branch}".encode(
|
|
21
|
-
"utf-8",
|
|
22
|
-
),
|
|
23
|
-
)
|
|
24
|
-
|
|
25
|
-
self.read_config.download_dir = update_download_dir_hash(
|
|
26
|
-
connector_name="github",
|
|
27
|
-
read_config=self.read_config,
|
|
28
|
-
hashed_dir_name=hashed_dir_name,
|
|
29
|
-
logger=logger,
|
|
30
|
-
)
|
|
31
|
-
|
|
32
|
-
def get_source_connector_cls(self) -> t.Type[BaseSourceConnector]:
|
|
33
|
-
from unstructured_ingest.connector.github import (
|
|
34
|
-
GitHubSourceConnector,
|
|
35
|
-
)
|
|
36
|
-
|
|
37
|
-
return GitHubSourceConnector
|
|
@@ -1,37 +0,0 @@
|
|
|
1
|
-
import hashlib
|
|
2
|
-
import typing as t
|
|
3
|
-
from dataclasses import dataclass
|
|
4
|
-
|
|
5
|
-
from unstructured_ingest.interfaces import BaseSourceConnector
|
|
6
|
-
from unstructured_ingest.logger import logger
|
|
7
|
-
from unstructured_ingest.runner.base_runner import Runner
|
|
8
|
-
from unstructured_ingest.runner.utils import update_download_dir_hash
|
|
9
|
-
|
|
10
|
-
if t.TYPE_CHECKING:
|
|
11
|
-
from unstructured_ingest.connector.gitlab import SimpleGitlabConfig
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
@dataclass
|
|
15
|
-
class GitlabRunner(Runner):
|
|
16
|
-
connector_config: "SimpleGitlabConfig"
|
|
17
|
-
|
|
18
|
-
def update_read_config(self):
|
|
19
|
-
hashed_dir_name = hashlib.sha256(
|
|
20
|
-
f"{self.connector_config.url}_{self.connector_config.branch}".encode(
|
|
21
|
-
"utf-8",
|
|
22
|
-
),
|
|
23
|
-
)
|
|
24
|
-
|
|
25
|
-
self.read_config.download_dir = update_download_dir_hash(
|
|
26
|
-
connector_name="gitlab",
|
|
27
|
-
read_config=self.read_config,
|
|
28
|
-
hashed_dir_name=hashed_dir_name,
|
|
29
|
-
logger=logger,
|
|
30
|
-
)
|
|
31
|
-
|
|
32
|
-
def get_source_connector_cls(self) -> t.Type[BaseSourceConnector]:
|
|
33
|
-
from unstructured_ingest.connector.gitlab import (
|
|
34
|
-
GitLabSourceConnector,
|
|
35
|
-
)
|
|
36
|
-
|
|
37
|
-
return GitLabSourceConnector
|
|
@@ -1,35 +0,0 @@
|
|
|
1
|
-
import hashlib
|
|
2
|
-
import typing as t
|
|
3
|
-
from dataclasses import dataclass
|
|
4
|
-
|
|
5
|
-
from unstructured_ingest.interfaces import BaseSourceConnector
|
|
6
|
-
from unstructured_ingest.logger import logger
|
|
7
|
-
from unstructured_ingest.runner.base_runner import Runner
|
|
8
|
-
from unstructured_ingest.runner.utils import update_download_dir_hash
|
|
9
|
-
|
|
10
|
-
if t.TYPE_CHECKING:
|
|
11
|
-
from unstructured_ingest.connector.google_drive import SimpleGoogleDriveConfig
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
@dataclass
|
|
15
|
-
class GoogleDriveRunner(Runner):
|
|
16
|
-
connector_config: "SimpleGoogleDriveConfig"
|
|
17
|
-
|
|
18
|
-
def update_read_config(self):
|
|
19
|
-
hashed_dir_name = hashlib.sha256(
|
|
20
|
-
self.connector_config.drive_id.encode("utf-8"),
|
|
21
|
-
)
|
|
22
|
-
|
|
23
|
-
self.read_config.download_dir = update_download_dir_hash(
|
|
24
|
-
connector_name="google_drive",
|
|
25
|
-
read_config=self.read_config,
|
|
26
|
-
hashed_dir_name=hashed_dir_name,
|
|
27
|
-
logger=logger,
|
|
28
|
-
)
|
|
29
|
-
|
|
30
|
-
def get_source_connector_cls(self) -> t.Type[BaseSourceConnector]:
|
|
31
|
-
from unstructured_ingest.connector.google_drive import (
|
|
32
|
-
GoogleDriveSourceConnector,
|
|
33
|
-
)
|
|
34
|
-
|
|
35
|
-
return GoogleDriveSourceConnector
|
|
@@ -1,35 +0,0 @@
|
|
|
1
|
-
import hashlib
|
|
2
|
-
import typing as t
|
|
3
|
-
from dataclasses import dataclass
|
|
4
|
-
|
|
5
|
-
from unstructured_ingest.interfaces import BaseSourceConnector
|
|
6
|
-
from unstructured_ingest.logger import logger
|
|
7
|
-
from unstructured_ingest.runner.base_runner import Runner
|
|
8
|
-
from unstructured_ingest.runner.utils import update_download_dir_hash
|
|
9
|
-
|
|
10
|
-
if t.TYPE_CHECKING:
|
|
11
|
-
from unstructured_ingest.connector.hubspot import SimpleHubSpotConfig
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
@dataclass
|
|
15
|
-
class HubSpotRunner(Runner):
|
|
16
|
-
connector_config: "SimpleHubSpotConfig"
|
|
17
|
-
|
|
18
|
-
def update_read_config(self):
|
|
19
|
-
hashed_dir_name = hashlib.sha256(
|
|
20
|
-
self.connector_config.access_config.api_token.encode("utf-8"),
|
|
21
|
-
)
|
|
22
|
-
|
|
23
|
-
self.read_config.download_dir = update_download_dir_hash(
|
|
24
|
-
connector_name="hubspot",
|
|
25
|
-
read_config=self.read_config,
|
|
26
|
-
hashed_dir_name=hashed_dir_name,
|
|
27
|
-
logger=logger,
|
|
28
|
-
)
|
|
29
|
-
|
|
30
|
-
def get_source_connector_cls(self) -> t.Type[BaseSourceConnector]:
|
|
31
|
-
from unstructured_ingest.connector.hubspot import (
|
|
32
|
-
HubSpotSourceConnector,
|
|
33
|
-
)
|
|
34
|
-
|
|
35
|
-
return HubSpotSourceConnector
|
|
@@ -1,35 +0,0 @@
|
|
|
1
|
-
import hashlib
|
|
2
|
-
import typing as t
|
|
3
|
-
from dataclasses import dataclass
|
|
4
|
-
|
|
5
|
-
from unstructured_ingest.interfaces import BaseSourceConnector
|
|
6
|
-
from unstructured_ingest.logger import logger
|
|
7
|
-
from unstructured_ingest.runner.base_runner import Runner
|
|
8
|
-
from unstructured_ingest.runner.utils import update_download_dir_hash
|
|
9
|
-
|
|
10
|
-
if t.TYPE_CHECKING:
|
|
11
|
-
from unstructured_ingest.connector.jira import SimpleJiraConfig
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
@dataclass
|
|
15
|
-
class JiraRunner(Runner):
|
|
16
|
-
connector_config: "SimpleJiraConfig"
|
|
17
|
-
|
|
18
|
-
def update_read_config(self):
|
|
19
|
-
hashed_dir_name = hashlib.sha256(
|
|
20
|
-
self.connector_config.url.encode("utf-8"),
|
|
21
|
-
)
|
|
22
|
-
|
|
23
|
-
self.read_config.download_dir = update_download_dir_hash(
|
|
24
|
-
connector_name="jira",
|
|
25
|
-
read_config=self.read_config,
|
|
26
|
-
hashed_dir_name=hashed_dir_name,
|
|
27
|
-
logger=logger,
|
|
28
|
-
)
|
|
29
|
-
|
|
30
|
-
def get_source_connector_cls(self) -> t.Type[BaseSourceConnector]:
|
|
31
|
-
from unstructured_ingest.connector.jira import (
|
|
32
|
-
JiraSourceConnector,
|
|
33
|
-
)
|
|
34
|
-
|
|
35
|
-
return JiraSourceConnector
|