unstructured-ingest: unstructured_ingest-0.6.2-py3-none-any.whl → unstructured_ingest-0.7.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of unstructured-ingest might be problematic. (The original page linked to more details at this point; the link target was not preserved in this text.)
- examples/airtable.py +44 -0
- examples/azure_cognitive_search.py +55 -0
- examples/chroma.py +54 -0
- examples/couchbase.py +55 -0
- examples/databricks_volumes_dest.py +55 -0
- examples/databricks_volumes_source.py +53 -0
- examples/delta_table.py +45 -0
- examples/discord_example.py +36 -0
- examples/elasticsearch.py +49 -0
- examples/google_drive.py +45 -0
- examples/kdbai.py +54 -0
- examples/local.py +36 -0
- examples/milvus.py +44 -0
- examples/mongodb.py +53 -0
- examples/opensearch.py +50 -0
- examples/pinecone.py +57 -0
- examples/s3.py +38 -0
- examples/salesforce.py +44 -0
- examples/sharepoint.py +47 -0
- examples/singlestore.py +49 -0
- examples/sql.py +90 -0
- examples/vectara.py +54 -0
- examples/weaviate.py +44 -0
- test/integration/chunkers/test_chunkers.py +1 -1
- test/integration/connectors/conftest.py +1 -1
- test/integration/connectors/databricks/test_volumes_native.py +3 -3
- test/integration/connectors/discord/test_discord.py +1 -1
- test/integration/connectors/duckdb/test_duckdb.py +2 -2
- test/integration/connectors/duckdb/test_motherduck.py +2 -2
- test/integration/connectors/elasticsearch/test_elasticsearch.py +2 -2
- test/integration/connectors/elasticsearch/test_opensearch.py +2 -2
- test/integration/connectors/sql/test_databricks_delta_tables.py +3 -3
- test/integration/connectors/sql/test_postgres.py +2 -2
- test/integration/connectors/sql/test_singlestore.py +2 -2
- test/integration/connectors/sql/test_snowflake.py +2 -2
- test/integration/connectors/sql/test_sqlite.py +2 -2
- test/integration/connectors/sql/test_vastdb.py +1 -1
- test/integration/connectors/test_astradb.py +2 -2
- test/integration/connectors/test_azure_ai_search.py +2 -2
- test/integration/connectors/test_chroma.py +2 -2
- test/integration/connectors/test_confluence.py +1 -1
- test/integration/connectors/test_delta_table.py +2 -2
- test/integration/connectors/test_dropbox.py +2 -2
- test/integration/connectors/test_github.py +49 -0
- test/integration/connectors/test_google_drive.py +2 -2
- test/integration/connectors/test_jira.py +1 -1
- test/integration/connectors/test_lancedb.py +7 -7
- test/integration/connectors/test_milvus.py +2 -2
- test/integration/connectors/test_mongodb.py +2 -2
- test/integration/connectors/test_neo4j.py +7 -7
- test/integration/connectors/test_notion.py +2 -2
- test/integration/connectors/test_onedrive.py +2 -2
- test/integration/connectors/test_pinecone.py +3 -3
- test/integration/connectors/test_qdrant.py +6 -6
- test/integration/connectors/test_redis.py +3 -3
- test/integration/connectors/test_s3.py +3 -3
- test/integration/connectors/test_sharepoint.py +1 -1
- test/integration/connectors/test_vectara.py +4 -4
- test/integration/connectors/test_zendesk.py +2 -2
- test/integration/connectors/utils/validation/destination.py +2 -2
- test/integration/connectors/utils/validation/source.py +2 -2
- test/integration/connectors/weaviate/test_cloud.py +1 -1
- test/integration/connectors/weaviate/test_local.py +2 -2
- test/integration/embedders/test_azure_openai.py +1 -1
- test/integration/embedders/test_bedrock.py +2 -2
- test/integration/embedders/test_huggingface.py +1 -1
- test/integration/embedders/test_mixedbread.py +1 -1
- test/integration/embedders/test_octoai.py +2 -2
- test/integration/embedders/test_openai.py +2 -2
- test/integration/embedders/test_togetherai.py +2 -2
- test/integration/embedders/test_vertexai.py +1 -1
- test/integration/embedders/test_voyageai.py +1 -1
- test/integration/partitioners/test_partitioner.py +2 -2
- test/unit/{v2/chunkers → chunkers}/test_chunkers.py +1 -1
- test/unit/{v2/connectors → connectors}/ibm_watsonx/test_ibm_watsonx_s3.py +6 -6
- test/unit/{v2/connectors → connectors}/motherduck/test_base.py +5 -5
- test/unit/{v2/connectors → connectors}/sql/test_sql.py +4 -4
- test/unit/{v2/connectors → connectors}/test_confluence.py +1 -1
- test/unit/{v2/connectors → connectors}/test_jira.py +1 -1
- test/unit/{v2/embedders → embedders}/test_huggingface.py +1 -1
- test/unit/{v2/embedders → embedders}/test_vertexai.py +1 -1
- test/unit/{v2/partitioners → partitioners}/test_partitioner.py +2 -2
- test/unit/test_html.py +1 -1
- test/unit/{v2/test_interfaces.py → test_interfaces.py} +1 -1
- test/unit/test_utils.py +106 -97
- unstructured_ingest/__version__.py +1 -1
- unstructured_ingest/cli/__init__.py +0 -14
- unstructured_ingest/cli/base/__init__.py +4 -0
- unstructured_ingest/cli/base/cmd.py +259 -9
- unstructured_ingest/cli/base/dest.py +58 -61
- unstructured_ingest/cli/base/src.py +54 -36
- unstructured_ingest/cli/cli.py +4 -17
- unstructured_ingest/{v2/cli → cli}/cmds.py +2 -2
- unstructured_ingest/{v2/cli → cli}/utils/model_conversion.py +6 -6
- unstructured_ingest/{v2/types → data_types}/file_data.py +1 -1
- unstructured_ingest/embed/bedrock.py +3 -3
- unstructured_ingest/embed/octoai.py +3 -3
- unstructured_ingest/embed/openai.py +3 -3
- unstructured_ingest/embed/togetherai.py +4 -4
- unstructured_ingest/embed/vertexai.py +1 -1
- unstructured_ingest/embed/voyageai.py +4 -4
- unstructured_ingest/{v2/interfaces → interfaces}/downloader.py +3 -3
- unstructured_ingest/{v2/interfaces → interfaces}/indexer.py +3 -3
- unstructured_ingest/{v2/interfaces → interfaces}/upload_stager.py +2 -2
- unstructured_ingest/{v2/interfaces → interfaces}/uploader.py +2 -2
- unstructured_ingest/{v2/otel.py → otel.py} +1 -1
- unstructured_ingest/pipeline/__init__.py +0 -22
- unstructured_ingest/pipeline/interfaces.py +179 -238
- unstructured_ingest/{v2/pipeline → pipeline}/otel.py +2 -2
- unstructured_ingest/pipeline/pipeline.py +388 -97
- unstructured_ingest/{v2/pipeline → pipeline}/steps/chunk.py +5 -5
- unstructured_ingest/{v2/pipeline → pipeline}/steps/download.py +5 -5
- unstructured_ingest/{v2/pipeline → pipeline}/steps/embed.py +5 -5
- unstructured_ingest/{v2/pipeline → pipeline}/steps/filter.py +4 -4
- unstructured_ingest/{v2/pipeline → pipeline}/steps/index.py +5 -5
- unstructured_ingest/{v2/pipeline → pipeline}/steps/partition.py +5 -5
- unstructured_ingest/{v2/pipeline → pipeline}/steps/stage.py +5 -5
- unstructured_ingest/{v2/pipeline → pipeline}/steps/uncompress.py +4 -4
- unstructured_ingest/{v2/pipeline → pipeline}/steps/upload.py +5 -5
- unstructured_ingest/{v2/processes → processes}/chunker.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connector_registry.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/__init__.py +14 -11
- unstructured_ingest/{v2/processes → processes}/connectors/airtable.py +4 -4
- unstructured_ingest/{v2/processes → processes}/connectors/astradb.py +15 -15
- unstructured_ingest/{v2/processes → processes}/connectors/azure_ai_search.py +8 -9
- unstructured_ingest/{v2/processes → processes}/connectors/chroma.py +10 -7
- unstructured_ingest/{v2/processes → processes}/connectors/confluence.py +11 -11
- unstructured_ingest/{v2/processes → processes}/connectors/couchbase.py +12 -12
- unstructured_ingest/{v2/processes → processes}/connectors/databricks/__init__.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/databricks/volumes.py +12 -11
- unstructured_ingest/{v2/processes → processes}/connectors/databricks/volumes_aws.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/databricks/volumes_azure.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/databricks/volumes_gcp.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/databricks/volumes_native.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/databricks/volumes_table.py +8 -9
- unstructured_ingest/{v2/processes → processes}/connectors/delta_table.py +7 -7
- unstructured_ingest/{v2/processes → processes}/connectors/discord.py +9 -9
- unstructured_ingest/{v2/processes → processes}/connectors/duckdb/__init__.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/duckdb/base.py +3 -4
- unstructured_ingest/{v2/processes → processes}/connectors/duckdb/duckdb.py +7 -7
- unstructured_ingest/{v2/processes → processes}/connectors/duckdb/motherduck.py +7 -7
- unstructured_ingest/{v2/processes → processes}/connectors/elasticsearch/__init__.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/elasticsearch/elasticsearch.py +17 -17
- unstructured_ingest/{v2/processes → processes}/connectors/elasticsearch/opensearch.py +5 -5
- unstructured_ingest/{v2/processes → processes}/connectors/fsspec/__init__.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/fsspec/azure.py +8 -8
- unstructured_ingest/{v2/processes → processes}/connectors/fsspec/box.py +8 -8
- unstructured_ingest/{v2/processes → processes}/connectors/fsspec/dropbox.py +8 -8
- unstructured_ingest/{v2/processes → processes}/connectors/fsspec/fsspec.py +8 -8
- unstructured_ingest/{v2/processes → processes}/connectors/fsspec/gcs.py +8 -8
- unstructured_ingest/{v2/processes → processes}/connectors/fsspec/s3.py +9 -9
- unstructured_ingest/{v2/processes → processes}/connectors/fsspec/sftp.py +5 -5
- unstructured_ingest/processes/connectors/github.py +221 -0
- unstructured_ingest/{v2/processes → processes}/connectors/gitlab.py +9 -9
- unstructured_ingest/{v2/processes → processes}/connectors/google_drive.py +22 -13
- unstructured_ingest/{v2/processes → processes}/connectors/ibm_watsonx/__init__.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/ibm_watsonx/ibm_watsonx_s3.py +9 -9
- unstructured_ingest/{v2/processes → processes}/connectors/jira.py +9 -9
- unstructured_ingest/{v2/processes → processes}/connectors/kafka/__init__.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/kafka/cloud.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/kafka/kafka.py +9 -9
- unstructured_ingest/{v2/processes → processes}/connectors/kafka/local.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/kdbai.py +11 -7
- unstructured_ingest/{v2/processes → processes}/connectors/lancedb/__init__.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/lancedb/aws.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/lancedb/azure.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/lancedb/cloud.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/lancedb/gcp.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/lancedb/lancedb.py +6 -6
- unstructured_ingest/{v2/processes → processes}/connectors/lancedb/local.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/local.py +9 -9
- unstructured_ingest/{v2/processes → processes}/connectors/milvus.py +7 -7
- unstructured_ingest/{v2/processes → processes}/connectors/mongodb.py +13 -13
- unstructured_ingest/{v2/processes → processes}/connectors/neo4j.py +7 -7
- unstructured_ingest/{v2/processes → processes}/connectors/notion/client.py +6 -6
- unstructured_ingest/{v2/processes → processes}/connectors/notion/connector.py +15 -15
- unstructured_ingest/{v2/processes → processes}/connectors/notion/helpers.py +4 -4
- unstructured_ingest/{ingest_backoff → processes/connectors/notion/ingest_backoff}/_wrapper.py +5 -1
- unstructured_ingest/processes/connectors/notion/ingest_backoff/types.py +24 -0
- unstructured_ingest/{connector → processes/connectors}/notion/types/block.py +4 -4
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/bookmark.py +2 -2
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/breadcrumb.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/bulleted_list_item.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/callout.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/child_database.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/child_page.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/code.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/column_list.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/divider.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/embed.py +2 -2
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/equation.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/file.py +4 -4
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/heading.py +2 -2
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/image.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/link_preview.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/link_to_page.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/numbered_list.py +2 -2
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/paragraph.py +2 -2
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/pdf.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/quote.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/synced_block.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/table.py +2 -5
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/table_of_contents.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/template.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/todo.py +2 -2
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/toggle.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/unsupported.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/video.py +2 -2
- unstructured_ingest/{connector → processes/connectors}/notion/types/database.py +6 -6
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/__init__.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/checkbox.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/created_by.py +2 -2
- unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/created_time.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/date.py +2 -2
- unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/email.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/files.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/formula.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/last_edited_by.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/last_edited_time.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/multiselect.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/number.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/people.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/phone_number.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/relation.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/rich_text.py +2 -2
- unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/rollup.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/select.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/status.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/title.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/unique_id.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/url.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/verification.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/date.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/file.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/page.py +4 -4
- unstructured_ingest/{connector → processes/connectors}/notion/types/parent.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/rich_text.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/user.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/onedrive.py +10 -10
- unstructured_ingest/{v2/processes → processes}/connectors/outlook.py +9 -9
- unstructured_ingest/{v2/processes → processes}/connectors/pinecone.py +12 -9
- unstructured_ingest/{v2/processes → processes}/connectors/qdrant/__init__.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/qdrant/cloud.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/qdrant/local.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/qdrant/qdrant.py +9 -6
- unstructured_ingest/{v2/processes → processes}/connectors/qdrant/server.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/redisdb.py +6 -6
- unstructured_ingest/{v2/processes → processes}/connectors/salesforce.py +9 -9
- unstructured_ingest/{v2/processes → processes}/connectors/sharepoint.py +7 -7
- unstructured_ingest/{v2/processes → processes}/connectors/slack.py +9 -9
- unstructured_ingest/{v2/processes → processes}/connectors/sql/__init__.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/sql/databricks_delta_tables.py +11 -9
- unstructured_ingest/{v2/processes → processes}/connectors/sql/postgres.py +4 -4
- unstructured_ingest/{v2/processes → processes}/connectors/sql/singlestore.py +4 -4
- unstructured_ingest/{v2/processes → processes}/connectors/sql/snowflake.py +6 -6
- unstructured_ingest/{v2/processes → processes}/connectors/sql/sql.py +16 -11
- unstructured_ingest/{v2/processes → processes}/connectors/sql/sqlite.py +4 -4
- unstructured_ingest/{v2/processes → processes}/connectors/sql/vastdb.py +9 -10
- unstructured_ingest/{v2/processes → processes}/connectors/vectara.py +6 -6
- unstructured_ingest/{v2/processes → processes}/connectors/weaviate/__init__.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/weaviate/cloud.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/weaviate/embedded.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/weaviate/local.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/weaviate/weaviate.py +5 -5
- unstructured_ingest/{v2/processes → processes}/connectors/zendesk/client.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/zendesk/zendesk.py +10 -10
- unstructured_ingest/{v2/processes → processes}/embedder.py +1 -1
- unstructured_ingest/{v2/processes → processes}/filter.py +4 -4
- unstructured_ingest/{v2/processes → processes}/partitioner.py +6 -6
- unstructured_ingest/{v2/processes → processes}/uncompress.py +3 -3
- unstructured_ingest/{v2/processes → processes}/utils/blob_storage.py +2 -2
- unstructured_ingest/{v2/unstructured_api.py → unstructured_api.py} +2 -2
- unstructured_ingest/utils/compression.py +1 -48
- unstructured_ingest/utils/data_prep.py +9 -1
- unstructured_ingest/utils/html.py +3 -3
- unstructured_ingest/{v2/utils.py → utils/pydantic_models.py} +0 -9
- unstructured_ingest/utils/string_and_date_utils.py +1 -1
- {unstructured_ingest-0.6.2.dist-info → unstructured_ingest-0.7.0.dist-info}/METADATA +99 -99
- unstructured_ingest-0.7.0.dist-info/RECORD +370 -0
- {unstructured_ingest-0.6.2.dist-info → unstructured_ingest-0.7.0.dist-info}/top_level.txt +1 -0
- test/unit/v2/test_utils.py +0 -82
- unstructured_ingest/cli/cmd_factory.py +0 -12
- unstructured_ingest/cli/cmds/__init__.py +0 -145
- unstructured_ingest/cli/cmds/airtable.py +0 -69
- unstructured_ingest/cli/cmds/astradb.py +0 -99
- unstructured_ingest/cli/cmds/azure_ai_search.py +0 -65
- unstructured_ingest/cli/cmds/biomed.py +0 -52
- unstructured_ingest/cli/cmds/chroma.py +0 -104
- unstructured_ingest/cli/cmds/clarifai.py +0 -71
- unstructured_ingest/cli/cmds/confluence.py +0 -69
- unstructured_ingest/cli/cmds/databricks_volumes.py +0 -163
- unstructured_ingest/cli/cmds/delta_table.py +0 -94
- unstructured_ingest/cli/cmds/discord.py +0 -47
- unstructured_ingest/cli/cmds/elasticsearch.py +0 -133
- unstructured_ingest/cli/cmds/fsspec/azure.py +0 -94
- unstructured_ingest/cli/cmds/fsspec/box.py +0 -48
- unstructured_ingest/cli/cmds/fsspec/dropbox.py +0 -51
- unstructured_ingest/cli/cmds/fsspec/fsspec.py +0 -15
- unstructured_ingest/cli/cmds/fsspec/gcs.py +0 -71
- unstructured_ingest/cli/cmds/fsspec/s3.py +0 -74
- unstructured_ingest/cli/cmds/fsspec/sftp.py +0 -58
- unstructured_ingest/cli/cmds/github.py +0 -54
- unstructured_ingest/cli/cmds/gitlab.py +0 -54
- unstructured_ingest/cli/cmds/google_drive.py +0 -49
- unstructured_ingest/cli/cmds/hubspot.py +0 -70
- unstructured_ingest/cli/cmds/jira.py +0 -71
- unstructured_ingest/cli/cmds/kafka.py +0 -102
- unstructured_ingest/cli/cmds/local.py +0 -43
- unstructured_ingest/cli/cmds/mongodb.py +0 -72
- unstructured_ingest/cli/cmds/notion.py +0 -48
- unstructured_ingest/cli/cmds/onedrive.py +0 -66
- unstructured_ingest/cli/cmds/opensearch.py +0 -117
- unstructured_ingest/cli/cmds/outlook.py +0 -67
- unstructured_ingest/cli/cmds/pinecone.py +0 -71
- unstructured_ingest/cli/cmds/qdrant.py +0 -124
- unstructured_ingest/cli/cmds/reddit.py +0 -67
- unstructured_ingest/cli/cmds/salesforce.py +0 -58
- unstructured_ingest/cli/cmds/sharepoint.py +0 -66
- unstructured_ingest/cli/cmds/slack.py +0 -56
- unstructured_ingest/cli/cmds/sql.py +0 -66
- unstructured_ingest/cli/cmds/vectara.py +0 -66
- unstructured_ingest/cli/cmds/weaviate.py +0 -98
- unstructured_ingest/cli/cmds/wikipedia.py +0 -40
- unstructured_ingest/cli/common.py +0 -7
- unstructured_ingest/cli/interfaces.py +0 -663
- unstructured_ingest/cli/utils.py +0 -205
- unstructured_ingest/connector/airtable.py +0 -309
- unstructured_ingest/connector/astradb.py +0 -267
- unstructured_ingest/connector/azure_ai_search.py +0 -144
- unstructured_ingest/connector/biomed.py +0 -320
- unstructured_ingest/connector/chroma.py +0 -158
- unstructured_ingest/connector/clarifai.py +0 -122
- unstructured_ingest/connector/confluence.py +0 -285
- unstructured_ingest/connector/databricks_volumes.py +0 -137
- unstructured_ingest/connector/delta_table.py +0 -203
- unstructured_ingest/connector/discord.py +0 -180
- unstructured_ingest/connector/elasticsearch.py +0 -396
- unstructured_ingest/connector/fsspec/azure.py +0 -78
- unstructured_ingest/connector/fsspec/box.py +0 -109
- unstructured_ingest/connector/fsspec/dropbox.py +0 -160
- unstructured_ingest/connector/fsspec/fsspec.py +0 -359
- unstructured_ingest/connector/fsspec/gcs.py +0 -82
- unstructured_ingest/connector/fsspec/s3.py +0 -62
- unstructured_ingest/connector/fsspec/sftp.py +0 -81
- unstructured_ingest/connector/git.py +0 -124
- unstructured_ingest/connector/github.py +0 -174
- unstructured_ingest/connector/gitlab.py +0 -142
- unstructured_ingest/connector/google_drive.py +0 -348
- unstructured_ingest/connector/hubspot.py +0 -278
- unstructured_ingest/connector/jira.py +0 -469
- unstructured_ingest/connector/kafka.py +0 -293
- unstructured_ingest/connector/local.py +0 -139
- unstructured_ingest/connector/mongodb.py +0 -284
- unstructured_ingest/connector/notion/client.py +0 -248
- unstructured_ingest/connector/notion/connector.py +0 -469
- unstructured_ingest/connector/notion/helpers.py +0 -584
- unstructured_ingest/connector/notion/types/blocks/bookmark.py +0 -40
- unstructured_ingest/connector/notion/types/blocks/callout.py +0 -94
- unstructured_ingest/connector/notion/types/blocks/child_database.py +0 -23
- unstructured_ingest/connector/notion/types/blocks/code.py +0 -43
- unstructured_ingest/connector/notion/types/blocks/column_list.py +0 -35
- unstructured_ingest/connector/notion/types/blocks/divider.py +0 -22
- unstructured_ingest/connector/notion/types/blocks/heading.py +0 -37
- unstructured_ingest/connector/notion/types/blocks/link_preview.py +0 -24
- unstructured_ingest/connector/notion/types/blocks/numbered_list.py +0 -29
- unstructured_ingest/connector/notion/types/blocks/quote.py +0 -37
- unstructured_ingest/connector/notion/types/blocks/synced_block.py +0 -57
- unstructured_ingest/connector/notion/types/blocks/table.py +0 -63
- unstructured_ingest/connector/notion/types/blocks/table_of_contents.py +0 -23
- unstructured_ingest/connector/notion/types/blocks/template.py +0 -30
- unstructured_ingest/connector/notion/types/blocks/todo.py +0 -42
- unstructured_ingest/connector/notion/types/blocks/unsupported.py +0 -20
- unstructured_ingest/connector/notion/types/database_properties/__init__.py +0 -106
- unstructured_ingest/connector/notion/types/database_properties/checkbox.py +0 -38
- unstructured_ingest/connector/notion/types/database_properties/date.py +0 -41
- unstructured_ingest/connector/notion/types/database_properties/formula.py +0 -49
- unstructured_ingest/connector/notion/types/database_properties/last_edited_time.py +0 -34
- unstructured_ingest/connector/notion/types/database_properties/multiselect.py +0 -73
- unstructured_ingest/connector/notion/types/database_properties/people.py +0 -40
- unstructured_ingest/connector/notion/types/database_properties/phone_number.py +0 -36
- unstructured_ingest/connector/notion/types/database_properties/relation.py +0 -67
- unstructured_ingest/connector/notion/types/database_properties/select.py +0 -68
- unstructured_ingest/connector/notion/types/database_properties/status.py +0 -80
- unstructured_ingest/connector/notion/types/database_properties/unique_id.py +0 -50
- unstructured_ingest/connector/notion/types/date.py +0 -26
- unstructured_ingest/connector/notion/types/file.py +0 -51
- unstructured_ingest/connector/notion/types/user.py +0 -76
- unstructured_ingest/connector/onedrive.py +0 -232
- unstructured_ingest/connector/opensearch.py +0 -218
- unstructured_ingest/connector/outlook.py +0 -285
- unstructured_ingest/connector/pinecone.py +0 -150
- unstructured_ingest/connector/qdrant.py +0 -144
- unstructured_ingest/connector/reddit.py +0 -166
- unstructured_ingest/connector/registry.py +0 -109
- unstructured_ingest/connector/salesforce.py +0 -301
- unstructured_ingest/connector/sharepoint.py +0 -573
- unstructured_ingest/connector/slack.py +0 -224
- unstructured_ingest/connector/sql.py +0 -199
- unstructured_ingest/connector/vectara.py +0 -253
- unstructured_ingest/connector/weaviate.py +0 -190
- unstructured_ingest/connector/wikipedia.py +0 -208
- unstructured_ingest/enhanced_dataclass/__init__.py +0 -4
- unstructured_ingest/enhanced_dataclass/core.py +0 -99
- unstructured_ingest/enhanced_dataclass/dataclasses.py +0 -54
- unstructured_ingest/enhanced_dataclass/json_mixin.py +0 -125
- unstructured_ingest/interfaces.py +0 -852
- unstructured_ingest/pipeline/copy.py +0 -19
- unstructured_ingest/pipeline/doc_factory.py +0 -12
- unstructured_ingest/pipeline/partition.py +0 -60
- unstructured_ingest/pipeline/permissions.py +0 -12
- unstructured_ingest/pipeline/reformat/chunking.py +0 -134
- unstructured_ingest/pipeline/reformat/embedding.py +0 -64
- unstructured_ingest/pipeline/source.py +0 -77
- unstructured_ingest/pipeline/utils.py +0 -6
- unstructured_ingest/pipeline/write.py +0 -18
- unstructured_ingest/processor.py +0 -93
- unstructured_ingest/runner/__init__.py +0 -104
- unstructured_ingest/runner/airtable.py +0 -35
- unstructured_ingest/runner/astradb.py +0 -34
- unstructured_ingest/runner/base_runner.py +0 -89
- unstructured_ingest/runner/biomed.py +0 -45
- unstructured_ingest/runner/confluence.py +0 -35
- unstructured_ingest/runner/delta_table.py +0 -34
- unstructured_ingest/runner/discord.py +0 -35
- unstructured_ingest/runner/elasticsearch.py +0 -40
- unstructured_ingest/runner/fsspec/azure.py +0 -30
- unstructured_ingest/runner/fsspec/box.py +0 -28
- unstructured_ingest/runner/fsspec/dropbox.py +0 -30
- unstructured_ingest/runner/fsspec/fsspec.py +0 -40
- unstructured_ingest/runner/fsspec/gcs.py +0 -28
- unstructured_ingest/runner/fsspec/s3.py +0 -28
- unstructured_ingest/runner/fsspec/sftp.py +0 -28
- unstructured_ingest/runner/github.py +0 -37
- unstructured_ingest/runner/gitlab.py +0 -37
- unstructured_ingest/runner/google_drive.py +0 -35
- unstructured_ingest/runner/hubspot.py +0 -35
- unstructured_ingest/runner/jira.py +0 -35
- unstructured_ingest/runner/kafka.py +0 -34
- unstructured_ingest/runner/local.py +0 -23
- unstructured_ingest/runner/mongodb.py +0 -34
- unstructured_ingest/runner/notion.py +0 -61
- unstructured_ingest/runner/onedrive.py +0 -35
- unstructured_ingest/runner/opensearch.py +0 -40
- unstructured_ingest/runner/outlook.py +0 -33
- unstructured_ingest/runner/reddit.py +0 -35
- unstructured_ingest/runner/salesforce.py +0 -33
- unstructured_ingest/runner/sharepoint.py +0 -35
- unstructured_ingest/runner/slack.py +0 -33
- unstructured_ingest/runner/utils.py +0 -47
- unstructured_ingest/runner/wikipedia.py +0 -35
- unstructured_ingest/runner/writers/__init__.py +0 -48
- unstructured_ingest/runner/writers/astradb.py +0 -22
- unstructured_ingest/runner/writers/azure_ai_search.py +0 -24
- unstructured_ingest/runner/writers/base_writer.py +0 -26
- unstructured_ingest/runner/writers/chroma.py +0 -22
- unstructured_ingest/runner/writers/clarifai.py +0 -19
- unstructured_ingest/runner/writers/databricks_volumes.py +0 -25
- unstructured_ingest/runner/writers/delta_table.py +0 -24
- unstructured_ingest/runner/writers/elasticsearch.py +0 -24
- unstructured_ingest/runner/writers/fsspec/azure.py +0 -24
- unstructured_ingest/runner/writers/fsspec/box.py +0 -21
- unstructured_ingest/runner/writers/fsspec/dropbox.py +0 -21
- unstructured_ingest/runner/writers/fsspec/gcs.py +0 -19
- unstructured_ingest/runner/writers/fsspec/s3.py +0 -21
- unstructured_ingest/runner/writers/kafka.py +0 -21
- unstructured_ingest/runner/writers/mongodb.py +0 -21
- unstructured_ingest/runner/writers/opensearch.py +0 -26
- unstructured_ingest/runner/writers/pinecone.py +0 -21
- unstructured_ingest/runner/writers/qdrant.py +0 -19
- unstructured_ingest/runner/writers/sql.py +0 -22
- unstructured_ingest/runner/writers/vectara.py +0 -22
- unstructured_ingest/runner/writers/weaviate.py +0 -21
- unstructured_ingest/utils/google_filetype.py +0 -9
- unstructured_ingest/v2/__init__.py +0 -1
- unstructured_ingest/v2/cli/__init__.py +0 -0
- unstructured_ingest/v2/cli/base/__init__.py +0 -4
- unstructured_ingest/v2/cli/base/cmd.py +0 -269
- unstructured_ingest/v2/cli/base/dest.py +0 -85
- unstructured_ingest/v2/cli/base/src.py +0 -85
- unstructured_ingest/v2/cli/cli.py +0 -24
- unstructured_ingest/v2/cli/utils/__init__.py +0 -0
- unstructured_ingest/v2/logger.py +0 -126
- unstructured_ingest/v2/main.py +0 -11
- unstructured_ingest/v2/pipeline/__init__.py +0 -0
- unstructured_ingest/v2/pipeline/interfaces.py +0 -211
- unstructured_ingest/v2/pipeline/pipeline.py +0 -408
- unstructured_ingest/v2/pipeline/steps/__init__.py +0 -0
- unstructured_ingest/v2/processes/connectors/assets/__init__.py +0 -0
- unstructured_ingest/v2/processes/connectors/assets/databricks_delta_table_schema.sql +0 -10
- unstructured_ingest/v2/processes/connectors/assets/weaviate_collection_config.json +0 -23
- unstructured_ingest/v2/processes/connectors/notion/__init__.py +0 -0
- unstructured_ingest/v2/processes/connectors/notion/interfaces.py +0 -32
- unstructured_ingest/v2/processes/connectors/notion/types/__init__.py +0 -0
- unstructured_ingest/v2/processes/connectors/notion/types/block.py +0 -96
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/__init__.py +0 -63
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/breadcrumb.py +0 -21
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/bulleted_list_item.py +0 -31
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/child_page.py +0 -23
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/embed.py +0 -36
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/equation.py +0 -23
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/file.py +0 -49
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/image.py +0 -21
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/link_to_page.py +0 -29
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/paragraph.py +0 -31
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/pdf.py +0 -49
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/toggle.py +0 -37
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/video.py +0 -22
- unstructured_ingest/v2/processes/connectors/notion/types/database.py +0 -73
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/created_by.py +0 -35
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/created_time.py +0 -34
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/email.py +0 -36
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/files.py +0 -37
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/last_edited_by.py +0 -34
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/number.py +0 -49
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/rich_text.py +0 -43
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/rollup.py +0 -56
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/title.py +0 -37
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/url.py +0 -37
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/verification.py +0 -78
- unstructured_ingest/v2/processes/connectors/notion/types/page.py +0 -45
- unstructured_ingest/v2/processes/connectors/notion/types/parent.py +0 -66
- unstructured_ingest/v2/processes/connectors/notion/types/rich_text.py +0 -189
- unstructured_ingest/v2/processes/connectors/zendesk/__init__.py +0 -0
- unstructured_ingest/v2/processes/utils/__init__.py +0 -0
- unstructured_ingest/v2/types/__init__.py +0 -0
- unstructured_ingest-0.6.2.dist-info/RECORD +0 -589
- {test/unit/v2 → examples}/__init__.py +0 -0
- /test/unit/{v2/chunkers → chunkers}/__init__.py +0 -0
- /test/unit/{v2/connectors → connectors}/__init__.py +0 -0
- /test/unit/{v2/connectors → connectors}/ibm_watsonx/__init__.py +0 -0
- /test/unit/{v2/connectors → connectors}/motherduck/__init__.py +0 -0
- /test/unit/{v2/connectors → connectors}/sql/__init__.py +0 -0
- /test/unit/{v2/embedders → embedders}/__init__.py +0 -0
- /test/unit/{v2/embedders → embedders}/test_bedrock.py +0 -0
- /test/unit/{v2/embedders → embedders}/test_mixedbread.py +0 -0
- /test/unit/{v2/embedders → embedders}/test_octoai.py +0 -0
- /test/unit/{v2/embedders → embedders}/test_openai.py +0 -0
- /test/unit/{v2/embedders → embedders}/test_togetherai.py +0 -0
- /test/unit/{v2/embedders → embedders}/test_voyageai.py +0 -0
- /test/unit/{v2/partitioners → partitioners}/__init__.py +0 -0
- /test/unit/{v2/utils → utils}/__init__.py +0 -0
- /test/unit/{v2/utils → utils}/data_generator.py +0 -0
- /unstructured_ingest/{v2/cli → cli}/base/importer.py +0 -0
- /unstructured_ingest/cli/{cmds/fsspec → utils}/__init__.py +0 -0
- /unstructured_ingest/{v2/cli → cli}/utils/click.py +0 -0
- /unstructured_ingest/{connector → data_types}/__init__.py +0 -0
- /unstructured_ingest/{v2/errors.py → errors_v2.py} +0 -0
- /unstructured_ingest/{v2/interfaces → interfaces}/__init__.py +0 -0
- /unstructured_ingest/{v2/interfaces → interfaces}/connector.py +0 -0
- /unstructured_ingest/{v2/interfaces → interfaces}/process.py +0 -0
- /unstructured_ingest/{v2/interfaces → interfaces}/processor.py +0 -0
- /unstructured_ingest/{connector/fsspec → pipeline/steps}/__init__.py +0 -0
- /unstructured_ingest/{v2/processes → processes}/__init__.py +0 -0
- /unstructured_ingest/{connector/notion → processes/connectors/assets}/__init__.py +0 -0
- /unstructured_ingest/{v2/processes → processes}/connectors/fsspec/utils.py +0 -0
- /unstructured_ingest/{connector/notion/types → processes/connectors/notion}/__init__.py +0 -0
- /unstructured_ingest/{ingest_backoff → processes/connectors/notion/ingest_backoff}/__init__.py +0 -0
- /unstructured_ingest/{ingest_backoff → processes/connectors/notion/ingest_backoff}/_common.py +0 -0
- /unstructured_ingest/{connector → processes/connectors}/notion/interfaces.py +0 -0
- /unstructured_ingest/{pipeline/reformat → processes/connectors/notion/types}/__init__.py +0 -0
- /unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/__init__.py +0 -0
- /unstructured_ingest/{v2/processes → processes}/connectors/utils.py +0 -0
- /unstructured_ingest/{runner/fsspec → processes/connectors/zendesk}/__init__.py +0 -0
- /unstructured_ingest/{runner/writers/fsspec → processes/utils}/__init__.py +0 -0
- /unstructured_ingest/{v2 → utils}/constants.py +0 -0
- {unstructured_ingest-0.6.2.dist-info → unstructured_ingest-0.7.0.dist-info}/LICENSE.md +0 -0
- {unstructured_ingest-0.6.2.dist-info → unstructured_ingest-0.7.0.dist-info}/WHEEL +0 -0
- {unstructured_ingest-0.6.2.dist-info → unstructured_ingest-0.7.0.dist-info}/entry_points.txt +0 -0
|
@@ -1,34 +0,0 @@
|
|
|
1
|
-
import hashlib
|
|
2
|
-
import typing as t
|
|
3
|
-
from dataclasses import dataclass
|
|
4
|
-
|
|
5
|
-
from unstructured_ingest.interfaces import BaseSourceConnector
|
|
6
|
-
from unstructured_ingest.logger import logger
|
|
7
|
-
from unstructured_ingest.runner.base_runner import Runner
|
|
8
|
-
from unstructured_ingest.runner.utils import update_download_dir_hash
|
|
9
|
-
|
|
10
|
-
if t.TYPE_CHECKING:
|
|
11
|
-
from unstructured_ingest.connector.kafka import SimpleKafkaConfig
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
@dataclass
|
|
15
|
-
class KafkaRunner(Runner):
|
|
16
|
-
connector_config: "SimpleKafkaConfig"
|
|
17
|
-
|
|
18
|
-
def update_read_config(self):
|
|
19
|
-
hashed_dir_name = hashlib.sha256(
|
|
20
|
-
str(self.connector_config.bootstrap_server).encode("utf-8"),
|
|
21
|
-
)
|
|
22
|
-
self.read_config.download_dir = update_download_dir_hash(
|
|
23
|
-
connector_name="kafka",
|
|
24
|
-
read_config=self.read_config,
|
|
25
|
-
hashed_dir_name=hashed_dir_name,
|
|
26
|
-
logger=logger,
|
|
27
|
-
)
|
|
28
|
-
|
|
29
|
-
def get_source_connector_cls(self) -> t.Type[BaseSourceConnector]:
|
|
30
|
-
from unstructured_ingest.connector.kafka import (
|
|
31
|
-
KafkaSourceConnector,
|
|
32
|
-
)
|
|
33
|
-
|
|
34
|
-
return KafkaSourceConnector
|
|
@@ -1,23 +0,0 @@
|
|
|
1
|
-
import typing as t
|
|
2
|
-
from dataclasses import dataclass
|
|
3
|
-
|
|
4
|
-
from unstructured_ingest.interfaces import BaseSourceConnector
|
|
5
|
-
from unstructured_ingest.runner.base_runner import Runner
|
|
6
|
-
|
|
7
|
-
if t.TYPE_CHECKING:
|
|
8
|
-
from unstructured_ingest.connector.local import SimpleLocalConfig
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
@dataclass
|
|
12
|
-
class LocalRunner(Runner):
|
|
13
|
-
connector_config: "SimpleLocalConfig"
|
|
14
|
-
|
|
15
|
-
def update_read_config(self):
|
|
16
|
-
pass
|
|
17
|
-
|
|
18
|
-
def get_source_connector_cls(self) -> t.Type[BaseSourceConnector]:
|
|
19
|
-
from unstructured_ingest.connector.local import (
|
|
20
|
-
LocalSourceConnector,
|
|
21
|
-
)
|
|
22
|
-
|
|
23
|
-
return LocalSourceConnector
|
|
@@ -1,34 +0,0 @@
|
|
|
1
|
-
import hashlib
|
|
2
|
-
import typing as t
|
|
3
|
-
from dataclasses import dataclass
|
|
4
|
-
|
|
5
|
-
from unstructured_ingest.interfaces import BaseSourceConnector
|
|
6
|
-
from unstructured_ingest.logger import logger
|
|
7
|
-
from unstructured_ingest.runner.base_runner import Runner
|
|
8
|
-
from unstructured_ingest.runner.utils import update_download_dir_hash
|
|
9
|
-
|
|
10
|
-
if t.TYPE_CHECKING:
|
|
11
|
-
from unstructured_ingest.connector.mongodb import SimpleMongoDBConfig
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
@dataclass
|
|
15
|
-
class MongoDBRunner(Runner):
|
|
16
|
-
connector_config: "SimpleMongoDBConfig"
|
|
17
|
-
|
|
18
|
-
def update_read_config(self):
|
|
19
|
-
hashed_dir_name = hashlib.sha256(
|
|
20
|
-
str(self.connector_config.access_config.uri).encode("utf-8"),
|
|
21
|
-
)
|
|
22
|
-
self.read_config.download_dir = update_download_dir_hash(
|
|
23
|
-
connector_name="mongodb",
|
|
24
|
-
read_config=self.read_config,
|
|
25
|
-
hashed_dir_name=hashed_dir_name,
|
|
26
|
-
logger=logger,
|
|
27
|
-
)
|
|
28
|
-
|
|
29
|
-
def get_source_connector_cls(self) -> t.Type[BaseSourceConnector]:
|
|
30
|
-
from unstructured_ingest.connector.mongodb import (
|
|
31
|
-
MongoDBSourceConnector,
|
|
32
|
-
)
|
|
33
|
-
|
|
34
|
-
return MongoDBSourceConnector
|
|
@@ -1,61 +0,0 @@
|
|
|
1
|
-
import hashlib
|
|
2
|
-
import typing as t
|
|
3
|
-
from dataclasses import dataclass
|
|
4
|
-
|
|
5
|
-
from unstructured_ingest.interfaces import BaseSourceConnector
|
|
6
|
-
from unstructured_ingest.logger import logger
|
|
7
|
-
from unstructured_ingest.runner.base_runner import Runner
|
|
8
|
-
from unstructured_ingest.runner.utils import update_download_dir_hash
|
|
9
|
-
|
|
10
|
-
if t.TYPE_CHECKING:
|
|
11
|
-
from unstructured_ingest.connector.notion.connector import SimpleNotionConfig
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
@dataclass
|
|
15
|
-
class NotionRunner(Runner):
|
|
16
|
-
connector_config: "SimpleNotionConfig"
|
|
17
|
-
|
|
18
|
-
def update_read_config(self):
|
|
19
|
-
if not self.connector_config.page_ids and not self.connector_config.database_ids:
|
|
20
|
-
raise ValueError("no page ids nor database ids provided")
|
|
21
|
-
|
|
22
|
-
if self.connector_config.page_ids and self.connector_config.database_ids:
|
|
23
|
-
hashed_dir_name = hashlib.sha256(
|
|
24
|
-
"{},{}".format(
|
|
25
|
-
",".join(self.connector_config.page_ids),
|
|
26
|
-
",".join(self.connector_config.database_ids),
|
|
27
|
-
).encode("utf-8"),
|
|
28
|
-
)
|
|
29
|
-
elif self.connector_config.page_ids:
|
|
30
|
-
hashed_dir_name = hashlib.sha256(
|
|
31
|
-
",".join(self.connector_config.page_ids).encode("utf-8"),
|
|
32
|
-
)
|
|
33
|
-
elif self.connector_config.database_ids:
|
|
34
|
-
hashed_dir_name = hashlib.sha256(
|
|
35
|
-
",".join(self.connector_config.database_ids).encode("utf-8"),
|
|
36
|
-
)
|
|
37
|
-
else:
|
|
38
|
-
raise ValueError("could not create local cache directory name")
|
|
39
|
-
|
|
40
|
-
self.read_config.download_dir = update_download_dir_hash(
|
|
41
|
-
connector_name="notion",
|
|
42
|
-
read_config=self.read_config,
|
|
43
|
-
hashed_dir_name=hashed_dir_name,
|
|
44
|
-
logger=logger,
|
|
45
|
-
)
|
|
46
|
-
|
|
47
|
-
def get_source_connector_cls(self) -> t.Type[BaseSourceConnector]:
|
|
48
|
-
from unstructured_ingest.connector.notion.connector import (
|
|
49
|
-
NotionSourceConnector,
|
|
50
|
-
)
|
|
51
|
-
|
|
52
|
-
return NotionSourceConnector
|
|
53
|
-
|
|
54
|
-
def get_source_connector(self) -> BaseSourceConnector:
|
|
55
|
-
source_connector_cls = self.get_source_connector_cls()
|
|
56
|
-
return source_connector_cls(
|
|
57
|
-
processor_config=self.processor_config,
|
|
58
|
-
connector_config=self.connector_config,
|
|
59
|
-
read_config=self.read_config,
|
|
60
|
-
retry_strategy_config=self.retry_strategy_config,
|
|
61
|
-
)
|
|
@@ -1,35 +0,0 @@
|
|
|
1
|
-
import hashlib
|
|
2
|
-
import typing as t
|
|
3
|
-
from dataclasses import dataclass
|
|
4
|
-
|
|
5
|
-
from unstructured_ingest.interfaces import BaseSourceConnector
|
|
6
|
-
from unstructured_ingest.logger import logger
|
|
7
|
-
from unstructured_ingest.runner.base_runner import Runner
|
|
8
|
-
from unstructured_ingest.runner.utils import update_download_dir_hash
|
|
9
|
-
|
|
10
|
-
if t.TYPE_CHECKING:
|
|
11
|
-
from unstructured_ingest.connector.onedrive import SimpleOneDriveConfig
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
@dataclass
|
|
15
|
-
class OneDriveRunner(Runner):
|
|
16
|
-
connector_config: "SimpleOneDriveConfig"
|
|
17
|
-
|
|
18
|
-
def update_read_config(self):
|
|
19
|
-
hashed_dir_name = hashlib.sha256(
|
|
20
|
-
f"{self.connector_config.tenant}_{self.connector_config.user_pname}".encode("utf-8"),
|
|
21
|
-
)
|
|
22
|
-
|
|
23
|
-
self.read_config.download_dir = update_download_dir_hash(
|
|
24
|
-
connector_name="onedrive",
|
|
25
|
-
read_config=self.read_config,
|
|
26
|
-
hashed_dir_name=hashed_dir_name,
|
|
27
|
-
logger=logger,
|
|
28
|
-
)
|
|
29
|
-
|
|
30
|
-
def get_source_connector_cls(self) -> t.Type[BaseSourceConnector]:
|
|
31
|
-
from unstructured_ingest.connector.onedrive import (
|
|
32
|
-
OneDriveSourceConnector,
|
|
33
|
-
)
|
|
34
|
-
|
|
35
|
-
return OneDriveSourceConnector
|
|
@@ -1,40 +0,0 @@
|
|
|
1
|
-
import hashlib
|
|
2
|
-
import typing as t
|
|
3
|
-
from dataclasses import dataclass
|
|
4
|
-
|
|
5
|
-
from unstructured_ingest.interfaces import BaseSourceConnector
|
|
6
|
-
from unstructured_ingest.logger import logger
|
|
7
|
-
from unstructured_ingest.runner.base_runner import Runner
|
|
8
|
-
from unstructured_ingest.runner.utils import update_download_dir_hash
|
|
9
|
-
|
|
10
|
-
if t.TYPE_CHECKING:
|
|
11
|
-
from unstructured_ingest.connector.opensearch import SimpleOpenSearchConfig
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
@dataclass
|
|
15
|
-
class OpenSearchRunner(Runner):
|
|
16
|
-
connector_config: "SimpleOpenSearchConfig"
|
|
17
|
-
|
|
18
|
-
def update_read_config(self):
|
|
19
|
-
hashed_dir_name = hashlib.sha256(
|
|
20
|
-
"{}_{}".format(
|
|
21
|
-
",".join(self.connector_config.access_config.hosts),
|
|
22
|
-
self.connector_config.index_name,
|
|
23
|
-
).encode(
|
|
24
|
-
"utf-8",
|
|
25
|
-
),
|
|
26
|
-
)
|
|
27
|
-
|
|
28
|
-
self.read_config.download_dir = update_download_dir_hash(
|
|
29
|
-
connector_name="opensearch",
|
|
30
|
-
read_config=self.read_config,
|
|
31
|
-
hashed_dir_name=hashed_dir_name,
|
|
32
|
-
logger=logger,
|
|
33
|
-
)
|
|
34
|
-
|
|
35
|
-
def get_source_connector_cls(self) -> t.Type[BaseSourceConnector]:
|
|
36
|
-
from unstructured_ingest.connector.opensearch import (
|
|
37
|
-
OpenSearchSourceConnector,
|
|
38
|
-
)
|
|
39
|
-
|
|
40
|
-
return OpenSearchSourceConnector
|
|
@@ -1,33 +0,0 @@
|
|
|
1
|
-
import hashlib
|
|
2
|
-
import typing as t
|
|
3
|
-
from dataclasses import dataclass
|
|
4
|
-
|
|
5
|
-
from unstructured_ingest.interfaces import BaseSourceConnector
|
|
6
|
-
from unstructured_ingest.logger import logger
|
|
7
|
-
from unstructured_ingest.runner.base_runner import Runner
|
|
8
|
-
from unstructured_ingest.runner.utils import update_download_dir_hash
|
|
9
|
-
|
|
10
|
-
if t.TYPE_CHECKING:
|
|
11
|
-
from unstructured_ingest.connector.outlook import SimpleOutlookConfig
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
@dataclass
|
|
15
|
-
class OutlookRunner(Runner):
|
|
16
|
-
connector_config: "SimpleOutlookConfig"
|
|
17
|
-
|
|
18
|
-
def update_read_config(self):
|
|
19
|
-
hashed_dir_name = hashlib.sha256(self.connector_config.user_email.encode("utf-8"))
|
|
20
|
-
|
|
21
|
-
self.read_config.download_dir = update_download_dir_hash(
|
|
22
|
-
connector_name="outlook",
|
|
23
|
-
read_config=self.read_config,
|
|
24
|
-
hashed_dir_name=hashed_dir_name,
|
|
25
|
-
logger=logger,
|
|
26
|
-
)
|
|
27
|
-
|
|
28
|
-
def get_source_connector_cls(self) -> t.Type[BaseSourceConnector]:
|
|
29
|
-
from unstructured_ingest.connector.outlook import (
|
|
30
|
-
OutlookSourceConnector,
|
|
31
|
-
)
|
|
32
|
-
|
|
33
|
-
return OutlookSourceConnector
|
|
@@ -1,35 +0,0 @@
|
|
|
1
|
-
import hashlib
|
|
2
|
-
import typing as t
|
|
3
|
-
from dataclasses import dataclass
|
|
4
|
-
|
|
5
|
-
from unstructured_ingest.interfaces import BaseSourceConnector
|
|
6
|
-
from unstructured_ingest.logger import logger
|
|
7
|
-
from unstructured_ingest.runner.base_runner import Runner
|
|
8
|
-
from unstructured_ingest.runner.utils import update_download_dir_hash
|
|
9
|
-
|
|
10
|
-
if t.TYPE_CHECKING:
|
|
11
|
-
from unstructured_ingest.connector.reddit import SimpleRedditConfig
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
@dataclass
|
|
15
|
-
class RedditRunner(Runner):
|
|
16
|
-
connector_config: "SimpleRedditConfig"
|
|
17
|
-
|
|
18
|
-
def update_read_config(self):
|
|
19
|
-
hashed_dir_name = hashlib.sha256(
|
|
20
|
-
self.connector_config.subreddit_name.encode("utf-8"),
|
|
21
|
-
)
|
|
22
|
-
|
|
23
|
-
self.read_config.download_dir = update_download_dir_hash(
|
|
24
|
-
connector_name="reddit",
|
|
25
|
-
read_config=self.read_config,
|
|
26
|
-
hashed_dir_name=hashed_dir_name,
|
|
27
|
-
logger=logger,
|
|
28
|
-
)
|
|
29
|
-
|
|
30
|
-
def get_source_connector_cls(self) -> t.Type[BaseSourceConnector]:
|
|
31
|
-
from unstructured_ingest.connector.reddit import (
|
|
32
|
-
RedditSourceConnector,
|
|
33
|
-
)
|
|
34
|
-
|
|
35
|
-
return RedditSourceConnector
|
|
@@ -1,33 +0,0 @@
|
|
|
1
|
-
import hashlib
|
|
2
|
-
import typing as t
|
|
3
|
-
from dataclasses import dataclass
|
|
4
|
-
|
|
5
|
-
from unstructured_ingest.interfaces import BaseSourceConnector
|
|
6
|
-
from unstructured_ingest.logger import logger
|
|
7
|
-
from unstructured_ingest.runner.base_runner import Runner
|
|
8
|
-
from unstructured_ingest.runner.utils import update_download_dir_hash
|
|
9
|
-
|
|
10
|
-
if t.TYPE_CHECKING:
|
|
11
|
-
from unstructured_ingest.connector.salesforce import SimpleSalesforceConfig
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
@dataclass
|
|
15
|
-
class SalesforceRunner(Runner):
|
|
16
|
-
connector_config: "SimpleSalesforceConfig"
|
|
17
|
-
|
|
18
|
-
def update_read_config(self):
|
|
19
|
-
hashed_dir_name = hashlib.sha256(self.connector_config.username.encode("utf-8"))
|
|
20
|
-
|
|
21
|
-
self.read_config.download_dir = update_download_dir_hash(
|
|
22
|
-
connector_name="salesforce",
|
|
23
|
-
read_config=self.read_config,
|
|
24
|
-
hashed_dir_name=hashed_dir_name,
|
|
25
|
-
logger=logger,
|
|
26
|
-
)
|
|
27
|
-
|
|
28
|
-
def get_source_connector_cls(self) -> t.Type[BaseSourceConnector]:
|
|
29
|
-
from unstructured_ingest.connector.salesforce import (
|
|
30
|
-
SalesforceSourceConnector,
|
|
31
|
-
)
|
|
32
|
-
|
|
33
|
-
return SalesforceSourceConnector
|
|
@@ -1,35 +0,0 @@
|
|
|
1
|
-
import hashlib
|
|
2
|
-
import typing as t
|
|
3
|
-
from dataclasses import dataclass
|
|
4
|
-
|
|
5
|
-
from unstructured_ingest.interfaces import BaseSourceConnector
|
|
6
|
-
from unstructured_ingest.logger import logger
|
|
7
|
-
from unstructured_ingest.runner.base_runner import Runner
|
|
8
|
-
from unstructured_ingest.runner.utils import update_download_dir_hash
|
|
9
|
-
|
|
10
|
-
if t.TYPE_CHECKING:
|
|
11
|
-
from unstructured_ingest.connector.sharepoint import SimpleSharepointConfig
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
@dataclass
|
|
15
|
-
class SharePointRunner(Runner):
|
|
16
|
-
connector_config: "SimpleSharepointConfig"
|
|
17
|
-
|
|
18
|
-
def update_read_config(self):
|
|
19
|
-
hashed_dir_name = hashlib.sha256(
|
|
20
|
-
f"{self.connector_config.site}_{self.connector_config.path}".encode("utf-8"),
|
|
21
|
-
)
|
|
22
|
-
|
|
23
|
-
self.read_config.download_dir = update_download_dir_hash(
|
|
24
|
-
connector_name="sharepoint",
|
|
25
|
-
read_config=self.read_config,
|
|
26
|
-
hashed_dir_name=hashed_dir_name,
|
|
27
|
-
logger=logger,
|
|
28
|
-
)
|
|
29
|
-
|
|
30
|
-
def get_source_connector_cls(self) -> t.Type[BaseSourceConnector]:
|
|
31
|
-
from unstructured_ingest.connector.sharepoint import (
|
|
32
|
-
SharepointSourceConnector,
|
|
33
|
-
)
|
|
34
|
-
|
|
35
|
-
return SharepointSourceConnector
|
|
@@ -1,33 +0,0 @@
|
|
|
1
|
-
import hashlib
|
|
2
|
-
import typing as t
|
|
3
|
-
|
|
4
|
-
from unstructured_ingest.interfaces import BaseSourceConnector
|
|
5
|
-
from unstructured_ingest.logger import logger
|
|
6
|
-
from unstructured_ingest.runner.base_runner import Runner
|
|
7
|
-
from unstructured_ingest.runner.utils import update_download_dir_hash
|
|
8
|
-
|
|
9
|
-
if t.TYPE_CHECKING:
|
|
10
|
-
from unstructured_ingest.connector.slack import SimpleSlackConfig
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
class SlackRunner(Runner):
|
|
14
|
-
connector_config: "SimpleSlackConfig"
|
|
15
|
-
|
|
16
|
-
def update_read_config(self):
|
|
17
|
-
hashed_dir_name = hashlib.sha256(
|
|
18
|
-
",".join(self.connector_config.channels).encode("utf-8"),
|
|
19
|
-
)
|
|
20
|
-
|
|
21
|
-
self.read_config.download_dir = update_download_dir_hash(
|
|
22
|
-
connector_name="slack",
|
|
23
|
-
read_config=self.read_config,
|
|
24
|
-
hashed_dir_name=hashed_dir_name,
|
|
25
|
-
logger=logger,
|
|
26
|
-
)
|
|
27
|
-
|
|
28
|
-
def get_source_connector_cls(self) -> t.Type[BaseSourceConnector]:
|
|
29
|
-
from unstructured_ingest.connector.slack import (
|
|
30
|
-
SlackSourceConnector,
|
|
31
|
-
)
|
|
32
|
-
|
|
33
|
-
return SlackSourceConnector
|
|
@@ -1,47 +0,0 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
import hashlib
|
|
4
|
-
import logging
|
|
5
|
-
from pathlib import Path
|
|
6
|
-
|
|
7
|
-
from unstructured_ingest.interfaces import (
|
|
8
|
-
ReadConfig,
|
|
9
|
-
)
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
def update_download_dir_remote_url(
|
|
13
|
-
connector_name: str,
|
|
14
|
-
read_config: ReadConfig,
|
|
15
|
-
remote_url: str,
|
|
16
|
-
logger: logging.Logger,
|
|
17
|
-
) -> str:
|
|
18
|
-
hashed_dir_name = hashlib.sha256(remote_url.encode("utf-8"))
|
|
19
|
-
return update_download_dir_hash(
|
|
20
|
-
connector_name=connector_name,
|
|
21
|
-
read_config=read_config,
|
|
22
|
-
hashed_dir_name=hashed_dir_name,
|
|
23
|
-
logger=logger,
|
|
24
|
-
)
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
def update_download_dir_hash(
|
|
28
|
-
connector_name: str,
|
|
29
|
-
read_config: ReadConfig,
|
|
30
|
-
hashed_dir_name: hashlib._Hash,
|
|
31
|
-
logger: logging.Logger,
|
|
32
|
-
) -> str:
|
|
33
|
-
if not read_config.download_dir:
|
|
34
|
-
cache_path = Path.home() / ".cache" / "unstructured" / "ingest"
|
|
35
|
-
if not cache_path.exists():
|
|
36
|
-
cache_path.mkdir(parents=True, exist_ok=True)
|
|
37
|
-
download_dir = cache_path / connector_name / hashed_dir_name.hexdigest()[:10]
|
|
38
|
-
if read_config.preserve_downloads:
|
|
39
|
-
logger.warning(
|
|
40
|
-
f"Preserving downloaded files but download_dir is not specified,"
|
|
41
|
-
f" using {download_dir}",
|
|
42
|
-
)
|
|
43
|
-
new_download_dir = str(download_dir)
|
|
44
|
-
logger.debug(f"updating download directory to: {new_download_dir}")
|
|
45
|
-
else:
|
|
46
|
-
new_download_dir = read_config.download_dir
|
|
47
|
-
return new_download_dir
|
|
@@ -1,35 +0,0 @@
|
|
|
1
|
-
import hashlib
|
|
2
|
-
import typing as t
|
|
3
|
-
from dataclasses import dataclass
|
|
4
|
-
|
|
5
|
-
from unstructured_ingest.interfaces import BaseSourceConnector
|
|
6
|
-
from unstructured_ingest.logger import logger
|
|
7
|
-
from unstructured_ingest.runner.base_runner import Runner
|
|
8
|
-
from unstructured_ingest.runner.utils import update_download_dir_hash
|
|
9
|
-
|
|
10
|
-
if t.TYPE_CHECKING:
|
|
11
|
-
from unstructured_ingest.connector.wikipedia import SimpleWikipediaConfig
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
@dataclass
|
|
15
|
-
class WikipediaRunner(Runner):
|
|
16
|
-
connector_config: "SimpleWikipediaConfig"
|
|
17
|
-
|
|
18
|
-
def update_read_config(self):
|
|
19
|
-
hashed_dir_name = hashlib.sha256(
|
|
20
|
-
self.connector_config.page_title.encode("utf-8"),
|
|
21
|
-
)
|
|
22
|
-
|
|
23
|
-
self.read_config.download_dir = update_download_dir_hash(
|
|
24
|
-
connector_name="wikipedia",
|
|
25
|
-
read_config=self.read_config,
|
|
26
|
-
hashed_dir_name=hashed_dir_name,
|
|
27
|
-
logger=logger,
|
|
28
|
-
)
|
|
29
|
-
|
|
30
|
-
def get_source_connector_cls(self) -> t.Type[BaseSourceConnector]:
|
|
31
|
-
from unstructured_ingest.connector.wikipedia import (
|
|
32
|
-
WikipediaSourceConnector,
|
|
33
|
-
)
|
|
34
|
-
|
|
35
|
-
return WikipediaSourceConnector
|
|
@@ -1,48 +0,0 @@
|
|
|
1
|
-
import typing as t
|
|
2
|
-
|
|
3
|
-
from .astradb import AstraDBWriter
|
|
4
|
-
from .azure_ai_search import AzureAiSearchWriter
|
|
5
|
-
from .base_writer import Writer
|
|
6
|
-
from .chroma import ChromaWriter
|
|
7
|
-
from .clarifai import ClarifaiWriter
|
|
8
|
-
from .databricks_volumes import DatabricksVolumesWriter
|
|
9
|
-
from .delta_table import DeltaTableWriter
|
|
10
|
-
from .elasticsearch import ElasticsearchWriter
|
|
11
|
-
from .fsspec.azure import AzureWriter
|
|
12
|
-
from .fsspec.box import BoxWriter
|
|
13
|
-
from .fsspec.dropbox import DropboxWriter
|
|
14
|
-
from .fsspec.gcs import GcsWriter
|
|
15
|
-
from .fsspec.s3 import S3Writer
|
|
16
|
-
from .kafka import KafkaWriter
|
|
17
|
-
from .mongodb import MongodbWriter
|
|
18
|
-
from .opensearch import OpenSearchWriter
|
|
19
|
-
from .pinecone import PineconeWriter
|
|
20
|
-
from .qdrant import QdrantWriter
|
|
21
|
-
from .sql import SqlWriter
|
|
22
|
-
from .vectara import VectaraWriter
|
|
23
|
-
from .weaviate import WeaviateWriter
|
|
24
|
-
|
|
25
|
-
writer_map: t.Dict[str, t.Type[Writer]] = {
|
|
26
|
-
"astradb": AstraDBWriter,
|
|
27
|
-
"azure": AzureWriter,
|
|
28
|
-
"azure_ai_search": AzureAiSearchWriter,
|
|
29
|
-
"box": BoxWriter,
|
|
30
|
-
"chroma": ChromaWriter,
|
|
31
|
-
"clarifai": ClarifaiWriter,
|
|
32
|
-
"databricks_volumes": DatabricksVolumesWriter,
|
|
33
|
-
"delta_table": DeltaTableWriter,
|
|
34
|
-
"dropbox": DropboxWriter,
|
|
35
|
-
"elasticsearch": ElasticsearchWriter,
|
|
36
|
-
"gcs": GcsWriter,
|
|
37
|
-
"kafka": KafkaWriter,
|
|
38
|
-
"mongodb": MongodbWriter,
|
|
39
|
-
"opensearch": OpenSearchWriter,
|
|
40
|
-
"pinecone": PineconeWriter,
|
|
41
|
-
"qdrant": QdrantWriter,
|
|
42
|
-
"s3": S3Writer,
|
|
43
|
-
"sql": SqlWriter,
|
|
44
|
-
"vectara": VectaraWriter,
|
|
45
|
-
"weaviate": WeaviateWriter,
|
|
46
|
-
}
|
|
47
|
-
|
|
48
|
-
__all__ = ["writer_map"]
|
|
@@ -1,22 +0,0 @@
|
|
|
1
|
-
import typing as t
|
|
2
|
-
from dataclasses import dataclass
|
|
3
|
-
|
|
4
|
-
from unstructured_ingest.enhanced_dataclass import EnhancedDataClassJsonMixin
|
|
5
|
-
from unstructured_ingest.interfaces import BaseDestinationConnector
|
|
6
|
-
from unstructured_ingest.runner.writers.base_writer import Writer
|
|
7
|
-
|
|
8
|
-
if t.TYPE_CHECKING:
|
|
9
|
-
from unstructured_ingest.connector.astradb import AstraDBWriteConfig, SimpleAstraDBConfig
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
@dataclass
|
|
13
|
-
class AstraDBWriter(Writer, EnhancedDataClassJsonMixin):
|
|
14
|
-
write_config: "AstraDBWriteConfig"
|
|
15
|
-
connector_config: "SimpleAstraDBConfig"
|
|
16
|
-
|
|
17
|
-
def get_connector_cls(self) -> t.Type[BaseDestinationConnector]:
|
|
18
|
-
from unstructured_ingest.connector.astradb import (
|
|
19
|
-
AstraDBDestinationConnector,
|
|
20
|
-
)
|
|
21
|
-
|
|
22
|
-
return AstraDBDestinationConnector
|
|
@@ -1,24 +0,0 @@
|
|
|
1
|
-
import typing as t
|
|
2
|
-
from dataclasses import dataclass
|
|
3
|
-
|
|
4
|
-
from unstructured_ingest.interfaces import BaseDestinationConnector
|
|
5
|
-
from unstructured_ingest.runner.writers.base_writer import Writer
|
|
6
|
-
|
|
7
|
-
if t.TYPE_CHECKING:
|
|
8
|
-
from unstructured_ingest.connector.azure_ai_search import (
|
|
9
|
-
AzureAISearchWriteConfig,
|
|
10
|
-
SimpleAzureAISearchStorageConfig,
|
|
11
|
-
)
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
@dataclass
|
|
15
|
-
class AzureAiSearchWriter(Writer):
|
|
16
|
-
connector_config: "SimpleAzureAISearchStorageConfig"
|
|
17
|
-
write_config: "AzureAISearchWriteConfig"
|
|
18
|
-
|
|
19
|
-
def get_connector_cls(self) -> t.Type[BaseDestinationConnector]:
|
|
20
|
-
from unstructured_ingest.connector.azure_ai_search import (
|
|
21
|
-
AzureAISearchDestinationConnector,
|
|
22
|
-
)
|
|
23
|
-
|
|
24
|
-
return AzureAISearchDestinationConnector
|
|
@@ -1,26 +0,0 @@
|
|
|
1
|
-
import typing as t
|
|
2
|
-
from abc import ABC, abstractmethod
|
|
3
|
-
from dataclasses import dataclass
|
|
4
|
-
|
|
5
|
-
from unstructured_ingest.interfaces import (
|
|
6
|
-
BaseConnectorConfig,
|
|
7
|
-
BaseDestinationConnector,
|
|
8
|
-
WriteConfig,
|
|
9
|
-
)
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
@dataclass
|
|
13
|
-
class Writer(ABC):
|
|
14
|
-
connector_config: BaseConnectorConfig
|
|
15
|
-
write_config: WriteConfig
|
|
16
|
-
|
|
17
|
-
@abstractmethod
|
|
18
|
-
def get_connector_cls(self) -> t.Type[BaseDestinationConnector]:
|
|
19
|
-
pass
|
|
20
|
-
|
|
21
|
-
def get_connector(self, **kwargs) -> BaseDestinationConnector:
|
|
22
|
-
connector_cls = self.get_connector_cls()
|
|
23
|
-
return connector_cls(
|
|
24
|
-
write_config=self.write_config,
|
|
25
|
-
connector_config=self.connector_config,
|
|
26
|
-
)
|
|
@@ -1,22 +0,0 @@
|
|
|
1
|
-
import typing as t
|
|
2
|
-
from dataclasses import dataclass
|
|
3
|
-
|
|
4
|
-
from unstructured_ingest.enhanced_dataclass import EnhancedDataClassJsonMixin
|
|
5
|
-
from unstructured_ingest.interfaces import BaseDestinationConnector
|
|
6
|
-
from unstructured_ingest.runner.writers.base_writer import Writer
|
|
7
|
-
|
|
8
|
-
if t.TYPE_CHECKING:
|
|
9
|
-
from unstructured_ingest.connector.chroma import ChromaWriteConfig, SimpleChromaConfig
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
@dataclass
|
|
13
|
-
class ChromaWriter(Writer, EnhancedDataClassJsonMixin):
|
|
14
|
-
write_config: "ChromaWriteConfig"
|
|
15
|
-
connector_config: "SimpleChromaConfig"
|
|
16
|
-
|
|
17
|
-
def get_connector_cls(self) -> t.Type[BaseDestinationConnector]:
|
|
18
|
-
from unstructured_ingest.connector.chroma import (
|
|
19
|
-
ChromaDestinationConnector,
|
|
20
|
-
)
|
|
21
|
-
|
|
22
|
-
return ChromaDestinationConnector
|
|
@@ -1,19 +0,0 @@
|
|
|
1
|
-
import typing as t
|
|
2
|
-
from dataclasses import dataclass
|
|
3
|
-
|
|
4
|
-
from unstructured_ingest.interfaces import BaseDestinationConnector
|
|
5
|
-
from unstructured_ingest.runner.writers.base_writer import Writer
|
|
6
|
-
|
|
7
|
-
if t.TYPE_CHECKING:
|
|
8
|
-
from unstructured_ingest.connector.clarifai import ClarifaiWriteConfig, SimpleClarifaiConfig
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
@dataclass
|
|
12
|
-
class ClarifaiWriter(Writer):
|
|
13
|
-
write_config: "ClarifaiWriteConfig"
|
|
14
|
-
connector_config: "SimpleClarifaiConfig"
|
|
15
|
-
|
|
16
|
-
def get_connector_cls(self) -> t.Type[BaseDestinationConnector]:
|
|
17
|
-
from unstructured_ingest.connector.clarifai import ClarifaiDestinationConnector
|
|
18
|
-
|
|
19
|
-
return ClarifaiDestinationConnector
|