unstructured-ingest 0.6.4__py3-none-any.whl → 0.7.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of unstructured-ingest might be problematic. Click here for more details.
- examples/airtable.py +44 -0
- examples/azure_cognitive_search.py +55 -0
- examples/chroma.py +54 -0
- examples/couchbase.py +55 -0
- examples/databricks_volumes_dest.py +55 -0
- examples/databricks_volumes_source.py +53 -0
- examples/delta_table.py +45 -0
- examples/discord_example.py +36 -0
- examples/elasticsearch.py +49 -0
- examples/google_drive.py +45 -0
- examples/kdbai.py +54 -0
- examples/local.py +36 -0
- examples/milvus.py +44 -0
- examples/mongodb.py +53 -0
- examples/opensearch.py +50 -0
- examples/pinecone.py +57 -0
- examples/s3.py +38 -0
- examples/salesforce.py +44 -0
- examples/sharepoint.py +47 -0
- examples/singlestore.py +49 -0
- examples/sql.py +90 -0
- examples/vectara.py +54 -0
- examples/weaviate.py +44 -0
- test/integration/chunkers/test_chunkers.py +1 -1
- test/integration/connectors/conftest.py +1 -1
- test/integration/connectors/databricks/test_volumes_native.py +3 -3
- test/integration/connectors/discord/test_discord.py +1 -1
- test/integration/connectors/duckdb/test_duckdb.py +2 -2
- test/integration/connectors/duckdb/test_motherduck.py +2 -2
- test/integration/connectors/elasticsearch/test_elasticsearch.py +2 -2
- test/integration/connectors/elasticsearch/test_opensearch.py +2 -2
- test/integration/connectors/sql/test_databricks_delta_tables.py +3 -3
- test/integration/connectors/sql/test_postgres.py +2 -2
- test/integration/connectors/sql/test_singlestore.py +2 -2
- test/integration/connectors/sql/test_snowflake.py +2 -2
- test/integration/connectors/sql/test_sqlite.py +2 -2
- test/integration/connectors/sql/test_vastdb.py +1 -1
- test/integration/connectors/test_astradb.py +2 -2
- test/integration/connectors/test_azure_ai_search.py +2 -2
- test/integration/connectors/test_chroma.py +2 -2
- test/integration/connectors/test_confluence.py +1 -1
- test/integration/connectors/test_delta_table.py +2 -2
- test/integration/connectors/test_dropbox.py +2 -2
- test/integration/connectors/test_github.py +1 -1
- test/integration/connectors/test_google_drive.py +2 -2
- test/integration/connectors/test_jira.py +1 -1
- test/integration/connectors/test_lancedb.py +7 -7
- test/integration/connectors/test_milvus.py +2 -2
- test/integration/connectors/test_mongodb.py +2 -2
- test/integration/connectors/test_neo4j.py +7 -7
- test/integration/connectors/test_notion.py +2 -2
- test/integration/connectors/test_onedrive.py +2 -2
- test/integration/connectors/test_pinecone.py +3 -3
- test/integration/connectors/test_qdrant.py +6 -6
- test/integration/connectors/test_redis.py +3 -3
- test/integration/connectors/test_s3.py +3 -3
- test/integration/connectors/test_sharepoint.py +1 -1
- test/integration/connectors/test_vectara.py +4 -4
- test/integration/connectors/test_zendesk.py +2 -2
- test/integration/connectors/utils/validation/destination.py +2 -2
- test/integration/connectors/utils/validation/source.py +2 -2
- test/integration/connectors/weaviate/test_cloud.py +1 -1
- test/integration/connectors/weaviate/test_local.py +2 -2
- test/integration/embedders/test_azure_openai.py +1 -1
- test/integration/embedders/test_bedrock.py +2 -2
- test/integration/embedders/test_huggingface.py +1 -1
- test/integration/embedders/test_mixedbread.py +1 -1
- test/integration/embedders/test_octoai.py +2 -2
- test/integration/embedders/test_openai.py +2 -2
- test/integration/embedders/test_togetherai.py +2 -2
- test/integration/embedders/test_vertexai.py +1 -1
- test/integration/embedders/test_voyageai.py +1 -1
- test/integration/partitioners/test_partitioner.py +2 -2
- test/unit/{v2/chunkers → chunkers}/test_chunkers.py +1 -1
- test/unit/{v2/connectors → connectors}/ibm_watsonx/test_ibm_watsonx_s3.py +6 -6
- test/unit/{v2/connectors → connectors}/motherduck/test_base.py +5 -5
- test/unit/{v2/connectors → connectors}/sql/test_sql.py +4 -4
- test/unit/{v2/connectors → connectors}/test_confluence.py +1 -1
- test/unit/{v2/connectors → connectors}/test_jira.py +1 -1
- test/unit/{v2/embedders → embedders}/test_huggingface.py +1 -1
- test/unit/{v2/embedders → embedders}/test_vertexai.py +1 -1
- test/unit/{v2/partitioners → partitioners}/test_partitioner.py +2 -2
- test/unit/test_html.py +1 -1
- test/unit/{v2/test_interfaces.py → test_interfaces.py} +1 -1
- test/unit/test_utils.py +106 -97
- unstructured_ingest/__version__.py +1 -1
- unstructured_ingest/cli/__init__.py +0 -14
- unstructured_ingest/cli/base/__init__.py +4 -0
- unstructured_ingest/cli/base/cmd.py +259 -9
- unstructured_ingest/cli/base/dest.py +58 -61
- unstructured_ingest/cli/base/src.py +54 -36
- unstructured_ingest/cli/cli.py +4 -17
- unstructured_ingest/{v2/cli → cli}/cmds.py +2 -2
- unstructured_ingest/{v2/cli → cli}/utils/model_conversion.py +6 -6
- unstructured_ingest/{v2/types → data_types}/file_data.py +1 -1
- unstructured_ingest/embed/bedrock.py +3 -3
- unstructured_ingest/embed/octoai.py +3 -3
- unstructured_ingest/embed/openai.py +3 -3
- unstructured_ingest/embed/togetherai.py +4 -4
- unstructured_ingest/embed/vertexai.py +1 -1
- unstructured_ingest/embed/voyageai.py +4 -4
- unstructured_ingest/{v2/interfaces → interfaces}/downloader.py +3 -3
- unstructured_ingest/{v2/interfaces → interfaces}/indexer.py +3 -3
- unstructured_ingest/{v2/interfaces → interfaces}/upload_stager.py +2 -2
- unstructured_ingest/{v2/interfaces → interfaces}/uploader.py +2 -2
- unstructured_ingest/{v2/otel.py → otel.py} +1 -1
- unstructured_ingest/pipeline/__init__.py +0 -22
- unstructured_ingest/pipeline/interfaces.py +179 -238
- unstructured_ingest/{v2/pipeline → pipeline}/otel.py +2 -2
- unstructured_ingest/pipeline/pipeline.py +388 -97
- unstructured_ingest/{v2/pipeline → pipeline}/steps/chunk.py +5 -5
- unstructured_ingest/{v2/pipeline → pipeline}/steps/download.py +5 -5
- unstructured_ingest/{v2/pipeline → pipeline}/steps/embed.py +5 -5
- unstructured_ingest/{v2/pipeline → pipeline}/steps/filter.py +4 -4
- unstructured_ingest/{v2/pipeline → pipeline}/steps/index.py +5 -5
- unstructured_ingest/{v2/pipeline → pipeline}/steps/partition.py +5 -5
- unstructured_ingest/{v2/pipeline → pipeline}/steps/stage.py +5 -5
- unstructured_ingest/{v2/pipeline → pipeline}/steps/uncompress.py +4 -4
- unstructured_ingest/{v2/pipeline → pipeline}/steps/upload.py +5 -5
- unstructured_ingest/{v2/processes → processes}/chunker.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connector_registry.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/__init__.py +11 -11
- unstructured_ingest/{v2/processes → processes}/connectors/airtable.py +4 -4
- unstructured_ingest/{v2/processes → processes}/connectors/astradb.py +15 -15
- unstructured_ingest/{v2/processes → processes}/connectors/azure_ai_search.py +8 -9
- unstructured_ingest/{v2/processes → processes}/connectors/chroma.py +10 -7
- unstructured_ingest/{v2/processes → processes}/connectors/confluence.py +11 -11
- unstructured_ingest/{v2/processes → processes}/connectors/couchbase.py +12 -12
- unstructured_ingest/{v2/processes → processes}/connectors/databricks/__init__.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/databricks/volumes.py +9 -9
- unstructured_ingest/{v2/processes → processes}/connectors/databricks/volumes_aws.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/databricks/volumes_azure.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/databricks/volumes_gcp.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/databricks/volumes_native.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/databricks/volumes_table.py +8 -9
- unstructured_ingest/{v2/processes → processes}/connectors/delta_table.py +7 -7
- unstructured_ingest/{v2/processes → processes}/connectors/discord.py +9 -9
- unstructured_ingest/{v2/processes → processes}/connectors/duckdb/__init__.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/duckdb/base.py +3 -4
- unstructured_ingest/{v2/processes → processes}/connectors/duckdb/duckdb.py +7 -7
- unstructured_ingest/{v2/processes → processes}/connectors/duckdb/motherduck.py +7 -7
- unstructured_ingest/{v2/processes → processes}/connectors/elasticsearch/__init__.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/elasticsearch/elasticsearch.py +17 -17
- unstructured_ingest/{v2/processes → processes}/connectors/elasticsearch/opensearch.py +5 -5
- unstructured_ingest/{v2/processes → processes}/connectors/fsspec/__init__.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/fsspec/azure.py +8 -8
- unstructured_ingest/{v2/processes → processes}/connectors/fsspec/box.py +8 -8
- unstructured_ingest/{v2/processes → processes}/connectors/fsspec/dropbox.py +8 -8
- unstructured_ingest/{v2/processes → processes}/connectors/fsspec/fsspec.py +8 -8
- unstructured_ingest/{v2/processes → processes}/connectors/fsspec/gcs.py +8 -8
- unstructured_ingest/{v2/processes → processes}/connectors/fsspec/s3.py +9 -9
- unstructured_ingest/{v2/processes → processes}/connectors/fsspec/sftp.py +5 -5
- unstructured_ingest/{v2/processes → processes}/connectors/github.py +10 -10
- unstructured_ingest/{v2/processes → processes}/connectors/gitlab.py +9 -9
- unstructured_ingest/{v2/processes → processes}/connectors/google_drive.py +22 -13
- unstructured_ingest/{v2/processes → processes}/connectors/ibm_watsonx/__init__.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/ibm_watsonx/ibm_watsonx_s3.py +9 -9
- unstructured_ingest/{v2/processes → processes}/connectors/jira.py +9 -9
- unstructured_ingest/{v2/processes → processes}/connectors/kafka/__init__.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/kafka/cloud.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/kafka/kafka.py +9 -9
- unstructured_ingest/{v2/processes → processes}/connectors/kafka/local.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/kdbai.py +11 -7
- unstructured_ingest/{v2/processes → processes}/connectors/lancedb/__init__.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/lancedb/aws.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/lancedb/azure.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/lancedb/cloud.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/lancedb/gcp.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/lancedb/lancedb.py +6 -6
- unstructured_ingest/{v2/processes → processes}/connectors/lancedb/local.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/local.py +9 -9
- unstructured_ingest/{v2/processes → processes}/connectors/milvus.py +7 -7
- unstructured_ingest/{v2/processes → processes}/connectors/mongodb.py +13 -13
- unstructured_ingest/{v2/processes → processes}/connectors/neo4j.py +7 -7
- unstructured_ingest/{v2/processes → processes}/connectors/notion/client.py +6 -6
- unstructured_ingest/{v2/processes → processes}/connectors/notion/connector.py +15 -15
- unstructured_ingest/{v2/processes → processes}/connectors/notion/helpers.py +4 -4
- unstructured_ingest/{ingest_backoff → processes/connectors/notion/ingest_backoff}/_wrapper.py +5 -1
- unstructured_ingest/processes/connectors/notion/ingest_backoff/types.py +24 -0
- unstructured_ingest/{connector → processes/connectors}/notion/types/block.py +4 -4
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/bookmark.py +2 -2
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/breadcrumb.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/bulleted_list_item.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/callout.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/child_database.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/child_page.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/code.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/column_list.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/divider.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/embed.py +2 -2
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/equation.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/file.py +4 -4
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/heading.py +2 -2
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/image.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/link_preview.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/link_to_page.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/numbered_list.py +2 -2
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/paragraph.py +2 -2
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/pdf.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/quote.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/synced_block.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/table.py +2 -5
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/table_of_contents.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/template.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/todo.py +2 -2
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/toggle.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/unsupported.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/video.py +2 -2
- unstructured_ingest/{connector → processes/connectors}/notion/types/database.py +6 -6
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/__init__.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/checkbox.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/created_by.py +2 -2
- unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/created_time.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/date.py +2 -2
- unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/email.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/files.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/formula.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/last_edited_by.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/last_edited_time.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/multiselect.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/number.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/people.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/phone_number.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/relation.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/rich_text.py +2 -2
- unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/rollup.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/select.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/status.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/title.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/unique_id.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/url.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/verification.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/date.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/file.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/page.py +4 -4
- unstructured_ingest/{connector → processes/connectors}/notion/types/parent.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/rich_text.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/user.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/onedrive.py +55 -27
- unstructured_ingest/{v2/processes → processes}/connectors/outlook.py +9 -9
- unstructured_ingest/{v2/processes → processes}/connectors/pinecone.py +12 -9
- unstructured_ingest/{v2/processes → processes}/connectors/qdrant/__init__.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/qdrant/cloud.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/qdrant/local.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/qdrant/qdrant.py +9 -6
- unstructured_ingest/{v2/processes → processes}/connectors/qdrant/server.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/redisdb.py +6 -6
- unstructured_ingest/{v2/processes → processes}/connectors/salesforce.py +9 -9
- unstructured_ingest/{v2/processes → processes}/connectors/sharepoint.py +8 -8
- unstructured_ingest/{v2/processes → processes}/connectors/slack.py +9 -9
- unstructured_ingest/{v2/processes → processes}/connectors/sql/__init__.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/sql/databricks_delta_tables.py +7 -7
- unstructured_ingest/{v2/processes → processes}/connectors/sql/postgres.py +4 -4
- unstructured_ingest/{v2/processes → processes}/connectors/sql/singlestore.py +4 -4
- unstructured_ingest/{v2/processes → processes}/connectors/sql/snowflake.py +6 -6
- unstructured_ingest/{v2/processes → processes}/connectors/sql/sql.py +16 -11
- unstructured_ingest/{v2/processes → processes}/connectors/sql/sqlite.py +4 -4
- unstructured_ingest/{v2/processes → processes}/connectors/sql/vastdb.py +9 -10
- unstructured_ingest/{v2/processes → processes}/connectors/vectara.py +6 -6
- unstructured_ingest/{v2/processes → processes}/connectors/weaviate/__init__.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/weaviate/cloud.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/weaviate/embedded.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/weaviate/local.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/weaviate/weaviate.py +5 -5
- unstructured_ingest/{v2/processes → processes}/connectors/zendesk/client.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/zendesk/zendesk.py +10 -10
- unstructured_ingest/{v2/processes → processes}/embedder.py +1 -1
- unstructured_ingest/{v2/processes → processes}/filter.py +4 -4
- unstructured_ingest/{v2/processes → processes}/partitioner.py +6 -6
- unstructured_ingest/{v2/processes → processes}/uncompress.py +3 -3
- unstructured_ingest/{v2/processes → processes}/utils/blob_storage.py +2 -2
- unstructured_ingest/{v2/unstructured_api.py → unstructured_api.py} +2 -2
- unstructured_ingest/utils/compression.py +1 -48
- unstructured_ingest/utils/data_prep.py +9 -1
- unstructured_ingest/utils/html.py +3 -3
- unstructured_ingest/{v2/utils.py → utils/pydantic_models.py} +0 -9
- unstructured_ingest/utils/string_and_date_utils.py +1 -1
- {unstructured_ingest-0.6.4.dist-info → unstructured_ingest-0.7.1.dist-info}/METADATA +98 -97
- unstructured_ingest-0.7.1.dist-info/RECORD +370 -0
- {unstructured_ingest-0.6.4.dist-info → unstructured_ingest-0.7.1.dist-info}/top_level.txt +1 -0
- test/unit/v2/test_utils.py +0 -82
- unstructured_ingest/cli/cmd_factory.py +0 -12
- unstructured_ingest/cli/cmds/__init__.py +0 -145
- unstructured_ingest/cli/cmds/airtable.py +0 -69
- unstructured_ingest/cli/cmds/astradb.py +0 -99
- unstructured_ingest/cli/cmds/azure_ai_search.py +0 -65
- unstructured_ingest/cli/cmds/biomed.py +0 -52
- unstructured_ingest/cli/cmds/chroma.py +0 -104
- unstructured_ingest/cli/cmds/clarifai.py +0 -71
- unstructured_ingest/cli/cmds/confluence.py +0 -69
- unstructured_ingest/cli/cmds/databricks_volumes.py +0 -163
- unstructured_ingest/cli/cmds/delta_table.py +0 -94
- unstructured_ingest/cli/cmds/discord.py +0 -47
- unstructured_ingest/cli/cmds/elasticsearch.py +0 -133
- unstructured_ingest/cli/cmds/fsspec/azure.py +0 -94
- unstructured_ingest/cli/cmds/fsspec/box.py +0 -48
- unstructured_ingest/cli/cmds/fsspec/dropbox.py +0 -51
- unstructured_ingest/cli/cmds/fsspec/fsspec.py +0 -15
- unstructured_ingest/cli/cmds/fsspec/gcs.py +0 -71
- unstructured_ingest/cli/cmds/fsspec/s3.py +0 -74
- unstructured_ingest/cli/cmds/fsspec/sftp.py +0 -58
- unstructured_ingest/cli/cmds/github.py +0 -54
- unstructured_ingest/cli/cmds/gitlab.py +0 -54
- unstructured_ingest/cli/cmds/google_drive.py +0 -49
- unstructured_ingest/cli/cmds/hubspot.py +0 -70
- unstructured_ingest/cli/cmds/jira.py +0 -71
- unstructured_ingest/cli/cmds/kafka.py +0 -102
- unstructured_ingest/cli/cmds/local.py +0 -43
- unstructured_ingest/cli/cmds/mongodb.py +0 -72
- unstructured_ingest/cli/cmds/notion.py +0 -48
- unstructured_ingest/cli/cmds/onedrive.py +0 -66
- unstructured_ingest/cli/cmds/opensearch.py +0 -117
- unstructured_ingest/cli/cmds/outlook.py +0 -67
- unstructured_ingest/cli/cmds/pinecone.py +0 -71
- unstructured_ingest/cli/cmds/qdrant.py +0 -124
- unstructured_ingest/cli/cmds/reddit.py +0 -67
- unstructured_ingest/cli/cmds/salesforce.py +0 -58
- unstructured_ingest/cli/cmds/sharepoint.py +0 -66
- unstructured_ingest/cli/cmds/slack.py +0 -56
- unstructured_ingest/cli/cmds/sql.py +0 -66
- unstructured_ingest/cli/cmds/vectara.py +0 -66
- unstructured_ingest/cli/cmds/weaviate.py +0 -98
- unstructured_ingest/cli/cmds/wikipedia.py +0 -40
- unstructured_ingest/cli/common.py +0 -7
- unstructured_ingest/cli/interfaces.py +0 -663
- unstructured_ingest/cli/utils.py +0 -205
- unstructured_ingest/connector/airtable.py +0 -309
- unstructured_ingest/connector/astradb.py +0 -267
- unstructured_ingest/connector/azure_ai_search.py +0 -144
- unstructured_ingest/connector/biomed.py +0 -320
- unstructured_ingest/connector/chroma.py +0 -158
- unstructured_ingest/connector/clarifai.py +0 -122
- unstructured_ingest/connector/confluence.py +0 -285
- unstructured_ingest/connector/databricks_volumes.py +0 -137
- unstructured_ingest/connector/delta_table.py +0 -203
- unstructured_ingest/connector/discord.py +0 -180
- unstructured_ingest/connector/elasticsearch.py +0 -396
- unstructured_ingest/connector/fsspec/azure.py +0 -78
- unstructured_ingest/connector/fsspec/box.py +0 -109
- unstructured_ingest/connector/fsspec/dropbox.py +0 -160
- unstructured_ingest/connector/fsspec/fsspec.py +0 -359
- unstructured_ingest/connector/fsspec/gcs.py +0 -82
- unstructured_ingest/connector/fsspec/s3.py +0 -62
- unstructured_ingest/connector/fsspec/sftp.py +0 -81
- unstructured_ingest/connector/git.py +0 -124
- unstructured_ingest/connector/github.py +0 -174
- unstructured_ingest/connector/gitlab.py +0 -142
- unstructured_ingest/connector/google_drive.py +0 -348
- unstructured_ingest/connector/hubspot.py +0 -278
- unstructured_ingest/connector/jira.py +0 -469
- unstructured_ingest/connector/kafka.py +0 -293
- unstructured_ingest/connector/local.py +0 -139
- unstructured_ingest/connector/mongodb.py +0 -284
- unstructured_ingest/connector/notion/client.py +0 -248
- unstructured_ingest/connector/notion/connector.py +0 -469
- unstructured_ingest/connector/notion/helpers.py +0 -584
- unstructured_ingest/connector/notion/types/blocks/bookmark.py +0 -40
- unstructured_ingest/connector/notion/types/blocks/callout.py +0 -94
- unstructured_ingest/connector/notion/types/blocks/child_database.py +0 -23
- unstructured_ingest/connector/notion/types/blocks/code.py +0 -43
- unstructured_ingest/connector/notion/types/blocks/column_list.py +0 -35
- unstructured_ingest/connector/notion/types/blocks/divider.py +0 -22
- unstructured_ingest/connector/notion/types/blocks/heading.py +0 -37
- unstructured_ingest/connector/notion/types/blocks/link_preview.py +0 -24
- unstructured_ingest/connector/notion/types/blocks/numbered_list.py +0 -29
- unstructured_ingest/connector/notion/types/blocks/quote.py +0 -37
- unstructured_ingest/connector/notion/types/blocks/synced_block.py +0 -57
- unstructured_ingest/connector/notion/types/blocks/table.py +0 -63
- unstructured_ingest/connector/notion/types/blocks/table_of_contents.py +0 -23
- unstructured_ingest/connector/notion/types/blocks/template.py +0 -30
- unstructured_ingest/connector/notion/types/blocks/todo.py +0 -42
- unstructured_ingest/connector/notion/types/blocks/unsupported.py +0 -20
- unstructured_ingest/connector/notion/types/database_properties/__init__.py +0 -106
- unstructured_ingest/connector/notion/types/database_properties/checkbox.py +0 -38
- unstructured_ingest/connector/notion/types/database_properties/date.py +0 -41
- unstructured_ingest/connector/notion/types/database_properties/formula.py +0 -49
- unstructured_ingest/connector/notion/types/database_properties/last_edited_time.py +0 -34
- unstructured_ingest/connector/notion/types/database_properties/multiselect.py +0 -73
- unstructured_ingest/connector/notion/types/database_properties/people.py +0 -40
- unstructured_ingest/connector/notion/types/database_properties/phone_number.py +0 -36
- unstructured_ingest/connector/notion/types/database_properties/relation.py +0 -67
- unstructured_ingest/connector/notion/types/database_properties/select.py +0 -68
- unstructured_ingest/connector/notion/types/database_properties/status.py +0 -80
- unstructured_ingest/connector/notion/types/database_properties/unique_id.py +0 -50
- unstructured_ingest/connector/notion/types/date.py +0 -26
- unstructured_ingest/connector/notion/types/file.py +0 -51
- unstructured_ingest/connector/notion/types/user.py +0 -76
- unstructured_ingest/connector/onedrive.py +0 -232
- unstructured_ingest/connector/opensearch.py +0 -218
- unstructured_ingest/connector/outlook.py +0 -285
- unstructured_ingest/connector/pinecone.py +0 -150
- unstructured_ingest/connector/qdrant.py +0 -144
- unstructured_ingest/connector/reddit.py +0 -166
- unstructured_ingest/connector/registry.py +0 -109
- unstructured_ingest/connector/salesforce.py +0 -301
- unstructured_ingest/connector/sharepoint.py +0 -573
- unstructured_ingest/connector/slack.py +0 -224
- unstructured_ingest/connector/sql.py +0 -199
- unstructured_ingest/connector/vectara.py +0 -253
- unstructured_ingest/connector/weaviate.py +0 -190
- unstructured_ingest/connector/wikipedia.py +0 -208
- unstructured_ingest/enhanced_dataclass/__init__.py +0 -4
- unstructured_ingest/enhanced_dataclass/core.py +0 -99
- unstructured_ingest/enhanced_dataclass/dataclasses.py +0 -54
- unstructured_ingest/enhanced_dataclass/json_mixin.py +0 -125
- unstructured_ingest/interfaces.py +0 -852
- unstructured_ingest/pipeline/copy.py +0 -19
- unstructured_ingest/pipeline/doc_factory.py +0 -12
- unstructured_ingest/pipeline/partition.py +0 -60
- unstructured_ingest/pipeline/permissions.py +0 -12
- unstructured_ingest/pipeline/reformat/chunking.py +0 -134
- unstructured_ingest/pipeline/reformat/embedding.py +0 -64
- unstructured_ingest/pipeline/source.py +0 -77
- unstructured_ingest/pipeline/utils.py +0 -6
- unstructured_ingest/pipeline/write.py +0 -18
- unstructured_ingest/processor.py +0 -93
- unstructured_ingest/runner/__init__.py +0 -104
- unstructured_ingest/runner/airtable.py +0 -35
- unstructured_ingest/runner/astradb.py +0 -34
- unstructured_ingest/runner/base_runner.py +0 -89
- unstructured_ingest/runner/biomed.py +0 -45
- unstructured_ingest/runner/confluence.py +0 -35
- unstructured_ingest/runner/delta_table.py +0 -34
- unstructured_ingest/runner/discord.py +0 -35
- unstructured_ingest/runner/elasticsearch.py +0 -40
- unstructured_ingest/runner/fsspec/azure.py +0 -30
- unstructured_ingest/runner/fsspec/box.py +0 -28
- unstructured_ingest/runner/fsspec/dropbox.py +0 -30
- unstructured_ingest/runner/fsspec/fsspec.py +0 -40
- unstructured_ingest/runner/fsspec/gcs.py +0 -28
- unstructured_ingest/runner/fsspec/s3.py +0 -28
- unstructured_ingest/runner/fsspec/sftp.py +0 -28
- unstructured_ingest/runner/github.py +0 -37
- unstructured_ingest/runner/gitlab.py +0 -37
- unstructured_ingest/runner/google_drive.py +0 -35
- unstructured_ingest/runner/hubspot.py +0 -35
- unstructured_ingest/runner/jira.py +0 -35
- unstructured_ingest/runner/kafka.py +0 -34
- unstructured_ingest/runner/local.py +0 -23
- unstructured_ingest/runner/mongodb.py +0 -34
- unstructured_ingest/runner/notion.py +0 -61
- unstructured_ingest/runner/onedrive.py +0 -35
- unstructured_ingest/runner/opensearch.py +0 -40
- unstructured_ingest/runner/outlook.py +0 -33
- unstructured_ingest/runner/reddit.py +0 -35
- unstructured_ingest/runner/salesforce.py +0 -33
- unstructured_ingest/runner/sharepoint.py +0 -35
- unstructured_ingest/runner/slack.py +0 -33
- unstructured_ingest/runner/utils.py +0 -47
- unstructured_ingest/runner/wikipedia.py +0 -35
- unstructured_ingest/runner/writers/__init__.py +0 -48
- unstructured_ingest/runner/writers/astradb.py +0 -22
- unstructured_ingest/runner/writers/azure_ai_search.py +0 -24
- unstructured_ingest/runner/writers/base_writer.py +0 -26
- unstructured_ingest/runner/writers/chroma.py +0 -22
- unstructured_ingest/runner/writers/clarifai.py +0 -19
- unstructured_ingest/runner/writers/databricks_volumes.py +0 -25
- unstructured_ingest/runner/writers/delta_table.py +0 -24
- unstructured_ingest/runner/writers/elasticsearch.py +0 -24
- unstructured_ingest/runner/writers/fsspec/azure.py +0 -24
- unstructured_ingest/runner/writers/fsspec/box.py +0 -21
- unstructured_ingest/runner/writers/fsspec/dropbox.py +0 -21
- unstructured_ingest/runner/writers/fsspec/gcs.py +0 -19
- unstructured_ingest/runner/writers/fsspec/s3.py +0 -21
- unstructured_ingest/runner/writers/kafka.py +0 -21
- unstructured_ingest/runner/writers/mongodb.py +0 -21
- unstructured_ingest/runner/writers/opensearch.py +0 -26
- unstructured_ingest/runner/writers/pinecone.py +0 -21
- unstructured_ingest/runner/writers/qdrant.py +0 -19
- unstructured_ingest/runner/writers/sql.py +0 -22
- unstructured_ingest/runner/writers/vectara.py +0 -22
- unstructured_ingest/runner/writers/weaviate.py +0 -21
- unstructured_ingest/utils/google_filetype.py +0 -9
- unstructured_ingest/v2/__init__.py +0 -1
- unstructured_ingest/v2/cli/__init__.py +0 -0
- unstructured_ingest/v2/cli/base/__init__.py +0 -4
- unstructured_ingest/v2/cli/base/cmd.py +0 -269
- unstructured_ingest/v2/cli/base/dest.py +0 -85
- unstructured_ingest/v2/cli/base/src.py +0 -85
- unstructured_ingest/v2/cli/cli.py +0 -24
- unstructured_ingest/v2/cli/utils/__init__.py +0 -0
- unstructured_ingest/v2/logger.py +0 -126
- unstructured_ingest/v2/main.py +0 -11
- unstructured_ingest/v2/pipeline/__init__.py +0 -0
- unstructured_ingest/v2/pipeline/interfaces.py +0 -211
- unstructured_ingest/v2/pipeline/pipeline.py +0 -408
- unstructured_ingest/v2/pipeline/steps/__init__.py +0 -0
- unstructured_ingest/v2/processes/connectors/assets/__init__.py +0 -0
- unstructured_ingest/v2/processes/connectors/assets/databricks_delta_table_schema.sql +0 -10
- unstructured_ingest/v2/processes/connectors/assets/weaviate_collection_config.json +0 -23
- unstructured_ingest/v2/processes/connectors/notion/__init__.py +0 -0
- unstructured_ingest/v2/processes/connectors/notion/interfaces.py +0 -32
- unstructured_ingest/v2/processes/connectors/notion/types/__init__.py +0 -0
- unstructured_ingest/v2/processes/connectors/notion/types/block.py +0 -96
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/__init__.py +0 -63
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/breadcrumb.py +0 -21
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/bulleted_list_item.py +0 -31
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/child_page.py +0 -23
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/embed.py +0 -36
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/equation.py +0 -23
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/file.py +0 -49
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/image.py +0 -21
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/link_to_page.py +0 -29
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/paragraph.py +0 -31
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/pdf.py +0 -49
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/toggle.py +0 -37
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/video.py +0 -22
- unstructured_ingest/v2/processes/connectors/notion/types/database.py +0 -73
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/created_by.py +0 -35
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/created_time.py +0 -34
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/email.py +0 -36
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/files.py +0 -37
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/last_edited_by.py +0 -34
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/number.py +0 -49
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/rich_text.py +0 -43
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/rollup.py +0 -56
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/title.py +0 -37
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/url.py +0 -37
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/verification.py +0 -78
- unstructured_ingest/v2/processes/connectors/notion/types/page.py +0 -45
- unstructured_ingest/v2/processes/connectors/notion/types/parent.py +0 -66
- unstructured_ingest/v2/processes/connectors/notion/types/rich_text.py +0 -189
- unstructured_ingest/v2/processes/connectors/zendesk/__init__.py +0 -0
- unstructured_ingest/v2/processes/utils/__init__.py +0 -0
- unstructured_ingest/v2/types/__init__.py +0 -0
- unstructured_ingest-0.6.4.dist-info/RECORD +0 -591
- {test/unit/v2 → examples}/__init__.py +0 -0
- /test/unit/{v2/chunkers → chunkers}/__init__.py +0 -0
- /test/unit/{v2/connectors → connectors}/__init__.py +0 -0
- /test/unit/{v2/connectors → connectors}/ibm_watsonx/__init__.py +0 -0
- /test/unit/{v2/connectors → connectors}/motherduck/__init__.py +0 -0
- /test/unit/{v2/connectors → connectors}/sql/__init__.py +0 -0
- /test/unit/{v2/embedders → embedders}/__init__.py +0 -0
- /test/unit/{v2/embedders → embedders}/test_bedrock.py +0 -0
- /test/unit/{v2/embedders → embedders}/test_mixedbread.py +0 -0
- /test/unit/{v2/embedders → embedders}/test_octoai.py +0 -0
- /test/unit/{v2/embedders → embedders}/test_openai.py +0 -0
- /test/unit/{v2/embedders → embedders}/test_togetherai.py +0 -0
- /test/unit/{v2/embedders → embedders}/test_voyageai.py +0 -0
- /test/unit/{v2/partitioners → partitioners}/__init__.py +0 -0
- /test/unit/{v2/utils → utils}/__init__.py +0 -0
- /test/unit/{v2/utils → utils}/data_generator.py +0 -0
- /unstructured_ingest/{v2/cli → cli}/base/importer.py +0 -0
- /unstructured_ingest/cli/{cmds/fsspec → utils}/__init__.py +0 -0
- /unstructured_ingest/{v2/cli → cli}/utils/click.py +0 -0
- /unstructured_ingest/{connector → data_types}/__init__.py +0 -0
- /unstructured_ingest/{v2/errors.py → errors_v2.py} +0 -0
- /unstructured_ingest/{v2/interfaces → interfaces}/__init__.py +0 -0
- /unstructured_ingest/{v2/interfaces → interfaces}/connector.py +0 -0
- /unstructured_ingest/{v2/interfaces → interfaces}/process.py +0 -0
- /unstructured_ingest/{v2/interfaces → interfaces}/processor.py +0 -0
- /unstructured_ingest/{connector/fsspec → pipeline/steps}/__init__.py +0 -0
- /unstructured_ingest/{v2/processes → processes}/__init__.py +0 -0
- /unstructured_ingest/{connector/notion → processes/connectors/assets}/__init__.py +0 -0
- /unstructured_ingest/{v2/processes → processes}/connectors/fsspec/utils.py +0 -0
- /unstructured_ingest/{connector/notion/types → processes/connectors/notion}/__init__.py +0 -0
- /unstructured_ingest/{ingest_backoff → processes/connectors/notion/ingest_backoff}/__init__.py +0 -0
- /unstructured_ingest/{ingest_backoff → processes/connectors/notion/ingest_backoff}/_common.py +0 -0
- /unstructured_ingest/{connector → processes/connectors}/notion/interfaces.py +0 -0
- /unstructured_ingest/{pipeline/reformat → processes/connectors/notion/types}/__init__.py +0 -0
- /unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/__init__.py +0 -0
- /unstructured_ingest/{v2/processes → processes}/connectors/utils.py +0 -0
- /unstructured_ingest/{runner/fsspec → processes/connectors/zendesk}/__init__.py +0 -0
- /unstructured_ingest/{runner/writers/fsspec → processes/utils}/__init__.py +0 -0
- /unstructured_ingest/{v2 → utils}/constants.py +0 -0
- {unstructured_ingest-0.6.4.dist-info → unstructured_ingest-0.7.1.dist-info}/LICENSE.md +0 -0
- {unstructured_ingest-0.6.4.dist-info → unstructured_ingest-0.7.1.dist-info}/WHEEL +0 -0
- {unstructured_ingest-0.6.4.dist-info → unstructured_ingest-0.7.1.dist-info}/entry_points.txt +0 -0
|
@@ -10,13 +10,17 @@ from typing import TYPE_CHECKING, Any, AsyncIterator, Optional
|
|
|
10
10
|
from dateutil import parser
|
|
11
11
|
from pydantic import Field, Secret
|
|
12
12
|
|
|
13
|
+
from unstructured_ingest.data_types.file_data import (
|
|
14
|
+
FileData,
|
|
15
|
+
FileDataSourceMetadata,
|
|
16
|
+
SourceIdentifiers,
|
|
17
|
+
)
|
|
13
18
|
from unstructured_ingest.error import (
|
|
14
19
|
DestinationConnectionError,
|
|
15
20
|
SourceConnectionError,
|
|
16
21
|
SourceConnectionNetworkError,
|
|
17
22
|
)
|
|
18
|
-
from unstructured_ingest.
|
|
19
|
-
from unstructured_ingest.v2.interfaces import (
|
|
23
|
+
from unstructured_ingest.interfaces import (
|
|
20
24
|
AccessConfig,
|
|
21
25
|
ConnectionConfig,
|
|
22
26
|
Downloader,
|
|
@@ -27,20 +31,16 @@ from unstructured_ingest.v2.interfaces import (
|
|
|
27
31
|
Uploader,
|
|
28
32
|
UploaderConfig,
|
|
29
33
|
)
|
|
30
|
-
from unstructured_ingest.
|
|
31
|
-
from unstructured_ingest.
|
|
34
|
+
from unstructured_ingest.logger import logger
|
|
35
|
+
from unstructured_ingest.processes.connector_registry import (
|
|
32
36
|
DestinationRegistryEntry,
|
|
33
37
|
SourceRegistryEntry,
|
|
34
38
|
)
|
|
35
|
-
from unstructured_ingest.
|
|
39
|
+
from unstructured_ingest.processes.utils.blob_storage import (
|
|
36
40
|
BlobStoreUploadStager,
|
|
37
41
|
BlobStoreUploadStagerConfig,
|
|
38
42
|
)
|
|
39
|
-
from unstructured_ingest.
|
|
40
|
-
FileData,
|
|
41
|
-
FileDataSourceMetadata,
|
|
42
|
-
SourceIdentifiers,
|
|
43
|
-
)
|
|
43
|
+
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
44
44
|
|
|
45
45
|
if TYPE_CHECKING:
|
|
46
46
|
from office365.graph_client import GraphClient
|
|
@@ -53,11 +53,14 @@ MAX_BYTES_SIZE = 512_000_000
|
|
|
53
53
|
|
|
54
54
|
class OnedriveAccessConfig(AccessConfig):
|
|
55
55
|
client_cred: str = Field(description="Microsoft App client secret")
|
|
56
|
+
password: Optional[str] = Field(description="Service account password", default=None)
|
|
56
57
|
|
|
57
58
|
|
|
58
59
|
class OnedriveConnectionConfig(ConnectionConfig):
|
|
59
60
|
client_id: str = Field(description="Microsoft app client ID")
|
|
60
|
-
user_pname: str = Field(
|
|
61
|
+
user_pname: str = Field(
|
|
62
|
+
description="User principal name or service account, usually your Azure AD email."
|
|
63
|
+
)
|
|
61
64
|
tenant: str = Field(
|
|
62
65
|
repr=False, description="ID or domain name associated with your Azure AD instance"
|
|
63
66
|
)
|
|
@@ -74,25 +77,50 @@ class OnedriveConnectionConfig(ConnectionConfig):
|
|
|
74
77
|
drive = client.users[self.user_pname].drive
|
|
75
78
|
return drive
|
|
76
79
|
|
|
77
|
-
@requires_dependencies(["msal"], extras="onedrive")
|
|
80
|
+
@requires_dependencies(["msal", "requests"], extras="onedrive")
|
|
78
81
|
def get_token(self):
|
|
79
82
|
from msal import ConfidentialClientApplication
|
|
83
|
+
from requests import post
|
|
84
|
+
|
|
85
|
+
if self.access_config.get_secret_value().password:
|
|
86
|
+
url = f"https://login.microsoftonline.com/{self.tenant}/oauth2/v2.0/token"
|
|
87
|
+
headers = {"Content-Type": "application/x-www-form-urlencoded"}
|
|
88
|
+
data = {
|
|
89
|
+
"grant_type": "password",
|
|
90
|
+
"username": self.user_pname,
|
|
91
|
+
"password": self.access_config.get_secret_value().password,
|
|
92
|
+
"client_id": self.client_id,
|
|
93
|
+
"client_secret": self.access_config.get_secret_value().client_cred,
|
|
94
|
+
"scope": "https://graph.microsoft.com/.default",
|
|
95
|
+
}
|
|
96
|
+
response = post(url, headers=headers, data=data)
|
|
97
|
+
if response.status_code == 200:
|
|
98
|
+
return response.json()
|
|
99
|
+
else:
|
|
100
|
+
raise SourceConnectionError(
|
|
101
|
+
f"Oauth2 authentication failed with {response.status_code}: {response.text}"
|
|
102
|
+
)
|
|
80
103
|
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
104
|
+
else:
|
|
105
|
+
try:
|
|
106
|
+
app = ConfidentialClientApplication(
|
|
107
|
+
authority=f"{self.authority_url}/{self.tenant}",
|
|
108
|
+
client_id=self.client_id,
|
|
109
|
+
client_credential=self.access_config.get_secret_value().client_cred,
|
|
110
|
+
)
|
|
111
|
+
token = app.acquire_token_for_client(
|
|
112
|
+
scopes=["https://graph.microsoft.com/.default"]
|
|
113
|
+
)
|
|
114
|
+
except ValueError as exc:
|
|
115
|
+
logger.error("Couldn't set up credentials.")
|
|
116
|
+
raise exc
|
|
117
|
+
if "error" in token:
|
|
118
|
+
raise SourceConnectionNetworkError(
|
|
119
|
+
"failed to fetch token, {}: {}".format(
|
|
120
|
+
token["error"], token["error_description"]
|
|
121
|
+
)
|
|
122
|
+
)
|
|
123
|
+
return token
|
|
96
124
|
|
|
97
125
|
@requires_dependencies(["office365"], extras="onedrive")
|
|
98
126
|
def get_client(self) -> "GraphClient":
|
|
@@ -7,10 +7,13 @@ from typing import TYPE_CHECKING, Any, Coroutine, Generator
|
|
|
7
7
|
|
|
8
8
|
from pydantic import Field, Secret
|
|
9
9
|
|
|
10
|
+
from unstructured_ingest.data_types.file_data import (
|
|
11
|
+
FileData,
|
|
12
|
+
FileDataSourceMetadata,
|
|
13
|
+
SourceIdentifiers,
|
|
14
|
+
)
|
|
10
15
|
from unstructured_ingest.error import SourceConnectionError
|
|
11
|
-
from unstructured_ingest.
|
|
12
|
-
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
13
|
-
from unstructured_ingest.v2.interfaces import (
|
|
16
|
+
from unstructured_ingest.interfaces import (
|
|
14
17
|
AccessConfig,
|
|
15
18
|
ConnectionConfig,
|
|
16
19
|
Downloader,
|
|
@@ -19,12 +22,9 @@ from unstructured_ingest.v2.interfaces import (
|
|
|
19
22
|
Indexer,
|
|
20
23
|
IndexerConfig,
|
|
21
24
|
)
|
|
22
|
-
from unstructured_ingest.
|
|
23
|
-
from unstructured_ingest.
|
|
24
|
-
|
|
25
|
-
FileDataSourceMetadata,
|
|
26
|
-
SourceIdentifiers,
|
|
27
|
-
)
|
|
25
|
+
from unstructured_ingest.logger import logger
|
|
26
|
+
from unstructured_ingest.processes.connector_registry import SourceRegistryEntry
|
|
27
|
+
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
28
28
|
|
|
29
29
|
MAX_EMAILS_PER_FOLDER = 1_000_000 # Maximum number of emails per folder
|
|
30
30
|
|
|
@@ -5,12 +5,10 @@ from typing import TYPE_CHECKING, Any, Literal, Optional
|
|
|
5
5
|
|
|
6
6
|
from pydantic import Field, Secret
|
|
7
7
|
|
|
8
|
+
from unstructured_ingest.data_types.file_data import FileData
|
|
8
9
|
from unstructured_ingest.error import DestinationConnectionError
|
|
9
|
-
from unstructured_ingest.
|
|
10
|
-
from unstructured_ingest.
|
|
11
|
-
from unstructured_ingest.v2.constants import RECORD_ID_LABEL
|
|
12
|
-
from unstructured_ingest.v2.errors import UserError
|
|
13
|
-
from unstructured_ingest.v2.interfaces import (
|
|
10
|
+
from unstructured_ingest.errors_v2 import UserError
|
|
11
|
+
from unstructured_ingest.interfaces import (
|
|
14
12
|
AccessConfig,
|
|
15
13
|
ConnectionConfig,
|
|
16
14
|
UploaderConfig,
|
|
@@ -18,10 +16,15 @@ from unstructured_ingest.v2.interfaces import (
|
|
|
18
16
|
UploadStagerConfig,
|
|
19
17
|
VectorDBUploader,
|
|
20
18
|
)
|
|
21
|
-
from unstructured_ingest.
|
|
22
|
-
from unstructured_ingest.
|
|
23
|
-
from unstructured_ingest.
|
|
24
|
-
from unstructured_ingest.
|
|
19
|
+
from unstructured_ingest.logger import logger
|
|
20
|
+
from unstructured_ingest.processes.connector_registry import DestinationRegistryEntry
|
|
21
|
+
from unstructured_ingest.utils.constants import RECORD_ID_LABEL
|
|
22
|
+
from unstructured_ingest.utils.data_prep import (
|
|
23
|
+
flatten_dict,
|
|
24
|
+
generator_batching_wbytes,
|
|
25
|
+
get_enhanced_element_id,
|
|
26
|
+
)
|
|
27
|
+
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
25
28
|
|
|
26
29
|
if TYPE_CHECKING:
|
|
27
30
|
from pinecone import Index as PineconeIndex
|
|
@@ -2,8 +2,8 @@ from dataclasses import dataclass
|
|
|
2
2
|
|
|
3
3
|
from pydantic import Field, Secret
|
|
4
4
|
|
|
5
|
-
from unstructured_ingest.
|
|
6
|
-
from unstructured_ingest.
|
|
5
|
+
from unstructured_ingest.processes.connector_registry import DestinationRegistryEntry
|
|
6
|
+
from unstructured_ingest.processes.connectors.qdrant.qdrant import (
|
|
7
7
|
QdrantAccessConfig,
|
|
8
8
|
QdrantConnectionConfig,
|
|
9
9
|
QdrantUploader,
|
|
@@ -2,8 +2,8 @@ from dataclasses import dataclass
|
|
|
2
2
|
|
|
3
3
|
from pydantic import Field, Secret
|
|
4
4
|
|
|
5
|
-
from unstructured_ingest.
|
|
6
|
-
from unstructured_ingest.
|
|
5
|
+
from unstructured_ingest.processes.connector_registry import DestinationRegistryEntry
|
|
6
|
+
from unstructured_ingest.processes.connectors.qdrant.qdrant import (
|
|
7
7
|
QdrantAccessConfig,
|
|
8
8
|
QdrantConnectionConfig,
|
|
9
9
|
QdrantUploader,
|
|
@@ -7,10 +7,9 @@ from typing import TYPE_CHECKING, Any, AsyncGenerator, Generator, Optional
|
|
|
7
7
|
|
|
8
8
|
from pydantic import Field, Secret
|
|
9
9
|
|
|
10
|
+
from unstructured_ingest.data_types.file_data import FileData
|
|
10
11
|
from unstructured_ingest.error import DestinationConnectionError, WriteError
|
|
11
|
-
from unstructured_ingest.
|
|
12
|
-
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
13
|
-
from unstructured_ingest.v2.interfaces import (
|
|
12
|
+
from unstructured_ingest.interfaces import (
|
|
14
13
|
AccessConfig,
|
|
15
14
|
ConnectionConfig,
|
|
16
15
|
Uploader,
|
|
@@ -18,9 +17,13 @@ from unstructured_ingest.v2.interfaces import (
|
|
|
18
17
|
UploadStager,
|
|
19
18
|
UploadStagerConfig,
|
|
20
19
|
)
|
|
21
|
-
from unstructured_ingest.
|
|
22
|
-
from unstructured_ingest.
|
|
23
|
-
|
|
20
|
+
from unstructured_ingest.logger import logger
|
|
21
|
+
from unstructured_ingest.utils.data_prep import (
|
|
22
|
+
batch_generator,
|
|
23
|
+
flatten_dict,
|
|
24
|
+
get_enhanced_element_id,
|
|
25
|
+
)
|
|
26
|
+
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
24
27
|
|
|
25
28
|
if TYPE_CHECKING:
|
|
26
29
|
from qdrant_client import AsyncQdrantClient, QdrantClient
|
|
@@ -2,8 +2,8 @@ from dataclasses import dataclass
|
|
|
2
2
|
|
|
3
3
|
from pydantic import Field, Secret
|
|
4
4
|
|
|
5
|
-
from unstructured_ingest.
|
|
6
|
-
from unstructured_ingest.
|
|
5
|
+
from unstructured_ingest.processes.connector_registry import DestinationRegistryEntry
|
|
6
|
+
from unstructured_ingest.processes.connectors.qdrant.qdrant import (
|
|
7
7
|
QdrantAccessConfig,
|
|
8
8
|
QdrantConnectionConfig,
|
|
9
9
|
QdrantUploader,
|
|
@@ -5,18 +5,18 @@ from typing import TYPE_CHECKING, Any, AsyncGenerator, Generator, Optional
|
|
|
5
5
|
|
|
6
6
|
from pydantic import Field, Secret, model_validator
|
|
7
7
|
|
|
8
|
+
from unstructured_ingest.data_types.file_data import FileData
|
|
8
9
|
from unstructured_ingest.error import DestinationConnectionError
|
|
9
|
-
from unstructured_ingest.
|
|
10
|
-
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
11
|
-
from unstructured_ingest.v2.interfaces import (
|
|
10
|
+
from unstructured_ingest.interfaces import (
|
|
12
11
|
AccessConfig,
|
|
13
12
|
ConnectionConfig,
|
|
14
13
|
Uploader,
|
|
15
14
|
UploaderConfig,
|
|
16
15
|
)
|
|
17
|
-
from unstructured_ingest.
|
|
18
|
-
from unstructured_ingest.
|
|
19
|
-
from unstructured_ingest.
|
|
16
|
+
from unstructured_ingest.logger import logger
|
|
17
|
+
from unstructured_ingest.processes.connector_registry import DestinationRegistryEntry
|
|
18
|
+
from unstructured_ingest.utils.data_prep import batch_generator
|
|
19
|
+
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
20
20
|
|
|
21
21
|
if TYPE_CHECKING:
|
|
22
22
|
from redis.asyncio import Redis
|
|
@@ -20,9 +20,13 @@ from typing import TYPE_CHECKING, Any, Generator, Optional, Type
|
|
|
20
20
|
from dateutil import parser
|
|
21
21
|
from pydantic import Field, Secret
|
|
22
22
|
|
|
23
|
+
from unstructured_ingest.data_types.file_data import (
|
|
24
|
+
FileData,
|
|
25
|
+
FileDataSourceMetadata,
|
|
26
|
+
SourceIdentifiers,
|
|
27
|
+
)
|
|
23
28
|
from unstructured_ingest.error import SourceConnectionError, SourceConnectionNetworkError
|
|
24
|
-
from unstructured_ingest.
|
|
25
|
-
from unstructured_ingest.v2.interfaces import (
|
|
29
|
+
from unstructured_ingest.interfaces import (
|
|
26
30
|
AccessConfig,
|
|
27
31
|
ConnectionConfig,
|
|
28
32
|
Downloader,
|
|
@@ -31,15 +35,11 @@ from unstructured_ingest.v2.interfaces import (
|
|
|
31
35
|
Indexer,
|
|
32
36
|
IndexerConfig,
|
|
33
37
|
)
|
|
34
|
-
from unstructured_ingest.
|
|
35
|
-
from unstructured_ingest.
|
|
38
|
+
from unstructured_ingest.logger import logger
|
|
39
|
+
from unstructured_ingest.processes.connector_registry import (
|
|
36
40
|
SourceRegistryEntry,
|
|
37
41
|
)
|
|
38
|
-
from unstructured_ingest.
|
|
39
|
-
FileData,
|
|
40
|
-
FileDataSourceMetadata,
|
|
41
|
-
SourceIdentifiers,
|
|
42
|
-
)
|
|
42
|
+
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
43
43
|
|
|
44
44
|
|
|
45
45
|
class MissingCategoryError(Exception):
|
|
@@ -6,16 +6,18 @@ from typing import TYPE_CHECKING, Any, AsyncIterator
|
|
|
6
6
|
|
|
7
7
|
from pydantic import Field
|
|
8
8
|
|
|
9
|
+
from unstructured_ingest.data_types.file_data import (
|
|
10
|
+
FileData,
|
|
11
|
+
)
|
|
9
12
|
from unstructured_ingest.error import (
|
|
10
13
|
SourceConnectionError,
|
|
11
14
|
SourceConnectionNetworkError,
|
|
12
15
|
)
|
|
13
|
-
from unstructured_ingest.
|
|
14
|
-
from unstructured_ingest.
|
|
15
|
-
from unstructured_ingest.v2.processes.connector_registry import (
|
|
16
|
+
from unstructured_ingest.logger import logger
|
|
17
|
+
from unstructured_ingest.processes.connector_registry import (
|
|
16
18
|
SourceRegistryEntry,
|
|
17
19
|
)
|
|
18
|
-
from unstructured_ingest.
|
|
20
|
+
from unstructured_ingest.processes.connectors.onedrive import (
|
|
19
21
|
OnedriveAccessConfig,
|
|
20
22
|
OnedriveConnectionConfig,
|
|
21
23
|
OnedriveDownloader,
|
|
@@ -23,9 +25,7 @@ from unstructured_ingest.v2.processes.connectors.onedrive import (
|
|
|
23
25
|
OnedriveIndexer,
|
|
24
26
|
OnedriveIndexerConfig,
|
|
25
27
|
)
|
|
26
|
-
from unstructured_ingest.
|
|
27
|
-
FileData,
|
|
28
|
-
)
|
|
28
|
+
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
29
29
|
|
|
30
30
|
if TYPE_CHECKING:
|
|
31
31
|
from office365.onedrive.driveitems.driveItem import DriveItem
|
|
@@ -100,7 +100,7 @@ class SharepointDownloader(OnedriveDownloader):
|
|
|
100
100
|
connector_type: str = CONNECTOR_TYPE
|
|
101
101
|
|
|
102
102
|
@SourceConnectionNetworkError.wrap
|
|
103
|
-
@requires_dependencies(["office365"], extras="
|
|
103
|
+
@requires_dependencies(["office365"], extras="sharepoint")
|
|
104
104
|
def _fetch_file(self, file_data: FileData) -> DriveItem:
|
|
105
105
|
from office365.runtime.client_request_exception import ClientRequestException
|
|
106
106
|
|
|
@@ -8,10 +8,13 @@ from typing import TYPE_CHECKING, Any, Generator, Optional
|
|
|
8
8
|
|
|
9
9
|
from pydantic import Field, Secret
|
|
10
10
|
|
|
11
|
+
from unstructured_ingest.data_types.file_data import (
|
|
12
|
+
FileData,
|
|
13
|
+
FileDataSourceMetadata,
|
|
14
|
+
SourceIdentifiers,
|
|
15
|
+
)
|
|
11
16
|
from unstructured_ingest.error import SourceConnectionError
|
|
12
|
-
from unstructured_ingest.
|
|
13
|
-
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
14
|
-
from unstructured_ingest.v2.interfaces import (
|
|
17
|
+
from unstructured_ingest.interfaces import (
|
|
15
18
|
AccessConfig,
|
|
16
19
|
ConnectionConfig,
|
|
17
20
|
Downloader,
|
|
@@ -20,12 +23,9 @@ from unstructured_ingest.v2.interfaces import (
|
|
|
20
23
|
Indexer,
|
|
21
24
|
IndexerConfig,
|
|
22
25
|
)
|
|
23
|
-
from unstructured_ingest.
|
|
24
|
-
from unstructured_ingest.
|
|
25
|
-
|
|
26
|
-
FileDataSourceMetadata,
|
|
27
|
-
SourceIdentifiers,
|
|
28
|
-
)
|
|
26
|
+
from unstructured_ingest.logger import logger
|
|
27
|
+
from unstructured_ingest.processes.connector_registry import SourceRegistryEntry
|
|
28
|
+
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
29
29
|
|
|
30
30
|
if TYPE_CHECKING:
|
|
31
31
|
from slack_sdk import WebClient
|
|
@@ -6,13 +6,12 @@ from typing import TYPE_CHECKING, Any, Generator, Optional
|
|
|
6
6
|
|
|
7
7
|
from pydantic import Field, Secret
|
|
8
8
|
|
|
9
|
-
from unstructured_ingest.
|
|
10
|
-
from unstructured_ingest.
|
|
11
|
-
from unstructured_ingest.
|
|
12
|
-
from unstructured_ingest.v2.processes.connector_registry import (
|
|
9
|
+
from unstructured_ingest.data_types.file_data import FileData
|
|
10
|
+
from unstructured_ingest.logger import logger
|
|
11
|
+
from unstructured_ingest.processes.connector_registry import (
|
|
13
12
|
DestinationRegistryEntry,
|
|
14
13
|
)
|
|
15
|
-
from unstructured_ingest.
|
|
14
|
+
from unstructured_ingest.processes.connectors.sql.sql import (
|
|
16
15
|
SQLAccessConfig,
|
|
17
16
|
SQLConnectionConfig,
|
|
18
17
|
SQLUploader,
|
|
@@ -20,7 +19,8 @@ from unstructured_ingest.v2.processes.connectors.sql.sql import (
|
|
|
20
19
|
SQLUploadStager,
|
|
21
20
|
SQLUploadStagerConfig,
|
|
22
21
|
)
|
|
23
|
-
from unstructured_ingest.
|
|
22
|
+
from unstructured_ingest.utils.data_prep import split_dataframe
|
|
23
|
+
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
24
24
|
|
|
25
25
|
if TYPE_CHECKING:
|
|
26
26
|
from databricks.sdk.core import oauth_service_principal
|
|
@@ -203,7 +203,7 @@ class DatabricksDeltaTablesUploader(SQLUploader):
|
|
|
203
203
|
f" table named {self.upload_config.table_name}"
|
|
204
204
|
# f" with batch size {self.upload_config.batch_size}"
|
|
205
205
|
)
|
|
206
|
-
# TODO: currently variable binding not supporting for list
|
|
206
|
+
# TODO: currently variable binding not supporting for list data_types,
|
|
207
207
|
# update once that gets resolved in SDK
|
|
208
208
|
for rows in split_dataframe(df=df, chunk_size=self.upload_config.batch_size):
|
|
209
209
|
with self.get_cursor() as cursor:
|
|
@@ -4,13 +4,12 @@ from typing import TYPE_CHECKING, Generator, Optional
|
|
|
4
4
|
|
|
5
5
|
from pydantic import Field, Secret
|
|
6
6
|
|
|
7
|
-
from unstructured_ingest.
|
|
8
|
-
from unstructured_ingest.
|
|
9
|
-
from unstructured_ingest.v2.processes.connector_registry import (
|
|
7
|
+
from unstructured_ingest.logger import logger
|
|
8
|
+
from unstructured_ingest.processes.connector_registry import (
|
|
10
9
|
DestinationRegistryEntry,
|
|
11
10
|
SourceRegistryEntry,
|
|
12
11
|
)
|
|
13
|
-
from unstructured_ingest.
|
|
12
|
+
from unstructured_ingest.processes.connectors.sql.sql import (
|
|
14
13
|
SQLAccessConfig,
|
|
15
14
|
SqlBatchFileData,
|
|
16
15
|
SQLConnectionConfig,
|
|
@@ -23,6 +22,7 @@ from unstructured_ingest.v2.processes.connectors.sql.sql import (
|
|
|
23
22
|
SQLUploadStager,
|
|
24
23
|
SQLUploadStagerConfig,
|
|
25
24
|
)
|
|
25
|
+
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
26
26
|
|
|
27
27
|
if TYPE_CHECKING:
|
|
28
28
|
from psycopg2.extensions import connection as PostgresConnection
|
|
@@ -5,13 +5,12 @@ from typing import TYPE_CHECKING, Any, Generator, Optional
|
|
|
5
5
|
|
|
6
6
|
from pydantic import Field, Secret
|
|
7
7
|
|
|
8
|
-
from unstructured_ingest.
|
|
9
|
-
from unstructured_ingest.
|
|
10
|
-
from unstructured_ingest.v2.processes.connector_registry import (
|
|
8
|
+
from unstructured_ingest.logger import logger
|
|
9
|
+
from unstructured_ingest.processes.connector_registry import (
|
|
11
10
|
DestinationRegistryEntry,
|
|
12
11
|
SourceRegistryEntry,
|
|
13
12
|
)
|
|
14
|
-
from unstructured_ingest.
|
|
13
|
+
from unstructured_ingest.processes.connectors.sql.sql import (
|
|
15
14
|
_DATE_COLUMNS,
|
|
16
15
|
SQLAccessConfig,
|
|
17
16
|
SqlBatchFileData,
|
|
@@ -26,6 +25,7 @@ from unstructured_ingest.v2.processes.connectors.sql.sql import (
|
|
|
26
25
|
SQLUploadStagerConfig,
|
|
27
26
|
parse_date_string,
|
|
28
27
|
)
|
|
28
|
+
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
29
29
|
|
|
30
30
|
if TYPE_CHECKING:
|
|
31
31
|
from singlestoredb.connection import Connection as SingleStoreConnection
|
|
@@ -5,14 +5,13 @@ from typing import TYPE_CHECKING, Any, Generator, Optional
|
|
|
5
5
|
|
|
6
6
|
from pydantic import Field, Secret
|
|
7
7
|
|
|
8
|
-
from unstructured_ingest.
|
|
9
|
-
from unstructured_ingest.
|
|
10
|
-
from unstructured_ingest.
|
|
11
|
-
from unstructured_ingest.v2.processes.connector_registry import (
|
|
8
|
+
from unstructured_ingest.data_types.file_data import FileData
|
|
9
|
+
from unstructured_ingest.logger import logger
|
|
10
|
+
from unstructured_ingest.processes.connector_registry import (
|
|
12
11
|
DestinationRegistryEntry,
|
|
13
12
|
SourceRegistryEntry,
|
|
14
13
|
)
|
|
15
|
-
from unstructured_ingest.
|
|
14
|
+
from unstructured_ingest.processes.connectors.sql.sql import (
|
|
16
15
|
_DATE_COLUMNS,
|
|
17
16
|
SQLAccessConfig,
|
|
18
17
|
SqlBatchFileData,
|
|
@@ -27,7 +26,8 @@ from unstructured_ingest.v2.processes.connectors.sql.sql import (
|
|
|
27
26
|
SQLUploadStagerConfig,
|
|
28
27
|
parse_date_string,
|
|
29
28
|
)
|
|
30
|
-
from unstructured_ingest.
|
|
29
|
+
from unstructured_ingest.utils.data_prep import split_dataframe
|
|
30
|
+
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
31
31
|
|
|
32
32
|
if TYPE_CHECKING:
|
|
33
33
|
from pandas import DataFrame
|
|
@@ -11,10 +11,15 @@ from typing import TYPE_CHECKING, Any, Generator, Union
|
|
|
11
11
|
from dateutil import parser
|
|
12
12
|
from pydantic import BaseModel, Field, Secret
|
|
13
13
|
|
|
14
|
+
from unstructured_ingest.data_types.file_data import (
|
|
15
|
+
BatchFileData,
|
|
16
|
+
BatchItem,
|
|
17
|
+
FileData,
|
|
18
|
+
FileDataSourceMetadata,
|
|
19
|
+
SourceIdentifiers,
|
|
20
|
+
)
|
|
14
21
|
from unstructured_ingest.error import DestinationConnectionError, SourceConnectionError
|
|
15
|
-
from unstructured_ingest.
|
|
16
|
-
from unstructured_ingest.v2.constants import RECORD_ID_LABEL
|
|
17
|
-
from unstructured_ingest.v2.interfaces import (
|
|
22
|
+
from unstructured_ingest.interfaces import (
|
|
18
23
|
AccessConfig,
|
|
19
24
|
ConnectionConfig,
|
|
20
25
|
Downloader,
|
|
@@ -28,15 +33,15 @@ from unstructured_ingest.v2.interfaces import (
|
|
|
28
33
|
UploadStagerConfig,
|
|
29
34
|
download_responses,
|
|
30
35
|
)
|
|
31
|
-
from unstructured_ingest.
|
|
32
|
-
from unstructured_ingest.
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
36
|
+
from unstructured_ingest.logger import logger
|
|
37
|
+
from unstructured_ingest.utils.constants import RECORD_ID_LABEL
|
|
38
|
+
from unstructured_ingest.utils.data_prep import (
|
|
39
|
+
get_data,
|
|
40
|
+
get_data_df,
|
|
41
|
+
get_enhanced_element_id,
|
|
42
|
+
split_dataframe,
|
|
43
|
+
write_data,
|
|
38
44
|
)
|
|
39
|
-
from unstructured_ingest.v2.utils import get_enhanced_element_id
|
|
40
45
|
|
|
41
46
|
if TYPE_CHECKING:
|
|
42
47
|
from pandas import DataFrame
|
|
@@ -6,13 +6,12 @@ from typing import TYPE_CHECKING, Any, Generator
|
|
|
6
6
|
|
|
7
7
|
from pydantic import Field, Secret, model_validator
|
|
8
8
|
|
|
9
|
-
from unstructured_ingest.
|
|
10
|
-
from unstructured_ingest.
|
|
11
|
-
from unstructured_ingest.v2.processes.connector_registry import (
|
|
9
|
+
from unstructured_ingest.logger import logger
|
|
10
|
+
from unstructured_ingest.processes.connector_registry import (
|
|
12
11
|
DestinationRegistryEntry,
|
|
13
12
|
SourceRegistryEntry,
|
|
14
13
|
)
|
|
15
|
-
from unstructured_ingest.
|
|
14
|
+
from unstructured_ingest.processes.connectors.sql.sql import (
|
|
16
15
|
_DATE_COLUMNS,
|
|
17
16
|
SQLAccessConfig,
|
|
18
17
|
SqlBatchFileData,
|
|
@@ -27,6 +26,7 @@ from unstructured_ingest.v2.processes.connectors.sql.sql import (
|
|
|
27
26
|
SQLUploadStagerConfig,
|
|
28
27
|
parse_date_string,
|
|
29
28
|
)
|
|
29
|
+
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
30
30
|
|
|
31
31
|
if TYPE_CHECKING:
|
|
32
32
|
from sqlite3 import Connection as SqliteConnection
|
|
@@ -4,16 +4,16 @@ from typing import TYPE_CHECKING, Any, Optional
|
|
|
4
4
|
|
|
5
5
|
from pydantic import Field, Secret
|
|
6
6
|
|
|
7
|
+
from unstructured_ingest.data_types.file_data import (
|
|
8
|
+
FileData,
|
|
9
|
+
)
|
|
7
10
|
from unstructured_ingest.error import DestinationConnectionError
|
|
8
|
-
from unstructured_ingest.
|
|
9
|
-
from unstructured_ingest.
|
|
10
|
-
from unstructured_ingest.v2.constants import RECORD_ID_LABEL
|
|
11
|
-
from unstructured_ingest.v2.logger import logger
|
|
12
|
-
from unstructured_ingest.v2.processes.connector_registry import (
|
|
11
|
+
from unstructured_ingest.logger import logger
|
|
12
|
+
from unstructured_ingest.processes.connector_registry import (
|
|
13
13
|
DestinationRegistryEntry,
|
|
14
14
|
SourceRegistryEntry,
|
|
15
15
|
)
|
|
16
|
-
from unstructured_ingest.
|
|
16
|
+
from unstructured_ingest.processes.connectors.sql.sql import (
|
|
17
17
|
SQLAccessConfig,
|
|
18
18
|
SqlBatchFileData,
|
|
19
19
|
SQLConnectionConfig,
|
|
@@ -26,10 +26,9 @@ from unstructured_ingest.v2.processes.connectors.sql.sql import (
|
|
|
26
26
|
SQLUploadStager,
|
|
27
27
|
SQLUploadStagerConfig,
|
|
28
28
|
)
|
|
29
|
-
from unstructured_ingest.
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
from unstructured_ingest.v2.utils import get_enhanced_element_id
|
|
29
|
+
from unstructured_ingest.utils.constants import RECORD_ID_LABEL
|
|
30
|
+
from unstructured_ingest.utils.data_prep import get_enhanced_element_id, split_dataframe
|
|
31
|
+
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
33
32
|
|
|
34
33
|
if TYPE_CHECKING:
|
|
35
34
|
from pandas import DataFrame
|