unstructured-ingest 0.6.4__py3-none-any.whl → 0.7.1__py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of unstructured-ingest might be problematic. Click here for more details.
- examples/airtable.py +44 -0
- examples/azure_cognitive_search.py +55 -0
- examples/chroma.py +54 -0
- examples/couchbase.py +55 -0
- examples/databricks_volumes_dest.py +55 -0
- examples/databricks_volumes_source.py +53 -0
- examples/delta_table.py +45 -0
- examples/discord_example.py +36 -0
- examples/elasticsearch.py +49 -0
- examples/google_drive.py +45 -0
- examples/kdbai.py +54 -0
- examples/local.py +36 -0
- examples/milvus.py +44 -0
- examples/mongodb.py +53 -0
- examples/opensearch.py +50 -0
- examples/pinecone.py +57 -0
- examples/s3.py +38 -0
- examples/salesforce.py +44 -0
- examples/sharepoint.py +47 -0
- examples/singlestore.py +49 -0
- examples/sql.py +90 -0
- examples/vectara.py +54 -0
- examples/weaviate.py +44 -0
- test/integration/chunkers/test_chunkers.py +1 -1
- test/integration/connectors/conftest.py +1 -1
- test/integration/connectors/databricks/test_volumes_native.py +3 -3
- test/integration/connectors/discord/test_discord.py +1 -1
- test/integration/connectors/duckdb/test_duckdb.py +2 -2
- test/integration/connectors/duckdb/test_motherduck.py +2 -2
- test/integration/connectors/elasticsearch/test_elasticsearch.py +2 -2
- test/integration/connectors/elasticsearch/test_opensearch.py +2 -2
- test/integration/connectors/sql/test_databricks_delta_tables.py +3 -3
- test/integration/connectors/sql/test_postgres.py +2 -2
- test/integration/connectors/sql/test_singlestore.py +2 -2
- test/integration/connectors/sql/test_snowflake.py +2 -2
- test/integration/connectors/sql/test_sqlite.py +2 -2
- test/integration/connectors/sql/test_vastdb.py +1 -1
- test/integration/connectors/test_astradb.py +2 -2
- test/integration/connectors/test_azure_ai_search.py +2 -2
- test/integration/connectors/test_chroma.py +2 -2
- test/integration/connectors/test_confluence.py +1 -1
- test/integration/connectors/test_delta_table.py +2 -2
- test/integration/connectors/test_dropbox.py +2 -2
- test/integration/connectors/test_github.py +1 -1
- test/integration/connectors/test_google_drive.py +2 -2
- test/integration/connectors/test_jira.py +1 -1
- test/integration/connectors/test_lancedb.py +7 -7
- test/integration/connectors/test_milvus.py +2 -2
- test/integration/connectors/test_mongodb.py +2 -2
- test/integration/connectors/test_neo4j.py +7 -7
- test/integration/connectors/test_notion.py +2 -2
- test/integration/connectors/test_onedrive.py +2 -2
- test/integration/connectors/test_pinecone.py +3 -3
- test/integration/connectors/test_qdrant.py +6 -6
- test/integration/connectors/test_redis.py +3 -3
- test/integration/connectors/test_s3.py +3 -3
- test/integration/connectors/test_sharepoint.py +1 -1
- test/integration/connectors/test_vectara.py +4 -4
- test/integration/connectors/test_zendesk.py +2 -2
- test/integration/connectors/utils/validation/destination.py +2 -2
- test/integration/connectors/utils/validation/source.py +2 -2
- test/integration/connectors/weaviate/test_cloud.py +1 -1
- test/integration/connectors/weaviate/test_local.py +2 -2
- test/integration/embedders/test_azure_openai.py +1 -1
- test/integration/embedders/test_bedrock.py +2 -2
- test/integration/embedders/test_huggingface.py +1 -1
- test/integration/embedders/test_mixedbread.py +1 -1
- test/integration/embedders/test_octoai.py +2 -2
- test/integration/embedders/test_openai.py +2 -2
- test/integration/embedders/test_togetherai.py +2 -2
- test/integration/embedders/test_vertexai.py +1 -1
- test/integration/embedders/test_voyageai.py +1 -1
- test/integration/partitioners/test_partitioner.py +2 -2
- test/unit/{v2/chunkers → chunkers}/test_chunkers.py +1 -1
- test/unit/{v2/connectors → connectors}/ibm_watsonx/test_ibm_watsonx_s3.py +6 -6
- test/unit/{v2/connectors → connectors}/motherduck/test_base.py +5 -5
- test/unit/{v2/connectors → connectors}/sql/test_sql.py +4 -4
- test/unit/{v2/connectors → connectors}/test_confluence.py +1 -1
- test/unit/{v2/connectors → connectors}/test_jira.py +1 -1
- test/unit/{v2/embedders → embedders}/test_huggingface.py +1 -1
- test/unit/{v2/embedders → embedders}/test_vertexai.py +1 -1
- test/unit/{v2/partitioners → partitioners}/test_partitioner.py +2 -2
- test/unit/test_html.py +1 -1
- test/unit/{v2/test_interfaces.py → test_interfaces.py} +1 -1
- test/unit/test_utils.py +106 -97
- unstructured_ingest/__version__.py +1 -1
- unstructured_ingest/cli/__init__.py +0 -14
- unstructured_ingest/cli/base/__init__.py +4 -0
- unstructured_ingest/cli/base/cmd.py +259 -9
- unstructured_ingest/cli/base/dest.py +58 -61
- unstructured_ingest/cli/base/src.py +54 -36
- unstructured_ingest/cli/cli.py +4 -17
- unstructured_ingest/{v2/cli → cli}/cmds.py +2 -2
- unstructured_ingest/{v2/cli → cli}/utils/model_conversion.py +6 -6
- unstructured_ingest/{v2/types → data_types}/file_data.py +1 -1
- unstructured_ingest/embed/bedrock.py +3 -3
- unstructured_ingest/embed/octoai.py +3 -3
- unstructured_ingest/embed/openai.py +3 -3
- unstructured_ingest/embed/togetherai.py +4 -4
- unstructured_ingest/embed/vertexai.py +1 -1
- unstructured_ingest/embed/voyageai.py +4 -4
- unstructured_ingest/{v2/interfaces → interfaces}/downloader.py +3 -3
- unstructured_ingest/{v2/interfaces → interfaces}/indexer.py +3 -3
- unstructured_ingest/{v2/interfaces → interfaces}/upload_stager.py +2 -2
- unstructured_ingest/{v2/interfaces → interfaces}/uploader.py +2 -2
- unstructured_ingest/{v2/otel.py → otel.py} +1 -1
- unstructured_ingest/pipeline/__init__.py +0 -22
- unstructured_ingest/pipeline/interfaces.py +179 -238
- unstructured_ingest/{v2/pipeline → pipeline}/otel.py +2 -2
- unstructured_ingest/pipeline/pipeline.py +388 -97
- unstructured_ingest/{v2/pipeline → pipeline}/steps/chunk.py +5 -5
- unstructured_ingest/{v2/pipeline → pipeline}/steps/download.py +5 -5
- unstructured_ingest/{v2/pipeline → pipeline}/steps/embed.py +5 -5
- unstructured_ingest/{v2/pipeline → pipeline}/steps/filter.py +4 -4
- unstructured_ingest/{v2/pipeline → pipeline}/steps/index.py +5 -5
- unstructured_ingest/{v2/pipeline → pipeline}/steps/partition.py +5 -5
- unstructured_ingest/{v2/pipeline → pipeline}/steps/stage.py +5 -5
- unstructured_ingest/{v2/pipeline → pipeline}/steps/uncompress.py +4 -4
- unstructured_ingest/{v2/pipeline → pipeline}/steps/upload.py +5 -5
- unstructured_ingest/{v2/processes → processes}/chunker.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connector_registry.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/__init__.py +11 -11
- unstructured_ingest/{v2/processes → processes}/connectors/airtable.py +4 -4
- unstructured_ingest/{v2/processes → processes}/connectors/astradb.py +15 -15
- unstructured_ingest/{v2/processes → processes}/connectors/azure_ai_search.py +8 -9
- unstructured_ingest/{v2/processes → processes}/connectors/chroma.py +10 -7
- unstructured_ingest/{v2/processes → processes}/connectors/confluence.py +11 -11
- unstructured_ingest/{v2/processes → processes}/connectors/couchbase.py +12 -12
- unstructured_ingest/{v2/processes → processes}/connectors/databricks/__init__.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/databricks/volumes.py +9 -9
- unstructured_ingest/{v2/processes → processes}/connectors/databricks/volumes_aws.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/databricks/volumes_azure.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/databricks/volumes_gcp.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/databricks/volumes_native.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/databricks/volumes_table.py +8 -9
- unstructured_ingest/{v2/processes → processes}/connectors/delta_table.py +7 -7
- unstructured_ingest/{v2/processes → processes}/connectors/discord.py +9 -9
- unstructured_ingest/{v2/processes → processes}/connectors/duckdb/__init__.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/duckdb/base.py +3 -4
- unstructured_ingest/{v2/processes → processes}/connectors/duckdb/duckdb.py +7 -7
- unstructured_ingest/{v2/processes → processes}/connectors/duckdb/motherduck.py +7 -7
- unstructured_ingest/{v2/processes → processes}/connectors/elasticsearch/__init__.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/elasticsearch/elasticsearch.py +17 -17
- unstructured_ingest/{v2/processes → processes}/connectors/elasticsearch/opensearch.py +5 -5
- unstructured_ingest/{v2/processes → processes}/connectors/fsspec/__init__.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/fsspec/azure.py +8 -8
- unstructured_ingest/{v2/processes → processes}/connectors/fsspec/box.py +8 -8
- unstructured_ingest/{v2/processes → processes}/connectors/fsspec/dropbox.py +8 -8
- unstructured_ingest/{v2/processes → processes}/connectors/fsspec/fsspec.py +8 -8
- unstructured_ingest/{v2/processes → processes}/connectors/fsspec/gcs.py +8 -8
- unstructured_ingest/{v2/processes → processes}/connectors/fsspec/s3.py +9 -9
- unstructured_ingest/{v2/processes → processes}/connectors/fsspec/sftp.py +5 -5
- unstructured_ingest/{v2/processes → processes}/connectors/github.py +10 -10
- unstructured_ingest/{v2/processes → processes}/connectors/gitlab.py +9 -9
- unstructured_ingest/{v2/processes → processes}/connectors/google_drive.py +22 -13
- unstructured_ingest/{v2/processes → processes}/connectors/ibm_watsonx/__init__.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/ibm_watsonx/ibm_watsonx_s3.py +9 -9
- unstructured_ingest/{v2/processes → processes}/connectors/jira.py +9 -9
- unstructured_ingest/{v2/processes → processes}/connectors/kafka/__init__.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/kafka/cloud.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/kafka/kafka.py +9 -9
- unstructured_ingest/{v2/processes → processes}/connectors/kafka/local.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/kdbai.py +11 -7
- unstructured_ingest/{v2/processes → processes}/connectors/lancedb/__init__.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/lancedb/aws.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/lancedb/azure.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/lancedb/cloud.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/lancedb/gcp.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/lancedb/lancedb.py +6 -6
- unstructured_ingest/{v2/processes → processes}/connectors/lancedb/local.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/local.py +9 -9
- unstructured_ingest/{v2/processes → processes}/connectors/milvus.py +7 -7
- unstructured_ingest/{v2/processes → processes}/connectors/mongodb.py +13 -13
- unstructured_ingest/{v2/processes → processes}/connectors/neo4j.py +7 -7
- unstructured_ingest/{v2/processes → processes}/connectors/notion/client.py +6 -6
- unstructured_ingest/{v2/processes → processes}/connectors/notion/connector.py +15 -15
- unstructured_ingest/{v2/processes → processes}/connectors/notion/helpers.py +4 -4
- unstructured_ingest/{ingest_backoff → processes/connectors/notion/ingest_backoff}/_wrapper.py +5 -1
- unstructured_ingest/processes/connectors/notion/ingest_backoff/types.py +24 -0
- unstructured_ingest/{connector → processes/connectors}/notion/types/block.py +4 -4
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/bookmark.py +2 -2
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/breadcrumb.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/bulleted_list_item.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/callout.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/child_database.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/child_page.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/code.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/column_list.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/divider.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/embed.py +2 -2
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/equation.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/file.py +4 -4
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/heading.py +2 -2
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/image.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/link_preview.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/link_to_page.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/numbered_list.py +2 -2
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/paragraph.py +2 -2
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/pdf.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/quote.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/synced_block.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/table.py +2 -5
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/table_of_contents.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/template.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/todo.py +2 -2
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/toggle.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/unsupported.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/video.py +2 -2
- unstructured_ingest/{connector → processes/connectors}/notion/types/database.py +6 -6
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/__init__.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/checkbox.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/created_by.py +2 -2
- unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/created_time.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/date.py +2 -2
- unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/email.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/files.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/formula.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/last_edited_by.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/last_edited_time.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/multiselect.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/number.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/people.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/phone_number.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/relation.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/rich_text.py +2 -2
- unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/rollup.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/select.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/status.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/title.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/unique_id.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/url.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/verification.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/date.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/file.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/page.py +4 -4
- unstructured_ingest/{connector → processes/connectors}/notion/types/parent.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/rich_text.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/user.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/onedrive.py +55 -27
- unstructured_ingest/{v2/processes → processes}/connectors/outlook.py +9 -9
- unstructured_ingest/{v2/processes → processes}/connectors/pinecone.py +12 -9
- unstructured_ingest/{v2/processes → processes}/connectors/qdrant/__init__.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/qdrant/cloud.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/qdrant/local.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/qdrant/qdrant.py +9 -6
- unstructured_ingest/{v2/processes → processes}/connectors/qdrant/server.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/redisdb.py +6 -6
- unstructured_ingest/{v2/processes → processes}/connectors/salesforce.py +9 -9
- unstructured_ingest/{v2/processes → processes}/connectors/sharepoint.py +8 -8
- unstructured_ingest/{v2/processes → processes}/connectors/slack.py +9 -9
- unstructured_ingest/{v2/processes → processes}/connectors/sql/__init__.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/sql/databricks_delta_tables.py +7 -7
- unstructured_ingest/{v2/processes → processes}/connectors/sql/postgres.py +4 -4
- unstructured_ingest/{v2/processes → processes}/connectors/sql/singlestore.py +4 -4
- unstructured_ingest/{v2/processes → processes}/connectors/sql/snowflake.py +6 -6
- unstructured_ingest/{v2/processes → processes}/connectors/sql/sql.py +16 -11
- unstructured_ingest/{v2/processes → processes}/connectors/sql/sqlite.py +4 -4
- unstructured_ingest/{v2/processes → processes}/connectors/sql/vastdb.py +9 -10
- unstructured_ingest/{v2/processes → processes}/connectors/vectara.py +6 -6
- unstructured_ingest/{v2/processes → processes}/connectors/weaviate/__init__.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/weaviate/cloud.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/weaviate/embedded.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/weaviate/local.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/weaviate/weaviate.py +5 -5
- unstructured_ingest/{v2/processes → processes}/connectors/zendesk/client.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/zendesk/zendesk.py +10 -10
- unstructured_ingest/{v2/processes → processes}/embedder.py +1 -1
- unstructured_ingest/{v2/processes → processes}/filter.py +4 -4
- unstructured_ingest/{v2/processes → processes}/partitioner.py +6 -6
- unstructured_ingest/{v2/processes → processes}/uncompress.py +3 -3
- unstructured_ingest/{v2/processes → processes}/utils/blob_storage.py +2 -2
- unstructured_ingest/{v2/unstructured_api.py → unstructured_api.py} +2 -2
- unstructured_ingest/utils/compression.py +1 -48
- unstructured_ingest/utils/data_prep.py +9 -1
- unstructured_ingest/utils/html.py +3 -3
- unstructured_ingest/{v2/utils.py → utils/pydantic_models.py} +0 -9
- unstructured_ingest/utils/string_and_date_utils.py +1 -1
- {unstructured_ingest-0.6.4.dist-info → unstructured_ingest-0.7.1.dist-info}/METADATA +98 -97
- unstructured_ingest-0.7.1.dist-info/RECORD +370 -0
- {unstructured_ingest-0.6.4.dist-info → unstructured_ingest-0.7.1.dist-info}/top_level.txt +1 -0
- test/unit/v2/test_utils.py +0 -82
- unstructured_ingest/cli/cmd_factory.py +0 -12
- unstructured_ingest/cli/cmds/__init__.py +0 -145
- unstructured_ingest/cli/cmds/airtable.py +0 -69
- unstructured_ingest/cli/cmds/astradb.py +0 -99
- unstructured_ingest/cli/cmds/azure_ai_search.py +0 -65
- unstructured_ingest/cli/cmds/biomed.py +0 -52
- unstructured_ingest/cli/cmds/chroma.py +0 -104
- unstructured_ingest/cli/cmds/clarifai.py +0 -71
- unstructured_ingest/cli/cmds/confluence.py +0 -69
- unstructured_ingest/cli/cmds/databricks_volumes.py +0 -163
- unstructured_ingest/cli/cmds/delta_table.py +0 -94
- unstructured_ingest/cli/cmds/discord.py +0 -47
- unstructured_ingest/cli/cmds/elasticsearch.py +0 -133
- unstructured_ingest/cli/cmds/fsspec/azure.py +0 -94
- unstructured_ingest/cli/cmds/fsspec/box.py +0 -48
- unstructured_ingest/cli/cmds/fsspec/dropbox.py +0 -51
- unstructured_ingest/cli/cmds/fsspec/fsspec.py +0 -15
- unstructured_ingest/cli/cmds/fsspec/gcs.py +0 -71
- unstructured_ingest/cli/cmds/fsspec/s3.py +0 -74
- unstructured_ingest/cli/cmds/fsspec/sftp.py +0 -58
- unstructured_ingest/cli/cmds/github.py +0 -54
- unstructured_ingest/cli/cmds/gitlab.py +0 -54
- unstructured_ingest/cli/cmds/google_drive.py +0 -49
- unstructured_ingest/cli/cmds/hubspot.py +0 -70
- unstructured_ingest/cli/cmds/jira.py +0 -71
- unstructured_ingest/cli/cmds/kafka.py +0 -102
- unstructured_ingest/cli/cmds/local.py +0 -43
- unstructured_ingest/cli/cmds/mongodb.py +0 -72
- unstructured_ingest/cli/cmds/notion.py +0 -48
- unstructured_ingest/cli/cmds/onedrive.py +0 -66
- unstructured_ingest/cli/cmds/opensearch.py +0 -117
- unstructured_ingest/cli/cmds/outlook.py +0 -67
- unstructured_ingest/cli/cmds/pinecone.py +0 -71
- unstructured_ingest/cli/cmds/qdrant.py +0 -124
- unstructured_ingest/cli/cmds/reddit.py +0 -67
- unstructured_ingest/cli/cmds/salesforce.py +0 -58
- unstructured_ingest/cli/cmds/sharepoint.py +0 -66
- unstructured_ingest/cli/cmds/slack.py +0 -56
- unstructured_ingest/cli/cmds/sql.py +0 -66
- unstructured_ingest/cli/cmds/vectara.py +0 -66
- unstructured_ingest/cli/cmds/weaviate.py +0 -98
- unstructured_ingest/cli/cmds/wikipedia.py +0 -40
- unstructured_ingest/cli/common.py +0 -7
- unstructured_ingest/cli/interfaces.py +0 -663
- unstructured_ingest/cli/utils.py +0 -205
- unstructured_ingest/connector/airtable.py +0 -309
- unstructured_ingest/connector/astradb.py +0 -267
- unstructured_ingest/connector/azure_ai_search.py +0 -144
- unstructured_ingest/connector/biomed.py +0 -320
- unstructured_ingest/connector/chroma.py +0 -158
- unstructured_ingest/connector/clarifai.py +0 -122
- unstructured_ingest/connector/confluence.py +0 -285
- unstructured_ingest/connector/databricks_volumes.py +0 -137
- unstructured_ingest/connector/delta_table.py +0 -203
- unstructured_ingest/connector/discord.py +0 -180
- unstructured_ingest/connector/elasticsearch.py +0 -396
- unstructured_ingest/connector/fsspec/azure.py +0 -78
- unstructured_ingest/connector/fsspec/box.py +0 -109
- unstructured_ingest/connector/fsspec/dropbox.py +0 -160
- unstructured_ingest/connector/fsspec/fsspec.py +0 -359
- unstructured_ingest/connector/fsspec/gcs.py +0 -82
- unstructured_ingest/connector/fsspec/s3.py +0 -62
- unstructured_ingest/connector/fsspec/sftp.py +0 -81
- unstructured_ingest/connector/git.py +0 -124
- unstructured_ingest/connector/github.py +0 -174
- unstructured_ingest/connector/gitlab.py +0 -142
- unstructured_ingest/connector/google_drive.py +0 -348
- unstructured_ingest/connector/hubspot.py +0 -278
- unstructured_ingest/connector/jira.py +0 -469
- unstructured_ingest/connector/kafka.py +0 -293
- unstructured_ingest/connector/local.py +0 -139
- unstructured_ingest/connector/mongodb.py +0 -284
- unstructured_ingest/connector/notion/client.py +0 -248
- unstructured_ingest/connector/notion/connector.py +0 -469
- unstructured_ingest/connector/notion/helpers.py +0 -584
- unstructured_ingest/connector/notion/types/blocks/bookmark.py +0 -40
- unstructured_ingest/connector/notion/types/blocks/callout.py +0 -94
- unstructured_ingest/connector/notion/types/blocks/child_database.py +0 -23
- unstructured_ingest/connector/notion/types/blocks/code.py +0 -43
- unstructured_ingest/connector/notion/types/blocks/column_list.py +0 -35
- unstructured_ingest/connector/notion/types/blocks/divider.py +0 -22
- unstructured_ingest/connector/notion/types/blocks/heading.py +0 -37
- unstructured_ingest/connector/notion/types/blocks/link_preview.py +0 -24
- unstructured_ingest/connector/notion/types/blocks/numbered_list.py +0 -29
- unstructured_ingest/connector/notion/types/blocks/quote.py +0 -37
- unstructured_ingest/connector/notion/types/blocks/synced_block.py +0 -57
- unstructured_ingest/connector/notion/types/blocks/table.py +0 -63
- unstructured_ingest/connector/notion/types/blocks/table_of_contents.py +0 -23
- unstructured_ingest/connector/notion/types/blocks/template.py +0 -30
- unstructured_ingest/connector/notion/types/blocks/todo.py +0 -42
- unstructured_ingest/connector/notion/types/blocks/unsupported.py +0 -20
- unstructured_ingest/connector/notion/types/database_properties/__init__.py +0 -106
- unstructured_ingest/connector/notion/types/database_properties/checkbox.py +0 -38
- unstructured_ingest/connector/notion/types/database_properties/date.py +0 -41
- unstructured_ingest/connector/notion/types/database_properties/formula.py +0 -49
- unstructured_ingest/connector/notion/types/database_properties/last_edited_time.py +0 -34
- unstructured_ingest/connector/notion/types/database_properties/multiselect.py +0 -73
- unstructured_ingest/connector/notion/types/database_properties/people.py +0 -40
- unstructured_ingest/connector/notion/types/database_properties/phone_number.py +0 -36
- unstructured_ingest/connector/notion/types/database_properties/relation.py +0 -67
- unstructured_ingest/connector/notion/types/database_properties/select.py +0 -68
- unstructured_ingest/connector/notion/types/database_properties/status.py +0 -80
- unstructured_ingest/connector/notion/types/database_properties/unique_id.py +0 -50
- unstructured_ingest/connector/notion/types/date.py +0 -26
- unstructured_ingest/connector/notion/types/file.py +0 -51
- unstructured_ingest/connector/notion/types/user.py +0 -76
- unstructured_ingest/connector/onedrive.py +0 -232
- unstructured_ingest/connector/opensearch.py +0 -218
- unstructured_ingest/connector/outlook.py +0 -285
- unstructured_ingest/connector/pinecone.py +0 -150
- unstructured_ingest/connector/qdrant.py +0 -144
- unstructured_ingest/connector/reddit.py +0 -166
- unstructured_ingest/connector/registry.py +0 -109
- unstructured_ingest/connector/salesforce.py +0 -301
- unstructured_ingest/connector/sharepoint.py +0 -573
- unstructured_ingest/connector/slack.py +0 -224
- unstructured_ingest/connector/sql.py +0 -199
- unstructured_ingest/connector/vectara.py +0 -253
- unstructured_ingest/connector/weaviate.py +0 -190
- unstructured_ingest/connector/wikipedia.py +0 -208
- unstructured_ingest/enhanced_dataclass/__init__.py +0 -4
- unstructured_ingest/enhanced_dataclass/core.py +0 -99
- unstructured_ingest/enhanced_dataclass/dataclasses.py +0 -54
- unstructured_ingest/enhanced_dataclass/json_mixin.py +0 -125
- unstructured_ingest/interfaces.py +0 -852
- unstructured_ingest/pipeline/copy.py +0 -19
- unstructured_ingest/pipeline/doc_factory.py +0 -12
- unstructured_ingest/pipeline/partition.py +0 -60
- unstructured_ingest/pipeline/permissions.py +0 -12
- unstructured_ingest/pipeline/reformat/chunking.py +0 -134
- unstructured_ingest/pipeline/reformat/embedding.py +0 -64
- unstructured_ingest/pipeline/source.py +0 -77
- unstructured_ingest/pipeline/utils.py +0 -6
- unstructured_ingest/pipeline/write.py +0 -18
- unstructured_ingest/processor.py +0 -93
- unstructured_ingest/runner/__init__.py +0 -104
- unstructured_ingest/runner/airtable.py +0 -35
- unstructured_ingest/runner/astradb.py +0 -34
- unstructured_ingest/runner/base_runner.py +0 -89
- unstructured_ingest/runner/biomed.py +0 -45
- unstructured_ingest/runner/confluence.py +0 -35
- unstructured_ingest/runner/delta_table.py +0 -34
- unstructured_ingest/runner/discord.py +0 -35
- unstructured_ingest/runner/elasticsearch.py +0 -40
- unstructured_ingest/runner/fsspec/azure.py +0 -30
- unstructured_ingest/runner/fsspec/box.py +0 -28
- unstructured_ingest/runner/fsspec/dropbox.py +0 -30
- unstructured_ingest/runner/fsspec/fsspec.py +0 -40
- unstructured_ingest/runner/fsspec/gcs.py +0 -28
- unstructured_ingest/runner/fsspec/s3.py +0 -28
- unstructured_ingest/runner/fsspec/sftp.py +0 -28
- unstructured_ingest/runner/github.py +0 -37
- unstructured_ingest/runner/gitlab.py +0 -37
- unstructured_ingest/runner/google_drive.py +0 -35
- unstructured_ingest/runner/hubspot.py +0 -35
- unstructured_ingest/runner/jira.py +0 -35
- unstructured_ingest/runner/kafka.py +0 -34
- unstructured_ingest/runner/local.py +0 -23
- unstructured_ingest/runner/mongodb.py +0 -34
- unstructured_ingest/runner/notion.py +0 -61
- unstructured_ingest/runner/onedrive.py +0 -35
- unstructured_ingest/runner/opensearch.py +0 -40
- unstructured_ingest/runner/outlook.py +0 -33
- unstructured_ingest/runner/reddit.py +0 -35
- unstructured_ingest/runner/salesforce.py +0 -33
- unstructured_ingest/runner/sharepoint.py +0 -35
- unstructured_ingest/runner/slack.py +0 -33
- unstructured_ingest/runner/utils.py +0 -47
- unstructured_ingest/runner/wikipedia.py +0 -35
- unstructured_ingest/runner/writers/__init__.py +0 -48
- unstructured_ingest/runner/writers/astradb.py +0 -22
- unstructured_ingest/runner/writers/azure_ai_search.py +0 -24
- unstructured_ingest/runner/writers/base_writer.py +0 -26
- unstructured_ingest/runner/writers/chroma.py +0 -22
- unstructured_ingest/runner/writers/clarifai.py +0 -19
- unstructured_ingest/runner/writers/databricks_volumes.py +0 -25
- unstructured_ingest/runner/writers/delta_table.py +0 -24
- unstructured_ingest/runner/writers/elasticsearch.py +0 -24
- unstructured_ingest/runner/writers/fsspec/azure.py +0 -24
- unstructured_ingest/runner/writers/fsspec/box.py +0 -21
- unstructured_ingest/runner/writers/fsspec/dropbox.py +0 -21
- unstructured_ingest/runner/writers/fsspec/gcs.py +0 -19
- unstructured_ingest/runner/writers/fsspec/s3.py +0 -21
- unstructured_ingest/runner/writers/kafka.py +0 -21
- unstructured_ingest/runner/writers/mongodb.py +0 -21
- unstructured_ingest/runner/writers/opensearch.py +0 -26
- unstructured_ingest/runner/writers/pinecone.py +0 -21
- unstructured_ingest/runner/writers/qdrant.py +0 -19
- unstructured_ingest/runner/writers/sql.py +0 -22
- unstructured_ingest/runner/writers/vectara.py +0 -22
- unstructured_ingest/runner/writers/weaviate.py +0 -21
- unstructured_ingest/utils/google_filetype.py +0 -9
- unstructured_ingest/v2/__init__.py +0 -1
- unstructured_ingest/v2/cli/__init__.py +0 -0
- unstructured_ingest/v2/cli/base/__init__.py +0 -4
- unstructured_ingest/v2/cli/base/cmd.py +0 -269
- unstructured_ingest/v2/cli/base/dest.py +0 -85
- unstructured_ingest/v2/cli/base/src.py +0 -85
- unstructured_ingest/v2/cli/cli.py +0 -24
- unstructured_ingest/v2/cli/utils/__init__.py +0 -0
- unstructured_ingest/v2/logger.py +0 -126
- unstructured_ingest/v2/main.py +0 -11
- unstructured_ingest/v2/pipeline/__init__.py +0 -0
- unstructured_ingest/v2/pipeline/interfaces.py +0 -211
- unstructured_ingest/v2/pipeline/pipeline.py +0 -408
- unstructured_ingest/v2/pipeline/steps/__init__.py +0 -0
- unstructured_ingest/v2/processes/connectors/assets/__init__.py +0 -0
- unstructured_ingest/v2/processes/connectors/assets/databricks_delta_table_schema.sql +0 -10
- unstructured_ingest/v2/processes/connectors/assets/weaviate_collection_config.json +0 -23
- unstructured_ingest/v2/processes/connectors/notion/__init__.py +0 -0
- unstructured_ingest/v2/processes/connectors/notion/interfaces.py +0 -32
- unstructured_ingest/v2/processes/connectors/notion/types/__init__.py +0 -0
- unstructured_ingest/v2/processes/connectors/notion/types/block.py +0 -96
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/__init__.py +0 -63
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/breadcrumb.py +0 -21
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/bulleted_list_item.py +0 -31
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/child_page.py +0 -23
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/embed.py +0 -36
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/equation.py +0 -23
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/file.py +0 -49
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/image.py +0 -21
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/link_to_page.py +0 -29
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/paragraph.py +0 -31
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/pdf.py +0 -49
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/toggle.py +0 -37
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/video.py +0 -22
- unstructured_ingest/v2/processes/connectors/notion/types/database.py +0 -73
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/created_by.py +0 -35
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/created_time.py +0 -34
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/email.py +0 -36
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/files.py +0 -37
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/last_edited_by.py +0 -34
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/number.py +0 -49
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/rich_text.py +0 -43
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/rollup.py +0 -56
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/title.py +0 -37
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/url.py +0 -37
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/verification.py +0 -78
- unstructured_ingest/v2/processes/connectors/notion/types/page.py +0 -45
- unstructured_ingest/v2/processes/connectors/notion/types/parent.py +0 -66
- unstructured_ingest/v2/processes/connectors/notion/types/rich_text.py +0 -189
- unstructured_ingest/v2/processes/connectors/zendesk/__init__.py +0 -0
- unstructured_ingest/v2/processes/utils/__init__.py +0 -0
- unstructured_ingest/v2/types/__init__.py +0 -0
- unstructured_ingest-0.6.4.dist-info/RECORD +0 -591
- {test/unit/v2 → examples}/__init__.py +0 -0
- /test/unit/{v2/chunkers → chunkers}/__init__.py +0 -0
- /test/unit/{v2/connectors → connectors}/__init__.py +0 -0
- /test/unit/{v2/connectors → connectors}/ibm_watsonx/__init__.py +0 -0
- /test/unit/{v2/connectors → connectors}/motherduck/__init__.py +0 -0
- /test/unit/{v2/connectors → connectors}/sql/__init__.py +0 -0
- /test/unit/{v2/embedders → embedders}/__init__.py +0 -0
- /test/unit/{v2/embedders → embedders}/test_bedrock.py +0 -0
- /test/unit/{v2/embedders → embedders}/test_mixedbread.py +0 -0
- /test/unit/{v2/embedders → embedders}/test_octoai.py +0 -0
- /test/unit/{v2/embedders → embedders}/test_openai.py +0 -0
- /test/unit/{v2/embedders → embedders}/test_togetherai.py +0 -0
- /test/unit/{v2/embedders → embedders}/test_voyageai.py +0 -0
- /test/unit/{v2/partitioners → partitioners}/__init__.py +0 -0
- /test/unit/{v2/utils → utils}/__init__.py +0 -0
- /test/unit/{v2/utils → utils}/data_generator.py +0 -0
- /unstructured_ingest/{v2/cli → cli}/base/importer.py +0 -0
- /unstructured_ingest/cli/{cmds/fsspec → utils}/__init__.py +0 -0
- /unstructured_ingest/{v2/cli → cli}/utils/click.py +0 -0
- /unstructured_ingest/{connector → data_types}/__init__.py +0 -0
- /unstructured_ingest/{v2/errors.py → errors_v2.py} +0 -0
- /unstructured_ingest/{v2/interfaces → interfaces}/__init__.py +0 -0
- /unstructured_ingest/{v2/interfaces → interfaces}/connector.py +0 -0
- /unstructured_ingest/{v2/interfaces → interfaces}/process.py +0 -0
- /unstructured_ingest/{v2/interfaces → interfaces}/processor.py +0 -0
- /unstructured_ingest/{connector/fsspec → pipeline/steps}/__init__.py +0 -0
- /unstructured_ingest/{v2/processes → processes}/__init__.py +0 -0
- /unstructured_ingest/{connector/notion → processes/connectors/assets}/__init__.py +0 -0
- /unstructured_ingest/{v2/processes → processes}/connectors/fsspec/utils.py +0 -0
- /unstructured_ingest/{connector/notion/types → processes/connectors/notion}/__init__.py +0 -0
- /unstructured_ingest/{ingest_backoff → processes/connectors/notion/ingest_backoff}/__init__.py +0 -0
- /unstructured_ingest/{ingest_backoff → processes/connectors/notion/ingest_backoff}/_common.py +0 -0
- /unstructured_ingest/{connector → processes/connectors}/notion/interfaces.py +0 -0
- /unstructured_ingest/{pipeline/reformat → processes/connectors/notion/types}/__init__.py +0 -0
- /unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/__init__.py +0 -0
- /unstructured_ingest/{v2/processes → processes}/connectors/utils.py +0 -0
- /unstructured_ingest/{runner/fsspec → processes/connectors/zendesk}/__init__.py +0 -0
- /unstructured_ingest/{runner/writers/fsspec → processes/utils}/__init__.py +0 -0
- /unstructured_ingest/{v2 → utils}/constants.py +0 -0
- {unstructured_ingest-0.6.4.dist-info → unstructured_ingest-0.7.1.dist-info}/LICENSE.md +0 -0
- {unstructured_ingest-0.6.4.dist-info → unstructured_ingest-0.7.1.dist-info}/WHEEL +0 -0
- {unstructured_ingest-0.6.4.dist-info → unstructured_ingest-0.7.1.dist-info}/entry_points.txt +0 -0
|
@@ -3,11 +3,11 @@ from dataclasses import dataclass
|
|
|
3
3
|
from pathlib import Path
|
|
4
4
|
from typing import Callable, Optional, TypedDict
|
|
5
5
|
|
|
6
|
-
from unstructured_ingest.
|
|
7
|
-
from unstructured_ingest.
|
|
8
|
-
from unstructured_ingest.
|
|
9
|
-
from unstructured_ingest.
|
|
10
|
-
from unstructured_ingest.
|
|
6
|
+
from unstructured_ingest.data_types.file_data import file_data_from_file
|
|
7
|
+
from unstructured_ingest.interfaces import UploadContent
|
|
8
|
+
from unstructured_ingest.logger import logger
|
|
9
|
+
from unstructured_ingest.pipeline.interfaces import BatchPipelineStep
|
|
10
|
+
from unstructured_ingest.pipeline.otel import instrument
|
|
11
11
|
|
|
12
12
|
STEP_ID = "upload"
|
|
13
13
|
|
|
@@ -5,12 +5,12 @@ from typing import Any, Optional
|
|
|
5
5
|
|
|
6
6
|
from pydantic import BaseModel, Field, SecretStr
|
|
7
7
|
|
|
8
|
+
from unstructured_ingest.interfaces.process import BaseProcess
|
|
9
|
+
from unstructured_ingest.logger import logger
|
|
10
|
+
from unstructured_ingest.unstructured_api import call_api_async
|
|
8
11
|
from unstructured_ingest.utils.chunking import assign_and_map_hash_ids
|
|
9
12
|
from unstructured_ingest.utils.data_prep import get_json_data
|
|
10
13
|
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
11
|
-
from unstructured_ingest.v2.interfaces.process import BaseProcess
|
|
12
|
-
from unstructured_ingest.v2.logger import logger
|
|
13
|
-
from unstructured_ingest.v2.unstructured_api import call_api_async
|
|
14
14
|
|
|
15
15
|
CHUNK_MAX_CHARS_DEFAULT: int = 500
|
|
16
16
|
CHUNK_MULTI_PAGE_DEFAULT: bool = True
|
|
@@ -1,16 +1,16 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
import unstructured_ingest.
|
|
4
|
-
import unstructured_ingest.
|
|
5
|
-
import unstructured_ingest.
|
|
6
|
-
import unstructured_ingest.
|
|
7
|
-
import unstructured_ingest.
|
|
8
|
-
import unstructured_ingest.
|
|
9
|
-
import unstructured_ingest.
|
|
10
|
-
import unstructured_ingest.
|
|
11
|
-
import unstructured_ingest.
|
|
12
|
-
import unstructured_ingest.
|
|
13
|
-
from unstructured_ingest.
|
|
3
|
+
import unstructured_ingest.processes.connectors.databricks # noqa: F401
|
|
4
|
+
import unstructured_ingest.processes.connectors.duckdb # noqa: F401
|
|
5
|
+
import unstructured_ingest.processes.connectors.elasticsearch # noqa: F401
|
|
6
|
+
import unstructured_ingest.processes.connectors.fsspec # noqa: F401
|
|
7
|
+
import unstructured_ingest.processes.connectors.ibm_watsonx # noqa: F401
|
|
8
|
+
import unstructured_ingest.processes.connectors.kafka # noqa: F401
|
|
9
|
+
import unstructured_ingest.processes.connectors.lancedb # noqa: F401
|
|
10
|
+
import unstructured_ingest.processes.connectors.qdrant # noqa: F401
|
|
11
|
+
import unstructured_ingest.processes.connectors.sql # noqa: F401
|
|
12
|
+
import unstructured_ingest.processes.connectors.weaviate # noqa: F401
|
|
13
|
+
from unstructured_ingest.processes.connector_registry import (
|
|
14
14
|
add_destination_entry,
|
|
15
15
|
add_source_entry,
|
|
16
16
|
)
|
|
@@ -6,8 +6,8 @@ from uuid import NAMESPACE_DNS, uuid5
|
|
|
6
6
|
import pandas
|
|
7
7
|
from pydantic import BaseModel, Field, Secret, field_validator
|
|
8
8
|
|
|
9
|
-
from unstructured_ingest.
|
|
10
|
-
from unstructured_ingest.
|
|
9
|
+
from unstructured_ingest.data_types.file_data import FileData, SourceIdentifiers
|
|
10
|
+
from unstructured_ingest.interfaces import (
|
|
11
11
|
AccessConfig,
|
|
12
12
|
ConnectionConfig,
|
|
13
13
|
Downloader,
|
|
@@ -16,10 +16,10 @@ from unstructured_ingest.v2.interfaces import (
|
|
|
16
16
|
Indexer,
|
|
17
17
|
IndexerConfig,
|
|
18
18
|
)
|
|
19
|
-
from unstructured_ingest.
|
|
19
|
+
from unstructured_ingest.processes.connector_registry import (
|
|
20
20
|
SourceRegistryEntry,
|
|
21
21
|
)
|
|
22
|
-
from unstructured_ingest.
|
|
22
|
+
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
23
23
|
|
|
24
24
|
if TYPE_CHECKING:
|
|
25
25
|
from pyairtable import Api
|
|
@@ -10,16 +10,19 @@ from pydantic import BaseModel, Field, Secret
|
|
|
10
10
|
|
|
11
11
|
from unstructured_ingest import __name__ as integration_name
|
|
12
12
|
from unstructured_ingest.__version__ import __version__ as integration_version
|
|
13
|
+
from unstructured_ingest.data_types.file_data import (
|
|
14
|
+
BatchFileData,
|
|
15
|
+
BatchItem,
|
|
16
|
+
FileData,
|
|
17
|
+
FileDataSourceMetadata,
|
|
18
|
+
SourceIdentifiers,
|
|
19
|
+
)
|
|
13
20
|
from unstructured_ingest.error import (
|
|
14
21
|
DestinationConnectionError,
|
|
15
22
|
SourceConnectionError,
|
|
16
23
|
SourceConnectionNetworkError,
|
|
17
24
|
)
|
|
18
|
-
from unstructured_ingest.
|
|
19
|
-
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
20
|
-
from unstructured_ingest.utils.string_and_date_utils import truncate_string_bytes
|
|
21
|
-
from unstructured_ingest.v2.constants import RECORD_ID_LABEL
|
|
22
|
-
from unstructured_ingest.v2.interfaces import (
|
|
25
|
+
from unstructured_ingest.interfaces import (
|
|
23
26
|
AccessConfig,
|
|
24
27
|
ConnectionConfig,
|
|
25
28
|
Downloader,
|
|
@@ -33,19 +36,16 @@ from unstructured_ingest.v2.interfaces import (
|
|
|
33
36
|
UploadStagerConfig,
|
|
34
37
|
download_responses,
|
|
35
38
|
)
|
|
36
|
-
from unstructured_ingest.
|
|
37
|
-
from unstructured_ingest.
|
|
39
|
+
from unstructured_ingest.logger import logger
|
|
40
|
+
from unstructured_ingest.processes.connector_registry import (
|
|
38
41
|
DestinationRegistryEntry,
|
|
39
42
|
SourceRegistryEntry,
|
|
40
43
|
)
|
|
41
|
-
from unstructured_ingest.
|
|
42
|
-
from unstructured_ingest.
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
FileDataSourceMetadata,
|
|
47
|
-
SourceIdentifiers,
|
|
48
|
-
)
|
|
44
|
+
from unstructured_ingest.processes.connectors.utils import format_and_truncate_orig_elements
|
|
45
|
+
from unstructured_ingest.utils.constants import RECORD_ID_LABEL
|
|
46
|
+
from unstructured_ingest.utils.data_prep import batch_generator, get_data
|
|
47
|
+
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
48
|
+
from unstructured_ingest.utils.string_and_date_utils import truncate_string_bytes
|
|
49
49
|
|
|
50
50
|
if TYPE_CHECKING:
|
|
51
51
|
from astrapy import AsyncCollection as AstraDBAsyncCollection
|
|
@@ -5,11 +5,9 @@ from typing import TYPE_CHECKING, Any, Generator
|
|
|
5
5
|
|
|
6
6
|
from pydantic import Field, Secret
|
|
7
7
|
|
|
8
|
+
from unstructured_ingest.data_types.file_data import FileData
|
|
8
9
|
from unstructured_ingest.error import DestinationConnectionError, WriteError
|
|
9
|
-
from unstructured_ingest.
|
|
10
|
-
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
11
|
-
from unstructured_ingest.v2.constants import RECORD_ID_LABEL
|
|
12
|
-
from unstructured_ingest.v2.interfaces import (
|
|
10
|
+
from unstructured_ingest.interfaces import (
|
|
13
11
|
AccessConfig,
|
|
14
12
|
ConnectionConfig,
|
|
15
13
|
Uploader,
|
|
@@ -17,13 +15,14 @@ from unstructured_ingest.v2.interfaces import (
|
|
|
17
15
|
UploadStager,
|
|
18
16
|
UploadStagerConfig,
|
|
19
17
|
)
|
|
20
|
-
from unstructured_ingest.
|
|
21
|
-
from unstructured_ingest.
|
|
18
|
+
from unstructured_ingest.logger import logger
|
|
19
|
+
from unstructured_ingest.processes.connector_registry import (
|
|
22
20
|
DestinationRegistryEntry,
|
|
23
21
|
)
|
|
24
|
-
from unstructured_ingest.
|
|
25
|
-
from unstructured_ingest.
|
|
26
|
-
from unstructured_ingest.
|
|
22
|
+
from unstructured_ingest.processes.connectors.utils import parse_datetime
|
|
23
|
+
from unstructured_ingest.utils.constants import RECORD_ID_LABEL
|
|
24
|
+
from unstructured_ingest.utils.data_prep import batch_generator, get_enhanced_element_id
|
|
25
|
+
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
27
26
|
|
|
28
27
|
if TYPE_CHECKING:
|
|
29
28
|
from azure.search.documents import SearchClient
|
|
@@ -6,10 +6,9 @@ from dateutil import parser
|
|
|
6
6
|
from pydantic import Field, Secret
|
|
7
7
|
from pydantic.functional_validators import BeforeValidator
|
|
8
8
|
|
|
9
|
+
from unstructured_ingest.data_types.file_data import FileData
|
|
9
10
|
from unstructured_ingest.error import DestinationConnectionError
|
|
10
|
-
from unstructured_ingest.
|
|
11
|
-
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
12
|
-
from unstructured_ingest.v2.interfaces import (
|
|
11
|
+
from unstructured_ingest.interfaces import (
|
|
13
12
|
AccessConfig,
|
|
14
13
|
ConnectionConfig,
|
|
15
14
|
Uploader,
|
|
@@ -17,10 +16,14 @@ from unstructured_ingest.v2.interfaces import (
|
|
|
17
16
|
UploadStager,
|
|
18
17
|
UploadStagerConfig,
|
|
19
18
|
)
|
|
20
|
-
from unstructured_ingest.
|
|
21
|
-
from unstructured_ingest.
|
|
22
|
-
from unstructured_ingest.
|
|
23
|
-
|
|
19
|
+
from unstructured_ingest.logger import logger
|
|
20
|
+
from unstructured_ingest.processes.connector_registry import DestinationRegistryEntry
|
|
21
|
+
from unstructured_ingest.utils.data_prep import (
|
|
22
|
+
batch_generator,
|
|
23
|
+
flatten_dict,
|
|
24
|
+
get_enhanced_element_id,
|
|
25
|
+
)
|
|
26
|
+
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
24
27
|
|
|
25
28
|
from .utils import conform_string_to_dict
|
|
26
29
|
|
|
@@ -5,11 +5,13 @@ from typing import TYPE_CHECKING, Generator, List, Optional
|
|
|
5
5
|
|
|
6
6
|
from pydantic import Field, Secret
|
|
7
7
|
|
|
8
|
+
from unstructured_ingest.data_types.file_data import (
|
|
9
|
+
FileData,
|
|
10
|
+
FileDataSourceMetadata,
|
|
11
|
+
SourceIdentifiers,
|
|
12
|
+
)
|
|
8
13
|
from unstructured_ingest.error import SourceConnectionError
|
|
9
|
-
from unstructured_ingest.
|
|
10
|
-
from unstructured_ingest.utils.html import HtmlMixin
|
|
11
|
-
from unstructured_ingest.utils.string_and_date_utils import fix_unescaped_unicode
|
|
12
|
-
from unstructured_ingest.v2.interfaces import (
|
|
14
|
+
from unstructured_ingest.interfaces import (
|
|
13
15
|
AccessConfig,
|
|
14
16
|
ConnectionConfig,
|
|
15
17
|
Downloader,
|
|
@@ -19,15 +21,13 @@ from unstructured_ingest.v2.interfaces import (
|
|
|
19
21
|
IndexerConfig,
|
|
20
22
|
download_responses,
|
|
21
23
|
)
|
|
22
|
-
from unstructured_ingest.
|
|
23
|
-
from unstructured_ingest.
|
|
24
|
+
from unstructured_ingest.logger import logger
|
|
25
|
+
from unstructured_ingest.processes.connector_registry import (
|
|
24
26
|
SourceRegistryEntry,
|
|
25
27
|
)
|
|
26
|
-
from unstructured_ingest.
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
SourceIdentifiers,
|
|
30
|
-
)
|
|
28
|
+
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
29
|
+
from unstructured_ingest.utils.html import HtmlMixin
|
|
30
|
+
from unstructured_ingest.utils.string_and_date_utils import fix_unescaped_unicode
|
|
31
31
|
|
|
32
32
|
if TYPE_CHECKING:
|
|
33
33
|
from atlassian import Confluence
|
|
@@ -8,14 +8,19 @@ from typing import TYPE_CHECKING, Any, Generator, List
|
|
|
8
8
|
|
|
9
9
|
from pydantic import BaseModel, Field, Secret
|
|
10
10
|
|
|
11
|
+
from unstructured_ingest.data_types.file_data import (
|
|
12
|
+
BatchFileData,
|
|
13
|
+
BatchItem,
|
|
14
|
+
FileData,
|
|
15
|
+
FileDataSourceMetadata,
|
|
16
|
+
SourceIdentifiers,
|
|
17
|
+
)
|
|
11
18
|
from unstructured_ingest.error import (
|
|
12
19
|
DestinationConnectionError,
|
|
13
20
|
SourceConnectionError,
|
|
14
21
|
SourceConnectionNetworkError,
|
|
15
22
|
)
|
|
16
|
-
from unstructured_ingest.
|
|
17
|
-
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
18
|
-
from unstructured_ingest.v2.interfaces import (
|
|
23
|
+
from unstructured_ingest.interfaces import (
|
|
19
24
|
AccessConfig,
|
|
20
25
|
ConnectionConfig,
|
|
21
26
|
Downloader,
|
|
@@ -29,18 +34,13 @@ from unstructured_ingest.v2.interfaces import (
|
|
|
29
34
|
UploadStagerConfig,
|
|
30
35
|
download_responses,
|
|
31
36
|
)
|
|
32
|
-
from unstructured_ingest.
|
|
33
|
-
from unstructured_ingest.
|
|
37
|
+
from unstructured_ingest.logger import logger
|
|
38
|
+
from unstructured_ingest.processes.connector_registry import (
|
|
34
39
|
DestinationRegistryEntry,
|
|
35
40
|
SourceRegistryEntry,
|
|
36
41
|
)
|
|
37
|
-
from unstructured_ingest.
|
|
38
|
-
|
|
39
|
-
BatchItem,
|
|
40
|
-
FileData,
|
|
41
|
-
FileDataSourceMetadata,
|
|
42
|
-
SourceIdentifiers,
|
|
43
|
-
)
|
|
42
|
+
from unstructured_ingest.utils.data_prep import batch_generator, flatten_dict
|
|
43
|
+
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
44
44
|
|
|
45
45
|
if TYPE_CHECKING:
|
|
46
46
|
from couchbase.cluster import Cluster
|
|
@@ -7,14 +7,18 @@ from uuid import NAMESPACE_DNS, uuid5
|
|
|
7
7
|
|
|
8
8
|
from pydantic import BaseModel, Field, Secret
|
|
9
9
|
|
|
10
|
-
from unstructured_ingest.
|
|
11
|
-
|
|
10
|
+
from unstructured_ingest.data_types.file_data import (
|
|
11
|
+
FileData,
|
|
12
|
+
FileDataSourceMetadata,
|
|
13
|
+
SourceIdentifiers,
|
|
14
|
+
)
|
|
15
|
+
from unstructured_ingest.errors_v2 import (
|
|
12
16
|
ProviderError,
|
|
13
17
|
RateLimitError,
|
|
14
18
|
UserAuthError,
|
|
15
19
|
UserError,
|
|
16
20
|
)
|
|
17
|
-
from unstructured_ingest.
|
|
21
|
+
from unstructured_ingest.interfaces import (
|
|
18
22
|
AccessConfig,
|
|
19
23
|
ConnectionConfig,
|
|
20
24
|
Downloader,
|
|
@@ -25,12 +29,8 @@ from unstructured_ingest.v2.interfaces import (
|
|
|
25
29
|
Uploader,
|
|
26
30
|
UploaderConfig,
|
|
27
31
|
)
|
|
28
|
-
from unstructured_ingest.
|
|
29
|
-
from unstructured_ingest.
|
|
30
|
-
FileData,
|
|
31
|
-
FileDataSourceMetadata,
|
|
32
|
-
SourceIdentifiers,
|
|
33
|
-
)
|
|
32
|
+
from unstructured_ingest.logger import logger
|
|
33
|
+
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
34
34
|
|
|
35
35
|
if TYPE_CHECKING:
|
|
36
36
|
from databricks.sdk import WorkspaceClient
|
|
@@ -3,11 +3,11 @@ from typing import Optional
|
|
|
3
3
|
|
|
4
4
|
from pydantic import Field, Secret
|
|
5
5
|
|
|
6
|
-
from unstructured_ingest.
|
|
6
|
+
from unstructured_ingest.processes.connector_registry import (
|
|
7
7
|
DestinationRegistryEntry,
|
|
8
8
|
SourceRegistryEntry,
|
|
9
9
|
)
|
|
10
|
-
from unstructured_ingest.
|
|
10
|
+
from unstructured_ingest.processes.connectors.databricks.volumes import (
|
|
11
11
|
DatabricksVolumesAccessConfig,
|
|
12
12
|
DatabricksVolumesConnectionConfig,
|
|
13
13
|
DatabricksVolumesDownloader,
|
|
@@ -17,7 +17,7 @@ from unstructured_ingest.v2.processes.connectors.databricks.volumes import (
|
|
|
17
17
|
DatabricksVolumesUploader,
|
|
18
18
|
DatabricksVolumesUploaderConfig,
|
|
19
19
|
)
|
|
20
|
-
from unstructured_ingest.
|
|
20
|
+
from unstructured_ingest.processes.utils.blob_storage import (
|
|
21
21
|
BlobStoreUploadStager,
|
|
22
22
|
BlobStoreUploadStagerConfig,
|
|
23
23
|
)
|
|
@@ -3,11 +3,11 @@ from typing import Optional
|
|
|
3
3
|
|
|
4
4
|
from pydantic import Field, Secret
|
|
5
5
|
|
|
6
|
-
from unstructured_ingest.
|
|
6
|
+
from unstructured_ingest.processes.connector_registry import (
|
|
7
7
|
DestinationRegistryEntry,
|
|
8
8
|
SourceRegistryEntry,
|
|
9
9
|
)
|
|
10
|
-
from unstructured_ingest.
|
|
10
|
+
from unstructured_ingest.processes.connectors.databricks.volumes import (
|
|
11
11
|
DatabricksVolumesAccessConfig,
|
|
12
12
|
DatabricksVolumesConnectionConfig,
|
|
13
13
|
DatabricksVolumesDownloader,
|
|
@@ -17,7 +17,7 @@ from unstructured_ingest.v2.processes.connectors.databricks.volumes import (
|
|
|
17
17
|
DatabricksVolumesUploader,
|
|
18
18
|
DatabricksVolumesUploaderConfig,
|
|
19
19
|
)
|
|
20
|
-
from unstructured_ingest.
|
|
20
|
+
from unstructured_ingest.processes.utils.blob_storage import (
|
|
21
21
|
BlobStoreUploadStager,
|
|
22
22
|
BlobStoreUploadStagerConfig,
|
|
23
23
|
)
|
|
@@ -3,11 +3,11 @@ from typing import Optional
|
|
|
3
3
|
|
|
4
4
|
from pydantic import Field, Secret
|
|
5
5
|
|
|
6
|
-
from unstructured_ingest.
|
|
6
|
+
from unstructured_ingest.processes.connector_registry import (
|
|
7
7
|
DestinationRegistryEntry,
|
|
8
8
|
SourceRegistryEntry,
|
|
9
9
|
)
|
|
10
|
-
from unstructured_ingest.
|
|
10
|
+
from unstructured_ingest.processes.connectors.databricks.volumes import (
|
|
11
11
|
DatabricksVolumesAccessConfig,
|
|
12
12
|
DatabricksVolumesConnectionConfig,
|
|
13
13
|
DatabricksVolumesDownloader,
|
|
@@ -17,7 +17,7 @@ from unstructured_ingest.v2.processes.connectors.databricks.volumes import (
|
|
|
17
17
|
DatabricksVolumesUploader,
|
|
18
18
|
DatabricksVolumesUploaderConfig,
|
|
19
19
|
)
|
|
20
|
-
from unstructured_ingest.
|
|
20
|
+
from unstructured_ingest.processes.utils.blob_storage import (
|
|
21
21
|
BlobStoreUploadStager,
|
|
22
22
|
BlobStoreUploadStagerConfig,
|
|
23
23
|
)
|
|
@@ -3,11 +3,11 @@ from typing import Optional
|
|
|
3
3
|
|
|
4
4
|
from pydantic import Field, Secret
|
|
5
5
|
|
|
6
|
-
from unstructured_ingest.
|
|
6
|
+
from unstructured_ingest.processes.connector_registry import (
|
|
7
7
|
DestinationRegistryEntry,
|
|
8
8
|
SourceRegistryEntry,
|
|
9
9
|
)
|
|
10
|
-
from unstructured_ingest.
|
|
10
|
+
from unstructured_ingest.processes.connectors.databricks.volumes import (
|
|
11
11
|
DatabricksVolumesAccessConfig,
|
|
12
12
|
DatabricksVolumesConnectionConfig,
|
|
13
13
|
DatabricksVolumesDownloader,
|
|
@@ -17,7 +17,7 @@ from unstructured_ingest.v2.processes.connectors.databricks.volumes import (
|
|
|
17
17
|
DatabricksVolumesUploader,
|
|
18
18
|
DatabricksVolumesUploaderConfig,
|
|
19
19
|
)
|
|
20
|
-
from unstructured_ingest.
|
|
20
|
+
from unstructured_ingest.processes.utils.blob_storage import (
|
|
21
21
|
BlobStoreUploadStager,
|
|
22
22
|
BlobStoreUploadStagerConfig,
|
|
23
23
|
)
|
|
@@ -7,25 +7,24 @@ from typing import TYPE_CHECKING, Any, Generator, Optional
|
|
|
7
7
|
|
|
8
8
|
from pydantic import Field
|
|
9
9
|
|
|
10
|
-
from unstructured_ingest.
|
|
11
|
-
from unstructured_ingest.
|
|
12
|
-
from unstructured_ingest.v2.interfaces import (
|
|
10
|
+
from unstructured_ingest.data_types.file_data import FileData
|
|
11
|
+
from unstructured_ingest.interfaces import (
|
|
13
12
|
Uploader,
|
|
14
13
|
UploaderConfig,
|
|
15
14
|
UploadStager,
|
|
16
15
|
UploadStagerConfig,
|
|
17
16
|
)
|
|
18
|
-
from unstructured_ingest.
|
|
19
|
-
from unstructured_ingest.
|
|
17
|
+
from unstructured_ingest.logger import logger
|
|
18
|
+
from unstructured_ingest.processes.connector_registry import (
|
|
20
19
|
DestinationRegistryEntry,
|
|
21
20
|
)
|
|
22
|
-
from unstructured_ingest.
|
|
23
|
-
from unstructured_ingest.
|
|
21
|
+
from unstructured_ingest.processes.connectors.databricks.volumes import DatabricksPathMixin
|
|
22
|
+
from unstructured_ingest.processes.connectors.sql.databricks_delta_tables import (
|
|
24
23
|
DatabricksDeltaTablesConnectionConfig,
|
|
25
24
|
DatabricksDeltaTablesUploadStagerConfig,
|
|
26
25
|
)
|
|
27
|
-
from unstructured_ingest.
|
|
28
|
-
from unstructured_ingest.
|
|
26
|
+
from unstructured_ingest.utils.constants import RECORD_ID_LABEL
|
|
27
|
+
from unstructured_ingest.utils.data_prep import get_enhanced_element_id, get_json_data, write_data
|
|
29
28
|
|
|
30
29
|
CONNECTOR_TYPE = "databricks_volume_delta_tables"
|
|
31
30
|
|
|
@@ -8,11 +8,9 @@ from urllib.parse import urlparse
|
|
|
8
8
|
|
|
9
9
|
from pydantic import Field, Secret
|
|
10
10
|
|
|
11
|
+
from unstructured_ingest.data_types.file_data import FileData
|
|
11
12
|
from unstructured_ingest.error import DestinationConnectionError
|
|
12
|
-
from unstructured_ingest.
|
|
13
|
-
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
14
|
-
from unstructured_ingest.utils.table import convert_to_pandas_dataframe
|
|
15
|
-
from unstructured_ingest.v2.interfaces import (
|
|
13
|
+
from unstructured_ingest.interfaces import (
|
|
16
14
|
AccessConfig,
|
|
17
15
|
ConnectionConfig,
|
|
18
16
|
Uploader,
|
|
@@ -20,9 +18,11 @@ from unstructured_ingest.v2.interfaces import (
|
|
|
20
18
|
UploadStager,
|
|
21
19
|
UploadStagerConfig,
|
|
22
20
|
)
|
|
23
|
-
from unstructured_ingest.
|
|
24
|
-
from unstructured_ingest.
|
|
25
|
-
from unstructured_ingest.
|
|
21
|
+
from unstructured_ingest.logger import logger
|
|
22
|
+
from unstructured_ingest.processes.connector_registry import DestinationRegistryEntry
|
|
23
|
+
from unstructured_ingest.utils.data_prep import get_data_df, get_json_data
|
|
24
|
+
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
25
|
+
from unstructured_ingest.utils.table import convert_to_pandas_dataframe
|
|
26
26
|
|
|
27
27
|
CONNECTOR_TYPE = "delta_table"
|
|
28
28
|
|
|
@@ -4,9 +4,13 @@ from typing import TYPE_CHECKING, Any, Generator, Optional
|
|
|
4
4
|
|
|
5
5
|
from pydantic import Field, Secret
|
|
6
6
|
|
|
7
|
+
from unstructured_ingest.data_types.file_data import (
|
|
8
|
+
FileData,
|
|
9
|
+
FileDataSourceMetadata,
|
|
10
|
+
SourceIdentifiers,
|
|
11
|
+
)
|
|
7
12
|
from unstructured_ingest.error import SourceConnectionError
|
|
8
|
-
from unstructured_ingest.
|
|
9
|
-
from unstructured_ingest.v2.interfaces import (
|
|
13
|
+
from unstructured_ingest.interfaces import (
|
|
10
14
|
AccessConfig,
|
|
11
15
|
ConnectionConfig,
|
|
12
16
|
Downloader,
|
|
@@ -15,13 +19,9 @@ from unstructured_ingest.v2.interfaces import (
|
|
|
15
19
|
Indexer,
|
|
16
20
|
IndexerConfig,
|
|
17
21
|
)
|
|
18
|
-
from unstructured_ingest.
|
|
19
|
-
from unstructured_ingest.
|
|
20
|
-
from unstructured_ingest.
|
|
21
|
-
FileData,
|
|
22
|
-
FileDataSourceMetadata,
|
|
23
|
-
SourceIdentifiers,
|
|
24
|
-
)
|
|
22
|
+
from unstructured_ingest.logger import logger
|
|
23
|
+
from unstructured_ingest.processes.connector_registry import SourceRegistryEntry
|
|
24
|
+
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
25
25
|
|
|
26
26
|
if TYPE_CHECKING:
|
|
27
27
|
from discord import Client as DiscordClient
|
|
@@ -2,11 +2,10 @@ from dataclasses import dataclass
|
|
|
2
2
|
from pathlib import Path
|
|
3
3
|
from typing import Any
|
|
4
4
|
|
|
5
|
-
from unstructured_ingest.
|
|
5
|
+
from unstructured_ingest.data_types.file_data import FileData
|
|
6
|
+
from unstructured_ingest.interfaces import UploadStager
|
|
7
|
+
from unstructured_ingest.utils.data_prep import get_data, get_enhanced_element_id, write_data
|
|
6
8
|
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
7
|
-
from unstructured_ingest.v2.interfaces import UploadStager
|
|
8
|
-
from unstructured_ingest.v2.types.file_data import FileData
|
|
9
|
-
from unstructured_ingest.v2.utils import get_enhanced_element_id
|
|
10
9
|
|
|
11
10
|
_COLUMNS = (
|
|
12
11
|
"id",
|
|
@@ -5,20 +5,20 @@ from typing import TYPE_CHECKING, Any, Generator, Optional
|
|
|
5
5
|
|
|
6
6
|
from pydantic import Field, Secret
|
|
7
7
|
|
|
8
|
+
from unstructured_ingest.data_types.file_data import FileData
|
|
8
9
|
from unstructured_ingest.error import DestinationConnectionError
|
|
9
|
-
from unstructured_ingest.
|
|
10
|
-
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
11
|
-
from unstructured_ingest.v2.interfaces import (
|
|
10
|
+
from unstructured_ingest.interfaces import (
|
|
12
11
|
AccessConfig,
|
|
13
12
|
ConnectionConfig,
|
|
14
13
|
Uploader,
|
|
15
14
|
UploaderConfig,
|
|
16
15
|
UploadStagerConfig,
|
|
17
16
|
)
|
|
18
|
-
from unstructured_ingest.
|
|
19
|
-
from unstructured_ingest.
|
|
20
|
-
from unstructured_ingest.
|
|
21
|
-
from unstructured_ingest.
|
|
17
|
+
from unstructured_ingest.logger import logger
|
|
18
|
+
from unstructured_ingest.processes.connector_registry import DestinationRegistryEntry
|
|
19
|
+
from unstructured_ingest.processes.connectors.duckdb.base import BaseDuckDBUploadStager
|
|
20
|
+
from unstructured_ingest.utils.data_prep import get_data_df
|
|
21
|
+
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
22
22
|
|
|
23
23
|
if TYPE_CHECKING:
|
|
24
24
|
from duckdb import DuckDBPyConnection as DuckDBConnection
|
|
@@ -6,20 +6,20 @@ from typing import TYPE_CHECKING, Any, Generator, Optional
|
|
|
6
6
|
from pydantic import Field, Secret
|
|
7
7
|
|
|
8
8
|
from unstructured_ingest.__version__ import __version__ as unstructured_io_ingest_version
|
|
9
|
+
from unstructured_ingest.data_types.file_data import FileData
|
|
9
10
|
from unstructured_ingest.error import DestinationConnectionError
|
|
10
|
-
from unstructured_ingest.
|
|
11
|
-
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
12
|
-
from unstructured_ingest.v2.interfaces import (
|
|
11
|
+
from unstructured_ingest.interfaces import (
|
|
13
12
|
AccessConfig,
|
|
14
13
|
ConnectionConfig,
|
|
15
14
|
Uploader,
|
|
16
15
|
UploaderConfig,
|
|
17
16
|
UploadStagerConfig,
|
|
18
17
|
)
|
|
19
|
-
from unstructured_ingest.
|
|
20
|
-
from unstructured_ingest.
|
|
21
|
-
from unstructured_ingest.
|
|
22
|
-
from unstructured_ingest.
|
|
18
|
+
from unstructured_ingest.logger import logger
|
|
19
|
+
from unstructured_ingest.processes.connector_registry import DestinationRegistryEntry
|
|
20
|
+
from unstructured_ingest.processes.connectors.duckdb.base import BaseDuckDBUploadStager
|
|
21
|
+
from unstructured_ingest.utils.data_prep import get_data_df
|
|
22
|
+
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
23
23
|
|
|
24
24
|
if TYPE_CHECKING:
|
|
25
25
|
from duckdb import DuckDBPyConnection as MotherDuckConnection
|