unstructured-ingest 0.6.4__py3-none-any.whl → 0.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of unstructured-ingest might be problematic. Click here for more details.
- examples/airtable.py +44 -0
- examples/azure_cognitive_search.py +55 -0
- examples/chroma.py +54 -0
- examples/couchbase.py +55 -0
- examples/databricks_volumes_dest.py +55 -0
- examples/databricks_volumes_source.py +53 -0
- examples/delta_table.py +45 -0
- examples/discord_example.py +36 -0
- examples/elasticsearch.py +49 -0
- examples/google_drive.py +45 -0
- examples/kdbai.py +54 -0
- examples/local.py +36 -0
- examples/milvus.py +44 -0
- examples/mongodb.py +53 -0
- examples/opensearch.py +50 -0
- examples/pinecone.py +57 -0
- examples/s3.py +38 -0
- examples/salesforce.py +44 -0
- examples/sharepoint.py +47 -0
- examples/singlestore.py +49 -0
- examples/sql.py +90 -0
- examples/vectara.py +54 -0
- examples/weaviate.py +44 -0
- test/integration/chunkers/test_chunkers.py +1 -1
- test/integration/connectors/conftest.py +1 -1
- test/integration/connectors/databricks/test_volumes_native.py +3 -3
- test/integration/connectors/discord/test_discord.py +1 -1
- test/integration/connectors/duckdb/test_duckdb.py +2 -2
- test/integration/connectors/duckdb/test_motherduck.py +2 -2
- test/integration/connectors/elasticsearch/test_elasticsearch.py +2 -2
- test/integration/connectors/elasticsearch/test_opensearch.py +2 -2
- test/integration/connectors/sql/test_databricks_delta_tables.py +3 -3
- test/integration/connectors/sql/test_postgres.py +2 -2
- test/integration/connectors/sql/test_singlestore.py +2 -2
- test/integration/connectors/sql/test_snowflake.py +2 -2
- test/integration/connectors/sql/test_sqlite.py +2 -2
- test/integration/connectors/sql/test_vastdb.py +1 -1
- test/integration/connectors/test_astradb.py +2 -2
- test/integration/connectors/test_azure_ai_search.py +2 -2
- test/integration/connectors/test_chroma.py +2 -2
- test/integration/connectors/test_confluence.py +1 -1
- test/integration/connectors/test_delta_table.py +2 -2
- test/integration/connectors/test_dropbox.py +2 -2
- test/integration/connectors/test_github.py +1 -1
- test/integration/connectors/test_google_drive.py +2 -2
- test/integration/connectors/test_jira.py +1 -1
- test/integration/connectors/test_lancedb.py +7 -7
- test/integration/connectors/test_milvus.py +2 -2
- test/integration/connectors/test_mongodb.py +2 -2
- test/integration/connectors/test_neo4j.py +7 -7
- test/integration/connectors/test_notion.py +2 -2
- test/integration/connectors/test_onedrive.py +2 -2
- test/integration/connectors/test_pinecone.py +3 -3
- test/integration/connectors/test_qdrant.py +6 -6
- test/integration/connectors/test_redis.py +3 -3
- test/integration/connectors/test_s3.py +3 -3
- test/integration/connectors/test_sharepoint.py +1 -1
- test/integration/connectors/test_vectara.py +4 -4
- test/integration/connectors/test_zendesk.py +2 -2
- test/integration/connectors/utils/validation/destination.py +2 -2
- test/integration/connectors/utils/validation/source.py +2 -2
- test/integration/connectors/weaviate/test_cloud.py +1 -1
- test/integration/connectors/weaviate/test_local.py +2 -2
- test/integration/embedders/test_azure_openai.py +1 -1
- test/integration/embedders/test_bedrock.py +2 -2
- test/integration/embedders/test_huggingface.py +1 -1
- test/integration/embedders/test_mixedbread.py +1 -1
- test/integration/embedders/test_octoai.py +2 -2
- test/integration/embedders/test_openai.py +2 -2
- test/integration/embedders/test_togetherai.py +2 -2
- test/integration/embedders/test_vertexai.py +1 -1
- test/integration/embedders/test_voyageai.py +1 -1
- test/integration/partitioners/test_partitioner.py +2 -2
- test/unit/{v2/chunkers → chunkers}/test_chunkers.py +1 -1
- test/unit/{v2/connectors → connectors}/ibm_watsonx/test_ibm_watsonx_s3.py +6 -6
- test/unit/{v2/connectors → connectors}/motherduck/test_base.py +5 -5
- test/unit/{v2/connectors → connectors}/sql/test_sql.py +4 -4
- test/unit/{v2/connectors → connectors}/test_confluence.py +1 -1
- test/unit/{v2/connectors → connectors}/test_jira.py +1 -1
- test/unit/{v2/embedders → embedders}/test_huggingface.py +1 -1
- test/unit/{v2/embedders → embedders}/test_vertexai.py +1 -1
- test/unit/{v2/partitioners → partitioners}/test_partitioner.py +2 -2
- test/unit/test_html.py +1 -1
- test/unit/{v2/test_interfaces.py → test_interfaces.py} +1 -1
- test/unit/test_utils.py +106 -97
- unstructured_ingest/__version__.py +1 -1
- unstructured_ingest/cli/__init__.py +0 -14
- unstructured_ingest/cli/base/__init__.py +4 -0
- unstructured_ingest/cli/base/cmd.py +259 -9
- unstructured_ingest/cli/base/dest.py +58 -61
- unstructured_ingest/cli/base/src.py +54 -36
- unstructured_ingest/cli/cli.py +4 -17
- unstructured_ingest/{v2/cli → cli}/cmds.py +2 -2
- unstructured_ingest/{v2/cli → cli}/utils/model_conversion.py +6 -6
- unstructured_ingest/{v2/types → data_types}/file_data.py +1 -1
- unstructured_ingest/embed/bedrock.py +3 -3
- unstructured_ingest/embed/octoai.py +3 -3
- unstructured_ingest/embed/openai.py +3 -3
- unstructured_ingest/embed/togetherai.py +4 -4
- unstructured_ingest/embed/vertexai.py +1 -1
- unstructured_ingest/embed/voyageai.py +4 -4
- unstructured_ingest/{v2/interfaces → interfaces}/downloader.py +3 -3
- unstructured_ingest/{v2/interfaces → interfaces}/indexer.py +3 -3
- unstructured_ingest/{v2/interfaces → interfaces}/upload_stager.py +2 -2
- unstructured_ingest/{v2/interfaces → interfaces}/uploader.py +2 -2
- unstructured_ingest/{v2/otel.py → otel.py} +1 -1
- unstructured_ingest/pipeline/__init__.py +0 -22
- unstructured_ingest/pipeline/interfaces.py +179 -238
- unstructured_ingest/{v2/pipeline → pipeline}/otel.py +2 -2
- unstructured_ingest/pipeline/pipeline.py +388 -97
- unstructured_ingest/{v2/pipeline → pipeline}/steps/chunk.py +5 -5
- unstructured_ingest/{v2/pipeline → pipeline}/steps/download.py +5 -5
- unstructured_ingest/{v2/pipeline → pipeline}/steps/embed.py +5 -5
- unstructured_ingest/{v2/pipeline → pipeline}/steps/filter.py +4 -4
- unstructured_ingest/{v2/pipeline → pipeline}/steps/index.py +5 -5
- unstructured_ingest/{v2/pipeline → pipeline}/steps/partition.py +5 -5
- unstructured_ingest/{v2/pipeline → pipeline}/steps/stage.py +5 -5
- unstructured_ingest/{v2/pipeline → pipeline}/steps/uncompress.py +4 -4
- unstructured_ingest/{v2/pipeline → pipeline}/steps/upload.py +5 -5
- unstructured_ingest/{v2/processes → processes}/chunker.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connector_registry.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/__init__.py +11 -11
- unstructured_ingest/{v2/processes → processes}/connectors/airtable.py +4 -4
- unstructured_ingest/{v2/processes → processes}/connectors/astradb.py +15 -15
- unstructured_ingest/{v2/processes → processes}/connectors/azure_ai_search.py +8 -9
- unstructured_ingest/{v2/processes → processes}/connectors/chroma.py +10 -7
- unstructured_ingest/{v2/processes → processes}/connectors/confluence.py +11 -11
- unstructured_ingest/{v2/processes → processes}/connectors/couchbase.py +12 -12
- unstructured_ingest/{v2/processes → processes}/connectors/databricks/__init__.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/databricks/volumes.py +9 -9
- unstructured_ingest/{v2/processes → processes}/connectors/databricks/volumes_aws.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/databricks/volumes_azure.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/databricks/volumes_gcp.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/databricks/volumes_native.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/databricks/volumes_table.py +8 -9
- unstructured_ingest/{v2/processes → processes}/connectors/delta_table.py +7 -7
- unstructured_ingest/{v2/processes → processes}/connectors/discord.py +9 -9
- unstructured_ingest/{v2/processes → processes}/connectors/duckdb/__init__.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/duckdb/base.py +3 -4
- unstructured_ingest/{v2/processes → processes}/connectors/duckdb/duckdb.py +7 -7
- unstructured_ingest/{v2/processes → processes}/connectors/duckdb/motherduck.py +7 -7
- unstructured_ingest/{v2/processes → processes}/connectors/elasticsearch/__init__.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/elasticsearch/elasticsearch.py +17 -17
- unstructured_ingest/{v2/processes → processes}/connectors/elasticsearch/opensearch.py +5 -5
- unstructured_ingest/{v2/processes → processes}/connectors/fsspec/__init__.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/fsspec/azure.py +8 -8
- unstructured_ingest/{v2/processes → processes}/connectors/fsspec/box.py +8 -8
- unstructured_ingest/{v2/processes → processes}/connectors/fsspec/dropbox.py +8 -8
- unstructured_ingest/{v2/processes → processes}/connectors/fsspec/fsspec.py +8 -8
- unstructured_ingest/{v2/processes → processes}/connectors/fsspec/gcs.py +8 -8
- unstructured_ingest/{v2/processes → processes}/connectors/fsspec/s3.py +9 -9
- unstructured_ingest/{v2/processes → processes}/connectors/fsspec/sftp.py +5 -5
- unstructured_ingest/{v2/processes → processes}/connectors/github.py +10 -10
- unstructured_ingest/{v2/processes → processes}/connectors/gitlab.py +9 -9
- unstructured_ingest/{v2/processes → processes}/connectors/google_drive.py +22 -13
- unstructured_ingest/{v2/processes → processes}/connectors/ibm_watsonx/__init__.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/ibm_watsonx/ibm_watsonx_s3.py +9 -9
- unstructured_ingest/{v2/processes → processes}/connectors/jira.py +9 -9
- unstructured_ingest/{v2/processes → processes}/connectors/kafka/__init__.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/kafka/cloud.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/kafka/kafka.py +9 -9
- unstructured_ingest/{v2/processes → processes}/connectors/kafka/local.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/kdbai.py +11 -7
- unstructured_ingest/{v2/processes → processes}/connectors/lancedb/__init__.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/lancedb/aws.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/lancedb/azure.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/lancedb/cloud.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/lancedb/gcp.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/lancedb/lancedb.py +6 -6
- unstructured_ingest/{v2/processes → processes}/connectors/lancedb/local.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/local.py +9 -9
- unstructured_ingest/{v2/processes → processes}/connectors/milvus.py +7 -7
- unstructured_ingest/{v2/processes → processes}/connectors/mongodb.py +13 -13
- unstructured_ingest/{v2/processes → processes}/connectors/neo4j.py +7 -7
- unstructured_ingest/{v2/processes → processes}/connectors/notion/client.py +6 -6
- unstructured_ingest/{v2/processes → processes}/connectors/notion/connector.py +15 -15
- unstructured_ingest/{v2/processes → processes}/connectors/notion/helpers.py +4 -4
- unstructured_ingest/{ingest_backoff → processes/connectors/notion/ingest_backoff}/_wrapper.py +5 -1
- unstructured_ingest/processes/connectors/notion/ingest_backoff/types.py +24 -0
- unstructured_ingest/{connector → processes/connectors}/notion/types/block.py +4 -4
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/bookmark.py +2 -2
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/breadcrumb.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/bulleted_list_item.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/callout.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/child_database.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/child_page.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/code.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/column_list.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/divider.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/embed.py +2 -2
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/equation.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/file.py +4 -4
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/heading.py +2 -2
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/image.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/link_preview.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/link_to_page.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/numbered_list.py +2 -2
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/paragraph.py +2 -2
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/pdf.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/quote.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/synced_block.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/table.py +2 -5
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/table_of_contents.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/template.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/todo.py +2 -2
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/toggle.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/unsupported.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/video.py +2 -2
- unstructured_ingest/{connector → processes/connectors}/notion/types/database.py +6 -6
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/__init__.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/checkbox.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/created_by.py +2 -2
- unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/created_time.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/date.py +2 -2
- unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/email.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/files.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/formula.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/last_edited_by.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/last_edited_time.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/multiselect.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/number.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/people.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/phone_number.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/relation.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/rich_text.py +2 -2
- unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/rollup.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/select.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/status.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/title.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/unique_id.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/url.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/verification.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/date.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/file.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/page.py +4 -4
- unstructured_ingest/{connector → processes/connectors}/notion/types/parent.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/rich_text.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/user.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/onedrive.py +10 -10
- unstructured_ingest/{v2/processes → processes}/connectors/outlook.py +9 -9
- unstructured_ingest/{v2/processes → processes}/connectors/pinecone.py +12 -9
- unstructured_ingest/{v2/processes → processes}/connectors/qdrant/__init__.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/qdrant/cloud.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/qdrant/local.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/qdrant/qdrant.py +9 -6
- unstructured_ingest/{v2/processes → processes}/connectors/qdrant/server.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/redisdb.py +6 -6
- unstructured_ingest/{v2/processes → processes}/connectors/salesforce.py +9 -9
- unstructured_ingest/{v2/processes → processes}/connectors/sharepoint.py +7 -7
- unstructured_ingest/{v2/processes → processes}/connectors/slack.py +9 -9
- unstructured_ingest/{v2/processes → processes}/connectors/sql/__init__.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/sql/databricks_delta_tables.py +7 -7
- unstructured_ingest/{v2/processes → processes}/connectors/sql/postgres.py +4 -4
- unstructured_ingest/{v2/processes → processes}/connectors/sql/singlestore.py +4 -4
- unstructured_ingest/{v2/processes → processes}/connectors/sql/snowflake.py +6 -6
- unstructured_ingest/{v2/processes → processes}/connectors/sql/sql.py +16 -11
- unstructured_ingest/{v2/processes → processes}/connectors/sql/sqlite.py +4 -4
- unstructured_ingest/{v2/processes → processes}/connectors/sql/vastdb.py +9 -10
- unstructured_ingest/{v2/processes → processes}/connectors/vectara.py +6 -6
- unstructured_ingest/{v2/processes → processes}/connectors/weaviate/__init__.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/weaviate/cloud.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/weaviate/embedded.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/weaviate/local.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/weaviate/weaviate.py +5 -5
- unstructured_ingest/{v2/processes → processes}/connectors/zendesk/client.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/zendesk/zendesk.py +10 -10
- unstructured_ingest/{v2/processes → processes}/embedder.py +1 -1
- unstructured_ingest/{v2/processes → processes}/filter.py +4 -4
- unstructured_ingest/{v2/processes → processes}/partitioner.py +6 -6
- unstructured_ingest/{v2/processes → processes}/uncompress.py +3 -3
- unstructured_ingest/{v2/processes → processes}/utils/blob_storage.py +2 -2
- unstructured_ingest/{v2/unstructured_api.py → unstructured_api.py} +2 -2
- unstructured_ingest/utils/compression.py +1 -48
- unstructured_ingest/utils/data_prep.py +9 -1
- unstructured_ingest/utils/html.py +3 -3
- unstructured_ingest/{v2/utils.py → utils/pydantic_models.py} +0 -9
- unstructured_ingest/utils/string_and_date_utils.py +1 -1
- {unstructured_ingest-0.6.4.dist-info → unstructured_ingest-0.7.0.dist-info}/METADATA +21 -21
- unstructured_ingest-0.7.0.dist-info/RECORD +370 -0
- {unstructured_ingest-0.6.4.dist-info → unstructured_ingest-0.7.0.dist-info}/top_level.txt +1 -0
- test/unit/v2/test_utils.py +0 -82
- unstructured_ingest/cli/cmd_factory.py +0 -12
- unstructured_ingest/cli/cmds/__init__.py +0 -145
- unstructured_ingest/cli/cmds/airtable.py +0 -69
- unstructured_ingest/cli/cmds/astradb.py +0 -99
- unstructured_ingest/cli/cmds/azure_ai_search.py +0 -65
- unstructured_ingest/cli/cmds/biomed.py +0 -52
- unstructured_ingest/cli/cmds/chroma.py +0 -104
- unstructured_ingest/cli/cmds/clarifai.py +0 -71
- unstructured_ingest/cli/cmds/confluence.py +0 -69
- unstructured_ingest/cli/cmds/databricks_volumes.py +0 -163
- unstructured_ingest/cli/cmds/delta_table.py +0 -94
- unstructured_ingest/cli/cmds/discord.py +0 -47
- unstructured_ingest/cli/cmds/elasticsearch.py +0 -133
- unstructured_ingest/cli/cmds/fsspec/azure.py +0 -94
- unstructured_ingest/cli/cmds/fsspec/box.py +0 -48
- unstructured_ingest/cli/cmds/fsspec/dropbox.py +0 -51
- unstructured_ingest/cli/cmds/fsspec/fsspec.py +0 -15
- unstructured_ingest/cli/cmds/fsspec/gcs.py +0 -71
- unstructured_ingest/cli/cmds/fsspec/s3.py +0 -74
- unstructured_ingest/cli/cmds/fsspec/sftp.py +0 -58
- unstructured_ingest/cli/cmds/github.py +0 -54
- unstructured_ingest/cli/cmds/gitlab.py +0 -54
- unstructured_ingest/cli/cmds/google_drive.py +0 -49
- unstructured_ingest/cli/cmds/hubspot.py +0 -70
- unstructured_ingest/cli/cmds/jira.py +0 -71
- unstructured_ingest/cli/cmds/kafka.py +0 -102
- unstructured_ingest/cli/cmds/local.py +0 -43
- unstructured_ingest/cli/cmds/mongodb.py +0 -72
- unstructured_ingest/cli/cmds/notion.py +0 -48
- unstructured_ingest/cli/cmds/onedrive.py +0 -66
- unstructured_ingest/cli/cmds/opensearch.py +0 -117
- unstructured_ingest/cli/cmds/outlook.py +0 -67
- unstructured_ingest/cli/cmds/pinecone.py +0 -71
- unstructured_ingest/cli/cmds/qdrant.py +0 -124
- unstructured_ingest/cli/cmds/reddit.py +0 -67
- unstructured_ingest/cli/cmds/salesforce.py +0 -58
- unstructured_ingest/cli/cmds/sharepoint.py +0 -66
- unstructured_ingest/cli/cmds/slack.py +0 -56
- unstructured_ingest/cli/cmds/sql.py +0 -66
- unstructured_ingest/cli/cmds/vectara.py +0 -66
- unstructured_ingest/cli/cmds/weaviate.py +0 -98
- unstructured_ingest/cli/cmds/wikipedia.py +0 -40
- unstructured_ingest/cli/common.py +0 -7
- unstructured_ingest/cli/interfaces.py +0 -663
- unstructured_ingest/cli/utils.py +0 -205
- unstructured_ingest/connector/airtable.py +0 -309
- unstructured_ingest/connector/astradb.py +0 -267
- unstructured_ingest/connector/azure_ai_search.py +0 -144
- unstructured_ingest/connector/biomed.py +0 -320
- unstructured_ingest/connector/chroma.py +0 -158
- unstructured_ingest/connector/clarifai.py +0 -122
- unstructured_ingest/connector/confluence.py +0 -285
- unstructured_ingest/connector/databricks_volumes.py +0 -137
- unstructured_ingest/connector/delta_table.py +0 -203
- unstructured_ingest/connector/discord.py +0 -180
- unstructured_ingest/connector/elasticsearch.py +0 -396
- unstructured_ingest/connector/fsspec/azure.py +0 -78
- unstructured_ingest/connector/fsspec/box.py +0 -109
- unstructured_ingest/connector/fsspec/dropbox.py +0 -160
- unstructured_ingest/connector/fsspec/fsspec.py +0 -359
- unstructured_ingest/connector/fsspec/gcs.py +0 -82
- unstructured_ingest/connector/fsspec/s3.py +0 -62
- unstructured_ingest/connector/fsspec/sftp.py +0 -81
- unstructured_ingest/connector/git.py +0 -124
- unstructured_ingest/connector/github.py +0 -174
- unstructured_ingest/connector/gitlab.py +0 -142
- unstructured_ingest/connector/google_drive.py +0 -348
- unstructured_ingest/connector/hubspot.py +0 -278
- unstructured_ingest/connector/jira.py +0 -469
- unstructured_ingest/connector/kafka.py +0 -293
- unstructured_ingest/connector/local.py +0 -139
- unstructured_ingest/connector/mongodb.py +0 -284
- unstructured_ingest/connector/notion/client.py +0 -248
- unstructured_ingest/connector/notion/connector.py +0 -469
- unstructured_ingest/connector/notion/helpers.py +0 -584
- unstructured_ingest/connector/notion/types/blocks/bookmark.py +0 -40
- unstructured_ingest/connector/notion/types/blocks/callout.py +0 -94
- unstructured_ingest/connector/notion/types/blocks/child_database.py +0 -23
- unstructured_ingest/connector/notion/types/blocks/code.py +0 -43
- unstructured_ingest/connector/notion/types/blocks/column_list.py +0 -35
- unstructured_ingest/connector/notion/types/blocks/divider.py +0 -22
- unstructured_ingest/connector/notion/types/blocks/heading.py +0 -37
- unstructured_ingest/connector/notion/types/blocks/link_preview.py +0 -24
- unstructured_ingest/connector/notion/types/blocks/numbered_list.py +0 -29
- unstructured_ingest/connector/notion/types/blocks/quote.py +0 -37
- unstructured_ingest/connector/notion/types/blocks/synced_block.py +0 -57
- unstructured_ingest/connector/notion/types/blocks/table.py +0 -63
- unstructured_ingest/connector/notion/types/blocks/table_of_contents.py +0 -23
- unstructured_ingest/connector/notion/types/blocks/template.py +0 -30
- unstructured_ingest/connector/notion/types/blocks/todo.py +0 -42
- unstructured_ingest/connector/notion/types/blocks/unsupported.py +0 -20
- unstructured_ingest/connector/notion/types/database_properties/__init__.py +0 -106
- unstructured_ingest/connector/notion/types/database_properties/checkbox.py +0 -38
- unstructured_ingest/connector/notion/types/database_properties/date.py +0 -41
- unstructured_ingest/connector/notion/types/database_properties/formula.py +0 -49
- unstructured_ingest/connector/notion/types/database_properties/last_edited_time.py +0 -34
- unstructured_ingest/connector/notion/types/database_properties/multiselect.py +0 -73
- unstructured_ingest/connector/notion/types/database_properties/people.py +0 -40
- unstructured_ingest/connector/notion/types/database_properties/phone_number.py +0 -36
- unstructured_ingest/connector/notion/types/database_properties/relation.py +0 -67
- unstructured_ingest/connector/notion/types/database_properties/select.py +0 -68
- unstructured_ingest/connector/notion/types/database_properties/status.py +0 -80
- unstructured_ingest/connector/notion/types/database_properties/unique_id.py +0 -50
- unstructured_ingest/connector/notion/types/date.py +0 -26
- unstructured_ingest/connector/notion/types/file.py +0 -51
- unstructured_ingest/connector/notion/types/user.py +0 -76
- unstructured_ingest/connector/onedrive.py +0 -232
- unstructured_ingest/connector/opensearch.py +0 -218
- unstructured_ingest/connector/outlook.py +0 -285
- unstructured_ingest/connector/pinecone.py +0 -150
- unstructured_ingest/connector/qdrant.py +0 -144
- unstructured_ingest/connector/reddit.py +0 -166
- unstructured_ingest/connector/registry.py +0 -109
- unstructured_ingest/connector/salesforce.py +0 -301
- unstructured_ingest/connector/sharepoint.py +0 -573
- unstructured_ingest/connector/slack.py +0 -224
- unstructured_ingest/connector/sql.py +0 -199
- unstructured_ingest/connector/vectara.py +0 -253
- unstructured_ingest/connector/weaviate.py +0 -190
- unstructured_ingest/connector/wikipedia.py +0 -208
- unstructured_ingest/enhanced_dataclass/__init__.py +0 -4
- unstructured_ingest/enhanced_dataclass/core.py +0 -99
- unstructured_ingest/enhanced_dataclass/dataclasses.py +0 -54
- unstructured_ingest/enhanced_dataclass/json_mixin.py +0 -125
- unstructured_ingest/interfaces.py +0 -852
- unstructured_ingest/pipeline/copy.py +0 -19
- unstructured_ingest/pipeline/doc_factory.py +0 -12
- unstructured_ingest/pipeline/partition.py +0 -60
- unstructured_ingest/pipeline/permissions.py +0 -12
- unstructured_ingest/pipeline/reformat/chunking.py +0 -134
- unstructured_ingest/pipeline/reformat/embedding.py +0 -64
- unstructured_ingest/pipeline/source.py +0 -77
- unstructured_ingest/pipeline/utils.py +0 -6
- unstructured_ingest/pipeline/write.py +0 -18
- unstructured_ingest/processor.py +0 -93
- unstructured_ingest/runner/__init__.py +0 -104
- unstructured_ingest/runner/airtable.py +0 -35
- unstructured_ingest/runner/astradb.py +0 -34
- unstructured_ingest/runner/base_runner.py +0 -89
- unstructured_ingest/runner/biomed.py +0 -45
- unstructured_ingest/runner/confluence.py +0 -35
- unstructured_ingest/runner/delta_table.py +0 -34
- unstructured_ingest/runner/discord.py +0 -35
- unstructured_ingest/runner/elasticsearch.py +0 -40
- unstructured_ingest/runner/fsspec/azure.py +0 -30
- unstructured_ingest/runner/fsspec/box.py +0 -28
- unstructured_ingest/runner/fsspec/dropbox.py +0 -30
- unstructured_ingest/runner/fsspec/fsspec.py +0 -40
- unstructured_ingest/runner/fsspec/gcs.py +0 -28
- unstructured_ingest/runner/fsspec/s3.py +0 -28
- unstructured_ingest/runner/fsspec/sftp.py +0 -28
- unstructured_ingest/runner/github.py +0 -37
- unstructured_ingest/runner/gitlab.py +0 -37
- unstructured_ingest/runner/google_drive.py +0 -35
- unstructured_ingest/runner/hubspot.py +0 -35
- unstructured_ingest/runner/jira.py +0 -35
- unstructured_ingest/runner/kafka.py +0 -34
- unstructured_ingest/runner/local.py +0 -23
- unstructured_ingest/runner/mongodb.py +0 -34
- unstructured_ingest/runner/notion.py +0 -61
- unstructured_ingest/runner/onedrive.py +0 -35
- unstructured_ingest/runner/opensearch.py +0 -40
- unstructured_ingest/runner/outlook.py +0 -33
- unstructured_ingest/runner/reddit.py +0 -35
- unstructured_ingest/runner/salesforce.py +0 -33
- unstructured_ingest/runner/sharepoint.py +0 -35
- unstructured_ingest/runner/slack.py +0 -33
- unstructured_ingest/runner/utils.py +0 -47
- unstructured_ingest/runner/wikipedia.py +0 -35
- unstructured_ingest/runner/writers/__init__.py +0 -48
- unstructured_ingest/runner/writers/astradb.py +0 -22
- unstructured_ingest/runner/writers/azure_ai_search.py +0 -24
- unstructured_ingest/runner/writers/base_writer.py +0 -26
- unstructured_ingest/runner/writers/chroma.py +0 -22
- unstructured_ingest/runner/writers/clarifai.py +0 -19
- unstructured_ingest/runner/writers/databricks_volumes.py +0 -25
- unstructured_ingest/runner/writers/delta_table.py +0 -24
- unstructured_ingest/runner/writers/elasticsearch.py +0 -24
- unstructured_ingest/runner/writers/fsspec/azure.py +0 -24
- unstructured_ingest/runner/writers/fsspec/box.py +0 -21
- unstructured_ingest/runner/writers/fsspec/dropbox.py +0 -21
- unstructured_ingest/runner/writers/fsspec/gcs.py +0 -19
- unstructured_ingest/runner/writers/fsspec/s3.py +0 -21
- unstructured_ingest/runner/writers/kafka.py +0 -21
- unstructured_ingest/runner/writers/mongodb.py +0 -21
- unstructured_ingest/runner/writers/opensearch.py +0 -26
- unstructured_ingest/runner/writers/pinecone.py +0 -21
- unstructured_ingest/runner/writers/qdrant.py +0 -19
- unstructured_ingest/runner/writers/sql.py +0 -22
- unstructured_ingest/runner/writers/vectara.py +0 -22
- unstructured_ingest/runner/writers/weaviate.py +0 -21
- unstructured_ingest/utils/google_filetype.py +0 -9
- unstructured_ingest/v2/__init__.py +0 -1
- unstructured_ingest/v2/cli/__init__.py +0 -0
- unstructured_ingest/v2/cli/base/__init__.py +0 -4
- unstructured_ingest/v2/cli/base/cmd.py +0 -269
- unstructured_ingest/v2/cli/base/dest.py +0 -85
- unstructured_ingest/v2/cli/base/src.py +0 -85
- unstructured_ingest/v2/cli/cli.py +0 -24
- unstructured_ingest/v2/cli/utils/__init__.py +0 -0
- unstructured_ingest/v2/logger.py +0 -126
- unstructured_ingest/v2/main.py +0 -11
- unstructured_ingest/v2/pipeline/__init__.py +0 -0
- unstructured_ingest/v2/pipeline/interfaces.py +0 -211
- unstructured_ingest/v2/pipeline/pipeline.py +0 -408
- unstructured_ingest/v2/pipeline/steps/__init__.py +0 -0
- unstructured_ingest/v2/processes/connectors/assets/__init__.py +0 -0
- unstructured_ingest/v2/processes/connectors/assets/databricks_delta_table_schema.sql +0 -10
- unstructured_ingest/v2/processes/connectors/assets/weaviate_collection_config.json +0 -23
- unstructured_ingest/v2/processes/connectors/notion/__init__.py +0 -0
- unstructured_ingest/v2/processes/connectors/notion/interfaces.py +0 -32
- unstructured_ingest/v2/processes/connectors/notion/types/__init__.py +0 -0
- unstructured_ingest/v2/processes/connectors/notion/types/block.py +0 -96
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/__init__.py +0 -63
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/breadcrumb.py +0 -21
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/bulleted_list_item.py +0 -31
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/child_page.py +0 -23
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/embed.py +0 -36
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/equation.py +0 -23
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/file.py +0 -49
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/image.py +0 -21
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/link_to_page.py +0 -29
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/paragraph.py +0 -31
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/pdf.py +0 -49
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/toggle.py +0 -37
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/video.py +0 -22
- unstructured_ingest/v2/processes/connectors/notion/types/database.py +0 -73
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/created_by.py +0 -35
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/created_time.py +0 -34
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/email.py +0 -36
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/files.py +0 -37
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/last_edited_by.py +0 -34
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/number.py +0 -49
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/rich_text.py +0 -43
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/rollup.py +0 -56
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/title.py +0 -37
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/url.py +0 -37
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/verification.py +0 -78
- unstructured_ingest/v2/processes/connectors/notion/types/page.py +0 -45
- unstructured_ingest/v2/processes/connectors/notion/types/parent.py +0 -66
- unstructured_ingest/v2/processes/connectors/notion/types/rich_text.py +0 -189
- unstructured_ingest/v2/processes/connectors/zendesk/__init__.py +0 -0
- unstructured_ingest/v2/processes/utils/__init__.py +0 -0
- unstructured_ingest/v2/types/__init__.py +0 -0
- unstructured_ingest-0.6.4.dist-info/RECORD +0 -591
- {test/unit/v2 → examples}/__init__.py +0 -0
- /test/unit/{v2/chunkers → chunkers}/__init__.py +0 -0
- /test/unit/{v2/connectors → connectors}/__init__.py +0 -0
- /test/unit/{v2/connectors → connectors}/ibm_watsonx/__init__.py +0 -0
- /test/unit/{v2/connectors → connectors}/motherduck/__init__.py +0 -0
- /test/unit/{v2/connectors → connectors}/sql/__init__.py +0 -0
- /test/unit/{v2/embedders → embedders}/__init__.py +0 -0
- /test/unit/{v2/embedders → embedders}/test_bedrock.py +0 -0
- /test/unit/{v2/embedders → embedders}/test_mixedbread.py +0 -0
- /test/unit/{v2/embedders → embedders}/test_octoai.py +0 -0
- /test/unit/{v2/embedders → embedders}/test_openai.py +0 -0
- /test/unit/{v2/embedders → embedders}/test_togetherai.py +0 -0
- /test/unit/{v2/embedders → embedders}/test_voyageai.py +0 -0
- /test/unit/{v2/partitioners → partitioners}/__init__.py +0 -0
- /test/unit/{v2/utils → utils}/__init__.py +0 -0
- /test/unit/{v2/utils → utils}/data_generator.py +0 -0
- /unstructured_ingest/{v2/cli → cli}/base/importer.py +0 -0
- /unstructured_ingest/cli/{cmds/fsspec → utils}/__init__.py +0 -0
- /unstructured_ingest/{v2/cli → cli}/utils/click.py +0 -0
- /unstructured_ingest/{connector → data_types}/__init__.py +0 -0
- /unstructured_ingest/{v2/errors.py → errors_v2.py} +0 -0
- /unstructured_ingest/{v2/interfaces → interfaces}/__init__.py +0 -0
- /unstructured_ingest/{v2/interfaces → interfaces}/connector.py +0 -0
- /unstructured_ingest/{v2/interfaces → interfaces}/process.py +0 -0
- /unstructured_ingest/{v2/interfaces → interfaces}/processor.py +0 -0
- /unstructured_ingest/{connector/fsspec → pipeline/steps}/__init__.py +0 -0
- /unstructured_ingest/{v2/processes → processes}/__init__.py +0 -0
- /unstructured_ingest/{connector/notion → processes/connectors/assets}/__init__.py +0 -0
- /unstructured_ingest/{v2/processes → processes}/connectors/fsspec/utils.py +0 -0
- /unstructured_ingest/{connector/notion/types → processes/connectors/notion}/__init__.py +0 -0
- /unstructured_ingest/{ingest_backoff → processes/connectors/notion/ingest_backoff}/__init__.py +0 -0
- /unstructured_ingest/{ingest_backoff → processes/connectors/notion/ingest_backoff}/_common.py +0 -0
- /unstructured_ingest/{connector → processes/connectors}/notion/interfaces.py +0 -0
- /unstructured_ingest/{pipeline/reformat → processes/connectors/notion/types}/__init__.py +0 -0
- /unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/__init__.py +0 -0
- /unstructured_ingest/{v2/processes → processes}/connectors/utils.py +0 -0
- /unstructured_ingest/{runner/fsspec → processes/connectors/zendesk}/__init__.py +0 -0
- /unstructured_ingest/{runner/writers/fsspec → processes/utils}/__init__.py +0 -0
- /unstructured_ingest/{v2 → utils}/constants.py +0 -0
- {unstructured_ingest-0.6.4.dist-info → unstructured_ingest-0.7.0.dist-info}/LICENSE.md +0 -0
- {unstructured_ingest-0.6.4.dist-info → unstructured_ingest-0.7.0.dist-info}/WHEEL +0 -0
- {unstructured_ingest-0.6.4.dist-info → unstructured_ingest-0.7.0.dist-info}/entry_points.txt +0 -0
|
@@ -2,8 +2,8 @@ from dataclasses import dataclass
|
|
|
2
2
|
|
|
3
3
|
from pydantic import Field, Secret
|
|
4
4
|
|
|
5
|
-
from unstructured_ingest.
|
|
6
|
-
from unstructured_ingest.
|
|
5
|
+
from unstructured_ingest.processes.connector_registry import DestinationRegistryEntry
|
|
6
|
+
from unstructured_ingest.processes.connectors.qdrant.qdrant import (
|
|
7
7
|
QdrantAccessConfig,
|
|
8
8
|
QdrantConnectionConfig,
|
|
9
9
|
QdrantUploader,
|
|
@@ -2,8 +2,8 @@ from dataclasses import dataclass
|
|
|
2
2
|
|
|
3
3
|
from pydantic import Field, Secret
|
|
4
4
|
|
|
5
|
-
from unstructured_ingest.
|
|
6
|
-
from unstructured_ingest.
|
|
5
|
+
from unstructured_ingest.processes.connector_registry import DestinationRegistryEntry
|
|
6
|
+
from unstructured_ingest.processes.connectors.qdrant.qdrant import (
|
|
7
7
|
QdrantAccessConfig,
|
|
8
8
|
QdrantConnectionConfig,
|
|
9
9
|
QdrantUploader,
|
|
@@ -7,10 +7,9 @@ from typing import TYPE_CHECKING, Any, AsyncGenerator, Generator, Optional
|
|
|
7
7
|
|
|
8
8
|
from pydantic import Field, Secret
|
|
9
9
|
|
|
10
|
+
from unstructured_ingest.data_types.file_data import FileData
|
|
10
11
|
from unstructured_ingest.error import DestinationConnectionError, WriteError
|
|
11
|
-
from unstructured_ingest.
|
|
12
|
-
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
13
|
-
from unstructured_ingest.v2.interfaces import (
|
|
12
|
+
from unstructured_ingest.interfaces import (
|
|
14
13
|
AccessConfig,
|
|
15
14
|
ConnectionConfig,
|
|
16
15
|
Uploader,
|
|
@@ -18,9 +17,13 @@ from unstructured_ingest.v2.interfaces import (
|
|
|
18
17
|
UploadStager,
|
|
19
18
|
UploadStagerConfig,
|
|
20
19
|
)
|
|
21
|
-
from unstructured_ingest.
|
|
22
|
-
from unstructured_ingest.
|
|
23
|
-
|
|
20
|
+
from unstructured_ingest.logger import logger
|
|
21
|
+
from unstructured_ingest.utils.data_prep import (
|
|
22
|
+
batch_generator,
|
|
23
|
+
flatten_dict,
|
|
24
|
+
get_enhanced_element_id,
|
|
25
|
+
)
|
|
26
|
+
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
24
27
|
|
|
25
28
|
if TYPE_CHECKING:
|
|
26
29
|
from qdrant_client import AsyncQdrantClient, QdrantClient
|
|
@@ -2,8 +2,8 @@ from dataclasses import dataclass
|
|
|
2
2
|
|
|
3
3
|
from pydantic import Field, Secret
|
|
4
4
|
|
|
5
|
-
from unstructured_ingest.
|
|
6
|
-
from unstructured_ingest.
|
|
5
|
+
from unstructured_ingest.processes.connector_registry import DestinationRegistryEntry
|
|
6
|
+
from unstructured_ingest.processes.connectors.qdrant.qdrant import (
|
|
7
7
|
QdrantAccessConfig,
|
|
8
8
|
QdrantConnectionConfig,
|
|
9
9
|
QdrantUploader,
|
|
@@ -5,18 +5,18 @@ from typing import TYPE_CHECKING, Any, AsyncGenerator, Generator, Optional
|
|
|
5
5
|
|
|
6
6
|
from pydantic import Field, Secret, model_validator
|
|
7
7
|
|
|
8
|
+
from unstructured_ingest.data_types.file_data import FileData
|
|
8
9
|
from unstructured_ingest.error import DestinationConnectionError
|
|
9
|
-
from unstructured_ingest.
|
|
10
|
-
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
11
|
-
from unstructured_ingest.v2.interfaces import (
|
|
10
|
+
from unstructured_ingest.interfaces import (
|
|
12
11
|
AccessConfig,
|
|
13
12
|
ConnectionConfig,
|
|
14
13
|
Uploader,
|
|
15
14
|
UploaderConfig,
|
|
16
15
|
)
|
|
17
|
-
from unstructured_ingest.
|
|
18
|
-
from unstructured_ingest.
|
|
19
|
-
from unstructured_ingest.
|
|
16
|
+
from unstructured_ingest.logger import logger
|
|
17
|
+
from unstructured_ingest.processes.connector_registry import DestinationRegistryEntry
|
|
18
|
+
from unstructured_ingest.utils.data_prep import batch_generator
|
|
19
|
+
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
20
20
|
|
|
21
21
|
if TYPE_CHECKING:
|
|
22
22
|
from redis.asyncio import Redis
|
|
@@ -20,9 +20,13 @@ from typing import TYPE_CHECKING, Any, Generator, Optional, Type
|
|
|
20
20
|
from dateutil import parser
|
|
21
21
|
from pydantic import Field, Secret
|
|
22
22
|
|
|
23
|
+
from unstructured_ingest.data_types.file_data import (
|
|
24
|
+
FileData,
|
|
25
|
+
FileDataSourceMetadata,
|
|
26
|
+
SourceIdentifiers,
|
|
27
|
+
)
|
|
23
28
|
from unstructured_ingest.error import SourceConnectionError, SourceConnectionNetworkError
|
|
24
|
-
from unstructured_ingest.
|
|
25
|
-
from unstructured_ingest.v2.interfaces import (
|
|
29
|
+
from unstructured_ingest.interfaces import (
|
|
26
30
|
AccessConfig,
|
|
27
31
|
ConnectionConfig,
|
|
28
32
|
Downloader,
|
|
@@ -31,15 +35,11 @@ from unstructured_ingest.v2.interfaces import (
|
|
|
31
35
|
Indexer,
|
|
32
36
|
IndexerConfig,
|
|
33
37
|
)
|
|
34
|
-
from unstructured_ingest.
|
|
35
|
-
from unstructured_ingest.
|
|
38
|
+
from unstructured_ingest.logger import logger
|
|
39
|
+
from unstructured_ingest.processes.connector_registry import (
|
|
36
40
|
SourceRegistryEntry,
|
|
37
41
|
)
|
|
38
|
-
from unstructured_ingest.
|
|
39
|
-
FileData,
|
|
40
|
-
FileDataSourceMetadata,
|
|
41
|
-
SourceIdentifiers,
|
|
42
|
-
)
|
|
42
|
+
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
43
43
|
|
|
44
44
|
|
|
45
45
|
class MissingCategoryError(Exception):
|
|
@@ -6,16 +6,18 @@ from typing import TYPE_CHECKING, Any, AsyncIterator
|
|
|
6
6
|
|
|
7
7
|
from pydantic import Field
|
|
8
8
|
|
|
9
|
+
from unstructured_ingest.data_types.file_data import (
|
|
10
|
+
FileData,
|
|
11
|
+
)
|
|
9
12
|
from unstructured_ingest.error import (
|
|
10
13
|
SourceConnectionError,
|
|
11
14
|
SourceConnectionNetworkError,
|
|
12
15
|
)
|
|
13
|
-
from unstructured_ingest.
|
|
14
|
-
from unstructured_ingest.
|
|
15
|
-
from unstructured_ingest.v2.processes.connector_registry import (
|
|
16
|
+
from unstructured_ingest.logger import logger
|
|
17
|
+
from unstructured_ingest.processes.connector_registry import (
|
|
16
18
|
SourceRegistryEntry,
|
|
17
19
|
)
|
|
18
|
-
from unstructured_ingest.
|
|
20
|
+
from unstructured_ingest.processes.connectors.onedrive import (
|
|
19
21
|
OnedriveAccessConfig,
|
|
20
22
|
OnedriveConnectionConfig,
|
|
21
23
|
OnedriveDownloader,
|
|
@@ -23,9 +25,7 @@ from unstructured_ingest.v2.processes.connectors.onedrive import (
|
|
|
23
25
|
OnedriveIndexer,
|
|
24
26
|
OnedriveIndexerConfig,
|
|
25
27
|
)
|
|
26
|
-
from unstructured_ingest.
|
|
27
|
-
FileData,
|
|
28
|
-
)
|
|
28
|
+
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
29
29
|
|
|
30
30
|
if TYPE_CHECKING:
|
|
31
31
|
from office365.onedrive.driveitems.driveItem import DriveItem
|
|
@@ -8,10 +8,13 @@ from typing import TYPE_CHECKING, Any, Generator, Optional
|
|
|
8
8
|
|
|
9
9
|
from pydantic import Field, Secret
|
|
10
10
|
|
|
11
|
+
from unstructured_ingest.data_types.file_data import (
|
|
12
|
+
FileData,
|
|
13
|
+
FileDataSourceMetadata,
|
|
14
|
+
SourceIdentifiers,
|
|
15
|
+
)
|
|
11
16
|
from unstructured_ingest.error import SourceConnectionError
|
|
12
|
-
from unstructured_ingest.
|
|
13
|
-
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
14
|
-
from unstructured_ingest.v2.interfaces import (
|
|
17
|
+
from unstructured_ingest.interfaces import (
|
|
15
18
|
AccessConfig,
|
|
16
19
|
ConnectionConfig,
|
|
17
20
|
Downloader,
|
|
@@ -20,12 +23,9 @@ from unstructured_ingest.v2.interfaces import (
|
|
|
20
23
|
Indexer,
|
|
21
24
|
IndexerConfig,
|
|
22
25
|
)
|
|
23
|
-
from unstructured_ingest.
|
|
24
|
-
from unstructured_ingest.
|
|
25
|
-
|
|
26
|
-
FileDataSourceMetadata,
|
|
27
|
-
SourceIdentifiers,
|
|
28
|
-
)
|
|
26
|
+
from unstructured_ingest.logger import logger
|
|
27
|
+
from unstructured_ingest.processes.connector_registry import SourceRegistryEntry
|
|
28
|
+
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
29
29
|
|
|
30
30
|
if TYPE_CHECKING:
|
|
31
31
|
from slack_sdk import WebClient
|
|
@@ -6,13 +6,12 @@ from typing import TYPE_CHECKING, Any, Generator, Optional
|
|
|
6
6
|
|
|
7
7
|
from pydantic import Field, Secret
|
|
8
8
|
|
|
9
|
-
from unstructured_ingest.
|
|
10
|
-
from unstructured_ingest.
|
|
11
|
-
from unstructured_ingest.
|
|
12
|
-
from unstructured_ingest.v2.processes.connector_registry import (
|
|
9
|
+
from unstructured_ingest.data_types.file_data import FileData
|
|
10
|
+
from unstructured_ingest.logger import logger
|
|
11
|
+
from unstructured_ingest.processes.connector_registry import (
|
|
13
12
|
DestinationRegistryEntry,
|
|
14
13
|
)
|
|
15
|
-
from unstructured_ingest.
|
|
14
|
+
from unstructured_ingest.processes.connectors.sql.sql import (
|
|
16
15
|
SQLAccessConfig,
|
|
17
16
|
SQLConnectionConfig,
|
|
18
17
|
SQLUploader,
|
|
@@ -20,7 +19,8 @@ from unstructured_ingest.v2.processes.connectors.sql.sql import (
|
|
|
20
19
|
SQLUploadStager,
|
|
21
20
|
SQLUploadStagerConfig,
|
|
22
21
|
)
|
|
23
|
-
from unstructured_ingest.
|
|
22
|
+
from unstructured_ingest.utils.data_prep import split_dataframe
|
|
23
|
+
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
24
24
|
|
|
25
25
|
if TYPE_CHECKING:
|
|
26
26
|
from databricks.sdk.core import oauth_service_principal
|
|
@@ -203,7 +203,7 @@ class DatabricksDeltaTablesUploader(SQLUploader):
|
|
|
203
203
|
f" table named {self.upload_config.table_name}"
|
|
204
204
|
# f" with batch size {self.upload_config.batch_size}"
|
|
205
205
|
)
|
|
206
|
-
# TODO: currently variable binding not supporting for list
|
|
206
|
+
# TODO: currently variable binding not supporting for list data_types,
|
|
207
207
|
# update once that gets resolved in SDK
|
|
208
208
|
for rows in split_dataframe(df=df, chunk_size=self.upload_config.batch_size):
|
|
209
209
|
with self.get_cursor() as cursor:
|
|
@@ -4,13 +4,12 @@ from typing import TYPE_CHECKING, Generator, Optional
|
|
|
4
4
|
|
|
5
5
|
from pydantic import Field, Secret
|
|
6
6
|
|
|
7
|
-
from unstructured_ingest.
|
|
8
|
-
from unstructured_ingest.
|
|
9
|
-
from unstructured_ingest.v2.processes.connector_registry import (
|
|
7
|
+
from unstructured_ingest.logger import logger
|
|
8
|
+
from unstructured_ingest.processes.connector_registry import (
|
|
10
9
|
DestinationRegistryEntry,
|
|
11
10
|
SourceRegistryEntry,
|
|
12
11
|
)
|
|
13
|
-
from unstructured_ingest.
|
|
12
|
+
from unstructured_ingest.processes.connectors.sql.sql import (
|
|
14
13
|
SQLAccessConfig,
|
|
15
14
|
SqlBatchFileData,
|
|
16
15
|
SQLConnectionConfig,
|
|
@@ -23,6 +22,7 @@ from unstructured_ingest.v2.processes.connectors.sql.sql import (
|
|
|
23
22
|
SQLUploadStager,
|
|
24
23
|
SQLUploadStagerConfig,
|
|
25
24
|
)
|
|
25
|
+
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
26
26
|
|
|
27
27
|
if TYPE_CHECKING:
|
|
28
28
|
from psycopg2.extensions import connection as PostgresConnection
|
|
@@ -5,13 +5,12 @@ from typing import TYPE_CHECKING, Any, Generator, Optional
|
|
|
5
5
|
|
|
6
6
|
from pydantic import Field, Secret
|
|
7
7
|
|
|
8
|
-
from unstructured_ingest.
|
|
9
|
-
from unstructured_ingest.
|
|
10
|
-
from unstructured_ingest.v2.processes.connector_registry import (
|
|
8
|
+
from unstructured_ingest.logger import logger
|
|
9
|
+
from unstructured_ingest.processes.connector_registry import (
|
|
11
10
|
DestinationRegistryEntry,
|
|
12
11
|
SourceRegistryEntry,
|
|
13
12
|
)
|
|
14
|
-
from unstructured_ingest.
|
|
13
|
+
from unstructured_ingest.processes.connectors.sql.sql import (
|
|
15
14
|
_DATE_COLUMNS,
|
|
16
15
|
SQLAccessConfig,
|
|
17
16
|
SqlBatchFileData,
|
|
@@ -26,6 +25,7 @@ from unstructured_ingest.v2.processes.connectors.sql.sql import (
|
|
|
26
25
|
SQLUploadStagerConfig,
|
|
27
26
|
parse_date_string,
|
|
28
27
|
)
|
|
28
|
+
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
29
29
|
|
|
30
30
|
if TYPE_CHECKING:
|
|
31
31
|
from singlestoredb.connection import Connection as SingleStoreConnection
|
|
@@ -5,14 +5,13 @@ from typing import TYPE_CHECKING, Any, Generator, Optional
|
|
|
5
5
|
|
|
6
6
|
from pydantic import Field, Secret
|
|
7
7
|
|
|
8
|
-
from unstructured_ingest.
|
|
9
|
-
from unstructured_ingest.
|
|
10
|
-
from unstructured_ingest.
|
|
11
|
-
from unstructured_ingest.v2.processes.connector_registry import (
|
|
8
|
+
from unstructured_ingest.data_types.file_data import FileData
|
|
9
|
+
from unstructured_ingest.logger import logger
|
|
10
|
+
from unstructured_ingest.processes.connector_registry import (
|
|
12
11
|
DestinationRegistryEntry,
|
|
13
12
|
SourceRegistryEntry,
|
|
14
13
|
)
|
|
15
|
-
from unstructured_ingest.
|
|
14
|
+
from unstructured_ingest.processes.connectors.sql.sql import (
|
|
16
15
|
_DATE_COLUMNS,
|
|
17
16
|
SQLAccessConfig,
|
|
18
17
|
SqlBatchFileData,
|
|
@@ -27,7 +26,8 @@ from unstructured_ingest.v2.processes.connectors.sql.sql import (
|
|
|
27
26
|
SQLUploadStagerConfig,
|
|
28
27
|
parse_date_string,
|
|
29
28
|
)
|
|
30
|
-
from unstructured_ingest.
|
|
29
|
+
from unstructured_ingest.utils.data_prep import split_dataframe
|
|
30
|
+
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
31
31
|
|
|
32
32
|
if TYPE_CHECKING:
|
|
33
33
|
from pandas import DataFrame
|
|
@@ -11,10 +11,15 @@ from typing import TYPE_CHECKING, Any, Generator, Union
|
|
|
11
11
|
from dateutil import parser
|
|
12
12
|
from pydantic import BaseModel, Field, Secret
|
|
13
13
|
|
|
14
|
+
from unstructured_ingest.data_types.file_data import (
|
|
15
|
+
BatchFileData,
|
|
16
|
+
BatchItem,
|
|
17
|
+
FileData,
|
|
18
|
+
FileDataSourceMetadata,
|
|
19
|
+
SourceIdentifiers,
|
|
20
|
+
)
|
|
14
21
|
from unstructured_ingest.error import DestinationConnectionError, SourceConnectionError
|
|
15
|
-
from unstructured_ingest.
|
|
16
|
-
from unstructured_ingest.v2.constants import RECORD_ID_LABEL
|
|
17
|
-
from unstructured_ingest.v2.interfaces import (
|
|
22
|
+
from unstructured_ingest.interfaces import (
|
|
18
23
|
AccessConfig,
|
|
19
24
|
ConnectionConfig,
|
|
20
25
|
Downloader,
|
|
@@ -28,15 +33,15 @@ from unstructured_ingest.v2.interfaces import (
|
|
|
28
33
|
UploadStagerConfig,
|
|
29
34
|
download_responses,
|
|
30
35
|
)
|
|
31
|
-
from unstructured_ingest.
|
|
32
|
-
from unstructured_ingest.
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
36
|
+
from unstructured_ingest.logger import logger
|
|
37
|
+
from unstructured_ingest.utils.constants import RECORD_ID_LABEL
|
|
38
|
+
from unstructured_ingest.utils.data_prep import (
|
|
39
|
+
get_data,
|
|
40
|
+
get_data_df,
|
|
41
|
+
get_enhanced_element_id,
|
|
42
|
+
split_dataframe,
|
|
43
|
+
write_data,
|
|
38
44
|
)
|
|
39
|
-
from unstructured_ingest.v2.utils import get_enhanced_element_id
|
|
40
45
|
|
|
41
46
|
if TYPE_CHECKING:
|
|
42
47
|
from pandas import DataFrame
|
|
@@ -6,13 +6,12 @@ from typing import TYPE_CHECKING, Any, Generator
|
|
|
6
6
|
|
|
7
7
|
from pydantic import Field, Secret, model_validator
|
|
8
8
|
|
|
9
|
-
from unstructured_ingest.
|
|
10
|
-
from unstructured_ingest.
|
|
11
|
-
from unstructured_ingest.v2.processes.connector_registry import (
|
|
9
|
+
from unstructured_ingest.logger import logger
|
|
10
|
+
from unstructured_ingest.processes.connector_registry import (
|
|
12
11
|
DestinationRegistryEntry,
|
|
13
12
|
SourceRegistryEntry,
|
|
14
13
|
)
|
|
15
|
-
from unstructured_ingest.
|
|
14
|
+
from unstructured_ingest.processes.connectors.sql.sql import (
|
|
16
15
|
_DATE_COLUMNS,
|
|
17
16
|
SQLAccessConfig,
|
|
18
17
|
SqlBatchFileData,
|
|
@@ -27,6 +26,7 @@ from unstructured_ingest.v2.processes.connectors.sql.sql import (
|
|
|
27
26
|
SQLUploadStagerConfig,
|
|
28
27
|
parse_date_string,
|
|
29
28
|
)
|
|
29
|
+
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
30
30
|
|
|
31
31
|
if TYPE_CHECKING:
|
|
32
32
|
from sqlite3 import Connection as SqliteConnection
|
|
@@ -4,16 +4,16 @@ from typing import TYPE_CHECKING, Any, Optional
|
|
|
4
4
|
|
|
5
5
|
from pydantic import Field, Secret
|
|
6
6
|
|
|
7
|
+
from unstructured_ingest.data_types.file_data import (
|
|
8
|
+
FileData,
|
|
9
|
+
)
|
|
7
10
|
from unstructured_ingest.error import DestinationConnectionError
|
|
8
|
-
from unstructured_ingest.
|
|
9
|
-
from unstructured_ingest.
|
|
10
|
-
from unstructured_ingest.v2.constants import RECORD_ID_LABEL
|
|
11
|
-
from unstructured_ingest.v2.logger import logger
|
|
12
|
-
from unstructured_ingest.v2.processes.connector_registry import (
|
|
11
|
+
from unstructured_ingest.logger import logger
|
|
12
|
+
from unstructured_ingest.processes.connector_registry import (
|
|
13
13
|
DestinationRegistryEntry,
|
|
14
14
|
SourceRegistryEntry,
|
|
15
15
|
)
|
|
16
|
-
from unstructured_ingest.
|
|
16
|
+
from unstructured_ingest.processes.connectors.sql.sql import (
|
|
17
17
|
SQLAccessConfig,
|
|
18
18
|
SqlBatchFileData,
|
|
19
19
|
SQLConnectionConfig,
|
|
@@ -26,10 +26,9 @@ from unstructured_ingest.v2.processes.connectors.sql.sql import (
|
|
|
26
26
|
SQLUploadStager,
|
|
27
27
|
SQLUploadStagerConfig,
|
|
28
28
|
)
|
|
29
|
-
from unstructured_ingest.
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
from unstructured_ingest.v2.utils import get_enhanced_element_id
|
|
29
|
+
from unstructured_ingest.utils.constants import RECORD_ID_LABEL
|
|
30
|
+
from unstructured_ingest.utils.data_prep import get_enhanced_element_id, split_dataframe
|
|
31
|
+
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
33
32
|
|
|
34
33
|
if TYPE_CHECKING:
|
|
35
34
|
from pandas import DataFrame
|
|
@@ -8,10 +8,9 @@ from typing import Any, Dict, Mapping, Optional
|
|
|
8
8
|
|
|
9
9
|
from pydantic import Field, Secret
|
|
10
10
|
|
|
11
|
+
from unstructured_ingest.data_types.file_data import FileData
|
|
11
12
|
from unstructured_ingest.error import DestinationConnectionError
|
|
12
|
-
from unstructured_ingest.
|
|
13
|
-
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
14
|
-
from unstructured_ingest.v2.interfaces import (
|
|
13
|
+
from unstructured_ingest.interfaces import (
|
|
15
14
|
AccessConfig,
|
|
16
15
|
ConnectionConfig,
|
|
17
16
|
Uploader,
|
|
@@ -19,9 +18,10 @@ from unstructured_ingest.v2.interfaces import (
|
|
|
19
18
|
UploadStager,
|
|
20
19
|
UploadStagerConfig,
|
|
21
20
|
)
|
|
22
|
-
from unstructured_ingest.
|
|
23
|
-
from unstructured_ingest.
|
|
24
|
-
from unstructured_ingest.
|
|
21
|
+
from unstructured_ingest.logger import logger
|
|
22
|
+
from unstructured_ingest.processes.connector_registry import DestinationRegistryEntry
|
|
23
|
+
from unstructured_ingest.utils.data_prep import flatten_dict
|
|
24
|
+
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
25
25
|
|
|
26
26
|
BASE_URL = "https://api.vectara.io/v2"
|
|
27
27
|
|
|
@@ -4,9 +4,8 @@ from typing import TYPE_CHECKING, Any, Generator, Optional
|
|
|
4
4
|
|
|
5
5
|
from pydantic import Field, Secret
|
|
6
6
|
|
|
7
|
-
from unstructured_ingest.
|
|
8
|
-
from unstructured_ingest.
|
|
9
|
-
from unstructured_ingest.v2.processes.connectors.weaviate.weaviate import (
|
|
7
|
+
from unstructured_ingest.processes.connector_registry import DestinationRegistryEntry
|
|
8
|
+
from unstructured_ingest.processes.connectors.weaviate.weaviate import (
|
|
10
9
|
WeaviateAccessConfig,
|
|
11
10
|
WeaviateConnectionConfig,
|
|
12
11
|
WeaviateUploader,
|
|
@@ -14,6 +13,7 @@ from unstructured_ingest.v2.processes.connectors.weaviate.weaviate import (
|
|
|
14
13
|
WeaviateUploadStager,
|
|
15
14
|
WeaviateUploadStagerConfig,
|
|
16
15
|
)
|
|
16
|
+
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
17
17
|
|
|
18
18
|
if TYPE_CHECKING:
|
|
19
19
|
from weaviate.auth import AuthCredentials
|
|
@@ -4,9 +4,8 @@ from typing import TYPE_CHECKING, Generator, Optional
|
|
|
4
4
|
|
|
5
5
|
from pydantic import Field, Secret
|
|
6
6
|
|
|
7
|
-
from unstructured_ingest.
|
|
8
|
-
from unstructured_ingest.
|
|
9
|
-
from unstructured_ingest.v2.processes.connectors.weaviate.weaviate import (
|
|
7
|
+
from unstructured_ingest.processes.connector_registry import DestinationRegistryEntry
|
|
8
|
+
from unstructured_ingest.processes.connectors.weaviate.weaviate import (
|
|
10
9
|
WeaviateAccessConfig,
|
|
11
10
|
WeaviateConnectionConfig,
|
|
12
11
|
WeaviateUploader,
|
|
@@ -14,6 +13,7 @@ from unstructured_ingest.v2.processes.connectors.weaviate.weaviate import (
|
|
|
14
13
|
WeaviateUploadStager,
|
|
15
14
|
WeaviateUploadStagerConfig,
|
|
16
15
|
)
|
|
16
|
+
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
17
17
|
|
|
18
18
|
if TYPE_CHECKING:
|
|
19
19
|
from weaviate.client import WeaviateClient
|
|
@@ -4,9 +4,8 @@ from typing import TYPE_CHECKING, Generator
|
|
|
4
4
|
|
|
5
5
|
from pydantic import Field, Secret
|
|
6
6
|
|
|
7
|
-
from unstructured_ingest.
|
|
8
|
-
from unstructured_ingest.
|
|
9
|
-
from unstructured_ingest.v2.processes.connectors.weaviate.weaviate import (
|
|
7
|
+
from unstructured_ingest.processes.connector_registry import DestinationRegistryEntry
|
|
8
|
+
from unstructured_ingest.processes.connectors.weaviate.weaviate import (
|
|
10
9
|
WeaviateAccessConfig,
|
|
11
10
|
WeaviateConnectionConfig,
|
|
12
11
|
WeaviateUploader,
|
|
@@ -14,6 +13,7 @@ from unstructured_ingest.v2.processes.connectors.weaviate.weaviate import (
|
|
|
14
13
|
WeaviateUploadStager,
|
|
15
14
|
WeaviateUploadStagerConfig,
|
|
16
15
|
)
|
|
16
|
+
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
17
17
|
|
|
18
18
|
if TYPE_CHECKING:
|
|
19
19
|
from weaviate.client import WeaviateClient
|
|
@@ -10,10 +10,9 @@ from typing import TYPE_CHECKING, Any, Generator, Optional
|
|
|
10
10
|
from dateutil import parser
|
|
11
11
|
from pydantic import Field, Secret
|
|
12
12
|
|
|
13
|
+
from unstructured_ingest.data_types.file_data import FileData
|
|
13
14
|
from unstructured_ingest.error import DestinationConnectionError, WriteError
|
|
14
|
-
from unstructured_ingest.
|
|
15
|
-
from unstructured_ingest.v2.constants import RECORD_ID_LABEL
|
|
16
|
-
from unstructured_ingest.v2.interfaces import (
|
|
15
|
+
from unstructured_ingest.interfaces import (
|
|
17
16
|
AccessConfig,
|
|
18
17
|
ConnectionConfig,
|
|
19
18
|
UploaderConfig,
|
|
@@ -21,8 +20,9 @@ from unstructured_ingest.v2.interfaces import (
|
|
|
21
20
|
UploadStagerConfig,
|
|
22
21
|
VectorDBUploader,
|
|
23
22
|
)
|
|
24
|
-
from unstructured_ingest.
|
|
25
|
-
from unstructured_ingest.
|
|
23
|
+
from unstructured_ingest.logger import logger
|
|
24
|
+
from unstructured_ingest.utils.constants import RECORD_ID_LABEL
|
|
25
|
+
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
26
26
|
|
|
27
27
|
if TYPE_CHECKING:
|
|
28
28
|
from weaviate.classes.init import Timeout
|
|
@@ -4,10 +4,10 @@ from typing import TYPE_CHECKING, Any, AsyncGenerator, Literal, Optional, Union
|
|
|
4
4
|
|
|
5
5
|
from pydantic import BaseModel, Field, HttpUrl
|
|
6
6
|
|
|
7
|
+
from unstructured_ingest.errors_v2 import ProviderError, RateLimitError, UserAuthError, UserError
|
|
8
|
+
from unstructured_ingest.logger import logger
|
|
7
9
|
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
8
10
|
from unstructured_ingest.utils.string_and_date_utils import fix_unescaped_unicode
|
|
9
|
-
from unstructured_ingest.v2.errors import ProviderError, RateLimitError, UserAuthError, UserError
|
|
10
|
-
from unstructured_ingest.v2.logger import logger
|
|
11
11
|
|
|
12
12
|
if TYPE_CHECKING:
|
|
13
13
|
from httpx import AsyncClient, Client
|
|
@@ -8,9 +8,12 @@ from typing import Any, AsyncGenerator, Literal, Union
|
|
|
8
8
|
|
|
9
9
|
from pydantic import BaseModel, Field, Secret
|
|
10
10
|
|
|
11
|
-
from unstructured_ingest.
|
|
12
|
-
|
|
13
|
-
|
|
11
|
+
from unstructured_ingest.data_types.file_data import (
|
|
12
|
+
FileData,
|
|
13
|
+
FileDataSourceMetadata,
|
|
14
|
+
SourceIdentifiers,
|
|
15
|
+
)
|
|
16
|
+
from unstructured_ingest.interfaces import (
|
|
14
17
|
AccessConfig,
|
|
15
18
|
ConnectionConfig,
|
|
16
19
|
Downloader,
|
|
@@ -19,13 +22,10 @@ from unstructured_ingest.v2.interfaces import (
|
|
|
19
22
|
Indexer,
|
|
20
23
|
IndexerConfig,
|
|
21
24
|
)
|
|
22
|
-
from unstructured_ingest.
|
|
23
|
-
from unstructured_ingest.
|
|
24
|
-
from unstructured_ingest.
|
|
25
|
-
|
|
26
|
-
FileDataSourceMetadata,
|
|
27
|
-
SourceIdentifiers,
|
|
28
|
-
)
|
|
25
|
+
from unstructured_ingest.logger import logger
|
|
26
|
+
from unstructured_ingest.processes.connector_registry import SourceRegistryEntry
|
|
27
|
+
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
28
|
+
from unstructured_ingest.utils.html import HtmlMixin
|
|
29
29
|
|
|
30
30
|
from .client import ZendeskArticle, ZendeskClient, ZendeskTicket
|
|
31
31
|
|
|
@@ -5,8 +5,8 @@ from typing import TYPE_CHECKING, Any, Literal, Optional
|
|
|
5
5
|
|
|
6
6
|
from pydantic import BaseModel, Field, SecretStr
|
|
7
7
|
|
|
8
|
+
from unstructured_ingest.interfaces.process import BaseProcess
|
|
8
9
|
from unstructured_ingest.utils.data_prep import get_data
|
|
9
|
-
from unstructured_ingest.v2.interfaces.process import BaseProcess
|
|
10
10
|
|
|
11
11
|
if TYPE_CHECKING:
|
|
12
12
|
from unstructured_ingest.embed.interfaces import BaseEmbeddingEncoder
|
|
@@ -5,15 +5,15 @@ from typing import Any, Callable, Optional
|
|
|
5
5
|
|
|
6
6
|
from pydantic import BaseModel, Field
|
|
7
7
|
|
|
8
|
-
from unstructured_ingest.
|
|
9
|
-
from unstructured_ingest.
|
|
10
|
-
from unstructured_ingest.
|
|
8
|
+
from unstructured_ingest.data_types.file_data import FileData
|
|
9
|
+
from unstructured_ingest.interfaces.process import BaseProcess
|
|
10
|
+
from unstructured_ingest.logger import logger
|
|
11
11
|
|
|
12
12
|
|
|
13
13
|
class FiltererConfig(BaseModel):
|
|
14
14
|
file_glob: Optional[list[str]] = Field(
|
|
15
15
|
default=None,
|
|
16
|
-
description="file globs to limit which
|
|
16
|
+
description="file globs to limit which data_types of " "files are accepted",
|
|
17
17
|
examples=["*.pdf", "*.html"],
|
|
18
18
|
)
|
|
19
19
|
max_file_size: Optional[int] = Field(
|