unstructured-ingest 0.6.2__py3-none-any.whl → 0.7.0__py3-none-any.whl
This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Potentially problematic release.
This version of unstructured-ingest might be problematic. Click here for more details.
- examples/airtable.py +44 -0
- examples/azure_cognitive_search.py +55 -0
- examples/chroma.py +54 -0
- examples/couchbase.py +55 -0
- examples/databricks_volumes_dest.py +55 -0
- examples/databricks_volumes_source.py +53 -0
- examples/delta_table.py +45 -0
- examples/discord_example.py +36 -0
- examples/elasticsearch.py +49 -0
- examples/google_drive.py +45 -0
- examples/kdbai.py +54 -0
- examples/local.py +36 -0
- examples/milvus.py +44 -0
- examples/mongodb.py +53 -0
- examples/opensearch.py +50 -0
- examples/pinecone.py +57 -0
- examples/s3.py +38 -0
- examples/salesforce.py +44 -0
- examples/sharepoint.py +47 -0
- examples/singlestore.py +49 -0
- examples/sql.py +90 -0
- examples/vectara.py +54 -0
- examples/weaviate.py +44 -0
- test/integration/chunkers/test_chunkers.py +1 -1
- test/integration/connectors/conftest.py +1 -1
- test/integration/connectors/databricks/test_volumes_native.py +3 -3
- test/integration/connectors/discord/test_discord.py +1 -1
- test/integration/connectors/duckdb/test_duckdb.py +2 -2
- test/integration/connectors/duckdb/test_motherduck.py +2 -2
- test/integration/connectors/elasticsearch/test_elasticsearch.py +2 -2
- test/integration/connectors/elasticsearch/test_opensearch.py +2 -2
- test/integration/connectors/sql/test_databricks_delta_tables.py +3 -3
- test/integration/connectors/sql/test_postgres.py +2 -2
- test/integration/connectors/sql/test_singlestore.py +2 -2
- test/integration/connectors/sql/test_snowflake.py +2 -2
- test/integration/connectors/sql/test_sqlite.py +2 -2
- test/integration/connectors/sql/test_vastdb.py +1 -1
- test/integration/connectors/test_astradb.py +2 -2
- test/integration/connectors/test_azure_ai_search.py +2 -2
- test/integration/connectors/test_chroma.py +2 -2
- test/integration/connectors/test_confluence.py +1 -1
- test/integration/connectors/test_delta_table.py +2 -2
- test/integration/connectors/test_dropbox.py +2 -2
- test/integration/connectors/test_github.py +49 -0
- test/integration/connectors/test_google_drive.py +2 -2
- test/integration/connectors/test_jira.py +1 -1
- test/integration/connectors/test_lancedb.py +7 -7
- test/integration/connectors/test_milvus.py +2 -2
- test/integration/connectors/test_mongodb.py +2 -2
- test/integration/connectors/test_neo4j.py +7 -7
- test/integration/connectors/test_notion.py +2 -2
- test/integration/connectors/test_onedrive.py +2 -2
- test/integration/connectors/test_pinecone.py +3 -3
- test/integration/connectors/test_qdrant.py +6 -6
- test/integration/connectors/test_redis.py +3 -3
- test/integration/connectors/test_s3.py +3 -3
- test/integration/connectors/test_sharepoint.py +1 -1
- test/integration/connectors/test_vectara.py +4 -4
- test/integration/connectors/test_zendesk.py +2 -2
- test/integration/connectors/utils/validation/destination.py +2 -2
- test/integration/connectors/utils/validation/source.py +2 -2
- test/integration/connectors/weaviate/test_cloud.py +1 -1
- test/integration/connectors/weaviate/test_local.py +2 -2
- test/integration/embedders/test_azure_openai.py +1 -1
- test/integration/embedders/test_bedrock.py +2 -2
- test/integration/embedders/test_huggingface.py +1 -1
- test/integration/embedders/test_mixedbread.py +1 -1
- test/integration/embedders/test_octoai.py +2 -2
- test/integration/embedders/test_openai.py +2 -2
- test/integration/embedders/test_togetherai.py +2 -2
- test/integration/embedders/test_vertexai.py +1 -1
- test/integration/embedders/test_voyageai.py +1 -1
- test/integration/partitioners/test_partitioner.py +2 -2
- test/unit/{v2/chunkers → chunkers}/test_chunkers.py +1 -1
- test/unit/{v2/connectors → connectors}/ibm_watsonx/test_ibm_watsonx_s3.py +6 -6
- test/unit/{v2/connectors → connectors}/motherduck/test_base.py +5 -5
- test/unit/{v2/connectors → connectors}/sql/test_sql.py +4 -4
- test/unit/{v2/connectors → connectors}/test_confluence.py +1 -1
- test/unit/{v2/connectors → connectors}/test_jira.py +1 -1
- test/unit/{v2/embedders → embedders}/test_huggingface.py +1 -1
- test/unit/{v2/embedders → embedders}/test_vertexai.py +1 -1
- test/unit/{v2/partitioners → partitioners}/test_partitioner.py +2 -2
- test/unit/test_html.py +1 -1
- test/unit/{v2/test_interfaces.py → test_interfaces.py} +1 -1
- test/unit/test_utils.py +106 -97
- unstructured_ingest/__version__.py +1 -1
- unstructured_ingest/cli/__init__.py +0 -14
- unstructured_ingest/cli/base/__init__.py +4 -0
- unstructured_ingest/cli/base/cmd.py +259 -9
- unstructured_ingest/cli/base/dest.py +58 -61
- unstructured_ingest/cli/base/src.py +54 -36
- unstructured_ingest/cli/cli.py +4 -17
- unstructured_ingest/{v2/cli → cli}/cmds.py +2 -2
- unstructured_ingest/{v2/cli → cli}/utils/model_conversion.py +6 -6
- unstructured_ingest/{v2/types → data_types}/file_data.py +1 -1
- unstructured_ingest/embed/bedrock.py +3 -3
- unstructured_ingest/embed/octoai.py +3 -3
- unstructured_ingest/embed/openai.py +3 -3
- unstructured_ingest/embed/togetherai.py +4 -4
- unstructured_ingest/embed/vertexai.py +1 -1
- unstructured_ingest/embed/voyageai.py +4 -4
- unstructured_ingest/{v2/interfaces → interfaces}/downloader.py +3 -3
- unstructured_ingest/{v2/interfaces → interfaces}/indexer.py +3 -3
- unstructured_ingest/{v2/interfaces → interfaces}/upload_stager.py +2 -2
- unstructured_ingest/{v2/interfaces → interfaces}/uploader.py +2 -2
- unstructured_ingest/{v2/otel.py → otel.py} +1 -1
- unstructured_ingest/pipeline/__init__.py +0 -22
- unstructured_ingest/pipeline/interfaces.py +179 -238
- unstructured_ingest/{v2/pipeline → pipeline}/otel.py +2 -2
- unstructured_ingest/pipeline/pipeline.py +388 -97
- unstructured_ingest/{v2/pipeline → pipeline}/steps/chunk.py +5 -5
- unstructured_ingest/{v2/pipeline → pipeline}/steps/download.py +5 -5
- unstructured_ingest/{v2/pipeline → pipeline}/steps/embed.py +5 -5
- unstructured_ingest/{v2/pipeline → pipeline}/steps/filter.py +4 -4
- unstructured_ingest/{v2/pipeline → pipeline}/steps/index.py +5 -5
- unstructured_ingest/{v2/pipeline → pipeline}/steps/partition.py +5 -5
- unstructured_ingest/{v2/pipeline → pipeline}/steps/stage.py +5 -5
- unstructured_ingest/{v2/pipeline → pipeline}/steps/uncompress.py +4 -4
- unstructured_ingest/{v2/pipeline → pipeline}/steps/upload.py +5 -5
- unstructured_ingest/{v2/processes → processes}/chunker.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connector_registry.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/__init__.py +14 -11
- unstructured_ingest/{v2/processes → processes}/connectors/airtable.py +4 -4
- unstructured_ingest/{v2/processes → processes}/connectors/astradb.py +15 -15
- unstructured_ingest/{v2/processes → processes}/connectors/azure_ai_search.py +8 -9
- unstructured_ingest/{v2/processes → processes}/connectors/chroma.py +10 -7
- unstructured_ingest/{v2/processes → processes}/connectors/confluence.py +11 -11
- unstructured_ingest/{v2/processes → processes}/connectors/couchbase.py +12 -12
- unstructured_ingest/{v2/processes → processes}/connectors/databricks/__init__.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/databricks/volumes.py +12 -11
- unstructured_ingest/{v2/processes → processes}/connectors/databricks/volumes_aws.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/databricks/volumes_azure.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/databricks/volumes_gcp.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/databricks/volumes_native.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/databricks/volumes_table.py +8 -9
- unstructured_ingest/{v2/processes → processes}/connectors/delta_table.py +7 -7
- unstructured_ingest/{v2/processes → processes}/connectors/discord.py +9 -9
- unstructured_ingest/{v2/processes → processes}/connectors/duckdb/__init__.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/duckdb/base.py +3 -4
- unstructured_ingest/{v2/processes → processes}/connectors/duckdb/duckdb.py +7 -7
- unstructured_ingest/{v2/processes → processes}/connectors/duckdb/motherduck.py +7 -7
- unstructured_ingest/{v2/processes → processes}/connectors/elasticsearch/__init__.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/elasticsearch/elasticsearch.py +17 -17
- unstructured_ingest/{v2/processes → processes}/connectors/elasticsearch/opensearch.py +5 -5
- unstructured_ingest/{v2/processes → processes}/connectors/fsspec/__init__.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/fsspec/azure.py +8 -8
- unstructured_ingest/{v2/processes → processes}/connectors/fsspec/box.py +8 -8
- unstructured_ingest/{v2/processes → processes}/connectors/fsspec/dropbox.py +8 -8
- unstructured_ingest/{v2/processes → processes}/connectors/fsspec/fsspec.py +8 -8
- unstructured_ingest/{v2/processes → processes}/connectors/fsspec/gcs.py +8 -8
- unstructured_ingest/{v2/processes → processes}/connectors/fsspec/s3.py +9 -9
- unstructured_ingest/{v2/processes → processes}/connectors/fsspec/sftp.py +5 -5
- unstructured_ingest/processes/connectors/github.py +221 -0
- unstructured_ingest/{v2/processes → processes}/connectors/gitlab.py +9 -9
- unstructured_ingest/{v2/processes → processes}/connectors/google_drive.py +22 -13
- unstructured_ingest/{v2/processes → processes}/connectors/ibm_watsonx/__init__.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/ibm_watsonx/ibm_watsonx_s3.py +9 -9
- unstructured_ingest/{v2/processes → processes}/connectors/jira.py +9 -9
- unstructured_ingest/{v2/processes → processes}/connectors/kafka/__init__.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/kafka/cloud.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/kafka/kafka.py +9 -9
- unstructured_ingest/{v2/processes → processes}/connectors/kafka/local.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/kdbai.py +11 -7
- unstructured_ingest/{v2/processes → processes}/connectors/lancedb/__init__.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/lancedb/aws.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/lancedb/azure.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/lancedb/cloud.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/lancedb/gcp.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/lancedb/lancedb.py +6 -6
- unstructured_ingest/{v2/processes → processes}/connectors/lancedb/local.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/local.py +9 -9
- unstructured_ingest/{v2/processes → processes}/connectors/milvus.py +7 -7
- unstructured_ingest/{v2/processes → processes}/connectors/mongodb.py +13 -13
- unstructured_ingest/{v2/processes → processes}/connectors/neo4j.py +7 -7
- unstructured_ingest/{v2/processes → processes}/connectors/notion/client.py +6 -6
- unstructured_ingest/{v2/processes → processes}/connectors/notion/connector.py +15 -15
- unstructured_ingest/{v2/processes → processes}/connectors/notion/helpers.py +4 -4
- unstructured_ingest/{ingest_backoff → processes/connectors/notion/ingest_backoff}/_wrapper.py +5 -1
- unstructured_ingest/processes/connectors/notion/ingest_backoff/types.py +24 -0
- unstructured_ingest/{connector → processes/connectors}/notion/types/block.py +4 -4
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/bookmark.py +2 -2
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/breadcrumb.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/bulleted_list_item.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/callout.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/child_database.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/child_page.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/code.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/column_list.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/divider.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/embed.py +2 -2
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/equation.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/file.py +4 -4
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/heading.py +2 -2
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/image.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/link_preview.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/link_to_page.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/numbered_list.py +2 -2
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/paragraph.py +2 -2
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/pdf.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/quote.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/synced_block.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/table.py +2 -5
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/table_of_contents.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/template.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/todo.py +2 -2
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/toggle.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/unsupported.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/video.py +2 -2
- unstructured_ingest/{connector → processes/connectors}/notion/types/database.py +6 -6
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/__init__.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/checkbox.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/created_by.py +2 -2
- unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/created_time.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/date.py +2 -2
- unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/email.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/files.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/formula.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/last_edited_by.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/last_edited_time.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/multiselect.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/number.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/people.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/phone_number.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/relation.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/rich_text.py +2 -2
- unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/rollup.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/select.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/status.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/title.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/unique_id.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/url.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/verification.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/date.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/file.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/page.py +4 -4
- unstructured_ingest/{connector → processes/connectors}/notion/types/parent.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/rich_text.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/user.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/onedrive.py +10 -10
- unstructured_ingest/{v2/processes → processes}/connectors/outlook.py +9 -9
- unstructured_ingest/{v2/processes → processes}/connectors/pinecone.py +12 -9
- unstructured_ingest/{v2/processes → processes}/connectors/qdrant/__init__.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/qdrant/cloud.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/qdrant/local.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/qdrant/qdrant.py +9 -6
- unstructured_ingest/{v2/processes → processes}/connectors/qdrant/server.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/redisdb.py +6 -6
- unstructured_ingest/{v2/processes → processes}/connectors/salesforce.py +9 -9
- unstructured_ingest/{v2/processes → processes}/connectors/sharepoint.py +7 -7
- unstructured_ingest/{v2/processes → processes}/connectors/slack.py +9 -9
- unstructured_ingest/{v2/processes → processes}/connectors/sql/__init__.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/sql/databricks_delta_tables.py +11 -9
- unstructured_ingest/{v2/processes → processes}/connectors/sql/postgres.py +4 -4
- unstructured_ingest/{v2/processes → processes}/connectors/sql/singlestore.py +4 -4
- unstructured_ingest/{v2/processes → processes}/connectors/sql/snowflake.py +6 -6
- unstructured_ingest/{v2/processes → processes}/connectors/sql/sql.py +16 -11
- unstructured_ingest/{v2/processes → processes}/connectors/sql/sqlite.py +4 -4
- unstructured_ingest/{v2/processes → processes}/connectors/sql/vastdb.py +9 -10
- unstructured_ingest/{v2/processes → processes}/connectors/vectara.py +6 -6
- unstructured_ingest/{v2/processes → processes}/connectors/weaviate/__init__.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/weaviate/cloud.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/weaviate/embedded.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/weaviate/local.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/weaviate/weaviate.py +5 -5
- unstructured_ingest/{v2/processes → processes}/connectors/zendesk/client.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/zendesk/zendesk.py +10 -10
- unstructured_ingest/{v2/processes → processes}/embedder.py +1 -1
- unstructured_ingest/{v2/processes → processes}/filter.py +4 -4
- unstructured_ingest/{v2/processes → processes}/partitioner.py +6 -6
- unstructured_ingest/{v2/processes → processes}/uncompress.py +3 -3
- unstructured_ingest/{v2/processes → processes}/utils/blob_storage.py +2 -2
- unstructured_ingest/{v2/unstructured_api.py → unstructured_api.py} +2 -2
- unstructured_ingest/utils/compression.py +1 -48
- unstructured_ingest/utils/data_prep.py +9 -1
- unstructured_ingest/utils/html.py +3 -3
- unstructured_ingest/{v2/utils.py → utils/pydantic_models.py} +0 -9
- unstructured_ingest/utils/string_and_date_utils.py +1 -1
- {unstructured_ingest-0.6.2.dist-info → unstructured_ingest-0.7.0.dist-info}/METADATA +99 -99
- unstructured_ingest-0.7.0.dist-info/RECORD +370 -0
- {unstructured_ingest-0.6.2.dist-info → unstructured_ingest-0.7.0.dist-info}/top_level.txt +1 -0
- test/unit/v2/test_utils.py +0 -82
- unstructured_ingest/cli/cmd_factory.py +0 -12
- unstructured_ingest/cli/cmds/__init__.py +0 -145
- unstructured_ingest/cli/cmds/airtable.py +0 -69
- unstructured_ingest/cli/cmds/astradb.py +0 -99
- unstructured_ingest/cli/cmds/azure_ai_search.py +0 -65
- unstructured_ingest/cli/cmds/biomed.py +0 -52
- unstructured_ingest/cli/cmds/chroma.py +0 -104
- unstructured_ingest/cli/cmds/clarifai.py +0 -71
- unstructured_ingest/cli/cmds/confluence.py +0 -69
- unstructured_ingest/cli/cmds/databricks_volumes.py +0 -163
- unstructured_ingest/cli/cmds/delta_table.py +0 -94
- unstructured_ingest/cli/cmds/discord.py +0 -47
- unstructured_ingest/cli/cmds/elasticsearch.py +0 -133
- unstructured_ingest/cli/cmds/fsspec/azure.py +0 -94
- unstructured_ingest/cli/cmds/fsspec/box.py +0 -48
- unstructured_ingest/cli/cmds/fsspec/dropbox.py +0 -51
- unstructured_ingest/cli/cmds/fsspec/fsspec.py +0 -15
- unstructured_ingest/cli/cmds/fsspec/gcs.py +0 -71
- unstructured_ingest/cli/cmds/fsspec/s3.py +0 -74
- unstructured_ingest/cli/cmds/fsspec/sftp.py +0 -58
- unstructured_ingest/cli/cmds/github.py +0 -54
- unstructured_ingest/cli/cmds/gitlab.py +0 -54
- unstructured_ingest/cli/cmds/google_drive.py +0 -49
- unstructured_ingest/cli/cmds/hubspot.py +0 -70
- unstructured_ingest/cli/cmds/jira.py +0 -71
- unstructured_ingest/cli/cmds/kafka.py +0 -102
- unstructured_ingest/cli/cmds/local.py +0 -43
- unstructured_ingest/cli/cmds/mongodb.py +0 -72
- unstructured_ingest/cli/cmds/notion.py +0 -48
- unstructured_ingest/cli/cmds/onedrive.py +0 -66
- unstructured_ingest/cli/cmds/opensearch.py +0 -117
- unstructured_ingest/cli/cmds/outlook.py +0 -67
- unstructured_ingest/cli/cmds/pinecone.py +0 -71
- unstructured_ingest/cli/cmds/qdrant.py +0 -124
- unstructured_ingest/cli/cmds/reddit.py +0 -67
- unstructured_ingest/cli/cmds/salesforce.py +0 -58
- unstructured_ingest/cli/cmds/sharepoint.py +0 -66
- unstructured_ingest/cli/cmds/slack.py +0 -56
- unstructured_ingest/cli/cmds/sql.py +0 -66
- unstructured_ingest/cli/cmds/vectara.py +0 -66
- unstructured_ingest/cli/cmds/weaviate.py +0 -98
- unstructured_ingest/cli/cmds/wikipedia.py +0 -40
- unstructured_ingest/cli/common.py +0 -7
- unstructured_ingest/cli/interfaces.py +0 -663
- unstructured_ingest/cli/utils.py +0 -205
- unstructured_ingest/connector/airtable.py +0 -309
- unstructured_ingest/connector/astradb.py +0 -267
- unstructured_ingest/connector/azure_ai_search.py +0 -144
- unstructured_ingest/connector/biomed.py +0 -320
- unstructured_ingest/connector/chroma.py +0 -158
- unstructured_ingest/connector/clarifai.py +0 -122
- unstructured_ingest/connector/confluence.py +0 -285
- unstructured_ingest/connector/databricks_volumes.py +0 -137
- unstructured_ingest/connector/delta_table.py +0 -203
- unstructured_ingest/connector/discord.py +0 -180
- unstructured_ingest/connector/elasticsearch.py +0 -396
- unstructured_ingest/connector/fsspec/azure.py +0 -78
- unstructured_ingest/connector/fsspec/box.py +0 -109
- unstructured_ingest/connector/fsspec/dropbox.py +0 -160
- unstructured_ingest/connector/fsspec/fsspec.py +0 -359
- unstructured_ingest/connector/fsspec/gcs.py +0 -82
- unstructured_ingest/connector/fsspec/s3.py +0 -62
- unstructured_ingest/connector/fsspec/sftp.py +0 -81
- unstructured_ingest/connector/git.py +0 -124
- unstructured_ingest/connector/github.py +0 -174
- unstructured_ingest/connector/gitlab.py +0 -142
- unstructured_ingest/connector/google_drive.py +0 -348
- unstructured_ingest/connector/hubspot.py +0 -278
- unstructured_ingest/connector/jira.py +0 -469
- unstructured_ingest/connector/kafka.py +0 -293
- unstructured_ingest/connector/local.py +0 -139
- unstructured_ingest/connector/mongodb.py +0 -284
- unstructured_ingest/connector/notion/client.py +0 -248
- unstructured_ingest/connector/notion/connector.py +0 -469
- unstructured_ingest/connector/notion/helpers.py +0 -584
- unstructured_ingest/connector/notion/types/blocks/bookmark.py +0 -40
- unstructured_ingest/connector/notion/types/blocks/callout.py +0 -94
- unstructured_ingest/connector/notion/types/blocks/child_database.py +0 -23
- unstructured_ingest/connector/notion/types/blocks/code.py +0 -43
- unstructured_ingest/connector/notion/types/blocks/column_list.py +0 -35
- unstructured_ingest/connector/notion/types/blocks/divider.py +0 -22
- unstructured_ingest/connector/notion/types/blocks/heading.py +0 -37
- unstructured_ingest/connector/notion/types/blocks/link_preview.py +0 -24
- unstructured_ingest/connector/notion/types/blocks/numbered_list.py +0 -29
- unstructured_ingest/connector/notion/types/blocks/quote.py +0 -37
- unstructured_ingest/connector/notion/types/blocks/synced_block.py +0 -57
- unstructured_ingest/connector/notion/types/blocks/table.py +0 -63
- unstructured_ingest/connector/notion/types/blocks/table_of_contents.py +0 -23
- unstructured_ingest/connector/notion/types/blocks/template.py +0 -30
- unstructured_ingest/connector/notion/types/blocks/todo.py +0 -42
- unstructured_ingest/connector/notion/types/blocks/unsupported.py +0 -20
- unstructured_ingest/connector/notion/types/database_properties/__init__.py +0 -106
- unstructured_ingest/connector/notion/types/database_properties/checkbox.py +0 -38
- unstructured_ingest/connector/notion/types/database_properties/date.py +0 -41
- unstructured_ingest/connector/notion/types/database_properties/formula.py +0 -49
- unstructured_ingest/connector/notion/types/database_properties/last_edited_time.py +0 -34
- unstructured_ingest/connector/notion/types/database_properties/multiselect.py +0 -73
- unstructured_ingest/connector/notion/types/database_properties/people.py +0 -40
- unstructured_ingest/connector/notion/types/database_properties/phone_number.py +0 -36
- unstructured_ingest/connector/notion/types/database_properties/relation.py +0 -67
- unstructured_ingest/connector/notion/types/database_properties/select.py +0 -68
- unstructured_ingest/connector/notion/types/database_properties/status.py +0 -80
- unstructured_ingest/connector/notion/types/database_properties/unique_id.py +0 -50
- unstructured_ingest/connector/notion/types/date.py +0 -26
- unstructured_ingest/connector/notion/types/file.py +0 -51
- unstructured_ingest/connector/notion/types/user.py +0 -76
- unstructured_ingest/connector/onedrive.py +0 -232
- unstructured_ingest/connector/opensearch.py +0 -218
- unstructured_ingest/connector/outlook.py +0 -285
- unstructured_ingest/connector/pinecone.py +0 -150
- unstructured_ingest/connector/qdrant.py +0 -144
- unstructured_ingest/connector/reddit.py +0 -166
- unstructured_ingest/connector/registry.py +0 -109
- unstructured_ingest/connector/salesforce.py +0 -301
- unstructured_ingest/connector/sharepoint.py +0 -573
- unstructured_ingest/connector/slack.py +0 -224
- unstructured_ingest/connector/sql.py +0 -199
- unstructured_ingest/connector/vectara.py +0 -253
- unstructured_ingest/connector/weaviate.py +0 -190
- unstructured_ingest/connector/wikipedia.py +0 -208
- unstructured_ingest/enhanced_dataclass/__init__.py +0 -4
- unstructured_ingest/enhanced_dataclass/core.py +0 -99
- unstructured_ingest/enhanced_dataclass/dataclasses.py +0 -54
- unstructured_ingest/enhanced_dataclass/json_mixin.py +0 -125
- unstructured_ingest/interfaces.py +0 -852
- unstructured_ingest/pipeline/copy.py +0 -19
- unstructured_ingest/pipeline/doc_factory.py +0 -12
- unstructured_ingest/pipeline/partition.py +0 -60
- unstructured_ingest/pipeline/permissions.py +0 -12
- unstructured_ingest/pipeline/reformat/chunking.py +0 -134
- unstructured_ingest/pipeline/reformat/embedding.py +0 -64
- unstructured_ingest/pipeline/source.py +0 -77
- unstructured_ingest/pipeline/utils.py +0 -6
- unstructured_ingest/pipeline/write.py +0 -18
- unstructured_ingest/processor.py +0 -93
- unstructured_ingest/runner/__init__.py +0 -104
- unstructured_ingest/runner/airtable.py +0 -35
- unstructured_ingest/runner/astradb.py +0 -34
- unstructured_ingest/runner/base_runner.py +0 -89
- unstructured_ingest/runner/biomed.py +0 -45
- unstructured_ingest/runner/confluence.py +0 -35
- unstructured_ingest/runner/delta_table.py +0 -34
- unstructured_ingest/runner/discord.py +0 -35
- unstructured_ingest/runner/elasticsearch.py +0 -40
- unstructured_ingest/runner/fsspec/azure.py +0 -30
- unstructured_ingest/runner/fsspec/box.py +0 -28
- unstructured_ingest/runner/fsspec/dropbox.py +0 -30
- unstructured_ingest/runner/fsspec/fsspec.py +0 -40
- unstructured_ingest/runner/fsspec/gcs.py +0 -28
- unstructured_ingest/runner/fsspec/s3.py +0 -28
- unstructured_ingest/runner/fsspec/sftp.py +0 -28
- unstructured_ingest/runner/github.py +0 -37
- unstructured_ingest/runner/gitlab.py +0 -37
- unstructured_ingest/runner/google_drive.py +0 -35
- unstructured_ingest/runner/hubspot.py +0 -35
- unstructured_ingest/runner/jira.py +0 -35
- unstructured_ingest/runner/kafka.py +0 -34
- unstructured_ingest/runner/local.py +0 -23
- unstructured_ingest/runner/mongodb.py +0 -34
- unstructured_ingest/runner/notion.py +0 -61
- unstructured_ingest/runner/onedrive.py +0 -35
- unstructured_ingest/runner/opensearch.py +0 -40
- unstructured_ingest/runner/outlook.py +0 -33
- unstructured_ingest/runner/reddit.py +0 -35
- unstructured_ingest/runner/salesforce.py +0 -33
- unstructured_ingest/runner/sharepoint.py +0 -35
- unstructured_ingest/runner/slack.py +0 -33
- unstructured_ingest/runner/utils.py +0 -47
- unstructured_ingest/runner/wikipedia.py +0 -35
- unstructured_ingest/runner/writers/__init__.py +0 -48
- unstructured_ingest/runner/writers/astradb.py +0 -22
- unstructured_ingest/runner/writers/azure_ai_search.py +0 -24
- unstructured_ingest/runner/writers/base_writer.py +0 -26
- unstructured_ingest/runner/writers/chroma.py +0 -22
- unstructured_ingest/runner/writers/clarifai.py +0 -19
- unstructured_ingest/runner/writers/databricks_volumes.py +0 -25
- unstructured_ingest/runner/writers/delta_table.py +0 -24
- unstructured_ingest/runner/writers/elasticsearch.py +0 -24
- unstructured_ingest/runner/writers/fsspec/azure.py +0 -24
- unstructured_ingest/runner/writers/fsspec/box.py +0 -21
- unstructured_ingest/runner/writers/fsspec/dropbox.py +0 -21
- unstructured_ingest/runner/writers/fsspec/gcs.py +0 -19
- unstructured_ingest/runner/writers/fsspec/s3.py +0 -21
- unstructured_ingest/runner/writers/kafka.py +0 -21
- unstructured_ingest/runner/writers/mongodb.py +0 -21
- unstructured_ingest/runner/writers/opensearch.py +0 -26
- unstructured_ingest/runner/writers/pinecone.py +0 -21
- unstructured_ingest/runner/writers/qdrant.py +0 -19
- unstructured_ingest/runner/writers/sql.py +0 -22
- unstructured_ingest/runner/writers/vectara.py +0 -22
- unstructured_ingest/runner/writers/weaviate.py +0 -21
- unstructured_ingest/utils/google_filetype.py +0 -9
- unstructured_ingest/v2/__init__.py +0 -1
- unstructured_ingest/v2/cli/__init__.py +0 -0
- unstructured_ingest/v2/cli/base/__init__.py +0 -4
- unstructured_ingest/v2/cli/base/cmd.py +0 -269
- unstructured_ingest/v2/cli/base/dest.py +0 -85
- unstructured_ingest/v2/cli/base/src.py +0 -85
- unstructured_ingest/v2/cli/cli.py +0 -24
- unstructured_ingest/v2/cli/utils/__init__.py +0 -0
- unstructured_ingest/v2/logger.py +0 -126
- unstructured_ingest/v2/main.py +0 -11
- unstructured_ingest/v2/pipeline/__init__.py +0 -0
- unstructured_ingest/v2/pipeline/interfaces.py +0 -211
- unstructured_ingest/v2/pipeline/pipeline.py +0 -408
- unstructured_ingest/v2/pipeline/steps/__init__.py +0 -0
- unstructured_ingest/v2/processes/connectors/assets/__init__.py +0 -0
- unstructured_ingest/v2/processes/connectors/assets/databricks_delta_table_schema.sql +0 -10
- unstructured_ingest/v2/processes/connectors/assets/weaviate_collection_config.json +0 -23
- unstructured_ingest/v2/processes/connectors/notion/__init__.py +0 -0
- unstructured_ingest/v2/processes/connectors/notion/interfaces.py +0 -32
- unstructured_ingest/v2/processes/connectors/notion/types/__init__.py +0 -0
- unstructured_ingest/v2/processes/connectors/notion/types/block.py +0 -96
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/__init__.py +0 -63
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/breadcrumb.py +0 -21
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/bulleted_list_item.py +0 -31
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/child_page.py +0 -23
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/embed.py +0 -36
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/equation.py +0 -23
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/file.py +0 -49
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/image.py +0 -21
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/link_to_page.py +0 -29
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/paragraph.py +0 -31
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/pdf.py +0 -49
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/toggle.py +0 -37
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/video.py +0 -22
- unstructured_ingest/v2/processes/connectors/notion/types/database.py +0 -73
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/created_by.py +0 -35
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/created_time.py +0 -34
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/email.py +0 -36
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/files.py +0 -37
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/last_edited_by.py +0 -34
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/number.py +0 -49
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/rich_text.py +0 -43
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/rollup.py +0 -56
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/title.py +0 -37
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/url.py +0 -37
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/verification.py +0 -78
- unstructured_ingest/v2/processes/connectors/notion/types/page.py +0 -45
- unstructured_ingest/v2/processes/connectors/notion/types/parent.py +0 -66
- unstructured_ingest/v2/processes/connectors/notion/types/rich_text.py +0 -189
- unstructured_ingest/v2/processes/connectors/zendesk/__init__.py +0 -0
- unstructured_ingest/v2/processes/utils/__init__.py +0 -0
- unstructured_ingest/v2/types/__init__.py +0 -0
- unstructured_ingest-0.6.2.dist-info/RECORD +0 -589
- {test/unit/v2 → examples}/__init__.py +0 -0
- /test/unit/{v2/chunkers → chunkers}/__init__.py +0 -0
- /test/unit/{v2/connectors → connectors}/__init__.py +0 -0
- /test/unit/{v2/connectors → connectors}/ibm_watsonx/__init__.py +0 -0
- /test/unit/{v2/connectors → connectors}/motherduck/__init__.py +0 -0
- /test/unit/{v2/connectors → connectors}/sql/__init__.py +0 -0
- /test/unit/{v2/embedders → embedders}/__init__.py +0 -0
- /test/unit/{v2/embedders → embedders}/test_bedrock.py +0 -0
- /test/unit/{v2/embedders → embedders}/test_mixedbread.py +0 -0
- /test/unit/{v2/embedders → embedders}/test_octoai.py +0 -0
- /test/unit/{v2/embedders → embedders}/test_openai.py +0 -0
- /test/unit/{v2/embedders → embedders}/test_togetherai.py +0 -0
- /test/unit/{v2/embedders → embedders}/test_voyageai.py +0 -0
- /test/unit/{v2/partitioners → partitioners}/__init__.py +0 -0
- /test/unit/{v2/utils → utils}/__init__.py +0 -0
- /test/unit/{v2/utils → utils}/data_generator.py +0 -0
- /unstructured_ingest/{v2/cli → cli}/base/importer.py +0 -0
- /unstructured_ingest/cli/{cmds/fsspec → utils}/__init__.py +0 -0
- /unstructured_ingest/{v2/cli → cli}/utils/click.py +0 -0
- /unstructured_ingest/{connector → data_types}/__init__.py +0 -0
- /unstructured_ingest/{v2/errors.py → errors_v2.py} +0 -0
- /unstructured_ingest/{v2/interfaces → interfaces}/__init__.py +0 -0
- /unstructured_ingest/{v2/interfaces → interfaces}/connector.py +0 -0
- /unstructured_ingest/{v2/interfaces → interfaces}/process.py +0 -0
- /unstructured_ingest/{v2/interfaces → interfaces}/processor.py +0 -0
- /unstructured_ingest/{connector/fsspec → pipeline/steps}/__init__.py +0 -0
- /unstructured_ingest/{v2/processes → processes}/__init__.py +0 -0
- /unstructured_ingest/{connector/notion → processes/connectors/assets}/__init__.py +0 -0
- /unstructured_ingest/{v2/processes → processes}/connectors/fsspec/utils.py +0 -0
- /unstructured_ingest/{connector/notion/types → processes/connectors/notion}/__init__.py +0 -0
- /unstructured_ingest/{ingest_backoff → processes/connectors/notion/ingest_backoff}/__init__.py +0 -0
- /unstructured_ingest/{ingest_backoff → processes/connectors/notion/ingest_backoff}/_common.py +0 -0
- /unstructured_ingest/{connector → processes/connectors}/notion/interfaces.py +0 -0
- /unstructured_ingest/{pipeline/reformat → processes/connectors/notion/types}/__init__.py +0 -0
- /unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/__init__.py +0 -0
- /unstructured_ingest/{v2/processes → processes}/connectors/utils.py +0 -0
- /unstructured_ingest/{runner/fsspec → processes/connectors/zendesk}/__init__.py +0 -0
- /unstructured_ingest/{runner/writers/fsspec → processes/utils}/__init__.py +0 -0
- /unstructured_ingest/{v2 → utils}/constants.py +0 -0
- {unstructured_ingest-0.6.2.dist-info → unstructured_ingest-0.7.0.dist-info}/LICENSE.md +0 -0
- {unstructured_ingest-0.6.2.dist-info → unstructured_ingest-0.7.0.dist-info}/WHEEL +0 -0
- {unstructured_ingest-0.6.2.dist-info → unstructured_ingest-0.7.0.dist-info}/entry_points.txt +0 -0
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
# https://developers.notion.com/reference/parent-object
|
|
2
2
|
from dataclasses import dataclass
|
|
3
3
|
|
|
4
|
-
from unstructured_ingest.
|
|
4
|
+
from unstructured_ingest.processes.connectors.notion.interfaces import FromJSONMixin
|
|
5
5
|
|
|
6
6
|
|
|
7
7
|
# https://developers.notion.com/reference/parent-object#database-parent
|
|
@@ -6,12 +6,12 @@ from htmlBuilder.attributes import Href, Style
|
|
|
6
6
|
from htmlBuilder.tags import A, B, Code, Div, HtmlTag, I, S, Span, U
|
|
7
7
|
from htmlBuilder.tags import Text as HtmlText
|
|
8
8
|
|
|
9
|
-
from unstructured_ingest.
|
|
9
|
+
from unstructured_ingest.processes.connectors.notion.interfaces import (
|
|
10
10
|
FromJSONMixin,
|
|
11
11
|
GetHTMLMixin,
|
|
12
12
|
)
|
|
13
|
-
from unstructured_ingest.
|
|
14
|
-
from unstructured_ingest.
|
|
13
|
+
from unstructured_ingest.processes.connectors.notion.types.date import Date
|
|
14
|
+
from unstructured_ingest.processes.connectors.notion.types.user import People
|
|
15
15
|
|
|
16
16
|
|
|
17
17
|
@dataclass
|
|
@@ -5,7 +5,7 @@ from typing import Optional
|
|
|
5
5
|
from htmlBuilder.attributes import Href
|
|
6
6
|
from htmlBuilder.tags import A, Div, HtmlTag
|
|
7
7
|
|
|
8
|
-
from unstructured_ingest.
|
|
8
|
+
from unstructured_ingest.processes.connectors.notion.interfaces import (
|
|
9
9
|
FromJSONMixin,
|
|
10
10
|
GetHTMLMixin,
|
|
11
11
|
)
|
|
@@ -10,13 +10,17 @@ from typing import TYPE_CHECKING, Any, AsyncIterator, Optional
|
|
|
10
10
|
from dateutil import parser
|
|
11
11
|
from pydantic import Field, Secret
|
|
12
12
|
|
|
13
|
+
from unstructured_ingest.data_types.file_data import (
|
|
14
|
+
FileData,
|
|
15
|
+
FileDataSourceMetadata,
|
|
16
|
+
SourceIdentifiers,
|
|
17
|
+
)
|
|
13
18
|
from unstructured_ingest.error import (
|
|
14
19
|
DestinationConnectionError,
|
|
15
20
|
SourceConnectionError,
|
|
16
21
|
SourceConnectionNetworkError,
|
|
17
22
|
)
|
|
18
|
-
from unstructured_ingest.
|
|
19
|
-
from unstructured_ingest.v2.interfaces import (
|
|
23
|
+
from unstructured_ingest.interfaces import (
|
|
20
24
|
AccessConfig,
|
|
21
25
|
ConnectionConfig,
|
|
22
26
|
Downloader,
|
|
@@ -27,20 +31,16 @@ from unstructured_ingest.v2.interfaces import (
|
|
|
27
31
|
Uploader,
|
|
28
32
|
UploaderConfig,
|
|
29
33
|
)
|
|
30
|
-
from unstructured_ingest.
|
|
31
|
-
from unstructured_ingest.
|
|
34
|
+
from unstructured_ingest.logger import logger
|
|
35
|
+
from unstructured_ingest.processes.connector_registry import (
|
|
32
36
|
DestinationRegistryEntry,
|
|
33
37
|
SourceRegistryEntry,
|
|
34
38
|
)
|
|
35
|
-
from unstructured_ingest.
|
|
39
|
+
from unstructured_ingest.processes.utils.blob_storage import (
|
|
36
40
|
BlobStoreUploadStager,
|
|
37
41
|
BlobStoreUploadStagerConfig,
|
|
38
42
|
)
|
|
39
|
-
from unstructured_ingest.
|
|
40
|
-
FileData,
|
|
41
|
-
FileDataSourceMetadata,
|
|
42
|
-
SourceIdentifiers,
|
|
43
|
-
)
|
|
43
|
+
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
44
44
|
|
|
45
45
|
if TYPE_CHECKING:
|
|
46
46
|
from office365.graph_client import GraphClient
|
|
@@ -7,10 +7,13 @@ from typing import TYPE_CHECKING, Any, Coroutine, Generator
|
|
|
7
7
|
|
|
8
8
|
from pydantic import Field, Secret
|
|
9
9
|
|
|
10
|
+
from unstructured_ingest.data_types.file_data import (
|
|
11
|
+
FileData,
|
|
12
|
+
FileDataSourceMetadata,
|
|
13
|
+
SourceIdentifiers,
|
|
14
|
+
)
|
|
10
15
|
from unstructured_ingest.error import SourceConnectionError
|
|
11
|
-
from unstructured_ingest.
|
|
12
|
-
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
13
|
-
from unstructured_ingest.v2.interfaces import (
|
|
16
|
+
from unstructured_ingest.interfaces import (
|
|
14
17
|
AccessConfig,
|
|
15
18
|
ConnectionConfig,
|
|
16
19
|
Downloader,
|
|
@@ -19,12 +22,9 @@ from unstructured_ingest.v2.interfaces import (
|
|
|
19
22
|
Indexer,
|
|
20
23
|
IndexerConfig,
|
|
21
24
|
)
|
|
22
|
-
from unstructured_ingest.
|
|
23
|
-
from unstructured_ingest.
|
|
24
|
-
|
|
25
|
-
FileDataSourceMetadata,
|
|
26
|
-
SourceIdentifiers,
|
|
27
|
-
)
|
|
25
|
+
from unstructured_ingest.logger import logger
|
|
26
|
+
from unstructured_ingest.processes.connector_registry import SourceRegistryEntry
|
|
27
|
+
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
28
28
|
|
|
29
29
|
MAX_EMAILS_PER_FOLDER = 1_000_000 # Maximum number of emails per folder
|
|
30
30
|
|
|
@@ -5,12 +5,10 @@ from typing import TYPE_CHECKING, Any, Literal, Optional
|
|
|
5
5
|
|
|
6
6
|
from pydantic import Field, Secret
|
|
7
7
|
|
|
8
|
+
from unstructured_ingest.data_types.file_data import FileData
|
|
8
9
|
from unstructured_ingest.error import DestinationConnectionError
|
|
9
|
-
from unstructured_ingest.
|
|
10
|
-
from unstructured_ingest.
|
|
11
|
-
from unstructured_ingest.v2.constants import RECORD_ID_LABEL
|
|
12
|
-
from unstructured_ingest.v2.errors import UserError
|
|
13
|
-
from unstructured_ingest.v2.interfaces import (
|
|
10
|
+
from unstructured_ingest.errors_v2 import UserError
|
|
11
|
+
from unstructured_ingest.interfaces import (
|
|
14
12
|
AccessConfig,
|
|
15
13
|
ConnectionConfig,
|
|
16
14
|
UploaderConfig,
|
|
@@ -18,10 +16,15 @@ from unstructured_ingest.v2.interfaces import (
|
|
|
18
16
|
UploadStagerConfig,
|
|
19
17
|
VectorDBUploader,
|
|
20
18
|
)
|
|
21
|
-
from unstructured_ingest.
|
|
22
|
-
from unstructured_ingest.
|
|
23
|
-
from unstructured_ingest.
|
|
24
|
-
from unstructured_ingest.
|
|
19
|
+
from unstructured_ingest.logger import logger
|
|
20
|
+
from unstructured_ingest.processes.connector_registry import DestinationRegistryEntry
|
|
21
|
+
from unstructured_ingest.utils.constants import RECORD_ID_LABEL
|
|
22
|
+
from unstructured_ingest.utils.data_prep import (
|
|
23
|
+
flatten_dict,
|
|
24
|
+
generator_batching_wbytes,
|
|
25
|
+
get_enhanced_element_id,
|
|
26
|
+
)
|
|
27
|
+
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
25
28
|
|
|
26
29
|
if TYPE_CHECKING:
|
|
27
30
|
from pinecone import Index as PineconeIndex
|
|
@@ -2,8 +2,8 @@ from dataclasses import dataclass
|
|
|
2
2
|
|
|
3
3
|
from pydantic import Field, Secret
|
|
4
4
|
|
|
5
|
-
from unstructured_ingest.
|
|
6
|
-
from unstructured_ingest.
|
|
5
|
+
from unstructured_ingest.processes.connector_registry import DestinationRegistryEntry
|
|
6
|
+
from unstructured_ingest.processes.connectors.qdrant.qdrant import (
|
|
7
7
|
QdrantAccessConfig,
|
|
8
8
|
QdrantConnectionConfig,
|
|
9
9
|
QdrantUploader,
|
|
@@ -2,8 +2,8 @@ from dataclasses import dataclass
|
|
|
2
2
|
|
|
3
3
|
from pydantic import Field, Secret
|
|
4
4
|
|
|
5
|
-
from unstructured_ingest.
|
|
6
|
-
from unstructured_ingest.
|
|
5
|
+
from unstructured_ingest.processes.connector_registry import DestinationRegistryEntry
|
|
6
|
+
from unstructured_ingest.processes.connectors.qdrant.qdrant import (
|
|
7
7
|
QdrantAccessConfig,
|
|
8
8
|
QdrantConnectionConfig,
|
|
9
9
|
QdrantUploader,
|
|
@@ -7,10 +7,9 @@ from typing import TYPE_CHECKING, Any, AsyncGenerator, Generator, Optional
|
|
|
7
7
|
|
|
8
8
|
from pydantic import Field, Secret
|
|
9
9
|
|
|
10
|
+
from unstructured_ingest.data_types.file_data import FileData
|
|
10
11
|
from unstructured_ingest.error import DestinationConnectionError, WriteError
|
|
11
|
-
from unstructured_ingest.
|
|
12
|
-
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
13
|
-
from unstructured_ingest.v2.interfaces import (
|
|
12
|
+
from unstructured_ingest.interfaces import (
|
|
14
13
|
AccessConfig,
|
|
15
14
|
ConnectionConfig,
|
|
16
15
|
Uploader,
|
|
@@ -18,9 +17,13 @@ from unstructured_ingest.v2.interfaces import (
|
|
|
18
17
|
UploadStager,
|
|
19
18
|
UploadStagerConfig,
|
|
20
19
|
)
|
|
21
|
-
from unstructured_ingest.
|
|
22
|
-
from unstructured_ingest.
|
|
23
|
-
|
|
20
|
+
from unstructured_ingest.logger import logger
|
|
21
|
+
from unstructured_ingest.utils.data_prep import (
|
|
22
|
+
batch_generator,
|
|
23
|
+
flatten_dict,
|
|
24
|
+
get_enhanced_element_id,
|
|
25
|
+
)
|
|
26
|
+
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
24
27
|
|
|
25
28
|
if TYPE_CHECKING:
|
|
26
29
|
from qdrant_client import AsyncQdrantClient, QdrantClient
|
|
@@ -2,8 +2,8 @@ from dataclasses import dataclass
|
|
|
2
2
|
|
|
3
3
|
from pydantic import Field, Secret
|
|
4
4
|
|
|
5
|
-
from unstructured_ingest.
|
|
6
|
-
from unstructured_ingest.
|
|
5
|
+
from unstructured_ingest.processes.connector_registry import DestinationRegistryEntry
|
|
6
|
+
from unstructured_ingest.processes.connectors.qdrant.qdrant import (
|
|
7
7
|
QdrantAccessConfig,
|
|
8
8
|
QdrantConnectionConfig,
|
|
9
9
|
QdrantUploader,
|
|
@@ -5,18 +5,18 @@ from typing import TYPE_CHECKING, Any, AsyncGenerator, Generator, Optional
|
|
|
5
5
|
|
|
6
6
|
from pydantic import Field, Secret, model_validator
|
|
7
7
|
|
|
8
|
+
from unstructured_ingest.data_types.file_data import FileData
|
|
8
9
|
from unstructured_ingest.error import DestinationConnectionError
|
|
9
|
-
from unstructured_ingest.
|
|
10
|
-
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
11
|
-
from unstructured_ingest.v2.interfaces import (
|
|
10
|
+
from unstructured_ingest.interfaces import (
|
|
12
11
|
AccessConfig,
|
|
13
12
|
ConnectionConfig,
|
|
14
13
|
Uploader,
|
|
15
14
|
UploaderConfig,
|
|
16
15
|
)
|
|
17
|
-
from unstructured_ingest.
|
|
18
|
-
from unstructured_ingest.
|
|
19
|
-
from unstructured_ingest.
|
|
16
|
+
from unstructured_ingest.logger import logger
|
|
17
|
+
from unstructured_ingest.processes.connector_registry import DestinationRegistryEntry
|
|
18
|
+
from unstructured_ingest.utils.data_prep import batch_generator
|
|
19
|
+
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
20
20
|
|
|
21
21
|
if TYPE_CHECKING:
|
|
22
22
|
from redis.asyncio import Redis
|
|
@@ -20,9 +20,13 @@ from typing import TYPE_CHECKING, Any, Generator, Optional, Type
|
|
|
20
20
|
from dateutil import parser
|
|
21
21
|
from pydantic import Field, Secret
|
|
22
22
|
|
|
23
|
+
from unstructured_ingest.data_types.file_data import (
|
|
24
|
+
FileData,
|
|
25
|
+
FileDataSourceMetadata,
|
|
26
|
+
SourceIdentifiers,
|
|
27
|
+
)
|
|
23
28
|
from unstructured_ingest.error import SourceConnectionError, SourceConnectionNetworkError
|
|
24
|
-
from unstructured_ingest.
|
|
25
|
-
from unstructured_ingest.v2.interfaces import (
|
|
29
|
+
from unstructured_ingest.interfaces import (
|
|
26
30
|
AccessConfig,
|
|
27
31
|
ConnectionConfig,
|
|
28
32
|
Downloader,
|
|
@@ -31,15 +35,11 @@ from unstructured_ingest.v2.interfaces import (
|
|
|
31
35
|
Indexer,
|
|
32
36
|
IndexerConfig,
|
|
33
37
|
)
|
|
34
|
-
from unstructured_ingest.
|
|
35
|
-
from unstructured_ingest.
|
|
38
|
+
from unstructured_ingest.logger import logger
|
|
39
|
+
from unstructured_ingest.processes.connector_registry import (
|
|
36
40
|
SourceRegistryEntry,
|
|
37
41
|
)
|
|
38
|
-
from unstructured_ingest.
|
|
39
|
-
FileData,
|
|
40
|
-
FileDataSourceMetadata,
|
|
41
|
-
SourceIdentifiers,
|
|
42
|
-
)
|
|
42
|
+
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
43
43
|
|
|
44
44
|
|
|
45
45
|
class MissingCategoryError(Exception):
|
|
@@ -6,16 +6,18 @@ from typing import TYPE_CHECKING, Any, AsyncIterator
|
|
|
6
6
|
|
|
7
7
|
from pydantic import Field
|
|
8
8
|
|
|
9
|
+
from unstructured_ingest.data_types.file_data import (
|
|
10
|
+
FileData,
|
|
11
|
+
)
|
|
9
12
|
from unstructured_ingest.error import (
|
|
10
13
|
SourceConnectionError,
|
|
11
14
|
SourceConnectionNetworkError,
|
|
12
15
|
)
|
|
13
|
-
from unstructured_ingest.
|
|
14
|
-
from unstructured_ingest.
|
|
15
|
-
from unstructured_ingest.v2.processes.connector_registry import (
|
|
16
|
+
from unstructured_ingest.logger import logger
|
|
17
|
+
from unstructured_ingest.processes.connector_registry import (
|
|
16
18
|
SourceRegistryEntry,
|
|
17
19
|
)
|
|
18
|
-
from unstructured_ingest.
|
|
20
|
+
from unstructured_ingest.processes.connectors.onedrive import (
|
|
19
21
|
OnedriveAccessConfig,
|
|
20
22
|
OnedriveConnectionConfig,
|
|
21
23
|
OnedriveDownloader,
|
|
@@ -23,9 +25,7 @@ from unstructured_ingest.v2.processes.connectors.onedrive import (
|
|
|
23
25
|
OnedriveIndexer,
|
|
24
26
|
OnedriveIndexerConfig,
|
|
25
27
|
)
|
|
26
|
-
from unstructured_ingest.
|
|
27
|
-
FileData,
|
|
28
|
-
)
|
|
28
|
+
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
29
29
|
|
|
30
30
|
if TYPE_CHECKING:
|
|
31
31
|
from office365.onedrive.driveitems.driveItem import DriveItem
|
|
@@ -8,10 +8,13 @@ from typing import TYPE_CHECKING, Any, Generator, Optional
|
|
|
8
8
|
|
|
9
9
|
from pydantic import Field, Secret
|
|
10
10
|
|
|
11
|
+
from unstructured_ingest.data_types.file_data import (
|
|
12
|
+
FileData,
|
|
13
|
+
FileDataSourceMetadata,
|
|
14
|
+
SourceIdentifiers,
|
|
15
|
+
)
|
|
11
16
|
from unstructured_ingest.error import SourceConnectionError
|
|
12
|
-
from unstructured_ingest.
|
|
13
|
-
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
14
|
-
from unstructured_ingest.v2.interfaces import (
|
|
17
|
+
from unstructured_ingest.interfaces import (
|
|
15
18
|
AccessConfig,
|
|
16
19
|
ConnectionConfig,
|
|
17
20
|
Downloader,
|
|
@@ -20,12 +23,9 @@ from unstructured_ingest.v2.interfaces import (
|
|
|
20
23
|
Indexer,
|
|
21
24
|
IndexerConfig,
|
|
22
25
|
)
|
|
23
|
-
from unstructured_ingest.
|
|
24
|
-
from unstructured_ingest.
|
|
25
|
-
|
|
26
|
-
FileDataSourceMetadata,
|
|
27
|
-
SourceIdentifiers,
|
|
28
|
-
)
|
|
26
|
+
from unstructured_ingest.logger import logger
|
|
27
|
+
from unstructured_ingest.processes.connector_registry import SourceRegistryEntry
|
|
28
|
+
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
29
29
|
|
|
30
30
|
if TYPE_CHECKING:
|
|
31
31
|
from slack_sdk import WebClient
|
|
@@ -1,17 +1,17 @@
|
|
|
1
1
|
import json
|
|
2
|
+
import os
|
|
2
3
|
from contextlib import contextmanager
|
|
3
4
|
from dataclasses import dataclass
|
|
4
5
|
from typing import TYPE_CHECKING, Any, Generator, Optional
|
|
5
6
|
|
|
6
7
|
from pydantic import Field, Secret
|
|
7
8
|
|
|
8
|
-
from unstructured_ingest.
|
|
9
|
-
from unstructured_ingest.
|
|
10
|
-
from unstructured_ingest.
|
|
11
|
-
from unstructured_ingest.v2.processes.connector_registry import (
|
|
9
|
+
from unstructured_ingest.data_types.file_data import FileData
|
|
10
|
+
from unstructured_ingest.logger import logger
|
|
11
|
+
from unstructured_ingest.processes.connector_registry import (
|
|
12
12
|
DestinationRegistryEntry,
|
|
13
13
|
)
|
|
14
|
-
from unstructured_ingest.
|
|
14
|
+
from unstructured_ingest.processes.connectors.sql.sql import (
|
|
15
15
|
SQLAccessConfig,
|
|
16
16
|
SQLConnectionConfig,
|
|
17
17
|
SQLUploader,
|
|
@@ -19,7 +19,8 @@ from unstructured_ingest.v2.processes.connectors.sql.sql import (
|
|
|
19
19
|
SQLUploadStager,
|
|
20
20
|
SQLUploadStagerConfig,
|
|
21
21
|
)
|
|
22
|
-
from unstructured_ingest.
|
|
22
|
+
from unstructured_ingest.utils.data_prep import split_dataframe
|
|
23
|
+
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
23
24
|
|
|
24
25
|
if TYPE_CHECKING:
|
|
25
26
|
from databricks.sdk.core import oauth_service_principal
|
|
@@ -42,7 +43,6 @@ class DatabricksDeltaTablesConnectionConfig(SQLConnectionConfig):
|
|
|
42
43
|
access_config: Secret[DatabricksDeltaTablesAccessConfig]
|
|
43
44
|
server_hostname: str = Field(description="server hostname connection config value")
|
|
44
45
|
http_path: str = Field(description="http path connection config value")
|
|
45
|
-
user_agent: str = "unstructuredio_oss"
|
|
46
46
|
|
|
47
47
|
@requires_dependencies(["databricks"], extras="databricks-delta-tables")
|
|
48
48
|
def get_credentials_provider(self) -> "oauth_service_principal":
|
|
@@ -86,7 +86,9 @@ class DatabricksDeltaTablesConnectionConfig(SQLConnectionConfig):
|
|
|
86
86
|
from databricks.sql import connect
|
|
87
87
|
|
|
88
88
|
connect_kwargs = connect_kwargs or {}
|
|
89
|
-
connect_kwargs["_user_agent_entry"] =
|
|
89
|
+
connect_kwargs["_user_agent_entry"] = os.getenv(
|
|
90
|
+
"UNSTRUCTURED_USER_AGENT", "unstructuredio_oss"
|
|
91
|
+
)
|
|
90
92
|
connect_kwargs["server_hostname"] = connect_kwargs.get(
|
|
91
93
|
"server_hostname", self.server_hostname
|
|
92
94
|
)
|
|
@@ -201,7 +203,7 @@ class DatabricksDeltaTablesUploader(SQLUploader):
|
|
|
201
203
|
f" table named {self.upload_config.table_name}"
|
|
202
204
|
# f" with batch size {self.upload_config.batch_size}"
|
|
203
205
|
)
|
|
204
|
-
# TODO: currently variable binding not supporting for list
|
|
206
|
+
# TODO: currently variable binding not supporting for list data_types,
|
|
205
207
|
# update once that gets resolved in SDK
|
|
206
208
|
for rows in split_dataframe(df=df, chunk_size=self.upload_config.batch_size):
|
|
207
209
|
with self.get_cursor() as cursor:
|
|
@@ -4,13 +4,12 @@ from typing import TYPE_CHECKING, Generator, Optional
|
|
|
4
4
|
|
|
5
5
|
from pydantic import Field, Secret
|
|
6
6
|
|
|
7
|
-
from unstructured_ingest.
|
|
8
|
-
from unstructured_ingest.
|
|
9
|
-
from unstructured_ingest.v2.processes.connector_registry import (
|
|
7
|
+
from unstructured_ingest.logger import logger
|
|
8
|
+
from unstructured_ingest.processes.connector_registry import (
|
|
10
9
|
DestinationRegistryEntry,
|
|
11
10
|
SourceRegistryEntry,
|
|
12
11
|
)
|
|
13
|
-
from unstructured_ingest.
|
|
12
|
+
from unstructured_ingest.processes.connectors.sql.sql import (
|
|
14
13
|
SQLAccessConfig,
|
|
15
14
|
SqlBatchFileData,
|
|
16
15
|
SQLConnectionConfig,
|
|
@@ -23,6 +22,7 @@ from unstructured_ingest.v2.processes.connectors.sql.sql import (
|
|
|
23
22
|
SQLUploadStager,
|
|
24
23
|
SQLUploadStagerConfig,
|
|
25
24
|
)
|
|
25
|
+
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
26
26
|
|
|
27
27
|
if TYPE_CHECKING:
|
|
28
28
|
from psycopg2.extensions import connection as PostgresConnection
|
|
@@ -5,13 +5,12 @@ from typing import TYPE_CHECKING, Any, Generator, Optional
|
|
|
5
5
|
|
|
6
6
|
from pydantic import Field, Secret
|
|
7
7
|
|
|
8
|
-
from unstructured_ingest.
|
|
9
|
-
from unstructured_ingest.
|
|
10
|
-
from unstructured_ingest.v2.processes.connector_registry import (
|
|
8
|
+
from unstructured_ingest.logger import logger
|
|
9
|
+
from unstructured_ingest.processes.connector_registry import (
|
|
11
10
|
DestinationRegistryEntry,
|
|
12
11
|
SourceRegistryEntry,
|
|
13
12
|
)
|
|
14
|
-
from unstructured_ingest.
|
|
13
|
+
from unstructured_ingest.processes.connectors.sql.sql import (
|
|
15
14
|
_DATE_COLUMNS,
|
|
16
15
|
SQLAccessConfig,
|
|
17
16
|
SqlBatchFileData,
|
|
@@ -26,6 +25,7 @@ from unstructured_ingest.v2.processes.connectors.sql.sql import (
|
|
|
26
25
|
SQLUploadStagerConfig,
|
|
27
26
|
parse_date_string,
|
|
28
27
|
)
|
|
28
|
+
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
29
29
|
|
|
30
30
|
if TYPE_CHECKING:
|
|
31
31
|
from singlestoredb.connection import Connection as SingleStoreConnection
|
|
@@ -5,14 +5,13 @@ from typing import TYPE_CHECKING, Any, Generator, Optional
|
|
|
5
5
|
|
|
6
6
|
from pydantic import Field, Secret
|
|
7
7
|
|
|
8
|
-
from unstructured_ingest.
|
|
9
|
-
from unstructured_ingest.
|
|
10
|
-
from unstructured_ingest.
|
|
11
|
-
from unstructured_ingest.v2.processes.connector_registry import (
|
|
8
|
+
from unstructured_ingest.data_types.file_data import FileData
|
|
9
|
+
from unstructured_ingest.logger import logger
|
|
10
|
+
from unstructured_ingest.processes.connector_registry import (
|
|
12
11
|
DestinationRegistryEntry,
|
|
13
12
|
SourceRegistryEntry,
|
|
14
13
|
)
|
|
15
|
-
from unstructured_ingest.
|
|
14
|
+
from unstructured_ingest.processes.connectors.sql.sql import (
|
|
16
15
|
_DATE_COLUMNS,
|
|
17
16
|
SQLAccessConfig,
|
|
18
17
|
SqlBatchFileData,
|
|
@@ -27,7 +26,8 @@ from unstructured_ingest.v2.processes.connectors.sql.sql import (
|
|
|
27
26
|
SQLUploadStagerConfig,
|
|
28
27
|
parse_date_string,
|
|
29
28
|
)
|
|
30
|
-
from unstructured_ingest.
|
|
29
|
+
from unstructured_ingest.utils.data_prep import split_dataframe
|
|
30
|
+
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
31
31
|
|
|
32
32
|
if TYPE_CHECKING:
|
|
33
33
|
from pandas import DataFrame
|
|
@@ -11,10 +11,15 @@ from typing import TYPE_CHECKING, Any, Generator, Union
|
|
|
11
11
|
from dateutil import parser
|
|
12
12
|
from pydantic import BaseModel, Field, Secret
|
|
13
13
|
|
|
14
|
+
from unstructured_ingest.data_types.file_data import (
|
|
15
|
+
BatchFileData,
|
|
16
|
+
BatchItem,
|
|
17
|
+
FileData,
|
|
18
|
+
FileDataSourceMetadata,
|
|
19
|
+
SourceIdentifiers,
|
|
20
|
+
)
|
|
14
21
|
from unstructured_ingest.error import DestinationConnectionError, SourceConnectionError
|
|
15
|
-
from unstructured_ingest.
|
|
16
|
-
from unstructured_ingest.v2.constants import RECORD_ID_LABEL
|
|
17
|
-
from unstructured_ingest.v2.interfaces import (
|
|
22
|
+
from unstructured_ingest.interfaces import (
|
|
18
23
|
AccessConfig,
|
|
19
24
|
ConnectionConfig,
|
|
20
25
|
Downloader,
|
|
@@ -28,15 +33,15 @@ from unstructured_ingest.v2.interfaces import (
|
|
|
28
33
|
UploadStagerConfig,
|
|
29
34
|
download_responses,
|
|
30
35
|
)
|
|
31
|
-
from unstructured_ingest.
|
|
32
|
-
from unstructured_ingest.
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
36
|
+
from unstructured_ingest.logger import logger
|
|
37
|
+
from unstructured_ingest.utils.constants import RECORD_ID_LABEL
|
|
38
|
+
from unstructured_ingest.utils.data_prep import (
|
|
39
|
+
get_data,
|
|
40
|
+
get_data_df,
|
|
41
|
+
get_enhanced_element_id,
|
|
42
|
+
split_dataframe,
|
|
43
|
+
write_data,
|
|
38
44
|
)
|
|
39
|
-
from unstructured_ingest.v2.utils import get_enhanced_element_id
|
|
40
45
|
|
|
41
46
|
if TYPE_CHECKING:
|
|
42
47
|
from pandas import DataFrame
|
|
@@ -6,13 +6,12 @@ from typing import TYPE_CHECKING, Any, Generator
|
|
|
6
6
|
|
|
7
7
|
from pydantic import Field, Secret, model_validator
|
|
8
8
|
|
|
9
|
-
from unstructured_ingest.
|
|
10
|
-
from unstructured_ingest.
|
|
11
|
-
from unstructured_ingest.v2.processes.connector_registry import (
|
|
9
|
+
from unstructured_ingest.logger import logger
|
|
10
|
+
from unstructured_ingest.processes.connector_registry import (
|
|
12
11
|
DestinationRegistryEntry,
|
|
13
12
|
SourceRegistryEntry,
|
|
14
13
|
)
|
|
15
|
-
from unstructured_ingest.
|
|
14
|
+
from unstructured_ingest.processes.connectors.sql.sql import (
|
|
16
15
|
_DATE_COLUMNS,
|
|
17
16
|
SQLAccessConfig,
|
|
18
17
|
SqlBatchFileData,
|
|
@@ -27,6 +26,7 @@ from unstructured_ingest.v2.processes.connectors.sql.sql import (
|
|
|
27
26
|
SQLUploadStagerConfig,
|
|
28
27
|
parse_date_string,
|
|
29
28
|
)
|
|
29
|
+
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
30
30
|
|
|
31
31
|
if TYPE_CHECKING:
|
|
32
32
|
from sqlite3 import Connection as SqliteConnection
|
|
@@ -4,16 +4,16 @@ from typing import TYPE_CHECKING, Any, Optional
|
|
|
4
4
|
|
|
5
5
|
from pydantic import Field, Secret
|
|
6
6
|
|
|
7
|
+
from unstructured_ingest.data_types.file_data import (
|
|
8
|
+
FileData,
|
|
9
|
+
)
|
|
7
10
|
from unstructured_ingest.error import DestinationConnectionError
|
|
8
|
-
from unstructured_ingest.
|
|
9
|
-
from unstructured_ingest.
|
|
10
|
-
from unstructured_ingest.v2.constants import RECORD_ID_LABEL
|
|
11
|
-
from unstructured_ingest.v2.logger import logger
|
|
12
|
-
from unstructured_ingest.v2.processes.connector_registry import (
|
|
11
|
+
from unstructured_ingest.logger import logger
|
|
12
|
+
from unstructured_ingest.processes.connector_registry import (
|
|
13
13
|
DestinationRegistryEntry,
|
|
14
14
|
SourceRegistryEntry,
|
|
15
15
|
)
|
|
16
|
-
from unstructured_ingest.
|
|
16
|
+
from unstructured_ingest.processes.connectors.sql.sql import (
|
|
17
17
|
SQLAccessConfig,
|
|
18
18
|
SqlBatchFileData,
|
|
19
19
|
SQLConnectionConfig,
|
|
@@ -26,10 +26,9 @@ from unstructured_ingest.v2.processes.connectors.sql.sql import (
|
|
|
26
26
|
SQLUploadStager,
|
|
27
27
|
SQLUploadStagerConfig,
|
|
28
28
|
)
|
|
29
|
-
from unstructured_ingest.
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
from unstructured_ingest.v2.utils import get_enhanced_element_id
|
|
29
|
+
from unstructured_ingest.utils.constants import RECORD_ID_LABEL
|
|
30
|
+
from unstructured_ingest.utils.data_prep import get_enhanced_element_id, split_dataframe
|
|
31
|
+
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
33
32
|
|
|
34
33
|
if TYPE_CHECKING:
|
|
35
34
|
from pandas import DataFrame
|
|
@@ -8,10 +8,9 @@ from typing import Any, Dict, Mapping, Optional
|
|
|
8
8
|
|
|
9
9
|
from pydantic import Field, Secret
|
|
10
10
|
|
|
11
|
+
from unstructured_ingest.data_types.file_data import FileData
|
|
11
12
|
from unstructured_ingest.error import DestinationConnectionError
|
|
12
|
-
from unstructured_ingest.
|
|
13
|
-
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
14
|
-
from unstructured_ingest.v2.interfaces import (
|
|
13
|
+
from unstructured_ingest.interfaces import (
|
|
15
14
|
AccessConfig,
|
|
16
15
|
ConnectionConfig,
|
|
17
16
|
Uploader,
|
|
@@ -19,9 +18,10 @@ from unstructured_ingest.v2.interfaces import (
|
|
|
19
18
|
UploadStager,
|
|
20
19
|
UploadStagerConfig,
|
|
21
20
|
)
|
|
22
|
-
from unstructured_ingest.
|
|
23
|
-
from unstructured_ingest.
|
|
24
|
-
from unstructured_ingest.
|
|
21
|
+
from unstructured_ingest.logger import logger
|
|
22
|
+
from unstructured_ingest.processes.connector_registry import DestinationRegistryEntry
|
|
23
|
+
from unstructured_ingest.utils.data_prep import flatten_dict
|
|
24
|
+
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
25
25
|
|
|
26
26
|
BASE_URL = "https://api.vectara.io/v2"
|
|
27
27
|
|