unstructured-ingest 0.6.4__py3-none-any.whl → 0.7.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of unstructured-ingest might be problematic. Click here for more details.
- examples/airtable.py +44 -0
- examples/azure_cognitive_search.py +55 -0
- examples/chroma.py +54 -0
- examples/couchbase.py +55 -0
- examples/databricks_volumes_dest.py +55 -0
- examples/databricks_volumes_source.py +53 -0
- examples/delta_table.py +45 -0
- examples/discord_example.py +36 -0
- examples/elasticsearch.py +49 -0
- examples/google_drive.py +45 -0
- examples/kdbai.py +54 -0
- examples/local.py +36 -0
- examples/milvus.py +44 -0
- examples/mongodb.py +53 -0
- examples/opensearch.py +50 -0
- examples/pinecone.py +57 -0
- examples/s3.py +38 -0
- examples/salesforce.py +44 -0
- examples/sharepoint.py +47 -0
- examples/singlestore.py +49 -0
- examples/sql.py +90 -0
- examples/vectara.py +54 -0
- examples/weaviate.py +44 -0
- test/integration/chunkers/test_chunkers.py +1 -1
- test/integration/connectors/conftest.py +1 -1
- test/integration/connectors/databricks/test_volumes_native.py +3 -3
- test/integration/connectors/discord/test_discord.py +1 -1
- test/integration/connectors/duckdb/test_duckdb.py +2 -2
- test/integration/connectors/duckdb/test_motherduck.py +2 -2
- test/integration/connectors/elasticsearch/test_elasticsearch.py +2 -2
- test/integration/connectors/elasticsearch/test_opensearch.py +2 -2
- test/integration/connectors/sql/test_databricks_delta_tables.py +3 -3
- test/integration/connectors/sql/test_postgres.py +2 -2
- test/integration/connectors/sql/test_singlestore.py +2 -2
- test/integration/connectors/sql/test_snowflake.py +2 -2
- test/integration/connectors/sql/test_sqlite.py +2 -2
- test/integration/connectors/sql/test_vastdb.py +1 -1
- test/integration/connectors/test_astradb.py +2 -2
- test/integration/connectors/test_azure_ai_search.py +2 -2
- test/integration/connectors/test_chroma.py +2 -2
- test/integration/connectors/test_confluence.py +1 -1
- test/integration/connectors/test_delta_table.py +2 -2
- test/integration/connectors/test_dropbox.py +2 -2
- test/integration/connectors/test_github.py +1 -1
- test/integration/connectors/test_google_drive.py +2 -2
- test/integration/connectors/test_jira.py +1 -1
- test/integration/connectors/test_lancedb.py +7 -7
- test/integration/connectors/test_milvus.py +2 -2
- test/integration/connectors/test_mongodb.py +2 -2
- test/integration/connectors/test_neo4j.py +7 -7
- test/integration/connectors/test_notion.py +2 -2
- test/integration/connectors/test_onedrive.py +2 -2
- test/integration/connectors/test_pinecone.py +3 -3
- test/integration/connectors/test_qdrant.py +6 -6
- test/integration/connectors/test_redis.py +3 -3
- test/integration/connectors/test_s3.py +3 -3
- test/integration/connectors/test_sharepoint.py +1 -1
- test/integration/connectors/test_vectara.py +4 -4
- test/integration/connectors/test_zendesk.py +2 -2
- test/integration/connectors/utils/validation/destination.py +2 -2
- test/integration/connectors/utils/validation/source.py +2 -2
- test/integration/connectors/weaviate/test_cloud.py +1 -1
- test/integration/connectors/weaviate/test_local.py +2 -2
- test/integration/embedders/test_azure_openai.py +1 -1
- test/integration/embedders/test_bedrock.py +2 -2
- test/integration/embedders/test_huggingface.py +1 -1
- test/integration/embedders/test_mixedbread.py +1 -1
- test/integration/embedders/test_octoai.py +2 -2
- test/integration/embedders/test_openai.py +2 -2
- test/integration/embedders/test_togetherai.py +2 -2
- test/integration/embedders/test_vertexai.py +1 -1
- test/integration/embedders/test_voyageai.py +1 -1
- test/integration/partitioners/test_partitioner.py +2 -2
- test/unit/{v2/chunkers → chunkers}/test_chunkers.py +1 -1
- test/unit/{v2/connectors → connectors}/ibm_watsonx/test_ibm_watsonx_s3.py +6 -6
- test/unit/{v2/connectors → connectors}/motherduck/test_base.py +5 -5
- test/unit/{v2/connectors → connectors}/sql/test_sql.py +4 -4
- test/unit/{v2/connectors → connectors}/test_confluence.py +1 -1
- test/unit/{v2/connectors → connectors}/test_jira.py +1 -1
- test/unit/{v2/embedders → embedders}/test_huggingface.py +1 -1
- test/unit/{v2/embedders → embedders}/test_vertexai.py +1 -1
- test/unit/{v2/partitioners → partitioners}/test_partitioner.py +2 -2
- test/unit/test_html.py +1 -1
- test/unit/{v2/test_interfaces.py → test_interfaces.py} +1 -1
- test/unit/test_utils.py +106 -97
- unstructured_ingest/__version__.py +1 -1
- unstructured_ingest/cli/__init__.py +0 -14
- unstructured_ingest/cli/base/__init__.py +4 -0
- unstructured_ingest/cli/base/cmd.py +259 -9
- unstructured_ingest/cli/base/dest.py +58 -61
- unstructured_ingest/cli/base/src.py +54 -36
- unstructured_ingest/cli/cli.py +4 -17
- unstructured_ingest/{v2/cli → cli}/cmds.py +2 -2
- unstructured_ingest/{v2/cli → cli}/utils/model_conversion.py +6 -6
- unstructured_ingest/{v2/types → data_types}/file_data.py +1 -1
- unstructured_ingest/embed/bedrock.py +3 -3
- unstructured_ingest/embed/octoai.py +3 -3
- unstructured_ingest/embed/openai.py +3 -3
- unstructured_ingest/embed/togetherai.py +4 -4
- unstructured_ingest/embed/vertexai.py +1 -1
- unstructured_ingest/embed/voyageai.py +4 -4
- unstructured_ingest/{v2/interfaces → interfaces}/downloader.py +3 -3
- unstructured_ingest/{v2/interfaces → interfaces}/indexer.py +3 -3
- unstructured_ingest/{v2/interfaces → interfaces}/upload_stager.py +2 -2
- unstructured_ingest/{v2/interfaces → interfaces}/uploader.py +2 -2
- unstructured_ingest/{v2/otel.py → otel.py} +1 -1
- unstructured_ingest/pipeline/__init__.py +0 -22
- unstructured_ingest/pipeline/interfaces.py +179 -238
- unstructured_ingest/{v2/pipeline → pipeline}/otel.py +2 -2
- unstructured_ingest/pipeline/pipeline.py +388 -97
- unstructured_ingest/{v2/pipeline → pipeline}/steps/chunk.py +5 -5
- unstructured_ingest/{v2/pipeline → pipeline}/steps/download.py +5 -5
- unstructured_ingest/{v2/pipeline → pipeline}/steps/embed.py +5 -5
- unstructured_ingest/{v2/pipeline → pipeline}/steps/filter.py +4 -4
- unstructured_ingest/{v2/pipeline → pipeline}/steps/index.py +5 -5
- unstructured_ingest/{v2/pipeline → pipeline}/steps/partition.py +5 -5
- unstructured_ingest/{v2/pipeline → pipeline}/steps/stage.py +5 -5
- unstructured_ingest/{v2/pipeline → pipeline}/steps/uncompress.py +4 -4
- unstructured_ingest/{v2/pipeline → pipeline}/steps/upload.py +5 -5
- unstructured_ingest/{v2/processes → processes}/chunker.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connector_registry.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/__init__.py +11 -11
- unstructured_ingest/{v2/processes → processes}/connectors/airtable.py +4 -4
- unstructured_ingest/{v2/processes → processes}/connectors/astradb.py +15 -15
- unstructured_ingest/{v2/processes → processes}/connectors/azure_ai_search.py +8 -9
- unstructured_ingest/{v2/processes → processes}/connectors/chroma.py +10 -7
- unstructured_ingest/{v2/processes → processes}/connectors/confluence.py +11 -11
- unstructured_ingest/{v2/processes → processes}/connectors/couchbase.py +12 -12
- unstructured_ingest/{v2/processes → processes}/connectors/databricks/__init__.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/databricks/volumes.py +9 -9
- unstructured_ingest/{v2/processes → processes}/connectors/databricks/volumes_aws.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/databricks/volumes_azure.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/databricks/volumes_gcp.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/databricks/volumes_native.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/databricks/volumes_table.py +8 -9
- unstructured_ingest/{v2/processes → processes}/connectors/delta_table.py +7 -7
- unstructured_ingest/{v2/processes → processes}/connectors/discord.py +9 -9
- unstructured_ingest/{v2/processes → processes}/connectors/duckdb/__init__.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/duckdb/base.py +3 -4
- unstructured_ingest/{v2/processes → processes}/connectors/duckdb/duckdb.py +7 -7
- unstructured_ingest/{v2/processes → processes}/connectors/duckdb/motherduck.py +7 -7
- unstructured_ingest/{v2/processes → processes}/connectors/elasticsearch/__init__.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/elasticsearch/elasticsearch.py +17 -17
- unstructured_ingest/{v2/processes → processes}/connectors/elasticsearch/opensearch.py +5 -5
- unstructured_ingest/{v2/processes → processes}/connectors/fsspec/__init__.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/fsspec/azure.py +8 -8
- unstructured_ingest/{v2/processes → processes}/connectors/fsspec/box.py +8 -8
- unstructured_ingest/{v2/processes → processes}/connectors/fsspec/dropbox.py +8 -8
- unstructured_ingest/{v2/processes → processes}/connectors/fsspec/fsspec.py +8 -8
- unstructured_ingest/{v2/processes → processes}/connectors/fsspec/gcs.py +8 -8
- unstructured_ingest/{v2/processes → processes}/connectors/fsspec/s3.py +9 -9
- unstructured_ingest/{v2/processes → processes}/connectors/fsspec/sftp.py +5 -5
- unstructured_ingest/{v2/processes → processes}/connectors/github.py +10 -10
- unstructured_ingest/{v2/processes → processes}/connectors/gitlab.py +9 -9
- unstructured_ingest/{v2/processes → processes}/connectors/google_drive.py +22 -13
- unstructured_ingest/{v2/processes → processes}/connectors/ibm_watsonx/__init__.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/ibm_watsonx/ibm_watsonx_s3.py +9 -9
- unstructured_ingest/{v2/processes → processes}/connectors/jira.py +9 -9
- unstructured_ingest/{v2/processes → processes}/connectors/kafka/__init__.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/kafka/cloud.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/kafka/kafka.py +9 -9
- unstructured_ingest/{v2/processes → processes}/connectors/kafka/local.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/kdbai.py +11 -7
- unstructured_ingest/{v2/processes → processes}/connectors/lancedb/__init__.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/lancedb/aws.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/lancedb/azure.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/lancedb/cloud.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/lancedb/gcp.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/lancedb/lancedb.py +6 -6
- unstructured_ingest/{v2/processes → processes}/connectors/lancedb/local.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/local.py +9 -9
- unstructured_ingest/{v2/processes → processes}/connectors/milvus.py +7 -7
- unstructured_ingest/{v2/processes → processes}/connectors/mongodb.py +13 -13
- unstructured_ingest/{v2/processes → processes}/connectors/neo4j.py +7 -7
- unstructured_ingest/{v2/processes → processes}/connectors/notion/client.py +6 -6
- unstructured_ingest/{v2/processes → processes}/connectors/notion/connector.py +15 -15
- unstructured_ingest/{v2/processes → processes}/connectors/notion/helpers.py +4 -4
- unstructured_ingest/{ingest_backoff → processes/connectors/notion/ingest_backoff}/_wrapper.py +5 -1
- unstructured_ingest/processes/connectors/notion/ingest_backoff/types.py +24 -0
- unstructured_ingest/{connector → processes/connectors}/notion/types/block.py +4 -4
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/bookmark.py +2 -2
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/breadcrumb.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/bulleted_list_item.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/callout.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/child_database.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/child_page.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/code.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/column_list.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/divider.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/embed.py +2 -2
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/equation.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/file.py +4 -4
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/heading.py +2 -2
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/image.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/link_preview.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/link_to_page.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/numbered_list.py +2 -2
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/paragraph.py +2 -2
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/pdf.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/quote.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/synced_block.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/table.py +2 -5
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/table_of_contents.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/template.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/todo.py +2 -2
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/toggle.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/unsupported.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/video.py +2 -2
- unstructured_ingest/{connector → processes/connectors}/notion/types/database.py +6 -6
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/__init__.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/checkbox.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/created_by.py +2 -2
- unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/created_time.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/date.py +2 -2
- unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/email.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/files.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/formula.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/last_edited_by.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/last_edited_time.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/multiselect.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/number.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/people.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/phone_number.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/relation.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/rich_text.py +2 -2
- unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/rollup.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/select.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/status.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/title.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/unique_id.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/url.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/verification.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/date.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/file.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/page.py +4 -4
- unstructured_ingest/{connector → processes/connectors}/notion/types/parent.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/rich_text.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/user.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/onedrive.py +55 -27
- unstructured_ingest/{v2/processes → processes}/connectors/outlook.py +9 -9
- unstructured_ingest/{v2/processes → processes}/connectors/pinecone.py +12 -9
- unstructured_ingest/{v2/processes → processes}/connectors/qdrant/__init__.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/qdrant/cloud.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/qdrant/local.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/qdrant/qdrant.py +9 -6
- unstructured_ingest/{v2/processes → processes}/connectors/qdrant/server.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/redisdb.py +6 -6
- unstructured_ingest/{v2/processes → processes}/connectors/salesforce.py +9 -9
- unstructured_ingest/{v2/processes → processes}/connectors/sharepoint.py +8 -8
- unstructured_ingest/{v2/processes → processes}/connectors/slack.py +9 -9
- unstructured_ingest/{v2/processes → processes}/connectors/sql/__init__.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/sql/databricks_delta_tables.py +7 -7
- unstructured_ingest/{v2/processes → processes}/connectors/sql/postgres.py +4 -4
- unstructured_ingest/{v2/processes → processes}/connectors/sql/singlestore.py +4 -4
- unstructured_ingest/{v2/processes → processes}/connectors/sql/snowflake.py +6 -6
- unstructured_ingest/{v2/processes → processes}/connectors/sql/sql.py +16 -11
- unstructured_ingest/{v2/processes → processes}/connectors/sql/sqlite.py +4 -4
- unstructured_ingest/{v2/processes → processes}/connectors/sql/vastdb.py +9 -10
- unstructured_ingest/{v2/processes → processes}/connectors/vectara.py +6 -6
- unstructured_ingest/{v2/processes → processes}/connectors/weaviate/__init__.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/weaviate/cloud.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/weaviate/embedded.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/weaviate/local.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/weaviate/weaviate.py +5 -5
- unstructured_ingest/{v2/processes → processes}/connectors/zendesk/client.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/zendesk/zendesk.py +10 -10
- unstructured_ingest/{v2/processes → processes}/embedder.py +1 -1
- unstructured_ingest/{v2/processes → processes}/filter.py +4 -4
- unstructured_ingest/{v2/processes → processes}/partitioner.py +6 -6
- unstructured_ingest/{v2/processes → processes}/uncompress.py +3 -3
- unstructured_ingest/{v2/processes → processes}/utils/blob_storage.py +2 -2
- unstructured_ingest/{v2/unstructured_api.py → unstructured_api.py} +2 -2
- unstructured_ingest/utils/compression.py +1 -48
- unstructured_ingest/utils/data_prep.py +9 -1
- unstructured_ingest/utils/html.py +3 -3
- unstructured_ingest/{v2/utils.py → utils/pydantic_models.py} +0 -9
- unstructured_ingest/utils/string_and_date_utils.py +1 -1
- {unstructured_ingest-0.6.4.dist-info → unstructured_ingest-0.7.1.dist-info}/METADATA +98 -97
- unstructured_ingest-0.7.1.dist-info/RECORD +370 -0
- {unstructured_ingest-0.6.4.dist-info → unstructured_ingest-0.7.1.dist-info}/top_level.txt +1 -0
- test/unit/v2/test_utils.py +0 -82
- unstructured_ingest/cli/cmd_factory.py +0 -12
- unstructured_ingest/cli/cmds/__init__.py +0 -145
- unstructured_ingest/cli/cmds/airtable.py +0 -69
- unstructured_ingest/cli/cmds/astradb.py +0 -99
- unstructured_ingest/cli/cmds/azure_ai_search.py +0 -65
- unstructured_ingest/cli/cmds/biomed.py +0 -52
- unstructured_ingest/cli/cmds/chroma.py +0 -104
- unstructured_ingest/cli/cmds/clarifai.py +0 -71
- unstructured_ingest/cli/cmds/confluence.py +0 -69
- unstructured_ingest/cli/cmds/databricks_volumes.py +0 -163
- unstructured_ingest/cli/cmds/delta_table.py +0 -94
- unstructured_ingest/cli/cmds/discord.py +0 -47
- unstructured_ingest/cli/cmds/elasticsearch.py +0 -133
- unstructured_ingest/cli/cmds/fsspec/azure.py +0 -94
- unstructured_ingest/cli/cmds/fsspec/box.py +0 -48
- unstructured_ingest/cli/cmds/fsspec/dropbox.py +0 -51
- unstructured_ingest/cli/cmds/fsspec/fsspec.py +0 -15
- unstructured_ingest/cli/cmds/fsspec/gcs.py +0 -71
- unstructured_ingest/cli/cmds/fsspec/s3.py +0 -74
- unstructured_ingest/cli/cmds/fsspec/sftp.py +0 -58
- unstructured_ingest/cli/cmds/github.py +0 -54
- unstructured_ingest/cli/cmds/gitlab.py +0 -54
- unstructured_ingest/cli/cmds/google_drive.py +0 -49
- unstructured_ingest/cli/cmds/hubspot.py +0 -70
- unstructured_ingest/cli/cmds/jira.py +0 -71
- unstructured_ingest/cli/cmds/kafka.py +0 -102
- unstructured_ingest/cli/cmds/local.py +0 -43
- unstructured_ingest/cli/cmds/mongodb.py +0 -72
- unstructured_ingest/cli/cmds/notion.py +0 -48
- unstructured_ingest/cli/cmds/onedrive.py +0 -66
- unstructured_ingest/cli/cmds/opensearch.py +0 -117
- unstructured_ingest/cli/cmds/outlook.py +0 -67
- unstructured_ingest/cli/cmds/pinecone.py +0 -71
- unstructured_ingest/cli/cmds/qdrant.py +0 -124
- unstructured_ingest/cli/cmds/reddit.py +0 -67
- unstructured_ingest/cli/cmds/salesforce.py +0 -58
- unstructured_ingest/cli/cmds/sharepoint.py +0 -66
- unstructured_ingest/cli/cmds/slack.py +0 -56
- unstructured_ingest/cli/cmds/sql.py +0 -66
- unstructured_ingest/cli/cmds/vectara.py +0 -66
- unstructured_ingest/cli/cmds/weaviate.py +0 -98
- unstructured_ingest/cli/cmds/wikipedia.py +0 -40
- unstructured_ingest/cli/common.py +0 -7
- unstructured_ingest/cli/interfaces.py +0 -663
- unstructured_ingest/cli/utils.py +0 -205
- unstructured_ingest/connector/airtable.py +0 -309
- unstructured_ingest/connector/astradb.py +0 -267
- unstructured_ingest/connector/azure_ai_search.py +0 -144
- unstructured_ingest/connector/biomed.py +0 -320
- unstructured_ingest/connector/chroma.py +0 -158
- unstructured_ingest/connector/clarifai.py +0 -122
- unstructured_ingest/connector/confluence.py +0 -285
- unstructured_ingest/connector/databricks_volumes.py +0 -137
- unstructured_ingest/connector/delta_table.py +0 -203
- unstructured_ingest/connector/discord.py +0 -180
- unstructured_ingest/connector/elasticsearch.py +0 -396
- unstructured_ingest/connector/fsspec/azure.py +0 -78
- unstructured_ingest/connector/fsspec/box.py +0 -109
- unstructured_ingest/connector/fsspec/dropbox.py +0 -160
- unstructured_ingest/connector/fsspec/fsspec.py +0 -359
- unstructured_ingest/connector/fsspec/gcs.py +0 -82
- unstructured_ingest/connector/fsspec/s3.py +0 -62
- unstructured_ingest/connector/fsspec/sftp.py +0 -81
- unstructured_ingest/connector/git.py +0 -124
- unstructured_ingest/connector/github.py +0 -174
- unstructured_ingest/connector/gitlab.py +0 -142
- unstructured_ingest/connector/google_drive.py +0 -348
- unstructured_ingest/connector/hubspot.py +0 -278
- unstructured_ingest/connector/jira.py +0 -469
- unstructured_ingest/connector/kafka.py +0 -293
- unstructured_ingest/connector/local.py +0 -139
- unstructured_ingest/connector/mongodb.py +0 -284
- unstructured_ingest/connector/notion/client.py +0 -248
- unstructured_ingest/connector/notion/connector.py +0 -469
- unstructured_ingest/connector/notion/helpers.py +0 -584
- unstructured_ingest/connector/notion/types/blocks/bookmark.py +0 -40
- unstructured_ingest/connector/notion/types/blocks/callout.py +0 -94
- unstructured_ingest/connector/notion/types/blocks/child_database.py +0 -23
- unstructured_ingest/connector/notion/types/blocks/code.py +0 -43
- unstructured_ingest/connector/notion/types/blocks/column_list.py +0 -35
- unstructured_ingest/connector/notion/types/blocks/divider.py +0 -22
- unstructured_ingest/connector/notion/types/blocks/heading.py +0 -37
- unstructured_ingest/connector/notion/types/blocks/link_preview.py +0 -24
- unstructured_ingest/connector/notion/types/blocks/numbered_list.py +0 -29
- unstructured_ingest/connector/notion/types/blocks/quote.py +0 -37
- unstructured_ingest/connector/notion/types/blocks/synced_block.py +0 -57
- unstructured_ingest/connector/notion/types/blocks/table.py +0 -63
- unstructured_ingest/connector/notion/types/blocks/table_of_contents.py +0 -23
- unstructured_ingest/connector/notion/types/blocks/template.py +0 -30
- unstructured_ingest/connector/notion/types/blocks/todo.py +0 -42
- unstructured_ingest/connector/notion/types/blocks/unsupported.py +0 -20
- unstructured_ingest/connector/notion/types/database_properties/__init__.py +0 -106
- unstructured_ingest/connector/notion/types/database_properties/checkbox.py +0 -38
- unstructured_ingest/connector/notion/types/database_properties/date.py +0 -41
- unstructured_ingest/connector/notion/types/database_properties/formula.py +0 -49
- unstructured_ingest/connector/notion/types/database_properties/last_edited_time.py +0 -34
- unstructured_ingest/connector/notion/types/database_properties/multiselect.py +0 -73
- unstructured_ingest/connector/notion/types/database_properties/people.py +0 -40
- unstructured_ingest/connector/notion/types/database_properties/phone_number.py +0 -36
- unstructured_ingest/connector/notion/types/database_properties/relation.py +0 -67
- unstructured_ingest/connector/notion/types/database_properties/select.py +0 -68
- unstructured_ingest/connector/notion/types/database_properties/status.py +0 -80
- unstructured_ingest/connector/notion/types/database_properties/unique_id.py +0 -50
- unstructured_ingest/connector/notion/types/date.py +0 -26
- unstructured_ingest/connector/notion/types/file.py +0 -51
- unstructured_ingest/connector/notion/types/user.py +0 -76
- unstructured_ingest/connector/onedrive.py +0 -232
- unstructured_ingest/connector/opensearch.py +0 -218
- unstructured_ingest/connector/outlook.py +0 -285
- unstructured_ingest/connector/pinecone.py +0 -150
- unstructured_ingest/connector/qdrant.py +0 -144
- unstructured_ingest/connector/reddit.py +0 -166
- unstructured_ingest/connector/registry.py +0 -109
- unstructured_ingest/connector/salesforce.py +0 -301
- unstructured_ingest/connector/sharepoint.py +0 -573
- unstructured_ingest/connector/slack.py +0 -224
- unstructured_ingest/connector/sql.py +0 -199
- unstructured_ingest/connector/vectara.py +0 -253
- unstructured_ingest/connector/weaviate.py +0 -190
- unstructured_ingest/connector/wikipedia.py +0 -208
- unstructured_ingest/enhanced_dataclass/__init__.py +0 -4
- unstructured_ingest/enhanced_dataclass/core.py +0 -99
- unstructured_ingest/enhanced_dataclass/dataclasses.py +0 -54
- unstructured_ingest/enhanced_dataclass/json_mixin.py +0 -125
- unstructured_ingest/interfaces.py +0 -852
- unstructured_ingest/pipeline/copy.py +0 -19
- unstructured_ingest/pipeline/doc_factory.py +0 -12
- unstructured_ingest/pipeline/partition.py +0 -60
- unstructured_ingest/pipeline/permissions.py +0 -12
- unstructured_ingest/pipeline/reformat/chunking.py +0 -134
- unstructured_ingest/pipeline/reformat/embedding.py +0 -64
- unstructured_ingest/pipeline/source.py +0 -77
- unstructured_ingest/pipeline/utils.py +0 -6
- unstructured_ingest/pipeline/write.py +0 -18
- unstructured_ingest/processor.py +0 -93
- unstructured_ingest/runner/__init__.py +0 -104
- unstructured_ingest/runner/airtable.py +0 -35
- unstructured_ingest/runner/astradb.py +0 -34
- unstructured_ingest/runner/base_runner.py +0 -89
- unstructured_ingest/runner/biomed.py +0 -45
- unstructured_ingest/runner/confluence.py +0 -35
- unstructured_ingest/runner/delta_table.py +0 -34
- unstructured_ingest/runner/discord.py +0 -35
- unstructured_ingest/runner/elasticsearch.py +0 -40
- unstructured_ingest/runner/fsspec/azure.py +0 -30
- unstructured_ingest/runner/fsspec/box.py +0 -28
- unstructured_ingest/runner/fsspec/dropbox.py +0 -30
- unstructured_ingest/runner/fsspec/fsspec.py +0 -40
- unstructured_ingest/runner/fsspec/gcs.py +0 -28
- unstructured_ingest/runner/fsspec/s3.py +0 -28
- unstructured_ingest/runner/fsspec/sftp.py +0 -28
- unstructured_ingest/runner/github.py +0 -37
- unstructured_ingest/runner/gitlab.py +0 -37
- unstructured_ingest/runner/google_drive.py +0 -35
- unstructured_ingest/runner/hubspot.py +0 -35
- unstructured_ingest/runner/jira.py +0 -35
- unstructured_ingest/runner/kafka.py +0 -34
- unstructured_ingest/runner/local.py +0 -23
- unstructured_ingest/runner/mongodb.py +0 -34
- unstructured_ingest/runner/notion.py +0 -61
- unstructured_ingest/runner/onedrive.py +0 -35
- unstructured_ingest/runner/opensearch.py +0 -40
- unstructured_ingest/runner/outlook.py +0 -33
- unstructured_ingest/runner/reddit.py +0 -35
- unstructured_ingest/runner/salesforce.py +0 -33
- unstructured_ingest/runner/sharepoint.py +0 -35
- unstructured_ingest/runner/slack.py +0 -33
- unstructured_ingest/runner/utils.py +0 -47
- unstructured_ingest/runner/wikipedia.py +0 -35
- unstructured_ingest/runner/writers/__init__.py +0 -48
- unstructured_ingest/runner/writers/astradb.py +0 -22
- unstructured_ingest/runner/writers/azure_ai_search.py +0 -24
- unstructured_ingest/runner/writers/base_writer.py +0 -26
- unstructured_ingest/runner/writers/chroma.py +0 -22
- unstructured_ingest/runner/writers/clarifai.py +0 -19
- unstructured_ingest/runner/writers/databricks_volumes.py +0 -25
- unstructured_ingest/runner/writers/delta_table.py +0 -24
- unstructured_ingest/runner/writers/elasticsearch.py +0 -24
- unstructured_ingest/runner/writers/fsspec/azure.py +0 -24
- unstructured_ingest/runner/writers/fsspec/box.py +0 -21
- unstructured_ingest/runner/writers/fsspec/dropbox.py +0 -21
- unstructured_ingest/runner/writers/fsspec/gcs.py +0 -19
- unstructured_ingest/runner/writers/fsspec/s3.py +0 -21
- unstructured_ingest/runner/writers/kafka.py +0 -21
- unstructured_ingest/runner/writers/mongodb.py +0 -21
- unstructured_ingest/runner/writers/opensearch.py +0 -26
- unstructured_ingest/runner/writers/pinecone.py +0 -21
- unstructured_ingest/runner/writers/qdrant.py +0 -19
- unstructured_ingest/runner/writers/sql.py +0 -22
- unstructured_ingest/runner/writers/vectara.py +0 -22
- unstructured_ingest/runner/writers/weaviate.py +0 -21
- unstructured_ingest/utils/google_filetype.py +0 -9
- unstructured_ingest/v2/__init__.py +0 -1
- unstructured_ingest/v2/cli/__init__.py +0 -0
- unstructured_ingest/v2/cli/base/__init__.py +0 -4
- unstructured_ingest/v2/cli/base/cmd.py +0 -269
- unstructured_ingest/v2/cli/base/dest.py +0 -85
- unstructured_ingest/v2/cli/base/src.py +0 -85
- unstructured_ingest/v2/cli/cli.py +0 -24
- unstructured_ingest/v2/cli/utils/__init__.py +0 -0
- unstructured_ingest/v2/logger.py +0 -126
- unstructured_ingest/v2/main.py +0 -11
- unstructured_ingest/v2/pipeline/__init__.py +0 -0
- unstructured_ingest/v2/pipeline/interfaces.py +0 -211
- unstructured_ingest/v2/pipeline/pipeline.py +0 -408
- unstructured_ingest/v2/pipeline/steps/__init__.py +0 -0
- unstructured_ingest/v2/processes/connectors/assets/__init__.py +0 -0
- unstructured_ingest/v2/processes/connectors/assets/databricks_delta_table_schema.sql +0 -10
- unstructured_ingest/v2/processes/connectors/assets/weaviate_collection_config.json +0 -23
- unstructured_ingest/v2/processes/connectors/notion/__init__.py +0 -0
- unstructured_ingest/v2/processes/connectors/notion/interfaces.py +0 -32
- unstructured_ingest/v2/processes/connectors/notion/types/__init__.py +0 -0
- unstructured_ingest/v2/processes/connectors/notion/types/block.py +0 -96
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/__init__.py +0 -63
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/breadcrumb.py +0 -21
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/bulleted_list_item.py +0 -31
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/child_page.py +0 -23
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/embed.py +0 -36
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/equation.py +0 -23
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/file.py +0 -49
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/image.py +0 -21
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/link_to_page.py +0 -29
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/paragraph.py +0 -31
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/pdf.py +0 -49
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/toggle.py +0 -37
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/video.py +0 -22
- unstructured_ingest/v2/processes/connectors/notion/types/database.py +0 -73
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/created_by.py +0 -35
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/created_time.py +0 -34
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/email.py +0 -36
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/files.py +0 -37
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/last_edited_by.py +0 -34
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/number.py +0 -49
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/rich_text.py +0 -43
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/rollup.py +0 -56
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/title.py +0 -37
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/url.py +0 -37
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/verification.py +0 -78
- unstructured_ingest/v2/processes/connectors/notion/types/page.py +0 -45
- unstructured_ingest/v2/processes/connectors/notion/types/parent.py +0 -66
- unstructured_ingest/v2/processes/connectors/notion/types/rich_text.py +0 -189
- unstructured_ingest/v2/processes/connectors/zendesk/__init__.py +0 -0
- unstructured_ingest/v2/processes/utils/__init__.py +0 -0
- unstructured_ingest/v2/types/__init__.py +0 -0
- unstructured_ingest-0.6.4.dist-info/RECORD +0 -591
- {test/unit/v2 → examples}/__init__.py +0 -0
- /test/unit/{v2/chunkers → chunkers}/__init__.py +0 -0
- /test/unit/{v2/connectors → connectors}/__init__.py +0 -0
- /test/unit/{v2/connectors → connectors}/ibm_watsonx/__init__.py +0 -0
- /test/unit/{v2/connectors → connectors}/motherduck/__init__.py +0 -0
- /test/unit/{v2/connectors → connectors}/sql/__init__.py +0 -0
- /test/unit/{v2/embedders → embedders}/__init__.py +0 -0
- /test/unit/{v2/embedders → embedders}/test_bedrock.py +0 -0
- /test/unit/{v2/embedders → embedders}/test_mixedbread.py +0 -0
- /test/unit/{v2/embedders → embedders}/test_octoai.py +0 -0
- /test/unit/{v2/embedders → embedders}/test_openai.py +0 -0
- /test/unit/{v2/embedders → embedders}/test_togetherai.py +0 -0
- /test/unit/{v2/embedders → embedders}/test_voyageai.py +0 -0
- /test/unit/{v2/partitioners → partitioners}/__init__.py +0 -0
- /test/unit/{v2/utils → utils}/__init__.py +0 -0
- /test/unit/{v2/utils → utils}/data_generator.py +0 -0
- /unstructured_ingest/{v2/cli → cli}/base/importer.py +0 -0
- /unstructured_ingest/cli/{cmds/fsspec → utils}/__init__.py +0 -0
- /unstructured_ingest/{v2/cli → cli}/utils/click.py +0 -0
- /unstructured_ingest/{connector → data_types}/__init__.py +0 -0
- /unstructured_ingest/{v2/errors.py → errors_v2.py} +0 -0
- /unstructured_ingest/{v2/interfaces → interfaces}/__init__.py +0 -0
- /unstructured_ingest/{v2/interfaces → interfaces}/connector.py +0 -0
- /unstructured_ingest/{v2/interfaces → interfaces}/process.py +0 -0
- /unstructured_ingest/{v2/interfaces → interfaces}/processor.py +0 -0
- /unstructured_ingest/{connector/fsspec → pipeline/steps}/__init__.py +0 -0
- /unstructured_ingest/{v2/processes → processes}/__init__.py +0 -0
- /unstructured_ingest/{connector/notion → processes/connectors/assets}/__init__.py +0 -0
- /unstructured_ingest/{v2/processes → processes}/connectors/fsspec/utils.py +0 -0
- /unstructured_ingest/{connector/notion/types → processes/connectors/notion}/__init__.py +0 -0
- /unstructured_ingest/{ingest_backoff → processes/connectors/notion/ingest_backoff}/__init__.py +0 -0
- /unstructured_ingest/{ingest_backoff → processes/connectors/notion/ingest_backoff}/_common.py +0 -0
- /unstructured_ingest/{connector → processes/connectors}/notion/interfaces.py +0 -0
- /unstructured_ingest/{pipeline/reformat → processes/connectors/notion/types}/__init__.py +0 -0
- /unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/__init__.py +0 -0
- /unstructured_ingest/{v2/processes → processes}/connectors/utils.py +0 -0
- /unstructured_ingest/{runner/fsspec → processes/connectors/zendesk}/__init__.py +0 -0
- /unstructured_ingest/{runner/writers/fsspec → processes/utils}/__init__.py +0 -0
- /unstructured_ingest/{v2 → utils}/constants.py +0 -0
- {unstructured_ingest-0.6.4.dist-info → unstructured_ingest-0.7.1.dist-info}/LICENSE.md +0 -0
- {unstructured_ingest-0.6.4.dist-info → unstructured_ingest-0.7.1.dist-info}/WHEEL +0 -0
- {unstructured_ingest-0.6.4.dist-info → unstructured_ingest-0.7.1.dist-info}/entry_points.txt +0 -0
|
@@ -5,10 +5,9 @@ from typing import TYPE_CHECKING, Any, Generator, Optional
|
|
|
5
5
|
|
|
6
6
|
from pydantic import Field, Secret
|
|
7
7
|
|
|
8
|
+
from unstructured_ingest.data_types.file_data import FileData
|
|
8
9
|
from unstructured_ingest.error import DestinationConnectionError
|
|
9
|
-
from unstructured_ingest.
|
|
10
|
-
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
11
|
-
from unstructured_ingest.v2.interfaces import (
|
|
10
|
+
from unstructured_ingest.interfaces import (
|
|
12
11
|
AccessConfig,
|
|
13
12
|
ConnectionConfig,
|
|
14
13
|
Uploader,
|
|
@@ -16,12 +15,17 @@ from unstructured_ingest.v2.interfaces import (
|
|
|
16
15
|
UploadStager,
|
|
17
16
|
UploadStagerConfig,
|
|
18
17
|
)
|
|
19
|
-
from unstructured_ingest.
|
|
20
|
-
from unstructured_ingest.
|
|
18
|
+
from unstructured_ingest.logger import logger
|
|
19
|
+
from unstructured_ingest.processes.connector_registry import (
|
|
21
20
|
DestinationRegistryEntry,
|
|
22
21
|
)
|
|
23
|
-
from unstructured_ingest.
|
|
24
|
-
|
|
22
|
+
from unstructured_ingest.utils.data_prep import (
|
|
23
|
+
flatten_dict,
|
|
24
|
+
get_data_df,
|
|
25
|
+
get_enhanced_element_id,
|
|
26
|
+
split_dataframe,
|
|
27
|
+
)
|
|
28
|
+
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
25
29
|
|
|
26
30
|
if TYPE_CHECKING:
|
|
27
31
|
from kdbai_client import Database, Session, Table
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from unstructured_ingest.
|
|
3
|
+
from unstructured_ingest.processes.connector_registry import add_destination_entry
|
|
4
4
|
|
|
5
5
|
from .aws import CONNECTOR_TYPE as LANCEDB_S3_CONNECTOR_TYPE
|
|
6
6
|
from .aws import lancedb_aws_destination_entry
|
|
@@ -2,9 +2,9 @@ from dataclasses import dataclass
|
|
|
2
2
|
|
|
3
3
|
from pydantic import Field, Secret
|
|
4
4
|
|
|
5
|
-
from unstructured_ingest.
|
|
6
|
-
from unstructured_ingest.
|
|
7
|
-
from unstructured_ingest.
|
|
5
|
+
from unstructured_ingest.interfaces.connector import AccessConfig
|
|
6
|
+
from unstructured_ingest.processes.connector_registry import DestinationRegistryEntry
|
|
7
|
+
from unstructured_ingest.processes.connectors.lancedb.lancedb import (
|
|
8
8
|
LanceDBRemoteConnectionConfig,
|
|
9
9
|
LanceDBUploader,
|
|
10
10
|
LanceDBUploaderConfig,
|
|
@@ -2,9 +2,9 @@ from dataclasses import dataclass
|
|
|
2
2
|
|
|
3
3
|
from pydantic import Field, Secret
|
|
4
4
|
|
|
5
|
-
from unstructured_ingest.
|
|
6
|
-
from unstructured_ingest.
|
|
7
|
-
from unstructured_ingest.
|
|
5
|
+
from unstructured_ingest.interfaces.connector import AccessConfig
|
|
6
|
+
from unstructured_ingest.processes.connector_registry import DestinationRegistryEntry
|
|
7
|
+
from unstructured_ingest.processes.connectors.lancedb.lancedb import (
|
|
8
8
|
LanceDBRemoteConnectionConfig,
|
|
9
9
|
LanceDBUploader,
|
|
10
10
|
LanceDBUploaderConfig,
|
|
@@ -2,9 +2,9 @@ from dataclasses import dataclass
|
|
|
2
2
|
|
|
3
3
|
from pydantic import Field, Secret
|
|
4
4
|
|
|
5
|
-
from unstructured_ingest.
|
|
6
|
-
from unstructured_ingest.
|
|
7
|
-
from unstructured_ingest.
|
|
5
|
+
from unstructured_ingest.interfaces.connector import AccessConfig
|
|
6
|
+
from unstructured_ingest.processes.connector_registry import DestinationRegistryEntry
|
|
7
|
+
from unstructured_ingest.processes.connectors.lancedb.lancedb import (
|
|
8
8
|
LanceDBRemoteConnectionConfig,
|
|
9
9
|
LanceDBUploader,
|
|
10
10
|
LanceDBUploaderConfig,
|
|
@@ -2,9 +2,9 @@ from dataclasses import dataclass
|
|
|
2
2
|
|
|
3
3
|
from pydantic import Field, Secret
|
|
4
4
|
|
|
5
|
-
from unstructured_ingest.
|
|
6
|
-
from unstructured_ingest.
|
|
7
|
-
from unstructured_ingest.
|
|
5
|
+
from unstructured_ingest.interfaces.connector import AccessConfig
|
|
6
|
+
from unstructured_ingest.processes.connector_registry import DestinationRegistryEntry
|
|
7
|
+
from unstructured_ingest.processes.connectors.lancedb.lancedb import (
|
|
8
8
|
LanceDBRemoteConnectionConfig,
|
|
9
9
|
LanceDBUploader,
|
|
10
10
|
LanceDBUploaderConfig,
|
|
@@ -10,19 +10,19 @@ from typing import TYPE_CHECKING, Any, AsyncGenerator, Optional
|
|
|
10
10
|
|
|
11
11
|
from pydantic import Field
|
|
12
12
|
|
|
13
|
+
from unstructured_ingest.data_types.file_data import FileData
|
|
13
14
|
from unstructured_ingest.error import DestinationConnectionError
|
|
14
|
-
from unstructured_ingest.
|
|
15
|
-
from unstructured_ingest.utils.data_prep import flatten_dict
|
|
16
|
-
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
17
|
-
from unstructured_ingest.v2.constants import RECORD_ID_LABEL
|
|
18
|
-
from unstructured_ingest.v2.interfaces import (
|
|
15
|
+
from unstructured_ingest.interfaces import (
|
|
19
16
|
ConnectionConfig,
|
|
20
17
|
Uploader,
|
|
21
18
|
UploaderConfig,
|
|
22
19
|
UploadStager,
|
|
23
20
|
UploadStagerConfig,
|
|
24
21
|
)
|
|
25
|
-
from unstructured_ingest.
|
|
22
|
+
from unstructured_ingest.logger import logger
|
|
23
|
+
from unstructured_ingest.utils.constants import RECORD_ID_LABEL
|
|
24
|
+
from unstructured_ingest.utils.data_prep import flatten_dict
|
|
25
|
+
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
26
26
|
|
|
27
27
|
CONNECTOR_TYPE = "lancedb"
|
|
28
28
|
|
|
@@ -2,9 +2,9 @@ from dataclasses import dataclass
|
|
|
2
2
|
|
|
3
3
|
from pydantic import Field, Secret
|
|
4
4
|
|
|
5
|
-
from unstructured_ingest.
|
|
6
|
-
from unstructured_ingest.
|
|
7
|
-
from unstructured_ingest.
|
|
5
|
+
from unstructured_ingest.interfaces.connector import AccessConfig
|
|
6
|
+
from unstructured_ingest.processes.connector_registry import DestinationRegistryEntry
|
|
7
|
+
from unstructured_ingest.processes.connectors.lancedb.lancedb import (
|
|
8
8
|
LanceDBConnectionConfig,
|
|
9
9
|
LanceDBUploader,
|
|
10
10
|
LanceDBUploaderConfig,
|
|
@@ -8,7 +8,12 @@ from typing import Any, Generator
|
|
|
8
8
|
|
|
9
9
|
from pydantic import Field, Secret
|
|
10
10
|
|
|
11
|
-
from unstructured_ingest.
|
|
11
|
+
from unstructured_ingest.data_types.file_data import (
|
|
12
|
+
FileData,
|
|
13
|
+
FileDataSourceMetadata,
|
|
14
|
+
SourceIdentifiers,
|
|
15
|
+
)
|
|
16
|
+
from unstructured_ingest.interfaces import (
|
|
12
17
|
AccessConfig,
|
|
13
18
|
ConnectionConfig,
|
|
14
19
|
Downloader,
|
|
@@ -19,20 +24,15 @@ from unstructured_ingest.v2.interfaces import (
|
|
|
19
24
|
Uploader,
|
|
20
25
|
UploaderConfig,
|
|
21
26
|
)
|
|
22
|
-
from unstructured_ingest.
|
|
23
|
-
from unstructured_ingest.
|
|
27
|
+
from unstructured_ingest.logger import logger
|
|
28
|
+
from unstructured_ingest.processes.connector_registry import (
|
|
24
29
|
DestinationRegistryEntry,
|
|
25
30
|
SourceRegistryEntry,
|
|
26
31
|
)
|
|
27
|
-
from unstructured_ingest.
|
|
32
|
+
from unstructured_ingest.processes.utils.blob_storage import (
|
|
28
33
|
BlobStoreUploadStager,
|
|
29
34
|
BlobStoreUploadStagerConfig,
|
|
30
35
|
)
|
|
31
|
-
from unstructured_ingest.v2.types.file_data import (
|
|
32
|
-
FileData,
|
|
33
|
-
FileDataSourceMetadata,
|
|
34
|
-
SourceIdentifiers,
|
|
35
|
-
)
|
|
36
36
|
|
|
37
37
|
CONNECTOR_TYPE = "local"
|
|
38
38
|
|
|
@@ -6,11 +6,9 @@ from typing import TYPE_CHECKING, Any, Generator, Optional, Union
|
|
|
6
6
|
from dateutil import parser
|
|
7
7
|
from pydantic import Field, Secret
|
|
8
8
|
|
|
9
|
+
from unstructured_ingest.data_types.file_data import FileData
|
|
9
10
|
from unstructured_ingest.error import DestinationConnectionError, WriteError
|
|
10
|
-
from unstructured_ingest.
|
|
11
|
-
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
12
|
-
from unstructured_ingest.v2.constants import RECORD_ID_LABEL
|
|
13
|
-
from unstructured_ingest.v2.interfaces import (
|
|
11
|
+
from unstructured_ingest.interfaces import (
|
|
14
12
|
AccessConfig,
|
|
15
13
|
ConnectionConfig,
|
|
16
14
|
Uploader,
|
|
@@ -18,11 +16,13 @@ from unstructured_ingest.v2.interfaces import (
|
|
|
18
16
|
UploadStager,
|
|
19
17
|
UploadStagerConfig,
|
|
20
18
|
)
|
|
21
|
-
from unstructured_ingest.
|
|
22
|
-
from unstructured_ingest.
|
|
19
|
+
from unstructured_ingest.logger import logger
|
|
20
|
+
from unstructured_ingest.processes.connector_registry import (
|
|
23
21
|
DestinationRegistryEntry,
|
|
24
22
|
)
|
|
25
|
-
from unstructured_ingest.
|
|
23
|
+
from unstructured_ingest.utils.constants import RECORD_ID_LABEL
|
|
24
|
+
from unstructured_ingest.utils.data_prep import flatten_dict
|
|
25
|
+
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
26
26
|
|
|
27
27
|
if TYPE_CHECKING:
|
|
28
28
|
from pymilvus import MilvusClient
|
|
@@ -7,11 +7,15 @@ from typing import TYPE_CHECKING, Any, Generator, Optional
|
|
|
7
7
|
from pydantic import BaseModel, Field, Secret
|
|
8
8
|
|
|
9
9
|
from unstructured_ingest.__version__ import __version__ as unstructured_version
|
|
10
|
+
from unstructured_ingest.data_types.file_data import (
|
|
11
|
+
BatchFileData,
|
|
12
|
+
BatchItem,
|
|
13
|
+
FileData,
|
|
14
|
+
FileDataSourceMetadata,
|
|
15
|
+
SourceIdentifiers,
|
|
16
|
+
)
|
|
10
17
|
from unstructured_ingest.error import DestinationConnectionError, SourceConnectionError
|
|
11
|
-
from unstructured_ingest.
|
|
12
|
-
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
13
|
-
from unstructured_ingest.v2.constants import RECORD_ID_LABEL
|
|
14
|
-
from unstructured_ingest.v2.interfaces import (
|
|
18
|
+
from unstructured_ingest.interfaces import (
|
|
15
19
|
AccessConfig,
|
|
16
20
|
ConnectionConfig,
|
|
17
21
|
Downloader,
|
|
@@ -23,18 +27,14 @@ from unstructured_ingest.v2.interfaces import (
|
|
|
23
27
|
UploaderConfig,
|
|
24
28
|
download_responses,
|
|
25
29
|
)
|
|
26
|
-
from unstructured_ingest.
|
|
27
|
-
from unstructured_ingest.
|
|
30
|
+
from unstructured_ingest.logger import logger
|
|
31
|
+
from unstructured_ingest.processes.connector_registry import (
|
|
28
32
|
DestinationRegistryEntry,
|
|
29
33
|
SourceRegistryEntry,
|
|
30
34
|
)
|
|
31
|
-
from unstructured_ingest.
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
FileData,
|
|
35
|
-
FileDataSourceMetadata,
|
|
36
|
-
SourceIdentifiers,
|
|
37
|
-
)
|
|
35
|
+
from unstructured_ingest.utils.constants import RECORD_ID_LABEL
|
|
36
|
+
from unstructured_ingest.utils.data_prep import batch_generator, flatten_dict
|
|
37
|
+
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
38
38
|
|
|
39
39
|
if TYPE_CHECKING:
|
|
40
40
|
from pymongo import MongoClient
|
|
@@ -11,11 +11,9 @@ from typing import TYPE_CHECKING, Any, AsyncGenerator, Literal, Optional
|
|
|
11
11
|
|
|
12
12
|
from pydantic import BaseModel, ConfigDict, Field, Secret, field_validator
|
|
13
13
|
|
|
14
|
+
from unstructured_ingest.data_types.file_data import FileData
|
|
14
15
|
from unstructured_ingest.error import DestinationConnectionError
|
|
15
|
-
from unstructured_ingest.
|
|
16
|
-
from unstructured_ingest.utils.data_prep import batch_generator, get_json_data
|
|
17
|
-
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
18
|
-
from unstructured_ingest.v2.interfaces import (
|
|
16
|
+
from unstructured_ingest.interfaces import (
|
|
19
17
|
AccessConfig,
|
|
20
18
|
ConnectionConfig,
|
|
21
19
|
Uploader,
|
|
@@ -23,11 +21,13 @@ from unstructured_ingest.v2.interfaces import (
|
|
|
23
21
|
UploadStager,
|
|
24
22
|
UploadStagerConfig,
|
|
25
23
|
)
|
|
26
|
-
from unstructured_ingest.
|
|
24
|
+
from unstructured_ingest.logger import logger
|
|
25
|
+
from unstructured_ingest.processes.connector_registry import (
|
|
27
26
|
DestinationRegistryEntry,
|
|
28
27
|
)
|
|
29
|
-
from unstructured_ingest.
|
|
30
|
-
from unstructured_ingest.
|
|
28
|
+
from unstructured_ingest.processes.connectors.utils import format_and_truncate_orig_elements
|
|
29
|
+
from unstructured_ingest.utils.data_prep import batch_generator, get_json_data
|
|
30
|
+
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
31
31
|
|
|
32
32
|
SimilarityFunction = Literal["cosine"]
|
|
33
33
|
|
|
@@ -10,13 +10,13 @@ from notion_client.api_endpoints import Endpoint
|
|
|
10
10
|
from notion_client.api_endpoints import PagesEndpoint as NotionPagesEndpoint
|
|
11
11
|
from notion_client.errors import HTTPResponseError, RequestTimeoutError
|
|
12
12
|
|
|
13
|
-
from unstructured_ingest.ingest_backoff import RetryHandler
|
|
14
|
-
from unstructured_ingest.
|
|
13
|
+
from unstructured_ingest.processes.connectors.notion.ingest_backoff import RetryHandler
|
|
14
|
+
from unstructured_ingest.processes.connectors.notion.ingest_backoff.types import RetryStrategyConfig
|
|
15
|
+
from unstructured_ingest.processes.connectors.notion.types.block import Block
|
|
16
|
+
from unstructured_ingest.processes.connectors.notion.types.database import Database
|
|
17
|
+
from unstructured_ingest.processes.connectors.notion.types.database_properties import map_cells
|
|
18
|
+
from unstructured_ingest.processes.connectors.notion.types.page import Page
|
|
15
19
|
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
16
|
-
from unstructured_ingest.v2.processes.connectors.notion.types.block import Block
|
|
17
|
-
from unstructured_ingest.v2.processes.connectors.notion.types.database import Database
|
|
18
|
-
from unstructured_ingest.v2.processes.connectors.notion.types.database_properties import map_cells
|
|
19
|
-
from unstructured_ingest.v2.processes.connectors.notion.types.page import Page
|
|
20
20
|
|
|
21
21
|
|
|
22
22
|
@requires_dependencies(["httpx"], extras="notion")
|
|
@@ -4,9 +4,13 @@ from typing import TYPE_CHECKING, Any, AsyncGenerator, Generator, Optional
|
|
|
4
4
|
|
|
5
5
|
from pydantic import UUID4, Field, Secret
|
|
6
6
|
|
|
7
|
+
from unstructured_ingest.data_types.file_data import (
|
|
8
|
+
FileData,
|
|
9
|
+
FileDataSourceMetadata,
|
|
10
|
+
SourceIdentifiers,
|
|
11
|
+
)
|
|
7
12
|
from unstructured_ingest.error import SourceConnectionError
|
|
8
|
-
from unstructured_ingest.
|
|
9
|
-
from unstructured_ingest.v2.interfaces import (
|
|
13
|
+
from unstructured_ingest.interfaces import (
|
|
10
14
|
AccessConfig,
|
|
11
15
|
ConnectionConfig,
|
|
12
16
|
Downloader,
|
|
@@ -15,16 +19,12 @@ from unstructured_ingest.v2.interfaces import (
|
|
|
15
19
|
Indexer,
|
|
16
20
|
IndexerConfig,
|
|
17
21
|
)
|
|
18
|
-
from unstructured_ingest.
|
|
19
|
-
from unstructured_ingest.
|
|
20
|
-
from unstructured_ingest.
|
|
21
|
-
FileData,
|
|
22
|
-
FileDataSourceMetadata,
|
|
23
|
-
SourceIdentifiers,
|
|
24
|
-
)
|
|
22
|
+
from unstructured_ingest.logger import logger
|
|
23
|
+
from unstructured_ingest.processes.connector_registry import SourceRegistryEntry
|
|
24
|
+
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
25
25
|
|
|
26
26
|
if TYPE_CHECKING:
|
|
27
|
-
from unstructured_ingest.
|
|
27
|
+
from unstructured_ingest.processes.connectors.notion.client import Client
|
|
28
28
|
|
|
29
29
|
NOTION_API_VERSION = "2022-06-28"
|
|
30
30
|
CONNECTOR_TYPE = "notion"
|
|
@@ -39,7 +39,7 @@ class NotionConnectionConfig(ConnectionConfig):
|
|
|
39
39
|
|
|
40
40
|
@requires_dependencies(["notion_client"], extras="notion")
|
|
41
41
|
def get_client(self) -> "Client":
|
|
42
|
-
from unstructured_ingest.
|
|
42
|
+
from unstructured_ingest.processes.connectors.notion.client import Client
|
|
43
43
|
|
|
44
44
|
return Client(
|
|
45
45
|
notion_version=NOTION_API_VERSION,
|
|
@@ -222,7 +222,7 @@ class NotionIndexer(Indexer):
|
|
|
222
222
|
processed_pages: set[str],
|
|
223
223
|
processed_databases: set[str],
|
|
224
224
|
) -> tuple[set[str], set[str]]:
|
|
225
|
-
from unstructured_ingest.
|
|
225
|
+
from unstructured_ingest.processes.connectors.notion.helpers import (
|
|
226
226
|
get_recursive_content_from_page,
|
|
227
227
|
)
|
|
228
228
|
|
|
@@ -242,7 +242,7 @@ class NotionIndexer(Indexer):
|
|
|
242
242
|
processed_pages: set[str],
|
|
243
243
|
processed_databases: set[str],
|
|
244
244
|
) -> tuple[set[str], set[str]]:
|
|
245
|
-
from unstructured_ingest.
|
|
245
|
+
from unstructured_ingest.processes.connectors.notion.helpers import (
|
|
246
246
|
get_recursive_content_from_database,
|
|
247
247
|
)
|
|
248
248
|
|
|
@@ -290,7 +290,7 @@ class NotionDownloader(Downloader):
|
|
|
290
290
|
raise ValueError("Invalid record_locator in file_data")
|
|
291
291
|
|
|
292
292
|
def download_page(self, client, page_id: str, file_data: FileData) -> DownloadResponse:
|
|
293
|
-
from unstructured_ingest.
|
|
293
|
+
from unstructured_ingest.processes.connectors.notion.helpers import extract_page_html
|
|
294
294
|
|
|
295
295
|
try:
|
|
296
296
|
text_extraction = extract_page_html(
|
|
@@ -315,7 +315,7 @@ class NotionDownloader(Downloader):
|
|
|
315
315
|
return None
|
|
316
316
|
|
|
317
317
|
def download_database(self, client, database_id: str, file_data: FileData) -> DownloadResponse:
|
|
318
|
-
from unstructured_ingest.
|
|
318
|
+
from unstructured_ingest.processes.connectors.notion.helpers import extract_database_html
|
|
319
319
|
|
|
320
320
|
try:
|
|
321
321
|
text_extraction = extract_database_html(
|
|
@@ -22,10 +22,10 @@ from htmlBuilder.tags import (
|
|
|
22
22
|
)
|
|
23
23
|
from notion_client.errors import APIResponseError
|
|
24
24
|
|
|
25
|
-
import unstructured_ingest.
|
|
26
|
-
from unstructured_ingest.
|
|
27
|
-
from unstructured_ingest.
|
|
28
|
-
from unstructured_ingest.
|
|
25
|
+
import unstructured_ingest.processes.connectors.notion.types.blocks as notion_blocks
|
|
26
|
+
from unstructured_ingest.processes.connectors.notion.client import Client
|
|
27
|
+
from unstructured_ingest.processes.connectors.notion.types.block import Block
|
|
28
|
+
from unstructured_ingest.processes.connectors.notion.types.database import Database
|
|
29
29
|
|
|
30
30
|
|
|
31
31
|
@dataclass
|
unstructured_ingest/{ingest_backoff → processes/connectors/notion/ingest_backoff}/_wrapper.py
RENAMED
|
@@ -16,7 +16,11 @@ from backoff._typing import (
|
|
|
16
16
|
_WaitGenerator,
|
|
17
17
|
)
|
|
18
18
|
|
|
19
|
-
from unstructured_ingest.ingest_backoff._common import
|
|
19
|
+
from unstructured_ingest.processes.connectors.notion.ingest_backoff._common import (
|
|
20
|
+
_log_backoff,
|
|
21
|
+
_log_giveup,
|
|
22
|
+
_log_start,
|
|
23
|
+
)
|
|
20
24
|
|
|
21
25
|
|
|
22
26
|
class RetryHandler:
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
from dataclasses import dataclass
|
|
2
|
+
from typing import Optional
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
@dataclass
|
|
6
|
+
class RetryStrategyConfig:
|
|
7
|
+
"""
|
|
8
|
+
Contains all info needed for decorator to pull from `self` for backoff
|
|
9
|
+
and retry triggered by exception.
|
|
10
|
+
|
|
11
|
+
Args:
|
|
12
|
+
max_retries: The maximum number of attempts to make before giving
|
|
13
|
+
up. Once exhausted, the exception will be allowed to escape.
|
|
14
|
+
The default value of None means there is no limit to the
|
|
15
|
+
number of tries. If a callable is passed, it will be
|
|
16
|
+
evaluated at runtime and its return value used.
|
|
17
|
+
max_retry_time: The maximum total amount of time to try for before
|
|
18
|
+
giving up. Once expired, the exception will be allowed to
|
|
19
|
+
escape. If a callable is passed, it will be
|
|
20
|
+
evaluated at runtime and its return value used.
|
|
21
|
+
"""
|
|
22
|
+
|
|
23
|
+
max_retries: Optional[int] = None
|
|
24
|
+
max_retry_time: Optional[float] = None
|
|
@@ -4,14 +4,14 @@ from typing import Optional
|
|
|
4
4
|
|
|
5
5
|
from htmlBuilder.tags import HtmlTag
|
|
6
6
|
|
|
7
|
-
from unstructured_ingest.
|
|
7
|
+
from unstructured_ingest.processes.connectors.notion.interfaces import (
|
|
8
8
|
BlockBase,
|
|
9
9
|
FromJSONMixin,
|
|
10
10
|
GetHTMLMixin,
|
|
11
11
|
)
|
|
12
|
-
from unstructured_ingest.
|
|
13
|
-
from unstructured_ingest.
|
|
14
|
-
from unstructured_ingest.
|
|
12
|
+
from unstructured_ingest.processes.connectors.notion.types import blocks
|
|
13
|
+
from unstructured_ingest.processes.connectors.notion.types.parent import Parent
|
|
14
|
+
from unstructured_ingest.processes.connectors.notion.types.user import PartialUser
|
|
15
15
|
|
|
16
16
|
block_type_mapping = {
|
|
17
17
|
"bookmark": blocks.Bookmark,
|
|
@@ -5,8 +5,8 @@ from typing import List, Optional
|
|
|
5
5
|
from htmlBuilder.attributes import Href
|
|
6
6
|
from htmlBuilder.tags import A, Br, Div, HtmlTag
|
|
7
7
|
|
|
8
|
-
from unstructured_ingest.
|
|
9
|
-
from unstructured_ingest.
|
|
8
|
+
from unstructured_ingest.processes.connectors.notion.interfaces import BlockBase
|
|
9
|
+
from unstructured_ingest.processes.connectors.notion.types.rich_text import RichText
|
|
10
10
|
|
|
11
11
|
|
|
12
12
|
@dataclass
|
unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/bulleted_list_item.py
RENAMED
|
@@ -4,8 +4,8 @@ from typing import List, Optional
|
|
|
4
4
|
|
|
5
5
|
from htmlBuilder.tags import HtmlTag, Li
|
|
6
6
|
|
|
7
|
-
from unstructured_ingest.
|
|
8
|
-
from unstructured_ingest.
|
|
7
|
+
from unstructured_ingest.processes.connectors.notion.interfaces import BlockBase
|
|
8
|
+
from unstructured_ingest.processes.connectors.notion.types.rich_text import RichText
|
|
9
9
|
|
|
10
10
|
|
|
11
11
|
@dataclass
|
|
@@ -5,12 +5,12 @@ from typing import List, Optional, Union
|
|
|
5
5
|
from htmlBuilder.attributes import Href, Style
|
|
6
6
|
from htmlBuilder.tags import A, Div, HtmlTag, P
|
|
7
7
|
|
|
8
|
-
from unstructured_ingest.
|
|
8
|
+
from unstructured_ingest.processes.connectors.notion.interfaces import (
|
|
9
9
|
BlockBase,
|
|
10
10
|
FromJSONMixin,
|
|
11
11
|
GetHTMLMixin,
|
|
12
12
|
)
|
|
13
|
-
from unstructured_ingest.
|
|
13
|
+
from unstructured_ingest.processes.connectors.notion.types.rich_text import RichText
|
|
14
14
|
|
|
15
15
|
|
|
16
16
|
@dataclass
|
|
@@ -4,7 +4,7 @@ from typing import Optional
|
|
|
4
4
|
|
|
5
5
|
from htmlBuilder.tags import HtmlTag, P
|
|
6
6
|
|
|
7
|
-
from unstructured_ingest.
|
|
7
|
+
from unstructured_ingest.processes.connectors.notion.interfaces import BlockBase, GetHTMLMixin
|
|
8
8
|
|
|
9
9
|
|
|
10
10
|
@dataclass
|
|
@@ -5,8 +5,8 @@ from typing import List, Optional
|
|
|
5
5
|
from htmlBuilder.tags import Br, Div, HtmlTag
|
|
6
6
|
from htmlBuilder.tags import Code as HtmlCode
|
|
7
7
|
|
|
8
|
-
from unstructured_ingest.
|
|
9
|
-
from unstructured_ingest.
|
|
8
|
+
from unstructured_ingest.processes.connectors.notion.interfaces import BlockBase
|
|
9
|
+
from unstructured_ingest.processes.connectors.notion.types.rich_text import RichText
|
|
10
10
|
|
|
11
11
|
|
|
12
12
|
@dataclass
|
|
@@ -5,7 +5,7 @@ from typing import Optional
|
|
|
5
5
|
from htmlBuilder.attributes import Style
|
|
6
6
|
from htmlBuilder.tags import Hr, HtmlTag
|
|
7
7
|
|
|
8
|
-
from unstructured_ingest.
|
|
8
|
+
from unstructured_ingest.processes.connectors.notion.interfaces import BlockBase
|
|
9
9
|
|
|
10
10
|
|
|
11
11
|
@dataclass
|
|
@@ -5,8 +5,8 @@ from typing import List, Optional
|
|
|
5
5
|
from htmlBuilder.attributes import Href
|
|
6
6
|
from htmlBuilder.tags import A, Br, Div, HtmlTag
|
|
7
7
|
|
|
8
|
-
from unstructured_ingest.
|
|
9
|
-
from unstructured_ingest.
|
|
8
|
+
from unstructured_ingest.processes.connectors.notion.interfaces import BlockBase
|
|
9
|
+
from unstructured_ingest.processes.connectors.notion.types.rich_text import RichText
|
|
10
10
|
|
|
11
11
|
|
|
12
12
|
@dataclass
|
|
@@ -5,10 +5,10 @@ from typing import List, Optional
|
|
|
5
5
|
from htmlBuilder.attributes import Href
|
|
6
6
|
from htmlBuilder.tags import A, Br, Div, HtmlTag
|
|
7
7
|
|
|
8
|
-
from unstructured_ingest.
|
|
9
|
-
from unstructured_ingest.
|
|
10
|
-
from unstructured_ingest.
|
|
11
|
-
from unstructured_ingest.
|
|
8
|
+
from unstructured_ingest.processes.connectors.notion.interfaces import BlockBase
|
|
9
|
+
from unstructured_ingest.processes.connectors.notion.types.file import External
|
|
10
|
+
from unstructured_ingest.processes.connectors.notion.types.file import File as FileContent
|
|
11
|
+
from unstructured_ingest.processes.connectors.notion.types.rich_text import RichText
|
|
12
12
|
|
|
13
13
|
|
|
14
14
|
@dataclass
|
|
@@ -5,8 +5,8 @@ from typing import List, Optional
|
|
|
5
5
|
from htmlBuilder.attributes import Style
|
|
6
6
|
from htmlBuilder.tags import Div, HtmlTag
|
|
7
7
|
|
|
8
|
-
from unstructured_ingest.
|
|
9
|
-
from unstructured_ingest.
|
|
8
|
+
from unstructured_ingest.processes.connectors.notion.interfaces import BlockBase
|
|
9
|
+
from unstructured_ingest.processes.connectors.notion.types.rich_text import RichText
|
|
10
10
|
|
|
11
11
|
|
|
12
12
|
@dataclass
|
|
@@ -4,8 +4,8 @@ from typing import Optional
|
|
|
4
4
|
from htmlBuilder.attributes import Src
|
|
5
5
|
from htmlBuilder.tags import HtmlTag, Img
|
|
6
6
|
|
|
7
|
-
from unstructured_ingest.
|
|
8
|
-
from unstructured_ingest.
|
|
7
|
+
from unstructured_ingest.processes.connectors.notion.interfaces import BlockBase
|
|
8
|
+
from unstructured_ingest.processes.connectors.notion.types.file import FileObject
|
|
9
9
|
|
|
10
10
|
|
|
11
11
|
class Image(BlockBase, FileObject):
|
unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/link_preview.py
RENAMED
|
@@ -5,7 +5,7 @@ from typing import Optional
|
|
|
5
5
|
from htmlBuilder.attributes import Href
|
|
6
6
|
from htmlBuilder.tags import A, HtmlTag
|
|
7
7
|
|
|
8
|
-
from unstructured_ingest.
|
|
8
|
+
from unstructured_ingest.processes.connectors.notion.interfaces import BlockBase
|
|
9
9
|
|
|
10
10
|
|
|
11
11
|
@dataclass
|