unstructured-ingest 0.3.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of unstructured-ingest might be problematic. Click here for more details.
- test/__init__.py +0 -0
- test/integration/__init__.py +0 -0
- test/integration/chunkers/__init__.py +0 -0
- test/integration/chunkers/test_chunkers.py +31 -0
- test/integration/connectors/__init__.py +0 -0
- test/integration/connectors/conftest.py +38 -0
- test/integration/connectors/databricks/__init__.py +0 -0
- test/integration/connectors/databricks/test_volumes_native.py +269 -0
- test/integration/connectors/discord/__init__.py +0 -0
- test/integration/connectors/discord/test_discord.py +90 -0
- test/integration/connectors/duckdb/__init__.py +0 -0
- test/integration/connectors/duckdb/conftest.py +14 -0
- test/integration/connectors/duckdb/test_duckdb.py +89 -0
- test/integration/connectors/duckdb/test_motherduck.py +95 -0
- test/integration/connectors/elasticsearch/__init__.py +0 -0
- test/integration/connectors/elasticsearch/conftest.py +34 -0
- test/integration/connectors/elasticsearch/test_elasticsearch.py +330 -0
- test/integration/connectors/elasticsearch/test_opensearch.py +325 -0
- test/integration/connectors/sql/__init__.py +0 -0
- test/integration/connectors/sql/test_postgres.py +195 -0
- test/integration/connectors/sql/test_singlestore.py +176 -0
- test/integration/connectors/sql/test_snowflake.py +238 -0
- test/integration/connectors/sql/test_sqlite.py +162 -0
- test/integration/connectors/test_astradb.py +217 -0
- test/integration/connectors/test_azure_ai_search.py +255 -0
- test/integration/connectors/test_chroma.py +120 -0
- test/integration/connectors/test_confluence.py +113 -0
- test/integration/connectors/test_delta_table.py +185 -0
- test/integration/connectors/test_lancedb.py +247 -0
- test/integration/connectors/test_milvus.py +203 -0
- test/integration/connectors/test_mongodb.py +335 -0
- test/integration/connectors/test_neo4j.py +236 -0
- test/integration/connectors/test_notion.py +145 -0
- test/integration/connectors/test_onedrive.py +118 -0
- test/integration/connectors/test_pinecone.py +288 -0
- test/integration/connectors/test_qdrant.py +215 -0
- test/integration/connectors/test_redis.py +119 -0
- test/integration/connectors/test_s3.py +183 -0
- test/integration/connectors/test_vectara.py +270 -0
- test/integration/connectors/utils/__init__.py +0 -0
- test/integration/connectors/utils/constants.py +7 -0
- test/integration/connectors/utils/docker.py +151 -0
- test/integration/connectors/utils/docker_compose.py +59 -0
- test/integration/connectors/utils/validation/__init__.py +0 -0
- test/integration/connectors/utils/validation/destination.py +75 -0
- test/integration/connectors/utils/validation/equality.py +75 -0
- test/integration/connectors/utils/validation/source.py +299 -0
- test/integration/connectors/utils/validation/utils.py +36 -0
- test/integration/connectors/weaviate/__init__.py +0 -0
- test/integration/connectors/weaviate/conftest.py +15 -0
- test/integration/connectors/weaviate/test_cloud.py +34 -0
- test/integration/connectors/weaviate/test_local.py +131 -0
- test/integration/embedders/__init__.py +0 -0
- test/integration/embedders/conftest.py +13 -0
- test/integration/embedders/test_azure_openai.py +59 -0
- test/integration/embedders/test_bedrock.py +103 -0
- test/integration/embedders/test_huggingface.py +26 -0
- test/integration/embedders/test_mixedbread.py +71 -0
- test/integration/embedders/test_octoai.py +77 -0
- test/integration/embedders/test_openai.py +76 -0
- test/integration/embedders/test_togetherai.py +71 -0
- test/integration/embedders/test_vertexai.py +65 -0
- test/integration/embedders/test_voyageai.py +65 -0
- test/integration/embedders/utils.py +68 -0
- test/integration/partitioners/__init__.py +0 -0
- test/integration/partitioners/test_partitioner.py +75 -0
- test/integration/utils.py +15 -0
- test/unit/__init__.py +0 -0
- test/unit/embed/__init__.py +0 -0
- test/unit/embed/test_mixedbreadai.py +42 -0
- test/unit/embed/test_octoai.py +27 -0
- test/unit/embed/test_openai.py +20 -0
- test/unit/embed/test_vertexai.py +25 -0
- test/unit/embed/test_voyageai.py +24 -0
- test/unit/test_error.py +27 -0
- test/unit/test_logger.py +78 -0
- test/unit/test_utils.py +184 -0
- test/unit/v2/__init__.py +0 -0
- test/unit/v2/chunkers/__init__.py +0 -0
- test/unit/v2/chunkers/test_chunkers.py +49 -0
- test/unit/v2/connectors/__init__.py +0 -0
- test/unit/v2/connectors/test_confluence.py +39 -0
- test/unit/v2/embedders/__init__.py +0 -0
- test/unit/v2/embedders/test_bedrock.py +36 -0
- test/unit/v2/embedders/test_huggingface.py +48 -0
- test/unit/v2/embedders/test_mixedbread.py +37 -0
- test/unit/v2/embedders/test_octoai.py +35 -0
- test/unit/v2/embedders/test_openai.py +35 -0
- test/unit/v2/embedders/test_togetherai.py +37 -0
- test/unit/v2/embedders/test_vertexai.py +37 -0
- test/unit/v2/embedders/test_voyageai.py +38 -0
- test/unit/v2/partitioners/__init__.py +0 -0
- test/unit/v2/partitioners/test_partitioner.py +63 -0
- test/unit/v2/test_interfaces.py +26 -0
- test/unit/v2/test_utils.py +82 -0
- test/unit/v2/utils/__init__.py +0 -0
- test/unit/v2/utils/data_generator.py +32 -0
- unstructured_ingest/__init__.py +1 -0
- unstructured_ingest/__version__.py +1 -0
- unstructured_ingest/cli/__init__.py +14 -0
- unstructured_ingest/cli/base/__init__.py +0 -0
- unstructured_ingest/cli/base/cmd.py +19 -0
- unstructured_ingest/cli/base/dest.py +87 -0
- unstructured_ingest/cli/base/src.py +57 -0
- unstructured_ingest/cli/cli.py +37 -0
- unstructured_ingest/cli/cmd_factory.py +12 -0
- unstructured_ingest/cli/cmds/__init__.py +145 -0
- unstructured_ingest/cli/cmds/airtable.py +69 -0
- unstructured_ingest/cli/cmds/astradb.py +99 -0
- unstructured_ingest/cli/cmds/azure_ai_search.py +65 -0
- unstructured_ingest/cli/cmds/biomed.py +52 -0
- unstructured_ingest/cli/cmds/chroma.py +104 -0
- unstructured_ingest/cli/cmds/clarifai.py +71 -0
- unstructured_ingest/cli/cmds/confluence.py +69 -0
- unstructured_ingest/cli/cmds/databricks_volumes.py +163 -0
- unstructured_ingest/cli/cmds/delta_table.py +94 -0
- unstructured_ingest/cli/cmds/discord.py +47 -0
- unstructured_ingest/cli/cmds/elasticsearch.py +133 -0
- unstructured_ingest/cli/cmds/fsspec/__init__.py +0 -0
- unstructured_ingest/cli/cmds/fsspec/azure.py +94 -0
- unstructured_ingest/cli/cmds/fsspec/box.py +48 -0
- unstructured_ingest/cli/cmds/fsspec/dropbox.py +51 -0
- unstructured_ingest/cli/cmds/fsspec/fsspec.py +15 -0
- unstructured_ingest/cli/cmds/fsspec/gcs.py +71 -0
- unstructured_ingest/cli/cmds/fsspec/s3.py +74 -0
- unstructured_ingest/cli/cmds/fsspec/sftp.py +58 -0
- unstructured_ingest/cli/cmds/github.py +54 -0
- unstructured_ingest/cli/cmds/gitlab.py +54 -0
- unstructured_ingest/cli/cmds/google_drive.py +49 -0
- unstructured_ingest/cli/cmds/hubspot.py +70 -0
- unstructured_ingest/cli/cmds/jira.py +71 -0
- unstructured_ingest/cli/cmds/kafka.py +102 -0
- unstructured_ingest/cli/cmds/local.py +43 -0
- unstructured_ingest/cli/cmds/mongodb.py +72 -0
- unstructured_ingest/cli/cmds/notion.py +48 -0
- unstructured_ingest/cli/cmds/onedrive.py +66 -0
- unstructured_ingest/cli/cmds/opensearch.py +117 -0
- unstructured_ingest/cli/cmds/outlook.py +67 -0
- unstructured_ingest/cli/cmds/pinecone.py +71 -0
- unstructured_ingest/cli/cmds/qdrant.py +124 -0
- unstructured_ingest/cli/cmds/reddit.py +67 -0
- unstructured_ingest/cli/cmds/salesforce.py +58 -0
- unstructured_ingest/cli/cmds/sharepoint.py +66 -0
- unstructured_ingest/cli/cmds/slack.py +56 -0
- unstructured_ingest/cli/cmds/sql.py +66 -0
- unstructured_ingest/cli/cmds/vectara.py +66 -0
- unstructured_ingest/cli/cmds/weaviate.py +98 -0
- unstructured_ingest/cli/cmds/wikipedia.py +40 -0
- unstructured_ingest/cli/common.py +7 -0
- unstructured_ingest/cli/interfaces.py +663 -0
- unstructured_ingest/cli/utils.py +205 -0
- unstructured_ingest/connector/__init__.py +0 -0
- unstructured_ingest/connector/airtable.py +309 -0
- unstructured_ingest/connector/astradb.py +267 -0
- unstructured_ingest/connector/azure_ai_search.py +144 -0
- unstructured_ingest/connector/biomed.py +320 -0
- unstructured_ingest/connector/chroma.py +158 -0
- unstructured_ingest/connector/clarifai.py +122 -0
- unstructured_ingest/connector/confluence.py +285 -0
- unstructured_ingest/connector/databricks_volumes.py +137 -0
- unstructured_ingest/connector/delta_table.py +203 -0
- unstructured_ingest/connector/discord.py +180 -0
- unstructured_ingest/connector/elasticsearch.py +396 -0
- unstructured_ingest/connector/fsspec/__init__.py +0 -0
- unstructured_ingest/connector/fsspec/azure.py +78 -0
- unstructured_ingest/connector/fsspec/box.py +109 -0
- unstructured_ingest/connector/fsspec/dropbox.py +160 -0
- unstructured_ingest/connector/fsspec/fsspec.py +359 -0
- unstructured_ingest/connector/fsspec/gcs.py +82 -0
- unstructured_ingest/connector/fsspec/s3.py +62 -0
- unstructured_ingest/connector/fsspec/sftp.py +81 -0
- unstructured_ingest/connector/git.py +124 -0
- unstructured_ingest/connector/github.py +174 -0
- unstructured_ingest/connector/gitlab.py +142 -0
- unstructured_ingest/connector/google_drive.py +348 -0
- unstructured_ingest/connector/hubspot.py +278 -0
- unstructured_ingest/connector/jira.py +469 -0
- unstructured_ingest/connector/kafka.py +293 -0
- unstructured_ingest/connector/local.py +139 -0
- unstructured_ingest/connector/mongodb.py +284 -0
- unstructured_ingest/connector/notion/__init__.py +0 -0
- unstructured_ingest/connector/notion/client.py +248 -0
- unstructured_ingest/connector/notion/connector.py +469 -0
- unstructured_ingest/connector/notion/helpers.py +584 -0
- unstructured_ingest/connector/notion/interfaces.py +32 -0
- unstructured_ingest/connector/notion/types/__init__.py +0 -0
- unstructured_ingest/connector/notion/types/block.py +96 -0
- unstructured_ingest/connector/notion/types/blocks/__init__.py +63 -0
- unstructured_ingest/connector/notion/types/blocks/bookmark.py +40 -0
- unstructured_ingest/connector/notion/types/blocks/breadcrumb.py +21 -0
- unstructured_ingest/connector/notion/types/blocks/bulleted_list_item.py +31 -0
- unstructured_ingest/connector/notion/types/blocks/callout.py +94 -0
- unstructured_ingest/connector/notion/types/blocks/child_database.py +23 -0
- unstructured_ingest/connector/notion/types/blocks/child_page.py +23 -0
- unstructured_ingest/connector/notion/types/blocks/code.py +43 -0
- unstructured_ingest/connector/notion/types/blocks/column_list.py +35 -0
- unstructured_ingest/connector/notion/types/blocks/divider.py +22 -0
- unstructured_ingest/connector/notion/types/blocks/embed.py +36 -0
- unstructured_ingest/connector/notion/types/blocks/equation.py +23 -0
- unstructured_ingest/connector/notion/types/blocks/file.py +49 -0
- unstructured_ingest/connector/notion/types/blocks/heading.py +37 -0
- unstructured_ingest/connector/notion/types/blocks/image.py +21 -0
- unstructured_ingest/connector/notion/types/blocks/link_preview.py +24 -0
- unstructured_ingest/connector/notion/types/blocks/link_to_page.py +29 -0
- unstructured_ingest/connector/notion/types/blocks/numbered_list.py +29 -0
- unstructured_ingest/connector/notion/types/blocks/paragraph.py +31 -0
- unstructured_ingest/connector/notion/types/blocks/pdf.py +49 -0
- unstructured_ingest/connector/notion/types/blocks/quote.py +37 -0
- unstructured_ingest/connector/notion/types/blocks/synced_block.py +57 -0
- unstructured_ingest/connector/notion/types/blocks/table.py +63 -0
- unstructured_ingest/connector/notion/types/blocks/table_of_contents.py +23 -0
- unstructured_ingest/connector/notion/types/blocks/template.py +30 -0
- unstructured_ingest/connector/notion/types/blocks/todo.py +42 -0
- unstructured_ingest/connector/notion/types/blocks/toggle.py +37 -0
- unstructured_ingest/connector/notion/types/blocks/unsupported.py +20 -0
- unstructured_ingest/connector/notion/types/blocks/video.py +22 -0
- unstructured_ingest/connector/notion/types/database.py +73 -0
- unstructured_ingest/connector/notion/types/database_properties/__init__.py +106 -0
- unstructured_ingest/connector/notion/types/database_properties/checkbox.py +38 -0
- unstructured_ingest/connector/notion/types/database_properties/created_by.py +35 -0
- unstructured_ingest/connector/notion/types/database_properties/created_time.py +34 -0
- unstructured_ingest/connector/notion/types/database_properties/date.py +41 -0
- unstructured_ingest/connector/notion/types/database_properties/email.py +36 -0
- unstructured_ingest/connector/notion/types/database_properties/files.py +37 -0
- unstructured_ingest/connector/notion/types/database_properties/formula.py +49 -0
- unstructured_ingest/connector/notion/types/database_properties/last_edited_by.py +34 -0
- unstructured_ingest/connector/notion/types/database_properties/last_edited_time.py +34 -0
- unstructured_ingest/connector/notion/types/database_properties/multiselect.py +73 -0
- unstructured_ingest/connector/notion/types/database_properties/number.py +49 -0
- unstructured_ingest/connector/notion/types/database_properties/people.py +40 -0
- unstructured_ingest/connector/notion/types/database_properties/phone_number.py +36 -0
- unstructured_ingest/connector/notion/types/database_properties/relation.py +67 -0
- unstructured_ingest/connector/notion/types/database_properties/rich_text.py +43 -0
- unstructured_ingest/connector/notion/types/database_properties/rollup.py +56 -0
- unstructured_ingest/connector/notion/types/database_properties/select.py +68 -0
- unstructured_ingest/connector/notion/types/database_properties/status.py +80 -0
- unstructured_ingest/connector/notion/types/database_properties/title.py +37 -0
- unstructured_ingest/connector/notion/types/database_properties/unique_id.py +50 -0
- unstructured_ingest/connector/notion/types/database_properties/url.py +37 -0
- unstructured_ingest/connector/notion/types/database_properties/verification.py +78 -0
- unstructured_ingest/connector/notion/types/date.py +26 -0
- unstructured_ingest/connector/notion/types/file.py +51 -0
- unstructured_ingest/connector/notion/types/page.py +45 -0
- unstructured_ingest/connector/notion/types/parent.py +66 -0
- unstructured_ingest/connector/notion/types/rich_text.py +189 -0
- unstructured_ingest/connector/notion/types/user.py +76 -0
- unstructured_ingest/connector/onedrive.py +232 -0
- unstructured_ingest/connector/opensearch.py +218 -0
- unstructured_ingest/connector/outlook.py +285 -0
- unstructured_ingest/connector/pinecone.py +140 -0
- unstructured_ingest/connector/qdrant.py +144 -0
- unstructured_ingest/connector/reddit.py +166 -0
- unstructured_ingest/connector/registry.py +109 -0
- unstructured_ingest/connector/salesforce.py +301 -0
- unstructured_ingest/connector/sharepoint.py +573 -0
- unstructured_ingest/connector/slack.py +224 -0
- unstructured_ingest/connector/sql.py +199 -0
- unstructured_ingest/connector/vectara.py +253 -0
- unstructured_ingest/connector/weaviate.py +190 -0
- unstructured_ingest/connector/wikipedia.py +208 -0
- unstructured_ingest/embed/__init__.py +0 -0
- unstructured_ingest/embed/azure_openai.py +31 -0
- unstructured_ingest/embed/bedrock.py +193 -0
- unstructured_ingest/embed/huggingface.py +52 -0
- unstructured_ingest/embed/interfaces.py +117 -0
- unstructured_ingest/embed/mixedbreadai.py +233 -0
- unstructured_ingest/embed/octoai.py +130 -0
- unstructured_ingest/embed/openai.py +116 -0
- unstructured_ingest/embed/togetherai.py +106 -0
- unstructured_ingest/embed/vertexai.py +126 -0
- unstructured_ingest/embed/voyageai.py +130 -0
- unstructured_ingest/enhanced_dataclass/__init__.py +4 -0
- unstructured_ingest/enhanced_dataclass/core.py +99 -0
- unstructured_ingest/enhanced_dataclass/dataclasses.py +54 -0
- unstructured_ingest/enhanced_dataclass/json_mixin.py +125 -0
- unstructured_ingest/error.py +49 -0
- unstructured_ingest/ingest_backoff/__init__.py +3 -0
- unstructured_ingest/ingest_backoff/_common.py +102 -0
- unstructured_ingest/ingest_backoff/_wrapper.py +122 -0
- unstructured_ingest/interfaces.py +852 -0
- unstructured_ingest/logger.py +130 -0
- unstructured_ingest/main.py +11 -0
- unstructured_ingest/pipeline/__init__.py +22 -0
- unstructured_ingest/pipeline/copy.py +19 -0
- unstructured_ingest/pipeline/doc_factory.py +12 -0
- unstructured_ingest/pipeline/interfaces.py +270 -0
- unstructured_ingest/pipeline/partition.py +60 -0
- unstructured_ingest/pipeline/permissions.py +12 -0
- unstructured_ingest/pipeline/pipeline.py +117 -0
- unstructured_ingest/pipeline/reformat/__init__.py +0 -0
- unstructured_ingest/pipeline/reformat/chunking.py +134 -0
- unstructured_ingest/pipeline/reformat/embedding.py +64 -0
- unstructured_ingest/pipeline/source.py +77 -0
- unstructured_ingest/pipeline/utils.py +6 -0
- unstructured_ingest/pipeline/write.py +18 -0
- unstructured_ingest/processor.py +93 -0
- unstructured_ingest/runner/__init__.py +104 -0
- unstructured_ingest/runner/airtable.py +35 -0
- unstructured_ingest/runner/astradb.py +34 -0
- unstructured_ingest/runner/base_runner.py +89 -0
- unstructured_ingest/runner/biomed.py +45 -0
- unstructured_ingest/runner/confluence.py +35 -0
- unstructured_ingest/runner/delta_table.py +34 -0
- unstructured_ingest/runner/discord.py +35 -0
- unstructured_ingest/runner/elasticsearch.py +40 -0
- unstructured_ingest/runner/fsspec/__init__.py +0 -0
- unstructured_ingest/runner/fsspec/azure.py +30 -0
- unstructured_ingest/runner/fsspec/box.py +28 -0
- unstructured_ingest/runner/fsspec/dropbox.py +30 -0
- unstructured_ingest/runner/fsspec/fsspec.py +40 -0
- unstructured_ingest/runner/fsspec/gcs.py +28 -0
- unstructured_ingest/runner/fsspec/s3.py +28 -0
- unstructured_ingest/runner/fsspec/sftp.py +28 -0
- unstructured_ingest/runner/github.py +37 -0
- unstructured_ingest/runner/gitlab.py +37 -0
- unstructured_ingest/runner/google_drive.py +35 -0
- unstructured_ingest/runner/hubspot.py +35 -0
- unstructured_ingest/runner/jira.py +35 -0
- unstructured_ingest/runner/kafka.py +34 -0
- unstructured_ingest/runner/local.py +23 -0
- unstructured_ingest/runner/mongodb.py +34 -0
- unstructured_ingest/runner/notion.py +61 -0
- unstructured_ingest/runner/onedrive.py +35 -0
- unstructured_ingest/runner/opensearch.py +40 -0
- unstructured_ingest/runner/outlook.py +33 -0
- unstructured_ingest/runner/reddit.py +35 -0
- unstructured_ingest/runner/salesforce.py +33 -0
- unstructured_ingest/runner/sharepoint.py +35 -0
- unstructured_ingest/runner/slack.py +33 -0
- unstructured_ingest/runner/utils.py +47 -0
- unstructured_ingest/runner/wikipedia.py +35 -0
- unstructured_ingest/runner/writers/__init__.py +48 -0
- unstructured_ingest/runner/writers/astradb.py +22 -0
- unstructured_ingest/runner/writers/azure_ai_search.py +24 -0
- unstructured_ingest/runner/writers/base_writer.py +26 -0
- unstructured_ingest/runner/writers/chroma.py +22 -0
- unstructured_ingest/runner/writers/clarifai.py +19 -0
- unstructured_ingest/runner/writers/databricks_volumes.py +25 -0
- unstructured_ingest/runner/writers/delta_table.py +24 -0
- unstructured_ingest/runner/writers/elasticsearch.py +24 -0
- unstructured_ingest/runner/writers/fsspec/__init__.py +0 -0
- unstructured_ingest/runner/writers/fsspec/azure.py +24 -0
- unstructured_ingest/runner/writers/fsspec/box.py +21 -0
- unstructured_ingest/runner/writers/fsspec/dropbox.py +21 -0
- unstructured_ingest/runner/writers/fsspec/gcs.py +19 -0
- unstructured_ingest/runner/writers/fsspec/s3.py +21 -0
- unstructured_ingest/runner/writers/kafka.py +21 -0
- unstructured_ingest/runner/writers/mongodb.py +21 -0
- unstructured_ingest/runner/writers/opensearch.py +26 -0
- unstructured_ingest/runner/writers/pinecone.py +21 -0
- unstructured_ingest/runner/writers/qdrant.py +19 -0
- unstructured_ingest/runner/writers/sql.py +22 -0
- unstructured_ingest/runner/writers/vectara.py +22 -0
- unstructured_ingest/runner/writers/weaviate.py +21 -0
- unstructured_ingest/utils/__init__.py +0 -0
- unstructured_ingest/utils/chunking.py +56 -0
- unstructured_ingest/utils/compression.py +118 -0
- unstructured_ingest/utils/data_prep.py +200 -0
- unstructured_ingest/utils/dep_check.py +78 -0
- unstructured_ingest/utils/google_filetype.py +9 -0
- unstructured_ingest/utils/string_and_date_utils.py +49 -0
- unstructured_ingest/utils/table.py +73 -0
- unstructured_ingest/v2/__init__.py +1 -0
- unstructured_ingest/v2/cli/__init__.py +0 -0
- unstructured_ingest/v2/cli/base/__init__.py +4 -0
- unstructured_ingest/v2/cli/base/cmd.py +269 -0
- unstructured_ingest/v2/cli/base/dest.py +85 -0
- unstructured_ingest/v2/cli/base/importer.py +34 -0
- unstructured_ingest/v2/cli/base/src.py +85 -0
- unstructured_ingest/v2/cli/cli.py +24 -0
- unstructured_ingest/v2/cli/cmds.py +14 -0
- unstructured_ingest/v2/cli/utils/__init__.py +0 -0
- unstructured_ingest/v2/cli/utils/click.py +237 -0
- unstructured_ingest/v2/cli/utils/model_conversion.py +222 -0
- unstructured_ingest/v2/constants.py +2 -0
- unstructured_ingest/v2/errors.py +18 -0
- unstructured_ingest/v2/interfaces/__init__.py +32 -0
- unstructured_ingest/v2/interfaces/connector.py +50 -0
- unstructured_ingest/v2/interfaces/downloader.py +89 -0
- unstructured_ingest/v2/interfaces/file_data.py +116 -0
- unstructured_ingest/v2/interfaces/indexer.py +30 -0
- unstructured_ingest/v2/interfaces/process.py +19 -0
- unstructured_ingest/v2/interfaces/processor.py +88 -0
- unstructured_ingest/v2/interfaces/upload_stager.py +102 -0
- unstructured_ingest/v2/interfaces/uploader.py +53 -0
- unstructured_ingest/v2/logger.py +126 -0
- unstructured_ingest/v2/main.py +11 -0
- unstructured_ingest/v2/otel.py +111 -0
- unstructured_ingest/v2/pipeline/__init__.py +0 -0
- unstructured_ingest/v2/pipeline/interfaces.py +211 -0
- unstructured_ingest/v2/pipeline/otel.py +32 -0
- unstructured_ingest/v2/pipeline/pipeline.py +384 -0
- unstructured_ingest/v2/pipeline/steps/__init__.py +0 -0
- unstructured_ingest/v2/pipeline/steps/chunk.py +80 -0
- unstructured_ingest/v2/pipeline/steps/download.py +207 -0
- unstructured_ingest/v2/pipeline/steps/embed.py +79 -0
- unstructured_ingest/v2/pipeline/steps/filter.py +35 -0
- unstructured_ingest/v2/pipeline/steps/index.py +86 -0
- unstructured_ingest/v2/pipeline/steps/partition.py +79 -0
- unstructured_ingest/v2/pipeline/steps/stage.py +65 -0
- unstructured_ingest/v2/pipeline/steps/uncompress.py +50 -0
- unstructured_ingest/v2/pipeline/steps/upload.py +58 -0
- unstructured_ingest/v2/processes/__init__.py +18 -0
- unstructured_ingest/v2/processes/chunker.py +124 -0
- unstructured_ingest/v2/processes/connector_registry.py +69 -0
- unstructured_ingest/v2/processes/connectors/__init__.py +117 -0
- unstructured_ingest/v2/processes/connectors/airtable.py +235 -0
- unstructured_ingest/v2/processes/connectors/astradb.py +402 -0
- unstructured_ingest/v2/processes/connectors/azure_ai_search.py +276 -0
- unstructured_ingest/v2/processes/connectors/chroma.py +190 -0
- unstructured_ingest/v2/processes/connectors/confluence.py +207 -0
- unstructured_ingest/v2/processes/connectors/couchbase.py +334 -0
- unstructured_ingest/v2/processes/connectors/databricks/__init__.py +52 -0
- unstructured_ingest/v2/processes/connectors/databricks/volumes.py +208 -0
- unstructured_ingest/v2/processes/connectors/databricks/volumes_aws.py +87 -0
- unstructured_ingest/v2/processes/connectors/databricks/volumes_azure.py +102 -0
- unstructured_ingest/v2/processes/connectors/databricks/volumes_gcp.py +85 -0
- unstructured_ingest/v2/processes/connectors/databricks/volumes_native.py +86 -0
- unstructured_ingest/v2/processes/connectors/delta_table.py +191 -0
- unstructured_ingest/v2/processes/connectors/discord.py +158 -0
- unstructured_ingest/v2/processes/connectors/duckdb/__init__.py +15 -0
- unstructured_ingest/v2/processes/connectors/duckdb/base.py +100 -0
- unstructured_ingest/v2/processes/connectors/duckdb/duckdb.py +127 -0
- unstructured_ingest/v2/processes/connectors/duckdb/motherduck.py +126 -0
- unstructured_ingest/v2/processes/connectors/elasticsearch/__init__.py +19 -0
- unstructured_ingest/v2/processes/connectors/elasticsearch/elasticsearch.py +470 -0
- unstructured_ingest/v2/processes/connectors/elasticsearch/opensearch.py +195 -0
- unstructured_ingest/v2/processes/connectors/fsspec/__init__.py +37 -0
- unstructured_ingest/v2/processes/connectors/fsspec/azure.py +197 -0
- unstructured_ingest/v2/processes/connectors/fsspec/box.py +170 -0
- unstructured_ingest/v2/processes/connectors/fsspec/dropbox.py +168 -0
- unstructured_ingest/v2/processes/connectors/fsspec/fsspec.py +332 -0
- unstructured_ingest/v2/processes/connectors/fsspec/gcs.py +197 -0
- unstructured_ingest/v2/processes/connectors/fsspec/s3.py +185 -0
- unstructured_ingest/v2/processes/connectors/fsspec/sftp.py +171 -0
- unstructured_ingest/v2/processes/connectors/fsspec/utils.py +17 -0
- unstructured_ingest/v2/processes/connectors/gitlab.py +268 -0
- unstructured_ingest/v2/processes/connectors/google_drive.py +348 -0
- unstructured_ingest/v2/processes/connectors/kafka/__init__.py +17 -0
- unstructured_ingest/v2/processes/connectors/kafka/cloud.py +121 -0
- unstructured_ingest/v2/processes/connectors/kafka/kafka.py +273 -0
- unstructured_ingest/v2/processes/connectors/kafka/local.py +103 -0
- unstructured_ingest/v2/processes/connectors/kdbai.py +148 -0
- unstructured_ingest/v2/processes/connectors/lancedb/__init__.py +30 -0
- unstructured_ingest/v2/processes/connectors/lancedb/aws.py +43 -0
- unstructured_ingest/v2/processes/connectors/lancedb/azure.py +43 -0
- unstructured_ingest/v2/processes/connectors/lancedb/cloud.py +42 -0
- unstructured_ingest/v2/processes/connectors/lancedb/gcp.py +44 -0
- unstructured_ingest/v2/processes/connectors/lancedb/lancedb.py +169 -0
- unstructured_ingest/v2/processes/connectors/lancedb/local.py +44 -0
- unstructured_ingest/v2/processes/connectors/local.py +217 -0
- unstructured_ingest/v2/processes/connectors/milvus.py +225 -0
- unstructured_ingest/v2/processes/connectors/mongodb.py +361 -0
- unstructured_ingest/v2/processes/connectors/neo4j.py +385 -0
- unstructured_ingest/v2/processes/connectors/notion/__init__.py +0 -0
- unstructured_ingest/v2/processes/connectors/notion/client.py +349 -0
- unstructured_ingest/v2/processes/connectors/notion/connector.py +346 -0
- unstructured_ingest/v2/processes/connectors/notion/helpers.py +448 -0
- unstructured_ingest/v2/processes/connectors/notion/interfaces.py +32 -0
- unstructured_ingest/v2/processes/connectors/notion/types/__init__.py +0 -0
- unstructured_ingest/v2/processes/connectors/notion/types/block.py +96 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/__init__.py +63 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/bookmark.py +40 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/breadcrumb.py +21 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/bulleted_list_item.py +31 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/callout.py +94 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/child_database.py +23 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/child_page.py +23 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/code.py +43 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/column_list.py +35 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/divider.py +22 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/embed.py +36 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/equation.py +23 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/file.py +49 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/heading.py +37 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/image.py +21 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/link_preview.py +24 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/link_to_page.py +29 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/numbered_list.py +29 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/paragraph.py +31 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/pdf.py +49 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/quote.py +37 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/synced_block.py +57 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/table.py +63 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/table_of_contents.py +23 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/template.py +30 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/todo.py +42 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/toggle.py +37 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/unsupported.py +20 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/video.py +22 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database.py +73 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/__init__.py +106 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/checkbox.py +38 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/created_by.py +35 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/created_time.py +34 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/date.py +41 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/email.py +36 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/files.py +37 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/formula.py +49 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/last_edited_by.py +34 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/last_edited_time.py +34 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/multiselect.py +73 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/number.py +49 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/people.py +41 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/phone_number.py +36 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/relation.py +67 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/rich_text.py +43 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/rollup.py +56 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/select.py +69 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/status.py +81 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/title.py +37 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/unique_id.py +50 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/url.py +37 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/verification.py +78 -0
- unstructured_ingest/v2/processes/connectors/notion/types/date.py +29 -0
- unstructured_ingest/v2/processes/connectors/notion/types/file.py +54 -0
- unstructured_ingest/v2/processes/connectors/notion/types/page.py +45 -0
- unstructured_ingest/v2/processes/connectors/notion/types/parent.py +66 -0
- unstructured_ingest/v2/processes/connectors/notion/types/rich_text.py +189 -0
- unstructured_ingest/v2/processes/connectors/notion/types/user.py +79 -0
- unstructured_ingest/v2/processes/connectors/onedrive.py +447 -0
- unstructured_ingest/v2/processes/connectors/outlook.py +239 -0
- unstructured_ingest/v2/processes/connectors/pinecone.py +277 -0
- unstructured_ingest/v2/processes/connectors/qdrant/__init__.py +16 -0
- unstructured_ingest/v2/processes/connectors/qdrant/cloud.py +59 -0
- unstructured_ingest/v2/processes/connectors/qdrant/local.py +58 -0
- unstructured_ingest/v2/processes/connectors/qdrant/qdrant.py +160 -0
- unstructured_ingest/v2/processes/connectors/qdrant/server.py +60 -0
- unstructured_ingest/v2/processes/connectors/redisdb.py +182 -0
- unstructured_ingest/v2/processes/connectors/salesforce.py +303 -0
- unstructured_ingest/v2/processes/connectors/sharepoint.py +448 -0
- unstructured_ingest/v2/processes/connectors/slack.py +248 -0
- unstructured_ingest/v2/processes/connectors/sql/__init__.py +27 -0
- unstructured_ingest/v2/processes/connectors/sql/postgres.py +162 -0
- unstructured_ingest/v2/processes/connectors/sql/singlestore.py +166 -0
- unstructured_ingest/v2/processes/connectors/sql/snowflake.py +210 -0
- unstructured_ingest/v2/processes/connectors/sql/sql.py +434 -0
- unstructured_ingest/v2/processes/connectors/sql/sqlite.py +168 -0
- unstructured_ingest/v2/processes/connectors/utils.py +29 -0
- unstructured_ingest/v2/processes/connectors/vectara.py +350 -0
- unstructured_ingest/v2/processes/connectors/weaviate/__init__.py +22 -0
- unstructured_ingest/v2/processes/connectors/weaviate/cloud.py +165 -0
- unstructured_ingest/v2/processes/connectors/weaviate/embedded.py +90 -0
- unstructured_ingest/v2/processes/connectors/weaviate/local.py +73 -0
- unstructured_ingest/v2/processes/connectors/weaviate/weaviate.py +267 -0
- unstructured_ingest/v2/processes/embedder.py +195 -0
- unstructured_ingest/v2/processes/filter.py +60 -0
- unstructured_ingest/v2/processes/partitioner.py +188 -0
- unstructured_ingest/v2/processes/uncompress.py +61 -0
- unstructured_ingest/v2/unstructured_api.py +128 -0
- unstructured_ingest/v2/utils.py +61 -0
- unstructured_ingest-0.3.13.dist-info/LICENSE.md +201 -0
- unstructured_ingest-0.3.13.dist-info/METADATA +205 -0
- unstructured_ingest-0.3.13.dist-info/RECORD +557 -0
- unstructured_ingest-0.3.13.dist-info/WHEEL +5 -0
- unstructured_ingest-0.3.13.dist-info/entry_points.txt +2 -0
- unstructured_ingest-0.3.13.dist-info/top_level.txt +2 -0
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
# https://developers.notion.com/reference/block#bulleted-list-item
|
|
2
|
+
from dataclasses import dataclass, field
|
|
3
|
+
from typing import List, Optional
|
|
4
|
+
|
|
5
|
+
from htmlBuilder.tags import HtmlTag, Li
|
|
6
|
+
|
|
7
|
+
from unstructured_ingest.connector.notion.interfaces import BlockBase
|
|
8
|
+
from unstructured_ingest.connector.notion.types.rich_text import RichText
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@dataclass
class BulletedListItem(BlockBase):
    """Notion bulleted-list-item block: a color, raw child payloads and rich text."""

    color: str
    children: List[dict] = field(default_factory=list)
    rich_text: List[RichText] = field(default_factory=list)

    @staticmethod
    def can_have_children() -> bool:
        """Declare that this block type may hold nested blocks."""
        return True

    @classmethod
    def from_dict(cls, data: dict):
        """Build an item from its raw dict.

        ``rich_text`` is popped from ``data`` (the caller's dict is mutated);
        ``color`` is required and ``children`` defaults to an empty list.
        """
        raw_spans = data.pop("rich_text", [])
        color = data["color"]
        children = data.get("children", [])
        spans = [RichText.from_dict(span) for span in raw_spans]
        return cls(color=color, children=children, rich_text=spans)

    def get_html(self) -> Optional[HtmlTag]:
        """Render the item as an ``Li`` tag containing the rendered rich text."""
        rendered = [span.get_html() for span in self.rich_text]
        return Li([], rendered)
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
# https://developers.notion.com/reference/block#callout
|
|
2
|
+
from dataclasses import dataclass, field
|
|
3
|
+
from typing import List, Optional, Union
|
|
4
|
+
|
|
5
|
+
from htmlBuilder.attributes import Href, Style
|
|
6
|
+
from htmlBuilder.tags import A, Div, HtmlTag, P
|
|
7
|
+
|
|
8
|
+
from unstructured_ingest.connector.notion.interfaces import (
|
|
9
|
+
BlockBase,
|
|
10
|
+
FromJSONMixin,
|
|
11
|
+
GetHTMLMixin,
|
|
12
|
+
)
|
|
13
|
+
from unstructured_ingest.connector.notion.types.rich_text import RichText
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@dataclass
class EmojiIcon(FromJSONMixin, GetHTMLMixin):
    """Icon expressed as an emoji string."""

    emoji: str
    type: str = "emoji"

    @classmethod
    def from_dict(cls, data: dict):
        """Create the icon by forwarding every key of ``data`` as a keyword argument."""
        icon = cls(**data)
        return icon

    def get_html(self) -> Optional[HtmlTag]:
        """Wrap the emoji in a ``P`` tag."""
        paragraph = P([], self.emoji)
        return paragraph
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@dataclass
class ExternalIconContent(FromJSONMixin):
    """Payload of an external icon; carries only the icon URL."""

    url: str

    @classmethod
    def from_dict(cls, data: dict):
        """Instantiate from ``data``, forwarding each key as a keyword argument."""
        content = cls(**data)
        return content
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
@dataclass
class ExternalIcon(FromJSONMixin, GetHTMLMixin):
    """Icon that points at an externally hosted resource."""

    external: ExternalIconContent
    type: str = "external"

    @classmethod
    def from_dict(cls, data: dict):
        """Build the icon from its raw dict.

        The ``external`` entry is popped from ``data`` and parsed; the remaining
        keys are forwarded to the constructor unchanged.
        """
        content = ExternalIconContent.from_dict(data=data.pop("external"))
        return cls(external=content, **data)

    def get_html(self) -> Optional[HtmlTag]:
        """Return an ``A`` tag linking to the icon URL, or ``None`` without content."""
        if not self.external:
            return None
        return A([Href(self.external.url)], [self.external.url])
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
class Icon(FromJSONMixin):
    """Factory that selects the concrete icon class from the payload's ``type``."""

    @classmethod
    def from_dict(cls, data: dict) -> Union[EmojiIcon, ExternalIcon]:
        """Parse ``data`` into an ``EmojiIcon`` or ``ExternalIcon``.

        Raises:
            ValueError: if ``type`` is neither ``"emoji"`` nor ``"external"``.
        """
        t = data.get("type")
        if t == "emoji":
            return EmojiIcon.from_dict(data)
        if t == "external":
            return ExternalIcon.from_dict(data)
        # Any other icon kind is rejected rather than silently dropped.
        raise ValueError(f"Unexpected icon type: {t} ({data})")
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
@dataclass
class Callout(BlockBase):
    """Notion callout block: colored rich text with an optional leading icon."""

    color: str
    icon: Optional[Union[EmojiIcon, ExternalIcon]] = None
    rich_text: List[RichText] = field(default_factory=list)

    @staticmethod
    def can_have_children() -> bool:
        """Declare that this block type may hold nested blocks."""
        return True

    @classmethod
    def from_dict(cls, data: dict):
        """Build a callout from its raw dict.

        ``rich_text`` and ``icon`` are popped from ``data``. ``color`` is
        required. A missing or null ``icon`` yields ``icon=None``, matching the
        field's ``Optional`` declaration, instead of raising.

        Raises:
            KeyError: if ``color`` is absent.
            ValueError: if an icon is present but of an unsupported type.
        """
        rich_text = data.pop("rich_text", [])
        color = data["color"]
        # Only a present, non-null icon is parsed; malformed icons still raise.
        raw_icon = data.pop("icon", None)
        icon = Icon.from_dict(raw_icon) if raw_icon is not None else None
        return cls(
            color=color,
            icon=icon,
            rich_text=[RichText.from_dict(rt) for rt in rich_text],
        )

    def get_html(self) -> Optional[HtmlTag]:
        """Render the callout as a ``Div`` holding the icon (if any) and the rich text."""
        elements = []
        # Render the icon once and reuse the result for both the check and the append.
        icon_html = self.icon.get_html() if self.icon else None
        if icon_html:
            elements.append(icon_html)
        if self.rich_text:
            elements.extend([rt.get_html() for rt in self.rich_text])
        attributes = []
        if self.color:
            attributes.append(Style(f"color:{self.color}"))
        return Div(attributes, elements)
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# https://developers.notion.com/reference/block#child-database
|
|
2
|
+
from dataclasses import dataclass
|
|
3
|
+
from typing import Optional
|
|
4
|
+
|
|
5
|
+
from htmlBuilder.tags import HtmlTag, P
|
|
6
|
+
|
|
7
|
+
from unstructured_ingest.connector.notion.interfaces import BlockBase
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@dataclass
class ChildDatabase(BlockBase):
    """Notion child-database block, described by its title."""

    title: str

    @staticmethod
    def can_have_children() -> bool:
        """Declare that this block type may hold nested blocks."""
        return True

    @classmethod
    def from_dict(cls, data: dict):
        """Instantiate from ``data``, forwarding each key as a keyword argument."""
        block = cls(**data)
        return block

    def get_html(self) -> Optional[HtmlTag]:
        """Render the title inside a ``P`` tag."""
        paragraph = P([], self.title)
        return paragraph
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# https://developers.notion.com/reference/block#child-page
|
|
2
|
+
from dataclasses import dataclass
|
|
3
|
+
from typing import Optional
|
|
4
|
+
|
|
5
|
+
from htmlBuilder.tags import HtmlTag, P
|
|
6
|
+
|
|
7
|
+
from unstructured_ingest.connector.notion.interfaces import BlockBase, GetHTMLMixin
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@dataclass
class ChildPage(BlockBase, GetHTMLMixin):
    """Notion child-page block, described by its title."""

    title: str

    @staticmethod
    def can_have_children() -> bool:
        """Declare that this block type may hold nested blocks."""
        return True

    @classmethod
    def from_dict(cls, data: dict):
        """Instantiate from ``data``, forwarding each key as a keyword argument."""
        block = cls(**data)
        return block

    def get_html(self) -> Optional[HtmlTag]:
        """Render the title inside a ``P`` tag."""
        paragraph = P([], self.title)
        return paragraph
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
# https://developers.notion.com/reference/block#code
|
|
2
|
+
from dataclasses import dataclass, field
|
|
3
|
+
from typing import List, Optional
|
|
4
|
+
|
|
5
|
+
from htmlBuilder.tags import Br, Div, HtmlTag
|
|
6
|
+
from htmlBuilder.tags import Code as HtmlCode
|
|
7
|
+
|
|
8
|
+
from unstructured_ingest.connector.notion.interfaces import BlockBase
|
|
9
|
+
from unstructured_ingest.connector.notion.types.rich_text import RichText
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@dataclass
class Code(BlockBase):
    """Notion code block: a language tag, the code as rich text and an optional caption."""

    language: str
    rich_text: List[RichText] = field(default_factory=list)
    caption: List[RichText] = field(default_factory=list)

    @staticmethod
    def can_have_children() -> bool:
        """Declare that this block type never holds nested blocks."""
        return False

    @classmethod
    def from_dict(cls, data: dict):
        """Build a code block from its raw dict.

        ``rich_text`` and ``caption`` are popped from ``data``; ``language`` is required.
        """
        raw_text = data.pop("rich_text", [])
        raw_caption = data.pop("caption", [])
        language = data["language"]
        text_spans = [RichText.from_dict(span) for span in raw_text]
        caption_spans = [RichText.from_dict(span) for span in raw_caption]
        return cls(language=language, rich_text=text_spans, caption=caption_spans)

    def get_html(self) -> Optional[HtmlTag]:
        """Render code and caption in a ``Div``, separated by ``Br``; ``None`` if both are empty."""
        parts = []
        if self.rich_text:
            parts.append(HtmlCode([], [span.get_html() for span in self.rich_text]))
        if self.caption:
            parts.append(Div([], [span.get_html() for span in self.caption]))
        if not parts:
            return None

        # A single Br instance is shared by every separator slot.
        separator = Br()
        interleaved = []
        for part in parts:
            if interleaved:
                interleaved.append(separator)
            interleaved.append(part)
        return Div([], interleaved)
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# https://developers.notion.com/reference/block#column-list-and-column
|
|
2
|
+
from dataclasses import dataclass
|
|
3
|
+
from typing import Optional
|
|
4
|
+
|
|
5
|
+
from htmlBuilder.tags import HtmlTag
|
|
6
|
+
|
|
7
|
+
from unstructured_ingest.connector.notion.interfaces import BlockBase
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@dataclass
class ColumnList(BlockBase):
    """Notion column-list block; it has no fields and no HTML of its own."""

    @staticmethod
    def can_have_children() -> bool:
        """Declare that this block type may hold nested blocks."""
        return True

    @classmethod
    def from_dict(cls, data: dict):
        """Return a fresh instance; the contents of ``data`` are not read."""
        instance = cls()
        return instance

    def get_html(self) -> Optional[HtmlTag]:
        """Always return ``None``: the block itself renders nothing."""
        return None
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@dataclass
class Column(BlockBase):
    """Notion column block; it has no fields and no HTML of its own."""

    @staticmethod
    def can_have_children() -> bool:
        """Declare that this block type may hold nested blocks."""
        return True

    @classmethod
    def from_dict(cls, data: dict):
        """Return a fresh instance; the contents of ``data`` are not read."""
        instance = cls()
        return instance

    def get_html(self) -> Optional[HtmlTag]:
        """Always return ``None``: the block itself renders nothing."""
        return None
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# https://developers.notion.com/reference/block#divider
|
|
2
|
+
from dataclasses import dataclass
|
|
3
|
+
from typing import Optional
|
|
4
|
+
|
|
5
|
+
from htmlBuilder.attributes import Style
|
|
6
|
+
from htmlBuilder.tags import Hr, HtmlTag
|
|
7
|
+
|
|
8
|
+
from unstructured_ingest.connector.notion.interfaces import BlockBase
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@dataclass
class Divider(BlockBase):
    """Notion divider block, rendered as a styled ``Hr`` tag."""

    @staticmethod
    def can_have_children() -> bool:
        """Declare that this block type never holds nested blocks."""
        return False

    @classmethod
    def from_dict(cls, data: dict):
        """Return a fresh instance; the contents of ``data`` are not read."""
        instance = cls()
        return instance

    def get_html(self) -> Optional[HtmlTag]:
        """Return an ``Hr`` tag carrying a fixed border style."""
        rule_style = Style("border-top: 3px solid #bbb")
        return Hr([rule_style])
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
# https://developers.notion.com/reference/block#embed
|
|
2
|
+
from dataclasses import dataclass, field
|
|
3
|
+
from typing import List, Optional
|
|
4
|
+
|
|
5
|
+
from htmlBuilder.attributes import Href
|
|
6
|
+
from htmlBuilder.tags import A, Br, Div, HtmlTag
|
|
7
|
+
|
|
8
|
+
from unstructured_ingest.connector.notion.interfaces import BlockBase
|
|
9
|
+
from unstructured_ingest.connector.notion.types.rich_text import RichText
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@dataclass
class Embed(BlockBase):
    """Notion embed block: a URL plus an optional rich-text caption."""

    url: str
    caption: List[RichText] = field(default_factory=list)

    @staticmethod
    def can_have_children() -> bool:
        """Declare that this block type never holds nested blocks."""
        return False

    @classmethod
    def from_dict(cls, data: dict):
        """Build an embed from its raw dict.

        ``caption`` is popped from ``data`` and parsed; the remaining keys are
        forwarded to the constructor unchanged.
        """
        caption_spans = [RichText.from_dict(item) for item in data.pop("caption", [])]
        return cls(caption=caption_spans, **data)

    def get_html(self) -> Optional[HtmlTag]:
        """Render link and caption in a ``Div``, separated by ``Br``; ``None`` if both are empty."""
        parts = []
        if self.url:
            parts.append(A([Href(self.url)], self.url))
        if self.caption:
            parts.append(Div([], [span.get_html() for span in self.caption]))
        if not parts:
            return None

        # A single Br instance is shared by every separator slot.
        separator = Br()
        interleaved = []
        for part in parts:
            if interleaved:
                interleaved.append(separator)
            interleaved.append(part)
        return Div([], interleaved)
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# https://developers.notion.com/reference/block#equation
|
|
2
|
+
from dataclasses import dataclass
|
|
3
|
+
from typing import Optional
|
|
4
|
+
|
|
5
|
+
from htmlBuilder.tags import Div, HtmlTag
|
|
6
|
+
|
|
7
|
+
from unstructured_ingest.connector.notion.interfaces import BlockBase
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@dataclass
class Equation(BlockBase):
    """Notion equation block holding the expression as a string."""

    expression: str

    @staticmethod
    def can_have_children() -> bool:
        """Declare that this block type never holds nested blocks."""
        return False

    @classmethod
    def from_dict(cls, data: dict):
        """Instantiate from ``data``, forwarding each key as a keyword argument."""
        block = cls(**data)
        return block

    def get_html(self) -> Optional[HtmlTag]:
        """Place the raw expression string inside a ``Div`` tag."""
        container = Div([], self.expression)
        return container
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
# https://developers.notion.com/reference/block#file
|
|
2
|
+
from dataclasses import dataclass, field
|
|
3
|
+
from typing import List, Optional
|
|
4
|
+
|
|
5
|
+
from htmlBuilder.attributes import Href
|
|
6
|
+
from htmlBuilder.tags import A, Br, Div, HtmlTag
|
|
7
|
+
|
|
8
|
+
from unstructured_ingest.connector.notion.interfaces import BlockBase
|
|
9
|
+
from unstructured_ingest.connector.notion.types.file import External
|
|
10
|
+
from unstructured_ingest.connector.notion.types.file import File as FileContent
|
|
11
|
+
from unstructured_ingest.connector.notion.types.rich_text import RichText
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@dataclass
class File(BlockBase):
    """Notion file block: an external or hosted file reference with an optional caption."""

    type: str
    external: Optional[External] = None
    file: Optional[FileContent] = None
    caption: List[RichText] = field(default_factory=list)

    @staticmethod
    def can_have_children() -> bool:
        """Declare that this block type never holds nested blocks."""
        return False

    @classmethod
    def from_dict(cls, data: dict):
        """Build a file block from its raw dict.

        ``caption`` is popped from ``data``. ``type`` is required and decides
        whether the ``external`` or ``file`` entry is parsed; any other type
        leaves both unset.
        """
        caption_spans = [RichText.from_dict(span) for span in data.pop("caption", [])]
        kind = data["type"]
        block = cls(type=kind, caption=caption_spans)
        if kind == "external":
            block.external = External.from_dict(data["external"])
        elif kind == "file":
            block.file = FileContent.from_dict(data["file"])
        return block

    def get_html(self) -> Optional[HtmlTag]:
        """Render links and caption in a ``Div``, separated by ``Br``; ``None`` if empty."""
        parts = []
        if self.file:
            parts.append(A([Href(self.file.url)], self.file.url))
        if self.external:
            parts.append(A([Href(self.external.url)], self.external.url))
        if self.caption:
            parts.append(Div([], [span.get_html() for span in self.caption]))
        if not parts:
            return None

        # A single Br instance is shared by every separator slot.
        separator = Br()
        interleaved = []
        for part in parts:
            if interleaved:
                interleaved.append(separator)
            interleaved.append(part)
        return Div([], interleaved)
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# https://developers.notion.com/reference/block#headings
|
|
2
|
+
from dataclasses import dataclass, field
|
|
3
|
+
from typing import List, Optional
|
|
4
|
+
|
|
5
|
+
from htmlBuilder.attributes import Style
|
|
6
|
+
from htmlBuilder.tags import Div, HtmlTag
|
|
7
|
+
|
|
8
|
+
from unstructured_ingest.connector.notion.interfaces import BlockBase
|
|
9
|
+
from unstructured_ingest.connector.notion.types.rich_text import RichText
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@dataclass
class Heading(BlockBase):
    """Notion heading block: color, toggle flag and the heading's rich text."""

    color: str
    is_toggleable: bool
    rich_text: List[RichText] = field(default_factory=list)

    @staticmethod
    def can_have_children() -> bool:
        """Declare that this block type never holds nested blocks."""
        # NOTE(review): this ignores ``is_toggleable`` — confirm toggleable
        # headings are not expected to expose children here.
        return False

    @classmethod
    def from_dict(cls, data: dict):
        """Build a heading from its raw dict.

        ``rich_text`` is popped from ``data``; the remaining keys are forwarded
        to the constructor, and the parsed rich text is attached afterwards.
        """
        raw_spans = data.pop("rich_text", [])
        block = cls(**data)
        block.rich_text = [RichText.from_dict(span) for span in raw_spans]
        return block

    def get_html(self) -> Optional[HtmlTag]:
        """Render the rich text in a ``Div``; ``None`` when there is no text.

        A ``color`` style is attached unless the color is empty or ``"default"``.
        """
        if not self.rich_text:
            return None

        rendered = [span.get_html() for span in self.rich_text]
        has_custom_color = bool(self.color) and self.color != "default"
        attributes = [Style(f"color: {self.color}")] if has_custom_color else []
        return Div(attributes, rendered)
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# https://developers.notion.com/reference/block#image
|
|
2
|
+
from typing import Optional
|
|
3
|
+
|
|
4
|
+
from htmlBuilder.attributes import Src
|
|
5
|
+
from htmlBuilder.tags import HtmlTag, Img
|
|
6
|
+
|
|
7
|
+
from unstructured_ingest.connector.notion.interfaces import BlockBase
|
|
8
|
+
from unstructured_ingest.connector.notion.types.file import FileObject
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class Image(BlockBase, FileObject):
    """Notion image block; rendering reads the ``external`` and ``file`` attributes."""

    @staticmethod
    def can_have_children() -> bool:
        """Declare that this block type never holds nested blocks."""
        return False

    def get_html(self) -> Optional[HtmlTag]:
        """Return an ``Img`` tag for the external source, else the hosted file, else ``None``."""
        # The external reference wins when both are set.
        source = self.external or self.file
        if not source:
            return None
        return Img([Src(source.url)], [])
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# https://developers.notion.com/reference/block#link-preview
|
|
2
|
+
from dataclasses import dataclass
|
|
3
|
+
from typing import Optional
|
|
4
|
+
|
|
5
|
+
from htmlBuilder.attributes import Href
|
|
6
|
+
from htmlBuilder.tags import A, HtmlTag
|
|
7
|
+
|
|
8
|
+
from unstructured_ingest.connector.notion.interfaces import BlockBase
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@dataclass
class LinkPreview(BlockBase):
    """Notion link-preview block holding the previewed URL."""

    url: str

    @staticmethod
    def can_have_children() -> bool:
        """Declare that this block type never holds nested blocks."""
        return False

    @classmethod
    def from_dict(cls, data: dict):
        """Instantiate from ``data``, forwarding each key as a keyword argument."""
        block = cls(**data)
        return block

    def get_html(self) -> Optional[HtmlTag]:
        """Return an ``A`` tag whose target and text are both the URL."""
        target = Href(self.url)
        return A([target], self.url)
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
# https://developers.notion.com/reference/block#link-to-page
|
|
2
|
+
from dataclasses import dataclass
|
|
3
|
+
from typing import Optional
|
|
4
|
+
|
|
5
|
+
from htmlBuilder.tags import Div, HtmlTag
|
|
6
|
+
|
|
7
|
+
from unstructured_ingest.connector.notion.interfaces import BlockBase
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@dataclass
class LinkToPage(BlockBase):
    """Notion link-to-page block referencing a page id or a database id."""

    type: str
    page_id: Optional[str] = None
    database_id: Optional[str] = None

    @staticmethod
    def can_have_children() -> bool:
        """Declare that this block type never holds nested blocks."""
        return False

    @classmethod
    def from_dict(cls, data: dict):
        """Instantiate from ``data``, forwarding each key as a keyword argument."""
        block = cls(**data)
        return block

    def get_html(self) -> Optional[HtmlTag]:
        """Return a ``Div`` with the page id, else the database id, else ``None``."""
        # The page id wins when both ids are set.
        target_id = self.page_id or self.database_id
        if not target_id:
            return None
        return Div([], target_id)
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
# https://developers.notion.com/reference/block#numbered-list-item
|
|
2
|
+
from dataclasses import dataclass, field
|
|
3
|
+
from typing import List, Optional
|
|
4
|
+
|
|
5
|
+
from htmlBuilder.tags import HtmlTag, Li
|
|
6
|
+
|
|
7
|
+
from unstructured_ingest.connector.notion.interfaces import BlockBase
|
|
8
|
+
from unstructured_ingest.connector.notion.types.rich_text import RichText
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@dataclass
class NumberedListItem(BlockBase):
    """Notion numbered-list-item block: a color, raw child payloads and rich text."""

    color: str
    children: List[dict] = field(default_factory=list)
    rich_text: List[RichText] = field(default_factory=list)

    @staticmethod
    def can_have_children() -> bool:
        """Declare that this block type may hold nested blocks."""
        return True

    @classmethod
    def from_dict(cls, data: dict):
        """Build an item from its raw dict.

        ``rich_text`` is popped from ``data``; the remaining keys are forwarded
        to the constructor, and the parsed rich text is attached afterwards.
        """
        raw_spans = data.pop("rich_text", [])
        item = cls(**data)
        item.rich_text = [RichText.from_dict(span) for span in raw_spans]
        return item

    def get_html(self) -> Optional[HtmlTag]:
        """Render the item as an ``Li`` tag containing the rendered rich text."""
        rendered = [span.get_html() for span in self.rich_text]
        return Li([], rendered)
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
# https://developers.notion.com/reference/block#paragraph
|
|
2
|
+
from dataclasses import dataclass, field
|
|
3
|
+
from typing import List, Optional
|
|
4
|
+
|
|
5
|
+
from htmlBuilder.tags import Br, Div, HtmlTag
|
|
6
|
+
|
|
7
|
+
from unstructured_ingest.connector.notion.interfaces import BlockBase
|
|
8
|
+
from unstructured_ingest.connector.notion.types.rich_text import RichText
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@dataclass
class Paragraph(BlockBase):
    """Notion paragraph block: a color, raw child payloads and rich text."""

    color: str
    children: List[dict] = field(default_factory=list)
    rich_text: List[RichText] = field(default_factory=list)

    @staticmethod
    def can_have_children() -> bool:
        """Declare that this block type may hold nested blocks."""
        return True

    @classmethod
    def from_dict(cls, data: dict):
        """Build a paragraph from its raw dict.

        ``rich_text`` is popped from ``data``; the remaining keys are forwarded
        to the constructor, and the parsed rich text is attached afterwards.
        """
        raw_spans = data.pop("rich_text", [])
        block = cls(**data)
        block.rich_text = [RichText.from_dict(span) for span in raw_spans]
        return block

    def get_html(self) -> Optional[HtmlTag]:
        """Return a ``Div`` of the rendered rich text, or a ``Br`` for an empty paragraph."""
        if self.rich_text:
            return Div([], [span.get_html() for span in self.rich_text])
        return Br()
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
# https://developers.notion.com/reference/block#pdf
|
|
2
|
+
from dataclasses import dataclass, field
|
|
3
|
+
from typing import List, Optional
|
|
4
|
+
|
|
5
|
+
from htmlBuilder.attributes import Href
|
|
6
|
+
from htmlBuilder.tags import A, Br, Div, HtmlTag
|
|
7
|
+
|
|
8
|
+
from unstructured_ingest.connector.notion.interfaces import BlockBase
|
|
9
|
+
from unstructured_ingest.connector.notion.types.file import External, File
|
|
10
|
+
from unstructured_ingest.connector.notion.types.rich_text import RichText
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@dataclass
class PDF(BlockBase):
    """Notion PDF block: an external or hosted file reference with an optional caption."""

    type: str
    caption: List[RichText] = field(default_factory=list)
    external: Optional[External] = None
    file: Optional[File] = None

    @staticmethod
    def can_have_children() -> bool:
        """Declare that this block type never holds nested blocks."""
        return False

    @classmethod
    def from_dict(cls, data: dict):
        """Build a PDF block from its raw dict.

        ``caption`` is popped from ``data``. ``type`` is required and decides
        whether the ``external`` or ``file`` entry is parsed; any other type
        leaves both unset.
        """
        raw_caption = data.pop("caption", [])
        kind = data["type"]
        pdf = cls(type=kind)
        pdf.caption = [RichText.from_dict(span) for span in raw_caption]
        if kind == "external":
            pdf.external = External.from_dict(data["external"])
        elif kind == "file":
            pdf.file = File.from_dict(data["file"])
        return pdf

    def get_html(self) -> Optional[HtmlTag]:
        """Render links and caption in a ``Div``, separated by ``Br``; ``None`` if empty."""
        parts = []
        if self.external:
            parts.append(A([Href(self.external.url)], self.external.url))
        if self.file:
            parts.append(A([Href(self.file.url)], self.file.url))
        if self.caption:
            parts.append(Div([], [span.get_html() for span in self.caption]))
        if not parts:
            return None

        # A single Br instance is shared by every separator slot.
        separator = Br()
        interleaved = []
        for part in parts:
            if interleaved:
                interleaved.append(separator)
            interleaved.append(part)
        return Div([], interleaved)
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# https://developers.notion.com/reference/block#quote
|
|
2
|
+
from dataclasses import dataclass, field
|
|
3
|
+
from typing import List, Optional
|
|
4
|
+
|
|
5
|
+
from htmlBuilder.attributes import Style
|
|
6
|
+
from htmlBuilder.tags import Div, HtmlTag
|
|
7
|
+
|
|
8
|
+
from unstructured_ingest.connector.notion.interfaces import BlockBase
|
|
9
|
+
from unstructured_ingest.connector.notion.types.rich_text import RichText
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@dataclass
class Quote(BlockBase):
    """Notion quote block: a color, raw child payloads and rich text."""

    color: str
    children: List[dict] = field(default_factory=list)
    rich_text: List[RichText] = field(default_factory=list)

    @staticmethod
    def can_have_children() -> bool:
        """Declare that this block type may hold nested blocks."""
        return True

    @classmethod
    def from_dict(cls, data: dict):
        """Build a quote from its raw dict.

        ``rich_text`` is popped from ``data``; the remaining keys are forwarded
        to the constructor, and the parsed rich text is attached afterwards.
        """
        raw_spans = data.pop("rich_text", [])
        block = cls(**data)
        block.rich_text = [RichText.from_dict(span) for span in raw_spans]
        return block

    def get_html(self) -> Optional[HtmlTag]:
        """Render the rich text in a ``Div``; ``None`` when there is no text.

        A ``color`` style is attached unless the color is empty or ``"default"``.
        """
        if not self.rich_text:
            return None

        rendered = [span.get_html() for span in self.rich_text]
        has_custom_color = bool(self.color) and self.color != "default"
        attributes = [Style(f"color: {self.color}")] if has_custom_color else []
        return Div(attributes, rendered)
|