unstructured-ingest 0.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of unstructured-ingest might be problematic. Click here for more details.
- unstructured_ingest/__init__.py +1 -0
- unstructured_ingest/__version__.py +1 -0
- unstructured_ingest/cli/__init__.py +14 -0
- unstructured_ingest/cli/base/__init__.py +0 -0
- unstructured_ingest/cli/base/cmd.py +19 -0
- unstructured_ingest/cli/base/dest.py +87 -0
- unstructured_ingest/cli/base/src.py +57 -0
- unstructured_ingest/cli/cli.py +32 -0
- unstructured_ingest/cli/cmd_factory.py +12 -0
- unstructured_ingest/cli/cmds/__init__.py +145 -0
- unstructured_ingest/cli/cmds/airtable.py +69 -0
- unstructured_ingest/cli/cmds/astra.py +99 -0
- unstructured_ingest/cli/cmds/azure_cognitive_search.py +65 -0
- unstructured_ingest/cli/cmds/biomed.py +52 -0
- unstructured_ingest/cli/cmds/chroma.py +104 -0
- unstructured_ingest/cli/cmds/clarifai.py +71 -0
- unstructured_ingest/cli/cmds/confluence.py +69 -0
- unstructured_ingest/cli/cmds/databricks_volumes.py +163 -0
- unstructured_ingest/cli/cmds/delta_table.py +94 -0
- unstructured_ingest/cli/cmds/discord.py +47 -0
- unstructured_ingest/cli/cmds/elasticsearch.py +133 -0
- unstructured_ingest/cli/cmds/fsspec/__init__.py +0 -0
- unstructured_ingest/cli/cmds/fsspec/azure.py +94 -0
- unstructured_ingest/cli/cmds/fsspec/box.py +48 -0
- unstructured_ingest/cli/cmds/fsspec/dropbox.py +51 -0
- unstructured_ingest/cli/cmds/fsspec/fsspec.py +15 -0
- unstructured_ingest/cli/cmds/fsspec/gcs.py +71 -0
- unstructured_ingest/cli/cmds/fsspec/s3.py +74 -0
- unstructured_ingest/cli/cmds/fsspec/sftp.py +58 -0
- unstructured_ingest/cli/cmds/github.py +54 -0
- unstructured_ingest/cli/cmds/gitlab.py +54 -0
- unstructured_ingest/cli/cmds/google_drive.py +49 -0
- unstructured_ingest/cli/cmds/hubspot.py +70 -0
- unstructured_ingest/cli/cmds/jira.py +71 -0
- unstructured_ingest/cli/cmds/kafka.py +102 -0
- unstructured_ingest/cli/cmds/local.py +43 -0
- unstructured_ingest/cli/cmds/mongodb.py +72 -0
- unstructured_ingest/cli/cmds/notion.py +48 -0
- unstructured_ingest/cli/cmds/onedrive.py +66 -0
- unstructured_ingest/cli/cmds/opensearch.py +117 -0
- unstructured_ingest/cli/cmds/outlook.py +67 -0
- unstructured_ingest/cli/cmds/pinecone.py +71 -0
- unstructured_ingest/cli/cmds/qdrant.py +124 -0
- unstructured_ingest/cli/cmds/reddit.py +67 -0
- unstructured_ingest/cli/cmds/salesforce.py +58 -0
- unstructured_ingest/cli/cmds/sharepoint.py +66 -0
- unstructured_ingest/cli/cmds/slack.py +56 -0
- unstructured_ingest/cli/cmds/sql.py +66 -0
- unstructured_ingest/cli/cmds/vectara.py +66 -0
- unstructured_ingest/cli/cmds/weaviate.py +98 -0
- unstructured_ingest/cli/cmds/wikipedia.py +40 -0
- unstructured_ingest/cli/common.py +7 -0
- unstructured_ingest/cli/interfaces.py +656 -0
- unstructured_ingest/cli/utils.py +205 -0
- unstructured_ingest/connector/__init__.py +0 -0
- unstructured_ingest/connector/airtable.py +309 -0
- unstructured_ingest/connector/astra.py +237 -0
- unstructured_ingest/connector/azure_cognitive_search.py +144 -0
- unstructured_ingest/connector/biomed.py +313 -0
- unstructured_ingest/connector/chroma.py +158 -0
- unstructured_ingest/connector/clarifai.py +122 -0
- unstructured_ingest/connector/confluence.py +285 -0
- unstructured_ingest/connector/databricks_volumes.py +137 -0
- unstructured_ingest/connector/delta_table.py +203 -0
- unstructured_ingest/connector/discord.py +180 -0
- unstructured_ingest/connector/elasticsearch.py +396 -0
- unstructured_ingest/connector/fsspec/__init__.py +0 -0
- unstructured_ingest/connector/fsspec/azure.py +78 -0
- unstructured_ingest/connector/fsspec/box.py +109 -0
- unstructured_ingest/connector/fsspec/dropbox.py +160 -0
- unstructured_ingest/connector/fsspec/fsspec.py +359 -0
- unstructured_ingest/connector/fsspec/gcs.py +82 -0
- unstructured_ingest/connector/fsspec/s3.py +62 -0
- unstructured_ingest/connector/fsspec/sftp.py +81 -0
- unstructured_ingest/connector/git.py +124 -0
- unstructured_ingest/connector/github.py +173 -0
- unstructured_ingest/connector/gitlab.py +142 -0
- unstructured_ingest/connector/google_drive.py +349 -0
- unstructured_ingest/connector/hubspot.py +278 -0
- unstructured_ingest/connector/jira.py +469 -0
- unstructured_ingest/connector/kafka.py +294 -0
- unstructured_ingest/connector/local.py +139 -0
- unstructured_ingest/connector/mongodb.py +285 -0
- unstructured_ingest/connector/notion/__init__.py +0 -0
- unstructured_ingest/connector/notion/client.py +233 -0
- unstructured_ingest/connector/notion/connector.py +468 -0
- unstructured_ingest/connector/notion/helpers.py +584 -0
- unstructured_ingest/connector/notion/interfaces.py +32 -0
- unstructured_ingest/connector/notion/types/__init__.py +0 -0
- unstructured_ingest/connector/notion/types/block.py +95 -0
- unstructured_ingest/connector/notion/types/blocks/__init__.py +63 -0
- unstructured_ingest/connector/notion/types/blocks/bookmark.py +40 -0
- unstructured_ingest/connector/notion/types/blocks/breadcrumb.py +21 -0
- unstructured_ingest/connector/notion/types/blocks/bulleted_list_item.py +31 -0
- unstructured_ingest/connector/notion/types/blocks/callout.py +94 -0
- unstructured_ingest/connector/notion/types/blocks/child_database.py +23 -0
- unstructured_ingest/connector/notion/types/blocks/child_page.py +23 -0
- unstructured_ingest/connector/notion/types/blocks/code.py +43 -0
- unstructured_ingest/connector/notion/types/blocks/column_list.py +35 -0
- unstructured_ingest/connector/notion/types/blocks/divider.py +22 -0
- unstructured_ingest/connector/notion/types/blocks/embed.py +36 -0
- unstructured_ingest/connector/notion/types/blocks/equation.py +23 -0
- unstructured_ingest/connector/notion/types/blocks/file.py +49 -0
- unstructured_ingest/connector/notion/types/blocks/heading.py +37 -0
- unstructured_ingest/connector/notion/types/blocks/image.py +21 -0
- unstructured_ingest/connector/notion/types/blocks/link_preview.py +24 -0
- unstructured_ingest/connector/notion/types/blocks/link_to_page.py +29 -0
- unstructured_ingest/connector/notion/types/blocks/numbered_list.py +29 -0
- unstructured_ingest/connector/notion/types/blocks/paragraph.py +31 -0
- unstructured_ingest/connector/notion/types/blocks/pdf.py +49 -0
- unstructured_ingest/connector/notion/types/blocks/quote.py +37 -0
- unstructured_ingest/connector/notion/types/blocks/synced_block.py +57 -0
- unstructured_ingest/connector/notion/types/blocks/table.py +63 -0
- unstructured_ingest/connector/notion/types/blocks/table_of_contents.py +23 -0
- unstructured_ingest/connector/notion/types/blocks/template.py +30 -0
- unstructured_ingest/connector/notion/types/blocks/todo.py +42 -0
- unstructured_ingest/connector/notion/types/blocks/toggle.py +37 -0
- unstructured_ingest/connector/notion/types/blocks/unsupported.py +20 -0
- unstructured_ingest/connector/notion/types/blocks/video.py +22 -0
- unstructured_ingest/connector/notion/types/database.py +72 -0
- unstructured_ingest/connector/notion/types/database_properties/__init__.py +106 -0
- unstructured_ingest/connector/notion/types/database_properties/checkbox.py +38 -0
- unstructured_ingest/connector/notion/types/database_properties/created_by.py +35 -0
- unstructured_ingest/connector/notion/types/database_properties/created_time.py +34 -0
- unstructured_ingest/connector/notion/types/database_properties/date.py +41 -0
- unstructured_ingest/connector/notion/types/database_properties/email.py +36 -0
- unstructured_ingest/connector/notion/types/database_properties/files.py +37 -0
- unstructured_ingest/connector/notion/types/database_properties/formula.py +49 -0
- unstructured_ingest/connector/notion/types/database_properties/last_edited_by.py +34 -0
- unstructured_ingest/connector/notion/types/database_properties/last_edited_time.py +34 -0
- unstructured_ingest/connector/notion/types/database_properties/multiselect.py +73 -0
- unstructured_ingest/connector/notion/types/database_properties/number.py +49 -0
- unstructured_ingest/connector/notion/types/database_properties/people.py +40 -0
- unstructured_ingest/connector/notion/types/database_properties/phone_number.py +36 -0
- unstructured_ingest/connector/notion/types/database_properties/relation.py +67 -0
- unstructured_ingest/connector/notion/types/database_properties/rich_text.py +43 -0
- unstructured_ingest/connector/notion/types/database_properties/rollup.py +56 -0
- unstructured_ingest/connector/notion/types/database_properties/select.py +68 -0
- unstructured_ingest/connector/notion/types/database_properties/status.py +80 -0
- unstructured_ingest/connector/notion/types/database_properties/title.py +37 -0
- unstructured_ingest/connector/notion/types/database_properties/unique_id.py +50 -0
- unstructured_ingest/connector/notion/types/database_properties/url.py +37 -0
- unstructured_ingest/connector/notion/types/database_properties/verification.py +78 -0
- unstructured_ingest/connector/notion/types/date.py +26 -0
- unstructured_ingest/connector/notion/types/file.py +51 -0
- unstructured_ingest/connector/notion/types/page.py +44 -0
- unstructured_ingest/connector/notion/types/parent.py +66 -0
- unstructured_ingest/connector/notion/types/rich_text.py +189 -0
- unstructured_ingest/connector/notion/types/user.py +76 -0
- unstructured_ingest/connector/onedrive.py +232 -0
- unstructured_ingest/connector/opensearch.py +218 -0
- unstructured_ingest/connector/outlook.py +285 -0
- unstructured_ingest/connector/pinecone.py +140 -0
- unstructured_ingest/connector/qdrant.py +144 -0
- unstructured_ingest/connector/reddit.py +166 -0
- unstructured_ingest/connector/registry.py +109 -0
- unstructured_ingest/connector/salesforce.py +301 -0
- unstructured_ingest/connector/sharepoint.py +573 -0
- unstructured_ingest/connector/slack.py +224 -0
- unstructured_ingest/connector/sql.py +199 -0
- unstructured_ingest/connector/vectara.py +248 -0
- unstructured_ingest/connector/weaviate.py +190 -0
- unstructured_ingest/connector/wikipedia.py +208 -0
- unstructured_ingest/enhanced_dataclass/__init__.py +4 -0
- unstructured_ingest/enhanced_dataclass/core.py +99 -0
- unstructured_ingest/enhanced_dataclass/dataclasses.py +54 -0
- unstructured_ingest/enhanced_dataclass/json_mixin.py +125 -0
- unstructured_ingest/error.py +49 -0
- unstructured_ingest/evaluate.py +338 -0
- unstructured_ingest/ingest_backoff/__init__.py +3 -0
- unstructured_ingest/ingest_backoff/_common.py +102 -0
- unstructured_ingest/ingest_backoff/_wrapper.py +122 -0
- unstructured_ingest/interfaces.py +838 -0
- unstructured_ingest/logger.py +130 -0
- unstructured_ingest/main.py +11 -0
- unstructured_ingest/pipeline/__init__.py +22 -0
- unstructured_ingest/pipeline/copy.py +19 -0
- unstructured_ingest/pipeline/doc_factory.py +12 -0
- unstructured_ingest/pipeline/interfaces.py +265 -0
- unstructured_ingest/pipeline/partition.py +60 -0
- unstructured_ingest/pipeline/permissions.py +12 -0
- unstructured_ingest/pipeline/pipeline.py +117 -0
- unstructured_ingest/pipeline/reformat/__init__.py +0 -0
- unstructured_ingest/pipeline/reformat/chunking.py +130 -0
- unstructured_ingest/pipeline/reformat/embedding.py +66 -0
- unstructured_ingest/pipeline/source.py +77 -0
- unstructured_ingest/pipeline/utils.py +6 -0
- unstructured_ingest/pipeline/write.py +18 -0
- unstructured_ingest/processor.py +93 -0
- unstructured_ingest/runner/__init__.py +104 -0
- unstructured_ingest/runner/airtable.py +35 -0
- unstructured_ingest/runner/astra.py +34 -0
- unstructured_ingest/runner/base_runner.py +89 -0
- unstructured_ingest/runner/biomed.py +45 -0
- unstructured_ingest/runner/confluence.py +35 -0
- unstructured_ingest/runner/delta_table.py +34 -0
- unstructured_ingest/runner/discord.py +35 -0
- unstructured_ingest/runner/elasticsearch.py +40 -0
- unstructured_ingest/runner/fsspec/__init__.py +0 -0
- unstructured_ingest/runner/fsspec/azure.py +30 -0
- unstructured_ingest/runner/fsspec/box.py +28 -0
- unstructured_ingest/runner/fsspec/dropbox.py +30 -0
- unstructured_ingest/runner/fsspec/fsspec.py +40 -0
- unstructured_ingest/runner/fsspec/gcs.py +28 -0
- unstructured_ingest/runner/fsspec/s3.py +28 -0
- unstructured_ingest/runner/fsspec/sftp.py +28 -0
- unstructured_ingest/runner/github.py +37 -0
- unstructured_ingest/runner/gitlab.py +37 -0
- unstructured_ingest/runner/google_drive.py +35 -0
- unstructured_ingest/runner/hubspot.py +35 -0
- unstructured_ingest/runner/jira.py +35 -0
- unstructured_ingest/runner/kafka.py +34 -0
- unstructured_ingest/runner/local.py +23 -0
- unstructured_ingest/runner/mongodb.py +34 -0
- unstructured_ingest/runner/notion.py +61 -0
- unstructured_ingest/runner/onedrive.py +35 -0
- unstructured_ingest/runner/opensearch.py +40 -0
- unstructured_ingest/runner/outlook.py +33 -0
- unstructured_ingest/runner/reddit.py +35 -0
- unstructured_ingest/runner/salesforce.py +33 -0
- unstructured_ingest/runner/sharepoint.py +35 -0
- unstructured_ingest/runner/slack.py +33 -0
- unstructured_ingest/runner/utils.py +47 -0
- unstructured_ingest/runner/wikipedia.py +35 -0
- unstructured_ingest/runner/writers/__init__.py +48 -0
- unstructured_ingest/runner/writers/astra.py +22 -0
- unstructured_ingest/runner/writers/azure_cognitive_search.py +24 -0
- unstructured_ingest/runner/writers/base_writer.py +26 -0
- unstructured_ingest/runner/writers/chroma.py +22 -0
- unstructured_ingest/runner/writers/clarifai.py +19 -0
- unstructured_ingest/runner/writers/databricks_volumes.py +25 -0
- unstructured_ingest/runner/writers/delta_table.py +24 -0
- unstructured_ingest/runner/writers/elasticsearch.py +24 -0
- unstructured_ingest/runner/writers/fsspec/__init__.py +0 -0
- unstructured_ingest/runner/writers/fsspec/azure.py +24 -0
- unstructured_ingest/runner/writers/fsspec/box.py +21 -0
- unstructured_ingest/runner/writers/fsspec/dropbox.py +21 -0
- unstructured_ingest/runner/writers/fsspec/gcs.py +19 -0
- unstructured_ingest/runner/writers/fsspec/s3.py +21 -0
- unstructured_ingest/runner/writers/kafka.py +21 -0
- unstructured_ingest/runner/writers/mongodb.py +21 -0
- unstructured_ingest/runner/writers/opensearch.py +26 -0
- unstructured_ingest/runner/writers/pinecone.py +21 -0
- unstructured_ingest/runner/writers/qdrant.py +19 -0
- unstructured_ingest/runner/writers/sql.py +22 -0
- unstructured_ingest/runner/writers/vectara.py +22 -0
- unstructured_ingest/runner/writers/weaviate.py +21 -0
- unstructured_ingest/utils/__init__.py +0 -0
- unstructured_ingest/utils/compression.py +117 -0
- unstructured_ingest/utils/data_prep.py +112 -0
- unstructured_ingest/utils/dep_check.py +66 -0
- unstructured_ingest/utils/string_and_date_utils.py +39 -0
- unstructured_ingest/utils/table.py +73 -0
- unstructured_ingest/v2/__init__.py +1 -0
- unstructured_ingest/v2/cli/__init__.py +0 -0
- unstructured_ingest/v2/cli/base/__init__.py +4 -0
- unstructured_ingest/v2/cli/base/cmd.py +215 -0
- unstructured_ingest/v2/cli/base/dest.py +76 -0
- unstructured_ingest/v2/cli/base/importer.py +34 -0
- unstructured_ingest/v2/cli/base/src.py +70 -0
- unstructured_ingest/v2/cli/cli.py +24 -0
- unstructured_ingest/v2/cli/cmds/__init__.py +87 -0
- unstructured_ingest/v2/cli/cmds/astra.py +85 -0
- unstructured_ingest/v2/cli/cmds/azure_cognitive_search.py +72 -0
- unstructured_ingest/v2/cli/cmds/chroma.py +108 -0
- unstructured_ingest/v2/cli/cmds/databricks_volumes.py +161 -0
- unstructured_ingest/v2/cli/cmds/elasticsearch.py +159 -0
- unstructured_ingest/v2/cli/cmds/fsspec/__init__.py +0 -0
- unstructured_ingest/v2/cli/cmds/fsspec/azure.py +84 -0
- unstructured_ingest/v2/cli/cmds/fsspec/box.py +58 -0
- unstructured_ingest/v2/cli/cmds/fsspec/dropbox.py +58 -0
- unstructured_ingest/v2/cli/cmds/fsspec/fsspec.py +77 -0
- unstructured_ingest/v2/cli/cmds/fsspec/gcs.py +81 -0
- unstructured_ingest/v2/cli/cmds/fsspec/s3.py +84 -0
- unstructured_ingest/v2/cli/cmds/fsspec/sftp.py +80 -0
- unstructured_ingest/v2/cli/cmds/google_drive.py +74 -0
- unstructured_ingest/v2/cli/cmds/local.py +60 -0
- unstructured_ingest/v2/cli/cmds/mongodb.py +62 -0
- unstructured_ingest/v2/cli/cmds/onedrive.py +91 -0
- unstructured_ingest/v2/cli/cmds/opensearch.py +93 -0
- unstructured_ingest/v2/cli/cmds/pinecone.py +62 -0
- unstructured_ingest/v2/cli/cmds/salesforce.py +79 -0
- unstructured_ingest/v2/cli/cmds/sharepoint.py +112 -0
- unstructured_ingest/v2/cli/cmds/singlestore.py +96 -0
- unstructured_ingest/v2/cli/cmds/sql.py +84 -0
- unstructured_ingest/v2/cli/cmds/weaviate.py +100 -0
- unstructured_ingest/v2/cli/configs/__init__.py +6 -0
- unstructured_ingest/v2/cli/configs/chunk.py +89 -0
- unstructured_ingest/v2/cli/configs/embed.py +74 -0
- unstructured_ingest/v2/cli/configs/partition.py +99 -0
- unstructured_ingest/v2/cli/configs/processor.py +88 -0
- unstructured_ingest/v2/cli/interfaces.py +27 -0
- unstructured_ingest/v2/cli/utils.py +240 -0
- unstructured_ingest/v2/example.py +37 -0
- unstructured_ingest/v2/interfaces/__init__.py +29 -0
- unstructured_ingest/v2/interfaces/connector.py +32 -0
- unstructured_ingest/v2/interfaces/downloader.py +79 -0
- unstructured_ingest/v2/interfaces/file_data.py +49 -0
- unstructured_ingest/v2/interfaces/indexer.py +28 -0
- unstructured_ingest/v2/interfaces/process.py +20 -0
- unstructured_ingest/v2/interfaces/processor.py +48 -0
- unstructured_ingest/v2/interfaces/upload_stager.py +48 -0
- unstructured_ingest/v2/interfaces/uploader.py +39 -0
- unstructured_ingest/v2/logger.py +126 -0
- unstructured_ingest/v2/main.py +11 -0
- unstructured_ingest/v2/pipeline/__init__.py +0 -0
- unstructured_ingest/v2/pipeline/interfaces.py +167 -0
- unstructured_ingest/v2/pipeline/pipeline.py +284 -0
- unstructured_ingest/v2/pipeline/steps/__init__.py +0 -0
- unstructured_ingest/v2/pipeline/steps/chunk.py +85 -0
- unstructured_ingest/v2/pipeline/steps/download.py +124 -0
- unstructured_ingest/v2/pipeline/steps/embed.py +84 -0
- unstructured_ingest/v2/pipeline/steps/index.py +61 -0
- unstructured_ingest/v2/pipeline/steps/partition.py +78 -0
- unstructured_ingest/v2/pipeline/steps/stage.py +64 -0
- unstructured_ingest/v2/pipeline/steps/uncompress.py +68 -0
- unstructured_ingest/v2/pipeline/steps/upload.py +73 -0
- unstructured_ingest/v2/pipeline/utils.py +15 -0
- unstructured_ingest/v2/processes/__init__.py +0 -0
- unstructured_ingest/v2/processes/chunker.py +97 -0
- unstructured_ingest/v2/processes/connector_registry.py +63 -0
- unstructured_ingest/v2/processes/connectors/__init__.py +77 -0
- unstructured_ingest/v2/processes/connectors/astra.py +152 -0
- unstructured_ingest/v2/processes/connectors/azure_cognitive_search.py +211 -0
- unstructured_ingest/v2/processes/connectors/chroma.py +204 -0
- unstructured_ingest/v2/processes/connectors/databricks_volumes.py +96 -0
- unstructured_ingest/v2/processes/connectors/elasticsearch.py +401 -0
- unstructured_ingest/v2/processes/connectors/fsspec/__init__.py +37 -0
- unstructured_ingest/v2/processes/connectors/fsspec/azure.py +144 -0
- unstructured_ingest/v2/processes/connectors/fsspec/box.py +131 -0
- unstructured_ingest/v2/processes/connectors/fsspec/dropbox.py +130 -0
- unstructured_ingest/v2/processes/connectors/fsspec/fsspec.py +342 -0
- unstructured_ingest/v2/processes/connectors/fsspec/gcs.py +141 -0
- unstructured_ingest/v2/processes/connectors/fsspec/s3.py +164 -0
- unstructured_ingest/v2/processes/connectors/fsspec/sftp.py +166 -0
- unstructured_ingest/v2/processes/connectors/fsspec/utils.py +17 -0
- unstructured_ingest/v2/processes/connectors/google_drive.py +335 -0
- unstructured_ingest/v2/processes/connectors/local.py +204 -0
- unstructured_ingest/v2/processes/connectors/mongodb.py +138 -0
- unstructured_ingest/v2/processes/connectors/onedrive.py +216 -0
- unstructured_ingest/v2/processes/connectors/opensearch.py +155 -0
- unstructured_ingest/v2/processes/connectors/pinecone.py +178 -0
- unstructured_ingest/v2/processes/connectors/salesforce.py +293 -0
- unstructured_ingest/v2/processes/connectors/sharepoint.py +412 -0
- unstructured_ingest/v2/processes/connectors/singlestore.py +160 -0
- unstructured_ingest/v2/processes/connectors/sql.py +269 -0
- unstructured_ingest/v2/processes/connectors/utils.py +19 -0
- unstructured_ingest/v2/processes/connectors/weaviate.py +235 -0
- unstructured_ingest/v2/processes/embedder.py +76 -0
- unstructured_ingest/v2/processes/partitioner.py +166 -0
- unstructured_ingest/v2/processes/uncompress.py +43 -0
- unstructured_ingest-0.0.0.dist-info/METADATA +319 -0
- unstructured_ingest-0.0.0.dist-info/RECORD +356 -0
- unstructured_ingest-0.0.0.dist-info/WHEEL +5 -0
- unstructured_ingest-0.0.0.dist-info/entry_points.txt +2 -0
- unstructured_ingest-0.0.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,285 @@
|
|
|
1
|
+
import copy
|
|
2
|
+
import typing as t
|
|
3
|
+
from dataclasses import dataclass, field
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
from unstructured.__version__ import __version__ as unstructured_version
|
|
7
|
+
|
|
8
|
+
from unstructured_ingest.enhanced_dataclass import enhanced_field
|
|
9
|
+
from unstructured_ingest.enhanced_dataclass.core import _asdict
|
|
10
|
+
from unstructured_ingest.error import DestinationConnectionError, SourceConnectionError, WriteError
|
|
11
|
+
from unstructured_ingest.interfaces import (
|
|
12
|
+
AccessConfig,
|
|
13
|
+
BaseConnectorConfig,
|
|
14
|
+
BaseDestinationConnector,
|
|
15
|
+
BaseIngestDocBatch,
|
|
16
|
+
BaseSingleIngestDoc,
|
|
17
|
+
BaseSourceConnector,
|
|
18
|
+
IngestDocCleanupMixin,
|
|
19
|
+
SourceConnectorCleanupMixin,
|
|
20
|
+
SourceMetadata,
|
|
21
|
+
)
|
|
22
|
+
from unstructured_ingest.logger import logger
|
|
23
|
+
from unstructured_ingest.utils.data_prep import flatten_dict
|
|
24
|
+
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
25
|
+
|
|
26
|
+
if t.TYPE_CHECKING:
|
|
27
|
+
from pymongo import MongoClient
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
# Version string handed to pymongo's ServerApi whenever a MongoClient is created.
SERVER_API_VERSION = "1"
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def parse_userinfo(userinfo: str) -> t.Tuple[str, str]:
    """Split a ``user:password`` string into its two components.

    Only the first ``:`` acts as the separator, so any further colons stay in
    the password. When no ``:`` is present the whole input is returned as the
    user and the password is an empty string.
    """
    pieces = userinfo.partition(":")
    # partition() always yields (head, separator, tail); the separator is dropped.
    return pieces[0], pieces[2]
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
@dataclass
class MongoDBAccessConfig(AccessConfig):
    """Sensitive access settings for a MongoDB connection."""

    # Full connection URI, flagged as sensitive through enhanced_field.
    # SimpleMongoDBConfig.generate_client falls back to host/port when unset.
    uri: t.Optional[str] = enhanced_field(default=None, sensitive=True)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
@dataclass
class SimpleMongoDBConfig(BaseConnectorConfig):
    """Connection settings used by the MongoDB source and destination connectors."""

    access_config: MongoDBAccessConfig
    host: t.Optional[str] = None
    database: t.Optional[str] = None
    collection: t.Optional[str] = None
    port: int = 27017
    # Number of document ids the source connector places in one batch.
    batch_size: int = 100

    @requires_dependencies(["pymongo"], extras="mongodb")
    def generate_client(self) -> "MongoClient":
        """Create a new ``MongoClient`` from this configuration.

        When ``access_config.uri`` is set the client is created from that URI
        and tagged with driver info; otherwise it is created from ``host`` and
        ``port``. Both paths pin the server API to ``SERVER_API_VERSION``.
        """
        from pymongo import MongoClient
        from pymongo.driver_info import DriverInfo
        from pymongo.server_api import ServerApi

        if not self.access_config.uri:
            # NOTE(review): this path passes no DriverInfo, unlike the URI
            # path below -- confirm whether that is intentional.
            return MongoClient(
                host=self.host,
                port=self.port,
                server_api=ServerApi(version=SERVER_API_VERSION),
            )

        return MongoClient(
            self.access_config.uri,
            server_api=ServerApi(version=SERVER_API_VERSION),
            driver=DriverInfo(name="unstructured", version=unstructured_version),
        )

    def get_collection(self, client):
        """Return the configured collection from the configured database of ``client``."""
        return client[self.database].get_collection(name=self.collection)
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
@dataclass
class MongoDBDocumentMeta:
    """Identifying metadata for one MongoDB document."""

    # Name of the collection the document belongs to.
    collection: str
    # String form of the document's ``_id``.
    document_id: str
    # Creation timestamp as a string (the batch loader passes an ISO-8601 value).
    date_created: str
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
@dataclass
class MongoDBIngestDoc(IngestDocCleanupMixin, BaseSingleIngestDoc):
    """A single MongoDB document together with its local file locations."""

    connector_config: SimpleMongoDBConfig
    document_meta: MongoDBDocumentMeta
    document: dict = field(default_factory=dict)
    registry_name: str = "mongodb"

    @property
    def filename(self):
        """Resolved path of the ``.txt`` file under the download directory."""
        download_root = Path(self.read_config.download_dir)
        text_name = f"{self.document_meta.document_id}.txt"
        return (download_root / self.connector_config.collection / text_name).resolve()

    @property
    def _output_filename(self):
        """Path of the ``.json`` output file under the output directory."""
        output_root = Path(self.processor_config.output_dir)
        json_name = f"{self.document_meta.document_id}.json"
        return output_root / self.connector_config.collection / json_name

    def update_source_metadata(self, **kwargs):
        """Populate ``source_metadata`` based on whether a document is attached."""
        if self.document is not None:
            self.source_metadata = SourceMetadata(
                date_created=self.document_meta.date_created,
                exists=True,
            )
        else:
            self.source_metadata = SourceMetadata(
                exists=False,
            )

    @SourceConnectionError.wrap
    @requires_dependencies(["pymongo"], extras="mongodb")
    @BaseSingleIngestDoc.skip_if_file_exists
    def get_file(self):
        """Do nothing; the file is written by ``MongoDBIngestDocBatch.get_files``."""
        return None

    @property
    def record_locator(self) -> t.Optional[t.Dict[str, t.Any]]:
        """Host, collection and document id identifying this record."""
        config = self.connector_config
        return dict(
            host=config.host,
            collection=config.collection,
            document_id=self.document_meta.document_id,
        )
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
@dataclass
class MongoDBIngestDocBatch(BaseIngestDocBatch):
    """A batch of MongoDB documents that are fetched together by id."""

    connector_config: SimpleMongoDBConfig
    ingest_docs: t.List[MongoDBIngestDoc] = field(default_factory=list)
    list_of_ids: t.List[str] = field(default_factory=list)
    registry_name: str = "mongodb_batch"

    @property
    def unique_id(self) -> str:
        """Return the batch ids, sorted and joined with commas."""
        return ",".join(sorted(self.list_of_ids))

    @requires_dependencies(["pymongo"], extras="mongodb")
    def _get_docs(self) -> t.List[dict]:
        """Fetch the documents whose ids are listed in ``list_of_ids``.

        A dedicated client is created for the query and closed again before
        returning, so no connection outlives the call.
        """
        from bson.objectid import ObjectId

        # MongoDB expects a list of ObjectIds. Converting before the client is
        # created means an invalid id cannot leave an open client behind.
        object_ids = [ObjectId(doc_id) for doc_id in self.list_of_ids]

        # Note for future. Maybe this could use other client
        client = self.connector_config.generate_client()
        try:
            collection = self.connector_config.get_collection(client)
            # list() drains the cursor, so closing the client afterwards is safe.
            return list(collection.find({"_id": {"$in": object_ids}}))
        finally:
            client.close()

    def get_files(self):
        """Write every document of the batch to disk and record its ingest doc.

        Each document's flattened values are written one per line to the
        ingest doc's ``filename``; the ingest doc is then appended to
        ``ingest_docs``.
        """
        for doc in self._get_docs():
            object_id = doc.get("_id")
            ingest_doc = MongoDBIngestDoc(
                processor_config=self.processor_config,
                read_config=self.read_config,
                connector_config=self.connector_config,
                document_meta=MongoDBDocumentMeta(
                    collection=self.connector_config.collection,
                    document_id=str(object_id),
                    date_created=object_id.generation_time.isoformat(),
                ),
                document=doc,
            )
            ingest_doc.update_source_metadata()

            # Remove the id before flattening so it is not written to the text
            # file. ``ingest_doc.document`` is this same dict, so it loses the
            # key as well.
            del doc["_id"]
            flattened_dict = flatten_dict(dictionary=doc)
            concatenated_values = "\n".join(str(value) for value in flattened_dict.values())

            filename = ingest_doc.filename
            filename.parent.mkdir(parents=True, exist_ok=True)
            with open(filename, "w", encoding="utf8") as f:
                f.write(concatenated_values)

            self.ingest_docs.append(ingest_doc)
|
|
183
|
+
|
|
184
|
+
|
|
185
|
+
@dataclass
class MongoDBSourceConnector(SourceConnectorCleanupMixin, BaseSourceConnector):
    """Source connector that splits a MongoDB collection into id batches."""

    connector_config: SimpleMongoDBConfig
    _client: t.Optional["MongoClient"] = field(init=False, default=None)

    @property
    def client(self) -> "MongoClient":
        """Return the client, creating it from the config on first access."""
        if self._client is None:
            self._client = self.connector_config.generate_client()
        return self._client

    def check_connection(self):
        """Ping the server to validate the connection.

        Raises:
            SourceConnectionError: if the ping fails for any reason.
        """
        try:
            self.client.admin.command("ping")
        except Exception as e:
            logger.error(f"failed to validate connection: {e}", exc_info=True)
            # This is a source connector, so report a source-side failure
            # (previously a DestinationConnectionError was raised here).
            raise SourceConnectionError(f"failed to validate connection: {e}")

    def initialize(self):
        """Create the client eagerly by touching the ``client`` property."""
        _ = self.client

    @requires_dependencies(["pymongo"], extras="mongodb")
    def _get_doc_ids(self) -> t.List[str]:
        """Fetches all document ids in a collection."""
        collection = self.connector_config.get_collection(self.client)
        return [str(x) for x in collection.distinct("_id")]

    def get_ingest_docs(self):
        """Return one ``MongoDBIngestDocBatch`` per ``batch_size`` slice of ids.

        The ids come from ``_get_doc_ids``; the last batch may be shorter.
        """
        ids = self._get_doc_ids()
        batch_size = self.connector_config.batch_size
        # Step through the ids in strides of batch_size.
        id_batches = [ids[start : start + batch_size] for start in range(0, len(ids), batch_size)]

        return [
            MongoDBIngestDocBatch(
                connector_config=self.connector_config,
                processor_config=self.processor_config,
                read_config=self.read_config,
                list_of_ids=batched_ids,
            )
            for batched_ids in id_batches
        ]
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
@dataclass
class MongoDBDestinationConnector(BaseDestinationConnector):
    """Destination connector that inserts element dicts into a MongoDB collection."""

    connector_config: SimpleMongoDBConfig
    _client: t.Optional["MongoClient"] = field(init=False, default=None)

    def to_dict(self, **kwargs):
        """
        Serialize the connector without its client.

        The _client variable in this dataclass breaks deepcopy due to:
        TypeError: cannot pickle '_thread.lock' object
        so a shallow copy with the client cleared is serialized instead; the
        client has to be reinitialized after deserialization.
        """
        clone = copy.copy(self)
        if hasattr(clone, "_client"):
            clone._client = None
        return _asdict(clone, **kwargs)

    @property
    def client(self) -> "MongoClient":
        """Return the client, creating it from the config on first access."""
        if self._client is None:
            self._client = self.connector_config.generate_client()
        return self._client

    @requires_dependencies(["pymongo"], extras="mongodb")
    def check_connection(self):
        """Ping the server; raise ``DestinationConnectionError`` if that fails."""
        try:
            self.client.admin.command("ping")
        except Exception as e:
            logger.error(f"failed to validate connection: {e}", exc_info=True)
            raise DestinationConnectionError(f"failed to validate connection: {e}")

    def initialize(self):
        """Create the client eagerly by touching the ``client`` property."""
        _ = self.client

    @requires_dependencies(["pymongo"], extras="mongodb")
    def write_dict(self, *args, elements_dict: t.List[t.Dict[str, t.Any]], **kwargs) -> None:
        """Insert ``elements_dict`` into the configured collection.

        Raises:
            WriteError: if the insert fails for any reason.
        """
        config = self.connector_config
        logger.info(
            f"writing {len(elements_dict)} documents to destination "
            f"database {config.database}, "
            f"at collection {config.collection}",
        )

        target = config.get_collection(self.client)
        try:
            target.insert_many(elements_dict)
        except Exception as e:
            logger.error(f"failed to write records: {e}", exc_info=True)
            raise WriteError(f"failed to write records: {e}")
|
|
File without changes
|
|
@@ -0,0 +1,233 @@
|
|
|
1
|
+
from typing import Any, Generator, List, Optional, Tuple
|
|
2
|
+
|
|
3
|
+
import backoff
|
|
4
|
+
import httpx
|
|
5
|
+
import notion_client.errors
|
|
6
|
+
from notion_client import Client as NotionClient
|
|
7
|
+
from notion_client.api_endpoints import BlocksChildrenEndpoint as NotionBlocksChildrenEndpoint
|
|
8
|
+
from notion_client.api_endpoints import BlocksEndpoint as NotionBlocksEndpoint
|
|
9
|
+
from notion_client.api_endpoints import DatabasesEndpoint as NotionDatabasesEndpoint
|
|
10
|
+
from notion_client.api_endpoints import Endpoint
|
|
11
|
+
from notion_client.api_endpoints import PagesEndpoint as NotionPagesEndpoint
|
|
12
|
+
from notion_client.errors import RequestTimeoutError
|
|
13
|
+
|
|
14
|
+
from unstructured_ingest.connector.notion.types.block import Block
|
|
15
|
+
from unstructured_ingest.connector.notion.types.database import Database
|
|
16
|
+
from unstructured_ingest.connector.notion.types.database_properties import (
|
|
17
|
+
map_cells,
|
|
18
|
+
)
|
|
19
|
+
from unstructured_ingest.connector.notion.types.page import Page
|
|
20
|
+
from unstructured_ingest.ingest_backoff import RetryHandler
|
|
21
|
+
from unstructured_ingest.interfaces import RetryStrategyConfig
|
|
22
|
+
|
|
23
|
+
# Exception types handed to the RetryHandler: a call that raises one of these
# is retried, anything else propagates immediately.
retryable_exceptions = (
    httpx.TimeoutException,  # request timed out at the HTTP layer
    httpx.HTTPStatusError,  # error status raised by httpx
    notion_client.errors.HTTPResponseError,  # error response raised by notion_client
)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def get_retry_handler(endpoint: Endpoint) -> Optional[RetryHandler]:
    """Build a ``RetryHandler`` for *endpoint*, or return ``None`` if retries are not configured.

    Args:
        endpoint: an endpoint object. When it carries a truthy
            ``retry_strategy_config`` attribute, that config supplies
            ``max_retry_time`` and ``max_retries``; the endpoint's
            ``parent.logger`` supplies the logger and both log levels.

    Returns:
        A handler using ``backoff.expo`` over ``retryable_exceptions``, or
        ``None`` when the attribute is missing or falsy.
    """
    # Default to None so an endpoint created without the attribute simply gets
    # no retries instead of raising AttributeError.
    retry_strategy_config = getattr(endpoint, "retry_strategy_config", None)
    if not retry_strategy_config:
        return None

    parent_logger = endpoint.parent.logger
    return RetryHandler(
        backoff.expo,
        retryable_exceptions,
        max_time=retry_strategy_config.max_retry_time,
        max_tries=retry_strategy_config.max_retries,
        logger=parent_logger,
        start_log_level=parent_logger.level,
        backoff_log_level=parent_logger.level,
    )
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class BlocksChildrenEndpoint(NotionBlocksChildrenEndpoint):
    """Block-children endpoint that parses results into ``Block`` objects.

    Requests are routed through a ``RetryHandler`` when a retry strategy
    config was supplied, and sent directly otherwise.
    """

    def __init__(
        self,
        *args,
        retry_strategy_config: Optional[RetryStrategyConfig] = None,
        **kwargs,
    ):
        """Forward all arguments to the upstream endpoint and remember the retry config."""
        super().__init__(*args, **kwargs)
        self.retry_strategy_config = retry_strategy_config

    @property
    def retry_handler(self) -> Optional[RetryHandler]:
        """A retry handler built on each access, or ``None`` without a retry config."""
        return get_retry_handler(self)

    def _list_raw(self, block_id: str, **kwargs: Any) -> dict:
        """Issue one upstream ``list`` request, with retries when configured."""
        # Bind once: the property constructs a new handler on every access.
        handler = self.retry_handler
        if handler:
            return handler(super().list, block_id=block_id, **kwargs)  # type: ignore
        return super().list(block_id=block_id, **kwargs)  # type: ignore

    def list(self, block_id: str, **kwargs: Any) -> Tuple[List[Block], dict]:
        """Fetch one page of children of ``block_id``.

        Returns:
            The parsed child blocks and the rest of the response with
            ``"results"`` removed (so pagination fields stay available).
        """
        resp = self._list_raw(block_id, **kwargs)
        child_blocks = [Block.from_dict(data=b) for b in resp.pop("results", [])]
        return child_blocks, resp

    def iterate_list(
        self,
        block_id: str,
        **kwargs: Any,
    ) -> Generator[List[Block], None, None]:
        """Yield the children of ``block_id`` one page at a time.

        Iteration stops once the response reports no more pages or carries no
        ``next_cursor``. A ``start_cursor`` given by the caller is used for the
        first request only.
        """
        while True:
            response = self._list_raw(block_id, **kwargs)
            child_blocks = [Block.from_dict(data=b) for b in response.pop("results", [])]
            yield child_blocks

            next_cursor = response.get("next_cursor")
            if not response.get("has_more") or not next_cursor:
                return
            # Advance to the next page; without this every iteration would
            # request the same first page and the loop would never end.
            kwargs["start_cursor"] = next_cursor
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
class DatabasesEndpoint(NotionDatabasesEndpoint):
    """Databases endpoint that returns ``Database`` / ``Page`` objects.

    Requests are routed through a ``RetryHandler`` when a retry strategy
    config was supplied, and sent directly otherwise.
    """

    def __init__(
        self,
        *args,
        retry_strategy_config: Optional[RetryStrategyConfig] = None,
        **kwargs,
    ):
        """Forward all arguments to the upstream endpoint and remember the retry config."""
        super().__init__(*args, **kwargs)
        self.retry_strategy_config = retry_strategy_config

    @property
    def retry_handler(self) -> Optional[RetryHandler]:
        """A retry handler built on each access, or ``None`` without a retry config."""
        return get_retry_handler(self)

    def _query_raw(self, database_id: str, **kwargs: Any) -> dict:
        """Issue one upstream ``query`` request, with retries when configured."""
        # Bind once: the property constructs a new handler on every access.
        handler = self.retry_handler
        if handler:
            return handler(super().query, database_id=database_id, **kwargs)  # type: ignore
        return super().query(database_id=database_id, **kwargs)  # type: ignore

    @staticmethod
    def _parse_pages(response: dict) -> List[Page]:
        """Pop ``"results"`` from *response* and convert each entry to a ``Page``."""
        pages = [Page.from_dict(data=p) for p in response.pop("results", [])]
        for page in pages:
            page.properties = map_cells(page.properties)
        return pages

    def retrieve(self, database_id: str, **kwargs: Any) -> Database:
        """Fetch the database ``database_id`` and parse it into a ``Database``."""
        handler = self.retry_handler
        resp: dict = (
            handler(super().retrieve, database_id=database_id, **kwargs)
            if handler
            else super().retrieve(database_id=database_id, **kwargs)
        )  # type: ignore
        return Database.from_dict(data=resp)

    def retrieve_status(self, database_id: str, **kwargs) -> int:
        """Send a HEAD request for the database and return the HTTP status code.

        Only the ``auth`` keyword argument is used.

        Raises:
            RequestTimeoutError: if the request times out.
        """
        request = self.parent._build_request(
            method="HEAD",
            path=f"databases/{database_id}",
            auth=kwargs.get("auth"),
        )
        handler = self.retry_handler
        try:
            response: httpx.Response = (
                handler(self.parent.client.send, request)
                if handler
                else self.parent.client.send(request)
            )  # type: ignore
            return response.status_code
        except httpx.TimeoutException as e:
            # Keep the httpx error attached as the explicit cause.
            raise RequestTimeoutError() from e

    def query(self, database_id: str, **kwargs: Any) -> Tuple[List[Page], dict]:
        """Get a list of [Pages](https://developers.notion.com/reference/page) contained in the database.

        Returns the parsed pages and the rest of the response with
        ``"results"`` removed. A response without ``"results"`` yields an empty
        page list, the same way ``iterate_query`` treats it.

        *[Endpoint documentation](https://developers.notion.com/reference/post-database-query)*
        """  # noqa: E501
        resp = self._query_raw(database_id, **kwargs)
        pages = self._parse_pages(resp)
        return pages, resp

    def iterate_query(self, database_id: str, **kwargs: Any) -> Generator[List[Page], None, None]:
        """Yield the pages of the database one result page at a time.

        Iteration stops once the response reports no more pages or carries no
        ``next_cursor``. A ``start_cursor`` given by the caller is used for the
        first request only.
        """
        while True:
            response = self._query_raw(database_id, **kwargs)
            yield self._parse_pages(response)

            next_cursor = response.get("next_cursor")
            if not response.get("has_more") or not next_cursor:
                return
            # Advance to the next page; without this every iteration would
            # request the same first page and the loop would never end.
            kwargs["start_cursor"] = next_cursor
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
class BlocksEndpoint(NotionBlocksEndpoint):
    """Blocks endpoint that returns ``Block`` objects and owns a retry-aware ``children`` endpoint."""

    def __init__(
        self,
        *args: Any,
        retry_strategy_config: Optional[RetryStrategyConfig] = None,
        **kwargs: Any,
    ) -> None:
        """Initialise the upstream endpoint, then replace ``children`` with the local subclass."""
        super().__init__(*args, **kwargs)
        self.retry_strategy_config = retry_strategy_config
        # The child endpoint receives the same constructor arguments and retry config.
        self.children = BlocksChildrenEndpoint(
            *args,
            retry_strategy_config=retry_strategy_config,
            **kwargs,
        )

    @property
    def retry_handler(self) -> Optional[RetryHandler]:
        """A retry handler built on each access, or ``None`` without a retry config."""
        return get_retry_handler(self)

    def retrieve(self, block_id: str, **kwargs: Any) -> Block:
        """Fetch the block ``block_id`` (with retries when configured) and parse it."""
        if self.retry_handler:
            raw: dict = self.retry_handler(  # type: ignore
                super().retrieve, block_id=block_id, **kwargs
            )
        else:
            raw = super().retrieve(block_id=block_id, **kwargs)  # type: ignore
        return Block.from_dict(data=raw)
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
class PagesEndpoint(NotionPagesEndpoint):
    """Pages endpoint that returns ``Page`` objects and retries requests when configured."""

    def __init__(
        self,
        *args,
        retry_strategy_config: Optional[RetryStrategyConfig] = None,
        **kwargs,
    ):
        """Forward all arguments to the upstream endpoint and remember the retry config."""
        super().__init__(*args, **kwargs)
        self.retry_strategy_config = retry_strategy_config

    @property
    def retry_handler(self) -> Optional[RetryHandler]:
        """A retry handler built on each access, or ``None`` without a retry config."""
        return get_retry_handler(self)

    def retrieve(self, page_id: str, **kwargs: Any) -> Page:
        """Fetch the page ``page_id`` (with retries when configured) and parse it."""
        if self.retry_handler:
            raw: dict = self.retry_handler(  # type: ignore
                super().retrieve, page_id=page_id, **kwargs
            )
        else:
            raw = super().retrieve(page_id=page_id, **kwargs)  # type: ignore
        return Page.from_dict(data=raw)

    def retrieve_status(self, page_id: str, **kwargs) -> int:
        """Send a HEAD request for the page and return the HTTP status code.

        Only the ``auth`` keyword argument is used.

        Raises:
            RequestTimeoutError: if the request times out.
        """
        head_request = self.parent._build_request(
            method="HEAD",
            path=f"pages/{page_id}",
            auth=kwargs.get("auth"),
        )
        try:
            if self.retry_handler:
                reply: httpx.Response = self.retry_handler(  # type: ignore
                    self.parent.client.send, head_request
                )
            else:
                reply = self.parent.client.send(head_request)
            return reply.status_code
        except httpx.TimeoutException:
            raise RequestTimeoutError()
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
class Client(NotionClient):
    """Notion client whose blocks, pages and databases endpoints are the retry-aware subclasses."""

    def __init__(
        self,
        *args: Any,
        retry_strategy_config: Optional[RetryStrategyConfig] = None,
        **kwargs: Any,
    ) -> None:
        """Initialise the upstream client, then swap in the local endpoint classes."""
        super().__init__(*args, **kwargs)
        # Every endpoint shares the same retry config and points back at this client.
        endpoint_options = {"retry_strategy_config": retry_strategy_config, "parent": self}
        self.blocks = BlocksEndpoint(**endpoint_options)
        self.pages = PagesEndpoint(**endpoint_options)
        self.databases = DatabasesEndpoint(**endpoint_options)
|