unstructured_ingest-1.2.32-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of unstructured-ingest might be problematic.
- unstructured_ingest/__init__.py +1 -0
- unstructured_ingest/__version__.py +1 -0
- unstructured_ingest/cli/README.md +28 -0
- unstructured_ingest/cli/__init__.py +0 -0
- unstructured_ingest/cli/base/__init__.py +4 -0
- unstructured_ingest/cli/base/cmd.py +269 -0
- unstructured_ingest/cli/base/dest.py +84 -0
- unstructured_ingest/cli/base/importer.py +34 -0
- unstructured_ingest/cli/base/src.py +75 -0
- unstructured_ingest/cli/cli.py +24 -0
- unstructured_ingest/cli/cmds.py +14 -0
- unstructured_ingest/cli/utils/__init__.py +0 -0
- unstructured_ingest/cli/utils/click.py +237 -0
- unstructured_ingest/cli/utils/model_conversion.py +222 -0
- unstructured_ingest/data_types/__init__.py +0 -0
- unstructured_ingest/data_types/entities.py +17 -0
- unstructured_ingest/data_types/file_data.py +116 -0
- unstructured_ingest/embed/__init__.py +0 -0
- unstructured_ingest/embed/azure_openai.py +63 -0
- unstructured_ingest/embed/bedrock.py +323 -0
- unstructured_ingest/embed/huggingface.py +69 -0
- unstructured_ingest/embed/interfaces.py +146 -0
- unstructured_ingest/embed/mixedbreadai.py +134 -0
- unstructured_ingest/embed/octoai.py +133 -0
- unstructured_ingest/embed/openai.py +142 -0
- unstructured_ingest/embed/togetherai.py +116 -0
- unstructured_ingest/embed/vertexai.py +109 -0
- unstructured_ingest/embed/voyageai.py +130 -0
- unstructured_ingest/error.py +156 -0
- unstructured_ingest/errors_v2.py +156 -0
- unstructured_ingest/interfaces/__init__.py +27 -0
- unstructured_ingest/interfaces/connector.py +56 -0
- unstructured_ingest/interfaces/downloader.py +90 -0
- unstructured_ingest/interfaces/indexer.py +29 -0
- unstructured_ingest/interfaces/process.py +22 -0
- unstructured_ingest/interfaces/processor.py +88 -0
- unstructured_ingest/interfaces/upload_stager.py +89 -0
- unstructured_ingest/interfaces/uploader.py +67 -0
- unstructured_ingest/logger.py +39 -0
- unstructured_ingest/main.py +11 -0
- unstructured_ingest/otel.py +128 -0
- unstructured_ingest/pipeline/__init__.py +0 -0
- unstructured_ingest/pipeline/interfaces.py +211 -0
- unstructured_ingest/pipeline/otel.py +32 -0
- unstructured_ingest/pipeline/pipeline.py +408 -0
- unstructured_ingest/pipeline/steps/__init__.py +0 -0
- unstructured_ingest/pipeline/steps/chunk.py +78 -0
- unstructured_ingest/pipeline/steps/download.py +206 -0
- unstructured_ingest/pipeline/steps/embed.py +77 -0
- unstructured_ingest/pipeline/steps/filter.py +35 -0
- unstructured_ingest/pipeline/steps/index.py +86 -0
- unstructured_ingest/pipeline/steps/partition.py +77 -0
- unstructured_ingest/pipeline/steps/stage.py +65 -0
- unstructured_ingest/pipeline/steps/uncompress.py +50 -0
- unstructured_ingest/pipeline/steps/upload.py +58 -0
- unstructured_ingest/processes/__init__.py +18 -0
- unstructured_ingest/processes/chunker.py +131 -0
- unstructured_ingest/processes/connector_registry.py +69 -0
- unstructured_ingest/processes/connectors/__init__.py +129 -0
- unstructured_ingest/processes/connectors/airtable.py +238 -0
- unstructured_ingest/processes/connectors/assets/__init__.py +0 -0
- unstructured_ingest/processes/connectors/assets/databricks_delta_table_schema.sql +9 -0
- unstructured_ingest/processes/connectors/assets/weaviate_collection_config.json +23 -0
- unstructured_ingest/processes/connectors/astradb.py +592 -0
- unstructured_ingest/processes/connectors/azure_ai_search.py +275 -0
- unstructured_ingest/processes/connectors/chroma.py +193 -0
- unstructured_ingest/processes/connectors/confluence.py +527 -0
- unstructured_ingest/processes/connectors/couchbase.py +336 -0
- unstructured_ingest/processes/connectors/databricks/__init__.py +58 -0
- unstructured_ingest/processes/connectors/databricks/volumes.py +233 -0
- unstructured_ingest/processes/connectors/databricks/volumes_aws.py +93 -0
- unstructured_ingest/processes/connectors/databricks/volumes_azure.py +108 -0
- unstructured_ingest/processes/connectors/databricks/volumes_gcp.py +91 -0
- unstructured_ingest/processes/connectors/databricks/volumes_native.py +92 -0
- unstructured_ingest/processes/connectors/databricks/volumes_table.py +187 -0
- unstructured_ingest/processes/connectors/delta_table.py +310 -0
- unstructured_ingest/processes/connectors/discord.py +161 -0
- unstructured_ingest/processes/connectors/duckdb/__init__.py +15 -0
- unstructured_ingest/processes/connectors/duckdb/base.py +103 -0
- unstructured_ingest/processes/connectors/duckdb/duckdb.py +130 -0
- unstructured_ingest/processes/connectors/duckdb/motherduck.py +130 -0
- unstructured_ingest/processes/connectors/elasticsearch/__init__.py +19 -0
- unstructured_ingest/processes/connectors/elasticsearch/elasticsearch.py +478 -0
- unstructured_ingest/processes/connectors/elasticsearch/opensearch.py +523 -0
- unstructured_ingest/processes/connectors/fsspec/__init__.py +37 -0
- unstructured_ingest/processes/connectors/fsspec/azure.py +203 -0
- unstructured_ingest/processes/connectors/fsspec/box.py +176 -0
- unstructured_ingest/processes/connectors/fsspec/dropbox.py +238 -0
- unstructured_ingest/processes/connectors/fsspec/fsspec.py +475 -0
- unstructured_ingest/processes/connectors/fsspec/gcs.py +203 -0
- unstructured_ingest/processes/connectors/fsspec/s3.py +253 -0
- unstructured_ingest/processes/connectors/fsspec/sftp.py +177 -0
- unstructured_ingest/processes/connectors/fsspec/utils.py +17 -0
- unstructured_ingest/processes/connectors/github.py +226 -0
- unstructured_ingest/processes/connectors/gitlab.py +270 -0
- unstructured_ingest/processes/connectors/google_drive.py +848 -0
- unstructured_ingest/processes/connectors/ibm_watsonx/__init__.py +10 -0
- unstructured_ingest/processes/connectors/ibm_watsonx/ibm_watsonx_s3.py +367 -0
- unstructured_ingest/processes/connectors/jira.py +522 -0
- unstructured_ingest/processes/connectors/kafka/__init__.py +17 -0
- unstructured_ingest/processes/connectors/kafka/cloud.py +121 -0
- unstructured_ingest/processes/connectors/kafka/kafka.py +275 -0
- unstructured_ingest/processes/connectors/kafka/local.py +103 -0
- unstructured_ingest/processes/connectors/kdbai.py +156 -0
- unstructured_ingest/processes/connectors/lancedb/__init__.py +30 -0
- unstructured_ingest/processes/connectors/lancedb/aws.py +43 -0
- unstructured_ingest/processes/connectors/lancedb/azure.py +43 -0
- unstructured_ingest/processes/connectors/lancedb/cloud.py +42 -0
- unstructured_ingest/processes/connectors/lancedb/gcp.py +44 -0
- unstructured_ingest/processes/connectors/lancedb/lancedb.py +181 -0
- unstructured_ingest/processes/connectors/lancedb/local.py +44 -0
- unstructured_ingest/processes/connectors/local.py +227 -0
- unstructured_ingest/processes/connectors/milvus.py +311 -0
- unstructured_ingest/processes/connectors/mongodb.py +389 -0
- unstructured_ingest/processes/connectors/neo4j.py +534 -0
- unstructured_ingest/processes/connectors/notion/__init__.py +0 -0
- unstructured_ingest/processes/connectors/notion/client.py +349 -0
- unstructured_ingest/processes/connectors/notion/connector.py +350 -0
- unstructured_ingest/processes/connectors/notion/helpers.py +448 -0
- unstructured_ingest/processes/connectors/notion/ingest_backoff/__init__.py +3 -0
- unstructured_ingest/processes/connectors/notion/ingest_backoff/_common.py +102 -0
- unstructured_ingest/processes/connectors/notion/ingest_backoff/_wrapper.py +126 -0
- unstructured_ingest/processes/connectors/notion/ingest_backoff/types.py +24 -0
- unstructured_ingest/processes/connectors/notion/interfaces.py +32 -0
- unstructured_ingest/processes/connectors/notion/types/__init__.py +0 -0
- unstructured_ingest/processes/connectors/notion/types/block.py +96 -0
- unstructured_ingest/processes/connectors/notion/types/blocks/__init__.py +63 -0
- unstructured_ingest/processes/connectors/notion/types/blocks/bookmark.py +40 -0
- unstructured_ingest/processes/connectors/notion/types/blocks/breadcrumb.py +21 -0
- unstructured_ingest/processes/connectors/notion/types/blocks/bulleted_list_item.py +31 -0
- unstructured_ingest/processes/connectors/notion/types/blocks/callout.py +131 -0
- unstructured_ingest/processes/connectors/notion/types/blocks/child_database.py +23 -0
- unstructured_ingest/processes/connectors/notion/types/blocks/child_page.py +23 -0
- unstructured_ingest/processes/connectors/notion/types/blocks/code.py +43 -0
- unstructured_ingest/processes/connectors/notion/types/blocks/column_list.py +35 -0
- unstructured_ingest/processes/connectors/notion/types/blocks/divider.py +22 -0
- unstructured_ingest/processes/connectors/notion/types/blocks/embed.py +36 -0
- unstructured_ingest/processes/connectors/notion/types/blocks/equation.py +23 -0
- unstructured_ingest/processes/connectors/notion/types/blocks/file.py +49 -0
- unstructured_ingest/processes/connectors/notion/types/blocks/heading.py +37 -0
- unstructured_ingest/processes/connectors/notion/types/blocks/image.py +21 -0
- unstructured_ingest/processes/connectors/notion/types/blocks/link_preview.py +24 -0
- unstructured_ingest/processes/connectors/notion/types/blocks/link_to_page.py +29 -0
- unstructured_ingest/processes/connectors/notion/types/blocks/numbered_list.py +29 -0
- unstructured_ingest/processes/connectors/notion/types/blocks/paragraph.py +31 -0
- unstructured_ingest/processes/connectors/notion/types/blocks/pdf.py +49 -0
- unstructured_ingest/processes/connectors/notion/types/blocks/quote.py +37 -0
- unstructured_ingest/processes/connectors/notion/types/blocks/synced_block.py +109 -0
- unstructured_ingest/processes/connectors/notion/types/blocks/table.py +60 -0
- unstructured_ingest/processes/connectors/notion/types/blocks/table_of_contents.py +23 -0
- unstructured_ingest/processes/connectors/notion/types/blocks/template.py +30 -0
- unstructured_ingest/processes/connectors/notion/types/blocks/todo.py +42 -0
- unstructured_ingest/processes/connectors/notion/types/blocks/toggle.py +37 -0
- unstructured_ingest/processes/connectors/notion/types/blocks/unsupported.py +20 -0
- unstructured_ingest/processes/connectors/notion/types/blocks/video.py +22 -0
- unstructured_ingest/processes/connectors/notion/types/database.py +73 -0
- unstructured_ingest/processes/connectors/notion/types/database_properties/__init__.py +125 -0
- unstructured_ingest/processes/connectors/notion/types/database_properties/checkbox.py +39 -0
- unstructured_ingest/processes/connectors/notion/types/database_properties/created_by.py +36 -0
- unstructured_ingest/processes/connectors/notion/types/database_properties/created_time.py +35 -0
- unstructured_ingest/processes/connectors/notion/types/database_properties/date.py +42 -0
- unstructured_ingest/processes/connectors/notion/types/database_properties/email.py +37 -0
- unstructured_ingest/processes/connectors/notion/types/database_properties/files.py +38 -0
- unstructured_ingest/processes/connectors/notion/types/database_properties/formula.py +50 -0
- unstructured_ingest/processes/connectors/notion/types/database_properties/last_edited_by.py +34 -0
- unstructured_ingest/processes/connectors/notion/types/database_properties/last_edited_time.py +35 -0
- unstructured_ingest/processes/connectors/notion/types/database_properties/multiselect.py +74 -0
- unstructured_ingest/processes/connectors/notion/types/database_properties/number.py +50 -0
- unstructured_ingest/processes/connectors/notion/types/database_properties/people.py +42 -0
- unstructured_ingest/processes/connectors/notion/types/database_properties/phone_number.py +37 -0
- unstructured_ingest/processes/connectors/notion/types/database_properties/relation.py +68 -0
- unstructured_ingest/processes/connectors/notion/types/database_properties/rich_text.py +44 -0
- unstructured_ingest/processes/connectors/notion/types/database_properties/rollup.py +57 -0
- unstructured_ingest/processes/connectors/notion/types/database_properties/select.py +70 -0
- unstructured_ingest/processes/connectors/notion/types/database_properties/status.py +82 -0
- unstructured_ingest/processes/connectors/notion/types/database_properties/title.py +38 -0
- unstructured_ingest/processes/connectors/notion/types/database_properties/unique_id.py +51 -0
- unstructured_ingest/processes/connectors/notion/types/database_properties/url.py +38 -0
- unstructured_ingest/processes/connectors/notion/types/database_properties/verification.py +79 -0
- unstructured_ingest/processes/connectors/notion/types/date.py +29 -0
- unstructured_ingest/processes/connectors/notion/types/file.py +54 -0
- unstructured_ingest/processes/connectors/notion/types/page.py +52 -0
- unstructured_ingest/processes/connectors/notion/types/parent.py +66 -0
- unstructured_ingest/processes/connectors/notion/types/rich_text.py +189 -0
- unstructured_ingest/processes/connectors/notion/types/user.py +83 -0
- unstructured_ingest/processes/connectors/onedrive.py +485 -0
- unstructured_ingest/processes/connectors/outlook.py +242 -0
- unstructured_ingest/processes/connectors/pinecone.py +400 -0
- unstructured_ingest/processes/connectors/qdrant/__init__.py +16 -0
- unstructured_ingest/processes/connectors/qdrant/cloud.py +59 -0
- unstructured_ingest/processes/connectors/qdrant/local.py +58 -0
- unstructured_ingest/processes/connectors/qdrant/qdrant.py +163 -0
- unstructured_ingest/processes/connectors/qdrant/server.py +60 -0
- unstructured_ingest/processes/connectors/redisdb.py +214 -0
- unstructured_ingest/processes/connectors/salesforce.py +307 -0
- unstructured_ingest/processes/connectors/sharepoint.py +282 -0
- unstructured_ingest/processes/connectors/slack.py +249 -0
- unstructured_ingest/processes/connectors/sql/__init__.py +41 -0
- unstructured_ingest/processes/connectors/sql/databricks_delta_tables.py +228 -0
- unstructured_ingest/processes/connectors/sql/postgres.py +168 -0
- unstructured_ingest/processes/connectors/sql/singlestore.py +176 -0
- unstructured_ingest/processes/connectors/sql/snowflake.py +298 -0
- unstructured_ingest/processes/connectors/sql/sql.py +456 -0
- unstructured_ingest/processes/connectors/sql/sqlite.py +179 -0
- unstructured_ingest/processes/connectors/sql/teradata.py +254 -0
- unstructured_ingest/processes/connectors/sql/vastdb.py +263 -0
- unstructured_ingest/processes/connectors/utils.py +60 -0
- unstructured_ingest/processes/connectors/vectara.py +348 -0
- unstructured_ingest/processes/connectors/weaviate/__init__.py +22 -0
- unstructured_ingest/processes/connectors/weaviate/cloud.py +166 -0
- unstructured_ingest/processes/connectors/weaviate/embedded.py +90 -0
- unstructured_ingest/processes/connectors/weaviate/local.py +73 -0
- unstructured_ingest/processes/connectors/weaviate/weaviate.py +337 -0
- unstructured_ingest/processes/connectors/zendesk/__init__.py +0 -0
- unstructured_ingest/processes/connectors/zendesk/client.py +314 -0
- unstructured_ingest/processes/connectors/zendesk/zendesk.py +241 -0
- unstructured_ingest/processes/embedder.py +203 -0
- unstructured_ingest/processes/filter.py +60 -0
- unstructured_ingest/processes/partitioner.py +233 -0
- unstructured_ingest/processes/uncompress.py +61 -0
- unstructured_ingest/processes/utils/__init__.py +8 -0
- unstructured_ingest/processes/utils/blob_storage.py +32 -0
- unstructured_ingest/processes/utils/logging/connector.py +365 -0
- unstructured_ingest/processes/utils/logging/sanitizer.py +117 -0
- unstructured_ingest/unstructured_api.py +140 -0
- unstructured_ingest/utils/__init__.py +5 -0
- unstructured_ingest/utils/chunking.py +56 -0
- unstructured_ingest/utils/compression.py +72 -0
- unstructured_ingest/utils/constants.py +2 -0
- unstructured_ingest/utils/data_prep.py +216 -0
- unstructured_ingest/utils/dep_check.py +78 -0
- unstructured_ingest/utils/filesystem.py +27 -0
- unstructured_ingest/utils/html.py +174 -0
- unstructured_ingest/utils/ndjson.py +52 -0
- unstructured_ingest/utils/pydantic_models.py +52 -0
- unstructured_ingest/utils/string_and_date_utils.py +74 -0
- unstructured_ingest/utils/table.py +80 -0
- unstructured_ingest/utils/tls.py +15 -0
- unstructured_ingest-1.2.32.dist-info/METADATA +235 -0
- unstructured_ingest-1.2.32.dist-info/RECORD +243 -0
- unstructured_ingest-1.2.32.dist-info/WHEEL +4 -0
- unstructured_ingest-1.2.32.dist-info/entry_points.txt +2 -0
- unstructured_ingest-1.2.32.dist-info/licenses/LICENSE.md +201 -0
unstructured_ingest/processes/connectors/notion/helpers.py
@@ -0,0 +1,448 @@
import enum
import logging
from dataclasses import dataclass, field
from typing import List, Optional, Tuple
from urllib.parse import urlparse
from uuid import UUID

from htmlBuilder.attributes import Style
from htmlBuilder.tags import (
    Body,
    Div,
    Head,
    Html,
    HtmlTag,
    Ol,
    Table,
    Td,
    Th,
    Title,
    Tr,
    Ul,
)
from notion_client.errors import APIResponseError

import unstructured_ingest.processes.connectors.notion.types.blocks as notion_blocks
from unstructured_ingest.processes.connectors.notion.client import Client
from unstructured_ingest.processes.connectors.notion.types.block import Block
from unstructured_ingest.processes.connectors.notion.types.database import Database


@dataclass
class HtmlExtractionResponse:
    html: Optional[HtmlTag] = None
    child_pages: List[str] = field(default_factory=list)
    child_databases: List[str] = field(default_factory=list)


def process_block(
    current_block: dict,
    parent_page_id: str,
    client: Client,
    child_pages: list,
    child_databases: list,
) -> Tuple[dict, list, list, dict]:
    if isinstance(current_block["block"].block, notion_blocks.ChildPage) and current_block[
        "block"
    ].id != str(parent_page_id):
        child_pages.append(current_block["block"].id)
        return {}, child_pages, child_databases
    if isinstance(current_block["block"].block, notion_blocks.ChildDatabase):
        child_databases.append(current_block["block"].id)
        return {}, child_pages, child_databases

    # recursively go through all blocks in a page, store each block in a dictionary
    if current_block["block"].has_children:
        children = []
        for children_block in client.blocks.children.iterate_list(
            block_id=current_block["block"].id
        ):
            children.extend(children_block)
        if children:
            for child in children:
                child_block = {
                    "block": child,
                    "level": current_block["level"] + 1,
                    "children": [],
                    "parent_id": current_block["block"].id,
                }
                child_element, child_pages, child_databases = process_block(
                    child_block, parent_page_id, client, child_pages, child_databases
                )
                current_block["children"].append(child_element)
    return current_block, child_pages, child_databases


def flush_list(type: str, item_list: list, html: list) -> Tuple[list, list]:
    margin_left = 10 * (item_list[-1][1] - 1)
    style = Style(f"margin-left: {margin_left}px")
    if type == "bulleted_list":
        html.append(Ul([style], [item[2] for item in item_list]))
    else:
        html.append(Ol([style], [item[2] for item in item_list]))
    return [], html


def build_html(
    current_block: dict, bulleted_list: list, numbered_list: list
) -> Tuple[list, list, list]:
    html = []
    # extract current block's html
    if isinstance(current_block["block"].block, notion_blocks.BulletedListItem):
        if bulleted_list and current_block["parent_id"] != bulleted_list[-1][0]:
            bulleted_list, html = flush_list("bulleted_list", bulleted_list, html)
        bulleted_list.append(
            (current_block["parent_id"], current_block["level"], current_block["block"].get_html())
        )
        if bulleted_list and current_block["peers_rank"] == current_block["peers_count"] - 1:
            bulleted_list, html = flush_list("bulleted_list", bulleted_list, html)
    elif isinstance(current_block["block"].block, notion_blocks.NumberedListItem):
        if numbered_list and current_block["parent_id"] != numbered_list[-1][0]:
            numbered_list, html = flush_list("numbered_list", numbered_list, html)
        numbered_list.append(
            (current_block["parent_id"], current_block["level"], current_block["block"].get_html())
        )
        if numbered_list and current_block["peers_rank"] == current_block["peers_count"] - 1:
            numbered_list, html = flush_list("numbered_list", numbered_list, html)
    else:
        if bulleted_list:
            bulleted_list, html = flush_list("bulleted_list", bulleted_list, html)
        if numbered_list:
            numbered_list, html = flush_list("numbered_list", numbered_list, html)
        if (
            isinstance(current_block["block"].block, notion_blocks.TableRow)
            and current_block["peers_rank"] == 0
        ):
            current_block["block"].is_header = True
        if current_block["block"].get_html():
            html.append(current_block["block"].get_html())
        else:
            html.append([])
    # process current block's children
    if current_block["children"]:
        children_html = []
        for index, child in enumerate(current_block["children"]):
            if child:
                child["peers_rank"] = index
                child["peers_count"] = len(current_block["children"])
                child_html, bulleted_list, numbered_list = build_html(
                    child, bulleted_list, numbered_list
                )
                if child_html:
                    children_html.append(child_html)
        if isinstance(current_block["block"].block, notion_blocks.Column):
            html.append(
                Div(
                    [Style(f"width:{100 / current_block['peers_count']}%; float: left")],
                    children_html,
                )
            )
        elif isinstance(current_block["block"].block, notion_blocks.Table):
            html.append(Table([], children_html))
        else:
            html.append(Div([], children_html))

    return html, bulleted_list, numbered_list


def extract_page_html(
    client: Client,
    page_id: str,
    logger: logging.Logger,
) -> HtmlExtractionResponse:
    parent_page_id = UUID(page_id)
    parent_block: Block = client.blocks.retrieve(block_id=page_id) # type: ignore
    head = None
    if isinstance(parent_block.block, notion_blocks.ChildPage):
        head = Head([], Title([], parent_block.block.title))
    current_block = {
        "block": parent_block,
        "level": 0,
        "children": [],
        "parent_id": None,
        "peers_rank": 0,
        "peers_count": 1,
    }
    logger.debug(f"processing page id: {page_id}")
    current_block, child_pages, child_databases = process_block(
        current_block, parent_page_id, client, [], []
    )
    html, _, _ = build_html(current_block, [], [])
    body = Body([], html)
    all_elements = [body]
    if head:
        all_elements = [head] + all_elements
    full_html = Html([], all_elements)
    return HtmlExtractionResponse(
        full_html,
        child_pages=child_pages,
        child_databases=child_databases,
    )


def extract_database_html(
    client: Client,
    database_id: str,
    logger: logging.Logger,
) -> HtmlExtractionResponse:
    logger.debug(f"processing database id: {database_id}")
    database: Database = client.databases.retrieve(database_id=database_id) # type: ignore
    property_keys = list(database.properties.keys())
    property_keys = sorted(property_keys)
    table_html_rows = []
    child_pages: List[str] = []
    child_databases: List[str] = []
    # Create header row
    table_html_rows.append(Tr([], [Th([], k) for k in property_keys]))

    all_pages = []
    for page_chunk in client.databases.iterate_query(database_id=database_id): # type: ignore
        all_pages.extend(page_chunk)

    logger.debug(f"creating {len(all_pages)} rows")
    for page in all_pages:
        if is_database_url(client=client, url=page.url):
            child_databases.append(page.id)
        if is_page_url(client=client, url=page.url):
            child_pages.append(page.id)
        properties = page.properties
        inner_html = [properties.get(k).get_html() for k in property_keys] # type: ignore
        table_html_rows.append(
            Tr(
                [],
                [Td([], cell) for cell in [html if html else Div([], []) for html in inner_html]],
            ),
        )

    table_html = Table([], table_html_rows)

    return HtmlExtractionResponse(
        html=table_html,
        child_pages=child_pages,
        child_databases=child_databases,
    )


@dataclass
class ChildExtractionResponse:
    child_pages: List[str] = field(default_factory=list)
    child_databases: List[str] = field(default_factory=list)


class QueueEntryType(enum.Enum):
    DATABASE = "database"
    PAGE = "page"


@dataclass
class QueueEntry:
    type: QueueEntryType
    id: UUID


def get_recursive_content_from_page(
    client: Client,
    page_id: str,
    logger: logging.Logger,
) -> ChildExtractionResponse:
    return get_recursive_content(
        client=client,
        init_entry=QueueEntry(type=QueueEntryType.PAGE, id=UUID(page_id)),
        logger=logger,
    )


def get_recursive_content_from_database(
    client: Client,
    database_id: str,
    logger: logging.Logger,
) -> ChildExtractionResponse:
    return get_recursive_content(
        client=client,
        init_entry=QueueEntry(type=QueueEntryType.DATABASE, id=UUID(database_id)),
        logger=logger,
    )


def get_recursive_content(
    client: Client,
    init_entry: QueueEntry,
    logger: logging.Logger,
) -> ChildExtractionResponse:
    parents: List[QueueEntry] = [init_entry]
    child_pages: List[str] = []
    child_dbs: List[str] = []
    processed: List[str] = []
    while len(parents) > 0:
        parent: QueueEntry = parents.pop()
        processed.append(str(parent.id))
        if parent.type == QueueEntryType.PAGE:
            logger.debug(f"getting child data from page: {parent.id}")
            page_children = []
            try:
                for children_block in client.blocks.children.iterate_list( # type: ignore
                    block_id=str(parent.id),
                ):
                    page_children.extend(children_block)
            except APIResponseError as api_error:
                logger.error(f"failed to get page with id {parent.id}: {api_error}")
                if str(parent.id) in child_pages:
                    child_pages.remove(str(parent.id))
                continue
            if not page_children:
                continue

            # Extract child pages
            child_pages_from_page = [
                c for c in page_children if isinstance(c.block, notion_blocks.ChildPage)
            ]
            if child_pages_from_page:
                child_page_blocks: List[notion_blocks.ChildPage] = [
                    p.block
                    for p in child_pages_from_page
                    if isinstance(p.block, notion_blocks.ChildPage)
                ]
                logger.debug(
                    "found child pages from parent page {}: {}".format(
                        parent.id,
                        ", ".join([block.title for block in child_page_blocks]),
                    ),
                )
                new_pages = [p.id for p in child_pages_from_page if p.id not in processed]
                new_pages = list(set(new_pages))
                child_pages.extend(new_pages)
                parents.extend(
                    [QueueEntry(type=QueueEntryType.PAGE, id=UUID(i)) for i in new_pages],
                )

            # Extract child databases
            child_dbs_from_page = [
                c for c in page_children if isinstance(c.block, notion_blocks.ChildDatabase)
            ]
            if child_dbs_from_page:
                child_db_blocks: List[notion_blocks.ChildDatabase] = [
                    c.block
                    for c in page_children
                    if isinstance(c.block, notion_blocks.ChildDatabase)
                ]
                logger.debug(
                    "found child database from parent page {}: {}".format(
                        parent.id,
                        ", ".join([block.title for block in child_db_blocks]),
                    ),
                )
                new_dbs = [db.id for db in child_dbs_from_page if db.id not in processed]
                new_dbs = list(set(new_dbs))
                child_dbs.extend(new_dbs)
                parents.extend(
                    [QueueEntry(type=QueueEntryType.DATABASE, id=UUID(i)) for i in new_dbs],
                )

            linked_to_others: List[notion_blocks.LinkToPage] = [
                c.block for c in page_children if isinstance(c.block, notion_blocks.LinkToPage)
            ]
            for link in linked_to_others:
                if (page_id := link.page_id) and (
                    page_id not in processed and page_id not in child_pages
                ):
                    child_pages.append(page_id)
                    parents.append(QueueEntry(type=QueueEntryType.PAGE, id=UUID(page_id)))
                if (database_id := link.database_id) and (
                    database_id not in processed and database_id not in child_dbs
                ):
                    child_dbs.append(database_id)
                    parents.append(
                        QueueEntry(type=QueueEntryType.DATABASE, id=UUID(database_id)),
                    )

        elif parent.type == QueueEntryType.DATABASE:
            logger.debug(f"getting child data from database: {parent.id}")
            database_pages = []
            try:
                for page_entries in client.databases.iterate_query( # type: ignore
                    database_id=str(parent.id),
                ):
                    database_pages.extend(page_entries)
            except APIResponseError as api_error:
                logger.error(f"failed to get database with id {parent.id}: {api_error}")
                if str(parent.id) in child_dbs:
                    child_dbs.remove(str(parent.id))
                continue
            if not database_pages:
                continue

            child_pages_from_db = [
                p for p in database_pages if is_page_url(client=client, url=p.url)
            ]
            if child_pages_from_db:
                logger.debug(
                    "found child pages from parent database {}: {}".format(
                        parent.id,
                        ", ".join([p.url for p in child_pages_from_db]),
                    ),
                )
                new_pages = [p.id for p in child_pages_from_db if p.id not in processed]
                child_pages.extend(new_pages)
                parents.extend(
                    [QueueEntry(type=QueueEntryType.PAGE, id=UUID(i)) for i in new_pages],
                )

            child_dbs_from_db = [
                p for p in database_pages if is_database_url(client=client, url=p.url)
            ]
            if child_dbs_from_db:
                logger.debug(
                    "found child database from parent database {}: {}".format(
                        parent.id,
                        ", ".join([db.url for db in child_dbs_from_db]),
                    ),
                )
                new_dbs = [db.id for db in child_dbs_from_db if db.id not in processed]
                child_dbs.extend(new_dbs)
                parents.extend(
                    [QueueEntry(type=QueueEntryType.DATABASE, id=UUID(i)) for i in new_dbs],
                )

    return ChildExtractionResponse(
        child_pages=child_pages,
        child_databases=child_dbs,
    )


def is_valid_uuid(uuid_str: str) -> bool:
    try:
        UUID(uuid_str)
        return True
    except Exception:
        return False


def get_uuid_from_url(path: str) -> Optional[str]:
    strings = path.split("-")
    if len(strings) > 0 and is_valid_uuid(strings[-1]):
        return strings[-1]
    return None


def is_page_url(client: Client, url: str):
    parsed_url = urlparse(url)
    path = parsed_url.path.split("/")[-1]
    if parsed_url.netloc != "www.notion.so":
        return False
    page_uuid = get_uuid_from_url(path=path)
    if not page_uuid:
        return False
    check_resp = client.pages.retrieve_status(page_id=page_uuid)
    return check_resp == 200


def is_database_url(client: Client, url: str):
    parsed_url = urlparse(url)
    path = parsed_url.path.split("/")[-1]
    if parsed_url.netloc != "www.notion.so":
        return False
    database_uuid = get_uuid_from_url(path=path)
    if not database_uuid:
        return False
    check_resp = client.databases.retrieve_status(database_id=database_uuid)
    return check_resp == 200
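
For context, a minimal usage sketch of the helpers above (not part of the package): it assumes the connector's Client accepts a Notion integration token via an auth argument and that htmlBuilder tags expose a render() method; the page UUID is a placeholder.

import logging

from unstructured_ingest.processes.connectors.notion.client import Client
from unstructured_ingest.processes.connectors.notion.helpers import (
    extract_page_html,
    get_recursive_content_from_page,
)

logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger("notion-example")

client = Client(auth="<notion-integration-token>")  # assumed constructor signature
page_id = "00000000-0000-0000-0000-000000000000"  # placeholder page UUID

# Render a single page to HTML and collect the child pages/databases it references.
html_resp = extract_page_html(client=client, page_id=page_id, logger=logger)
if html_resp.html is not None:
    print(html_resp.html.render())  # assumed htmlBuilder render API

# Walk the page tree and list everything reachable from the page.
children = get_recursive_content_from_page(client=client, page_id=page_id, logger=logger)
print(f"child pages: {children.child_pages}")
print(f"child databases: {children.child_databases}")
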
unstructured_ingest/processes/connectors/notion/ingest_backoff/_common.py
@@ -0,0 +1,102 @@
import logging
import sys
import traceback


# Default startup handler
def _log_start(details, logger, log_level):
    max_tried = details.get("max_tries")
    max_time = details.get("max_time")
    if max_tried is not None and max_time is not None:
        s = "%.1fs or %d tries"
        s_args = [max_time, max_tried]
    elif max_tried is not None:
        s = "%d tries"
        s_args = [max_tried]
    else:
        s = "%.1fs"
        s_args = [max_time]
    exception = details.get("exception")
    if isinstance(exception, tuple):
        exception = list(exception)
    elif not isinstance(exception, list):
        exception = [exception]
    exception_s = ", ".join([e.__name__ for e in exception])
    if log_level >= logging.INFO:
        msg = f"Attempting %s(...), will retry for {s} given these issues: %s"
        log_args = [details["target"].__name__] + s_args + [exception_s]
    else:
        msg = f"Attempting %s(%s), will retry for {s} given these issues: %s"
        target_input_list = []
        if args := details.get("args"):
            target_input_list.extend([str(d) for d in args])
        if kwargs := details.get("kwargs"):
            target_input_list.extend([f"{k}={str(v)}" for k, v in kwargs.items()])
        target_input = ", ".join(target_input_list) if target_input_list else ""
        log_args = (
            [
                details["target"].__name__,
                target_input,
            ]
            + s_args
            + [exception_s]
        )
    logger.log(log_level, msg, *log_args)


# Default backoff handler
def _log_backoff(details, logger, log_level):
    if log_level >= logging.INFO:
        msg = "Backing off %s(...) for %.1fs (%s)"
        log_args = [details["target"].__name__, details["tries"]]
    else:
        msg = "Backing off %.1fs seconds after %d tries calling function %s(%s) -> %s"
        target_input_list = []
        if args := details.get("args"):
            target_input_list.extend([str(d) for d in args])
        if kwargs := details.get("kwargs"):
            target_input_list.extend([f"{k}={str(v)}" for k, v in kwargs.items()])
        target_input = ", ".join(target_input_list) if target_input_list else ""
        log_args = [
            details["wait"],
            details["tries"],
            details["target"].__name__,
            target_input,
        ]
    exc_typ, exc, _ = sys.exc_info()
    if exc is not None:
        exc_fmt = traceback.format_exception_only(exc_typ, exc)[-1]
        log_args.append(exc_fmt.rstrip("\n"))
    else:
        log_args.append(str(details["value"]))
    logger.log(log_level, msg, *log_args)


# Default giveup handler
def _log_giveup(details, logger, log_level):
    if log_level >= logging.INFO:
        msg = "Giving up %s(...) after %.1fs (%s)"
        log_args = [details["target"].__name__, details["tries"]]
    else:
        msg = "Giving up after %d tries (%.1fs) calling function %s(%s) -> %s"
        target_input_list = []
        if args := details.get("args"):
            target_input_list.extend([str(d) for d in args])
        if kwargs := details.get("kwargs"):
            target_input_list.extend([f"{k}={str(v)}" for k, v in kwargs.items()])
        target_input = ", ".join(target_input_list) if target_input_list else "..."
        log_args = [
            details["tries"],
            details["wait"],
            details["target"].__name__,
            target_input,
        ]

    exc_typ, exc, _ = sys.exc_info()
    if exc is not None:
        exc_fmt = traceback.format_exception_only(exc_typ, exc)[-1]
        log_args.append(exc_fmt.rstrip("\n"))
    else:
        log_args.append(details["value"])

    logger.log(log_level, msg, *log_args)
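
A small illustrative sketch (not from the package) of how these handlers are driven: each one takes backoff's standard details dict plus a logger and log level, which the RetryHandler in the next file binds through backoff's _config_handlers; the flaky function and the details values below are made up.

import functools
import logging

from unstructured_ingest.processes.connectors.notion.ingest_backoff._common import _log_backoff

logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger("backoff-demo")


def flaky():  # hypothetical target, used only to populate details["target"]
    raise RuntimeError("boom")


# Bind logger and log level up front, similar to how the wrapper configures its default handlers.
handler = functools.partial(_log_backoff, logger=logger, log_level=logging.DEBUG)

# backoff passes a "details" dict shaped like this to on_backoff handlers.
details = {
    "target": flaky,
    "args": (),
    "kwargs": {},
    "tries": 2,
    "elapsed": 0.5,
    "wait": 1.0,
    "value": None,
}
handler(details)  # logs: Backing off 1.0s seconds after 2 tries calling function flaky() -> None
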
unstructured_ingest/processes/connectors/notion/ingest_backoff/_wrapper.py
@@ -0,0 +1,126 @@
# coding:utf-8
import logging
from collections.abc import Iterable as IterableType
from typing import Any, Iterable, Optional, Type, Union

from backoff import _sync
from backoff._common import _config_handlers, _prepare_logger
from backoff._jitter import full_jitter
from backoff._typing import (
    _Handler,
    _Jitterer,
    _MaybeCallable,
    _MaybeLogger,
    _MaybeSequence,
    _Predicate,
    _WaitGenerator,
)

from unstructured_ingest.processes.connectors.notion.ingest_backoff._common import (
    _log_backoff,
    _log_giveup,
    _log_start,
)


class RetryHandler:
    def __init__(
        self,
        wait_gen: _WaitGenerator,
        exception: _MaybeSequence[Type[Exception]],
        *,
        max_tries: Optional[_MaybeCallable[int]] = None,
        max_time: Optional[_MaybeCallable[float]] = None,
        jitter: Union[_Jitterer, None] = full_jitter,
        giveup: _Predicate[Exception] = lambda e: False,
        on_start: Union[_Handler, Iterable[_Handler], None] = None,
        on_success: Union[_Handler, Iterable[_Handler], None] = None,
        on_backoff: Union[_Handler, Iterable[_Handler], None] = None,
        on_giveup: Union[_Handler, Iterable[_Handler], None] = None,
        raise_on_giveup: bool = True,
        logger: _MaybeLogger = "backoff",
        start_log_level: int = logging.INFO,
        backoff_log_level: int = logging.INFO,
        giveup_log_level: int = logging.ERROR,
        **wait_gen_kwargs: Any,
    ):
        prepared_logger = _prepare_logger(logger)
        on_success = _config_handlers(on_success)
        on_start = _config_handlers(
            on_start,
            default_handler=_log_start,
            logger=prepared_logger,
            log_level=start_log_level,
        )
        on_backoff = _config_handlers(
            on_backoff,
            default_handler=_log_backoff,
            logger=prepared_logger,
            log_level=backoff_log_level,
        )
        on_giveup = _config_handlers(
            on_giveup,
            default_handler=_log_giveup,
            logger=prepared_logger,
            log_level=giveup_log_level,
        )
        prepared_logger.debug(
            "Initiating retry handler with "
            "max_tries={}, "
            "max_time={}, "
            "exception={}, "
            "start_log_level={}, "
            "backoff_log_level={}, "
            "giveup_log_level={}".format(
                max_tries,
                max_time,
                (
                    ", ".join([e.__name__ for e in exception])
                    if isinstance(exception, IterableType)
                    else exception.__name__
                ),
                logging.getLevelName(start_log_level),
                logging.getLevelName(backoff_log_level),
                logging.getLevelName(giveup_log_level),
            ),
        )
        self.on_start = on_start
        self.on_success = on_success
        self.on_backoff = on_backoff
        self.on_giveup = on_giveup
        self.jitter = jitter
        self.giveup = giveup
        self.raise_on_giveup = raise_on_giveup
        self.wait_gen_kwargs = wait_gen_kwargs
        self.wait_gen = wait_gen
        self.exception = exception
        self.max_tries = max_tries
        self.max_time = max_time

    def __call__(self, target, *args, **kwargs):
        _sync._call_handlers(
            self.on_start,
            target=target,
            args=args,
            kwargs=kwargs,
            tries=None,
            elapsed=None,
            max_tries=self.max_tries,
            max_time=self.max_time,
            exception=self.exception,
        )
        wrapped_func = _sync.retry_exception(
            target,
            self.wait_gen,
            self.exception,
            max_tries=self.max_tries,
            max_time=self.max_time,
            jitter=self.jitter,
            giveup=self.giveup,
            on_success=self.on_success,
            on_backoff=self.on_backoff,
            on_giveup=self.on_giveup,
            raise_on_giveup=self.raise_on_giveup,
            wait_gen_kwargs=self.wait_gen_kwargs,
        )
        return wrapped_func(*args, **kwargs)