unstructured-ingest 0.3.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of unstructured-ingest might be problematic. Click here for more details.
- test/__init__.py +0 -0
- test/integration/__init__.py +0 -0
- test/integration/chunkers/__init__.py +0 -0
- test/integration/chunkers/test_chunkers.py +31 -0
- test/integration/connectors/__init__.py +0 -0
- test/integration/connectors/conftest.py +38 -0
- test/integration/connectors/databricks/__init__.py +0 -0
- test/integration/connectors/databricks/test_volumes_native.py +269 -0
- test/integration/connectors/discord/__init__.py +0 -0
- test/integration/connectors/discord/test_discord.py +90 -0
- test/integration/connectors/duckdb/__init__.py +0 -0
- test/integration/connectors/duckdb/conftest.py +14 -0
- test/integration/connectors/duckdb/test_duckdb.py +89 -0
- test/integration/connectors/duckdb/test_motherduck.py +95 -0
- test/integration/connectors/elasticsearch/__init__.py +0 -0
- test/integration/connectors/elasticsearch/conftest.py +34 -0
- test/integration/connectors/elasticsearch/test_elasticsearch.py +330 -0
- test/integration/connectors/elasticsearch/test_opensearch.py +325 -0
- test/integration/connectors/sql/__init__.py +0 -0
- test/integration/connectors/sql/test_postgres.py +195 -0
- test/integration/connectors/sql/test_singlestore.py +176 -0
- test/integration/connectors/sql/test_snowflake.py +238 -0
- test/integration/connectors/sql/test_sqlite.py +162 -0
- test/integration/connectors/test_astradb.py +217 -0
- test/integration/connectors/test_azure_ai_search.py +255 -0
- test/integration/connectors/test_chroma.py +120 -0
- test/integration/connectors/test_confluence.py +113 -0
- test/integration/connectors/test_delta_table.py +185 -0
- test/integration/connectors/test_lancedb.py +247 -0
- test/integration/connectors/test_milvus.py +203 -0
- test/integration/connectors/test_mongodb.py +335 -0
- test/integration/connectors/test_neo4j.py +236 -0
- test/integration/connectors/test_notion.py +145 -0
- test/integration/connectors/test_onedrive.py +118 -0
- test/integration/connectors/test_pinecone.py +288 -0
- test/integration/connectors/test_qdrant.py +215 -0
- test/integration/connectors/test_redis.py +119 -0
- test/integration/connectors/test_s3.py +183 -0
- test/integration/connectors/test_vectara.py +270 -0
- test/integration/connectors/utils/__init__.py +0 -0
- test/integration/connectors/utils/constants.py +7 -0
- test/integration/connectors/utils/docker.py +151 -0
- test/integration/connectors/utils/docker_compose.py +59 -0
- test/integration/connectors/utils/validation/__init__.py +0 -0
- test/integration/connectors/utils/validation/destination.py +75 -0
- test/integration/connectors/utils/validation/equality.py +75 -0
- test/integration/connectors/utils/validation/source.py +299 -0
- test/integration/connectors/utils/validation/utils.py +36 -0
- test/integration/connectors/weaviate/__init__.py +0 -0
- test/integration/connectors/weaviate/conftest.py +15 -0
- test/integration/connectors/weaviate/test_cloud.py +34 -0
- test/integration/connectors/weaviate/test_local.py +131 -0
- test/integration/embedders/__init__.py +0 -0
- test/integration/embedders/conftest.py +13 -0
- test/integration/embedders/test_azure_openai.py +59 -0
- test/integration/embedders/test_bedrock.py +103 -0
- test/integration/embedders/test_huggingface.py +26 -0
- test/integration/embedders/test_mixedbread.py +71 -0
- test/integration/embedders/test_octoai.py +77 -0
- test/integration/embedders/test_openai.py +76 -0
- test/integration/embedders/test_togetherai.py +71 -0
- test/integration/embedders/test_vertexai.py +65 -0
- test/integration/embedders/test_voyageai.py +65 -0
- test/integration/embedders/utils.py +68 -0
- test/integration/partitioners/__init__.py +0 -0
- test/integration/partitioners/test_partitioner.py +75 -0
- test/integration/utils.py +15 -0
- test/unit/__init__.py +0 -0
- test/unit/embed/__init__.py +0 -0
- test/unit/embed/test_mixedbreadai.py +42 -0
- test/unit/embed/test_octoai.py +27 -0
- test/unit/embed/test_openai.py +20 -0
- test/unit/embed/test_vertexai.py +25 -0
- test/unit/embed/test_voyageai.py +24 -0
- test/unit/test_error.py +27 -0
- test/unit/test_logger.py +78 -0
- test/unit/test_utils.py +184 -0
- test/unit/v2/__init__.py +0 -0
- test/unit/v2/chunkers/__init__.py +0 -0
- test/unit/v2/chunkers/test_chunkers.py +49 -0
- test/unit/v2/connectors/__init__.py +0 -0
- test/unit/v2/connectors/test_confluence.py +39 -0
- test/unit/v2/embedders/__init__.py +0 -0
- test/unit/v2/embedders/test_bedrock.py +36 -0
- test/unit/v2/embedders/test_huggingface.py +48 -0
- test/unit/v2/embedders/test_mixedbread.py +37 -0
- test/unit/v2/embedders/test_octoai.py +35 -0
- test/unit/v2/embedders/test_openai.py +35 -0
- test/unit/v2/embedders/test_togetherai.py +37 -0
- test/unit/v2/embedders/test_vertexai.py +37 -0
- test/unit/v2/embedders/test_voyageai.py +38 -0
- test/unit/v2/partitioners/__init__.py +0 -0
- test/unit/v2/partitioners/test_partitioner.py +63 -0
- test/unit/v2/test_interfaces.py +26 -0
- test/unit/v2/test_utils.py +82 -0
- test/unit/v2/utils/__init__.py +0 -0
- test/unit/v2/utils/data_generator.py +32 -0
- unstructured_ingest/__init__.py +1 -0
- unstructured_ingest/__version__.py +1 -0
- unstructured_ingest/cli/__init__.py +14 -0
- unstructured_ingest/cli/base/__init__.py +0 -0
- unstructured_ingest/cli/base/cmd.py +19 -0
- unstructured_ingest/cli/base/dest.py +87 -0
- unstructured_ingest/cli/base/src.py +57 -0
- unstructured_ingest/cli/cli.py +37 -0
- unstructured_ingest/cli/cmd_factory.py +12 -0
- unstructured_ingest/cli/cmds/__init__.py +145 -0
- unstructured_ingest/cli/cmds/airtable.py +69 -0
- unstructured_ingest/cli/cmds/astradb.py +99 -0
- unstructured_ingest/cli/cmds/azure_ai_search.py +65 -0
- unstructured_ingest/cli/cmds/biomed.py +52 -0
- unstructured_ingest/cli/cmds/chroma.py +104 -0
- unstructured_ingest/cli/cmds/clarifai.py +71 -0
- unstructured_ingest/cli/cmds/confluence.py +69 -0
- unstructured_ingest/cli/cmds/databricks_volumes.py +163 -0
- unstructured_ingest/cli/cmds/delta_table.py +94 -0
- unstructured_ingest/cli/cmds/discord.py +47 -0
- unstructured_ingest/cli/cmds/elasticsearch.py +133 -0
- unstructured_ingest/cli/cmds/fsspec/__init__.py +0 -0
- unstructured_ingest/cli/cmds/fsspec/azure.py +94 -0
- unstructured_ingest/cli/cmds/fsspec/box.py +48 -0
- unstructured_ingest/cli/cmds/fsspec/dropbox.py +51 -0
- unstructured_ingest/cli/cmds/fsspec/fsspec.py +15 -0
- unstructured_ingest/cli/cmds/fsspec/gcs.py +71 -0
- unstructured_ingest/cli/cmds/fsspec/s3.py +74 -0
- unstructured_ingest/cli/cmds/fsspec/sftp.py +58 -0
- unstructured_ingest/cli/cmds/github.py +54 -0
- unstructured_ingest/cli/cmds/gitlab.py +54 -0
- unstructured_ingest/cli/cmds/google_drive.py +49 -0
- unstructured_ingest/cli/cmds/hubspot.py +70 -0
- unstructured_ingest/cli/cmds/jira.py +71 -0
- unstructured_ingest/cli/cmds/kafka.py +102 -0
- unstructured_ingest/cli/cmds/local.py +43 -0
- unstructured_ingest/cli/cmds/mongodb.py +72 -0
- unstructured_ingest/cli/cmds/notion.py +48 -0
- unstructured_ingest/cli/cmds/onedrive.py +66 -0
- unstructured_ingest/cli/cmds/opensearch.py +117 -0
- unstructured_ingest/cli/cmds/outlook.py +67 -0
- unstructured_ingest/cli/cmds/pinecone.py +71 -0
- unstructured_ingest/cli/cmds/qdrant.py +124 -0
- unstructured_ingest/cli/cmds/reddit.py +67 -0
- unstructured_ingest/cli/cmds/salesforce.py +58 -0
- unstructured_ingest/cli/cmds/sharepoint.py +66 -0
- unstructured_ingest/cli/cmds/slack.py +56 -0
- unstructured_ingest/cli/cmds/sql.py +66 -0
- unstructured_ingest/cli/cmds/vectara.py +66 -0
- unstructured_ingest/cli/cmds/weaviate.py +98 -0
- unstructured_ingest/cli/cmds/wikipedia.py +40 -0
- unstructured_ingest/cli/common.py +7 -0
- unstructured_ingest/cli/interfaces.py +663 -0
- unstructured_ingest/cli/utils.py +205 -0
- unstructured_ingest/connector/__init__.py +0 -0
- unstructured_ingest/connector/airtable.py +309 -0
- unstructured_ingest/connector/astradb.py +267 -0
- unstructured_ingest/connector/azure_ai_search.py +144 -0
- unstructured_ingest/connector/biomed.py +320 -0
- unstructured_ingest/connector/chroma.py +158 -0
- unstructured_ingest/connector/clarifai.py +122 -0
- unstructured_ingest/connector/confluence.py +285 -0
- unstructured_ingest/connector/databricks_volumes.py +137 -0
- unstructured_ingest/connector/delta_table.py +203 -0
- unstructured_ingest/connector/discord.py +180 -0
- unstructured_ingest/connector/elasticsearch.py +396 -0
- unstructured_ingest/connector/fsspec/__init__.py +0 -0
- unstructured_ingest/connector/fsspec/azure.py +78 -0
- unstructured_ingest/connector/fsspec/box.py +109 -0
- unstructured_ingest/connector/fsspec/dropbox.py +160 -0
- unstructured_ingest/connector/fsspec/fsspec.py +359 -0
- unstructured_ingest/connector/fsspec/gcs.py +82 -0
- unstructured_ingest/connector/fsspec/s3.py +62 -0
- unstructured_ingest/connector/fsspec/sftp.py +81 -0
- unstructured_ingest/connector/git.py +124 -0
- unstructured_ingest/connector/github.py +174 -0
- unstructured_ingest/connector/gitlab.py +142 -0
- unstructured_ingest/connector/google_drive.py +348 -0
- unstructured_ingest/connector/hubspot.py +278 -0
- unstructured_ingest/connector/jira.py +469 -0
- unstructured_ingest/connector/kafka.py +293 -0
- unstructured_ingest/connector/local.py +139 -0
- unstructured_ingest/connector/mongodb.py +284 -0
- unstructured_ingest/connector/notion/__init__.py +0 -0
- unstructured_ingest/connector/notion/client.py +248 -0
- unstructured_ingest/connector/notion/connector.py +469 -0
- unstructured_ingest/connector/notion/helpers.py +584 -0
- unstructured_ingest/connector/notion/interfaces.py +32 -0
- unstructured_ingest/connector/notion/types/__init__.py +0 -0
- unstructured_ingest/connector/notion/types/block.py +96 -0
- unstructured_ingest/connector/notion/types/blocks/__init__.py +63 -0
- unstructured_ingest/connector/notion/types/blocks/bookmark.py +40 -0
- unstructured_ingest/connector/notion/types/blocks/breadcrumb.py +21 -0
- unstructured_ingest/connector/notion/types/blocks/bulleted_list_item.py +31 -0
- unstructured_ingest/connector/notion/types/blocks/callout.py +94 -0
- unstructured_ingest/connector/notion/types/blocks/child_database.py +23 -0
- unstructured_ingest/connector/notion/types/blocks/child_page.py +23 -0
- unstructured_ingest/connector/notion/types/blocks/code.py +43 -0
- unstructured_ingest/connector/notion/types/blocks/column_list.py +35 -0
- unstructured_ingest/connector/notion/types/blocks/divider.py +22 -0
- unstructured_ingest/connector/notion/types/blocks/embed.py +36 -0
- unstructured_ingest/connector/notion/types/blocks/equation.py +23 -0
- unstructured_ingest/connector/notion/types/blocks/file.py +49 -0
- unstructured_ingest/connector/notion/types/blocks/heading.py +37 -0
- unstructured_ingest/connector/notion/types/blocks/image.py +21 -0
- unstructured_ingest/connector/notion/types/blocks/link_preview.py +24 -0
- unstructured_ingest/connector/notion/types/blocks/link_to_page.py +29 -0
- unstructured_ingest/connector/notion/types/blocks/numbered_list.py +29 -0
- unstructured_ingest/connector/notion/types/blocks/paragraph.py +31 -0
- unstructured_ingest/connector/notion/types/blocks/pdf.py +49 -0
- unstructured_ingest/connector/notion/types/blocks/quote.py +37 -0
- unstructured_ingest/connector/notion/types/blocks/synced_block.py +57 -0
- unstructured_ingest/connector/notion/types/blocks/table.py +63 -0
- unstructured_ingest/connector/notion/types/blocks/table_of_contents.py +23 -0
- unstructured_ingest/connector/notion/types/blocks/template.py +30 -0
- unstructured_ingest/connector/notion/types/blocks/todo.py +42 -0
- unstructured_ingest/connector/notion/types/blocks/toggle.py +37 -0
- unstructured_ingest/connector/notion/types/blocks/unsupported.py +20 -0
- unstructured_ingest/connector/notion/types/blocks/video.py +22 -0
- unstructured_ingest/connector/notion/types/database.py +73 -0
- unstructured_ingest/connector/notion/types/database_properties/__init__.py +106 -0
- unstructured_ingest/connector/notion/types/database_properties/checkbox.py +38 -0
- unstructured_ingest/connector/notion/types/database_properties/created_by.py +35 -0
- unstructured_ingest/connector/notion/types/database_properties/created_time.py +34 -0
- unstructured_ingest/connector/notion/types/database_properties/date.py +41 -0
- unstructured_ingest/connector/notion/types/database_properties/email.py +36 -0
- unstructured_ingest/connector/notion/types/database_properties/files.py +37 -0
- unstructured_ingest/connector/notion/types/database_properties/formula.py +49 -0
- unstructured_ingest/connector/notion/types/database_properties/last_edited_by.py +34 -0
- unstructured_ingest/connector/notion/types/database_properties/last_edited_time.py +34 -0
- unstructured_ingest/connector/notion/types/database_properties/multiselect.py +73 -0
- unstructured_ingest/connector/notion/types/database_properties/number.py +49 -0
- unstructured_ingest/connector/notion/types/database_properties/people.py +40 -0
- unstructured_ingest/connector/notion/types/database_properties/phone_number.py +36 -0
- unstructured_ingest/connector/notion/types/database_properties/relation.py +67 -0
- unstructured_ingest/connector/notion/types/database_properties/rich_text.py +43 -0
- unstructured_ingest/connector/notion/types/database_properties/rollup.py +56 -0
- unstructured_ingest/connector/notion/types/database_properties/select.py +68 -0
- unstructured_ingest/connector/notion/types/database_properties/status.py +80 -0
- unstructured_ingest/connector/notion/types/database_properties/title.py +37 -0
- unstructured_ingest/connector/notion/types/database_properties/unique_id.py +50 -0
- unstructured_ingest/connector/notion/types/database_properties/url.py +37 -0
- unstructured_ingest/connector/notion/types/database_properties/verification.py +78 -0
- unstructured_ingest/connector/notion/types/date.py +26 -0
- unstructured_ingest/connector/notion/types/file.py +51 -0
- unstructured_ingest/connector/notion/types/page.py +45 -0
- unstructured_ingest/connector/notion/types/parent.py +66 -0
- unstructured_ingest/connector/notion/types/rich_text.py +189 -0
- unstructured_ingest/connector/notion/types/user.py +76 -0
- unstructured_ingest/connector/onedrive.py +232 -0
- unstructured_ingest/connector/opensearch.py +218 -0
- unstructured_ingest/connector/outlook.py +285 -0
- unstructured_ingest/connector/pinecone.py +140 -0
- unstructured_ingest/connector/qdrant.py +144 -0
- unstructured_ingest/connector/reddit.py +166 -0
- unstructured_ingest/connector/registry.py +109 -0
- unstructured_ingest/connector/salesforce.py +301 -0
- unstructured_ingest/connector/sharepoint.py +573 -0
- unstructured_ingest/connector/slack.py +224 -0
- unstructured_ingest/connector/sql.py +199 -0
- unstructured_ingest/connector/vectara.py +253 -0
- unstructured_ingest/connector/weaviate.py +190 -0
- unstructured_ingest/connector/wikipedia.py +208 -0
- unstructured_ingest/embed/__init__.py +0 -0
- unstructured_ingest/embed/azure_openai.py +31 -0
- unstructured_ingest/embed/bedrock.py +193 -0
- unstructured_ingest/embed/huggingface.py +52 -0
- unstructured_ingest/embed/interfaces.py +117 -0
- unstructured_ingest/embed/mixedbreadai.py +233 -0
- unstructured_ingest/embed/octoai.py +130 -0
- unstructured_ingest/embed/openai.py +116 -0
- unstructured_ingest/embed/togetherai.py +106 -0
- unstructured_ingest/embed/vertexai.py +126 -0
- unstructured_ingest/embed/voyageai.py +130 -0
- unstructured_ingest/enhanced_dataclass/__init__.py +4 -0
- unstructured_ingest/enhanced_dataclass/core.py +99 -0
- unstructured_ingest/enhanced_dataclass/dataclasses.py +54 -0
- unstructured_ingest/enhanced_dataclass/json_mixin.py +125 -0
- unstructured_ingest/error.py +49 -0
- unstructured_ingest/ingest_backoff/__init__.py +3 -0
- unstructured_ingest/ingest_backoff/_common.py +102 -0
- unstructured_ingest/ingest_backoff/_wrapper.py +122 -0
- unstructured_ingest/interfaces.py +852 -0
- unstructured_ingest/logger.py +130 -0
- unstructured_ingest/main.py +11 -0
- unstructured_ingest/pipeline/__init__.py +22 -0
- unstructured_ingest/pipeline/copy.py +19 -0
- unstructured_ingest/pipeline/doc_factory.py +12 -0
- unstructured_ingest/pipeline/interfaces.py +270 -0
- unstructured_ingest/pipeline/partition.py +60 -0
- unstructured_ingest/pipeline/permissions.py +12 -0
- unstructured_ingest/pipeline/pipeline.py +117 -0
- unstructured_ingest/pipeline/reformat/__init__.py +0 -0
- unstructured_ingest/pipeline/reformat/chunking.py +134 -0
- unstructured_ingest/pipeline/reformat/embedding.py +64 -0
- unstructured_ingest/pipeline/source.py +77 -0
- unstructured_ingest/pipeline/utils.py +6 -0
- unstructured_ingest/pipeline/write.py +18 -0
- unstructured_ingest/processor.py +93 -0
- unstructured_ingest/runner/__init__.py +104 -0
- unstructured_ingest/runner/airtable.py +35 -0
- unstructured_ingest/runner/astradb.py +34 -0
- unstructured_ingest/runner/base_runner.py +89 -0
- unstructured_ingest/runner/biomed.py +45 -0
- unstructured_ingest/runner/confluence.py +35 -0
- unstructured_ingest/runner/delta_table.py +34 -0
- unstructured_ingest/runner/discord.py +35 -0
- unstructured_ingest/runner/elasticsearch.py +40 -0
- unstructured_ingest/runner/fsspec/__init__.py +0 -0
- unstructured_ingest/runner/fsspec/azure.py +30 -0
- unstructured_ingest/runner/fsspec/box.py +28 -0
- unstructured_ingest/runner/fsspec/dropbox.py +30 -0
- unstructured_ingest/runner/fsspec/fsspec.py +40 -0
- unstructured_ingest/runner/fsspec/gcs.py +28 -0
- unstructured_ingest/runner/fsspec/s3.py +28 -0
- unstructured_ingest/runner/fsspec/sftp.py +28 -0
- unstructured_ingest/runner/github.py +37 -0
- unstructured_ingest/runner/gitlab.py +37 -0
- unstructured_ingest/runner/google_drive.py +35 -0
- unstructured_ingest/runner/hubspot.py +35 -0
- unstructured_ingest/runner/jira.py +35 -0
- unstructured_ingest/runner/kafka.py +34 -0
- unstructured_ingest/runner/local.py +23 -0
- unstructured_ingest/runner/mongodb.py +34 -0
- unstructured_ingest/runner/notion.py +61 -0
- unstructured_ingest/runner/onedrive.py +35 -0
- unstructured_ingest/runner/opensearch.py +40 -0
- unstructured_ingest/runner/outlook.py +33 -0
- unstructured_ingest/runner/reddit.py +35 -0
- unstructured_ingest/runner/salesforce.py +33 -0
- unstructured_ingest/runner/sharepoint.py +35 -0
- unstructured_ingest/runner/slack.py +33 -0
- unstructured_ingest/runner/utils.py +47 -0
- unstructured_ingest/runner/wikipedia.py +35 -0
- unstructured_ingest/runner/writers/__init__.py +48 -0
- unstructured_ingest/runner/writers/astradb.py +22 -0
- unstructured_ingest/runner/writers/azure_ai_search.py +24 -0
- unstructured_ingest/runner/writers/base_writer.py +26 -0
- unstructured_ingest/runner/writers/chroma.py +22 -0
- unstructured_ingest/runner/writers/clarifai.py +19 -0
- unstructured_ingest/runner/writers/databricks_volumes.py +25 -0
- unstructured_ingest/runner/writers/delta_table.py +24 -0
- unstructured_ingest/runner/writers/elasticsearch.py +24 -0
- unstructured_ingest/runner/writers/fsspec/__init__.py +0 -0
- unstructured_ingest/runner/writers/fsspec/azure.py +24 -0
- unstructured_ingest/runner/writers/fsspec/box.py +21 -0
- unstructured_ingest/runner/writers/fsspec/dropbox.py +21 -0
- unstructured_ingest/runner/writers/fsspec/gcs.py +19 -0
- unstructured_ingest/runner/writers/fsspec/s3.py +21 -0
- unstructured_ingest/runner/writers/kafka.py +21 -0
- unstructured_ingest/runner/writers/mongodb.py +21 -0
- unstructured_ingest/runner/writers/opensearch.py +26 -0
- unstructured_ingest/runner/writers/pinecone.py +21 -0
- unstructured_ingest/runner/writers/qdrant.py +19 -0
- unstructured_ingest/runner/writers/sql.py +22 -0
- unstructured_ingest/runner/writers/vectara.py +22 -0
- unstructured_ingest/runner/writers/weaviate.py +21 -0
- unstructured_ingest/utils/__init__.py +0 -0
- unstructured_ingest/utils/chunking.py +56 -0
- unstructured_ingest/utils/compression.py +118 -0
- unstructured_ingest/utils/data_prep.py +200 -0
- unstructured_ingest/utils/dep_check.py +78 -0
- unstructured_ingest/utils/google_filetype.py +9 -0
- unstructured_ingest/utils/string_and_date_utils.py +49 -0
- unstructured_ingest/utils/table.py +73 -0
- unstructured_ingest/v2/__init__.py +1 -0
- unstructured_ingest/v2/cli/__init__.py +0 -0
- unstructured_ingest/v2/cli/base/__init__.py +4 -0
- unstructured_ingest/v2/cli/base/cmd.py +269 -0
- unstructured_ingest/v2/cli/base/dest.py +85 -0
- unstructured_ingest/v2/cli/base/importer.py +34 -0
- unstructured_ingest/v2/cli/base/src.py +85 -0
- unstructured_ingest/v2/cli/cli.py +24 -0
- unstructured_ingest/v2/cli/cmds.py +14 -0
- unstructured_ingest/v2/cli/utils/__init__.py +0 -0
- unstructured_ingest/v2/cli/utils/click.py +237 -0
- unstructured_ingest/v2/cli/utils/model_conversion.py +222 -0
- unstructured_ingest/v2/constants.py +2 -0
- unstructured_ingest/v2/errors.py +18 -0
- unstructured_ingest/v2/interfaces/__init__.py +32 -0
- unstructured_ingest/v2/interfaces/connector.py +50 -0
- unstructured_ingest/v2/interfaces/downloader.py +89 -0
- unstructured_ingest/v2/interfaces/file_data.py +116 -0
- unstructured_ingest/v2/interfaces/indexer.py +30 -0
- unstructured_ingest/v2/interfaces/process.py +19 -0
- unstructured_ingest/v2/interfaces/processor.py +88 -0
- unstructured_ingest/v2/interfaces/upload_stager.py +102 -0
- unstructured_ingest/v2/interfaces/uploader.py +53 -0
- unstructured_ingest/v2/logger.py +126 -0
- unstructured_ingest/v2/main.py +11 -0
- unstructured_ingest/v2/otel.py +111 -0
- unstructured_ingest/v2/pipeline/__init__.py +0 -0
- unstructured_ingest/v2/pipeline/interfaces.py +211 -0
- unstructured_ingest/v2/pipeline/otel.py +32 -0
- unstructured_ingest/v2/pipeline/pipeline.py +384 -0
- unstructured_ingest/v2/pipeline/steps/__init__.py +0 -0
- unstructured_ingest/v2/pipeline/steps/chunk.py +80 -0
- unstructured_ingest/v2/pipeline/steps/download.py +207 -0
- unstructured_ingest/v2/pipeline/steps/embed.py +79 -0
- unstructured_ingest/v2/pipeline/steps/filter.py +35 -0
- unstructured_ingest/v2/pipeline/steps/index.py +86 -0
- unstructured_ingest/v2/pipeline/steps/partition.py +79 -0
- unstructured_ingest/v2/pipeline/steps/stage.py +65 -0
- unstructured_ingest/v2/pipeline/steps/uncompress.py +50 -0
- unstructured_ingest/v2/pipeline/steps/upload.py +58 -0
- unstructured_ingest/v2/processes/__init__.py +18 -0
- unstructured_ingest/v2/processes/chunker.py +124 -0
- unstructured_ingest/v2/processes/connector_registry.py +69 -0
- unstructured_ingest/v2/processes/connectors/__init__.py +117 -0
- unstructured_ingest/v2/processes/connectors/airtable.py +235 -0
- unstructured_ingest/v2/processes/connectors/astradb.py +402 -0
- unstructured_ingest/v2/processes/connectors/azure_ai_search.py +276 -0
- unstructured_ingest/v2/processes/connectors/chroma.py +190 -0
- unstructured_ingest/v2/processes/connectors/confluence.py +207 -0
- unstructured_ingest/v2/processes/connectors/couchbase.py +334 -0
- unstructured_ingest/v2/processes/connectors/databricks/__init__.py +52 -0
- unstructured_ingest/v2/processes/connectors/databricks/volumes.py +208 -0
- unstructured_ingest/v2/processes/connectors/databricks/volumes_aws.py +87 -0
- unstructured_ingest/v2/processes/connectors/databricks/volumes_azure.py +102 -0
- unstructured_ingest/v2/processes/connectors/databricks/volumes_gcp.py +85 -0
- unstructured_ingest/v2/processes/connectors/databricks/volumes_native.py +86 -0
- unstructured_ingest/v2/processes/connectors/delta_table.py +191 -0
- unstructured_ingest/v2/processes/connectors/discord.py +158 -0
- unstructured_ingest/v2/processes/connectors/duckdb/__init__.py +15 -0
- unstructured_ingest/v2/processes/connectors/duckdb/base.py +100 -0
- unstructured_ingest/v2/processes/connectors/duckdb/duckdb.py +127 -0
- unstructured_ingest/v2/processes/connectors/duckdb/motherduck.py +126 -0
- unstructured_ingest/v2/processes/connectors/elasticsearch/__init__.py +19 -0
- unstructured_ingest/v2/processes/connectors/elasticsearch/elasticsearch.py +470 -0
- unstructured_ingest/v2/processes/connectors/elasticsearch/opensearch.py +195 -0
- unstructured_ingest/v2/processes/connectors/fsspec/__init__.py +37 -0
- unstructured_ingest/v2/processes/connectors/fsspec/azure.py +197 -0
- unstructured_ingest/v2/processes/connectors/fsspec/box.py +170 -0
- unstructured_ingest/v2/processes/connectors/fsspec/dropbox.py +168 -0
- unstructured_ingest/v2/processes/connectors/fsspec/fsspec.py +332 -0
- unstructured_ingest/v2/processes/connectors/fsspec/gcs.py +197 -0
- unstructured_ingest/v2/processes/connectors/fsspec/s3.py +185 -0
- unstructured_ingest/v2/processes/connectors/fsspec/sftp.py +171 -0
- unstructured_ingest/v2/processes/connectors/fsspec/utils.py +17 -0
- unstructured_ingest/v2/processes/connectors/gitlab.py +268 -0
- unstructured_ingest/v2/processes/connectors/google_drive.py +348 -0
- unstructured_ingest/v2/processes/connectors/kafka/__init__.py +17 -0
- unstructured_ingest/v2/processes/connectors/kafka/cloud.py +121 -0
- unstructured_ingest/v2/processes/connectors/kafka/kafka.py +273 -0
- unstructured_ingest/v2/processes/connectors/kafka/local.py +103 -0
- unstructured_ingest/v2/processes/connectors/kdbai.py +148 -0
- unstructured_ingest/v2/processes/connectors/lancedb/__init__.py +30 -0
- unstructured_ingest/v2/processes/connectors/lancedb/aws.py +43 -0
- unstructured_ingest/v2/processes/connectors/lancedb/azure.py +43 -0
- unstructured_ingest/v2/processes/connectors/lancedb/cloud.py +42 -0
- unstructured_ingest/v2/processes/connectors/lancedb/gcp.py +44 -0
- unstructured_ingest/v2/processes/connectors/lancedb/lancedb.py +169 -0
- unstructured_ingest/v2/processes/connectors/lancedb/local.py +44 -0
- unstructured_ingest/v2/processes/connectors/local.py +217 -0
- unstructured_ingest/v2/processes/connectors/milvus.py +225 -0
- unstructured_ingest/v2/processes/connectors/mongodb.py +361 -0
- unstructured_ingest/v2/processes/connectors/neo4j.py +385 -0
- unstructured_ingest/v2/processes/connectors/notion/__init__.py +0 -0
- unstructured_ingest/v2/processes/connectors/notion/client.py +349 -0
- unstructured_ingest/v2/processes/connectors/notion/connector.py +346 -0
- unstructured_ingest/v2/processes/connectors/notion/helpers.py +448 -0
- unstructured_ingest/v2/processes/connectors/notion/interfaces.py +32 -0
- unstructured_ingest/v2/processes/connectors/notion/types/__init__.py +0 -0
- unstructured_ingest/v2/processes/connectors/notion/types/block.py +96 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/__init__.py +63 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/bookmark.py +40 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/breadcrumb.py +21 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/bulleted_list_item.py +31 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/callout.py +94 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/child_database.py +23 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/child_page.py +23 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/code.py +43 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/column_list.py +35 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/divider.py +22 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/embed.py +36 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/equation.py +23 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/file.py +49 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/heading.py +37 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/image.py +21 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/link_preview.py +24 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/link_to_page.py +29 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/numbered_list.py +29 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/paragraph.py +31 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/pdf.py +49 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/quote.py +37 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/synced_block.py +57 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/table.py +63 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/table_of_contents.py +23 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/template.py +30 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/todo.py +42 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/toggle.py +37 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/unsupported.py +20 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/video.py +22 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database.py +73 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/__init__.py +106 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/checkbox.py +38 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/created_by.py +35 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/created_time.py +34 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/date.py +41 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/email.py +36 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/files.py +37 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/formula.py +49 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/last_edited_by.py +34 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/last_edited_time.py +34 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/multiselect.py +73 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/number.py +49 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/people.py +41 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/phone_number.py +36 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/relation.py +67 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/rich_text.py +43 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/rollup.py +56 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/select.py +69 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/status.py +81 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/title.py +37 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/unique_id.py +50 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/url.py +37 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/verification.py +78 -0
- unstructured_ingest/v2/processes/connectors/notion/types/date.py +29 -0
- unstructured_ingest/v2/processes/connectors/notion/types/file.py +54 -0
- unstructured_ingest/v2/processes/connectors/notion/types/page.py +45 -0
- unstructured_ingest/v2/processes/connectors/notion/types/parent.py +66 -0
- unstructured_ingest/v2/processes/connectors/notion/types/rich_text.py +189 -0
- unstructured_ingest/v2/processes/connectors/notion/types/user.py +79 -0
- unstructured_ingest/v2/processes/connectors/onedrive.py +447 -0
- unstructured_ingest/v2/processes/connectors/outlook.py +239 -0
- unstructured_ingest/v2/processes/connectors/pinecone.py +277 -0
- unstructured_ingest/v2/processes/connectors/qdrant/__init__.py +16 -0
- unstructured_ingest/v2/processes/connectors/qdrant/cloud.py +59 -0
- unstructured_ingest/v2/processes/connectors/qdrant/local.py +58 -0
- unstructured_ingest/v2/processes/connectors/qdrant/qdrant.py +160 -0
- unstructured_ingest/v2/processes/connectors/qdrant/server.py +60 -0
- unstructured_ingest/v2/processes/connectors/redisdb.py +182 -0
- unstructured_ingest/v2/processes/connectors/salesforce.py +303 -0
- unstructured_ingest/v2/processes/connectors/sharepoint.py +448 -0
- unstructured_ingest/v2/processes/connectors/slack.py +248 -0
- unstructured_ingest/v2/processes/connectors/sql/__init__.py +27 -0
- unstructured_ingest/v2/processes/connectors/sql/postgres.py +162 -0
- unstructured_ingest/v2/processes/connectors/sql/singlestore.py +166 -0
- unstructured_ingest/v2/processes/connectors/sql/snowflake.py +210 -0
- unstructured_ingest/v2/processes/connectors/sql/sql.py +434 -0
- unstructured_ingest/v2/processes/connectors/sql/sqlite.py +168 -0
- unstructured_ingest/v2/processes/connectors/utils.py +29 -0
- unstructured_ingest/v2/processes/connectors/vectara.py +350 -0
- unstructured_ingest/v2/processes/connectors/weaviate/__init__.py +22 -0
- unstructured_ingest/v2/processes/connectors/weaviate/cloud.py +165 -0
- unstructured_ingest/v2/processes/connectors/weaviate/embedded.py +90 -0
- unstructured_ingest/v2/processes/connectors/weaviate/local.py +73 -0
- unstructured_ingest/v2/processes/connectors/weaviate/weaviate.py +267 -0
- unstructured_ingest/v2/processes/embedder.py +195 -0
- unstructured_ingest/v2/processes/filter.py +60 -0
- unstructured_ingest/v2/processes/partitioner.py +188 -0
- unstructured_ingest/v2/processes/uncompress.py +61 -0
- unstructured_ingest/v2/unstructured_api.py +128 -0
- unstructured_ingest/v2/utils.py +61 -0
- unstructured_ingest-0.3.13.dist-info/LICENSE.md +201 -0
- unstructured_ingest-0.3.13.dist-info/METADATA +205 -0
- unstructured_ingest-0.3.13.dist-info/RECORD +557 -0
- unstructured_ingest-0.3.13.dist-info/WHEEL +5 -0
- unstructured_ingest-0.3.13.dist-info/entry_points.txt +2 -0
- unstructured_ingest-0.3.13.dist-info/top_level.txt +2 -0
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
# type: ignore
|
|
2
|
+
import json
|
|
3
|
+
import os
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import TYPE_CHECKING, Annotated, Any, Optional
|
|
7
|
+
|
|
8
|
+
from pydantic import Field, Secret, ValidationError
|
|
9
|
+
from pydantic.functional_validators import BeforeValidator
|
|
10
|
+
|
|
11
|
+
from unstructured_ingest.embed.interfaces import (
|
|
12
|
+
AsyncBaseEmbeddingEncoder,
|
|
13
|
+
BaseEmbeddingEncoder,
|
|
14
|
+
EmbeddingConfig,
|
|
15
|
+
)
|
|
16
|
+
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
17
|
+
from unstructured_ingest.v2.errors import UserAuthError
|
|
18
|
+
|
|
19
|
+
if TYPE_CHECKING:
|
|
20
|
+
from vertexai.language_models import TextEmbeddingModel
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def conform_string_to_dict(value: Any) -> dict:
    """Coerce a credentials value into a dict before pydantic validates it.

    Args:
        value: Either a dict (returned unchanged) or a JSON string to parse.

    Returns:
        The dict itself, or the result of ``json.loads`` on the string.

    Raises:
        ValueError: If ``value`` is neither a dict nor a string, or if the string
            is not valid JSON (``json.JSONDecodeError`` is a ``ValueError``).
    """
    if isinstance(value, dict):
        return value
    if isinstance(value, str):
        return json.loads(value)
    # pydantic's ValidationError cannot be built from a plain message; validators are
    # expected to raise ValueError, which pydantic converts into a ValidationError.
    raise ValueError(f"Input could not be mapped to a valid dict: {value}")
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
# Secret-wrapped dict; conform_string_to_dict runs as a "before" validator on the inner dict type.
ApiKeyType = Secret[Annotated[dict, BeforeValidator(conform_string_to_dict)]]
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class VertexAIEmbeddingConfig(EmbeddingConfig):
    # Settings for the Vertex AI embedder: service-account credentials (as a dict)
    # and the name of the embedding model to load.
    api_key: ApiKeyType
    embedder_model_name: Optional[str] = Field(
        default="textembedding-gecko@001", alias="model_name"
    )

    def wrap_error(self, e: Exception) -> Exception:
        """Translate a provider exception into an ingest error where one applies.

        Args:
            e: The exception raised while talking to Vertex AI.

        Returns:
            ``UserAuthError`` wrapping ``e`` when it is a ``GoogleAuthError``,
            otherwise ``e`` unchanged.
        """
        from google.auth.exceptions import GoogleAuthError

        if isinstance(e, GoogleAuthError):
            return UserAuthError(e)
        return e

    def register_application_credentials(self):
        """Write the credentials to disk and point GOOGLE_APPLICATION_CREDENTIALS at them.

        The file holds a secret, so it is created with (and forced to) owner-only
        permissions instead of relying on the process umask.
        """
        # TODO look into passing credentials in directly, rather than via env var and tmp file
        application_credentials_path = Path("/tmp") / "google-vertex-app-credentials.json"
        fd = os.open(
            application_credentials_path,
            os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
            0o600,
        )
        with os.fdopen(fd, "w") as credentials_file:
            # The mode passed to os.open only applies to newly created files; tighten
            # a pre-existing file as well before the secret is written into it.
            os.chmod(application_credentials_path, 0o600)
            json.dump(self.api_key.get_secret_value(), credentials_file)
        os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = str(application_credentials_path)

    @requires_dependencies(
        ["vertexai"],
        extras="embed-vertexai",
    )
    def get_client(self) -> "TextEmbeddingModel":
        """Creates a VertexAI python client to embed elements."""
        from vertexai.language_models import TextEmbeddingModel

        # Credentials must be registered before the model is loaded.
        self.register_application_credentials()
        return TextEmbeddingModel.from_pretrained(self.embedder_model_name)
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
@dataclass
class VertexAIEmbeddingEncoder(BaseEmbeddingEncoder):
    # Synchronous encoder that embeds text through the client built by its config.
    config: VertexAIEmbeddingConfig

    def wrap_error(self, e: Exception) -> Exception:
        """Delegate error translation to the config."""
        return self.config.wrap_error(e=e)

    def embed_query(self, query):
        """Embed a single query and return its vector."""
        vectors = self._embed_documents(elements=[query])
        return vectors[0]

    def embed_documents(self, elements: list[dict]) -> list[dict]:
        """Embed the ``"text"`` of every element (empty string when the key is absent)."""
        texts = [element.get("text", "") for element in elements]
        vectors = self._embed_documents(texts)
        return self._add_embeddings_to_elements(elements, vectors)

    @requires_dependencies(["vertexai"], extras="embed-vertexai")
    def _embed_documents(self, elements: list[str]) -> list[list[float]]:
        """Request embeddings for the given texts and return the ``values`` of each result.

        Any exception from building the client or from the request is passed
        through ``wrap_error`` before being raised.
        """
        from vertexai.language_models import TextEmbeddingInput

        inputs = [TextEmbeddingInput(text=text) for text in elements]
        try:
            client = self.config.get_client()
            results = client.get_embeddings(inputs)
        except Exception as e:
            raise self.wrap_error(e=e)
        return [result.values for result in results]
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
@dataclass
class AsyncVertexAIEmbeddingEncoder(AsyncBaseEmbeddingEncoder):
    # Async encoder that embeds text through the client built by its config.
    config: VertexAIEmbeddingConfig

    def wrap_error(self, e: Exception) -> Exception:
        """Delegate error translation to the config."""
        return self.config.wrap_error(e=e)

    async def embed_query(self, query):
        """Embed a single query and return its vector."""
        vectors = await self._embed_documents(elements=[query])
        return vectors[0]

    async def embed_documents(self, elements: list[dict]) -> list[dict]:
        """Embed the ``"text"`` of every element (empty string when the key is absent)."""
        texts = [element.get("text", "") for element in elements]
        vectors = await self._embed_documents(texts)
        return self._add_embeddings_to_elements(elements, vectors)

    @requires_dependencies(["vertexai"], extras="embed-vertexai")
    async def _embed_documents(self, elements: list[str]) -> list[list[float]]:
        """Await embeddings for the given texts and return the ``values`` of each result.

        Any exception from building the client or from the request is passed
        through ``wrap_error`` before being raised.
        """
        from vertexai.language_models import TextEmbeddingInput

        inputs = [TextEmbeddingInput(text=text) for text in elements]
        try:
            client = self.config.get_client()
            results = await client.get_embeddings_async(inputs)
        except Exception as e:
            raise self.wrap_error(e=e)
        return [result.values for result in results]
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
from dataclasses import dataclass
|
|
2
|
+
from typing import TYPE_CHECKING, Optional
|
|
3
|
+
|
|
4
|
+
from pydantic import Field, SecretStr
|
|
5
|
+
|
|
6
|
+
from unstructured_ingest.embed.interfaces import (
|
|
7
|
+
AsyncBaseEmbeddingEncoder,
|
|
8
|
+
BaseEmbeddingEncoder,
|
|
9
|
+
EmbeddingConfig,
|
|
10
|
+
)
|
|
11
|
+
from unstructured_ingest.logger import logger
|
|
12
|
+
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
13
|
+
from unstructured_ingest.v2.errors import (
|
|
14
|
+
ProviderError,
|
|
15
|
+
UserAuthError,
|
|
16
|
+
UserError,
|
|
17
|
+
)
|
|
18
|
+
from unstructured_ingest.v2.errors import (
|
|
19
|
+
RateLimitError as CustomRateLimitError,
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
if TYPE_CHECKING:
|
|
23
|
+
from voyageai import AsyncClient as AsyncVoyageAIClient
|
|
24
|
+
from voyageai import Client as VoyageAIClient
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class VoyageAIEmbeddingConfig(EmbeddingConfig):
    # Settings for the VoyageAI embedder: API key, model name and client options.
    api_key: SecretStr
    embedder_model_name: str = Field(default="voyage-3", alias="model_name")
    batch_size: Optional[int] = Field(default=None)
    truncation: Optional[bool] = Field(default=None)
    max_retries: int = 0
    timeout_in_seconds: Optional[int] = None

    def wrap_error(self, e: Exception) -> Exception:
        """Translate a VoyageAI exception into the matching ingest error.

        Args:
            e: The exception raised while calling VoyageAI.

        Returns:
            ``UserAuthError`` for authentication failures, the ingest
            ``RateLimitError`` for rate limiting, ``UserError`` for other 4xx
            statuses, ``ProviderError`` for 5xx statuses, or ``e`` itself when
            none of these apply.

        Raises:
            Exception: ``e`` is re-raised as-is when it is not a ``VoyageError``.
        """
        # https://docs.voyageai.com/docs/error-codes
        from voyageai.error import AuthenticationError, RateLimitError, VoyageError

        if not isinstance(e, VoyageError):
            logger.error(f"unhandled exception from voyageai: {e}", exc_info=True)
            raise e
        http_code = e.http_status
        message = e.user_message
        if isinstance(e, AuthenticationError):
            return UserAuthError(message)
        if isinstance(e, RateLimitError):
            return CustomRateLimitError(message)
        # NOTE(review): http_status appears to be None for errors raised without an
        # HTTP response (e.g. connection failures); comparing None to an int would
        # raise TypeError, so only classify by status when one is present.
        if http_code is not None:
            if 400 <= http_code < 500:
                return UserError(message)
            if http_code >= 500:
                return ProviderError(message)
        logger.error(f"unhandled exception from voyageai: {e}", exc_info=True)
        return e

    @requires_dependencies(
        ["voyageai"],
        extras="embed-voyageai",
    )
    def get_client(self) -> "VoyageAIClient":
        """Creates a VoyageAI python client to embed elements."""
        from voyageai import Client as VoyageAIClient

        client = VoyageAIClient(
            api_key=self.api_key.get_secret_value(),
            max_retries=self.max_retries,
            timeout=self.timeout_in_seconds,
        )
        return client

    @requires_dependencies(
        ["voyageai"],
        extras="embed-voyageai",
    )
    def get_async_client(self) -> "AsyncVoyageAIClient":
        """Creates an async VoyageAI python client to embed elements."""
        from voyageai import AsyncClient as AsyncVoyageAIClient

        client = AsyncVoyageAIClient(
            api_key=self.api_key.get_secret_value(),
            max_retries=self.max_retries,
            timeout=self.timeout_in_seconds,
        )
        return client
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
@dataclass
class VoyageAIEmbeddingEncoder(BaseEmbeddingEncoder):
    # Synchronous encoder that embeds text through the VoyageAI client from its config.
    config: VoyageAIEmbeddingConfig

    def wrap_error(self, e: Exception) -> Exception:
        """Delegate error translation to the config."""
        return self.config.wrap_error(e=e)

    def _embed_documents(self, elements: list[str]) -> list[list[float]]:
        """Embed the given texts with the configured model and return ``response.embeddings``.

        The client is built outside the ``try``; only errors from the embed
        call itself go through ``wrap_error``.
        """
        voyage_client = self.config.get_client()
        model_name = self.config.embedder_model_name
        try:
            response = voyage_client.embed(texts=elements, model=model_name)
        except Exception as e:
            raise self.wrap_error(e=e)
        return response.embeddings

    def embed_documents(self, elements: list[dict]) -> list[dict]:
        """Embed the ``"text"`` of every element (empty string when the key is absent)."""
        texts = [element.get("text", "") for element in elements]
        vectors = self._embed_documents(texts)
        return self._add_embeddings_to_elements(elements, vectors)

    def embed_query(self, query: str) -> list[float]:
        """Embed a single query and return its vector."""
        vectors = self._embed_documents(elements=[query])
        return vectors[0]
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
@dataclass
class AsyncVoyageAIEmbeddingEncoder(AsyncBaseEmbeddingEncoder):
    # Async encoder that embeds text through the async VoyageAI client from its config.
    config: VoyageAIEmbeddingConfig

    def wrap_error(self, e: Exception) -> Exception:
        """Delegate error translation to the config."""
        return self.config.wrap_error(e=e)

    async def _embed_documents(self, elements: list[str]) -> list[list[float]]:
        """Await embeddings for the given texts and return ``response.embeddings``.

        The client is built outside the ``try``; only errors from the embed
        call itself go through ``wrap_error``.
        """
        voyage_client = self.config.get_async_client()
        model_name = self.config.embedder_model_name
        try:
            response = await voyage_client.embed(texts=elements, model=model_name)
        except Exception as e:
            raise self.wrap_error(e=e)
        return response.embeddings

    async def embed_documents(self, elements: list[dict]) -> list[dict]:
        """Embed the ``"text"`` of every element (empty string when the key is absent)."""
        texts = [element.get("text", "") for element in elements]
        vectors = await self._embed_documents(texts)
        return self._add_embeddings_to_elements(elements, vectors)

    async def embed_query(self, query: str) -> list[float]:
        """Embed a single query and return its vector."""
        vectors = await self._embed_documents(elements=[query])
        return vectors[0]
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
import _thread
|
|
2
|
+
import copy
|
|
3
|
+
import functools
|
|
4
|
+
from dataclasses import fields
|
|
5
|
+
|
|
6
|
+
from dataclasses_json.core import (
|
|
7
|
+
Collection,
|
|
8
|
+
Enum,
|
|
9
|
+
Mapping,
|
|
10
|
+
_encode_overrides,
|
|
11
|
+
_handle_undefined_parameters_safe,
|
|
12
|
+
_user_overrides_or_exts,
|
|
13
|
+
is_dataclass,
|
|
14
|
+
)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def _recursive_repr(user_function):
    """Decorate a ``__repr__`` so that re-entrant calls on the same object return ``"..."``.

    Re-entrancy is tracked per (object id, thread id) pair for the duration
    of the wrapped call.
    """
    # Copied from dataclasses as this method isn't exposed for importing
    in_progress = set()

    @functools.wraps(user_function)
    def wrapper(self):
        marker = (id(self), _thread.get_ident())
        if marker in in_progress:
            return "..."
        in_progress.add(marker)
        try:
            return user_function(self)
        finally:
            # Always clear the marker, even when the wrapped repr raises.
            in_progress.discard(marker)

    return wrapper
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _asdict(
    obj,
    encode_json=False,
    redact_sensitive=False,
    redacted_text="***REDACTED***",
    apply_name_overload: bool = True,
):
    """
    A re-implementation of `asdict` (based on the original in the `dataclasses`
    source) to support arbitrary Collection and Mapping types.

    Args:
        obj: The value to convert; dataclasses, mappings and collections are
            walked recursively, anything else is deep-copied.
        encode_json: Forwarded to ``_encode_overrides`` for dataclass values.
        redact_sensitive: When true, truthy values of fields whose ``sensitive``
            attribute is set are replaced by ``redacted_text``.
        redacted_text: Replacement used for redacted values.
        apply_name_overload: When true, fields with an ``overload_name`` are
            emitted under that name instead of the field name.

    Returns:
        A dict for dataclasses and mappings, a list for other collections
        (strings, bytes and enums excluded), or a deep copy of ``obj``.
    """
    # Every nested value must see the same options; previously the Mapping and
    # Collection branches dropped ``apply_name_overload`` and silently fell back to True.
    recurse = functools.partial(
        _asdict,
        encode_json=encode_json,
        redact_sensitive=redact_sensitive,
        redacted_text=redacted_text,
        apply_name_overload=apply_name_overload,
    )
    if is_dataclass(obj):
        result = []
        overrides = _user_overrides_or_exts(obj)
        for field in fields(obj):
            if overrides[field.name].encoder:
                # A custom encoder handles this field later, so pass the raw value through.
                value = getattr(obj, field.name)
            else:
                value = recurse(getattr(obj, field.name))
            if getattr(field, "sensitive", False) and redact_sensitive and value:
                value = redacted_text
            overload_name = getattr(field, "overload_name", None)
            if overload_name and apply_name_overload:
                result.append((overload_name, value))
            else:
                result.append((field.name, value))

        result = _handle_undefined_parameters_safe(cls=obj, kvs=dict(result), usage="to")
        return _encode_overrides(
            dict(result), _user_overrides_or_exts(obj), encode_json=encode_json
        )
    elif isinstance(obj, Mapping):
        return {recurse(k): recurse(v) for k, v in obj.items()}
    elif isinstance(obj, Collection) and not isinstance(obj, (str, bytes, Enum)):
        return [recurse(v) for v in obj]
    else:
        return copy.deepcopy(obj)
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
import typing as t
|
|
2
|
+
from dataclasses import MISSING, Field
|
|
3
|
+
|
|
4
|
+
from unstructured_ingest.enhanced_dataclass.core import _recursive_repr
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class EnhancedField(Field):
    """A ``dataclasses.Field`` that also carries ``sensitive`` and ``overload_name``."""

    def __init__(self, *args, sensitive=False, overload_name: t.Optional[str] = None):
        """Forward ``args`` to ``Field.__init__`` and store the two extra attributes."""
        super().__init__(*args)
        self.sensitive = sensitive
        self.overload_name = overload_name

    @_recursive_repr
    def __repr__(self):
        """Render the field like ``Field(...)``, including the extra attributes."""
        repr_attrs = (
            "name",
            "type",
            "default",
            "default_factory",
            "init",
            "repr",
            "hash",
            "compare",
            "metadata",
            "sensitive",
            "overload_name",
        )
        parts = [f"{attr}={getattr(self, attr)!r}" for attr in repr_attrs]
        parts.append(f"_field_type={self._field_type}")
        # kw_only only exists on Python 3.10+, so look it up dynamically to keep
        # older interpreters working; it is shown only when truthy.
        kw_only = getattr(self, "kw_only", None)
        if kw_only:
            parts.append(f"kw_only={kw_only!r}")
        return "Field({})".format(",".join(parts))
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def enhanced_field(
    *,
    default=MISSING,
    default_factory=MISSING,
    init: bool = True,
    repr: bool = True,
    hash=None,
    compare: bool = True,
    metadata=None,
    kw_only=MISSING,
    sensitive: bool = False,
    overload_name: t.Optional[str] = None,
    doc: t.Optional[str] = None,
):
    """Build an ``EnhancedField``, mirroring ``dataclasses.field`` plus two extras.

    Args:
        default, default_factory, init, repr, hash, compare, metadata, kw_only:
            Passed positionally to the underlying ``Field`` constructor.
        sensitive: Stored on the field as ``sensitive``.
        overload_name: Stored on the field as ``overload_name``.
        doc: Passed to ``Field`` only when the running interpreter's ``Field``
            has a ``doc`` slot; ignored otherwise.

    Raises:
        ValueError: If both ``default`` and ``default_factory`` are given.
    """
    if default is not MISSING and default_factory is not MISSING:
        raise ValueError("cannot specify both default and default_factory")
    args = [default, default_factory, init, repr, hash, compare, metadata]
    # Field's positional signature grew over time (kw_only, later doc); append
    # each optional argument only when this interpreter's Field has the slot.
    if "kw_only" in EnhancedField.__slots__:
        args.append(kw_only)
    if "doc" in EnhancedField.__slots__:
        args.append(doc)
    return EnhancedField(*args, sensitive=sensitive, overload_name=overload_name)
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
from dataclasses import InitVar, fields
|
|
5
|
+
from typing import Any, Callable, Optional, Type, TypeVar, Union
|
|
6
|
+
|
|
7
|
+
import dataclasses_json.core as dataclasses_json_core
|
|
8
|
+
from dataclasses_json import DataClassJsonMixin
|
|
9
|
+
|
|
10
|
+
from unstructured_ingest.enhanced_dataclass.core import _asdict
|
|
11
|
+
|
|
12
|
+
A = TypeVar("A", bound="EnhancedDataClassJsonMixin")
|
|
13
|
+
|
|
14
|
+
# Monkey-patch the _decode_dataclass function to support name override
|
|
15
|
+
og_decode_dataclass = dataclasses_json_core._decode_dataclass
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def custom_decode_dataclass(cls, kvs, infer_missing):
    """Rename overloaded keys back to field names, then run the original decoder.

    For every field of ``cls`` that has a truthy ``overload_name``, a matching
    key in ``kvs`` is moved to the field's real name. The renaming is done on a
    shallow copy so the caller's dict is left untouched.

    Args:
        cls: The dataclass type being decoded.
        kvs: The input to decode; only renamed when it is a dict.
        infer_missing: Passed through to the original decoder.

    Returns:
        Whatever the original ``_decode_dataclass`` returns.
    """
    renamed = None
    for f in fields(cls):
        overload_name = getattr(f, "overload_name", None)
        if not overload_name or not isinstance(kvs, dict):
            continue
        source = kvs if renamed is None else renamed
        if overload_name in source:
            if renamed is None:
                # Copy lazily, only once a rename is actually required.
                renamed = dict(kvs)
            renamed[f.name] = renamed.pop(overload_name)
    return og_decode_dataclass(cls, kvs if renamed is None else renamed, infer_missing)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
# Install the wrapper on the dataclasses_json.core module so lookups of
# `_decode_dataclass` on that module resolve to custom_decode_dataclass.
dataclasses_json_core._decode_dataclass = custom_decode_dataclass
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class EnhancedDataClassJsonMixin(DataClassJsonMixin):
    """Mixin built on DataClassJsonMixin with redaction and name-overload options.

    Methods:
        check_init_var: Reject classes that annotate fields with ``InitVar``.
        to_json: Serialize the object to a JSON string.
        from_dict: Build an instance of this class from a dictionary.
        to_dict: Convert the object to a dictionary.
    """

    @classmethod
    def check_init_var(cls):
        """Raise ``TypeError`` if this class's own annotations contain ``InitVar`` instances."""
        annotations = cls.__dict__.get("__annotations__", {})
        init_var_names = [
            name for name, annotation in annotations.items() if isinstance(annotation, InitVar)
        ]
        if not init_var_names:
            return
        raise TypeError(
            "Class {} has the following fields defined with an InitVar which "
            "cannot be used with EnhancedDataClassJsonMixin: {}".format(
                cls.__name__, ", ".join(init_var_names)
            )
        )

    def to_json(
        self,
        *,
        skipkeys: bool = False,
        ensure_ascii: bool = True,
        check_circular: bool = True,
        allow_nan: bool = True,
        indent: Optional[Union[int, str]] = None,
        separators: Optional[tuple[str, str]] = None,
        default: Optional[Callable[..., Any]] = None,
        sort_keys: bool = False,
        redact_sensitive: bool = False,
        redacted_text: str = "***REDACTED***",
        apply_name_overload: bool = True,
        **kw: Any,
    ) -> str:
        """Serialize to JSON via ``to_dict`` and ``json.dumps``.

        ``redact_sensitive``, ``redacted_text`` and ``apply_name_overload`` are
        forwarded to ``to_dict``; every other argument goes to ``json.dumps``.
        """
        self.check_init_var()
        payload = self.to_dict(
            encode_json=False,
            redact_sensitive=redact_sensitive,
            redacted_text=redacted_text,
            apply_name_overload=apply_name_overload,
        )
        return json.dumps(
            payload,
            cls=dataclasses_json_core._ExtendedEncoder,
            skipkeys=skipkeys,
            ensure_ascii=ensure_ascii,
            check_circular=check_circular,
            allow_nan=allow_nan,
            indent=indent,
            separators=separators,
            default=default,
            sort_keys=sort_keys,
            **kw,
        )

    @classmethod
    def from_dict(
        cls: Type[A],
        kvs: dataclasses_json_core.Json,
        *,
        infer_missing=False,
        apply_name_overload=False,
    ) -> A:
        """Decode ``kvs`` into an instance of this class.

        NOTE(review): ``apply_name_overload`` is accepted but not read in this method.
        """
        cls.check_init_var()
        # Looked up on the module at call time, so a patched decoder is honoured.
        decode = dataclasses_json_core._decode_dataclass
        return decode(cls, kvs, infer_missing)

    def to_dict(
        self,
        encode_json: bool = False,
        redact_sensitive: bool = False,
        redacted_text: str = "***REDACTED***",
        apply_name_overload: bool = True,
    ) -> dict[str, dataclasses_json_core.Json]:
        """Convert this object to a dict by delegating to ``_asdict`` with the same options."""
        self.check_init_var()
        options = {
            "encode_json": encode_json,
            "redact_sensitive": redact_sensitive,
            "redacted_text": redacted_text,
            "apply_name_overload": apply_name_overload,
        }
        return _asdict(self, **options)
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
from abc import ABC
|
|
2
|
+
from functools import wraps
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class CustomError(Exception, ABC):
    """Base for errors that can wrap arbitrary failures behind a fixed message template.

    Subclasses set ``error_string`` to a format string with one ``{}``
    placeholder, which receives the text of the original error.
    """

    error_string: str

    @classmethod
    def wrap(cls, f):
        """
        Provides a wrapper for a function that catches any exception and
        re-raises it as the custom error. If the exception itself is already an instance
        of the custom error, re-raises original error.

        Only ``Exception`` subclasses are converted; ``KeyboardInterrupt``,
        ``SystemExit`` and other ``BaseException``-only signals propagate untouched
        so that interrupts and cancellation are not mistaken for connector failures.
        """

        @wraps(f)
        def wrapper(*args, **kwargs):
            try:
                return f(*args, **kwargs)
            except cls:
                # Already the right type (or a subclass): keep the original error.
                raise
            except Exception as error:
                raise cls(cls.error_string.format(str(error))) from error

        return wrapper


class SourceConnectionError(CustomError):
    """Failure while getting data from the upstream data source."""

    error_string = "Error in getting data from upstream data source: {}"


class SourceConnectionNetworkError(SourceConnectionError):
    """Failure while connecting to the upstream data source."""

    error_string = "Error in connecting to upstream data source: {}"


class DestinationConnectionError(CustomError):
    """Failure while connecting to the downstream data source."""

    error_string = "Error in connecting to downstream data source: {}"


class EmbeddingEncoderConnectionError(CustomError):
    """Failure while connecting to the embedding model provider."""

    error_string = "Error in connecting to the embedding model provider: {}"


class WriteError(CustomError):
    """Failure while writing to the downstream data source."""

    error_string = "Error in writing to downstream data source: {}"


class PartitionError(CustomError):
    """Failure while partitioning content."""

    error_string = "Error in partitioning content: {}"
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import sys
|
|
3
|
+
import traceback
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
# Default startup handler
|
|
7
|
+
def _log_start(details, logger, log_level):
    """Log that a retried call is about to start, including its retry budget.

    Args:
        details: Mapping with ``target`` (the callable) and optionally
            ``max_tries``, ``max_time``, ``exception``, ``args`` and ``kwargs``.
        logger: Object exposing ``log(level, msg, *args)``.
        log_level: Level to log at; below ``logging.INFO`` the call arguments
            are included in the message.
    """
    max_tried = details.get("max_tries")
    max_time = details.get("max_time")
    if max_tried is not None and max_time is not None:
        s = "%.1fs or %d tries"
        s_args = [max_time, max_tried]
    elif max_tried is not None:
        s = "%d tries"
        s_args = [max_tried]
    elif max_time is not None:
        s = "%.1fs"
        s_args = [max_time]
    else:
        # No budget configured: formatting None with %.1f would fail inside logging.
        s = "an unlimited time"
        s_args = []
    exception = details.get("exception")
    if isinstance(exception, tuple):
        exception = list(exception)
    elif not isinstance(exception, list):
        exception = [exception]
    # Fall back to str() for entries without __name__ (e.g. a missing exception).
    exception_s = ", ".join([getattr(e, "__name__", str(e)) for e in exception])
    if log_level >= logging.INFO:
        # ``s`` is itself a %-template, spliced into the final message template.
        msg = f"Attempting %s(...), will retry for {s} given these issues: %s"
        log_args = [details["target"].__name__] + s_args + [exception_s]
    else:
        msg = f"Attempting %s(%s), will retry for {s} given these issues: %s"
        target_input_list = []
        if args := details.get("args"):
            target_input_list.extend([str(d) for d in args])
        if kwargs := details.get("kwargs"):
            target_input_list.extend([f"{k}={str(v)}" for k, v in kwargs.items()])
        target_input = ", ".join(target_input_list) if target_input_list else ""
        log_args = [details["target"].__name__, target_input] + s_args + [exception_s]
    logger.log(log_level, msg, *log_args)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
# Default backoff handler
|
|
48
|
+
def _log_backoff(details, logger, log_level):
    """Log that a retried call is backing off before its next attempt.

    Args:
        details: Mapping with ``target``, ``wait`` and ``tries``; ``args`` and
            ``kwargs`` are used below INFO level, and ``value`` is used when no
            exception is currently being handled.
        logger: Object exposing ``log(level, msg, *args)``.
        log_level: Level to log at; below ``logging.INFO`` the call arguments
            and try count are included.
    """
    if log_level >= logging.INFO:
        msg = "Backing off %s(...) for %.1fs (%s)"
        # The %.1fs placeholder is the wait time, not the number of tries.
        log_args = [details["target"].__name__, details["wait"]]
    else:
        msg = "Backing off %.1fs seconds after %d tries calling function %s(%s) -> %s"
        target_input_list = []
        if args := details.get("args"):
            target_input_list.extend([str(d) for d in args])
        if kwargs := details.get("kwargs"):
            target_input_list.extend([f"{k}={str(v)}" for k, v in kwargs.items()])
        target_input = ", ".join(target_input_list) if target_input_list else ""
        log_args = [
            details["wait"],
            details["tries"],
            details["target"].__name__,
            target_input,
        ]
    # The final placeholder is the active exception if there is one, else the value.
    exc_typ, exc, _ = sys.exc_info()
    if exc is not None:
        exc_fmt = traceback.format_exception_only(exc_typ, exc)[-1]
        log_args.append(exc_fmt.rstrip("\n"))
    else:
        log_args.append(str(details["value"]))
    logger.log(log_level, msg, *log_args)
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
# Default giveup handler
|
|
76
|
+
def _log_giveup(details, logger, log_level):
    """Log that a retried call has exhausted its retries.

    Args:
        details: Mapping with ``target`` and ``tries``; ``args``, ``kwargs`` and
            a duration (``wait`` if present, otherwise ``elapsed``) are used
            below INFO level, and ``value`` is used when no exception is
            currently being handled.
        logger: Object exposing ``log(level, msg, *args)``.
        log_level: Level to log at; below ``logging.INFO`` the call arguments
            and duration are included.
    """
    if log_level >= logging.INFO:
        # The second argument is a try count, so label it as tries rather than seconds.
        msg = "Giving up %s(...) after %d tries (%s)"
        log_args = [details["target"].__name__, details["tries"]]
    else:
        msg = "Giving up after %d tries (%.1fs) calling function %s(%s) -> %s"
        target_input_list = []
        if args := details.get("args"):
            target_input_list.extend([str(d) for d in args])
        if kwargs := details.get("kwargs"):
            target_input_list.extend([f"{k}={str(v)}" for k, v in kwargs.items()])
        target_input = ", ".join(target_input_list) if target_input_list else "..."
        # NOTE(review): give-up details are not expected to carry "wait"; prefer it
        # when present for compatibility, else fall back to "elapsed", else 0.0.
        duration = details.get("wait", details.get("elapsed", 0.0))
        log_args = [
            details["tries"],
            duration,
            details["target"].__name__,
            target_input,
        ]

    # The final placeholder is the active exception if there is one, else the value.
    exc_typ, exc, _ = sys.exc_info()
    if exc is not None:
        exc_fmt = traceback.format_exception_only(exc_typ, exc)[-1]
        log_args.append(exc_fmt.rstrip("\n"))
    else:
        log_args.append(details["value"])

    logger.log(log_level, msg, *log_args)
|