unstructured_ingest-0.3.13-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of unstructured-ingest might be problematic. Click here for more details.
- test/__init__.py +0 -0
- test/integration/__init__.py +0 -0
- test/integration/chunkers/__init__.py +0 -0
- test/integration/chunkers/test_chunkers.py +31 -0
- test/integration/connectors/__init__.py +0 -0
- test/integration/connectors/conftest.py +38 -0
- test/integration/connectors/databricks/__init__.py +0 -0
- test/integration/connectors/databricks/test_volumes_native.py +269 -0
- test/integration/connectors/discord/__init__.py +0 -0
- test/integration/connectors/discord/test_discord.py +90 -0
- test/integration/connectors/duckdb/__init__.py +0 -0
- test/integration/connectors/duckdb/conftest.py +14 -0
- test/integration/connectors/duckdb/test_duckdb.py +89 -0
- test/integration/connectors/duckdb/test_motherduck.py +95 -0
- test/integration/connectors/elasticsearch/__init__.py +0 -0
- test/integration/connectors/elasticsearch/conftest.py +34 -0
- test/integration/connectors/elasticsearch/test_elasticsearch.py +330 -0
- test/integration/connectors/elasticsearch/test_opensearch.py +325 -0
- test/integration/connectors/sql/__init__.py +0 -0
- test/integration/connectors/sql/test_postgres.py +195 -0
- test/integration/connectors/sql/test_singlestore.py +176 -0
- test/integration/connectors/sql/test_snowflake.py +238 -0
- test/integration/connectors/sql/test_sqlite.py +162 -0
- test/integration/connectors/test_astradb.py +217 -0
- test/integration/connectors/test_azure_ai_search.py +255 -0
- test/integration/connectors/test_chroma.py +120 -0
- test/integration/connectors/test_confluence.py +113 -0
- test/integration/connectors/test_delta_table.py +185 -0
- test/integration/connectors/test_lancedb.py +247 -0
- test/integration/connectors/test_milvus.py +203 -0
- test/integration/connectors/test_mongodb.py +335 -0
- test/integration/connectors/test_neo4j.py +236 -0
- test/integration/connectors/test_notion.py +145 -0
- test/integration/connectors/test_onedrive.py +118 -0
- test/integration/connectors/test_pinecone.py +288 -0
- test/integration/connectors/test_qdrant.py +215 -0
- test/integration/connectors/test_redis.py +119 -0
- test/integration/connectors/test_s3.py +183 -0
- test/integration/connectors/test_vectara.py +270 -0
- test/integration/connectors/utils/__init__.py +0 -0
- test/integration/connectors/utils/constants.py +7 -0
- test/integration/connectors/utils/docker.py +151 -0
- test/integration/connectors/utils/docker_compose.py +59 -0
- test/integration/connectors/utils/validation/__init__.py +0 -0
- test/integration/connectors/utils/validation/destination.py +75 -0
- test/integration/connectors/utils/validation/equality.py +75 -0
- test/integration/connectors/utils/validation/source.py +299 -0
- test/integration/connectors/utils/validation/utils.py +36 -0
- test/integration/connectors/weaviate/__init__.py +0 -0
- test/integration/connectors/weaviate/conftest.py +15 -0
- test/integration/connectors/weaviate/test_cloud.py +34 -0
- test/integration/connectors/weaviate/test_local.py +131 -0
- test/integration/embedders/__init__.py +0 -0
- test/integration/embedders/conftest.py +13 -0
- test/integration/embedders/test_azure_openai.py +59 -0
- test/integration/embedders/test_bedrock.py +103 -0
- test/integration/embedders/test_huggingface.py +26 -0
- test/integration/embedders/test_mixedbread.py +71 -0
- test/integration/embedders/test_octoai.py +77 -0
- test/integration/embedders/test_openai.py +76 -0
- test/integration/embedders/test_togetherai.py +71 -0
- test/integration/embedders/test_vertexai.py +65 -0
- test/integration/embedders/test_voyageai.py +65 -0
- test/integration/embedders/utils.py +68 -0
- test/integration/partitioners/__init__.py +0 -0
- test/integration/partitioners/test_partitioner.py +75 -0
- test/integration/utils.py +15 -0
- test/unit/__init__.py +0 -0
- test/unit/embed/__init__.py +0 -0
- test/unit/embed/test_mixedbreadai.py +42 -0
- test/unit/embed/test_octoai.py +27 -0
- test/unit/embed/test_openai.py +20 -0
- test/unit/embed/test_vertexai.py +25 -0
- test/unit/embed/test_voyageai.py +24 -0
- test/unit/test_error.py +27 -0
- test/unit/test_logger.py +78 -0
- test/unit/test_utils.py +184 -0
- test/unit/v2/__init__.py +0 -0
- test/unit/v2/chunkers/__init__.py +0 -0
- test/unit/v2/chunkers/test_chunkers.py +49 -0
- test/unit/v2/connectors/__init__.py +0 -0
- test/unit/v2/connectors/test_confluence.py +39 -0
- test/unit/v2/embedders/__init__.py +0 -0
- test/unit/v2/embedders/test_bedrock.py +36 -0
- test/unit/v2/embedders/test_huggingface.py +48 -0
- test/unit/v2/embedders/test_mixedbread.py +37 -0
- test/unit/v2/embedders/test_octoai.py +35 -0
- test/unit/v2/embedders/test_openai.py +35 -0
- test/unit/v2/embedders/test_togetherai.py +37 -0
- test/unit/v2/embedders/test_vertexai.py +37 -0
- test/unit/v2/embedders/test_voyageai.py +38 -0
- test/unit/v2/partitioners/__init__.py +0 -0
- test/unit/v2/partitioners/test_partitioner.py +63 -0
- test/unit/v2/test_interfaces.py +26 -0
- test/unit/v2/test_utils.py +82 -0
- test/unit/v2/utils/__init__.py +0 -0
- test/unit/v2/utils/data_generator.py +32 -0
- unstructured_ingest/__init__.py +1 -0
- unstructured_ingest/__version__.py +1 -0
- unstructured_ingest/cli/__init__.py +14 -0
- unstructured_ingest/cli/base/__init__.py +0 -0
- unstructured_ingest/cli/base/cmd.py +19 -0
- unstructured_ingest/cli/base/dest.py +87 -0
- unstructured_ingest/cli/base/src.py +57 -0
- unstructured_ingest/cli/cli.py +37 -0
- unstructured_ingest/cli/cmd_factory.py +12 -0
- unstructured_ingest/cli/cmds/__init__.py +145 -0
- unstructured_ingest/cli/cmds/airtable.py +69 -0
- unstructured_ingest/cli/cmds/astradb.py +99 -0
- unstructured_ingest/cli/cmds/azure_ai_search.py +65 -0
- unstructured_ingest/cli/cmds/biomed.py +52 -0
- unstructured_ingest/cli/cmds/chroma.py +104 -0
- unstructured_ingest/cli/cmds/clarifai.py +71 -0
- unstructured_ingest/cli/cmds/confluence.py +69 -0
- unstructured_ingest/cli/cmds/databricks_volumes.py +163 -0
- unstructured_ingest/cli/cmds/delta_table.py +94 -0
- unstructured_ingest/cli/cmds/discord.py +47 -0
- unstructured_ingest/cli/cmds/elasticsearch.py +133 -0
- unstructured_ingest/cli/cmds/fsspec/__init__.py +0 -0
- unstructured_ingest/cli/cmds/fsspec/azure.py +94 -0
- unstructured_ingest/cli/cmds/fsspec/box.py +48 -0
- unstructured_ingest/cli/cmds/fsspec/dropbox.py +51 -0
- unstructured_ingest/cli/cmds/fsspec/fsspec.py +15 -0
- unstructured_ingest/cli/cmds/fsspec/gcs.py +71 -0
- unstructured_ingest/cli/cmds/fsspec/s3.py +74 -0
- unstructured_ingest/cli/cmds/fsspec/sftp.py +58 -0
- unstructured_ingest/cli/cmds/github.py +54 -0
- unstructured_ingest/cli/cmds/gitlab.py +54 -0
- unstructured_ingest/cli/cmds/google_drive.py +49 -0
- unstructured_ingest/cli/cmds/hubspot.py +70 -0
- unstructured_ingest/cli/cmds/jira.py +71 -0
- unstructured_ingest/cli/cmds/kafka.py +102 -0
- unstructured_ingest/cli/cmds/local.py +43 -0
- unstructured_ingest/cli/cmds/mongodb.py +72 -0
- unstructured_ingest/cli/cmds/notion.py +48 -0
- unstructured_ingest/cli/cmds/onedrive.py +66 -0
- unstructured_ingest/cli/cmds/opensearch.py +117 -0
- unstructured_ingest/cli/cmds/outlook.py +67 -0
- unstructured_ingest/cli/cmds/pinecone.py +71 -0
- unstructured_ingest/cli/cmds/qdrant.py +124 -0
- unstructured_ingest/cli/cmds/reddit.py +67 -0
- unstructured_ingest/cli/cmds/salesforce.py +58 -0
- unstructured_ingest/cli/cmds/sharepoint.py +66 -0
- unstructured_ingest/cli/cmds/slack.py +56 -0
- unstructured_ingest/cli/cmds/sql.py +66 -0
- unstructured_ingest/cli/cmds/vectara.py +66 -0
- unstructured_ingest/cli/cmds/weaviate.py +98 -0
- unstructured_ingest/cli/cmds/wikipedia.py +40 -0
- unstructured_ingest/cli/common.py +7 -0
- unstructured_ingest/cli/interfaces.py +663 -0
- unstructured_ingest/cli/utils.py +205 -0
- unstructured_ingest/connector/__init__.py +0 -0
- unstructured_ingest/connector/airtable.py +309 -0
- unstructured_ingest/connector/astradb.py +267 -0
- unstructured_ingest/connector/azure_ai_search.py +144 -0
- unstructured_ingest/connector/biomed.py +320 -0
- unstructured_ingest/connector/chroma.py +158 -0
- unstructured_ingest/connector/clarifai.py +122 -0
- unstructured_ingest/connector/confluence.py +285 -0
- unstructured_ingest/connector/databricks_volumes.py +137 -0
- unstructured_ingest/connector/delta_table.py +203 -0
- unstructured_ingest/connector/discord.py +180 -0
- unstructured_ingest/connector/elasticsearch.py +396 -0
- unstructured_ingest/connector/fsspec/__init__.py +0 -0
- unstructured_ingest/connector/fsspec/azure.py +78 -0
- unstructured_ingest/connector/fsspec/box.py +109 -0
- unstructured_ingest/connector/fsspec/dropbox.py +160 -0
- unstructured_ingest/connector/fsspec/fsspec.py +359 -0
- unstructured_ingest/connector/fsspec/gcs.py +82 -0
- unstructured_ingest/connector/fsspec/s3.py +62 -0
- unstructured_ingest/connector/fsspec/sftp.py +81 -0
- unstructured_ingest/connector/git.py +124 -0
- unstructured_ingest/connector/github.py +174 -0
- unstructured_ingest/connector/gitlab.py +142 -0
- unstructured_ingest/connector/google_drive.py +348 -0
- unstructured_ingest/connector/hubspot.py +278 -0
- unstructured_ingest/connector/jira.py +469 -0
- unstructured_ingest/connector/kafka.py +293 -0
- unstructured_ingest/connector/local.py +139 -0
- unstructured_ingest/connector/mongodb.py +284 -0
- unstructured_ingest/connector/notion/__init__.py +0 -0
- unstructured_ingest/connector/notion/client.py +248 -0
- unstructured_ingest/connector/notion/connector.py +469 -0
- unstructured_ingest/connector/notion/helpers.py +584 -0
- unstructured_ingest/connector/notion/interfaces.py +32 -0
- unstructured_ingest/connector/notion/types/__init__.py +0 -0
- unstructured_ingest/connector/notion/types/block.py +96 -0
- unstructured_ingest/connector/notion/types/blocks/__init__.py +63 -0
- unstructured_ingest/connector/notion/types/blocks/bookmark.py +40 -0
- unstructured_ingest/connector/notion/types/blocks/breadcrumb.py +21 -0
- unstructured_ingest/connector/notion/types/blocks/bulleted_list_item.py +31 -0
- unstructured_ingest/connector/notion/types/blocks/callout.py +94 -0
- unstructured_ingest/connector/notion/types/blocks/child_database.py +23 -0
- unstructured_ingest/connector/notion/types/blocks/child_page.py +23 -0
- unstructured_ingest/connector/notion/types/blocks/code.py +43 -0
- unstructured_ingest/connector/notion/types/blocks/column_list.py +35 -0
- unstructured_ingest/connector/notion/types/blocks/divider.py +22 -0
- unstructured_ingest/connector/notion/types/blocks/embed.py +36 -0
- unstructured_ingest/connector/notion/types/blocks/equation.py +23 -0
- unstructured_ingest/connector/notion/types/blocks/file.py +49 -0
- unstructured_ingest/connector/notion/types/blocks/heading.py +37 -0
- unstructured_ingest/connector/notion/types/blocks/image.py +21 -0
- unstructured_ingest/connector/notion/types/blocks/link_preview.py +24 -0
- unstructured_ingest/connector/notion/types/blocks/link_to_page.py +29 -0
- unstructured_ingest/connector/notion/types/blocks/numbered_list.py +29 -0
- unstructured_ingest/connector/notion/types/blocks/paragraph.py +31 -0
- unstructured_ingest/connector/notion/types/blocks/pdf.py +49 -0
- unstructured_ingest/connector/notion/types/blocks/quote.py +37 -0
- unstructured_ingest/connector/notion/types/blocks/synced_block.py +57 -0
- unstructured_ingest/connector/notion/types/blocks/table.py +63 -0
- unstructured_ingest/connector/notion/types/blocks/table_of_contents.py +23 -0
- unstructured_ingest/connector/notion/types/blocks/template.py +30 -0
- unstructured_ingest/connector/notion/types/blocks/todo.py +42 -0
- unstructured_ingest/connector/notion/types/blocks/toggle.py +37 -0
- unstructured_ingest/connector/notion/types/blocks/unsupported.py +20 -0
- unstructured_ingest/connector/notion/types/blocks/video.py +22 -0
- unstructured_ingest/connector/notion/types/database.py +73 -0
- unstructured_ingest/connector/notion/types/database_properties/__init__.py +106 -0
- unstructured_ingest/connector/notion/types/database_properties/checkbox.py +38 -0
- unstructured_ingest/connector/notion/types/database_properties/created_by.py +35 -0
- unstructured_ingest/connector/notion/types/database_properties/created_time.py +34 -0
- unstructured_ingest/connector/notion/types/database_properties/date.py +41 -0
- unstructured_ingest/connector/notion/types/database_properties/email.py +36 -0
- unstructured_ingest/connector/notion/types/database_properties/files.py +37 -0
- unstructured_ingest/connector/notion/types/database_properties/formula.py +49 -0
- unstructured_ingest/connector/notion/types/database_properties/last_edited_by.py +34 -0
- unstructured_ingest/connector/notion/types/database_properties/last_edited_time.py +34 -0
- unstructured_ingest/connector/notion/types/database_properties/multiselect.py +73 -0
- unstructured_ingest/connector/notion/types/database_properties/number.py +49 -0
- unstructured_ingest/connector/notion/types/database_properties/people.py +40 -0
- unstructured_ingest/connector/notion/types/database_properties/phone_number.py +36 -0
- unstructured_ingest/connector/notion/types/database_properties/relation.py +67 -0
- unstructured_ingest/connector/notion/types/database_properties/rich_text.py +43 -0
- unstructured_ingest/connector/notion/types/database_properties/rollup.py +56 -0
- unstructured_ingest/connector/notion/types/database_properties/select.py +68 -0
- unstructured_ingest/connector/notion/types/database_properties/status.py +80 -0
- unstructured_ingest/connector/notion/types/database_properties/title.py +37 -0
- unstructured_ingest/connector/notion/types/database_properties/unique_id.py +50 -0
- unstructured_ingest/connector/notion/types/database_properties/url.py +37 -0
- unstructured_ingest/connector/notion/types/database_properties/verification.py +78 -0
- unstructured_ingest/connector/notion/types/date.py +26 -0
- unstructured_ingest/connector/notion/types/file.py +51 -0
- unstructured_ingest/connector/notion/types/page.py +45 -0
- unstructured_ingest/connector/notion/types/parent.py +66 -0
- unstructured_ingest/connector/notion/types/rich_text.py +189 -0
- unstructured_ingest/connector/notion/types/user.py +76 -0
- unstructured_ingest/connector/onedrive.py +232 -0
- unstructured_ingest/connector/opensearch.py +218 -0
- unstructured_ingest/connector/outlook.py +285 -0
- unstructured_ingest/connector/pinecone.py +140 -0
- unstructured_ingest/connector/qdrant.py +144 -0
- unstructured_ingest/connector/reddit.py +166 -0
- unstructured_ingest/connector/registry.py +109 -0
- unstructured_ingest/connector/salesforce.py +301 -0
- unstructured_ingest/connector/sharepoint.py +573 -0
- unstructured_ingest/connector/slack.py +224 -0
- unstructured_ingest/connector/sql.py +199 -0
- unstructured_ingest/connector/vectara.py +253 -0
- unstructured_ingest/connector/weaviate.py +190 -0
- unstructured_ingest/connector/wikipedia.py +208 -0
- unstructured_ingest/embed/__init__.py +0 -0
- unstructured_ingest/embed/azure_openai.py +31 -0
- unstructured_ingest/embed/bedrock.py +193 -0
- unstructured_ingest/embed/huggingface.py +52 -0
- unstructured_ingest/embed/interfaces.py +117 -0
- unstructured_ingest/embed/mixedbreadai.py +233 -0
- unstructured_ingest/embed/octoai.py +130 -0
- unstructured_ingest/embed/openai.py +116 -0
- unstructured_ingest/embed/togetherai.py +106 -0
- unstructured_ingest/embed/vertexai.py +126 -0
- unstructured_ingest/embed/voyageai.py +130 -0
- unstructured_ingest/enhanced_dataclass/__init__.py +4 -0
- unstructured_ingest/enhanced_dataclass/core.py +99 -0
- unstructured_ingest/enhanced_dataclass/dataclasses.py +54 -0
- unstructured_ingest/enhanced_dataclass/json_mixin.py +125 -0
- unstructured_ingest/error.py +49 -0
- unstructured_ingest/ingest_backoff/__init__.py +3 -0
- unstructured_ingest/ingest_backoff/_common.py +102 -0
- unstructured_ingest/ingest_backoff/_wrapper.py +122 -0
- unstructured_ingest/interfaces.py +852 -0
- unstructured_ingest/logger.py +130 -0
- unstructured_ingest/main.py +11 -0
- unstructured_ingest/pipeline/__init__.py +22 -0
- unstructured_ingest/pipeline/copy.py +19 -0
- unstructured_ingest/pipeline/doc_factory.py +12 -0
- unstructured_ingest/pipeline/interfaces.py +270 -0
- unstructured_ingest/pipeline/partition.py +60 -0
- unstructured_ingest/pipeline/permissions.py +12 -0
- unstructured_ingest/pipeline/pipeline.py +117 -0
- unstructured_ingest/pipeline/reformat/__init__.py +0 -0
- unstructured_ingest/pipeline/reformat/chunking.py +134 -0
- unstructured_ingest/pipeline/reformat/embedding.py +64 -0
- unstructured_ingest/pipeline/source.py +77 -0
- unstructured_ingest/pipeline/utils.py +6 -0
- unstructured_ingest/pipeline/write.py +18 -0
- unstructured_ingest/processor.py +93 -0
- unstructured_ingest/runner/__init__.py +104 -0
- unstructured_ingest/runner/airtable.py +35 -0
- unstructured_ingest/runner/astradb.py +34 -0
- unstructured_ingest/runner/base_runner.py +89 -0
- unstructured_ingest/runner/biomed.py +45 -0
- unstructured_ingest/runner/confluence.py +35 -0
- unstructured_ingest/runner/delta_table.py +34 -0
- unstructured_ingest/runner/discord.py +35 -0
- unstructured_ingest/runner/elasticsearch.py +40 -0
- unstructured_ingest/runner/fsspec/__init__.py +0 -0
- unstructured_ingest/runner/fsspec/azure.py +30 -0
- unstructured_ingest/runner/fsspec/box.py +28 -0
- unstructured_ingest/runner/fsspec/dropbox.py +30 -0
- unstructured_ingest/runner/fsspec/fsspec.py +40 -0
- unstructured_ingest/runner/fsspec/gcs.py +28 -0
- unstructured_ingest/runner/fsspec/s3.py +28 -0
- unstructured_ingest/runner/fsspec/sftp.py +28 -0
- unstructured_ingest/runner/github.py +37 -0
- unstructured_ingest/runner/gitlab.py +37 -0
- unstructured_ingest/runner/google_drive.py +35 -0
- unstructured_ingest/runner/hubspot.py +35 -0
- unstructured_ingest/runner/jira.py +35 -0
- unstructured_ingest/runner/kafka.py +34 -0
- unstructured_ingest/runner/local.py +23 -0
- unstructured_ingest/runner/mongodb.py +34 -0
- unstructured_ingest/runner/notion.py +61 -0
- unstructured_ingest/runner/onedrive.py +35 -0
- unstructured_ingest/runner/opensearch.py +40 -0
- unstructured_ingest/runner/outlook.py +33 -0
- unstructured_ingest/runner/reddit.py +35 -0
- unstructured_ingest/runner/salesforce.py +33 -0
- unstructured_ingest/runner/sharepoint.py +35 -0
- unstructured_ingest/runner/slack.py +33 -0
- unstructured_ingest/runner/utils.py +47 -0
- unstructured_ingest/runner/wikipedia.py +35 -0
- unstructured_ingest/runner/writers/__init__.py +48 -0
- unstructured_ingest/runner/writers/astradb.py +22 -0
- unstructured_ingest/runner/writers/azure_ai_search.py +24 -0
- unstructured_ingest/runner/writers/base_writer.py +26 -0
- unstructured_ingest/runner/writers/chroma.py +22 -0
- unstructured_ingest/runner/writers/clarifai.py +19 -0
- unstructured_ingest/runner/writers/databricks_volumes.py +25 -0
- unstructured_ingest/runner/writers/delta_table.py +24 -0
- unstructured_ingest/runner/writers/elasticsearch.py +24 -0
- unstructured_ingest/runner/writers/fsspec/__init__.py +0 -0
- unstructured_ingest/runner/writers/fsspec/azure.py +24 -0
- unstructured_ingest/runner/writers/fsspec/box.py +21 -0
- unstructured_ingest/runner/writers/fsspec/dropbox.py +21 -0
- unstructured_ingest/runner/writers/fsspec/gcs.py +19 -0
- unstructured_ingest/runner/writers/fsspec/s3.py +21 -0
- unstructured_ingest/runner/writers/kafka.py +21 -0
- unstructured_ingest/runner/writers/mongodb.py +21 -0
- unstructured_ingest/runner/writers/opensearch.py +26 -0
- unstructured_ingest/runner/writers/pinecone.py +21 -0
- unstructured_ingest/runner/writers/qdrant.py +19 -0
- unstructured_ingest/runner/writers/sql.py +22 -0
- unstructured_ingest/runner/writers/vectara.py +22 -0
- unstructured_ingest/runner/writers/weaviate.py +21 -0
- unstructured_ingest/utils/__init__.py +0 -0
- unstructured_ingest/utils/chunking.py +56 -0
- unstructured_ingest/utils/compression.py +118 -0
- unstructured_ingest/utils/data_prep.py +200 -0
- unstructured_ingest/utils/dep_check.py +78 -0
- unstructured_ingest/utils/google_filetype.py +9 -0
- unstructured_ingest/utils/string_and_date_utils.py +49 -0
- unstructured_ingest/utils/table.py +73 -0
- unstructured_ingest/v2/__init__.py +1 -0
- unstructured_ingest/v2/cli/__init__.py +0 -0
- unstructured_ingest/v2/cli/base/__init__.py +4 -0
- unstructured_ingest/v2/cli/base/cmd.py +269 -0
- unstructured_ingest/v2/cli/base/dest.py +85 -0
- unstructured_ingest/v2/cli/base/importer.py +34 -0
- unstructured_ingest/v2/cli/base/src.py +85 -0
- unstructured_ingest/v2/cli/cli.py +24 -0
- unstructured_ingest/v2/cli/cmds.py +14 -0
- unstructured_ingest/v2/cli/utils/__init__.py +0 -0
- unstructured_ingest/v2/cli/utils/click.py +237 -0
- unstructured_ingest/v2/cli/utils/model_conversion.py +222 -0
- unstructured_ingest/v2/constants.py +2 -0
- unstructured_ingest/v2/errors.py +18 -0
- unstructured_ingest/v2/interfaces/__init__.py +32 -0
- unstructured_ingest/v2/interfaces/connector.py +50 -0
- unstructured_ingest/v2/interfaces/downloader.py +89 -0
- unstructured_ingest/v2/interfaces/file_data.py +116 -0
- unstructured_ingest/v2/interfaces/indexer.py +30 -0
- unstructured_ingest/v2/interfaces/process.py +19 -0
- unstructured_ingest/v2/interfaces/processor.py +88 -0
- unstructured_ingest/v2/interfaces/upload_stager.py +102 -0
- unstructured_ingest/v2/interfaces/uploader.py +53 -0
- unstructured_ingest/v2/logger.py +126 -0
- unstructured_ingest/v2/main.py +11 -0
- unstructured_ingest/v2/otel.py +111 -0
- unstructured_ingest/v2/pipeline/__init__.py +0 -0
- unstructured_ingest/v2/pipeline/interfaces.py +211 -0
- unstructured_ingest/v2/pipeline/otel.py +32 -0
- unstructured_ingest/v2/pipeline/pipeline.py +384 -0
- unstructured_ingest/v2/pipeline/steps/__init__.py +0 -0
- unstructured_ingest/v2/pipeline/steps/chunk.py +80 -0
- unstructured_ingest/v2/pipeline/steps/download.py +207 -0
- unstructured_ingest/v2/pipeline/steps/embed.py +79 -0
- unstructured_ingest/v2/pipeline/steps/filter.py +35 -0
- unstructured_ingest/v2/pipeline/steps/index.py +86 -0
- unstructured_ingest/v2/pipeline/steps/partition.py +79 -0
- unstructured_ingest/v2/pipeline/steps/stage.py +65 -0
- unstructured_ingest/v2/pipeline/steps/uncompress.py +50 -0
- unstructured_ingest/v2/pipeline/steps/upload.py +58 -0
- unstructured_ingest/v2/processes/__init__.py +18 -0
- unstructured_ingest/v2/processes/chunker.py +124 -0
- unstructured_ingest/v2/processes/connector_registry.py +69 -0
- unstructured_ingest/v2/processes/connectors/__init__.py +117 -0
- unstructured_ingest/v2/processes/connectors/airtable.py +235 -0
- unstructured_ingest/v2/processes/connectors/astradb.py +402 -0
- unstructured_ingest/v2/processes/connectors/azure_ai_search.py +276 -0
- unstructured_ingest/v2/processes/connectors/chroma.py +190 -0
- unstructured_ingest/v2/processes/connectors/confluence.py +207 -0
- unstructured_ingest/v2/processes/connectors/couchbase.py +334 -0
- unstructured_ingest/v2/processes/connectors/databricks/__init__.py +52 -0
- unstructured_ingest/v2/processes/connectors/databricks/volumes.py +208 -0
- unstructured_ingest/v2/processes/connectors/databricks/volumes_aws.py +87 -0
- unstructured_ingest/v2/processes/connectors/databricks/volumes_azure.py +102 -0
- unstructured_ingest/v2/processes/connectors/databricks/volumes_gcp.py +85 -0
- unstructured_ingest/v2/processes/connectors/databricks/volumes_native.py +86 -0
- unstructured_ingest/v2/processes/connectors/delta_table.py +191 -0
- unstructured_ingest/v2/processes/connectors/discord.py +158 -0
- unstructured_ingest/v2/processes/connectors/duckdb/__init__.py +15 -0
- unstructured_ingest/v2/processes/connectors/duckdb/base.py +100 -0
- unstructured_ingest/v2/processes/connectors/duckdb/duckdb.py +127 -0
- unstructured_ingest/v2/processes/connectors/duckdb/motherduck.py +126 -0
- unstructured_ingest/v2/processes/connectors/elasticsearch/__init__.py +19 -0
- unstructured_ingest/v2/processes/connectors/elasticsearch/elasticsearch.py +470 -0
- unstructured_ingest/v2/processes/connectors/elasticsearch/opensearch.py +195 -0
- unstructured_ingest/v2/processes/connectors/fsspec/__init__.py +37 -0
- unstructured_ingest/v2/processes/connectors/fsspec/azure.py +197 -0
- unstructured_ingest/v2/processes/connectors/fsspec/box.py +170 -0
- unstructured_ingest/v2/processes/connectors/fsspec/dropbox.py +168 -0
- unstructured_ingest/v2/processes/connectors/fsspec/fsspec.py +332 -0
- unstructured_ingest/v2/processes/connectors/fsspec/gcs.py +197 -0
- unstructured_ingest/v2/processes/connectors/fsspec/s3.py +185 -0
- unstructured_ingest/v2/processes/connectors/fsspec/sftp.py +171 -0
- unstructured_ingest/v2/processes/connectors/fsspec/utils.py +17 -0
- unstructured_ingest/v2/processes/connectors/gitlab.py +268 -0
- unstructured_ingest/v2/processes/connectors/google_drive.py +348 -0
- unstructured_ingest/v2/processes/connectors/kafka/__init__.py +17 -0
- unstructured_ingest/v2/processes/connectors/kafka/cloud.py +121 -0
- unstructured_ingest/v2/processes/connectors/kafka/kafka.py +273 -0
- unstructured_ingest/v2/processes/connectors/kafka/local.py +103 -0
- unstructured_ingest/v2/processes/connectors/kdbai.py +148 -0
- unstructured_ingest/v2/processes/connectors/lancedb/__init__.py +30 -0
- unstructured_ingest/v2/processes/connectors/lancedb/aws.py +43 -0
- unstructured_ingest/v2/processes/connectors/lancedb/azure.py +43 -0
- unstructured_ingest/v2/processes/connectors/lancedb/cloud.py +42 -0
- unstructured_ingest/v2/processes/connectors/lancedb/gcp.py +44 -0
- unstructured_ingest/v2/processes/connectors/lancedb/lancedb.py +169 -0
- unstructured_ingest/v2/processes/connectors/lancedb/local.py +44 -0
- unstructured_ingest/v2/processes/connectors/local.py +217 -0
- unstructured_ingest/v2/processes/connectors/milvus.py +225 -0
- unstructured_ingest/v2/processes/connectors/mongodb.py +361 -0
- unstructured_ingest/v2/processes/connectors/neo4j.py +385 -0
- unstructured_ingest/v2/processes/connectors/notion/__init__.py +0 -0
- unstructured_ingest/v2/processes/connectors/notion/client.py +349 -0
- unstructured_ingest/v2/processes/connectors/notion/connector.py +346 -0
- unstructured_ingest/v2/processes/connectors/notion/helpers.py +448 -0
- unstructured_ingest/v2/processes/connectors/notion/interfaces.py +32 -0
- unstructured_ingest/v2/processes/connectors/notion/types/__init__.py +0 -0
- unstructured_ingest/v2/processes/connectors/notion/types/block.py +96 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/__init__.py +63 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/bookmark.py +40 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/breadcrumb.py +21 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/bulleted_list_item.py +31 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/callout.py +94 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/child_database.py +23 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/child_page.py +23 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/code.py +43 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/column_list.py +35 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/divider.py +22 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/embed.py +36 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/equation.py +23 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/file.py +49 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/heading.py +37 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/image.py +21 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/link_preview.py +24 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/link_to_page.py +29 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/numbered_list.py +29 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/paragraph.py +31 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/pdf.py +49 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/quote.py +37 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/synced_block.py +57 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/table.py +63 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/table_of_contents.py +23 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/template.py +30 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/todo.py +42 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/toggle.py +37 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/unsupported.py +20 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/video.py +22 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database.py +73 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/__init__.py +106 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/checkbox.py +38 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/created_by.py +35 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/created_time.py +34 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/date.py +41 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/email.py +36 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/files.py +37 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/formula.py +49 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/last_edited_by.py +34 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/last_edited_time.py +34 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/multiselect.py +73 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/number.py +49 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/people.py +41 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/phone_number.py +36 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/relation.py +67 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/rich_text.py +43 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/rollup.py +56 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/select.py +69 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/status.py +81 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/title.py +37 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/unique_id.py +50 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/url.py +37 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/verification.py +78 -0
- unstructured_ingest/v2/processes/connectors/notion/types/date.py +29 -0
- unstructured_ingest/v2/processes/connectors/notion/types/file.py +54 -0
- unstructured_ingest/v2/processes/connectors/notion/types/page.py +45 -0
- unstructured_ingest/v2/processes/connectors/notion/types/parent.py +66 -0
- unstructured_ingest/v2/processes/connectors/notion/types/rich_text.py +189 -0
- unstructured_ingest/v2/processes/connectors/notion/types/user.py +79 -0
- unstructured_ingest/v2/processes/connectors/onedrive.py +447 -0
- unstructured_ingest/v2/processes/connectors/outlook.py +239 -0
- unstructured_ingest/v2/processes/connectors/pinecone.py +277 -0
- unstructured_ingest/v2/processes/connectors/qdrant/__init__.py +16 -0
- unstructured_ingest/v2/processes/connectors/qdrant/cloud.py +59 -0
- unstructured_ingest/v2/processes/connectors/qdrant/local.py +58 -0
- unstructured_ingest/v2/processes/connectors/qdrant/qdrant.py +160 -0
- unstructured_ingest/v2/processes/connectors/qdrant/server.py +60 -0
- unstructured_ingest/v2/processes/connectors/redisdb.py +182 -0
- unstructured_ingest/v2/processes/connectors/salesforce.py +303 -0
- unstructured_ingest/v2/processes/connectors/sharepoint.py +448 -0
- unstructured_ingest/v2/processes/connectors/slack.py +248 -0
- unstructured_ingest/v2/processes/connectors/sql/__init__.py +27 -0
- unstructured_ingest/v2/processes/connectors/sql/postgres.py +162 -0
- unstructured_ingest/v2/processes/connectors/sql/singlestore.py +166 -0
- unstructured_ingest/v2/processes/connectors/sql/snowflake.py +210 -0
- unstructured_ingest/v2/processes/connectors/sql/sql.py +434 -0
- unstructured_ingest/v2/processes/connectors/sql/sqlite.py +168 -0
- unstructured_ingest/v2/processes/connectors/utils.py +29 -0
- unstructured_ingest/v2/processes/connectors/vectara.py +350 -0
- unstructured_ingest/v2/processes/connectors/weaviate/__init__.py +22 -0
- unstructured_ingest/v2/processes/connectors/weaviate/cloud.py +165 -0
- unstructured_ingest/v2/processes/connectors/weaviate/embedded.py +90 -0
- unstructured_ingest/v2/processes/connectors/weaviate/local.py +73 -0
- unstructured_ingest/v2/processes/connectors/weaviate/weaviate.py +267 -0
- unstructured_ingest/v2/processes/embedder.py +195 -0
- unstructured_ingest/v2/processes/filter.py +60 -0
- unstructured_ingest/v2/processes/partitioner.py +188 -0
- unstructured_ingest/v2/processes/uncompress.py +61 -0
- unstructured_ingest/v2/unstructured_api.py +128 -0
- unstructured_ingest/v2/utils.py +61 -0
- unstructured_ingest-0.3.13.dist-info/LICENSE.md +201 -0
- unstructured_ingest-0.3.13.dist-info/METADATA +205 -0
- unstructured_ingest-0.3.13.dist-info/RECORD +557 -0
- unstructured_ingest-0.3.13.dist-info/WHEEL +5 -0
- unstructured_ingest-0.3.13.dist-info/entry_points.txt +2 -0
- unstructured_ingest-0.3.13.dist-info/top_level.txt +2 -0
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
import importlib
|
|
5
|
+
from functools import wraps
|
|
6
|
+
from typing import (
|
|
7
|
+
Callable,
|
|
8
|
+
List,
|
|
9
|
+
Optional,
|
|
10
|
+
TypeVar,
|
|
11
|
+
)
|
|
12
|
+
|
|
13
|
+
from typing_extensions import ParamSpec
|
|
14
|
+
|
|
15
|
+
_T = TypeVar("_T")
|
|
16
|
+
_P = ParamSpec("_P")
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def requires_dependencies(
    dependencies: str | list[str],
    extras: Optional[str] = None,
) -> Callable[[Callable[_P, _T]], Callable[_P, _T]]:
    """Build a decorator that verifies modules are importable before each call.

    The check runs every time the decorated callable is invoked (not at decoration
    time), so it suits functions that perform their own local imports.

    Args:
        dependencies: A module name, or a list of module names, the decorated
            callable needs.
        extras: Name of the unstructured-ingest extra to suggest in the error
            message. When falsy, the message suggests installing the missing
            modules directly.

    Returns:
        A decorator. Coroutine functions are wrapped with an async wrapper, all
        other callables with a plain one; both preserve the wrapped metadata.

    Raises:
        ImportError: Raised by the wrapper at call time when at least one of the
            `dependencies` is reported missing.
    """
    required = [dependencies] if isinstance(dependencies, str) else dependencies

    def ensure_installed() -> None:
        absent = [name for name in required if not dependency_exists(name)]
        if not absent:
            return
        if extras:
            hint = f"""Please install them using `pip install "unstructured-ingest[{extras}]"`."""  # noqa: E501
        else:
            hint = f"Please install them using `pip install {' '.join(absent)}`."
        raise ImportError(f"Following dependencies are missing: {', '.join(absent)}. " + hint)

    def decorator(func: Callable[_P, _T]) -> Callable[_P, _T]:
        # Coroutine functions need an async wrapper so the result is still awaitable.
        if asyncio.iscoroutinefunction(func):

            @wraps(func)
            async def guarded_async(*args: _P.args, **kwargs: _P.kwargs):
                ensure_installed()
                return await func(*args, **kwargs)

            return guarded_async

        @wraps(func)
        def guarded(*args: _P.args, **kwargs: _P.kwargs):
            ensure_installed()
            return func(*args, **kwargs)

        return guarded

    return decorator
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def dependency_exists(dependency: str) -> bool:
    """Report whether the module named `dependency` can be imported.

    Args:
        dependency: Module name to try, optionally dotted (``"pkg.sub"``).

    Returns:
        False when the import fails because `dependency` itself, or one of its
        parent packages, cannot be found. True when the import succeeds, and
        also when it fails with an ImportError that does not appear to concern
        `dependency` (an unrelated import problem inside the module).
    """
    try:
        importlib.import_module(dependency)
    except ImportError as e:
        # For a dotted name, Python reports the first missing ancestor ("pkg" for
        # "pkg.sub"), so the full dotted name never appears in the error message.
        # ImportError.name carries the module that could not be found.
        missing = e.name
        if missing and (dependency == missing or dependency.startswith(f"{missing}.")):
            return False
        # Check to make sure this isn't some unrelated import error.
        if dependency in repr(e):
            return False
    return True
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
# Google Workspace MIME type -> MIME type paired with it (presumably the format
# requested when exporting such a file; confirm against the Google Drive connector).
GOOGLE_DRIVE_EXPORT_TYPES = {
    "application/vnd.google-apps.document": (
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
    ),
    "application/vnd.google-apps.spreadsheet": (
        "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
    ),
    "application/vnd.google-apps.presentation": (
        "application/vnd.openxmlformats-officedocument.presentationml.presentation"
    ),
    "application/vnd.google-apps.photo": "image/jpeg",
}
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import typing as t
|
|
3
|
+
from datetime import datetime
|
|
4
|
+
|
|
5
|
+
from dateutil import parser
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def json_to_dict(json_string: str) -> t.Union[str, t.Dict[str, t.Any]]:
    """Best-effort JSON decoding that falls back to returning the input unchanged.

    The string is parsed as-is first. If that fails, it is parsed again with every
    single quote replaced by a double quote, which covers input written with
    Python-style quoting. If both attempts fail, the original string is returned
    rather than raising, since the value may simply be a path or other plain text.

    Args:
        json_string: Text that may or may not contain JSON.

    Returns:
        The decoded JSON value, or `json_string` itself when it cannot be decoded.
    """
    try:
        return json.loads(json_string)
    except json.JSONDecodeError:
        # Not necessarily an error: retry below with double quotes.
        requoted = json_string.replace("'", '"')

    try:
        return json.loads(requoted)
    except json.JSONDecodeError:
        # Still not JSON (e.g. a path); hand the input back untouched.
        return json_string
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def ensure_isoformat_datetime(timestamp: t.Union[datetime, str]) -> str:
    """Return `timestamp` rendered as an ISO-format datetime string.

    Args:
        timestamp: A datetime object, or a string that dateutil can parse.

    Returns:
        The result of ``isoformat()`` on the datetime (parsed first if a string).

    Raises:
        ValueError: The string could not be parsed as a datetime.
        TypeError: `timestamp` is neither a datetime nor a str.
    """
    if isinstance(timestamp, datetime):
        return timestamp.isoformat()

    if not isinstance(timestamp, str):
        raise TypeError(f"Expected input type datetime or str, but got {type(timestamp)}.")

    try:
        # dateutil accepts many textual layouts, so no explicit format is needed.
        return parser.parse(timestamp).isoformat()
    except ValueError as e:
        raise ValueError(f"String '{timestamp}' could not be parsed as a datetime.") from e
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def truncate_string_bytes(string: str, max_bytes: int, encoding: str = "utf-8") -> str:
    """Shorten `string` so its encoded form occupies at most `max_bytes` bytes.

    Args:
        string: Value to truncate; it is passed through ``str()`` before encoding.
        max_bytes: Upper bound on the encoded length.
        encoding: Codec used to measure and cut the text.

    Returns:
        `string` unchanged when it already fits, otherwise the decoded prefix of
        its encoded bytes.
    """
    raw = str(string).encode(encoding)
    if len(raw) > max_bytes:
        # errors="ignore" drops a multi-byte character that the cut split in half.
        return raw[:max_bytes].decode(encoding, errors="ignore")
    return string
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
from typing import Any
|
|
2
|
+
|
|
3
|
+
import pandas as pd
|
|
4
|
+
|
|
5
|
+
from unstructured_ingest.utils.data_prep import flatten_dict
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def get_default_pandas_dtypes() -> dict[str, Any]:
    """Return the dtype to apply to each known column of an elements DataFrame.

    Returns:
        A new dict on every call, mapping column name to a pandas dtype, dtype
        alias string, or Python type. String columns each get their own
        ``pd.StringDtype()`` instance.
    """
    string = pd.StringDtype  # type: ignore
    nullable_int = "Int64"
    nullable_bool = "boolean"
    return {
        "text": string(),
        "type": string(),
        "element_id": string(),
        "filename": string(),  # Optional[str]
        "filetype": string(),  # Optional[str]
        "file_directory": string(),  # Optional[str]
        "last_modified": string(),  # Optional[str]
        "attached_to_filename": string(),  # Optional[str]
        "parent_id": string(),  # Optional[str]
        "category_depth": nullable_int,  # Optional[int]
        "image_path": string(),  # Optional[str]
        "languages": object,  # Optional[list[str]]
        "page_number": nullable_int,  # Optional[int]
        "page_name": string(),  # Optional[str]
        "url": string(),  # Optional[str]
        "link_urls": string(),  # Optional[str]
        "link_texts": object,  # Optional[list[str]]
        "links": object,
        "sent_from": object,  # Optional[list[str]]
        "sent_to": object,  # Optional[list[str]]
        "subject": string(),  # Optional[str]
        "section": string(),  # Optional[str]
        "header_footer_type": string(),  # Optional[str]
        "emphasized_text_contents": object,  # Optional[list[str]]
        "emphasized_text_tags": object,  # Optional[list[str]]
        "text_as_html": string(),  # Optional[str]
        "regex_metadata": object,
        "max_characters": nullable_int,  # Optional[int]
        "is_continuation": nullable_bool,  # Optional[bool]
        "detection_class_prob": float,  # Optional[float]
        "sender": string(),
        "coordinates_points": object,
        "coordinates_system": string(),
        "coordinates_layout_width": float,
        "coordinates_layout_height": float,
        "data_source_url": string(),  # Optional[str]
        "data_source_version": string(),  # Optional[str]
        "data_source_record_locator": object,
        "data_source_date_created": string(),  # Optional[str]
        "data_source_date_modified": string(),  # Optional[str]
        "data_source_date_processed": string(),  # Optional[str]
        "data_source_permissions_data": object,
        "embeddings": object,
        "regex_metadata_key": object,
    }
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def convert_to_pandas_dataframe(
    elements_dict: list[dict[str, Any]],
    drop_empty_cols: bool = False,
) -> pd.DataFrame:
    """Build a typed DataFrame from a list of element dictionaries.

    Note that the input dictionaries are modified in place: a "metadata" entry is
    removed from each one and, when non-empty, its flattened contents are merged
    back into the same dictionary.

    Args:
        elements_dict: One dictionary per element (row).
        drop_empty_cols: When true, columns whose values are all missing are
            dropped from the result.

    Returns:
        The DataFrame, with every column that has a default dtype cast to it.
    """
    # Flatten metadata if it hasn't already been flattened
    for element in elements_dict:
        nested = element.pop("metadata", None)
        if nested:
            element.update(flatten_dict(nested, keys_to_omit=["data_source_record_locator"]))

    frame = pd.DataFrame.from_dict(elements_dict)

    # Only cast the columns actually present in this batch of elements.
    known_dtypes = get_default_pandas_dtypes()
    present = {name: known_dtypes[name] for name in known_dtypes if name in frame.columns}
    frame = frame.astype(present)

    if drop_empty_cols:
        frame.dropna(axis=1, how="all", inplace=True)
    return frame
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
File without changes
|
|
@@ -0,0 +1,269 @@
|
|
|
1
|
+
import inspect
|
|
2
|
+
from abc import ABC, abstractmethod
|
|
3
|
+
from collections import Counter
|
|
4
|
+
from dataclasses import dataclass, field, fields
|
|
5
|
+
from typing import Any, Optional, Type, TypeVar
|
|
6
|
+
|
|
7
|
+
import click
|
|
8
|
+
from pydantic import BaseModel
|
|
9
|
+
|
|
10
|
+
from unstructured_ingest.v2.cli.base.importer import import_from_string
|
|
11
|
+
from unstructured_ingest.v2.cli.utils.click import extract_config
|
|
12
|
+
from unstructured_ingest.v2.cli.utils.model_conversion import options_from_base_model, post_check
|
|
13
|
+
from unstructured_ingest.v2.interfaces import ProcessorConfig
|
|
14
|
+
from unstructured_ingest.v2.logger import logger
|
|
15
|
+
from unstructured_ingest.v2.pipeline.pipeline import Pipeline
|
|
16
|
+
from unstructured_ingest.v2.processes.chunker import Chunker, ChunkerConfig
|
|
17
|
+
from unstructured_ingest.v2.processes.connector_registry import (
|
|
18
|
+
DownloaderT,
|
|
19
|
+
IndexerT,
|
|
20
|
+
RegistryEntry,
|
|
21
|
+
UploaderT,
|
|
22
|
+
UploadStager,
|
|
23
|
+
UploadStagerConfig,
|
|
24
|
+
UploadStagerT,
|
|
25
|
+
destination_registry,
|
|
26
|
+
source_registry,
|
|
27
|
+
)
|
|
28
|
+
from unstructured_ingest.v2.processes.connectors.local import LocalUploader, LocalUploaderConfig
|
|
29
|
+
from unstructured_ingest.v2.processes.embedder import Embedder, EmbedderConfig
|
|
30
|
+
from unstructured_ingest.v2.processes.filter import Filterer, FiltererConfig
|
|
31
|
+
from unstructured_ingest.v2.processes.partitioner import Partitioner, PartitionerConfig
|
|
32
|
+
|
|
33
|
+
CommandT = TypeVar("CommandT", bound=click.Command)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
@dataclass
class BaseCmd(ABC):
    """Shared base for CLI commands that turn flat click options into a ``Pipeline``.

    Subclasses provide `get_registry_options` (the options contributed by their
    registry entry) and `cmd` (the click callback). This class supplies option
    consolidation and the factories that build each pipeline step from a flat
    options dict via `extract_config`.
    """

    # Command name; `cmd_name_key` and `cli_cmd_name` derive the "_" and "-" spellings.
    cmd_name: str
    # Registry entry describing the connector this command wraps.
    registry_entry: RegistryEntry
    # Additional pydantic models whose fields are exposed as options on the command.
    default_configs: list[Type[BaseModel]] = field(default_factory=list)

    @abstractmethod
    def get_registry_options(self):
        """Return the click options derived from `registry_entry`."""
        pass

    def get_default_options(self) -> list[click.Option]:
        """Return the click options generated from every model in `default_configs`."""
        options = []
        for extra in self.default_configs:
            options.extend(options_from_base_model(model=extra))
        return options

    @classmethod
    def consolidate_options(cls, options: list[click.Option]) -> list[click.Option]:
        """Collapse options that share a name, keeping the first occurrence.

        Args:
            options: Options possibly containing repeated names.

        Returns:
            `options` itself when no name repeats, otherwise a new list holding the
            first option seen for each name, in original order.

        Raises:
            ValueError: Two options share a name but differ in their attributes.
        """
        first_by_name: dict[Any, click.Option] = {}
        for option in options:
            existing_option = first_by_name.setdefault(option.name, option)
            if existing_option is option:
                # First time this name is seen.
                continue
            if existing_option.__dict__ == option.__dict__:
                # Exact duplicate: silently dropped.
                continue
            option_diff = cls.get_options_diff(o1=option, o2=existing_option)
            raise ValueError(
                "Conflicting duplicate {} option defined: {}".format(
                    option.name, " | ".join([f"{d[0]}: {d[1]}" for d in option_diff])
                )
            )
        if len(first_by_name) == len(options):
            return options
        return list(first_by_name.values())

    @staticmethod
    def get_options_diff(o1: click.Option, o2: click.Option):
        """Return the ``(attribute, value)`` pairs that differ between two options.

        The options themselves are left untouched; comparison happens on copies of
        their attribute dicts, with ``opts`` and ``secondary_opts`` joined into
        comma-separated strings.

        Returns:
            A set with one pair per attribute value present on exactly one side.
            Values that cannot be hashed are represented by their ``repr``.
        """

        def comparable(option: click.Option) -> dict[str, Any]:
            # Copy first: writing into option.__dict__ would replace the live
            # `opts` lists on the click.Option with strings.
            attrs = dict(option.__dict__)
            attrs["opts"] = ",".join(attrs["opts"])
            attrs["secondary_opts"] = ",".join(attrs["secondary_opts"])
            return attrs

        o1_dict = comparable(o1)
        o2_dict = comparable(o2)
        option_diff = set()
        for attrs, other in ((o1_dict, o2_dict), (o2_dict, o1_dict)):
            for key, value in attrs.items():
                if key in other and other[key] == value:
                    continue
                try:
                    hash(value)
                except TypeError:
                    # e.g. a list default; keep the diff usable instead of crashing.
                    value = repr(value)
                option_diff.add((key, value))
        return option_diff

    @property
    def cmd_name_key(self):
        """`cmd_name` with dashes replaced by underscores."""
        return self.cmd_name.replace("-", "_")

    @property
    def cli_cmd_name(self):
        """`cmd_name` with underscores replaced by dashes."""
        return self.cmd_name.replace("_", "-")

    @abstractmethod
    def cmd(self, ctx: click.Context, **options) -> None:
        """Click callback executed when the command is invoked."""
        pass

    def add_options(self, cmd: CommandT) -> CommandT:
        """Append the registry and default options to `cmd.params` and return `cmd`."""
        options = self.get_registry_options()
        options.extend(self.get_default_options())
        post_check(options)
        cmd.params.extend(options)
        return cmd

    def get_pipeline(
        self,
        src: str,
        source_options: dict[str, Any],
        dest: Optional[str] = None,
        destination_options: Optional[dict[str, Any]] = None,
    ) -> Pipeline:
        """Assemble a ``Pipeline`` from source and (optional) destination options.

        Args:
            src: Key of the source connector in `source_registry`.
            source_options: Flat options used for the processor config, indexer,
                downloader, partitioner and the optional chunker/filterer/embedder.
            dest: Key of the destination connector in `destination_registry`. When
                falsy, a local uploader is used instead.
            destination_options: Flat options for the stager and uploader. May be
                None; it is then treated as empty.

        Returns:
            The configured pipeline (not yet run).
        """
        logger.debug(
            f"creating pipeline from cli using source {src} with options: {source_options}"
        )
        pipeline_kwargs: dict[str, Any] = {
            "context": self.get_processor_config(options=source_options),
            "downloader": self.get_downloader(src=src, options=source_options),
            "indexer": self.get_indexer(src=src, options=source_options),
            "partitioner": self.get_partitioner(options=source_options),
        }
        # Optional steps are only added when their factory returns something.
        if chunker := self.get_chunker(options=source_options):
            pipeline_kwargs["chunker"] = chunker
        if filterer := self.get_filterer(options=source_options):
            pipeline_kwargs["filterer"] = filterer
        if embedder := self.get_embedder(options=source_options):
            pipeline_kwargs["embedder"] = embedder
        if dest:
            logger.debug(
                f"setting destination on pipeline {dest} with options: {destination_options}"
            )
            # The parameter defaults to None, but the factories below call
            # `.get` / `extract_config` on it and need a dict.
            if destination_options is None:
                destination_options = {}
            if uploader_stager := self.get_upload_stager(dest=dest, options=destination_options):
                pipeline_kwargs["stager"] = uploader_stager
            pipeline_kwargs["uploader"] = self.get_uploader(dest=dest, options=destination_options)
        else:
            # Default to local uploader
            # TODO remove after v1 no longer supported
            destination_options = destination_options or {}
            if "output_dir" not in destination_options:
                destination_options["output_dir"] = source_options["output_dir"]
            pipeline_kwargs["uploader"] = self.get_default_uploader(options=destination_options)
        return Pipeline(**pipeline_kwargs)

    @staticmethod
    def get_default_uploader(options: dict[str, Any]) -> UploaderT:
        """Build a ``LocalUploader`` configured from `options`."""
        uploader_config = extract_config(flat_data=options, config=LocalUploaderConfig)
        return LocalUploader(upload_config=uploader_config)

    @staticmethod
    def get_chunker(options: dict[str, Any]) -> Optional[Chunker]:
        """Build a ``Chunker``, or return None when no chunking strategy is set."""
        chunker_config = extract_config(flat_data=options, config=ChunkerConfig)
        if not chunker_config.chunking_strategy:
            return None
        return Chunker(config=chunker_config)

    @staticmethod
    def get_filterer(options: dict[str, Any]) -> Optional[Filterer]:
        """Build a ``Filterer``, or return None when the config dumps to an empty dict."""
        filterer_configs = extract_config(flat_data=options, config=FiltererConfig)
        if not filterer_configs.model_dump():
            return None
        return Filterer(config=filterer_configs)

    @staticmethod
    def get_embedder(options: dict[str, Any]) -> Optional[Embedder]:
        """Build an ``Embedder``, or return None when no embedding provider is set."""
        embedder_config = extract_config(flat_data=options, config=EmbedderConfig)
        if not embedder_config.embedding_provider:
            return None
        return Embedder(config=embedder_config)

    @staticmethod
    def get_partitioner(options: dict[str, Any]) -> Partitioner:
        """Build a ``Partitioner`` configured from `options`."""
        partitioner_config = extract_config(flat_data=options, config=PartitionerConfig)
        return Partitioner(config=partitioner_config)

    @staticmethod
    def get_processor_config(options: dict[str, Any]) -> ProcessorConfig:
        """Extract the ``ProcessorConfig`` from `options`."""
        return extract_config(flat_data=options, config=ProcessorConfig)

    @staticmethod
    def get_indexer(src: str, options: dict[str, Any]) -> IndexerT:
        """Instantiate the indexer registered for `src`.

        Only the config classes the registry entry declares (index and/or
        connection config) are extracted from `options` and passed on.
        """
        source_entry = source_registry[src]
        indexer_kwargs: dict[str, Any] = {}
        if indexer_config_cls := source_entry.indexer_config:
            indexer_kwargs["index_config"] = extract_config(
                flat_data=options, config=indexer_config_cls
            )
        if connection_config_cls := source_entry.connection_config:
            indexer_kwargs["connection_config"] = extract_config(
                flat_data=options, config=connection_config_cls
            )
        indexer_cls = source_entry.indexer
        return indexer_cls(**indexer_kwargs)

    @staticmethod
    def get_downloader(src: str, options: dict[str, Any]) -> DownloaderT:
        """Instantiate the downloader registered for `src`.

        Only the config classes the registry entry declares (download and/or
        connection config) are extracted from `options` and passed on.
        """
        source_entry = source_registry[src]
        downloader_kwargs: dict[str, Any] = {}
        if downloader_config_cls := source_entry.downloader_config:
            downloader_kwargs["download_config"] = extract_config(
                flat_data=options, config=downloader_config_cls
            )
        if connection_config_cls := source_entry.connection_config:
            downloader_kwargs["connection_config"] = extract_config(
                flat_data=options, config=connection_config_cls
            )
        downloader_cls = source_entry.downloader
        return downloader_cls(**downloader_kwargs)

    @staticmethod
    def get_custom_stager(
        stager_reference: str, stager_config_kwargs: Optional[dict] = None
    ) -> Optional[UploadStagerT]:
        """Import and instantiate a user-supplied upload stager.

        Args:
            stager_reference: Import string resolved with `import_from_string`.
            stager_config_kwargs: Keyword arguments for the stager's config class.
                When falsy, the stager is created without an explicit config.

        Returns:
            An instance of the referenced stager class.

        Raises:
            ValueError: The reference is not a class, is not an ``UploadStager``
                subclass, or its ``upload_stager_config`` field type is not an
                ``UploadStagerConfig`` subclass.
        """
        stager_cls = import_from_string(stager_reference)
        if not inspect.isclass(stager_cls):
            raise ValueError(
                f"custom stager must be a reference to a python class, got: {type(stager_cls)}"
            )
        if not issubclass(stager_cls, UploadStager):
            raise ValueError(
                "custom stager must be an implementation of the UploadStager interface"
            )
        # The config class is read from the declared type of the dataclass field.
        fields_dict = {f.name: f.type for f in fields(stager_cls)}
        upload_stager_config_cls = fields_dict["upload_stager_config"]
        if not inspect.isclass(upload_stager_config_cls):
            raise ValueError(
                f"custom stager config must be a class, got: {type(upload_stager_config_cls)}"
            )
        if not issubclass(upload_stager_config_cls, UploadStagerConfig):
            raise ValueError(
                "custom stager config must be an implementation "
                "of the UploadStagerConfig interface"
            )
        upload_stager_kwargs: dict[str, Any] = {}
        if stager_config_kwargs:
            upload_stager_kwargs["upload_stager_config"] = upload_stager_config_cls(
                **stager_config_kwargs
            )
        return stager_cls(**upload_stager_kwargs)

    @staticmethod
    def get_upload_stager(dest: str, options: dict[str, Any]) -> Optional[UploadStagerT]:
        """Return the upload stager for `dest`, or None when there is none.

        A ``custom_stager`` entry in `options` takes precedence over the stager
        registered for the destination.
        """
        if custom_stager := options.get("custom_stager"):
            return BaseCmd.get_custom_stager(
                stager_reference=custom_stager,
                stager_config_kwargs=options.get("custom_stager_config_kwargs"),
            )
        dest_entry = destination_registry[dest]
        upload_stager_kwargs: dict[str, Any] = {}
        if upload_stager_config_cls := dest_entry.upload_stager_config:
            upload_stager_kwargs["upload_stager_config"] = extract_config(
                flat_data=options, config=upload_stager_config_cls
            )
        if upload_stager_cls := dest_entry.upload_stager:
            return upload_stager_cls(**upload_stager_kwargs)
        return None

    @staticmethod
    def get_uploader(dest, options: dict[str, Any]) -> UploaderT:
        """Instantiate the uploader registered for `dest`.

        Only the config classes the registry entry declares (upload and/or
        connection config) are extracted from `options` and passed on.
        """
        dest_entry = destination_registry[dest]
        uploader_kwargs: dict[str, Any] = {}
        if uploader_config_cls := dest_entry.uploader_config:
            uploader_kwargs["upload_config"] = extract_config(
                flat_data=options, config=uploader_config_cls
            )
        if connection_config_cls := dest_entry.connection_config:
            uploader_kwargs["connection_config"] = extract_config(
                flat_data=options, config=connection_config_cls
            )
        uploader_cls = dest_entry.uploader
        return uploader_cls(**uploader_kwargs)
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from dataclasses import dataclass
|
|
3
|
+
|
|
4
|
+
import click
|
|
5
|
+
|
|
6
|
+
from unstructured_ingest.v2.cli.base.cmd import BaseCmd
|
|
7
|
+
from unstructured_ingest.v2.cli.utils.click import Dict, conform_click_options
|
|
8
|
+
from unstructured_ingest.v2.cli.utils.model_conversion import options_from_base_model
|
|
9
|
+
from unstructured_ingest.v2.logger import logger
|
|
10
|
+
from unstructured_ingest.v2.processes.connector_registry import DestinationRegistryEntry
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@dataclass
class DestCmd(BaseCmd):
    """CLI command for a destination connector, invoked as a child of a source command."""

    # Narrows the base-class field to destination registry entries.
    registry_entry: DestinationRegistryEntry

    def get_registry_options(self):
        """Return the consolidated click options for the entry's declared configs.

        Options are generated for the uploader, upload stager and connection
        config classes (skipping any the entry does not define) and then passed
        through `consolidate_options`.
        """
        options = []
        configs = [
            config
            for config in [
                self.registry_entry.uploader_config,
                self.registry_entry.upload_stager_config,
                self.registry_entry.connection_config,
            ]
            if config
        ]
        for config in configs:
            options.extend(options_from_base_model(model=config))
        options = self.consolidate_options(options=options)
        return options

    def cmd(self, ctx: click.Context, **options) -> None:
        """Click callback: build the pipeline for this destination and run it.

        The source connector name and its options are read from the parent click
        context.

        Raises:
            click.ClickException: The command has no parent context, the parent has
                no info name, or building/running the pipeline fails.
        """
        logger.setLevel(logging.DEBUG if options.get("verbose", False) else logging.INFO)
        if not ctx.parent:
            raise click.ClickException("destination command called without a parent")
        if not ctx.parent.info_name:
            raise click.ClickException("parent command missing info name")
        source_cmd = ctx.parent.info_name.replace("-", "_")
        # ctx.parent is known to be set here; a missing parent was rejected above.
        source_options: dict = ctx.parent.params
        conform_click_options(options)
        try:
            pipeline = self.get_pipeline(
                src=source_cmd,
                source_options=source_options,
                dest=self.cmd_name,
                destination_options=options,
            )
            pipeline.run()
        except Exception as e:
            logger.error(f"failed to run destination command {self.cmd_name}: {e}", exc_info=True)
            raise click.ClickException(str(e)) from e

    def get_cmd(self) -> click.Command:
        """Create the click command for this destination.

        Returns:
            A command named `cli_cmd_name` carrying the registry and default options
            plus ``--custom-stager`` and ``--custom-stager-config-kwargs``.

        Raises:
            ValueError: click did not produce a ``Command`` instance.
        """
        # Dynamically create the command without the use of click decorators
        fn = self.cmd
        fn = click.pass_context(fn)
        cmd = click.command(fn)
        if not isinstance(cmd, click.core.Command):
            raise ValueError(f"generated command was not of expected type Command: {type(cmd)}")
        cmd.name = self.cli_cmd_name
        cmd.short_help = "v2"
        cmd.invoke_without_command = True
        self.add_options(cmd)
        cmd.params.append(
            click.Option(
                ["--custom-stager"],
                required=False,
                type=str,
                default=None,
                help="Pass a pointer to a custom upload stager to use, "
                "must be in format '<module>:<attribute>'",
            )
        )
        cmd.params.append(
            click.Option(
                ["--custom-stager-config-kwargs"],
                required=False,
                type=Dict(),
                default=None,
                help="Any kwargs to instantiate the configuration "
                "associated with the custom stager",
            )
        )
        return cmd
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
import importlib
|
|
2
|
+
from typing import Any
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class ImportFromStringError(Exception):
    """Raised when a "<module>:<attribute>" import string cannot be resolved."""
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def import_from_string(import_str: Any) -> Any:
    """Resolve a "<module>:<attribute>" string to the object it names.

    Args:
        import_str: The import string. Anything that is not a str is assumed to be
            the target object already and is returned as-is.

    Returns:
        The object reached by importing the module and following the (optionally
        dotted) attribute path.

    Raises:
        ImportFromStringError: The string is not in "<module>:<attribute>" form,
            the module itself cannot be found, or an attribute in the path is
            missing.
        ModuleNotFoundError: Importing the module failed because a different
            module (one it depends on) is missing.
    """
    if not isinstance(import_str, str):
        return import_str

    module_str, _, attrs_str = import_str.partition(":")
    if not (module_str and attrs_str):
        message = 'Import string "{import_str}" must be in format "<module>:<attribute>".'
        raise ImportFromStringError(message.format(import_str=import_str))

    try:
        target = importlib.import_module(module_str)
    except ModuleNotFoundError as exc:
        if exc.name == module_str:
            message = 'Could not import module "{module_str}".'
            raise ImportFromStringError(message.format(module_str=module_str))
        # A dependency of the requested module is missing: surface that error as-is.
        raise exc from None

    try:
        # Walk the dotted path one attribute at a time, starting from the module.
        for attr_name in attrs_str.split("."):
            target = getattr(target, attr_name)
    except AttributeError:
        message = 'Attribute "{attrs_str}" not found in module "{module_str}".'
        raise ImportFromStringError(message.format(attrs_str=attrs_str, module_str=module_str))

    return target
|