unstructured-ingest 0.3.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of unstructured-ingest might be problematic. Click here for more details.
- test/__init__.py +0 -0
- test/integration/__init__.py +0 -0
- test/integration/chunkers/__init__.py +0 -0
- test/integration/chunkers/test_chunkers.py +31 -0
- test/integration/connectors/__init__.py +0 -0
- test/integration/connectors/conftest.py +38 -0
- test/integration/connectors/databricks/__init__.py +0 -0
- test/integration/connectors/databricks/test_volumes_native.py +269 -0
- test/integration/connectors/discord/__init__.py +0 -0
- test/integration/connectors/discord/test_discord.py +90 -0
- test/integration/connectors/duckdb/__init__.py +0 -0
- test/integration/connectors/duckdb/conftest.py +14 -0
- test/integration/connectors/duckdb/test_duckdb.py +89 -0
- test/integration/connectors/duckdb/test_motherduck.py +95 -0
- test/integration/connectors/elasticsearch/__init__.py +0 -0
- test/integration/connectors/elasticsearch/conftest.py +34 -0
- test/integration/connectors/elasticsearch/test_elasticsearch.py +330 -0
- test/integration/connectors/elasticsearch/test_opensearch.py +325 -0
- test/integration/connectors/sql/__init__.py +0 -0
- test/integration/connectors/sql/test_postgres.py +195 -0
- test/integration/connectors/sql/test_singlestore.py +176 -0
- test/integration/connectors/sql/test_snowflake.py +238 -0
- test/integration/connectors/sql/test_sqlite.py +162 -0
- test/integration/connectors/test_astradb.py +217 -0
- test/integration/connectors/test_azure_ai_search.py +255 -0
- test/integration/connectors/test_chroma.py +120 -0
- test/integration/connectors/test_confluence.py +113 -0
- test/integration/connectors/test_delta_table.py +185 -0
- test/integration/connectors/test_lancedb.py +247 -0
- test/integration/connectors/test_milvus.py +203 -0
- test/integration/connectors/test_mongodb.py +335 -0
- test/integration/connectors/test_neo4j.py +236 -0
- test/integration/connectors/test_notion.py +145 -0
- test/integration/connectors/test_onedrive.py +118 -0
- test/integration/connectors/test_pinecone.py +288 -0
- test/integration/connectors/test_qdrant.py +215 -0
- test/integration/connectors/test_redis.py +119 -0
- test/integration/connectors/test_s3.py +183 -0
- test/integration/connectors/test_vectara.py +270 -0
- test/integration/connectors/utils/__init__.py +0 -0
- test/integration/connectors/utils/constants.py +7 -0
- test/integration/connectors/utils/docker.py +151 -0
- test/integration/connectors/utils/docker_compose.py +59 -0
- test/integration/connectors/utils/validation/__init__.py +0 -0
- test/integration/connectors/utils/validation/destination.py +75 -0
- test/integration/connectors/utils/validation/equality.py +75 -0
- test/integration/connectors/utils/validation/source.py +299 -0
- test/integration/connectors/utils/validation/utils.py +36 -0
- test/integration/connectors/weaviate/__init__.py +0 -0
- test/integration/connectors/weaviate/conftest.py +15 -0
- test/integration/connectors/weaviate/test_cloud.py +34 -0
- test/integration/connectors/weaviate/test_local.py +131 -0
- test/integration/embedders/__init__.py +0 -0
- test/integration/embedders/conftest.py +13 -0
- test/integration/embedders/test_azure_openai.py +59 -0
- test/integration/embedders/test_bedrock.py +103 -0
- test/integration/embedders/test_huggingface.py +26 -0
- test/integration/embedders/test_mixedbread.py +71 -0
- test/integration/embedders/test_octoai.py +77 -0
- test/integration/embedders/test_openai.py +76 -0
- test/integration/embedders/test_togetherai.py +71 -0
- test/integration/embedders/test_vertexai.py +65 -0
- test/integration/embedders/test_voyageai.py +65 -0
- test/integration/embedders/utils.py +68 -0
- test/integration/partitioners/__init__.py +0 -0
- test/integration/partitioners/test_partitioner.py +75 -0
- test/integration/utils.py +15 -0
- test/unit/__init__.py +0 -0
- test/unit/embed/__init__.py +0 -0
- test/unit/embed/test_mixedbreadai.py +42 -0
- test/unit/embed/test_octoai.py +27 -0
- test/unit/embed/test_openai.py +20 -0
- test/unit/embed/test_vertexai.py +25 -0
- test/unit/embed/test_voyageai.py +24 -0
- test/unit/test_error.py +27 -0
- test/unit/test_logger.py +78 -0
- test/unit/test_utils.py +184 -0
- test/unit/v2/__init__.py +0 -0
- test/unit/v2/chunkers/__init__.py +0 -0
- test/unit/v2/chunkers/test_chunkers.py +49 -0
- test/unit/v2/connectors/__init__.py +0 -0
- test/unit/v2/connectors/test_confluence.py +39 -0
- test/unit/v2/embedders/__init__.py +0 -0
- test/unit/v2/embedders/test_bedrock.py +36 -0
- test/unit/v2/embedders/test_huggingface.py +48 -0
- test/unit/v2/embedders/test_mixedbread.py +37 -0
- test/unit/v2/embedders/test_octoai.py +35 -0
- test/unit/v2/embedders/test_openai.py +35 -0
- test/unit/v2/embedders/test_togetherai.py +37 -0
- test/unit/v2/embedders/test_vertexai.py +37 -0
- test/unit/v2/embedders/test_voyageai.py +38 -0
- test/unit/v2/partitioners/__init__.py +0 -0
- test/unit/v2/partitioners/test_partitioner.py +63 -0
- test/unit/v2/test_interfaces.py +26 -0
- test/unit/v2/test_utils.py +82 -0
- test/unit/v2/utils/__init__.py +0 -0
- test/unit/v2/utils/data_generator.py +32 -0
- unstructured_ingest/__init__.py +1 -0
- unstructured_ingest/__version__.py +1 -0
- unstructured_ingest/cli/__init__.py +14 -0
- unstructured_ingest/cli/base/__init__.py +0 -0
- unstructured_ingest/cli/base/cmd.py +19 -0
- unstructured_ingest/cli/base/dest.py +87 -0
- unstructured_ingest/cli/base/src.py +57 -0
- unstructured_ingest/cli/cli.py +37 -0
- unstructured_ingest/cli/cmd_factory.py +12 -0
- unstructured_ingest/cli/cmds/__init__.py +145 -0
- unstructured_ingest/cli/cmds/airtable.py +69 -0
- unstructured_ingest/cli/cmds/astradb.py +99 -0
- unstructured_ingest/cli/cmds/azure_ai_search.py +65 -0
- unstructured_ingest/cli/cmds/biomed.py +52 -0
- unstructured_ingest/cli/cmds/chroma.py +104 -0
- unstructured_ingest/cli/cmds/clarifai.py +71 -0
- unstructured_ingest/cli/cmds/confluence.py +69 -0
- unstructured_ingest/cli/cmds/databricks_volumes.py +163 -0
- unstructured_ingest/cli/cmds/delta_table.py +94 -0
- unstructured_ingest/cli/cmds/discord.py +47 -0
- unstructured_ingest/cli/cmds/elasticsearch.py +133 -0
- unstructured_ingest/cli/cmds/fsspec/__init__.py +0 -0
- unstructured_ingest/cli/cmds/fsspec/azure.py +94 -0
- unstructured_ingest/cli/cmds/fsspec/box.py +48 -0
- unstructured_ingest/cli/cmds/fsspec/dropbox.py +51 -0
- unstructured_ingest/cli/cmds/fsspec/fsspec.py +15 -0
- unstructured_ingest/cli/cmds/fsspec/gcs.py +71 -0
- unstructured_ingest/cli/cmds/fsspec/s3.py +74 -0
- unstructured_ingest/cli/cmds/fsspec/sftp.py +58 -0
- unstructured_ingest/cli/cmds/github.py +54 -0
- unstructured_ingest/cli/cmds/gitlab.py +54 -0
- unstructured_ingest/cli/cmds/google_drive.py +49 -0
- unstructured_ingest/cli/cmds/hubspot.py +70 -0
- unstructured_ingest/cli/cmds/jira.py +71 -0
- unstructured_ingest/cli/cmds/kafka.py +102 -0
- unstructured_ingest/cli/cmds/local.py +43 -0
- unstructured_ingest/cli/cmds/mongodb.py +72 -0
- unstructured_ingest/cli/cmds/notion.py +48 -0
- unstructured_ingest/cli/cmds/onedrive.py +66 -0
- unstructured_ingest/cli/cmds/opensearch.py +117 -0
- unstructured_ingest/cli/cmds/outlook.py +67 -0
- unstructured_ingest/cli/cmds/pinecone.py +71 -0
- unstructured_ingest/cli/cmds/qdrant.py +124 -0
- unstructured_ingest/cli/cmds/reddit.py +67 -0
- unstructured_ingest/cli/cmds/salesforce.py +58 -0
- unstructured_ingest/cli/cmds/sharepoint.py +66 -0
- unstructured_ingest/cli/cmds/slack.py +56 -0
- unstructured_ingest/cli/cmds/sql.py +66 -0
- unstructured_ingest/cli/cmds/vectara.py +66 -0
- unstructured_ingest/cli/cmds/weaviate.py +98 -0
- unstructured_ingest/cli/cmds/wikipedia.py +40 -0
- unstructured_ingest/cli/common.py +7 -0
- unstructured_ingest/cli/interfaces.py +663 -0
- unstructured_ingest/cli/utils.py +205 -0
- unstructured_ingest/connector/__init__.py +0 -0
- unstructured_ingest/connector/airtable.py +309 -0
- unstructured_ingest/connector/astradb.py +267 -0
- unstructured_ingest/connector/azure_ai_search.py +144 -0
- unstructured_ingest/connector/biomed.py +320 -0
- unstructured_ingest/connector/chroma.py +158 -0
- unstructured_ingest/connector/clarifai.py +122 -0
- unstructured_ingest/connector/confluence.py +285 -0
- unstructured_ingest/connector/databricks_volumes.py +137 -0
- unstructured_ingest/connector/delta_table.py +203 -0
- unstructured_ingest/connector/discord.py +180 -0
- unstructured_ingest/connector/elasticsearch.py +396 -0
- unstructured_ingest/connector/fsspec/__init__.py +0 -0
- unstructured_ingest/connector/fsspec/azure.py +78 -0
- unstructured_ingest/connector/fsspec/box.py +109 -0
- unstructured_ingest/connector/fsspec/dropbox.py +160 -0
- unstructured_ingest/connector/fsspec/fsspec.py +359 -0
- unstructured_ingest/connector/fsspec/gcs.py +82 -0
- unstructured_ingest/connector/fsspec/s3.py +62 -0
- unstructured_ingest/connector/fsspec/sftp.py +81 -0
- unstructured_ingest/connector/git.py +124 -0
- unstructured_ingest/connector/github.py +174 -0
- unstructured_ingest/connector/gitlab.py +142 -0
- unstructured_ingest/connector/google_drive.py +348 -0
- unstructured_ingest/connector/hubspot.py +278 -0
- unstructured_ingest/connector/jira.py +469 -0
- unstructured_ingest/connector/kafka.py +293 -0
- unstructured_ingest/connector/local.py +139 -0
- unstructured_ingest/connector/mongodb.py +284 -0
- unstructured_ingest/connector/notion/__init__.py +0 -0
- unstructured_ingest/connector/notion/client.py +248 -0
- unstructured_ingest/connector/notion/connector.py +469 -0
- unstructured_ingest/connector/notion/helpers.py +584 -0
- unstructured_ingest/connector/notion/interfaces.py +32 -0
- unstructured_ingest/connector/notion/types/__init__.py +0 -0
- unstructured_ingest/connector/notion/types/block.py +96 -0
- unstructured_ingest/connector/notion/types/blocks/__init__.py +63 -0
- unstructured_ingest/connector/notion/types/blocks/bookmark.py +40 -0
- unstructured_ingest/connector/notion/types/blocks/breadcrumb.py +21 -0
- unstructured_ingest/connector/notion/types/blocks/bulleted_list_item.py +31 -0
- unstructured_ingest/connector/notion/types/blocks/callout.py +94 -0
- unstructured_ingest/connector/notion/types/blocks/child_database.py +23 -0
- unstructured_ingest/connector/notion/types/blocks/child_page.py +23 -0
- unstructured_ingest/connector/notion/types/blocks/code.py +43 -0
- unstructured_ingest/connector/notion/types/blocks/column_list.py +35 -0
- unstructured_ingest/connector/notion/types/blocks/divider.py +22 -0
- unstructured_ingest/connector/notion/types/blocks/embed.py +36 -0
- unstructured_ingest/connector/notion/types/blocks/equation.py +23 -0
- unstructured_ingest/connector/notion/types/blocks/file.py +49 -0
- unstructured_ingest/connector/notion/types/blocks/heading.py +37 -0
- unstructured_ingest/connector/notion/types/blocks/image.py +21 -0
- unstructured_ingest/connector/notion/types/blocks/link_preview.py +24 -0
- unstructured_ingest/connector/notion/types/blocks/link_to_page.py +29 -0
- unstructured_ingest/connector/notion/types/blocks/numbered_list.py +29 -0
- unstructured_ingest/connector/notion/types/blocks/paragraph.py +31 -0
- unstructured_ingest/connector/notion/types/blocks/pdf.py +49 -0
- unstructured_ingest/connector/notion/types/blocks/quote.py +37 -0
- unstructured_ingest/connector/notion/types/blocks/synced_block.py +57 -0
- unstructured_ingest/connector/notion/types/blocks/table.py +63 -0
- unstructured_ingest/connector/notion/types/blocks/table_of_contents.py +23 -0
- unstructured_ingest/connector/notion/types/blocks/template.py +30 -0
- unstructured_ingest/connector/notion/types/blocks/todo.py +42 -0
- unstructured_ingest/connector/notion/types/blocks/toggle.py +37 -0
- unstructured_ingest/connector/notion/types/blocks/unsupported.py +20 -0
- unstructured_ingest/connector/notion/types/blocks/video.py +22 -0
- unstructured_ingest/connector/notion/types/database.py +73 -0
- unstructured_ingest/connector/notion/types/database_properties/__init__.py +106 -0
- unstructured_ingest/connector/notion/types/database_properties/checkbox.py +38 -0
- unstructured_ingest/connector/notion/types/database_properties/created_by.py +35 -0
- unstructured_ingest/connector/notion/types/database_properties/created_time.py +34 -0
- unstructured_ingest/connector/notion/types/database_properties/date.py +41 -0
- unstructured_ingest/connector/notion/types/database_properties/email.py +36 -0
- unstructured_ingest/connector/notion/types/database_properties/files.py +37 -0
- unstructured_ingest/connector/notion/types/database_properties/formula.py +49 -0
- unstructured_ingest/connector/notion/types/database_properties/last_edited_by.py +34 -0
- unstructured_ingest/connector/notion/types/database_properties/last_edited_time.py +34 -0
- unstructured_ingest/connector/notion/types/database_properties/multiselect.py +73 -0
- unstructured_ingest/connector/notion/types/database_properties/number.py +49 -0
- unstructured_ingest/connector/notion/types/database_properties/people.py +40 -0
- unstructured_ingest/connector/notion/types/database_properties/phone_number.py +36 -0
- unstructured_ingest/connector/notion/types/database_properties/relation.py +67 -0
- unstructured_ingest/connector/notion/types/database_properties/rich_text.py +43 -0
- unstructured_ingest/connector/notion/types/database_properties/rollup.py +56 -0
- unstructured_ingest/connector/notion/types/database_properties/select.py +68 -0
- unstructured_ingest/connector/notion/types/database_properties/status.py +80 -0
- unstructured_ingest/connector/notion/types/database_properties/title.py +37 -0
- unstructured_ingest/connector/notion/types/database_properties/unique_id.py +50 -0
- unstructured_ingest/connector/notion/types/database_properties/url.py +37 -0
- unstructured_ingest/connector/notion/types/database_properties/verification.py +78 -0
- unstructured_ingest/connector/notion/types/date.py +26 -0
- unstructured_ingest/connector/notion/types/file.py +51 -0
- unstructured_ingest/connector/notion/types/page.py +45 -0
- unstructured_ingest/connector/notion/types/parent.py +66 -0
- unstructured_ingest/connector/notion/types/rich_text.py +189 -0
- unstructured_ingest/connector/notion/types/user.py +76 -0
- unstructured_ingest/connector/onedrive.py +232 -0
- unstructured_ingest/connector/opensearch.py +218 -0
- unstructured_ingest/connector/outlook.py +285 -0
- unstructured_ingest/connector/pinecone.py +140 -0
- unstructured_ingest/connector/qdrant.py +144 -0
- unstructured_ingest/connector/reddit.py +166 -0
- unstructured_ingest/connector/registry.py +109 -0
- unstructured_ingest/connector/salesforce.py +301 -0
- unstructured_ingest/connector/sharepoint.py +573 -0
- unstructured_ingest/connector/slack.py +224 -0
- unstructured_ingest/connector/sql.py +199 -0
- unstructured_ingest/connector/vectara.py +253 -0
- unstructured_ingest/connector/weaviate.py +190 -0
- unstructured_ingest/connector/wikipedia.py +208 -0
- unstructured_ingest/embed/__init__.py +0 -0
- unstructured_ingest/embed/azure_openai.py +31 -0
- unstructured_ingest/embed/bedrock.py +193 -0
- unstructured_ingest/embed/huggingface.py +52 -0
- unstructured_ingest/embed/interfaces.py +117 -0
- unstructured_ingest/embed/mixedbreadai.py +233 -0
- unstructured_ingest/embed/octoai.py +130 -0
- unstructured_ingest/embed/openai.py +116 -0
- unstructured_ingest/embed/togetherai.py +106 -0
- unstructured_ingest/embed/vertexai.py +126 -0
- unstructured_ingest/embed/voyageai.py +130 -0
- unstructured_ingest/enhanced_dataclass/__init__.py +4 -0
- unstructured_ingest/enhanced_dataclass/core.py +99 -0
- unstructured_ingest/enhanced_dataclass/dataclasses.py +54 -0
- unstructured_ingest/enhanced_dataclass/json_mixin.py +125 -0
- unstructured_ingest/error.py +49 -0
- unstructured_ingest/ingest_backoff/__init__.py +3 -0
- unstructured_ingest/ingest_backoff/_common.py +102 -0
- unstructured_ingest/ingest_backoff/_wrapper.py +122 -0
- unstructured_ingest/interfaces.py +852 -0
- unstructured_ingest/logger.py +130 -0
- unstructured_ingest/main.py +11 -0
- unstructured_ingest/pipeline/__init__.py +22 -0
- unstructured_ingest/pipeline/copy.py +19 -0
- unstructured_ingest/pipeline/doc_factory.py +12 -0
- unstructured_ingest/pipeline/interfaces.py +270 -0
- unstructured_ingest/pipeline/partition.py +60 -0
- unstructured_ingest/pipeline/permissions.py +12 -0
- unstructured_ingest/pipeline/pipeline.py +117 -0
- unstructured_ingest/pipeline/reformat/__init__.py +0 -0
- unstructured_ingest/pipeline/reformat/chunking.py +134 -0
- unstructured_ingest/pipeline/reformat/embedding.py +64 -0
- unstructured_ingest/pipeline/source.py +77 -0
- unstructured_ingest/pipeline/utils.py +6 -0
- unstructured_ingest/pipeline/write.py +18 -0
- unstructured_ingest/processor.py +93 -0
- unstructured_ingest/runner/__init__.py +104 -0
- unstructured_ingest/runner/airtable.py +35 -0
- unstructured_ingest/runner/astradb.py +34 -0
- unstructured_ingest/runner/base_runner.py +89 -0
- unstructured_ingest/runner/biomed.py +45 -0
- unstructured_ingest/runner/confluence.py +35 -0
- unstructured_ingest/runner/delta_table.py +34 -0
- unstructured_ingest/runner/discord.py +35 -0
- unstructured_ingest/runner/elasticsearch.py +40 -0
- unstructured_ingest/runner/fsspec/__init__.py +0 -0
- unstructured_ingest/runner/fsspec/azure.py +30 -0
- unstructured_ingest/runner/fsspec/box.py +28 -0
- unstructured_ingest/runner/fsspec/dropbox.py +30 -0
- unstructured_ingest/runner/fsspec/fsspec.py +40 -0
- unstructured_ingest/runner/fsspec/gcs.py +28 -0
- unstructured_ingest/runner/fsspec/s3.py +28 -0
- unstructured_ingest/runner/fsspec/sftp.py +28 -0
- unstructured_ingest/runner/github.py +37 -0
- unstructured_ingest/runner/gitlab.py +37 -0
- unstructured_ingest/runner/google_drive.py +35 -0
- unstructured_ingest/runner/hubspot.py +35 -0
- unstructured_ingest/runner/jira.py +35 -0
- unstructured_ingest/runner/kafka.py +34 -0
- unstructured_ingest/runner/local.py +23 -0
- unstructured_ingest/runner/mongodb.py +34 -0
- unstructured_ingest/runner/notion.py +61 -0
- unstructured_ingest/runner/onedrive.py +35 -0
- unstructured_ingest/runner/opensearch.py +40 -0
- unstructured_ingest/runner/outlook.py +33 -0
- unstructured_ingest/runner/reddit.py +35 -0
- unstructured_ingest/runner/salesforce.py +33 -0
- unstructured_ingest/runner/sharepoint.py +35 -0
- unstructured_ingest/runner/slack.py +33 -0
- unstructured_ingest/runner/utils.py +47 -0
- unstructured_ingest/runner/wikipedia.py +35 -0
- unstructured_ingest/runner/writers/__init__.py +48 -0
- unstructured_ingest/runner/writers/astradb.py +22 -0
- unstructured_ingest/runner/writers/azure_ai_search.py +24 -0
- unstructured_ingest/runner/writers/base_writer.py +26 -0
- unstructured_ingest/runner/writers/chroma.py +22 -0
- unstructured_ingest/runner/writers/clarifai.py +19 -0
- unstructured_ingest/runner/writers/databricks_volumes.py +25 -0
- unstructured_ingest/runner/writers/delta_table.py +24 -0
- unstructured_ingest/runner/writers/elasticsearch.py +24 -0
- unstructured_ingest/runner/writers/fsspec/__init__.py +0 -0
- unstructured_ingest/runner/writers/fsspec/azure.py +24 -0
- unstructured_ingest/runner/writers/fsspec/box.py +21 -0
- unstructured_ingest/runner/writers/fsspec/dropbox.py +21 -0
- unstructured_ingest/runner/writers/fsspec/gcs.py +19 -0
- unstructured_ingest/runner/writers/fsspec/s3.py +21 -0
- unstructured_ingest/runner/writers/kafka.py +21 -0
- unstructured_ingest/runner/writers/mongodb.py +21 -0
- unstructured_ingest/runner/writers/opensearch.py +26 -0
- unstructured_ingest/runner/writers/pinecone.py +21 -0
- unstructured_ingest/runner/writers/qdrant.py +19 -0
- unstructured_ingest/runner/writers/sql.py +22 -0
- unstructured_ingest/runner/writers/vectara.py +22 -0
- unstructured_ingest/runner/writers/weaviate.py +21 -0
- unstructured_ingest/utils/__init__.py +0 -0
- unstructured_ingest/utils/chunking.py +56 -0
- unstructured_ingest/utils/compression.py +118 -0
- unstructured_ingest/utils/data_prep.py +200 -0
- unstructured_ingest/utils/dep_check.py +78 -0
- unstructured_ingest/utils/google_filetype.py +9 -0
- unstructured_ingest/utils/string_and_date_utils.py +49 -0
- unstructured_ingest/utils/table.py +73 -0
- unstructured_ingest/v2/__init__.py +1 -0
- unstructured_ingest/v2/cli/__init__.py +0 -0
- unstructured_ingest/v2/cli/base/__init__.py +4 -0
- unstructured_ingest/v2/cli/base/cmd.py +269 -0
- unstructured_ingest/v2/cli/base/dest.py +85 -0
- unstructured_ingest/v2/cli/base/importer.py +34 -0
- unstructured_ingest/v2/cli/base/src.py +85 -0
- unstructured_ingest/v2/cli/cli.py +24 -0
- unstructured_ingest/v2/cli/cmds.py +14 -0
- unstructured_ingest/v2/cli/utils/__init__.py +0 -0
- unstructured_ingest/v2/cli/utils/click.py +237 -0
- unstructured_ingest/v2/cli/utils/model_conversion.py +222 -0
- unstructured_ingest/v2/constants.py +2 -0
- unstructured_ingest/v2/errors.py +18 -0
- unstructured_ingest/v2/interfaces/__init__.py +32 -0
- unstructured_ingest/v2/interfaces/connector.py +50 -0
- unstructured_ingest/v2/interfaces/downloader.py +89 -0
- unstructured_ingest/v2/interfaces/file_data.py +116 -0
- unstructured_ingest/v2/interfaces/indexer.py +30 -0
- unstructured_ingest/v2/interfaces/process.py +19 -0
- unstructured_ingest/v2/interfaces/processor.py +88 -0
- unstructured_ingest/v2/interfaces/upload_stager.py +102 -0
- unstructured_ingest/v2/interfaces/uploader.py +53 -0
- unstructured_ingest/v2/logger.py +126 -0
- unstructured_ingest/v2/main.py +11 -0
- unstructured_ingest/v2/otel.py +111 -0
- unstructured_ingest/v2/pipeline/__init__.py +0 -0
- unstructured_ingest/v2/pipeline/interfaces.py +211 -0
- unstructured_ingest/v2/pipeline/otel.py +32 -0
- unstructured_ingest/v2/pipeline/pipeline.py +384 -0
- unstructured_ingest/v2/pipeline/steps/__init__.py +0 -0
- unstructured_ingest/v2/pipeline/steps/chunk.py +80 -0
- unstructured_ingest/v2/pipeline/steps/download.py +207 -0
- unstructured_ingest/v2/pipeline/steps/embed.py +79 -0
- unstructured_ingest/v2/pipeline/steps/filter.py +35 -0
- unstructured_ingest/v2/pipeline/steps/index.py +86 -0
- unstructured_ingest/v2/pipeline/steps/partition.py +79 -0
- unstructured_ingest/v2/pipeline/steps/stage.py +65 -0
- unstructured_ingest/v2/pipeline/steps/uncompress.py +50 -0
- unstructured_ingest/v2/pipeline/steps/upload.py +58 -0
- unstructured_ingest/v2/processes/__init__.py +18 -0
- unstructured_ingest/v2/processes/chunker.py +124 -0
- unstructured_ingest/v2/processes/connector_registry.py +69 -0
- unstructured_ingest/v2/processes/connectors/__init__.py +117 -0
- unstructured_ingest/v2/processes/connectors/airtable.py +235 -0
- unstructured_ingest/v2/processes/connectors/astradb.py +402 -0
- unstructured_ingest/v2/processes/connectors/azure_ai_search.py +276 -0
- unstructured_ingest/v2/processes/connectors/chroma.py +190 -0
- unstructured_ingest/v2/processes/connectors/confluence.py +207 -0
- unstructured_ingest/v2/processes/connectors/couchbase.py +334 -0
- unstructured_ingest/v2/processes/connectors/databricks/__init__.py +52 -0
- unstructured_ingest/v2/processes/connectors/databricks/volumes.py +208 -0
- unstructured_ingest/v2/processes/connectors/databricks/volumes_aws.py +87 -0
- unstructured_ingest/v2/processes/connectors/databricks/volumes_azure.py +102 -0
- unstructured_ingest/v2/processes/connectors/databricks/volumes_gcp.py +85 -0
- unstructured_ingest/v2/processes/connectors/databricks/volumes_native.py +86 -0
- unstructured_ingest/v2/processes/connectors/delta_table.py +191 -0
- unstructured_ingest/v2/processes/connectors/discord.py +158 -0
- unstructured_ingest/v2/processes/connectors/duckdb/__init__.py +15 -0
- unstructured_ingest/v2/processes/connectors/duckdb/base.py +100 -0
- unstructured_ingest/v2/processes/connectors/duckdb/duckdb.py +127 -0
- unstructured_ingest/v2/processes/connectors/duckdb/motherduck.py +126 -0
- unstructured_ingest/v2/processes/connectors/elasticsearch/__init__.py +19 -0
- unstructured_ingest/v2/processes/connectors/elasticsearch/elasticsearch.py +470 -0
- unstructured_ingest/v2/processes/connectors/elasticsearch/opensearch.py +195 -0
- unstructured_ingest/v2/processes/connectors/fsspec/__init__.py +37 -0
- unstructured_ingest/v2/processes/connectors/fsspec/azure.py +197 -0
- unstructured_ingest/v2/processes/connectors/fsspec/box.py +170 -0
- unstructured_ingest/v2/processes/connectors/fsspec/dropbox.py +168 -0
- unstructured_ingest/v2/processes/connectors/fsspec/fsspec.py +332 -0
- unstructured_ingest/v2/processes/connectors/fsspec/gcs.py +197 -0
- unstructured_ingest/v2/processes/connectors/fsspec/s3.py +185 -0
- unstructured_ingest/v2/processes/connectors/fsspec/sftp.py +171 -0
- unstructured_ingest/v2/processes/connectors/fsspec/utils.py +17 -0
- unstructured_ingest/v2/processes/connectors/gitlab.py +268 -0
- unstructured_ingest/v2/processes/connectors/google_drive.py +348 -0
- unstructured_ingest/v2/processes/connectors/kafka/__init__.py +17 -0
- unstructured_ingest/v2/processes/connectors/kafka/cloud.py +121 -0
- unstructured_ingest/v2/processes/connectors/kafka/kafka.py +273 -0
- unstructured_ingest/v2/processes/connectors/kafka/local.py +103 -0
- unstructured_ingest/v2/processes/connectors/kdbai.py +148 -0
- unstructured_ingest/v2/processes/connectors/lancedb/__init__.py +30 -0
- unstructured_ingest/v2/processes/connectors/lancedb/aws.py +43 -0
- unstructured_ingest/v2/processes/connectors/lancedb/azure.py +43 -0
- unstructured_ingest/v2/processes/connectors/lancedb/cloud.py +42 -0
- unstructured_ingest/v2/processes/connectors/lancedb/gcp.py +44 -0
- unstructured_ingest/v2/processes/connectors/lancedb/lancedb.py +169 -0
- unstructured_ingest/v2/processes/connectors/lancedb/local.py +44 -0
- unstructured_ingest/v2/processes/connectors/local.py +217 -0
- unstructured_ingest/v2/processes/connectors/milvus.py +225 -0
- unstructured_ingest/v2/processes/connectors/mongodb.py +361 -0
- unstructured_ingest/v2/processes/connectors/neo4j.py +385 -0
- unstructured_ingest/v2/processes/connectors/notion/__init__.py +0 -0
- unstructured_ingest/v2/processes/connectors/notion/client.py +349 -0
- unstructured_ingest/v2/processes/connectors/notion/connector.py +346 -0
- unstructured_ingest/v2/processes/connectors/notion/helpers.py +448 -0
- unstructured_ingest/v2/processes/connectors/notion/interfaces.py +32 -0
- unstructured_ingest/v2/processes/connectors/notion/types/__init__.py +0 -0
- unstructured_ingest/v2/processes/connectors/notion/types/block.py +96 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/__init__.py +63 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/bookmark.py +40 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/breadcrumb.py +21 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/bulleted_list_item.py +31 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/callout.py +94 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/child_database.py +23 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/child_page.py +23 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/code.py +43 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/column_list.py +35 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/divider.py +22 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/embed.py +36 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/equation.py +23 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/file.py +49 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/heading.py +37 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/image.py +21 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/link_preview.py +24 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/link_to_page.py +29 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/numbered_list.py +29 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/paragraph.py +31 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/pdf.py +49 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/quote.py +37 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/synced_block.py +57 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/table.py +63 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/table_of_contents.py +23 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/template.py +30 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/todo.py +42 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/toggle.py +37 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/unsupported.py +20 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/video.py +22 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database.py +73 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/__init__.py +106 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/checkbox.py +38 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/created_by.py +35 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/created_time.py +34 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/date.py +41 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/email.py +36 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/files.py +37 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/formula.py +49 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/last_edited_by.py +34 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/last_edited_time.py +34 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/multiselect.py +73 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/number.py +49 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/people.py +41 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/phone_number.py +36 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/relation.py +67 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/rich_text.py +43 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/rollup.py +56 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/select.py +69 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/status.py +81 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/title.py +37 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/unique_id.py +50 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/url.py +37 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/verification.py +78 -0
- unstructured_ingest/v2/processes/connectors/notion/types/date.py +29 -0
- unstructured_ingest/v2/processes/connectors/notion/types/file.py +54 -0
- unstructured_ingest/v2/processes/connectors/notion/types/page.py +45 -0
- unstructured_ingest/v2/processes/connectors/notion/types/parent.py +66 -0
- unstructured_ingest/v2/processes/connectors/notion/types/rich_text.py +189 -0
- unstructured_ingest/v2/processes/connectors/notion/types/user.py +79 -0
- unstructured_ingest/v2/processes/connectors/onedrive.py +447 -0
- unstructured_ingest/v2/processes/connectors/outlook.py +239 -0
- unstructured_ingest/v2/processes/connectors/pinecone.py +277 -0
- unstructured_ingest/v2/processes/connectors/qdrant/__init__.py +16 -0
- unstructured_ingest/v2/processes/connectors/qdrant/cloud.py +59 -0
- unstructured_ingest/v2/processes/connectors/qdrant/local.py +58 -0
- unstructured_ingest/v2/processes/connectors/qdrant/qdrant.py +160 -0
- unstructured_ingest/v2/processes/connectors/qdrant/server.py +60 -0
- unstructured_ingest/v2/processes/connectors/redisdb.py +182 -0
- unstructured_ingest/v2/processes/connectors/salesforce.py +303 -0
- unstructured_ingest/v2/processes/connectors/sharepoint.py +448 -0
- unstructured_ingest/v2/processes/connectors/slack.py +248 -0
- unstructured_ingest/v2/processes/connectors/sql/__init__.py +27 -0
- unstructured_ingest/v2/processes/connectors/sql/postgres.py +162 -0
- unstructured_ingest/v2/processes/connectors/sql/singlestore.py +166 -0
- unstructured_ingest/v2/processes/connectors/sql/snowflake.py +210 -0
- unstructured_ingest/v2/processes/connectors/sql/sql.py +434 -0
- unstructured_ingest/v2/processes/connectors/sql/sqlite.py +168 -0
- unstructured_ingest/v2/processes/connectors/utils.py +29 -0
- unstructured_ingest/v2/processes/connectors/vectara.py +350 -0
- unstructured_ingest/v2/processes/connectors/weaviate/__init__.py +22 -0
- unstructured_ingest/v2/processes/connectors/weaviate/cloud.py +165 -0
- unstructured_ingest/v2/processes/connectors/weaviate/embedded.py +90 -0
- unstructured_ingest/v2/processes/connectors/weaviate/local.py +73 -0
- unstructured_ingest/v2/processes/connectors/weaviate/weaviate.py +267 -0
- unstructured_ingest/v2/processes/embedder.py +195 -0
- unstructured_ingest/v2/processes/filter.py +60 -0
- unstructured_ingest/v2/processes/partitioner.py +188 -0
- unstructured_ingest/v2/processes/uncompress.py +61 -0
- unstructured_ingest/v2/unstructured_api.py +128 -0
- unstructured_ingest/v2/utils.py +61 -0
- unstructured_ingest-0.3.13.dist-info/LICENSE.md +201 -0
- unstructured_ingest-0.3.13.dist-info/METADATA +205 -0
- unstructured_ingest-0.3.13.dist-info/RECORD +557 -0
- unstructured_ingest-0.3.13.dist-info/WHEEL +5 -0
- unstructured_ingest-0.3.13.dist-info/entry_points.txt +2 -0
- unstructured_ingest-0.3.13.dist-info/top_level.txt +2 -0
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
import random
|
|
2
|
+
from typing import Any
|
|
3
|
+
|
|
4
|
+
import faker
|
|
5
|
+
import pytest
|
|
6
|
+
|
|
7
|
+
from test.unit.v2.utils.data_generator import generate_random_dictionary
|
|
8
|
+
from unstructured_ingest.embed.huggingface import (
|
|
9
|
+
HuggingFaceEmbeddingConfig,
|
|
10
|
+
HuggingFaceEmbeddingEncoder,
|
|
11
|
+
)
|
|
12
|
+
|
|
13
|
+
fake = faker.Faker()
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def generate_embedder_config_params() -> dict:
    """Build a randomized keyword dict for ``HuggingFaceEmbeddingConfig``.

    Each optional setting is either a generated value or ``None``, chosen by
    an independent coin flip.
    """
    params = {}
    # NOTE(review): indentation was lost in this listing; all four
    # assignments are assumed to sit under this ``if`` -- confirm.
    if random.random() < 0.5:
        # NOTE(review): the other embedder tests in this package use the key
        # "embedder_model_name"; confirm "embed_model_name" is intended.
        params["embed_model_name"] = fake.word() if random.random() < 0.5 else None
        params["embedder_model_kwargs"] = (
            generate_random_dictionary(key_type=str, value_type=Any)
            if random.random() < 0.5
            else None
        )
        params["encode_kwargs"] = (
            generate_random_dictionary(key_type=str, value_type=Any)
            if random.random() < 0.5
            else None
        )
        params["cache_folder"] = fake.file_path() if random.random() < 0.5 else None
    return params
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
@pytest.mark.parametrize(
    "embedder_config_params",
    [generate_embedder_config_params() for _ in range(10)],
)
def test_embedder_config(embedder_config_params: dict):
    """Ten random parameter sets must each validate into a truthy config."""
    validated = HuggingFaceEmbeddingConfig.model_validate(embedder_config_params)
    assert validated
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
@pytest.mark.parametrize(
    "embedder_config_params",
    [generate_embedder_config_params() for _ in range(10)],
)
def test_embedder(embedder_config_params: dict):
    """An encoder must be constructible from each validated random config."""
    encoder = HuggingFaceEmbeddingEncoder(
        config=HuggingFaceEmbeddingConfig.model_validate(embedder_config_params)
    )
    assert encoder
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
import random
|
|
2
|
+
|
|
3
|
+
import faker
|
|
4
|
+
import pytest
|
|
5
|
+
|
|
6
|
+
from unstructured_ingest.embed.mixedbreadai import (
|
|
7
|
+
MixedbreadAIEmbeddingConfig,
|
|
8
|
+
MixedbreadAIEmbeddingEncoder,
|
|
9
|
+
)
|
|
10
|
+
|
|
11
|
+
fake = faker.Faker()
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def generate_embedder_config_params() -> dict:
    """Build a randomized keyword dict for ``MixedbreadAIEmbeddingConfig``.

    ``api_key`` is always present; ``embedder_model_name`` is added on
    roughly half of the calls.
    """
    config_kwargs = {"api_key": fake.password()}
    override_model = random.random() < 0.5
    if override_model:
        config_kwargs["embedder_model_name"] = fake.word()
    return config_kwargs
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
@pytest.mark.parametrize(
    "embedder_config_params",
    [generate_embedder_config_params() for _ in range(10)],
)
def test_embedder_config(embedder_config_params: dict):
    """Ten random parameter sets must each validate into a truthy config."""
    validated = MixedbreadAIEmbeddingConfig.model_validate(embedder_config_params)
    assert validated
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
@pytest.mark.parametrize(
    "embedder_config_params",
    [generate_embedder_config_params() for _ in range(10)],
)
def test_embedder(embedder_config_params: dict):
    """An encoder must be constructible from each validated random config."""
    encoder = MixedbreadAIEmbeddingEncoder(
        config=MixedbreadAIEmbeddingConfig.model_validate(embedder_config_params)
    )
    assert encoder
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
import random
|
|
2
|
+
|
|
3
|
+
import faker
|
|
4
|
+
import pytest
|
|
5
|
+
|
|
6
|
+
from unstructured_ingest.embed.octoai import OctoAiEmbeddingConfig, OctoAIEmbeddingEncoder
|
|
7
|
+
|
|
8
|
+
fake = faker.Faker()
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def generate_embedder_config_params() -> dict:
    """Build a randomized keyword dict for ``OctoAiEmbeddingConfig``.

    ``api_key`` is always present; the optional keys are added at random.
    """
    params = {
        "api_key": fake.password(),
    }
    # NOTE(review): indentation was lost in this listing; ``base_url`` is
    # assumed to share the branch with ``embedder_model_name`` -- confirm.
    if random.random() < 0.5:
        params["embedder_model_name"] = fake.word()
        params["base_url"] = fake.url()
    return params
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@pytest.mark.parametrize(
    "embedder_config_params",
    [generate_embedder_config_params() for _ in range(10)],
)
def test_embedder_config(embedder_config_params: dict):
    """Ten random parameter sets must each validate into a truthy config."""
    validated = OctoAiEmbeddingConfig.model_validate(embedder_config_params)
    assert validated
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@pytest.mark.parametrize(
    "embedder_config_params",
    [generate_embedder_config_params() for _ in range(10)],
)
def test_embedder(embedder_config_params: dict):
    """An encoder must be constructible from each validated random config."""
    encoder = OctoAIEmbeddingEncoder(
        config=OctoAiEmbeddingConfig.model_validate(embedder_config_params)
    )
    assert encoder
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
import random
|
|
2
|
+
|
|
3
|
+
import faker
|
|
4
|
+
import pytest
|
|
5
|
+
|
|
6
|
+
from unstructured_ingest.embed.openai import OpenAIEmbeddingConfig, OpenAIEmbeddingEncoder
|
|
7
|
+
|
|
8
|
+
fake = faker.Faker()
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def generate_embedder_config_params() -> dict:
    """Build a randomized keyword dict for ``OpenAIEmbeddingConfig``.

    ``api_key`` is always present; the optional keys are added at random.
    """
    params = {
        "api_key": fake.password(),
    }
    # NOTE(review): indentation was lost in this listing; ``base_url`` is
    # assumed to share the branch with ``embedder_model_name`` -- confirm.
    if random.random() < 0.5:
        params["embedder_model_name"] = fake.word()
        params["base_url"] = fake.url()
    return params
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@pytest.mark.parametrize(
    "embedder_config_params",
    [generate_embedder_config_params() for _ in range(10)],
)
def test_embedder_config(embedder_config_params: dict):
    """Ten random parameter sets must each validate into a truthy config."""
    validated = OpenAIEmbeddingConfig.model_validate(embedder_config_params)
    assert validated
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@pytest.mark.parametrize(
    "embedder_config_params",
    [generate_embedder_config_params() for _ in range(10)],
)
def test_embedder(embedder_config_params: dict):
    """An encoder must be constructible from each validated random config."""
    encoder = OpenAIEmbeddingEncoder(
        config=OpenAIEmbeddingConfig.model_validate(embedder_config_params)
    )
    assert encoder
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
import random
|
|
2
|
+
|
|
3
|
+
import faker
|
|
4
|
+
import pytest
|
|
5
|
+
|
|
6
|
+
from unstructured_ingest.embed.togetherai import (
|
|
7
|
+
TogetherAIEmbeddingConfig,
|
|
8
|
+
TogetherAIEmbeddingEncoder,
|
|
9
|
+
)
|
|
10
|
+
|
|
11
|
+
fake = faker.Faker()
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def generate_embedder_config_params() -> dict:
    """Build a randomized keyword dict for ``TogetherAIEmbeddingConfig``.

    ``api_key`` is always present; ``embedder_model_name`` is added on
    roughly half of the calls.
    """
    config_kwargs = {"api_key": fake.password()}
    override_model = random.random() < 0.5
    if override_model:
        config_kwargs["embedder_model_name"] = fake.word()
    return config_kwargs
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
@pytest.mark.parametrize(
    "embedder_config_params",
    [generate_embedder_config_params() for _ in range(10)],
)
def test_embedder_config(embedder_config_params: dict):
    """Ten random parameter sets must each validate into a truthy config."""
    validated = TogetherAIEmbeddingConfig.model_validate(embedder_config_params)
    assert validated
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
@pytest.mark.parametrize(
    "embedder_config_params",
    [generate_embedder_config_params() for _ in range(10)],
)
def test_embedder(embedder_config_params: dict):
    """An encoder must be constructible from each validated random config."""
    encoder = TogetherAIEmbeddingEncoder(
        config=TogetherAIEmbeddingConfig.model_validate(embedder_config_params)
    )
    assert encoder
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import random
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
import faker
|
|
6
|
+
import pytest
|
|
7
|
+
|
|
8
|
+
from test.unit.v2.utils.data_generator import generate_random_dictionary
|
|
9
|
+
from unstructured_ingest.embed.vertexai import VertexAIEmbeddingConfig, VertexAIEmbeddingEncoder
|
|
10
|
+
|
|
11
|
+
fake = faker.Faker()
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def generate_embedder_config_params() -> dict:
    """Build a randomized keyword dict for ``VertexAIEmbeddingConfig``.

    ``api_key`` is always present and holds a JSON-encoded random dictionary;
    ``embedder_model_name`` is added on roughly half of the calls.
    """
    key_payload = generate_random_dictionary(key_type=str, value_type=Any)
    config_kwargs = {"api_key": json.dumps(key_payload)}
    override_model = random.random() < 0.5
    if override_model:
        config_kwargs["embedder_model_name"] = fake.word()
    return config_kwargs
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
@pytest.mark.parametrize(
    "embedder_config_params",
    [generate_embedder_config_params() for _ in range(10)],
)
def test_embedder_config(embedder_config_params: dict):
    """Ten random parameter sets must each validate into a truthy config."""
    validated = VertexAIEmbeddingConfig.model_validate(embedder_config_params)
    assert validated
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
@pytest.mark.parametrize(
    "embedder_config_params",
    [generate_embedder_config_params() for _ in range(10)],
)
def test_embedder(embedder_config_params: dict):
    """An encoder must be constructible from each validated random config."""
    encoder = VertexAIEmbeddingEncoder(
        config=VertexAIEmbeddingConfig.model_validate(embedder_config_params)
    )
    assert encoder
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
import random
|
|
2
|
+
|
|
3
|
+
import faker
|
|
4
|
+
import pytest
|
|
5
|
+
|
|
6
|
+
from unstructured_ingest.embed.voyageai import VoyageAIEmbeddingConfig, VoyageAIEmbeddingEncoder
|
|
7
|
+
|
|
8
|
+
fake = faker.Faker()
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def generate_embedder_config_params() -> dict:
    """Build a randomized keyword dict for ``VoyageAIEmbeddingConfig``.

    ``api_key`` is always present; the optional keys are added at random.
    """
    params = {
        "api_key": fake.password(),
    }
    # NOTE(review): indentation was lost in this listing; every optional
    # assignment below is assumed to sit under this ``if`` -- confirm.
    if random.random() < 0.5:
        params["embedder_model_name"] = fake.word()
        params["batch_size"] = fake.random_int()
        params["truncation"] = fake.boolean()
        params["max_retries"] = fake.random_int()
        params["timeout_in_seconds"] = fake.random_int()
    return params
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@pytest.mark.parametrize(
    "embedder_config_params",
    [generate_embedder_config_params() for _ in range(10)],
)
def test_embedder_config(embedder_config_params: dict):
    """Ten random parameter sets must each validate into a truthy config."""
    validated = VoyageAIEmbeddingConfig.model_validate(embedder_config_params)
    assert validated
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@pytest.mark.parametrize(
    "embedder_config_params",
    [generate_embedder_config_params() for _ in range(10)],
)
def test_embedder(embedder_config_params: dict):
    """An encoder must be constructible from each validated random config."""
    encoder = VoyageAIEmbeddingEncoder(
        config=VoyageAIEmbeddingConfig.model_validate(embedder_config_params)
    )
    assert encoder
|
|
File without changes
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
import random
|
|
2
|
+
from typing import Any
|
|
3
|
+
|
|
4
|
+
import faker
|
|
5
|
+
import pytest
|
|
6
|
+
|
|
7
|
+
from test.unit.v2.utils.data_generator import generate_random_dictionary
|
|
8
|
+
from unstructured_ingest.v2.processes.partitioner import Partitioner, PartitionerConfig
|
|
9
|
+
|
|
10
|
+
fake = faker.Faker()
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def generate_partitioner_config_params() -> dict:
    """Build a randomized keyword dict for ``PartitionerConfig``.

    Returns:
        A dict that always holds the core partitioning options and
        ``partition_by_api``. ``fields_include``, at most one of
        ``metadata_exclude`` / ``metadata_include``, and the API endpoint and
        key are added at random.
    """
    params = {
        "strategy": random.choice(["fast", "hi_res", "auto"]),
        "ocr_languages": fake.words() if random.random() < 0.5 else None,
        "encoding": fake.word() if random.random() < 0.5 else None,
        "additional_partition_args": (
            generate_random_dictionary(key_type=str, value_type=Any)
            if random.random() < 0.5
            else None
        ),
        "skip_infer_table_types": fake.words() if random.random() < 0.5 else None,
        "flatten_metadata": fake.boolean(),
        "hi_res_model_name": fake.word() if random.random() < 0.5 else None,
    }
    # Randomly set the fields_include to a random list[str]
    if random.random() < 0.5:
        params["fields_include"] = fake.words()

    # Randomly set the metadata_exclude or metadata_include to a valid
    # list[str] or don't set it at all. A single roll is split into thirds so
    # the three outcomes are equally likely and never both set.
    metadata_roll = random.random()
    if metadata_roll < (1 / 3):
        params["metadata_exclude"] = fake.words()
    elif metadata_roll < (2 / 3):
        params["metadata_include"] = fake.words()

    # Randomly set the values associated with calling the api, or not at all.
    # These must be real assignments: ``params[key]: value`` is only an
    # annotation and stores nothing.
    if random.random() < 0.5:
        params["partition_by_api"] = True
        params["partition_endpoint"] = fake.url()
        params["api_key"] = fake.password()
    else:
        params["partition_by_api"] = False
    return params
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
@pytest.mark.parametrize(
    "partition_config_params",
    [generate_partitioner_config_params() for _ in range(10)],
)
def test_partition_config(partition_config_params: dict):
    """Ten random parameter sets must each validate into a truthy config."""
    validated = PartitionerConfig.model_validate(partition_config_params)
    assert validated
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
@pytest.mark.parametrize(
    "partition_config_params",
    [generate_partitioner_config_params() for _ in range(10)],
)
def test_partitioner(partition_config_params: dict):
    """A partitioner must be constructible from each validated random config."""
    built = Partitioner(config=PartitionerConfig.model_validate(partition_config_params))
    assert built
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
import pytest
|
|
2
|
+
from pydantic import Secret, ValidationError
|
|
3
|
+
|
|
4
|
+
from unstructured_ingest.v2.interfaces import AccessConfig, ConnectionConfig
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def test_failing_connection_config():
    """Constructing a connection config whose access config is a bare model raises."""

    class MyAccessConfig(AccessConfig):
        sensitive_value: str

    class MyConnectionConfig(ConnectionConfig):
        # Declared without a ``Secret[...]`` wrapper, unlike the happy-path test.
        access_config: MyAccessConfig

    access_kwargs = {"sensitive_value": "this"}
    with pytest.raises(ValidationError):
        MyConnectionConfig(access_config=MyAccessConfig(**access_kwargs))
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def test_happy_path_connection_config():
    """A connection config with a ``Secret``-wrapped access config constructs cleanly."""

    class MyAccessConfig(AccessConfig):
        sensitive_value: str

    class MyConnectionConfig(ConnectionConfig):
        access_config: Secret[MyAccessConfig]

    access = MyAccessConfig(sensitive_value="this")
    built = MyConnectionConfig(access_config=access)
    assert built
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
import json
|
|
2
|
+
from typing import Any
|
|
3
|
+
|
|
4
|
+
from pydantic import BaseModel, Field, Secret, SecretStr
|
|
5
|
+
from pydantic.types import _SecretBase
|
|
6
|
+
|
|
7
|
+
from unstructured_ingest.v2.utils import serialize_base_model, serialize_base_model_json
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
# Nested fixture model: two secret-wrapped fields plus one plain dict field.
class MockChildBaseModel(BaseModel):
    child_secret_str: SecretStr
    child_secret_float: Secret[float]
    # Plain field; defaults to a fresh empty dict per instance.
    child_not_secret_dict: dict[str, Any] = Field(default_factory=dict)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
# Top-level fixture model mixing secret and plain fields, including a
# secret-wrapped nested model.
class MockBaseModel(BaseModel):
    secret_str: SecretStr
    not_secret_bool: bool
    secret_child_base: Secret[MockChildBaseModel]
    # Plain field; defaults to a fresh empty list per instance.
    not_secret_list: list[int] = Field(default_factory=list)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
# Shared module-level fixture instance read by both serialization tests below.
model = MockBaseModel(
    secret_str="secret string",
    not_secret_bool=False,
    secret_child_base=MockChildBaseModel(
        child_secret_str="child secret string",
        child_secret_float=3.14,
        child_not_secret_dict={"key": "value"},
    ),
    not_secret_list=[1, 2, 3],
)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def test_serialize_base_model():
    """``serialize_base_model`` must return plain values where ``model_dump`` keeps secrets."""
    secret_fields = ("secret_str", "secret_child_base")

    # The stock pydantic dump leaves the secret fields wrapped.
    wrapped = model.model_dump()
    for field_name in secret_fields:
        assert isinstance(wrapped[field_name], _SecretBase)

    # The project helper unwraps them, including the nested model.
    unwrapped = serialize_base_model(model=model)
    for field_name in secret_fields:
        assert not isinstance(unwrapped[field_name], _SecretBase)

    assert unwrapped == {
        "secret_str": "secret string",
        "not_secret_bool": False,
        "secret_child_base": {
            "child_secret_str": "child secret string",
            "child_secret_float": 3.14,
            "child_not_secret_dict": {"key": "value"},
        },
        "not_secret_list": [1, 2, 3],
    }
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def test_serialize_base_model_json():
    """``serialize_base_model_json`` must emit real values where pydantic JSON masks them."""
    # The stock pydantic JSON dump masks every secret field.
    masked = json.loads(model.model_dump_json())
    assert {
        "secret_str": "**********",
        "not_secret_bool": False,
        "secret_child_base": "**********",
        "not_secret_list": [1, 2, 3],
    } == masked

    # The project helper writes the underlying values instead.
    revealed = json.loads(serialize_base_model_json(model=model))
    assert {
        "secret_str": "secret string",
        "not_secret_bool": False,
        "secret_child_base": {
            "child_secret_str": "child secret string",
            "child_secret_float": 3.14,
            "child_not_secret_dict": {"key": "value"},
        },
        "not_secret_list": [1, 2, 3],
    } == revealed
|
|
File without changes
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
import random
|
|
2
|
+
from typing import Any, Type
|
|
3
|
+
|
|
4
|
+
from faker import Faker
|
|
5
|
+
|
|
6
|
+
fake = Faker()
|
|
7
|
+
|
|
8
|
+
# Zero-argument factories producing a random value for each supported type.
type_to_random_value_map = {
    str: fake.sentence,
    int: fake.random_int,
    # ``random_digit`` returns an int in 0-9; ``pyfloat`` returns a real float,
    # so a request for ``float`` values actually yields floats.
    float: fake.pyfloat,
    bool: fake.boolean,
}
# Keys use the same factories, except that string keys are single words
# rather than whole sentences.
type_to_random_value_map_key = type_to_random_value_map.copy()
type_to_random_value_map_key[str] = fake.word
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def generate_random_dictionary(key_type: Type = str, value_type: Type = str) -> dict:
    """Return a dict filled by one to three random insertions.

    Duplicate random keys overwrite each other, so the result may hold fewer
    entries than insertions.

    Args:
        key_type: Type of the keys; looked up in ``type_to_random_value_map_key``.
        value_type: Type of the values. ``Any`` picks a type at random for
            each entry, including ``dict``, which recurses with the same
            arguments.

    Raises:
        KeyError: If a requested type has no registered factory.
    """
    result = {}
    for _ in range(random.randint(1, 3)):
        key = type_to_random_value_map_key[key_type]()
        chosen_type = value_type
        if chosen_type == Any:
            chosen_type = random.choice([*type_to_random_value_map, dict])
        if chosen_type is dict:
            result[key] = generate_random_dictionary(key_type=key_type, value_type=value_type)
        else:
            result[key] = type_to_random_value_map[chosen_type]()
    return result
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# Version string of the unstructured-ingest package.
__version__ = "0.3.13"  # pragma: no cover
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
import typing as t
|
|
2
|
+
|
|
3
|
+
import click
|
|
4
|
+
|
|
5
|
+
from unstructured_ingest.cli.cmds import base_dest_cmd_fns, base_src_cmd_fns
|
|
6
|
+
|
|
7
|
+
# One click group per registered source command factory: each factory is
# called, and the resulting object's ``get_src_cmd()`` builds the group.
src: t.List[click.Group] = [v().get_src_cmd() for v in base_src_cmd_fns]

# One click command per registered destination command factory, built the
# same way through ``get_dest_cmd()``.
dest: t.List[click.Command] = [v().get_dest_cmd() for v in base_dest_cmd_fns]

__all__ = [
    "src",
    "dest",
]
|
|
File without changes
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import typing as t
|
|
2
|
+
from abc import ABC
|
|
3
|
+
from dataclasses import dataclass, field
|
|
4
|
+
|
|
5
|
+
from unstructured_ingest.cli.interfaces import CliConfig
|
|
6
|
+
from unstructured_ingest.interfaces import BaseConfig
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@dataclass
class BaseCmd(ABC):
    """Base dataclass holding the settings shared by CLI command definitions."""

    # Command name as typed on the command line (may contain dashes).
    cmd_name: str
    # Config class associated with the command, if any.
    cli_config: t.Optional[t.Type[BaseConfig]] = None
    # Extra CLI option providers attached in addition to ``cli_config``.
    additional_cli_options: t.List[t.Type[CliConfig]] = field(default_factory=list)
    # Extra config classes keyed by name.
    addition_configs: t.Dict[str, t.Type[BaseConfig]] = field(default_factory=dict)
    is_fsspec: bool = False

    @property
    def cmd_name_key(self) -> str:
        """Return ``cmd_name`` with dashes replaced by underscores."""
        return self.cmd_name.replace("-", "_")
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import typing as t
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
|
|
5
|
+
import click
|
|
6
|
+
|
|
7
|
+
from unstructured_ingest.cli.base.cmd import BaseCmd
|
|
8
|
+
from unstructured_ingest.cli.cmd_factory import get_src_cmd
|
|
9
|
+
from unstructured_ingest.cli.common import (
|
|
10
|
+
log_options,
|
|
11
|
+
)
|
|
12
|
+
from unstructured_ingest.cli.interfaces import BaseConfig, CliFilesStorageConfig
|
|
13
|
+
from unstructured_ingest.cli.utils import (
|
|
14
|
+
add_options,
|
|
15
|
+
conform_click_options,
|
|
16
|
+
extract_config,
|
|
17
|
+
extract_configs,
|
|
18
|
+
)
|
|
19
|
+
from unstructured_ingest.logger import ingest_log_streaming_init, logger
|
|
20
|
+
from unstructured_ingest.runner.writers import writer_map
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
@dataclass
class BaseDestCmd(BaseCmd):
    """Destination command that attaches a writer to the parent source runner."""

    # Config class for the writer, if the destination has one.
    write_config: t.Optional[t.Type[BaseConfig]] = None

    def get_dest_runner(self, source_cmd: str, options: dict, parent_options: dict):
        """Build the source runner and attach this destination's writer to it.

        Args:
            source_cmd: Name of the parent source command (underscored).
            options: Options passed to this destination command.
            parent_options: Options passed to the parent source command.

        Returns:
            The source runner with ``writer`` and ``writer_kwargs`` set.
        """
        src_cmd_fn = get_src_cmd(cmd_name=source_cmd)
        src_cmd = src_cmd_fn()
        runner = src_cmd.get_source_runner(options=parent_options)
        # Work on a copy so that building a runner does not alter this
        # command's own ``addition_configs`` field.
        addition_configs = dict(self.addition_configs)
        if "connector_config" not in addition_configs:
            addition_configs["connector_config"] = self.cli_config
        if self.write_config:
            addition_configs["write_config"] = self.write_config
        configs = extract_configs(
            options,
            validate=[self.cli_config] if self.cli_config else None,
            extras=addition_configs,
            add_defaults=False,
        )
        writer_cls = writer_map[self.cmd_name_key]
        writer = writer_cls(**configs)  # type: ignore
        runner.writer = writer
        runner.writer_kwargs = options
        return runner

    def check_dest_options(self, options: dict):
        """Run the destination options through ``extract_config`` for ``cli_config``."""
        extract_config(flat_data=options, config=self.cli_config)

    def dest(self, ctx: click.Context, **options):
        """Click callback: check options, build the runner, and run it.

        Raises:
            click.ClickException: If the command has no parent or the parent
                has no name, or if any error occurs while checking options,
                building the runner, or running it.
        """
        if not ctx.parent:
            raise click.ClickException("destination command called without a parent")
        if not ctx.parent.info_name:
            raise click.ClickException("parent command missing info name")
        source_cmd = ctx.parent.info_name.replace("-", "_")
        # ``ctx.parent`` is known to be set after the guard above.
        parent_options: dict = ctx.parent.params
        conform_click_options(options)
        verbose = parent_options.get("verbose", False)
        ingest_log_streaming_init(logging.DEBUG if verbose else logging.INFO)
        log_options(parent_options, verbose=verbose)
        log_options(options, verbose=verbose)
        try:
            self.check_dest_options(options=options)
            runner = self.get_dest_runner(
                source_cmd=source_cmd,
                options=options,
                parent_options=parent_options,
            )
            runner.run(**parent_options)
        except Exception as e:
            # Top-level CLI boundary: log the traceback, then surface the
            # message through click.
            logger.error(e, exc_info=True)
            raise click.ClickException(str(e)) from e

    def get_dest_cmd(self) -> click.Command:
        """Create the click command for this destination and attach its options."""
        # Dynamically create the command without the use of click decorators
        fn = self.dest
        fn = click.pass_context(fn)
        cmd: click.Command = click.command(fn)
        cmd.name = self.cmd_name
        cmd.invoke_without_command = True
        # A fresh list, so extending it leaves ``additional_cli_options`` untouched.
        options = [self.cli_config] if self.cli_config else []
        options += self.additional_cli_options
        if self.is_fsspec and CliFilesStorageConfig not in options:
            options.append(CliFilesStorageConfig)
        add_options(cmd, extras=options, is_src=False)
        return cmd
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from dataclasses import dataclass
|
|
3
|
+
|
|
4
|
+
import click
|
|
5
|
+
|
|
6
|
+
from unstructured_ingest.cli.base.cmd import BaseCmd
|
|
7
|
+
from unstructured_ingest.cli.common import (
|
|
8
|
+
log_options,
|
|
9
|
+
)
|
|
10
|
+
from unstructured_ingest.cli.interfaces import CliFilesStorageConfig
|
|
11
|
+
from unstructured_ingest.cli.utils import Group, add_options, conform_click_options, extract_configs
|
|
12
|
+
from unstructured_ingest.logger import ingest_log_streaming_init, logger
|
|
13
|
+
from unstructured_ingest.runner import runner_map
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@dataclass
class BaseSrcCmd(BaseCmd):
    """Source command that builds and runs the runner registered for its name."""

    def get_source_runner(self, options: dict):
        """Instantiate the runner registered under ``cmd_name_key``.

        Args:
            options: Options passed to this source command.

        Returns:
            The runner built from the configs extracted from ``options``.
        """
        # Work on a copy so that building a runner does not alter this
        # command's own ``addition_configs`` field.
        addition_configs = dict(self.addition_configs)
        if "connector_config" not in addition_configs:
            addition_configs["connector_config"] = self.cli_config
        configs = extract_configs(
            options,
            validate=[self.cli_config] if self.cli_config else None,
            extras=addition_configs,
        )
        runner = runner_map[self.cmd_name_key]
        return runner(**configs)  # type: ignore

    def src(self, ctx: click.Context, **options):
        """Click callback: run the source unless a subcommand was invoked.

        Raises:
            click.ClickException: If building or running the runner fails.
        """
        if ctx.invoked_subcommand:
            # The invoked subcommand does the work; nothing to run here.
            return

        conform_click_options(options)
        verbose = options.get("verbose", False)
        ingest_log_streaming_init(logging.DEBUG if verbose else logging.INFO)
        log_options(options, verbose=verbose)
        try:
            runner = self.get_source_runner(options=options)
            runner.run(**options)
        except Exception as e:
            # Top-level CLI boundary: log the traceback, then surface the
            # message through click.
            logger.error(e, exc_info=True)
            raise click.ClickException(str(e)) from e

    def get_src_cmd(self) -> click.Group:
        """Create the click group for this source and attach its options."""
        # Dynamically create the command without the use of click decorators
        fn = self.src
        fn = click.pass_context(fn)
        cmd: click.Group = click.group(fn, cls=Group)
        cmd.name = self.cmd_name
        cmd.invoke_without_command = True
        # A fresh list, so extending it leaves ``additional_cli_options`` untouched.
        extra_options = [self.cli_config] if self.cli_config else []
        extra_options += self.additional_cli_options
        if self.is_fsspec and CliFilesStorageConfig not in extra_options:
            extra_options.append(CliFilesStorageConfig)
        add_options(cmd, extras=extra_options)
        return cmd
|