unstructured-ingest 0.3.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of unstructured-ingest might be problematic. Click here for more details.
- test/__init__.py +0 -0
- test/integration/__init__.py +0 -0
- test/integration/chunkers/__init__.py +0 -0
- test/integration/chunkers/test_chunkers.py +31 -0
- test/integration/connectors/__init__.py +0 -0
- test/integration/connectors/conftest.py +38 -0
- test/integration/connectors/databricks/__init__.py +0 -0
- test/integration/connectors/databricks/test_volumes_native.py +269 -0
- test/integration/connectors/discord/__init__.py +0 -0
- test/integration/connectors/discord/test_discord.py +90 -0
- test/integration/connectors/duckdb/__init__.py +0 -0
- test/integration/connectors/duckdb/conftest.py +14 -0
- test/integration/connectors/duckdb/test_duckdb.py +89 -0
- test/integration/connectors/duckdb/test_motherduck.py +95 -0
- test/integration/connectors/elasticsearch/__init__.py +0 -0
- test/integration/connectors/elasticsearch/conftest.py +34 -0
- test/integration/connectors/elasticsearch/test_elasticsearch.py +330 -0
- test/integration/connectors/elasticsearch/test_opensearch.py +325 -0
- test/integration/connectors/sql/__init__.py +0 -0
- test/integration/connectors/sql/test_postgres.py +195 -0
- test/integration/connectors/sql/test_singlestore.py +176 -0
- test/integration/connectors/sql/test_snowflake.py +238 -0
- test/integration/connectors/sql/test_sqlite.py +162 -0
- test/integration/connectors/test_astradb.py +217 -0
- test/integration/connectors/test_azure_ai_search.py +255 -0
- test/integration/connectors/test_chroma.py +120 -0
- test/integration/connectors/test_confluence.py +113 -0
- test/integration/connectors/test_delta_table.py +185 -0
- test/integration/connectors/test_lancedb.py +247 -0
- test/integration/connectors/test_milvus.py +203 -0
- test/integration/connectors/test_mongodb.py +335 -0
- test/integration/connectors/test_neo4j.py +236 -0
- test/integration/connectors/test_notion.py +145 -0
- test/integration/connectors/test_onedrive.py +118 -0
- test/integration/connectors/test_pinecone.py +288 -0
- test/integration/connectors/test_qdrant.py +215 -0
- test/integration/connectors/test_redis.py +119 -0
- test/integration/connectors/test_s3.py +183 -0
- test/integration/connectors/test_vectara.py +270 -0
- test/integration/connectors/utils/__init__.py +0 -0
- test/integration/connectors/utils/constants.py +7 -0
- test/integration/connectors/utils/docker.py +151 -0
- test/integration/connectors/utils/docker_compose.py +59 -0
- test/integration/connectors/utils/validation/__init__.py +0 -0
- test/integration/connectors/utils/validation/destination.py +75 -0
- test/integration/connectors/utils/validation/equality.py +75 -0
- test/integration/connectors/utils/validation/source.py +299 -0
- test/integration/connectors/utils/validation/utils.py +36 -0
- test/integration/connectors/weaviate/__init__.py +0 -0
- test/integration/connectors/weaviate/conftest.py +15 -0
- test/integration/connectors/weaviate/test_cloud.py +34 -0
- test/integration/connectors/weaviate/test_local.py +131 -0
- test/integration/embedders/__init__.py +0 -0
- test/integration/embedders/conftest.py +13 -0
- test/integration/embedders/test_azure_openai.py +59 -0
- test/integration/embedders/test_bedrock.py +103 -0
- test/integration/embedders/test_huggingface.py +26 -0
- test/integration/embedders/test_mixedbread.py +71 -0
- test/integration/embedders/test_octoai.py +77 -0
- test/integration/embedders/test_openai.py +76 -0
- test/integration/embedders/test_togetherai.py +71 -0
- test/integration/embedders/test_vertexai.py +65 -0
- test/integration/embedders/test_voyageai.py +65 -0
- test/integration/embedders/utils.py +68 -0
- test/integration/partitioners/__init__.py +0 -0
- test/integration/partitioners/test_partitioner.py +75 -0
- test/integration/utils.py +15 -0
- test/unit/__init__.py +0 -0
- test/unit/embed/__init__.py +0 -0
- test/unit/embed/test_mixedbreadai.py +42 -0
- test/unit/embed/test_octoai.py +27 -0
- test/unit/embed/test_openai.py +20 -0
- test/unit/embed/test_vertexai.py +25 -0
- test/unit/embed/test_voyageai.py +24 -0
- test/unit/test_error.py +27 -0
- test/unit/test_logger.py +78 -0
- test/unit/test_utils.py +184 -0
- test/unit/v2/__init__.py +0 -0
- test/unit/v2/chunkers/__init__.py +0 -0
- test/unit/v2/chunkers/test_chunkers.py +49 -0
- test/unit/v2/connectors/__init__.py +0 -0
- test/unit/v2/connectors/test_confluence.py +39 -0
- test/unit/v2/embedders/__init__.py +0 -0
- test/unit/v2/embedders/test_bedrock.py +36 -0
- test/unit/v2/embedders/test_huggingface.py +48 -0
- test/unit/v2/embedders/test_mixedbread.py +37 -0
- test/unit/v2/embedders/test_octoai.py +35 -0
- test/unit/v2/embedders/test_openai.py +35 -0
- test/unit/v2/embedders/test_togetherai.py +37 -0
- test/unit/v2/embedders/test_vertexai.py +37 -0
- test/unit/v2/embedders/test_voyageai.py +38 -0
- test/unit/v2/partitioners/__init__.py +0 -0
- test/unit/v2/partitioners/test_partitioner.py +63 -0
- test/unit/v2/test_interfaces.py +26 -0
- test/unit/v2/test_utils.py +82 -0
- test/unit/v2/utils/__init__.py +0 -0
- test/unit/v2/utils/data_generator.py +32 -0
- unstructured_ingest/__init__.py +1 -0
- unstructured_ingest/__version__.py +1 -0
- unstructured_ingest/cli/__init__.py +14 -0
- unstructured_ingest/cli/base/__init__.py +0 -0
- unstructured_ingest/cli/base/cmd.py +19 -0
- unstructured_ingest/cli/base/dest.py +87 -0
- unstructured_ingest/cli/base/src.py +57 -0
- unstructured_ingest/cli/cli.py +37 -0
- unstructured_ingest/cli/cmd_factory.py +12 -0
- unstructured_ingest/cli/cmds/__init__.py +145 -0
- unstructured_ingest/cli/cmds/airtable.py +69 -0
- unstructured_ingest/cli/cmds/astradb.py +99 -0
- unstructured_ingest/cli/cmds/azure_ai_search.py +65 -0
- unstructured_ingest/cli/cmds/biomed.py +52 -0
- unstructured_ingest/cli/cmds/chroma.py +104 -0
- unstructured_ingest/cli/cmds/clarifai.py +71 -0
- unstructured_ingest/cli/cmds/confluence.py +69 -0
- unstructured_ingest/cli/cmds/databricks_volumes.py +163 -0
- unstructured_ingest/cli/cmds/delta_table.py +94 -0
- unstructured_ingest/cli/cmds/discord.py +47 -0
- unstructured_ingest/cli/cmds/elasticsearch.py +133 -0
- unstructured_ingest/cli/cmds/fsspec/__init__.py +0 -0
- unstructured_ingest/cli/cmds/fsspec/azure.py +94 -0
- unstructured_ingest/cli/cmds/fsspec/box.py +48 -0
- unstructured_ingest/cli/cmds/fsspec/dropbox.py +51 -0
- unstructured_ingest/cli/cmds/fsspec/fsspec.py +15 -0
- unstructured_ingest/cli/cmds/fsspec/gcs.py +71 -0
- unstructured_ingest/cli/cmds/fsspec/s3.py +74 -0
- unstructured_ingest/cli/cmds/fsspec/sftp.py +58 -0
- unstructured_ingest/cli/cmds/github.py +54 -0
- unstructured_ingest/cli/cmds/gitlab.py +54 -0
- unstructured_ingest/cli/cmds/google_drive.py +49 -0
- unstructured_ingest/cli/cmds/hubspot.py +70 -0
- unstructured_ingest/cli/cmds/jira.py +71 -0
- unstructured_ingest/cli/cmds/kafka.py +102 -0
- unstructured_ingest/cli/cmds/local.py +43 -0
- unstructured_ingest/cli/cmds/mongodb.py +72 -0
- unstructured_ingest/cli/cmds/notion.py +48 -0
- unstructured_ingest/cli/cmds/onedrive.py +66 -0
- unstructured_ingest/cli/cmds/opensearch.py +117 -0
- unstructured_ingest/cli/cmds/outlook.py +67 -0
- unstructured_ingest/cli/cmds/pinecone.py +71 -0
- unstructured_ingest/cli/cmds/qdrant.py +124 -0
- unstructured_ingest/cli/cmds/reddit.py +67 -0
- unstructured_ingest/cli/cmds/salesforce.py +58 -0
- unstructured_ingest/cli/cmds/sharepoint.py +66 -0
- unstructured_ingest/cli/cmds/slack.py +56 -0
- unstructured_ingest/cli/cmds/sql.py +66 -0
- unstructured_ingest/cli/cmds/vectara.py +66 -0
- unstructured_ingest/cli/cmds/weaviate.py +98 -0
- unstructured_ingest/cli/cmds/wikipedia.py +40 -0
- unstructured_ingest/cli/common.py +7 -0
- unstructured_ingest/cli/interfaces.py +663 -0
- unstructured_ingest/cli/utils.py +205 -0
- unstructured_ingest/connector/__init__.py +0 -0
- unstructured_ingest/connector/airtable.py +309 -0
- unstructured_ingest/connector/astradb.py +267 -0
- unstructured_ingest/connector/azure_ai_search.py +144 -0
- unstructured_ingest/connector/biomed.py +320 -0
- unstructured_ingest/connector/chroma.py +158 -0
- unstructured_ingest/connector/clarifai.py +122 -0
- unstructured_ingest/connector/confluence.py +285 -0
- unstructured_ingest/connector/databricks_volumes.py +137 -0
- unstructured_ingest/connector/delta_table.py +203 -0
- unstructured_ingest/connector/discord.py +180 -0
- unstructured_ingest/connector/elasticsearch.py +396 -0
- unstructured_ingest/connector/fsspec/__init__.py +0 -0
- unstructured_ingest/connector/fsspec/azure.py +78 -0
- unstructured_ingest/connector/fsspec/box.py +109 -0
- unstructured_ingest/connector/fsspec/dropbox.py +160 -0
- unstructured_ingest/connector/fsspec/fsspec.py +359 -0
- unstructured_ingest/connector/fsspec/gcs.py +82 -0
- unstructured_ingest/connector/fsspec/s3.py +62 -0
- unstructured_ingest/connector/fsspec/sftp.py +81 -0
- unstructured_ingest/connector/git.py +124 -0
- unstructured_ingest/connector/github.py +174 -0
- unstructured_ingest/connector/gitlab.py +142 -0
- unstructured_ingest/connector/google_drive.py +348 -0
- unstructured_ingest/connector/hubspot.py +278 -0
- unstructured_ingest/connector/jira.py +469 -0
- unstructured_ingest/connector/kafka.py +293 -0
- unstructured_ingest/connector/local.py +139 -0
- unstructured_ingest/connector/mongodb.py +284 -0
- unstructured_ingest/connector/notion/__init__.py +0 -0
- unstructured_ingest/connector/notion/client.py +248 -0
- unstructured_ingest/connector/notion/connector.py +469 -0
- unstructured_ingest/connector/notion/helpers.py +584 -0
- unstructured_ingest/connector/notion/interfaces.py +32 -0
- unstructured_ingest/connector/notion/types/__init__.py +0 -0
- unstructured_ingest/connector/notion/types/block.py +96 -0
- unstructured_ingest/connector/notion/types/blocks/__init__.py +63 -0
- unstructured_ingest/connector/notion/types/blocks/bookmark.py +40 -0
- unstructured_ingest/connector/notion/types/blocks/breadcrumb.py +21 -0
- unstructured_ingest/connector/notion/types/blocks/bulleted_list_item.py +31 -0
- unstructured_ingest/connector/notion/types/blocks/callout.py +94 -0
- unstructured_ingest/connector/notion/types/blocks/child_database.py +23 -0
- unstructured_ingest/connector/notion/types/blocks/child_page.py +23 -0
- unstructured_ingest/connector/notion/types/blocks/code.py +43 -0
- unstructured_ingest/connector/notion/types/blocks/column_list.py +35 -0
- unstructured_ingest/connector/notion/types/blocks/divider.py +22 -0
- unstructured_ingest/connector/notion/types/blocks/embed.py +36 -0
- unstructured_ingest/connector/notion/types/blocks/equation.py +23 -0
- unstructured_ingest/connector/notion/types/blocks/file.py +49 -0
- unstructured_ingest/connector/notion/types/blocks/heading.py +37 -0
- unstructured_ingest/connector/notion/types/blocks/image.py +21 -0
- unstructured_ingest/connector/notion/types/blocks/link_preview.py +24 -0
- unstructured_ingest/connector/notion/types/blocks/link_to_page.py +29 -0
- unstructured_ingest/connector/notion/types/blocks/numbered_list.py +29 -0
- unstructured_ingest/connector/notion/types/blocks/paragraph.py +31 -0
- unstructured_ingest/connector/notion/types/blocks/pdf.py +49 -0
- unstructured_ingest/connector/notion/types/blocks/quote.py +37 -0
- unstructured_ingest/connector/notion/types/blocks/synced_block.py +57 -0
- unstructured_ingest/connector/notion/types/blocks/table.py +63 -0
- unstructured_ingest/connector/notion/types/blocks/table_of_contents.py +23 -0
- unstructured_ingest/connector/notion/types/blocks/template.py +30 -0
- unstructured_ingest/connector/notion/types/blocks/todo.py +42 -0
- unstructured_ingest/connector/notion/types/blocks/toggle.py +37 -0
- unstructured_ingest/connector/notion/types/blocks/unsupported.py +20 -0
- unstructured_ingest/connector/notion/types/blocks/video.py +22 -0
- unstructured_ingest/connector/notion/types/database.py +73 -0
- unstructured_ingest/connector/notion/types/database_properties/__init__.py +106 -0
- unstructured_ingest/connector/notion/types/database_properties/checkbox.py +38 -0
- unstructured_ingest/connector/notion/types/database_properties/created_by.py +35 -0
- unstructured_ingest/connector/notion/types/database_properties/created_time.py +34 -0
- unstructured_ingest/connector/notion/types/database_properties/date.py +41 -0
- unstructured_ingest/connector/notion/types/database_properties/email.py +36 -0
- unstructured_ingest/connector/notion/types/database_properties/files.py +37 -0
- unstructured_ingest/connector/notion/types/database_properties/formula.py +49 -0
- unstructured_ingest/connector/notion/types/database_properties/last_edited_by.py +34 -0
- unstructured_ingest/connector/notion/types/database_properties/last_edited_time.py +34 -0
- unstructured_ingest/connector/notion/types/database_properties/multiselect.py +73 -0
- unstructured_ingest/connector/notion/types/database_properties/number.py +49 -0
- unstructured_ingest/connector/notion/types/database_properties/people.py +40 -0
- unstructured_ingest/connector/notion/types/database_properties/phone_number.py +36 -0
- unstructured_ingest/connector/notion/types/database_properties/relation.py +67 -0
- unstructured_ingest/connector/notion/types/database_properties/rich_text.py +43 -0
- unstructured_ingest/connector/notion/types/database_properties/rollup.py +56 -0
- unstructured_ingest/connector/notion/types/database_properties/select.py +68 -0
- unstructured_ingest/connector/notion/types/database_properties/status.py +80 -0
- unstructured_ingest/connector/notion/types/database_properties/title.py +37 -0
- unstructured_ingest/connector/notion/types/database_properties/unique_id.py +50 -0
- unstructured_ingest/connector/notion/types/database_properties/url.py +37 -0
- unstructured_ingest/connector/notion/types/database_properties/verification.py +78 -0
- unstructured_ingest/connector/notion/types/date.py +26 -0
- unstructured_ingest/connector/notion/types/file.py +51 -0
- unstructured_ingest/connector/notion/types/page.py +45 -0
- unstructured_ingest/connector/notion/types/parent.py +66 -0
- unstructured_ingest/connector/notion/types/rich_text.py +189 -0
- unstructured_ingest/connector/notion/types/user.py +76 -0
- unstructured_ingest/connector/onedrive.py +232 -0
- unstructured_ingest/connector/opensearch.py +218 -0
- unstructured_ingest/connector/outlook.py +285 -0
- unstructured_ingest/connector/pinecone.py +140 -0
- unstructured_ingest/connector/qdrant.py +144 -0
- unstructured_ingest/connector/reddit.py +166 -0
- unstructured_ingest/connector/registry.py +109 -0
- unstructured_ingest/connector/salesforce.py +301 -0
- unstructured_ingest/connector/sharepoint.py +573 -0
- unstructured_ingest/connector/slack.py +224 -0
- unstructured_ingest/connector/sql.py +199 -0
- unstructured_ingest/connector/vectara.py +253 -0
- unstructured_ingest/connector/weaviate.py +190 -0
- unstructured_ingest/connector/wikipedia.py +208 -0
- unstructured_ingest/embed/__init__.py +0 -0
- unstructured_ingest/embed/azure_openai.py +31 -0
- unstructured_ingest/embed/bedrock.py +193 -0
- unstructured_ingest/embed/huggingface.py +52 -0
- unstructured_ingest/embed/interfaces.py +117 -0
- unstructured_ingest/embed/mixedbreadai.py +233 -0
- unstructured_ingest/embed/octoai.py +130 -0
- unstructured_ingest/embed/openai.py +116 -0
- unstructured_ingest/embed/togetherai.py +106 -0
- unstructured_ingest/embed/vertexai.py +126 -0
- unstructured_ingest/embed/voyageai.py +130 -0
- unstructured_ingest/enhanced_dataclass/__init__.py +4 -0
- unstructured_ingest/enhanced_dataclass/core.py +99 -0
- unstructured_ingest/enhanced_dataclass/dataclasses.py +54 -0
- unstructured_ingest/enhanced_dataclass/json_mixin.py +125 -0
- unstructured_ingest/error.py +49 -0
- unstructured_ingest/ingest_backoff/__init__.py +3 -0
- unstructured_ingest/ingest_backoff/_common.py +102 -0
- unstructured_ingest/ingest_backoff/_wrapper.py +122 -0
- unstructured_ingest/interfaces.py +852 -0
- unstructured_ingest/logger.py +130 -0
- unstructured_ingest/main.py +11 -0
- unstructured_ingest/pipeline/__init__.py +22 -0
- unstructured_ingest/pipeline/copy.py +19 -0
- unstructured_ingest/pipeline/doc_factory.py +12 -0
- unstructured_ingest/pipeline/interfaces.py +270 -0
- unstructured_ingest/pipeline/partition.py +60 -0
- unstructured_ingest/pipeline/permissions.py +12 -0
- unstructured_ingest/pipeline/pipeline.py +117 -0
- unstructured_ingest/pipeline/reformat/__init__.py +0 -0
- unstructured_ingest/pipeline/reformat/chunking.py +134 -0
- unstructured_ingest/pipeline/reformat/embedding.py +64 -0
- unstructured_ingest/pipeline/source.py +77 -0
- unstructured_ingest/pipeline/utils.py +6 -0
- unstructured_ingest/pipeline/write.py +18 -0
- unstructured_ingest/processor.py +93 -0
- unstructured_ingest/runner/__init__.py +104 -0
- unstructured_ingest/runner/airtable.py +35 -0
- unstructured_ingest/runner/astradb.py +34 -0
- unstructured_ingest/runner/base_runner.py +89 -0
- unstructured_ingest/runner/biomed.py +45 -0
- unstructured_ingest/runner/confluence.py +35 -0
- unstructured_ingest/runner/delta_table.py +34 -0
- unstructured_ingest/runner/discord.py +35 -0
- unstructured_ingest/runner/elasticsearch.py +40 -0
- unstructured_ingest/runner/fsspec/__init__.py +0 -0
- unstructured_ingest/runner/fsspec/azure.py +30 -0
- unstructured_ingest/runner/fsspec/box.py +28 -0
- unstructured_ingest/runner/fsspec/dropbox.py +30 -0
- unstructured_ingest/runner/fsspec/fsspec.py +40 -0
- unstructured_ingest/runner/fsspec/gcs.py +28 -0
- unstructured_ingest/runner/fsspec/s3.py +28 -0
- unstructured_ingest/runner/fsspec/sftp.py +28 -0
- unstructured_ingest/runner/github.py +37 -0
- unstructured_ingest/runner/gitlab.py +37 -0
- unstructured_ingest/runner/google_drive.py +35 -0
- unstructured_ingest/runner/hubspot.py +35 -0
- unstructured_ingest/runner/jira.py +35 -0
- unstructured_ingest/runner/kafka.py +34 -0
- unstructured_ingest/runner/local.py +23 -0
- unstructured_ingest/runner/mongodb.py +34 -0
- unstructured_ingest/runner/notion.py +61 -0
- unstructured_ingest/runner/onedrive.py +35 -0
- unstructured_ingest/runner/opensearch.py +40 -0
- unstructured_ingest/runner/outlook.py +33 -0
- unstructured_ingest/runner/reddit.py +35 -0
- unstructured_ingest/runner/salesforce.py +33 -0
- unstructured_ingest/runner/sharepoint.py +35 -0
- unstructured_ingest/runner/slack.py +33 -0
- unstructured_ingest/runner/utils.py +47 -0
- unstructured_ingest/runner/wikipedia.py +35 -0
- unstructured_ingest/runner/writers/__init__.py +48 -0
- unstructured_ingest/runner/writers/astradb.py +22 -0
- unstructured_ingest/runner/writers/azure_ai_search.py +24 -0
- unstructured_ingest/runner/writers/base_writer.py +26 -0
- unstructured_ingest/runner/writers/chroma.py +22 -0
- unstructured_ingest/runner/writers/clarifai.py +19 -0
- unstructured_ingest/runner/writers/databricks_volumes.py +25 -0
- unstructured_ingest/runner/writers/delta_table.py +24 -0
- unstructured_ingest/runner/writers/elasticsearch.py +24 -0
- unstructured_ingest/runner/writers/fsspec/__init__.py +0 -0
- unstructured_ingest/runner/writers/fsspec/azure.py +24 -0
- unstructured_ingest/runner/writers/fsspec/box.py +21 -0
- unstructured_ingest/runner/writers/fsspec/dropbox.py +21 -0
- unstructured_ingest/runner/writers/fsspec/gcs.py +19 -0
- unstructured_ingest/runner/writers/fsspec/s3.py +21 -0
- unstructured_ingest/runner/writers/kafka.py +21 -0
- unstructured_ingest/runner/writers/mongodb.py +21 -0
- unstructured_ingest/runner/writers/opensearch.py +26 -0
- unstructured_ingest/runner/writers/pinecone.py +21 -0
- unstructured_ingest/runner/writers/qdrant.py +19 -0
- unstructured_ingest/runner/writers/sql.py +22 -0
- unstructured_ingest/runner/writers/vectara.py +22 -0
- unstructured_ingest/runner/writers/weaviate.py +21 -0
- unstructured_ingest/utils/__init__.py +0 -0
- unstructured_ingest/utils/chunking.py +56 -0
- unstructured_ingest/utils/compression.py +118 -0
- unstructured_ingest/utils/data_prep.py +200 -0
- unstructured_ingest/utils/dep_check.py +78 -0
- unstructured_ingest/utils/google_filetype.py +9 -0
- unstructured_ingest/utils/string_and_date_utils.py +49 -0
- unstructured_ingest/utils/table.py +73 -0
- unstructured_ingest/v2/__init__.py +1 -0
- unstructured_ingest/v2/cli/__init__.py +0 -0
- unstructured_ingest/v2/cli/base/__init__.py +4 -0
- unstructured_ingest/v2/cli/base/cmd.py +269 -0
- unstructured_ingest/v2/cli/base/dest.py +85 -0
- unstructured_ingest/v2/cli/base/importer.py +34 -0
- unstructured_ingest/v2/cli/base/src.py +85 -0
- unstructured_ingest/v2/cli/cli.py +24 -0
- unstructured_ingest/v2/cli/cmds.py +14 -0
- unstructured_ingest/v2/cli/utils/__init__.py +0 -0
- unstructured_ingest/v2/cli/utils/click.py +237 -0
- unstructured_ingest/v2/cli/utils/model_conversion.py +222 -0
- unstructured_ingest/v2/constants.py +2 -0
- unstructured_ingest/v2/errors.py +18 -0
- unstructured_ingest/v2/interfaces/__init__.py +32 -0
- unstructured_ingest/v2/interfaces/connector.py +50 -0
- unstructured_ingest/v2/interfaces/downloader.py +89 -0
- unstructured_ingest/v2/interfaces/file_data.py +116 -0
- unstructured_ingest/v2/interfaces/indexer.py +30 -0
- unstructured_ingest/v2/interfaces/process.py +19 -0
- unstructured_ingest/v2/interfaces/processor.py +88 -0
- unstructured_ingest/v2/interfaces/upload_stager.py +102 -0
- unstructured_ingest/v2/interfaces/uploader.py +53 -0
- unstructured_ingest/v2/logger.py +126 -0
- unstructured_ingest/v2/main.py +11 -0
- unstructured_ingest/v2/otel.py +111 -0
- unstructured_ingest/v2/pipeline/__init__.py +0 -0
- unstructured_ingest/v2/pipeline/interfaces.py +211 -0
- unstructured_ingest/v2/pipeline/otel.py +32 -0
- unstructured_ingest/v2/pipeline/pipeline.py +384 -0
- unstructured_ingest/v2/pipeline/steps/__init__.py +0 -0
- unstructured_ingest/v2/pipeline/steps/chunk.py +80 -0
- unstructured_ingest/v2/pipeline/steps/download.py +207 -0
- unstructured_ingest/v2/pipeline/steps/embed.py +79 -0
- unstructured_ingest/v2/pipeline/steps/filter.py +35 -0
- unstructured_ingest/v2/pipeline/steps/index.py +86 -0
- unstructured_ingest/v2/pipeline/steps/partition.py +79 -0
- unstructured_ingest/v2/pipeline/steps/stage.py +65 -0
- unstructured_ingest/v2/pipeline/steps/uncompress.py +50 -0
- unstructured_ingest/v2/pipeline/steps/upload.py +58 -0
- unstructured_ingest/v2/processes/__init__.py +18 -0
- unstructured_ingest/v2/processes/chunker.py +124 -0
- unstructured_ingest/v2/processes/connector_registry.py +69 -0
- unstructured_ingest/v2/processes/connectors/__init__.py +117 -0
- unstructured_ingest/v2/processes/connectors/airtable.py +235 -0
- unstructured_ingest/v2/processes/connectors/astradb.py +402 -0
- unstructured_ingest/v2/processes/connectors/azure_ai_search.py +276 -0
- unstructured_ingest/v2/processes/connectors/chroma.py +190 -0
- unstructured_ingest/v2/processes/connectors/confluence.py +207 -0
- unstructured_ingest/v2/processes/connectors/couchbase.py +334 -0
- unstructured_ingest/v2/processes/connectors/databricks/__init__.py +52 -0
- unstructured_ingest/v2/processes/connectors/databricks/volumes.py +208 -0
- unstructured_ingest/v2/processes/connectors/databricks/volumes_aws.py +87 -0
- unstructured_ingest/v2/processes/connectors/databricks/volumes_azure.py +102 -0
- unstructured_ingest/v2/processes/connectors/databricks/volumes_gcp.py +85 -0
- unstructured_ingest/v2/processes/connectors/databricks/volumes_native.py +86 -0
- unstructured_ingest/v2/processes/connectors/delta_table.py +191 -0
- unstructured_ingest/v2/processes/connectors/discord.py +158 -0
- unstructured_ingest/v2/processes/connectors/duckdb/__init__.py +15 -0
- unstructured_ingest/v2/processes/connectors/duckdb/base.py +100 -0
- unstructured_ingest/v2/processes/connectors/duckdb/duckdb.py +127 -0
- unstructured_ingest/v2/processes/connectors/duckdb/motherduck.py +126 -0
- unstructured_ingest/v2/processes/connectors/elasticsearch/__init__.py +19 -0
- unstructured_ingest/v2/processes/connectors/elasticsearch/elasticsearch.py +470 -0
- unstructured_ingest/v2/processes/connectors/elasticsearch/opensearch.py +195 -0
- unstructured_ingest/v2/processes/connectors/fsspec/__init__.py +37 -0
- unstructured_ingest/v2/processes/connectors/fsspec/azure.py +197 -0
- unstructured_ingest/v2/processes/connectors/fsspec/box.py +170 -0
- unstructured_ingest/v2/processes/connectors/fsspec/dropbox.py +168 -0
- unstructured_ingest/v2/processes/connectors/fsspec/fsspec.py +332 -0
- unstructured_ingest/v2/processes/connectors/fsspec/gcs.py +197 -0
- unstructured_ingest/v2/processes/connectors/fsspec/s3.py +185 -0
- unstructured_ingest/v2/processes/connectors/fsspec/sftp.py +171 -0
- unstructured_ingest/v2/processes/connectors/fsspec/utils.py +17 -0
- unstructured_ingest/v2/processes/connectors/gitlab.py +268 -0
- unstructured_ingest/v2/processes/connectors/google_drive.py +348 -0
- unstructured_ingest/v2/processes/connectors/kafka/__init__.py +17 -0
- unstructured_ingest/v2/processes/connectors/kafka/cloud.py +121 -0
- unstructured_ingest/v2/processes/connectors/kafka/kafka.py +273 -0
- unstructured_ingest/v2/processes/connectors/kafka/local.py +103 -0
- unstructured_ingest/v2/processes/connectors/kdbai.py +148 -0
- unstructured_ingest/v2/processes/connectors/lancedb/__init__.py +30 -0
- unstructured_ingest/v2/processes/connectors/lancedb/aws.py +43 -0
- unstructured_ingest/v2/processes/connectors/lancedb/azure.py +43 -0
- unstructured_ingest/v2/processes/connectors/lancedb/cloud.py +42 -0
- unstructured_ingest/v2/processes/connectors/lancedb/gcp.py +44 -0
- unstructured_ingest/v2/processes/connectors/lancedb/lancedb.py +169 -0
- unstructured_ingest/v2/processes/connectors/lancedb/local.py +44 -0
- unstructured_ingest/v2/processes/connectors/local.py +217 -0
- unstructured_ingest/v2/processes/connectors/milvus.py +225 -0
- unstructured_ingest/v2/processes/connectors/mongodb.py +361 -0
- unstructured_ingest/v2/processes/connectors/neo4j.py +385 -0
- unstructured_ingest/v2/processes/connectors/notion/__init__.py +0 -0
- unstructured_ingest/v2/processes/connectors/notion/client.py +349 -0
- unstructured_ingest/v2/processes/connectors/notion/connector.py +346 -0
- unstructured_ingest/v2/processes/connectors/notion/helpers.py +448 -0
- unstructured_ingest/v2/processes/connectors/notion/interfaces.py +32 -0
- unstructured_ingest/v2/processes/connectors/notion/types/__init__.py +0 -0
- unstructured_ingest/v2/processes/connectors/notion/types/block.py +96 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/__init__.py +63 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/bookmark.py +40 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/breadcrumb.py +21 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/bulleted_list_item.py +31 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/callout.py +94 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/child_database.py +23 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/child_page.py +23 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/code.py +43 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/column_list.py +35 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/divider.py +22 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/embed.py +36 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/equation.py +23 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/file.py +49 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/heading.py +37 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/image.py +21 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/link_preview.py +24 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/link_to_page.py +29 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/numbered_list.py +29 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/paragraph.py +31 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/pdf.py +49 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/quote.py +37 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/synced_block.py +57 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/table.py +63 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/table_of_contents.py +23 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/template.py +30 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/todo.py +42 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/toggle.py +37 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/unsupported.py +20 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/video.py +22 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database.py +73 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/__init__.py +106 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/checkbox.py +38 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/created_by.py +35 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/created_time.py +34 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/date.py +41 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/email.py +36 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/files.py +37 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/formula.py +49 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/last_edited_by.py +34 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/last_edited_time.py +34 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/multiselect.py +73 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/number.py +49 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/people.py +41 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/phone_number.py +36 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/relation.py +67 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/rich_text.py +43 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/rollup.py +56 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/select.py +69 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/status.py +81 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/title.py +37 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/unique_id.py +50 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/url.py +37 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/verification.py +78 -0
- unstructured_ingest/v2/processes/connectors/notion/types/date.py +29 -0
- unstructured_ingest/v2/processes/connectors/notion/types/file.py +54 -0
- unstructured_ingest/v2/processes/connectors/notion/types/page.py +45 -0
- unstructured_ingest/v2/processes/connectors/notion/types/parent.py +66 -0
- unstructured_ingest/v2/processes/connectors/notion/types/rich_text.py +189 -0
- unstructured_ingest/v2/processes/connectors/notion/types/user.py +79 -0
- unstructured_ingest/v2/processes/connectors/onedrive.py +447 -0
- unstructured_ingest/v2/processes/connectors/outlook.py +239 -0
- unstructured_ingest/v2/processes/connectors/pinecone.py +277 -0
- unstructured_ingest/v2/processes/connectors/qdrant/__init__.py +16 -0
- unstructured_ingest/v2/processes/connectors/qdrant/cloud.py +59 -0
- unstructured_ingest/v2/processes/connectors/qdrant/local.py +58 -0
- unstructured_ingest/v2/processes/connectors/qdrant/qdrant.py +160 -0
- unstructured_ingest/v2/processes/connectors/qdrant/server.py +60 -0
- unstructured_ingest/v2/processes/connectors/redisdb.py +182 -0
- unstructured_ingest/v2/processes/connectors/salesforce.py +303 -0
- unstructured_ingest/v2/processes/connectors/sharepoint.py +448 -0
- unstructured_ingest/v2/processes/connectors/slack.py +248 -0
- unstructured_ingest/v2/processes/connectors/sql/__init__.py +27 -0
- unstructured_ingest/v2/processes/connectors/sql/postgres.py +162 -0
- unstructured_ingest/v2/processes/connectors/sql/singlestore.py +166 -0
- unstructured_ingest/v2/processes/connectors/sql/snowflake.py +210 -0
- unstructured_ingest/v2/processes/connectors/sql/sql.py +434 -0
- unstructured_ingest/v2/processes/connectors/sql/sqlite.py +168 -0
- unstructured_ingest/v2/processes/connectors/utils.py +29 -0
- unstructured_ingest/v2/processes/connectors/vectara.py +350 -0
- unstructured_ingest/v2/processes/connectors/weaviate/__init__.py +22 -0
- unstructured_ingest/v2/processes/connectors/weaviate/cloud.py +165 -0
- unstructured_ingest/v2/processes/connectors/weaviate/embedded.py +90 -0
- unstructured_ingest/v2/processes/connectors/weaviate/local.py +73 -0
- unstructured_ingest/v2/processes/connectors/weaviate/weaviate.py +267 -0
- unstructured_ingest/v2/processes/embedder.py +195 -0
- unstructured_ingest/v2/processes/filter.py +60 -0
- unstructured_ingest/v2/processes/partitioner.py +188 -0
- unstructured_ingest/v2/processes/uncompress.py +61 -0
- unstructured_ingest/v2/unstructured_api.py +128 -0
- unstructured_ingest/v2/utils.py +61 -0
- unstructured_ingest-0.3.13.dist-info/LICENSE.md +201 -0
- unstructured_ingest-0.3.13.dist-info/METADATA +205 -0
- unstructured_ingest-0.3.13.dist-info/RECORD +557 -0
- unstructured_ingest-0.3.13.dist-info/WHEEL +5 -0
- unstructured_ingest-0.3.13.dist-info/entry_points.txt +2 -0
- unstructured_ingest-0.3.13.dist-info/top_level.txt +2 -0
|
@@ -0,0 +1,218 @@
|
|
|
1
|
+
import typing as t
|
|
2
|
+
from dataclasses import dataclass, field
|
|
3
|
+
|
|
4
|
+
from dataclasses_json.core import Json
|
|
5
|
+
|
|
6
|
+
from unstructured_ingest.connector.elasticsearch import (
|
|
7
|
+
ElasticsearchDestinationConnector,
|
|
8
|
+
ElasticsearchDocumentMeta,
|
|
9
|
+
ElasticsearchIngestDoc,
|
|
10
|
+
ElasticsearchIngestDocBatch,
|
|
11
|
+
ElasticsearchSourceConnector,
|
|
12
|
+
SimpleElasticsearchConfig,
|
|
13
|
+
)
|
|
14
|
+
from unstructured_ingest.enhanced_dataclass import enhanced_field
|
|
15
|
+
from unstructured_ingest.error import DestinationConnectionError, SourceConnectionError
|
|
16
|
+
from unstructured_ingest.interfaces import AccessConfig, BaseSingleIngestDoc
|
|
17
|
+
from unstructured_ingest.logger import logger
|
|
18
|
+
from unstructured_ingest.utils.data_prep import flatten_dict, generator_batching_wbytes
|
|
19
|
+
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
20
|
+
|
|
21
|
+
if t.TYPE_CHECKING:
|
|
22
|
+
from opensearchpy import OpenSearch
|
|
23
|
+
|
|
24
|
+
"""Since the actual OpenSearch project is a fork of Elasticsearch, we are relying
|
|
25
|
+
heavily on the Elasticsearch connector code, inheriting the functionality as much as possible."""
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@dataclass
class OpenSearchAccessConfig(AccessConfig):
    """Connection and credential settings for an OpenSearch cluster.

    The connectors in this module unpack the dictionary produced by
    ``to_dict`` as keyword arguments to ``opensearchpy.OpenSearch``.
    """

    hosts: t.Optional[t.List[str]] = None
    username: t.Optional[str] = None
    # Declared through enhanced_field so it is flagged as sensitive.
    password: t.Optional[str] = enhanced_field(default=None, sensitive=True)
    use_ssl: bool = False
    verify_certs: bool = False
    ssl_show_warn: bool = False
    ca_certs: t.Optional[str] = None
    client_cert: t.Optional[str] = None
    client_key: t.Optional[str] = None

    def to_dict(self, **kwargs) -> t.Dict[str, Json]:
        """Serialize the config and attach an ``http_auth`` credential pair.

        Args:
            **kwargs: Forwarded unchanged to the parent ``to_dict``.

        Returns:
            The parent's dictionary with an extra ``http_auth`` entry holding
            the ``(username, password)`` tuple.
        """
        serialized = super().to_dict(**kwargs)
        # The pair is always added, even when both values are None.
        serialized["http_auth"] = (self.username, self.password)
        return serialized
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
@dataclass
class SimpleOpenSearchConfig(SimpleElasticsearchConfig):
    """Elasticsearch connector config specialised with OpenSearch access settings."""

    # NOTE(review): annotated as OpenSearchAccessConfig but defaults to None;
    # presumably always supplied by callers -- confirm.
    access_config: OpenSearchAccessConfig = None
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
@dataclass
class OpenSearchIngestDoc(ElasticsearchIngestDoc):
    """A single OpenSearch document tracked by the ingest pipeline.

    This class only overrides the config type, the registry name and
    ``get_file``; everything else is inherited from ``ElasticsearchIngestDoc``.
    ``get_file`` is deliberately empty here: ``OpenSearchIngestDocBatch.get_files``
    in this module writes each document's local file.
    """

    connector_config: SimpleOpenSearchConfig
    registry_name: str = "opensearch"

    @SourceConnectionError.wrap
    @requires_dependencies(["opensearchpy"], extras="opensearch")
    @BaseSingleIngestDoc.skip_if_file_exists
    def get_file(self):
        """Do nothing; the batch class is responsible for writing the file."""
        return None
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
@dataclass
class OpenSearchIngestDocBatch(ElasticsearchIngestDocBatch):
    """A batch of OpenSearch documents fetched together by id."""

    connector_config: SimpleOpenSearchConfig
    ingest_docs: t.List[OpenSearchIngestDoc] = field(default_factory=list)
    registry_name: str = "opensearch_batch"

    @requires_dependencies(["opensearchpy"], extras="opensearch")
    def _get_docs(self):
        """Fetch every document whose id is in ``self.list_of_ids``.

        A new OpenSearch client is built on each call from the access config.

        Returns:
            A list with all hits yielded by the ``scan`` helper.
        """
        from opensearchpy import OpenSearch
        from opensearchpy.helpers import scan

        client_kwargs = self.connector_config.access_config.to_dict(apply_name_overload=False)
        ops = OpenSearch(**client_kwargs)

        # Restrict the returned source to the configured fields and ask for
        # the document version alongside each hit.
        id_filter = {"ids": {"values": self.list_of_ids}}
        scan_query = {
            "_source": self.connector_config.fields,
            "version": True,
            "query": id_filter,
        }

        hits = scan(
            ops,
            query=scan_query,
            scroll="1m",
            index=self.connector_config.index_name,
        )
        return list(hits)

    @SourceConnectionError.wrap
    @requires_dependencies(["opensearchpy"], extras="opensearch")
    def get_files(self):
        """Write one local text file per fetched document.

        For each hit an ``OpenSearchIngestDoc`` is created, its source metadata
        is refreshed, and the flattened ``_source`` values are written
        newline-separated to the doc's ``filename``. The doc is then appended
        to ``self.ingest_docs``.
        """
        index_name = self.connector_config.index_name
        for hit in self._get_docs():
            ingest_doc = OpenSearchIngestDoc(
                processor_config=self.processor_config,
                read_config=self.read_config,
                connector_config=self.connector_config,
                document=hit,
                document_meta=ElasticsearchDocumentMeta(index_name, hit["_id"]),
            )
            ingest_doc.update_source_metadata()

            source_body = hit["_source"]
            target = ingest_doc.filename
            flat = flatten_dict(dictionary=source_body)
            text = "\n".join(str(value) for value in flat.values())

            target.parent.mkdir(parents=True, exist_ok=True)
            with open(target, "w", encoding="utf8") as out:
                out.write(text)
            self.ingest_docs.append(ingest_doc)
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
@dataclass
class OpenSearchSourceConnector(ElasticsearchSourceConnector):
    """Fetches particular fields from all documents in a given opensearch cluster and index"""

    connector_config: SimpleOpenSearchConfig
    # Lazily created client; excluded from __init__ and built on first use of `ops`.
    _ops: t.Optional["OpenSearch"] = field(init=False, default=None)

    @property
    def ops(self):
        """Return the cached OpenSearch client, creating it on first access."""
        from opensearchpy import OpenSearch

        if self._ops is None:
            self._ops = OpenSearch(
                **self.connector_config.access_config.to_dict(apply_name_overload=False)
            )
        return self._ops

    def check_connection(self):
        """Ping the cluster and raise if it cannot be reached.

        Raises:
            SourceConnectionError: If the ping returns a falsy value or any
                exception occurs while creating the client or pinging.
        """
        try:
            # Explicit check instead of `assert`: assertions are removed under
            # `python -O`, which would let a failed ping pass unnoticed.
            if not self.ops.ping():
                raise ConnectionError("ping to the OpenSearch cluster was unsuccessful")
        except Exception as e:
            logger.error(f"failed to validate connection: {e}", exc_info=True)
            raise SourceConnectionError(f"failed to validate connection: {e}") from e

    @requires_dependencies(["opensearchpy"], extras="opensearch")
    def _get_doc_ids(self):
        """Fetches all document ids in an index"""
        from opensearchpy.helpers import scan

        # `self.scan_query` is not defined in this class; presumably inherited
        # from ElasticsearchSourceConnector -- confirm.
        hits = scan(
            self.ops,
            query=self.scan_query,
            scroll="1m",
            index=self.connector_config.index_name,
        )

        return [hit["_id"] for hit in hits]

    def get_ingest_docs(self):
        """Fetches all documents in an index, using ids that are fetched with _get_doc_ids

        Returns:
            One ``OpenSearchIngestDocBatch`` per chunk of at most
            ``connector_config.batch_size`` ids, in the order the ids were returned.
        """
        ids = self._get_doc_ids()
        batch_size = self.connector_config.batch_size
        # Consecutive slices of `batch_size` ids; the final slice may be shorter.
        id_batches = [
            ids[start : start + batch_size] for start in range(0, len(ids), batch_size)
        ]
        return [
            OpenSearchIngestDocBatch(
                connector_config=self.connector_config,
                processor_config=self.processor_config,
                read_config=self.read_config,
                list_of_ids=batched_ids,
            )
            for batched_ids in id_batches
        ]
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
@dataclass
class OpenSearchDestinationConnector(ElasticsearchDestinationConnector):
    """Writes element dictionaries to an OpenSearch index using bulk uploads."""

    connector_config: SimpleOpenSearchConfig
    # Excluded from __init__; not assigned anywhere in this class.
    _client: t.Optional["OpenSearch"] = field(init=False, default=None)

    @DestinationConnectionError.wrap
    @requires_dependencies(["opensearchpy"], extras="opensearch")
    def generate_client(self) -> "OpenSearch":
        """Build a new OpenSearch client from the connector's access config."""
        from opensearchpy import OpenSearch

        return OpenSearch(**self.connector_config.access_config.to_dict(apply_name_overload=False))

    @requires_dependencies(["opensearchpy"], extras="opensearch")
    def write_dict(self, *args, elements_dict: t.List[t.Dict[str, t.Any]]) -> None:
        """Upload ``elements_dict`` to the destination index in byte-limited batches.

        Args:
            *args: Accepted and ignored.
            elements_dict: The element dictionaries to upload.

        Each batch produced by ``generator_batching_wbytes`` is sent with
        ``parallel_bulk``; any item reported as unsuccessful is logged as an
        error and the upload carries on.
        """
        logger.info(
            f"writing document batches to destination"
            f" index named {self.connector_config.index_name}"
            f" at {self.connector_config.access_config.hosts}"
            f" with batch size (in bytes) {self.write_config.batch_size_bytes}"
            f" with {self.write_config.num_processes} (number of) processes"
        )
        from opensearchpy.helpers import parallel_bulk

        # `self.client` is not defined in this class; presumably provided by
        # ElasticsearchDestinationConnector -- confirm.
        for batch in generator_batching_wbytes(
            elements_dict, batch_size_limit_bytes=self.write_config.batch_size_bytes
        ):
            for success, info in parallel_bulk(
                self.client, batch, thread_count=self.write_config.num_processes
            ):
                if not success:
                    # The message needs a placeholder for `info`; without one
                    # logging cannot format the record and the detail is lost.
                    logger.error(
                        "upload failed for a batch in opensearch destination connector: %s",
                        info,
                    )
|
|
@@ -0,0 +1,285 @@
|
|
|
1
|
+
import hashlib
|
|
2
|
+
import os
|
|
3
|
+
import typing as t
|
|
4
|
+
from collections import defaultdict
|
|
5
|
+
from dataclasses import dataclass, field
|
|
6
|
+
from itertools import chain
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
from unstructured_ingest.enhanced_dataclass import enhanced_field
|
|
10
|
+
from unstructured_ingest.error import SourceConnectionError, SourceConnectionNetworkError
|
|
11
|
+
from unstructured_ingest.interfaces import (
|
|
12
|
+
AccessConfig,
|
|
13
|
+
BaseConnectorConfig,
|
|
14
|
+
BaseSingleIngestDoc,
|
|
15
|
+
BaseSourceConnector,
|
|
16
|
+
IngestDocCleanupMixin,
|
|
17
|
+
SourceConnectorCleanupMixin,
|
|
18
|
+
SourceMetadata,
|
|
19
|
+
)
|
|
20
|
+
from unstructured_ingest.logger import logger
|
|
21
|
+
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
22
|
+
|
|
23
|
+
MAX_NUM_EMAILS = 1000000 # Maximum number of emails per folder
|
|
24
|
+
if t.TYPE_CHECKING:
|
|
25
|
+
from office365.graph_client import GraphClient
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class MissingFolderError(Exception):
    """Raised when none of the requested names matches a root mail folder."""
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@dataclass
class OutlookAccessConfig(AccessConfig):
    """Secret used to authenticate the Outlook connector."""

    # Hidden from repr, flagged as sensitive, and given the overload name "client_cred".
    client_credential: str = enhanced_field(repr=False, sensitive=True, overload_name="client_cred")
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
@dataclass
class SimpleOutlookConfig(BaseConnectorConfig):
    """Outlook connector settings plus the helpers that obtain a token and client."""

    access_config: OutlookAccessConfig
    user_email: str
    client_id: str
    tenant: t.Optional[str] = field(repr=False, default="common")
    authority_url: t.Optional[str] = field(repr=False, default="https://login.microsoftonline.com")
    outlook_folders: t.List[str] = field(default_factory=list)
    recursive: bool = False
    registry_name: str = "outlook"

    def __post_init__(self):
        """Validate the mandatory values and register the token factory.

        Raises:
            ValueError: If ``client_id``, the client credential or
                ``user_email`` is empty.
        """
        if (
            not self.client_id
            or not self.access_config.client_credential
            or not self.user_email
        ):
            raise ValueError(
                "Please provide one of the following mandatory values:\n"
                "client_id\nclient_cred\nuser_email"
            )
        # Stored as a bound method; _get_client hands it to GraphClient.
        self.token_factory = self._acquire_token

    @requires_dependencies(["msal"])
    def _acquire_token(self):
        """Request a client token for the Microsoft Graph default scope.

        Returns:
            Whatever ``acquire_token_for_client`` returns.

        Raises:
            ValueError: Re-raised after logging when msal rejects the setup.
        """
        from msal import ConfidentialClientApplication

        try:
            msal_app = ConfidentialClientApplication(
                authority=f"{self.authority_url}/{self.tenant}",
                client_id=self.client_id,
                client_credential=self.access_config.client_credential,
            )
            graph_scopes = ["https://graph.microsoft.com/.default"]
            token = msal_app.acquire_token_for_client(scopes=graph_scopes)
        except ValueError as err:
            logger.error("Couldn't set up credentials for Outlook")
            raise err
        return token

    @requires_dependencies(["office365"], extras="outlook")
    def _get_client(self):
        """Create a new ``GraphClient`` that obtains tokens via ``token_factory``."""
        from office365.graph_client import GraphClient

        graph_client = GraphClient(self.token_factory)
        return graph_client
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
@dataclass
class OutlookIngestDoc(IngestDocCleanupMixin, BaseSingleIngestDoc):
    """A single Outlook message that is downloaded as an ``.eml`` file."""

    connector_config: SimpleOutlookConfig
    message_id: str
    registry_name: str = "outlook"

    def __post_init__(self):
        """Compute the download and output paths once the fields are set."""
        self._set_download_paths()

    def hash_mail_name(self, id):
        """Return the first 16 hex characters of the SHA-256 of ``id``.

        Used to derive short file names from message ids.
        """
        digest = hashlib.sha256(id.encode("utf-8"))
        return digest.hexdigest()[:16]

    def _set_download_paths(self) -> None:
        """Derive the download/output directories and file paths for this message."""
        short_name = self.hash_mail_name(self.message_id)

        self.download_dir = Path(f"{self.read_config.download_dir}")
        self.download_filepath = (self.download_dir / f"{short_name}.eml").resolve()

        self.output_dir = Path(f"{self.processor_config.output_dir}")
        self.output_filepath = (self.output_dir / f"{short_name}.eml.json").resolve()

    @property
    def filename(self):
        """Resolved path of the downloaded ``.eml`` file."""
        return Path(self.download_filepath).resolve()

    @property
    def _output_filename(self):
        """Resolved path of the ``.eml.json`` output file."""
        return Path(self.output_filepath).resolve()

    @property
    def record_locator(self) -> t.Optional[t.Dict[str, t.Any]]:
        """Message id and mailbox address for this document."""
        locator = {
            "message_id": self.message_id,
            "user_email": self.connector_config.user_email,
        }
        return locator

    @requires_dependencies(["office365"], extras="outlook")
    def update_source_metadata(self, **kwargs):
        """Query the message and store its metadata in ``self.source_metadata``.

        A 404 response records ``exists=False``; any other
        ``ClientRequestException`` is re-raised.
        """
        from office365.runtime.client_request_exception import ClientRequestException

        try:
            graph = self.connector_config._get_client()
            mailbox = graph.users[self.connector_config.user_email]
            msg = mailbox.messages[self.message_id].get().execute_query()
        except ClientRequestException as e:
            if e.response.status_code != 404:
                raise
            self.source_metadata = SourceMetadata(exists=False)
            return

        self.source_metadata = SourceMetadata(
            date_created=msg.created_datetime.isoformat(),
            date_modified=msg.last_modified_datetime.isoformat(),
            version=msg.get_property("changeKey"),
            source_url=msg.get_property("webLink"),
            exists=True,
        )

    @SourceConnectionNetworkError.wrap
    def _run_download(self, local_file):
        """Download the message content into the open binary file ``local_file``."""
        graph = self.connector_config._get_client()
        message = graph.users[self.connector_config.user_email].messages[self.message_id]
        message.download(local_file).execute_query()

    @SourceConnectionError.wrap
    @BaseSingleIngestDoc.skip_if_file_exists
    @requires_dependencies(["office365"], extras="outlook")
    def get_file(self):
        """Download the message to the download directory as an ``.eml`` file.

        Any exception raised along the way is logged and swallowed; the method
        then returns without writing the success log line.
        """
        try:
            self.connector_config._get_client()
            self.update_source_metadata()
            if not self.download_dir.is_dir():
                logger.debug(f"creating directory: {self.download_dir}")
                self.download_dir.mkdir(parents=True, exist_ok=True)

            eml_path = os.path.join(
                self.download_dir,
                self.hash_mail_name(self.message_id) + ".eml",
            )
            with open(eml_path, "wb") as local_file:
                self._run_download(local_file=local_file)
        except Exception as e:
            logger.error(
                f"Error while downloading and saving file: {self.hash_mail_name(self.message_id)}.",
            )
            logger.error(e)
        else:
            logger.info(f"file downloaded: {self.hash_mail_name(self.message_id)}")
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
@dataclass
class OutlookSourceConnector(SourceConnectorCleanupMixin, BaseSourceConnector):
    """Lists the messages in the requested Outlook root folders and their subfolders."""

    connector_config: SimpleOutlookConfig
    # Lazily created Graph client; excluded from __init__.
    _client: t.Optional["GraphClient"] = field(init=False, default=None)

    @property
    def client(self) -> "GraphClient":
        """Return the cached Graph client, creating it on first access."""
        if self._client is None:
            self._client = self.connector_config._get_client()
        return self._client

    def initialize(self):
        """Resolve the selected folder ids.

        Raises:
            SourceConnectionError: If resolving the folders fails for any reason.
        """
        try:
            self.get_folder_ids()
        except Exception as e:
            # Chain the cause so the original traceback is not lost.
            raise SourceConnectionError(f"failed to validate connection: {e}") from e

    def check_connection(self):
        """Build the client and raise ``SourceConnectionError`` if that fails."""
        try:
            _ = self.client
        except Exception as e:
            logger.error(f"failed to validate connection: {e}", exc_info=True)
            raise SourceConnectionError(f"failed to validate connection: {e}") from e

    def recurse_folders(self, folder_id, main_folder_dict):
        """We only get a count of subfolders for any folder.
        Have to make additional calls to get subfolder ids.

        Args:
            folder_id: Id of the folder whose children are fetched.
            main_folder_dict: Mapping of root folder name to a list of folder
                ids; mutated in place. A subfolder id is appended to every list
                that already contains its parent's id.
        """
        subfolders = (
            self.client.users[self.connector_config.user_email]
            .mail_folders[folder_id]
            .child_folders.get()
            .execute_query()
        )
        for subfolder in subfolders:
            # Read once per subfolder rather than once per dictionary entry.
            parent_id = subfolder.get_property("parentFolderId")
            for folder_ids in main_folder_dict.values():
                if parent_id in folder_ids:
                    folder_ids.append(subfolder.id)
            if subfolder.get_property("childFolderCount") > 0:
                self.recurse_folders(subfolder.id, main_folder_dict)

    def get_folder_ids(self):
        """Sets the mail folder ids and subfolder ids for requested root mail folders.

        Populates ``self.root_folders`` (display name -> folder ids) and
        ``self.selected_folder_ids`` (ids under the requested names, matched
        case-insensitively).

        Raises:
            MissingFolderError: If no folder id matches the requested names.
        """
        # NOTE(review): connector_config.recursive is not consulted here;
        # subfolders are always walked -- confirm this is intended.
        self.root_folders = defaultdict(list)
        root_folders_with_subfolders = []
        get_root_folders = (
            self.client.users[self.connector_config.user_email].mail_folders.get().execute_query()
        )

        for folder in get_root_folders:
            self.root_folders[folder.display_name].append(folder.id)
            if folder.get_property("childFolderCount") > 0:
                root_folders_with_subfolders.append(folder.id)

        for folder in root_folders_with_subfolders:
            self.recurse_folders(folder, self.root_folders)

        # Narrow down all mail folder ids (plus all subfolders) to the ones that were requested.
        # The lower-cased request set is built once, not once per root folder.
        requested = {name.lower() for name in self.connector_config.outlook_folders}
        self.selected_folder_ids = list(
            chain.from_iterable(
                folder_ids
                for name, folder_ids in self.root_folders.items()
                if name.lower() in requested
            )
        )
        if not self.selected_folder_ids:
            raise MissingFolderError(
                "There are no root folders with the names: "
                f"{self.connector_config.outlook_folders}",
            )

    def get_ingest_docs(self):
        """Returns a list of all the message objects that are in the requested root folder(s).

        Relies on ``self.selected_folder_ids``, which ``get_folder_ids`` sets.
        """
        filtered_messages = []

        # Get all the relevant messages in the selected folders/subfolders.
        for folder_id in self.selected_folder_ids:
            messages = (
                self.client.users[self.connector_config.user_email]
                .mail_folders[folder_id]
                .messages.get()
                .top(MAX_NUM_EMAILS)  # Prevents the return from paging
                .execute_query()
            )
            # Skip empty list if there are no messages in folder.
            if messages:
                filtered_messages.append(messages)
        return [
            OutlookIngestDoc(
                connector_config=self.connector_config,
                processor_config=self.processor_config,
                read_config=self.read_config,
                message_id=message.id,
            )
            for message in chain.from_iterable(filtered_messages)
        ]
|
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
import copy
|
|
2
|
+
import json
|
|
3
|
+
import multiprocessing as mp
|
|
4
|
+
import typing as t
|
|
5
|
+
import uuid
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
|
|
8
|
+
from unstructured_ingest.enhanced_dataclass import enhanced_field
|
|
9
|
+
from unstructured_ingest.enhanced_dataclass.core import _asdict
|
|
10
|
+
from unstructured_ingest.error import DestinationConnectionError, WriteError
|
|
11
|
+
from unstructured_ingest.interfaces import (
|
|
12
|
+
AccessConfig,
|
|
13
|
+
BaseConnectorConfig,
|
|
14
|
+
BaseDestinationConnector,
|
|
15
|
+
ConfigSessionHandleMixin,
|
|
16
|
+
IngestDocSessionHandleMixin,
|
|
17
|
+
WriteConfig,
|
|
18
|
+
)
|
|
19
|
+
from unstructured_ingest.logger import logger
|
|
20
|
+
from unstructured_ingest.utils.data_prep import batch_generator, flatten_dict
|
|
21
|
+
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
22
|
+
|
|
23
|
+
if t.TYPE_CHECKING:
|
|
24
|
+
from pinecone import Index as PineconeIndex
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
@dataclass
class PineconeAccessConfig(AccessConfig):
    """Secret used to authenticate against Pinecone."""

    # Flagged as sensitive through enhanced_field; passed to the Pinecone client as `api_key`.
    api_key: str = enhanced_field(sensitive=True)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@dataclass
class SimplePineconeConfig(ConfigSessionHandleMixin, BaseConnectorConfig):
    """Identifies the Pinecone index to write to and how to authenticate."""

    # Name of the target index; used to open and describe the index.
    index_name: str
    # Not read by the destination connector in this module.
    environment: str
    access_config: PineconeAccessConfig
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
@dataclass
class PineconeWriteConfig(WriteConfig):
    """Controls how elements are batched and uploaded to Pinecone."""

    # Number of elements per upsert call.
    batch_size: int = 50
    # 1 uploads sequentially in-process; any other value uses a multiprocessing pool.
    num_processes: int = 1
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
@dataclass
class PineconeDestinationConnector(IngestDocSessionHandleMixin, BaseDestinationConnector):
    """Upserts element dictionaries into a Pinecone index."""

    write_config: PineconeWriteConfig
    connector_config: SimplePineconeConfig
    # Lazily created index handle; see `pinecone_index`.
    _index: t.Optional["PineconeIndex"] = None

    def to_dict(self, **kwargs):
        """Serialize the connector without its live index handle.

        The ``_index`` attribute breaks deepcopy with
        ``TypeError: cannot pickle '_thread.lock' object``, so it is cleared on
        a shallow copy before serializing. The handle has to be re-created
        after deserialization.
        """
        clone = copy.copy(self)
        if hasattr(clone, "_index"):
            clone._index = None
        return _asdict(clone, **kwargs)

    @property
    def pinecone_index(self):
        """Return the cached index handle, creating it on first access."""
        if self._index is None:
            self._index = self.create_index()
        return self._index

    def initialize(self):
        """No setup is performed."""
        return None

    @requires_dependencies(["pinecone"], extras="pinecone")
    def create_index(self) -> "PineconeIndex":
        """Open the configured index and log its description at debug level."""
        from pinecone import Pinecone
        from unstructured import __version__ as unstructured_version

        index_name = self.connector_config.index_name
        pc = Pinecone(
            api_key=self.connector_config.access_config.api_key,
            source_tag=f"unstructured=={unstructured_version}",
        )

        index = pc.Index(index_name)
        # The f-string is built eagerly, so describe_index is called whatever
        # the log level is.
        logger.debug(f"connected to index: {pc.describe_index(self.connector_config.index_name)}")
        return index

    @DestinationConnectionError.wrap
    def check_connection(self):
        """Force creation of the index handle so connection errors surface."""
        _ = self.pinecone_index

    @DestinationConnectionError.wrap
    @requires_dependencies(["pinecone"], extras="pinecone")
    def upsert_batch(self, batch):
        """Upsert one batch of records.

        Raises:
            WriteError: If Pinecone raises an ``ApiException``.
        """
        import pinecone.core.client.exceptions

        target_index = self.pinecone_index
        try:
            response = target_index.upsert(batch)
        except pinecone.core.client.exceptions.ApiException as api_error:
            raise WriteError(f"http error: {api_error}") from api_error
        logger.debug(f"results: {response}")

    def write_dict(self, *args, elements_dict: t.List[t.Dict[str, t.Any]], **kwargs) -> None:
        """Upsert ``elements_dict`` in chunks of ``write_config.batch_size``.

        With ``num_processes == 1`` chunks are uploaded sequentially; otherwise
        they are distributed over a multiprocessing pool.
        """
        logger.info(
            f"Upserting {len(elements_dict)} elements to destination "
            f"index at {self.connector_config.index_name}",
        )

        chunk_size = self.write_config.batch_size
        worker_count = self.write_config.num_processes

        logger.info(f"using {self.write_config.num_processes} processes to upload")
        if worker_count == 1:
            for chunk in batch_generator(elements_dict, chunk_size):
                self.upsert_batch(chunk)
            return

        # NOTE(review): pool.map pickles the bound method and therefore this
        # instance; the to_dict docstring reports `_index` as unpicklable --
        # confirm this path works once the index has been created.
        chunks = list(batch_generator(elements_dict, chunk_size))
        with mp.Pool(processes=worker_count) as pool:
            pool.map(self.upsert_batch, chunks)

    def normalize_dict(self, element_dict: dict) -> dict:
        """Convert an element dictionary into a Pinecone record.

        ``embeddings`` and ``text`` are popped from ``element_dict`` (the input
        is mutated). The remainder is stored both as a JSON string and as
        flattened metadata fields.

        Returns:
            A dict with a random UUID ``id``, the embeddings as ``values`` and
            the assembled ``metadata``.
        """
        # While flatten_dict enables indexing on various fields,
        # element_serialized enables easily reloading the element object to memory.
        # element_serialized is formed without text/embeddings to avoid data bloating.
        record_id = str(uuid.uuid4())
        vector = element_dict.pop("embeddings", None)
        text = element_dict.pop("text", None)

        metadata = {
            "text": text,
            "element_serialized": json.dumps(element_dict),
        }
        metadata.update(
            flatten_dict(
                element_dict,
                separator="-",
                flatten_lists=True,
                remove_none=True,
            )
        )
        return {"id": record_id, "values": vector, "metadata": metadata}
|