unstructured-ingest 1.0.2__tar.gz → 1.2.34__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of unstructured-ingest might be problematic. Click here for more details.
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/.gitignore +1 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/PKG-INFO +18 -9
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/pyproject.toml +15 -11
- unstructured_ingest-1.2.34/unstructured_ingest/__version__.py +1 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/cli/README.md +1 -1
- unstructured_ingest-1.2.34/unstructured_ingest/data_types/entities.py +17 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/embed/azure_openai.py +11 -4
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/embed/bedrock.py +148 -35
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/embed/huggingface.py +11 -4
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/embed/interfaces.py +11 -8
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/embed/mixedbreadai.py +30 -44
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/embed/octoai.py +27 -6
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/embed/openai.py +51 -8
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/embed/togetherai.py +38 -6
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/embed/vertexai.py +4 -4
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/embed/voyageai.py +10 -7
- unstructured_ingest-1.2.34/unstructured_ingest/error.py +156 -0
- unstructured_ingest-1.2.34/unstructured_ingest/errors_v2.py +156 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/interfaces/connector.py +7 -1
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/interfaces/downloader.py +2 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/otel.py +18 -1
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/airtable.py +2 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/assets/databricks_delta_table_schema.sql +1 -2
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/astradb.py +100 -8
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/azure_ai_search.py +1 -1
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/chroma.py +2 -2
- unstructured_ingest-1.2.34/unstructured_ingest/processes/connectors/confluence.py +527 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/databricks/volumes.py +25 -11
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/databricks/volumes_table.py +3 -2
- unstructured_ingest-1.2.34/unstructured_ingest/processes/connectors/delta_table.py +310 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/discord.py +4 -3
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/duckdb/duckdb.py +1 -1
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/elasticsearch/elasticsearch.py +24 -24
- unstructured_ingest-1.2.34/unstructured_ingest/processes/connectors/elasticsearch/opensearch.py +523 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/fsspec/azure.py +1 -1
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/fsspec/box.py +1 -1
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/fsspec/dropbox.py +3 -2
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/fsspec/fsspec.py +133 -24
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/fsspec/gcs.py +2 -2
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/fsspec/s3.py +62 -11
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/github.py +8 -3
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/gitlab.py +8 -7
- unstructured_ingest-1.2.34/unstructured_ingest/processes/connectors/google_drive.py +848 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/ibm_watsonx/ibm_watsonx_s3.py +77 -13
- unstructured_ingest-1.2.34/unstructured_ingest/processes/connectors/jira.py +522 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/kafka/kafka.py +5 -5
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/local.py +13 -12
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/milvus.py +94 -8
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/mongodb.py +29 -3
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/neo4j.py +59 -24
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/notion/client.py +14 -14
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/notion/connector.py +3 -1
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/notion/types/blocks/callout.py +40 -3
- unstructured_ingest-1.2.34/unstructured_ingest/processes/connectors/notion/types/blocks/synced_block.py +109 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/notion/types/database_properties/__init__.py +22 -3
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/notion/types/database_properties/checkbox.py +1 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/notion/types/database_properties/created_by.py +1 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/notion/types/database_properties/created_time.py +1 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/notion/types/database_properties/date.py +1 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/notion/types/database_properties/email.py +1 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/notion/types/database_properties/files.py +1 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/notion/types/database_properties/formula.py +1 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/notion/types/database_properties/last_edited_by.py +1 -1
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/notion/types/database_properties/last_edited_time.py +1 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/notion/types/database_properties/multiselect.py +1 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/notion/types/database_properties/number.py +1 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/notion/types/database_properties/people.py +1 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/notion/types/database_properties/phone_number.py +1 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/notion/types/database_properties/relation.py +1 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/notion/types/database_properties/rich_text.py +1 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/notion/types/database_properties/rollup.py +1 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/notion/types/database_properties/select.py +1 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/notion/types/database_properties/status.py +1 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/notion/types/database_properties/title.py +1 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/notion/types/database_properties/unique_id.py +1 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/notion/types/database_properties/url.py +1 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/notion/types/database_properties/verification.py +1 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/notion/types/page.py +9 -2
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/notion/types/user.py +10 -6
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/onedrive.py +25 -7
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/outlook.py +4 -3
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/pinecone.py +35 -7
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/redisdb.py +48 -21
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/salesforce.py +12 -10
- unstructured_ingest-1.2.34/unstructured_ingest/processes/connectors/sharepoint.py +282 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/slack.py +6 -4
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/sql/__init__.py +4 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/sql/databricks_delta_tables.py +1 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/sql/snowflake.py +48 -19
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/sql/sql.py +21 -4
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/sql/sqlite.py +1 -0
- unstructured_ingest-1.2.34/unstructured_ingest/processes/connectors/sql/teradata.py +253 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/vectara.py +2 -2
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/weaviate/cloud.py +1 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/weaviate/weaviate.py +35 -15
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/zendesk/client.py +8 -2
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/zendesk/zendesk.py +14 -7
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/embedder.py +4 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/partitioner.py +1 -1
- unstructured_ingest-1.2.34/unstructured_ingest/processes/utils/__init__.py +8 -0
- unstructured_ingest-1.2.34/unstructured_ingest/processes/utils/logging/connector.py +365 -0
- unstructured_ingest-1.2.34/unstructured_ingest/processes/utils/logging/sanitizer.py +117 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/unstructured_api.py +1 -1
- unstructured_ingest-1.2.34/unstructured_ingest/utils/__init__.py +5 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/utils/compression.py +2 -1
- unstructured_ingest-1.2.34/unstructured_ingest/utils/filesystem.py +27 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/utils/html.py +15 -1
- unstructured_ingest-1.2.34/unstructured_ingest/utils/tls.py +15 -0
- unstructured_ingest-1.0.2/unstructured_ingest/__version__.py +0 -1
- unstructured_ingest-1.0.2/unstructured_ingest/error.py +0 -49
- unstructured_ingest-1.0.2/unstructured_ingest/errors_v2.py +0 -25
- unstructured_ingest-1.0.2/unstructured_ingest/processes/connectors/confluence.py +0 -308
- unstructured_ingest-1.0.2/unstructured_ingest/processes/connectors/delta_table.py +0 -196
- unstructured_ingest-1.0.2/unstructured_ingest/processes/connectors/elasticsearch/opensearch.py +0 -201
- unstructured_ingest-1.0.2/unstructured_ingest/processes/connectors/google_drive.py +0 -488
- unstructured_ingest-1.0.2/unstructured_ingest/processes/connectors/jira.py +0 -455
- unstructured_ingest-1.0.2/unstructured_ingest/processes/connectors/notion/types/blocks/synced_block.py +0 -57
- unstructured_ingest-1.0.2/unstructured_ingest/processes/connectors/sharepoint.py +0 -134
- unstructured_ingest-1.0.2/unstructured_ingest/processes/utils/__init__.py +0 -0
- unstructured_ingest-1.0.2/unstructured_ingest/utils/__init__.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/LICENSE.md +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/README.md +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/__init__.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/cli/__init__.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/cli/base/__init__.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/cli/base/cmd.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/cli/base/dest.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/cli/base/importer.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/cli/base/src.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/cli/cli.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/cli/cmds.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/cli/utils/__init__.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/cli/utils/click.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/cli/utils/model_conversion.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/data_types/__init__.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/data_types/file_data.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/embed/__init__.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/interfaces/__init__.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/interfaces/indexer.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/interfaces/process.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/interfaces/processor.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/interfaces/upload_stager.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/interfaces/uploader.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/logger.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/main.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/pipeline/__init__.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/pipeline/interfaces.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/pipeline/otel.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/pipeline/pipeline.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/pipeline/steps/__init__.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/pipeline/steps/chunk.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/pipeline/steps/download.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/pipeline/steps/embed.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/pipeline/steps/filter.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/pipeline/steps/index.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/pipeline/steps/partition.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/pipeline/steps/stage.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/pipeline/steps/uncompress.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/pipeline/steps/upload.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/__init__.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/chunker.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connector_registry.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/__init__.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/assets/__init__.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/assets/weaviate_collection_config.json +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/couchbase.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/databricks/__init__.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/databricks/volumes_aws.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/databricks/volumes_azure.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/databricks/volumes_gcp.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/databricks/volumes_native.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/duckdb/__init__.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/duckdb/base.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/duckdb/motherduck.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/elasticsearch/__init__.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/fsspec/__init__.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/fsspec/sftp.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/fsspec/utils.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/ibm_watsonx/__init__.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/kafka/__init__.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/kafka/cloud.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/kafka/local.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/kdbai.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/lancedb/__init__.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/lancedb/aws.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/lancedb/azure.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/lancedb/cloud.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/lancedb/gcp.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/lancedb/lancedb.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/lancedb/local.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/notion/__init__.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/notion/helpers.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/notion/ingest_backoff/__init__.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/notion/ingest_backoff/_common.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/notion/ingest_backoff/_wrapper.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/notion/ingest_backoff/types.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/notion/interfaces.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/notion/types/__init__.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/notion/types/block.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/notion/types/blocks/__init__.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/notion/types/blocks/bookmark.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/notion/types/blocks/breadcrumb.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/notion/types/blocks/bulleted_list_item.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/notion/types/blocks/child_database.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/notion/types/blocks/child_page.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/notion/types/blocks/code.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/notion/types/blocks/column_list.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/notion/types/blocks/divider.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/notion/types/blocks/embed.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/notion/types/blocks/equation.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/notion/types/blocks/file.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/notion/types/blocks/heading.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/notion/types/blocks/image.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/notion/types/blocks/link_preview.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/notion/types/blocks/link_to_page.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/notion/types/blocks/numbered_list.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/notion/types/blocks/paragraph.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/notion/types/blocks/pdf.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/notion/types/blocks/quote.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/notion/types/blocks/table.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/notion/types/blocks/table_of_contents.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/notion/types/blocks/template.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/notion/types/blocks/todo.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/notion/types/blocks/toggle.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/notion/types/blocks/unsupported.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/notion/types/blocks/video.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/notion/types/database.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/notion/types/date.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/notion/types/file.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/notion/types/parent.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/notion/types/rich_text.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/qdrant/__init__.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/qdrant/cloud.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/qdrant/local.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/qdrant/qdrant.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/qdrant/server.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/sql/postgres.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/sql/singlestore.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/sql/vastdb.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/utils.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/weaviate/__init__.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/weaviate/embedded.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/weaviate/local.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/connectors/zendesk/__init__.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/filter.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/uncompress.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/processes/utils/blob_storage.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/utils/chunking.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/utils/constants.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/utils/data_prep.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/utils/dep_check.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/utils/ndjson.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/utils/pydantic_models.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/utils/string_and_date_utils.py +0 -0
- {unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/utils/table.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: unstructured_ingest
|
|
3
|
-
Version: 1.0.2
|
|
3
|
+
Version: 1.2.34
|
|
4
4
|
Summary: Local ETL data pipeline to get data RAG ready
|
|
5
5
|
Author-email: Unstructured Technologies <devops@unstructuredai.io>
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -12,14 +12,13 @@ Classifier: Intended Audience :: Science/Research
|
|
|
12
12
|
Classifier: License :: OSI Approved :: Apache Software License
|
|
13
13
|
Classifier: Operating System :: OS Independent
|
|
14
14
|
Classifier: Programming Language :: Python :: 3
|
|
15
|
-
Classifier: Programming Language :: Python :: 3.9
|
|
16
15
|
Classifier: Programming Language :: Python :: 3.10
|
|
17
16
|
Classifier: Programming Language :: Python :: 3.11
|
|
18
17
|
Classifier: Programming Language :: Python :: 3.12
|
|
19
18
|
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
20
|
-
Requires-Python: <3.13,>=3.9
|
|
19
|
+
Requires-Python: <3.13,>=3.10
|
|
20
|
+
Requires-Dist: certifi>=2025.7.14
|
|
21
21
|
Requires-Dist: click
|
|
22
|
-
Requires-Dist: dataclasses-json
|
|
23
22
|
Requires-Dist: opentelemetry-sdk
|
|
24
23
|
Requires-Dist: pydantic>=2.7
|
|
25
24
|
Requires-Dist: python-dateutil
|
|
@@ -36,6 +35,7 @@ Provides-Extra: azure-ai-search
|
|
|
36
35
|
Requires-Dist: azure-search-documents; extra == 'azure-ai-search'
|
|
37
36
|
Provides-Extra: bedrock
|
|
38
37
|
Requires-Dist: aioboto3; extra == 'bedrock'
|
|
38
|
+
Requires-Dist: aiobotocore[boto3]!=2.24.2; extra == 'bedrock'
|
|
39
39
|
Requires-Dist: boto3; extra == 'bedrock'
|
|
40
40
|
Provides-Extra: biomed
|
|
41
41
|
Requires-Dist: bs4; extra == 'biomed'
|
|
@@ -56,11 +56,13 @@ Provides-Extra: databricks-delta-tables
|
|
|
56
56
|
Requires-Dist: databricks-sql-connector; extra == 'databricks-delta-tables'
|
|
57
57
|
Requires-Dist: pandas; extra == 'databricks-delta-tables'
|
|
58
58
|
Provides-Extra: databricks-volumes
|
|
59
|
-
Requires-Dist: databricks-sdk; extra == 'databricks-volumes'
|
|
59
|
+
Requires-Dist: databricks-sdk>=0.70.0; extra == 'databricks-volumes'
|
|
60
60
|
Provides-Extra: delta-table
|
|
61
61
|
Requires-Dist: boto3; extra == 'delta-table'
|
|
62
62
|
Requires-Dist: deltalake; extra == 'delta-table'
|
|
63
63
|
Requires-Dist: pandas; extra == 'delta-table'
|
|
64
|
+
Requires-Dist: pyarrow; extra == 'delta-table'
|
|
65
|
+
Requires-Dist: tenacity; extra == 'delta-table'
|
|
64
66
|
Provides-Extra: discord
|
|
65
67
|
Requires-Dist: discord-py; extra == 'discord'
|
|
66
68
|
Provides-Extra: doc
|
|
@@ -74,7 +76,7 @@ Provides-Extra: duckdb
|
|
|
74
76
|
Requires-Dist: duckdb; extra == 'duckdb'
|
|
75
77
|
Requires-Dist: pandas; extra == 'duckdb'
|
|
76
78
|
Provides-Extra: elasticsearch
|
|
77
|
-
Requires-Dist: elasticsearch[async]; extra == 'elasticsearch'
|
|
79
|
+
Requires-Dist: elasticsearch[async]<9.0.0; extra == 'elasticsearch'
|
|
78
80
|
Provides-Extra: epub
|
|
79
81
|
Requires-Dist: unstructured[epub]; extra == 'epub'
|
|
80
82
|
Provides-Extra: gcs
|
|
@@ -88,6 +90,7 @@ Provides-Extra: gitlab
|
|
|
88
90
|
Requires-Dist: python-gitlab; extra == 'gitlab'
|
|
89
91
|
Provides-Extra: google-drive
|
|
90
92
|
Requires-Dist: google-api-python-client; extra == 'google-drive'
|
|
93
|
+
Requires-Dist: tenacity; extra == 'google-drive'
|
|
91
94
|
Provides-Extra: hubspot
|
|
92
95
|
Requires-Dist: hubspot-api-client; extra == 'hubspot'
|
|
93
96
|
Requires-Dist: urllib3; extra == 'hubspot'
|
|
@@ -115,7 +118,7 @@ Requires-Dist: unstructured[md]; extra == 'md'
|
|
|
115
118
|
Provides-Extra: milvus
|
|
116
119
|
Requires-Dist: pymilvus; extra == 'milvus'
|
|
117
120
|
Provides-Extra: mixedbreadai
|
|
118
|
-
Requires-Dist: mixedbread
|
|
121
|
+
Requires-Dist: mixedbread; extra == 'mixedbreadai'
|
|
119
122
|
Provides-Extra: mongodb
|
|
120
123
|
Requires-Dist: pymongo; extra == 'mongodb'
|
|
121
124
|
Provides-Extra: msg
|
|
@@ -142,7 +145,9 @@ Provides-Extra: openai
|
|
|
142
145
|
Requires-Dist: openai; extra == 'openai'
|
|
143
146
|
Requires-Dist: tiktoken; extra == 'openai'
|
|
144
147
|
Provides-Extra: opensearch
|
|
145
|
-
Requires-Dist:
|
|
148
|
+
Requires-Dist: boto3>=1.26.0; extra == 'opensearch'
|
|
149
|
+
Requires-Dist: botocore>=1.29.0; extra == 'opensearch'
|
|
150
|
+
Requires-Dist: opensearch-py<3.0.0,>=2.4.0; extra == 'opensearch'
|
|
146
151
|
Provides-Extra: org
|
|
147
152
|
Requires-Dist: unstructured[org]; extra == 'org'
|
|
148
153
|
Provides-Extra: outlook
|
|
@@ -164,7 +169,7 @@ Requires-Dist: qdrant-client; extra == 'qdrant'
|
|
|
164
169
|
Provides-Extra: reddit
|
|
165
170
|
Requires-Dist: praw; extra == 'reddit'
|
|
166
171
|
Provides-Extra: redis
|
|
167
|
-
Requires-Dist: redis; extra == 'redis'
|
|
172
|
+
Requires-Dist: redis<=5.3.0; extra == 'redis'
|
|
168
173
|
Provides-Extra: remote
|
|
169
174
|
Requires-Dist: unstructured-client>=0.30.0; extra == 'remote'
|
|
170
175
|
Provides-Extra: rst
|
|
@@ -192,6 +197,9 @@ Provides-Extra: snowflake
|
|
|
192
197
|
Requires-Dist: pandas; extra == 'snowflake'
|
|
193
198
|
Requires-Dist: psycopg2-binary; extra == 'snowflake'
|
|
194
199
|
Requires-Dist: snowflake-connector-python; extra == 'snowflake'
|
|
200
|
+
Provides-Extra: teradata
|
|
201
|
+
Requires-Dist: pandas; extra == 'teradata'
|
|
202
|
+
Requires-Dist: teradatasql; extra == 'teradata'
|
|
195
203
|
Provides-Extra: togetherai
|
|
196
204
|
Requires-Dist: together; extra == 'togetherai'
|
|
197
205
|
Provides-Extra: tsv
|
|
@@ -208,6 +216,7 @@ Requires-Dist: requests; extra == 'vectara'
|
|
|
208
216
|
Provides-Extra: vertexai
|
|
209
217
|
Requires-Dist: vertexai; extra == 'vertexai'
|
|
210
218
|
Provides-Extra: voyageai
|
|
219
|
+
Requires-Dist: langchain-core<1.0.0,>=0.3.81; extra == 'voyageai'
|
|
211
220
|
Requires-Dist: voyageai; extra == 'voyageai'
|
|
212
221
|
Provides-Extra: weaviate
|
|
213
222
|
Requires-Dist: weaviate-client; extra == 'weaviate'
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "unstructured_ingest"
|
|
3
3
|
description = "Local ETL data pipeline to get data RAG ready"
|
|
4
|
-
requires-python = ">=3.9, <3.13"
|
|
4
|
+
requires-python = ">=3.10, <3.13"
|
|
5
5
|
authors = [{name = "Unstructured Technologies", email = "devops@unstructuredai.io"}]
|
|
6
6
|
classifiers = [
|
|
7
7
|
"Development Status :: 4 - Beta",
|
|
@@ -11,7 +11,6 @@ classifiers = [
|
|
|
11
11
|
"License :: OSI Approved :: Apache Software License",
|
|
12
12
|
"Operating System :: OS Independent",
|
|
13
13
|
"Programming Language :: Python :: 3",
|
|
14
|
-
"Programming Language :: Python :: 3.9",
|
|
15
14
|
"Programming Language :: Python :: 3.10",
|
|
16
15
|
"Programming Language :: Python :: 3.11",
|
|
17
16
|
"Programming Language :: Python :: 3.12",
|
|
@@ -72,6 +71,7 @@ sharepoint = ["requirements/connectors/sharepoint.txt"]
|
|
|
72
71
|
singlestore = ["requirements/connectors/singlestore.txt"]
|
|
73
72
|
slack = ["requirements/connectors/slack.txt"]
|
|
74
73
|
snowflake = ["requirements/connectors/snowflake.txt"]
|
|
74
|
+
teradata = ["requirements/connectors/teradata.txt"]
|
|
75
75
|
vastdb = ["requirements/connectors/vastdb.txt"]
|
|
76
76
|
vectara = ["requirements/connectors/vectara.txt"]
|
|
77
77
|
weaviate = ["requirements/connectors/weaviate.txt"]
|
|
@@ -136,31 +136,25 @@ test = [
|
|
|
136
136
|
"deepdiff",
|
|
137
137
|
"bs4",
|
|
138
138
|
"pandas",
|
|
139
|
-
|
|
140
139
|
# Connector specific deps
|
|
141
140
|
"cryptography",
|
|
142
141
|
"fsspec",
|
|
143
142
|
"vertexai",
|
|
144
143
|
"pyiceberg",
|
|
145
144
|
"pyarrow",
|
|
145
|
+
"networkx",
|
|
146
|
+
"htmlbuilder",
|
|
147
|
+
"office365-rest-python-client",
|
|
146
148
|
]
|
|
147
149
|
# Add constraints needed for CI
|
|
148
150
|
ci = [
|
|
149
|
-
# consistency with local-inference-pin
|
|
150
|
-
"protobuf<4.24",
|
|
151
151
|
"grpcio>=1.65.5",
|
|
152
152
|
# TODO: Pinned in transformers package, remove when that gets updated
|
|
153
153
|
"tokenizers>=0.19,<0.20",
|
|
154
|
-
# TODO: Constaint due to boto, with python before 3.10 not requiring openssl 1.1.1, remove when that gets
|
|
155
|
-
# updated or we drop support for 3.9
|
|
156
|
-
"urllib3<1.27",
|
|
157
154
|
# TODO: Constriant due to aiobotocore, remove when that gets updates:
|
|
158
155
|
"botocore<1.34.132",
|
|
159
156
|
# TODO: Constriant due to both 8.5.0 and 8.4.0 being installed during pip-compile
|
|
160
157
|
"importlib-metadata>=8.5.0",
|
|
161
|
-
# TODO: Constraint due to boto, with python before 3.10 not requiring openssl 1.1.1, remove when that gets
|
|
162
|
-
# updated or we drop support for 3.9
|
|
163
|
-
"urllib3<1.27",
|
|
164
158
|
"unstructured-client>= 0.25.8",
|
|
165
159
|
"fsspec==2024.5.0",
|
|
166
160
|
# python 3.12 support
|
|
@@ -172,6 +166,8 @@ ci = [
|
|
|
172
166
|
"lancedb<=0.15.0",
|
|
173
167
|
# TODO: versions higher than this are missing the macos wheel
|
|
174
168
|
"pykx==2.5.3",
|
|
169
|
+
# TODO: Constraint due to perf-analyzer platform compatibility issues
|
|
170
|
+
"tritonclient<=2.60.0", # Allow 2.60.0 (was working), prevent 2.61.0 (has perf-analyzer issues)
|
|
175
171
|
]
|
|
176
172
|
|
|
177
173
|
[project.scripts]
|
|
@@ -209,3 +205,11 @@ fail_under = 0
|
|
|
209
205
|
|
|
210
206
|
[tool.hatch.build.targets.sdist]
|
|
211
207
|
packages = ["/unstructured_ingest"]
|
|
208
|
+
|
|
209
|
+
[tool.codeflash]
|
|
210
|
+
# All paths are relative to this pyproject.toml's directory.
|
|
211
|
+
module-root = "unstructured_ingest"
|
|
212
|
+
tests-root = "test"
|
|
213
|
+
test-framework = "pytest"
|
|
214
|
+
ignore-paths = []
|
|
215
|
+
formatter-cmds = ["ruff check --exit-zero --fix $file", "ruff format $file"]
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "1.2.34" # pragma: no cover
|
|
@@ -7,7 +7,7 @@ source and destination connectors.
|
|
|
7
7
|
|
|
8
8
|
To manually run the cli:
|
|
9
9
|
```shell
|
|
10
|
-
PYTHONPATH=. python unstructured_ingest/
|
|
10
|
+
PYTHONPATH=. python unstructured_ingest/main.py --help
|
|
11
11
|
```
|
|
12
12
|
|
|
13
13
|
The `main.py` file simply wraps the generated Click command created in `cli.py`.
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
from pydantic import BaseModel, Field
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class Entity(BaseModel):
|
|
5
|
+
type: str
|
|
6
|
+
entity: str
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class EntityRelationship(BaseModel):
|
|
10
|
+
to: str
|
|
11
|
+
from_: str = Field(..., alias="from")
|
|
12
|
+
relationship: str
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class EntitiesData(BaseModel):
|
|
16
|
+
items: list[Entity] = Field(default_factory=list)
|
|
17
|
+
relationships: list[EntityRelationship] = Field(default_factory=list)
|
{unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/embed/azure_openai.py
RENAMED
|
@@ -9,6 +9,7 @@ from unstructured_ingest.embed.openai import (
|
|
|
9
9
|
OpenAIEmbeddingEncoder,
|
|
10
10
|
)
|
|
11
11
|
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
12
|
+
from unstructured_ingest.utils.tls import ssl_context_with_optional_ca_override
|
|
12
13
|
|
|
13
14
|
if TYPE_CHECKING:
|
|
14
15
|
from openai import AsyncAzureOpenAI, AzureOpenAI
|
|
@@ -16,14 +17,18 @@ if TYPE_CHECKING:
|
|
|
16
17
|
|
|
17
18
|
class AzureOpenAIEmbeddingConfig(OpenAIEmbeddingConfig):
|
|
18
19
|
api_version: str = Field(description="Azure API version", default="2024-06-01")
|
|
19
|
-
azure_endpoint: str
|
|
20
|
-
embedder_model_name: str = Field(
|
|
20
|
+
azure_endpoint: str = Field(description="Azure endpoint")
|
|
21
|
+
embedder_model_name: str = Field(
|
|
22
|
+
default="text-embedding-ada-002", alias="model_name", description="Azure OpenAI model name"
|
|
23
|
+
)
|
|
21
24
|
|
|
22
25
|
@requires_dependencies(["openai"], extras="openai")
|
|
23
26
|
def get_client(self) -> "AzureOpenAI":
|
|
24
|
-
from openai import AzureOpenAI
|
|
27
|
+
from openai import AzureOpenAI, DefaultHttpxClient
|
|
25
28
|
|
|
29
|
+
client = DefaultHttpxClient(verify=ssl_context_with_optional_ca_override())
|
|
26
30
|
return AzureOpenAI(
|
|
31
|
+
http_client=client,
|
|
27
32
|
api_key=self.api_key.get_secret_value(),
|
|
28
33
|
api_version=self.api_version,
|
|
29
34
|
azure_endpoint=self.azure_endpoint,
|
|
@@ -31,9 +36,11 @@ class AzureOpenAIEmbeddingConfig(OpenAIEmbeddingConfig):
|
|
|
31
36
|
|
|
32
37
|
@requires_dependencies(["openai"], extras="openai")
|
|
33
38
|
def get_async_client(self) -> "AsyncAzureOpenAI":
|
|
34
|
-
from openai import AsyncAzureOpenAI
|
|
39
|
+
from openai import AsyncAzureOpenAI, DefaultAsyncHttpxClient
|
|
35
40
|
|
|
41
|
+
client = DefaultAsyncHttpxClient(verify=ssl_context_with_optional_ca_override())
|
|
36
42
|
return AsyncAzureOpenAI(
|
|
43
|
+
http_client=client,
|
|
37
44
|
api_key=self.api_key.get_secret_value(),
|
|
38
45
|
api_version=self.api_version,
|
|
39
46
|
azure_endpoint=self.azure_endpoint,
|
{unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/embed/bedrock.py
RENAMED
|
@@ -13,7 +13,7 @@ from unstructured_ingest.embed.interfaces import (
|
|
|
13
13
|
BaseEmbeddingEncoder,
|
|
14
14
|
EmbeddingConfig,
|
|
15
15
|
)
|
|
16
|
-
from unstructured_ingest.
|
|
16
|
+
from unstructured_ingest.error import (
|
|
17
17
|
ProviderError,
|
|
18
18
|
RateLimitError,
|
|
19
19
|
UserAuthError,
|
|
@@ -26,16 +26,32 @@ from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
|
26
26
|
if TYPE_CHECKING:
|
|
27
27
|
from botocore.client import BaseClient
|
|
28
28
|
|
|
29
|
-
class
|
|
30
|
-
def invoke_model(
|
|
29
|
+
class BedrockRuntimeClient(BaseClient):
|
|
30
|
+
def invoke_model(
|
|
31
|
+
self,
|
|
32
|
+
body: str,
|
|
33
|
+
modelId: str,
|
|
34
|
+
accept: str,
|
|
35
|
+
contentType: str,
|
|
36
|
+
inferenceProfileId: str = None,
|
|
37
|
+
) -> dict:
|
|
31
38
|
pass
|
|
32
39
|
|
|
33
|
-
class
|
|
40
|
+
class AsyncBedrockRuntimeClient(BaseClient):
|
|
34
41
|
async def invoke_model(
|
|
35
|
-
self,
|
|
42
|
+
self,
|
|
43
|
+
body: str,
|
|
44
|
+
modelId: str,
|
|
45
|
+
accept: str,
|
|
46
|
+
contentType: str,
|
|
47
|
+
inferenceProfileId: str = None,
|
|
36
48
|
) -> dict:
|
|
37
49
|
pass
|
|
38
50
|
|
|
51
|
+
class BedrockClient(BaseClient):
|
|
52
|
+
def list_foundation_models(self, byOutputModality: str) -> dict:
|
|
53
|
+
pass
|
|
54
|
+
|
|
39
55
|
|
|
40
56
|
def conform_query(query: str, provider: str) -> dict:
|
|
41
57
|
# replace newlines, which can negatively affect performance.
|
|
@@ -54,10 +70,31 @@ def conform_query(query: str, provider: str) -> dict:
|
|
|
54
70
|
|
|
55
71
|
|
|
56
72
|
class BedrockEmbeddingConfig(EmbeddingConfig):
|
|
57
|
-
aws_access_key_id: SecretStr
|
|
58
|
-
aws_secret_access_key: SecretStr
|
|
59
|
-
|
|
60
|
-
|
|
73
|
+
aws_access_key_id: SecretStr | None = Field(description="aws access key id", default=None)
|
|
74
|
+
aws_secret_access_key: SecretStr | None = Field(
|
|
75
|
+
description="aws secret access key", default=None
|
|
76
|
+
)
|
|
77
|
+
region_name: str = Field(
|
|
78
|
+
description="aws region name",
|
|
79
|
+
default_factory=lambda: (
|
|
80
|
+
os.getenv("BEDROCK_REGION_NAME") or
|
|
81
|
+
os.getenv("AWS_DEFAULT_REGION") or
|
|
82
|
+
"us-west-2"
|
|
83
|
+
)
|
|
84
|
+
)
|
|
85
|
+
endpoint_url: str | None = Field(description="custom bedrock endpoint url", default=None)
|
|
86
|
+
access_method: str = Field(
|
|
87
|
+
description="authentication method", default="credentials"
|
|
88
|
+
) # "credentials" or "iam"
|
|
89
|
+
embedder_model_name: str = Field(
|
|
90
|
+
default="amazon.titan-embed-text-v1",
|
|
91
|
+
alias="model_name",
|
|
92
|
+
description="AWS Bedrock model name",
|
|
93
|
+
)
|
|
94
|
+
inference_profile_id: str | None = Field(
|
|
95
|
+
description="AWS Bedrock inference profile ID",
|
|
96
|
+
default_factory=lambda: os.getenv("BEDROCK_INFERENCE_PROFILE_ID"),
|
|
97
|
+
)
|
|
61
98
|
|
|
62
99
|
def wrap_error(self, e: Exception) -> Exception:
|
|
63
100
|
if is_internal_error(e=e):
|
|
@@ -87,19 +124,82 @@ class BedrockEmbeddingConfig(EmbeddingConfig):
|
|
|
87
124
|
logger.error(f"unhandled exception from bedrock: {e}", exc_info=True)
|
|
88
125
|
return e
|
|
89
126
|
|
|
127
|
+
def run_precheck(self) -> None:
|
|
128
|
+
# Validate access method and credentials configuration
|
|
129
|
+
if self.access_method == "credentials":
|
|
130
|
+
if not (self.aws_access_key_id and self.aws_secret_access_key):
|
|
131
|
+
raise ValueError(
|
|
132
|
+
"Credentials access method requires aws_access_key_id and aws_secret_access_key"
|
|
133
|
+
)
|
|
134
|
+
elif self.access_method == "iam":
|
|
135
|
+
# For IAM, credentials are handled by AWS SDK
|
|
136
|
+
pass
|
|
137
|
+
else:
|
|
138
|
+
raise ValueError(
|
|
139
|
+
f"Invalid access_method: {self.access_method}. Must be 'credentials' or 'iam'"
|
|
140
|
+
)
|
|
141
|
+
|
|
142
|
+
client = self.get_bedrock_client()
|
|
143
|
+
try:
|
|
144
|
+
model_info = client.list_foundation_models(byOutputModality="EMBEDDING")
|
|
145
|
+
summaries = model_info.get("modelSummaries", [])
|
|
146
|
+
model_ids = [m["modelId"] for m in summaries]
|
|
147
|
+
arns = [":".join(m["modelArn"]) for m in summaries]
|
|
148
|
+
|
|
149
|
+
if self.embedder_model_name not in model_ids and self.embedder_model_name not in arns:
|
|
150
|
+
raise UserError(
|
|
151
|
+
"model '{}' not found either : {} or {}".format(
|
|
152
|
+
self.embedder_model_name, ", ".join(model_ids), ", ".join(arns)
|
|
153
|
+
)
|
|
154
|
+
)
|
|
155
|
+
except Exception as e:
|
|
156
|
+
raise self.wrap_error(e=e)
|
|
157
|
+
|
|
158
|
+
def get_client_kwargs(self) -> dict:
|
|
159
|
+
kwargs = {
|
|
160
|
+
"region_name": self.region_name,
|
|
161
|
+
}
|
|
162
|
+
|
|
163
|
+
if self.endpoint_url:
|
|
164
|
+
kwargs["endpoint_url"] = self.endpoint_url
|
|
165
|
+
|
|
166
|
+
if self.access_method == "credentials":
|
|
167
|
+
if self.aws_access_key_id and self.aws_secret_access_key:
|
|
168
|
+
kwargs["aws_access_key_id"] = self.aws_access_key_id.get_secret_value()
|
|
169
|
+
kwargs["aws_secret_access_key"] = self.aws_secret_access_key.get_secret_value()
|
|
170
|
+
else:
|
|
171
|
+
raise ValueError(
|
|
172
|
+
"Credentials access method requires aws_access_key_id and aws_secret_access_key"
|
|
173
|
+
)
|
|
174
|
+
elif self.access_method == "iam":
|
|
175
|
+
# For IAM, boto3 will use default credential chain (IAM roles, environment, etc.)
|
|
176
|
+
pass
|
|
177
|
+
else:
|
|
178
|
+
raise ValueError(
|
|
179
|
+
f"Invalid access_method: {self.access_method}. Must be 'credentials' or 'iam'"
|
|
180
|
+
)
|
|
181
|
+
|
|
182
|
+
return kwargs
|
|
183
|
+
|
|
184
|
+
@requires_dependencies(
|
|
185
|
+
["boto3"],
|
|
186
|
+
extras="bedrock",
|
|
187
|
+
)
|
|
188
|
+
def get_bedrock_client(self) -> "BedrockClient":
|
|
189
|
+
import boto3
|
|
190
|
+
|
|
191
|
+
bedrock_client = boto3.client(service_name="bedrock", **self.get_client_kwargs())
|
|
192
|
+
|
|
193
|
+
return bedrock_client
|
|
194
|
+
|
|
90
195
|
@requires_dependencies(
|
|
91
196
|
["boto3", "numpy", "botocore"],
|
|
92
197
|
extras="bedrock",
|
|
93
198
|
)
|
|
94
|
-
def get_client(self) -> "
|
|
199
|
+
def get_client(self) -> "BedrockRuntimeClient":
|
|
95
200
|
import boto3
|
|
96
201
|
|
|
97
|
-
bedrock_client = boto3.client(
|
|
98
|
-
service_name="bedrock-runtime",
|
|
99
|
-
aws_access_key_id=self.aws_access_key_id.get_secret_value(),
|
|
100
|
-
aws_secret_access_key=self.aws_secret_access_key.get_secret_value(),
|
|
101
|
-
region_name=self.region_name,
|
|
102
|
-
)
|
|
202
|
+
bedrock_client = boto3.client(service_name="bedrock-runtime", **self.get_client_kwargs())
|
|
103
203
|
|
|
104
204
|
return bedrock_client
|
|
105
205
|
|
|
@@ -108,16 +208,11 @@ class BedrockEmbeddingConfig(EmbeddingConfig):
|
|
|
108
208
|
extras="bedrock",
|
|
109
209
|
)
|
|
110
210
|
@asynccontextmanager
|
|
111
|
-
async def get_async_client(self) -> AsyncIterable["
|
|
211
|
+
async def get_async_client(self) -> AsyncIterable["AsyncBedrockRuntimeClient"]:
|
|
112
212
|
import aioboto3
|
|
113
213
|
|
|
114
214
|
session = aioboto3.Session()
|
|
115
|
-
async with session.client(
|
|
116
|
-
"bedrock-runtime",
|
|
117
|
-
aws_access_key_id=self.aws_access_key_id.get_secret_value(),
|
|
118
|
-
aws_secret_access_key=self.aws_secret_access_key.get_secret_value(),
|
|
119
|
-
region_name=self.region_name,
|
|
120
|
-
) as aws_bedrock:
|
|
215
|
+
async with session.client("bedrock-runtime", **self.get_client_kwargs()) as aws_bedrock:
|
|
121
216
|
yield aws_bedrock
|
|
122
217
|
|
|
123
218
|
|
|
@@ -125,6 +220,9 @@ class BedrockEmbeddingConfig(EmbeddingConfig):
|
|
|
125
220
|
class BedrockEmbeddingEncoder(BaseEmbeddingEncoder):
|
|
126
221
|
config: BedrockEmbeddingConfig
|
|
127
222
|
|
|
223
|
+
def precheck(self):
|
|
224
|
+
self.config.run_precheck()
|
|
225
|
+
|
|
128
226
|
def wrap_error(self, e: Exception) -> Exception:
|
|
129
227
|
return self.config.wrap_error(e=e)
|
|
130
228
|
|
|
@@ -136,12 +234,18 @@ class BedrockEmbeddingEncoder(BaseEmbeddingEncoder):
|
|
|
136
234
|
bedrock_client = self.config.get_client()
|
|
137
235
|
# invoke bedrock API
|
|
138
236
|
try:
|
|
139
|
-
|
|
140
|
-
body
|
|
141
|
-
modelId
|
|
142
|
-
accept
|
|
143
|
-
contentType
|
|
144
|
-
|
|
237
|
+
invoke_params = {
|
|
238
|
+
"body": json.dumps(body),
|
|
239
|
+
"modelId": self.config.embedder_model_name,
|
|
240
|
+
"accept": "application/json",
|
|
241
|
+
"contentType": "application/json",
|
|
242
|
+
}
|
|
243
|
+
|
|
244
|
+
# Add inference profile if configured
|
|
245
|
+
if self.config.inference_profile_id:
|
|
246
|
+
invoke_params["inferenceProfileId"] = self.config.inference_profile_id
|
|
247
|
+
|
|
248
|
+
response = bedrock_client.invoke_model(**invoke_params)
|
|
145
249
|
except Exception as e:
|
|
146
250
|
raise self.wrap_error(e=e)
|
|
147
251
|
|
|
@@ -168,6 +272,9 @@ class BedrockEmbeddingEncoder(BaseEmbeddingEncoder):
|
|
|
168
272
|
class AsyncBedrockEmbeddingEncoder(AsyncBaseEmbeddingEncoder):
|
|
169
273
|
config: BedrockEmbeddingConfig
|
|
170
274
|
|
|
275
|
+
def precheck(self):
|
|
276
|
+
self.config.run_precheck()
|
|
277
|
+
|
|
171
278
|
def wrap_error(self, e: Exception) -> Exception:
|
|
172
279
|
return self.config.wrap_error(e=e)
|
|
173
280
|
|
|
@@ -179,12 +286,18 @@ class AsyncBedrockEmbeddingEncoder(AsyncBaseEmbeddingEncoder):
|
|
|
179
286
|
async with self.config.get_async_client() as bedrock_client:
|
|
180
287
|
# invoke bedrock API
|
|
181
288
|
try:
|
|
182
|
-
|
|
183
|
-
body
|
|
184
|
-
modelId
|
|
185
|
-
accept
|
|
186
|
-
contentType
|
|
187
|
-
|
|
289
|
+
invoke_params = {
|
|
290
|
+
"body": json.dumps(body),
|
|
291
|
+
"modelId": self.config.embedder_model_name,
|
|
292
|
+
"accept": "application/json",
|
|
293
|
+
"contentType": "application/json",
|
|
294
|
+
}
|
|
295
|
+
|
|
296
|
+
# Add inference profile if configured
|
|
297
|
+
if self.config.inference_profile_id:
|
|
298
|
+
invoke_params["inferenceProfileId"] = self.config.inference_profile_id
|
|
299
|
+
|
|
300
|
+
response = await bedrock_client.invoke_model(**invoke_params)
|
|
188
301
|
except Exception as e:
|
|
189
302
|
raise self.wrap_error(e=e)
|
|
190
303
|
async with response.get("body") as client_response:
|
{unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/embed/huggingface.py
RENAMED
|
@@ -15,15 +15,22 @@ if TYPE_CHECKING:
|
|
|
15
15
|
|
|
16
16
|
|
|
17
17
|
class HuggingFaceEmbeddingConfig(EmbeddingConfig):
|
|
18
|
-
embedder_model_name: Optional[str] = Field(
|
|
18
|
+
embedder_model_name: Optional[str] = Field(
|
|
19
|
+
default="all-MiniLM-L6-v2", alias="model_name", description="HuggingFace model name"
|
|
20
|
+
)
|
|
19
21
|
embedder_model_kwargs: Optional[dict] = Field(
|
|
20
|
-
default_factory=lambda: {"device": "cpu"},
|
|
22
|
+
default_factory=lambda: {"device": "cpu"},
|
|
23
|
+
alias="model_kwargs",
|
|
24
|
+
description="additional model parameters",
|
|
25
|
+
)
|
|
26
|
+
encode_kwargs: Optional[dict] = Field(
|
|
27
|
+
default_factory=lambda: {"normalize_embeddings": False},
|
|
28
|
+
description="additional embedding parameters",
|
|
21
29
|
)
|
|
22
|
-
encode_kwargs: Optional[dict] = Field(default_factory=lambda: {"normalize_embeddings": False})
|
|
23
30
|
|
|
24
31
|
@requires_dependencies(
|
|
25
32
|
["sentence_transformers"],
|
|
26
|
-
extras="
|
|
33
|
+
extras="huggingface",
|
|
27
34
|
)
|
|
28
35
|
def get_client(self) -> "SentenceTransformer":
|
|
29
36
|
from sentence_transformers import SentenceTransformer
|
{unstructured_ingest-1.0.2 → unstructured_ingest-1.2.34}/unstructured_ingest/embed/interfaces.py
RENAMED
|
@@ -20,6 +20,9 @@ class EmbeddingConfig(BaseModel):
|
|
|
20
20
|
class BaseEncoder(ABC):
|
|
21
21
|
config: EmbeddingConfig
|
|
22
22
|
|
|
23
|
+
def precheck(self):
|
|
24
|
+
pass
|
|
25
|
+
|
|
23
26
|
def initialize(self):
|
|
24
27
|
"""Initializes the embedding encoder class. Should also validate the instance
|
|
25
28
|
is properly configured: e.g., embed a single a element"""
|
|
@@ -64,14 +67,14 @@ class BaseEmbeddingEncoder(BaseEncoder, ABC):
|
|
|
64
67
|
elements = elements.copy()
|
|
65
68
|
elements_with_text = [e for e in elements if e.get("text")]
|
|
66
69
|
texts = [e["text"] for e in elements_with_text]
|
|
67
|
-
|
|
70
|
+
all_embeddings = []
|
|
68
71
|
try:
|
|
69
72
|
for batch in batch_generator(texts, batch_size=self.config.batch_size or len(texts)):
|
|
70
|
-
|
|
71
|
-
|
|
73
|
+
embeddings_batch = self.embed_batch(client=client, batch=batch)
|
|
74
|
+
all_embeddings.extend(embeddings_batch)
|
|
72
75
|
except Exception as e:
|
|
73
76
|
raise self.wrap_error(e=e)
|
|
74
|
-
for element, embedding in zip(elements_with_text,
|
|
77
|
+
for element, embedding in zip(elements_with_text, all_embeddings, strict=True):
|
|
75
78
|
element[EMBEDDINGS_KEY] = embedding
|
|
76
79
|
return elements
|
|
77
80
|
|
|
@@ -120,14 +123,14 @@ class AsyncBaseEmbeddingEncoder(BaseEncoder, ABC):
|
|
|
120
123
|
elements = elements.copy()
|
|
121
124
|
elements_with_text = [e for e in elements if e.get("text")]
|
|
122
125
|
texts = [e["text"] for e in elements_with_text]
|
|
123
|
-
|
|
126
|
+
all_embeddings = []
|
|
124
127
|
try:
|
|
125
128
|
for batch in batch_generator(texts, batch_size=self.config.batch_size or len(texts)):
|
|
126
|
-
|
|
127
|
-
|
|
129
|
+
embeddings_batch = await self.embed_batch(client=client, batch=batch)
|
|
130
|
+
all_embeddings.extend(embeddings_batch)
|
|
128
131
|
except Exception as e:
|
|
129
132
|
raise self.wrap_error(e=e)
|
|
130
|
-
for element, embedding in zip(elements_with_text,
|
|
133
|
+
for element, embedding in zip(elements_with_text, all_embeddings, strict=True):
|
|
131
134
|
element[EMBEDDINGS_KEY] = embedding
|
|
132
135
|
return elements
|
|
133
136
|
|