unstructured-ingest 1.0.21__tar.gz → 1.0.23__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of unstructured-ingest might be problematic. Click here for more details.
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/PKG-INFO +2 -2
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/pyproject.toml +2 -2
- unstructured_ingest-1.0.23/unstructured_ingest/__version__.py +1 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/embed/mixedbreadai.py +28 -45
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/jira.py +188 -170
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/database_properties/__init__.py +22 -3
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/database_properties/checkbox.py +1 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/database_properties/created_by.py +1 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/database_properties/created_time.py +1 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/database_properties/date.py +1 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/database_properties/email.py +1 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/database_properties/files.py +1 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/database_properties/formula.py +1 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/database_properties/last_edited_by.py +1 -1
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/database_properties/last_edited_time.py +1 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/database_properties/multiselect.py +1 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/database_properties/number.py +1 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/database_properties/people.py +1 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/database_properties/phone_number.py +1 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/database_properties/relation.py +1 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/database_properties/rich_text.py +1 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/database_properties/rollup.py +1 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/database_properties/status.py +1 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/database_properties/title.py +1 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/database_properties/unique_id.py +1 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/database_properties/url.py +1 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/database_properties/verification.py +1 -0
- unstructured_ingest-1.0.21/unstructured_ingest/__version__.py +0 -1
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/.gitignore +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/LICENSE.md +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/README.md +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/__init__.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/cli/README.md +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/cli/__init__.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/cli/base/__init__.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/cli/base/cmd.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/cli/base/dest.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/cli/base/importer.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/cli/base/src.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/cli/cli.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/cli/cmds.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/cli/utils/__init__.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/cli/utils/click.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/cli/utils/model_conversion.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/data_types/__init__.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/data_types/entities.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/data_types/file_data.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/embed/__init__.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/embed/azure_openai.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/embed/bedrock.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/embed/huggingface.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/embed/interfaces.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/embed/octoai.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/embed/openai.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/embed/togetherai.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/embed/vertexai.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/embed/voyageai.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/error.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/errors_v2.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/interfaces/__init__.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/interfaces/connector.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/interfaces/downloader.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/interfaces/indexer.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/interfaces/process.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/interfaces/processor.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/interfaces/upload_stager.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/interfaces/uploader.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/logger.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/main.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/otel.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/pipeline/__init__.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/pipeline/interfaces.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/pipeline/otel.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/pipeline/pipeline.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/pipeline/steps/__init__.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/pipeline/steps/chunk.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/pipeline/steps/download.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/pipeline/steps/embed.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/pipeline/steps/filter.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/pipeline/steps/index.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/pipeline/steps/partition.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/pipeline/steps/stage.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/pipeline/steps/uncompress.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/pipeline/steps/upload.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/__init__.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/chunker.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connector_registry.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/__init__.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/airtable.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/assets/__init__.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/assets/databricks_delta_table_schema.sql +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/assets/weaviate_collection_config.json +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/astradb.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/azure_ai_search.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/chroma.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/confluence.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/couchbase.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/databricks/__init__.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/databricks/volumes.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/databricks/volumes_aws.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/databricks/volumes_azure.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/databricks/volumes_gcp.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/databricks/volumes_native.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/databricks/volumes_table.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/delta_table.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/discord.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/duckdb/__init__.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/duckdb/base.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/duckdb/duckdb.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/duckdb/motherduck.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/elasticsearch/__init__.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/elasticsearch/elasticsearch.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/elasticsearch/opensearch.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/fsspec/__init__.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/fsspec/azure.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/fsspec/box.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/fsspec/dropbox.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/fsspec/fsspec.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/fsspec/gcs.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/fsspec/s3.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/fsspec/sftp.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/fsspec/utils.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/github.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/gitlab.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/google_drive.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/ibm_watsonx/__init__.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/ibm_watsonx/ibm_watsonx_s3.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/kafka/__init__.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/kafka/cloud.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/kafka/kafka.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/kafka/local.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/kdbai.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/lancedb/__init__.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/lancedb/aws.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/lancedb/azure.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/lancedb/cloud.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/lancedb/gcp.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/lancedb/lancedb.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/lancedb/local.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/local.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/milvus.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/mongodb.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/neo4j.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/__init__.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/client.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/connector.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/helpers.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/ingest_backoff/__init__.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/ingest_backoff/_common.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/ingest_backoff/_wrapper.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/ingest_backoff/types.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/interfaces.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/__init__.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/block.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/blocks/__init__.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/blocks/bookmark.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/blocks/breadcrumb.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/blocks/bulleted_list_item.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/blocks/callout.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/blocks/child_database.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/blocks/child_page.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/blocks/code.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/blocks/column_list.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/blocks/divider.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/blocks/embed.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/blocks/equation.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/blocks/file.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/blocks/heading.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/blocks/image.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/blocks/link_preview.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/blocks/link_to_page.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/blocks/numbered_list.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/blocks/paragraph.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/blocks/pdf.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/blocks/quote.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/blocks/synced_block.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/blocks/table.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/blocks/table_of_contents.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/blocks/template.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/blocks/todo.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/blocks/toggle.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/blocks/unsupported.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/blocks/video.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/database.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/database_properties/select.py +1 -1
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/date.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/file.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/page.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/parent.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/rich_text.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/user.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/onedrive.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/outlook.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/pinecone.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/qdrant/__init__.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/qdrant/cloud.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/qdrant/local.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/qdrant/qdrant.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/qdrant/server.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/redisdb.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/salesforce.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/sharepoint.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/slack.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/sql/__init__.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/sql/databricks_delta_tables.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/sql/postgres.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/sql/singlestore.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/sql/snowflake.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/sql/sql.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/sql/sqlite.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/sql/vastdb.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/utils.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/vectara.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/weaviate/__init__.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/weaviate/cloud.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/weaviate/embedded.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/weaviate/local.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/weaviate/weaviate.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/zendesk/__init__.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/zendesk/client.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/zendesk/zendesk.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/embedder.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/filter.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/partitioner.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/uncompress.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/utils/__init__.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/utils/blob_storage.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/unstructured_api.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/utils/__init__.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/utils/chunking.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/utils/compression.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/utils/constants.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/utils/data_prep.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/utils/dep_check.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/utils/html.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/utils/ndjson.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/utils/pydantic_models.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/utils/string_and_date_utils.py +0 -0
- {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/utils/table.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: unstructured_ingest
|
|
3
|
-
Version: 1.0.21
|
|
3
|
+
Version: 1.0.23
|
|
4
4
|
Summary: Local ETL data pipeline to get data RAG ready
|
|
5
5
|
Author-email: Unstructured Technologies <devops@unstructuredai.io>
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -114,7 +114,7 @@ Requires-Dist: unstructured[md]; extra == 'md'
|
|
|
114
114
|
Provides-Extra: milvus
|
|
115
115
|
Requires-Dist: pymilvus; extra == 'milvus'
|
|
116
116
|
Provides-Extra: mixedbreadai
|
|
117
|
-
Requires-Dist: mixedbread
|
|
117
|
+
Requires-Dist: mixedbread; extra == 'mixedbreadai'
|
|
118
118
|
Provides-Extra: mongodb
|
|
119
119
|
Requires-Dist: pymongo; extra == 'mongodb'
|
|
120
120
|
Provides-Extra: msg
|
|
@@ -136,14 +136,14 @@ test = [
|
|
|
136
136
|
"deepdiff",
|
|
137
137
|
"bs4",
|
|
138
138
|
"pandas",
|
|
139
|
-
|
|
140
139
|
# Connector specific deps
|
|
141
140
|
"cryptography",
|
|
142
141
|
"fsspec",
|
|
143
142
|
"vertexai",
|
|
144
143
|
"pyiceberg",
|
|
145
144
|
"pyarrow",
|
|
146
|
-
"networkx"
|
|
145
|
+
"networkx",
|
|
146
|
+
"htmlbuilder",
|
|
147
147
|
]
|
|
148
148
|
# Add constraints needed for CI
|
|
149
149
|
ci = [
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "1.0.23" # pragma: no cover
|
{unstructured_ingest-1.0.21 → unstructured_ingest-1.0.23}/unstructured_ingest/embed/mixedbreadai.py
RENAMED
|
@@ -19,8 +19,7 @@ TRUNCATION_STRATEGY = "end"
|
|
|
19
19
|
|
|
20
20
|
|
|
21
21
|
if TYPE_CHECKING:
|
|
22
|
-
from
|
|
23
|
-
from mixedbread_ai.core import RequestOptions
|
|
22
|
+
from mixedbread import AsyncMixedbread, Mixedbread
|
|
24
23
|
|
|
25
24
|
|
|
26
25
|
class MixedbreadAIEmbeddingConfig(EmbeddingConfig):
|
|
@@ -44,31 +43,33 @@ class MixedbreadAIEmbeddingConfig(EmbeddingConfig):
|
|
|
44
43
|
)
|
|
45
44
|
|
|
46
45
|
@requires_dependencies(
|
|
47
|
-
["
|
|
48
|
-
extras="mixedbreadai",
|
|
46
|
+
["mixedbread"],
|
|
47
|
+
extras="embed-mixedbreadai",
|
|
49
48
|
)
|
|
50
|
-
def get_client(self) -> "MixedbreadAI":
|
|
49
|
+
def get_client(self) -> "Mixedbread":
|
|
51
50
|
"""
|
|
52
51
|
Create the Mixedbread AI client.
|
|
53
52
|
|
|
54
53
|
Returns:
|
|
55
|
-
|
|
54
|
+
Mixedbread: Initialized client.
|
|
56
55
|
"""
|
|
57
|
-
from
|
|
56
|
+
from mixedbread import Mixedbread
|
|
58
57
|
|
|
59
|
-
return
|
|
58
|
+
return Mixedbread(
|
|
60
59
|
api_key=self.api_key.get_secret_value(),
|
|
60
|
+
max_retries=MAX_RETRIES,
|
|
61
61
|
)
|
|
62
62
|
|
|
63
63
|
@requires_dependencies(
|
|
64
|
-
["
|
|
65
|
-
extras="mixedbreadai",
|
|
64
|
+
["mixedbread"],
|
|
65
|
+
extras="embed-mixedbreadai",
|
|
66
66
|
)
|
|
67
|
-
def get_async_client(self) -> "
|
|
68
|
-
from
|
|
67
|
+
def get_async_client(self) -> "AsyncMixedbread":
|
|
68
|
+
from mixedbread import AsyncMixedbread
|
|
69
69
|
|
|
70
|
-
return
|
|
70
|
+
return AsyncMixedbread(
|
|
71
71
|
api_key=self.api_key.get_secret_value(),
|
|
72
|
+
max_retries=MAX_RETRIES,
|
|
72
73
|
)
|
|
73
74
|
|
|
74
75
|
|
|
@@ -88,29 +89,20 @@ class MixedbreadAIEmbeddingEncoder(BaseEmbeddingEncoder):
|
|
|
88
89
|
return self.embed_query(query="Q")
|
|
89
90
|
|
|
90
91
|
@requires_dependencies(
|
|
91
|
-
["
|
|
92
|
+
["mixedbread"],
|
|
92
93
|
extras="embed-mixedbreadai",
|
|
93
94
|
)
|
|
94
|
-
def
|
|
95
|
-
from mixedbread_ai.core import RequestOptions
|
|
96
|
-
|
|
97
|
-
return RequestOptions(
|
|
98
|
-
max_retries=MAX_RETRIES,
|
|
99
|
-
timeout_in_seconds=TIMEOUT,
|
|
100
|
-
additional_headers={"User-Agent": USER_AGENT},
|
|
101
|
-
)
|
|
102
|
-
|
|
103
|
-
def get_client(self) -> "MixedbreadAI":
|
|
95
|
+
def get_client(self) -> "Mixedbread":
|
|
104
96
|
return self.config.get_client()
|
|
105
97
|
|
|
106
|
-
def embed_batch(self, client: "
|
|
107
|
-
response = client.
|
|
98
|
+
def embed_batch(self, client: "Mixedbread", batch: list[str]) -> list[list[float]]:
|
|
99
|
+
response = client.embed(
|
|
108
100
|
model=self.config.embedder_model_name,
|
|
101
|
+
input=batch,
|
|
109
102
|
normalized=True,
|
|
110
103
|
encoding_format=ENCODING_FORMAT,
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
input=batch,
|
|
104
|
+
extra_headers={"User-Agent": USER_AGENT},
|
|
105
|
+
timeout=TIMEOUT,
|
|
114
106
|
)
|
|
115
107
|
return [datum.embedding for datum in response.data]
|
|
116
108
|
|
|
@@ -124,28 +116,19 @@ class AsyncMixedbreadAIEmbeddingEncoder(AsyncBaseEmbeddingEncoder):
|
|
|
124
116
|
return await self.embed_query(query="Q")
|
|
125
117
|
|
|
126
118
|
@requires_dependencies(
|
|
127
|
-
["
|
|
119
|
+
["mixedbread"],
|
|
128
120
|
extras="embed-mixedbreadai",
|
|
129
121
|
)
|
|
130
|
-
def
|
|
131
|
-
from mixedbread_ai.core import RequestOptions
|
|
132
|
-
|
|
133
|
-
return RequestOptions(
|
|
134
|
-
max_retries=MAX_RETRIES,
|
|
135
|
-
timeout_in_seconds=TIMEOUT,
|
|
136
|
-
additional_headers={"User-Agent": USER_AGENT},
|
|
137
|
-
)
|
|
138
|
-
|
|
139
|
-
def get_client(self) -> "AsyncMixedbreadAI":
|
|
122
|
+
def get_client(self) -> "AsyncMixedbread":
|
|
140
123
|
return self.config.get_async_client()
|
|
141
124
|
|
|
142
|
-
async def embed_batch(self, client: "
|
|
143
|
-
response = await client.
|
|
125
|
+
async def embed_batch(self, client: "AsyncMixedbread", batch: list[str]) -> list[list[float]]:
|
|
126
|
+
response = await client.embed(
|
|
144
127
|
model=self.config.embedder_model_name,
|
|
128
|
+
input=batch,
|
|
145
129
|
normalized=True,
|
|
146
130
|
encoding_format=ENCODING_FORMAT,
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
input=batch,
|
|
131
|
+
extra_headers={"User-Agent": USER_AGENT},
|
|
132
|
+
timeout=TIMEOUT,
|
|
150
133
|
)
|
|
151
134
|
return [datum.embedding for datum in response.data]
|
|
@@ -1,11 +1,11 @@
|
|
|
1
|
-
import math
|
|
2
1
|
from collections import abc
|
|
3
2
|
from contextlib import contextmanager
|
|
4
3
|
from dataclasses import dataclass, field
|
|
5
4
|
from pathlib import Path
|
|
6
|
-
from
|
|
5
|
+
from time import time
|
|
6
|
+
from typing import TYPE_CHECKING, Any, Callable, Generator, List, Optional, Union, cast
|
|
7
7
|
|
|
8
|
-
from pydantic import Field, Secret
|
|
8
|
+
from pydantic import BaseModel, Field, Secret
|
|
9
9
|
|
|
10
10
|
from unstructured_ingest.data_types.file_data import (
|
|
11
11
|
FileData,
|
|
@@ -21,6 +21,7 @@ from unstructured_ingest.interfaces import (
|
|
|
21
21
|
DownloadResponse,
|
|
22
22
|
Indexer,
|
|
23
23
|
IndexerConfig,
|
|
24
|
+
download_responses,
|
|
24
25
|
)
|
|
25
26
|
from unstructured_ingest.logger import logger
|
|
26
27
|
from unstructured_ingest.processes.connector_registry import (
|
|
@@ -37,24 +38,13 @@ DEFAULT_C_SEP = " " * 5
|
|
|
37
38
|
DEFAULT_R_SEP = "\n"
|
|
38
39
|
|
|
39
40
|
|
|
40
|
-
|
|
41
|
-
class JiraIssueMetadata:
|
|
41
|
+
class JiraIssueMetadata(BaseModel):
|
|
42
42
|
id: str
|
|
43
43
|
key: str
|
|
44
|
-
board_id: Optional[str] = None
|
|
45
44
|
|
|
46
|
-
|
|
47
|
-
def project_id(self) -> str:
|
|
45
|
+
def get_project_id(self) -> str:
|
|
48
46
|
return self.key.split("-")[0]
|
|
49
47
|
|
|
50
|
-
def to_dict(self) -> Dict[str, Union[str, None]]:
|
|
51
|
-
return {
|
|
52
|
-
"id": self.id,
|
|
53
|
-
"key": self.key,
|
|
54
|
-
"board_id": self.board_id,
|
|
55
|
-
"project_id": self.project_id,
|
|
56
|
-
}
|
|
57
|
-
|
|
58
48
|
|
|
59
49
|
class FieldGetter(dict):
|
|
60
50
|
def __getitem__(self, key):
|
|
@@ -77,52 +67,32 @@ def nested_object_to_field_getter(obj: dict) -> Union[FieldGetter, dict]:
|
|
|
77
67
|
return obj
|
|
78
68
|
|
|
79
69
|
|
|
80
|
-
def
|
|
81
|
-
""
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
num_iterations = math.ceil(number_of_issues_to_fetch / kwargs["limit"])
|
|
107
|
-
|
|
108
|
-
for _ in range(num_iterations):
|
|
109
|
-
response = func(*args, **kwargs)
|
|
110
|
-
if isinstance(response, list):
|
|
111
|
-
all_results += response
|
|
112
|
-
elif isinstance(response, dict):
|
|
113
|
-
if results_key not in response:
|
|
114
|
-
raise KeyError(f'Response object is missing "{results_key}" key.')
|
|
115
|
-
all_results += response[results_key]
|
|
116
|
-
else:
|
|
117
|
-
raise TypeError(
|
|
118
|
-
f"""Unexpected response type from Jira API.
|
|
119
|
-
Response type has to be either list or dict, got: {type(response).__name__}."""
|
|
120
|
-
)
|
|
121
|
-
kwargs["start"] += kwargs["limit"]
|
|
122
|
-
|
|
123
|
-
return all_results
|
|
124
|
-
|
|
125
|
-
return wrapper
|
|
70
|
+
def api_token_based_generator(
|
|
71
|
+
fn: Callable, key: str = "issues", **kwargs
|
|
72
|
+
) -> Generator[dict, None, None]:
|
|
73
|
+
nextPageToken = kwargs.pop("nextPageToken", None)
|
|
74
|
+
while True:
|
|
75
|
+
resp = fn(nextPageToken=nextPageToken, **kwargs)
|
|
76
|
+
issues = resp.get(key, [])
|
|
77
|
+
for issue in issues:
|
|
78
|
+
yield issue
|
|
79
|
+
nextPageToken = resp.get("nextPageToken")
|
|
80
|
+
if not nextPageToken:
|
|
81
|
+
break
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def api_page_based_generator(
|
|
85
|
+
fn: Callable, key: str = "issues", **kwargs
|
|
86
|
+
) -> Generator[dict, None, None]:
|
|
87
|
+
start = kwargs.pop("start", 0)
|
|
88
|
+
while True:
|
|
89
|
+
resp = fn(start=start, **kwargs)
|
|
90
|
+
issues = resp.get(key, [])
|
|
91
|
+
if not issues:
|
|
92
|
+
break
|
|
93
|
+
for issue in issues:
|
|
94
|
+
yield issue
|
|
95
|
+
start += len(issues)
|
|
126
96
|
|
|
127
97
|
|
|
128
98
|
class JiraAccessConfig(AccessConfig):
|
|
@@ -201,9 +171,17 @@ class JiraConnectionConfig(ConnectionConfig):
|
|
|
201
171
|
|
|
202
172
|
|
|
203
173
|
class JiraIndexerConfig(IndexerConfig):
|
|
204
|
-
projects: Optional[
|
|
205
|
-
boards: Optional[
|
|
206
|
-
issues: Optional[
|
|
174
|
+
projects: Optional[list[str]] = Field(None, description="List of project keys")
|
|
175
|
+
boards: Optional[list[str]] = Field(None, description="List of board IDs")
|
|
176
|
+
issues: Optional[list[str]] = Field(None, description="List of issue keys or IDs")
|
|
177
|
+
status_filters: Optional[list[str]] = Field(
|
|
178
|
+
default=None,
|
|
179
|
+
description="List of status filters, if provided will only return issues that have these statuses", # noqa: E501
|
|
180
|
+
)
|
|
181
|
+
|
|
182
|
+
def model_post_init(self, context: Any, /) -> None:
|
|
183
|
+
if not self.projects and not self.boards and not self.issues:
|
|
184
|
+
raise ValueError("At least one of projects, boards, or issues must be provided.")
|
|
207
185
|
|
|
208
186
|
|
|
209
187
|
@dataclass
|
|
@@ -228,122 +206,103 @@ class JiraIndexer(Indexer):
|
|
|
228
206
|
)
|
|
229
207
|
logger.info("Connection to Jira successful.")
|
|
230
208
|
|
|
231
|
-
def
|
|
209
|
+
def _get_issues_within_projects(self) -> Generator[JiraIssueMetadata, None, None]:
|
|
232
210
|
with self.connection_config.get_client() as client:
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
if not number_of_issues_to_fetch:
|
|
239
|
-
logger.warning(f"No issues found in project: {project_key}. Skipping!")
|
|
240
|
-
return []
|
|
241
|
-
get_project_issues = issues_fetcher_wrapper(
|
|
242
|
-
client.get_all_project_issues,
|
|
243
|
-
results_key="issues",
|
|
244
|
-
number_of_issues_to_fetch=number_of_issues_to_fetch,
|
|
245
|
-
)
|
|
246
|
-
issues = get_project_issues(project=project_key, fields=["key", "id"])
|
|
247
|
-
logger.debug(f"Found {len(issues)} issues in project: {project_key}")
|
|
248
|
-
return [JiraIssueMetadata(id=issue["id"], key=issue["key"]) for issue in issues]
|
|
249
|
-
|
|
250
|
-
def _get_issues_within_projects(self) -> List[JiraIssueMetadata]:
|
|
251
|
-
project_keys = self.index_config.projects
|
|
252
|
-
if not project_keys:
|
|
253
|
-
# for when a component list is provided, without any projects
|
|
254
|
-
if self.index_config.boards or self.index_config.issues:
|
|
255
|
-
return []
|
|
256
|
-
# for when no components are provided. all projects will be ingested
|
|
257
|
-
else:
|
|
258
|
-
with self.connection_config.get_client() as client:
|
|
259
|
-
project_keys = [project["key"] for project in client.projects()]
|
|
260
|
-
return [
|
|
261
|
-
issue
|
|
262
|
-
for project_key in project_keys
|
|
263
|
-
for issue in self._get_issues_within_single_project(project_key)
|
|
264
|
-
]
|
|
211
|
+
fields = ["key", "id"]
|
|
212
|
+
jql = "project in ({})".format(", ".join(self.index_config.projects))
|
|
213
|
+
jql = self._update_jql(jql)
|
|
214
|
+
for issue in api_token_based_generator(client.enhanced_jql, jql=jql, fields=fields):
|
|
215
|
+
yield JiraIssueMetadata.model_validate(issue)
|
|
265
216
|
|
|
266
217
|
def _get_issues_within_single_board(self, board_id: str) -> List[JiraIssueMetadata]:
|
|
267
218
|
with self.connection_config.get_client() as client:
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
219
|
+
fields = ["key", "id"]
|
|
220
|
+
if self.index_config.status_filters:
|
|
221
|
+
jql = "status in ({}) ORDER BY id".format(
|
|
222
|
+
", ".join([f'"{s}"' for s in self.index_config.status_filters])
|
|
223
|
+
)
|
|
224
|
+
else:
|
|
225
|
+
jql = "ORDER BY id"
|
|
226
|
+
for issue in api_page_based_generator(
|
|
227
|
+
fn=client.get_issues_for_board, board_id=board_id, fields=fields, jql=jql
|
|
228
|
+
):
|
|
229
|
+
yield JiraIssueMetadata.model_validate(issue)
|
|
230
|
+
|
|
231
|
+
def _get_issues_within_boards(self) -> Generator[JiraIssueMetadata, None, None]:
|
|
280
232
|
if not self.index_config.boards:
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
issue
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
]
|
|
294
|
-
return [JiraIssueMetadata(id=issue["id"], key=issue["key"]) for issue in issues]
|
|
295
|
-
|
|
296
|
-
def get_issues(self) -> List[JiraIssueMetadata]:
|
|
297
|
-
issues = [
|
|
298
|
-
*self._get_issues_within_boards(),
|
|
299
|
-
*self._get_issues_within_projects(),
|
|
300
|
-
*self._get_issues(),
|
|
301
|
-
]
|
|
302
|
-
# Select unique issues by issue 'id'.
|
|
303
|
-
# Since boards issues are fetched first,
|
|
304
|
-
# if there are duplicates, the board issues will be kept,
|
|
305
|
-
# in order to keep issue 'board_id' information.
|
|
306
|
-
seen = set()
|
|
307
|
-
unique_issues: List[JiraIssueMetadata] = []
|
|
308
|
-
for issue in issues:
|
|
309
|
-
if issue.id not in seen:
|
|
310
|
-
unique_issues.append(issue)
|
|
311
|
-
seen.add(issue.id)
|
|
312
|
-
return unique_issues
|
|
233
|
+
yield
|
|
234
|
+
for board_id in self.index_config.boards:
|
|
235
|
+
for issue in self._get_issues_within_single_board(board_id=board_id):
|
|
236
|
+
yield issue
|
|
237
|
+
|
|
238
|
+
def _update_jql(self, jql: str) -> str:
|
|
239
|
+
if self.index_config.status_filters:
|
|
240
|
+
jql += " and status in ({})".format(
|
|
241
|
+
", ".join([f'"{s}"' for s in self.index_config.status_filters])
|
|
242
|
+
)
|
|
243
|
+
jql = jql + " ORDER BY id"
|
|
244
|
+
return jql
|
|
313
245
|
|
|
314
|
-
def
|
|
315
|
-
|
|
246
|
+
def _get_issues_by_keys(self) -> Generator[JiraIssueMetadata, None, None]:
|
|
247
|
+
with self.connection_config.get_client() as client:
|
|
248
|
+
fields = ["key", "id"]
|
|
249
|
+
jql = "key in ({})".format(", ".join(self.index_config.issues))
|
|
250
|
+
jql = self._update_jql(jql)
|
|
251
|
+
for issue in api_token_based_generator(client.enhanced_jql, jql=jql, fields=fields):
|
|
252
|
+
yield JiraIssueMetadata.model_validate(issue)
|
|
253
|
+
|
|
254
|
+
def _create_file_data_from_issue(self, issue: JiraIssueMetadata) -> FileData:
|
|
255
|
+
# Build metadata
|
|
256
|
+
metadata = FileDataSourceMetadata(
|
|
257
|
+
date_processed=str(time()),
|
|
258
|
+
record_locator=issue.model_dump(),
|
|
259
|
+
)
|
|
316
260
|
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
metadata = FileDataSourceMetadata(
|
|
321
|
-
date_processed=str(time()),
|
|
322
|
-
record_locator=issue.to_dict(),
|
|
323
|
-
)
|
|
261
|
+
# Construct relative path and filename
|
|
262
|
+
filename = f"{issue.id}.txt"
|
|
263
|
+
relative_path = str(Path(issue.get_project_id()) / filename)
|
|
324
264
|
|
|
325
|
-
|
|
326
|
-
filename
|
|
327
|
-
relative_path
|
|
265
|
+
source_identifiers = SourceIdentifiers(
|
|
266
|
+
filename=filename,
|
|
267
|
+
fullpath=relative_path,
|
|
268
|
+
rel_path=relative_path,
|
|
269
|
+
)
|
|
328
270
|
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
)
|
|
271
|
+
file_data = FileData(
|
|
272
|
+
identifier=issue.id,
|
|
273
|
+
connector_type=self.connector_type,
|
|
274
|
+
metadata=metadata,
|
|
275
|
+
additional_metadata=issue.model_dump(),
|
|
276
|
+
source_identifiers=source_identifiers,
|
|
277
|
+
)
|
|
278
|
+
return file_data
|
|
279
|
+
|
|
280
|
+
def get_generators(self) -> List[Callable]:
|
|
281
|
+
generators = []
|
|
282
|
+
if self.index_config.boards:
|
|
283
|
+
generators.append(self._get_issues_within_boards)
|
|
284
|
+
if self.index_config.issues:
|
|
285
|
+
generators.append(self._get_issues_by_keys)
|
|
286
|
+
if self.index_config.projects:
|
|
287
|
+
generators.append(self._get_issues_within_projects)
|
|
288
|
+
return generators
|
|
334
289
|
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
290
|
+
def run(self, **kwargs: Any) -> Generator[FileData, None, None]:
|
|
291
|
+
seen_keys = []
|
|
292
|
+
for gen in self.get_generators():
|
|
293
|
+
for issue in gen():
|
|
294
|
+
if not issue:
|
|
295
|
+
continue
|
|
296
|
+
if issue.key in seen_keys:
|
|
297
|
+
continue
|
|
298
|
+
seen_keys.append(issue.key)
|
|
299
|
+
yield self._create_file_data_from_issue(issue=issue)
|
|
343
300
|
|
|
344
301
|
|
|
345
302
|
class JiraDownloaderConfig(DownloaderConfig):
|
|
346
|
-
|
|
303
|
+
download_attachments: bool = Field(
|
|
304
|
+
default=False, description="If True, will download any attachments and process as well"
|
|
305
|
+
)
|
|
347
306
|
|
|
348
307
|
|
|
349
308
|
@dataclass
|
|
@@ -448,7 +407,56 @@ class JiraDownloader(Downloader):
|
|
|
448
407
|
logger.error(f"Failed to fetch issue with key: {issue_key}: {e}", exc_info=True)
|
|
449
408
|
raise SourceConnectionError(f"Failed to fetch issue with key: {issue_key}: {e}")
|
|
450
409
|
|
|
451
|
-
def
|
|
410
|
+
def generate_attachment_file_data(
|
|
411
|
+
self, attachment_dict: dict, parent_filedata: FileData
|
|
412
|
+
) -> FileData:
|
|
413
|
+
new_filedata = parent_filedata.model_copy(deep=True)
|
|
414
|
+
if new_filedata.metadata.record_locator is None:
|
|
415
|
+
new_filedata.metadata.record_locator = {}
|
|
416
|
+
new_filedata.metadata.record_locator["parent_issue"] = (
|
|
417
|
+
parent_filedata.metadata.record_locator["id"]
|
|
418
|
+
)
|
|
419
|
+
# Append an identifier for attachment to not conflict with issue ids
|
|
420
|
+
new_filedata.identifier = "{}a".format(attachment_dict["id"])
|
|
421
|
+
filename = attachment_dict["filename"]
|
|
422
|
+
new_filedata.metadata.filesize_bytes = attachment_dict.pop("size", None)
|
|
423
|
+
new_filedata.metadata.date_created = attachment_dict.pop("created", None)
|
|
424
|
+
new_filedata.metadata.url = attachment_dict.pop("self", None)
|
|
425
|
+
new_filedata.metadata.record_locator = attachment_dict
|
|
426
|
+
new_filedata.source_identifiers = SourceIdentifiers(
|
|
427
|
+
filename=filename,
|
|
428
|
+
fullpath=(Path(str(attachment_dict["id"])) / Path(filename)).as_posix(),
|
|
429
|
+
)
|
|
430
|
+
return new_filedata
|
|
431
|
+
|
|
432
|
+
def process_attachments(
|
|
433
|
+
self, file_data: FileData, attachments: list[dict]
|
|
434
|
+
) -> list[DownloadResponse]:
|
|
435
|
+
with self.connection_config.get_client() as client:
|
|
436
|
+
download_path = self.get_download_path(file_data)
|
|
437
|
+
attachment_download_dir = download_path.parent / "attachments"
|
|
438
|
+
attachment_download_dir.mkdir(parents=True, exist_ok=True)
|
|
439
|
+
download_responses = []
|
|
440
|
+
for attachment in attachments:
|
|
441
|
+
attachment_filename = Path(attachment["filename"])
|
|
442
|
+
attachment_id = attachment["id"]
|
|
443
|
+
attachment_download_path = attachment_download_dir / Path(
|
|
444
|
+
attachment_id
|
|
445
|
+
).with_suffix(attachment_filename.suffix)
|
|
446
|
+
resp = client.get_attachment_content(attachment_id=attachment_id)
|
|
447
|
+
with open(attachment_download_path, "wb") as f:
|
|
448
|
+
f.write(resp)
|
|
449
|
+
attachment_filedata = self.generate_attachment_file_data(
|
|
450
|
+
attachment_dict=attachment, parent_filedata=file_data
|
|
451
|
+
)
|
|
452
|
+
download_responses.append(
|
|
453
|
+
self.generate_download_response(
|
|
454
|
+
file_data=attachment_filedata, download_path=attachment_download_path
|
|
455
|
+
)
|
|
456
|
+
)
|
|
457
|
+
return download_responses
|
|
458
|
+
|
|
459
|
+
def run(self, file_data: FileData, **kwargs: Any) -> download_responses:
|
|
452
460
|
issue_key = file_data.additional_metadata.get("key")
|
|
453
461
|
if not issue_key:
|
|
454
462
|
raise ValueError("Issue key not found in metadata.")
|
|
@@ -463,7 +471,17 @@ class JiraDownloader(Downloader):
|
|
|
463
471
|
with open(download_path, "w") as f:
|
|
464
472
|
f.write(issue_str)
|
|
465
473
|
self.update_file_data(file_data, issue)
|
|
466
|
-
|
|
474
|
+
download_response = self.generate_download_response(
|
|
475
|
+
file_data=file_data, download_path=download_path
|
|
476
|
+
)
|
|
477
|
+
if self.download_config.download_attachments and (
|
|
478
|
+
attachments := issue.get("fields", {}).get("attachment")
|
|
479
|
+
):
|
|
480
|
+
attachment_responses = self.process_attachments(
|
|
481
|
+
file_data=file_data, attachments=attachments
|
|
482
|
+
)
|
|
483
|
+
download_response = [download_response] + attachment_responses
|
|
484
|
+
return download_response
|
|
467
485
|
|
|
468
486
|
|
|
469
487
|
jira_source_entry = SourceRegistryEntry(
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
from typing import Dict
|
|
2
2
|
|
|
3
|
+
from unstructured_ingest.logger import logger
|
|
3
4
|
from unstructured_ingest.processes.connectors.notion.interfaces import DBCellBase, DBPropertyBase
|
|
4
5
|
|
|
5
6
|
from .checkbox import Checkbox, CheckboxCell
|
|
@@ -25,6 +26,13 @@ from .unique_id import UniqueID, UniqueIDCell
|
|
|
25
26
|
from .url import URL, URLCell
|
|
26
27
|
from .verification import Verification, VerificationCell
|
|
27
28
|
|
|
29
|
+
# It's possible to add 'button' property to Notion database.
|
|
30
|
+
# However, current Notion API documentation doesn't mention it.
|
|
31
|
+
# Buttons are only functional inside Notion UI. We can simply
|
|
32
|
+
# ignore them so that the we don't throw an error when trying
|
|
33
|
+
# to map 'button' properties.
|
|
34
|
+
unsupported_db_prop_types = ["button"]
|
|
35
|
+
|
|
28
36
|
db_prop_type_mapping = {
|
|
29
37
|
"checkbox": Checkbox,
|
|
30
38
|
"created_by": CreatedBy,
|
|
@@ -55,7 +63,13 @@ def map_properties(props: Dict[str, dict]) -> Dict[str, DBPropertyBase]:
|
|
|
55
63
|
mapped_dict = {}
|
|
56
64
|
for k, v in props.items():
|
|
57
65
|
try:
|
|
58
|
-
|
|
66
|
+
property_type = v["type"]
|
|
67
|
+
if property_type in unsupported_db_prop_types:
|
|
68
|
+
logger.warning(
|
|
69
|
+
f"Unsupported property type '{property_type}' for property '{k}'. Skipping."
|
|
70
|
+
)
|
|
71
|
+
continue
|
|
72
|
+
mapped_dict[k] = db_prop_type_mapping[property_type].from_dict(v) # type: ignore
|
|
59
73
|
except KeyError as ke:
|
|
60
74
|
raise KeyError(f"failed to map to associated database property -> {k}: {v}") from ke
|
|
61
75
|
|
|
@@ -92,8 +106,13 @@ def map_cells(props: Dict[str, dict]) -> Dict[str, DBCellBase]:
|
|
|
92
106
|
mapped_dict = {}
|
|
93
107
|
for k, v in props.items():
|
|
94
108
|
try:
|
|
95
|
-
|
|
96
|
-
|
|
109
|
+
property_type = v["type"]
|
|
110
|
+
if property_type in unsupported_db_prop_types:
|
|
111
|
+
logger.warning(
|
|
112
|
+
f"Unsupported property type '{property_type}' for property '{k}'. Skipping."
|
|
113
|
+
)
|
|
114
|
+
continue
|
|
115
|
+
mapped_dict[k] = db_cell_type_mapping[property_type].from_dict(v) # type: ignore
|
|
97
116
|
except KeyError as ke:
|
|
98
117
|
raise KeyError(f"failed to map to associated database property -> {k}: {v}") from ke
|
|
99
118
|
|