unstructured-ingest 1.0.19__tar.gz → 1.0.23__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of unstructured-ingest might be problematic. Click here for more details.
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/PKG-INFO +2 -2
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/pyproject.toml +2 -2
- unstructured_ingest-1.0.23/unstructured_ingest/__version__.py +1 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/embed/mixedbreadai.py +28 -45
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/jira.py +209 -171
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/database_properties/__init__.py +22 -3
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/database_properties/checkbox.py +1 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/database_properties/created_by.py +1 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/database_properties/created_time.py +1 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/database_properties/date.py +1 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/database_properties/email.py +1 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/database_properties/files.py +1 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/database_properties/formula.py +1 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/database_properties/last_edited_by.py +1 -1
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/database_properties/last_edited_time.py +1 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/database_properties/multiselect.py +1 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/database_properties/number.py +1 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/database_properties/people.py +1 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/database_properties/phone_number.py +1 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/database_properties/relation.py +1 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/database_properties/rich_text.py +1 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/database_properties/rollup.py +1 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/database_properties/status.py +1 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/database_properties/title.py +1 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/database_properties/unique_id.py +1 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/database_properties/url.py +1 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/database_properties/verification.py +1 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/weaviate/weaviate.py +30 -13
- unstructured_ingest-1.0.19/unstructured_ingest/__version__.py +0 -1
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/.gitignore +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/LICENSE.md +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/README.md +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/__init__.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/cli/README.md +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/cli/__init__.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/cli/base/__init__.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/cli/base/cmd.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/cli/base/dest.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/cli/base/importer.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/cli/base/src.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/cli/cli.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/cli/cmds.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/cli/utils/__init__.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/cli/utils/click.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/cli/utils/model_conversion.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/data_types/__init__.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/data_types/entities.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/data_types/file_data.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/embed/__init__.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/embed/azure_openai.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/embed/bedrock.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/embed/huggingface.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/embed/interfaces.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/embed/octoai.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/embed/openai.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/embed/togetherai.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/embed/vertexai.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/embed/voyageai.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/error.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/errors_v2.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/interfaces/__init__.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/interfaces/connector.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/interfaces/downloader.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/interfaces/indexer.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/interfaces/process.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/interfaces/processor.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/interfaces/upload_stager.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/interfaces/uploader.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/logger.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/main.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/otel.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/pipeline/__init__.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/pipeline/interfaces.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/pipeline/otel.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/pipeline/pipeline.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/pipeline/steps/__init__.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/pipeline/steps/chunk.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/pipeline/steps/download.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/pipeline/steps/embed.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/pipeline/steps/filter.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/pipeline/steps/index.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/pipeline/steps/partition.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/pipeline/steps/stage.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/pipeline/steps/uncompress.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/pipeline/steps/upload.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/__init__.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/chunker.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connector_registry.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/__init__.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/airtable.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/assets/__init__.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/assets/databricks_delta_table_schema.sql +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/assets/weaviate_collection_config.json +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/astradb.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/azure_ai_search.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/chroma.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/confluence.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/couchbase.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/databricks/__init__.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/databricks/volumes.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/databricks/volumes_aws.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/databricks/volumes_azure.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/databricks/volumes_gcp.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/databricks/volumes_native.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/databricks/volumes_table.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/delta_table.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/discord.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/duckdb/__init__.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/duckdb/base.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/duckdb/duckdb.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/duckdb/motherduck.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/elasticsearch/__init__.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/elasticsearch/elasticsearch.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/elasticsearch/opensearch.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/fsspec/__init__.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/fsspec/azure.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/fsspec/box.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/fsspec/dropbox.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/fsspec/fsspec.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/fsspec/gcs.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/fsspec/s3.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/fsspec/sftp.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/fsspec/utils.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/github.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/gitlab.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/google_drive.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/ibm_watsonx/__init__.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/ibm_watsonx/ibm_watsonx_s3.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/kafka/__init__.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/kafka/cloud.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/kafka/kafka.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/kafka/local.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/kdbai.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/lancedb/__init__.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/lancedb/aws.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/lancedb/azure.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/lancedb/cloud.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/lancedb/gcp.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/lancedb/lancedb.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/lancedb/local.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/local.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/milvus.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/mongodb.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/neo4j.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/__init__.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/client.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/connector.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/helpers.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/ingest_backoff/__init__.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/ingest_backoff/_common.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/ingest_backoff/_wrapper.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/ingest_backoff/types.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/interfaces.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/__init__.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/block.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/blocks/__init__.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/blocks/bookmark.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/blocks/breadcrumb.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/blocks/bulleted_list_item.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/blocks/callout.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/blocks/child_database.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/blocks/child_page.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/blocks/code.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/blocks/column_list.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/blocks/divider.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/blocks/embed.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/blocks/equation.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/blocks/file.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/blocks/heading.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/blocks/image.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/blocks/link_preview.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/blocks/link_to_page.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/blocks/numbered_list.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/blocks/paragraph.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/blocks/pdf.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/blocks/quote.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/blocks/synced_block.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/blocks/table.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/blocks/table_of_contents.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/blocks/template.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/blocks/todo.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/blocks/toggle.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/blocks/unsupported.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/blocks/video.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/database.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/database_properties/select.py +1 -1
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/date.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/file.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/page.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/parent.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/rich_text.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/notion/types/user.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/onedrive.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/outlook.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/pinecone.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/qdrant/__init__.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/qdrant/cloud.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/qdrant/local.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/qdrant/qdrant.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/qdrant/server.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/redisdb.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/salesforce.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/sharepoint.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/slack.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/sql/__init__.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/sql/databricks_delta_tables.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/sql/postgres.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/sql/singlestore.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/sql/snowflake.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/sql/sql.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/sql/sqlite.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/sql/vastdb.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/utils.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/vectara.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/weaviate/__init__.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/weaviate/cloud.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/weaviate/embedded.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/weaviate/local.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/zendesk/__init__.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/zendesk/client.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/connectors/zendesk/zendesk.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/embedder.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/filter.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/partitioner.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/uncompress.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/utils/__init__.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/processes/utils/blob_storage.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/unstructured_api.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/utils/__init__.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/utils/chunking.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/utils/compression.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/utils/constants.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/utils/data_prep.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/utils/dep_check.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/utils/html.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/utils/ndjson.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/utils/pydantic_models.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/utils/string_and_date_utils.py +0 -0
- {unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/utils/table.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: unstructured_ingest
|
|
3
|
-
Version: 1.0.19
|
|
3
|
+
Version: 1.0.23
|
|
4
4
|
Summary: Local ETL data pipeline to get data RAG ready
|
|
5
5
|
Author-email: Unstructured Technologies <devops@unstructuredai.io>
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -114,7 +114,7 @@ Requires-Dist: unstructured[md]; extra == 'md'
|
|
|
114
114
|
Provides-Extra: milvus
|
|
115
115
|
Requires-Dist: pymilvus; extra == 'milvus'
|
|
116
116
|
Provides-Extra: mixedbreadai
|
|
117
|
-
Requires-Dist: mixedbread-ai; extra == 'mixedbreadai'
|
|
117
|
+
Requires-Dist: mixedbread; extra == 'mixedbreadai'
|
|
118
118
|
Provides-Extra: mongodb
|
|
119
119
|
Requires-Dist: pymongo; extra == 'mongodb'
|
|
120
120
|
Provides-Extra: msg
|
|
@@ -136,14 +136,14 @@ test = [
|
|
|
136
136
|
"deepdiff",
|
|
137
137
|
"bs4",
|
|
138
138
|
"pandas",
|
|
139
|
-
|
|
140
139
|
# Connector specific deps
|
|
141
140
|
"cryptography",
|
|
142
141
|
"fsspec",
|
|
143
142
|
"vertexai",
|
|
144
143
|
"pyiceberg",
|
|
145
144
|
"pyarrow",
|
|
146
|
-
"networkx"
|
|
145
|
+
"networkx",
|
|
146
|
+
"htmlbuilder",
|
|
147
147
|
]
|
|
148
148
|
# Add constraints needed for CI
|
|
149
149
|
ci = [
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "1.0.23" # pragma: no cover
|
{unstructured_ingest-1.0.19 → unstructured_ingest-1.0.23}/unstructured_ingest/embed/mixedbreadai.py
RENAMED
|
@@ -19,8 +19,7 @@ TRUNCATION_STRATEGY = "end"
|
|
|
19
19
|
|
|
20
20
|
|
|
21
21
|
if TYPE_CHECKING:
|
|
22
|
-
from
|
|
23
|
-
from mixedbread_ai.core import RequestOptions
|
|
22
|
+
from mixedbread import AsyncMixedbread, Mixedbread
|
|
24
23
|
|
|
25
24
|
|
|
26
25
|
class MixedbreadAIEmbeddingConfig(EmbeddingConfig):
|
|
@@ -44,31 +43,33 @@ class MixedbreadAIEmbeddingConfig(EmbeddingConfig):
|
|
|
44
43
|
)
|
|
45
44
|
|
|
46
45
|
@requires_dependencies(
|
|
47
|
-
["
|
|
48
|
-
extras="mixedbreadai",
|
|
46
|
+
["mixedbread"],
|
|
47
|
+
extras="embed-mixedbreadai",
|
|
49
48
|
)
|
|
50
|
-
def get_client(self) -> "MixedbreadAI":
|
|
49
|
+
def get_client(self) -> "Mixedbread":
|
|
51
50
|
"""
|
|
52
51
|
Create the Mixedbread AI client.
|
|
53
52
|
|
|
54
53
|
Returns:
|
|
55
|
-
|
|
54
|
+
Mixedbread: Initialized client.
|
|
56
55
|
"""
|
|
57
|
-
from
|
|
56
|
+
from mixedbread import Mixedbread
|
|
58
57
|
|
|
59
|
-
return
|
|
58
|
+
return Mixedbread(
|
|
60
59
|
api_key=self.api_key.get_secret_value(),
|
|
60
|
+
max_retries=MAX_RETRIES,
|
|
61
61
|
)
|
|
62
62
|
|
|
63
63
|
@requires_dependencies(
|
|
64
|
-
["
|
|
65
|
-
extras="mixedbreadai",
|
|
64
|
+
["mixedbread"],
|
|
65
|
+
extras="embed-mixedbreadai",
|
|
66
66
|
)
|
|
67
|
-
def get_async_client(self) -> "
|
|
68
|
-
from
|
|
67
|
+
def get_async_client(self) -> "AsyncMixedbread":
|
|
68
|
+
from mixedbread import AsyncMixedbread
|
|
69
69
|
|
|
70
|
-
return
|
|
70
|
+
return AsyncMixedbread(
|
|
71
71
|
api_key=self.api_key.get_secret_value(),
|
|
72
|
+
max_retries=MAX_RETRIES,
|
|
72
73
|
)
|
|
73
74
|
|
|
74
75
|
|
|
@@ -88,29 +89,20 @@ class MixedbreadAIEmbeddingEncoder(BaseEmbeddingEncoder):
|
|
|
88
89
|
return self.embed_query(query="Q")
|
|
89
90
|
|
|
90
91
|
@requires_dependencies(
|
|
91
|
-
["
|
|
92
|
+
["mixedbread"],
|
|
92
93
|
extras="embed-mixedbreadai",
|
|
93
94
|
)
|
|
94
|
-
def
|
|
95
|
-
from mixedbread_ai.core import RequestOptions
|
|
96
|
-
|
|
97
|
-
return RequestOptions(
|
|
98
|
-
max_retries=MAX_RETRIES,
|
|
99
|
-
timeout_in_seconds=TIMEOUT,
|
|
100
|
-
additional_headers={"User-Agent": USER_AGENT},
|
|
101
|
-
)
|
|
102
|
-
|
|
103
|
-
def get_client(self) -> "MixedbreadAI":
|
|
95
|
+
def get_client(self) -> "Mixedbread":
|
|
104
96
|
return self.config.get_client()
|
|
105
97
|
|
|
106
|
-
def embed_batch(self, client: "
|
|
107
|
-
response = client.
|
|
98
|
+
def embed_batch(self, client: "Mixedbread", batch: list[str]) -> list[list[float]]:
|
|
99
|
+
response = client.embed(
|
|
108
100
|
model=self.config.embedder_model_name,
|
|
101
|
+
input=batch,
|
|
109
102
|
normalized=True,
|
|
110
103
|
encoding_format=ENCODING_FORMAT,
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
input=batch,
|
|
104
|
+
extra_headers={"User-Agent": USER_AGENT},
|
|
105
|
+
timeout=TIMEOUT,
|
|
114
106
|
)
|
|
115
107
|
return [datum.embedding for datum in response.data]
|
|
116
108
|
|
|
@@ -124,28 +116,19 @@ class AsyncMixedbreadAIEmbeddingEncoder(AsyncBaseEmbeddingEncoder):
|
|
|
124
116
|
return await self.embed_query(query="Q")
|
|
125
117
|
|
|
126
118
|
@requires_dependencies(
|
|
127
|
-
["
|
|
119
|
+
["mixedbread"],
|
|
128
120
|
extras="embed-mixedbreadai",
|
|
129
121
|
)
|
|
130
|
-
def
|
|
131
|
-
from mixedbread_ai.core import RequestOptions
|
|
132
|
-
|
|
133
|
-
return RequestOptions(
|
|
134
|
-
max_retries=MAX_RETRIES,
|
|
135
|
-
timeout_in_seconds=TIMEOUT,
|
|
136
|
-
additional_headers={"User-Agent": USER_AGENT},
|
|
137
|
-
)
|
|
138
|
-
|
|
139
|
-
def get_client(self) -> "AsyncMixedbreadAI":
|
|
122
|
+
def get_client(self) -> "AsyncMixedbread":
|
|
140
123
|
return self.config.get_async_client()
|
|
141
124
|
|
|
142
|
-
async def embed_batch(self, client: "
|
|
143
|
-
response = await client.
|
|
125
|
+
async def embed_batch(self, client: "AsyncMixedbread", batch: list[str]) -> list[list[float]]:
|
|
126
|
+
response = await client.embed(
|
|
144
127
|
model=self.config.embedder_model_name,
|
|
128
|
+
input=batch,
|
|
145
129
|
normalized=True,
|
|
146
130
|
encoding_format=ENCODING_FORMAT,
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
input=batch,
|
|
131
|
+
extra_headers={"User-Agent": USER_AGENT},
|
|
132
|
+
timeout=TIMEOUT,
|
|
150
133
|
)
|
|
151
134
|
return [datum.embedding for datum in response.data]
|
|
@@ -1,11 +1,11 @@
|
|
|
1
|
-
import math
|
|
2
1
|
from collections import abc
|
|
3
2
|
from contextlib import contextmanager
|
|
4
3
|
from dataclasses import dataclass, field
|
|
5
4
|
from pathlib import Path
|
|
6
|
-
from
|
|
5
|
+
from time import time
|
|
6
|
+
from typing import TYPE_CHECKING, Any, Callable, Generator, List, Optional, Union, cast
|
|
7
7
|
|
|
8
|
-
from pydantic import Field, Secret
|
|
8
|
+
from pydantic import BaseModel, Field, Secret
|
|
9
9
|
|
|
10
10
|
from unstructured_ingest.data_types.file_data import (
|
|
11
11
|
FileData,
|
|
@@ -21,6 +21,7 @@ from unstructured_ingest.interfaces import (
|
|
|
21
21
|
DownloadResponse,
|
|
22
22
|
Indexer,
|
|
23
23
|
IndexerConfig,
|
|
24
|
+
download_responses,
|
|
24
25
|
)
|
|
25
26
|
from unstructured_ingest.logger import logger
|
|
26
27
|
from unstructured_ingest.processes.connector_registry import (
|
|
@@ -37,24 +38,13 @@ DEFAULT_C_SEP = " " * 5
|
|
|
37
38
|
DEFAULT_R_SEP = "\n"
|
|
38
39
|
|
|
39
40
|
|
|
40
|
-
|
|
41
|
-
class JiraIssueMetadata:
|
|
41
|
+
class JiraIssueMetadata(BaseModel):
|
|
42
42
|
id: str
|
|
43
43
|
key: str
|
|
44
|
-
board_id: Optional[str] = None
|
|
45
44
|
|
|
46
|
-
|
|
47
|
-
def project_id(self) -> str:
|
|
45
|
+
def get_project_id(self) -> str:
|
|
48
46
|
return self.key.split("-")[0]
|
|
49
47
|
|
|
50
|
-
def to_dict(self) -> Dict[str, Union[str, None]]:
|
|
51
|
-
return {
|
|
52
|
-
"id": self.id,
|
|
53
|
-
"key": self.key,
|
|
54
|
-
"board_id": self.board_id,
|
|
55
|
-
"project_id": self.project_id,
|
|
56
|
-
}
|
|
57
|
-
|
|
58
48
|
|
|
59
49
|
class FieldGetter(dict):
|
|
60
50
|
def __getitem__(self, key):
|
|
@@ -77,52 +67,32 @@ def nested_object_to_field_getter(obj: dict) -> Union[FieldGetter, dict]:
|
|
|
77
67
|
return obj
|
|
78
68
|
|
|
79
69
|
|
|
80
|
-
def
|
|
81
|
-
""
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
num_iterations = math.ceil(number_of_issues_to_fetch / kwargs["limit"])
|
|
107
|
-
|
|
108
|
-
for _ in range(num_iterations):
|
|
109
|
-
response = func(*args, **kwargs)
|
|
110
|
-
if isinstance(response, list):
|
|
111
|
-
all_results += response
|
|
112
|
-
elif isinstance(response, dict):
|
|
113
|
-
if results_key not in response:
|
|
114
|
-
raise KeyError(f'Response object is missing "{results_key}" key.')
|
|
115
|
-
all_results += response[results_key]
|
|
116
|
-
else:
|
|
117
|
-
raise TypeError(
|
|
118
|
-
f"""Unexpected response type from Jira API.
|
|
119
|
-
Response type has to be either list or dict, got: {type(response).__name__}."""
|
|
120
|
-
)
|
|
121
|
-
kwargs["start"] += kwargs["limit"]
|
|
122
|
-
|
|
123
|
-
return all_results
|
|
124
|
-
|
|
125
|
-
return wrapper
|
|
70
|
+
def api_token_based_generator(
|
|
71
|
+
fn: Callable, key: str = "issues", **kwargs
|
|
72
|
+
) -> Generator[dict, None, None]:
|
|
73
|
+
nextPageToken = kwargs.pop("nextPageToken", None)
|
|
74
|
+
while True:
|
|
75
|
+
resp = fn(nextPageToken=nextPageToken, **kwargs)
|
|
76
|
+
issues = resp.get(key, [])
|
|
77
|
+
for issue in issues:
|
|
78
|
+
yield issue
|
|
79
|
+
nextPageToken = resp.get("nextPageToken")
|
|
80
|
+
if not nextPageToken:
|
|
81
|
+
break
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def api_page_based_generator(
|
|
85
|
+
fn: Callable, key: str = "issues", **kwargs
|
|
86
|
+
) -> Generator[dict, None, None]:
|
|
87
|
+
start = kwargs.pop("start", 0)
|
|
88
|
+
while True:
|
|
89
|
+
resp = fn(start=start, **kwargs)
|
|
90
|
+
issues = resp.get(key, [])
|
|
91
|
+
if not issues:
|
|
92
|
+
break
|
|
93
|
+
for issue in issues:
|
|
94
|
+
yield issue
|
|
95
|
+
start += len(issues)
|
|
126
96
|
|
|
127
97
|
|
|
128
98
|
class JiraAccessConfig(AccessConfig):
|
|
@@ -169,8 +139,28 @@ class JiraConnectionConfig(ConnectionConfig):
|
|
|
169
139
|
def get_client(self) -> Generator["Jira", None, None]:
|
|
170
140
|
from atlassian import Jira
|
|
171
141
|
|
|
142
|
+
class CustomJira(Jira):
|
|
143
|
+
"""
|
|
144
|
+
Custom Jira class to fix the issue with the get_project_issues_count method.
|
|
145
|
+
This class inherits from the original Jira class and overrides the method to
|
|
146
|
+
handle the response correctly.
|
|
147
|
+
Once the issue is fixed in the original library, this class can be removed.
|
|
148
|
+
"""
|
|
149
|
+
|
|
150
|
+
def __init__(self, *args, **kwargs):
|
|
151
|
+
super().__init__(*args, **kwargs)
|
|
152
|
+
|
|
153
|
+
def get_project_issues_count(self, project: str) -> int:
|
|
154
|
+
jql = f'project = "{project}" '
|
|
155
|
+
response = self.jql(jql, fields="*none")
|
|
156
|
+
response = cast("dict", response)
|
|
157
|
+
if "total" in response:
|
|
158
|
+
return response["total"]
|
|
159
|
+
else:
|
|
160
|
+
return len(response["issues"])
|
|
161
|
+
|
|
172
162
|
access_configs = self.access_config.get_secret_value()
|
|
173
|
-
with
|
|
163
|
+
with CustomJira(
|
|
174
164
|
url=self.url,
|
|
175
165
|
username=self.username,
|
|
176
166
|
password=access_configs.password,
|
|
@@ -181,9 +171,17 @@ class JiraConnectionConfig(ConnectionConfig):
|
|
|
181
171
|
|
|
182
172
|
|
|
183
173
|
class JiraIndexerConfig(IndexerConfig):
|
|
184
|
-
projects: Optional[
|
|
185
|
-
boards: Optional[
|
|
186
|
-
issues: Optional[
|
|
174
|
+
projects: Optional[list[str]] = Field(None, description="List of project keys")
|
|
175
|
+
boards: Optional[list[str]] = Field(None, description="List of board IDs")
|
|
176
|
+
issues: Optional[list[str]] = Field(None, description="List of issue keys or IDs")
|
|
177
|
+
status_filters: Optional[list[str]] = Field(
|
|
178
|
+
default=None,
|
|
179
|
+
description="List of status filters, if provided will only return issues that have these statuses", # noqa: E501
|
|
180
|
+
)
|
|
181
|
+
|
|
182
|
+
def model_post_init(self, context: Any, /) -> None:
|
|
183
|
+
if not self.projects and not self.boards and not self.issues:
|
|
184
|
+
raise ValueError("At least one of projects, boards, or issues must be provided.")
|
|
187
185
|
|
|
188
186
|
|
|
189
187
|
@dataclass
|
|
@@ -208,122 +206,103 @@ class JiraIndexer(Indexer):
|
|
|
208
206
|
)
|
|
209
207
|
logger.info("Connection to Jira successful.")
|
|
210
208
|
|
|
211
|
-
def
|
|
209
|
+
def _get_issues_within_projects(self) -> Generator[JiraIssueMetadata, None, None]:
|
|
212
210
|
with self.connection_config.get_client() as client:
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
if not number_of_issues_to_fetch:
|
|
219
|
-
logger.warning(f"No issues found in project: {project_key}. Skipping!")
|
|
220
|
-
return []
|
|
221
|
-
get_project_issues = issues_fetcher_wrapper(
|
|
222
|
-
client.get_all_project_issues,
|
|
223
|
-
results_key="issues",
|
|
224
|
-
number_of_issues_to_fetch=number_of_issues_to_fetch,
|
|
225
|
-
)
|
|
226
|
-
issues = get_project_issues(project=project_key, fields=["key", "id"])
|
|
227
|
-
logger.debug(f"Found {len(issues)} issues in project: {project_key}")
|
|
228
|
-
return [JiraIssueMetadata(id=issue["id"], key=issue["key"]) for issue in issues]
|
|
229
|
-
|
|
230
|
-
def _get_issues_within_projects(self) -> List[JiraIssueMetadata]:
|
|
231
|
-
project_keys = self.index_config.projects
|
|
232
|
-
if not project_keys:
|
|
233
|
-
# for when a component list is provided, without any projects
|
|
234
|
-
if self.index_config.boards or self.index_config.issues:
|
|
235
|
-
return []
|
|
236
|
-
# for when no components are provided. all projects will be ingested
|
|
237
|
-
else:
|
|
238
|
-
with self.connection_config.get_client() as client:
|
|
239
|
-
project_keys = [project["key"] for project in client.projects()]
|
|
240
|
-
return [
|
|
241
|
-
issue
|
|
242
|
-
for project_key in project_keys
|
|
243
|
-
for issue in self._get_issues_within_single_project(project_key)
|
|
244
|
-
]
|
|
211
|
+
fields = ["key", "id"]
|
|
212
|
+
jql = "project in ({})".format(", ".join(self.index_config.projects))
|
|
213
|
+
jql = self._update_jql(jql)
|
|
214
|
+
for issue in api_token_based_generator(client.enhanced_jql, jql=jql, fields=fields):
|
|
215
|
+
yield JiraIssueMetadata.model_validate(issue)
|
|
245
216
|
|
|
246
217
|
def _get_issues_within_single_board(self, board_id: str) -> List[JiraIssueMetadata]:
|
|
247
218
|
with self.connection_config.get_client() as client:
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
219
|
+
fields = ["key", "id"]
|
|
220
|
+
if self.index_config.status_filters:
|
|
221
|
+
jql = "status in ({}) ORDER BY id".format(
|
|
222
|
+
", ".join([f'"{s}"' for s in self.index_config.status_filters])
|
|
223
|
+
)
|
|
224
|
+
else:
|
|
225
|
+
jql = "ORDER BY id"
|
|
226
|
+
for issue in api_page_based_generator(
|
|
227
|
+
fn=client.get_issues_for_board, board_id=board_id, fields=fields, jql=jql
|
|
228
|
+
):
|
|
229
|
+
yield JiraIssueMetadata.model_validate(issue)
|
|
230
|
+
|
|
231
|
+
def _get_issues_within_boards(self) -> Generator[JiraIssueMetadata, None, None]:
|
|
260
232
|
if not self.index_config.boards:
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
issue
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
]
|
|
274
|
-
return [JiraIssueMetadata(id=issue["id"], key=issue["key"]) for issue in issues]
|
|
275
|
-
|
|
276
|
-
def get_issues(self) -> List[JiraIssueMetadata]:
|
|
277
|
-
issues = [
|
|
278
|
-
*self._get_issues_within_boards(),
|
|
279
|
-
*self._get_issues_within_projects(),
|
|
280
|
-
*self._get_issues(),
|
|
281
|
-
]
|
|
282
|
-
# Select unique issues by issue 'id'.
|
|
283
|
-
# Since boards issues are fetched first,
|
|
284
|
-
# if there are duplicates, the board issues will be kept,
|
|
285
|
-
# in order to keep issue 'board_id' information.
|
|
286
|
-
seen = set()
|
|
287
|
-
unique_issues: List[JiraIssueMetadata] = []
|
|
288
|
-
for issue in issues:
|
|
289
|
-
if issue.id not in seen:
|
|
290
|
-
unique_issues.append(issue)
|
|
291
|
-
seen.add(issue.id)
|
|
292
|
-
return unique_issues
|
|
233
|
+
yield
|
|
234
|
+
for board_id in self.index_config.boards:
|
|
235
|
+
for issue in self._get_issues_within_single_board(board_id=board_id):
|
|
236
|
+
yield issue
|
|
237
|
+
|
|
238
|
+
def _update_jql(self, jql: str) -> str:
|
|
239
|
+
if self.index_config.status_filters:
|
|
240
|
+
jql += " and status in ({})".format(
|
|
241
|
+
", ".join([f'"{s}"' for s in self.index_config.status_filters])
|
|
242
|
+
)
|
|
243
|
+
jql = jql + " ORDER BY id"
|
|
244
|
+
return jql
|
|
293
245
|
|
|
294
|
-
def
|
|
295
|
-
|
|
246
|
+
def _get_issues_by_keys(self) -> Generator[JiraIssueMetadata, None, None]:
|
|
247
|
+
with self.connection_config.get_client() as client:
|
|
248
|
+
fields = ["key", "id"]
|
|
249
|
+
jql = "key in ({})".format(", ".join(self.index_config.issues))
|
|
250
|
+
jql = self._update_jql(jql)
|
|
251
|
+
for issue in api_token_based_generator(client.enhanced_jql, jql=jql, fields=fields):
|
|
252
|
+
yield JiraIssueMetadata.model_validate(issue)
|
|
253
|
+
|
|
254
|
+
def _create_file_data_from_issue(self, issue: JiraIssueMetadata) -> FileData:
|
|
255
|
+
# Build metadata
|
|
256
|
+
metadata = FileDataSourceMetadata(
|
|
257
|
+
date_processed=str(time()),
|
|
258
|
+
record_locator=issue.model_dump(),
|
|
259
|
+
)
|
|
296
260
|
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
metadata = FileDataSourceMetadata(
|
|
301
|
-
date_processed=str(time()),
|
|
302
|
-
record_locator=issue.to_dict(),
|
|
303
|
-
)
|
|
261
|
+
# Construct relative path and filename
|
|
262
|
+
filename = f"{issue.id}.txt"
|
|
263
|
+
relative_path = str(Path(issue.get_project_id()) / filename)
|
|
304
264
|
|
|
305
|
-
|
|
306
|
-
filename
|
|
307
|
-
relative_path
|
|
265
|
+
source_identifiers = SourceIdentifiers(
|
|
266
|
+
filename=filename,
|
|
267
|
+
fullpath=relative_path,
|
|
268
|
+
rel_path=relative_path,
|
|
269
|
+
)
|
|
308
270
|
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
)
|
|
271
|
+
file_data = FileData(
|
|
272
|
+
identifier=issue.id,
|
|
273
|
+
connector_type=self.connector_type,
|
|
274
|
+
metadata=metadata,
|
|
275
|
+
additional_metadata=issue.model_dump(),
|
|
276
|
+
source_identifiers=source_identifiers,
|
|
277
|
+
)
|
|
278
|
+
return file_data
|
|
279
|
+
|
|
280
|
+
def get_generators(self) -> List[Callable]:
|
|
281
|
+
generators = []
|
|
282
|
+
if self.index_config.boards:
|
|
283
|
+
generators.append(self._get_issues_within_boards)
|
|
284
|
+
if self.index_config.issues:
|
|
285
|
+
generators.append(self._get_issues_by_keys)
|
|
286
|
+
if self.index_config.projects:
|
|
287
|
+
generators.append(self._get_issues_within_projects)
|
|
288
|
+
return generators
|
|
314
289
|
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
290
|
+
def run(self, **kwargs: Any) -> Generator[FileData, None, None]:
|
|
291
|
+
seen_keys = []
|
|
292
|
+
for gen in self.get_generators():
|
|
293
|
+
for issue in gen():
|
|
294
|
+
if not issue:
|
|
295
|
+
continue
|
|
296
|
+
if issue.key in seen_keys:
|
|
297
|
+
continue
|
|
298
|
+
seen_keys.append(issue.key)
|
|
299
|
+
yield self._create_file_data_from_issue(issue=issue)
|
|
323
300
|
|
|
324
301
|
|
|
325
302
|
class JiraDownloaderConfig(DownloaderConfig):
|
|
326
|
-
|
|
303
|
+
download_attachments: bool = Field(
|
|
304
|
+
default=False, description="If True, will download any attachments and process as well"
|
|
305
|
+
)
|
|
327
306
|
|
|
328
307
|
|
|
329
308
|
@dataclass
|
|
@@ -428,7 +407,56 @@ class JiraDownloader(Downloader):
|
|
|
428
407
|
logger.error(f"Failed to fetch issue with key: {issue_key}: {e}", exc_info=True)
|
|
429
408
|
raise SourceConnectionError(f"Failed to fetch issue with key: {issue_key}: {e}")
|
|
430
409
|
|
|
431
|
-
def
|
|
410
|
+
def generate_attachment_file_data(
|
|
411
|
+
self, attachment_dict: dict, parent_filedata: FileData
|
|
412
|
+
) -> FileData:
|
|
413
|
+
new_filedata = parent_filedata.model_copy(deep=True)
|
|
414
|
+
if new_filedata.metadata.record_locator is None:
|
|
415
|
+
new_filedata.metadata.record_locator = {}
|
|
416
|
+
new_filedata.metadata.record_locator["parent_issue"] = (
|
|
417
|
+
parent_filedata.metadata.record_locator["id"]
|
|
418
|
+
)
|
|
419
|
+
# Append an identifier for attachment to not conflict with issue ids
|
|
420
|
+
new_filedata.identifier = "{}a".format(attachment_dict["id"])
|
|
421
|
+
filename = attachment_dict["filename"]
|
|
422
|
+
new_filedata.metadata.filesize_bytes = attachment_dict.pop("size", None)
|
|
423
|
+
new_filedata.metadata.date_created = attachment_dict.pop("created", None)
|
|
424
|
+
new_filedata.metadata.url = attachment_dict.pop("self", None)
|
|
425
|
+
new_filedata.metadata.record_locator = attachment_dict
|
|
426
|
+
new_filedata.source_identifiers = SourceIdentifiers(
|
|
427
|
+
filename=filename,
|
|
428
|
+
fullpath=(Path(str(attachment_dict["id"])) / Path(filename)).as_posix(),
|
|
429
|
+
)
|
|
430
|
+
return new_filedata
|
|
431
|
+
|
|
432
|
+
def process_attachments(
|
|
433
|
+
self, file_data: FileData, attachments: list[dict]
|
|
434
|
+
) -> list[DownloadResponse]:
|
|
435
|
+
with self.connection_config.get_client() as client:
|
|
436
|
+
download_path = self.get_download_path(file_data)
|
|
437
|
+
attachment_download_dir = download_path.parent / "attachments"
|
|
438
|
+
attachment_download_dir.mkdir(parents=True, exist_ok=True)
|
|
439
|
+
download_responses = []
|
|
440
|
+
for attachment in attachments:
|
|
441
|
+
attachment_filename = Path(attachment["filename"])
|
|
442
|
+
attachment_id = attachment["id"]
|
|
443
|
+
attachment_download_path = attachment_download_dir / Path(
|
|
444
|
+
attachment_id
|
|
445
|
+
).with_suffix(attachment_filename.suffix)
|
|
446
|
+
resp = client.get_attachment_content(attachment_id=attachment_id)
|
|
447
|
+
with open(attachment_download_path, "wb") as f:
|
|
448
|
+
f.write(resp)
|
|
449
|
+
attachment_filedata = self.generate_attachment_file_data(
|
|
450
|
+
attachment_dict=attachment, parent_filedata=file_data
|
|
451
|
+
)
|
|
452
|
+
download_responses.append(
|
|
453
|
+
self.generate_download_response(
|
|
454
|
+
file_data=attachment_filedata, download_path=attachment_download_path
|
|
455
|
+
)
|
|
456
|
+
)
|
|
457
|
+
return download_responses
|
|
458
|
+
|
|
459
|
+
def run(self, file_data: FileData, **kwargs: Any) -> download_responses:
|
|
432
460
|
issue_key = file_data.additional_metadata.get("key")
|
|
433
461
|
if not issue_key:
|
|
434
462
|
raise ValueError("Issue key not found in metadata.")
|
|
@@ -443,7 +471,17 @@ class JiraDownloader(Downloader):
|
|
|
443
471
|
with open(download_path, "w") as f:
|
|
444
472
|
f.write(issue_str)
|
|
445
473
|
self.update_file_data(file_data, issue)
|
|
446
|
-
|
|
474
|
+
download_response = self.generate_download_response(
|
|
475
|
+
file_data=file_data, download_path=download_path
|
|
476
|
+
)
|
|
477
|
+
if self.download_config.download_attachments and (
|
|
478
|
+
attachments := issue.get("fields", {}).get("attachment")
|
|
479
|
+
):
|
|
480
|
+
attachment_responses = self.process_attachments(
|
|
481
|
+
file_data=file_data, attachments=attachments
|
|
482
|
+
)
|
|
483
|
+
download_response = [download_response] + attachment_responses
|
|
484
|
+
return download_response
|
|
447
485
|
|
|
448
486
|
|
|
449
487
|
jira_source_entry = SourceRegistryEntry(
|