unstructured-ingest 1.0.40__tar.gz → 1.0.41__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of unstructured-ingest might be problematic. Click here for more details.
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/PKG-INFO +1 -1
- unstructured_ingest-1.0.41/unstructured_ingest/__version__.py +1 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/airtable.py +1 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/astradb.py +5 -2
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/confluence.py +1 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/databricks/volumes.py +7 -5
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/discord.py +1 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/elasticsearch/elasticsearch.py +9 -2
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/gitlab.py +7 -6
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/jira.py +1 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/local.py +11 -11
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/mongodb.py +5 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/notion/connector.py +2 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/onedrive.py +1 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/outlook.py +3 -2
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/salesforce.py +6 -4
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/slack.py +5 -3
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/sql/sql.py +6 -1
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/zendesk/zendesk.py +10 -6
- unstructured_ingest-1.0.40/unstructured_ingest/__version__.py +0 -1
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/.gitignore +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/LICENSE.md +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/README.md +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/pyproject.toml +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/__init__.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/cli/README.md +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/cli/__init__.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/cli/base/__init__.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/cli/base/cmd.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/cli/base/dest.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/cli/base/importer.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/cli/base/src.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/cli/cli.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/cli/cmds.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/cli/utils/__init__.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/cli/utils/click.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/cli/utils/model_conversion.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/data_types/__init__.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/data_types/entities.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/data_types/file_data.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/embed/__init__.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/embed/azure_openai.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/embed/bedrock.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/embed/huggingface.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/embed/interfaces.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/embed/mixedbreadai.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/embed/octoai.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/embed/openai.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/embed/togetherai.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/embed/vertexai.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/embed/voyageai.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/error.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/errors_v2.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/interfaces/__init__.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/interfaces/connector.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/interfaces/downloader.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/interfaces/indexer.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/interfaces/process.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/interfaces/processor.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/interfaces/upload_stager.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/interfaces/uploader.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/logger.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/main.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/otel.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/pipeline/__init__.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/pipeline/interfaces.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/pipeline/otel.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/pipeline/pipeline.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/pipeline/steps/__init__.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/pipeline/steps/chunk.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/pipeline/steps/download.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/pipeline/steps/embed.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/pipeline/steps/filter.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/pipeline/steps/index.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/pipeline/steps/partition.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/pipeline/steps/stage.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/pipeline/steps/uncompress.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/pipeline/steps/upload.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/__init__.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/chunker.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connector_registry.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/__init__.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/assets/__init__.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/assets/databricks_delta_table_schema.sql +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/assets/weaviate_collection_config.json +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/azure_ai_search.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/chroma.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/couchbase.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/databricks/__init__.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/databricks/volumes_aws.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/databricks/volumes_azure.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/databricks/volumes_gcp.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/databricks/volumes_native.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/databricks/volumes_table.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/delta_table.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/duckdb/__init__.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/duckdb/base.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/duckdb/duckdb.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/duckdb/motherduck.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/elasticsearch/__init__.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/elasticsearch/opensearch.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/fsspec/__init__.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/fsspec/azure.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/fsspec/box.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/fsspec/dropbox.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/fsspec/fsspec.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/fsspec/gcs.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/fsspec/s3.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/fsspec/sftp.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/fsspec/utils.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/github.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/google_drive.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/ibm_watsonx/__init__.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/ibm_watsonx/ibm_watsonx_s3.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/kafka/__init__.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/kafka/cloud.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/kafka/kafka.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/kafka/local.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/kdbai.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/lancedb/__init__.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/lancedb/aws.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/lancedb/azure.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/lancedb/cloud.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/lancedb/gcp.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/lancedb/lancedb.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/lancedb/local.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/milvus.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/neo4j.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/notion/__init__.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/notion/client.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/notion/helpers.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/notion/ingest_backoff/__init__.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/notion/ingest_backoff/_common.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/notion/ingest_backoff/_wrapper.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/notion/ingest_backoff/types.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/notion/interfaces.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/notion/types/__init__.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/notion/types/block.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/notion/types/blocks/__init__.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/notion/types/blocks/bookmark.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/notion/types/blocks/breadcrumb.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/notion/types/blocks/bulleted_list_item.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/notion/types/blocks/callout.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/notion/types/blocks/child_database.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/notion/types/blocks/child_page.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/notion/types/blocks/code.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/notion/types/blocks/column_list.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/notion/types/blocks/divider.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/notion/types/blocks/embed.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/notion/types/blocks/equation.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/notion/types/blocks/file.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/notion/types/blocks/heading.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/notion/types/blocks/image.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/notion/types/blocks/link_preview.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/notion/types/blocks/link_to_page.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/notion/types/blocks/numbered_list.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/notion/types/blocks/paragraph.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/notion/types/blocks/pdf.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/notion/types/blocks/quote.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/notion/types/blocks/synced_block.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/notion/types/blocks/table.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/notion/types/blocks/table_of_contents.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/notion/types/blocks/template.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/notion/types/blocks/todo.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/notion/types/blocks/toggle.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/notion/types/blocks/unsupported.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/notion/types/blocks/video.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/notion/types/database.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/notion/types/database_properties/__init__.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/notion/types/database_properties/checkbox.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/notion/types/database_properties/created_by.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/notion/types/database_properties/created_time.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/notion/types/database_properties/date.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/notion/types/database_properties/email.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/notion/types/database_properties/files.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/notion/types/database_properties/formula.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/notion/types/database_properties/last_edited_by.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/notion/types/database_properties/last_edited_time.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/notion/types/database_properties/multiselect.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/notion/types/database_properties/number.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/notion/types/database_properties/people.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/notion/types/database_properties/phone_number.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/notion/types/database_properties/relation.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/notion/types/database_properties/rich_text.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/notion/types/database_properties/rollup.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/notion/types/database_properties/select.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/notion/types/database_properties/status.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/notion/types/database_properties/title.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/notion/types/database_properties/unique_id.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/notion/types/database_properties/url.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/notion/types/database_properties/verification.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/notion/types/date.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/notion/types/file.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/notion/types/page.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/notion/types/parent.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/notion/types/rich_text.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/notion/types/user.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/pinecone.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/qdrant/__init__.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/qdrant/cloud.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/qdrant/local.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/qdrant/qdrant.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/qdrant/server.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/redisdb.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/sharepoint.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/sql/__init__.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/sql/databricks_delta_tables.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/sql/postgres.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/sql/singlestore.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/sql/snowflake.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/sql/sqlite.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/sql/vastdb.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/utils.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/vectara.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/weaviate/__init__.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/weaviate/cloud.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/weaviate/embedded.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/weaviate/local.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/weaviate/weaviate.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/zendesk/__init__.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/connectors/zendesk/client.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/embedder.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/filter.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/partitioner.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/uncompress.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/utils/__init__.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/utils/blob_storage.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/unstructured_api.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/utils/__init__.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/utils/chunking.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/utils/compression.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/utils/constants.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/utils/data_prep.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/utils/dep_check.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/utils/html.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/utils/ndjson.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/utils/pydantic_models.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/utils/string_and_date_utils.py +0 -0
- {unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/utils/table.py +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "1.0.41" # pragma: no cover
|
|
@@ -195,8 +195,10 @@ class AstraDBIndexer(Indexer):
|
|
|
195
195
|
all_ids = self._get_doc_ids()
|
|
196
196
|
ids = list(all_ids)
|
|
197
197
|
id_batches = batch_generator(ids, self.index_config.batch_size)
|
|
198
|
-
|
|
199
198
|
for batch in id_batches:
|
|
199
|
+
batch_items = [BatchItem(identifier=b) for b in batch]
|
|
200
|
+
display_name = (f"{self.index_config.collection_name}-{self.index_config.keyspace}"
|
|
201
|
+
f"-[{batch_items[0].identifier}..{batch_items[-1].identifier}]")
|
|
200
202
|
fd = AstraDBBatchFileData(
|
|
201
203
|
connector_type=CONNECTOR_TYPE,
|
|
202
204
|
metadata=FileDataSourceMetadata(
|
|
@@ -206,7 +208,8 @@ class AstraDBIndexer(Indexer):
|
|
|
206
208
|
collection_name=self.index_config.collection_name,
|
|
207
209
|
keyspace=self.index_config.keyspace,
|
|
208
210
|
),
|
|
209
|
-
batch_items=[BatchItem(identifier=b) for b in batch],
|
|
211
|
+
batch_items=batch_items,
|
|
212
|
+
display_name=display_name,
|
|
210
213
|
)
|
|
211
214
|
yield fd
|
|
212
215
|
|
|
@@ -133,14 +133,15 @@ class DatabricksVolumesIndexer(Indexer, ABC):
|
|
|
133
133
|
if rel_path.startswith("/"):
|
|
134
134
|
rel_path = rel_path[1:]
|
|
135
135
|
filename = Path(file_info.path).name
|
|
136
|
+
source_identifiers = SourceIdentifiers(
|
|
137
|
+
filename=filename,
|
|
138
|
+
rel_path=rel_path,
|
|
139
|
+
fullpath=file_info.path,
|
|
140
|
+
)
|
|
136
141
|
yield FileData(
|
|
137
142
|
identifier=str(uuid5(NAMESPACE_DNS, file_info.path)),
|
|
138
143
|
connector_type=self.connector_type,
|
|
139
|
-
source_identifiers=SourceIdentifiers(
|
|
140
|
-
filename=filename,
|
|
141
|
-
rel_path=rel_path,
|
|
142
|
-
fullpath=file_info.path,
|
|
143
|
-
),
|
|
144
|
+
source_identifiers=source_identifiers,
|
|
144
145
|
additional_metadata={
|
|
145
146
|
"catalog": self.index_config.catalog,
|
|
146
147
|
"path": file_info.path,
|
|
@@ -148,6 +149,7 @@ class DatabricksVolumesIndexer(Indexer, ABC):
|
|
|
148
149
|
metadata=FileDataSourceMetadata(
|
|
149
150
|
url=file_info.path, date_modified=str(file_info.modification_time)
|
|
150
151
|
),
|
|
152
|
+
display_name=source_identifiers.fullpath,
|
|
151
153
|
)
|
|
152
154
|
except Exception as e:
|
|
153
155
|
raise self.connection_config.wrap_error(e=e)
|
|
@@ -199,17 +199,24 @@ class ElasticsearchIndexer(Indexer):
|
|
|
199
199
|
all_ids = self._get_doc_ids()
|
|
200
200
|
ids = list(all_ids)
|
|
201
201
|
for batch in batch_generator(ids, self.index_config.batch_size):
|
|
202
|
+
batch_items = [BatchItem(identifier=b) for b in batch]
|
|
203
|
+
url = f"{self.connection_config.hosts[0]}/{self.index_config.index_name}"
|
|
204
|
+
display_name = (
|
|
205
|
+
f"url={url}, batch_size={len(batch_items)} "
|
|
206
|
+
f"ids={batch_items[0].identifier}..{batch_items[-1].identifier}"
|
|
207
|
+
) # noqa: E501
|
|
202
208
|
# Make sure the hash is always a positive number to create identified
|
|
203
209
|
yield ElasticsearchBatchFileData(
|
|
204
210
|
connector_type=CONNECTOR_TYPE,
|
|
205
211
|
metadata=FileDataSourceMetadata(
|
|
206
|
-
url=
|
|
212
|
+
url=url,
|
|
207
213
|
date_processed=str(time()),
|
|
208
214
|
),
|
|
209
215
|
additional_metadata=ElastisearchAdditionalMetadata(
|
|
210
216
|
index_name=self.index_config.index_name,
|
|
211
217
|
),
|
|
212
|
-
batch_items=
|
|
218
|
+
batch_items=batch_items,
|
|
219
|
+
display_name=display_name,
|
|
213
220
|
)
|
|
214
221
|
|
|
215
222
|
|
|
@@ -190,21 +190,22 @@ class GitLabIndexer(Indexer):
|
|
|
190
190
|
"file_path": file["path"],
|
|
191
191
|
"ref": ref,
|
|
192
192
|
}
|
|
193
|
-
|
|
193
|
+
source_identifiers = SourceIdentifiers(
|
|
194
|
+
fullpath=file["path"],
|
|
195
|
+
filename=Path(file["path"]).name,
|
|
196
|
+
rel_path=relative_path,
|
|
197
|
+
)
|
|
194
198
|
yield FileData(
|
|
195
199
|
identifier=file["id"],
|
|
196
200
|
connector_type=CONNECTOR_TYPE,
|
|
197
|
-
source_identifiers=
|
|
198
|
-
fullpath=file["path"],
|
|
199
|
-
filename=Path(file["path"]).name,
|
|
200
|
-
rel_path=relative_path,
|
|
201
|
-
),
|
|
201
|
+
source_identifiers=source_identifiers,
|
|
202
202
|
metadata=FileDataSourceMetadata(
|
|
203
203
|
url=file["id"],
|
|
204
204
|
record_locator=record_locator,
|
|
205
205
|
permissions_data=[{"mode": file["mode"]}],
|
|
206
206
|
),
|
|
207
207
|
additional_metadata={},
|
|
208
|
+
display_name=source_identifiers.fullpath,
|
|
208
209
|
)
|
|
209
210
|
|
|
210
211
|
|
|
@@ -119,21 +119,21 @@ class LocalIndexer(Indexer):
|
|
|
119
119
|
|
|
120
120
|
def run(self, **kwargs: Any) -> Generator[FileData, None, None]:
|
|
121
121
|
for file_path in self.list_files():
|
|
122
|
+
source_identifiers = SourceIdentifiers(
|
|
123
|
+
fullpath=str(file_path.resolve()),
|
|
124
|
+
filename=file_path.name,
|
|
125
|
+
rel_path=(
|
|
126
|
+
str(file_path.resolve()).replace(str(self.index_config.path.resolve()), "")[1:]
|
|
127
|
+
if not self.index_config.path.is_file()
|
|
128
|
+
else self.index_config.path.name
|
|
129
|
+
),
|
|
130
|
+
)
|
|
122
131
|
file_data = FileData(
|
|
123
132
|
identifier=str(file_path.resolve()),
|
|
124
133
|
connector_type=CONNECTOR_TYPE,
|
|
125
|
-
source_identifiers=
|
|
126
|
-
fullpath=str(file_path.resolve()),
|
|
127
|
-
filename=file_path.name,
|
|
128
|
-
rel_path=(
|
|
129
|
-
str(file_path.resolve()).replace(str(self.index_config.path.resolve()), "")[
|
|
130
|
-
1:
|
|
131
|
-
]
|
|
132
|
-
if not self.index_config.path.is_file()
|
|
133
|
-
else self.index_config.path.name
|
|
134
|
-
),
|
|
135
|
-
),
|
|
134
|
+
source_identifiers=source_identifiers,
|
|
136
135
|
metadata=self.get_file_metadata(path=file_path),
|
|
136
|
+
display_name=source_identifiers.fullpath,
|
|
137
137
|
)
|
|
138
138
|
yield file_data
|
|
139
139
|
|
|
@@ -149,6 +149,10 @@ class MongoDBIndexer(Indexer):
|
|
|
149
149
|
|
|
150
150
|
for id_batch in batch_generator(ids, batch_size=batch_size):
|
|
151
151
|
# Make sure the hash is always a positive number to create identifier
|
|
152
|
+
display_name = (
|
|
153
|
+
f"{self.index_config.database}.{self.index_config.collection}, "
|
|
154
|
+
f"batch {id_batch[0]}-{id_batch[-1]}"
|
|
155
|
+
)
|
|
152
156
|
metadata = FileDataSourceMetadata(
|
|
153
157
|
date_processed=str(time()),
|
|
154
158
|
record_locator={
|
|
@@ -164,6 +168,7 @@ class MongoDBIndexer(Indexer):
|
|
|
164
168
|
additional_metadata=MongoDBAdditionalMetadata(
|
|
165
169
|
collection=self.index_config.collection, database=self.index_config.database
|
|
166
170
|
),
|
|
171
|
+
display_name=display_name,
|
|
167
172
|
)
|
|
168
173
|
yield file_data
|
|
169
174
|
|
|
@@ -174,6 +174,7 @@ class NotionIndexer(Indexer):
|
|
|
174
174
|
source_identifiers=source_identifiers,
|
|
175
175
|
metadata=metadata,
|
|
176
176
|
additional_metadata=additional_metadata,
|
|
177
|
+
display_name=source_identifiers.fullpath,
|
|
177
178
|
)
|
|
178
179
|
except Exception as e:
|
|
179
180
|
logger.error(f"Error retrieving page {page_id}: {e}")
|
|
@@ -210,6 +211,7 @@ class NotionIndexer(Indexer):
|
|
|
210
211
|
source_identifiers=source_identifiers,
|
|
211
212
|
metadata=metadata,
|
|
212
213
|
additional_metadata=additional_metadata,
|
|
214
|
+
display_name=source_identifiers.fullpath,
|
|
213
215
|
)
|
|
214
216
|
except Exception as e:
|
|
215
217
|
logger.error(f"Error retrieving database {database_id}: {e}")
|
|
@@ -149,11 +149,11 @@ class OutlookIndexer(Indexer):
|
|
|
149
149
|
|
|
150
150
|
def _message_to_file_data(self, message: "Message") -> FileData:
|
|
151
151
|
fullpath = self._generate_fullpath(message)
|
|
152
|
-
|
|
152
|
+
source_identifiers = SourceIdentifiers(filename=fullpath.name, fullpath=str(fullpath))
|
|
153
153
|
return FileData(
|
|
154
154
|
identifier=message.id,
|
|
155
155
|
connector_type=CONNECTOR_TYPE,
|
|
156
|
-
source_identifiers=
|
|
156
|
+
source_identifiers=source_identifiers,
|
|
157
157
|
metadata=FileDataSourceMetadata(
|
|
158
158
|
url=message.resource_url,
|
|
159
159
|
version=message.change_key,
|
|
@@ -178,6 +178,7 @@ class OutlookIndexer(Indexer):
|
|
|
178
178
|
"has_attachments": message.has_attachments,
|
|
179
179
|
"importance": message.importance,
|
|
180
180
|
},
|
|
181
|
+
display_name=source_identifiers.fullpath,
|
|
181
182
|
)
|
|
182
183
|
|
|
183
184
|
def _generate_fullpath(self, message: "Message") -> Path:
|
|
@@ -182,14 +182,15 @@ class SalesforceIndexer(Indexer):
|
|
|
182
182
|
record_with_extension = record["Id"] + self.get_file_extension(
|
|
183
183
|
record["attributes"]["type"]
|
|
184
184
|
)
|
|
185
|
+
source_identifiers = SourceIdentifiers(
|
|
186
|
+
filename=record_with_extension,
|
|
187
|
+
fullpath=f"{record['attributes']['type']}/{record_with_extension}",
|
|
188
|
+
)
|
|
185
189
|
files_list.append(
|
|
186
190
|
FileData(
|
|
187
191
|
connector_type=CONNECTOR_TYPE,
|
|
188
192
|
identifier=record["Id"],
|
|
189
|
-
source_identifiers=
|
|
190
|
-
filename=record_with_extension,
|
|
191
|
-
fullpath=f"{record['attributes']['type']}/{record_with_extension}",
|
|
192
|
-
),
|
|
193
|
+
source_identifiers=source_identifiers,
|
|
193
194
|
metadata=FileDataSourceMetadata(
|
|
194
195
|
url=record["attributes"]["url"],
|
|
195
196
|
version=str(parser.parse(record["SystemModstamp"]).timestamp()),
|
|
@@ -200,6 +201,7 @@ class SalesforceIndexer(Indexer):
|
|
|
200
201
|
record_locator={"id": record["Id"]},
|
|
201
202
|
),
|
|
202
203
|
additional_metadata={"record_type": record["attributes"]["type"]},
|
|
204
|
+
display_name=source_identifiers.fullpath,
|
|
203
205
|
)
|
|
204
206
|
)
|
|
205
207
|
except SalesforceMalformedRequest as e:
|
|
@@ -122,12 +122,13 @@ class SlackIndexer(Indexer):
|
|
|
122
122
|
identifier = hashlib.sha256(identifier_base.encode("utf-8")).hexdigest()
|
|
123
123
|
filename = identifier[:16]
|
|
124
124
|
|
|
125
|
+
source_identifiers = SourceIdentifiers(
|
|
126
|
+
filename=f"{filename}.xml", fullpath=f"{filename}.xml"
|
|
127
|
+
)
|
|
125
128
|
return FileData(
|
|
126
129
|
identifier=identifier,
|
|
127
130
|
connector_type=CONNECTOR_TYPE,
|
|
128
|
-
source_identifiers=
|
|
129
|
-
filename=f"{filename}.xml", fullpath=f"{filename}.xml"
|
|
130
|
-
),
|
|
131
|
+
source_identifiers=source_identifiers,
|
|
131
132
|
metadata=FileDataSourceMetadata(
|
|
132
133
|
date_created=ts_oldest,
|
|
133
134
|
date_modified=ts_newest,
|
|
@@ -138,6 +139,7 @@ class SlackIndexer(Indexer):
|
|
|
138
139
|
"latest": ts_newest,
|
|
139
140
|
},
|
|
140
141
|
),
|
|
142
|
+
display_name=source_identifiers.fullpath,
|
|
141
143
|
)
|
|
142
144
|
|
|
143
145
|
@SourceConnectionError.wrap
|
|
@@ -130,7 +130,11 @@ class SQLIndexer(Indexer, ABC):
|
|
|
130
130
|
(len(ids) + self.index_config.batch_size - 1) // self.index_config.batch_size
|
|
131
131
|
)
|
|
132
132
|
]
|
|
133
|
+
|
|
133
134
|
for batch in id_batches:
|
|
135
|
+
batch_items = [BatchItem(identifier=str(b)) for b in batch]
|
|
136
|
+
display_name = (f"{self.index_config.table_name}-{self.index_config.id_column}"
|
|
137
|
+
f"-[{batch_items[0].identifier}..{batch_items[-1].identifier}]")
|
|
134
138
|
# Make sure the hash is always a positive number to create identified
|
|
135
139
|
yield SqlBatchFileData(
|
|
136
140
|
connector_type=self.connector_type,
|
|
@@ -140,7 +144,8 @@ class SQLIndexer(Indexer, ABC):
|
|
|
140
144
|
additional_metadata=SqlAdditionalMetadata(
|
|
141
145
|
table_name=self.index_config.table_name, id_column=self.index_config.id_column
|
|
142
146
|
),
|
|
143
|
-
batch_items=
|
|
147
|
+
batch_items=batch_items,
|
|
148
|
+
display_name=display_name
|
|
144
149
|
)
|
|
145
150
|
|
|
146
151
|
|
|
@@ -86,12 +86,13 @@ class ZendeskIndexer(Indexer):
|
|
|
86
86
|
async def get_tickets(self) -> AsyncGenerator[ZendeskFileData, None]:
|
|
87
87
|
async with self.connection_config.get_client() as client:
|
|
88
88
|
async for ticket in client.get_tickets():
|
|
89
|
+
source_identifiers = SourceIdentifiers(
|
|
90
|
+
filename=f"{ticket.id}.txt", fullpath=f"tickets/{ticket.id}.txt"
|
|
91
|
+
)
|
|
89
92
|
yield ZendeskFileData(
|
|
90
93
|
identifier=str(ticket.id),
|
|
91
94
|
connector_type=self.connector_type,
|
|
92
|
-
source_identifiers=
|
|
93
|
-
filename=f"{ticket.id}.txt", fullpath=f"tickets/{ticket.id}.txt"
|
|
94
|
-
),
|
|
95
|
+
source_identifiers=source_identifiers,
|
|
95
96
|
additional_metadata=ZendeskAdditionalMetadata(
|
|
96
97
|
item_type="ticket", content=ticket
|
|
97
98
|
),
|
|
@@ -101,17 +102,19 @@ class ZendeskIndexer(Indexer):
|
|
|
101
102
|
date_modified=ticket.updated_at.isoformat() if ticket.updated_at else None,
|
|
102
103
|
date_processed=str(time()),
|
|
103
104
|
),
|
|
105
|
+
display_name=source_identifiers.fullpath,
|
|
104
106
|
)
|
|
105
107
|
|
|
106
108
|
async def get_articles(self) -> AsyncGenerator[ZendeskFileData, None]:
|
|
107
109
|
async with self.connection_config.get_client() as client:
|
|
108
110
|
async for article in client.get_articles():
|
|
111
|
+
source_identifiers = SourceIdentifiers(
|
|
112
|
+
filename=f"{article.id}.html", fullpath=f"articles/{article.id}.html"
|
|
113
|
+
)
|
|
109
114
|
yield ZendeskFileData(
|
|
110
115
|
identifier=str(article.id),
|
|
111
116
|
connector_type=self.connector_type,
|
|
112
|
-
source_identifiers=
|
|
113
|
-
filename=f"{article.id}.html", fullpath=f"articles/{article.id}.html"
|
|
114
|
-
),
|
|
117
|
+
source_identifiers=source_identifiers,
|
|
115
118
|
additional_metadata=ZendeskAdditionalMetadata(
|
|
116
119
|
item_type="article", content=article
|
|
117
120
|
),
|
|
@@ -123,6 +126,7 @@ class ZendeskIndexer(Indexer):
|
|
|
123
126
|
),
|
|
124
127
|
date_processed=str(time()),
|
|
125
128
|
),
|
|
129
|
+
display_name=source_identifiers.fullpath,
|
|
126
130
|
)
|
|
127
131
|
|
|
128
132
|
async def run_async(self, **kwargs: Any) -> AsyncGenerator[ZendeskFileData, None]:
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = "1.0.40" # pragma: no cover
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/cli/__init__.py
RENAMED
|
File without changes
|
{unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/cli/base/__init__.py
RENAMED
|
File without changes
|
{unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/cli/base/cmd.py
RENAMED
|
File without changes
|
{unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/cli/base/dest.py
RENAMED
|
File without changes
|
{unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/cli/base/importer.py
RENAMED
|
File without changes
|
{unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/cli/base/src.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/cli/utils/__init__.py
RENAMED
|
File without changes
|
{unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/cli/utils/click.py
RENAMED
|
File without changes
|
|
File without changes
|
{unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/data_types/__init__.py
RENAMED
|
File without changes
|
{unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/data_types/entities.py
RENAMED
|
File without changes
|
|
File without changes
|
{unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/embed/__init__.py
RENAMED
|
File without changes
|
{unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/embed/azure_openai.py
RENAMED
|
File without changes
|
{unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/embed/bedrock.py
RENAMED
|
File without changes
|
{unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/embed/huggingface.py
RENAMED
|
File without changes
|
{unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/embed/interfaces.py
RENAMED
|
File without changes
|
{unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/embed/mixedbreadai.py
RENAMED
|
File without changes
|
{unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/embed/octoai.py
RENAMED
|
File without changes
|
{unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/embed/openai.py
RENAMED
|
File without changes
|
{unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/embed/togetherai.py
RENAMED
|
File without changes
|
{unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/embed/vertexai.py
RENAMED
|
File without changes
|
{unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/embed/voyageai.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/interfaces/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/interfaces/indexer.py
RENAMED
|
File without changes
|
{unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/interfaces/process.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/interfaces/uploader.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/pipeline/__init__.py
RENAMED
|
File without changes
|
{unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/pipeline/interfaces.py
RENAMED
|
File without changes
|
{unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/pipeline/otel.py
RENAMED
|
File without changes
|
{unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/pipeline/pipeline.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/__init__.py
RENAMED
|
File without changes
|
{unstructured_ingest-1.0.40 → unstructured_ingest-1.0.41}/unstructured_ingest/processes/chunker.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|