unstructured-ingest 0.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of unstructured-ingest might be problematic. Click here for more details.
- unstructured_ingest/__init__.py +1 -0
- unstructured_ingest/__version__.py +1 -0
- unstructured_ingest/cli/__init__.py +14 -0
- unstructured_ingest/cli/base/__init__.py +0 -0
- unstructured_ingest/cli/base/cmd.py +19 -0
- unstructured_ingest/cli/base/dest.py +87 -0
- unstructured_ingest/cli/base/src.py +57 -0
- unstructured_ingest/cli/cli.py +32 -0
- unstructured_ingest/cli/cmd_factory.py +12 -0
- unstructured_ingest/cli/cmds/__init__.py +145 -0
- unstructured_ingest/cli/cmds/airtable.py +69 -0
- unstructured_ingest/cli/cmds/astra.py +99 -0
- unstructured_ingest/cli/cmds/azure_cognitive_search.py +65 -0
- unstructured_ingest/cli/cmds/biomed.py +52 -0
- unstructured_ingest/cli/cmds/chroma.py +104 -0
- unstructured_ingest/cli/cmds/clarifai.py +71 -0
- unstructured_ingest/cli/cmds/confluence.py +69 -0
- unstructured_ingest/cli/cmds/databricks_volumes.py +163 -0
- unstructured_ingest/cli/cmds/delta_table.py +94 -0
- unstructured_ingest/cli/cmds/discord.py +47 -0
- unstructured_ingest/cli/cmds/elasticsearch.py +133 -0
- unstructured_ingest/cli/cmds/fsspec/__init__.py +0 -0
- unstructured_ingest/cli/cmds/fsspec/azure.py +94 -0
- unstructured_ingest/cli/cmds/fsspec/box.py +48 -0
- unstructured_ingest/cli/cmds/fsspec/dropbox.py +51 -0
- unstructured_ingest/cli/cmds/fsspec/fsspec.py +15 -0
- unstructured_ingest/cli/cmds/fsspec/gcs.py +71 -0
- unstructured_ingest/cli/cmds/fsspec/s3.py +74 -0
- unstructured_ingest/cli/cmds/fsspec/sftp.py +58 -0
- unstructured_ingest/cli/cmds/github.py +54 -0
- unstructured_ingest/cli/cmds/gitlab.py +54 -0
- unstructured_ingest/cli/cmds/google_drive.py +49 -0
- unstructured_ingest/cli/cmds/hubspot.py +70 -0
- unstructured_ingest/cli/cmds/jira.py +71 -0
- unstructured_ingest/cli/cmds/kafka.py +102 -0
- unstructured_ingest/cli/cmds/local.py +43 -0
- unstructured_ingest/cli/cmds/mongodb.py +72 -0
- unstructured_ingest/cli/cmds/notion.py +48 -0
- unstructured_ingest/cli/cmds/onedrive.py +66 -0
- unstructured_ingest/cli/cmds/opensearch.py +117 -0
- unstructured_ingest/cli/cmds/outlook.py +67 -0
- unstructured_ingest/cli/cmds/pinecone.py +71 -0
- unstructured_ingest/cli/cmds/qdrant.py +124 -0
- unstructured_ingest/cli/cmds/reddit.py +67 -0
- unstructured_ingest/cli/cmds/salesforce.py +58 -0
- unstructured_ingest/cli/cmds/sharepoint.py +66 -0
- unstructured_ingest/cli/cmds/slack.py +56 -0
- unstructured_ingest/cli/cmds/sql.py +66 -0
- unstructured_ingest/cli/cmds/vectara.py +66 -0
- unstructured_ingest/cli/cmds/weaviate.py +98 -0
- unstructured_ingest/cli/cmds/wikipedia.py +40 -0
- unstructured_ingest/cli/common.py +7 -0
- unstructured_ingest/cli/interfaces.py +656 -0
- unstructured_ingest/cli/utils.py +205 -0
- unstructured_ingest/connector/__init__.py +0 -0
- unstructured_ingest/connector/airtable.py +309 -0
- unstructured_ingest/connector/astra.py +237 -0
- unstructured_ingest/connector/azure_cognitive_search.py +144 -0
- unstructured_ingest/connector/biomed.py +313 -0
- unstructured_ingest/connector/chroma.py +158 -0
- unstructured_ingest/connector/clarifai.py +122 -0
- unstructured_ingest/connector/confluence.py +285 -0
- unstructured_ingest/connector/databricks_volumes.py +137 -0
- unstructured_ingest/connector/delta_table.py +203 -0
- unstructured_ingest/connector/discord.py +180 -0
- unstructured_ingest/connector/elasticsearch.py +396 -0
- unstructured_ingest/connector/fsspec/__init__.py +0 -0
- unstructured_ingest/connector/fsspec/azure.py +78 -0
- unstructured_ingest/connector/fsspec/box.py +109 -0
- unstructured_ingest/connector/fsspec/dropbox.py +160 -0
- unstructured_ingest/connector/fsspec/fsspec.py +359 -0
- unstructured_ingest/connector/fsspec/gcs.py +82 -0
- unstructured_ingest/connector/fsspec/s3.py +62 -0
- unstructured_ingest/connector/fsspec/sftp.py +81 -0
- unstructured_ingest/connector/git.py +124 -0
- unstructured_ingest/connector/github.py +173 -0
- unstructured_ingest/connector/gitlab.py +142 -0
- unstructured_ingest/connector/google_drive.py +349 -0
- unstructured_ingest/connector/hubspot.py +278 -0
- unstructured_ingest/connector/jira.py +469 -0
- unstructured_ingest/connector/kafka.py +294 -0
- unstructured_ingest/connector/local.py +139 -0
- unstructured_ingest/connector/mongodb.py +285 -0
- unstructured_ingest/connector/notion/__init__.py +0 -0
- unstructured_ingest/connector/notion/client.py +233 -0
- unstructured_ingest/connector/notion/connector.py +468 -0
- unstructured_ingest/connector/notion/helpers.py +584 -0
- unstructured_ingest/connector/notion/interfaces.py +32 -0
- unstructured_ingest/connector/notion/types/__init__.py +0 -0
- unstructured_ingest/connector/notion/types/block.py +95 -0
- unstructured_ingest/connector/notion/types/blocks/__init__.py +63 -0
- unstructured_ingest/connector/notion/types/blocks/bookmark.py +40 -0
- unstructured_ingest/connector/notion/types/blocks/breadcrumb.py +21 -0
- unstructured_ingest/connector/notion/types/blocks/bulleted_list_item.py +31 -0
- unstructured_ingest/connector/notion/types/blocks/callout.py +94 -0
- unstructured_ingest/connector/notion/types/blocks/child_database.py +23 -0
- unstructured_ingest/connector/notion/types/blocks/child_page.py +23 -0
- unstructured_ingest/connector/notion/types/blocks/code.py +43 -0
- unstructured_ingest/connector/notion/types/blocks/column_list.py +35 -0
- unstructured_ingest/connector/notion/types/blocks/divider.py +22 -0
- unstructured_ingest/connector/notion/types/blocks/embed.py +36 -0
- unstructured_ingest/connector/notion/types/blocks/equation.py +23 -0
- unstructured_ingest/connector/notion/types/blocks/file.py +49 -0
- unstructured_ingest/connector/notion/types/blocks/heading.py +37 -0
- unstructured_ingest/connector/notion/types/blocks/image.py +21 -0
- unstructured_ingest/connector/notion/types/blocks/link_preview.py +24 -0
- unstructured_ingest/connector/notion/types/blocks/link_to_page.py +29 -0
- unstructured_ingest/connector/notion/types/blocks/numbered_list.py +29 -0
- unstructured_ingest/connector/notion/types/blocks/paragraph.py +31 -0
- unstructured_ingest/connector/notion/types/blocks/pdf.py +49 -0
- unstructured_ingest/connector/notion/types/blocks/quote.py +37 -0
- unstructured_ingest/connector/notion/types/blocks/synced_block.py +57 -0
- unstructured_ingest/connector/notion/types/blocks/table.py +63 -0
- unstructured_ingest/connector/notion/types/blocks/table_of_contents.py +23 -0
- unstructured_ingest/connector/notion/types/blocks/template.py +30 -0
- unstructured_ingest/connector/notion/types/blocks/todo.py +42 -0
- unstructured_ingest/connector/notion/types/blocks/toggle.py +37 -0
- unstructured_ingest/connector/notion/types/blocks/unsupported.py +20 -0
- unstructured_ingest/connector/notion/types/blocks/video.py +22 -0
- unstructured_ingest/connector/notion/types/database.py +72 -0
- unstructured_ingest/connector/notion/types/database_properties/__init__.py +106 -0
- unstructured_ingest/connector/notion/types/database_properties/checkbox.py +38 -0
- unstructured_ingest/connector/notion/types/database_properties/created_by.py +35 -0
- unstructured_ingest/connector/notion/types/database_properties/created_time.py +34 -0
- unstructured_ingest/connector/notion/types/database_properties/date.py +41 -0
- unstructured_ingest/connector/notion/types/database_properties/email.py +36 -0
- unstructured_ingest/connector/notion/types/database_properties/files.py +37 -0
- unstructured_ingest/connector/notion/types/database_properties/formula.py +49 -0
- unstructured_ingest/connector/notion/types/database_properties/last_edited_by.py +34 -0
- unstructured_ingest/connector/notion/types/database_properties/last_edited_time.py +34 -0
- unstructured_ingest/connector/notion/types/database_properties/multiselect.py +73 -0
- unstructured_ingest/connector/notion/types/database_properties/number.py +49 -0
- unstructured_ingest/connector/notion/types/database_properties/people.py +40 -0
- unstructured_ingest/connector/notion/types/database_properties/phone_number.py +36 -0
- unstructured_ingest/connector/notion/types/database_properties/relation.py +67 -0
- unstructured_ingest/connector/notion/types/database_properties/rich_text.py +43 -0
- unstructured_ingest/connector/notion/types/database_properties/rollup.py +56 -0
- unstructured_ingest/connector/notion/types/database_properties/select.py +68 -0
- unstructured_ingest/connector/notion/types/database_properties/status.py +80 -0
- unstructured_ingest/connector/notion/types/database_properties/title.py +37 -0
- unstructured_ingest/connector/notion/types/database_properties/unique_id.py +50 -0
- unstructured_ingest/connector/notion/types/database_properties/url.py +37 -0
- unstructured_ingest/connector/notion/types/database_properties/verification.py +78 -0
- unstructured_ingest/connector/notion/types/date.py +26 -0
- unstructured_ingest/connector/notion/types/file.py +51 -0
- unstructured_ingest/connector/notion/types/page.py +44 -0
- unstructured_ingest/connector/notion/types/parent.py +66 -0
- unstructured_ingest/connector/notion/types/rich_text.py +189 -0
- unstructured_ingest/connector/notion/types/user.py +76 -0
- unstructured_ingest/connector/onedrive.py +232 -0
- unstructured_ingest/connector/opensearch.py +218 -0
- unstructured_ingest/connector/outlook.py +285 -0
- unstructured_ingest/connector/pinecone.py +140 -0
- unstructured_ingest/connector/qdrant.py +144 -0
- unstructured_ingest/connector/reddit.py +166 -0
- unstructured_ingest/connector/registry.py +109 -0
- unstructured_ingest/connector/salesforce.py +301 -0
- unstructured_ingest/connector/sharepoint.py +573 -0
- unstructured_ingest/connector/slack.py +224 -0
- unstructured_ingest/connector/sql.py +199 -0
- unstructured_ingest/connector/vectara.py +248 -0
- unstructured_ingest/connector/weaviate.py +190 -0
- unstructured_ingest/connector/wikipedia.py +208 -0
- unstructured_ingest/enhanced_dataclass/__init__.py +4 -0
- unstructured_ingest/enhanced_dataclass/core.py +99 -0
- unstructured_ingest/enhanced_dataclass/dataclasses.py +54 -0
- unstructured_ingest/enhanced_dataclass/json_mixin.py +125 -0
- unstructured_ingest/error.py +49 -0
- unstructured_ingest/evaluate.py +338 -0
- unstructured_ingest/ingest_backoff/__init__.py +3 -0
- unstructured_ingest/ingest_backoff/_common.py +102 -0
- unstructured_ingest/ingest_backoff/_wrapper.py +122 -0
- unstructured_ingest/interfaces.py +838 -0
- unstructured_ingest/logger.py +130 -0
- unstructured_ingest/main.py +11 -0
- unstructured_ingest/pipeline/__init__.py +22 -0
- unstructured_ingest/pipeline/copy.py +19 -0
- unstructured_ingest/pipeline/doc_factory.py +12 -0
- unstructured_ingest/pipeline/interfaces.py +265 -0
- unstructured_ingest/pipeline/partition.py +60 -0
- unstructured_ingest/pipeline/permissions.py +12 -0
- unstructured_ingest/pipeline/pipeline.py +117 -0
- unstructured_ingest/pipeline/reformat/__init__.py +0 -0
- unstructured_ingest/pipeline/reformat/chunking.py +130 -0
- unstructured_ingest/pipeline/reformat/embedding.py +66 -0
- unstructured_ingest/pipeline/source.py +77 -0
- unstructured_ingest/pipeline/utils.py +6 -0
- unstructured_ingest/pipeline/write.py +18 -0
- unstructured_ingest/processor.py +93 -0
- unstructured_ingest/runner/__init__.py +104 -0
- unstructured_ingest/runner/airtable.py +35 -0
- unstructured_ingest/runner/astra.py +34 -0
- unstructured_ingest/runner/base_runner.py +89 -0
- unstructured_ingest/runner/biomed.py +45 -0
- unstructured_ingest/runner/confluence.py +35 -0
- unstructured_ingest/runner/delta_table.py +34 -0
- unstructured_ingest/runner/discord.py +35 -0
- unstructured_ingest/runner/elasticsearch.py +40 -0
- unstructured_ingest/runner/fsspec/__init__.py +0 -0
- unstructured_ingest/runner/fsspec/azure.py +30 -0
- unstructured_ingest/runner/fsspec/box.py +28 -0
- unstructured_ingest/runner/fsspec/dropbox.py +30 -0
- unstructured_ingest/runner/fsspec/fsspec.py +40 -0
- unstructured_ingest/runner/fsspec/gcs.py +28 -0
- unstructured_ingest/runner/fsspec/s3.py +28 -0
- unstructured_ingest/runner/fsspec/sftp.py +28 -0
- unstructured_ingest/runner/github.py +37 -0
- unstructured_ingest/runner/gitlab.py +37 -0
- unstructured_ingest/runner/google_drive.py +35 -0
- unstructured_ingest/runner/hubspot.py +35 -0
- unstructured_ingest/runner/jira.py +35 -0
- unstructured_ingest/runner/kafka.py +34 -0
- unstructured_ingest/runner/local.py +23 -0
- unstructured_ingest/runner/mongodb.py +34 -0
- unstructured_ingest/runner/notion.py +61 -0
- unstructured_ingest/runner/onedrive.py +35 -0
- unstructured_ingest/runner/opensearch.py +40 -0
- unstructured_ingest/runner/outlook.py +33 -0
- unstructured_ingest/runner/reddit.py +35 -0
- unstructured_ingest/runner/salesforce.py +33 -0
- unstructured_ingest/runner/sharepoint.py +35 -0
- unstructured_ingest/runner/slack.py +33 -0
- unstructured_ingest/runner/utils.py +47 -0
- unstructured_ingest/runner/wikipedia.py +35 -0
- unstructured_ingest/runner/writers/__init__.py +48 -0
- unstructured_ingest/runner/writers/astra.py +22 -0
- unstructured_ingest/runner/writers/azure_cognitive_search.py +24 -0
- unstructured_ingest/runner/writers/base_writer.py +26 -0
- unstructured_ingest/runner/writers/chroma.py +22 -0
- unstructured_ingest/runner/writers/clarifai.py +19 -0
- unstructured_ingest/runner/writers/databricks_volumes.py +25 -0
- unstructured_ingest/runner/writers/delta_table.py +24 -0
- unstructured_ingest/runner/writers/elasticsearch.py +24 -0
- unstructured_ingest/runner/writers/fsspec/__init__.py +0 -0
- unstructured_ingest/runner/writers/fsspec/azure.py +24 -0
- unstructured_ingest/runner/writers/fsspec/box.py +21 -0
- unstructured_ingest/runner/writers/fsspec/dropbox.py +21 -0
- unstructured_ingest/runner/writers/fsspec/gcs.py +19 -0
- unstructured_ingest/runner/writers/fsspec/s3.py +21 -0
- unstructured_ingest/runner/writers/kafka.py +21 -0
- unstructured_ingest/runner/writers/mongodb.py +21 -0
- unstructured_ingest/runner/writers/opensearch.py +26 -0
- unstructured_ingest/runner/writers/pinecone.py +21 -0
- unstructured_ingest/runner/writers/qdrant.py +19 -0
- unstructured_ingest/runner/writers/sql.py +22 -0
- unstructured_ingest/runner/writers/vectara.py +22 -0
- unstructured_ingest/runner/writers/weaviate.py +21 -0
- unstructured_ingest/utils/__init__.py +0 -0
- unstructured_ingest/utils/compression.py +117 -0
- unstructured_ingest/utils/data_prep.py +112 -0
- unstructured_ingest/utils/dep_check.py +66 -0
- unstructured_ingest/utils/string_and_date_utils.py +39 -0
- unstructured_ingest/utils/table.py +73 -0
- unstructured_ingest/v2/__init__.py +1 -0
- unstructured_ingest/v2/cli/__init__.py +0 -0
- unstructured_ingest/v2/cli/base/__init__.py +4 -0
- unstructured_ingest/v2/cli/base/cmd.py +215 -0
- unstructured_ingest/v2/cli/base/dest.py +76 -0
- unstructured_ingest/v2/cli/base/importer.py +34 -0
- unstructured_ingest/v2/cli/base/src.py +70 -0
- unstructured_ingest/v2/cli/cli.py +24 -0
- unstructured_ingest/v2/cli/cmds/__init__.py +87 -0
- unstructured_ingest/v2/cli/cmds/astra.py +85 -0
- unstructured_ingest/v2/cli/cmds/azure_cognitive_search.py +72 -0
- unstructured_ingest/v2/cli/cmds/chroma.py +108 -0
- unstructured_ingest/v2/cli/cmds/databricks_volumes.py +161 -0
- unstructured_ingest/v2/cli/cmds/elasticsearch.py +159 -0
- unstructured_ingest/v2/cli/cmds/fsspec/__init__.py +0 -0
- unstructured_ingest/v2/cli/cmds/fsspec/azure.py +84 -0
- unstructured_ingest/v2/cli/cmds/fsspec/box.py +58 -0
- unstructured_ingest/v2/cli/cmds/fsspec/dropbox.py +58 -0
- unstructured_ingest/v2/cli/cmds/fsspec/fsspec.py +77 -0
- unstructured_ingest/v2/cli/cmds/fsspec/gcs.py +81 -0
- unstructured_ingest/v2/cli/cmds/fsspec/s3.py +84 -0
- unstructured_ingest/v2/cli/cmds/fsspec/sftp.py +80 -0
- unstructured_ingest/v2/cli/cmds/google_drive.py +74 -0
- unstructured_ingest/v2/cli/cmds/local.py +60 -0
- unstructured_ingest/v2/cli/cmds/mongodb.py +62 -0
- unstructured_ingest/v2/cli/cmds/onedrive.py +91 -0
- unstructured_ingest/v2/cli/cmds/opensearch.py +93 -0
- unstructured_ingest/v2/cli/cmds/pinecone.py +62 -0
- unstructured_ingest/v2/cli/cmds/salesforce.py +79 -0
- unstructured_ingest/v2/cli/cmds/sharepoint.py +112 -0
- unstructured_ingest/v2/cli/cmds/singlestore.py +96 -0
- unstructured_ingest/v2/cli/cmds/sql.py +84 -0
- unstructured_ingest/v2/cli/cmds/weaviate.py +100 -0
- unstructured_ingest/v2/cli/configs/__init__.py +6 -0
- unstructured_ingest/v2/cli/configs/chunk.py +89 -0
- unstructured_ingest/v2/cli/configs/embed.py +74 -0
- unstructured_ingest/v2/cli/configs/partition.py +99 -0
- unstructured_ingest/v2/cli/configs/processor.py +88 -0
- unstructured_ingest/v2/cli/interfaces.py +27 -0
- unstructured_ingest/v2/cli/utils.py +240 -0
- unstructured_ingest/v2/example.py +37 -0
- unstructured_ingest/v2/interfaces/__init__.py +29 -0
- unstructured_ingest/v2/interfaces/connector.py +32 -0
- unstructured_ingest/v2/interfaces/downloader.py +79 -0
- unstructured_ingest/v2/interfaces/file_data.py +49 -0
- unstructured_ingest/v2/interfaces/indexer.py +28 -0
- unstructured_ingest/v2/interfaces/process.py +20 -0
- unstructured_ingest/v2/interfaces/processor.py +48 -0
- unstructured_ingest/v2/interfaces/upload_stager.py +48 -0
- unstructured_ingest/v2/interfaces/uploader.py +39 -0
- unstructured_ingest/v2/logger.py +126 -0
- unstructured_ingest/v2/main.py +11 -0
- unstructured_ingest/v2/pipeline/__init__.py +0 -0
- unstructured_ingest/v2/pipeline/interfaces.py +167 -0
- unstructured_ingest/v2/pipeline/pipeline.py +284 -0
- unstructured_ingest/v2/pipeline/steps/__init__.py +0 -0
- unstructured_ingest/v2/pipeline/steps/chunk.py +85 -0
- unstructured_ingest/v2/pipeline/steps/download.py +124 -0
- unstructured_ingest/v2/pipeline/steps/embed.py +84 -0
- unstructured_ingest/v2/pipeline/steps/index.py +61 -0
- unstructured_ingest/v2/pipeline/steps/partition.py +78 -0
- unstructured_ingest/v2/pipeline/steps/stage.py +64 -0
- unstructured_ingest/v2/pipeline/steps/uncompress.py +68 -0
- unstructured_ingest/v2/pipeline/steps/upload.py +73 -0
- unstructured_ingest/v2/pipeline/utils.py +15 -0
- unstructured_ingest/v2/processes/__init__.py +0 -0
- unstructured_ingest/v2/processes/chunker.py +97 -0
- unstructured_ingest/v2/processes/connector_registry.py +63 -0
- unstructured_ingest/v2/processes/connectors/__init__.py +77 -0
- unstructured_ingest/v2/processes/connectors/astra.py +152 -0
- unstructured_ingest/v2/processes/connectors/azure_cognitive_search.py +211 -0
- unstructured_ingest/v2/processes/connectors/chroma.py +204 -0
- unstructured_ingest/v2/processes/connectors/databricks_volumes.py +96 -0
- unstructured_ingest/v2/processes/connectors/elasticsearch.py +401 -0
- unstructured_ingest/v2/processes/connectors/fsspec/__init__.py +37 -0
- unstructured_ingest/v2/processes/connectors/fsspec/azure.py +144 -0
- unstructured_ingest/v2/processes/connectors/fsspec/box.py +131 -0
- unstructured_ingest/v2/processes/connectors/fsspec/dropbox.py +130 -0
- unstructured_ingest/v2/processes/connectors/fsspec/fsspec.py +342 -0
- unstructured_ingest/v2/processes/connectors/fsspec/gcs.py +141 -0
- unstructured_ingest/v2/processes/connectors/fsspec/s3.py +164 -0
- unstructured_ingest/v2/processes/connectors/fsspec/sftp.py +166 -0
- unstructured_ingest/v2/processes/connectors/fsspec/utils.py +17 -0
- unstructured_ingest/v2/processes/connectors/google_drive.py +335 -0
- unstructured_ingest/v2/processes/connectors/local.py +204 -0
- unstructured_ingest/v2/processes/connectors/mongodb.py +138 -0
- unstructured_ingest/v2/processes/connectors/onedrive.py +216 -0
- unstructured_ingest/v2/processes/connectors/opensearch.py +155 -0
- unstructured_ingest/v2/processes/connectors/pinecone.py +178 -0
- unstructured_ingest/v2/processes/connectors/salesforce.py +293 -0
- unstructured_ingest/v2/processes/connectors/sharepoint.py +412 -0
- unstructured_ingest/v2/processes/connectors/singlestore.py +160 -0
- unstructured_ingest/v2/processes/connectors/sql.py +269 -0
- unstructured_ingest/v2/processes/connectors/utils.py +19 -0
- unstructured_ingest/v2/processes/connectors/weaviate.py +235 -0
- unstructured_ingest/v2/processes/embedder.py +76 -0
- unstructured_ingest/v2/processes/partitioner.py +166 -0
- unstructured_ingest/v2/processes/uncompress.py +43 -0
- unstructured_ingest-0.0.0.dist-info/METADATA +319 -0
- unstructured_ingest-0.0.0.dist-info/RECORD +356 -0
- unstructured_ingest-0.0.0.dist-info/WHEEL +5 -0
- unstructured_ingest-0.0.0.dist-info/entry_points.txt +2 -0
- unstructured_ingest-0.0.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,469 @@
|
|
|
1
|
+
import math
|
|
2
|
+
import typing as t
|
|
3
|
+
from collections import abc
|
|
4
|
+
from dataclasses import dataclass, field
|
|
5
|
+
from datetime import datetime
|
|
6
|
+
from functools import cached_property
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
from unstructured_ingest.enhanced_dataclass import enhanced_field
|
|
10
|
+
from unstructured_ingest.error import SourceConnectionError, SourceConnectionNetworkError
|
|
11
|
+
from unstructured_ingest.interfaces import (
|
|
12
|
+
AccessConfig,
|
|
13
|
+
BaseConnectorConfig,
|
|
14
|
+
BaseSessionHandle,
|
|
15
|
+
BaseSingleIngestDoc,
|
|
16
|
+
BaseSourceConnector,
|
|
17
|
+
ConfigSessionHandleMixin,
|
|
18
|
+
IngestDocCleanupMixin,
|
|
19
|
+
IngestDocSessionHandleMixin,
|
|
20
|
+
SourceConnectorCleanupMixin,
|
|
21
|
+
SourceMetadata,
|
|
22
|
+
)
|
|
23
|
+
from unstructured_ingest.logger import logger
|
|
24
|
+
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
25
|
+
|
|
26
|
+
if t.TYPE_CHECKING:
|
|
27
|
+
from atlassian import Jira
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@dataclass
class JiraSessionHandle(BaseSessionHandle):
    """Session handle that carries the Jira client object used to query the API."""

    # Client returned by create_jira_object(); the annotation is a string because
    # `Jira` is only imported under t.TYPE_CHECKING.
    service: "Jira"
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
@requires_dependencies(["atlassian"], extras="jira")
def create_jira_object(url, user_email, api_token):
    """
    Creates a jira object for interacting with Jira Cloud.

    After the client is built, the BROWSE_PROJECTS permission is queried and the
    client is only returned when that permission is reported as granted.

    Args:
        url: URL to Jira Cloud organization
        user_email: Email for the user with the permissions
        api_token: API Token, generated for the user

    Returns:
        Jira object

    Raises:
        ValueError: if the permission lookup reports that the user is not
            permitted to browse projects.
    """
    # Imported lazily so the module can be loaded without the optional dependency.
    from atlassian import Jira

    jira = Jira(
        url,
        username=user_email,
        password=api_token,
    )

    # The response is read as response["permissions"]["BROWSE_PROJECTS"]["havePermission"].
    response = jira.get_permissions("BROWSE_PROJECTS")
    permitted = response["permissions"]["BROWSE_PROJECTS"]["havePermission"]

    if permitted:
        return jira

    else:
        # NOTE(review): the message is a multi-line literal, so its line breaks and any
        # leading whitespace on the continuation lines are part of the text -- confirm
        # the continuation-line whitespace against the packaged file.
        raise ValueError(
            """The user with the provided *user_email* and the *api_token*
is not permitted to browse projects for the jira organization
for the provided *url*. Try checking user_email, api_token,
and the url arguments.""",
        )
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
@dataclass
class JiraAccessConfig(AccessConfig):
    """Access credentials for the Jira connector."""

    # API token for the Jira user; the field is declared with sensitive=True.
    api_token: str = enhanced_field(sensitive=True)
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
@dataclass
class SimpleJiraConfig(ConfigSessionHandleMixin, BaseConnectorConfig):
    """Connector config for an Atlassian (Jira) Cloud instance.

    Attributes:
        user_email: email used to authenticate into Atlassian (Jira) Cloud.
        access_config: holds the api_token used to authenticate into
            Atlassian (Jira) Cloud.
        url: URL pointing to the Atlassian (Jira) Cloud instance.
        projects: optional list of projects that are aimed to be ingested.
        boards: optional list of strings, None by default (presumably the
            boards to ingest -- how it is consumed is not visible here).
        issues: optional list of strings, None by default (presumably the
            issues to ingest -- how it is consumed is not visible here).

    Check the Atlassian documentation for more info on the api_token.
    """

    user_email: str
    access_config: JiraAccessConfig
    url: str
    projects: t.Optional[t.List[str]] = None
    boards: t.Optional[t.List[str]] = None
    issues: t.Optional[t.List[str]] = None

    def create_session_handle(self) -> JiraSessionHandle:
        """Create a Jira client from this config and wrap it in a session handle."""
        credentials = {
            "url": self.url,
            "user_email": self.user_email,
            "api_token": self.access_config.api_token,
        }
        jira_client = create_jira_object(**credentials)
        return JiraSessionHandle(service=jira_client)
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
@dataclass
class JiraFileMeta:
    """Metadata specifying:
    project_id: id for the jira project that the issue locates in,
    board_id: id for the board associated with the issue (may be None),
    issue_key: key for the issue that is being reached to, and
    issue_id: id for the issue that is being reached to.
    """

    project_id: str
    board_id: t.Optional[str]
    issue_key: str
    issue_id: str
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
# Provides nested-defaultdict behaviour for an API response object:
# every mapping level becomes a FieldGetter, so chained key lookups can be
# written without checking that each intermediate level exists.
def nested_object_to_field_getter(object):
    """Recursively convert a mapping (and its nested mappings) into FieldGetter objects.

    Anything that is not a mapping is returned unchanged. Inside a mapping, only
    values that are themselves mappings are converted; all other values
    (lists included) are stored as they are.
    """
    if not isinstance(object, abc.Mapping):
        return object

    converted = {}
    for key, value in object.items():
        is_nested = isinstance(value, abc.Mapping)
        converted[key] = FieldGetter(nested_object_to_field_getter(value)) if is_nested else value
    return FieldGetter(converted)
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
class FieldGetter(dict):
    """dict whose item lookup never raises for a missing key.

    Looking up a key that is absent, or whose stored value is None, returns a
    new empty FieldGetter so that further lookups can be chained on the result.
    The dict itself is not modified by such a lookup.
    """

    def __getitem__(self, key):
        if key in self:
            found = super().__getitem__(key)
            if found is not None:
                return found
        # Missing key or explicit None: hand back an empty, chainable placeholder.
        return FieldGetter({})
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def form_templated_string(issue, parsed_fields, c_sep="|||", r_sep="\n\n\n"):
    """Build the template string for one issue from its API response fields.

    The sections (ids, project, dropdown fields, subtasks, comments, text
    fields) are rendered by the private helpers with their own default
    separators and then joined with *r_sep*. *c_sep* is accepted but not used
    by this function.

    The template string will be saved to the disk, and then will be processed by partition.
    """
    sections = (
        _get_id_fields_for_issue(issue),
        _get_project_fields_for_issue(parsed_fields),
        _get_dropdown_fields_for_issue(parsed_fields),
        _get_subtasks_for_issue(parsed_fields),
        _get_comments_for_issue(parsed_fields),
        _get_text_fields_for_issue(parsed_fields),
    )
    return r_sep.join(sections)
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
# Default separators for the _get_*_for_issue helpers.
# DEFAULT_C_SEP (five spaces) goes between values rendered on the same line.
DEFAULT_C_SEP = " " * 5
# DEFAULT_R_SEP (a single newline) follows each rendered field.
DEFAULT_R_SEP = "\n"
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
def _get_id_fields_for_issue(issue, c_sep=DEFAULT_C_SEP, r_sep=DEFAULT_R_SEP):
    """Render the issue id and key as one ``IssueID_IssueKey:`` entry ending in *r_sep*."""
    issue_id = issue["id"]
    issue_key = issue["key"]
    return f"IssueID_IssueKey:{issue_id}{c_sep}{issue_key}{r_sep}"
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def _get_project_fields_for_issue(issue, c_sep=DEFAULT_C_SEP, r_sep=DEFAULT_R_SEP):
    """Render the project key and name as one ``ProjectID_Key:`` entry.

    Returns an empty string when the fields carry no "project" key.
    """
    if "project" not in issue:
        return ""

    project = issue["project"]
    return f"""ProjectID_Key:{project["key"]}{c_sep}{project["name"]}{r_sep}"""
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
def _get_dropdown_fields_for_issue(issue, c_sep=DEFAULT_C_SEP, r_sep=DEFAULT_R_SEP):
    """Render the issue's categorical fields as one multi-line text block.

    Emits, in order: IssueType, Status, Priority, AssigneeID_Name,
    ReporterAdr_Name, Labels and Components. Each entry is followed by a line
    holding *r_sep*; *c_sep* separates multiple values within one entry.
    """
    # NOTE(review): Priority interpolates issue["priority"] as a whole, whereas
    # IssueType and Status read the nested "name" -- confirm this is intended.
    # NOTE(review): everything between the triple quotes is emitted verbatim, so any
    # leading whitespace on these lines is part of the output -- confirm it against
    # the packaged file.
    return f"""
IssueType:{issue["issuetype"]["name"]}
{r_sep}
Status:{issue["status"]["name"]}
{r_sep}
Priority:{issue["priority"]}
{r_sep}
AssigneeID_Name:{issue["assignee"]["accountId"]}{c_sep}{issue["assignee"]["displayName"]}
{r_sep}
ReporterAdr_Name:{issue["reporter"]["emailAddress"]}{c_sep}{issue["reporter"]["displayName"]}
{r_sep}
Labels:{c_sep.join(issue["labels"])}
{r_sep}
Components:{c_sep.join([component["name"] for component in issue["components"]])}
{r_sep}
"""
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
def _get_subtasks_for_issue(issue):
    """Placeholder for the subtasks section; currently always returns an empty string."""
    return ""
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
def _get_text_fields_for_issue(issue, c_sep=DEFAULT_C_SEP, r_sep=DEFAULT_R_SEP):
    """Render the free-text part of an issue as one multi-line text block.

    Emits the summary, the description and the "self" value of every
    attachment (joined with *c_sep*), each followed by a line holding *r_sep*.
    """
    # NOTE(review): everything between the triple quotes is emitted verbatim, so any
    # leading whitespace on these lines is part of the output -- confirm it against
    # the packaged file.
    return f"""
{issue["summary"]}
{r_sep}
{issue["description"]}
{r_sep}
{c_sep.join([atch["self"] for atch in issue["attachment"]])}
{r_sep}
"""
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
def _get_comments_for_issue(issue, c_sep=DEFAULT_C_SEP, r_sep=DEFAULT_R_SEP):
    """Render every comment of the issue and join the results with *c_sep*.

    Each comment is rendered with the comment helper's own default separators;
    *r_sep* is accepted but not used by this function.
    """
    rendered_comments = []
    for comment in issue["comment"]["comments"]:
        rendered_comments.append(_get_fields_for_comment(comment))
    return c_sep.join(rendered_comments)
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
def _get_fields_for_comment(comment, c_sep=DEFAULT_C_SEP, r_sep=DEFAULT_R_SEP):
    """Render one comment as ``<author display name><c_sep><body><r_sep>``."""
    author_name = comment["author"]["displayName"]
    body = comment["body"]
    return f"{author_name}{c_sep}{body}{r_sep}"
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
def scroll_wrapper(func, results_key="results"):
    """Wrap *func* to obtain scroll (pagination) functionality.

    Args:
        func: callable that accepts 'start' and 'limit' keyword arguments and
            returns either a list of items or a dict holding the items under
            *results_key*.
        results_key: key under which a dict response stores its items.

    Returns:
        A wrapper taking the same arguments as *func* plus an optional
        'number_of_items_to_fetch' keyword (default 100). It requests pages of
        at most 100 items, starting at the 'start' keyword (default 0), and
        returns a list with at most 'number_of_items_to_fetch' items.

    Raises:
        KeyError: (from the wrapper) when a dict response lacks *results_key*.
    """
    from functools import wraps

    @wraps(func)
    def wrapper(*args, **kwargs):
        # 'number_of_items_to_fetch' is consumed here and never forwarded to func.
        number_of_items_to_fetch = kwargs.pop("number_of_items_to_fetch", 100)
        if number_of_items_to_fetch <= 0:
            # Nothing requested; also avoids a zero page size (division by zero below).
            return []

        kwargs["limit"] = min(100, number_of_items_to_fetch)
        kwargs["start"] = kwargs.get("start", 0)

        all_results = []
        num_iterations = math.ceil(number_of_items_to_fetch / kwargs["limit"])

        for _ in range(num_iterations):
            # Exactly one request per page; the same response is inspected and collected.
            response = func(*args, **kwargs)
            if isinstance(response, list):
                page = response
            elif isinstance(response, dict):
                if results_key not in response:
                    raise KeyError(
                        "Response object has no known keys to "
                        "access the results, such as 'results' or 'values'.",
                    )
                page = response[results_key]
            else:
                # Any other response type contributes no items; keep scrolling.
                page = None

            if page is not None:
                if not page:
                    # An empty page means the source has no further items.
                    break
                all_results += page
            kwargs["start"] += kwargs["limit"]

        return all_results[:number_of_items_to_fetch]

    return wrapper
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
@dataclass
class JiraIngestDoc(IngestDocSessionHandleMixin, IngestDocCleanupMixin, BaseSingleIngestDoc):
    """Class encapsulating fetching a doc and writing processed results (but not
    doing the processing).

    Current implementation creates a Jira connection object
    to fetch each doc, rather than creating it for each thread.
    """

    connector_config: SimpleJiraConfig
    file_meta: t.Optional[JiraFileMeta] = None
    registry_name: str = "jira"

    @cached_property
    def record_locator(self):  # Values must be JSON-serializable
        """A dictionary with any data necessary to uniquely identify the document on
        the source system."""
        return dict(
            base_url=self.connector_config.url,
            issue_key=self.file_meta.issue_key,
        )

    @cached_property
    @SourceConnectionNetworkError.wrap
    def issue(self):
        """Gets issue data"""
        return self.session_handle.service.issue(self.file_meta.issue_key)

    @cached_property
    def parsed_fields(self):
        """The issue's "fields" entry converted for chained, failure-free key lookups."""
        raw_fields = self.issue["fields"]
        return nested_object_to_field_getter(raw_fields)

    @property
    def grouping_folder_name(self):
        """Folder name for this doc: the board id when set, otherwise the project id."""
        return self.file_meta.board_id or self.file_meta.project_id

    @property
    def filename(self):
        """Resolved path of the downloaded text file: <download_dir>/<group>/<issue_id>.txt."""
        download_file = f"{self.file_meta.issue_id}.txt"
        download_root = Path(self.read_config.download_dir)
        return (download_root / self.grouping_folder_name / download_file).resolve()

    @property
    def _output_filename(self):
        """Create output file path."""
        output_file = f"{self.file_meta.issue_id}.json"
        output_root = Path(self.processor_config.output_dir)
        return (output_root / self.grouping_folder_name / output_file).resolve()

    @property
    def version(self) -> t.Optional[str]:
        """No version is tracked for Jira issues; always None."""
        return None

    def update_source_metadata(self, **kwargs) -> None:
        """Populate source_metadata from the fetched issue.

        When the issue data is falsy only ``exists`` is recorded; otherwise the
        created/updated timestamps and the browse URL are recorded as well.
        """
        exists = bool(self.issue)
        if exists:
            timestamp_format = "%Y-%m-%dT%H:%M:%S.%f%z"
            created = datetime.strptime(self.parsed_fields["created"], timestamp_format)
            updated = datetime.strptime(self.parsed_fields["updated"], timestamp_format)
            self.source_metadata = SourceMetadata(
                date_created=created.isoformat(),
                date_modified=updated.isoformat(),
                source_url=f"{self.connector_config.url}/browse/{self.file_meta.issue_key}",
                exists=exists,
            )
        else:
            self.source_metadata = SourceMetadata(
                exists=exists,
            )

    @SourceConnectionError.wrap
    @requires_dependencies(["atlassian"], extras="jira")
    @BaseSingleIngestDoc.skip_if_file_exists
    def get_file(self):
        """Render the issue as a templated string and write it to ``filename``."""
        document = form_templated_string(self.issue, self.parsed_fields)
        self.update_source_metadata()
        self.filename.parent.mkdir(parents=True, exist_ok=True)

        with open(self.filename, "w", encoding="utf8") as out_file:
            out_file.write(document)
|
|
343
|
+
|
|
344
|
+
|
|
345
|
+
@dataclass
class JiraSourceConnector(SourceConnectorCleanupMixin, BaseSourceConnector):
    """Fetches issues from projects in an Atlassian (Jira) Cloud instance."""

    connector_config: SimpleJiraConfig
    _jira: t.Optional["Jira"] = field(init=False, default=None)

    @property
    def jira(self) -> "Jira":
        """Return the Jira client, creating it from the connector config on first use.

        Raises:
            SourceConnectionError: if the session handle cannot be created.
        """
        if self._jira is None:
            try:
                self._jira = self.connector_config.create_session_handle().service
            except Exception as e:
                logger.error(f"failed to validate connection: {e}", exc_info=True)
                # Chain the original exception so the root cause stays in the traceback.
                raise SourceConnectionError(f"failed to validate connection: {e}") from e
        return self._jira

    @requires_dependencies(["atlassian"], extras="jira")
    def initialize(self):
        """Create the Jira client eagerly by touching the ``jira`` property."""
        _ = self.jira

    def check_connection(self):
        """Validate the connection by creating (or reusing) the Jira client."""
        _ = self.jira

    @requires_dependencies(["atlassian"], extras="jira")
    def _get_all_project_ids(self):
        """Fetches ids for all projects in a Jira domain."""
        return [project["key"] for project in self.jira.projects()]

    @requires_dependencies(["atlassian"], extras="jira")
    def _get_issues_within_one_project(
        self,
        project_id: str,
    ):
        """Return ``(issue_key, issue_id, None)`` for every issue in one project."""
        get_issues_with_scroll = scroll_wrapper(self.jira.get_all_project_issues)
        results = get_issues_with_scroll(project=project_id, fields=["key"])

        return [(issue["key"], issue["id"], None) for issue in results]

    @requires_dependencies(["atlassian"], extras="jira")
    def _get_issue_keys_within_projects(self, project_ids: t.Optional[t.List[str]] = None):
        """Return ``(issue_key, issue_id, None)`` tuples for the given projects.

        Args:
            project_ids: project keys to read issues from. When ``None``:
                an empty list is returned if boards or issues are configured
                (the caller did not ask for projects); otherwise every project
                reported by Jira is used.
        """
        if project_ids is None:
            # for when a component list is provided, without any projects
            if self.connector_config.boards or self.connector_config.issues:
                return []
            # for when no components are provided. all projects will be ingested.
            # The discovered project keys must still be expanded into issue
            # tuples below; returning the bare keys would break the
            # three-way unpacking done in get_ingest_docs.
            project_ids = self._get_all_project_ids()

        # Board id is always None for issues discovered through a project.
        return [
            (issue_key, issue_id, None)
            for project_id in project_ids
            for issue_key, issue_id, _ in self._get_issues_within_one_project(
                project_id=project_id,
            )
        ]

    def _get_issues_within_one_board(self, board_id: str):
        """Return ``(issue_key, issue_id, board_id)`` for every issue on one board."""
        get_issues_with_scroll = scroll_wrapper(
            self.jira.get_issues_for_board,
            results_key="issues",
        )
        results = get_issues_with_scroll(board_id=board_id, fields=["key"], jql=None)

        return [(issue["key"], issue["id"], board_id) for issue in results]

    def _get_issue_keys_within_boards(self, board_ids):
        """Return ``(issue_key, issue_id, board_id)`` tuples for the given boards.

        Returns an empty list when ``board_ids`` is ``None``.
        """
        if board_ids is None:
            return []

        return [
            (issue_key, issue_id, found_board_id)
            for board_id in board_ids
            for issue_key, issue_id, found_board_id in self._get_issues_within_one_board(
                board_id=board_id,
            )
        ]

    def get_issues_info(self, issues):
        """Look up each given issue and return ``(issue_key, issue_id, None)`` tuples."""
        issues_info = [self.jira.get_issue(issue, ["key", "id"]) for issue in issues]
        return [(info["key"], info["id"], None) for info in issues_info]

    def get_issue_keys_for_given_components(self):
        """Collect issue tuples for the configured projects, boards and issues, in that order."""
        issues = []

        if self.connector_config.projects:
            issues += self._get_issue_keys_within_projects(self.connector_config.projects)
        if self.connector_config.boards:
            issues += self._get_issue_keys_within_boards(self.connector_config.boards)
        if self.connector_config.issues:
            issues += self.get_issues_info(self.connector_config.issues)

        return issues

    def get_ingest_docs(self):
        """Build one ``JiraIngestDoc`` per issue to ingest.

        Uses the configured projects, boards and issues when any are set;
        otherwise falls back to every issue of every project.
        """
        config = self.connector_config
        if config.projects or config.boards or config.issues:
            issue_keys_and_ids = self.get_issue_keys_for_given_components()
        else:
            # gets all issue ids from all projects
            issue_keys_and_ids = self._get_issue_keys_within_projects()

        return [
            JiraIngestDoc(
                connector_config=self.connector_config,
                processor_config=self.processor_config,
                read_config=self.read_config,
                file_meta=JiraFileMeta(
                    issue_id=issue_id,
                    issue_key=issue_key,
                    # The project id is the part of the issue key before the first "-".
                    project_id=issue_key.split("-")[0],
                    board_id=board_id,
                ),
            )
            for issue_key, issue_id, board_id in issue_keys_and_ids
        ]
|