unstructured-ingest 1.0.21__tar.gz → 1.0.24__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of unstructured-ingest might be problematic. Click here for more details.

Files changed (239)
  1. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/PKG-INFO +2 -2
  2. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/pyproject.toml +2 -2
  3. unstructured_ingest-1.0.24/unstructured_ingest/__version__.py +1 -0
  4. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/embed/mixedbreadai.py +28 -45
  5. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/jira.py +197 -191
  6. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/notion/types/database_properties/__init__.py +22 -3
  7. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/notion/types/database_properties/checkbox.py +1 -0
  8. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/notion/types/database_properties/created_by.py +1 -0
  9. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/notion/types/database_properties/created_time.py +1 -0
  10. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/notion/types/database_properties/date.py +1 -0
  11. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/notion/types/database_properties/email.py +1 -0
  12. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/notion/types/database_properties/files.py +1 -0
  13. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/notion/types/database_properties/formula.py +1 -0
  14. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/notion/types/database_properties/last_edited_by.py +1 -1
  15. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/notion/types/database_properties/last_edited_time.py +1 -0
  16. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/notion/types/database_properties/multiselect.py +1 -0
  17. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/notion/types/database_properties/number.py +1 -0
  18. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/notion/types/database_properties/people.py +1 -0
  19. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/notion/types/database_properties/phone_number.py +1 -0
  20. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/notion/types/database_properties/relation.py +1 -0
  21. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/notion/types/database_properties/rich_text.py +1 -0
  22. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/notion/types/database_properties/rollup.py +1 -0
  23. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/notion/types/database_properties/status.py +1 -0
  24. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/notion/types/database_properties/title.py +1 -0
  25. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/notion/types/database_properties/unique_id.py +1 -0
  26. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/notion/types/database_properties/url.py +1 -0
  27. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/notion/types/database_properties/verification.py +1 -0
  28. unstructured_ingest-1.0.21/unstructured_ingest/__version__.py +0 -1
  29. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/.gitignore +0 -0
  30. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/LICENSE.md +0 -0
  31. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/README.md +0 -0
  32. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/__init__.py +0 -0
  33. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/cli/README.md +0 -0
  34. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/cli/__init__.py +0 -0
  35. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/cli/base/__init__.py +0 -0
  36. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/cli/base/cmd.py +0 -0
  37. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/cli/base/dest.py +0 -0
  38. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/cli/base/importer.py +0 -0
  39. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/cli/base/src.py +0 -0
  40. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/cli/cli.py +0 -0
  41. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/cli/cmds.py +0 -0
  42. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/cli/utils/__init__.py +0 -0
  43. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/cli/utils/click.py +0 -0
  44. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/cli/utils/model_conversion.py +0 -0
  45. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/data_types/__init__.py +0 -0
  46. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/data_types/entities.py +0 -0
  47. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/data_types/file_data.py +0 -0
  48. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/embed/__init__.py +0 -0
  49. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/embed/azure_openai.py +0 -0
  50. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/embed/bedrock.py +0 -0
  51. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/embed/huggingface.py +0 -0
  52. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/embed/interfaces.py +0 -0
  53. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/embed/octoai.py +0 -0
  54. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/embed/openai.py +0 -0
  55. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/embed/togetherai.py +0 -0
  56. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/embed/vertexai.py +0 -0
  57. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/embed/voyageai.py +0 -0
  58. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/error.py +0 -0
  59. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/errors_v2.py +0 -0
  60. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/interfaces/__init__.py +0 -0
  61. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/interfaces/connector.py +0 -0
  62. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/interfaces/downloader.py +0 -0
  63. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/interfaces/indexer.py +0 -0
  64. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/interfaces/process.py +0 -0
  65. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/interfaces/processor.py +0 -0
  66. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/interfaces/upload_stager.py +0 -0
  67. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/interfaces/uploader.py +0 -0
  68. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/logger.py +0 -0
  69. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/main.py +0 -0
  70. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/otel.py +0 -0
  71. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/pipeline/__init__.py +0 -0
  72. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/pipeline/interfaces.py +0 -0
  73. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/pipeline/otel.py +0 -0
  74. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/pipeline/pipeline.py +0 -0
  75. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/pipeline/steps/__init__.py +0 -0
  76. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/pipeline/steps/chunk.py +0 -0
  77. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/pipeline/steps/download.py +0 -0
  78. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/pipeline/steps/embed.py +0 -0
  79. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/pipeline/steps/filter.py +0 -0
  80. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/pipeline/steps/index.py +0 -0
  81. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/pipeline/steps/partition.py +0 -0
  82. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/pipeline/steps/stage.py +0 -0
  83. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/pipeline/steps/uncompress.py +0 -0
  84. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/pipeline/steps/upload.py +0 -0
  85. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/__init__.py +0 -0
  86. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/chunker.py +0 -0
  87. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connector_registry.py +0 -0
  88. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/__init__.py +0 -0
  89. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/airtable.py +0 -0
  90. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/assets/__init__.py +0 -0
  91. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/assets/databricks_delta_table_schema.sql +0 -0
  92. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/assets/weaviate_collection_config.json +0 -0
  93. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/astradb.py +0 -0
  94. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/azure_ai_search.py +0 -0
  95. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/chroma.py +0 -0
  96. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/confluence.py +0 -0
  97. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/couchbase.py +0 -0
  98. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/databricks/__init__.py +0 -0
  99. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/databricks/volumes.py +0 -0
  100. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/databricks/volumes_aws.py +0 -0
  101. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/databricks/volumes_azure.py +0 -0
  102. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/databricks/volumes_gcp.py +0 -0
  103. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/databricks/volumes_native.py +0 -0
  104. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/databricks/volumes_table.py +0 -0
  105. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/delta_table.py +0 -0
  106. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/discord.py +0 -0
  107. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/duckdb/__init__.py +0 -0
  108. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/duckdb/base.py +0 -0
  109. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/duckdb/duckdb.py +0 -0
  110. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/duckdb/motherduck.py +0 -0
  111. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/elasticsearch/__init__.py +0 -0
  112. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/elasticsearch/elasticsearch.py +0 -0
  113. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/elasticsearch/opensearch.py +0 -0
  114. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/fsspec/__init__.py +0 -0
  115. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/fsspec/azure.py +0 -0
  116. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/fsspec/box.py +0 -0
  117. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/fsspec/dropbox.py +0 -0
  118. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/fsspec/fsspec.py +0 -0
  119. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/fsspec/gcs.py +0 -0
  120. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/fsspec/s3.py +0 -0
  121. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/fsspec/sftp.py +0 -0
  122. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/fsspec/utils.py +0 -0
  123. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/github.py +0 -0
  124. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/gitlab.py +0 -0
  125. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/google_drive.py +0 -0
  126. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/ibm_watsonx/__init__.py +0 -0
  127. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/ibm_watsonx/ibm_watsonx_s3.py +0 -0
  128. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/kafka/__init__.py +0 -0
  129. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/kafka/cloud.py +0 -0
  130. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/kafka/kafka.py +0 -0
  131. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/kafka/local.py +0 -0
  132. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/kdbai.py +0 -0
  133. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/lancedb/__init__.py +0 -0
  134. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/lancedb/aws.py +0 -0
  135. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/lancedb/azure.py +0 -0
  136. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/lancedb/cloud.py +0 -0
  137. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/lancedb/gcp.py +0 -0
  138. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/lancedb/lancedb.py +0 -0
  139. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/lancedb/local.py +0 -0
  140. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/local.py +0 -0
  141. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/milvus.py +0 -0
  142. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/mongodb.py +0 -0
  143. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/neo4j.py +0 -0
  144. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/notion/__init__.py +0 -0
  145. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/notion/client.py +0 -0
  146. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/notion/connector.py +0 -0
  147. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/notion/helpers.py +0 -0
  148. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/notion/ingest_backoff/__init__.py +0 -0
  149. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/notion/ingest_backoff/_common.py +0 -0
  150. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/notion/ingest_backoff/_wrapper.py +0 -0
  151. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/notion/ingest_backoff/types.py +0 -0
  152. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/notion/interfaces.py +0 -0
  153. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/notion/types/__init__.py +0 -0
  154. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/notion/types/block.py +0 -0
  155. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/notion/types/blocks/__init__.py +0 -0
  156. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/notion/types/blocks/bookmark.py +0 -0
  157. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/notion/types/blocks/breadcrumb.py +0 -0
  158. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/notion/types/blocks/bulleted_list_item.py +0 -0
  159. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/notion/types/blocks/callout.py +0 -0
  160. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/notion/types/blocks/child_database.py +0 -0
  161. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/notion/types/blocks/child_page.py +0 -0
  162. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/notion/types/blocks/code.py +0 -0
  163. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/notion/types/blocks/column_list.py +0 -0
  164. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/notion/types/blocks/divider.py +0 -0
  165. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/notion/types/blocks/embed.py +0 -0
  166. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/notion/types/blocks/equation.py +0 -0
  167. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/notion/types/blocks/file.py +0 -0
  168. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/notion/types/blocks/heading.py +0 -0
  169. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/notion/types/blocks/image.py +0 -0
  170. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/notion/types/blocks/link_preview.py +0 -0
  171. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/notion/types/blocks/link_to_page.py +0 -0
  172. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/notion/types/blocks/numbered_list.py +0 -0
  173. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/notion/types/blocks/paragraph.py +0 -0
  174. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/notion/types/blocks/pdf.py +0 -0
  175. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/notion/types/blocks/quote.py +0 -0
  176. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/notion/types/blocks/synced_block.py +0 -0
  177. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/notion/types/blocks/table.py +0 -0
  178. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/notion/types/blocks/table_of_contents.py +0 -0
  179. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/notion/types/blocks/template.py +0 -0
  180. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/notion/types/blocks/todo.py +0 -0
  181. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/notion/types/blocks/toggle.py +0 -0
  182. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/notion/types/blocks/unsupported.py +0 -0
  183. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/notion/types/blocks/video.py +0 -0
  184. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/notion/types/database.py +0 -0
  185. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/notion/types/database_properties/select.py +1 -1
  186. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/notion/types/date.py +0 -0
  187. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/notion/types/file.py +0 -0
  188. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/notion/types/page.py +0 -0
  189. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/notion/types/parent.py +0 -0
  190. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/notion/types/rich_text.py +0 -0
  191. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/notion/types/user.py +0 -0
  192. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/onedrive.py +0 -0
  193. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/outlook.py +0 -0
  194. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/pinecone.py +0 -0
  195. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/qdrant/__init__.py +0 -0
  196. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/qdrant/cloud.py +0 -0
  197. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/qdrant/local.py +0 -0
  198. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/qdrant/qdrant.py +0 -0
  199. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/qdrant/server.py +0 -0
  200. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/redisdb.py +0 -0
  201. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/salesforce.py +0 -0
  202. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/sharepoint.py +0 -0
  203. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/slack.py +0 -0
  204. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/sql/__init__.py +0 -0
  205. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/sql/databricks_delta_tables.py +0 -0
  206. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/sql/postgres.py +0 -0
  207. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/sql/singlestore.py +0 -0
  208. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/sql/snowflake.py +0 -0
  209. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/sql/sql.py +0 -0
  210. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/sql/sqlite.py +0 -0
  211. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/sql/vastdb.py +0 -0
  212. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/utils.py +0 -0
  213. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/vectara.py +0 -0
  214. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/weaviate/__init__.py +0 -0
  215. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/weaviate/cloud.py +0 -0
  216. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/weaviate/embedded.py +0 -0
  217. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/weaviate/local.py +0 -0
  218. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/weaviate/weaviate.py +0 -0
  219. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/zendesk/__init__.py +0 -0
  220. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/zendesk/client.py +0 -0
  221. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/connectors/zendesk/zendesk.py +0 -0
  222. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/embedder.py +0 -0
  223. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/filter.py +0 -0
  224. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/partitioner.py +0 -0
  225. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/uncompress.py +0 -0
  226. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/utils/__init__.py +0 -0
  227. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/processes/utils/blob_storage.py +0 -0
  228. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/unstructured_api.py +0 -0
  229. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/utils/__init__.py +0 -0
  230. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/utils/chunking.py +0 -0
  231. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/utils/compression.py +0 -0
  232. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/utils/constants.py +0 -0
  233. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/utils/data_prep.py +0 -0
  234. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/utils/dep_check.py +0 -0
  235. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/utils/html.py +0 -0
  236. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/utils/ndjson.py +0 -0
  237. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/utils/pydantic_models.py +0 -0
  238. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/utils/string_and_date_utils.py +0 -0
  239. {unstructured_ingest-1.0.21 → unstructured_ingest-1.0.24}/unstructured_ingest/utils/table.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: unstructured_ingest
3
- Version: 1.0.21
3
+ Version: 1.0.24
4
4
  Summary: Local ETL data pipeline to get data RAG ready
5
5
  Author-email: Unstructured Technologies <devops@unstructuredai.io>
6
6
  License-Expression: Apache-2.0
@@ -114,7 +114,7 @@ Requires-Dist: unstructured[md]; extra == 'md'
114
114
  Provides-Extra: milvus
115
115
  Requires-Dist: pymilvus; extra == 'milvus'
116
116
  Provides-Extra: mixedbreadai
117
- Requires-Dist: mixedbread-ai; extra == 'mixedbreadai'
117
+ Requires-Dist: mixedbread; extra == 'mixedbreadai'
118
118
  Provides-Extra: mongodb
119
119
  Requires-Dist: pymongo; extra == 'mongodb'
120
120
  Provides-Extra: msg
@@ -136,14 +136,14 @@ test = [
136
136
  "deepdiff",
137
137
  "bs4",
138
138
  "pandas",
139
-
140
139
  # Connector specific deps
141
140
  "cryptography",
142
141
  "fsspec",
143
142
  "vertexai",
144
143
  "pyiceberg",
145
144
  "pyarrow",
146
- "networkx"
145
+ "networkx",
146
+ "htmlbuilder",
147
147
  ]
148
148
  # Add constraints needed for CI
149
149
  ci = [
@@ -0,0 +1 @@
1
+ __version__ = "1.0.24" # pragma: no cover
@@ -19,8 +19,7 @@ TRUNCATION_STRATEGY = "end"
19
19
 
20
20
 
21
21
  if TYPE_CHECKING:
22
- from mixedbread_ai.client import AsyncMixedbreadAI, MixedbreadAI
23
- from mixedbread_ai.core import RequestOptions
22
+ from mixedbread import AsyncMixedbread, Mixedbread
24
23
 
25
24
 
26
25
  class MixedbreadAIEmbeddingConfig(EmbeddingConfig):
@@ -44,31 +43,33 @@ class MixedbreadAIEmbeddingConfig(EmbeddingConfig):
44
43
  )
45
44
 
46
45
  @requires_dependencies(
47
- ["mixedbread_ai"],
48
- extras="mixedbreadai",
46
+ ["mixedbread"],
47
+ extras="embed-mixedbreadai",
49
48
  )
50
- def get_client(self) -> "MixedbreadAI":
49
+ def get_client(self) -> "Mixedbread":
51
50
  """
52
51
  Create the Mixedbread AI client.
53
52
 
54
53
  Returns:
55
- MixedbreadAI: Initialized client.
54
+ Mixedbread: Initialized client.
56
55
  """
57
- from mixedbread_ai.client import MixedbreadAI
56
+ from mixedbread import Mixedbread
58
57
 
59
- return MixedbreadAI(
58
+ return Mixedbread(
60
59
  api_key=self.api_key.get_secret_value(),
60
+ max_retries=MAX_RETRIES,
61
61
  )
62
62
 
63
63
  @requires_dependencies(
64
- ["mixedbread_ai"],
65
- extras="mixedbreadai",
64
+ ["mixedbread"],
65
+ extras="embed-mixedbreadai",
66
66
  )
67
- def get_async_client(self) -> "AsyncMixedbreadAI":
68
- from mixedbread_ai.client import AsyncMixedbreadAI
67
+ def get_async_client(self) -> "AsyncMixedbread":
68
+ from mixedbread import AsyncMixedbread
69
69
 
70
- return AsyncMixedbreadAI(
70
+ return AsyncMixedbread(
71
71
  api_key=self.api_key.get_secret_value(),
72
+ max_retries=MAX_RETRIES,
72
73
  )
73
74
 
74
75
 
@@ -88,29 +89,20 @@ class MixedbreadAIEmbeddingEncoder(BaseEmbeddingEncoder):
88
89
  return self.embed_query(query="Q")
89
90
 
90
91
  @requires_dependencies(
91
- ["mixedbread_ai"],
92
+ ["mixedbread"],
92
93
  extras="embed-mixedbreadai",
93
94
  )
94
- def get_request_options(self) -> "RequestOptions":
95
- from mixedbread_ai.core import RequestOptions
96
-
97
- return RequestOptions(
98
- max_retries=MAX_RETRIES,
99
- timeout_in_seconds=TIMEOUT,
100
- additional_headers={"User-Agent": USER_AGENT},
101
- )
102
-
103
- def get_client(self) -> "MixedbreadAI":
95
+ def get_client(self) -> "Mixedbread":
104
96
  return self.config.get_client()
105
97
 
106
- def embed_batch(self, client: "MixedbreadAI", batch: list[str]) -> list[list[float]]:
107
- response = client.embeddings(
98
+ def embed_batch(self, client: "Mixedbread", batch: list[str]) -> list[list[float]]:
99
+ response = client.embed(
108
100
  model=self.config.embedder_model_name,
101
+ input=batch,
109
102
  normalized=True,
110
103
  encoding_format=ENCODING_FORMAT,
111
- truncation_strategy=TRUNCATION_STRATEGY,
112
- request_options=self.get_request_options(),
113
- input=batch,
104
+ extra_headers={"User-Agent": USER_AGENT},
105
+ timeout=TIMEOUT,
114
106
  )
115
107
  return [datum.embedding for datum in response.data]
116
108
 
@@ -124,28 +116,19 @@ class AsyncMixedbreadAIEmbeddingEncoder(AsyncBaseEmbeddingEncoder):
124
116
  return await self.embed_query(query="Q")
125
117
 
126
118
  @requires_dependencies(
127
- ["mixedbread_ai"],
119
+ ["mixedbread"],
128
120
  extras="embed-mixedbreadai",
129
121
  )
130
- def get_request_options(self) -> "RequestOptions":
131
- from mixedbread_ai.core import RequestOptions
132
-
133
- return RequestOptions(
134
- max_retries=MAX_RETRIES,
135
- timeout_in_seconds=TIMEOUT,
136
- additional_headers={"User-Agent": USER_AGENT},
137
- )
138
-
139
- def get_client(self) -> "AsyncMixedbreadAI":
122
+ def get_client(self) -> "AsyncMixedbread":
140
123
  return self.config.get_async_client()
141
124
 
142
- async def embed_batch(self, client: "AsyncMixedbreadAI", batch: list[str]) -> list[list[float]]:
143
- response = await client.embeddings(
125
+ async def embed_batch(self, client: "AsyncMixedbread", batch: list[str]) -> list[list[float]]:
126
+ response = await client.embed(
144
127
  model=self.config.embedder_model_name,
128
+ input=batch,
145
129
  normalized=True,
146
130
  encoding_format=ENCODING_FORMAT,
147
- truncation_strategy=TRUNCATION_STRATEGY,
148
- request_options=self.get_request_options(),
149
- input=batch,
131
+ extra_headers={"User-Agent": USER_AGENT},
132
+ timeout=TIMEOUT,
150
133
  )
151
134
  return [datum.embedding for datum in response.data]
@@ -1,11 +1,11 @@
1
- import math
2
1
  from collections import abc
3
2
  from contextlib import contextmanager
4
3
  from dataclasses import dataclass, field
5
4
  from pathlib import Path
6
- from typing import TYPE_CHECKING, Any, Dict, Generator, List, Optional, Union, cast
5
+ from time import time
6
+ from typing import TYPE_CHECKING, Any, Callable, Generator, List, Optional, Union
7
7
 
8
- from pydantic import Field, Secret
8
+ from pydantic import BaseModel, Field, Secret
9
9
 
10
10
  from unstructured_ingest.data_types.file_data import (
11
11
  FileData,
@@ -21,6 +21,7 @@ from unstructured_ingest.interfaces import (
21
21
  DownloadResponse,
22
22
  Indexer,
23
23
  IndexerConfig,
24
+ download_responses,
24
25
  )
25
26
  from unstructured_ingest.logger import logger
26
27
  from unstructured_ingest.processes.connector_registry import (
@@ -37,24 +38,13 @@ DEFAULT_C_SEP = " " * 5
37
38
  DEFAULT_R_SEP = "\n"
38
39
 
39
40
 
40
- @dataclass
41
- class JiraIssueMetadata:
41
+ class JiraIssueMetadata(BaseModel):
42
42
  id: str
43
43
  key: str
44
- board_id: Optional[str] = None
45
44
 
46
- @property
47
- def project_id(self) -> str:
45
+ def get_project_id(self) -> str:
48
46
  return self.key.split("-")[0]
49
47
 
50
- def to_dict(self) -> Dict[str, Union[str, None]]:
51
- return {
52
- "id": self.id,
53
- "key": self.key,
54
- "board_id": self.board_id,
55
- "project_id": self.project_id,
56
- }
57
-
58
48
 
59
49
  class FieldGetter(dict):
60
50
  def __getitem__(self, key):
@@ -77,52 +67,32 @@ def nested_object_to_field_getter(obj: dict) -> Union[FieldGetter, dict]:
77
67
  return obj
78
68
 
79
69
 
80
- def issues_fetcher_wrapper(func, results_key="results", number_of_issues_to_fetch: int = 100):
81
- """
82
- A decorator function that wraps around a function to fetch issues from Jira API in a paginated
83
- manner. This is required because the Jira API has a limit of 100 issues per request.
84
-
85
- Args:
86
- func (callable): The function to be wrapped. This function should accept `limit` and `start`
87
- as keyword arguments.
88
- results_key (str, optional): The key in the response dictionary that contains the list of
89
- results. Defaults to "results".
90
- number_of_issues_to_fetch (int, optional): The total number of issues to fetch. Defaults to
91
- 100.
92
-
93
- Returns:
94
- list: A list of all fetched issues.
95
-
96
- Raises:
97
- KeyError: If the response dictionary does not contain the specified `results_key`.
98
- TypeError: If the response type from the Jira API is neither list nor dict.
99
- """
100
-
101
- def wrapper(*args, **kwargs) -> list:
102
- kwargs["limit"] = min(100, number_of_issues_to_fetch)
103
- kwargs["start"] = kwargs.get("start", 0)
104
-
105
- all_results = []
106
- num_iterations = math.ceil(number_of_issues_to_fetch / kwargs["limit"])
107
-
108
- for _ in range(num_iterations):
109
- response = func(*args, **kwargs)
110
- if isinstance(response, list):
111
- all_results += response
112
- elif isinstance(response, dict):
113
- if results_key not in response:
114
- raise KeyError(f'Response object is missing "{results_key}" key.')
115
- all_results += response[results_key]
116
- else:
117
- raise TypeError(
118
- f"""Unexpected response type from Jira API.
119
- Response type has to be either list or dict, got: {type(response).__name__}."""
120
- )
121
- kwargs["start"] += kwargs["limit"]
122
-
123
- return all_results
124
-
125
- return wrapper
70
+ def api_token_based_generator(
71
+ fn: Callable, key: str = "issues", **kwargs
72
+ ) -> Generator[dict, None, None]:
73
+ nextPageToken = kwargs.pop("nextPageToken", None)
74
+ while True:
75
+ resp = fn(nextPageToken=nextPageToken, **kwargs)
76
+ issues = resp.get(key, [])
77
+ for issue in issues:
78
+ yield issue
79
+ nextPageToken = resp.get("nextPageToken")
80
+ if not nextPageToken:
81
+ break
82
+
83
+
84
+ def api_page_based_generator(
85
+ fn: Callable, key: str = "issues", **kwargs
86
+ ) -> Generator[dict, None, None]:
87
+ start = kwargs.pop("start", 0)
88
+ while True:
89
+ resp = fn(start=start, **kwargs)
90
+ issues = resp.get(key, [])
91
+ if not issues:
92
+ break
93
+ for issue in issues:
94
+ yield issue
95
+ start += len(issues)
126
96
 
127
97
 
128
98
  class JiraAccessConfig(AccessConfig):
@@ -169,28 +139,8 @@ class JiraConnectionConfig(ConnectionConfig):
169
139
  def get_client(self) -> Generator["Jira", None, None]:
170
140
  from atlassian import Jira
171
141
 
172
- class CustomJira(Jira):
173
- """
174
- Custom Jira class to fix the issue with the get_project_issues_count method.
175
- This class inherits from the original Jira class and overrides the method to
176
- handle the response correctly.
177
- Once the issue is fixed in the original library, this class can be removed.
178
- """
179
-
180
- def __init__(self, *args, **kwargs):
181
- super().__init__(*args, **kwargs)
182
-
183
- def get_project_issues_count(self, project: str) -> int:
184
- jql = f'project = "{project}" '
185
- response = self.jql(jql, fields="*none")
186
- response = cast("dict", response)
187
- if "total" in response:
188
- return response["total"]
189
- else:
190
- return len(response["issues"])
191
-
192
142
  access_configs = self.access_config.get_secret_value()
193
- with CustomJira(
143
+ with Jira(
194
144
  url=self.url,
195
145
  username=self.username,
196
146
  password=access_configs.password,
@@ -201,9 +151,17 @@ class JiraConnectionConfig(ConnectionConfig):
201
151
 
202
152
 
203
153
  class JiraIndexerConfig(IndexerConfig):
204
- projects: Optional[List[str]] = Field(None, description="List of project keys")
205
- boards: Optional[List[str]] = Field(None, description="List of board IDs")
206
- issues: Optional[List[str]] = Field(None, description="List of issue keys or IDs")
154
+ projects: Optional[list[str]] = Field(None, description="List of project keys")
155
+ boards: Optional[list[str]] = Field(None, description="List of board IDs")
156
+ issues: Optional[list[str]] = Field(None, description="List of issue keys or IDs")
157
+ status_filters: Optional[list[str]] = Field(
158
+ default=None,
159
+ description="List of status filters, if provided will only return issues that have these statuses", # noqa: E501
160
+ )
161
+
162
+ def model_post_init(self, context: Any, /) -> None:
163
+ if not self.projects and not self.boards and not self.issues:
164
+ raise ValueError("At least one of projects, boards, or issues must be provided.")
207
165
 
208
166
 
209
167
  @dataclass
@@ -228,122 +186,111 @@ class JiraIndexer(Indexer):
228
186
  )
229
187
  logger.info("Connection to Jira successful.")
230
188
 
231
- def _get_issues_within_single_project(self, project_key: str) -> List[JiraIssueMetadata]:
189
+ def run_jql(self, jql: str, **kwargs) -> Generator[JiraIssueMetadata, None, None]:
232
190
  with self.connection_config.get_client() as client:
233
- number_of_issues_to_fetch = client.get_project_issues_count(project=project_key)
234
- if isinstance(number_of_issues_to_fetch, dict):
235
- if "total" not in number_of_issues_to_fetch:
236
- raise KeyError('Response object is missing "total" key.')
237
- number_of_issues_to_fetch = number_of_issues_to_fetch["total"]
238
- if not number_of_issues_to_fetch:
239
- logger.warning(f"No issues found in project: {project_key}. Skipping!")
240
- return []
241
- get_project_issues = issues_fetcher_wrapper(
242
- client.get_all_project_issues,
243
- results_key="issues",
244
- number_of_issues_to_fetch=number_of_issues_to_fetch,
245
- )
246
- issues = get_project_issues(project=project_key, fields=["key", "id"])
247
- logger.debug(f"Found {len(issues)} issues in project: {project_key}")
248
- return [JiraIssueMetadata(id=issue["id"], key=issue["key"]) for issue in issues]
249
-
250
- def _get_issues_within_projects(self) -> List[JiraIssueMetadata]:
251
- project_keys = self.index_config.projects
252
- if not project_keys:
253
- # for when a component list is provided, without any projects
254
- if self.index_config.boards or self.index_config.issues:
255
- return []
256
- # for when no components are provided. all projects will be ingested
191
+ if client.cloud:
192
+ for issue in api_token_based_generator(client.enhanced_jql, jql=jql, **kwargs):
193
+ yield JiraIssueMetadata.model_validate(issue)
257
194
  else:
258
- with self.connection_config.get_client() as client:
259
- project_keys = [project["key"] for project in client.projects()]
260
- return [
261
- issue
262
- for project_key in project_keys
263
- for issue in self._get_issues_within_single_project(project_key)
264
- ]
195
+ for issue in api_page_based_generator(client.jql, jql=jql, **kwargs):
196
+ yield JiraIssueMetadata.model_validate(issue)
197
+
198
+ def _get_issues_within_projects(self) -> Generator[JiraIssueMetadata, None, None]:
199
+ fields = ["key", "id", "status"]
200
+ jql = "project in ({})".format(", ".join(self.index_config.projects))
201
+ jql = self._update_jql(jql)
202
+ logger.debug(f"running jql: {jql}")
203
+ return self.run_jql(jql=jql, fields=fields)
265
204
 
266
205
  def _get_issues_within_single_board(self, board_id: str) -> List[JiraIssueMetadata]:
267
206
  with self.connection_config.get_client() as client:
268
- get_board_issues = issues_fetcher_wrapper(
269
- client.get_issues_for_board,
270
- results_key="issues",
271
- )
272
- issues = get_board_issues(board_id=board_id, fields=["key", "id"], jql=None)
273
- logger.debug(f"Found {len(issues)} issues in board: {board_id}")
274
- return [
275
- JiraIssueMetadata(id=issue["id"], key=issue["key"], board_id=board_id)
276
- for issue in issues
277
- ]
278
-
279
- def _get_issues_within_boards(self) -> List[JiraIssueMetadata]:
207
+ fields = ["key", "id"]
208
+ if self.index_config.status_filters:
209
+ jql = "status in ({}) ORDER BY id".format(
210
+ ", ".join([f'"{s}"' for s in self.index_config.status_filters])
211
+ )
212
+ else:
213
+ jql = "ORDER BY id"
214
+ logger.debug(f"running jql for board {board_id}: {jql}")
215
+ for issue in api_page_based_generator(
216
+ fn=client.get_issues_for_board, board_id=board_id, fields=fields, jql=jql
217
+ ):
218
+ yield JiraIssueMetadata.model_validate(issue)
219
+
220
+ def _get_issues_within_boards(self) -> Generator[JiraIssueMetadata, None, None]:
280
221
  if not self.index_config.boards:
281
- return []
282
- return [
283
- issue
284
- for board_id in self.index_config.boards
285
- for issue in self._get_issues_within_single_board(board_id)
286
- ]
287
-
288
- def _get_issues(self) -> List[JiraIssueMetadata]:
289
- with self.connection_config.get_client() as client:
290
- issues = [
291
- client.get_issue(issue_id_or_key=issue_key, fields=["key", "id"])
292
- for issue_key in self.index_config.issues or []
293
- ]
294
- return [JiraIssueMetadata(id=issue["id"], key=issue["key"]) for issue in issues]
295
-
296
- def get_issues(self) -> List[JiraIssueMetadata]:
297
- issues = [
298
- *self._get_issues_within_boards(),
299
- *self._get_issues_within_projects(),
300
- *self._get_issues(),
301
- ]
302
- # Select unique issues by issue 'id'.
303
- # Since boards issues are fetched first,
304
- # if there are duplicates, the board issues will be kept,
305
- # in order to keep issue 'board_id' information.
306
- seen = set()
307
- unique_issues: List[JiraIssueMetadata] = []
308
- for issue in issues:
309
- if issue.id not in seen:
310
- unique_issues.append(issue)
311
- seen.add(issue.id)
312
- return unique_issues
313
-
314
- def run(self, **kwargs: Any) -> Generator[FileData, None, None]:
315
- from time import time
316
-
317
- issues = self.get_issues()
318
- for issue in issues:
319
- # Build metadata
320
- metadata = FileDataSourceMetadata(
321
- date_processed=str(time()),
322
- record_locator=issue.to_dict(),
222
+ yield
223
+ for board_id in self.index_config.boards:
224
+ for issue in self._get_issues_within_single_board(board_id=board_id):
225
+ yield issue
226
+
227
+ def _update_jql(self, jql: str) -> str:
228
+ if self.index_config.status_filters:
229
+ jql += " and status in ({})".format(
230
+ ", ".join([f'"{s}"' for s in self.index_config.status_filters])
323
231
  )
232
+ jql = jql + " ORDER BY id"
233
+ return jql
234
+
235
+ def _get_issues_by_keys(self) -> Generator[JiraIssueMetadata, None, None]:
236
+ fields = ["key", "id"]
237
+ jql = "key in ({})".format(", ".join(self.index_config.issues))
238
+ jql = self._update_jql(jql)
239
+ logger.debug(f"running jql: {jql}")
240
+ return self.run_jql(jql=jql, fields=fields)
241
+
242
+ def _create_file_data_from_issue(self, issue: JiraIssueMetadata) -> FileData:
243
+ # Build metadata
244
+ metadata = FileDataSourceMetadata(
245
+ date_processed=str(time()),
246
+ record_locator=issue.model_dump(),
247
+ )
324
248
 
325
- # Construct relative path and filename
326
- filename = f"{issue.id}.txt"
327
- relative_path = str(Path(issue.project_id) / filename)
249
+ # Construct relative path and filename
250
+ filename = f"{issue.id}.txt"
251
+ relative_path = str(Path(issue.get_project_id()) / filename)
328
252
 
329
- source_identifiers = SourceIdentifiers(
330
- filename=filename,
331
- fullpath=relative_path,
332
- rel_path=relative_path,
333
- )
253
+ source_identifiers = SourceIdentifiers(
254
+ filename=filename,
255
+ fullpath=relative_path,
256
+ rel_path=relative_path,
257
+ )
334
258
 
335
- file_data = FileData(
336
- identifier=issue.id,
337
- connector_type=self.connector_type,
338
- metadata=metadata,
339
- additional_metadata=issue.to_dict(),
340
- source_identifiers=source_identifiers,
341
- )
342
- yield file_data
259
+ file_data = FileData(
260
+ identifier=issue.id,
261
+ connector_type=self.connector_type,
262
+ metadata=metadata,
263
+ additional_metadata=issue.model_dump(),
264
+ source_identifiers=source_identifiers,
265
+ )
266
+ return file_data
267
+
268
+ def get_generators(self) -> List[Callable]:
269
+ generators = []
270
+ if self.index_config.boards:
271
+ generators.append(self._get_issues_within_boards)
272
+ if self.index_config.issues:
273
+ generators.append(self._get_issues_by_keys)
274
+ if self.index_config.projects:
275
+ generators.append(self._get_issues_within_projects)
276
+ return generators
277
+
278
+ def run(self, **kwargs: Any) -> Generator[FileData, None, None]:
279
+ seen_keys = []
280
+ for gen in self.get_generators():
281
+ for issue in gen():
282
+ if not issue:
283
+ continue
284
+ if issue.key in seen_keys:
285
+ continue
286
+ seen_keys.append(issue.key)
287
+ yield self._create_file_data_from_issue(issue=issue)
343
288
 
344
289
 
345
290
  class JiraDownloaderConfig(DownloaderConfig):
346
- pass
291
+ download_attachments: bool = Field(
292
+ default=False, description="If True, will download any attachments and process as well"
293
+ )
347
294
 
348
295
 
349
296
  @dataclass
@@ -448,7 +395,56 @@ class JiraDownloader(Downloader):
448
395
  logger.error(f"Failed to fetch issue with key: {issue_key}: {e}", exc_info=True)
449
396
  raise SourceConnectionError(f"Failed to fetch issue with key: {issue_key}: {e}")
450
397
 
451
- def run(self, file_data: FileData, **kwargs: Any) -> DownloadResponse:
398
+ def generate_attachment_file_data(
399
+ self, attachment_dict: dict, parent_filedata: FileData
400
+ ) -> FileData:
401
+ new_filedata = parent_filedata.model_copy(deep=True)
402
+ if new_filedata.metadata.record_locator is None:
403
+ new_filedata.metadata.record_locator = {}
404
+ new_filedata.metadata.record_locator["parent_issue"] = (
405
+ parent_filedata.metadata.record_locator["id"]
406
+ )
407
+ # Append an identifier for attachment to not conflict with issue ids
408
+ new_filedata.identifier = "{}a".format(attachment_dict["id"])
409
+ filename = attachment_dict["filename"]
410
+ new_filedata.metadata.filesize_bytes = attachment_dict.pop("size", None)
411
+ new_filedata.metadata.date_created = attachment_dict.pop("created", None)
412
+ new_filedata.metadata.url = attachment_dict.pop("self", None)
413
+ new_filedata.metadata.record_locator = attachment_dict
414
+ new_filedata.source_identifiers = SourceIdentifiers(
415
+ filename=filename,
416
+ fullpath=(Path(str(attachment_dict["id"])) / Path(filename)).as_posix(),
417
+ )
418
+ return new_filedata
419
+
420
+ def process_attachments(
421
+ self, file_data: FileData, attachments: list[dict]
422
+ ) -> list[DownloadResponse]:
423
+ with self.connection_config.get_client() as client:
424
+ download_path = self.get_download_path(file_data)
425
+ attachment_download_dir = download_path.parent / "attachments"
426
+ attachment_download_dir.mkdir(parents=True, exist_ok=True)
427
+ download_responses = []
428
+ for attachment in attachments:
429
+ attachment_filename = Path(attachment["filename"])
430
+ attachment_id = attachment["id"]
431
+ attachment_download_path = attachment_download_dir / Path(
432
+ attachment_id
433
+ ).with_suffix(attachment_filename.suffix)
434
+ resp = client.get_attachment_content(attachment_id=attachment_id)
435
+ with open(attachment_download_path, "wb") as f:
436
+ f.write(resp)
437
+ attachment_filedata = self.generate_attachment_file_data(
438
+ attachment_dict=attachment, parent_filedata=file_data
439
+ )
440
+ download_responses.append(
441
+ self.generate_download_response(
442
+ file_data=attachment_filedata, download_path=attachment_download_path
443
+ )
444
+ )
445
+ return download_responses
446
+
447
+ def run(self, file_data: FileData, **kwargs: Any) -> download_responses:
452
448
  issue_key = file_data.additional_metadata.get("key")
453
449
  if not issue_key:
454
450
  raise ValueError("Issue key not found in metadata.")
@@ -463,7 +459,17 @@ class JiraDownloader(Downloader):
463
459
  with open(download_path, "w") as f:
464
460
  f.write(issue_str)
465
461
  self.update_file_data(file_data, issue)
466
- return self.generate_download_response(file_data=file_data, download_path=download_path)
462
+ download_response = self.generate_download_response(
463
+ file_data=file_data, download_path=download_path
464
+ )
465
+ if self.download_config.download_attachments and (
466
+ attachments := issue.get("fields", {}).get("attachment")
467
+ ):
468
+ attachment_responses = self.process_attachments(
469
+ file_data=file_data, attachments=attachments
470
+ )
471
+ download_response = [download_response] + attachment_responses
472
+ return download_response
467
473
 
468
474
 
469
475
  jira_source_entry = SourceRegistryEntry(