unstructured-ingest 0.6.4__py3-none-any.whl → 0.7.1__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of unstructured-ingest might be problematic. Click here for more details.

Files changed (568)
  1. examples/airtable.py +44 -0
  2. examples/azure_cognitive_search.py +55 -0
  3. examples/chroma.py +54 -0
  4. examples/couchbase.py +55 -0
  5. examples/databricks_volumes_dest.py +55 -0
  6. examples/databricks_volumes_source.py +53 -0
  7. examples/delta_table.py +45 -0
  8. examples/discord_example.py +36 -0
  9. examples/elasticsearch.py +49 -0
  10. examples/google_drive.py +45 -0
  11. examples/kdbai.py +54 -0
  12. examples/local.py +36 -0
  13. examples/milvus.py +44 -0
  14. examples/mongodb.py +53 -0
  15. examples/opensearch.py +50 -0
  16. examples/pinecone.py +57 -0
  17. examples/s3.py +38 -0
  18. examples/salesforce.py +44 -0
  19. examples/sharepoint.py +47 -0
  20. examples/singlestore.py +49 -0
  21. examples/sql.py +90 -0
  22. examples/vectara.py +54 -0
  23. examples/weaviate.py +44 -0
  24. test/integration/chunkers/test_chunkers.py +1 -1
  25. test/integration/connectors/conftest.py +1 -1
  26. test/integration/connectors/databricks/test_volumes_native.py +3 -3
  27. test/integration/connectors/discord/test_discord.py +1 -1
  28. test/integration/connectors/duckdb/test_duckdb.py +2 -2
  29. test/integration/connectors/duckdb/test_motherduck.py +2 -2
  30. test/integration/connectors/elasticsearch/test_elasticsearch.py +2 -2
  31. test/integration/connectors/elasticsearch/test_opensearch.py +2 -2
  32. test/integration/connectors/sql/test_databricks_delta_tables.py +3 -3
  33. test/integration/connectors/sql/test_postgres.py +2 -2
  34. test/integration/connectors/sql/test_singlestore.py +2 -2
  35. test/integration/connectors/sql/test_snowflake.py +2 -2
  36. test/integration/connectors/sql/test_sqlite.py +2 -2
  37. test/integration/connectors/sql/test_vastdb.py +1 -1
  38. test/integration/connectors/test_astradb.py +2 -2
  39. test/integration/connectors/test_azure_ai_search.py +2 -2
  40. test/integration/connectors/test_chroma.py +2 -2
  41. test/integration/connectors/test_confluence.py +1 -1
  42. test/integration/connectors/test_delta_table.py +2 -2
  43. test/integration/connectors/test_dropbox.py +2 -2
  44. test/integration/connectors/test_github.py +1 -1
  45. test/integration/connectors/test_google_drive.py +2 -2
  46. test/integration/connectors/test_jira.py +1 -1
  47. test/integration/connectors/test_lancedb.py +7 -7
  48. test/integration/connectors/test_milvus.py +2 -2
  49. test/integration/connectors/test_mongodb.py +2 -2
  50. test/integration/connectors/test_neo4j.py +7 -7
  51. test/integration/connectors/test_notion.py +2 -2
  52. test/integration/connectors/test_onedrive.py +2 -2
  53. test/integration/connectors/test_pinecone.py +3 -3
  54. test/integration/connectors/test_qdrant.py +6 -6
  55. test/integration/connectors/test_redis.py +3 -3
  56. test/integration/connectors/test_s3.py +3 -3
  57. test/integration/connectors/test_sharepoint.py +1 -1
  58. test/integration/connectors/test_vectara.py +4 -4
  59. test/integration/connectors/test_zendesk.py +2 -2
  60. test/integration/connectors/utils/validation/destination.py +2 -2
  61. test/integration/connectors/utils/validation/source.py +2 -2
  62. test/integration/connectors/weaviate/test_cloud.py +1 -1
  63. test/integration/connectors/weaviate/test_local.py +2 -2
  64. test/integration/embedders/test_azure_openai.py +1 -1
  65. test/integration/embedders/test_bedrock.py +2 -2
  66. test/integration/embedders/test_huggingface.py +1 -1
  67. test/integration/embedders/test_mixedbread.py +1 -1
  68. test/integration/embedders/test_octoai.py +2 -2
  69. test/integration/embedders/test_openai.py +2 -2
  70. test/integration/embedders/test_togetherai.py +2 -2
  71. test/integration/embedders/test_vertexai.py +1 -1
  72. test/integration/embedders/test_voyageai.py +1 -1
  73. test/integration/partitioners/test_partitioner.py +2 -2
  74. test/unit/{v2/chunkers → chunkers}/test_chunkers.py +1 -1
  75. test/unit/{v2/connectors → connectors}/ibm_watsonx/test_ibm_watsonx_s3.py +6 -6
  76. test/unit/{v2/connectors → connectors}/motherduck/test_base.py +5 -5
  77. test/unit/{v2/connectors → connectors}/sql/test_sql.py +4 -4
  78. test/unit/{v2/connectors → connectors}/test_confluence.py +1 -1
  79. test/unit/{v2/connectors → connectors}/test_jira.py +1 -1
  80. test/unit/{v2/embedders → embedders}/test_huggingface.py +1 -1
  81. test/unit/{v2/embedders → embedders}/test_vertexai.py +1 -1
  82. test/unit/{v2/partitioners → partitioners}/test_partitioner.py +2 -2
  83. test/unit/test_html.py +1 -1
  84. test/unit/{v2/test_interfaces.py → test_interfaces.py} +1 -1
  85. test/unit/test_utils.py +106 -97
  86. unstructured_ingest/__version__.py +1 -1
  87. unstructured_ingest/cli/__init__.py +0 -14
  88. unstructured_ingest/cli/base/__init__.py +4 -0
  89. unstructured_ingest/cli/base/cmd.py +259 -9
  90. unstructured_ingest/cli/base/dest.py +58 -61
  91. unstructured_ingest/cli/base/src.py +54 -36
  92. unstructured_ingest/cli/cli.py +4 -17
  93. unstructured_ingest/{v2/cli → cli}/cmds.py +2 -2
  94. unstructured_ingest/{v2/cli → cli}/utils/model_conversion.py +6 -6
  95. unstructured_ingest/{v2/types → data_types}/file_data.py +1 -1
  96. unstructured_ingest/embed/bedrock.py +3 -3
  97. unstructured_ingest/embed/octoai.py +3 -3
  98. unstructured_ingest/embed/openai.py +3 -3
  99. unstructured_ingest/embed/togetherai.py +4 -4
  100. unstructured_ingest/embed/vertexai.py +1 -1
  101. unstructured_ingest/embed/voyageai.py +4 -4
  102. unstructured_ingest/{v2/interfaces → interfaces}/downloader.py +3 -3
  103. unstructured_ingest/{v2/interfaces → interfaces}/indexer.py +3 -3
  104. unstructured_ingest/{v2/interfaces → interfaces}/upload_stager.py +2 -2
  105. unstructured_ingest/{v2/interfaces → interfaces}/uploader.py +2 -2
  106. unstructured_ingest/{v2/otel.py → otel.py} +1 -1
  107. unstructured_ingest/pipeline/__init__.py +0 -22
  108. unstructured_ingest/pipeline/interfaces.py +179 -238
  109. unstructured_ingest/{v2/pipeline → pipeline}/otel.py +2 -2
  110. unstructured_ingest/pipeline/pipeline.py +388 -97
  111. unstructured_ingest/{v2/pipeline → pipeline}/steps/chunk.py +5 -5
  112. unstructured_ingest/{v2/pipeline → pipeline}/steps/download.py +5 -5
  113. unstructured_ingest/{v2/pipeline → pipeline}/steps/embed.py +5 -5
  114. unstructured_ingest/{v2/pipeline → pipeline}/steps/filter.py +4 -4
  115. unstructured_ingest/{v2/pipeline → pipeline}/steps/index.py +5 -5
  116. unstructured_ingest/{v2/pipeline → pipeline}/steps/partition.py +5 -5
  117. unstructured_ingest/{v2/pipeline → pipeline}/steps/stage.py +5 -5
  118. unstructured_ingest/{v2/pipeline → pipeline}/steps/uncompress.py +4 -4
  119. unstructured_ingest/{v2/pipeline → pipeline}/steps/upload.py +5 -5
  120. unstructured_ingest/{v2/processes → processes}/chunker.py +3 -3
  121. unstructured_ingest/{v2/processes → processes}/connector_registry.py +1 -1
  122. unstructured_ingest/{v2/processes → processes}/connectors/__init__.py +11 -11
  123. unstructured_ingest/{v2/processes → processes}/connectors/airtable.py +4 -4
  124. unstructured_ingest/{v2/processes → processes}/connectors/astradb.py +15 -15
  125. unstructured_ingest/{v2/processes → processes}/connectors/azure_ai_search.py +8 -9
  126. unstructured_ingest/{v2/processes → processes}/connectors/chroma.py +10 -7
  127. unstructured_ingest/{v2/processes → processes}/connectors/confluence.py +11 -11
  128. unstructured_ingest/{v2/processes → processes}/connectors/couchbase.py +12 -12
  129. unstructured_ingest/{v2/processes → processes}/connectors/databricks/__init__.py +1 -1
  130. unstructured_ingest/{v2/processes → processes}/connectors/databricks/volumes.py +9 -9
  131. unstructured_ingest/{v2/processes → processes}/connectors/databricks/volumes_aws.py +3 -3
  132. unstructured_ingest/{v2/processes → processes}/connectors/databricks/volumes_azure.py +3 -3
  133. unstructured_ingest/{v2/processes → processes}/connectors/databricks/volumes_gcp.py +3 -3
  134. unstructured_ingest/{v2/processes → processes}/connectors/databricks/volumes_native.py +3 -3
  135. unstructured_ingest/{v2/processes → processes}/connectors/databricks/volumes_table.py +8 -9
  136. unstructured_ingest/{v2/processes → processes}/connectors/delta_table.py +7 -7
  137. unstructured_ingest/{v2/processes → processes}/connectors/discord.py +9 -9
  138. unstructured_ingest/{v2/processes → processes}/connectors/duckdb/__init__.py +1 -1
  139. unstructured_ingest/{v2/processes → processes}/connectors/duckdb/base.py +3 -4
  140. unstructured_ingest/{v2/processes → processes}/connectors/duckdb/duckdb.py +7 -7
  141. unstructured_ingest/{v2/processes → processes}/connectors/duckdb/motherduck.py +7 -7
  142. unstructured_ingest/{v2/processes → processes}/connectors/elasticsearch/__init__.py +1 -1
  143. unstructured_ingest/{v2/processes → processes}/connectors/elasticsearch/elasticsearch.py +17 -17
  144. unstructured_ingest/{v2/processes → processes}/connectors/elasticsearch/opensearch.py +5 -5
  145. unstructured_ingest/{v2/processes → processes}/connectors/fsspec/__init__.py +1 -1
  146. unstructured_ingest/{v2/processes → processes}/connectors/fsspec/azure.py +8 -8
  147. unstructured_ingest/{v2/processes → processes}/connectors/fsspec/box.py +8 -8
  148. unstructured_ingest/{v2/processes → processes}/connectors/fsspec/dropbox.py +8 -8
  149. unstructured_ingest/{v2/processes → processes}/connectors/fsspec/fsspec.py +8 -8
  150. unstructured_ingest/{v2/processes → processes}/connectors/fsspec/gcs.py +8 -8
  151. unstructured_ingest/{v2/processes → processes}/connectors/fsspec/s3.py +9 -9
  152. unstructured_ingest/{v2/processes → processes}/connectors/fsspec/sftp.py +5 -5
  153. unstructured_ingest/{v2/processes → processes}/connectors/github.py +10 -10
  154. unstructured_ingest/{v2/processes → processes}/connectors/gitlab.py +9 -9
  155. unstructured_ingest/{v2/processes → processes}/connectors/google_drive.py +22 -13
  156. unstructured_ingest/{v2/processes → processes}/connectors/ibm_watsonx/__init__.py +1 -1
  157. unstructured_ingest/{v2/processes → processes}/connectors/ibm_watsonx/ibm_watsonx_s3.py +9 -9
  158. unstructured_ingest/{v2/processes → processes}/connectors/jira.py +9 -9
  159. unstructured_ingest/{v2/processes → processes}/connectors/kafka/__init__.py +1 -1
  160. unstructured_ingest/{v2/processes → processes}/connectors/kafka/cloud.py +3 -3
  161. unstructured_ingest/{v2/processes → processes}/connectors/kafka/kafka.py +9 -9
  162. unstructured_ingest/{v2/processes → processes}/connectors/kafka/local.py +2 -2
  163. unstructured_ingest/{v2/processes → processes}/connectors/kdbai.py +11 -7
  164. unstructured_ingest/{v2/processes → processes}/connectors/lancedb/__init__.py +1 -1
  165. unstructured_ingest/{v2/processes → processes}/connectors/lancedb/aws.py +3 -3
  166. unstructured_ingest/{v2/processes → processes}/connectors/lancedb/azure.py +3 -3
  167. unstructured_ingest/{v2/processes → processes}/connectors/lancedb/cloud.py +3 -3
  168. unstructured_ingest/{v2/processes → processes}/connectors/lancedb/gcp.py +3 -3
  169. unstructured_ingest/{v2/processes → processes}/connectors/lancedb/lancedb.py +6 -6
  170. unstructured_ingest/{v2/processes → processes}/connectors/lancedb/local.py +3 -3
  171. unstructured_ingest/{v2/processes → processes}/connectors/local.py +9 -9
  172. unstructured_ingest/{v2/processes → processes}/connectors/milvus.py +7 -7
  173. unstructured_ingest/{v2/processes → processes}/connectors/mongodb.py +13 -13
  174. unstructured_ingest/{v2/processes → processes}/connectors/neo4j.py +7 -7
  175. unstructured_ingest/{v2/processes → processes}/connectors/notion/client.py +6 -6
  176. unstructured_ingest/{v2/processes → processes}/connectors/notion/connector.py +15 -15
  177. unstructured_ingest/{v2/processes → processes}/connectors/notion/helpers.py +4 -4
  178. unstructured_ingest/{ingest_backoff → processes/connectors/notion/ingest_backoff}/_wrapper.py +5 -1
  179. unstructured_ingest/processes/connectors/notion/ingest_backoff/types.py +24 -0
  180. unstructured_ingest/{connector → processes/connectors}/notion/types/block.py +4 -4
  181. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/bookmark.py +2 -2
  182. unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/breadcrumb.py +1 -1
  183. unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/bulleted_list_item.py +2 -2
  184. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/callout.py +2 -2
  185. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/child_database.py +1 -1
  186. unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/child_page.py +1 -1
  187. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/code.py +2 -2
  188. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/column_list.py +1 -1
  189. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/divider.py +1 -1
  190. unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/embed.py +2 -2
  191. unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/equation.py +1 -1
  192. unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/file.py +4 -4
  193. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/heading.py +2 -2
  194. unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/image.py +2 -2
  195. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/link_preview.py +1 -1
  196. unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/link_to_page.py +1 -1
  197. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/numbered_list.py +2 -2
  198. unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/paragraph.py +2 -2
  199. unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/pdf.py +3 -3
  200. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/quote.py +2 -2
  201. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/synced_block.py +1 -1
  202. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/table.py +2 -5
  203. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/table_of_contents.py +1 -1
  204. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/template.py +2 -2
  205. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/todo.py +2 -2
  206. unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/toggle.py +2 -2
  207. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/unsupported.py +1 -1
  208. unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/video.py +2 -2
  209. unstructured_ingest/{connector → processes/connectors}/notion/types/database.py +6 -6
  210. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/__init__.py +1 -1
  211. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/checkbox.py +1 -1
  212. unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/created_by.py +2 -2
  213. unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/created_time.py +1 -1
  214. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/date.py +2 -2
  215. unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/email.py +1 -1
  216. unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/files.py +2 -2
  217. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/formula.py +1 -1
  218. unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/last_edited_by.py +2 -2
  219. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/last_edited_time.py +1 -1
  220. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/multiselect.py +1 -1
  221. unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/number.py +1 -1
  222. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/people.py +2 -2
  223. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/phone_number.py +1 -1
  224. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/relation.py +1 -1
  225. unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/rich_text.py +2 -2
  226. unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/rollup.py +1 -1
  227. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/select.py +1 -1
  228. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/status.py +1 -1
  229. unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/title.py +2 -2
  230. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/unique_id.py +1 -1
  231. unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/url.py +1 -1
  232. unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/verification.py +3 -3
  233. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/date.py +1 -1
  234. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/file.py +1 -1
  235. unstructured_ingest/{connector → processes/connectors}/notion/types/page.py +4 -4
  236. unstructured_ingest/{connector → processes/connectors}/notion/types/parent.py +1 -1
  237. unstructured_ingest/{connector → processes/connectors}/notion/types/rich_text.py +3 -3
  238. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/user.py +1 -1
  239. unstructured_ingest/{v2/processes → processes}/connectors/onedrive.py +55 -27
  240. unstructured_ingest/{v2/processes → processes}/connectors/outlook.py +9 -9
  241. unstructured_ingest/{v2/processes → processes}/connectors/pinecone.py +12 -9
  242. unstructured_ingest/{v2/processes → processes}/connectors/qdrant/__init__.py +1 -1
  243. unstructured_ingest/{v2/processes → processes}/connectors/qdrant/cloud.py +2 -2
  244. unstructured_ingest/{v2/processes → processes}/connectors/qdrant/local.py +2 -2
  245. unstructured_ingest/{v2/processes → processes}/connectors/qdrant/qdrant.py +9 -6
  246. unstructured_ingest/{v2/processes → processes}/connectors/qdrant/server.py +2 -2
  247. unstructured_ingest/{v2/processes → processes}/connectors/redisdb.py +6 -6
  248. unstructured_ingest/{v2/processes → processes}/connectors/salesforce.py +9 -9
  249. unstructured_ingest/{v2/processes → processes}/connectors/sharepoint.py +8 -8
  250. unstructured_ingest/{v2/processes → processes}/connectors/slack.py +9 -9
  251. unstructured_ingest/{v2/processes → processes}/connectors/sql/__init__.py +1 -1
  252. unstructured_ingest/{v2/processes → processes}/connectors/sql/databricks_delta_tables.py +7 -7
  253. unstructured_ingest/{v2/processes → processes}/connectors/sql/postgres.py +4 -4
  254. unstructured_ingest/{v2/processes → processes}/connectors/sql/singlestore.py +4 -4
  255. unstructured_ingest/{v2/processes → processes}/connectors/sql/snowflake.py +6 -6
  256. unstructured_ingest/{v2/processes → processes}/connectors/sql/sql.py +16 -11
  257. unstructured_ingest/{v2/processes → processes}/connectors/sql/sqlite.py +4 -4
  258. unstructured_ingest/{v2/processes → processes}/connectors/sql/vastdb.py +9 -10
  259. unstructured_ingest/{v2/processes → processes}/connectors/vectara.py +6 -6
  260. unstructured_ingest/{v2/processes → processes}/connectors/weaviate/__init__.py +1 -1
  261. unstructured_ingest/{v2/processes → processes}/connectors/weaviate/cloud.py +3 -3
  262. unstructured_ingest/{v2/processes → processes}/connectors/weaviate/embedded.py +3 -3
  263. unstructured_ingest/{v2/processes → processes}/connectors/weaviate/local.py +3 -3
  264. unstructured_ingest/{v2/processes → processes}/connectors/weaviate/weaviate.py +5 -5
  265. unstructured_ingest/{v2/processes → processes}/connectors/zendesk/client.py +2 -2
  266. unstructured_ingest/{v2/processes → processes}/connectors/zendesk/zendesk.py +10 -10
  267. unstructured_ingest/{v2/processes → processes}/embedder.py +1 -1
  268. unstructured_ingest/{v2/processes → processes}/filter.py +4 -4
  269. unstructured_ingest/{v2/processes → processes}/partitioner.py +6 -6
  270. unstructured_ingest/{v2/processes → processes}/uncompress.py +3 -3
  271. unstructured_ingest/{v2/processes → processes}/utils/blob_storage.py +2 -2
  272. unstructured_ingest/{v2/unstructured_api.py → unstructured_api.py} +2 -2
  273. unstructured_ingest/utils/compression.py +1 -48
  274. unstructured_ingest/utils/data_prep.py +9 -1
  275. unstructured_ingest/utils/html.py +3 -3
  276. unstructured_ingest/{v2/utils.py → utils/pydantic_models.py} +0 -9
  277. unstructured_ingest/utils/string_and_date_utils.py +1 -1
  278. {unstructured_ingest-0.6.4.dist-info → unstructured_ingest-0.7.1.dist-info}/METADATA +98 -97
  279. unstructured_ingest-0.7.1.dist-info/RECORD +370 -0
  280. {unstructured_ingest-0.6.4.dist-info → unstructured_ingest-0.7.1.dist-info}/top_level.txt +1 -0
  281. test/unit/v2/test_utils.py +0 -82
  282. unstructured_ingest/cli/cmd_factory.py +0 -12
  283. unstructured_ingest/cli/cmds/__init__.py +0 -145
  284. unstructured_ingest/cli/cmds/airtable.py +0 -69
  285. unstructured_ingest/cli/cmds/astradb.py +0 -99
  286. unstructured_ingest/cli/cmds/azure_ai_search.py +0 -65
  287. unstructured_ingest/cli/cmds/biomed.py +0 -52
  288. unstructured_ingest/cli/cmds/chroma.py +0 -104
  289. unstructured_ingest/cli/cmds/clarifai.py +0 -71
  290. unstructured_ingest/cli/cmds/confluence.py +0 -69
  291. unstructured_ingest/cli/cmds/databricks_volumes.py +0 -163
  292. unstructured_ingest/cli/cmds/delta_table.py +0 -94
  293. unstructured_ingest/cli/cmds/discord.py +0 -47
  294. unstructured_ingest/cli/cmds/elasticsearch.py +0 -133
  295. unstructured_ingest/cli/cmds/fsspec/azure.py +0 -94
  296. unstructured_ingest/cli/cmds/fsspec/box.py +0 -48
  297. unstructured_ingest/cli/cmds/fsspec/dropbox.py +0 -51
  298. unstructured_ingest/cli/cmds/fsspec/fsspec.py +0 -15
  299. unstructured_ingest/cli/cmds/fsspec/gcs.py +0 -71
  300. unstructured_ingest/cli/cmds/fsspec/s3.py +0 -74
  301. unstructured_ingest/cli/cmds/fsspec/sftp.py +0 -58
  302. unstructured_ingest/cli/cmds/github.py +0 -54
  303. unstructured_ingest/cli/cmds/gitlab.py +0 -54
  304. unstructured_ingest/cli/cmds/google_drive.py +0 -49
  305. unstructured_ingest/cli/cmds/hubspot.py +0 -70
  306. unstructured_ingest/cli/cmds/jira.py +0 -71
  307. unstructured_ingest/cli/cmds/kafka.py +0 -102
  308. unstructured_ingest/cli/cmds/local.py +0 -43
  309. unstructured_ingest/cli/cmds/mongodb.py +0 -72
  310. unstructured_ingest/cli/cmds/notion.py +0 -48
  311. unstructured_ingest/cli/cmds/onedrive.py +0 -66
  312. unstructured_ingest/cli/cmds/opensearch.py +0 -117
  313. unstructured_ingest/cli/cmds/outlook.py +0 -67
  314. unstructured_ingest/cli/cmds/pinecone.py +0 -71
  315. unstructured_ingest/cli/cmds/qdrant.py +0 -124
  316. unstructured_ingest/cli/cmds/reddit.py +0 -67
  317. unstructured_ingest/cli/cmds/salesforce.py +0 -58
  318. unstructured_ingest/cli/cmds/sharepoint.py +0 -66
  319. unstructured_ingest/cli/cmds/slack.py +0 -56
  320. unstructured_ingest/cli/cmds/sql.py +0 -66
  321. unstructured_ingest/cli/cmds/vectara.py +0 -66
  322. unstructured_ingest/cli/cmds/weaviate.py +0 -98
  323. unstructured_ingest/cli/cmds/wikipedia.py +0 -40
  324. unstructured_ingest/cli/common.py +0 -7
  325. unstructured_ingest/cli/interfaces.py +0 -663
  326. unstructured_ingest/cli/utils.py +0 -205
  327. unstructured_ingest/connector/airtable.py +0 -309
  328. unstructured_ingest/connector/astradb.py +0 -267
  329. unstructured_ingest/connector/azure_ai_search.py +0 -144
  330. unstructured_ingest/connector/biomed.py +0 -320
  331. unstructured_ingest/connector/chroma.py +0 -158
  332. unstructured_ingest/connector/clarifai.py +0 -122
  333. unstructured_ingest/connector/confluence.py +0 -285
  334. unstructured_ingest/connector/databricks_volumes.py +0 -137
  335. unstructured_ingest/connector/delta_table.py +0 -203
  336. unstructured_ingest/connector/discord.py +0 -180
  337. unstructured_ingest/connector/elasticsearch.py +0 -396
  338. unstructured_ingest/connector/fsspec/azure.py +0 -78
  339. unstructured_ingest/connector/fsspec/box.py +0 -109
  340. unstructured_ingest/connector/fsspec/dropbox.py +0 -160
  341. unstructured_ingest/connector/fsspec/fsspec.py +0 -359
  342. unstructured_ingest/connector/fsspec/gcs.py +0 -82
  343. unstructured_ingest/connector/fsspec/s3.py +0 -62
  344. unstructured_ingest/connector/fsspec/sftp.py +0 -81
  345. unstructured_ingest/connector/git.py +0 -124
  346. unstructured_ingest/connector/github.py +0 -174
  347. unstructured_ingest/connector/gitlab.py +0 -142
  348. unstructured_ingest/connector/google_drive.py +0 -348
  349. unstructured_ingest/connector/hubspot.py +0 -278
  350. unstructured_ingest/connector/jira.py +0 -469
  351. unstructured_ingest/connector/kafka.py +0 -293
  352. unstructured_ingest/connector/local.py +0 -139
  353. unstructured_ingest/connector/mongodb.py +0 -284
  354. unstructured_ingest/connector/notion/client.py +0 -248
  355. unstructured_ingest/connector/notion/connector.py +0 -469
  356. unstructured_ingest/connector/notion/helpers.py +0 -584
  357. unstructured_ingest/connector/notion/types/blocks/bookmark.py +0 -40
  358. unstructured_ingest/connector/notion/types/blocks/callout.py +0 -94
  359. unstructured_ingest/connector/notion/types/blocks/child_database.py +0 -23
  360. unstructured_ingest/connector/notion/types/blocks/code.py +0 -43
  361. unstructured_ingest/connector/notion/types/blocks/column_list.py +0 -35
  362. unstructured_ingest/connector/notion/types/blocks/divider.py +0 -22
  363. unstructured_ingest/connector/notion/types/blocks/heading.py +0 -37
  364. unstructured_ingest/connector/notion/types/blocks/link_preview.py +0 -24
  365. unstructured_ingest/connector/notion/types/blocks/numbered_list.py +0 -29
  366. unstructured_ingest/connector/notion/types/blocks/quote.py +0 -37
  367. unstructured_ingest/connector/notion/types/blocks/synced_block.py +0 -57
  368. unstructured_ingest/connector/notion/types/blocks/table.py +0 -63
  369. unstructured_ingest/connector/notion/types/blocks/table_of_contents.py +0 -23
  370. unstructured_ingest/connector/notion/types/blocks/template.py +0 -30
  371. unstructured_ingest/connector/notion/types/blocks/todo.py +0 -42
  372. unstructured_ingest/connector/notion/types/blocks/unsupported.py +0 -20
  373. unstructured_ingest/connector/notion/types/database_properties/__init__.py +0 -106
  374. unstructured_ingest/connector/notion/types/database_properties/checkbox.py +0 -38
  375. unstructured_ingest/connector/notion/types/database_properties/date.py +0 -41
  376. unstructured_ingest/connector/notion/types/database_properties/formula.py +0 -49
  377. unstructured_ingest/connector/notion/types/database_properties/last_edited_time.py +0 -34
  378. unstructured_ingest/connector/notion/types/database_properties/multiselect.py +0 -73
  379. unstructured_ingest/connector/notion/types/database_properties/people.py +0 -40
  380. unstructured_ingest/connector/notion/types/database_properties/phone_number.py +0 -36
  381. unstructured_ingest/connector/notion/types/database_properties/relation.py +0 -67
  382. unstructured_ingest/connector/notion/types/database_properties/select.py +0 -68
  383. unstructured_ingest/connector/notion/types/database_properties/status.py +0 -80
  384. unstructured_ingest/connector/notion/types/database_properties/unique_id.py +0 -50
  385. unstructured_ingest/connector/notion/types/date.py +0 -26
  386. unstructured_ingest/connector/notion/types/file.py +0 -51
  387. unstructured_ingest/connector/notion/types/user.py +0 -76
  388. unstructured_ingest/connector/onedrive.py +0 -232
  389. unstructured_ingest/connector/opensearch.py +0 -218
  390. unstructured_ingest/connector/outlook.py +0 -285
  391. unstructured_ingest/connector/pinecone.py +0 -150
  392. unstructured_ingest/connector/qdrant.py +0 -144
  393. unstructured_ingest/connector/reddit.py +0 -166
  394. unstructured_ingest/connector/registry.py +0 -109
  395. unstructured_ingest/connector/salesforce.py +0 -301
  396. unstructured_ingest/connector/sharepoint.py +0 -573
  397. unstructured_ingest/connector/slack.py +0 -224
  398. unstructured_ingest/connector/sql.py +0 -199
  399. unstructured_ingest/connector/vectara.py +0 -253
  400. unstructured_ingest/connector/weaviate.py +0 -190
  401. unstructured_ingest/connector/wikipedia.py +0 -208
  402. unstructured_ingest/enhanced_dataclass/__init__.py +0 -4
  403. unstructured_ingest/enhanced_dataclass/core.py +0 -99
  404. unstructured_ingest/enhanced_dataclass/dataclasses.py +0 -54
  405. unstructured_ingest/enhanced_dataclass/json_mixin.py +0 -125
  406. unstructured_ingest/interfaces.py +0 -852
  407. unstructured_ingest/pipeline/copy.py +0 -19
  408. unstructured_ingest/pipeline/doc_factory.py +0 -12
  409. unstructured_ingest/pipeline/partition.py +0 -60
  410. unstructured_ingest/pipeline/permissions.py +0 -12
  411. unstructured_ingest/pipeline/reformat/chunking.py +0 -134
  412. unstructured_ingest/pipeline/reformat/embedding.py +0 -64
  413. unstructured_ingest/pipeline/source.py +0 -77
  414. unstructured_ingest/pipeline/utils.py +0 -6
  415. unstructured_ingest/pipeline/write.py +0 -18
  416. unstructured_ingest/processor.py +0 -93
  417. unstructured_ingest/runner/__init__.py +0 -104
  418. unstructured_ingest/runner/airtable.py +0 -35
  419. unstructured_ingest/runner/astradb.py +0 -34
  420. unstructured_ingest/runner/base_runner.py +0 -89
  421. unstructured_ingest/runner/biomed.py +0 -45
  422. unstructured_ingest/runner/confluence.py +0 -35
  423. unstructured_ingest/runner/delta_table.py +0 -34
  424. unstructured_ingest/runner/discord.py +0 -35
  425. unstructured_ingest/runner/elasticsearch.py +0 -40
  426. unstructured_ingest/runner/fsspec/azure.py +0 -30
  427. unstructured_ingest/runner/fsspec/box.py +0 -28
  428. unstructured_ingest/runner/fsspec/dropbox.py +0 -30
  429. unstructured_ingest/runner/fsspec/fsspec.py +0 -40
  430. unstructured_ingest/runner/fsspec/gcs.py +0 -28
  431. unstructured_ingest/runner/fsspec/s3.py +0 -28
  432. unstructured_ingest/runner/fsspec/sftp.py +0 -28
  433. unstructured_ingest/runner/github.py +0 -37
  434. unstructured_ingest/runner/gitlab.py +0 -37
  435. unstructured_ingest/runner/google_drive.py +0 -35
  436. unstructured_ingest/runner/hubspot.py +0 -35
  437. unstructured_ingest/runner/jira.py +0 -35
  438. unstructured_ingest/runner/kafka.py +0 -34
  439. unstructured_ingest/runner/local.py +0 -23
  440. unstructured_ingest/runner/mongodb.py +0 -34
  441. unstructured_ingest/runner/notion.py +0 -61
  442. unstructured_ingest/runner/onedrive.py +0 -35
  443. unstructured_ingest/runner/opensearch.py +0 -40
  444. unstructured_ingest/runner/outlook.py +0 -33
  445. unstructured_ingest/runner/reddit.py +0 -35
  446. unstructured_ingest/runner/salesforce.py +0 -33
  447. unstructured_ingest/runner/sharepoint.py +0 -35
  448. unstructured_ingest/runner/slack.py +0 -33
  449. unstructured_ingest/runner/utils.py +0 -47
  450. unstructured_ingest/runner/wikipedia.py +0 -35
  451. unstructured_ingest/runner/writers/__init__.py +0 -48
  452. unstructured_ingest/runner/writers/astradb.py +0 -22
  453. unstructured_ingest/runner/writers/azure_ai_search.py +0 -24
  454. unstructured_ingest/runner/writers/base_writer.py +0 -26
  455. unstructured_ingest/runner/writers/chroma.py +0 -22
  456. unstructured_ingest/runner/writers/clarifai.py +0 -19
  457. unstructured_ingest/runner/writers/databricks_volumes.py +0 -25
  458. unstructured_ingest/runner/writers/delta_table.py +0 -24
  459. unstructured_ingest/runner/writers/elasticsearch.py +0 -24
  460. unstructured_ingest/runner/writers/fsspec/azure.py +0 -24
  461. unstructured_ingest/runner/writers/fsspec/box.py +0 -21
  462. unstructured_ingest/runner/writers/fsspec/dropbox.py +0 -21
  463. unstructured_ingest/runner/writers/fsspec/gcs.py +0 -19
  464. unstructured_ingest/runner/writers/fsspec/s3.py +0 -21
  465. unstructured_ingest/runner/writers/kafka.py +0 -21
  466. unstructured_ingest/runner/writers/mongodb.py +0 -21
  467. unstructured_ingest/runner/writers/opensearch.py +0 -26
  468. unstructured_ingest/runner/writers/pinecone.py +0 -21
  469. unstructured_ingest/runner/writers/qdrant.py +0 -19
  470. unstructured_ingest/runner/writers/sql.py +0 -22
  471. unstructured_ingest/runner/writers/vectara.py +0 -22
  472. unstructured_ingest/runner/writers/weaviate.py +0 -21
  473. unstructured_ingest/utils/google_filetype.py +0 -9
  474. unstructured_ingest/v2/__init__.py +0 -1
  475. unstructured_ingest/v2/cli/__init__.py +0 -0
  476. unstructured_ingest/v2/cli/base/__init__.py +0 -4
  477. unstructured_ingest/v2/cli/base/cmd.py +0 -269
  478. unstructured_ingest/v2/cli/base/dest.py +0 -85
  479. unstructured_ingest/v2/cli/base/src.py +0 -85
  480. unstructured_ingest/v2/cli/cli.py +0 -24
  481. unstructured_ingest/v2/cli/utils/__init__.py +0 -0
  482. unstructured_ingest/v2/logger.py +0 -126
  483. unstructured_ingest/v2/main.py +0 -11
  484. unstructured_ingest/v2/pipeline/__init__.py +0 -0
  485. unstructured_ingest/v2/pipeline/interfaces.py +0 -211
  486. unstructured_ingest/v2/pipeline/pipeline.py +0 -408
  487. unstructured_ingest/v2/pipeline/steps/__init__.py +0 -0
  488. unstructured_ingest/v2/processes/connectors/assets/__init__.py +0 -0
  489. unstructured_ingest/v2/processes/connectors/assets/databricks_delta_table_schema.sql +0 -10
  490. unstructured_ingest/v2/processes/connectors/assets/weaviate_collection_config.json +0 -23
  491. unstructured_ingest/v2/processes/connectors/notion/__init__.py +0 -0
  492. unstructured_ingest/v2/processes/connectors/notion/interfaces.py +0 -32
  493. unstructured_ingest/v2/processes/connectors/notion/types/__init__.py +0 -0
  494. unstructured_ingest/v2/processes/connectors/notion/types/block.py +0 -96
  495. unstructured_ingest/v2/processes/connectors/notion/types/blocks/__init__.py +0 -63
  496. unstructured_ingest/v2/processes/connectors/notion/types/blocks/breadcrumb.py +0 -21
  497. unstructured_ingest/v2/processes/connectors/notion/types/blocks/bulleted_list_item.py +0 -31
  498. unstructured_ingest/v2/processes/connectors/notion/types/blocks/child_page.py +0 -23
  499. unstructured_ingest/v2/processes/connectors/notion/types/blocks/embed.py +0 -36
  500. unstructured_ingest/v2/processes/connectors/notion/types/blocks/equation.py +0 -23
  501. unstructured_ingest/v2/processes/connectors/notion/types/blocks/file.py +0 -49
  502. unstructured_ingest/v2/processes/connectors/notion/types/blocks/image.py +0 -21
  503. unstructured_ingest/v2/processes/connectors/notion/types/blocks/link_to_page.py +0 -29
  504. unstructured_ingest/v2/processes/connectors/notion/types/blocks/paragraph.py +0 -31
  505. unstructured_ingest/v2/processes/connectors/notion/types/blocks/pdf.py +0 -49
  506. unstructured_ingest/v2/processes/connectors/notion/types/blocks/toggle.py +0 -37
  507. unstructured_ingest/v2/processes/connectors/notion/types/blocks/video.py +0 -22
  508. unstructured_ingest/v2/processes/connectors/notion/types/database.py +0 -73
  509. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/created_by.py +0 -35
  510. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/created_time.py +0 -34
  511. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/email.py +0 -36
  512. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/files.py +0 -37
  513. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/last_edited_by.py +0 -34
  514. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/number.py +0 -49
  515. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/rich_text.py +0 -43
  516. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/rollup.py +0 -56
  517. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/title.py +0 -37
  518. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/url.py +0 -37
  519. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/verification.py +0 -78
  520. unstructured_ingest/v2/processes/connectors/notion/types/page.py +0 -45
  521. unstructured_ingest/v2/processes/connectors/notion/types/parent.py +0 -66
  522. unstructured_ingest/v2/processes/connectors/notion/types/rich_text.py +0 -189
  523. unstructured_ingest/v2/processes/connectors/zendesk/__init__.py +0 -0
  524. unstructured_ingest/v2/processes/utils/__init__.py +0 -0
  525. unstructured_ingest/v2/types/__init__.py +0 -0
  526. unstructured_ingest-0.6.4.dist-info/RECORD +0 -591
  527. {test/unit/v2 → examples}/__init__.py +0 -0
  528. /test/unit/{v2/chunkers → chunkers}/__init__.py +0 -0
  529. /test/unit/{v2/connectors → connectors}/__init__.py +0 -0
  530. /test/unit/{v2/connectors → connectors}/ibm_watsonx/__init__.py +0 -0
  531. /test/unit/{v2/connectors → connectors}/motherduck/__init__.py +0 -0
  532. /test/unit/{v2/connectors → connectors}/sql/__init__.py +0 -0
  533. /test/unit/{v2/embedders → embedders}/__init__.py +0 -0
  534. /test/unit/{v2/embedders → embedders}/test_bedrock.py +0 -0
  535. /test/unit/{v2/embedders → embedders}/test_mixedbread.py +0 -0
  536. /test/unit/{v2/embedders → embedders}/test_octoai.py +0 -0
  537. /test/unit/{v2/embedders → embedders}/test_openai.py +0 -0
  538. /test/unit/{v2/embedders → embedders}/test_togetherai.py +0 -0
  539. /test/unit/{v2/embedders → embedders}/test_voyageai.py +0 -0
  540. /test/unit/{v2/partitioners → partitioners}/__init__.py +0 -0
  541. /test/unit/{v2/utils → utils}/__init__.py +0 -0
  542. /test/unit/{v2/utils → utils}/data_generator.py +0 -0
  543. /unstructured_ingest/{v2/cli → cli}/base/importer.py +0 -0
  544. /unstructured_ingest/cli/{cmds/fsspec → utils}/__init__.py +0 -0
  545. /unstructured_ingest/{v2/cli → cli}/utils/click.py +0 -0
  546. /unstructured_ingest/{connector → data_types}/__init__.py +0 -0
  547. /unstructured_ingest/{v2/errors.py → errors_v2.py} +0 -0
  548. /unstructured_ingest/{v2/interfaces → interfaces}/__init__.py +0 -0
  549. /unstructured_ingest/{v2/interfaces → interfaces}/connector.py +0 -0
  550. /unstructured_ingest/{v2/interfaces → interfaces}/process.py +0 -0
  551. /unstructured_ingest/{v2/interfaces → interfaces}/processor.py +0 -0
  552. /unstructured_ingest/{connector/fsspec → pipeline/steps}/__init__.py +0 -0
  553. /unstructured_ingest/{v2/processes → processes}/__init__.py +0 -0
  554. /unstructured_ingest/{connector/notion → processes/connectors/assets}/__init__.py +0 -0
  555. /unstructured_ingest/{v2/processes → processes}/connectors/fsspec/utils.py +0 -0
  556. /unstructured_ingest/{connector/notion/types → processes/connectors/notion}/__init__.py +0 -0
  557. /unstructured_ingest/{ingest_backoff → processes/connectors/notion/ingest_backoff}/__init__.py +0 -0
  558. /unstructured_ingest/{ingest_backoff → processes/connectors/notion/ingest_backoff}/_common.py +0 -0
  559. /unstructured_ingest/{connector → processes/connectors}/notion/interfaces.py +0 -0
  560. /unstructured_ingest/{pipeline/reformat → processes/connectors/notion/types}/__init__.py +0 -0
  561. /unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/__init__.py +0 -0
  562. /unstructured_ingest/{v2/processes → processes}/connectors/utils.py +0 -0
  563. /unstructured_ingest/{runner/fsspec → processes/connectors/zendesk}/__init__.py +0 -0
  564. /unstructured_ingest/{runner/writers/fsspec → processes/utils}/__init__.py +0 -0
  565. /unstructured_ingest/{v2 → utils}/constants.py +0 -0
  566. {unstructured_ingest-0.6.4.dist-info → unstructured_ingest-0.7.1.dist-info}/LICENSE.md +0 -0
  567. {unstructured_ingest-0.6.4.dist-info → unstructured_ingest-0.7.1.dist-info}/WHEEL +0 -0
  568. {unstructured_ingest-0.6.4.dist-info → unstructured_ingest-0.7.1.dist-info}/entry_points.txt +0 -0
@@ -1,54 +0,0 @@
1
- import typing as t
2
- from dataclasses import dataclass
3
-
4
- import click
5
-
6
- from unstructured_ingest.cli.base.src import BaseSrcCmd
7
- from unstructured_ingest.cli.interfaces import CliConfig, DelimitedString
8
- from unstructured_ingest.connector.github import SimpleGitHubConfig
9
-
10
-
11
- @dataclass
12
- class GithubCliConfig(SimpleGitHubConfig, CliConfig):
13
- @staticmethod
14
- def get_cli_options() -> t.List[click.Option]:
15
- options = [
16
- click.Option(
17
- ["--url"],
18
- required=True,
19
- type=str,
20
- help="URL to GitHub repository, e.g. "
21
- '"https://github.com/Unstructured-IO/unstructured", or '
22
- 'a repository owner/name pair, e.g. "Unstructured-IO/unstructured"',
23
- ),
24
- click.Option(
25
- ["--git-access-token"],
26
- default=None,
27
- help="A GitHub or GitLab access token, "
28
- "see https://docs.github.com/en/authentication or "
29
- "https://docs.gitlab.com/ee/api/rest/index.html#personalprojectgroup-access-tokens",
30
- ),
31
- click.Option(
32
- ["--git-branch"],
33
- default=None,
34
- type=str,
35
- help="The branch for which to fetch files from. If not given,"
36
- " the default repository branch is used.",
37
- ),
38
- click.Option(
39
- ["--git-file-glob"],
40
- default=None,
41
- type=DelimitedString(),
42
- help="A comma-separated list of file globs to limit which "
43
- "types of files are accepted, e.g. '*.html,*.txt'",
44
- ),
45
- ]
46
- return options
47
-
48
-
49
- def get_base_src_cmd() -> BaseSrcCmd:
50
- cmd_cls = BaseSrcCmd(
51
- cmd_name="github",
52
- cli_config=GithubCliConfig,
53
- )
54
- return cmd_cls
@@ -1,54 +0,0 @@
1
- import typing as t
2
- from dataclasses import dataclass
3
-
4
- import click
5
-
6
- from unstructured_ingest.cli.base.src import BaseSrcCmd
7
- from unstructured_ingest.cli.interfaces import CliConfig, DelimitedString
8
- from unstructured_ingest.connector.gitlab import SimpleGitlabConfig
9
-
10
-
11
- @dataclass
12
- class GitlabCliConfig(SimpleGitlabConfig, CliConfig):
13
- @staticmethod
14
- def get_cli_options() -> t.List[click.Option]:
15
- options = [
16
- click.Option(
17
- ["--url"],
18
- required=True,
19
- type=str,
20
- help="URL to GitHub repository, e.g. "
21
- '"https://github.com/Unstructured-IO/unstructured", or '
22
- 'a repository owner/name pair, e.g. "Unstructured-IO/unstructured"',
23
- ),
24
- click.Option(
25
- ["--git-access-token"],
26
- default=None,
27
- help="A GitHub or GitLab access token, "
28
- "see https://docs.github.com/en/authentication or "
29
- "https://docs.gitlab.com/ee/api/rest/index.html#personalprojectgroup-access-tokens",
30
- ),
31
- click.Option(
32
- ["--git-branch"],
33
- default=None,
34
- type=str,
35
- help="The branch for which to fetch files from. If not given,"
36
- " the default repository branch is used.",
37
- ),
38
- click.Option(
39
- ["--git-file-glob"],
40
- default=None,
41
- type=DelimitedString(),
42
- help="A comma-separated list of file globs to limit which types of "
43
- "files are accepted, e.g. '*.html,*.txt'",
44
- ),
45
- ]
46
- return options
47
-
48
-
49
- def get_base_src_cmd() -> BaseSrcCmd:
50
- cmd_cls = BaseSrcCmd(
51
- cmd_name="gitlab",
52
- cli_config=GitlabCliConfig,
53
- )
54
- return cmd_cls
@@ -1,49 +0,0 @@
1
- import typing as t
2
- from dataclasses import dataclass
3
-
4
- import click
5
-
6
- from unstructured_ingest.cli.base.src import BaseSrcCmd
7
- from unstructured_ingest.cli.interfaces import (
8
- CliConfig,
9
- CliRecursiveConfig,
10
- FileOrJson,
11
- )
12
- from unstructured_ingest.connector.google_drive import SimpleGoogleDriveConfig
13
-
14
-
15
- @dataclass
16
- class GoogleDriveCliConfig(SimpleGoogleDriveConfig, CliConfig):
17
- @staticmethod
18
- def get_cli_options() -> t.List[click.Option]:
19
- options = [
20
- click.Option(
21
- ["--drive-id"],
22
- required=True,
23
- type=str,
24
- help="Google Drive File or Folder ID.",
25
- ),
26
- click.Option(
27
- ["--service-account-key"],
28
- required=True,
29
- type=FileOrJson(),
30
- help="Either the file path of the credentials file to use or a json string of "
31
- "those values to use for authentication",
32
- ),
33
- click.Option(
34
- ["--extension"],
35
- default=None,
36
- type=str,
37
- help="Filters the files to be processed based on extension e.g. .jpg, .docx, etc.",
38
- ),
39
- ]
40
- return options
41
-
42
-
43
- def get_base_src_cmd() -> BaseSrcCmd:
44
- cmd_cls = BaseSrcCmd(
45
- cmd_name="google-drive",
46
- cli_config=GoogleDriveCliConfig,
47
- additional_cli_options=[CliRecursiveConfig],
48
- )
49
- return cmd_cls
@@ -1,70 +0,0 @@
1
- import typing as t
2
- from dataclasses import dataclass
3
-
4
- import click
5
-
6
- from unstructured_ingest.cli.base.src import BaseSrcCmd
7
- from unstructured_ingest.cli.interfaces import CliMixin, DelimitedString, Dict
8
- from unstructured_ingest.connector.hubspot import HubSpotObjectTypes, SimpleHubSpotConfig
9
-
10
- OBJECT_TYPES = {t.value for t in HubSpotObjectTypes}
11
-
12
-
13
- def validate_custom_property(ctx, param, value) -> t.Dict[str, t.List[str]]:
14
- if not value:
15
- return value
16
- for k in value:
17
- if k not in OBJECT_TYPES:
18
- raise ValueError(f"Invalid object type: {k}, must be one of {OBJECT_TYPES}")
19
- if not isinstance(value[k], list):
20
- raise ValueError(f"Invalid type: {type(value[k])}, must be a Python list.")
21
- return value
22
-
23
-
24
- @dataclass
25
- class HubSpotCliConfig(SimpleHubSpotConfig, CliMixin):
26
- @staticmethod
27
- def get_cli_options() -> t.List[click.Option]:
28
- options = [
29
- click.Option(
30
- ["--api-token"],
31
- required=True,
32
- type=str,
33
- help="Access token to perform operations on Hubspot. \
34
- Check \
35
- https://developers.hubspot.com/docs/api/private-apps/ \
36
- for more info",
37
- ),
38
- click.Option(
39
- ["--object-types"],
40
- default=None,
41
- required=False,
42
- type=DelimitedString(choices=OBJECT_TYPES),
43
- is_flag=False,
44
- help=f"Object to include in the process.\
45
- Must be a subset of {','.join(OBJECT_TYPES)}.\
46
- If the argument is omitted all objects listed will be processed.",
47
- ),
48
- click.Option(
49
- ["--custom-properties"],
50
- default=None,
51
- required=False,
52
- type=Dict(),
53
- is_flag=False,
54
- callback=validate_custom_property,
55
- help="Custom property to process information from.\
56
- It should be a json-like string in the form\
57
- <object_type>:[<custom_property_id>, ..., <custom_property_id>]\
58
- Must be internal name of the variable. If the property is missing, \
59
- it will be omitted.",
60
- ),
61
- ]
62
- return options
63
-
64
-
65
- def get_base_src_cmd() -> BaseSrcCmd:
66
- cmd_cls = BaseSrcCmd(
67
- cmd_name="hubspot",
68
- cli_config=HubSpotCliConfig,
69
- )
70
- return cmd_cls
@@ -1,71 +0,0 @@
1
- import typing as t
2
- from dataclasses import dataclass
3
-
4
- import click
5
-
6
- from unstructured_ingest.cli.base.src import BaseSrcCmd
7
- from unstructured_ingest.cli.interfaces import (
8
- CliConfig,
9
- DelimitedString,
10
- )
11
- from unstructured_ingest.connector.jira import SimpleJiraConfig
12
-
13
-
14
- @dataclass
15
- class JiraCliConfig(SimpleJiraConfig, CliConfig):
16
- @staticmethod
17
- def get_cli_options() -> t.List[click.Option]:
18
- options = [
19
- click.Option(
20
- ["--api-token"],
21
- required=True,
22
- type=str,
23
- help="API Token to authenticate into Jira (into Atlassian). \
24
- Check \
25
- https://developer.atlassian.com/cloud/jira/platform/basic-auth-for-rest-apis/ \
26
- for more info.",
27
- ),
28
- click.Option(
29
- ["--url"],
30
- required=True,
31
- type=str,
32
- help="URL to Atlassian (Jira) Cloud, e.g. "
33
- '"unstructured-jira-connector-test.atlassian.net"',
34
- ),
35
- click.Option(
36
- ["--user-email"],
37
- required=True,
38
- type=str,
39
- help="Email to authenticate into Atlassian (Jira) Cloud.",
40
- ),
41
- click.Option(
42
- ["--projects"],
43
- default=None,
44
- type=DelimitedString(),
45
- help="Comma-delimited Project ids or keys. Use Jira UI or the "
46
- "API to find or obtain keys. Alternatively, use API to obtain ids.",
47
- ),
48
- click.Option(
49
- ["--boards"],
50
- default=None,
51
- type=DelimitedString(),
52
- help="Comma-delimited Board ids. Check board URL, or use the "
53
- "API to find the board ids.",
54
- ),
55
- click.Option(
56
- ["--issues"],
57
- default=None,
58
- type=DelimitedString(),
59
- help="Comma-delimited Issue ids or keys. Use Jira UI or the API to "
60
- "find or obtain keys. Alternatively, use API to obtain ids.",
61
- ),
62
- ]
63
- return options
64
-
65
-
66
- def get_base_src_cmd() -> BaseSrcCmd:
67
- cmd_cls = BaseSrcCmd(
68
- cmd_name="jira",
69
- cli_config=JiraCliConfig,
70
- )
71
- return cmd_cls
@@ -1,102 +0,0 @@
1
- import typing as t
2
- from dataclasses import dataclass
3
-
4
- import click
5
-
6
- from unstructured_ingest.cli.base.src import BaseSrcCmd
7
- from unstructured_ingest.cli.interfaces import CliConfig
8
- from unstructured_ingest.connector.kafka import KafkaWriteConfig, SimpleKafkaConfig
9
-
10
- CMD_NAME = "kafka"
11
-
12
-
13
- @dataclass
14
- class KafkaCliConfig(SimpleKafkaConfig, CliConfig):
15
- @staticmethod
16
- def get_cli_options() -> t.List[click.Option]:
17
- options = [
18
- click.Option(
19
- ["--bootstrap-server"], required=True, type=str, help="Broker server hostname"
20
- ),
21
- click.Option(
22
- ["--port"],
23
- required=True,
24
- type=str,
25
- help="The bootstrap port",
26
- ),
27
- click.Option(
28
- ["--topic"],
29
- required=True,
30
- type=str,
31
- help="The topic to write into.'",
32
- ),
33
- click.Option(
34
- ["--kafka-api-key"],
35
- required=False,
36
- type=str,
37
- help="The API KEY",
38
- ),
39
- click.Option(
40
- ["--secret"],
41
- required=False,
42
- type=str,
43
- help="The secret",
44
- ),
45
- click.Option(
46
- ["--num-messages-to-consume"],
47
- required=False,
48
- type=int,
49
- default=1,
50
- help="The number of messages to consume before unblocking the consumer",
51
- ),
52
- click.Option(
53
- ["--timeout"],
54
- required=False,
55
- type=float,
56
- default=1.0,
57
- help="Maximum time to block waiting for message(Seconds)",
58
- ),
59
- click.Option(
60
- ["--confluent"],
61
- required=False,
62
- type=bool,
63
- default=True,
64
- help="Whether this Kafka instance is from Confluent",
65
- ),
66
- ]
67
- return options
68
-
69
-
70
- @dataclass
71
- class KafkaCliWriteConfig(KafkaWriteConfig, CliConfig):
72
- @staticmethod
73
- def get_cli_options() -> t.List[click.Option]:
74
- options = [
75
- click.Option(
76
- ["--batch-size"],
77
- default=4,
78
- type=int,
79
- help="Number of records per batch",
80
- ),
81
- ]
82
- return options
83
-
84
-
85
- def get_base_src_cmd() -> BaseSrcCmd:
86
- cmd_cls = BaseSrcCmd(
87
- cmd_name=CMD_NAME,
88
- cli_config=KafkaCliConfig,
89
- )
90
- return cmd_cls
91
-
92
-
93
- def get_base_dest_cmd():
94
- from unstructured_ingest.cli.base.dest import BaseDestCmd
95
-
96
- cmd_cls = BaseDestCmd(
97
- cmd_name=CMD_NAME,
98
- cli_config=KafkaCliConfig,
99
- additional_cli_options=[KafkaCliWriteConfig],
100
- write_config=KafkaWriteConfig,
101
- )
102
- return cmd_cls
@@ -1,43 +0,0 @@
1
- import typing as t
2
- from dataclasses import dataclass
3
-
4
- import click
5
-
6
- from unstructured_ingest.cli.base.src import BaseSrcCmd
7
- from unstructured_ingest.cli.interfaces import (
8
- CliConfig,
9
- CliRecursiveConfig,
10
- DelimitedString,
11
- )
12
- from unstructured_ingest.connector.local import SimpleLocalConfig
13
-
14
-
15
- @dataclass
16
- class LocalCliConfig(SimpleLocalConfig, CliConfig):
17
- @staticmethod
18
- def get_cli_options() -> t.List[click.Option]:
19
- options = [
20
- click.Option(
21
- ["--input-path"],
22
- required=True,
23
- type=click.Path(file_okay=True, dir_okay=True, exists=True),
24
- help="Path to the location in the local file system that will be processed.",
25
- ),
26
- click.Option(
27
- ["--file-glob"],
28
- default=None,
29
- type=DelimitedString(),
30
- help="A comma-separated list of file globs to limit which types of "
31
- "local files are accepted, e.g. '*.html,*.txt'",
32
- ),
33
- ]
34
- return options
35
-
36
-
37
- def get_base_src_cmd() -> BaseSrcCmd:
38
- cmd_cls = BaseSrcCmd(
39
- cmd_name="local",
40
- cli_config=LocalCliConfig,
41
- additional_cli_options=[CliRecursiveConfig],
42
- )
43
- return cmd_cls
@@ -1,72 +0,0 @@
1
- import typing as t
2
- from dataclasses import dataclass
3
-
4
- import click
5
-
6
- from unstructured_ingest.cli.base.src import BaseSrcCmd
7
- from unstructured_ingest.cli.interfaces import CliConfig, DelimitedString
8
- from unstructured_ingest.connector.mongodb import SimpleMongoDBConfig
9
- from unstructured_ingest.interfaces import WriteConfig
10
-
11
- CMD_NAME = "mongodb"
12
-
13
-
14
- @dataclass
15
- class MongoDBCliConfig(SimpleMongoDBConfig, CliConfig):
16
- @staticmethod
17
- def get_cli_options() -> t.List[click.Option]:
18
- options = [
19
- click.Option(
20
- ["--uri"],
21
- help="URI to user when connecting",
22
- ),
23
- click.Option(
24
- ["--host"],
25
- type=DelimitedString(),
26
- help="hostname or IP address or Unix domain socket path of a single mongod or "
27
- "mongos instance to connect to, or a list of hostnames",
28
- ),
29
- click.Option(["--port"], type=int, default=27017),
30
- click.Option(
31
- ["--database"], type=str, required=True, help="database name to connect to"
32
- ),
33
- click.Option(
34
- ["--collection"], required=True, type=str, help="collection name to connect to"
35
- ),
36
- ]
37
- return options
38
-
39
-
40
- @dataclass
41
- class MongoDBReadConfig(SimpleMongoDBConfig, CliConfig):
42
- @staticmethod
43
- def get_cli_options() -> t.List[click.Option]:
44
- options = [
45
- click.Option(
46
- ["--batch-size"],
47
- default=100,
48
- type=click.IntRange(0),
49
- help="how many records to read at a time per process",
50
- ),
51
- ]
52
- return options
53
-
54
-
55
- def get_base_src_cmd() -> BaseSrcCmd:
56
- cmd_cls = BaseSrcCmd(
57
- cmd_name=CMD_NAME,
58
- cli_config=MongoDBCliConfig,
59
- additional_cli_options=[MongoDBReadConfig],
60
- )
61
- return cmd_cls
62
-
63
-
64
- def get_base_dest_cmd():
65
- from unstructured_ingest.cli.base.dest import BaseDestCmd
66
-
67
- cmd_cls = BaseDestCmd(
68
- cmd_name=CMD_NAME,
69
- cli_config=MongoDBCliConfig,
70
- write_config=WriteConfig,
71
- )
72
- return cmd_cls
@@ -1,48 +0,0 @@
1
- import typing as t
2
- from dataclasses import dataclass
3
-
4
- import click
5
-
6
- from unstructured_ingest.cli.base.src import BaseSrcCmd
7
- from unstructured_ingest.cli.interfaces import (
8
- CliConfig,
9
- CliRecursiveConfig,
10
- DelimitedString,
11
- )
12
- from unstructured_ingest.connector.notion.connector import SimpleNotionConfig
13
-
14
-
15
- @dataclass
16
- class NotionCliConfig(SimpleNotionConfig, CliConfig):
17
- @staticmethod
18
- def get_cli_options() -> t.List[click.Option]:
19
- options = [
20
- click.Option(
21
- ["--notion-api-key"],
22
- required=True,
23
- type=str,
24
- help="API key for Notion api",
25
- ),
26
- click.Option(
27
- ["--page-ids"],
28
- default=None,
29
- type=DelimitedString(),
30
- help="Notion page IDs to pull text from",
31
- ),
32
- click.Option(
33
- ["--database-ids"],
34
- default=None,
35
- type=DelimitedString(),
36
- help="Notion database IDs to pull text from",
37
- ),
38
- ]
39
- return options
40
-
41
-
42
- def get_base_src_cmd() -> BaseSrcCmd:
43
- cmd_cls = BaseSrcCmd(
44
- cmd_name="notion",
45
- cli_config=NotionCliConfig,
46
- additional_cli_options=[CliRecursiveConfig],
47
- )
48
- return cmd_cls
@@ -1,66 +0,0 @@
1
- import typing as t
2
- from dataclasses import dataclass
3
-
4
- import click
5
-
6
- from unstructured_ingest.cli.base.src import BaseSrcCmd
7
- from unstructured_ingest.cli.interfaces import (
8
- CliConfig,
9
- CliRecursiveConfig,
10
- )
11
- from unstructured_ingest.connector.onedrive import SimpleOneDriveConfig
12
-
13
-
14
- @dataclass
15
- class OnedriveCliConfig(SimpleOneDriveConfig, CliConfig):
16
- @staticmethod
17
- def get_cli_options() -> t.List[click.Option]:
18
- options = [
19
- click.Option(
20
- ["--client-id"],
21
- required=True,
22
- type=str,
23
- help="Microsoft app client ID",
24
- ),
25
- click.Option(
26
- ["--client-cred"],
27
- required=True,
28
- type=str,
29
- help="Microsoft App client secret",
30
- ),
31
- click.Option(
32
- ["--user-pname"],
33
- required=True,
34
- type=str,
35
- help="User principal name, usually is your Azure AD email.",
36
- ),
37
- click.Option(
38
- ["--tenant"],
39
- default="common",
40
- type=str,
41
- help="ID or domain name associated with your Azure AD instance",
42
- ),
43
- click.Option(
44
- ["--path"],
45
- default=None,
46
- type=str,
47
- help="Folder to start parsing files from.",
48
- ),
49
- click.Option(
50
- ["--authority-url"],
51
- default="https://login.microsoftonline.com",
52
- type=str,
53
- help="Authentication token provider for Microsoft apps, default is "
54
- "https://login.microsoftonline.com",
55
- ),
56
- ]
57
- return options
58
-
59
-
60
- def get_base_src_cmd() -> BaseSrcCmd:
61
- cmd_cls = BaseSrcCmd(
62
- cmd_name="onedrive",
63
- cli_config=OnedriveCliConfig,
64
- additional_cli_options=[CliRecursiveConfig],
65
- )
66
- return cmd_cls