unstructured-ingest 0.6.2__py3-none-any.whl → 0.7.0__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of unstructured-ingest might be problematic. Click here for more details.

Files changed (568)
  1. examples/airtable.py +44 -0
  2. examples/azure_cognitive_search.py +55 -0
  3. examples/chroma.py +54 -0
  4. examples/couchbase.py +55 -0
  5. examples/databricks_volumes_dest.py +55 -0
  6. examples/databricks_volumes_source.py +53 -0
  7. examples/delta_table.py +45 -0
  8. examples/discord_example.py +36 -0
  9. examples/elasticsearch.py +49 -0
  10. examples/google_drive.py +45 -0
  11. examples/kdbai.py +54 -0
  12. examples/local.py +36 -0
  13. examples/milvus.py +44 -0
  14. examples/mongodb.py +53 -0
  15. examples/opensearch.py +50 -0
  16. examples/pinecone.py +57 -0
  17. examples/s3.py +38 -0
  18. examples/salesforce.py +44 -0
  19. examples/sharepoint.py +47 -0
  20. examples/singlestore.py +49 -0
  21. examples/sql.py +90 -0
  22. examples/vectara.py +54 -0
  23. examples/weaviate.py +44 -0
  24. test/integration/chunkers/test_chunkers.py +1 -1
  25. test/integration/connectors/conftest.py +1 -1
  26. test/integration/connectors/databricks/test_volumes_native.py +3 -3
  27. test/integration/connectors/discord/test_discord.py +1 -1
  28. test/integration/connectors/duckdb/test_duckdb.py +2 -2
  29. test/integration/connectors/duckdb/test_motherduck.py +2 -2
  30. test/integration/connectors/elasticsearch/test_elasticsearch.py +2 -2
  31. test/integration/connectors/elasticsearch/test_opensearch.py +2 -2
  32. test/integration/connectors/sql/test_databricks_delta_tables.py +3 -3
  33. test/integration/connectors/sql/test_postgres.py +2 -2
  34. test/integration/connectors/sql/test_singlestore.py +2 -2
  35. test/integration/connectors/sql/test_snowflake.py +2 -2
  36. test/integration/connectors/sql/test_sqlite.py +2 -2
  37. test/integration/connectors/sql/test_vastdb.py +1 -1
  38. test/integration/connectors/test_astradb.py +2 -2
  39. test/integration/connectors/test_azure_ai_search.py +2 -2
  40. test/integration/connectors/test_chroma.py +2 -2
  41. test/integration/connectors/test_confluence.py +1 -1
  42. test/integration/connectors/test_delta_table.py +2 -2
  43. test/integration/connectors/test_dropbox.py +2 -2
  44. test/integration/connectors/test_github.py +49 -0
  45. test/integration/connectors/test_google_drive.py +2 -2
  46. test/integration/connectors/test_jira.py +1 -1
  47. test/integration/connectors/test_lancedb.py +7 -7
  48. test/integration/connectors/test_milvus.py +2 -2
  49. test/integration/connectors/test_mongodb.py +2 -2
  50. test/integration/connectors/test_neo4j.py +7 -7
  51. test/integration/connectors/test_notion.py +2 -2
  52. test/integration/connectors/test_onedrive.py +2 -2
  53. test/integration/connectors/test_pinecone.py +3 -3
  54. test/integration/connectors/test_qdrant.py +6 -6
  55. test/integration/connectors/test_redis.py +3 -3
  56. test/integration/connectors/test_s3.py +3 -3
  57. test/integration/connectors/test_sharepoint.py +1 -1
  58. test/integration/connectors/test_vectara.py +4 -4
  59. test/integration/connectors/test_zendesk.py +2 -2
  60. test/integration/connectors/utils/validation/destination.py +2 -2
  61. test/integration/connectors/utils/validation/source.py +2 -2
  62. test/integration/connectors/weaviate/test_cloud.py +1 -1
  63. test/integration/connectors/weaviate/test_local.py +2 -2
  64. test/integration/embedders/test_azure_openai.py +1 -1
  65. test/integration/embedders/test_bedrock.py +2 -2
  66. test/integration/embedders/test_huggingface.py +1 -1
  67. test/integration/embedders/test_mixedbread.py +1 -1
  68. test/integration/embedders/test_octoai.py +2 -2
  69. test/integration/embedders/test_openai.py +2 -2
  70. test/integration/embedders/test_togetherai.py +2 -2
  71. test/integration/embedders/test_vertexai.py +1 -1
  72. test/integration/embedders/test_voyageai.py +1 -1
  73. test/integration/partitioners/test_partitioner.py +2 -2
  74. test/unit/{v2/chunkers → chunkers}/test_chunkers.py +1 -1
  75. test/unit/{v2/connectors → connectors}/ibm_watsonx/test_ibm_watsonx_s3.py +6 -6
  76. test/unit/{v2/connectors → connectors}/motherduck/test_base.py +5 -5
  77. test/unit/{v2/connectors → connectors}/sql/test_sql.py +4 -4
  78. test/unit/{v2/connectors → connectors}/test_confluence.py +1 -1
  79. test/unit/{v2/connectors → connectors}/test_jira.py +1 -1
  80. test/unit/{v2/embedders → embedders}/test_huggingface.py +1 -1
  81. test/unit/{v2/embedders → embedders}/test_vertexai.py +1 -1
  82. test/unit/{v2/partitioners → partitioners}/test_partitioner.py +2 -2
  83. test/unit/test_html.py +1 -1
  84. test/unit/{v2/test_interfaces.py → test_interfaces.py} +1 -1
  85. test/unit/test_utils.py +106 -97
  86. unstructured_ingest/__version__.py +1 -1
  87. unstructured_ingest/cli/__init__.py +0 -14
  88. unstructured_ingest/cli/base/__init__.py +4 -0
  89. unstructured_ingest/cli/base/cmd.py +259 -9
  90. unstructured_ingest/cli/base/dest.py +58 -61
  91. unstructured_ingest/cli/base/src.py +54 -36
  92. unstructured_ingest/cli/cli.py +4 -17
  93. unstructured_ingest/{v2/cli → cli}/cmds.py +2 -2
  94. unstructured_ingest/{v2/cli → cli}/utils/model_conversion.py +6 -6
  95. unstructured_ingest/{v2/types → data_types}/file_data.py +1 -1
  96. unstructured_ingest/embed/bedrock.py +3 -3
  97. unstructured_ingest/embed/octoai.py +3 -3
  98. unstructured_ingest/embed/openai.py +3 -3
  99. unstructured_ingest/embed/togetherai.py +4 -4
  100. unstructured_ingest/embed/vertexai.py +1 -1
  101. unstructured_ingest/embed/voyageai.py +4 -4
  102. unstructured_ingest/{v2/interfaces → interfaces}/downloader.py +3 -3
  103. unstructured_ingest/{v2/interfaces → interfaces}/indexer.py +3 -3
  104. unstructured_ingest/{v2/interfaces → interfaces}/upload_stager.py +2 -2
  105. unstructured_ingest/{v2/interfaces → interfaces}/uploader.py +2 -2
  106. unstructured_ingest/{v2/otel.py → otel.py} +1 -1
  107. unstructured_ingest/pipeline/__init__.py +0 -22
  108. unstructured_ingest/pipeline/interfaces.py +179 -238
  109. unstructured_ingest/{v2/pipeline → pipeline}/otel.py +2 -2
  110. unstructured_ingest/pipeline/pipeline.py +388 -97
  111. unstructured_ingest/{v2/pipeline → pipeline}/steps/chunk.py +5 -5
  112. unstructured_ingest/{v2/pipeline → pipeline}/steps/download.py +5 -5
  113. unstructured_ingest/{v2/pipeline → pipeline}/steps/embed.py +5 -5
  114. unstructured_ingest/{v2/pipeline → pipeline}/steps/filter.py +4 -4
  115. unstructured_ingest/{v2/pipeline → pipeline}/steps/index.py +5 -5
  116. unstructured_ingest/{v2/pipeline → pipeline}/steps/partition.py +5 -5
  117. unstructured_ingest/{v2/pipeline → pipeline}/steps/stage.py +5 -5
  118. unstructured_ingest/{v2/pipeline → pipeline}/steps/uncompress.py +4 -4
  119. unstructured_ingest/{v2/pipeline → pipeline}/steps/upload.py +5 -5
  120. unstructured_ingest/{v2/processes → processes}/chunker.py +3 -3
  121. unstructured_ingest/{v2/processes → processes}/connector_registry.py +1 -1
  122. unstructured_ingest/{v2/processes → processes}/connectors/__init__.py +14 -11
  123. unstructured_ingest/{v2/processes → processes}/connectors/airtable.py +4 -4
  124. unstructured_ingest/{v2/processes → processes}/connectors/astradb.py +15 -15
  125. unstructured_ingest/{v2/processes → processes}/connectors/azure_ai_search.py +8 -9
  126. unstructured_ingest/{v2/processes → processes}/connectors/chroma.py +10 -7
  127. unstructured_ingest/{v2/processes → processes}/connectors/confluence.py +11 -11
  128. unstructured_ingest/{v2/processes → processes}/connectors/couchbase.py +12 -12
  129. unstructured_ingest/{v2/processes → processes}/connectors/databricks/__init__.py +1 -1
  130. unstructured_ingest/{v2/processes → processes}/connectors/databricks/volumes.py +12 -11
  131. unstructured_ingest/{v2/processes → processes}/connectors/databricks/volumes_aws.py +3 -3
  132. unstructured_ingest/{v2/processes → processes}/connectors/databricks/volumes_azure.py +3 -3
  133. unstructured_ingest/{v2/processes → processes}/connectors/databricks/volumes_gcp.py +3 -3
  134. unstructured_ingest/{v2/processes → processes}/connectors/databricks/volumes_native.py +3 -3
  135. unstructured_ingest/{v2/processes → processes}/connectors/databricks/volumes_table.py +8 -9
  136. unstructured_ingest/{v2/processes → processes}/connectors/delta_table.py +7 -7
  137. unstructured_ingest/{v2/processes → processes}/connectors/discord.py +9 -9
  138. unstructured_ingest/{v2/processes → processes}/connectors/duckdb/__init__.py +1 -1
  139. unstructured_ingest/{v2/processes → processes}/connectors/duckdb/base.py +3 -4
  140. unstructured_ingest/{v2/processes → processes}/connectors/duckdb/duckdb.py +7 -7
  141. unstructured_ingest/{v2/processes → processes}/connectors/duckdb/motherduck.py +7 -7
  142. unstructured_ingest/{v2/processes → processes}/connectors/elasticsearch/__init__.py +1 -1
  143. unstructured_ingest/{v2/processes → processes}/connectors/elasticsearch/elasticsearch.py +17 -17
  144. unstructured_ingest/{v2/processes → processes}/connectors/elasticsearch/opensearch.py +5 -5
  145. unstructured_ingest/{v2/processes → processes}/connectors/fsspec/__init__.py +1 -1
  146. unstructured_ingest/{v2/processes → processes}/connectors/fsspec/azure.py +8 -8
  147. unstructured_ingest/{v2/processes → processes}/connectors/fsspec/box.py +8 -8
  148. unstructured_ingest/{v2/processes → processes}/connectors/fsspec/dropbox.py +8 -8
  149. unstructured_ingest/{v2/processes → processes}/connectors/fsspec/fsspec.py +8 -8
  150. unstructured_ingest/{v2/processes → processes}/connectors/fsspec/gcs.py +8 -8
  151. unstructured_ingest/{v2/processes → processes}/connectors/fsspec/s3.py +9 -9
  152. unstructured_ingest/{v2/processes → processes}/connectors/fsspec/sftp.py +5 -5
  153. unstructured_ingest/processes/connectors/github.py +221 -0
  154. unstructured_ingest/{v2/processes → processes}/connectors/gitlab.py +9 -9
  155. unstructured_ingest/{v2/processes → processes}/connectors/google_drive.py +22 -13
  156. unstructured_ingest/{v2/processes → processes}/connectors/ibm_watsonx/__init__.py +1 -1
  157. unstructured_ingest/{v2/processes → processes}/connectors/ibm_watsonx/ibm_watsonx_s3.py +9 -9
  158. unstructured_ingest/{v2/processes → processes}/connectors/jira.py +9 -9
  159. unstructured_ingest/{v2/processes → processes}/connectors/kafka/__init__.py +1 -1
  160. unstructured_ingest/{v2/processes → processes}/connectors/kafka/cloud.py +3 -3
  161. unstructured_ingest/{v2/processes → processes}/connectors/kafka/kafka.py +9 -9
  162. unstructured_ingest/{v2/processes → processes}/connectors/kafka/local.py +2 -2
  163. unstructured_ingest/{v2/processes → processes}/connectors/kdbai.py +11 -7
  164. unstructured_ingest/{v2/processes → processes}/connectors/lancedb/__init__.py +1 -1
  165. unstructured_ingest/{v2/processes → processes}/connectors/lancedb/aws.py +3 -3
  166. unstructured_ingest/{v2/processes → processes}/connectors/lancedb/azure.py +3 -3
  167. unstructured_ingest/{v2/processes → processes}/connectors/lancedb/cloud.py +3 -3
  168. unstructured_ingest/{v2/processes → processes}/connectors/lancedb/gcp.py +3 -3
  169. unstructured_ingest/{v2/processes → processes}/connectors/lancedb/lancedb.py +6 -6
  170. unstructured_ingest/{v2/processes → processes}/connectors/lancedb/local.py +3 -3
  171. unstructured_ingest/{v2/processes → processes}/connectors/local.py +9 -9
  172. unstructured_ingest/{v2/processes → processes}/connectors/milvus.py +7 -7
  173. unstructured_ingest/{v2/processes → processes}/connectors/mongodb.py +13 -13
  174. unstructured_ingest/{v2/processes → processes}/connectors/neo4j.py +7 -7
  175. unstructured_ingest/{v2/processes → processes}/connectors/notion/client.py +6 -6
  176. unstructured_ingest/{v2/processes → processes}/connectors/notion/connector.py +15 -15
  177. unstructured_ingest/{v2/processes → processes}/connectors/notion/helpers.py +4 -4
  178. unstructured_ingest/{ingest_backoff → processes/connectors/notion/ingest_backoff}/_wrapper.py +5 -1
  179. unstructured_ingest/processes/connectors/notion/ingest_backoff/types.py +24 -0
  180. unstructured_ingest/{connector → processes/connectors}/notion/types/block.py +4 -4
  181. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/bookmark.py +2 -2
  182. unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/breadcrumb.py +1 -1
  183. unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/bulleted_list_item.py +2 -2
  184. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/callout.py +2 -2
  185. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/child_database.py +1 -1
  186. unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/child_page.py +1 -1
  187. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/code.py +2 -2
  188. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/column_list.py +1 -1
  189. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/divider.py +1 -1
  190. unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/embed.py +2 -2
  191. unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/equation.py +1 -1
  192. unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/file.py +4 -4
  193. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/heading.py +2 -2
  194. unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/image.py +2 -2
  195. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/link_preview.py +1 -1
  196. unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/link_to_page.py +1 -1
  197. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/numbered_list.py +2 -2
  198. unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/paragraph.py +2 -2
  199. unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/pdf.py +3 -3
  200. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/quote.py +2 -2
  201. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/synced_block.py +1 -1
  202. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/table.py +2 -5
  203. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/table_of_contents.py +1 -1
  204. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/template.py +2 -2
  205. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/todo.py +2 -2
  206. unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/toggle.py +2 -2
  207. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/unsupported.py +1 -1
  208. unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/video.py +2 -2
  209. unstructured_ingest/{connector → processes/connectors}/notion/types/database.py +6 -6
  210. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/__init__.py +1 -1
  211. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/checkbox.py +1 -1
  212. unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/created_by.py +2 -2
  213. unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/created_time.py +1 -1
  214. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/date.py +2 -2
  215. unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/email.py +1 -1
  216. unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/files.py +2 -2
  217. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/formula.py +1 -1
  218. unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/last_edited_by.py +2 -2
  219. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/last_edited_time.py +1 -1
  220. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/multiselect.py +1 -1
  221. unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/number.py +1 -1
  222. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/people.py +2 -2
  223. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/phone_number.py +1 -1
  224. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/relation.py +1 -1
  225. unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/rich_text.py +2 -2
  226. unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/rollup.py +1 -1
  227. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/select.py +1 -1
  228. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/status.py +1 -1
  229. unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/title.py +2 -2
  230. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/unique_id.py +1 -1
  231. unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/url.py +1 -1
  232. unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/verification.py +3 -3
  233. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/date.py +1 -1
  234. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/file.py +1 -1
  235. unstructured_ingest/{connector → processes/connectors}/notion/types/page.py +4 -4
  236. unstructured_ingest/{connector → processes/connectors}/notion/types/parent.py +1 -1
  237. unstructured_ingest/{connector → processes/connectors}/notion/types/rich_text.py +3 -3
  238. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/user.py +1 -1
  239. unstructured_ingest/{v2/processes → processes}/connectors/onedrive.py +10 -10
  240. unstructured_ingest/{v2/processes → processes}/connectors/outlook.py +9 -9
  241. unstructured_ingest/{v2/processes → processes}/connectors/pinecone.py +12 -9
  242. unstructured_ingest/{v2/processes → processes}/connectors/qdrant/__init__.py +1 -1
  243. unstructured_ingest/{v2/processes → processes}/connectors/qdrant/cloud.py +2 -2
  244. unstructured_ingest/{v2/processes → processes}/connectors/qdrant/local.py +2 -2
  245. unstructured_ingest/{v2/processes → processes}/connectors/qdrant/qdrant.py +9 -6
  246. unstructured_ingest/{v2/processes → processes}/connectors/qdrant/server.py +2 -2
  247. unstructured_ingest/{v2/processes → processes}/connectors/redisdb.py +6 -6
  248. unstructured_ingest/{v2/processes → processes}/connectors/salesforce.py +9 -9
  249. unstructured_ingest/{v2/processes → processes}/connectors/sharepoint.py +7 -7
  250. unstructured_ingest/{v2/processes → processes}/connectors/slack.py +9 -9
  251. unstructured_ingest/{v2/processes → processes}/connectors/sql/__init__.py +1 -1
  252. unstructured_ingest/{v2/processes → processes}/connectors/sql/databricks_delta_tables.py +11 -9
  253. unstructured_ingest/{v2/processes → processes}/connectors/sql/postgres.py +4 -4
  254. unstructured_ingest/{v2/processes → processes}/connectors/sql/singlestore.py +4 -4
  255. unstructured_ingest/{v2/processes → processes}/connectors/sql/snowflake.py +6 -6
  256. unstructured_ingest/{v2/processes → processes}/connectors/sql/sql.py +16 -11
  257. unstructured_ingest/{v2/processes → processes}/connectors/sql/sqlite.py +4 -4
  258. unstructured_ingest/{v2/processes → processes}/connectors/sql/vastdb.py +9 -10
  259. unstructured_ingest/{v2/processes → processes}/connectors/vectara.py +6 -6
  260. unstructured_ingest/{v2/processes → processes}/connectors/weaviate/__init__.py +1 -1
  261. unstructured_ingest/{v2/processes → processes}/connectors/weaviate/cloud.py +3 -3
  262. unstructured_ingest/{v2/processes → processes}/connectors/weaviate/embedded.py +3 -3
  263. unstructured_ingest/{v2/processes → processes}/connectors/weaviate/local.py +3 -3
  264. unstructured_ingest/{v2/processes → processes}/connectors/weaviate/weaviate.py +5 -5
  265. unstructured_ingest/{v2/processes → processes}/connectors/zendesk/client.py +2 -2
  266. unstructured_ingest/{v2/processes → processes}/connectors/zendesk/zendesk.py +10 -10
  267. unstructured_ingest/{v2/processes → processes}/embedder.py +1 -1
  268. unstructured_ingest/{v2/processes → processes}/filter.py +4 -4
  269. unstructured_ingest/{v2/processes → processes}/partitioner.py +6 -6
  270. unstructured_ingest/{v2/processes → processes}/uncompress.py +3 -3
  271. unstructured_ingest/{v2/processes → processes}/utils/blob_storage.py +2 -2
  272. unstructured_ingest/{v2/unstructured_api.py → unstructured_api.py} +2 -2
  273. unstructured_ingest/utils/compression.py +1 -48
  274. unstructured_ingest/utils/data_prep.py +9 -1
  275. unstructured_ingest/utils/html.py +3 -3
  276. unstructured_ingest/{v2/utils.py → utils/pydantic_models.py} +0 -9
  277. unstructured_ingest/utils/string_and_date_utils.py +1 -1
  278. {unstructured_ingest-0.6.2.dist-info → unstructured_ingest-0.7.0.dist-info}/METADATA +99 -99
  279. unstructured_ingest-0.7.0.dist-info/RECORD +370 -0
  280. {unstructured_ingest-0.6.2.dist-info → unstructured_ingest-0.7.0.dist-info}/top_level.txt +1 -0
  281. test/unit/v2/test_utils.py +0 -82
  282. unstructured_ingest/cli/cmd_factory.py +0 -12
  283. unstructured_ingest/cli/cmds/__init__.py +0 -145
  284. unstructured_ingest/cli/cmds/airtable.py +0 -69
  285. unstructured_ingest/cli/cmds/astradb.py +0 -99
  286. unstructured_ingest/cli/cmds/azure_ai_search.py +0 -65
  287. unstructured_ingest/cli/cmds/biomed.py +0 -52
  288. unstructured_ingest/cli/cmds/chroma.py +0 -104
  289. unstructured_ingest/cli/cmds/clarifai.py +0 -71
  290. unstructured_ingest/cli/cmds/confluence.py +0 -69
  291. unstructured_ingest/cli/cmds/databricks_volumes.py +0 -163
  292. unstructured_ingest/cli/cmds/delta_table.py +0 -94
  293. unstructured_ingest/cli/cmds/discord.py +0 -47
  294. unstructured_ingest/cli/cmds/elasticsearch.py +0 -133
  295. unstructured_ingest/cli/cmds/fsspec/azure.py +0 -94
  296. unstructured_ingest/cli/cmds/fsspec/box.py +0 -48
  297. unstructured_ingest/cli/cmds/fsspec/dropbox.py +0 -51
  298. unstructured_ingest/cli/cmds/fsspec/fsspec.py +0 -15
  299. unstructured_ingest/cli/cmds/fsspec/gcs.py +0 -71
  300. unstructured_ingest/cli/cmds/fsspec/s3.py +0 -74
  301. unstructured_ingest/cli/cmds/fsspec/sftp.py +0 -58
  302. unstructured_ingest/cli/cmds/github.py +0 -54
  303. unstructured_ingest/cli/cmds/gitlab.py +0 -54
  304. unstructured_ingest/cli/cmds/google_drive.py +0 -49
  305. unstructured_ingest/cli/cmds/hubspot.py +0 -70
  306. unstructured_ingest/cli/cmds/jira.py +0 -71
  307. unstructured_ingest/cli/cmds/kafka.py +0 -102
  308. unstructured_ingest/cli/cmds/local.py +0 -43
  309. unstructured_ingest/cli/cmds/mongodb.py +0 -72
  310. unstructured_ingest/cli/cmds/notion.py +0 -48
  311. unstructured_ingest/cli/cmds/onedrive.py +0 -66
  312. unstructured_ingest/cli/cmds/opensearch.py +0 -117
  313. unstructured_ingest/cli/cmds/outlook.py +0 -67
  314. unstructured_ingest/cli/cmds/pinecone.py +0 -71
  315. unstructured_ingest/cli/cmds/qdrant.py +0 -124
  316. unstructured_ingest/cli/cmds/reddit.py +0 -67
  317. unstructured_ingest/cli/cmds/salesforce.py +0 -58
  318. unstructured_ingest/cli/cmds/sharepoint.py +0 -66
  319. unstructured_ingest/cli/cmds/slack.py +0 -56
  320. unstructured_ingest/cli/cmds/sql.py +0 -66
  321. unstructured_ingest/cli/cmds/vectara.py +0 -66
  322. unstructured_ingest/cli/cmds/weaviate.py +0 -98
  323. unstructured_ingest/cli/cmds/wikipedia.py +0 -40
  324. unstructured_ingest/cli/common.py +0 -7
  325. unstructured_ingest/cli/interfaces.py +0 -663
  326. unstructured_ingest/cli/utils.py +0 -205
  327. unstructured_ingest/connector/airtable.py +0 -309
  328. unstructured_ingest/connector/astradb.py +0 -267
  329. unstructured_ingest/connector/azure_ai_search.py +0 -144
  330. unstructured_ingest/connector/biomed.py +0 -320
  331. unstructured_ingest/connector/chroma.py +0 -158
  332. unstructured_ingest/connector/clarifai.py +0 -122
  333. unstructured_ingest/connector/confluence.py +0 -285
  334. unstructured_ingest/connector/databricks_volumes.py +0 -137
  335. unstructured_ingest/connector/delta_table.py +0 -203
  336. unstructured_ingest/connector/discord.py +0 -180
  337. unstructured_ingest/connector/elasticsearch.py +0 -396
  338. unstructured_ingest/connector/fsspec/azure.py +0 -78
  339. unstructured_ingest/connector/fsspec/box.py +0 -109
  340. unstructured_ingest/connector/fsspec/dropbox.py +0 -160
  341. unstructured_ingest/connector/fsspec/fsspec.py +0 -359
  342. unstructured_ingest/connector/fsspec/gcs.py +0 -82
  343. unstructured_ingest/connector/fsspec/s3.py +0 -62
  344. unstructured_ingest/connector/fsspec/sftp.py +0 -81
  345. unstructured_ingest/connector/git.py +0 -124
  346. unstructured_ingest/connector/github.py +0 -174
  347. unstructured_ingest/connector/gitlab.py +0 -142
  348. unstructured_ingest/connector/google_drive.py +0 -348
  349. unstructured_ingest/connector/hubspot.py +0 -278
  350. unstructured_ingest/connector/jira.py +0 -469
  351. unstructured_ingest/connector/kafka.py +0 -293
  352. unstructured_ingest/connector/local.py +0 -139
  353. unstructured_ingest/connector/mongodb.py +0 -284
  354. unstructured_ingest/connector/notion/client.py +0 -248
  355. unstructured_ingest/connector/notion/connector.py +0 -469
  356. unstructured_ingest/connector/notion/helpers.py +0 -584
  357. unstructured_ingest/connector/notion/types/blocks/bookmark.py +0 -40
  358. unstructured_ingest/connector/notion/types/blocks/callout.py +0 -94
  359. unstructured_ingest/connector/notion/types/blocks/child_database.py +0 -23
  360. unstructured_ingest/connector/notion/types/blocks/code.py +0 -43
  361. unstructured_ingest/connector/notion/types/blocks/column_list.py +0 -35
  362. unstructured_ingest/connector/notion/types/blocks/divider.py +0 -22
  363. unstructured_ingest/connector/notion/types/blocks/heading.py +0 -37
  364. unstructured_ingest/connector/notion/types/blocks/link_preview.py +0 -24
  365. unstructured_ingest/connector/notion/types/blocks/numbered_list.py +0 -29
  366. unstructured_ingest/connector/notion/types/blocks/quote.py +0 -37
  367. unstructured_ingest/connector/notion/types/blocks/synced_block.py +0 -57
  368. unstructured_ingest/connector/notion/types/blocks/table.py +0 -63
  369. unstructured_ingest/connector/notion/types/blocks/table_of_contents.py +0 -23
  370. unstructured_ingest/connector/notion/types/blocks/template.py +0 -30
  371. unstructured_ingest/connector/notion/types/blocks/todo.py +0 -42
  372. unstructured_ingest/connector/notion/types/blocks/unsupported.py +0 -20
  373. unstructured_ingest/connector/notion/types/database_properties/__init__.py +0 -106
  374. unstructured_ingest/connector/notion/types/database_properties/checkbox.py +0 -38
  375. unstructured_ingest/connector/notion/types/database_properties/date.py +0 -41
  376. unstructured_ingest/connector/notion/types/database_properties/formula.py +0 -49
  377. unstructured_ingest/connector/notion/types/database_properties/last_edited_time.py +0 -34
  378. unstructured_ingest/connector/notion/types/database_properties/multiselect.py +0 -73
  379. unstructured_ingest/connector/notion/types/database_properties/people.py +0 -40
  380. unstructured_ingest/connector/notion/types/database_properties/phone_number.py +0 -36
  381. unstructured_ingest/connector/notion/types/database_properties/relation.py +0 -67
  382. unstructured_ingest/connector/notion/types/database_properties/select.py +0 -68
  383. unstructured_ingest/connector/notion/types/database_properties/status.py +0 -80
  384. unstructured_ingest/connector/notion/types/database_properties/unique_id.py +0 -50
  385. unstructured_ingest/connector/notion/types/date.py +0 -26
  386. unstructured_ingest/connector/notion/types/file.py +0 -51
  387. unstructured_ingest/connector/notion/types/user.py +0 -76
  388. unstructured_ingest/connector/onedrive.py +0 -232
  389. unstructured_ingest/connector/opensearch.py +0 -218
  390. unstructured_ingest/connector/outlook.py +0 -285
  391. unstructured_ingest/connector/pinecone.py +0 -150
  392. unstructured_ingest/connector/qdrant.py +0 -144
  393. unstructured_ingest/connector/reddit.py +0 -166
  394. unstructured_ingest/connector/registry.py +0 -109
  395. unstructured_ingest/connector/salesforce.py +0 -301
  396. unstructured_ingest/connector/sharepoint.py +0 -573
  397. unstructured_ingest/connector/slack.py +0 -224
  398. unstructured_ingest/connector/sql.py +0 -199
  399. unstructured_ingest/connector/vectara.py +0 -253
  400. unstructured_ingest/connector/weaviate.py +0 -190
  401. unstructured_ingest/connector/wikipedia.py +0 -208
  402. unstructured_ingest/enhanced_dataclass/__init__.py +0 -4
  403. unstructured_ingest/enhanced_dataclass/core.py +0 -99
  404. unstructured_ingest/enhanced_dataclass/dataclasses.py +0 -54
  405. unstructured_ingest/enhanced_dataclass/json_mixin.py +0 -125
  406. unstructured_ingest/interfaces.py +0 -852
  407. unstructured_ingest/pipeline/copy.py +0 -19
  408. unstructured_ingest/pipeline/doc_factory.py +0 -12
  409. unstructured_ingest/pipeline/partition.py +0 -60
  410. unstructured_ingest/pipeline/permissions.py +0 -12
  411. unstructured_ingest/pipeline/reformat/chunking.py +0 -134
  412. unstructured_ingest/pipeline/reformat/embedding.py +0 -64
  413. unstructured_ingest/pipeline/source.py +0 -77
  414. unstructured_ingest/pipeline/utils.py +0 -6
  415. unstructured_ingest/pipeline/write.py +0 -18
  416. unstructured_ingest/processor.py +0 -93
  417. unstructured_ingest/runner/__init__.py +0 -104
  418. unstructured_ingest/runner/airtable.py +0 -35
  419. unstructured_ingest/runner/astradb.py +0 -34
  420. unstructured_ingest/runner/base_runner.py +0 -89
  421. unstructured_ingest/runner/biomed.py +0 -45
  422. unstructured_ingest/runner/confluence.py +0 -35
  423. unstructured_ingest/runner/delta_table.py +0 -34
  424. unstructured_ingest/runner/discord.py +0 -35
  425. unstructured_ingest/runner/elasticsearch.py +0 -40
  426. unstructured_ingest/runner/fsspec/azure.py +0 -30
  427. unstructured_ingest/runner/fsspec/box.py +0 -28
  428. unstructured_ingest/runner/fsspec/dropbox.py +0 -30
  429. unstructured_ingest/runner/fsspec/fsspec.py +0 -40
  430. unstructured_ingest/runner/fsspec/gcs.py +0 -28
  431. unstructured_ingest/runner/fsspec/s3.py +0 -28
  432. unstructured_ingest/runner/fsspec/sftp.py +0 -28
  433. unstructured_ingest/runner/github.py +0 -37
  434. unstructured_ingest/runner/gitlab.py +0 -37
  435. unstructured_ingest/runner/google_drive.py +0 -35
  436. unstructured_ingest/runner/hubspot.py +0 -35
  437. unstructured_ingest/runner/jira.py +0 -35
  438. unstructured_ingest/runner/kafka.py +0 -34
  439. unstructured_ingest/runner/local.py +0 -23
  440. unstructured_ingest/runner/mongodb.py +0 -34
  441. unstructured_ingest/runner/notion.py +0 -61
  442. unstructured_ingest/runner/onedrive.py +0 -35
  443. unstructured_ingest/runner/opensearch.py +0 -40
  444. unstructured_ingest/runner/outlook.py +0 -33
  445. unstructured_ingest/runner/reddit.py +0 -35
  446. unstructured_ingest/runner/salesforce.py +0 -33
  447. unstructured_ingest/runner/sharepoint.py +0 -35
  448. unstructured_ingest/runner/slack.py +0 -33
  449. unstructured_ingest/runner/utils.py +0 -47
  450. unstructured_ingest/runner/wikipedia.py +0 -35
  451. unstructured_ingest/runner/writers/__init__.py +0 -48
  452. unstructured_ingest/runner/writers/astradb.py +0 -22
  453. unstructured_ingest/runner/writers/azure_ai_search.py +0 -24
  454. unstructured_ingest/runner/writers/base_writer.py +0 -26
  455. unstructured_ingest/runner/writers/chroma.py +0 -22
  456. unstructured_ingest/runner/writers/clarifai.py +0 -19
  457. unstructured_ingest/runner/writers/databricks_volumes.py +0 -25
  458. unstructured_ingest/runner/writers/delta_table.py +0 -24
  459. unstructured_ingest/runner/writers/elasticsearch.py +0 -24
  460. unstructured_ingest/runner/writers/fsspec/azure.py +0 -24
  461. unstructured_ingest/runner/writers/fsspec/box.py +0 -21
  462. unstructured_ingest/runner/writers/fsspec/dropbox.py +0 -21
  463. unstructured_ingest/runner/writers/fsspec/gcs.py +0 -19
  464. unstructured_ingest/runner/writers/fsspec/s3.py +0 -21
  465. unstructured_ingest/runner/writers/kafka.py +0 -21
  466. unstructured_ingest/runner/writers/mongodb.py +0 -21
  467. unstructured_ingest/runner/writers/opensearch.py +0 -26
  468. unstructured_ingest/runner/writers/pinecone.py +0 -21
  469. unstructured_ingest/runner/writers/qdrant.py +0 -19
  470. unstructured_ingest/runner/writers/sql.py +0 -22
  471. unstructured_ingest/runner/writers/vectara.py +0 -22
  472. unstructured_ingest/runner/writers/weaviate.py +0 -21
  473. unstructured_ingest/utils/google_filetype.py +0 -9
  474. unstructured_ingest/v2/__init__.py +0 -1
  475. unstructured_ingest/v2/cli/__init__.py +0 -0
  476. unstructured_ingest/v2/cli/base/__init__.py +0 -4
  477. unstructured_ingest/v2/cli/base/cmd.py +0 -269
  478. unstructured_ingest/v2/cli/base/dest.py +0 -85
  479. unstructured_ingest/v2/cli/base/src.py +0 -85
  480. unstructured_ingest/v2/cli/cli.py +0 -24
  481. unstructured_ingest/v2/cli/utils/__init__.py +0 -0
  482. unstructured_ingest/v2/logger.py +0 -126
  483. unstructured_ingest/v2/main.py +0 -11
  484. unstructured_ingest/v2/pipeline/__init__.py +0 -0
  485. unstructured_ingest/v2/pipeline/interfaces.py +0 -211
  486. unstructured_ingest/v2/pipeline/pipeline.py +0 -408
  487. unstructured_ingest/v2/pipeline/steps/__init__.py +0 -0
  488. unstructured_ingest/v2/processes/connectors/assets/__init__.py +0 -0
  489. unstructured_ingest/v2/processes/connectors/assets/databricks_delta_table_schema.sql +0 -10
  490. unstructured_ingest/v2/processes/connectors/assets/weaviate_collection_config.json +0 -23
  491. unstructured_ingest/v2/processes/connectors/notion/__init__.py +0 -0
  492. unstructured_ingest/v2/processes/connectors/notion/interfaces.py +0 -32
  493. unstructured_ingest/v2/processes/connectors/notion/types/__init__.py +0 -0
  494. unstructured_ingest/v2/processes/connectors/notion/types/block.py +0 -96
  495. unstructured_ingest/v2/processes/connectors/notion/types/blocks/__init__.py +0 -63
  496. unstructured_ingest/v2/processes/connectors/notion/types/blocks/breadcrumb.py +0 -21
  497. unstructured_ingest/v2/processes/connectors/notion/types/blocks/bulleted_list_item.py +0 -31
  498. unstructured_ingest/v2/processes/connectors/notion/types/blocks/child_page.py +0 -23
  499. unstructured_ingest/v2/processes/connectors/notion/types/blocks/embed.py +0 -36
  500. unstructured_ingest/v2/processes/connectors/notion/types/blocks/equation.py +0 -23
  501. unstructured_ingest/v2/processes/connectors/notion/types/blocks/file.py +0 -49
  502. unstructured_ingest/v2/processes/connectors/notion/types/blocks/image.py +0 -21
  503. unstructured_ingest/v2/processes/connectors/notion/types/blocks/link_to_page.py +0 -29
  504. unstructured_ingest/v2/processes/connectors/notion/types/blocks/paragraph.py +0 -31
  505. unstructured_ingest/v2/processes/connectors/notion/types/blocks/pdf.py +0 -49
  506. unstructured_ingest/v2/processes/connectors/notion/types/blocks/toggle.py +0 -37
  507. unstructured_ingest/v2/processes/connectors/notion/types/blocks/video.py +0 -22
  508. unstructured_ingest/v2/processes/connectors/notion/types/database.py +0 -73
  509. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/created_by.py +0 -35
  510. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/created_time.py +0 -34
  511. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/email.py +0 -36
  512. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/files.py +0 -37
  513. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/last_edited_by.py +0 -34
  514. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/number.py +0 -49
  515. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/rich_text.py +0 -43
  516. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/rollup.py +0 -56
  517. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/title.py +0 -37
  518. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/url.py +0 -37
  519. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/verification.py +0 -78
  520. unstructured_ingest/v2/processes/connectors/notion/types/page.py +0 -45
  521. unstructured_ingest/v2/processes/connectors/notion/types/parent.py +0 -66
  522. unstructured_ingest/v2/processes/connectors/notion/types/rich_text.py +0 -189
  523. unstructured_ingest/v2/processes/connectors/zendesk/__init__.py +0 -0
  524. unstructured_ingest/v2/processes/utils/__init__.py +0 -0
  525. unstructured_ingest/v2/types/__init__.py +0 -0
  526. unstructured_ingest-0.6.2.dist-info/RECORD +0 -589
  527. {test/unit/v2 → examples}/__init__.py +0 -0
  528. /test/unit/{v2/chunkers → chunkers}/__init__.py +0 -0
  529. /test/unit/{v2/connectors → connectors}/__init__.py +0 -0
  530. /test/unit/{v2/connectors → connectors}/ibm_watsonx/__init__.py +0 -0
  531. /test/unit/{v2/connectors → connectors}/motherduck/__init__.py +0 -0
  532. /test/unit/{v2/connectors → connectors}/sql/__init__.py +0 -0
  533. /test/unit/{v2/embedders → embedders}/__init__.py +0 -0
  534. /test/unit/{v2/embedders → embedders}/test_bedrock.py +0 -0
  535. /test/unit/{v2/embedders → embedders}/test_mixedbread.py +0 -0
  536. /test/unit/{v2/embedders → embedders}/test_octoai.py +0 -0
  537. /test/unit/{v2/embedders → embedders}/test_openai.py +0 -0
  538. /test/unit/{v2/embedders → embedders}/test_togetherai.py +0 -0
  539. /test/unit/{v2/embedders → embedders}/test_voyageai.py +0 -0
  540. /test/unit/{v2/partitioners → partitioners}/__init__.py +0 -0
  541. /test/unit/{v2/utils → utils}/__init__.py +0 -0
  542. /test/unit/{v2/utils → utils}/data_generator.py +0 -0
  543. /unstructured_ingest/{v2/cli → cli}/base/importer.py +0 -0
  544. /unstructured_ingest/cli/{cmds/fsspec → utils}/__init__.py +0 -0
  545. /unstructured_ingest/{v2/cli → cli}/utils/click.py +0 -0
  546. /unstructured_ingest/{connector → data_types}/__init__.py +0 -0
  547. /unstructured_ingest/{v2/errors.py → errors_v2.py} +0 -0
  548. /unstructured_ingest/{v2/interfaces → interfaces}/__init__.py +0 -0
  549. /unstructured_ingest/{v2/interfaces → interfaces}/connector.py +0 -0
  550. /unstructured_ingest/{v2/interfaces → interfaces}/process.py +0 -0
  551. /unstructured_ingest/{v2/interfaces → interfaces}/processor.py +0 -0
  552. /unstructured_ingest/{connector/fsspec → pipeline/steps}/__init__.py +0 -0
  553. /unstructured_ingest/{v2/processes → processes}/__init__.py +0 -0
  554. /unstructured_ingest/{connector/notion → processes/connectors/assets}/__init__.py +0 -0
  555. /unstructured_ingest/{v2/processes → processes}/connectors/fsspec/utils.py +0 -0
  556. /unstructured_ingest/{connector/notion/types → processes/connectors/notion}/__init__.py +0 -0
  557. /unstructured_ingest/{ingest_backoff → processes/connectors/notion/ingest_backoff}/__init__.py +0 -0
  558. /unstructured_ingest/{ingest_backoff → processes/connectors/notion/ingest_backoff}/_common.py +0 -0
  559. /unstructured_ingest/{connector → processes/connectors}/notion/interfaces.py +0 -0
  560. /unstructured_ingest/{pipeline/reformat → processes/connectors/notion/types}/__init__.py +0 -0
  561. /unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/__init__.py +0 -0
  562. /unstructured_ingest/{v2/processes → processes}/connectors/utils.py +0 -0
  563. /unstructured_ingest/{runner/fsspec → processes/connectors/zendesk}/__init__.py +0 -0
  564. /unstructured_ingest/{runner/writers/fsspec → processes/utils}/__init__.py +0 -0
  565. /unstructured_ingest/{v2 → utils}/constants.py +0 -0
  566. {unstructured_ingest-0.6.2.dist-info → unstructured_ingest-0.7.0.dist-info}/LICENSE.md +0 -0
  567. {unstructured_ingest-0.6.2.dist-info → unstructured_ingest-0.7.0.dist-info}/WHEEL +0 -0
  568. {unstructured_ingest-0.6.2.dist-info → unstructured_ingest-0.7.0.dist-info}/entry_points.txt +0 -0
@@ -1,69 +0,0 @@
1
- import typing as t
2
- from dataclasses import dataclass
3
-
4
- import click
5
-
6
- from unstructured_ingest.cli.base.src import BaseSrcCmd
7
- from unstructured_ingest.cli.interfaces import (
8
- CliConfig,
9
- )
10
- from unstructured_ingest.connector.airtable import SimpleAirtableConfig
11
-
12
-
13
- @dataclass
14
- class AirtableCliConfig(SimpleAirtableConfig, CliConfig):
15
- @staticmethod
16
- def get_cli_options() -> t.List[click.Option]:
17
- options = [
18
- click.Option(
19
- ["--personal-access-token"],
20
- default=None,
21
- help="Personal access token to authenticate into Airtable. Check: "
22
- "https://support.airtable.com/docs/creating-and-using-api-keys-and-access-tokens "
23
- "for more info",
24
- ),
25
- click.Option(
26
- ["--list-of-paths"],
27
- default=None,
28
- help="""
29
- A list of paths that specify the locations to ingest data from within Airtable.
30
-
31
- If this argument is not set, the connector ingests all tables within each and every base.
32
- --list-of-paths: path1 path2 path3 ….
33
- path: base_id/table_id(optional)/view_id(optional)/
34
-
35
- To obtain (base, table, view) ids in bulk, check:
36
- https://airtable.com/developers/web/api/list-bases (base ids)
37
- https://airtable.com/developers/web/api/get-base-schema (table and view ids)
38
- https://pyairtable.readthedocs.io/en/latest/metadata.html (base, table and view ids)
39
-
40
- To obtain specific ids from Airtable UI, go to your workspace, and copy any
41
- relevant id from the URL structure:
42
- https://airtable.com/appAbcDeF1ghijKlm/tblABcdEfG1HIJkLm/viwABCDEfg6hijKLM
43
- appAbcDeF1ghijKlm -> base_id
44
- tblABcdEfG1HIJkLm -> table_id
45
- viwABCDEfg6hijKLM -> view_id
46
-
47
- You can also check: https://support.airtable.com/docs/finding-airtable-ids
48
-
49
- Here is an example for one --list-of-paths:
50
- base1/ → gets the entirety of all tables inside base1
51
- base1/table1 → gets all rows and columns within table1 in base1
52
- base1/table1/view1 → gets the rows and columns that are
53
- visible in view1 for the table1 in base1
54
-
55
- Examples to invalid airtable_paths:
56
- table1 → has to mention base to be valid
57
- base1/view1 → has to mention table to be valid
58
- """,
59
- ),
60
- ]
61
- return options
62
-
63
-
64
- def get_base_src_cmd() -> BaseSrcCmd:
65
- cmd_cls = BaseSrcCmd(
66
- cmd_name="airtable",
67
- cli_config=AirtableCliConfig,
68
- )
69
- return cmd_cls
@@ -1,99 +0,0 @@
1
- import typing as t
2
- from dataclasses import dataclass
3
-
4
- import click
5
-
6
- from unstructured_ingest.cli.interfaces import CliConfig, Dict
7
- from unstructured_ingest.connector.astradb import AstraDBWriteConfig, SimpleAstraDBConfig
8
-
9
-
10
- @dataclass
11
- class AstraDBCliConfig(SimpleAstraDBConfig, CliConfig):
12
- @staticmethod
13
- def get_cli_options() -> t.List[click.Option]:
14
- options = [
15
- click.Option(
16
- ["--token"],
17
- required=True,
18
- type=str,
19
- help="Astra DB Token with access to the database.",
20
- envvar="ASTRA_DB_APPLICATION_TOKEN",
21
- show_envvar=True,
22
- ),
23
- click.Option(
24
- ["--api-endpoint"],
25
- required=True,
26
- type=str,
27
- help="The API endpoint for the Astra DB.",
28
- envvar="ASTRA_DB_API_ENDPOINT",
29
- show_envvar=True,
30
- ),
31
- click.Option(
32
- ["--collection-name"],
33
- required=False,
34
- type=str,
35
- help="The name of the Astra DB collection. "
36
- "Note that the collection name must only include letters, "
37
- "numbers, and underscores.",
38
- ),
39
- click.Option(
40
- ["--keyspace"],
41
- required=False,
42
- default=None,
43
- type=str,
44
- help="The Astra DB connection keyspace.",
45
- ),
46
- ]
47
- return options
48
-
49
-
50
- @dataclass
51
- class AstraDBCliWriteConfig(AstraDBWriteConfig, CliConfig):
52
- @staticmethod
53
- def get_cli_options() -> t.List[click.Option]:
54
- options = [
55
- click.Option(
56
- ["--embedding-dimension"],
57
- required=True,
58
- default=384,
59
- type=int,
60
- help="The dimensionality of the embeddings",
61
- ),
62
- click.Option(
63
- ["--requested-indexing-policy"],
64
- required=False,
65
- default=None,
66
- type=Dict(),
67
- help="The indexing policy to use for the collection."
68
- 'example: \'{"deny": ["metadata"]}\' ',
69
- ),
70
- click.Option(
71
- ["--batch-size"],
72
- default=20,
73
- type=int,
74
- help="Number of records per batch",
75
- ),
76
- ]
77
- return options
78
-
79
-
80
- def get_base_src_cmd():
81
- from unstructured_ingest.cli.base.src import BaseSrcCmd
82
-
83
- cmd_cls = BaseSrcCmd(
84
- cmd_name="astradb",
85
- cli_config=AstraDBCliConfig,
86
- )
87
- return cmd_cls
88
-
89
-
90
- def get_base_dest_cmd():
91
- from unstructured_ingest.cli.base.dest import BaseDestCmd
92
-
93
- cmd_cls = BaseDestCmd(
94
- cmd_name="astradb",
95
- cli_config=AstraDBCliConfig,
96
- additional_cli_options=[AstraDBCliWriteConfig],
97
- write_config=AstraDBWriteConfig,
98
- )
99
- return cmd_cls
@@ -1,65 +0,0 @@
1
- import typing as t
2
- from dataclasses import dataclass
3
-
4
- import click
5
-
6
- from unstructured_ingest.cli.interfaces import (
7
- CliConfig,
8
- )
9
- from unstructured_ingest.connector.azure_ai_search import (
10
- AzureAISearchWriteConfig,
11
- SimpleAzureAISearchStorageConfig,
12
- )
13
-
14
-
15
- @dataclass
16
- class AzureAISearchCliConfig(SimpleAzureAISearchStorageConfig, CliConfig):
17
- @staticmethod
18
- def get_cli_options() -> t.List[click.Option]:
19
- options = [
20
- click.Option(
21
- ["--key"],
22
- required=True,
23
- type=str,
24
- help="Key credential used for authenticating to an Azure service.",
25
- envvar="AZURE_SEARCH_API_KEY",
26
- show_envvar=True,
27
- ),
28
- click.Option(
29
- ["--endpoint"],
30
- required=True,
31
- type=str,
32
- help="The URL endpoint of an Azure search service. "
33
- "In the form of https://{{service_name}}.search.windows.net",
34
- envvar="AZURE_SEARCH_ENDPOINT",
35
- show_envvar=True,
36
- ),
37
- ]
38
- return options
39
-
40
-
41
- @dataclass
42
- class AzureAISearchCliWriteConfig(AzureAISearchWriteConfig, CliConfig):
43
- @staticmethod
44
- def get_cli_options() -> t.List[click.Option]:
45
- options = [
46
- click.Option(
47
- ["--index"],
48
- required=True,
49
- type=str,
50
- help="The name of the index to connect to",
51
- ),
52
- ]
53
- return options
54
-
55
-
56
- def get_base_dest_cmd():
57
- from unstructured_ingest.cli.base.dest import BaseDestCmd
58
-
59
- cmd_cls = BaseDestCmd(
60
- cmd_name="azure-ai-search",
61
- cli_config=AzureAISearchCliConfig,
62
- additional_cli_options=[AzureAISearchCliWriteConfig],
63
- write_config=AzureAISearchCliWriteConfig,
64
- )
65
- return cmd_cls
@@ -1,52 +0,0 @@
1
- import typing as t
2
- from dataclasses import dataclass
3
-
4
- import click
5
-
6
- from unstructured_ingest.cli.base.src import BaseSrcCmd
7
- from unstructured_ingest.cli.interfaces import (
8
- CliConfig,
9
- )
10
- from unstructured_ingest.connector.biomed import SimpleBiomedConfig
11
-
12
-
13
- @dataclass
14
- class BiomedCliConfig(SimpleBiomedConfig, CliConfig):
15
- @staticmethod
16
- def get_cli_options() -> t.List[click.Option]:
17
- options = [
18
- click.Option(
19
- ["--api-id"],
20
- default=None,
21
- help="ID parameter for OA Web Service API.",
22
- ),
23
- click.Option(
24
- ["--api-from"],
25
- default=None,
26
- help="From parameter for OA Web Service API.",
27
- ),
28
- click.Option(
29
- ["--api-until"],
30
- default=None,
31
- help="Until parameter for OA Web Service API.",
32
- ),
33
- click.Option(
34
- ["--path"],
35
- default=None,
36
- help="PMC Open Access FTP Directory Path.",
37
- ),
38
- click.Option(
39
- ["--max-request-time"],
40
- default=45,
41
- help="(In seconds) Max request time to OA Web Service API.",
42
- ),
43
- ]
44
- return options
45
-
46
-
47
- def get_base_src_cmd() -> BaseSrcCmd:
48
- cmd_cls = BaseSrcCmd(
49
- cmd_name="biomed",
50
- cli_config=BiomedCliConfig,
51
- )
52
- return cmd_cls
@@ -1,104 +0,0 @@
1
- import typing as t
2
- from dataclasses import dataclass
3
-
4
- import click
5
-
6
- from unstructured_ingest.cli.interfaces import CliConfig, Dict
7
- from unstructured_ingest.connector.chroma import ChromaWriteConfig, SimpleChromaConfig
8
-
9
-
10
- @dataclass
11
- class ChromaCliConfig(SimpleChromaConfig, CliConfig):
12
- @staticmethod
13
- def get_cli_options() -> t.List[click.Option]:
14
- options = [
15
- click.Option(
16
- ["--path"],
17
- required=False,
18
- type=str,
19
- help="Location where Chroma is persisted," "if not connecting via http.",
20
- ),
21
- click.Option(
22
- ["--settings"],
23
- required=False,
24
- type=Dict(),
25
- help="A dictionary of settings to communicate with the chroma server."
26
- 'example: \'{"persist_directory":"./chroma-persist"}\' ',
27
- ),
28
- click.Option(
29
- ["--tenant"],
30
- required=False,
31
- default="default_tenant",
32
- type=str,
33
- help="The tenant to use for this client. Chroma defaults to 'default_tenant'.",
34
- ),
35
- click.Option(
36
- ["--database"],
37
- required=False,
38
- default="default_database",
39
- type=str,
40
- help="The database to use for this client."
41
- "Chroma defaults to 'default_database'.",
42
- ),
43
- click.Option(
44
- ["--host"],
45
- required=False,
46
- type=str,
47
- help="The hostname of the Chroma server.",
48
- ),
49
- click.Option(
50
- ["--port"],
51
- required=False,
52
- type=int,
53
- help="The port of the Chroma server.",
54
- ),
55
- click.Option(
56
- ["--ssl"],
57
- required=False,
58
- default=False,
59
- is_flag=True,
60
- type=bool,
61
- help="Whether to use SSL to connect to the Chroma server.",
62
- ),
63
- click.Option(
64
- ["--headers"],
65
- required=False,
66
- type=Dict(),
67
- help="A dictionary of headers to send to the Chroma server."
68
- 'example: \'{"Authorization":"Basic()"}\' ',
69
- ),
70
- click.Option(
71
- ["--collection-name"],
72
- required=True,
73
- type=str,
74
- help="The name of the Chroma collection to write into.",
75
- ),
76
- ]
77
- return options
78
-
79
-
80
- @dataclass
81
- class ChromaCliWriteConfig(ChromaWriteConfig, CliConfig):
82
- @staticmethod
83
- def get_cli_options() -> t.List[click.Option]:
84
- options = [
85
- click.Option(
86
- ["--batch-size"],
87
- default=100,
88
- type=int,
89
- help="Number of records per batch",
90
- ),
91
- ]
92
- return options
93
-
94
-
95
- def get_base_dest_cmd():
96
- from unstructured_ingest.cli.base.dest import BaseDestCmd
97
-
98
- cmd_cls = BaseDestCmd(
99
- cmd_name="chroma",
100
- cli_config=ChromaCliConfig,
101
- additional_cli_options=[ChromaCliWriteConfig],
102
- write_config=ChromaWriteConfig,
103
- )
104
- return cmd_cls
@@ -1,71 +0,0 @@
1
- import typing as t
2
- from dataclasses import dataclass
3
-
4
- import click
5
-
6
- from unstructured_ingest.cli.interfaces import CliConfig
7
- from unstructured_ingest.connector.clarifai import (
8
- ClarifaiWriteConfig,
9
- SimpleClarifaiConfig,
10
- )
11
-
12
- CMD_NAME = "clarifai"
13
-
14
-
15
- @dataclass
16
- class ClarifaiCliConfig(SimpleClarifaiConfig, CliConfig):
17
- @staticmethod
18
- def get_cli_options() -> t.List[click.Option]:
19
- options = [
20
- click.Option(
21
- ["--api-key"],
22
- required=True,
23
- type=str,
24
- help="The CLARIFAI_PAT of the user to access clarifai platform apps and models",
25
- envvar="CLARIFAI_PAT",
26
- show_envvar=True,
27
- ),
28
- click.Option(
29
- ["--app-id"],
30
- required=True,
31
- type=str,
32
- help="Clarifai app name/id",
33
- ),
34
- click.Option(
35
- ["--user-id"],
36
- required=True,
37
- type=str,
38
- help="Clarifai User name/ID",
39
- ),
40
- click.Option(
41
- ["--dataset-id"], type=str, default=None, help="Clarifai App Dataset ID (optional)"
42
- ),
43
- ]
44
- return options
45
-
46
-
47
- @dataclass
48
- class ClarifaiCliWriteConfig(ClarifaiWriteConfig, CliConfig):
49
- @staticmethod
50
- def get_cli_options() -> t.List[click.option]:
51
- options = [
52
- click.Option(
53
- ["--batch-size"],
54
- type=int,
55
- default=50,
56
- help="No of inputs upload per batch",
57
- ),
58
- ]
59
- return options
60
-
61
-
62
- def get_base_dest_cmd():
63
- from unstructured_ingest.cli.base.dest import BaseDestCmd
64
-
65
- cmd_cls = BaseDestCmd(
66
- cmd_name=CMD_NAME,
67
- cli_config=ClarifaiCliConfig,
68
- additional_cli_options=[ClarifaiCliWriteConfig],
69
- write_config=ClarifaiWriteConfig,
70
- )
71
- return cmd_cls
@@ -1,69 +0,0 @@
1
- import typing as t
2
- from dataclasses import dataclass
3
-
4
- import click
5
-
6
- from unstructured_ingest.cli.base.src import BaseSrcCmd
7
- from unstructured_ingest.cli.interfaces import (
8
- CliConfig,
9
- DelimitedString,
10
- )
11
- from unstructured_ingest.connector.confluence import SimpleConfluenceConfig
12
-
13
-
14
- @dataclass
15
- class ConfluenceCliConfig(SimpleConfluenceConfig, CliConfig):
16
- @staticmethod
17
- def get_cli_options() -> t.List[click.Option]:
18
- options = [
19
- click.Option(
20
- ["--api-token"],
21
- required=True,
22
- help="API Token to authenticate into Confluence Cloud. "
23
- "Check "
24
- "https://developer.atlassian.com/cloud/confluence/basic-auth-for-rest-apis/ "
25
- "for more info.",
26
- ),
27
- click.Option(
28
- ["--url"],
29
- required=True,
30
- help='URL to Confluence Cloud, e.g. "unstructured-ingest-test.atlassian.net"',
31
- ),
32
- click.Option(
33
- ["--user-email"],
34
- required=True,
35
- help="Email to authenticate into Confluence Cloud",
36
- ),
37
- click.Option(
38
- ["--spaces"],
39
- default=None,
40
- type=DelimitedString(),
41
- help="A list of confluence space ids to be fetched. From each fetched space, "
42
- "--num-of-docs-from-each-space number of docs will be ingested. "
43
- "--spaces and --num-of-spaces cannot be used at the same time",
44
- ),
45
- click.Option(
46
- ["--max-num-of-docs-from-each-space"],
47
- default=100,
48
- help="Number of documents to be aimed to be ingested from each fetched "
49
- "confluence space. If any space has fewer documents, all the documents from "
50
- "that space will be ingested. Documents are not necessarily "
51
- "ingested in order of creation date.",
52
- ),
53
- click.Option(
54
- ["--max-num-of-spaces"],
55
- default=500,
56
- help="Number of confluence space ids to be fetched. From each fetched space, "
57
- "--num-of-docs-from-each-space number of docs will be ingested. "
58
- "--spaces and --num-of-spaces cannot be used at the same time",
59
- ),
60
- ]
61
- return options
62
-
63
-
64
- def get_base_src_cmd() -> BaseSrcCmd:
65
- cmd_cls = BaseSrcCmd(
66
- cmd_name="confluence",
67
- cli_config=ConfluenceCliConfig,
68
- )
69
- return cmd_cls
@@ -1,163 +0,0 @@
1
- import typing as t
2
- from dataclasses import dataclass
3
-
4
- import click
5
-
6
- from unstructured_ingest.cli.interfaces import CliConfig
7
- from unstructured_ingest.connector.databricks_volumes import (
8
- DatabricksVolumesWriteConfig,
9
- SimpleDatabricksVolumesConfig,
10
- )
11
-
12
- CMD_NAME = "databricks-volumes"
13
-
14
-
15
- @dataclass
16
- class DatabricksVolumesCliConfig(SimpleDatabricksVolumesConfig, CliConfig):
17
- @staticmethod
18
- def get_cli_options() -> t.List[click.Option]:
19
- options = [
20
- click.Option(
21
- ["--host"],
22
- type=str,
23
- default=None,
24
- help="The Databricks host URL for either the "
25
- "Databricks workspace endpoint or the "
26
- "Databricks accounts endpoint.",
27
- ),
28
- click.Option(
29
- ["--account-id"],
30
- type=str,
31
- default=None,
32
- help="The Databricks account ID for the Databricks "
33
- "accounts endpoint. Only has effect when Host is "
34
- "either https://accounts.cloud.databricks.com/ (AWS), "
35
- "https://accounts.azuredatabricks.net/ (Azure), "
36
- "or https://accounts.gcp.databricks.com/ (GCP).",
37
- ),
38
- click.Option(
39
- ["--username"],
40
- type=str,
41
- default=None,
42
- help="The Databricks username part of basic authentication. "
43
- "Only possible when Host is *.cloud.databricks.com (AWS).",
44
- ),
45
- click.Option(
46
- ["--password"],
47
- type=str,
48
- default=None,
49
- help="The Databricks password part of basic authentication. "
50
- "Only possible when Host is *.cloud.databricks.com (AWS).",
51
- ),
52
- click.Option(["--client-id"], type=str, default=None),
53
- click.Option(["--client-secret"], type=str, default=None),
54
- click.Option(
55
- ["--token"],
56
- type=str,
57
- default=None,
58
- help="The Databricks personal access token (PAT) (AWS, Azure, and GCP) or "
59
- "Azure Active Directory (Azure AD) token (Azure).",
60
- ),
61
- click.Option(
62
- ["--azure-workspace-resource-id"],
63
- type=str,
64
- default=None,
65
- help="The Azure Resource Manager ID for the Azure Databricks workspace, "
66
- "which is exchanged for a Databricks host URL.",
67
- ),
68
- click.Option(
69
- ["--azure-client-secret"],
70
- type=str,
71
- default=None,
72
- help="The Azure AD service principal’s client secret.",
73
- ),
74
- click.Option(
75
- ["--azure-client-id"],
76
- type=str,
77
- default=None,
78
- help="The Azure AD service principal’s application ID.",
79
- ),
80
- click.Option(
81
- ["--azure-tenant-id"],
82
- type=str,
83
- default=None,
84
- help="The Azure AD service principal’s tenant ID.",
85
- ),
86
- click.Option(
87
- ["--azure-environment"],
88
- type=str,
89
- default=None,
90
- help="The Azure environment type (such as Public, UsGov, China, and Germany) for a "
91
- "specific set of API endpoints. Defaults to PUBLIC.",
92
- ),
93
- click.Option(
94
- ["--auth-type"],
95
- type=str,
96
- default=None,
97
- help="When multiple auth attributes are available in the "
98
- "environment, use the auth type specified by this "
99
- "argument. This argument also holds the currently "
100
- "selected auth.",
101
- ),
102
- click.Option(["--cluster-id"], type=str, default=None),
103
- click.Option(["--google-credentials"], type=str, default=None),
104
- click.Option(["--google-service-account"], type=str, default=None),
105
- ]
106
- return options
107
-
108
-
109
- @dataclass
110
- class DatabricksVolumesCliWriteConfig(DatabricksVolumesWriteConfig, CliConfig):
111
- @staticmethod
112
- def get_cli_options() -> t.List[click.Option]:
113
- options = [
114
- click.Option(
115
- ["--volume"], type=str, required=True, help="Name of volume in the Unity Catalog"
116
- ),
117
- click.Option(
118
- ["--catalog"],
119
- type=str,
120
- required=True,
121
- help="Name of the catalog in the Databricks Unity Catalog service",
122
- ),
123
- click.Option(
124
- ["--volume-path"],
125
- type=str,
126
- required=False,
127
- default=None,
128
- help="Optional path within the volume to write to",
129
- ),
130
- click.Option(
131
- ["--overwrite"],
132
- type=bool,
133
- is_flag=True,
134
- help="If true, an existing file will be overwritten.",
135
- ),
136
- click.Option(
137
- ["--encoding"],
138
- type=str,
139
- required=True,
140
- default="utf-8",
141
- help="Encoding applied to the data when written to the volume",
142
- ),
143
- click.Option(
144
- ["--schema"],
145
- type=str,
146
- required=True,
147
- default="default",
148
- help="Schema associated with the volume to write to in the Unity Catalog service",
149
- ),
150
- ]
151
- return options
152
-
153
-
154
- def get_base_dest_cmd():
155
- from unstructured_ingest.cli.base.dest import BaseDestCmd
156
-
157
- cmd_cls = BaseDestCmd(
158
- cmd_name=CMD_NAME,
159
- cli_config=DatabricksVolumesCliConfig,
160
- additional_cli_options=[DatabricksVolumesCliWriteConfig],
161
- write_config=DatabricksVolumesWriteConfig,
162
- )
163
- return cmd_cls