unstructured-ingest 0.7.2__py3-none-any.whl → 1.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of unstructured-ingest might be problematic. Click here for more details.

Files changed (187) hide show
  1. unstructured_ingest/__version__.py +1 -1
  2. unstructured_ingest/cli/README.md +28 -0
  3. unstructured_ingest/embed/mixedbreadai.py +0 -1
  4. unstructured_ingest/interfaces/upload_stager.py +2 -2
  5. unstructured_ingest/interfaces/uploader.py +3 -3
  6. unstructured_ingest/main.py +0 -0
  7. unstructured_ingest/pipeline/interfaces.py +1 -1
  8. unstructured_ingest/pipeline/pipeline.py +1 -1
  9. unstructured_ingest/processes/chunker.py +4 -0
  10. unstructured_ingest/processes/connectors/airtable.py +4 -2
  11. unstructured_ingest/processes/connectors/astradb.py +48 -34
  12. unstructured_ingest/processes/connectors/azure_ai_search.py +1 -1
  13. unstructured_ingest/processes/connectors/confluence.py +0 -1
  14. unstructured_ingest/processes/connectors/databricks/volumes_aws.py +1 -1
  15. unstructured_ingest/processes/connectors/databricks/volumes_azure.py +2 -2
  16. unstructured_ingest/processes/connectors/databricks/volumes_gcp.py +1 -1
  17. unstructured_ingest/processes/connectors/databricks/volumes_table.py +1 -2
  18. unstructured_ingest/processes/connectors/delta_table.py +1 -0
  19. unstructured_ingest/processes/connectors/duckdb/base.py +2 -2
  20. unstructured_ingest/processes/connectors/duckdb/duckdb.py +3 -3
  21. unstructured_ingest/processes/connectors/duckdb/motherduck.py +3 -3
  22. unstructured_ingest/processes/connectors/fsspec/s3.py +5 -3
  23. unstructured_ingest/processes/connectors/gitlab.py +1 -2
  24. unstructured_ingest/processes/connectors/google_drive.py +0 -2
  25. unstructured_ingest/processes/connectors/ibm_watsonx/ibm_watsonx_s3.py +9 -7
  26. unstructured_ingest/processes/connectors/kdbai.py +1 -0
  27. unstructured_ingest/processes/connectors/outlook.py +1 -2
  28. unstructured_ingest/processes/connectors/pinecone.py +0 -1
  29. unstructured_ingest/processes/connectors/redisdb.py +28 -24
  30. unstructured_ingest/processes/connectors/salesforce.py +1 -1
  31. unstructured_ingest/processes/connectors/slack.py +1 -2
  32. unstructured_ingest/processes/connectors/sql/databricks_delta_tables.py +5 -0
  33. unstructured_ingest/processes/connectors/sql/postgres.py +7 -1
  34. unstructured_ingest/processes/connectors/sql/singlestore.py +11 -6
  35. unstructured_ingest/processes/connectors/sql/snowflake.py +5 -0
  36. unstructured_ingest/processes/connectors/sql/sql.py +3 -4
  37. unstructured_ingest/processes/connectors/sql/sqlite.py +5 -0
  38. unstructured_ingest/processes/connectors/sql/vastdb.py +7 -3
  39. unstructured_ingest/processes/connectors/vectara.py +0 -2
  40. unstructured_ingest/processes/connectors/zendesk/zendesk.py +0 -2
  41. unstructured_ingest/processes/embedder.py +2 -2
  42. unstructured_ingest/processes/filter.py +1 -1
  43. unstructured_ingest/processes/partitioner.py +4 -0
  44. unstructured_ingest/processes/utils/blob_storage.py +2 -2
  45. unstructured_ingest/unstructured_api.py +13 -8
  46. unstructured_ingest/utils/data_prep.py +8 -32
  47. unstructured_ingest-1.0.2.dist-info/METADATA +226 -0
  48. {unstructured_ingest-0.7.2.dist-info → unstructured_ingest-1.0.2.dist-info}/RECORD +50 -184
  49. {unstructured_ingest-0.7.2.dist-info → unstructured_ingest-1.0.2.dist-info}/WHEEL +1 -2
  50. examples/__init__.py +0 -0
  51. examples/airtable.py +0 -44
  52. examples/azure_cognitive_search.py +0 -55
  53. examples/chroma.py +0 -54
  54. examples/couchbase.py +0 -55
  55. examples/databricks_volumes_dest.py +0 -55
  56. examples/databricks_volumes_source.py +0 -53
  57. examples/delta_table.py +0 -45
  58. examples/discord_example.py +0 -36
  59. examples/elasticsearch.py +0 -49
  60. examples/google_drive.py +0 -45
  61. examples/kdbai.py +0 -54
  62. examples/local.py +0 -36
  63. examples/milvus.py +0 -44
  64. examples/mongodb.py +0 -53
  65. examples/opensearch.py +0 -50
  66. examples/pinecone.py +0 -57
  67. examples/s3.py +0 -38
  68. examples/salesforce.py +0 -44
  69. examples/sharepoint.py +0 -47
  70. examples/singlestore.py +0 -49
  71. examples/sql.py +0 -90
  72. examples/vectara.py +0 -54
  73. examples/weaviate.py +0 -44
  74. test/__init__.py +0 -0
  75. test/integration/__init__.py +0 -0
  76. test/integration/chunkers/__init__.py +0 -0
  77. test/integration/chunkers/test_chunkers.py +0 -31
  78. test/integration/connectors/__init__.py +0 -0
  79. test/integration/connectors/conftest.py +0 -38
  80. test/integration/connectors/databricks/__init__.py +0 -0
  81. test/integration/connectors/databricks/test_volumes_native.py +0 -273
  82. test/integration/connectors/discord/__init__.py +0 -0
  83. test/integration/connectors/discord/test_discord.py +0 -90
  84. test/integration/connectors/duckdb/__init__.py +0 -0
  85. test/integration/connectors/duckdb/conftest.py +0 -14
  86. test/integration/connectors/duckdb/test_duckdb.py +0 -90
  87. test/integration/connectors/duckdb/test_motherduck.py +0 -95
  88. test/integration/connectors/elasticsearch/__init__.py +0 -0
  89. test/integration/connectors/elasticsearch/conftest.py +0 -34
  90. test/integration/connectors/elasticsearch/test_elasticsearch.py +0 -331
  91. test/integration/connectors/elasticsearch/test_opensearch.py +0 -326
  92. test/integration/connectors/sql/__init__.py +0 -0
  93. test/integration/connectors/sql/test_databricks_delta_tables.py +0 -170
  94. test/integration/connectors/sql/test_postgres.py +0 -201
  95. test/integration/connectors/sql/test_singlestore.py +0 -182
  96. test/integration/connectors/sql/test_snowflake.py +0 -244
  97. test/integration/connectors/sql/test_sqlite.py +0 -168
  98. test/integration/connectors/sql/test_vastdb.py +0 -34
  99. test/integration/connectors/test_astradb.py +0 -287
  100. test/integration/connectors/test_azure_ai_search.py +0 -254
  101. test/integration/connectors/test_chroma.py +0 -136
  102. test/integration/connectors/test_confluence.py +0 -111
  103. test/integration/connectors/test_delta_table.py +0 -183
  104. test/integration/connectors/test_dropbox.py +0 -151
  105. test/integration/connectors/test_github.py +0 -49
  106. test/integration/connectors/test_google_drive.py +0 -257
  107. test/integration/connectors/test_jira.py +0 -67
  108. test/integration/connectors/test_lancedb.py +0 -247
  109. test/integration/connectors/test_milvus.py +0 -208
  110. test/integration/connectors/test_mongodb.py +0 -335
  111. test/integration/connectors/test_neo4j.py +0 -244
  112. test/integration/connectors/test_notion.py +0 -152
  113. test/integration/connectors/test_onedrive.py +0 -163
  114. test/integration/connectors/test_pinecone.py +0 -387
  115. test/integration/connectors/test_qdrant.py +0 -216
  116. test/integration/connectors/test_redis.py +0 -143
  117. test/integration/connectors/test_s3.py +0 -184
  118. test/integration/connectors/test_sharepoint.py +0 -222
  119. test/integration/connectors/test_vectara.py +0 -282
  120. test/integration/connectors/test_zendesk.py +0 -120
  121. test/integration/connectors/utils/__init__.py +0 -0
  122. test/integration/connectors/utils/constants.py +0 -13
  123. test/integration/connectors/utils/docker.py +0 -151
  124. test/integration/connectors/utils/docker_compose.py +0 -59
  125. test/integration/connectors/utils/validation/__init__.py +0 -0
  126. test/integration/connectors/utils/validation/destination.py +0 -77
  127. test/integration/connectors/utils/validation/equality.py +0 -76
  128. test/integration/connectors/utils/validation/source.py +0 -331
  129. test/integration/connectors/utils/validation/utils.py +0 -36
  130. test/integration/connectors/weaviate/__init__.py +0 -0
  131. test/integration/connectors/weaviate/conftest.py +0 -15
  132. test/integration/connectors/weaviate/test_cloud.py +0 -39
  133. test/integration/connectors/weaviate/test_local.py +0 -152
  134. test/integration/embedders/__init__.py +0 -0
  135. test/integration/embedders/conftest.py +0 -13
  136. test/integration/embedders/test_azure_openai.py +0 -57
  137. test/integration/embedders/test_bedrock.py +0 -103
  138. test/integration/embedders/test_huggingface.py +0 -24
  139. test/integration/embedders/test_mixedbread.py +0 -71
  140. test/integration/embedders/test_octoai.py +0 -75
  141. test/integration/embedders/test_openai.py +0 -74
  142. test/integration/embedders/test_togetherai.py +0 -71
  143. test/integration/embedders/test_vertexai.py +0 -63
  144. test/integration/embedders/test_voyageai.py +0 -79
  145. test/integration/embedders/utils.py +0 -66
  146. test/integration/partitioners/__init__.py +0 -0
  147. test/integration/partitioners/test_partitioner.py +0 -76
  148. test/integration/utils.py +0 -15
  149. test/unit/__init__.py +0 -0
  150. test/unit/chunkers/__init__.py +0 -0
  151. test/unit/chunkers/test_chunkers.py +0 -49
  152. test/unit/connectors/__init__.py +0 -0
  153. test/unit/connectors/ibm_watsonx/__init__.py +0 -0
  154. test/unit/connectors/ibm_watsonx/test_ibm_watsonx_s3.py +0 -459
  155. test/unit/connectors/motherduck/__init__.py +0 -0
  156. test/unit/connectors/motherduck/test_base.py +0 -73
  157. test/unit/connectors/sql/__init__.py +0 -0
  158. test/unit/connectors/sql/test_sql.py +0 -152
  159. test/unit/connectors/test_confluence.py +0 -71
  160. test/unit/connectors/test_jira.py +0 -401
  161. test/unit/embed/__init__.py +0 -0
  162. test/unit/embed/test_mixedbreadai.py +0 -42
  163. test/unit/embed/test_octoai.py +0 -27
  164. test/unit/embed/test_openai.py +0 -28
  165. test/unit/embed/test_vertexai.py +0 -25
  166. test/unit/embed/test_voyageai.py +0 -24
  167. test/unit/embedders/__init__.py +0 -0
  168. test/unit/embedders/test_bedrock.py +0 -36
  169. test/unit/embedders/test_huggingface.py +0 -48
  170. test/unit/embedders/test_mixedbread.py +0 -37
  171. test/unit/embedders/test_octoai.py +0 -35
  172. test/unit/embedders/test_openai.py +0 -35
  173. test/unit/embedders/test_togetherai.py +0 -37
  174. test/unit/embedders/test_vertexai.py +0 -37
  175. test/unit/embedders/test_voyageai.py +0 -38
  176. test/unit/partitioners/__init__.py +0 -0
  177. test/unit/partitioners/test_partitioner.py +0 -63
  178. test/unit/test_error.py +0 -27
  179. test/unit/test_html.py +0 -112
  180. test/unit/test_interfaces.py +0 -26
  181. test/unit/test_utils.py +0 -220
  182. test/unit/utils/__init__.py +0 -0
  183. test/unit/utils/data_generator.py +0 -32
  184. unstructured_ingest-0.7.2.dist-info/METADATA +0 -383
  185. unstructured_ingest-0.7.2.dist-info/top_level.txt +0 -3
  186. {unstructured_ingest-0.7.2.dist-info → unstructured_ingest-1.0.2.dist-info}/entry_points.txt +0 -0
  187. {unstructured_ingest-0.7.2.dist-info → unstructured_ingest-1.0.2.dist-info/licenses}/LICENSE.md +0 -0
@@ -0,0 +1,226 @@
1
+ Metadata-Version: 2.4
2
+ Name: unstructured_ingest
3
+ Version: 1.0.2
4
+ Summary: Local ETL data pipeline to get data RAG ready
5
+ Author-email: Unstructured Technologies <devops@unstructuredai.io>
6
+ License-Expression: Apache-2.0
7
+ License-File: LICENSE.md
8
+ Classifier: Development Status :: 4 - Beta
9
+ Classifier: Intended Audience :: Developers
10
+ Classifier: Intended Audience :: Education
11
+ Classifier: Intended Audience :: Science/Research
12
+ Classifier: License :: OSI Approved :: Apache Software License
13
+ Classifier: Operating System :: OS Independent
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.9
16
+ Classifier: Programming Language :: Python :: 3.10
17
+ Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
20
+ Requires-Python: <3.13,>=3.9
21
+ Requires-Dist: click
22
+ Requires-Dist: dataclasses-json
23
+ Requires-Dist: opentelemetry-sdk
24
+ Requires-Dist: pydantic>=2.7
25
+ Requires-Dist: python-dateutil
26
+ Requires-Dist: tqdm
27
+ Provides-Extra: airtable
28
+ Requires-Dist: pandas; extra == 'airtable'
29
+ Requires-Dist: pyairtable; extra == 'airtable'
30
+ Provides-Extra: astradb
31
+ Requires-Dist: astrapy>2.0.0; extra == 'astradb'
32
+ Provides-Extra: azure
33
+ Requires-Dist: adlfs; extra == 'azure'
34
+ Requires-Dist: fsspec; extra == 'azure'
35
+ Provides-Extra: azure-ai-search
36
+ Requires-Dist: azure-search-documents; extra == 'azure-ai-search'
37
+ Provides-Extra: bedrock
38
+ Requires-Dist: aioboto3; extra == 'bedrock'
39
+ Requires-Dist: boto3; extra == 'bedrock'
40
+ Provides-Extra: biomed
41
+ Requires-Dist: bs4; extra == 'biomed'
42
+ Requires-Dist: requests; extra == 'biomed'
43
+ Provides-Extra: box
44
+ Requires-Dist: boxfs; extra == 'box'
45
+ Requires-Dist: fsspec; extra == 'box'
46
+ Provides-Extra: chroma
47
+ Requires-Dist: chromadb; extra == 'chroma'
48
+ Provides-Extra: clarifai
49
+ Requires-Dist: clarifai; extra == 'clarifai'
50
+ Provides-Extra: confluence
51
+ Requires-Dist: atlassian-python-api; extra == 'confluence'
52
+ Requires-Dist: requests; extra == 'confluence'
53
+ Provides-Extra: couchbase
54
+ Requires-Dist: couchbase; extra == 'couchbase'
55
+ Provides-Extra: databricks-delta-tables
56
+ Requires-Dist: databricks-sql-connector; extra == 'databricks-delta-tables'
57
+ Requires-Dist: pandas; extra == 'databricks-delta-tables'
58
+ Provides-Extra: databricks-volumes
59
+ Requires-Dist: databricks-sdk; extra == 'databricks-volumes'
60
+ Provides-Extra: delta-table
61
+ Requires-Dist: boto3; extra == 'delta-table'
62
+ Requires-Dist: deltalake; extra == 'delta-table'
63
+ Requires-Dist: pandas; extra == 'delta-table'
64
+ Provides-Extra: discord
65
+ Requires-Dist: discord-py; extra == 'discord'
66
+ Provides-Extra: doc
67
+ Requires-Dist: unstructured[doc]; extra == 'doc'
68
+ Provides-Extra: docx
69
+ Requires-Dist: unstructured[docx]; extra == 'docx'
70
+ Provides-Extra: dropbox
71
+ Requires-Dist: dropboxdrivefs; extra == 'dropbox'
72
+ Requires-Dist: fsspec; extra == 'dropbox'
73
+ Provides-Extra: duckdb
74
+ Requires-Dist: duckdb; extra == 'duckdb'
75
+ Requires-Dist: pandas; extra == 'duckdb'
76
+ Provides-Extra: elasticsearch
77
+ Requires-Dist: elasticsearch[async]; extra == 'elasticsearch'
78
+ Provides-Extra: epub
79
+ Requires-Dist: unstructured[epub]; extra == 'epub'
80
+ Provides-Extra: gcs
81
+ Requires-Dist: bs4; extra == 'gcs'
82
+ Requires-Dist: fsspec; extra == 'gcs'
83
+ Requires-Dist: gcsfs; extra == 'gcs'
84
+ Provides-Extra: github
85
+ Requires-Dist: pygithub>1.58.0; extra == 'github'
86
+ Requires-Dist: requests; extra == 'github'
87
+ Provides-Extra: gitlab
88
+ Requires-Dist: python-gitlab; extra == 'gitlab'
89
+ Provides-Extra: google-drive
90
+ Requires-Dist: google-api-python-client; extra == 'google-drive'
91
+ Provides-Extra: hubspot
92
+ Requires-Dist: hubspot-api-client; extra == 'hubspot'
93
+ Requires-Dist: urllib3; extra == 'hubspot'
94
+ Provides-Extra: huggingface
95
+ Requires-Dist: sentence-transformers; extra == 'huggingface'
96
+ Provides-Extra: ibm-watsonx-s3
97
+ Requires-Dist: httpx; extra == 'ibm-watsonx-s3'
98
+ Requires-Dist: pandas; extra == 'ibm-watsonx-s3'
99
+ Requires-Dist: pyarrow; extra == 'ibm-watsonx-s3'
100
+ Requires-Dist: pyiceberg; extra == 'ibm-watsonx-s3'
101
+ Requires-Dist: tenacity; extra == 'ibm-watsonx-s3'
102
+ Provides-Extra: image
103
+ Requires-Dist: unstructured[image]; extra == 'image'
104
+ Provides-Extra: jira
105
+ Requires-Dist: atlassian-python-api; extra == 'jira'
106
+ Provides-Extra: kafka
107
+ Requires-Dist: confluent-kafka; extra == 'kafka'
108
+ Provides-Extra: kdbai
109
+ Requires-Dist: kdbai-client>=1.4.0; extra == 'kdbai'
110
+ Requires-Dist: pandas; extra == 'kdbai'
111
+ Provides-Extra: lancedb
112
+ Requires-Dist: lancedb; extra == 'lancedb'
113
+ Provides-Extra: md
114
+ Requires-Dist: unstructured[md]; extra == 'md'
115
+ Provides-Extra: milvus
116
+ Requires-Dist: pymilvus; extra == 'milvus'
117
+ Provides-Extra: mixedbreadai
118
+ Requires-Dist: mixedbread-ai; extra == 'mixedbreadai'
119
+ Provides-Extra: mongodb
120
+ Requires-Dist: pymongo; extra == 'mongodb'
121
+ Provides-Extra: msg
122
+ Requires-Dist: unstructured[msg]; extra == 'msg'
123
+ Provides-Extra: neo4j
124
+ Requires-Dist: cymple; extra == 'neo4j'
125
+ Requires-Dist: neo4j-rust-ext; extra == 'neo4j'
126
+ Requires-Dist: networkx; extra == 'neo4j'
127
+ Provides-Extra: notion
128
+ Requires-Dist: backoff; extra == 'notion'
129
+ Requires-Dist: htmlbuilder; extra == 'notion'
130
+ Requires-Dist: httpx; extra == 'notion'
131
+ Requires-Dist: notion-client; extra == 'notion'
132
+ Provides-Extra: octoai
133
+ Requires-Dist: openai; extra == 'octoai'
134
+ Requires-Dist: tiktoken; extra == 'octoai'
135
+ Provides-Extra: odt
136
+ Requires-Dist: unstructured[odt]; extra == 'odt'
137
+ Provides-Extra: onedrive
138
+ Requires-Dist: msal; extra == 'onedrive'
139
+ Requires-Dist: office365-rest-python-client; extra == 'onedrive'
140
+ Requires-Dist: requests; extra == 'onedrive'
141
+ Provides-Extra: openai
142
+ Requires-Dist: openai; extra == 'openai'
143
+ Requires-Dist: tiktoken; extra == 'openai'
144
+ Provides-Extra: opensearch
145
+ Requires-Dist: opensearch-py; extra == 'opensearch'
146
+ Provides-Extra: org
147
+ Requires-Dist: unstructured[org]; extra == 'org'
148
+ Provides-Extra: outlook
149
+ Requires-Dist: msal; extra == 'outlook'
150
+ Requires-Dist: office365-rest-python-client; extra == 'outlook'
151
+ Provides-Extra: pdf
152
+ Requires-Dist: unstructured[pdf]; extra == 'pdf'
153
+ Provides-Extra: pinecone
154
+ Requires-Dist: pinecone; extra == 'pinecone'
155
+ Provides-Extra: postgres
156
+ Requires-Dist: pandas; extra == 'postgres'
157
+ Requires-Dist: psycopg2-binary; extra == 'postgres'
158
+ Provides-Extra: ppt
159
+ Requires-Dist: unstructured[ppt]; extra == 'ppt'
160
+ Provides-Extra: pptx
161
+ Requires-Dist: unstructured[pptx]; extra == 'pptx'
162
+ Provides-Extra: qdrant
163
+ Requires-Dist: qdrant-client; extra == 'qdrant'
164
+ Provides-Extra: reddit
165
+ Requires-Dist: praw; extra == 'reddit'
166
+ Provides-Extra: redis
167
+ Requires-Dist: redis; extra == 'redis'
168
+ Provides-Extra: remote
169
+ Requires-Dist: unstructured-client>=0.30.0; extra == 'remote'
170
+ Provides-Extra: rst
171
+ Requires-Dist: unstructured[rst]; extra == 'rst'
172
+ Provides-Extra: rtf
173
+ Requires-Dist: unstructured[rtf]; extra == 'rtf'
174
+ Provides-Extra: s3
175
+ Requires-Dist: fsspec; extra == 's3'
176
+ Requires-Dist: s3fs; extra == 's3'
177
+ Provides-Extra: salesforce
178
+ Requires-Dist: simple-salesforce; extra == 'salesforce'
179
+ Provides-Extra: sftp
180
+ Requires-Dist: fsspec; extra == 'sftp'
181
+ Requires-Dist: paramiko; extra == 'sftp'
182
+ Provides-Extra: sharepoint
183
+ Requires-Dist: msal; extra == 'sharepoint'
184
+ Requires-Dist: office365-rest-python-client; extra == 'sharepoint'
185
+ Requires-Dist: requests; extra == 'sharepoint'
186
+ Provides-Extra: singlestore
187
+ Requires-Dist: pandas; extra == 'singlestore'
188
+ Requires-Dist: singlestoredb; extra == 'singlestore'
189
+ Provides-Extra: slack
190
+ Requires-Dist: slack-sdk[optional]; extra == 'slack'
191
+ Provides-Extra: snowflake
192
+ Requires-Dist: pandas; extra == 'snowflake'
193
+ Requires-Dist: psycopg2-binary; extra == 'snowflake'
194
+ Requires-Dist: snowflake-connector-python; extra == 'snowflake'
195
+ Provides-Extra: togetherai
196
+ Requires-Dist: together; extra == 'togetherai'
197
+ Provides-Extra: tsv
198
+ Requires-Dist: unstructured[tsv]; extra == 'tsv'
199
+ Provides-Extra: vastdb
200
+ Requires-Dist: ibis; extra == 'vastdb'
201
+ Requires-Dist: pandas; extra == 'vastdb'
202
+ Requires-Dist: pyarrow; extra == 'vastdb'
203
+ Requires-Dist: vastdb; extra == 'vastdb'
204
+ Provides-Extra: vectara
205
+ Requires-Dist: aiofiles; extra == 'vectara'
206
+ Requires-Dist: httpx; extra == 'vectara'
207
+ Requires-Dist: requests; extra == 'vectara'
208
+ Provides-Extra: vertexai
209
+ Requires-Dist: vertexai; extra == 'vertexai'
210
+ Provides-Extra: voyageai
211
+ Requires-Dist: voyageai; extra == 'voyageai'
212
+ Provides-Extra: weaviate
213
+ Requires-Dist: weaviate-client; extra == 'weaviate'
214
+ Provides-Extra: wikipedia
215
+ Requires-Dist: wikipedia; extra == 'wikipedia'
216
+ Provides-Extra: xlsx
217
+ Requires-Dist: unstructured[xlsx]; extra == 'xlsx'
218
+ Provides-Extra: zendesk
219
+ Requires-Dist: aiofiles; extra == 'zendesk'
220
+ Requires-Dist: bs4; extra == 'zendesk'
221
+ Requires-Dist: httpx; extra == 'zendesk'
222
+ Description-Content-Type: text/markdown
223
+
224
+ # Unstructured Ingest
225
+
226
+ For details, see the [Unstructured Ingest overview](https://docs.unstructured.io/ingestion/overview) in the Unstructured documentation.
@@ -1,145 +1,12 @@
1
- examples/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- examples/airtable.py,sha256=4n6B8l_fmnlZLdk9SIfYticOTgZGQBjBfktNwFQf9Zc,1831
3
- examples/azure_cognitive_search.py,sha256=KThZyRQ2HhTndBcGBn1dlr0FARB2PmBBRRnKJU5AuQU,2279
4
- examples/chroma.py,sha256=fLqcpnbMAlJEe96SCMm17ZGOtcnaDQBGusURhLDwth8,2208
5
- examples/couchbase.py,sha256=u4fmZb4eBYEcBgWCaWOKSxxxV1v-YpyaGG7J4ept1w0,2261
6
- examples/databricks_volumes_dest.py,sha256=xoSVgmS4dNKOKGKtin0ojB20mO7vfCJ8FQ_DFIgMW-U,2329
7
- examples/databricks_volumes_source.py,sha256=DT7PXW6CTOGSH5lzuZMfasjV6vgoG28R68I9Cb98JvQ,2278
8
- examples/delta_table.py,sha256=0InvWFYZZt6TolYtWlKp9KliOnRxFNubInbwqlBWMIA,1898
9
- examples/discord_example.py,sha256=ZNzKVxk7hfUF0qZdOLkh7fGItaiIdo6gh7JYhwr2vyQ,1624
10
- examples/elasticsearch.py,sha256=KQXIYCE44w_CRZQGanlfP9ZY9NZ5gclxKKvtZnd-IRA,2129
11
- examples/google_drive.py,sha256=iOjkC8iWQ3sqM3TTsL8Ng_yZbm2C5xfws1a9MTZuV7M,1677
12
- examples/kdbai.py,sha256=W7yT2lnYOPbftqTiSHQNgWVwQC4UXJ8QhkPqSdiG_Co,2225
13
- examples/local.py,sha256=WuN3SqxoXRlh2xhMZPh4R9EdCNfjQUhurAlGVqGuUF8,1569
14
- examples/milvus.py,sha256=TKtH1Rxrj3Hr9d1BUx7qEK468Xb0ux7Ak1RukPXSOOo,1877
15
- examples/mongodb.py,sha256=eP43TY-rjOeWnVk4m5jSSWJSWXcy6xRYFuLWusBZXws,2160
16
- examples/opensearch.py,sha256=8YmQpvOB9HBQqoC47ht-lX34SpkoaDlezbQOHRG82cw,2103
17
- examples/pinecone.py,sha256=URXalj5-0eTVnmfzD0icCB1brGklplU4P8l0jrawCjI,2479
18
- examples/s3.py,sha256=23y_lPUkPo50rDMZC7cc3kBaSOf5pP_xl_7HO0Mb3c8,1742
19
- examples/salesforce.py,sha256=tiO6hdRI79H_oORPnIf1FvB0IuGTYG2KzZlnqC_J9Cw,1888
20
- examples/sharepoint.py,sha256=a0h2zU28m6bW5g17b8BDrcsHzdzjSgb--gYV80bRqs0,2067
21
- examples/singlestore.py,sha256=UAdBOtIcmhyRkZ-pIh7rrY7Yt_Ed8t3puulZ-MhaSfU,2060
22
- examples/sql.py,sha256=YSmLD7Ri2a8CvBxRJWxPQefqV4kV8kF3W0l3TXu_iyY,2997
23
- examples/vectara.py,sha256=bWSsMQL3hEEt5CLR9CZFnuplrSAeLj_EiADipRU_Gkw,2247
24
- examples/weaviate.py,sha256=QUtYJ-y7eYfm69T316-aUm1imQZnoSJ09RGSVQAoTck,1906
25
- test/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
26
- test/integration/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
27
- test/integration/utils.py,sha256=CWqzEGw6TA_ZoP9hRUkW64TWYssooBbufcTRmbJvod8,401
28
- test/integration/chunkers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
29
- test/integration/chunkers/test_chunkers.py,sha256=MTPVBCBvh54fBqi_53oPkrH9QJtvJeE9YEXDOZ8G0so,1059
30
- test/integration/connectors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
31
- test/integration/connectors/conftest.py,sha256=3weYhwyXyAVVk8DsLqScDOk-PZwsrHQLs1RdYa1EYqQ,1015
32
- test/integration/connectors/test_astradb.py,sha256=1hFqd9NI283t6lQZryBJMfJoRfP6PKVHTl_6X3Nk0bM,9925
33
- test/integration/connectors/test_azure_ai_search.py,sha256=NMSjIbFO1EWqr8U-Owd4qdC3pDROjg7zNAXr5yGSsH4,9699
34
- test/integration/connectors/test_chroma.py,sha256=_K4CUp9x5O_Uuw1uwDi1R0jIvzcaW0wI8tXfAV1hh7c,4536
35
- test/integration/connectors/test_confluence.py,sha256=3sAFQxRUrS7xXIQXQBccY0r5kihnP8rc_sAs-44g4Ys,3587
36
- test/integration/connectors/test_delta_table.py,sha256=-ivYySHKgFRCd6PUIK-fy6yOkku3uj4YhDDEUlqi-tM,6915
37
- test/integration/connectors/test_dropbox.py,sha256=sW-NOXA0-4xtAUgnSnPGg-OhiIhKviROJPpxUt8y8s8,4939
38
- test/integration/connectors/test_github.py,sha256=wFWRFhfhTUz3byk2FSe9qVv7xMxcBjHGs1FlhjtnTAQ,1500
39
- test/integration/connectors/test_google_drive.py,sha256=ceiFoVnaguTsQrLHZk8jv-IZ-i_EP4wenHSX-QKHvTM,10300
40
- test/integration/connectors/test_jira.py,sha256=0FnxFe42d32EGuArfxxnfINkoYNoCsgJjP7ZU6fePu8,2073
41
- test/integration/connectors/test_lancedb.py,sha256=E8yFuvQMx68w1s1PXIBP8gUlNuUpCtiGNYd7YnDA6Aw,9213
42
- test/integration/connectors/test_milvus.py,sha256=_cYmJMocsZuUroalT5uc9rcsHFnTIpJyJjIwK8oDDYc,7177
43
- test/integration/connectors/test_mongodb.py,sha256=wt5o-7qtMtjGv0IPKlhEnD3-sJjBX8cv1acn1Mcq-TY,12450
44
- test/integration/connectors/test_neo4j.py,sha256=BSOqRTY4ZV8o6TV1MOmUKQq7DzRFU_z9umjDk-yw-Jg,8450
45
- test/integration/connectors/test_notion.py,sha256=3OXFcSM-jE1_E_JoGw--pz-cv3dPZvt18scACJiHjwo,5397
46
- test/integration/connectors/test_onedrive.py,sha256=0SZB818cNsxYZlBJJpuvU1PqsFDxRaOiLfJTRcc9Bv0,5233
47
- test/integration/connectors/test_pinecone.py,sha256=0XBK9xxZhry6Rnv0s-chIWgtN5d1p8Lx-kEEQMNuAnA,13650
48
- test/integration/connectors/test_qdrant.py,sha256=z3RThQJKzCafCtfH0ocy_DNDlzmDdu_opQH3mKTn0CE,8031
49
- test/integration/connectors/test_redis.py,sha256=gUU6Dv616tX9KANiqhkMrPWvmhbV2Gk1pNuIv2MvZG8,5093
50
- test/integration/connectors/test_s3.py,sha256=Cd9HsPjrSB6xss1DO4YHqSORJJ2pUXNVaAlrlpqjZS8,7477
51
- test/integration/connectors/test_sharepoint.py,sha256=wq4G6J5ffXhYquUySVd5UUtYWC43RoXgCSY4fdlX2z0,7643
52
- test/integration/connectors/test_vectara.py,sha256=08GIh6J2QTSuupdDOJ_TiyQrYYK3vamUaEQe3_B5-WY,9278
53
- test/integration/connectors/test_zendesk.py,sha256=15bl3wy0pLxS2dkBlE11yPOX71k6Vbxo0BEFQK-qcFs,3724
54
- test/integration/connectors/databricks/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
55
- test/integration/connectors/databricks/test_volumes_native.py,sha256=RbxImt0JPnvl8TAK_rAtIspaFuRmEFzjL9rLqkm6Juk,9563
56
- test/integration/connectors/discord/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
57
- test/integration/connectors/discord/test_discord.py,sha256=WeB0ST572GvELMlgorRMwRxYIWkleIobXn6ULhjo1rw,3173
58
- test/integration/connectors/duckdb/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
59
- test/integration/connectors/duckdb/conftest.py,sha256=rlBHMJTiJ2a5xbvIxTOyhhcuTBc9DO-yTzD6Kf8X3hY,301
60
- test/integration/connectors/duckdb/test_duckdb.py,sha256=o3CKTG2T15QyWwPTh7Yyv42eJ39opm7g9b508y1o1m8,2973
61
- test/integration/connectors/duckdb/test_motherduck.py,sha256=S2EtKVy4HE0ysB3OvdFOUe5O-0w5Bk4Loy2Gyyff8b0,3218
62
- test/integration/connectors/elasticsearch/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
63
- test/integration/connectors/elasticsearch/conftest.py,sha256=-i4_7MkIxSQENz7nuD2uHuhGU9mZ33vpeTPhHtRpQfs,989
64
- test/integration/connectors/elasticsearch/test_elasticsearch.py,sha256=NUUL88Z7uWfnEopZ0wkQtWMA94WDZw87v_oCkPShVM4,12076
65
- test/integration/connectors/elasticsearch/test_opensearch.py,sha256=nV3gHD_tb79KRE8DqfUWpOfzem9LjojACspUHTSI7dw,11454
66
- test/integration/connectors/sql/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
67
- test/integration/connectors/sql/test_databricks_delta_tables.py,sha256=hYfmT9ud6NqJk1pibaF-1LvNgrfXBGUq0FTHkcIvICE,6145
68
- test/integration/connectors/sql/test_postgres.py,sha256=b0do75CAxKFEgoODx7mNQqo5vEgZbWHSifMNV_H2IgU,6974
69
- test/integration/connectors/sql/test_singlestore.py,sha256=uuw_T8EefwMcRD7clzviJxwb45f9k4G2dZIYqbNhM1s,6157
70
- test/integration/connectors/sql/test_snowflake.py,sha256=dNHR8fk5V1WsvN6P1CesPKCsRbKTuf6zXgjyf7vgueg,7498
71
- test/integration/connectors/sql/test_sqlite.py,sha256=6By1-XKiGCA5KlR1DHlM6ArU5c_2GjM5mE2RhMoNPg8,5960
72
- test/integration/connectors/sql/test_vastdb.py,sha256=A0W-kHl1GRf2zHCmTWXOJjV8HPi3xlWvCTKgjebVZUY,1066
73
- test/integration/connectors/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
74
- test/integration/connectors/utils/constants.py,sha256=JhTk6YNw7JVpkk-Pl8zn2YYkExeL1oE9VBWm_kMYGfo,369
75
- test/integration/connectors/utils/docker.py,sha256=4g1STiSbYN5qcmDTXyPxVJgwx97O6wk7n-DJ-zgzgag,4971
76
- test/integration/connectors/utils/docker_compose.py,sha256=GVTB6Cel05c0VQ2n4AwkQQx_cBfz13ZTs1HpbaYipNU,2223
77
- test/integration/connectors/utils/validation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
78
- test/integration/connectors/utils/validation/destination.py,sha256=m5RHgZ3_h6HA2SsWbg15rmKhZjzsyKqOtFlUqpy33SI,2746
79
- test/integration/connectors/utils/validation/equality.py,sha256=R6d_1c-Si5518WJcBcshF_wBRnywnZ0ORQ-NL0xNmGo,2602
80
- test/integration/connectors/utils/validation/source.py,sha256=WX67a1tYpyUFXvSxxZrTLEkpyVqZiUXhAsJ11RQzcqQ,13701
81
- test/integration/connectors/utils/validation/utils.py,sha256=xYYvAbqP6_lZyH09_JjB4w2Sf8aQPvDVT5vZTs05ILs,1428
82
- test/integration/connectors/weaviate/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
83
- test/integration/connectors/weaviate/conftest.py,sha256=6Q6QdrLJmGHowRFSmoVSzup2EX6qASfS2Z5tqlpTm9M,387
84
- test/integration/connectors/weaviate/test_cloud.py,sha256=1r16tNUSsq8JawfjgeRWtcfw2COYma0b298mBDZU__o,1281
85
- test/integration/connectors/weaviate/test_local.py,sha256=q8vSpmFeTapSoUSNChIpc6qfyMdcICo28CJSm7L7V-o,5337
86
- test/integration/embedders/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
87
- test/integration/embedders/conftest.py,sha256=B2W771RbijR7G_GybsCzRyIvOzXqzbKZdRIlNDd5AGY,334
88
- test/integration/embedders/test_azure_openai.py,sha256=nQle565sZu9N4xjWHBOfiOFGtldOdIK6S1YID00hK78,1787
89
- test/integration/embedders/test_bedrock.py,sha256=-hy4wPmgBpXrk7OtPbZ8IE0qip6Ht1GptGLpmpqHxiw,3550
90
- test/integration/embedders/test_huggingface.py,sha256=jNNBpXUA_UpgXXVo_Q2xYGFaknIZJu8eKJ4ifq19eig,986
91
- test/integration/embedders/test_mixedbread.py,sha256=0Mcq9JU9wAJ_Wz2Enwyz4qGSbivDDqmE6ZJxlZpMuWw,1993
92
- test/integration/embedders/test_octoai.py,sha256=R6NcBxPL_sVJLGmVTEjsaxfqjnLWxViXwL6vaze666s,2194
93
- test/integration/embedders/test_openai.py,sha256=iwk56ZK1gllFtatcp6W_PA7x9h2M2ReIg4GXIWXp5qo,2124
94
- test/integration/embedders/test_togetherai.py,sha256=5PzIdnvjMpjods_rhUqQ2nbVRXgld_F7OGWssnHxa0I,2202
95
- test/integration/embedders/test_vertexai.py,sha256=xpjauYnRBxOqft0HXEMFk1iRoeQJm7E2eSunZbjj_H4,1827
96
- test/integration/embedders/test_voyageai.py,sha256=kcuGxhG6kR1XVqmTw7La3MXnIC06CtjOq_n48nkF-eQ,2411
97
- test/integration/embedders/utils.py,sha256=Sqqg-X31ZV1hojqPQBaZgM2lb2u8cG6s6OnH9JRsFjs,2717
98
- test/integration/partitioners/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
99
- test/integration/partitioners/test_partitioner.py,sha256=UYQd9x2-66F_FFeulC_2eg3FtjswK0Mt9Hwmg4b_pPs,2784
100
- test/unit/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
101
- test/unit/test_error.py,sha256=RflmngCdFNKOLXVfLnUdNfY3Mfg3k7DTEzfIl0B-syU,840
102
- test/unit/test_html.py,sha256=ubsck9pVOnPDFL0P8TZkko_46MIaFLlSNQcsgFDgYoE,4496
103
- test/unit/test_interfaces.py,sha256=Gv3WMJsw_3xPLy3nI3dIcJuLa2WvKYszSjI_W9XLtVM,787
104
- test/unit/test_utils.py,sha256=xeSM02zOChSOO3dzDOVAEiQme1rQ8drjnJF93S3BFmk,7247
105
- test/unit/chunkers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
106
- test/unit/chunkers/test_chunkers.py,sha256=wRxbSj7P1FwRGDyVcARkm8CQSVCBCro3nTe54UoUBzc,1769
107
- test/unit/connectors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
108
- test/unit/connectors/test_confluence.py,sha256=Hr91nKw6018FEih-vSrVXFk0p0E9bSL1IeZVDxvITJ0,1916
109
- test/unit/connectors/test_jira.py,sha256=sPRjoBVDmc-o2RWilcjs-VW_jkafIqSXBE9duCELfoA,12110
110
- test/unit/connectors/ibm_watsonx/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
111
- test/unit/connectors/ibm_watsonx/test_ibm_watsonx_s3.py,sha256=Yj9swJ_phiam-CFBzGnAFCkd8_oqzdA3ZQJQdpV8T1E,14503
112
- test/unit/connectors/motherduck/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
113
- test/unit/connectors/motherduck/test_base.py,sha256=4SHI3Hx1a28eNE_VDbl8gAssNZRIUNSGPNZgrkzjYWs,2429
114
- test/unit/connectors/sql/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
115
- test/unit/connectors/sql/test_sql.py,sha256=SfWYDBrR7pHFziKVEe6IAq5E3EQIz99ikQN3LnF1DrY,4622
116
- test/unit/embed/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
117
- test/unit/embed/test_mixedbreadai.py,sha256=Z9A9jg5eJRF4OgYTgbIzQUI27J16uv2qj2kp_Rv0r9k,1428
118
- test/unit/embed/test_octoai.py,sha256=CWVrieqJh-N40J9n3nzqQPLOH9T1_mldkpZYRiHKxrg,1055
119
- test/unit/embed/test_openai.py,sha256=RQ-4QIcRvq0JSBFNit_NRcy61EsOv7xh_TcKJKHwHGM,1186
120
- test/unit/embed/test_vertexai.py,sha256=k_dK-yR_yx1RAOpmAgfcPo-osRDJP9aRCMCsJmQPxYI,1050
121
- test/unit/embed/test_voyageai.py,sha256=QWoDZEX8cAIkTgn4NtIyGKzOAu-GmudD4VMujnfi1Gg,983
122
- test/unit/embedders/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
123
- test/unit/embedders/test_bedrock.py,sha256=HMaweO_v_9Y1SE2m5QImXP73cb26vNTUfc1onTBa1-g,1074
124
- test/unit/embedders/test_huggingface.py,sha256=BpMC_AMlifjNf4Y61yBNR_8UU3H_x3ut2NnpFuB4kDo,1543
125
- test/unit/embedders/test_mixedbread.py,sha256=8yT942TVVXC5EkrT_ReZie1In537BaAD6esRjntgxuU,1021
126
- test/unit/embedders/test_octoai.py,sha256=JMfrFz25QfEh0ieB4bJneZd4XtNcdPOnNsN1Fj7gU-Q,1012
127
- test/unit/embedders/test_openai.py,sha256=HoEW95289Ijgo3PJ-pEaDOknfdkSjPXTgkXmE6jJomY,1012
128
- test/unit/embedders/test_togetherai.py,sha256=s24V_geDNZzblU74sSdC_m4Lqlzjp00RMpy56ptfdx0,1009
129
- test/unit/embedders/test_vertexai.py,sha256=4gLJaV9Nr2k_SgA-EyJ_sDvm8XvyGbn2zTs4F4CXU2g,1142
130
- test/unit/embedders/test_voyageai.py,sha256=VaWthF64pmxc-fOBbAQsEzMw7tV4t4Nz_H_Cc5tuAYQ,1193
131
- test/unit/partitioners/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
132
- test/unit/partitioners/test_partitioner.py,sha256=eJoUDbiKtweyU1WYfsY5KqVqoPjbx1MUsyHkbvvTNEk,2275
133
- test/unit/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
134
- test/unit/utils/data_generator.py,sha256=UoYVNjG4S4wlaA9gceQ82HIpF9_6I1UTHD1_GrQBHp0,973
135
1
  unstructured_ingest/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
136
- unstructured_ingest/__version__.py,sha256=7O8GlC09PP-XuUDOj6bhRUtbOuUgpBT2COw4AjU1kk0,42
2
+ unstructured_ingest/__version__.py,sha256=tMfsOjk6uygoNUsekl3a802jffTlVo6ELbuAqqeWH0c,42
137
3
  unstructured_ingest/error.py,sha256=qDncnJgbf5ils956RcO2CGlAKYDT5OaEM9Clv1JVTNc,1448
138
4
  unstructured_ingest/errors_v2.py,sha256=9RuRCi7lbDxCguDz07y5RiHoQiFIOWwOD7xqzJ2B3Yw,436
139
5
  unstructured_ingest/logger.py,sha256=7e_7UeK6hVOd5BQ6i9NzRUAPCS_DF839Y8TjUDywraY,1428
140
6
  unstructured_ingest/main.py,sha256=82G_7eG4PNhc_xIqj4Y_sFbDV9VI-nwSfsfJQMzovMk,169
141
7
  unstructured_ingest/otel.py,sha256=NsUqOolA0gt69eFhZLABjVpcKoM9aus-AbxIKqWqPTc,4127
142
- unstructured_ingest/unstructured_api.py,sha256=hWUXUhGtyfi2OcDR-BriHJyT4jJywf4zfG1qpSCf9Bo,5002
8
+ unstructured_ingest/unstructured_api.py,sha256=4e2ZNWIihk0eje4R3ZQ0NOYNbmMZDv_O-rnJo94kaGE,5127
9
+ unstructured_ingest/cli/README.md,sha256=5LfM0ys1aFyCiCjlwZsi_9Mb5Nrq3MmYt3IpmUybnCE,1507
143
10
  unstructured_ingest/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
144
11
  unstructured_ingest/cli/cli.py,sha256=ZeIE9jP8fe7260nE8v7xYgLdqX9OtkQXAXSGWIkHLcA,645
145
12
  unstructured_ingest/cli/cmds.py,sha256=EhDW5UX4V-N8Svjba4w7YWnRYl26__ADwzNXrfFBxM4,483
@@ -158,7 +25,7 @@ unstructured_ingest/embed/azure_openai.py,sha256=_-I-nwd-wdCiKkSdYBL4UKrTZ2UPWsM
158
25
  unstructured_ingest/embed/bedrock.py,sha256=t58V_QQjWPO62CTuP0aLFMDisPeXpxG2xSFGUhN-JvI,7726
159
26
  unstructured_ingest/embed/huggingface.py,sha256=-ZD17O_H_UnK80fqig6y6wNKJckjx0HuAkY5vgPvk8M,2259
160
27
  unstructured_ingest/embed/interfaces.py,sha256=SdB3t8eMPB8CbXzOYBpgwjzTvyb4T19L61Sr6Jy3_rw,5099
161
- unstructured_ingest/embed/mixedbreadai.py,sha256=-Y0J27G9CL1t3ZTIeNjTjRviErSMAzJRf2zgDgMHUmg,4499
28
+ unstructured_ingest/embed/mixedbreadai.py,sha256=z8RaG1hGBL840yElvI1Dbnf7llsOGEBbZ2X_QlFflZg,4498
162
29
  unstructured_ingest/embed/octoai.py,sha256=136UzSuQgV8Nxel2pB8Iv-4AvlFU6RRCa7N64fWFl6o,3855
163
30
  unstructured_ingest/embed/openai.py,sha256=hK98QXb_8oN1E-QwNT6JElzYOxG1mvZCFYQW57pjv0E,3372
164
31
  unstructured_ingest/embed/togetherai.py,sha256=T0v0_yTovy3sSeLPvk3PJccqcnmqCc_vxYs6pumjK3I,2983
@@ -170,12 +37,12 @@ unstructured_ingest/interfaces/downloader.py,sha256=xX0ZzsFRSzZb7SAeoeQph8sIbVq1
170
37
  unstructured_ingest/interfaces/indexer.py,sha256=c2FwWJEQHfFD6vO-tGfYLpLiIs-TYViLAt8YmHfDbaM,824
171
38
  unstructured_ingest/interfaces/process.py,sha256=S3A_9gkwwGC-iQxvnpj3Er6IJAjAT5npzpSgxuFAzUM,449
172
39
  unstructured_ingest/interfaces/processor.py,sha256=VX7JqXlbG1plxMK8THWhWINPbTICaaUEk4XUXhnOixY,3303
173
- unstructured_ingest/interfaces/upload_stager.py,sha256=nsS8VgFX44YIaESneyXQfa-mQUsKzowVdhwLpJjWxGM,3137
174
- unstructured_ingest/interfaces/uploader.py,sha256=6Ax-Qn7it0yKVdUBun5-lJ3jxISPEq2b2gLXpzJh5Lg,2038
40
+ unstructured_ingest/interfaces/upload_stager.py,sha256=eYhbdM0Dt8FValZAe41dWnxehhvfMLDOSTp7UoR5HB0,3147
41
+ unstructured_ingest/interfaces/uploader.py,sha256=6HyWttmosKreuWJCFp3TxKCuzDCj_RJdGEPwxhwapQk,2053
175
42
  unstructured_ingest/pipeline/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
176
- unstructured_ingest/pipeline/interfaces.py,sha256=Um2dQoRVppiRRYWkKnvRrYI9HdkV229Tu8_y2HmEaCc,8646
43
+ unstructured_ingest/pipeline/interfaces.py,sha256=Jb62t0P52hzzRWe-zHxcySgINMnPDOWc4dfJooYUEC8,8642
177
44
  unstructured_ingest/pipeline/otel.py,sha256=wUVmUPWIk_X3yw0MuI-5QJ2wU2rQgaapinnS98iQBxI,1082
178
- unstructured_ingest/pipeline/pipeline.py,sha256=2DShnmC5Hn5_YVswKQwejgCr6JT7BixMDgSrOeQGogk,16785
45
+ unstructured_ingest/pipeline/pipeline.py,sha256=LKCY7kcTfWOYF8k9k3Rw8sYZdNNAH8Qo_qZFHNIkyEU,16781
179
46
  unstructured_ingest/pipeline/steps/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
180
47
  unstructured_ingest/pipeline/steps/chunk.py,sha256=LiJMzgB0ZEDnPuYz5IwuK2k2iJoBIcfftp9cVXMOlf0,3179
181
48
  unstructured_ingest/pipeline/steps/download.py,sha256=cw8KbZ4CNZ_on4xam-VehNnLvKkUourazvcaUB-ihGY,8205
@@ -187,53 +54,53 @@ unstructured_ingest/pipeline/steps/stage.py,sha256=oobrvLtZOOPEnXQXMDUnhaaKhheuS
187
54
  unstructured_ingest/pipeline/steps/uncompress.py,sha256=clyZKwKQLLKbkQDD2q98Aw1UAe3VqUY0n7_KWtGVMSw,1756
188
55
  unstructured_ingest/pipeline/steps/upload.py,sha256=4hvh--03jzbGlxO0l1_2D5ec_EaGu04I5bFsxH0MnTg,1986
189
56
  unstructured_ingest/processes/__init__.py,sha256=FaHWSCGyc7GWVnAsNEUUj7L8hT8gCVY3_hUE2VzWtUg,462
190
- unstructured_ingest/processes/chunker.py,sha256=HCZM294_EmA1nnPsGA0OYVN8oPkF-EkskzNmwuWH4iw,5600
57
+ unstructured_ingest/processes/chunker.py,sha256=v6ICNdBqjyAOk8f_cQajIJRdrzGUSm1UFGnQqzihpi0,5806
191
58
  unstructured_ingest/processes/connector_registry.py,sha256=PoNhB-qOxvalaN-ssPWWhZSQ9QDdKMa6KzskCYYebfA,2195
192
- unstructured_ingest/processes/embedder.py,sha256=ViDf-zpoAadroxdYVzeM_g5sfXdvTu7qbgm-3RUQPNk,7884
193
- unstructured_ingest/processes/filter.py,sha256=EaNmJkv598ok6JkyJMPGwnNPpmlXXepN-Po_nv_Y_so,2151
194
- unstructured_ingest/processes/partitioner.py,sha256=BMwSLTVhTYsydZy7S55EQGf-d1G2HXPYovGc4sMhvLk,9911
59
+ unstructured_ingest/processes/embedder.py,sha256=jJcnAyRWNX5XN2WpPcT8bunbFeVlUa1P3yk4G_JIcvo,7894
60
+ unstructured_ingest/processes/filter.py,sha256=oc3SYukRYfzx8sdJqF3KxdwZcrA-1U8PTAipMdZkW0c,2148
61
+ unstructured_ingest/processes/partitioner.py,sha256=Kn_BSFYvOkwo8fqThw_cOpgD0Um-AdoSqclZplcdNBA,10109
195
62
  unstructured_ingest/processes/uncompress.py,sha256=o9JL3Bza4KPUTmrB39-v_5SuK_fYwhwFAhjQi2Pm8h8,2426
196
63
  unstructured_ingest/processes/connectors/__init__.py,sha256=cR4ZH2dpPod7QR6OsgMx8X9kpFcEc1TVfQndUNoKGzI,6812
197
- unstructured_ingest/processes/connectors/airtable.py,sha256=NcXTGqtBvx83JvCakcczCkj8zkRIYXYBOTs7pL5phzs,8955
198
- unstructured_ingest/processes/connectors/astradb.py,sha256=illjFV-INZ8iOTNPjvhbK2g1xHuuNM5Gr0Qoh7Xjjsk,18297
199
- unstructured_ingest/processes/connectors/azure_ai_search.py,sha256=x5kBnC01YQ61Ndovmkft9gq9jPl2o8G_Dbs_butQ7Kk,11528
64
+ unstructured_ingest/processes/connectors/airtable.py,sha256=smx5qBSUKwM8V6Xcc7ikrf8hYQUQ94YrB1L0WVeRDv0,9024
65
+ unstructured_ingest/processes/connectors/astradb.py,sha256=Ob9wQgDxa6BXDPZBOqooNKQgvjIZcMwIe4fW3VlI7h8,18929
66
+ unstructured_ingest/processes/connectors/azure_ai_search.py,sha256=szhSRXzUHk0DE2hGFfjGc_jNFzlUwiRlCtIkuu7tmnk,11524
200
67
  unstructured_ingest/processes/connectors/chroma.py,sha256=q5_Fu4xb6_W_NyrPxVa3-jVwZLqVdlBNlR4dFvbd7l0,7235
201
- unstructured_ingest/processes/connectors/confluence.py,sha256=0yqnJBgawx58rog07jK7keJ6sfZ-UEz3hz0WMGksoOA,12091
68
+ unstructured_ingest/processes/connectors/confluence.py,sha256=BbZ-Ecdcn92X8dHQ0egEJtBoX16gM0-zMcBLdn-wQsM,12090
202
69
  unstructured_ingest/processes/connectors/couchbase.py,sha256=KCHoYDNya9B05NIB5D78zXoizFyfpJRepcYBe1nLSOs,12298
203
- unstructured_ingest/processes/connectors/delta_table.py,sha256=ZiHiqZf81i_Hxwasde_sba7kDQjmjV5-Jvy-fOTIqLs,7279
70
+ unstructured_ingest/processes/connectors/delta_table.py,sha256=2DFox_Vzoopt_D3Jy3rCjrrTGMutG2INIrwCeoIohRY,7340
204
71
  unstructured_ingest/processes/connectors/discord.py,sha256=6yEJ_agfKUqsV43wFsbMkcd8lcLJC0uqbo4izjdZ3rU,5294
205
72
  unstructured_ingest/processes/connectors/github.py,sha256=smHCz6jOH1p_hW2S25bYunBBj_pYjz8HTw6wkzaJz_A,7765
206
- unstructured_ingest/processes/connectors/gitlab.py,sha256=t69yLnBd5NtcLYmluNDbcRU1tXi8zTM75ofEc1k4oAY,10032
207
- unstructured_ingest/processes/connectors/google_drive.py,sha256=GfxnjBRSY_eZQd1OkkLfD8DCXuwMpmysQs1ZAE6vbD8,20057
73
+ unstructured_ingest/processes/connectors/gitlab.py,sha256=6h1CdqznJmzeWxGfXrFLdNdT23PExGnUMMX7usK_4Kk,10013
74
+ unstructured_ingest/processes/connectors/google_drive.py,sha256=CqUwtK4NhKhNfozsunVzFUsKMYBEgRS1eci2pIZLnJE,20055
208
75
  unstructured_ingest/processes/connectors/jira.py,sha256=eG8yTn8ZVEz7rBJ-ha8i_d9hEh6VALN6QJT_vbYvbL0,17142
209
- unstructured_ingest/processes/connectors/kdbai.py,sha256=EbaBNnFk21LzqtwBOHoDPdwVaIchAYXz7_mf061lTcQ,5119
76
+ unstructured_ingest/processes/connectors/kdbai.py,sha256=XhxYpKSAoFPBsDQWwNuLX03DCxOVr7yquj9VYM55Rtc,5174
210
77
  unstructured_ingest/processes/connectors/local.py,sha256=LluTLKv4g7FbJb4A6vuSxI9VhzKZuuQUpDS-cVNAQ2g,7426
211
78
  unstructured_ingest/processes/connectors/milvus.py,sha256=Jr9cul7By03tGAPFnFBoqncnNWwbhKd-qbmkuqnin8U,8908
212
79
  unstructured_ingest/processes/connectors/mongodb.py,sha256=1g_5bfbS6lah3nsOXqLAanR3zNYJ47_Njw_uV-uj3_U,14324
213
80
  unstructured_ingest/processes/connectors/neo4j.py,sha256=eAM2XWSLA5caKJmbcd7ctn2TapreIJEXRoHoxT1OZwA,18718
214
81
  unstructured_ingest/processes/connectors/onedrive.py,sha256=VBkKlbJgR7uKlKTnjNybAw6ZawLKflDPpy2uVvgWYWw,19296
215
- unstructured_ingest/processes/connectors/outlook.py,sha256=FfHV9OfajGbj5VQZccqHsSyYJ0f6a4CLGQJi1s9UJjo,9294
216
- unstructured_ingest/processes/connectors/pinecone.py,sha256=TG-1hVfOsKFepxPfy2MCwEVBEZF4msg8lfNQZBpo35Y,13980
217
- unstructured_ingest/processes/connectors/redisdb.py,sha256=5LX6KtuNCzqjHqnJPw0zdKLE0iLx7Dk5RN9e_KT-up4,6975
218
- unstructured_ingest/processes/connectors/salesforce.py,sha256=a2Erx5pXbxKIj--oJWTGk2TeOcdmipuxgleazbD62o4,11664
82
+ unstructured_ingest/processes/connectors/outlook.py,sha256=zHM5frO7CqQG0-KcTyX49aZeSlsvVrl8kh_lR_ESgQw,9275
83
+ unstructured_ingest/processes/connectors/pinecone.py,sha256=BdO1PS_Y6FOeL-7uPl-Eh6ij1wHOwMkopOzKQGQ9Ac0,13979
84
+ unstructured_ingest/processes/connectors/redisdb.py,sha256=YzvSlfHs83XWsWMaIC3bV5enKfxejMQ9BQ8CtXfnJ5o,6923
85
+ unstructured_ingest/processes/connectors/salesforce.py,sha256=OaKEWCqZrirHqFJ650K5jSPwYlWefPOapas8Y-4D9oc,11661
219
86
  unstructured_ingest/processes/connectors/sharepoint.py,sha256=PowaqMzWr-VCW1rnwcAeRhHyE55kJ9J9FCVlrmtzN0E,4827
220
- unstructured_ingest/processes/connectors/slack.py,sha256=e4ntATdht_olAPsco1DKwlrOkpKLyDznPO1NJmsr0A8,9243
87
+ unstructured_ingest/processes/connectors/slack.py,sha256=EkFj9PcAu5_gF2xLogikKDADLbJYq-_jvchzYrTdLO4,9224
221
88
  unstructured_ingest/processes/connectors/utils.py,sha256=TAd0hb1f291N-q7-TUe6JKSCGkhqDyo7Ij8zmliBZUc,2071
222
- unstructured_ingest/processes/connectors/vectara.py,sha256=frKJkc7ffstQhXD9-HkAGoQAofGkl6AsnKJhGcl8LgA,12294
89
+ unstructured_ingest/processes/connectors/vectara.py,sha256=xrC6jkgW8BII4UjdzUelDu122xT484cpfMTK2wl-sko,12292
223
90
  unstructured_ingest/processes/connectors/assets/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
224
91
  unstructured_ingest/processes/connectors/assets/databricks_delta_table_schema.sql,sha256=dUZZDNkyvQXKqoAThRz3ek7zaUE2l_LAQimlG5WZhH4,211
225
92
  unstructured_ingest/processes/connectors/assets/weaviate_collection_config.json,sha256=SJlIO0kXxy866tWQ8bEzvwLwflsoUMIS-OKlxMvHIuE,504
226
93
  unstructured_ingest/processes/connectors/databricks/__init__.py,sha256=RtKAPyNtXh6fzEsOQ08pA0-vC1uMr3KqYG6cqiBoo70,2133
227
94
  unstructured_ingest/processes/connectors/databricks/volumes.py,sha256=OWQrne9-5hPzc-kxGa2P53M3DoksDzMDyjLhQyihdCo,8020
228
- unstructured_ingest/processes/connectors/databricks/volumes_aws.py,sha256=RP9rq2sfysygiqzXj6eX0CXeZpxk65xmrz7HZnWRQWA,2961
229
- unstructured_ingest/processes/connectors/databricks/volumes_azure.py,sha256=NOHsRbvG4IB-8HWlwvCw2uuJqEwcvz332NC2kSBUAH8,3746
230
- unstructured_ingest/processes/connectors/databricks/volumes_gcp.py,sha256=89N7YqtW8nHFmiq4_D201Ib-zXTLYU98Ap71goGoNEY,2997
95
+ unstructured_ingest/processes/connectors/databricks/volumes_aws.py,sha256=WhGTp6aRTLSdc4GChCL4mz2b-IanderW8j1IqezX6YA,2958
96
+ unstructured_ingest/processes/connectors/databricks/volumes_azure.py,sha256=pF2d6uAIbwJJUeOIG5xknUMCGc5d9Aztmc2776wp-a0,3740
97
+ unstructured_ingest/processes/connectors/databricks/volumes_gcp.py,sha256=y9AvVl6PtnIxlTlrPj_wyHBDBRJNq3uoTOuZwTryNg8,2994
231
98
  unstructured_ingest/processes/connectors/databricks/volumes_native.py,sha256=pivySGMmFSsyuB42ARAWAPXFQ7qTQxO3dfEoE23pBNM,3104
232
- unstructured_ingest/processes/connectors/databricks/volumes_table.py,sha256=ctQSoStHR1P1alhIH84Mpqmw4Wtnt3FsmBG7iH14iPE,8214
99
+ unstructured_ingest/processes/connectors/databricks/volumes_table.py,sha256=tqi6PpYpIBMTZcYZXl5Lw0YuawyDvjHI08TKPFFTTr0,8194
233
100
  unstructured_ingest/processes/connectors/duckdb/__init__.py,sha256=Dr6BRJJGefJnnp_vn5W5gBd7vrCCXTMLweuDIqTP-fM,558
234
- unstructured_ingest/processes/connectors/duckdb/base.py,sha256=-8TKht_HCssKM8EBA6rszAB2GdbeGZ25Vbp-Y_gRTfI,2935
235
- unstructured_ingest/processes/connectors/duckdb/duckdb.py,sha256=-C4gxvy52TFopc7LdIaJeNgBhx5UUH2DRABuyun5W9Y,4429
236
- unstructured_ingest/processes/connectors/duckdb/motherduck.py,sha256=lxE7vyEj3lD3SdGwgfuMhZKvRPbynscPu9u_xukwM30,4692
101
+ unstructured_ingest/processes/connectors/duckdb/base.py,sha256=bTLhilg6mgERNCpeeNNl7wxy3xkOt23O9XpCyD0WVY4,2945
102
+ unstructured_ingest/processes/connectors/duckdb/duckdb.py,sha256=jsmibTd_yvYzkCT05HhCJvplyobtjfNILC3zyTuCcVY,4464
103
+ unstructured_ingest/processes/connectors/duckdb/motherduck.py,sha256=Atr2MjJQGFGWh5aeiQsLpUbFw-aCZH-ABI1LprDh5VI,4727
237
104
  unstructured_ingest/processes/connectors/elasticsearch/__init__.py,sha256=M8mmBWoP6J5R3hxg6BQUMexYlTUxUxdBoIcjUop8yt8,826
238
105
  unstructured_ingest/processes/connectors/elasticsearch/elasticsearch.py,sha256=iNedi-JVkAvdF15CbKwVRwXJazyST6ha3zcNyyGwVmQ,19003
239
106
  unstructured_ingest/processes/connectors/elasticsearch/opensearch.py,sha256=wggHvw8h-X0-3WPNxj9rt2xkrE7Pv7CV0B0KzTMzBB4,6944
@@ -243,11 +110,11 @@ unstructured_ingest/processes/connectors/fsspec/box.py,sha256=1gLS7xR2vbjgKBrQ4Z
243
110
  unstructured_ingest/processes/connectors/fsspec/dropbox.py,sha256=HwwKjQmjM7yFk9Esh_F20xDisRPXGUkFduzaasByRDE,8355
244
111
  unstructured_ingest/processes/connectors/fsspec/fsspec.py,sha256=13TJmipcz9mYQT6Qi4WmqUV3veHIhbLZIW_70qY-5tI,14469
245
112
  unstructured_ingest/processes/connectors/fsspec/gcs.py,sha256=ouxISCKpZTAj3T6pWGYbASu93wytJjl5WSICvQcrgfE,7172
246
- unstructured_ingest/processes/connectors/fsspec/s3.py,sha256=6tICE_qECwnWW7ViiF08Ax1y91EkDhfs8Bf-d7udJeA,7125
113
+ unstructured_ingest/processes/connectors/fsspec/s3.py,sha256=2ZV6b2E2pIsf_ab1Lty74FwpMnJZhpQUdamPgpwcKsQ,7141
247
114
  unstructured_ingest/processes/connectors/fsspec/sftp.py,sha256=pR_a2SgLjt8ffNkariHrPB1E0HVSTj5h3pt7KxTU3TI,6371
248
115
  unstructured_ingest/processes/connectors/fsspec/utils.py,sha256=jec_Qfe2hbfahBuY-u8FnvHuv933AI5HwPFjOL3kEEY,456
249
116
  unstructured_ingest/processes/connectors/ibm_watsonx/__init__.py,sha256=kf0UpgdAY2KK1R1FbAB6GEBBAIOeYQ8cZIr3bp660qM,374
250
- unstructured_ingest/processes/connectors/ibm_watsonx/ibm_watsonx_s3.py,sha256=vTW12EN3WIYgxtonedamSNuwDPt8sPsbp0ehQ81enf4,11601
117
+ unstructured_ingest/processes/connectors/ibm_watsonx/ibm_watsonx_s3.py,sha256=SpZIqjreXpLTpZfezhG6xkZ_h7w-QWmdjXDBG6mlddQ,11729
251
118
  unstructured_ingest/processes/connectors/kafka/__init__.py,sha256=pFN2cWwAStiGTAsQ616GIWKi_hDv0s74ZvNqhJEp1Pc,751
252
119
  unstructured_ingest/processes/connectors/kafka/cloud.py,sha256=Ki6iOLoZ86tYWdnLnMWYvb2hUCneKqo4mTJcfXh7YoQ,3432
253
120
  unstructured_ingest/processes/connectors/kafka/kafka.py,sha256=7NMvWijfoliyAgnmz8TM8oJt5x7RDzC-ABPdYAm7J3w,10306
@@ -335,13 +202,13 @@ unstructured_ingest/processes/connectors/qdrant/local.py,sha256=3b43kSVoGMcFWTRi
335
202
  unstructured_ingest/processes/connectors/qdrant/qdrant.py,sha256=1Y1-nfKqt4YooqKMqRKVE_ItV0S1v__PTaEhI3vvtOE,5456
336
203
  unstructured_ingest/processes/connectors/qdrant/server.py,sha256=biyF4xr6e7CH0loj_OPt02Xrx4DMkkxqYMAsVXuJ5-Q,1607
337
204
  unstructured_ingest/processes/connectors/sql/__init__.py,sha256=WNO7jSL1ABw7K5IxLc-eeKWGGJDk7jCp_OTLdTTkZug,2056
338
- unstructured_ingest/processes/connectors/sql/databricks_delta_tables.py,sha256=9fu-SmBkkrhxF6gC2FFd7tAsPoTswtM3YlMw3fYGjB4,9084
339
- unstructured_ingest/processes/connectors/sql/postgres.py,sha256=MY_jDMdXj82UsGQDcpMbMDnl7wpNuyFaHh1_QxJtFv4,5115
340
- unstructured_ingest/processes/connectors/sql/singlestore.py,sha256=162FnOvQ9fERUUQarfiHfTTimLj_5y5MoBl8j0toCtA,5702
341
- unstructured_ingest/processes/connectors/sql/snowflake.py,sha256=92PZYhAGEwbhMJ-tqwfAqfLH59BTZ3rZZn5az6GhAKw,9367
342
- unstructured_ingest/processes/connectors/sql/sql.py,sha256=uUPTJYuUDrivRd8Z66NA-JXLXJo4dsYbBwfotxFp2kI,15722
343
- unstructured_ingest/processes/connectors/sql/sqlite.py,sha256=mpzthGrRjVtGsmm7E_eCZzxuLUtpgxki0lx1y7s8l5I,5310
344
- unstructured_ingest/processes/connectors/sql/vastdb.py,sha256=vXYQ5it2uK-PjvKduBQa31MI4y6vAD60-gGE3hp6dPc,9605
205
+ unstructured_ingest/processes/connectors/sql/databricks_delta_tables.py,sha256=_IZFFGQUsHV9ScOOISrm6c5FSd4PnX91ePj_COat-gk,9320
206
+ unstructured_ingest/processes/connectors/sql/postgres.py,sha256=kDIL8Cj45EDpKqit1_araRpP4v3cb__QbYqoINg9f2k,5403
207
+ unstructured_ingest/processes/connectors/sql/singlestore.py,sha256=B46lpvyAj1AArpACi9MXbXD1-52zF6Dsj3RJtD1g4r0,5955
208
+ unstructured_ingest/processes/connectors/sql/snowflake.py,sha256=GSEoNrIoJM7p-Q-PrFiONamoxWzjQG8wZJG3mw5Uwdk,9589
209
+ unstructured_ingest/processes/connectors/sql/sql.py,sha256=yUGnv4MF_vT3VHdg7hhGiTD0be94ll-HyhHmRKQp_vQ,15712
210
+ unstructured_ingest/processes/connectors/sql/sqlite.py,sha256=V3OfRrXGGhTa_R2FPA-ysn95HHCv9x_VEBKVDsSGsbs,5549
211
+ unstructured_ingest/processes/connectors/sql/vastdb.py,sha256=trhvUBumDmj2rLjmxFBKw9L9wF6ZpssF0wfmRaG97H0,9803
345
212
  unstructured_ingest/processes/connectors/weaviate/__init__.py,sha256=1Vnz8hm_Cf3NkQUTz5ZD4QkbLSVql4UvRoY2j2FnC9k,853
346
213
  unstructured_ingest/processes/connectors/weaviate/cloud.py,sha256=tDQ4Vfph1RwADzS0Lk4TSoeT6TZ2gX9DNi78yXkgDw0,6245
347
214
  unstructured_ingest/processes/connectors/weaviate/embedded.py,sha256=buizqBd6PSbd9VgRrOj43GZEorBpDFkUIkE6sN9emhw,3008
@@ -349,23 +216,22 @@ unstructured_ingest/processes/connectors/weaviate/local.py,sha256=4fgZsL9dgnWuaS
349
216
  unstructured_ingest/processes/connectors/weaviate/weaviate.py,sha256=SqtGcQgejGH0N1R49tGrUtGcTB8mt7sywXmWFTIcpB8,12866
350
217
  unstructured_ingest/processes/connectors/zendesk/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
351
218
  unstructured_ingest/processes/connectors/zendesk/client.py,sha256=GvPIpx4aYdD58-edHgvCFjFao94uR0O5Yf4dT9NCmSk,11952
352
- unstructured_ingest/processes/connectors/zendesk/zendesk.py,sha256=kBBiSmP4_2Z8mdOzHRXK1vE-i0xvrLLn5E3rRdThxwg,9035
219
+ unstructured_ingest/processes/connectors/zendesk/zendesk.py,sha256=j5zS_7vJmYDEQtysz_UfwIUH65gc4r-Zjc1LocJr9FM,9033
353
220
  unstructured_ingest/processes/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
354
- unstructured_ingest/processes/utils/blob_storage.py,sha256=4pY2MMdw7ib_taAshZxFLlhxaRnrrJFndHDDt09qhTM,1013
221
+ unstructured_ingest/processes/utils/blob_storage.py,sha256=apMUmm9loxdbTRkkLH4VhG9kUVyiw9PFUJheSDxSxPk,1023
355
222
  unstructured_ingest/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
356
223
  unstructured_ingest/utils/chunking.py,sha256=9b3sXMA6L8RW5xAkKQbwdtVudGLAcj_sgT6Grh5tyYM,1870
357
224
  unstructured_ingest/utils/compression.py,sha256=_BkFREoa0fkJ6z-1lY76HCmy8mLymbPCg55iMUQTd5c,2653
358
225
  unstructured_ingest/utils/constants.py,sha256=pDspTYz-nEojHBqrZNfssGEiujmVa02pIWL63PQP9sU,103
359
- unstructured_ingest/utils/data_prep.py,sha256=s4i_P5VYcOk9O1rsAfSiFpWdxfnc02WfQf5j3iOdiVo,8150
226
+ unstructured_ingest/utils/data_prep.py,sha256=yqrv7x_nlj0y3uaN0m0Bnsekb7VIQnwABWPa24KU5QI,7426
360
227
  unstructured_ingest/utils/dep_check.py,sha256=SXXcUna2H0RtxA6j1S2NGkvQa9JP2DujWhmyBa7776Y,2400
361
228
  unstructured_ingest/utils/html.py,sha256=0WduP8tI5S3nHFQi6XHNPHgsIC9j3iWwyIayX9gDLiE,6386
362
229
  unstructured_ingest/utils/ndjson.py,sha256=nz8VUOPEgAFdhaDOpuveknvCU4x82fVwqE01qAbElH0,1201
363
230
  unstructured_ingest/utils/pydantic_models.py,sha256=BT_j15e4rX40wQbt8LUXbqfPhA3rJn1PHTI_G_A_EHY,1720
364
231
  unstructured_ingest/utils/string_and_date_utils.py,sha256=oXOI6rxXq-8ncbk7EoJK0WCcTXWj75EzKl8pfQMID3U,2522
365
232
  unstructured_ingest/utils/table.py,sha256=WZechczgVFvlodUWFcsnCGvBNh1xRm6hr0VbJTPxKAc,3669
366
- unstructured_ingest-0.7.2.dist-info/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
367
- unstructured_ingest-0.7.2.dist-info/METADATA,sha256=BjJRt_WKMPbiOWOxGZPs3Q9ZmwHRkPfF0FbWT7X7lA4,15050
368
- unstructured_ingest-0.7.2.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
369
- unstructured_ingest-0.7.2.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
370
- unstructured_ingest-0.7.2.dist-info/top_level.txt,sha256=85vUyT6fV2A5eCEM3M3FPRUUI9vZOVK1xVZt7eo1oV8,34
371
- unstructured_ingest-0.7.2.dist-info/RECORD,,
233
+ unstructured_ingest-1.0.2.dist-info/METADATA,sha256=jmorweX10DhCfe--4Uz_9mQ5HIyjcd5qigZt_jP_c1c,8719
234
+ unstructured_ingest-1.0.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
235
+ unstructured_ingest-1.0.2.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
236
+ unstructured_ingest-1.0.2.dist-info/licenses/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
237
+ unstructured_ingest-1.0.2.dist-info/RECORD,,
@@ -1,5 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.8.0)
2
+ Generator: hatchling 1.27.0
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
-
examples/__init__.py DELETED
File without changes
examples/airtable.py DELETED
@@ -1,44 +0,0 @@
1
- import os
2
- from pathlib import Path
3
-
4
- from unstructured_ingest.interfaces import ProcessorConfig
5
- from unstructured_ingest.logger import logger
6
- from unstructured_ingest.pipeline.pipeline import Pipeline
7
- from unstructured_ingest.processes.chunker import ChunkerConfig
8
- from unstructured_ingest.processes.connectors.airtable import (
9
- CONNECTOR_TYPE,
10
- AirtableAccessConfig,
11
- AirtableConnectionConfig,
12
- AirtableDownloaderConfig,
13
- AirtableIndexerConfig,
14
- )
15
- from unstructured_ingest.processes.connectors.local import (
16
- LocalUploaderConfig,
17
- )
18
- from unstructured_ingest.processes.embedder import EmbedderConfig
19
- from unstructured_ingest.processes.partitioner import PartitionerConfig
20
-
21
- base_path = Path(__file__).parent.parent.parent.parent
22
- docs_path = base_path / "example-docs"
23
- work_dir = base_path / "tmp_ingest" / CONNECTOR_TYPE
24
- output_path = work_dir / "output"
25
- download_path = work_dir / "download"
26
-
27
- if __name__ == "__main__":
28
- logger.info(f"writing all content in: {work_dir.resolve()}")
29
- Pipeline.from_configs(
30
- context=ProcessorConfig(work_dir=str(work_dir.resolve()), verbose=True),
31
- indexer_config=AirtableIndexerConfig(
32
- list_of_paths=["app5YQxSfp220fWtm", "appJ43QmP8I17zu88"]
33
- ),
34
- downloader_config=AirtableDownloaderConfig(download_dir=download_path),
35
- source_connection_config=AirtableConnectionConfig(
36
- access_config=AirtableAccessConfig(
37
- personal_access_token=os.getenv("AIRTABLE_PERSONAL_ACCESS_TOKEN")
38
- )
39
- ),
40
- partitioner_config=PartitionerConfig(strategy="fast"),
41
- chunker_config=ChunkerConfig(chunking_strategy="by_title"),
42
- embedder_config=EmbedderConfig(embedding_provider="huggingface"),
43
- uploader_config=LocalUploaderConfig(output_dir=str(output_path.resolve())),
44
- ).run()