unstructured-ingest 0.7.1__py3-none-any.whl → 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of unstructured-ingest might be problematic. Click here for more details.

Files changed (192) hide show
  1. unstructured_ingest/__version__.py +1 -1
  2. unstructured_ingest/cli/README.md +28 -0
  3. unstructured_ingest/embed/mixedbreadai.py +0 -1
  4. unstructured_ingest/interfaces/upload_stager.py +2 -2
  5. unstructured_ingest/interfaces/uploader.py +3 -3
  6. unstructured_ingest/logger.py +2 -93
  7. unstructured_ingest/main.py +0 -0
  8. unstructured_ingest/pipeline/interfaces.py +1 -1
  9. unstructured_ingest/pipeline/pipeline.py +1 -1
  10. unstructured_ingest/processes/chunker.py +4 -0
  11. unstructured_ingest/processes/connectors/airtable.py +4 -2
  12. unstructured_ingest/processes/connectors/assets/databricks_delta_table_schema.sql +10 -0
  13. unstructured_ingest/processes/connectors/assets/weaviate_collection_config.json +23 -0
  14. unstructured_ingest/processes/connectors/astradb.py +2 -2
  15. unstructured_ingest/processes/connectors/azure_ai_search.py +1 -1
  16. unstructured_ingest/processes/connectors/confluence.py +0 -1
  17. unstructured_ingest/processes/connectors/databricks/volumes_aws.py +1 -1
  18. unstructured_ingest/processes/connectors/databricks/volumes_azure.py +2 -2
  19. unstructured_ingest/processes/connectors/databricks/volumes_gcp.py +1 -1
  20. unstructured_ingest/processes/connectors/databricks/volumes_table.py +1 -2
  21. unstructured_ingest/processes/connectors/delta_table.py +1 -0
  22. unstructured_ingest/processes/connectors/duckdb/base.py +2 -2
  23. unstructured_ingest/processes/connectors/duckdb/duckdb.py +3 -3
  24. unstructured_ingest/processes/connectors/duckdb/motherduck.py +3 -3
  25. unstructured_ingest/processes/connectors/fsspec/s3.py +5 -3
  26. unstructured_ingest/processes/connectors/gitlab.py +1 -2
  27. unstructured_ingest/processes/connectors/google_drive.py +0 -2
  28. unstructured_ingest/processes/connectors/ibm_watsonx/ibm_watsonx_s3.py +9 -7
  29. unstructured_ingest/processes/connectors/kdbai.py +1 -0
  30. unstructured_ingest/processes/connectors/outlook.py +1 -2
  31. unstructured_ingest/processes/connectors/pinecone.py +0 -1
  32. unstructured_ingest/processes/connectors/redisdb.py +28 -24
  33. unstructured_ingest/processes/connectors/salesforce.py +1 -1
  34. unstructured_ingest/processes/connectors/slack.py +1 -2
  35. unstructured_ingest/processes/connectors/sql/databricks_delta_tables.py +5 -0
  36. unstructured_ingest/processes/connectors/sql/postgres.py +7 -1
  37. unstructured_ingest/processes/connectors/sql/singlestore.py +11 -6
  38. unstructured_ingest/processes/connectors/sql/snowflake.py +5 -0
  39. unstructured_ingest/processes/connectors/sql/sql.py +3 -4
  40. unstructured_ingest/processes/connectors/sql/sqlite.py +5 -0
  41. unstructured_ingest/processes/connectors/sql/vastdb.py +7 -3
  42. unstructured_ingest/processes/connectors/vectara.py +0 -2
  43. unstructured_ingest/processes/connectors/zendesk/zendesk.py +0 -2
  44. unstructured_ingest/processes/embedder.py +2 -2
  45. unstructured_ingest/processes/filter.py +1 -1
  46. unstructured_ingest/processes/partitioner.py +4 -0
  47. unstructured_ingest/processes/utils/blob_storage.py +2 -2
  48. unstructured_ingest/unstructured_api.py +13 -8
  49. unstructured_ingest/utils/data_prep.py +8 -32
  50. unstructured_ingest/utils/string_and_date_utils.py +3 -3
  51. unstructured_ingest-1.0.1.dist-info/METADATA +226 -0
  52. {unstructured_ingest-0.7.1.dist-info → unstructured_ingest-1.0.1.dist-info}/RECORD +54 -187
  53. {unstructured_ingest-0.7.1.dist-info → unstructured_ingest-1.0.1.dist-info}/WHEEL +1 -2
  54. examples/__init__.py +0 -0
  55. examples/airtable.py +0 -44
  56. examples/azure_cognitive_search.py +0 -55
  57. examples/chroma.py +0 -54
  58. examples/couchbase.py +0 -55
  59. examples/databricks_volumes_dest.py +0 -55
  60. examples/databricks_volumes_source.py +0 -53
  61. examples/delta_table.py +0 -45
  62. examples/discord_example.py +0 -36
  63. examples/elasticsearch.py +0 -49
  64. examples/google_drive.py +0 -45
  65. examples/kdbai.py +0 -54
  66. examples/local.py +0 -36
  67. examples/milvus.py +0 -44
  68. examples/mongodb.py +0 -53
  69. examples/opensearch.py +0 -50
  70. examples/pinecone.py +0 -57
  71. examples/s3.py +0 -38
  72. examples/salesforce.py +0 -44
  73. examples/sharepoint.py +0 -47
  74. examples/singlestore.py +0 -49
  75. examples/sql.py +0 -90
  76. examples/vectara.py +0 -54
  77. examples/weaviate.py +0 -44
  78. test/__init__.py +0 -0
  79. test/integration/__init__.py +0 -0
  80. test/integration/chunkers/__init__.py +0 -0
  81. test/integration/chunkers/test_chunkers.py +0 -31
  82. test/integration/connectors/__init__.py +0 -0
  83. test/integration/connectors/conftest.py +0 -38
  84. test/integration/connectors/databricks/__init__.py +0 -0
  85. test/integration/connectors/databricks/test_volumes_native.py +0 -273
  86. test/integration/connectors/discord/__init__.py +0 -0
  87. test/integration/connectors/discord/test_discord.py +0 -90
  88. test/integration/connectors/duckdb/__init__.py +0 -0
  89. test/integration/connectors/duckdb/conftest.py +0 -14
  90. test/integration/connectors/duckdb/test_duckdb.py +0 -90
  91. test/integration/connectors/duckdb/test_motherduck.py +0 -95
  92. test/integration/connectors/elasticsearch/__init__.py +0 -0
  93. test/integration/connectors/elasticsearch/conftest.py +0 -34
  94. test/integration/connectors/elasticsearch/test_elasticsearch.py +0 -331
  95. test/integration/connectors/elasticsearch/test_opensearch.py +0 -326
  96. test/integration/connectors/sql/__init__.py +0 -0
  97. test/integration/connectors/sql/test_databricks_delta_tables.py +0 -170
  98. test/integration/connectors/sql/test_postgres.py +0 -201
  99. test/integration/connectors/sql/test_singlestore.py +0 -182
  100. test/integration/connectors/sql/test_snowflake.py +0 -244
  101. test/integration/connectors/sql/test_sqlite.py +0 -168
  102. test/integration/connectors/sql/test_vastdb.py +0 -34
  103. test/integration/connectors/test_astradb.py +0 -287
  104. test/integration/connectors/test_azure_ai_search.py +0 -254
  105. test/integration/connectors/test_chroma.py +0 -136
  106. test/integration/connectors/test_confluence.py +0 -111
  107. test/integration/connectors/test_delta_table.py +0 -183
  108. test/integration/connectors/test_dropbox.py +0 -151
  109. test/integration/connectors/test_github.py +0 -49
  110. test/integration/connectors/test_google_drive.py +0 -257
  111. test/integration/connectors/test_jira.py +0 -67
  112. test/integration/connectors/test_lancedb.py +0 -247
  113. test/integration/connectors/test_milvus.py +0 -208
  114. test/integration/connectors/test_mongodb.py +0 -335
  115. test/integration/connectors/test_neo4j.py +0 -244
  116. test/integration/connectors/test_notion.py +0 -152
  117. test/integration/connectors/test_onedrive.py +0 -163
  118. test/integration/connectors/test_pinecone.py +0 -387
  119. test/integration/connectors/test_qdrant.py +0 -216
  120. test/integration/connectors/test_redis.py +0 -143
  121. test/integration/connectors/test_s3.py +0 -184
  122. test/integration/connectors/test_sharepoint.py +0 -222
  123. test/integration/connectors/test_vectara.py +0 -282
  124. test/integration/connectors/test_zendesk.py +0 -120
  125. test/integration/connectors/utils/__init__.py +0 -0
  126. test/integration/connectors/utils/constants.py +0 -13
  127. test/integration/connectors/utils/docker.py +0 -151
  128. test/integration/connectors/utils/docker_compose.py +0 -59
  129. test/integration/connectors/utils/validation/__init__.py +0 -0
  130. test/integration/connectors/utils/validation/destination.py +0 -77
  131. test/integration/connectors/utils/validation/equality.py +0 -76
  132. test/integration/connectors/utils/validation/source.py +0 -331
  133. test/integration/connectors/utils/validation/utils.py +0 -36
  134. test/integration/connectors/weaviate/__init__.py +0 -0
  135. test/integration/connectors/weaviate/conftest.py +0 -15
  136. test/integration/connectors/weaviate/test_cloud.py +0 -39
  137. test/integration/connectors/weaviate/test_local.py +0 -152
  138. test/integration/embedders/__init__.py +0 -0
  139. test/integration/embedders/conftest.py +0 -13
  140. test/integration/embedders/test_azure_openai.py +0 -57
  141. test/integration/embedders/test_bedrock.py +0 -103
  142. test/integration/embedders/test_huggingface.py +0 -24
  143. test/integration/embedders/test_mixedbread.py +0 -71
  144. test/integration/embedders/test_octoai.py +0 -75
  145. test/integration/embedders/test_openai.py +0 -74
  146. test/integration/embedders/test_togetherai.py +0 -71
  147. test/integration/embedders/test_vertexai.py +0 -63
  148. test/integration/embedders/test_voyageai.py +0 -79
  149. test/integration/embedders/utils.py +0 -66
  150. test/integration/partitioners/__init__.py +0 -0
  151. test/integration/partitioners/test_partitioner.py +0 -76
  152. test/integration/utils.py +0 -15
  153. test/unit/__init__.py +0 -0
  154. test/unit/chunkers/__init__.py +0 -0
  155. test/unit/chunkers/test_chunkers.py +0 -49
  156. test/unit/connectors/__init__.py +0 -0
  157. test/unit/connectors/ibm_watsonx/__init__.py +0 -0
  158. test/unit/connectors/ibm_watsonx/test_ibm_watsonx_s3.py +0 -459
  159. test/unit/connectors/motherduck/__init__.py +0 -0
  160. test/unit/connectors/motherduck/test_base.py +0 -73
  161. test/unit/connectors/sql/__init__.py +0 -0
  162. test/unit/connectors/sql/test_sql.py +0 -152
  163. test/unit/connectors/test_confluence.py +0 -71
  164. test/unit/connectors/test_jira.py +0 -401
  165. test/unit/embed/__init__.py +0 -0
  166. test/unit/embed/test_mixedbreadai.py +0 -42
  167. test/unit/embed/test_octoai.py +0 -27
  168. test/unit/embed/test_openai.py +0 -28
  169. test/unit/embed/test_vertexai.py +0 -25
  170. test/unit/embed/test_voyageai.py +0 -24
  171. test/unit/embedders/__init__.py +0 -0
  172. test/unit/embedders/test_bedrock.py +0 -36
  173. test/unit/embedders/test_huggingface.py +0 -48
  174. test/unit/embedders/test_mixedbread.py +0 -37
  175. test/unit/embedders/test_octoai.py +0 -35
  176. test/unit/embedders/test_openai.py +0 -35
  177. test/unit/embedders/test_togetherai.py +0 -37
  178. test/unit/embedders/test_vertexai.py +0 -37
  179. test/unit/embedders/test_voyageai.py +0 -38
  180. test/unit/partitioners/__init__.py +0 -0
  181. test/unit/partitioners/test_partitioner.py +0 -63
  182. test/unit/test_error.py +0 -27
  183. test/unit/test_html.py +0 -112
  184. test/unit/test_interfaces.py +0 -26
  185. test/unit/test_logger.py +0 -78
  186. test/unit/test_utils.py +0 -220
  187. test/unit/utils/__init__.py +0 -0
  188. test/unit/utils/data_generator.py +0 -32
  189. unstructured_ingest-0.7.1.dist-info/METADATA +0 -383
  190. unstructured_ingest-0.7.1.dist-info/top_level.txt +0 -3
  191. {unstructured_ingest-0.7.1.dist-info → unstructured_ingest-1.0.1.dist-info}/entry_points.txt +0 -0
  192. {unstructured_ingest-0.7.1.dist-info → unstructured_ingest-1.0.1.dist-info/licenses}/LICENSE.md +0 -0
@@ -0,0 +1,226 @@
1
+ Metadata-Version: 2.4
2
+ Name: unstructured_ingest
3
+ Version: 1.0.1
4
+ Summary: Local ETL data pipeline to get data RAG ready
5
+ Author-email: Unstructured Technologies <devops@unstructuredai.io>
6
+ License-Expression: Apache-2.0
7
+ License-File: LICENSE.md
8
+ Classifier: Development Status :: 4 - Beta
9
+ Classifier: Intended Audience :: Developers
10
+ Classifier: Intended Audience :: Education
11
+ Classifier: Intended Audience :: Science/Research
12
+ Classifier: License :: OSI Approved :: Apache Software License
13
+ Classifier: Operating System :: OS Independent
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.9
16
+ Classifier: Programming Language :: Python :: 3.10
17
+ Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
20
+ Requires-Python: <3.13,>=3.9
21
+ Requires-Dist: click
22
+ Requires-Dist: dataclasses-json
23
+ Requires-Dist: opentelemetry-sdk
24
+ Requires-Dist: pydantic>=2.7
25
+ Requires-Dist: python-dateutil
26
+ Requires-Dist: tqdm
27
+ Provides-Extra: airtable
28
+ Requires-Dist: pandas; extra == 'airtable'
29
+ Requires-Dist: pyairtable; extra == 'airtable'
30
+ Provides-Extra: astradb
31
+ Requires-Dist: astrapy; extra == 'astradb'
32
+ Provides-Extra: azure
33
+ Requires-Dist: adlfs; extra == 'azure'
34
+ Requires-Dist: fsspec; extra == 'azure'
35
+ Provides-Extra: azure-ai-search
36
+ Requires-Dist: azure-search-documents; extra == 'azure-ai-search'
37
+ Provides-Extra: bedrock
38
+ Requires-Dist: aioboto3; extra == 'bedrock'
39
+ Requires-Dist: boto3; extra == 'bedrock'
40
+ Provides-Extra: biomed
41
+ Requires-Dist: bs4; extra == 'biomed'
42
+ Requires-Dist: requests; extra == 'biomed'
43
+ Provides-Extra: box
44
+ Requires-Dist: boxfs; extra == 'box'
45
+ Requires-Dist: fsspec; extra == 'box'
46
+ Provides-Extra: chroma
47
+ Requires-Dist: chromadb; extra == 'chroma'
48
+ Provides-Extra: clarifai
49
+ Requires-Dist: clarifai; extra == 'clarifai'
50
+ Provides-Extra: confluence
51
+ Requires-Dist: atlassian-python-api; extra == 'confluence'
52
+ Requires-Dist: requests; extra == 'confluence'
53
+ Provides-Extra: couchbase
54
+ Requires-Dist: couchbase; extra == 'couchbase'
55
+ Provides-Extra: databricks-delta-tables
56
+ Requires-Dist: databricks-sql-connector; extra == 'databricks-delta-tables'
57
+ Requires-Dist: pandas; extra == 'databricks-delta-tables'
58
+ Provides-Extra: databricks-volumes
59
+ Requires-Dist: databricks-sdk; extra == 'databricks-volumes'
60
+ Provides-Extra: delta-table
61
+ Requires-Dist: boto3; extra == 'delta-table'
62
+ Requires-Dist: deltalake; extra == 'delta-table'
63
+ Requires-Dist: pandas; extra == 'delta-table'
64
+ Provides-Extra: discord
65
+ Requires-Dist: discord-py; extra == 'discord'
66
+ Provides-Extra: doc
67
+ Requires-Dist: unstructured[doc]; extra == 'doc'
68
+ Provides-Extra: docx
69
+ Requires-Dist: unstructured[docx]; extra == 'docx'
70
+ Provides-Extra: dropbox
71
+ Requires-Dist: dropboxdrivefs; extra == 'dropbox'
72
+ Requires-Dist: fsspec; extra == 'dropbox'
73
+ Provides-Extra: duckdb
74
+ Requires-Dist: duckdb; extra == 'duckdb'
75
+ Requires-Dist: pandas; extra == 'duckdb'
76
+ Provides-Extra: elasticsearch
77
+ Requires-Dist: elasticsearch[async]; extra == 'elasticsearch'
78
+ Provides-Extra: epub
79
+ Requires-Dist: unstructured[epub]; extra == 'epub'
80
+ Provides-Extra: gcs
81
+ Requires-Dist: bs4; extra == 'gcs'
82
+ Requires-Dist: fsspec; extra == 'gcs'
83
+ Requires-Dist: gcsfs; extra == 'gcs'
84
+ Provides-Extra: github
85
+ Requires-Dist: pygithub>1.58.0; extra == 'github'
86
+ Requires-Dist: requests; extra == 'github'
87
+ Provides-Extra: gitlab
88
+ Requires-Dist: python-gitlab; extra == 'gitlab'
89
+ Provides-Extra: google-drive
90
+ Requires-Dist: google-api-python-client; extra == 'google-drive'
91
+ Provides-Extra: hubspot
92
+ Requires-Dist: hubspot-api-client; extra == 'hubspot'
93
+ Requires-Dist: urllib3; extra == 'hubspot'
94
+ Provides-Extra: huggingface
95
+ Requires-Dist: sentence-transformers; extra == 'huggingface'
96
+ Provides-Extra: ibm-watsonx-s3
97
+ Requires-Dist: httpx; extra == 'ibm-watsonx-s3'
98
+ Requires-Dist: pandas; extra == 'ibm-watsonx-s3'
99
+ Requires-Dist: pyarrow; extra == 'ibm-watsonx-s3'
100
+ Requires-Dist: pyiceberg; extra == 'ibm-watsonx-s3'
101
+ Requires-Dist: tenacity; extra == 'ibm-watsonx-s3'
102
+ Provides-Extra: image
103
+ Requires-Dist: unstructured[image]; extra == 'image'
104
+ Provides-Extra: jira
105
+ Requires-Dist: atlassian-python-api; extra == 'jira'
106
+ Provides-Extra: kafka
107
+ Requires-Dist: confluent-kafka; extra == 'kafka'
108
+ Provides-Extra: kdbai
109
+ Requires-Dist: kdbai-client>=1.4.0; extra == 'kdbai'
110
+ Requires-Dist: pandas; extra == 'kdbai'
111
+ Provides-Extra: lancedb
112
+ Requires-Dist: lancedb; extra == 'lancedb'
113
+ Provides-Extra: md
114
+ Requires-Dist: unstructured[md]; extra == 'md'
115
+ Provides-Extra: milvus
116
+ Requires-Dist: pymilvus; extra == 'milvus'
117
+ Provides-Extra: mixedbreadai
118
+ Requires-Dist: mixedbread-ai; extra == 'mixedbreadai'
119
+ Provides-Extra: mongodb
120
+ Requires-Dist: pymongo; extra == 'mongodb'
121
+ Provides-Extra: msg
122
+ Requires-Dist: unstructured[msg]; extra == 'msg'
123
+ Provides-Extra: neo4j
124
+ Requires-Dist: cymple; extra == 'neo4j'
125
+ Requires-Dist: neo4j-rust-ext; extra == 'neo4j'
126
+ Requires-Dist: networkx; extra == 'neo4j'
127
+ Provides-Extra: notion
128
+ Requires-Dist: backoff; extra == 'notion'
129
+ Requires-Dist: htmlbuilder; extra == 'notion'
130
+ Requires-Dist: httpx; extra == 'notion'
131
+ Requires-Dist: notion-client; extra == 'notion'
132
+ Provides-Extra: octoai
133
+ Requires-Dist: openai; extra == 'octoai'
134
+ Requires-Dist: tiktoken; extra == 'octoai'
135
+ Provides-Extra: odt
136
+ Requires-Dist: unstructured[odt]; extra == 'odt'
137
+ Provides-Extra: onedrive
138
+ Requires-Dist: msal; extra == 'onedrive'
139
+ Requires-Dist: office365-rest-python-client; extra == 'onedrive'
140
+ Requires-Dist: requests; extra == 'onedrive'
141
+ Provides-Extra: openai
142
+ Requires-Dist: openai; extra == 'openai'
143
+ Requires-Dist: tiktoken; extra == 'openai'
144
+ Provides-Extra: opensearch
145
+ Requires-Dist: opensearch-py; extra == 'opensearch'
146
+ Provides-Extra: org
147
+ Requires-Dist: unstructured[org]; extra == 'org'
148
+ Provides-Extra: outlook
149
+ Requires-Dist: msal; extra == 'outlook'
150
+ Requires-Dist: office365-rest-python-client; extra == 'outlook'
151
+ Provides-Extra: pdf
152
+ Requires-Dist: unstructured[pdf]; extra == 'pdf'
153
+ Provides-Extra: pinecone
154
+ Requires-Dist: pinecone; extra == 'pinecone'
155
+ Provides-Extra: postgres
156
+ Requires-Dist: pandas; extra == 'postgres'
157
+ Requires-Dist: psycopg2-binary; extra == 'postgres'
158
+ Provides-Extra: ppt
159
+ Requires-Dist: unstructured[ppt]; extra == 'ppt'
160
+ Provides-Extra: pptx
161
+ Requires-Dist: unstructured[pptx]; extra == 'pptx'
162
+ Provides-Extra: qdrant
163
+ Requires-Dist: qdrant-client; extra == 'qdrant'
164
+ Provides-Extra: reddit
165
+ Requires-Dist: praw; extra == 'reddit'
166
+ Provides-Extra: redis
167
+ Requires-Dist: redis; extra == 'redis'
168
+ Provides-Extra: remote
169
+ Requires-Dist: unstructured-client>=0.30.0; extra == 'remote'
170
+ Provides-Extra: rst
171
+ Requires-Dist: unstructured[rst]; extra == 'rst'
172
+ Provides-Extra: rtf
173
+ Requires-Dist: unstructured[rtf]; extra == 'rtf'
174
+ Provides-Extra: s3
175
+ Requires-Dist: fsspec; extra == 's3'
176
+ Requires-Dist: s3fs; extra == 's3'
177
+ Provides-Extra: salesforce
178
+ Requires-Dist: simple-salesforce; extra == 'salesforce'
179
+ Provides-Extra: sftp
180
+ Requires-Dist: fsspec; extra == 'sftp'
181
+ Requires-Dist: paramiko; extra == 'sftp'
182
+ Provides-Extra: sharepoint
183
+ Requires-Dist: msal; extra == 'sharepoint'
184
+ Requires-Dist: office365-rest-python-client; extra == 'sharepoint'
185
+ Requires-Dist: requests; extra == 'sharepoint'
186
+ Provides-Extra: singlestore
187
+ Requires-Dist: pandas; extra == 'singlestore'
188
+ Requires-Dist: singlestoredb; extra == 'singlestore'
189
+ Provides-Extra: slack
190
+ Requires-Dist: slack-sdk[optional]; extra == 'slack'
191
+ Provides-Extra: snowflake
192
+ Requires-Dist: pandas; extra == 'snowflake'
193
+ Requires-Dist: psycopg2-binary; extra == 'snowflake'
194
+ Requires-Dist: snowflake-connector-python; extra == 'snowflake'
195
+ Provides-Extra: togetherai
196
+ Requires-Dist: together; extra == 'togetherai'
197
+ Provides-Extra: tsv
198
+ Requires-Dist: unstructured[tsv]; extra == 'tsv'
199
+ Provides-Extra: vastdb
200
+ Requires-Dist: ibis; extra == 'vastdb'
201
+ Requires-Dist: pandas; extra == 'vastdb'
202
+ Requires-Dist: pyarrow; extra == 'vastdb'
203
+ Requires-Dist: vastdb; extra == 'vastdb'
204
+ Provides-Extra: vectara
205
+ Requires-Dist: aiofiles; extra == 'vectara'
206
+ Requires-Dist: httpx; extra == 'vectara'
207
+ Requires-Dist: requests; extra == 'vectara'
208
+ Provides-Extra: vertexai
209
+ Requires-Dist: vertexai; extra == 'vertexai'
210
+ Provides-Extra: voyageai
211
+ Requires-Dist: voyageai; extra == 'voyageai'
212
+ Provides-Extra: weaviate
213
+ Requires-Dist: weaviate-client; extra == 'weaviate'
214
+ Provides-Extra: wikipedia
215
+ Requires-Dist: wikipedia; extra == 'wikipedia'
216
+ Provides-Extra: xlsx
217
+ Requires-Dist: unstructured[xlsx]; extra == 'xlsx'
218
+ Provides-Extra: zendesk
219
+ Requires-Dist: aiofiles; extra == 'zendesk'
220
+ Requires-Dist: bs4; extra == 'zendesk'
221
+ Requires-Dist: httpx; extra == 'zendesk'
222
+ Description-Content-Type: text/markdown
223
+
224
+ # Unstructured Ingest
225
+
226
+ For details, see the [Unstructured Ingest overview](https://docs.unstructured.io/ingestion/overview) in the Unstructured documentation.
@@ -1,146 +1,12 @@
1
- examples/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- examples/airtable.py,sha256=4n6B8l_fmnlZLdk9SIfYticOTgZGQBjBfktNwFQf9Zc,1831
3
- examples/azure_cognitive_search.py,sha256=KThZyRQ2HhTndBcGBn1dlr0FARB2PmBBRRnKJU5AuQU,2279
4
- examples/chroma.py,sha256=fLqcpnbMAlJEe96SCMm17ZGOtcnaDQBGusURhLDwth8,2208
5
- examples/couchbase.py,sha256=u4fmZb4eBYEcBgWCaWOKSxxxV1v-YpyaGG7J4ept1w0,2261
6
- examples/databricks_volumes_dest.py,sha256=xoSVgmS4dNKOKGKtin0ojB20mO7vfCJ8FQ_DFIgMW-U,2329
7
- examples/databricks_volumes_source.py,sha256=DT7PXW6CTOGSH5lzuZMfasjV6vgoG28R68I9Cb98JvQ,2278
8
- examples/delta_table.py,sha256=0InvWFYZZt6TolYtWlKp9KliOnRxFNubInbwqlBWMIA,1898
9
- examples/discord_example.py,sha256=ZNzKVxk7hfUF0qZdOLkh7fGItaiIdo6gh7JYhwr2vyQ,1624
10
- examples/elasticsearch.py,sha256=KQXIYCE44w_CRZQGanlfP9ZY9NZ5gclxKKvtZnd-IRA,2129
11
- examples/google_drive.py,sha256=iOjkC8iWQ3sqM3TTsL8Ng_yZbm2C5xfws1a9MTZuV7M,1677
12
- examples/kdbai.py,sha256=W7yT2lnYOPbftqTiSHQNgWVwQC4UXJ8QhkPqSdiG_Co,2225
13
- examples/local.py,sha256=WuN3SqxoXRlh2xhMZPh4R9EdCNfjQUhurAlGVqGuUF8,1569
14
- examples/milvus.py,sha256=TKtH1Rxrj3Hr9d1BUx7qEK468Xb0ux7Ak1RukPXSOOo,1877
15
- examples/mongodb.py,sha256=eP43TY-rjOeWnVk4m5jSSWJSWXcy6xRYFuLWusBZXws,2160
16
- examples/opensearch.py,sha256=8YmQpvOB9HBQqoC47ht-lX34SpkoaDlezbQOHRG82cw,2103
17
- examples/pinecone.py,sha256=URXalj5-0eTVnmfzD0icCB1brGklplU4P8l0jrawCjI,2479
18
- examples/s3.py,sha256=23y_lPUkPo50rDMZC7cc3kBaSOf5pP_xl_7HO0Mb3c8,1742
19
- examples/salesforce.py,sha256=tiO6hdRI79H_oORPnIf1FvB0IuGTYG2KzZlnqC_J9Cw,1888
20
- examples/sharepoint.py,sha256=a0h2zU28m6bW5g17b8BDrcsHzdzjSgb--gYV80bRqs0,2067
21
- examples/singlestore.py,sha256=UAdBOtIcmhyRkZ-pIh7rrY7Yt_Ed8t3puulZ-MhaSfU,2060
22
- examples/sql.py,sha256=YSmLD7Ri2a8CvBxRJWxPQefqV4kV8kF3W0l3TXu_iyY,2997
23
- examples/vectara.py,sha256=bWSsMQL3hEEt5CLR9CZFnuplrSAeLj_EiADipRU_Gkw,2247
24
- examples/weaviate.py,sha256=QUtYJ-y7eYfm69T316-aUm1imQZnoSJ09RGSVQAoTck,1906
25
- test/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
26
- test/integration/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
27
- test/integration/utils.py,sha256=CWqzEGw6TA_ZoP9hRUkW64TWYssooBbufcTRmbJvod8,401
28
- test/integration/chunkers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
29
- test/integration/chunkers/test_chunkers.py,sha256=MTPVBCBvh54fBqi_53oPkrH9QJtvJeE9YEXDOZ8G0so,1059
30
- test/integration/connectors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
31
- test/integration/connectors/conftest.py,sha256=3weYhwyXyAVVk8DsLqScDOk-PZwsrHQLs1RdYa1EYqQ,1015
32
- test/integration/connectors/test_astradb.py,sha256=1hFqd9NI283t6lQZryBJMfJoRfP6PKVHTl_6X3Nk0bM,9925
33
- test/integration/connectors/test_azure_ai_search.py,sha256=NMSjIbFO1EWqr8U-Owd4qdC3pDROjg7zNAXr5yGSsH4,9699
34
- test/integration/connectors/test_chroma.py,sha256=_K4CUp9x5O_Uuw1uwDi1R0jIvzcaW0wI8tXfAV1hh7c,4536
35
- test/integration/connectors/test_confluence.py,sha256=3sAFQxRUrS7xXIQXQBccY0r5kihnP8rc_sAs-44g4Ys,3587
36
- test/integration/connectors/test_delta_table.py,sha256=-ivYySHKgFRCd6PUIK-fy6yOkku3uj4YhDDEUlqi-tM,6915
37
- test/integration/connectors/test_dropbox.py,sha256=sW-NOXA0-4xtAUgnSnPGg-OhiIhKviROJPpxUt8y8s8,4939
38
- test/integration/connectors/test_github.py,sha256=wFWRFhfhTUz3byk2FSe9qVv7xMxcBjHGs1FlhjtnTAQ,1500
39
- test/integration/connectors/test_google_drive.py,sha256=ceiFoVnaguTsQrLHZk8jv-IZ-i_EP4wenHSX-QKHvTM,10300
40
- test/integration/connectors/test_jira.py,sha256=0FnxFe42d32EGuArfxxnfINkoYNoCsgJjP7ZU6fePu8,2073
41
- test/integration/connectors/test_lancedb.py,sha256=E8yFuvQMx68w1s1PXIBP8gUlNuUpCtiGNYd7YnDA6Aw,9213
42
- test/integration/connectors/test_milvus.py,sha256=_cYmJMocsZuUroalT5uc9rcsHFnTIpJyJjIwK8oDDYc,7177
43
- test/integration/connectors/test_mongodb.py,sha256=wt5o-7qtMtjGv0IPKlhEnD3-sJjBX8cv1acn1Mcq-TY,12450
44
- test/integration/connectors/test_neo4j.py,sha256=BSOqRTY4ZV8o6TV1MOmUKQq7DzRFU_z9umjDk-yw-Jg,8450
45
- test/integration/connectors/test_notion.py,sha256=3OXFcSM-jE1_E_JoGw--pz-cv3dPZvt18scACJiHjwo,5397
46
- test/integration/connectors/test_onedrive.py,sha256=0SZB818cNsxYZlBJJpuvU1PqsFDxRaOiLfJTRcc9Bv0,5233
47
- test/integration/connectors/test_pinecone.py,sha256=0XBK9xxZhry6Rnv0s-chIWgtN5d1p8Lx-kEEQMNuAnA,13650
48
- test/integration/connectors/test_qdrant.py,sha256=z3RThQJKzCafCtfH0ocy_DNDlzmDdu_opQH3mKTn0CE,8031
49
- test/integration/connectors/test_redis.py,sha256=gUU6Dv616tX9KANiqhkMrPWvmhbV2Gk1pNuIv2MvZG8,5093
50
- test/integration/connectors/test_s3.py,sha256=Cd9HsPjrSB6xss1DO4YHqSORJJ2pUXNVaAlrlpqjZS8,7477
51
- test/integration/connectors/test_sharepoint.py,sha256=wq4G6J5ffXhYquUySVd5UUtYWC43RoXgCSY4fdlX2z0,7643
52
- test/integration/connectors/test_vectara.py,sha256=08GIh6J2QTSuupdDOJ_TiyQrYYK3vamUaEQe3_B5-WY,9278
53
- test/integration/connectors/test_zendesk.py,sha256=15bl3wy0pLxS2dkBlE11yPOX71k6Vbxo0BEFQK-qcFs,3724
54
- test/integration/connectors/databricks/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
55
- test/integration/connectors/databricks/test_volumes_native.py,sha256=RbxImt0JPnvl8TAK_rAtIspaFuRmEFzjL9rLqkm6Juk,9563
56
- test/integration/connectors/discord/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
57
- test/integration/connectors/discord/test_discord.py,sha256=WeB0ST572GvELMlgorRMwRxYIWkleIobXn6ULhjo1rw,3173
58
- test/integration/connectors/duckdb/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
59
- test/integration/connectors/duckdb/conftest.py,sha256=rlBHMJTiJ2a5xbvIxTOyhhcuTBc9DO-yTzD6Kf8X3hY,301
60
- test/integration/connectors/duckdb/test_duckdb.py,sha256=o3CKTG2T15QyWwPTh7Yyv42eJ39opm7g9b508y1o1m8,2973
61
- test/integration/connectors/duckdb/test_motherduck.py,sha256=S2EtKVy4HE0ysB3OvdFOUe5O-0w5Bk4Loy2Gyyff8b0,3218
62
- test/integration/connectors/elasticsearch/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
63
- test/integration/connectors/elasticsearch/conftest.py,sha256=-i4_7MkIxSQENz7nuD2uHuhGU9mZ33vpeTPhHtRpQfs,989
64
- test/integration/connectors/elasticsearch/test_elasticsearch.py,sha256=NUUL88Z7uWfnEopZ0wkQtWMA94WDZw87v_oCkPShVM4,12076
65
- test/integration/connectors/elasticsearch/test_opensearch.py,sha256=nV3gHD_tb79KRE8DqfUWpOfzem9LjojACspUHTSI7dw,11454
66
- test/integration/connectors/sql/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
67
- test/integration/connectors/sql/test_databricks_delta_tables.py,sha256=hYfmT9ud6NqJk1pibaF-1LvNgrfXBGUq0FTHkcIvICE,6145
68
- test/integration/connectors/sql/test_postgres.py,sha256=b0do75CAxKFEgoODx7mNQqo5vEgZbWHSifMNV_H2IgU,6974
69
- test/integration/connectors/sql/test_singlestore.py,sha256=uuw_T8EefwMcRD7clzviJxwb45f9k4G2dZIYqbNhM1s,6157
70
- test/integration/connectors/sql/test_snowflake.py,sha256=dNHR8fk5V1WsvN6P1CesPKCsRbKTuf6zXgjyf7vgueg,7498
71
- test/integration/connectors/sql/test_sqlite.py,sha256=6By1-XKiGCA5KlR1DHlM6ArU5c_2GjM5mE2RhMoNPg8,5960
72
- test/integration/connectors/sql/test_vastdb.py,sha256=A0W-kHl1GRf2zHCmTWXOJjV8HPi3xlWvCTKgjebVZUY,1066
73
- test/integration/connectors/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
74
- test/integration/connectors/utils/constants.py,sha256=JhTk6YNw7JVpkk-Pl8zn2YYkExeL1oE9VBWm_kMYGfo,369
75
- test/integration/connectors/utils/docker.py,sha256=4g1STiSbYN5qcmDTXyPxVJgwx97O6wk7n-DJ-zgzgag,4971
76
- test/integration/connectors/utils/docker_compose.py,sha256=GVTB6Cel05c0VQ2n4AwkQQx_cBfz13ZTs1HpbaYipNU,2223
77
- test/integration/connectors/utils/validation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
78
- test/integration/connectors/utils/validation/destination.py,sha256=m5RHgZ3_h6HA2SsWbg15rmKhZjzsyKqOtFlUqpy33SI,2746
79
- test/integration/connectors/utils/validation/equality.py,sha256=R6d_1c-Si5518WJcBcshF_wBRnywnZ0ORQ-NL0xNmGo,2602
80
- test/integration/connectors/utils/validation/source.py,sha256=WX67a1tYpyUFXvSxxZrTLEkpyVqZiUXhAsJ11RQzcqQ,13701
81
- test/integration/connectors/utils/validation/utils.py,sha256=xYYvAbqP6_lZyH09_JjB4w2Sf8aQPvDVT5vZTs05ILs,1428
82
- test/integration/connectors/weaviate/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
83
- test/integration/connectors/weaviate/conftest.py,sha256=6Q6QdrLJmGHowRFSmoVSzup2EX6qASfS2Z5tqlpTm9M,387
84
- test/integration/connectors/weaviate/test_cloud.py,sha256=1r16tNUSsq8JawfjgeRWtcfw2COYma0b298mBDZU__o,1281
85
- test/integration/connectors/weaviate/test_local.py,sha256=q8vSpmFeTapSoUSNChIpc6qfyMdcICo28CJSm7L7V-o,5337
86
- test/integration/embedders/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
87
- test/integration/embedders/conftest.py,sha256=B2W771RbijR7G_GybsCzRyIvOzXqzbKZdRIlNDd5AGY,334
88
- test/integration/embedders/test_azure_openai.py,sha256=nQle565sZu9N4xjWHBOfiOFGtldOdIK6S1YID00hK78,1787
89
- test/integration/embedders/test_bedrock.py,sha256=-hy4wPmgBpXrk7OtPbZ8IE0qip6Ht1GptGLpmpqHxiw,3550
90
- test/integration/embedders/test_huggingface.py,sha256=jNNBpXUA_UpgXXVo_Q2xYGFaknIZJu8eKJ4ifq19eig,986
91
- test/integration/embedders/test_mixedbread.py,sha256=0Mcq9JU9wAJ_Wz2Enwyz4qGSbivDDqmE6ZJxlZpMuWw,1993
92
- test/integration/embedders/test_octoai.py,sha256=R6NcBxPL_sVJLGmVTEjsaxfqjnLWxViXwL6vaze666s,2194
93
- test/integration/embedders/test_openai.py,sha256=iwk56ZK1gllFtatcp6W_PA7x9h2M2ReIg4GXIWXp5qo,2124
94
- test/integration/embedders/test_togetherai.py,sha256=5PzIdnvjMpjods_rhUqQ2nbVRXgld_F7OGWssnHxa0I,2202
95
- test/integration/embedders/test_vertexai.py,sha256=xpjauYnRBxOqft0HXEMFk1iRoeQJm7E2eSunZbjj_H4,1827
96
- test/integration/embedders/test_voyageai.py,sha256=kcuGxhG6kR1XVqmTw7La3MXnIC06CtjOq_n48nkF-eQ,2411
97
- test/integration/embedders/utils.py,sha256=Sqqg-X31ZV1hojqPQBaZgM2lb2u8cG6s6OnH9JRsFjs,2717
98
- test/integration/partitioners/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
99
- test/integration/partitioners/test_partitioner.py,sha256=UYQd9x2-66F_FFeulC_2eg3FtjswK0Mt9Hwmg4b_pPs,2784
100
- test/unit/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
101
- test/unit/test_error.py,sha256=RflmngCdFNKOLXVfLnUdNfY3Mfg3k7DTEzfIl0B-syU,840
102
- test/unit/test_html.py,sha256=ubsck9pVOnPDFL0P8TZkko_46MIaFLlSNQcsgFDgYoE,4496
103
- test/unit/test_interfaces.py,sha256=Gv3WMJsw_3xPLy3nI3dIcJuLa2WvKYszSjI_W9XLtVM,787
104
- test/unit/test_logger.py,sha256=0SKndXE_VRd8XmUHkrj7zuBQHZscXx3ZQllMEOvtF9Y,2380
105
- test/unit/test_utils.py,sha256=xeSM02zOChSOO3dzDOVAEiQme1rQ8drjnJF93S3BFmk,7247
106
- test/unit/chunkers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
107
- test/unit/chunkers/test_chunkers.py,sha256=wRxbSj7P1FwRGDyVcARkm8CQSVCBCro3nTe54UoUBzc,1769
108
- test/unit/connectors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
109
- test/unit/connectors/test_confluence.py,sha256=Hr91nKw6018FEih-vSrVXFk0p0E9bSL1IeZVDxvITJ0,1916
110
- test/unit/connectors/test_jira.py,sha256=sPRjoBVDmc-o2RWilcjs-VW_jkafIqSXBE9duCELfoA,12110
111
- test/unit/connectors/ibm_watsonx/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
112
- test/unit/connectors/ibm_watsonx/test_ibm_watsonx_s3.py,sha256=Yj9swJ_phiam-CFBzGnAFCkd8_oqzdA3ZQJQdpV8T1E,14503
113
- test/unit/connectors/motherduck/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
114
- test/unit/connectors/motherduck/test_base.py,sha256=4SHI3Hx1a28eNE_VDbl8gAssNZRIUNSGPNZgrkzjYWs,2429
115
- test/unit/connectors/sql/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
116
- test/unit/connectors/sql/test_sql.py,sha256=SfWYDBrR7pHFziKVEe6IAq5E3EQIz99ikQN3LnF1DrY,4622
117
- test/unit/embed/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
118
- test/unit/embed/test_mixedbreadai.py,sha256=Z9A9jg5eJRF4OgYTgbIzQUI27J16uv2qj2kp_Rv0r9k,1428
119
- test/unit/embed/test_octoai.py,sha256=CWVrieqJh-N40J9n3nzqQPLOH9T1_mldkpZYRiHKxrg,1055
120
- test/unit/embed/test_openai.py,sha256=RQ-4QIcRvq0JSBFNit_NRcy61EsOv7xh_TcKJKHwHGM,1186
121
- test/unit/embed/test_vertexai.py,sha256=k_dK-yR_yx1RAOpmAgfcPo-osRDJP9aRCMCsJmQPxYI,1050
122
- test/unit/embed/test_voyageai.py,sha256=QWoDZEX8cAIkTgn4NtIyGKzOAu-GmudD4VMujnfi1Gg,983
123
- test/unit/embedders/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
124
- test/unit/embedders/test_bedrock.py,sha256=HMaweO_v_9Y1SE2m5QImXP73cb26vNTUfc1onTBa1-g,1074
125
- test/unit/embedders/test_huggingface.py,sha256=BpMC_AMlifjNf4Y61yBNR_8UU3H_x3ut2NnpFuB4kDo,1543
126
- test/unit/embedders/test_mixedbread.py,sha256=8yT942TVVXC5EkrT_ReZie1In537BaAD6esRjntgxuU,1021
127
- test/unit/embedders/test_octoai.py,sha256=JMfrFz25QfEh0ieB4bJneZd4XtNcdPOnNsN1Fj7gU-Q,1012
128
- test/unit/embedders/test_openai.py,sha256=HoEW95289Ijgo3PJ-pEaDOknfdkSjPXTgkXmE6jJomY,1012
129
- test/unit/embedders/test_togetherai.py,sha256=s24V_geDNZzblU74sSdC_m4Lqlzjp00RMpy56ptfdx0,1009
130
- test/unit/embedders/test_vertexai.py,sha256=4gLJaV9Nr2k_SgA-EyJ_sDvm8XvyGbn2zTs4F4CXU2g,1142
131
- test/unit/embedders/test_voyageai.py,sha256=VaWthF64pmxc-fOBbAQsEzMw7tV4t4Nz_H_Cc5tuAYQ,1193
132
- test/unit/partitioners/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
133
- test/unit/partitioners/test_partitioner.py,sha256=eJoUDbiKtweyU1WYfsY5KqVqoPjbx1MUsyHkbvvTNEk,2275
134
- test/unit/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
135
- test/unit/utils/data_generator.py,sha256=UoYVNjG4S4wlaA9gceQ82HIpF9_6I1UTHD1_GrQBHp0,973
136
1
  unstructured_ingest/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
137
- unstructured_ingest/__version__.py,sha256=YP5dlQlaTHZ-KOck8o_UzdjIFae7iENB5d3AMIKlZ3M,42
2
+ unstructured_ingest/__version__.py,sha256=Bkcw0TdkF4pWY_01piNW3D1XaG9Q-r4aIMSbnIeStCE,42
138
3
  unstructured_ingest/error.py,sha256=qDncnJgbf5ils956RcO2CGlAKYDT5OaEM9Clv1JVTNc,1448
139
4
  unstructured_ingest/errors_v2.py,sha256=9RuRCi7lbDxCguDz07y5RiHoQiFIOWwOD7xqzJ2B3Yw,436
140
- unstructured_ingest/logger.py,sha256=S5nSqGcABoQyeicgRnBQFjDScCaTvFVivOCvbo-laL0,4479
5
+ unstructured_ingest/logger.py,sha256=7e_7UeK6hVOd5BQ6i9NzRUAPCS_DF839Y8TjUDywraY,1428
141
6
  unstructured_ingest/main.py,sha256=82G_7eG4PNhc_xIqj4Y_sFbDV9VI-nwSfsfJQMzovMk,169
142
7
  unstructured_ingest/otel.py,sha256=NsUqOolA0gt69eFhZLABjVpcKoM9aus-AbxIKqWqPTc,4127
143
- unstructured_ingest/unstructured_api.py,sha256=hWUXUhGtyfi2OcDR-BriHJyT4jJywf4zfG1qpSCf9Bo,5002
8
+ unstructured_ingest/unstructured_api.py,sha256=4e2ZNWIihk0eje4R3ZQ0NOYNbmMZDv_O-rnJo94kaGE,5127
9
+ unstructured_ingest/cli/README.md,sha256=5LfM0ys1aFyCiCjlwZsi_9Mb5Nrq3MmYt3IpmUybnCE,1507
144
10
  unstructured_ingest/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
145
11
  unstructured_ingest/cli/cli.py,sha256=ZeIE9jP8fe7260nE8v7xYgLdqX9OtkQXAXSGWIkHLcA,645
146
12
  unstructured_ingest/cli/cmds.py,sha256=EhDW5UX4V-N8Svjba4w7YWnRYl26__ADwzNXrfFBxM4,483
@@ -159,7 +25,7 @@ unstructured_ingest/embed/azure_openai.py,sha256=_-I-nwd-wdCiKkSdYBL4UKrTZ2UPWsM
159
25
  unstructured_ingest/embed/bedrock.py,sha256=t58V_QQjWPO62CTuP0aLFMDisPeXpxG2xSFGUhN-JvI,7726
160
26
  unstructured_ingest/embed/huggingface.py,sha256=-ZD17O_H_UnK80fqig6y6wNKJckjx0HuAkY5vgPvk8M,2259
161
27
  unstructured_ingest/embed/interfaces.py,sha256=SdB3t8eMPB8CbXzOYBpgwjzTvyb4T19L61Sr6Jy3_rw,5099
162
- unstructured_ingest/embed/mixedbreadai.py,sha256=-Y0J27G9CL1t3ZTIeNjTjRviErSMAzJRf2zgDgMHUmg,4499
28
+ unstructured_ingest/embed/mixedbreadai.py,sha256=z8RaG1hGBL840yElvI1Dbnf7llsOGEBbZ2X_QlFflZg,4498
163
29
  unstructured_ingest/embed/octoai.py,sha256=136UzSuQgV8Nxel2pB8Iv-4AvlFU6RRCa7N64fWFl6o,3855
164
30
  unstructured_ingest/embed/openai.py,sha256=hK98QXb_8oN1E-QwNT6JElzYOxG1mvZCFYQW57pjv0E,3372
165
31
  unstructured_ingest/embed/togetherai.py,sha256=T0v0_yTovy3sSeLPvk3PJccqcnmqCc_vxYs6pumjK3I,2983
@@ -171,12 +37,12 @@ unstructured_ingest/interfaces/downloader.py,sha256=xX0ZzsFRSzZb7SAeoeQph8sIbVq1
171
37
  unstructured_ingest/interfaces/indexer.py,sha256=c2FwWJEQHfFD6vO-tGfYLpLiIs-TYViLAt8YmHfDbaM,824
172
38
  unstructured_ingest/interfaces/process.py,sha256=S3A_9gkwwGC-iQxvnpj3Er6IJAjAT5npzpSgxuFAzUM,449
173
39
  unstructured_ingest/interfaces/processor.py,sha256=VX7JqXlbG1plxMK8THWhWINPbTICaaUEk4XUXhnOixY,3303
174
- unstructured_ingest/interfaces/upload_stager.py,sha256=nsS8VgFX44YIaESneyXQfa-mQUsKzowVdhwLpJjWxGM,3137
175
- unstructured_ingest/interfaces/uploader.py,sha256=6Ax-Qn7it0yKVdUBun5-lJ3jxISPEq2b2gLXpzJh5Lg,2038
40
+ unstructured_ingest/interfaces/upload_stager.py,sha256=eYhbdM0Dt8FValZAe41dWnxehhvfMLDOSTp7UoR5HB0,3147
41
+ unstructured_ingest/interfaces/uploader.py,sha256=6HyWttmosKreuWJCFp3TxKCuzDCj_RJdGEPwxhwapQk,2053
176
42
  unstructured_ingest/pipeline/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
177
- unstructured_ingest/pipeline/interfaces.py,sha256=Um2dQoRVppiRRYWkKnvRrYI9HdkV229Tu8_y2HmEaCc,8646
43
+ unstructured_ingest/pipeline/interfaces.py,sha256=Jb62t0P52hzzRWe-zHxcySgINMnPDOWc4dfJooYUEC8,8642
178
44
  unstructured_ingest/pipeline/otel.py,sha256=wUVmUPWIk_X3yw0MuI-5QJ2wU2rQgaapinnS98iQBxI,1082
179
- unstructured_ingest/pipeline/pipeline.py,sha256=2DShnmC5Hn5_YVswKQwejgCr6JT7BixMDgSrOeQGogk,16785
45
+ unstructured_ingest/pipeline/pipeline.py,sha256=LKCY7kcTfWOYF8k9k3Rw8sYZdNNAH8Qo_qZFHNIkyEU,16781
180
46
  unstructured_ingest/pipeline/steps/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
181
47
  unstructured_ingest/pipeline/steps/chunk.py,sha256=LiJMzgB0ZEDnPuYz5IwuK2k2iJoBIcfftp9cVXMOlf0,3179
182
48
  unstructured_ingest/pipeline/steps/download.py,sha256=cw8KbZ4CNZ_on4xam-VehNnLvKkUourazvcaUB-ihGY,8205
@@ -188,51 +54,53 @@ unstructured_ingest/pipeline/steps/stage.py,sha256=oobrvLtZOOPEnXQXMDUnhaaKhheuS
188
54
  unstructured_ingest/pipeline/steps/uncompress.py,sha256=clyZKwKQLLKbkQDD2q98Aw1UAe3VqUY0n7_KWtGVMSw,1756
189
55
  unstructured_ingest/pipeline/steps/upload.py,sha256=4hvh--03jzbGlxO0l1_2D5ec_EaGu04I5bFsxH0MnTg,1986
190
56
  unstructured_ingest/processes/__init__.py,sha256=FaHWSCGyc7GWVnAsNEUUj7L8hT8gCVY3_hUE2VzWtUg,462
191
- unstructured_ingest/processes/chunker.py,sha256=HCZM294_EmA1nnPsGA0OYVN8oPkF-EkskzNmwuWH4iw,5600
57
+ unstructured_ingest/processes/chunker.py,sha256=v6ICNdBqjyAOk8f_cQajIJRdrzGUSm1UFGnQqzihpi0,5806
192
58
  unstructured_ingest/processes/connector_registry.py,sha256=PoNhB-qOxvalaN-ssPWWhZSQ9QDdKMa6KzskCYYebfA,2195
193
- unstructured_ingest/processes/embedder.py,sha256=ViDf-zpoAadroxdYVzeM_g5sfXdvTu7qbgm-3RUQPNk,7884
194
- unstructured_ingest/processes/filter.py,sha256=EaNmJkv598ok6JkyJMPGwnNPpmlXXepN-Po_nv_Y_so,2151
195
- unstructured_ingest/processes/partitioner.py,sha256=BMwSLTVhTYsydZy7S55EQGf-d1G2HXPYovGc4sMhvLk,9911
59
+ unstructured_ingest/processes/embedder.py,sha256=jJcnAyRWNX5XN2WpPcT8bunbFeVlUa1P3yk4G_JIcvo,7894
60
+ unstructured_ingest/processes/filter.py,sha256=oc3SYukRYfzx8sdJqF3KxdwZcrA-1U8PTAipMdZkW0c,2148
61
+ unstructured_ingest/processes/partitioner.py,sha256=Kn_BSFYvOkwo8fqThw_cOpgD0Um-AdoSqclZplcdNBA,10109
196
62
  unstructured_ingest/processes/uncompress.py,sha256=o9JL3Bza4KPUTmrB39-v_5SuK_fYwhwFAhjQi2Pm8h8,2426
197
63
  unstructured_ingest/processes/connectors/__init__.py,sha256=cR4ZH2dpPod7QR6OsgMx8X9kpFcEc1TVfQndUNoKGzI,6812
198
- unstructured_ingest/processes/connectors/airtable.py,sha256=NcXTGqtBvx83JvCakcczCkj8zkRIYXYBOTs7pL5phzs,8955
199
- unstructured_ingest/processes/connectors/astradb.py,sha256=illjFV-INZ8iOTNPjvhbK2g1xHuuNM5Gr0Qoh7Xjjsk,18297
200
- unstructured_ingest/processes/connectors/azure_ai_search.py,sha256=x5kBnC01YQ61Ndovmkft9gq9jPl2o8G_Dbs_butQ7Kk,11528
64
+ unstructured_ingest/processes/connectors/airtable.py,sha256=smx5qBSUKwM8V6Xcc7ikrf8hYQUQ94YrB1L0WVeRDv0,9024
65
+ unstructured_ingest/processes/connectors/astradb.py,sha256=ONt8vHv5h8B6goGba9l0YPS0y5EnSAoowtfq92-E-RY,18307
66
+ unstructured_ingest/processes/connectors/azure_ai_search.py,sha256=szhSRXzUHk0DE2hGFfjGc_jNFzlUwiRlCtIkuu7tmnk,11524
201
67
  unstructured_ingest/processes/connectors/chroma.py,sha256=q5_Fu4xb6_W_NyrPxVa3-jVwZLqVdlBNlR4dFvbd7l0,7235
202
- unstructured_ingest/processes/connectors/confluence.py,sha256=0yqnJBgawx58rog07jK7keJ6sfZ-UEz3hz0WMGksoOA,12091
68
+ unstructured_ingest/processes/connectors/confluence.py,sha256=BbZ-Ecdcn92X8dHQ0egEJtBoX16gM0-zMcBLdn-wQsM,12090
203
69
  unstructured_ingest/processes/connectors/couchbase.py,sha256=KCHoYDNya9B05NIB5D78zXoizFyfpJRepcYBe1nLSOs,12298
204
- unstructured_ingest/processes/connectors/delta_table.py,sha256=ZiHiqZf81i_Hxwasde_sba7kDQjmjV5-Jvy-fOTIqLs,7279
70
+ unstructured_ingest/processes/connectors/delta_table.py,sha256=2DFox_Vzoopt_D3Jy3rCjrrTGMutG2INIrwCeoIohRY,7340
205
71
  unstructured_ingest/processes/connectors/discord.py,sha256=6yEJ_agfKUqsV43wFsbMkcd8lcLJC0uqbo4izjdZ3rU,5294
206
72
  unstructured_ingest/processes/connectors/github.py,sha256=smHCz6jOH1p_hW2S25bYunBBj_pYjz8HTw6wkzaJz_A,7765
207
- unstructured_ingest/processes/connectors/gitlab.py,sha256=t69yLnBd5NtcLYmluNDbcRU1tXi8zTM75ofEc1k4oAY,10032
208
- unstructured_ingest/processes/connectors/google_drive.py,sha256=GfxnjBRSY_eZQd1OkkLfD8DCXuwMpmysQs1ZAE6vbD8,20057
73
+ unstructured_ingest/processes/connectors/gitlab.py,sha256=6h1CdqznJmzeWxGfXrFLdNdT23PExGnUMMX7usK_4Kk,10013
74
+ unstructured_ingest/processes/connectors/google_drive.py,sha256=CqUwtK4NhKhNfozsunVzFUsKMYBEgRS1eci2pIZLnJE,20055
209
75
  unstructured_ingest/processes/connectors/jira.py,sha256=eG8yTn8ZVEz7rBJ-ha8i_d9hEh6VALN6QJT_vbYvbL0,17142
210
- unstructured_ingest/processes/connectors/kdbai.py,sha256=EbaBNnFk21LzqtwBOHoDPdwVaIchAYXz7_mf061lTcQ,5119
76
+ unstructured_ingest/processes/connectors/kdbai.py,sha256=XhxYpKSAoFPBsDQWwNuLX03DCxOVr7yquj9VYM55Rtc,5174
211
77
  unstructured_ingest/processes/connectors/local.py,sha256=LluTLKv4g7FbJb4A6vuSxI9VhzKZuuQUpDS-cVNAQ2g,7426
212
78
  unstructured_ingest/processes/connectors/milvus.py,sha256=Jr9cul7By03tGAPFnFBoqncnNWwbhKd-qbmkuqnin8U,8908
213
79
  unstructured_ingest/processes/connectors/mongodb.py,sha256=1g_5bfbS6lah3nsOXqLAanR3zNYJ47_Njw_uV-uj3_U,14324
214
80
  unstructured_ingest/processes/connectors/neo4j.py,sha256=eAM2XWSLA5caKJmbcd7ctn2TapreIJEXRoHoxT1OZwA,18718
215
81
  unstructured_ingest/processes/connectors/onedrive.py,sha256=VBkKlbJgR7uKlKTnjNybAw6ZawLKflDPpy2uVvgWYWw,19296
216
- unstructured_ingest/processes/connectors/outlook.py,sha256=FfHV9OfajGbj5VQZccqHsSyYJ0f6a4CLGQJi1s9UJjo,9294
217
- unstructured_ingest/processes/connectors/pinecone.py,sha256=TG-1hVfOsKFepxPfy2MCwEVBEZF4msg8lfNQZBpo35Y,13980
218
- unstructured_ingest/processes/connectors/redisdb.py,sha256=5LX6KtuNCzqjHqnJPw0zdKLE0iLx7Dk5RN9e_KT-up4,6975
219
- unstructured_ingest/processes/connectors/salesforce.py,sha256=a2Erx5pXbxKIj--oJWTGk2TeOcdmipuxgleazbD62o4,11664
82
+ unstructured_ingest/processes/connectors/outlook.py,sha256=zHM5frO7CqQG0-KcTyX49aZeSlsvVrl8kh_lR_ESgQw,9275
83
+ unstructured_ingest/processes/connectors/pinecone.py,sha256=BdO1PS_Y6FOeL-7uPl-Eh6ij1wHOwMkopOzKQGQ9Ac0,13979
84
+ unstructured_ingest/processes/connectors/redisdb.py,sha256=YzvSlfHs83XWsWMaIC3bV5enKfxejMQ9BQ8CtXfnJ5o,6923
85
+ unstructured_ingest/processes/connectors/salesforce.py,sha256=OaKEWCqZrirHqFJ650K5jSPwYlWefPOapas8Y-4D9oc,11661
220
86
  unstructured_ingest/processes/connectors/sharepoint.py,sha256=PowaqMzWr-VCW1rnwcAeRhHyE55kJ9J9FCVlrmtzN0E,4827
221
- unstructured_ingest/processes/connectors/slack.py,sha256=e4ntATdht_olAPsco1DKwlrOkpKLyDznPO1NJmsr0A8,9243
87
+ unstructured_ingest/processes/connectors/slack.py,sha256=EkFj9PcAu5_gF2xLogikKDADLbJYq-_jvchzYrTdLO4,9224
222
88
  unstructured_ingest/processes/connectors/utils.py,sha256=TAd0hb1f291N-q7-TUe6JKSCGkhqDyo7Ij8zmliBZUc,2071
223
- unstructured_ingest/processes/connectors/vectara.py,sha256=frKJkc7ffstQhXD9-HkAGoQAofGkl6AsnKJhGcl8LgA,12294
89
+ unstructured_ingest/processes/connectors/vectara.py,sha256=xrC6jkgW8BII4UjdzUelDu122xT484cpfMTK2wl-sko,12292
224
90
  unstructured_ingest/processes/connectors/assets/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
91
+ unstructured_ingest/processes/connectors/assets/databricks_delta_table_schema.sql,sha256=dUZZDNkyvQXKqoAThRz3ek7zaUE2l_LAQimlG5WZhH4,211
92
+ unstructured_ingest/processes/connectors/assets/weaviate_collection_config.json,sha256=SJlIO0kXxy866tWQ8bEzvwLwflsoUMIS-OKlxMvHIuE,504
225
93
  unstructured_ingest/processes/connectors/databricks/__init__.py,sha256=RtKAPyNtXh6fzEsOQ08pA0-vC1uMr3KqYG6cqiBoo70,2133
226
94
  unstructured_ingest/processes/connectors/databricks/volumes.py,sha256=OWQrne9-5hPzc-kxGa2P53M3DoksDzMDyjLhQyihdCo,8020
227
- unstructured_ingest/processes/connectors/databricks/volumes_aws.py,sha256=RP9rq2sfysygiqzXj6eX0CXeZpxk65xmrz7HZnWRQWA,2961
228
- unstructured_ingest/processes/connectors/databricks/volumes_azure.py,sha256=NOHsRbvG4IB-8HWlwvCw2uuJqEwcvz332NC2kSBUAH8,3746
229
- unstructured_ingest/processes/connectors/databricks/volumes_gcp.py,sha256=89N7YqtW8nHFmiq4_D201Ib-zXTLYU98Ap71goGoNEY,2997
95
+ unstructured_ingest/processes/connectors/databricks/volumes_aws.py,sha256=WhGTp6aRTLSdc4GChCL4mz2b-IanderW8j1IqezX6YA,2958
96
+ unstructured_ingest/processes/connectors/databricks/volumes_azure.py,sha256=pF2d6uAIbwJJUeOIG5xknUMCGc5d9Aztmc2776wp-a0,3740
97
+ unstructured_ingest/processes/connectors/databricks/volumes_gcp.py,sha256=y9AvVl6PtnIxlTlrPj_wyHBDBRJNq3uoTOuZwTryNg8,2994
230
98
  unstructured_ingest/processes/connectors/databricks/volumes_native.py,sha256=pivySGMmFSsyuB42ARAWAPXFQ7qTQxO3dfEoE23pBNM,3104
231
- unstructured_ingest/processes/connectors/databricks/volumes_table.py,sha256=ctQSoStHR1P1alhIH84Mpqmw4Wtnt3FsmBG7iH14iPE,8214
99
+ unstructured_ingest/processes/connectors/databricks/volumes_table.py,sha256=tqi6PpYpIBMTZcYZXl5Lw0YuawyDvjHI08TKPFFTTr0,8194
232
100
  unstructured_ingest/processes/connectors/duckdb/__init__.py,sha256=Dr6BRJJGefJnnp_vn5W5gBd7vrCCXTMLweuDIqTP-fM,558
233
- unstructured_ingest/processes/connectors/duckdb/base.py,sha256=-8TKht_HCssKM8EBA6rszAB2GdbeGZ25Vbp-Y_gRTfI,2935
234
- unstructured_ingest/processes/connectors/duckdb/duckdb.py,sha256=-C4gxvy52TFopc7LdIaJeNgBhx5UUH2DRABuyun5W9Y,4429
235
- unstructured_ingest/processes/connectors/duckdb/motherduck.py,sha256=lxE7vyEj3lD3SdGwgfuMhZKvRPbynscPu9u_xukwM30,4692
101
+ unstructured_ingest/processes/connectors/duckdb/base.py,sha256=bTLhilg6mgERNCpeeNNl7wxy3xkOt23O9XpCyD0WVY4,2945
102
+ unstructured_ingest/processes/connectors/duckdb/duckdb.py,sha256=jsmibTd_yvYzkCT05HhCJvplyobtjfNILC3zyTuCcVY,4464
103
+ unstructured_ingest/processes/connectors/duckdb/motherduck.py,sha256=Atr2MjJQGFGWh5aeiQsLpUbFw-aCZH-ABI1LprDh5VI,4727
236
104
  unstructured_ingest/processes/connectors/elasticsearch/__init__.py,sha256=M8mmBWoP6J5R3hxg6BQUMexYlTUxUxdBoIcjUop8yt8,826
237
105
  unstructured_ingest/processes/connectors/elasticsearch/elasticsearch.py,sha256=iNedi-JVkAvdF15CbKwVRwXJazyST6ha3zcNyyGwVmQ,19003
238
106
  unstructured_ingest/processes/connectors/elasticsearch/opensearch.py,sha256=wggHvw8h-X0-3WPNxj9rt2xkrE7Pv7CV0B0KzTMzBB4,6944
@@ -242,11 +110,11 @@ unstructured_ingest/processes/connectors/fsspec/box.py,sha256=1gLS7xR2vbjgKBrQ4Z
242
110
  unstructured_ingest/processes/connectors/fsspec/dropbox.py,sha256=HwwKjQmjM7yFk9Esh_F20xDisRPXGUkFduzaasByRDE,8355
243
111
  unstructured_ingest/processes/connectors/fsspec/fsspec.py,sha256=13TJmipcz9mYQT6Qi4WmqUV3veHIhbLZIW_70qY-5tI,14469
244
112
  unstructured_ingest/processes/connectors/fsspec/gcs.py,sha256=ouxISCKpZTAj3T6pWGYbASu93wytJjl5WSICvQcrgfE,7172
245
- unstructured_ingest/processes/connectors/fsspec/s3.py,sha256=6tICE_qECwnWW7ViiF08Ax1y91EkDhfs8Bf-d7udJeA,7125
113
+ unstructured_ingest/processes/connectors/fsspec/s3.py,sha256=2ZV6b2E2pIsf_ab1Lty74FwpMnJZhpQUdamPgpwcKsQ,7141
246
114
  unstructured_ingest/processes/connectors/fsspec/sftp.py,sha256=pR_a2SgLjt8ffNkariHrPB1E0HVSTj5h3pt7KxTU3TI,6371
247
115
  unstructured_ingest/processes/connectors/fsspec/utils.py,sha256=jec_Qfe2hbfahBuY-u8FnvHuv933AI5HwPFjOL3kEEY,456
248
116
  unstructured_ingest/processes/connectors/ibm_watsonx/__init__.py,sha256=kf0UpgdAY2KK1R1FbAB6GEBBAIOeYQ8cZIr3bp660qM,374
249
- unstructured_ingest/processes/connectors/ibm_watsonx/ibm_watsonx_s3.py,sha256=vTW12EN3WIYgxtonedamSNuwDPt8sPsbp0ehQ81enf4,11601
117
+ unstructured_ingest/processes/connectors/ibm_watsonx/ibm_watsonx_s3.py,sha256=SpZIqjreXpLTpZfezhG6xkZ_h7w-QWmdjXDBG6mlddQ,11729
250
118
  unstructured_ingest/processes/connectors/kafka/__init__.py,sha256=pFN2cWwAStiGTAsQ616GIWKi_hDv0s74ZvNqhJEp1Pc,751
251
119
  unstructured_ingest/processes/connectors/kafka/cloud.py,sha256=Ki6iOLoZ86tYWdnLnMWYvb2hUCneKqo4mTJcfXh7YoQ,3432
252
120
  unstructured_ingest/processes/connectors/kafka/kafka.py,sha256=7NMvWijfoliyAgnmz8TM8oJt5x7RDzC-ABPdYAm7J3w,10306
@@ -334,13 +202,13 @@ unstructured_ingest/processes/connectors/qdrant/local.py,sha256=3b43kSVoGMcFWTRi
334
202
  unstructured_ingest/processes/connectors/qdrant/qdrant.py,sha256=1Y1-nfKqt4YooqKMqRKVE_ItV0S1v__PTaEhI3vvtOE,5456
335
203
  unstructured_ingest/processes/connectors/qdrant/server.py,sha256=biyF4xr6e7CH0loj_OPt02Xrx4DMkkxqYMAsVXuJ5-Q,1607
336
204
  unstructured_ingest/processes/connectors/sql/__init__.py,sha256=WNO7jSL1ABw7K5IxLc-eeKWGGJDk7jCp_OTLdTTkZug,2056
337
- unstructured_ingest/processes/connectors/sql/databricks_delta_tables.py,sha256=9fu-SmBkkrhxF6gC2FFd7tAsPoTswtM3YlMw3fYGjB4,9084
338
- unstructured_ingest/processes/connectors/sql/postgres.py,sha256=MY_jDMdXj82UsGQDcpMbMDnl7wpNuyFaHh1_QxJtFv4,5115
339
- unstructured_ingest/processes/connectors/sql/singlestore.py,sha256=162FnOvQ9fERUUQarfiHfTTimLj_5y5MoBl8j0toCtA,5702
340
- unstructured_ingest/processes/connectors/sql/snowflake.py,sha256=92PZYhAGEwbhMJ-tqwfAqfLH59BTZ3rZZn5az6GhAKw,9367
341
- unstructured_ingest/processes/connectors/sql/sql.py,sha256=uUPTJYuUDrivRd8Z66NA-JXLXJo4dsYbBwfotxFp2kI,15722
342
- unstructured_ingest/processes/connectors/sql/sqlite.py,sha256=mpzthGrRjVtGsmm7E_eCZzxuLUtpgxki0lx1y7s8l5I,5310
343
- unstructured_ingest/processes/connectors/sql/vastdb.py,sha256=vXYQ5it2uK-PjvKduBQa31MI4y6vAD60-gGE3hp6dPc,9605
205
+ unstructured_ingest/processes/connectors/sql/databricks_delta_tables.py,sha256=_IZFFGQUsHV9ScOOISrm6c5FSd4PnX91ePj_COat-gk,9320
206
+ unstructured_ingest/processes/connectors/sql/postgres.py,sha256=kDIL8Cj45EDpKqit1_araRpP4v3cb__QbYqoINg9f2k,5403
207
+ unstructured_ingest/processes/connectors/sql/singlestore.py,sha256=B46lpvyAj1AArpACi9MXbXD1-52zF6Dsj3RJtD1g4r0,5955
208
+ unstructured_ingest/processes/connectors/sql/snowflake.py,sha256=GSEoNrIoJM7p-Q-PrFiONamoxWzjQG8wZJG3mw5Uwdk,9589
209
+ unstructured_ingest/processes/connectors/sql/sql.py,sha256=yUGnv4MF_vT3VHdg7hhGiTD0be94ll-HyhHmRKQp_vQ,15712
210
+ unstructured_ingest/processes/connectors/sql/sqlite.py,sha256=V3OfRrXGGhTa_R2FPA-ysn95HHCv9x_VEBKVDsSGsbs,5549
211
+ unstructured_ingest/processes/connectors/sql/vastdb.py,sha256=trhvUBumDmj2rLjmxFBKw9L9wF6ZpssF0wfmRaG97H0,9803
344
212
  unstructured_ingest/processes/connectors/weaviate/__init__.py,sha256=1Vnz8hm_Cf3NkQUTz5ZD4QkbLSVql4UvRoY2j2FnC9k,853
345
213
  unstructured_ingest/processes/connectors/weaviate/cloud.py,sha256=tDQ4Vfph1RwADzS0Lk4TSoeT6TZ2gX9DNi78yXkgDw0,6245
346
214
  unstructured_ingest/processes/connectors/weaviate/embedded.py,sha256=buizqBd6PSbd9VgRrOj43GZEorBpDFkUIkE6sN9emhw,3008
@@ -348,23 +216,22 @@ unstructured_ingest/processes/connectors/weaviate/local.py,sha256=4fgZsL9dgnWuaS
348
216
  unstructured_ingest/processes/connectors/weaviate/weaviate.py,sha256=SqtGcQgejGH0N1R49tGrUtGcTB8mt7sywXmWFTIcpB8,12866
349
217
  unstructured_ingest/processes/connectors/zendesk/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
350
218
  unstructured_ingest/processes/connectors/zendesk/client.py,sha256=GvPIpx4aYdD58-edHgvCFjFao94uR0O5Yf4dT9NCmSk,11952
351
- unstructured_ingest/processes/connectors/zendesk/zendesk.py,sha256=kBBiSmP4_2Z8mdOzHRXK1vE-i0xvrLLn5E3rRdThxwg,9035
219
+ unstructured_ingest/processes/connectors/zendesk/zendesk.py,sha256=j5zS_7vJmYDEQtysz_UfwIUH65gc4r-Zjc1LocJr9FM,9033
352
220
  unstructured_ingest/processes/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
353
- unstructured_ingest/processes/utils/blob_storage.py,sha256=4pY2MMdw7ib_taAshZxFLlhxaRnrrJFndHDDt09qhTM,1013
221
+ unstructured_ingest/processes/utils/blob_storage.py,sha256=apMUmm9loxdbTRkkLH4VhG9kUVyiw9PFUJheSDxSxPk,1023
354
222
  unstructured_ingest/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
355
223
  unstructured_ingest/utils/chunking.py,sha256=9b3sXMA6L8RW5xAkKQbwdtVudGLAcj_sgT6Grh5tyYM,1870
356
224
  unstructured_ingest/utils/compression.py,sha256=_BkFREoa0fkJ6z-1lY76HCmy8mLymbPCg55iMUQTd5c,2653
357
225
  unstructured_ingest/utils/constants.py,sha256=pDspTYz-nEojHBqrZNfssGEiujmVa02pIWL63PQP9sU,103
358
- unstructured_ingest/utils/data_prep.py,sha256=s4i_P5VYcOk9O1rsAfSiFpWdxfnc02WfQf5j3iOdiVo,8150
226
+ unstructured_ingest/utils/data_prep.py,sha256=yqrv7x_nlj0y3uaN0m0Bnsekb7VIQnwABWPa24KU5QI,7426
359
227
  unstructured_ingest/utils/dep_check.py,sha256=SXXcUna2H0RtxA6j1S2NGkvQa9JP2DujWhmyBa7776Y,2400
360
228
  unstructured_ingest/utils/html.py,sha256=0WduP8tI5S3nHFQi6XHNPHgsIC9j3iWwyIayX9gDLiE,6386
361
229
  unstructured_ingest/utils/ndjson.py,sha256=nz8VUOPEgAFdhaDOpuveknvCU4x82fVwqE01qAbElH0,1201
362
230
  unstructured_ingest/utils/pydantic_models.py,sha256=BT_j15e4rX40wQbt8LUXbqfPhA3rJn1PHTI_G_A_EHY,1720
363
- unstructured_ingest/utils/string_and_date_utils.py,sha256=QBj8HXZGvDZQSULLOQwJ8tb3r2aYrTBQ71rkiV6gZdI,2519
231
+ unstructured_ingest/utils/string_and_date_utils.py,sha256=oXOI6rxXq-8ncbk7EoJK0WCcTXWj75EzKl8pfQMID3U,2522
364
232
  unstructured_ingest/utils/table.py,sha256=WZechczgVFvlodUWFcsnCGvBNh1xRm6hr0VbJTPxKAc,3669
365
- unstructured_ingest-0.7.1.dist-info/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
366
- unstructured_ingest-0.7.1.dist-info/METADATA,sha256=Ex_8EkItZzbGEoRJwR7Fqm_t0aajIZLVdtzwL7XBsQw,15050
367
- unstructured_ingest-0.7.1.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
368
- unstructured_ingest-0.7.1.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
369
- unstructured_ingest-0.7.1.dist-info/top_level.txt,sha256=85vUyT6fV2A5eCEM3M3FPRUUI9vZOVK1xVZt7eo1oV8,34
370
- unstructured_ingest-0.7.1.dist-info/RECORD,,
233
+ unstructured_ingest-1.0.1.dist-info/METADATA,sha256=k_kEG2BSsnNaIyDSJWiciUW0Z-HDiPF_flO6kLjn8QI,8713
234
+ unstructured_ingest-1.0.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
235
+ unstructured_ingest-1.0.1.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
236
+ unstructured_ingest-1.0.1.dist-info/licenses/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
237
+ unstructured_ingest-1.0.1.dist-info/RECORD,,
@@ -1,5 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.8.0)
2
+ Generator: hatchling 1.27.0
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
-
examples/__init__.py DELETED
File without changes
examples/airtable.py DELETED
@@ -1,44 +0,0 @@
1
- import os
2
- from pathlib import Path
3
-
4
- from unstructured_ingest.interfaces import ProcessorConfig
5
- from unstructured_ingest.logger import logger
6
- from unstructured_ingest.pipeline.pipeline import Pipeline
7
- from unstructured_ingest.processes.chunker import ChunkerConfig
8
- from unstructured_ingest.processes.connectors.airtable import (
9
- CONNECTOR_TYPE,
10
- AirtableAccessConfig,
11
- AirtableConnectionConfig,
12
- AirtableDownloaderConfig,
13
- AirtableIndexerConfig,
14
- )
15
- from unstructured_ingest.processes.connectors.local import (
16
- LocalUploaderConfig,
17
- )
18
- from unstructured_ingest.processes.embedder import EmbedderConfig
19
- from unstructured_ingest.processes.partitioner import PartitionerConfig
20
-
21
- base_path = Path(__file__).parent.parent.parent.parent
22
- docs_path = base_path / "example-docs"
23
- work_dir = base_path / "tmp_ingest" / CONNECTOR_TYPE
24
- output_path = work_dir / "output"
25
- download_path = work_dir / "download"
26
-
27
- if __name__ == "__main__":
28
- logger.info(f"writing all content in: {work_dir.resolve()}")
29
- Pipeline.from_configs(
30
- context=ProcessorConfig(work_dir=str(work_dir.resolve()), verbose=True),
31
- indexer_config=AirtableIndexerConfig(
32
- list_of_paths=["app5YQxSfp220fWtm", "appJ43QmP8I17zu88"]
33
- ),
34
- downloader_config=AirtableDownloaderConfig(download_dir=download_path),
35
- source_connection_config=AirtableConnectionConfig(
36
- access_config=AirtableAccessConfig(
37
- personal_access_token=os.getenv("AIRTABLE_PERSONAL_ACCESS_TOKEN")
38
- )
39
- ),
40
- partitioner_config=PartitionerConfig(strategy="fast"),
41
- chunker_config=ChunkerConfig(chunking_strategy="by_title"),
42
- embedder_config=EmbedderConfig(embedding_provider="huggingface"),
43
- uploader_config=LocalUploaderConfig(output_dir=str(output_path.resolve())),
44
- ).run()