unstructured-ingest 0.5.16__py3-none-any.whl → 0.5.17__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of unstructured-ingest might be problematic. Click here for more details.
- unstructured_ingest/__version__.py +1 -1
- unstructured_ingest/v2/processes/connectors/astradb.py +1 -1
- {unstructured_ingest-0.5.16.dist-info → unstructured_ingest-0.5.17.dist-info}/METADATA +23 -23
- {unstructured_ingest-0.5.16.dist-info → unstructured_ingest-0.5.17.dist-info}/RECORD +8 -8
- {unstructured_ingest-0.5.16.dist-info → unstructured_ingest-0.5.17.dist-info}/LICENSE.md +0 -0
- {unstructured_ingest-0.5.16.dist-info → unstructured_ingest-0.5.17.dist-info}/WHEEL +0 -0
- {unstructured_ingest-0.5.16.dist-info → unstructured_ingest-0.5.17.dist-info}/entry_points.txt +0 -0
- {unstructured_ingest-0.5.16.dist-info → unstructured_ingest-0.5.17.dist-info}/top_level.txt +0 -0
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "0.5.16" # pragma: no cover
|
|
1
|
+
__version__ = "0.5.17" # pragma: no cover
|
|
@@ -112,7 +112,6 @@ def get_astra_collection(
|
|
|
112
112
|
collection_name: str,
|
|
113
113
|
keyspace: str,
|
|
114
114
|
) -> "AstraDBCollection":
|
|
115
|
-
|
|
116
115
|
astra_db = get_astra_db(connection_config=connection_config, keyspace=keyspace)
|
|
117
116
|
|
|
118
117
|
# astradb will return a collection object in all cases (even if it doesn't exist)
|
|
@@ -315,6 +314,7 @@ class AstraDBUploadStager(UploadStager):
|
|
|
315
314
|
text_as_html, MAX_CONTENT_PARAM_BYTE_SIZE
|
|
316
315
|
)
|
|
317
316
|
metadata["original_elements"] = format_and_truncate_orig_elements(element_dict)
|
|
317
|
+
metadata.pop("orig_elements", None)
|
|
318
318
|
|
|
319
319
|
def conform_dict(self, element_dict: dict, file_data: FileData) -> dict:
|
|
320
320
|
self.truncate_dict_elements(element_dict)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.2
|
|
2
2
|
Name: unstructured-ingest
|
|
3
|
-
Version: 0.5.16
|
|
3
|
+
Version: 0.5.17
|
|
4
4
|
Summary: A library that prepares raw documents for downstream ML tasks.
|
|
5
5
|
Home-page: https://github.com/Unstructured-IO/unstructured-ingest
|
|
6
6
|
Author: Unstructured Technologies
|
|
@@ -22,13 +22,13 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
|
22
22
|
Requires-Python: >=3.9.0,<3.14
|
|
23
23
|
Description-Content-Type: text/markdown
|
|
24
24
|
License-File: LICENSE.md
|
|
25
|
-
Requires-Dist:
|
|
25
|
+
Requires-Dist: python-dateutil
|
|
26
26
|
Requires-Dist: opentelemetry-sdk
|
|
27
|
-
Requires-Dist:
|
|
27
|
+
Requires-Dist: pandas
|
|
28
|
+
Requires-Dist: click
|
|
28
29
|
Requires-Dist: dataclasses_json
|
|
29
30
|
Requires-Dist: pydantic>=2.7
|
|
30
|
-
Requires-Dist:
|
|
31
|
-
Requires-Dist: click
|
|
31
|
+
Requires-Dist: tqdm
|
|
32
32
|
Provides-Extra: remote
|
|
33
33
|
Requires-Dist: unstructured-client>=0.30.0; extra == "remote"
|
|
34
34
|
Provides-Extra: csv
|
|
@@ -66,13 +66,13 @@ Requires-Dist: pyairtable; extra == "airtable"
|
|
|
66
66
|
Provides-Extra: astradb
|
|
67
67
|
Requires-Dist: astrapy; extra == "astradb"
|
|
68
68
|
Provides-Extra: azure
|
|
69
|
-
Requires-Dist: adlfs; extra == "azure"
|
|
70
69
|
Requires-Dist: fsspec; extra == "azure"
|
|
70
|
+
Requires-Dist: adlfs; extra == "azure"
|
|
71
71
|
Provides-Extra: azure-ai-search
|
|
72
72
|
Requires-Dist: azure-search-documents; extra == "azure-ai-search"
|
|
73
73
|
Provides-Extra: biomed
|
|
74
|
-
Requires-Dist: requests; extra == "biomed"
|
|
75
74
|
Requires-Dist: bs4; extra == "biomed"
|
|
75
|
+
Requires-Dist: requests; extra == "biomed"
|
|
76
76
|
Provides-Extra: box
|
|
77
77
|
Requires-Dist: boxfs; extra == "box"
|
|
78
78
|
Requires-Dist: fsspec; extra == "box"
|
|
@@ -81,26 +81,26 @@ Requires-Dist: chromadb; extra == "chroma"
|
|
|
81
81
|
Provides-Extra: clarifai
|
|
82
82
|
Requires-Dist: clarifai; extra == "clarifai"
|
|
83
83
|
Provides-Extra: confluence
|
|
84
|
-
Requires-Dist: requests; extra == "confluence"
|
|
85
84
|
Requires-Dist: atlassian-python-api; extra == "confluence"
|
|
85
|
+
Requires-Dist: requests; extra == "confluence"
|
|
86
86
|
Provides-Extra: couchbase
|
|
87
87
|
Requires-Dist: couchbase; extra == "couchbase"
|
|
88
88
|
Provides-Extra: delta-table
|
|
89
|
-
Requires-Dist: deltalake; extra == "delta-table"
|
|
90
89
|
Requires-Dist: boto3; extra == "delta-table"
|
|
90
|
+
Requires-Dist: deltalake; extra == "delta-table"
|
|
91
91
|
Provides-Extra: discord
|
|
92
92
|
Requires-Dist: discord.py; extra == "discord"
|
|
93
93
|
Provides-Extra: dropbox
|
|
94
|
-
Requires-Dist: fsspec; extra == "dropbox"
|
|
95
94
|
Requires-Dist: dropboxdrivefs; extra == "dropbox"
|
|
95
|
+
Requires-Dist: fsspec; extra == "dropbox"
|
|
96
96
|
Provides-Extra: duckdb
|
|
97
97
|
Requires-Dist: duckdb; extra == "duckdb"
|
|
98
98
|
Provides-Extra: elasticsearch
|
|
99
99
|
Requires-Dist: elasticsearch[async]; extra == "elasticsearch"
|
|
100
100
|
Provides-Extra: gcs
|
|
101
|
-
Requires-Dist: fsspec; extra == "gcs"
|
|
102
101
|
Requires-Dist: bs4; extra == "gcs"
|
|
103
102
|
Requires-Dist: gcsfs; extra == "gcs"
|
|
103
|
+
Requires-Dist: fsspec; extra == "gcs"
|
|
104
104
|
Provides-Extra: github
|
|
105
105
|
Requires-Dist: requests; extra == "github"
|
|
106
106
|
Requires-Dist: pygithub>1.58.0; extra == "github"
|
|
@@ -124,18 +124,18 @@ Requires-Dist: pymilvus; extra == "milvus"
|
|
|
124
124
|
Provides-Extra: mongodb
|
|
125
125
|
Requires-Dist: pymongo; extra == "mongodb"
|
|
126
126
|
Provides-Extra: neo4j
|
|
127
|
+
Requires-Dist: networkx; extra == "neo4j"
|
|
127
128
|
Requires-Dist: neo4j-rust-ext; extra == "neo4j"
|
|
128
129
|
Requires-Dist: cymple; extra == "neo4j"
|
|
129
|
-
Requires-Dist: networkx; extra == "neo4j"
|
|
130
130
|
Provides-Extra: notion
|
|
131
|
-
Requires-Dist: httpx; extra == "notion"
|
|
132
131
|
Requires-Dist: htmlBuilder; extra == "notion"
|
|
133
132
|
Requires-Dist: notion-client; extra == "notion"
|
|
133
|
+
Requires-Dist: httpx; extra == "notion"
|
|
134
134
|
Requires-Dist: backoff; extra == "notion"
|
|
135
135
|
Provides-Extra: onedrive
|
|
136
|
+
Requires-Dist: bs4; extra == "onedrive"
|
|
136
137
|
Requires-Dist: msal; extra == "onedrive"
|
|
137
138
|
Requires-Dist: Office365-REST-Python-Client; extra == "onedrive"
|
|
138
|
-
Requires-Dist: bs4; extra == "onedrive"
|
|
139
139
|
Provides-Extra: opensearch
|
|
140
140
|
Requires-Dist: opensearch-py; extra == "opensearch"
|
|
141
141
|
Provides-Extra: outlook
|
|
@@ -152,21 +152,21 @@ Requires-Dist: praw; extra == "reddit"
|
|
|
152
152
|
Provides-Extra: redis
|
|
153
153
|
Requires-Dist: redis; extra == "redis"
|
|
154
154
|
Provides-Extra: s3
|
|
155
|
-
Requires-Dist: s3fs; extra == "s3"
|
|
156
155
|
Requires-Dist: fsspec; extra == "s3"
|
|
156
|
+
Requires-Dist: s3fs; extra == "s3"
|
|
157
157
|
Provides-Extra: sharepoint
|
|
158
158
|
Requires-Dist: msal; extra == "sharepoint"
|
|
159
159
|
Requires-Dist: Office365-REST-Python-Client; extra == "sharepoint"
|
|
160
160
|
Provides-Extra: salesforce
|
|
161
161
|
Requires-Dist: simple-salesforce; extra == "salesforce"
|
|
162
162
|
Provides-Extra: sftp
|
|
163
|
-
Requires-Dist: fsspec; extra == "sftp"
|
|
164
163
|
Requires-Dist: paramiko; extra == "sftp"
|
|
164
|
+
Requires-Dist: fsspec; extra == "sftp"
|
|
165
165
|
Provides-Extra: slack
|
|
166
166
|
Requires-Dist: slack_sdk[optional]; extra == "slack"
|
|
167
167
|
Provides-Extra: snowflake
|
|
168
|
-
Requires-Dist: psycopg2-binary; extra == "snowflake"
|
|
169
168
|
Requires-Dist: snowflake-connector-python; extra == "snowflake"
|
|
169
|
+
Requires-Dist: psycopg2-binary; extra == "snowflake"
|
|
170
170
|
Provides-Extra: wikipedia
|
|
171
171
|
Requires-Dist: wikipedia; extra == "wikipedia"
|
|
172
172
|
Provides-Extra: weaviate
|
|
@@ -178,13 +178,13 @@ Requires-Dist: databricks-sql-connector; extra == "databricks-delta-tables"
|
|
|
178
178
|
Provides-Extra: singlestore
|
|
179
179
|
Requires-Dist: singlestoredb; extra == "singlestore"
|
|
180
180
|
Provides-Extra: vectara
|
|
181
|
-
Requires-Dist: aiofiles; extra == "vectara"
|
|
182
|
-
Requires-Dist: requests; extra == "vectara"
|
|
183
181
|
Requires-Dist: httpx; extra == "vectara"
|
|
182
|
+
Requires-Dist: requests; extra == "vectara"
|
|
183
|
+
Requires-Dist: aiofiles; extra == "vectara"
|
|
184
184
|
Provides-Extra: vastdb
|
|
185
|
-
Requires-Dist: ibis; extra == "vastdb"
|
|
186
|
-
Requires-Dist: vastdb; extra == "vastdb"
|
|
187
185
|
Requires-Dist: pyarrow; extra == "vastdb"
|
|
186
|
+
Requires-Dist: vastdb; extra == "vastdb"
|
|
187
|
+
Requires-Dist: ibis; extra == "vastdb"
|
|
188
188
|
Provides-Extra: zendesk
|
|
189
189
|
Requires-Dist: httpx; extra == "zendesk"
|
|
190
190
|
Requires-Dist: bs4; extra == "zendesk"
|
|
@@ -192,8 +192,8 @@ Requires-Dist: aiofiles; extra == "zendesk"
|
|
|
192
192
|
Provides-Extra: embed-huggingface
|
|
193
193
|
Requires-Dist: sentence-transformers; extra == "embed-huggingface"
|
|
194
194
|
Provides-Extra: embed-octoai
|
|
195
|
-
Requires-Dist: tiktoken; extra == "embed-octoai"
|
|
196
195
|
Requires-Dist: openai; extra == "embed-octoai"
|
|
196
|
+
Requires-Dist: tiktoken; extra == "embed-octoai"
|
|
197
197
|
Provides-Extra: embed-vertexai
|
|
198
198
|
Requires-Dist: vertexai; extra == "embed-vertexai"
|
|
199
199
|
Provides-Extra: embed-voyageai
|
|
@@ -201,8 +201,8 @@ Requires-Dist: voyageai; extra == "embed-voyageai"
|
|
|
201
201
|
Provides-Extra: embed-mixedbreadai
|
|
202
202
|
Requires-Dist: mixedbread-ai; extra == "embed-mixedbreadai"
|
|
203
203
|
Provides-Extra: openai
|
|
204
|
-
Requires-Dist: tiktoken; extra == "openai"
|
|
205
204
|
Requires-Dist: openai; extra == "openai"
|
|
205
|
+
Requires-Dist: tiktoken; extra == "openai"
|
|
206
206
|
Provides-Extra: bedrock
|
|
207
207
|
Requires-Dist: aioboto3; extra == "bedrock"
|
|
208
208
|
Requires-Dist: boto3; extra == "bedrock"
|
|
@@ -111,7 +111,7 @@ test/unit/v2/partitioners/test_partitioner.py,sha256=iIYg7IpftV3LusoO4H8tr1IHY1U
|
|
|
111
111
|
test/unit/v2/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
112
112
|
test/unit/v2/utils/data_generator.py,sha256=UoYVNjG4S4wlaA9gceQ82HIpF9_6I1UTHD1_GrQBHp0,973
|
|
113
113
|
unstructured_ingest/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
|
|
114
|
-
unstructured_ingest/__version__.py,sha256=
|
|
114
|
+
unstructured_ingest/__version__.py,sha256=KerQQWRY3fKuaApvHFeNECgY6_9Sn7bl2FfaUuLCr4c,43
|
|
115
115
|
unstructured_ingest/error.py,sha256=qDncnJgbf5ils956RcO2CGlAKYDT5OaEM9Clv1JVTNc,1448
|
|
116
116
|
unstructured_ingest/interfaces.py,sha256=7DOnDpGvUNlCoFR7UPRGmOarqH5sFtuUOO5vf8X3oTM,31489
|
|
117
117
|
unstructured_ingest/logger.py,sha256=S5nSqGcABoQyeicgRnBQFjDScCaTvFVivOCvbo-laL0,4479
|
|
@@ -428,7 +428,7 @@ unstructured_ingest/v2/processes/partitioner.py,sha256=HxopDSbovLh_1epeGeVtuWEX7
|
|
|
428
428
|
unstructured_ingest/v2/processes/uncompress.py,sha256=Z_XfsITGdyaRwhtNUc7bMj5Y2jLuBge8KoK4nxhqKag,2425
|
|
429
429
|
unstructured_ingest/v2/processes/connectors/__init__.py,sha256=ebLvZes84qRx4eS20SkvlVH6WIIM76hifyUgkUJ-dfg,6588
|
|
430
430
|
unstructured_ingest/v2/processes/connectors/airtable.py,sha256=eeZJe-bBNxt5Sa-XEFCdcGeJCguJU5WN2Mv9kLp5dVQ,8917
|
|
431
|
-
unstructured_ingest/v2/processes/connectors/astradb.py,sha256=
|
|
431
|
+
unstructured_ingest/v2/processes/connectors/astradb.py,sha256=5xc5pWFicE_-2BV38oK-nnzAMI2EzF-q8XAqQ3qPUR8,18249
|
|
432
432
|
unstructured_ingest/v2/processes/connectors/azure_ai_search.py,sha256=ngPDpU0oZ6m5sxIlB6u5ebQpqCS_SJ-_amCC1KQ03EQ,11529
|
|
433
433
|
unstructured_ingest/v2/processes/connectors/chroma.py,sha256=VHCnM56qNXuHzovJihrNfJnZbWLJShOe8j12PJFrbL0,7219
|
|
434
434
|
unstructured_ingest/v2/processes/connectors/confluence.py,sha256=gSs4-AxL0gfeWdJfP7JfCrQSQNLoJRkvHquKK9RJvpQ,12043
|
|
@@ -577,9 +577,9 @@ unstructured_ingest/v2/processes/connectors/zendesk/client.py,sha256=DDAYQB7catK
|
|
|
577
577
|
unstructured_ingest/v2/processes/connectors/zendesk/zendesk.py,sha256=R8SXYkRhVUoWEHdGCt2CzcTxxuFundw_0GlGZ34YmbM,8987
|
|
578
578
|
unstructured_ingest/v2/processes/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
579
579
|
unstructured_ingest/v2/processes/utils/blob_storage.py,sha256=EWvK4HRYubr9i1UyMhv5cU9u0UzVkCDC_BIm4Uxab7Y,964
|
|
580
|
-
unstructured_ingest-0.5.16.dist-info/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
|
|
581
|
-
unstructured_ingest-0.5.
|
|
582
|
-
unstructured_ingest-0.5.
|
|
583
|
-
unstructured_ingest-0.5.
|
|
584
|
-
unstructured_ingest-0.5.
|
|
585
|
-
unstructured_ingest-0.5.16.dist-info/RECORD,,
|
|
580
|
+
unstructured_ingest-0.5.17.dist-info/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
|
|
581
|
+
unstructured_ingest-0.5.17.dist-info/METADATA,sha256=cfGRkKOAflmCnSh-KoaCVyBcWD2SN_onWWc0tVaZ8fc,8465
|
|
582
|
+
unstructured_ingest-0.5.17.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
|
583
|
+
unstructured_ingest-0.5.17.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
|
|
584
|
+
unstructured_ingest-0.5.17.dist-info/top_level.txt,sha256=DMuDMHZRMdeay8v8Kdi855muIv92F0OkutvBCaBEW6M,25
|
|
585
|
+
unstructured_ingest-0.5.17.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
{unstructured_ingest-0.5.16.dist-info → unstructured_ingest-0.5.17.dist-info}/entry_points.txt
RENAMED
|
File without changes
|
|
File without changes
|