unstructured-ingest 0.5.16__py3-none-any.whl → 0.5.17__py3-none-any.whl

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of unstructured-ingest might be problematic. Click here for more details.

@@ -1 +1 @@
1
- __version__ = "0.5.16" # pragma: no cover
1
+ __version__ = "0.5.17" # pragma: no cover
@@ -112,7 +112,6 @@ def get_astra_collection(
112
112
  collection_name: str,
113
113
  keyspace: str,
114
114
  ) -> "AstraDBCollection":
115
-
116
115
  astra_db = get_astra_db(connection_config=connection_config, keyspace=keyspace)
117
116
 
118
117
  # astradb will return a collection object in all cases (even if it doesn't exist)
@@ -315,6 +314,7 @@ class AstraDBUploadStager(UploadStager):
315
314
  text_as_html, MAX_CONTENT_PARAM_BYTE_SIZE
316
315
  )
317
316
  metadata["original_elements"] = format_and_truncate_orig_elements(element_dict)
317
+ metadata.pop("orig_elements", None)
318
318
 
319
319
  def conform_dict(self, element_dict: dict, file_data: FileData) -> dict:
320
320
  self.truncate_dict_elements(element_dict)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: unstructured-ingest
3
- Version: 0.5.16
3
+ Version: 0.5.17
4
4
  Summary: A library that prepares raw documents for downstream ML tasks.
5
5
  Home-page: https://github.com/Unstructured-IO/unstructured-ingest
6
6
  Author: Unstructured Technologies
@@ -22,13 +22,13 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
22
22
  Requires-Python: >=3.9.0,<3.14
23
23
  Description-Content-Type: text/markdown
24
24
  License-File: LICENSE.md
25
- Requires-Dist: pandas
25
+ Requires-Dist: python-dateutil
26
26
  Requires-Dist: opentelemetry-sdk
27
- Requires-Dist: tqdm
27
+ Requires-Dist: pandas
28
+ Requires-Dist: click
28
29
  Requires-Dist: dataclasses_json
29
30
  Requires-Dist: pydantic>=2.7
30
- Requires-Dist: python-dateutil
31
- Requires-Dist: click
31
+ Requires-Dist: tqdm
32
32
  Provides-Extra: remote
33
33
  Requires-Dist: unstructured-client>=0.30.0; extra == "remote"
34
34
  Provides-Extra: csv
@@ -66,13 +66,13 @@ Requires-Dist: pyairtable; extra == "airtable"
66
66
  Provides-Extra: astradb
67
67
  Requires-Dist: astrapy; extra == "astradb"
68
68
  Provides-Extra: azure
69
- Requires-Dist: adlfs; extra == "azure"
70
69
  Requires-Dist: fsspec; extra == "azure"
70
+ Requires-Dist: adlfs; extra == "azure"
71
71
  Provides-Extra: azure-ai-search
72
72
  Requires-Dist: azure-search-documents; extra == "azure-ai-search"
73
73
  Provides-Extra: biomed
74
- Requires-Dist: requests; extra == "biomed"
75
74
  Requires-Dist: bs4; extra == "biomed"
75
+ Requires-Dist: requests; extra == "biomed"
76
76
  Provides-Extra: box
77
77
  Requires-Dist: boxfs; extra == "box"
78
78
  Requires-Dist: fsspec; extra == "box"
@@ -81,26 +81,26 @@ Requires-Dist: chromadb; extra == "chroma"
81
81
  Provides-Extra: clarifai
82
82
  Requires-Dist: clarifai; extra == "clarifai"
83
83
  Provides-Extra: confluence
84
- Requires-Dist: requests; extra == "confluence"
85
84
  Requires-Dist: atlassian-python-api; extra == "confluence"
85
+ Requires-Dist: requests; extra == "confluence"
86
86
  Provides-Extra: couchbase
87
87
  Requires-Dist: couchbase; extra == "couchbase"
88
88
  Provides-Extra: delta-table
89
- Requires-Dist: deltalake; extra == "delta-table"
90
89
  Requires-Dist: boto3; extra == "delta-table"
90
+ Requires-Dist: deltalake; extra == "delta-table"
91
91
  Provides-Extra: discord
92
92
  Requires-Dist: discord.py; extra == "discord"
93
93
  Provides-Extra: dropbox
94
- Requires-Dist: fsspec; extra == "dropbox"
95
94
  Requires-Dist: dropboxdrivefs; extra == "dropbox"
95
+ Requires-Dist: fsspec; extra == "dropbox"
96
96
  Provides-Extra: duckdb
97
97
  Requires-Dist: duckdb; extra == "duckdb"
98
98
  Provides-Extra: elasticsearch
99
99
  Requires-Dist: elasticsearch[async]; extra == "elasticsearch"
100
100
  Provides-Extra: gcs
101
- Requires-Dist: fsspec; extra == "gcs"
102
101
  Requires-Dist: bs4; extra == "gcs"
103
102
  Requires-Dist: gcsfs; extra == "gcs"
103
+ Requires-Dist: fsspec; extra == "gcs"
104
104
  Provides-Extra: github
105
105
  Requires-Dist: requests; extra == "github"
106
106
  Requires-Dist: pygithub>1.58.0; extra == "github"
@@ -124,18 +124,18 @@ Requires-Dist: pymilvus; extra == "milvus"
124
124
  Provides-Extra: mongodb
125
125
  Requires-Dist: pymongo; extra == "mongodb"
126
126
  Provides-Extra: neo4j
127
+ Requires-Dist: networkx; extra == "neo4j"
127
128
  Requires-Dist: neo4j-rust-ext; extra == "neo4j"
128
129
  Requires-Dist: cymple; extra == "neo4j"
129
- Requires-Dist: networkx; extra == "neo4j"
130
130
  Provides-Extra: notion
131
- Requires-Dist: httpx; extra == "notion"
132
131
  Requires-Dist: htmlBuilder; extra == "notion"
133
132
  Requires-Dist: notion-client; extra == "notion"
133
+ Requires-Dist: httpx; extra == "notion"
134
134
  Requires-Dist: backoff; extra == "notion"
135
135
  Provides-Extra: onedrive
136
+ Requires-Dist: bs4; extra == "onedrive"
136
137
  Requires-Dist: msal; extra == "onedrive"
137
138
  Requires-Dist: Office365-REST-Python-Client; extra == "onedrive"
138
- Requires-Dist: bs4; extra == "onedrive"
139
139
  Provides-Extra: opensearch
140
140
  Requires-Dist: opensearch-py; extra == "opensearch"
141
141
  Provides-Extra: outlook
@@ -152,21 +152,21 @@ Requires-Dist: praw; extra == "reddit"
152
152
  Provides-Extra: redis
153
153
  Requires-Dist: redis; extra == "redis"
154
154
  Provides-Extra: s3
155
- Requires-Dist: s3fs; extra == "s3"
156
155
  Requires-Dist: fsspec; extra == "s3"
156
+ Requires-Dist: s3fs; extra == "s3"
157
157
  Provides-Extra: sharepoint
158
158
  Requires-Dist: msal; extra == "sharepoint"
159
159
  Requires-Dist: Office365-REST-Python-Client; extra == "sharepoint"
160
160
  Provides-Extra: salesforce
161
161
  Requires-Dist: simple-salesforce; extra == "salesforce"
162
162
  Provides-Extra: sftp
163
- Requires-Dist: fsspec; extra == "sftp"
164
163
  Requires-Dist: paramiko; extra == "sftp"
164
+ Requires-Dist: fsspec; extra == "sftp"
165
165
  Provides-Extra: slack
166
166
  Requires-Dist: slack_sdk[optional]; extra == "slack"
167
167
  Provides-Extra: snowflake
168
- Requires-Dist: psycopg2-binary; extra == "snowflake"
169
168
  Requires-Dist: snowflake-connector-python; extra == "snowflake"
169
+ Requires-Dist: psycopg2-binary; extra == "snowflake"
170
170
  Provides-Extra: wikipedia
171
171
  Requires-Dist: wikipedia; extra == "wikipedia"
172
172
  Provides-Extra: weaviate
@@ -178,13 +178,13 @@ Requires-Dist: databricks-sql-connector; extra == "databricks-delta-tables"
178
178
  Provides-Extra: singlestore
179
179
  Requires-Dist: singlestoredb; extra == "singlestore"
180
180
  Provides-Extra: vectara
181
- Requires-Dist: aiofiles; extra == "vectara"
182
- Requires-Dist: requests; extra == "vectara"
183
181
  Requires-Dist: httpx; extra == "vectara"
182
+ Requires-Dist: requests; extra == "vectara"
183
+ Requires-Dist: aiofiles; extra == "vectara"
184
184
  Provides-Extra: vastdb
185
- Requires-Dist: ibis; extra == "vastdb"
186
- Requires-Dist: vastdb; extra == "vastdb"
187
185
  Requires-Dist: pyarrow; extra == "vastdb"
186
+ Requires-Dist: vastdb; extra == "vastdb"
187
+ Requires-Dist: ibis; extra == "vastdb"
188
188
  Provides-Extra: zendesk
189
189
  Requires-Dist: httpx; extra == "zendesk"
190
190
  Requires-Dist: bs4; extra == "zendesk"
@@ -192,8 +192,8 @@ Requires-Dist: aiofiles; extra == "zendesk"
192
192
  Provides-Extra: embed-huggingface
193
193
  Requires-Dist: sentence-transformers; extra == "embed-huggingface"
194
194
  Provides-Extra: embed-octoai
195
- Requires-Dist: tiktoken; extra == "embed-octoai"
196
195
  Requires-Dist: openai; extra == "embed-octoai"
196
+ Requires-Dist: tiktoken; extra == "embed-octoai"
197
197
  Provides-Extra: embed-vertexai
198
198
  Requires-Dist: vertexai; extra == "embed-vertexai"
199
199
  Provides-Extra: embed-voyageai
@@ -201,8 +201,8 @@ Requires-Dist: voyageai; extra == "embed-voyageai"
201
201
  Provides-Extra: embed-mixedbreadai
202
202
  Requires-Dist: mixedbread-ai; extra == "embed-mixedbreadai"
203
203
  Provides-Extra: openai
204
- Requires-Dist: tiktoken; extra == "openai"
205
204
  Requires-Dist: openai; extra == "openai"
205
+ Requires-Dist: tiktoken; extra == "openai"
206
206
  Provides-Extra: bedrock
207
207
  Requires-Dist: aioboto3; extra == "bedrock"
208
208
  Requires-Dist: boto3; extra == "bedrock"
@@ -111,7 +111,7 @@ test/unit/v2/partitioners/test_partitioner.py,sha256=iIYg7IpftV3LusoO4H8tr1IHY1U
111
111
  test/unit/v2/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
112
112
  test/unit/v2/utils/data_generator.py,sha256=UoYVNjG4S4wlaA9gceQ82HIpF9_6I1UTHD1_GrQBHp0,973
113
113
  unstructured_ingest/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
114
- unstructured_ingest/__version__.py,sha256=EgX3pL6NG5u1RONYNW1ysr-stCliU2U7MUb-vn-absY,43
114
+ unstructured_ingest/__version__.py,sha256=KerQQWRY3fKuaApvHFeNECgY6_9Sn7bl2FfaUuLCr4c,43
115
115
  unstructured_ingest/error.py,sha256=qDncnJgbf5ils956RcO2CGlAKYDT5OaEM9Clv1JVTNc,1448
116
116
  unstructured_ingest/interfaces.py,sha256=7DOnDpGvUNlCoFR7UPRGmOarqH5sFtuUOO5vf8X3oTM,31489
117
117
  unstructured_ingest/logger.py,sha256=S5nSqGcABoQyeicgRnBQFjDScCaTvFVivOCvbo-laL0,4479
@@ -428,7 +428,7 @@ unstructured_ingest/v2/processes/partitioner.py,sha256=HxopDSbovLh_1epeGeVtuWEX7
428
428
  unstructured_ingest/v2/processes/uncompress.py,sha256=Z_XfsITGdyaRwhtNUc7bMj5Y2jLuBge8KoK4nxhqKag,2425
429
429
  unstructured_ingest/v2/processes/connectors/__init__.py,sha256=ebLvZes84qRx4eS20SkvlVH6WIIM76hifyUgkUJ-dfg,6588
430
430
  unstructured_ingest/v2/processes/connectors/airtable.py,sha256=eeZJe-bBNxt5Sa-XEFCdcGeJCguJU5WN2Mv9kLp5dVQ,8917
431
- unstructured_ingest/v2/processes/connectors/astradb.py,sha256=E6fB4anCd_gtSzVUsZ5pDrfdxs5AWERQM_NEfeenfEs,18202
431
+ unstructured_ingest/v2/processes/connectors/astradb.py,sha256=5xc5pWFicE_-2BV38oK-nnzAMI2EzF-q8XAqQ3qPUR8,18249
432
432
  unstructured_ingest/v2/processes/connectors/azure_ai_search.py,sha256=ngPDpU0oZ6m5sxIlB6u5ebQpqCS_SJ-_amCC1KQ03EQ,11529
433
433
  unstructured_ingest/v2/processes/connectors/chroma.py,sha256=VHCnM56qNXuHzovJihrNfJnZbWLJShOe8j12PJFrbL0,7219
434
434
  unstructured_ingest/v2/processes/connectors/confluence.py,sha256=gSs4-AxL0gfeWdJfP7JfCrQSQNLoJRkvHquKK9RJvpQ,12043
@@ -577,9 +577,9 @@ unstructured_ingest/v2/processes/connectors/zendesk/client.py,sha256=DDAYQB7catK
577
577
  unstructured_ingest/v2/processes/connectors/zendesk/zendesk.py,sha256=R8SXYkRhVUoWEHdGCt2CzcTxxuFundw_0GlGZ34YmbM,8987
578
578
  unstructured_ingest/v2/processes/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
579
579
  unstructured_ingest/v2/processes/utils/blob_storage.py,sha256=EWvK4HRYubr9i1UyMhv5cU9u0UzVkCDC_BIm4Uxab7Y,964
580
- unstructured_ingest-0.5.16.dist-info/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
581
- unstructured_ingest-0.5.16.dist-info/METADATA,sha256=Sm1qizGZbPHlXmzpcPhDuIsO-uWO-mrpfQZhovwhTQI,8465
582
- unstructured_ingest-0.5.16.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
583
- unstructured_ingest-0.5.16.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
584
- unstructured_ingest-0.5.16.dist-info/top_level.txt,sha256=DMuDMHZRMdeay8v8Kdi855muIv92F0OkutvBCaBEW6M,25
585
- unstructured_ingest-0.5.16.dist-info/RECORD,,
580
+ unstructured_ingest-0.5.17.dist-info/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
581
+ unstructured_ingest-0.5.17.dist-info/METADATA,sha256=cfGRkKOAflmCnSh-KoaCVyBcWD2SN_onWWc0tVaZ8fc,8465
582
+ unstructured_ingest-0.5.17.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
583
+ unstructured_ingest-0.5.17.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
584
+ unstructured_ingest-0.5.17.dist-info/top_level.txt,sha256=DMuDMHZRMdeay8v8Kdi855muIv92F0OkutvBCaBEW6M,25
585
+ unstructured_ingest-0.5.17.dist-info/RECORD,,