unstructured-ingest 0.5.21__py3-none-any.whl → 0.5.23__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of unstructured-ingest might be problematic. Click here for more details.

@@ -1 +1 @@
1
- __version__ = "0.5.21" # pragma: no cover
1
+ __version__ = "0.5.23" # pragma: no cover
@@ -1,6 +1,13 @@
1
+ from unstructured_ingest.v2.types.file_data import (
2
+ BatchFileData,
3
+ BatchItem,
4
+ FileData,
5
+ FileDataSourceMetadata,
6
+ SourceIdentifiers,
7
+ )
8
+
1
9
  from .connector import AccessConfig, BaseConnector, ConnectionConfig
2
10
  from .downloader import Downloader, DownloaderConfig, DownloadResponse, download_responses
3
- from .file_data import BatchFileData, BatchItem, FileData, FileDataSourceMetadata, SourceIdentifiers
4
11
  from .indexer import Indexer, IndexerConfig
5
12
  from .process import BaseProcess
6
13
  from .processor import ProcessorConfig
@@ -1,116 +1,13 @@
1
- import json
2
- from pathlib import Path
3
- from typing import Any, Optional
4
- from uuid import NAMESPACE_DNS, uuid5
5
-
6
- from pydantic import BaseModel, Field, ValidationError, field_validator, model_validator
7
-
8
- from unstructured_ingest.v2.logger import logger
9
-
10
-
11
- class SourceIdentifiers(BaseModel):
12
- filename: str
13
- fullpath: str
14
- rel_path: Optional[str] = None
15
-
16
- @property
17
- def filename_stem(self) -> str:
18
- return Path(self.filename).stem
19
-
20
- @property
21
- def relative_path(self) -> str:
22
- return self.rel_path or self.fullpath
23
-
24
-
25
- class FileDataSourceMetadata(BaseModel):
26
- url: Optional[str] = None
27
- version: Optional[str] = None
28
- record_locator: Optional[dict[str, Any]] = None
29
- date_created: Optional[str] = None
30
- date_modified: Optional[str] = None
31
- date_processed: Optional[str] = None
32
- permissions_data: Optional[list[dict[str, Any]]] = None
33
- filesize_bytes: Optional[int] = None
34
-
35
-
36
- class FileData(BaseModel):
37
- identifier: str
38
- connector_type: str
39
- source_identifiers: SourceIdentifiers
40
- metadata: FileDataSourceMetadata = Field(default_factory=lambda: FileDataSourceMetadata())
41
- additional_metadata: dict[str, Any] = Field(default_factory=dict)
42
- reprocess: bool = False
43
- local_download_path: Optional[str] = None
44
- display_name: Optional[str] = None
45
-
46
- @classmethod
47
- def from_file(cls, path: str) -> "FileData":
48
- path = Path(path).resolve()
49
- if not path.exists() or not path.is_file():
50
- raise ValueError(f"file path not valid: {path}")
51
- with open(str(path.resolve()), "rb") as f:
52
- file_data_dict = json.load(f)
53
- file_data = cls.model_validate(file_data_dict)
54
- return file_data
55
-
56
- @classmethod
57
- def cast(cls, file_data: "FileData", **kwargs) -> "FileData":
58
- file_data_dict = file_data.model_dump()
59
- return cls.model_validate(file_data_dict, **kwargs)
60
-
61
- def to_file(self, path: str) -> None:
62
- path = Path(path).resolve()
63
- path.parent.mkdir(parents=True, exist_ok=True)
64
- with open(str(path.resolve()), "w") as f:
65
- json.dump(self.model_dump(), f, indent=2)
66
-
67
-
68
- class BatchItem(BaseModel):
69
- identifier: str
70
- version: Optional[str] = None
71
-
72
-
73
- class BatchFileData(FileData):
74
- identifier: str = Field(init=False)
75
- batch_items: list[BatchItem]
76
- source_identifiers: Optional[SourceIdentifiers] = None
77
-
78
- @field_validator("batch_items")
79
- @classmethod
80
- def check_batch_items(cls, v: list[BatchItem]) -> list[BatchItem]:
81
- if not v:
82
- raise ValueError("batch items cannot be empty")
83
- all_identifiers = [item.identifier for item in v]
84
- if len(all_identifiers) != len(set(all_identifiers)):
85
- raise ValueError(f"duplicate identifiers: {all_identifiers}")
86
- sorted_batch_items = sorted(v, key=lambda item: item.identifier)
87
- return sorted_batch_items
88
-
89
- @model_validator(mode="before")
90
- @classmethod
91
- def populate_identifier(cls, data: Any) -> Any:
92
- if isinstance(data, dict) and "identifier" not in data:
93
- batch_items = data["batch_items"]
94
- identifier_data = json.dumps(
95
- {item.identifier: item.version for item in batch_items}, sort_keys=True
96
- )
97
- data["identifier"] = str(uuid5(NAMESPACE_DNS, str(identifier_data)))
98
- return data
99
-
100
-
101
- def file_data_from_file(path: str) -> FileData:
102
- try:
103
- return BatchFileData.from_file(path=path)
104
- except ValidationError:
105
- logger.debug(f"{path} not detected as batch file data")
106
-
107
- return FileData.from_file(path=path)
108
-
109
-
110
- def file_data_from_dict(data: dict) -> FileData:
111
- try:
112
- return BatchFileData.model_validate(data)
113
- except ValidationError:
114
- logger.debug(f"{data} not valid for batch file data")
115
-
116
- return FileData.model_validate(data)
1
+ """
2
+ COMPATIBILITY NOTICE:
3
+ This file has moved to the v2/types/ module.
4
+ The following line exists for backward compatibility.
5
+ """
6
+
7
+ from unstructured_ingest.v2.types.file_data import * # noqa - star imports are bad, but this is for maximal backward compatibility
8
+
9
+ # Eventually this file should go away. Let's start warning users now:
10
+ logger.warning( # noqa - using logger from the star import
11
+ "Importing file_data.py through interfaces is deprecated. "
12
+ "Please use unstructured_ingest.v2.types.file_data instead!"
13
+ )
@@ -6,7 +6,7 @@ from pathlib import Path
6
6
  from time import time
7
7
  from typing import TYPE_CHECKING, Any, Generator, Optional, Union
8
8
 
9
- from pydantic import BaseModel, Field, Secret, SecretStr
9
+ from pydantic import BaseModel, Field, Secret, SecretStr, field_validator
10
10
 
11
11
  from unstructured_ingest.error import (
12
12
  DestinationConnectionError,
@@ -98,6 +98,12 @@ class ElasticsearchConnectionConfig(ConnectionConfig):
98
98
  ca_certs: Optional[Path] = None
99
99
  access_config: Secret[ElasticsearchAccessConfig]
100
100
 
101
+ @field_validator("hosts", mode="before")
102
+ def to_list(cls, value):
103
+ if isinstance(value, str):
104
+ return [value]
105
+ return value
106
+
101
107
  def get_client_kwargs(self) -> dict:
102
108
  # Update auth related fields to conform to what the SDK expects based on the
103
109
  # supported methods:
@@ -2,7 +2,7 @@ from dataclasses import dataclass, field
2
2
  from pathlib import Path
3
3
  from typing import TYPE_CHECKING, Optional
4
4
 
5
- from pydantic import BaseModel, Field, Secret
5
+ from pydantic import BaseModel, Field, Secret, field_validator
6
6
 
7
7
  from unstructured_ingest.error import (
8
8
  DestinationConnectionError,
@@ -78,6 +78,12 @@ class OpenSearchConnectionConfig(ConnectionConfig):
78
78
 
79
79
  access_config: Secret[OpenSearchAccessConfig]
80
80
 
81
+ @field_validator("hosts", mode="before")
82
+ def to_list(cls, value):
83
+ if isinstance(value, str):
84
+ return [value]
85
+ return value
86
+
81
87
  def get_client_kwargs(self) -> dict:
82
88
  # Update auth related fields to conform to what the SDK expects based on the
83
89
  # supported methods:
File without changes
@@ -0,0 +1,116 @@
1
+ import json
2
+ from pathlib import Path
3
+ from typing import Any, Optional
4
+ from uuid import NAMESPACE_DNS, uuid5
5
+
6
+ from pydantic import BaseModel, Field, ValidationError, field_validator, model_validator
7
+
8
+ from unstructured_ingest.v2.logger import logger
9
+
10
+
11
+ class SourceIdentifiers(BaseModel):
12
+ filename: str
13
+ fullpath: str
14
+ rel_path: Optional[str] = None
15
+
16
+ @property
17
+ def filename_stem(self) -> str:
18
+ return Path(self.filename).stem
19
+
20
+ @property
21
+ def relative_path(self) -> str:
22
+ return self.rel_path or self.fullpath
23
+
24
+
25
+ class FileDataSourceMetadata(BaseModel):
26
+ url: Optional[str] = None
27
+ version: Optional[str] = None
28
+ record_locator: Optional[dict[str, Any]] = None
29
+ date_created: Optional[str] = None
30
+ date_modified: Optional[str] = None
31
+ date_processed: Optional[str] = None
32
+ permissions_data: Optional[list[dict[str, Any]]] = None
33
+ filesize_bytes: Optional[int] = None
34
+
35
+
36
+ class FileData(BaseModel):
37
+ identifier: str
38
+ connector_type: str
39
+ source_identifiers: SourceIdentifiers
40
+ metadata: FileDataSourceMetadata = Field(default_factory=lambda: FileDataSourceMetadata())
41
+ additional_metadata: dict[str, Any] = Field(default_factory=dict)
42
+ reprocess: bool = False
43
+ local_download_path: Optional[str] = None
44
+ display_name: Optional[str] = None
45
+
46
+ @classmethod
47
+ def from_file(cls, path: str) -> "FileData":
48
+ path = Path(path).resolve()
49
+ if not path.exists() or not path.is_file():
50
+ raise ValueError(f"file path not valid: {path}")
51
+ with open(str(path.resolve()), "rb") as f:
52
+ file_data_dict = json.load(f)
53
+ file_data = cls.model_validate(file_data_dict)
54
+ return file_data
55
+
56
+ @classmethod
57
+ def cast(cls, file_data: "FileData", **kwargs) -> "FileData":
58
+ file_data_dict = file_data.model_dump()
59
+ return cls.model_validate(file_data_dict, **kwargs)
60
+
61
+ def to_file(self, path: str) -> None:
62
+ path = Path(path).resolve()
63
+ path.parent.mkdir(parents=True, exist_ok=True)
64
+ with open(str(path.resolve()), "w") as f:
65
+ json.dump(self.model_dump(), f, indent=2)
66
+
67
+
68
+ class BatchItem(BaseModel):
69
+ identifier: str
70
+ version: Optional[str] = None
71
+
72
+
73
+ class BatchFileData(FileData):
74
+ identifier: str = Field(init=False)
75
+ batch_items: list[BatchItem]
76
+ source_identifiers: Optional[SourceIdentifiers] = None
77
+
78
+ @field_validator("batch_items")
79
+ @classmethod
80
+ def check_batch_items(cls, v: list[BatchItem]) -> list[BatchItem]:
81
+ if not v:
82
+ raise ValueError("batch items cannot be empty")
83
+ all_identifiers = [item.identifier for item in v]
84
+ if len(all_identifiers) != len(set(all_identifiers)):
85
+ raise ValueError(f"duplicate identifiers: {all_identifiers}")
86
+ sorted_batch_items = sorted(v, key=lambda item: item.identifier)
87
+ return sorted_batch_items
88
+
89
+ @model_validator(mode="before")
90
+ @classmethod
91
+ def populate_identifier(cls, data: Any) -> Any:
92
+ if isinstance(data, dict) and "identifier" not in data:
93
+ batch_items = data["batch_items"]
94
+ identifier_data = json.dumps(
95
+ {item.identifier: item.version for item in batch_items}, sort_keys=True
96
+ )
97
+ data["identifier"] = str(uuid5(NAMESPACE_DNS, str(identifier_data)))
98
+ return data
99
+
100
+
101
+ def file_data_from_file(path: str) -> FileData:
102
+ try:
103
+ return BatchFileData.from_file(path=path)
104
+ except ValidationError:
105
+ logger.debug(f"{path} not detected as batch file data")
106
+
107
+ return FileData.from_file(path=path)
108
+
109
+
110
+ def file_data_from_dict(data: dict) -> FileData:
111
+ try:
112
+ return BatchFileData.model_validate(data)
113
+ except ValidationError:
114
+ logger.debug(f"{data} not valid for batch file data")
115
+
116
+ return FileData.model_validate(data)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: unstructured-ingest
3
- Version: 0.5.21
3
+ Version: 0.5.23
4
4
  Summary: A library that prepares raw documents for downstream ML tasks.
5
5
  Home-page: https://github.com/Unstructured-IO/unstructured-ingest
6
6
  Author: Unstructured Technologies
@@ -22,12 +22,12 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
22
22
  Requires-Python: >=3.9.0,<3.14
23
23
  Description-Content-Type: text/markdown
24
24
  License-File: LICENSE.md
25
- Requires-Dist: click
26
25
  Requires-Dist: dataclasses_json
27
- Requires-Dist: pydantic>=2.7
28
- Requires-Dist: python-dateutil
29
26
  Requires-Dist: opentelemetry-sdk
27
+ Requires-Dist: click
30
28
  Requires-Dist: tqdm
29
+ Requires-Dist: python-dateutil
30
+ Requires-Dist: pydantic>=2.7
31
31
  Requires-Dist: numpy
32
32
  Requires-Dist: pandas
33
33
  Provides-Extra: remote
@@ -103,8 +103,8 @@ Requires-Dist: astrapy; extra == "astradb"
103
103
  Requires-Dist: numpy; extra == "astradb"
104
104
  Requires-Dist: pandas; extra == "astradb"
105
105
  Provides-Extra: azure
106
- Requires-Dist: fsspec; extra == "azure"
107
106
  Requires-Dist: adlfs; extra == "azure"
107
+ Requires-Dist: fsspec; extra == "azure"
108
108
  Requires-Dist: numpy; extra == "azure"
109
109
  Requires-Dist: pandas; extra == "azure"
110
110
  Provides-Extra: azure-ai-search
@@ -117,8 +117,8 @@ Requires-Dist: requests; extra == "biomed"
117
117
  Requires-Dist: numpy; extra == "biomed"
118
118
  Requires-Dist: pandas; extra == "biomed"
119
119
  Provides-Extra: box
120
- Requires-Dist: fsspec; extra == "box"
121
120
  Requires-Dist: boxfs; extra == "box"
121
+ Requires-Dist: fsspec; extra == "box"
122
122
  Requires-Dist: numpy; extra == "box"
123
123
  Requires-Dist: pandas; extra == "box"
124
124
  Provides-Extra: chroma
@@ -148,8 +148,8 @@ Requires-Dist: discord.py; extra == "discord"
148
148
  Requires-Dist: numpy; extra == "discord"
149
149
  Requires-Dist: pandas; extra == "discord"
150
150
  Provides-Extra: dropbox
151
- Requires-Dist: fsspec; extra == "dropbox"
152
151
  Requires-Dist: dropboxdrivefs; extra == "dropbox"
152
+ Requires-Dist: fsspec; extra == "dropbox"
153
153
  Requires-Dist: numpy; extra == "dropbox"
154
154
  Requires-Dist: pandas; extra == "dropbox"
155
155
  Provides-Extra: duckdb
@@ -161,9 +161,9 @@ Requires-Dist: elasticsearch[async]; extra == "elasticsearch"
161
161
  Requires-Dist: numpy; extra == "elasticsearch"
162
162
  Requires-Dist: pandas; extra == "elasticsearch"
163
163
  Provides-Extra: gcs
164
- Requires-Dist: bs4; extra == "gcs"
165
- Requires-Dist: fsspec; extra == "gcs"
166
164
  Requires-Dist: gcsfs; extra == "gcs"
165
+ Requires-Dist: fsspec; extra == "gcs"
166
+ Requires-Dist: bs4; extra == "gcs"
167
167
  Requires-Dist: numpy; extra == "gcs"
168
168
  Requires-Dist: pandas; extra == "gcs"
169
169
  Provides-Extra: github
@@ -185,10 +185,10 @@ Requires-Dist: hubspot-api-client; extra == "hubspot"
185
185
  Requires-Dist: numpy; extra == "hubspot"
186
186
  Requires-Dist: pandas; extra == "hubspot"
187
187
  Provides-Extra: ibm-watsonx-s3
188
- Requires-Dist: pyiceberg; extra == "ibm-watsonx-s3"
189
188
  Requires-Dist: pyarrow; extra == "ibm-watsonx-s3"
190
- Requires-Dist: httpx; extra == "ibm-watsonx-s3"
191
189
  Requires-Dist: tenacity; extra == "ibm-watsonx-s3"
190
+ Requires-Dist: pyiceberg; extra == "ibm-watsonx-s3"
191
+ Requires-Dist: httpx; extra == "ibm-watsonx-s3"
192
192
  Requires-Dist: numpy; extra == "ibm-watsonx-s3"
193
193
  Requires-Dist: pandas; extra == "ibm-watsonx-s3"
194
194
  Provides-Extra: jira
@@ -216,22 +216,22 @@ Requires-Dist: pymongo; extra == "mongodb"
216
216
  Requires-Dist: numpy; extra == "mongodb"
217
217
  Requires-Dist: pandas; extra == "mongodb"
218
218
  Provides-Extra: neo4j
219
- Requires-Dist: networkx; extra == "neo4j"
220
219
  Requires-Dist: neo4j-rust-ext; extra == "neo4j"
220
+ Requires-Dist: networkx; extra == "neo4j"
221
221
  Requires-Dist: cymple; extra == "neo4j"
222
222
  Requires-Dist: numpy; extra == "neo4j"
223
223
  Requires-Dist: pandas; extra == "neo4j"
224
224
  Provides-Extra: notion
225
- Requires-Dist: httpx; extra == "notion"
225
+ Requires-Dist: backoff; extra == "notion"
226
226
  Requires-Dist: htmlBuilder; extra == "notion"
227
227
  Requires-Dist: notion-client; extra == "notion"
228
- Requires-Dist: backoff; extra == "notion"
228
+ Requires-Dist: httpx; extra == "notion"
229
229
  Requires-Dist: numpy; extra == "notion"
230
230
  Requires-Dist: pandas; extra == "notion"
231
231
  Provides-Extra: onedrive
232
- Requires-Dist: bs4; extra == "onedrive"
233
- Requires-Dist: msal; extra == "onedrive"
234
232
  Requires-Dist: Office365-REST-Python-Client; extra == "onedrive"
233
+ Requires-Dist: msal; extra == "onedrive"
234
+ Requires-Dist: bs4; extra == "onedrive"
235
235
  Requires-Dist: numpy; extra == "onedrive"
236
236
  Requires-Dist: pandas; extra == "onedrive"
237
237
  Provides-Extra: opensearch
@@ -239,8 +239,8 @@ Requires-Dist: opensearch-py; extra == "opensearch"
239
239
  Requires-Dist: numpy; extra == "opensearch"
240
240
  Requires-Dist: pandas; extra == "opensearch"
241
241
  Provides-Extra: outlook
242
- Requires-Dist: msal; extra == "outlook"
243
242
  Requires-Dist: Office365-REST-Python-Client; extra == "outlook"
243
+ Requires-Dist: msal; extra == "outlook"
244
244
  Requires-Dist: numpy; extra == "outlook"
245
245
  Requires-Dist: pandas; extra == "outlook"
246
246
  Provides-Extra: pinecone
@@ -264,13 +264,13 @@ Requires-Dist: redis; extra == "redis"
264
264
  Requires-Dist: numpy; extra == "redis"
265
265
  Requires-Dist: pandas; extra == "redis"
266
266
  Provides-Extra: s3
267
- Requires-Dist: fsspec; extra == "s3"
268
267
  Requires-Dist: s3fs; extra == "s3"
268
+ Requires-Dist: fsspec; extra == "s3"
269
269
  Requires-Dist: numpy; extra == "s3"
270
270
  Requires-Dist: pandas; extra == "s3"
271
271
  Provides-Extra: sharepoint
272
- Requires-Dist: msal; extra == "sharepoint"
273
272
  Requires-Dist: Office365-REST-Python-Client; extra == "sharepoint"
273
+ Requires-Dist: msal; extra == "sharepoint"
274
274
  Requires-Dist: numpy; extra == "sharepoint"
275
275
  Requires-Dist: pandas; extra == "sharepoint"
276
276
  Provides-Extra: salesforce
@@ -287,8 +287,8 @@ Requires-Dist: slack_sdk[optional]; extra == "slack"
287
287
  Requires-Dist: numpy; extra == "slack"
288
288
  Requires-Dist: pandas; extra == "slack"
289
289
  Provides-Extra: snowflake
290
- Requires-Dist: psycopg2-binary; extra == "snowflake"
291
290
  Requires-Dist: snowflake-connector-python; extra == "snowflake"
291
+ Requires-Dist: psycopg2-binary; extra == "snowflake"
292
292
  Requires-Dist: numpy; extra == "snowflake"
293
293
  Requires-Dist: pandas; extra == "snowflake"
294
294
  Provides-Extra: wikipedia
@@ -312,21 +312,21 @@ Requires-Dist: singlestoredb; extra == "singlestore"
312
312
  Requires-Dist: numpy; extra == "singlestore"
313
313
  Requires-Dist: pandas; extra == "singlestore"
314
314
  Provides-Extra: vectara
315
- Requires-Dist: httpx; extra == "vectara"
316
315
  Requires-Dist: aiofiles; extra == "vectara"
317
316
  Requires-Dist: requests; extra == "vectara"
317
+ Requires-Dist: httpx; extra == "vectara"
318
318
  Requires-Dist: numpy; extra == "vectara"
319
319
  Requires-Dist: pandas; extra == "vectara"
320
320
  Provides-Extra: vastdb
321
- Requires-Dist: ibis; extra == "vastdb"
322
- Requires-Dist: pyarrow; extra == "vastdb"
323
321
  Requires-Dist: vastdb; extra == "vastdb"
322
+ Requires-Dist: pyarrow; extra == "vastdb"
323
+ Requires-Dist: ibis; extra == "vastdb"
324
324
  Requires-Dist: numpy; extra == "vastdb"
325
325
  Requires-Dist: pandas; extra == "vastdb"
326
326
  Provides-Extra: zendesk
327
327
  Requires-Dist: bs4; extra == "zendesk"
328
- Requires-Dist: httpx; extra == "zendesk"
329
328
  Requires-Dist: aiofiles; extra == "zendesk"
329
+ Requires-Dist: httpx; extra == "zendesk"
330
330
  Requires-Dist: numpy; extra == "zendesk"
331
331
  Requires-Dist: pandas; extra == "zendesk"
332
332
  Provides-Extra: embed-huggingface
@@ -334,8 +334,8 @@ Requires-Dist: sentence-transformers; extra == "embed-huggingface"
334
334
  Requires-Dist: numpy; extra == "embed-huggingface"
335
335
  Requires-Dist: pandas; extra == "embed-huggingface"
336
336
  Provides-Extra: embed-octoai
337
- Requires-Dist: tiktoken; extra == "embed-octoai"
338
337
  Requires-Dist: openai; extra == "embed-octoai"
338
+ Requires-Dist: tiktoken; extra == "embed-octoai"
339
339
  Requires-Dist: numpy; extra == "embed-octoai"
340
340
  Requires-Dist: pandas; extra == "embed-octoai"
341
341
  Provides-Extra: embed-vertexai
@@ -351,13 +351,13 @@ Requires-Dist: mixedbread-ai; extra == "embed-mixedbreadai"
351
351
  Requires-Dist: numpy; extra == "embed-mixedbreadai"
352
352
  Requires-Dist: pandas; extra == "embed-mixedbreadai"
353
353
  Provides-Extra: openai
354
- Requires-Dist: tiktoken; extra == "openai"
355
354
  Requires-Dist: openai; extra == "openai"
355
+ Requires-Dist: tiktoken; extra == "openai"
356
356
  Requires-Dist: numpy; extra == "openai"
357
357
  Requires-Dist: pandas; extra == "openai"
358
358
  Provides-Extra: bedrock
359
- Requires-Dist: aioboto3; extra == "bedrock"
360
359
  Requires-Dist: boto3; extra == "bedrock"
360
+ Requires-Dist: aioboto3; extra == "bedrock"
361
361
  Requires-Dist: numpy; extra == "bedrock"
362
362
  Requires-Dist: pandas; extra == "bedrock"
363
363
  Provides-Extra: togetherai
@@ -113,7 +113,7 @@ test/unit/v2/partitioners/test_partitioner.py,sha256=iIYg7IpftV3LusoO4H8tr1IHY1U
113
113
  test/unit/v2/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
114
114
  test/unit/v2/utils/data_generator.py,sha256=UoYVNjG4S4wlaA9gceQ82HIpF9_6I1UTHD1_GrQBHp0,973
115
115
  unstructured_ingest/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
116
- unstructured_ingest/__version__.py,sha256=b5BrQJjlBZoPiM_J1cJDbJABGvcwaDFb_Bvwb0AHN10,43
116
+ unstructured_ingest/__version__.py,sha256=zwHqD3LgpFA-cY-rrS_2n5Kv-NY8b8mUJsGrAiSe2kA,43
117
117
  unstructured_ingest/error.py,sha256=qDncnJgbf5ils956RcO2CGlAKYDT5OaEM9Clv1JVTNc,1448
118
118
  unstructured_ingest/interfaces.py,sha256=7DOnDpGvUNlCoFR7UPRGmOarqH5sFtuUOO5vf8X3oTM,31489
119
119
  unstructured_ingest/logger.py,sha256=S5nSqGcABoQyeicgRnBQFjDScCaTvFVivOCvbo-laL0,4479
@@ -398,10 +398,10 @@ unstructured_ingest/v2/cli/base/src.py,sha256=cpQ43qQju4e5s_YSaPxUtA70BaisRkTBdj
398
398
  unstructured_ingest/v2/cli/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
399
399
  unstructured_ingest/v2/cli/utils/click.py,sha256=1_eJgrwS2DFBl1jZPLsj1vgVgR7agFBIEBe4A_n7mH4,7827
400
400
  unstructured_ingest/v2/cli/utils/model_conversion.py,sha256=7eEIkk1KU51-ZNiIfI1KRxlwITNW1xl1YxMAG8BcTk0,7604
401
- unstructured_ingest/v2/interfaces/__init__.py,sha256=Xp7-345QpM6MG7V7G4ZrVERjADAUBiPAY88PKaMRyqY,1005
401
+ unstructured_ingest/v2/interfaces/__init__.py,sha256=Jn5qtWOnmBZzsb2PoQYN3Xj5xHa9thSVc0BEoIN0Pw0,1059
402
402
  unstructured_ingest/v2/interfaces/connector.py,sha256=qUFFJ3qgDMenTCZMtVRjq1DIwsVak6pxNjQOH2eVkMw,1623
403
403
  unstructured_ingest/v2/interfaces/downloader.py,sha256=Qi_wISgUACZKEPu5p1kUaG3uiCXcr3zWg9z9uRDwoOk,2927
404
- unstructured_ingest/v2/interfaces/file_data.py,sha256=kowOhvYy0q_-khX3IuR111AfjkdQezEfxjzK6QDH7oA,3836
404
+ unstructured_ingest/v2/interfaces/file_data.py,sha256=DQYzXr8yjlm6VkGuwQLGJ1sia4Gr0d__POAFLrow1PE,525
405
405
  unstructured_ingest/v2/interfaces/indexer.py,sha256=i0oftyifXefxfKa4a3sCfSwkzWGSPE6EvC9sg6fwZgk,833
406
406
  unstructured_ingest/v2/interfaces/process.py,sha256=S3A_9gkwwGC-iQxvnpj3Er6IJAjAT5npzpSgxuFAzUM,449
407
407
  unstructured_ingest/v2/interfaces/processor.py,sha256=VX7JqXlbG1plxMK8THWhWINPbTICaaUEk4XUXhnOixY,3303
@@ -468,8 +468,8 @@ unstructured_ingest/v2/processes/connectors/duckdb/base.py,sha256=o3J81DnSwt3lmA
468
468
  unstructured_ingest/v2/processes/connectors/duckdb/duckdb.py,sha256=NIo2CCiPiuTFotNC891Mbelzg01knItryYGUtOM96xg,4393
469
469
  unstructured_ingest/v2/processes/connectors/duckdb/motherduck.py,sha256=RW-Cw94Hs3ZsN8Kb4ciSh_N-Qkp0cqkw_xkJbt8CDNU,4656
470
470
  unstructured_ingest/v2/processes/connectors/elasticsearch/__init__.py,sha256=Zzc0JNPP-eFqpwWw1Gp-XC8H-s__IgkYKzoagECycZY,829
471
- unstructured_ingest/v2/processes/connectors/elasticsearch/elasticsearch.py,sha256=MEKU64OsiQmbLPb3ken-WWCIV6-pnFbs_6kjJweG-SY,18813
472
- unstructured_ingest/v2/processes/connectors/elasticsearch/opensearch.py,sha256=qRz8Fyr2RSZIPZGkhPeme6AZxM0aX-c_xOa1ZtSr2Kg,6781
471
+ unstructured_ingest/v2/processes/connectors/elasticsearch/elasticsearch.py,sha256=KmlQCA7LXppxhL9e27LBBqNT999nUcc39qe2IkZsUJ8,18988
472
+ unstructured_ingest/v2/processes/connectors/elasticsearch/opensearch.py,sha256=tzOV0eNMyVHMXE5nedp6u0yyWC0Gn_blklg2ZdoOa4c,6956
473
473
  unstructured_ingest/v2/processes/connectors/fsspec/__init__.py,sha256=TtdeImM7Ypl_n6sl7I1JqX6bGSG0t_FqvCqE3Cy24og,1846
474
474
  unstructured_ingest/v2/processes/connectors/fsspec/azure.py,sha256=kw0UfGI2fx3oQ8jVpzF45pH8Qg_QP_que5C_VXgnktc,7156
475
475
  unstructured_ingest/v2/processes/connectors/fsspec/box.py,sha256=aJCtCHRBAauLwdWEQe704Cm4UHv-ukTXV2bT3SBENVk,5881
@@ -581,9 +581,11 @@ unstructured_ingest/v2/processes/connectors/zendesk/client.py,sha256=DDAYQB7catK
581
581
  unstructured_ingest/v2/processes/connectors/zendesk/zendesk.py,sha256=R8SXYkRhVUoWEHdGCt2CzcTxxuFundw_0GlGZ34YmbM,8987
582
582
  unstructured_ingest/v2/processes/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
583
583
  unstructured_ingest/v2/processes/utils/blob_storage.py,sha256=EWvK4HRYubr9i1UyMhv5cU9u0UzVkCDC_BIm4Uxab7Y,964
584
- unstructured_ingest-0.5.21.dist-info/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
585
- unstructured_ingest-0.5.21.dist-info/METADATA,sha256=c1bUHvgG6X9QOiAD669sVHAFkGfI2tBTRBM-eRJBLiU,14999
586
- unstructured_ingest-0.5.21.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
587
- unstructured_ingest-0.5.21.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
588
- unstructured_ingest-0.5.21.dist-info/top_level.txt,sha256=DMuDMHZRMdeay8v8Kdi855muIv92F0OkutvBCaBEW6M,25
589
- unstructured_ingest-0.5.21.dist-info/RECORD,,
584
+ unstructured_ingest/v2/types/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
585
+ unstructured_ingest/v2/types/file_data.py,sha256=kowOhvYy0q_-khX3IuR111AfjkdQezEfxjzK6QDH7oA,3836
586
+ unstructured_ingest-0.5.23.dist-info/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
587
+ unstructured_ingest-0.5.23.dist-info/METADATA,sha256=yEHUhxSR1EF-2IoXViunb9iiNlEy9p0LgMTngzwtjLM,14999
588
+ unstructured_ingest-0.5.23.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
589
+ unstructured_ingest-0.5.23.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
590
+ unstructured_ingest-0.5.23.dist-info/top_level.txt,sha256=DMuDMHZRMdeay8v8Kdi855muIv92F0OkutvBCaBEW6M,25
591
+ unstructured_ingest-0.5.23.dist-info/RECORD,,