unstructured-ingest 1.2.1__py3-none-any.whl → 1.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of unstructured-ingest might be problematic. Click here for more details.

@@ -1 +1 @@
1
- __version__ = "1.2.1" # pragma: no cover
1
+ __version__ = "1.2.2" # pragma: no cover
@@ -36,9 +36,11 @@ class Downloader(BaseProcess, BaseConnector, ABC):
36
36
  def get_download_path(self, file_data: FileData) -> Optional[Path]:
37
37
  if not file_data.source_identifiers:
38
38
  return None
39
+
39
40
  rel_path = file_data.source_identifiers.relative_path
40
41
  if not rel_path:
41
42
  return None
43
+
42
44
  rel_path = rel_path[1:] if rel_path.startswith("/") else rel_path
43
45
  return self.download_dir / Path(rel_path)
44
46
 
@@ -264,12 +264,31 @@ FsspecDownloaderConfigT = TypeVar("FsspecDownloaderConfigT", bound=FsspecDownloa
264
264
 
265
265
  @dataclass
266
266
  class FsspecDownloader(Downloader):
267
+ TEMP_DIR_PREFIX = "unstructured_"
268
+
267
269
  protocol: str
268
270
  connection_config: FsspecConnectionConfigT
269
271
  connector_type: str = CONNECTOR_TYPE
270
272
  download_config: Optional[FsspecDownloaderConfigT] = field(
271
273
  default_factory=lambda: FsspecDownloaderConfig()
272
274
  )
275
+
276
+ def get_download_path(self, file_data: FileData) -> Optional[Path]:
277
+ has_source_identifiers = file_data.source_identifiers is not None
278
+ has_filename = has_source_identifiers and file_data.source_identifiers.filename
279
+
280
+ if not (has_source_identifiers and has_filename):
281
+ return None
282
+
283
+ filename = file_data.source_identifiers.filename
284
+
285
+ mkdir_concurrent_safe(self.download_dir)
286
+
287
+ temp_dir = tempfile.mkdtemp(
288
+ prefix=self.TEMP_DIR_PREFIX,
289
+ dir=self.download_dir
290
+ )
291
+ return Path(temp_dir) / filename
273
292
 
274
293
  def is_async(self) -> bool:
275
294
  with self.connection_config.get_client(protocol=self.protocol) as client:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: unstructured_ingest
3
- Version: 1.2.1
3
+ Version: 1.2.2
4
4
  Summary: Local ETL data pipeline to get data RAG ready
5
5
  Author-email: Unstructured Technologies <devops@unstructuredai.io>
6
6
  License-Expression: Apache-2.0
@@ -1,5 +1,5 @@
1
1
  unstructured_ingest/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
2
- unstructured_ingest/__version__.py,sha256=r2tk3QlR-3IlKjbYsMIts7ynZhCAyO4v-dzzULrlUCM,42
2
+ unstructured_ingest/__version__.py,sha256=HPy7TMxiKrkQS-Rrw57HuZN3ZHBCTvYH8fjgFH1cXxs,41
3
3
  unstructured_ingest/error.py,sha256=qDncnJgbf5ils956RcO2CGlAKYDT5OaEM9Clv1JVTNc,1448
4
4
  unstructured_ingest/errors_v2.py,sha256=9RuRCi7lbDxCguDz07y5RiHoQiFIOWwOD7xqzJ2B3Yw,436
5
5
  unstructured_ingest/logger.py,sha256=7e_7UeK6hVOd5BQ6i9NzRUAPCS_DF839Y8TjUDywraY,1428
@@ -34,7 +34,7 @@ unstructured_ingest/embed/vertexai.py,sha256=DphvPhiYdXTMrQxJCd-64vMs4iVdLY_BphH
34
34
  unstructured_ingest/embed/voyageai.py,sha256=EOrYzaoXOZ6C4fNkMlCgb8KA8rdfgVXN3USMFpnn0Bs,4698
35
35
  unstructured_ingest/interfaces/__init__.py,sha256=QIkWqjsq9INTa89gPuXlMlQL4s3y5TqLmPkuVuTyXcs,795
36
36
  unstructured_ingest/interfaces/connector.py,sha256=wYWIEAL99KdQDDzzDYSf_yE8p1wjThSPMgEV5qyfiPc,1885
37
- unstructured_ingest/interfaces/downloader.py,sha256=xX0ZzsFRSzZb7SAeoeQph8sIbVq13DRw-3MYkdADrY0,2918
37
+ unstructured_ingest/interfaces/downloader.py,sha256=7pJ4wpWrP645lgTx9dO0rni8chiCAjsnOFaXRtJe8IY,2936
38
38
  unstructured_ingest/interfaces/indexer.py,sha256=c2FwWJEQHfFD6vO-tGfYLpLiIs-TYViLAt8YmHfDbaM,824
39
39
  unstructured_ingest/interfaces/process.py,sha256=S3A_9gkwwGC-iQxvnpj3Er6IJAjAT5npzpSgxuFAzUM,449
40
40
  unstructured_ingest/interfaces/processor.py,sha256=VX7JqXlbG1plxMK8THWhWINPbTICaaUEk4XUXhnOixY,3303
@@ -109,7 +109,7 @@ unstructured_ingest/processes/connectors/fsspec/__init__.py,sha256=3HTdw4L4mdN4W
109
109
  unstructured_ingest/processes/connectors/fsspec/azure.py,sha256=31VNiG5YnXfhrFX7QJ2O1ubeWHxbe1sYVIztefbscAQ,7148
110
110
  unstructured_ingest/processes/connectors/fsspec/box.py,sha256=1gLS7xR2vbjgKBrQ4ZpI1fKTsJuIDfXuAzx_a4FzxG4,5873
111
111
  unstructured_ingest/processes/connectors/fsspec/dropbox.py,sha256=HwwKjQmjM7yFk9Esh_F20xDisRPXGUkFduzaasByRDE,8355
112
- unstructured_ingest/processes/connectors/fsspec/fsspec.py,sha256=fA9jtXnr1P4wr8VBpZ1Lx9TsZzH-FDqHoBvPUH0DnWk,17827
112
+ unstructured_ingest/processes/connectors/fsspec/fsspec.py,sha256=yIvaII_uQ6ANibyj9aysM6c7fg5vUuL2eccLb51LhWk,18497
113
113
  unstructured_ingest/processes/connectors/fsspec/gcs.py,sha256=ouxISCKpZTAj3T6pWGYbASu93wytJjl5WSICvQcrgfE,7172
114
114
  unstructured_ingest/processes/connectors/fsspec/s3.py,sha256=P5nd3hamhLFO3l5nV3lMuIxHtb_rZYFP4F6q_py3xpc,7492
115
115
  unstructured_ingest/processes/connectors/fsspec/sftp.py,sha256=pR_a2SgLjt8ffNkariHrPB1E0HVSTj5h3pt7KxTU3TI,6371
@@ -235,8 +235,8 @@ unstructured_ingest/utils/pydantic_models.py,sha256=BT_j15e4rX40wQbt8LUXbqfPhA3r
235
235
  unstructured_ingest/utils/string_and_date_utils.py,sha256=oXOI6rxXq-8ncbk7EoJK0WCcTXWj75EzKl8pfQMID3U,2522
236
236
  unstructured_ingest/utils/table.py,sha256=WZechczgVFvlodUWFcsnCGvBNh1xRm6hr0VbJTPxKAc,3669
237
237
  unstructured_ingest/utils/tls.py,sha256=Ra8Mii1F4VqErRreg76PBI0eAqPBC009l0sSHa8FdnA,448
238
- unstructured_ingest-1.2.1.dist-info/METADATA,sha256=dABn7DHmV7FLVs7oG3G_ltcDE5OSERhhZdGfJAoDtN4,8826
239
- unstructured_ingest-1.2.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
240
- unstructured_ingest-1.2.1.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
241
- unstructured_ingest-1.2.1.dist-info/licenses/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
242
- unstructured_ingest-1.2.1.dist-info/RECORD,,
238
+ unstructured_ingest-1.2.2.dist-info/METADATA,sha256=kLg62BHEhhU0BK_73Qc0XqsKtrf5XN3pzD40eGXW3xM,8826
239
+ unstructured_ingest-1.2.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
240
+ unstructured_ingest-1.2.2.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
241
+ unstructured_ingest-1.2.2.dist-info/licenses/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
242
+ unstructured_ingest-1.2.2.dist-info/RECORD,,