unstructured-ingest 0.0.7__py3-none-any.whl → 0.0.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of unstructured-ingest might be problematic.
- unstructured_ingest/__version__.py +1 -1
- unstructured_ingest/v2/cli/utils/model_conversion.py +17 -1
- unstructured_ingest/v2/interfaces/file_data.py +1 -0
- unstructured_ingest/v2/pipeline/steps/download.py +4 -7
- unstructured_ingest/v2/pipeline/steps/uncompress.py +3 -16
- unstructured_ingest/v2/processes/connectors/chroma.py +7 -6
- unstructured_ingest/v2/processes/connectors/google_drive.py +6 -5
- unstructured_ingest/v2/processes/connectors/milvus.py +30 -13
- unstructured_ingest/v2/processes/connectors/opensearch.py +29 -28
- unstructured_ingest/v2/processes/connectors/utils.py +11 -1
- unstructured_ingest/v2/processes/uncompress.py +25 -7
- {unstructured_ingest-0.0.7.dist-info → unstructured_ingest-0.0.9.dist-info}/METADATA +314 -314
- {unstructured_ingest-0.0.7.dist-info → unstructured_ingest-0.0.9.dist-info}/RECORD +17 -17
- {unstructured_ingest-0.0.7.dist-info → unstructured_ingest-0.0.9.dist-info}/LICENSE.md +0 -0
- {unstructured_ingest-0.0.7.dist-info → unstructured_ingest-0.0.9.dist-info}/WHEEL +0 -0
- {unstructured_ingest-0.0.7.dist-info → unstructured_ingest-0.0.9.dist-info}/entry_points.txt +0 -0
- {unstructured_ingest-0.0.7.dist-info → unstructured_ingest-0.0.9.dist-info}/top_level.txt +0 -0
unstructured_ingest/__version__.py
@@ -1 +1 @@
-__version__ = "0.0.7"  # pragma: no cover
+__version__ = "0.0.9"  # pragma: no cover
unstructured_ingest/v2/cli/utils/model_conversion.py
@@ -3,7 +3,18 @@ import datetime
 from collections import Counter
 from enum import EnumMeta
 from pathlib import Path
-from typing import
+from typing import (
+    Annotated,
+    Any,
+    Callable,
+    Literal,
+    Optional,
+    Type,
+    TypedDict,
+    Union,
+    get_args,
+    get_origin,
+)
 from uuid import UUID

 import click
@@ -102,6 +113,11 @@ def get_type_from_annotation(field_type: Any) -> click.ParamType:
     if field_origin is Union and len(field_args) == 2 and NoneType in field_args:
         field_type = next(field_arg for field_arg in field_args if field_arg is not None)
         return get_type_from_annotation(field_type=field_type)
+    if field_origin is Annotated:
+        field_origin = field_args[0]
+        field_metadata = field_args[1]
+        if isinstance(field_metadata, click.ParamType):
+            return field_metadata
     if field_origin is Secret and len(field_args) == 1:
         field_type = next(field_arg for field_arg in field_args if field_arg is not None)
         return get_type_from_annotation(field_type=field_type)
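Note: the new Annotated branch above lets a pydantic field carry an explicit click.ParamType that the CLI option generator returns directly. A minimal, standalone sketch of that lookup (resolve_param_type is an assumed helper, not the package's actual wiring; it only mirrors the Optional and Annotated cases shown in the hunk):

# Standalone sketch, not the package's code: Annotated metadata that is a
# click.ParamType wins; Optional[...] unwraps to its inner type first.
from typing import Annotated, Any, Optional, Union, get_args, get_origin

import click

NoneType = type(None)


def resolve_param_type(field_type: Any) -> click.ParamType:
    field_origin = get_origin(field_type)
    field_args = get_args(field_type)
    if field_origin is Union and len(field_args) == 2 and NoneType in field_args:
        inner = next(arg for arg in field_args if arg is not NoneType)
        return resolve_param_type(inner)
    if field_origin is Annotated:
        # Annotated[<type>, <metadata>]: honor an explicit click.ParamType
        if isinstance(field_args[1], click.ParamType):
            return field_args[1]
        return resolve_param_type(field_args[0])
    return click.INT if field_type is int else click.STRING


assert isinstance(resolve_param_type(Annotated[str, click.Choice(["a", "b"])]), click.Choice)
assert resolve_param_type(Optional[int]) is click.INT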
unstructured_ingest/v2/interfaces/file_data.py
@@ -42,6 +42,7 @@ class FileData(DataClassJsonMixin):
     metadata: FileDataSourceMetadata = field(default_factory=lambda: FileDataSourceMetadata())
     additional_metadata: dict[str, Any] = field(default_factory=dict)
     reprocess: bool = False
+    local_download_path: Optional[str] = None

     @classmethod
     def from_file(cls, path: str) -> "FileData":
unstructured_ingest/v2/pipeline/steps/download.py
@@ -68,10 +68,9 @@ class DownloadStep(PipelineStep):
     def update_file_data(
         self, file_data: FileData, file_data_path: Path, download_path: Path
     ) -> None:
+        file_data.local_download_path = str(download_path.resolve())
         file_size_bytes = download_path.stat().st_size
-        changed = False
         if not file_data.metadata.filesize_bytes and file_size_bytes:
-            changed = True
             file_data.metadata.filesize_bytes = file_size_bytes
         if (
             file_data.metadata.filesize_bytes
@@ -82,12 +81,10 @@ class DownloadStep(PipelineStep):
                 f"({file_data.metadata.filesize_bytes}) doesn't "
                 f"match size of local file: {file_size_bytes}, updating"
             )
-            changed = True
             file_data.metadata.filesize_bytes = file_size_bytes
-
-
-
-                json.dump(file_data.to_dict(), file, indent=2)
+        logger.debug(f"Updating file data with new content: {file_data.to_dict()}")
+        with file_data_path.open("w") as file:
+            json.dump(file_data.to_dict(), file, indent=2)

     async def _run_async(self, fn: Callable, file_data_path: str) -> list[DownloadStepResponse]:
         file_data = FileData.from_file(path=file_data_path)
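Note: update_file_data now records the resolved download path and always rewrites the serialized file data, instead of tracking a `changed` flag. A minimal standalone sketch of that flow (ExampleFileData and Metadata are assumed stand-ins for the real FileData classes):

# Standalone sketch, assumed data shapes: record the download path, reconcile
# filesize_bytes, and persist the updated record as JSON.
import json
import tempfile
from dataclasses import asdict, dataclass, field
from pathlib import Path
from typing import Optional


@dataclass
class Metadata:
    filesize_bytes: Optional[int] = None


@dataclass
class ExampleFileData:
    identifier: str
    metadata: Metadata = field(default_factory=Metadata)
    local_download_path: Optional[str] = None


def update_file_data(file_data: ExampleFileData, file_data_path: Path, download_path: Path) -> None:
    file_data.local_download_path = str(download_path.resolve())
    file_size_bytes = download_path.stat().st_size
    if file_data.metadata.filesize_bytes != file_size_bytes:
        file_data.metadata.filesize_bytes = file_size_bytes
    # always rewrite the record so downstream steps see the local path
    with file_data_path.open("w") as f:
        json.dump(asdict(file_data), f, indent=2)


with tempfile.TemporaryDirectory() as tmp:
    download = Path(tmp) / "doc.txt"
    download.write_text("hello")
    record = Path(tmp) / "doc.json"
    fd = ExampleFileData(identifier="doc-1")
    update_file_data(fd, record, download)
    assert json.loads(record.read_text())["local_download_path"] == str(download.resolve())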
unstructured_ingest/v2/pipeline/steps/uncompress.py
@@ -1,4 +1,5 @@
 import asyncio
+from dataclasses import dataclass
 from pathlib import Path
 from typing import Callable, TypedDict

@@ -15,6 +16,7 @@ class UncompressStepResponse(TypedDict):
     path: str


+@dataclass
 class UncompressStep(PipelineStep):
     process: Uncompressor
     identifier: str = STEP_ID
@@ -23,21 +25,6 @@ class UncompressStep(PipelineStep):
         config = self.process.config.json() if self.process.config else None
         logger.info(f"Created {self.identifier} with configs: {config}")

-    def _run(self, path: str, file_data_path: str) -> list[UncompressStepResponse]:
-        file_data = FileData.from_file(path=file_data_path)
-        new_file_data = self.process.run(file_data=file_data)
-        responses = []
-        for new_file in new_file_data:
-            new_file_data_path = Path(file_data_path).parent / f"{new_file.identifier}.json"
-            new_file.to_file(path=str(new_file_data_path.resolve()))
-            responses.append(
-                UncompressStepResponse(
-                    path=new_file.source_identifiers.fullpath,
-                    file_data_path=str(new_file_data_path),
-                )
-            )
-        return responses
-
     async def _run_async(
         self, fn: Callable, path: str, file_data_path: str
     ) -> list[UncompressStepResponse]:
@@ -56,7 +43,7 @@ class UncompressStep(PipelineStep):
             new_file.to_file(path=str(new_file_data_path.resolve()))
             responses.append(
                 UncompressStepResponse(
-                    path=new_file.
+                    path=new_file.local_download_path,
                     file_data_path=str(new_file_data_path),
                 )
             )
unstructured_ingest/v2/processes/connectors/chroma.py
@@ -3,10 +3,11 @@ import uuid
 from dataclasses import dataclass, field
 from datetime import date, datetime
 from pathlib import Path
-from typing import TYPE_CHECKING, Any, Optional
+from typing import TYPE_CHECKING, Annotated, Any, Optional

 from dateutil import parser
 from pydantic import Field, Secret
+from pydantic.functional_validators import BeforeValidator

 from unstructured_ingest.error import DestinationConnectionError
 from unstructured_ingest.utils.data_prep import batch_generator, flatten_dict
@@ -21,9 +22,9 @@ from unstructured_ingest.v2.interfaces import (
     UploadStagerConfig,
 )
 from unstructured_ingest.v2.logger import logger
-from unstructured_ingest.v2.processes.connector_registry import
-
-
+from unstructured_ingest.v2.processes.connector_registry import DestinationRegistryEntry
+
+from .utils import conform_string_to_dict

 if TYPE_CHECKING:
     from chromadb import Client
@@ -32,10 +33,10 @@ CONNECTOR_TYPE = "chroma"


 class ChromaAccessConfig(AccessConfig):
-    settings: Optional[dict
+    settings: Optional[Annotated[dict, BeforeValidator(conform_string_to_dict)]] = Field(
         default=None, description="A dictionary of settings to communicate with the chroma server."
     )
-    headers: Optional[dict
+    headers: Optional[Annotated[dict, BeforeValidator(conform_string_to_dict)]] = Field(
         default=None, description="A dictionary of headers to send to the Chroma server."
     )

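Note: the Annotated + BeforeValidator(conform_string_to_dict) pattern above lets dict-valued config fields also accept a JSON string (for example from a CLI flag). A small standalone sketch of the same pattern, assuming pydantic v2 (ExampleAccessConfig and the inline helper are illustrative, not the connector's classes):

# Standalone sketch of the coercion pattern used by the updated access configs.
import json
from typing import Annotated, Any, Optional

from pydantic import BaseModel, Field
from pydantic.functional_validators import BeforeValidator


def conform_string_to_dict(value: Any) -> dict:
    # same idea as the helper added in connectors/utils.py
    if isinstance(value, dict):
        return value
    if isinstance(value, str):
        return json.loads(value)
    raise ValueError(f"Input could not be mapped to a valid dict: {value}")


class ExampleAccessConfig(BaseModel):
    settings: Optional[Annotated[dict, BeforeValidator(conform_string_to_dict)]] = Field(
        default=None, description="settings passed to the server"
    )


# JSON strings are parsed before validation; plain dicts pass through unchanged.
assert ExampleAccessConfig(settings='{"anonymized_telemetry": false}').settings == {
    "anonymized_telemetry": False
}
assert ExampleAccessConfig(settings={"x": 1}).settings == {"x": 1}

The same validator is applied to google_drive's service_account_key in the hunks that follow.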
unstructured_ingest/v2/processes/connectors/google_drive.py
@@ -2,10 +2,11 @@ import io
 import json
 from dataclasses import dataclass, field
 from pathlib import Path
-from typing import TYPE_CHECKING, Any, Generator, Optional
+from typing import TYPE_CHECKING, Annotated, Any, Generator, Optional

 from dateutil import parser
 from pydantic import Field, Secret
+from pydantic.functional_validators import BeforeValidator

 from unstructured_ingest.error import (
     SourceConnectionError,
@@ -26,9 +27,9 @@ from unstructured_ingest.v2.interfaces import (
     download_responses,
 )
 from unstructured_ingest.v2.logger import logger
-from unstructured_ingest.v2.processes.connector_registry import
-
-
+from unstructured_ingest.v2.processes.connector_registry import SourceRegistryEntry
+
+from .utils import conform_string_to_dict

 CONNECTOR_TYPE = "google_drive"

@@ -38,7 +39,7 @@ if TYPE_CHECKING:


 class GoogleDriveAccessConfig(AccessConfig):
-    service_account_key: Optional[dict] = Field(
+    service_account_key: Optional[Annotated[dict, BeforeValidator(conform_string_to_dict)]] = Field(
         default=None, description="Credentials values to use for authentication"
     )
     service_account_key_path: Optional[Path] = Field(
unstructured_ingest/v2/processes/connectors/milvus.py
@@ -67,7 +67,15 @@ class MilvusConnectionConfig(ConnectionConfig):


 class MilvusUploadStagerConfig(UploadStagerConfig):
-
+
+    fields_to_include: Optional[list[str]] = None
+    """If set - list of fields to include in the output.
+    Unspecified fields are removed from the elements.
+    This action takse place after metadata flattening.
+    Missing fields will cause stager to throw KeyError."""
+
+    flatten_metadata: bool = True
+    """If set - flatten "metadata" key and put contents directly into data"""


 @dataclass
@@ -85,8 +93,26 @@ class MilvusUploadStager(UploadStager):
             pass
         return parser.parse(date_string).timestamp()

-
-
+    def conform_dict(self, data: dict) -> None:
+        if self.upload_stager_config.flatten_metadata and (metadata := data.pop("metadata", None)):
+            data.update(flatten_dict(metadata, keys_to_omit=["data_source_record_locator"]))
+
+        # TODO: milvus sdk doesn't seem to support defaults via the schema yet,
+        # remove once that gets updated
+        defaults = {"is_continuation": False}
+        for default in defaults:
+            if default not in data:
+                data[default] = defaults[default]
+
+        if self.upload_stager_config.fields_to_include:
+            data_keys = set(data.keys())
+            for data_key in data_keys:
+                if data_key not in self.upload_stager_config.fields_to_include:
+                    data.pop(data_key)
+            for field_include_key in self.upload_stager_config.fields_to_include:
+                if field_include_key not in data:
+                    raise KeyError(f"Field '{field_include_key}' is missing in data!")
+
         datetime_columns = [
             "data_source_date_created",
             "data_source_date_modified",
@@ -96,21 +122,12 @@ class MilvusUploadStager(UploadStager):

         json_dumps_fields = ["languages", "data_source_permissions_data"]

-        # TODO: milvus sdk doesn't seem to support defaults via the schema yet,
-        # remove once that gets updated
-        defaults = {"is_continuation": False}
-
-        if metadata := data.pop("metadata", None):
-            data.update(flatten_dict(metadata, keys_to_omit=["data_source_record_locator"]))
         for datetime_column in datetime_columns:
             if datetime_column in data:
-                data[datetime_column] =
+                data[datetime_column] = self.parse_date_string(data[datetime_column])
         for json_dumps_field in json_dumps_fields:
             if json_dumps_field in data:
                 data[json_dumps_field] = json.dumps(data[json_dumps_field])
-        for default in defaults:
-            if default not in data:
-                data[default] = defaults[default]

     def run(
         self,
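Note: taken together, the new stager options mean each element dict is optionally flattened, given an is_continuation default, and then restricted to fields_to_include, raising KeyError when a requested field is absent. A condensed standalone sketch of that shaping (conform_element is an assumed helper, not the stager method; it uses a shallow update where the real code calls flatten_dict):

# Standalone sketch of the element shaping driven by the new config options.
from typing import Optional


def conform_element(
    data: dict,
    fields_to_include: Optional[list[str]] = None,
    flatten_metadata: bool = True,
) -> dict:
    data = dict(data)
    if flatten_metadata and (metadata := data.pop("metadata", None)):
        # the real code uses flatten_dict(); a shallow update is enough here
        data.update(metadata)
    data.setdefault("is_continuation", False)  # default the milvus schema can't express yet
    if fields_to_include:
        missing = [k for k in fields_to_include if k not in data]
        if missing:
            raise KeyError(f"Fields {missing} are missing in data!")
        data = {k: v for k, v in data.items() if k in fields_to_include}
    return data


element = {"element_id": "abc", "text": "hello", "metadata": {"filetype": "text/plain"}}
assert conform_element(element, fields_to_include=["element_id", "text", "filetype"]) == {
    "element_id": "abc",
    "text": "hello",
    "filetype": "text/plain",
}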
unstructured_ingest/v2/processes/connectors/opensearch.py
@@ -39,22 +39,6 @@ heavily on the Elasticsearch connector code, inheriting the functionality as muc

 class OpenSearchAccessConfig(AccessConfig):
     password: Optional[str] = Field(default=None, description="password when using basic auth")
-    use_ssl: bool = Field(default=False, description="use ssl for the connection")
-    verify_certs: bool = Field(default=False, description="whether to verify SSL certificates")
-    ssl_show_warn: bool = Field(
-        default=False, description="show warning when verify certs is disabled"
-    )
-    ca_certs: Optional[Path] = Field(default=None, description="path to CA bundle")
-    client_cert: Optional[Path] = Field(
-        default=None,
-        description="path to the file containing the private key and the certificate,"
-        " or cert only if using client_key",
-    )
-    client_key: Optional[Path] = Field(
-        default=None,
-        description="path to the file containing the private key"
-        " if using separate cert and key files",
-    )


 class OpenSearchClientInput(BaseModel):
@@ -75,6 +59,23 @@ class OpenSearchConnectionConfig(ConnectionConfig):
         examples=["http://localhost:9200"],
     )
     username: Optional[str] = Field(default=None, description="username when using basic auth")
+    use_ssl: bool = Field(default=False, description="use ssl for the connection")
+    verify_certs: bool = Field(default=False, description="whether to verify SSL certificates")
+    ssl_show_warn: bool = Field(
+        default=False, description="show warning when verify certs is disabled"
+    )
+    ca_certs: Optional[Path] = Field(default=None, description="path to CA bundle")
+    client_cert: Optional[Path] = Field(
+        default=None,
+        description="path to the file containing the private key and the certificate,"
+        " or cert only if using client_key",
+    )
+    client_key: Optional[Path] = Field(
+        default=None,
+        description="path to the file containing the private key"
+        " if using separate cert and key files",
+    )
+
     access_config: Secret[OpenSearchAccessConfig]

     def get_client_kwargs(self) -> dict:
@@ -85,18 +86,18 @@ class OpenSearchConnectionConfig(ConnectionConfig):
         client_input_kwargs = {}
         if self.hosts:
             client_input_kwargs["hosts"] = self.hosts
-        if
-            client_input_kwargs["use_ssl"] =
-        if
-            client_input_kwargs["verify_certs"] =
-        if
-            client_input_kwargs["ssl_show_warn"] =
-        if
-            client_input_kwargs["ca_certs"] = str(
-        if
-            client_input_kwargs["client_cert"] = str(
-        if
-            client_input_kwargs["client_key"] = str(
+        if self.use_ssl:
+            client_input_kwargs["use_ssl"] = self.use_ssl
+        if self.verify_certs:
+            client_input_kwargs["verify_certs"] = self.verify_certs
+        if self.ssl_show_warn:
+            client_input_kwargs["ssl_show_warn"] = self.ssl_show_warn
+        if self.ca_certs:
+            client_input_kwargs["ca_certs"] = str(self.ca_certs)
+        if self.client_cert:
+            client_input_kwargs["client_cert"] = str(self.client_cert)
+        if self.client_key:
+            client_input_kwargs["client_key"] = str(self.client_key)
         if self.username and access_config.password:
             client_input_kwargs["http_auth"] = (self.username, access_config.password)
         client_input = OpenSearchClientInput(**client_input_kwargs)
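Note: the SSL and certificate options moved from OpenSearchAccessConfig to OpenSearchConnectionConfig, and get_client_kwargs now reads them from self, forwarding only explicitly set values to the client. A condensed standalone sketch of that layout (ExampleConnectionConfig is an assumed, trimmed-down stand-in, not the connector class):

# Standalone sketch: connection-level SSL options, with kwargs built only
# from values the user actually set.
from pathlib import Path
from typing import Optional

from pydantic import BaseModel, Field


class ExampleConnectionConfig(BaseModel):
    hosts: Optional[list[str]] = None
    username: Optional[str] = None
    use_ssl: bool = Field(default=False, description="use ssl for the connection")
    verify_certs: bool = Field(default=False, description="whether to verify SSL certificates")
    ca_certs: Optional[Path] = Field(default=None, description="path to CA bundle")

    def get_client_kwargs(self) -> dict:
        kwargs: dict = {}
        if self.hosts:
            kwargs["hosts"] = self.hosts
        if self.use_ssl:
            kwargs["use_ssl"] = self.use_ssl
        if self.verify_certs:
            kwargs["verify_certs"] = self.verify_certs
        if self.ca_certs:
            kwargs["ca_certs"] = str(self.ca_certs)
        return kwargs


config = ExampleConnectionConfig(hosts=["http://localhost:9200"], use_ssl=True)
assert config.get_client_kwargs() == {"hosts": ["http://localhost:9200"], "use_ssl": True}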
unstructured_ingest/v2/processes/connectors/utils.py
@@ -1,7 +1,9 @@
+import json
 from datetime import datetime
-from typing import Union
+from typing import Any, Union

 from dateutil import parser
+from pydantic import ValidationError


 def parse_datetime(date_value: Union[int, str, float, datetime]) -> datetime:
@@ -17,3 +19,11 @@ def parse_datetime(date_value: Union[int, str, float, datetime]) -> datetime:
         return datetime.fromtimestamp(timestamp)
     except ValueError:
         return parser.parse(date_value)
+
+
+def conform_string_to_dict(value: Any) -> dict:
+    if isinstance(value, dict):
+        return value
+    if isinstance(value, str):
+        return json.loads(value)
+    raise ValidationError(f"Input could not be mapped to a valid dict: {value}")
unstructured_ingest/v2/processes/uncompress.py
@@ -3,12 +3,14 @@ from copy import copy
 from dataclasses import dataclass, field
 from pathlib import Path
 from typing import Any
+from uuid import NAMESPACE_DNS, uuid5

 from pydantic import BaseModel

 from unstructured_ingest.utils.compression import TAR_FILE_EXT, ZIP_FILE_EXT, uncompress_file
-from unstructured_ingest.v2.interfaces import FileData
+from unstructured_ingest.v2.interfaces import FileData, SourceIdentifiers
 from unstructured_ingest.v2.interfaces.process import BaseProcess
+from unstructured_ingest.v2.logger import logger


 class UncompressConfig(BaseModel):
@@ -23,19 +25,35 @@ class Uncompressor(BaseProcess, ABC):
         return True

     def run(self, file_data: FileData, **kwargs: Any) -> list[FileData]:
-        local_filepath = Path(file_data.
+        local_filepath = Path(file_data.local_download_path)
         if local_filepath.suffix not in TAR_FILE_EXT + ZIP_FILE_EXT:
             return [file_data]
         new_path = uncompress_file(filename=str(local_filepath))
         new_files = [i for i in Path(new_path).rglob("*") if i.is_file()]
         responses = []
+        logger.debug(
+            "uncompressed {} files from original file {}: {}".format(
+                len(new_files), local_filepath, ", ".join([str(f) for f in new_files])
+            )
+        )
         for f in new_files:
             new_file_data = copy(file_data)
-            new_file_data.
-
-
-
+            new_file_data.identifier = str(uuid5(NAMESPACE_DNS, str(f)))
+            new_file_data.local_download_path = str(f.resolve())
+            new_rel_download_path = str(f).replace(str(Path(local_filepath.parent)), "")[1:]
+            new_file_data.source_identifiers = SourceIdentifiers(
+                filename=f.name,
+                fullpath=file_data.source_identifiers.fullpath.replace(
+                    file_data.source_identifiers.filename, new_rel_download_path
+                ),
+                rel_path=(
+                    file_data.source_identifiers.rel_path.replace(
+                        file_data.source_identifiers.filename, new_rel_download_path
+                    )
+                    if file_data.source_identifiers.rel_path
+                    else None
+                ),
+            )
             responses.append(new_file_data)
         return responses

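Note: each extracted file now gets a deterministic uuid5 identifier, its own local_download_path, and source identifiers rewritten relative to the archive. A minimal standalone sketch of those path and identifier computations (the archive and remote paths below are assumed example values):

# Standalone sketch of the identifier/path rewriting applied per extracted file.
from pathlib import Path
from uuid import NAMESPACE_DNS, uuid5

archive_path = Path("/downloads/batch/archive.zip")
extracted = Path("/downloads/batch/archive/docs/readme.txt")

identifier = str(uuid5(NAMESPACE_DNS, str(extracted)))  # stable across reruns
rel_download_path = str(extracted).replace(str(archive_path.parent), "")[1:]
fullpath = "s3://bucket/archive.zip".replace("archive.zip", rel_download_path)

assert rel_download_path == "archive/docs/readme.txt"
assert fullpath == "s3://bucket/archive/docs/readme.txt"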