truefoundry 0.5.3rc4__py3-none-any.whl → 0.5.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of truefoundry might be problematic. Click here for more details.
- truefoundry/__init__.py +10 -1
- truefoundry/autodeploy/cli.py +2 -2
- truefoundry/cli/__main__.py +0 -4
- truefoundry/cli/util.py +12 -3
- truefoundry/common/auth_service_client.py +7 -4
- truefoundry/common/constants.py +3 -1
- truefoundry/common/credential_provider.py +7 -8
- truefoundry/common/exceptions.py +11 -7
- truefoundry/common/request_utils.py +96 -14
- truefoundry/common/servicefoundry_client.py +31 -29
- truefoundry/common/session.py +93 -0
- truefoundry/common/storage_provider_utils.py +331 -0
- truefoundry/common/utils.py +9 -9
- truefoundry/common/warnings.py +21 -0
- truefoundry/deploy/builder/builders/tfy_python_buildpack/dockerfile_template.py +8 -20
- truefoundry/deploy/cli/commands/deploy_command.py +4 -4
- truefoundry/deploy/lib/clients/servicefoundry_client.py +14 -38
- truefoundry/deploy/lib/dao/application.py +2 -2
- truefoundry/deploy/lib/dao/workspace.py +1 -1
- truefoundry/deploy/lib/session.py +8 -1
- truefoundry/deploy/v2/lib/deploy.py +2 -2
- truefoundry/deploy/v2/lib/deploy_workflow.py +1 -1
- truefoundry/deploy/v2/lib/patched_models.py +70 -4
- truefoundry/deploy/v2/lib/source.py +2 -1
- truefoundry/ml/artifact/truefoundry_artifact_repo.py +33 -297
- truefoundry/ml/autogen/client/__init__.py +2 -2
- truefoundry/ml/autogen/client/api/mlfoundry_artifacts_api.py +18 -16
- truefoundry/ml/autogen/client/models/__init__.py +2 -2
- truefoundry/ml/autogen/client/models/artifact_version_manifest.py +23 -5
- truefoundry/ml/autogen/client/models/{get_artifact_tags_response_dto.py → get_artifact_version_aliases_response_dto.py} +12 -10
- truefoundry/ml/autogen/client/models/model_version_manifest.py +16 -5
- truefoundry/ml/autogen/client_README.md +2 -2
- truefoundry/ml/autogen/entities/artifacts.py +4 -9
- truefoundry/ml/clients/servicefoundry_client.py +36 -15
- truefoundry/ml/exceptions.py +2 -1
- truefoundry/ml/log_types/artifacts/artifact.py +16 -15
- truefoundry/ml/log_types/artifacts/model.py +20 -19
- truefoundry/ml/log_types/artifacts/utils.py +2 -2
- truefoundry/ml/mlfoundry_api.py +6 -38
- truefoundry/ml/mlfoundry_run.py +6 -15
- truefoundry/ml/model_framework.py +2 -1
- truefoundry/ml/session.py +69 -97
- truefoundry/workflow/remote_filesystem/tfy_signed_url_client.py +42 -9
- truefoundry/workflow/remote_filesystem/tfy_signed_url_fs.py +126 -7
- {truefoundry-0.5.3rc4.dist-info → truefoundry-0.5.4.dist-info}/METADATA +2 -2
- {truefoundry-0.5.3rc4.dist-info → truefoundry-0.5.4.dist-info}/RECORD +48 -54
- {truefoundry-0.5.3rc4.dist-info → truefoundry-0.5.4.dist-info}/WHEEL +1 -1
- truefoundry/cli/commands/pat.py +0 -24
- truefoundry/deploy/lib/auth/servicefoundry_session.py +0 -61
- truefoundry/gateway/__init__.py +0 -1
- truefoundry/gateway/cli/cli.py +0 -51
- truefoundry/gateway/lib/client.py +0 -51
- truefoundry/gateway/lib/entities.py +0 -33
- truefoundry/gateway/lib/models.py +0 -67
- truefoundry/ml/clients/entities.py +0 -8
- truefoundry/ml/clients/utils.py +0 -122
- {truefoundry-0.5.3rc4.dist-info → truefoundry-0.5.4.dist-info}/entry_points.txt +0 -0
truefoundry/ml/mlfoundry_run.py
CHANGED
|
@@ -17,7 +17,6 @@ from typing import (
|
|
|
17
17
|
from urllib.parse import urljoin, urlsplit
|
|
18
18
|
|
|
19
19
|
from truefoundry import version
|
|
20
|
-
from truefoundry.common.utils import relogin_error_message
|
|
21
20
|
from truefoundry.ml import constants
|
|
22
21
|
from truefoundry.ml.autogen.client import ( # type: ignore[attr-defined]
|
|
23
22
|
ArtifactType,
|
|
@@ -55,7 +54,7 @@ from truefoundry.ml.log_types.artifacts.model import (
|
|
|
55
54
|
)
|
|
56
55
|
from truefoundry.ml.logger import logger
|
|
57
56
|
from truefoundry.ml.run_utils import ParamsType, flatten_dict, process_params
|
|
58
|
-
from truefoundry.ml.session import ACTIVE_RUNS, _get_api_client
|
|
57
|
+
from truefoundry.ml.session import ACTIVE_RUNS, _get_api_client
|
|
59
58
|
from truefoundry.ml.validation_utils import (
|
|
60
59
|
MAX_ENTITY_KEY_LENGTH,
|
|
61
60
|
MAX_METRICS_PER_BATCH,
|
|
@@ -72,7 +71,7 @@ if TYPE_CHECKING:
|
|
|
72
71
|
|
|
73
72
|
def _ensure_not_deleted(method):
|
|
74
73
|
@functools.wraps(method)
|
|
75
|
-
def _check_deleted_or_not(self, *args, **kwargs):
|
|
74
|
+
def _check_deleted_or_not(self: "MlFoundryRun", *args, **kwargs):
|
|
76
75
|
if self._deleted:
|
|
77
76
|
raise MlFoundryException("Run was deleted, cannot access a deleted Run")
|
|
78
77
|
else:
|
|
@@ -230,18 +229,10 @@ class MlFoundryRun:
|
|
|
230
229
|
@_ensure_not_deleted
|
|
231
230
|
def dashboard_link(self) -> str:
|
|
232
231
|
"""Get Mlfoundry dashboard link for a `run`"""
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
raise MlFoundryException(
|
|
236
|
-
relogin_error_message(
|
|
237
|
-
"No active session found. Perhaps you are not logged in?",
|
|
238
|
-
)
|
|
239
|
-
)
|
|
240
|
-
base_url = "{uri.scheme}://{uri.netloc}/".format(
|
|
241
|
-
uri=urlsplit(session.tracking_uri)
|
|
232
|
+
tfy_host = "{uri.scheme}://{uri.netloc}/".format(
|
|
233
|
+
uri=urlsplit(self._api_client.tfy_host)
|
|
242
234
|
)
|
|
243
|
-
|
|
244
|
-
return urljoin(base_url, f"mlfoundry/{self._experiment_id}/run/{self.run_id}/")
|
|
235
|
+
return urljoin(tfy_host, f"mlfoundry/{self._experiment_id}/run/{self.run_id}/")
|
|
245
236
|
|
|
246
237
|
@_ensure_not_deleted
|
|
247
238
|
def end(self, status: RunStatus = RunStatus.FINISHED):
|
|
@@ -581,7 +572,7 @@ class MlFoundryRun:
|
|
|
581
572
|
)
|
|
582
573
|
|
|
583
574
|
return _log_artifact_version(
|
|
584
|
-
self,
|
|
575
|
+
run=self,
|
|
585
576
|
name=name,
|
|
586
577
|
artifact_paths=artifact_paths,
|
|
587
578
|
description=description,
|
|
@@ -20,6 +20,7 @@ from truefoundry.common.utils import (
|
|
|
20
20
|
get_python_version_major_minor,
|
|
21
21
|
list_pip_packages_installed,
|
|
22
22
|
)
|
|
23
|
+
from truefoundry.common.warnings import TrueFoundryDeprecationWarning
|
|
23
24
|
from truefoundry.ml.autogen.client import ( # type: ignore[attr-defined]
|
|
24
25
|
ModelVersionEnvironment,
|
|
25
26
|
SklearnSerializationFormat,
|
|
@@ -260,7 +261,7 @@ class _ModelFramework(BaseModel):
|
|
|
260
261
|
if isinstance(framework, (str, ModelFramework)):
|
|
261
262
|
warnings.warn(
|
|
262
263
|
"Passing a string or ModelFramework Enum is deprecated. Please use a ModelFrameworkType object.",
|
|
263
|
-
|
|
264
|
+
category=TrueFoundryDeprecationWarning,
|
|
264
265
|
stacklevel=2,
|
|
265
266
|
)
|
|
266
267
|
|
truefoundry/ml/session.py
CHANGED
|
@@ -3,26 +3,22 @@ import threading
|
|
|
3
3
|
import weakref
|
|
4
4
|
from typing import TYPE_CHECKING, Dict, Optional
|
|
5
5
|
|
|
6
|
-
from truefoundry.common.credential_provider import (
|
|
7
|
-
CredentialProvider,
|
|
8
|
-
EnvCredentialProvider,
|
|
9
|
-
FileCredentialProvider,
|
|
10
|
-
)
|
|
11
|
-
from truefoundry.common.entities import Token, UserInfo
|
|
12
6
|
from truefoundry.common.request_utils import urllib3_retry
|
|
7
|
+
from truefoundry.common.session import Session
|
|
13
8
|
from truefoundry.common.utils import get_tfy_servers_config, relogin_error_message
|
|
14
9
|
from truefoundry.ml.autogen.client import ( # type: ignore[attr-defined]
|
|
15
10
|
ApiClient,
|
|
16
11
|
Configuration,
|
|
17
12
|
)
|
|
18
|
-
from truefoundry.ml.clients.entities import HostCreds
|
|
19
13
|
from truefoundry.ml.exceptions import MlFoundryException
|
|
20
14
|
from truefoundry.ml.logger import logger
|
|
15
|
+
from truefoundry.version import __version__
|
|
21
16
|
|
|
22
17
|
if TYPE_CHECKING:
|
|
23
18
|
from truefoundry.ml.mlfoundry_run import MlFoundryRun
|
|
24
19
|
|
|
25
20
|
SESSION_LOCK = threading.RLock()
|
|
21
|
+
ACTIVE_SESSION: Optional["MLFoundrySession"] = None
|
|
26
22
|
|
|
27
23
|
|
|
28
24
|
class ActiveRuns:
|
|
@@ -51,19 +47,7 @@ ACTIVE_RUNS = ActiveRuns()
|
|
|
51
47
|
atexit.register(ACTIVE_RUNS.close_active_runs)
|
|
52
48
|
|
|
53
49
|
|
|
54
|
-
class Session:
|
|
55
|
-
def __init__(self, cred_provider: CredentialProvider):
|
|
56
|
-
# Note: Whenever a new session is initialized all the active runs are ended
|
|
57
|
-
self._closed = False
|
|
58
|
-
self._cred_provider: Optional[CredentialProvider] = cred_provider
|
|
59
|
-
self._user_info: Optional[UserInfo] = self._cred_provider.token.to_user_info()
|
|
60
|
-
|
|
61
|
-
def close(self):
|
|
62
|
-
logger.debug("Closing existing session")
|
|
63
|
-
self._closed = True
|
|
64
|
-
self._user_info = None
|
|
65
|
-
self._cred_provider = None
|
|
66
|
-
|
|
50
|
+
class MLFoundrySession(Session):
|
|
67
51
|
def _assert_not_closed(self):
|
|
68
52
|
if self._closed:
|
|
69
53
|
raise MlFoundryException(
|
|
@@ -72,100 +56,88 @@ class Session:
|
|
|
72
56
|
"`truefoundry.ml.get_client()` function call) can be used"
|
|
73
57
|
)
|
|
74
58
|
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
59
|
+
def close(self):
|
|
60
|
+
global ACTIVE_RUNS
|
|
61
|
+
logger.debug("Closing existing session")
|
|
62
|
+
ACTIVE_RUNS.close_active_runs()
|
|
63
|
+
super().close()
|
|
78
64
|
|
|
79
|
-
@
|
|
80
|
-
def
|
|
81
|
-
|
|
82
|
-
|
|
65
|
+
@classmethod
|
|
66
|
+
def new(cls) -> "MLFoundrySession":
|
|
67
|
+
global ACTIVE_SESSION
|
|
68
|
+
with SESSION_LOCK:
|
|
69
|
+
new_session = cls()
|
|
70
|
+
if ACTIVE_SESSION and ACTIVE_SESSION == new_session:
|
|
71
|
+
return ACTIVE_SESSION
|
|
72
|
+
|
|
73
|
+
if ACTIVE_SESSION:
|
|
74
|
+
ACTIVE_SESSION.close()
|
|
75
|
+
|
|
76
|
+
ACTIVE_SESSION = new_session
|
|
77
|
+
logger.info(
|
|
78
|
+
"Logged in to %r as %r (%s)",
|
|
79
|
+
new_session.tfy_host,
|
|
80
|
+
new_session.user_info.user_id,
|
|
81
|
+
new_session.user_info.email or new_session.user_info.user_type.value,
|
|
82
|
+
)
|
|
83
83
|
|
|
84
|
-
|
|
85
|
-
@property
|
|
86
|
-
def tracking_uri(self) -> str:
|
|
87
|
-
return self._cred_provider.base_url
|
|
88
|
-
|
|
89
|
-
def __eq__(self, other: object) -> bool:
|
|
90
|
-
if not isinstance(other, Session):
|
|
91
|
-
return False
|
|
92
|
-
return (
|
|
93
|
-
type(self._cred_provider) == type(other._cred_provider) # noqa: E721
|
|
94
|
-
and self.user_info == other.user_info
|
|
95
|
-
and self.tracking_uri == other.tracking_uri
|
|
96
|
-
)
|
|
84
|
+
return ACTIVE_SESSION
|
|
97
85
|
|
|
98
|
-
def get_host_creds(self) -> HostCreds:
|
|
99
|
-
tracking_uri = get_tfy_servers_config(self.tracking_uri).mlfoundry_server_url
|
|
100
|
-
return HostCreds(
|
|
101
|
-
host=tracking_uri, token=self._cred_provider.token.access_token
|
|
102
|
-
)
|
|
103
86
|
|
|
87
|
+
class MLFoundryServerApiClient(ApiClient):
|
|
88
|
+
def __init__(self, session: Optional[MLFoundrySession] = None, *args, **kwargs):
|
|
89
|
+
self.session = session
|
|
90
|
+
super().__init__(*args, **kwargs)
|
|
104
91
|
|
|
105
|
-
|
|
92
|
+
@classmethod
|
|
93
|
+
def from_session(cls, session: MLFoundrySession) -> "MLFoundryServerApiClient":
|
|
94
|
+
mlfoundry_server_url = get_tfy_servers_config(
|
|
95
|
+
session.tfy_host
|
|
96
|
+
).mlfoundry_server_url
|
|
97
|
+
configuration = Configuration(
|
|
98
|
+
host=mlfoundry_server_url.rstrip("/"),
|
|
99
|
+
access_token=session.access_token,
|
|
100
|
+
)
|
|
101
|
+
configuration.retries = urllib3_retry(retries=2)
|
|
102
|
+
api_client = cls(session=session, configuration=configuration)
|
|
103
|
+
api_client.user_agent = f"truefoundry-cli/{__version__}"
|
|
104
|
+
return api_client
|
|
105
|
+
|
|
106
|
+
def _ensure_session(self):
|
|
107
|
+
if self.session is None:
|
|
108
|
+
raise MlFoundryException(
|
|
109
|
+
relogin_error_message(
|
|
110
|
+
"No active session found. Perhaps you are not logged in?",
|
|
111
|
+
)
|
|
112
|
+
)
|
|
106
113
|
|
|
114
|
+
@property
|
|
115
|
+
def tfy_host(self) -> str:
|
|
116
|
+
self._ensure_session()
|
|
117
|
+
assert self.session is not None
|
|
118
|
+
return self.session.tfy_host
|
|
107
119
|
|
|
108
|
-
|
|
109
|
-
|
|
120
|
+
@property
|
|
121
|
+
def access_token(self) -> str:
|
|
122
|
+
self._ensure_session()
|
|
123
|
+
assert self.session is not None
|
|
124
|
+
return self.session.access_token
|
|
110
125
|
|
|
111
126
|
|
|
112
127
|
def _get_api_client(
|
|
113
|
-
session: Optional[
|
|
128
|
+
session: Optional[MLFoundrySession] = None,
|
|
114
129
|
allow_anonymous: bool = False,
|
|
115
|
-
) ->
|
|
116
|
-
|
|
130
|
+
) -> MLFoundryServerApiClient:
|
|
131
|
+
global ACTIVE_SESSION
|
|
117
132
|
|
|
118
|
-
session = session or
|
|
133
|
+
session = session or ACTIVE_SESSION
|
|
119
134
|
if session is None:
|
|
120
135
|
if allow_anonymous:
|
|
121
|
-
return
|
|
136
|
+
return MLFoundryServerApiClient(session=None)
|
|
122
137
|
else:
|
|
123
138
|
raise MlFoundryException(
|
|
124
139
|
relogin_error_message(
|
|
125
140
|
"No active session found. Perhaps you are not logged in?",
|
|
126
141
|
)
|
|
127
142
|
)
|
|
128
|
-
|
|
129
|
-
creds = session.get_host_creds()
|
|
130
|
-
configuration = Configuration(
|
|
131
|
-
host=creds.host.rstrip("/"),
|
|
132
|
-
access_token=creds.token,
|
|
133
|
-
)
|
|
134
|
-
configuration.retries = urllib3_retry(retries=2)
|
|
135
|
-
api_client = ApiClient(configuration=configuration)
|
|
136
|
-
api_client.user_agent = f"truefoundry-cli/{__version__}"
|
|
137
|
-
return api_client
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
def init_session() -> Session:
|
|
141
|
-
with SESSION_LOCK:
|
|
142
|
-
final_cred_provider = None
|
|
143
|
-
for cred_provider in [EnvCredentialProvider, FileCredentialProvider]:
|
|
144
|
-
if cred_provider.can_provide():
|
|
145
|
-
final_cred_provider = cred_provider()
|
|
146
|
-
break
|
|
147
|
-
if final_cred_provider is None:
|
|
148
|
-
raise MlFoundryException(
|
|
149
|
-
relogin_error_message(
|
|
150
|
-
"No active session found. Perhaps you are not logged in?",
|
|
151
|
-
)
|
|
152
|
-
)
|
|
153
|
-
new_session = Session(cred_provider=final_cred_provider)
|
|
154
|
-
|
|
155
|
-
global ACTIVE_SESSION
|
|
156
|
-
if ACTIVE_SESSION and ACTIVE_SESSION == new_session:
|
|
157
|
-
return ACTIVE_SESSION
|
|
158
|
-
|
|
159
|
-
ACTIVE_RUNS.close_active_runs()
|
|
160
|
-
|
|
161
|
-
if ACTIVE_SESSION:
|
|
162
|
-
ACTIVE_SESSION.close()
|
|
163
|
-
ACTIVE_SESSION = new_session
|
|
164
|
-
|
|
165
|
-
logger.info(
|
|
166
|
-
"Logged in to %r as %r (%s)",
|
|
167
|
-
ACTIVE_SESSION.tracking_uri,
|
|
168
|
-
ACTIVE_SESSION.user_info.user_id,
|
|
169
|
-
ACTIVE_SESSION.user_info.email or ACTIVE_SESSION.user_info.user_type.value,
|
|
170
|
-
)
|
|
171
|
-
return ACTIVE_SESSION
|
|
143
|
+
return MLFoundryServerApiClient.from_session(session)
|
|
@@ -13,6 +13,7 @@ from truefoundry.common.constants import (
|
|
|
13
13
|
TFY_INTERNAL_SIGNED_URL_SERVER_TOKEN_ENV_KEY,
|
|
14
14
|
)
|
|
15
15
|
from truefoundry.common.request_utils import requests_retry_session
|
|
16
|
+
from truefoundry.common.storage_provider_utils import MultiPartUploadStorageProvider
|
|
16
17
|
from truefoundry.pydantic_v1 import BaseModel, Field
|
|
17
18
|
from truefoundry.workflow.remote_filesystem.logger import log_time, logger
|
|
18
19
|
|
|
@@ -20,6 +21,9 @@ LOG_PREFIX = "[tfy][fs]"
|
|
|
20
21
|
DEFAULT_TTL = ENV_VARS.TFY_INTERNAL_SIGNED_URL_SERVER_DEFAULT_TTL
|
|
21
22
|
MAX_TIMEOUT = ENV_VARS.TFY_INTERNAL_SIGNED_URL_SERVER_MAX_TIMEOUT
|
|
22
23
|
REQUEST_TIMEOUT = ENV_VARS.TFY_INTERNAL_SIGNED_URL_REQUEST_TIMEOUT
|
|
24
|
+
MULTIPART_UPLOAD_FINALIZE_SIGNED_URL_TIMEOUT = (
|
|
25
|
+
ENV_VARS.TFY_INTERNAL_MULTIPART_UPLOAD_FINALIZE_SIGNED_URL_TIMEOUT
|
|
26
|
+
)
|
|
23
27
|
|
|
24
28
|
|
|
25
29
|
class SignedURLAPIResponseDto(BaseModel):
|
|
@@ -36,6 +40,18 @@ class SignedURLExistsAPIResponseDto(BaseModel):
|
|
|
36
40
|
exists: bool
|
|
37
41
|
|
|
38
42
|
|
|
43
|
+
class PartSignedUrl(BaseModel):
|
|
44
|
+
partNumber: int
|
|
45
|
+
signedUrl: str
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
class SignedURLMultipartUploadAPIResponseDto(BaseModel):
|
|
49
|
+
uploadId: str
|
|
50
|
+
partSignedUrls: List[PartSignedUrl]
|
|
51
|
+
finalizeSignedUrl: str
|
|
52
|
+
storageProvider: MultiPartUploadStorageProvider
|
|
53
|
+
|
|
54
|
+
|
|
39
55
|
class FileInfo(BaseModel):
|
|
40
56
|
path: str
|
|
41
57
|
is_directory: bool = Field(..., alias="isDirectory")
|
|
@@ -56,6 +72,7 @@ class SignedURLServerEndpoint(str, Enum):
|
|
|
56
72
|
EXISTS = "/v1/exists"
|
|
57
73
|
IS_DIRECTORY = "/v1/is-dir"
|
|
58
74
|
LIST_FILES = "/v1/list-files"
|
|
75
|
+
CREATE_MUTLIPART_UPLOAD = "/v1/multipart-upload"
|
|
59
76
|
|
|
60
77
|
|
|
61
78
|
class SignedURLClient:
|
|
@@ -98,9 +115,9 @@ class SignedURLClient:
|
|
|
98
115
|
self,
|
|
99
116
|
endpoint: str,
|
|
100
117
|
method: str = "GET",
|
|
101
|
-
payload: Optional[Dict] = None,
|
|
102
|
-
headers: Optional[Dict] = None,
|
|
103
|
-
) -> Dict:
|
|
118
|
+
payload: Optional[Dict[str, Any]] = None,
|
|
119
|
+
headers: Optional[Dict[str, Any]] = None,
|
|
120
|
+
) -> Dict[str, Any]:
|
|
104
121
|
"""Internal method to handle requests to the signed URL server."""
|
|
105
122
|
url = urljoin(self.base_url, endpoint)
|
|
106
123
|
try:
|
|
@@ -116,9 +133,9 @@ class SignedURLClient:
|
|
|
116
133
|
def _make_server_api_call(
|
|
117
134
|
self,
|
|
118
135
|
endpoint: SignedURLServerEndpoint,
|
|
119
|
-
params: Optional[Dict] = None,
|
|
120
|
-
headers: Optional[Dict] = None,
|
|
121
|
-
) -> Dict:
|
|
136
|
+
params: Optional[Dict[str, Any]] = None,
|
|
137
|
+
headers: Optional[Dict[str, Any]] = None,
|
|
138
|
+
) -> Dict[str, Any]:
|
|
122
139
|
"""Get a signed URL for the specified operation and URI."""
|
|
123
140
|
query_string = urlencode(params or {})
|
|
124
141
|
endpoint_with_params = f"{endpoint.value}?{query_string}"
|
|
@@ -131,7 +148,7 @@ class SignedURLClient:
|
|
|
131
148
|
self,
|
|
132
149
|
signed_url: str,
|
|
133
150
|
data: Union[bytes, io.BufferedReader],
|
|
134
|
-
headers: Optional[Dict] = None,
|
|
151
|
+
headers: Optional[Dict[str, Any]] = None,
|
|
135
152
|
) -> None:
|
|
136
153
|
"""
|
|
137
154
|
Upload data to the specified storage path using a signed URL.
|
|
@@ -190,12 +207,28 @@ class SignedURLClient:
|
|
|
190
207
|
)
|
|
191
208
|
return storage_uri
|
|
192
209
|
|
|
210
|
+
def create_multipart_upload(
|
|
211
|
+
self, storage_uri: str, num_parts: int
|
|
212
|
+
) -> SignedURLMultipartUploadAPIResponseDto:
|
|
213
|
+
response = self._make_server_api_call(
|
|
214
|
+
endpoint=SignedURLServerEndpoint.CREATE_MUTLIPART_UPLOAD,
|
|
215
|
+
params={
|
|
216
|
+
"path": storage_uri,
|
|
217
|
+
"numParts": num_parts,
|
|
218
|
+
"partExpiryInSeconds": self.ttl,
|
|
219
|
+
"finalizationExpiryInSeconds": MULTIPART_UPLOAD_FINALIZE_SIGNED_URL_TIMEOUT,
|
|
220
|
+
},
|
|
221
|
+
headers=self.signed_url_server_headers,
|
|
222
|
+
)
|
|
223
|
+
|
|
224
|
+
return SignedURLMultipartUploadAPIResponseDto.parse_obj(response)
|
|
225
|
+
|
|
193
226
|
@log_time(prefix=LOG_PREFIX)
|
|
194
227
|
def _download_file(
|
|
195
228
|
self,
|
|
196
229
|
signed_url: str,
|
|
197
230
|
local_path: Optional[str] = None,
|
|
198
|
-
headers: Optional[Dict] = None,
|
|
231
|
+
headers: Optional[Dict[str, Any]] = None,
|
|
199
232
|
) -> Optional[bytes]:
|
|
200
233
|
"""Common method to download a file using a signed URL."""
|
|
201
234
|
try:
|
|
@@ -237,7 +270,7 @@ class SignedURLClient:
|
|
|
237
270
|
return local_path
|
|
238
271
|
|
|
239
272
|
@log_time(prefix=LOG_PREFIX)
|
|
240
|
-
def download_to_bytes(self, storage_uri: str) -> bytes:
|
|
273
|
+
def download_to_bytes(self, storage_uri: str) -> Optional[bytes]:
|
|
241
274
|
"""Download a file from the specified storage path and return it as bytes."""
|
|
242
275
|
response = self._make_server_api_call(
|
|
243
276
|
endpoint=SignedURLServerEndpoint.READ,
|
|
@@ -2,18 +2,44 @@
|
|
|
2
2
|
# pylint: disable=W0223
|
|
3
3
|
import io
|
|
4
4
|
import os
|
|
5
|
+
from concurrent.futures import FIRST_EXCEPTION, Future, ThreadPoolExecutor, wait
|
|
5
6
|
from pathlib import Path
|
|
6
|
-
from
|
|
7
|
+
from threading import Event
|
|
8
|
+
from typing import List, Optional, Tuple
|
|
7
9
|
|
|
8
10
|
from fsspec.spec import DEFAULT_CALLBACK, AbstractBufferedFile, AbstractFileSystem
|
|
9
11
|
|
|
10
12
|
from truefoundry.common.constants import ENV_VARS
|
|
11
|
-
from truefoundry.
|
|
13
|
+
from truefoundry.common.storage_provider_utils import (
|
|
14
|
+
MultiPartUpload,
|
|
15
|
+
SignedURL,
|
|
16
|
+
_FileMultiPartInfo,
|
|
17
|
+
decide_file_parts,
|
|
18
|
+
s3_compatible_multipart_upload,
|
|
19
|
+
)
|
|
20
|
+
from truefoundry.workflow.remote_filesystem.logger import log_time, logger
|
|
12
21
|
from truefoundry.workflow.remote_filesystem.tfy_signed_url_client import (
|
|
13
22
|
LOG_PREFIX,
|
|
14
23
|
SignedURLClient,
|
|
24
|
+
SignedURLMultipartUploadAPIResponseDto,
|
|
15
25
|
)
|
|
16
26
|
|
|
27
|
+
MULTIPART_SUPPORTED_PROVIDERS = ["s3"]
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def _add_file_for_upload(
|
|
31
|
+
local_file: str,
|
|
32
|
+
remote_path: str,
|
|
33
|
+
files_for_normal_upload: List[Tuple[str, str, _FileMultiPartInfo]],
|
|
34
|
+
files_for_multipart_upload: List[Tuple[str, str, _FileMultiPartInfo]],
|
|
35
|
+
multipart_upload_allowed: bool,
|
|
36
|
+
):
|
|
37
|
+
multipart_info = decide_file_parts(local_file, multipart_upload_allowed)
|
|
38
|
+
if multipart_info.num_parts == 1:
|
|
39
|
+
files_for_normal_upload.append((remote_path, local_file, multipart_info))
|
|
40
|
+
else:
|
|
41
|
+
files_for_multipart_upload.append((remote_path, local_file, multipart_info))
|
|
42
|
+
|
|
17
43
|
|
|
18
44
|
class SignedURLFileSystem(AbstractFileSystem):
|
|
19
45
|
def __init__(
|
|
@@ -100,7 +126,13 @@ class SignedURLFileSystem(AbstractFileSystem):
|
|
|
100
126
|
maxdepth=None,
|
|
101
127
|
**kwargs,
|
|
102
128
|
):
|
|
129
|
+
files_for_normal_upload: List[Tuple[str, str, _FileMultiPartInfo]] = []
|
|
130
|
+
files_for_multipart_upload: List[Tuple[str, str, _FileMultiPartInfo]] = []
|
|
103
131
|
local_path = Path(lpath)
|
|
132
|
+
multipart_upload_allowed = (
|
|
133
|
+
self.protocol in MULTIPART_SUPPORTED_PROVIDERS
|
|
134
|
+
and not ENV_VARS.TFY_ARTIFACTS_DISABLE_MULTIPART_UPLOAD
|
|
135
|
+
)
|
|
104
136
|
if local_path.is_dir():
|
|
105
137
|
if not recursive:
|
|
106
138
|
raise ValueError(
|
|
@@ -127,15 +159,102 @@ class SignedURLFileSystem(AbstractFileSystem):
|
|
|
127
159
|
for file in files:
|
|
128
160
|
local_file_path = Path(root) / file
|
|
129
161
|
remote_file_path = f"{remote_dir}/{file}"
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
162
|
+
_add_file_for_upload(
|
|
163
|
+
local_file=str(local_file_path),
|
|
164
|
+
remote_path=str(remote_file_path),
|
|
165
|
+
files_for_normal_upload=files_for_normal_upload,
|
|
166
|
+
files_for_multipart_upload=files_for_multipart_upload,
|
|
167
|
+
multipart_upload_allowed=multipart_upload_allowed,
|
|
133
168
|
)
|
|
134
|
-
return None
|
|
135
169
|
else:
|
|
136
170
|
if rpath.endswith("/"):
|
|
137
171
|
rpath = os.path.join(rpath, local_path.name)
|
|
138
|
-
|
|
172
|
+
_add_file_for_upload(
|
|
173
|
+
local_file=str(lpath),
|
|
174
|
+
remote_path=str(rpath),
|
|
175
|
+
files_for_normal_upload=files_for_normal_upload,
|
|
176
|
+
files_for_multipart_upload=files_for_multipart_upload,
|
|
177
|
+
multipart_upload_allowed=multipart_upload_allowed,
|
|
178
|
+
)
|
|
179
|
+
return self._upload(
|
|
180
|
+
files_for_normal_upload=files_for_normal_upload,
|
|
181
|
+
files_for_multipart_upload=files_for_multipart_upload,
|
|
182
|
+
)
|
|
183
|
+
|
|
184
|
+
def _upload(
|
|
185
|
+
self,
|
|
186
|
+
files_for_normal_upload: List[Tuple[str, str, _FileMultiPartInfo]],
|
|
187
|
+
files_for_multipart_upload: List[Tuple[str, str, _FileMultiPartInfo]],
|
|
188
|
+
):
|
|
189
|
+
abort_event = Event()
|
|
190
|
+
with ThreadPoolExecutor(
|
|
191
|
+
max_workers=ENV_VARS.TFY_ARTIFACTS_UPLOAD_MAX_WORKERS
|
|
192
|
+
) as executor:
|
|
193
|
+
futures: List[Future] = []
|
|
194
|
+
for remote_path, local_path, _ in files_for_normal_upload:
|
|
195
|
+
futures.append(
|
|
196
|
+
executor.submit(
|
|
197
|
+
self.client.upload,
|
|
198
|
+
file_path=local_path,
|
|
199
|
+
storage_uri=remote_path,
|
|
200
|
+
)
|
|
201
|
+
)
|
|
202
|
+
|
|
203
|
+
done, not_done = wait(futures, return_when=FIRST_EXCEPTION)
|
|
204
|
+
if len(not_done) > 0:
|
|
205
|
+
abort_event.set()
|
|
206
|
+
for future in not_done:
|
|
207
|
+
future.cancel()
|
|
208
|
+
for future in done:
|
|
209
|
+
if future.exception() is not None:
|
|
210
|
+
raise future.exception()
|
|
211
|
+
|
|
212
|
+
for remote_path, local_path, multipart_info in files_for_multipart_upload:
|
|
213
|
+
self._multipart_upload(
|
|
214
|
+
local_file=local_path,
|
|
215
|
+
artifact_path=remote_path,
|
|
216
|
+
multipart_info=multipart_info,
|
|
217
|
+
executor=executor,
|
|
218
|
+
abort_event=abort_event,
|
|
219
|
+
)
|
|
220
|
+
|
|
221
|
+
def _multipart_upload(
|
|
222
|
+
self,
|
|
223
|
+
local_file: str,
|
|
224
|
+
artifact_path: str,
|
|
225
|
+
multipart_info: _FileMultiPartInfo,
|
|
226
|
+
executor: ThreadPoolExecutor,
|
|
227
|
+
abort_event: Optional[Event] = None,
|
|
228
|
+
):
|
|
229
|
+
logger.info(
|
|
230
|
+
"Uploading %s to %s using multipart upload",
|
|
231
|
+
local_file,
|
|
232
|
+
artifact_path,
|
|
233
|
+
)
|
|
234
|
+
|
|
235
|
+
multipart_upload: SignedURLMultipartUploadAPIResponseDto = (
|
|
236
|
+
self.client.create_multipart_upload(
|
|
237
|
+
storage_uri=artifact_path,
|
|
238
|
+
num_parts=multipart_info.num_parts,
|
|
239
|
+
)
|
|
240
|
+
)
|
|
241
|
+
s3_compatible_multipart_upload(
|
|
242
|
+
multipart_upload=MultiPartUpload(
|
|
243
|
+
storage_provider=multipart_upload.storageProvider,
|
|
244
|
+
part_signed_urls=[
|
|
245
|
+
SignedURL(signed_url=url.signedUrl)
|
|
246
|
+
for url in multipart_upload.partSignedUrls
|
|
247
|
+
],
|
|
248
|
+
s3_compatible_upload_id=multipart_upload.uploadId,
|
|
249
|
+
finalize_signed_url=SignedURL(
|
|
250
|
+
signed_url=multipart_upload.finalizeSignedUrl
|
|
251
|
+
),
|
|
252
|
+
),
|
|
253
|
+
local_file=local_file,
|
|
254
|
+
executor=executor,
|
|
255
|
+
multipart_info=multipart_info,
|
|
256
|
+
abort_event=abort_event,
|
|
257
|
+
)
|
|
139
258
|
|
|
140
259
|
@log_time(prefix=LOG_PREFIX)
|
|
141
260
|
def isdir(self, path):
|