truefoundry 0.4.4rc12__py3-none-any.whl → 0.5.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of truefoundry might be problematic. Click here for more details.
- truefoundry/common/constants.py +6 -1
- truefoundry/common/utils.py +0 -18
- truefoundry/logger.py +1 -0
- truefoundry/ml/__init__.py +36 -0
- truefoundry/ml/artifact/truefoundry_artifact_repo.py +433 -415
- truefoundry/ml/autogen/client/__init__.py +29 -6
- truefoundry/ml/autogen/client/api/__init__.py +3 -3
- truefoundry/ml/autogen/client/api/deprecated_api.py +7 -7
- truefoundry/ml/autogen/client/api/generate_code_snippet_api.py +526 -0
- truefoundry/ml/autogen/client/models/__init__.py +26 -3
- truefoundry/ml/autogen/client/models/command.py +152 -0
- truefoundry/ml/autogen/client/models/create_workflow_task_config_request_dto.py +72 -0
- truefoundry/ml/autogen/client/models/external_model_source.py +3 -2
- truefoundry/ml/autogen/client/models/fast_ai_framework.py +75 -0
- truefoundry/ml/autogen/client/models/framework.py +250 -14
- truefoundry/ml/autogen/client/models/gluon_framework.py +74 -0
- truefoundry/ml/autogen/client/models/{upload_model_source.py → h2_o_framework.py} +11 -11
- truefoundry/ml/autogen/client/models/keras_framework.py +74 -0
- truefoundry/ml/autogen/client/models/light_gbm_framework.py +75 -0
- truefoundry/ml/autogen/client/models/model_version_manifest.py +1 -1
- truefoundry/ml/autogen/client/models/onnx_framework.py +74 -0
- truefoundry/ml/autogen/client/models/paddle_framework.py +75 -0
- truefoundry/ml/autogen/client/models/py_torch_framework.py +75 -0
- truefoundry/ml/autogen/client/models/sklearn_framework.py +75 -0
- truefoundry/ml/autogen/client/models/source.py +9 -32
- truefoundry/ml/autogen/client/models/spa_cy_framework.py +74 -0
- truefoundry/ml/autogen/client/models/stats_models_framework.py +75 -0
- truefoundry/ml/autogen/client/models/{tensorflow_framework.py → tensor_flow_framework.py} +10 -9
- truefoundry/ml/autogen/client/models/transformers_framework.py +3 -2
- truefoundry/ml/autogen/client/models/trigger_job_run_config_request_dto.py +90 -0
- truefoundry/ml/autogen/client/models/trigger_job_run_config_response_dto.py +71 -0
- truefoundry/ml/autogen/client/models/truefoundry_model_source.py +5 -3
- truefoundry/ml/autogen/client/models/xg_boost_framework.py +75 -0
- truefoundry/ml/autogen/client_README.md +22 -5
- truefoundry/ml/autogen/entities/artifacts.py +19 -2
- truefoundry/ml/log_types/artifacts/artifact.py +10 -6
- truefoundry/ml/log_types/artifacts/dataset.py +13 -5
- truefoundry/ml/log_types/artifacts/general_artifact.py +3 -1
- truefoundry/ml/log_types/artifacts/model.py +172 -194
- truefoundry/ml/log_types/artifacts/utils.py +43 -26
- truefoundry/ml/log_types/image/image.py +2 -0
- truefoundry/ml/log_types/plot.py +2 -0
- truefoundry/ml/mlfoundry_api.py +47 -18
- truefoundry/ml/mlfoundry_run.py +27 -12
- truefoundry/ml/model_framework.py +169 -0
- truefoundry/workflow/__init__.py +3 -1
- truefoundry/workflow/remote_filesystem/__init__.py +8 -0
- truefoundry/workflow/remote_filesystem/logger.py +36 -0
- truefoundry/{common → workflow/remote_filesystem}/tfy_signed_url_client.py +1 -2
- truefoundry/{common → workflow/remote_filesystem}/tfy_signed_url_fs.py +5 -2
- {truefoundry-0.4.4rc12.dist-info → truefoundry-0.5.0rc1.dist-info}/METADATA +1 -1
- {truefoundry-0.4.4rc12.dist-info → truefoundry-0.5.0rc1.dist-info}/RECORD +54 -36
- truefoundry/ml/autogen/client/api/python_deployment_config_api.py +0 -201
- {truefoundry-0.4.4rc12.dist-info → truefoundry-0.5.0rc1.dist-info}/WHEEL +0 -0
- {truefoundry-0.4.4rc12.dist-info → truefoundry-0.5.0rc1.dist-info}/entry_points.txt +0 -0
|
@@ -2,31 +2,31 @@ import json
|
|
|
2
2
|
import logging
|
|
3
3
|
import os
|
|
4
4
|
import posixpath
|
|
5
|
-
import shutil
|
|
6
|
-
import tempfile
|
|
7
5
|
from pathlib import Path
|
|
8
6
|
from typing import Any, Dict, Optional, Sequence, Tuple, Union
|
|
9
7
|
|
|
10
8
|
from truefoundry.ml.exceptions import MlFoundryException
|
|
11
9
|
from truefoundry.ml.log_types.artifacts.constants import DESCRIPTION_MAX_LENGTH
|
|
12
10
|
|
|
13
|
-
logger = logging.getLogger(
|
|
11
|
+
logger = logging.getLogger(__name__)
|
|
14
12
|
|
|
15
13
|
|
|
16
|
-
def _copy_tree(
|
|
14
|
+
def _copy_tree(
|
|
15
|
+
root_dir: str, src_path: str, dest_path: str, dest_to_src: Dict[str, str]
|
|
16
|
+
):
|
|
17
17
|
os.makedirs(dest_path, exist_ok=True)
|
|
18
18
|
for item in os.listdir(src_path):
|
|
19
19
|
src = os.path.join(src_path, item)
|
|
20
20
|
dest = os.path.join(dest_path, item)
|
|
21
21
|
if os.path.isdir(src):
|
|
22
22
|
_copy_tree(
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
23
|
+
root_dir=root_dir,
|
|
24
|
+
src_path=src,
|
|
25
|
+
dest_path=dest,
|
|
26
|
+
dest_to_src=dest_to_src,
|
|
27
27
|
)
|
|
28
28
|
else:
|
|
29
|
-
|
|
29
|
+
dest_to_src[dest] = src
|
|
30
30
|
|
|
31
31
|
|
|
32
32
|
def is_destination_path_dirlike(dest_path) -> bool:
|
|
@@ -48,9 +48,9 @@ def _copy_additional_files(
|
|
|
48
48
|
model_dir: Optional[str], # relative to files_dir e.g "model/"
|
|
49
49
|
additional_files: Sequence[Tuple[Union[str, Path], Optional[str]]],
|
|
50
50
|
ignore_model_dir_dest_conflict: bool = False,
|
|
51
|
-
|
|
51
|
+
existing_dest_to_src_map: Optional[Dict[str, str]] = None,
|
|
52
|
+
) -> Dict[str, str]:
|
|
52
53
|
"""
|
|
53
|
-
|
|
54
54
|
File copying examples:
|
|
55
55
|
# non ambiguous
|
|
56
56
|
# a.txt -> /tmp/ result /tmp/a.txt
|
|
@@ -69,6 +69,7 @@ def _copy_additional_files(
|
|
|
69
69
|
# .gitignore -> /tmp/.gitinclude result /tmp/.gitinclude
|
|
70
70
|
# a.txt -> /tmp/a result /tmp/a
|
|
71
71
|
"""
|
|
72
|
+
dest_to_src = existing_dest_to_src_map or {}
|
|
72
73
|
for src_path, dest_path in additional_files:
|
|
73
74
|
src_path = str(src_path)
|
|
74
75
|
if not os.path.exists(src_path):
|
|
@@ -99,26 +100,47 @@ def _copy_additional_files(
|
|
|
99
100
|
_src = src_path
|
|
100
101
|
if is_destination_path_dirlike(dest_abs_path):
|
|
101
102
|
os.makedirs(dest_abs_path, exist_ok=True)
|
|
102
|
-
|
|
103
|
-
os.path.join(dest_abs_path, os.path.basename(_src)), files_abs_dir
|
|
104
|
-
)
|
|
103
|
+
dest_abs_path = os.path.join(dest_abs_path, os.path.basename(_src))
|
|
105
104
|
else:
|
|
106
105
|
os.makedirs(os.path.dirname(dest_abs_path), exist_ok=True)
|
|
107
|
-
|
|
106
|
+
_dst = os.path.relpath(dest_abs_path, files_abs_dir)
|
|
108
107
|
logger.info(f"Adding file {_src} as /{_dst}")
|
|
109
|
-
|
|
108
|
+
dest_to_src[dest_abs_path] = src_path
|
|
110
109
|
elif os.path.isdir(src_path):
|
|
111
110
|
os.makedirs(dest_abs_path, exist_ok=True)
|
|
112
111
|
_src = src_path.rstrip("/")
|
|
113
112
|
_dst = os.path.relpath(dest_abs_path, files_abs_dir).rstrip("/")
|
|
114
113
|
logger.info(f"Adding contents of {_src}/ to /{_dst}/")
|
|
115
114
|
_copy_tree(
|
|
115
|
+
root_dir=root_dir,
|
|
116
116
|
src_path=src_path,
|
|
117
117
|
dest_path=dest_abs_path,
|
|
118
|
-
|
|
119
|
-
ignore_dangling_symlinks=False,
|
|
118
|
+
dest_to_src=dest_to_src,
|
|
120
119
|
)
|
|
121
120
|
|
|
121
|
+
return dest_to_src
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def _make_dest_to_src_map_from_dir(root_dir: str) -> Dict[str, str]:
|
|
125
|
+
dest_to_src_map = {}
|
|
126
|
+
for root, _, files in os.walk(root_dir):
|
|
127
|
+
for file in files:
|
|
128
|
+
src = os.path.join(root, file)
|
|
129
|
+
dest = src
|
|
130
|
+
dest_to_src_map[dest] = src
|
|
131
|
+
return dest_to_src_map
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def _get_src_dest_pairs(
|
|
135
|
+
root_dir: str,
|
|
136
|
+
dest_to_src_map: Dict[str, str],
|
|
137
|
+
) -> Sequence[Tuple[str, str]]:
|
|
138
|
+
src_dest_pairs = [
|
|
139
|
+
(src_path, os.path.relpath(dest_abs_path, root_dir))
|
|
140
|
+
for dest_abs_path, src_path in dest_to_src_map.items()
|
|
141
|
+
]
|
|
142
|
+
return src_dest_pairs
|
|
143
|
+
|
|
122
144
|
|
|
123
145
|
def _validate_description(description: Optional[str]):
|
|
124
146
|
if description is not None:
|
|
@@ -141,8 +163,8 @@ def _validate_artifact_metadata(metadata: Dict[str, Any]):
|
|
|
141
163
|
raise MlFoundryException("`metadata` must be json serializable dict") from ve
|
|
142
164
|
|
|
143
165
|
|
|
144
|
-
def
|
|
145
|
-
|
|
166
|
+
def calculate_total_size(
|
|
167
|
+
paths: Sequence[str],
|
|
146
168
|
):
|
|
147
169
|
"""
|
|
148
170
|
Tells about the size of the artifact
|
|
@@ -153,9 +175,4 @@ def calculate_local_directory_size(
|
|
|
153
175
|
Returns:
|
|
154
176
|
total size of the artifact
|
|
155
177
|
"""
|
|
156
|
-
|
|
157
|
-
for path, _dirs, files in os.walk(directory.name):
|
|
158
|
-
for f in files:
|
|
159
|
-
file_path = os.path.join(path, f)
|
|
160
|
-
total_size += os.stat(file_path).st_size
|
|
161
|
-
return total_size
|
|
178
|
+
return sum(os.stat(os.path.realpath(file_path)).st_size for file_path in paths)
|
|
@@ -18,6 +18,7 @@ from truefoundry.ml.log_types.artifacts.constants import (
|
|
|
18
18
|
FILES_DIR,
|
|
19
19
|
INTERNAL_METADATA_PATH,
|
|
20
20
|
)
|
|
21
|
+
from truefoundry.ml.log_types.artifacts.utils import _make_dest_to_src_map_from_dir
|
|
21
22
|
from truefoundry.ml.log_types.image.constants import (
|
|
22
23
|
DEFAULT_IMAGE_FORMAT,
|
|
23
24
|
IMAGE_KEY_REGEX,
|
|
@@ -344,6 +345,7 @@ class Image:
|
|
|
344
345
|
name=key,
|
|
345
346
|
artifact_type=ArtifactType.IMAGE,
|
|
346
347
|
artifact_dir=temp_dir,
|
|
348
|
+
dest_to_src_map=_make_dest_to_src_map_from_dir(root_dir=temp_dir.name),
|
|
347
349
|
internal_metadata=internal_metadata,
|
|
348
350
|
step=step,
|
|
349
351
|
)
|
truefoundry/ml/log_types/plot.py
CHANGED
|
@@ -18,6 +18,7 @@ from truefoundry.ml.log_types.artifacts.constants import (
|
|
|
18
18
|
FILES_DIR,
|
|
19
19
|
INTERNAL_METADATA_PATH,
|
|
20
20
|
)
|
|
21
|
+
from truefoundry.ml.log_types.artifacts.utils import _make_dest_to_src_map_from_dir
|
|
21
22
|
from truefoundry.ml.log_types.pydantic_base import PydanticBase
|
|
22
23
|
from truefoundry.ml.log_types.utils import validate_key_name
|
|
23
24
|
from truefoundry.pydantic_v1 import BaseModel
|
|
@@ -186,6 +187,7 @@ class Plot:
|
|
|
186
187
|
name=key,
|
|
187
188
|
artifact_type=ArtifactType.PLOT,
|
|
188
189
|
artifact_dir=temp_dir,
|
|
190
|
+
dest_to_src_map=_make_dest_to_src_map_from_dir(root_dir=temp_dir.name),
|
|
189
191
|
internal_metadata=internal_metadata,
|
|
190
192
|
step=step,
|
|
191
193
|
)
|
truefoundry/ml/mlfoundry_api.py
CHANGED
|
@@ -2,7 +2,17 @@ import os
|
|
|
2
2
|
import time
|
|
3
3
|
import uuid
|
|
4
4
|
from pathlib import Path
|
|
5
|
-
from typing import
|
|
5
|
+
from typing import (
|
|
6
|
+
TYPE_CHECKING,
|
|
7
|
+
Any,
|
|
8
|
+
Dict,
|
|
9
|
+
Iterator,
|
|
10
|
+
List,
|
|
11
|
+
Optional,
|
|
12
|
+
Sequence,
|
|
13
|
+
Tuple,
|
|
14
|
+
Union,
|
|
15
|
+
)
|
|
6
16
|
|
|
7
17
|
import coolname
|
|
8
18
|
import pandas as pd
|
|
@@ -42,7 +52,11 @@ from truefoundry.ml.log_types.artifacts.artifact import (
|
|
|
42
52
|
)
|
|
43
53
|
from truefoundry.ml.log_types.artifacts.dataset import DataDirectory
|
|
44
54
|
from truefoundry.ml.log_types.artifacts.general_artifact import _log_artifact_version
|
|
45
|
-
from truefoundry.ml.log_types.artifacts.model import
|
|
55
|
+
from truefoundry.ml.log_types.artifacts.model import (
|
|
56
|
+
BlobStorageModelDirectory,
|
|
57
|
+
ModelVersion,
|
|
58
|
+
_log_model_version,
|
|
59
|
+
)
|
|
46
60
|
from truefoundry.ml.logger import logger
|
|
47
61
|
from truefoundry.ml.mlfoundry_run import MlFoundryRun
|
|
48
62
|
from truefoundry.ml.session import (
|
|
@@ -57,6 +71,9 @@ from truefoundry.ml.validation_utils import (
|
|
|
57
71
|
_validate_run_name,
|
|
58
72
|
)
|
|
59
73
|
|
|
74
|
+
if TYPE_CHECKING:
|
|
75
|
+
from truefoundry.ml import ModelFrameworkType
|
|
76
|
+
|
|
60
77
|
_SEARCH_MAX_RESULTS_DEFAULT = 1000
|
|
61
78
|
|
|
62
79
|
_INTERNAL_ENV_VARS = [
|
|
@@ -1209,12 +1226,12 @@ class MlFoundry:
|
|
|
1209
1226
|
*,
|
|
1210
1227
|
ml_repo: str,
|
|
1211
1228
|
name: str,
|
|
1212
|
-
model_file_or_folder: str,
|
|
1213
|
-
framework: Optional[Union[ModelFramework, str]],
|
|
1229
|
+
model_file_or_folder: Union[str, BlobStorageModelDirectory],
|
|
1214
1230
|
additional_files: Sequence[Tuple[Union[str, Path], Optional[str]]] = (),
|
|
1215
1231
|
description: Optional[str] = None,
|
|
1216
1232
|
metadata: Optional[Dict[str, Any]] = None,
|
|
1217
1233
|
progress: Optional[bool] = None,
|
|
1234
|
+
framework: Optional[Union[str, ModelFramework, "ModelFrameworkType"]] = None,
|
|
1218
1235
|
) -> ModelVersion:
|
|
1219
1236
|
"""
|
|
1220
1237
|
Serialize and log a versioned model under the current ml_repo. Each logged model generates a new version
|
|
@@ -1226,12 +1243,24 @@ class MlFoundry:
|
|
|
1226
1243
|
name (str): Name of the model. If a model with this name already exists under the current ML Repo,
|
|
1227
1244
|
the logged model will be added as a new version under that `name`. If no models exist with the given
|
|
1228
1245
|
`name`, the given model will be logged as version 1.
|
|
1229
|
-
|
|
1230
|
-
|
|
1231
|
-
|
|
1232
|
-
|
|
1233
|
-
|
|
1234
|
-
|
|
1246
|
+
|
|
1247
|
+
model_file_or_folder (Union[str, BlobStorageModelDirectory]):
|
|
1248
|
+
str:
|
|
1249
|
+
Path to either a single file or a folder containing model files.
|
|
1250
|
+
This folder is typically created using serialization methods from libraries or frameworks,
|
|
1251
|
+
e.g., `joblib.dump`, `model.save_pretrained(...)`, `torch.save(...)`, or `model.save(...)`.
|
|
1252
|
+
BlobStorageModelDirectory:
|
|
1253
|
+
uri (str): URI to the model file or folder in a storage integration associated with the specified ML Repo.
|
|
1254
|
+
The model files or folder must reside within the same storage integration as the specified ML Repo.
|
|
1255
|
+
Accepted URI formats include `s3://integration-bucket-name/prefix/path/to/model` or `gs://integration-bucket-name/prefix/path/to/model`.
|
|
1256
|
+
If the URI points to a model in a different storage integration, an error will be raised indicating "Invalid source URI."
|
|
1257
|
+
|
|
1258
|
+
framework (Optional[Union[ModelFramework, ModelFrameworkType]]): Framework used for model serialization.
|
|
1259
|
+
Supported frameworks values (ModelFrameworkType) can be imported from `from truefoundry.ml import *`.
|
|
1260
|
+
Supported frameworks can be found in `truefoundry.ml.enums.ModelFramework`.
|
|
1261
|
+
Can also be `None` if the framework is not known or not supported.
|
|
1262
|
+
**Deprecated**: Prefer `ModelFrameworkType` over `enums.ModelFramework`.
|
|
1263
|
+
|
|
1235
1264
|
additional_files (Sequence[Tuple[Union[str, Path], Optional[str]]], optional): A list of pairs
|
|
1236
1265
|
of (source path, destination path) to add additional files and folders
|
|
1237
1266
|
to the model version contents. The first member of the pair should be a file or directory path
|
|
@@ -1245,10 +1274,12 @@ class MlFoundry:
|
|
|
1245
1274
|
You can also add additional files to model/ subdirectory by specifying the destination path as model/
|
|
1246
1275
|
|
|
1247
1276
|
```python
|
|
1277
|
+
from truefoundry.ml import TensorFlowFramework
|
|
1278
|
+
|
|
1248
1279
|
run.log_model(
|
|
1249
1280
|
name="xyz",
|
|
1250
1281
|
model_file_or_folder="clf.joblib",
|
|
1251
|
-
framework=
|
|
1282
|
+
framework=TensorFlowFramework(),
|
|
1252
1283
|
additional_files=[("foo.txt", "foo/bar/foo.txt"), ("tokenizer/", "foo/tokenizer/")]
|
|
1253
1284
|
)
|
|
1254
1285
|
```
|
|
@@ -1281,8 +1312,7 @@ class MlFoundry:
|
|
|
1281
1312
|
### Sklearn
|
|
1282
1313
|
|
|
1283
1314
|
```python
|
|
1284
|
-
from truefoundry.ml import get_client
|
|
1285
|
-
from truefoundry.ml.enums import ModelFramework
|
|
1315
|
+
from truefoundry.ml import get_client, SklearnFramework
|
|
1286
1316
|
|
|
1287
1317
|
import joblib
|
|
1288
1318
|
import numpy as np
|
|
@@ -1307,7 +1337,7 @@ class MlFoundry:
|
|
|
1307
1337
|
ml_repo="my-classification-project",
|
|
1308
1338
|
name="my-sklearn-model",
|
|
1309
1339
|
model_file_or_folder="sklearn-pipeline.joblib",
|
|
1310
|
-
framework=
|
|
1340
|
+
framework=SklearnFramework(),
|
|
1311
1341
|
metadata={"accuracy": 0.99, "f1": 0.80},
|
|
1312
1342
|
step=1, # step number, useful when using iterative algorithms like SGD
|
|
1313
1343
|
)
|
|
@@ -1317,8 +1347,7 @@ class MlFoundry:
|
|
|
1317
1347
|
### Huggingface Transformers
|
|
1318
1348
|
|
|
1319
1349
|
```python
|
|
1320
|
-
from truefoundry.ml import get_client
|
|
1321
|
-
from truefoundry.ml.enums import ModelFramework
|
|
1350
|
+
from truefoundry.ml import get_client, TransformersFramework, LibraryName
|
|
1322
1351
|
|
|
1323
1352
|
import torch
|
|
1324
1353
|
from transformers import AutoTokenizer, AutoConfig, pipeline, AutoModelForCausalLM
|
|
@@ -1342,7 +1371,7 @@ class MlFoundry:
|
|
|
1342
1371
|
ml_repo="my-llm-project",
|
|
1343
1372
|
name="my-transformers-model",
|
|
1344
1373
|
model_file_or_folder="my-transformers-model/",
|
|
1345
|
-
framework=
|
|
1374
|
+
framework=TransformersFramework(library_name=LibraryName.TRANSFORMERS, pipeline_tag='text-generation')
|
|
1346
1375
|
)
|
|
1347
1376
|
print(model_version.fqn)
|
|
1348
1377
|
```
|
|
@@ -1356,12 +1385,12 @@ class MlFoundry:
|
|
|
1356
1385
|
ml_repo_id=ml_repo_id,
|
|
1357
1386
|
name=name,
|
|
1358
1387
|
model_file_or_folder=model_file_or_folder,
|
|
1359
|
-
framework=framework,
|
|
1360
1388
|
additional_files=additional_files,
|
|
1361
1389
|
description=description,
|
|
1362
1390
|
metadata=metadata,
|
|
1363
1391
|
step=None,
|
|
1364
1392
|
progress=progress,
|
|
1393
|
+
framework=framework,
|
|
1365
1394
|
)
|
|
1366
1395
|
logger.info(f"Logged model successfully with fqn {model_version.fqn!r}")
|
|
1367
1396
|
return model_version
|
truefoundry/ml/mlfoundry_run.py
CHANGED
|
@@ -20,7 +20,7 @@ from urllib.parse import urljoin, urlsplit
|
|
|
20
20
|
|
|
21
21
|
from truefoundry import version
|
|
22
22
|
from truefoundry.common.utils import relogin_error_message
|
|
23
|
-
from truefoundry.ml import constants
|
|
23
|
+
from truefoundry.ml import constants
|
|
24
24
|
from truefoundry.ml.autogen.client import ( # type: ignore[attr-defined]
|
|
25
25
|
ArtifactType,
|
|
26
26
|
DeleteRunRequest,
|
|
@@ -40,13 +40,17 @@ from truefoundry.ml.autogen.client import ( # type: ignore[attr-defined]
|
|
|
40
40
|
UpdateRunRequestDto,
|
|
41
41
|
)
|
|
42
42
|
from truefoundry.ml.entities import Metric
|
|
43
|
-
from truefoundry.ml.enums import RunStatus
|
|
43
|
+
from truefoundry.ml.enums import ModelFramework, RunStatus
|
|
44
44
|
from truefoundry.ml.exceptions import MlFoundryException
|
|
45
45
|
from truefoundry.ml.internal_namespace import NAMESPACE
|
|
46
46
|
from truefoundry.ml.log_types import Image, Plot
|
|
47
47
|
from truefoundry.ml.log_types.artifacts.artifact import ArtifactPath, ArtifactVersion
|
|
48
48
|
from truefoundry.ml.log_types.artifacts.general_artifact import _log_artifact_version
|
|
49
|
-
from truefoundry.ml.log_types.artifacts.model import
|
|
49
|
+
from truefoundry.ml.log_types.artifacts.model import (
|
|
50
|
+
BlobStorageModelDirectory,
|
|
51
|
+
ModelVersion,
|
|
52
|
+
_log_model_version,
|
|
53
|
+
)
|
|
50
54
|
from truefoundry.ml.logger import logger
|
|
51
55
|
from truefoundry.ml.run_utils import ParamsType, flatten_dict, process_params
|
|
52
56
|
from truefoundry.ml.session import ACTIVE_RUNS, _get_api_client, get_active_session
|
|
@@ -61,6 +65,8 @@ if TYPE_CHECKING:
|
|
|
61
65
|
import matplotlib
|
|
62
66
|
import plotly
|
|
63
67
|
|
|
68
|
+
from truefoundry.ml import ModelFrameworkType
|
|
69
|
+
|
|
64
70
|
|
|
65
71
|
def _ensure_not_deleted(method):
|
|
66
72
|
@functools.wraps(method)
|
|
@@ -920,13 +926,13 @@ class MlFoundryRun:
|
|
|
920
926
|
self,
|
|
921
927
|
*,
|
|
922
928
|
name: str,
|
|
923
|
-
model_file_or_folder: str,
|
|
924
|
-
framework: Optional[Union[enums.ModelFramework, str]],
|
|
929
|
+
model_file_or_folder: Union[str, BlobStorageModelDirectory],
|
|
925
930
|
additional_files: Sequence[Tuple[Union[str, Path], Optional[str]]] = (),
|
|
926
931
|
description: Optional[str] = None,
|
|
927
932
|
metadata: Optional[Dict[str, Any]] = None,
|
|
928
933
|
step: int = 0,
|
|
929
934
|
progress: Optional[bool] = None,
|
|
935
|
+
framework: Optional[Union[str, ModelFramework, "ModelFrameworkType"]] = None,
|
|
930
936
|
) -> ModelVersion:
|
|
931
937
|
# TODO (chiragjn): Document mapping of framework to list of valid model save kwargs
|
|
932
938
|
# TODO (chiragjn): Add more examples
|
|
@@ -939,12 +945,21 @@ class MlFoundryRun:
|
|
|
939
945
|
name (str): Name of the model. If a model with this name already exists under the current ML Repo,
|
|
940
946
|
the logged model will be added as a new version under that `name`. If no models exist with the given
|
|
941
947
|
`name`, the given model will be logged as version 1.
|
|
942
|
-
model_file_or_folder (str):
|
|
943
|
-
|
|
944
|
-
|
|
945
|
-
|
|
946
|
-
|
|
947
|
-
|
|
948
|
+
model_file_or_folder (Union[str, BlobStorageModelDirectory]):
|
|
949
|
+
str:
|
|
950
|
+
Path to either a single file or a folder containing model files.
|
|
951
|
+
This folder is typically created using serialization methods from libraries or frameworks,
|
|
952
|
+
e.g., `joblib.dump`, `model.save_pretrained(...)`, `torch.save(...)`, or `model.save(...)`.
|
|
953
|
+
BlobStorageModelDirectory:
|
|
954
|
+
uri (str): URI to the model file or folder in a storage integration associated with the specified ML Repo.
|
|
955
|
+
The model files or folder must reside within the same storage integration as the specified ML Repo.
|
|
956
|
+
Accepted URI formats include `s3://integration-bucket-name/prefix/path/to/model` or `gs://integration-bucket-name/prefix/path/to/model`.
|
|
957
|
+
If the URI points to a model in a different storage integration, an error will be raised indicating "Invalid source URI."
|
|
958
|
+
framework (Optional[Union[ModelFramework, ModelFrameworkType]]): Framework used for model serialization.
|
|
959
|
+
Supported frameworks values (ModelFrameworkType) can be imported from `from truefoundry.ml import *`.
|
|
960
|
+
Supported frameworks can be found in `truefoundry.ml.enums.ModelFramework`.
|
|
961
|
+
Can also be `None` if the framework is not known or not supported.
|
|
962
|
+
**Deprecated**: Prefer `ModelFrameworkType` over `enums.ModelFramework`.
|
|
948
963
|
additional_files (Sequence[Tuple[Union[str, Path], Optional[str]]], optional): A list of pairs
|
|
949
964
|
of (source path, destination path) to add additional files and folders
|
|
950
965
|
to the model version contents. The first member of the pair should be a file or directory path
|
|
@@ -1064,12 +1079,12 @@ class MlFoundryRun:
|
|
|
1064
1079
|
run=self,
|
|
1065
1080
|
name=name,
|
|
1066
1081
|
model_file_or_folder=model_file_or_folder,
|
|
1067
|
-
framework=framework,
|
|
1068
1082
|
additional_files=additional_files,
|
|
1069
1083
|
description=description,
|
|
1070
1084
|
metadata=metadata,
|
|
1071
1085
|
step=step,
|
|
1072
1086
|
progress=progress,
|
|
1087
|
+
framework=framework,
|
|
1073
1088
|
)
|
|
1074
1089
|
logger.info(f"Logged model successfully with fqn {model_version.fqn!r}")
|
|
1075
1090
|
return model_version
|
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
import warnings
|
|
2
|
+
from typing import Literal, Optional, Union, get_args
|
|
3
|
+
|
|
4
|
+
from truefoundry.ml import ModelFramework
|
|
5
|
+
from truefoundry.ml.autogen.entities import artifacts as autogen_artifacts
|
|
6
|
+
from truefoundry.pydantic_v1 import BaseModel, Field
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class FastAIFramework(autogen_artifacts.FastAIFramework):
|
|
10
|
+
"""FastAI model Framework"""
|
|
11
|
+
|
|
12
|
+
type: Literal["fastai"] = "fastai"
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class GluonFramework(autogen_artifacts.GluonFramework):
|
|
16
|
+
"""Gluon model Framework"""
|
|
17
|
+
|
|
18
|
+
type: Literal["gluon"] = "gluon"
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class H2OFramework(autogen_artifacts.H2OFramework):
|
|
22
|
+
"""H2O model Framework"""
|
|
23
|
+
|
|
24
|
+
type: Literal["h2o"] = "h2o"
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class KerasFramework(autogen_artifacts.KerasFramework):
|
|
28
|
+
"""Keras model Framework"""
|
|
29
|
+
|
|
30
|
+
type: Literal["keras"] = "keras"
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class LightGBMFramework(autogen_artifacts.LightGBMFramework):
|
|
34
|
+
"""LightGBM model Framework"""
|
|
35
|
+
|
|
36
|
+
type: Literal["lightgbm"] = "lightgbm"
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class ONNXFramework(autogen_artifacts.ONNXFramework):
|
|
40
|
+
"""ONNX model Framework"""
|
|
41
|
+
|
|
42
|
+
type: Literal["onnx"] = "onnx"
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class PaddleFramework(autogen_artifacts.PaddleFramework):
|
|
46
|
+
"""Paddle model Framework"""
|
|
47
|
+
|
|
48
|
+
type: Literal["paddle"] = "paddle"
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
class PyTorchFramework(autogen_artifacts.PyTorchFramework):
|
|
52
|
+
"""PyTorch model Framework"""
|
|
53
|
+
|
|
54
|
+
type: Literal["pytorch"] = "pytorch"
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
class SklearnFramework(autogen_artifacts.SklearnFramework):
|
|
58
|
+
"""Sklearn model Framework"""
|
|
59
|
+
|
|
60
|
+
type: Literal["sklearn"] = "sklearn"
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
class SpaCyFramework(autogen_artifacts.SpaCyFramework):
|
|
64
|
+
"""SpaCy model Framework"""
|
|
65
|
+
|
|
66
|
+
type: Literal["spacy"] = "spacy"
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
class StatsModelsFramework(autogen_artifacts.StatsModelsFramework):
|
|
70
|
+
"""StatsModels model Framework"""
|
|
71
|
+
|
|
72
|
+
type: Literal["statsmodels"] = "statsmodels"
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
class TensorFlowFramework(autogen_artifacts.TensorFlowFramework):
|
|
76
|
+
"""TensorFlow model Framework"""
|
|
77
|
+
|
|
78
|
+
type: Literal["tensorflow"] = "tensorflow"
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
class TransformersFramework(autogen_artifacts.TransformersFramework):
|
|
82
|
+
"""Transformers model Framework"""
|
|
83
|
+
|
|
84
|
+
type: Literal["transformers"] = "transformers"
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
class XGBoostFramework(autogen_artifacts.XGBoostFramework):
|
|
88
|
+
"""XGBoost model Framework"""
|
|
89
|
+
|
|
90
|
+
type: Literal["xgboost"] = "xgboost"
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
# Union of all the model frameworks
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
ModelFrameworkType = Union[
|
|
97
|
+
FastAIFramework,
|
|
98
|
+
GluonFramework,
|
|
99
|
+
H2OFramework,
|
|
100
|
+
KerasFramework,
|
|
101
|
+
LightGBMFramework,
|
|
102
|
+
ONNXFramework,
|
|
103
|
+
PaddleFramework,
|
|
104
|
+
PyTorchFramework,
|
|
105
|
+
SklearnFramework,
|
|
106
|
+
SpaCyFramework,
|
|
107
|
+
StatsModelsFramework,
|
|
108
|
+
TensorFlowFramework,
|
|
109
|
+
TransformersFramework,
|
|
110
|
+
XGBoostFramework,
|
|
111
|
+
]
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
class _ModelFramework(BaseModel):
|
|
115
|
+
__root__: ModelFrameworkType = Field(discriminator="type")
|
|
116
|
+
|
|
117
|
+
@classmethod
|
|
118
|
+
def to_model_framework_type(
|
|
119
|
+
cls,
|
|
120
|
+
framework: Optional[Union[str, ModelFramework, "ModelFrameworkType"]] = None,
|
|
121
|
+
) -> Optional["ModelFrameworkType"]:
|
|
122
|
+
"""
|
|
123
|
+
Converts a ModelFramework or string representation to a ModelFrameworkType object.
|
|
124
|
+
|
|
125
|
+
Args:
|
|
126
|
+
framework (Optional[Union[str, ModelFramework, ModelFrameworkType]]): ModelFrameworkType or equivalent input.
|
|
127
|
+
Supported frameworks can be found in `truefoundry.ml.enums.ModelFramework`.
|
|
128
|
+
May be `None` if the framework is unknown or unsupported.
|
|
129
|
+
**Deprecated**: Prefer passing a `ModelFrameworkType` instance.
|
|
130
|
+
|
|
131
|
+
Returns:
|
|
132
|
+
ModelFrameworkType corresponding to the input, or None if the input is None.
|
|
133
|
+
"""
|
|
134
|
+
if framework is None:
|
|
135
|
+
return None
|
|
136
|
+
|
|
137
|
+
# Issue a deprecation warning for str and ModelFramework types
|
|
138
|
+
if isinstance(framework, (str, ModelFramework)):
|
|
139
|
+
warnings.warn(
|
|
140
|
+
"Passing a string or ModelFramework Enum is deprecated. Please use a ModelFrameworkType object.",
|
|
141
|
+
DeprecationWarning,
|
|
142
|
+
stacklevel=2,
|
|
143
|
+
)
|
|
144
|
+
|
|
145
|
+
# Convert string to ModelFramework
|
|
146
|
+
if isinstance(framework, str):
|
|
147
|
+
framework = ModelFramework(framework)
|
|
148
|
+
|
|
149
|
+
# Convert ModelFramework to ModelFrameworkType
|
|
150
|
+
if isinstance(framework, ModelFramework):
|
|
151
|
+
if framework == ModelFramework.UNKNOWN:
|
|
152
|
+
return None
|
|
153
|
+
return cls.parse_obj({"type": framework.value}).__root__
|
|
154
|
+
|
|
155
|
+
# Directly return if already a ModelFrameworkType
|
|
156
|
+
if isinstance(framework, get_args(ModelFrameworkType)):
|
|
157
|
+
return framework
|
|
158
|
+
|
|
159
|
+
raise ValueError(
|
|
160
|
+
"framework must be a string, ModelFramework enum, or ModelFrameworkType object"
|
|
161
|
+
)
|
|
162
|
+
|
|
163
|
+
@classmethod
|
|
164
|
+
def from_dict(cls, obj: dict) -> ModelFrameworkType:
|
|
165
|
+
"""Create an instance of ModelFramework from a dict"""
|
|
166
|
+
if obj is None:
|
|
167
|
+
return None
|
|
168
|
+
|
|
169
|
+
return cls.parse_obj(obj).__root__
|
truefoundry/workflow/__init__.py
CHANGED
|
@@ -6,9 +6,9 @@ except ImportError:
|
|
|
6
6
|
|
|
7
7
|
from flytekit import conditional
|
|
8
8
|
from flytekit.types.directory import FlyteDirectory
|
|
9
|
+
from flytekit.types.file import FlyteFile
|
|
9
10
|
|
|
10
11
|
from truefoundry.common.constants import ENV_VARS
|
|
11
|
-
from truefoundry.common.tfy_signed_url_fs import SignedURLFileSystem
|
|
12
12
|
from truefoundry.deploy.v2.lib.patched_models import (
|
|
13
13
|
ContainerTaskConfig,
|
|
14
14
|
PythonTaskConfig,
|
|
@@ -18,6 +18,7 @@ from truefoundry.deploy.v2.lib.patched_models import (
|
|
|
18
18
|
from truefoundry.workflow.container_task import ContainerTask
|
|
19
19
|
from truefoundry.workflow.map_task import map_task
|
|
20
20
|
from truefoundry.workflow.python_task import PythonFunctionTask
|
|
21
|
+
from truefoundry.workflow.remote_filesystem.tfy_signed_url_fs import SignedURLFileSystem
|
|
21
22
|
from truefoundry.workflow.task import task
|
|
22
23
|
from truefoundry.workflow.workflow import ExecutionConfig, workflow
|
|
23
24
|
|
|
@@ -34,6 +35,7 @@ __all__ = [
|
|
|
34
35
|
"ContainerTaskConfig",
|
|
35
36
|
"PythonTaskConfig",
|
|
36
37
|
"ExecutionConfig",
|
|
38
|
+
"FlyteFile",
|
|
37
39
|
]
|
|
38
40
|
|
|
39
41
|
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import sys
|
|
3
|
+
from functools import wraps
|
|
4
|
+
from timeit import default_timer
|
|
5
|
+
|
|
6
|
+
logger = logging.getLogger("truefoundry.workflow.remote_filesystem")
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def init_logger(level=logging.WARNING):
|
|
10
|
+
handler = logging.StreamHandler(sys.stdout)
|
|
11
|
+
handler.setLevel(level)
|
|
12
|
+
formatter = logging.Formatter(
|
|
13
|
+
"[%(name)s] %(asctime)s %(levelname)s %(message)s",
|
|
14
|
+
datefmt="%Y-%m-%dT%H:%M:%S%z",
|
|
15
|
+
)
|
|
16
|
+
handler.setFormatter(formatter)
|
|
17
|
+
logger.addHandler(handler)
|
|
18
|
+
logger.setLevel(logging.DEBUG)
|
|
19
|
+
logger.propagate = False
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def log_time(prefix: str = ""):
|
|
23
|
+
"""Decorator to log the time taken by I/O operations."""
|
|
24
|
+
|
|
25
|
+
def decorator(func):
|
|
26
|
+
@wraps(func)
|
|
27
|
+
def wrapper(*args, **kwargs):
|
|
28
|
+
start_time = default_timer()
|
|
29
|
+
result = func(*args, **kwargs)
|
|
30
|
+
elapsed_time = default_timer() - start_time
|
|
31
|
+
logger.info(f"{prefix}{func.__name__} took {elapsed_time:.2f} seconds")
|
|
32
|
+
return result
|
|
33
|
+
|
|
34
|
+
return wrapper
|
|
35
|
+
|
|
36
|
+
return decorator
|
|
@@ -11,9 +11,8 @@ from truefoundry.common.constants import (
|
|
|
11
11
|
TFY_INTERNAL_SIGNED_URL_SERVER_TOKEN_ENV_KEY,
|
|
12
12
|
)
|
|
13
13
|
from truefoundry.common.request_utils import requests_retry_session
|
|
14
|
-
from truefoundry.common.utils import log_time
|
|
15
|
-
from truefoundry.logger import logger
|
|
16
14
|
from truefoundry.pydantic_v1 import BaseModel, Field
|
|
15
|
+
from truefoundry.workflow.remote_filesystem.logger import log_time, logger
|
|
17
16
|
|
|
18
17
|
LOG_PREFIX = "[tfy][fs]"
|
|
19
18
|
DEFAULT_TTL = ENV_VARS.TFY_INTERNAL_SIGNED_URL_SERVER_DEFAULT_TTL
|