truefoundry 0.3.3__py3-none-any.whl → 0.4.0.dev1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of truefoundry might be problematic. Click here for more details.
- truefoundry/cli/__main__.py +3 -17
- truefoundry/common/__init__.py +0 -0
- truefoundry/common/request_utils.py +56 -0
- truefoundry/deploy/cli/cli.py +1 -1
- truefoundry/deploy/cli/util.py +3 -1
- truefoundry/deploy/lib/auth/credential_provider.py +2 -12
- truefoundry/deploy/lib/clients/servicefoundry_client.py +0 -9
- truefoundry/deploy/lib/exceptions.py +1 -6
- truefoundry/deploy/lib/session.py +1 -16
- truefoundry/langchain/truefoundry_chat.py +1 -1
- truefoundry/langchain/truefoundry_embeddings.py +1 -1
- truefoundry/langchain/truefoundry_llm.py +1 -1
- truefoundry/langchain/utils.py +0 -41
- truefoundry/ml/__init__.py +46 -6
- truefoundry/ml/artifact/__init__.py +0 -0
- truefoundry/ml/artifact/truefoundry_artifact_repo.py +1120 -0
- truefoundry/ml/autogen/__init__.py +0 -0
- truefoundry/ml/autogen/client/__init__.py +373 -0
- truefoundry/ml/autogen/client/api/__init__.py +16 -0
- truefoundry/ml/autogen/client/api/auth_api.py +184 -0
- truefoundry/ml/autogen/client/api/deprecated_api.py +605 -0
- truefoundry/ml/autogen/client/api/experiments_api.py +2109 -0
- truefoundry/ml/autogen/client/api/health_api.py +299 -0
- truefoundry/ml/autogen/client/api/metrics_api.py +371 -0
- truefoundry/ml/autogen/client/api/mlfoundry_artifacts_api.py +7213 -0
- truefoundry/ml/autogen/client/api/python_deployment_config_api.py +201 -0
- truefoundry/ml/autogen/client/api/run_artifacts_api.py +231 -0
- truefoundry/ml/autogen/client/api/runs_api.py +2919 -0
- truefoundry/ml/autogen/client/api_client.py +822 -0
- truefoundry/ml/autogen/client/api_response.py +30 -0
- truefoundry/ml/autogen/client/configuration.py +489 -0
- truefoundry/ml/autogen/client/exceptions.py +161 -0
- truefoundry/ml/autogen/client/models/__init__.py +344 -0
- truefoundry/ml/autogen/client/models/add_custom_metrics_to_model_version_request_dto.py +69 -0
- truefoundry/ml/autogen/client/models/add_features_to_model_version_request_dto.py +83 -0
- truefoundry/ml/autogen/client/models/agent.py +125 -0
- truefoundry/ml/autogen/client/models/agent_app.py +118 -0
- truefoundry/ml/autogen/client/models/agent_open_api_tool.py +143 -0
- truefoundry/ml/autogen/client/models/agent_open_api_tool_with_fqn.py +144 -0
- truefoundry/ml/autogen/client/models/agent_with_fqn.py +127 -0
- truefoundry/ml/autogen/client/models/artifact_dto.py +115 -0
- truefoundry/ml/autogen/client/models/artifact_response_dto.py +75 -0
- truefoundry/ml/autogen/client/models/artifact_type.py +39 -0
- truefoundry/ml/autogen/client/models/artifact_version_dto.py +141 -0
- truefoundry/ml/autogen/client/models/artifact_version_response_dto.py +77 -0
- truefoundry/ml/autogen/client/models/artifact_version_status.py +35 -0
- truefoundry/ml/autogen/client/models/assistant_message.py +89 -0
- truefoundry/ml/autogen/client/models/authorize_user_for_model_request_dto.py +69 -0
- truefoundry/ml/autogen/client/models/authorize_user_for_model_version_request_dto.py +69 -0
- truefoundry/ml/autogen/client/models/backfill_default_storage_integration_id_request_dto.py +67 -0
- truefoundry/ml/autogen/client/models/blob_storage_reference.py +93 -0
- truefoundry/ml/autogen/client/models/body_get_search_runs_get.py +72 -0
- truefoundry/ml/autogen/client/models/chat_prompt.py +156 -0
- truefoundry/ml/autogen/client/models/chat_prompt_messages_inner.py +171 -0
- truefoundry/ml/autogen/client/models/columns_dto.py +73 -0
- truefoundry/ml/autogen/client/models/content.py +153 -0
- truefoundry/ml/autogen/client/models/content1.py +153 -0
- truefoundry/ml/autogen/client/models/content2.py +174 -0
- truefoundry/ml/autogen/client/models/content2_any_of_inner.py +150 -0
- truefoundry/ml/autogen/client/models/create_artifact_request_dto.py +74 -0
- truefoundry/ml/autogen/client/models/create_artifact_response_dto.py +66 -0
- truefoundry/ml/autogen/client/models/create_artifact_version_request_dto.py +74 -0
- truefoundry/ml/autogen/client/models/create_artifact_version_response_dto.py +66 -0
- truefoundry/ml/autogen/client/models/create_dataset_request_dto.py +76 -0
- truefoundry/ml/autogen/client/models/create_experiment_request_dto.py +94 -0
- truefoundry/ml/autogen/client/models/create_experiment_response_dto.py +67 -0
- truefoundry/ml/autogen/client/models/create_model_version_request_dto.py +95 -0
- truefoundry/ml/autogen/client/models/create_multi_part_upload_for_dataset_request_dto.py +73 -0
- truefoundry/ml/autogen/client/models/create_multi_part_upload_for_dataset_response_dto.py +79 -0
- truefoundry/ml/autogen/client/models/create_multi_part_upload_request_dto.py +73 -0
- truefoundry/ml/autogen/client/models/create_python_deployment_config_request_dto.py +72 -0
- truefoundry/ml/autogen/client/models/create_python_deployment_config_response_dto.py +68 -0
- truefoundry/ml/autogen/client/models/create_run_request_dto.py +97 -0
- truefoundry/ml/autogen/client/models/create_run_response_dto.py +76 -0
- truefoundry/ml/autogen/client/models/dataset_dto.py +112 -0
- truefoundry/ml/autogen/client/models/dataset_response_dto.py +75 -0
- truefoundry/ml/autogen/client/models/delete_artifact_versions_request_dto.py +65 -0
- truefoundry/ml/autogen/client/models/delete_dataset_request_dto.py +74 -0
- truefoundry/ml/autogen/client/models/delete_model_version_request_dto.py +65 -0
- truefoundry/ml/autogen/client/models/delete_run_request.py +65 -0
- truefoundry/ml/autogen/client/models/delete_tag_request_dto.py +68 -0
- truefoundry/ml/autogen/client/models/experiment_dto.py +127 -0
- truefoundry/ml/autogen/client/models/experiment_id_request_dto.py +67 -0
- truefoundry/ml/autogen/client/models/experiment_response_dto.py +75 -0
- truefoundry/ml/autogen/client/models/experiment_tag_dto.py +69 -0
- truefoundry/ml/autogen/client/models/feature_dto.py +68 -0
- truefoundry/ml/autogen/client/models/feature_value_type.py +35 -0
- truefoundry/ml/autogen/client/models/file_info_dto.py +76 -0
- truefoundry/ml/autogen/client/models/finalize_artifact_version_request_dto.py +101 -0
- truefoundry/ml/autogen/client/models/get_experiment_response_dto.py +88 -0
- truefoundry/ml/autogen/client/models/get_latest_run_log_response_dto.py +76 -0
- truefoundry/ml/autogen/client/models/get_metric_history_response.py +79 -0
- truefoundry/ml/autogen/client/models/get_signed_url_for_dataset_write_request_dto.py +68 -0
- truefoundry/ml/autogen/client/models/get_signed_urls_for_artifact_version_read_request_dto.py +68 -0
- truefoundry/ml/autogen/client/models/get_signed_urls_for_artifact_version_read_response_dto.py +81 -0
- truefoundry/ml/autogen/client/models/get_signed_urls_for_artifact_version_write_request_dto.py +69 -0
- truefoundry/ml/autogen/client/models/get_signed_urls_for_artifact_version_write_response_dto.py +83 -0
- truefoundry/ml/autogen/client/models/get_signed_urls_for_dataset_read_request_dto.py +68 -0
- truefoundry/ml/autogen/client/models/get_signed_urls_for_dataset_read_response_dto.py +81 -0
- truefoundry/ml/autogen/client/models/get_signed_urls_for_dataset_write_response_dto.py +81 -0
- truefoundry/ml/autogen/client/models/get_tenant_id_response_dto.py +74 -0
- truefoundry/ml/autogen/client/models/http_validation_error.py +82 -0
- truefoundry/ml/autogen/client/models/image_content_part.py +87 -0
- truefoundry/ml/autogen/client/models/image_url.py +75 -0
- truefoundry/ml/autogen/client/models/internal_metadata.py +180 -0
- truefoundry/ml/autogen/client/models/latest_run_log_dto.py +78 -0
- truefoundry/ml/autogen/client/models/list_artifact_versions_request_dto.py +107 -0
- truefoundry/ml/autogen/client/models/list_artifact_versions_response_dto.py +87 -0
- truefoundry/ml/autogen/client/models/list_artifacts_request_dto.py +96 -0
- truefoundry/ml/autogen/client/models/list_artifacts_response_dto.py +86 -0
- truefoundry/ml/autogen/client/models/list_colums_response_dto.py +75 -0
- truefoundry/ml/autogen/client/models/list_datasets_request_dto.py +78 -0
- truefoundry/ml/autogen/client/models/list_datasets_response_dto.py +86 -0
- truefoundry/ml/autogen/client/models/list_experiments_response_dto.py +86 -0
- truefoundry/ml/autogen/client/models/list_files_for_artifact_version_request_dto.py +76 -0
- truefoundry/ml/autogen/client/models/list_files_for_artifact_versions_response_dto.py +82 -0
- truefoundry/ml/autogen/client/models/list_files_for_dataset_request_dto.py +76 -0
- truefoundry/ml/autogen/client/models/list_files_for_dataset_response_dto.py +82 -0
- truefoundry/ml/autogen/client/models/list_latest_run_logs_response_dto.py +82 -0
- truefoundry/ml/autogen/client/models/list_metric_history_request_dto.py +69 -0
- truefoundry/ml/autogen/client/models/list_metric_history_response_dto.py +84 -0
- truefoundry/ml/autogen/client/models/list_model_version_response_dto.py +87 -0
- truefoundry/ml/autogen/client/models/list_model_versions_request_dto.py +93 -0
- truefoundry/ml/autogen/client/models/list_models_request_dto.py +89 -0
- truefoundry/ml/autogen/client/models/list_models_response_dto.py +84 -0
- truefoundry/ml/autogen/client/models/list_run_artifacts_response_dto.py +84 -0
- truefoundry/ml/autogen/client/models/list_run_logs_response_dto.py +82 -0
- truefoundry/ml/autogen/client/models/list_seed_experiments_response_dto.py +81 -0
- truefoundry/ml/autogen/client/models/log_batch_request_dto.py +106 -0
- truefoundry/ml/autogen/client/models/log_metric_request_dto.py +80 -0
- truefoundry/ml/autogen/client/models/log_param_request_dto.py +76 -0
- truefoundry/ml/autogen/client/models/method.py +37 -0
- truefoundry/ml/autogen/client/models/metric_collection_dto.py +82 -0
- truefoundry/ml/autogen/client/models/metric_dto.py +76 -0
- truefoundry/ml/autogen/client/models/mime_type.py +37 -0
- truefoundry/ml/autogen/client/models/model_configuration.py +103 -0
- truefoundry/ml/autogen/client/models/model_dto.py +122 -0
- truefoundry/ml/autogen/client/models/model_response_dto.py +75 -0
- truefoundry/ml/autogen/client/models/model_schema_dto.py +85 -0
- truefoundry/ml/autogen/client/models/model_version_dto.py +163 -0
- truefoundry/ml/autogen/client/models/model_version_response_dto.py +75 -0
- truefoundry/ml/autogen/client/models/multi_part_upload_dto.py +107 -0
- truefoundry/ml/autogen/client/models/multi_part_upload_response_dto.py +79 -0
- truefoundry/ml/autogen/client/models/multi_part_upload_storage_provider.py +34 -0
- truefoundry/ml/autogen/client/models/notify_artifact_version_failure_dto.py +65 -0
- truefoundry/ml/autogen/client/models/openapi_spec.py +152 -0
- truefoundry/ml/autogen/client/models/param_dto.py +66 -0
- truefoundry/ml/autogen/client/models/parameters.py +84 -0
- truefoundry/ml/autogen/client/models/prediction_type.py +34 -0
- truefoundry/ml/autogen/client/models/resolve_agent_app_response_dto.py +75 -0
- truefoundry/ml/autogen/client/models/restore_run_request_dto.py +65 -0
- truefoundry/ml/autogen/client/models/run_data_dto.py +104 -0
- truefoundry/ml/autogen/client/models/run_dto.py +84 -0
- truefoundry/ml/autogen/client/models/run_info_dto.py +105 -0
- truefoundry/ml/autogen/client/models/run_log_dto.py +90 -0
- truefoundry/ml/autogen/client/models/run_log_input_dto.py +80 -0
- truefoundry/ml/autogen/client/models/run_response_dto.py +75 -0
- truefoundry/ml/autogen/client/models/run_tag_dto.py +66 -0
- truefoundry/ml/autogen/client/models/search_runs_request_dto.py +94 -0
- truefoundry/ml/autogen/client/models/search_runs_response_dto.py +84 -0
- truefoundry/ml/autogen/client/models/set_experiment_tag_request_dto.py +73 -0
- truefoundry/ml/autogen/client/models/set_tag_request_dto.py +76 -0
- truefoundry/ml/autogen/client/models/signed_url_dto.py +69 -0
- truefoundry/ml/autogen/client/models/stop.py +152 -0
- truefoundry/ml/autogen/client/models/store_run_logs_request_dto.py +83 -0
- truefoundry/ml/autogen/client/models/system_message.py +89 -0
- truefoundry/ml/autogen/client/models/text.py +153 -0
- truefoundry/ml/autogen/client/models/text_content_part.py +84 -0
- truefoundry/ml/autogen/client/models/update_artifact_version_request_dto.py +74 -0
- truefoundry/ml/autogen/client/models/update_dataset_request_dto.py +74 -0
- truefoundry/ml/autogen/client/models/update_experiment_request_dto.py +74 -0
- truefoundry/ml/autogen/client/models/update_model_version_request_dto.py +93 -0
- truefoundry/ml/autogen/client/models/update_run_request_dto.py +78 -0
- truefoundry/ml/autogen/client/models/update_run_response_dto.py +76 -0
- truefoundry/ml/autogen/client/models/url.py +153 -0
- truefoundry/ml/autogen/client/models/user_message.py +89 -0
- truefoundry/ml/autogen/client/models/validation_error.py +87 -0
- truefoundry/ml/autogen/client/models/validation_error_loc_inner.py +154 -0
- truefoundry/ml/autogen/client/rest.py +426 -0
- truefoundry/ml/autogen/client_README.md +322 -0
- truefoundry/ml/cli/__init__.py +0 -0
- truefoundry/ml/cli/cli.py +18 -0
- truefoundry/ml/cli/commands/__init__.py +3 -0
- truefoundry/ml/cli/commands/download.py +87 -0
- truefoundry/ml/constants.py +84 -0
- truefoundry/ml/enums.py +70 -0
- truefoundry/ml/env_vars.py +13 -0
- truefoundry/ml/exceptions.py +8 -0
- truefoundry/ml/git_info.py +60 -0
- truefoundry/ml/internal_namespace.py +52 -0
- truefoundry/ml/log_types/__init__.py +4 -0
- truefoundry/ml/log_types/artifacts/artifact.py +427 -0
- truefoundry/ml/log_types/artifacts/constants.py +33 -0
- truefoundry/ml/log_types/artifacts/dataset.py +383 -0
- truefoundry/ml/log_types/artifacts/general_artifact.py +110 -0
- truefoundry/ml/log_types/artifacts/model.py +628 -0
- truefoundry/ml/log_types/artifacts/model_extras.py +48 -0
- truefoundry/ml/log_types/artifacts/utils.py +161 -0
- truefoundry/ml/log_types/image/__init__.py +3 -0
- truefoundry/ml/log_types/image/constants.py +8 -0
- truefoundry/ml/log_types/image/image.py +358 -0
- truefoundry/ml/log_types/image/image_normalizer.py +101 -0
- truefoundry/ml/log_types/image/types.py +68 -0
- truefoundry/ml/log_types/plot.py +281 -0
- truefoundry/ml/log_types/pydantic_base.py +10 -0
- truefoundry/ml/log_types/utils.py +12 -0
- truefoundry/ml/logger.py +17 -0
- truefoundry/ml/login.py +241 -0
- truefoundry/ml/mlfoundry_api.py +1620 -0
- truefoundry/ml/mlfoundry_run.py +1238 -0
- truefoundry/ml/run_utils.py +102 -0
- truefoundry/ml/services/__init__.py +0 -0
- truefoundry/ml/services/auth_service.py +109 -0
- truefoundry/ml/services/entities.py +108 -0
- truefoundry/ml/services/servicefoundry_service.py +35 -0
- truefoundry/ml/services/utils.py +122 -0
- truefoundry/ml/session.py +271 -0
- truefoundry/ml/validation_utils.py +346 -0
- truefoundry/pydantic_v1.py +5 -1
- {truefoundry-0.3.3.dist-info → truefoundry-0.4.0.dev1.dist-info}/METADATA +18 -11
- truefoundry-0.4.0.dev1.dist-info/RECORD +342 -0
- truefoundry-0.3.3.dist-info/RECORD +0 -136
- /truefoundry/{python_deploy_codegen.py → deploy/python_deploy_codegen.py} +0 -0
- {truefoundry-0.3.3.dist-info → truefoundry-0.4.0.dev1.dist-info}/WHEEL +0 -0
- {truefoundry-0.3.3.dist-info → truefoundry-0.4.0.dev1.dist-info}/entry_points.txt +0 -0
|
@@ -0,0 +1,1620 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import time
|
|
3
|
+
import uuid
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Any, Dict, Iterator, List, Optional, Sequence, Tuple, Union
|
|
6
|
+
|
|
7
|
+
import coolname
|
|
8
|
+
import pandas as pd
|
|
9
|
+
|
|
10
|
+
from truefoundry.ml import constants, env_vars
|
|
11
|
+
from truefoundry.ml.autogen.client import ( # type: ignore[attr-defined]
|
|
12
|
+
ArtifactDto,
|
|
13
|
+
ArtifactType,
|
|
14
|
+
CreateDatasetRequestDto,
|
|
15
|
+
CreateExperimentRequestDto,
|
|
16
|
+
CreateRunRequestDto,
|
|
17
|
+
DatasetDto,
|
|
18
|
+
ListArtifactsRequestDto,
|
|
19
|
+
ListArtifactVersionsRequestDto,
|
|
20
|
+
ListDatasetsRequestDto,
|
|
21
|
+
ListModelVersionsRequestDto,
|
|
22
|
+
ModelDto,
|
|
23
|
+
RunTagDto,
|
|
24
|
+
SearchRunsRequestDto,
|
|
25
|
+
)
|
|
26
|
+
from truefoundry.ml.autogen.client.api import ( # type: ignore[attr-defined]
|
|
27
|
+
ExperimentsApi,
|
|
28
|
+
MlfoundryArtifactsApi,
|
|
29
|
+
RunsApi,
|
|
30
|
+
)
|
|
31
|
+
from truefoundry.ml.autogen.client.exceptions import (
|
|
32
|
+
ApiException,
|
|
33
|
+
NotFoundException,
|
|
34
|
+
)
|
|
35
|
+
from truefoundry.ml.enums import ModelFramework, ViewType
|
|
36
|
+
from truefoundry.ml.exceptions import MlFoundryException
|
|
37
|
+
from truefoundry.ml.internal_namespace import NAMESPACE
|
|
38
|
+
from truefoundry.ml.log_types.artifacts.artifact import ArtifactPath, ArtifactVersion
|
|
39
|
+
from truefoundry.ml.log_types.artifacts.dataset import DataDirectory
|
|
40
|
+
from truefoundry.ml.log_types.artifacts.general_artifact import _log_artifact_version
|
|
41
|
+
from truefoundry.ml.log_types.artifacts.model import ModelVersion, _log_model_version
|
|
42
|
+
from truefoundry.ml.log_types.artifacts.model_extras import CustomMetric, ModelSchema
|
|
43
|
+
from truefoundry.ml.logger import logger
|
|
44
|
+
from truefoundry.ml.mlfoundry_run import MlFoundryRun
|
|
45
|
+
from truefoundry.ml.services.servicefoundry_service import ServicefoundryService
|
|
46
|
+
from truefoundry.ml.session import (
|
|
47
|
+
Session,
|
|
48
|
+
_get_api_client,
|
|
49
|
+
get_active_session,
|
|
50
|
+
init_session,
|
|
51
|
+
)
|
|
52
|
+
from truefoundry.ml.validation_utils import (
|
|
53
|
+
_validate_ml_repo_description,
|
|
54
|
+
_validate_ml_repo_name,
|
|
55
|
+
_validate_run_name,
|
|
56
|
+
)
|
|
57
|
+
|
|
58
|
+
# NOTE(review): presumably the default `max_results` for run search calls;
# it is not referenced in the visible part of this module - confirm at the use site.
_SEARCH_MAX_RESULTS_DEFAULT = 1000
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def _get_internal_env_vars_values() -> Dict[str, str]:
    """Collect the internal environment variables that are currently set.

    Every name listed in `env_vars.INTERNAL_ENV_VARS` is looked up in the
    process environment.

    Returns:
        Dict[str, str]: Mapping of variable name to value, containing only the
            variables whose value is set and non-empty.
    """
    looked_up = ((name, os.getenv(name)) for name in env_vars.INTERNAL_ENV_VARS)
    # Unset and empty variables are both falsy and are left out.
    return {name: value for name, value in looked_up if value}
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def _resolve_version(version: Union[int, str]) -> int:
|
|
72
|
+
if not isinstance(version, int) and not (
|
|
73
|
+
isinstance(version, str) and version.isnumeric()
|
|
74
|
+
):
|
|
75
|
+
raise MlFoundryException(
|
|
76
|
+
f"version must be an integer or string containing numbers only. Got {version!r}"
|
|
77
|
+
)
|
|
78
|
+
final_version = int(version)
|
|
79
|
+
if final_version <= 0:
|
|
80
|
+
raise ValueError("version must be greater than 0")
|
|
81
|
+
return final_version
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
class MlFoundry:
|
|
85
|
+
"""MlFoundry."""
|
|
86
|
+
|
|
87
|
+
# TODO (chiragjn): Don't allow session as None here!
|
|
88
|
+
def __init__(self, session: Session):
    """Build the API clients used by this instance from a session.

    Args:
        session (Session): Session instance to get auth credentials from
    """
    self._tracking_uri: str = session.tracking_uri
    # One API client is created from the session and shared by all API wrappers.
    shared_client = _get_api_client(session=session)
    self._api_client = shared_client
    self._experiments_api = ExperimentsApi(api_client=shared_client)
    self._runs_api = RunsApi(api_client=shared_client)
    self._mlfoundry_artifacts_api = MlfoundryArtifactsApi(api_client=shared_client)
|
|
101
|
+
|
|
102
|
+
def _get_ml_repo_id(self, ml_repo: str) -> str:
    """Look up the id of an ML Repo from its name.

    Args:
        ml_repo (str): The name of the ML Repo.

    Returns:
        str: The id of the ML Repo.

    Raises:
        MlFoundryException: If the lookup raises `NotFoundException` (no ML Repo
            with that name) or any other `ApiException`.
    """
    try:
        response = self._experiments_api.get_experiment_by_name_get(
            experiment_name=ml_repo
        )
        experiment = response.experiment
    except NotFoundException:
        # `from None` hides the API traceback; the message already tells the user what to do.
        raise MlFoundryException(
            f"ML Repo Does Not Exist for name: {ml_repo}. You may either "
            "create it from the dashboard or using client.create_ml_repo('<ml_repo_name>')"
        ) from None
    except ApiException as e:
        raise MlFoundryException(
            f"Error happened in getting ML Repo based on name: "
            f"{ml_repo}. Error details: {e}"
        ) from e

    repo_id = experiment.experiment_id
    assert repo_id is not None
    return repo_id
|
|
131
|
+
|
|
132
|
+
def list_ml_repos(self) -> List[str]:
    """Returns a list of names of ML Repos accessible by the current user.

    The ML Repos are fetched page by page until a page comes back empty or
    without a next page token.

    Returns:
        List[str]: A list of names of ML Repos

    Raises:
        MlFoundryException: If fetching a page fails with an `ApiException`.
    """
    # TODO (chiragjn): This API should yield ML Repo Entities instead of just names
    #   Kinda useless without it
    names = []
    page_size = 25
    next_token = None
    while True:
        try:
            page = self._experiments_api.list_experiments_get(
                view_type=ViewType.ALL.value,
                max_results=page_size,
                page_token=next_token,
            )
        except ApiException as e:
            err_msg = f"Error happened in fetching ML Repos. Error details: {e}"
            raise MlFoundryException(err_msg) from e

        batch = page.experiments
        next_token = page.next_page_token
        # ML Repo with experiment_id 0 represents default ML Repo which we are removing.
        names.extend(repo.name for repo in batch if repo.experiment_id != "0")
        if not batch or next_token is None:
            break
    return names
|
|
162
|
+
|
|
163
|
+
def create_ml_repo(
    self,
    name: str,
    storage_integration_fqn: str,
    description: Optional[str] = None,
) -> None:
    """Creates an ML Repository.

    If an ML Repo with the same name already exists, nothing is created. In
    that case the call only verifies that the existing ML Repo uses the
    requested storage integration.

    Args:
        name (str): The name of the Repository you want to create.
        storage_integration_fqn(str): The storage integration FQN to use for the experiment
            for saving artifacts.
        description (str): A description for ML Repo.

    Raises:
        MlFoundryException: If creating the ML Repo fails, if there is no active
            session, if the storage integration of an existing ML Repo cannot be
            fetched, or if the existing ML Repo uses a different storage
            integration than `storage_integration_fqn`.

    Examples:

        ### Create Repository
        ```python
        from truefoundry.ml import get_client

        client = get_client()

        client.create_ml_repo(
            name="my-repo",
            storage_integration_fqn="<storage-integration-fqn>",
        )
        ```
    """
    _validate_ml_repo_name(ml_repo_name=name)
    if description:
        _validate_ml_repo_description(description=description)
    try:
        _ml_repo_instance = self._experiments_api.get_experiment_by_name_get(
            experiment_name=name
        )
        existing_ml_repo = _ml_repo_instance.experiment
    except NotFoundException:
        existing_ml_repo = None

    if not existing_ml_repo:
        try:
            self._experiments_api.create_experiment_post(
                create_experiment_request_dto=CreateExperimentRequestDto(
                    name=name,
                    description=description,
                    storage_integration_fqn=storage_integration_fqn,
                )
            )
        except ApiException as e:
            err_msg = f"Error happened in creating ML Repo with name: {name}. Error details: {e}"
            raise MlFoundryException(err_msg) from e
        return

    # The ML Repo already exists: check that it is backed by the same storage
    # integration the caller asked for.
    session = get_active_session()
    if session is None:
        raise MlFoundryException(
            "No active session found. Perhaps you are not logged in?\n"
            "Please log in using `tfy login [--host HOST] --relogin`"
        )
    servicefoundry_service = ServicefoundryService(
        tracking_uri=self.get_tracking_uri(),
        token=session.token.access_token,
    )

    assert existing_ml_repo.storage_integration_id is not None
    try:
        existing_storage_integration = (
            servicefoundry_service.get_integration_from_id(
                existing_ml_repo.storage_integration_id
            )
        )
    except Exception as e:
        raise MlFoundryException(
            "Error in getting storage integration for ML Repo"
        ) from e

    if existing_storage_integration["fqn"] != storage_integration_fqn:
        raise MlFoundryException(
            f"ML Repo with same name already exists with storage integration: "
            f"{existing_storage_integration['fqn']}. Cannot update the storage integration to: "
            f"{storage_integration_fqn}"
        )
|
|
242
|
+
|
|
243
|
+
def create_run(
    self,
    ml_repo: str,
    run_name: Optional[str] = None,
    tags: Optional[Dict[str, Any]] = None,
    **kwargs,
) -> MlFoundryRun:
    """Initialize a `run`.

    In a machine learning experiment `run` represents a single experiment
    conducted under a ML Repo.
    Args:
        ml_repo (str): The name of the ML Repo under which the run will be created.
            ml_repo should only contain alphanumerics (a-z,A-Z,0-9) or hyphen (-).
            The user must have `ADMIN` or `WRITE` access to this ML Repo.
        run_name (Optional[str], optional): The name of the run. If not passed, a randomly
            generated name is assigned to the run. Under a ML Repo, all runs should have
            a unique name. If the passed `run_name` is already used under a ML Repo, the
            `run_name` will be de-duplicated by adding a suffix.
            run name should only contain alphanumerics (a-z,A-Z,0-9) or hyphen (-).
        tags (Optional[Dict[str, Any]], optional): Optional tags to attach with
            this run. Tags are key-value pairs. The passed dictionary is not modified.
        kwargs: Extra keyword arguments forwarded to `MlFoundryRun`. `auto_end`
            defaults to `True` when not given.

    Returns:
        MlFoundryRun: An instance of `MlFoundryRun` class which represents a `run`.

    Examples:

        ### Create a run under current user.
        ```python
        from truefoundry.ml import get_client

        client = get_client()

        tags = {"model_type": "svm"}
        run = client.create_run(
            ml_repo="my-classification-project", run_name="svm-with-rbf-kernel", tags=tags
        )

        run.end()
        ```

        ### Creating a run using context manager.
        ```python
        from truefoundry.ml import get_client

        client = get_client()
        with client.create_run(
            ml_repo="my-classification-project", run_name="svm-with-rbf-kernel"
        ) as run:
            # ...
            # Model training code
            ...
        # `run` will be automatically marked as `FINISHED` or `FAILED`.
        ```

        ### Create a run in a ML Repo owned by a different user.
        ```python
        from truefoundry.ml import get_client

        client = get_client()

        tags = {"model_type": "svm"}
        run = client.create_run(
            ml_repo="my-classification-project",
            run_name="svm-with-rbf-kernel",
            tags=tags,
        )
        run.end()
        ```
    """
    if not run_name:
        run_name = coolname.generate_slug(2)
        logger.info(
            f"No run_name given. Using a randomly generated name {run_name}."
            " You can pass your own using the `run_name` argument"
        )
    _validate_run_name(name=run_name)
    ml_repo_id = self._get_ml_repo_id(ml_repo=ml_repo)
    if tags is not None:
        NAMESPACE.validate_namespace_not_used(tags.keys())
        # Work on a copy so the internal env var tags added below do not leak
        # into the dictionary owned by the caller.
        tags = dict(tags)
    else:
        tags = {}

    tags.update(_get_internal_env_vars_values())
    _run = self._runs_api.create_run_post(
        CreateRunRequestDto(
            user_id="unknown",  # This does not matter, because on server we use the id from token
            start_time=int(
                time.time() * 1000
            ),  # TODO (chiragjn): computing start time should be on server side!
            experiment_id=ml_repo_id,
            name=run_name,
            tags=[RunTagDto(key=k, value=v) for k, v in tags.items()],
        )
    )
    run = _run.run

    assert run is not None
    assert run.info.run_id is not None
    assert run.info.fqn is not None

    mlf_run_id = run.info.run_id
    kwargs.setdefault("auto_end", True)
    mlf_run = MlFoundryRun(experiment_id=ml_repo_id, run_id=mlf_run_id, **kwargs)
    mlf_run._add_git_info()
    mlf_run._add_python_truefoundry_version()
    logger.info(f"Run {run.info.fqn!r} has started.")
    logger.info(f"Link to the dashboard for the run: {mlf_run.dashboard_link}")
    return mlf_run
|
|
354
|
+
|
|
355
|
+
def get_run_by_id(self, run_id: str) -> MlFoundryRun:
    """Fetch an existing `run` using its `run_id`.

    A value containing `/` is treated as an fqn and handed over to
    `get_run_by_fqn`.

    Args:
        run_id (str): run_id or fqn of an existing `run`.

    Returns:
        MlFoundryRun: An instance of `MlFoundryRun` class which represents a `run`.

    Raises:
        MlFoundryException: If `run_id` is empty or not a string.

    Examples:

        ### Get run by the run id
        ```python
        from truefoundry.ml import get_client

        client = get_client()

        run = client.get_run_by_id(run_id='a8f6dafd70aa4baf9437a33c52d7ee90')
        ```
    """
    is_invalid = run_id == "" or (not isinstance(run_id, str))
    if is_invalid:
        raise MlFoundryException(
            f"run_id must be string type and not empty. "
            f"Got {type(run_id)} type with value {run_id}"
        )
    if "/" in run_id:
        return self.get_run_by_fqn(run_id)

    response = self._runs_api.get_run_get(run_id=run_id)
    run_dto = response.run
    assert run_dto is not None

    fetched_run = MlFoundryRun._from_dto(run_dto)
    dashboard_message = (
        f"Link to the dashboard for the run: {fetched_run.dashboard_link}"
    )
    logger.info(dashboard_message)
    return fetched_run
|
|
392
|
+
|
|
393
|
+
def get_run_by_fqn(self, run_fqn: str) -> MlFoundryRun:
    """Fetch an existing `run` using its `fqn`.

    `fqn` stands for Fully Qualified Name. A run `fqn` has the following pattern:
    tenant_name/ml_repo/run_name

    If a run `svm` under the ML Repo `cat-classifier` in `truefoundry` tenant,
    the `fqn` will be `truefoundry/cat-classifier/svm`.

    Args:
        run_fqn (str): `fqn` of an existing run.

    Returns:
        MlFoundryRun: An instance of `MlFoundryRun` class which represents a `run`.

    Examples:

        ### get run by run fqn
        ```python
        from truefoundry.ml import get_client

        client = get_client()

        run = client.get_run_by_fqn(run_fqn='truefoundry/my-repo/svm')
        ```
    """
    response = self._runs_api.get_run_by_fqn_get(run_fqn=run_fqn)
    fetched_run = MlFoundryRun._from_dto(response.run)
    dashboard_message = (
        f"Link to the dashboard for the run: {fetched_run.dashboard_link}"
    )
    logger.info(dashboard_message)
    return fetched_run
|
|
426
|
+
|
|
427
|
+
def get_run_by_name(
    self,
    ml_repo: str,
    run_name: str,
) -> MlFoundryRun:
    """Fetch an existing `run` by its name inside a given ML Repo.

    Args:
        ml_repo (str): name of the ML Repo the run belongs to.
        run_name (str): name of the run to fetch.

    Returns:
        MlFoundryRun: An instance of `MlFoundryRun` class which represents a `run`.

    Examples:

        ### get run by name
        ```python
        from truefoundry.ml import get_client

        client = get_client()

        run = client.get_run_by_name(run_name='svm', ml_repo='my-repo')
        ```
    """
    # The ML Repo is addressed by name here, so no experiment id is supplied.
    lookup = {
        "experiment_id": None,
        "run_name": run_name,
        "experiment_name": ml_repo,
    }
    response = self._runs_api.get_run_by_name_get(**lookup)
    mlfoundry_run = MlFoundryRun._from_dto(response.run)
    logger.info(
        f"Link to the dashboard for the run: {mlfoundry_run.dashboard_link}"
    )
    return mlfoundry_run
|
|
463
|
+
|
|
464
|
+
def get_all_runs(
    self,
    ml_repo: str,
) -> pd.DataFrame:
    """Return the id and name of every run found under an ML Repo.

    The runs are collected through `search_runs` with its default arguments.
    The user must have `READ` access to the ML Repo.

    Args:
        ml_repo (str): Name of the ML Repo.

    Returns:
        pd.DataFrame: dataframe with two columns- run_id and run_name

    Examples:

        ### get all the runs from a ml_repo
        ```python
        from truefoundry.ml import get_client

        client = get_client()

        runs_df = client.get_all_runs(ml_repo='my-repo')
        ```
    """
    id_and_name = [
        (found.run_id, found.run_name)
        for found in self.search_runs(ml_repo=ml_repo)
    ]
    column_names = [constants.RUN_ID_COL_NAME, constants.RUN_NAME_COL_NAME]

    # No runs: build the frame from the column names alone.
    if not id_and_name:
        return pd.DataFrame(columns=column_names)

    return pd.DataFrame(id_and_name, columns=column_names)
|
|
500
|
+
|
|
501
|
+
def search_runs(
    self,
    ml_repo: str,
    filter_string: str = "",
    run_view_type: ViewType = ViewType.ACTIVE_ONLY.value,
    order_by: Sequence[str] = ("attribute.start_time DESC",),
    job_run_name: Optional[str] = None,
) -> Iterator[MlFoundryRun]:
    """
    Lazily iterate over the runs of an ML Repo that match a search query.

    The user must have `READ` access to the ML Repo.
    Returns an iterator that returns a MLFoundryRun on each next call.
    All the runs under a ML Repo which matches the filter string and the run_view_type are returned.

    This is a generator function: the ML Repo lookup and the search requests
    are only issued once the returned iterator is consumed.

    Args:
        ml_repo (str): Name of the ML Repo.
        filter_string (str, optional):
            Filter query string, defaults to searching all runs.
            Identifier required in the LHS of a search expression.
            Signifies an entity to compare against. An identifier has two parts separated by a period: the
            type of the entity and the name of the entity.
            The type of the entity is metrics, params, attributes, or tags.
            The entity name can contain alphanumeric characters and special characters.
            You can search using two run attributes : status and artifact_uri. Both attributes have string values.
            When a metric, parameter, or tag name contains a special character like hyphen, space, period,
            and so on, enclose the entity name in double quotes or backticks,
            params."model-type" or params.`model-type`

        run_view_type (str, optional): one of the following values "ACTIVE_ONLY", "DELETED_ONLY", or "ALL" runs.
        order_by (List[str], optional):
            List of columns to order by (e.g., "metrics.rmse"). Currently supported values
            are metric.key, parameter.key, tag.key, attribute.key. The ``order_by`` column
            can contain an optional ``DESC`` or ``ASC`` value. The default is ``ASC``.
            The default ordering is to sort by ``start_time DESC``.

        job_run_name (str): Name of a job run. When given, the search is narrowed to runs
            whose `TFY_INTERNAL_JOB_RUN_NAME` tag equals this value (combined with
            `filter_string` using `and`).

    Returns:
        Iterator[MlFoundryRun]: MLFoundryRuns matching the search query.

    Raises:
        MlFoundryException: If the ML Repo cannot be fetched by name.

    Examples:

        ```python
        import itertools

        from truefoundry.ml import get_client

        client = get_client()
        with client.create_run(ml_repo="my-project", run_name="run-1") as run1:
            run1.log_metrics(metric_dict={"accuracy": 0.74, "loss": 0.6})
            run1.log_params({"model": "LogisticRegression", "lambda": "0.001"})

        with client.create_run(ml_repo="my-project", run_name="run-2") as run2:
            run2.log_metrics(metric_dict={"accuracy": 0.8, "loss": 0.4})
            run2.log_params({"model": "SVM"})

        # Search for the subset of runs with logged accuracy metric greater than 0.75
        filter_string = "metrics.accuracy > 0.75"
        runs = client.search_runs(ml_repo="my-project", filter_string=filter_string)

        # Search for the subset of runs with logged accuracy metric greater than 0.7
        filter_string = "metrics.accuracy > 0.7"
        runs = client.search_runs(ml_repo="my-project", filter_string=filter_string)

        # Search for the subset of runs with logged accuracy metric greater than 0.7 and model="LogisticRegression"
        filter_string = "metrics.accuracy > 0.7 and params.model = 'LogisticRegression'"
        runs = client.search_runs(ml_repo="my-project", filter_string=filter_string)

        # Search for the subset of runs with logged accuracy metric greater than 0.7 and
        # order by accuracy in Descending order
        filter_string = "metrics.accuracy > 0.7"
        order_by = ["metric.accuracy DESC"]
        runs = client.search_runs(
            ml_repo="my-project", filter_string=filter_string, order_by=order_by
        )

        # Restrict the search to the runs created by a given job run
        filter_string = "metrics.accuracy > 0.7"
        runs = client.search_runs(
            ml_repo="transformers", order_by=order_by, job_run_name='job_run_name', filter_string=filter_string
        )

        # `search_runs` takes no `max_results` argument; cap the iterator instead
        order_by = ["metric.accuracy DESC"]
        runs = client.search_runs(
            ml_repo="my-project", filter_string=filter_string, order_by=order_by
        )
        first_ten_runs = list(itertools.islice(runs, 10))
        ```
    """
    _validate_ml_repo_name(ml_repo_name=ml_repo)
    try:
        _ml_repo_obj = self._experiments_api.get_experiment_by_name_get(
            experiment_name=ml_repo
        )
        ml_repo_obj = _ml_repo_obj.experiment
    except ApiException as e:
        raise MlFoundryException(
            f"ML Repo with name {ml_repo} does not exist or your user does not have permission to access it: {e}"
        ) from e

    ml_repo_id = ml_repo_obj.experiment_id

    # Narrow the search to a job run by matching its internal tag.
    if job_run_name:
        if filter_string == "":
            filter_string = f"tags.TFY_INTERNAL_JOB_RUN_NAME = '{job_run_name}'"
        else:
            filter_string += (
                f" and tags.TFY_INTERNAL_JOB_RUN_NAME = '{job_run_name}'"
            )

    page_token = None
    while True:
        runs_page = self._runs_api.search_runs_post(
            SearchRunsRequestDto(
                experiment_ids=[ml_repo_id],
                filter=filter_string,
                run_view_type=run_view_type,
                max_results=_SEARCH_MAX_RESULTS_DEFAULT,
                order_by=order_by,
                page_token=page_token,
            )
        )
        # Normalise a missing page to an empty list *before* iterating, so a
        # `None` page ends the iteration instead of raising TypeError.
        runs = runs_page.runs or []
        page_token = runs_page.next_page_token

        for run in runs:
            yield MlFoundryRun._from_dto(run)

        # Stop on an empty page or when no further page is advertised.
        if not runs or page_token is None:
            break
|
|
625
|
+
|
|
626
|
+
def get_tracking_uri(self) -> str:
    """
    Return the tracking URI stored on this client.

    Returns:
        The tracking URI.

    Examples:

        ```python
        from truefoundry.ml import get_client

        client = get_client()
        tracking_uri = client.get_tracking_uri()
        print("Current tracking uri: {}".format(tracking_uri))
        ```
    """
    current_uri = self._tracking_uri
    return current_uri
|
|
645
|
+
|
|
646
|
+
def get_model_version(
    self,
    ml_repo: str,
    name: str,
    version: Union[str, int] = constants.LATEST_ARTIFACT_OR_MODEL_VERSION,
) -> Optional[ModelVersion]:
    """
    Fetch a model version so its contents can be downloaded or loaded in memory.

    Args:
        ml_repo (str): ML Repo to which model is logged
        name (str): Model Name
        version (str | int): Model Version to fetch (default is the latest version)

    Returns:
        ModelVersion: The ModelVersion instance of the model.

    Examples:

        ### Sklearn

        ```python
        # See `truefoundry.ml.mlfoundry_api.MlFoundry.log_model` examples to understand model logging
        import os
        import tempfile
        import joblib
        from truefoundry.ml import get_client

        client = get_client()
        model_version = client.get_model_version(
            ml_repo="my-classification-project",
            name="my-sklearn-model",
            version=1
        )

        # Download the model to disk
        temp = tempfile.TemporaryDirectory()
        download_info = model_version.download(path=temp.name)
        print(download_info.model_dir, download_info.model_filename)

        # Deserialize and Load
        model = joblib.load(
            os.path.join(download_info.model_dir, download_info.model_filename)
        )
        ```

        ### Huggingface Transformers

        ```python
        # See `truefoundry.ml.mlfoundry_api.MlFoundry.log_model` examples to understand model logging
        import tempfile
        import torch
        from transformers import pipeline

        from truefoundry.ml import get_client

        client = get_client()
        model_version = client.get_model_version(
            ml_repo="my-llm-project",
            name="my-transformers-model",
            version=1
        )

        # Download the model to disk
        temp = tempfile.TemporaryDirectory()
        download_info = model_version.download(path=temp.name)
        print(download_info.model_dir)

        # Deserialize and Load
        pln = pipeline("text-generation", model=download_info.model_dir, torch_dtype=torch.float16)
        ```
    """
    # The "latest" sentinel is sent to the API as `None`; anything else is resolved first.
    resolved_version = (
        _resolve_version(version=version)
        if version != constants.LATEST_ARTIFACT_OR_MODEL_VERSION
        else None
    )

    repo_id = self._get_ml_repo_id(ml_repo=ml_repo)

    version_response = self._mlfoundry_artifacts_api.get_model_version_by_name_get(
        experiment_id=int(repo_id),
        model_name=name,
        version=resolved_version,
    )
    model_version = version_response.model_version
    assert model_version.model_id is not None

    # A second request fetches the parent model the version belongs to.
    model_response = self._mlfoundry_artifacts_api.get_model_get(
        id=model_version.model_id
    )
    return ModelVersion(
        model_version=model_version,
        model=model_response.model,
    )
|
|
736
|
+
|
|
737
|
+
def get_model_version_by_fqn(self, fqn: str) -> ModelVersion:
    """
    Fetch a model version by its fully qualified name.

    The lookup is delegated to `ModelVersion.from_fqn`.

    Args:
        fqn (str): Fully qualified name of the model version.

    Returns:
        ModelVersion: The ModelVersion instance of the model.

    Examples:

        ### Sklearn

        ```python
        # See `truefoundry.ml.mlfoundry_api.MlFoundry.log_model` examples to understand model logging
        import os
        import tempfile
        import joblib
        from truefoundry.ml import get_client

        client = get_client()
        model_version = client.get_model_version_by_fqn(
            fqn="model:truefoundry/my-classification-project/my-sklearn-model:1"
        )

        # Download the model to disk
        temp = tempfile.TemporaryDirectory()
        download_info = model_version.download(path=temp.name)
        print(download_info.model_dir, download_info.model_filename)

        # Deserialize and Load
        model = joblib.load(
            os.path.join(download_info.model_dir, download_info.model_filename)
        )
        ```

        ### Huggingface Transformers

        ```python
        # See `truefoundry.ml.mlfoundry_api.MlFoundry.log_model` examples to understand model logging
        import tempfile
        import torch
        from transformers import pipeline

        from truefoundry.ml import get_client

        client = get_client()
        model_version = client.get_model_version_by_fqn(
            fqn="model:truefoundry/my-llm-project/my-transformers-model:1"
        )
        # Download the model to disk
        temp = tempfile.TemporaryDirectory()
        download_info = model_version.download(path=temp.name)
        print(download_info.model_dir)

        # Deserialize and Load
        pln = pipeline("text-generation", model=download_info.model_dir, torch_dtype=torch.float16)
        ```
    """
    fetched_version = ModelVersion.from_fqn(fqn=fqn)
    return fetched_version
|
|
796
|
+
|
|
797
|
+
def list_model_versions(self, ml_repo: str, name: str) -> Iterator[ModelVersion]:
    """
    List every version of a model so they can be downloaded or loaded in memory.

    Args:
        ml_repo (str): Repository in which the model is stored.
        name (str): Name of the model whose versions are required

    Returns:
        Iterator[ModelVersion]: An iterator that yields non deleted model versions
            of a model under a given ml_repo sorted reverse by the version number

    Raises:
        MlFoundryException: If no model with this name is found in the ML Repo.

    Examples:

        ```python
        from truefoundry.ml import get_client

        client = get_client()
        model_versions = client.list_model_versions(ml_repo="my-repo", name="svm")

        for model_version in model_versions:
            print(model_version)
        ```
    """
    repo_id = self._get_ml_repo_id(ml_repo=ml_repo)
    try:
        model_response = self._mlfoundry_artifacts_api.get_model_by_name_get(
            experiment_id=int(repo_id), name=name
        )
    except NotFoundException as e:
        # Re-raise as the library's own exception, keeping the API error as the cause.
        raise MlFoundryException(
            f"Model Does Not Exist for ml_repo={ml_repo}, name={name}. Error: {e}"
        ) from e
    return self._list_model_versions_by_id(model=model_response.model)
|
|
833
|
+
|
|
834
|
+
def list_model_versions_by_fqn(self, model_fqn: str) -> Iterator[ModelVersion]:
    """
    List the versions of the model identified by a fully qualified name.

    Args:
        model_fqn: FQN of the Model to list versions for.
            A model_fqn looks like `model:{org}/{user}/{project}/{artifact_name}`
            or `model:{user}/{project}/{artifact_name}`

    Returns:
        Iterator[ModelVersion]: An iterator that yields non deleted model versions
            under the given model_fqn sorted reverse by the version number

    Yields:
        ModelVersion: An instance of `truefoundry.ml.ModelVersion`

    Examples:

        ```python
        from truefoundry.ml import get_client

        client = get_client()
        model_fqn = "model:org/my-project/my-model"
        for mv in client.list_model_versions_by_fqn(model_fqn=model_fqn):
            print(mv.name, mv.version, mv.description)
        ```
    """
    model_response = self._mlfoundry_artifacts_api.get_model_by_fqn_get(
        fqn=model_fqn
    )
    # Hand the resolved model to the shared paginating helper.
    return self._list_model_versions_by_id(model=model_response.model)
|
|
864
|
+
|
|
865
|
+
def _list_model_versions_by_id(
    self,
    model_id: Optional[uuid.UUID] = None,
    model: Optional[ModelDto] = None,
) -> Iterator[ModelVersion]:
    """
    Yield the versions of a model, fetching them page by page.

    Exactly one of `model_id` / `model` must be supplied: the missing one is
    derived from the other (the model is fetched by id, or the id is read from
    the model). This is a generator function, so the argument check and all
    requests run only once the returned iterator is consumed.

    Args:
        model_id (Optional[uuid.UUID]): Id of the model to list versions for.
        model (Optional[ModelDto]): Already fetched model to list versions for.

    Yields:
        ModelVersion: One instance per model version returned by the API.

    Raises:
        MlFoundryException: If both or neither of `model_id` and `model` are given.
    """
    if model and not model_id:
        assert model.id is not None
        model_id = model.id
    elif not model and model_id:
        _model = self._mlfoundry_artifacts_api.get_model_get(id=str(model_id))
        model = _model.model
    else:
        raise MlFoundryException(
            "Exactly one of model_id or model should be passed"
        )

    max_results, page_token = 10, None
    while True:
        _model_versions = self._mlfoundry_artifacts_api.list_model_versions_post(
            list_model_versions_request_dto=ListModelVersionsRequestDto(
                model_id=str(model_id),
                max_results=max_results,
                page_token=page_token,
            )
        )
        # Normalise a missing page to an empty list *before* iterating, so a
        # `None` page ends the iteration instead of raising TypeError.
        model_versions = _model_versions.model_versions or []
        page_token = _model_versions.next_page_token
        for model_version in model_versions:
            yield ModelVersion(model_version=model_version, model=model)
        # Stop on an empty page or when no further page is advertised.
        if not model_versions or not page_token:
            break
|
|
896
|
+
|
|
897
|
+
def get_artifact_version(
    self,
    ml_repo: str,
    name: str,
    artifact_type: Optional[ArtifactType] = ArtifactType.ARTIFACT,
    version: Union[str, int] = constants.LATEST_ARTIFACT_OR_MODEL_VERSION,
) -> Optional[ArtifactVersion]:
    """
    Fetch an artifact version so its contents can be downloaded.

    Args:
        ml_repo (str): ML Repo to which artifact is logged
        name (str): Artifact Name
        artifact_type (str): The type of artifact to fetch (acceptable values: "artifact", "model", "plot", "image")
        version (str | int): Artifact Version to fetch (default is the latest version)

    Returns:
        ArtifactVersion : An ArtifactVersion instance of the artifact

    Examples:

        ```python
        import tempfile
        from truefoundry.ml import get_client

        client = get_client()
        artifact_version = client.get_artifact_version(ml_repo="ml-repo-name", name="artifact-name", version=1)

        # download the artifact to disk
        temp = tempfile.TemporaryDirectory()
        download_path = artifact_version.download(path=temp.name)
        print(download_path)
        ```
    """
    # The "latest" sentinel is sent to the API as `None`; anything else is resolved first.
    resolved_version = (
        _resolve_version(version=version)
        if version != constants.LATEST_ARTIFACT_OR_MODEL_VERSION
        else None
    )

    repo_id = self._get_ml_repo_id(ml_repo=ml_repo)

    version_response = self._mlfoundry_artifacts_api.get_artifact_version_by_name_get(
        experiment_id=int(repo_id),
        artifact_name=name,
        artifact_type=artifact_type,
        version=resolved_version,
    )
    artifact_version = version_response.artifact_version
    assert artifact_version.artifact_id is not None

    # A second request fetches the parent artifact the version belongs to.
    artifact_response = self._mlfoundry_artifacts_api.get_artifact_by_id_get(
        id=artifact_version.artifact_id
    )
    return ArtifactVersion(
        artifact_version=artifact_version,
        artifact=artifact_response.artifact,
    )
|
|
956
|
+
|
|
957
|
+
def get_artifact_version_by_fqn(self, fqn: str) -> ArtifactVersion:
    """
    Fetch an artifact version by its fully qualified name.

    The lookup is delegated to `ArtifactVersion.from_fqn`.

    Args:
        fqn (str): Fully qualified name of the artifact version.

    Returns:
        ArtifactVersion : An ArtifactVersion instance of the artifact

    Examples:

        ```python
        import tempfile
        from truefoundry.ml import get_client

        client = get_client()
        artifact_version = client.get_artifact_version_by_fqn(
            fqn="artifact:truefoundry/my-classification-project/sklearn-artifact:1"
        )

        # download the artifact to disk
        temp = tempfile.TemporaryDirectory()
        download_path = artifact_version.download(path=temp.name)
        print(download_path)
        ```
    """
    fetched_version = ArtifactVersion.from_fqn(fqn=fqn)
    return fetched_version
|
|
985
|
+
|
|
986
|
+
def list_artifact_versions(
    self,
    ml_repo: str,
    name: str,
    artifact_type: Optional[ArtifactType] = ArtifactType.ARTIFACT,
) -> Iterator[ArtifactVersion]:
    """
    List every version of an artifact so their contents can be downloaded.

    Args:
        ml_repo (str): Repository in which the artifact is stored.
        name (str): Name of the artifact whose versions are required
        artifact_type (ArtifactType): Type of artifact you want for example model, image, etc.

    Returns:
        Iterator[ArtifactVersion]: An iterator that yields non deleted artifact-versions
            of an artifact under a given ml_repo sorted reverse by the version number

    Raises:
        MlFoundryException: If no artifact matches the given ML Repo, name and type.

    Examples:

        ```python
        from truefoundry.ml import get_client

        client = get_client()
        artifact_versions = client.list_artifact_versions(ml_repo="my-repo", name="artifact-name")

        for artifact_version in artifact_versions:
            print(artifact_version)
        ```
    """
    repo_id = self._get_ml_repo_id(ml_repo=ml_repo)
    # Only the first match is needed, so a single result is requested.
    request = ListArtifactsRequestDto(
        experiment_id=repo_id,
        name=name,
        artifact_types=[artifact_type] if artifact_type else None,
        max_results=1,
    )
    response = self._mlfoundry_artifacts_api.list_artifacts_post(
        list_artifacts_request_dto=request
    )
    artifacts = response.artifacts
    if not artifacts:
        err_msg = f"Artifact Does Not Exist for ml_repo={ml_repo}, name={name}, type={artifact_type}"
        raise MlFoundryException(err_msg)
    first_match = artifacts[0]
    return self._list_artifact_versions_by_id(artifact=first_match)
|
|
1030
|
+
|
|
1031
|
+
def list_artifact_versions_by_fqn(
    self, artifact_fqn: str
) -> Iterator[ArtifactVersion]:
    """
    List the versions of the artifact identified by a fully qualified name.

    Args:
        artifact_fqn: FQN of the Artifact to list versions for.
            An artifact_fqn looks like `{artifact_type}:{org}/{user}/{project}/{artifact_name}`
            or `{artifact_type}:{user}/{project}/{artifact_name}`

            where artifact_type can be one of ("model", "image", "plot")

    Returns:
        Iterator[ArtifactVersion]: An iterator that yields non deleted artifact versions
            under the given artifact_fqn sorted reverse by the version number

    Yields:
        ArtifactVersion: An instance of `truefoundry.ml.ArtifactVersion`

    Examples:

        ```python
        from truefoundry.ml import get_client

        client = get_client()
        artifact_fqn = "artifact:org/my-project/my-artifact"
        for av in client.list_artifact_versions_by_fqn(artifact_fqn=artifact_fqn):
            print(av.name, av.version, av.description)
        ```
    """
    artifact_response = self._mlfoundry_artifacts_api.get_artifact_by_fqn_get(
        fqn=artifact_fqn
    )
    # Hand the resolved artifact to the shared paginating helper.
    return self._list_artifact_versions_by_id(artifact=artifact_response.artifact)
|
|
1068
|
+
|
|
1069
|
+
def _list_artifact_versions_by_id(
    self,
    artifact_id: Optional[uuid.UUID] = None,
    artifact: Optional[ArtifactDto] = None,
) -> Iterator[ArtifactVersion]:
    """
    Yield the versions of an artifact, fetching them page by page.

    Exactly one of `artifact_id` / `artifact` must be supplied: the missing one
    is derived from the other (the artifact is fetched by id, or the id is read
    from the artifact). This is a generator function, so the argument check and
    all requests run only once the returned iterator is consumed.

    Args:
        artifact_id (Optional[uuid.UUID]): Id of the artifact to list versions for.
        artifact (Optional[ArtifactDto]): Already fetched artifact to list versions for.

    Yields:
        ArtifactVersion: One instance per artifact version returned by the API.

    Raises:
        MlFoundryException: If both or neither of `artifact_id` and `artifact` are given.
    """
    if artifact and not artifact_id:
        assert artifact.id is not None
        artifact_id = artifact.id
    elif not artifact and artifact_id:
        _artifact = self._mlfoundry_artifacts_api.get_artifact_by_id_get(
            id=str(artifact_id)
        )
        artifact = _artifact.artifact
    else:
        raise MlFoundryException(
            "Exactly one of artifact_id or artifact should be passed"
        )

    max_results, page_token = 10, None
    while True:
        _artifact_versions = (
            self._mlfoundry_artifacts_api.list_artifact_versions_post(
                list_artifact_versions_request_dto=ListArtifactVersionsRequestDto(
                    artifact_id=str(artifact_id),
                    max_results=max_results,
                    page_token=page_token,
                )
            )
        )
        # Normalise a missing page to an empty list *before* iterating, so a
        # `None` page ends the iteration instead of raising TypeError.
        artifact_versions = _artifact_versions.artifact_versions or []
        page_token = _artifact_versions.next_page_token
        for artifact_version in artifact_versions:
            yield ArtifactVersion(
                artifact_version=artifact_version, artifact=artifact
            )
        # Stop on an empty page or when no further page is advertised.
        if not artifact_versions or not page_token:
            break
|
|
1106
|
+
|
|
1107
|
+
def log_artifact(
    self,
    ml_repo: str,
    name: str,
    artifact_paths: List[
        Union[Tuple[str], Tuple[str, Optional[str]], ArtifactPath]
    ],
    description: Optional[str] = None,
    metadata: Optional[Dict[str, Any]] = None,
    progress: Optional[bool] = None,
) -> Optional[ArtifactVersion]:
    """Log a new artifact version under the given `ml_repo`.

    An `artifact` is a list of local files and directories.
    This function packs the mentioned files and directories in `artifact_paths`
    and uploads them to remote storage linked to the ml_repo

    Args:
        ml_repo (str): Name of the ML Repo to which an artifact is to be logged.
        name (str): Name of the Artifact. If an artifact with this name already exists under the current ml_repo,
            the logged artifact will be added as a new version under that `name`. If no artifact exist with
            the given `name`, the given artifact will be logged as version 1.
        artifact_paths (List[truefoundry.ml.ArtifactPath]): A non-empty list of pairs
            of (source path, destination path) to add files and folders
            to the artifact version contents. The first member of the pair should be a file or directory path
            and the second member should be the path inside the artifact contents to upload to.

            ```python

            from truefoundry.ml import get_client, ArtifactPath

            client = get_client()
            client.log_artifact(
                ml_repo="sample-repo",
                name="xyz",
                artifact_paths=[
                    ArtifactPath("foo.txt", "foo/bar/foo.txt"),
                    ArtifactPath("tokenizer/", "foo/tokenizer/"),
                    ArtifactPath('bar.text'),
                    ('bar.txt', ),
                    ('foo.txt', 'a/foo.txt')
                ]
            )
            ```

            would result in

            ```
            .
            └── foo/
                ├── bar/
                │   └── foo.txt
                └── tokenizer/
                    └── # contents of tokenizer/ directory will be uploaded here
            ```
        description (Optional[str], optional): arbitrary text upto 1024 characters to store as description.
            This field can be updated at any time after logging. Defaults to `None`
        metadata (Optional[Dict[str, Any]], optional): arbitrary json serializable dictionary to store metadata.
            For example, you can use this to store metrics, params, notes.
            This field can be updated at any time after logging. Defaults to `None`
        progress (bool): value to show progress bar, defaults to None.

    Returns:
        truefoundry.ml.ArtifactVersion: an instance of `ArtifactVersion` that can be used to download the files,
            or update attributes like description, metadata.

    Raises:
        MlFoundryException: If `artifact_paths` is empty.

    Examples:

        ```python
        import os
        from truefoundry.ml import get_client, ArtifactPath

        with open("artifact.txt", "w") as f:
            f.write("hello-world")

        client = get_client()
        ml_repo = "sample-repo"

        client.create_ml_repo(ml_repo=ml_repo)
        client.log_artifact(
            ml_repo=ml_repo,
            name="hello-world-file",
            artifact_paths=[ArtifactPath('artifact.txt', 'a/b/')]
        )
        ```
    """
    # Guard clause: an artifact version needs at least one path to upload.
    if not artifact_paths:
        raise MlFoundryException(
            "artifact_paths cannot be empty, atleast one artifact_path must be passed"
        )

    repo_id = self._get_ml_repo_id(ml_repo=ml_repo)
    # Logged directly against the ML Repo, so no run and no step are attached.
    artifact_version = _log_artifact_version(
        run=None,
        mlfoundry_artifacts_api=self._mlfoundry_artifacts_api,
        ml_repo_id=repo_id,
        name=name,
        artifact_paths=artifact_paths,
        description=description,
        metadata=metadata,
        step=None,
        progress=progress,
    )
    logger.info(f"Logged artifact successfully with fqn {artifact_version.fqn!r}")
    return artifact_version
|
|
1212
|
+
|
|
1213
|
+
def log_model(
    self,
    *,
    ml_repo: str,
    name: str,
    model_file_or_folder: str,
    framework: Optional[Union[ModelFramework, str]],
    additional_files: Sequence[Tuple[Union[str, Path], Optional[str]]] = (),
    description: Optional[str] = None,
    metadata: Optional[Dict[str, Any]] = None,
    model_schema: Optional[Union[ModelSchema, Dict[str, Any]]] = None,
    custom_metrics: Optional[List[Union[CustomMetric, Dict[str, Any]]]] = None,
) -> ModelVersion:
    """
    Log already-serialized model files as a new model version in an ML Repo.

    Every call creates a new version under the given `name`; repeated calls with the
    same `name` produce successive versions. This client-level method passes no run
    (`run=None`) and no step (`step=None`) to the underlying logger.

    Args:
        ml_repo (str): Name of the ML Repo to which the model is to be logged.
        name (str): Name of the model. If a model with this name already exists under the
            ML Repo, the logged model will be added as a new version under that `name`.
            If no models exist with the given `name`, the given model will be logged as
            version 1.
        model_file_or_folder (str): Path to either a single file or a folder containing
            model files. This is usually created using serialization methods of libraries
            or frameworks e.g. `joblib.dump`, `model.save_pretrained(...)`,
            `torch.save(...)`, `model.save(...)`
        framework (Optional[Union[enums.ModelFramework, str]]): Model Framework.
            Ex:- pytorch, sklearn, tensorflow etc. The full list of supported frameworks
            can be found in `truefoundry.ml.enums.ModelFramework`. May be `None`.
        additional_files (Sequence[Tuple[Union[str, Path], Optional[str]]], optional):
            A list of pairs of (source path, destination path) to add additional files
            and folders to the model version contents. The first member of the pair
            should be a file or directory path and the second member should be the path
            inside the model version contents to upload to.
            The model version contents are arranged like follows

            ```
            .
            └── model/
                └── # model files are serialized here
            └── # any additional files and folders can be added here.
            ```

            You can also add additional files to the model/ subdirectory by specifying
            the destination path as model/

            ```python
            client.log_model(
                ml_repo="my-repo",
                name="xyz",
                model_file_or_folder="clf.joblib",
                framework="sklearn",
                additional_files=[("foo.txt", "foo/bar/foo.txt"), ("tokenizer/", "foo/tokenizer/")]
            )
            ```

            would result in

            ```
            .
            ├── model/
            │   └── clf.joblib # if `model_file_or_folder` is a folder, contents will be added here
            └── foo/
                ├── bar/
                │   └── foo.txt
                └── tokenizer/
                    └── # contents of tokenizer/ directory will be uploaded here
            ```
        description (Optional[str], optional): arbitrary text upto 1024 characters to
            store as description. This field can be updated at any time after logging.
            Defaults to `None`
        metadata (Optional[Dict[str, Any]], optional): arbitrary json serializable
            dictionary to store metadata. For example, you can use this to store metrics,
            params, notes. This field can be updated at any time after logging.
            Defaults to `None`
        model_schema (Optional[Union[Dict[str, Any], ModelSchema]], optional):
            instance of `truefoundry.ml.ModelSchema` (or its dictionary form).
            This schema needs to be consistent with older versions of the model under the
            given `name` i.e. a feature's value type and the model's prediction type
            cannot be changed in the schema of a new version. Features can be removed or
            added between versions.

            E.g. if v1 declares feature `feat1` as `int` with a `categorical` prediction,
            then a new version that declares `feat1` as `string`, or that changes the
            prediction to `numerical`, is invalid because it changes the type of an
            existing feature or of the prediction. A new version that keeps `feat1` as
            `int` and adds `feat2` as `string`, or that drops `feat1` and only declares
            `feat2`, is valid.

            This field can be updated at any time after logging. Defaults to `None`
        custom_metrics (Optional[List[Union[CustomMetric, Dict[str, Any]]]], optional):
            list of instances of `truefoundry.ml.CustomMetric` (or their dictionary form).
            The custom metrics must be added according to the prediction type of schema.

            ```python
            custom_metrics = [{
                "name": "mean_square_error",
                "type": "metric",
                "value_type": "float"
            }]
            ```

    Returns:
        truefoundry.ml.ModelVersion: an instance of `ModelVersion` that can be used to
            download the files, load the model, or update attributes like description,
            metadata, schema.

    Examples:

        ### Sklearn

        ```python
        from truefoundry.ml import get_client
        from truefoundry.ml.enums import ModelFramework

        import joblib
        import numpy as np
        from sklearn.pipeline import make_pipeline
        from sklearn.preprocessing import StandardScaler
        from sklearn.svm import SVC

        X = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]])
        y = np.array([1, 1, 2, 2])
        clf = make_pipeline(StandardScaler(), SVC(gamma='auto'))
        clf.fit(X, y)
        joblib.dump(clf, "sklearn-pipeline.joblib")

        client = get_client()
        client.create_ml_repo(  # This is only required once
            ml_repo="my-classification-project",
            # This controls which bucket is used.
            # You can get this from Integrations > Blob Storage. `None` picks the default
            storage_integration_fqn=None
        )
        model_version = client.log_model(
            ml_repo="my-classification-project",
            name="my-sklearn-model",
            model_file_or_folder="sklearn-pipeline.joblib",
            framework=ModelFramework.SKLEARN,
            metadata={"accuracy": 0.99, "f1": 0.80},
        )
        print(model_version.fqn)
        ```

        ### Huggingface Transformers

        ```python
        from truefoundry.ml import get_client
        from truefoundry.ml.enums import ModelFramework

        import torch
        from transformers import pipeline

        pln = pipeline(
            "text-generation",
            model="EleutherAI/pythia-70m",
            tokenizer="EleutherAI/pythia-70m",
            torch_dtype=torch.float16
        )
        pln.model.save_pretrained("my-transformers-model")
        pln.tokenizer.save_pretrained("my-transformers-model")

        client = get_client()
        client.create_ml_repo(  # This is only required once
            ml_repo="my-llm-project",
            # This controls which bucket is used.
            # You can get this from Integrations > Blob Storage. `None` picks the default
            storage_integration_fqn=None
        )
        model_version = client.log_model(
            ml_repo="my-llm-project",
            name="my-transformers-model",
            model_file_or_folder="my-transformers-model/",
            framework=ModelFramework.TRANSFORMERS
        )
        print(model_version.fqn)
        ```
    """
    # Resolve the repo name to its id first; the logger works with ids.
    repo_id = self._get_ml_repo_id(ml_repo=ml_repo)

    # Caller-supplied fields are forwarded unchanged.
    forwarded = dict(
        name=name,
        model_file_or_folder=model_file_or_folder,
        framework=framework,
        additional_files=additional_files,
        description=description,
        metadata=metadata,
        model_schema=model_schema,
        custom_metrics=custom_metrics,
    )
    logged_version = _log_model_version(
        run=None,
        mlfoundry_artifacts_api=self._mlfoundry_artifacts_api,
        ml_repo_id=repo_id,
        step=None,
        **forwarded,
    )
    logger.info(f"Logged model successfully with fqn {logged_version.fqn!r}")
    return logged_version
|
|
1403
|
+
|
|
1404
|
+
# Datasets API
|
|
1405
|
+
def create_data_directory(
    self,
    ml_repo: str,
    name: str,
    description: Optional[str] = None,
    metadata: Optional[Dict[str, Any]] = None,
) -> DataDirectory:
    """
    Create a DataDirectory to upload files to, or return the existing one.

    If a data directory with the same `name` is already listed under the ML Repo,
    a warning is logged and that existing instance is returned instead of creating
    a new one.

    Args:
        ml_repo (str): Name of the ML Repo in which you want to create the data directory.
        name (str): Name of the DataDirectory to be created.
        description (Optional[str], optional): Description of the data directory.
        metadata (Optional[Dict[str, Any]], optional): Metadata about the data directory
            in dictionary form.

    Returns:
        DataDirectory: An instance of class DataDirectory

    Raises:
        MlFoundryException: if `name` or `ml_repo` is empty or not a string.

    Examples:

        ```python
        from truefoundry.ml import get_client

        client = get_client()
        data_directory = client.create_data_directory(name="<data_directory-name>", ml_repo="<repo-name>")
        print(data_directory.fqn)
        ```
    """
    if not isinstance(name, str) or name == "":
        raise MlFoundryException(
            f"DataDirectory name must be string type and not empty. "
            f"Got {type(name)} type with value {name}"
        )

    if not isinstance(ml_repo, str) or ml_repo == "":
        raise MlFoundryException(
            f"ML repo must be string type and not empty. "
            f"Got {type(ml_repo)} type with value {ml_repo}"
        )

    repo_id = self._get_ml_repo_id(ml_repo=ml_repo)

    # TODO: Add get_data_directory_by_name on server
    # Until then, look the name up through the list endpoint, capped at one result.
    lookup_response = self._mlfoundry_artifacts_api.list_datasets_post(
        list_datasets_request_dto=ListDatasetsRequestDto(
            experiment_id=repo_id, name=name, max_results=1
        )
    )
    existing = lookup_response.datasets
    if existing:
        logger.warning(
            f"Data Directory with the name {name} already exists in ML Repo {ml_repo}, "
            f"returning the original instance of DataDirectory instead"
        )
        return DataDirectory(dataset=existing[0])

    create_response = self._mlfoundry_artifacts_api.create_dataset_post(
        create_dataset_request_dto=CreateDatasetRequestDto(
            name=name,
            experiment_id=repo_id,
            description=description,
            dataset_metadata=metadata,
        )
    )
    return DataDirectory(dataset=create_response.dataset)
|
|
1471
|
+
|
|
1472
|
+
def get_data_directory_by_fqn(
    self,
    fqn: str,
) -> DataDirectory:
    """
    Fetch a DataDirectory by its fully qualified name.

    Args:
        fqn (str): Fully qualified name of the data directory.

    Returns:
        DataDirectory: An instance of class DataDirectory

    Examples:

        ```python
        from truefoundry.ml import get_client, DataDirectoryPath

        client = get_client()
        data_directory = client.get_data_directory_by_fqn(fqn="<data-dir-fqn>")
        with open("artifact.txt", "w") as f:
            f.write("hello-world")

        data_directory.add_files(
            artifact_paths=[DataDirectoryPath('artifact.txt', 'a/b/')]
        )
        # print the path of files and folder in the data_directory
        for file in data_directory.list_files():
            print(file.path)
        ```
    """
    response = self._mlfoundry_artifacts_api.get_dataset_by_fqn_get(fqn=fqn)
    return DataDirectory(response.dataset)
|
|
1507
|
+
|
|
1508
|
+
def get_data_directory(
    self,
    ml_repo: str,
    name: str,
) -> DataDirectory:
    """Get an existing `data_directory` by `name`.

    Args:
        ml_repo (str): name of the ML Repo the data-directory is part of.
        name (str): the name of the data-directory

    Returns:
        DataDirectory: An instance of class DataDirectory

    Raises:
        MlFoundryException: if `ml_repo` or `name` is empty or not a string, or if
            no data directory with that name is listed under the ML Repo.

    Examples:

        ```python
        from truefoundry.ml import get_client, DataDirectoryPath

        client = get_client()
        data_directory = client.get_data_directory(ml_repo='my-repo', name="<data-directory-name>")
        with open("artifact.txt", "w") as f:
            f.write("hello-world")
        data_directory.add_files(
            artifact_paths=[DataDirectoryPath('artifact.txt', 'a/b/')]
        )
        # print the path of files and folder in the data_directory
        for file in data_directory.list_files():
            print(file.path)
        ```
    """
    if ml_repo == "" or not isinstance(ml_repo, str):
        raise MlFoundryException(
            f"ML repo must be string type and not empty. "
            f"Got {type(ml_repo)} type with value {ml_repo}"
        )
    # Validate `name` the same way `create_data_directory` does, so an empty or
    # non-string name is rejected up front instead of being sent as a list filter.
    if name == "" or not isinstance(name, str):
        raise MlFoundryException(
            f"DataDirectory name must be string type and not empty. "
            f"Got {type(name)} type with value {name}"
        )

    ml_repo_id = self._get_ml_repo_id(ml_repo=ml_repo)
    # Lookup by name goes through the list endpoint, capped at a single result.
    _datasets = self._mlfoundry_artifacts_api.list_datasets_post(
        list_datasets_request_dto=ListDatasetsRequestDto(
            experiment_id=ml_repo_id,
            name=name,
            max_results=1,
        ),
    )
    datasets = _datasets.datasets
    # `not datasets` covers both `None` and an empty list.
    if not datasets:
        raise MlFoundryException(
            f"No data directory found with name {name} under ML Repo {ml_repo}"
        )

    return DataDirectory(dataset=datasets[0])
|
|
1554
|
+
|
|
1555
|
+
def list_data_directories(
    self,
    ml_repo: str,
) -> Iterator[DataDirectory]:
    """
    Iterate over the DataDirectory entries of an ML Repo.

    This is a generator: argument validation and the list requests only run once
    iteration starts. Results are fetched page by page (10 per request) until a page
    comes back empty or without a next-page token.

    Args:
        ml_repo (str): Name of the ML Repository

    Returns:
        Iterator[DataDirectory]: an iterator yielding instances of class DataDirectory

    Raises:
        MlFoundryException: on first iteration, if `ml_repo` is empty or not a string.

    Examples:

        ```python
        from truefoundry.ml import get_client

        client = get_client()
        data_directories = client.list_data_directories(ml_repo="<ml-repo-name>")

        for data_directory in data_directories:
            print(data_directory.name)
        ```
    """
    if not isinstance(ml_repo, str) or ml_repo == "":
        raise MlFoundryException(
            f"ML repo must be string type and not empty. "
            f"Got {type(ml_repo)} type with value {ml_repo}"
        )
    repo_id = self._get_ml_repo_id(ml_repo=ml_repo)
    page_size = 10
    next_token = None
    while True:
        response = self._mlfoundry_artifacts_api.list_datasets_post(
            list_datasets_request_dto=ListDatasetsRequestDto(
                experiment_id=repo_id,
                max_results=page_size,
                page_token=next_token,
            )
        )
        page: List[DatasetDto] = response.datasets or []
        next_token = response.next_page_token
        for item in page:
            yield DataDirectory(dataset=item)
        # Stop on an empty page or when the server reports no further page.
        if not (page and next_token):
            break
|
|
1601
|
+
|
|
1602
|
+
|
|
1603
|
+
def get_client() -> MlFoundry:
    """Initialize a session and return an mlfoundry client bound to it.

    Returns:
        MlFoundry: Instance of the `MlFoundry` client class.

    Examples:

        ### Get client
        ```python
        from truefoundry.ml import get_client

        client = get_client()
        ```
    """
    return MlFoundry(session=init_session())
|