truefoundry 0.3.3__py3-none-any.whl → 0.4.0.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of truefoundry might be problematic. Click here for more details.

Files changed (224) hide show
  1. truefoundry/cli/__main__.py +3 -17
  2. truefoundry/common/__init__.py +0 -0
  3. truefoundry/common/request_utils.py +56 -0
  4. truefoundry/deploy/cli/cli.py +1 -1
  5. truefoundry/deploy/lib/auth/credential_provider.py +2 -12
  6. truefoundry/deploy/lib/clients/servicefoundry_client.py +0 -9
  7. truefoundry/deploy/lib/exceptions.py +1 -6
  8. truefoundry/deploy/lib/session.py +1 -16
  9. truefoundry/langchain/truefoundry_chat.py +1 -1
  10. truefoundry/langchain/truefoundry_embeddings.py +1 -1
  11. truefoundry/langchain/truefoundry_llm.py +1 -1
  12. truefoundry/langchain/utils.py +0 -41
  13. truefoundry/ml/__init__.py +46 -6
  14. truefoundry/ml/artifact/__init__.py +0 -0
  15. truefoundry/ml/artifact/truefoundry_artifact_repo.py +1120 -0
  16. truefoundry/ml/autogen/__init__.py +0 -0
  17. truefoundry/ml/autogen/client/__init__.py +373 -0
  18. truefoundry/ml/autogen/client/api/__init__.py +16 -0
  19. truefoundry/ml/autogen/client/api/auth_api.py +184 -0
  20. truefoundry/ml/autogen/client/api/deprecated_api.py +605 -0
  21. truefoundry/ml/autogen/client/api/experiments_api.py +2109 -0
  22. truefoundry/ml/autogen/client/api/health_api.py +299 -0
  23. truefoundry/ml/autogen/client/api/metrics_api.py +371 -0
  24. truefoundry/ml/autogen/client/api/mlfoundry_artifacts_api.py +7213 -0
  25. truefoundry/ml/autogen/client/api/python_deployment_config_api.py +201 -0
  26. truefoundry/ml/autogen/client/api/run_artifacts_api.py +231 -0
  27. truefoundry/ml/autogen/client/api/runs_api.py +2919 -0
  28. truefoundry/ml/autogen/client/api_client.py +822 -0
  29. truefoundry/ml/autogen/client/api_response.py +30 -0
  30. truefoundry/ml/autogen/client/configuration.py +489 -0
  31. truefoundry/ml/autogen/client/exceptions.py +161 -0
  32. truefoundry/ml/autogen/client/models/__init__.py +344 -0
  33. truefoundry/ml/autogen/client/models/add_custom_metrics_to_model_version_request_dto.py +69 -0
  34. truefoundry/ml/autogen/client/models/add_features_to_model_version_request_dto.py +83 -0
  35. truefoundry/ml/autogen/client/models/agent.py +125 -0
  36. truefoundry/ml/autogen/client/models/agent_app.py +118 -0
  37. truefoundry/ml/autogen/client/models/agent_open_api_tool.py +143 -0
  38. truefoundry/ml/autogen/client/models/agent_open_api_tool_with_fqn.py +144 -0
  39. truefoundry/ml/autogen/client/models/agent_with_fqn.py +127 -0
  40. truefoundry/ml/autogen/client/models/artifact_dto.py +115 -0
  41. truefoundry/ml/autogen/client/models/artifact_response_dto.py +75 -0
  42. truefoundry/ml/autogen/client/models/artifact_type.py +39 -0
  43. truefoundry/ml/autogen/client/models/artifact_version_dto.py +141 -0
  44. truefoundry/ml/autogen/client/models/artifact_version_response_dto.py +77 -0
  45. truefoundry/ml/autogen/client/models/artifact_version_status.py +35 -0
  46. truefoundry/ml/autogen/client/models/assistant_message.py +89 -0
  47. truefoundry/ml/autogen/client/models/authorize_user_for_model_request_dto.py +69 -0
  48. truefoundry/ml/autogen/client/models/authorize_user_for_model_version_request_dto.py +69 -0
  49. truefoundry/ml/autogen/client/models/backfill_default_storage_integration_id_request_dto.py +67 -0
  50. truefoundry/ml/autogen/client/models/blob_storage_reference.py +93 -0
  51. truefoundry/ml/autogen/client/models/body_get_search_runs_get.py +72 -0
  52. truefoundry/ml/autogen/client/models/chat_prompt.py +156 -0
  53. truefoundry/ml/autogen/client/models/chat_prompt_messages_inner.py +171 -0
  54. truefoundry/ml/autogen/client/models/columns_dto.py +73 -0
  55. truefoundry/ml/autogen/client/models/content.py +153 -0
  56. truefoundry/ml/autogen/client/models/content1.py +153 -0
  57. truefoundry/ml/autogen/client/models/content2.py +174 -0
  58. truefoundry/ml/autogen/client/models/content2_any_of_inner.py +150 -0
  59. truefoundry/ml/autogen/client/models/create_artifact_request_dto.py +74 -0
  60. truefoundry/ml/autogen/client/models/create_artifact_response_dto.py +66 -0
  61. truefoundry/ml/autogen/client/models/create_artifact_version_request_dto.py +74 -0
  62. truefoundry/ml/autogen/client/models/create_artifact_version_response_dto.py +66 -0
  63. truefoundry/ml/autogen/client/models/create_dataset_request_dto.py +76 -0
  64. truefoundry/ml/autogen/client/models/create_experiment_request_dto.py +94 -0
  65. truefoundry/ml/autogen/client/models/create_experiment_response_dto.py +67 -0
  66. truefoundry/ml/autogen/client/models/create_model_version_request_dto.py +95 -0
  67. truefoundry/ml/autogen/client/models/create_multi_part_upload_for_dataset_request_dto.py +73 -0
  68. truefoundry/ml/autogen/client/models/create_multi_part_upload_for_dataset_response_dto.py +79 -0
  69. truefoundry/ml/autogen/client/models/create_multi_part_upload_request_dto.py +73 -0
  70. truefoundry/ml/autogen/client/models/create_python_deployment_config_request_dto.py +72 -0
  71. truefoundry/ml/autogen/client/models/create_python_deployment_config_response_dto.py +68 -0
  72. truefoundry/ml/autogen/client/models/create_run_request_dto.py +97 -0
  73. truefoundry/ml/autogen/client/models/create_run_response_dto.py +76 -0
  74. truefoundry/ml/autogen/client/models/dataset_dto.py +112 -0
  75. truefoundry/ml/autogen/client/models/dataset_response_dto.py +75 -0
  76. truefoundry/ml/autogen/client/models/delete_artifact_versions_request_dto.py +65 -0
  77. truefoundry/ml/autogen/client/models/delete_dataset_request_dto.py +74 -0
  78. truefoundry/ml/autogen/client/models/delete_model_version_request_dto.py +65 -0
  79. truefoundry/ml/autogen/client/models/delete_run_request.py +65 -0
  80. truefoundry/ml/autogen/client/models/delete_tag_request_dto.py +68 -0
  81. truefoundry/ml/autogen/client/models/experiment_dto.py +127 -0
  82. truefoundry/ml/autogen/client/models/experiment_id_request_dto.py +67 -0
  83. truefoundry/ml/autogen/client/models/experiment_response_dto.py +75 -0
  84. truefoundry/ml/autogen/client/models/experiment_tag_dto.py +69 -0
  85. truefoundry/ml/autogen/client/models/feature_dto.py +68 -0
  86. truefoundry/ml/autogen/client/models/feature_value_type.py +35 -0
  87. truefoundry/ml/autogen/client/models/file_info_dto.py +76 -0
  88. truefoundry/ml/autogen/client/models/finalize_artifact_version_request_dto.py +101 -0
  89. truefoundry/ml/autogen/client/models/get_experiment_response_dto.py +88 -0
  90. truefoundry/ml/autogen/client/models/get_latest_run_log_response_dto.py +76 -0
  91. truefoundry/ml/autogen/client/models/get_metric_history_response.py +79 -0
  92. truefoundry/ml/autogen/client/models/get_signed_url_for_dataset_write_request_dto.py +68 -0
  93. truefoundry/ml/autogen/client/models/get_signed_urls_for_artifact_version_read_request_dto.py +68 -0
  94. truefoundry/ml/autogen/client/models/get_signed_urls_for_artifact_version_read_response_dto.py +81 -0
  95. truefoundry/ml/autogen/client/models/get_signed_urls_for_artifact_version_write_request_dto.py +69 -0
  96. truefoundry/ml/autogen/client/models/get_signed_urls_for_artifact_version_write_response_dto.py +83 -0
  97. truefoundry/ml/autogen/client/models/get_signed_urls_for_dataset_read_request_dto.py +68 -0
  98. truefoundry/ml/autogen/client/models/get_signed_urls_for_dataset_read_response_dto.py +81 -0
  99. truefoundry/ml/autogen/client/models/get_signed_urls_for_dataset_write_response_dto.py +81 -0
  100. truefoundry/ml/autogen/client/models/get_tenant_id_response_dto.py +74 -0
  101. truefoundry/ml/autogen/client/models/http_validation_error.py +82 -0
  102. truefoundry/ml/autogen/client/models/image_content_part.py +87 -0
  103. truefoundry/ml/autogen/client/models/image_url.py +75 -0
  104. truefoundry/ml/autogen/client/models/internal_metadata.py +180 -0
  105. truefoundry/ml/autogen/client/models/latest_run_log_dto.py +78 -0
  106. truefoundry/ml/autogen/client/models/list_artifact_versions_request_dto.py +107 -0
  107. truefoundry/ml/autogen/client/models/list_artifact_versions_response_dto.py +87 -0
  108. truefoundry/ml/autogen/client/models/list_artifacts_request_dto.py +96 -0
  109. truefoundry/ml/autogen/client/models/list_artifacts_response_dto.py +86 -0
  110. truefoundry/ml/autogen/client/models/list_colums_response_dto.py +75 -0
  111. truefoundry/ml/autogen/client/models/list_datasets_request_dto.py +78 -0
  112. truefoundry/ml/autogen/client/models/list_datasets_response_dto.py +86 -0
  113. truefoundry/ml/autogen/client/models/list_experiments_response_dto.py +86 -0
  114. truefoundry/ml/autogen/client/models/list_files_for_artifact_version_request_dto.py +76 -0
  115. truefoundry/ml/autogen/client/models/list_files_for_artifact_versions_response_dto.py +82 -0
  116. truefoundry/ml/autogen/client/models/list_files_for_dataset_request_dto.py +76 -0
  117. truefoundry/ml/autogen/client/models/list_files_for_dataset_response_dto.py +82 -0
  118. truefoundry/ml/autogen/client/models/list_latest_run_logs_response_dto.py +82 -0
  119. truefoundry/ml/autogen/client/models/list_metric_history_request_dto.py +69 -0
  120. truefoundry/ml/autogen/client/models/list_metric_history_response_dto.py +84 -0
  121. truefoundry/ml/autogen/client/models/list_model_version_response_dto.py +87 -0
  122. truefoundry/ml/autogen/client/models/list_model_versions_request_dto.py +93 -0
  123. truefoundry/ml/autogen/client/models/list_models_request_dto.py +89 -0
  124. truefoundry/ml/autogen/client/models/list_models_response_dto.py +84 -0
  125. truefoundry/ml/autogen/client/models/list_run_artifacts_response_dto.py +84 -0
  126. truefoundry/ml/autogen/client/models/list_run_logs_response_dto.py +82 -0
  127. truefoundry/ml/autogen/client/models/list_seed_experiments_response_dto.py +81 -0
  128. truefoundry/ml/autogen/client/models/log_batch_request_dto.py +106 -0
  129. truefoundry/ml/autogen/client/models/log_metric_request_dto.py +80 -0
  130. truefoundry/ml/autogen/client/models/log_param_request_dto.py +76 -0
  131. truefoundry/ml/autogen/client/models/method.py +37 -0
  132. truefoundry/ml/autogen/client/models/metric_collection_dto.py +82 -0
  133. truefoundry/ml/autogen/client/models/metric_dto.py +76 -0
  134. truefoundry/ml/autogen/client/models/mime_type.py +37 -0
  135. truefoundry/ml/autogen/client/models/model_configuration.py +103 -0
  136. truefoundry/ml/autogen/client/models/model_dto.py +122 -0
  137. truefoundry/ml/autogen/client/models/model_response_dto.py +75 -0
  138. truefoundry/ml/autogen/client/models/model_schema_dto.py +85 -0
  139. truefoundry/ml/autogen/client/models/model_version_dto.py +163 -0
  140. truefoundry/ml/autogen/client/models/model_version_response_dto.py +75 -0
  141. truefoundry/ml/autogen/client/models/multi_part_upload_dto.py +107 -0
  142. truefoundry/ml/autogen/client/models/multi_part_upload_response_dto.py +79 -0
  143. truefoundry/ml/autogen/client/models/multi_part_upload_storage_provider.py +34 -0
  144. truefoundry/ml/autogen/client/models/notify_artifact_version_failure_dto.py +65 -0
  145. truefoundry/ml/autogen/client/models/openapi_spec.py +152 -0
  146. truefoundry/ml/autogen/client/models/param_dto.py +66 -0
  147. truefoundry/ml/autogen/client/models/parameters.py +84 -0
  148. truefoundry/ml/autogen/client/models/prediction_type.py +34 -0
  149. truefoundry/ml/autogen/client/models/resolve_agent_app_response_dto.py +75 -0
  150. truefoundry/ml/autogen/client/models/restore_run_request_dto.py +65 -0
  151. truefoundry/ml/autogen/client/models/run_data_dto.py +104 -0
  152. truefoundry/ml/autogen/client/models/run_dto.py +84 -0
  153. truefoundry/ml/autogen/client/models/run_info_dto.py +105 -0
  154. truefoundry/ml/autogen/client/models/run_log_dto.py +90 -0
  155. truefoundry/ml/autogen/client/models/run_log_input_dto.py +80 -0
  156. truefoundry/ml/autogen/client/models/run_response_dto.py +75 -0
  157. truefoundry/ml/autogen/client/models/run_tag_dto.py +66 -0
  158. truefoundry/ml/autogen/client/models/search_runs_request_dto.py +94 -0
  159. truefoundry/ml/autogen/client/models/search_runs_response_dto.py +84 -0
  160. truefoundry/ml/autogen/client/models/set_experiment_tag_request_dto.py +73 -0
  161. truefoundry/ml/autogen/client/models/set_tag_request_dto.py +76 -0
  162. truefoundry/ml/autogen/client/models/signed_url_dto.py +69 -0
  163. truefoundry/ml/autogen/client/models/stop.py +152 -0
  164. truefoundry/ml/autogen/client/models/store_run_logs_request_dto.py +83 -0
  165. truefoundry/ml/autogen/client/models/system_message.py +89 -0
  166. truefoundry/ml/autogen/client/models/text.py +153 -0
  167. truefoundry/ml/autogen/client/models/text_content_part.py +84 -0
  168. truefoundry/ml/autogen/client/models/update_artifact_version_request_dto.py +74 -0
  169. truefoundry/ml/autogen/client/models/update_dataset_request_dto.py +74 -0
  170. truefoundry/ml/autogen/client/models/update_experiment_request_dto.py +74 -0
  171. truefoundry/ml/autogen/client/models/update_model_version_request_dto.py +93 -0
  172. truefoundry/ml/autogen/client/models/update_run_request_dto.py +78 -0
  173. truefoundry/ml/autogen/client/models/update_run_response_dto.py +76 -0
  174. truefoundry/ml/autogen/client/models/url.py +153 -0
  175. truefoundry/ml/autogen/client/models/user_message.py +89 -0
  176. truefoundry/ml/autogen/client/models/validation_error.py +87 -0
  177. truefoundry/ml/autogen/client/models/validation_error_loc_inner.py +154 -0
  178. truefoundry/ml/autogen/client/rest.py +426 -0
  179. truefoundry/ml/autogen/client_README.md +322 -0
  180. truefoundry/ml/cli/__init__.py +0 -0
  181. truefoundry/ml/cli/cli.py +18 -0
  182. truefoundry/ml/cli/commands/__init__.py +3 -0
  183. truefoundry/ml/cli/commands/download.py +87 -0
  184. truefoundry/ml/constants.py +84 -0
  185. truefoundry/ml/enums.py +70 -0
  186. truefoundry/ml/env_vars.py +13 -0
  187. truefoundry/ml/exceptions.py +8 -0
  188. truefoundry/ml/git_info.py +60 -0
  189. truefoundry/ml/internal_namespace.py +52 -0
  190. truefoundry/ml/log_types/__init__.py +4 -0
  191. truefoundry/ml/log_types/artifacts/artifact.py +427 -0
  192. truefoundry/ml/log_types/artifacts/constants.py +33 -0
  193. truefoundry/ml/log_types/artifacts/dataset.py +383 -0
  194. truefoundry/ml/log_types/artifacts/general_artifact.py +110 -0
  195. truefoundry/ml/log_types/artifacts/model.py +628 -0
  196. truefoundry/ml/log_types/artifacts/model_extras.py +48 -0
  197. truefoundry/ml/log_types/artifacts/utils.py +161 -0
  198. truefoundry/ml/log_types/image/__init__.py +3 -0
  199. truefoundry/ml/log_types/image/constants.py +8 -0
  200. truefoundry/ml/log_types/image/image.py +358 -0
  201. truefoundry/ml/log_types/image/image_normalizer.py +101 -0
  202. truefoundry/ml/log_types/image/types.py +68 -0
  203. truefoundry/ml/log_types/plot.py +281 -0
  204. truefoundry/ml/log_types/pydantic_base.py +10 -0
  205. truefoundry/ml/log_types/utils.py +12 -0
  206. truefoundry/ml/logger.py +17 -0
  207. truefoundry/ml/login.py +241 -0
  208. truefoundry/ml/mlfoundry_api.py +1620 -0
  209. truefoundry/ml/mlfoundry_run.py +1238 -0
  210. truefoundry/ml/run_utils.py +102 -0
  211. truefoundry/ml/services/__init__.py +0 -0
  212. truefoundry/ml/services/auth_service.py +109 -0
  213. truefoundry/ml/services/entities.py +108 -0
  214. truefoundry/ml/services/servicefoundry_service.py +35 -0
  215. truefoundry/ml/services/utils.py +122 -0
  216. truefoundry/ml/session.py +271 -0
  217. truefoundry/ml/validation_utils.py +346 -0
  218. truefoundry/pydantic_v1.py +5 -1
  219. {truefoundry-0.3.3.dist-info → truefoundry-0.4.0.dev0.dist-info}/METADATA +19 -12
  220. truefoundry-0.4.0.dev0.dist-info/RECORD +342 -0
  221. truefoundry-0.3.3.dist-info/RECORD +0 -136
  222. /truefoundry/{python_deploy_codegen.py → deploy/python_deploy_codegen.py} +0 -0
  223. {truefoundry-0.3.3.dist-info → truefoundry-0.4.0.dev0.dist-info}/WHEEL +0 -0
  224. {truefoundry-0.3.3.dist-info → truefoundry-0.4.0.dev0.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,1620 @@
1
+ import os
2
+ import time
3
+ import uuid
4
+ from pathlib import Path
5
+ from typing import Any, Dict, Iterator, List, Optional, Sequence, Tuple, Union
6
+
7
+ import coolname
8
+ import pandas as pd
9
+
10
+ from truefoundry.ml import constants, env_vars
11
+ from truefoundry.ml.autogen.client import ( # type: ignore[attr-defined]
12
+ ArtifactDto,
13
+ ArtifactType,
14
+ CreateDatasetRequestDto,
15
+ CreateExperimentRequestDto,
16
+ CreateRunRequestDto,
17
+ DatasetDto,
18
+ ListArtifactsRequestDto,
19
+ ListArtifactVersionsRequestDto,
20
+ ListDatasetsRequestDto,
21
+ ListModelVersionsRequestDto,
22
+ ModelDto,
23
+ RunTagDto,
24
+ SearchRunsRequestDto,
25
+ )
26
+ from truefoundry.ml.autogen.client.api import ( # type: ignore[attr-defined]
27
+ ExperimentsApi,
28
+ MlfoundryArtifactsApi,
29
+ RunsApi,
30
+ )
31
+ from truefoundry.ml.autogen.client.exceptions import (
32
+ ApiException,
33
+ NotFoundException,
34
+ )
35
+ from truefoundry.ml.enums import ModelFramework, ViewType
36
+ from truefoundry.ml.exceptions import MlFoundryException
37
+ from truefoundry.ml.internal_namespace import NAMESPACE
38
+ from truefoundry.ml.log_types.artifacts.artifact import ArtifactPath, ArtifactVersion
39
+ from truefoundry.ml.log_types.artifacts.dataset import DataDirectory
40
+ from truefoundry.ml.log_types.artifacts.general_artifact import _log_artifact_version
41
+ from truefoundry.ml.log_types.artifacts.model import ModelVersion, _log_model_version
42
+ from truefoundry.ml.log_types.artifacts.model_extras import CustomMetric, ModelSchema
43
+ from truefoundry.ml.logger import logger
44
+ from truefoundry.ml.mlfoundry_run import MlFoundryRun
45
+ from truefoundry.ml.services.servicefoundry_service import ServicefoundryService
46
+ from truefoundry.ml.session import (
47
+ Session,
48
+ _get_api_client,
49
+ get_active_session,
50
+ init_session,
51
+ )
52
+ from truefoundry.ml.validation_utils import (
53
+ _validate_ml_repo_description,
54
+ _validate_ml_repo_name,
55
+ _validate_run_name,
56
+ )
57
+
58
+ _SEARCH_MAX_RESULTS_DEFAULT = 1000
59
+
60
+
61
def _get_internal_env_vars_values() -> Dict[str, str]:
    """Collect the internal environment variables that are currently set.

    Returns:
        Dict[str, str]: Mapping from each name listed in
            `env_vars.INTERNAL_ENV_VARS` to its value in the process
            environment. Names that are unset or set to an empty string
            are left out.
    """
    looked_up = ((name, os.getenv(name)) for name in env_vars.INTERNAL_ENV_VARS)
    return {name: value for name, value in looked_up if value}
69
+
70
+
71
+ def _resolve_version(version: Union[int, str]) -> int:
72
+ if not isinstance(version, int) and not (
73
+ isinstance(version, str) and version.isnumeric()
74
+ ):
75
+ raise MlFoundryException(
76
+ f"version must be an integer or string containing numbers only. Got {version!r}"
77
+ )
78
+ final_version = int(version)
79
+ if final_version <= 0:
80
+ raise ValueError("version must be greater than 0")
81
+ return final_version
82
+
83
+
84
+ class MlFoundry:
85
+ """MlFoundry."""
86
+
87
+ # TODO (chiragjn): Don't allow session as None here!
88
def __init__(self, session: Session):
    """Set up the API clients used by this instance.

    Args:
        session (Session): Session instance to get auth credentials from
    """
    self._tracking_uri: str = session.tracking_uri
    # One API client is built from the session and shared by every API wrapper.
    api_client = _get_api_client(session=session)
    self._api_client = api_client
    self._experiments_api = ExperimentsApi(api_client=api_client)
    self._runs_api = RunsApi(api_client=api_client)
    self._mlfoundry_artifacts_api = MlfoundryArtifactsApi(api_client=api_client)
101
+
102
def _get_ml_repo_id(self, ml_repo: str) -> str:
    """Look up the id of an ML Repo from its name.

    Args:
        ml_repo (str): The name of the ML Repo.

    Returns:
        str: The id of the ML Repo.

    Raises:
        MlFoundryException: If no ML Repo with this name exists, or if the
            lookup request fails.
    """
    try:
        response = self._experiments_api.get_experiment_by_name_get(
            experiment_name=ml_repo
        )
        repo = response.experiment
    except NotFoundException:
        # `from None` hides the underlying API error for this case.
        raise MlFoundryException(
            f"ML Repo Does Not Exist for name: {ml_repo}. You may either "
            "create it from the dashboard or using client.create_ml_repo('<ml_repo_name>')"
        ) from None
    except ApiException as e:
        raise MlFoundryException(
            f"Error happened in getting ML Repo based on name: "
            f"{ml_repo}. Error details: {e}"
        ) from e

    repo_id = repo.experiment_id
    assert repo_id is not None
    return repo_id
131
+
132
def list_ml_repos(self) -> List[str]:
    """Returns a list of names of ML Repos accessible by the current user.

    The repos are fetched page by page (25 per request) until the server
    returns an empty page or no next page token.

    Returns:
        List[str]: A list of names of ML Repos

    Raises:
        MlFoundryException: If fetching a page of ML Repos fails.
    """
    # TODO (chiragjn): This API should yield ML Repo Entities instead of just names
    #   Kinda useless without it
    page_size = 25
    names = []
    next_token = None
    while True:
        try:
            page = self._experiments_api.list_experiments_get(
                view_type=ViewType.ALL.value,
                max_results=page_size,
                page_token=next_token,
            )
        except ApiException as e:
            raise MlFoundryException(
                f"Error happened in fetching ML Repos. Error details: {e}"
            ) from e

        repos = page.experiments
        next_token = page.next_page_token
        # ML Repo with experiment_id 0 represents default ML Repo which we are removing.
        names.extend(repo.name for repo in repos if repo.experiment_id != "0")
        if not repos or next_token is None:
            break
    return names
162
+
163
def create_ml_repo(
    self,
    name: str,
    storage_integration_fqn: str,
    description: Optional[str] = None,
):
    """Creates an ML Repository.

    If no ML Repo called `name` exists, it is created with the given storage
    integration. If one already exists, nothing is created or modified; the
    call only checks that the existing ML Repo uses the same storage
    integration and raises otherwise.

    Args:
        name (str): The name of the Repository you want to create.
        storage_integration_fqn(str): The storage integration FQN to use for the experiment
            for saving artifacts.
        description (str): A description for ML Repo.

    Raises:
        MlFoundryException: If creating the ML Repo fails, if there is no
            active session, if the storage integration of an existing ML Repo
            cannot be fetched, or if an ML Repo with the same name already
            exists with a different storage integration.

    Examples:

    ### Create Repository
    ```python
    from truefoundry.ml import get_client

    client = get_client()

    client.create_ml_repo(
        name="my-repo",
        storage_integration_fqn="<storage-integration-fqn>",
    )
    ```
    """
    _validate_ml_repo_name(ml_repo_name=name)
    if description:
        _validate_ml_repo_description(description=description)
    try:
        _ml_repo_instance = self._experiments_api.get_experiment_by_name_get(
            experiment_name=name
        )
        existing_ml_repo = _ml_repo_instance.experiment
    except NotFoundException:
        existing_ml_repo = None

    if not existing_ml_repo:
        try:
            self._experiments_api.create_experiment_post(
                create_experiment_request_dto=CreateExperimentRequestDto(
                    name=name,
                    description=description,
                    storage_integration_fqn=storage_integration_fqn,
                )
            )
        except ApiException as e:
            err_msg = f"Error happened in creating ML Repo with name: {name}. Error details: {e}"
            raise MlFoundryException(err_msg) from e
        return

    # The ML Repo already exists: only verify that it is backed by the
    # storage integration the caller asked for.
    session = get_active_session()
    if session is None:
        raise MlFoundryException(
            "No active session found. Perhaps you are not logged in?\n"
            "Please log in using `tfy login [--host HOST] --relogin`"
        )
    servicefoundry_service = ServicefoundryService(
        tracking_uri=self.get_tracking_uri(),
        token=session.token.access_token,
    )

    assert existing_ml_repo.storage_integration_id is not None
    try:
        existing_storage_integration = (
            servicefoundry_service.get_integration_from_id(
                existing_ml_repo.storage_integration_id
            )
        )
    except Exception as e:
        raise MlFoundryException(
            "Error in getting storage integration for ML Repo"
        ) from e

    if existing_storage_integration["fqn"] != storage_integration_fqn:
        raise MlFoundryException(
            f"ML Repo with same name already exists with storage integration: "
            f"{existing_storage_integration['fqn']}. Cannot update the storage integration to: "
            f"{storage_integration_fqn}"
        )
242
+
243
def create_run(
    self,
    ml_repo: str,
    run_name: Optional[str] = None,
    tags: Optional[Dict[str, Any]] = None,
    **kwargs,
) -> MlFoundryRun:
    """Initialize a `run`.

    In a machine learning experiment `run` represents a single experiment
    conducted under a ML Repo.

    Args:
        ml_repo (str): The name of the ML Repo under which the run will be created.
            ml_repo should only contain alphanumerics (a-z,A-Z,0-9) or hyphen (-).
            The user must have `ADMIN` or `WRITE` access to this ML Repo.
        run_name (Optional[str], optional): The name of the run. If not passed, a randomly
            generated name is assigned to the run. Under a ML Repo, all runs should have
            a unique name. If the passed `run_name` is already used under a ML Repo, the
            `run_name` will be de-duplicated by adding a suffix.
            run name should only contain alphanumerics (a-z,A-Z,0-9) or hyphen (-).
        tags (Optional[Dict[str, Any]], optional): Optional tags to attach with
            this run. Tags are key-value pairs. The passed dictionary is not
            modified.
        kwargs: Extra keyword arguments forwarded to the `MlFoundryRun`
            constructor. `auto_end` defaults to `True` when not given.

    Returns:
        MlFoundryRun: An instance of `MlFoundryRun` class which represents a `run`.

    Examples:

    ### Create a run under current user.
    ```python
    from truefoundry.ml import get_client

    client = get_client()

    tags = {"model_type": "svm"}
    run = client.create_run(
        ml_repo="my-classification-project", run_name="svm-with-rbf-kernel", tags=tags
    )

    run.end()
    ```

    ### Creating a run using context manager.
    ```python
    from truefoundry.ml import get_client

    client = get_client()
    with client.create_run(
        ml_repo="my-classification-project", run_name="svm-with-rbf-kernel"
    ) as run:
        # ...
        # Model training code
        ...
    # `run` will be automatically marked as `FINISHED` or `FAILED`.
    ```

    ### Create a run in a ML Repo owned by a different user.
    ```python
    from truefoundry.ml import get_client

    client = get_client()

    tags = {"model_type": "svm"}
    run = client.create_run(
        ml_repo="my-classification-project",
        run_name="svm-with-rbf-kernel",
        tags=tags,
    )
    run.end()
    ```
    """
    if not run_name:
        run_name = coolname.generate_slug(2)
        logger.info(
            f"No run_name given. Using a randomly generated name {run_name}."
            " You can pass your own using the `run_name` argument"
        )
    _validate_run_name(name=run_name)
    ml_repo_id = self._get_ml_repo_id(ml_repo=ml_repo)
    if tags is not None:
        NAMESPACE.validate_namespace_not_used(tags.keys())

    # Work on a copy so that the internal env var entries added below do not
    # leak into the dictionary owned by the caller.
    run_tags: Dict[str, Any] = dict(tags) if tags is not None else {}
    run_tags.update(_get_internal_env_vars_values())

    _run = self._runs_api.create_run_post(
        CreateRunRequestDto(
            user_id="unknown",  # This does not matter, because on server we use the id from token
            start_time=int(
                time.time() * 1000
            ),  # TODO (chiragjn): computing start time should be on server side!
            experiment_id=ml_repo_id,
            name=run_name,
            tags=[RunTagDto(key=k, value=v) for k, v in run_tags.items()],
        )
    )
    run = _run.run

    assert run is not None
    assert run.info.run_id is not None
    assert run.info.fqn is not None

    mlf_run_id = run.info.run_id
    kwargs.setdefault("auto_end", True)
    mlf_run = MlFoundryRun(experiment_id=ml_repo_id, run_id=mlf_run_id, **kwargs)
    mlf_run._add_git_info()
    mlf_run._add_python_truefoundry_version()
    logger.info(f"Run {run.info.fqn!r} has started.")
    logger.info(f"Link to the dashboard for the run: {mlf_run.dashboard_link}")
    return mlf_run
354
+
355
def get_run_by_id(self, run_id: str) -> MlFoundryRun:
    """Fetch an existing `run` from its `run_id`.

    A value containing `/` is treated as a run fqn and is resolved through
    `get_run_by_fqn` instead.

    Args:
        run_id (str): run_id or fqn of an existing `run`.

    Returns:
        MlFoundryRun: An instance of `MlFoundryRun` class which represents a `run`.

    Raises:
        MlFoundryException: If `run_id` is empty or not a string.

    Examples:

    ### Get run by the run id
    ```python
    from truefoundry.ml import get_client

    client = get_client()

    run = client.get_run_by_id(run_id='a8f6dafd70aa4baf9437a33c52d7ee90')
    ```
    """
    is_invalid = run_id == "" or (not isinstance(run_id, str))
    if is_invalid:
        raise MlFoundryException(
            f"run_id must be string type and not empty. "
            f"Got {type(run_id)} type with value {run_id}"
        )

    # Run ids never go through this branch; a slash marks a fully qualified name.
    if "/" in run_id:
        return self.get_run_by_fqn(run_id)

    response = self._runs_api.get_run_get(run_id=run_id)
    run_dto = response.run
    assert run_dto is not None

    mlfoundry_run = MlFoundryRun._from_dto(run_dto)
    dashboard_message = (
        f"Link to the dashboard for the run: {mlfoundry_run.dashboard_link}"
    )
    logger.info(dashboard_message)
    return mlfoundry_run
392
+
393
def get_run_by_fqn(self, run_fqn: str) -> MlFoundryRun:
    """Fetch an existing `run` from its `fqn`.

    `fqn` stands for Fully Qualified Name. A run `fqn` has the following pattern:
    tenant_name/ml_repo/run_name

    If a run `svm` under the ML Repo `cat-classifier` in `truefoundry` tenant,
    the `fqn` will be `truefoundry/cat-classifier/svm`.

    Args:
        run_fqn (str): `fqn` of an existing run.

    Returns:
        MlFoundryRun: An instance of `MlFoundryRun` class which represents a `run`.

    Examples:

    ### get run by run fqn
    ```python
    from truefoundry.ml import get_client

    client = get_client()

    run = client.get_run_by_fqn(run_fqn='truefoundry/my-repo/svm')
    ```
    """
    response = self._runs_api.get_run_by_fqn_get(run_fqn=run_fqn)
    mlfoundry_run = MlFoundryRun._from_dto(response.run)
    dashboard_message = (
        f"Link to the dashboard for the run: {mlfoundry_run.dashboard_link}"
    )
    logger.info(dashboard_message)
    return mlfoundry_run
426
+
427
def get_run_by_name(
    self,
    ml_repo: str,
    run_name: str,
) -> MlFoundryRun:
    """Fetch an existing `run` from its name and the name of its ML Repo.

    Args:
        ml_repo (str): name of the ml_repo of which the run is part of.
        run_name (str): the name of the run required

    Returns:
        MlFoundryRun: An instance of `MlFoundryRun` class which represents a `run`.

    Examples:

    ### get run by name
    ```python
    from truefoundry.ml import get_client

    client = get_client()

    run = client.get_run_by_name(run_name='svm', ml_repo='my-repo')
    ```
    """
    # The ML Repo is identified by name only, so no experiment id is sent.
    lookup = {
        "experiment_id": None,
        "run_name": run_name,
        "experiment_name": ml_repo,
    }
    response = self._runs_api.get_run_by_name_get(**lookup)
    mlfoundry_run = MlFoundryRun._from_dto(response.run)
    dashboard_message = (
        f"Link to the dashboard for the run: {mlfoundry_run.dashboard_link}"
    )
    logger.info(dashboard_message)
    return mlfoundry_run
463
+
464
def get_all_runs(
    self,
    ml_repo: str,
) -> pd.DataFrame:
    """Returns the id and name of every run found by `search_runs` for a ML Repo.

    The user must have `READ` access to the ML Repo.

    Args:
        ml_repo (str): Name of the ML Repo.

    Returns:
        pd.DataFrame: dataframe with two columns- run_id and run_name

    Examples:

    ### get all the runs from a ml_repo
    ```python
    from truefoundry.ml import get_client

    client = get_client()

    run = client.get_all_runs(ml_repo='my-repo')
    ```
    """
    column_names = [constants.RUN_ID_COL_NAME, constants.RUN_NAME_COL_NAME]
    rows = [(run.run_id, run.run_name) for run in self.search_runs(ml_repo=ml_repo)]

    # With no runs, build the frame from the column names alone.
    if not rows:
        return pd.DataFrame(columns=column_names)
    return pd.DataFrame(rows, columns=column_names)
500
+
501
def search_runs(
    self,
    ml_repo: str,
    filter_string: str = "",
    run_view_type: ViewType = ViewType.ACTIVE_ONLY.value,
    order_by: Sequence[str] = ("attribute.start_time DESC",),
    job_run_name: Optional[str] = None,
) -> Iterator[MlFoundryRun]:
    """
    Lazily iterate over the runs of an ML Repo that match a search query.

    The user must have `READ` access to the ML Repo.
    Results are fetched page by page; a `MlFoundryRun` is produced on each
    `next` call. Because this is a generator, validation and the first
    request only happen once iteration starts.

    Args:
        ml_repo (str): Name of the ML Repo.
        filter_string (str, optional):
            Filter query string, defaults to searching all runs.
            Identifier required in the LHS of a search expression.
            Signifies an entity to compare against. An identifier has two parts separated by a period: the
            type of the entity and the name of the entity.
            The type of the entity is metrics, params, attributes, or tags.
            The entity name can contain alphanumeric characters and special characters.
            You can search using two run attributes : status and artifact_uri. Both attributes have string values.
            When a metric, parameter, or tag name contains a special character like hyphen, space, period,
            and so on, enclose the entity name in double quotes or backticks,
            params."model-type" or params.`model-type`

        run_view_type (str, optional): one of the following values "ACTIVE_ONLY", "DELETED_ONLY", or "ALL" runs.
        order_by (List[str], optional):
            List of columns to order by (e.g., "metrics.rmse"). Currently supported values
            are metric.key, parameter.key, tag.key, attribute.key. The ``order_by`` column
            can contain an optional ``DESC`` or ``ASC`` value. The default is ``ASC``.
            The default ordering is to sort by ``start_time DESC``.

        job_run_name (str, optional): Name of a job run. When given, the search is
            additionally restricted to runs tagged with this job run name.

    Returns:
        Iterator[MlFoundryRun]: MLFoundryRuns matching the search query.

    Raises:
        MlFoundryException: If the ML Repo cannot be fetched (it does not exist
            or the user has no permission to access it).

    Examples:

        ```python
        from truefoundry.ml import get_client

        client = get_client()
        with client.create_run(ml_repo="my-project", run_name="run-1") as run1:
            run1.log_metrics(metric_dict={"accuracy": 0.74, "loss": 0.6})
            run1.log_params({"model": "LogisticRegression", "lambda": "0.001"})

        with client.create_run(ml_repo="my-project", run_name="run-2") as run2:
            run2.log_metrics(metric_dict={"accuracy": 0.8, "loss": 0.4})
            run2.log_params({"model": "SVM"})

        # Search for the subset of runs with logged accuracy metric greater than 0.75
        filter_string = "metrics.accuracy > 0.75"
        runs = client.search_runs(ml_repo="my-project", filter_string=filter_string)

        # Search for the subset of runs with logged accuracy metric greater than 0.7
        # and model="LogisticRegression"
        filter_string = "metrics.accuracy > 0.7 and params.model = 'LogisticRegression'"
        runs = client.search_runs(ml_repo="my-project", filter_string=filter_string)

        # Search for the subset of runs with logged accuracy metric greater than 0.7 and
        # order by accuracy in Descending order
        filter_string = "metrics.accuracy > 0.7"
        order_by = ["metric.accuracy DESC"]
        runs = client.search_runs(
            ml_repo="my-project", filter_string=filter_string, order_by=order_by
        )

        # Restrict the search to the runs created by a particular job run
        runs = client.search_runs(
            ml_repo="transformers",
            order_by=order_by,
            job_run_name="job_run_name",
            filter_string=filter_string,
        )
        ```
    """
    _validate_ml_repo_name(ml_repo_name=ml_repo)
    try:
        _ml_repo_obj = self._experiments_api.get_experiment_by_name_get(
            experiment_name=ml_repo
        )
        ml_repo_obj = _ml_repo_obj.experiment
    except ApiException as e:
        raise MlFoundryException(
            f"ML Repo with name {ml_repo} does not exist or your user does not have permission to access it: {e}"
        ) from e

    ml_repo_id = ml_repo_obj.experiment_id

    if job_run_name:
        # Job runs are identified through an internal tag; AND it onto any
        # user supplied filter.
        job_clause = f"tags.TFY_INTERNAL_JOB_RUN_NAME = '{job_run_name}'"
        filter_string = (
            f"{filter_string} and {job_clause}" if filter_string else job_clause
        )

    page_token = None
    while True:
        runs_page = self._runs_api.search_runs_post(
            SearchRunsRequestDto(
                experiment_ids=[ml_repo_id],
                filter=filter_string,
                run_view_type=run_view_type,
                max_results=_SEARCH_MAX_RESULTS_DEFAULT,
                order_by=order_by,
                page_token=page_token,
            )
        )
        # A page without a `runs` field must end the search instead of
        # failing with a TypeError while iterating over None.
        runs = runs_page.runs or []
        page_token = runs_page.next_page_token

        for run in runs:
            yield MlFoundryRun._from_dto(run)

        # Stop on an empty page or when there is no usable continuation
        # token (None or empty), consistent with the other paginated
        # listings in this class.
        if not runs or not page_token:
            break
625
+
626
def get_tracking_uri(self) -> str:
    """
    Return the tracking URI stored on this client.

    Returns:
        The tracking URI.

    Examples:

        ```python
        from truefoundry.ml import get_client

        client = get_client()
        tracking_uri = client.get_tracking_uri()
        print("Current tracking uri: {}".format(tracking_uri))
        ```
    """
    tracking_uri = self._tracking_uri
    return tracking_uri
645
+
646
def get_model_version(
    self,
    ml_repo: str,
    name: str,
    version: Union[str, int] = constants.LATEST_ARTIFACT_OR_MODEL_VERSION,
) -> Optional[ModelVersion]:
    """
    Look up a model version by ML Repo, model name and version so that its
    contents can be downloaded or loaded in memory.

    Args:
        ml_repo (str): ML Repo to which model is logged
        name (str): Model Name
        version (str | int): Model Version to fetch (default is the latest version)

    Returns:
        ModelVersion: The ModelVersion instance of the model.

    Examples:

        ### Sklearn

        ```python
        # See `truefoundry.ml.mlfoundry_api.MlFoundry.log_model` examples to understand model logging
        import os
        import tempfile
        import joblib
        from truefoundry.ml import get_client

        client = get_client()
        model_version = client.get_model_version(
            ml_repo="my-classification-project",
            name="my-sklearn-model",
            version=1
        )

        # Download the model to disk
        temp = tempfile.TemporaryDirectory()
        download_info = model_version.download(path=temp.name)
        print(download_info.model_dir, download_info.model_filename)

        # Deserialize and Load
        model = joblib.load(
            os.path.join(download_info.model_dir, download_info.model_filename)
        )
        ```

        ### Huggingface Transformers

        ```python
        # See `truefoundry.ml.mlfoundry_api.MlFoundry.log_model` examples to understand model logging
        import tempfile
        import torch
        from transformers import pipeline

        from truefoundry.ml import get_client

        client = get_client()
        model_version = client.get_model_version(
            ml_repo="my-llm-project",
            name="my-transformers-model",
            version=1
        )

        # Download the model to disk
        temp = tempfile.TemporaryDirectory()
        download_info = model_version.download(path=temp.name)
        print(download_info.model_dir)

        # Deserialize and Load
        pln = pipeline("text-generation", model=download_info.model_dir, torch_dtype=torch.float16)
        ```
    """
    # No explicit version is sent to the API when the latest one is wanted.
    wants_latest = version == constants.LATEST_ARTIFACT_OR_MODEL_VERSION
    resolved_version = None if wants_latest else _resolve_version(version=version)

    ml_repo_id = self._get_ml_repo_id(ml_repo=ml_repo)
    artifacts_api = self._mlfoundry_artifacts_api

    version_response = artifacts_api.get_model_version_by_name_get(
        experiment_id=int(ml_repo_id),
        model_name=name,
        version=resolved_version,
    )
    version_dto = version_response.model_version
    assert version_dto.model_id is not None

    # The parent model record is fetched as well: ModelVersion needs both.
    model_response = artifacts_api.get_model_get(id=version_dto.model_id)
    return ModelVersion(
        model_version=version_dto,
        model=model_response.model,
    )
736
+
737
def get_model_version_by_fqn(self, fqn: str) -> ModelVersion:
    """
    Look up a model version by its fully qualified name so that its
    contents can be downloaded or loaded in memory.

    Args:
        fqn (str): Fully qualified name of the model version.

    Returns:
        ModelVersion: The ModelVersion instance of the model.

    Examples:

        ### Sklearn

        ```python
        # See `truefoundry.ml.mlfoundry_api.MlFoundry.log_model` examples to understand model logging
        import os
        import tempfile
        import joblib
        from truefoundry.ml import get_client

        client = get_client()
        model_version = client.get_model_version_by_fqn(
            fqn="model:truefoundry/my-classification-project/my-sklearn-model:1"
        )

        # Download the model to disk
        temp = tempfile.TemporaryDirectory()
        download_info = model_version.download(path=temp.name)
        print(download_info.model_dir, download_info.model_filename)

        # Deserialize and Load
        model = joblib.load(
            os.path.join(download_info.model_dir, download_info.model_filename)
        )
        ```

        ### Huggingface Transformers

        ```python
        # See `truefoundry.ml.mlfoundry_api.MlFoundry.log_model` examples to understand model logging
        import tempfile
        import torch
        from transformers import pipeline

        from truefoundry.ml import get_client

        client = get_client()
        model_version = client.get_model_version_by_fqn(
            fqn="model:truefoundry/my-llm-project/my-transformers-model:1"
        )
        # Download the model to disk
        temp = tempfile.TemporaryDirectory()
        download_info = model_version.download(path=temp.name)
        print(download_info.model_dir)

        # Deserialize and Load
        pln = pipeline("text-generation", model=download_info.model_dir, torch_dtype=torch.float16)
        ```
    """
    # Resolution of the fqn is delegated entirely to ModelVersion.
    resolved = ModelVersion.from_fqn(fqn=fqn)
    return resolved
796
+
797
def list_model_versions(self, ml_repo: str, name: str) -> Iterator[ModelVersion]:
    """
    List every version of a model so they can be downloaded or loaded in memory.

    The model itself is looked up eagerly (so a missing model fails at call
    time); its versions are then produced lazily by the returned iterator.

    Args:
        ml_repo (str): Repository in which the model is stored.
        name (str): Name of the model whose versions are required

    Returns:
        Iterator[ModelVersion]: An iterator that yields non deleted model versions
            of a model under a given ml_repo sorted reverse by the version number

    Raises:
        MlFoundryException: If no model with this name is found in the ML Repo.

    Examples:

        ```python
        from truefoundry.ml import get_client

        client = get_client()
        model_versions = client.list_model_versions(ml_repo="my-repo", name="svm")

        for model_version in model_versions:
            print(model_version)
        ```
    """
    ml_repo_id = self._get_ml_repo_id(ml_repo=ml_repo)
    try:
        response = self._mlfoundry_artifacts_api.get_model_by_name_get(
            experiment_id=int(ml_repo_id), name=name
        )
    except NotFoundException as e:
        raise MlFoundryException(
            f"Model Does Not Exist for ml_repo={ml_repo}, name={name}. Error: {e}"
        ) from e
    return self._list_model_versions_by_id(model=response.model)
833
+
834
def list_model_versions_by_fqn(self, model_fqn: str) -> Iterator[ModelVersion]:
    """
    List the versions of the model identified by a fully qualified name.

    Args:
        model_fqn: FQN of the Model to list versions for.
            A model_fqn looks like `model:{org}/{user}/{project}/{artifact_name}`
            or `model:{user}/{project}/{artifact_name}`

    Returns:
        Iterator[ModelVersion]: An iterator that yields non deleted model versions
            (instances of `truefoundry.ml.ModelVersion`) under the given
            model_fqn sorted reverse by the version number

    Examples:

        ```python
        from truefoundry.ml import get_client

        client = get_client()
        model_fqn = "model:org/my-project/my-model"
        for mv in client.list_model_versions_by_fqn(model_fqn=model_fqn):
            print(mv.name, mv.version, mv.description)
        ```
    """
    response = self._mlfoundry_artifacts_api.get_model_by_fqn_get(fqn=model_fqn)
    # Hand the already fetched model over so it is not requested again.
    return self._list_model_versions_by_id(model=response.model)
864
+
865
def _list_model_versions_by_id(
    self,
    model_id: Optional[uuid.UUID] = None,
    model: Optional[ModelDto] = None,
) -> Iterator[ModelVersion]:
    """
    Lazily yield the versions of a model, fetching them page by page.

    Exactly one of `model_id` or `model` must be given; the other one is
    derived from it (the id is read from the model, or the model is fetched
    by id).

    Args:
        model_id (Optional[uuid.UUID]): Id of the model to list versions for.
        model (Optional[ModelDto]): Already fetched model to list versions for.

    Yields:
        ModelVersion: One instance per version returned by the API.

    Raises:
        MlFoundryException: If both or neither of `model_id` and `model` are
            passed. As this is a generator, the error surfaces when iteration
            starts.
    """
    if model and not model_id:
        assert model.id is not None
        model_id = model.id
    elif not model and model_id:
        _model = self._mlfoundry_artifacts_api.get_model_get(id=str(model_id))
        model = _model.model
    else:
        raise MlFoundryException(
            "Exactly one of model_id or model should be passed"
        )

    max_results, page_token = 10, None
    while True:
        _model_versions = self._mlfoundry_artifacts_api.list_model_versions_post(
            list_model_versions_request_dto=ListModelVersionsRequestDto(
                model_id=str(model_id),
                max_results=max_results,
                page_token=page_token,
            )
        )
        # A page without a `model_versions` field must end the listing
        # instead of failing with a TypeError while iterating over None.
        model_versions = _model_versions.model_versions or []
        page_token = _model_versions.next_page_token
        for model_version in model_versions:
            yield ModelVersion(model_version=model_version, model=model)
        # Stop on an empty page or when no continuation token is returned.
        if not model_versions or not page_token:
            break
896
+
897
def get_artifact_version(
    self,
    ml_repo: str,
    name: str,
    artifact_type: Optional[ArtifactType] = ArtifactType.ARTIFACT,
    version: Union[str, int] = constants.LATEST_ARTIFACT_OR_MODEL_VERSION,
) -> Optional[ArtifactVersion]:
    """
    Look up an artifact version by ML Repo, artifact name, type and version
    so that its contents can be downloaded.

    Args:
        ml_repo (str): ML Repo to which artifact is logged
        name (str): Artifact Name
        artifact_type (str): The type of artifact to fetch (acceptable values: "artifact", "model", "plot", "image")
        version (str | int): Artifact Version to fetch (default is the latest version)

    Returns:
        ArtifactVersion : An ArtifactVersion instance of the artifact

    Examples:

        ```python
        import tempfile
        from truefoundry.ml import get_client

        client = get_client()
        artifact_version = client.get_artifact_version(ml_repo="ml-repo-name", name="artifact-name", version=1)

        # download the artifact to disk
        temp = tempfile.TemporaryDirectory()
        download_path = artifact_version.download(path=temp.name)
        print(download_path)
        ```
    """
    # No explicit version is sent to the API when the latest one is wanted.
    wants_latest = version == constants.LATEST_ARTIFACT_OR_MODEL_VERSION
    resolved_version = None if wants_latest else _resolve_version(version=version)

    ml_repo_id = self._get_ml_repo_id(ml_repo=ml_repo)
    artifacts_api = self._mlfoundry_artifacts_api

    version_response = artifacts_api.get_artifact_version_by_name_get(
        experiment_id=int(ml_repo_id),
        artifact_name=name,
        artifact_type=artifact_type,
        version=resolved_version,
    )
    version_dto = version_response.artifact_version
    assert version_dto.artifact_id is not None

    # The parent artifact record is fetched too: ArtifactVersion needs both.
    artifact_response = artifacts_api.get_artifact_by_id_get(
        id=version_dto.artifact_id
    )
    return ArtifactVersion(
        artifact_version=version_dto,
        artifact=artifact_response.artifact,
    )
956
+
957
def get_artifact_version_by_fqn(self, fqn: str) -> ArtifactVersion:
    """
    Look up an artifact version by its fully qualified name so that its
    contents can be downloaded.

    Args:
        fqn (str): Fully qualified name of the artifact version.

    Returns:
        ArtifactVersion : An ArtifactVersion instance of the artifact

    Examples:

        ```python
        import tempfile
        from truefoundry.ml import get_client

        client = get_client()
        artifact_version = client.get_artifact_version_by_fqn(
            fqn="artifact:truefoundry/my-classification-project/sklearn-artifact:1"
        )

        # download the artifact to disk
        temp = tempfile.TemporaryDirectory()
        download_path = artifact_version.download(path=temp.name)
        print(download_path)
        ```
    """
    # Resolution of the fqn is delegated entirely to ArtifactVersion.
    resolved = ArtifactVersion.from_fqn(fqn=fqn)
    return resolved
985
+
986
def list_artifact_versions(
    self,
    ml_repo: str,
    name: str,
    artifact_type: Optional[ArtifactType] = ArtifactType.ARTIFACT,
) -> Iterator[ArtifactVersion]:
    """
    List every version of an artifact so their contents can be downloaded.

    The artifact itself is looked up eagerly (so a missing artifact fails at
    call time); its versions are then produced lazily by the returned iterator.

    Args:
        ml_repo (str): Repository in which the artifact is stored.
        name (str): Name of the artifact whose versions are required
        artifact_type (ArtifactType): Type of artifact you want for example model, image, etc.
            A falsy value means the lookup is not restricted by type.

    Returns:
        Iterator[ArtifactVersion]: An iterator that yields non deleted artifact-versions
            of an artifact under a given ml_repo sorted reverse by the version number

    Raises:
        MlFoundryException: If no matching artifact is found in the ML Repo.

    Examples:

        ```python
        from truefoundry.ml import get_client

        client = get_client()
        artifact_versions = client.list_artifact_versions(ml_repo="my-repo", name="artifact-name")

        for artifact_version in artifact_versions:
            print(artifact_version)
        ```
    """
    ml_repo_id = self._get_ml_repo_id(ml_repo=ml_repo)
    type_filter = [artifact_type] if artifact_type else None
    # Only a single match is requested: the first hit is the one listed.
    request = ListArtifactsRequestDto(
        experiment_id=ml_repo_id,
        name=name,
        artifact_types=type_filter,
        max_results=1,
    )
    response = self._mlfoundry_artifacts_api.list_artifacts_post(
        list_artifacts_request_dto=request
    )
    matches = response.artifacts
    if not matches:
        raise MlFoundryException(
            f"Artifact Does Not Exist for ml_repo={ml_repo}, name={name}, type={artifact_type}"
        )
    return self._list_artifact_versions_by_id(artifact=matches[0])
1030
+
1031
def list_artifact_versions_by_fqn(
    self, artifact_fqn: str
) -> Iterator[ArtifactVersion]:
    """
    List the versions of the artifact identified by a fully qualified name.

    Args:
        artifact_fqn: FQN of the Artifact to list versions for.
            An artifact_fqn looks like `{artifact_type}:{org}/{user}/{project}/{artifact_name}`
            or `{artifact_type}:{user}/{project}/{artifact_name}`

            where artifact_type can be one of ("model", "image", "plot")

    Returns:
        Iterator[ArtifactVersion]: An iterator that yields non deleted artifact versions
            (instances of `truefoundry.ml.ArtifactVersion`) under the given
            artifact_fqn sorted reverse by the version number

    Examples:

        ```python
        from truefoundry.ml import get_client

        client = get_client()
        artifact_fqn = "artifact:org/my-project/my-artifact"
        for av in client.list_artifact_versions_by_fqn(artifact_fqn=artifact_fqn):
            print(av.name, av.version, av.description)
        ```
    """
    response = self._mlfoundry_artifacts_api.get_artifact_by_fqn_get(
        fqn=artifact_fqn
    )
    # Hand the already fetched artifact over so it is not requested again.
    return self._list_artifact_versions_by_id(artifact=response.artifact)
1068
+
1069
def _list_artifact_versions_by_id(
    self,
    artifact_id: Optional[uuid.UUID] = None,
    artifact: Optional[ArtifactDto] = None,
) -> Iterator[ArtifactVersion]:
    """
    Lazily yield the versions of an artifact, fetching them page by page.

    Exactly one of `artifact_id` or `artifact` must be given; the other one
    is derived from it (the id is read from the artifact, or the artifact is
    fetched by id).

    Args:
        artifact_id (Optional[uuid.UUID]): Id of the artifact to list versions for.
        artifact (Optional[ArtifactDto]): Already fetched artifact to list versions for.

    Yields:
        ArtifactVersion: One instance per version returned by the API.

    Raises:
        MlFoundryException: If both or neither of `artifact_id` and `artifact`
            are passed. As this is a generator, the error surfaces when
            iteration starts.
    """
    if artifact and not artifact_id:
        assert artifact.id is not None
        artifact_id = artifact.id
    elif not artifact and artifact_id:
        _artifact = self._mlfoundry_artifacts_api.get_artifact_by_id_get(
            id=str(artifact_id)
        )
        artifact = _artifact.artifact
    else:
        raise MlFoundryException(
            "Exactly one of artifact_id or artifact should be passed"
        )

    max_results, page_token = 10, None
    while True:
        _artifact_versions = (
            self._mlfoundry_artifacts_api.list_artifact_versions_post(
                list_artifact_versions_request_dto=ListArtifactVersionsRequestDto(
                    artifact_id=str(artifact_id),
                    max_results=max_results,
                    page_token=page_token,
                )
            )
        )
        # A page without an `artifact_versions` field must end the listing
        # instead of failing with a TypeError while iterating over None.
        artifact_versions = _artifact_versions.artifact_versions or []
        page_token = _artifact_versions.next_page_token
        for artifact_version in artifact_versions:
            yield ArtifactVersion(
                artifact_version=artifact_version, artifact=artifact
            )
        # Stop on an empty page or when no continuation token is returned.
        if not artifact_versions or not page_token:
            break
1106
+
1107
def log_artifact(
    self,
    ml_repo: str,
    name: str,
    artifact_paths: List[
        Union[Tuple[str], Tuple[str, Optional[str]], ArtifactPath]
    ],
    description: Optional[str] = None,
    metadata: Optional[Dict[str, Any]] = None,
    progress: Optional[bool] = None,
) -> Optional[ArtifactVersion]:
    """Log a new artifact version under the given `ml_repo`.

    An `artifact` is a list of local files and directories.
    This function packs the files and directories mentioned in `artifact_paths`
    and uploads them to remote storage linked to the ml_repo

    Args:
        ml_repo (str): Name of the ML Repo to which an artifact is to be logged.
        name (str): Name of the Artifact. If an artifact with this name already exists under the current ml_repo,
            the logged artifact will be added as a new version under that `name`. If no artifact exist with
            the given `name`, the given artifact will be logged as version 1.
        artifact_paths (List[truefoundry.ml.ArtifactPath]): A non-empty list of pairs
            of (source path, destination path) to add files and folders
            to the artifact version contents. The first member of the pair should be a file or directory path
            and the second member should be the path inside the artifact contents to upload to.
            Plain tuples `(source,)` or `(source, destination)` are accepted as well.

            ```python

            from truefoundry.ml import get_client, ArtifactPath

            client = get_client()
            client.log_artifact(
                ml_repo="sample-repo",
                name="xyz",
                artifact_paths=[
                    ArtifactPath("foo.txt", "foo/bar/foo.txt"),
                    ArtifactPath("tokenizer/", "foo/tokenizer/"),
                ]
            )
            ```

            would result in

            ```
            .
            └── foo/
                ├── bar/
                │   └── foo.txt
                └── tokenizer/
                    └── # contents of tokenizer/ directory will be uploaded here
            ```
        description (Optional[str], optional): arbitrary text upto 1024 characters to store as description.
            This field can be updated at any time after logging. Defaults to `None`
        metadata (Optional[Dict[str, Any]], optional): arbitrary json serializable dictionary to store metadata.
            For example, you can use this to store metrics, params, notes.
            This field can be updated at any time after logging. Defaults to `None`
        progress (bool): value to show progress bar, defaults to None.

    Returns:
        truefoundry.ml.ArtifactVersion: an instance of `ArtifactVersion` that can be used to download the files,
        or update attributes like description, metadata.

    Raises:
        MlFoundryException: If `artifact_paths` is empty.

    Examples:

        ```python
        import os
        from truefoundry.ml import get_client, ArtifactPath

        with open("artifact.txt", "w") as f:
            f.write("hello-world")

        client = get_client()
        ml_repo = "sample-repo"

        client.create_ml_repo(ml_repo=ml_repo)
        client.log_artifact(
            ml_repo=ml_repo,
            name="hello-world-file",
            artifact_paths=[ArtifactPath('artifact.txt', 'a/b/')]
        )
        ```
    """
    # Reject an empty upload before any request is made.
    if not artifact_paths:
        raise MlFoundryException(
            "artifact_paths cannot be empty, atleast one artifact_path must be passed"
        )

    repo_id = self._get_ml_repo_id(ml_repo=ml_repo)
    # Logged straight on the client: there is no run and no step to attach.
    logged_version = _log_artifact_version(
        run=None,
        mlfoundry_artifacts_api=self._mlfoundry_artifacts_api,
        ml_repo_id=repo_id,
        name=name,
        artifact_paths=artifact_paths,
        description=description,
        metadata=metadata,
        step=None,
        progress=progress,
    )
    logger.info(f"Logged artifact successfully with fqn {logged_version.fqn!r}")
    return logged_version
1212
+
1213
+ def log_model(
1214
+ self,
1215
+ *,
1216
+ ml_repo: str,
1217
+ name: str,
1218
+ model_file_or_folder: str,
1219
+ framework: Optional[Union[ModelFramework, str]],
1220
+ additional_files: Sequence[Tuple[Union[str, Path], Optional[str]]] = (),
1221
+ description: Optional[str] = None,
1222
+ metadata: Optional[Dict[str, Any]] = None,
1223
+ model_schema: Optional[Union[ModelSchema, Dict[str, Any]]] = None,
1224
+ custom_metrics: Optional[List[Union[CustomMetric, Dict[str, Any]]]] = None,
1225
+ ) -> ModelVersion:
1226
+ """
1227
+ Serialize and log a versioned model under the current ml_repo. Each logged model generates a new version
1228
+ associated with the given `name` and linked to the current run. Multiple versions of the model can be
1229
+ logged as separate versions under the same `name`.
1230
+
1231
+ Args:
1232
+ ml_repo (str): Name of the ML Repo to which an artifact is to be logged.
1233
+ name (str): Name of the model. If a model with this name already exists under the current ML Repo,
1234
+ the logged model will be added as a new version under that `name`. If no models exist with the given
1235
+ `name`, the given model will be logged as version 1.
1236
+ model_file_or_folder (str): Path to either a single file or a folder containing model files. This folder
1237
+ is usually created using serialization methods of libraries or frameworks e.g. `joblib.dump`,
1238
+ `model.save_pretrained(...)`, `torch.save(...)`, `model.save(...)`
1239
+ framework (Union[enums.ModelFramework, str]): Model Framework. Ex:- pytorch, sklearn, tensorflow etc.
1240
+ The full list of supported frameworks can be found in `truefoundry.ml.enums.ModelFramework`.
1241
+ Can also be `None` when `model` is `None`.
1242
+ additional_files (Sequence[Tuple[Union[str, Path], Optional[str]]], optional): A list of pairs
1243
+ of (source path, destination path) to add additional files and folders
1244
+ to the model version contents. The first member of the pair should be a file or directory path
1245
+ and the second member should be the path inside the model versions contents to upload to.
1246
+ The model version contents are arranged like follows
1247
+ .
1248
+ └── model/
1249
+ └── # model files are serialized here
1250
+ └── # any additional files and folders can be added here.
1251
+
1252
+ You can also add additional files to model/ subdirectory by specifying the destination path as model/
1253
+
1254
+ ```python
1255
+ run.log_model(
1256
+ name="xyz",
1257
+ model_file_or_folder="clf.joblib",
1258
+ framework="sklearn",
1259
+ additional_files=[("foo.txt", "foo/bar/foo.txt"), ("tokenizer/", "foo/tokenizer/")]
1260
+ )
1261
+ ```
1262
+
1263
+ would result in
1264
+
1265
+ ```
1266
+ .
1267
+ ├── model/
1268
+ │ └── clf.joblib # if `model_file_or_folder` is a folder, contents will be added here
1269
+ └── foo/
1270
+ ├── bar/
1271
+ │ └── foo.txt
1272
+ └── tokenizer/
1273
+ └── # contents of tokenizer/ directory will be uploaded here
1274
+ ```
1275
+ description (Optional[str], optional): arbitrary text upto 1024 characters to store as description.
1276
+ This field can be updated at any time after logging. Defaults to `None`
1277
+ metadata (Optional[Dict[str, Any]], optional): arbitrary json serializable dictionary to store metadata.
1278
+ For example, you can use this to store metrics, params, notes.
1279
+ This field can be updated at any time after logging. Defaults to `None`
1280
+ model_schema (Optional[Union[Dict[str, Any], ModelSchema]], optional):
1281
+ instance of `truefoundry.ml.ModelSchema`.
1282
+ This schema needs to be consistent with older versions of the model under the given `name` i.e.
1283
+ a feature's value type and model's prediction type cannot be changed in the schema of new version.
1284
+ Features can be removed or added between versions.
1285
+ ```
1286
+ E.g. if there exists a v1 with
1287
+ schema = {"features": {"name": "feat1": "int"}, "prediction": "categorical"}, then
1288
+
1289
+ schema = {"features": {"name": "feat1": "string"}, "prediction": "categorical"} or
1290
+ schema = {"features": {"name": "feat1": "int"}, "prediction": "numerical"}
1291
+ are invalid because they change the types of existing features and prediction
1292
+
1293
+ while
1294
+ schema = {"features": {"name": "feat1": "int", "feat2": "string"}, "prediction": "categorical"} or
1295
+ schema = {"features": {"feat2": "string"}, "prediction": "categorical"}
1296
+ are valid
1297
+
1298
+ This field can be updated at any time after logging. Defaults to None
1299
+ ```
1300
+ custom_metrics: (Optional[Union[List[Dict[str, Any]], CustomMetric]], optional): list of instances of
1301
+ `truefoundry.ml.CustomMetric`
1302
+ The custom metrics must be added according to the prediction type of schema.
1303
+ custom_metrics = [{
1304
+ "name": "mean_square_error",
1305
+ "type": "metric",
1306
+ "value_type": "float"
1307
+ }]
1308
+
1309
+ Returns:
1310
+ truefoundry.ml.ModelVersion: an instance of `ModelVersion` that can be used to download the files,
1311
+ load the model, or update attributes like description, metadata, schema.
1312
+
1313
+ Examples:
1314
+
1315
+ ### Sklearn
1316
+
1317
+ ```python
1318
+ from truefoundry.ml import get_client
1319
+ from truefoundry.ml.enums import ModelFramework
1320
+
1321
+ import joblib
1322
+ import numpy as np
1323
+ from sklearn.pipeline import make_pipeline
1324
+ from sklearn.preprocessing import StandardScaler
1325
+ from sklearn.svm import SVC
1326
+
1327
+ X = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]])
1328
+ y = np.array([1, 1, 2, 2])
1329
+ clf = make_pipeline(StandardScaler(), SVC(gamma='auto'))
1330
+ clf.fit(X, y)
1331
+ joblib.dump(clf, "sklearn-pipeline.joblib")
1332
+
1333
+ client = get_client()
1334
+ client.create_ml_repo( # This is only required once
1335
+ ml_repo="my-classification-project",
1336
+ # This controls which bucket is used.
1337
+ # You can get this from Integrations > Blob Storage. `None` picks the default
1338
+ storage_integration_fqn=None
1339
+ )
1340
+ model_version = client.log_model(
1341
+ ml_repo="my-classification-project",
1342
+ name="my-sklearn-model",
1343
+ model_file_or_folder="sklearn-pipeline.joblib",
1344
+ framework=ModelFramework.SKLEARN,
1345
+ metadata={"accuracy": 0.99, "f1": 0.80},
1346
+ step=1, # step number, useful when using iterative algorithms like SGD
1347
+ )
1348
+ print(model_version.fqn)
1349
+ ```
1350
+
1351
+ ### Huggingface Transformers
1352
+
1353
+ ```python
1354
+ from truefoundry.ml import get_client
1355
+ from truefoundry.ml.enums import ModelFramework
1356
+
1357
+ import torch
1358
+ from transformers import AutoTokenizer, AutoConfig, pipeline, AutoModelForCausalLM
1359
+ pln = pipeline(
1360
+ "text-generation",
1361
+ model="EleutherAI/pythia-70m",
1362
+ tokenizer="EleutherAI/pythia-70m",
1363
+ torch_dtype=torch.float16
1364
+ )
1365
+ pln.model.save_pretrained("my-transformers-model")
1366
+ pln.tokenizer.save_pretrained("my-transformers-model")
1367
+
1368
+ client = get_client()
1369
+ client.create_ml_repo( # This is only required once
1370
+ ml_repo="my-llm-project",
1371
+ # This controls which bucket is used.
1372
+ # You can get this from Integrations > Blob Storage. `None` picks the default
1373
+ storage_integration_fqn=None
1374
+ )
1375
+ model_version = client.log_model(
1376
+ ml_repo="my-llm-project",
1377
+ name="my-transformers-model",
1378
+ model_file_or_folder="my-transformers-model/",
1379
+ framework=ModelFramework.TRANSFORMERS
1380
+ )
1381
+ print(model_version.fqn)
1382
+ ```
1383
+
1384
+ """
1385
+ ml_repo_id = self._get_ml_repo_id(ml_repo=ml_repo)
1386
+
1387
+ model_version = _log_model_version(
1388
+ run=None,
1389
+ mlfoundry_artifacts_api=self._mlfoundry_artifacts_api,
1390
+ ml_repo_id=ml_repo_id,
1391
+ name=name,
1392
+ model_file_or_folder=model_file_or_folder,
1393
+ framework=framework,
1394
+ additional_files=additional_files,
1395
+ description=description,
1396
+ metadata=metadata,
1397
+ model_schema=model_schema,
1398
+ custom_metrics=custom_metrics,
1399
+ step=None,
1400
+ )
1401
+ logger.info(f"Logged model successfully with fqn {model_version.fqn!r}")
1402
+ return model_version
1403
+
1404
+ # Datasets API
1405
def create_data_directory(
    self,
    ml_repo: str,
    name: str,
    description: Optional[str] = None,
    metadata: Optional[Dict[str, Any]] = None,
) -> DataDirectory:
    """
    Create a DataDirectory in an ML Repo, or return the existing one.

    If a data directory with the given name is already listed for the ML Repo,
    a warning is logged and that existing instance is returned; nothing new is
    created in that case.

    Args:
        ml_repo (str): Name of the ML Repo in which to create the data directory.
        name (str): Name of the DataDirectory to be created.
        description (Optional[str]): Description of the data directory.
        metadata (Optional[Dict[str, Any]]): Metadata about the data directory.

    Returns:
        DataDirectory: An instance of class DataDirectory.

    Raises:
        MlFoundryException: If `name` or `ml_repo` is empty or not a string.

    Examples:

    ```python
    from truefoundry.ml import get_client

    client = get_client()
    data_directory = client.create_data_directory(name="<data_directory-name>", ml_repo="<repo-name>")
    print(data_directory.fqn)
    ```
    """
    # `name` is checked before `ml_repo`; both share the same message template.
    for label, value in (("DataDirectory name", name), ("ML repo", ml_repo)):
        if value == "" or not isinstance(value, str):
            raise MlFoundryException(
                f"{label} must be string type and not empty. "
                f"Got {type(value)} type with value {value}"
            )

    ml_repo_id = self._get_ml_repo_id(ml_repo=ml_repo)
    artifacts_api = self._mlfoundry_artifacts_api

    # TODO: Add get_data_directory_by_name on server
    lookup_request = ListDatasetsRequestDto(
        experiment_id=ml_repo_id, name=name, max_results=1
    )
    existing = artifacts_api.list_datasets_post(
        list_datasets_request_dto=lookup_request
    ).datasets
    if existing:
        logger.warning(
            f"Data Directory with the name {name} already exists in ML Repo {ml_repo}, "
            f"returning the original instance of DataDirectory instead"
        )
        return DataDirectory(dataset=existing[0])

    create_request = CreateDatasetRequestDto(
        name=name,
        experiment_id=ml_repo_id,
        description=description,
        dataset_metadata=metadata,
    )
    created = artifacts_api.create_dataset_post(
        create_dataset_request_dto=create_request
    )
    return DataDirectory(dataset=created.dataset)
1471
+
1472
def get_data_directory_by_fqn(
    self,
    fqn: str,
) -> DataDirectory:
    """
    Fetch a DataDirectory by its fully qualified name.

    Args:
        fqn (str): Fully qualified name of the data directory.

    Returns:
        DataDirectory: An instance of class DataDirectory

    Examples:

    ```python
    from truefoundry.ml import get_client

    client = get_client()
    data_directory = client.get_data_directory_by_fqn(fqn="<data-dir-fqn>")
    # print the path of files and folder in the data_directory
    for file in data_directory.list_files():
        print(file.path)
    ```
    """
    response = self._mlfoundry_artifacts_api.get_dataset_by_fqn_get(fqn=fqn)
    return DataDirectory(response.dataset)
1507
+
1508
def get_data_directory(
    self,
    ml_repo: str,
    name: str,
) -> DataDirectory:
    """Get an existing data directory by name.

    Args:
        ml_repo (str): name of the ML Repo the data-directory is part of.
        name (str): the name of the data-directory

    Returns:
        DataDirectory: An instance of class DataDirectory

    Raises:
        MlFoundryException: If `ml_repo` or `name` is empty or not a string,
            or if no data directory with that name is listed for the ML Repo.

    Examples:

    ```python
    from truefoundry.ml import get_client, DataDirectoryPath

    client = get_client()
    data_directory = client.get_data_directory(ml_repo='my-repo', name="<data-directory-name>")
    with open("artifact.txt", "w") as f:
        f.write("hello-world")
    data_directory.add_files(
        artifact_paths=[DataDirectoryPath('artifact.txt', 'a/b/')]
    )
    # print the path of files and folder in the data_directory
    for file in data_directory.list_files():
        print(file.path)
    ```
    """
    if ml_repo == "" or not isinstance(ml_repo, str):
        raise MlFoundryException(
            "ML repo must be string type and not empty. "
            f"Got {type(ml_repo)} type with value {ml_repo}"
        )
    # Validate `name` the same way create_data_directory does, so an empty or
    # non-string name is rejected locally instead of being sent as the list filter.
    if name == "" or not isinstance(name, str):
        raise MlFoundryException(
            "DataDirectory name must be string type and not empty. "
            f"Got {type(name)} type with value {name}"
        )

    ml_repo_id = self._get_ml_repo_id(ml_repo=ml_repo)
    _datasets = self._mlfoundry_artifacts_api.list_datasets_post(
        list_datasets_request_dto=ListDatasetsRequestDto(
            experiment_id=ml_repo_id,
            name=name,
            max_results=1,
        ),
    )
    datasets = _datasets.datasets
    # `not datasets` already covers both None and an empty list.
    if not datasets:
        raise MlFoundryException(
            f"No data directory found with name {name} under ML Repo {ml_repo}"
        )

    return DataDirectory(dataset=datasets[0])
1554
+
1555
def list_data_directories(
    self,
    ml_repo: str,
) -> Iterator[DataDirectory]:
    """
    Iterate over the data directories in an ML Repo.

    The `ml_repo` argument is validated immediately when this method is
    called. The ML Repo lookup and the paginated listing requests are only
    issued as the returned iterator is consumed.

    Args:
        ml_repo (str): Name of the ML Repository

    Returns:
        Iterator[DataDirectory]: An iterator yielding one DataDirectory
            instance per data directory listed for the ML Repo.

    Raises:
        MlFoundryException: If `ml_repo` is empty or not a string.

    Examples:

    ```python
    from truefoundry.ml import get_client

    client = get_client()
    data_directories = client.list_data_directories(ml_repo="<ml-repo-name>")

    for data_directory in data_directories:
        print(data_directory.name)
    ```
    """
    # Validate here rather than inside the generator: a generator body does not
    # run until the first next(), which would defer this error to iteration time.
    if ml_repo == "" or not isinstance(ml_repo, str):
        raise MlFoundryException(
            "ML repo must be string type and not empty. "
            f"Got {type(ml_repo)} type with value {ml_repo}"
        )

    def _iter_data_directories() -> Iterator[DataDirectory]:
        # Lazily walk the listing one page at a time.
        ml_repo_id = self._get_ml_repo_id(ml_repo=ml_repo)
        max_results = 10  # page size requested per listing call
        page_token = None
        while True:
            _datasets = self._mlfoundry_artifacts_api.list_datasets_post(
                list_datasets_request_dto=ListDatasetsRequestDto(
                    experiment_id=ml_repo_id,
                    max_results=max_results,
                    page_token=page_token,
                )
            )
            datasets: List[DatasetDto] = _datasets.datasets or []
            page_token = _datasets.next_page_token
            for dataset in datasets:
                yield DataDirectory(dataset=dataset)
            # Stop on an empty page or when there is no token for a next page.
            if not datasets or not page_token:
                return

    return _iter_data_directories()
1601
+
1602
+
1603
def get_client() -> MlFoundry:
    """Initialize a session and return an `MlFoundry` client built on it.

    Returns:
        MlFoundry: Instance of the `MlFoundry` class created with the
            initialized session.

    Examples:

    ### Get client
    ```python
    from truefoundry.ml import get_client

    client = get_client()
    ```
    """
    return MlFoundry(session=init_session())