truefoundry 0.3.4rc1__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of truefoundry might be problematic. Click here for more details.

Files changed (253) hide show
  1. truefoundry/__init__.py +2 -0
  2. truefoundry/autodeploy/agents/developer.py +1 -1
  3. truefoundry/autodeploy/agents/project_identifier.py +2 -2
  4. truefoundry/autodeploy/agents/tester.py +1 -1
  5. truefoundry/autodeploy/cli.py +1 -1
  6. truefoundry/autodeploy/tools/list_files.py +1 -1
  7. truefoundry/cli/__main__.py +3 -17
  8. truefoundry/common/__init__.py +0 -0
  9. truefoundry/{deploy/lib/auth → common}/auth_service_client.py +50 -40
  10. truefoundry/common/constants.py +12 -0
  11. truefoundry/{deploy/lib/auth → common}/credential_file_manager.py +7 -7
  12. truefoundry/{deploy/lib/auth → common}/credential_provider.py +10 -23
  13. truefoundry/common/entities.py +124 -0
  14. truefoundry/common/exceptions.py +12 -0
  15. truefoundry/common/request_utils.py +84 -0
  16. truefoundry/common/servicefoundry_client.py +91 -0
  17. truefoundry/common/utils.py +56 -0
  18. truefoundry/deploy/auto_gen/models.py +4 -6
  19. truefoundry/deploy/cli/cli.py +3 -1
  20. truefoundry/deploy/cli/commands/apply_command.py +1 -1
  21. truefoundry/deploy/cli/commands/build_command.py +1 -1
  22. truefoundry/deploy/cli/commands/deploy_command.py +1 -1
  23. truefoundry/deploy/cli/commands/login_command.py +2 -2
  24. truefoundry/deploy/cli/commands/patch_application_command.py +1 -1
  25. truefoundry/deploy/cli/commands/patch_command.py +1 -1
  26. truefoundry/deploy/cli/commands/terminate_comand.py +1 -1
  27. truefoundry/deploy/cli/util.py +1 -1
  28. truefoundry/deploy/function_service/remote/remote.py +1 -1
  29. truefoundry/deploy/lib/auth/servicefoundry_session.py +2 -2
  30. truefoundry/deploy/lib/clients/servicefoundry_client.py +120 -159
  31. truefoundry/deploy/lib/const.py +1 -35
  32. truefoundry/deploy/lib/exceptions.py +0 -16
  33. truefoundry/deploy/lib/model/entity.py +1 -112
  34. truefoundry/deploy/lib/session.py +14 -42
  35. truefoundry/deploy/lib/util.py +0 -37
  36. truefoundry/{python_deploy_codegen.py → deploy/python_deploy_codegen.py} +2 -2
  37. truefoundry/deploy/v2/lib/deploy.py +3 -3
  38. truefoundry/deploy/v2/lib/deployable_patched_models.py +1 -1
  39. truefoundry/langchain/truefoundry_chat.py +1 -1
  40. truefoundry/langchain/truefoundry_embeddings.py +1 -1
  41. truefoundry/langchain/truefoundry_llm.py +1 -1
  42. truefoundry/langchain/utils.py +0 -41
  43. truefoundry/ml/__init__.py +37 -6
  44. truefoundry/ml/artifact/__init__.py +0 -0
  45. truefoundry/ml/artifact/truefoundry_artifact_repo.py +1161 -0
  46. truefoundry/ml/autogen/__init__.py +0 -0
  47. truefoundry/ml/autogen/client/__init__.py +370 -0
  48. truefoundry/ml/autogen/client/api/__init__.py +16 -0
  49. truefoundry/ml/autogen/client/api/auth_api.py +184 -0
  50. truefoundry/ml/autogen/client/api/deprecated_api.py +605 -0
  51. truefoundry/ml/autogen/client/api/experiments_api.py +1944 -0
  52. truefoundry/ml/autogen/client/api/health_api.py +299 -0
  53. truefoundry/ml/autogen/client/api/metrics_api.py +371 -0
  54. truefoundry/ml/autogen/client/api/mlfoundry_artifacts_api.py +7213 -0
  55. truefoundry/ml/autogen/client/api/python_deployment_config_api.py +201 -0
  56. truefoundry/ml/autogen/client/api/run_artifacts_api.py +231 -0
  57. truefoundry/ml/autogen/client/api/runs_api.py +2919 -0
  58. truefoundry/ml/autogen/client/api_client.py +822 -0
  59. truefoundry/ml/autogen/client/api_response.py +30 -0
  60. truefoundry/ml/autogen/client/configuration.py +489 -0
  61. truefoundry/ml/autogen/client/exceptions.py +161 -0
  62. truefoundry/ml/autogen/client/models/__init__.py +341 -0
  63. truefoundry/ml/autogen/client/models/add_custom_metrics_to_model_version_request_dto.py +69 -0
  64. truefoundry/ml/autogen/client/models/add_features_to_model_version_request_dto.py +83 -0
  65. truefoundry/ml/autogen/client/models/agent.py +125 -0
  66. truefoundry/ml/autogen/client/models/agent_app.py +118 -0
  67. truefoundry/ml/autogen/client/models/agent_open_api_tool.py +143 -0
  68. truefoundry/ml/autogen/client/models/agent_open_api_tool_with_fqn.py +144 -0
  69. truefoundry/ml/autogen/client/models/agent_with_fqn.py +127 -0
  70. truefoundry/ml/autogen/client/models/artifact_dto.py +115 -0
  71. truefoundry/ml/autogen/client/models/artifact_response_dto.py +75 -0
  72. truefoundry/ml/autogen/client/models/artifact_type.py +39 -0
  73. truefoundry/ml/autogen/client/models/artifact_version_dto.py +141 -0
  74. truefoundry/ml/autogen/client/models/artifact_version_response_dto.py +77 -0
  75. truefoundry/ml/autogen/client/models/artifact_version_status.py +35 -0
  76. truefoundry/ml/autogen/client/models/assistant_message.py +89 -0
  77. truefoundry/ml/autogen/client/models/authorize_user_for_model_request_dto.py +69 -0
  78. truefoundry/ml/autogen/client/models/authorize_user_for_model_version_request_dto.py +69 -0
  79. truefoundry/ml/autogen/client/models/blob_storage_reference.py +93 -0
  80. truefoundry/ml/autogen/client/models/body_get_search_runs_get.py +72 -0
  81. truefoundry/ml/autogen/client/models/chat_prompt.py +156 -0
  82. truefoundry/ml/autogen/client/models/chat_prompt_messages_inner.py +171 -0
  83. truefoundry/ml/autogen/client/models/columns_dto.py +73 -0
  84. truefoundry/ml/autogen/client/models/content.py +153 -0
  85. truefoundry/ml/autogen/client/models/content1.py +153 -0
  86. truefoundry/ml/autogen/client/models/content2.py +174 -0
  87. truefoundry/ml/autogen/client/models/content2_any_of_inner.py +150 -0
  88. truefoundry/ml/autogen/client/models/create_artifact_request_dto.py +74 -0
  89. truefoundry/ml/autogen/client/models/create_artifact_response_dto.py +65 -0
  90. truefoundry/ml/autogen/client/models/create_artifact_version_request_dto.py +74 -0
  91. truefoundry/ml/autogen/client/models/create_artifact_version_response_dto.py +65 -0
  92. truefoundry/ml/autogen/client/models/create_dataset_request_dto.py +76 -0
  93. truefoundry/ml/autogen/client/models/create_experiment_request_dto.py +94 -0
  94. truefoundry/ml/autogen/client/models/create_experiment_response_dto.py +67 -0
  95. truefoundry/ml/autogen/client/models/create_model_version_request_dto.py +95 -0
  96. truefoundry/ml/autogen/client/models/create_multi_part_upload_for_dataset_request_dto.py +73 -0
  97. truefoundry/ml/autogen/client/models/create_multi_part_upload_for_dataset_response_dto.py +79 -0
  98. truefoundry/ml/autogen/client/models/create_multi_part_upload_request_dto.py +73 -0
  99. truefoundry/ml/autogen/client/models/create_python_deployment_config_request_dto.py +72 -0
  100. truefoundry/ml/autogen/client/models/create_python_deployment_config_response_dto.py +67 -0
  101. truefoundry/ml/autogen/client/models/create_run_request_dto.py +97 -0
  102. truefoundry/ml/autogen/client/models/create_run_response_dto.py +75 -0
  103. truefoundry/ml/autogen/client/models/dataset_dto.py +112 -0
  104. truefoundry/ml/autogen/client/models/dataset_response_dto.py +75 -0
  105. truefoundry/ml/autogen/client/models/delete_artifact_versions_request_dto.py +65 -0
  106. truefoundry/ml/autogen/client/models/delete_dataset_request_dto.py +74 -0
  107. truefoundry/ml/autogen/client/models/delete_model_version_request_dto.py +65 -0
  108. truefoundry/ml/autogen/client/models/delete_run_request.py +65 -0
  109. truefoundry/ml/autogen/client/models/delete_tag_request_dto.py +68 -0
  110. truefoundry/ml/autogen/client/models/experiment_dto.py +127 -0
  111. truefoundry/ml/autogen/client/models/experiment_id_request_dto.py +67 -0
  112. truefoundry/ml/autogen/client/models/experiment_response_dto.py +75 -0
  113. truefoundry/ml/autogen/client/models/experiment_tag_dto.py +69 -0
  114. truefoundry/ml/autogen/client/models/feature_dto.py +68 -0
  115. truefoundry/ml/autogen/client/models/feature_value_type.py +35 -0
  116. truefoundry/ml/autogen/client/models/file_info_dto.py +76 -0
  117. truefoundry/ml/autogen/client/models/finalize_artifact_version_request_dto.py +101 -0
  118. truefoundry/ml/autogen/client/models/get_experiment_response_dto.py +88 -0
  119. truefoundry/ml/autogen/client/models/get_latest_run_log_response_dto.py +75 -0
  120. truefoundry/ml/autogen/client/models/get_metric_history_response.py +79 -0
  121. truefoundry/ml/autogen/client/models/get_signed_url_for_dataset_write_request_dto.py +68 -0
  122. truefoundry/ml/autogen/client/models/get_signed_urls_for_artifact_version_read_request_dto.py +68 -0
  123. truefoundry/ml/autogen/client/models/get_signed_urls_for_artifact_version_read_response_dto.py +81 -0
  124. truefoundry/ml/autogen/client/models/get_signed_urls_for_artifact_version_write_request_dto.py +69 -0
  125. truefoundry/ml/autogen/client/models/get_signed_urls_for_artifact_version_write_response_dto.py +83 -0
  126. truefoundry/ml/autogen/client/models/get_signed_urls_for_dataset_read_request_dto.py +68 -0
  127. truefoundry/ml/autogen/client/models/get_signed_urls_for_dataset_read_response_dto.py +81 -0
  128. truefoundry/ml/autogen/client/models/get_signed_urls_for_dataset_write_response_dto.py +81 -0
  129. truefoundry/ml/autogen/client/models/get_tenant_id_response_dto.py +73 -0
  130. truefoundry/ml/autogen/client/models/http_validation_error.py +82 -0
  131. truefoundry/ml/autogen/client/models/image_content_part.py +87 -0
  132. truefoundry/ml/autogen/client/models/image_url.py +75 -0
  133. truefoundry/ml/autogen/client/models/internal_metadata.py +180 -0
  134. truefoundry/ml/autogen/client/models/latest_run_log_dto.py +78 -0
  135. truefoundry/ml/autogen/client/models/list_artifact_versions_request_dto.py +107 -0
  136. truefoundry/ml/autogen/client/models/list_artifact_versions_response_dto.py +87 -0
  137. truefoundry/ml/autogen/client/models/list_artifacts_request_dto.py +96 -0
  138. truefoundry/ml/autogen/client/models/list_artifacts_response_dto.py +86 -0
  139. truefoundry/ml/autogen/client/models/list_colums_response_dto.py +75 -0
  140. truefoundry/ml/autogen/client/models/list_datasets_request_dto.py +78 -0
  141. truefoundry/ml/autogen/client/models/list_datasets_response_dto.py +86 -0
  142. truefoundry/ml/autogen/client/models/list_experiments_response_dto.py +86 -0
  143. truefoundry/ml/autogen/client/models/list_files_for_artifact_version_request_dto.py +76 -0
  144. truefoundry/ml/autogen/client/models/list_files_for_artifact_versions_response_dto.py +82 -0
  145. truefoundry/ml/autogen/client/models/list_files_for_dataset_request_dto.py +76 -0
  146. truefoundry/ml/autogen/client/models/list_files_for_dataset_response_dto.py +82 -0
  147. truefoundry/ml/autogen/client/models/list_latest_run_logs_response_dto.py +82 -0
  148. truefoundry/ml/autogen/client/models/list_metric_history_request_dto.py +69 -0
  149. truefoundry/ml/autogen/client/models/list_metric_history_response_dto.py +84 -0
  150. truefoundry/ml/autogen/client/models/list_model_version_response_dto.py +87 -0
  151. truefoundry/ml/autogen/client/models/list_model_versions_request_dto.py +93 -0
  152. truefoundry/ml/autogen/client/models/list_models_request_dto.py +89 -0
  153. truefoundry/ml/autogen/client/models/list_models_response_dto.py +84 -0
  154. truefoundry/ml/autogen/client/models/list_run_artifacts_response_dto.py +84 -0
  155. truefoundry/ml/autogen/client/models/list_run_logs_response_dto.py +82 -0
  156. truefoundry/ml/autogen/client/models/list_seed_experiments_response_dto.py +81 -0
  157. truefoundry/ml/autogen/client/models/log_batch_request_dto.py +106 -0
  158. truefoundry/ml/autogen/client/models/log_metric_request_dto.py +80 -0
  159. truefoundry/ml/autogen/client/models/log_param_request_dto.py +76 -0
  160. truefoundry/ml/autogen/client/models/method.py +37 -0
  161. truefoundry/ml/autogen/client/models/metric_collection_dto.py +82 -0
  162. truefoundry/ml/autogen/client/models/metric_dto.py +76 -0
  163. truefoundry/ml/autogen/client/models/mime_type.py +37 -0
  164. truefoundry/ml/autogen/client/models/model_configuration.py +103 -0
  165. truefoundry/ml/autogen/client/models/model_dto.py +122 -0
  166. truefoundry/ml/autogen/client/models/model_response_dto.py +75 -0
  167. truefoundry/ml/autogen/client/models/model_schema_dto.py +85 -0
  168. truefoundry/ml/autogen/client/models/model_version_dto.py +170 -0
  169. truefoundry/ml/autogen/client/models/model_version_response_dto.py +75 -0
  170. truefoundry/ml/autogen/client/models/multi_part_upload_dto.py +107 -0
  171. truefoundry/ml/autogen/client/models/multi_part_upload_response_dto.py +79 -0
  172. truefoundry/ml/autogen/client/models/multi_part_upload_storage_provider.py +34 -0
  173. truefoundry/ml/autogen/client/models/notify_artifact_version_failure_dto.py +65 -0
  174. truefoundry/ml/autogen/client/models/openapi_spec.py +152 -0
  175. truefoundry/ml/autogen/client/models/param_dto.py +66 -0
  176. truefoundry/ml/autogen/client/models/parameters.py +84 -0
  177. truefoundry/ml/autogen/client/models/prediction_type.py +34 -0
  178. truefoundry/ml/autogen/client/models/resolve_agent_app_response_dto.py +75 -0
  179. truefoundry/ml/autogen/client/models/restore_run_request_dto.py +65 -0
  180. truefoundry/ml/autogen/client/models/run_data_dto.py +104 -0
  181. truefoundry/ml/autogen/client/models/run_dto.py +84 -0
  182. truefoundry/ml/autogen/client/models/run_info_dto.py +105 -0
  183. truefoundry/ml/autogen/client/models/run_log_dto.py +90 -0
  184. truefoundry/ml/autogen/client/models/run_log_input_dto.py +80 -0
  185. truefoundry/ml/autogen/client/models/run_response_dto.py +75 -0
  186. truefoundry/ml/autogen/client/models/run_tag_dto.py +66 -0
  187. truefoundry/ml/autogen/client/models/search_runs_request_dto.py +94 -0
  188. truefoundry/ml/autogen/client/models/search_runs_response_dto.py +84 -0
  189. truefoundry/ml/autogen/client/models/set_experiment_tag_request_dto.py +73 -0
  190. truefoundry/ml/autogen/client/models/set_tag_request_dto.py +76 -0
  191. truefoundry/ml/autogen/client/models/signed_url_dto.py +69 -0
  192. truefoundry/ml/autogen/client/models/stop.py +152 -0
  193. truefoundry/ml/autogen/client/models/store_run_logs_request_dto.py +83 -0
  194. truefoundry/ml/autogen/client/models/system_message.py +89 -0
  195. truefoundry/ml/autogen/client/models/text.py +153 -0
  196. truefoundry/ml/autogen/client/models/text_content_part.py +84 -0
  197. truefoundry/ml/autogen/client/models/update_artifact_version_request_dto.py +74 -0
  198. truefoundry/ml/autogen/client/models/update_dataset_request_dto.py +74 -0
  199. truefoundry/ml/autogen/client/models/update_experiment_request_dto.py +74 -0
  200. truefoundry/ml/autogen/client/models/update_model_version_request_dto.py +93 -0
  201. truefoundry/ml/autogen/client/models/update_run_request_dto.py +78 -0
  202. truefoundry/ml/autogen/client/models/update_run_response_dto.py +75 -0
  203. truefoundry/ml/autogen/client/models/url.py +153 -0
  204. truefoundry/ml/autogen/client/models/user_message.py +89 -0
  205. truefoundry/ml/autogen/client/models/validation_error.py +87 -0
  206. truefoundry/ml/autogen/client/models/validation_error_loc_inner.py +154 -0
  207. truefoundry/ml/autogen/client/rest.py +426 -0
  208. truefoundry/ml/autogen/client_README.md +320 -0
  209. truefoundry/ml/cli/__init__.py +0 -0
  210. truefoundry/ml/cli/cli.py +18 -0
  211. truefoundry/ml/cli/commands/__init__.py +3 -0
  212. truefoundry/ml/cli/commands/download.py +87 -0
  213. truefoundry/ml/clients/__init__.py +0 -0
  214. truefoundry/ml/clients/entities.py +8 -0
  215. truefoundry/ml/clients/servicefoundry_client.py +45 -0
  216. truefoundry/ml/clients/utils.py +122 -0
  217. truefoundry/ml/constants.py +84 -0
  218. truefoundry/ml/entities.py +62 -0
  219. truefoundry/ml/enums.py +70 -0
  220. truefoundry/ml/env_vars.py +9 -0
  221. truefoundry/ml/exceptions.py +8 -0
  222. truefoundry/ml/git_info.py +60 -0
  223. truefoundry/ml/internal_namespace.py +52 -0
  224. truefoundry/ml/log_types/__init__.py +4 -0
  225. truefoundry/ml/log_types/artifacts/artifact.py +431 -0
  226. truefoundry/ml/log_types/artifacts/constants.py +33 -0
  227. truefoundry/ml/log_types/artifacts/dataset.py +384 -0
  228. truefoundry/ml/log_types/artifacts/general_artifact.py +110 -0
  229. truefoundry/ml/log_types/artifacts/model.py +611 -0
  230. truefoundry/ml/log_types/artifacts/model_extras.py +48 -0
  231. truefoundry/ml/log_types/artifacts/utils.py +161 -0
  232. truefoundry/ml/log_types/image/__init__.py +3 -0
  233. truefoundry/ml/log_types/image/constants.py +8 -0
  234. truefoundry/ml/log_types/image/image.py +357 -0
  235. truefoundry/ml/log_types/image/image_normalizer.py +102 -0
  236. truefoundry/ml/log_types/image/types.py +68 -0
  237. truefoundry/ml/log_types/plot.py +281 -0
  238. truefoundry/ml/log_types/pydantic_base.py +10 -0
  239. truefoundry/ml/log_types/utils.py +12 -0
  240. truefoundry/ml/logger.py +17 -0
  241. truefoundry/ml/mlfoundry_api.py +1575 -0
  242. truefoundry/ml/mlfoundry_run.py +1203 -0
  243. truefoundry/ml/run_utils.py +93 -0
  244. truefoundry/ml/session.py +168 -0
  245. truefoundry/ml/validation_utils.py +346 -0
  246. truefoundry/pydantic_v1.py +8 -1
  247. truefoundry/workflow/__init__.py +16 -1
  248. {truefoundry-0.3.4rc1.dist-info → truefoundry-0.4.0.dist-info}/METADATA +21 -14
  249. truefoundry-0.4.0.dist-info/RECORD +344 -0
  250. truefoundry/deploy/lib/clients/utils.py +0 -41
  251. truefoundry-0.3.4rc1.dist-info/RECORD +0 -136
  252. {truefoundry-0.3.4rc1.dist-info → truefoundry-0.4.0.dist-info}/WHEEL +0 -0
  253. {truefoundry-0.3.4rc1.dist-info → truefoundry-0.4.0.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,1203 @@
1
+ import functools
2
+ import os
3
+ import platform
4
+ import re
5
+ import time
6
+ from pathlib import Path
7
+ from typing import (
8
+ TYPE_CHECKING,
9
+ Any,
10
+ Dict,
11
+ Iterable,
12
+ Iterator,
13
+ List,
14
+ Optional,
15
+ Sequence,
16
+ Tuple,
17
+ Union,
18
+ )
19
+ from urllib.parse import urljoin, urlsplit
20
+
21
+ from truefoundry import version
22
+ from truefoundry.ml import constants, enums
23
+ from truefoundry.ml.autogen.client import ( # type: ignore[attr-defined]
24
+ ArtifactType,
25
+ DeleteRunRequest,
26
+ ExperimentsApi,
27
+ ListArtifactVersionsRequestDto,
28
+ ListModelVersionsRequestDto,
29
+ LogBatchRequestDto,
30
+ MetricDto,
31
+ MetricsApi,
32
+ MlfoundryArtifactsApi,
33
+ ParamDto,
34
+ RunDataDto,
35
+ RunDto,
36
+ RunInfoDto,
37
+ RunsApi,
38
+ RunTagDto,
39
+ UpdateRunRequestDto,
40
+ )
41
+ from truefoundry.ml.entities import Metric
42
+ from truefoundry.ml.enums import RunStatus
43
+ from truefoundry.ml.exceptions import MlFoundryException
44
+ from truefoundry.ml.internal_namespace import NAMESPACE
45
+ from truefoundry.ml.log_types import Image, Plot
46
+ from truefoundry.ml.log_types.artifacts.artifact import ArtifactPath, ArtifactVersion
47
+ from truefoundry.ml.log_types.artifacts.general_artifact import _log_artifact_version
48
+ from truefoundry.ml.log_types.artifacts.model import ModelVersion, _log_model_version
49
+ from truefoundry.ml.logger import logger
50
+ from truefoundry.ml.run_utils import ParamsType, flatten_dict, process_params
51
+ from truefoundry.ml.session import ACTIVE_RUNS, _get_api_client, get_active_session
52
+ from truefoundry.ml.validation_utils import (
53
+ MAX_ENTITY_KEY_LENGTH,
54
+ _validate_batch_log_data,
55
+ )
56
+
57
+ if TYPE_CHECKING:
58
+ import matplotlib
59
+ import plotly
60
+
61
+
62
def _ensure_not_deleted(method):
    """Decorate a run method so it refuses to execute on a deleted run.

    The wrapper reads ``self._deleted`` on every call. When it is truthy an
    ``MlFoundryException`` is raised; otherwise the call is forwarded to
    ``method`` unchanged and its result is returned.

    Args:
        method: The unbound method to guard. Its first positional argument
            must be the instance carrying the ``_deleted`` flag.

    Returns:
        The guarded callable, carrying ``method``'s metadata via
        ``functools.wraps``.
    """

    @functools.wraps(method)
    def _guarded(self, *args, **kwargs):
        # Guard clause: bail out before touching the wrapped method.
        if self._deleted:
            raise MlFoundryException("Run was deleted, cannot access a deleted Run")
        return method(self, *args, **kwargs)

    return _guarded
71
+
72
+
73
+ class MlFoundryRun:
74
+ """MlFoundryRun."""
75
+
76
+ VALID_PARAM_AND_METRIC_NAMES = re.compile(r"^[A-Za-z0-9_\-\. /]+$")
77
+
78
def __init__(
    self,
    experiment_id: str,
    run_id: str,
    auto_end: bool = False,
    **kwargs,
):
    """Create a handle for an existing run.

    Args:
        experiment_id (str): Identifier of the experiment the run belongs to.
            It is stored as a string (``str(experiment_id)``).
        run_id (str): Identifier of the run.
        auto_end (bool): When true, the run is registered with ``ACTIVE_RUNS``
            and ``__del__`` will try to mark it as finished.
        **kwargs: Accepted but not used by this constructor.
    """
    # Plain state first; none of this talks to the server.
    self._experiment_id = str(experiment_id)
    self._run_id = run_id
    self._auto_end = auto_end
    self._run_info: Optional[RunInfoDto] = None
    self._run_data: Optional[RunDataDto] = None
    self._deleted = False
    self._terminate_called = False
    if auto_end:
        ACTIVE_RUNS.add_run(self)

    # One shared API client backs every typed API wrapper used by the run.
    client = _get_api_client()
    self._api_client = client
    self._experiments_api = ExperimentsApi(api_client=client)
    self._runs_api = RunsApi(api_client=client)
    self._metrics_api = MetricsApi(api_client=client)
    self._mlfoundry_artifacts_api = MlfoundryArtifactsApi(api_client=client)
109
+
110
@classmethod
def _from_dto(cls, run_dto: RunDto) -> "MlFoundryRun":
    """Build a run handle from a ``RunDto`` and pre-populate its cached info/data."""
    info = run_dto.info
    instance = cls(info.experiment_id, info.run_id)
    # Seed the caches so `run_name` / `fqn` need no extra lookup.
    instance._run_info = info
    instance._run_data = run_dto.data
    return instance
117
+
118
def _get_run_info(self) -> RunInfoDto:
    """Return the run's ``RunInfoDto``, fetching and caching it on first use."""
    if self._run_info is None:
        # Cache miss: ask the runs API once and remember the answer.
        response = self._runs_api.get_run_get(run_id=self.run_id)
        self._run_info = response.run.info
    return self._run_info
125
+
126
@property
@_ensure_not_deleted
def run_id(self) -> str:
    """Identifier of this `run`.

    Raises:
        MlFoundryException: If the run has been deleted.
    """
    return self._run_id
131
+
132
@property
@_ensure_not_deleted
def run_name(self) -> str:
    """Name of this `run`, read from the (possibly cached) run info."""
    return self._get_run_info().name
138
+
139
@property
@_ensure_not_deleted
def fqn(self) -> str:
    """Fully qualified name of this `run`, read from the (possibly cached) run info."""
    return self._get_run_info().fqn
145
+
146
@property
@_ensure_not_deleted
def status(self) -> RunStatus:
    """Current status of this `run`.

    The status is fetched from the runs API on every access; the cached
    run info is not consulted.
    """
    response = self._runs_api.get_run_get(run_id=self.run_id)
    raw_status = response.run.info.status
    assert raw_status is not None
    return RunStatus(raw_status)
153
+
154
@property
@_ensure_not_deleted
def ml_repo(self) -> str:
    """Name of the ml_repo (experiment) this `run` belongs to.

    The experiment is looked up through the experiments API on every access.
    """
    response = self._experiments_api.get_experiment_get(
        experiment_id=self._experiment_id
    )
    return response.experiment.name
162
+
163
@property
@_ensure_not_deleted
def auto_end(self) -> bool:
    """Whether automatic ending is enabled for this `run`."""
    return self._auto_end
168
+
169
+ @_ensure_not_deleted
170
+ def __repr__(self) -> str:
171
+ return f"<{type(self).__name__} at 0x{id(self):x}: run={self.fqn!r}>"
172
+
173
@_ensure_not_deleted
def __enter__(self):
    """Enter the context manager; the run itself is the managed object."""
    return self
176
+
177
def _terminate_run_if_running(self, termination_status: RunStatus):
    """Best-effort: move the run to ``termination_status`` if it is ``RUNNING``.

    The body executes at most once per instance. It is reached from ``end``,
    ``__exit__`` and ``__del__``, so it must not let remote or session errors
    escape: every failure is logged as a warning instead.

    Args:
        termination_status (RunStatus): Status to record on the run when it
            is currently in the ``RUNNING`` state.
    """
    # A deleted run has nothing left to terminate, and the guarded accessors
    # used below (`status`, `fqn`, `run_id`) would all raise for it, e.g.
    # when `delete()` is called inside a `with` block.
    if self._terminate_called or self._deleted:
        return

    # Prevent double execution for termination
    self._terminate_called = True
    ACTIVE_RUNS.remove_run(self)

    try:
        # The status lookup is a remote call, so it lives inside the `try`:
        # `_terminate_called` is already set, hence a raised error could not
        # be retried by the caller anyway.
        # we do not need to set any termination status unless the run was in RUNNING state
        if self.status != RunStatus.RUNNING:
            return
        logger.info("Setting run status of %r to %r", self.fqn, termination_status)
        _run_update = self._runs_api.update_run_post(
            update_run_request_dto=UpdateRunRequestDto(
                run_id=self.run_id,
                status=termination_status.value,
                end_time=int(time.time() * 1000),
            )
        )
        self._run_info = _run_update.run_info
    except Exception as e:
        logger.warning(
            f"failed to set termination status {termination_status} due to {e}"
        )

    try:
        # `fqn` may need a remote lookup and `dashboard_link` raises when no
        # session is active; neither should turn a finished run into an error.
        logger.info(f"Finished run: {self.fqn!r}, Dashboard: {self.dashboard_link}")
    except Exception as e:
        logger.warning(f"failed to log run summary due to {e}")
209
+
210
def __exit__(self, exc_type, exc_val, exc_tb):
    """Leave the context manager and terminate the run.

    The run is marked ``FINISHED`` when the block exited normally and
    ``FAILED`` when it exited with an exception. Nothing is returned, so
    an exception from the block keeps propagating.
    """
    if exc_type is None:
        final_status = RunStatus.FINISHED
    else:
        final_status = RunStatus.FAILED
    self._terminate_run_if_running(final_status)
213
+
214
def __del__(self):
    """Finish the run on finalization, but only when auto-end is enabled."""
    if not self._auto_end:
        return
    self._terminate_run_if_running(RunStatus.FINISHED)
217
+
218
@property
@_ensure_not_deleted
def dashboard_link(self) -> str:
    """Get Mlfoundry dashboard link for a `run`.

    The link is built from the scheme and host of the active session's
    tracking URI plus the experiment id and run id.

    Raises:
        MlFoundryException: If there is no active session, or if the run
            has been deleted.
    """
    session = get_active_session()
    if session is None:
        # The suggested command is wrapped in a balanced pair of backticks;
        # the closing one was previously missing.
        raise MlFoundryException(
            "No active session found. Perhaps you are not logged in?\n"
            "Please log in using `tfy login [--host HOST] --relogin`"
        )
    # Keep only scheme and host; any path on the tracking URI is dropped.
    base_url = "{uri.scheme}://{uri.netloc}/".format(
        uri=urlsplit(session.tracking_uri)
    )

    return urljoin(base_url, f"mlfoundry/{self._experiment_id}/run/{self.run_id}/")
233
+
234
@_ensure_not_deleted
def end(self, status: RunStatus = RunStatus.FINISHED):
    """End a `run`.

    The run is moved to `status` (`FINISHED` unless another status is
    passed). The update is only attempted when the run is currently in the
    `RUNNING` state, and only the first termination of a given run object
    has any effect.

    Args:
        status (RunStatus): Status to record for the run. Defaults to
            `RunStatus.FINISHED`.

    Examples:

    ```python
    from truefoundry.ml import get_client

    client = get_client()
    run = client.create_run(
        ml_repo="my-classification-project", run_name="svm-with-rbf-kernel"
    )
    # ...
    # Model training code
    # ...
    run.end()
    ```

    When the run is used as a context manager there is no need to call
    this function; leaving the block ends the run.

    ```python
    from truefoundry.ml import get_client

    client = get_client()
    with client.create_run(
        ml_repo="my-classification-project", run_name="svm-with-rbf-kernel"
    ) as run:
        # ...
        # Model training code
        ...
    # `run` will be automatically marked as `FINISHED` or `FAILED`.
    ```
    """
    self._terminate_run_if_running(status)
272
+
273
@_ensure_not_deleted
def delete(self) -> None:
    """Permanently delete the run.

    On success the run is removed from the active runs, flagged as deleted
    and its auto-end behaviour is switched off. On failure a warning is
    logged and the original exception is re-raised.

    Examples:

    ```python
    from truefoundry.ml import get_client

    client = get_client()
    client.create_ml_repo('iris-learning')
    run = client.create_run(ml_repo="iris-learning", run_name="svm-model1")
    run.log_params({"learning_rate": 0.001})
    run.log_metrics({"accuracy": 0.7, "loss": 0.6})

    run.delete()
    ```

    Calling or accessing any other guarded member of the run after it was
    deleted raises `MlFoundryException`.

    ```python
    from truefoundry.ml import get_client

    client = get_client()
    client.create_ml_repo('iris-learning')
    run = client.create_run(ml_repo="iris-learning", run_name="svm-model1")
    run.log_params({"learning_rate": 0.001})
    run.log_metrics({"accuracy": 0.7, "loss": 0.6})

    run.delete()
    run.log_params({"learning_rate": 0.001})  # raises MlFoundryException
    ```
    """
    # Resolve the name up front: once `_deleted` is set the property raises.
    name = self.run_name
    try:
        request = DeleteRunRequest(run_id=self.run_id)
        self._runs_api.hard_delete_run_post(delete_run_request=request)
        logger.info(f"Run {name} was deleted successfully")
        ACTIVE_RUNS.remove_run(self)
        self._deleted = True
        self._auto_end = False
    except Exception as ex:
        logger.warning(f"Failed to delete the run {name} because of {ex}")
        raise
320
+
321
@_ensure_not_deleted
def list_artifact_versions(
    self,
    artifact_type: Optional[ArtifactType] = ArtifactType.ARTIFACT,
) -> Iterator[ArtifactVersion]:
    """
    Iterate over the artifact versions logged under this run.

    Results are fetched page by page (25 per request) and each entry is
    resolved to an `ArtifactVersion` through its fqn. The deleted-run check
    happens when this method is called, before iteration starts.

    Args:
        artifact_type: Type of the artifact you want. A falsy value sends
            no type filter with the request.

    Returns:
        Iterator[ArtifactVersion]: An iterator over the artifact versions of
            this run, in the order the server returns them (originally
            documented as non-deleted versions, newest version first).

    Examples:

    ```python
    from truefoundry.ml import get_client

    client = get_client()
    run = client.create_run(ml_repo="iris-learning", run_name="svm-model1")
    artifact_versions = run.list_artifact_versions()

    for artifact_version in artifact_versions:
        print(artifact_version)

    run.end()
    ```
    """
    page_size = 25
    next_token = None
    while True:
        request = ListArtifactVersionsRequestDto(
            run_ids=[self.run_id],
            artifact_types=[artifact_type] if artifact_type else None,
            max_results=page_size,
            page_token=next_token,
        )
        response = self._mlfoundry_artifacts_api.list_artifact_versions_post(
            list_artifact_versions_request_dto=request
        )
        page = response.artifact_versions
        next_token = response.next_page_token
        for entry in page:
            yield ArtifactVersion.from_fqn(entry.fqn)
        # Stop on an empty page or when the server reports no further page.
        if not page or next_token is None:
            break
369
+
370
@_ensure_not_deleted
def list_model_versions(
    self,
) -> Iterator[ModelVersion]:
    """
    Iterate over the model versions logged under this run.

    Results are fetched page by page (25 per request) and each entry is
    resolved to a `ModelVersion` through its fqn. The deleted-run check
    happens when this method is called, before iteration starts.

    Returns:
        Iterator[ModelVersion]: An iterator over the model versions of this
            run, in the order the server returns them (originally documented
            as non-deleted versions, newest version first).

    Examples:

    ```python
    from truefoundry.ml import get_client

    client = get_client()
    run = client.get_run(run_id="<your-run-id>")
    model_versions = run.list_model_versions()

    for model_version in model_versions:
        print(model_version)

    run.end()
    ```
    """
    page_size = 25
    next_token = None
    while True:
        request = ListModelVersionsRequestDto(
            run_ids=[self.run_id],
            max_results=page_size,
            page_token=next_token,
        )
        response = self._mlfoundry_artifacts_api.list_model_versions_post(
            list_model_versions_request_dto=request
        )
        page = response.model_versions
        next_token = response.next_page_token
        for entry in page:
            yield ModelVersion.from_fqn(fqn=entry.fqn)
        # Stop on an empty page or when the server reports no further page.
        if not page or next_token is None:
            break
411
+
412
def _add_git_info(self, root_path: Optional[str] = None):
    """Tag the run with git metadata of the repository at ``root_path``.

    Commit sha, branch name and dirty flag are always sent; the remote URL
    is added only when one is available. Any failure is logged as a warning
    and never propagated.

    Args:
        root_path (Optional[str]): Directory to inspect. Falls back to the
            current working directory when empty or ``None``.
    """
    root_path = root_path or os.getcwd()
    try:
        # Imported lazily, inside the `try`, so an import failure is also
        # handled as a non-blocking warning.
        from truefoundry.ml.git_info import GitInfo

        repo = GitInfo(root_path)
        git_tags = [
            RunTagDto(
                key=constants.GIT_COMMIT_TAG_NAME,
                value=repo.current_commit_sha,
            ),
            RunTagDto(
                key=constants.GIT_BRANCH_TAG_NAME,
                value=repo.current_branch_name,
            ),
            RunTagDto(key=constants.GIT_DIRTY_TAG_NAME, value=str(repo.is_dirty)),
        ]
        origin = repo.remote_url
        if origin is not None:
            git_tags.append(
                RunTagDto(key=constants.GIT_REMOTE_URL_NAME, value=origin)
            )
        _validate_batch_log_data(metrics=[], params=[], tags=git_tags)
        batch = LogBatchRequestDto(run_id=self.run_id, tags=git_tags)
        self._runs_api.log_run_batch_post(log_batch_request_dto=batch)
    except Exception as ex:
        # no-blocking
        logger.warning(f"failed to log git info because {ex}")
448
+
449
def _add_python_truefoundry_version(self):
    """Tag the run with the Python version and the truefoundry version.

    The package version is written under both the mlfoundry and the
    truefoundry tag names. When no package version is available a warning
    is logged and only the Python version tag is sent.
    """
    version_tags = [
        RunTagDto(
            key=constants.PYTHON_VERSION_TAG_NAME,
            value=platform.python_version(),
        ),
    ]

    package_version = version.__version__
    if not package_version:
        logger.warning("Failed to get MLFoundry version.")
    else:
        version_tags.extend(
            [
                RunTagDto(
                    key=constants.MLFOUNDRY_VERSION_TAG_NAME,
                    value=package_version,
                ),
                RunTagDto(
                    key=constants.TRUEFOUNDRY_VERSION_TAG_NAME,
                    value=package_version,
                ),
            ]
        )

    _validate_batch_log_data(metrics=[], params=[], tags=version_tags)
    batch = LogBatchRequestDto(run_id=self.run_id, tags=version_tags)
    self._runs_api.log_run_batch_post(log_batch_request_dto=batch)
478
+
479
@_ensure_not_deleted
def log_artifact(
    self,
    name: str,
    artifact_paths: List[
        Union[Tuple[str], Tuple[str, Optional[str]], ArtifactPath]
    ],
    description: Optional[str] = None,
    metadata: Optional[Dict[str, Any]] = None,
    step: Optional[int] = 0,
    progress: Optional[bool] = None,
) -> ArtifactVersion:
    """Log a set of local files and directories as an artifact version.

    An `artifact` is a list of local files and directories. The entries of
    `artifact_paths` are packed and uploaded to the remote storage linked to
    the experiment, under the current ML Repo.

    Args:
        name (str): Name of the artifact. If an artifact with this name already
            exists under the current ML Repo, a new version is added under that
            `name`; otherwise the artifact is logged as version 1.
        artifact_paths (List[truefoundry.ml.ArtifactPath]): Non-empty list of
            (source path, destination path) pairs describing what to add to the
            artifact version contents. The first member is a local file or
            directory path, the second is the path inside the artifact contents
            to upload to.

            ```python
            from truefoundry.ml import ArtifactPath

            ...
            run.log_artifact(
                name="xyz",
                artifact_paths=[
                    ArtifactPath("foo.txt", "foo/bar/foo.txt"),
                    ArtifactPath("tokenizer/", "foo/tokenizer/"),
                    ArtifactPath('bar.text'),
                    ('bar.txt', ),
                    ('foo.txt', 'a/foo.txt')
                ]
            )
            ```

            For instance, the first two entries end up as

            ```
            .
            └── foo/
                ├── bar/
                │   └── foo.txt
                └── tokenizer/
                    └── # contents of tokenizer/ directory will be uploaded here
            ```
        description (Optional[str], optional): Arbitrary text up to 1024
            characters to store as description. Can be updated at any time
            after logging. Defaults to `None`.
        metadata (Optional[Dict[str, Any]], optional): Arbitrary json
            serializable dictionary, e.g. metrics, params or notes. Can be
            updated at any time after logging. Defaults to `None`.
        step (int): Step/iteration at which the version is being logged,
            defaults to 0.
        progress (bool): Value to show progress bar, defaults to None.

    Returns:
        truefoundry.ml.ArtifactVersion: An `ArtifactVersion` that can be used to
            download the files or update attributes like description, metadata.

    Raises:
        MlFoundryException: If `artifact_paths` is empty.

    Examples:

        ```python
        import os
        from truefoundry.ml import get_client, ArtifactPath

        with open("artifact.txt", "w") as f:
            f.write("hello-world")

        client = get_client()
        run = client.create_run(
            ml_repo="my-classification-project", run_name="svm-with-rbf-kernel"
        )

        run.log_artifact(
            name="hello-world-file",
            artifact_paths=[ArtifactPath('artifact.txt', 'a/b/')]
        )

        run.end()
        ```
    """
    # Guard clause: there is nothing to upload without at least one path.
    if not artifact_paths:
        raise MlFoundryException(
            "artifact_paths cannot be empty, atleast one artifact_path must be passed"
        )

    artifact_version = _log_artifact_version(
        self,
        name=name,
        artifact_paths=artifact_paths,
        description=description,
        metadata=metadata,
        step=step,
        progress=progress,
    )
    return artifact_version
+
581
@_ensure_not_deleted
def log_metrics(self, metric_dict: Dict[str, Union[int, float]], step: int = 0):
    """Record metric values for the current `run` at a given `step`.

    A metric is a name (such as "training-loss") paired with an integral or
    floating point value (such as `1.2`), associated with the training
    iteration (`step`) at which it was calculated. Entries with a string value
    or with an invalid name are skipped with a warning; if nothing is left to
    send, a warning is logged and no request is made.

    Args:
        metric_dict (Dict[str, Union[int, float]]): Metric name to metric value
            map. Values should be `float` or `int`. Should be non-empty.
        step (int, optional): Training step/iteration at which the metrics in
            `metric_dict` were calculated. Defaults to `0`.

    Raises:
        MlFoundryException: If validating or sending the batch fails.

    Examples:

        ```python
        from truefoundry.ml import get_client

        client = get_client()
        run = client.create_run(
            ml_repo="my-classification-project"
        )
        run.log_metrics(metric_dict={"accuracy": 0.7, "loss": 0.6}, step=0)
        run.log_metrics(metric_dict={"accuracy": 0.8, "loss": 0.4}, step=1)

        run.end()
        ```
    """
    # One shared millisecond timestamp for every metric of this call.
    logged_at_ms = int(time.time() * 1000)
    metrics = []
    for key, value in metric_dict.items():
        if isinstance(value, str):
            logger.warning(
                f"Cannot log metric with string value. Discarding metric {key}={value}"
            )
            continue
        if not self.VALID_PARAM_AND_METRIC_NAMES.match(key):
            logger.warning(
                f"Invalid metric name: {key}. Names may only contain alphanumerics, "
                f"underscores (_), dashes (-), periods (.), spaces ( ), and slashes (/). "
                f"Discarding metric {key}={value}"
            )
            continue
        metrics.append(
            MetricDto(key=key, value=value, timestamp=logged_at_ms, step=step)
        )

    if not metrics:
        logger.warning("Cannot log empty metrics dictionary")
        return

    try:
        _validate_batch_log_data(metrics=metrics, params=[], tags=[])
        self._runs_api.log_run_batch_post(
            log_batch_request_dto=LogBatchRequestDto(
                run_id=self.run_id, metrics=metrics, params=[], tags=[]
            )
        )
    except Exception as e:
        raise MlFoundryException(str(e)) from e

    logger.info("Metrics logged successfully")
+
650
@_ensure_not_deleted
def log_params(self, param_dict: ParamsType, flatten_params: bool = False):
    """Logs parameters for the run.

    Parameters or Hyperparameters can be thought of as configurations for a run.
    For example, the type of kernel used in a SVM model is a parameter.
    A Parameter is defined by a name and a string value. Parameters are
    also immutable, we cannot overwrite parameter value for a parameter
    name.

    Entries whose name or value is longer than `MAX_ENTITY_KEY_LENGTH`
    characters, or whose name contains characters outside the allowed set, are
    skipped with a warning. If no entry is left to send, a warning is logged
    and no request is made.

    Args:
        param_dict (ParamsType): A parameter name to parameter value map.
            Parameter names and values are converted to `str`.
        flatten_params (bool): Flatten hierarchical dict, e.g. `{'a': {'b': 'c'}} -> {'a.b': 'c'}`.
            All the keys will be converted to `str`. Defaults to False

    Raises:
        MlFoundryException: If processing, validating or sending the
            parameters fails.

    Examples:

        ### Logging parameters using a `dict`.

        ```python
        from truefoundry.ml import get_client

        client = get_client()
        run = client.create_run(
            ml_repo="my-classification-project"
        )
        run.log_params({"learning_rate": 0.01, "epochs": 10})

        run.end()
        ```

        ### Logging parameters using `argparse` Namespace object

        ```python
        import argparse
        from truefoundry.ml import get_client

        parser = argparse.ArgumentParser()
        parser.add_argument("-batch_size", type=int, required=True)
        args = parser.parse_args()

        client = get_client()
        run = client.create_run(
            ml_repo="my-classification-project"
        )
        run.log_params(args)
        ```
    """
    try:
        param_dict = process_params(param_dict)
        if flatten_params:
            param_dict = flatten_dict(param_dict)

        params = []
        for raw_key, raw_value in param_dict.items():
            # Normalise once: the length check, the name regex and the DTO all
            # need strings, and a non-str key would make the regex match raise.
            param_key, param_value = str(raw_key), str(raw_value)
            if (
                len(param_key) > MAX_ENTITY_KEY_LENGTH
                or len(param_value) > MAX_ENTITY_KEY_LENGTH
            ):
                logger.warning(
                    f"MlFoundry can't log parameters with length greater than {MAX_ENTITY_KEY_LENGTH} characters. "
                    f"Discarding {param_key}:{param_value}."
                )
                continue
            if not self.VALID_PARAM_AND_METRIC_NAMES.match(param_key):
                logger.warning(
                    f"Invalid param name: {param_key}. Names may only contain alphanumerics, "
                    f"underscores (_), dashes (-), periods (.), spaces ( ), and slashes (/). "
                    f"Discarding param {param_key}={param_value}"
                )
                continue
            params.append(ParamDto(key=param_key, value=param_value))

        if not params:
            # Mirror log_metrics: nothing to send, so skip the request and the
            # misleading "logged successfully" message.
            logger.warning("Cannot log empty params dictionary")
            return

        _validate_batch_log_data(metrics=[], params=params, tags=[])
        self._runs_api.log_run_batch_post(
            log_batch_request_dto=LogBatchRequestDto(
                run_id=self.run_id, metrics=[], params=params, tags=[]
            )
        )
    except Exception as e:
        raise MlFoundryException(str(e)) from e
    logger.info("Parameters logged successfully")
+
736
@_ensure_not_deleted
def set_tags(self, tags: Dict[str, str]):
    """Attach tags ("labels") to the current `run`.

    A tag is a name/value pair.

    Args:
        tags (Dict[str, str]): Tag name to value map. Tag names cannot start
            with `mlf.`; that prefix is reserved for truefoundry. Tag values
            are converted to `str`.

    Raises:
        MlFoundryException: If a tag name fails the namespace check or the
            request fails.

    Examples:

        ```python
        from truefoundry.ml import get_client

        client = get_client()
        run = client.create_run(
            ml_repo="my-classification-project"
        )
        run.set_tags({"nlp.framework": "Spark NLP"})

        run.end()
        ```
    """
    if not tags:
        tags = {}
    try:
        NAMESPACE.validate_namespace_not_used(names=tags.keys())
        tags_arr = []
        for tag_name, tag_value in tags.items():
            tags_arr.append(RunTagDto(key=tag_name, value=str(tag_value)))
        self._runs_api.log_run_batch_post(
            log_batch_request_dto=LogBatchRequestDto(
                run_id=self.run_id, metrics=[], params=[], tags=tags_arr
            )
        )
    except Exception as e:
        raise MlFoundryException(str(e)) from e
    logger.info("Tags set successfully")
+
777
@_ensure_not_deleted
def get_tags(self, no_cache=False) -> Dict[str, str]:
    """Return every tag set on the current `run`.

    Args:
        no_cache: When truthy, the run data is fetched again even if a copy is
            already held on this object.

    Returns:
        Dict[str, str]: Tag names mapped to their corresponding tag values.

    Examples:

        ```python
        from truefoundry.ml import get_client

        client = get_client()
        run = client.create_run(
            ml_repo="my-classification-project"
        )
        run.set_tags({"nlp.framework": "Spark NLP"})
        print(run.get_tags())

        run.end()
        ```
    """
    # Fetch when explicitly asked to, or when no run data is held yet.
    if no_cache or not self._run_data:
        response = self._runs_api.get_run_get(run_id=self.run_id)
        self._run_data = response.run.data
    assert self._run_data is not None
    tags_by_name = {}
    for tag in self._run_data.tags or []:
        tags_by_name[tag.key] = tag.value
    return tags_by_name
+
807
@_ensure_not_deleted
def get_metrics(
    self, metric_names: Optional[Iterable[str]] = None
) -> Dict[str, List[Metric]]:
    """Fetch the metric history of the current `run`, grouped by metric name.

    Args:
        metric_names (Optional[Iterable[str]], optional): Names of the metrics
            to fetch. When not passed, every metric logged under the `run` is
            returned. Names that were never logged produce a warning and map to
            an empty list in the result.

    Returns:
        Dict[str, List[Metric]]: Metric name to list of logged metrics.

    Examples:

        ```python
        from truefoundry.ml import get_client

        client = get_client()
        run = client.create_run(
            ml_repo="my-classification-project", run_name="svm-with-rbf-kernel"
        )
        run.log_metrics(metric_dict={"accuracy": 0.7, "loss": 0.6}, step=0)
        run.log_metrics(metric_dict={"accuracy": 0.8, "loss": 0.4}, step=1)

        metrics = run.get_metrics()
        for metric_name, metric_history in metrics.items():
            print(f"logged metrics for metric {metric_name}:")
            for metric in metric_history:
                print(f"value: {metric.value}")
                print(f"step: {metric.step}")
                print(f"timestamp_ms: {metric.timestamp}")
                print("--")

        run.end()
        ```
    """
    response = self._runs_api.get_run_get(run_id=self.run_id)
    assert response.run.data is not None
    logged_names = {entry.key for entry in (response.run.data.metrics or [])}

    if metric_names is None:
        requested = logged_names
    else:
        requested = set(metric_names)

    missing = requested - logged_names
    if missing:
        logger.warning(f"{missing} metrics not present in the run")

    # Names that were asked for but never logged still appear, with no history.
    history_by_name: Dict[str, List[Metric]] = {}
    for name in missing:
        history_by_name[name] = []

    for name in requested - missing:
        history = self._metrics_api.get_metric_history_get(
            run_id=self.run_id, metric_key=name
        )
        history_by_name[name] = [Metric.from_dto(dto) for dto in history.metrics]
    return history_by_name
+
871
@_ensure_not_deleted
def get_params(self, no_cache=False) -> Dict[str, str]:
    """Return every parameter logged for the current `run`.

    Args:
        no_cache: When truthy, the run data is fetched again even if a copy is
            already held on this object.

    Returns:
        Dict[str, str]: Parameter names mapped to their corresponding values.

    Examples:

        ```python
        from truefoundry.ml import get_client

        client = get_client()
        run = client.create_run(
            ml_repo="my-classification-project"
        )
        run.log_params({"learning_rate": 0.01, "epochs": 10})
        print(run.get_params())

        run.end()
        ```
    """
    # Fetch when explicitly asked to, or when no run data is held yet.
    if no_cache or not self._run_data:
        response = self._runs_api.get_run_get(run_id=self.run_id)
        self._run_data = response.run.data
    assert self._run_data is not None
    params_by_name = {}
    for param in self._run_data.params or []:
        params_by_name[param.key] = param.value
    return params_by_name
+
901
@_ensure_not_deleted
def log_model(
    self,
    *,
    name: str,
    model_file_or_folder: str,
    framework: Optional[Union[enums.ModelFramework, str]],
    additional_files: Sequence[Tuple[Union[str, Path], Optional[str]]] = (),
    description: Optional[str] = None,
    metadata: Optional[Dict[str, Any]] = None,
    step: int = 0,
    progress: Optional[bool] = None,
) -> ModelVersion:
    """
    Log already-serialized model files as a new model version under the current ML Repo.

    Every call generates a new version associated with the given `name` and
    linked to the current run; several versions can be logged under the same
    `name`.

    Args:
        name (str): Name of the model. If a model with this name already exists under the current ML Repo,
            the logged model is added as a new version under that `name`. If no model exists with the given
            `name`, the given model is logged as version 1.
        model_file_or_folder (str): Path to either a single file or a folder containing model files. This
            folder is usually created using serialization methods of libraries or frameworks e.g.
            `joblib.dump`, `model.save_pretrained(...)`, `torch.save(...)`, `model.save(...)`
        framework (Optional[Union[enums.ModelFramework, str]]): Model Framework. Ex:- pytorch, sklearn,
            tensorflow etc. The full list of supported frameworks can be found in
            `truefoundry.ml.enums.ModelFramework`. `None` is also accepted.
        additional_files (Sequence[Tuple[Union[str, Path], Optional[str]]], optional): A list of pairs
            of (source path, destination path) to add additional files and folders
            to the model version contents. The first member of the pair should be a file or directory path
            and the second member should be the path inside the model version contents to upload to.
            The model version contents are arranged as follows

            ```
            .
            └── model/
                └── # model files are serialized here
            └── # any additional files and folders can be added here.
            ```

            You can also add additional files to the model/ subdirectory by specifying the destination
            path as model/

            ```
            E.g. >>> run.log_model(
                ...     name="xyz", model_file_or_folder="clf.joblib", framework="sklearn",
                ...     additional_files=[("foo.txt", "foo/bar/foo.txt"), ("tokenizer/", "foo/tokenizer/")]
                ... )
            would result in
            .
            ├── model/
            │   └── clf.joblib # if `model_file_or_folder` is a folder, contents will be added here
            └── foo/
                ├── bar/
                │   └── foo.txt
                └── tokenizer/
                    └── # contents of tokenizer/ directory will be uploaded here
            ```
        description (Optional[str], optional): arbitrary text up to 1024 characters to store as description.
            This field can be updated at any time after logging. Defaults to `None`
        metadata (Optional[Dict[str, Any]], optional): arbitrary json serializable dictionary to store metadata.
            For example, you can use this to store metrics, params, notes.
            This field can be updated at any time after logging. Defaults to `None`
        step (int): step/iteration at which the model is being logged, defaults to 0.
        progress (bool): value to show progress bar, defaults to None.

    Returns:
        truefoundry.ml.ModelVersion: an instance of `ModelVersion` that can be used to download the files,
            load the model, or update attributes like description, metadata, schema.

    Examples:

        ### Sklearn

        ```python
        from truefoundry.ml import get_client
        from truefoundry.ml.enums import ModelFramework

        import joblib
        import numpy as np
        from sklearn.pipeline import make_pipeline
        from sklearn.preprocessing import StandardScaler
        from sklearn.svm import SVC

        X = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]])
        y = np.array([1, 1, 2, 2])
        clf = make_pipeline(StandardScaler(), SVC(gamma='auto'))
        clf.fit(X, y)
        joblib.dump(clf, "sklearn-pipeline.joblib")

        client = get_client()
        client.create_ml_repo(  # This is only required once
            ml_repo="my-classification-project",
            # This controls which bucket is used.
            # You can get this from Integrations > Blob Storage. `None` picks the default
            storage_integration_fqn=None
        )
        run = client.create_run(
            ml_repo="my-classification-project"
        )
        model_version = run.log_model(
            name="my-sklearn-model",
            model_file_or_folder="sklearn-pipeline.joblib",
            framework=ModelFramework.SKLEARN,
            metadata={"accuracy": 0.99, "f1": 0.80},
            step=1,  # step number, useful when using iterative algorithms like SGD
        )
        print(model_version.fqn)
        ```

        ### Huggingface Transformers

        ```python
        from truefoundry.ml import get_client
        from truefoundry.ml.enums import ModelFramework

        import torch
        from transformers import AutoTokenizer, AutoConfig, pipeline, AutoModelForCausalLM
        pln = pipeline(
            "text-generation",
            model="EleutherAI/pythia-70m",
            tokenizer="EleutherAI/pythia-70m",
            torch_dtype=torch.float16
        )
        pln.model.save_pretrained("my-transformers-model")
        pln.tokenizer.save_pretrained("my-transformers-model")

        client = get_client()
        client.create_ml_repo(  # This is only required once
            ml_repo="my-llm-project",
            # This controls which bucket is used.
            # You can get this from Integrations > Blob Storage. `None` picks the default
            storage_integration_fqn=None
        )
        run = client.create_run(
            ml_repo="my-llm-project"
        )
        model_version = run.log_model(
            name="my-transformers-model",
            model_file_or_folder="my-transformers-model/",
            framework=ModelFramework.TRANSFORMERS
        )
        print(model_version.fqn)
        ```
    """
    # TODO (chiragjn): Document mapping of framework to list of valid model save kwargs
    # TODO (chiragjn): Add more examples

    logged_version = _log_model_version(
        run=self,
        name=name,
        model_file_or_folder=model_file_or_folder,
        framework=framework,
        additional_files=additional_files,
        description=description,
        metadata=metadata,
        step=step,
        progress=progress,
    )
    logger.info(f"Logged model successfully with fqn {logged_version.fqn!r}")
    return logged_version
+
1060
@_ensure_not_deleted
def log_images(self, images: Dict[str, Image], step: int = 0):
    """Save images under the current `run` at the given `step`.

    The `PIL` package is needed to log images; install it with
    `pip install pillow`.

    Args:
        images (Dict[str, "truefoundry.ml.Image"]): Map of image key to an
            instance of `truefoundry.ml.Image`. The key should only contain
            alphanumeric characters, hyphens(-) or underscores(_). Only one
            image can be logged for a single key and step pair.
        step (int, optional): Training step/iteration for which the `images`
            should be logged. Default is `0`.

    Raises:
        MlFoundryException: When a value is not a `truefoundry.ml.Image`.
            Entries are handled one at a time in iteration order, so entries
            before the offending one have already been saved.

    Examples:

        ### Logging images from different sources

        ```python
        from truefoundry.ml import get_client, Image
        import numpy as np
        import PIL.Image

        client = get_client()
        run = client.create_run(
            ml_repo="my-classification-project",
        )

        imarray = np.random.randint(low=0, high=256, size=(100, 100, 3))
        im = PIL.Image.fromarray(imarray.astype("uint8")).convert("RGB")
        im.save("result_image.jpeg")

        images_to_log = {
            "logged-image-array": Image(data_or_path=imarray),
            "logged-pil-image": Image(data_or_path=im),
            "logged-image-from-path": Image(data_or_path="result_image.jpeg"),
        }

        run.log_images(images_to_log, step=1)
        run.end()
        ```
    """
    for image_key in images:
        candidate = images[image_key]
        if isinstance(candidate, Image):
            candidate.save(run=self, key=image_key, step=step)
            continue
        raise MlFoundryException("image should be of type `truefoundry.ml.Image`")
+
1110
@_ensure_not_deleted
def log_plots(
    self,
    plots: Dict[
        str,
        Union[
            "matplotlib.pyplot",
            "matplotlib.figure.Figure",
            "plotly.graph_objects.Figure",
            Plot,
        ],
    ],
    step: int = 0,
):
    """Save custom matplotlib or plotly plots under the current `run` at the given `step`.

    Args:
        plots (Dict[str, "matplotlib.pyplot", "matplotlib.figure.Figure", "plotly.graph_objects.Figure", Plot]):
            Map of plot key to the plot or figure object. Values that are not
            already a `Plot` are wrapped in one before being saved. The plot
            key should only contain alphanumeric characters, hyphens(-) or
            underscores(_). Only one plot can be logged for a single key and
            step pair.
        step (int, optional): Training step/iteration for which the `plots`
            should be logged. Default is `0`.

    Examples:

        ### Logging a plotly figure

        ```python
        from truefoundry.ml import get_client
        import plotly.express as px

        client = get_client()
        run = client.create_run(
            ml_repo="my-classification-project",
        )

        df = px.data.tips()
        fig = px.histogram(
            df,
            x="total_bill",
            y="tip",
            color="sex",
            marginal="rug",
            hover_data=df.columns,
        )

        plots_to_log = {
            "distribution-plot": fig,
        }

        run.log_plots(plots_to_log, step=1)
        run.end()
        ```

        ### Logging a matplotlib plt or figure

        ```python
        from truefoundry.ml import get_client
        from matplotlib import pyplot as plt
        import numpy as np

        client = get_client()
        run = client.create_run(
            ml_repo="my-classification-project",
        )

        t = np.arange(0.0, 5.0, 0.01)
        s = np.cos(2 * np.pi * t)
        (line,) = plt.plot(t, s, lw=2)

        plt.annotate(
            "local max",
            xy=(2, 1),
            xytext=(3, 1.5),
            arrowprops=dict(facecolor="black", shrink=0.05),
        )

        plt.ylim(-2, 2)

        plots_to_log = {"cos-plot": plt, "cos-plot-using-figure": plt.gcf()}

        run.log_plots(plots_to_log, step=1)
        run.end()
        ```
    """
    for plot_key, raw_plot in plots.items():
        # Accept ready-made `Plot` objects as-is; wrap anything else.
        if isinstance(raw_plot, Plot):
            wrapped = raw_plot
        else:
            wrapped = Plot(raw_plot)
        wrapped.save(run=self, key=plot_key, step=step)