snowflake-ml-python 1.19.0__py3-none-any.whl → 1.21.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. snowflake/ml/_internal/env_utils.py +16 -0
  2. snowflake/ml/_internal/platform_capabilities.py +36 -0
  3. snowflake/ml/_internal/telemetry.py +56 -7
  4. snowflake/ml/data/_internal/arrow_ingestor.py +67 -2
  5. snowflake/ml/data/data_connector.py +103 -1
  6. snowflake/ml/experiment/_client/experiment_tracking_sql_client.py +8 -2
  7. snowflake/ml/experiment/_entities/run.py +15 -0
  8. snowflake/ml/experiment/callback/keras.py +25 -2
  9. snowflake/ml/experiment/callback/lightgbm.py +27 -2
  10. snowflake/ml/experiment/callback/xgboost.py +25 -2
  11. snowflake/ml/experiment/experiment_tracking.py +123 -13
  12. snowflake/ml/experiment/utils.py +6 -0
  13. snowflake/ml/feature_store/access_manager.py +1 -0
  14. snowflake/ml/feature_store/feature_store.py +1 -1
  15. snowflake/ml/feature_store/feature_view.py +34 -24
  16. snowflake/ml/jobs/_interop/protocols.py +3 -0
  17. snowflake/ml/jobs/_utils/feature_flags.py +1 -0
  18. snowflake/ml/jobs/_utils/payload_utils.py +360 -357
  19. snowflake/ml/jobs/_utils/scripts/mljob_launcher.py +95 -8
  20. snowflake/ml/jobs/_utils/scripts/start_mlruntime.sh +92 -0
  21. snowflake/ml/jobs/_utils/scripts/startup.sh +112 -0
  22. snowflake/ml/jobs/_utils/spec_utils.py +2 -406
  23. snowflake/ml/jobs/_utils/stage_utils.py +22 -1
  24. snowflake/ml/jobs/_utils/types.py +14 -7
  25. snowflake/ml/jobs/job.py +8 -9
  26. snowflake/ml/jobs/manager.py +64 -129
  27. snowflake/ml/model/_client/model/inference_engine_utils.py +8 -4
  28. snowflake/ml/model/_client/model/model_version_impl.py +109 -28
  29. snowflake/ml/model/_client/ops/model_ops.py +32 -6
  30. snowflake/ml/model/_client/ops/service_ops.py +9 -4
  31. snowflake/ml/model/_client/sql/service.py +69 -2
  32. snowflake/ml/model/_packager/model_handler.py +8 -2
  33. snowflake/ml/model/_packager/model_handlers/{huggingface_pipeline.py → huggingface.py} +203 -76
  34. snowflake/ml/model/_packager/model_handlers/mlflow.py +6 -1
  35. snowflake/ml/model/_packager/model_runtime/_snowml_inference_alternative_requirements.py +1 -1
  36. snowflake/ml/model/_signatures/core.py +305 -8
  37. snowflake/ml/model/_signatures/utils.py +13 -4
  38. snowflake/ml/model/compute_pool.py +2 -0
  39. snowflake/ml/model/models/huggingface.py +285 -0
  40. snowflake/ml/model/models/huggingface_pipeline.py +25 -215
  41. snowflake/ml/model/type_hints.py +5 -1
  42. snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +2 -2
  43. snowflake/ml/monitoring/_client/model_monitor_sql_client.py +12 -0
  44. snowflake/ml/monitoring/_manager/model_monitor_manager.py +12 -0
  45. snowflake/ml/monitoring/entities/model_monitor_config.py +5 -0
  46. snowflake/ml/utils/html_utils.py +67 -1
  47. snowflake/ml/version.py +1 -1
  48. {snowflake_ml_python-1.19.0.dist-info → snowflake_ml_python-1.21.0.dist-info}/METADATA +94 -7
  49. {snowflake_ml_python-1.19.0.dist-info → snowflake_ml_python-1.21.0.dist-info}/RECORD +52 -48
  50. {snowflake_ml_python-1.19.0.dist-info → snowflake_ml_python-1.21.0.dist-info}/WHEEL +0 -0
  51. {snowflake_ml_python-1.19.0.dist-info → snowflake_ml_python-1.21.0.dist-info}/licenses/LICENSE.txt +0 -0
  52. {snowflake_ml_python-1.19.0.dist-info → snowflake_ml_python-1.21.0.dist-info}/top_level.txt +0 -0
snowflake/ml/model/models/huggingface.py (new file)
@@ -0,0 +1,285 @@
+ import logging
+ import warnings
+ from typing import Any, Optional, Union
+
+ from packaging import version
+
+ from snowflake.ml._internal.utils import sql_identifier
+ from snowflake.ml.model.compute_pool import DEFAULT_CPU_COMPUTE_POOL
+
+ logger = logging.getLogger(__name__)
+
+
+ _TELEMETRY_PROJECT = "MLOps"
+ _TELEMETRY_SUBPROJECT = "ModelManagement"
+
+
+ class TransformersPipeline:
+     def __init__(
+         self,
+         task: Optional[str] = None,
+         model: Optional[str] = None,
+         *,
+         revision: Optional[str] = None,
+         token_or_secret: Optional[str] = None,
+         trust_remote_code: Optional[bool] = None,
+         model_kwargs: Optional[dict[str, Any]] = None,
+         compute_pool_for_log: Optional[str] = DEFAULT_CPU_COMPUTE_POOL,
+         # repo snapshot download args
+         allow_patterns: Optional[Union[list[str], str]] = None,
+         ignore_patterns: Optional[Union[list[str], str]] = None,
+         **kwargs: Any,
+     ) -> None:
+         """
+         Utility factory method to build a wrapper over transformers [`Pipeline`].
+         When deployed, this wrapper creates a real pipeline object and loads the tokenizers and models.
+
+         For the pipelines documentation, please refer to:
+         https://huggingface.co/docs/transformers/en/main_classes/pipelines#transformers.pipeline
+
+         Args:
+             task: The task the pipeline will be used for. If None, it is inferred from the model.
+                 For available tasks, please refer to the Transformers documentation. Defaults to None.
+             model: The model that will be used by the pipeline to make predictions. Currently this can only be a
+                 model identifier. If not provided, the default for the `task` will be loaded. Defaults to None.
+             revision: When passing a task name or a string model identifier: the specific model version to use.
+                 It can be a branch name, a tag name, or a commit id. Since huggingface.co uses a git-based system
+                 for storing models and other artifacts, `revision` can be any identifier allowed by git.
+                 Defaults to None.
+             token_or_secret: The token to use as HTTP bearer authorization for remote files. This can be a raw
+                 token or a secret; if a secret is provided, it must be a fully qualified secret name.
+                 Defaults to None.
+             trust_remote_code: Whether or not to allow custom code defined on the Hub in its own modeling,
+                 configuration, tokenization, or even pipeline files. This option should only be set to `True` for
+                 repositories you trust and in which you have read the code, as it will execute code present on
+                 the Hub. Defaults to None.
+             model_kwargs: Additional dictionary of keyword arguments passed along to the model's
+                 `from_pretrained(...)` call. Defaults to None.
+             compute_pool_for_log: The compute pool to use for logging the model. Defaults to
+                 DEFAULT_CPU_COMPUTE_POOL. If a string is provided, it is used as the compute pool name; this
+                 override allows logging the model when no system compute pool is available. If None is passed
+                 and `huggingface_hub` is installed, the model artifacts are downloaded from the HuggingFace
+                 repository; otherwise, only the metadata is logged to Snowflake.
+             allow_patterns: If provided, only files matching at least one pattern are downloaded.
+             ignore_patterns: If provided, files matching any of the patterns are not downloaded.
+             kwargs: Additional keyword arguments passed along to the specific pipeline init (see the documentation
+                 for the corresponding pipeline class for possible values).
+
+         Raises:
+             RuntimeError: Raised when the input arguments cannot determine the pipeline.
+             ValueError: Raised when the pipeline contains remote code but trust_remote_code is not set or False.
+             ValueError: Raised when arguments conflict.
+
+         .. # noqa: DAR003
+         """
+         import transformers
+
+         config = kwargs.get("config", None)
+         tokenizer = kwargs.get("tokenizer", None)
+         framework = kwargs.get("framework", None)
+         feature_extractor = kwargs.get("feature_extractor", None)
+
+         self.secret_identifier: Optional[str] = None
+         uses_secret = False
+         if token_or_secret is not None and isinstance(token_or_secret, str):
+             db, schema, secret_name = sql_identifier.parse_fully_qualified_name(token_or_secret)
+             if db is not None and schema is not None and secret_name is not None:
+                 self.secret_identifier = sql_identifier.get_fully_qualified_name(
+                     db=db,
+                     schema=schema,
+                     object=secret_name,
+                 )
+                 uses_secret = True
+             else:
+                 logger.info("The token_or_secret is not a fully qualified secret name. It will be used as is.")
+
+         can_download_snapshot = False
+         if compute_pool_for_log is None:
+             try:
+                 import huggingface_hub as hf_hub
+
+                 can_download_snapshot = True
+             except ImportError:
+                 pass
+
+         if compute_pool_for_log is None and not can_download_snapshot:
+             logger.info(
+                 "The model will be logged with metadata only. No model artifacts will be downloaded. "
+                 "During deployment, the model artifacts will be downloaded from the HuggingFace repository."
+             )
+
+         # ==== Start pipeline logic from transformers ====
+         if model_kwargs is None:
+             model_kwargs = {}
+
+         use_auth_token = model_kwargs.pop("use_auth_token", None)
+         if use_auth_token is not None:
+             warnings.warn(
+                 "The `use_auth_token` argument is deprecated and will be removed in v5 of Transformers.",
+                 FutureWarning,
+                 stacklevel=2,
+             )
+             if token_or_secret is not None:
+                 raise ValueError(
+                     "`token_or_secret` and `use_auth_token` are both specified. "
+                     "Please set only the argument `token_or_secret`."
+                 )
+             token_or_secret = use_auth_token
+
+         hub_kwargs = {
+             "revision": revision,
+             "token": token_or_secret,
+             "trust_remote_code": trust_remote_code,
+             "_commit_hash": None,
+         }
+
+         # Backward compatibility since HF interface change.
+         if version.parse(transformers.__version__) < version.parse("4.32.0"):
+             hub_kwargs["use_auth_token"] = hub_kwargs["token"]
+             del hub_kwargs["token"]
+
+         if task is None and model is None:
+             raise RuntimeError(
+                 "Impossible to instantiate a pipeline without either a task or a model being specified."
+             )
+
+         if model is None and tokenizer is not None:
+             raise RuntimeError(
+                 "Impossible to instantiate a pipeline with tokenizer specified but not the model as the provided"
+                 " tokenizer may not be compatible with the default model. Please provide an identifier to a"
+                 " pretrained model when providing tokenizer."
+             )
+         if model is None and feature_extractor is not None:
+             raise RuntimeError(
+                 "Impossible to instantiate a pipeline with feature_extractor specified but not the model as the"
+                 " provided feature_extractor may not be compatible with the default model. Please provide an"
+                 " identifier to a pretrained model when providing feature_extractor."
+             )
+
+         # ==== End pipeline logic from transformers ====
+
+         # We only support string as model argument.
+         if model is not None and not isinstance(model, str):
+             raise RuntimeError(f"Impossible to use non-string model as input for class {self.__class__.__name__}.")
+
+         # ==== Start pipeline logic (Config) from transformers ====
+
+         # Config is the primordial information item.
+         # Instantiate config if needed
+         config_obj = None
+
+         if not can_download_snapshot:
+             if isinstance(config, str):
+                 config_obj = transformers.AutoConfig.from_pretrained(
+                     config, _from_pipeline=task, **hub_kwargs, **model_kwargs
+                 )
+                 hub_kwargs["_commit_hash"] = config_obj._commit_hash
+             elif config is None and isinstance(model, str):
+                 config_obj = transformers.AutoConfig.from_pretrained(
+                     model, _from_pipeline=task, **hub_kwargs, **model_kwargs
+                 )
+                 hub_kwargs["_commit_hash"] = config_obj._commit_hash
+             # We only support string as config argument.
+             elif config is not None and not isinstance(config, str):
+                 raise RuntimeError(
+                     f"Impossible to use non-string config as input for class {self.__class__.__name__}. "
+                     "Use transformers.Pipeline object if required."
+                 )
+
+         # ==== Start pipeline logic (Task) from transformers ====
+
+         custom_tasks = {}
+         if config_obj is not None and len(getattr(config_obj, "custom_pipelines", {})) > 0:
+             custom_tasks = config_obj.custom_pipelines
+             if task is None and trust_remote_code is not False:
+                 if len(custom_tasks) == 1:
+                     task = list(custom_tasks.keys())[0]
+                 else:
+                     raise RuntimeError(
+                         "We can't infer the task automatically for this model as there are multiple tasks"
+                         f" available. Pick one in {', '.join(custom_tasks.keys())}"
+                     )
+
+         if task is None and model is not None:
+             task = transformers.pipelines.get_task(model, token_or_secret)
+
+         # Retrieve the task
+         if task in custom_tasks:
+             normalized_task = task
+             targeted_task, task_options = transformers.pipelines.clean_custom_task(custom_tasks[task])
+             if not trust_remote_code:
+                 raise ValueError(
+                     "Loading this pipeline requires you to execute the code in the pipeline file in that"
+                     " repo on your local machine. Make sure you have read the code there to avoid malicious use,"
+                     " then set the option `trust_remote_code=True` to remove this error."
+                 )
+         else:
+             (
+                 normalized_task,
+                 targeted_task,
+                 task_options,
+             ) = transformers.pipelines.check_task(task)
+
+         # ==== Start pipeline logic (Model) from transformers ====
+
+         # Use default model/config/tokenizer for the task if no model is provided
+         if model is None:
+             # At that point framework might still be undetermined
+             (
+                 model,
+                 default_revision,
+             ) = transformers.pipelines.get_default_model_and_revision(targeted_task, framework, task_options)
+             revision = revision if revision is not None else default_revision
+             warnings.warn(
+                 f"No model was supplied, defaulted to {model} and revision"
+                 f" {revision} ({transformers.pipelines.HUGGINGFACE_CO_RESOLVE_ENDPOINT}/{model}).\n"
+                 "Using a pipeline without specifying a model name and revision in production is not recommended.",
+                 stacklevel=2,
+             )
+             if not can_download_snapshot and config is None and isinstance(model, str):
+                 config_obj = transformers.AutoConfig.from_pretrained(
+                     model, _from_pipeline=task, **hub_kwargs, **model_kwargs
+                 )
+                 hub_kwargs["_commit_hash"] = config_obj._commit_hash
+
+         if kwargs.get("device_map", None) is not None:
+             if "device_map" in model_kwargs:
+                 raise ValueError(
+                     'You cannot use both `pipeline(... device_map=..., model_kwargs={"device_map":...})` as those'
+                     " arguments might conflict, use only one."
+                 )
+             if kwargs.get("device", None) is not None:
+                 warnings.warn(
+                     "Both `device` and `device_map` are specified. `device` will override `device_map`. You"
+                     " will most likely encounter unexpected behavior. Please remove `device` and keep `device_map`.",
+                     stacklevel=2,
+                 )
+
+         repo_snapshot_dir: Optional[str] = None
+         if can_download_snapshot and not uses_secret:
+             try:
+                 repo_snapshot_dir = hf_hub.snapshot_download(
+                     repo_id=model,
+                     revision=revision,
+                     token=token_or_secret,
+                     allow_patterns=allow_patterns,
+                     ignore_patterns=ignore_patterns,
+                 )
+             except ImportError:
+                 logger.info("huggingface_hub package is not installed, skipping snapshot download")
+
+         # ==== End pipeline logic from transformers ====
+
+         self.model = model
+         self.task = normalized_task
+         self.revision = revision
+         self.token_or_secret = token_or_secret
+         self.trust_remote_code = trust_remote_code
+         self.model_kwargs = model_kwargs
+         self.tokenizer = tokenizer
+
+         self.repo_snapshot_dir = repo_snapshot_dir
+         self.compute_pool_for_log = compute_pool_for_log
+         self.__dict__.update(kwargs)
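For orientation, here is a minimal usage sketch of the new entry point added above. It is illustrative only: the import path and parameter names come from this hunk, while the model id and file patterns are hypothetical.

from snowflake.ml.model.models.huggingface import TransformersPipeline

# Default: a compute pool name is set, so only metadata is prepared locally and
# artifacts are fetched from the HuggingFace repository at deployment time.
pipe = TransformersPipeline(
    task="text-generation",
    model="openai-community/gpt2",  # hypothetical model id
    revision="main",
)

# With compute_pool_for_log=None and huggingface_hub installed, the repository
# snapshot is downloaded locally at construction time instead.
local_pipe = TransformersPipeline(
    task="text-generation",
    model="openai-community/gpt2",
    compute_pool_for_log=None,
    allow_patterns=["*.safetensors", "*.json"],  # hypothetical filter
)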
snowflake/ml/model/models/huggingface_pipeline.py
@@ -1,25 +1,19 @@
  import logging
- import warnings
  from typing import Any, Optional, Union
 
- from packaging import version
-
  from snowflake import snowpark
  from snowflake.ml._internal import telemetry
  from snowflake.ml._internal.human_readable_id import hrid_generator
  from snowflake.ml._internal.utils import sql_identifier
  from snowflake.ml.model._client.model import inference_engine_utils
  from snowflake.ml.model._client.ops import service_ops
+ from snowflake.ml.model.models import huggingface
  from snowflake.snowpark import async_job, session
 
  logger = logging.getLogger(__name__)
 
 
- _TELEMETRY_PROJECT = "MLOps"
- _TELEMETRY_SUBPROJECT = "ModelManagement"
-
-
- class HuggingFacePipelineModel:
+ class HuggingFacePipelineModel(huggingface.TransformersPipeline):
      def __init__(
          self,
          task: Optional[str] = None,
@@ -65,208 +59,25 @@ class HuggingFacePipelineModel:
 
          Return:
              A wrapper over transformers [`Pipeline`].
-
-         Raises:
-             RuntimeError: Raised when the input argument cannot determine the pipeline.
-             ValueError: Raised when the pipeline contains remote code but trust_remote_code is not set or False.
-             ValueError: Raised when having conflicting arguments.
          """
-         import transformers
-
-         config = kwargs.get("config", None)
-         tokenizer = kwargs.get("tokenizer", None)
-         framework = kwargs.get("framework", None)
-         feature_extractor = kwargs.get("feature_extractor", None)
-
-         _can_download_snapshot = False
-         if download_snapshot:
-             try:
-                 import huggingface_hub as hf_hub
-
-                 _can_download_snapshot = True
-             except ImportError:
-                 pass
-
-         # ==== Start pipeline logic from transformers ====
-         if model_kwargs is None:
-             model_kwargs = {}
-
-         use_auth_token = model_kwargs.pop("use_auth_token", None)
-         if use_auth_token is not None:
-             warnings.warn(
-                 "The `use_auth_token` argument is deprecated and will be removed in v5 of Transformers.",
-                 FutureWarning,
-                 stacklevel=2,
-             )
-             if token is not None:
-                 raise ValueError(
-                     "`token` and `use_auth_token` are both specified. Please set only the argument `token`."
-                 )
-             token = use_auth_token
-
-         hub_kwargs = {
-             "revision": revision,
-             "token": token,
-             "trust_remote_code": trust_remote_code,
-             "_commit_hash": None,
-         }
-
-         # Backward compatibility since HF interface change.
-         if version.parse(transformers.__version__) < version.parse("4.32.0"):
-             # Backward compatibility since HF interface change.
-             hub_kwargs["use_auth_token"] = hub_kwargs["token"]
-             del hub_kwargs["token"]
-
-         if task is None and model is None:
-             raise RuntimeError(
-                 "Impossible to instantiate a pipeline without either a task or a model being specified. "
-             )
-
-         if model is None and tokenizer is not None:
-             raise RuntimeError(
-                 "Impossible to instantiate a pipeline with tokenizer specified but not the model as the provided"
-                 " tokenizer may not be compatible with the default model. Please provide an identifier to a pretrained"
-                 " model when providing tokenizer."
-             )
-         if model is None and feature_extractor is not None:
-             raise RuntimeError(
-                 "Impossible to instantiate a pipeline with feature_extractor specified but not the model as the "
-                 "provided feature_extractor may not be compatible with the default model. Please provide an identifier"
-                 " to a pretrained model when providing feature_extractor."
-             )
-
-         # ==== End pipeline logic from transformers ====
-
-         # We only support string as model argument.
-
-         if model is not None and not isinstance(model, str):
-             raise RuntimeError(
-                 "Impossible to use non-string model as input for HuggingFacePipelineModel. Use transformers.Pipeline"
-                 " object if required."
-             )
-
-         # ==== Start pipeline logic (Config) from transformers ====
-
-         # Config is the primordial information item.
-         # Instantiate config if needed
-         config_obj = None
-
-         if not _can_download_snapshot:
-             if isinstance(config, str):
-                 config_obj = transformers.AutoConfig.from_pretrained(
-                     config, _from_pipeline=task, **hub_kwargs, **model_kwargs
-                 )
-                 hub_kwargs["_commit_hash"] = config_obj._commit_hash
-             elif config is None and isinstance(model, str):
-                 config_obj = transformers.AutoConfig.from_pretrained(
-                     model, _from_pipeline=task, **hub_kwargs, **model_kwargs
-                 )
-                 hub_kwargs["_commit_hash"] = config_obj._commit_hash
-             # We only support string as config argument.
-             elif config is not None and not isinstance(config, str):
-                 raise RuntimeError(
-                     "Impossible to use non-string config as input for HuggingFacePipelineModel. "
-                     "Use transformers.Pipeline object if required."
-                 )
-
-         # ==== Start pipeline logic (Task) from transformers ====
-
-         custom_tasks = {}
-         if config_obj is not None and len(getattr(config_obj, "custom_pipelines", {})) > 0:
-             custom_tasks = config_obj.custom_pipelines
-             if task is None and trust_remote_code is not False:
-                 if len(custom_tasks) == 1:
-                     task = list(custom_tasks.keys())[0]
-                 else:
-                     raise RuntimeError(
-                         "We can't infer the task automatically for this model as there are multiple tasks available. "
-                         f"Pick one in {', '.join(custom_tasks.keys())}"
-                     )
-
-         if task is None and model is not None:
-             task = transformers.pipelines.get_task(model, token)
-
-         # Retrieve the task
-         if task in custom_tasks:
-             normalized_task = task
-             targeted_task, task_options = transformers.pipelines.clean_custom_task(custom_tasks[task])
-             if not trust_remote_code:
-                 raise ValueError(
-                     "Loading this pipeline requires you to execute the code in the pipeline file in that"
-                     " repo on your local machine. Make sure you have read the code there to avoid malicious use, then"
-                     " set the option `trust_remote_code=True` to remove this error."
-                 )
-         else:
-             (
-                 normalized_task,
-                 targeted_task,
-                 task_options,
-             ) = transformers.pipelines.check_task(task)
-
-         # ==== Start pipeline logic (Model) from transformers ====
-
-         # Use default model/config/tokenizer for the task if no model is provided
-         if model is None:
-             # At that point framework might still be undetermined
-             (
-                 model,
-                 default_revision,
-             ) = transformers.pipelines.get_default_model_and_revision(targeted_task, framework, task_options)
-             revision = revision if revision is not None else default_revision
-             warnings.warn(
-                 f"No model was supplied, defaulted to {model} and revision"
-                 f" {revision} ({transformers.pipelines.HUGGINGFACE_CO_RESOLVE_ENDPOINT}/{model}).\n"
-                 "Using a pipeline without specifying a model name and revision in production is not recommended.",
-                 stacklevel=2,
-             )
-             if not _can_download_snapshot and config is None and isinstance(model, str):
-                 config_obj = transformers.AutoConfig.from_pretrained(
-                     model, _from_pipeline=task, **hub_kwargs, **model_kwargs
-                 )
-                 hub_kwargs["_commit_hash"] = config_obj._commit_hash
-
-         if kwargs.get("device_map", None) is not None:
-             if "device_map" in model_kwargs:
-                 raise ValueError(
-                     'You cannot use both `pipeline(... device_map=..., model_kwargs={"device_map":...})` as those'
-                     " arguments might conflict, use only one.)"
-                 )
-             if kwargs.get("device", None) is not None:
-                 warnings.warn(
-                     "Both `device` and `device_map` are specified. `device` will override `device_map`. You"
-                     " will most likely encounter unexpected behavior. Please remove `device` and keep `device_map`.",
-                     stacklevel=2,
-                 )
-
-         repo_snapshot_dir: Optional[str] = None
-         if _can_download_snapshot:
-             try:
-
-                 repo_snapshot_dir = hf_hub.snapshot_download(
-                     repo_id=model,
-                     revision=revision,
-                     token=token,
-                     allow_patterns=allow_patterns,
-                     ignore_patterns=ignore_patterns,
-                 )
-             except ImportError:
-                 logger.info("huggingface_hub package is not installed, skipping snapshot download")
-
-         # ==== End pipeline logic from transformers ====
-
-         self.task = normalized_task
-         self.model = model
-         self.revision = revision
+         logger.warning("HuggingFacePipelineModel is deprecated. Please use TransformersPipeline instead.")
+         super().__init__(
+             task=task,
+             model=model,
+             revision=revision,
+             token_or_secret=token,
+             trust_remote_code=trust_remote_code,
+             model_kwargs=model_kwargs,
+             compute_pool_for_log=None,
+             allow_patterns=allow_patterns,
+             ignore_patterns=ignore_patterns,
+             **kwargs,
+         )
          self.token = token
-         self.trust_remote_code = trust_remote_code
-         self.model_kwargs = model_kwargs
-         self.tokenizer = tokenizer
-         self.repo_snapshot_dir = repo_snapshot_dir
-         self.__dict__.update(kwargs)
 
      @telemetry.send_api_usage_telemetry(
-         project=_TELEMETRY_PROJECT,
-         subproject=_TELEMETRY_SUBPROJECT,
+         project=huggingface._TELEMETRY_PROJECT,
+         subproject=huggingface._TELEMETRY_SUBPROJECT,
          func_params_to_log=[
              "service_name",
              "image_build_compute_pool",
@@ -303,6 +114,7 @@ class HuggingFacePipelineModel:
          force_rebuild: bool = False,
          build_external_access_integrations: Optional[list[str]] = None,
          block: bool = True,
+         inference_engine_options: Optional[dict[str, Any]] = None,
          experimental_options: Optional[dict[str, Any]] = None,
      ) -> Union[str, async_job.AsyncJob]:
          """Logs a Hugging Face model and creates a service in Snowflake.
@@ -330,10 +142,8 @@
              force_rebuild: Whether to force rebuild the image. Defaults to False.
              build_external_access_integrations: External access integrations for building the image. Defaults to None.
              block: Whether to block the operation. Defaults to True.
-             experimental_options: Experimental options for the service creation with custom inference engine.
-                 Currently, only `inference_engine` and `inference_engine_args_override` are supported.
-                 `inference_engine` is the name of the inference engine to use.
-                 `inference_engine_args_override` is a list of string arguments to pass to the inference engine.
+             inference_engine_options: Options for the service creation with a custom inference engine. Defaults to None.
+             experimental_options: Experimental options for the service creation. Defaults to None.
 
          Raises:
              ValueError: if database and schema name is not provided and session doesn't have a
@@ -346,8 +156,8 @@
          .. # noqa: DAR003
          """
          statement_params = telemetry.get_statement_params(
-             project=_TELEMETRY_PROJECT,
-             subproject=_TELEMETRY_SUBPROJECT,
+             project=huggingface._TELEMETRY_PROJECT,
+             subproject=huggingface._TELEMETRY_SUBPROJECT,
          )
 
          database_name_id, schema_name_id, model_name_id = sql_identifier.parse_fully_qualified_name(model_name)
@@ -377,14 +187,14 @@
 
          # Check if model is HuggingFace text-generation before doing inference engine checks
          inference_engine_args = None
-         if experimental_options:
+         if inference_engine_options:
              if self.task != "text-generation":
                  raise ValueError(
-                     "Currently, InferenceEngine using experimental_options is only supported for "
+                     "Currently, InferenceEngine using inference_engine_options is only supported for "
                      "HuggingFace text-generation models."
                  )
 
-             inference_engine_args = inference_engine_utils._get_inference_engine_args(experimental_options)
+             inference_engine_args = inference_engine_utils._get_inference_engine_args(inference_engine_options)
 
          # Enrich inference engine args if inference engine is specified
          if inference_engine_args is not None:
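The hunks above reduce HuggingFacePipelineModel to a thin deprecation shim over TransformersPipeline. A sketch of the resulting equivalence, assuming both classes are importable as shown in this diff (the model id is hypothetical):

from snowflake.ml.model.models.huggingface import TransformersPipeline
from snowflake.ml.model.models.huggingface_pipeline import HuggingFacePipelineModel

# The legacy class logs a deprecation warning and then delegates, so this:
legacy = HuggingFacePipelineModel(task="text-generation", model="openai-community/gpt2")

# appears to behave like this:
new = TransformersPipeline(
    task="text-generation",
    model="openai-community/gpt2",
    token_or_secret=None,        # the legacy `token` argument is forwarded here
    compute_pool_for_log=None,   # pinned by the shim, preserving the old snapshot-download path
)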
snowflake/ml/model/type_hints.py
@@ -13,6 +13,10 @@ from typing import (
  import numpy.typing as npt
  from typing_extensions import NotRequired
 
+ from snowflake.ml.model.compute_pool import (
+     DEFAULT_CPU_COMPUTE_POOL,
+     DEFAULT_GPU_COMPUTE_POOL,
+ )
  from snowflake.ml.model.target_platform import TargetPlatform
  from snowflake.ml.model.task import Task
  from snowflake.ml.model.volatility import Volatility
@@ -380,4 +384,4 @@ class ProgressStatus(Protocol):
          ...
 
 
- __all__ = ["TargetPlatform", "Task"]
+ __all__ = ["TargetPlatform", "Task", "DEFAULT_CPU_COMPUTE_POOL", "DEFAULT_GPU_COMPUTE_POOL"]
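A small check of the re-export added above; both import paths should resolve to the same constants (a sketch, assuming the package layout shown elsewhere in this diff):

from snowflake.ml.model import type_hints
from snowflake.ml.model.compute_pool import DEFAULT_CPU_COMPUTE_POOL

# type_hints now re-exports the default pool names alongside TargetPlatform and Task.
assert type_hints.DEFAULT_CPU_COMPUTE_POOL == DEFAULT_CPU_COMPUTE_POOL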
snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py
@@ -365,8 +365,8 @@ class DistributedHPOTrainer(SnowparkModelTrainer):
 
          required_deps = dependencies + [
              "snowflake-snowpark-python<2",
-             "fastparquet<2023.11",
-             "pyarrow<14",
+             "fastparquet<2024.3",
+             "pyarrow<18",
              "cachetools<6",
          ]
 
snowflake/ml/monitoring/_client/model_monitor_sql_client.py
@@ -92,6 +92,9 @@ class ModelMonitorSQLClient:
          baseline: Optional[sql_identifier.SqlIdentifier] = None,
          segment_columns: Optional[list[sql_identifier.SqlIdentifier]] = None,
          custom_metric_columns: Optional[list[sql_identifier.SqlIdentifier]] = None,
+         timestamp_custom_metric_database: Optional[sql_identifier.SqlIdentifier] = None,
+         timestamp_custom_metric_schema: Optional[sql_identifier.SqlIdentifier] = None,
+         timestamp_custom_metric_table: Optional[sql_identifier.SqlIdentifier] = None,
          statement_params: Optional[dict[str, Any]] = None,
      ) -> None:
          baseline_sql = ""
@@ -106,6 +109,14 @@
          if custom_metric_columns:
              custom_metric_columns_sql = f"CUSTOM_METRIC_COLUMNS={_build_sql_list_from_columns(custom_metric_columns)}"
 
+         timestamp_custom_metric_table_sql = ""
+         if timestamp_custom_metric_table:
+             timestamp_custom_metric_table_sql = (
+                 f"TIMESTAMP_CUSTOM_METRIC_TABLE="
+                 f"{self._infer_qualified_schema(timestamp_custom_metric_database, timestamp_custom_metric_schema)}."
+                 f"{timestamp_custom_metric_table}"
+             )
+
          query_result_checker.SqlResultValidator(
              self._sql_client._session,
              f"""
@@ -126,6 +137,7 @@
              AGGREGATION_WINDOW='{aggregation_window}'
              {segment_columns_sql}
              {custom_metric_columns_sql}
+             {timestamp_custom_metric_table_sql}
              {baseline_sql}""",
              statement_params=statement_params,
          ).has_column("status").has_dimensions(1, 1).validate()
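For clarity, the clause construction added above reduces to the following (standalone sketch with hypothetical identifier values; `_infer_qualified_schema` is assumed to join the database and schema parts):

# Hypothetical values standing in for the SqlIdentifier arguments.
database, schema, table = "MY_DB", "MY_SCHEMA", "TS_METRICS"

timestamp_custom_metric_table_sql = f"TIMESTAMP_CUSTOM_METRIC_TABLE={database}.{schema}.{table}"
# -> TIMESTAMP_CUSTOM_METRIC_TABLE=MY_DB.MY_SCHEMA.TS_METRICS

# When the option is unset, an empty string is interpolated into the MONITOR DDL,
# so existing callers generate exactly the same statement as before.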
snowflake/ml/monitoring/_manager/model_monitor_manager.py
@@ -100,6 +100,15 @@ class ModelMonitorManager:
              if source_config.baseline
              else (None, None, None)
          )
+         (
+             timestamp_custom_metric_database_name_id,
+             timestamp_custom_metric_schema_name_id,
+             timestamp_custom_metric_table_name_id,
+         ) = (
+             sql_identifier.parse_fully_qualified_name(source_config.timestamp_custom_metric_table)
+             if source_config.timestamp_custom_metric_table
+             else (None, None, None)
+         )
          model_database_name_id, model_schema_name_id, model_name_id = sql_identifier.parse_fully_qualified_name(
              model_monitor_config.model_version.fully_qualified_model_name
          )
@@ -155,6 +164,9 @@
              baseline_database=baseline_database_name_id,
              baseline_schema=baseline_schema_name_id,
              baseline=baseline_name_id,
+             timestamp_custom_metric_database=timestamp_custom_metric_database_name_id,
+             timestamp_custom_metric_schema=timestamp_custom_metric_schema_name_id,
+             timestamp_custom_metric_table=timestamp_custom_metric_table_name_id,
              statement_params=self.statement_params,
          )
          return model_monitor.ModelMonitor._ref(
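The manager mirrors its existing baseline handling: the fully qualified table name is split into (database, schema, table) identifiers, falling back to (None, None, None) when the option is absent. The unpacking pattern, shown standalone (behavior inferred from the call sites in this diff):

from snowflake.ml._internal.utils import sql_identifier

fully_qualified = "MY_DB.MY_SCHEMA.TS_METRICS"  # hypothetical value
db_id, schema_id, table_id = sql_identifier.parse_fully_qualified_name(fully_qualified)

# Partially qualified names leave the missing parts as None, which is why the
# callers above guard with `if ... else (None, None, None)`.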