snowflake-ml-python 1.19.0__py3-none-any.whl → 1.21.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +16 -0
- snowflake/ml/_internal/platform_capabilities.py +36 -0
- snowflake/ml/_internal/telemetry.py +56 -7
- snowflake/ml/data/_internal/arrow_ingestor.py +67 -2
- snowflake/ml/data/data_connector.py +103 -1
- snowflake/ml/experiment/_client/experiment_tracking_sql_client.py +8 -2
- snowflake/ml/experiment/_entities/run.py +15 -0
- snowflake/ml/experiment/callback/keras.py +25 -2
- snowflake/ml/experiment/callback/lightgbm.py +27 -2
- snowflake/ml/experiment/callback/xgboost.py +25 -2
- snowflake/ml/experiment/experiment_tracking.py +123 -13
- snowflake/ml/experiment/utils.py +6 -0
- snowflake/ml/feature_store/access_manager.py +1 -0
- snowflake/ml/feature_store/feature_store.py +1 -1
- snowflake/ml/feature_store/feature_view.py +34 -24
- snowflake/ml/jobs/_interop/protocols.py +3 -0
- snowflake/ml/jobs/_utils/feature_flags.py +1 -0
- snowflake/ml/jobs/_utils/payload_utils.py +360 -357
- snowflake/ml/jobs/_utils/scripts/mljob_launcher.py +95 -8
- snowflake/ml/jobs/_utils/scripts/start_mlruntime.sh +92 -0
- snowflake/ml/jobs/_utils/scripts/startup.sh +112 -0
- snowflake/ml/jobs/_utils/spec_utils.py +2 -406
- snowflake/ml/jobs/_utils/stage_utils.py +22 -1
- snowflake/ml/jobs/_utils/types.py +14 -7
- snowflake/ml/jobs/job.py +8 -9
- snowflake/ml/jobs/manager.py +64 -129
- snowflake/ml/model/_client/model/inference_engine_utils.py +8 -4
- snowflake/ml/model/_client/model/model_version_impl.py +109 -28
- snowflake/ml/model/_client/ops/model_ops.py +32 -6
- snowflake/ml/model/_client/ops/service_ops.py +9 -4
- snowflake/ml/model/_client/sql/service.py +69 -2
- snowflake/ml/model/_packager/model_handler.py +8 -2
- snowflake/ml/model/_packager/model_handlers/{huggingface_pipeline.py → huggingface.py} +203 -76
- snowflake/ml/model/_packager/model_handlers/mlflow.py +6 -1
- snowflake/ml/model/_packager/model_runtime/_snowml_inference_alternative_requirements.py +1 -1
- snowflake/ml/model/_signatures/core.py +305 -8
- snowflake/ml/model/_signatures/utils.py +13 -4
- snowflake/ml/model/compute_pool.py +2 -0
- snowflake/ml/model/models/huggingface.py +285 -0
- snowflake/ml/model/models/huggingface_pipeline.py +25 -215
- snowflake/ml/model/type_hints.py +5 -1
- snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +2 -2
- snowflake/ml/monitoring/_client/model_monitor_sql_client.py +12 -0
- snowflake/ml/monitoring/_manager/model_monitor_manager.py +12 -0
- snowflake/ml/monitoring/entities/model_monitor_config.py +5 -0
- snowflake/ml/utils/html_utils.py +67 -1
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.19.0.dist-info → snowflake_ml_python-1.21.0.dist-info}/METADATA +94 -7
- {snowflake_ml_python-1.19.0.dist-info → snowflake_ml_python-1.21.0.dist-info}/RECORD +52 -48
- {snowflake_ml_python-1.19.0.dist-info → snowflake_ml_python-1.21.0.dist-info}/WHEEL +0 -0
- {snowflake_ml_python-1.19.0.dist-info → snowflake_ml_python-1.21.0.dist-info}/licenses/LICENSE.txt +0 -0
- {snowflake_ml_python-1.19.0.dist-info → snowflake_ml_python-1.21.0.dist-info}/top_level.txt +0 -0
snowflake/ml/model/models/huggingface.py
ADDED
@@ -0,0 +1,285 @@
+import logging
+import warnings
+from typing import Any, Optional, Union
+
+from packaging import version
+
+from snowflake.ml._internal.utils import sql_identifier
+from snowflake.ml.model.compute_pool import DEFAULT_CPU_COMPUTE_POOL
+
+logger = logging.getLogger(__name__)
+
+
+_TELEMETRY_PROJECT = "MLOps"
+_TELEMETRY_SUBPROJECT = "ModelManagement"
+
+
+class TransformersPipeline:
+    def __init__(
+        self,
+        task: Optional[str] = None,
+        model: Optional[str] = None,
+        *,
+        revision: Optional[str] = None,
+        token_or_secret: Optional[str] = None,
+        trust_remote_code: Optional[bool] = None,
+        model_kwargs: Optional[dict[str, Any]] = None,
+        compute_pool_for_log: Optional[str] = DEFAULT_CPU_COMPUTE_POOL,
+        # repo snapshot download args
+        allow_patterns: Optional[Union[list[str], str]] = None,
+        ignore_patterns: Optional[Union[list[str], str]] = None,
+        **kwargs: Any,
+    ) -> None:
+        """
+        Utility factory method to build a wrapper over transformers [`Pipeline`].
+        When deploying, this wrapper will create a real pipeline object and loading tokenizers and models.
+
+        For pipelines docs, please refer:
+        https://huggingface.co/docs/transformers/en/main_classes/pipelines#transformers.pipeline
+
+        Args:
+            task: The task that pipeline will be used. If None it would be inferred from model.
+                For available tasks, please refer Transformers's documentation. Defaults to None.
+            model: The model that will be used by the pipeline to make predictions. This can only be a model identifier
+                currently. If not provided, the default for the `task` will be loaded. Defaults to None.
+            revision: When passing a task name or a string model identifier: The specific model version to use. It can
+                be a branch name, a tag name, or a commit id, since we use a git-based system for storing models and
+                other artifacts on huggingface.co, so `revision` can be any identifier allowed by git. Defaults to None.
+            token_or_secret: The token to use as HTTP bearer authorization for remote files. Defaults to None.
+                The token can be a token or a secret. If a secret is provided, it must a fully qualified secret name.
+            trust_remote_code: Whether or not to allow for custom code defined on the Hub in their own modeling,
+                configuration, tokenization or even pipeline files. This option should only be set to `True` for
+                repositories you trust and in which you have read the code, as it will execute code present on the Hub.
+                Defaults to None.
+            model_kwargs: Additional dictionary of keyword arguments passed along to the model's `from_pretrained(...,`.
+                Defaults to None.
+            compute_pool_for_log: The compute pool to use for logging the model. Defaults to DEFAULT_CPU_COMPUTE_POOL.
+                If a string is provided, it will be used as the compute pool name. This override allows for logging
+                the model when there is no system compute pool available.
+                If None is passed,
+                    if `huggingface_hub` is installed, the model artifacts will be downloaded
+                    from the HuggingFace repository.
+                    otherwise, the only the metadata will be logged to snowflake.
+            allow_patterns: If provided, only files matching at least one pattern are downloaded.
+            ignore_patterns: If provided, files matching any of the patterns are not downloaded.
+            kwargs: Additional keyword arguments passed along to the specific pipeline init (see the documentation for
+                the corresponding pipeline class for possible values).
+
+        Raises:
+            RuntimeError: Raised when the input argument cannot determine the pipeline.
+            ValueError: Raised when the pipeline contains remote code but trust_remote_code is not set or False.
+            ValueError: Raised when having conflicting arguments.
+
+        .. # noqa: DAR003
+        """
+        import transformers
+
+        config = kwargs.get("config", None)
+        tokenizer = kwargs.get("tokenizer", None)
+        framework = kwargs.get("framework", None)
+        feature_extractor = kwargs.get("feature_extractor", None)
+
+        self.secret_identifier: Optional[str] = None
+        uses_secret = False
+        if token_or_secret is not None and isinstance(token_or_secret, str):
+            db, schema, secret_name = sql_identifier.parse_fully_qualified_name(token_or_secret)
+            if db is not None and schema is not None and secret_name is not None:
+                self.secret_identifier = sql_identifier.get_fully_qualified_name(
+                    db=db,
+                    schema=schema,
+                    object=secret_name,
+                )
+                uses_secret = True
+            else:
+                logger.info("The token_or_secret is not a fully qualified secret name. It will be used as is.")
+
+        can_download_snapshot = False
+        if compute_pool_for_log is None:
+            try:
+                import huggingface_hub as hf_hub
+
+                can_download_snapshot = True
+            except ImportError:
+                pass
+
+        if compute_pool_for_log is None and not can_download_snapshot:
+            logger.info(
+                "The model will be logged with metadata only. No model artifacts will be downloaded. "
+                "During deployment, the model artifacts will be downloaded from the HuggingFace repository."
+            )
+
+        # ==== Start pipeline logic from transformers ====
+        if model_kwargs is None:
+            model_kwargs = {}
+
+        use_auth_token = model_kwargs.pop("use_auth_token", None)
+        if use_auth_token is not None:
+            warnings.warn(
+                "The `use_auth_token` argument is deprecated and will be removed in v5 of Transformers.",
+                FutureWarning,
+                stacklevel=2,
+            )
+            if token_or_secret is not None:
+                raise ValueError(
+                    "`token_or_secret` and `use_auth_token` are both specified. "
+                    "Please set only the argument `token_or_secret`."
+                )
+            token_or_secret = use_auth_token
+
+        hub_kwargs = {
+            "revision": revision,
+            "token": token_or_secret,
+            "trust_remote_code": trust_remote_code,
+            "_commit_hash": None,
+        }
+
+        # Backward compatibility since HF interface change.
+        if version.parse(transformers.__version__) < version.parse("4.32.0"):
+            # Backward compatibility since HF interface change.
+            hub_kwargs["use_auth_token"] = hub_kwargs["token"]
+            del hub_kwargs["token"]
+
+        if task is None and model is None:
+            raise RuntimeError(
+                "Impossible to instantiate a pipeline without either a task or a model being specified. "
+            )
+
+        if model is None and tokenizer is not None:
+            raise RuntimeError(
+                "Impossible to instantiate a pipeline with tokenizer specified but not the model as the provided"
+                " tokenizer may not be compatible with the default model. Please provide an identifier to a pretrained"
+                " model when providing tokenizer."
+            )
+        if model is None and feature_extractor is not None:
+            raise RuntimeError(
+                "Impossible to instantiate a pipeline with feature_extractor specified but not the model as the "
+                "provided feature_extractor may not be compatible with the default model. Please provide an identifier"
+                " to a pretrained model when providing feature_extractor."
+            )
+
+        # ==== End pipeline logic from transformers ====
+
+        # We only support string as model argument.
+
+        if model is not None and not isinstance(model, str):
+            raise RuntimeError(f"Impossible to use non-string model as input for class {self.__class__.__name__}.")
+
+        # ==== Start pipeline logic (Config) from transformers ====
+
+        # Config is the primordial information item.
+        # Instantiate config if needed
+        config_obj = None
+
+        if not can_download_snapshot:
+            if isinstance(config, str):
+                config_obj = transformers.AutoConfig.from_pretrained(
+                    config, _from_pipeline=task, **hub_kwargs, **model_kwargs
+                )
+                hub_kwargs["_commit_hash"] = config_obj._commit_hash
+            elif config is None and isinstance(model, str):
+                config_obj = transformers.AutoConfig.from_pretrained(
+                    model, _from_pipeline=task, **hub_kwargs, **model_kwargs
+                )
+                hub_kwargs["_commit_hash"] = config_obj._commit_hash
+            # We only support string as config argument.
+            elif config is not None and not isinstance(config, str):
+                raise RuntimeError(
+                    f"Impossible to use non-string config as input for class {self.__class__.__name__}. "
+                    "Use transformers.Pipeline object if required."
+                )
+
+        # ==== Start pipeline logic (Task) from transformers ====
+
+        custom_tasks = {}
+        if config_obj is not None and len(getattr(config_obj, "custom_pipelines", {})) > 0:
+            custom_tasks = config_obj.custom_pipelines
+            if task is None and trust_remote_code is not False:
+                if len(custom_tasks) == 1:
+                    task = list(custom_tasks.keys())[0]
+                else:
+                    raise RuntimeError(
+                        "We can't infer the task automatically for this model as there are multiple tasks available. "
+                        f"Pick one in {', '.join(custom_tasks.keys())}"
+                    )
+
+        if task is None and model is not None:
+            task = transformers.pipelines.get_task(model, token_or_secret)
+
+        # Retrieve the task
+        if task in custom_tasks:
+            normalized_task = task
+            targeted_task, task_options = transformers.pipelines.clean_custom_task(custom_tasks[task])
+            if not trust_remote_code:
+                raise ValueError(
+                    "Loading this pipeline requires you to execute the code in the pipeline file in that"
+                    " repo on your local machine. Make sure you have read the code there to avoid malicious use, then"
+                    " set the option `trust_remote_code=True` to remove this error."
+                )
+        else:
+            (
+                normalized_task,
+                targeted_task,
+                task_options,
+            ) = transformers.pipelines.check_task(task)
+
+        # ==== Start pipeline logic (Model) from transformers ====
+
+        # Use default model/config/tokenizer for the task if no model is provided
+        if model is None:
+            # At that point framework might still be undetermined
+            (
+                model,
+                default_revision,
+            ) = transformers.pipelines.get_default_model_and_revision(targeted_task, framework, task_options)
+            revision = revision if revision is not None else default_revision
+            warnings.warn(
+                f"No model was supplied, defaulted to {model} and revision"
+                f" {revision} ({transformers.pipelines.HUGGINGFACE_CO_RESOLVE_ENDPOINT}/{model}).\n"
+                "Using a pipeline without specifying a model name and revision in production is not recommended.",
+                stacklevel=2,
+            )
+            if not can_download_snapshot and config is None and isinstance(model, str):
+                config_obj = transformers.AutoConfig.from_pretrained(
+                    model, _from_pipeline=task, **hub_kwargs, **model_kwargs
+                )
+                hub_kwargs["_commit_hash"] = config_obj._commit_hash
+
+        if kwargs.get("device_map", None) is not None:
+            if "device_map" in model_kwargs:
+                raise ValueError(
+                    'You cannot use both `pipeline(... device_map=..., model_kwargs={"device_map":...})` as those'
+                    " arguments might conflict, use only one.)"
+                )
+            if kwargs.get("device", None) is not None:
+                warnings.warn(
+                    "Both `device` and `device_map` are specified. `device` will override `device_map`. You"
+                    " will most likely encounter unexpected behavior. Please remove `device` and keep `device_map`.",
+                    stacklevel=2,
+                )
+
+        repo_snapshot_dir: Optional[str] = None
+        if can_download_snapshot and not uses_secret:
+            try:
+                repo_snapshot_dir = hf_hub.snapshot_download(
+                    repo_id=model,
+                    revision=revision,
+                    token=token_or_secret,
+                    allow_patterns=allow_patterns,
+                    ignore_patterns=ignore_patterns,
+                )
+            except ImportError:
+                logger.info("huggingface_hub package is not installed, skipping snapshot download")
+
+        # ==== End pipeline logic from transformers ====
+
+        self.model = model
+        self.task = normalized_task
+        self.revision = revision
+        self.token_or_secret = token_or_secret
+        self.trust_remote_code = trust_remote_code
+        self.model_kwargs = model_kwargs
+        self.tokenizer = tokenizer
+
+        self.repo_snapshot_dir = repo_snapshot_dir
+        self.compute_pool_for_log = compute_pool_for_log
+        self.__dict__.update(kwargs)
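
Note: the new TransformersPipeline wrapper captures everything needed to rebuild a real transformers pipeline at deployment time; when compute_pool_for_log=None and huggingface_hub is importable, it also snapshots the repository locally so the artifacts can be logged alongside the metadata. A minimal usage sketch, assuming a network connection and an illustrative model id that is not taken from this diff:

# Sketch only: exercising the wrapper added in 1.21.0. The model id and the
# pattern filters below are illustrative assumptions, not part of the diff.
from snowflake.ml.model.models.huggingface import TransformersPipeline

pipe = TransformersPipeline(
    task="text-generation",
    model="openai-community/gpt2",  # string model identifiers only
    revision="main",
    compute_pool_for_log=None,  # None: snapshot locally if huggingface_hub is installed
    ignore_patterns=["*.msgpack", "*.h5"],  # skip non-PyTorch weight files
)

# The wrapper exposes what it resolved: the normalized task and, if a
# snapshot was taken, the local directory holding the downloaded repo.
print(pipe.task, pipe.repo_snapshot_dir)
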
snowflake/ml/model/models/huggingface_pipeline.py
CHANGED
@@ -1,25 +1,19 @@
 import logging
-import warnings
 from typing import Any, Optional, Union
 
-from packaging import version
-
 from snowflake import snowpark
 from snowflake.ml._internal import telemetry
 from snowflake.ml._internal.human_readable_id import hrid_generator
 from snowflake.ml._internal.utils import sql_identifier
 from snowflake.ml.model._client.model import inference_engine_utils
 from snowflake.ml.model._client.ops import service_ops
+from snowflake.ml.model.models import huggingface
 from snowflake.snowpark import async_job, session
 
 logger = logging.getLogger(__name__)
 
 
-_TELEMETRY_PROJECT = "MLOps"
-_TELEMETRY_SUBPROJECT = "ModelManagement"
-
-
-class HuggingFacePipelineModel:
+class HuggingFacePipelineModel(huggingface.TransformersPipeline):
     def __init__(
         self,
         task: Optional[str] = None,
@@ -65,208 +59,25 @@ class HuggingFacePipelineModel:
 
         Return:
             A wrapper over transformers [`Pipeline`].
-
-        Raises:
-            RuntimeError: Raised when the input argument cannot determine the pipeline.
-            ValueError: Raised when the pipeline contains remote code but trust_remote_code is not set or False.
-            ValueError: Raised when having conflicting arguments.
         """
-        [old lines 74-86 are blank in the rendered diff; their content is not recoverable]
-        except ImportError:
-            pass
-
-        # ==== Start pipeline logic from transformers ====
-        if model_kwargs is None:
-            model_kwargs = {}
-
-        use_auth_token = model_kwargs.pop("use_auth_token", None)
-        if use_auth_token is not None:
-            warnings.warn(
-                "The `use_auth_token` argument is deprecated and will be removed in v5 of Transformers.",
-                FutureWarning,
-                stacklevel=2,
-            )
-            if token is not None:
-                raise ValueError(
-                    "`token` and `use_auth_token` are both specified. Please set only the argument `token`."
-                )
-            token = use_auth_token
-
-        hub_kwargs = {
-            "revision": revision,
-            "token": token,
-            "trust_remote_code": trust_remote_code,
-            "_commit_hash": None,
-        }
-
-        # Backward compatibility since HF interface change.
-        if version.parse(transformers.__version__) < version.parse("4.32.0"):
-            # Backward compatibility since HF interface change.
-            hub_kwargs["use_auth_token"] = hub_kwargs["token"]
-            del hub_kwargs["token"]
-
-        if task is None and model is None:
-            raise RuntimeError(
-                "Impossible to instantiate a pipeline without either a task or a model being specified. "
-            )
-
-        if model is None and tokenizer is not None:
-            raise RuntimeError(
-                "Impossible to instantiate a pipeline with tokenizer specified but not the model as the provided"
-                " tokenizer may not be compatible with the default model. Please provide an identifier to a pretrained"
-                " model when providing tokenizer."
-            )
-        if model is None and feature_extractor is not None:
-            raise RuntimeError(
-                "Impossible to instantiate a pipeline with feature_extractor specified but not the model as the "
-                "provided feature_extractor may not be compatible with the default model. Please provide an identifier"
-                " to a pretrained model when providing feature_extractor."
-            )
-
-        # ==== End pipeline logic from transformers ====
-
-        # We only support string as model argument.
-
-        if model is not None and not isinstance(model, str):
-            raise RuntimeError(
-                "Impossible to use non-string model as input for HuggingFacePipelineModel. Use transformers.Pipeline"
-                " object if required."
-            )
-
-        # ==== Start pipeline logic (Config) from transformers ====
-
-        # Config is the primordial information item.
-        # Instantiate config if needed
-        config_obj = None
-
-        if not _can_download_snapshot:
-            if isinstance(config, str):
-                config_obj = transformers.AutoConfig.from_pretrained(
-                    config, _from_pipeline=task, **hub_kwargs, **model_kwargs
-                )
-                hub_kwargs["_commit_hash"] = config_obj._commit_hash
-            elif config is None and isinstance(model, str):
-                config_obj = transformers.AutoConfig.from_pretrained(
-                    model, _from_pipeline=task, **hub_kwargs, **model_kwargs
-                )
-                hub_kwargs["_commit_hash"] = config_obj._commit_hash
-            # We only support string as config argument.
-            elif config is not None and not isinstance(config, str):
-                raise RuntimeError(
-                    "Impossible to use non-string config as input for HuggingFacePipelineModel. "
-                    "Use transformers.Pipeline object if required."
-                )
-
-        # ==== Start pipeline logic (Task) from transformers ====
-
-        custom_tasks = {}
-        if config_obj is not None and len(getattr(config_obj, "custom_pipelines", {})) > 0:
-            custom_tasks = config_obj.custom_pipelines
-            if task is None and trust_remote_code is not False:
-                if len(custom_tasks) == 1:
-                    task = list(custom_tasks.keys())[0]
-                else:
-                    raise RuntimeError(
-                        "We can't infer the task automatically for this model as there are multiple tasks available. "
-                        f"Pick one in {', '.join(custom_tasks.keys())}"
-                    )
-
-        if task is None and model is not None:
-            task = transformers.pipelines.get_task(model, token)
-
-        # Retrieve the task
-        if task in custom_tasks:
-            normalized_task = task
-            targeted_task, task_options = transformers.pipelines.clean_custom_task(custom_tasks[task])
-            if not trust_remote_code:
-                raise ValueError(
-                    "Loading this pipeline requires you to execute the code in the pipeline file in that"
-                    " repo on your local machine. Make sure you have read the code there to avoid malicious use, then"
-                    " set the option `trust_remote_code=True` to remove this error."
-                )
-        else:
-            (
-                normalized_task,
-                targeted_task,
-                task_options,
-            ) = transformers.pipelines.check_task(task)
-
-        # ==== Start pipeline logic (Model) from transformers ====
-
-        # Use default model/config/tokenizer for the task if no model is provided
-        if model is None:
-            # At that point framework might still be undetermined
-            (
-                model,
-                default_revision,
-            ) = transformers.pipelines.get_default_model_and_revision(targeted_task, framework, task_options)
-            revision = revision if revision is not None else default_revision
-            warnings.warn(
-                f"No model was supplied, defaulted to {model} and revision"
-                f" {revision} ({transformers.pipelines.HUGGINGFACE_CO_RESOLVE_ENDPOINT}/{model}).\n"
-                "Using a pipeline without specifying a model name and revision in production is not recommended.",
-                stacklevel=2,
-            )
-            if not _can_download_snapshot and config is None and isinstance(model, str):
-                config_obj = transformers.AutoConfig.from_pretrained(
-                    model, _from_pipeline=task, **hub_kwargs, **model_kwargs
-                )
-                hub_kwargs["_commit_hash"] = config_obj._commit_hash
-
-        if kwargs.get("device_map", None) is not None:
-            if "device_map" in model_kwargs:
-                raise ValueError(
-                    'You cannot use both `pipeline(... device_map=..., model_kwargs={"device_map":...})` as those'
-                    " arguments might conflict, use only one.)"
-                )
-            if kwargs.get("device", None) is not None:
-                warnings.warn(
-                    "Both `device` and `device_map` are specified. `device` will override `device_map`. You"
-                    " will most likely encounter unexpected behavior. Please remove `device` and keep `device_map`.",
-                    stacklevel=2,
-                )
-
-        repo_snapshot_dir: Optional[str] = None
-        if _can_download_snapshot:
-            try:
-
-                repo_snapshot_dir = hf_hub.snapshot_download(
-                    repo_id=model,
-                    revision=revision,
-                    token=token,
-                    allow_patterns=allow_patterns,
-                    ignore_patterns=ignore_patterns,
-                )
-            except ImportError:
-                logger.info("huggingface_hub package is not installed, skipping snapshot download")
-
-        # ==== End pipeline logic from transformers ====
-
-        self.task = normalized_task
-        self.model = model
-        self.revision = revision
+        logger.warning("HuggingFacePipelineModel is deprecated. Please use TransformersPipeline instead.")
+        super().__init__(
+            task=task,
+            model=model,
+            revision=revision,
+            token_or_secret=token,
+            trust_remote_code=trust_remote_code,
+            model_kwargs=model_kwargs,
+            compute_pool_for_log=None,
+            allow_patterns=allow_patterns,
+            ignore_patterns=ignore_patterns,
+            **kwargs,
+        )
         self.token = token
-        self.trust_remote_code = trust_remote_code
-        self.model_kwargs = model_kwargs
-        self.tokenizer = tokenizer
-        self.repo_snapshot_dir = repo_snapshot_dir
-        self.__dict__.update(kwargs)
 
     @telemetry.send_api_usage_telemetry(
-        project=_TELEMETRY_PROJECT,
-        subproject=_TELEMETRY_SUBPROJECT,
+        project=huggingface._TELEMETRY_PROJECT,
+        subproject=huggingface._TELEMETRY_SUBPROJECT,
         func_params_to_log=[
             "service_name",
             "image_build_compute_pool",
@@ -303,6 +114,7 @@ class HuggingFacePipelineModel:
         force_rebuild: bool = False,
         build_external_access_integrations: Optional[list[str]] = None,
         block: bool = True,
+        inference_engine_options: Optional[dict[str, Any]] = None,
         experimental_options: Optional[dict[str, Any]] = None,
     ) -> Union[str, async_job.AsyncJob]:
         """Logs a Hugging Face model and creates a service in Snowflake.
@@ -330,10 +142,8 @@ class HuggingFacePipelineModel:
             force_rebuild: Whether to force rebuild the image. Defaults to False.
             build_external_access_integrations: External access integrations for building the image. Defaults to None.
             block: Whether to block the operation. Defaults to True.
-
-
-                `inference_engine` is the name of the inference engine to use.
-                `inference_engine_args_override` is a list of string arguments to pass to the inference engine.
+            inference_engine_options: Options for the service creation with custom inference engine. Defaults to None.
+            experimental_options: Experimental options for the service creation. Defaults to None.
 
         Raises:
             ValueError: if database and schema name is not provided and session doesn't have a
@@ -346,8 +156,8 @@ class HuggingFacePipelineModel:
         .. # noqa: DAR003
         """
         statement_params = telemetry.get_statement_params(
-            project=_TELEMETRY_PROJECT,
-            subproject=_TELEMETRY_SUBPROJECT,
+            project=huggingface._TELEMETRY_PROJECT,
+            subproject=huggingface._TELEMETRY_SUBPROJECT,
        )
 
         database_name_id, schema_name_id, model_name_id = sql_identifier.parse_fully_qualified_name(model_name)
@@ -377,14 +187,14 @@ class HuggingFacePipelineModel:
 
         # Check if model is HuggingFace text-generation before doing inference engine checks
         inference_engine_args = None
-        if
+        if inference_engine_options:
             if self.task != "text-generation":
                 raise ValueError(
-                    "Currently, InferenceEngine using
+                    "Currently, InferenceEngine using inference_engine_options is only supported for "
                     "HuggingFace text-generation models."
                 )
 
-            inference_engine_args = inference_engine_utils._get_inference_engine_args(
+            inference_engine_args = inference_engine_utils._get_inference_engine_args(inference_engine_options)
 
         # Enrich inference engine args if inference engine is specified
         if inference_engine_args is not None:
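
Note: HuggingFacePipelineModel is now a thin deprecated shim. It logs a warning, maps its token argument to token_or_secret, forces compute_pool_for_log=None (preserving the old snapshot-download behavior), and defers everything else to TransformersPipeline.__init__. A sketch of the equivalence, with the model id again an illustrative assumption:

# Sketch: both objects run the same __init__ logic after this change; the
# legacy class additionally emits a deprecation warning. Model id is assumed.
from snowflake.ml.model.models.huggingface import TransformersPipeline
from snowflake.ml.model.models.huggingface_pipeline import HuggingFacePipelineModel

legacy = HuggingFacePipelineModel(task="text-generation", model="openai-community/gpt2")
assert isinstance(legacy, TransformersPipeline)  # subclass relationship introduced here

current = TransformersPipeline(
    task="text-generation",
    model="openai-community/gpt2",
    compute_pool_for_log=None,  # the value the legacy shim passes on your behalf
)
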
snowflake/ml/model/type_hints.py
CHANGED
@@ -13,6 +13,10 @@ from typing import (
 import numpy.typing as npt
 from typing_extensions import NotRequired
 
+from snowflake.ml.model.compute_pool import (
+    DEFAULT_CPU_COMPUTE_POOL,
+    DEFAULT_GPU_COMPUTE_POOL,
+)
 from snowflake.ml.model.target_platform import TargetPlatform
 from snowflake.ml.model.task import Task
 from snowflake.ml.model.volatility import Volatility
@@ -380,4 +384,4 @@ class ProgressStatus(Protocol):
     ...
 
 
-__all__ = ["TargetPlatform", "Task"]
+__all__ = ["TargetPlatform", "Task", "DEFAULT_CPU_COMPUTE_POOL", "DEFAULT_GPU_COMPUTE_POOL"]
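
Note: with the widened __all__, the default compute pool names re-exported by type_hints can be imported next to TargetPlatform and Task:

# The two pool constants now ship with the public type hints.
from snowflake.ml.model.type_hints import (
    DEFAULT_CPU_COMPUTE_POOL,
    DEFAULT_GPU_COMPUTE_POOL,
    TargetPlatform,
    Task,
)

print(DEFAULT_CPU_COMPUTE_POOL, DEFAULT_GPU_COMPUTE_POOL)
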
snowflake/ml/monitoring/_client/model_monitor_sql_client.py
CHANGED
@@ -92,6 +92,9 @@ class ModelMonitorSQLClient:
         baseline: Optional[sql_identifier.SqlIdentifier] = None,
         segment_columns: Optional[list[sql_identifier.SqlIdentifier]] = None,
         custom_metric_columns: Optional[list[sql_identifier.SqlIdentifier]] = None,
+        timestamp_custom_metric_database: Optional[sql_identifier.SqlIdentifier] = None,
+        timestamp_custom_metric_schema: Optional[sql_identifier.SqlIdentifier] = None,
+        timestamp_custom_metric_table: Optional[sql_identifier.SqlIdentifier] = None,
         statement_params: Optional[dict[str, Any]] = None,
     ) -> None:
         baseline_sql = ""
@@ -106,6 +109,14 @@ class ModelMonitorSQLClient:
         if custom_metric_columns:
             custom_metric_columns_sql = f"CUSTOM_METRIC_COLUMNS={_build_sql_list_from_columns(custom_metric_columns)}"
 
+        timestamp_custom_metric_table_sql = ""
+        if timestamp_custom_metric_table:
+            timestamp_custom_metric_table_sql = (
+                f"TIMESTAMP_CUSTOM_METRIC_TABLE="
+                f"{self._infer_qualified_schema(timestamp_custom_metric_database, timestamp_custom_metric_schema)}."
+                f"{timestamp_custom_metric_table}"
+            )
+
         query_result_checker.SqlResultValidator(
             self._sql_client._session,
             f"""
@@ -126,6 +137,7 @@ class ModelMonitorSQLClient:
             AGGREGATION_WINDOW='{aggregation_window}'
             {segment_columns_sql}
             {custom_metric_columns_sql}
+            {timestamp_custom_metric_table_sql}
             {baseline_sql}""",
             statement_params=statement_params,
         ).has_column("status").has_dimensions(1, 1).validate()
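
Note: the new branch contributes one optional clause to the monitor DDL that SqlResultValidator already executes. A sketch of the rendered clause, using hypothetical identifiers:

# Hypothetical identifiers; mirrors the f-string assembled in the hunk above.
db, schema, table = "MY_DB", "MY_SCHEMA", "TS_CUSTOM_METRICS"
timestamp_custom_metric_table_sql = f"TIMESTAMP_CUSTOM_METRIC_TABLE={db}.{schema}.{table}"
# It is interpolated into the DDL right after {custom_metric_columns_sql}
# and stays an empty string when no table is supplied.
print(timestamp_custom_metric_table_sql)
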
snowflake/ml/monitoring/_manager/model_monitor_manager.py
CHANGED
@@ -100,6 +100,15 @@ class ModelMonitorManager:
             if source_config.baseline
             else (None, None, None)
         )
+        (
+            timestamp_custom_metric_database_name_id,
+            timestamp_custom_metric_schema_name_id,
+            timestamp_custom_metric_table_name_id,
+        ) = (
+            sql_identifier.parse_fully_qualified_name(source_config.timestamp_custom_metric_table)
+            if source_config.timestamp_custom_metric_table
+            else (None, None, None)
+        )
         model_database_name_id, model_schema_name_id, model_name_id = sql_identifier.parse_fully_qualified_name(
             model_monitor_config.model_version.fully_qualified_model_name
         )
@@ -155,6 +164,9 @@ class ModelMonitorManager:
             baseline_database=baseline_database_name_id,
             baseline_schema=baseline_schema_name_id,
             baseline=baseline_name_id,
+            timestamp_custom_metric_database=timestamp_custom_metric_database_name_id,
+            timestamp_custom_metric_schema=timestamp_custom_metric_schema_name_id,
+            timestamp_custom_metric_table=timestamp_custom_metric_table_name_id,
             statement_params=self.statement_params,
         )
         return model_monitor.ModelMonitor._ref(