snowflake-ml-python 1.7.4__py3-none-any.whl → 1.7.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +64 -21
- snowflake/ml/_internal/relax_version_strategy.py +16 -0
- snowflake/ml/_internal/telemetry.py +21 -0
- snowflake/ml/data/_internal/arrow_ingestor.py +1 -1
- snowflake/ml/feature_store/feature_store.py +18 -0
- snowflake/ml/feature_store/feature_view.py +46 -1
- snowflake/ml/jobs/_utils/constants.py +7 -1
- snowflake/ml/jobs/_utils/payload_utils.py +139 -53
- snowflake/ml/jobs/_utils/spec_utils.py +5 -7
- snowflake/ml/jobs/decorators.py +5 -25
- snowflake/ml/jobs/job.py +4 -4
- snowflake/ml/model/_packager/model_env/model_env.py +45 -28
- snowflake/ml/model/_packager/model_handlers/_utils.py +8 -4
- snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +16 -0
- snowflake/ml/model/_packager/model_handlers/keras.py +230 -0
- snowflake/ml/model/_packager/model_handlers/pytorch.py +1 -0
- snowflake/ml/model/_packager/model_handlers/sklearn.py +28 -3
- snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +74 -21
- snowflake/ml/model/_packager/model_handlers/tensorflow.py +27 -49
- snowflake/ml/model/_packager/model_handlers_migrator/tensorflow_migrator_2023_12_01.py +48 -0
- snowflake/ml/model/_packager/model_meta/model_meta.py +1 -1
- snowflake/ml/model/_packager/model_meta/model_meta_schema.py +3 -0
- snowflake/ml/model/_packager/model_runtime/_snowml_inference_alternative_requirements.py +1 -1
- snowflake/ml/model/_packager/model_runtime/model_runtime.py +4 -1
- snowflake/ml/model/_packager/model_task/model_task_utils.py +5 -1
- snowflake/ml/model/_signatures/core.py +2 -2
- snowflake/ml/model/_signatures/numpy_handler.py +5 -5
- snowflake/ml/model/_signatures/pandas_handler.py +9 -7
- snowflake/ml/model/_signatures/pytorch_handler.py +1 -1
- snowflake/ml/model/model_signature.py +8 -0
- snowflake/ml/model/type_hints.py +15 -0
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +14 -1
- snowflake/ml/modeling/pipeline/pipeline.py +18 -1
- snowflake/ml/modeling/preprocessing/polynomial_features.py +2 -2
- snowflake/ml/registry/registry.py +34 -4
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.7.4.dist-info → snowflake_ml_python-1.7.5.dist-info}/METADATA +58 -25
- {snowflake_ml_python-1.7.4.dist-info → snowflake_ml_python-1.7.5.dist-info}/RECORD +41 -38
- {snowflake_ml_python-1.7.4.dist-info → snowflake_ml_python-1.7.5.dist-info}/WHEEL +1 -1
- {snowflake_ml_python-1.7.4.dist-info → snowflake_ml_python-1.7.5.dist-info}/LICENSE.txt +0 -0
- {snowflake_ml_python-1.7.4.dist-info → snowflake_ml_python-1.7.5.dist-info}/top_level.txt +0 -0
snowflake/ml/model/_packager/model_task/model_task_utils.py
CHANGED

```diff
@@ -24,7 +24,11 @@ def get_task_skl(model: Union["sklearn.base.BaseEstimator", "sklearn.pipeline.Pi
     from sklearn.base import is_classifier, is_regressor
 
     if type_utils.LazyType("sklearn.pipeline.Pipeline").isinstance(model):
-        return type_hints.Task.UNKNOWN
+        if hasattr(model, "predict_proba") or hasattr(model, "predict"):
+            model = model.steps[-1][1]  # type: ignore[attr-defined]
+            return _get_model_task(model)
+        else:
+            return type_hints.Task.UNKNOWN
     if is_regressor(model):
         return type_hints.Task.TABULAR_REGRESSION
     if is_classifier(model):
```
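For orientation, a minimal sketch (using only public scikit-learn APIs, not library internals) of what the new branch does: a fitted `Pipeline` is unwrapped to its final step and the task is detected from that estimator.

```python
# Sketch: how task detection now treats a fitted sklearn Pipeline.
# `pipe.steps[-1][1]` is the final estimator, mirroring the diff above.
from sklearn.base import is_classifier, is_regressor
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

pipe = Pipeline([("scale", StandardScaler()), ("clf", LogisticRegression())])

final_estimator = pipe.steps[-1][1]  # last (name, estimator) tuple
print(is_classifier(final_estimator))  # True  -> tabular classification
print(is_regressor(final_estimator))   # False
```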
snowflake/ml/model/_signatures/core.py
CHANGED

```diff
@@ -282,7 +282,7 @@ class FeatureSpec(BaseFeatureSpec):
             result_type = spt.ArrayType(result_type)
         return result_type
 
-    def as_dtype(self) -> Union[npt.DTypeLike, str, PandasExtensionTypes]:
+    def as_dtype(self, force_numpy_dtype: bool = False) -> Union[npt.DTypeLike, str, PandasExtensionTypes]:
         """Convert to corresponding local Type."""
 
         if not self._shape:
@@ -291,7 +291,7 @@ class FeatureSpec(BaseFeatureSpec):
                 return self._dtype._value
 
             np_type = self._dtype._numpy_type
-            if self._nullable:
+            if self._nullable and not force_numpy_dtype:
                 np_to_pd_dtype_mapping = {
                     np.int8: pd.Int8Dtype(),
                     np.int16: pd.Int16Dtype(),
```
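The `force_numpy_dtype` escape hatch matters because nullable features otherwise map to pandas extension dtypes rather than plain numpy ones. A small illustration (plain pandas/numpy, independent of the library):

```python
import numpy as np
import pandas as pd

# Plain numpy integer dtypes have no NA value, so pandas silently
# promotes missing ints to float64:
s = pd.Series([1, 2, None])
print(s.dtype)  # float64

# The pandas extension dtype keeps integers while allowing <NA>,
# which is what as_dtype() returns for nullable features...
print(s.astype(pd.Int64Dtype()).dtype)  # Int64

# ...whereas force_numpy_dtype=True presumably falls back to the raw
# numpy type for consumers that cannot handle extension dtypes:
print(np.dtype(np.int64))  # int64
```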
snowflake/ml/model/_signatures/numpy_handler.py
CHANGED

```diff
@@ -50,7 +50,7 @@ class NumpyArrayHandler(base_handler.BaseDataHandler[model_types._SupportedNumpy
         dtype = core.DataType.from_numpy_type(data.dtype)
         role_prefix = (NumpyArrayHandler.INPUT_PREFIX if role == "input" else NumpyArrayHandler.OUTPUT_PREFIX) + "_"
         if len(data.shape) == 1:
-            return [core.FeatureSpec(dtype=dtype, name=f"{role_prefix}{feature_prefix}0")]
+            return [core.FeatureSpec(dtype=dtype, name=f"{role_prefix}{feature_prefix}0", nullable=False)]
         else:
             # For high-dimension array, 0-axis is for batch, 1-axis is for column, further more is details of columns.
             features = []
@@ -59,9 +59,9 @@ class NumpyArrayHandler(base_handler.BaseDataHandler[model_types._SupportedNumpy
             for col_data, ft_name in zip(data[0], ft_names):
                 if isinstance(col_data, np.ndarray):
                     ft_shape = np.shape(col_data)
-                    features.append(core.FeatureSpec(dtype=dtype, name=ft_name, shape=ft_shape))
+                    features.append(core.FeatureSpec(dtype=dtype, name=ft_name, shape=ft_shape, nullable=False))
                 else:
-                    features.append(core.FeatureSpec(dtype=dtype, name=ft_name))
+                    features.append(core.FeatureSpec(dtype=dtype, name=ft_name, nullable=False))
             return features
 
     @staticmethod
@@ -118,10 +118,10 @@ class SeqOfNumpyArrayHandler(base_handler.BaseDataHandler[Sequence[model_types._
             dtype = core.DataType.from_numpy_type(data_col.dtype)
             ft_name = f"{role_prefix}{feature_prefix}{i}"
             if len(data_col.shape) == 1:
-                features.append(core.FeatureSpec(dtype=dtype, name=ft_name))
+                features.append(core.FeatureSpec(dtype=dtype, name=ft_name, nullable=False))
             else:
                 ft_shape = tuple(data_col.shape[1:])
-                features.append(core.FeatureSpec(dtype=dtype, name=ft_name, shape=ft_shape))
+                features.append(core.FeatureSpec(dtype=dtype, name=ft_name, shape=ft_shape, nullable=False))
         return features
 
     @staticmethod
```
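The effect is visible through the public `infer_signature` API: features inferred from numpy arrays are now explicitly non-nullable. A sketch (the printed repr is illustrative, not exact):

```python
import numpy as np
from snowflake.ml.model import model_signature

sig = model_signature.infer_signature(
    input_data=np.array([[1.0, 2.0], [3.0, 4.0]]),
    output_data=np.array([0.5, 0.7]),
)
# Each inferred FeatureSpec should now carry nullable=False, e.g.:
# [FeatureSpec(dtype=DataType.DOUBLE, name='input_feature_0', nullable=False), ...]
print(sig.inputs)
```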
snowflake/ml/model/_signatures/pandas_handler.py
CHANGED

```diff
@@ -72,13 +72,6 @@ class PandasDataFrameHandler(base_handler.BaseDataHandler[pd.DataFrame]):
         df_col_dtypes = [data[col].dtype for col in data.columns]
         for df_col, df_col_dtype in zip(df_cols, df_col_dtypes):
             df_col_data = data[df_col]
-            if df_col_data.isnull().all():
-                raise snowml_exceptions.SnowflakeMLException(
-                    error_code=error_codes.INVALID_DATA,
-                    original_exception=ValueError(
-                        f"Data Validation Error: There is no non-null data in column {df_col}."
-                    ),
-                )
             if df_col_data.isnull().any():
                 warnings.warn(
                     (
@@ -163,6 +156,15 @@ class PandasDataFrameHandler(base_handler.BaseDataHandler[pd.DataFrame]):
         specs = []
         for df_col, df_col_dtype, ft_name in zip(df_cols, df_col_dtypes, ft_names):
             df_col_data = data[df_col]
+
+            if df_col_data.isnull().all():
+                raise snowml_exceptions.SnowflakeMLException(
+                    error_code=error_codes.INVALID_DATA,
+                    original_exception=ValueError(
+                        "Data Validation Error: "
+                        f"There is no non-null data in column {df_col} so the signature cannot be inferred."
+                    ),
+                )
             if df_col_data.isnull().any():
                 df_col_data = utils.series_dropna(df_col_data)
                 df_col_dtype = df_col_data.dtype
```
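Net effect of the two hunks: the all-null check moves out of generic data validation (which also guards inference calls) and into signature inference only. A sketch of the expected behavior:

```python
import pandas as pd
from snowflake.ml.model import model_signature

df = pd.DataFrame({"a": [1.0, 2.0], "b": [None, None]})

# Signature inference still needs at least one non-null value per column
# to determine its type, so this should raise the new, clearer error:
try:
    model_signature.infer_signature(input_data=df, output_data=df[["a"]])
except Exception as exc:
    print(type(exc).__name__, exc)

# Validation against an *existing* signature no longer rejects all-null
# columns, so frames like `df` can be passed to ModelVersion.run.
```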
snowflake/ml/model/_signatures/pytorch_handler.py
CHANGED

```diff
@@ -30,7 +30,7 @@ class SeqOfPyTorchTensorHandler(base_handler.BaseDataHandler[Sequence["torch.Ten
 
     @staticmethod
     def count(data: Sequence["torch.Tensor"]) -> int:
-        return min(data_col.shape[0] for data_col in data)
+        return min(data_col.shape[0] for data_col in data)
 
     @staticmethod
     def truncate(data: Sequence["torch.Tensor"], length: int) -> Sequence["torch.Tensor"]:
```
snowflake/ml/model/model_signature.py
CHANGED

```diff
@@ -21,6 +21,7 @@ from typing_extensions import Never
 import snowflake.snowpark
 import snowflake.snowpark.functions as F
 import snowflake.snowpark.types as spt
+from snowflake.ml._internal import telemetry
 from snowflake.ml._internal.exceptions import (
     error_codes,
     exceptions as snowml_exceptions,
@@ -56,6 +57,9 @@ _LOCAL_DATA_HANDLERS: List[Type[base_handler.BaseDataHandler[Any]]] = [
 ]
 _ALL_DATA_HANDLERS = _LOCAL_DATA_HANDLERS + [snowpark_handler.SnowparkDataFrameHandler]
 
+_TELEMETRY_PROJECT = "MLOps"
+_MODEL_TELEMETRY_SUBPROJECT = "ModelSignature"
+
 
 def _truncate_data(
     data: model_types.SupportedDataType,
@@ -687,6 +691,10 @@ def _convert_and_validate_local_data(
     return df
 
 
+@telemetry.send_api_usage_telemetry(
+    project=_TELEMETRY_PROJECT,
+    subproject=_MODEL_TELEMETRY_SUBPROJECT,
+)
 def infer_signature(
     input_data: model_types.SupportedLocalDataType,
     output_data: model_types.SupportedLocalDataType,
```
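For readers unfamiliar with the decorator being attached to `infer_signature`: it follows the usual parametrized-decorator pattern. A toy stand-in (not the internal implementation, which records usage against the Snowflake session):

```python
import functools

def send_api_usage_telemetry(*, project: str, subproject: str):
    """Toy stand-in for telemetry.send_api_usage_telemetry."""
    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            # The real decorator emits a usage record tagged with
            # project/subproject; here we just print it.
            print(f"telemetry: {project}/{subproject} -> {func.__name__}")
            return func(*args, **kwargs)
        return wrapper
    return decorator

@send_api_usage_telemetry(project="MLOps", subproject="ModelSignature")
def infer_signature(input_data, output_data):
    ...
```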
snowflake/ml/model/type_hints.py
CHANGED
```diff
@@ -7,6 +7,7 @@ from typing_extensions import NotRequired
 
 if TYPE_CHECKING:
     import catboost
+    import keras
     import lightgbm
     import mlflow
     import numpy as np
@@ -68,6 +69,7 @@ SupportedRequireSignatureModelType = Union[
     "torch.nn.Module",
     "torch.jit.ScriptModule",
     "tensorflow.Module",
+    "keras.Model",
 ]
 
 SupportedNoSignatureRequirementsModelType = Union[
@@ -103,6 +105,7 @@ Here is all acceptable types of Snowflake native model packaging and its handler
 | transformers.Pipeline | huggingface_pipeline.py | _HuggingFacePipelineHandler |
 | huggingface_pipeline.HuggingFacePipelineModel | huggingface_pipeline.py | _HuggingFacePipelineHandler |
 | sentence_transformers.SentenceTransformer | sentence_transformers.py | _SentenceTransformerHandler |
+| keras.Model | keras.py | _KerasHandler |
 """
 
 SupportedModelHandlerType = Literal[
@@ -118,6 +121,7 @@ SupportedModelHandlerType = Literal[
     "tensorflow",
     "torchscript",
     "xgboost",
+    "keras",
 ]
 
 _ModelType = TypeVar("_ModelType", bound=SupportedModelType)
@@ -202,6 +206,11 @@ class SentenceTransformersSaveOptions(BaseModelSaveOption):
     batch_size: NotRequired[int]
 
 
+class KerasSaveOptions(BaseModelSaveOption):
+    target_methods: NotRequired[Sequence[str]]
+    cuda_version: NotRequired[str]
+
+
 ModelSaveOption = Union[
     BaseModelSaveOption,
     CatBoostModelSaveOptions,
@@ -216,6 +225,7 @@ ModelSaveOption = Union[
     MLFlowSaveOptions,
     HuggingFaceSaveOptions,
     SentenceTransformersSaveOptions,
+    KerasSaveOptions,
 ]
 
 
@@ -276,6 +286,10 @@ class SentenceTransformersLoadOptions(BaseModelLoadOption):
     device: NotRequired[str]
 
 
+class KerasLoadOptions(BaseModelLoadOption):
+    use_gpu: NotRequired[bool]
+
+
 ModelLoadOption = Union[
     BaseModelLoadOption,
     CatBoostModelLoadOptions,
@@ -290,6 +304,7 @@ ModelLoadOption = Union[
     MLFlowLoadOptions,
     HuggingFaceLoadOptions,
     SentenceTransformersLoadOptions,
+    KerasLoadOptions,
 ]
 
 
```
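How the new TypedDicts would surface to users, sketched against the public registry API (the model object, `session`, and the `cuda_version` value are assumptions):

```python
from snowflake.ml.registry import Registry

registry = Registry(session=session)  # `session`: an existing Snowpark session

mv = registry.log_model(
    model=keras_model,            # a keras.Model instance (assumed)
    model_name="my_keras_model",
    version_name="v1",
    options={                     # keys from KerasSaveOptions
        "target_methods": ["predict"],
        "cuda_version": "11.8",   # hypothetical value
    },
)

loaded = mv.load(options={"use_gpu": True})  # key from KerasLoadOptions
```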
snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py
CHANGED

```diff
@@ -199,8 +199,21 @@ class SnowparkTransformHandlers:
         if expected_output_cols_type == "":
             expected_output_cols_type = "string"
         assert expected_output_cols_type is not None
+
+        # If there is only one output column, the UDF might have generate complex objects (lists, dicts).
+        # In such cases, we attempt to not do explicit cast. (Example: PolynomialFeatures.transform)
+        try_parse_object = len(expected_output_cols) == 1 and expected_output_cols_type != "string"
         for output_feature in expected_output_cols:
-            output_cols.append(F.col(INTERMEDIATE_OBJ_NAME)[output_feature].astype(expected_output_cols_type))
+            column_expr = F.col(INTERMEDIATE_OBJ_NAME)[output_feature]
+
+            if try_parse_object and df_res.count() > 0:
+                # Only do type casting if it's not an array
+                if not df_res.select(F.is_array(column_expr)).first()[0]:
+                    column_expr = column_expr.astype(expected_output_cols_type)
+            else:
+                column_expr = column_expr.astype(expected_output_cols_type)
+
+            output_cols.append(column_expr)
             output_col_names.append(identifier.get_inferred_name(output_feature))
 
         # Extract output from INTERMEDIATE_OBJ_NAME and drop that column
```
snowflake/ml/modeling/pipeline/pipeline.py
CHANGED

```diff
@@ -854,6 +854,7 @@ class Pipeline(base.BaseTransformer):
         # Create a fitted sklearn pipeline object by translating each non-estimator step in pipeline with with
         # a fitted column transformer.
         sksteps = []
+        i = 0
         for i, (name, trans) in enumerate(self._get_transformers()):
             if isinstance(trans, base.BaseTransformer):
                 trans = self._construct_fitted_column_transformer_object(
@@ -899,7 +900,23 @@ class Pipeline(base.BaseTransformer):
         if estimator_step:
             estimator_signatures = estimator_step[1].model_signatures
             for method, signature in estimator_signatures.items():
-                self._model_signature_dict[method] = ModelSignature(inputs=inputs_signature, outputs=signature.outputs)
+                # Add the inferred input signature to the model signature dictionary for each method
+                self._model_signature_dict[method] = ModelSignature(
+                    inputs=inputs_signature,
+                    outputs=(
+                        # If _drop_input_cols is True, do not include any input columns in the output signature
+                        []
+                        if self._drop_input_cols
+                        else [
+                            # Include input columns in the output signature if they are not already present
+                            # Those already present means they are overwritten by the output of the estimator
+                            spec
+                            for spec in inputs_signature
+                            if spec.name not in [_spec.name for _spec in signature.outputs]
+                        ]
+                    )
+                    + signature.outputs,  # Append the existing output signature
+                )
 
     @property
     def model_signatures(self) -> Dict[str, ModelSignature]:
```
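A pure-Python sketch of the output-signature composition above: inputs the estimator does not overwrite are carried through, then the estimator's outputs are appended (the column names are illustrative):

```python
input_names = ["SEPAL_LEN", "SEPAL_WID"]
estimator_outputs = ["SEPAL_WID", "PREDICTION"]  # estimator overwrites SEPAL_WID
drop_input_cols = False

carried = [] if drop_input_cols else [
    n for n in input_names if n not in estimator_outputs
]
print(carried + estimator_outputs)
# ['SEPAL_LEN', 'SEPAL_WID', 'PREDICTION'] -- no duplicated column names
```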
snowflake/ml/modeling/preprocessing/polynomial_features.py
CHANGED

```diff
@@ -337,7 +337,7 @@ class PolynomialFeatures(BaseTransformer):
         transform_kwargs: BatchInferenceKwargsTypedDict = dict()
 
         if isinstance(dataset, DataFrame):
-            expected_type_inferred = ""
+            expected_type_inferred = "float"
             # when it is classifier, infer the datatype from label columns
             if expected_type_inferred == "" and 'predict' in self.model_signatures:
                 # Batch inference takes a single expected output column type. Use the first columns type for now.
@@ -415,7 +415,7 @@ class PolynomialFeatures(BaseTransformer):
         # are specific to the type of dataset used.
         transform_kwargs: BatchInferenceKwargsTypedDict = dict()
         if isinstance(dataset, DataFrame):
-            expected_dtype = ""
+            expected_dtype = "float"
             if False:  # is child of _BaseHeterogeneousEnsemble
                 # transform() method of HeterogeneousEnsemble estimators return responses of varying shapes
                 # from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes) (and everything in between)
```
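Changing the expected dtype from `""` to `"float"` works together with the `is_array` check in the snowpark handler above: a transformer whose single output column holds an array per row keeps the array, while scalar outputs get cast. Hypothetical usage (the column names and DataFrame are assumptions):

```python
from snowflake.ml.modeling.preprocessing import PolynomialFeatures

poly = PolynomialFeatures(degree=2, input_cols=["X1", "X2"], output_cols=["POLY"])
poly.fit(train_df)                 # train_df: a Snowpark DataFrame (assumed)
result = poly.transform(train_df)  # POLY holds an array of floats per row,
                                   # now left uncast instead of forced to "float"
```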
snowflake/ml/registry/registry.py
CHANGED

```diff
@@ -78,7 +78,7 @@ class Registry:
             session, database_name=self._database_name, schema_name=self._schema_name
         )
 
-        self.enable_monitoring = options.get("enable_monitoring", False) if options else False
+        self.enable_monitoring = options.get("enable_monitoring", True) if options else True
         if self.enable_monitoring:
             monitor_statement_params = telemetry.get_statement_params(
                 project=telemetry.TelemetryProject.MLOPS.value,
```
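Monitoring is now on by default; opting out remains possible through the constructor options (a sketch, with `session` assumed to exist):

```python
from snowflake.ml.registry import Registry

# Explicitly disable the monitoring API exposed by default in 1.7.5:
registry = Registry(session=session, options={"enable_monitoring": False})
```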
```diff
@@ -162,8 +162,12 @@ class Registry:
             - relax_version: Whether to relax the version constraints of the dependencies when running in the
                 Warehouse. It detects any ==x.y.z in specifiers and replaced with >=x.y, <(x+1). Defaults to True.
             - function_type: Set the method function type globally. To set method function types individually see
-                function_type in model_options.
-            - method_options: Per-method saving options
+                function_type in model_options.
+            - method_options: Per-method saving options. This dictionary has method names as keys and dictionary
+                values with the desired options.
+
+                The following are the available method options:
+
             - case_sensitive: Indicates whether the method and its signature should be case sensitive.
                 This means when you refer the method in the SQL, you need to double quote it.
                 This will be helpful if you need case to tell apart your methods or features, or you have
@@ -283,7 +287,11 @@ class Registry:
                 Warehouse. It detects any ==x.y.z in specifiers and replaced with >=x.y, <(x+1). Defaults to True.
             - function_type: Set the method function type globally. To set method function types individually see
                 function_type in model_options.
-            - method_options: Per-method saving options
+            - method_options: Per-method saving options. This dictionary has method names as keys and dictionary
+                values with the desired options. See the example below.
+
+                The following are the available method options:
+
             - case_sensitive: Indicates whether the method and its signature should be case sensitive.
                 This means when you refer the method in the SQL, you need to double quote it.
                 This will be helpful if you need case to tell apart your methods or features, or you have
@@ -294,6 +302,28 @@ class Registry:
 
         Returns:
             ModelVersion: ModelVersion object corresponding to the model just logged.
+
+        Example::
+
+            from snowflake.ml.registry import Registry
+
+            # create a session
+            session = ...
+
+            registry = Registry(session=session)
+
+            # Define `method_options` for each inference method if needed.
+            method_options={
+                "predict": {
+                    "case_sensitive": True
+                }
+            }
+
+            registry.log_model(
+                model=model,
+                model_name="my_model",
+                method_options=method_options,
+            )
         """
         statement_params = telemetry.get_statement_params(
             project=_TELEMETRY_PROJECT,
```
snowflake/ml/version.py
CHANGED
```diff
@@ -1 +1 @@
-VERSION="1.7.4"
+VERSION="1.7.5"
```
{snowflake_ml_python-1.7.4.dist-info → snowflake_ml_python-1.7.5.dist-info}/METADATA
CHANGED

```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: snowflake-ml-python
-Version: 1.7.4
+Version: 1.7.5
 Summary: The machine learning client library that is used for interacting with Snowflake to build machine learning solutions.
 Author-email: "Snowflake, Inc" <support@snowflake.com>
 License:
@@ -223,13 +223,14 @@ Classifier: Operating System :: OS Independent
 Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
 Classifier: Topic :: Database
 Classifier: Topic :: Software Development
 Classifier: Topic :: Software Development :: Libraries
 Classifier: Topic :: Software Development :: Libraries :: Application Frameworks
 Classifier: Topic :: Software Development :: Libraries :: Python Modules
 Classifier: Topic :: Scientific/Engineering :: Information Analysis
-Requires-Python: <3.12,>=3.9
+Requires-Python: <3.13,>=3.9
 Description-Content-Type: text/markdown
 License-File: LICENSE.txt
 Requires-Dist: absl-py<2,>=0.15
@@ -257,39 +258,40 @@ Requires-Dist: typing-extensions<5,>=4.1.0
 Requires-Dist: xgboost<3,>=1.7.3
 Provides-Extra: all
 Requires-Dist: catboost<2,>=1.2.0; extra == "all"
-Requires-Dist: …
+Requires-Dist: keras<4,>=2.0.0; extra == "all"
 Requires-Dist: lightgbm<5,>=4.1.0; extra == "all"
 Requires-Dist: mlflow<3,>=2.16.0; extra == "all"
-Requires-Dist: …
-Requires-Dist: …
-Requires-Dist: sentencepiece<1,>=0.1.95; extra == "all"
+Requires-Dist: sentence-transformers<3,>=2.7.0; extra == "all"
+Requires-Dist: sentencepiece<0.2.0,>=0.1.95; extra == "all"
 Requires-Dist: shap<1,>=0.46.0; extra == "all"
-Requires-Dist: tensorflow<3,>=2.…
-Requires-Dist: tokenizers<1,>=0.…
-Requires-Dist: torch<…
+Requires-Dist: tensorflow<3,>=2.17.0; extra == "all"
+Requires-Dist: tokenizers<1,>=0.15.1; extra == "all"
+Requires-Dist: torch<3,>=2.0.1; extra == "all"
 Requires-Dist: torchdata<1,>=0.4; extra == "all"
-Requires-Dist: transformers<5,>=4.…
+Requires-Dist: transformers<5,>=4.37.2; extra == "all"
 Provides-Extra: catboost
 Requires-Dist: catboost<2,>=1.2.0; extra == "catboost"
+Provides-Extra: keras
+Requires-Dist: keras<4,>=2.0.0; extra == "keras"
+Requires-Dist: tensorflow<3,>=2.17.0; extra == "keras"
+Requires-Dist: torch<3,>=2.0.1; extra == "keras"
 Provides-Extra: lightgbm
 Requires-Dist: lightgbm<5,>=4.1.0; extra == "lightgbm"
-Provides-Extra: llm
-Requires-Dist: peft<1,>=0.5.0; extra == "llm"
 Provides-Extra: mlflow
 Requires-Dist: mlflow<3,>=2.16.0; extra == "mlflow"
 Provides-Extra: shap
 Requires-Dist: shap<1,>=0.46.0; extra == "shap"
 Provides-Extra: tensorflow
-Requires-Dist: tensorflow<3,>=2.…
+Requires-Dist: tensorflow<3,>=2.17.0; extra == "tensorflow"
 Provides-Extra: torch
-Requires-Dist: torch<…
+Requires-Dist: torch<3,>=2.0.1; extra == "torch"
 Requires-Dist: torchdata<1,>=0.4; extra == "torch"
 Provides-Extra: transformers
-Requires-Dist: …
-Requires-Dist: …
-Requires-Dist: …
-Requires-Dist: …
-Requires-Dist: transformers<5,>=4.…
+Requires-Dist: sentence-transformers<3,>=2.7.0; extra == "transformers"
+Requires-Dist: sentencepiece<0.2.0,>=0.1.95; extra == "transformers"
+Requires-Dist: tokenizers<1,>=0.15.1; extra == "transformers"
+Requires-Dist: torch<3,>=2.0.1; extra == "transformers"
+Requires-Dist: transformers<5,>=4.37.2; extra == "transformers"
 
 # Snowpark ML
 
```
```diff
@@ -346,7 +348,7 @@ If you don't have a Snowflake account yet, you can [sign up for a 30-day free tr
 Follow the [installation instructions](https://docs.snowflake.com/en/developer-guide/snowpark-ml/index#installing-snowpark-ml)
 in the Snowflake documentation.
 
-Python versions 3.9 to 3.11 are supported. You can use [miniconda](https://docs.conda.io/en/latest/miniconda.html) or
+Python versions 3.9 to 3.12 are supported. You can use [miniconda](https://docs.conda.io/en/latest/miniconda.html) or
 [anaconda](https://www.anaconda.com/) to create a Conda environment (recommended),
 or [virtualenv](https://docs.python.org/3/tutorial/venv.html) to create a virtual environment.
 
@@ -399,28 +401,54 @@ NOTE: Version 1.7.0 is used as example here. Please choose the the latest versio
 
 # Release History
 
-## 1.7.4
+## 1.7.5
+
+- Support Python 3.12.
+- Explainability: Support native and snowml sklearn pipeline
+
+### Bug Fixes
+
+- Registry: Fixed a compatibility issue when using `snowflake-ml-python` 1.7.0 or greater to save a `tensorflow.keras`
+  model with `keras` 2.x, if `relax_version` is set or default to True, and newer version of `snowflake-ml-python`
+  is available in Snowflake Anaconda Channel, model could not be run in Snowflake. If you have such model, you could
+  use the latest version of `snowflake-ml-python` and call `ModelVersion.load` to load it back, and re-log it.
+  Alternatively, you can prevent this issue by setting `relax_version=False` when saving the model.
+- Registry: Removed the validation that disallows data that does not have non-null values being passed to
+  `ModelVersion.run`.
+- ML Job (PrPr): No longer require CREATE STAGE privilege if `stage_name` points to an existing stage
+- ML Job (PrPr): Fixed a bug causing some payload source and entrypoint path
+  combinations to be erroneously rejected with
+  `ValueError(f"{self.entrypoint} must be a subpath of {self.source}")`
+- ML Job (PrPr): Fixed a bug in Ray cluster startup config which caused certain Runtime APIs to fail
+
+### Behavior Change
+
+### New Features
+
+- Registry: Added support for handling Hugging Face model configurations with auto-mapping functionality.
+- Registry: Added support for `keras` 3.x model with `tensorflow` and `pytorch` backend
+- ML Job (PrPr): Support any serializable (pickleable) argument for `@remote` decorated functions
+
+## 1.7.4 (01-28-2025)
 
 - FileSet: The `snowflake.ml.fileset.FileSet` has been deprecated and will be removed in a future version.
   Use [snowflake.ml.dataset.Dataset](https://docs.snowflake.com/en/developer-guide/snowflake-ml/dataset) and
   [snowflake.ml.data.DataConnector](https://docs.snowflake.com/en/developer-guide/snowpark-ml/reference/latest/api/data/snowflake.ml.data.data_connector.DataConnector)
   instead.
+- Registry: `ModelVersion.run` on a service would require redeploying the service once account opts into nested function.
 
 ### Bug Fixes
 
 - Registry: Fixed an issue that the hugging face pipeline is loaded using incorrect dtype.
 - Registry: Fixed an issue that only 1 row is used when infer the model signature in the modeling model.
 
-### Behavior Changes
-
-- Registry: `ModelVersion.run` on a service would require redeploying the service once account opts into nested function.
-
 ### New Features
 
 - Add new `snowflake.ml.jobs` preview API for running headless workloads on SPCS using
   [Container Runtime for ML](https://docs.snowflake.com/en/developer-guide/snowflake-ml/container-runtime-ml)
 - Added `guardrails` option to Cortex `complete` function, enabling
   [Cortex Guard](https://docs.snowflake.com/en/user-guide/snowflake-cortex/llm-functions#cortex-guard) support
+- Model Monitoring: Expose Model Monitoring Python API by default.
 
 ## 1.7.3 (2025-01-08)
 
@@ -428,6 +456,7 @@ NOTE: Version 1.7.0 is used as example here. Please choose the the latest versio
 - Bumped the requirements of `fsspec` and `s3fs` to `>=2024.6.1,<2026`
 - Bumped the requirement of `mlflow` to `>=2.16.0, <3`
 - Registry: Support 500+ features for model registry
+- Feature Store: Add support for `cluster_by` for feature views.
 
 ### Bug Fixes
 
@@ -545,6 +574,10 @@ class ExamplePipelineModel(custom_model.CustomModel):
 - Data Connector: Add the option of passing a `None` sized batch to `to_torch_dataset` for better
   interoperability with PyTorch DataLoader.
 - Model Registry: Support [pandas.CategoricalDtype](https://pandas.pydata.org/docs/reference/api/pandas.CategoricalDtype.html#pandas-categoricaldtype)
+  - Limitations:
+    - The native categorical data handling handling by XGBoost using `enable_categorical=True` is not supported.
+      Instead please use [`sklearn.pipeline`](https://scikit-learn.org/stable/modules/generated/sklearn.pipeline.Pipeline.html)
+      to preprocess the categorical datatype and log the pipeline with the XGBoost model.
 - Registry: It is now possible to pass `signatures` and `sample_input_data` at the same time to capture background
   data from explainablity and data lineage.
```