snowflake-ml-python 1.5.0__py3-none-any.whl → 1.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. snowflake/ml/_internal/env_utils.py +6 -0
  2. snowflake/ml/_internal/lineage/lineage_utils.py +95 -0
  3. snowflake/ml/_internal/telemetry.py +1 -0
  4. snowflake/ml/_internal/utils/identifier.py +1 -1
  5. snowflake/ml/_internal/utils/sql_identifier.py +14 -1
  6. snowflake/ml/dataset/__init__.py +2 -1
  7. snowflake/ml/dataset/dataset.py +4 -3
  8. snowflake/ml/dataset/dataset_reader.py +5 -8
  9. snowflake/ml/feature_store/__init__.py +6 -0
  10. snowflake/ml/feature_store/access_manager.py +279 -0
  11. snowflake/ml/feature_store/feature_store.py +159 -99
  12. snowflake/ml/feature_store/feature_view.py +18 -8
  13. snowflake/ml/fileset/embedded_stage_fs.py +15 -12
  14. snowflake/ml/fileset/snowfs.py +3 -2
  15. snowflake/ml/fileset/stage_fs.py +25 -7
  16. snowflake/ml/model/_client/model/model_impl.py +46 -39
  17. snowflake/ml/model/_client/model/model_version_impl.py +24 -2
  18. snowflake/ml/model/_client/ops/metadata_ops.py +27 -4
  19. snowflake/ml/model/_client/ops/model_ops.py +131 -16
  20. snowflake/ml/model/_client/sql/_base.py +34 -0
  21. snowflake/ml/model/_client/sql/model.py +32 -39
  22. snowflake/ml/model/_client/sql/model_version.py +60 -43
  23. snowflake/ml/model/_client/sql/stage.py +6 -32
  24. snowflake/ml/model/_client/sql/tag.py +32 -56
  25. snowflake/ml/model/_model_composer/model_composer.py +2 -2
  26. snowflake/ml/model/_packager/model_handlers/mlflow.py +2 -1
  27. snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +50 -21
  28. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +81 -3
  29. snowflake/ml/modeling/framework/base.py +4 -3
  30. snowflake/ml/modeling/pipeline/pipeline.py +27 -7
  31. snowflake/ml/registry/_manager/model_manager.py +36 -7
  32. snowflake/ml/version.py +1 -1
  33. {snowflake_ml_python-1.5.0.dist-info → snowflake_ml_python-1.5.1.dist-info}/METADATA +54 -10
  34. {snowflake_ml_python-1.5.0.dist-info → snowflake_ml_python-1.5.1.dist-info}/RECORD +37 -35
  35. snowflake/ml/_internal/lineage/dataset_dataframe.py +0 -44
  36. {snowflake_ml_python-1.5.0.dist-info → snowflake_ml_python-1.5.1.dist-info}/LICENSE.txt +0 -0
  37. {snowflake_ml_python-1.5.0.dist-info → snowflake_ml_python-1.5.1.dist-info}/WHEEL +0 -0
  38. {snowflake_ml_python-1.5.0.dist-info → snowflake_ml_python-1.5.1.dist-info}/top_level.txt +0 -0
snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py CHANGED
@@ -4,7 +4,7 @@ import io
  import os
  import posixpath
  import sys
- from typing import Any, Dict, List, Optional, Tuple, Union
+ from typing import Any, Dict, List, Optional, Set, Tuple, Union

  import cloudpickle as cp
  import numpy as np
@@ -154,7 +154,7 @@ def construct_cv_results(
      return multimetric, estimator._format_results(param_grid, n_split, out)


- def construct_cv_results_new_implementation(
+ def construct_cv_results_memory_efficient_version(
      estimator: Union[GridSearchCV, RandomizedSearchCV],
      n_split: int,
      param_grid: List[Dict[str, Any]],
@@ -205,12 +205,35 @@ def construct_cv_results_new_implementation(
          with io.BytesIO(hex_str) as f_reload:
              out = cp.load(f_reload)
              all_out.extend(out)
+
+     # because original SearchCV is ranked by parameter first and cv second,
+     # to make the memory efficient, we implemented by fitting on cv first and parameter second
+     # when retrieving the results back, the ordering should revert back to remain the same result as original SearchCV
+     def generate_the_order_by_parameter_index(all_combination_length: int) -> List[int]:
+         pattern = []
+         for i in range(all_combination_length):
+             if i % parameter_grid_length == 0:
+                 pattern.append(i)
+         for i in range(1, parameter_grid_length):
+             for j in range(all_combination_length):
+                 if j % parameter_grid_length == i:
+                     pattern.append(j)
+         return pattern
+
+     def rerank_array(original_array: List[Any], pattern: List[int]) -> List[Any]:
+         reranked_array = []
+         for index in pattern:
+             reranked_array.append(original_array[index])
+         return reranked_array
+
+     pattern = generate_the_order_by_parameter_index(len(all_out))
+     reranked_all_out = rerank_array(all_out, pattern)
      first_test_score = all_out[0]["test_scores"]
-     return first_test_score, estimator._format_results(param_grid, n_split, all_out)
+     return first_test_score, estimator._format_results(param_grid, n_split, reranked_all_out)


  cp.register_pickle_by_value(inspect.getmodule(construct_cv_results))
- cp.register_pickle_by_value(inspect.getmodule(construct_cv_results_new_implementation))
+ cp.register_pickle_by_value(inspect.getmodule(construct_cv_results_memory_efficient_version))


  class DistributedHPOTrainer(SnowparkModelTrainer):
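The comments in the hunk above describe why the results need re-ranking: the memory-efficient path iterates cross-validation folds in the outer loop and parameter candidates in the inner loop, so the raw results arrive fold-major, while sklearn's `_format_results` expects them candidate-major. A small standalone sketch of that reordering (the grid size, fold count, and labels are illustrative, not taken from the package):

```python
from itertools import product
from typing import Any, List

parameter_grid_length = 2  # two hyperparameter candidates (illustrative)
n_splits = 3               # three CV folds (illustrative)

# Results arrive fold-major because the work items are generated by product(cv, param):
arrival_order = [f"cv{cv}-param{p}" for cv, p in product(range(n_splits), range(parameter_grid_length))]


def generate_the_order_by_parameter_index(all_combination_length: int) -> List[int]:
    # Same logic as the helper in the hunk above: group result indices by their
    # parameter index (i % parameter_grid_length), keeping fold order within each group.
    pattern = []
    for i in range(all_combination_length):
        if i % parameter_grid_length == 0:
            pattern.append(i)
    for i in range(1, parameter_grid_length):
        for j in range(all_combination_length):
            if j % parameter_grid_length == i:
                pattern.append(j)
    return pattern


def rerank_array(original_array: List[Any], pattern: List[int]) -> List[Any]:
    return [original_array[index] for index in pattern]


pattern = generate_the_order_by_parameter_index(len(arrival_order))
print(pattern)                               # [0, 2, 4, 1, 3, 5]
print(rerank_array(arrival_order, pattern))  # parameter-major again:
# ['cv0-param0', 'cv1-param0', 'cv2-param0', 'cv0-param1', 'cv1-param1', 'cv2-param1']
```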
@@ -661,7 +684,7 @@ class DistributedHPOTrainer(SnowparkModelTrainer):

          return fit_estimator

-     def fit_search_snowpark_new_implementation(
+     def fit_search_snowpark_enable_efficient_memory_usage(
          self,
          param_grid: Union[model_selection.ParameterGrid, model_selection.ParameterSampler],
          dataset: DataFrame,
@@ -718,7 +741,7 @@ class DistributedHPOTrainer(SnowparkModelTrainer):
                  inspect.currentframe(), self.__class__.__name__
              ),
              api_calls=[udtf],
-             custom_tags=dict([("hpo_udtf", True)]),
+             custom_tags=dict([("hpo_memory_efficient", True)]),
          )

          # Put locally serialized estimator on stage.
@@ -960,22 +983,26 @@ class DistributedHPOTrainer(SnowparkModelTrainer):
                  self.base_estimator = base_estimator
                  self.fit_and_score_kwargs = fit_and_score_kwargs
                  self.fit_score_params: List[Any] = []
-                 self.cached_train_test_indices = []
-                 # Calculate the full index here to avoid duplicate calculation (which consumes a lot of memory)
-                 full_index = np.arange(DATA_LENGTH)
-                 for i in range(n_splits):
-                     self.cached_train_test_indices.extend(
-                         [[np.setdiff1d(full_index, self.test_indices[i]), self.test_indices[i]]]
-                     )
+                 self.cv_indices_set: Set[int] = set()

              def process(self, idx: int, params_idx: int, cv_idx: int) -> None:
                  self.fit_score_params.extend([[idx, params_idx, cv_idx]])
+                 self.cv_indices_set.add(cv_idx)

              def end_partition(self) -> Iterator[Tuple[int, str]]:
                  from sklearn.base import clone
                  from sklearn.model_selection._validation import _fit_and_score
                  from sklearn.utils.parallel import Parallel, delayed

+                 cached_train_test_indices = {}
+                 # Calculate the full index here to avoid duplicate calculation (which consumes a lot of memory)
+                 full_index = np.arange(DATA_LENGTH)
+                 for i in self.cv_indices_set:
+                     cached_train_test_indices[i] = [
+                         np.setdiff1d(full_index, self.test_indices[i]),
+                         self.test_indices[i],
+                     ]
+
                  parallel = Parallel(n_jobs=_N_JOBS, pre_dispatch=_PRE_DISPATCH)

                  out = parallel(
@@ -983,8 +1010,8 @@ class DistributedHPOTrainer(SnowparkModelTrainer):
                          clone(self.base_estimator),
                          self.X,
                          self.y,
-                         train=self.cached_train_test_indices[split_idx][0],
-                         test=self.cached_train_test_indices[split_idx][1],
+                         train=cached_train_test_indices[split_idx][0],
+                         test=cached_train_test_indices[split_idx][1],
                          parameters=self.params_to_evaluate[cand_idx],
                          split_progress=(split_idx, n_splits),
                          candidate_progress=(cand_idx, n_candidates),
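The two hunks above also defer building the train/test index pairs from `__init__` to `end_partition`, and compute them only for the fold ids the partition actually saw in `process`; each train split is simply the complement of that fold's test indices. A minimal numpy sketch of that complement computation (the dataset size and fold assignments are made up for illustration):

```python
import numpy as np

DATA_LENGTH = 10                          # illustrative dataset size
test_indices = [np.array([0, 1, 2]),      # illustrative fold -> test-row assignments
                np.array([3, 4, 5]),
                np.array([6, 7, 8, 9])]

full_index = np.arange(DATA_LENGTH)
cv_indices_set = {0, 2}                   # only the folds this partition actually received

# Train rows for a fold are everything that is not in its test rows.
cached_train_test_indices = {
    i: [np.setdiff1d(full_index, test_indices[i]), test_indices[i]]
    for i in cv_indices_set
}
print(cached_train_test_indices[2][0])    # [0 1 2 3 4 5]
```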
@@ -1005,7 +1032,9 @@ class DistributedHPOTrainer(SnowparkModelTrainer):

          session.udtf.register(
              SearchCV,
-             output_schema=StructType([StructField("IDX", IntegerType()), StructField("CV_RESULTS", StringType())]),
+             output_schema=StructType(
+                 [StructField("FIRST_IDX", IntegerType()), StructField("EACH_CV_RESULTS", StringType())]
+             ),
              input_types=[IntegerType(), IntegerType(), IntegerType()],
              name=random_udtf_name,
              packages=required_deps, # type: ignore[arg-type]
@@ -1020,8 +1049,8 @@ class DistributedHPOTrainer(SnowparkModelTrainer):
          # param_indices is for the index for each parameter grid;
          # cv_indices is for the index for each cross_validator's fold;
          # param_cv_indices is for the index for the product of (len(param_indices) * len(cv_indices))
-         param_indices, cv_indices = zip(
-             *product(range(parameter_grid_length), range(cross_validator_indices_length))
+         cv_indices, param_indices = zip(
+             *product(range(cross_validator_indices_length), range(parameter_grid_length))
          )

          indices_info_pandas = pd.DataFrame(
@@ -1042,11 +1071,11 @@ class DistributedHPOTrainer(SnowparkModelTrainer):
              ),
          )

-         first_test_score, cv_results_ = construct_cv_results_new_implementation(
+         first_test_score, cv_results_ = construct_cv_results_memory_efficient_version(
              estimator,
              n_splits,
              list(param_grid),
-             HP_raw_results.select("CV_RESULTS").sort(F.col("IDX")).collect(),
+             HP_raw_results.select("EACH_CV_RESULTS").sort(F.col("FIRST_IDX")).collect(),
              cross_validator_indices_length,
              parameter_grid_length,
          )
@@ -1163,7 +1192,7 @@ class DistributedHPOTrainer(SnowparkModelTrainer):
              pkg_versions=model_spec.pkgDependencies, session=self.session
          )
          if ENABLE_EFFICIENT_MEMORY_USAGE:
-             return self.fit_search_snowpark_new_implementation(
+             return self.fit_search_snowpark_enable_efficient_memory_usage(
                  param_grid=param_grid,
                  dataset=self.dataset,
                  session=self.session,
snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py CHANGED
@@ -45,6 +45,7 @@ cp.register_pickle_by_value(inspect.getmodule(identifier.get_inferred_name))
  cp.register_pickle_by_value(inspect.getmodule(handle_inference_result))

  _PROJECT = "ModelDevelopment"
+ _ENABLE_ANONYMOUS_SPROC = False


  class SnowparkModelTrainer:
@@ -251,6 +252,27 @@ class SnowparkModelTrainer:

          return fit_wrapper_function

+     def _get_fit_wrapper_sproc_anonymous(self, statement_params: Dict[str, str]) -> StoredProcedure:
+         model_spec = ModelSpecificationsBuilder.build(model=self.estimator)
+         fit_sproc_name = random_name_for_temp_object(TempObjectType.PROCEDURE)
+
+         relaxed_dependencies = pkg_version_utils.get_valid_pkg_versions_supported_in_snowflake_conda_channel(
+             pkg_versions=model_spec.pkgDependencies, session=self.session
+         )
+
+         fit_wrapper_sproc = self.session.sproc.register(
+             func=self._build_fit_wrapper_sproc(model_spec=model_spec),
+             is_permanent=False,
+             name=fit_sproc_name,
+             packages=["snowflake-snowpark-python"] + relaxed_dependencies, # type: ignore[arg-type]
+             replace=True,
+             session=self.session,
+             statement_params=statement_params,
+             anonymous=True,
+         )
+
+         return fit_wrapper_sproc
+
      def _get_fit_wrapper_sproc(self, statement_params: Dict[str, str]) -> StoredProcedure:
          # If the sproc already exists, don't register.
          if not hasattr(self.session, "_FIT_WRAPPER_SPROCS"):
@@ -510,6 +532,28 @@ class SnowparkModelTrainer:

          return fit_transform_wrapper_function

+     def _get_fit_predict_wrapper_sproc_anonymous(self, statement_params: Dict[str, str]) -> StoredProcedure:
+         model_spec = ModelSpecificationsBuilder.build(model=self.estimator)
+
+         fit_predict_sproc_name = random_name_for_temp_object(TempObjectType.PROCEDURE)
+
+         relaxed_dependencies = pkg_version_utils.get_valid_pkg_versions_supported_in_snowflake_conda_channel(
+             pkg_versions=model_spec.pkgDependencies, session=self.session
+         )
+
+         fit_predict_wrapper_sproc = self.session.sproc.register(
+             func=self._build_fit_predict_wrapper_sproc(model_spec=model_spec),
+             is_permanent=False,
+             name=fit_predict_sproc_name,
+             packages=["snowflake-snowpark-python"] + relaxed_dependencies, # type: ignore[arg-type]
+             replace=True,
+             session=self.session,
+             statement_params=statement_params,
+             anonymous=True,
+         )
+
+         return fit_predict_wrapper_sproc
+
      def _get_fit_predict_wrapper_sproc(self, statement_params: Dict[str, str]) -> StoredProcedure:
          # If the sproc already exists, don't register.
          if not hasattr(self.session, "_FIT_WRAPPER_SPROCS"):
@@ -545,6 +589,27 @@ class SnowparkModelTrainer:

          return fit_predict_wrapper_sproc

+     def _get_fit_transform_wrapper_sproc_anonymous(self, statement_params: Dict[str, str]) -> StoredProcedure:
+         model_spec = ModelSpecificationsBuilder.build(model=self.estimator)
+
+         fit_transform_sproc_name = random_name_for_temp_object(TempObjectType.PROCEDURE)
+
+         relaxed_dependencies = pkg_version_utils.get_valid_pkg_versions_supported_in_snowflake_conda_channel(
+             pkg_versions=model_spec.pkgDependencies, session=self.session
+         )
+
+         fit_transform_wrapper_sproc = self.session.sproc.register(
+             func=self._build_fit_transform_wrapper_sproc(model_spec=model_spec),
+             is_permanent=False,
+             name=fit_transform_sproc_name,
+             packages=["snowflake-snowpark-python"] + relaxed_dependencies, # type: ignore[arg-type]
+             replace=True,
+             session=self.session,
+             statement_params=statement_params,
+             anonymous=True,
+         )
+         return fit_transform_wrapper_sproc
+
      def _get_fit_transform_wrapper_sproc(self, statement_params: Dict[str, str]) -> StoredProcedure:
          # If the sproc already exists, don't register.
          if not hasattr(self.session, "_FIT_WRAPPER_SPROCS"):
@@ -612,7 +677,10 @@ class SnowparkModelTrainer:
              custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
          )

-         fit_wrapper_sproc = self._get_fit_wrapper_sproc(statement_params=statement_params)
+         if _ENABLE_ANONYMOUS_SPROC:
+             fit_wrapper_sproc = self._get_fit_wrapper_sproc_anonymous(statement_params=statement_params)
+         else:
+             fit_wrapper_sproc = self._get_fit_wrapper_sproc(statement_params=statement_params)

          try:
              sproc_export_file_name: str = fit_wrapper_sproc(
@@ -680,7 +748,11 @@ class SnowparkModelTrainer:
              custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
          )

-         fit_predict_wrapper_sproc = self._get_fit_predict_wrapper_sproc(statement_params=statement_params)
+         if _ENABLE_ANONYMOUS_SPROC:
+             fit_predict_wrapper_sproc = self._get_fit_predict_wrapper_sproc_anonymous(statement_params=statement_params)
+         else:
+             fit_predict_wrapper_sproc = self._get_fit_predict_wrapper_sproc(statement_params=statement_params)
+
          fit_predict_result_name = random_name_for_temp_object(TempObjectType.TABLE)

          sproc_export_file_name: str = fit_predict_wrapper_sproc(
@@ -741,7 +813,13 @@ class SnowparkModelTrainer:
              custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
          )

-         fit_transform_wrapper_sproc = self._get_fit_transform_wrapper_sproc(statement_params=statement_params)
+         if _ENABLE_ANONYMOUS_SPROC:
+             fit_transform_wrapper_sproc = self._get_fit_transform_wrapper_sproc_anonymous(
+                 statement_params=statement_params
+             )
+         else:
+             fit_transform_wrapper_sproc = self._get_fit_transform_wrapper_sproc(statement_params=statement_params)
+
          fit_transform_result_name = random_name_for_temp_object(TempObjectType.TABLE)

          sproc_export_file_name: str = fit_transform_wrapper_sproc(
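The three `_anonymous` helpers above differ from their named counterparts only in passing `anonymous=True` to the Snowpark registration call, so the wrapper runs as an anonymous stored procedure instead of requiring a persisted `CREATE PROCEDURE` on the schema. A minimal sketch of the same registration pattern outside the trainer, assuming an existing `snowflake.snowpark.Session`; the procedure name and trivial body are placeholders:

```python
from snowflake.snowpark import Session


def double(session: Session, x: int) -> int:
    # Trivial placeholder body; the trainer registers its pickled fit wrapper here instead.
    return 2 * x


# `session` is assumed to be an already-created snowflake.snowpark.Session.
anonymous_sproc = session.sproc.register(
    func=double,
    name="DOUBLE_TMP",                      # placeholder temp name
    is_permanent=False,
    packages=["snowflake-snowpark-python"],
    replace=True,
    anonymous=True,                         # no persisted CREATE PROCEDURE on the schema
)
print(anonymous_sproc(21))  # 42 -- the returned handle is callable like a named sproc
```

Because the returned `StoredProcedure` handle behaves the same either way, the `fit` paths in the hunks above can switch between the two variants purely on the `_ENABLE_ANONYMOUS_SPROC` flag.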
snowflake/ml/modeling/framework/base.py CHANGED
@@ -16,7 +16,7 @@ from snowflake.ml._internal.exceptions import (
      exceptions,
      modeling_error_messages,
  )
- from snowflake.ml._internal.lineage import data_source, dataset_dataframe
+ from snowflake.ml._internal.lineage import data_source, lineage_utils
  from snowflake.ml._internal.utils import identifier, parallelize
  from snowflake.ml.modeling.framework import _utils
  from snowflake.snowpark import functions as F
@@ -430,8 +430,9 @@ class BaseEstimator(Base):
      )
      def fit(self, dataset: Union[snowpark.DataFrame, pd.DataFrame]) -> "BaseEstimator":
          """Runs universal logics for all fit implementations."""
-         if isinstance(dataset, dataset_dataframe.DatasetDataFrame):
-             self._data_sources = dataset._get_sources()
+         self._data_sources = getattr(dataset, lineage_utils.DATA_SOURCES_ATTR, None)
+         if self._data_sources:
+             assert all(isinstance(ds, data_source.DataSource) for ds in self._data_sources)
          return self._fit(dataset)

      @abstractmethod
snowflake/ml/modeling/pipeline/pipeline.py CHANGED
@@ -115,7 +115,7 @@ class Pipeline(base.BaseTransformer):
          self._feature_names_in: List[np.ndarray[Any, np.dtype[Any]]] = []
          self._n_features_in: List[int] = []
          self._transformers_to_input_indices: Dict[str, List[int]] = {}
-         self._is_convertible_to_sklearn = True
+         self._modifies_label_or_sample_weight = True

          self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None

@@ -126,6 +126,9 @@ class Pipeline(base.BaseTransformer):
          self._deps = list(deps)
          self._sklearn_object = None
          self.label_cols = self._get_label_cols()
+         self._is_convertible_to_sklearn = self._is_convertible_to_sklearn_object()
+
+         self._send_pipeline_configuration_telemetry()

      @staticmethod
      def _is_estimator(obj: object) -> bool:
@@ -228,7 +231,7 @@ class Pipeline(base.BaseTransformer):
          return [c for c in columns if c not in target_cols]

      def _append_step_feature_consumption_info(self, step_name: str, all_cols: List[str], input_cols: List[str]) -> None:
-         if self._is_convertible_to_sklearn:
+         if self._modifies_label_or_sample_weight:
              all_cols = self._get_sanitized_list_of_columns(all_cols)
              self._feature_names_in.append(np.asarray(all_cols, dtype=object))
              self._n_features_in.append(len(all_cols))
@@ -248,7 +251,7 @@ class Pipeline(base.BaseTransformer):
          self, dataset: Union[snowpark.DataFrame, pd.DataFrame]
      ) -> Union[snowpark.DataFrame, pd.DataFrame]:
          self._reset()
-         self._is_convertible_to_sklearn = not self._is_pipeline_modifying_label_or_sample_weight()
+         self._modifies_label_or_sample_weight = not self._is_pipeline_modifying_label_or_sample_weight()
          transformed_dataset = dataset
          for name, trans in self._get_transformers():
              self._append_step_feature_consumption_info(
@@ -425,7 +428,7 @@ class Pipeline(base.BaseTransformer):
              )

          if self._can_be_trained_in_ml_runtime(dataset):
-             if not self._is_convertible_to_sklearn_object():
+             if not self._is_convertible_to_sklearn:
                  raise ValueError("This pipeline cannot be converted to an sklearn pipeline.")
              self._fit_ml_runtime(dataset)

@@ -947,7 +950,7 @@ class Pipeline(base.BaseTransformer):
          if not os.environ.get(IN_ML_RUNTIME_ENV_VAR):
              return False

-         return self._is_convertible_to_sklearn_object()
+         return self._is_convertible_to_sklearn

      @staticmethod
      def _wrap_transformer_in_column_transformer(
@@ -1003,7 +1006,7 @@ class Pipeline(base.BaseTransformer):
          if not self._is_fitted:
              return self._create_unfitted_sklearn_object()

-         if not self._is_convertible_to_sklearn:
+         if not self._modifies_label_or_sample_weight:
              raise exceptions.SnowflakeMLException(
                  error_code=error_codes.METHOD_NOT_ALLOWED,
                  original_exception=ValueError(
@@ -1109,7 +1112,24 @@ class Pipeline(base.BaseTransformer):
              else:
                  return self._create_sklearn_object()
          else:
-             if self._is_convertible_to_sklearn_object():
+             if self._is_convertible_to_sklearn:
                  return self._create_unfitted_sklearn_object()
              else:
                  raise ValueError("This pipeline can not be converted to an sklearn pipeline.")
+
+     def _send_pipeline_configuration_telemetry(self) -> None:
+         """Track information about the pipeline setup. Currently, we want to track:
+         - Whether the pipeline is converible to an sklearn pipeline
+         - Whether the pipeline is being used in the SPCS ml runtime.
+         """
+
+         telemetry_data = {
+             "pipeline_is_convertible_to_sklearn": self._is_convertible_to_sklearn,
+             "in_spcs_ml_runtime": bool(os.environ.get(IN_ML_RUNTIME_ENV_VAR)),
+         }
+         telemetry.send_custom_usage(
+             project=_PROJECT,
+             subproject=_SUBPROJECT,
+             telemetry_type=telemetry.TelemetryField.TYPE_SNOWML_PIPELINE_USAGE.value,
+             data=telemetry_data,
+         )
snowflake/ml/registry/_manager/model_manager.py CHANGED
@@ -48,20 +48,29 @@ class ModelManager:
          options: Optional[model_types.ModelSaveOption] = None,
          statement_params: Optional[Dict[str, Any]] = None,
      ) -> model_version_impl.ModelVersion:
-         model_name_id = sql_identifier.SqlIdentifier(model_name)
+         database_name_id, schema_name_id, model_name_id = sql_identifier.parse_fully_qualified_name(model_name)

          if not version_name:
              version_name = self._hrid_generator.generate()[1]
          version_name_id = sql_identifier.SqlIdentifier(version_name)

          if self._model_ops.validate_existence(
-             model_name=model_name_id, statement_params=statement_params
+             database_name=database_name_id,
+             schema_name=schema_name_id,
+             model_name=model_name_id,
+             statement_params=statement_params,
          ) and self._model_ops.validate_existence(
-             model_name=model_name_id, version_name=version_name_id, statement_params=statement_params
+             database_name=database_name_id,
+             schema_name=schema_name_id,
+             model_name=model_name_id,
+             version_name=version_name_id,
+             statement_params=statement_params,
          ):
              raise ValueError(f"Model {model_name} version {version_name} already existed.")

          stage_path = self._model_ops.prepare_model_stage_path(
+             database_name=database_name_id,
+             schema_name=schema_name_id,
              statement_params=statement_params,
          )
@@ -85,13 +94,19 @@ class ModelManager:

          self._model_ops.create_from_stage(
              composed_model=mc,
+             database_name=database_name_id,
+             schema_name=schema_name_id,
              model_name=model_name_id,
              version_name=version_name_id,
              statement_params=statement_params,
          )

          mv = model_version_impl.ModelVersion._ref(
-             self._model_ops,
+             model_ops.ModelOperator(
+                 self._model_ops._session,
+                 database_name=database_name_id or self._database_name,
+                 schema_name=schema_name_id or self._schema_name,
+             ),
              model_name=model_name_id,
              version_name=version_name_id,
          )
@@ -102,6 +117,8 @@ class ModelManager:
          if metrics:
              self._model_ops._metadata_ops.save(
                  metadata_ops.ModelVersionMetadataSchema(metrics=metrics),
+                 database_name=database_name_id,
+                 schema_name=schema_name_id,
                  model_name=model_name_id,
                  version_name=version_name_id,
                  statement_params=statement_params,
@@ -115,13 +132,19 @@ class ModelManager:
          *,
          statement_params: Optional[Dict[str, Any]] = None,
      ) -> model_impl.Model:
-         model_name_id = sql_identifier.SqlIdentifier(model_name)
+         database_name_id, schema_name_id, model_name_id = sql_identifier.parse_fully_qualified_name(model_name)
          if self._model_ops.validate_existence(
+             database_name=database_name_id,
+             schema_name=schema_name_id,
              model_name=model_name_id,
              statement_params=statement_params,
          ):
              return model_impl.Model._ref(
-                 self._model_ops,
+                 model_ops.ModelOperator(
+                     self._model_ops._session,
+                     database_name=database_name_id or self._database_name,
+                     schema_name=schema_name_id or self._schema_name,
+                 ),
                  model_name=model_name_id,
              )
          else:
@@ -133,6 +156,8 @@ class ModelManager:
          statement_params: Optional[Dict[str, Any]] = None,
      ) -> List[model_impl.Model]:
          model_names = self._model_ops.list_models_or_versions(
+             database_name=None,
+             schema_name=None,
              statement_params=statement_params,
          )
          return [
@@ -149,6 +174,8 @@ class ModelManager:
          statement_params: Optional[Dict[str, Any]] = None,
      ) -> pd.DataFrame:
          rows = self._model_ops.show_models_or_versions(
+             database_name=None,
+             schema_name=None,
              statement_params=statement_params,
          )
          return pd.DataFrame([row.as_dict() for row in rows])
@@ -159,9 +186,11 @@ class ModelManager:
          *,
          statement_params: Optional[Dict[str, Any]] = None,
      ) -> None:
-         model_name_id = sql_identifier.SqlIdentifier(model_name)
+         database_name_id, schema_name_id, model_name_id = sql_identifier.parse_fully_qualified_name(model_name)

          self._model_ops.delete_model_or_version(
+             database_name=database_name_id,
+             schema_name=schema_name_id,
              model_name=model_name_id,
              statement_params=statement_params,
          )
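The common thread in these hunks is that the manager now splits incoming names with `sql_identifier.parse_fully_qualified_name` and falls back to the registry's own database and schema when those parts are absent, which is what backs the 1.5.1 note that `log_model`, `get_model`, and `delete_model` accept fully qualified names. A hedged usage sketch at the public `Registry` level (the session, database/schema names, and model object are placeholders):

```python
from snowflake.ml.registry import Registry

# `session` is an existing snowflake.snowpark.Session and `sk_model` a fitted model (placeholders).
reg = Registry(session=session, database_name="ML_DB", schema_name="PUBLIC")

# Fully qualified: the version is created in OTHER_DB.OTHER_SCHEMA, not the registry defaults.
mv = reg.log_model(sk_model, model_name="OTHER_DB.OTHER_SCHEMA.MY_MODEL", version_name="V1")

# Unqualified: resolved against the registry's own database and schema (ML_DB.PUBLIC here).
m = reg.get_model("MY_MODEL")

reg.delete_model("OTHER_DB.OTHER_SCHEMA.MY_MODEL")
```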
snowflake/ml/version.py CHANGED
@@ -1 +1 @@
- VERSION="1.5.0"
+ VERSION="1.5.1"
{snowflake_ml_python-1.5.0.dist-info → snowflake_ml_python-1.5.1.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: snowflake-ml-python
- Version: 1.5.0
+ Version: 1.5.1
  Summary: The machine learning client library that is used for interacting with Snowflake to build machine learning solutions.
  Author-email: "Snowflake, Inc" <support@snowflake.com>
  License:
@@ -236,7 +236,6 @@ License-File: LICENSE.txt
  Requires-Dist: absl-py <2,>=0.15
  Requires-Dist: anyio <4,>=3.5.0
  Requires-Dist: cachetools <6,>=3.1.1
- Requires-Dist: catboost <1.3,>=1.2.0
  Requires-Dist: cloudpickle >=2.0.0
  Requires-Dist: fsspec[http] <2024,>=2022.11
  Requires-Dist: importlib-resources <7,>=6.1.1
@@ -256,19 +255,22 @@ Requires-Dist: sqlparse <1,>=0.4
  Requires-Dist: typing-extensions <5,>=4.1.0
  Requires-Dist: xgboost <2,>=1.7.3
  Provides-Extra: all
- Requires-Dist: lightgbm <4.2,>=3.3.5 ; extra == 'all'
+ Requires-Dist: catboost <2,>=1.2.0 ; extra == 'all'
+ Requires-Dist: lightgbm <5,>=3.3.5 ; extra == 'all'
  Requires-Dist: mlflow <2.4,>=2.1.0 ; extra == 'all'
  Requires-Dist: peft <1,>=0.5.0 ; extra == 'all'
  Requires-Dist: sentence-transformers <3,>=2.2.2 ; extra == 'all'
- Requires-Dist: sentencepiece <0.2,>=0.1.95 ; extra == 'all'
+ Requires-Dist: sentencepiece <1,>=0.1.95 ; extra == 'all'
  Requires-Dist: shap ==0.42.1 ; extra == 'all'
  Requires-Dist: tensorflow <3,>=2.10 ; extra == 'all'
  Requires-Dist: tokenizers <1,>=0.10 ; extra == 'all'
  Requires-Dist: torch <3,>=2.0.1 ; extra == 'all'
  Requires-Dist: torchdata <1,>=0.4 ; extra == 'all'
  Requires-Dist: transformers <5,>=4.32.1 ; extra == 'all'
+ Provides-Extra: catboost
+ Requires-Dist: catboost <2,>=1.2.0 ; extra == 'catboost'
  Provides-Extra: lightgbm
- Requires-Dist: lightgbm <4.2,>=3.3.5 ; extra == 'lightgbm'
+ Requires-Dist: lightgbm <5,>=3.3.5 ; extra == 'lightgbm'
  Provides-Extra: llm
  Requires-Dist: peft <1,>=0.5.0 ; extra == 'llm'
  Provides-Extra: mlflow
@@ -282,7 +284,7 @@ Requires-Dist: torch <3,>=2.0.1 ; extra == 'torch'
  Requires-Dist: torchdata <1,>=0.4 ; extra == 'torch'
  Provides-Extra: transformers
  Requires-Dist: sentence-transformers <3,>=2.2.2 ; extra == 'transformers'
- Requires-Dist: sentencepiece <0.2,>=0.1.95 ; extra == 'transformers'
+ Requires-Dist: sentencepiece <1,>=0.1.95 ; extra == 'transformers'
  Requires-Dist: tokenizers <1,>=0.10 ; extra == 'transformers'
  Requires-Dist: transformers <5,>=4.32.1 ; extra == 'transformers'

@@ -371,6 +373,31 @@ be compatibility issues. Server-side functionality that `snowflake-ml-python` de

  # Release History

+ ## 1.5.1
+
+ ### Bug Fixes
+
+ - Dataset: Fix `snowflake.connector.errors.DataError: Query Result did not match expected number of rows` when accessing
+   DatasetVersion properties when case insensitive `SHOW VERSIONS IN DATASET` check matches multiple version names.
+ - Dataset: Fix bug in SnowFS bulk file read when used with DuckDB
+ - Registry: Fixed a bug when loading old models.
+ - Lineage: Fix Dataset source lineage propagation through `snowpark.DataFrame` transformations
+
+ ### Behavior Changes
+
+ - Feature Store: convert clear() into a private function. Also make it deletes feature views and entities only.
+ - Feature Store: Use NULL as default value for timestamp tag value.
+
+ ### New Features
+
+ - Feature Store: Added new `snowflake.ml.feature_store.setup_feature_store()` API to assist Feature Store RBAC setup.
+ - Feature Store: Add `output_type` argument to `FeatureStore.generate_dataset()` to allow generating data snapshots
+   as Datasets or Tables.
+ - Registry: `log_model`, `get_model`, `delete_model` now supports fully qualified name.
+ - Modeling: Supports anonymous stored procedure during fit calls so that modeling would not require sufficient
+   permissions to operate on schema. Please call
+   `import snowflake.ml.modeling.parameters.enable_anonymous_sproc # noqa: F401`
+
  ## 1.5.0

  ### Bug Fixes
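A usage note on the last New Features item above: the anonymous-sproc path is gated by the `_ENABLE_ANONYMOUS_SPROC` flag added to `snowpark_trainer.py` earlier in this diff, and the documented import is the intended way to opt in. A hedged sketch of a fit call with that opt-in (the estimator choice, column names, and `train_df` are placeholders):

```python
# Opt in before fitting; per the release note, this import enables anonymous stored procedures.
import snowflake.ml.modeling.parameters.enable_anonymous_sproc  # noqa: F401

from snowflake.ml.modeling.linear_model import LinearRegression

# `train_df` is assumed to be an existing Snowpark DataFrame containing these columns.
lr = LinearRegression(input_cols=["X1", "X2"], label_cols=["Y"], output_cols=["Y_PRED"])
lr.fit(train_df)  # the fit wrapper runs as an anonymous stored procedure
```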
@@ -411,12 +438,19 @@ be compatibility issues. Server-side functionality that `snowflake-ml-python` de

  #### Feature Store (PrPr)

- `FeatureStore.generate_dataset` argument list has been changed to match the new
+ - `FeatureStore.generate_dataset` argument list has been changed to match the new
    `snowflake.ml.dataset.Dataset` definition

- - `materialized_table` has been removed and replaced with `name` and `version`.
- - `name` moved to first positional argument
- - `save_mode` has been removed as `merge` behavior is no longer supported. The new behavior is always `errorifexists`.
+   - `materialized_table` has been removed and replaced with `name` and `version`.
+   - `name` moved to first positional argument
+   - `save_mode` has been removed as `merge` behavior is no longer supported. The new behavior is always `errorifexists`.
+
+ - Change feature view version type from str to `FeatureViewVersion`. It is a restricted string literal.
+
+ - Remove as_dataframe arg from FeatureStore.list_feature_views(), now always returns result as DataFrame.
+
+ - Combines few metadata tags into a new tag: SNOWML_FEATURE_VIEW_METADATA. This will make previously created feature views
+   not readable by new SDK.

  ### New Features

@@ -432,6 +466,10 @@ be compatibility issues. Server-side functionality that `snowflake-ml-python` de
    and `Dataset.read.to_tf_dataset()` respectively.
  - Added `fsspec` style file integration using `Dataset.read.files()` and `Dataset.read.filesystem()`
+
+ #### Feature Store
+
+ - use new tag_reference_internal to speed up metadata lookup.
+

  ## 1.4.1 (2024-04-18)

  ### New Features
@@ -443,6 +481,10 @@ be compatibility issues. Server-side functionality that `snowflake-ml-python` de

  - Registry: Fix a bug that leads to relax_version option is not working.

+ ### Behavior changes
+
+ - Feature Store: update_feature_view takes refresh_freq and warehouse as argument.
+
  ## 1.4.0 (2024-04-08)

  ### Bug Fixes
@@ -464,6 +506,8 @@ be compatibility issues. Server-side functionality that `snowflake-ml-python` de

  - Registry: `apply` method is no longer by default logged when logging a xgboost model. If that is required, it could
    be specified manually when logging the model by `log_model(..., options={"target_methods": ["apply", ...]})`.
+ - Feature Store: register_entity returns an entity object.
+ - Feature Store: register_feature_view `block=true` becomes default.

  ### New Features