snowflake-ml-python 1.5.0__py3-none-any.whl → 1.5.2__py3-none-any.whl

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Files changed (197)
  1. snowflake/cortex/_sentiment.py +7 -4
  2. snowflake/ml/_internal/env_utils.py +6 -0
  3. snowflake/ml/_internal/lineage/lineage_utils.py +95 -0
  4. snowflake/ml/_internal/telemetry.py +1 -0
  5. snowflake/ml/_internal/utils/identifier.py +1 -1
  6. snowflake/ml/_internal/utils/sql_identifier.py +14 -1
  7. snowflake/ml/_internal/utils/temp_file_utils.py +5 -2
  8. snowflake/ml/dataset/__init__.py +2 -1
  9. snowflake/ml/dataset/dataset.py +4 -3
  10. snowflake/ml/dataset/dataset_reader.py +5 -8
  11. snowflake/ml/feature_store/__init__.py +6 -0
  12. snowflake/ml/feature_store/access_manager.py +283 -0
  13. snowflake/ml/feature_store/feature_store.py +160 -100
  14. snowflake/ml/feature_store/feature_view.py +30 -19
  15. snowflake/ml/fileset/embedded_stage_fs.py +15 -12
  16. snowflake/ml/fileset/snowfs.py +2 -30
  17. snowflake/ml/fileset/stage_fs.py +25 -7
  18. snowflake/ml/model/_client/model/model_impl.py +46 -39
  19. snowflake/ml/model/_client/model/model_version_impl.py +24 -2
  20. snowflake/ml/model/_client/ops/metadata_ops.py +27 -4
  21. snowflake/ml/model/_client/ops/model_ops.py +174 -16
  22. snowflake/ml/model/_client/sql/_base.py +34 -0
  23. snowflake/ml/model/_client/sql/model.py +32 -39
  24. snowflake/ml/model/_client/sql/model_version.py +111 -42
  25. snowflake/ml/model/_client/sql/stage.py +6 -32
  26. snowflake/ml/model/_client/sql/tag.py +32 -56
  27. snowflake/ml/model/_model_composer/model_composer.py +8 -4
  28. snowflake/ml/model/_packager/model_handlers/mlflow.py +2 -1
  29. snowflake/ml/model/_packager/model_meta/model_meta.py +1 -3
  30. snowflake/ml/model/_packager/model_runtime/model_runtime.py +3 -27
  31. snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +90 -142
  32. snowflake/ml/modeling/_internal/snowpark_implementations/distributed_search_udf_file.py +159 -0
  33. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +81 -3
  34. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +8 -1
  35. snowflake/ml/modeling/cluster/affinity_propagation.py +8 -1
  36. snowflake/ml/modeling/cluster/agglomerative_clustering.py +8 -1
  37. snowflake/ml/modeling/cluster/birch.py +8 -1
  38. snowflake/ml/modeling/cluster/bisecting_k_means.py +8 -1
  39. snowflake/ml/modeling/cluster/dbscan.py +8 -1
  40. snowflake/ml/modeling/cluster/feature_agglomeration.py +8 -1
  41. snowflake/ml/modeling/cluster/k_means.py +8 -1
  42. snowflake/ml/modeling/cluster/mean_shift.py +8 -1
  43. snowflake/ml/modeling/cluster/mini_batch_k_means.py +8 -1
  44. snowflake/ml/modeling/cluster/optics.py +8 -1
  45. snowflake/ml/modeling/cluster/spectral_biclustering.py +8 -1
  46. snowflake/ml/modeling/cluster/spectral_clustering.py +8 -1
  47. snowflake/ml/modeling/cluster/spectral_coclustering.py +8 -1
  48. snowflake/ml/modeling/compose/column_transformer.py +8 -1
  49. snowflake/ml/modeling/compose/transformed_target_regressor.py +8 -1
  50. snowflake/ml/modeling/covariance/elliptic_envelope.py +8 -1
  51. snowflake/ml/modeling/covariance/empirical_covariance.py +8 -1
  52. snowflake/ml/modeling/covariance/graphical_lasso.py +8 -1
  53. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +8 -1
  54. snowflake/ml/modeling/covariance/ledoit_wolf.py +8 -1
  55. snowflake/ml/modeling/covariance/min_cov_det.py +8 -1
  56. snowflake/ml/modeling/covariance/oas.py +8 -1
  57. snowflake/ml/modeling/covariance/shrunk_covariance.py +8 -1
  58. snowflake/ml/modeling/decomposition/dictionary_learning.py +8 -1
  59. snowflake/ml/modeling/decomposition/factor_analysis.py +8 -1
  60. snowflake/ml/modeling/decomposition/fast_ica.py +8 -1
  61. snowflake/ml/modeling/decomposition/incremental_pca.py +8 -1
  62. snowflake/ml/modeling/decomposition/kernel_pca.py +8 -1
  63. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +8 -1
  64. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +8 -1
  65. snowflake/ml/modeling/decomposition/pca.py +8 -1
  66. snowflake/ml/modeling/decomposition/sparse_pca.py +8 -1
  67. snowflake/ml/modeling/decomposition/truncated_svd.py +8 -1
  68. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +8 -1
  69. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +8 -1
  70. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +8 -1
  71. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +8 -1
  72. snowflake/ml/modeling/ensemble/bagging_classifier.py +8 -1
  73. snowflake/ml/modeling/ensemble/bagging_regressor.py +8 -1
  74. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +8 -1
  75. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +8 -1
  76. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +8 -1
  77. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +8 -1
  78. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +8 -1
  79. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +8 -1
  80. snowflake/ml/modeling/ensemble/isolation_forest.py +8 -1
  81. snowflake/ml/modeling/ensemble/random_forest_classifier.py +8 -1
  82. snowflake/ml/modeling/ensemble/random_forest_regressor.py +8 -1
  83. snowflake/ml/modeling/ensemble/stacking_regressor.py +8 -1
  84. snowflake/ml/modeling/ensemble/voting_classifier.py +8 -1
  85. snowflake/ml/modeling/ensemble/voting_regressor.py +8 -1
  86. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +8 -1
  87. snowflake/ml/modeling/feature_selection/select_fdr.py +8 -1
  88. snowflake/ml/modeling/feature_selection/select_fpr.py +8 -1
  89. snowflake/ml/modeling/feature_selection/select_fwe.py +8 -1
  90. snowflake/ml/modeling/feature_selection/select_k_best.py +8 -1
  91. snowflake/ml/modeling/feature_selection/select_percentile.py +8 -1
  92. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +8 -1
  93. snowflake/ml/modeling/feature_selection/variance_threshold.py +8 -1
  94. snowflake/ml/modeling/framework/base.py +4 -3
  95. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +8 -1
  96. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +8 -1
  97. snowflake/ml/modeling/impute/iterative_imputer.py +8 -1
  98. snowflake/ml/modeling/impute/knn_imputer.py +8 -1
  99. snowflake/ml/modeling/impute/missing_indicator.py +8 -1
  100. snowflake/ml/modeling/impute/simple_imputer.py +21 -2
  101. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +8 -1
  102. snowflake/ml/modeling/kernel_approximation/nystroem.py +8 -1
  103. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +8 -1
  104. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +8 -1
  105. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +8 -1
  106. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +8 -1
  107. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +8 -1
  108. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +8 -1
  109. snowflake/ml/modeling/linear_model/ard_regression.py +8 -1
  110. snowflake/ml/modeling/linear_model/bayesian_ridge.py +8 -1
  111. snowflake/ml/modeling/linear_model/elastic_net.py +8 -1
  112. snowflake/ml/modeling/linear_model/elastic_net_cv.py +8 -1
  113. snowflake/ml/modeling/linear_model/gamma_regressor.py +8 -1
  114. snowflake/ml/modeling/linear_model/huber_regressor.py +8 -1
  115. snowflake/ml/modeling/linear_model/lars.py +8 -1
  116. snowflake/ml/modeling/linear_model/lars_cv.py +8 -1
  117. snowflake/ml/modeling/linear_model/lasso.py +8 -1
  118. snowflake/ml/modeling/linear_model/lasso_cv.py +8 -1
  119. snowflake/ml/modeling/linear_model/lasso_lars.py +8 -1
  120. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +8 -1
  121. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +8 -1
  122. snowflake/ml/modeling/linear_model/linear_regression.py +8 -1
  123. snowflake/ml/modeling/linear_model/logistic_regression.py +8 -1
  124. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +8 -1
  125. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +8 -1
  126. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +8 -1
  127. snowflake/ml/modeling/linear_model/multi_task_lasso.py +8 -1
  128. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +8 -1
  129. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +8 -1
  130. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +8 -1
  131. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +8 -1
  132. snowflake/ml/modeling/linear_model/perceptron.py +8 -1
  133. snowflake/ml/modeling/linear_model/poisson_regressor.py +8 -1
  134. snowflake/ml/modeling/linear_model/ransac_regressor.py +8 -1
  135. snowflake/ml/modeling/linear_model/ridge.py +8 -1
  136. snowflake/ml/modeling/linear_model/ridge_classifier.py +8 -1
  137. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +8 -1
  138. snowflake/ml/modeling/linear_model/ridge_cv.py +8 -1
  139. snowflake/ml/modeling/linear_model/sgd_classifier.py +8 -1
  140. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +8 -1
  141. snowflake/ml/modeling/linear_model/sgd_regressor.py +8 -1
  142. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +8 -1
  143. snowflake/ml/modeling/linear_model/tweedie_regressor.py +8 -1
  144. snowflake/ml/modeling/manifold/isomap.py +8 -1
  145. snowflake/ml/modeling/manifold/mds.py +8 -1
  146. snowflake/ml/modeling/manifold/spectral_embedding.py +8 -1
  147. snowflake/ml/modeling/manifold/tsne.py +8 -1
  148. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +8 -1
  149. snowflake/ml/modeling/mixture/gaussian_mixture.py +8 -1
  150. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +8 -1
  151. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +8 -1
  152. snowflake/ml/modeling/multiclass/output_code_classifier.py +8 -1
  153. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +8 -1
  154. snowflake/ml/modeling/naive_bayes/categorical_nb.py +8 -1
  155. snowflake/ml/modeling/naive_bayes/complement_nb.py +8 -1
  156. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +8 -1
  157. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +8 -1
  158. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +8 -1
  159. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +8 -1
  160. snowflake/ml/modeling/neighbors/kernel_density.py +8 -1
  161. snowflake/ml/modeling/neighbors/local_outlier_factor.py +8 -1
  162. snowflake/ml/modeling/neighbors/nearest_centroid.py +8 -1
  163. snowflake/ml/modeling/neighbors/nearest_neighbors.py +8 -1
  164. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +8 -1
  165. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +8 -1
  166. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +8 -1
  167. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +8 -1
  168. snowflake/ml/modeling/neural_network/mlp_classifier.py +8 -1
  169. snowflake/ml/modeling/neural_network/mlp_regressor.py +8 -1
  170. snowflake/ml/modeling/parameters/enable_anonymous_sproc.py +5 -0
  171. snowflake/ml/modeling/pipeline/pipeline.py +27 -7
  172. snowflake/ml/modeling/preprocessing/polynomial_features.py +8 -1
  173. snowflake/ml/modeling/semi_supervised/label_propagation.py +8 -1
  174. snowflake/ml/modeling/semi_supervised/label_spreading.py +8 -1
  175. snowflake/ml/modeling/svm/linear_svc.py +8 -1
  176. snowflake/ml/modeling/svm/linear_svr.py +8 -1
  177. snowflake/ml/modeling/svm/nu_svc.py +8 -1
  178. snowflake/ml/modeling/svm/nu_svr.py +8 -1
  179. snowflake/ml/modeling/svm/svc.py +8 -1
  180. snowflake/ml/modeling/svm/svr.py +8 -1
  181. snowflake/ml/modeling/tree/decision_tree_classifier.py +8 -1
  182. snowflake/ml/modeling/tree/decision_tree_regressor.py +8 -1
  183. snowflake/ml/modeling/tree/extra_tree_classifier.py +8 -1
  184. snowflake/ml/modeling/tree/extra_tree_regressor.py +8 -1
  185. snowflake/ml/modeling/xgboost/xgb_classifier.py +8 -1
  186. snowflake/ml/modeling/xgboost/xgb_regressor.py +8 -1
  187. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +8 -1
  188. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +8 -1
  189. snowflake/ml/registry/_manager/model_manager.py +95 -8
  190. snowflake/ml/registry/registry.py +10 -1
  191. snowflake/ml/version.py +1 -1
  192. {snowflake_ml_python-1.5.0.dist-info → snowflake_ml_python-1.5.2.dist-info}/METADATA +66 -10
  193. {snowflake_ml_python-1.5.0.dist-info → snowflake_ml_python-1.5.2.dist-info}/RECORD +196 -192
  194. snowflake/ml/_internal/lineage/dataset_dataframe.py +0 -44
  195. {snowflake_ml_python-1.5.0.dist-info → snowflake_ml_python-1.5.2.dist-info}/LICENSE.txt +0 -0
  196. {snowflake_ml_python-1.5.0.dist-info → snowflake_ml_python-1.5.2.dist-info}/WHEEL +0 -0
  197. {snowflake_ml_python-1.5.0.dist-info → snowflake_ml_python-1.5.2.dist-info}/top_level.txt +0 -0
@@ -655,7 +655,14 @@ class LinearSVC(BaseTransformer):
655
655
  ) -> List[str]:
656
656
  # in case the inferred output column names dimension is different
657
657
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
658
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
658
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
659
+
660
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
661
+ # seen during the fit.
662
+ snowpark_column_names = dataset.select(self.input_cols).columns
663
+ sample_pd_df.columns = snowpark_column_names
664
+
665
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
659
666
  output_df_columns = list(output_df_pd.columns)
660
667
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
661
668
  if self.sample_weight_col:
@@ -627,7 +627,14 @@ class LinearSVR(BaseTransformer):
627
627
  ) -> List[str]:
628
628
  # in case the inferred output column names dimension is different
629
629
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
630
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
630
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
631
+
632
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
633
+ # seen during the fit.
634
+ snowpark_column_names = dataset.select(self.input_cols).columns
635
+ sample_pd_df.columns = snowpark_column_names
636
+
637
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
631
638
  output_df_columns = list(output_df_pd.columns)
632
639
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
633
640
  if self.sample_weight_col:
@@ -661,7 +661,14 @@ class NuSVC(BaseTransformer):
661
661
  ) -> List[str]:
662
662
  # in case the inferred output column names dimension is different
663
663
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
664
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
664
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
665
+
666
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
667
+ # seen during the fit.
668
+ snowpark_column_names = dataset.select(self.input_cols).columns
669
+ sample_pd_df.columns = snowpark_column_names
670
+
671
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
665
672
  output_df_columns = list(output_df_pd.columns)
666
673
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
667
674
  if self.sample_weight_col:
@@ -622,7 +622,14 @@ class NuSVR(BaseTransformer):
622
622
  ) -> List[str]:
623
623
  # in case the inferred output column names dimension is different
624
624
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
625
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
625
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
626
+
627
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
628
+ # seen during the fit.
629
+ snowpark_column_names = dataset.select(self.input_cols).columns
630
+ sample_pd_df.columns = snowpark_column_names
631
+
632
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
626
633
  output_df_columns = list(output_df_pd.columns)
627
634
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
628
635
  if self.sample_weight_col:
@@ -664,7 +664,14 @@ class SVC(BaseTransformer):
664
664
  ) -> List[str]:
665
665
  # in case the inferred output column names dimension is different
666
666
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
667
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
667
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
668
+
669
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
670
+ # seen during the fit.
671
+ snowpark_column_names = dataset.select(self.input_cols).columns
672
+ sample_pd_df.columns = snowpark_column_names
673
+
674
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
668
675
  output_df_columns = list(output_df_pd.columns)
669
676
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
670
677
  if self.sample_weight_col:
@@ -625,7 +625,14 @@ class SVR(BaseTransformer):
625
625
  ) -> List[str]:
626
626
  # in case the inferred output column names dimension is different
627
627
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
628
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
628
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
629
+
630
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
631
+ # seen during the fit.
632
+ snowpark_column_names = dataset.select(self.input_cols).columns
633
+ sample_pd_df.columns = snowpark_column_names
634
+
635
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
629
636
  output_df_columns = list(output_df_pd.columns)
630
637
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
631
638
  if self.sample_weight_col:
@@ -692,7 +692,14 @@ class DecisionTreeClassifier(BaseTransformer):
692
692
  ) -> List[str]:
693
693
  # in case the inferred output column names dimension is different
694
694
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
695
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
695
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
696
+
697
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
698
+ # seen during the fit.
699
+ snowpark_column_names = dataset.select(self.input_cols).columns
700
+ sample_pd_df.columns = snowpark_column_names
701
+
702
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
696
703
  output_df_columns = list(output_df_pd.columns)
697
704
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
698
705
  if self.sample_weight_col:
@@ -674,7 +674,14 @@ class DecisionTreeRegressor(BaseTransformer):
674
674
  ) -> List[str]:
675
675
  # in case the inferred output column names dimension is different
676
676
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
677
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
677
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
678
+
679
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
680
+ # seen during the fit.
681
+ snowpark_column_names = dataset.select(self.input_cols).columns
682
+ sample_pd_df.columns = snowpark_column_names
683
+
684
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
678
685
  output_df_columns = list(output_df_pd.columns)
679
686
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
680
687
  if self.sample_weight_col:
@@ -684,7 +684,14 @@ class ExtraTreeClassifier(BaseTransformer):
684
684
  ) -> List[str]:
685
685
  # in case the inferred output column names dimension is different
686
686
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
687
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
687
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
688
+
689
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
690
+ # seen during the fit.
691
+ snowpark_column_names = dataset.select(self.input_cols).columns
692
+ sample_pd_df.columns = snowpark_column_names
693
+
694
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
688
695
  output_df_columns = list(output_df_pd.columns)
689
696
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
690
697
  if self.sample_weight_col:
@@ -666,7 +666,14 @@ class ExtraTreeRegressor(BaseTransformer):
666
666
  ) -> List[str]:
667
667
  # in case the inferred output column names dimension is different
668
668
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
669
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
669
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
670
+
671
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
672
+ # seen during the fit.
673
+ snowpark_column_names = dataset.select(self.input_cols).columns
674
+ sample_pd_df.columns = snowpark_column_names
675
+
676
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
670
677
  output_df_columns = list(output_df_pd.columns)
671
678
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
672
679
  if self.sample_weight_col:
@@ -784,7 +784,14 @@ class XGBClassifier(BaseTransformer):
784
784
  ) -> List[str]:
785
785
  # in case the inferred output column names dimension is different
786
786
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
787
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
787
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
788
+
789
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
790
+ # seen during the fit.
791
+ snowpark_column_names = dataset.select(self.input_cols).columns
792
+ sample_pd_df.columns = snowpark_column_names
793
+
794
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
788
795
  output_df_columns = list(output_df_pd.columns)
789
796
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
790
797
  if self.sample_weight_col:
@@ -783,7 +783,14 @@ class XGBRegressor(BaseTransformer):
783
783
  ) -> List[str]:
784
784
  # in case the inferred output column names dimension is different
785
785
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
786
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
786
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
787
+
788
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
789
+ # seen during the fit.
790
+ snowpark_column_names = dataset.select(self.input_cols).columns
791
+ sample_pd_df.columns = snowpark_column_names
792
+
793
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
787
794
  output_df_columns = list(output_df_pd.columns)
788
795
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
789
796
  if self.sample_weight_col:
@@ -788,7 +788,14 @@ class XGBRFClassifier(BaseTransformer):
788
788
  ) -> List[str]:
789
789
  # in case the inferred output column names dimension is different
790
790
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
791
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
791
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
792
+
793
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
794
+ # seen during the fit.
795
+ snowpark_column_names = dataset.select(self.input_cols).columns
796
+ sample_pd_df.columns = snowpark_column_names
797
+
798
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
792
799
  output_df_columns = list(output_df_pd.columns)
793
800
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
794
801
  if self.sample_weight_col:
@@ -788,7 +788,14 @@ class XGBRFRegressor(BaseTransformer):
788
788
  ) -> List[str]:
789
789
  # in case the inferred output column names dimension is different
790
790
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
791
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
791
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
792
+
793
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
794
+ # seen during the fit.
795
+ snowpark_column_names = dataset.select(self.input_cols).columns
796
+ sample_pd_df.columns = snowpark_column_names
797
+
798
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
792
799
  output_df_columns = list(output_df_pd.columns)
793
800
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
794
801
  if self.sample_weight_col:
@@ -1,5 +1,5 @@
1
1
  from types import ModuleType
2
- from typing import Any, Dict, List, Optional
2
+ from typing import Any, Dict, List, Optional, Union
3
3
 
4
4
  import pandas as pd
5
5
  from absl.logging import logging
@@ -31,6 +31,64 @@ class ModelManager:
31
31
  self._hrid_generator = hrid_generator.HRID16()
32
32
 
33
33
  def log_model(
34
+ self,
35
+ *,
36
+ model: Union[model_types.SupportedModelType, model_version_impl.ModelVersion],
37
+ model_name: str,
38
+ version_name: Optional[str] = None,
39
+ comment: Optional[str] = None,
40
+ metrics: Optional[Dict[str, Any]] = None,
41
+ conda_dependencies: Optional[List[str]] = None,
42
+ pip_requirements: Optional[List[str]] = None,
43
+ python_version: Optional[str] = None,
44
+ signatures: Optional[Dict[str, model_signature.ModelSignature]] = None,
45
+ sample_input_data: Optional[model_types.SupportedDataType] = None,
46
+ code_paths: Optional[List[str]] = None,
47
+ ext_modules: Optional[List[ModuleType]] = None,
48
+ options: Optional[model_types.ModelSaveOption] = None,
49
+ statement_params: Optional[Dict[str, Any]] = None,
50
+ ) -> model_version_impl.ModelVersion:
51
+ if not version_name:
52
+ version_name = self._hrid_generator.generate()[1]
53
+
54
+ if isinstance(model, model_version_impl.ModelVersion):
55
+ (
56
+ source_database_name_id,
57
+ source_schema_name_id,
58
+ source_model_name_id,
59
+ ) = sql_identifier.parse_fully_qualified_name(model.fully_qualified_model_name)
60
+
61
+ self._model_ops.create_from_model_version(
62
+ source_database_name=source_database_name_id,
63
+ source_schema_name=source_schema_name_id,
64
+ source_model_name=source_model_name_id,
65
+ source_version_name=sql_identifier.SqlIdentifier(model.version_name),
66
+ database_name=None,
67
+ schema_name=None,
68
+ model_name=sql_identifier.SqlIdentifier(model_name),
69
+ version_name=sql_identifier.SqlIdentifier(version_name),
70
+ statement_params=statement_params,
71
+ )
72
+ return self.get_model(model_name=model_name, statement_params=statement_params).version(version_name)
73
+
74
+ return self._log_model(
75
+ model=model,
76
+ model_name=model_name,
77
+ version_name=version_name,
78
+ comment=comment,
79
+ metrics=metrics,
80
+ conda_dependencies=conda_dependencies,
81
+ pip_requirements=pip_requirements,
82
+ python_version=python_version,
83
+ signatures=signatures,
84
+ sample_input_data=sample_input_data,
85
+ code_paths=code_paths,
86
+ ext_modules=ext_modules,
87
+ options=options,
88
+ statement_params=statement_params,
89
+ )
90
+
91
+ def _log_model(
34
92
  self,
35
93
  model: model_types.SupportedModelType,
36
94
  *,
@@ -48,20 +106,29 @@ class ModelManager:
48
106
  options: Optional[model_types.ModelSaveOption] = None,
49
107
  statement_params: Optional[Dict[str, Any]] = None,
50
108
  ) -> model_version_impl.ModelVersion:
51
- model_name_id = sql_identifier.SqlIdentifier(model_name)
109
+ database_name_id, schema_name_id, model_name_id = sql_identifier.parse_fully_qualified_name(model_name)
52
110
 
53
111
  if not version_name:
54
112
  version_name = self._hrid_generator.generate()[1]
55
113
  version_name_id = sql_identifier.SqlIdentifier(version_name)
56
114
 
57
115
  if self._model_ops.validate_existence(
58
- model_name=model_name_id, statement_params=statement_params
116
+ database_name=database_name_id,
117
+ schema_name=schema_name_id,
118
+ model_name=model_name_id,
119
+ statement_params=statement_params,
59
120
  ) and self._model_ops.validate_existence(
60
- model_name=model_name_id, version_name=version_name_id, statement_params=statement_params
121
+ database_name=database_name_id,
122
+ schema_name=schema_name_id,
123
+ model_name=model_name_id,
124
+ version_name=version_name_id,
125
+ statement_params=statement_params,
61
126
  ):
62
127
  raise ValueError(f"Model {model_name} version {version_name} already existed.")
63
128
 
64
129
  stage_path = self._model_ops.prepare_model_stage_path(
130
+ database_name=database_name_id,
131
+ schema_name=schema_name_id,
65
132
  statement_params=statement_params,
66
133
  )
67
134
 
@@ -85,13 +152,19 @@ class ModelManager:
85
152
 
86
153
  self._model_ops.create_from_stage(
87
154
  composed_model=mc,
155
+ database_name=database_name_id,
156
+ schema_name=schema_name_id,
88
157
  model_name=model_name_id,
89
158
  version_name=version_name_id,
90
159
  statement_params=statement_params,
91
160
  )
92
161
 
93
162
  mv = model_version_impl.ModelVersion._ref(
94
- self._model_ops,
163
+ model_ops.ModelOperator(
164
+ self._model_ops._session,
165
+ database_name=database_name_id or self._database_name,
166
+ schema_name=schema_name_id or self._schema_name,
167
+ ),
95
168
  model_name=model_name_id,
96
169
  version_name=version_name_id,
97
170
  )
@@ -102,6 +175,8 @@ class ModelManager:
102
175
  if metrics:
103
176
  self._model_ops._metadata_ops.save(
104
177
  metadata_ops.ModelVersionMetadataSchema(metrics=metrics),
178
+ database_name=database_name_id,
179
+ schema_name=schema_name_id,
105
180
  model_name=model_name_id,
106
181
  version_name=version_name_id,
107
182
  statement_params=statement_params,
@@ -115,13 +190,19 @@ class ModelManager:
115
190
  *,
116
191
  statement_params: Optional[Dict[str, Any]] = None,
117
192
  ) -> model_impl.Model:
118
- model_name_id = sql_identifier.SqlIdentifier(model_name)
193
+ database_name_id, schema_name_id, model_name_id = sql_identifier.parse_fully_qualified_name(model_name)
119
194
  if self._model_ops.validate_existence(
195
+ database_name=database_name_id,
196
+ schema_name=schema_name_id,
120
197
  model_name=model_name_id,
121
198
  statement_params=statement_params,
122
199
  ):
123
200
  return model_impl.Model._ref(
124
- self._model_ops,
201
+ model_ops.ModelOperator(
202
+ self._model_ops._session,
203
+ database_name=database_name_id or self._database_name,
204
+ schema_name=schema_name_id or self._schema_name,
205
+ ),
125
206
  model_name=model_name_id,
126
207
  )
127
208
  else:
@@ -133,6 +214,8 @@ class ModelManager:
133
214
  statement_params: Optional[Dict[str, Any]] = None,
134
215
  ) -> List[model_impl.Model]:
135
216
  model_names = self._model_ops.list_models_or_versions(
217
+ database_name=None,
218
+ schema_name=None,
136
219
  statement_params=statement_params,
137
220
  )
138
221
  return [
@@ -149,6 +232,8 @@ class ModelManager:
149
232
  statement_params: Optional[Dict[str, Any]] = None,
150
233
  ) -> pd.DataFrame:
151
234
  rows = self._model_ops.show_models_or_versions(
235
+ database_name=None,
236
+ schema_name=None,
152
237
  statement_params=statement_params,
153
238
  )
154
239
  return pd.DataFrame([row.as_dict() for row in rows])
@@ -159,9 +244,11 @@ class ModelManager:
159
244
  *,
160
245
  statement_params: Optional[Dict[str, Any]] = None,
161
246
  ) -> None:
162
- model_name_id = sql_identifier.SqlIdentifier(model_name)
247
+ database_name_id, schema_name_id, model_name_id = sql_identifier.parse_fully_qualified_name(model_name)
163
248
 
164
249
  self._model_ops.delete_model_or_version(
250
+ database_name=database_name_id,
251
+ schema_name=schema_name_id,
165
252
  model_name=model_name_id,
166
253
  statement_params=statement_params,
167
254
  )
@@ -71,6 +71,16 @@ class Registry:
71
71
  @telemetry.send_api_usage_telemetry(
72
72
  project=_TELEMETRY_PROJECT,
73
73
  subproject=_MODEL_TELEMETRY_SUBPROJECT,
74
+ func_params_to_log=[
75
+ "model_name",
76
+ "version_name",
77
+ "comment",
78
+ "metrics",
79
+ "conda_dependencies",
80
+ "pip_requirements",
81
+ "python_version",
82
+ "signatures",
83
+ ],
74
84
  )
75
85
  def log_model(
76
86
  self,
@@ -142,7 +152,6 @@ class Registry:
142
152
  Returns:
143
153
  ModelVersion: ModelVersion object corresponding to the model just logged.
144
154
  """
145
-
146
155
  statement_params = telemetry.get_statement_params(
147
156
  project=_TELEMETRY_PROJECT,
148
157
  subproject=_MODEL_TELEMETRY_SUBPROJECT,
snowflake/ml/version.py CHANGED
@@ -1 +1 @@
1
- VERSION="1.5.0"
1
+ VERSION="1.5.2"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: snowflake-ml-python
3
- Version: 1.5.0
3
+ Version: 1.5.2
4
4
  Summary: The machine learning client library that is used for interacting with Snowflake to build machine learning solutions.
5
5
  Author-email: "Snowflake, Inc" <support@snowflake.com>
6
6
  License:
@@ -236,7 +236,6 @@ License-File: LICENSE.txt
236
236
  Requires-Dist: absl-py <2,>=0.15
237
237
  Requires-Dist: anyio <4,>=3.5.0
238
238
  Requires-Dist: cachetools <6,>=3.1.1
239
- Requires-Dist: catboost <1.3,>=1.2.0
240
239
  Requires-Dist: cloudpickle >=2.0.0
241
240
  Requires-Dist: fsspec[http] <2024,>=2022.11
242
241
  Requires-Dist: importlib-resources <7,>=6.1.1
@@ -256,19 +255,22 @@ Requires-Dist: sqlparse <1,>=0.4
256
255
  Requires-Dist: typing-extensions <5,>=4.1.0
257
256
  Requires-Dist: xgboost <2,>=1.7.3
258
257
  Provides-Extra: all
259
- Requires-Dist: lightgbm <4.2,>=3.3.5 ; extra == 'all'
258
+ Requires-Dist: catboost <2,>=1.2.0 ; extra == 'all'
259
+ Requires-Dist: lightgbm <5,>=3.3.5 ; extra == 'all'
260
260
  Requires-Dist: mlflow <2.4,>=2.1.0 ; extra == 'all'
261
261
  Requires-Dist: peft <1,>=0.5.0 ; extra == 'all'
262
262
  Requires-Dist: sentence-transformers <3,>=2.2.2 ; extra == 'all'
263
- Requires-Dist: sentencepiece <0.2,>=0.1.95 ; extra == 'all'
263
+ Requires-Dist: sentencepiece <1,>=0.1.95 ; extra == 'all'
264
264
  Requires-Dist: shap ==0.42.1 ; extra == 'all'
265
265
  Requires-Dist: tensorflow <3,>=2.10 ; extra == 'all'
266
266
  Requires-Dist: tokenizers <1,>=0.10 ; extra == 'all'
267
267
  Requires-Dist: torch <3,>=2.0.1 ; extra == 'all'
268
268
  Requires-Dist: torchdata <1,>=0.4 ; extra == 'all'
269
269
  Requires-Dist: transformers <5,>=4.32.1 ; extra == 'all'
270
+ Provides-Extra: catboost
271
+ Requires-Dist: catboost <2,>=1.2.0 ; extra == 'catboost'
270
272
  Provides-Extra: lightgbm
271
- Requires-Dist: lightgbm <4.2,>=3.3.5 ; extra == 'lightgbm'
273
+ Requires-Dist: lightgbm <5,>=3.3.5 ; extra == 'lightgbm'
272
274
  Provides-Extra: llm
273
275
  Requires-Dist: peft <1,>=0.5.0 ; extra == 'llm'
274
276
  Provides-Extra: mlflow
@@ -282,7 +284,7 @@ Requires-Dist: torch <3,>=2.0.1 ; extra == 'torch'
282
284
  Requires-Dist: torchdata <1,>=0.4 ; extra == 'torch'
283
285
  Provides-Extra: transformers
284
286
  Requires-Dist: sentence-transformers <3,>=2.2.2 ; extra == 'transformers'
285
- Requires-Dist: sentencepiece <0.2,>=0.1.95 ; extra == 'transformers'
287
+ Requires-Dist: sentencepiece <1,>=0.1.95 ; extra == 'transformers'
286
288
  Requires-Dist: tokenizers <1,>=0.10 ; extra == 'transformers'
287
289
  Requires-Dist: transformers <5,>=4.32.1 ; extra == 'transformers'
288
290
 
@@ -371,6 +373,43 @@ be compatibility issues. Server-side functionality that `snowflake-ml-python` de
371
373
 
372
374
  # Release History
373
375
 
376
+ ## 1.5.2
377
+
378
+ ### Bug Fixes
379
+
380
+ - Registry: Fix an issue that prevented logging a model from within a stored procedure.
381
+ - Modeling: Quick fix for `import snowflake.ml.modeling.parameters.enable_anonymous_sproc` failing due to a package
382
+ dependency error.
383
+
384
+ ### Behavior Changes
385
+
386
+ ### New Features
387
+
388
+ ## 1.5.1
389
+
390
+ ### Bug Fixes
391
+
392
+ - Dataset: Fix `snowflake.connector.errors.DataError: Query Result did not match expected number of rows` when accessing
393
+ DatasetVersion properties when case insensitive `SHOW VERSIONS IN DATASET` check matches multiple version names.
394
+ - Dataset: Fix bug in SnowFS bulk file read when used with DuckDB
395
+ - Registry: Fixed a bug when loading old models.
396
+ - Lineage: Fix Dataset source lineage propagation through `snowpark.DataFrame` transformations
397
+
398
+ ### Behavior Changes
399
+
400
+ - Feature Store: convert clear() into a private function. Also make it delete feature views and entities only.
401
+ - Feature Store: Use NULL as default value for timestamp tag value.
402
+
403
+ ### New Features
404
+
405
+ - Feature Store: Added new `snowflake.ml.feature_store.setup_feature_store()` API to assist Feature Store RBAC setup.
406
+ - Feature Store: Add `output_type` argument to `FeatureStore.generate_dataset()` to allow generating data snapshots
407
+ as Datasets or Tables.
408
+ - Registry: `log_model`, `get_model`, `delete_model` now support fully qualified names.
409
+ - Modeling: Support anonymous stored procedures during fit calls so that modeling does not require
410
+ permissions to operate on the schema. To enable this, add
411
+ `import snowflake.ml.modeling.parameters.enable_anonymous_sproc # noqa: F401`
412
+
374
413
  ## 1.5.0
375
414
 
376
415
  ### Bug Fixes
@@ -411,12 +450,19 @@ be compatibility issues. Server-side functionality that `snowflake-ml-python` de
411
450
 
412
451
  #### Feature Store (PrPr)
413
452
 
414
- `FeatureStore.generate_dataset` argument list has been changed to match the new
453
+ - `FeatureStore.generate_dataset` argument list has been changed to match the new
415
454
  `snowflake.ml.dataset.Dataset` definition
416
455
 
417
- - `materialized_table` has been removed and replaced with `name` and `version`.
418
- - `name` moved to first positional argument
419
- - `save_mode` has been removed as `merge` behavior is no longer supported. The new behavior is always `errorifexists`.
456
+ - `materialized_table` has been removed and replaced with `name` and `version`.
457
+ - `name` moved to first positional argument
458
+ - `save_mode` has been removed as `merge` behavior is no longer supported. The new behavior is always `errorifexists`.
459
+
460
+ - Change feature view version type from str to `FeatureViewVersion`. It is a restricted string literal.
461
+
462
+ - Remove as_dataframe arg from FeatureStore.list_feature_views(), now always returns result as DataFrame.
463
+
464
+ - Combines a few metadata tags into a new tag: SNOWML_FEATURE_VIEW_METADATA. This will make previously created feature views
465
+ not readable by new SDK.
420
466
 
421
467
  ### New Features
422
468
 
@@ -432,6 +478,10 @@ be compatibility issues. Server-side functionality that `snowflake-ml-python` de
432
478
  and `Dataset.read.to_tf_dataset()` respectively.
433
479
  - Added `fsspec` style file integration using `Dataset.read.files()` and `Dataset.read.filesystem()`
434
480
 
481
+ #### Feature Store
482
+
483
+ - use new tag_reference_internal to speed up metadata lookup.
484
+
435
485
  ## 1.4.1 (2024-04-18)
436
486
 
437
487
  ### New Features
@@ -443,6 +493,10 @@ be compatibility issues. Server-side functionality that `snowflake-ml-python` de
443
493
 
444
494
  - Registry: Fix a bug that caused the `relax_version` option to not work.
445
495
 
496
+ ### Behavior changes
497
+
498
+ - Feature Store: update_feature_view takes refresh_freq and warehouse as arguments.
499
+
446
500
  ## 1.4.0 (2024-04-08)
447
501
 
448
502
  ### Bug Fixes
@@ -464,6 +518,8 @@ be compatibility issues. Server-side functionality that `snowflake-ml-python` de
464
518
 
465
519
  - Registry: `apply` method is no longer by default logged when logging a xgboost model. If that is required, it could
466
520
  be specified manually when logging the model by `log_model(..., options={"target_methods": ["apply", ...]})`.
521
+ - Feature Store: register_entity returns an entity object.
522
+ - Feature Store: register_feature_view `block=true` becomes default.
467
523
 
468
524
  ### New Features
469
525