snowflake-ml-python 1.3.0__py3-none-any.whl → 1.4.0__py3-none-any.whl

This diff shows the changes between publicly released package versions as they appear in their respective public registries, and is provided for informational purposes only.
Files changed (211)
  1. snowflake/ml/_internal/file_utils.py +3 -3
  2. snowflake/ml/_internal/human_readable_id/adjectives.txt +128 -0
  3. snowflake/ml/_internal/human_readable_id/animals.txt +128 -0
  4. snowflake/ml/_internal/human_readable_id/hrid_generator.py +40 -0
  5. snowflake/ml/_internal/human_readable_id/hrid_generator_base.py +135 -0
  6. snowflake/ml/_internal/telemetry.py +11 -2
  7. snowflake/ml/_internal/utils/formatting.py +1 -1
  8. snowflake/ml/feature_store/feature_store.py +15 -106
  9. snowflake/ml/fileset/sfcfs.py +4 -3
  10. snowflake/ml/fileset/stage_fs.py +18 -0
  11. snowflake/ml/model/_api.py +9 -9
  12. snowflake/ml/model/_client/model/model_version_impl.py +20 -15
  13. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +3 -9
  14. snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +3 -5
  15. snowflake/ml/model/_deploy_client/snowservice/deploy.py +7 -6
  16. snowflake/ml/model/_model_composer/model_composer.py +10 -8
  17. snowflake/ml/model/_model_composer/model_method/function_generator.py +1 -1
  18. snowflake/ml/model/_model_composer/model_method/infer_table_function.py_template +2 -1
  19. snowflake/ml/model/_model_composer/model_method/model_method.py +2 -2
  20. snowflake/ml/model/_model_composer/model_runtime/_runtime_requirements.py +1 -1
  21. snowflake/ml/model/_packager/model_handlers/_base.py +2 -2
  22. snowflake/ml/model/_packager/model_handlers/_utils.py +5 -5
  23. snowflake/ml/model/_packager/model_handlers/custom.py +7 -7
  24. snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +2 -2
  25. snowflake/ml/model/_packager/model_handlers/llm.py +1 -1
  26. snowflake/ml/model/_packager/model_handlers/mlflow.py +1 -1
  27. snowflake/ml/model/_packager/model_handlers/pytorch.py +13 -10
  28. snowflake/ml/model/_packager/model_handlers/sentence_transformers.py +214 -0
  29. snowflake/ml/model/_packager/model_handlers/sklearn.py +6 -6
  30. snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +15 -3
  31. snowflake/ml/model/_packager/model_handlers/tensorflow.py +8 -8
  32. snowflake/ml/model/_packager/model_handlers/torchscript.py +7 -7
  33. snowflake/ml/model/_packager/model_handlers/xgboost.py +8 -8
  34. snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
  35. snowflake/ml/model/_packager/model_packager.py +8 -6
  36. snowflake/ml/model/custom_model.py +3 -1
  37. snowflake/ml/model/type_hints.py +13 -0
  38. snowflake/ml/modeling/_internal/estimator_utils.py +61 -1
  39. snowflake/ml/modeling/_internal/local_implementations/pandas_handlers.py +4 -43
  40. snowflake/ml/modeling/_internal/local_implementations/pandas_trainer.py +4 -4
  41. snowflake/ml/modeling/_internal/ml_runtime_implementations/ml_runtime_handlers.py +21 -17
  42. snowflake/ml/modeling/_internal/model_specifications.py +3 -1
  43. snowflake/ml/modeling/_internal/model_trainer.py +2 -2
  44. snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +547 -1
  45. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +67 -114
  46. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +9 -9
  47. snowflake/ml/modeling/_internal/transformer_protocols.py +2 -3
  48. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +33 -61
  49. snowflake/ml/modeling/cluster/affinity_propagation.py +33 -61
  50. snowflake/ml/modeling/cluster/agglomerative_clustering.py +33 -61
  51. snowflake/ml/modeling/cluster/birch.py +33 -61
  52. snowflake/ml/modeling/cluster/bisecting_k_means.py +33 -61
  53. snowflake/ml/modeling/cluster/dbscan.py +33 -61
  54. snowflake/ml/modeling/cluster/feature_agglomeration.py +33 -61
  55. snowflake/ml/modeling/cluster/k_means.py +33 -61
  56. snowflake/ml/modeling/cluster/mean_shift.py +33 -61
  57. snowflake/ml/modeling/cluster/mini_batch_k_means.py +33 -61
  58. snowflake/ml/modeling/cluster/optics.py +33 -61
  59. snowflake/ml/modeling/cluster/spectral_biclustering.py +33 -61
  60. snowflake/ml/modeling/cluster/spectral_clustering.py +33 -61
  61. snowflake/ml/modeling/cluster/spectral_coclustering.py +33 -61
  62. snowflake/ml/modeling/compose/column_transformer.py +33 -61
  63. snowflake/ml/modeling/compose/transformed_target_regressor.py +33 -61
  64. snowflake/ml/modeling/covariance/elliptic_envelope.py +33 -61
  65. snowflake/ml/modeling/covariance/empirical_covariance.py +33 -61
  66. snowflake/ml/modeling/covariance/graphical_lasso.py +33 -61
  67. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +33 -61
  68. snowflake/ml/modeling/covariance/ledoit_wolf.py +33 -61
  69. snowflake/ml/modeling/covariance/min_cov_det.py +33 -61
  70. snowflake/ml/modeling/covariance/oas.py +33 -61
  71. snowflake/ml/modeling/covariance/shrunk_covariance.py +33 -61
  72. snowflake/ml/modeling/decomposition/dictionary_learning.py +33 -61
  73. snowflake/ml/modeling/decomposition/factor_analysis.py +33 -61
  74. snowflake/ml/modeling/decomposition/fast_ica.py +33 -61
  75. snowflake/ml/modeling/decomposition/incremental_pca.py +33 -61
  76. snowflake/ml/modeling/decomposition/kernel_pca.py +33 -61
  77. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +33 -61
  78. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +33 -61
  79. snowflake/ml/modeling/decomposition/pca.py +33 -61
  80. snowflake/ml/modeling/decomposition/sparse_pca.py +33 -61
  81. snowflake/ml/modeling/decomposition/truncated_svd.py +33 -61
  82. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +33 -61
  83. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +33 -61
  84. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +33 -61
  85. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +33 -61
  86. snowflake/ml/modeling/ensemble/bagging_classifier.py +33 -61
  87. snowflake/ml/modeling/ensemble/bagging_regressor.py +33 -61
  88. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +33 -61
  89. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +33 -61
  90. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +33 -61
  91. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +33 -61
  92. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +33 -61
  93. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +33 -61
  94. snowflake/ml/modeling/ensemble/isolation_forest.py +33 -61
  95. snowflake/ml/modeling/ensemble/random_forest_classifier.py +33 -61
  96. snowflake/ml/modeling/ensemble/random_forest_regressor.py +33 -61
  97. snowflake/ml/modeling/ensemble/stacking_regressor.py +33 -61
  98. snowflake/ml/modeling/ensemble/voting_classifier.py +33 -61
  99. snowflake/ml/modeling/ensemble/voting_regressor.py +33 -61
  100. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +33 -61
  101. snowflake/ml/modeling/feature_selection/select_fdr.py +33 -61
  102. snowflake/ml/modeling/feature_selection/select_fpr.py +33 -61
  103. snowflake/ml/modeling/feature_selection/select_fwe.py +33 -61
  104. snowflake/ml/modeling/feature_selection/select_k_best.py +33 -61
  105. snowflake/ml/modeling/feature_selection/select_percentile.py +33 -61
  106. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +33 -61
  107. snowflake/ml/modeling/feature_selection/variance_threshold.py +33 -61
  108. snowflake/ml/modeling/framework/base.py +55 -5
  109. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +33 -61
  110. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +33 -61
  111. snowflake/ml/modeling/impute/iterative_imputer.py +33 -61
  112. snowflake/ml/modeling/impute/knn_imputer.py +33 -61
  113. snowflake/ml/modeling/impute/missing_indicator.py +33 -61
  114. snowflake/ml/modeling/impute/simple_imputer.py +4 -15
  115. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +33 -61
  116. snowflake/ml/modeling/kernel_approximation/nystroem.py +33 -61
  117. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +33 -61
  118. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +33 -61
  119. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +33 -61
  120. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +33 -61
  121. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +36 -63
  122. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +36 -63
  123. snowflake/ml/modeling/linear_model/ard_regression.py +33 -61
  124. snowflake/ml/modeling/linear_model/bayesian_ridge.py +33 -61
  125. snowflake/ml/modeling/linear_model/elastic_net.py +33 -61
  126. snowflake/ml/modeling/linear_model/elastic_net_cv.py +33 -61
  127. snowflake/ml/modeling/linear_model/gamma_regressor.py +33 -61
  128. snowflake/ml/modeling/linear_model/huber_regressor.py +33 -61
  129. snowflake/ml/modeling/linear_model/lars.py +33 -61
  130. snowflake/ml/modeling/linear_model/lars_cv.py +33 -61
  131. snowflake/ml/modeling/linear_model/lasso.py +33 -61
  132. snowflake/ml/modeling/linear_model/lasso_cv.py +33 -61
  133. snowflake/ml/modeling/linear_model/lasso_lars.py +33 -61
  134. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +33 -61
  135. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +33 -61
  136. snowflake/ml/modeling/linear_model/linear_regression.py +33 -61
  137. snowflake/ml/modeling/linear_model/logistic_regression.py +33 -61
  138. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +33 -61
  139. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +33 -61
  140. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +33 -61
  141. snowflake/ml/modeling/linear_model/multi_task_lasso.py +33 -61
  142. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +33 -61
  143. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +33 -61
  144. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +33 -61
  145. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +33 -61
  146. snowflake/ml/modeling/linear_model/perceptron.py +33 -61
  147. snowflake/ml/modeling/linear_model/poisson_regressor.py +33 -61
  148. snowflake/ml/modeling/linear_model/ransac_regressor.py +33 -61
  149. snowflake/ml/modeling/linear_model/ridge.py +33 -61
  150. snowflake/ml/modeling/linear_model/ridge_classifier.py +33 -61
  151. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +33 -61
  152. snowflake/ml/modeling/linear_model/ridge_cv.py +33 -61
  153. snowflake/ml/modeling/linear_model/sgd_classifier.py +33 -61
  154. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +33 -61
  155. snowflake/ml/modeling/linear_model/sgd_regressor.py +33 -61
  156. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +33 -61
  157. snowflake/ml/modeling/linear_model/tweedie_regressor.py +33 -61
  158. snowflake/ml/modeling/manifold/isomap.py +33 -61
  159. snowflake/ml/modeling/manifold/mds.py +33 -61
  160. snowflake/ml/modeling/manifold/spectral_embedding.py +33 -61
  161. snowflake/ml/modeling/manifold/tsne.py +33 -61
  162. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +33 -61
  163. snowflake/ml/modeling/mixture/gaussian_mixture.py +33 -61
  164. snowflake/ml/modeling/model_selection/grid_search_cv.py +39 -57
  165. snowflake/ml/modeling/model_selection/randomized_search_cv.py +26 -57
  166. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +33 -61
  167. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +33 -61
  168. snowflake/ml/modeling/multiclass/output_code_classifier.py +33 -61
  169. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +33 -61
  170. snowflake/ml/modeling/naive_bayes/categorical_nb.py +33 -61
  171. snowflake/ml/modeling/naive_bayes/complement_nb.py +33 -61
  172. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +33 -61
  173. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +33 -61
  174. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +33 -61
  175. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +33 -61
  176. snowflake/ml/modeling/neighbors/kernel_density.py +33 -61
  177. snowflake/ml/modeling/neighbors/local_outlier_factor.py +33 -61
  178. snowflake/ml/modeling/neighbors/nearest_centroid.py +33 -61
  179. snowflake/ml/modeling/neighbors/nearest_neighbors.py +33 -61
  180. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +33 -61
  181. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +33 -61
  182. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +33 -61
  183. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +33 -61
  184. snowflake/ml/modeling/neural_network/mlp_classifier.py +33 -61
  185. snowflake/ml/modeling/neural_network/mlp_regressor.py +33 -61
  186. snowflake/ml/modeling/preprocessing/polynomial_features.py +33 -61
  187. snowflake/ml/modeling/semi_supervised/label_propagation.py +33 -61
  188. snowflake/ml/modeling/semi_supervised/label_spreading.py +33 -61
  189. snowflake/ml/modeling/svm/linear_svc.py +33 -61
  190. snowflake/ml/modeling/svm/linear_svr.py +33 -61
  191. snowflake/ml/modeling/svm/nu_svc.py +33 -61
  192. snowflake/ml/modeling/svm/nu_svr.py +33 -61
  193. snowflake/ml/modeling/svm/svc.py +33 -61
  194. snowflake/ml/modeling/svm/svr.py +33 -61
  195. snowflake/ml/modeling/tree/decision_tree_classifier.py +33 -61
  196. snowflake/ml/modeling/tree/decision_tree_regressor.py +33 -61
  197. snowflake/ml/modeling/tree/extra_tree_classifier.py +33 -61
  198. snowflake/ml/modeling/tree/extra_tree_regressor.py +33 -61
  199. snowflake/ml/modeling/xgboost/xgb_classifier.py +33 -61
  200. snowflake/ml/modeling/xgboost/xgb_regressor.py +33 -61
  201. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +33 -61
  202. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +33 -61
  203. snowflake/ml/registry/_manager/model_manager.py +6 -2
  204. snowflake/ml/registry/model_registry.py +100 -27
  205. snowflake/ml/registry/registry.py +6 -2
  206. snowflake/ml/version.py +1 -1
  207. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/METADATA +43 -7
  208. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/RECORD +211 -206
  209. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/LICENSE.txt +0 -0
  210. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/WHEEL +0 -0
  211. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/top_level.txt +0 -0
@@ -487,18 +487,24 @@ class XGBRFClassifier(BaseTransformer):
         self._get_model_signatures(dataset)
         return self
 
-    def _get_pass_through_columns(self, dataset: DataFrame) -> List[str]:
-        if self._drop_input_cols:
-            return []
-        else:
-            return list(set(dataset.columns) - set(self.output_cols))
-
     def _batch_inference_validate_snowpark(
         self,
         dataset: DataFrame,
         inference_method: str,
     ) -> List[str]:
-        """Util method to run validate that batch inference can be run on a snowpark dataframe.
+        """Util method to run validate that batch inference can be run on a snowpark dataframe and
+        return the available package that exists in the snowflake anaconda channel
+
+        Args:
+            dataset: snowpark dataframe
+            inference_method: the inference method such as predict, score...
+
+        Raises:
+            SnowflakeMLException: If the estimator is not fitted, raise error
+            SnowflakeMLException: If the session is None, raise error
+
+        Returns:
+            A list of available package that exists in the snowflake anaconda channel
         """
         if not self._is_fitted:
             raise exceptions.SnowflakeMLException(
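Note: as of this release the generated estimators no longer compute pass-through columns themselves; they forward the drop_input_cols flag and let the inference handlers derive the column set. Below is a minimal, dependency-free sketch (plain Python, not the library's code) of the rule the removed _get_pass_through_columns helper encoded:

    from typing import List

    def pass_through_columns(dataset_columns: List[str], output_cols: List[str], drop_input_cols: bool) -> List[str]:
        # Keep no extra columns when input columns are dropped; otherwise keep
        # every dataset column that is not an output column.
        if drop_input_cols:
            return []
        return list(set(dataset_columns) - set(output_cols))

    print(pass_through_columns(["A", "B"], ["OUTPUT"], drop_input_cols=False))  # ['A', 'B'] (order not guaranteed)
    print(pass_through_columns(["A", "B"], ["OUTPUT"], drop_input_cols=True))   # []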
@@ -572,7 +578,7 @@ class XGBRFClassifier(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_type_inferred,
             )
 
@@ -632,16 +638,16 @@ class XGBRFClassifier(BaseTransformer):
             # from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes) (and everything in between)
             # based on init param values. We will convert that to pandas dataframe of shape (n_samples, 1) with
             # each row containing a list of values.
-            expected_dtype = "ARRAY"
+            expected_dtype = "array"
 
         # If we were unable to assign a type to this transform in the factory, infer the type here.
         if expected_dtype == "":
-            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "array"
             if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
-                expected_dtype = "ARRAY"
-            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                expected_dtype = "array"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "array"
             elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
-                expected_dtype = "ARRAY"
+                expected_dtype = "array"
             else:
                 output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
                 # We can only infer the output types from the input types if the following two statemetns are true:
@@ -659,7 +665,7 @@ class XGBRFClassifier(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_dtype,
             )
 
@@ -710,7 +716,7 @@ class XGBRFClassifier(BaseTransformer):
                 subproject=_SUBPROJECT,
             )
             output_result, fitted_estimator = model_trainer.train_fit_predict(
-                pass_through_columns=self._get_pass_through_columns(dataset),
+                drop_input_cols=self._drop_input_cols,
                 expected_output_cols_list=self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix),
             )
             self._sklearn_object = fitted_estimator
@@ -728,44 +734,6 @@ class XGBRFClassifier(BaseTransformer):
         assert self._sklearn_object is not None
         return self._sklearn_object.embedding_
 
-
-    def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
-        """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-            Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
-        """
-        output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
-        if output_cols:
-            output_cols = [
-                identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
-                for c in output_cols
-            ]
-        elif getattr(self._sklearn_object, "classes_", None) is None:
-            output_cols = [output_cols_prefix]
-        elif self._sklearn_object is not None:
-            classes = self._sklearn_object.classes_
-            if isinstance(classes, numpy.ndarray):
-                output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
-            elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
-                # If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
-                output_cols = []
-                for i, cl in enumerate(classes):
-                    # For binary classification, there is only one output column for each class
-                    # ndarray as the two classes are complementary.
-                    if len(cl) == 2:
-                        output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
-                    else:
-                        output_cols.extend([
-                            f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
-                        ])
-            else:
-                output_cols = []
-
-        # Make sure column names are valid snowflake identifiers.
-        assert output_cols is not None  # Make MyPy happy
-        rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
-
-        return rv
-
     @available_if(original_estimator_has_callable("predict_proba"))  # type: ignore[misc]
     @telemetry.send_api_usage_telemetry(
         project=_PROJECT,
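Note: the per-estimator _get_output_column_names override is removed here. The file list above shows snowflake/ml/modeling/_internal/estimator_utils.py and framework/base.py growing by a similar amount, so the logic presumably moved into shared code; that destination is an assumption, not shown in this hunk. A condensed, runnable sketch of the naming rule the removed code implemented, without the Snowflake identifier handling:

    import numpy

    def output_column_names(prefix: str, classes=None):
        # Classifiers get one column per class value; everything else gets the prefix alone.
        if classes is None:
            return [prefix]
        if isinstance(classes, numpy.ndarray):
            return [f"{prefix}{c}" for c in classes.tolist()]
        return [prefix]

    print(output_column_names("OUTPUT_"))                              # ['OUTPUT_']
    print(output_column_names("PREDICT_PROBA_", numpy.array([0, 1])))  # ['PREDICT_PROBA_0', 'PREDICT_PROBA_1']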
@@ -807,7 +775,7 @@ class XGBRFClassifier(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -874,7 +842,7 @@ class XGBRFClassifier(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -935,7 +903,7 @@ class XGBRFClassifier(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -1000,7 +968,7 @@ class XGBRFClassifier(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -1056,13 +1024,17 @@ class XGBRFClassifier(BaseTransformer):
         transform_kwargs: ScoreKwargsTypedDict = dict()
 
         if isinstance(dataset, DataFrame):
+            self._deps = self._batch_inference_validate_snowpark(
+                dataset=dataset,
+                inference_method="score",
+            )
             selected_cols = self._get_active_columns()
             if len(selected_cols) > 0:
                 dataset = dataset.select(selected_cols)
             assert isinstance(dataset._session, Session)  # keep mypy happy
             transform_kwargs = dict(
                 session=dataset._session,
-                dependencies=["snowflake-snowpark-python"] + self._get_dependencies(),
+                dependencies=["snowflake-snowpark-python"] + self._deps,
                 score_sproc_imports=['xgboost'],
             )
         elif isinstance(dataset, pd.DataFrame):
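Note: score() on a Snowpark DataFrame now calls _batch_inference_validate_snowpark first, so the dependency list attached to the scoring stored procedure is the set of packages confirmed to exist in the Snowflake Anaconda channel rather than the raw declared dependencies. A rough, self-contained illustration of that validate-then-use pattern (toy code with hypothetical names, not the library's implementation):

    def validated_dependencies(requested, available_in_channel):
        # Fail fast if any requested package is not available server-side,
        # otherwise return the list to attach to the scoring stored procedure.
        missing = [pkg for pkg in requested if pkg.split("==")[0] not in available_in_channel]
        if missing:
            raise RuntimeError(f"Not available in the Snowflake Anaconda channel: {missing}")
        return ["snowflake-snowpark-python"] + requested

    print(validated_dependencies(["xgboost==1.7.6"], {"xgboost", "scikit-learn"}))
    # ['snowflake-snowpark-python', 'xgboost==1.7.6']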
@@ -1136,9 +1108,9 @@ class XGBRFClassifier(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
-                expected_output_cols_type = "array",
-                n_neighbors = n_neighbors,
+                drop_input_cols = self._drop_input_cols,
+                expected_output_cols_type="array",
+                n_neighbors = n_neighbors,
                 return_distance = return_distance
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -487,18 +487,24 @@ class XGBRFRegressor(BaseTransformer):
         self._get_model_signatures(dataset)
         return self
 
-    def _get_pass_through_columns(self, dataset: DataFrame) -> List[str]:
-        if self._drop_input_cols:
-            return []
-        else:
-            return list(set(dataset.columns) - set(self.output_cols))
-
     def _batch_inference_validate_snowpark(
         self,
         dataset: DataFrame,
         inference_method: str,
     ) -> List[str]:
-        """Util method to run validate that batch inference can be run on a snowpark dataframe.
+        """Util method to run validate that batch inference can be run on a snowpark dataframe and
+        return the available package that exists in the snowflake anaconda channel
+
+        Args:
+            dataset: snowpark dataframe
+            inference_method: the inference method such as predict, score...
+
+        Raises:
+            SnowflakeMLException: If the estimator is not fitted, raise error
+            SnowflakeMLException: If the session is None, raise error
+
+        Returns:
+            A list of available package that exists in the snowflake anaconda channel
         """
         if not self._is_fitted:
             raise exceptions.SnowflakeMLException(
@@ -572,7 +578,7 @@ class XGBRFRegressor(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_type_inferred,
             )
 
@@ -632,16 +638,16 @@ class XGBRFRegressor(BaseTransformer):
             # from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes) (and everything in between)
             # based on init param values. We will convert that to pandas dataframe of shape (n_samples, 1) with
             # each row containing a list of values.
-            expected_dtype = "ARRAY"
+            expected_dtype = "array"
 
         # If we were unable to assign a type to this transform in the factory, infer the type here.
         if expected_dtype == "":
-            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "array"
             if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
-                expected_dtype = "ARRAY"
-            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                expected_dtype = "array"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "array"
             elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
-                expected_dtype = "ARRAY"
+                expected_dtype = "array"
             else:
                 output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
                 # We can only infer the output types from the input types if the following two statemetns are true:
@@ -659,7 +665,7 @@ class XGBRFRegressor(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_dtype,
             )
 
@@ -710,7 +716,7 @@ class XGBRFRegressor(BaseTransformer):
                 subproject=_SUBPROJECT,
             )
             output_result, fitted_estimator = model_trainer.train_fit_predict(
-                pass_through_columns=self._get_pass_through_columns(dataset),
+                drop_input_cols=self._drop_input_cols,
                 expected_output_cols_list=self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix),
             )
             self._sklearn_object = fitted_estimator
@@ -728,44 +734,6 @@ class XGBRFRegressor(BaseTransformer):
         assert self._sklearn_object is not None
         return self._sklearn_object.embedding_
 
-
-    def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
-        """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-            Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
-        """
-        output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
-        if output_cols:
-            output_cols = [
-                identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
-                for c in output_cols
-            ]
-        elif getattr(self._sklearn_object, "classes_", None) is None:
-            output_cols = [output_cols_prefix]
-        elif self._sklearn_object is not None:
-            classes = self._sklearn_object.classes_
-            if isinstance(classes, numpy.ndarray):
-                output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
-            elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
-                # If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
-                output_cols = []
-                for i, cl in enumerate(classes):
-                    # For binary classification, there is only one output column for each class
-                    # ndarray as the two classes are complementary.
-                    if len(cl) == 2:
-                        output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
-                    else:
-                        output_cols.extend([
-                            f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
-                        ])
-            else:
-                output_cols = []
-
-        # Make sure column names are valid snowflake identifiers.
-        assert output_cols is not None  # Make MyPy happy
-        rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
-
-        return rv
-
     @available_if(original_estimator_has_callable("predict_proba"))  # type: ignore[misc]
     @telemetry.send_api_usage_telemetry(
         project=_PROJECT,
@@ -805,7 +773,7 @@ class XGBRFRegressor(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -870,7 +838,7 @@ class XGBRFRegressor(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -931,7 +899,7 @@ class XGBRFRegressor(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -996,7 +964,7 @@ class XGBRFRegressor(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -1052,13 +1020,17 @@ class XGBRFRegressor(BaseTransformer):
         transform_kwargs: ScoreKwargsTypedDict = dict()
 
         if isinstance(dataset, DataFrame):
+            self._deps = self._batch_inference_validate_snowpark(
+                dataset=dataset,
+                inference_method="score",
+            )
             selected_cols = self._get_active_columns()
             if len(selected_cols) > 0:
                 dataset = dataset.select(selected_cols)
             assert isinstance(dataset._session, Session)  # keep mypy happy
             transform_kwargs = dict(
                 session=dataset._session,
-                dependencies=["snowflake-snowpark-python"] + self._get_dependencies(),
+                dependencies=["snowflake-snowpark-python"] + self._deps,
                 score_sproc_imports=['xgboost'],
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -1132,9 +1104,9 @@ class XGBRFRegressor(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
-                expected_output_cols_type = "array",
-                n_neighbors = n_neighbors,
+                drop_input_cols = self._drop_input_cols,
+                expected_output_cols_type="array",
+                n_neighbors = n_neighbors,
                 return_distance = return_distance
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -4,6 +4,7 @@ from typing import Any, Dict, List, Optional
 import pandas as pd
 from absl.logging import logging
 
+from snowflake.ml._internal.human_readable_id import hrid_generator
 from snowflake.ml._internal.utils import sql_identifier
 from snowflake.ml.model import model_signature, type_hints as model_types
 from snowflake.ml.model._client.model import model_impl, model_version_impl
@@ -27,13 +28,14 @@ class ModelManager:
         self._model_ops = model_ops.ModelOperator(
             session, database_name=self._database_name, schema_name=self._schema_name
         )
+        self._hrid_generator = hrid_generator.HRID16()
 
     def log_model(
         self,
         model: model_types.SupportedModelType,
         *,
         model_name: str,
-        version_name: str,
+        version_name: Optional[str] = None,
         comment: Optional[str] = None,
         metrics: Optional[Dict[str, Any]] = None,
         conda_dependencies: Optional[List[str]] = None,
@@ -48,6 +50,8 @@ class ModelManager:
     ) -> model_version_impl.ModelVersion:
         model_name_id = sql_identifier.SqlIdentifier(model_name)
 
+        if not version_name:
+            version_name = self._hrid_generator.generate()[1]
         version_name_id = sql_identifier.SqlIdentifier(version_name)
 
         if self._model_ops.validate_existence(
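Note: version_name becomes optional. When it is omitted, ModelManager asks the new HRID16 generator (backed by the adjectives.txt and animals.txt word lists added in this release) for a human-readable version name. A hypothetical usage sketch against the public registry API; the pre-existing Snowpark session and the exact generated name are assumptions, not taken from this diff:

    import pandas as pd
    from sklearn.linear_model import LogisticRegression
    from snowflake.ml.registry import Registry

    # Assumed: an existing Snowpark `session`.
    reg = Registry(session=session)

    sample_df = pd.DataFrame({"X1": [1.0, 2.0], "X2": [0.5, 1.5]})
    clf = LogisticRegression().fit(sample_df, [0, 1])

    # No version_name: a generated human-readable name (built from the new
    # adjective/animal word lists, e.g. something like "LOYAL_OTTER_1") is used.
    mv = reg.log_model(clf, model_name="MY_MODEL", sample_input_data=sample_df)

    # An explicit version_name still works as in 1.3.x.
    mv2 = reg.log_model(clf, model_name="MY_MODEL", version_name="V2", sample_input_data=sample_df)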
@@ -68,7 +72,7 @@ class ModelManager:
             name=model_name_id.resolved(),
             model=model,
             signatures=signatures,
-            sample_input=sample_input_data,
+            sample_input_data=sample_input_data,
             conda_dependencies=conda_dependencies,
             pip_requirements=pip_requirements,
             python_version=python_version,