snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (189)
  1. snowflake/ml/_internal/env_utils.py +2 -1
  2. snowflake/ml/_internal/file_utils.py +29 -7
  3. snowflake/ml/_internal/telemetry.py +5 -8
  4. snowflake/ml/_internal/utils/uri.py +7 -2
  5. snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
  6. snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
  7. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
  8. snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
  9. snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
  10. snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
  11. snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
  12. snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
  13. snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
  14. snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
  15. snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
  16. snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
  17. snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
  18. snowflake/ml/model/_deployer.py +14 -27
  19. snowflake/ml/model/_env.py +4 -4
  20. snowflake/ml/model/_handlers/custom.py +14 -2
  21. snowflake/ml/model/_handlers/pytorch.py +186 -0
  22. snowflake/ml/model/_handlers/sklearn.py +14 -9
  23. snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
  24. snowflake/ml/model/_handlers/torchscript.py +180 -0
  25. snowflake/ml/model/_handlers/xgboost.py +19 -9
  26. snowflake/ml/model/_model.py +3 -2
  27. snowflake/ml/model/_model_meta.py +12 -7
  28. snowflake/ml/model/model_signature.py +446 -66
  29. snowflake/ml/model/type_hints.py +23 -4
  30. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
  31. snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
  32. snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
  33. snowflake/ml/modeling/cluster/birch.py +51 -26
  34. snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
  35. snowflake/ml/modeling/cluster/dbscan.py +51 -26
  36. snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
  37. snowflake/ml/modeling/cluster/k_means.py +51 -26
  38. snowflake/ml/modeling/cluster/mean_shift.py +51 -26
  39. snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
  40. snowflake/ml/modeling/cluster/optics.py +51 -26
  41. snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
  42. snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
  43. snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
  44. snowflake/ml/modeling/compose/column_transformer.py +51 -26
  45. snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
  46. snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
  47. snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
  48. snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
  49. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
  50. snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
  51. snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
  52. snowflake/ml/modeling/covariance/oas.py +51 -26
  53. snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
  54. snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
  55. snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
  56. snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
  57. snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
  58. snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
  59. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
  60. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
  61. snowflake/ml/modeling/decomposition/pca.py +51 -26
  62. snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
  63. snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
  64. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
  65. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
  66. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
  67. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
  68. snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
  69. snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
  70. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
  71. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
  72. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
  73. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
  74. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
  75. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
  76. snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
  77. snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
  78. snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
  79. snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
  80. snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
  81. snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
  82. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
  83. snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
  84. snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
  85. snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
  86. snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
  87. snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
  88. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
  89. snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
  90. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
  91. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
  92. snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
  93. snowflake/ml/modeling/impute/knn_imputer.py +51 -26
  94. snowflake/ml/modeling/impute/missing_indicator.py +51 -26
  95. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
  96. snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
  97. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
  98. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
  99. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
  100. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
  101. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
  102. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
  103. snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
  104. snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
  105. snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
  106. snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
  107. snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
  108. snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
  109. snowflake/ml/modeling/linear_model/lars.py +51 -26
  110. snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
  111. snowflake/ml/modeling/linear_model/lasso.py +51 -26
  112. snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
  113. snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
  114. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
  115. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
  116. snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
  117. snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
  118. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
  119. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
  120. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
  121. snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
  122. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
  123. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
  124. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
  125. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
  126. snowflake/ml/modeling/linear_model/perceptron.py +51 -26
  127. snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
  128. snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
  129. snowflake/ml/modeling/linear_model/ridge.py +51 -26
  130. snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
  131. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
  132. snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
  133. snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
  134. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
  135. snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
  136. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
  137. snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
  138. snowflake/ml/modeling/manifold/isomap.py +51 -26
  139. snowflake/ml/modeling/manifold/mds.py +51 -26
  140. snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
  141. snowflake/ml/modeling/manifold/tsne.py +51 -26
  142. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
  143. snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
  144. snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
  145. snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
  146. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
  147. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
  148. snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
  149. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
  150. snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
  151. snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
  152. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
  153. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
  154. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
  155. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
  156. snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
  157. snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
  158. snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
  159. snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
  160. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
  161. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
  162. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
  163. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
  164. snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
  165. snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
  166. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
  167. snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
  168. snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
  169. snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
  170. snowflake/ml/modeling/svm/linear_svc.py +51 -26
  171. snowflake/ml/modeling/svm/linear_svr.py +51 -26
  172. snowflake/ml/modeling/svm/nu_svc.py +51 -26
  173. snowflake/ml/modeling/svm/nu_svr.py +51 -26
  174. snowflake/ml/modeling/svm/svc.py +51 -26
  175. snowflake/ml/modeling/svm/svr.py +51 -26
  176. snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
  177. snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
  178. snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
  179. snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
  180. snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
  181. snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
  182. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
  183. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
  184. snowflake/ml/registry/model_registry.py +74 -56
  185. snowflake/ml/version.py +1 -1
  186. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
  187. snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
  188. snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
  189. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py

@@ -7,6 +7,7 @@
  #
  import inspect
  import os
+ import posixpath
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
  from uuid import uuid4
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
  from snowflake.snowpark import DataFrame, Session
  from snowflake.snowpark.functions import pandas_udf, sproc
  from snowflake.snowpark.types import PandasSeries
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
 
  from snowflake.ml.model.model_signature import (
  DataType,
@@ -298,7 +300,6 @@ class BayesianGaussianMixture(BaseTransformer):
  sample_weight_col: Optional[str] = None,
  ) -> None:
  super().__init__()
- self.id = str(uuid4()).replace("-", "_").upper()
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
 
  self._deps = list(deps)
@@ -334,6 +335,15 @@ class BayesianGaussianMixture(BaseTransformer):
  self.set_drop_input_cols(drop_input_cols)
  self.set_sample_weight_col(sample_weight_col)
 
+ def _get_rand_id(self) -> str:
+ """
+ Generate random id to be used in sproc and stage names.
+
+ Returns:
+ Random id string usable in sproc, table, and stage names.
+ """
+ return str(uuid4()).replace("-", "_").upper()
+
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
  """
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -412,7 +422,7 @@ class BayesianGaussianMixture(BaseTransformer):
  cp.dump(self._sklearn_object, local_transform_file)
 
  # Create temp stage to run fit.
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
  SqlResultValidator(
  session=session,
@@ -425,11 +435,12 @@ class BayesianGaussianMixture(BaseTransformer):
  expected_value=f"Stage area {transform_stage_name} successfully created."
  ).validate()
 
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
+ # Use posixpath to construct stage paths
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
  local_result_file_name = get_temp_file_path()
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
 
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
  statement_params = telemetry.get_function_usage_statement_params(
  project=_PROJECT,
  subproject=_SUBPROJECT,
@@ -455,6 +466,7 @@ class BayesianGaussianMixture(BaseTransformer):
  replace=True,
  session=session,
  statement_params=statement_params,
+ anonymous=True
  )
  def fit_wrapper_sproc(
  session: Session,
@@ -463,7 +475,8 @@ class BayesianGaussianMixture(BaseTransformer):
  stage_result_file_name: str,
  input_cols: List[str],
  label_cols: List[str],
- sample_weight_col: Optional[str]
+ sample_weight_col: Optional[str],
+ statement_params: Dict[str, str]
  ) -> str:
  import cloudpickle as cp
  import numpy as np
@@ -530,15 +543,15 @@ class BayesianGaussianMixture(BaseTransformer):
  api_calls=[Session.call],
  custom_tags=dict([("autogen", True)]),
  )
- sproc_export_file_name = session.call(
- fit_sproc_name,
+ sproc_export_file_name = fit_wrapper_sproc(
+ session,
  query,
  stage_transform_file_name,
  stage_result_file_name,
  identifier.get_unescaped_names(self.input_cols),
  identifier.get_unescaped_names(self.label_cols),
  identifier.get_unescaped_names(self.sample_weight_col),
- statement_params=statement_params,
+ statement_params,
  )
 
  if "|" in sproc_export_file_name:
@@ -548,7 +561,7 @@ class BayesianGaussianMixture(BaseTransformer):
  print("\n".join(fields[1:]))
 
  session.file.get(
- os.path.join(stage_result_file_name, sproc_export_file_name),
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
  local_result_file_name,
  statement_params=statement_params
  )
@@ -594,7 +607,7 @@ class BayesianGaussianMixture(BaseTransformer):
 
  # Register vectorized UDF for batch inference
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
- safe_id=self.id, method=inference_method)
+ safe_id=self._get_rand_id(), method=inference_method)
 
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
  # will try to pickle all of self which fails.
@@ -686,7 +699,7 @@ class BayesianGaussianMixture(BaseTransformer):
  return transformed_pandas_df.to_dict("records")
 
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
- safe_id=self.id
+ safe_id=self._get_rand_id()
  )
 
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -853,11 +866,18 @@ class BayesianGaussianMixture(BaseTransformer):
  Transformed dataset.
  """
  if isinstance(dataset, DataFrame):
+ expected_type_inferred = ""
+ # when it is classifier, infer the datatype from label columns
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
+ expected_type_inferred = convert_sp_to_sf_type(
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
+ )
+
  output_df = self._batch_inference(
  dataset=dataset,
  inference_method="predict",
  expected_output_cols_list=self.output_cols,
- expected_output_cols_type="",
+ expected_output_cols_type=expected_type_inferred,
  )
  elif isinstance(dataset, pd.DataFrame):
  output_df = self._sklearn_inference(
@@ -928,10 +948,10 @@ class BayesianGaussianMixture(BaseTransformer):
 
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
- Returns an empty list if current object is not a classifier or not yet fitted.
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
  """
  if getattr(self._sklearn_object, "classes_", None) is None:
- return []
+ return [output_cols_prefix]
 
  classes = self._sklearn_object.classes_
  if isinstance(classes, numpy.ndarray):
@@ -1160,7 +1180,7 @@ class BayesianGaussianMixture(BaseTransformer):
  cp.dump(self._sklearn_object, local_score_file)
 
  # Create temp stage to run score.
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
  session = dataset._session
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
  SqlResultValidator(
@@ -1174,8 +1194,9 @@ class BayesianGaussianMixture(BaseTransformer):
  expected_value=f"Stage area {score_stage_name} successfully created."
  ).validate()
 
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+ # Use posixpath to construct stage paths
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
  statement_params = telemetry.get_function_usage_statement_params(
  project=_PROJECT,
  subproject=_SUBPROJECT,
@@ -1201,6 +1222,7 @@ class BayesianGaussianMixture(BaseTransformer):
  replace=True,
  session=session,
  statement_params=statement_params,
+ anonymous=True
  )
  def score_wrapper_sproc(
  session: Session,
@@ -1208,7 +1230,8 @@ class BayesianGaussianMixture(BaseTransformer):
  stage_score_file_name: str,
  input_cols: List[str],
  label_cols: List[str],
- sample_weight_col: Optional[str]
+ sample_weight_col: Optional[str],
+ statement_params: Dict[str, str]
  ) -> float:
  import cloudpickle as cp
  import numpy as np
@@ -1258,14 +1281,14 @@ class BayesianGaussianMixture(BaseTransformer):
  api_calls=[Session.call],
  custom_tags=dict([("autogen", True)]),
  )
- score = session.call(
- score_sproc_name,
+ score = score_wrapper_sproc(
+ session,
  query,
  stage_score_file_name,
  identifier.get_unescaped_names(self.input_cols),
  identifier.get_unescaped_names(self.label_cols),
  identifier.get_unescaped_names(self.sample_weight_col),
- statement_params=statement_params,
+ statement_params,
  )
 
  cleanup_temp_files([local_score_file_name])
@@ -1283,18 +1306,20 @@ class BayesianGaussianMixture(BaseTransformer):
  if self._sklearn_object._estimator_type == 'classifier':
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
+ ([] if self._drop_input_cols else inputs) + outputs)
  # For regressor, the type of predict is float64
  elif self._sklearn_object._estimator_type == 'regressor':
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
-
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
+ ([] if self._drop_input_cols else inputs) + outputs)
  for prob_func in PROB_FUNCTIONS:
  if hasattr(self, prob_func):
  output_cols_prefix: str = f"{prob_func}_"
  output_column_names = self._get_output_column_names(output_cols_prefix)
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
+ ([] if self._drop_input_cols else inputs) + outputs)
 
  @property
  def model_signatures(self) -> Dict[str, ModelSignature]:
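
Note on the stage-path change above: Snowflake stage locations always use forward slashes, while os.path.join uses the running OS's separator (a backslash on Windows). Switching to posixpath.join makes the generated stage paths platform-independent. A minimal sketch of the difference, with illustrative names that are not taken from the package:

import ntpath      # os.path as it behaves on Windows
import posixpath

stage_name = "SNOWML_TRANSFORM_ABC123"   # hypothetical temporary stage name
file_name = "model.pkl"                  # hypothetical pickled estimator file

# os.path.join on Windows produces a backslash-separated path, which is not a
# valid Snowflake stage location:
print(ntpath.join(stage_name, file_name))     # SNOWML_TRANSFORM_ABC123\model.pkl

# posixpath.join always joins with "/", so the same code behaves identically
# on every platform:
print(posixpath.join(stage_name, file_name))  # SNOWML_TRANSFORM_ABC123/model.pkl
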
snowflake/ml/modeling/mixture/gaussian_mixture.py

@@ -7,6 +7,7 @@
  #
  import inspect
  import os
+ import posixpath
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
  from uuid import uuid4
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
  from snowflake.snowpark import DataFrame, Session
  from snowflake.snowpark.functions import pandas_udf, sproc
  from snowflake.snowpark.types import PandasSeries
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
 
  from snowflake.ml.model.model_signature import (
  DataType,
@@ -274,7 +276,6 @@ class GaussianMixture(BaseTransformer):
  sample_weight_col: Optional[str] = None,
  ) -> None:
  super().__init__()
- self.id = str(uuid4()).replace("-", "_").upper()
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
 
  self._deps = list(deps)
@@ -307,6 +308,15 @@ class GaussianMixture(BaseTransformer):
  self.set_drop_input_cols(drop_input_cols)
  self.set_sample_weight_col(sample_weight_col)
 
+ def _get_rand_id(self) -> str:
+ """
+ Generate random id to be used in sproc and stage names.
+
+ Returns:
+ Random id string usable in sproc, table, and stage names.
+ """
+ return str(uuid4()).replace("-", "_").upper()
+
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
  """
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -385,7 +395,7 @@ class GaussianMixture(BaseTransformer):
  cp.dump(self._sklearn_object, local_transform_file)
 
  # Create temp stage to run fit.
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
  SqlResultValidator(
  session=session,
@@ -398,11 +408,12 @@ class GaussianMixture(BaseTransformer):
  expected_value=f"Stage area {transform_stage_name} successfully created."
  ).validate()
 
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
+ # Use posixpath to construct stage paths
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
  local_result_file_name = get_temp_file_path()
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
 
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
  statement_params = telemetry.get_function_usage_statement_params(
  project=_PROJECT,
  subproject=_SUBPROJECT,
@@ -428,6 +439,7 @@ class GaussianMixture(BaseTransformer):
  replace=True,
  session=session,
  statement_params=statement_params,
+ anonymous=True
  )
  def fit_wrapper_sproc(
  session: Session,
@@ -436,7 +448,8 @@ class GaussianMixture(BaseTransformer):
  stage_result_file_name: str,
  input_cols: List[str],
  label_cols: List[str],
- sample_weight_col: Optional[str]
+ sample_weight_col: Optional[str],
+ statement_params: Dict[str, str]
  ) -> str:
  import cloudpickle as cp
  import numpy as np
@@ -503,15 +516,15 @@ class GaussianMixture(BaseTransformer):
  api_calls=[Session.call],
  custom_tags=dict([("autogen", True)]),
  )
- sproc_export_file_name = session.call(
- fit_sproc_name,
+ sproc_export_file_name = fit_wrapper_sproc(
+ session,
  query,
  stage_transform_file_name,
  stage_result_file_name,
  identifier.get_unescaped_names(self.input_cols),
  identifier.get_unescaped_names(self.label_cols),
  identifier.get_unescaped_names(self.sample_weight_col),
- statement_params=statement_params,
+ statement_params,
  )
 
  if "|" in sproc_export_file_name:
@@ -521,7 +534,7 @@ class GaussianMixture(BaseTransformer):
  print("\n".join(fields[1:]))
 
  session.file.get(
- os.path.join(stage_result_file_name, sproc_export_file_name),
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
  local_result_file_name,
  statement_params=statement_params
  )
@@ -567,7 +580,7 @@ class GaussianMixture(BaseTransformer):
 
  # Register vectorized UDF for batch inference
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
- safe_id=self.id, method=inference_method)
+ safe_id=self._get_rand_id(), method=inference_method)
 
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
  # will try to pickle all of self which fails.
@@ -659,7 +672,7 @@ class GaussianMixture(BaseTransformer):
  return transformed_pandas_df.to_dict("records")
 
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
- safe_id=self.id
+ safe_id=self._get_rand_id()
  )
 
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -826,11 +839,18 @@ class GaussianMixture(BaseTransformer):
  Transformed dataset.
  """
  if isinstance(dataset, DataFrame):
+ expected_type_inferred = ""
+ # when it is classifier, infer the datatype from label columns
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
+ expected_type_inferred = convert_sp_to_sf_type(
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
+ )
+
  output_df = self._batch_inference(
  dataset=dataset,
  inference_method="predict",
  expected_output_cols_list=self.output_cols,
- expected_output_cols_type="",
+ expected_output_cols_type=expected_type_inferred,
  )
  elif isinstance(dataset, pd.DataFrame):
  output_df = self._sklearn_inference(
@@ -901,10 +921,10 @@ class GaussianMixture(BaseTransformer):
 
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
- Returns an empty list if current object is not a classifier or not yet fitted.
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
  """
  if getattr(self._sklearn_object, "classes_", None) is None:
- return []
+ return [output_cols_prefix]
 
  classes = self._sklearn_object.classes_
  if isinstance(classes, numpy.ndarray):
@@ -1133,7 +1153,7 @@ class GaussianMixture(BaseTransformer):
  cp.dump(self._sklearn_object, local_score_file)
 
  # Create temp stage to run score.
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
  session = dataset._session
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
  SqlResultValidator(
@@ -1147,8 +1167,9 @@ class GaussianMixture(BaseTransformer):
  expected_value=f"Stage area {score_stage_name} successfully created."
  ).validate()
 
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+ # Use posixpath to construct stage paths
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
  statement_params = telemetry.get_function_usage_statement_params(
  project=_PROJECT,
  subproject=_SUBPROJECT,
@@ -1174,6 +1195,7 @@ class GaussianMixture(BaseTransformer):
  replace=True,
  session=session,
  statement_params=statement_params,
+ anonymous=True
  )
  def score_wrapper_sproc(
  session: Session,
@@ -1181,7 +1203,8 @@ class GaussianMixture(BaseTransformer):
  stage_score_file_name: str,
  input_cols: List[str],
  label_cols: List[str],
- sample_weight_col: Optional[str]
+ sample_weight_col: Optional[str],
+ statement_params: Dict[str, str]
  ) -> float:
  import cloudpickle as cp
  import numpy as np
@@ -1231,14 +1254,14 @@ class GaussianMixture(BaseTransformer):
  api_calls=[Session.call],
  custom_tags=dict([("autogen", True)]),
  )
- score = session.call(
- score_sproc_name,
+ score = score_wrapper_sproc(
+ session,
  query,
  stage_score_file_name,
  identifier.get_unescaped_names(self.input_cols),
  identifier.get_unescaped_names(self.label_cols),
  identifier.get_unescaped_names(self.sample_weight_col),
- statement_params=statement_params,
+ statement_params,
  )
 
  cleanup_temp_files([local_score_file_name])
@@ -1256,18 +1279,20 @@ class GaussianMixture(BaseTransformer):
  if self._sklearn_object._estimator_type == 'classifier':
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
+ ([] if self._drop_input_cols else inputs) + outputs)
  # For regressor, the type of predict is float64
  elif self._sklearn_object._estimator_type == 'regressor':
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
-
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
+ ([] if self._drop_input_cols else inputs) + outputs)
  for prob_func in PROB_FUNCTIONS:
  if hasattr(self, prob_func):
  output_cols_prefix: str = f"{prob_func}_"
  output_column_names = self._get_output_column_names(output_cols_prefix)
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
+ ([] if self._drop_input_cols else inputs) + outputs)
 
  @property
  def model_signatures(self) -> Dict[str, ModelSignature]:
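
Note on the stored-procedure change repeated in both files: the fit and score wrappers are now registered with anonymous=True, and the handle returned by sproc(...) is called directly instead of invoking a named temporary procedure via session.call. A rough sketch of that pattern, assuming an existing Snowpark session and omitting the other registration options the generated code sets (stage imports, statement_params, explicit types); names here are illustrative, not from the package:

from snowflake.snowpark import Session
from snowflake.snowpark.functions import sproc

def fit_handler(session: Session, query: str) -> str:
    # Stand-in body; the real wrappers load the pickled estimator from a stage,
    # fit it on the query results, and upload the fitted object back.
    return "ok"

# `session` is assumed to be an already-created snowflake.snowpark.Session.
fit_wrapper_sproc = sproc(
    fit_handler,
    replace=True,
    session=session,
    packages=["snowflake-snowpark-python"],
    anonymous=True,   # no named object is created in the schema
)

# Before: sproc_export_file_name = session.call("SNOWML_FIT_<id>", query, ...)
# After:  call the returned handle directly, passing the session first, as the
#         generated estimators now do.
result = fit_wrapper_sproc(session, "SELECT 1")
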