snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189) hide show
  1. snowflake/ml/_internal/env_utils.py +2 -1
  2. snowflake/ml/_internal/file_utils.py +29 -7
  3. snowflake/ml/_internal/telemetry.py +5 -8
  4. snowflake/ml/_internal/utils/uri.py +7 -2
  5. snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
  6. snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
  7. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
  8. snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
  9. snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
  10. snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
  11. snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
  12. snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
  13. snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
  14. snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
  15. snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
  16. snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
  17. snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
  18. snowflake/ml/model/_deployer.py +14 -27
  19. snowflake/ml/model/_env.py +4 -4
  20. snowflake/ml/model/_handlers/custom.py +14 -2
  21. snowflake/ml/model/_handlers/pytorch.py +186 -0
  22. snowflake/ml/model/_handlers/sklearn.py +14 -9
  23. snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
  24. snowflake/ml/model/_handlers/torchscript.py +180 -0
  25. snowflake/ml/model/_handlers/xgboost.py +19 -9
  26. snowflake/ml/model/_model.py +3 -2
  27. snowflake/ml/model/_model_meta.py +12 -7
  28. snowflake/ml/model/model_signature.py +446 -66
  29. snowflake/ml/model/type_hints.py +23 -4
  30. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
  31. snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
  32. snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
  33. snowflake/ml/modeling/cluster/birch.py +51 -26
  34. snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
  35. snowflake/ml/modeling/cluster/dbscan.py +51 -26
  36. snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
  37. snowflake/ml/modeling/cluster/k_means.py +51 -26
  38. snowflake/ml/modeling/cluster/mean_shift.py +51 -26
  39. snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
  40. snowflake/ml/modeling/cluster/optics.py +51 -26
  41. snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
  42. snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
  43. snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
  44. snowflake/ml/modeling/compose/column_transformer.py +51 -26
  45. snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
  46. snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
  47. snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
  48. snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
  49. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
  50. snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
  51. snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
  52. snowflake/ml/modeling/covariance/oas.py +51 -26
  53. snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
  54. snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
  55. snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
  56. snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
  57. snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
  58. snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
  59. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
  60. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
  61. snowflake/ml/modeling/decomposition/pca.py +51 -26
  62. snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
  63. snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
  64. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
  65. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
  66. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
  67. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
  68. snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
  69. snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
  70. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
  71. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
  72. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
  73. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
  74. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
  75. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
  76. snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
  77. snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
  78. snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
  79. snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
  80. snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
  81. snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
  82. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
  83. snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
  84. snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
  85. snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
  86. snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
  87. snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
  88. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
  89. snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
  90. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
  91. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
  92. snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
  93. snowflake/ml/modeling/impute/knn_imputer.py +51 -26
  94. snowflake/ml/modeling/impute/missing_indicator.py +51 -26
  95. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
  96. snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
  97. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
  98. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
  99. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
  100. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
  101. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
  102. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
  103. snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
  104. snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
  105. snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
  106. snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
  107. snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
  108. snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
  109. snowflake/ml/modeling/linear_model/lars.py +51 -26
  110. snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
  111. snowflake/ml/modeling/linear_model/lasso.py +51 -26
  112. snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
  113. snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
  114. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
  115. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
  116. snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
  117. snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
  118. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
  119. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
  120. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
  121. snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
  122. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
  123. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
  124. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
  125. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
  126. snowflake/ml/modeling/linear_model/perceptron.py +51 -26
  127. snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
  128. snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
  129. snowflake/ml/modeling/linear_model/ridge.py +51 -26
  130. snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
  131. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
  132. snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
  133. snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
  134. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
  135. snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
  136. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
  137. snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
  138. snowflake/ml/modeling/manifold/isomap.py +51 -26
  139. snowflake/ml/modeling/manifold/mds.py +51 -26
  140. snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
  141. snowflake/ml/modeling/manifold/tsne.py +51 -26
  142. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
  143. snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
  144. snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
  145. snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
  146. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
  147. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
  148. snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
  149. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
  150. snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
  151. snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
  152. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
  153. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
  154. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
  155. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
  156. snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
  157. snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
  158. snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
  159. snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
  160. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
  161. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
  162. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
  163. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
  164. snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
  165. snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
  166. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
  167. snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
  168. snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
  169. snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
  170. snowflake/ml/modeling/svm/linear_svc.py +51 -26
  171. snowflake/ml/modeling/svm/linear_svr.py +51 -26
  172. snowflake/ml/modeling/svm/nu_svc.py +51 -26
  173. snowflake/ml/modeling/svm/nu_svr.py +51 -26
  174. snowflake/ml/modeling/svm/svc.py +51 -26
  175. snowflake/ml/modeling/svm/svr.py +51 -26
  176. snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
  177. snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
  178. snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
  179. snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
  180. snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
  181. snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
  182. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
  183. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
  184. snowflake/ml/registry/model_registry.py +74 -56
  185. snowflake/ml/version.py +1 -1
  186. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
  187. snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
  188. snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
  189. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -236,7 +238,6 @@ class MeanShift(BaseTransformer):
236
238
  sample_weight_col: Optional[str] = None,
237
239
  ) -> None:
238
240
  super().__init__()
239
- self.id = str(uuid4()).replace("-", "_").upper()
240
241
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
241
242
 
242
243
  self._deps = list(deps)
@@ -262,6 +263,15 @@ class MeanShift(BaseTransformer):
262
263
  self.set_drop_input_cols(drop_input_cols)
263
264
  self.set_sample_weight_col(sample_weight_col)
264
265
 
266
+ def _get_rand_id(self) -> str:
267
+ """
268
+ Generate random id to be used in sproc and stage names.
269
+
270
+ Returns:
271
+ Random id string usable in sproc, table, and stage names.
272
+ """
273
+ return str(uuid4()).replace("-", "_").upper()
274
+
265
275
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
266
276
  """
267
277
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -340,7 +350,7 @@ class MeanShift(BaseTransformer):
340
350
  cp.dump(self._sklearn_object, local_transform_file)
341
351
 
342
352
  # Create temp stage to run fit.
343
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
353
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
344
354
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
345
355
  SqlResultValidator(
346
356
  session=session,
@@ -353,11 +363,12 @@ class MeanShift(BaseTransformer):
353
363
  expected_value=f"Stage area {transform_stage_name} successfully created."
354
364
  ).validate()
355
365
 
356
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
366
+ # Use posixpath to construct stage paths
367
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
368
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
357
369
  local_result_file_name = get_temp_file_path()
358
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
359
370
 
360
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
371
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
361
372
  statement_params = telemetry.get_function_usage_statement_params(
362
373
  project=_PROJECT,
363
374
  subproject=_SUBPROJECT,
@@ -383,6 +394,7 @@ class MeanShift(BaseTransformer):
383
394
  replace=True,
384
395
  session=session,
385
396
  statement_params=statement_params,
397
+ anonymous=True
386
398
  )
387
399
  def fit_wrapper_sproc(
388
400
  session: Session,
@@ -391,7 +403,8 @@ class MeanShift(BaseTransformer):
391
403
  stage_result_file_name: str,
392
404
  input_cols: List[str],
393
405
  label_cols: List[str],
394
- sample_weight_col: Optional[str]
406
+ sample_weight_col: Optional[str],
407
+ statement_params: Dict[str, str]
395
408
  ) -> str:
396
409
  import cloudpickle as cp
397
410
  import numpy as np
@@ -458,15 +471,15 @@ class MeanShift(BaseTransformer):
458
471
  api_calls=[Session.call],
459
472
  custom_tags=dict([("autogen", True)]),
460
473
  )
461
- sproc_export_file_name = session.call(
462
- fit_sproc_name,
474
+ sproc_export_file_name = fit_wrapper_sproc(
475
+ session,
463
476
  query,
464
477
  stage_transform_file_name,
465
478
  stage_result_file_name,
466
479
  identifier.get_unescaped_names(self.input_cols),
467
480
  identifier.get_unescaped_names(self.label_cols),
468
481
  identifier.get_unescaped_names(self.sample_weight_col),
469
- statement_params=statement_params,
482
+ statement_params,
470
483
  )
471
484
 
472
485
  if "|" in sproc_export_file_name:
@@ -476,7 +489,7 @@ class MeanShift(BaseTransformer):
476
489
  print("\n".join(fields[1:]))
477
490
 
478
491
  session.file.get(
479
- os.path.join(stage_result_file_name, sproc_export_file_name),
492
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
480
493
  local_result_file_name,
481
494
  statement_params=statement_params
482
495
  )
@@ -522,7 +535,7 @@ class MeanShift(BaseTransformer):
522
535
 
523
536
  # Register vectorized UDF for batch inference
524
537
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
525
- safe_id=self.id, method=inference_method)
538
+ safe_id=self._get_rand_id(), method=inference_method)
526
539
 
527
540
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
528
541
  # will try to pickle all of self which fails.
@@ -614,7 +627,7 @@ class MeanShift(BaseTransformer):
614
627
  return transformed_pandas_df.to_dict("records")
615
628
 
616
629
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
617
- safe_id=self.id
630
+ safe_id=self._get_rand_id()
618
631
  )
619
632
 
620
633
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -781,11 +794,18 @@ class MeanShift(BaseTransformer):
781
794
  Transformed dataset.
782
795
  """
783
796
  if isinstance(dataset, DataFrame):
797
+ expected_type_inferred = ""
798
+ # when it is classifier, infer the datatype from label columns
799
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
800
+ expected_type_inferred = convert_sp_to_sf_type(
801
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
802
+ )
803
+
784
804
  output_df = self._batch_inference(
785
805
  dataset=dataset,
786
806
  inference_method="predict",
787
807
  expected_output_cols_list=self.output_cols,
788
- expected_output_cols_type="",
808
+ expected_output_cols_type=expected_type_inferred,
789
809
  )
790
810
  elif isinstance(dataset, pd.DataFrame):
791
811
  output_df = self._sklearn_inference(
@@ -856,10 +876,10 @@ class MeanShift(BaseTransformer):
856
876
 
857
877
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
858
878
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
859
- Returns an empty list if current object is not a classifier or not yet fitted.
879
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
860
880
  """
861
881
  if getattr(self._sklearn_object, "classes_", None) is None:
862
- return []
882
+ return [output_cols_prefix]
863
883
 
864
884
  classes = self._sklearn_object.classes_
865
885
  if isinstance(classes, numpy.ndarray):
@@ -1084,7 +1104,7 @@ class MeanShift(BaseTransformer):
1084
1104
  cp.dump(self._sklearn_object, local_score_file)
1085
1105
 
1086
1106
  # Create temp stage to run score.
1087
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1107
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1088
1108
  session = dataset._session
1089
1109
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1090
1110
  SqlResultValidator(
@@ -1098,8 +1118,9 @@ class MeanShift(BaseTransformer):
1098
1118
  expected_value=f"Stage area {score_stage_name} successfully created."
1099
1119
  ).validate()
1100
1120
 
1101
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1102
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1121
+ # Use posixpath to construct stage paths
1122
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1123
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1103
1124
  statement_params = telemetry.get_function_usage_statement_params(
1104
1125
  project=_PROJECT,
1105
1126
  subproject=_SUBPROJECT,
@@ -1125,6 +1146,7 @@ class MeanShift(BaseTransformer):
1125
1146
  replace=True,
1126
1147
  session=session,
1127
1148
  statement_params=statement_params,
1149
+ anonymous=True
1128
1150
  )
1129
1151
  def score_wrapper_sproc(
1130
1152
  session: Session,
@@ -1132,7 +1154,8 @@ class MeanShift(BaseTransformer):
1132
1154
  stage_score_file_name: str,
1133
1155
  input_cols: List[str],
1134
1156
  label_cols: List[str],
1135
- sample_weight_col: Optional[str]
1157
+ sample_weight_col: Optional[str],
1158
+ statement_params: Dict[str, str]
1136
1159
  ) -> float:
1137
1160
  import cloudpickle as cp
1138
1161
  import numpy as np
@@ -1182,14 +1205,14 @@ class MeanShift(BaseTransformer):
1182
1205
  api_calls=[Session.call],
1183
1206
  custom_tags=dict([("autogen", True)]),
1184
1207
  )
1185
- score = session.call(
1186
- score_sproc_name,
1208
+ score = score_wrapper_sproc(
1209
+ session,
1187
1210
  query,
1188
1211
  stage_score_file_name,
1189
1212
  identifier.get_unescaped_names(self.input_cols),
1190
1213
  identifier.get_unescaped_names(self.label_cols),
1191
1214
  identifier.get_unescaped_names(self.sample_weight_col),
1192
- statement_params=statement_params,
1215
+ statement_params,
1193
1216
  )
1194
1217
 
1195
1218
  cleanup_temp_files([local_score_file_name])
@@ -1207,18 +1230,20 @@ class MeanShift(BaseTransformer):
1207
1230
  if self._sklearn_object._estimator_type == 'classifier':
1208
1231
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1209
1232
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1210
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1233
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1234
+ ([] if self._drop_input_cols else inputs) + outputs)
1211
1235
  # For regressor, the type of predict is float64
1212
1236
  elif self._sklearn_object._estimator_type == 'regressor':
1213
1237
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1214
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1215
-
1238
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1239
+ ([] if self._drop_input_cols else inputs) + outputs)
1216
1240
  for prob_func in PROB_FUNCTIONS:
1217
1241
  if hasattr(self, prob_func):
1218
1242
  output_cols_prefix: str = f"{prob_func}_"
1219
1243
  output_column_names = self._get_output_column_names(output_cols_prefix)
1220
1244
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1221
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1245
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1246
+ ([] if self._drop_input_cols else inputs) + outputs)
1222
1247
 
1223
1248
  @property
1224
1249
  def model_signatures(self) -> Dict[str, ModelSignature]:
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -281,7 +283,6 @@ class MiniBatchKMeans(BaseTransformer):
281
283
  sample_weight_col: Optional[str] = None,
282
284
  ) -> None:
283
285
  super().__init__()
284
- self.id = str(uuid4()).replace("-", "_").upper()
285
286
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
286
287
 
287
288
  self._deps = list(deps)
@@ -312,6 +313,15 @@ class MiniBatchKMeans(BaseTransformer):
312
313
  self.set_drop_input_cols(drop_input_cols)
313
314
  self.set_sample_weight_col(sample_weight_col)
314
315
 
316
+ def _get_rand_id(self) -> str:
317
+ """
318
+ Generate random id to be used in sproc and stage names.
319
+
320
+ Returns:
321
+ Random id string usable in sproc, table, and stage names.
322
+ """
323
+ return str(uuid4()).replace("-", "_").upper()
324
+
315
325
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
316
326
  """
317
327
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -390,7 +400,7 @@ class MiniBatchKMeans(BaseTransformer):
390
400
  cp.dump(self._sklearn_object, local_transform_file)
391
401
 
392
402
  # Create temp stage to run fit.
393
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
403
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
394
404
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
395
405
  SqlResultValidator(
396
406
  session=session,
@@ -403,11 +413,12 @@ class MiniBatchKMeans(BaseTransformer):
403
413
  expected_value=f"Stage area {transform_stage_name} successfully created."
404
414
  ).validate()
405
415
 
406
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
416
+ # Use posixpath to construct stage paths
417
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
418
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
407
419
  local_result_file_name = get_temp_file_path()
408
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
409
420
 
410
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
421
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
411
422
  statement_params = telemetry.get_function_usage_statement_params(
412
423
  project=_PROJECT,
413
424
  subproject=_SUBPROJECT,
@@ -433,6 +444,7 @@ class MiniBatchKMeans(BaseTransformer):
433
444
  replace=True,
434
445
  session=session,
435
446
  statement_params=statement_params,
447
+ anonymous=True
436
448
  )
437
449
  def fit_wrapper_sproc(
438
450
  session: Session,
@@ -441,7 +453,8 @@ class MiniBatchKMeans(BaseTransformer):
441
453
  stage_result_file_name: str,
442
454
  input_cols: List[str],
443
455
  label_cols: List[str],
444
- sample_weight_col: Optional[str]
456
+ sample_weight_col: Optional[str],
457
+ statement_params: Dict[str, str]
445
458
  ) -> str:
446
459
  import cloudpickle as cp
447
460
  import numpy as np
@@ -508,15 +521,15 @@ class MiniBatchKMeans(BaseTransformer):
508
521
  api_calls=[Session.call],
509
522
  custom_tags=dict([("autogen", True)]),
510
523
  )
511
- sproc_export_file_name = session.call(
512
- fit_sproc_name,
524
+ sproc_export_file_name = fit_wrapper_sproc(
525
+ session,
513
526
  query,
514
527
  stage_transform_file_name,
515
528
  stage_result_file_name,
516
529
  identifier.get_unescaped_names(self.input_cols),
517
530
  identifier.get_unescaped_names(self.label_cols),
518
531
  identifier.get_unescaped_names(self.sample_weight_col),
519
- statement_params=statement_params,
532
+ statement_params,
520
533
  )
521
534
 
522
535
  if "|" in sproc_export_file_name:
@@ -526,7 +539,7 @@ class MiniBatchKMeans(BaseTransformer):
526
539
  print("\n".join(fields[1:]))
527
540
 
528
541
  session.file.get(
529
- os.path.join(stage_result_file_name, sproc_export_file_name),
542
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
530
543
  local_result_file_name,
531
544
  statement_params=statement_params
532
545
  )
@@ -572,7 +585,7 @@ class MiniBatchKMeans(BaseTransformer):
572
585
 
573
586
  # Register vectorized UDF for batch inference
574
587
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
575
- safe_id=self.id, method=inference_method)
588
+ safe_id=self._get_rand_id(), method=inference_method)
576
589
 
577
590
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
578
591
  # will try to pickle all of self which fails.
@@ -664,7 +677,7 @@ class MiniBatchKMeans(BaseTransformer):
664
677
  return transformed_pandas_df.to_dict("records")
665
678
 
666
679
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
667
- safe_id=self.id
680
+ safe_id=self._get_rand_id()
668
681
  )
669
682
 
670
683
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -831,11 +844,18 @@ class MiniBatchKMeans(BaseTransformer):
831
844
  Transformed dataset.
832
845
  """
833
846
  if isinstance(dataset, DataFrame):
847
+ expected_type_inferred = ""
848
+ # when it is classifier, infer the datatype from label columns
849
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
850
+ expected_type_inferred = convert_sp_to_sf_type(
851
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
852
+ )
853
+
834
854
  output_df = self._batch_inference(
835
855
  dataset=dataset,
836
856
  inference_method="predict",
837
857
  expected_output_cols_list=self.output_cols,
838
- expected_output_cols_type="",
858
+ expected_output_cols_type=expected_type_inferred,
839
859
  )
840
860
  elif isinstance(dataset, pd.DataFrame):
841
861
  output_df = self._sklearn_inference(
@@ -908,10 +928,10 @@ class MiniBatchKMeans(BaseTransformer):
908
928
 
909
929
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
910
930
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
911
- Returns an empty list if current object is not a classifier or not yet fitted.
931
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
912
932
  """
913
933
  if getattr(self._sklearn_object, "classes_", None) is None:
914
- return []
934
+ return [output_cols_prefix]
915
935
 
916
936
  classes = self._sklearn_object.classes_
917
937
  if isinstance(classes, numpy.ndarray):
@@ -1136,7 +1156,7 @@ class MiniBatchKMeans(BaseTransformer):
1136
1156
  cp.dump(self._sklearn_object, local_score_file)
1137
1157
 
1138
1158
  # Create temp stage to run score.
1139
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1159
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1140
1160
  session = dataset._session
1141
1161
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1142
1162
  SqlResultValidator(
@@ -1150,8 +1170,9 @@ class MiniBatchKMeans(BaseTransformer):
1150
1170
  expected_value=f"Stage area {score_stage_name} successfully created."
1151
1171
  ).validate()
1152
1172
 
1153
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1154
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1173
+ # Use posixpath to construct stage paths
1174
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1175
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1155
1176
  statement_params = telemetry.get_function_usage_statement_params(
1156
1177
  project=_PROJECT,
1157
1178
  subproject=_SUBPROJECT,
@@ -1177,6 +1198,7 @@ class MiniBatchKMeans(BaseTransformer):
1177
1198
  replace=True,
1178
1199
  session=session,
1179
1200
  statement_params=statement_params,
1201
+ anonymous=True
1180
1202
  )
1181
1203
  def score_wrapper_sproc(
1182
1204
  session: Session,
@@ -1184,7 +1206,8 @@ class MiniBatchKMeans(BaseTransformer):
1184
1206
  stage_score_file_name: str,
1185
1207
  input_cols: List[str],
1186
1208
  label_cols: List[str],
1187
- sample_weight_col: Optional[str]
1209
+ sample_weight_col: Optional[str],
1210
+ statement_params: Dict[str, str]
1188
1211
  ) -> float:
1189
1212
  import cloudpickle as cp
1190
1213
  import numpy as np
@@ -1234,14 +1257,14 @@ class MiniBatchKMeans(BaseTransformer):
1234
1257
  api_calls=[Session.call],
1235
1258
  custom_tags=dict([("autogen", True)]),
1236
1259
  )
1237
- score = session.call(
1238
- score_sproc_name,
1260
+ score = score_wrapper_sproc(
1261
+ session,
1239
1262
  query,
1240
1263
  stage_score_file_name,
1241
1264
  identifier.get_unescaped_names(self.input_cols),
1242
1265
  identifier.get_unescaped_names(self.label_cols),
1243
1266
  identifier.get_unescaped_names(self.sample_weight_col),
1244
- statement_params=statement_params,
1267
+ statement_params,
1245
1268
  )
1246
1269
 
1247
1270
  cleanup_temp_files([local_score_file_name])
@@ -1259,18 +1282,20 @@ class MiniBatchKMeans(BaseTransformer):
1259
1282
  if self._sklearn_object._estimator_type == 'classifier':
1260
1283
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1261
1284
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1262
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1285
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1286
+ ([] if self._drop_input_cols else inputs) + outputs)
1263
1287
  # For regressor, the type of predict is float64
1264
1288
  elif self._sklearn_object._estimator_type == 'regressor':
1265
1289
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1266
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1267
-
1290
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1291
+ ([] if self._drop_input_cols else inputs) + outputs)
1268
1292
  for prob_func in PROB_FUNCTIONS:
1269
1293
  if hasattr(self, prob_func):
1270
1294
  output_cols_prefix: str = f"{prob_func}_"
1271
1295
  output_column_names = self._get_output_column_names(output_cols_prefix)
1272
1296
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1273
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1297
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1298
+ ([] if self._drop_input_cols else inputs) + outputs)
1274
1299
 
1275
1300
  @property
1276
1301
  def model_signatures(self) -> Dict[str, ModelSignature]: