snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff shows the differences between two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the package contents exactly as they appear in the public registry.
Files changed (189)
  1. snowflake/ml/_internal/env_utils.py +2 -1
  2. snowflake/ml/_internal/file_utils.py +29 -7
  3. snowflake/ml/_internal/telemetry.py +5 -8
  4. snowflake/ml/_internal/utils/uri.py +7 -2
  5. snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
  6. snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
  7. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
  8. snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
  9. snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
  10. snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
  11. snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
  12. snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
  13. snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
  14. snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
  15. snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
  16. snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
  17. snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
  18. snowflake/ml/model/_deployer.py +14 -27
  19. snowflake/ml/model/_env.py +4 -4
  20. snowflake/ml/model/_handlers/custom.py +14 -2
  21. snowflake/ml/model/_handlers/pytorch.py +186 -0
  22. snowflake/ml/model/_handlers/sklearn.py +14 -9
  23. snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
  24. snowflake/ml/model/_handlers/torchscript.py +180 -0
  25. snowflake/ml/model/_handlers/xgboost.py +19 -9
  26. snowflake/ml/model/_model.py +3 -2
  27. snowflake/ml/model/_model_meta.py +12 -7
  28. snowflake/ml/model/model_signature.py +446 -66
  29. snowflake/ml/model/type_hints.py +23 -4
  30. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
  31. snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
  32. snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
  33. snowflake/ml/modeling/cluster/birch.py +51 -26
  34. snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
  35. snowflake/ml/modeling/cluster/dbscan.py +51 -26
  36. snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
  37. snowflake/ml/modeling/cluster/k_means.py +51 -26
  38. snowflake/ml/modeling/cluster/mean_shift.py +51 -26
  39. snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
  40. snowflake/ml/modeling/cluster/optics.py +51 -26
  41. snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
  42. snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
  43. snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
  44. snowflake/ml/modeling/compose/column_transformer.py +51 -26
  45. snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
  46. snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
  47. snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
  48. snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
  49. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
  50. snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
  51. snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
  52. snowflake/ml/modeling/covariance/oas.py +51 -26
  53. snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
  54. snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
  55. snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
  56. snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
  57. snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
  58. snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
  59. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
  60. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
  61. snowflake/ml/modeling/decomposition/pca.py +51 -26
  62. snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
  63. snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
  64. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
  65. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
  66. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
  67. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
  68. snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
  69. snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
  70. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
  71. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
  72. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
  73. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
  74. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
  75. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
  76. snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
  77. snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
  78. snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
  79. snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
  80. snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
  81. snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
  82. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
  83. snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
  84. snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
  85. snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
  86. snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
  87. snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
  88. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
  89. snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
  90. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
  91. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
  92. snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
  93. snowflake/ml/modeling/impute/knn_imputer.py +51 -26
  94. snowflake/ml/modeling/impute/missing_indicator.py +51 -26
  95. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
  96. snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
  97. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
  98. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
  99. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
  100. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
  101. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
  102. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
  103. snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
  104. snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
  105. snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
  106. snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
  107. snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
  108. snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
  109. snowflake/ml/modeling/linear_model/lars.py +51 -26
  110. snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
  111. snowflake/ml/modeling/linear_model/lasso.py +51 -26
  112. snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
  113. snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
  114. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
  115. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
  116. snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
  117. snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
  118. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
  119. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
  120. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
  121. snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
  122. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
  123. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
  124. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
  125. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
  126. snowflake/ml/modeling/linear_model/perceptron.py +51 -26
  127. snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
  128. snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
  129. snowflake/ml/modeling/linear_model/ridge.py +51 -26
  130. snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
  131. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
  132. snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
  133. snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
  134. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
  135. snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
  136. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
  137. snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
  138. snowflake/ml/modeling/manifold/isomap.py +51 -26
  139. snowflake/ml/modeling/manifold/mds.py +51 -26
  140. snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
  141. snowflake/ml/modeling/manifold/tsne.py +51 -26
  142. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
  143. snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
  144. snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
  145. snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
  146. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
  147. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
  148. snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
  149. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
  150. snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
  151. snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
  152. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
  153. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
  154. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
  155. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
  156. snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
  157. snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
  158. snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
  159. snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
  160. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
  161. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
  162. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
  163. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
  164. snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
  165. snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
  166. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
  167. snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
  168. snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
  169. snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
  170. snowflake/ml/modeling/svm/linear_svc.py +51 -26
  171. snowflake/ml/modeling/svm/linear_svr.py +51 -26
  172. snowflake/ml/modeling/svm/nu_svc.py +51 -26
  173. snowflake/ml/modeling/svm/nu_svr.py +51 -26
  174. snowflake/ml/modeling/svm/svc.py +51 -26
  175. snowflake/ml/modeling/svm/svr.py +51 -26
  176. snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
  177. snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
  178. snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
  179. snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
  180. snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
  181. snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
  182. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
  183. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
  184. snowflake/ml/registry/model_registry.py +74 -56
  185. snowflake/ml/version.py +1 -1
  186. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
  187. snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
  188. snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
  189. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
snowflake/ml/modeling/decomposition/kernel_pca.py

@@ -7,6 +7,7 @@
 #
 import inspect
 import os
+import posixpath
 from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
 from uuid import uuid4
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark.functions import pandas_udf, sproc
 from snowflake.snowpark.types import PandasSeries
+from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -291,7 +293,6 @@ class KernelPCA(BaseTransformer):
         sample_weight_col: Optional[str] = None,
     ) -> None:
         super().__init__()
-        self.id = str(uuid4()).replace("-", "_").upper()
         deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
 
         self._deps = list(deps)
@@ -326,6 +327,15 @@ class KernelPCA(BaseTransformer):
         self.set_drop_input_cols(drop_input_cols)
         self.set_sample_weight_col(sample_weight_col)
 
+    def _get_rand_id(self) -> str:
+        """
+        Generate random id to be used in sproc and stage names.
+
+        Returns:
+            Random id string usable in sproc, table, and stage names.
+        """
+        return str(uuid4()).replace("-", "_").upper()
+
     def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
         """
         Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
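
Note: the per-instance `self.id` is replaced by a per-call `_get_rand_id()`, so every fit/score/inference call derives fresh stage, sproc, and table names instead of reusing one id for the estimator's lifetime. A minimal sketch of the naming pattern (the helper body matches the diff; the surrounding usage is illustrative):

    from uuid import uuid4

    def _get_rand_id() -> str:
        # Uppercase hex-and-underscore form stays valid inside unquoted
        # Snowflake identifiers (stage, sproc, and table names).
        return str(uuid4()).replace("-", "_").upper()

    # Each call now gets its own temporary stage, so two fit() calls on the
    # same estimator can no longer collide on a shared name.
    transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=_get_rand_id())
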
@@ -404,7 +414,7 @@ class KernelPCA(BaseTransformer):
             cp.dump(self._sklearn_object, local_transform_file)
 
         # Create temp stage to run fit.
-        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
+        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
         SqlResultValidator(
             session=session,
@@ -417,11 +427,12 @@ class KernelPCA(BaseTransformer):
             expected_value=f"Stage area {transform_stage_name} successfully created."
         ).validate()
 
-        stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        # Use posixpath to construct stage paths
+        stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
         local_result_file_name = get_temp_file_path()
-        stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
 
-        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
+        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
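
Note: the os.path.join → posixpath.join swap matters on Windows clients. Snowflake stage paths always use forward slashes, while os.path.join would insert backslashes there. A minimal sketch (stage and file names are illustrative):

    import os
    import posixpath

    stage_name = "SNOWML_TRANSFORM_ABC123"  # illustrative
    local_file = "/tmp/snowml/model.pkl"    # illustrative

    # posixpath.join always emits "/" separators, which stage paths require;
    # on Windows, os.path.join would yield "SNOWML_TRANSFORM_ABC123\\model.pkl".
    stage_path = posixpath.join(stage_name, os.path.basename(local_file))
    assert stage_path == "SNOWML_TRANSFORM_ABC123/model.pkl"
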
@@ -447,6 +458,7 @@ class KernelPCA(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def fit_wrapper_sproc(
             session: Session,
@@ -455,7 +467,8 @@ class KernelPCA(BaseTransformer):
             stage_result_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> str:
             import cloudpickle as cp
             import numpy as np
@@ -522,15 +535,15 @@ class KernelPCA(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        sproc_export_file_name = session.call(
-            fit_sproc_name,
+        sproc_export_file_name = fit_wrapper_sproc(
+            session,
             query,
             stage_transform_file_name,
             stage_result_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params=statement_params,
+            statement_params,
         )
 
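Note: two related changes land here. The wrapper is registered with anonymous=True, so no named stored procedure is created in the schema, and the returned handle is invoked directly instead of going through session.call(fit_sproc_name, ...). Since the handle takes only positional arguments, statement_params becomes an explicit parameter of the wrapper. A heavily trimmed sketch of the pattern; the real registration also passes packages, imports, and statement_params, and the calling convention follows the diff rather than a verified Snowpark contract:

    from typing import Dict
    from snowflake.snowpark import Session
    from snowflake.snowpark.functions import sproc

    def register_and_fit(session: Session) -> str:
        @sproc(replace=True, session=session, anonymous=True)  # no named schema object
        def fit_wrapper_sproc(session: Session, query: str, statement_params: Dict[str, str]) -> str:
            # Body runs inside Snowflake; here it just echoes an export name.
            return "model_export.pkl"

        # Invoke the returned handle directly with positional arguments;
        # before this change the code used session.call(fit_sproc_name, ...).
        return fit_wrapper_sproc(session, "SELECT 1", {"project": "SnowML"})
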
@@ -540,7 +553,7 @@ class KernelPCA(BaseTransformer):
             print("\n".join(fields[1:]))
 
         session.file.get(
-            os.path.join(stage_result_file_name, sproc_export_file_name),
+            posixpath.join(stage_result_file_name, sproc_export_file_name),
             local_result_file_name,
             statement_params=statement_params
         )
@@ -586,7 +599,7 @@ class KernelPCA(BaseTransformer):
 
         # Register vectorized UDF for batch inference
         batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
-            safe_id=self.id, method=inference_method)
+            safe_id=self._get_rand_id(), method=inference_method)
 
         # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
         # will try to pickle all of self which fails.
@@ -678,7 +691,7 @@ class KernelPCA(BaseTransformer):
             return transformed_pandas_df.to_dict("records")
 
         batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
-            safe_id=self.id
+            safe_id=self._get_rand_id()
         )
 
         pass_through_columns = self._get_pass_through_columns(dataset)
@@ -843,11 +856,18 @@ class KernelPCA(BaseTransformer):
             Transformed dataset.
         """
         if isinstance(dataset, DataFrame):
+            expected_type_inferred = ""
+            # when it is classifier, infer the datatype from label columns
+            if expected_type_inferred == "" and 'predict' in self.model_signatures:
+                expected_type_inferred = convert_sp_to_sf_type(
+                    self.model_signatures['predict'].outputs[0].as_snowpark_type()
+                )
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="predict",
                 expected_output_cols_list=self.output_cols,
-                expected_output_cols_type="",
+                expected_output_cols_type=expected_type_inferred,
             )
         elif isinstance(dataset, pd.DataFrame):
             output_df = self._sklearn_inference(
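
Note: instead of passing an empty expected_output_cols_type, predict() now derives the Snowflake SQL type name from the stored model signature. A small sketch of the conversion step; the concrete mappings in the comments are my reading of convert_sp_to_sf_type, not quoted from its documentation:

    from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
    from snowflake.snowpark.types import DoubleType, LongType

    # Maps a Snowpark DataType to the SQL type name used when casting the
    # inference UDF's output columns.
    convert_sp_to_sf_type(DoubleType())  # expected: "DOUBLE"
    convert_sp_to_sf_type(LongType())    # expected: "BIGINT"
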
@@ -920,10 +940,10 @@ class KernelPCA(BaseTransformer):
 
     def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
         """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns an empty list if current object is not a classifier or not yet fitted.
+        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
         """
         if getattr(self._sklearn_object, "classes_", None) is None:
-            return []
+            return [output_cols_prefix]
 
         classes = self._sklearn_object.classes_
         if isinstance(classes, numpy.ndarray):
@@ -1148,7 +1168,7 @@ class KernelPCA(BaseTransformer):
             cp.dump(self._sklearn_object, local_score_file)
 
         # Create temp stage to run score.
-        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         session = dataset._session
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
         SqlResultValidator(
@@ -1162,8 +1182,9 @@ class KernelPCA(BaseTransformer):
             expected_value=f"Stage area {score_stage_name} successfully created."
         ).validate()
 
-        stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
-        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        # Use posixpath to construct stage paths
+        stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
+        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -1189,6 +1210,7 @@ class KernelPCA(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def score_wrapper_sproc(
             session: Session,
@@ -1196,7 +1218,8 @@ class KernelPCA(BaseTransformer):
             stage_score_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> float:
             import cloudpickle as cp
             import numpy as np
@@ -1246,14 +1269,14 @@ class KernelPCA(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        score = session.call(
-            score_sproc_name,
+        score = score_wrapper_sproc(
+            session,
             query,
             stage_score_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params=statement_params,
+            statement_params,
         )
 
         cleanup_temp_files([local_score_file_name])
@@ -1271,18 +1294,20 @@ class KernelPCA(BaseTransformer):
         if self._sklearn_object._estimator_type == 'classifier':
             outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
             outputs = _rename_features(outputs, self.output_cols) # rename the output columns
-            self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                   ([] if self._drop_input_cols else inputs) + outputs)
         # For regressor, the type of predict is float64
         elif self._sklearn_object._estimator_type == 'regressor':
             outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
-            self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
-
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                   ([] if self._drop_input_cols else inputs) + outputs)
         for prob_func in PROB_FUNCTIONS:
             if hasattr(self, prob_func):
                 output_cols_prefix: str = f"{prob_func}_"
                 output_column_names = self._get_output_column_names(output_cols_prefix)
                 outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
-                self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
+                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                                                                       ([] if self._drop_input_cols else inputs) + outputs)
 
     @property
     def model_signatures(self) -> Dict[str, ModelSignature]:
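
Note: the signature change mirrors the DataFrame that batch inference actually returns: unless drop_input_cols is set, the input columns are passed through to the output, so they are now declared in the signature's outputs too. A minimal sketch of the resulting shape (feature names illustrative):

    from snowflake.ml.model.model_signature import DataType, FeatureSpec, ModelSignature

    inputs = [FeatureSpec(dtype=DataType.DOUBLE, name="FEATURE_0")]
    outputs = [FeatureSpec(dtype=DataType.DOUBLE, name="OUTPUT_0")]
    drop_input_cols = False  # stands in for self._drop_input_cols

    # With drop_input_cols=False the declared outputs are the pass-through
    # inputs followed by the prediction columns; with True, just the predictions.
    sig = ModelSignature(inputs, ([] if drop_input_cols else inputs) + outputs)
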
snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py

The same autogenerated changes recur across the modeling estimators (hence the uniform +51 -26 counts in the file list above); they are shown again here for MiniBatchDictionaryLearning.

@@ -7,6 +7,7 @@
 #
 import inspect
 import os
+import posixpath
 from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
 from uuid import uuid4
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark.functions import pandas_udf, sproc
 from snowflake.snowpark.types import PandasSeries
+from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -308,7 +310,6 @@ class MiniBatchDictionaryLearning(BaseTransformer):
         sample_weight_col: Optional[str] = None,
     ) -> None:
         super().__init__()
-        self.id = str(uuid4()).replace("-", "_").upper()
         deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
 
         self._deps = list(deps)
@@ -348,6 +349,15 @@ class MiniBatchDictionaryLearning(BaseTransformer):
         self.set_drop_input_cols(drop_input_cols)
         self.set_sample_weight_col(sample_weight_col)
 
+    def _get_rand_id(self) -> str:
+        """
+        Generate random id to be used in sproc and stage names.
+
+        Returns:
+            Random id string usable in sproc, table, and stage names.
+        """
+        return str(uuid4()).replace("-", "_").upper()
+
     def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
         """
         Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -426,7 +436,7 @@ class MiniBatchDictionaryLearning(BaseTransformer):
             cp.dump(self._sklearn_object, local_transform_file)
 
         # Create temp stage to run fit.
-        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
+        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
         SqlResultValidator(
             session=session,
@@ -439,11 +449,12 @@ class MiniBatchDictionaryLearning(BaseTransformer):
             expected_value=f"Stage area {transform_stage_name} successfully created."
         ).validate()
 
-        stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        # Use posixpath to construct stage paths
+        stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
         local_result_file_name = get_temp_file_path()
-        stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
 
-        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
+        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -469,6 +480,7 @@ class MiniBatchDictionaryLearning(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def fit_wrapper_sproc(
             session: Session,
@@ -477,7 +489,8 @@ class MiniBatchDictionaryLearning(BaseTransformer):
             stage_result_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> str:
             import cloudpickle as cp
             import numpy as np
@@ -544,15 +557,15 @@ class MiniBatchDictionaryLearning(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        sproc_export_file_name = session.call(
-            fit_sproc_name,
+        sproc_export_file_name = fit_wrapper_sproc(
+            session,
             query,
             stage_transform_file_name,
             stage_result_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params=statement_params,
+            statement_params,
         )
 
@@ -562,7 +575,7 @@ class MiniBatchDictionaryLearning(BaseTransformer):
             print("\n".join(fields[1:]))
 
         session.file.get(
-            os.path.join(stage_result_file_name, sproc_export_file_name),
+            posixpath.join(stage_result_file_name, sproc_export_file_name),
             local_result_file_name,
             statement_params=statement_params
         )
@@ -608,7 +621,7 @@ class MiniBatchDictionaryLearning(BaseTransformer):
 
         # Register vectorized UDF for batch inference
         batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
-            safe_id=self.id, method=inference_method)
+            safe_id=self._get_rand_id(), method=inference_method)
 
         # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
         # will try to pickle all of self which fails.
@@ -700,7 +713,7 @@ class MiniBatchDictionaryLearning(BaseTransformer):
             return transformed_pandas_df.to_dict("records")
 
         batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
-            safe_id=self.id
+            safe_id=self._get_rand_id()
         )
 
         pass_through_columns = self._get_pass_through_columns(dataset)
@@ -865,11 +878,18 @@ class MiniBatchDictionaryLearning(BaseTransformer):
             Transformed dataset.
         """
         if isinstance(dataset, DataFrame):
+            expected_type_inferred = ""
+            # when it is classifier, infer the datatype from label columns
+            if expected_type_inferred == "" and 'predict' in self.model_signatures:
+                expected_type_inferred = convert_sp_to_sf_type(
+                    self.model_signatures['predict'].outputs[0].as_snowpark_type()
+                )
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="predict",
                 expected_output_cols_list=self.output_cols,
-                expected_output_cols_type="",
+                expected_output_cols_type=expected_type_inferred,
             )
         elif isinstance(dataset, pd.DataFrame):
             output_df = self._sklearn_inference(
@@ -942,10 +962,10 @@ class MiniBatchDictionaryLearning(BaseTransformer):
 
     def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
         """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns an empty list if current object is not a classifier or not yet fitted.
+        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
         """
         if getattr(self._sklearn_object, "classes_", None) is None:
-            return []
+            return [output_cols_prefix]
 
         classes = self._sklearn_object.classes_
         if isinstance(classes, numpy.ndarray):
@@ -1170,7 +1190,7 @@ class MiniBatchDictionaryLearning(BaseTransformer):
             cp.dump(self._sklearn_object, local_score_file)
 
         # Create temp stage to run score.
-        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         session = dataset._session
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
         SqlResultValidator(
@@ -1184,8 +1204,9 @@ class MiniBatchDictionaryLearning(BaseTransformer):
             expected_value=f"Stage area {score_stage_name} successfully created."
         ).validate()
 
-        stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
-        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        # Use posixpath to construct stage paths
+        stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
+        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -1211,6 +1232,7 @@ class MiniBatchDictionaryLearning(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def score_wrapper_sproc(
             session: Session,
@@ -1218,7 +1240,8 @@ class MiniBatchDictionaryLearning(BaseTransformer):
             stage_score_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> float:
             import cloudpickle as cp
             import numpy as np
@@ -1268,14 +1291,14 @@ class MiniBatchDictionaryLearning(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        score = session.call(
-            score_sproc_name,
+        score = score_wrapper_sproc(
+            session,
             query,
             stage_score_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params=statement_params,
+            statement_params,
         )
 
         cleanup_temp_files([local_score_file_name])
@@ -1293,18 +1316,20 @@ class MiniBatchDictionaryLearning(BaseTransformer):
         if self._sklearn_object._estimator_type == 'classifier':
             outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
             outputs = _rename_features(outputs, self.output_cols) # rename the output columns
-            self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                   ([] if self._drop_input_cols else inputs) + outputs)
         # For regressor, the type of predict is float64
         elif self._sklearn_object._estimator_type == 'regressor':
             outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
-            self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
-
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                   ([] if self._drop_input_cols else inputs) + outputs)
         for prob_func in PROB_FUNCTIONS:
             if hasattr(self, prob_func):
                 output_cols_prefix: str = f"{prob_func}_"
                 output_column_names = self._get_output_column_names(output_cols_prefix)
                 outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
-                self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
+                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                                                                       ([] if self._drop_input_cols else inputs) + outputs)
 
     @property
     def model_signatures(self) -> Dict[str, ModelSignature]: