snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff compares the contents of two publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (189)
  1. snowflake/ml/_internal/env_utils.py +2 -1
  2. snowflake/ml/_internal/file_utils.py +29 -7
  3. snowflake/ml/_internal/telemetry.py +5 -8
  4. snowflake/ml/_internal/utils/uri.py +7 -2
  5. snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
  6. snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
  7. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
  8. snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
  9. snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
  10. snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
  11. snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
  12. snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
  13. snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
  14. snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
  15. snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
  16. snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
  17. snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
  18. snowflake/ml/model/_deployer.py +14 -27
  19. snowflake/ml/model/_env.py +4 -4
  20. snowflake/ml/model/_handlers/custom.py +14 -2
  21. snowflake/ml/model/_handlers/pytorch.py +186 -0
  22. snowflake/ml/model/_handlers/sklearn.py +14 -9
  23. snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
  24. snowflake/ml/model/_handlers/torchscript.py +180 -0
  25. snowflake/ml/model/_handlers/xgboost.py +19 -9
  26. snowflake/ml/model/_model.py +3 -2
  27. snowflake/ml/model/_model_meta.py +12 -7
  28. snowflake/ml/model/model_signature.py +446 -66
  29. snowflake/ml/model/type_hints.py +23 -4
  30. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
  31. snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
  32. snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
  33. snowflake/ml/modeling/cluster/birch.py +51 -26
  34. snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
  35. snowflake/ml/modeling/cluster/dbscan.py +51 -26
  36. snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
  37. snowflake/ml/modeling/cluster/k_means.py +51 -26
  38. snowflake/ml/modeling/cluster/mean_shift.py +51 -26
  39. snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
  40. snowflake/ml/modeling/cluster/optics.py +51 -26
  41. snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
  42. snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
  43. snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
  44. snowflake/ml/modeling/compose/column_transformer.py +51 -26
  45. snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
  46. snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
  47. snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
  48. snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
  49. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
  50. snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
  51. snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
  52. snowflake/ml/modeling/covariance/oas.py +51 -26
  53. snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
  54. snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
  55. snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
  56. snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
  57. snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
  58. snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
  59. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
  60. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
  61. snowflake/ml/modeling/decomposition/pca.py +51 -26
  62. snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
  63. snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
  64. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
  65. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
  66. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
  67. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
  68. snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
  69. snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
  70. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
  71. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
  72. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
  73. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
  74. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
  75. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
  76. snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
  77. snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
  78. snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
  79. snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
  80. snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
  81. snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
  82. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
  83. snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
  84. snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
  85. snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
  86. snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
  87. snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
  88. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
  89. snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
  90. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
  91. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
  92. snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
  93. snowflake/ml/modeling/impute/knn_imputer.py +51 -26
  94. snowflake/ml/modeling/impute/missing_indicator.py +51 -26
  95. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
  96. snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
  97. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
  98. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
  99. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
  100. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
  101. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
  102. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
  103. snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
  104. snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
  105. snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
  106. snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
  107. snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
  108. snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
  109. snowflake/ml/modeling/linear_model/lars.py +51 -26
  110. snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
  111. snowflake/ml/modeling/linear_model/lasso.py +51 -26
  112. snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
  113. snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
  114. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
  115. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
  116. snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
  117. snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
  118. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
  119. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
  120. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
  121. snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
  122. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
  123. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
  124. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
  125. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
  126. snowflake/ml/modeling/linear_model/perceptron.py +51 -26
  127. snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
  128. snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
  129. snowflake/ml/modeling/linear_model/ridge.py +51 -26
  130. snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
  131. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
  132. snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
  133. snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
  134. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
  135. snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
  136. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
  137. snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
  138. snowflake/ml/modeling/manifold/isomap.py +51 -26
  139. snowflake/ml/modeling/manifold/mds.py +51 -26
  140. snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
  141. snowflake/ml/modeling/manifold/tsne.py +51 -26
  142. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
  143. snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
  144. snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
  145. snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
  146. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
  147. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
  148. snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
  149. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
  150. snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
  151. snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
  152. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
  153. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
  154. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
  155. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
  156. snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
  157. snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
  158. snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
  159. snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
  160. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
  161. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
  162. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
  163. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
  164. snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
  165. snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
  166. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
  167. snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
  168. snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
  169. snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
  170. snowflake/ml/modeling/svm/linear_svc.py +51 -26
  171. snowflake/ml/modeling/svm/linear_svr.py +51 -26
  172. snowflake/ml/modeling/svm/nu_svc.py +51 -26
  173. snowflake/ml/modeling/svm/nu_svr.py +51 -26
  174. snowflake/ml/modeling/svm/svc.py +51 -26
  175. snowflake/ml/modeling/svm/svr.py +51 -26
  176. snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
  177. snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
  178. snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
  179. snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
  180. snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
  181. snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
  182. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
  183. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
  184. snowflake/ml/registry/model_registry.py +74 -56
  185. snowflake/ml/version.py +1 -1
  186. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
  187. snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
  188. snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
  189. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
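
Two of the regenerated estimator files, isolation_forest.py and random_forest_classifier.py, are excerpted below; the same autogenerated changes apply across the modeling classes. One recurring change is building stage paths with posixpath.join instead of os.path.join. A minimal sketch of why that matters, assuming a Windows client where os.path resolves to ntpath (the stage and file names are hypothetical):

import ntpath      # what os.path resolves to on Windows
import posixpath   # always joins with forward slashes

stage_name = "SNOWML_TRANSFORM_ABC123"  # hypothetical stage name
file_name = "model.pkl.zip"             # hypothetical artifact name

# A backslash-separated path is not a valid Snowflake stage location;
# posixpath.join produces the same forward-slash path on every platform.
print(ntpath.join(stage_name, file_name))     # SNOWML_TRANSFORM_ABC123\model.pkl.zip
print(posixpath.join(stage_name, file_name))  # SNOWML_TRANSFORM_ABC123/model.pkl.zip
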
snowflake/ml/modeling/ensemble/isolation_forest.py
@@ -7,6 +7,7 @@
 #
 import inspect
 import os
+import posixpath
 from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
 from uuid import uuid4

@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark.functions import pandas_udf, sproc
 from snowflake.snowpark.types import PandasSeries
+from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type

 from snowflake.ml.model.model_signature import (
     DataType,
@@ -244,7 +246,6 @@ class IsolationForest(BaseTransformer):
         sample_weight_col: Optional[str] = None,
     ) -> None:
         super().__init__()
-        self.id = str(uuid4()).replace("-", "_").upper()
         deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])

         self._deps = list(deps)
@@ -272,6 +273,15 @@ class IsolationForest(BaseTransformer):
         self.set_drop_input_cols(drop_input_cols)
         self.set_sample_weight_col(sample_weight_col)

+    def _get_rand_id(self) -> str:
+        """
+        Generate random id to be used in sproc and stage names.
+
+        Returns:
+            Random id string usable in sproc, table, and stage names.
+        """
+        return str(uuid4()).replace("-", "_").upper()
+
     def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
         """
         Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -350,7 +360,7 @@ class IsolationForest(BaseTransformer):
             cp.dump(self._sklearn_object, local_transform_file)

         # Create temp stage to run fit.
-        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
+        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
         SqlResultValidator(
             session=session,
@@ -363,11 +373,12 @@ class IsolationForest(BaseTransformer):
             expected_value=f"Stage area {transform_stage_name} successfully created."
         ).validate()

-        stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        # Use posixpath to construct stage paths
+        stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
         local_result_file_name = get_temp_file_path()
-        stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))

-        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
+        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -393,6 +404,7 @@ class IsolationForest(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def fit_wrapper_sproc(
             session: Session,
@@ -401,7 +413,8 @@ class IsolationForest(BaseTransformer):
             stage_result_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> str:
             import cloudpickle as cp
             import numpy as np
@@ -468,15 +481,15 @@ class IsolationForest(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        sproc_export_file_name = session.call(
-            fit_sproc_name,
+        sproc_export_file_name = fit_wrapper_sproc(
+            session,
             query,
             stage_transform_file_name,
             stage_result_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params=statement_params,
+            statement_params,
         )

         if "|" in sproc_export_file_name:
@@ -486,7 +499,7 @@ class IsolationForest(BaseTransformer):
             print("\n".join(fields[1:]))

         session.file.get(
-            os.path.join(stage_result_file_name, sproc_export_file_name),
+            posixpath.join(stage_result_file_name, sproc_export_file_name),
             local_result_file_name,
             statement_params=statement_params
         )
@@ -532,7 +545,7 @@ class IsolationForest(BaseTransformer):

         # Register vectorized UDF for batch inference
         batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
-            safe_id=self.id, method=inference_method)
+            safe_id=self._get_rand_id(), method=inference_method)

         # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
         # will try to pickle all of self which fails.
@@ -624,7 +637,7 @@ class IsolationForest(BaseTransformer):
             return transformed_pandas_df.to_dict("records")

         batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
-            safe_id=self.id
+            safe_id=self._get_rand_id()
         )

         pass_through_columns = self._get_pass_through_columns(dataset)
@@ -791,11 +804,18 @@ class IsolationForest(BaseTransformer):
             Transformed dataset.
         """
         if isinstance(dataset, DataFrame):
+            expected_type_inferred = ""
+            # when it is classifier, infer the datatype from label columns
+            if expected_type_inferred == "" and 'predict' in self.model_signatures:
+                expected_type_inferred = convert_sp_to_sf_type(
+                    self.model_signatures['predict'].outputs[0].as_snowpark_type()
+                )
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="predict",
                 expected_output_cols_list=self.output_cols,
-                expected_output_cols_type="",
+                expected_output_cols_type=expected_type_inferred,
             )
         elif isinstance(dataset, pd.DataFrame):
             output_df = self._sklearn_inference(
@@ -866,10 +886,10 @@ class IsolationForest(BaseTransformer):

     def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
         """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns an empty list if current object is not a classifier or not yet fitted.
+        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
         """
         if getattr(self._sklearn_object, "classes_", None) is None:
-            return []
+            return [output_cols_prefix]

         classes = self._sklearn_object.classes_
         if isinstance(classes, numpy.ndarray):
@@ -1096,7 +1116,7 @@ class IsolationForest(BaseTransformer):
             cp.dump(self._sklearn_object, local_score_file)

         # Create temp stage to run score.
-        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         session = dataset._session
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
         SqlResultValidator(
@@ -1110,8 +1130,9 @@ class IsolationForest(BaseTransformer):
             expected_value=f"Stage area {score_stage_name} successfully created."
         ).validate()

-        stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
-        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        # Use posixpath to construct stage paths
+        stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
+        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -1137,6 +1158,7 @@ class IsolationForest(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def score_wrapper_sproc(
             session: Session,
@@ -1144,7 +1166,8 @@ class IsolationForest(BaseTransformer):
             stage_score_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> float:
             import cloudpickle as cp
             import numpy as np
@@ -1194,14 +1217,14 @@ class IsolationForest(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        score = session.call(
-            score_sproc_name,
+        score = score_wrapper_sproc(
+            session,
             query,
             stage_score_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params=statement_params,
+            statement_params,
         )

         cleanup_temp_files([local_score_file_name])
@@ -1219,18 +1242,20 @@ class IsolationForest(BaseTransformer):
         if self._sklearn_object._estimator_type == 'classifier':
             outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
             outputs = _rename_features(outputs, self.output_cols) # rename the output columns
-            self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                   ([] if self._drop_input_cols else inputs) + outputs)
         # For regressor, the type of predict is float64
         elif self._sklearn_object._estimator_type == 'regressor':
             outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
-            self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
-
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                   ([] if self._drop_input_cols else inputs) + outputs)
         for prob_func in PROB_FUNCTIONS:
             if hasattr(self, prob_func):
                 output_cols_prefix: str = f"{prob_func}_"
                 output_column_names = self._get_output_column_names(output_cols_prefix)
                 outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
-                self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
+                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                                                                       ([] if self._drop_input_cols else inputs) + outputs)

     @property
     def model_signatures(self) -> Dict[str, ModelSignature]:
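
In both the fit and score paths above, the wrapper is now registered as an anonymous stored procedure (anonymous=True) and invoked directly through the handle returned by the sproc decorator instead of by name via session.call, with statement_params threaded through as an ordinary argument of the wrapper. A minimal sketch of that pattern, mirroring the call shape in the hunks above; the session, body, and names here are illustrative placeholders, not the generated code:

from typing import Dict

from snowflake.snowpark import Session
from snowflake.snowpark.functions import sproc

def register_and_run(session: Session) -> str:
    @sproc(
        replace=True,
        session=session,
        packages=["snowflake-snowpark-python"],
        anonymous=True,  # no named procedure object is created in the schema
    )
    def demo_wrapper_sproc(session: Session, greeting: str, statement_params: Dict[str, str]) -> str:
        # Runs server-side; statement_params arrives as a plain argument.
        return greeting + " (ran inside the sproc)"

    # Before: session.call("SNOWML_FIT_<id>", greeting, statement_params=...)
    # After:  the returned handle is called directly, positional args only,
    #         as in fit_wrapper_sproc(session, query, ...) above.
    return demo_wrapper_sproc(session, "hello", {"project": "demo"})
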
snowflake/ml/modeling/ensemble/random_forest_classifier.py
@@ -7,6 +7,7 @@
 #
 import inspect
 import os
+import posixpath
 from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
 from uuid import uuid4

@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark.functions import pandas_udf, sproc
 from snowflake.snowpark.types import PandasSeries
+from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type

 from snowflake.ml.model.model_signature import (
     DataType,
@@ -344,7 +346,6 @@ class RandomForestClassifier(BaseTransformer):
         sample_weight_col: Optional[str] = None,
     ) -> None:
         super().__init__()
-        self.id = str(uuid4()).replace("-", "_").upper()
         deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])

         self._deps = list(deps)
@@ -381,6 +382,15 @@ class RandomForestClassifier(BaseTransformer):
         self.set_drop_input_cols(drop_input_cols)
         self.set_sample_weight_col(sample_weight_col)

+    def _get_rand_id(self) -> str:
+        """
+        Generate random id to be used in sproc and stage names.
+
+        Returns:
+            Random id string usable in sproc, table, and stage names.
+        """
+        return str(uuid4()).replace("-", "_").upper()
+
     def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
         """
         Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -459,7 +469,7 @@ class RandomForestClassifier(BaseTransformer):
             cp.dump(self._sklearn_object, local_transform_file)

         # Create temp stage to run fit.
-        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
+        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
         SqlResultValidator(
             session=session,
@@ -472,11 +482,12 @@ class RandomForestClassifier(BaseTransformer):
             expected_value=f"Stage area {transform_stage_name} successfully created."
         ).validate()

-        stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        # Use posixpath to construct stage paths
+        stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
         local_result_file_name = get_temp_file_path()
-        stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))

-        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
+        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -502,6 +513,7 @@ class RandomForestClassifier(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def fit_wrapper_sproc(
             session: Session,
@@ -510,7 +522,8 @@ class RandomForestClassifier(BaseTransformer):
             stage_result_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> str:
             import cloudpickle as cp
             import numpy as np
@@ -577,15 +590,15 @@ class RandomForestClassifier(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        sproc_export_file_name = session.call(
-            fit_sproc_name,
+        sproc_export_file_name = fit_wrapper_sproc(
+            session,
             query,
             stage_transform_file_name,
             stage_result_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params=statement_params,
+            statement_params,
         )

         if "|" in sproc_export_file_name:
@@ -595,7 +608,7 @@ class RandomForestClassifier(BaseTransformer):
             print("\n".join(fields[1:]))

         session.file.get(
-            os.path.join(stage_result_file_name, sproc_export_file_name),
+            posixpath.join(stage_result_file_name, sproc_export_file_name),
             local_result_file_name,
             statement_params=statement_params
         )
@@ -641,7 +654,7 @@ class RandomForestClassifier(BaseTransformer):

         # Register vectorized UDF for batch inference
         batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
-            safe_id=self.id, method=inference_method)
+            safe_id=self._get_rand_id(), method=inference_method)

         # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
         # will try to pickle all of self which fails.
@@ -733,7 +746,7 @@ class RandomForestClassifier(BaseTransformer):
             return transformed_pandas_df.to_dict("records")

         batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
-            safe_id=self.id
+            safe_id=self._get_rand_id()
         )

         pass_through_columns = self._get_pass_through_columns(dataset)
@@ -900,11 +913,18 @@ class RandomForestClassifier(BaseTransformer):
             Transformed dataset.
         """
         if isinstance(dataset, DataFrame):
+            expected_type_inferred = ""
+            # when it is classifier, infer the datatype from label columns
+            if expected_type_inferred == "" and 'predict' in self.model_signatures:
+                expected_type_inferred = convert_sp_to_sf_type(
+                    self.model_signatures['predict'].outputs[0].as_snowpark_type()
+                )
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="predict",
                 expected_output_cols_list=self.output_cols,
-                expected_output_cols_type="",
+                expected_output_cols_type=expected_type_inferred,
             )
         elif isinstance(dataset, pd.DataFrame):
             output_df = self._sklearn_inference(
@@ -975,10 +995,10 @@ class RandomForestClassifier(BaseTransformer):

     def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
         """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns an empty list if current object is not a classifier or not yet fitted.
+        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
         """
         if getattr(self._sklearn_object, "classes_", None) is None:
-            return []
+            return [output_cols_prefix]

         classes = self._sklearn_object.classes_
         if isinstance(classes, numpy.ndarray):
@@ -1207,7 +1227,7 @@ class RandomForestClassifier(BaseTransformer):
             cp.dump(self._sklearn_object, local_score_file)

         # Create temp stage to run score.
-        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         session = dataset._session
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
         SqlResultValidator(
@@ -1221,8 +1241,9 @@ class RandomForestClassifier(BaseTransformer):
             expected_value=f"Stage area {score_stage_name} successfully created."
         ).validate()

-        stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
-        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        # Use posixpath to construct stage paths
+        stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
+        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -1248,6 +1269,7 @@ class RandomForestClassifier(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def score_wrapper_sproc(
             session: Session,
@@ -1255,7 +1277,8 @@ class RandomForestClassifier(BaseTransformer):
             stage_score_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> float:
             import cloudpickle as cp
             import numpy as np
@@ -1305,14 +1328,14 @@ class RandomForestClassifier(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        score = session.call(
-            score_sproc_name,
+        score = score_wrapper_sproc(
+            session,
             query,
             stage_score_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params=statement_params,
+            statement_params,
        )

         cleanup_temp_files([local_score_file_name])
@@ -1330,18 +1353,20 @@ class RandomForestClassifier(BaseTransformer):
         if self._sklearn_object._estimator_type == 'classifier':
             outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
             outputs = _rename_features(outputs, self.output_cols) # rename the output columns
-            self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                   ([] if self._drop_input_cols else inputs) + outputs)
         # For regressor, the type of predict is float64
         elif self._sklearn_object._estimator_type == 'regressor':
             outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
-            self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
-
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                   ([] if self._drop_input_cols else inputs) + outputs)
         for prob_func in PROB_FUNCTIONS:
             if hasattr(self, prob_func):
                 output_cols_prefix: str = f"{prob_func}_"
                 output_column_names = self._get_output_column_names(output_cols_prefix)
                 outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
-                self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
+                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                                                                       ([] if self._drop_input_cols else inputs) + outputs)

     @property
     def model_signatures(self) -> Dict[str, ModelSignature]:
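
Finally, predict() on a Snowpark DataFrame now derives expected_output_cols_type from the stored model signature instead of passing an empty string, and each ModelSignature now prepends the input specs to the outputs unless drop_input_cols is set, so pass-through columns are reflected in the signature. A minimal sketch of the type lookup, assuming a signature has already been inferred (the feature specs below are illustrative):

from snowflake.ml.model.model_signature import DataType, FeatureSpec, ModelSignature
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type

# Hypothetical signature of the kind stored under "predict" above.
signature = ModelSignature(
    inputs=[FeatureSpec(dtype=DataType.DOUBLE, name="SEPAL_LENGTH")],
    outputs=[FeatureSpec(dtype=DataType.INT64, name="OUTPUT_TARGET")],
)

# Mirrors the new predict() logic: take the first output's Snowpark type
# (e.g. LongType()) and map it to its SQL name for expected_output_cols_type.
expected_type_inferred = convert_sp_to_sf_type(signature.outputs[0].as_snowpark_type())
print(expected_type_inferred)  # e.g. "BIGINT" for an INT64 output
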