snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
Files changed (189)
  1. snowflake/ml/_internal/env_utils.py +2 -1
  2. snowflake/ml/_internal/file_utils.py +29 -7
  3. snowflake/ml/_internal/telemetry.py +5 -8
  4. snowflake/ml/_internal/utils/uri.py +7 -2
  5. snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
  6. snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
  7. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
  8. snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
  9. snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
  10. snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
  11. snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
  12. snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
  13. snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
  14. snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
  15. snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
  16. snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
  17. snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
  18. snowflake/ml/model/_deployer.py +14 -27
  19. snowflake/ml/model/_env.py +4 -4
  20. snowflake/ml/model/_handlers/custom.py +14 -2
  21. snowflake/ml/model/_handlers/pytorch.py +186 -0
  22. snowflake/ml/model/_handlers/sklearn.py +14 -9
  23. snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
  24. snowflake/ml/model/_handlers/torchscript.py +180 -0
  25. snowflake/ml/model/_handlers/xgboost.py +19 -9
  26. snowflake/ml/model/_model.py +3 -2
  27. snowflake/ml/model/_model_meta.py +12 -7
  28. snowflake/ml/model/model_signature.py +446 -66
  29. snowflake/ml/model/type_hints.py +23 -4
  30. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
  31. snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
  32. snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
  33. snowflake/ml/modeling/cluster/birch.py +51 -26
  34. snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
  35. snowflake/ml/modeling/cluster/dbscan.py +51 -26
  36. snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
  37. snowflake/ml/modeling/cluster/k_means.py +51 -26
  38. snowflake/ml/modeling/cluster/mean_shift.py +51 -26
  39. snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
  40. snowflake/ml/modeling/cluster/optics.py +51 -26
  41. snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
  42. snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
  43. snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
  44. snowflake/ml/modeling/compose/column_transformer.py +51 -26
  45. snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
  46. snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
  47. snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
  48. snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
  49. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
  50. snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
  51. snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
  52. snowflake/ml/modeling/covariance/oas.py +51 -26
  53. snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
  54. snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
  55. snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
  56. snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
  57. snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
  58. snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
  59. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
  60. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
  61. snowflake/ml/modeling/decomposition/pca.py +51 -26
  62. snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
  63. snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
  64. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
  65. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
  66. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
  67. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
  68. snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
  69. snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
  70. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
  71. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
  72. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
  73. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
  74. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
  75. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
  76. snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
  77. snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
  78. snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
  79. snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
  80. snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
  81. snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
  82. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
  83. snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
  84. snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
  85. snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
  86. snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
  87. snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
  88. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
  89. snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
  90. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
  91. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
  92. snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
  93. snowflake/ml/modeling/impute/knn_imputer.py +51 -26
  94. snowflake/ml/modeling/impute/missing_indicator.py +51 -26
  95. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
  96. snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
  97. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
  98. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
  99. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
  100. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
  101. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
  102. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
  103. snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
  104. snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
  105. snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
  106. snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
  107. snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
  108. snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
  109. snowflake/ml/modeling/linear_model/lars.py +51 -26
  110. snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
  111. snowflake/ml/modeling/linear_model/lasso.py +51 -26
  112. snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
  113. snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
  114. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
  115. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
  116. snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
  117. snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
  118. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
  119. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
  120. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
  121. snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
  122. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
  123. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
  124. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
  125. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
  126. snowflake/ml/modeling/linear_model/perceptron.py +51 -26
  127. snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
  128. snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
  129. snowflake/ml/modeling/linear_model/ridge.py +51 -26
  130. snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
  131. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
  132. snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
  133. snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
  134. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
  135. snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
  136. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
  137. snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
  138. snowflake/ml/modeling/manifold/isomap.py +51 -26
  139. snowflake/ml/modeling/manifold/mds.py +51 -26
  140. snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
  141. snowflake/ml/modeling/manifold/tsne.py +51 -26
  142. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
  143. snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
  144. snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
  145. snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
  146. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
  147. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
  148. snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
  149. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
  150. snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
  151. snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
  152. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
  153. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
  154. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
  155. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
  156. snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
  157. snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
  158. snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
  159. snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
  160. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
  161. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
  162. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
  163. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
  164. snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
  165. snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
  166. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
  167. snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
  168. snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
  169. snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
  170. snowflake/ml/modeling/svm/linear_svc.py +51 -26
  171. snowflake/ml/modeling/svm/linear_svr.py +51 -26
  172. snowflake/ml/modeling/svm/nu_svc.py +51 -26
  173. snowflake/ml/modeling/svm/nu_svr.py +51 -26
  174. snowflake/ml/modeling/svm/svc.py +51 -26
  175. snowflake/ml/modeling/svm/svr.py +51 -26
  176. snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
  177. snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
  178. snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
  179. snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
  180. snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
  181. snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
  182. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
  183. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
  184. snowflake/ml/registry/model_registry.py +74 -56
  185. snowflake/ml/version.py +1 -1
  186. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
  187. snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
  188. snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
  189. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0

snowflake/ml/modeling/neural_network/mlp_classifier.py

@@ -7,6 +7,7 @@
 #
 import inspect
 import os
+import posixpath
 from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
 from uuid import uuid4

@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark.functions import pandas_udf, sproc
 from snowflake.snowpark.types import PandasSeries
+from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type

 from snowflake.ml.model.model_signature import (
     DataType,
@@ -352,7 +354,6 @@ class MLPClassifier(BaseTransformer):
         sample_weight_col: Optional[str] = None,
     ) -> None:
         super().__init__()
-        self.id = str(uuid4()).replace("-", "_").upper()
         deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])

         self._deps = list(deps)
@@ -394,6 +395,15 @@ class MLPClassifier(BaseTransformer):
         self.set_drop_input_cols(drop_input_cols)
         self.set_sample_weight_col(sample_weight_col)

+    def _get_rand_id(self) -> str:
+        """
+        Generate random id to be used in sproc and stage names.
+
+        Returns:
+            Random id string usable in sproc, table, and stage names.
+        """
+        return str(uuid4()).replace("-", "_").upper()
+
     def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
         """
         Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -472,7 +482,7 @@ class MLPClassifier(BaseTransformer):
             cp.dump(self._sklearn_object, local_transform_file)

         # Create temp stage to run fit.
-        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
+        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
         SqlResultValidator(
             session=session,
@@ -485,11 +495,12 @@ class MLPClassifier(BaseTransformer):
            expected_value=f"Stage area {transform_stage_name} successfully created."
        ).validate()

-        stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        # Use posixpath to construct stage paths
+        stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
         local_result_file_name = get_temp_file_path()
-        stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))

-        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
+        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -515,6 +526,7 @@ class MLPClassifier(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def fit_wrapper_sproc(
             session: Session,
@@ -523,7 +535,8 @@ class MLPClassifier(BaseTransformer):
             stage_result_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> str:
             import cloudpickle as cp
             import numpy as np
@@ -590,15 +603,15 @@ class MLPClassifier(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        sproc_export_file_name = session.call(
-            fit_sproc_name,
+        sproc_export_file_name = fit_wrapper_sproc(
+            session,
             query,
             stage_transform_file_name,
             stage_result_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params=statement_params,
+            statement_params,
         )

         if "|" in sproc_export_file_name:
@@ -608,7 +621,7 @@ class MLPClassifier(BaseTransformer):
             print("\n".join(fields[1:]))

         session.file.get(
-            os.path.join(stage_result_file_name, sproc_export_file_name),
+            posixpath.join(stage_result_file_name, sproc_export_file_name),
             local_result_file_name,
             statement_params=statement_params
         )
@@ -654,7 +667,7 @@ class MLPClassifier(BaseTransformer):

         # Register vectorized UDF for batch inference
         batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
-            safe_id=self.id, method=inference_method)
+            safe_id=self._get_rand_id(), method=inference_method)

         # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
         # will try to pickle all of self which fails.
@@ -746,7 +759,7 @@ class MLPClassifier(BaseTransformer):
             return transformed_pandas_df.to_dict("records")

         batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
-            safe_id=self.id
+            safe_id=self._get_rand_id()
         )

         pass_through_columns = self._get_pass_through_columns(dataset)
@@ -913,11 +926,18 @@ class MLPClassifier(BaseTransformer):
             Transformed dataset.
         """
         if isinstance(dataset, DataFrame):
+            expected_type_inferred = ""
+            # when it is classifier, infer the datatype from label columns
+            if expected_type_inferred == "" and 'predict' in self.model_signatures:
+                expected_type_inferred = convert_sp_to_sf_type(
+                    self.model_signatures['predict'].outputs[0].as_snowpark_type()
+                )
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="predict",
                 expected_output_cols_list=self.output_cols,
-                expected_output_cols_type="",
+                expected_output_cols_type=expected_type_inferred,
             )
         elif isinstance(dataset, pd.DataFrame):
             output_df = self._sklearn_inference(
@@ -988,10 +1008,10 @@ class MLPClassifier(BaseTransformer):

     def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
         """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns an empty list if current object is not a classifier or not yet fitted.
+        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
         """
         if getattr(self._sklearn_object, "classes_", None) is None:
-            return []
+            return [output_cols_prefix]

         classes = self._sklearn_object.classes_
         if isinstance(classes, numpy.ndarray):
@@ -1220,7 +1240,7 @@ class MLPClassifier(BaseTransformer):
             cp.dump(self._sklearn_object, local_score_file)

         # Create temp stage to run score.
-        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         session = dataset._session
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
         SqlResultValidator(
@@ -1234,8 +1254,9 @@ class MLPClassifier(BaseTransformer):
            expected_value=f"Stage area {score_stage_name} successfully created."
        ).validate()

-        stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
-        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        # Use posixpath to construct stage paths
+        stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
+        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -1261,6 +1282,7 @@ class MLPClassifier(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def score_wrapper_sproc(
             session: Session,
@@ -1268,7 +1290,8 @@ class MLPClassifier(BaseTransformer):
             stage_score_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> float:
             import cloudpickle as cp
             import numpy as np
@@ -1318,14 +1341,14 @@ class MLPClassifier(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        score = session.call(
-            score_sproc_name,
+        score = score_wrapper_sproc(
+            session,
             query,
             stage_score_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params=statement_params,
+            statement_params,
         )

         cleanup_temp_files([local_score_file_name])
@@ -1343,18 +1366,20 @@ class MLPClassifier(BaseTransformer):
         if self._sklearn_object._estimator_type == 'classifier':
             outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
             outputs = _rename_features(outputs, self.output_cols) # rename the output columns
-            self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                    ([] if self._drop_input_cols else inputs) + outputs)
         # For regressor, the type of predict is float64
         elif self._sklearn_object._estimator_type == 'regressor':
             outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
-            self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
-
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                    ([] if self._drop_input_cols else inputs) + outputs)
         for prob_func in PROB_FUNCTIONS:
             if hasattr(self, prob_func):
                 output_cols_prefix: str = f"{prob_func}_"
                 output_column_names = self._get_output_column_names(output_cols_prefix)
                 outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
-                self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
+                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                                                        ([] if self._drop_input_cols else inputs) + outputs)

     @property
     def model_signatures(self) -> Dict[str, ModelSignature]:
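
A recurring change in the hunks above swaps os.path.join for posixpath.join when building stage paths (see the "# Use posixpath to construct stage paths" comments). A minimal stdlib-only sketch of why the separator matters; the stage and file names below are made up for illustration:

import ntpath      # behaves like os.path does on Windows
import posixpath

stage_name = "SNOWML_TRANSFORM_ABC123"   # illustrative, not from the package
file_name = "model.pkl"

# os.path.join follows the host OS: on Windows it inserts a backslash,
# which is not the separator Snowflake stage paths use.
print(ntpath.join(stage_name, file_name))    # SNOWML_TRANSFORM_ABC123\model.pkl

# posixpath.join always uses "/", so the stage path is the same on every OS.
print(posixpath.join(stage_name, file_name)) # SNOWML_TRANSFORM_ABC123/model.pkl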

snowflake/ml/modeling/neural_network/mlp_regressor.py

@@ -7,6 +7,7 @@
 #
 import inspect
 import os
+import posixpath
 from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
 from uuid import uuid4

@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark.functions import pandas_udf, sproc
 from snowflake.snowpark.types import PandasSeries
+from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type

 from snowflake.ml.model.model_signature import (
     DataType,
@@ -348,7 +350,6 @@ class MLPRegressor(BaseTransformer):
         sample_weight_col: Optional[str] = None,
     ) -> None:
         super().__init__()
-        self.id = str(uuid4()).replace("-", "_").upper()
         deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])

         self._deps = list(deps)
@@ -390,6 +391,15 @@ class MLPRegressor(BaseTransformer):
         self.set_drop_input_cols(drop_input_cols)
         self.set_sample_weight_col(sample_weight_col)

+    def _get_rand_id(self) -> str:
+        """
+        Generate random id to be used in sproc and stage names.
+
+        Returns:
+            Random id string usable in sproc, table, and stage names.
+        """
+        return str(uuid4()).replace("-", "_").upper()
+
     def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
         """
         Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -468,7 +478,7 @@ class MLPRegressor(BaseTransformer):
             cp.dump(self._sklearn_object, local_transform_file)

         # Create temp stage to run fit.
-        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
+        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
         SqlResultValidator(
             session=session,
@@ -481,11 +491,12 @@ class MLPRegressor(BaseTransformer):
            expected_value=f"Stage area {transform_stage_name} successfully created."
        ).validate()

-        stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        # Use posixpath to construct stage paths
+        stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
         local_result_file_name = get_temp_file_path()
-        stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))

-        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
+        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -511,6 +522,7 @@ class MLPRegressor(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def fit_wrapper_sproc(
             session: Session,
@@ -519,7 +531,8 @@ class MLPRegressor(BaseTransformer):
             stage_result_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> str:
             import cloudpickle as cp
             import numpy as np
@@ -586,15 +599,15 @@ class MLPRegressor(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        sproc_export_file_name = session.call(
-            fit_sproc_name,
+        sproc_export_file_name = fit_wrapper_sproc(
+            session,
             query,
             stage_transform_file_name,
             stage_result_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params=statement_params,
+            statement_params,
         )

         if "|" in sproc_export_file_name:
@@ -604,7 +617,7 @@ class MLPRegressor(BaseTransformer):
             print("\n".join(fields[1:]))

         session.file.get(
-            os.path.join(stage_result_file_name, sproc_export_file_name),
+            posixpath.join(stage_result_file_name, sproc_export_file_name),
             local_result_file_name,
             statement_params=statement_params
         )
@@ -650,7 +663,7 @@ class MLPRegressor(BaseTransformer):

         # Register vectorized UDF for batch inference
         batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
-            safe_id=self.id, method=inference_method)
+            safe_id=self._get_rand_id(), method=inference_method)

         # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
         # will try to pickle all of self which fails.
@@ -742,7 +755,7 @@ class MLPRegressor(BaseTransformer):
             return transformed_pandas_df.to_dict("records")

         batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
-            safe_id=self.id
+            safe_id=self._get_rand_id()
         )

         pass_through_columns = self._get_pass_through_columns(dataset)
@@ -909,11 +922,18 @@ class MLPRegressor(BaseTransformer):
             Transformed dataset.
         """
         if isinstance(dataset, DataFrame):
+            expected_type_inferred = "float"
+            # when it is classifier, infer the datatype from label columns
+            if expected_type_inferred == "" and 'predict' in self.model_signatures:
+                expected_type_inferred = convert_sp_to_sf_type(
+                    self.model_signatures['predict'].outputs[0].as_snowpark_type()
+                )
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="predict",
                 expected_output_cols_list=self.output_cols,
-                expected_output_cols_type="float",
+                expected_output_cols_type=expected_type_inferred,
             )
         elif isinstance(dataset, pd.DataFrame):
             output_df = self._sklearn_inference(
@@ -984,10 +1004,10 @@ class MLPRegressor(BaseTransformer):

     def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
         """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns an empty list if current object is not a classifier or not yet fitted.
+        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
         """
         if getattr(self._sklearn_object, "classes_", None) is None:
-            return []
+            return [output_cols_prefix]

         classes = self._sklearn_object.classes_
         if isinstance(classes, numpy.ndarray):
@@ -1212,7 +1232,7 @@ class MLPRegressor(BaseTransformer):
             cp.dump(self._sklearn_object, local_score_file)

         # Create temp stage to run score.
-        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         session = dataset._session
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
         SqlResultValidator(
@@ -1226,8 +1246,9 @@ class MLPRegressor(BaseTransformer):
            expected_value=f"Stage area {score_stage_name} successfully created."
        ).validate()

-        stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
-        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        # Use posixpath to construct stage paths
+        stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
+        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -1253,6 +1274,7 @@ class MLPRegressor(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def score_wrapper_sproc(
             session: Session,
@@ -1260,7 +1282,8 @@ class MLPRegressor(BaseTransformer):
             stage_score_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> float:
             import cloudpickle as cp
             import numpy as np
@@ -1310,14 +1333,14 @@ class MLPRegressor(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        score = session.call(
-            score_sproc_name,
+        score = score_wrapper_sproc(
+            session,
             query,
             stage_score_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params=statement_params,
+            statement_params,
         )

         cleanup_temp_files([local_score_file_name])
@@ -1335,18 +1358,20 @@ class MLPRegressor(BaseTransformer):
         if self._sklearn_object._estimator_type == 'classifier':
             outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
             outputs = _rename_features(outputs, self.output_cols) # rename the output columns
-            self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                    ([] if self._drop_input_cols else inputs) + outputs)
         # For regressor, the type of predict is float64
         elif self._sklearn_object._estimator_type == 'regressor':
             outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
-            self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
-
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                    ([] if self._drop_input_cols else inputs) + outputs)
         for prob_func in PROB_FUNCTIONS:
             if hasattr(self, prob_func):
                 output_cols_prefix: str = f"{prob_func}_"
                 output_column_names = self._get_output_column_names(output_cols_prefix)
                 outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
-                self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
+                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                                                        ([] if self._drop_input_cols else inputs) + outputs)

     @property
     def model_signatures(self) -> Dict[str, ModelSignature]:
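
Both MLP diffs end with the same signature change: the predict signature now echoes the input features in its outputs unless drop_input_cols is set. A small sketch of the resulting ModelSignature, assuming illustrative column names (only the DataType, FeatureSpec, and ModelSignature imports come from the diff above):

from snowflake.ml.model.model_signature import DataType, FeatureSpec, ModelSignature

# Illustrative feature and label names; not taken from the package.
inputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in ("SEPAL_LENGTH", "SEPAL_WIDTH")]
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name="OUTPUT_TARGET")]

drop_input_cols = False  # the wrappers above default this to False

# Mirrors the new construction: input specs are prepended to the output specs
# unless drop_input_cols is enabled.
signature = ModelSignature(inputs, ([] if drop_input_cols else inputs) + outputs)
print(signature)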

snowflake/ml/modeling/preprocessing/ordinal_encoder.py

@@ -121,6 +121,7 @@ class OrdinalEncoder(base.BaseTransformer):
         self.categories_: Dict[str, type_utils.LiteralNDArrayType] = {}
         self._categories_list: List[type_utils.LiteralNDArrayType] = []
         self._missing_indices: Dict[int, int] = {}
+        self._infrequent_enabled = False
         self._vocab_table_name = "snowml_preprocessing_ordinal_encoder_temp_table_" + uuid.uuid4().hex

         self.set_input_cols(input_cols)
@@ -547,6 +548,7 @@ class OrdinalEncoder(base.BaseTransformer):
         if self._is_fitted:
             encoder.categories_ = self._categories_list
             encoder._missing_indices = self._missing_indices
+            encoder._infrequent_enabled = self._infrequent_enabled
         return encoder

     def _validate_keywords(self) -> None:
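
The OrdinalEncoder change defaults a private _infrequent_enabled flag and copies it whenever the wrapper rebuilds a scikit-learn encoder from stored state. A hedged sketch of that reconstruction pattern with made-up fitted state; recent scikit-learn versions consult these private attributes during transform, so exact behavior varies by version:

import numpy as np
from sklearn.preprocessing import OrdinalEncoder as SklearnOrdinalEncoder

# Made-up fitted state, standing in for self._categories_list,
# self._missing_indices, and self._infrequent_enabled in the wrapper above.
categories_list = [np.array(["HIGH", "LOW", "MEDIUM"])]
missing_indices = {}          # no encoded-missing categories in this sketch
infrequent_enabled = False    # the new default added in this release

encoder = SklearnOrdinalEncoder()
encoder.categories_ = categories_list
encoder._missing_indices = missing_indices
# Newer scikit-learn transform paths read this private flag, so the wrapper
# now copies it onto the reconstructed encoder as well.
encoder._infrequent_enabled = infrequent_enabled

print(encoder.transform([["LOW"], ["HIGH"]]))   # [[1.], [0.]]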