snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189) hide show
  1. snowflake/ml/_internal/env_utils.py +2 -1
  2. snowflake/ml/_internal/file_utils.py +29 -7
  3. snowflake/ml/_internal/telemetry.py +5 -8
  4. snowflake/ml/_internal/utils/uri.py +7 -2
  5. snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
  6. snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
  7. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
  8. snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
  9. snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
  10. snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
  11. snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
  12. snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
  13. snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
  14. snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
  15. snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
  16. snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
  17. snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
  18. snowflake/ml/model/_deployer.py +14 -27
  19. snowflake/ml/model/_env.py +4 -4
  20. snowflake/ml/model/_handlers/custom.py +14 -2
  21. snowflake/ml/model/_handlers/pytorch.py +186 -0
  22. snowflake/ml/model/_handlers/sklearn.py +14 -9
  23. snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
  24. snowflake/ml/model/_handlers/torchscript.py +180 -0
  25. snowflake/ml/model/_handlers/xgboost.py +19 -9
  26. snowflake/ml/model/_model.py +3 -2
  27. snowflake/ml/model/_model_meta.py +12 -7
  28. snowflake/ml/model/model_signature.py +446 -66
  29. snowflake/ml/model/type_hints.py +23 -4
  30. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
  31. snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
  32. snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
  33. snowflake/ml/modeling/cluster/birch.py +51 -26
  34. snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
  35. snowflake/ml/modeling/cluster/dbscan.py +51 -26
  36. snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
  37. snowflake/ml/modeling/cluster/k_means.py +51 -26
  38. snowflake/ml/modeling/cluster/mean_shift.py +51 -26
  39. snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
  40. snowflake/ml/modeling/cluster/optics.py +51 -26
  41. snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
  42. snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
  43. snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
  44. snowflake/ml/modeling/compose/column_transformer.py +51 -26
  45. snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
  46. snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
  47. snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
  48. snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
  49. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
  50. snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
  51. snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
  52. snowflake/ml/modeling/covariance/oas.py +51 -26
  53. snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
  54. snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
  55. snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
  56. snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
  57. snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
  58. snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
  59. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
  60. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
  61. snowflake/ml/modeling/decomposition/pca.py +51 -26
  62. snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
  63. snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
  64. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
  65. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
  66. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
  67. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
  68. snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
  69. snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
  70. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
  71. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
  72. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
  73. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
  74. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
  75. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
  76. snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
  77. snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
  78. snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
  79. snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
  80. snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
  81. snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
  82. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
  83. snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
  84. snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
  85. snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
  86. snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
  87. snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
  88. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
  89. snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
  90. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
  91. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
  92. snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
  93. snowflake/ml/modeling/impute/knn_imputer.py +51 -26
  94. snowflake/ml/modeling/impute/missing_indicator.py +51 -26
  95. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
  96. snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
  97. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
  98. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
  99. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
  100. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
  101. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
  102. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
  103. snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
  104. snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
  105. snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
  106. snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
  107. snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
  108. snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
  109. snowflake/ml/modeling/linear_model/lars.py +51 -26
  110. snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
  111. snowflake/ml/modeling/linear_model/lasso.py +51 -26
  112. snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
  113. snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
  114. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
  115. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
  116. snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
  117. snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
  118. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
  119. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
  120. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
  121. snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
  122. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
  123. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
  124. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
  125. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
  126. snowflake/ml/modeling/linear_model/perceptron.py +51 -26
  127. snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
  128. snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
  129. snowflake/ml/modeling/linear_model/ridge.py +51 -26
  130. snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
  131. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
  132. snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
  133. snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
  134. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
  135. snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
  136. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
  137. snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
  138. snowflake/ml/modeling/manifold/isomap.py +51 -26
  139. snowflake/ml/modeling/manifold/mds.py +51 -26
  140. snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
  141. snowflake/ml/modeling/manifold/tsne.py +51 -26
  142. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
  143. snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
  144. snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
  145. snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
  146. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
  147. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
  148. snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
  149. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
  150. snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
  151. snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
  152. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
  153. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
  154. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
  155. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
  156. snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
  157. snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
  158. snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
  159. snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
  160. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
  161. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
  162. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
  163. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
  164. snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
  165. snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
  166. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
  167. snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
  168. snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
  169. snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
  170. snowflake/ml/modeling/svm/linear_svc.py +51 -26
  171. snowflake/ml/modeling/svm/linear_svr.py +51 -26
  172. snowflake/ml/modeling/svm/nu_svc.py +51 -26
  173. snowflake/ml/modeling/svm/nu_svr.py +51 -26
  174. snowflake/ml/modeling/svm/svc.py +51 -26
  175. snowflake/ml/modeling/svm/svr.py +51 -26
  176. snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
  177. snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
  178. snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
  179. snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
  180. snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
  181. snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
  182. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
  183. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
  184. snowflake/ml/registry/model_registry.py +74 -56
  185. snowflake/ml/version.py +1 -1
  186. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
  187. snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
  188. snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
  189. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -285,7 +287,6 @@ class RANSACRegressor(BaseTransformer):
285
287
  sample_weight_col: Optional[str] = None,
286
288
  ) -> None:
287
289
  super().__init__()
288
- self.id = str(uuid4()).replace("-", "_").upper()
289
290
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
290
291
  deps = deps | _gather_dependencies(estimator)
291
292
  deps = deps | _gather_dependencies(base_estimator)
@@ -319,6 +320,15 @@ class RANSACRegressor(BaseTransformer):
319
320
  self.set_drop_input_cols(drop_input_cols)
320
321
  self.set_sample_weight_col(sample_weight_col)
321
322
 
323
+ def _get_rand_id(self) -> str:
324
+ """
325
+ Generate random id to be used in sproc and stage names.
326
+
327
+ Returns:
328
+ Random id string usable in sproc, table, and stage names.
329
+ """
330
+ return str(uuid4()).replace("-", "_").upper()
331
+
322
332
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
323
333
  """
324
334
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -397,7 +407,7 @@ class RANSACRegressor(BaseTransformer):
397
407
  cp.dump(self._sklearn_object, local_transform_file)
398
408
 
399
409
  # Create temp stage to run fit.
400
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
410
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
401
411
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
402
412
  SqlResultValidator(
403
413
  session=session,
@@ -410,11 +420,12 @@ class RANSACRegressor(BaseTransformer):
410
420
  expected_value=f"Stage area {transform_stage_name} successfully created."
411
421
  ).validate()
412
422
 
413
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
423
+ # Use posixpath to construct stage paths
424
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
425
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
414
426
  local_result_file_name = get_temp_file_path()
415
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
416
427
 
417
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
428
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
418
429
  statement_params = telemetry.get_function_usage_statement_params(
419
430
  project=_PROJECT,
420
431
  subproject=_SUBPROJECT,
@@ -440,6 +451,7 @@ class RANSACRegressor(BaseTransformer):
440
451
  replace=True,
441
452
  session=session,
442
453
  statement_params=statement_params,
454
+ anonymous=True
443
455
  )
444
456
  def fit_wrapper_sproc(
445
457
  session: Session,
@@ -448,7 +460,8 @@ class RANSACRegressor(BaseTransformer):
448
460
  stage_result_file_name: str,
449
461
  input_cols: List[str],
450
462
  label_cols: List[str],
451
- sample_weight_col: Optional[str]
463
+ sample_weight_col: Optional[str],
464
+ statement_params: Dict[str, str]
452
465
  ) -> str:
453
466
  import cloudpickle as cp
454
467
  import numpy as np
@@ -515,15 +528,15 @@ class RANSACRegressor(BaseTransformer):
515
528
  api_calls=[Session.call],
516
529
  custom_tags=dict([("autogen", True)]),
517
530
  )
518
- sproc_export_file_name = session.call(
519
- fit_sproc_name,
531
+ sproc_export_file_name = fit_wrapper_sproc(
532
+ session,
520
533
  query,
521
534
  stage_transform_file_name,
522
535
  stage_result_file_name,
523
536
  identifier.get_unescaped_names(self.input_cols),
524
537
  identifier.get_unescaped_names(self.label_cols),
525
538
  identifier.get_unescaped_names(self.sample_weight_col),
526
- statement_params=statement_params,
539
+ statement_params,
527
540
  )
528
541
 
529
542
  if "|" in sproc_export_file_name:
@@ -533,7 +546,7 @@ class RANSACRegressor(BaseTransformer):
533
546
  print("\n".join(fields[1:]))
534
547
 
535
548
  session.file.get(
536
- os.path.join(stage_result_file_name, sproc_export_file_name),
549
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
537
550
  local_result_file_name,
538
551
  statement_params=statement_params
539
552
  )
@@ -579,7 +592,7 @@ class RANSACRegressor(BaseTransformer):
579
592
 
580
593
  # Register vectorized UDF for batch inference
581
594
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
582
- safe_id=self.id, method=inference_method)
595
+ safe_id=self._get_rand_id(), method=inference_method)
583
596
 
584
597
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
585
598
  # will try to pickle all of self which fails.
@@ -671,7 +684,7 @@ class RANSACRegressor(BaseTransformer):
671
684
  return transformed_pandas_df.to_dict("records")
672
685
 
673
686
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
674
- safe_id=self.id
687
+ safe_id=self._get_rand_id()
675
688
  )
676
689
 
677
690
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -838,11 +851,18 @@ class RANSACRegressor(BaseTransformer):
838
851
  Transformed dataset.
839
852
  """
840
853
  if isinstance(dataset, DataFrame):
854
+ expected_type_inferred = "float"
855
+ # when it is classifier, infer the datatype from label columns
856
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
857
+ expected_type_inferred = convert_sp_to_sf_type(
858
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
859
+ )
860
+
841
861
  output_df = self._batch_inference(
842
862
  dataset=dataset,
843
863
  inference_method="predict",
844
864
  expected_output_cols_list=self.output_cols,
845
- expected_output_cols_type="float",
865
+ expected_output_cols_type=expected_type_inferred,
846
866
  )
847
867
  elif isinstance(dataset, pd.DataFrame):
848
868
  output_df = self._sklearn_inference(
@@ -913,10 +933,10 @@ class RANSACRegressor(BaseTransformer):
913
933
 
914
934
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
915
935
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
916
- Returns an empty list if current object is not a classifier or not yet fitted.
936
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
917
937
  """
918
938
  if getattr(self._sklearn_object, "classes_", None) is None:
919
- return []
939
+ return [output_cols_prefix]
920
940
 
921
941
  classes = self._sklearn_object.classes_
922
942
  if isinstance(classes, numpy.ndarray):
@@ -1141,7 +1161,7 @@ class RANSACRegressor(BaseTransformer):
1141
1161
  cp.dump(self._sklearn_object, local_score_file)
1142
1162
 
1143
1163
  # Create temp stage to run score.
1144
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1164
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1145
1165
  session = dataset._session
1146
1166
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1147
1167
  SqlResultValidator(
@@ -1155,8 +1175,9 @@ class RANSACRegressor(BaseTransformer):
1155
1175
  expected_value=f"Stage area {score_stage_name} successfully created."
1156
1176
  ).validate()
1157
1177
 
1158
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1159
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1178
+ # Use posixpath to construct stage paths
1179
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1180
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1160
1181
  statement_params = telemetry.get_function_usage_statement_params(
1161
1182
  project=_PROJECT,
1162
1183
  subproject=_SUBPROJECT,
@@ -1182,6 +1203,7 @@ class RANSACRegressor(BaseTransformer):
1182
1203
  replace=True,
1183
1204
  session=session,
1184
1205
  statement_params=statement_params,
1206
+ anonymous=True
1185
1207
  )
1186
1208
  def score_wrapper_sproc(
1187
1209
  session: Session,
@@ -1189,7 +1211,8 @@ class RANSACRegressor(BaseTransformer):
1189
1211
  stage_score_file_name: str,
1190
1212
  input_cols: List[str],
1191
1213
  label_cols: List[str],
1192
- sample_weight_col: Optional[str]
1214
+ sample_weight_col: Optional[str],
1215
+ statement_params: Dict[str, str]
1193
1216
  ) -> float:
1194
1217
  import cloudpickle as cp
1195
1218
  import numpy as np
@@ -1239,14 +1262,14 @@ class RANSACRegressor(BaseTransformer):
1239
1262
  api_calls=[Session.call],
1240
1263
  custom_tags=dict([("autogen", True)]),
1241
1264
  )
1242
- score = session.call(
1243
- score_sproc_name,
1265
+ score = score_wrapper_sproc(
1266
+ session,
1244
1267
  query,
1245
1268
  stage_score_file_name,
1246
1269
  identifier.get_unescaped_names(self.input_cols),
1247
1270
  identifier.get_unescaped_names(self.label_cols),
1248
1271
  identifier.get_unescaped_names(self.sample_weight_col),
1249
- statement_params=statement_params,
1272
+ statement_params,
1250
1273
  )
1251
1274
 
1252
1275
  cleanup_temp_files([local_score_file_name])
@@ -1264,18 +1287,20 @@ class RANSACRegressor(BaseTransformer):
1264
1287
  if self._sklearn_object._estimator_type == 'classifier':
1265
1288
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1266
1289
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1267
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1290
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1291
+ ([] if self._drop_input_cols else inputs) + outputs)
1268
1292
  # For regressor, the type of predict is float64
1269
1293
  elif self._sklearn_object._estimator_type == 'regressor':
1270
1294
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1271
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1272
-
1295
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1296
+ ([] if self._drop_input_cols else inputs) + outputs)
1273
1297
  for prob_func in PROB_FUNCTIONS:
1274
1298
  if hasattr(self, prob_func):
1275
1299
  output_cols_prefix: str = f"{prob_func}_"
1276
1300
  output_column_names = self._get_output_column_names(output_cols_prefix)
1277
1301
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1278
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1302
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1303
+ ([] if self._drop_input_cols else inputs) + outputs)
1279
1304
 
1280
1305
  @property
1281
1306
  def model_signatures(self) -> Dict[str, ModelSignature]:
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -262,7 +264,6 @@ class Ridge(BaseTransformer):
262
264
  sample_weight_col: Optional[str] = None,
263
265
  ) -> None:
264
266
  super().__init__()
265
- self.id = str(uuid4()).replace("-", "_").upper()
266
267
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
267
268
 
268
269
  self._deps = list(deps)
@@ -289,6 +290,15 @@ class Ridge(BaseTransformer):
289
290
  self.set_drop_input_cols(drop_input_cols)
290
291
  self.set_sample_weight_col(sample_weight_col)
291
292
 
293
+ def _get_rand_id(self) -> str:
294
+ """
295
+ Generate random id to be used in sproc and stage names.
296
+
297
+ Returns:
298
+ Random id string usable in sproc, table, and stage names.
299
+ """
300
+ return str(uuid4()).replace("-", "_").upper()
301
+
292
302
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
293
303
  """
294
304
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -367,7 +377,7 @@ class Ridge(BaseTransformer):
367
377
  cp.dump(self._sklearn_object, local_transform_file)
368
378
 
369
379
  # Create temp stage to run fit.
370
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
380
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
371
381
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
372
382
  SqlResultValidator(
373
383
  session=session,
@@ -380,11 +390,12 @@ class Ridge(BaseTransformer):
380
390
  expected_value=f"Stage area {transform_stage_name} successfully created."
381
391
  ).validate()
382
392
 
383
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
393
+ # Use posixpath to construct stage paths
394
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
395
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
384
396
  local_result_file_name = get_temp_file_path()
385
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
386
397
 
387
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
398
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
388
399
  statement_params = telemetry.get_function_usage_statement_params(
389
400
  project=_PROJECT,
390
401
  subproject=_SUBPROJECT,
@@ -410,6 +421,7 @@ class Ridge(BaseTransformer):
410
421
  replace=True,
411
422
  session=session,
412
423
  statement_params=statement_params,
424
+ anonymous=True
413
425
  )
414
426
  def fit_wrapper_sproc(
415
427
  session: Session,
@@ -418,7 +430,8 @@ class Ridge(BaseTransformer):
418
430
  stage_result_file_name: str,
419
431
  input_cols: List[str],
420
432
  label_cols: List[str],
421
- sample_weight_col: Optional[str]
433
+ sample_weight_col: Optional[str],
434
+ statement_params: Dict[str, str]
422
435
  ) -> str:
423
436
  import cloudpickle as cp
424
437
  import numpy as np
@@ -485,15 +498,15 @@ class Ridge(BaseTransformer):
485
498
  api_calls=[Session.call],
486
499
  custom_tags=dict([("autogen", True)]),
487
500
  )
488
- sproc_export_file_name = session.call(
489
- fit_sproc_name,
501
+ sproc_export_file_name = fit_wrapper_sproc(
502
+ session,
490
503
  query,
491
504
  stage_transform_file_name,
492
505
  stage_result_file_name,
493
506
  identifier.get_unescaped_names(self.input_cols),
494
507
  identifier.get_unescaped_names(self.label_cols),
495
508
  identifier.get_unescaped_names(self.sample_weight_col),
496
- statement_params=statement_params,
509
+ statement_params,
497
510
  )
498
511
 
499
512
  if "|" in sproc_export_file_name:
@@ -503,7 +516,7 @@ class Ridge(BaseTransformer):
503
516
  print("\n".join(fields[1:]))
504
517
 
505
518
  session.file.get(
506
- os.path.join(stage_result_file_name, sproc_export_file_name),
519
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
507
520
  local_result_file_name,
508
521
  statement_params=statement_params
509
522
  )
@@ -549,7 +562,7 @@ class Ridge(BaseTransformer):
549
562
 
550
563
  # Register vectorized UDF for batch inference
551
564
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
552
- safe_id=self.id, method=inference_method)
565
+ safe_id=self._get_rand_id(), method=inference_method)
553
566
 
554
567
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
555
568
  # will try to pickle all of self which fails.
@@ -641,7 +654,7 @@ class Ridge(BaseTransformer):
641
654
  return transformed_pandas_df.to_dict("records")
642
655
 
643
656
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
644
- safe_id=self.id
657
+ safe_id=self._get_rand_id()
645
658
  )
646
659
 
647
660
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -808,11 +821,18 @@ class Ridge(BaseTransformer):
808
821
  Transformed dataset.
809
822
  """
810
823
  if isinstance(dataset, DataFrame):
824
+ expected_type_inferred = "float"
825
+ # when it is classifier, infer the datatype from label columns
826
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
827
+ expected_type_inferred = convert_sp_to_sf_type(
828
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
829
+ )
830
+
811
831
  output_df = self._batch_inference(
812
832
  dataset=dataset,
813
833
  inference_method="predict",
814
834
  expected_output_cols_list=self.output_cols,
815
- expected_output_cols_type="float",
835
+ expected_output_cols_type=expected_type_inferred,
816
836
  )
817
837
  elif isinstance(dataset, pd.DataFrame):
818
838
  output_df = self._sklearn_inference(
@@ -883,10 +903,10 @@ class Ridge(BaseTransformer):
883
903
 
884
904
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
885
905
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
886
- Returns an empty list if current object is not a classifier or not yet fitted.
906
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
887
907
  """
888
908
  if getattr(self._sklearn_object, "classes_", None) is None:
889
- return []
909
+ return [output_cols_prefix]
890
910
 
891
911
  classes = self._sklearn_object.classes_
892
912
  if isinstance(classes, numpy.ndarray):
@@ -1111,7 +1131,7 @@ class Ridge(BaseTransformer):
1111
1131
  cp.dump(self._sklearn_object, local_score_file)
1112
1132
 
1113
1133
  # Create temp stage to run score.
1114
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1134
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1115
1135
  session = dataset._session
1116
1136
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1117
1137
  SqlResultValidator(
@@ -1125,8 +1145,9 @@ class Ridge(BaseTransformer):
1125
1145
  expected_value=f"Stage area {score_stage_name} successfully created."
1126
1146
  ).validate()
1127
1147
 
1128
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1129
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1148
+ # Use posixpath to construct stage paths
1149
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1150
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1130
1151
  statement_params = telemetry.get_function_usage_statement_params(
1131
1152
  project=_PROJECT,
1132
1153
  subproject=_SUBPROJECT,
@@ -1152,6 +1173,7 @@ class Ridge(BaseTransformer):
1152
1173
  replace=True,
1153
1174
  session=session,
1154
1175
  statement_params=statement_params,
1176
+ anonymous=True
1155
1177
  )
1156
1178
  def score_wrapper_sproc(
1157
1179
  session: Session,
@@ -1159,7 +1181,8 @@ class Ridge(BaseTransformer):
1159
1181
  stage_score_file_name: str,
1160
1182
  input_cols: List[str],
1161
1183
  label_cols: List[str],
1162
- sample_weight_col: Optional[str]
1184
+ sample_weight_col: Optional[str],
1185
+ statement_params: Dict[str, str]
1163
1186
  ) -> float:
1164
1187
  import cloudpickle as cp
1165
1188
  import numpy as np
@@ -1209,14 +1232,14 @@ class Ridge(BaseTransformer):
1209
1232
  api_calls=[Session.call],
1210
1233
  custom_tags=dict([("autogen", True)]),
1211
1234
  )
1212
- score = session.call(
1213
- score_sproc_name,
1235
+ score = score_wrapper_sproc(
1236
+ session,
1214
1237
  query,
1215
1238
  stage_score_file_name,
1216
1239
  identifier.get_unescaped_names(self.input_cols),
1217
1240
  identifier.get_unescaped_names(self.label_cols),
1218
1241
  identifier.get_unescaped_names(self.sample_weight_col),
1219
- statement_params=statement_params,
1242
+ statement_params,
1220
1243
  )
1221
1244
 
1222
1245
  cleanup_temp_files([local_score_file_name])
@@ -1234,18 +1257,20 @@ class Ridge(BaseTransformer):
1234
1257
  if self._sklearn_object._estimator_type == 'classifier':
1235
1258
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1236
1259
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1237
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1260
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1261
+ ([] if self._drop_input_cols else inputs) + outputs)
1238
1262
  # For regressor, the type of predict is float64
1239
1263
  elif self._sklearn_object._estimator_type == 'regressor':
1240
1264
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1241
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1242
-
1265
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1266
+ ([] if self._drop_input_cols else inputs) + outputs)
1243
1267
  for prob_func in PROB_FUNCTIONS:
1244
1268
  if hasattr(self, prob_func):
1245
1269
  output_cols_prefix: str = f"{prob_func}_"
1246
1270
  output_column_names = self._get_output_column_names(output_cols_prefix)
1247
1271
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1248
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1272
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1273
+ ([] if self._drop_input_cols else inputs) + outputs)
1249
1274
 
1250
1275
  @property
1251
1276
  def model_signatures(self) -> Dict[str, ModelSignature]: