snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189) hide show
  1. snowflake/ml/_internal/env_utils.py +2 -1
  2. snowflake/ml/_internal/file_utils.py +29 -7
  3. snowflake/ml/_internal/telemetry.py +5 -8
  4. snowflake/ml/_internal/utils/uri.py +7 -2
  5. snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
  6. snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
  7. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
  8. snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
  9. snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
  10. snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
  11. snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
  12. snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
  13. snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
  14. snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
  15. snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
  16. snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
  17. snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
  18. snowflake/ml/model/_deployer.py +14 -27
  19. snowflake/ml/model/_env.py +4 -4
  20. snowflake/ml/model/_handlers/custom.py +14 -2
  21. snowflake/ml/model/_handlers/pytorch.py +186 -0
  22. snowflake/ml/model/_handlers/sklearn.py +14 -9
  23. snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
  24. snowflake/ml/model/_handlers/torchscript.py +180 -0
  25. snowflake/ml/model/_handlers/xgboost.py +19 -9
  26. snowflake/ml/model/_model.py +3 -2
  27. snowflake/ml/model/_model_meta.py +12 -7
  28. snowflake/ml/model/model_signature.py +446 -66
  29. snowflake/ml/model/type_hints.py +23 -4
  30. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
  31. snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
  32. snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
  33. snowflake/ml/modeling/cluster/birch.py +51 -26
  34. snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
  35. snowflake/ml/modeling/cluster/dbscan.py +51 -26
  36. snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
  37. snowflake/ml/modeling/cluster/k_means.py +51 -26
  38. snowflake/ml/modeling/cluster/mean_shift.py +51 -26
  39. snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
  40. snowflake/ml/modeling/cluster/optics.py +51 -26
  41. snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
  42. snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
  43. snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
  44. snowflake/ml/modeling/compose/column_transformer.py +51 -26
  45. snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
  46. snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
  47. snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
  48. snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
  49. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
  50. snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
  51. snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
  52. snowflake/ml/modeling/covariance/oas.py +51 -26
  53. snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
  54. snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
  55. snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
  56. snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
  57. snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
  58. snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
  59. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
  60. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
  61. snowflake/ml/modeling/decomposition/pca.py +51 -26
  62. snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
  63. snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
  64. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
  65. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
  66. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
  67. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
  68. snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
  69. snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
  70. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
  71. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
  72. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
  73. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
  74. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
  75. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
  76. snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
  77. snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
  78. snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
  79. snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
  80. snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
  81. snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
  82. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
  83. snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
  84. snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
  85. snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
  86. snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
  87. snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
  88. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
  89. snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
  90. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
  91. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
  92. snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
  93. snowflake/ml/modeling/impute/knn_imputer.py +51 -26
  94. snowflake/ml/modeling/impute/missing_indicator.py +51 -26
  95. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
  96. snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
  97. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
  98. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
  99. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
  100. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
  101. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
  102. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
  103. snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
  104. snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
  105. snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
  106. snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
  107. snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
  108. snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
  109. snowflake/ml/modeling/linear_model/lars.py +51 -26
  110. snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
  111. snowflake/ml/modeling/linear_model/lasso.py +51 -26
  112. snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
  113. snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
  114. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
  115. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
  116. snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
  117. snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
  118. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
  119. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
  120. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
  121. snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
  122. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
  123. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
  124. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
  125. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
  126. snowflake/ml/modeling/linear_model/perceptron.py +51 -26
  127. snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
  128. snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
  129. snowflake/ml/modeling/linear_model/ridge.py +51 -26
  130. snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
  131. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
  132. snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
  133. snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
  134. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
  135. snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
  136. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
  137. snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
  138. snowflake/ml/modeling/manifold/isomap.py +51 -26
  139. snowflake/ml/modeling/manifold/mds.py +51 -26
  140. snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
  141. snowflake/ml/modeling/manifold/tsne.py +51 -26
  142. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
  143. snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
  144. snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
  145. snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
  146. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
  147. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
  148. snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
  149. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
  150. snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
  151. snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
  152. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
  153. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
  154. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
  155. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
  156. snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
  157. snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
  158. snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
  159. snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
  160. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
  161. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
  162. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
  163. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
  164. snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
  165. snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
  166. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
  167. snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
  168. snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
  169. snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
  170. snowflake/ml/modeling/svm/linear_svc.py +51 -26
  171. snowflake/ml/modeling/svm/linear_svr.py +51 -26
  172. snowflake/ml/modeling/svm/nu_svc.py +51 -26
  173. snowflake/ml/modeling/svm/nu_svr.py +51 -26
  174. snowflake/ml/modeling/svm/svc.py +51 -26
  175. snowflake/ml/modeling/svm/svr.py +51 -26
  176. snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
  177. snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
  178. snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
  179. snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
  180. snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
  181. snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
  182. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
  183. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
  184. snowflake/ml/registry/model_registry.py +74 -56
  185. snowflake/ml/version.py +1 -1
  186. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
  187. snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
  188. snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
  189. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -255,7 +257,6 @@ class RadiusNeighborsRegressor(BaseTransformer):
255
257
  sample_weight_col: Optional[str] = None,
256
258
  ) -> None:
257
259
  super().__init__()
258
- self.id = str(uuid4()).replace("-", "_").upper()
259
260
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
260
261
 
261
262
  self._deps = list(deps)
@@ -282,6 +283,15 @@ class RadiusNeighborsRegressor(BaseTransformer):
282
283
  self.set_drop_input_cols(drop_input_cols)
283
284
  self.set_sample_weight_col(sample_weight_col)
284
285
 
286
+ def _get_rand_id(self) -> str:
287
+ """
288
+ Generate random id to be used in sproc and stage names.
289
+
290
+ Returns:
291
+ Random id string usable in sproc, table, and stage names.
292
+ """
293
+ return str(uuid4()).replace("-", "_").upper()
294
+
285
295
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
286
296
  """
287
297
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -360,7 +370,7 @@ class RadiusNeighborsRegressor(BaseTransformer):
360
370
  cp.dump(self._sklearn_object, local_transform_file)
361
371
 
362
372
  # Create temp stage to run fit.
363
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
373
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
364
374
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
365
375
  SqlResultValidator(
366
376
  session=session,
@@ -373,11 +383,12 @@ class RadiusNeighborsRegressor(BaseTransformer):
373
383
  expected_value=f"Stage area {transform_stage_name} successfully created."
374
384
  ).validate()
375
385
 
376
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
386
+ # Use posixpath to construct stage paths
387
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
388
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
377
389
  local_result_file_name = get_temp_file_path()
378
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
379
390
 
380
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
391
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
381
392
  statement_params = telemetry.get_function_usage_statement_params(
382
393
  project=_PROJECT,
383
394
  subproject=_SUBPROJECT,
@@ -403,6 +414,7 @@ class RadiusNeighborsRegressor(BaseTransformer):
403
414
  replace=True,
404
415
  session=session,
405
416
  statement_params=statement_params,
417
+ anonymous=True
406
418
  )
407
419
  def fit_wrapper_sproc(
408
420
  session: Session,
@@ -411,7 +423,8 @@ class RadiusNeighborsRegressor(BaseTransformer):
411
423
  stage_result_file_name: str,
412
424
  input_cols: List[str],
413
425
  label_cols: List[str],
414
- sample_weight_col: Optional[str]
426
+ sample_weight_col: Optional[str],
427
+ statement_params: Dict[str, str]
415
428
  ) -> str:
416
429
  import cloudpickle as cp
417
430
  import numpy as np
@@ -478,15 +491,15 @@ class RadiusNeighborsRegressor(BaseTransformer):
478
491
  api_calls=[Session.call],
479
492
  custom_tags=dict([("autogen", True)]),
480
493
  )
481
- sproc_export_file_name = session.call(
482
- fit_sproc_name,
494
+ sproc_export_file_name = fit_wrapper_sproc(
495
+ session,
483
496
  query,
484
497
  stage_transform_file_name,
485
498
  stage_result_file_name,
486
499
  identifier.get_unescaped_names(self.input_cols),
487
500
  identifier.get_unescaped_names(self.label_cols),
488
501
  identifier.get_unescaped_names(self.sample_weight_col),
489
- statement_params=statement_params,
502
+ statement_params,
490
503
  )
491
504
 
492
505
  if "|" in sproc_export_file_name:
@@ -496,7 +509,7 @@ class RadiusNeighborsRegressor(BaseTransformer):
496
509
  print("\n".join(fields[1:]))
497
510
 
498
511
  session.file.get(
499
- os.path.join(stage_result_file_name, sproc_export_file_name),
512
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
500
513
  local_result_file_name,
501
514
  statement_params=statement_params
502
515
  )
@@ -542,7 +555,7 @@ class RadiusNeighborsRegressor(BaseTransformer):
542
555
 
543
556
  # Register vectorized UDF for batch inference
544
557
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
545
- safe_id=self.id, method=inference_method)
558
+ safe_id=self._get_rand_id(), method=inference_method)
546
559
 
547
560
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
548
561
  # will try to pickle all of self which fails.
@@ -634,7 +647,7 @@ class RadiusNeighborsRegressor(BaseTransformer):
634
647
  return transformed_pandas_df.to_dict("records")
635
648
 
636
649
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
637
- safe_id=self.id
650
+ safe_id=self._get_rand_id()
638
651
  )
639
652
 
640
653
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -801,11 +814,18 @@ class RadiusNeighborsRegressor(BaseTransformer):
801
814
  Transformed dataset.
802
815
  """
803
816
  if isinstance(dataset, DataFrame):
817
+ expected_type_inferred = "float"
818
+ # when it is classifier, infer the datatype from label columns
819
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
820
+ expected_type_inferred = convert_sp_to_sf_type(
821
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
822
+ )
823
+
804
824
  output_df = self._batch_inference(
805
825
  dataset=dataset,
806
826
  inference_method="predict",
807
827
  expected_output_cols_list=self.output_cols,
808
- expected_output_cols_type="float",
828
+ expected_output_cols_type=expected_type_inferred,
809
829
  )
810
830
  elif isinstance(dataset, pd.DataFrame):
811
831
  output_df = self._sklearn_inference(
@@ -876,10 +896,10 @@ class RadiusNeighborsRegressor(BaseTransformer):
876
896
 
877
897
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
878
898
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
879
- Returns an empty list if current object is not a classifier or not yet fitted.
899
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
880
900
  """
881
901
  if getattr(self._sklearn_object, "classes_", None) is None:
882
- return []
902
+ return [output_cols_prefix]
883
903
 
884
904
  classes = self._sklearn_object.classes_
885
905
  if isinstance(classes, numpy.ndarray):
@@ -1104,7 +1124,7 @@ class RadiusNeighborsRegressor(BaseTransformer):
1104
1124
  cp.dump(self._sklearn_object, local_score_file)
1105
1125
 
1106
1126
  # Create temp stage to run score.
1107
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1127
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1108
1128
  session = dataset._session
1109
1129
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1110
1130
  SqlResultValidator(
@@ -1118,8 +1138,9 @@ class RadiusNeighborsRegressor(BaseTransformer):
1118
1138
  expected_value=f"Stage area {score_stage_name} successfully created."
1119
1139
  ).validate()
1120
1140
 
1121
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1122
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1141
+ # Use posixpath to construct stage paths
1142
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1143
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1123
1144
  statement_params = telemetry.get_function_usage_statement_params(
1124
1145
  project=_PROJECT,
1125
1146
  subproject=_SUBPROJECT,
@@ -1145,6 +1166,7 @@ class RadiusNeighborsRegressor(BaseTransformer):
1145
1166
  replace=True,
1146
1167
  session=session,
1147
1168
  statement_params=statement_params,
1169
+ anonymous=True
1148
1170
  )
1149
1171
  def score_wrapper_sproc(
1150
1172
  session: Session,
@@ -1152,7 +1174,8 @@ class RadiusNeighborsRegressor(BaseTransformer):
1152
1174
  stage_score_file_name: str,
1153
1175
  input_cols: List[str],
1154
1176
  label_cols: List[str],
1155
- sample_weight_col: Optional[str]
1177
+ sample_weight_col: Optional[str],
1178
+ statement_params: Dict[str, str]
1156
1179
  ) -> float:
1157
1180
  import cloudpickle as cp
1158
1181
  import numpy as np
@@ -1202,14 +1225,14 @@ class RadiusNeighborsRegressor(BaseTransformer):
1202
1225
  api_calls=[Session.call],
1203
1226
  custom_tags=dict([("autogen", True)]),
1204
1227
  )
1205
- score = session.call(
1206
- score_sproc_name,
1228
+ score = score_wrapper_sproc(
1229
+ session,
1207
1230
  query,
1208
1231
  stage_score_file_name,
1209
1232
  identifier.get_unescaped_names(self.input_cols),
1210
1233
  identifier.get_unescaped_names(self.label_cols),
1211
1234
  identifier.get_unescaped_names(self.sample_weight_col),
1212
- statement_params=statement_params,
1235
+ statement_params,
1213
1236
  )
1214
1237
 
1215
1238
  cleanup_temp_files([local_score_file_name])
@@ -1227,18 +1250,20 @@ class RadiusNeighborsRegressor(BaseTransformer):
1227
1250
  if self._sklearn_object._estimator_type == 'classifier':
1228
1251
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1229
1252
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1230
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1253
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1254
+ ([] if self._drop_input_cols else inputs) + outputs)
1231
1255
  # For regressor, the type of predict is float64
1232
1256
  elif self._sklearn_object._estimator_type == 'regressor':
1233
1257
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1234
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1235
-
1258
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1259
+ ([] if self._drop_input_cols else inputs) + outputs)
1236
1260
  for prob_func in PROB_FUNCTIONS:
1237
1261
  if hasattr(self, prob_func):
1238
1262
  output_cols_prefix: str = f"{prob_func}_"
1239
1263
  output_column_names = self._get_output_column_names(output_cols_prefix)
1240
1264
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1241
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1265
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1266
+ ([] if self._drop_input_cols else inputs) + outputs)
1242
1267
 
1243
1268
  @property
1244
1269
  def model_signatures(self) -> Dict[str, ModelSignature]:
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -216,7 +218,6 @@ class BernoulliRBM(BaseTransformer):
216
218
  sample_weight_col: Optional[str] = None,
217
219
  ) -> None:
218
220
  super().__init__()
219
- self.id = str(uuid4()).replace("-", "_").upper()
220
221
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
221
222
 
222
223
  self._deps = list(deps)
@@ -241,6 +242,15 @@ class BernoulliRBM(BaseTransformer):
241
242
  self.set_drop_input_cols(drop_input_cols)
242
243
  self.set_sample_weight_col(sample_weight_col)
243
244
 
245
+ def _get_rand_id(self) -> str:
246
+ """
247
+ Generate random id to be used in sproc and stage names.
248
+
249
+ Returns:
250
+ Random id string usable in sproc, table, and stage names.
251
+ """
252
+ return str(uuid4()).replace("-", "_").upper()
253
+
244
254
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
245
255
  """
246
256
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -319,7 +329,7 @@ class BernoulliRBM(BaseTransformer):
319
329
  cp.dump(self._sklearn_object, local_transform_file)
320
330
 
321
331
  # Create temp stage to run fit.
322
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
332
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
323
333
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
324
334
  SqlResultValidator(
325
335
  session=session,
@@ -332,11 +342,12 @@ class BernoulliRBM(BaseTransformer):
332
342
  expected_value=f"Stage area {transform_stage_name} successfully created."
333
343
  ).validate()
334
344
 
335
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
345
+ # Use posixpath to construct stage paths
346
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
347
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
336
348
  local_result_file_name = get_temp_file_path()
337
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
338
349
 
339
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
350
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
340
351
  statement_params = telemetry.get_function_usage_statement_params(
341
352
  project=_PROJECT,
342
353
  subproject=_SUBPROJECT,
@@ -362,6 +373,7 @@ class BernoulliRBM(BaseTransformer):
362
373
  replace=True,
363
374
  session=session,
364
375
  statement_params=statement_params,
376
+ anonymous=True
365
377
  )
366
378
  def fit_wrapper_sproc(
367
379
  session: Session,
@@ -370,7 +382,8 @@ class BernoulliRBM(BaseTransformer):
370
382
  stage_result_file_name: str,
371
383
  input_cols: List[str],
372
384
  label_cols: List[str],
373
- sample_weight_col: Optional[str]
385
+ sample_weight_col: Optional[str],
386
+ statement_params: Dict[str, str]
374
387
  ) -> str:
375
388
  import cloudpickle as cp
376
389
  import numpy as np
@@ -437,15 +450,15 @@ class BernoulliRBM(BaseTransformer):
437
450
  api_calls=[Session.call],
438
451
  custom_tags=dict([("autogen", True)]),
439
452
  )
440
- sproc_export_file_name = session.call(
441
- fit_sproc_name,
453
+ sproc_export_file_name = fit_wrapper_sproc(
454
+ session,
442
455
  query,
443
456
  stage_transform_file_name,
444
457
  stage_result_file_name,
445
458
  identifier.get_unescaped_names(self.input_cols),
446
459
  identifier.get_unescaped_names(self.label_cols),
447
460
  identifier.get_unescaped_names(self.sample_weight_col),
448
- statement_params=statement_params,
461
+ statement_params,
449
462
  )
450
463
 
451
464
  if "|" in sproc_export_file_name:
@@ -455,7 +468,7 @@ class BernoulliRBM(BaseTransformer):
455
468
  print("\n".join(fields[1:]))
456
469
 
457
470
  session.file.get(
458
- os.path.join(stage_result_file_name, sproc_export_file_name),
471
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
459
472
  local_result_file_name,
460
473
  statement_params=statement_params
461
474
  )
@@ -501,7 +514,7 @@ class BernoulliRBM(BaseTransformer):
501
514
 
502
515
  # Register vectorized UDF for batch inference
503
516
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
504
- safe_id=self.id, method=inference_method)
517
+ safe_id=self._get_rand_id(), method=inference_method)
505
518
 
506
519
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
507
520
  # will try to pickle all of self which fails.
@@ -593,7 +606,7 @@ class BernoulliRBM(BaseTransformer):
593
606
  return transformed_pandas_df.to_dict("records")
594
607
 
595
608
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
596
- safe_id=self.id
609
+ safe_id=self._get_rand_id()
597
610
  )
598
611
 
599
612
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -758,11 +771,18 @@ class BernoulliRBM(BaseTransformer):
758
771
  Transformed dataset.
759
772
  """
760
773
  if isinstance(dataset, DataFrame):
774
+ expected_type_inferred = ""
775
+ # when it is classifier, infer the datatype from label columns
776
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
777
+ expected_type_inferred = convert_sp_to_sf_type(
778
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
779
+ )
780
+
761
781
  output_df = self._batch_inference(
762
782
  dataset=dataset,
763
783
  inference_method="predict",
764
784
  expected_output_cols_list=self.output_cols,
765
- expected_output_cols_type="",
785
+ expected_output_cols_type=expected_type_inferred,
766
786
  )
767
787
  elif isinstance(dataset, pd.DataFrame):
768
788
  output_df = self._sklearn_inference(
@@ -835,10 +855,10 @@ class BernoulliRBM(BaseTransformer):
835
855
 
836
856
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
837
857
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
838
- Returns an empty list if current object is not a classifier or not yet fitted.
858
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
839
859
  """
840
860
  if getattr(self._sklearn_object, "classes_", None) is None:
841
- return []
861
+ return [output_cols_prefix]
842
862
 
843
863
  classes = self._sklearn_object.classes_
844
864
  if isinstance(classes, numpy.ndarray):
@@ -1063,7 +1083,7 @@ class BernoulliRBM(BaseTransformer):
1063
1083
  cp.dump(self._sklearn_object, local_score_file)
1064
1084
 
1065
1085
  # Create temp stage to run score.
1066
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1086
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1067
1087
  session = dataset._session
1068
1088
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1069
1089
  SqlResultValidator(
@@ -1077,8 +1097,9 @@ class BernoulliRBM(BaseTransformer):
1077
1097
  expected_value=f"Stage area {score_stage_name} successfully created."
1078
1098
  ).validate()
1079
1099
 
1080
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1081
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1100
+ # Use posixpath to construct stage paths
1101
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1102
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1082
1103
  statement_params = telemetry.get_function_usage_statement_params(
1083
1104
  project=_PROJECT,
1084
1105
  subproject=_SUBPROJECT,
@@ -1104,6 +1125,7 @@ class BernoulliRBM(BaseTransformer):
1104
1125
  replace=True,
1105
1126
  session=session,
1106
1127
  statement_params=statement_params,
1128
+ anonymous=True
1107
1129
  )
1108
1130
  def score_wrapper_sproc(
1109
1131
  session: Session,
@@ -1111,7 +1133,8 @@ class BernoulliRBM(BaseTransformer):
1111
1133
  stage_score_file_name: str,
1112
1134
  input_cols: List[str],
1113
1135
  label_cols: List[str],
1114
- sample_weight_col: Optional[str]
1136
+ sample_weight_col: Optional[str],
1137
+ statement_params: Dict[str, str]
1115
1138
  ) -> float:
1116
1139
  import cloudpickle as cp
1117
1140
  import numpy as np
@@ -1161,14 +1184,14 @@ class BernoulliRBM(BaseTransformer):
1161
1184
  api_calls=[Session.call],
1162
1185
  custom_tags=dict([("autogen", True)]),
1163
1186
  )
1164
- score = session.call(
1165
- score_sproc_name,
1187
+ score = score_wrapper_sproc(
1188
+ session,
1166
1189
  query,
1167
1190
  stage_score_file_name,
1168
1191
  identifier.get_unescaped_names(self.input_cols),
1169
1192
  identifier.get_unescaped_names(self.label_cols),
1170
1193
  identifier.get_unescaped_names(self.sample_weight_col),
1171
- statement_params=statement_params,
1194
+ statement_params,
1172
1195
  )
1173
1196
 
1174
1197
  cleanup_temp_files([local_score_file_name])
@@ -1186,18 +1209,20 @@ class BernoulliRBM(BaseTransformer):
1186
1209
  if self._sklearn_object._estimator_type == 'classifier':
1187
1210
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1188
1211
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1189
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1212
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1213
+ ([] if self._drop_input_cols else inputs) + outputs)
1190
1214
  # For regressor, the type of predict is float64
1191
1215
  elif self._sklearn_object._estimator_type == 'regressor':
1192
1216
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1193
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1194
-
1217
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1218
+ ([] if self._drop_input_cols else inputs) + outputs)
1195
1219
  for prob_func in PROB_FUNCTIONS:
1196
1220
  if hasattr(self, prob_func):
1197
1221
  output_cols_prefix: str = f"{prob_func}_"
1198
1222
  output_column_names = self._get_output_column_names(output_cols_prefix)
1199
1223
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1200
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1224
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1225
+ ([] if self._drop_input_cols else inputs) + outputs)
1201
1226
 
1202
1227
  @property
1203
1228
  def model_signatures(self) -> Dict[str, ModelSignature]: