snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189) hide show
  1. snowflake/ml/_internal/env_utils.py +2 -1
  2. snowflake/ml/_internal/file_utils.py +29 -7
  3. snowflake/ml/_internal/telemetry.py +5 -8
  4. snowflake/ml/_internal/utils/uri.py +7 -2
  5. snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
  6. snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
  7. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
  8. snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
  9. snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
  10. snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
  11. snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
  12. snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
  13. snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
  14. snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
  15. snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
  16. snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
  17. snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
  18. snowflake/ml/model/_deployer.py +14 -27
  19. snowflake/ml/model/_env.py +4 -4
  20. snowflake/ml/model/_handlers/custom.py +14 -2
  21. snowflake/ml/model/_handlers/pytorch.py +186 -0
  22. snowflake/ml/model/_handlers/sklearn.py +14 -9
  23. snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
  24. snowflake/ml/model/_handlers/torchscript.py +180 -0
  25. snowflake/ml/model/_handlers/xgboost.py +19 -9
  26. snowflake/ml/model/_model.py +3 -2
  27. snowflake/ml/model/_model_meta.py +12 -7
  28. snowflake/ml/model/model_signature.py +446 -66
  29. snowflake/ml/model/type_hints.py +23 -4
  30. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
  31. snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
  32. snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
  33. snowflake/ml/modeling/cluster/birch.py +51 -26
  34. snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
  35. snowflake/ml/modeling/cluster/dbscan.py +51 -26
  36. snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
  37. snowflake/ml/modeling/cluster/k_means.py +51 -26
  38. snowflake/ml/modeling/cluster/mean_shift.py +51 -26
  39. snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
  40. snowflake/ml/modeling/cluster/optics.py +51 -26
  41. snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
  42. snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
  43. snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
  44. snowflake/ml/modeling/compose/column_transformer.py +51 -26
  45. snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
  46. snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
  47. snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
  48. snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
  49. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
  50. snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
  51. snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
  52. snowflake/ml/modeling/covariance/oas.py +51 -26
  53. snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
  54. snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
  55. snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
  56. snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
  57. snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
  58. snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
  59. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
  60. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
  61. snowflake/ml/modeling/decomposition/pca.py +51 -26
  62. snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
  63. snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
  64. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
  65. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
  66. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
  67. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
  68. snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
  69. snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
  70. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
  71. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
  72. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
  73. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
  74. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
  75. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
  76. snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
  77. snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
  78. snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
  79. snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
  80. snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
  81. snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
  82. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
  83. snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
  84. snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
  85. snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
  86. snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
  87. snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
  88. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
  89. snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
  90. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
  91. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
  92. snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
  93. snowflake/ml/modeling/impute/knn_imputer.py +51 -26
  94. snowflake/ml/modeling/impute/missing_indicator.py +51 -26
  95. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
  96. snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
  97. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
  98. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
  99. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
  100. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
  101. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
  102. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
  103. snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
  104. snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
  105. snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
  106. snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
  107. snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
  108. snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
  109. snowflake/ml/modeling/linear_model/lars.py +51 -26
  110. snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
  111. snowflake/ml/modeling/linear_model/lasso.py +51 -26
  112. snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
  113. snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
  114. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
  115. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
  116. snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
  117. snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
  118. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
  119. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
  120. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
  121. snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
  122. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
  123. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
  124. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
  125. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
  126. snowflake/ml/modeling/linear_model/perceptron.py +51 -26
  127. snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
  128. snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
  129. snowflake/ml/modeling/linear_model/ridge.py +51 -26
  130. snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
  131. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
  132. snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
  133. snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
  134. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
  135. snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
  136. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
  137. snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
  138. snowflake/ml/modeling/manifold/isomap.py +51 -26
  139. snowflake/ml/modeling/manifold/mds.py +51 -26
  140. snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
  141. snowflake/ml/modeling/manifold/tsne.py +51 -26
  142. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
  143. snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
  144. snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
  145. snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
  146. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
  147. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
  148. snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
  149. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
  150. snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
  151. snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
  152. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
  153. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
  154. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
  155. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
  156. snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
  157. snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
  158. snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
  159. snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
  160. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
  161. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
  162. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
  163. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
  164. snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
  165. snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
  166. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
  167. snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
  168. snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
  169. snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
  170. snowflake/ml/modeling/svm/linear_svc.py +51 -26
  171. snowflake/ml/modeling/svm/linear_svr.py +51 -26
  172. snowflake/ml/modeling/svm/nu_svc.py +51 -26
  173. snowflake/ml/modeling/svm/nu_svr.py +51 -26
  174. snowflake/ml/modeling/svm/svc.py +51 -26
  175. snowflake/ml/modeling/svm/svr.py +51 -26
  176. snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
  177. snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
  178. snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
  179. snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
  180. snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
  181. snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
  182. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
  183. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
  184. snowflake/ml/registry/model_registry.py +74 -56
  185. snowflake/ml/version.py +1 -1
  186. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
  187. snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
  188. snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
  189. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -210,7 +212,6 @@ class MissingIndicator(BaseTransformer):
210
212
  sample_weight_col: Optional[str] = None,
211
213
  ) -> None:
212
214
  super().__init__()
213
- self.id = str(uuid4()).replace("-", "_").upper()
214
215
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
215
216
 
216
217
  self._deps = list(deps)
@@ -233,6 +234,15 @@ class MissingIndicator(BaseTransformer):
233
234
  self.set_drop_input_cols(drop_input_cols)
234
235
  self.set_sample_weight_col(sample_weight_col)
235
236
 
237
+ def _get_rand_id(self) -> str:
238
+ """
239
+ Generate random id to be used in sproc and stage names.
240
+
241
+ Returns:
242
+ Random id string usable in sproc, table, and stage names.
243
+ """
244
+ return str(uuid4()).replace("-", "_").upper()
245
+
236
246
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
237
247
  """
238
248
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -311,7 +321,7 @@ class MissingIndicator(BaseTransformer):
311
321
  cp.dump(self._sklearn_object, local_transform_file)
312
322
 
313
323
  # Create temp stage to run fit.
314
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
324
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
315
325
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
316
326
  SqlResultValidator(
317
327
  session=session,
@@ -324,11 +334,12 @@ class MissingIndicator(BaseTransformer):
324
334
  expected_value=f"Stage area {transform_stage_name} successfully created."
325
335
  ).validate()
326
336
 
327
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
337
+ # Use posixpath to construct stage paths
338
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
339
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
328
340
  local_result_file_name = get_temp_file_path()
329
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
330
341
 
331
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
342
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
332
343
  statement_params = telemetry.get_function_usage_statement_params(
333
344
  project=_PROJECT,
334
345
  subproject=_SUBPROJECT,
@@ -354,6 +365,7 @@ class MissingIndicator(BaseTransformer):
354
365
  replace=True,
355
366
  session=session,
356
367
  statement_params=statement_params,
368
+ anonymous=True
357
369
  )
358
370
  def fit_wrapper_sproc(
359
371
  session: Session,
@@ -362,7 +374,8 @@ class MissingIndicator(BaseTransformer):
362
374
  stage_result_file_name: str,
363
375
  input_cols: List[str],
364
376
  label_cols: List[str],
365
- sample_weight_col: Optional[str]
377
+ sample_weight_col: Optional[str],
378
+ statement_params: Dict[str, str]
366
379
  ) -> str:
367
380
  import cloudpickle as cp
368
381
  import numpy as np
@@ -429,15 +442,15 @@ class MissingIndicator(BaseTransformer):
429
442
  api_calls=[Session.call],
430
443
  custom_tags=dict([("autogen", True)]),
431
444
  )
432
- sproc_export_file_name = session.call(
433
- fit_sproc_name,
445
+ sproc_export_file_name = fit_wrapper_sproc(
446
+ session,
434
447
  query,
435
448
  stage_transform_file_name,
436
449
  stage_result_file_name,
437
450
  identifier.get_unescaped_names(self.input_cols),
438
451
  identifier.get_unescaped_names(self.label_cols),
439
452
  identifier.get_unescaped_names(self.sample_weight_col),
440
- statement_params=statement_params,
453
+ statement_params,
441
454
  )
442
455
 
443
456
  if "|" in sproc_export_file_name:
@@ -447,7 +460,7 @@ class MissingIndicator(BaseTransformer):
447
460
  print("\n".join(fields[1:]))
448
461
 
449
462
  session.file.get(
450
- os.path.join(stage_result_file_name, sproc_export_file_name),
463
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
451
464
  local_result_file_name,
452
465
  statement_params=statement_params
453
466
  )
@@ -493,7 +506,7 @@ class MissingIndicator(BaseTransformer):
493
506
 
494
507
  # Register vectorized UDF for batch inference
495
508
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
496
- safe_id=self.id, method=inference_method)
509
+ safe_id=self._get_rand_id(), method=inference_method)
497
510
 
498
511
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
499
512
  # will try to pickle all of self which fails.
@@ -585,7 +598,7 @@ class MissingIndicator(BaseTransformer):
585
598
  return transformed_pandas_df.to_dict("records")
586
599
 
587
600
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
588
- safe_id=self.id
601
+ safe_id=self._get_rand_id()
589
602
  )
590
603
 
591
604
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -750,11 +763,18 @@ class MissingIndicator(BaseTransformer):
750
763
  Transformed dataset.
751
764
  """
752
765
  if isinstance(dataset, DataFrame):
766
+ expected_type_inferred = ""
767
+ # when it is classifier, infer the datatype from label columns
768
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
769
+ expected_type_inferred = convert_sp_to_sf_type(
770
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
771
+ )
772
+
753
773
  output_df = self._batch_inference(
754
774
  dataset=dataset,
755
775
  inference_method="predict",
756
776
  expected_output_cols_list=self.output_cols,
757
- expected_output_cols_type="",
777
+ expected_output_cols_type=expected_type_inferred,
758
778
  )
759
779
  elif isinstance(dataset, pd.DataFrame):
760
780
  output_df = self._sklearn_inference(
@@ -827,10 +847,10 @@ class MissingIndicator(BaseTransformer):
827
847
 
828
848
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
829
849
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
830
- Returns an empty list if current object is not a classifier or not yet fitted.
850
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
831
851
  """
832
852
  if getattr(self._sklearn_object, "classes_", None) is None:
833
- return []
853
+ return [output_cols_prefix]
834
854
 
835
855
  classes = self._sklearn_object.classes_
836
856
  if isinstance(classes, numpy.ndarray):
@@ -1055,7 +1075,7 @@ class MissingIndicator(BaseTransformer):
1055
1075
  cp.dump(self._sklearn_object, local_score_file)
1056
1076
 
1057
1077
  # Create temp stage to run score.
1058
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1078
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1059
1079
  session = dataset._session
1060
1080
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1061
1081
  SqlResultValidator(
@@ -1069,8 +1089,9 @@ class MissingIndicator(BaseTransformer):
1069
1089
  expected_value=f"Stage area {score_stage_name} successfully created."
1070
1090
  ).validate()
1071
1091
 
1072
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1073
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1092
+ # Use posixpath to construct stage paths
1093
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1094
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1074
1095
  statement_params = telemetry.get_function_usage_statement_params(
1075
1096
  project=_PROJECT,
1076
1097
  subproject=_SUBPROJECT,
@@ -1096,6 +1117,7 @@ class MissingIndicator(BaseTransformer):
1096
1117
  replace=True,
1097
1118
  session=session,
1098
1119
  statement_params=statement_params,
1120
+ anonymous=True
1099
1121
  )
1100
1122
  def score_wrapper_sproc(
1101
1123
  session: Session,
@@ -1103,7 +1125,8 @@ class MissingIndicator(BaseTransformer):
1103
1125
  stage_score_file_name: str,
1104
1126
  input_cols: List[str],
1105
1127
  label_cols: List[str],
1106
- sample_weight_col: Optional[str]
1128
+ sample_weight_col: Optional[str],
1129
+ statement_params: Dict[str, str]
1107
1130
  ) -> float:
1108
1131
  import cloudpickle as cp
1109
1132
  import numpy as np
@@ -1153,14 +1176,14 @@ class MissingIndicator(BaseTransformer):
1153
1176
  api_calls=[Session.call],
1154
1177
  custom_tags=dict([("autogen", True)]),
1155
1178
  )
1156
- score = session.call(
1157
- score_sproc_name,
1179
+ score = score_wrapper_sproc(
1180
+ session,
1158
1181
  query,
1159
1182
  stage_score_file_name,
1160
1183
  identifier.get_unescaped_names(self.input_cols),
1161
1184
  identifier.get_unescaped_names(self.label_cols),
1162
1185
  identifier.get_unescaped_names(self.sample_weight_col),
1163
- statement_params=statement_params,
1186
+ statement_params,
1164
1187
  )
1165
1188
 
1166
1189
  cleanup_temp_files([local_score_file_name])
@@ -1178,18 +1201,20 @@ class MissingIndicator(BaseTransformer):
1178
1201
  if self._sklearn_object._estimator_type == 'classifier':
1179
1202
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1180
1203
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1181
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1204
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1205
+ ([] if self._drop_input_cols else inputs) + outputs)
1182
1206
  # For regressor, the type of predict is float64
1183
1207
  elif self._sklearn_object._estimator_type == 'regressor':
1184
1208
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1185
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1186
-
1209
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1210
+ ([] if self._drop_input_cols else inputs) + outputs)
1187
1211
  for prob_func in PROB_FUNCTIONS:
1188
1212
  if hasattr(self, prob_func):
1189
1213
  output_cols_prefix: str = f"{prob_func}_"
1190
1214
  output_column_names = self._get_output_column_names(output_cols_prefix)
1191
1215
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1192
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1216
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1217
+ ([] if self._drop_input_cols else inputs) + outputs)
1193
1218
 
1194
1219
  @property
1195
1220
  def model_signatures(self) -> Dict[str, ModelSignature]:
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -187,7 +189,6 @@ class AdditiveChi2Sampler(BaseTransformer):
187
189
  sample_weight_col: Optional[str] = None,
188
190
  ) -> None:
189
191
  super().__init__()
190
- self.id = str(uuid4()).replace("-", "_").upper()
191
192
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
192
193
 
193
194
  self._deps = list(deps)
@@ -208,6 +209,15 @@ class AdditiveChi2Sampler(BaseTransformer):
208
209
  self.set_drop_input_cols(drop_input_cols)
209
210
  self.set_sample_weight_col(sample_weight_col)
210
211
 
212
+ def _get_rand_id(self) -> str:
213
+ """
214
+ Generate random id to be used in sproc and stage names.
215
+
216
+ Returns:
217
+ Random id string usable in sproc, table, and stage names.
218
+ """
219
+ return str(uuid4()).replace("-", "_").upper()
220
+
211
221
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
212
222
  """
213
223
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -286,7 +296,7 @@ class AdditiveChi2Sampler(BaseTransformer):
286
296
  cp.dump(self._sklearn_object, local_transform_file)
287
297
 
288
298
  # Create temp stage to run fit.
289
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
299
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
290
300
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
291
301
  SqlResultValidator(
292
302
  session=session,
@@ -299,11 +309,12 @@ class AdditiveChi2Sampler(BaseTransformer):
299
309
  expected_value=f"Stage area {transform_stage_name} successfully created."
300
310
  ).validate()
301
311
 
302
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
312
+ # Use posixpath to construct stage paths
313
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
314
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
303
315
  local_result_file_name = get_temp_file_path()
304
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
305
316
 
306
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
317
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
307
318
  statement_params = telemetry.get_function_usage_statement_params(
308
319
  project=_PROJECT,
309
320
  subproject=_SUBPROJECT,
@@ -329,6 +340,7 @@ class AdditiveChi2Sampler(BaseTransformer):
329
340
  replace=True,
330
341
  session=session,
331
342
  statement_params=statement_params,
343
+ anonymous=True
332
344
  )
333
345
  def fit_wrapper_sproc(
334
346
  session: Session,
@@ -337,7 +349,8 @@ class AdditiveChi2Sampler(BaseTransformer):
337
349
  stage_result_file_name: str,
338
350
  input_cols: List[str],
339
351
  label_cols: List[str],
340
- sample_weight_col: Optional[str]
352
+ sample_weight_col: Optional[str],
353
+ statement_params: Dict[str, str]
341
354
  ) -> str:
342
355
  import cloudpickle as cp
343
356
  import numpy as np
@@ -404,15 +417,15 @@ class AdditiveChi2Sampler(BaseTransformer):
404
417
  api_calls=[Session.call],
405
418
  custom_tags=dict([("autogen", True)]),
406
419
  )
407
- sproc_export_file_name = session.call(
408
- fit_sproc_name,
420
+ sproc_export_file_name = fit_wrapper_sproc(
421
+ session,
409
422
  query,
410
423
  stage_transform_file_name,
411
424
  stage_result_file_name,
412
425
  identifier.get_unescaped_names(self.input_cols),
413
426
  identifier.get_unescaped_names(self.label_cols),
414
427
  identifier.get_unescaped_names(self.sample_weight_col),
415
- statement_params=statement_params,
428
+ statement_params,
416
429
  )
417
430
 
418
431
  if "|" in sproc_export_file_name:
@@ -422,7 +435,7 @@ class AdditiveChi2Sampler(BaseTransformer):
422
435
  print("\n".join(fields[1:]))
423
436
 
424
437
  session.file.get(
425
- os.path.join(stage_result_file_name, sproc_export_file_name),
438
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
426
439
  local_result_file_name,
427
440
  statement_params=statement_params
428
441
  )
@@ -468,7 +481,7 @@ class AdditiveChi2Sampler(BaseTransformer):
468
481
 
469
482
  # Register vectorized UDF for batch inference
470
483
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
471
- safe_id=self.id, method=inference_method)
484
+ safe_id=self._get_rand_id(), method=inference_method)
472
485
 
473
486
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
474
487
  # will try to pickle all of self which fails.
@@ -560,7 +573,7 @@ class AdditiveChi2Sampler(BaseTransformer):
560
573
  return transformed_pandas_df.to_dict("records")
561
574
 
562
575
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
563
- safe_id=self.id
576
+ safe_id=self._get_rand_id()
564
577
  )
565
578
 
566
579
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -725,11 +738,18 @@ class AdditiveChi2Sampler(BaseTransformer):
725
738
  Transformed dataset.
726
739
  """
727
740
  if isinstance(dataset, DataFrame):
741
+ expected_type_inferred = ""
742
+ # when it is classifier, infer the datatype from label columns
743
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
744
+ expected_type_inferred = convert_sp_to_sf_type(
745
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
746
+ )
747
+
728
748
  output_df = self._batch_inference(
729
749
  dataset=dataset,
730
750
  inference_method="predict",
731
751
  expected_output_cols_list=self.output_cols,
732
- expected_output_cols_type="",
752
+ expected_output_cols_type=expected_type_inferred,
733
753
  )
734
754
  elif isinstance(dataset, pd.DataFrame):
735
755
  output_df = self._sklearn_inference(
@@ -802,10 +822,10 @@ class AdditiveChi2Sampler(BaseTransformer):
802
822
 
803
823
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
804
824
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
805
- Returns an empty list if current object is not a classifier or not yet fitted.
825
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
806
826
  """
807
827
  if getattr(self._sklearn_object, "classes_", None) is None:
808
- return []
828
+ return [output_cols_prefix]
809
829
 
810
830
  classes = self._sklearn_object.classes_
811
831
  if isinstance(classes, numpy.ndarray):
@@ -1030,7 +1050,7 @@ class AdditiveChi2Sampler(BaseTransformer):
1030
1050
  cp.dump(self._sklearn_object, local_score_file)
1031
1051
 
1032
1052
  # Create temp stage to run score.
1033
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1053
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1034
1054
  session = dataset._session
1035
1055
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1036
1056
  SqlResultValidator(
@@ -1044,8 +1064,9 @@ class AdditiveChi2Sampler(BaseTransformer):
1044
1064
  expected_value=f"Stage area {score_stage_name} successfully created."
1045
1065
  ).validate()
1046
1066
 
1047
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1048
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1067
+ # Use posixpath to construct stage paths
1068
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1069
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1049
1070
  statement_params = telemetry.get_function_usage_statement_params(
1050
1071
  project=_PROJECT,
1051
1072
  subproject=_SUBPROJECT,
@@ -1071,6 +1092,7 @@ class AdditiveChi2Sampler(BaseTransformer):
1071
1092
  replace=True,
1072
1093
  session=session,
1073
1094
  statement_params=statement_params,
1095
+ anonymous=True
1074
1096
  )
1075
1097
  def score_wrapper_sproc(
1076
1098
  session: Session,
@@ -1078,7 +1100,8 @@ class AdditiveChi2Sampler(BaseTransformer):
1078
1100
  stage_score_file_name: str,
1079
1101
  input_cols: List[str],
1080
1102
  label_cols: List[str],
1081
- sample_weight_col: Optional[str]
1103
+ sample_weight_col: Optional[str],
1104
+ statement_params: Dict[str, str]
1082
1105
  ) -> float:
1083
1106
  import cloudpickle as cp
1084
1107
  import numpy as np
@@ -1128,14 +1151,14 @@ class AdditiveChi2Sampler(BaseTransformer):
1128
1151
  api_calls=[Session.call],
1129
1152
  custom_tags=dict([("autogen", True)]),
1130
1153
  )
1131
- score = session.call(
1132
- score_sproc_name,
1154
+ score = score_wrapper_sproc(
1155
+ session,
1133
1156
  query,
1134
1157
  stage_score_file_name,
1135
1158
  identifier.get_unescaped_names(self.input_cols),
1136
1159
  identifier.get_unescaped_names(self.label_cols),
1137
1160
  identifier.get_unescaped_names(self.sample_weight_col),
1138
- statement_params=statement_params,
1161
+ statement_params,
1139
1162
  )
1140
1163
 
1141
1164
  cleanup_temp_files([local_score_file_name])
@@ -1153,18 +1176,20 @@ class AdditiveChi2Sampler(BaseTransformer):
1153
1176
  if self._sklearn_object._estimator_type == 'classifier':
1154
1177
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1155
1178
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1156
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1179
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1180
+ ([] if self._drop_input_cols else inputs) + outputs)
1157
1181
  # For regressor, the type of predict is float64
1158
1182
  elif self._sklearn_object._estimator_type == 'regressor':
1159
1183
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1160
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1161
-
1184
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1185
+ ([] if self._drop_input_cols else inputs) + outputs)
1162
1186
  for prob_func in PROB_FUNCTIONS:
1163
1187
  if hasattr(self, prob_func):
1164
1188
  output_cols_prefix: str = f"{prob_func}_"
1165
1189
  output_column_names = self._get_output_column_names(output_cols_prefix)
1166
1190
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1167
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1191
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1192
+ ([] if self._drop_input_cols else inputs) + outputs)
1168
1193
 
1169
1194
  @property
1170
1195
  def model_signatures(self) -> Dict[str, ModelSignature]: