snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.
Files changed (189)
  1. snowflake/ml/_internal/env_utils.py +2 -1
  2. snowflake/ml/_internal/file_utils.py +29 -7
  3. snowflake/ml/_internal/telemetry.py +5 -8
  4. snowflake/ml/_internal/utils/uri.py +7 -2
  5. snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
  6. snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
  7. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
  8. snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
  9. snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
  10. snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
  11. snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
  12. snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
  13. snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
  14. snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
  15. snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
  16. snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
  17. snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
  18. snowflake/ml/model/_deployer.py +14 -27
  19. snowflake/ml/model/_env.py +4 -4
  20. snowflake/ml/model/_handlers/custom.py +14 -2
  21. snowflake/ml/model/_handlers/pytorch.py +186 -0
  22. snowflake/ml/model/_handlers/sklearn.py +14 -9
  23. snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
  24. snowflake/ml/model/_handlers/torchscript.py +180 -0
  25. snowflake/ml/model/_handlers/xgboost.py +19 -9
  26. snowflake/ml/model/_model.py +3 -2
  27. snowflake/ml/model/_model_meta.py +12 -7
  28. snowflake/ml/model/model_signature.py +446 -66
  29. snowflake/ml/model/type_hints.py +23 -4
  30. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
  31. snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
  32. snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
  33. snowflake/ml/modeling/cluster/birch.py +51 -26
  34. snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
  35. snowflake/ml/modeling/cluster/dbscan.py +51 -26
  36. snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
  37. snowflake/ml/modeling/cluster/k_means.py +51 -26
  38. snowflake/ml/modeling/cluster/mean_shift.py +51 -26
  39. snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
  40. snowflake/ml/modeling/cluster/optics.py +51 -26
  41. snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
  42. snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
  43. snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
  44. snowflake/ml/modeling/compose/column_transformer.py +51 -26
  45. snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
  46. snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
  47. snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
  48. snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
  49. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
  50. snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
  51. snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
  52. snowflake/ml/modeling/covariance/oas.py +51 -26
  53. snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
  54. snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
  55. snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
  56. snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
  57. snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
  58. snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
  59. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
  60. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
  61. snowflake/ml/modeling/decomposition/pca.py +51 -26
  62. snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
  63. snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
  64. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
  65. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
  66. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
  67. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
  68. snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
  69. snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
  70. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
  71. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
  72. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
  73. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
  74. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
  75. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
  76. snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
  77. snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
  78. snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
  79. snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
  80. snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
  81. snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
  82. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
  83. snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
  84. snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
  85. snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
  86. snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
  87. snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
  88. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
  89. snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
  90. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
  91. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
  92. snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
  93. snowflake/ml/modeling/impute/knn_imputer.py +51 -26
  94. snowflake/ml/modeling/impute/missing_indicator.py +51 -26
  95. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
  96. snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
  97. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
  98. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
  99. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
  100. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
  101. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
  102. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
  103. snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
  104. snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
  105. snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
  106. snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
  107. snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
  108. snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
  109. snowflake/ml/modeling/linear_model/lars.py +51 -26
  110. snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
  111. snowflake/ml/modeling/linear_model/lasso.py +51 -26
  112. snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
  113. snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
  114. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
  115. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
  116. snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
  117. snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
  118. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
  119. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
  120. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
  121. snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
  122. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
  123. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
  124. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
  125. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
  126. snowflake/ml/modeling/linear_model/perceptron.py +51 -26
  127. snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
  128. snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
  129. snowflake/ml/modeling/linear_model/ridge.py +51 -26
  130. snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
  131. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
  132. snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
  133. snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
  134. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
  135. snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
  136. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
  137. snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
  138. snowflake/ml/modeling/manifold/isomap.py +51 -26
  139. snowflake/ml/modeling/manifold/mds.py +51 -26
  140. snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
  141. snowflake/ml/modeling/manifold/tsne.py +51 -26
  142. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
  143. snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
  144. snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
  145. snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
  146. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
  147. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
  148. snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
  149. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
  150. snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
  151. snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
  152. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
  153. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
  154. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
  155. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
  156. snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
  157. snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
  158. snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
  159. snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
  160. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
  161. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
  162. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
  163. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
  164. snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
  165. snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
  166. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
  167. snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
  168. snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
  169. snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
  170. snowflake/ml/modeling/svm/linear_svc.py +51 -26
  171. snowflake/ml/modeling/svm/linear_svr.py +51 -26
  172. snowflake/ml/modeling/svm/nu_svc.py +51 -26
  173. snowflake/ml/modeling/svm/nu_svr.py +51 -26
  174. snowflake/ml/modeling/svm/svc.py +51 -26
  175. snowflake/ml/modeling/svm/svr.py +51 -26
  176. snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
  177. snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
  178. snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
  179. snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
  180. snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
  181. snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
  182. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
  183. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
  184. snowflake/ml/registry/model_registry.py +74 -56
  185. snowflake/ml/version.py +1 -1
  186. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
  187. snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
  188. snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
  189. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -249,7 +251,6 @@ class FastICA(BaseTransformer):
249
251
  sample_weight_col: Optional[str] = None,
250
252
  ) -> None:
251
253
  super().__init__()
252
- self.id = str(uuid4()).replace("-", "_").upper()
253
254
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
254
255
 
255
256
  self._deps = list(deps)
@@ -278,6 +279,15 @@ class FastICA(BaseTransformer):
278
279
  self.set_drop_input_cols(drop_input_cols)
279
280
  self.set_sample_weight_col(sample_weight_col)
280
281
 
282
+ def _get_rand_id(self) -> str:
283
+ """
284
+ Generate random id to be used in sproc and stage names.
285
+
286
+ Returns:
287
+ Random id string usable in sproc, table, and stage names.
288
+ """
289
+ return str(uuid4()).replace("-", "_").upper()
290
+
281
291
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
282
292
  """
283
293
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -356,7 +366,7 @@ class FastICA(BaseTransformer):
356
366
  cp.dump(self._sklearn_object, local_transform_file)
357
367
 
358
368
  # Create temp stage to run fit.
359
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
369
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
360
370
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
361
371
  SqlResultValidator(
362
372
  session=session,
@@ -369,11 +379,12 @@ class FastICA(BaseTransformer):
369
379
  expected_value=f"Stage area {transform_stage_name} successfully created."
370
380
  ).validate()
371
381
 
372
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
382
+ # Use posixpath to construct stage paths
383
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
384
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
373
385
  local_result_file_name = get_temp_file_path()
374
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
375
386
 
376
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
387
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
377
388
  statement_params = telemetry.get_function_usage_statement_params(
378
389
  project=_PROJECT,
379
390
  subproject=_SUBPROJECT,
@@ -399,6 +410,7 @@ class FastICA(BaseTransformer):
399
410
  replace=True,
400
411
  session=session,
401
412
  statement_params=statement_params,
413
+ anonymous=True
402
414
  )
403
415
  def fit_wrapper_sproc(
404
416
  session: Session,
@@ -407,7 +419,8 @@ class FastICA(BaseTransformer):
407
419
  stage_result_file_name: str,
408
420
  input_cols: List[str],
409
421
  label_cols: List[str],
410
- sample_weight_col: Optional[str]
422
+ sample_weight_col: Optional[str],
423
+ statement_params: Dict[str, str]
411
424
  ) -> str:
412
425
  import cloudpickle as cp
413
426
  import numpy as np
@@ -474,15 +487,15 @@ class FastICA(BaseTransformer):
474
487
  api_calls=[Session.call],
475
488
  custom_tags=dict([("autogen", True)]),
476
489
  )
477
- sproc_export_file_name = session.call(
478
- fit_sproc_name,
490
+ sproc_export_file_name = fit_wrapper_sproc(
491
+ session,
479
492
  query,
480
493
  stage_transform_file_name,
481
494
  stage_result_file_name,
482
495
  identifier.get_unescaped_names(self.input_cols),
483
496
  identifier.get_unescaped_names(self.label_cols),
484
497
  identifier.get_unescaped_names(self.sample_weight_col),
485
- statement_params=statement_params,
498
+ statement_params,
486
499
  )
487
500
 
488
501
  if "|" in sproc_export_file_name:
@@ -492,7 +505,7 @@ class FastICA(BaseTransformer):
492
505
  print("\n".join(fields[1:]))
493
506
 
494
507
  session.file.get(
495
- os.path.join(stage_result_file_name, sproc_export_file_name),
508
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
496
509
  local_result_file_name,
497
510
  statement_params=statement_params
498
511
  )
@@ -538,7 +551,7 @@ class FastICA(BaseTransformer):
538
551
 
539
552
  # Register vectorized UDF for batch inference
540
553
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
541
- safe_id=self.id, method=inference_method)
554
+ safe_id=self._get_rand_id(), method=inference_method)
542
555
 
543
556
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
544
557
  # will try to pickle all of self which fails.
@@ -630,7 +643,7 @@ class FastICA(BaseTransformer):
630
643
  return transformed_pandas_df.to_dict("records")
631
644
 
632
645
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
633
- safe_id=self.id
646
+ safe_id=self._get_rand_id()
634
647
  )
635
648
 
636
649
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -795,11 +808,18 @@ class FastICA(BaseTransformer):
795
808
  Transformed dataset.
796
809
  """
797
810
  if isinstance(dataset, DataFrame):
811
+ expected_type_inferred = ""
812
+ # when it is classifier, infer the datatype from label columns
813
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
814
+ expected_type_inferred = convert_sp_to_sf_type(
815
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
816
+ )
817
+
798
818
  output_df = self._batch_inference(
799
819
  dataset=dataset,
800
820
  inference_method="predict",
801
821
  expected_output_cols_list=self.output_cols,
802
- expected_output_cols_type="",
822
+ expected_output_cols_type=expected_type_inferred,
803
823
  )
804
824
  elif isinstance(dataset, pd.DataFrame):
805
825
  output_df = self._sklearn_inference(
@@ -872,10 +892,10 @@ class FastICA(BaseTransformer):
872
892
 
873
893
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
874
894
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
875
- Returns an empty list if current object is not a classifier or not yet fitted.
895
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
876
896
  """
877
897
  if getattr(self._sklearn_object, "classes_", None) is None:
878
- return []
898
+ return [output_cols_prefix]
879
899
 
880
900
  classes = self._sklearn_object.classes_
881
901
  if isinstance(classes, numpy.ndarray):
@@ -1100,7 +1120,7 @@ class FastICA(BaseTransformer):
1100
1120
  cp.dump(self._sklearn_object, local_score_file)
1101
1121
 
1102
1122
  # Create temp stage to run score.
1103
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1123
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1104
1124
  session = dataset._session
1105
1125
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1106
1126
  SqlResultValidator(
@@ -1114,8 +1134,9 @@ class FastICA(BaseTransformer):
1114
1134
  expected_value=f"Stage area {score_stage_name} successfully created."
1115
1135
  ).validate()
1116
1136
 
1117
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1118
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1137
+ # Use posixpath to construct stage paths
1138
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1139
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1119
1140
  statement_params = telemetry.get_function_usage_statement_params(
1120
1141
  project=_PROJECT,
1121
1142
  subproject=_SUBPROJECT,
@@ -1141,6 +1162,7 @@ class FastICA(BaseTransformer):
1141
1162
  replace=True,
1142
1163
  session=session,
1143
1164
  statement_params=statement_params,
1165
+ anonymous=True
1144
1166
  )
1145
1167
  def score_wrapper_sproc(
1146
1168
  session: Session,
@@ -1148,7 +1170,8 @@ class FastICA(BaseTransformer):
1148
1170
  stage_score_file_name: str,
1149
1171
  input_cols: List[str],
1150
1172
  label_cols: List[str],
1151
- sample_weight_col: Optional[str]
1173
+ sample_weight_col: Optional[str],
1174
+ statement_params: Dict[str, str]
1152
1175
  ) -> float:
1153
1176
  import cloudpickle as cp
1154
1177
  import numpy as np
@@ -1198,14 +1221,14 @@ class FastICA(BaseTransformer):
1198
1221
  api_calls=[Session.call],
1199
1222
  custom_tags=dict([("autogen", True)]),
1200
1223
  )
1201
- score = session.call(
1202
- score_sproc_name,
1224
+ score = score_wrapper_sproc(
1225
+ session,
1203
1226
  query,
1204
1227
  stage_score_file_name,
1205
1228
  identifier.get_unescaped_names(self.input_cols),
1206
1229
  identifier.get_unescaped_names(self.label_cols),
1207
1230
  identifier.get_unescaped_names(self.sample_weight_col),
1208
- statement_params=statement_params,
1231
+ statement_params,
1209
1232
  )
1210
1233
 
1211
1234
  cleanup_temp_files([local_score_file_name])
@@ -1223,18 +1246,20 @@ class FastICA(BaseTransformer):
1223
1246
  if self._sklearn_object._estimator_type == 'classifier':
1224
1247
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1225
1248
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1226
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1249
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1250
+ ([] if self._drop_input_cols else inputs) + outputs)
1227
1251
  # For regressor, the type of predict is float64
1228
1252
  elif self._sklearn_object._estimator_type == 'regressor':
1229
1253
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1230
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1231
-
1254
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1255
+ ([] if self._drop_input_cols else inputs) + outputs)
1232
1256
  for prob_func in PROB_FUNCTIONS:
1233
1257
  if hasattr(self, prob_func):
1234
1258
  output_cols_prefix: str = f"{prob_func}_"
1235
1259
  output_column_names = self._get_output_column_names(output_cols_prefix)
1236
1260
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1237
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1261
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1262
+ ([] if self._drop_input_cols else inputs) + outputs)
1238
1263
 
1239
1264
  @property
1240
1265
  def model_signatures(self) -> Dict[str, ModelSignature]:
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -207,7 +209,6 @@ class IncrementalPCA(BaseTransformer):
207
209
  sample_weight_col: Optional[str] = None,
208
210
  ) -> None:
209
211
  super().__init__()
210
- self.id = str(uuid4()).replace("-", "_").upper()
211
212
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
212
213
 
213
214
  self._deps = list(deps)
@@ -230,6 +231,15 @@ class IncrementalPCA(BaseTransformer):
230
231
  self.set_drop_input_cols(drop_input_cols)
231
232
  self.set_sample_weight_col(sample_weight_col)
232
233
 
234
+ def _get_rand_id(self) -> str:
235
+ """
236
+ Generate random id to be used in sproc and stage names.
237
+
238
+ Returns:
239
+ Random id string usable in sproc, table, and stage names.
240
+ """
241
+ return str(uuid4()).replace("-", "_").upper()
242
+
233
243
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
234
244
  """
235
245
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -308,7 +318,7 @@ class IncrementalPCA(BaseTransformer):
308
318
  cp.dump(self._sklearn_object, local_transform_file)
309
319
 
310
320
  # Create temp stage to run fit.
311
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
321
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
312
322
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
313
323
  SqlResultValidator(
314
324
  session=session,
@@ -321,11 +331,12 @@ class IncrementalPCA(BaseTransformer):
321
331
  expected_value=f"Stage area {transform_stage_name} successfully created."
322
332
  ).validate()
323
333
 
324
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
334
+ # Use posixpath to construct stage paths
335
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
336
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
325
337
  local_result_file_name = get_temp_file_path()
326
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
327
338
 
328
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
339
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
329
340
  statement_params = telemetry.get_function_usage_statement_params(
330
341
  project=_PROJECT,
331
342
  subproject=_SUBPROJECT,
@@ -351,6 +362,7 @@ class IncrementalPCA(BaseTransformer):
351
362
  replace=True,
352
363
  session=session,
353
364
  statement_params=statement_params,
365
+ anonymous=True
354
366
  )
355
367
  def fit_wrapper_sproc(
356
368
  session: Session,
@@ -359,7 +371,8 @@ class IncrementalPCA(BaseTransformer):
359
371
  stage_result_file_name: str,
360
372
  input_cols: List[str],
361
373
  label_cols: List[str],
362
- sample_weight_col: Optional[str]
374
+ sample_weight_col: Optional[str],
375
+ statement_params: Dict[str, str]
363
376
  ) -> str:
364
377
  import cloudpickle as cp
365
378
  import numpy as np
@@ -426,15 +439,15 @@ class IncrementalPCA(BaseTransformer):
426
439
  api_calls=[Session.call],
427
440
  custom_tags=dict([("autogen", True)]),
428
441
  )
429
- sproc_export_file_name = session.call(
430
- fit_sproc_name,
442
+ sproc_export_file_name = fit_wrapper_sproc(
443
+ session,
431
444
  query,
432
445
  stage_transform_file_name,
433
446
  stage_result_file_name,
434
447
  identifier.get_unescaped_names(self.input_cols),
435
448
  identifier.get_unescaped_names(self.label_cols),
436
449
  identifier.get_unescaped_names(self.sample_weight_col),
437
- statement_params=statement_params,
450
+ statement_params,
438
451
  )
439
452
 
440
453
  if "|" in sproc_export_file_name:
@@ -444,7 +457,7 @@ class IncrementalPCA(BaseTransformer):
444
457
  print("\n".join(fields[1:]))
445
458
 
446
459
  session.file.get(
447
- os.path.join(stage_result_file_name, sproc_export_file_name),
460
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
448
461
  local_result_file_name,
449
462
  statement_params=statement_params
450
463
  )
@@ -490,7 +503,7 @@ class IncrementalPCA(BaseTransformer):
490
503
 
491
504
  # Register vectorized UDF for batch inference
492
505
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
493
- safe_id=self.id, method=inference_method)
506
+ safe_id=self._get_rand_id(), method=inference_method)
494
507
 
495
508
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
496
509
  # will try to pickle all of self which fails.
@@ -582,7 +595,7 @@ class IncrementalPCA(BaseTransformer):
582
595
  return transformed_pandas_df.to_dict("records")
583
596
 
584
597
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
585
- safe_id=self.id
598
+ safe_id=self._get_rand_id()
586
599
  )
587
600
 
588
601
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -747,11 +760,18 @@ class IncrementalPCA(BaseTransformer):
747
760
  Transformed dataset.
748
761
  """
749
762
  if isinstance(dataset, DataFrame):
763
+ expected_type_inferred = ""
764
+ # when it is classifier, infer the datatype from label columns
765
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
766
+ expected_type_inferred = convert_sp_to_sf_type(
767
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
768
+ )
769
+
750
770
  output_df = self._batch_inference(
751
771
  dataset=dataset,
752
772
  inference_method="predict",
753
773
  expected_output_cols_list=self.output_cols,
754
- expected_output_cols_type="",
774
+ expected_output_cols_type=expected_type_inferred,
755
775
  )
756
776
  elif isinstance(dataset, pd.DataFrame):
757
777
  output_df = self._sklearn_inference(
@@ -824,10 +844,10 @@ class IncrementalPCA(BaseTransformer):
824
844
 
825
845
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
826
846
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
827
- Returns an empty list if current object is not a classifier or not yet fitted.
847
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
828
848
  """
829
849
  if getattr(self._sklearn_object, "classes_", None) is None:
830
- return []
850
+ return [output_cols_prefix]
831
851
 
832
852
  classes = self._sklearn_object.classes_
833
853
  if isinstance(classes, numpy.ndarray):
@@ -1052,7 +1072,7 @@ class IncrementalPCA(BaseTransformer):
1052
1072
  cp.dump(self._sklearn_object, local_score_file)
1053
1073
 
1054
1074
  # Create temp stage to run score.
1055
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1075
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1056
1076
  session = dataset._session
1057
1077
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1058
1078
  SqlResultValidator(
@@ -1066,8 +1086,9 @@ class IncrementalPCA(BaseTransformer):
1066
1086
  expected_value=f"Stage area {score_stage_name} successfully created."
1067
1087
  ).validate()
1068
1088
 
1069
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1070
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1089
+ # Use posixpath to construct stage paths
1090
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1091
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1071
1092
  statement_params = telemetry.get_function_usage_statement_params(
1072
1093
  project=_PROJECT,
1073
1094
  subproject=_SUBPROJECT,
@@ -1093,6 +1114,7 @@ class IncrementalPCA(BaseTransformer):
1093
1114
  replace=True,
1094
1115
  session=session,
1095
1116
  statement_params=statement_params,
1117
+ anonymous=True
1096
1118
  )
1097
1119
  def score_wrapper_sproc(
1098
1120
  session: Session,
@@ -1100,7 +1122,8 @@ class IncrementalPCA(BaseTransformer):
1100
1122
  stage_score_file_name: str,
1101
1123
  input_cols: List[str],
1102
1124
  label_cols: List[str],
1103
- sample_weight_col: Optional[str]
1125
+ sample_weight_col: Optional[str],
1126
+ statement_params: Dict[str, str]
1104
1127
  ) -> float:
1105
1128
  import cloudpickle as cp
1106
1129
  import numpy as np
@@ -1150,14 +1173,14 @@ class IncrementalPCA(BaseTransformer):
1150
1173
  api_calls=[Session.call],
1151
1174
  custom_tags=dict([("autogen", True)]),
1152
1175
  )
1153
- score = session.call(
1154
- score_sproc_name,
1176
+ score = score_wrapper_sproc(
1177
+ session,
1155
1178
  query,
1156
1179
  stage_score_file_name,
1157
1180
  identifier.get_unescaped_names(self.input_cols),
1158
1181
  identifier.get_unescaped_names(self.label_cols),
1159
1182
  identifier.get_unescaped_names(self.sample_weight_col),
1160
- statement_params=statement_params,
1183
+ statement_params,
1161
1184
  )
1162
1185
 
1163
1186
  cleanup_temp_files([local_score_file_name])
@@ -1175,18 +1198,20 @@ class IncrementalPCA(BaseTransformer):
1175
1198
  if self._sklearn_object._estimator_type == 'classifier':
1176
1199
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1177
1200
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1178
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1201
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1202
+ ([] if self._drop_input_cols else inputs) + outputs)
1179
1203
  # For regressor, the type of predict is float64
1180
1204
  elif self._sklearn_object._estimator_type == 'regressor':
1181
1205
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1182
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1183
-
1206
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1207
+ ([] if self._drop_input_cols else inputs) + outputs)
1184
1208
  for prob_func in PROB_FUNCTIONS:
1185
1209
  if hasattr(self, prob_func):
1186
1210
  output_cols_prefix: str = f"{prob_func}_"
1187
1211
  output_column_names = self._get_output_column_names(output_cols_prefix)
1188
1212
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1189
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1213
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1214
+ ([] if self._drop_input_cols else inputs) + outputs)
1190
1215
 
1191
1216
  @property
1192
1217
  def model_signatures(self) -> Dict[str, ModelSignature]: