snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189) hide show
  1. snowflake/ml/_internal/env_utils.py +2 -1
  2. snowflake/ml/_internal/file_utils.py +29 -7
  3. snowflake/ml/_internal/telemetry.py +5 -8
  4. snowflake/ml/_internal/utils/uri.py +7 -2
  5. snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
  6. snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
  7. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
  8. snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
  9. snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
  10. snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
  11. snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
  12. snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
  13. snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
  14. snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
  15. snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
  16. snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
  17. snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
  18. snowflake/ml/model/_deployer.py +14 -27
  19. snowflake/ml/model/_env.py +4 -4
  20. snowflake/ml/model/_handlers/custom.py +14 -2
  21. snowflake/ml/model/_handlers/pytorch.py +186 -0
  22. snowflake/ml/model/_handlers/sklearn.py +14 -9
  23. snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
  24. snowflake/ml/model/_handlers/torchscript.py +180 -0
  25. snowflake/ml/model/_handlers/xgboost.py +19 -9
  26. snowflake/ml/model/_model.py +3 -2
  27. snowflake/ml/model/_model_meta.py +12 -7
  28. snowflake/ml/model/model_signature.py +446 -66
  29. snowflake/ml/model/type_hints.py +23 -4
  30. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
  31. snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
  32. snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
  33. snowflake/ml/modeling/cluster/birch.py +51 -26
  34. snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
  35. snowflake/ml/modeling/cluster/dbscan.py +51 -26
  36. snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
  37. snowflake/ml/modeling/cluster/k_means.py +51 -26
  38. snowflake/ml/modeling/cluster/mean_shift.py +51 -26
  39. snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
  40. snowflake/ml/modeling/cluster/optics.py +51 -26
  41. snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
  42. snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
  43. snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
  44. snowflake/ml/modeling/compose/column_transformer.py +51 -26
  45. snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
  46. snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
  47. snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
  48. snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
  49. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
  50. snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
  51. snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
  52. snowflake/ml/modeling/covariance/oas.py +51 -26
  53. snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
  54. snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
  55. snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
  56. snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
  57. snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
  58. snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
  59. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
  60. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
  61. snowflake/ml/modeling/decomposition/pca.py +51 -26
  62. snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
  63. snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
  64. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
  65. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
  66. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
  67. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
  68. snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
  69. snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
  70. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
  71. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
  72. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
  73. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
  74. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
  75. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
  76. snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
  77. snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
  78. snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
  79. snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
  80. snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
  81. snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
  82. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
  83. snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
  84. snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
  85. snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
  86. snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
  87. snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
  88. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
  89. snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
  90. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
  91. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
  92. snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
  93. snowflake/ml/modeling/impute/knn_imputer.py +51 -26
  94. snowflake/ml/modeling/impute/missing_indicator.py +51 -26
  95. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
  96. snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
  97. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
  98. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
  99. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
  100. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
  101. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
  102. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
  103. snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
  104. snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
  105. snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
  106. snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
  107. snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
  108. snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
  109. snowflake/ml/modeling/linear_model/lars.py +51 -26
  110. snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
  111. snowflake/ml/modeling/linear_model/lasso.py +51 -26
  112. snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
  113. snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
  114. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
  115. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
  116. snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
  117. snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
  118. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
  119. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
  120. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
  121. snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
  122. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
  123. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
  124. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
  125. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
  126. snowflake/ml/modeling/linear_model/perceptron.py +51 -26
  127. snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
  128. snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
  129. snowflake/ml/modeling/linear_model/ridge.py +51 -26
  130. snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
  131. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
  132. snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
  133. snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
  134. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
  135. snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
  136. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
  137. snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
  138. snowflake/ml/modeling/manifold/isomap.py +51 -26
  139. snowflake/ml/modeling/manifold/mds.py +51 -26
  140. snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
  141. snowflake/ml/modeling/manifold/tsne.py +51 -26
  142. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
  143. snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
  144. snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
  145. snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
  146. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
  147. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
  148. snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
  149. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
  150. snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
  151. snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
  152. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
  153. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
  154. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
  155. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
  156. snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
  157. snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
  158. snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
  159. snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
  160. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
  161. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
  162. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
  163. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
  164. snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
  165. snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
  166. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
  167. snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
  168. snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
  169. snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
  170. snowflake/ml/modeling/svm/linear_svc.py +51 -26
  171. snowflake/ml/modeling/svm/linear_svr.py +51 -26
  172. snowflake/ml/modeling/svm/nu_svc.py +51 -26
  173. snowflake/ml/modeling/svm/nu_svr.py +51 -26
  174. snowflake/ml/modeling/svm/svc.py +51 -26
  175. snowflake/ml/modeling/svm/svr.py +51 -26
  176. snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
  177. snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
  178. snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
  179. snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
  180. snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
  181. snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
  182. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
  183. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
  184. snowflake/ml/registry/model_registry.py +74 -56
  185. snowflake/ml/version.py +1 -1
  186. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
  187. snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
  188. snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
  189. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -277,7 +279,6 @@ class ElasticNetCV(BaseTransformer):
277
279
  sample_weight_col: Optional[str] = None,
278
280
  ) -> None:
279
281
  super().__init__()
280
- self.id = str(uuid4()).replace("-", "_").upper()
281
282
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
282
283
 
283
284
  self._deps = list(deps)
@@ -311,6 +312,15 @@ class ElasticNetCV(BaseTransformer):
311
312
  self.set_drop_input_cols(drop_input_cols)
312
313
  self.set_sample_weight_col(sample_weight_col)
313
314
 
315
+ def _get_rand_id(self) -> str:
316
+ """
317
+ Generate random id to be used in sproc and stage names.
318
+
319
+ Returns:
320
+ Random id string usable in sproc, table, and stage names.
321
+ """
322
+ return str(uuid4()).replace("-", "_").upper()
323
+
314
324
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
315
325
  """
316
326
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -389,7 +399,7 @@ class ElasticNetCV(BaseTransformer):
389
399
  cp.dump(self._sklearn_object, local_transform_file)
390
400
 
391
401
  # Create temp stage to run fit.
392
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
402
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
393
403
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
394
404
  SqlResultValidator(
395
405
  session=session,
@@ -402,11 +412,12 @@ class ElasticNetCV(BaseTransformer):
402
412
  expected_value=f"Stage area {transform_stage_name} successfully created."
403
413
  ).validate()
404
414
 
405
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
415
+ # Use posixpath to construct stage paths
416
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
417
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
406
418
  local_result_file_name = get_temp_file_path()
407
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
408
419
 
409
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
420
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
410
421
  statement_params = telemetry.get_function_usage_statement_params(
411
422
  project=_PROJECT,
412
423
  subproject=_SUBPROJECT,
@@ -432,6 +443,7 @@ class ElasticNetCV(BaseTransformer):
432
443
  replace=True,
433
444
  session=session,
434
445
  statement_params=statement_params,
446
+ anonymous=True
435
447
  )
436
448
  def fit_wrapper_sproc(
437
449
  session: Session,
@@ -440,7 +452,8 @@ class ElasticNetCV(BaseTransformer):
440
452
  stage_result_file_name: str,
441
453
  input_cols: List[str],
442
454
  label_cols: List[str],
443
- sample_weight_col: Optional[str]
455
+ sample_weight_col: Optional[str],
456
+ statement_params: Dict[str, str]
444
457
  ) -> str:
445
458
  import cloudpickle as cp
446
459
  import numpy as np
@@ -507,15 +520,15 @@ class ElasticNetCV(BaseTransformer):
507
520
  api_calls=[Session.call],
508
521
  custom_tags=dict([("autogen", True)]),
509
522
  )
510
- sproc_export_file_name = session.call(
511
- fit_sproc_name,
523
+ sproc_export_file_name = fit_wrapper_sproc(
524
+ session,
512
525
  query,
513
526
  stage_transform_file_name,
514
527
  stage_result_file_name,
515
528
  identifier.get_unescaped_names(self.input_cols),
516
529
  identifier.get_unescaped_names(self.label_cols),
517
530
  identifier.get_unescaped_names(self.sample_weight_col),
518
- statement_params=statement_params,
531
+ statement_params,
519
532
  )
520
533
 
521
534
  if "|" in sproc_export_file_name:
@@ -525,7 +538,7 @@ class ElasticNetCV(BaseTransformer):
525
538
  print("\n".join(fields[1:]))
526
539
 
527
540
  session.file.get(
528
- os.path.join(stage_result_file_name, sproc_export_file_name),
541
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
529
542
  local_result_file_name,
530
543
  statement_params=statement_params
531
544
  )
@@ -571,7 +584,7 @@ class ElasticNetCV(BaseTransformer):
571
584
 
572
585
  # Register vectorized UDF for batch inference
573
586
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
574
- safe_id=self.id, method=inference_method)
587
+ safe_id=self._get_rand_id(), method=inference_method)
575
588
 
576
589
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
577
590
  # will try to pickle all of self which fails.
@@ -663,7 +676,7 @@ class ElasticNetCV(BaseTransformer):
663
676
  return transformed_pandas_df.to_dict("records")
664
677
 
665
678
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
666
- safe_id=self.id
679
+ safe_id=self._get_rand_id()
667
680
  )
668
681
 
669
682
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -830,11 +843,18 @@ class ElasticNetCV(BaseTransformer):
830
843
  Transformed dataset.
831
844
  """
832
845
  if isinstance(dataset, DataFrame):
846
+ expected_type_inferred = "float"
847
+ # when it is classifier, infer the datatype from label columns
848
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
849
+ expected_type_inferred = convert_sp_to_sf_type(
850
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
851
+ )
852
+
833
853
  output_df = self._batch_inference(
834
854
  dataset=dataset,
835
855
  inference_method="predict",
836
856
  expected_output_cols_list=self.output_cols,
837
- expected_output_cols_type="float",
857
+ expected_output_cols_type=expected_type_inferred,
838
858
  )
839
859
  elif isinstance(dataset, pd.DataFrame):
840
860
  output_df = self._sklearn_inference(
@@ -905,10 +925,10 @@ class ElasticNetCV(BaseTransformer):
905
925
 
906
926
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
907
927
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
908
- Returns an empty list if current object is not a classifier or not yet fitted.
928
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
909
929
  """
910
930
  if getattr(self._sklearn_object, "classes_", None) is None:
911
- return []
931
+ return [output_cols_prefix]
912
932
 
913
933
  classes = self._sklearn_object.classes_
914
934
  if isinstance(classes, numpy.ndarray):
@@ -1133,7 +1153,7 @@ class ElasticNetCV(BaseTransformer):
1133
1153
  cp.dump(self._sklearn_object, local_score_file)
1134
1154
 
1135
1155
  # Create temp stage to run score.
1136
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1156
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1137
1157
  session = dataset._session
1138
1158
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1139
1159
  SqlResultValidator(
@@ -1147,8 +1167,9 @@ class ElasticNetCV(BaseTransformer):
1147
1167
  expected_value=f"Stage area {score_stage_name} successfully created."
1148
1168
  ).validate()
1149
1169
 
1150
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1151
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1170
+ # Use posixpath to construct stage paths
1171
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1172
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1152
1173
  statement_params = telemetry.get_function_usage_statement_params(
1153
1174
  project=_PROJECT,
1154
1175
  subproject=_SUBPROJECT,
@@ -1174,6 +1195,7 @@ class ElasticNetCV(BaseTransformer):
1174
1195
  replace=True,
1175
1196
  session=session,
1176
1197
  statement_params=statement_params,
1198
+ anonymous=True
1177
1199
  )
1178
1200
  def score_wrapper_sproc(
1179
1201
  session: Session,
@@ -1181,7 +1203,8 @@ class ElasticNetCV(BaseTransformer):
1181
1203
  stage_score_file_name: str,
1182
1204
  input_cols: List[str],
1183
1205
  label_cols: List[str],
1184
- sample_weight_col: Optional[str]
1206
+ sample_weight_col: Optional[str],
1207
+ statement_params: Dict[str, str]
1185
1208
  ) -> float:
1186
1209
  import cloudpickle as cp
1187
1210
  import numpy as np
@@ -1231,14 +1254,14 @@ class ElasticNetCV(BaseTransformer):
1231
1254
  api_calls=[Session.call],
1232
1255
  custom_tags=dict([("autogen", True)]),
1233
1256
  )
1234
- score = session.call(
1235
- score_sproc_name,
1257
+ score = score_wrapper_sproc(
1258
+ session,
1236
1259
  query,
1237
1260
  stage_score_file_name,
1238
1261
  identifier.get_unescaped_names(self.input_cols),
1239
1262
  identifier.get_unescaped_names(self.label_cols),
1240
1263
  identifier.get_unescaped_names(self.sample_weight_col),
1241
- statement_params=statement_params,
1264
+ statement_params,
1242
1265
  )
1243
1266
 
1244
1267
  cleanup_temp_files([local_score_file_name])
@@ -1256,18 +1279,20 @@ class ElasticNetCV(BaseTransformer):
1256
1279
  if self._sklearn_object._estimator_type == 'classifier':
1257
1280
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1258
1281
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1259
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1282
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1283
+ ([] if self._drop_input_cols else inputs) + outputs)
1260
1284
  # For regressor, the type of predict is float64
1261
1285
  elif self._sklearn_object._estimator_type == 'regressor':
1262
1286
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1263
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1264
-
1287
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1288
+ ([] if self._drop_input_cols else inputs) + outputs)
1265
1289
  for prob_func in PROB_FUNCTIONS:
1266
1290
  if hasattr(self, prob_func):
1267
1291
  output_cols_prefix: str = f"{prob_func}_"
1268
1292
  output_column_names = self._get_output_column_names(output_cols_prefix)
1269
1293
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1270
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1294
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1295
+ ([] if self._drop_input_cols else inputs) + outputs)
1271
1296
 
1272
1297
  @property
1273
1298
  def model_signatures(self) -> Dict[str, ModelSignature]:
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -230,7 +232,6 @@ class GammaRegressor(BaseTransformer):
230
232
  sample_weight_col: Optional[str] = None,
231
233
  ) -> None:
232
234
  super().__init__()
233
- self.id = str(uuid4()).replace("-", "_").upper()
234
235
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
235
236
 
236
237
  self._deps = list(deps)
@@ -256,6 +257,15 @@ class GammaRegressor(BaseTransformer):
256
257
  self.set_drop_input_cols(drop_input_cols)
257
258
  self.set_sample_weight_col(sample_weight_col)
258
259
 
260
+ def _get_rand_id(self) -> str:
261
+ """
262
+ Generate random id to be used in sproc and stage names.
263
+
264
+ Returns:
265
+ Random id string usable in sproc, table, and stage names.
266
+ """
267
+ return str(uuid4()).replace("-", "_").upper()
268
+
259
269
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
260
270
  """
261
271
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -334,7 +344,7 @@ class GammaRegressor(BaseTransformer):
334
344
  cp.dump(self._sklearn_object, local_transform_file)
335
345
 
336
346
  # Create temp stage to run fit.
337
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
347
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
338
348
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
339
349
  SqlResultValidator(
340
350
  session=session,
@@ -347,11 +357,12 @@ class GammaRegressor(BaseTransformer):
347
357
  expected_value=f"Stage area {transform_stage_name} successfully created."
348
358
  ).validate()
349
359
 
350
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
360
+ # Use posixpath to construct stage paths
361
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
362
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
351
363
  local_result_file_name = get_temp_file_path()
352
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
353
364
 
354
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
365
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
355
366
  statement_params = telemetry.get_function_usage_statement_params(
356
367
  project=_PROJECT,
357
368
  subproject=_SUBPROJECT,
@@ -377,6 +388,7 @@ class GammaRegressor(BaseTransformer):
377
388
  replace=True,
378
389
  session=session,
379
390
  statement_params=statement_params,
391
+ anonymous=True
380
392
  )
381
393
  def fit_wrapper_sproc(
382
394
  session: Session,
@@ -385,7 +397,8 @@ class GammaRegressor(BaseTransformer):
385
397
  stage_result_file_name: str,
386
398
  input_cols: List[str],
387
399
  label_cols: List[str],
388
- sample_weight_col: Optional[str]
400
+ sample_weight_col: Optional[str],
401
+ statement_params: Dict[str, str]
389
402
  ) -> str:
390
403
  import cloudpickle as cp
391
404
  import numpy as np
@@ -452,15 +465,15 @@ class GammaRegressor(BaseTransformer):
452
465
  api_calls=[Session.call],
453
466
  custom_tags=dict([("autogen", True)]),
454
467
  )
455
- sproc_export_file_name = session.call(
456
- fit_sproc_name,
468
+ sproc_export_file_name = fit_wrapper_sproc(
469
+ session,
457
470
  query,
458
471
  stage_transform_file_name,
459
472
  stage_result_file_name,
460
473
  identifier.get_unescaped_names(self.input_cols),
461
474
  identifier.get_unescaped_names(self.label_cols),
462
475
  identifier.get_unescaped_names(self.sample_weight_col),
463
- statement_params=statement_params,
476
+ statement_params,
464
477
  )
465
478
 
466
479
  if "|" in sproc_export_file_name:
@@ -470,7 +483,7 @@ class GammaRegressor(BaseTransformer):
470
483
  print("\n".join(fields[1:]))
471
484
 
472
485
  session.file.get(
473
- os.path.join(stage_result_file_name, sproc_export_file_name),
486
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
474
487
  local_result_file_name,
475
488
  statement_params=statement_params
476
489
  )
@@ -516,7 +529,7 @@ class GammaRegressor(BaseTransformer):
516
529
 
517
530
  # Register vectorized UDF for batch inference
518
531
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
519
- safe_id=self.id, method=inference_method)
532
+ safe_id=self._get_rand_id(), method=inference_method)
520
533
 
521
534
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
522
535
  # will try to pickle all of self which fails.
@@ -608,7 +621,7 @@ class GammaRegressor(BaseTransformer):
608
621
  return transformed_pandas_df.to_dict("records")
609
622
 
610
623
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
611
- safe_id=self.id
624
+ safe_id=self._get_rand_id()
612
625
  )
613
626
 
614
627
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -775,11 +788,18 @@ class GammaRegressor(BaseTransformer):
775
788
  Transformed dataset.
776
789
  """
777
790
  if isinstance(dataset, DataFrame):
791
+ expected_type_inferred = "float"
792
+ # when it is classifier, infer the datatype from label columns
793
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
794
+ expected_type_inferred = convert_sp_to_sf_type(
795
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
796
+ )
797
+
778
798
  output_df = self._batch_inference(
779
799
  dataset=dataset,
780
800
  inference_method="predict",
781
801
  expected_output_cols_list=self.output_cols,
782
- expected_output_cols_type="float",
802
+ expected_output_cols_type=expected_type_inferred,
783
803
  )
784
804
  elif isinstance(dataset, pd.DataFrame):
785
805
  output_df = self._sklearn_inference(
@@ -850,10 +870,10 @@ class GammaRegressor(BaseTransformer):
850
870
 
851
871
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
852
872
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
853
- Returns an empty list if current object is not a classifier or not yet fitted.
873
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
854
874
  """
855
875
  if getattr(self._sklearn_object, "classes_", None) is None:
856
- return []
876
+ return [output_cols_prefix]
857
877
 
858
878
  classes = self._sklearn_object.classes_
859
879
  if isinstance(classes, numpy.ndarray):
@@ -1078,7 +1098,7 @@ class GammaRegressor(BaseTransformer):
1078
1098
  cp.dump(self._sklearn_object, local_score_file)
1079
1099
 
1080
1100
  # Create temp stage to run score.
1081
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1101
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1082
1102
  session = dataset._session
1083
1103
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1084
1104
  SqlResultValidator(
@@ -1092,8 +1112,9 @@ class GammaRegressor(BaseTransformer):
1092
1112
  expected_value=f"Stage area {score_stage_name} successfully created."
1093
1113
  ).validate()
1094
1114
 
1095
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1096
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1115
+ # Use posixpath to construct stage paths
1116
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1117
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1097
1118
  statement_params = telemetry.get_function_usage_statement_params(
1098
1119
  project=_PROJECT,
1099
1120
  subproject=_SUBPROJECT,
@@ -1119,6 +1140,7 @@ class GammaRegressor(BaseTransformer):
1119
1140
  replace=True,
1120
1141
  session=session,
1121
1142
  statement_params=statement_params,
1143
+ anonymous=True
1122
1144
  )
1123
1145
  def score_wrapper_sproc(
1124
1146
  session: Session,
@@ -1126,7 +1148,8 @@ class GammaRegressor(BaseTransformer):
1126
1148
  stage_score_file_name: str,
1127
1149
  input_cols: List[str],
1128
1150
  label_cols: List[str],
1129
- sample_weight_col: Optional[str]
1151
+ sample_weight_col: Optional[str],
1152
+ statement_params: Dict[str, str]
1130
1153
  ) -> float:
1131
1154
  import cloudpickle as cp
1132
1155
  import numpy as np
@@ -1176,14 +1199,14 @@ class GammaRegressor(BaseTransformer):
1176
1199
  api_calls=[Session.call],
1177
1200
  custom_tags=dict([("autogen", True)]),
1178
1201
  )
1179
- score = session.call(
1180
- score_sproc_name,
1202
+ score = score_wrapper_sproc(
1203
+ session,
1181
1204
  query,
1182
1205
  stage_score_file_name,
1183
1206
  identifier.get_unescaped_names(self.input_cols),
1184
1207
  identifier.get_unescaped_names(self.label_cols),
1185
1208
  identifier.get_unescaped_names(self.sample_weight_col),
1186
- statement_params=statement_params,
1209
+ statement_params,
1187
1210
  )
1188
1211
 
1189
1212
  cleanup_temp_files([local_score_file_name])
@@ -1201,18 +1224,20 @@ class GammaRegressor(BaseTransformer):
1201
1224
  if self._sklearn_object._estimator_type == 'classifier':
1202
1225
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1203
1226
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1204
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1227
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1228
+ ([] if self._drop_input_cols else inputs) + outputs)
1205
1229
  # For regressor, the type of predict is float64
1206
1230
  elif self._sklearn_object._estimator_type == 'regressor':
1207
1231
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1208
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1209
-
1232
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1233
+ ([] if self._drop_input_cols else inputs) + outputs)
1210
1234
  for prob_func in PROB_FUNCTIONS:
1211
1235
  if hasattr(self, prob_func):
1212
1236
  output_cols_prefix: str = f"{prob_func}_"
1213
1237
  output_column_names = self._get_output_column_names(output_cols_prefix)
1214
1238
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1215
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1239
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1240
+ ([] if self._drop_input_cols else inputs) + outputs)
1216
1241
 
1217
1242
  @property
1218
1243
  def model_signatures(self) -> Dict[str, ModelSignature]: