snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189) hide show
  1. snowflake/ml/_internal/env_utils.py +2 -1
  2. snowflake/ml/_internal/file_utils.py +29 -7
  3. snowflake/ml/_internal/telemetry.py +5 -8
  4. snowflake/ml/_internal/utils/uri.py +7 -2
  5. snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
  6. snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
  7. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
  8. snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
  9. snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
  10. snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
  11. snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
  12. snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
  13. snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
  14. snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
  15. snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
  16. snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
  17. snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
  18. snowflake/ml/model/_deployer.py +14 -27
  19. snowflake/ml/model/_env.py +4 -4
  20. snowflake/ml/model/_handlers/custom.py +14 -2
  21. snowflake/ml/model/_handlers/pytorch.py +186 -0
  22. snowflake/ml/model/_handlers/sklearn.py +14 -9
  23. snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
  24. snowflake/ml/model/_handlers/torchscript.py +180 -0
  25. snowflake/ml/model/_handlers/xgboost.py +19 -9
  26. snowflake/ml/model/_model.py +3 -2
  27. snowflake/ml/model/_model_meta.py +12 -7
  28. snowflake/ml/model/model_signature.py +446 -66
  29. snowflake/ml/model/type_hints.py +23 -4
  30. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
  31. snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
  32. snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
  33. snowflake/ml/modeling/cluster/birch.py +51 -26
  34. snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
  35. snowflake/ml/modeling/cluster/dbscan.py +51 -26
  36. snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
  37. snowflake/ml/modeling/cluster/k_means.py +51 -26
  38. snowflake/ml/modeling/cluster/mean_shift.py +51 -26
  39. snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
  40. snowflake/ml/modeling/cluster/optics.py +51 -26
  41. snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
  42. snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
  43. snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
  44. snowflake/ml/modeling/compose/column_transformer.py +51 -26
  45. snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
  46. snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
  47. snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
  48. snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
  49. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
  50. snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
  51. snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
  52. snowflake/ml/modeling/covariance/oas.py +51 -26
  53. snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
  54. snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
  55. snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
  56. snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
  57. snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
  58. snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
  59. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
  60. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
  61. snowflake/ml/modeling/decomposition/pca.py +51 -26
  62. snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
  63. snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
  64. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
  65. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
  66. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
  67. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
  68. snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
  69. snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
  70. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
  71. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
  72. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
  73. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
  74. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
  75. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
  76. snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
  77. snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
  78. snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
  79. snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
  80. snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
  81. snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
  82. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
  83. snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
  84. snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
  85. snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
  86. snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
  87. snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
  88. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
  89. snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
  90. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
  91. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
  92. snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
  93. snowflake/ml/modeling/impute/knn_imputer.py +51 -26
  94. snowflake/ml/modeling/impute/missing_indicator.py +51 -26
  95. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
  96. snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
  97. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
  98. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
  99. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
  100. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
  101. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
  102. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
  103. snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
  104. snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
  105. snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
  106. snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
  107. snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
  108. snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
  109. snowflake/ml/modeling/linear_model/lars.py +51 -26
  110. snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
  111. snowflake/ml/modeling/linear_model/lasso.py +51 -26
  112. snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
  113. snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
  114. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
  115. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
  116. snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
  117. snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
  118. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
  119. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
  120. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
  121. snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
  122. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
  123. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
  124. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
  125. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
  126. snowflake/ml/modeling/linear_model/perceptron.py +51 -26
  127. snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
  128. snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
  129. snowflake/ml/modeling/linear_model/ridge.py +51 -26
  130. snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
  131. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
  132. snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
  133. snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
  134. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
  135. snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
  136. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
  137. snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
  138. snowflake/ml/modeling/manifold/isomap.py +51 -26
  139. snowflake/ml/modeling/manifold/mds.py +51 -26
  140. snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
  141. snowflake/ml/modeling/manifold/tsne.py +51 -26
  142. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
  143. snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
  144. snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
  145. snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
  146. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
  147. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
  148. snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
  149. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
  150. snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
  151. snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
  152. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
  153. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
  154. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
  155. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
  156. snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
  157. snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
  158. snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
  159. snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
  160. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
  161. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
  162. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
  163. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
  164. snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
  165. snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
  166. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
  167. snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
  168. snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
  169. snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
  170. snowflake/ml/modeling/svm/linear_svc.py +51 -26
  171. snowflake/ml/modeling/svm/linear_svr.py +51 -26
  172. snowflake/ml/modeling/svm/nu_svc.py +51 -26
  173. snowflake/ml/modeling/svm/nu_svr.py +51 -26
  174. snowflake/ml/modeling/svm/svc.py +51 -26
  175. snowflake/ml/modeling/svm/svr.py +51 -26
  176. snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
  177. snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
  178. snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
  179. snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
  180. snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
  181. snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
  182. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
  183. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
  184. snowflake/ml/registry/model_registry.py +74 -56
  185. snowflake/ml/version.py +1 -1
  186. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
  187. snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
  188. snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
  189. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -235,7 +237,6 @@ class TheilSenRegressor(BaseTransformer):
235
237
  sample_weight_col: Optional[str] = None,
236
238
  ) -> None:
237
239
  super().__init__()
238
- self.id = str(uuid4()).replace("-", "_").upper()
239
240
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
240
241
 
241
242
  self._deps = list(deps)
@@ -263,6 +264,15 @@ class TheilSenRegressor(BaseTransformer):
263
264
  self.set_drop_input_cols(drop_input_cols)
264
265
  self.set_sample_weight_col(sample_weight_col)
265
266
 
267
+ def _get_rand_id(self) -> str:
268
+ """
269
+ Generate random id to be used in sproc and stage names.
270
+
271
+ Returns:
272
+ Random id string usable in sproc, table, and stage names.
273
+ """
274
+ return str(uuid4()).replace("-", "_").upper()
275
+
266
276
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
267
277
  """
268
278
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -341,7 +351,7 @@ class TheilSenRegressor(BaseTransformer):
341
351
  cp.dump(self._sklearn_object, local_transform_file)
342
352
 
343
353
  # Create temp stage to run fit.
344
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
354
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
345
355
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
346
356
  SqlResultValidator(
347
357
  session=session,
@@ -354,11 +364,12 @@ class TheilSenRegressor(BaseTransformer):
354
364
  expected_value=f"Stage area {transform_stage_name} successfully created."
355
365
  ).validate()
356
366
 
357
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
367
+ # Use posixpath to construct stage paths
368
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
369
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
358
370
  local_result_file_name = get_temp_file_path()
359
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
360
371
 
361
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
372
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
362
373
  statement_params = telemetry.get_function_usage_statement_params(
363
374
  project=_PROJECT,
364
375
  subproject=_SUBPROJECT,
@@ -384,6 +395,7 @@ class TheilSenRegressor(BaseTransformer):
384
395
  replace=True,
385
396
  session=session,
386
397
  statement_params=statement_params,
398
+ anonymous=True
387
399
  )
388
400
  def fit_wrapper_sproc(
389
401
  session: Session,
@@ -392,7 +404,8 @@ class TheilSenRegressor(BaseTransformer):
392
404
  stage_result_file_name: str,
393
405
  input_cols: List[str],
394
406
  label_cols: List[str],
395
- sample_weight_col: Optional[str]
407
+ sample_weight_col: Optional[str],
408
+ statement_params: Dict[str, str]
396
409
  ) -> str:
397
410
  import cloudpickle as cp
398
411
  import numpy as np
@@ -459,15 +472,15 @@ class TheilSenRegressor(BaseTransformer):
459
472
  api_calls=[Session.call],
460
473
  custom_tags=dict([("autogen", True)]),
461
474
  )
462
- sproc_export_file_name = session.call(
463
- fit_sproc_name,
475
+ sproc_export_file_name = fit_wrapper_sproc(
476
+ session,
464
477
  query,
465
478
  stage_transform_file_name,
466
479
  stage_result_file_name,
467
480
  identifier.get_unescaped_names(self.input_cols),
468
481
  identifier.get_unescaped_names(self.label_cols),
469
482
  identifier.get_unescaped_names(self.sample_weight_col),
470
- statement_params=statement_params,
483
+ statement_params,
471
484
  )
472
485
 
473
486
  if "|" in sproc_export_file_name:
@@ -477,7 +490,7 @@ class TheilSenRegressor(BaseTransformer):
477
490
  print("\n".join(fields[1:]))
478
491
 
479
492
  session.file.get(
480
- os.path.join(stage_result_file_name, sproc_export_file_name),
493
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
481
494
  local_result_file_name,
482
495
  statement_params=statement_params
483
496
  )
@@ -523,7 +536,7 @@ class TheilSenRegressor(BaseTransformer):
523
536
 
524
537
  # Register vectorized UDF for batch inference
525
538
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
526
- safe_id=self.id, method=inference_method)
539
+ safe_id=self._get_rand_id(), method=inference_method)
527
540
 
528
541
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
529
542
  # will try to pickle all of self which fails.
@@ -615,7 +628,7 @@ class TheilSenRegressor(BaseTransformer):
615
628
  return transformed_pandas_df.to_dict("records")
616
629
 
617
630
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
618
- safe_id=self.id
631
+ safe_id=self._get_rand_id()
619
632
  )
620
633
 
621
634
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -782,11 +795,18 @@ class TheilSenRegressor(BaseTransformer):
782
795
  Transformed dataset.
783
796
  """
784
797
  if isinstance(dataset, DataFrame):
798
+ expected_type_inferred = "float"
799
+ # when it is classifier, infer the datatype from label columns
800
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
801
+ expected_type_inferred = convert_sp_to_sf_type(
802
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
803
+ )
804
+
785
805
  output_df = self._batch_inference(
786
806
  dataset=dataset,
787
807
  inference_method="predict",
788
808
  expected_output_cols_list=self.output_cols,
789
- expected_output_cols_type="float",
809
+ expected_output_cols_type=expected_type_inferred,
790
810
  )
791
811
  elif isinstance(dataset, pd.DataFrame):
792
812
  output_df = self._sklearn_inference(
@@ -857,10 +877,10 @@ class TheilSenRegressor(BaseTransformer):
857
877
 
858
878
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
859
879
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
860
- Returns an empty list if current object is not a classifier or not yet fitted.
880
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
861
881
  """
862
882
  if getattr(self._sklearn_object, "classes_", None) is None:
863
- return []
883
+ return [output_cols_prefix]
864
884
 
865
885
  classes = self._sklearn_object.classes_
866
886
  if isinstance(classes, numpy.ndarray):
@@ -1085,7 +1105,7 @@ class TheilSenRegressor(BaseTransformer):
1085
1105
  cp.dump(self._sklearn_object, local_score_file)
1086
1106
 
1087
1107
  # Create temp stage to run score.
1088
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1108
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1089
1109
  session = dataset._session
1090
1110
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1091
1111
  SqlResultValidator(
@@ -1099,8 +1119,9 @@ class TheilSenRegressor(BaseTransformer):
1099
1119
  expected_value=f"Stage area {score_stage_name} successfully created."
1100
1120
  ).validate()
1101
1121
 
1102
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1103
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1122
+ # Use posixpath to construct stage paths
1123
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1124
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1104
1125
  statement_params = telemetry.get_function_usage_statement_params(
1105
1126
  project=_PROJECT,
1106
1127
  subproject=_SUBPROJECT,
@@ -1126,6 +1147,7 @@ class TheilSenRegressor(BaseTransformer):
1126
1147
  replace=True,
1127
1148
  session=session,
1128
1149
  statement_params=statement_params,
1150
+ anonymous=True
1129
1151
  )
1130
1152
  def score_wrapper_sproc(
1131
1153
  session: Session,
@@ -1133,7 +1155,8 @@ class TheilSenRegressor(BaseTransformer):
1133
1155
  stage_score_file_name: str,
1134
1156
  input_cols: List[str],
1135
1157
  label_cols: List[str],
1136
- sample_weight_col: Optional[str]
1158
+ sample_weight_col: Optional[str],
1159
+ statement_params: Dict[str, str]
1137
1160
  ) -> float:
1138
1161
  import cloudpickle as cp
1139
1162
  import numpy as np
@@ -1183,14 +1206,14 @@ class TheilSenRegressor(BaseTransformer):
1183
1206
  api_calls=[Session.call],
1184
1207
  custom_tags=dict([("autogen", True)]),
1185
1208
  )
1186
- score = session.call(
1187
- score_sproc_name,
1209
+ score = score_wrapper_sproc(
1210
+ session,
1188
1211
  query,
1189
1212
  stage_score_file_name,
1190
1213
  identifier.get_unescaped_names(self.input_cols),
1191
1214
  identifier.get_unescaped_names(self.label_cols),
1192
1215
  identifier.get_unescaped_names(self.sample_weight_col),
1193
- statement_params=statement_params,
1216
+ statement_params,
1194
1217
  )
1195
1218
 
1196
1219
  cleanup_temp_files([local_score_file_name])
@@ -1208,18 +1231,20 @@ class TheilSenRegressor(BaseTransformer):
1208
1231
  if self._sklearn_object._estimator_type == 'classifier':
1209
1232
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1210
1233
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1211
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1234
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1235
+ ([] if self._drop_input_cols else inputs) + outputs)
1212
1236
  # For regressor, the type of predict is float64
1213
1237
  elif self._sklearn_object._estimator_type == 'regressor':
1214
1238
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1215
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1216
-
1239
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1240
+ ([] if self._drop_input_cols else inputs) + outputs)
1217
1241
  for prob_func in PROB_FUNCTIONS:
1218
1242
  if hasattr(self, prob_func):
1219
1243
  output_cols_prefix: str = f"{prob_func}_"
1220
1244
  output_column_names = self._get_output_column_names(output_cols_prefix)
1221
1245
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1222
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1246
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1247
+ ([] if self._drop_input_cols else inputs) + outputs)
1223
1248
 
1224
1249
  @property
1225
1250
  def model_signatures(self) -> Dict[str, ModelSignature]:
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -261,7 +263,6 @@ class TweedieRegressor(BaseTransformer):
261
263
  sample_weight_col: Optional[str] = None,
262
264
  ) -> None:
263
265
  super().__init__()
264
- self.id = str(uuid4()).replace("-", "_").upper()
265
266
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
266
267
 
267
268
  self._deps = list(deps)
@@ -289,6 +290,15 @@ class TweedieRegressor(BaseTransformer):
289
290
  self.set_drop_input_cols(drop_input_cols)
290
291
  self.set_sample_weight_col(sample_weight_col)
291
292
 
293
+ def _get_rand_id(self) -> str:
294
+ """
295
+ Generate random id to be used in sproc and stage names.
296
+
297
+ Returns:
298
+ Random id string usable in sproc, table, and stage names.
299
+ """
300
+ return str(uuid4()).replace("-", "_").upper()
301
+
292
302
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
293
303
  """
294
304
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -367,7 +377,7 @@ class TweedieRegressor(BaseTransformer):
367
377
  cp.dump(self._sklearn_object, local_transform_file)
368
378
 
369
379
  # Create temp stage to run fit.
370
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
380
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
371
381
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
372
382
  SqlResultValidator(
373
383
  session=session,
@@ -380,11 +390,12 @@ class TweedieRegressor(BaseTransformer):
380
390
  expected_value=f"Stage area {transform_stage_name} successfully created."
381
391
  ).validate()
382
392
 
383
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
393
+ # Use posixpath to construct stage paths
394
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
395
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
384
396
  local_result_file_name = get_temp_file_path()
385
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
386
397
 
387
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
398
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
388
399
  statement_params = telemetry.get_function_usage_statement_params(
389
400
  project=_PROJECT,
390
401
  subproject=_SUBPROJECT,
@@ -410,6 +421,7 @@ class TweedieRegressor(BaseTransformer):
410
421
  replace=True,
411
422
  session=session,
412
423
  statement_params=statement_params,
424
+ anonymous=True
413
425
  )
414
426
  def fit_wrapper_sproc(
415
427
  session: Session,
@@ -418,7 +430,8 @@ class TweedieRegressor(BaseTransformer):
418
430
  stage_result_file_name: str,
419
431
  input_cols: List[str],
420
432
  label_cols: List[str],
421
- sample_weight_col: Optional[str]
433
+ sample_weight_col: Optional[str],
434
+ statement_params: Dict[str, str]
422
435
  ) -> str:
423
436
  import cloudpickle as cp
424
437
  import numpy as np
@@ -485,15 +498,15 @@ class TweedieRegressor(BaseTransformer):
485
498
  api_calls=[Session.call],
486
499
  custom_tags=dict([("autogen", True)]),
487
500
  )
488
- sproc_export_file_name = session.call(
489
- fit_sproc_name,
501
+ sproc_export_file_name = fit_wrapper_sproc(
502
+ session,
490
503
  query,
491
504
  stage_transform_file_name,
492
505
  stage_result_file_name,
493
506
  identifier.get_unescaped_names(self.input_cols),
494
507
  identifier.get_unescaped_names(self.label_cols),
495
508
  identifier.get_unescaped_names(self.sample_weight_col),
496
- statement_params=statement_params,
509
+ statement_params,
497
510
  )
498
511
 
499
512
  if "|" in sproc_export_file_name:
@@ -503,7 +516,7 @@ class TweedieRegressor(BaseTransformer):
503
516
  print("\n".join(fields[1:]))
504
517
 
505
518
  session.file.get(
506
- os.path.join(stage_result_file_name, sproc_export_file_name),
519
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
507
520
  local_result_file_name,
508
521
  statement_params=statement_params
509
522
  )
@@ -549,7 +562,7 @@ class TweedieRegressor(BaseTransformer):
549
562
 
550
563
  # Register vectorized UDF for batch inference
551
564
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
552
- safe_id=self.id, method=inference_method)
565
+ safe_id=self._get_rand_id(), method=inference_method)
553
566
 
554
567
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
555
568
  # will try to pickle all of self which fails.
@@ -641,7 +654,7 @@ class TweedieRegressor(BaseTransformer):
641
654
  return transformed_pandas_df.to_dict("records")
642
655
 
643
656
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
644
- safe_id=self.id
657
+ safe_id=self._get_rand_id()
645
658
  )
646
659
 
647
660
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -808,11 +821,18 @@ class TweedieRegressor(BaseTransformer):
808
821
  Transformed dataset.
809
822
  """
810
823
  if isinstance(dataset, DataFrame):
824
+ expected_type_inferred = "float"
825
+ # when it is classifier, infer the datatype from label columns
826
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
827
+ expected_type_inferred = convert_sp_to_sf_type(
828
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
829
+ )
830
+
811
831
  output_df = self._batch_inference(
812
832
  dataset=dataset,
813
833
  inference_method="predict",
814
834
  expected_output_cols_list=self.output_cols,
815
- expected_output_cols_type="float",
835
+ expected_output_cols_type=expected_type_inferred,
816
836
  )
817
837
  elif isinstance(dataset, pd.DataFrame):
818
838
  output_df = self._sklearn_inference(
@@ -883,10 +903,10 @@ class TweedieRegressor(BaseTransformer):
883
903
 
884
904
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
885
905
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
886
- Returns an empty list if current object is not a classifier or not yet fitted.
906
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
887
907
  """
888
908
  if getattr(self._sklearn_object, "classes_", None) is None:
889
- return []
909
+ return [output_cols_prefix]
890
910
 
891
911
  classes = self._sklearn_object.classes_
892
912
  if isinstance(classes, numpy.ndarray):
@@ -1111,7 +1131,7 @@ class TweedieRegressor(BaseTransformer):
1111
1131
  cp.dump(self._sklearn_object, local_score_file)
1112
1132
 
1113
1133
  # Create temp stage to run score.
1114
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1134
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1115
1135
  session = dataset._session
1116
1136
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1117
1137
  SqlResultValidator(
@@ -1125,8 +1145,9 @@ class TweedieRegressor(BaseTransformer):
1125
1145
  expected_value=f"Stage area {score_stage_name} successfully created."
1126
1146
  ).validate()
1127
1147
 
1128
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1129
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1148
+ # Use posixpath to construct stage paths
1149
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1150
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1130
1151
  statement_params = telemetry.get_function_usage_statement_params(
1131
1152
  project=_PROJECT,
1132
1153
  subproject=_SUBPROJECT,
@@ -1152,6 +1173,7 @@ class TweedieRegressor(BaseTransformer):
1152
1173
  replace=True,
1153
1174
  session=session,
1154
1175
  statement_params=statement_params,
1176
+ anonymous=True
1155
1177
  )
1156
1178
  def score_wrapper_sproc(
1157
1179
  session: Session,
@@ -1159,7 +1181,8 @@ class TweedieRegressor(BaseTransformer):
1159
1181
  stage_score_file_name: str,
1160
1182
  input_cols: List[str],
1161
1183
  label_cols: List[str],
1162
- sample_weight_col: Optional[str]
1184
+ sample_weight_col: Optional[str],
1185
+ statement_params: Dict[str, str]
1163
1186
  ) -> float:
1164
1187
  import cloudpickle as cp
1165
1188
  import numpy as np
@@ -1209,14 +1232,14 @@ class TweedieRegressor(BaseTransformer):
1209
1232
  api_calls=[Session.call],
1210
1233
  custom_tags=dict([("autogen", True)]),
1211
1234
  )
1212
- score = session.call(
1213
- score_sproc_name,
1235
+ score = score_wrapper_sproc(
1236
+ session,
1214
1237
  query,
1215
1238
  stage_score_file_name,
1216
1239
  identifier.get_unescaped_names(self.input_cols),
1217
1240
  identifier.get_unescaped_names(self.label_cols),
1218
1241
  identifier.get_unescaped_names(self.sample_weight_col),
1219
- statement_params=statement_params,
1242
+ statement_params,
1220
1243
  )
1221
1244
 
1222
1245
  cleanup_temp_files([local_score_file_name])
@@ -1234,18 +1257,20 @@ class TweedieRegressor(BaseTransformer):
1234
1257
  if self._sklearn_object._estimator_type == 'classifier':
1235
1258
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1236
1259
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1237
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1260
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1261
+ ([] if self._drop_input_cols else inputs) + outputs)
1238
1262
  # For regressor, the type of predict is float64
1239
1263
  elif self._sklearn_object._estimator_type == 'regressor':
1240
1264
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1241
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1242
-
1265
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1266
+ ([] if self._drop_input_cols else inputs) + outputs)
1243
1267
  for prob_func in PROB_FUNCTIONS:
1244
1268
  if hasattr(self, prob_func):
1245
1269
  output_cols_prefix: str = f"{prob_func}_"
1246
1270
  output_column_names = self._get_output_column_names(output_cols_prefix)
1247
1271
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1248
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1272
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1273
+ ([] if self._drop_input_cols else inputs) + outputs)
1249
1274
 
1250
1275
  @property
1251
1276
  def model_signatures(self) -> Dict[str, ModelSignature]: