snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189)
  1. snowflake/ml/_internal/env_utils.py +2 -1
  2. snowflake/ml/_internal/file_utils.py +29 -7
  3. snowflake/ml/_internal/telemetry.py +5 -8
  4. snowflake/ml/_internal/utils/uri.py +7 -2
  5. snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
  6. snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
  7. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
  8. snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
  9. snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
  10. snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
  11. snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
  12. snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
  13. snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
  14. snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
  15. snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
  16. snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
  17. snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
  18. snowflake/ml/model/_deployer.py +14 -27
  19. snowflake/ml/model/_env.py +4 -4
  20. snowflake/ml/model/_handlers/custom.py +14 -2
  21. snowflake/ml/model/_handlers/pytorch.py +186 -0
  22. snowflake/ml/model/_handlers/sklearn.py +14 -9
  23. snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
  24. snowflake/ml/model/_handlers/torchscript.py +180 -0
  25. snowflake/ml/model/_handlers/xgboost.py +19 -9
  26. snowflake/ml/model/_model.py +3 -2
  27. snowflake/ml/model/_model_meta.py +12 -7
  28. snowflake/ml/model/model_signature.py +446 -66
  29. snowflake/ml/model/type_hints.py +23 -4
  30. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
  31. snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
  32. snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
  33. snowflake/ml/modeling/cluster/birch.py +51 -26
  34. snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
  35. snowflake/ml/modeling/cluster/dbscan.py +51 -26
  36. snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
  37. snowflake/ml/modeling/cluster/k_means.py +51 -26
  38. snowflake/ml/modeling/cluster/mean_shift.py +51 -26
  39. snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
  40. snowflake/ml/modeling/cluster/optics.py +51 -26
  41. snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
  42. snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
  43. snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
  44. snowflake/ml/modeling/compose/column_transformer.py +51 -26
  45. snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
  46. snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
  47. snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
  48. snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
  49. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
  50. snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
  51. snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
  52. snowflake/ml/modeling/covariance/oas.py +51 -26
  53. snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
  54. snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
  55. snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
  56. snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
  57. snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
  58. snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
  59. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
  60. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
  61. snowflake/ml/modeling/decomposition/pca.py +51 -26
  62. snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
  63. snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
  64. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
  65. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
  66. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
  67. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
  68. snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
  69. snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
  70. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
  71. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
  72. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
  73. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
  74. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
  75. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
  76. snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
  77. snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
  78. snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
  79. snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
  80. snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
  81. snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
  82. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
  83. snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
  84. snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
  85. snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
  86. snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
  87. snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
  88. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
  89. snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
  90. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
  91. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
  92. snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
  93. snowflake/ml/modeling/impute/knn_imputer.py +51 -26
  94. snowflake/ml/modeling/impute/missing_indicator.py +51 -26
  95. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
  96. snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
  97. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
  98. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
  99. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
  100. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
  101. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
  102. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
  103. snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
  104. snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
  105. snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
  106. snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
  107. snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
  108. snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
  109. snowflake/ml/modeling/linear_model/lars.py +51 -26
  110. snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
  111. snowflake/ml/modeling/linear_model/lasso.py +51 -26
  112. snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
  113. snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
  114. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
  115. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
  116. snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
  117. snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
  118. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
  119. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
  120. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
  121. snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
  122. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
  123. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
  124. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
  125. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
  126. snowflake/ml/modeling/linear_model/perceptron.py +51 -26
  127. snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
  128. snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
  129. snowflake/ml/modeling/linear_model/ridge.py +51 -26
  130. snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
  131. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
  132. snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
  133. snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
  134. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
  135. snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
  136. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
  137. snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
  138. snowflake/ml/modeling/manifold/isomap.py +51 -26
  139. snowflake/ml/modeling/manifold/mds.py +51 -26
  140. snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
  141. snowflake/ml/modeling/manifold/tsne.py +51 -26
  142. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
  143. snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
  144. snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
  145. snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
  146. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
  147. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
  148. snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
  149. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
  150. snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
  151. snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
  152. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
  153. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
  154. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
  155. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
  156. snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
  157. snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
  158. snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
  159. snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
  160. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
  161. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
  162. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
  163. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
  164. snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
  165. snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
  166. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
  167. snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
  168. snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
  169. snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
  170. snowflake/ml/modeling/svm/linear_svc.py +51 -26
  171. snowflake/ml/modeling/svm/linear_svr.py +51 -26
  172. snowflake/ml/modeling/svm/nu_svc.py +51 -26
  173. snowflake/ml/modeling/svm/nu_svr.py +51 -26
  174. snowflake/ml/modeling/svm/svc.py +51 -26
  175. snowflake/ml/modeling/svm/svr.py +51 -26
  176. snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
  177. snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
  178. snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
  179. snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
  180. snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
  181. snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
  182. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
  183. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
  184. snowflake/ml/registry/model_registry.py +74 -56
  185. snowflake/ml/version.py +1 -1
  186. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
  187. snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
  188. snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
  189. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
snowflake/ml/modeling/linear_model/linear_regression.py

@@ -7,6 +7,7 @@
 #
 import inspect
 import os
+import posixpath
 from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
 from uuid import uuid4
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark.functions import pandas_udf, sproc
 from snowflake.snowpark.types import PandasSeries
+from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
 
 from snowflake.ml.model.model_signature import (
 DataType,
@@ -203,7 +205,6 @@ class LinearRegression(BaseTransformer):
 sample_weight_col: Optional[str] = None,
 ) -> None:
 super().__init__()
-self.id = str(uuid4()).replace("-", "_").upper()
 deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
 
 self._deps = list(deps)
@@ -226,6 +227,15 @@ class LinearRegression(BaseTransformer):
 self.set_drop_input_cols(drop_input_cols)
 self.set_sample_weight_col(sample_weight_col)
 
+def _get_rand_id(self) -> str:
+"""
+Generate random id to be used in sproc and stage names.
+
+Returns:
+Random id string usable in sproc, table, and stage names.
+"""
+return str(uuid4()).replace("-", "_").upper()
+
 def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
 """
 Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -304,7 +314,7 @@ class LinearRegression(BaseTransformer):
 cp.dump(self._sklearn_object, local_transform_file)
 
 # Create temp stage to run fit.
-transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
+transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
 stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
 SqlResultValidator(
 session=session,
@@ -317,11 +327,12 @@ class LinearRegression(BaseTransformer):
 expected_value=f"Stage area {transform_stage_name} successfully created."
 ).validate()
 
-stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
+# Use posixpath to construct stage paths
+stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
+stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
 local_result_file_name = get_temp_file_path()
-stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
 
-fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
+fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
 statement_params = telemetry.get_function_usage_statement_params(
 project=_PROJECT,
 subproject=_SUBPROJECT,
@@ -347,6 +358,7 @@ class LinearRegression(BaseTransformer):
 replace=True,
 session=session,
 statement_params=statement_params,
+anonymous=True
 )
 def fit_wrapper_sproc(
 session: Session,
@@ -355,7 +367,8 @@ class LinearRegression(BaseTransformer):
 stage_result_file_name: str,
 input_cols: List[str],
 label_cols: List[str],
-sample_weight_col: Optional[str]
+sample_weight_col: Optional[str],
+statement_params: Dict[str, str]
 ) -> str:
 import cloudpickle as cp
 import numpy as np
@@ -422,15 +435,15 @@ class LinearRegression(BaseTransformer):
 api_calls=[Session.call],
 custom_tags=dict([("autogen", True)]),
 )
-sproc_export_file_name = session.call(
-fit_sproc_name,
+sproc_export_file_name = fit_wrapper_sproc(
+session,
 query,
 stage_transform_file_name,
 stage_result_file_name,
 identifier.get_unescaped_names(self.input_cols),
 identifier.get_unescaped_names(self.label_cols),
 identifier.get_unescaped_names(self.sample_weight_col),
-statement_params=statement_params,
+statement_params,
 )
 
 if "|" in sproc_export_file_name:
@@ -440,7 +453,7 @@ class LinearRegression(BaseTransformer):
 print("\n".join(fields[1:]))
 
 session.file.get(
-os.path.join(stage_result_file_name, sproc_export_file_name),
+posixpath.join(stage_result_file_name, sproc_export_file_name),
 local_result_file_name,
 statement_params=statement_params
 )
@@ -486,7 +499,7 @@ class LinearRegression(BaseTransformer):
 
 # Register vectorized UDF for batch inference
 batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
-safe_id=self.id, method=inference_method)
+safe_id=self._get_rand_id(), method=inference_method)
 
 # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
 # will try to pickle all of self which fails.
@@ -578,7 +591,7 @@ class LinearRegression(BaseTransformer):
 return transformed_pandas_df.to_dict("records")
 
 batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
-safe_id=self.id
+safe_id=self._get_rand_id()
 )
 
 pass_through_columns = self._get_pass_through_columns(dataset)
@@ -745,11 +758,18 @@ class LinearRegression(BaseTransformer):
 Transformed dataset.
 """
 if isinstance(dataset, DataFrame):
+expected_type_inferred = "float"
+# when it is classifier, infer the datatype from label columns
+if expected_type_inferred == "" and 'predict' in self.model_signatures:
+expected_type_inferred = convert_sp_to_sf_type(
+self.model_signatures['predict'].outputs[0].as_snowpark_type()
+)
+
 output_df = self._batch_inference(
 dataset=dataset,
 inference_method="predict",
 expected_output_cols_list=self.output_cols,
-expected_output_cols_type="float",
+expected_output_cols_type=expected_type_inferred,
 )
 elif isinstance(dataset, pd.DataFrame):
 output_df = self._sklearn_inference(
@@ -820,10 +840,10 @@ class LinearRegression(BaseTransformer):
 
 def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
 """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-Returns an empty list if current object is not a classifier or not yet fitted.
+Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
 """
 if getattr(self._sklearn_object, "classes_", None) is None:
-return []
+return [output_cols_prefix]
 
 classes = self._sklearn_object.classes_
 if isinstance(classes, numpy.ndarray):
@@ -1048,7 +1068,7 @@ class LinearRegression(BaseTransformer):
 cp.dump(self._sklearn_object, local_score_file)
 
 # Create temp stage to run score.
-score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
 session = dataset._session
 stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
 SqlResultValidator(
@@ -1062,8 +1082,9 @@ class LinearRegression(BaseTransformer):
 expected_value=f"Stage area {score_stage_name} successfully created."
 ).validate()
 
-stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
-score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+# Use posixpath to construct stage paths
+stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
+score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
 statement_params = telemetry.get_function_usage_statement_params(
 project=_PROJECT,
 subproject=_SUBPROJECT,
@@ -1089,6 +1110,7 @@ class LinearRegression(BaseTransformer):
 replace=True,
 session=session,
 statement_params=statement_params,
+anonymous=True
 )
 def score_wrapper_sproc(
 session: Session,
@@ -1096,7 +1118,8 @@ class LinearRegression(BaseTransformer):
 stage_score_file_name: str,
 input_cols: List[str],
 label_cols: List[str],
-sample_weight_col: Optional[str]
+sample_weight_col: Optional[str],
+statement_params: Dict[str, str]
 ) -> float:
 import cloudpickle as cp
 import numpy as np
@@ -1146,14 +1169,14 @@ class LinearRegression(BaseTransformer):
 api_calls=[Session.call],
 custom_tags=dict([("autogen", True)]),
 )
-score = session.call(
-score_sproc_name,
+score = score_wrapper_sproc(
+session,
 query,
 stage_score_file_name,
 identifier.get_unescaped_names(self.input_cols),
 identifier.get_unescaped_names(self.label_cols),
 identifier.get_unescaped_names(self.sample_weight_col),
-statement_params=statement_params,
+statement_params,
 )
 
 cleanup_temp_files([local_score_file_name])
@@ -1171,18 +1194,20 @@ class LinearRegression(BaseTransformer):
 if self._sklearn_object._estimator_type == 'classifier':
 outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
 outputs = _rename_features(outputs, self.output_cols) # rename the output columns
-self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
+self._model_signature_dict["predict"] = ModelSignature(inputs,
+([] if self._drop_input_cols else inputs) + outputs)
 # For regressor, the type of predict is float64
 elif self._sklearn_object._estimator_type == 'regressor':
 outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
-self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
-
+self._model_signature_dict["predict"] = ModelSignature(inputs,
+([] if self._drop_input_cols else inputs) + outputs)
 for prob_func in PROB_FUNCTIONS:
 if hasattr(self, prob_func):
 output_cols_prefix: str = f"{prob_func}_"
 output_column_names = self._get_output_column_names(output_cols_prefix)
 outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
-self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
+self._model_signature_dict[prob_func] = ModelSignature(inputs,
+([] if self._drop_input_cols else inputs) + outputs)
 
 @property
 def model_signatures(self) -> Dict[str, ModelSignature]:
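Two changes recur throughout the autogenerated estimators above: stage and sproc names are now built from a fresh _get_rand_id() call instead of a per-instance self.id, and stage paths are joined with posixpath.join rather than os.path.join. The standalone sketch below is not taken from the package (get_rand_id and "model.pkl" are illustrative); it only shows why both changes matter on a Windows client.

import ntpath      # what os.path resolves to on a Windows client
import posixpath
from uuid import uuid4

def get_rand_id() -> str:
    # Same recipe as the new _get_rand_id helper: a fresh uuid4 per call,
    # reformatted so it is legal inside stage and sproc identifiers.
    return str(uuid4()).replace("-", "_").upper()

stage = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=get_rand_id())

# os.path.join inserts a backslash on Windows; Snowflake stage paths expect
# forward slashes, which posixpath.join always produces.
print(ntpath.join(stage, "model.pkl"))     # SNOWML_TRANSFORM_...\model.pkl
print(posixpath.join(stage, "model.pkl"))  # SNOWML_TRANSFORM_.../model.pkl

# Each call yields a distinct id, so repeated fit()/score() calls on the same
# estimator instance get distinct stage and sproc names.
print(get_rand_id() == get_rand_id())      # False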
snowflake/ml/modeling/linear_model/logistic_regression.py

@@ -7,6 +7,7 @@
 #
 import inspect
 import os
+import posixpath
 from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
 from uuid import uuid4
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark.functions import pandas_udf, sproc
 from snowflake.snowpark.types import PandasSeries
+from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
 
 from snowflake.ml.model.model_signature import (
 DataType,
@@ -306,7 +308,6 @@ class LogisticRegression(BaseTransformer):
 sample_weight_col: Optional[str] = None,
 ) -> None:
 super().__init__()
-self.id = str(uuid4()).replace("-", "_").upper()
 deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
 
 self._deps = list(deps)
@@ -340,6 +341,15 @@ class LogisticRegression(BaseTransformer):
 self.set_drop_input_cols(drop_input_cols)
 self.set_sample_weight_col(sample_weight_col)
 
+def _get_rand_id(self) -> str:
+"""
+Generate random id to be used in sproc and stage names.
+
+Returns:
+Random id string usable in sproc, table, and stage names.
+"""
+return str(uuid4()).replace("-", "_").upper()
+
 def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
 """
 Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -418,7 +428,7 @@ class LogisticRegression(BaseTransformer):
 cp.dump(self._sklearn_object, local_transform_file)
 
 # Create temp stage to run fit.
-transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
+transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
 stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
 SqlResultValidator(
 session=session,
@@ -431,11 +441,12 @@ class LogisticRegression(BaseTransformer):
 expected_value=f"Stage area {transform_stage_name} successfully created."
 ).validate()
 
-stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
+# Use posixpath to construct stage paths
+stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
+stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
 local_result_file_name = get_temp_file_path()
-stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
 
-fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
+fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
 statement_params = telemetry.get_function_usage_statement_params(
 project=_PROJECT,
 subproject=_SUBPROJECT,
@@ -461,6 +472,7 @@ class LogisticRegression(BaseTransformer):
 replace=True,
 session=session,
 statement_params=statement_params,
+anonymous=True
 )
 def fit_wrapper_sproc(
 session: Session,
@@ -469,7 +481,8 @@ class LogisticRegression(BaseTransformer):
 stage_result_file_name: str,
 input_cols: List[str],
 label_cols: List[str],
-sample_weight_col: Optional[str]
+sample_weight_col: Optional[str],
+statement_params: Dict[str, str]
 ) -> str:
 import cloudpickle as cp
 import numpy as np
@@ -536,15 +549,15 @@ class LogisticRegression(BaseTransformer):
 api_calls=[Session.call],
 custom_tags=dict([("autogen", True)]),
 )
-sproc_export_file_name = session.call(
-fit_sproc_name,
+sproc_export_file_name = fit_wrapper_sproc(
+session,
 query,
 stage_transform_file_name,
 stage_result_file_name,
 identifier.get_unescaped_names(self.input_cols),
 identifier.get_unescaped_names(self.label_cols),
 identifier.get_unescaped_names(self.sample_weight_col),
-statement_params=statement_params,
+statement_params,
 )
 
 if "|" in sproc_export_file_name:
@@ -554,7 +567,7 @@ class LogisticRegression(BaseTransformer):
 print("\n".join(fields[1:]))
 
 session.file.get(
-os.path.join(stage_result_file_name, sproc_export_file_name),
+posixpath.join(stage_result_file_name, sproc_export_file_name),
 local_result_file_name,
 statement_params=statement_params
 )
@@ -600,7 +613,7 @@ class LogisticRegression(BaseTransformer):
 
 # Register vectorized UDF for batch inference
 batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
-safe_id=self.id, method=inference_method)
+safe_id=self._get_rand_id(), method=inference_method)
 
 # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
 # will try to pickle all of self which fails.
@@ -692,7 +705,7 @@ class LogisticRegression(BaseTransformer):
 return transformed_pandas_df.to_dict("records")
 
 batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
-safe_id=self.id
+safe_id=self._get_rand_id()
 )
 
 pass_through_columns = self._get_pass_through_columns(dataset)
@@ -859,11 +872,18 @@ class LogisticRegression(BaseTransformer):
 Transformed dataset.
 """
 if isinstance(dataset, DataFrame):
+expected_type_inferred = ""
+# when it is classifier, infer the datatype from label columns
+if expected_type_inferred == "" and 'predict' in self.model_signatures:
+expected_type_inferred = convert_sp_to_sf_type(
+self.model_signatures['predict'].outputs[0].as_snowpark_type()
+)
+
 output_df = self._batch_inference(
 dataset=dataset,
 inference_method="predict",
 expected_output_cols_list=self.output_cols,
-expected_output_cols_type="",
+expected_output_cols_type=expected_type_inferred,
 )
 elif isinstance(dataset, pd.DataFrame):
 output_df = self._sklearn_inference(
@@ -934,10 +954,10 @@ class LogisticRegression(BaseTransformer):
 
 def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
 """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-Returns an empty list if current object is not a classifier or not yet fitted.
+Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
 """
 if getattr(self._sklearn_object, "classes_", None) is None:
-return []
+return [output_cols_prefix]
 
 classes = self._sklearn_object.classes_
 if isinstance(classes, numpy.ndarray):
@@ -1168,7 +1188,7 @@ class LogisticRegression(BaseTransformer):
 cp.dump(self._sklearn_object, local_score_file)
 
 # Create temp stage to run score.
-score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
 session = dataset._session
 stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
 SqlResultValidator(
@@ -1182,8 +1202,9 @@ class LogisticRegression(BaseTransformer):
 expected_value=f"Stage area {score_stage_name} successfully created."
 ).validate()
 
-stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
-score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+# Use posixpath to construct stage paths
+stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
+score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
 statement_params = telemetry.get_function_usage_statement_params(
 project=_PROJECT,
 subproject=_SUBPROJECT,
@@ -1209,6 +1230,7 @@ class LogisticRegression(BaseTransformer):
 replace=True,
 session=session,
 statement_params=statement_params,
+anonymous=True
 )
 def score_wrapper_sproc(
 session: Session,
@@ -1216,7 +1238,8 @@ class LogisticRegression(BaseTransformer):
 stage_score_file_name: str,
 input_cols: List[str],
 label_cols: List[str],
-sample_weight_col: Optional[str]
+sample_weight_col: Optional[str],
+statement_params: Dict[str, str]
 ) -> float:
 import cloudpickle as cp
 import numpy as np
@@ -1266,14 +1289,14 @@ class LogisticRegression(BaseTransformer):
 api_calls=[Session.call],
 custom_tags=dict([("autogen", True)]),
 )
-score = session.call(
-score_sproc_name,
+score = score_wrapper_sproc(
+session,
 query,
 stage_score_file_name,
 identifier.get_unescaped_names(self.input_cols),
 identifier.get_unescaped_names(self.label_cols),
 identifier.get_unescaped_names(self.sample_weight_col),
-statement_params=statement_params,
+statement_params,
 )
 
 cleanup_temp_files([local_score_file_name])
@@ -1291,18 +1314,20 @@ class LogisticRegression(BaseTransformer):
 if self._sklearn_object._estimator_type == 'classifier':
 outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
 outputs = _rename_features(outputs, self.output_cols) # rename the output columns
-self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
+self._model_signature_dict["predict"] = ModelSignature(inputs,
+([] if self._drop_input_cols else inputs) + outputs)
 # For regressor, the type of predict is float64
 elif self._sklearn_object._estimator_type == 'regressor':
 outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
-self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
-
+self._model_signature_dict["predict"] = ModelSignature(inputs,
+([] if self._drop_input_cols else inputs) + outputs)
 for prob_func in PROB_FUNCTIONS:
 if hasattr(self, prob_func):
 output_cols_prefix: str = f"{prob_func}_"
 output_column_names = self._get_output_column_names(output_cols_prefix)
 outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
-self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
+self._model_signature_dict[prob_func] = ModelSignature(inputs,
+([] if self._drop_input_cols else inputs) + outputs)
 
 @property
 def model_signatures(self) -> Dict[str, ModelSignature]:
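Beyond the path handling, both files show the same signature-related changes: predict() on a Snowpark DataFrame now derives expected_output_cols_type from the stored model signature via convert_sp_to_sf_type instead of hard-coding "float" or "", and every generated ModelSignature now carries the input features in its output list unless drop_input_cols is set. A minimal sketch of that signature construction follows (assumes snowflake-ml-python is installed; the column names are illustrative, not from the package).

from snowflake.ml.model.model_signature import DataType, FeatureSpec, ModelSignature

inputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in ("SEPAL_LENGTH", "SEPAL_WIDTH")]
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name="OUTPUT_TARGET")]
drop_input_cols = False

# New in 1.0.3: unless drop_input_cols is set, the input features are carried
# through into the signature's output list alongside the prediction columns.
sig = ModelSignature(inputs, ([] if drop_input_cols else inputs) + outputs)
print(len(sig.outputs))  # 3 when inputs are kept, 1 when drop_input_cols=True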