snowflake-ml-python 1.0.1__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (196)
  1. snowflake/ml/_internal/env_utils.py +2 -1
  2. snowflake/ml/_internal/file_utils.py +35 -40
  3. snowflake/ml/_internal/telemetry.py +5 -8
  4. snowflake/ml/_internal/utils/identifier.py +74 -7
  5. snowflake/ml/_internal/utils/uri.py +7 -2
  6. snowflake/ml/model/_core_requirements.py +1 -1
  7. snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
  8. snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
  9. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
  10. snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
  11. snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
  12. snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
  13. snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
  14. snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
  15. snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
  16. snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
  17. snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
  18. snowflake/ml/model/_deploy_client/warehouse/deploy.py +25 -28
  19. snowflake/ml/model/_deploy_client/warehouse/infer_template.py +7 -4
  20. snowflake/ml/model/_deployer.py +14 -27
  21. snowflake/ml/model/_env.py +4 -4
  22. snowflake/ml/model/_handlers/_base.py +3 -1
  23. snowflake/ml/model/_handlers/custom.py +14 -2
  24. snowflake/ml/model/_handlers/pytorch.py +186 -0
  25. snowflake/ml/model/_handlers/sklearn.py +14 -8
  26. snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
  27. snowflake/ml/model/_handlers/torchscript.py +180 -0
  28. snowflake/ml/model/_handlers/xgboost.py +19 -9
  29. snowflake/ml/model/_model.py +27 -21
  30. snowflake/ml/model/_model_meta.py +33 -19
  31. snowflake/ml/model/model_signature.py +446 -66
  32. snowflake/ml/model/type_hints.py +28 -15
  33. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +79 -43
  34. snowflake/ml/modeling/cluster/affinity_propagation.py +79 -43
  35. snowflake/ml/modeling/cluster/agglomerative_clustering.py +79 -43
  36. snowflake/ml/modeling/cluster/birch.py +79 -43
  37. snowflake/ml/modeling/cluster/bisecting_k_means.py +79 -43
  38. snowflake/ml/modeling/cluster/dbscan.py +79 -43
  39. snowflake/ml/modeling/cluster/feature_agglomeration.py +79 -43
  40. snowflake/ml/modeling/cluster/k_means.py +79 -43
  41. snowflake/ml/modeling/cluster/mean_shift.py +79 -43
  42. snowflake/ml/modeling/cluster/mini_batch_k_means.py +79 -43
  43. snowflake/ml/modeling/cluster/optics.py +79 -43
  44. snowflake/ml/modeling/cluster/spectral_biclustering.py +79 -43
  45. snowflake/ml/modeling/cluster/spectral_clustering.py +79 -43
  46. snowflake/ml/modeling/cluster/spectral_coclustering.py +79 -43
  47. snowflake/ml/modeling/compose/column_transformer.py +79 -43
  48. snowflake/ml/modeling/compose/transformed_target_regressor.py +79 -43
  49. snowflake/ml/modeling/covariance/elliptic_envelope.py +79 -43
  50. snowflake/ml/modeling/covariance/empirical_covariance.py +79 -43
  51. snowflake/ml/modeling/covariance/graphical_lasso.py +79 -43
  52. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +79 -43
  53. snowflake/ml/modeling/covariance/ledoit_wolf.py +79 -43
  54. snowflake/ml/modeling/covariance/min_cov_det.py +79 -43
  55. snowflake/ml/modeling/covariance/oas.py +79 -43
  56. snowflake/ml/modeling/covariance/shrunk_covariance.py +79 -43
  57. snowflake/ml/modeling/decomposition/dictionary_learning.py +79 -43
  58. snowflake/ml/modeling/decomposition/factor_analysis.py +79 -43
  59. snowflake/ml/modeling/decomposition/fast_ica.py +79 -43
  60. snowflake/ml/modeling/decomposition/incremental_pca.py +79 -43
  61. snowflake/ml/modeling/decomposition/kernel_pca.py +79 -43
  62. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +79 -43
  63. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +79 -43
  64. snowflake/ml/modeling/decomposition/pca.py +79 -43
  65. snowflake/ml/modeling/decomposition/sparse_pca.py +79 -43
  66. snowflake/ml/modeling/decomposition/truncated_svd.py +79 -43
  67. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +79 -43
  68. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +79 -43
  69. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +79 -43
  70. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +79 -43
  71. snowflake/ml/modeling/ensemble/bagging_classifier.py +79 -43
  72. snowflake/ml/modeling/ensemble/bagging_regressor.py +79 -43
  73. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +79 -43
  74. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +79 -43
  75. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +79 -43
  76. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +79 -43
  77. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +79 -43
  78. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +79 -43
  79. snowflake/ml/modeling/ensemble/isolation_forest.py +79 -43
  80. snowflake/ml/modeling/ensemble/random_forest_classifier.py +79 -43
  81. snowflake/ml/modeling/ensemble/random_forest_regressor.py +79 -43
  82. snowflake/ml/modeling/ensemble/stacking_regressor.py +79 -43
  83. snowflake/ml/modeling/ensemble/voting_classifier.py +79 -43
  84. snowflake/ml/modeling/ensemble/voting_regressor.py +79 -43
  85. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +79 -43
  86. snowflake/ml/modeling/feature_selection/select_fdr.py +79 -43
  87. snowflake/ml/modeling/feature_selection/select_fpr.py +79 -43
  88. snowflake/ml/modeling/feature_selection/select_fwe.py +79 -43
  89. snowflake/ml/modeling/feature_selection/select_k_best.py +79 -43
  90. snowflake/ml/modeling/feature_selection/select_percentile.py +79 -43
  91. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +79 -43
  92. snowflake/ml/modeling/feature_selection/variance_threshold.py +79 -43
  93. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +79 -43
  94. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +79 -43
  95. snowflake/ml/modeling/impute/iterative_imputer.py +79 -43
  96. snowflake/ml/modeling/impute/knn_imputer.py +79 -43
  97. snowflake/ml/modeling/impute/missing_indicator.py +79 -43
  98. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +79 -43
  99. snowflake/ml/modeling/kernel_approximation/nystroem.py +79 -43
  100. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +79 -43
  101. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +79 -43
  102. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +79 -43
  103. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +79 -43
  104. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +79 -43
  105. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +79 -43
  106. snowflake/ml/modeling/linear_model/ard_regression.py +79 -43
  107. snowflake/ml/modeling/linear_model/bayesian_ridge.py +79 -43
  108. snowflake/ml/modeling/linear_model/elastic_net.py +79 -43
  109. snowflake/ml/modeling/linear_model/elastic_net_cv.py +79 -43
  110. snowflake/ml/modeling/linear_model/gamma_regressor.py +79 -43
  111. snowflake/ml/modeling/linear_model/huber_regressor.py +79 -43
  112. snowflake/ml/modeling/linear_model/lars.py +79 -43
  113. snowflake/ml/modeling/linear_model/lars_cv.py +79 -43
  114. snowflake/ml/modeling/linear_model/lasso.py +79 -43
  115. snowflake/ml/modeling/linear_model/lasso_cv.py +79 -43
  116. snowflake/ml/modeling/linear_model/lasso_lars.py +79 -43
  117. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +79 -43
  118. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +79 -43
  119. snowflake/ml/modeling/linear_model/linear_regression.py +79 -43
  120. snowflake/ml/modeling/linear_model/logistic_regression.py +79 -43
  121. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +79 -43
  122. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +79 -43
  123. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +79 -43
  124. snowflake/ml/modeling/linear_model/multi_task_lasso.py +79 -43
  125. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +79 -43
  126. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +79 -43
  127. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +79 -43
  128. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +79 -43
  129. snowflake/ml/modeling/linear_model/perceptron.py +79 -43
  130. snowflake/ml/modeling/linear_model/poisson_regressor.py +79 -43
  131. snowflake/ml/modeling/linear_model/ransac_regressor.py +79 -43
  132. snowflake/ml/modeling/linear_model/ridge.py +79 -43
  133. snowflake/ml/modeling/linear_model/ridge_classifier.py +79 -43
  134. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +79 -43
  135. snowflake/ml/modeling/linear_model/ridge_cv.py +79 -43
  136. snowflake/ml/modeling/linear_model/sgd_classifier.py +79 -43
  137. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +79 -43
  138. snowflake/ml/modeling/linear_model/sgd_regressor.py +79 -43
  139. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +79 -43
  140. snowflake/ml/modeling/linear_model/tweedie_regressor.py +79 -43
  141. snowflake/ml/modeling/manifold/isomap.py +79 -43
  142. snowflake/ml/modeling/manifold/mds.py +79 -43
  143. snowflake/ml/modeling/manifold/spectral_embedding.py +79 -43
  144. snowflake/ml/modeling/manifold/tsne.py +79 -43
  145. snowflake/ml/modeling/metrics/classification.py +6 -1
  146. snowflake/ml/modeling/metrics/regression.py +517 -9
  147. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +79 -43
  148. snowflake/ml/modeling/mixture/gaussian_mixture.py +79 -43
  149. snowflake/ml/modeling/model_selection/grid_search_cv.py +79 -43
  150. snowflake/ml/modeling/model_selection/randomized_search_cv.py +79 -43
  151. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +79 -43
  152. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +79 -43
  153. snowflake/ml/modeling/multiclass/output_code_classifier.py +79 -43
  154. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +79 -43
  155. snowflake/ml/modeling/naive_bayes/categorical_nb.py +79 -43
  156. snowflake/ml/modeling/naive_bayes/complement_nb.py +79 -43
  157. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +79 -43
  158. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +79 -43
  159. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +79 -43
  160. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +79 -43
  161. snowflake/ml/modeling/neighbors/kernel_density.py +79 -43
  162. snowflake/ml/modeling/neighbors/local_outlier_factor.py +79 -43
  163. snowflake/ml/modeling/neighbors/nearest_centroid.py +79 -43
  164. snowflake/ml/modeling/neighbors/nearest_neighbors.py +79 -43
  165. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +79 -43
  166. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +79 -43
  167. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +79 -43
  168. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +79 -43
  169. snowflake/ml/modeling/neural_network/mlp_classifier.py +79 -43
  170. snowflake/ml/modeling/neural_network/mlp_regressor.py +79 -43
  171. snowflake/ml/modeling/pipeline/pipeline.py +24 -0
  172. snowflake/ml/modeling/preprocessing/one_hot_encoder.py +18 -19
  173. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
  174. snowflake/ml/modeling/preprocessing/polynomial_features.py +79 -43
  175. snowflake/ml/modeling/semi_supervised/label_propagation.py +79 -43
  176. snowflake/ml/modeling/semi_supervised/label_spreading.py +79 -43
  177. snowflake/ml/modeling/svm/linear_svc.py +79 -43
  178. snowflake/ml/modeling/svm/linear_svr.py +79 -43
  179. snowflake/ml/modeling/svm/nu_svc.py +79 -43
  180. snowflake/ml/modeling/svm/nu_svr.py +79 -43
  181. snowflake/ml/modeling/svm/svc.py +79 -43
  182. snowflake/ml/modeling/svm/svr.py +79 -43
  183. snowflake/ml/modeling/tree/decision_tree_classifier.py +79 -43
  184. snowflake/ml/modeling/tree/decision_tree_regressor.py +79 -43
  185. snowflake/ml/modeling/tree/extra_tree_classifier.py +79 -43
  186. snowflake/ml/modeling/tree/extra_tree_regressor.py +79 -43
  187. snowflake/ml/modeling/xgboost/xgb_classifier.py +79 -43
  188. snowflake/ml/modeling/xgboost/xgb_regressor.py +79 -43
  189. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +79 -43
  190. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +79 -43
  191. snowflake/ml/registry/model_registry.py +123 -121
  192. snowflake/ml/version.py +1 -1
  193. {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +50 -8
  194. snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
  195. snowflake_ml_python-1.0.1.dist-info/RECORD +0 -246
  196. {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -230,7 +232,6 @@ class PoissonRegressor(BaseTransformer):
230
232
  sample_weight_col: Optional[str] = None,
231
233
  ) -> None:
232
234
  super().__init__()
233
- self.id = str(uuid4()).replace("-", "_").upper()
234
235
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
235
236
 
236
237
  self._deps = list(deps)
@@ -256,6 +257,15 @@ class PoissonRegressor(BaseTransformer):
256
257
  self.set_drop_input_cols(drop_input_cols)
257
258
  self.set_sample_weight_col(sample_weight_col)
258
259
 
260
+ def _get_rand_id(self) -> str:
261
+ """
262
+ Generate random id to be used in sproc and stage names.
263
+
264
+ Returns:
265
+ Random id string usable in sproc, table, and stage names.
266
+ """
267
+ return str(uuid4()).replace("-", "_").upper()
268
+
259
269
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
260
270
  """
261
271
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -334,7 +344,7 @@ class PoissonRegressor(BaseTransformer):
334
344
  cp.dump(self._sklearn_object, local_transform_file)
335
345
 
336
346
  # Create temp stage to run fit.
337
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
347
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
338
348
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
339
349
  SqlResultValidator(
340
350
  session=session,
@@ -347,11 +357,12 @@ class PoissonRegressor(BaseTransformer):
347
357
  expected_value=f"Stage area {transform_stage_name} successfully created."
348
358
  ).validate()
349
359
 
350
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
360
+ # Use posixpath to construct stage paths
361
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
362
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
351
363
  local_result_file_name = get_temp_file_path()
352
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
353
364
 
354
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
365
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
355
366
  statement_params = telemetry.get_function_usage_statement_params(
356
367
  project=_PROJECT,
357
368
  subproject=_SUBPROJECT,
@@ -377,6 +388,7 @@ class PoissonRegressor(BaseTransformer):
377
388
  replace=True,
378
389
  session=session,
379
390
  statement_params=statement_params,
391
+ anonymous=True
380
392
  )
381
393
  def fit_wrapper_sproc(
382
394
  session: Session,
@@ -385,7 +397,8 @@ class PoissonRegressor(BaseTransformer):
385
397
  stage_result_file_name: str,
386
398
  input_cols: List[str],
387
399
  label_cols: List[str],
388
- sample_weight_col: Optional[str]
400
+ sample_weight_col: Optional[str],
401
+ statement_params: Dict[str, str]
389
402
  ) -> str:
390
403
  import cloudpickle as cp
391
404
  import numpy as np
@@ -452,15 +465,15 @@ class PoissonRegressor(BaseTransformer):
452
465
  api_calls=[Session.call],
453
466
  custom_tags=dict([("autogen", True)]),
454
467
  )
455
- sproc_export_file_name = session.call(
456
- fit_sproc_name,
468
+ sproc_export_file_name = fit_wrapper_sproc(
469
+ session,
457
470
  query,
458
471
  stage_transform_file_name,
459
472
  stage_result_file_name,
460
473
  identifier.get_unescaped_names(self.input_cols),
461
474
  identifier.get_unescaped_names(self.label_cols),
462
475
  identifier.get_unescaped_names(self.sample_weight_col),
463
- statement_params=statement_params,
476
+ statement_params,
464
477
  )
465
478
 
466
479
  if "|" in sproc_export_file_name:
@@ -470,7 +483,7 @@ class PoissonRegressor(BaseTransformer):
470
483
  print("\n".join(fields[1:]))
471
484
 
472
485
  session.file.get(
473
- os.path.join(stage_result_file_name, sproc_export_file_name),
486
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
474
487
  local_result_file_name,
475
488
  statement_params=statement_params
476
489
  )
@@ -516,7 +529,7 @@ class PoissonRegressor(BaseTransformer):
516
529
 
517
530
  # Register vectorized UDF for batch inference
518
531
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
519
- safe_id=self.id, method=inference_method)
532
+ safe_id=self._get_rand_id(), method=inference_method)
520
533
 
521
534
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
522
535
  # will try to pickle all of self which fails.
@@ -608,7 +621,7 @@ class PoissonRegressor(BaseTransformer):
608
621
  return transformed_pandas_df.to_dict("records")
609
622
 
610
623
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
611
- safe_id=self.id
624
+ safe_id=self._get_rand_id()
612
625
  )
613
626
 
614
627
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -664,26 +677,37 @@ class PoissonRegressor(BaseTransformer):
664
677
  # input cols need to match unquoted / quoted
665
678
  input_cols = self.input_cols
666
679
  unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
680
+ quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
667
681
 
668
682
  estimator = self._sklearn_object
669
683
 
670
- input_df = dataset[input_cols] # Select input columns with quoted column names.
671
- if hasattr(estimator, "feature_names_in_"):
672
- missing_features = []
673
- for i, f in enumerate(getattr(estimator, "feature_names_in_")):
674
- if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
675
- missing_features.append(f)
676
-
677
- if len(missing_features) > 0:
678
- raise ValueError(
679
- "The feature names should match with those that were passed during fit.\n"
680
- f"Features seen during fit call but not present in the input: {missing_features}\n"
681
- f"Features in the input dataframe : {input_cols}\n"
682
- )
683
- input_df.columns = getattr(estimator, "feature_names_in_")
684
- else:
685
- # Just rename the column names to unquoted identifiers.
686
- input_df.columns = unquoted_input_cols # Replace the quoted columns identifier with unquoted column ids.
684
+ features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
685
+ missing_features = []
686
+ features_in_dataset = set(dataset.columns)
687
+ columns_to_select = []
688
+ for i, f in enumerate(features_required_by_estimator):
689
+ if (
690
+ i >= len(input_cols)
691
+ or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
692
+ or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
693
+ and quoted_input_cols[i] not in features_in_dataset)
694
+ ):
695
+ missing_features.append(f)
696
+ elif input_cols[i] in features_in_dataset:
697
+ columns_to_select.append(input_cols[i])
698
+ elif unquoted_input_cols[i] in features_in_dataset:
699
+ columns_to_select.append(unquoted_input_cols[i])
700
+ else:
701
+ columns_to_select.append(quoted_input_cols[i])
702
+
703
+ if len(missing_features) > 0:
704
+ raise ValueError(
705
+ "The feature names should match with those that were passed during fit.\n"
706
+ f"Features seen during fit call but not present in the input: {missing_features}\n"
707
+ f"Features in the input dataframe : {input_cols}\n"
708
+ )
709
+ input_df = dataset[columns_to_select]
710
+ input_df.columns = features_required_by_estimator
687
711
 
688
712
  transformed_numpy_array = getattr(estimator, inference_method)(
689
713
  input_df
@@ -764,11 +788,18 @@ class PoissonRegressor(BaseTransformer):
764
788
  Transformed dataset.
765
789
  """
766
790
  if isinstance(dataset, DataFrame):
791
+ expected_type_inferred = "float"
792
+ # when it is classifier, infer the datatype from label columns
793
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
794
+ expected_type_inferred = convert_sp_to_sf_type(
795
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
796
+ )
797
+
767
798
  output_df = self._batch_inference(
768
799
  dataset=dataset,
769
800
  inference_method="predict",
770
801
  expected_output_cols_list=self.output_cols,
771
- expected_output_cols_type="float",
802
+ expected_output_cols_type=expected_type_inferred,
772
803
  )
773
804
  elif isinstance(dataset, pd.DataFrame):
774
805
  output_df = self._sklearn_inference(
@@ -839,10 +870,10 @@ class PoissonRegressor(BaseTransformer):
839
870
 
840
871
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
841
872
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
842
- Returns an empty list if current object is not a classifier or not yet fitted.
873
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
843
874
  """
844
875
  if getattr(self._sklearn_object, "classes_", None) is None:
845
- return []
876
+ return [output_cols_prefix]
846
877
 
847
878
  classes = self._sklearn_object.classes_
848
879
  if isinstance(classes, numpy.ndarray):
@@ -1067,7 +1098,7 @@ class PoissonRegressor(BaseTransformer):
1067
1098
  cp.dump(self._sklearn_object, local_score_file)
1068
1099
 
1069
1100
  # Create temp stage to run score.
1070
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1101
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1071
1102
  session = dataset._session
1072
1103
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1073
1104
  SqlResultValidator(
@@ -1081,8 +1112,9 @@ class PoissonRegressor(BaseTransformer):
1081
1112
  expected_value=f"Stage area {score_stage_name} successfully created."
1082
1113
  ).validate()
1083
1114
 
1084
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1085
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1115
+ # Use posixpath to construct stage paths
1116
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1117
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1086
1118
  statement_params = telemetry.get_function_usage_statement_params(
1087
1119
  project=_PROJECT,
1088
1120
  subproject=_SUBPROJECT,
@@ -1108,6 +1140,7 @@ class PoissonRegressor(BaseTransformer):
1108
1140
  replace=True,
1109
1141
  session=session,
1110
1142
  statement_params=statement_params,
1143
+ anonymous=True
1111
1144
  )
1112
1145
  def score_wrapper_sproc(
1113
1146
  session: Session,
@@ -1115,7 +1148,8 @@ class PoissonRegressor(BaseTransformer):
1115
1148
  stage_score_file_name: str,
1116
1149
  input_cols: List[str],
1117
1150
  label_cols: List[str],
1118
- sample_weight_col: Optional[str]
1151
+ sample_weight_col: Optional[str],
1152
+ statement_params: Dict[str, str]
1119
1153
  ) -> float:
1120
1154
  import cloudpickle as cp
1121
1155
  import numpy as np
@@ -1165,14 +1199,14 @@ class PoissonRegressor(BaseTransformer):
1165
1199
  api_calls=[Session.call],
1166
1200
  custom_tags=dict([("autogen", True)]),
1167
1201
  )
1168
- score = session.call(
1169
- score_sproc_name,
1202
+ score = score_wrapper_sproc(
1203
+ session,
1170
1204
  query,
1171
1205
  stage_score_file_name,
1172
1206
  identifier.get_unescaped_names(self.input_cols),
1173
1207
  identifier.get_unescaped_names(self.label_cols),
1174
1208
  identifier.get_unescaped_names(self.sample_weight_col),
1175
- statement_params=statement_params,
1209
+ statement_params,
1176
1210
  )
1177
1211
 
1178
1212
  cleanup_temp_files([local_score_file_name])
@@ -1190,18 +1224,20 @@ class PoissonRegressor(BaseTransformer):
1190
1224
  if self._sklearn_object._estimator_type == 'classifier':
1191
1225
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1192
1226
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1193
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1227
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1228
+ ([] if self._drop_input_cols else inputs) + outputs)
1194
1229
  # For regressor, the type of predict is float64
1195
1230
  elif self._sklearn_object._estimator_type == 'regressor':
1196
1231
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1197
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1198
-
1232
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1233
+ ([] if self._drop_input_cols else inputs) + outputs)
1199
1234
  for prob_func in PROB_FUNCTIONS:
1200
1235
  if hasattr(self, prob_func):
1201
1236
  output_cols_prefix: str = f"{prob_func}_"
1202
1237
  output_column_names = self._get_output_column_names(output_cols_prefix)
1203
1238
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1204
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1239
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1240
+ ([] if self._drop_input_cols else inputs) + outputs)
1205
1241
 
1206
1242
  @property
1207
1243
  def model_signatures(self) -> Dict[str, ModelSignature]:
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -285,7 +287,6 @@ class RANSACRegressor(BaseTransformer):
285
287
  sample_weight_col: Optional[str] = None,
286
288
  ) -> None:
287
289
  super().__init__()
288
- self.id = str(uuid4()).replace("-", "_").upper()
289
290
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
290
291
  deps = deps | _gather_dependencies(estimator)
291
292
  deps = deps | _gather_dependencies(base_estimator)
@@ -319,6 +320,15 @@ class RANSACRegressor(BaseTransformer):
319
320
  self.set_drop_input_cols(drop_input_cols)
320
321
  self.set_sample_weight_col(sample_weight_col)
321
322
 
323
+ def _get_rand_id(self) -> str:
324
+ """
325
+ Generate random id to be used in sproc and stage names.
326
+
327
+ Returns:
328
+ Random id string usable in sproc, table, and stage names.
329
+ """
330
+ return str(uuid4()).replace("-", "_").upper()
331
+
322
332
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
323
333
  """
324
334
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -397,7 +407,7 @@ class RANSACRegressor(BaseTransformer):
397
407
  cp.dump(self._sklearn_object, local_transform_file)
398
408
 
399
409
  # Create temp stage to run fit.
400
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
410
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
401
411
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
402
412
  SqlResultValidator(
403
413
  session=session,
@@ -410,11 +420,12 @@ class RANSACRegressor(BaseTransformer):
410
420
  expected_value=f"Stage area {transform_stage_name} successfully created."
411
421
  ).validate()
412
422
 
413
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
423
+ # Use posixpath to construct stage paths
424
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
425
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
414
426
  local_result_file_name = get_temp_file_path()
415
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
416
427
 
417
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
428
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
418
429
  statement_params = telemetry.get_function_usage_statement_params(
419
430
  project=_PROJECT,
420
431
  subproject=_SUBPROJECT,
@@ -440,6 +451,7 @@ class RANSACRegressor(BaseTransformer):
440
451
  replace=True,
441
452
  session=session,
442
453
  statement_params=statement_params,
454
+ anonymous=True
443
455
  )
444
456
  def fit_wrapper_sproc(
445
457
  session: Session,
@@ -448,7 +460,8 @@ class RANSACRegressor(BaseTransformer):
448
460
  stage_result_file_name: str,
449
461
  input_cols: List[str],
450
462
  label_cols: List[str],
451
- sample_weight_col: Optional[str]
463
+ sample_weight_col: Optional[str],
464
+ statement_params: Dict[str, str]
452
465
  ) -> str:
453
466
  import cloudpickle as cp
454
467
  import numpy as np
@@ -515,15 +528,15 @@ class RANSACRegressor(BaseTransformer):
515
528
  api_calls=[Session.call],
516
529
  custom_tags=dict([("autogen", True)]),
517
530
  )
518
- sproc_export_file_name = session.call(
519
- fit_sproc_name,
531
+ sproc_export_file_name = fit_wrapper_sproc(
532
+ session,
520
533
  query,
521
534
  stage_transform_file_name,
522
535
  stage_result_file_name,
523
536
  identifier.get_unescaped_names(self.input_cols),
524
537
  identifier.get_unescaped_names(self.label_cols),
525
538
  identifier.get_unescaped_names(self.sample_weight_col),
526
- statement_params=statement_params,
539
+ statement_params,
527
540
  )
528
541
 
529
542
  if "|" in sproc_export_file_name:
@@ -533,7 +546,7 @@ class RANSACRegressor(BaseTransformer):
533
546
  print("\n".join(fields[1:]))
534
547
 
535
548
  session.file.get(
536
- os.path.join(stage_result_file_name, sproc_export_file_name),
549
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
537
550
  local_result_file_name,
538
551
  statement_params=statement_params
539
552
  )
@@ -579,7 +592,7 @@ class RANSACRegressor(BaseTransformer):
579
592
 
580
593
  # Register vectorized UDF for batch inference
581
594
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
582
- safe_id=self.id, method=inference_method)
595
+ safe_id=self._get_rand_id(), method=inference_method)
583
596
 
584
597
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
585
598
  # will try to pickle all of self which fails.
@@ -671,7 +684,7 @@ class RANSACRegressor(BaseTransformer):
671
684
  return transformed_pandas_df.to_dict("records")
672
685
 
673
686
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
674
- safe_id=self.id
687
+ safe_id=self._get_rand_id()
675
688
  )
676
689
 
677
690
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -727,26 +740,37 @@ class RANSACRegressor(BaseTransformer):
727
740
  # input cols need to match unquoted / quoted
728
741
  input_cols = self.input_cols
729
742
  unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
743
+ quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
730
744
 
731
745
  estimator = self._sklearn_object
732
746
 
733
- input_df = dataset[input_cols] # Select input columns with quoted column names.
734
- if hasattr(estimator, "feature_names_in_"):
735
- missing_features = []
736
- for i, f in enumerate(getattr(estimator, "feature_names_in_")):
737
- if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
738
- missing_features.append(f)
739
-
740
- if len(missing_features) > 0:
741
- raise ValueError(
742
- "The feature names should match with those that were passed during fit.\n"
743
- f"Features seen during fit call but not present in the input: {missing_features}\n"
744
- f"Features in the input dataframe : {input_cols}\n"
745
- )
746
- input_df.columns = getattr(estimator, "feature_names_in_")
747
- else:
748
- # Just rename the column names to unquoted identifiers.
749
- input_df.columns = unquoted_input_cols # Replace the quoted columns identifier with unquoted column ids.
747
+ features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
748
+ missing_features = []
749
+ features_in_dataset = set(dataset.columns)
750
+ columns_to_select = []
751
+ for i, f in enumerate(features_required_by_estimator):
752
+ if (
753
+ i >= len(input_cols)
754
+ or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
755
+ or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
756
+ and quoted_input_cols[i] not in features_in_dataset)
757
+ ):
758
+ missing_features.append(f)
759
+ elif input_cols[i] in features_in_dataset:
760
+ columns_to_select.append(input_cols[i])
761
+ elif unquoted_input_cols[i] in features_in_dataset:
762
+ columns_to_select.append(unquoted_input_cols[i])
763
+ else:
764
+ columns_to_select.append(quoted_input_cols[i])
765
+
766
+ if len(missing_features) > 0:
767
+ raise ValueError(
768
+ "The feature names should match with those that were passed during fit.\n"
769
+ f"Features seen during fit call but not present in the input: {missing_features}\n"
770
+ f"Features in the input dataframe : {input_cols}\n"
771
+ )
772
+ input_df = dataset[columns_to_select]
773
+ input_df.columns = features_required_by_estimator
750
774
 
751
775
  transformed_numpy_array = getattr(estimator, inference_method)(
752
776
  input_df
@@ -827,11 +851,18 @@ class RANSACRegressor(BaseTransformer):
827
851
  Transformed dataset.
828
852
  """
829
853
  if isinstance(dataset, DataFrame):
854
+ expected_type_inferred = "float"
855
+ # when it is classifier, infer the datatype from label columns
856
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
857
+ expected_type_inferred = convert_sp_to_sf_type(
858
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
859
+ )
860
+
830
861
  output_df = self._batch_inference(
831
862
  dataset=dataset,
832
863
  inference_method="predict",
833
864
  expected_output_cols_list=self.output_cols,
834
- expected_output_cols_type="float",
865
+ expected_output_cols_type=expected_type_inferred,
835
866
  )
836
867
  elif isinstance(dataset, pd.DataFrame):
837
868
  output_df = self._sklearn_inference(
@@ -902,10 +933,10 @@ class RANSACRegressor(BaseTransformer):
902
933
 
903
934
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
904
935
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
905
- Returns an empty list if current object is not a classifier or not yet fitted.
936
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
906
937
  """
907
938
  if getattr(self._sklearn_object, "classes_", None) is None:
908
- return []
939
+ return [output_cols_prefix]
909
940
 
910
941
  classes = self._sklearn_object.classes_
911
942
  if isinstance(classes, numpy.ndarray):
@@ -1130,7 +1161,7 @@ class RANSACRegressor(BaseTransformer):
1130
1161
  cp.dump(self._sklearn_object, local_score_file)
1131
1162
 
1132
1163
  # Create temp stage to run score.
1133
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1164
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1134
1165
  session = dataset._session
1135
1166
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1136
1167
  SqlResultValidator(
@@ -1144,8 +1175,9 @@ class RANSACRegressor(BaseTransformer):
1144
1175
  expected_value=f"Stage area {score_stage_name} successfully created."
1145
1176
  ).validate()
1146
1177
 
1147
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1148
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1178
+ # Use posixpath to construct stage paths
1179
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1180
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1149
1181
  statement_params = telemetry.get_function_usage_statement_params(
1150
1182
  project=_PROJECT,
1151
1183
  subproject=_SUBPROJECT,
@@ -1171,6 +1203,7 @@ class RANSACRegressor(BaseTransformer):
1171
1203
  replace=True,
1172
1204
  session=session,
1173
1205
  statement_params=statement_params,
1206
+ anonymous=True
1174
1207
  )
1175
1208
  def score_wrapper_sproc(
1176
1209
  session: Session,
@@ -1178,7 +1211,8 @@ class RANSACRegressor(BaseTransformer):
1178
1211
  stage_score_file_name: str,
1179
1212
  input_cols: List[str],
1180
1213
  label_cols: List[str],
1181
- sample_weight_col: Optional[str]
1214
+ sample_weight_col: Optional[str],
1215
+ statement_params: Dict[str, str]
1182
1216
  ) -> float:
1183
1217
  import cloudpickle as cp
1184
1218
  import numpy as np
@@ -1228,14 +1262,14 @@ class RANSACRegressor(BaseTransformer):
1228
1262
  api_calls=[Session.call],
1229
1263
  custom_tags=dict([("autogen", True)]),
1230
1264
  )
1231
- score = session.call(
1232
- score_sproc_name,
1265
+ score = score_wrapper_sproc(
1266
+ session,
1233
1267
  query,
1234
1268
  stage_score_file_name,
1235
1269
  identifier.get_unescaped_names(self.input_cols),
1236
1270
  identifier.get_unescaped_names(self.label_cols),
1237
1271
  identifier.get_unescaped_names(self.sample_weight_col),
1238
- statement_params=statement_params,
1272
+ statement_params,
1239
1273
  )
1240
1274
 
1241
1275
  cleanup_temp_files([local_score_file_name])
@@ -1253,18 +1287,20 @@ class RANSACRegressor(BaseTransformer):
1253
1287
  if self._sklearn_object._estimator_type == 'classifier':
1254
1288
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1255
1289
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1256
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1290
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1291
+ ([] if self._drop_input_cols else inputs) + outputs)
1257
1292
  # For regressor, the type of predict is float64
1258
1293
  elif self._sklearn_object._estimator_type == 'regressor':
1259
1294
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1260
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1261
-
1295
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1296
+ ([] if self._drop_input_cols else inputs) + outputs)
1262
1297
  for prob_func in PROB_FUNCTIONS:
1263
1298
  if hasattr(self, prob_func):
1264
1299
  output_cols_prefix: str = f"{prob_func}_"
1265
1300
  output_column_names = self._get_output_column_names(output_cols_prefix)
1266
1301
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1267
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1302
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1303
+ ([] if self._drop_input_cols else inputs) + outputs)
1268
1304
 
1269
1305
  @property
1270
1306
  def model_signatures(self) -> Dict[str, ModelSignature]: